diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..c609633 diff --git a/sys/amd64/amd64/kload.c b/sys/amd64/amd64/kload.c new file mode 100644 index 0000000..ed203ae --- /dev/null +++ b/sys/amd64/amd64/kload.c @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2011 - 2012 + * Russell Cattelan Digital Elves Inc + * Copyright (c) 2011 - 2012 + * Isilon Systems, LLC. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + */ + +#include +#include +#include +#include + +#include +#include +#include +#include + +#define GUEST_NULL_SEL 0 +#define GUEST_CODE_SEL 1 +#define GUEST_DATA_SEL 2 + +void +setup_freebsd_gdt(uint64_t *gdtr) +{ + gdtr[GUEST_NULL_SEL] = 0x0000000000000000; + gdtr[GUEST_CODE_SEL] = 0x0020980000000000; + gdtr[GUEST_DATA_SEL] = 0x0000920000000000; +} + +pt_entry_t * +kload_build_page_table(void) +{ + pt_entry_t *PT4; + pt_entry_t *PT3; + pt_entry_t *PT2; + int i; + unsigned long va; + + va = (unsigned long)kmem_alloc(kernel_map,PAGE_SIZE * 3); + PT4 = (pt_entry_t *)va; + PT3 = (pt_entry_t *)(PT4 + (PAGE_SIZE / sizeof(unsigned long))); + PT2 = (pt_entry_t *)(PT3 + (PAGE_SIZE / sizeof(unsigned long))); + + if (bootverbose) + printf("%s PT4 0x%lx (0x%lx) PT3 0x%lx (0x%lx) " + "PT2 0x%lx (0x%lx)\n", + __func__, + (unsigned long)PT4, (unsigned long)vtophys(PT4), + (unsigned long)PT3, (unsigned long)vtophys(PT3), + (unsigned long)PT2, (unsigned long)vtophys(PT2)); + + /* + * The following section is a direct copy of + * head/src/sys/boot/i386/libi386/elf64_freebsd.c:92 at r236688 + */ + + bzero(PT4, PAGE_SIZE); + bzero(PT3, PAGE_SIZE); + bzero(PT2, PAGE_SIZE); + + /* + * This is kinda brutal, but every single 1GB VM memory segment points + * to the same first 1GB of physical memory. But it is more than + * adequate. + */ + for (i = 0; i < 512; i++) { + /* + * Each slot of the level 4 pages points to the + * same level 3 page + */ + PT4[i] = (pt_entry_t)(vtophys(PT3)); + PT4[i] |= PG_V | PG_RW | PG_U; + + /* + * Each slot of the level 3 pages points to the + * same level 2 page + */ + PT3[i] = (pt_entry_t)(vtophys(PT2)); + PT3[i] |= PG_V | PG_RW | PG_U; + + /* The level 2 page slots are mapped with 2MB pages for 1GB. 
*/ + PT2[i] = i * (2 * 1024 * 1024); + PT2[i] |= PG_V | PG_RW | PG_PS | PG_U; + } + return ((pt_entry_t *)vtophys(PT4)); +} diff --git a/sys/amd64/amd64/kload_exec.S b/sys/amd64/amd64/kload_exec.S new file mode 100644 index 0000000..75bff3b --- /dev/null +++ b/sys/amd64/amd64/kload_exec.S @@ -0,0 +1,180 @@ +/* + * Copyright (c) 2011 - 2012 + * Russell Cattelan Digital Elves Inc + * Copyright (c) 2011 - 2012 + * Isilon Systems, LLC. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + */ + +#include "assym.s" + +#define CR0_PG 0x80000000 /* PaGing enable */ + +#define X86_CR0_PE 0x00000001 /* Protection Enable */ +#define X86_CR0_MP 0x00000002 /* Monitor Coprocessor */ +#define X86_CR0_EM 0x00000004 /* Emulation */ +#define X86_CR0_TS 0x00000008 /* Task Switched */ +#define X86_CR0_ET 0x00000010 /* Extension Type */ +#define X86_CR0_NE 0x00000020 /* Numeric Error */ +#define X86_CR0_WP 0x00010000 /* Write Protect */ +#define X86_CR0_AM 0x00040000 /* Alignment Mask */ +#define X86_CR0_NW 0x20000000 /* Not Write-through */ +#define X86_CR0_CD 0x40000000 /* Cache Disable */ +#define X86_CR0_PG 0x80000000 /* Paging */ + +#define X86_CR4_PSE 0x00000010 /* enable page size extensions */ +#define X86_CR4_PAE 0x00000020 /* enable physical address extensions */ + + .globl relocate_kernel +relocate_kernel: + /* first install the new page table */ + movq 32(%rcx), %rax /* page table */ + movq 40(%rcx), %r9 /* address of control_page with new PT */ + movq %rax, %cr3 + + /* + * Set cr4 to a known state: + * - page size extensions + * - physical address extension enabled + */ + movq $(X86_CR4_PSE | X86_CR4_PAE), %rax + movq %rax, %cr4 + + /* then move the stack to the end of control page */ + lea 4096(%r9), %rsp + + /* now save stuff onto the new stack */ + pushq %rcx /* arg 4 control page */ + pushq %rdx /* arg 3 code page */ + pushq %rsi /* arg 2 kern base */ + pushq %rdi /* arg 1 va_list */ + + /* zero out flags, and disable interrupts */ + pushq $0 + popfq + cli + + /* install simple gdt */ + movq 24(%r9), %rax /* gdt */ + lgdt (%rax) + movq 56(%r9), %rax + lidt (%rax) /* null idt */ + /* + * now move to the code page + * should have been passed code_page based + * on new page table + */ + movq %rdx, %r8 + addq $(identity_mapped - relocate_kernel), %r8 + /* offset of code segment in new gdt */ + pushq $0x08 + pushq %r8 + /* jump to this spot in the new page */ + lretq +identity_mapped: + + movq $0x10,%rax + movq %rax,%ds + movq %rax,%es + movq 
%rax,%fs + movq %rax,%gs + movq %rax,%ss + + /* + * Set cr0 to a known state: + * - Paging enabled + * - Alignment check disabled + * - Write protect disabled + * - No task switch + * - Don't do FP software emulation. + * - Proctected mode enabled + */ + movq %cr0, %rax + andq $~(X86_CR0_AM | X86_CR0_WP | X86_CR0_TS | X86_CR0_EM | X86_CR0_MP | X86_CR0_NE), %rax + orl $(X86_CR0_PG | X86_CR0_PE), %eax + movq %rax, %cr0 + + /* Do the copies */ + cld + /* saved list of source pages */ + movq 0(%rsp), %rbx + /* + * the initial dest page + * this is KERNBASE + 0x200000 + * kernel is contigious in memory + */ + movq 8(%rsp), %rdi +0: /* top, read another word for the indirection page */ + movq (%rbx), %rcx + + addq $8, %rbx + testq $0x1, %rcx /* is it a destination page */ + jz 1f + movq %rcx, %rdi + andq $0xFFFFFFFFfffff000, %rdi + jmp 0b +1: + testq $0x2, %rcx /* is it an indirection page */ + jz 1f + movq %rcx, %rbx + andq $0xFFFFFFFFfffff000, %rbx + jmp 0b +1: + testq $0x4, %rcx /* is it the done indicator */ + jz 1f + jmp 2f +1: + testq $0x8, %rcx /* is it the source indicator */ + jz 0b /* Ignore it otherwise */ + movq %rcx, %rsi /* For every source page do a copy */ + andq $0xfffffffffffff000, %rsi + movq $512, %rcx + rep + movsq + jmp 0b +2: + /* + * set all of the registers to known values + * leave %rsp alone + */ + xorq %rax, %rax + xorq %rbx, %rbx + xorq %rcx, %rcx + xorq %rdx, %rdx + xorq %rsi, %rsi + xorq %rdi, %rdi + xorq %rbp, %rbp + + pushq 16(%r9) /* physfree */ + movq 8(%r9), %rax /* modulep */ + salq $32, %rax + pushq %rax + + pushq $0x8 + pushq 48(%r9) /* entry # kernel entry pt */ + lretq +relocate_kernel_end: + .globl relocate_kernel_size +relocate_kernel_size: + .long relocate_kernel_end - relocate_kernel diff --git a/sys/amd64/amd64/mp_machdep.c b/sys/amd64/amd64/mp_machdep.c index d2e4aad..b085326 100644 --- a/sys/amd64/amd64/mp_machdep.c +++ b/sys/amd64/amd64/mp_machdep.c @@ -78,6 +78,9 @@ __FBSDID("$FreeBSD$"); #define BIOS_RESET (0x0f) #define 
BIOS_WARM (0x0a) + +/* quick hack to access the kload page table so we can set the APs to a known pgtbl */ +extern unsigned long kload_pgtbl; /* lock region used by kernel profiling */ int mcount_lock; @@ -1409,10 +1412,20 @@ cpustop_handler(void) void cpususpend_handler(void) { + register_t cr3, rf; + register_t cr0, cr4; u_int cpu; cpu = PCPU_GET(cpuid); + printf("%s called on cpu%d\n",__FUNCTION__,cpu); + + rf = intr_disable(); + cr3 = rcr3(); + + lapic_clear_lapic(1 /* disable lapic */); + /* shutdown interrupts to the cpu and then set the mask as stopped */ + if (savectx(susppcbs[cpu])) { ctx_fpusave(susppcbs[cpu]->pcb_fpususpend); wbinvd(); @@ -1422,20 +1435,37 @@ cpususpend_handler(void) initializecpu(); PCPU_SET(switchtime, 0); PCPU_SET(switchticks, ticks); - - /* Indicate that we are resumed */ - CPU_CLR_ATOMIC(cpu, &suspended_cpus); } + /* make sure the page table is not the same one that the boot process sets up */ + load_cr3(kload_pgtbl); + + /* Disable PGE. */ + cr4 = rcr4(); + load_cr4(cr4 & ~CR4_PGE); + + /* Disable caches (CD = 1, NW = 0); CR0_PG is ORed in, so paging stays enabled */ + cr0 = rcr0(); + load_cr0((cr0 & ~CR0_NW) | CR0_CD | CR0_PG); + + /* Flushes caches and TLBs. 
*/ + wbinvd(); + invltlb(); + + halt(); + /* Wait for resume */ while (!CPU_ISSET(cpu, &started_cpus)) ia32_pause(); CPU_CLR_ATOMIC(cpu, &started_cpus); + CPU_CLR_ATOMIC(cpu, &stopped_cpus); - /* Resume MCA and local APIC */ + /* Restore CR3 and enable interrupts */ + load_cr3(cr3); mca_resume(); lapic_setup(0); + intr_restore(rf); } /* diff --git a/sys/amd64/include/apicvar.h b/sys/amd64/include/apicvar.h index ae2f5b9..7fded95 100644 --- a/sys/amd64/include/apicvar.h +++ b/sys/amd64/include/apicvar.h @@ -227,6 +227,7 @@ int lapic_set_lvt_triggermode(u_int apic_id, u_int lvt, enum intr_trigger trigger); void lapic_set_tpr(u_int vector); void lapic_setup(int boot); +void lapic_clear_lapic(u_int); #endif /* !LOCORE */ #endif /* _MACHINE_APICVAR_H_ */ diff --git a/sys/amd64/include/intr_machdep.h b/sys/amd64/include/intr_machdep.h index 700e35f..a8ef1fc 100644 --- a/sys/amd64/include/intr_machdep.h +++ b/sys/amd64/include/intr_machdep.h @@ -158,6 +158,7 @@ struct intsrc *intr_lookup_source(int vector); int intr_register_pic(struct pic *pic); int intr_register_source(struct intsrc *isrc); int intr_remove_handler(void *cookie); +int intr_clear_all_handlers(void); void intr_resume(void); void intr_suspend(void); void intrcnt_add(const char *name, u_long **countp); diff --git a/sys/boot/common/load_elf.c b/sys/boot/common/load_elf.c index e1e6de7..36df22c 100644 --- a/sys/boot/common/load_elf.c +++ b/sys/boot/common/load_elf.c @@ -317,25 +317,30 @@ __elfN(loadimage)(struct preloaded_file *fp, elf_file_t ef, u_int64_t off) continue; #ifdef ELF_VERBOSE - printf("Segment: 0x%lx@0x%lx -> 0x%lx-0x%lx", - (long)phdr[i].p_filesz, (long)phdr[i].p_offset, - (long)(phdr[i].p_vaddr + off), - (long)(phdr[i].p_vaddr + off + phdr[i].p_memsz - 1)); + printf("Segment: filesz 0x%llx @ 0x%016llx -> vaddr_range 0x%016llx - 0x%016llx", + (long long)phdr[i].p_filesz, (long long)phdr[i].p_offset, + (long long)(phdr[i].p_vaddr + off), + (long long)(phdr[i].p_vaddr + off + phdr[i].p_memsz - 
1)); #else if ((phdr[i].p_flags & PF_W) == 0) { - printf("text=0x%lx ", (long)phdr[i].p_filesz); + printf("text=0x%llx ", (long long)phdr[i].p_filesz); } else { - printf("data=0x%lx", (long)phdr[i].p_filesz); + printf("data=0x%llx", (long long)phdr[i].p_filesz); if (phdr[i].p_filesz < phdr[i].p_memsz) - printf("+0x%lx", (long)(phdr[i].p_memsz -phdr[i].p_filesz)); + printf("+0x%llx", (long long)(phdr[i].p_memsz -phdr[i].p_filesz)); printf(" "); } #endif fpcopy = 0; if (ef->firstlen > phdr[i].p_offset) { fpcopy = ef->firstlen - phdr[i].p_offset; - archsw.arch_copyin(ef->firstpage + phdr[i].p_offset, - phdr[i].p_vaddr + off, fpcopy); + printf("\n%s:%d firstpage 0x%lx p_offset 0x%lx p_vaddr 0x%lx off 0x%lx\n", + __FUNCTION__,__LINE__, + (unsigned long)ef->firstpage, + (unsigned long) phdr[i].p_offset, + (unsigned long)phdr[i].p_vaddr, + (unsigned long)off); + archsw.arch_copyin(ef->firstpage + phdr[i].p_offset, phdr[i].p_vaddr + off, fpcopy); } if (phdr[i].p_filesz > fpcopy) { if (kern_pread(ef->fd, phdr[i].p_vaddr + off + fpcopy, @@ -348,9 +353,11 @@ __elfN(loadimage)(struct preloaded_file *fp, elf_file_t ef, u_int64_t off) /* clear space from oversized segments; eg: bss */ if (phdr[i].p_filesz < phdr[i].p_memsz) { #ifdef ELF_VERBOSE - printf(" (bss: 0x%lx-0x%lx)", - (long)(phdr[i].p_vaddr + off + phdr[i].p_filesz), - (long)(phdr[i].p_vaddr + off + phdr[i].p_memsz - 1)); + printf("\n\t(bss: 0x%lx-0x%lx) vaddr 0x%lx size 0x%lx clearing\n", + (long)(phdr[i].p_vaddr + off + phdr[i].p_filesz), + (long)(phdr[i].p_vaddr + off + phdr[i].p_memsz - 1), + (long)(phdr[i].p_vaddr + phdr[i].p_filesz), + (long)(phdr[i].p_memsz - phdr[i].p_filesz) ); #endif kern_bzero(phdr[i].p_vaddr + off + phdr[i].p_filesz, diff --git a/sys/boot/userboot/ficl/Makefile b/sys/boot/userboot/ficl/Makefile index 42b9309..d7818b9 100644 --- a/sys/boot/userboot/ficl/Makefile +++ b/sys/boot/userboot/ficl/Makefile @@ -62,6 +62,10 @@ softcore.c: ${SOFTWORDS} softcore.awk (cd ${.CURDIR}/../../ficl/softwords; 
cat ${SOFTWORDS} \ | awk -f softcore.awk -v datestamp="`LC_ALL=C date`") > ${.TARGET} +beforedepend ${OBJS}: no-machine + +no-machine: + rm -f ${.CURDIR}/../../ficl/machine #.if ${MACHINE_CPUARCH} == "amd64" #${SRCS:M*.c:R:S/$/.o/g}: machine # diff --git a/sys/boot/userboot/test/test.c b/sys/boot/userboot/test/test.c index 36258a7..77202c1 100644 --- a/sys/boot/userboot/test/test.c +++ b/sys/boot/userboot/test/test.c @@ -376,6 +376,12 @@ test_getenv(void *arg, int idx) return (vars[idx]); } +static int +test_buildsmap(void *arg, void **smap_void, size_t *outlen) +{ + return (0); +} + struct loader_callbacks cb = { .putc = test_putc, .getc = test_getc, @@ -405,6 +411,7 @@ struct loader_callbacks cb = { .getmem = test_getmem, .getenv = test_getenv, + .buildsmap = test_buildsmap, }; void @@ -464,5 +471,5 @@ main(int argc, char** argv) term.c_lflag &= ~(ICANON|ECHO); tcsetattr(0, TCSAFLUSH, &term); - func(&cb, NULL, USERBOOT_VERSION_3, disk_fd >= 0); + func(&cb, NULL, USERBOOT_VERSION_4, disk_fd >= 0); } diff --git a/sys/boot/userboot/userboot.h b/sys/boot/userboot/userboot.h index e38927e..0a9d2f1 100644 --- a/sys/boot/userboot/userboot.h +++ b/sys/boot/userboot/userboot.h @@ -32,6 +32,7 @@ #define USERBOOT_VERSION_1 1 #define USERBOOT_VERSION_2 2 #define USERBOOT_VERSION_3 3 +#define USERBOOT_VERSION_4 4 /* * Exit codes from the loader @@ -195,4 +196,11 @@ struct loader_callbacks { * each invocation will add 1 to the previous value of 'num'. 
*/ const char * (*getenv)(void *arg, int num); + + /* + * Build the system memory map (SMAP). + * Lets kload hand back a copy of the running system's SMAP through + * smap/len; returns 0 on success, non-zero on failure. + */ + int (*buildsmap)(void *arg, void **smap, size_t *len); }; diff --git a/sys/boot/userboot/userboot/bootinfo64.c b/sys/boot/userboot/userboot/bootinfo64.c index fc7c14d..28c47ab 100644 --- a/sys/boot/userboot/userboot/bootinfo64.c +++ b/sys/boot/userboot/userboot/bootinfo64.c @@ -187,33 +187,47 @@ bios_addsmapdata(struct preloaded_file *kfp) { uint64_t lowmem, highmem; - int smapnum, len; - struct smap smap[3], *sm; + int smapnum; + size_t len; /* buildsmap() writes through a size_t *, so this must not be an int */ + struct smap *smap = NULL, *sm = NULL; + int error = 1; - CALLBACK(getmem, &lowmem, &highmem); + printf("%s\n",__FUNCTION__); - sm = &smap[0]; + if (callbacks->buildsmap) + error = callbacks->buildsmap(NULL, (void **)&smap, &len); - sm->base = 0; /* base memory */ - sm->length = 640 * 1024; - sm->type = SMAP_TYPE_MEMORY; - sm++; + /* either there is no buildsmap function or it failed + * revert back to using getmem and a simple smap + */ - sm->base = 0x100000; /* extended memory */ - sm->length = lowmem - 0x100000; - sm->type = SMAP_TYPE_MEMORY; - sm++; + if (error) { + smap = sm = malloc(3 * sizeof(struct smap)); + CALLBACK(getmem, &lowmem, &highmem); - smapnum = 2; + sm->base = 0; /* base memory */ + sm->length = 640 * 1024; + sm->type = SMAP_TYPE_MEMORY; + sm++; - if (highmem != 0) { - sm->base = 4 * GB; - sm->length = highmem; - sm->type = SMAP_TYPE_MEMORY; - smapnum++; - } + sm->base = 0x100000; /* extended memory */ + sm->length = lowmem - 0x100000; + sm->type = SMAP_TYPE_MEMORY; + sm++; - len = smapnum * sizeof (struct smap); - file_addmetadata(kfp, MODINFOMD_SMAP, len, &smap[0]); + smapnum = 2; + + if (highmem != 0) { + sm->base = 4 * GB; + sm->length = highmem; + sm->type = SMAP_TYPE_MEMORY; + smapnum++; + } + + len = smapnum * sizeof (struct smap); + } + + file_addmetadata(kfp, MODINFOMD_SMAP, len, smap); + free(smap); } /* diff --git a/sys/boot/userboot/userboot/conf.c 
b/sys/boot/userboot/userboot/conf.c index 0c57eba..d2c1067 100644 --- a/sys/boot/userboot/userboot/conf.c +++ b/sys/boot/userboot/userboot/conf.c @@ -86,8 +86,11 @@ struct file_format *file_formats[] = { * data structures from bootstrap.h as well. */ extern struct console userboot_console; +extern struct console comconsole; + struct console *consoles[] = { &userboot_console, + &comconsole, NULL }; diff --git a/sys/boot/userboot/userboot/main.c b/sys/boot/userboot/userboot/main.c index 4092b9b..0e2e0b7 100644 --- a/sys/boot/userboot/userboot/main.c +++ b/sys/boot/userboot/userboot/main.c @@ -36,8 +36,9 @@ __FBSDID("$FreeBSD$"); #include "disk.h" #include "libuserboot.h" -#define USERBOOT_VERSION USERBOOT_VERSION_3 +#define USERBOOT_VERSION USERBOOT_VERSION_4 +static char malloc_buf[512*1024]; struct loader_callbacks *callbacks; void *callbacks_arg; @@ -67,31 +68,47 @@ exit(int v) } void +loader_init(void) +{ + /* + * It does not hurt to re-call this as it just sets global + * ptrs that never change + */ + setheap((void *)malloc_buf, (void *)(malloc_buf + 512*1024)); +} + +int loader_main(struct loader_callbacks *cb, void *arg, int version, int ndisks) { - static char malloc[512*1024]; const char *var; int i; - - if (version != USERBOOT_VERSION) - abort(); - - callbacks = cb; - callbacks_arg = arg; - userboot_disk_maxunit = ndisks; - + /* * initialise the heap as early as possible. Once this is done, * alloc() is usable. The stack is buried inside us, so this is * safe. */ - setheap((void *)malloc, (void *)(malloc + 512*1024)); + loader_init(); + + if (cb != NULL) { + callbacks = cb; + callbacks_arg = arg; + userboot_disk_maxunit = ndisks; + } else { + return (EFAULT); + } - /* + /* * Hook up the console */ cons_probe(); + if (version != USERBOOT_VERSION) { + printf("%s: version expected %d got %d\n", __func__, + USERBOOT_VERSION, version); + return(EOPNOTSUPP); + } + /* * March through the device switch probing for things. 
*/ @@ -128,11 +145,11 @@ loader_main(struct loader_callbacks *cb, void *arg, int version, int ndisks) extract_currdev(); if (setjmp(jb)) - return; + return (0); interact(); /* doesn't return */ - exit(0); + return(0); } /* diff --git a/sys/boot/userboot/userboot/userboot_cons.c b/sys/boot/userboot/userboot/userboot_cons.c index 5ecb7c8..5a9a573 100644 --- a/sys/boot/userboot/userboot/userboot_cons.c +++ b/sys/boot/userboot/userboot/userboot_cons.c @@ -50,6 +50,18 @@ struct console userboot_console = { userboot_cons_poll, }; + +struct console comconsole = { + "comconsole", + "comconsole", + 0, + userboot_cons_probe, + userboot_cons_init, + userboot_cons_putchar, + userboot_cons_getchar, + userboot_cons_poll, +}; + static void userboot_cons_probe(struct console *cp) { diff --git a/sys/conf/files b/sys/conf/files index 5554ec0..49de90a 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -2607,6 +2607,7 @@ kern/kern_khelp.c standard kern/kern_kthread.c standard kern/kern_ktr.c optional ktr kern/kern_ktrace.c standard +kern/kern_kload.c standard kern/kern_linker.c standard kern/kern_lock.c standard kern/kern_lockf.c standard diff --git a/sys/conf/files.amd64 b/sys/conf/files.amd64 index b6a474e..2447c7bd 100644 --- a/sys/conf/files.amd64 +++ b/sys/conf/files.amd64 @@ -93,6 +93,9 @@ acpi_wakedata.h optional acpi \ no-obj no-implicit-rule before-depend \ clean "acpi_wakedata.h" # +amd64/amd64/kload_exec.S standard +amd64/amd64/kload.c standard +# amd64/amd64/amd64_mem.c optional mem #amd64/amd64/apic_vector.S standard amd64/amd64/atomic.c standard diff --git a/sys/kern/init_sysent.c b/sys/kern/init_sysent.c index 68c24e0..f81a05f 100644 --- a/sys/kern/init_sysent.c +++ b/sys/kern/init_sysent.c @@ -3,7 +3,7 @@ * * DO NOT EDIT-- this file is automatically generated. 
* $FreeBSD$ - * created from FreeBSD: head/sys/kern/syscalls.master 242958 2012-11-13 12:52:31Z kib + * created from FreeBSD */ #include "opt_compat.h" @@ -567,4 +567,5 @@ struct sysent sysent[] = { { AS(posix_fallocate_args), (sy_call_t *)sys_posix_fallocate, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 530 = posix_fallocate */ { AS(posix_fadvise_args), (sy_call_t *)sys_posix_fadvise, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 531 = posix_fadvise */ { AS(wait6_args), (sy_call_t *)sys_wait6, AUE_WAIT6, NULL, 0, 0, 0, SY_THR_STATIC }, /* 532 = wait6 */ + { AS(kload_args), (sy_call_t *)sys_kload, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 533 = kload */ }; diff --git a/sys/kern/kern_kload.c b/sys/kern/kern_kload.c new file mode 100644 index 0000000..636830e --- /dev/null +++ b/sys/kern/kern_kload.c @@ -0,0 +1,424 @@ +/* + * Copyright (c) 2011 - 2012 + * Russell Cattelan Digital Elves Inc + * Copyright (c) 2011 - 2012 + * Isilon Systems, LLC. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + + +static struct kload_items *k_items = NULL; +static MALLOC_DEFINE(M_KLOAD, "kload_items", "kload items"); +int kload_ready = 0; + +static vm_offset_t kload_image_va = 0; +/* + * Warning this is somewhat arbitrary, but should go + * away once the allocate delays in kmem_alloc_attr are + * fixed. 
+ */ +#define IMAGE_PREALLOC (24 * 1024 * 1024) + +static void kload_init(void); +SYSINIT(kload_mem, SI_SUB_DRIVERS, SI_ORDER_ANY, kload_init, NULL); + +static int kload_copyin_segment(struct kload_segment *,int); +static int kload_add_page(struct kload_items *, unsigned long); +static void kload_shutdown_final(void *, int); +static struct region_descriptor *mygdt; +static vm_offset_t control_page; +static vm_offset_t code_page; +static void *gdt_desc; +static pt_entry_t *pgtbl; +unsigned long kload_pgtbl; +static unsigned long max_addr = 0 , min_addr = 0; + +#define GIGMASK (~((1<<30)-1)) +#define ONEGIG (1<<30) +#define GUEST_GDTR_LIMIT (3 * 8 - 1) + +extern char kernphys[]; +#define KLOADBASE KERNBASE + +static void +update_max_min(vm_offset_t addr, int count) +{ + int i; + + for(i = 0; i < count; i++) { + if (vtophys(addr + (i * PAGE_SIZE)) < min_addr) + min_addr = vtophys(addr + (i * PAGE_SIZE)); + if (vtophys(addr + (i * PAGE_SIZE)) > max_addr) + max_addr = vtophys(addr + (i * PAGE_SIZE)); + } +} + +static vm_offset_t +kload_kmem_alloc(vm_map_t map, vm_size_t size) +{ + vm_offset_t va; + int num_pages; + + va = kmem_alloc_attr(map, size, + M_WAITOK | M_ZERO, + 0, (1 << 30) /* 1Gig limit */, + VM_MEMATTR_WRITE_COMBINING); + + num_pages = roundup2(size,PAGE_SIZE) >> PAGE_SHIFT; + update_max_min(va, num_pages); + + return (va); + } + +struct kload_cpage { + unsigned long kcp_magic; /* 0 */ + unsigned long kcp_modulep; /* 1 */ + unsigned long kcp_physfree; /* 2 */ + unsigned long kcp_gdt; /* 3 */ + unsigned long kcp_pgtbl; /* 4 */ + unsigned long kcp_cp; /* 5 */ + unsigned long kcp_entry_pt; /* 6 */ + unsigned long kcp_idt; /* 7 */ +} __packed; + +static int +kload_add_page(struct kload_items *items, unsigned long item_m) +{ + vm_paddr_t phys; + unsigned long va; + + if (*items->item != 0) { + printf(" item != 0 0x%lx\n",*items->item); + items->item++; + items->i_count--; + } + + + if ((items->item == items->last_item) || (items->i_count == 0)) { + /* out of 
space in current page grab a new one */ + va = (unsigned long)kload_kmem_alloc(kernel_map,PAGE_SIZE); + if (items->head_va == 0) + items->head_va = va; + + phys = vtophys(va); + /* store the address of indirect page */ + *items->item = (unsigned long) + (vtophys(va) + KLOADBASE) | KLOAD_INDIRECT; + items->item = (unsigned long *)va; + /* ok now move to new page to start storing address */ + items->last_item = (unsigned long *)va + + ((PAGE_SIZE/sizeof(unsigned long)) - 1); + items->i_count = ((PAGE_SIZE/sizeof(unsigned long)) - 1); + } + *items->item = item_m; + items->item++; + items->i_count--; + + return (0); +} + +static void +kload_init(void) +{ + int size = IMAGE_PREALLOC; + kload_image_va = kload_kmem_alloc(kernel_map, size); + printf("%s 0x%lx preallocated size %d\n", __func__, + kload_image_va, size); +} + +int +kload_copyin_segment(struct kload_segment *khdr, int seg) +{ + int i; + int num_pages; + int error = 0; + vm_offset_t va = kload_image_va; + + num_pages = roundup2(khdr->k_memsz,PAGE_SIZE) >> PAGE_SHIFT; + + /* check to make sure the preallocated space is big enough */ + if (va && ((num_pages * PAGE_SIZE) > IMAGE_PREALLOC)) { + printf("%s size over 24Meg %d\n", __func__, + num_pages * PAGE_SIZE); + kmem_free(kernel_map, va, IMAGE_PREALLOC); + kload_image_va = va = 0; /* drop the stale pointer so later calls do not reuse freed memory */ + } + + if (va == 0) { + va = kload_kmem_alloc(kernel_map, num_pages * PAGE_SIZE); + if (va == 0) + return (ENOMEM); + } + + /* need to set up a START dst page */ + for (i = 0; i < num_pages; i++) { + kload_add_page(k_items, + (vtophys(va + (i * PAGE_SIZE)) + KLOADBASE) | KLOAD_SOURCE); + } + printf("%s starting copyin... 
", __func__); + *k_items->item = KLOAD_DONE; + if ((error = copyin(khdr->k_buf, (void *)va, khdr->k_memsz)) != 0) + return (error); + printf("copied %d bytes to va %p done marker at %p\n", + (int)khdr->k_memsz, (void *)va, &k_items->item ); + + return (error); +} + +int +sys_kload(struct thread *td, struct kload_args *uap) +{ + struct region_descriptor *null_idt; + struct kload_cpage *k_cpage; + struct kload kld; + int error = 0; + int i; + size_t bufsize = uap->buflen; + + error = priv_check(td, PRIV_REBOOT); + if (error) + return (error); + + /* + * hook into the shutdown/reboot path so we end up here before cpu + * reset. NOTE(review): this runs on every call; confirm repeat registrations are harmless + */ + EVENTHANDLER_REGISTER(shutdown_final, kload_shutdown_final, + NULL, SHUTDOWN_PRI_KLOAD); + + max_addr = 0; + min_addr = ~0UL; + + if (bufsize != sizeof(struct kload)) { + printf("Hmm size not right %ju %ju\n", (uintmax_t)bufsize, + (uintmax_t)sizeof(struct kload)); + return (EINVAL); /* a wrong-sized request is an error, not success */ + } + if ((error = copyin(uap->buf, &kld, bufsize)) != 0) + return (error); + + if (k_items == NULL) { + if((k_items = malloc(sizeof(struct kload_items), + M_KLOAD, M_WAITOK|M_ZERO)) == NULL) + return (ENOMEM); + + k_items->head = 0; + k_items->head_va = 0; + k_items->item = &k_items->head; + k_items->last_item = &k_items->head; + } + + control_page = kload_kmem_alloc(kernel_map, PAGE_SIZE * 2); + k_cpage = (struct kload_cpage *)control_page; + code_page = control_page + PAGE_SIZE; + + printf("copy from %p kernel_kump to 0x%lx size %d\n", + relocate_kernel, (unsigned long)code_page, relocate_kernel_size); + memset((void *)control_page, 0, PAGE_SIZE * 2); + memcpy((void *)code_page, relocate_kernel, relocate_kernel_size); + + k_cpage->kcp_magic = 0xC0DE; + k_cpage->kcp_modulep = kld.k_modulep; + k_cpage->kcp_physfree = kld.k_physfree; + + mygdt = (struct region_descriptor *)kload_kmem_alloc(kernel_map, + PAGE_SIZE); + k_cpage->kcp_gdt = (unsigned long)vtophys(mygdt) + KLOADBASE; + + gdt_desc = (char *)mygdt + sizeof(struct region_descriptor); + 
setup_freebsd_gdt(gdt_desc); + mygdt->rd_limit = GUEST_GDTR_LIMIT; + mygdt->rd_base = (unsigned long)(vtophys(gdt_desc) + KLOADBASE); + + /* + * we pass the virt addr of control_page but we need + * new virt addr as well + */ + k_cpage->kcp_cp = (unsigned long)(vtophys(control_page) + KLOADBASE); + k_cpage->kcp_entry_pt = kld.k_entry_pt; + + /* 10 segments should be more than enough */ + for (i = 0 ; (i < kld.num_hdrs && i <= 10); i++) + kload_copyin_segment(&kld.khdr[i],i); + + null_idt = (struct region_descriptor*) + kload_kmem_alloc(kernel_map,PAGE_SIZE); + k_cpage->kcp_idt = (unsigned long)vtophys(null_idt) + KLOADBASE; + /* Wipe the IDT. */ + null_idt->rd_limit = 0; + null_idt->rd_base = 0; + /* + * This must be built after all other allocations so it can + * build a page table entry based on min max addresses + */ + /* returns new page table phys addr */ + pgtbl = kload_build_page_table(); + if (pgtbl == NULL) + return (ENOMEM); + kload_pgtbl = (unsigned long)pgtbl; + k_cpage->kcp_pgtbl = (unsigned long)pgtbl; + + kload_ready = 1; + + if (bootverbose) + printf("%s:\n\t" + "head_va 0x%lx (phys 0x%lx)\n\t" + "kernbase 0x%lx\n\t" + "code_page 0x%lx (phys 0x%lx)\n\t" + "control_page 0x%lx (phys 0x%lx)\n\t" + "gdt 0x%lx (phys 0x%lx)\n\t" + "idt 0x%lx (phys 0x%lx)\n\t" + "k_entry_pt 0x%lx\n\t" + "pgtbl (phys 0x%lx)\n\t" + "max_addr (phys 0x%lx)\n\t" + "min_addr (phys 0x%lx)\n\t" + "modulep (phys 0x%lx)\n\t" + "physfree (phys 0x%lx)\n", + __func__, + (unsigned long)k_items->head_va, + (unsigned long)vtophys(k_items->head_va), + (unsigned long)(KERNBASE + (vm_paddr_t)kernphys), + (unsigned long)(control_page + PAGE_SIZE), + (unsigned long)vtophys(control_page + PAGE_SIZE), + (unsigned long)control_page, + (unsigned long)vtophys(control_page), + (unsigned long)mygdt,(unsigned long)vtophys(mygdt), + (unsigned long)null_idt,(unsigned long)vtophys(null_idt), + (unsigned long)kld.k_entry_pt, + (unsigned long)pgtbl, + (unsigned long)max_addr, + (unsigned long)min_addr, + 
(unsigned long)kld.k_modulep, + (unsigned long)kld.k_physfree); + + if(!(uap->flags & (KLOAD_EXEC | KLOAD_REBOOT))) + goto just_load; +#if defined(SMP) + /* + * Bind us to CPU 0 so that all shutdown code runs there. Some + * systems don't shutdown properly (i.e., ACPI power off) if we + * run on another processor. + */ + printf("Binding process to cpu 0\n"); + thread_lock(curthread); + sched_bind(curthread, 0); + thread_unlock(curthread); + KASSERT(PCPU_GET(cpuid) == 0, ("%s: not running on cpu 0", __func__)); +#endif + if(uap->flags & KLOAD_REBOOT) { + mtx_lock(&Giant); + kern_reboot(RB_KLOAD); + /* should not return */ + mtx_unlock(&Giant); + } + /* + * the reboot code will do a module shutdown so it is not + * part kload_shutdown_final but it needs to happen. + * So in the case of exec run it here + */ + if (bootverbose) + printf("%s: module_shutdown\n", __func__); + kload_module_shutdown(); + kload_shutdown_final(NULL, RB_KLOAD); +just_load: + printf("%s: Kernel image loaded waiting for reboot\n", __func__); + return (0); +} + +static void +kload_shutdown_final(void *arg, int howto) +{ + int ret; + cpuset_t map; + + /* Just to make sure we are on cpu 0 */ + KASSERT(PCPU_GET(cpuid) == 0, ("%s: not running on cpu 0", __func__)); + if (kload_ready) { + printf("%s: suspend APs\n",__FUNCTION__); + map = all_cpus; + /* we should be bound to cpu 0 at this point */ + printf("%s cpuid %d\n",__FUNCTION__,PCPU_GET(cpuid)); + CPU_CLR(PCPU_GET(cpuid), &map); + CPU_NAND(&map, &stopped_cpus); + if (!CPU_EMPTY(&map)) { + printf("cpu_reset: Stopping other CPUs\n"); + suspend_cpus(map); + } + + if (bootverbose) + printf("%s: clear all handlers\n", __func__); + intr_clear_all_handlers(); + + if (bootverbose) + printf("%s: loapic_clear_lapic\n", __func__); + lapic_clear_lapic(1); + + intr_suspend(); + + if (bootverbose) + printf("%s disable_interrupts cpuid %d\n", + __func__, PCPU_GET(cpuid)); + disable_intr(); + + printf("calling relocate_kernel\n"); + ret = 
relocate_kernel(vtophys(k_items->head_va) + KLOADBASE, + /* dest addr i.e. overwrite existing kernel */ + KERNBASE + (vm_paddr_t)kernphys, + vtophys(code_page) + KLOADBASE, + control_page); + /* currently this will never happen */ + printf("\trelocate_new_kernel returned %d\n",ret); + } else { + printf("kload_shutdown_final called without " + "a new kernel loaded\n"); + } +} diff --git a/sys/kern/kern_module.c b/sys/kern/kern_module.c index b769320..2a880c5 100644 --- a/sys/kern/kern_module.c +++ b/sys/kern/kern_module.c @@ -64,6 +64,7 @@ static TAILQ_HEAD(modulelist, module) modules; struct sx modules_sx; static int nextid = 1; static void module_shutdown(void *, int); +void kload_module_shutdown(void); static int modevent_nop(module_t mod, int what, void *arg) @@ -107,6 +108,12 @@ module_shutdown(void *arg1, int arg2) } void +kload_module_shutdown(void) { + module_shutdown(NULL, 0); +} + + +void module_register_init(const void *arg) { const moduledata_t *data = (const moduledata_t *)arg; diff --git a/sys/kern/syscalls.c b/sys/kern/syscalls.c index 96f2400..6edd7fa 100644 --- a/sys/kern/syscalls.c +++ b/sys/kern/syscalls.c @@ -3,7 +3,7 @@ * * DO NOT EDIT-- this file is automatically generated. 
* $FreeBSD$ - * created from FreeBSD: head/sys/kern/syscalls.master 242958 2012-11-13 12:52:31Z kib + * created from FreeBSD */ const char *syscallnames[] = { @@ -540,4 +540,5 @@ const char *syscallnames[] = { "posix_fallocate", /* 530 = posix_fallocate */ "posix_fadvise", /* 531 = posix_fadvise */ "wait6", /* 532 = wait6 */ + "kload", /* 533 = kload */ }; diff --git a/sys/kern/syscalls.master b/sys/kern/syscalls.master index 148dea3..eb2b648 100644 --- a/sys/kern/syscalls.master +++ b/sys/kern/syscalls.master @@ -955,5 +955,7 @@ int *status, int options, \ struct __wrusage *wrusage, \ siginfo_t *info); } +533 AUE_NULL STD { int kload(const void *buf, size_t buflen, \ + int flags); } ; Please copy any additions and changes to the following compatability tables: ; sys/compat/freebsd32/syscalls.master diff --git a/sys/kern/systrace_args.c b/sys/kern/systrace_args.c index c755f92..c48057a 100644 --- a/sys/kern/systrace_args.c +++ b/sys/kern/systrace_args.c @@ -3286,6 +3286,15 @@ systrace_args(int sysnum, void *params, uint64_t *uarg, int *n_args) *n_args = 6; break; } + /* kload */ + case 533: { + struct kload_args *p = params; + uarg[0] = (intptr_t) p->buf; /* const void * */ + uarg[1] = p->buflen; /* size_t */ + iarg[2] = p->flags; /* int */ + *n_args = 3; + break; + } default: *n_args = 0; break; @@ -8745,6 +8754,22 @@ systrace_entry_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) break; }; break; + /* kload */ + case 533: + switch(ndx) { + case 0: + p = "const void *"; + break; + case 1: + p = "size_t"; + break; + case 2: + p = "int"; + break; + default: + break; + }; + break; default: break; }; @@ -10638,6 +10663,11 @@ systrace_return_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) if (ndx == 0 || ndx == 1) p = "int"; break; + /* kload */ + case 533: + if (ndx == 0 || ndx == 1) + p = "int"; + break; default: break; }; diff --git a/sys/sys/eventhandler.h b/sys/sys/eventhandler.h index 6d37bf4..31aa4e4 100644 --- a/sys/sys/eventhandler.h +++ 
b/sys/sys/eventhandler.h @@ -173,6 +173,7 @@ typedef void (*shutdown_fn)(void *, int); #define SHUTDOWN_PRI_FIRST EVENTHANDLER_PRI_FIRST #define SHUTDOWN_PRI_DEFAULT EVENTHANDLER_PRI_ANY #define SHUTDOWN_PRI_LAST EVENTHANDLER_PRI_LAST +#define SHUTDOWN_PRI_KLOAD EVENTHANDLER_PRI_LAST - 100 EVENTHANDLER_DECLARE(shutdown_pre_sync, shutdown_fn); /* before fs sync */ EVENTHANDLER_DECLARE(shutdown_post_sync, shutdown_fn); /* after fs sync */ diff --git a/sys/sys/kload.h b/sys/sys/kload.h new file mode 100644 index 0000000..0920176 --- /dev/null +++ b/sys/sys/kload.h @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2011 - 2012 + * Russell Cattelan Digital Elves Inc + * Copyright (c) + * Isilon Systems, LLC. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + */ + +#ifndef __KLOAD_H__ +#define __KLOAD_H__ + +#include +#include + +#include +#include + +#define KLOAD_LOAD 0 +#define KLOAD_REBOOT (1 << 0 ) +#define KLOAD_EXEC (1 << 1 ) + +struct kload_segment { + void *k_buf; + size_t k_memsz; + unsigned long *k_pages; + unsigned long k_seg_start; +}; + +struct kload { + struct kload_segment khdr[10]; + int num_hdrs; + unsigned long k_entry_pt; + unsigned int k_modulep; + unsigned int k_physfree; +}; + +//typedef u_long kload_item_t; +#define KLOAD_DESTINATION 0x1 +#define KLOAD_INDIRECT 0x2 +#define KLOAD_DONE 0x4 +#define KLOAD_SOURCE 0x8 + +struct kload_items { + unsigned long head; + vm_offset_t head_va; + unsigned long *last_item; + unsigned long *item; + int i_count; + unsigned long flags; /* not used yet */ +}; + +/* + * defined in /kload.c + */ +pt_entry_t * kload_build_page_table(void); +void setup_freebsd_gdt(uint64_t *); +void kload_module_shutdown(void); + +/* + * defined in /kload_exec.S + */ +unsigned long relocate_kernel(unsigned long indirection_page, + unsigned long page_list, unsigned long code_page, + unsigned long control_page); +extern int relocate_kernel_size; + +#endif diff --git a/sys/sys/reboot.h b/sys/sys/reboot.h index 6b8e25e..9b70160 100644 --- a/sys/sys/reboot.h +++ b/sys/sys/reboot.h @@ -59,6 +59,7 @@ #define RB_RESERVED1 0x40000 /* reserved for internal use of boot blocks */ #define RB_RESERVED2 0x80000 /* reserved for internal use of boot blocks */ #define 
RB_PAUSE 0x100000 /* pause after each output line during probe */ +#define RB_KLOAD 0x200000 /* reboot using kload'ed kernel image */ #define RB_MULTIPLE 0x20000000 /* use multiple consoles */ #define RB_BOOTINFO 0x80000000 /* have `struct bootinfo *' arg */ diff --git a/sys/sys/syscall.h b/sys/sys/syscall.h index 6b0cd67..f90cad2 100644 --- a/sys/sys/syscall.h +++ b/sys/sys/syscall.h @@ -3,7 +3,7 @@ * * DO NOT EDIT-- this file is automatically generated. * $FreeBSD$ - * created from FreeBSD: head/sys/kern/syscalls.master 242958 2012-11-13 12:52:31Z kib + * created from FreeBSD */ #define SYS_syscall 0 @@ -452,4 +452,5 @@ #define SYS_posix_fallocate 530 #define SYS_posix_fadvise 531 #define SYS_wait6 532 -#define SYS_MAXSYSCALL 533 +#define SYS_kload 533 +#define SYS_MAXSYSCALL 534 diff --git a/sys/sys/syscall.mk b/sys/sys/syscall.mk index 25f0470..39fd05b 100644 --- a/sys/sys/syscall.mk +++ b/sys/sys/syscall.mk @@ -1,7 +1,7 @@ # FreeBSD system call names. # DO NOT EDIT-- this file is automatically generated. # $FreeBSD$ -# created from FreeBSD: head/sys/kern/syscalls.master 242958 2012-11-13 12:52:31Z kib +# created from FreeBSD MIASM = \ syscall.o \ exit.o \ @@ -400,4 +400,5 @@ MIASM = \ rctl_remove_rule.o \ posix_fallocate.o \ posix_fadvise.o \ - wait6.o + wait6.o \ + kload.o diff --git a/sys/sys/sysproto.h b/sys/sys/sysproto.h index ef59ad5..81dac15 100644 --- a/sys/sys/sysproto.h +++ b/sys/sys/sysproto.h @@ -3,7 +3,7 @@ * * DO NOT EDIT-- this file is automatically generated. 
* $FreeBSD$ - * created from FreeBSD: head/sys/kern/syscalls.master 242958 2012-11-13 12:52:31Z kib + * created from FreeBSD */ #ifndef _SYS_SYSPROTO_H_ @@ -1762,6 +1762,11 @@ struct wait6_args { char wrusage_l_[PADL_(struct __wrusage *)]; struct __wrusage * wrusage; char wrusage_r_[PADR_(struct __wrusage *)]; char info_l_[PADL_(siginfo_t *)]; siginfo_t * info; char info_r_[PADR_(siginfo_t *)]; }; +struct kload_args { + char buf_l_[PADL_(const void *)]; const void * buf; char buf_r_[PADR_(const void *)]; + char buflen_l_[PADL_(size_t)]; size_t buflen; char buflen_r_[PADR_(size_t)]; + char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)]; +}; int nosys(struct thread *, struct nosys_args *); void sys_sys_exit(struct thread *, struct sys_exit_args *); int sys_fork(struct thread *, struct fork_args *); @@ -2144,6 +2149,7 @@ int sys_rctl_remove_rule(struct thread *, struct rctl_remove_rule_args *); int sys_posix_fallocate(struct thread *, struct posix_fallocate_args *); int sys_posix_fadvise(struct thread *, struct posix_fadvise_args *); int sys_wait6(struct thread *, struct wait6_args *); +int sys_kload(struct thread *, struct kload_args *); #ifdef COMPAT_43 @@ -2840,6 +2846,7 @@ int freebsd7_shmctl(struct thread *, struct freebsd7_shmctl_args *); #define SYS_AUE_posix_fallocate AUE_NULL #define SYS_AUE_posix_fadvise AUE_NULL #define SYS_AUE_wait6 AUE_WAIT6 +#define SYS_AUE_kload AUE_NULL #undef PAD_ #undef PADL_ diff --git a/sys/x86/x86/intr_machdep.c b/sys/x86/x86/intr_machdep.c index 31cc80b..eee7678 100644 --- a/sys/x86/x86/intr_machdep.c +++ b/sys/x86/x86/intr_machdep.c @@ -197,6 +197,37 @@ intr_add_handler(const char *name, int vector, driver_filter_t filter, } int +intr_clear_all_handlers(void) +{ + int i; + struct intsrc *isrc; + + mtx_lock(&intr_table_lock); + for (i = 0; i < NUM_IO_INTS; i++) { + isrc = interrupt_sources[i]; + if (isrc != NULL && isrc->is_handlers > 0) { + printf("%s:%d isrc[%d] %p is_handlers %d\n", + __FUNCTION__,__LINE__,i,isrc, 
+ isrc->is_handlers); + isrc->is_handlers--; + if (isrc->is_handlers == 0) { + printf("\t dis_source %p dis_intr %p\n", + isrc->is_pic->pic_disable_source, + isrc->is_pic->pic_disable_intr); + isrc->is_pic->pic_disable_source(isrc, PIC_NO_EOI); + isrc->is_pic->pic_disable_intr(isrc); + } + intrcnt_updatename(isrc); + + } + } + mtx_unlock(&intr_table_lock); + return 0; +} + + + +int intr_remove_handler(void *cookie) { struct intsrc *isrc; diff --git a/sys/x86/x86/local_apic.c b/sys/x86/x86/local_apic.c index e994172..6593e8b 100644 --- a/sys/x86/x86/local_apic.c +++ b/sys/x86/x86/local_apic.c @@ -346,6 +346,60 @@ lapic_dump(const char* str) } void +lapic_clear_lapic(u_int disable) { + + struct lapic *la; + la = &lapics[lapic_id()]; + + uint32_t value; + + if (bootverbose) + printf("%s lapic_id(%d) cpu(%d) la %p lapic %p\n",__FUNCTION__, + lapic_id(), PCPU_GET(cpuid), la, lapic); + + /* + * Fist we set the mask bit to keep and new interrupts from + * arriving but allowing any pending interrupts to finish + * *THEN* set the registers to default values + * If the interrupts are not allowed to clear a kload'ed / booted + * kernel will see the old interrupts before the appropriate handlers + * are in place and trigger a panic. + */ +#ifdef notyet + /* this seems to be causing APIC error in the new kernel */ + value = lapic->lvt_error; + value |= APIC_LVT_M; + lapic->lvt_error = value; +#endif + + value = lapic->lvt_timer; + value |= APIC_LVT_M; + lapic->lvt_timer = value; + + value = lapic->lvt_lint0; + value |= APIC_LVT_M; + lapic->lvt_lint0 = value; + + value = lapic->lvt_lint1; + value |= APIC_LVT_M; + lapic->lvt_lint1 = value; + + value = lapic->lvt_pcint; + value |= APIC_LVT_M; + lapic->lvt_pcint = value; + + /* Program timer LVT and setup handler. 
*/ + lapic->lvt_timer = APIC_LVTT_M; /* masked */ + lapic->lvt_lint0 = APIC_LVT_M; /* masked */ + lapic->lvt_lint1 = APIC_LVT_M; /* masked */ + + if (disable) { + printf("\tlapic disable\n"); + lapic_disable(); + } +} + +void lapic_setup(int boot) { struct lapic *la; @@ -924,7 +978,20 @@ lapic_handle_error(void) lapic->esr = 0; esr = lapic->esr; - printf("CPU%d: local APIC error 0x%x\n", PCPU_GET(cpuid), esr); + printf("CPU%d: local APIC error 0x%x\t", PCPU_GET(cpuid), esr); + if (lapic->esr & APIC_ESR_SEND_CS_ERROR) + printf("send_cs_error\n"); + if (lapic->esr & APIC_ESR_RECEIVE_CS_ERROR) + printf("receive_cs_error\n"); + if (lapic->esr & APIC_ESR_SEND_ACCEPT) + printf("send_accept\n"); + if (lapic->esr & APIC_ESR_RECEIVE_ACCEPT) + printf("receive_accept\n"); + if (lapic->esr & APIC_ESR_SEND_ILLEGAL_VECTOR) + printf("send_illegal_vector\n"); + if (lapic->esr & APIC_ESR_ILLEGAL_REGISTER) + printf("illegal_register\n"); + lapic_eoi(); } diff --git a/sys/x86/x86/nexus.c b/sys/x86/x86/nexus.c index 9ead8c8..0b28465 100644 --- a/sys/x86/x86/nexus.c +++ b/sys/x86/x86/nexus.c @@ -59,6 +59,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include @@ -675,6 +676,52 @@ ram_probe(device_t dev) } static int +smap_hdlr(SYSCTL_HANDLER_ARGS) { + + /* SYSCTL_HANDLER_ARGS + struct sysctl_oid *oidp, void *arg1, + intptr_t arg2, struct sysctl_req *req + */ + + struct bios_smap *smapbase; + caddr_t kmdp; + uint32_t smapsize = 0; + + /* Retrieve the system memory map from the loader. 
*/ + kmdp = preload_search_by_type("elf kernel"); + if (kmdp == NULL) + kmdp = preload_search_by_type(ELF_KERN_STR); + if (kmdp != NULL) { + smapbase = (struct bios_smap *)preload_search_info(kmdp, + MODINFO_METADATA | MODINFOMD_SMAP); + } else { + smapbase = NULL; + goto out; + } + + + printf("%s smapbase %p\n",__FUNCTION__,smapbase); + smapsize = *((u_int32_t *)smapbase - 1); + +#if 0 + { + struct bios_smap *smap, *smapend; + smapend = (struct bios_smap *)((uintptr_t)smapbase + smapsize); + for (smap = smapbase; smap < smapend; smap++) { + printf("\ttype %d base 0x%lx length 0x%lx\n", + smap->type,smap->base, smap->length); + } + } +#endif + +out: + return (sysctl_handle_opaque(oidp, smapbase, smapsize, req)); +} +SYSCTL_PROC(_hw, OID_AUTO, smap, CTLTYPE_OPAQUE|CTLFLAG_RD|CTLFLAG_MPSAFE, + 0, sizeof(struct bios_smap), smap_hdlr, "S,smap", + "Bios System Map"); + +static int ram_attach(device_t dev) { struct bios_smap *smapbase, *smap, *smapend; diff --git a/usr.sbin/kload/Makefile b/usr.sbin/kload/Makefile new file mode 100644 index 0000000..0d4a27a --- /dev/null +++ b/usr.sbin/kload/Makefile @@ -0,0 +1,15 @@ +# $FreeBSD$ + +PROG= kload +SRCS= kload.c +NO_MAN= + +#DPADD+= ${LIBVMMAPI} +#LDADD+= -lvmmapi + +WARNS?= 3 + +CFLAGS+=-I${.CURDIR}/../../sys/boot/userboot +CFLAGS+=-I${.CURDIR}/../../sys + +.include diff --git a/usr.sbin/kload/kload.c b/usr.sbin/kload/kload.c new file mode 100644 index 0000000..51ac3b7 --- /dev/null +++ b/usr.sbin/kload/kload.c @@ -0,0 +1,748 @@ +/* + * Copyright (c) 2011 - 2012 + * Russell Cattelan Digital Elves Inc + * Copyright (c) 2011 - 2012 + * Isilon Systems, LLC. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + */ + +/* + * process kill code borrowed from halt.c + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +char *host_base = "/"; +/* how can we get rid of these? 
I don't think we need them */ + +struct termios term, oldterm; +char *image; +size_t image_size; +size_t image_max_used = 0; +int disk_fd = -1; +uint64_t regs[16]; +uint64_t pc; +static int k_execute = 0; +static int k_reboot = 0; +static void *dl_lib; +typedef void *(*M_func)(size_t bytes, const char *file, int line); +M_func Malloc_func; +static void k_exit(void *, int); +static int shutdown_processes(void); +static u_int get_pageins(void); +static int kload_load_image(void *image,unsigned long entry_pt); + +struct load_file { + int l_isdir; + size_t l_size; + struct stat l_stat; + union { + int fd; + DIR *dir; + } l_u; +}; + +struct smap { + uint64_t base; + uint64_t length; + uint32_t type; +} __packed; + +static int +name2oid(char *name, int *oidp) +{ + int oid[2]; + int i; + size_t j; + + oid[0] = 0; + oid[1] = 3; + + j = CTL_MAXNAME * sizeof(int); + i = sysctl(oid, 2, oidp, &j, name, strlen(name)); + if (i < 0) + return (i); + j /= sizeof(int); + + return (j); +} + +static void +k_putc(void *arg, int chr) +{ + write(1, &chr, 1); +} + +static int +k_getc(void *arg) +{ + char chr; + if(read(0, &chr, 1) == 1) + return (chr); + return (-1); +} + +static int +k_poll(void *arg) +{ + int n; + if (ioctl(0, FIONREAD, &n) >= 0) + return (n > 0); + return 0; +} + +static int +k_open(void *arg, const char *filename, void **lf_ret) +{ + struct stat st; + struct load_file *lf; + int error = -1; + char path[PATH_MAX]; + + if (!host_base) { + printf("Host base not set\n"); + return (ENOENT); + } + + strlcpy(path, host_base, PATH_MAX); + if (path[strlen(path) - 1] == '/') + path[strlen(path) - 1] = 0; + strlcat(path, filename, PATH_MAX); + lf = malloc(sizeof(struct load_file)); + if (stat(path, &lf->l_stat) < 0) { + error = errno; + goto out; + } + + lf->l_size = st.st_size; + if (S_ISDIR(lf->l_stat.st_mode)) { + lf->l_isdir = 1; + lf->l_u.dir = opendir(path); + if (!lf->l_u.dir) { + error = EINVAL; + goto out; + } + *lf_ret = lf; + return (0); + } + if 
(S_ISREG(lf->l_stat.st_mode)) { + lf->l_isdir = 0; + lf->l_u.fd = open(path, O_RDONLY); + if (lf->l_u.fd < 0) { + error = EINVAL; + goto out; + } + *lf_ret = lf; + return (0); + } + +out: + free(lf); + return (error); +} + +static int +k_close(void *arg, void *h) +{ + struct load_file *lf = (struct load_file *)h; + + if (lf->l_isdir) + closedir(lf->l_u.dir); + else + close(lf->l_u.fd); + free(lf); + + return (0); +} + +static int +k_isdir(void *arg, void *h) +{ + return (((struct load_file *)h)->l_isdir); +} + +static int +k_read(void *arg, void *h, void *dst, size_t size, size_t *resid_return) +{ + struct load_file *lf = (struct load_file *)h; + ssize_t sz; + + if (lf->l_isdir) + return (EINVAL); + + if((sz = read(lf->l_u.fd, dst, size)) < 0) + return (EINVAL); + *resid_return = size - sz; + return (0); +} + +static int +k_readdir(void *arg, void *h, uint32_t *fileno_return, uint8_t *type_return, + size_t *namelen_return, char *name) +{ + struct load_file *lf = (struct load_file *)h; + struct dirent *dp; + + if (!lf->l_isdir) + return (EINVAL); + + dp = readdir(lf->l_u.dir); + if (!dp) + return (ENOENT); + + /* + * Note: d_namlen is in the range 0..255 and therefore less + * than PATH_MAX so we don't need to test before copying. 
+ */ + *fileno_return = dp->d_fileno; + *type_return = dp->d_type; + *namelen_return = dp->d_namlen; + memcpy(name, dp->d_name, dp->d_namlen); + name[dp->d_namlen] = 0; + + return (0); +} + +static int +k_seek(void *arg, void *h, uint64_t offset, int whence) +{ + struct load_file *lf = (struct load_file *)h; + + if (lf->l_isdir) + return (EINVAL); + + if (lseek(lf->l_u.fd, offset, whence) < 0) + return (errno); + + return (0); +} + +static int +k_stat(void *arg, void *h, + int *mode_return, int *uid_return, + int *gid_return, uint64_t *size_return) +{ + + struct load_file *lf = (struct load_file *)h; + + *mode_return = lf->l_stat.st_mode; + *uid_return = lf->l_stat.st_uid; + *gid_return = lf->l_stat.st_gid; + *size_return = lf->l_stat.st_size; + return (0); +} + +static int +k_diskread(void *arg, int unit, uint64_t offset, void *dst, size_t size, + size_t *resid_return) +{ + ssize_t n; + + if (unit != 0 || disk_fd == -1) + return (EIO); + n = pread(disk_fd, dst, size, offset); + if (n < 0) + return (errno); + *resid_return = size - n; + return (0); +} + +static int +k_diskioctl(void *arg, int unit, u_long cmd, void *data) +{ + /* not supported on by kload */ + return (ENOTTY); +} + +/* + * This is really confusing since this is not really like doing copyin / copyout + * in kernel land this will copy the data pointed to by the "from" ptr and copy + * "to" the offset into the load image + */ +static int +k_copy_to_image(void *arg, const void *from, uint64_t to, size_t size) +{ + to &= 0x7fffffff; + if (to > image_size) + return (EFAULT); + if (to + size > image_size) { + size = image_size - to; + printf("WARNING this should never happen\n"); + } + memcpy(&image[to], from, size); + + if (to + size > image_max_used) + image_max_used = to + size; + + return (0); +} + +/* + * copyout is copying FROM the image at "from" offset to memory pointed to by to + * ptr + */ +static int +k_copy_from_image(void *arg, uint64_t from, void *to, size_t size) +{ + from &= 0x7fffffff; + 
if (from > image_size) + return (EFAULT); + if (from + size > image_size) + size = image_size - from; + memcpy(to, &image[from], size); + + return (0); +} + +static void +k_setreg(void *arg, int r, uint64_t v) +{ + if (r < 0 || r >= 16) + return; + regs[r] = v; +} + +static void +k_setmsr(void *arg, int r, uint64_t v) +{ + /* Unneeded */ +} + +static void +k_setcr(void *arg, int r, uint64_t v) +{ + /* Unneeded */ +} + +static void +k_setgdt(void *arg, uint64_t v, size_t sz) +{ + /* Unneeded */ +} + +static void +k_exec(void *arg, uint64_t entry_pt) +{ +#ifdef DEBUG + printf("Execute at 0x%jx\n", entry_pt); + printf("image size max used %jd endof page %jd\n", image_max_used, + roundup2(image_max_used, PAGE_SIZE)); +#endif + kload_load_image(image, entry_pt); + k_exit(arg, 0); +} + +static void +k_delay(void *arg, int usec) +{ + usleep(usec); +} + +static void +k_exit(void *arg, int v) +{ + tcsetattr(0, TCSAFLUSH, &oldterm); + exit(v); +} + +static void +k_getmem(void *arg, uint64_t *lowmem, uint64_t *highmem) +{ + int mib[2]; + unsigned long long physmem; + size_t len; + + mib[0] = CTL_HW; + mib[1] = HW_PHYSMEM; + len = sizeof(physmem); + sysctl(mib, 2, &physmem, &len, NULL, 0); + + *lowmem = physmem; + *highmem = 0; + + printf("%s:%d lowmem %ju highmem %ju\n",__FUNCTION__,__LINE__, + *lowmem, + *highmem + ); +} + +static const char * +k_getenv(void *arg, int idx) +{ + static const char *vars[] = { + "foo=bar", + "bar=barbar", + NULL + }; + + return (vars[idx]); +} + +static int +k_buildsmap(void *arg, void **smap_void, size_t *outlen) +{ + struct smap *smapbase; + size_t i,j; + size_t len; + char name[] = "hw.smap"; + int mib[CTL_MAXNAME]; + + len = name2oid(name, mib); + + /* get the current smap from the running system */ + i = sysctl(mib, 2, 0, &j, 0, 0); + len = j; + + /* + * Use the malloc function from libstand/userboot.so since + * bios_addsmapdata will free the memory using the libstand Free + * so be careful to use not use standard malloc here + */ + 
smapbase = Malloc_func(j, __FILE__, __LINE__); + if (!smapbase) { + printf("kload failed to allocate space for smap\n"); + return 1; + } + + i = sysctl(mib, 2, smapbase, &j, NULL, 0); + + *outlen = len; + *smap_void = smapbase; + +#ifdef DEBUG + { + struct smap *smap, *smapend; + smapend = (struct smap *)((uintptr_t)smapbase + len); + for (smap = smapbase; smap < smapend; smap++) { + printf("\ttype %d base 0x%016lx length 0x%016lx\n", + smap->type, smap->base, smap->length); + } + } +#endif + + return 0; +} + +struct loader_callbacks cb = { + + .open = k_open, + .close = k_close, + .isdir = k_isdir, + .read = k_read, + .readdir = k_readdir, + .seek = k_seek, + .stat = k_stat, + + .diskread = k_diskread, + .diskioctl = k_diskioctl, + + .copyin = k_copy_to_image, + .copyout = k_copy_from_image, + .setreg = k_setreg, + .setmsr = k_setmsr, + .setcr = k_setcr, + .setgdt = k_setgdt, + .exec = k_exec, + + .delay = k_delay, + .exit = k_exit, + .getmem = k_getmem, + + .putc = k_putc, + .getc = k_getc, + .poll = k_poll, + .getenv = k_getenv, + .buildsmap = k_buildsmap, +}; + +static void +usage(void) +{ + printf("usage: kload [-d ] " + "[-h ] [-e | -r]\n"); + exit(1); +} + +int +main(int argc, char** argv) +{ + int (*loader_main)(struct loader_callbacks *, void *, int, int); + void (*loader_init)(void); + int (*setenv)(const char *, const char *, int); + int opt; + char *disk_image = NULL; + char karg[20]; + char kval[128]; + + if (geteuid()) { + errno = EPERM; + err(1, NULL); + } + + dl_lib = dlopen("/boot/userboot.so", RTLD_LOCAL); + if (!dl_lib) { + printf("%s\n", dlerror()); + return (1); + } + loader_main = dlsym(dl_lib, "loader_main"); + if (!loader_main) { + printf("%s\n", dlerror()); + return (1); + } + Malloc_func = dlsym(dl_lib, "Malloc"); + if (!Malloc_func) { + printf("%s\n", dlerror()); + return (1); + } + /* + * pull in the libstand setenv for setting name value pairs + * in the kernel env page + */ + setenv = dlsym(dl_lib, "setenv"); + if (!setenv) { + 
printf("%s\n", dlerror()); + return (1); + } + loader_init = dlsym(dl_lib, "loader_init"); + if (!loader_init) { + printf("%s\n", dlerror()); + return (1); + } + /* call libstand setheap to init memory allocations */ + loader_init(); + + while ((opt = getopt(argc, argv, "d:h:erk:")) != -1) { + switch (opt) { + case 'd': + disk_image = optarg; + break; + + case 'h': + host_base = optarg; + break; + case 'e': + k_execute = 1; + break; + case 'r': + k_reboot = 1; + break; + case 'k': + memset(karg,0,sizeof(karg)); + memset(kval,0,sizeof(kval)); + if(sscanf(optarg,"%[a-zA-Z_-]=%s",karg,kval) == 2) { + printf("got value %s %s\n",karg,kval); + setenv(karg, kval, 1); + } else { + fprintf(stderr,"-k failure %s\n",optarg); + } + break; + + case '?': + usage(); + } + } + + image_size = 128*1024*1024; + image = malloc(image_size); + if (disk_image) { + disk_fd = open(disk_image, O_RDONLY); + if (disk_fd < 0) + err(1, "Can't open disk image '%s'", disk_image); + } + + tcgetattr(0, &term); + oldterm = term; + term.c_iflag &= ~(ICRNL); + term.c_lflag &= ~(ICANON|ECHO); + tcsetattr(0, TCSAFLUSH, &term); + + return(loader_main(&cb, NULL, USERBOOT_VERSION_4, disk_fd >= 0)); +} + +static int +kload_load_image(void *image, unsigned long entry_pt) +{ + char *stack = (char *)image + 0x1000; /* PAGESIZE */ + struct kload kld; + int flags = KLOAD_LOAD; + /* + * This must the same value sys/conf/ldscript.xxx + * This value was changed at one point when a new version + * of binutils was imported. 
The value is aligned to + * max page size supported by given processor + */ + unsigned long kernphys = 0x200000; + + kld.khdr[0].k_buf = &((char *)image)[kernphys]; + kld.khdr[0].k_memsz = roundup2(image_max_used,PAGE_SIZE) - kernphys; + kld.k_entry_pt = entry_pt; + kld.num_hdrs = 1; + + /* + * pull paramaters from the stack page + * a better interface should be developed for kload + * in the future + */ + kld.k_modulep = ((unsigned int *)stack)[1]; + kld.k_physfree = ((unsigned int *)stack)[2]; + + /* + * Make sure there is 4 pages of kenv pages between the end of the + * kernel and start of free memory. + * Why you ask? Well that is a question without a good answer as of yet + * for some strange reason some ata chips will not respond correctly + * unless free memory starts at greater than 2 pages out. + * The obvoius assumption is that something is getting stommped on but + * that has yet to be determined. Adding this workaround. + */ + kld.k_physfree = MAX(kld.k_modulep + (4 * PAGE_SIZE), kld.k_physfree); + + printf("WARNING kernphys set to 0x%lx make sure this matches kernphys " + "from sys/config/ldscript\n", kernphys); + + if (k_execute) { + flags &= ~KLOAD_REBOOT; + flags |= KLOAD_EXEC; + } + if (k_reboot) { + flags &= ~KLOAD_EXEC; + flags |= KLOAD_REBOOT; + shutdown_processes(); + } + + return (syscall(SYS_kload, &kld, sizeof(struct kload), flags)); +} + +static int +shutdown_processes(void) +{ + int i; + u_int pageins; + int sverrno; + /* + * Do a sync early on, so disks start transfers while we're off + * killing processes. Don't worry about writes done before the + * processes die, the reboot system call syncs the disks. + */ + sync(); + + /* + * Ignore signals that we can get as a result of killing + * parents, group leaders, etc. 
+ */ + (void)signal(SIGHUP, SIG_IGN); + (void)signal(SIGINT, SIG_IGN); + (void)signal(SIGQUIT, SIG_IGN); + (void)signal(SIGTERM, SIG_IGN); + (void)signal(SIGTSTP, SIG_IGN); + + /* + * If we're running in a pipeline, we don't want to die + * after killing whatever we're writing to. + */ + (void)signal(SIGPIPE, SIG_IGN); + + /* Just stop init -- if we fail, we'll restart it. */ + if (kill(1, SIGTSTP) == -1) + err(1, "SIGTSTP init"); + + /* Send a SIGTERM first, a chance to save the buffers. */ + if (kill(-1, SIGTERM) == -1 && errno != ESRCH) + err(1, "SIGTERM processes"); + + /* + * After the processes receive the signal, start the rest of the + * buffers on their way. Wait 5 seconds between the SIGTERM and + * the SIGKILL to give everybody a chance. If there is a lot of + * paging activity then wait longer, up to a maximum of approx + * 60 seconds. + */ + sleep(2); + for (i = 0; i < 20; i++) { + pageins = get_pageins(); + sync(); + sleep(3); + if (get_pageins() == pageins) + break; + } + + for (i = 1;; ++i) { + if (kill(-1, SIGKILL) == -1) { + if (errno == ESRCH) + break; + goto restart; + } + if (i > 5) { + (void)fprintf(stderr, + "WARNING: some process(es) wouldn't die\n"); + break; + } + (void)sleep(2 * i); + } + return 1; +restart: + sverrno = errno; + errx(1, "%s%s", kill(1, SIGHUP) == -1 ? + "(can't restart init): " : "", strerror(sverrno)); + /* NOTREACHED */ + return 0; +} + +static u_int +get_pageins(void) +{ + u_int pageins; + size_t len; + + len = sizeof(pageins); + if (sysctlbyname("vm.stats.vm.v_swappgsin", &pageins, &len, NULL, 0) + != 0) { + warnx("v_swappgsin"); + return (0); + } + return (pageins); +}