Re: Fatal trap 12: page fault on Acer Chromebook 720 (peppy)

From: Michael Gmelin <freebsd_at_grem.de>
Date: Fri, 24 Aug 2018 22:32:06 +0200
> On 24. Aug 2018, at 21:59, Konstantin Belousov <kostikbel_at_gmail.com> wrote:
> 
>> On Thu, Aug 23, 2018 at 12:10:34AM +0200, Michael Gmelin wrote:
>> 
>> 
>>>> On 22. Aug 2018, at 23:15, Konstantin Belousov <kostikbel_at_gmail.com> wrote:
>>>> 
>>>> On Wed, Aug 22, 2018 at 10:03:54PM +0200, Michael Gmelin wrote:
>>>> 
>>>> 
>>>>>> On 22. Aug 2018, at 17:46, Konstantin Belousov <kostikbel_at_gmail.com> wrote:
>>>>>> 
>>>>>> On Tue, Aug 21, 2018 at 12:14:35AM +0200, Michael Gmelin wrote:
>>>>>> 
>>>>>> 
>>>>>>>> On 20. Aug 2018, at 17:09, Konstantin Belousov <kostikbel_at_gmail.com> wrote:
>>>>>>>> 
>>>>>>>> On Mon, Aug 20, 2018 at 12:45:12AM +0200, Michael Gmelin wrote:
>>>>>>>> 
>>>>>>>> See here for a screenshot (also including the output of "show pte
>>>>>>>> 0xfffff80001000000"):
>>>>>>>> 
>>>>>>>> https://gist.github.com/grembo/78d0f2a100dd4f16775b85a118769658#file-ddb1-png
>>>>>>> It is too early for ddb routines to register.
>>>>>>> Ok can you try the following debugging patch, to verify my guess ?
>>>>>>> 
>>>>>>> diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
>>>>>>> index 18777d23f09..cd05fdb763f 100644
>>>>>>> --- a/sys/amd64/amd64/pmap.c
>>>>>>> +++ b/sys/amd64/amd64/pmap.c
>>>>>>> @@ -1052,8 +1052,7 @@ create_pagetables(vm_paddr_t *firstaddr)
>>>>>>>     pd_p = (pd_entry_t *)DMPDkernphys;
>>>>>>>     for (i = 0; i < (NPDEPG * nkdmpde); i++)
>>>>>>>         pd_p[i] = (i << PDRSHIFT) | X86_PG_V | PG_PS | pg_g |
>>>>>>> -                X86_PG_M | X86_PG_A | pg_nx |
>>>>>>> -                bootaddr_rwx(i << PDRSHIFT);
>>>>>>> +                X86_PG_M | X86_PG_A | pg_nx | X86_PG_RW;
>>>>>>>     for (i = 0; i < nkdmpde; i++)
>>>>>>>         pdp_p[i] = (DMPDkernphys + ptoa(i)) | X86_PG_RW |
>>>>>>>             X86_PG_V;
>>>>>> 
>>>>>> With this change it boots okay (mptramp_pagetables is 0x1000000, as expected).
>>>>> 
>>>>> Can you apply the following on top of the previous debugging patch and show
>>>>> me the line printed ?
>>>>> 
>>>>> diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
>>>>> index 3d70532b7fd..613fa9f2165 100644
>>>>> --- a/sys/amd64/amd64/pmap.c
>>>>> +++ b/sys/amd64/amd64/pmap.c
>>>>> @@ -2662,6 +2662,7 @@ pmap_pinit0(pmap_t pmap)
>>>>>      pmap->pm_pcids[i].pm_gen = 1;
>>>>>  }
>>>>>  pmap_activate_boot(pmap);
>>>>> +printf("bootaddr addr %#lx rwx %#lx btext %#lx _end %#lx brwsection %#lx etext %#lx KERNBASE %#lx\n", 0x1000000UL, bootaddr_rwx(0x1000000UL), (uintptr_t)btext, (uintptr_t)_end, (uintptr_t)brwsection, (uintptr_t)etext, (uintptr_t)KERNBASE);
>>>>> }
>>>>> 
>>>>> void
>>>> 
>>>> bootaddr addr 0x1000000 rwx 0 btext 0xffffffff80342000 _end 0xffffffff823cf840 brwsection 0xffffffff81a00000 etext 0xffffffff812041e4 KERNBASE 0xffffffff80000000
>>>> 
>>> 
>>> Try this, please.  Revert all debugging pmap.c patches that I provided
>>> before.
>>> 
>>> diff --git a/sys/amd64/amd64/mp_machdep.c b/sys/amd64/amd64/mp_machdep.c
>>> index 4ca2e07e578..2ee8f862854 100644
>>> --- a/sys/amd64/amd64/mp_machdep.c
>>> +++ b/sys/amd64/amd64/mp_machdep.c
>>> @@ -87,6 +87,8 @@ __FBSDID("$FreeBSD$");
>>> 
>>> #define GiB(v)            (v ## ULL << 30)
>>> 
>>> +#define    AP_BOOTPT_SZ        (PAGE_SIZE * 3)
>>> +
>>> extern    struct pcpu __pcpu[];
>>> 
>>> /* Temporary variables for init_secondary()  */
>>> @@ -101,45 +103,78 @@ char *dbg_stack;
>>> 
>>> static int    start_ap(int apic_id);
>>> 
>>> +static bool
>>> +is_kernel_paddr(vm_paddr_t pa)
>>> +{
>>> +
>>> +    return (pa >= trunc_2mpage(btext - KERNBASE) &&
>>> +       pa < round_page(_end - KERNBASE));
>>> +}
>>> +
>>> +static bool
>>> +is_mpboot_good(vm_paddr_t start, vm_paddr_t end)
>>> +{
>>> +
>>> +    return (start + AP_BOOTPT_SZ <= GiB(4) &&
>>> +        end >= start + AP_BOOTPT_SZ && atop(end) < Maxmem);
>>> +}
>>> +
>>> /*
>>> * Calculate usable address in base memory for AP trampoline code.
>>> */
>>> void
>>> mp_bootaddress(vm_paddr_t *physmap, unsigned int *physmap_idx)
>>> {
>>> +    vm_paddr_t start, end;
>>>   unsigned int i;
>>>   bool allocated;
>>> 
>>>   alloc_ap_trampoline(physmap, physmap_idx);
>>> 
>>> +    /*
>>> +     * Find a memory region big enough below the 4GB boundary to
>>> +     * store the initial page tables.  Region must be mapped by
>>> +     * the direct map.
>>> +     *
>>> +     * Note that it needs to be aligned to a page boundary.
>>> +     */
>>>   allocated = false;
>>>   for (i = *physmap_idx; i <= *physmap_idx; i -= 2) {
>>>       /*
>>> -         * Find a memory region big enough below the 4GB
>>> -         * boundary to store the initial page tables.  Region
>>> -         * must be mapped by the direct map.
>>> -         *
>>> -         * Note that it needs to be aligned to a page
>>> -         * boundary.
>>> +         * First, try to chomp at the start of the physmap region.
>>> +         * Kernel binary might claim it already.
>>> +         */
>>> +        start = round_page(physmap[i]);
>>> +        end = trunc_page(physmap[i + 1]);
>>> +        if (is_mpboot_good(start, end) &&
>>> +            !is_kernel_paddr(start) && !is_kernel_paddr(end - 1)) {
>>> +            allocated = true;
>>> +            physmap[i] = start + AP_BOOTPT_SZ;
>>> +            break;
>>> +        }
>>> +
>>> +        /*
>>> +         * Second, try to chomp at the end.  Again, check
>>> +         * against kernel.
>>>        */
>>> -        if (physmap[i] >= GiB(4) || physmap[i + 1] -
>>> -            round_page(physmap[i]) < PAGE_SIZE * 3 ||
>>> -            atop(physmap[i + 1]) > Maxmem)
>>> -            continue;
>>> -
>>> -        allocated = true;
>>> -        mptramp_pagetables = round_page(physmap[i]);
>>> -        physmap[i] = round_page(physmap[i]) + (PAGE_SIZE * 3);
>>> +        end = trunc_page(physmap[i + 1]);
>>> +        start = end - AP_BOOTPT_SZ;
>>> +        if (start >= physmap[i] && is_mpboot_good(start, end) &&
>>> +            !is_kernel_paddr(start) && !is_kernel_paddr(end - 1)) {
>>> +            allocated = true;
>>> +            physmap[i + 1] = start;
>>> +            break;
>>> +        }
>>> +    }
>>> +    if (allocated) {
>>> +        mptramp_pagetables = start;
>>>       if (physmap[i] == physmap[i + 1] && *physmap_idx != 0) {
>>>           memmove(&physmap[i], &physmap[i + 2],
>>>               sizeof(*physmap) * (*physmap_idx - i + 2));
>>>           *physmap_idx -= 2;
>>>       }
>>> -        break;
>>> -    }
>>> -
>>> -    if (!allocated) {
>>> -        mptramp_pagetables = trunc_page(boot_address) - (PAGE_SIZE * 3);
>>> +    } else {
>>> +        mptramp_pagetables = trunc_page(boot_address) - AP_BOOTPT_SZ;
>>>       if (bootverbose)
>>>           printf(
>>> "Cannot find enough space for the initial AP page tables, placing them at %#x",
>> 
>> Reverted back to r337813 and applied the patch. Unfortunately it panics just like before. Adding back physmap debugging like before shows that it's still using pages 0x1000-0x1003 (Stopped at native_start_all_aps+0x92: movq %rax,(%rsi))
>> 
> 
> Please apply the following debugging patch on top of the previous 'fix'.
> You need debug.late_console=0.

Unfortunately debug.late_console=0 doesn’t work on this machine (no more output on the console), I tried that earlier in this thread - hence the slightly complicated debugging code I had to add to see the contents of physmap.

I could run this code after boot (feeding it an identical physmap) to get debug output, would this make sense?

Best,
Michael

> 
> diff --git a/sys/amd64/amd64/mp_machdep.c b/sys/amd64/amd64/mp_machdep.c
> index 2ee8f862854..1a14b1800b1 100644
> --- a/sys/amd64/amd64/mp_machdep.c
> +++ b/sys/amd64/amd64/mp_machdep.c
> @@ -130,7 +130,7 @@ mp_bootaddress(vm_paddr_t *physmap, unsigned int *physmap_idx)
>    bool allocated;
> 
>    alloc_ap_trampoline(physmap, physmap_idx);
> -
> +printf("btext %#lx _end %#lx brwsection %#lx etext %#lx KERNBASE %#lx\n", (uintptr_t)btext, (uintptr_t)_end, (uintptr_t)brwsection, (uintptr_t)etext, (uintptr_t)KERNBASE);
>    /*
>     * Find a memory region big enough below the 4GB boundary to
>     * store the initial page tables.  Region must be mapped by
> @@ -146,10 +146,13 @@ mp_bootaddress(vm_paddr_t *physmap, unsigned int *physmap_idx)
>         */
>        start = round_page(physmap[i]);
>        end = trunc_page(physmap[i + 1]);
> +printf("physmap[%d] %#lx physmap[%d] %#lx\n", i, physmap[i], i + 1, physmap[i + 1]);
> +printf("start %#lx end %#lx is_mpboot_good %d is_kernel_paddr(start) %d is_kernel_paddr(end - 1) %d\n", start, end, is_mpboot_good(start, end), is_kernel_paddr(start), is_kernel_paddr(end - 1));
>        if (is_mpboot_good(start, end) &&
>            !is_kernel_paddr(start) && !is_kernel_paddr(end - 1)) {
>            allocated = true;
>            physmap[i] = start + AP_BOOTPT_SZ;
> +printf("allocated\n");
>            break;
>        }
> 
> @@ -159,10 +162,12 @@ mp_bootaddress(vm_paddr_t *physmap, unsigned int *physmap_idx)
>         */
>        end = trunc_page(physmap[i + 1]);
>        start = end - AP_BOOTPT_SZ;
> +printf("start %#lx end %#lx is_mpboot_good %d is_kernel_paddr(start) %d is_kernel_paddr(end - 1) %d\n", start, end, is_mpboot_good(start, end), is_kernel_paddr(start), is_kernel_paddr(end - 1));
>        if (start >= physmap[i] && is_mpboot_good(start, end) &&
>            !is_kernel_paddr(start) && !is_kernel_paddr(end - 1)) {
>            allocated = true;
>            physmap[i + 1] = start;
> +printf("allocated\n");
>            break;
>        }
>    }
Received on Fri Aug 24 2018 - 18:32:17 UTC

This archive was generated by hypermail 2.4.0 : Wed May 19 2021 - 11:41:18 UTC