Re: FYI: [My FreeBSD-12.0-CURRENT-arm64-aarch64.raw ] under qemu-system-aarch64 on odroid-c2 under UbuntuMate : [A combination that boots but gets some panics]

From: Mark Millard <markmi_at_dsl-only.net>
Date: Tue, 2 May 2017 18:53:32 -0700
On 2017-May-2, at 2:59 PM, Mark Millard <markmi at dsl-only.net> wrote:

> The code around handle_el1h_sync+0x70 :
> 
> ffff000000607804 <handle_el1h_sync+0x4> sub     sp, sp, #0x80
> ffff000000607808 <handle_el1h_sync+0x8> sub     sp, sp, #0x120
> ffff00000060780c <handle_el1h_sync+0xc> stp     x29, x30, [sp,#272]
> ffff000000607810 <handle_el1h_sync+0x10> stp    x28, x29, [sp,#256]
> ffff000000607814 <handle_el1h_sync+0x14> stp    x26, x27, [sp,#240]
> ffff000000607818 <handle_el1h_sync+0x18> stp    x24, x25, [sp,#224]
> ffff00000060781c <handle_el1h_sync+0x1c> stp    x22, x23, [sp,#208]
> ffff000000607820 <handle_el1h_sync+0x20> stp    x20, x21, [sp,#192]
> ffff000000607824 <handle_el1h_sync+0x24> stp    x18, x19, [sp,#176]
> ffff000000607828 <handle_el1h_sync+0x28> stp    x16, x17, [sp,#160]
> ffff00000060782c <handle_el1h_sync+0x2c> stp    x14, x15, [sp,#144]
> ffff000000607830 <handle_el1h_sync+0x30> stp    x12, x13, [sp,#128]
> ffff000000607834 <handle_el1h_sync+0x34> stp    x10, x11, [sp,#112]
> ffff000000607838 <handle_el1h_sync+0x38> stp    x8, x9, [sp,#96]
> ffff00000060783c <handle_el1h_sync+0x3c> stp    x6, x7, [sp,#80]
> ffff000000607840 <handle_el1h_sync+0x40> stp    x4, x5, [sp,#64]
> ffff000000607844 <handle_el1h_sync+0x44> stp    x2, x3, [sp,#48]
> ffff000000607848 <handle_el1h_sync+0x48> stp    x0, x1, [sp,#32]
> ffff00000060784c <handle_el1h_sync+0x4c> mrs    x10, elr_el1
> ffff000000607850 <handle_el1h_sync+0x50> mrs    x11, spsr_el1
> ffff000000607854 <handle_el1h_sync+0x54> mrs    x12, esr_el1
> ffff000000607858 <handle_el1h_sync+0x58> str    x10, [sp,#16]
> ffff00000060785c <handle_el1h_sync+0x5c> stp    w11, w12, [sp,#24]
> ffff000000607860 <handle_el1h_sync+0x60> stp    x18, x30, [sp]
> ffff000000607864 <handle_el1h_sync+0x64> mrs    x18, tpidr_el1
> ffff000000607868 <handle_el1h_sync+0x68> add    x29, sp, #0x110
> ffff00000060786c <handle_el1h_sync+0x6c> mov    x0, sp
> ffff000000607870 <handle_el1h_sync+0x70> bl     ffff00000061aad8 <do_el1h_sync>
> ffff000000607874 <handle_el1h_sync+0x74> msr    daifset, #0x2
> ffff000000607878 <handle_el1h_sync+0x78> ldp    x18, x30, [sp]
> ffff00000060787c <handle_el1h_sync+0x7c> ldp    x10, x11, [sp,#16]
> ffff000000607880 <handle_el1h_sync+0x80> msr    spsr_el1, x11
> ffff000000607884 <handle_el1h_sync+0x84> msr    elr_el1, x10
> ffff000000607888 <handle_el1h_sync+0x88> ldp    x0, x1, [sp,#32]
> ffff00000060788c <handle_el1h_sync+0x8c> ldp    x2, x3, [sp,#48]
> ffff000000607890 <handle_el1h_sync+0x90> ldp    x4, x5, [sp,#64]
> ffff000000607894 <handle_el1h_sync+0x94> ldp    x6, x7, [sp,#80]
> ffff000000607898 <handle_el1h_sync+0x98> ldp    x8, x9, [sp,#96]
> ffff00000060789c <handle_el1h_sync+0x9c> ldp    x10, x11, [sp,#112]
> ffff0000006078a0 <handle_el1h_sync+0xa0> ldp    x12, x13, [sp,#128]
> ffff0000006078a4 <handle_el1h_sync+0xa4> ldp    x14, x15, [sp,#144]
> ffff0000006078a8 <handle_el1h_sync+0xa8> ldp    x16, x17, [sp,#160]
> ffff0000006078ac <handle_el1h_sync+0xac> ldr    x29, [sp,#264]
> ffff0000006078b0 <handle_el1h_sync+0xb0> mov    sp, x18
> ffff0000006078b4 <handle_el1h_sync+0xb4> mrs    x18, tpidr_el1
> ffff0000006078b8 <handle_el1h_sync+0xb8> eret
> 
> So the bl to do_el1h_sync apparently gets the data_abort.

It turns out that in the first type of example there
is also a:

data_abort() at handle_el1h_sync+0x70
	 pc = 0xffff00000061ad94  lr = 0xffff000000607870
	 sp = 0xffff000040238180  fp = 0xffff000040238290
handle_el1h_sync() at pmap_enter+0x678
	 pc = 0xffff000000607870  lr = 0xffff000000615684
	 sp = 0xffff0000402382a0  fp = 0xffff0000402383b0

in what I showed. The code around pmap_enter+0x678
happens to be:

ffff00000061566c <pmap_enter+0x660> b   ffff000000615688 <pmap_enter+0x67c>
ffff000000615670 <pmap_enter+0x664> and x8, x28, #0x1f
ffff000000615674 <pmap_enter+0x668> cmp x8, #0xb
ffff000000615678 <pmap_enter+0x66c> b.ne        ffff000000615688 <pmap_enter+0x67c>
ffff00000061567c <pmap_enter+0x670> ldr x0, [sp,#32]
ffff000000615680 <pmap_enter+0x674> orr w1, wzr, #0x1000
ffff000000615684 <pmap_enter+0x678> bl  ffff000000605884 <arm64_dcache_wb_range>
ffff000000615688 <pmap_enter+0x67c> ldrb        w8, [x22,#93]
ffff00000061568c <pmap_enter+0x680> tbnz        w8, #2, ffff0000006157a4 <pmap_enter+0x798>
ffff000000615690 <pmap_enter+0x684> add x1, sp, #0x38
ffff000000615694 <pmap_enter+0x688> mov x0, x19
ffff000000615698 <pmap_enter+0x68c> mov x24, x23
ffff00000061569c <pmap_enter+0x690> orr x23, x23, #0x100000000000000
ffff0000006156a0 <pmap_enter+0x694> bl  ffff000000615f44 <get_pv_entry>

So again handle_el1h_sync is entered at a bl to
arm64_dcache_wb_range and ends up with a
data_abort at handle_el1h_sync+0x70.

The context is pmap_enter instead of
pmap_remove_pages.

But an example of a pmap_remove_pages+0x2a8
context for handle_el1h_sync is also in the
call chain for the first type of example
that I originally showed.

> The code around pmap_remove_pages+0x2a8 :
> 
> ffff000000617570 <pmap_remove_pages+0x244> bl   ffff0000005cf83c <PHYS_TO_VM_PAGE>
> ffff000000617574 <pmap_remove_pages+0x248> ldr  x9, [sp,#80]
> ffff000000617578 <pmap_remove_pages+0x24c> adrp x8, ffff000000bbd000 <pmc_tf+0x6210>
> ffff00000061757c <pmap_remove_pages+0x250> add  x8, x8, #0x848
> ffff000000617580 <pmap_remove_pages+0x254> str  x0, [sp,#48]
> ffff000000617584 <pmap_remove_pages+0x258> cmp  x9, x8
> ffff000000617588 <pmap_remove_pages+0x25c> b.eq ffff0000006175a4 <pmap_remove_pages+0x278>
> ffff00000061758c <pmap_remove_pages+0x260> ldr  x8, [x18]
> ffff000000617590 <pmap_remove_pages+0x264> ldr  x8, [x8,#8]
> ffff000000617594 <pmap_remove_pages+0x268> ldr  x8, [x8,#512]
> ffff000000617598 <pmap_remove_pages+0x26c> ldr  x8, [x8,#224]
> ffff00000061759c <pmap_remove_pages+0x270> cmp  x8, x9
> ffff0000006175a0 <pmap_remove_pages+0x274> b.ne ffff0000006175d8 <pmap_remove_pages+0x2ac>
> ffff0000006175a4 <pmap_remove_pages+0x278> and  x8, x22, #0x1f
> ffff0000006175a8 <pmap_remove_pages+0x27c> cmp  x28, #0x3
> ffff0000006175ac <pmap_remove_pages+0x280> b.ne ffff0000006175c4 <pmap_remove_pages+0x298>
> ffff0000006175b0 <pmap_remove_pages+0x284> cmp  x8, #0xb
> ffff0000006175b4 <pmap_remove_pages+0x288> b.ne ffff0000006175d8 <pmap_remove_pages+0x2ac>
> ffff0000006175b8 <pmap_remove_pages+0x28c> ldr  x0, [x24]
> ffff0000006175bc <pmap_remove_pages+0x290> orr  w1, wzr, #0x1000
> ffff0000006175c0 <pmap_remove_pages+0x294> b    ffff0000006175d4 <pmap_remove_pages+0x2a8>
> ffff0000006175c4 <pmap_remove_pages+0x298> cmp  x8, #0x9
> ffff0000006175c8 <pmap_remove_pages+0x29c> b.ne ffff0000006175d8 <pmap_remove_pages+0x2ac>
> ffff0000006175cc <pmap_remove_pages+0x2a0> ldr  x0, [x24]
> ffff0000006175d0 <pmap_remove_pages+0x2a4> orr  w1, wzr, #0x200000
> ffff0000006175d4 <pmap_remove_pages+0x2a8> bl   ffff000000605884 <arm64_dcache_wb_range>
> ffff0000006175d8 <pmap_remove_pages+0x2ac> mov  x8, xzr
> ffff0000006175dc <pmap_remove_pages+0x2b0> orr  w1, wzr, #0x8
> ffff0000006175e0 <pmap_remove_pages+0x2b4> mov  x0, x26
> ffff0000006175e4 <pmap_remove_pages+0x2b8> ldxr x9, [x26]
> ffff0000006175e8 <pmap_remove_pages+0x2bc> stxr w10, x8, [x26]
> ffff0000006175ec <pmap_remove_pages+0x2c0> cbnz w10, ffff0000006175e4 <pmap_remove_pages+0x2b8>
> ffff0000006175f0 <pmap_remove_pages+0x2c4> bl   ffff000000605884 <arm64_dcache_wb_range>
> 
> So this happens to involve arm64_dcache_wb_range (that has
> not started yet).

I still have not replicated the example that
involved instruction-cache-related code. I'm
going to give up on directly attempting to
get examples of that. But if I happen to see
it again I'll try to remember to get a backtrace
(bt) for it.

===
Mark Millard
markmi at dsl-only.net
Received on Tue May 02 2017 - 23:53:35 UTC

This archive was generated by hypermail 2.4.0 : Wed May 19 2021 - 11:41:11 UTC