Re: LOR and boot time lockups

From: Ian FREISLICH <if_at_hetzner.co.za>
Date: Sat, 22 Jan 2005 20:55:34 +0200
Ian FREISLICH wrote:
> Hi
> 
> I've been unable to successfully boot a kernel since Jan 19.  The
> offending change happened between Jan 10 and Jan 19.  I will do the
> binary search for the change if nobody has any ideas, but I want to
> ask first; it takes little over an hour to compile a kernel on this
> system.
> 
> It fails at the ATA probe, or if I remove ata from the kernel, the
> last message is from somewhere in the SCSI probe: "Waiting 15 seconds
> for SCSI devices to settle".  I can't reasonably remove the SCSI
> stuff from the kernel to see what happens next because the ufs root
> is on a SCSI disk.

I narrowed it down to this change (it boots with 1.238, but not 1.239):

Index: mptable.c
===================================================================
RCS file: /home/ncvs/src/sys/i386/i386/mptable.c,v
retrieving revision 1.238
retrieving revision 1.239
diff -u -d -r1.238 -r1.239
--- mptable.c   12 Jan 2005 18:24:32 -0000      1.238
+++ mptable.c   18 Jan 2005 20:27:24 -0000      1.239
_at__at_ -25,7 +25,7 _at__at_
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/i386/i386/mptable.c,v 1.238 2005/01/12 18:24:32 jhb Exp $");
+__FBSDID("$FreeBSD: src/sys/i386/i386/mptable.c,v 1.239 2005/01/18 20:27:24 jhb Exp $");
 
 #include "opt_mptable_force_htt.h"
 #include <sys/param.h>
_at__at_ -580,12 +580,18 _at__at_
        KASSERT(src_bus <= mptable_maxbusid, ("bus id %d too large", src_bus));
        switch (busses[src_bus].bus_type) {
        case ISA:
-               return (INTR_TRIGGER_EDGE);
+#ifndef PC98
+               if (elcr_found)
+                       return (elcr_read_trigger(src_bus_irq));
+               else
+#endif
+                       return (INTR_TRIGGER_EDGE);
        case PCI:
                return (INTR_TRIGGER_LEVEL);
 #ifndef PC98
        case EISA:
                KASSERT(src_bus_irq < 16, ("Invalid EISA IRQ %d", src_bus_irq));
+               KASSERT(elcr_found, ("Missing ELCR"));
                return (elcr_read_trigger(src_bus_irq));
 #endif
        default:


> At least the debugger is avalable.  I don't really know what is
> useful information so here's what I can think of off the top of my
> head.
> 
> Timecounters tick every 1.000 msec
> ipfw2 initialized, divert loadable, rule-based forwarding enabled, default to
 deny, logging unlimited
> Waiting 15 seconds for SCSI devices to settle
> *** Waited 1 minute then sent a line break***
> KDB: enter: Line break on console
> [thread pid 45 tid 100061 ]
> Stopped at      kdb_enter+0x2b: nop     
> 	db> ce
> Tracing pid 45 tid 100061 td 0xc10b8170
> kdb_enter(c062c0eb) at kdb_enter+0x2b
> siointr1(c10d8400) at siointr1+0xce
> siointr(c10d8400) at siointr+0x5e
> intr_execute_handlers(c1018090,d0e98cdc,4,d0e98d28,c05d0633) at intr_execute_
han
> dlers+0x7d
> lapic_handle_intr(34) at lapic_handle_intr+0x2e
> Xapic_isr1() at Xapic_isr1+0x33
> --- interrupt, eip = 0xc04dd1aa, esp = 0xd0e98d20, ebp = 0xd0e98d28 ---
> critical_exit(c10afa80,0,c05d02dc,c04b639c,c10afa80) at critical_exit+0xd2
> fork_exit(c04b639c,c10afa80,d0e98d48) at fork_exit+0x6c
> fork_trampoline() at fork_trampoline+0x8
> --- trap 0x1, eip = 0, esp = 0xd0e98d7c, ebp = 0 ---
> db> sho pcpu 0
> cpuid        = 0
> curthread    = 0xc10b8170: pid 45 "swi0: sio"
> curpcb       = 0xd0e98da0
> fpcurthread  = none
> idlethread   = 0xc0fe1730: pid 12 "idle: cpu0"
> APIC ID      = 0
> currentldt   = 0x28
> db> sho pcpu 1
> cpuid        = 1
> curthread    = 0xc0fe15c0: pid 11 "idle: cpu1"
> curpcb       = 0
> fpcurthread  = none
> idlethread   = 0xc0fe15c0: pid 11 "idle: cpu1"
> APIC ID      = 1
> currentldt   = 0x28
> db> ps
>   pid   proc     uid  ppid  pgrp  flag   stat  wmesg    wchan  cmd
>    45 c10b59d8    0     0     0 0000204 [CPU 0] swi0: sio
>     8 c10b5bd0    0     0     0 0000204 [RUNQ] aic_recovery0
>     7 c102bbd0    0     0     0 0000204 [RUNQ] aic_recovery0
>    44 c102bdc8    0     0     0 0000204 [RUNQ] usbtask
>    43 c103d000    0     0     0 0000204 [RUNQ] usb0
>     6 c103d1f8    0     0     0 0000204 [RUNQ] kqueue taskq
>    42 c103d3f0    0     0     0 0000204 [IWAIT] swi2: cambio
>    41 c103d5e8    0     0     0 0000204 [IWAIT] swi5:+
>     5 c103d7e0    0     0     0 0000204 [RUNQ] thread taskq
>    40 c103d9d8    0     0     0 0000204 [IWAIT] swi6:+
>    39 c103dbd0    0     0     0 0000204 [IWAIT] swi6: task queue
>    38 c103ddc8    0     0     0 0000204 [RUNQ] yarrow
>     4 c103f000    0     0     0 0000204 [RUNQ] g_down
>     3 c103f1f8    0     0     0 0000204 [RUNQ] g_up
>     2 c10225e8    0     0     0 0000204 [RUNQ] g_event
>    37 c10227e0    0     0     0 0000204 [IWAIT] swi1: net
>    36 c10229d8    0     0     0 0000204 [IWAIT] swi3: vm
>    35 c1022bd0    0     0     0 000020c [RUNQ] swi4: clock sio
>    34 c1022dc8    0     0     0 0000204 [IWAIT] irq0: clk
>    33 c102b000    0     0     0 0000204 [IWAIT] irq22:
>    32 c102b1f8    0     0     0 0000204 [IWAIT] irq21:
>    31 c102b3f0    0     0     0 0000204 [IWAIT] irq20:
>    30 c102b5e8    0     0     0 0000204 [IWAIT] irq19:
>    29 c102b7e0    0     0     0 0000204 [IWAIT] irq18:
>    28 c102b9d8    0     0     0 0000204 [IWAIT] irq17:
>    27 c0fea1f8    0     0     0 0000204 [IWAIT] irq16: xl0 ahc0
>    26 c0fea3f0    0     0     0 0000204 [IWAIT] irq15:
>    25 c0fea5e8    0     0     0 0000204 [IWAIT] irq14:
>    24 c0fea7e0    0     0     0 0000204 [IWAIT] irq13:
>    23 c0fea9d8    0     0     0 0000204 [IWAIT] irq12:
>    22 c0feabd0    0     0     0 0000204 [IWAIT] irq11: uhci0
>    21 c0feadc8    0     0     0 0000204 [IWAIT] irq10:
>    20 c1022000    0     0     0 0000204 [IWAIT] irq9: intpm0
>    19 c10221f8    0     0     0 0000204 [IWAIT] irq8: rtc
>    18 c10223f0    0     0     0 0000204 [IWAIT] irq7: ppc0
>    17 c0fe0000    0     0     0 0000204 [IWAIT] irq6:
>    16 c0fe01f8    0     0     0 0000204 [IWAIT] irq5:
>    15 c0fe03f0    0     0     0 0000204 [IWAIT] irq4: sio0
>    14 c0fe05e8    0     0     0 0000204 [IWAIT] irq3: sio1
>    13 c0fe07e0    0     0     0 0000204 [IWAIT] irq1: atkbd0
>    12 c0fe09d8    0     0     0 000020c [Can run] idle: cpu0
>    11 c0fe0bd0    0     0     0 000020c [Can run] idle: cpu1
>     1 c0fe0dc8    0     0     0 0000200 [INACTIVE] swapper
>    10 c0fea000    0     0     0 0000204 [RUNQ] ktrace
>     0 c0667980    0     0     0 0000200 [SLPQ conifhk 0xc0648f6c][SLP] swappe
r
> 
> 
> I then recompiled with witness and invarients and got the following
> LOR earlier in the boot process.  I'm not sure if it's related, but
> I couldn't find a reference it this particular LOR at
> http://sources.zabbadoz.net/freebsd/lor.html.
> 
> lock order reversal
>  1st 0xc0c64968 32 (UMA zone) _at_ /usr/src/sys/vm/uma_core.c:1842
>  2nd 0xc0c59084 system map (system map) _at_ /usr/src/sys/vm/vm_map.c:2998
> KDB: stack backtrace:
> kdb_backtrace(0,ffffffff,c0672c88,c0672b70,c063dee8) at kdb_backtrace+0x29
> witness_checkorder(c0c59084,9,c061f340,bb6) at witness_checkorder+0x54c
> _mtx_lock_flags(c0c59084,0,c061f340,bb6) at _mtx_lock_flags+0x5b
> _vm_map_lock_read(c0c59000,c061f340,bb6,200007d,c) at _vm_map_lock_read+0x24
> vm_map_lookup(c0820ad8,c7057000,2,c0820adc,c0820acc) at vm_map_lookup+0x28
> vm_fault(c0c59000,c7057000,2,0,c065fea0) at vm_fault+0x66
> trap_pfault(c0820ba0,0,c7057a69) at trap_pfault+0x11b
> trap(c0820018,c04e0010,c0610010,c0fd8620,c0c52420) at trap+0x30d
> calltrap() at calltrap+0x5
> --- trap 0xc, eip = 0xc0595100, esp = 0xc0820be0, ebp = 0xc0820bec ---
> uma_dbg_alloc(c0c52420,0,c0fd8620,c0c64968,0) at uma_dbg_alloc+0x54
> uma_zalloc_arg(c0c52420,0,1) at uma_zalloc_arg+0xfe
> malloc(1c,c0640820,1,4,0) at malloc+0xae
> resource_list_add(c10ad880,4,0,378,37f) at resource_list_add+0x2e
> isa_set_resource(c1042580,c1095e00,4,0,378,8) at isa_set_resource+0x8d
> bus_set_resource(c1095e00,4,0,378,8) at bus_set_resource+0x67
> ppc_probe(c1095e00,c1095e00,c0fc7270,c1095e00,c0820d30) at ppc_probe+0xd7
> ppc_isa_probe(c1095e00) at ppc_isa_probe+0x72
> device_probe_child(c1042580,c1095e00,c10af000,c1095e00,1d) at device_probe_ch
ild
> +0xf8
> device_probe_and_attach(c1095e00) at device_probe_and_attach+0x7d
> isa_probe_children(c1042580) at isa_probe_children+0x103
> configure(0,81ec00,81e000,0,c042da65) at configure+0x32
> mi_startup() at mi_startup+0x96
> begin() at begin+0x2c
> 
> 
> Fatal trap 12: page fault while in kernel mode
> cpuid = 0; apic id = 00
> fault virtual address   = 0xc7057a69
> fault code              = supervisor write, page not present
> instruction pointer     = 0x8:0xc0595100
> stack pointer           = 0x10:0xc0820be0
> frame pointer           = 0x10:0xc0820bec
> code segment            = base 0x0, limit 0xfffff, type 0x1b
>                         = DPL 0, pres 1, def32 1, gran 1
processor eflags        = interrupt enabled, resume, IOPL = 0
> current process         = 0 (swapper)
> [thread pid 0 tid 0 ]
> Stopped at      uma_dbg_alloc+0x54:     movb    $0xff,0x18(%eax,%ecx,1)
> db> sho proc
> p = 0xc065fca0, vmspace = 0xc0660000, map = 0xc0660000, pmap = 0xc06600c0
> Task map 0xc0660000: pmap=0xc06600c0, nentries=0, version=1
> db> ps
>   pid   proc     uid  ppid  pgrp  flag   stat  wmesg    wchan  cmd
>     8 c10b5bd0    0     0     0 0000204 [RUNQ] aic_recovery0
>     7 c102bbd0    0     0     0 0000204 [RUNQ] aic_recovery0
>    44 c102bdc8    0     0     0 0000204 [RUNQ] usbtask
>    43 c103d000    0     0     0 0000204 [RUNQ] usb0
>    42 c103d1f8    0     0     0 0000204 [IWAIT] swi2: cambio
>    41 c103d3f0    0     0     0 0000204 [IWAIT] swi5:+
>     6 c103d5e8    0     0     0 0000204 [RUNQ] thread taskq
>    40 c103d7e0    0     0     0 0000204 [IWAIT] swi6:+
>    39 c103d9d8    0     0     0 0000204 [IWAIT] swi6: task queue
>     5 c103dbd0    0     0     0 0000204 [RUNQ] kqueue taskq
>    38 c103ddc8    0     0     0 0000204 [RUNQ] yarrow
>     4 c103f000    0     0     0 0000204 [RUNQ] g_down
>     3 c103f1f8    0     0     0 0000204 [RUNQ] g_up
>     2 c10225e8    0     0     0 0000204 [RUNQ] g_event
>    37 c10227e0    0     0     0 0000204 [IWAIT] swi1: net
>    36 c10229d8    0     0     0 0000204 [IWAIT] swi3: vm
>    35 c1022bd0    0     0     0 000020c [IWAIT] swi4: clock
>    34 c1022dc8    0     0     0 0000204 [IWAIT] irq0:
>    33 c102b000    0     0     0 0000204 [IWAIT] irq22:
>    32 c102b1f8    0     0     0 0000204 [IWAIT] irq21:
>    31 c102b3f0    0     0     0 0000204 [IWAIT] irq20:
>    30 c102b5e8    0     0     0 0000204 [IWAIT] irq19:
>    29 c102b7e0    0     0     0 0000204 [IWAIT] irq18:
>    28 c102b9d8    0     0     0 0000204 [IWAIT] irq17:
>    27 c0fea1f8    0     0     0 0000204 [IWAIT] irq16: xl0 ahc0
>    26 c0fea3f0    0     0     0 0000204 [IWAIT] irq15:
>    25 c0fea5e8    0     0     0 0000204 [IWAIT] irq14:
>    24 c0fea7e0    0     0     0 0000204 [IWAIT] irq13:
>    23 c0fea9d8    0     0     0 0000204 [IWAIT] irq12:
>    22 c0feabd0    0     0     0 0000204 [RUNQ] irq11: uhci0
>    21 c0feadc8    0     0     0 0000204 [IWAIT] irq10:
>    20 c1022000    0     0     0 0000204 [IWAIT] irq9: intpm0
>    19 c10221f8    0     0     0 0000204 [IWAIT] irq8:
>    18 c10223f0    0     0     0 0000204 [IWAIT] irq7:
>    17 c0fe0000    0     0     0 0000204 [IWAIT] irq6:
>    16 c0fe01f8    0     0     0 0000204 [IWAIT] irq5:
>    15 c0fe03f0    0     0     0 0000204 [IWAIT] irq4:
>    14 c0fe05e8    0     0     0 0000204 [IWAIT] irq3:
>    13 c0fe07e0    0     0     0 0000204 [IWAIT] irq1: atkbd0
>    12 c0fe09d8    0     0     0 000020c [Can run] idle: cpu0
>    11 c0fe0bd0    0     0     0 000020c [Can run] idle: cpu1
>     1 c0fe0dc8    0     0     0 0000200 [INACTIVE] swapper
>    10 c0fea000    0     0     0 0000204 [RUNQ] ktrace
>     0 c065fca0    0     0     0 0000200 [CPU 0] swapper
> db> show pcpu
> cpuid        = 0
> curthread    = 0xc065fea0: pid 0 "swapper"
> curpcb       = 0xc0820da0
> fpcurthread  = none
> idlethread   = 0xc0fe1730: pid 12 "idle: cpu0"
> APIC ID      = 0
> currentldt   = 0x28
> spin locks held:
> db> show pcpu 1
> cpuid        = 1
> curthread    = 0xc0fe15c0: pid 11 "idle: cpu1"
> curpcb       = 0
> fpcurthread  = none
> idlethread   = 0xc0fe15c0: pid 11 "idle: cpu1"
> APIC ID      = 1
> currentldt   = 0x28
> spin locks held:
> 
> 
> --
> Ian Freislich
> _______________________________________________
> freebsd-current_at_freebsd.org mailing list
> http://lists.freebsd.org/mailman/listinfo/freebsd-current
> To unsubscribe, send any mail to "freebsd-current-unsubscribe_at_freebsd.org"
> 

--
Ian Freislich
Received on Sat Jan 22 2005 - 17:55:38 UTC

This archive was generated by hypermail 2.4.0 : Wed May 19 2021 - 11:38:26 UTC