Hi. Last night I upgraded to the most recent -current source and rebuilt everything, and decided on building the kernel to try the new SCHED_ULE scheduler (I had been using SCHED_4BSD before). Alas, the experiment did not go well; every time I booted the machine, I got a panic just as the system was about to put up the login prompt. Switching the kernel config back to SCHED_4BSD and building a kernel with the same (last night's) sources gave me a working kernel. This is on a dual-processor PII/400 box. Below I list what I've got from a kernel coredump of the SCHED_ULE kernel; I've added my comments on the gdb listing preceded by "#" signs. ichotolot# gdb -k ./kernel.debug ./vmcore.45 GNU gdb 5.2.1 (FreeBSD) Copyright 2002 Free Software Foundation, Inc. GDB is free software, covered by the GNU General Public License, and you are welcome to change it and/or distribute copies of it under certain conditions. Type "show copying" to see the conditions. There is absolutely no warranty for GDB. Type "show warranty" for details. This GDB was configured as "i386-undermydesk-freebsd"... panic: page fault panic messages: --- Fatal trap 12: page fault while in kernel mode cpuid = 1; lapic.id = 01000000 fault virtual address = 0x38 fault code = supervisor read, page not present instruction pointer = 0x8:0xc036835d stack pointer = 0x10:0xe1cfbbbc frame pointer = 0x10:0xe1cfbbcc code segment = base 0x0, limit 0xfffff, type 0x1b = DPL 0, pres 1, def32 1, gran 1 processor eflags = interrupt enabled, resume, IOPL = 0 current process = 649 (squid) trap number = 12 panic: page fault cpuid = 1; lapic.id = 01000000 boot() called on cpu#1 syncing disks, buffers remaining... panic: absolutely cannot call smp_ipi_shootdown with interrupts already disabled cpuid = 1; lapic.id = 01000000 boot() called on cpu#1 Uptime: 1m12s Dumping 638 MB 16 32 48 64 80 96 112 128 144 160 176 192 208 224 240 256 272 288 304 320 336 352 368 384 400 416 432 448 464 480 496 512 528 544 560 576 592 608 624 --- Reading symbols from /usr/src/sys/i386/compile/ICHOTOLOTSMP/modules/usr/src/sys/modules/acpi/acpi.ko.debug...done. Loaded symbols for /usr/src/sys/i386/compile/ICHOTOLOTSMP/modules/usr/src/sys/modules/acpi/acpi.ko.debug Reading symbols from /usr/src/sys/i386/compile/ICHOTOLOTSMP/modules/usr/src/sys/modules/linprocfs/linprocfs.ko.debug...done. Loaded symbols for /usr/src/sys/i386/compile/ICHOTOLOTSMP/modules/usr/src/sys/modules/linprocfs/linprocfs.ko.debug Reading symbols from /usr/src/sys/i386/compile/ICHOTOLOTSMP/modules/usr/src/sys/modules/linux/linux.ko.debug...done. Loaded symbols for /usr/src/sys/i386/compile/ICHOTOLOTSMP/modules/usr/src/sys/modules/linux/linux.ko.debug Reading symbols from /boot/kernel/green_saver.ko...done. Loaded symbols for /boot/kernel/green_saver.ko #0 doadump () at ../../../kern/kern_shutdown.c:240 240 dumping++; (kgdb) bt #0 doadump () at ../../../kern/kern_shutdown.c:240 #1 0xc03547c0 in boot (howto=260) at ../../../kern/kern_shutdown.c:372 #2 0xc0354ba6 in panic () at ../../../kern/kern_shutdown.c:550 #3 0xc050f9db in smp_tlb_shootdown (vector=0, addr1=0, addr2=0) at ../../../i386/i386/mp_machdep.c:2387 #4 0xc050fc79 in smp_invlpg_range (addr1=0, addr2=0) at ../../../i386/i386/mp_machdep.c:2519 #5 0xc0511df8 in pmap_invalidate_range (pmap=0xc06dc620, sva=3568271360, eva=1) at ../../../i386/i386/pmap.c:719 #6 0xc0512118 in pmap_qenter (sva=3568271360, m=0xe1cfb8c0, count=-1) at ../../../i386/i386/pmap.c:943 #7 0xc03a0448 in vm_hold_load_pages (bp=0xd199d440, from=3568271360, to=3568279552) at ../../../kern/vfs_bio.c:3574 #8 0xc039ea5c in allocbuf (bp=0xd199d440, size=6144) at ../../../kern/vfs_bio.c:2752 #9 0xc039e6fe in geteblk (size=6144) at ../../../kern/vfs_bio.c:2634 #10 0xc039b210 in bwrite (bp=0xd188b8e0) at ../../../kern/vfs_bio.c:818 #11 0xc039bc6c in bawrite (bp=0x0) at ../../../kern/vfs_bio.c:1153 #12 0xc03a4860 in vop_stdfsync (ap=0xe1cfba14) at ../../../kern/vfs_default.c:742 #13 0xc031ba10 in spec_fsync (ap=0xe1cfba14) at ../../../fs/specfs/spec_vnops.c:417 #14 0xc031ae38 in spec_vnoperate (ap=0x0) at ../../../fs/specfs/spec_vnops.c:122 #15 0xc04ad2d7 in ffs_sync (mp=0xc5336400, waitfor=2, cred=0xc1c27e80, td=0xc0669ec0) at vnode_if.h:624 #16 0xc03b0b1b in sync (td=0xc0669ec0, uap=0x0) at ../../../kern/vfs_syscalls.c:142 #17 0xc03542e2 in boot (howto=256) at ../../../kern/kern_shutdown.c:281 #18 0xc0354ba6 in panic () at ../../../kern/kern_shutdown.c:550 #19 0xc0517292 in trap_fatal (frame=0xe1cfbb7c, eva=0) at ../../../i386/i386/trap.c:836 #20 0xc0516863 in trap (frame= {tf_fs = -1067057128, tf_es = 16, tf_ds = -1067974640, tf_edi = -982159056, tf_esi = -1066987296, tf_ebp = -506479668, tf_isp = -506479704, tf_ebx = 0, tf_edx = 2, tf_ecx = 1, tf_eax = 0, tf_trapno = 12, tf_err = 0, tf_eip = -1070169251, tf_cs = 8, tf_eflags = 66178, tf_esp = -1066987296, tf_ss = 0}) at ../../../i386/i386/trap.c:256 #21 0xc04ff988 in calltrap () at {standard input}:97 #22 0xc036947b in sched_choose () at ../../../kern/sched_ule.c:1164 #23 0xc035a636 in choosethread () at ../../../kern/kern_switch.c:140 #24 0xc035c340 in mi_switch () at ../../../kern/kern_synch.c:524 #25 0xc034b686 in _mtx_lock_sleep (m=0xc066e680, opts=0, file=0xc058251d "../../../kern/kern_descrip.c", line=826) at ../../../kern/kern_mutex.c:641 #26 0xc034b0f8 in _mtx_lock_flags (m=0xc066e680, opts=0, file=0xc058251d "../../../kern/kern_descrip.c", line=826) at ../../../kern/kern_mutex.c:331 #27 0xc0335e11 in close (td=0xc5757130, uap=0x0) at ../../../kern/kern_descrip.c:826 #28 0xc05175ee in syscall (frame= {tf_fs = 47, tf_es = 47, tf_ds = 47, tf_edi = -1077936744, tf_esi = 0, tf_ebp = -1077936840, tf_isp = -506479244, tf_ebx = 689, tf_edx = -1, tf_ecx = 9, tf_eax = 6, tf_trapno = 12, tf_err = 2, tf_eip = 672626595, tf_cs = 31, tf_eflags = 663, tf_esp = -1077936900, tf_ss = 47}) at ../../../i386/i386/trap.c:1023 #29 0xc04ff9dd in Xint0x80_syscall () at {standard input}:139 ---Can't read userspace from dump, or kernel process--- # The initial panic seems to be around frame 22, so let's go there. (kgdb) fr 22 #22 0xc036947b in sched_choose () at ../../../kern/sched_ule.c:1164 1164 kseq_move(kseq, PCPU_GET(cpuid)); (kgdb) p kseq $1 = (struct kseq *) 0x0 (kgdb) l 1160 1155 */ 1156 if ((kseq = kseq_load_highest()) == NULL) 1157 return (NULL); 1158 1159 /* 1160 * Remove this kse from this kseq and runq and then requeue 1161 * on the current processor. Then we will dequeue it 1162 * normally above. 1163 */ 1164 kseq_move(kseq, PCPU_GET(cpuid)); # Hmm, that's odd. You'd think that if kseq returned by kseq_load_highest() # was null, line 1157 would have returned and we'd never hit line 1164. # Weird. Could be gdb is just being confused here, though. Hard to say. # Anyway, on further inspection, it seems the fatal trap wasn't in # sched_choose() at all, but elsewhere; look at the eip in the trap frame: (kgdb) fr 20 #20 0xc0516863 in trap (frame= {tf_fs = -1067057128, tf_es = 16, tf_ds = -1067974640, tf_edi = -982159056, tf_esi = -1066987296, tf_ebp = -506479668, tf_isp = -506479704, tf_ebx = 0, tf_edx = 2, tf_ecx = 1, tf_eax = 0, tf_trapno = 12, tf_err = 0, tf_eip = -1070169251, tf_cs = 8, tf_eflags = 66178, tf_esp = -1066987296, tf_ss = 0}) at ../../../i386/i386/trap.c:256 256 trap_fatal(&frame, eva); (kgdb) p/x frame.tf_eip $2 = 0xc036835d (kgdb) disass 0xc036835d Dump of assembler code for function kseq_move: 0xc0368340 <kseq_move>: push %ebp 0xc0368341 <kseq_move+1>: mov %esp,%ebp 0xc0368343 <kseq_move+3>: sub $0x10,%esp 0xc0368346 <kseq_move+6>: mov %ebx,0xfffffff8(%ebp) 0xc0368349 <kseq_move+9>: mov %esi,0xfffffffc(%ebp) 0xc036834c <kseq_move+12>: mov 0x8(%ebp),%esi 0xc036834f <kseq_move+15>: mov %esi,(%esp,1) 0xc0368352 <kseq_move+18>: call 0xc03683a0 <kseq_choose> 0xc0368357 <kseq_move+23>: mov %eax,%ebx 0xc0368359 <kseq_move+25>: mov %eax,0x4(%esp,1) 0xc036835d <kseq_move+29>: mov 0x38(%eax),%eax 0xc0368360 <kseq_move+32>: mov 0x4(%eax),%eax 0xc0368363 <kseq_move+35>: mov %eax,(%esp,1) 0xc0368366 <kseq_move+38>: call 0xc035ac50 <runq_remove> 0xc036836b <kseq_move+43>: movl $0x4,0x30(%ebx) 0xc0368372 <kseq_move+50>: mov %ebx,0x4(%esp,1) 0xc0368376 <kseq_move+54>: mov %esi,(%esp,1) 0xc0368379 <kseq_move+57>: call 0xc0367f30 <kseq_rem> 0xc036837e <kseq_move+62>: mov 0x38(%ebx),%edx 0xc0368381 <kseq_move+65>: mov 0xc(%ebp),%eax 0xc0368384 <kseq_move+68>: mov %al,0x14(%edx) 0xc0368387 <kseq_move+71>: mov %ebx,(%esp,1) 0xc036838a <kseq_move+74>: call 0xc0369490 <sched_add> 0xc036838f <kseq_move+79>: mov 0xfffffff8(%ebp),%ebx 0xc0368392 <kseq_move+82>: mov 0xfffffffc(%ebp),%esi 0xc0368395 <kseq_move+85>: mov %ebp,%esp 0xc0368397 <kseq_move+87>: pop %ebp 0xc0368398 <kseq_move+88>: ret End of assembler dump. # Aha, the trap occured right at this instruction: # 0xc036835d <kseq_move+29>: mov 0x38(%eax),%eax # Note from the trapframe that eax was 0 at that point, hence we're # dereferencing a NULL pointer. Apparently this NULL was returned by # kseq_choose, and the actual null deref is the "ke->ke_runq" at line 448: (kgdb) l kseq_move 439 return (NULL); 440 } 441 442 void 443 kseq_move(struct kseq *from, int cpu) 444 { 445 struct kse *ke; 446 447 ke = kseq_choose(from); 448 runq_remove(ke->ke_runq, ke); (kgdb) l 449 ke->ke_state = KES_THREAD; 450 kseq_rem(from, ke); 451 452 ke->ke_cpu = cpu; 453 sched_add(ke); 454 } 455 #endif 456 457 struct kse * 458 kseq_choose(struct kseq *kseq) (kgdb) q ichotolot# So that's what I found. Any suggestions on how to track this down further?Received on Thu Jul 03 2003 - 20:26:13 UTC
This archive was generated by hypermail 2.4.0 : Wed May 19 2021 - 11:37:13 UTC