On Thu, 8 Apr 2004, Robert Watson wrote: > panic: m 0 so->so_rcv.sb_cc 17 > at line 860 in file ../../../kern/uipc_socket.c > cpuid = 1; > Debugger("panic") > Stopped at Debugger+0x46: xchgl %ebx,in_Debugger.0 > db> trace > Debugger(c07c3990) at Debugger+0x46 > __panic(c07c98f1,35c,c07c997d,0,11) at __panic+0x13d > soreceive(c6664618,e9891c0c,e9891c38,0,e9891c10) at soreceive+0x20c > recvit(c6561e70,3,e9891cc0,0,bfbfe410) at recvit+0x1a2 > recvmsg(c6561e70,e9891d14,3,4,296) at recvmsg+0x9a > syscall(808002f,bfbf002f,bfbf002f,bfbfe44c,8079a70) at syscall+0x217 > Xint0x80_syscall() at Xint0x80_syscall+0x1d > --- syscall (27, FreeBSD ELF32, recvmsg), eip = 0x282afff7, esp = > 0xbfbfe3fc, ebp = 0xbfbfe458 --- > db> Apr 8 04:09:29 sm-mta[3550]: i3831Ija003419: SYSERR(root): hash map > "Alias0": missing map file /etc/mail/aliases.db: No such file or directory > Apr 8 04:09:29 sm-mta[3550]: i3831Ija003419: SYSERR(root): cannot > flock(/etc/mail/aliases, fd=5, type=1, omode=40000, euid=0): Operation not > supported > > Funky, eh? I thought we used to have code to ipi the other cpu's and halt > them until the cpu in ddb was out again. I guess I mis-remember, or that > code is broken... ddb stops the other CPUs (at least on i386's, unless you have edited smptests.h to comment out the option CPUSTOP_ON_DDBBREAK which should be non-optional (always enabled)), but plain panic() doesn't stop them immediately, so much may happen on other CPUs if ddb is not called from panic() or if ddb has problems stopping the CPUs. ddb does have problems stopping the CPUs, but I don't see how it can reach the db> prompt before stopping them. The main problem is that stopping all the other CPUs may be impossible because one of them is looping with IPIs disabled, perhaps because it is trying to enter ddb (and stop other CPUs) too. All CPUs entering ddb should hang in this case. 
Half-baked fixes: %%% Index: db_interface.c =================================================================== RCS file: /home/ncvs/src/sys/i386/i386/db_interface.c,v retrieving revision 1.81 diff -u -2 -r1.81 db_interface.c --- db_interface.c 3 Apr 2004 22:23:36 -0000 1.81 +++ db_interface.c 4 Apr 2004 05:37:38 -0000 @@ -35,4 +35,5 @@ #include <sys/reboot.h> #include <sys/cons.h> +#include <sys/ktr.h> #include <sys/pcpu.h> #include <sys/proc.h> @@ -41,4 +42,5 @@ #include <machine/cpu.h> #ifdef SMP +#include <machine/smp.h> #include <machine/smptests.h> /** CPUSTOP_ON_DDBBREAK */ #endif @@ -61,4 +63,33 @@ static jmp_buf db_global_jmpbuf; +#ifdef SMP +/* XXX this is cloned from stop_cpus() since that function can hang. */ +static int +attempt_to_stop_cpus(u_int map) +{ + int i; + + if (!smp_started) + return 0; + + CTR1(KTR_SMP, "attempt_to_stop_cpus(%x)", map); + + /* send the stop IPI to all CPUs in map */ + ipi_selected(map, IPI_STOP); + + i = 0; + while ((atomic_load_acq_int(&stopped_cpus) & map) != map) { + /* spin */ + i++; + if (i == 100000000) { + printf("timeout stopping cpus\n"); + break; + } + } + + return 1; +} +#endif /* SMP */ + /* * kdb_trap - field a TRACE or BPT trap @@ -69,4 +100,8 @@ u_int ef; volatile int ddb_mode = !(boothowto & RB_GDB); +#ifdef SMP + static u_int kdb_trap_lock = NOCPU; + static u_int output_lock; +#endif /* @@ -91,16 +126,48 @@ #ifdef SMP + if (atomic_cmpset_int(&kdb_trap_lock, NOCPU, PCPU_GET(cpuid)) == 0 && + kdb_trap_lock != PCPU_GET(cpuid)) { + while (atomic_cmpset_int(&output_lock, 0, 1) == 0) + ; + db_printf( + "concurrent ddb entry: type %d trap, code=%x cpu=%d\n", + type, code, PCPU_GET(cpuid)); + atomic_store_rel_int(&output_lock, 0); + if (type == T_BPTFLT) + regs->tf_eip--; + else { + while (atomic_cmpset_int(&output_lock, 0, 1) == 0) + ; + db_printf( +"concurrent ddb entry on non-breakpoint: too hard to handle properly\n"); + 
atomic_store_rel_int(&output_lock, 0); + } + while (atomic_load_acq_int(&kdb_trap_lock) != NOCPU) + ; + write_eflags(ef); + return (1); + } +#endif + +#ifdef SMP #ifdef CPUSTOP_ON_DDBBREAK +#define VERBOSE_CPUSTOP_ON_DDBBREAK_NOT #if defined(VERBOSE_CPUSTOP_ON_DDBBREAK) + while (atomic_cmpset_int(&output_lock, 0, 1) == 0) + ; db_printf("\nCPU%d stopping CPUs: 0x%08x...", PCPU_GET(cpuid), PCPU_GET(other_cpus)); + atomic_store_rel_int(&output_lock, 0); #endif /* VERBOSE_CPUSTOP_ON_DDBBREAK */ /* We stop all CPUs except ourselves (obviously) */ - stop_cpus(PCPU_GET(other_cpus)); + attempt_to_stop_cpus(PCPU_GET(other_cpus)); #if defined(VERBOSE_CPUSTOP_ON_DDBBREAK) + while (atomic_cmpset_int(&output_lock, 0, 1) == 0) + ; db_printf(" stopped.\n"); + atomic_store_rel_int(&output_lock, 0); #endif /* VERBOSE_CPUSTOP_ON_DDBBREAK */ @@ -192,22 +259,37 @@ #if defined(VERBOSE_CPUSTOP_ON_DDBBREAK) + while (atomic_cmpset_int(&output_lock, 0, 1) == 0) + ; db_printf("\nCPU%d restarting CPUs: 0x%08x...", PCPU_GET(cpuid), stopped_cpus); + atomic_store_rel_int(&output_lock, 0); #endif /* VERBOSE_CPUSTOP_ON_DDBBREAK */ /* Restart all the CPUs we previously stopped */ if (stopped_cpus != PCPU_GET(other_cpus) && smp_started != 0) { + while (atomic_cmpset_int(&output_lock, 0, 1) == 0) + ; db_printf("whoa, other_cpus: 0x%08x, stopped_cpus: 0x%08x\n", PCPU_GET(other_cpus), stopped_cpus); + atomic_store_rel_int(&output_lock, 0); +#if 0 panic("stop_cpus() failed"); +#endif } restart_cpus(stopped_cpus); #if defined(VERBOSE_CPUSTOP_ON_DDBBREAK) + while (atomic_cmpset_int(&output_lock, 0, 1) == 0) + ; db_printf(" restarted.\n"); + atomic_store_rel_int(&output_lock, 0); #endif /* VERBOSE_CPUSTOP_ON_DDBBREAK */ #endif /* CPUSTOP_ON_DDBBREAK */ #endif /* SMP */ + +#ifdef SMP + atomic_store_rel_int(&kdb_trap_lock, NOCPU); +#endif write_eflags(ef); %%% This is supposed to wait for the other CPUs to either stop or enter ddb. They shouldn't loop with interrupts disabled anywhere else. 
The output_lock stuff here is especially half-baked. The VERBOSE_CPUSTOP_ON_DDBBREAK option should be non-optional (always disabled), but I needed something to debug concurrent entry and interleaved output is hard to read. Bruce Received on Thu Apr 08 2004 - 05:31:04 UTC
This archive was generated by hypermail 2.4.0 : Wed May 19 2021 - 11:37:50 UTC