Re: Bug in latest rev kern_acct.c: panic: Trying sleep, but thread marked as sleeping prohibited

From: John Baldwin <jhb_at_freebsd.org>
Date: Wed, 21 Dec 2005 11:11:43 -0500
On Tuesday 20 December 2005 09:18 pm, Richard Todd wrote:
> I've been getting this panic a good bit recently after updating -current
> sources a couple days ago.  The panic occurs in the acctwatch() callout
> routine run from the clock ithread, apparently when it tries to sleep
> on a lock (which is, I gather, verboten in ithreads.)   The panics seemed
> to occur randomly but, on reflection, always occured when
>   1) there was a good bit of system activity and
>   2) I had just done something to allocate or free enough space on /usr to
>      cause accounting to be either suspended or resumed.
> and thus would cause the acctwatch() routine to have to do something.
> "make clean; make" in a big port seemed to be fairly effective in
> triggering the bug. :-)
>
> My previous kernel, which dated from Oct 24, didn't have this
> problem, which helps point suspicion at the latest rev of kern_acct.c
> (rev 1.76, date: 2005/11/12 10:45:13) which involved a bunch of changes
> in the lock handling of kernel accounting.

acctwatch() probably isn't a good thing to do from a callout since it wants to 
do VOPs and such.  Probably the easiest fix is to stick acctwatch() in its 
own kthread.

> GNU gdb 6.1.1 [FreeBSD]
> Copyright 2004 Free Software Foundation, Inc.
> GDB is free software, covered by the GNU General Public License, and you
> are welcome to change it and/or distribute copies of it under certain
> conditions. Type "show copying" to see the conditions.
> There is absolutely no warranty for GDB.  Type "show warranty" for details.
> This GDB was configured as "i386-marcel-freebsd".
>
> Unread portion of the kernel message buffer:
> panic: Trying sleep, but thread marked as sleeping prohibited
> cpuid = 1
> KDB: stack backtrace:
> kdb_backtrace(100,c3b7f900,c3b7f900,c0976100,c096f6a8) at
> kdb_backtrace+0x29 panic(c08a19c6,0,c096f6a8,c0970598,d85f0a14) at
> panic+0x114
> sleepq_add(c096f6a8,c0970598,c089a4ab,1,c0970598,0,c089a875,96) at
> sleepq_add+0x8c cv_wait_unlock(c096f6a8,c0970598,180,d85f0a4c,c096f680) at
> cv_wait_unlock+0x171 cv_wait(c096f6a8,c0970598,0,c3b7c480,c3ddb1c0) at
> cv_wait+0x36
> _sx_xlock(c096f680,c089a5a2,180) at _sx_xlock+0x68
> acctwatch(0) at acctwatch+0x20
> softclock(0) at softclock+0x22b
> ithread_execute_handlers(c3b7e8b0,c3b7c480) at
> ithread_execute_handlers+0xe6
> ithread_loop(c3b488a0,d85f0d38,c3b488a0,c065098c,0) at ithread_loop+0x67
> fork_exit(c065098c,c3b488a0,d85f0d38) at fork_exit+0xa4
> fork_trampoline() at fork_trampoline+0x8
> --- trap 0x1, eip = 0, esp = 0xd85f0d6c, ebp = 0 ---
> Uptime: 49m58s
> Dumping 637 MB (2 chunks)
>   chunk 0: 1MB (159 pages) ... ok
>   chunk 1: 637MB (163072 pages) 622 606 590 574 558 542 526 510 494 478 462
> 446 430 414 398 382 366 350 334 318 302 286 270 254 238 222 206 190 174 158
> 142 126 110 94 78 62 46 30 14
>
> #0  doadump () at pcpu.h:165
> 165		__asm __volatile("movl %%fs:0,%0" : "=r" (td));
> (kgdb) bt
> #0  doadump () at pcpu.h:165
> #1  0xc0663880 in boot (howto=260) at /usr/src/sys/kern/kern_shutdown.c:399
> #2  0xc0663b95 in panic (
>     fmt=0xc08a19c6 "Trying sleep, but thread marked as sleeping
> prohibited") at /usr/src/sys/kern/kern_shutdown.c:555
> #3  0xc06830fc in sleepq_add (wchan=0xc096f6a8, lock=0xc0970598, wmesg=0x0,
>     flags=1) at /usr/src/sys/kern/subr_sleepqueue.c:273
> #4  0xc063dd31 in cv_wait_unlock (cvp=0xc096f6a8, mp=0xc0970598)
>     at /usr/src/sys/kern/kern_condvar.c:152
> #5  0xc063db9a in cv_wait (cvp=0xc096f6a8, mp=0xc0970598)
>     at /usr/src/sys/kern/kern_condvar.c:112
> #6  0xc06697cc in _sx_xlock (sx=0xc096f680,
>     file=0xc089a5a2 "/usr/src/sys/kern/kern_acct.c", line=384)
>     at /usr/src/sys/kern/kern_sx.c:188
> #7  0xc063bcdc in acctwatch (a=0x0) at /usr/src/sys/kern/kern_acct.c:384
> #8  0xc0670bf7 in softclock (dummy=0x0) at
> /usr/src/sys/kern/kern_timeout.c:290 #9  0xc06508c2 in
> ithread_execute_handlers (p=0xc3b7e8b0, ie=0xc3b7c480) at
> /usr/src/sys/kern/kern_intr.c:662
> #10 0xc06509f3 in ithread_loop (arg=0xc3b488a0)
>     at /usr/src/sys/kern/kern_intr.c:745
> #11 0xc064fb54 in fork_exit (callout=0xc065098c <ithread_loop>,
>     arg=0xc3b488a0, frame=0xd85f0d38) at /usr/src/sys/kern/kern_fork.c:790
> #12 0xc081ac7c in fork_trampoline () at
> /usr/src/sys/i386/i386/exception.s:198 (kgdb) fr 9
> #9  0xc06508c2 in ithread_execute_handlers (p=0xc3b7e8b0, ie=0xc3b7c480)
>     at /usr/src/sys/kern/kern_intr.c:662
> 662			ih->ih_handler(ih->ih_argument);
> (kgdb) p *p
> $1 = {p_list = {le_next = 0xc3b7eadc, le_prev = 0xc3b7e684}, p_ksegrps = {
>     tqh_first = 0xc3b81d80, tqh_last = 0xc3b81d84}, p_threads = {
>     tqh_first = 0xc3b7f900, tqh_last = 0xc3b7f908}, p_suspended = {
>     tqh_first = 0x0, tqh_last = 0xc3b7e8c8}, p_ucred = 0xc3b79100,
>   p_fd = 0xc3b85200, p_fdtol = 0x0, p_stats = 0xc3b67a00,
>   p_limit = 0xc3b7a100, p_sigacts = 0xc3b8c000, p_flag = 524, p_sflag = 1,
>   p_state = PRS_NORMAL, p_pid = 12, p_hash = {le_next = 0x0,
>     le_prev = 0xc3b73030}, p_pglist = {le_next = 0xc3b7eadc,
>     le_prev = 0xc3b7e6d4}, p_pptr = 0xc096f0c0, p_sibling = {
>     le_next = 0xc3b7eadc, le_prev = 0xc3b7e6e0}, p_children = {
>     lh_first = 0x0}, p_mtx = {mtx_object = {lo_class = 0xc0931b24,
>       lo_name = 0xc089df1e "process lock",
>       lo_type = 0xc089df1e "process lock", lo_flags = 4390912,
>       lo_witness_data = {lod_list = {stqe_next = 0xc0981260},
>         lod_witness = 0xc0981260}}, mtx_lock = 4, mtx_recurse = 0},
>   p_ksi = 0x0, p_sigqueue = {sq_signals = {__bits = {0, 0, 0, 0}}, sq_list
> = { tqh_first = 0x0, tqh_last = 0xc3b7e948}, sq_proc = 0xc3b7e8b0, sq_flags
> = 1}, p_oppid = 0, p_vmspace = 0xc096f460, p_swtime = 2986, p_realtimer =
> {it_interval = {tv_sec = 0, tv_usec = 0}, it_value = { tv_sec = 0, tv_usec
> = 0}}, p_rux = {rux_runtime = {sec = 59, frac = 4151612463131724764},
> rux_uticks = 0, rux_sticks = 0, rux_iticks = 157, rux_uu = 0, rux_su =
> 191765, rux_iu = 58831433}, p_crux = {rux_runtime = {sec = 0, frac = 0},
> rux_uticks = 0, rux_sticks = 0, rux_iticks = 0, rux_uu = 0, rux_su = 0,
> rux_iu = 0}, p_profthreads = 0, p_maxthrwaits = 0, p_traceflag = 0,
> p_tracevp = 0x0, p_tracecred = 0x0, p_textvp = 0x0, p_lock = 1 '\001',
> p_sigiolst = {slh_first = 0x0}, p_sigparent = 20, p_sig = 0, p_code = 0,
> p_stops = 0, p_stype = 0, p_step = 0 '\0', p_pfsflags = 0 '\0', p_nlminfo =
> 0x0, p_aioinfo = 0x0, p_singlethread = 0x0, p_suspcount = 0, p_xthread =
> 0x0,
>   p_boundary_count = 0, p_procscopegrp = 0x0, p_pendingcnt = 0,
>   p_itimers = 0x0, p_magic = 3203398350,
>   p_comm = "swi4: clock sio\000\000\000\000", p_pgrp = 0xc096f600,
>   p_sysent = 0xc092cb80, p_args = 0x0, p_cpulimit = 9223372036854775807,
>   p_nice = 0 '\0', p_xstat = 0, p_klist = {kl_list = {slh_first = 0x0},
>     kl_lock = 0xc064b31c <knlist_mtx_lock>,
>     kl_unlock = 0xc064b338 <knlist_mtx_unlock>,
>     kl_locked = 0xc064b354 <knlist_mtx_locked>, kl_lockarg = 0xc3b7e918},
>   p_numthreads = 1, p_numksegrps = 1, p_md = {md_ldt = 0x0}, p_itcallout =
> { c_links = {sle = {sle_next = 0x0}, tqe = {tqe_next = 0x0,
>         tqe_prev = 0x0}}, c_time = 0, c_arg = 0x0, c_func = 0, c_mtx = 0x0,
>     c_flags = 16}, p_acflag = 1, p_ru = 0x0, p_peers = 0x0,
>   p_leader = 0xc3b7e8b0, p_emuldata = 0x0, p_label = 0x0,
>   p_sched = 0xc3b7eadc, p_ktr = {stqh_first = 0x0, stqh_last = 0xc3b7ead0},
>   p_mqnotifier = {lh_first = 0x0}}
> (kgdb) fr 7
> #7  0xc063bcdc in acctwatch (a=0x0) at /usr/src/sys/kern/kern_acct.c:384
> 384		sx_xlock(&acct_sx);
> (kgdb) q
>
>
> _______________________________________________
> freebsd-current_at_freebsd.org mailing list
> http://lists.freebsd.org/mailman/listinfo/freebsd-current
> To unsubscribe, send any mail to "freebsd-current-unsubscribe_at_freebsd.org"

-- 
John Baldwin <jhb_at_FreeBSD.org>  <><  http://www.FreeBSD.org/~jhb/
"Power Users Use the Power to Serve"  =  http://www.FreeBSD.org
Received on Wed Dec 21 2005 - 15:19:31 UTC

This archive was generated by hypermail 2.4.0 : Wed May 19 2021 - 11:38:49 UTC