panic: Stray timeout

From: Andriy Gapon <avg_at_FreeBSD.org>
Date: Sun, 30 Aug 2015 13:27:29 +0300
I've got the following kernel panic seemingly at random.
I have been using the kernel for about a week without any issues and I wasn't
doing anything special when the panic occurred.
Does this panic ring any bells?  Could the problem be already fixed by more
recent changes?

r286985

panic: Stray timeout

(kgdb) bt
#0  doadump (textdump=1) at /usr/src/sys/kern/kern_shutdown.c:260
#1  0xffffffff8063236f in kern_reboot (howto=260) at
/usr/src/sys/kern/kern_shutdown.c:328
#2  0xffffffff806329d4 in vpanic (fmt=<value optimized out>, ap=<value optimized
out>) at /usr/src/sys/kern/kern_shutdown.c:508
#3  0xffffffff806326d3 in panic (fmt=0x0) at /usr/src/sys/kern/kern_shutdown.c:441
#4  0xffffffff80677dea in taskqueue_timeout_func (arg=<value optimized out>) at
/usr/src/sys/kern/subr_taskqueue.c:269
#5  0xffffffff8064858d in softclock_call_cc (c=0xfffffe000241cb50,
cc=0xffffffff81066000, direct=0) at /usr/src/sys/kern/kern_timeout.c:722
#6  0xffffffff80648917 in softclock (arg=<value optimized out>) at
/usr/src/sys/kern/kern_timeout.c:851
#7  0xffffffff805fe47f in intr_event_execute_handlers (p=0xfffff800059b0548,
ie=0xfffff8000599d900) at /usr/src/sys/kern/kern_intr.c:1262
#8  0xffffffff805fefac in ithread_execute_handlers (p=0xfffff800059b0548,
ie=0xfffff8000599d900) at /usr/src/sys/kern/kern_intr.c:1275
#9  0xffffffff805fee1b in ithread_loop (arg=<value optimized out>) at
/usr/src/sys/kern/kern_intr.c:1356
#10 0xffffffff805fba9b in fork_exit (callout=0xffffffff805fedc0 <ithread_loop>,
arg=0xfffff800059adb60, frame=0xfffffe01dc55bc00) at
/usr/src/sys/kern/kern_fork.c:1006
#11 0xffffffff808073de in fork_trampoline () at
/usr/src/sys/libkern/explicit_bzero.c:28
#12 0x0000000000000000 in ?? ()

(kgdb) fr 5
#5  0xffffffff8064858d in softclock_call_cc (c=0xfffffe000241cb50,
cc=0xffffffff81066000, direct=0) at /usr/src/sys/kern/kern_timeout.c:722
722             c_func(c_arg);
(kgdb) p *c
$1 = {c_links = {le = {le_next = 0x0, le_prev = 0xffffffff81066108}, sle =
{sle_next = 0x0}, tqe = {tqe_next = 0x0, tqe_prev = 0xffffffff81066108}}, c_time
= 171699241227799, c_precision = 2684354, c_arg = 0xfffffe000241cb28,
  c_func = 0xffffffff80677db0 <taskqueue_timeout_func>, c_lock =
0xfffff80013d66d30, c_flags = 2, c_iflags = 144, c_cpu = 0}

c_flags = CALLOUT_ACTIVE
c_iflags = CALLOUT_RETURNUNLOCKED | CALLOUT_PROCESSED

(kgdb) p *cc
$2 = {cc_lock = {lock_object = {lo_name = 0xffffffff809a0177 "callout", lo_flags
= 720896, lo_data = 0, lo_witness = 0xfffffe0001fd1400}, mtx_lock = 4},
cc_exec_entity = 0xffffffff81066080, cc_next = 0x0,
  cc_callout = 0xfffffe0002116000, cc_callwheel = 0xfffffe0002238000, cc_expireq
= {tqh_first = 0x0, tqh_last = 0xffffffff81066108}, cc_callfree = {slh_first =
0xfffffe00022372c0}, cc_firstevent = 171699349138844,
  cc_lastscan = 171699243988142, cc_cookie = 0xfffff800059a9b00, cc_bucket =
10456, cc_inited = 1, cc_ktr_event_name = 0xffffffff81066140 "callwheel cpu 0"}
(kgdb) p c_arg
$3 = (void *) 0xfffffe000241cb28

(kgdb) down
#4  0xffffffff80677dea in taskqueue_timeout_func (arg=<value optimized out>) at
/usr/src/sys/kern/subr_taskqueue.c:269
269             KASSERT((timeout_task->f & DT_CALLOUT_ARMED) != 0, ("Stray
timeout"));
(kgdb) p *(struct timeout_task *)0xfffffe000241cb28
$4 = {q = 0xfffff80013d66d00, t = {ta_link = {stqe_next = 0x0}, ta_pending = 0,
ta_priority = 0, ta_func = 0xffffffff82197220 <ttm_bo_delayed_workqueue>,
ta_context = 0xfffffe000241c5c0}, c = {c_links = {le = {le_next = 0x0,
        le_prev = 0xffffffff81066108}, sle = {sle_next = 0x0}, tqe = {tqe_next =
0x0, tqe_prev = 0xffffffff81066108}}, c_time = 171699241227799, c_precision =
2684354, c_arg = 0xfffffe000241cb28,
    c_func = 0xffffffff80677db0 <taskqueue_timeout_func>, c_lock =
0xfffff80013d66d30, c_flags = 2, c_iflags = 144, c_cpu = 0}, f = 0}
(kgdb) p *$4.q
$5 = {tq_queue = {stqh_first = 0x0, stqh_last = 0xfffff80013d66d00}, tq_enqueue
= 0xffffffff80678c30 <taskqueue_thread_enqueue>, tq_context =
0xffffffff811130d8, tq_active = {tqh_first = 0x0, tqh_last = 0xfffff80013d66d20},
  tq_mutex = {lock_object = {lo_name = 0xffffffff809a52a1 "taskqueue", lo_flags
= 16973824, lo_data = 0, lo_witness = 0xfffffe0001fd7200}, mtx_lock =
18446735277710583024}, tq_threads = 0xfffff80013dd1bd0, tq_tcount = 1,
  tq_spin = 0, tq_flags = 5, tq_callouts = 1, tq_callbacks = 0xfffff80013d66d68,
tq_cb_contexts = 0xfffff80013d66d78}

BTW, I see the following potential problem.  taskqueue_cancel_timeout()
unconditionally resets DT_CALLOUT_ARMED, so that happens even if callout_stop()
signals that the callout is going to be called.  In that case, when
taskqueue_timeout_func() gets called it's going to run into the assertion.

-- 
Andriy Gapon
Received on Sun Aug 30 2015 - 08:28:36 UTC

This archive was generated by hypermail 2.4.0 : Wed May 19 2021 - 11:40:59 UTC