Re: zfs panic when 'make buildworld buildkernel'

From: Mateusz Guzik <mjguzik_at_gmail.com>
Date: Tue, 13 Oct 2020 14:15:19 +0200
On 10/13/20, Mateusz Guzik <mjguzik_at_gmail.com> wrote:
> On 10/13/20, YAMAMOTO Shigeru <shigeru_at_os-hackers.jp> wrote:
>>
>> Hi,
>>
>> I am trying to run 'make buildworld buildkernel' in a full ZFS environment,
>> but I can't finish buildworld/buildkernel without a panic.
>> Does anyone have the same trouble?
>>
>> uname -a:
>> ```
>> FreeBSD jenkins-02.current.os-hackers.jp 13.0-CURRENT FreeBSD
>> 13.0-CURRENT
>> #0 r366657: Tue Oct 13 13:07:15 JST 2020
>> root_at_jenkins-02.current.os-hackers.jp:/usr/obj/usr/src/amd64.amd64/sys/GENERIC
>> amd64
>> ```
>>
>> panic message:
>> ```
>> login: panic: VERIFY(tid) failed
>>
>> cpuid = 2
>> time = 1602582381
>> KDB: stack backtrace:
>> db_trace_self_wrapper() at db_trace_self_wrapper+0x2b/frame
>> 0xfffffe002abaa9f0
>> vpanic() at vpanic+0x182/frame 0xfffffe002abaaa40
>> spl_panic() at spl_panic+0x3a/frame 0xfffffe002abaaaa0
>> taskq_dispatch() at taskq_dispatch+0xe8/frame 0xfffffe002abaaae0
>> arc_prune_async() at arc_prune_async+0x3f/frame 0xfffffe002abaab00
>> arc_evict_cb() at arc_evict_cb+0x1f6/frame 0xfffffe002abaab60
>> zthr_procedure() at zthr_procedure+0x8f/frame 0xfffffe002abaabb0
>> fork_exit() at fork_exit+0x80/frame 0xfffffe002abaabf0
>> fork_trampoline() at fork_trampoline+0xe/frame 0xfffffe002abaabf0
>> --- trap 0, rip = 0, rsp = 0, rbp = 0 ---
>> KDB: enter: panic
>> [ thread pid 19 tid 100070 ]
>> Stopped at      kdb_enter+0x37: movq    $0,0x10b0116(%rip)
>> db>
>> ```
>>
>
> The issue is pretty apparent:
>
>         taskqid_t tqid = atomic_fetchadd_int(&tqidnext, 1);
>
> this eventually wraps to 0 and then you get the crash.
>
> Probably the thing to do is to bump it to 64 bit and 0-check on other
> platforms.
>

This should do it for the time being:

diff --git a/sys/contrib/openzfs/module/os/freebsd/spl/spl_taskq.c b/sys/contrib/openzfs/module/os/freebsd/spl/spl_taskq.c
index 1050816cd968..a8e53aba3915 100644
--- a/sys/contrib/openzfs/module/os/freebsd/spl/spl_taskq.c
+++ b/sys/contrib/openzfs/module/os/freebsd/spl/spl_taskq.c
@@ -67,7 +67,7 @@ static unsigned long tqenthash;
 static unsigned long tqenthashlock;
 static struct sx *tqenthashtbl_lock;

-static uint32_t tqidnext = 1;
+static uint32_t tqidnext;

 #define        TQIDHASH(tqid) (&tqenthashtbl[(tqid) & tqenthash])
 #define        TQIDHASHLOCK(tqid) (&tqenthashtbl_lock[((tqid) & tqenthashlock)])
@@ -90,7 +90,6 @@ system_taskq_init(void *arg)
            M_TASKQ, M_WAITOK | M_ZERO);
        for (i = 0; i < tqenthashlock + 1; i++)
                sx_init_flags(&tqenthashtbl_lock[i], "tqenthash", SX_DUPOK);
-       tqidnext = 1;
        taskq_zone = uma_zcreate("taskq_zone", sizeof (taskq_ent_t),
            NULL, NULL, NULL, NULL,
            UMA_ALIGN_CACHE, 0);
@@ -137,10 +136,23 @@ taskq_lookup(taskqid_t tqid)
        return (ent);
 }

+static taskqid_t
+__taskq_nextgen(void)
+{
+       taskqid_t tqid;
+
+       for (;;) {
+               tqid = atomic_fetchadd_int(&tqidnext, 1) + 1;
+               if (__predict_true(tqid != 0))
+                       break;
+       }
+       return (tqid);
+}
+
 static taskqid_t
 taskq_insert(taskq_ent_t *ent)
 {
-       taskqid_t tqid = atomic_fetchadd_int(&tqidnext, 1);
+       taskqid_t tqid = __taskq_nextgen();

        ent->tqent_id = tqid;
        ent->tqent_registered = B_TRUE;
@@ -345,9 +357,9 @@ taskq_dispatch(taskq_t *tq, task_func_t func, void *arg, uint_t flags)
        task->tqent_cancelled = B_FALSE;
        task->tqent_type = NORMAL_TASK;
        tid = taskq_insert(task);
+       VERIFY(tid);
        TASK_INIT(&task->tqent_task, prio, taskq_run, task);
        taskqueue_enqueue(tq->tq_queue, &task->tqent_task);
-       VERIFY(tid);
        return (tid);
 }

-- 
Mateusz Guzik <mjguzik gmail.com>
Received on Tue Oct 13 2020 - 10:15:23 UTC

This archive was generated by hypermail 2.4.0 : Wed May 19 2021 - 11:41:25 UTC