Re: panic: LK_RETRY set with incompatible flags

From: Sergey Kandaurov <pluknet_at_gmail.com>
Date: Mon, 4 Feb 2013 16:20:36 +0300
On 4 February 2013 16:30, Sergey Kandaurov <pluknet_at_gmail.com> wrote:
> On 4 February 2013 16:13, Sergey Kandaurov <pluknet_at_gmail.com> wrote:
>> On 4 February 2013 16:06, Andriy Gapon <avg_at_freebsd.org> wrote:
>>> on 04/02/2013 13:49 Sergey Kandaurov said the following:
>>>> Hi, Rick! Here is the requested info regarding witness, and a bit more.
>>>> The triggered KASSERT is now different though.
>>>
>>> It's exactly the same problem though :-)
>>> Do you have a crashdump?
>>> If yes, please print **vpp.
>>
>> Yep, but it's in a bad form :(
>> It has many bits optimized out (compiled with clang).
>> I'll rebuild the kernel with -O or so and will try again.
>>
>> #8  0xffffffff808bc4ce in kdb_enter (why=0xffffffff80e7ed99 "panic",
>>     msg=<value optimized out>) at cpufunc.h:63
>> #9  0xffffffff80888fb7 in vpanic (fmt=<value optimized out>,
>>     ap=<value optimized out>) at /usr/src/sys/kern/kern_shutdown.c:746
>> #10 0xffffffff80888e66 in kassert_panic (fmt=<value optimized out>)
>>     at /usr/src/sys/kern/kern_shutdown.c:641
>> #11 0xffffffff808d2259 in witness_checkorder (lock=0xfffffe00a1b44240,
>> ---Type <return> to continue, or q <return> to quit---
>>     flags=1,
>>     file=0xffffffff81b2bb36
>> "/usr/src/sys/modules/zfs/../../cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c",
>> line=1452,
>>     interlock=<value optimized out>) at /usr/src/sys/kern/subr_witness.c:1122
>> #12 0xffffffff8086c11e in __lockmgr_args (lk=0xfffffe00a1b44240,
>>     flags=<value optimized out>, ilk=0xfffffe00a1b44270,
>>     wmesg=0xffffffff81b2808d "zfs", pri=96, timo=51,
>>     file=0xffffffff80e8e407 "/usr/src/sys/kern/vfs_default.c", line=0)
>>     at /usr/src/sys/kern/kern_lock.c:511
>> #13 0xffffffff8091439c in vop_stdlock (ap=<value optimized out>)
>>     at lockmgr.h:97
>> #14 0xffffffff80cb70c0 in VOP_LOCK1_APV (vop=<value optimized out>,
>>     a=<value optimized out>) at vnode_if.c:2022
>> #15 0xffffffff80932fbb in _vn_lock (vp=0xfffffe00a1b441d8,
>>     flags=<value optimized out>,
>>     file=0xffffffff81b2bb36
>> "/usr/src/sys/modules/zfs/../../cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c",
>> line=1452) at vnode_if.h:859
>> #16 0xffffffff81abd902 in zfs_lookup ()
>>     at /usr/src/sys/modules/zfs/../../cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c:1452
>> #17 0xffffffff81abdc1d in zfs_freebsd_lookup (ap=0xffffff848e6c0270)
>>     at /usr/src/sys/modules/zfs/../../cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c:5864
>> ---Type <return> to continue, or q <return> to quit---
>> #18 0xffffffff80cb51b2 in VOP_CACHEDLOOKUP_APV (vop=<value optimized out>,
>>     a=<value optimized out>) at vnode_if.c:191
>> #19 0xffffffff8091177f in vfs_cache_lookup (ap=<value optimized out>)
>>     at vnode_if.h:80
>> #20 0xffffffff80cb50a2 in VOP_LOOKUP_APV (vop=<value optimized out>,
>>     a=<value optimized out>) at vnode_if.c:125
>> #21 0xffffffff80919658 in lookup (ndp=0xffffff848e6c05d8) at vnode_if.h:54
>> #22 0xffffffff807dffe5 in nfsvno_namei (nd=0xffffff848e6c08c8,
>>     ndp=0xffffff848e6c05d8, dp=<value optimized out>,
>>     islocked=<value optimized out>, exp=<value optimized out>,
>>     p=<value optimized out>, retdirp=<value optimized out>)
>>     at /usr/src/sys/fs/nfsserver/nfs_nfsdport.c:413
>> #23 0xffffffff807d8ffa in nfsrvd_lookup (nd=0xffffff848e6c08c8,
>>     isdgram=<value optimized out>, dp=0xfffffe00a1b441d8,
>>     vpp=0xffffff848e6c0810, fhp=0xffffff848e6c07f4, p=0xfffffe00a1198000,
>>     exp=0xffffff848e6c07a0) at /usr/src/sys/fs/nfsserver/nfs_nfsdserv.c:517
>> #24 0xffffffff807cb845 in nfsrvd_dorpc (nd=0xffffff848e6c08c8, isdgram=0,
>>     p=0xfffffe00a1198000) at /usr/src/sys/fs/nfsserver/nfs_nfsdsocket.c:823
>> #25 0xffffffff807d7af2 in nfssvc_program (rqst=0xfffffe00a17bb000,
>>     xprt=0xfffffe00a124b200) at /usr/src/sys/fs/nfsserver/nfs_nfsdkrpc.c:347
>> #26 0xffffffff80a70659 in svc_run_internal (pool=0xfffffe00067db600,
>>     ismaster=0) at /usr/src/sys/rpc/svc.c:895
>> #27 0xffffffff80a7155b in svc_thread_start (arg=0xffffff848e6bfc50)
>> ---Type <return> to continue, or q <return> to quit---
>>     at /usr/src/sys/rpc/svc.c:1200
>> #28 0xffffffff80858944 in fork_exit (
>>     callout=0xffffffff80a71550 <svc_thread_start>, arg=0xfffffe00067db600,
>>     frame=0xffffff848e6c0c00) at /usr/src/sys/kern/kern_fork.c:991
>> #29 0xffffffff80bfa86e in fork_trampoline () at exception.S:602
>> #30 0x0000000000000080 in ?? ()
>> #31 0x00007fffffffd820 in ?? ()
>> #32 0x0000000000000001 in ?? ()
>> #33 0x0000000000000000 in ?? ()
>> Current language:  auto; currently minimal
>>
>> (kgdb) p vpp
>> Cannot access memory at address 0x0
>> (kgdb) p dvp
>> Cannot access memory at address 0x0
>>
>
> That was from f 16. And as Andriy suggested in private (thanks Andriy!)
> those pointers could be reached from ap in f 17.
> On the other hand, zfs_lookup() is large, and *vpp might be
> rewritten several times there. Here it is:
>
> (kgdb) f 17
> #17 0xffffffff81abdc1d in zfs_freebsd_lookup (ap=0xffffff848e6c0270)
>     at /usr/src/sys/modules/zfs/../../cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c:5864
> 5864            return (zfs_lookup(ap->a_dvp, nm, ap->a_vpp, cnp,
> cnp->cn_nameiop,
> (kgdb) p ap->a_vpp
> $4 = (struct vnode **) 0xffffff848e6c0618
> (kgdb) p *ap->a_vpp
> $5 = (struct vnode *) 0xfffffe00a1b441d8
> (kgdb) p **ap->a_vpp
> $6 = {v_tag = 0xffffffff81b2808d "zfs", v_op = 0xffffffff81b35808,
>   v_data = 0xfffffe00a188f000, v_mount = 0xfffffe0015a5f000, v_nmntvnodes = {
>     tqe_next = 0xfffffe00a1b44000, tqe_prev = 0xfffffe00a1b443d0}, v_un = {
>     vu_mount = 0x0, vu_socket = 0x0, vu_cdev = 0x0, vu_fifoinfo = 0x0},
>   v_hashlist = {le_next = 0x0, le_prev = 0x0}, v_cache_src = {lh_first = 0x0},
>   v_cache_dst = {tqh_first = 0x0, tqh_last = 0xfffffe00a1b44228},
>   v_cache_dd = 0x0, v_lock = {lock_object = {
>       lo_name = 0xffffffff81b2808d "zfs", lo_flags = 91947008, lo_data = 0,
>       lo_witness = 0xffffff80006d2700}, lk_lock = 18446741877389099008,
>     lk_exslpfail = 0, lk_timo = 51, lk_pri = 96}, v_interlock = {
>     lock_object = {lo_name = 0xffffffff80e873f0 "vnode interlock",
>       lo_flags = 16973824, lo_data = 0, lo_witness = 0xffffff80006c9600},
>     mtx_lock = 4}, v_vnlock = 0xfffffe00a1b44240, v_actfreelist = {
>     tqe_next = 0xfffffe000bd363b0, tqe_prev = 0xfffffe0015a5f078}, v_bufobj = {
>     bo_mtx = {lock_object = {lo_name = 0xffffffff80e8f3d5 "bufobj interlock",
>         lo_flags = 16973824, lo_data = 0, lo_witness = 0xffffff80006d0580},
>       mtx_lock = 4}, bo_ops = 0xffffffff8121cc30,
>     bo_object = 0xfffffe00a1872780, bo_synclist = {le_next = 0x0,
>       le_prev = 0x0}, bo_private = 0xfffffe00a1b441d8,
>     __bo_vnode = 0xfffffe00a1b441d8, bo_clean = {bv_hd = {tqh_first = 0x0,
>         tqh_last = 0xfffffe00a1b442f8}, bv_root = 0x0, bv_cnt = 0},
>     bo_dirty = {bv_hd = {tqh_first = 0x0, tqh_last = 0xfffffe00a1b44318},
>       bv_root = 0x0, bv_cnt = 0}, bo_numoutput = 0, bo_flag = 0,
> ---Type <return> to continue, or q <return> to quit---
>     bo_bsize = 131072}, v_pollinfo = 0x0, v_label = 0x0, v_lockf = 0x0,
>   v_rl = {rl_waiters = {tqh_first = 0x0, tqh_last = 0xfffffe00a1b44360},
>     rl_currdep = 0x0}, v_cstart = 0, v_lasta = 0, v_lastw = 0, v_clen = 0,
>   v_holdcnt = 4, v_usecount = 4, v_iflag = 512, v_vflag = 0, v_writecount = 0,
>   v_hash = 10597441, v_type = VDIR}

And here is dvp, cnp. So yes, *vpp == dvp.

(kgdb) p *ap->a_vpp
$7 = (struct vnode *) 0xfffffe00a1b441d8
(kgdb) p ap->a_dvp
$5 = (struct vnode *) 0xfffffe00a1b441d8

(kgdb) p *ap->a_dvp
$8 = {v_tag = 0xffffffff81b2808d "zfs", v_op = 0xffffffff81b35808,
v_data = 0xfffffe00a188f000, v_mount = 0xfffffe0015a5f000,
  v_nmntvnodes = {tqe_next = 0xfffffe00a1b44000, tqe_prev =
0xfffffe00a1b443d0}, v_un = {vu_mount = 0x0, vu_socket = 0x0,
    vu_cdev = 0x0, vu_fifoinfo = 0x0}, v_hashlist = {le_next = 0x0,
le_prev = 0x0}, v_cache_src = {lh_first = 0x0}, v_cache_dst = {
    tqh_first = 0x0, tqh_last = 0xfffffe00a1b44228}, v_cache_dd = 0x0,
v_lock = {lock_object = {lo_name = 0xffffffff81b2808d "zfs",
      lo_flags = 91947008, lo_data = 0, lo_witness =
0xffffff80006d2700}, lk_lock = 18446741877389099008, lk_exslpfail = 0,
    lk_timo = 51, lk_pri = 96}, v_interlock = {lock_object = {lo_name
= 0xffffffff80e873f0 "vnode interlock", lo_flags = 16973824,
      lo_data = 0, lo_witness = 0xffffff80006c9600}, mtx_lock = 4},
v_vnlock = 0xfffffe00a1b44240, v_actfreelist = {
    tqe_next = 0xfffffe000bd363b0, tqe_prev = 0xfffffe0015a5f078},
v_bufobj = {bo_mtx = {lock_object = {
        lo_name = 0xffffffff80e8f3d5 "bufobj interlock", lo_flags =
16973824, lo_data = 0, lo_witness = 0xffffff80006d0580},
      mtx_lock = 4}, bo_ops = 0xffffffff8121cc30, bo_object =
0xfffffe00a1872780, bo_synclist = {le_next = 0x0, le_prev = 0x0},
    bo_private = 0xfffffe00a1b441d8, __bo_vnode = 0xfffffe00a1b441d8,
bo_clean = {bv_hd = {tqh_first = 0x0,
        tqh_last = 0xfffffe00a1b442f8}, bv_root = 0x0, bv_cnt = 0},
bo_dirty = {bv_hd = {tqh_first = 0x0,
        tqh_last = 0xfffffe00a1b44318}, bv_root = 0x0, bv_cnt = 0},
bo_numoutput = 0, bo_flag = 0, bo_bsize = 131072},
  v_pollinfo = 0x0, v_label = 0x0, v_lockf = 0x0, v_rl = {rl_waiters =
{tqh_first = 0x0, tqh_last = 0xfffffe00a1b44360},
    rl_currdep = 0x0}, v_cstart = 0, v_lasta = 0, v_lastw = 0, v_clen
= 0, v_holdcnt = 4, v_usecount = 4, v_iflag = 512,
  v_vflag = 0, v_writecount = 0, v_hash = 10597441, v_type = VDIR}
(kgdb) p/x *ap->a_dvp
$2 = {v_tag = 0xffffffff81b2808d, v_op = 0xffffffff81b35808, v_data =
0xfffffe00a188f000, v_mount = 0xfffffe0015a5f000,
  v_nmntvnodes = {tqe_next = 0xfffffe00a1b44000, tqe_prev =
0xfffffe00a1b443d0}, v_un = {vu_mount = 0x0, vu_socket = 0x0,
    vu_cdev = 0x0, vu_fifoinfo = 0x0}, v_hashlist = {le_next = 0x0,
le_prev = 0x0}, v_cache_src = {lh_first = 0x0}, v_cache_dst = {
    tqh_first = 0x0, tqh_last = 0xfffffe00a1b44228}, v_cache_dd = 0x0,
v_lock = {lock_object = {lo_name = 0xffffffff81b2808d,
      lo_flags = 0x57b0000, lo_data = 0x0, lo_witness =
0xffffff80006d2700}, lk_lock = 0xfffffe00a1198000, lk_exslpfail = 0x0,
    lk_timo = 0x33, lk_pri = 0x60}, v_interlock = {lock_object =
{lo_name = 0xffffffff80e873f0, lo_flags = 0x1030000, lo_data = 0x0,
      lo_witness = 0xffffff80006c9600}, mtx_lock = 0x4}, v_vnlock =
0xfffffe00a1b44240, v_actfreelist = {
    tqe_next = 0xfffffe000bd363b0, tqe_prev = 0xfffffe0015a5f078},
v_bufobj = {bo_mtx = {lock_object = {
        lo_name = 0xffffffff80e8f3d5, lo_flags = 0x1030000, lo_data =
0x0, lo_witness = 0xffffff80006d0580}, mtx_lock = 0x4},
    bo_ops = 0xffffffff8121cc30, bo_object = 0xfffffe00a1872780,
bo_synclist = {le_next = 0x0, le_prev = 0x0},
    bo_private = 0xfffffe00a1b441d8, __bo_vnode = 0xfffffe00a1b441d8,
bo_clean = {bv_hd = {tqh_first = 0x0,
        tqh_last = 0xfffffe00a1b442f8}, bv_root = 0x0, bv_cnt = 0x0},
bo_dirty = {bv_hd = {tqh_first = 0x0,
        tqh_last = 0xfffffe00a1b44318}, bv_root = 0x0, bv_cnt = 0x0},
bo_numoutput = 0x0, bo_flag = 0x0, bo_bsize = 0x20000},
  v_pollinfo = 0x0, v_label = 0x0, v_lockf = 0x0, v_rl = {rl_waiters =
{tqh_first = 0x0, tqh_last = 0xfffffe00a1b44360},
    rl_currdep = 0x0}, v_cstart = 0x0, v_lasta = 0x0, v_lastw = 0x0,
v_clen = 0x0, v_holdcnt = 0x4, v_usecount = 0x4,
  v_iflag = 0x200, v_vflag = 0x0, v_writecount = 0x0, v_hash =
0xa1b441, v_type = 0x2}

(kgdb) p ap->a_cnp
$9 = (struct componentname *) 0xffffff848e6c0640
(kgdb) p *ap->a_cnp
$10 = {cn_nameiop = 0, cn_flags = 8451076, cn_thread =
0xfffffe00a1198000, cn_cred = 0xfffffe00a167cc00, cn_lkflags = 524288,
  cn_pnbuf = 0xfffffe000bdee400 "..", cn_nameptr = 0xfffffe000bdee400
"..", cn_namelen = 2, cn_consume = 0}
(kgdb) p/x *ap->a_cnp
$4 = {cn_nameiop = 0x0, cn_flags = 0x80f404, cn_thread =
0xfffffe00a1198000, cn_cred = 0xfffffe00a167cc00, cn_lkflags =
0x80000,
  cn_pnbuf = 0xfffffe000bdee400, cn_nameptr = 0xfffffe000bdee400,
cn_namelen = 0x2, cn_consume = 0x0}

-- 
wbr,
pluknet
Received on Mon Feb 04 2013 - 12:20:37 UTC

This archive was generated by hypermail 2.4.0 : Wed May 19 2021 - 11:40:34 UTC