(unknown charset) processes stuck on a vnode lock

From: (unknown charset) Andriy Gapon <avg_at_icyb.net.ua>
Date: Thu, 04 Nov 2010 09:49:01 +0200
I see a few processes stuck on the same vnode, trying to take or to upgrade to
an exclusive lock on it, while the lock data suggests that it is already
shared-locked.  The vnode is a root vnode of one of ZFS filesystems (it's not a
global root).

I couldn't find any (other) threads that could actually hold the vnode lock, but
lock shared count is suspiciously or coincidentally the same as number of
threads in zfs_root call.

Relevant data dump:
 1125 100129 mountd           -                mi_switch sleepq_switch
sleepq_wait __lockmgr_args vop_stdlock VOP_LOCK1_APV _vn_lock zfs_root lookup
namei getfh syscallenter syscall Xfast_syscall
 1135 100209 nfsd             nfsd: service    mi_switch sleepq_switch
sleepq_wait __lockmgr_args vop_stdlock VOP_LOCK1_APV _vn_lock zfs_fhtovp
nfsrv_fhtovp nfsrv_readdirplus nfssvc_program svc_run_internal svc_thread_start
fork_exit fork_trampoline
39672 100779 find             -                mi_switch sleepq_switch
sleepq_wait __lockmgr_args vop_stdlock VOP_LOCK1_APV _vn_lock zfs_root lookup
namei vn_open_cred vn_open kern_openat kern_open open syscallenter syscall
Xfast_syscall
61414 100769 smbd             -                mi_switch sleepq_switch
sleepq_wait __lockmgr_args vop_stdlock VOP_LOCK1_APV _vn_lock cache_lookup
vfs_cache_lookup VOP_LOOKUP_APV lookup namei vn_open_cred vn_open kern_openat
kern_open open syscallenter
61644 100525 smbd             -                mi_switch sleepq_switch
sleepq_wait __lockmgr_args vop_stdlock VOP_LOCK1_APV _vn_lock cache_lookup
vfs_cache_lookup VOP_LOOKUP_APV lookup namei vn_open_cred vn_open kern_openat
kern_open open syscallenter
61645 100504 smbd             -                mi_switch sleepq_switch
sleepq_wait __lockmgr_args vop_stdlock VOP_LOCK1_APV _vn_lock cache_lookup
vfs_cache_lookup VOP_LOOKUP_APV lookup namei vn_open_cred vn_open kern_openat
kern_open open syscallenter
61646 100822 smbd             -                mi_switch sleepq_switch
sleepq_wait __lockmgr_args vop_stdlock VOP_LOCK1_APV _vn_lock cache_lookup
vfs_cache_lookup VOP_LOOKUP_APV lookup namei vn_open_cred vn_open kern_openat
kern_open open syscallenter
======================================================================

(kgdb) tid 100779
[Switching to thread 521 (Thread 100779)]#0  sched_switch
(td=0xffffff0051e59450, newtd=0xffffff0001a4c450, flags=Variable "flags" is not
available.
)
    at /usr/src/sys/kern/sched_ule.c:1851
1851                    cpuid = PCPU_GET(cpuid);
(kgdb) bt
#0  sched_switch (td=0xffffff0051e59450, newtd=0xffffff0001a4c450,
flags=Variable "flags" is not available.
) at /usr/src/sys/kern/sched_ule.c:1851
#1  0xffffffff8038631e in mi_switch (flags=Variable "flags" is not available.
) at /usr/src/sys/kern/kern_synch.c:449
#2  0xffffffff803bd87b in sleepq_switch (wchan=Variable "wchan" is not available.
) at /usr/src/sys/kern/subr_sleepqueue.c:538
#3  0xffffffff803be5a5 in sleepq_wait (wchan=0xffffff000a3e4098, pri=80) at
/usr/src/sys/kern/subr_sleepqueue.c:617
#4  0xffffffff80362d62 in __lockmgr_args (lk=0xffffff000a3e4098, flags=524288,
ilk=0xffffff000a3e40c8, wmesg=Variable "wmesg" is not available.
) at /usr/src/sys/kern/kern_lock.c:218
#5  0xffffffff804037f1 in vop_stdlock (ap=Variable "ap" is not available.
) at lockmgr.h:97
#6  0xffffffff805bd322 in VOP_LOCK1_APV (vop=0xffffffff807e2580,
a=0xffffff8126ec05b0) at vnode_if.c:1988
#7  0xffffffff80422d98 in _vn_lock (vp=0xffffff000a3e4000, flags=524288,
    file=0xffffffff80b23c58
"/usr/src/sys/modules/zfs/../../cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c",
line=1305) at vnode_if.h:859
#8  0xffffffff80abd185 in zfs_root (vfsp=Variable "vfsp" is not available.
) at
/usr/src/sys/modules/zfs/../../cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c:1305
#9  0xffffffff80408323 in lookup (ndp=0xffffff8126ec09a0) at
/usr/src/sys/kern/vfs_lookup.c:785
#10 0xffffffff80408f0f in namei (ndp=0xffffff8126ec09a0) at
/usr/src/sys/kern/vfs_lookup.c:273
#11 0xffffffff80422120 in vn_open_cred (ndp=0xffffff8126ec09a0,
flagp=0xffffff8126ec099c, cmode=2432, vn_open_flags=Variable "vn_open_flags" is
not available.
)
    at /usr/src/sys/kern/vfs_vnops.c:189
#12 0xffffffff804223cc in vn_open (ndp=Variable "ndp" is not available.
) at /usr/src/sys/kern/vfs_vnops.c:95
#13 0xffffffff80420b9d in kern_openat (td=0xffffff0051e59450, fd=-100,
path=0x800c61100 <Error reading address 0x800c61100: Bad address>,
    pathseg=UIO_USERSPACE, flags=131077, mode=13052800) at
/usr/src/sys/kern/vfs_syscalls.c:1083
#14 0xffffffff80420f19 in kern_open (td=Variable "td" is not available.
) at /usr/src/sys/kern/vfs_syscalls.c:1039
#15 0xffffffff80420f38 in open (td=Variable "td" is not available.
) at /usr/src/sys/kern/vfs_syscalls.c:1015
#16 0xffffffff803c0f8e in syscallenter (td=0xffffff0051e59450,
sa=0xffffff8126ec0bb0) at /usr/src/sys/kern/subr_trap.c:318
#17 0xffffffff8055b5f1 in syscall (frame=0xffffff8126ec0c40) at
/usr/src/sys/amd64/amd64/trap.c:939
#18 0xffffffff80546262 in Xfast_syscall () at
/usr/src/sys/amd64/amd64/exception.S:381
#19 0x000000080073489c in ?? ()
Previous frame inner to this frame (corrupt stack?)
(kgdb) fr 4
#4  0xffffffff80362d62 in __lockmgr_args (lk=0xffffff000a3e4098, flags=524288,
ilk=0xffffff000a3e40c8, wmesg=Variable "wmesg" is not available.
) at /usr/src/sys/kern/kern_lock.c:218
218                     sleepq_wait(&lk->lock_object, pri);
(kgdb) p *lk
$3 = {lock_object = {lo_name = 0xffffffff80b267b0 "zfs", lo_flags = 91947008,
lo_data = 0, lo_witness = 0x0}, lk_lock = 37, lk_exslpfail = 0,
  lk_timo = 51, lk_pri = 80}
(kgdb) p/x lk->lk_lock
$7 = 0x25

=====================================================================
(kgdb) p *dvp
$8 = {v_type = VDIR, v_tag = 0xffffffff80b267b0 "zfs", v_op =
0xffffffff80b2c780, v_data = 0xffffff000a2c8840, v_mount = 0xffffff000a3e92f8,
  v_nmntvnodes = {tqe_next = 0xffffff000a3e3d20, tqe_prev = 0xffffff000a3e9358},
v_un = {vu_mount = 0x0, vu_socket = 0x0, vu_cdev = 0x0,
    vu_fifoinfo = 0x0, vu_yield = 0}, v_hashlist = {le_next = 0x0, le_prev =
0x0}, v_hash = 0, v_cache_src = {lh_first = 0x0}, v_cache_dst = {
    tqh_first = 0x0, tqh_last = 0xffffff000a3e4060}, v_cache_dd = 0x0, v_cstart
= 0, v_lasta = 0, v_lastw = 0, v_clen = 0, v_lock = {lock_object = {
      lo_name = 0xffffffff80b267b0 "zfs", lo_flags = 91947008, lo_data = 0,
lo_witness = 0x0}, lk_lock = 37, lk_exslpfail = 0, lk_timo = 51,
    lk_pri = 80}, v_interlock = {lock_object = {lo_name = 0xffffffff8064ecf9
"vnode interlock", lo_flags = 16973824, lo_data = 0, lo_witness = 0x0},
    mtx_lock = 4}, v_vnlock = 0xffffff000a3e4098, v_holdcnt = 16, v_usecount =
16, v_iflag = 0, v_vflag = 1, v_writecount = 0, v_freelist = {
    tqe_next = 0x0, tqe_prev = 0x0}, v_bufobj = {bo_mtx = {lock_object =
{lo_name = 0xffffffff8064ed09 "bufobj interlock", lo_flags = 16973824,
        lo_data = 0, lo_witness = 0x0}, mtx_lock = 4}, bo_clean = {bv_hd =
{tqh_first = 0x0, tqh_last = 0xffffff000a3e4140}, bv_root = 0x0,
      bv_cnt = 0}, bo_dirty = {bv_hd = {tqh_first = 0x0, tqh_last =
0xffffff000a3e4160}, bv_root = 0x0, bv_cnt = 0}, bo_numoutput = 0, bo_flag = 0,
    bo_ops = 0xffffffff807df2a0, bo_bsize = 131072, bo_object =
0xffffff0123507000, bo_synclist = {le_next = 0x0, le_prev = 0x0},
    bo_private = 0xffffff000a3e4000, __bo_vnode = 0xffffff000a3e4000},
v_pollinfo = 0xffffff00658778c0, v_label = 0x0, v_lockf = 0x0}

-- 
Andriy Gapon
Received on Thu Nov 04 2010 - 06:49:06 UTC

This archive was generated by hypermail 2.4.0 : Wed May 19 2021 - 11:40:08 UTC