Re: nullfs: panic: vrele: negative ref cnt

From: pluknet <pluknet_at_gmail.com>
Date: Mon, 9 Mar 2009 23:17:47 +0300
2009/3/9 pluknet <pluknet_at_gmail.com>:
> 2009/3/9 Paul B. Mahol <onemda_at_gmail.com>:
>> Hi,
>> here is part of textdump:
>>
>> db:1:lockinfo> show locks
>> db:1:locks>  show alllocks
>> Process 722 (sh) thread 0xc4134000 (100065)
>> db:1:alllocks>  show lockedvnods
>> Locked vnodes
>> db:0:kdb.enter.panic>  show pcpu
>> cpuid        = 1
>> curthread    = 0xc4134000: pid 722 "sh"
>> curpcb       = 0xc3ba8d90
>> fpcurthread  = 0xc4134000: pid 722 "sh"
>> idlethread   = 0xc3d08d80: pid 10 "idle: cpu1"
>> APIC ID      = 1
>> currentldt   = 0x50
>> spin locks held:
>> db:0:kdb.enter.panic>  bt
>> Tracing pid 722 tid 100065 td 0xc4134000
>> kdb_enter(c0610bfe,c0610bfe,c061db9e,c3ba8aa8,1,...) at kdb_enter+0x3a
>> panic(c061db9e,0,c061cee6,88e,4,...) at panic+0x136
>> vrele(c4326d9c,0,c433fb5f,82,c060f3c7,...) at vrele+0x111
>> null_nodeget(c3fd4780,c4326d9c,c3ba8bf4,0,c3ba8be4,...) at null_nodeget+0xa0
>> null_bypass(c3ba8be0,c3f35a78,c3ba8c28) at null_bypass+0x141
>> VOP_VPTOCNP_APV(c4340240,c3ba8be0,c061bf01,387,c3cecc00,...) at
>> VOP_VPTOCNP_APV+0xb3
>> vn_vptocnp(c3ba8c4c,c061bf01,3af,c3ba8c3c,c3f35a78,...) at vn_vptocnp+0xd9
>> vn_fullpath1(c3cecc00,c3ba8c5c,3fc,315,c3cecc00,...) at vn_fullpath1+0x29a
>> kern___getcwd(c4134000,bfbfe884,0,400,c3ba8d2c,...) at kern___getcwd+0xd0
>> __getcwd(c4134000,c3ba8cf8,8,c0617c09,c0640290,...) at __getcwd+0x29
>> syscall(c3ba8d38) at syscall+0x261
>> Xint0x80_syscall() at Xint0x80_syscall+0x20
>> --- syscall (326, FreeBSD ELF32, __getcwd), eip = 0x2814edfb, esp =
>> 0xbfbfe6cc, ebp = 0xbfbfe788 ---
>>
>>
>> panic: vrele: negative ref cnt
>> cpuid = 1
>> KDB: enter: panic
>> shared lockmgr ufs (ufs) r = 0 (0xc43c36a0) locked _at_
>> /usr/src/sys/kern/vfs_default.c:790
>> shared lockmgr ufs (ufs) r = 0 (0xc43c36a0) locked _at_
>> /usr/src/sys/kern/vfs_default.c:790
>>
>> 0xc43c3648: tag ufs, type VDIR
>>    usecount 1, writecount 0, refcount 1 mountedhere 0
>>    flags ()
>>    lock type ufs: SHARED (count 1)
>>        ino 3583142, on dev ad0s1h
>>
>> 0xc43c3430: tag null, type VDIR
>>    usecount 4, writecount 0, refcount 5 mountedhere 0
>>    flags ()
>>    lock type ufs: SHARED (count 1)
>>        vp=0xc43c3430, lowervp=0xc43c3648
>>
>>
>> How to reproduce it:
>>
>> # mount_nullfs /usr/local/src /usr/src
>> # cd /sys
>> # whereis something
>>
>>
>> It appears panic have been introduced after r189488
>>
>
> [marcus_at_ cc'ed]
>
> Futher debugging shows:
>
> Script started on Mon Mar  9 21:43:17 2009
> c8-vb# kgdb /boot/ke rnel/ke rnel /var/crash/vm core.1
>
> GNU gdb 6.1.1 [FreeBSD]
> Copyright 2004 Free Software Foundation, Inc.
> GDB is free software, covered by the GNU General Public License, and you are
> welcome to change it and/or distribute copies of it under certain conditions.
> Type "show copying" to see the conditions.
> There is absolutely no warranty for GDB.  Type "show warranty" for details.
> This GDB was configured as "i386-marcel-freebsd"...
>
> Unread portion of the kernel message buffer:
> , FreeBSD ELF32, __getcwd), eip = 0x28154b2b, esp = 0xbfbfe71c, ebp =
> 0xbfbfe7d8 ---
> null_hashget: 0xc4451560 is not locked but should be
> KDB: enter: lock violation
> panic: mutex Giant not owned at /usr/src/sys/kern/tty_ttydisc.c:1127
> cpuid = 0
> Uptime: 43m7s
> Physical memory: 1015 MB
> Dumping 51 MB: 36 20 4
>
> Reading symbols from /boot/kernel/if_em.ko...Reading symbols from
> /boot/kernel/if_em.ko.symbols...done.
> done.
> Loaded symbols for /boot/kernel/if_em.ko
> Reading symbols from /boot/kernel/nfsclient.ko...Reading symbols from
> /boot/kernel/nfsclient.ko.symbols...done.
> done.
> Loaded symbols for /boot/kernel/nfsclient.ko
> #0  doadump () at pcpu.h:246
> 246     pcpu.h: No such file or directory.
>        in pcpu.h
> (kgdb) bt
> #0  doadump () at pcpu.h:246
> #1  0xc05c458e in boot (howto=260) at /usr/src/sys/kern/kern_shutdown.c:420
> #2  0xc05c4862 in panic (fmt=Variable "fmt" is not available.
> ) at /usr/src/sys/kern/kern_shutdown.c:576
> #3  0xc05b4c77 in _mtx_assert (m=0xc08b6170, what=4,
>    file=0xc0829baf "/usr/src/sys/kern/tty_ttydisc.c", line=1127)
>    at /usr/src/sys/kern/kern_mutex.c:639
> #4  0xc06197dd in ttydisc_getc (tp=0xc3dbf000, buf=0xe66bc7c0, len=128)
>    at /usr/src/sys/kern/tty_ttydisc.c:1127
> #5  0xc050daf4 in sctty_outwakeup (tp=0xc3dbf000)
>    at /usr/src/sys/dev/syscons/syscons.c:323
> #6  0xc050e05c in scgetc (sc=0xc0a73000, flags=3)
>    at /usr/src/sys/dev/syscons/syscons.c:3281
> #7  0xc050e680 in sc_cngetc (cd=0xc0863d20)
>    at /usr/src/sys/dev/syscons/syscons.c:1607
> #8  0xc058cd68 in cncheckc () at /usr/src/sys/kern/kern_cons.c:377
> #9  0xc058cda6 in cngetc () at /usr/src/sys/kern/kern_cons.c:355
> #10 0xc0480435 in db_readline (lstart=0xc08a0100 "", lsize=120)
>    at /usr/src/sys/ddb/db_input.c:326
> #11 0xc0480e0a in db_read_line () at /usr/src/sys/ddb/db_lex.c:56
> #12 0xc047f0b6 in db_command_loop () at /usr/src/sys/ddb/db_command.c:496
> #13 0xc0480f2d in db_trap (type=3, code=0) at /usr/src/sys/ddb/db_main.c:229
> #14 0xc05f1d86 in kdb_trap (type=3, code=0, tf=0xe66bca34)
>    at /usr/src/sys/kern/subr_kdb.c:534
> #15 0xc07bc856 in trap (frame=0xe66bca34) at /usr/src/sys/i386/i386/trap.c:678
> ---Type <return> to continue, or q <return> to quit---
> #16 0xc07a15bb in calltrap () at /usr/src/sys/i386/i386/exception.s:165
> #17 0xc05f1f0a in kdb_enter (why=0xc082deeb "vfslock",
>    msg=0xc082dedc "lock violation") at cpufunc.h:71
> #18 0xc064a541 in vfs_badlock (msg=0xc082df39 "is not locked but should be",
>    str=0xc08173c4 "null_hashget", vp=0xc4451560)
>    at /usr/src/sys/kern/vfs_subr.c:3699
> #19 0xc064ddf2 in assert_vop_locked (vp=0xc4451560,
>    str=0xc08173c4 "null_hashget") at /usr/src/sys/kern/vfs_subr.c:3723
> #20 0xc055c5df in null_nodeget (mp=0xc40152cc, lowervp=0xc4451560,
>    vpp=0xe66bcbf0) at /usr/src/sys/fs/nullfs/null_subr.c:110
> #21 0xc055d75a in null_bypass (ap=0xe66bcbdc)
>    at /usr/src/sys/fs/nullfs/null_vnops.c:325
> #22 0xc07c86a3 in VOP_VPTOCNP_APV (vop=0xc08688a0, a=0xe66bcbdc)
>    at vnode_if.c:2871
> #23 0xc063a749 in vn_vptocnp (vp=0xe66bcc24, bp=0xe66bcc28,
>    buf=0xc3e15c00 'p' <repeats 200 times>..., buflen=0xe66bcc48)
>    at vnode_if.h:1541
> #24 0xc063aaca in vn_fullpath1 (td=Variable "td" is not available.
> ) at /usr/src/sys/kern/vfs_cache.c:1001
> #25 0xc063ae60 in kern___getcwd (td=0xc4435000,
>    buf=0xbfbfe8d4 <Address 0xbfbfe8d4 out of bounds>, bufseg=UIO_USERSPACE,
>    buflen=1024) at /usr/src/sys/kern/vfs_cache.c:795
> #26 0xc063b009 in __getcwd (td=0xc4435000, uap=0xe66bccf8)
>    at /usr/src/sys/kern/vfs_cache.c:769
> #27 0xc07bbfd4 in syscall (frame=0xe66bcd38)
> ---Type <return> to continue, or q <return> to quit---
>    at /usr/src/sys/i386/i386/trap.c:1066
> #28 0xc07a1620 in Xint0x80_syscall ()
>    at /usr/src/sys/i386/i386/exception.s:261
> #29 0x00000033 in ?? ()
> Previous frame inner to this frame (corrupt stack?)
> (kgdb) c8-vb#
>
> c8-vb# ^D  exit
>
> Script done on Mon Mar  9 21:43:35 2009
>

Gahr.. another one looks a bit different.
No null_hashget assert, but now in usecount == 0 (like in Paul B. Mahol's case).

Script started on Mon Mar  9 23:00:44 2009
c8-vb# kgdb /boot/kernel/kernel /var/crash/vmcore.5

Unread portion of the kernel message buffer:
mgr_args+0xb80
#1 0xc074c9d1 at ffs_lock+0xa1
#2 0xc07ca005 at VOP_LOCK1_APV+0xb5
#3 0xc065a548 at _vn_lock+0x78
#4 0xc065b63f at vn_close+0x7f
#5 0xc063ea1d at vop_stdvptocnp+0x68d
#6 0xc07c86a6 at VOP_VPTOCNP_APV+0xd6
#7 0xc07c875b at VOP_VPTOCNP_AP+0x1b
#8 0xc055d6a9 at null_bypass+0xf9
#9 0xc07c86b3 at VOP_VPTOCNP_APV+0xe3
#10 0xc063a759 at vn_vptocnp+0xd9
#11 0xc063aada at vn_fullpath1+0x29a
#12 0xc063ae70 at kern___getcwd+0xd0
#13 0xc063b019 at __getcwd+0x29
#14 0xc07bbfe4 at syscall+0x2b4
#15 0xc07a1630 at Xint0x80_syscall+0x20
	ino 70663, on dev ad0s1a
panic: vrele: negative ref cnt
cpuid = 0
KDB: enter: panic
Uptime: 1m20s
Physical memory: 1015 MB
Dumping 51 MB: 36 20 4

Reading symbols from /boot/kernel/if_em.ko...Reading symbols from
/boot/kernel/if_em.ko.symbols...done.
done.
Loaded symbols for /boot/kernel/if_em.ko
Reading symbols from /boot/kernel/nfsclient.ko...Reading symbols from
/boot/kernel/nfsclient.ko.symbols...done.
done.
Loaded symbols for /boot/kernel/nfsclient.ko
#0  doadump () at pcpu.h:246
246	pcpu.h: No such file or directory.
	in pcpu.h
(kgdb) bt
#0  doadump () at pcpu.h:246
#1  0xc05c459e in boot (howto=260) at /usr/src/sys/kern/kern_shutdown.c:420
#2  0xc05c4872 in panic (fmt=Variable "fmt" is not available.
) at /usr/src/sys/kern/kern_shutdown.c:576
#3  0xc064eeb9 in vrele (vp=0xc442cac0) at /usr/src/sys/kern/vfs_subr.c:2191
#4  0xc055c6b1 in null_nodeget (mp=0xc40162cc, lowervp=0xc442cac0,
    vpp=0xe66b0bf0) at /usr/src/sys/fs/nullfs/null_subr.c:204
#5  0xc055d76a in null_bypass (ap=0xe66b0bdc)
    at /usr/src/sys/fs/nullfs/null_vnops.c:325
#6  0xc07c86b3 in VOP_VPTOCNP_APV (vop=0xc08688a0, a=0xe66b0bdc)
    at vnode_if.c:2871
#7  0xc063a759 in vn_vptocnp (vp=0xe66b0c24, bp=0xe66b0c28,
    buf=0xc3dee800 'p' <repeats 200 times>..., buflen=0xe66b0c48)
    at vnode_if.h:1541
#8  0xc063aada in vn_fullpath1 (td=Variable "td" is not available.
) at /usr/src/sys/kern/vfs_cache.c:1001
#9  0xc063ae70 in kern___getcwd (td=0xc41a5460,
    buf=0xbfbfe844 <Address 0xbfbfe844 out of bounds>, bufseg=UIO_USERSPACE,
    buflen=1024) at /usr/src/sys/kern/vfs_cache.c:795
#10 0xc063b019 in __getcwd (td=0xc41a5460, uap=0xe66b0cf8)
    at /usr/src/sys/kern/vfs_cache.c:769
#11 0xc07bbfe4 in syscall (frame=0xe66b0d38)
    at /usr/src/sys/i386/i386/trap.c:1066
#12 0xc07a1630 in Xint0x80_syscall ()
    at /usr/src/sys/i386/i386/exception.s:261
#13 0x00000033 in ?? ()
Previous frame inner to this frame (corrupt stack?)
(kgdb) f 4
#4  0xc055c6b1 in null_nodeget (mp=0xc40162cc, lowervp=0xc442cac0,
    vpp=0xe66b0bf0) at /usr/src/sys/fs/nullfs/null_subr.c:204
204			vrele(lowervp);
(kgdb) p vlowerQuit
(kgdb) p lowervp
$1 = (struct vnode *) 0xc442cac0
(kgdb) p *lowervp
$2 = {v_type = VDIR, v_tag = 0xc081a668 "ufs", v_op = 0xc0888a00,
  v_data = 0xc442e5a0, v_mount = 0xc4016b30, v_nmntvnodes = {
    tqe_next = 0xc442c968, tqe_prev = 0xc443341c}, v_un = {vu_mount = 0x0,
    vu_socket = 0x0, vu_cdev = 0x0, vu_fifoinfo = 0x0, vu_yield = 0},
  v_hashlist = {le_next = 0x0, le_prev = 0xc3eb17b4}, v_hash = 70663,
  v_cache_src = {lh_first = 0xc444c94c}, v_cache_dst = {
    tqh_first = 0xc41c1880, tqh_last = 0xc41c1890}, v_dd = 0xc442c158,
  v_cstart = 0, v_lasta = 0, v_lastw = 0, v_clen = 0, v_lock = {
    lock_object = {lo_name = 0xc081a668 "ufs", lo_flags = 91947009,
      lo_data = 0, lo_witness = 0xc3cf51c0}, lk_lock = 1, lk_timo = 1,
    lk_pri = 80, lk_stack = {depth = 16, pcs = {3227181616, 3228879313,
        3229392901, 3227886920, 3227891263, 3227773469, 3229386406,
        3229386587, 3226850985, 3229386419, 3227756377, 3227757274,
        3227758192, 3227758617, 3229335524, 3229226544, 0, 0}}},
  v_interlock = {lock_object = {lo_name = 0xc0827160 "vnode interlock",
      lo_flags = 16973824, lo_data = 0, lo_witness = 0xc3cef178},
    mtx_lock = 4}, v_vnlock = 0xc442cb18, v_holdcnt = 5, v_usecount = 0,
  v_iflag = 0, v_vflag = 0, v_writecount = 0, v_freelist = {tqe_next = 0x0,
    tqe_prev = 0xc3e74244}, v_bufobj = {bo_mtx = {lock_object = {
        lo_name = 0xc082ebea "bufobj interlock", lo_flags = 16973824,
        lo_data = 0, lo_witness = 0xc3cf4048}, mtx_lock = 4}, bo_clean = {
      bv_hd = {tqh_first = 0xd7d19d20, tqh_last = 0xd7d19d58},
      bv_root = 0xd7d19d20, bv_cnt = 1}, bo_dirty = {bv_hd = {
        tqh_first = 0x0, tqh_last = 0xc442cbd8}, bv_root = 0x0, bv_cnt = 0},
---Type <return> to continue, or q <return> to quit---
    bo_numoutput = 0, bo_flag = 0, bo_ops = 0xc087abe0, bo_bsize = 16384,
    bo_object = 0xc443526c, bo_synclist = {le_next = 0x0, le_prev = 0x0},
    bo_private = 0xc442cac0, __bo_vnode = 0xc442cac0}, v_pollinfo = 0x0,
  v_label = 0x0, v_lockf = 0x0}
(kgdb) p *vpp
$3 = (struct vnode *) 0xc442c560
(kgdb) p **vpp
$4 = {v_type = VDIR, v_tag = 0xc0837f79 "null", v_op = 0xc08688a0,
  v_data = 0xc3e737b0, v_mount = 0xc40162cc, v_nmntvnodes = {
    tqe_next = 0xc444b2b0, tqe_prev = 0xc4016300}, v_un = {vu_mount = 0x0,
    vu_socket = 0x0, vu_cdev = 0x0, vu_fifoinfo = 0x0, vu_yield = 0},
  v_hashlist = {le_next = 0x0, le_prev = 0x0}, v_hash = 0, v_cache_src = {
    lh_first = 0x0}, v_cache_dst = {tqh_first = 0x0, tqh_last = 0xc442c590},
  v_dd = 0x0, v_cstart = 0, v_lasta = 0, v_lastw = 0, v_clen = 0, v_lock = {
    lock_object = {lo_name = 0xc0837f79 "null", lo_flags = 91947016,
      lo_data = 0, lo_witness = 0xc3cf5840}, lk_lock = 1, lk_timo = 1,
    lk_pri = 80, lk_stack = {depth = 0, pcs = {0 <repeats 18 times>}}},
  v_interlock = {lock_object = {lo_name = 0xc0827160 "vnode interlock",
      lo_flags = 16973824, lo_data = 0, lo_witness = 0xc3cef178},
    mtx_lock = 4}, v_vnlock = 0xc442cb18, v_holdcnt = 2, v_usecount = 3,
  v_iflag = 0, v_vflag = 1, v_writecount = 0, v_freelist = {tqe_next = 0x0,
    tqe_prev = 0x0}, v_bufobj = {bo_mtx = {lock_object = {
        lo_name = 0xc082ebea "bufobj interlock", lo_flags = 16973824,
        lo_data = 0, lo_witness = 0xc3cf4048}, mtx_lock = 4}, bo_clean = {
      bv_hd = {tqh_first = 0x0, tqh_last = 0xc442c668}, bv_root = 0x0,
      bv_cnt = 0}, bo_dirty = {bv_hd = {tqh_first = 0x0,
        tqh_last = 0xc442c678}, bv_root = 0x0, bv_cnt = 0}, bo_numoutput = 0,
    bo_flag = 0, bo_ops = 0xc087abe0, bo_bsize = 0, bo_object = 0x0,
    bo_synclist = {le_next = 0x0, le_prev = 0x0}, bo_private = 0xc442c560,
    __bo_vnode = 0xc442c560}, v_pollinfo = 0x0, v_label = 0x0, v_lockf = 0x0}

-- 
wbr,
pluknet
Received on Mon Mar 09 2009 - 19:17:49 UTC

This archive was generated by hypermail 2.4.0 : Wed May 19 2021 - 11:39:43 UTC