Non-SUJ deadlock in -current

From: Peter Jeremy <peterjeremy_at_acm.org>
Date: Tue, 4 May 2010 14:37:59 +1000
I have updated a vmware guest to -current from about 48 hours ago
to play with SUJ but before I got that far, I ran into a deadlock
trying to upgrade some ports.  It gets part-way through a configure
and then stops.

VTY switching works but none of the getty's will echo input.
Ctrl-T on the active window (serial console or ssh) reports
that the curent process is runnable but not using any time.
A systat stops updating.

A couple of LORs are reported but both are well before the hang:

lock order reversal:
 1st 0xc3cb4058 ufs (ufs) _at_ /usr/src/sys/kern/vfs_subr.c:2091
 2nd 0xd7a4314c bufwait (bufwait) _at_ /usr/src/sys/ufs/ffs/ffs_softdep.c:11362
 3rd 0xc3f0baf8 ufs (ufs) _at_ /usr/src/sys/kern/vfs_subr.c:2091
KDB: stack backtrace:
db_trace_self_wrapper(c077a551,e60853c8,c05a9899,c059e0b4,c077c2e9,...) at db_trace_self_wrapper+0x26
kdb_backtrace(c059e0b4,c077c2e9,c0945b20,c3811ef8,e608541c,...) at kdb_backtrace+0x29
_witness_debugger(c077c2e9,c3f0baf8,c0772bf6,c3814e80,c0781760,...) at _witness_debugger+0x21
witness_checkorder(c3f0baf8,9,c0781760,82b,0,...) at witness_checkorder+0x68f
__lockmgr_args(c3f0baf8,80100,c3f0bb18,0,0,...) at __lockmgr_args+0x692
ffs_lock(e6085530,e6085518,c05a98e7,c0780fc5,c077be94,...) at ffs_lock+0x7d
VOP_LOCK1_APV(c07dfba0,e6085530,e6085530,c07f7ae0,c3f0baa0,...) at VOP_LOCK1_APV+0x8b
_vn_lock(c3f0baa0,80100,c0781760,82b,50,...) at _vn_lock+0x3d
vget(c3f0baa0,80100,c3e94900,50,0,...) at vget+0x63
vfs_hash_get(c3b21ca8,2e8ad,80000,c3e94900,e6085680,...) at vfs_hash_get+0xe2
ffs_vgetf(c3b21ca8,2e8ad,80000,e6085680,1,...) at ffs_vgetf+0x43
softdep_sync_metadata(c3cb4000,0,c078ee78,147,0,...) at softdep_sync_metadata+0xafc
ffs_syncvnode(c3cb4000,1,e6085718,c05654c2,c3cb40ac,...) at ffs_syncvnode+0x378
ffs_truncate(c3cb4000,200,0,880,c3b27d00,...) at ffs_truncate+0x7f6
ufs_direnter(c3cb4000,c3f0baa0,e6085944,e6085bd4,0,...) at ufs_direnter+0x838
ufs_makeinode(e6085bd4) at ufs_makeinode+0x2b4
ufs_create(e6085b34,c3b21ca8,c07f7ee0,c3cb4000,e6085b4c,...) at ufs_create+0x2c
VOP_CREATE_APV(c07dfba0,e6085b34,2,c077be94,0,...) at VOP_CREATE_APV+0x7a
vn_open_cred(e6085ba8,e6085c5c,180,0,c3b27d00,...) at vn_open_cred+0x1d3
vn_open(e6085ba8,e6085c5c,180,c3b41818,c3e94900,...) at vn_open+0x3b
kern_openat(c3e94900,ffffff9c,28404080,0,a03,...) at kern_openat+0xe7
kern_open(c3e94900,28404080,0,a02,180,...) at kern_open+0x35
open(c3e94900,e6085cf8,c079a860,c077c920,c3ec2aa0,...) at open+0x30
syscall(e6085d38) at syscall+0x228
Xint0x80_syscall() at Xint0x80_syscall+0x20
--- syscall (5, FreeBSD ELF32, open), eip = 0x28168dd3, esp = 0xbfbfdd7c, ebp = 0xbfbfe228 ---


lock order reversal:
 1st 0xd7a6c2b8 bufwait (bufwait) _at_ /usr/src/sys/kern/vfs_bio.c:2581
 2nd 0xc3d8f800 dirhash (dirhash) _at_ /usr/src/sys/ufs/ufs/ufs_dirhash.c:283
KDB: stack backtrace:
db_trace_self_wrapper(c077a551,e6058a7c,c05a9899,c059e0b4,c077c2d0,...) at db_trace_self_wrapper+0x26
kdb_backtrace(c059e0b4,c077c2d0,c0945d88,c3811ef8,e6058ad0,...) at kdb_backtrace+0x29
_witness_debugger(c077c2d0,c3d8f800,c078f127,c3814ee8,c078ef60,...) at _witness_debugger+0x21
witness_checkorder(c3d8f800,9,c078ef60,11b,0,...) at witness_checkorder+0x68f
_sx_xlock(c3d8f800,0,c078ef60,11b,dc57806c,...) at _sx_xlock+0x32
ufsdirhash_acquire(d7a6c258,dc57806c,c3e9cae0,c3e9c910,e6058b68,...) at ufsdirhash_acquire+0x31
ufsdirhash_remove(c3e9cae0,dc57806c,6c,e6058b58,e6058b54,...) at ufsdirhash_remove+0x14
ufs_dirremove(c3e99bb0,c3e9c910,500800c,1,c3b09480,...) at ufs_dirremove+0x129
ufs_rmdir(e6058c34,c3b21a20,c07f7ee0,c3e99bb0,e6058c44,...) at ufs_rmdir+0xd2
VOP_RMDIR_APV(c07dfba0,e6058c34,2,c3e997e8,2840d100,...) at VOP_RMDIR_APV+0x7c
kern_rmdirat(c3b09480,ffffff9c,2840d100,0,e6058c80,...) at kern_rmdirat+0x16b
kern_rmdir(c3b09480,2840d100,0,e6058d2c,c0726b64,...) at kern_rmdir+0x27
rmdir(c3b09480,e6058cf8,c079a860,c077c8be,c3e54aa0,...) at rmdir+0x22
syscall(e6058d38) at syscall+0x228
Xint0x80_syscall() at Xint0x80_syscall+0x20
--- syscall (137, FreeBSD ELF32, rmdir), eip = 0x280d7333, esp = 0xbfbfe82c, ebp = 0xbfbfe858 ---


With the system hung, I have:
db> show alllocks
Process 16787 (sh) thread 0xc3e94d80 (100071)
exclusive lockmgr bufwait (bufwait) r = 0 (0xd7a5cbb0) locked _at_ /usr/src/sys/ker
n/vfs_bio.c:2581
exclusive lockmgr ufs (ufs) r = 0 (0xc41266b8) locked _at_ /usr/src/sys/kern/vfs_su
br.c:2091
exclusive lockmgr bufwait (bufwait) r = 0 (0xd79d0f20) locked _at_ /usr/src/sys/ufs
/ffs/ffs_softdep.c:11362
exclusive lockmgr ufs (ufs) r = 0 (0xc419e058) locked _at_ /usr/src/sys/kern/vfs_lo
okup.c:502
Process 16 (softdepflush) thread 0xc3b09000 (100039)
exclusive sleep mutex vnode interlock (vnode interlock) r = 0 (0xc419e078) locke
d _at_ /usr/src/sys/kern/vfs_hash.c:79
db> ps
  pid  ppid  pgrp   uid   state   wmesg     wchan    cmd
16787 16703  1123     0  R+                          sh
16703 11676  1123     0  S+      wait     0xc3f1c2a8 sh
11676 11489  1123     0  S+      wait     0xc3ec37f8 sh
11489 11375  1123     0  S+      wait     0xc3f1c550 sh
11375 11361  1123     0  S+      wait     0xc3e54000 make
11361  5540  1123     0  S+      wait     0xc3f1c000 make
 5540  5500  1123     0  S+      wait     0xc3ec2550 sh
 5500  1123  1123     0  S+      wait     0xc3f1c7f8 sh
 1128     0     0     0  RL                          [nfsiod 1]
 1127     0     0     0  RL                          [nfsiod 0]
 1123  1118  1123     0  S+      wait     0xc3e59000 sh
 1118  1099  1118     0  S+      pause    0xc3bbcda0 csh
 1106  1102  1106     0  T+                          less
 1102  1100  1102     0  Rs+                         csh
 1100   991  1100     0  Ss      select   0xc3ba7c64 sshd
 1099     1  1099     0  Ss+     wait     0xc3e59550 login
 1098     1  1098     0  Ss+     ttyin    0xc389e870 getty
 1097     1  1097     0  Ss+     ttyin    0xc389ea70 getty
 1096     1  1096     0  Ss+     ttyin    0xc389ec70 getty
 1095     1  1095     0  Ss+     ttyin    0xc389ee70 getty
 1094     1  1094     0  Ss+     ttyin    0xc3968070 getty
 1093     1  1093     0  Ss+     ttyin    0xc3968270 getty
 1092     1  1092     0  Rs+                         getty
 1091     1  1091     0  Ss+     ttyin    0xc389d270 getty
 1034     1  1034     0  Ss      select   0xc3ae5864 moused
 1010     1  1010     0  Rs                          cron
 1003     1  1003    25  Ss      pause    0xc3922850 sendmail
  999     1   999     0  Rs                          sendmail
  991     1   991     0  Ss      select   0xc3ba7624 sshd
  647     1   647     0  Rs                          syslogd
  463     1   463     0  Ss      select   0xc3ba7264 devd
  462     1   462    65  Ss      select   0xc3ae5aa4 dhclient
  446     1   446     0  Ss      select   0xc3ba7224 dhclient
  111     1   111     0  Ss      pause    0xc3b07850 adjkerntz
   16     0     0     0  RL      CPU 0               [softdepflush]
   15     0     0     0  RL                          [syncer]
   14     0     0     0  RL                          [vnlru]
    9     0     0     0  RL                          [bufdaemon]
    8     0     0     0  RL                          [pagezero]
    7     0     0     0  DL      psleep   0xc0978f58 [vmdaemon]
    6     0     0     0  RL                          [pagedaemon]
    5     0     0     0  DL      ccb_scan 0xc07fd1d4 [xpt_thrd]
   13     0     0     0  DL      (threaded)          [usb]
100026                   D       -        0xc3913dac [usbus0]
100025                   D       -        0xc3913d7c [usbus0]
100024                   D       -        0xc3913d4c [usbus0]
100023                   D       -        0xc3913d1c [usbus0]
   12     0     0     0  DL      -        0xc0814004 [yarrow]
    4     0     0     0  DL      -        0xc0811d24 [g_down]
    3     0     0     0  DL      -        0xc0811d20 [g_up]
    2     0     0     0  DL      -        0xc0811d18 [g_event]
   11     0     0     0  WL      (threaded)          [intr]
100031                   I                           [swi0: uart uart]
100030                   I                           [irq12: psm0]
100029                   I                           [irq1: atkbd0]
100028                   I                           [irq18: le0]
100027                   I                           [irq17: bt0]
100022                   I                           [irq19: pcm0 uhci0]
100021                   I                           [irq15: ata1]
100020                   I                           [irq14: ata0]
100019                   I                           [irq9: acpi0]
100017                   I                           [swi2: cambio]
100016                   I                           [swi6: task queue]
100015                   I                           [swi6: Giant taskq]
100013                   I                           [swi5: +]
100005                   I                           [swi3: vm]
100004                   I                           [swi4: clock]
100003                   I                           [swi1: netisr 0]
   10     0     0     0  RL                          [idle]
    1     0     1     0  SLs     wait     0xc384d000 [init]
    0     0     0     0  RLs     (threaded)          [kernel]
100018                   D       -        0xc38bcbc0 [kqueue taskq]
100014                   D       -        0xc38bce00 [thread taskq]
100012                   D       -        0xc38be000 [acpi_task_2]
100011                   D       -        0xc38be000 [acpi_task_1]
100010                   D       -        0xc38be000 [acpi_task_0]
100000                   RunQ                        [swapper]
db> show lockedvnods
Locked vnodes

0xc419e000: tag ufs, type VDIR
    usecount 4, writecount 0, refcount 7 mountedhere 0
    flags ()
 VI_LOCKed    v_object 0xc4191440 ref 0 pages 0
    lock type ufs: EXCL by thread 0xc3e94d80 (pid 16787)
        ino 143634, on dev ad1a

0xc4126660: tag ufs, type VREG
    usecount 2, writecount 0, refcount 2 mountedhere 0
    flags ()
    lock type ufs: EXCL by thread 0xc3e94d80 (pid 16787)
        ino 144536, on dev ad1a
db> 

-- 
Peter Jeremy

Received on Tue May 04 2010 - 02:38:04 UTC

This archive was generated by hypermail 2.4.0 : Wed May 19 2021 - 11:40:03 UTC