I have 3 UP 6.0 machines that are currently stuck using 100-epsilon% of CPU in the syncer: last pid: 17621; load averages: 9.02, 7.21, 4.82 up 3+18:15:31 +08:49:50 73 processes: 2 running, 47 sleeping, 24 waiting CPU states: 0.0% user, 0.0% nice, 100% system, 0.0% interrupt, 0.0% idle Mem: 27M Active, 149M Inact, 137M Wired, 4K Cache, 109M Buf, 684M Free Swap: 2048M Total, 8K Used, 2048M Free PID USERNAME THR PRI NICE SIZE RES STATE TIME WCPU COMMAND 38 root 1 -4 0 0K 8K getblk 180:21 431.30% syncer 17474 root 1 -4 0 1928K 1428K getblk 1:13 28.71% rm 11 root 1 171 52 0K 8K RUN 20.2H 8.94% idle rm -rf is just removing a single ufs directory tree, but it's taking *much* longer than it should be (on the order of 5 minutes instead of the 30 seconds it took for the other 25 machines), and the machine was extremely unresponsive in the meantime (response times of 30 seconds to run commands like top). WITNESS is on, but it's also enabled on the other 25 machines that aren't seeing a problem under what should be analogous loads. 
I broke to ddb a couple of times, and syncer is here: --- interrupt, eip = 0xc068fc97, esp = 0xe502f7ec, ebp = 0xe502f7f0 --- spinlock_exit(c07906a0,1,c06c6d13,18b) at spinlock_exit+0x27 _mtx_unlock_spin_flags(c07906a0,0,c06cb459,34a,c06cf11c) at _mtx_unlock_spin_flags+0xbc witness_checkorder(c292d000,9,c06cf11c,d25,1) at witness_checkorder+0x35e _mtx_lock_flags(c292d000,0,c06cf11c,d25,c06cf11c) at _mtx_lock_flags+0x8a vfs_clean_pages(d63aead0,0,c06cf11c,3a0,d63aead0) at vfs_clean_pages+0x68 bdwrite(d63aead0,0,c06dbd1a,867,0) at bdwrite+0x420 softdep_setup_freeblocks(c44154a4,0,0,800,1) at softdep_setup_freeblocks+0x7bf ffs_truncate(c2cc3770,0,0,c00,0) at ffs_truncate+0x632 ufs_inactive(e502fbd4,c2cc37ec,c2cc3770,c2cc37ec,e502fbec) at ufs_inactive+0xe6 VOP_INACTIVE_APV(c070fb20,e502fbd4,c06d0e48,886,c071de20) at VOP_INACTIVE_APV+0xac vinactive(c2cc3770,c22c4180,c06d0e48,818,d64) at vinactive+0x8b vput(c2cc3770,0,c06dbd1a,d64,3b7) at vput+0x19e handle_workitem_remove(c72960c0,0,2,32e,0) at handle_workitem_remove+0x14a process_worklist_item(0,0,c06dbd1a,2de,435e5452) at process_worklist_item+0x20b softdep_process_worklist(0,0,c06d0e48,689,0) at softdep_process_worklist+0x130 sched_sync(0,e502fd38,c06c4e9f,30d,0) at sched_sync+0x2fe fork_exit(c05524f0,0,e502fd38) at fork_exit+0xc1 fork_trampoline() at fork_trampoline+0x8 --- trap 0x1, eip = 0, esp = 0xe502fd6c, ebp = 0 --- db> --- interrupt, eip = 0xc068fc97, esp = 0xe502fbd0, ebp = 0xe502fbd4 --- spinlock_exit(c07906a0,1,c06c6d13,18b) at spinlock_exit+0x27 _mtx_unlock_spin_flags(c07906a0,0,c06cb459,34a,c070ef80) at _mtx_unlock_spin_flags+0xbc witness_checkorder(c07dfc20,9,c06dbd1a,362,c33cfdd0) at witness_checkorder+0x35e _mtx_lock_flags(c07dfc20,0,c06dbd1a,362,0) at _mtx_lock_flags+0x8a process_worklist_item(0,0,c06dbd1a,2de,435e545e) at process_worklist_item+0x356 softdep_process_worklist(0,0,c06d0e48,689,0) at softdep_process_worklist+0x130 sched_sync(0,e502fd38,c06c4e9f,30d,0) at sched_sync+0x2fe 
fork_exit(c05524f0,0,e502fd38) at fork_exit+0xc1 fork_trampoline() at fork_trampoline+0x8 --- trap 0x1, eip = 0, esp = 0xe502fd6c, ebp = 0 --- They eventually completed, but I'm suspicious of why these 3 took so long. Kris
This archive was generated by hypermail 2.4.0 : Wed May 19 2021 - 11:38:46 UTC