On Tue, Jun 07, 2016 at 05:46:10AM +0300, Konstantin Belousov wrote: > On Mon, Jun 06, 2016 at 10:13:11AM -0700, Mark Johnston wrote: > > On Sat, Jun 04, 2016 at 12:32:36PM +0300, Konstantin Belousov wrote: > What statement was not true: that your code sets TDF_SBDRY, or that > the lf_advlock() function was called ? The latter. It turns out that we've modified some of our filesystem-specific syscalls to set TDF_SBDRY as well; see below. > > I'm a bit confused by this. How does TDF_SBDRY prevent thread_single() > > from waking up the thread? The sleepq_abort() call is only elided in the > > SINGLE_ALLPROC case, so in other cases, I think we will still interrupt > > the sleep. Thus, since thread_suspend_check() is only invoked prior to > > going to sleep, the application I referred to must have attempted to > > single-thread the process before the thread in question went to sleep. > It does not prevent the wakeup, sorry. > > What I should have said, more precisely, is that thread_suspend_check() > call before the thread is goes to sleep, is nop in case of TDF_SBDRY > flag was set. Ok, that's my understanding too. > > > From what I see in the code, our NFS client has similar issue of calling > > > lf_advlock() with TDF_SBDRY set. Below is the patch to fix that. > > > Similar bug existed in our fifofs, see r277321. > > > > Thanks. It may be that a similar fix is appropriate in our locking code, > > but I'll have to spend more time reading it. > > Still, I am confused now as well. If you can catch the process in that > state, where a thread is sleeping while single-threading request cannot > make the progress, I would like to see the struct thread and struct proc > printouts. Esp. the thread flags are interesting. Sure, see below. For reference: td_flags = 0xa84c = INMEM | SINTR | CANSWAP | ASTPENDING | SBDRY | NEEDSUSPCHK td_pflags = 0 td_inhibitors = 0x2 = SLEEPING td_locks = 0 stack: mi_switch+0x21e sleepq_catch_signals+0x377 sleepq_wait_sig+0xb _sleep+0x29d ... p_flag = 0x10080080 = INMEM | STOPPED_SINGLE | HADTHREADS p_flag2 = 0 The thread is sleeping interruptibly. The NEEDSUSPCHK flag is set, yet the SLEEPABORT flag is not, so the thread can not have been sleeping when thread_single() was called - else sleepq_abort() would have been invoked and set SLEEPABORT. We are at the second sleepq_switch() call in sleepq_catch_signals(), and no signal was pending, so we called thread_suspend_check(), which returned 0 because of SBDRY. So we went to sleep. I note that this couldn't have happened prior to r283320. That change was apparently motivated by a similar hang, but in that case the thread was suspended (with a vnode lock held) rather than asleep. It looks like our internal fix also added a change to set TDF_SBDRY around filesystem-specific syscalls, which often sleep interruptibly while holding vnode locks. But I don't think that's the problem here, as you noted with lf_advlock(). With r283320 reverted, P_STOPPED_SIG would not have been set, so thread_suspend_check() would have suspended us, allowing the core dump to proceed. I had thought that using SINGLE_BOUNDRY beforing coredumping would fix both hangs, but I guess that wouldn't help SINGLE_ALLPROC, so this is probably the wrong place to be solving the problem. CPU ID FUNCTION:NAME 1 1 :BEGIN struct thread { struct mtx *volatile td_lock = 0xffffffff814a5148 struct proc *td_proc = 0xfffff80445c694d8 struct td_plist = { struct thread *tqe_next = 0xfffff81a44186750 struct thread **tqe_prev = 0xfffff8047a027760 } struct td_runq = { struct thread *tqe_next = 0 struct thread **tqe_prev = 0xffffffff8147cd40 } struct td_slpq = { struct thread *tqe_next = 0 struct thread **tqe_prev = 0xfffff81c992b1d80 } struct td_lockq = { struct thread *tqe_next = 0 struct thread **tqe_prev = 0xfffffe2a1c727be8 } struct td_hash = { struct thread *le_next = 0 struct thread **le_prev = 0xfffffe0001076f98 } struct cpuset *td_cpuset = 0xfffff80699b9b510 struct seltd *td_sel = 0xfffff80258307b80 struct sleepqueue *td_sleepqueue = 0 struct turnstile *td_turnstile = 0xfffff80d50a3fcc0 struct rl_q_entry *td_rlqe = 0xfffff8025dcf5988 struct umtx_q *td_umtxq = 0xfffff807c9adfd80 lwpid_t td_tid = 0x18df3 sigqueue_t td_sigqueue = { sigset_t sq_signals = { __uint32_t [4] __bits = [ 0, 0, 0, 0 ] } sigset_t sq_kill = { __uint32_t [4] __bits = [ 0, 0, 0, 0 ] } struct sq_list = { struct ksiginfo *tqh_first = 0 struct ksiginfo **tqh_last = 0xfffff814be2df808 } struct proc *sq_proc = 0xfffff80445c694d8 int sq_flags = 0x1 } u_char td_lend_user_pri = 0xff int td_flags = 0xa84c int td_inhibitors = 0x2 int td_pflags = 0 int td_dupfd = 0 int td_sqqueue = 0 void *td_wchan = 0xfffffe2a1f3abec0 const char *td_wmesg = 0xffffffff80feb007 u_char td_lastcpu = 0x11 u_char td_oncpu = 0xff volatile u_char td_owepreempt = 0 u_char td_tsqueue = 0 short td_locks = 0 short td_rw_rlocks = 0 short td_lk_slocks = 0 short td_stopsched = 0 struct turnstile *td_blocked = 0 const char *td_lockname = 0 struct td_contested = { struct turnstile *lh_first = 0 } struct lock_list_entry *td_sleeplocks = 0 int td_intr_nesting_level = 0 int td_pinned = 0 struct ucred *td_ucred = 0xfffff802589c8700 u_int td_estcpu = 0 int td_slptick = 0x8b25f40c int td_blktick = 0 int td_swvoltick = 0x8b25f40c u_int td_cow = 0 struct rusage td_ru = { struct timeval ru_utime = { time_t tv_sec = 0 suseconds_t tv_usec = 0 } struct timeval ru_stime = { time_t tv_sec = 0 suseconds_t tv_usec = 0 } long ru_maxrss = 0x78a4 long ru_ixrss = 0x240 long ru_idrss = 0x30 long ru_isrss = 0x600 long ru_minflt = 0x84 long ru_majflt = 0 long ru_nswap = 0 long ru_inblock = 0xec long ru_oublock = 0 long ru_msgsnd = 0x6c long ru_msgrcv = 0x3b long ru_nsignals = 0 long ru_nvcsw = 0x1d5 long ru_nivcsw = 0x5b } struct rusage_ext td_rux = { uint64_t rux_runtime = 0xa2221e0 uint64_t rux_uticks = 0x3 uint64_t rux_sticks = 0x9 uint64_t rux_iticks = 0 uint64_t rux_uu = 0x4f3f uint64_t rux_su = 0xedbe uint64_t rux_tu = 0x13cfe } uint64_t td_incruntime = 0 uint64_t td_runtime = 0xa2221e0 u_int td_pticks = 0 u_int td_sticks = 0 u_int td_iticks = 0 u_int td_uticks = 0 int td_intrval = 0 sigset_t td_oldsigmask = { __uint32_t [4] __bits = [ 0, 0, 0, 0 ] } volatile u_int td_generation = 0x230 stack_t td_sigstk = { char *ss_sp = 0 __size_t ss_size = 0 int ss_flags = 0 } int td_xsig = 0 u_long td_profil_addr = 0 u_int td_profil_ticks = 0 char [20] td_name = [ "" ] uint32_t td_dom_gen = 0 uint64_t td_dom_dom = 0 void (*)() td_sleepcb = 0 void *td_sleepcbarg = 0 struct file *td_fpop = 0 int td_dbgflags = 0 struct ksiginfo td_dbgksi = { struct ksi_link = { struct ksiginfo *tqe_next = 0 struct ksiginfo **tqe_prev = 0 } siginfo_t ksi_info = { int si_signo = 0 int si_errno = 0 int si_code = 0 __pid_t si_pid = 0 __uid_t si_uid = 0 int si_status = 0 void *si_addr = 0 union sigval si_value = { int sival_int = 0 void *sival_ptr = 0 int sigval_int = 0 void *sigval_ptr = 0 } union _reason = { struct _fault = { int _trapno = 0 } struct _timer = { int _timerid = 0 int _overrun = 0 } struct _mesgq = { int _mqd = 0 } struct _poll = { long _band = 0 } struct __spare__ = { long __spare1__ = 0 int [7] __spare2__ = [ 0, 0, 0, 0, 0, 0, 0 ] } } } int ksi_flags = 0 struct sigqueue *ksi_sigq = 0 } int td_ng_outbound = 0 struct osd td_osd = { u_int osd_nslots = 0 void **osd_slots = 0 struct osd_next = { struct osd *le_next = 0 struct osd **le_prev = 0 } } struct vm_map_entry *td_map_def_user = 0 pid_t td_dbg_forked = 0 u_int td_vp_reserv = 0 int td_no_sleeping = 0 int td_dom_rr_idx = 0 struct ucred *td_pcred = 0 int td_ioprio = 0 int td_dsc_start = 0 int td_dsc_last = 0 uint16_t td_external_in_fs = 0x1 int td_unfair_sml = 0 int td_sb_nowait = 0x1 sigset_t td_sigmask = { __uint32_t [4] __bits = [ 0, 0, 0, 0 ] } u_char td_rqindex = 0x19 u_char td_base_pri = 0xc6 u_char td_priority = 0xc6 u_char td_pri_class = 0x2 u_char td_user_pri = 0xc6 u_char td_base_user_pri = 0xc6 struct pcb *td_pcb = 0xfffffe2a1f3acb80 enum td_state = TDS_INHIBITED register_t [2] td_retval = [ 0, 0 ] struct callout td_slpcallout = { union c_links = { struct le = { struct callout *le_next = 0xfffff812aff40d78 struct callout **le_prev = 0xfffffe000138dd00 } struct sle = { struct callout *sle_next = 0xfffff812aff40d78 } struct tqe = { struct callout *tqe_next = 0xfffff812aff40d78 struct callout **tqe_prev = 0xfffffe000138dd00 } } sbintime_t c_time = 0x2dd13a0ef0976 sbintime_t c_precision = 0x1e0041668 void *c_arg = 0xfffff814be2df750 void (*)() c_func = kernel`sleepq_timeout struct lock_object *c_lock = 0 int c_flags = 0x110 volatile int c_cpu = 0xd } struct trapframe *td_frame = 0xfffffe2a1f3acac0 struct vm_object *td_kstack_obj = 0xfffff817f52cb700 vm_offset_t td_kstack = 0xfffffe2a1f3a3000 int td_kstack_pages = 0xa volatile u_int td_critnest = 0x1 struct mdthread td_md = { int md_spinlock_count = 0x1 register_t md_saved_flags = 0x246 register_t md_spurflt_addr = 0 } struct td_sched *td_sched = 0xfffff814be2dfe50 struct kaudit_record *td_ar = 0 struct lpohead [2] td_lprof = [ struct lpohead { struct lock_profile_object *lh_first = 0 }, struct lpohead { struct lock_profile_object *lh_first = 0 } ] struct kdtrace_thread *td_dtrace = 0xfffff804daf1e200 int td_errno = 0 struct vnet *td_vnet = 0 const char *td_vnet_lpush = 0 struct trapframe *td_intr_frame = 0 struct proc *td_rfppwait_p = 0 struct vm_page **td_ma = 0 int td_ma_cnt = 0 } ^C 20 1 :BEGIN struct proc { struct p_list = { struct proc *le_next = 0xfffff8050bde7000 struct proc **le_prev = 0xfffff804697d09b0 } struct p_threads = { struct thread *tqh_first = 0xfffff80e2e2da750 struct thread **tqh_last = 0xfffff803d5897760 } struct mtx p_slock = { struct lock_object lock_object = { const char *lo_name = 0xffffffff80ea8f96 u_int lo_flags = 0xb0000 u_int lo_data = 0 struct witness *lo_witness = 0 } volatile uintptr_t mtx_lock = 0x4 } struct ucred *p_ucred = 0xfffff802589c8700 struct filedesc *p_fd = 0xfffff81596020800 struct filedesc_to_leader *p_fdtol = 0 struct pstats *p_stats = 0xfffff802d1273b40 struct plimit *p_limit = 0xfffff80258a17100 struct callout p_limco = { union c_links = { struct le = { struct callout *le_next = 0 struct callout **le_prev = 0 } struct sle = { struct callout *sle_next = 0 } struct tqe = { struct callout *tqe_next = 0 struct callout **tqe_prev = 0 } } sbintime_t c_time = 0 sbintime_t c_precision = 0 void *c_arg = 0 void (*)() c_func = 0 struct lock_object *c_lock = 0xfffff80445c695d8 int c_flags = 0 volatile int c_cpu = 0 } struct sigacts *p_sigacts = 0xfffff8049a35b000 int p_flag = 0x10080080 int p_flag2 = 0 enum p_state = PRS_NORMAL pid_t p_pid = 0x68aa struct p_hash = { struct proc *le_next = 0 struct proc **le_prev = 0xfffffe0001068550 } struct p_pglist = { struct proc *le_next = 0 struct proc **le_prev = 0xfffff8074431c420 } struct proc *p_pptr = 0xfffff80203d0a4d8 struct p_sibling = { struct proc *le_next = 0xfffff81a3498b000 struct proc **le_prev = 0xfffff8044d25a0e8 } struct p_children = { struct proc *lh_first = 0 } struct mtx p_mtx = { struct lock_object lock_object = { const char *lo_name = 0xffffffff80ea8f89 u_int lo_flags = 0x1430000 u_int lo_data = 0 struct witness *lo_witness = 0 } volatile uintptr_t mtx_lock = 0x4 } struct ksiginfo *p_ksi = 0xfffff80268048380 sigqueue_t p_sigqueue = { sigset_t sq_signals = { __uint32_t [4] __bits = [ 0x14103, 0, 0, 0x1000000 ] } sigset_t sq_kill = { __uint32_t [4] __bits = [ 0x10100, 0, 0, 0 ] } struct sq_list = { struct ksiginfo *tqh_first = 0xfffff81531ebe690 struct ksiginfo **tqh_last = 0xfffff81531ebeaf0 } struct proc *sq_proc = 0xfffff80445c694d8 int sq_flags = 0x1 } pid_t p_oppid = 0x1 struct vmspace *p_vmspace = 0xfffff8025864bc40 u_int p_swtick = 0x8b25eed7 struct itimerval p_realtimer = { struct timeval it_interval = { time_t tv_sec = 0 suseconds_t tv_usec = 0 } struct timeval it_value = { time_t tv_sec = 0 suseconds_t tv_usec = 0 } } struct rusage p_ru = { struct timeval ru_utime = { time_t tv_sec = 0 suseconds_t tv_usec = 0 } struct timeval ru_stime = { time_t tv_sec = 0 suseconds_t tv_usec = 0 } long ru_maxrss = 0 long ru_ixrss = 0 long ru_idrss = 0 long ru_isrss = 0 long ru_minflt = 0 long ru_majflt = 0 long ru_nswap = 0 long ru_inblock = 0 long ru_oublock = 0 long ru_msgsnd = 0 long ru_msgrcv = 0 long ru_nsignals = 0 long ru_nvcsw = 0 long ru_nivcsw = 0 } struct rusage_ext p_rux = { uint64_t rux_runtime = 0xc4463d41 uint64_t rux_uticks = 0x36 uint64_t rux_sticks = 0xa4 uint64_t rux_iticks = 0 uint64_t rux_uu = 0x5f0e4 uint64_t rux_su = 0x120b00 uint64_t rux_tu = 0x17fbe5 } struct rusage_ext p_crux = { uint64_t rux_runtime = 0 uint64_t rux_uticks = 0 uint64_t rux_sticks = 0 uint64_t rux_iticks = 0 uint64_t rux_uu = 0 uint64_t rux_su = 0 uint64_t rux_tu = 0 } int p_profthreads = 0 volatile int p_exitthreads = 0 int p_traceflag = 0 struct vnode *p_tracevp = 0 struct ucred *p_tracecred = 0 struct vnode *p_textvp = 0xfffff8046d76e218 u_int p_lock = 0 struct sigiolst p_sigiolst = { struct sigio *slh_first = 0 } int p_sigparent = 0x14 int p_sig = 0 u_long p_code = 0 u_int p_stops = 0 u_int p_stype = 0 char p_step = '\0' u_char p_pfsflags = 0 struct nlminfo *p_nlminfo = 0 struct kaioinfo *p_aioinfo = 0 struct thread *p_singlethread = 0xfffff804a4cc0000 int p_suspcount = 0x26 struct thread *p_xthread = 0 int p_boundary_count = 0x25 int p_pendingcnt = 0x30 struct itimers *p_itimers = 0 struct procdesc *p_procdesc = 0 u_int p_magic = 0xbeefface int p_osrel = 0x10c8e6 char [20] p_comm = [ "" ] struct pgrp *p_pgrp = 0xfffff8074431c410 struct sysentvec *p_sysent = 0xffffffff812d6ac8 struct pargs *p_args = 0xfffff81a09bd9e70 rlim_t p_cpulimit = 0x7fffffffffffffff char p_nice = '\0' int p_fibnum = 0 u_short p_xstat = 0x11 struct knlist p_klist = { struct klist kl_list = { struct knote *slh_first = 0xfffff80687e10080 } void (*)() kl_lock = kernel`knlist_mtx_lock void (*)() kl_unlock = kernel`knlist_mtx_unlock void (*)() kl_assert_locked = kernel`knlist_mtx_assert_locked void (*)() kl_assert_unlocked = kernel`knlist_mtx_assert_unlocked void *kl_lockarg = 0xfffff80445c695d8 } int p_numthreads = 0x27 struct mdproc p_md = { struct proc_ldt *md_ldt = 0 struct system_segment_descriptor md_ldt_sd = { unsigned long sd_lolimit :16 = 0 unsigned long sd_lobase :24 = 0 unsigned long sd_type :5 = 0 unsigned long sd_dpl :2 = 0 unsigned long sd_p :1 = 0 unsigned long sd_hilimit :4 = 0 unsigned long sd_xx0 :3 = 0 unsigned long sd_gran :1 = 0 unsigned long sd_hibase :40 = 0 unsigned long sd_xx1 :8 = 0 unsigned long sd_mbz :5 = 0 unsigned long sd_xx2 :19 = 0 } } struct callout p_itcallout = { union c_links = { struct le = { struct callout *le_next = 0 struct callout **le_prev = 0 } struct sle = { struct callout *sle_next = 0 } struct tqe = { struct callout *tqe_next = 0 struct callout **tqe_prev = 0 } } sbintime_t c_time = 0 sbintime_t c_precision = 0 void *c_arg = 0 void (*)() c_func = 0 struct lock_object *c_lock = 0xfffff80445c695d8 int c_flags = 0 volatile int c_cpu = 0 } u_short p_acflag = 0x11 struct proc *p_peers = 0 struct proc *p_leader = 0xfffff80445c694d8 void *p_emuldata = 0 struct label *p_label = 0 struct p_sched *p_sched = 0xfffff80445c699b0 struct p_ktr = { struct ktr_request *stqh_first = 0 struct ktr_request **stqh_last = 0xfffff80445c69938 } struct p_mqnotifier = { struct mqueue_notifier *lh_first = 0 } struct kdtrace_proc *p_dtrace = 0xfffff8166c1c9040 struct cv p_pwait = { const char *cv_description = 0xffffffff80ea9ab8 volatile int cv_waiters = 0 } struct cv p_dbgwait = { const char *cv_description = 0xffffffff80ea9abf volatile int cv_waiters = 0 } uint64_t p_prev_runtime = 0 struct racct *p_racct = 0 u_char p_throttled = 0 struct p_orphan = { struct proc *le_next = 0 struct proc **le_prev = 0xfffff804697d09a0 } struct p_orphans = { struct proc *lh_first = 0 } }Received on Tue Jun 07 2016 - 02:13:59 UTC
This archive was generated by hypermail 2.4.0 : Wed May 19 2021 - 11:41:05 UTC