Re: thread suspension when dumping core

From: Mark Johnston <markj_at_FreeBSD.org>
Date: Mon, 6 Jun 2016 21:17:41 -0700
On Tue, Jun 07, 2016 at 05:46:10AM +0300, Konstantin Belousov wrote:
> On Mon, Jun 06, 2016 at 10:13:11AM -0700, Mark Johnston wrote:
> > On Sat, Jun 04, 2016 at 12:32:36PM +0300, Konstantin Belousov wrote:
> What statement was not true: that your code sets TDF_SBDRY, or that
> the lf_advlock() function was called ?

The latter. It turns out that we've modified some of our
filesystem-specific syscalls to set TDF_SBDRY as well; see below.

> > I'm a bit confused by this. How does TDF_SBDRY prevent thread_single()
> > from waking up the thread? The sleepq_abort() call is only elided in the
> > SINGLE_ALLPROC case, so in other cases, I think we will still interrupt
> > the sleep. Thus, since thread_suspend_check() is only invoked prior to
> > going to sleep, the application I referred to must have attempted to
> > single-thread the process before the thread in question went to sleep.
> It does not prevent the wakeup, sorry.
> 
> What I should have said, more precisely, is that thread_suspend_check()
> call before the thread is goes to sleep, is nop in case of TDF_SBDRY
> flag was set.

Ok, that's my understanding too.

> > > From what I see in the code, our NFS client has similar issue of calling
> > > lf_advlock() with TDF_SBDRY set.  Below is the patch to fix that.
> > > Similar bug existed in our fifofs, see r277321.
> > 
> > Thanks. It may be that a similar fix is appropriate in our locking code,
> > but I'll have to spend more time reading it.
> 
> Still, I am confused now as well.  If you can catch the process in that
> state, where a thread is sleeping while single-threading request cannot
> make the progress, I would like to see the struct thread and struct proc
> printouts.  Esp. the thread flags are interesting.

Sure, see below. For reference:

td_flags = 0xa84c = INMEM | SINTR | CANSWAP | ASTPENDING | SBDRY | NEEDSUSPCHK
td_pflags = 0
td_inhibitors = 0x2 = SLEEPING
td_locks = 0

stack:
mi_switch+0x21e sleepq_catch_signals+0x377 sleepq_wait_sig+0xb _sleep+0x29d ...

p_flag = 0x10080080 = INMEM | STOPPED_SINGLE | HADTHREADS
p_flag2 = 0

The thread is sleeping interruptibly. The NEEDSUSPCHK flag is set, yet the
SLEEPABORT flag is not, so the thread can not have been sleeping when
thread_single() was called - else sleepq_abort() would have been
invoked and set SLEEPABORT. We are at the second sleepq_switch() call in
sleepq_catch_signals(), and no signal was pending, so we called
thread_suspend_check(), which returned 0 because of SBDRY. So we went to
sleep.

I note that this couldn't have happened prior to r283320. That change
was apparently motivated by a similar hang, but in that case the thread
was suspended (with a vnode lock held) rather than asleep. It looks like
our internal fix also added a change to set TDF_SBDRY around
filesystem-specific syscalls, which often sleep interruptibly while
holding vnode locks. But I don't think that's the problem here, as you
noted with lf_advlock().

With r283320 reverted, P_STOPPED_SIG would not have been set, so
thread_suspend_check() would have suspended us, allowing the core dump
to proceed. I had thought that using SINGLE_BOUNDRY beforing coredumping
would fix both hangs, but I guess that wouldn't help SINGLE_ALLPROC, so
this is probably the wrong place to be solving the problem.

CPU     ID                    FUNCTION:NAME
  1      1                           :BEGIN struct thread {
    struct mtx *volatile td_lock = 0xffffffff814a5148
    struct proc *td_proc = 0xfffff80445c694d8
    struct td_plist = {
        struct thread *tqe_next = 0xfffff81a44186750
        struct thread **tqe_prev = 0xfffff8047a027760
    }
    struct td_runq = {
        struct thread *tqe_next = 0
        struct thread **tqe_prev = 0xffffffff8147cd40
    }
    struct td_slpq = {
        struct thread *tqe_next = 0
        struct thread **tqe_prev = 0xfffff81c992b1d80
    }
    struct td_lockq = {
        struct thread *tqe_next = 0
        struct thread **tqe_prev = 0xfffffe2a1c727be8
    }
    struct td_hash = {
        struct thread *le_next = 0
        struct thread **le_prev = 0xfffffe0001076f98
    }
    struct cpuset *td_cpuset = 0xfffff80699b9b510
    struct seltd *td_sel = 0xfffff80258307b80
    struct sleepqueue *td_sleepqueue = 0
    struct turnstile *td_turnstile = 0xfffff80d50a3fcc0
    struct rl_q_entry *td_rlqe = 0xfffff8025dcf5988
    struct umtx_q *td_umtxq = 0xfffff807c9adfd80
    lwpid_t td_tid = 0x18df3
    sigqueue_t td_sigqueue = {
        sigset_t sq_signals = {
            __uint32_t [4] __bits = [ 0, 0, 0, 0 ]
        }
        sigset_t sq_kill = {
            __uint32_t [4] __bits = [ 0, 0, 0, 0 ]
        }
        struct sq_list = {
            struct ksiginfo *tqh_first = 0
            struct ksiginfo **tqh_last = 0xfffff814be2df808
        }
        struct proc *sq_proc = 0xfffff80445c694d8
        int sq_flags = 0x1
    }
    u_char td_lend_user_pri = 0xff
    int td_flags = 0xa84c
    int td_inhibitors = 0x2
    int td_pflags = 0
    int td_dupfd = 0
    int td_sqqueue = 0
    void *td_wchan = 0xfffffe2a1f3abec0
    const char *td_wmesg = 0xffffffff80feb007
    u_char td_lastcpu = 0x11
    u_char td_oncpu = 0xff
    volatile u_char td_owepreempt = 0
    u_char td_tsqueue = 0
    short td_locks = 0
    short td_rw_rlocks = 0
    short td_lk_slocks = 0
    short td_stopsched = 0
    struct turnstile *td_blocked = 0
    const char *td_lockname = 0
    struct td_contested = {
        struct turnstile *lh_first = 0
    }
    struct lock_list_entry *td_sleeplocks = 0
    int td_intr_nesting_level = 0
    int td_pinned = 0
    struct ucred *td_ucred = 0xfffff802589c8700
    u_int td_estcpu = 0
    int td_slptick = 0x8b25f40c
    int td_blktick = 0
    int td_swvoltick = 0x8b25f40c
    u_int td_cow = 0
    struct rusage td_ru = {
        struct timeval ru_utime = {
            time_t tv_sec = 0
            suseconds_t tv_usec = 0
        }
        struct timeval ru_stime = {
            time_t tv_sec = 0
            suseconds_t tv_usec = 0
        }
        long ru_maxrss = 0x78a4
        long ru_ixrss = 0x240
        long ru_idrss = 0x30
        long ru_isrss = 0x600
        long ru_minflt = 0x84
        long ru_majflt = 0
        long ru_nswap = 0
        long ru_inblock = 0xec
        long ru_oublock = 0
        long ru_msgsnd = 0x6c
        long ru_msgrcv = 0x3b
        long ru_nsignals = 0
        long ru_nvcsw = 0x1d5
        long ru_nivcsw = 0x5b
    }
    struct rusage_ext td_rux = {
        uint64_t rux_runtime = 0xa2221e0
        uint64_t rux_uticks = 0x3
        uint64_t rux_sticks = 0x9
        uint64_t rux_iticks = 0
        uint64_t rux_uu = 0x4f3f
        uint64_t rux_su = 0xedbe
        uint64_t rux_tu = 0x13cfe
    }
    uint64_t td_incruntime = 0
    uint64_t td_runtime = 0xa2221e0
    u_int td_pticks = 0
    u_int td_sticks = 0
    u_int td_iticks = 0
    u_int td_uticks = 0
    int td_intrval = 0
    sigset_t td_oldsigmask = {
        __uint32_t [4] __bits = [ 0, 0, 0, 0 ]
    }
    volatile u_int td_generation = 0x230
    stack_t td_sigstk = {
        char *ss_sp = 0
        __size_t ss_size = 0
        int ss_flags = 0
    }
    int td_xsig = 0
    u_long td_profil_addr = 0
    u_int td_profil_ticks = 0
    char [20] td_name = [ "" ]
    uint32_t td_dom_gen = 0
    uint64_t td_dom_dom = 0
    void (*)() td_sleepcb = 0
    void *td_sleepcbarg = 0
    struct file *td_fpop = 0
    int td_dbgflags = 0
    struct ksiginfo td_dbgksi = {
        struct ksi_link = {
            struct ksiginfo *tqe_next = 0
            struct ksiginfo **tqe_prev = 0
        }
        siginfo_t ksi_info = {
            int si_signo = 0
            int si_errno = 0
            int si_code = 0
            __pid_t si_pid = 0
            __uid_t si_uid = 0
            int si_status = 0
            void *si_addr = 0
            union sigval si_value = {
                int sival_int = 0
                void *sival_ptr = 0
                int sigval_int = 0
                void *sigval_ptr = 0
            }
            union _reason = {
                struct _fault = {
                    int _trapno = 0
                }
                struct _timer = {
                    int _timerid = 0
                    int _overrun = 0
                }
                struct _mesgq = {
                    int _mqd = 0
                }
                struct _poll = {
                    long _band = 0
                }
                struct __spare__ = {
                    long __spare1__ = 0
                    int [7] __spare2__ = [ 0, 0, 0, 0, 0, 0, 0 ]
                }
            }
        }
        int ksi_flags = 0
        struct sigqueue *ksi_sigq = 0
    }
    int td_ng_outbound = 0
    struct osd td_osd = {
        u_int osd_nslots = 0
        void **osd_slots = 0
        struct osd_next = {
            struct osd *le_next = 0
            struct osd **le_prev = 0
        }
    }
    struct vm_map_entry *td_map_def_user = 0
    pid_t td_dbg_forked = 0
    u_int td_vp_reserv = 0
    int td_no_sleeping = 0
    int td_dom_rr_idx = 0
    struct ucred *td_pcred = 0
    int td_ioprio = 0
    int td_dsc_start = 0
    int td_dsc_last = 0
    uint16_t td_external_in_fs = 0x1
    int td_unfair_sml = 0
    int td_sb_nowait = 0x1
    sigset_t td_sigmask = {
        __uint32_t [4] __bits = [ 0, 0, 0, 0 ]
    }
    u_char td_rqindex = 0x19
    u_char td_base_pri = 0xc6
    u_char td_priority = 0xc6
    u_char td_pri_class = 0x2
    u_char td_user_pri = 0xc6
    u_char td_base_user_pri = 0xc6
    struct pcb *td_pcb = 0xfffffe2a1f3acb80
    enum td_state = TDS_INHIBITED
    register_t [2] td_retval = [ 0, 0 ]
    struct callout td_slpcallout = {
        union c_links = {
            struct le = {
                struct callout *le_next = 0xfffff812aff40d78
                struct callout **le_prev = 0xfffffe000138dd00
            }
            struct sle = {
                struct callout *sle_next = 0xfffff812aff40d78
            }
            struct tqe = {
                struct callout *tqe_next = 0xfffff812aff40d78
                struct callout **tqe_prev = 0xfffffe000138dd00
            }
        }
        sbintime_t c_time = 0x2dd13a0ef0976
        sbintime_t c_precision = 0x1e0041668
        void *c_arg = 0xfffff814be2df750
        void (*)() c_func = kernel`sleepq_timeout
        struct lock_object *c_lock = 0
        int c_flags = 0x110
        volatile int c_cpu = 0xd
    }
    struct trapframe *td_frame = 0xfffffe2a1f3acac0
    struct vm_object *td_kstack_obj = 0xfffff817f52cb700
    vm_offset_t td_kstack = 0xfffffe2a1f3a3000
    int td_kstack_pages = 0xa
    volatile u_int td_critnest = 0x1
    struct mdthread td_md = {
        int md_spinlock_count = 0x1
        register_t md_saved_flags = 0x246
        register_t md_spurflt_addr = 0
    }
    struct td_sched *td_sched = 0xfffff814be2dfe50
    struct kaudit_record *td_ar = 0
    struct lpohead [2] td_lprof = [ 
        struct lpohead {
            struct lock_profile_object *lh_first = 0
        }, 
        struct lpohead {
            struct lock_profile_object *lh_first = 0
        }
    ]
    struct kdtrace_thread *td_dtrace = 0xfffff804daf1e200
    int td_errno = 0
    struct vnet *td_vnet = 0
    const char *td_vnet_lpush = 0
    struct trapframe *td_intr_frame = 0
    struct proc *td_rfppwait_p = 0
    struct vm_page **td_ma = 0
    int td_ma_cnt = 0
}
^C

 20      1                           :BEGIN struct proc {
    struct p_list = {
        struct proc *le_next = 0xfffff8050bde7000
        struct proc **le_prev = 0xfffff804697d09b0
    }
    struct p_threads = {
        struct thread *tqh_first = 0xfffff80e2e2da750
        struct thread **tqh_last = 0xfffff803d5897760
    }
    struct mtx p_slock = {
        struct lock_object lock_object = {
            const char *lo_name = 0xffffffff80ea8f96
            u_int lo_flags = 0xb0000
            u_int lo_data = 0
            struct witness *lo_witness = 0
        }
        volatile uintptr_t mtx_lock = 0x4
    }
    struct ucred *p_ucred = 0xfffff802589c8700
    struct filedesc *p_fd = 0xfffff81596020800
    struct filedesc_to_leader *p_fdtol = 0
    struct pstats *p_stats = 0xfffff802d1273b40
    struct plimit *p_limit = 0xfffff80258a17100
    struct callout p_limco = {
        union c_links = {
            struct le = {
                struct callout *le_next = 0
                struct callout **le_prev = 0
            }
            struct sle = {
                struct callout *sle_next = 0
            }
            struct tqe = {
                struct callout *tqe_next = 0
                struct callout **tqe_prev = 0
            }
        }
        sbintime_t c_time = 0
        sbintime_t c_precision = 0
        void *c_arg = 0
        void (*)() c_func = 0
        struct lock_object *c_lock = 0xfffff80445c695d8
        int c_flags = 0
        volatile int c_cpu = 0
    }
    struct sigacts *p_sigacts = 0xfffff8049a35b000
    int p_flag = 0x10080080
    int p_flag2 = 0
    enum p_state = PRS_NORMAL
    pid_t p_pid = 0x68aa
    struct p_hash = {
        struct proc *le_next = 0
        struct proc **le_prev = 0xfffffe0001068550
    }
    struct p_pglist = {
        struct proc *le_next = 0
        struct proc **le_prev = 0xfffff8074431c420
    }
    struct proc *p_pptr = 0xfffff80203d0a4d8
    struct p_sibling = {
        struct proc *le_next = 0xfffff81a3498b000
        struct proc **le_prev = 0xfffff8044d25a0e8
    }
    struct p_children = {
        struct proc *lh_first = 0
    }
    struct mtx p_mtx = {
        struct lock_object lock_object = {
            const char *lo_name = 0xffffffff80ea8f89
            u_int lo_flags = 0x1430000
            u_int lo_data = 0
            struct witness *lo_witness = 0
        }
        volatile uintptr_t mtx_lock = 0x4
    }
    struct ksiginfo *p_ksi = 0xfffff80268048380
    sigqueue_t p_sigqueue = {
        sigset_t sq_signals = {
            __uint32_t [4] __bits = [ 0x14103, 0, 0, 0x1000000 ]
        }
        sigset_t sq_kill = {
            __uint32_t [4] __bits = [ 0x10100, 0, 0, 0 ]
        }
        struct sq_list = {
            struct ksiginfo *tqh_first = 0xfffff81531ebe690
            struct ksiginfo **tqh_last = 0xfffff81531ebeaf0
        }
        struct proc *sq_proc = 0xfffff80445c694d8
        int sq_flags = 0x1
    }
    pid_t p_oppid = 0x1
    struct vmspace *p_vmspace = 0xfffff8025864bc40
    u_int p_swtick = 0x8b25eed7
    struct itimerval p_realtimer = {
        struct timeval it_interval = {
            time_t tv_sec = 0
            suseconds_t tv_usec = 0
        }
        struct timeval it_value = {
            time_t tv_sec = 0
            suseconds_t tv_usec = 0
        }
    }
    struct rusage p_ru = {
        struct timeval ru_utime = {
            time_t tv_sec = 0
            suseconds_t tv_usec = 0
        }
        struct timeval ru_stime = {
            time_t tv_sec = 0
            suseconds_t tv_usec = 0
        }
        long ru_maxrss = 0
        long ru_ixrss = 0
        long ru_idrss = 0
        long ru_isrss = 0
        long ru_minflt = 0
        long ru_majflt = 0
        long ru_nswap = 0
        long ru_inblock = 0
        long ru_oublock = 0
        long ru_msgsnd = 0
        long ru_msgrcv = 0
        long ru_nsignals = 0
        long ru_nvcsw = 0
        long ru_nivcsw = 0
    }
    struct rusage_ext p_rux = {
        uint64_t rux_runtime = 0xc4463d41
        uint64_t rux_uticks = 0x36
        uint64_t rux_sticks = 0xa4
        uint64_t rux_iticks = 0
        uint64_t rux_uu = 0x5f0e4
        uint64_t rux_su = 0x120b00
        uint64_t rux_tu = 0x17fbe5
    }
    struct rusage_ext p_crux = {
        uint64_t rux_runtime = 0
        uint64_t rux_uticks = 0
        uint64_t rux_sticks = 0
        uint64_t rux_iticks = 0
        uint64_t rux_uu = 0
        uint64_t rux_su = 0
        uint64_t rux_tu = 0
    }
    int p_profthreads = 0
    volatile int p_exitthreads = 0
    int p_traceflag = 0
    struct vnode *p_tracevp = 0
    struct ucred *p_tracecred = 0
    struct vnode *p_textvp = 0xfffff8046d76e218
    u_int p_lock = 0
    struct sigiolst p_sigiolst = {
        struct sigio *slh_first = 0
    }
    int p_sigparent = 0x14
    int p_sig = 0
    u_long p_code = 0
    u_int p_stops = 0
    u_int p_stype = 0
    char p_step = '\0'
    u_char p_pfsflags = 0
    struct nlminfo *p_nlminfo = 0
    struct kaioinfo *p_aioinfo = 0
    struct thread *p_singlethread = 0xfffff804a4cc0000
    int p_suspcount = 0x26
    struct thread *p_xthread = 0
    int p_boundary_count = 0x25
    int p_pendingcnt = 0x30
    struct itimers *p_itimers = 0
    struct procdesc *p_procdesc = 0
    u_int p_magic = 0xbeefface
    int p_osrel = 0x10c8e6
    char [20] p_comm = [ "" ]
    struct pgrp *p_pgrp = 0xfffff8074431c410
    struct sysentvec *p_sysent = 0xffffffff812d6ac8
    struct pargs *p_args = 0xfffff81a09bd9e70
    rlim_t p_cpulimit = 0x7fffffffffffffff
    char p_nice = '\0'
    int p_fibnum = 0
    u_short p_xstat = 0x11
    struct knlist p_klist = {
        struct klist kl_list = {
            struct knote *slh_first = 0xfffff80687e10080
        }
        void (*)() kl_lock = kernel`knlist_mtx_lock
        void (*)() kl_unlock = kernel`knlist_mtx_unlock
        void (*)() kl_assert_locked = kernel`knlist_mtx_assert_locked
        void (*)() kl_assert_unlocked = kernel`knlist_mtx_assert_unlocked
        void *kl_lockarg = 0xfffff80445c695d8
    }
    int p_numthreads = 0x27
    struct mdproc p_md = {
        struct proc_ldt *md_ldt = 0
        struct system_segment_descriptor md_ldt_sd = {
            unsigned long sd_lolimit :16 = 0
            unsigned long sd_lobase :24 = 0
            unsigned long sd_type :5 = 0
            unsigned long sd_dpl :2 = 0
            unsigned long sd_p :1 = 0
            unsigned long sd_hilimit :4 = 0
            unsigned long sd_xx0 :3 = 0
            unsigned long sd_gran :1 = 0
            unsigned long sd_hibase :40 = 0
            unsigned long sd_xx1 :8 = 0
            unsigned long sd_mbz :5 = 0
            unsigned long sd_xx2 :19 = 0
        }
    }
    struct callout p_itcallout = {
        union c_links = {
            struct le = {
                struct callout *le_next = 0
                struct callout **le_prev = 0
            }
            struct sle = {
                struct callout *sle_next = 0
            }
            struct tqe = {
                struct callout *tqe_next = 0
                struct callout **tqe_prev = 0
            }
        }
        sbintime_t c_time = 0
        sbintime_t c_precision = 0
        void *c_arg = 0
        void (*)() c_func = 0
        struct lock_object *c_lock = 0xfffff80445c695d8
        int c_flags = 0
        volatile int c_cpu = 0
    }
    u_short p_acflag = 0x11
    struct proc *p_peers = 0
    struct proc *p_leader = 0xfffff80445c694d8
    void *p_emuldata = 0
    struct label *p_label = 0
    struct p_sched *p_sched = 0xfffff80445c699b0
    struct p_ktr = {
        struct ktr_request *stqh_first = 0
        struct ktr_request **stqh_last = 0xfffff80445c69938
    }
    struct p_mqnotifier = {
        struct mqueue_notifier *lh_first = 0
    }
    struct kdtrace_proc *p_dtrace = 0xfffff8166c1c9040
    struct cv p_pwait = {
        const char *cv_description = 0xffffffff80ea9ab8
        volatile int cv_waiters = 0
    }
    struct cv p_dbgwait = {
        const char *cv_description = 0xffffffff80ea9abf
        volatile int cv_waiters = 0
    }
    uint64_t p_prev_runtime = 0
    struct racct *p_racct = 0
    u_char p_throttled = 0
    struct p_orphan = {
        struct proc *le_next = 0
        struct proc **le_prev = 0xfffff804697d09a0
    }
    struct p_orphans = {
        struct proc *lh_first = 0
    }
}
Received on Tue Jun 07 2016 - 02:13:59 UTC

This archive was generated by hypermail 2.4.0 : Wed May 19 2021 - 11:41:05 UTC