Re: Fatal error 'mutex is on list' at line 139 in file /usr/src/lib/libthr/thread/thr_mutex.c (errno = 35)

From: Oleg V. Nauman <oleg_at_opentransfer.com>
Date: Sat, 19 Mar 2016 22:01:45 +0200
On Saturday 19 March 2016 21:47:57 Konstantin Belousov wrote:
> On Sat, Mar 19, 2016 at 04:03:06PM +0200, Oleg V. Nauman wrote:
> > Core was generated by `akonadi_baloo_index'.
> > Program terminated with signal SIGABRT, Aborted.
> > #0  0x0000000805630d6a in thr_kill () from /lib/libc.so.7
> > (gdb) bt
> > #0  0x0000000805630d6a in thr_kill () from /lib/libc.so.7
> > #1  0x0000000805630d3b in __raise (s=6) at
> > /usr/src/lib/libc/gen/raise.c:52
> > #2  0x0000000805630ca9 in abort () at /usr/src/lib/libc/stdlib/abort.c:65
> > #3  0x00000008053564b4 in _thread_exit (
> > 
> >     fname=0x805357b70 "/usr/src/lib/libthr/thread/thr_mutex.c",
> >     lineno=139,
> >     msg=0x805357b97 "mutex is on list") at
> > 
> > /usr/src/lib/libthr/thread/thr_exit.c:182
> > #4  0x000000080534cddc in mutex_assert_not_owned (m=0x80064d000)
> > 
> >     at /usr/src/lib/libthr/thread/thr_mutex.c:139
> > 
> > #5  0x000000080534cfb9 in enqueue_mutex (curthread=0x80e015000,
> > m=0x80064d000)
> >     at /usr/src/lib/libthr/thread/thr_mutex.c:383
> > 
> > #6  0x000000080534d213 in mutex_lock_common (m=0x80064d000,
> > abstime=0x7fffffffd4e8,
> > 
> >     cvattach=0) at /usr/src/lib/libthr/thread/thr_mutex.c:533
> > 
> > #7  0x000000080534c6be in __pthread_mutex_timedlock (mutex=0x811c00008,
> > 
> >     abstime=0x7fffffffd4e8) at /usr/src/lib/libthr/thread/thr_mutex.c:566
> > 
> > (gdb) f 7
> > #7  0x000000080534c6be in __pthread_mutex_timedlock (mutex=0x811c00008,
> > 
> >     abstime=0x7fffffffd4e8) at /usr/src/lib/libthr/thread/thr_mutex.c:566
> > 
> > 566                     ret = mutex_lock_common(m, abstime, 0);
> > (gdb) p *mutex
> > $1 = (pthread_mutex_t) 0x8000000000000001
> > (gdb) p m
> > $2 = (struct pthread_mutex *) 0x80064d000
> > (gdb) p *m
> > $3 = {m_lock = {m_owner = -2147383372, m_flags = 1, m_ceilings = {0, 0},
> > m_spare = {0, 0, 0,
> > 
> >       0}}, m_flags = 1, m_owner = 100276, m_count = 0, m_spinloops = 0,
> > 
> > m_yieldloops = 0,
> > 
> >   m_qe = {tqe_next = 0x0, tqe_prev = 0x0}, m_pqe = {tqe_next = 0x0,
> >   tqe_prev =
> > 0x0}}
> > (gdb) p *curthread
> > No symbol "curthread" in current context.
> > (gdb)
> 
> curthread is available, e.g., in frame 5.

(gdb) f 5
#5  0x000000080534cfb9 in enqueue_mutex (curthread=0x80e015000, m=0x80064d000)
    at /usr/src/lib/libthr/thread/thr_mutex.c:383
383             mutex_assert_not_owned(m);
(gdb) p *curthread
$4 = {tid = 100276, lock = {m_owner = 0, m_flags = 0, m_ceilings = {0, 0}, 
m_spare = {0, 0,
      0, 0}}, cycle = 0, locklevel = 0, critical_count = 0, sigblock = 0, tle 
= {
    tqe_next = 0x0, tqe_prev = 0x80555df40 <_thread_list>}, gcle = {tqe_next = 
0x0,
    tqe_prev = 0x0}, hle = {le_next = 0x0, le_prev = 0x805568340}, wle = 
{tqe_next = 0x0,
    tqe_prev = 0x0}, refcount = 0, start_routine = 0x0, arg = 0x0, attr = 
{sched_policy = 2,
    sched_inherit = 4, prio = 0, suspend = 0, flags = 258, stackaddr_attr = 
0x7ffffdfff000,
    stacksize_attr = 33554432, guardsize_attr = 4096, cpuset = 0x0, cpusetsize 
= 0},
  cancel_enable = 1, cancel_pending = 0, cancel_point = 0, no_cancel = 0, 
cancel_async = 0,
  cancelling = 0, sigmask = {__bits = {0, 0, 0, 0}}, unblock_sigcancel = 0,
  in_sigsuspend = 0, deferred_siginfo = {si_signo = 0, si_errno = 0, si_code = 
0, si_pid = 0,
    si_uid = 0, si_status = 0, si_addr = 0x0, si_value = {sival_int = 0, 
sival_ptr = 0x0,
      sigval_int = 0, sigval_ptr = 0x0}, _reason = {_fault = {_trapno = 0}, 
_timer = {
        _timerid = 0, _overrun = 0}, _mesgq = {_mqd = 0}, _poll = {_band = 0}, 
__spare__ = {
        __spare1__ = 0, __spare2__ = {0, 0, 0, 0, 0, 0, 0}}}}, 
deferred_sigmask = {__bits = {
      0, 0, 0, 0}}, deferred_sigact = {__sigaction_u = {__sa_handler = 0x0,
      __sa_sigaction = 0x0}, sa_flags = 0, sa_mask = {__bits = {0, 0, 0, 0}}},
  deferred_run = 0, force_exit = 0, state = PS_RUNNING, error = 0, joiner = 
0x0, flags = 0,
  tlflags = 2, mq = {{tqh_first = 0x0, tqh_last = 0x80e0151a0}, {tqh_first = 
0x0,
      tqh_last = 0x80e0151b0}, {tqh_first = 0x0, tqh_last = 0x80e0151c0}, 
{tqh_first = 0x0,
      tqh_last = 0x80e0151d0}}, ret = 0x0, specific = 0x80064c000, 
specific_data_count = 4,
  rdlock_count = 0, rtld_bits = 0, tcb = 0x8006fd158, cleanup = 0x0, ex = {
    exception_class = 0, exception_cleanup = 0x0, private_1 = 0, private_2 = 
0},
  unwind_stackend = 0x7ffffffff000, unwind_disabled = 0, magic = 3499860245,
  report_events = 0, event_mask = 0, event_buf = {event = TD_EVENT_NONE, th_p 
= 0, data = 0},
  wchan = 0x0, mutex_obj = 0x0, will_sleep = 0, nwaiter_defer = 0, 
defer_waiters = {
    0x0 <repeats 50 times>}, wake_addr = 0x805568048, sleepqueue = 
0x80e014040}



> 
> The content of the printout is reasonable, but it now contradicts the
> assertion that fired, since both checked pointers are NULL, as reported by gdb.
> 
> Please add the following debugging patch on top of the previous change
> and reproduce the issue.  I need the same info, but please also provide
> the exact gasp message from libthr, which is enhanced in the patch below.


 OK, tomorrow morning I will try to reproduce the coredump with the new patch.

Thank you


> 
> diff --git a/lib/libthr/thread/thr_mutex.c b/lib/libthr/thread/thr_mutex.c
> index 30a8be2..3342c9f 100644
> --- a/lib/libthr/thread/thr_mutex.c
> +++ b/lib/libthr/thread/thr_mutex.c
> @@ -124,8 +124,14 @@ mutex_assert_is_owned(struct pthread_mutex *m)
>  {
> 
>  #if defined(_PTHREADS_INVARIANTS)
> -	if (__predict_false(m->m_qe.tqe_prev == NULL))
> -		PANIC("mutex is not on list");
> +	if (__predict_false(m->m_qe.tqe_prev == NULL)) {
> +		char msg[128];
> +		snprintf(msg, sizeof(msg),
> +		    "mutex %p own %#x %#x is not on list %p %p",
> +		    m, m->m_lock.m_owner, m->m_owner, m->m_qe.tqe_prev,
> +		    m->m_qe.tqe_next);
> +		PANIC(msg);
> +	}
>  #endif
>  }
> 
> @@ -135,8 +141,14 @@ mutex_assert_not_owned(struct pthread_mutex *m)
> 
>  #if defined(_PTHREADS_INVARIANTS)
>  	if (__predict_false(m->m_qe.tqe_prev != NULL ||
> -	    m->m_qe.tqe_next != NULL))
> -		PANIC("mutex is on list");
> +	    m->m_qe.tqe_next != NULL)) {
> +		char msg[128];
> +		snprintf(msg, sizeof(msg),
> +		    "mutex %p own %#x %#x is on list %p %p",
> +		    m, m->m_lock.m_owner, m->m_owner, m->m_qe.tqe_prev,
> +		    m->m_qe.tqe_next);
> +		PANIC(msg);
> +	}
>  #endif
>  }
Received on Sat Mar 19 2016 - 19:01:58 UTC

This archive was generated by hypermail 2.4.0 : Wed May 19 2021 - 11:41:03 UTC