Re: Fatal error 'mutex is on list' at line 139 in file /usr/src/lib/libthr/thread/thr_mutex.c (errno = 35)

From: Konstantin Belousov <kostikbel_at_gmail.com>
Date: Sat, 19 Mar 2016 21:47:57 +0200
On Sat, Mar 19, 2016 at 04:03:06PM +0200, Oleg V. Nauman wrote:
> Core was generated by `akonadi_baloo_index'.
> Program terminated with signal SIGABRT, Aborted.
> #0  0x0000000805630d6a in thr_kill () from /lib/libc.so.7
> (gdb) bt
> #0  0x0000000805630d6a in thr_kill () from /lib/libc.so.7
> #1  0x0000000805630d3b in __raise (s=6) at /usr/src/lib/libc/gen/raise.c:52
> #2  0x0000000805630ca9 in abort () at /usr/src/lib/libc/stdlib/abort.c:65
> #3  0x00000008053564b4 in _thread_exit (
>     fname=0x805357b70 "/usr/src/lib/libthr/thread/thr_mutex.c", lineno=139,
>     msg=0x805357b97 "mutex is on list") at 
> /usr/src/lib/libthr/thread/thr_exit.c:182
> #4  0x000000080534cddc in mutex_assert_not_owned (m=0x80064d000)
>     at /usr/src/lib/libthr/thread/thr_mutex.c:139
> #5  0x000000080534cfb9 in enqueue_mutex (curthread=0x80e015000, m=0x80064d000)
>     at /usr/src/lib/libthr/thread/thr_mutex.c:383
> #6  0x000000080534d213 in mutex_lock_common (m=0x80064d000, 
> abstime=0x7fffffffd4e8,
>     cvattach=0) at /usr/src/lib/libthr/thread/thr_mutex.c:533
> #7  0x000000080534c6be in __pthread_mutex_timedlock (mutex=0x811c00008,
>     abstime=0x7fffffffd4e8) at /usr/src/lib/libthr/thread/thr_mutex.c:566
> 
> (gdb) f 7
> #7  0x000000080534c6be in __pthread_mutex_timedlock (mutex=0x811c00008,
>     abstime=0x7fffffffd4e8) at /usr/src/lib/libthr/thread/thr_mutex.c:566
> 566                     ret = mutex_lock_common(m, abstime, 0);
> (gdb) p *mutex
> $1 = (pthread_mutex_t) 0x8000000000000001
> (gdb) p m
> $2 = (struct pthread_mutex *) 0x80064d000
> (gdb) p *m
> $3 = {m_lock = {m_owner = -2147383372, m_flags = 1, m_ceilings = {0, 0}, 
> m_spare = {0, 0, 0,
>       0}}, m_flags = 1, m_owner = 100276, m_count = 0, m_spinloops = 0, 
> m_yieldloops = 0,
>   m_qe = {tqe_next = 0x0, tqe_prev = 0x0}, m_pqe = {tqe_next = 0x0, tqe_prev = 
> 0x0}}
> (gdb) p *curthread
> No symbol "curthread" in current context.
> (gdb)

curthread is available, e.g., in frame 5.

The content of the printout is reasonable, but it now contradicts the
assertion that fired, since both checked pointers are NULL, as reported by gdb.

Please add the following debugging patch on top of the previous change
and reproduce the issue.  I need the same info, but please also provide
the exact gasp message from libthr, which is enhanced in the patch below.

diff --git a/lib/libthr/thread/thr_mutex.c b/lib/libthr/thread/thr_mutex.c
index 30a8be2..3342c9f 100644
--- a/lib/libthr/thread/thr_mutex.c
+++ b/lib/libthr/thread/thr_mutex.c
@@ -124,8 +124,14 @@ mutex_assert_is_owned(struct pthread_mutex *m)
 {
 
 #if defined(_PTHREADS_INVARIANTS)
-	if (__predict_false(m->m_qe.tqe_prev == NULL))
-		PANIC("mutex is not on list");
+	if (__predict_false(m->m_qe.tqe_prev == NULL)) {
+		char msg[128];
+		snprintf(msg, sizeof(msg),
+		    "mutex %p own %#x %#x is not on list %p %p",
+		    m, m->m_lock.m_owner, m->m_owner, m->m_qe.tqe_prev,
+		    m->m_qe.tqe_next);
+		PANIC(msg);
+	}
 #endif
 }
 
@@ -135,8 +141,14 @@ mutex_assert_not_owned(struct pthread_mutex *m)
 
 #if defined(_PTHREADS_INVARIANTS)
 	if (__predict_false(m->m_qe.tqe_prev != NULL ||
-	    m->m_qe.tqe_next != NULL))
-		PANIC("mutex is on list");
+	    m->m_qe.tqe_next != NULL)) {
+		char msg[128];
+		snprintf(msg, sizeof(msg),
+		    "mutex %p own %#x %#x is on list %p %p",
+		    m, m->m_lock.m_owner, m->m_owner, m->m_qe.tqe_prev,
+		    m->m_qe.tqe_next);
+		PANIC(msg);
+	}
 #endif
 }
 
Received on Sat Mar 19 2016 - 18:48:02 UTC

This archive was generated by hypermail 2.4.0 : Wed May 19 2021 - 11:41:03 UTC