[follow-up] FreeBSD/amd64 r195146 to r195848, fatal trap 12 under network load

From: Kamigishi Rei <spambox_at_haruhiism.net> Date: Tue, 28 Jul 2009 18:24:53 +0400 · This archive was generated by hypermail 2.4.0 : Wed May 19 2021 - 11:39:52 UTC

Hello, hope you're having a nice day,

The revisions mentioned are those which were tested by me; r195849+ has the 
corruption padded somewhere else so it might produce a panic with a 
different set of options. For reference, my test kernel uses the GENERIC 
config from the May 09 snapshot without WITNESS and with IPFIREWALL, 
IPFIREWALL_DEFAULT_TO_ACCEPT and DEVICE_POLLING enabled.

If someone experiences fatal traps under network load, and has the 
kernel compiled with "options INVARIANTS", here's a patch to check whether 
you're suffering from the memory corruption in netisr's DPCPU area. I'm 
pretty interested in the backtraces this panic() call will produce.

Please note: with this patch, your system - if affected by the 
aforementioned problem - will trap *almost immediately* after the 
corruption happens, while a non-patched system can survive that for a 
bit if, by the time v is assigned mtx_lock's value in _mtx_sleep_flags(), 
the value has already been fixed by a concurrent thread.
I highly recommend having a backup 'normal kernel'.
DO NOT apply this patch on a system you can't access via local (and/or 
IPMI/LOM) or serial console.

For systems without INVARIANTS (although I'm not sure if the issue 
affects non-INVARIANTS builds) you can replace

KASSERT(!(((foo & 0x8000000000000000)==0x0) && (foo != MTX_UNOWNED)),("mi_switch: DPCPU sanity checks: netisr workstream mutex nws_mtx contains an invalid pointer %llx in mtx_lock; this will lead to a page fault (cpuid: %u). Terminating.\n", ((long long unsigned)foo), (mycpuid)));

with

if(((foo & 0x8000000000000000)==0x0) && (foo != MTX_UNOWNED)) panic("mi_switch: DPCPU sanity checks: netisr workstream mutex nws_mtx contains an invalid pointer %llx in mtx_lock; this will lead to a page fault (cpuid: %u). Terminating.\n", (long long unsigned)foo, mycpuid);

You can also replace the panic() call with a simple printf(), and it 
will just print out the warning message. That won't really help with 
debugging the cause, although it will show whether you're getting a fatal 
trap because of the aforementioned corruption.

I would be really grateful if someone could help me debug this issue, 
namely, point me at how I can trace the problem to a thread/system call 
which could have messed with the memory area in question.

--
Kamigishi Rei
KREI-RIPE

Index: sys/kern/kern_synch.c
===================================================================
--- sys/kern/kern_synch.c	(revision 195848)
+++ sys/kern/kern_synch.c	(working copy)
@@ -71,6 +71,8 @@
 #include <vm/pmap.h>
 #endif

+#include <net/netisr.h>
+
 #define	KTDSTATE(td)							\
 	(((td)->td_inhibitors & TDI_SLEEPING) != 0 ? "sleep"  :		\
 	((td)->td_inhibitors & TDI_SUSPENDED) != 0 ? "suspended" :	\
@@ -391,7 +393,18 @@
 	uint64_t runtime, new_switchtime;
 	struct thread *td;
 	struct proc *p;
+	struct netisr_workstream *nwsp;
+	uintptr_t foo;
+	unsigned int mycpuid;

+	for (mycpuid = 0; mycpuid < mp_maxid; mycpuid++) {
+		nwsp = DPCPU_ID_PTR(mycpuid, nws);
+		if (mtx_initialized(&(nwsp->nws_mtx))) {
+			foo = nwsp->nws_mtx.mtx_lock;
+			KASSERT(!(((foo & 0x8000000000000000)==0x0) && (foo != MTX_UNOWNED)),("mi_switch: DPCPU sanity checks: netisr workstream mutex nws_mtx contains an invalid pointer %llx in mtx_lock; this will lead to a page fault (cpuid: %u). Terminating.\n", ((long long unsigned)foo), (mycpuid)));
+		}
+	}
+
 	td = curthread;			/* XXX */
 	THREAD_LOCK_ASSERT(td, MA_OWNED | MA_NOTRECURSED);
 	p = td->td_proc;		/* XXX */
Index: sys/net/netisr.c
===================================================================
--- sys/net/netisr.c	(revision 195848)
+++ sys/net/netisr.c	(working copy)
@@ -203,8 +203,6 @@
 	u_int		 np_policy;	/* Work placement policy. */
 };

-#define	NETISR_MAXPROT		16		/* Compile-time limit. */
-
 /*
  * The np array describes all registered protocols, indexed by protocol
  * number.
@@ -212,53 +210,6 @@
 static struct netisr_proto	np[NETISR_MAXPROT];

 /*
- * Protocol-specific work for each workstream is described by struct
- * netisr_work.  Each work descriptor consists of an mbuf queue and
- * statistics.
- */
-struct netisr_work {
-	/*
-	 * Packet queue, linked by m_nextpkt.
-	 */
-	struct mbuf	*nw_head;
-	struct mbuf	*nw_tail;
-	u_int		 nw_len;
-	u_int		 nw_qlimit;
-	u_int		 nw_watermark;
-
-	/*
-	 * Statistics -- written unlocked, but mostly from curcpu.
-	 */
-	u_int64_t	 nw_dispatched; /* Number of direct dispatches. */
-	u_int64_t	 nw_hybrid_dispatched; /* "" hybrid dispatches. */
-	u_int64_t	 nw_qdrops;	/* "" drops. */
-	u_int64_t	 nw_queued;	/* "" enqueues. */
-	u_int64_t	 nw_handled;	/* "" handled in worker. */
-};
-
-/*
- * Workstreams hold a set of ordered work across each protocol, and are
- * described by netisr_workstream.  Each workstream is associated with a
- * worker thread, which in turn is pinned to a CPU.  Work associated with a
- * workstream can be processd in other threads during direct dispatch;
- * concurrent processing is prevented by the NWS_RUNNING flag, which
- * indicates that a thread is already processing the work queue.
- */
-struct netisr_workstream {
-	struct intr_event *nws_intr_event;	/* Handler for stream. */
-	void		*nws_swi_cookie;	/* swi(9) cookie for stream. */
-	struct mtx	 nws_mtx;		/* Synchronize work. */
-	u_int		 nws_cpu;		/* CPU pinning. */
-	u_int		 nws_flags;		/* Wakeup flags. */
-	u_int		 nws_pendingbits;	/* Scheduled protocols. */
-
-	/*
-	 * Each protocol has per-workstream data.
-	 */
-	struct netisr_work	nws_work[NETISR_MAXPROT];
-} __aligned(CACHE_LINE_SIZE);
-
-/*
  * Per-CPU workstream data.
  */
 DPCPU_DEFINE(struct netisr_workstream, nws);
Index: sys/net/netisr.h
===================================================================
--- sys/net/netisr.h	(revision 195848)
+++ sys/net/netisr.h	(working copy)
@@ -111,6 +111,60 @@
 };

 /*
+ * Protocol-specific work for each workstream is described by struct
+ * netisr_work.  Each work descriptor consists of an mbuf queue and
+ * statistics.
+ */
+struct netisr_work {
+	/*
+	 * Packet queue, linked by m_nextpkt.
+	 */
+	struct mbuf	*nw_head;
+	struct mbuf	*nw_tail;
+	u_int		 nw_len;
+	u_int		 nw_qlimit;
+	u_int		 nw_watermark;
+
+	/*
+	 * Statistics -- written unlocked, but mostly from curcpu.
+	 */
+	u_int64_t	 nw_dispatched; /* Number of direct dispatches. */
+	u_int64_t	 nw_hybrid_dispatched; /* "" hybrid dispatches. */
+	u_int64_t	 nw_qdrops;	/* "" drops. */
+	u_int64_t	 nw_queued;	/* "" enqueues. */
+	u_int64_t	 nw_handled;	/* "" handled in worker. */
+};
+
+#define	NETISR_MAXPROT		16		/* Compile-time limit. */
+
+/*
+ * Workstreams hold a set of ordered work across each protocol, and are
+ * described by netisr_workstream.  Each workstream is associated with a
+ * worker thread, which in turn is pinned to a CPU.  Work associated with a
+ * workstream can be processd in other threads during direct dispatch;
+ * concurrent processing is prevented by the NWS_RUNNING flag, which
+ * indicates that a thread is already processing the work queue.
+ */
+struct netisr_workstream {
+	struct intr_event *nws_intr_event;	/* Handler for stream. */
+	void		*nws_swi_cookie;	/* swi(9) cookie for stream. */
+	struct mtx	 nws_mtx;		/* Synchronize work. */
+	u_int		 nws_cpu;		/* CPU pinning. */
+	u_int		 nws_flags;		/* Wakeup flags. */
+	u_int		 nws_pendingbits;	/* Scheduled protocols. */
+
+	/*
+	 * Each protocol has per-workstream data.
+	 */
+	struct netisr_work	nws_work[NETISR_MAXPROT];
+} __aligned(CACHE_LINE_SIZE);
+
+/*
+ * Declare per-CPU workstream data globally
+ */
+DPCPU_DECLARE(struct netisr_workstream, nws);
+
+/*
  * Register, unregister, and other netisr handler management functions.
  */
 void	netisr_clearqdrops(const struct netisr_handler *nhp);