Re: General Protection Fault in prelist_remove()

From: Mark Johnston <markj_at_freebsd.org>
Date: Mon, 17 Nov 2014 23:21:18 -0800
On Sat, Nov 15, 2014 at 04:17:35PM -0500, Ryan Stone wrote:
> On Mon, Sep 16, 2013 at 1:10 PM, Mark Johnston <markj_at_freebsd.org> wrote:
> > I've partially fixed this at work by adding a rw lock to protect access
> > to the the prefix, default router, and DAD lists. The patch is here:
> > http://people.freebsd.org/~markj/patches/ndp-locking.diff
> 
> Hi Mark,
> 
> I've hit a bug in this patch today.  The problem is in the locking of
> the DAD list.  Many functions (e.g.
> nd6_dad_duplicated) call nd6_dad_find() to look up a dadq structure,
> and then manipulate the structure with no lock held.  The problem that
> once nd6_dad_find() releases the ND lock there is nothing preventing
> another thread from going in and free'ing the structure.  This causes
> a use-after-free in nd6_dad_duplicated.  I have a setup which is
> somehow triggering DAD on link-local addresses (I don't understand
> why; I don't have duplicate mac addresses on the network as best that
> I can tell) and with INVARIANTS on I very frequently get a crash in
> nd6_dad_duplicated.
> 
> 
> It looks to me that the only way to fix it is either introduce
> referencing counting into the structure, or push the locking out of
> nd6_dad_find() and into the callers.  Any opinions?

I spent some time looking at the code. There seem to be a few
miscellaneous bugs in addition to races you described above;
nd6_dad_na_input() can potentially call nd6_dad_duplicated() with a NULL
dadq, for example. I wasn't able to specifically reproduce the
use-after-free that you described, but it's pretty easy to trigger
crashes in the DAD code, e.g. by assigning a duplicate address to an
interface in a loop.

It's hard to fix these races directly because of the different contexts
in which the DAD entry queue is manipulated: entries can be added or
removed with ioctls when addresses are added or removed, the DAD callout
can modify the queue, and nd6_dad_duplicated() can be called in the
packet processing path for some NS or NA packets. It seems to me that
this can be simplified somewhat though, which makes it easier to fix the
races.

The patch below (against HEAD) simplifies the DAD code and tries to fix
remaining races by adding a refcount to struct dadq. Note that hrs_at_ added a
lock for the DAD queue in r266857, so my ndp-locking patch needs to be
rebased. It simplifies the code by having the DAD callout do most of the
work: when a NS or NA packet indicates that an address is a duplicate,
we just increment a counter and wait for the corresponding DAD callout
to flag the address. With the patch, DAD is only cancelled when the
corresponding address is removed from an interface. It also fixes some
improper handling of the ifa references for struct dadq.

Note that there are still some races around the struct dadq counters
themselves (the increments are non-atomic), and the handling of
in6_flags should be atomic. A copy of the patch is here:
https://people.freebsd.org/~markj/patches/nd6_dad_races.diff
It survives some basic tests and stress testing that previously exposed
some races on HEAD.

Thanks,
-Mark

diff --git a/sys/netinet6/nd6_nbr.c b/sys/netinet6/nd6_nbr.c
index a917b76..819fe30 100644
--- a/sys/netinet6/nd6_nbr.c
+++ b/sys/netinet6/nd6_nbr.c
_at__at_ -51,6 +51,7 _at__at_ __FBSDID("$FreeBSD$");
 #include <sys/syslog.h>
 #include <sys/queue.h>
 #include <sys/callout.h>
+#include <sys/refcount.h>
 
 #include <net/if.h>
 #include <net/if_types.h>
_at__at_ -1167,6 +1168,7 _at__at_ struct dadq {
 	int dad_na_icount;
 	struct callout dad_timer_ch;
 	struct vnet *dad_vnet;
+	u_int dad_refcnt;
 };
 
 static VNET_DEFINE(TAILQ_HEAD(, dadq), dadq);
_at__at_ -1183,12 +1185,22 _at__at_ static VNET_DEFINE(struct rwlock, dad_rwlock);
 #define	DADQ_WUNLOCK()		rw_wunlock(&V_dad_rwlock)	
 
 static void
+nd6_dad_rele(struct dadq *dp)
+{
+
+	if (refcount_release(&dp->dad_refcnt)) {
+		ifa_free(dp->dad_ifa);
+		free(dp, M_IP6NDP);
+	}
+}
+
+static void
 nd6_dad_add(struct dadq *dp)
 {
 
-	ifa_ref(dp->dad_ifa);	/* just for safety */
+	refcount_acquire(&dp->dad_refcnt);
 	DADQ_WLOCK();
-	TAILQ_INSERT_TAIL(&V_dadq, (struct dadq *)dp, dad_list);
+	TAILQ_INSERT_TAIL(&V_dadq, dp, dad_list);
 	DADQ_WUNLOCK();
 }
 
_at__at_ -1196,10 +1208,10 _at__at_ static void
 nd6_dad_del(struct dadq *dp)
 {
 
-	ifa_free(dp->dad_ifa);
 	DADQ_WLOCK();
-	TAILQ_REMOVE(&V_dadq, (struct dadq *)dp, dad_list);
+	TAILQ_REMOVE(&V_dadq, dp, dad_list);
 	DADQ_WUNLOCK();
+	nd6_dad_rele(dp);
 }
 
 static struct dadq *
_at__at_ -1209,8 +1221,10 _at__at_ nd6_dad_find(struct ifaddr *ifa)
 
 	DADQ_RLOCK();
 	TAILQ_FOREACH(dp, &V_dadq, dad_list)
-		if (dp->dad_ifa == ifa)
+		if (dp->dad_ifa == ifa) {
+			refcount_acquire(&dp->dad_refcnt);
 			break;
+		}
 	DADQ_RUNLOCK();
 
 	return (dp);
_at__at_ -1228,7 +1242,8 _at__at_ static void
 nd6_dad_stoptimer(struct dadq *dp)
 {
 
-	callout_stop(&dp->dad_timer_ch);
+	callout_drain(&dp->dad_timer_ch);
+	nd6_dad_rele(dp);
 }
 
 /*
_at__at_ -1275,8 +1290,9 _at__at_ nd6_dad_start(struct ifaddr *ifa, int delay)
 	}
 	if (ND_IFINFO(ifa->ifa_ifp)->flags & ND6_IFF_IFDISABLED)
 		return;
-	if (nd6_dad_find(ifa) != NULL) {
+	if ((dp = nd6_dad_find(ifa)) != NULL) {
 		/* DAD already in progress */
+		nd6_dad_rele(dp);
 		return;
 	}
 
_at__at_ -1303,9 +1319,11 _at__at_ nd6_dad_start(struct ifaddr *ifa, int delay)
 	 * (re)initialization.
 	 */
 	dp->dad_ifa = ifa;
+	ifa_ref(dp->dad_ifa);
 	dp->dad_count = V_ip6_dad_count;
 	dp->dad_ns_icount = dp->dad_na_icount = 0;
 	dp->dad_ns_ocount = dp->dad_ns_tcount = 0;
+	refcount_init(&dp->dad_refcnt, 1);
 	nd6_dad_add(dp);
 	if (delay == 0) {
 		nd6_dad_ns_output(dp, ifa);
_at__at_ -1334,8 +1352,16 _at__at_ nd6_dad_stop(struct ifaddr *ifa)
 
 	nd6_dad_stoptimer(dp);
 
+	/*
+	 * The DAD queue entry may have been removed by nd6_dad_timer() while
+	 * we were waiting for it to stop, so re-do the lookup.
+	 */
+	nd6_dad_rele(dp);
+	if (nd6_dad_find(ifa) == NULL)
+		return;
+
 	nd6_dad_del(dp);
-	free(dp, M_IP6NDP);
+	nd6_dad_rele(dp);
 }
 
 static void
_at__at_ -1354,38 +1380,30 _at__at_ nd6_dad_timer(struct dadq *dp)
 	}
 	if (ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) {
 		/* Do not need DAD for ifdisabled interface. */
-		TAILQ_REMOVE(&V_dadq, (struct dadq *)dp, dad_list);
 		log(LOG_ERR, "nd6_dad_timer: cancel DAD on %s because of "
 		    "ND6_IFF_IFDISABLED.\n", ifp->if_xname);
-		free(dp, M_IP6NDP);
-		dp = NULL;
-		ifa_free(ifa);
-		goto done;
+		goto dup;
 	}
 	if (ia->ia6_flags & IN6_IFF_DUPLICATED) {
 		log(LOG_ERR, "nd6_dad_timer: called with duplicated address "
 			"%s(%s)\n",
 			ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr),
 			ifa->ifa_ifp ? if_name(ifa->ifa_ifp) : "???");
-		goto done;
+		goto dup;
 	}
 	if ((ia->ia6_flags & IN6_IFF_TENTATIVE) == 0) {
 		log(LOG_ERR, "nd6_dad_timer: called with non-tentative address "
 			"%s(%s)\n",
 			ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr),
 			ifa->ifa_ifp ? if_name(ifa->ifa_ifp) : "???");
-		goto done;
+		goto dup;
 	}
 
 	/* timeouted with IFF_{RUNNING,UP} check */
 	if (dp->dad_ns_tcount > V_dad_maxtry) {
 		nd6log((LOG_INFO, "%s: could not run DAD, driver problem?\n",
 		    if_name(ifa->ifa_ifp)));
-
-		nd6_dad_del(dp);
-		free(dp, M_IP6NDP);
-		dp = NULL;
-		goto done;
+		goto dup;
 	}
 
 	/* Need more checks? */
_at__at_ -1396,33 +1414,16 _at__at_ nd6_dad_timer(struct dadq *dp)
 		nd6_dad_ns_output(dp, ifa);
 		nd6_dad_starttimer(dp,
 		    (long)ND_IFINFO(ifa->ifa_ifp)->retrans * hz / 1000);
+		goto done;
 	} else {
 		/*
 		 * We have transmitted sufficient number of DAD packets.
 		 * See what we've got.
 		 */
-		int duplicate;
-
-		duplicate = 0;
-
-		if (dp->dad_na_icount) {
-			/*
-			 * the check is in nd6_dad_na_input(),
-			 * but just in case
-			 */
-			duplicate++;
-		}
-
-		if (dp->dad_ns_icount) {
-			/* We've seen NS, means DAD has failed. */
-			duplicate++;
-		}
-
-		if (duplicate) {
-			/* (*dp) will be freed in nd6_dad_duplicated() */
+		if (dp->dad_ns_icount > 0 || dp->dad_na_icount > 0)
+			/* We've seen NS or NA, means DAD has failed. */
 			nd6_dad_duplicated(ifa, dp);
-			dp = NULL;
-		} else {
+		else {
 			/*
 			 * We are done with DAD.  No NA came, no NS came.
 			 * No duplicate address found.  Check IFDISABLED flag
_at__at_ -1436,13 +1437,11 _at__at_ nd6_dad_timer(struct dadq *dp)
 			    "%s: DAD complete for %s - no duplicates found\n",
 			    if_name(ifa->ifa_ifp),
 			    ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr)));
-
-			nd6_dad_del(dp);
-			free(dp, M_IP6NDP);
-			dp = NULL;
 		}
 	}
-
+dup:
+	nd6_dad_del(dp);
+	nd6_dad_rele(dp);
 done:
 	CURVNET_RESTORE();
 }
_at__at_ -1462,9 +1461,6 _at__at_ nd6_dad_duplicated(struct ifaddr *ifa, struct dadq *dp)
 	ia->ia6_flags &= ~IN6_IFF_TENTATIVE;
 	ia->ia6_flags |= IN6_IFF_DUPLICATED;
 
-	/* We are done with DAD, with duplicate address found. (failure) */
-	nd6_dad_stoptimer(dp);
-
 	ifp = ifa->ifa_ifp;
 	log(LOG_ERR, "%s: DAD complete for %s - duplicate found\n",
 	    if_name(ifp), ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr));
_at__at_ -1505,9 +1501,6 _at__at_ nd6_dad_duplicated(struct ifaddr *ifa, struct dadq *dp)
 			break;
 		}
 	}
-
-	nd6_dad_del(dp);
-	free(dp, M_IP6NDP);
 }
 
 static void
_at__at_ -1535,7 +1528,6 _at__at_ nd6_dad_ns_input(struct ifaddr *ifa)
 	struct ifnet *ifp;
 	const struct in6_addr *taddr6;
 	struct dadq *dp;
-	int duplicate;
 
 	if (ifa == NULL)
 		panic("ifa == NULL in nd6_dad_ns_input");
_at__at_ -1543,8 +1535,9 _at__at_ nd6_dad_ns_input(struct ifaddr *ifa)
 	ia = (struct in6_ifaddr *)ifa;
 	ifp = ifa->ifa_ifp;
 	taddr6 = &ia->ia_addr.sin6_addr;
-	duplicate = 0;
 	dp = nd6_dad_find(ifa);
+	if (dp == NULL)
+		return;
 
 	/* Quickhack - completely ignore DAD NS packets */
 	if (V_dad_ignore_ns) {
_at__at_ -1556,26 +1549,10 _at__at_ nd6_dad_ns_input(struct ifaddr *ifa)
 		return;
 	}
 
-	/*
-	 * if I'm yet to start DAD, someone else started using this address
-	 * first.  I have a duplicate and you win.
-	 */
-	if (dp == NULL || dp->dad_ns_ocount == 0)
-		duplicate++;
-
 	/* XXX more checks for loopback situation - see nd6_dad_timer too */
 
-	if (duplicate) {
-		nd6_dad_duplicated(ifa, dp);
-		dp = NULL;	/* will be freed in nd6_dad_duplicated() */
-	} else {
-		/*
-		 * not sure if I got a duplicate.
-		 * increment ns count and see what happens.
-		 */
-		if (dp)
-			dp->dad_ns_icount++;
-	}
+	dp->dad_ns_icount++;
+	nd6_dad_rele(dp);
 }
 
 static void
_at__at_ -1587,9 +1564,8 _at__at_ nd6_dad_na_input(struct ifaddr *ifa)
 		panic("ifa == NULL in nd6_dad_na_input");
 
 	dp = nd6_dad_find(ifa);
-	if (dp)
+	if (dp != NULL) {
 		dp->dad_na_icount++;
-
-	/* remove the address. */
-	nd6_dad_duplicated(ifa, dp);
+		nd6_dad_rele(dp);
+	}
 }
Received on Tue Nov 18 2014 - 06:21:24 UTC

This archive was generated by hypermail 2.4.0 : Wed May 19 2021 - 11:40:54 UTC