Re: panic in rt_check_fib()

From: Giorgos Keramidas <keramida_at_freebsd.org>
Date: Sat, 13 Sep 2008 22:56:02 +0300
On Sat, 13 Sep 2008 22:47:52 +0300, Giorgos Keramidas <keramida_at_freebsd.org> wrote:
> +#define RT_RELOCK(_rt) do {					\
> +	RT_LOCK(_rt)						\
> +	if ((_rt)->rt_refcnt <= 1)				\
> +		rtfree(_rt);					\
> +		_rt = 0; /*  signal that it went away */	\
> +	else {							\
> +		RT_REMREF(_rt);					\
> +		/* note that _rt is still valid */		\
> +	}							\
> +} while (0)
> +

This macro needs a semicolon after Rt_LOCK(_rt) and a pair of { ... } in
the if part too:

%%%
#define RT_RELOCK(_rt) do {					\
	RT_LOCK(_rt);						\
	if ((_rt)->rt_refcnt <= 1) {				\
		rtfree(_rt);					\
		_rt = 0; /*  signal that it went away */	\
	} else {						\
		RT_REMREF(_rt);					\
		/* note that _rt is still valid */		\
	}							\
} while (0)
%%%

and the `error' variable is unused in the new rt_check_fib(), or it
breaks the kernel with a warning about an unused variable:

/usr/src/sys/net/route.c: In function 'rt_check_fib':
/usr/src/sys/net/route.c:1716: warning: unused variable 'error'

The updated patch that I'm build-testing now is:

%%%
diff -r ef8e7f2fc284 sys/net/route.c
--- a/sys/net/route.c	Fri Sep 12 02:12:33 2008 +0300
+++ b/sys/net/route.c	Sat Sep 13 22:55:45 2008 +0300
_at__at_ -1676,19 +1676,31 _at__at_
  *   *lrt0 points to the cached route to the final destination;
  *   *lrt is not meaningful;
  *    fibnum is the index to the correct network fib for this packet
+ *	(*lrt0 has not ref held on it so REMREF is not needed )
  *
  * === Operation ===
  * If the route is marked down try to find a new route.  If the route
  * to the gateway is gone, try to setup a new route.  Otherwise,
  * if the route is marked for packets to be rejected, enforce that.
+ * Note that rtalloc returns an rtentry with an extra REF that we need to lose.
  *
  * === On return ===
  *   *dst is unchanged;
  *   *lrt0 points to the (possibly new) route to the final destination
- *   *lrt points to the route to the next hop
+ *   *lrt points to the route to the next hop   [LOCKED]
  *
  * Their values are meaningful ONLY if no error is returned.
+ *
+ * To follow this you have to remember that:
+ * RT_REMREF reduces the reference count by 1 but doesn't check it for 0 (!)
+ * RTFREE_LOCKED includes an RT_REMREF (or an rtfree if refs == 1)
+ *    and an RT_UNLOCK
+ * RTFREE does an RT_LOCK and an RTFREE_LOCKED
+ * The gwroute pointer counts as a reference on the rtentry to which it points.
+ * so when we add it we use the ref that rtalloc gives us and when we lose it
+ * we need to remove the reference.
  */
+
 int
 rt_check(struct rtentry **lrt, struct rtentry **lrt0, struct sockaddr *dst)
 {
_at__at_ -1701,58 +1713,81 _at__at_
 {
 	struct rtentry *rt;
 	struct rtentry *rt0;
-	int error;
 
 	KASSERT(*lrt0 != NULL, ("rt_check"));
-	rt = rt0 = *lrt0;
+	rt0 = *lrt0;
+	rt = NULL;
 
 	/* NB: the locking here is tortuous... */
-	RT_LOCK(rt);
-	if ((rt->rt_flags & RTF_UP) == 0) {
-		RT_UNLOCK(rt);
-		rt = rtalloc1_fib(dst, 1, 0UL, fibnum);
-		if (rt != NULL) {
-			RT_REMREF(rt);
-			/* XXX what about if change? */
-		} else
+	RT_LOCK(rt0);
+retry:
+	if (rt0 && (rt0->rt_flags & RTF_UP) == 0) {
+		/* Current rt0 is useless, try get a replacement. */
+		RT_UNLOCK(rt0);
+		rt0 = NULL;
+	}
+	if (rt0 == NULL) {
+		rt0 = rtalloc1_fib(dst, 1, 0UL, fibnum);
+		if (rt0 == NULL) {
 			return (EHOSTUNREACH);
-		rt0 = rt;
+		}
+		RT_REMREF(rt0); /* don't need the reference. */
 	}
-	/* XXX BSD/OS checks dst->sa_family != AF_NS */
-	if (rt->rt_flags & RTF_GATEWAY) {
-		if (rt->rt_gwroute == NULL)
-			goto lookup;
-		rt = rt->rt_gwroute;
-		RT_LOCK(rt);		/* NB: gwroute */
-		if ((rt->rt_flags & RTF_UP) == 0) {
-			RTFREE_LOCKED(rt);	/* unlock gwroute */
-			rt = rt0;
-			rt0->rt_gwroute = NULL;
-		lookup:
-			RT_UNLOCK(rt0);
-/* XXX MRT link level looked up in table 0 */
-			rt = rtalloc1_fib(rt->rt_gateway, 1, 0UL, 0);
-			if (rt == rt0) {
-				RT_REMREF(rt0);
-				RT_UNLOCK(rt0);
+
+	if (rt0->rt_flags & RTF_GATEWAY) {
+		if ((rt = rt0->rt_gwroute) != NULL) {
+			RT_LOCK(rt);		/* NB: gwroute */
+			if ((rt->rt_flags & RTF_UP) == 0) {
+				/* gw route is dud. ignore/lose it */
+				RTFREE_LOCKED(rt); /* unref (&unlock) gwroute */
+				rt = rt0->rt_gwroute = NULL;
+			}
+		}
+		if (rt == NULL) {  /* NOT AN ELSE CLAUSE */
+			RT_TEMP_UNLOCK(rt0); /* MUST return to undo this */
+			rt = rtalloc1_fib(rt0->rt_gateway, 1, 0UL, fibnum);
+			if ((rt == rt0) || (rt == NULL)) {
+				/* the best we can do is not good enough */
+				if (rt) {
+					RT_REMREF(rt); /* assumes ref > 0 */
+					RT_UNLOCK(rt);
+				}
+				RTFREE(rt0); /* lock, unref, (unlock) */
 				return (ENETUNREACH);
 			}
-			RT_LOCK(rt0);
-			if (rt0->rt_gwroute != NULL)
-				RTFREE(rt0->rt_gwroute);
-			rt0->rt_gwroute = rt;
-			if (rt == NULL) {
-				RT_UNLOCK(rt0);
-				return (EHOSTUNREACH);
+			/*
+			 * Relock it and lose the added reference.
+			 * All sorts of things could have happenned while we
+			 * had no lock on it, so check for them.
+			 */
+			RT_RELOCK(rt0);
+			if (rt0 == NULL || ((rt0->rt_flags & RTF_UP) == 0))
+				/* Ru-roh.. what we had is no longer any good */
+				goto retry;
+			/*
+			 * While we were away, someone replaced the gateway.
+			 * Since a reference count is involved we can't just
+			 * overwrite it.
+			 */
+			if (rt0->rt_gwroute) {
+				if (rt0->rt_gwroute != rt) {
+					RTFREE_LOCKED(rt);
+					goto retry;
+				}
+			} else {
+				rt0->rt_gwroute = rt;
 			}
 		}
+		RT_LOCK_ASSERT(rt);
 		RT_UNLOCK(rt0);
+	} else {
+		/* think of rt as having the lock from now on.. */
+		rt = rt0;
 	}
 	/* XXX why are we inspecting rmx_expire? */
-	error = (rt->rt_flags & RTF_REJECT) &&
-		(rt->rt_rmx.rmx_expire == 0 ||
-			time_uptime < rt->rt_rmx.rmx_expire);
-	if (error) {
+	if ((rt->rt_flags & RTF_REJECT) &&
+	    (rt->rt_rmx.rmx_expire == 0 ||
+	    time_uptime < rt->rt_rmx.rmx_expire)) {
 		RT_UNLOCK(rt);
 		return (rt == rt0 ? EHOSTDOWN : EHOSTUNREACH);
 	}
diff -r ef8e7f2fc284 sys/net/route.h
--- a/sys/net/route.h	Fri Sep 12 02:12:33 2008 +0300
+++ b/sys/net/route.h	Sat Sep 13 22:55:45 2008 +0300
_at__at_ -315,6 +315,22 _at__at_
 	(_rt)->rt_refcnt--;					\
 } while (0)
 
+#define RT_TEMP_UNLOCK(_rt) do {				\
+	RT_ADDREF(_rt);						\
+	RT_UNLOCK(_rt);						\
+} while (0)
+
+#define RT_RELOCK(_rt) do {					\
+	RT_LOCK(_rt);						\
+	if ((_rt)->rt_refcnt <= 1) {				\
+		rtfree(_rt);					\
+		_rt = 0; /*  signal that it went away */	\
+	} else {						\
+		RT_REMREF(_rt);					\
+		/* note that _rt is still valid */		\
+	}							\
+} while (0)
+
 #define	RTFREE_LOCKED(_rt) do {					\
 		if ((_rt)->rt_refcnt <= 1)			\
 			rtfree(_rt);				\
%%%

Received on Sat Sep 13 2008 - 17:56:16 UTC

This archive was generated by hypermail 2.4.0 : Wed May 19 2021 - 11:39:35 UTC