On Sat, 13 Sep 2008 12:29:49 -0700, Julian Elischer <julian_at_elischer.org> wrote: > Robert Watson wrote: >> >> On Fri, 5 Sep 2008, Giorgos Keramidas wrote: >> >>> A kernel that I built last night to test Ed's "packet mode" for ptys >>> included all the changes up to 182743 panics with: >> >> I had an identical panic on 7-STABLE last night: > > I have a patch for this that i have had out for review for s while... > it's a replacement rt_check_fib function.. As it happens, I just build a kernel with the new rt_check_fib() function with the patched code. I'm not sure it fixes the panic, but it didn't compile at all without s/RT_FREE/RTFREE/ in a couple of places, so I'll give it a try but a more thorough review than I can give is needed. The original diff was: %%% diff -r ef8e7f2fc284 sys/net/route.c --- a/sys/net/route.c Fri Sep 12 02:12:33 2008 +0300 +++ b/sys/net/route.c Sat Sep 13 22:40:53 2008 +0300 _at__at_ -1676,19 +1676,31 _at__at_ * *lrt0 points to the cached route to the final destination; * *lrt is not meaningful; * fibnum is the index to the correct network fib for this packet + * (*lrt0 has not ref held on it so REMREF is not needed ) * * === Operation === * If the route is marked down try to find a new route. If the route * to the gateway is gone, try to setup a new route. Otherwise, * if the route is marked for packets to be rejected, enforce that. + * Note that rtalloc returns an rtentry with an extra REF that we need to lose. * * === On return === * *dst is unchanged; * *lrt0 points to the (possibly new) route to the final destination - * *lrt points to the route to the next hop + * *lrt points to the route to the next hop [LOCKED] * * Their values are meaningful ONLY if no error is returned. + * + * To follow this you have to remember that: + * RT_REMREF reduces the reference count by 1 but doesn't check it for 0 (!) + * RTFREE_LOCKED includes an RT_REMREF (or an rtfree if refs == 1) + * and an RT_UNLOCK + * RTFREE does an RT_LOCK and an RTFREE_LOCKED + * The gwroute pointer counts as a reference on the rtentry to which it points. + * so when we add it we use the ref that rtalloc gives us and when we lose it + * we need to remove the reference. */ + int rt_check(struct rtentry **lrt, struct rtentry **lrt0, struct sockaddr *dst) { _at__at_ -1704,55 +1716,79 _at__at_ int error; KASSERT(*lrt0 != NULL, ("rt_check")); - rt = rt0 = *lrt0; + rt0 = *lrt0; + rt = NULL; /* NB: the locking here is tortuous... */ - RT_LOCK(rt); - if ((rt->rt_flags & RTF_UP) == 0) { - RT_UNLOCK(rt); - rt = rtalloc1_fib(dst, 1, 0UL, fibnum); - if (rt != NULL) { - RT_REMREF(rt); - /* XXX what about if change? */ - } else + RT_LOCK(rt0); +retry: + if (rt0 && (rt0->rt_flags & RTF_UP) == 0) { + /* Current rt0 is useless, try get a replacement. */ + RT_UNLOCK(rt0); + rt0 = NULL; + } + if (rt0 == NULL) { + rt0 = rtalloc1_fib(dst, 1, 0UL, fibnum); + if (rt0 == NULL) { return (EHOSTUNREACH); - rt0 = rt; + } + RT_REMREF(rt0); /* don't need the reference. */ } - /* XXX BSD/OS checks dst->sa_family != AF_NS */ - if (rt->rt_flags & RTF_GATEWAY) { - if (rt->rt_gwroute == NULL) - goto lookup; - rt = rt->rt_gwroute; - RT_LOCK(rt); /* NB: gwroute */ - if ((rt->rt_flags & RTF_UP) == 0) { - RTFREE_LOCKED(rt); /* unlock gwroute */ - rt = rt0; - rt0->rt_gwroute = NULL; - lookup: - RT_UNLOCK(rt0); -/* XXX MRT link level looked up in table 0 */ - rt = rtalloc1_fib(rt->rt_gateway, 1, 0UL, 0); - if (rt == rt0) { - RT_REMREF(rt0); - RT_UNLOCK(rt0); + + if (rt0->rt_flags & RTF_GATEWAY) { + if ((rt = rt0->rt_gwroute) != NULL) { + RT_LOCK(rt); /* NB: gwroute */ + if ((rt->rt_flags & RTF_UP) == 0) { + /* gw route is dud. ignore/lose it */ + RTFREE_LOCKED(rt); /* unref (&unlock) gwroute */ + rt = rt0->rt_gwroute = NULL; + } + } + if (rt == NULL) { /* NOT AN ELSE CLAUSE */ + RT_TEMP_UNLOCK(rt0); /* MUST return to undo this */ + rt = rtalloc1_fib(rt0->rt_gateway, 1, 0UL, fibnum); + if ((rt == rt0) || (rt == NULL)) { + /* the best we can do is not good enough */ + if (rt) { + RT_REMREF(rt); /* assumes ref > 0 */ + RT_UNLOCK(rt); + } + RT_FREE(rt0); /* lock, unref, (unlock) */ return (ENETUNREACH); } - RT_LOCK(rt0); - if (rt0->rt_gwroute != NULL) - RTFREE(rt0->rt_gwroute); - rt0->rt_gwroute = rt; - if (rt == NULL) { - RT_UNLOCK(rt0); - return (EHOSTUNREACH); + /* + * Relock it and lose the added reference. + * All sorts of things could have happenned while we + * had no lock on it, so check for them. + */ + RT_RELOCK(rt0); + if (rt0 == NULL || ((rt0->rt_flags & RTF_UP) == 0)) + /* Ru-roh.. what we had is no longer any good */ + goto retry; + /* + * While we were away, someone replaced the gateway. + * Since a reference count is involved we can't just + * overwrite it. + */ + if (rt0->rt_gwroute) { + if (rt0->rt_gwroute != rt) { + RT_FREE_LOCKED(rt); + goto retry; + } + } else { + rt0->rt_gwroute = rt; } } + RT_LOCK_ASSERT(rt); RT_UNLOCK(rt0); + } else { + /* think of rt as having the lock from now on.. */ + rt = rt0; } /* XXX why are we inspecting rmx_expire? */ - error = (rt->rt_flags & RTF_REJECT) && - (rt->rt_rmx.rmx_expire == 0 || - time_uptime < rt->rt_rmx.rmx_expire); - if (error) { + if ((rt->rt_flags & RTF_REJECT) && + (rt->rt_rmx.rmx_expire == 0 || + time_uptime < rt->rt_rmx.rmx_expire)) { RT_UNLOCK(rt); return (rt == rt0 ? EHOSTDOWN : EHOSTUNREACH); } diff -r ef8e7f2fc284 sys/net/route.h --- a/sys/net/route.h Fri Sep 12 02:12:33 2008 +0300 +++ b/sys/net/route.h Sat Sep 13 22:40:53 2008 +0300 _at__at_ -315,6 +315,22 _at__at_ (_rt)->rt_refcnt--; \ } while (0) +#define RT_TEMP_UNLOCK(_rt) do { \ + RT_ADDREF(_rt); \ + RT_UNLOCK(_rt); \ +} while (0) + +#define RT_RELOCK(_rt) do { \ + RT_LOCK(_rt) \ + if ((_rt)->rt_refcnt <= 1) \ + rtfree(_rt); \ + _rt = 0; /* signal that it went away */ \ + else { \ + RT_REMREF(_rt); \ + /* note that _rt is still valid */ \ + } \ +} while (0) + #define RTFREE_LOCKED(_rt) do { \ if ((_rt)->rt_refcnt <= 1) \ rtfree(_rt); \ %%% The two places I had to edit after applying the patch are lines 1756 and 1755: 1756 RT_FREE(rt0); /* lock, unref, (unlock) */ 1775 RT_FREE_LOCKED(rt); The 'fix' was trivial (s/RT_FREE/RTFREE/) but I haven't booted into the new kernel yet. BRB in a few minutes :)
This archive was generated by hypermail 2.4.0 : Wed May 19 2021 - 11:39:35 UTC