Re: Detach of an0 induced kernel panic in 7.0-PRE

From: John Baldwin <jhb_at_freebsd.org>
Date: Tue, 30 Oct 2007 10:29:25 -0400
On Tuesday 30 October 2007 12:39:38 am Tai-hwa Liang wrote:
> On Mon, 29 Oct 2007, David Wolfskill wrote:
> > On Tue, Oct 30, 2007 at 09:38:47AM +0800, Tai-hwa Liang wrote:
> >> ...
> >>> Unread portion of the kernel message buffer:
> >>> an0: RID access failed
> >>> an0: detached
> >>>
> >>>
> >>> Fatal trap 12: page fault while in kernel mode
> >> ...
> >>> #0  doadump () at pcpu.h:195
> >>> 195             __asm __volatile("movl %%fs:0,%0" : "=r" (td));
> >>> (kgdb) bt
> >>> #0  doadump () at pcpu.h:195
> >>> #1  0xc073b637 in boot (howto=260) at /usr/src/sys/kern/kern_shutdown.c:409
> >>> #2  0xc073b8f9 in panic (fmt=Variable "fmt" is not available.
> >>> ) at /usr/src/sys/kern/kern_shutdown.c:563
> >>> #3  0xc0a005dc in trap_fatal (frame=0xe2987c1c, eva=3358077304)
> >>>   at /usr/src/sys/i386/i386/trap.c:872
> >>> #4  0xc0a00860 in trap_pfault (frame=0xe2987c1c, usermode=0,
> >>> eva=3358077304)
> >>>   at /usr/src/sys/i386/i386/trap.c:785
> >>> #5  0xc0a011d5 in trap (frame=0xe2987c1c) at
> >>> /usr/src/sys/i386/i386/trap.c:463
> >>> #6  0xc09e71eb in calltrap () at /usr/src/sys/i386/i386/exception.s:139
> >>> #7  0xc04daaf7 in an_stats_update (xsc=0xc8282000) at atomic.h:149
> >>> #8  0xc074d7ea in softclock (dummy=0x0) at
> >>> /usr/src/sys/kern/kern_timeout.c:274
> >>> #9  0xc071ef0b in ithread_loop (arg=0xc3f0d2f0)
> >>>   at /usr/src/sys/kern/kern_intr.c:1036
> >>> #10 0xc071bc19 in fork_exit (callout=0xc071ed60 <ithread_loop>,
> >>>   arg=0xc3f0d2f0, frame=0xe2987d38) at /usr/src/sys/kern/kern_fork.c:796
> >>> #11 0xc09e7260 in fork_trampoline () at
> >>> /usr/src/sys/i386/i386/exception.s:205
> >>> (kgdb)
> >> ...
> >>   Does the attached patch help?
> >
> > Aye, it does appear to do so.
> >
> > I tested at home, where the an0 NIC is actually useful.  But I managed
> > to detach the card before the script reported successful association;
> > the machine kept running (and the script fell back to trying the wi0
> > NIC, since the an0 NIC was  no longer there).
> >
> > I'd call that a definite improvement -- and fast response!  Thanks again!
> 
>    Thank you for the testing works.  I'll ask for re_at_'s approval once
> this is settled in HEAD.

This isn't really the best fix as it is still racey.  A better approach is to
make an(4) use the callout API with callout_init_mtx() and to add a
callout_drain() during an_detach() otherwise you can still get panics due to
the callout routine trying to lock a destroyed mutex.

Something like this:

Index: if_an.c
===================================================================
RCS file: /host/cvs/usr/cvs/src/sys/dev/an/if_an.c,v
retrieving revision 1.84
diff -u -r1.84 if_an.c
--- if_an.c	10 Sep 2007 12:53:34 -0000	1.84
+++ if_an.c	30 Oct 2007 14:28:37 -0000
_at__at_ -790,7 +790,7 _at__at_
 	 */
 
 	ether_ifattach(ifp, sc->an_caps.an_oemaddr);
-	callout_handle_init(&sc->an_stat_ch);
+	callout_init_mtx(&sc->an_stat_ch, &sc->an_mtx);
 
 	return(0);
 fail:;
_at__at_ -818,6 +818,7 _at__at_
 	AN_UNLOCK(sc);
 	ether_ifdetach(ifp);
 	bus_teardown_intr(dev, sc->irq_res, sc->irq_handle);
+	callout_drain(&sc->an_stat_ch);
 	if_free(ifp);
 	an_release_resources(dev);
 	mtx_destroy(&sc->an_mtx);
_at__at_ -1150,7 +1151,7 _at__at_
 	struct ifnet		*ifp;
 
 	sc = xsc;
-	AN_LOCK(sc);
+	AN_LOCK_ASSERT(sc);
 	ifp = sc->an_ifp;
 
 	sc->an_status.an_type = AN_RID_STATUS;
_at__at_ -1164,8 +1165,7 _at__at_
 
 	/* Don't do this while we're transmitting */
 	if (ifp->if_drv_flags & IFF_DRV_OACTIVE) {
-		sc->an_stat_ch = timeout(an_stats_update, sc, hz);
-		AN_UNLOCK(sc);
+		callout_reset(&sc->an_stat_ch, hz, an_stats_update, sc);
 		return;
 	}
 
_at__at_ -1173,8 +1173,7 _at__at_
 	sc->an_stats.an_type = AN_RID_32BITS_CUM;
 	an_read_record(sc, (struct an_ltv_gen *)&sc->an_stats.an_len);
 
-	sc->an_stat_ch = timeout(an_stats_update, sc, hz);
-	AN_UNLOCK(sc);
+	callout_reset(&sc->an_stat_ch, hz, an_stats_update, sc);
 
 	return;
 }
_at__at_ -2615,7 +2614,7 _at__at_
 	ifp->if_drv_flags |= IFF_DRV_RUNNING;
 	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
 
-	sc->an_stat_ch = timeout(an_stats_update, sc, hz);
+	callout_reset(&sc->an_stat_ch, hz, an_stats_update, sc);
 	AN_UNLOCK(sc);
 
 	return;
_at__at_ -2828,7 +2827,7 _at__at_
 	for (i = 0; i < AN_TX_RING_CNT; i++)
 		an_cmd(sc, AN_CMD_DEALLOC_MEM, sc->an_rdata.an_tx_fids[i]);
 
-	untimeout(an_stats_update, sc, sc->an_stat_ch);
+	callout_stop(&sc->an_stat_ch);
 
 	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING|IFF_DRV_OACTIVE);
 
Index: if_anreg.h
===================================================================
RCS file: /host/cvs/usr/cvs/src/sys/dev/an/if_anreg.h,v
retrieving revision 1.23
diff -u -r1.23 if_anreg.h
--- if_anreg.h	10 Jun 2005 16:49:03 -0000	1.23
+++ if_anreg.h	30 Oct 2007 14:24:50 -0000
_at__at_ -485,7 +485,7 _at__at_
 	int			an_have_rssimap;
 	struct an_ltv_rssi_map	an_rssimap;
 #endif
-	struct callout_handle	an_stat_ch;
+	struct callout		an_stat_ch;
 	struct mtx		an_mtx;
 	device_t		an_dev;
 	struct ifmedia		an_ifmedia;


-- 
John Baldwin
Received on Tue Oct 30 2007 - 14:02:51 UTC

This archive was generated by hypermail 2.4.0 : Wed May 19 2021 - 11:39:20 UTC