----- Original Message ---- > From: Hans Petter Selasky <hselasky_at_c2i.net> > To: freebsd-current_at_freebsd.org > Cc: PseudoCylon <moonlightakkiy_at_yahoo.ca>; Sam Leffler <sam_at_freebsd.org>; >freebsd-usb_at_freebsd.org > Sent: Mon, July 19, 2010 1:17:04 PM > Subject: Re: [panic] Race in IEEE802.11 layer towards device drivers > > Hi AK, > > I've committed your patches to USB P4. I've made some additional patches. > > Can you check and verify everything? > > http://p4web.freebsd.org/_at__at_181189?ac=10 > Hi If we change sc->cmdq_run = RUN_CMDQ_ABORT, -- begin excerpt -- _at__at_ -4890,7 +4877,10 _at__at_ run_stop(void *arg) ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); sc->ratectl_run = RUN_RATECTL_OFF; -sc->cmdq_run = RUN_CMDQ_ABORT; + +RUN_CMDQ_LOCK(sc); +sc->cmdq_run = sc->cmdq_key_set = RUN_CMDQ_ABORT; +RUN_CMDQ_UNLOCK(sc); -- end excerpt -- we also need to change this, otherwise key will be cleared. -- begin patch -- diff --git a/dev/usb/wlan/if_run.c b/dev/usb/wlan/if_run.c index 017e4b0..f7abe17 100644 --- a/dev/usb/wlan/if_run.c +++ b/dev/usb/wlan/if_run.c _at__at_ -4670,8 +4670,6 _at__at_ run_init_locked(struct run_softc *sc) if(ic->ic_nrunning > 1) return; -run_stop(sc); - for (ntries = 0; ntries < 100; ntries++) { if (run_read(sc, RT2860_ASIC_VER_ID, &tmp) != 0) goto fail; -- end patch -- > Also please compile a kernel with WITNESS enabled to catch any LOR's, hence we > > introduced another mutex. > The 2nd mutex did solve a deadlock, but doesn't solve the LOR. -- begin message -- lock order reversal: 1st 0xffffff8000a257d0 run0_node_lock (run0_node_lock) _at_ /usr/src/sys/net80211/ieee80211_node.c:1736 2nd 0xffffff8000a19348 run0 (network driver) _at_ /mnt/share/home/AK/FreeBSD/modules/usb/run/../../../../mnt/dev/usb/wlan/if_run.c:2212 KDB: stack backtrace: db_trace_self_wrapper() at db_trace_self_wrapper+0x2a _witness_debugger() at _witness_debugger+0x2e witness_checkorder() at witness_checkorder+0x81e _mtx_lock_flags() at _mtx_lock_flags+0x78 run_key_delete() at run_key_delete+0x45 _ieee80211_crypto_delkey() at _ieee80211_crypto_delkey+0x9e ieee80211_crypto_delkey() at ieee80211_crypto_delkey+0x28 ieee80211_node_delucastkey() at ieee80211_node_delucastkey+0x78 ieee80211_sta_leave() at ieee80211_sta_leave+0x16 ieee80211_node_leave() at ieee80211_node_leave+0x11d hostap_recv_mgmt() at hostap_recv_mgmt+0x33f hostap_input() at hostap_input+0xc09 run_rx_frame() at run_rx_frame+0x13f run_bulk_rx_callback() at run_bulk_rx_callback+0x3b7 usbd_callback_wrapper() at usbd_callback_wrapper+0x12b usb_command_wrapper() at usb_command_wrapper+0x76 usb_callback_proc() at usb_callback_proc+0x76 usb_process() at usb_process+0xbb fork_exit() at fork_exit+0x12a fork_trampoline() at fork_trampoline+0xe --- trap 0, rip = 0, rsp = 0xffffff8029b4ed30, rbp = 0 --- -- end message -- or -- begin message -- lock order reversal: 1st 0xffffff8000a257d0 run0_node_lock (run0_node_lock) _at_ /usr/src/sys/net80211/ieee80211_node.c:1736 2nd 0xffffff8000a19348 run0 (network driver) _at_ /mnt/share/home/AK/FreeBSD/modules/usb/run/../../../../mnt/dev/usb/wlan/if_run.c:2212 KDB: stack backtrace: db_trace_self_wrapper() at db_trace_self_wrapper+0x2a _witness_debugger() at _witness_debugger+0x2e witness_checkorder() at witness_checkorder+0x81e _mtx_lock_flags() at _mtx_lock_flags+0x78 run_key_delete() at run_key_delete+0x47 _ieee80211_crypto_delkey() at _ieee80211_crypto_delkey+0x9e ieee80211_crypto_delkey() at ieee80211_crypto_delkey+0x28 ieee80211_node_delucastkey() at ieee80211_node_delucastkey+0x78 ieee80211_sta_leave() at ieee80211_sta_leave+0x16 ieee80211_node_leave() at ieee80211_node_leave+0x11d ieee80211_node_timeout() at ieee80211_node_timeout+0x1d5 softclock() at softclock+0x2a0 intr_event_execute_handlers() at intr_event_execute_handlers+0x66 ithread_loop() at ithread_loop+0xb2 fork_exit() at fork_exit+0x12a fork_trampoline() at fork_trampoline+0xe --- trap 0, rip = 0, rsp = 0xffffff8000052d30, rbp = 0 --- -- end message -- There are new warning, "acquiring duplicate lock." For example, -- begin message -- acquiring duplicate lock of same type: "network driver" 1st run0 _at_ /usr/src/sys/dev/usb/usb_request.c:691 2nd run0 _at_ /mnt/share/home/AK/FreeBSD/modules/usb/run/../../../../mnt/dev/usb/wlan/if_run.c:4831 KDB: stack backtrace: db_trace_self_wrapper() at db_trace_self_wrapper+0x2a _witness_debugger() at _witness_debugger+0x2e witness_checkorder() at witness_checkorder+0x8ef _mtx_lock_flags() at _mtx_lock_flags+0x78 run_init_locked() at run_init_locked+0x753 run_ioctl() at run_ioctl+0xad taskqueue_run() at taskqueue_run+0x91 taskqueue_thread_loop() at taskqueue_thread_loop+0x3f fork_exit() at fork_exit+0x12a fork_trampoline() at fork_trampoline+0xe --- trap 0, rip = 0, rsp = 0xffffff803e5b2d30, rbp = 0 --- -- end message -- I don't know if it's worth patching or safe to patch (specially lock/unlock in run_bulk_tx_callbackN()), but here is one -- begin patch -- diff --git a/dev/usb/wlan/if_run.c b/dev/usb/wlan/if_run.c index 017e4b0..2a0b5b6 100644 --- a/dev/usb/wlan/if_run.c +++ b/dev/usb/wlan/if_run.c _at__at_ -712,14 +712,14 _at__at_ run_detach(device_t self) /* stop all USB transfers */ usbd_transfer_unsetup(sc->sc_xfer, RUN_N_XFER); -RUN_LOCK(sc); - -sc->ratectl_run = RUN_RATECTL_OFF; - RUN_CMDQ_LOCK(sc); sc->cmdq_run = sc->cmdq_key_set = RUN_CMDQ_ABORT; RUN_CMDQ_UNLOCK(sc); +RUN_LOCK(sc); + +sc->ratectl_run = RUN_RATECTL_OFF; + /* free TX list, if any */ for (i = 0; i != RUN_EP_QUEUES; i++) run_unsetup_tx_list(sc, &sc->sc_epq[i]); _at__at_ -2865,6 +2865,9 _at__at_ tr_setup: if ((error == USB_ERR_TIMEOUT) && (vap != NULL)) { uint8_t i; device_printf(sc->sc_dev, "device timeout\n"); + +RUN_UNLOCK(sc); + RUN_CMDQ_LOCK(sc); i = run_cmdq_append(sc); if (i < RUN_CMDQ_MAX) { _at__at_ -2874,6 +2877,8 _at__at_ tr_setup: RUN_CMDQ_UNLOCK(sc); if (i < RUN_CMDQ_MAX) ieee80211_runtask(ic, &sc->cmdq_task); + +RUN_LOCK(sc); } /* _at__at_ -3134,6 +3139,8 _at__at_ run_tx(struct run_softc *sc, struct mbuf *m, struct ieee80211_node *ni) */ uint8_t i; +RUN_UNLOCK(sc); + RUN_CMDQ_LOCK(sc); i = run_cmdq_append(sc); if (i < RUN_CMDQ_MAX) { _at__at_ -3144,6 +3151,7 _at__at_ run_tx(struct run_softc *sc, struct mbuf *m, struct ieee80211_node *ni) if (i < RUN_CMDQ_MAX) ieee80211_runtask(ic, &sc->cmdq_task); +RUN_LOCK(sc); } } _at__at_ -4670,8 +4678,6 _at__at_ run_init_locked(struct run_softc *sc) if(ic->ic_nrunning > 1) return; -run_stop(sc); - for (ntries = 0; ntries < 100; ntries++) { if (run_read(sc, RT2860_ASIC_VER_ID, &tmp) != 0) goto fail; _at__at_ -4827,9 +4833,11 _at__at_ run_init_locked(struct run_softc *sc) ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; ifp->if_drv_flags |= IFF_DRV_RUNNING; +RUN_UNLOCK(sc); RUN_CMDQ_LOCK(sc); sc->cmdq_run = RUN_CMDQ_GO; RUN_CMDQ_UNLOCK(sc); +RUN_LOCK(sc); for(i = 0; i != RUN_N_XFER; i++) usbd_xfer_set_stall(sc->sc_xfer[i]); _at__at_ -4878,12 +4886,12 _at__at_ run_stop(void *arg) sc->ratectl_run = RUN_RATECTL_OFF; +RUN_UNLOCK(sc); + RUN_CMDQ_LOCK(sc); sc->cmdq_run = sc->cmdq_key_set = RUN_CMDQ_ABORT; RUN_CMDQ_UNLOCK(sc); -RUN_UNLOCK(sc); - for(i = 0; i < RUN_N_XFER; i++) usbd_transfer_drain(sc->sc_xfer[i]); -- end patch -- There is a LOR between node lock and run lock in run_raw_xmit(), but I haven't been able to reproduce with the latest driver. This LOR has been around since addition of hostap mode. I didn't fix it because everyone would have objected if I had deferred run_raw_xmit(). Following LORs are also around from the beginning and not related to this change, but just for info -- begin message -- lock order reversal: 1st 0xffffff8000a257d0 run0_node_lock (run0_node_lock) _at_ /usr/src/sys/net80211/ieee80211_ioctl.c:1326 2nd 0xffffff8000a24018 run0_com_lock (run0_com_lock) _at_ /usr/src/sys/net80211/ieee80211_node.c:2486 KDB: stack backtrace: db_trace_self_wrapper() at db_trace_self_wrapper+0x2a _witness_debugger() at _witness_debugger+0x2e witness_checkorder() at witness_checkorder+0x81e _mtx_lock_flags() at _mtx_lock_flags+0x78 ieee80211_node_leave() at ieee80211_node_leave+0x80 setmlme_common() at setmlme_common+0x27b ieee80211_ioctl_setmlme() at ieee80211_ioctl_setmlme+0x7e ieee80211_ioctl_set80211() at ieee80211_ioctl_set80211+0xaba in_control() at in_control+0x1ff ifioctl() at ifioctl+0x1100 kern_ioctl() at kern_ioctl+0xc5 ioctl() at ioctl+0xf0 syscallenter() at syscallenter+0x1b5 syscall() at syscall+0x4c Xfast_syscall() at Xfast_syscall+0xe2 --- syscall (54, FreeBSD ELF64, ioctl), rip = 0x8008a8d4c, rsp = 0x7fffffffe898, rbp = 0x800ca6200 --- -- end message -- -- begin message -- lock order reversal: 1st 0xffffff8000a24018 run0_com_lock (run0_com_lock) _at_ /usr/src/sys/net80211/ieee80211_scan.c:683 2nd 0xffffff8000a25928 run0_scan_lock (run0_scan_lock) _at_ /usr/src/sys/net80211/ieee80211_node.c:2135 KDB: stack backtrace: db_trace_self_wrapper() at db_trace_self_wrapper+0x2a _witness_debugger() at _witness_debugger+0x2e witness_checkorder() at witness_checkorder+0x81e _mtx_lock_flags() at _mtx_lock_flags+0x78 ieee80211_iterate_nodes() at ieee80211_iterate_nodes+0x3d hostap_newstate() at hostap_newstate+0x3a1 run_newstate() at run_newstate+0x1ef ieee80211_newstate_cb() at ieee80211_newstate_cb+0x71 taskqueue_run() at taskqueue_run+0x91 taskqueue_thread_loop() at taskqueue_thread_loop+0x3f fork_exit() at fork_exit+0x12a fork_trampoline() at fork_trampoline+0xe --- trap 0, rip = 0, rsp = 0xffffff803e5b7d30, rbp = 0 --- -- end message -- AKReceived on Tue Jul 20 2010 - 08:03:23 UTC
This archive was generated by hypermail 2.4.0 : Wed May 19 2021 - 11:40:05 UTC