Re: [panic] Race in IEEE802.11 layer towards device drivers

From: PseudoCylon <moonlightakkiy_at_yahoo.ca>
Date: Tue, 20 Jul 2010 03:03:22 -0700 (PDT)
----- Original Message ----
> From: Hans Petter Selasky <hselasky_at_c2i.net>
> To: freebsd-current_at_freebsd.org
> Cc: PseudoCylon <moonlightakkiy_at_yahoo.ca>; Sam Leffler <sam_at_freebsd.org>; 
>freebsd-usb_at_freebsd.org
> Sent: Mon, July 19, 2010 1:17:04 PM
> Subject: Re: [panic] Race in IEEE802.11 layer towards device drivers
> 
> Hi AK,
> 
> I've committed your patches to USB P4. I've made some additional  patches.
> 
> Can you check and verify everything?
> 
> http://p4web.freebsd.org/_at__at_181189?ac=10
> 

Hi

If we change sc->cmdq_run = RUN_CMDQ_ABORT,

-- begin excerpt --


_at__at_ -4890,7 +4877,10 _at__at_ run_stop(void *arg)
 ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
 
 sc->ratectl_run = RUN_RATECTL_OFF;
-sc->cmdq_run = RUN_CMDQ_ABORT;
+
+RUN_CMDQ_LOCK(sc);
+sc->cmdq_run = sc->cmdq_key_set = RUN_CMDQ_ABORT;
+RUN_CMDQ_UNLOCK(sc);

-- end excerpt --


we also need to change this, otherwise key will be cleared.


-- begin patch --

diff --git a/dev/usb/wlan/if_run.c b/dev/usb/wlan/if_run.c
index 017e4b0..f7abe17 100644
--- a/dev/usb/wlan/if_run.c
+++ b/dev/usb/wlan/if_run.c
_at__at_ -4670,8 +4670,6 _at__at_ run_init_locked(struct run_softc *sc)
 if(ic->ic_nrunning > 1)
 return;
 
-run_stop(sc);
-
 for (ntries = 0; ntries < 100; ntries++) {
 if (run_read(sc, RT2860_ASIC_VER_ID, &tmp) != 0)
 goto fail;

-- end patch --


> Also please compile a kernel  with WITNESS enabled to catch any LOR's, hence we 
>
> introduced another  mutex.
> 


The 2nd mutex did solve a deadlock, but doesn't solve the LOR.


-- begin message --

lock order reversal:
 1st 0xffffff8000a257d0 run0_node_lock (run0_node_lock) _at_ 
/usr/src/sys/net80211/ieee80211_node.c:1736
 2nd 0xffffff8000a19348 run0 (network driver) _at_ 
/mnt/share/home/AK/FreeBSD/modules/usb/run/../../../../mnt/dev/usb/wlan/if_run.c:2212

KDB: stack backtrace:
db_trace_self_wrapper() at db_trace_self_wrapper+0x2a
_witness_debugger() at _witness_debugger+0x2e
witness_checkorder() at witness_checkorder+0x81e
_mtx_lock_flags() at _mtx_lock_flags+0x78
run_key_delete() at run_key_delete+0x45
_ieee80211_crypto_delkey() at _ieee80211_crypto_delkey+0x9e
ieee80211_crypto_delkey() at ieee80211_crypto_delkey+0x28
ieee80211_node_delucastkey() at ieee80211_node_delucastkey+0x78
ieee80211_sta_leave() at ieee80211_sta_leave+0x16
ieee80211_node_leave() at ieee80211_node_leave+0x11d
hostap_recv_mgmt() at hostap_recv_mgmt+0x33f
hostap_input() at hostap_input+0xc09
run_rx_frame() at run_rx_frame+0x13f
run_bulk_rx_callback() at run_bulk_rx_callback+0x3b7
usbd_callback_wrapper() at usbd_callback_wrapper+0x12b
usb_command_wrapper() at usb_command_wrapper+0x76
usb_callback_proc() at usb_callback_proc+0x76
usb_process() at usb_process+0xbb
fork_exit() at fork_exit+0x12a
fork_trampoline() at fork_trampoline+0xe
--- trap 0, rip = 0, rsp = 0xffffff8029b4ed30, rbp = 0 ---

-- end message --

or

-- begin message --

lock order reversal:
 1st 0xffffff8000a257d0 run0_node_lock (run0_node_lock) _at_ 
/usr/src/sys/net80211/ieee80211_node.c:1736
 2nd 0xffffff8000a19348 run0 (network driver) _at_ 
/mnt/share/home/AK/FreeBSD/modules/usb/run/../../../../mnt/dev/usb/wlan/if_run.c:2212

KDB: stack backtrace:
db_trace_self_wrapper() at db_trace_self_wrapper+0x2a
_witness_debugger() at _witness_debugger+0x2e
witness_checkorder() at witness_checkorder+0x81e
_mtx_lock_flags() at _mtx_lock_flags+0x78
run_key_delete() at run_key_delete+0x47
_ieee80211_crypto_delkey() at _ieee80211_crypto_delkey+0x9e
ieee80211_crypto_delkey() at ieee80211_crypto_delkey+0x28
ieee80211_node_delucastkey() at ieee80211_node_delucastkey+0x78
ieee80211_sta_leave() at ieee80211_sta_leave+0x16
ieee80211_node_leave() at ieee80211_node_leave+0x11d
ieee80211_node_timeout() at ieee80211_node_timeout+0x1d5
softclock() at softclock+0x2a0
intr_event_execute_handlers() at intr_event_execute_handlers+0x66
ithread_loop() at ithread_loop+0xb2
fork_exit() at fork_exit+0x12a
fork_trampoline() at fork_trampoline+0xe
--- trap 0, rip = 0, rsp = 0xffffff8000052d30, rbp = 0 ---

-- end message --


There are new warning, "acquiring duplicate lock." For example,


-- begin message --

acquiring duplicate lock of same type: "network driver"
 1st run0 _at_ /usr/src/sys/dev/usb/usb_request.c:691
 2nd run0 _at_ 
/mnt/share/home/AK/FreeBSD/modules/usb/run/../../../../mnt/dev/usb/wlan/if_run.c:4831

KDB: stack backtrace:
db_trace_self_wrapper() at db_trace_self_wrapper+0x2a
_witness_debugger() at _witness_debugger+0x2e
witness_checkorder() at witness_checkorder+0x8ef
_mtx_lock_flags() at _mtx_lock_flags+0x78
run_init_locked() at run_init_locked+0x753
run_ioctl() at run_ioctl+0xad
taskqueue_run() at taskqueue_run+0x91
taskqueue_thread_loop() at taskqueue_thread_loop+0x3f
fork_exit() at fork_exit+0x12a
fork_trampoline() at fork_trampoline+0xe
--- trap 0, rip = 0, rsp = 0xffffff803e5b2d30, rbp = 0 ---

-- end message --


I don't know if it's worth patching or safe to patch (specially lock/unlock in 
run_bulk_tx_callbackN()), but here is one


-- begin patch --

diff --git a/dev/usb/wlan/if_run.c b/dev/usb/wlan/if_run.c
index 017e4b0..2a0b5b6 100644
--- a/dev/usb/wlan/if_run.c
+++ b/dev/usb/wlan/if_run.c
_at__at_ -712,14 +712,14 _at__at_ run_detach(device_t self)
 /* stop all USB transfers */
 usbd_transfer_unsetup(sc->sc_xfer, RUN_N_XFER);
 
-RUN_LOCK(sc);
-
-sc->ratectl_run = RUN_RATECTL_OFF;
-
 RUN_CMDQ_LOCK(sc);
 sc->cmdq_run = sc->cmdq_key_set = RUN_CMDQ_ABORT;
 RUN_CMDQ_UNLOCK(sc);
 
+RUN_LOCK(sc);
+
+sc->ratectl_run = RUN_RATECTL_OFF;
+
 /* free TX list, if any */
 for (i = 0; i != RUN_EP_QUEUES; i++)
 run_unsetup_tx_list(sc, &sc->sc_epq[i]);
_at__at_ -2865,6 +2865,9 _at__at_ tr_setup:
 if ((error == USB_ERR_TIMEOUT) && (vap != NULL)) {
 uint8_t i;
 device_printf(sc->sc_dev, "device timeout\n");
+
+RUN_UNLOCK(sc);
+
 RUN_CMDQ_LOCK(sc);
 i = run_cmdq_append(sc);
 if (i < RUN_CMDQ_MAX) {
_at__at_ -2874,6 +2877,8 _at__at_ tr_setup:
 RUN_CMDQ_UNLOCK(sc);
 if (i < RUN_CMDQ_MAX)
 ieee80211_runtask(ic, &sc->cmdq_task);
+
+RUN_LOCK(sc);
 }
 
 /*
_at__at_ -3134,6 +3139,8 _at__at_ run_tx(struct run_softc *sc, struct mbuf *m, struct 
ieee80211_node *ni)
  */
 uint8_t i;
 
+RUN_UNLOCK(sc);
+
 RUN_CMDQ_LOCK(sc);
 i = run_cmdq_append(sc);
 if (i < RUN_CMDQ_MAX) {
_at__at_ -3144,6 +3151,7 _at__at_ run_tx(struct run_softc *sc, struct mbuf *m, struct 
ieee80211_node *ni)
 if (i < RUN_CMDQ_MAX)
 ieee80211_runtask(ic, &sc->cmdq_task);
 
+RUN_LOCK(sc);
 }
 }
 
_at__at_ -4670,8 +4678,6 _at__at_ run_init_locked(struct run_softc *sc)
 if(ic->ic_nrunning > 1)
 return;
 
-run_stop(sc);
-
 for (ntries = 0; ntries < 100; ntries++) {
 if (run_read(sc, RT2860_ASIC_VER_ID, &tmp) != 0)
 goto fail;
_at__at_ -4827,9 +4833,11 _at__at_ run_init_locked(struct run_softc *sc)
 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
 ifp->if_drv_flags |= IFF_DRV_RUNNING;
 
+RUN_UNLOCK(sc);
 RUN_CMDQ_LOCK(sc);
 sc->cmdq_run = RUN_CMDQ_GO;
 RUN_CMDQ_UNLOCK(sc);
+RUN_LOCK(sc);
 
 for(i = 0; i != RUN_N_XFER; i++)
 usbd_xfer_set_stall(sc->sc_xfer[i]);
_at__at_ -4878,12 +4886,12 _at__at_ run_stop(void *arg)
 
 sc->ratectl_run = RUN_RATECTL_OFF;
 
+RUN_UNLOCK(sc);
+
 RUN_CMDQ_LOCK(sc);
 sc->cmdq_run = sc->cmdq_key_set = RUN_CMDQ_ABORT;
 RUN_CMDQ_UNLOCK(sc);
 
-RUN_UNLOCK(sc);
-
 for(i = 0; i < RUN_N_XFER; i++)
 usbd_transfer_drain(sc->sc_xfer[i]);

-- end patch --


There is a LOR between node lock and run lock in run_raw_xmit(), but I haven't 
been able to reproduce with the latest driver. This LOR has been around since 
addition of hostap mode. I didn't fix it because everyone would have objected if 
I had deferred run_raw_xmit().


Following LORs are also around from the beginning and not related to this 
change, but just for info


-- begin message --

lock order reversal:
 1st 0xffffff8000a257d0 run0_node_lock (run0_node_lock) _at_ 
/usr/src/sys/net80211/ieee80211_ioctl.c:1326
 2nd 0xffffff8000a24018 run0_com_lock (run0_com_lock) _at_ 
/usr/src/sys/net80211/ieee80211_node.c:2486
KDB: stack backtrace:
db_trace_self_wrapper() at db_trace_self_wrapper+0x2a
_witness_debugger() at _witness_debugger+0x2e
witness_checkorder() at witness_checkorder+0x81e
_mtx_lock_flags() at _mtx_lock_flags+0x78
ieee80211_node_leave() at ieee80211_node_leave+0x80
setmlme_common() at setmlme_common+0x27b
ieee80211_ioctl_setmlme() at ieee80211_ioctl_setmlme+0x7e
ieee80211_ioctl_set80211() at ieee80211_ioctl_set80211+0xaba
in_control() at in_control+0x1ff
ifioctl() at ifioctl+0x1100
kern_ioctl() at kern_ioctl+0xc5
ioctl() at ioctl+0xf0
syscallenter() at syscallenter+0x1b5
syscall() at syscall+0x4c
Xfast_syscall() at Xfast_syscall+0xe2
--- syscall (54, FreeBSD ELF64, ioctl), rip = 0x8008a8d4c, rsp = 0x7fffffffe898, 
rbp = 0x800ca6200 ---

-- end message --

-- begin message --

lock order reversal:
 1st 0xffffff8000a24018 run0_com_lock (run0_com_lock) _at_ 
/usr/src/sys/net80211/ieee80211_scan.c:683
 2nd 0xffffff8000a25928 run0_scan_lock (run0_scan_lock) _at_ 
/usr/src/sys/net80211/ieee80211_node.c:2135
KDB: stack backtrace:
db_trace_self_wrapper() at db_trace_self_wrapper+0x2a
_witness_debugger() at _witness_debugger+0x2e
witness_checkorder() at witness_checkorder+0x81e
_mtx_lock_flags() at _mtx_lock_flags+0x78
ieee80211_iterate_nodes() at ieee80211_iterate_nodes+0x3d
hostap_newstate() at hostap_newstate+0x3a1
run_newstate() at run_newstate+0x1ef
ieee80211_newstate_cb() at ieee80211_newstate_cb+0x71
taskqueue_run() at taskqueue_run+0x91
taskqueue_thread_loop() at taskqueue_thread_loop+0x3f
fork_exit() at fork_exit+0x12a
fork_trampoline() at fork_trampoline+0xe
--- trap 0, rip = 0, rsp = 0xffffff803e5b7d30, rbp = 0 ---

-- end message --


AK
Received on Tue Jul 20 2010 - 08:03:23 UTC

This archive was generated by hypermail 2.4.0 : Wed May 19 2021 - 11:40:05 UTC