Hi,

while experimenting with netmap in emulation mode I found some further problems in the "oce" driver. In detail:

- missing locking: some functions write to the wq struct (the TX queue descriptor) without acquiring the queue lock, in particular oce_wq_handler(), which is invoked from the interrupt routine. This can lead to race conditions.

- TX cleanup: in oce_if_deactivate() the wq queues are drained, but mbufs that are still pending are not freed. I therefore added oce_tx_clean(), which releases any pending mbufs.

I also ran iperf3 in the same environment as Borja and I don't get a panic. Can you try this patch? Do you still see the panic?

Cheers,
Stefano Garzarella

diff --git a/sys/dev/oce/oce_if.c b/sys/dev/oce/oce_if.c
index af57491..33b35b4 100644
--- a/sys/dev/oce/oce_if.c
+++ b/sys/dev/oce/oce_if.c
@@ -142,6 +142,7 @@
 static int  oce_tx(POCE_SOFTC sc, struct mbuf **mpp, int wq_index);
 static void oce_tx_restart(POCE_SOFTC sc, struct oce_wq *wq);
 static void oce_tx_complete(struct oce_wq *wq, uint32_t wqe_idx,
					uint32_t status);
+static void oce_tx_clean(POCE_SOFTC sc);
 static int  oce_multiq_transmit(struct ifnet *ifp, struct mbuf *m,
					struct oce_wq *wq);
@@ -585,8 +586,10 @@ oce_multiq_flush(struct ifnet *ifp)
 	int i = 0;
 
 	for (i = 0; i < sc->nwqs; i++) {
+		LOCK(&sc->wq[i]->tx_lock);
 		while ((m = buf_ring_dequeue_sc(sc->wq[i]->br)) != NULL)
 			m_freem(m);
+		UNLOCK(&sc->wq[i]->tx_lock);
 	}
 	if_qflush(ifp);
 }
@@ -1052,6 +1055,19 @@ oce_tx_complete(struct oce_wq *wq, uint32_t wqe_idx, uint32_t status)
 	}
 }
 
+static void
+oce_tx_clean(POCE_SOFTC sc) {
+	int i = 0;
+	struct oce_wq *wq;
+
+	for_all_wq_queues(sc, wq, i) {
+		LOCK(&wq->tx_lock);
+		while (wq->pkt_desc_tail != wq->pkt_desc_head) {
+			oce_tx_complete(wq, 0, 0);
+		}
+		UNLOCK(&wq->tx_lock);
+	}
+}
 
 static void
 oce_tx_restart(POCE_SOFTC sc, struct oce_wq *wq)
@@ -1213,6 +1229,8 @@ oce_wq_handler(void *arg)
 	struct oce_nic_tx_cqe *cqe;
 	int num_cqes = 0;
 
+	LOCK(&wq->tx_lock);
+
 	bus_dmamap_sync(cq->ring->dma.tag, cq->ring->dma.map,
				BUS_DMASYNC_POSTWRITE);
 	cqe = RING_GET_CONSUMER_ITEM_VA(cq->ring, struct oce_nic_tx_cqe);
@@ -1237,6 +1255,8 @@ oce_wq_handler(void *arg)
 	if (num_cqes)
 		oce_arm_cq(sc, cq->cq_id, num_cqes, FALSE);
 
+	UNLOCK(&wq->tx_lock);
+
 	return 0;
 }
@@ -2087,6 +2107,9 @@ oce_if_deactivate(POCE_SOFTC sc)
 	/* Delete RX queue in card with flush param */
 	oce_stop_rx(sc);
 
+	/* Flush the mbufs that are still in TX queues */
+	oce_tx_clean(sc);
+
 	/* Invalidate any pending cq and eq entries*/
 	for_all_evnt_queues(sc, eq, i)
 		oce_drain_eq(eq);
diff --git a/sys/dev/oce/oce_queue.c b/sys/dev/oce/oce_queue.c
index 308c16d..161011b 100644
--- a/sys/dev/oce/oce_queue.c
+++ b/sys/dev/oce/oce_queue.c
@@ -969,7 +969,9 @@ oce_start_rq(struct oce_rq *rq)
 int
 oce_start_wq(struct oce_wq *wq)
 {
+	LOCK(&wq->tx_lock); /* XXX: maybe not necessary */
 	oce_arm_cq(wq->parent, wq->cq->cq_id, 0, TRUE);
+	UNLOCK(&wq->tx_lock);
 	return 0;
 }
@@ -1076,6 +1078,8 @@ oce_drain_wq_cq(struct oce_wq *wq)
 	struct oce_nic_tx_cqe *cqe;
 	int num_cqes = 0;
 
+	LOCK(&wq->tx_lock); /* XXX: maybe not necessary */
+
 	bus_dmamap_sync(cq->ring->dma.tag, cq->ring->dma.map,
			BUS_DMASYNC_POSTWRITE);
@@ -1093,6 +1097,7 @@ oce_drain_wq_cq(struct oce_wq *wq)
 	oce_arm_cq(sc, cq->cq_id, num_cqes, FALSE);
 
+	UNLOCK(&wq->tx_lock);
 }

2014-07-07 13:57 GMT+02:00 Borja Marcos <borjam@sarenet.es>:
>
> On Jul 7, 2014, at 1:23 PM, Luigi Rizzo wrote:
>
> > On Mon, Jul 7, 2014 at 1:03 PM, Borja Marcos <borjam@sarenet.es> wrote:
> > we'll try to investigate, can you tell us more about the environment you
> > use ? (FreeBSD version, card model (PCI id perhaps), iperf3 invocation
> > line, interface configuration etc.)
> > The main differences between 10.0.747.0 and the code in head (after
> > our fix) is the use of drbr_enqueue/dequeue versus the peek/putback
> > in the transmit routine.
> >
> > Both drivers still have issues when the link flaps because the
> > transmit queue is not cleaned up properly (unlike what happens in the
> > linux driver and all FreeBSD drivers for different hardware), so it
> > might well be that you are seeing some side effect of that or other
> > problem which manifests itself differently depending on the environment.
> >
> > 'instant panic' by itself does not tell us anything about what could
> > be the problem you experience (and we do not see it with either driver).
>
> The environment details are here:
>
> https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=183391
>
> The way I produce an instant panic is:
>
> 1) Connect to another machine (cross connect cable)
>
> 2) iperf3 -s on the other machine
> (The other machine is different, it has an "ix" card)
>
> 3) iperf3 -t 30 -P 4 -c 10.0.0.1 -N
>
> In less than 30 seconds, panic.
>
> mierda dumped core - see /var/crash/vmcore.0
>
> Mon Jul 7 13:06:44 CEST 2014
>
> FreeBSD mierda 10.0-STABLE FreeBSD 10.0-STABLE #2: Mon Jul 7 11:41:45
> CEST 2014 root@mierda:/usr/obj/usr/src/sys/GENERIC amd64
>
> panic: sbsndptr: sockbuf 0xfffff800a70489b0 and mbuf 0xfffff801a3326e00
> clashing
>
> GNU gdb 6.1.1 [FreeBSD]
> Copyright 2004 Free Software Foundation, Inc.
> GDB is free software, covered by the GNU General Public License, and you are
> welcome to change it and/or distribute copies of it under certain conditions.
> Type "show copying" to see the conditions.
> There is absolutely no warranty for GDB. Type "show warranty" for details.
> This GDB was configured as "amd64-marcel-freebsd"...
>
> Unread portion of the kernel message buffer:
> panic: sbsndptr: sockbuf 0xfffff800a70489b0 and mbuf 0xfffff801a3326e00
> clashing
> cpuid = 12
> KDB: stack backtrace:
> #0 0xffffffff8092a470 at kdb_backtrace+0x60
> #1 0xffffffff808ef9c5 at panic+0x155
> #2 0xffffffff80962710 at sbdroprecord_locked+0
> #3 0xffffffff80a8ba8c at tcp_output+0xdbc
> #4 0xffffffff80a8987f at tcp_do_segment+0x30ff
> #5 0xffffffff80a85b34 at tcp_input+0xd04
> #6 0xffffffff80a1af57 at ip_input+0x97
> #7 0xffffffff809ba512 at netisr_dispatch_src+0x62
> #8 0xffffffff809b1ae6 at ether_demux+0x126
> #9 0xffffffff809b278e at ether_nh_input+0x35e
> #10 0xffffffff809ba512 at netisr_dispatch_src+0x62
> #11 0xffffffff81c19ab9 at oce_rx+0x3c9
> #12 0xffffffff81c19536 at oce_rq_handler+0xb6
> #13 0xffffffff81c1bb1c at oce_intr+0xdc
> #14 0xffffffff80938b35 at taskqueue_run_locked+0xe5
> #15 0xffffffff809395c8 at taskqueue_thread_loop+0xa8
> #16 0xffffffff808c057a at fork_exit+0x9a
> #17 0xffffffff80ccb51e at fork_trampoline+0xe
> Uptime: 51m20s
>
> Borja.
>
> _______________________________________________
> freebsd-current@freebsd.org mailing list
> http://lists.freebsd.org/mailman/listinfo/freebsd-current
> To unsubscribe, send any mail to "freebsd-current-unsubscribe@freebsd.org"

-- 
Stefano Garzarella

Received on Tue Jul 15 2014 - 06:22:43 UTC
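P.S. For anyone reviewing the cleanup logic without the hardware at hand, here is a small userspace sketch of the pattern oce_tx_clean() follows: take the queue lock, then complete descriptors one at a time until the tail index catches up with the head. This is my own illustration, not driver code; the names (txq, txq_clean, txq_complete_one, RING_SIZE) are made up, and a pthread mutex stands in for the driver's LOCK()/UNLOCK() on tx_lock.

```c
#include <assert.h>
#include <pthread.h>

#define RING_SIZE 8

/* Toy stand-in for struct oce_wq: a ring of descriptors with
 * producer (head) and consumer (tail) indices, guarded by a lock. */
struct txq {
	pthread_mutex_t lock;
	int head;               /* next slot to be posted */
	int tail;               /* next slot to be reclaimed */
	int pending[RING_SIZE]; /* 1 = descriptor still in flight */
};

/* Mimics one oce_tx_complete() call: reclaim the descriptor at the
 * tail. The caller must hold txq->lock, which is the invariant the
 * patch enforces for the real driver. */
void txq_complete_one(struct txq *q)
{
	q->pending[q->tail % RING_SIZE] = 0;
	q->tail++;
}

/* Mimics oce_tx_clean(): at teardown, drain everything still in
 * flight under the lock so no descriptor (mbuf) is leaked. */
void txq_clean(struct txq *q)
{
	pthread_mutex_lock(&q->lock);
	while (q->tail != q->head)
		txq_complete_one(q);
	pthread_mutex_unlock(&q->lock);
}

/* Number of descriptors still in flight. */
int txq_pending(struct txq *q)
{
	return q->head - q->tail;
}
```

Posting a few fake descriptors and then calling txq_clean() leaves the ring empty, which is the state oce_if_deactivate() wants before the queues are torn down.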