=== sys/dev/oce/oce_if.c ================================================================== --- sys/dev/oce/oce_if.c (revision 271555) +++ sys/dev/oce/oce_if.c (local) @@ -1731,7 +1731,9 @@ sc->ifp->if_baudrate = IF_Gbps(10); #if __FreeBSD_version >= 1000000 - sc->ifp->if_hw_tsomax = OCE_MAX_TSO_SIZE; + sc->ifp->if_hw_tsomax = 65536 - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN); + sc->ifp->if_hw_tsomaxsegcount = OCE_MAX_TX_ELEMENTS; + sc->ifp->if_hw_tsomaxsegsize = 4096; #endif ether_ifattach(sc->ifp, sc->macaddr.mac_addr); === sys/dev/oce/oce_if.h ================================================================== --- sys/dev/oce/oce_if.h (revision 271555) +++ sys/dev/oce/oce_if.h (local) @@ -152,7 +152,6 @@ #define OCE_MAX_TX_ELEMENTS 29 #define OCE_MAX_TX_DESC 1024 #define OCE_MAX_TX_SIZE 65535 -#define OCE_MAX_TSO_SIZE (65535 - ETHER_HDR_LEN) #define OCE_MAX_RX_SIZE 4096 #define OCE_MAX_RQ_POSTS 255 #define OCE_DEFAULT_PROMISCUOUS 0 === sys/dev/vmware/vmxnet3/if_vmx.c ================================================================== --- sys/dev/vmware/vmxnet3/if_vmx.c (revision 271555) +++ sys/dev/vmware/vmxnet3/if_vmx.c (local) @@ -1722,7 +1722,9 @@ ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_init = vmxnet3_init; ifp->if_ioctl = vmxnet3_ioctl; - ifp->if_hw_tsomax = VMXNET3_TSO_MAXSIZE; + ifp->if_hw_tsomax = 65536 - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN); + ifp->if_hw_tsomaxsegcount = VMXNET3_TX_MAXSEGS; + ifp->if_hw_tsomaxsegsize = VMXNET3_TX_MAXSEGSIZE; #ifdef VMXNET3_LEGACY_TX ifp->if_start = vmxnet3_start; === sys/dev/vmware/vmxnet3/if_vmxvar.h ================================================================== --- sys/dev/vmware/vmxnet3/if_vmxvar.h (revision 271555) +++ sys/dev/vmware/vmxnet3/if_vmxvar.h (local) @@ -277,8 +277,6 @@ */ #define VMXNET3_TX_MAXSEGS 32 #define VMXNET3_TX_MAXSIZE (VMXNET3_TX_MAXSEGS * MCLBYTES) -#define VMXNET3_TSO_MAXSIZE \ - (VMXNET3_TX_MAXSIZE - sizeof(struct ether_vlan_header)) /* * Maximum support Tx segments size. The length field in the === sys/dev/xen/netfront/netfront.c ================================================================== --- sys/dev/xen/netfront/netfront.c (revision 271555) +++ sys/dev/xen/netfront/netfront.c (local) @@ -134,7 +134,6 @@ * to mirror the Linux MAX_SKB_FRAGS constant. */ #define MAX_TX_REQ_FRAGS (65536 / PAGE_SIZE + 2) -#define NF_TSO_MAXBURST ((IP_MAXPACKET / PAGE_SIZE) * MCLBYTES) #define RX_COPY_THRESHOLD 256 @@ -2102,7 +2101,9 @@ ifp->if_hwassist = XN_CSUM_FEATURES; ifp->if_capabilities = IFCAP_HWCSUM; - ifp->if_hw_tsomax = NF_TSO_MAXBURST; + ifp->if_hw_tsomax = 65536 - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN); + ifp->if_hw_tsomaxsegcount = MAX_TX_REQ_FRAGS; + ifp->if_hw_tsomaxsegsize = PAGE_SIZE; ether_ifattach(ifp, np->mac); callout_init(&np->xn_stat_ch, CALLOUT_MPSAFE); === sys/kern/uipc_sockbuf.c ================================================================== --- sys/kern/uipc_sockbuf.c (revision 271555) +++ sys/kern/uipc_sockbuf.c (local) @@ -1015,6 +1015,37 @@ } /* + * Return the first mbuf and the mbuf data offset for the provided + * send offset without changing the "sb_sndptroff" field. + */ +struct mbuf * +sbsndmbuf(struct sockbuf *sb, u_int off, u_int *moff) +{ + struct mbuf *m; + + KASSERT(sb->sb_mb != NULL, ("%s: sb_mb is NULL", __func__)); + + /* + * If the "off" is below the stored offset, which happens on + * retransmits, just use "sb_mb": + */ + if (sb->sb_sndptr == NULL || sb->sb_sndptroff > off) { + m = sb->sb_mb; + } else { + m = sb->sb_sndptr; + off -= sb->sb_sndptroff; + } + while (off > 0 && m != NULL) { + if (off < m->m_len) + break; + off -= m->m_len; + m = m->m_next; + } + *moff = off; + return (m); +} + +/* * Drop a record off the front of a sockbuf and move the next record to the * front. */ === sys/net/if.c ================================================================== --- sys/net/if.c (revision 271555) +++ sys/net/if.c (local) @@ -584,6 +584,57 @@ if_attach_internal(ifp, 0); } +/* + * Compute the least common TSO limit. + */ +void +if_hw_tsomax_common(if_t ifp, struct ifnet_hw_tsomax *pmax) +{ + /* + * 1) If there is no limit currently, take the limit from + * the network adapter. + * + * 2) If the network adapter has a limit below the current + * limit, apply it. + */ + if (pmax->tsomaxbytes == 0 || (ifp->if_hw_tsomax != 0 && + ifp->if_hw_tsomax < pmax->tsomaxbytes)) { + pmax->tsomaxbytes = ifp->if_hw_tsomax; + } + if (pmax->tsomaxsegcount == 0 || (ifp->if_hw_tsomaxsegcount != 0 && + ifp->if_hw_tsomaxsegcount < pmax->tsomaxsegcount)) { + pmax->tsomaxsegcount = ifp->if_hw_tsomaxsegcount; + } + if (pmax->tsomaxsegsize == 0 || (ifp->if_hw_tsomaxsegsize != 0 && + ifp->if_hw_tsomaxsegsize < pmax->tsomaxsegsize)) { + pmax->tsomaxsegsize = ifp->if_hw_tsomaxsegsize; + } +} + +/* + * Update TSO limit of a network adapter. + * + * Returns zero if no change. Else non-zero. + */ +int +if_hw_tsomax_update(if_t ifp, struct ifnet_hw_tsomax *pmax) +{ + int retval = 0; + if (ifp->if_hw_tsomax != pmax->tsomaxbytes) { + ifp->if_hw_tsomax = pmax->tsomaxbytes; + retval++; + } + if (ifp->if_hw_tsomaxsegsize != pmax->tsomaxsegsize) { + ifp->if_hw_tsomaxsegsize = pmax->tsomaxsegsize; + retval++; + } + if (ifp->if_hw_tsomaxsegcount != pmax->tsomaxsegcount) { + ifp->if_hw_tsomaxsegcount = pmax->tsomaxsegcount; + retval++; + } + return (retval); +} + static void if_attach_internal(struct ifnet *ifp, int vmove) { @@ -659,13 +710,36 @@ ifp->if_broadcastaddr = NULL; #if defined(INET) || defined(INET6) - /* Initialize to max value. */ - if (ifp->if_hw_tsomax == 0) - ifp->if_hw_tsomax = min(IP_MAXPACKET, 32 * MCLBYTES - + /* Use defaults for TSO, if nothing is set */ + if (ifp->if_hw_tsomax == 0 && + ifp->if_hw_tsomaxsegcount == 0 && + ifp->if_hw_tsomaxsegsize == 0) { + /* + * The TSO defaults needs to be such that an + * NFS mbuf list of 35 mbufs totalling just + * below 64K works and that a chain of mbufs + * can be defragged into at most 32 segments: + */ + ifp->if_hw_tsomax = min(IP_MAXPACKET, (32 * MCLBYTES) - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN)); - KASSERT(ifp->if_hw_tsomax <= IP_MAXPACKET && - ifp->if_hw_tsomax >= IP_MAXPACKET / 8, - ("%s: tsomax outside of range", __func__)); + ifp->if_hw_tsomaxsegcount = 35; + ifp->if_hw_tsomaxsegsize = 2048; /* 2K */ + + /* XXX some drivers set IFCAP_TSO after ethernet attach */ + if (ifp->if_capabilities & IFCAP_TSO) { + if_printf(ifp, "Using defaults for TSO: %u/%u/%u\n", + ifp->if_hw_tsomax, + ifp->if_hw_tsomaxsegcount, + ifp->if_hw_tsomaxsegsize); + } + } + /* + * If the "if_hw_tsomax" limit is set, check if it is + * too small: + */ + KASSERT(ifp->if_hw_tsomax == 0 || + ifp->if_hw_tsomax >= (IP_MAXPACKET / 8), + ("%s: if_hw_tsomax is outside of range", __func__)); #endif } #ifdef VIMAGE === sys/net/if_lagg.c ================================================================== --- sys/net/if_lagg.c (revision 271555) +++ sys/net/if_lagg.c (local) @@ -445,23 +445,18 @@ struct lagg_port *lp; int cap = ~0, ena = ~0; u_long hwa = ~0UL; -#if defined(INET) || defined(INET6) - u_int hw_tsomax = IP_MAXPACKET; /* Initialize to the maximum value. */ -#else - u_int hw_tsomax = ~0; /* if_hw_tsomax is only for INET/INET6, but.. */ -#endif + struct ifnet_hw_tsomax hw_tsomax; LAGG_WLOCK_ASSERT(sc); + memset(&hw_tsomax, 0, sizeof(hw_tsomax)); + /* Get capabilities from the lagg ports */ SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) { cap &= lp->lp_ifp->if_capabilities; ena &= lp->lp_ifp->if_capenable; hwa &= lp->lp_ifp->if_hwassist; - /* Set to the minimum value of the lagg ports. */ - if (lp->lp_ifp->if_hw_tsomax < hw_tsomax && - lp->lp_ifp->if_hw_tsomax > 0) - hw_tsomax = lp->lp_ifp->if_hw_tsomax; + if_hw_tsomax_common(lp->lp_ifp, &hw_tsomax); } cap = (cap == ~0 ? 0 : cap); ena = (ena == ~0 ? 0 : ena); @@ -470,11 +465,10 @@ if (sc->sc_ifp->if_capabilities != cap || sc->sc_ifp->if_capenable != ena || sc->sc_ifp->if_hwassist != hwa || - sc->sc_ifp->if_hw_tsomax != hw_tsomax) { + if_hw_tsomax_update(sc->sc_ifp, &hw_tsomax) != 0) { sc->sc_ifp->if_capabilities = cap; sc->sc_ifp->if_capenable = ena; sc->sc_ifp->if_hwassist = hwa; - sc->sc_ifp->if_hw_tsomax = hw_tsomax; getmicrotime(&sc->sc_ifp->if_lastchange); if (sc->sc_ifflags & IFF_DEBUG) === sys/net/if_var.h ================================================================== --- sys/net/if_var.h (revision 271555) +++ sys/net/if_var.h (local) @@ -119,6 +119,12 @@ typedef int (*if_transmit_fn_t)(if_t, struct mbuf *); typedef uint64_t (*if_get_counter_t)(if_t, ifnet_counter); +struct ifnet_hw_tsomax { + u_int tsomaxbytes; /* TSO total burst length limit in bytes */ + u_int tsomaxsegcount; /* TSO maximum segment count */ + u_int tsomaxsegsize; /* TSO maximum segment size in bytes */ +}; + /* * Structure defining a network interface. * @@ -222,10 +228,11 @@ if_get_counter_t if_get_counter; /* get counter values */ /* Stuff that's only temporary and doesn't belong here. */ - u_int if_hw_tsomax; /* tso burst length limit, the minimum - * is (IP_MAXPACKET / 8). - * XXXAO: Have to find a better place - * for it eventually. */ + u_int if_hw_tsomax; /* TSO total burst length + * limit in bytes. A value of + * zero means no limit. Have + * to find a better place for + * it eventually. */ /* * Old, racy and expensive statistics, should not be used in * new drivers. @@ -243,6 +250,10 @@ uint64_t if_oqdrops; /* dropped on output */ uint64_t if_noproto; /* destined for unsupported protocol */ + /* TSO fields for segment limits. If a field is zero below, there is no limit. */ + u_int if_hw_tsomaxsegcount; /* TSO maximum segment count */ + u_int if_hw_tsomaxsegsize; /* TSO maximum segment size in bytes */ + /* * Spare fields to be added before branching a stable branch, so * that structure can be enhanced without changing the kernel @@ -615,5 +626,9 @@ int drbr_needs_enqueue_drv(if_t ifp, struct buf_ring *br); int drbr_enqueue_drv(if_t ifp, struct buf_ring *br, struct mbuf *m); +/* TSO */ +void if_hw_tsomax_common(if_t ifp, struct ifnet_hw_tsomax *); +int if_hw_tsomax_update(if_t ifp, struct ifnet_hw_tsomax *); + #endif /* _KERNEL */ #endif /* !_NET_IF_VAR_H_ */ === sys/net/if_vlan.c ================================================================== --- sys/net/if_vlan.c (revision 271555) +++ sys/net/if_vlan.c (local) @@ -1531,6 +1531,7 @@ { struct ifnet *p = PARENT(ifv); struct ifnet *ifp = ifv->ifv_ifp; + struct ifnet_hw_tsomax hw_tsomax; TRUNK_LOCK_ASSERT(TRUNK(ifv)); @@ -1557,8 +1558,9 @@ * propagate the hardware-assisted flag. TSO on VLANs * does not necessarily require hardware VLAN tagging. */ - if (p->if_hw_tsomax > 0) - ifp->if_hw_tsomax = p->if_hw_tsomax; + memset(&hw_tsomax, 0, sizeof(hw_tsomax)); + if_hw_tsomax_common(p, &hw_tsomax); + if_hw_tsomax_update(ifp, &hw_tsomax); if (p->if_capabilities & IFCAP_VLAN_HWTSO) ifp->if_capabilities |= p->if_capabilities & IFCAP_TSO; if (p->if_capenable & IFCAP_VLAN_HWTSO) { === sys/netinet/tcp_input.c ================================================================== --- sys/netinet/tcp_input.c (revision 271555) +++ sys/netinet/tcp_input.c (local) @@ -3618,6 +3618,8 @@ if (cap.ifcap & CSUM_TSO) { tp->t_flags |= TF_TSO; tp->t_tsomax = cap.tsomax; + tp->t_tsomaxsegcount = cap.tsomaxsegcount; + tp->t_tsomaxsegsize = cap.tsomaxsegsize; } } === sys/netinet/tcp_output.c ================================================================== --- sys/netinet/tcp_output.c (revision 271555) +++ sys/netinet/tcp_output.c (local) @@ -767,28 +767,113 @@ flags &= ~TH_FIN; if (tso) { + u_int if_hw_tsomax; + u_int if_hw_tsomaxsegcount; + u_int if_hw_tsomaxsegsize; + struct mbuf *mb; + u_int moff; + int max_len; + + /* extract TSO information */ + if_hw_tsomax = tp->t_tsomax; + if_hw_tsomaxsegcount = tp->t_tsomaxsegcount; + if_hw_tsomaxsegsize = tp->t_tsomaxsegsize; + + /* + * Limit a TSO burst to prevent it from + * overflowing or exceeding the maximum length + * allowed by the network interface: + */ KASSERT(ipoptlen == 0, ("%s: TSO can't do IP options", __func__)); /* - * Limit a burst to t_tsomax minus IP, - * TCP and options length to keep ip->ip_len - * from overflowing or exceeding the maximum - * length allowed by the network interface. + * Check if we should limit by maximum payload + * length: */ - if (len > tp->t_tsomax - hdrlen) { - len = tp->t_tsomax - hdrlen; - sendalot = 1; + if (if_hw_tsomax != 0) { + /* compute maximum TSO length */ + max_len = (if_hw_tsomax - hdrlen); + if (max_len <= 0) { + len = 0; + } else if (len > (u_int)max_len) { + sendalot = 1; + len = (u_int)max_len; + } } /* + * Check if we should limit by maximum segment + * size and count: + */ + if (if_hw_tsomaxsegcount != 0 && if_hw_tsomaxsegsize != 0) { + max_len = 0; + mb = sbsndmbuf(&so->so_snd, off, &moff); + + while (mb != NULL && (u_int)max_len < len) { + u_int cur_length; + u_int cur_frags; + + /* + * Get length of mbuf fragment + * and how many hardware + * frags, rounded up, it would + * use: + */ + cur_length = (mb->m_len - moff); + cur_frags = (cur_length + if_hw_tsomaxsegsize - + 1) / if_hw_tsomaxsegsize; + + /* Handle special case: Zero Length Mbuf */ + if (cur_frags == 0) + cur_frags = 1; + + /* + * Check if the fragment limit + * will be reached or + * exceeded: + */ + if (cur_frags >= if_hw_tsomaxsegcount) { + max_len += min(cur_length, + if_hw_tsomaxsegcount * + if_hw_tsomaxsegsize); + break; + } + max_len += cur_length; + if_hw_tsomaxsegcount -= cur_frags; + moff = 0; + mb = mb->m_next; + } + if (max_len <= 0) { + len = 0; + } else if (len > (u_int)max_len) { + sendalot = 1; + len = (u_int)max_len; + } + } + + /* * Prevent the last segment from being - * fractional unless the send sockbuf can - * be emptied. + * fractional unless the send sockbuf can be + * emptied: */ - if (sendalot && off + len < so->so_snd.sb_cc) { - len -= len % (tp->t_maxopd - optlen); + max_len = (tp->t_maxopd - optlen); + if ((off + len) < so->so_snd.sb_cc) { + moff = len % (u_int)max_len; + if (moff != 0) { + len -= moff; + sendalot = 1; + } + } + + /* + * In case there are too many small fragments + * don't use TSO: + */ + if (len <= (u_int)max_len) { + len = (u_int)max_len; sendalot = 1; + tso = 0; } /* === sys/netinet/tcp_subr.c ================================================================== --- sys/netinet/tcp_subr.c (revision 271555) +++ sys/netinet/tcp_subr.c (local) @@ -1819,6 +1819,8 @@ ifp->if_hwassist & CSUM_TSO) { cap->ifcap |= CSUM_TSO; cap->tsomax = ifp->if_hw_tsomax; + cap->tsomaxsegcount = ifp->if_hw_tsomaxsegcount; + cap->tsomaxsegsize = ifp->if_hw_tsomaxsegsize; } } RTFREE(sro.ro_rt); @@ -1858,6 +1860,8 @@ ifp->if_hwassist & CSUM_TSO) { cap->ifcap |= CSUM_TSO; cap->tsomax = ifp->if_hw_tsomax; + cap->tsomaxsegcount = ifp->if_hw_tsomaxsegcount; + cap->tsomaxsegsize = ifp->if_hw_tsomaxsegsize; } } RTFREE(sro6.ro_rt); === sys/netinet/tcp_var.h ================================================================== --- sys/netinet/tcp_var.h (revision 271555) +++ sys/netinet/tcp_var.h (local) @@ -199,9 +199,12 @@ u_int t_keepintvl; /* interval between keepalives */ u_int t_keepcnt; /* number of keepalives before close */ - u_int t_tsomax; /* tso burst length limit */ + u_int t_tsomax; /* TSO total burst length limit in bytes */ - uint32_t t_ispare[8]; /* 5 UTO, 3 TBD */ + uint32_t t_ispare[6]; /* 5 UTO, 1 TBD */ + uint32_t t_tsomaxsegcount; /* TSO maximum segment count */ + uint32_t t_tsomaxsegsize; /* TSO maximum segment size in bytes */ + void *t_pspare2[4]; /* 1 TCP_SIGNATURE, 3 TBD */ uint64_t _pad[6]; /* 6 TBD (1-2 CC/RTT?) */ }; @@ -324,6 +327,8 @@ struct tcp_ifcap { int ifcap; u_int tsomax; + u_int tsomaxsegcount; + u_int tsomaxsegsize; }; #ifndef _NETINET_IN_PCB_H_ === sys/ofed/drivers/net/mlx4/en_netdev.c ================================================================== --- sys/ofed/drivers/net/mlx4/en_netdev.c (revision 271555) +++ sys/ofed/drivers/net/mlx4/en_netdev.c (local) @@ -673,6 +673,11 @@ else priv->rx_csum = 0; + /* set TSO limits so that we don't have to drop TX packets */ + dev->if_hw_tsomax = 65536 - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN); + dev->if_hw_tsomaxsegcount = 16; + dev->if_hw_tsomaxsegsize = 65536; /* XXX can do up to 4GByte */ + err = mlx4_wol_read(priv->mdev->dev, &config, priv->port); if (err) { en_err(priv, "Failed to get WoL info, unable to modify\n"); === sys/sys/sockbuf.h ================================================================== --- sys/sys/sockbuf.h (revision 271555) +++ sys/sys/sockbuf.h (local) @@ -158,6 +158,8 @@ struct thread *td); struct mbuf * sbsndptr(struct sockbuf *sb, u_int off, u_int len, u_int *moff); +struct mbuf * + sbsndmbuf(struct sockbuf *sb, u_int off, u_int *moff); void sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb); int sbwait(struct sockbuf *sb); int sblock(struct sockbuf *sb, int flags);