diff -Naur /usr/src/sys.dist/dev/em/if_em.c /usr/src/sys/dev/em/if_em.c --- /usr/src/sys.dist/dev/em/if_em.c Fri Aug 4 00:56:33 2006 +++ /usr/src/sys/dev/em/if_em.c Tue Sep 5 15:58:42 2006 @@ -72,6 +72,8 @@ #include #include +#include + #include #include #include @@ -229,6 +231,10 @@ struct mbuf *); static void em_transmit_checksum_setup(struct adapter *, struct mbuf *, uint32_t *, uint32_t *); +#ifdef EM_TSO +static boolean_t em_tso_setup(struct adapter *, struct mbuf *, u_int32_t *, + uint32_t *); +#endif static void em_set_promisc(struct adapter *); static void em_disable_promisc(struct adapter *); static void em_set_multi(struct adapter *); @@ -302,6 +308,7 @@ #define E1000_TICKS_TO_USECS(ticks) ((1024 * (ticks) + 500) / 1000) #define E1000_USECS_TO_TICKS(usecs) ((1000 * (usecs) + 512) / 1024) +#define M_TSO_LEN 66 static int em_tx_int_delay_dflt = E1000_TICKS_TO_USECS(EM_TIDV); static int em_rx_int_delay_dflt = E1000_TICKS_TO_USECS(EM_RDTR); @@ -905,6 +912,10 @@ ifp->if_capenable ^= IFCAP_HWCSUM; reinit = 1; } + if (mask & IFCAP_TSO) { + ifp->if_capenable ^= IFCAP_TSO; + reinit = 1; + } if (mask & IFCAP_VLAN_HWTAGGING) { ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; reinit = 1; @@ -1061,11 +1072,14 @@ ifp->if_drv_flags |= IFF_DRV_RUNNING; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; + ifp->if_hwassist = 0; if (adapter->hw.mac_type >= em_82543) { if (ifp->if_capenable & IFCAP_TXCSUM) ifp->if_hwassist = EM_CHECKSUM_FEATURES; - else - ifp->if_hwassist = 0; +#ifdef EM_TSO + if (ifp->if_capenable & IFCAP_TSO) + ifp->if_hwassist |= EM_TCPSEG_FEATURES; +#endif } callout_reset(&adapter->timer, hz, em_local_timer, adapter); @@ -1416,11 +1430,17 @@ struct m_tag *mtag; uint32_t txd_upper, txd_lower, txd_used, txd_saved; int nsegs, i, j; - int error; + int error, do_tso, tso_desc = 0; m_head = *m_headp; current_tx_desc = NULL; - txd_used = txd_saved = 0; + txd_upper = txd_lower = txd_used = txd_saved = 0; + +#ifdef EM_TSO + do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0); 
+#else + do_tso = 0; +#endif /* * Force a cleanup if number of TX descriptors @@ -1473,6 +1493,17 @@ *m_headp = m_head; } + /* + * TSO workaround: a header-only mbuf needs 4 bytes of data pulled in. + * m_pullup() may replace or free the chain, so refresh *m_headp first. + */ + if (do_tso && (m_head->m_len <= M_TSO_LEN)) { + m_head = m_pullup(m_head, M_TSO_LEN + 4); + *m_headp = m_head; + if (m_head == NULL) + return (ENOBUFS); + } + /* * Map the packet for DMA. */ @@ -1487,23 +1518,43 @@ } KASSERT(nsegs != 0, ("em_encap: empty packet")); - if (nsegs > adapter->num_tx_desc_avail) { + /* + * TSO Hardware workaround, if this packet is not + * TSO, and is only a single descriptor long, and + * it follows a TSO burst, then we need to add a + * sentinel descriptor to prevent premature writeback. + */ + if ((do_tso == 0) && (adapter->tx_tso == TRUE)) { + if (nsegs == 1) + tso_desc = TRUE; + adapter->tx_tso = FALSE; + } + + if (nsegs > adapter->num_tx_desc_avail - 2) { adapter->no_tx_desc_avail2++; error = ENOBUFS; goto encap_fail; } - if (ifp->if_hwassist > 0) - em_transmit_checksum_setup(adapter, m_head, &txd_upper, &txd_lower); - else - txd_upper = txd_lower = 0; + /* Do hardware assists */ + if (ifp->if_hwassist > 0) { +#ifdef EM_TSO + if (em_tso_setup(adapter, m_head, &txd_upper, &txd_lower)) { + /* we need to make a final sentinel transmit desc */ + tso_desc = TRUE; + } else +#endif + em_transmit_checksum_setup(adapter, m_head, + &txd_upper, &txd_lower); + } i = adapter->next_avail_tx_desc; - if (adapter->pcix_82544) { + if (adapter->pcix_82544) txd_saved = i; - txd_used = 0; - } + for (j = 0; j < nsegs; j++) { + bus_size_t seg_len; + bus_addr_t seg_addr; /* If adapter is 82544 and on PCIX bus. 
*/ if(adapter->pcix_82544) { DESC_ARRAY desc_array; @@ -1537,26 +1588,57 @@ txd_used++; } } else { - tx_buffer = &adapter->tx_buffer_area[i]; - current_tx_desc = &adapter->tx_desc_base[i]; - - current_tx_desc->buffer_addr = htole64(segs[j].ds_addr); - current_tx_desc->lower.data = htole32( - adapter->txd_cmd | txd_lower | segs[j].ds_len); - current_tx_desc->upper.data = htole32(txd_upper); - - if (++i == adapter->num_tx_desc) - i = 0; - - tx_buffer->m_head = NULL; + tx_buffer = &adapter->tx_buffer_area[i]; + current_tx_desc = &adapter->tx_desc_base[i]; + seg_addr = segs[j].ds_addr; + seg_len = segs[j].ds_len; + /* + ** TSO Workaround: split the last descriptor so a small + ** final sentinel follows. seg_addr stays in host byte + ** order so the sentinel offset is added before the swap. + */ + if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) { + seg_len -= 4; + current_tx_desc->buffer_addr = htole64(seg_addr); + current_tx_desc->lower.data = htole32( + adapter->txd_cmd | txd_lower | seg_len); + current_tx_desc->upper.data = + htole32(txd_upper); + if (++i == adapter->num_tx_desc) + i = 0; + /* Now make the sentinel */ + ++txd_used; /* using an extra txd */ + current_tx_desc = &adapter->tx_desc_base[i]; + tx_buffer = &adapter->tx_buffer_area[i]; + current_tx_desc->buffer_addr = + htole64(seg_addr + seg_len); + current_tx_desc->lower.data = htole32( + adapter->txd_cmd | txd_lower | 4); + current_tx_desc->upper.data = + htole32(txd_upper); + if (++i == adapter->num_tx_desc) + i = 0; + } else { + current_tx_desc->buffer_addr = htole64(seg_addr); + current_tx_desc->lower.data = htole32( + adapter->txd_cmd | txd_lower | seg_len); + current_tx_desc->upper.data = + htole32(txd_upper); + if (++i == adapter->num_tx_desc) + i = 0; + } + tx_buffer->m_head = NULL; } } adapter->next_avail_tx_desc = i; if (adapter->pcix_82544) adapter->num_tx_desc_avail -= txd_used; - else + else { adapter->num_tx_desc_avail -= nsegs; + if (tso_desc) /* TSO used an extra for sentinel */ + adapter->num_tx_desc_avail -= txd_used; + } if (mtag != NULL) { /* Set the vlan 
id. */ @@ -2226,6 +2308,15 @@ ifp->if_capenable |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM; } +#ifdef EM_TSO + /* Enable TSO if available */ + if ((adapter->hw.mac_type > em_82544) && + (adapter->hw.mac_type != em_82547)) { + ifp->if_capabilities |= IFCAP_TSO; + ifp->if_capenable |= IFCAP_TSO; + } +#endif + /* * Tell the upper layer(s) we support long frames. */ @@ -2436,15 +2527,27 @@ static int em_setup_transmit_structures(struct adapter *adapter) { +#ifdef EM_TSO + struct ifnet *ifp = adapter->ifp; +#endif device_t dev = adapter->dev; struct em_buffer *tx_buffer; - bus_size_t size; + bus_size_t size, segsize; int error, i; /* * Setup DMA descriptor areas. */ - size = roundup2(adapter->hw.max_frame_size, MCLBYTES); + segsize = size = roundup2(adapter->hw.max_frame_size, MCLBYTES); + +#ifdef EM_TSO + /* TSO needs large sizes; if_hwassist may lag behind if_capenable */ + if (ifp->if_capenable & IFCAP_TSO) { + size = EM_TSO_SIZE; + segsize = PAGE_SIZE; + } +#endif + if ((error = bus_dma_tag_create(NULL, /* parent */ 1, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ @@ -2452,7 +2555,7 @@ NULL, NULL, /* filter, filterarg */ size, /* maxsize */ EM_MAX_SCATTER, /* nsegments */ - size, /* maxsegsize */ + segsize, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockarg */ @@ -2713,6 +2816,87 @@ adapter->next_avail_tx_desc = curr_txd; } +#ifdef EM_TSO +/********************************************************************** + * Setup work for hardware segmentation offload (TSO): queue one context + * descriptor and set *txd_upper / *txd_lower for the data descriptors. + * Returns TRUE if a context was queued, FALSE if TSO does not apply. + **********************************************************************/ +static boolean_t +em_tso_setup(struct adapter *adapter, + struct mbuf *mp, + u_int32_t *txd_upper, + u_int32_t *txd_lower) +{ + struct em_context_desc *TXD; + struct em_buffer *tx_buffer; + struct ip *ip; + struct tcphdr *th; + int curr_txd, hdr_len, ip_hlen, tcp_hlen; + + if (((mp->m_pkthdr.csum_flags & CSUM_TSO) == 0) || + (mp->m_pkthdr.len <= E1000_TX_BUFFER_SIZE)) { + return FALSE; + } + + *txd_lower = 
(E1000_TXD_CMD_DEXT | + E1000_TXD_DTYP_D | + E1000_TXD_CMD_TSE); + + *txd_upper = (E1000_TXD_POPTS_IXSM | + E1000_TXD_POPTS_TXSM) << 8; + + curr_txd = adapter->next_avail_tx_desc; + tx_buffer = &adapter->tx_buffer_area[curr_txd]; + TXD = (struct em_context_desc *) &adapter->tx_desc_base[curr_txd]; + + /* The IP header sits right behind the Ethernet header. */ + ip = (struct ip *)(mtod(mp, caddr_t) + sizeof(struct ether_header)); + ip->ip_len = 0; + ip->ip_sum = 0; + ip_hlen = ip->ip_hl << 2; + th = (struct tcphdr *)((caddr_t)ip + ip_hlen); + tcp_hlen = th->th_off << 2; + + hdr_len = ETHER_HDR_LEN + ip_hlen + tcp_hlen; + th->th_sum = in_pseudo(ip->ip_src.s_addr, + ip->ip_dst.s_addr, + htons(IPPROTO_TCP)); + + /* Context descriptor: IP checksum start, offset and end. */ + TXD->lower_setup.ip_fields.ipcss = ETHER_HDR_LEN; + TXD->lower_setup.ip_fields.ipcso = + ETHER_HDR_LEN + offsetof(struct ip, ip_sum); + TXD->lower_setup.ip_fields.ipcse = + htole16(ETHER_HDR_LEN + ip_hlen - 1); + + TXD->upper_setup.tcp_fields.tucss = + ETHER_HDR_LEN + ip_hlen; + TXD->upper_setup.tcp_fields.tucse = 0; + TXD->upper_setup.tcp_fields.tucso = + ETHER_HDR_LEN + ip_hlen + + offsetof(struct tcphdr, th_sum); + TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz); + TXD->tcp_seg_setup.fields.hdr_len = hdr_len; + TXD->cmd_and_length = htole32(adapter->txd_cmd | + E1000_TXD_CMD_DEXT | + E1000_TXD_CMD_TSE | + E1000_TXD_CMD_IP | E1000_TXD_CMD_TCP | + (mp->m_pkthdr.len - (hdr_len))); + + tx_buffer->m_head = NULL; + + if (++curr_txd == adapter->num_tx_desc) + curr_txd = 0; + + adapter->num_tx_desc_avail--; + adapter->next_avail_tx_desc = curr_txd; + adapter->tx_tso = TRUE; + + return TRUE; +} +#endif /* EM_TSO */ + /********************************************************************** * * Examine each tx_buffer in the used queue. 
If the hardware is done @@ -3639,6 +3823,12 @@ (long long)adapter->stats.gprc); device_printf(dev, "Good Packets Xmtd = %lld\n", (long long)adapter->stats.gptc); +#ifdef EM_TSO + device_printf(dev, "TSO Contexts Xmtd = %lld\n", + (long long)adapter->stats.tsctc); + device_printf(dev, "TSO Contexts Failed = %lld\n", + (long long)adapter->stats.tsctfc); +#endif } static int diff -Naur /usr/src/sys.dist/dev/em/if_em.h /usr/src/sys/dev/em/if_em.h --- /usr/src/sys.dist/dev/em/if_em.h Thu Aug 3 12:05:04 2006 +++ /usr/src/sys/dev/em/if_em.h Tue Sep 5 14:29:19 2006 @@ -36,6 +36,9 @@ #ifndef _EM_H_DEFINED_ #define _EM_H_DEFINED_ +/* Undefine this to remove TSO from driver */ +#define EM_TSO + /* Tunables */ /* @@ -138,6 +141,11 @@ #define EM_CHECKSUM_FEATURES (CSUM_TCP | CSUM_UDP) /* + * Inform the stack about transmit segmentation offload capabilities. + */ +#define EM_TCPSEG_FEATURES CSUM_TSO + +/* * This parameter controls the duration of transmit watchdog timer. */ #define EM_TX_TIMEOUT 5 /* set to 5 seconds */ @@ -225,6 +233,7 @@ #define EM_RXBUFFER_16384 16384 #define EM_MAX_SCATTER 64 +#define EM_TSO_SIZE 65535 typedef enum _XSUM_CONTEXT_T { OFFLOAD_NONE, @@ -307,6 +316,7 @@ uint32_t txd_cmd; struct em_buffer *tx_buffer_area; bus_dma_tag_t txtag; /* dma tag for tx */ + uint32_t tx_tso; /* last tx was tso */ /* * Receive definitions