diff -Naur head/if_igb.c patch/if_igb.c --- head/if_igb.c 2014-01-14 14:08:04.000000000 -0800 +++ patch/if_igb.c 2014-01-14 14:07:01.000000000 -0800 @@ -1,6 +1,6 @@ /****************************************************************************** - Copyright (c) 2001-2013, Intel Corporation + Copyright (c) 2001-2014, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without @@ -244,9 +244,11 @@ static bool igb_rxeof(struct igb_queue *, int, int *); static void igb_rx_checksum(u32, struct mbuf *, u32); static int igb_tx_ctx_setup(struct tx_ring *, - struct mbuf *, u32 *, u32 *); + struct mbuf *, u32 *, u32 *, struct igb_pkt_info *); static int igb_tso_setup(struct tx_ring *, - struct mbuf *, u32 *, u32 *); + struct mbuf *, u32 *, u32 *, struct igb_pkt_info *); +static int igb_pullup_headers(struct mbuf **, u32, + struct igb_pkt_info *); static void igb_set_promisc(struct adapter *); static void igb_disable_promisc(struct adapter *); static void igb_set_multi(struct adapter *); @@ -290,6 +292,7 @@ static int igb_sysctl_dmac(SYSCTL_HANDLER_ARGS); static int igb_sysctl_eee(SYSCTL_HANDLER_ARGS); + #ifdef DEVICE_POLLING static poll_handler_t igb_poll; #endif /* POLLING */ @@ -1843,9 +1846,11 @@ bus_dma_segment_t segs[IGB_MAX_SCATTER]; bus_dmamap_t map; struct igb_tx_buf *txbuf; + struct igb_pkt_info pkt; union e1000_adv_tx_desc *txd = NULL; m_head = *m_headp; + bzero(&pkt, sizeof(struct igb_pkt_info)); /* Basic descriptor defines */ cmd_type_len = (E1000_ADVTXD_DTYP_DATA | @@ -1863,6 +1868,14 @@ txbuf = &txr->tx_buffers[first]; map = txbuf->map; + /* Pullup headers if offload has been requested */ + if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) { + error = igb_pullup_headers(m_headp, + m_head->m_pkthdr.csum_flags, &pkt); + if (error) + return (error); + } + /* * Map the packet for DMA. 
*/ @@ -1906,18 +1919,18 @@ bus_dmamap_unload(txr->txtag, map); return (ENOBUFS); } - m_head = *m_headp; /* ** Set up the appropriate offload context ** this will consume the first descriptor */ - error = igb_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status); + error = igb_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status, &pkt); if (__predict_false(error)) { m_freem(*m_headp); *m_headp = NULL; return (error); } + m_head = *m_headp; /* 82575 needs the queue index added */ if (adapter->hw.mac.type == e1000_82575) @@ -3701,60 +3714,39 @@ **********************************************************************/ static int igb_tso_setup(struct tx_ring *txr, struct mbuf *mp, - u32 *cmd_type_len, u32 *olinfo_status) + u32 *cmd_type_len, u32 *olinfo_status, struct igb_pkt_info *pkt) { struct adapter *adapter = txr->adapter; struct e1000_adv_tx_context_desc *TXD; u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0; u32 mss_l4len_idx = 0, paylen; - u16 vtag = 0, eh_type; - int ctxd, ehdrlen, ip_hlen, tcp_hlen; - struct ether_vlan_header *eh; + u16 vtag = 0; + int ctxd, tcp_hlen; #ifdef INET6 struct ip6_hdr *ip6; #endif #ifdef INET struct ip *ip; #endif - struct tcphdr *th; - - - /* - * Determine where frame payload starts. - * Jump over vlan headers if already present - */ - eh = mtod(mp, struct ether_vlan_header *); - if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { - ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; - eh_type = eh->evl_proto; - } else { - ehdrlen = ETHER_HDR_LEN; - eh_type = eh->evl_encap_proto; - } - switch (ntohs(eh_type)) { + switch (pkt->etype) { #ifdef INET6 case ETHERTYPE_IPV6: - ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen); + ip6 = pkt->ip6; /* XXX-BZ For now we do not pretend to support ext. hdrs. 
*/ if (ip6->ip6_nxt != IPPROTO_TCP) return (ENXIO); - ip_hlen = sizeof(struct ip6_hdr); - ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen); - th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen); - th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0); + pkt->th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0); type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6; break; #endif #ifdef INET case ETHERTYPE_IP: - ip = (struct ip *)(mp->m_data + ehdrlen); + ip = pkt->ip; if (ip->ip_p != IPPROTO_TCP) return (ENXIO); ip->ip_sum = 0; - ip_hlen = ip->ip_hl << 2; - th = (struct tcphdr *)((caddr_t)ip + ip_hlen); - th->th_sum = in_pseudo(ip->ip_src.s_addr, + pkt->th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htons(IPPROTO_TCP)); type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4; /* Tell transmit desc to also do IPv4 checksum. */ @@ -3763,17 +3755,17 @@ #endif default: panic("%s: CSUM_TSO but no supported IP version (0x%04x)", - __func__, ntohs(eh_type)); + __func__, pkt->etype); break; } ctxd = txr->next_avail_desc; TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd]; - tcp_hlen = th->th_off << 2; + tcp_hlen = pkt->th->th_off << 2; /* This is used in the transmit desc in encap */ - paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen; + paylen = mp->m_pkthdr.len - pkt->ehdrlen - pkt->ip_hlen - tcp_hlen; /* VLAN MACLEN IPLEN */ if (mp->m_flags & M_VLANTAG) { @@ -3781,8 +3773,8 @@ vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT); } - vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT; - vlan_macip_lens |= ip_hlen; + vlan_macip_lens |= pkt->ehdrlen << E1000_ADVTXD_MACLEN_SHIFT; + vlan_macip_lens |= pkt->ip_hlen; TXD->vlan_macip_lens = htole32(vlan_macip_lens); /* ADV DTYPE TUCMD */ @@ -3820,16 +3812,13 @@ static int igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp, - u32 *cmd_type_len, u32 *olinfo_status) + u32 *cmd_type_len, u32 *olinfo_status, struct igb_pkt_info *pkt) { struct e1000_adv_tx_context_desc *TXD; struct adapter *adapter = txr->adapter; - 
struct ether_vlan_header *eh; struct ip *ip; struct ip6_hdr *ip6; u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0, mss_l4len_idx = 0; - int ehdrlen, ip_hlen = 0; - u16 etype; u8 ipproto = 0; int offload = TRUE; int ctxd = txr->next_avail_desc; @@ -3837,7 +3826,7 @@ /* First check if TSO is to be used */ if (mp->m_pkthdr.csum_flags & CSUM_TSO) - return (igb_tso_setup(txr, mp, cmd_type_len, olinfo_status)); + return (igb_tso_setup(txr, mp, cmd_type_len, olinfo_status, pkt)); if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0) offload = FALSE; @@ -3859,33 +3848,17 @@ } else if (offload == FALSE) /* ... no offload to do */ return (0); - /* - * Determine where frame payload starts. - * Jump over vlan headers if already present, - * helpful for QinQ too. - */ - eh = mtod(mp, struct ether_vlan_header *); - if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { - etype = ntohs(eh->evl_proto); - ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; - } else { - etype = ntohs(eh->evl_encap_proto); - ehdrlen = ETHER_HDR_LEN; - } - /* Set the ether header length */ - vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT; + vlan_macip_lens |= pkt->ehdrlen << E1000_ADVTXD_MACLEN_SHIFT; - switch (etype) { + switch (pkt->etype) { case ETHERTYPE_IP: - ip = (struct ip *)(mp->m_data + ehdrlen); - ip_hlen = ip->ip_hl << 2; + ip = pkt->ip; ipproto = ip->ip_p; type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4; break; case ETHERTYPE_IPV6: - ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen); - ip_hlen = sizeof(struct ip6_hdr); + ip6 = pkt->ip6; /* XXX-BZ this will go badly in case of ext hdrs. */ ipproto = ip6->ip6_nxt; type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6; @@ -3895,7 +3868,7 @@ break; } - vlan_macip_lens |= ip_hlen; + vlan_macip_lens |= pkt->ip_hlen; type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT; switch (ipproto) { @@ -3941,6 +3914,167 @@ return (0); } +/* + * Intel recommends entire IP/TCP header length reside in a single + * buffer. 
If multiple descriptors are used to describe the IP and + * TCP header, each descriptor should describe one or more + * complete headers; descriptors referencing only parts of headers + * are not supported. If all layer headers are not coalesced into + * a single buffer, each buffer should not cross a 4KB boundary, + * or be larger than the maximum read request size. + * + * Controller also requires modifying IP/TCP header to make TSO work + * so we first get a writable mbuf chain then coalesce ethernet/ + * IP/TCP header into a single buffer to meet the requirement of + * controller. This also simplifies IP/TCP/UDP checksum offloading + * which also has similar restrictions. + */ +static int +igb_pullup_headers(struct mbuf **m_headp, u32 csum_flags, struct igb_pkt_info *pkt) +{ + struct mbuf *m_head = *m_headp; + struct ether_vlan_header *eh; + u16 etype; + u32 ehdrlen, ip_hlen, do_tso, poff; +#ifdef INET + struct ip *ip; +#endif +#ifdef INET6 + struct ip6_hdr *ip6; +#endif + struct tcphdr *th; + + do_tso = ((csum_flags & CSUM_TSO) != 0); + bzero(pkt, sizeof(struct igb_pkt_info)); + + /* + * Determine where frame payload starts. + * Jump over vlan headers if already present, + * helpful for QinQ too. 
+ */ + eh = mtod(m_head, struct ether_vlan_header *); + if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { + etype = ntohs(eh->evl_proto); + ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; + } else { + etype = ntohs(eh->evl_encap_proto); + ehdrlen = ETHER_HDR_LEN; + } + + /* make mbuf chain writable */ + if (m_head->m_next != NULL) { + if (M_WRITABLE(*m_headp) == 0) { + m_head = m_dup(*m_headp, M_NOWAIT); + m_freem(*m_headp); + if (m_head == NULL) { + *m_headp = NULL; + return (ENOBUFS); + } + *m_headp = m_head; + } + } + + /* pull up ethernet header, accounts for vlan tag */ + m_head = m_pullup(m_head, ehdrlen); + if (m_head == NULL) { + *m_headp = NULL; + return (ENOBUFS); + } + + /* pull up ip and get pointers */ + switch (etype) { +#ifdef INET + case ETHERTYPE_IP: + m_head = m_pullup(m_head, ehdrlen + sizeof(struct ip)); + ip = (struct ip *)(mtod(m_head, char *) + ehdrlen); + ip_hlen = ip->ip_hl << 2; + poff = ehdrlen + ip_hlen; + break; +#endif +#ifdef INET6 + case ETHERTYPE_IPV6: + m_head = m_pullup(m_head, ehdrlen + sizeof(struct ip6_hdr)); + ip6 = (struct ip6_hdr *)(mtod(m_head, char *) + ehdrlen); + ip_hlen = sizeof(struct ip6_hdr); + poff = ehdrlen + ip_hlen; + break; +#endif + default: + break; + } + + if (m_head == NULL) { + *m_headp = NULL; + return (ENOBUFS); + } + + if (do_tso) { + /* pull up TCP header */ + m_head = m_pullup(m_head, poff + sizeof(struct tcphdr)); + if (m_head == NULL) { + *m_headp = NULL; + return (ENOBUFS); + } + th = (struct tcphdr *)(mtod(m_head, char *) + poff); + /* + * TSO workaround: + * pull 4 more bytes of data into it. 
+ */ + m_head = m_pullup(m_head, poff + (th->th_off << 2) + 4); + if (m_head == NULL) { + *m_headp = NULL; + return (ENOBUFS); + } + pkt->th = (struct tcphdr *)(mtod(m_head, char *) + poff); + } else if (csum_flags & CSUM_TCP) { + m_head = m_pullup(m_head, poff + sizeof(struct tcphdr)); + if (m_head == NULL) { + *m_headp = NULL; + return (ENOBUFS); + } + th = (struct tcphdr *)(mtod(m_head, char *) + poff); + m_head = m_pullup(m_head, poff + (th->th_off << 2)); + if (m_head == NULL) { + *m_headp = NULL; + return (ENOBUFS); + } + pkt->th = (struct tcphdr *)(mtod(m_head, char *) + poff); + } else if (csum_flags & CSUM_UDP) { + m_head = m_pullup(m_head, poff + sizeof(struct udphdr)); + if (m_head == NULL) { + *m_headp = NULL; + return (ENOBUFS); + } + } + + /* final capture of ip pointer */ + switch (etype) { +#ifdef INET + case ETHERTYPE_IP: + pkt->ip = (struct ip *)(mtod(m_head, char *) + ehdrlen); + pkt->ip6 = NULL; + break; +#endif +#ifdef INET6 + case ETHERTYPE_IPV6: + pkt->ip6 = (struct ip6_hdr *)(mtod(m_head, char *) + ehdrlen); + pkt->ip = NULL; + break; +#endif + default: + break; + } + *m_headp = m_head; + + /* return these calculated pointers/values for re-use */ + pkt->ehdrlen = ehdrlen; + pkt->etype = etype; + pkt->ip_hlen = ip_hlen; + + return (0); +} + + /********************************************************************** * * Examine each tx_buffer in the used queue. If the hardware is done diff -Naur head/if_igb.h patch/if_igb.h --- head/if_igb.h 2014-01-14 14:08:04.000000000 -0800 +++ patch/if_igb.h 2014-01-14 14:07:01.000000000 -0800 @@ -1,6 +1,6 @@ /****************************************************************************** - Copyright (c) 2001-2013, Intel Corporation + Copyright (c) 2001-2014, Intel Corporation All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -237,9 +237,9 @@ /* Offload bits in mbuf flag */ #if __FreeBSD_version >= 800000 -#define CSUM_OFFLOAD (CSUM_IP|CSUM_TCP|CSUM_UDP|CSUM_SCTP) +#define CSUM_OFFLOAD (CSUM_IP|CSUM_TCP|CSUM_UDP|CSUM_TSO|CSUM_SCTP) #else -#define CSUM_OFFLOAD (CSUM_IP|CSUM_TCP|CSUM_UDP) +#define CSUM_OFFLOAD (CSUM_IP|CSUM_TCP|CSUM_UDP|CSUM_TSO) #endif /* Define the starting Interrupt rate per Queue */ @@ -513,6 +513,15 @@ bus_dmamap_t pmap; /* bus_dma map for packet */ }; +struct igb_pkt_info { + u16 etype; + u32 ehdrlen; + u32 ip_hlen; + struct ip *ip; + struct ip6_hdr *ip6; + struct tcphdr *th; +}; + /* ** Find the number of unrefreshed RX descriptors */