TCP Segmentation Offload (TSO) for IPv4, compiled in with "options TCP_TSO"
and switchable at run time through the net.inet.tcp.tso_enable sysctl.

  * tcp_checktso() reports whether the route to the peer leaves through an
    interface that advertises CSUM_TCPSEG in if_hwassist.
  * tcp_output() may then build one segment larger than t_maxseg, tags the
    mbuf with CSUM_TCPSEG and tso_mss, and leaves IP_DF clear.
  * ip_output() passes such a tagged packet to the driver unfragmented.

Review fixes folded into this revision:
  - use_tso/tso_capable are always initialised (they were left
    indeterminate when the sysctl was 0, and use_tso was then read).
  - the TSO length clamp now accounts for the IP and TCP headers so the
    datagram cannot exceed what the 16-bit ip_len can describe.
  - ip_output() only skips fragmentation for packets actually tagged
    CSUM_TCPSEG, not for every packet on a TSO-capable interface.
  - tcp_checktso() refuses IPv6 connections and uses an ANSI definition.

diff -Naur sys.dist/conf/options sys/conf/options
--- sys.dist/conf/options	Wed Aug 2 22:19:33 2006
+++ sys/conf/options	Thu Aug 31 16:55:23 2006
@@ -386,6 +386,7 @@
 SLIP_IFF_OPTS		opt_slip.h
 TCPDEBUG
 TCP_SIGNATURE		opt_inet.h
+TCP_TSO			opt_global.h
 TCP_SACK_DEBUG		opt_tcp_sack.h
 TCP_DROP_SYNFIN		opt_tcp_input.h
 DEV_VLAN		opt_vlan.h
diff -Naur sys.dist/net/if.h sys/net/if.h
--- sys.dist/net/if.h	Mon Jun 19 15:20:44 2006
+++ sys/net/if.h	Thu Aug 31 12:33:40 2006
@@ -189,6 +189,7 @@
 #define	IFCAP_JUMBO_MTU		0x0020	/* 9000 byte MTU supported */
 #define	IFCAP_POLLING		0x0040	/* driver supports polling */
 #define	IFCAP_VLAN_HWCSUM	0x0080	/* can do IFCAP_HWCSUM on VLANs */
+#define	IFCAP_TCPSEG		0x0100	/* can do TSO */
 
 #define IFCAP_HWCSUM	(IFCAP_RXCSUM | IFCAP_TXCSUM)
 
diff -Naur sys.dist/netinet/ip_output.c sys/netinet/ip_output.c
--- sys.dist/netinet/ip_output.c	Thu Jun 29 06:38:36 2006
+++ sys/netinet/ip_output.c	Thu Aug 31 15:16:44 2006
@@ -495,9 +495,11 @@
 
 	/*
 	 * If small enough for interface, or the interface will take
-	 * care of the fragmentation for us, can just send directly.
+	 * care of the fragmentation for us, or the packet is marked
+	 * for TCP segmentation offload, we can just send directly.
 	 */
-	if (ip->ip_len <= ifp->if_mtu || (ifp->if_hwassist & CSUM_FRAGMENT &&
+	if (ip->ip_len <= ifp->if_mtu || ((ifp->if_hwassist & CSUM_FRAGMENT ||
+	    m->m_pkthdr.csum_flags & ifp->if_hwassist & CSUM_TCPSEG) &&
 	    ((ip->ip_off & IP_DF) == 0))) {
 		ip->ip_len = htons(ip->ip_len);
 		ip->ip_off = htons(ip->ip_off);
diff -Naur sys.dist/netinet/tcp_output.c sys/netinet/tcp_output.c
--- sys.dist/netinet/tcp_output.c	Thu Feb 23 13:14:34 2006
+++ sys/netinet/tcp_output.c	Thu Aug 31 16:50:33 2006
@@ -105,6 +105,12 @@
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, newreno, CTLFLAG_RW, &tcp_do_newreno,
 	0, "Enable NewReno Algorithms");
 
+#ifdef TCP_TSO
+int	tcp_enable_tso = 1;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, tso_enable, CTLFLAG_RW, &tcp_enable_tso,
+	0, "Enable TCP Segmentation Offload");
+#endif
+
 /*
  * Tcp output routine: figure out what should be sent and send it.
  */
@@ -130,6 +136,9 @@
 #if 0
 	int maxburst = TCP_MAXBURST;
 #endif
+#ifdef TCP_TSO
+	int tso_capable, use_tso;
+#endif
 #ifdef INET6
 	struct ip6_hdr *ip6 = NULL;
 	int isipv6;
@@ -139,6 +148,10 @@
 
 	INP_LOCK_ASSERT(tp->t_inpcb);
 
+#ifdef TCP_TSO
+	tso_capable = tcp_enable_tso ? tcp_checktso(tp) : FALSE;
+	use_tso = tso_capable;
+#endif
 	/*
 	 * Determine length of data that should be transmitted,
 	 * and flags that will be used.
@@ -380,9 +393,22 @@
 	 * no longer contains the last data byte.
 	 */
 	if (len > tp->t_maxseg) {
-		len = tp->t_maxseg;
+#ifdef TCP_TSO
+		if (use_tso) {
+			/* Even out the transmissions */
+			len = ((ulmin(len, TCP_MAXWIN)/ tp->t_maxseg) *
+			    tp->t_maxseg);
+			if (len <= tp->t_maxseg)
+				use_tso = 0;
+		} else
+#endif
+		len = tp->t_maxseg;
 		sendalot = 1;
 	}
+#ifdef TCP_TSO
+	else
+		use_tso = 0;
+#endif
 	if (sack_rxmit) {
 		if (SEQ_LT(p->rxmit + len, tp->snd_una + so->so_snd.sb_cc))
 			flags &= ~TH_FIN;
@@ -406,7 +432,7 @@
 	 *	- we need to retransmit
 	 */
 	if (len) {
-		if (len == tp->t_maxseg)
+		if (len >= tp->t_maxseg)	/* TSO makes it possible to be greater */
 			goto send;
 		/*
 		 * NOTE! on localhost connections an 'ack' from the remote
@@ -703,6 +729,19 @@
 	 * Clear the FIN bit because we cut off the tail of
 	 * the segment.
 	 */
+#ifdef TCP_TSO
+	if (use_tso) {
+		/* The IP and TCP headers count against the 64k limit too. */
+		if (len + sizeof(struct tcpiphdr) + optlen + ipoptlen >
+		    TCP_MAXWIN) {
+			/* More remains to be sent, so hold back the FIN. */
+			flags &= ~TH_FIN;
+			len = TCP_MAXWIN - sizeof(struct tcpiphdr) -
+			    optlen - ipoptlen;
+			sendalot = 1;
+		}
+	} else
+#endif
 	if (len + optlen + ipoptlen > tp->t_maxopd) {
 		/*
 		 * If there is still more to send, don't close the connection.
@@ -944,6 +983,12 @@
 		/* IP version must be set here for ipv4/ipv6 checking later */
 		KASSERT(ip->ip_v == IPVERSION,
 		    ("%s: IP version incorrect: %d", __func__, ip->ip_v));
+#ifdef TCP_TSO
+		if (use_tso) {		/* Setup for TSO */
+			m->m_pkthdr.tso_mss = tp->t_maxseg;
+			m->m_pkthdr.csum_flags |= CSUM_TCPSEG;
+		}
+#endif
 	}
 
 	/*
@@ -1076,7 +1121,11 @@
 	 * Section 2. However the tcp hostcache migitates the problem
 	 * so it affects only the first tcp connection with a host.
 	 */
+#ifdef TCP_TSO
+	if (path_mtu_discovery && !use_tso)
+#else
 	if (path_mtu_discovery)
+#endif
 		ip->ip_off |= IP_DF;
 
 	error = ip_output(m, tp->t_inpcb->inp_options, NULL,
diff -Naur sys.dist/netinet/tcp_subr.c sys/netinet/tcp_subr.c
--- sys.dist/netinet/tcp_subr.c	Wed Aug 2 09:18:05 2006
+++ sys/netinet/tcp_subr.c	Thu Aug 31 16:54:31 2006
@@ -1643,6 +1643,46 @@
 }
 #endif /* INET6 */
 
+#ifdef TCP_TSO
+/*
+ * Decide whether TSO may be used for this connection: the route to
+ * the foreign address is looked up and TRUE is returned only when its
+ * interface advertises CSUM_TCPSEG in if_hwassist.  IPv6 connections
+ * always get FALSE because the lookup below is AF_INET only and
+ * tcp_output() tags only IPv4 packets with CSUM_TCPSEG.
+ */
+boolean_t
+tcp_checktso(struct tcpcb *tp)
+{
+	struct in_conninfo *inc;
+	struct route sro;
+	struct sockaddr_in *dst;
+	boolean_t capable;
+
+	inc = &tp->t_inpcb->inp_inc;
+#ifdef INET6
+	if (tp->t_inpcb->inp_vflag & INP_IPV6)	/* NOTE(review): confirm */
+		return (FALSE);
+#endif
+	if (inc->inc_faddr.s_addr == INADDR_ANY)
+		return (FALSE);
+
+	bzero(&sro, sizeof(sro));
+	dst = (struct sockaddr_in *)&sro.ro_dst;
+	dst->sin_family = AF_INET;
+	dst->sin_len = sizeof(*dst);
+	dst->sin_addr = inc->inc_faddr;
+	rtalloc_ign(&sro, RTF_CLONING);
+	if (sro.ro_rt == NULL)
+		return (FALSE);
+
+	/* Sample the capability before dropping the route reference. */
+	capable = (sro.ro_rt->rt_ifp->if_hwassist & CSUM_TCPSEG) != 0;
+	RTFREE(sro.ro_rt);
+	return (capable);
+}
+#endif
+
 #ifdef IPSEC
 /* compute ESP/AH header size for TCP, including outer IP header. */
 size_t
diff -Naur sys.dist/netinet/tcp_var.h sys/netinet/tcp_var.h
--- sys.dist/netinet/tcp_var.h	Mon Jun 26 08:35:25 2006
+++ sys/netinet/tcp_var.h	Thu Aug 31 12:42:25 2006
@@ -510,6 +510,10 @@
 u_long	 tcp_maxmtu6(struct in_conninfo *);
 void	 tcp_mss(struct tcpcb *, int);
 int	 tcp_mssopt(struct in_conninfo *);
+#ifdef TCP_TSO
+boolean_t
+	 tcp_checktso(struct tcpcb *);
+#endif
 struct inpcb *
 	 tcp_drop_syn_sent(struct inpcb *, int);
 struct inpcb *
diff -Naur sys.dist/sys/mbuf.h sys/sys/mbuf.h
--- sys.dist/sys/mbuf.h	Sun Jul 23 18:49:57 2006
+++ sys/sys/mbuf.h	Thu Aug 31 12:38:04 2006
@@ -110,6 +110,7 @@
 	/* variables for hardware checksum */
 	int		csum_flags;	/* flags regarding checksum */
 	int		csum_data;	/* data field used by csum routines */
+	int		tso_mss;	/* TSO segment size */
 	SLIST_HEAD(packet_tags, m_tag) tags; /* list of packet tags */
 };
 
@@ -215,6 +216,7 @@
 #define	CSUM_UDP		0x0004		/* will csum UDP */
 #define	CSUM_IP_FRAGS		0x0008		/* will csum IP fragments */
 #define	CSUM_FRAGMENT		0x0010		/* will do IP fragmentation */
+#define	CSUM_TCPSEG		0x0020		/* eligible for TCP segmentation */
 
 #define	CSUM_IP_CHECKED		0x0100		/* did csum IP */
 #define	CSUM_IP_VALID		0x0200		/*   ... the csum is valid */