]> git.kernelconcepts.de Git - karo-tx-linux.git/blob - net/ipv4/inet_lro.c
Merge remote-tracking branch 'ubi/linux-next'
[karo-tx-linux.git] / net / ipv4 / inet_lro.c
1 /*
2  *  linux/net/ipv4/inet_lro.c
3  *
4  *  Large Receive Offload (ipv4 / tcp)
5  *
6  *  (C) Copyright IBM Corp. 2007
7  *
8  *  Authors:
9  *       Jan-Bernd Themann <themann@de.ibm.com>
10  *       Christoph Raisch <raisch@de.ibm.com>
11  *
12  *
13  * This program is free software; you can redistribute it and/or modify
14  * it under the terms of the GNU General Public License as published by
15  * the Free Software Foundation; either version 2, or (at your option)
16  * any later version.
17  *
18  * This program is distributed in the hope that it will be useful,
19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
21  * GNU General Public License for more details.
22  *
23  * You should have received a copy of the GNU General Public License
24  * along with this program; if not, write to the Free Software
25  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
26  */
27
28
29 #include <linux/module.h>
30 #include <linux/if_vlan.h>
31 #include <linux/inet_lro.h>
32 #include <net/checksum.h>
33
34 MODULE_LICENSE("GPL");
35 MODULE_AUTHOR("Jan-Bernd Themann <themann@de.ibm.com>");
36 MODULE_DESCRIPTION("Large Receive Offload (ipv4 / tcp)");
37
/*
 * Header length helpers: the doff / ihl header fields are multiplied by
 * four to obtain a length in bytes.  Arguments are parenthesized so that
 * any pointer expression (e.g. "&hdr") can be passed safely.
 */
#define TCP_HDR_LEN(tcph) ((tcph)->doff << 2)
#define IP_HDR_LEN(iph) ((iph)->ihl << 2)
#define TCP_PAYLOAD_LENGTH(iph, tcph) \
	(ntohs((iph)->tot_len) - IP_HDR_LEN(iph) - TCP_HDR_LEN(tcph))

/* Values the ihl / doff header fields are compared against */
#define IPH_LEN_WO_OPTIONS 5
#define TCPH_LEN_WO_OPTIONS 5
#define TCPH_LEN_W_TIMESTAMP 8

/* Default number of header bytes copied out of a page fragment */
#define LRO_MAX_PG_HLEN 64

/*
 * Bump one counter in lro_mgr->stats.  Wrapped in do { } while (0) so the
 * macro behaves like a single statement, also in an unbraced if/else.
 */
#define LRO_INC_STATS(lro_mgr, attr) \
	do { (lro_mgr)->stats.attr++; } while (0)
50
51 /*
52  * Basic tcp checks whether packet is suitable for LRO
53  */
54
55 static int lro_tcp_ip_check(const struct iphdr *iph, const struct tcphdr *tcph,
56                             int len, const struct net_lro_desc *lro_desc)
57 {
58         /* check ip header: don't aggregate padded frames */
59         if (ntohs(iph->tot_len) != len)
60                 return -1;
61
62         if (TCP_PAYLOAD_LENGTH(iph, tcph) == 0)
63                 return -1;
64
65         if (iph->ihl != IPH_LEN_WO_OPTIONS)
66                 return -1;
67
68         if (tcph->cwr || tcph->ece || tcph->urg || !tcph->ack ||
69             tcph->rst || tcph->syn || tcph->fin)
70                 return -1;
71
72         if (INET_ECN_is_ce(ipv4_get_dsfield(iph)))
73                 return -1;
74
75         if (tcph->doff != TCPH_LEN_WO_OPTIONS &&
76             tcph->doff != TCPH_LEN_W_TIMESTAMP)
77                 return -1;
78
79         /* check tcp options (only timestamp allowed) */
80         if (tcph->doff == TCPH_LEN_W_TIMESTAMP) {
81                 __be32 *topt = (__be32 *)(tcph + 1);
82
83                 if (*topt != htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)
84                                    | (TCPOPT_TIMESTAMP << 8)
85                                    | TCPOLEN_TIMESTAMP))
86                         return -1;
87
88                 /* timestamp should be in right order */
89                 topt++;
90                 if (lro_desc && after(ntohl(lro_desc->tcp_rcv_tsval),
91                                       ntohl(*topt)))
92                         return -1;
93
94                 /* timestamp reply should not be zero */
95                 topt++;
96                 if (*topt == 0)
97                         return -1;
98         }
99
100         return 0;
101 }
102
103 static void lro_update_tcp_ip_header(struct net_lro_desc *lro_desc)
104 {
105         struct iphdr *iph = lro_desc->iph;
106         struct tcphdr *tcph = lro_desc->tcph;
107         __be32 *p;
108         __wsum tcp_hdr_csum;
109
110         tcph->ack_seq = lro_desc->tcp_ack;
111         tcph->window = lro_desc->tcp_window;
112
113         if (lro_desc->tcp_saw_tstamp) {
114                 p = (__be32 *)(tcph + 1);
115                 *(p+2) = lro_desc->tcp_rcv_tsecr;
116         }
117
118         csum_replace2(&iph->check, iph->tot_len, htons(lro_desc->ip_tot_len));
119         iph->tot_len = htons(lro_desc->ip_tot_len);
120
121         tcph->check = 0;
122         tcp_hdr_csum = csum_partial(tcph, TCP_HDR_LEN(tcph), 0);
123         lro_desc->data_csum = csum_add(lro_desc->data_csum, tcp_hdr_csum);
124         tcph->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
125                                         lro_desc->ip_tot_len -
126                                         IP_HDR_LEN(iph), IPPROTO_TCP,
127                                         lro_desc->data_csum);
128 }
129
130 static __wsum lro_tcp_data_csum(struct iphdr *iph, struct tcphdr *tcph, int len)
131 {
132         __wsum tcp_csum;
133         __wsum tcp_hdr_csum;
134         __wsum tcp_ps_hdr_csum;
135
136         tcp_csum = ~csum_unfold(tcph->check);
137         tcp_hdr_csum = csum_partial(tcph, TCP_HDR_LEN(tcph), tcp_csum);
138
139         tcp_ps_hdr_csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
140                                              len + TCP_HDR_LEN(tcph),
141                                              IPPROTO_TCP, 0);
142
143         return csum_sub(csum_sub(tcp_csum, tcp_hdr_csum),
144                         tcp_ps_hdr_csum);
145 }
146
147 static void lro_init_desc(struct net_lro_desc *lro_desc, struct sk_buff *skb,
148                           struct iphdr *iph, struct tcphdr *tcph)
149 {
150         int nr_frags;
151         __be32 *ptr;
152         u32 tcp_data_len = TCP_PAYLOAD_LENGTH(iph, tcph);
153
154         nr_frags = skb_shinfo(skb)->nr_frags;
155         lro_desc->parent = skb;
156         lro_desc->next_frag = &(skb_shinfo(skb)->frags[nr_frags]);
157         lro_desc->iph = iph;
158         lro_desc->tcph = tcph;
159         lro_desc->tcp_next_seq = ntohl(tcph->seq) + tcp_data_len;
160         lro_desc->tcp_ack = tcph->ack_seq;
161         lro_desc->tcp_window = tcph->window;
162
163         lro_desc->pkt_aggr_cnt = 1;
164         lro_desc->ip_tot_len = ntohs(iph->tot_len);
165
166         if (tcph->doff == 8) {
167                 ptr = (__be32 *)(tcph+1);
168                 lro_desc->tcp_saw_tstamp = 1;
169                 lro_desc->tcp_rcv_tsval = *(ptr+1);
170                 lro_desc->tcp_rcv_tsecr = *(ptr+2);
171         }
172
173         lro_desc->mss = tcp_data_len;
174         lro_desc->active = 1;
175
176         lro_desc->data_csum = lro_tcp_data_csum(iph, tcph,
177                                                 tcp_data_len);
178 }
179
180 static inline void lro_clear_desc(struct net_lro_desc *lro_desc)
181 {
182         memset(lro_desc, 0, sizeof(struct net_lro_desc));
183 }
184
185 static void lro_add_common(struct net_lro_desc *lro_desc, struct iphdr *iph,
186                            struct tcphdr *tcph, int tcp_data_len)
187 {
188         struct sk_buff *parent = lro_desc->parent;
189         __be32 *topt;
190
191         lro_desc->pkt_aggr_cnt++;
192         lro_desc->ip_tot_len += tcp_data_len;
193         lro_desc->tcp_next_seq += tcp_data_len;
194         lro_desc->tcp_window = tcph->window;
195         lro_desc->tcp_ack = tcph->ack_seq;
196
197         /* don't update tcp_rcv_tsval, would not work with PAWS */
198         if (lro_desc->tcp_saw_tstamp) {
199                 topt = (__be32 *) (tcph + 1);
200                 lro_desc->tcp_rcv_tsecr = *(topt + 2);
201         }
202
203         lro_desc->data_csum = csum_block_add(lro_desc->data_csum,
204                                              lro_tcp_data_csum(iph, tcph,
205                                                                tcp_data_len),
206                                              parent->len);
207
208         parent->len += tcp_data_len;
209         parent->data_len += tcp_data_len;
210         if (tcp_data_len > lro_desc->mss)
211                 lro_desc->mss = tcp_data_len;
212 }
213
214 static void lro_add_packet(struct net_lro_desc *lro_desc, struct sk_buff *skb,
215                            struct iphdr *iph, struct tcphdr *tcph)
216 {
217         struct sk_buff *parent = lro_desc->parent;
218         int tcp_data_len = TCP_PAYLOAD_LENGTH(iph, tcph);
219
220         lro_add_common(lro_desc, iph, tcph, tcp_data_len);
221
222         skb_pull(skb, (skb->len - tcp_data_len));
223         parent->truesize += skb->truesize;
224
225         if (lro_desc->last_skb)
226                 lro_desc->last_skb->next = skb;
227         else
228                 skb_shinfo(parent)->frag_list = skb;
229
230         lro_desc->last_skb = skb;
231 }
232
233 static void lro_add_frags(struct net_lro_desc *lro_desc,
234                           int len, int hlen, int truesize,
235                           struct skb_frag_struct *skb_frags,
236                           struct iphdr *iph, struct tcphdr *tcph)
237 {
238         struct sk_buff *skb = lro_desc->parent;
239         int tcp_data_len = TCP_PAYLOAD_LENGTH(iph, tcph);
240
241         lro_add_common(lro_desc, iph, tcph, tcp_data_len);
242
243         skb->truesize += truesize;
244
245         skb_frags[0].page_offset += hlen;
246         skb_frag_size_sub(&skb_frags[0], hlen);
247
248         while (tcp_data_len > 0) {
249                 *(lro_desc->next_frag) = *skb_frags;
250                 tcp_data_len -= skb_frag_size(skb_frags);
251                 lro_desc->next_frag++;
252                 skb_frags++;
253                 skb_shinfo(skb)->nr_frags++;
254         }
255 }
256
257 static int lro_check_tcp_conn(struct net_lro_desc *lro_desc,
258                               struct iphdr *iph,
259                               struct tcphdr *tcph)
260 {
261         if ((lro_desc->iph->saddr != iph->saddr) ||
262             (lro_desc->iph->daddr != iph->daddr) ||
263             (lro_desc->tcph->source != tcph->source) ||
264             (lro_desc->tcph->dest != tcph->dest))
265                 return -1;
266         return 0;
267 }
268
269 static struct net_lro_desc *lro_get_desc(struct net_lro_mgr *lro_mgr,
270                                          struct net_lro_desc *lro_arr,
271                                          struct iphdr *iph,
272                                          struct tcphdr *tcph)
273 {
274         struct net_lro_desc *lro_desc = NULL;
275         struct net_lro_desc *tmp;
276         int max_desc = lro_mgr->max_desc;
277         int i;
278
279         for (i = 0; i < max_desc; i++) {
280                 tmp = &lro_arr[i];
281                 if (tmp->active)
282                         if (!lro_check_tcp_conn(tmp, iph, tcph)) {
283                                 lro_desc = tmp;
284                                 goto out;
285                         }
286         }
287
288         for (i = 0; i < max_desc; i++) {
289                 if (!lro_arr[i].active) {
290                         lro_desc = &lro_arr[i];
291                         goto out;
292                 }
293         }
294
295         LRO_INC_STATS(lro_mgr, no_desc);
296 out:
297         return lro_desc;
298 }
299
300 static void lro_flush(struct net_lro_mgr *lro_mgr,
301                       struct net_lro_desc *lro_desc)
302 {
303         if (lro_desc->pkt_aggr_cnt > 1)
304                 lro_update_tcp_ip_header(lro_desc);
305
306         skb_shinfo(lro_desc->parent)->gso_size = lro_desc->mss;
307
308         if (lro_mgr->features & LRO_F_NAPI)
309                 netif_receive_skb(lro_desc->parent);
310         else
311                 netif_rx(lro_desc->parent);
312
313         LRO_INC_STATS(lro_mgr, flushed);
314         lro_clear_desc(lro_desc);
315 }
316
317 static int __lro_proc_skb(struct net_lro_mgr *lro_mgr, struct sk_buff *skb,
318                           void *priv)
319 {
320         struct net_lro_desc *lro_desc;
321         struct iphdr *iph;
322         struct tcphdr *tcph;
323         u64 flags;
324         int vlan_hdr_len = 0;
325
326         if (!lro_mgr->get_skb_header ||
327             lro_mgr->get_skb_header(skb, (void *)&iph, (void *)&tcph,
328                                     &flags, priv))
329                 goto out;
330
331         if (!(flags & LRO_IPV4) || !(flags & LRO_TCP))
332                 goto out;
333
334         lro_desc = lro_get_desc(lro_mgr, lro_mgr->lro_arr, iph, tcph);
335         if (!lro_desc)
336                 goto out;
337
338         if ((skb->protocol == htons(ETH_P_8021Q)) &&
339             !(lro_mgr->features & LRO_F_EXTRACT_VLAN_ID))
340                 vlan_hdr_len = VLAN_HLEN;
341
342         if (!lro_desc->active) { /* start new lro session */
343                 if (lro_tcp_ip_check(iph, tcph, skb->len - vlan_hdr_len, NULL))
344                         goto out;
345
346                 skb->ip_summed = lro_mgr->ip_summed_aggr;
347                 lro_init_desc(lro_desc, skb, iph, tcph);
348                 LRO_INC_STATS(lro_mgr, aggregated);
349                 return 0;
350         }
351
352         if (lro_desc->tcp_next_seq != ntohl(tcph->seq))
353                 goto out2;
354
355         if (lro_tcp_ip_check(iph, tcph, skb->len, lro_desc))
356                 goto out2;
357
358         lro_add_packet(lro_desc, skb, iph, tcph);
359         LRO_INC_STATS(lro_mgr, aggregated);
360
361         if ((lro_desc->pkt_aggr_cnt >= lro_mgr->max_aggr) ||
362             lro_desc->parent->len > (0xFFFF - lro_mgr->dev->mtu))
363                 lro_flush(lro_mgr, lro_desc);
364
365         return 0;
366
367 out2: /* send aggregated SKBs to stack */
368         lro_flush(lro_mgr, lro_desc);
369
370 out:
371         return 1;
372 }
373
374
375 static struct sk_buff *lro_gen_skb(struct net_lro_mgr *lro_mgr,
376                                    struct skb_frag_struct *frags,
377                                    int len, int true_size,
378                                    void *mac_hdr,
379                                    int hlen, __wsum sum,
380                                    u32 ip_summed)
381 {
382         struct sk_buff *skb;
383         struct skb_frag_struct *skb_frags;
384         int data_len = len;
385         int hdr_len = min(len, hlen);
386
387         skb = netdev_alloc_skb(lro_mgr->dev, hlen + lro_mgr->frag_align_pad);
388         if (!skb)
389                 return NULL;
390
391         skb_reserve(skb, lro_mgr->frag_align_pad);
392         skb->len = len;
393         skb->data_len = len - hdr_len;
394         skb->truesize += true_size;
395         skb->tail += hdr_len;
396
397         memcpy(skb->data, mac_hdr, hdr_len);
398
399         skb_frags = skb_shinfo(skb)->frags;
400         while (data_len > 0) {
401                 *skb_frags = *frags;
402                 data_len -= skb_frag_size(frags);
403                 skb_frags++;
404                 frags++;
405                 skb_shinfo(skb)->nr_frags++;
406         }
407
408         skb_shinfo(skb)->frags[0].page_offset += hdr_len;
409         skb_frag_size_sub(&skb_shinfo(skb)->frags[0], hdr_len);
410
411         skb->ip_summed = ip_summed;
412         skb->csum = sum;
413         skb->protocol = eth_type_trans(skb, lro_mgr->dev);
414         return skb;
415 }
416
417 static struct sk_buff *__lro_proc_segment(struct net_lro_mgr *lro_mgr,
418                                           struct skb_frag_struct *frags,
419                                           int len, int true_size,
420                                           void *priv, __wsum sum)
421 {
422         struct net_lro_desc *lro_desc;
423         struct iphdr *iph;
424         struct tcphdr *tcph;
425         struct sk_buff *skb;
426         u64 flags;
427         void *mac_hdr;
428         int mac_hdr_len;
429         int hdr_len = LRO_MAX_PG_HLEN;
430         int vlan_hdr_len = 0;
431
432         if (!lro_mgr->get_frag_header ||
433             lro_mgr->get_frag_header(frags, (void *)&mac_hdr, (void *)&iph,
434                                      (void *)&tcph, &flags, priv)) {
435                 mac_hdr = skb_frag_address(frags);
436                 goto out1;
437         }
438
439         if (!(flags & LRO_IPV4) || !(flags & LRO_TCP))
440                 goto out1;
441
442         hdr_len = (int)((void *)(tcph) + TCP_HDR_LEN(tcph) - mac_hdr);
443         mac_hdr_len = (int)((void *)(iph) - mac_hdr);
444
445         lro_desc = lro_get_desc(lro_mgr, lro_mgr->lro_arr, iph, tcph);
446         if (!lro_desc)
447                 goto out1;
448
449         if (!lro_desc->active) { /* start new lro session */
450                 if (lro_tcp_ip_check(iph, tcph, len - mac_hdr_len, NULL))
451                         goto out1;
452
453                 skb = lro_gen_skb(lro_mgr, frags, len, true_size, mac_hdr,
454                                   hdr_len, 0, lro_mgr->ip_summed_aggr);
455                 if (!skb)
456                         goto out;
457
458                 if ((skb->protocol == htons(ETH_P_8021Q)) &&
459                     !(lro_mgr->features & LRO_F_EXTRACT_VLAN_ID))
460                         vlan_hdr_len = VLAN_HLEN;
461
462                 iph = (void *)(skb->data + vlan_hdr_len);
463                 tcph = (void *)((u8 *)skb->data + vlan_hdr_len
464                                 + IP_HDR_LEN(iph));
465
466                 lro_init_desc(lro_desc, skb, iph, tcph);
467                 LRO_INC_STATS(lro_mgr, aggregated);
468                 return NULL;
469         }
470
471         if (lro_desc->tcp_next_seq != ntohl(tcph->seq))
472                 goto out2;
473
474         if (lro_tcp_ip_check(iph, tcph, len - mac_hdr_len, lro_desc))
475                 goto out2;
476
477         lro_add_frags(lro_desc, len, hdr_len, true_size, frags, iph, tcph);
478         LRO_INC_STATS(lro_mgr, aggregated);
479
480         if ((skb_shinfo(lro_desc->parent)->nr_frags >= lro_mgr->max_aggr) ||
481             lro_desc->parent->len > (0xFFFF - lro_mgr->dev->mtu))
482                 lro_flush(lro_mgr, lro_desc);
483
484         return NULL;
485
486 out2: /* send aggregated packets to the stack */
487         lro_flush(lro_mgr, lro_desc);
488
489 out1:  /* Original packet has to be posted to the stack */
490         skb = lro_gen_skb(lro_mgr, frags, len, true_size, mac_hdr,
491                           hdr_len, sum, lro_mgr->ip_summed);
492 out:
493         return skb;
494 }
495
496 void lro_receive_skb(struct net_lro_mgr *lro_mgr,
497                      struct sk_buff *skb,
498                      void *priv)
499 {
500         if (__lro_proc_skb(lro_mgr, skb, priv)) {
501                 if (lro_mgr->features & LRO_F_NAPI)
502                         netif_receive_skb(skb);
503                 else
504                         netif_rx(skb);
505         }
506 }
507 EXPORT_SYMBOL(lro_receive_skb);
508
509 void lro_receive_frags(struct net_lro_mgr *lro_mgr,
510                        struct skb_frag_struct *frags,
511                        int len, int true_size, void *priv, __wsum sum)
512 {
513         struct sk_buff *skb;
514
515         skb = __lro_proc_segment(lro_mgr, frags, len, true_size, priv, sum);
516         if (!skb)
517                 return;
518
519         if (lro_mgr->features & LRO_F_NAPI)
520                 netif_receive_skb(skb);
521         else
522                 netif_rx(skb);
523 }
524 EXPORT_SYMBOL(lro_receive_frags);
525
526 void lro_flush_all(struct net_lro_mgr *lro_mgr)
527 {
528         int i;
529         struct net_lro_desc *lro_desc = lro_mgr->lro_arr;
530
531         for (i = 0; i < lro_mgr->max_desc; i++) {
532                 if (lro_desc[i].active)
533                         lro_flush(lro_mgr, &lro_desc[i]);
534         }
535 }
536 EXPORT_SYMBOL(lro_flush_all);
537
538 void lro_flush_pkt(struct net_lro_mgr *lro_mgr,
539                   struct iphdr *iph, struct tcphdr *tcph)
540 {
541         struct net_lro_desc *lro_desc;
542
543         lro_desc = lro_get_desc(lro_mgr, lro_mgr->lro_arr, iph, tcph);
544         if (lro_desc->active)
545                 lro_flush(lro_mgr, lro_desc);
546 }
547 EXPORT_SYMBOL(lro_flush_pkt);