1 //==========================================================================
3 // src/sys/netinet6/frag6.c
5 //==========================================================================
6 //####BSDCOPYRIGHTBEGIN####
8 // -------------------------------------------
10 // Portions of this software may have been derived from OpenBSD,
11 // FreeBSD or other sources, and are covered by the appropriate
12 // copyright disclaimers included herein.
14 // Portions created by Red Hat are
15 // Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
17 // -------------------------------------------
19 //####BSDCOPYRIGHTEND####
20 //==========================================================================
22 /* $KAME: frag6.c,v 1.32 2001/06/21 09:06:29 sumikawa Exp $ */
25 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
26 * All rights reserved.
28 * Redistribution and use in source and binary forms, with or without
29 * modification, are permitted provided that the following conditions
31 * 1. Redistributions of source code must retain the above copyright
32 * notice, this list of conditions and the following disclaimer.
33 * 2. Redistributions in binary form must reproduce the above copyright
34 * notice, this list of conditions and the following disclaimer in the
35 * documentation and/or other materials provided with the distribution.
36 * 3. Neither the name of the project nor the names of its contributors
37 * may be used to endorse or promote products derived from this software
38 * without specific prior written permission.
40 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
41 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
42 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
43 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
44 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
45 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
46 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
47 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
48 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
49 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
53 #include <sys/param.h>
54 #include <sys/malloc.h>
56 #include <sys/domain.h>
57 #include <sys/protosw.h>
58 #include <sys/socket.h>
59 #include <sys/errno.h>
62 #include <net/route.h>
64 #include <netinet/in.h>
65 #include <netinet/in_var.h>
66 #include <netinet/ip6.h>
67 #include <netinet6/ip6_var.h>
68 #if !(defined(__FreeBSD__) && __FreeBSD__ >= 3) && !defined(__OpenBSD__) && !(defined(__bsdi__) && _BSDI_VERSION >= 199802)
69 #include <netinet6/in6_pcb.h>
71 #include <netinet/icmp6.h>
74 #include <dev/rndvar.h>
78 * Define it to get correct behavior for per-interface statistics.
79 * Doing so requires an extra routing table lookup per fragment,
80 * which may or may not be a performance hit.
82 #define IN6_IFSTAT_STRICT
84 static void frag6_enq __P((struct ip6asfrag *, struct ip6asfrag *));
85 static void frag6_deq __P((struct ip6asfrag *));
86 static void frag6_insque __P((struct ip6q *, struct ip6q *));
87 static void frag6_remque __P((struct ip6q *));
88 static void frag6_freef __P((struct ip6q *));
90 /* XXX we eventually need splreass6, or some real semaphore */
91 int frag6_doing_reass;
92 u_int frag6_nfragpackets;
93 struct ip6q ip6q; /* ip6 reassemble queue */
96 #ifndef offsetof /* XXX */
97 #define offsetof(type, member) ((size_t)(&((type *)0)->member))
101 * Initialise reassembly queue and fragment identifier.
109 #if defined(__FreeBSD__) && __FreeBSD__ >= 4
110 ip6_maxfragpackets = nmbclusters / 4;
114 * In many cases, random() here does NOT return a random number,
115 * because initialization during bootstrap occurs in a fixed order.
118 ip6_id = random() ^ tv.tv_usec;
120 ip6_id = arc4random();
122 ip6q.ip6q_next = ip6q.ip6q_prev = &ip6q;
126 * In RFC 2460, the fragmentation and reassembly rules do not agree with each
127 * other in terms of how the Next Header field of the Fragment header is handled.
128 * While the sender will use the same value for all fragments of a packet,
129 * the receiver is advised not to check for consistency.
131 * fragment rule (p20):
132 * (2) A Fragment header containing:
133 * The Next Header value that identifies the first header of
134 * the Fragmentable Part of the original packet.
135 * -> next header field is same for all fragments
137 * reassembly rule (p21):
138 * The Next Header field of the last header of the Unfragmentable
139 * Part is obtained from the Next Header field of the first
140 * fragment's Fragment header.
141 * -> should grab it from the first fragment only
143 * The following note also contradicts the fragment rule - no one is going to
144 * send different fragments with different next header fields.
146 * additional note (p22):
147 * The Next Header values in the Fragment headers of different
148 * fragments of the same original packet may differ. Only the value
149 * from the Offset zero fragment packet is used for reassembly.
150 * -> should grab it from the first fragment only
152 * There is no explicit reason given in the RFC. Historical reason maybe?
158 frag6_input(mp, offp, proto)
162 struct mbuf *m = *mp, *t;
164 struct ip6_frag *ip6f;
166 struct ip6asfrag *af6, *ip6af, *af6dwn;
167 int offset = *offp, nxt, i, next;
169 int fragoff, frgpartlen; /* must be larger than u_int16_t */
170 struct ifnet *dstifp;
171 #ifdef IN6_IFSTAT_STRICT
172 #ifdef NEW_STRUCT_ROUTE
173 static struct route ro;
175 static struct route_in6 ro;
177 struct sockaddr_in6 *dst;
180 ip6 = mtod(m, struct ip6_hdr *);
181 #ifndef PULLDOWN_TEST
182 IP6_EXTHDR_CHECK(m, offset, sizeof(struct ip6_frag), IPPROTO_DONE);
183 ip6f = (struct ip6_frag *)((caddr_t)ip6 + offset);
185 IP6_EXTHDR_GET(ip6f, struct ip6_frag *, m, offset, sizeof(*ip6f));
191 #ifdef IN6_IFSTAT_STRICT
192 /* find the destination interface of the packet. */
193 dst = (struct sockaddr_in6 *)&ro.ro_dst;
195 && ((ro.ro_rt->rt_flags & RTF_UP) == 0
196 || !IN6_ARE_ADDR_EQUAL(&dst->sin6_addr, &ip6->ip6_dst))) {
198 ro.ro_rt = (struct rtentry *)0;
200 if (ro.ro_rt == NULL) {
201 bzero(dst, sizeof(*dst));
202 dst->sin6_family = AF_INET6;
203 dst->sin6_len = sizeof(struct sockaddr_in6);
204 dst->sin6_addr = ip6->ip6_dst;
207 rtalloc((struct route *)&ro);
209 rtcalloc((struct route *)&ro);
211 if (ro.ro_rt != NULL && ro.ro_rt->rt_ifa != NULL)
212 dstifp = ((struct in6_ifaddr *)ro.ro_rt->rt_ifa)->ia_ifp;
214 /* we are violating the spec, this is not the destination interface */
215 if ((m->m_flags & M_PKTHDR) != 0)
216 dstifp = m->m_pkthdr.rcvif;
219 /* jumbo payload can't contain a fragment header */
220 if (ip6->ip6_plen == 0) {
221 icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, offset);
222 in6_ifstat_inc(dstifp, ifs6_reass_fail);
227 * check whether fragment packet's fragment length is
228 * multiple of 8 octets.
229 * sizeof(struct ip6_frag) == 8
230 * sizeof(struct ip6_hdr) == 40
232 if ((ip6f->ip6f_offlg & IP6F_MORE_FRAG) &&
233 (((ntohs(ip6->ip6_plen) - offset) & 0x7) != 0)) {
234 icmp6_error(m, ICMP6_PARAM_PROB,
235 ICMP6_PARAMPROB_HEADER,
236 offsetof(struct ip6_hdr, ip6_plen));
237 in6_ifstat_inc(dstifp, ifs6_reass_fail);
241 ip6stat.ip6s_fragments++;
242 in6_ifstat_inc(dstifp, ifs6_reass_reqd);
244 /* offset now points to data portion */
245 offset += sizeof(struct ip6_frag);
247 frag6_doing_reass = 1;
249 for (q6 = ip6q.ip6q_next; q6 != &ip6q; q6 = q6->ip6q_next)
250 if (ip6f->ip6f_ident == q6->ip6q_ident &&
251 IN6_ARE_ADDR_EQUAL(&ip6->ip6_src, &q6->ip6q_src) &&
252 IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &q6->ip6q_dst))
257 * the first fragment to arrive, create a reassembly queue.
262 * Enforce upper bound on number of fragmented packets
263 * for which we attempt reassembly;
264 * If maxfrag is 0, never accept fragments.
265 * If maxfrag is -1, accept all fragments without limitation.
267 if (ip6_maxfragpackets < 0)
269 else if (frag6_nfragpackets >= (u_int)ip6_maxfragpackets)
271 frag6_nfragpackets++;
272 q6 = (struct ip6q *)malloc(sizeof(struct ip6q), M_FTABLE,
276 bzero(q6, sizeof(*q6));
278 frag6_insque(q6, &ip6q);
280 /* ip6q_nxt will be filled afterwards, from 1st fragment */
281 q6->ip6q_down = q6->ip6q_up = (struct ip6asfrag *)q6;
283 q6->ip6q_nxtp = (u_char *)nxtp;
285 q6->ip6q_ident = ip6f->ip6f_ident;
286 q6->ip6q_arrive = 0; /* Is it used anywhere? */
287 q6->ip6q_ttl = IPV6_FRAGTTL;
288 q6->ip6q_src = ip6->ip6_src;
289 q6->ip6q_dst = ip6->ip6_dst;
290 q6->ip6q_unfrglen = -1; /* The 1st fragment has not arrived. */
294 * If it's the 1st fragment, record the length of the
295 * unfragmentable part and the next header of the fragment header.
297 fragoff = ntohs(ip6f->ip6f_offlg & IP6F_OFF_MASK);
299 q6->ip6q_unfrglen = offset - sizeof(struct ip6_hdr)
300 - sizeof(struct ip6_frag);
301 q6->ip6q_nxt = ip6f->ip6f_nxt;
305 * Check that the reassembled packet would not exceed 65535 bytes
307 * If it would exceed, discard the fragment and return an ICMP error.
309 frgpartlen = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen) - offset;
310 if (q6->ip6q_unfrglen >= 0) {
311 /* The 1st fragment has already arrived. */
312 if (q6->ip6q_unfrglen + fragoff + frgpartlen > IPV6_MAXPACKET) {
313 icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER,
314 offset - sizeof(struct ip6_frag) +
315 offsetof(struct ip6_frag, ip6f_offlg));
316 frag6_doing_reass = 0;
317 return(IPPROTO_DONE);
320 else if (fragoff + frgpartlen > IPV6_MAXPACKET) {
321 icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER,
322 offset - sizeof(struct ip6_frag) +
323 offsetof(struct ip6_frag, ip6f_offlg));
324 frag6_doing_reass = 0;
325 return(IPPROTO_DONE);
328 * If it's the first fragment, do the above check for each
329 * fragment already stored in the reassembly queue.
332 for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6;
334 af6dwn = af6->ip6af_down;
336 if (q6->ip6q_unfrglen + af6->ip6af_off + af6->ip6af_frglen >
338 struct mbuf *merr = IP6_REASS_MBUF(af6);
339 struct ip6_hdr *ip6err;
340 int erroff = af6->ip6af_offset;
342 /* dequeue the fragment. */
346 /* adjust pointer. */
347 ip6err = mtod(merr, struct ip6_hdr *);
350 * Restore source and destination addresses
351 * in the erroneous IPv6 header.
353 ip6err->ip6_src = q6->ip6q_src;
354 ip6err->ip6_dst = q6->ip6q_dst;
356 icmp6_error(merr, ICMP6_PARAM_PROB,
357 ICMP6_PARAMPROB_HEADER,
358 erroff - sizeof(struct ip6_frag) +
359 offsetof(struct ip6_frag, ip6f_offlg));
364 ip6af = (struct ip6asfrag *)malloc(sizeof(struct ip6asfrag), M_FTABLE,
368 bzero(ip6af, sizeof(*ip6af));
369 ip6af->ip6af_head = ip6->ip6_flow;
370 ip6af->ip6af_len = ip6->ip6_plen;
371 ip6af->ip6af_nxt = ip6->ip6_nxt;
372 ip6af->ip6af_hlim = ip6->ip6_hlim;
373 ip6af->ip6af_mff = ip6f->ip6f_offlg & IP6F_MORE_FRAG;
374 ip6af->ip6af_off = fragoff;
375 ip6af->ip6af_frglen = frgpartlen;
376 ip6af->ip6af_offset = offset;
377 IP6_REASS_MBUF(ip6af) = m;
380 af6 = (struct ip6asfrag *)q6;
385 * Find a segment which begins after this one does.
387 for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6;
388 af6 = af6->ip6af_down)
389 if (af6->ip6af_off > ip6af->ip6af_off)
394 * If there is a preceding segment, it may provide some of
395 * our data already. If so, drop the data from the incoming
396 * segment. If it provides all of our data, drop us.
398 if (af6->ip6af_up != (struct ip6asfrag *)q6) {
399 i = af6->ip6af_up->ip6af_off + af6->ip6af_up->ip6af_frglen
402 if (i >= ip6af->ip6af_frglen)
404 m_adj(IP6_REASS_MBUF(ip6af), i);
405 ip6af->ip6af_off += i;
406 ip6af->ip6af_frglen -= i;
411 * While we overlap succeeding segments trim them or,
412 * if they are completely covered, dequeue them.
414 while (af6 != (struct ip6asfrag *)q6 &&
415 ip6af->ip6af_off + ip6af->ip6af_frglen > af6->ip6af_off) {
416 i = (ip6af->ip6af_off + ip6af->ip6af_frglen) - af6->ip6af_off;
417 if (i < af6->ip6af_frglen) {
418 af6->ip6af_frglen -= i;
420 m_adj(IP6_REASS_MBUF(af6), i);
423 af6 = af6->ip6af_down;
424 m_freem(IP6_REASS_MBUF(af6->ip6af_up));
425 frag6_deq(af6->ip6af_up);
429 * If the incoming fragment overlaps some existing fragments in
430 * the reassembly queue, drop it, since it is dangerous to override
431 * existing fragments from a security point of view.
433 if (af6->ip6af_up != (struct ip6asfrag *)q6) {
434 i = af6->ip6af_up->ip6af_off + af6->ip6af_up->ip6af_frglen
437 #if 0 /* suppress the noisy log */
438 log(LOG_ERR, "%d bytes of a fragment from %s "
439 "overlaps the previous fragment\n",
440 i, ip6_sprintf(&q6->ip6q_src));
442 free(ip6af, M_FTABLE);
446 if (af6 != (struct ip6asfrag *)q6) {
447 i = (ip6af->ip6af_off + ip6af->ip6af_frglen) - af6->ip6af_off;
449 #if 0 /* suppress the noisy log */
450 log(LOG_ERR, "%d bytes of a fragment from %s "
451 "overlaps the succeeding fragment",
452 i, ip6_sprintf(&q6->ip6q_src));
454 free(ip6af, M_FTABLE);
463 * Stick new segment in its place;
464 * check for complete reassembly.
465 * Move to front of packet queue, as we are
466 * the most recently active fragmented packet.
468 frag6_enq(ip6af, af6->ip6af_up);
470 if (q6 != ip6q.ip6q_next) {
472 frag6_insque(q6, &ip6q);
476 for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6;
477 af6 = af6->ip6af_down) {
478 if (af6->ip6af_off != next) {
479 frag6_doing_reass = 0;
482 next += af6->ip6af_frglen;
484 if (af6->ip6af_up->ip6af_mff) {
485 frag6_doing_reass = 0;
490 * Reassembly is complete; concatenate fragments.
492 ip6af = q6->ip6q_down;
493 t = m = IP6_REASS_MBUF(ip6af);
494 af6 = ip6af->ip6af_down;
496 while (af6 != (struct ip6asfrag *)q6) {
497 af6dwn = af6->ip6af_down;
501 t->m_next = IP6_REASS_MBUF(af6);
502 m_adj(t->m_next, af6->ip6af_offset);
507 /* adjust offset to point where the original next header starts */
508 offset = ip6af->ip6af_offset - sizeof(struct ip6_frag);
509 free(ip6af, M_FTABLE);
510 ip6 = mtod(m, struct ip6_hdr *);
511 ip6->ip6_plen = htons((u_short)next + offset - sizeof(struct ip6_hdr));
512 ip6->ip6_src = q6->ip6q_src;
513 ip6->ip6_dst = q6->ip6q_dst;
516 *q6->ip6q_nxtp = (u_char)(nxt & 0xff);
520 * Delete the frag6 header with as little cost as possible.
522 if (offset < m->m_len) {
523 ovbcopy((caddr_t)ip6, (caddr_t)ip6 + sizeof(struct ip6_frag),
525 m->m_data += sizeof(struct ip6_frag);
526 m->m_len -= sizeof(struct ip6_frag);
528 /* this comes with no copy if the boundary is on cluster */
529 if ((t = m_split(m, offset, M_DONTWAIT)) == NULL) {
532 frag6_nfragpackets--;
535 m_adj(t, sizeof(struct ip6_frag));
540 * Store NXT to the original.
543 char *prvnxtp = ip6_get_prevhdr(m, offset); /* XXX */
549 frag6_nfragpackets--;
551 if (m->m_flags & M_PKTHDR) { /* Isn't it always true? */
553 for (t = m; t; t = t->m_next)
555 m->m_pkthdr.len = plen;
558 ip6stat.ip6s_reassembled++;
559 in6_ifstat_inc(dstifp, ifs6_reass_ok);
562 * Tell launch routine the next header
568 frag6_doing_reass = 0;
572 in6_ifstat_inc(dstifp, ifs6_reass_fail);
573 ip6stat.ip6s_fragdropped++;
575 frag6_doing_reass = 0;
580 * Free a fragment reassembly header and all
581 * associated datagrams.
587 struct ip6asfrag *af6, *down6;
589 for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6;
591 struct mbuf *m = IP6_REASS_MBUF(af6);
593 down6 = af6->ip6af_down;
597 * Return ICMP time exceeded error for the 1st fragment.
598 * Just free other fragments.
600 if (af6->ip6af_off == 0) {
604 ip6 = mtod(m, struct ip6_hdr *);
606 /* restore source and destination addresses */
607 ip6->ip6_src = q6->ip6q_src;
608 ip6->ip6_dst = q6->ip6q_dst;
610 icmp6_error(m, ICMP6_TIME_EXCEEDED,
611 ICMP6_TIME_EXCEED_REASSEMBLY, 0);
618 frag6_nfragpackets--;
622 * Put an ip fragment on a reassembly chain.
623 * Like insque, but pointers in middle of structure.
627 struct ip6asfrag *af6, *up6;
630 af6->ip6af_down = up6->ip6af_down;
631 up6->ip6af_down->ip6af_up = af6;
632 up6->ip6af_down = af6;
636 * To frag6_enq as remque is to insque.
640 struct ip6asfrag *af6;
642 af6->ip6af_up->ip6af_down = af6->ip6af_down;
643 af6->ip6af_down->ip6af_up = af6->ip6af_up;
/*
 * Link the reassembly-queue header `new` into the doubly linked ip6q list
 * immediately after `old`.
 *
 *   new - queue header to insert; its ip6q_prev/ip6q_next are overwritten.
 *   old - header already on the list; `new` becomes old's successor.
 *
 * Callers in this file pass &ip6q (the list head) as `old`, which places
 * `new` at the front of the reassembly queue.
 */
647 frag6_insque(new, old)
648 struct ip6q *new, *old;
/* Point the new entry at its neighbours first ... */
650 new->ip6q_prev = old;
651 new->ip6q_next = old->ip6q_next;
/* ... then splice it in: old's former successor and old itself now refer to new. */
652 old->ip6q_next->ip6q_prev= new;
653 old->ip6q_next = new;
660 p6->ip6q_prev->ip6q_next = p6->ip6q_next;
661 p6->ip6q_next->ip6q_prev = p6->ip6q_prev;
665 * IPv6 reassembling timer processing;
666 * if a timer expires on a reassembly
674 int s = splsoftnet();
679 frag6_doing_reass = 1;
682 while (q6 != &ip6q) {
685 if (q6->ip6q_prev->ip6q_ttl == 0) {
686 ip6stat.ip6s_fragtimeout++;
687 /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
688 frag6_freef(q6->ip6q_prev);
692 * If we are over the maximum number of fragments
693 * (due to the limit being lowered), drain off
694 * enough to get down to the new limit.
696 while (frag6_nfragpackets > (u_int)ip6_maxfragpackets &&
698 ip6stat.ip6s_fragoverflow++;
699 /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
700 frag6_freef(ip6q.ip6q_prev);
702 frag6_doing_reass = 0;
706 * Routing changes might produce a better route than we last used;
707 * make sure we notice eventually, even if forwarding only for one
708 * destination and the cache is never replaced.
710 if (ip6_forward_rt.ro_rt) {
711 RTFREE(ip6_forward_rt.ro_rt);
712 ip6_forward_rt.ro_rt = 0;
714 if (ipsrcchk_rt.ro_rt) {
715 RTFREE(ipsrcchk_rt.ro_rt);
716 ipsrcchk_rt.ro_rt = 0;
724 * Drain off all datagram fragments.
729 if (frag6_doing_reass)
731 while (ip6q.ip6q_next != &ip6q) {
732 ip6stat.ip6s_fragdropped++;
733 /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
734 frag6_freef(ip6q.ip6q_next);