mirror of
https://github.com/openvswitch/ovs
synced 2025-10-17 14:28:02 +00:00
If the entire file is not going to be compiled because OVS is using upstream tunnel support then also don't bother pulling in the headers. Signed-off-by: Greg Rose <gvrose8192@gmail.com> Acked-by: Pravin B Shelar <pshelar@ovn.org>
419 lines
10 KiB
C
419 lines
10 KiB
C
/*
|
|
* IP fragmentation backport, heavily based on linux/net/ipv4/ip_output.c,
|
|
* copied from Linux ae7ef81ef000 ("skbuff: introduce skb_gso_validate_mtu")
|
|
*
|
|
* INET An implementation of the TCP/IP protocol suite for the LINUX
|
|
* operating system. INET is implemented using the BSD Socket
|
|
* interface as the means of communication with the user level.
|
|
*
|
|
* The Internet Protocol (IP) output module.
|
|
*
|
|
* Authors: Ross Biro
|
|
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
|
|
* Donald Becker, <becker@super.org>
|
|
* Alan Cox, <Alan.Cox@linux.org>
|
|
* Richard Underwood
|
|
* Stefan Becker, <stefanb@yello.ping.de>
|
|
* Jorge Cwik, <jorge@laser.satlink.net>
|
|
* Arnt Gulbrandsen, <agulbra@nvg.unit.no>
|
|
* Hirokazu Takahashi, <taka@valinux.co.jp>
|
|
*
|
|
* See ip_input.c for original log
|
|
*
|
|
* Fixes:
|
|
* Alan Cox : Missing nonblock feature in ip_build_xmit.
|
|
* Mike Kilburn : htons() missing in ip_build_xmit.
|
|
* Bradford Johnson: Fix faulty handling of some frames when
|
|
* no route is found.
|
|
* Alexander Demenshin: Missing sk/skb free in ip_queue_xmit
|
|
* (in case if packet not accepted by
|
|
* output firewall rules)
|
|
* Mike McLagan : Routing by source
|
|
* Alexey Kuznetsov: use new route cache
|
|
* Andi Kleen: Fix broken PMTU recovery and remove
|
|
* some redundant tests.
|
|
* Vitaly E. Lavrov : Transparent proxy revived after year coma.
|
|
* Andi Kleen : Replace ip_reply with ip_send_reply.
|
|
* Andi Kleen : Split fast and slow ip_build_xmit path
|
|
* for decreased register pressure on x86
|
|
 *					and more readability.
|
|
* Marc Boucher : When call_out_firewall returns FW_QUEUE,
|
|
* silently drop skb instead of failing with -EPERM.
|
|
* Detlev Wengorz : Copy protocol for fragments.
|
|
* Hirokazu Takahashi: HW checksumming for outgoing UDP
|
|
* datagrams.
|
|
* Hirokazu Takahashi: sendfile() on UDP works now.
|
|
*/
|
|
|
|
#ifndef HAVE_CORRECT_MRU_HANDLING
|
|
#include <asm/uaccess.h>
|
|
#include <linux/module.h>
|
|
#include <linux/types.h>
|
|
#include <linux/kernel.h>
|
|
#include <linux/mm.h>
|
|
#include <linux/string.h>
|
|
#include <linux/errno.h>
|
|
#include <linux/highmem.h>
|
|
#include <linux/slab.h>
|
|
|
|
#include <linux/socket.h>
|
|
#include <linux/sockios.h>
|
|
#include <linux/in.h>
|
|
#include <linux/inet.h>
|
|
#include <linux/netdevice.h>
|
|
#include <linux/etherdevice.h>
|
|
#include <linux/proc_fs.h>
|
|
#include <linux/stat.h>
|
|
#include <linux/init.h>
|
|
|
|
#include <net/snmp.h>
|
|
#include <net/ip.h>
|
|
#include <net/protocol.h>
|
|
#include <net/route.h>
|
|
#include <net/xfrm.h>
|
|
#include <linux/skbuff.h>
|
|
#include <net/sock.h>
|
|
#include <net/arp.h>
|
|
#include <net/icmp.h>
|
|
#include <net/checksum.h>
|
|
#include <net/inetpeer.h>
|
|
#include <linux/igmp.h>
|
|
#include <linux/netfilter_ipv4.h>
|
|
#include <linux/netfilter_bridge.h>
|
|
#include <linux/netlink.h>
|
|
#include <linux/tcp.h>
|
|
|
|
static inline void rpl_ip_options_fragment(struct sk_buff *skb)
|
|
{
|
|
unsigned char *optptr = skb_network_header(skb) + sizeof(struct iphdr);
|
|
struct ip_options *opt = &(IPCB(skb)->opt);
|
|
int l = opt->optlen;
|
|
int optlen;
|
|
|
|
while (l > 0) {
|
|
switch (*optptr) {
|
|
case IPOPT_END:
|
|
return;
|
|
case IPOPT_NOOP:
|
|
l--;
|
|
optptr++;
|
|
continue;
|
|
}
|
|
optlen = optptr[1];
|
|
if (optlen < 2 || optlen > l)
|
|
return;
|
|
if (!IPOPT_COPIED(*optptr))
|
|
memset(optptr, IPOPT_NOOP, optlen);
|
|
l -= optlen;
|
|
optptr += optlen;
|
|
}
|
|
opt->ts = 0;
|
|
opt->rr = 0;
|
|
opt->rr_needaddr = 0;
|
|
opt->ts_needaddr = 0;
|
|
opt->ts_needtime = 0;
|
|
}
|
|
#define ip_options_fragment rpl_ip_options_fragment
|
|
|
|
static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
|
|
{
|
|
to->pkt_type = from->pkt_type;
|
|
to->priority = from->priority;
|
|
to->protocol = from->protocol;
|
|
skb_dst_drop(to);
|
|
skb_dst_copy(to, from);
|
|
to->dev = from->dev;
|
|
to->mark = from->mark;
|
|
|
|
/* Copy the flags to each fragment. */
|
|
IPCB(to)->flags = IPCB(from)->flags;
|
|
|
|
#ifdef CONFIG_NET_SCHED
|
|
to->tc_index = from->tc_index;
|
|
#endif
|
|
nf_copy(to, from);
|
|
#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
|
|
to->ipvs_property = from->ipvs_property;
|
|
#endif
|
|
skb_copy_secmark(to, from);
|
|
}
|
|
|
|
/*
 * OUTPUT() calls the identifier 'output' that is in scope at the point of
 * use, passing only the arguments that the configured callback signature
 * accepts: (net, sk, skb), (sk, skb) or just (skb).
 */
#if defined(HAVE_IP_DO_FRAGMENT_USING_NET)
#define OUTPUT(net, sk, skb) output(net, sk, skb)
#elif defined(HAVE_IP_FRAGMENT_TAKES_SOCK)
#define OUTPUT(net, sk, skb) output(sk, skb)
#else
#define OUTPUT(net, sk, skb) output(skb)
#endif
|
|
|
|
/*
|
|
* This IP datagram is too large to be sent in one piece. Break it up into
|
|
* smaller pieces (each of size equal to IP header plus
|
|
* a block of the data of the original IP data part) that will yet fit in a
|
|
* single device frame, and queue such a frame for sending.
|
|
*/
|
|
|
|
int rpl_ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
|
|
int (*output)(OVS_VPORT_OUTPUT_PARAMS))
|
|
{
|
|
struct iphdr *iph;
|
|
int ptr;
|
|
struct net_device *dev;
|
|
struct sk_buff *skb2;
|
|
unsigned int mtu, hlen, left, len, ll_rs;
|
|
int offset;
|
|
__be16 not_last_frag;
|
|
struct rtable *rt = skb_rtable(skb);
|
|
int err = 0;
|
|
|
|
dev = rt->dst.dev;
|
|
|
|
/* for offloaded checksums cleanup checksum before fragmentation */
|
|
if (skb->ip_summed == CHECKSUM_PARTIAL &&
|
|
(err = skb_checksum_help(skb)))
|
|
goto fail;
|
|
|
|
/*
|
|
* Point into the IP datagram header.
|
|
*/
|
|
|
|
iph = ip_hdr(skb);
|
|
|
|
mtu = ip_skb_dst_mtu(skb);
|
|
if (IPCB(skb)->frag_max_size && IPCB(skb)->frag_max_size < mtu)
|
|
mtu = IPCB(skb)->frag_max_size;
|
|
|
|
/*
|
|
* Setup starting values.
|
|
*/
|
|
|
|
hlen = iph->ihl * 4;
|
|
mtu = mtu - hlen; /* Size of data space */
|
|
IPCB(skb)->flags |= IPSKB_FRAG_COMPLETE;
|
|
|
|
/* When frag_list is given, use it. First, check its validity:
|
|
* some transformers could create wrong frag_list or break existing
|
|
* one, it is not prohibited. In this case fall back to copying.
|
|
*
|
|
* LATER: this step can be merged to real generation of fragments,
|
|
* we can switch to copy when see the first bad fragment.
|
|
*/
|
|
if (skb_has_frag_list(skb)) {
|
|
struct sk_buff *frag, *frag2;
|
|
int first_len = skb_pagelen(skb);
|
|
|
|
if (first_len - hlen > mtu ||
|
|
((first_len - hlen) & 7) ||
|
|
ip_is_fragment(iph) ||
|
|
skb_cloned(skb))
|
|
goto slow_path;
|
|
|
|
skb_walk_frags(skb, frag) {
|
|
/* Correct geometry. */
|
|
if (frag->len > mtu ||
|
|
((frag->len & 7) && frag->next) ||
|
|
skb_headroom(frag) < hlen)
|
|
goto slow_path_clean;
|
|
|
|
/* Partially cloned skb? */
|
|
if (skb_shared(frag))
|
|
goto slow_path_clean;
|
|
|
|
BUG_ON(frag->sk);
|
|
if (skb->sk) {
|
|
frag->sk = skb->sk;
|
|
frag->destructor = sock_wfree;
|
|
}
|
|
skb->truesize -= frag->truesize;
|
|
}
|
|
|
|
/* Everything is OK. Generate! */
|
|
|
|
err = 0;
|
|
offset = 0;
|
|
frag = skb_shinfo(skb)->frag_list;
|
|
skb_frag_list_init(skb);
|
|
skb->data_len = first_len - skb_headlen(skb);
|
|
skb->len = first_len;
|
|
iph->tot_len = htons(first_len);
|
|
iph->frag_off = htons(IP_MF);
|
|
ip_send_check(iph);
|
|
|
|
for (;;) {
|
|
/* Prepare header of the next frame,
|
|
* before previous one went down. */
|
|
if (frag) {
|
|
frag->ip_summed = CHECKSUM_NONE;
|
|
skb_reset_transport_header(frag);
|
|
__skb_push(frag, hlen);
|
|
skb_reset_network_header(frag);
|
|
memcpy(skb_network_header(frag), iph, hlen);
|
|
iph = ip_hdr(frag);
|
|
iph->tot_len = htons(frag->len);
|
|
ip_copy_metadata(frag, skb);
|
|
if (offset == 0)
|
|
ip_options_fragment(frag);
|
|
offset += skb->len - hlen;
|
|
iph->frag_off = htons(offset>>3);
|
|
if (frag->next)
|
|
iph->frag_off |= htons(IP_MF);
|
|
/* Ready, complete checksum */
|
|
ip_send_check(iph);
|
|
}
|
|
|
|
err = OUTPUT(net, sk, skb);
|
|
|
|
if (!err)
|
|
IP_INC_STATS(net, IPSTATS_MIB_FRAGCREATES);
|
|
if (err || !frag)
|
|
break;
|
|
|
|
skb = frag;
|
|
frag = skb->next;
|
|
skb->next = NULL;
|
|
}
|
|
|
|
if (err == 0) {
|
|
IP_INC_STATS(net, IPSTATS_MIB_FRAGOKS);
|
|
return 0;
|
|
}
|
|
|
|
while (frag) {
|
|
skb = frag->next;
|
|
kfree_skb(frag);
|
|
frag = skb;
|
|
}
|
|
IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS);
|
|
return err;
|
|
|
|
slow_path_clean:
|
|
skb_walk_frags(skb, frag2) {
|
|
if (frag2 == frag)
|
|
break;
|
|
frag2->sk = NULL;
|
|
frag2->destructor = NULL;
|
|
skb->truesize += frag2->truesize;
|
|
}
|
|
}
|
|
|
|
slow_path:
|
|
iph = ip_hdr(skb);
|
|
|
|
left = skb->len - hlen; /* Space per frame */
|
|
ptr = hlen; /* Where to start from */
|
|
|
|
ll_rs = LL_RESERVED_SPACE(rt->dst.dev);
|
|
|
|
/*
|
|
* Fragment the datagram.
|
|
*/
|
|
|
|
offset = (ntohs(iph->frag_off) & IP_OFFSET) << 3;
|
|
not_last_frag = iph->frag_off & htons(IP_MF);
|
|
|
|
/*
|
|
* Keep copying data until we run out.
|
|
*/
|
|
|
|
while (left > 0) {
|
|
len = left;
|
|
/* IF: it doesn't fit, use 'mtu' - the data space left */
|
|
if (len > mtu)
|
|
len = mtu;
|
|
/* IF: we are not sending up to and including the packet end
|
|
then align the next start on an eight byte boundary */
|
|
if (len < left) {
|
|
len &= ~7;
|
|
}
|
|
|
|
/* Allocate buffer */
|
|
skb2 = alloc_skb(len + hlen + ll_rs, GFP_ATOMIC);
|
|
if (!skb2) {
|
|
err = -ENOMEM;
|
|
goto fail;
|
|
}
|
|
|
|
/*
|
|
* Set up data on packet
|
|
*/
|
|
|
|
ip_copy_metadata(skb2, skb);
|
|
skb_reserve(skb2, ll_rs);
|
|
skb_put(skb2, len + hlen);
|
|
skb_reset_network_header(skb2);
|
|
skb2->transport_header = skb2->network_header + hlen;
|
|
|
|
/*
|
|
* Charge the memory for the fragment to any owner
|
|
* it might possess
|
|
*/
|
|
|
|
if (skb->sk)
|
|
skb_set_owner_w(skb2, skb->sk);
|
|
|
|
/*
|
|
* Copy the packet header into the new buffer.
|
|
*/
|
|
|
|
skb_copy_from_linear_data(skb, skb_network_header(skb2), hlen);
|
|
|
|
/*
|
|
* Copy a block of the IP datagram.
|
|
*/
|
|
if (skb_copy_bits(skb, ptr, skb_transport_header(skb2), len))
|
|
BUG();
|
|
left -= len;
|
|
|
|
/*
|
|
* Fill in the new header fields.
|
|
*/
|
|
iph = ip_hdr(skb2);
|
|
iph->frag_off = htons((offset >> 3));
|
|
|
|
if (IPCB(skb)->flags & IPSKB_FRAG_PMTU)
|
|
iph->frag_off |= htons(IP_DF);
|
|
|
|
/* ANK: dirty, but effective trick. Upgrade options only if
|
|
* the segment to be fragmented was THE FIRST (otherwise,
|
|
* options are already fixed) and make it ONCE
|
|
* on the initial skb, so that all the following fragments
|
|
* will inherit fixed options.
|
|
*/
|
|
if (offset == 0)
|
|
ip_options_fragment(skb);
|
|
|
|
/*
|
|
* Added AC : If we are fragmenting a fragment that's not the
|
|
* last fragment then keep MF on each bit
|
|
*/
|
|
if (left > 0 || not_last_frag)
|
|
iph->frag_off |= htons(IP_MF);
|
|
ptr += len;
|
|
offset += len;
|
|
|
|
/*
|
|
* Put this fragment into the sending queue.
|
|
*/
|
|
iph->tot_len = htons(len + hlen);
|
|
|
|
ip_send_check(iph);
|
|
|
|
err = OUTPUT(net, sk, skb2);
|
|
if (err)
|
|
goto fail;
|
|
|
|
IP_INC_STATS(net, IPSTATS_MIB_FRAGCREATES);
|
|
}
|
|
consume_skb(skb);
|
|
IP_INC_STATS(net, IPSTATS_MIB_FRAGOKS);
|
|
return err;
|
|
|
|
fail:
|
|
kfree_skb(skb);
|
|
IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS);
|
|
return err;
|
|
}
|
|
EXPORT_SYMBOL(rpl_ip_do_fragment);
|
|
|
|
#endif /* HAVE_CORRECT_MRU_HANDLING */
|