2010-02-02 16:44:44 -05:00
|
|
|
#ifndef __NET_IP_WRAPPER_H
|
|
|
|
|
#define __NET_IP_WRAPPER_H 1
|
|
|
|
|
|
|
|
|
|
#include_next <net/ip.h>
|
|
|
|
|
|
2015-12-02 23:53:44 -08:00
|
|
|
#include <net/route.h>
|
2010-02-02 16:44:44 -05:00
|
|
|
#include <linux/version.h>
|
|
|
|
|
|
2015-03-24 16:16:18 -07:00
|
|
|
#ifndef HAVE_INET_GET_LOCAL_PORT_RANGE_USING_NET
/* Compat shim: older kernels' inet_get_local_port_range() takes only the
 * (low, high) out-parameters.  Provide a wrapper with the newer
 * (net, low, high) signature that ignores 'net' and forwards to the
 * two-argument kernel function, then alias the upstream name to it. */
static inline void rpl_inet_get_local_port_range(struct net *net, int *low,
						 int *high)
{
	inet_get_local_port_range(low, high);
}
#define inet_get_local_port_range rpl_inet_get_local_port_range

#endif /* HAVE_INET_GET_LOCAL_PORT_RANGE_USING_NET */
|
|
|
|
|
|
2015-12-02 23:53:45 -08:00
|
|
|
/* IPSKB_FRAG_PMTU is missing on older kernels; define the flag bit used by
 * the backported fragmentation code (value matches the upstream flag). */
#ifndef IPSKB_FRAG_PMTU
#define IPSKB_FRAG_PMTU BIT(6)
#endif
|
|
|
|
|
|
2015-12-02 23:53:43 -08:00
|
|
|
/* IPv4 datagram length is stored into 16bit field (tot_len) */
/* Fallback for kernels that do not expose IP_MAX_MTU in a header. */
#ifndef IP_MAX_MTU
#define IP_MAX_MTU 0xFFFFU
#endif
|
|
|
|
|
|
|
|
|
|
#ifndef HAVE_IP_SKB_DST_MTU
/* Backport of ip_sk_use_pmtu(): true when the socket's PMTU-discovery
 * setting is below IP_PMTUDISC_PROBE, i.e. the cached path MTU should be
 * honoured for this socket. */
static inline bool rpl_ip_sk_use_pmtu(const struct sock *sk)
{
	return inet_sk(sk)->pmtudisc < IP_PMTUDISC_PROBE;
}
#define ip_sk_use_pmtu rpl_ip_sk_use_pmtu
|
|
|
|
|
|
|
|
|
|
/* Backport of the upstream helper: choose the MTU for a packet leaving via
 * 'dst'.  On kernels >= 3.14 (which have the ip_fwd_use_pmtu sysctl) the
 * dst's MTU is used for local output, when the admin asked forwarding to
 * respect PMTU, or when the route locks RTAX_MTU; otherwise (forwarded
 * traffic, or older kernels) fall back to the device MTU capped at the
 * 16-bit IPv4 limit. */
static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst,
						    bool forwarding)
{
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,14,0)
	struct net *net = dev_net(dst->dev);

	if (net->ipv4.sysctl_ip_fwd_use_pmtu ||
	    dst_metric_locked(dst, RTAX_MTU) ||
	    !forwarding)
		return dst_mtu(dst);
#endif

	/* IP_MAX_MTU caps to the 16-bit tot_len field of the IPv4 header. */
	return min(dst->dev->mtu, IP_MAX_MTU);
}
|
|
|
|
|
|
|
|
|
|
static inline unsigned int rpl_ip_skb_dst_mtu(const struct sk_buff *skb)
|
|
|
|
|
{
|
|
|
|
|
if (!skb->sk || ip_sk_use_pmtu(skb->sk)) {
|
|
|
|
|
bool forwarding = IPCB(skb)->flags & IPSKB_FORWARDED;
|
|
|
|
|
return ip_dst_mtu_maybe_forward(skb_dst(skb), forwarding);
|
|
|
|
|
} else {
|
|
|
|
|
return min(skb_dst(skb)->dev->mtu, IP_MAX_MTU);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
#define ip_skb_dst_mtu rpl_ip_skb_dst_mtu
|
|
|
|
|
#endif /* HAVE_IP_SKB_DST_MTU */
|
|
|
|
|
|
2015-12-02 23:53:44 -08:00
|
|
|
/* The parameter list of the 'output' callback handed to IP fragmentation
 * differs by kernel version:
 *   - ip_fragment() takes a socket (HAVE_IP_FRAGMENT_TAKES_SOCK), and on
 *     still-newer kernels ip_local_out() also takes a struct net;
 *   - the oldest kernels pass only the skb.
 * OVS_VPORT_OUTPUT_PARAMS expands to the matching parameter list so one
 * callback definition works across all of them. */
#ifdef HAVE_IP_FRAGMENT_TAKES_SOCK
#ifdef HAVE_IP_LOCAL_OUT_TAKES_NET
#define OVS_VPORT_OUTPUT_PARAMS struct net *net, struct sock *sock, struct sk_buff *skb
#else
#define OVS_VPORT_OUTPUT_PARAMS struct sock *sock, struct sk_buff *skb
#endif
#else
#define OVS_VPORT_OUTPUT_PARAMS struct sk_buff *skb
#endif
|
|
|
|
|
|
2016-02-02 15:19:02 -08:00
|
|
|
/* Prior to upstream commit d6b915e29f4a ("ip_fragment: don't forward
|
|
|
|
|
* defragmented DF packet"), IPCB(skb)->frag_max_size was not always populated
|
|
|
|
|
* correctly, which would lead to reassembled packets not being refragmented.
|
|
|
|
|
* So, we backport all of ip_defrag() in these cases.
|
|
|
|
|
*/
|
2016-02-29 09:54:16 -08:00
|
|
|
#ifndef HAVE_CORRECT_MRU_HANDLING
|
2015-12-02 23:53:44 -08:00
|
|
|
|
|
|
|
|
#if LINUX_VERSION_CODE < KERNEL_VERSION(4,2,0)
|
|
|
|
|
/* Return true when 'user' lies within [lower_bond, upper_bond], inclusive.
 * (The "bond" spelling mirrors the upstream kernel helper's parameters.) */
static inline bool ip_defrag_user_in_between(u32 user,
					     enum ip_defrag_users lower_bond,
					     enum ip_defrag_users upper_bond)
{
	if (user < lower_bond)
		return false;

	return user <= upper_bond;
}
|
2016-02-02 15:19:02 -08:00
|
|
|
#endif /* < v4.2 */
|
2015-12-02 23:53:44 -08:00
|
|
|
|
compat: Backport ip_do_fragment().
Prior to upstream Linux commit d6b915e29f4a ("ip_fragment: don't forward
defragmented DF packet"), the fragmentation behaviour was incorrect when
dealing with linear skbs, as it would not respect the "max_frag_size"
that ip_defrag() provides, but instead attempt to use the output
device's MTU.
If OVS reassembles an IP message and passes it up to userspace, it
also provides a PACKET_ATTR_MRU to indicate the maximum received unit
size for this message. When userspace executes actions to output this
packet, it passes the MRU back down and this is the desired refragment
size. When the packet data is placed back into the skb in the execute
path, a frags list is not created so fragmentation code will treat it
as one big linear skb. Due to the above bug it would use the device's
MTU to refragment instead of the provided MRU. In the case of regular
ports, this is not too dangerous as the MTU would be a reasonable value.
However, in the case of a tunnel port the typical MTU is a very large
value. As such, rather than refragmenting the message on output, it
would simply output the (too-large) frame to the tunnel.
Depending on the tunnel type and other factors, this large frame could
be dropped along the path, or it could end up at the remote tunnel
endpoint and end up being delivered towards a remote host stack or VM.
If OVS is also controlling that endpoint, it will likely drop the packet
when sending to the final destination, because the packet exceeds the
port MTU.
Different OpenFlow rule configurations could end up preventing IP
messages from being refragmented correctly for as many as the first four
attempts in each connection.
Fix this issue by backporting ip_do_fragment() so that it will respect
the MRU value that is provided in the execute path.
VMWare-BZ: #1651589
Fixes: 213e1f54b4b3 ("compat: Wrap IPv4 fragmentation.")
Reported-by: Salman Malik <salmanm@vmware.com>
Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>
2016-06-22 18:00:43 -07:00
|
|
|
/* Backported ip_do_fragment(), defined in
 * datapath/linux/compat/ip_fragment.c: fragments 'skb' (respecting the MRU
 * recorded by ip_defrag(), per the comment above) and invokes 'output' for
 * each fragment. */
int rpl_ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
		       int (*output)(OVS_VPORT_OUTPUT_PARAMS));
#define ip_do_fragment rpl_ip_do_fragment
|
2015-12-02 23:53:45 -08:00
|
|
|
|
2016-05-02 11:19:18 -07:00
|
|
|
/* If backporting IP defrag, then init/exit functions need to be called from
 * compat_{in,ex}it() to prepare the backported fragmentation cache. In this
 * case we declare the functions which are defined in
 * datapath/linux/compat/ip_fragment.c. */
int rpl_ip_defrag(struct net *net, struct sk_buff *skb, u32 user);
#define ip_defrag rpl_ip_defrag

/* Set up / tear down the backported fragment cache. */
int __init rpl_ipfrag_init(void);
void rpl_ipfrag_fini(void);
|
2016-02-29 09:54:16 -08:00
|
|
|
|
|
|
|
|
#else /* HAVE_CORRECT_MRU_HANDLING */
|
2016-01-29 11:01:56 -08:00
|
|
|
|
2016-06-22 18:00:42 -07:00
|
|
|
#ifndef HAVE_IP_DO_FRAGMENT_TAKES_NET
/* On kernels whose ip_do_fragment() lacks the 'net' argument, adapt the
 * modern (net, sk, skb, output) calling convention by dropping 'net'.
 * Note: the #define below only takes effect after this definition, so the
 * call inside the body still reaches the real kernel function. */
static inline int rpl_ip_do_fragment(struct net *net, struct sock *sk,
				     struct sk_buff *skb,
				     int (*output)(OVS_VPORT_OUTPUT_PARAMS))
{
	return ip_do_fragment(sk, skb, output);
}
#define ip_do_fragment rpl_ip_do_fragment
#endif /* IP_DO_FRAGMENT_TAKES_NET */
|
|
|
|
|
|
2016-01-29 11:01:56 -08:00
|
|
|
/* We have no good way to detect the presence of upstream commit 8282f27449bf
 * ("inet: frag: Always orphan skbs inside ip_defrag()"), but it should be
 * always included in kernels 4.5+. */
#if LINUX_VERSION_CODE < KERNEL_VERSION(4,5,0)
/* Wrapper that orphans the skb before defragmenting (matching what 4.5+
 * kernels do internally) and papers over the ip_defrag() signature change
 * that added the 'net' argument. */
static inline int rpl_ip_defrag(struct net *net, struct sk_buff *skb, u32 user)
{
	skb_orphan(skb);
#ifndef HAVE_IP_DEFRAG_TAKES_NET
	/* Older signature: no struct net argument. */
	return ip_defrag(skb, user);
#else
	return ip_defrag(net, skb, user);
#endif
}
#define ip_defrag rpl_ip_defrag
#endif
|
|
|
|
|
|
2016-05-02 11:19:18 -07:00
|
|
|
/* If we can use upstream defrag then we can rely on the upstream
 * defrag module to init/exit correctly. In this case the calls in
 * compat_{in,ex}it() can be no-ops. */
static inline int rpl_ipfrag_init(void) { return 0; }
static inline void rpl_ipfrag_fini(void) { }
|
2016-02-29 09:54:16 -08:00
|
|
|
#endif /* HAVE_CORRECT_MRU_HANDLING */
|
|
|
|
|
|
2015-12-02 23:53:45 -08:00
|
|
|
/* Route compat init/exit through the rpl_* names: these resolve to either
 * the backported implementations or the no-op stubs, depending on
 * HAVE_CORRECT_MRU_HANDLING. */
#define ipfrag_init rpl_ipfrag_init
#define ipfrag_fini rpl_ipfrag_fini
|
|
|
|
|
|
2010-02-02 16:44:44 -05:00
|
|
|
#endif
|