2
0
mirror of https://github.com/openvswitch/ovs synced 2025-10-29 15:28:56 +00:00
Files
openvswitch/datapath/linux/compat/nf_conntrack_reasm.c

602 lines
15 KiB
C
Raw Normal View History

/*
* Backported from upstream commit 5b490047240f
* ("ipv6: Export nf_ct_frag6_gather()")
*
* IPv6 fragment reassembly for connection tracking
*
* Copyright (C)2004 USAGI/WIDE Project
*
* Author:
* Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
*
* Based on: net/ipv6/reassembly.c
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#define pr_fmt(fmt) "IPv6-nf: " fmt
#include <linux/version.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/jiffies.h>
#include <linux/net.h>
#include <linux/list.h>
#include <linux/netdevice.h>
#include <linux/in6.h>
#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <net/sock.h>
#include <net/snmp.h>
#include <net/inet_frag.h>
#include <net/ipv6.h>
#include <net/protocol.h>
#include <net/transp_v6.h>
#include <net/rawv6.h>
#include <net/ndisc.h>
#include <net/addrconf.h>
#include <net/inet_ecn.h>
#include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
#ifdef OVS_NF_DEFRAG6_BACKPORT
static const char nf_frags_cache_name[] = "ovs-frag6";
struct nf_ct_frag6_skb_cb
{
struct inet6_skb_parm h;
int offset;
};
#define NFCT_FRAG6_CB(skb) ((struct nf_ct_frag6_skb_cb*)((skb)->cb))
static struct inet_frags nf_frags;
static inline u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h)
{
return 1 << (ipv6_get_dsfield(ipv6h) & INET_ECN_MASK);
}
static unsigned int nf_hash_frag(__be32 id, const struct in6_addr *saddr,
const struct in6_addr *daddr)
{
net_get_random_once(&nf_frags.rnd, sizeof(nf_frags.rnd));
return jhash_3words(ipv6_addr_hash(saddr), ipv6_addr_hash(daddr),
(__force u32)id, nf_frags.rnd);
}
compat: Simplify inet_fragment backports. The core fragmentation handling logic is exported on all supported kernels, so it's not necessary to backport the latest version of this. This greatly simplifies the code due to inconsistencies between the old per-lookup garbage collection and the newer workqueue based garbage collection. As a result of simplifying and removing unnecessary backport code, a few bugs are fixed for corner cases such as when some fragments remain in the fragment cache when openvswitch is unloaded. Some backported ip functions need a little extra logic than what is seen on the latest code due to this, for instance on kernels <3.17: * Call inet_frag_evictor() before defrag * Limit hashsize in ip{,6}_fragment logic The pernet init/exit logic also differs a little from upstream. Upstream ipv[46]_defrag logic initializes the various pernet fragment parameters and its own global fragments cache. In the OVS backport, the pernet parameters are shared while the fragments cache is separate. The backport relies upon upstream pernet initialization to perform the shared setup, and performs no pernet initialization of its own. When it comes to pernet exit however, the backport must ensure that all OVS-specific fragment state is cleared, while the shared state remains untouched so that the regular ipv[46] logic may do its own cleanup. In practice this means that OVS must have its own divergent implementation of inet_frags_exit_net(). Fixes the following crash: Call Trace: <IRQ> [<ffffffff810744f6>] ? call_timer_fn+0x36/0x100 [<ffffffff8107548f>] run_timer_softirq+0x1ef/0x2f0 [<ffffffff8106cccc>] __do_softirq+0xec/0x2c0 [<ffffffff8106d215>] irq_exit+0x105/0x110 [<ffffffff81737095>] smp_apic_timer_interrupt+0x45/0x60 [<ffffffff81735a1d>] apic_timer_interrupt+0x6d/0x80 <EOI> [<ffffffff8104f596>] ? native_safe_halt+0x6/0x10 [<ffffffff8101cb2f>] default_idle+0x1f/0xc0 [<ffffffff8101d406>] arch_cpu_idle+0x26/0x30 [<ffffffff810bf3a5>] cpu_startup_entry+0xc5/0x290 [<ffffffff810415ed>] start_secondary+0x21d/0x2d0 Code: Bad RIP value. RIP [<ffffffffa0177480>] 0xffffffffa0177480 RSP <ffff88003f703e78> CR2: ffffffffa0177480 ---[ end trace eb98ca80ba07bd9c ]--- Kernel panic - not syncing: Fatal exception in interrupt Signed-off-by: Joe Stringer <joe@ovn.org> Acked-by: Pravin B Shelar <pshelar@ovn.org>
2016-07-12 15:26:19 -07:00
/* fb3cfe6e75b9 ("inet: frag: remove hash size assumptions from callers")
* shifted this logic into inet_fragment, but prior kernels still need this.
*/
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,17,0)
#define nf_hash_frag(a, b, c) (nf_hash_frag(a, b, c) & (INETFRAGS_HASHSZ - 1))
#endif
#ifdef HAVE_INET_FRAGS_CONST
static unsigned int nf_hashfn(const struct inet_frag_queue *q)
#else
static unsigned int nf_hashfn(struct inet_frag_queue *q)
#endif
{
const struct frag_queue *nq;
nq = container_of(q, struct frag_queue, q);
return nf_hash_frag(nq->id, &nq->saddr, &nq->daddr);
}
static void nf_ct_frag6_expire(unsigned long data)
{
struct frag_queue *fq;
struct net *net;
fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q);
net = container_of(fq->q.net, struct net, nf_frag.frags);
ip6_expire_frag_queue(net, fq, &nf_frags);
}
/* Creation primitives. */
static inline struct frag_queue *fq_find(struct net *net, __be32 id,
u32 user, struct in6_addr *src,
struct in6_addr *dst, u8 ecn)
{
struct inet_frag_queue *q;
struct ip6_create_arg arg;
unsigned int hash;
arg.id = id;
arg.user = user;
arg.src = src;
arg.dst = dst;
arg.ecn = ecn;
compat: Simplify inet_fragment backports. The core fragmentation handling logic is exported on all supported kernels, so it's not necessary to backport the latest version of this. This greatly simplifies the code due to inconsistencies between the old per-lookup garbage collection and the newer workqueue based garbage collection. As a result of simplifying and removing unnecessary backport code, a few bugs are fixed for corner cases such as when some fragments remain in the fragment cache when openvswitch is unloaded. Some backported ip functions need a little extra logic than what is seen on the latest code due to this, for instance on kernels <3.17: * Call inet_frag_evictor() before defrag * Limit hashsize in ip{,6}_fragment logic The pernet init/exit logic also differs a little from upstream. Upstream ipv[46]_defrag logic initializes the various pernet fragment parameters and its own global fragments cache. In the OVS backport, the pernet parameters are shared while the fragments cache is separate. The backport relies upon upstream pernet initialization to perform the shared setup, and performs no pernet initialization of its own. When it comes to pernet exit however, the backport must ensure that all OVS-specific fragment state is cleared, while the shared state remains untouched so that the regular ipv[46] logic may do its own cleanup. In practice this means that OVS must have its own divergent implementation of inet_frags_exit_net(). Fixes the following crash: Call Trace: <IRQ> [<ffffffff810744f6>] ? call_timer_fn+0x36/0x100 [<ffffffff8107548f>] run_timer_softirq+0x1ef/0x2f0 [<ffffffff8106cccc>] __do_softirq+0xec/0x2c0 [<ffffffff8106d215>] irq_exit+0x105/0x110 [<ffffffff81737095>] smp_apic_timer_interrupt+0x45/0x60 [<ffffffff81735a1d>] apic_timer_interrupt+0x6d/0x80 <EOI> [<ffffffff8104f596>] ? native_safe_halt+0x6/0x10 [<ffffffff8101cb2f>] default_idle+0x1f/0xc0 [<ffffffff8101d406>] arch_cpu_idle+0x26/0x30 [<ffffffff810bf3a5>] cpu_startup_entry+0xc5/0x290 [<ffffffff810415ed>] start_secondary+0x21d/0x2d0 Code: Bad RIP value. RIP [<ffffffffa0177480>] 0xffffffffa0177480 RSP <ffff88003f703e78> CR2: ffffffffa0177480 ---[ end trace eb98ca80ba07bd9c ]--- Kernel panic - not syncing: Fatal exception in interrupt Signed-off-by: Joe Stringer <joe@ovn.org> Acked-by: Pravin B Shelar <pshelar@ovn.org>
2016-07-12 15:26:19 -07:00
#ifdef HAVE_INET_FRAGS_WITH_RWLOCK
read_lock_bh(&nf_frags.lock);
#else
local_bh_disable();
compat: Simplify inet_fragment backports. The core fragmentation handling logic is exported on all supported kernels, so it's not necessary to backport the latest version of this. This greatly simplifies the code due to inconsistencies between the old per-lookup garbage collection and the newer workqueue based garbage collection. As a result of simplifying and removing unnecessary backport code, a few bugs are fixed for corner cases such as when some fragments remain in the fragment cache when openvswitch is unloaded. Some backported ip functions need a little extra logic than what is seen on the latest code due to this, for instance on kernels <3.17: * Call inet_frag_evictor() before defrag * Limit hashsize in ip{,6}_fragment logic The pernet init/exit logic also differs a little from upstream. Upstream ipv[46]_defrag logic initializes the various pernet fragment parameters and its own global fragments cache. In the OVS backport, the pernet parameters are shared while the fragments cache is separate. The backport relies upon upstream pernet initialization to perform the shared setup, and performs no pernet initialization of its own. When it comes to pernet exit however, the backport must ensure that all OVS-specific fragment state is cleared, while the shared state remains untouched so that the regular ipv[46] logic may do its own cleanup. In practice this means that OVS must have its own divergent implementation of inet_frags_exit_net(). Fixes the following crash: Call Trace: <IRQ> [<ffffffff810744f6>] ? call_timer_fn+0x36/0x100 [<ffffffff8107548f>] run_timer_softirq+0x1ef/0x2f0 [<ffffffff8106cccc>] __do_softirq+0xec/0x2c0 [<ffffffff8106d215>] irq_exit+0x105/0x110 [<ffffffff81737095>] smp_apic_timer_interrupt+0x45/0x60 [<ffffffff81735a1d>] apic_timer_interrupt+0x6d/0x80 <EOI> [<ffffffff8104f596>] ? native_safe_halt+0x6/0x10 [<ffffffff8101cb2f>] default_idle+0x1f/0xc0 [<ffffffff8101d406>] arch_cpu_idle+0x26/0x30 [<ffffffff810bf3a5>] cpu_startup_entry+0xc5/0x290 [<ffffffff810415ed>] start_secondary+0x21d/0x2d0 Code: Bad RIP value. RIP [<ffffffffa0177480>] 0xffffffffa0177480 RSP <ffff88003f703e78> CR2: ffffffffa0177480 ---[ end trace eb98ca80ba07bd9c ]--- Kernel panic - not syncing: Fatal exception in interrupt Signed-off-by: Joe Stringer <joe@ovn.org> Acked-by: Pravin B Shelar <pshelar@ovn.org>
2016-07-12 15:26:19 -07:00
#endif
hash = nf_hash_frag(id, src, dst);
q = inet_frag_find(&net->nf_frag.frags, &nf_frags, &arg, hash);
local_bh_enable();
if (IS_ERR_OR_NULL(q)) {
inet_frag_maybe_warn_overflow(q, pr_fmt());
return NULL;
}
return container_of(q, struct frag_queue, q);
}
static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
const struct frag_hdr *fhdr, int nhoff)
{
struct sk_buff *prev, *next;
unsigned int payload_len;
int offset, end;
u8 ecn;
if (qp_flags(fq) & INET_FRAG_COMPLETE) {
pr_debug("Already completed\n");
goto err;
}
payload_len = ntohs(ipv6_hdr(skb)->payload_len);
offset = ntohs(fhdr->frag_off) & ~0x7;
end = offset + (payload_len -
((u8 *)(fhdr + 1) - (u8 *)(ipv6_hdr(skb) + 1)));
if ((unsigned int)end > IPV6_MAXPLEN) {
pr_debug("offset is too large.\n");
return -1;
}
ecn = ip6_frag_ecn(ipv6_hdr(skb));
if (skb->ip_summed == CHECKSUM_COMPLETE) {
const unsigned char *nh = skb_network_header(skb);
skb->csum = csum_sub(skb->csum,
csum_partial(nh, (u8 *)(fhdr + 1) - nh,
0));
}
/* Is this the final fragment? */
if (!(fhdr->frag_off & htons(IP6_MF))) {
/* If we already have some bits beyond end
* or have different end, the segment is corrupted.
*/
if (end < fq->q.len ||
((qp_flags(fq) & INET_FRAG_LAST_IN) && end != fq->q.len)) {
pr_debug("already received last fragment\n");
goto err;
}
qp_flags(fq) |= INET_FRAG_LAST_IN;
fq->q.len = end;
} else {
/* Check if the fragment is rounded to 8 bytes.
* Required by the RFC.
*/
if (end & 0x7) {
/* RFC2460 says always send parameter problem in
* this case. -DaveM
*/
pr_debug("end of fragment not rounded to 8 bytes.\n");
return -1;
}
if (end > fq->q.len) {
/* Some bits beyond end -> corruption. */
if (qp_flags(fq) & INET_FRAG_LAST_IN) {
pr_debug("last packet already reached.\n");
goto err;
}
fq->q.len = end;
}
}
if (end == offset)
goto err;
/* Point into the IP datagram 'data' part. */
if (!pskb_pull(skb, (u8 *) (fhdr + 1) - skb->data)) {
pr_debug("queue: message is too short.\n");
goto err;
}
if (pskb_trim_rcsum(skb, end - offset)) {
pr_debug("Can't trim\n");
goto err;
}
/* Find out which fragments are in front and at the back of us
* in the chain of fragments so far. We must know where to put
* this fragment, right?
*/
prev = fq->q.fragments_tail;
if (!prev || NFCT_FRAG6_CB(prev)->offset < offset) {
next = NULL;
goto found;
}
prev = NULL;
for (next = fq->q.fragments; next != NULL; next = next->next) {
if (NFCT_FRAG6_CB(next)->offset >= offset)
break; /* bingo! */
prev = next;
}
found:
/* RFC5722, Section 4:
* When reassembling an IPv6 datagram, if
* one or more its constituent fragments is determined to be an
* overlapping fragment, the entire datagram (and any constituent
* fragments, including those not yet received) MUST be silently
* discarded.
*/
/* Check for overlap with preceding fragment. */
if (prev &&
(NFCT_FRAG6_CB(prev)->offset + prev->len) > offset)
goto discard_fq;
/* Look for overlap with succeeding segment. */
if (next && NFCT_FRAG6_CB(next)->offset < end)
goto discard_fq;
NFCT_FRAG6_CB(skb)->offset = offset;
/* Insert this fragment in the chain of fragments. */
skb->next = next;
if (!next)
fq->q.fragments_tail = skb;
if (prev)
prev->next = skb;
else
fq->q.fragments = skb;
if (skb->dev) {
fq->iif = skb->dev->ifindex;
skb->dev = NULL;
}
fq->q.stamp = skb->tstamp;
fq->q.meat += skb->len;
fq->ecn |= ecn;
if (payload_len > fq->q.max_size)
fq->q.max_size = payload_len;
add_frag_mem_limit(fq->q.net, skb->truesize);
/* The first fragment.
* nhoffset is obtained from the first fragment, of course.
*/
if (offset == 0) {
fq->nhoffset = nhoff;
qp_flags(fq) |= INET_FRAG_FIRST_IN;
}
inet_frag_lru_move(&fq->q);
return 0;
discard_fq:
inet_frag_kill(&fq->q, &nf_frags);
err:
return -1;
}
/*
* Check if this packet is complete.
*
* It is called with locked fq, and caller must check that
* queue is eligible for reassembly i.e. it is not COMPLETE,
* the last and the first frames arrived and all the bits are here.
*
* returns true if *prev skb has been transformed into the reassembled
* skb, false otherwise.
*/
static bool
nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *prev, struct net_device *dev)
{
struct sk_buff *fp, *head = fq->q.fragments;
int payload_len;
u8 ecn;
inet_frag_kill(&fq->q, &nf_frags);
WARN_ON(head == NULL);
WARN_ON(NFCT_FRAG6_CB(head)->offset != 0);
ecn = ip_frag_ecn_table[fq->ecn];
if (unlikely(ecn == 0xff))
return false;
/* Unfragmented part is taken from the first segment. */
payload_len = ((head->data - skb_network_header(head)) -
sizeof(struct ipv6hdr) + fq->q.len -
sizeof(struct frag_hdr));
if (payload_len > IPV6_MAXPLEN) {
net_dbg_ratelimited("nf_ct_frag6_reasm: payload len = %d\n",
payload_len);
return false;
}
/* Head of list must not be cloned. */
if (skb_unclone(head, GFP_ATOMIC))
return false;
/* If the first fragment is fragmented itself, we split
* it to two chunks: the first with data and paged part
* and the second, holding only fragments. */
if (skb_has_frag_list(head)) {
struct sk_buff *clone;
int i, plen = 0;
clone = alloc_skb(0, GFP_ATOMIC);
if (clone == NULL)
return false;
clone->next = head->next;
head->next = clone;
skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list;
skb_frag_list_init(head);
for (i = 0; i < skb_shinfo(head)->nr_frags; i++)
plen += skb_frag_size(&skb_shinfo(head)->frags[i]);
clone->len = clone->data_len = head->data_len - plen;
head->data_len -= clone->len;
head->len -= clone->len;
clone->csum = 0;
clone->ip_summed = head->ip_summed;
add_frag_mem_limit(fq->q.net, clone->truesize);
}
/* morph head into last received skb: prev.
*
* This allows callers of ipv6 conntrack defrag to continue
* to use the last skb(frag) passed into the reasm engine.
* The last skb frag 'silently' turns into the full reassembled skb.
*
* Since prev is also part of q->fragments we have to clone it first.
*/
if (head != prev) {
struct sk_buff *iter;
fp = skb_clone(prev, GFP_ATOMIC);
if (!fp)
return false;
fp->next = prev->next;
iter = head;
while (iter) {
if (iter->next == prev) {
iter->next = fp;
break;
}
iter = iter->next;
}
skb_morph(prev, head);
prev->next = head->next;
consume_skb(head);
head = prev;
}
/* We have to remove fragment header from datagram and to relocate
* header in order to calculate ICV correctly. */
skb_network_header(head)[fq->nhoffset] = skb_transport_header(head)[0];
memmove(head->head + sizeof(struct frag_hdr), head->head,
(head->data - head->head) - sizeof(struct frag_hdr));
head->mac_header += sizeof(struct frag_hdr);
head->network_header += sizeof(struct frag_hdr);
skb_shinfo(head)->frag_list = head->next;
skb_reset_transport_header(head);
skb_push(head, head->data - skb_network_header(head));
for (fp=head->next; fp; fp = fp->next) {
head->data_len += fp->len;
head->len += fp->len;
if (head->ip_summed != fp->ip_summed)
head->ip_summed = CHECKSUM_NONE;
else if (head->ip_summed == CHECKSUM_COMPLETE)
head->csum = csum_add(head->csum, fp->csum);
head->truesize += fp->truesize;
}
sub_frag_mem_limit(fq->q.net, head->truesize);
head->ignore_df = 1;
head->next = NULL;
head->dev = dev;
head->tstamp = fq->q.stamp;
ipv6_hdr(head)->payload_len = htons(payload_len);
ipv6_change_dsfield(ipv6_hdr(head), 0xff, ecn);
IP6CB(head)->frag_max_size = sizeof(struct ipv6hdr) + fq->q.max_size;
/* Yes, and fold redundant checksum back. 8) */
if (head->ip_summed == CHECKSUM_COMPLETE)
head->csum = csum_partial(skb_network_header(head),
skb_network_header_len(head),
head->csum);
fq->q.fragments = NULL;
fq->q.fragments_tail = NULL;
return true;
}
/*
* find the header just before Fragment Header.
*
* if success return 0 and set ...
* (*prevhdrp): the value of "Next Header Field" in the header
* just before Fragment Header.
* (*prevhoff): the offset of "Next Header Field" in the header
* just before Fragment Header.
* (*fhoff) : the offset of Fragment Header.
*
* Based on ipv6_skip_hdr() in net/ipv6/exthdr.c
*
*/
static int
find_prev_fhdr(struct sk_buff *skb, u8 *prevhdrp, int *prevhoff, int *fhoff)
{
u8 nexthdr = ipv6_hdr(skb)->nexthdr;
const int netoff = skb_network_offset(skb);
u8 prev_nhoff = netoff + offsetof(struct ipv6hdr, nexthdr);
int start = netoff + sizeof(struct ipv6hdr);
int len = skb->len - start;
u8 prevhdr = NEXTHDR_IPV6;
while (nexthdr != NEXTHDR_FRAGMENT) {
struct ipv6_opt_hdr hdr;
int hdrlen;
if (!ipv6_ext_hdr(nexthdr)) {
return -1;
}
if (nexthdr == NEXTHDR_NONE) {
pr_debug("next header is none\n");
return -1;
}
if (len < (int)sizeof(struct ipv6_opt_hdr)) {
pr_debug("too short\n");
return -1;
}
if (skb_copy_bits(skb, start, &hdr, sizeof(hdr)))
BUG();
if (nexthdr == NEXTHDR_AUTH)
hdrlen = (hdr.hdrlen+2)<<2;
else
hdrlen = ipv6_optlen(&hdr);
prevhdr = nexthdr;
prev_nhoff = start;
nexthdr = hdr.nexthdr;
len -= hdrlen;
start += hdrlen;
}
if (len < 0)
return -1;
*prevhdrp = prevhdr;
*prevhoff = prev_nhoff;
*fhoff = start;
return 0;
}
compat: ipv6: orphan skbs in reassembly unit. Upstream commit: ipv6: orphan skbs in reassembly unit Andrey reported a use-after-free in IPv6 stack. Issue here is that we free the socket while it still has skb in TX path and in some queues. It happens here because IPv6 reassembly unit messes skb->truesize, breaking skb_set_owner_w() badly. We fixed a similar issue for IPV4 in commit 8282f27449bf ("inet: frag: Always orphan skbs inside ip_defrag()") Acked-by: Joe Stringer <joe@ovn.org> ================================================================== BUG: KASAN: use-after-free in sock_wfree+0x118/0x120 Read of size 8 at addr ffff880062da0060 by task a.out/4140 page:ffffea00018b6800 count:1 mapcount:0 mapping: (null) index:0x0 compound_mapcount: 0 flags: 0x100000000008100(slab|head) raw: 0100000000008100 0000000000000000 0000000000000000 0000000180130013 raw: dead000000000100 dead000000000200 ffff88006741f140 0000000000000000 page dumped because: kasan: bad access detected CPU: 0 PID: 4140 Comm: a.out Not tainted 4.10.0-rc3+ #59 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:15 dump_stack+0x292/0x398 lib/dump_stack.c:51 describe_address mm/kasan/report.c:262 kasan_report_error+0x121/0x560 mm/kasan/report.c:370 kasan_report mm/kasan/report.c:392 __asan_report_load8_noabort+0x3e/0x40 mm/kasan/report.c:413 sock_flag ./arch/x86/include/asm/bitops.h:324 sock_wfree+0x118/0x120 net/core/sock.c:1631 skb_release_head_state+0xfc/0x250 net/core/skbuff.c:655 skb_release_all+0x15/0x60 net/core/skbuff.c:668 __kfree_skb+0x15/0x20 net/core/skbuff.c:684 kfree_skb+0x16e/0x4e0 net/core/skbuff.c:705 inet_frag_destroy+0x121/0x290 net/ipv4/inet_fragment.c:304 inet_frag_put ./include/net/inet_frag.h:133 nf_ct_frag6_gather+0x1125/0x38b0 net/ipv6/netfilter/nf_conntrack_reasm.c:617 ipv6_defrag+0x21b/0x350 net/ipv6/netfilter/nf_defrag_ipv6_hooks.c:68 nf_hook_entry_hookfn ./include/linux/netfilter.h:102 nf_hook_slow+0xc3/0x290 net/netfilter/core.c:310 nf_hook ./include/linux/netfilter.h:212 __ip6_local_out+0x52c/0xaf0 net/ipv6/output_core.c:160 ip6_local_out+0x2d/0x170 net/ipv6/output_core.c:170 ip6_send_skb+0xa1/0x340 net/ipv6/ip6_output.c:1722 ip6_push_pending_frames+0xb3/0xe0 net/ipv6/ip6_output.c:1742 rawv6_push_pending_frames net/ipv6/raw.c:613 rawv6_sendmsg+0x2cff/0x4130 net/ipv6/raw.c:927 inet_sendmsg+0x164/0x5b0 net/ipv4/af_inet.c:744 sock_sendmsg_nosec net/socket.c:635 sock_sendmsg+0xca/0x110 net/socket.c:645 sock_write_iter+0x326/0x620 net/socket.c:848 new_sync_write fs/read_write.c:499 __vfs_write+0x483/0x760 fs/read_write.c:512 vfs_write+0x187/0x530 fs/read_write.c:560 SYSC_write fs/read_write.c:607 SyS_write+0xfb/0x230 fs/read_write.c:599 entry_SYSCALL_64_fastpath+0x1f/0xc2 arch/x86/entry/entry_64.S:203 RIP: 0033:0x7ff26e6f5b79 RSP: 002b:00007ff268e0ed98 EFLAGS: 00000206 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 00007ff268e0f9c0 RCX: 00007ff26e6f5b79 RDX: 0000000000000010 RSI: 0000000020f50fe1 RDI: 0000000000000003 RBP: 00007ff26ebc1220 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000206 R12: 0000000000000000 R13: 00007ff268e0f9c0 R14: 00007ff26efec040 R15: 0000000000000003 The buggy address belongs to the object at ffff880062da0000 which belongs to the cache RAWv6 of size 1504 The buggy address ffff880062da0060 is located 96 bytes inside of 1504-byte region [ffff880062da0000, ffff880062da05e0) Freed by task 4113: save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:57 save_stack+0x43/0xd0 mm/kasan/kasan.c:502 set_track mm/kasan/kasan.c:514 kasan_slab_free+0x73/0xc0 mm/kasan/kasan.c:578 slab_free_hook mm/slub.c:1352 slab_free_freelist_hook mm/slub.c:1374 slab_free mm/slub.c:2951 kmem_cache_free+0xb2/0x2c0 mm/slub.c:2973 sk_prot_free net/core/sock.c:1377 __sk_destruct+0x49c/0x6e0 net/core/sock.c:1452 sk_destruct+0x47/0x80 net/core/sock.c:1460 __sk_free+0x57/0x230 net/core/sock.c:1468 sk_free+0x23/0x30 net/core/sock.c:1479 sock_put ./include/net/sock.h:1638 sk_common_release+0x31e/0x4e0 net/core/sock.c:2782 rawv6_close+0x54/0x80 net/ipv6/raw.c:1214 inet_release+0xed/0x1c0 net/ipv4/af_inet.c:425 inet6_release+0x50/0x70 net/ipv6/af_inet6.c:431 sock_release+0x8d/0x1e0 net/socket.c:599 sock_close+0x16/0x20 net/socket.c:1063 __fput+0x332/0x7f0 fs/file_table.c:208 ____fput+0x15/0x20 fs/file_table.c:244 task_work_run+0x19b/0x270 kernel/task_work.c:116 exit_task_work ./include/linux/task_work.h:21 do_exit+0x186b/0x2800 kernel/exit.c:839 do_group_exit+0x149/0x420 kernel/exit.c:943 SYSC_exit_group kernel/exit.c:954 SyS_exit_group+0x1d/0x20 kernel/exit.c:952 entry_SYSCALL_64_fastpath+0x1f/0xc2 arch/x86/entry/entry_64.S:203 Allocated by task 4115: save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:57 save_stack+0x43/0xd0 mm/kasan/kasan.c:502 set_track mm/kasan/kasan.c:514 kasan_kmalloc+0xad/0xe0 mm/kasan/kasan.c:605 kasan_slab_alloc+0x12/0x20 mm/kasan/kasan.c:544 slab_post_alloc_hook mm/slab.h:432 slab_alloc_node mm/slub.c:2708 slab_alloc mm/slub.c:2716 kmem_cache_alloc+0x1af/0x250 mm/slub.c:2721 sk_prot_alloc+0x65/0x2a0 net/core/sock.c:1334 sk_alloc+0x105/0x1010 net/core/sock.c:1396 inet6_create+0x44d/0x1150 net/ipv6/af_inet6.c:183 __sock_create+0x4f6/0x880 net/socket.c:1199 sock_create net/socket.c:1239 SYSC_socket net/socket.c:1269 SyS_socket+0xf9/0x230 net/socket.c:1249 entry_SYSCALL_64_fastpath+0x1f/0xc2 arch/x86/entry/entry_64.S:203 Memory state around the buggy address: ffff880062d9ff00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff880062d9ff80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc >ffff880062da0000: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff880062da0080: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff880062da0100: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ================================================================== Reported-by: Andrey Konovalov <andreyknvl@google.com> Signed-off-by: Eric Dumazet <edumazet@google.com> Signed-off-by: David S. Miller <davem@davemloft.net> This patch is a bugfix, and will be progressively backported to earlier kernels. If it is backported to any kernel 4.5 through 4.10, then users use that updated kernel with the OVS kernel module prior to this patch, it could cause a crash. The compat code here resolves such issues. Upstream: 48cac18ecf1d ("ipv6: orphan skbs in reassembly unit") Signed-off-by: Joe Stringer <joe@ovn.org> Signed-off-by: Andy Zhou <azhou@ovn.org> Acked-by: Joe Stringer <joe@ovn.org>
2017-03-01 14:45:06 -08:00
int rpl_nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
{
struct net_device *dev = skb->dev;
int fhoff, nhoff, ret;
struct frag_hdr *fhdr;
struct frag_queue *fq;
struct ipv6hdr *hdr;
u8 prevhdr;
/* Jumbo payload inhibits frag. header */
if (ipv6_hdr(skb)->payload_len == 0) {
pr_debug("payload len = 0\n");
return -EINVAL;
}
if (find_prev_fhdr(skb, &prevhdr, &nhoff, &fhoff) < 0)
return -EINVAL;
if (!pskb_may_pull(skb, fhoff + sizeof(*fhdr)))
return -ENOMEM;
skb_set_transport_header(skb, fhoff);
hdr = ipv6_hdr(skb);
fhdr = (struct frag_hdr *)skb_transport_header(skb);
compat: Simplify inet_fragment backports. The core fragmentation handling logic is exported on all supported kernels, so it's not necessary to backport the latest version of this. This greatly simplifies the code due to inconsistencies between the old per-lookup garbage collection and the newer workqueue based garbage collection. As a result of simplifying and removing unnecessary backport code, a few bugs are fixed for corner cases such as when some fragments remain in the fragment cache when openvswitch is unloaded. Some backported ip functions need a little extra logic than what is seen on the latest code due to this, for instance on kernels <3.17: * Call inet_frag_evictor() before defrag * Limit hashsize in ip{,6}_fragment logic The pernet init/exit logic also differs a little from upstream. Upstream ipv[46]_defrag logic initializes the various pernet fragment parameters and its own global fragments cache. In the OVS backport, the pernet parameters are shared while the fragments cache is separate. The backport relies upon upstream pernet initialization to perform the shared setup, and performs no pernet initialization of its own. When it comes to pernet exit however, the backport must ensure that all OVS-specific fragment state is cleared, while the shared state remains untouched so that the regular ipv[46] logic may do its own cleanup. In practice this means that OVS must have its own divergent implementation of inet_frags_exit_net(). Fixes the following crash: Call Trace: <IRQ> [<ffffffff810744f6>] ? call_timer_fn+0x36/0x100 [<ffffffff8107548f>] run_timer_softirq+0x1ef/0x2f0 [<ffffffff8106cccc>] __do_softirq+0xec/0x2c0 [<ffffffff8106d215>] irq_exit+0x105/0x110 [<ffffffff81737095>] smp_apic_timer_interrupt+0x45/0x60 [<ffffffff81735a1d>] apic_timer_interrupt+0x6d/0x80 <EOI> [<ffffffff8104f596>] ? native_safe_halt+0x6/0x10 [<ffffffff8101cb2f>] default_idle+0x1f/0xc0 [<ffffffff8101d406>] arch_cpu_idle+0x26/0x30 [<ffffffff810bf3a5>] cpu_startup_entry+0xc5/0x290 [<ffffffff810415ed>] start_secondary+0x21d/0x2d0 Code: Bad RIP value. RIP [<ffffffffa0177480>] 0xffffffffa0177480 RSP <ffff88003f703e78> CR2: ffffffffa0177480 ---[ end trace eb98ca80ba07bd9c ]--- Kernel panic - not syncing: Fatal exception in interrupt Signed-off-by: Joe Stringer <joe@ovn.org> Acked-by: Pravin B Shelar <pshelar@ovn.org>
2016-07-12 15:26:19 -07:00
/* See ip_evictor(). */
#ifdef HAVE_INET_FRAG_EVICTOR
local_bh_disable();
inet_frag_evictor(&net->nf_frag.frags, &nf_frags, false);
local_bh_enable();
#endif
compat: ipv6: orphan skbs in reassembly unit. Upstream commit: ipv6: orphan skbs in reassembly unit Andrey reported a use-after-free in IPv6 stack. Issue here is that we free the socket while it still has skb in TX path and in some queues. It happens here because IPv6 reassembly unit messes skb->truesize, breaking skb_set_owner_w() badly. We fixed a similar issue for IPV4 in commit 8282f27449bf ("inet: frag: Always orphan skbs inside ip_defrag()") Acked-by: Joe Stringer <joe@ovn.org> ================================================================== BUG: KASAN: use-after-free in sock_wfree+0x118/0x120 Read of size 8 at addr ffff880062da0060 by task a.out/4140 page:ffffea00018b6800 count:1 mapcount:0 mapping: (null) index:0x0 compound_mapcount: 0 flags: 0x100000000008100(slab|head) raw: 0100000000008100 0000000000000000 0000000000000000 0000000180130013 raw: dead000000000100 dead000000000200 ffff88006741f140 0000000000000000 page dumped because: kasan: bad access detected CPU: 0 PID: 4140 Comm: a.out Not tainted 4.10.0-rc3+ #59 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:15 dump_stack+0x292/0x398 lib/dump_stack.c:51 describe_address mm/kasan/report.c:262 kasan_report_error+0x121/0x560 mm/kasan/report.c:370 kasan_report mm/kasan/report.c:392 __asan_report_load8_noabort+0x3e/0x40 mm/kasan/report.c:413 sock_flag ./arch/x86/include/asm/bitops.h:324 sock_wfree+0x118/0x120 net/core/sock.c:1631 skb_release_head_state+0xfc/0x250 net/core/skbuff.c:655 skb_release_all+0x15/0x60 net/core/skbuff.c:668 __kfree_skb+0x15/0x20 net/core/skbuff.c:684 kfree_skb+0x16e/0x4e0 net/core/skbuff.c:705 inet_frag_destroy+0x121/0x290 net/ipv4/inet_fragment.c:304 inet_frag_put ./include/net/inet_frag.h:133 nf_ct_frag6_gather+0x1125/0x38b0 net/ipv6/netfilter/nf_conntrack_reasm.c:617 ipv6_defrag+0x21b/0x350 net/ipv6/netfilter/nf_defrag_ipv6_hooks.c:68 nf_hook_entry_hookfn ./include/linux/netfilter.h:102 nf_hook_slow+0xc3/0x290 net/netfilter/core.c:310 nf_hook ./include/linux/netfilter.h:212 __ip6_local_out+0x52c/0xaf0 net/ipv6/output_core.c:160 ip6_local_out+0x2d/0x170 net/ipv6/output_core.c:170 ip6_send_skb+0xa1/0x340 net/ipv6/ip6_output.c:1722 ip6_push_pending_frames+0xb3/0xe0 net/ipv6/ip6_output.c:1742 rawv6_push_pending_frames net/ipv6/raw.c:613 rawv6_sendmsg+0x2cff/0x4130 net/ipv6/raw.c:927 inet_sendmsg+0x164/0x5b0 net/ipv4/af_inet.c:744 sock_sendmsg_nosec net/socket.c:635 sock_sendmsg+0xca/0x110 net/socket.c:645 sock_write_iter+0x326/0x620 net/socket.c:848 new_sync_write fs/read_write.c:499 __vfs_write+0x483/0x760 fs/read_write.c:512 vfs_write+0x187/0x530 fs/read_write.c:560 SYSC_write fs/read_write.c:607 SyS_write+0xfb/0x230 fs/read_write.c:599 entry_SYSCALL_64_fastpath+0x1f/0xc2 arch/x86/entry/entry_64.S:203 RIP: 0033:0x7ff26e6f5b79 RSP: 002b:00007ff268e0ed98 EFLAGS: 00000206 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 00007ff268e0f9c0 RCX: 00007ff26e6f5b79 RDX: 0000000000000010 RSI: 0000000020f50fe1 RDI: 0000000000000003 RBP: 00007ff26ebc1220 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000206 R12: 0000000000000000 R13: 00007ff268e0f9c0 R14: 00007ff26efec040 R15: 0000000000000003 The buggy address belongs to the object at ffff880062da0000 which belongs to the cache RAWv6 of size 1504 The buggy address ffff880062da0060 is located 96 bytes inside of 1504-byte region [ffff880062da0000, ffff880062da05e0) Freed by task 4113: save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:57 save_stack+0x43/0xd0 mm/kasan/kasan.c:502 set_track mm/kasan/kasan.c:514 kasan_slab_free+0x73/0xc0 mm/kasan/kasan.c:578 slab_free_hook mm/slub.c:1352 slab_free_freelist_hook mm/slub.c:1374 slab_free mm/slub.c:2951 kmem_cache_free+0xb2/0x2c0 mm/slub.c:2973 sk_prot_free net/core/sock.c:1377 __sk_destruct+0x49c/0x6e0 net/core/sock.c:1452 sk_destruct+0x47/0x80 net/core/sock.c:1460 __sk_free+0x57/0x230 net/core/sock.c:1468 sk_free+0x23/0x30 net/core/sock.c:1479 sock_put ./include/net/sock.h:1638 sk_common_release+0x31e/0x4e0 net/core/sock.c:2782 rawv6_close+0x54/0x80 net/ipv6/raw.c:1214 inet_release+0xed/0x1c0 net/ipv4/af_inet.c:425 inet6_release+0x50/0x70 net/ipv6/af_inet6.c:431 sock_release+0x8d/0x1e0 net/socket.c:599 sock_close+0x16/0x20 net/socket.c:1063 __fput+0x332/0x7f0 fs/file_table.c:208 ____fput+0x15/0x20 fs/file_table.c:244 task_work_run+0x19b/0x270 kernel/task_work.c:116 exit_task_work ./include/linux/task_work.h:21 do_exit+0x186b/0x2800 kernel/exit.c:839 do_group_exit+0x149/0x420 kernel/exit.c:943 SYSC_exit_group kernel/exit.c:954 SyS_exit_group+0x1d/0x20 kernel/exit.c:952 entry_SYSCALL_64_fastpath+0x1f/0xc2 arch/x86/entry/entry_64.S:203 Allocated by task 4115: save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:57 save_stack+0x43/0xd0 mm/kasan/kasan.c:502 set_track mm/kasan/kasan.c:514 kasan_kmalloc+0xad/0xe0 mm/kasan/kasan.c:605 kasan_slab_alloc+0x12/0x20 mm/kasan/kasan.c:544 slab_post_alloc_hook mm/slab.h:432 slab_alloc_node mm/slub.c:2708 slab_alloc mm/slub.c:2716 kmem_cache_alloc+0x1af/0x250 mm/slub.c:2721 sk_prot_alloc+0x65/0x2a0 net/core/sock.c:1334 sk_alloc+0x105/0x1010 net/core/sock.c:1396 inet6_create+0x44d/0x1150 net/ipv6/af_inet6.c:183 __sock_create+0x4f6/0x880 net/socket.c:1199 sock_create net/socket.c:1239 SYSC_socket net/socket.c:1269 SyS_socket+0xf9/0x230 net/socket.c:1249 entry_SYSCALL_64_fastpath+0x1f/0xc2 arch/x86/entry/entry_64.S:203 Memory state around the buggy address: ffff880062d9ff00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff880062d9ff80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc >ffff880062da0000: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff880062da0080: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff880062da0100: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ================================================================== Reported-by: Andrey Konovalov <andreyknvl@google.com> Signed-off-by: Eric Dumazet <edumazet@google.com> Signed-off-by: David S. Miller <davem@davemloft.net> This patch is a bugfix, and will be progressively backported to earlier kernels. If it is backported to any kernel 4.5 through 4.10, then users use that updated kernel with the OVS kernel module prior to this patch, it could cause a crash. The compat code here resolves such issues. Upstream: 48cac18ecf1d ("ipv6: orphan skbs in reassembly unit") Signed-off-by: Joe Stringer <joe@ovn.org> Signed-off-by: Andy Zhou <azhou@ovn.org> Acked-by: Joe Stringer <joe@ovn.org>
2017-03-01 14:45:06 -08:00
skb_orphan(skb);
fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr,
ip6_frag_ecn(hdr));
if (fq == NULL)
return -ENOMEM;
spin_lock_bh(&fq->q.lock);
if (nf_ct_frag6_queue(fq, skb, fhdr, nhoff) < 0) {
ret = -EINVAL;
goto out_unlock;
}
/* after queue has assumed skb ownership, only 0 or -EINPROGRESS
* must be returned.
*/
ret = -EINPROGRESS;
if (qp_flags(fq) == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
fq->q.meat == fq->q.len &&
nf_ct_frag6_reasm(fq, skb, dev))
ret = 0;
out_unlock:
spin_unlock_bh(&fq->q.lock);
inet_frag_put(&fq->q, &nf_frags);
return ret;
}
static void nf_ct_net_exit(struct net *net)
{
2016-07-12 15:26:18 -07:00
inet_frags_exit_net(&net->nf_frag.frags, &nf_frags);
}
static struct pernet_operations nf_ct_net_ops = {
.exit = nf_ct_net_exit,
};
int rpl_nf_ct_frag6_init(void)
{
int ret = 0;
nf_defrag_ipv6_enable();
nf_frags.hashfn = nf_hashfn;
nf_frags.constructor = ip6_frag_init;
nf_frags.destructor = NULL;
nf_frags.qsize = sizeof(struct frag_queue);
nf_frags.match = ip6_frag_match;
nf_frags.frag_expire = nf_ct_frag6_expire;
#ifdef HAVE_INET_FRAGS_WITH_FRAGS_WORK
nf_frags.frags_cache_name = nf_frags_cache_name;
#endif
ret = inet_frags_init(&nf_frags);
if (ret)
goto out;
ret = register_pernet_subsys(&nf_ct_net_ops);
if (ret)
inet_frags_fini(&nf_frags);
out:
return ret;
}
void rpl_nf_ct_frag6_cleanup(void)
{
unregister_pernet_subsys(&nf_ct_net_ops);
inet_frags_fini(&nf_frags);
}
#endif /* OVS_NF_DEFRAG6_BACKPORT */