2
0
mirror of https://github.com/openvswitch/ovs synced 2025-10-25 15:07:05 +00:00
Files
openvswitch/datapath/linux/compat/inet_fragment.c
Joe Stringer ccd0a13bfa compat: Simplify inet_fragment backports.
The core fragmentation handling logic is exported on all supported
kernels, so it's not necessary to backport the latest version of this.
This greatly simplifies the code due to inconsistencies between the old
per-lookup garbage collection and the newer workqueue based garbage
collection.

As a result of simplifying and removing unnecessary backport code, a few
bugs are fixed for corner cases such as when some fragments remain in
the fragment cache when openvswitch is unloaded.

As a consequence, some backported ip functions need slightly more logic
than the latest upstream code, for instance on kernels <3.17:
* Call inet_frag_evictor() before defrag
* Limit hashsize in ip{,6}_fragment logic

The pernet init/exit logic also differs a little from upstream. Upstream
ipv[46]_defrag logic initializes the various pernet fragment parameters
and its own global fragments cache. In the OVS backport, the pernet
parameters are shared while the fragments cache is separate. The
backport relies upon upstream pernet initialization to perform the
shared setup, and performs no pernet initialization of its own. When it
comes to pernet exit however, the backport must ensure that all
OVS-specific fragment state is cleared, while the shared state remains
untouched so that the regular ipv[46] logic may do its own cleanup. In
practice this means that OVS must have its own divergent implementation
of inet_frags_exit_net().

Fixes the following crash:

Call Trace:
 <IRQ>
 [<ffffffff810744f6>] ? call_timer_fn+0x36/0x100
 [<ffffffff8107548f>] run_timer_softirq+0x1ef/0x2f0
 [<ffffffff8106cccc>] __do_softirq+0xec/0x2c0
 [<ffffffff8106d215>] irq_exit+0x105/0x110
 [<ffffffff81737095>] smp_apic_timer_interrupt+0x45/0x60
 [<ffffffff81735a1d>] apic_timer_interrupt+0x6d/0x80
 <EOI>
 [<ffffffff8104f596>] ? native_safe_halt+0x6/0x10
 [<ffffffff8101cb2f>] default_idle+0x1f/0xc0
 [<ffffffff8101d406>] arch_cpu_idle+0x26/0x30
 [<ffffffff810bf3a5>] cpu_startup_entry+0xc5/0x290
 [<ffffffff810415ed>] start_secondary+0x21d/0x2d0
Code:  Bad RIP value.
RIP  [<ffffffffa0177480>] 0xffffffffa0177480
 RSP <ffff88003f703e78>
CR2: ffffffffa0177480
---[ end trace eb98ca80ba07bd9c ]---
Kernel panic - not syncing: Fatal exception in interrupt

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Pravin B Shelar <pshelar@ovn.org>
2016-08-01 14:16:10 -07:00

115 lines
2.6 KiB
C

/*
* inet fragments management
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* Authors: Pavel Emelyanov <xemul@openvz.org>
* Started as consolidation of ipv4/ip_fragment.c,
* ipv6/reassembly.c and ipv6 nf conntrack reassembly
*/
#ifndef HAVE_CORRECT_MRU_HANDLING
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/module.h>
#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/random.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
#include <linux/slab.h>
#include <net/sock.h>
#include <net/inet_frag.h>
#include <net/inet_ecn.h>
#ifdef HAVE_INET_FRAGS_WITH_FRAGS_WORK
static bool inet_fragq_should_evict(const struct inet_frag_queue *q)
{
return q->net->low_thresh == 0 ||
frag_mem_limit(q->net) >= q->net->low_thresh;
}
/*
 * Evict the eligible fragment queues found in hash bucket @hb.
 *
 * While holding the bucket's chain_lock, every queue accepted by
 * inet_fragq_should_evict() and whose timer del_timer() reports as
 * deleted is collected on a private "expired" list.  After the lock is
 * released, the frag_expire callback of @f is invoked for each collected
 * queue.
 *
 * Returns the number of queues collected for eviction.
 */
static unsigned int
inet_evict_bucket(struct inet_frags *f, struct inet_frag_bucket *hb)
{
	struct inet_frag_queue *queue;
	struct hlist_node *next;
	unsigned int nr_evicted = 0;
	HLIST_HEAD(expired);

	spin_lock(&hb->chain_lock);

	hlist_for_each_entry_safe(queue, next, &hb->chain, list) {
		/*
		 * Leave the queue alone if it is not eligible, or if
		 * del_timer() returned zero (presumably the timer is not
		 * pending any more -- confirm against the timer API).
		 */
		if (!inet_fragq_should_evict(queue) ||
		    !del_timer(&queue->timer))
			continue;

#ifdef HAVE_INET_FRAG_QUEUE_WITH_LIST_EVICTOR
		/* Dedicated linkage: the queue also stays on the hash chain. */
		hlist_add_head(&queue->list_evictor, &expired);
#else
		/* No dedicated linkage: move the queue off the hash chain. */
		hlist_del(&queue->list);
		hlist_add_head(&queue->list, &expired);
#endif
		nr_evicted++;
	}

	spin_unlock(&hb->chain_lock);

	/* Expire the collected queues now that chain_lock is dropped. */
#ifdef HAVE_INET_FRAG_QUEUE_WITH_LIST_EVICTOR
	hlist_for_each_entry_safe(queue, next, &expired, list_evictor)
#else
	hlist_for_each_entry_safe(queue, next, &expired, list)
#endif
		f->frag_expire((unsigned long) queue);

	return nr_evicted;
}
/*
 * Flush the fragment queues accounted to @nf out of the fragment cache @f.
 *
 * The low threshold of @nf is temporarily forced to zero, which makes
 * inet_fragq_should_evict() accept any queue whose namespace has a zero
 * low threshold.  Every hash bucket is then swept with bottom halves
 * disabled.  The sweep is repeated for as long as the rnd_seqlock
 * sequence changed during the pass or the per-cpu memory counter of @nf
 * is still non-zero.  The original low threshold is restored before
 * returning.
 */
void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f)
{
	const int saved_low_thresh = nf->low_thresh;
	unsigned int seq;
	int bucket;

	nf->low_thresh = 0;

	do {
		local_bh_disable();
		seq = read_seqbegin(&f->rnd_seqlock);

		for (bucket = 0; bucket < INETFRAGS_HASHSZ; bucket++)
			inet_evict_bucket(f, &f->hash[bucket]);

		local_bh_enable();
		/* Give other tasks a chance to run between passes. */
		cond_resched();
	} while (read_seqretry(&f->rnd_seqlock, seq) ||
		 percpu_counter_sum(&nf->mem));

	nf->low_thresh = saved_low_thresh;
}
#else /* HAVE_INET_FRAGS_WITH_FRAGS_WORK */
/*
 * Flush the fragment queues of @nf using the kernel's inet_frag_evictor().
 *
 * The low threshold of @nf is set to zero around the call, which is made
 * with bottom halves disabled and with the third argument ("force", per
 * the upstream prototype -- confirm) set to true.  The original threshold
 * is put back afterwards.
 */
void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f)
{
	int saved_low_thresh;

	saved_low_thresh = nf->low_thresh;
	nf->low_thresh = 0;

	local_bh_disable();
	inet_frag_evictor(nf, f, true);
	local_bh_enable();

	nf->low_thresh = saved_low_thresh;
}
#endif /* HAVE_INET_FRAGS_WITH_FRAGS_WORK */
#endif /* !HAVE_CORRECT_MRU_HANDLING */