| 
									
										
										
										
											2013-09-11 20:26:11 -07:00
										 |  |  | /*
 | 
					
						
							|  |  |  |  * Copyright (c) 2007-2013 Nicira, Inc. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * This program is free software; you can redistribute it and/or | 
					
						
							|  |  |  |  * modify it under the terms of version 2 of the GNU General Public | 
					
						
							|  |  |  |  * License as published by the Free Software Foundation. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * This program is distributed in the hope that it will be useful, but | 
					
						
							|  |  |  |  * WITHOUT ANY WARRANTY; without even the implied warranty of | 
					
						
							|  |  |  |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | 
					
						
							|  |  |  |  * General Public License for more details. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * You should have received a copy of the GNU General Public License | 
					
						
							|  |  |  |  * along with this program; if not, write to the Free Software | 
					
						
							|  |  |  |  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA | 
					
						
							|  |  |  |  * 02110-1301, USA | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #include "flow.h"
 | 
					
						
							|  |  |  | #include "datapath.h"
 | 
					
						
							|  |  |  | #include <linux/uaccess.h>
 | 
					
						
							|  |  |  | #include <linux/netdevice.h>
 | 
					
						
							|  |  |  | #include <linux/etherdevice.h>
 | 
					
						
							|  |  |  | #include <linux/if_ether.h>
 | 
					
						
							|  |  |  | #include <linux/if_vlan.h>
 | 
					
						
							|  |  |  | #include <net/llc_pdu.h>
 | 
					
						
							|  |  |  | #include <linux/kernel.h>
 | 
					
						
							| 
									
										
										
										
											2015-01-07 12:55:49 +01:00
										 |  |  | #include <linux/jhash.h>
 | 
					
						
							| 
									
										
										
										
											2013-09-11 20:26:11 -07:00
										 |  |  | #include <linux/jiffies.h>
 | 
					
						
							|  |  |  | #include <linux/llc.h>
 | 
					
						
							|  |  |  | #include <linux/module.h>
 | 
					
						
							|  |  |  | #include <linux/in.h>
 | 
					
						
							|  |  |  | #include <linux/rcupdate.h>
 | 
					
						
							|  |  |  | #include <linux/if_arp.h>
 | 
					
						
							|  |  |  | #include <linux/ip.h>
 | 
					
						
							|  |  |  | #include <linux/ipv6.h>
 | 
					
						
							|  |  |  | #include <linux/sctp.h>
 | 
					
						
							|  |  |  | #include <linux/tcp.h>
 | 
					
						
							|  |  |  | #include <linux/udp.h>
 | 
					
						
							|  |  |  | #include <linux/icmp.h>
 | 
					
						
							|  |  |  | #include <linux/icmpv6.h>
 | 
					
						
							|  |  |  | #include <linux/rculist.h>
 | 
					
						
							|  |  |  | #include <net/ip.h>
 | 
					
						
							|  |  |  | #include <net/ipv6.h>
 | 
					
						
							|  |  |  | #include <net/ndisc.h>
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #include "vlan.h"
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-10-03 13:49:31 -07:00
										 |  |  | #define TBL_MIN_BUCKETS		1024
 | 
					
						
							| 
									
										
										
										
											2014-04-23 08:34:51 -07:00
										 |  |  | #define MASK_ARRAY_SIZE_MIN	16
 | 
					
						
							| 
									
										
										
										
											2013-10-03 13:49:31 -07:00
										 |  |  | #define REHASH_INTERVAL		(10 * 60 * HZ)
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-04-23 08:34:08 -07:00
										 |  |  | #define MC_HASH_SHIFT		8
 | 
					
						
							|  |  |  | #define MC_HASH_ENTRIES		(1u << MC_HASH_SHIFT)
 | 
					
						
							|  |  |  | #define MC_HASH_SEGS		((sizeof(uint32_t) * 8) / MC_HASH_SHIFT)
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-09-11 20:26:11 -07:00
										 |  |  | static struct kmem_cache *flow_cache; | 
					
						
							| 
									
										
											  
											
												datapath: Per NUMA node flow stats.
Keep kernel flow stats for each NUMA node rather than each (logical)
CPU.  This avoids using the per-CPU allocator and removes most of the
kernel-side OVS locking overhead otherwise on the top of perf reports
and allows OVS to scale better with higher number of threads.
With 9 handlers and 4 revalidators netperf TCP_CRR test flow setup
rate doubles on a server with two hyper-threaded physical CPUs (16
logical cores each) compared to the current OVS master.  Tested with
non-trivial flow table with a TCP port match rule forcing all new
connections with unique port numbers to OVS userspace.  The IP
addresses are still wildcarded, so the kernel flows are not considered
as exact match 5-tuple flows.  This type of flows can be expected to
appear in large numbers as the result of more effective wildcarding
made possible by improvements in OVS userspace flow classifier.
Perf results for this test (master):
Events: 305K cycles
+   8.43%     ovs-vswitchd  [kernel.kallsyms]   [k] mutex_spin_on_owner
+   5.64%     ovs-vswitchd  [kernel.kallsyms]   [k] __ticket_spin_lock
+   4.75%     ovs-vswitchd  ovs-vswitchd        [.] find_match_wc
+   3.32%     ovs-vswitchd  libpthread-2.15.so  [.] pthread_mutex_lock
+   2.61%     ovs-vswitchd  [kernel.kallsyms]   [k] pcpu_alloc_area
+   2.19%     ovs-vswitchd  ovs-vswitchd        [.] flow_hash_in_minimask_range
+   2.03%          swapper  [kernel.kallsyms]   [k] intel_idle
+   1.84%     ovs-vswitchd  libpthread-2.15.so  [.] pthread_mutex_unlock
+   1.64%     ovs-vswitchd  ovs-vswitchd        [.] classifier_lookup
+   1.58%     ovs-vswitchd  libc-2.15.so        [.] 0x7f4e6
+   1.07%     ovs-vswitchd  [kernel.kallsyms]   [k] memset
+   1.03%          netperf  [kernel.kallsyms]   [k] __ticket_spin_lock
+   0.92%          swapper  [kernel.kallsyms]   [k] __ticket_spin_lock
...
And after this patch:
Events: 356K cycles
+   6.85%     ovs-vswitchd  ovs-vswitchd        [.] find_match_wc
+   4.63%     ovs-vswitchd  libpthread-2.15.so  [.] pthread_mutex_lock
+   3.06%     ovs-vswitchd  [kernel.kallsyms]   [k] __ticket_spin_lock
+   2.81%     ovs-vswitchd  ovs-vswitchd        [.] flow_hash_in_minimask_range
+   2.51%     ovs-vswitchd  libpthread-2.15.so  [.] pthread_mutex_unlock
+   2.27%     ovs-vswitchd  ovs-vswitchd        [.] classifier_lookup
+   1.84%     ovs-vswitchd  libc-2.15.so        [.] 0x15d30f
+   1.74%     ovs-vswitchd  [kernel.kallsyms]   [k] mutex_spin_on_owner
+   1.47%          swapper  [kernel.kallsyms]   [k] intel_idle
+   1.34%     ovs-vswitchd  ovs-vswitchd        [.] flow_hash_in_minimask
+   1.33%     ovs-vswitchd  ovs-vswitchd        [.] rule_actions_unref
+   1.16%     ovs-vswitchd  ovs-vswitchd        [.] hindex_node_with_hash
+   1.16%     ovs-vswitchd  ovs-vswitchd        [.] do_xlate_actions
+   1.09%     ovs-vswitchd  ovs-vswitchd        [.] ofproto_rule_ref
+   1.01%          netperf  [kernel.kallsyms]   [k] __ticket_spin_lock
...
There is a small increase in kernel spinlock overhead due to the same
spinlock being shared between multiple cores of the same physical CPU,
but that is barely visible in the netperf TCP_CRR test performance
(maybe ~1% performance drop, hard to tell exactly due to variance in
the test results), when testing for kernel module throughput (with no
userspace activity, handful of kernel flows).
On flow setup, a single stats instance is allocated (for the NUMA node
0).  As CPUs from multiple NUMA nodes start updating stats, new
NUMA-node specific stats instances are allocated.  This allocation on
the packet processing code path is made to never block or look for
emergency memory pools, minimizing the allocation latency.  If the
allocation fails, the existing preallocated stats instance is used.
Also, if only CPUs from one NUMA-node are updating the preallocated
stats instance, no additional stats instances are allocated.  This
eliminates the need to pre-allocate stats instances that will not be
used, also relieving the stats reader from the burden of reading stats
that are never used.
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Signed-off-by: Jesse Gross <jesse@nicira.com>
Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
											
										 
											2014-02-18 09:07:03 -08:00
										 |  |  | struct kmem_cache *flow_stats_cache __read_mostly; | 
					
						
							| 
									
										
										
										
											2013-09-11 20:26:11 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  | static u16 range_n_bytes(const struct sw_flow_key_range *range) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	return range->end - range->start; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src, | 
					
						
							| 
									
										
										
										
											2015-09-22 18:13:00 -07:00
										 |  |  | 		       bool full, const struct sw_flow_mask *mask) | 
					
						
							| 
									
										
										
										
											2013-09-11 20:26:11 -07:00
										 |  |  | { | 
					
						
							| 
									
										
										
										
											2015-09-22 18:13:00 -07:00
										 |  |  | 	int start = full ? 0 : mask->range.start; | 
					
						
							|  |  |  | 	int len = full ? sizeof *dst : range_n_bytes(&mask->range); | 
					
						
							|  |  |  | 	const long *m = (const long *)((const u8 *)&mask->key + start); | 
					
						
							|  |  |  | 	const long *s = (const long *)((const u8 *)src + start); | 
					
						
							|  |  |  | 	long *d = (long *)((u8 *)dst + start); | 
					
						
							| 
									
										
										
										
											2013-09-11 20:26:11 -07:00
										 |  |  | 	int i; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-09-22 18:13:00 -07:00
										 |  |  | 	/* If 'full' is true then all of 'dst' is fully initialized. Otherwise,
 | 
					
						
							|  |  |  | 	 * if 'full' is false the memory outside of the 'mask->range' is left | 
					
						
							|  |  |  | 	 * uninitialized. This can be used as an optimization when further | 
					
						
							|  |  |  | 	 * operations on 'dst' only use contents within 'mask->range'. | 
					
						
							| 
									
										
										
										
											2013-09-11 20:26:11 -07:00
										 |  |  | 	 */ | 
					
						
							| 
									
										
										
										
											2015-09-22 18:13:00 -07:00
										 |  |  | 	for (i = 0; i < len; i += sizeof(long)) | 
					
						
							| 
									
										
										
										
											2013-09-11 20:26:11 -07:00
										 |  |  | 		*d++ = *s++ & *m++; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-02-18 09:07:03 -08:00
										 |  |  | struct sw_flow *ovs_flow_alloc(void) | 
					
						
							| 
									
										
										
										
											2013-09-11 20:26:11 -07:00
										 |  |  | { | 
					
						
							|  |  |  | 	struct sw_flow *flow; | 
					
						
							| 
									
										
											  
											
												datapath: Per NUMA node flow stats.
Keep kernel flow stats for each NUMA node rather than each (logical)
CPU.  This avoids using the per-CPU allocator and removes most of the
kernel-side OVS locking overhead otherwise on the top of perf reports
and allows OVS to scale better with higher number of threads.
With 9 handlers and 4 revalidators netperf TCP_CRR test flow setup
rate doubles on a server with two hyper-threaded physical CPUs (16
logical cores each) compared to the current OVS master.  Tested with
non-trivial flow table with a TCP port match rule forcing all new
connections with unique port numbers to OVS userspace.  The IP
addresses are still wildcarded, so the kernel flows are not considered
as exact match 5-tuple flows.  This type of flows can be expected to
appear in large numbers as the result of more effective wildcarding
made possible by improvements in OVS userspace flow classifier.
Perf results for this test (master):
Events: 305K cycles
+   8.43%     ovs-vswitchd  [kernel.kallsyms]   [k] mutex_spin_on_owner
+   5.64%     ovs-vswitchd  [kernel.kallsyms]   [k] __ticket_spin_lock
+   4.75%     ovs-vswitchd  ovs-vswitchd        [.] find_match_wc
+   3.32%     ovs-vswitchd  libpthread-2.15.so  [.] pthread_mutex_lock
+   2.61%     ovs-vswitchd  [kernel.kallsyms]   [k] pcpu_alloc_area
+   2.19%     ovs-vswitchd  ovs-vswitchd        [.] flow_hash_in_minimask_range
+   2.03%          swapper  [kernel.kallsyms]   [k] intel_idle
+   1.84%     ovs-vswitchd  libpthread-2.15.so  [.] pthread_mutex_unlock
+   1.64%     ovs-vswitchd  ovs-vswitchd        [.] classifier_lookup
+   1.58%     ovs-vswitchd  libc-2.15.so        [.] 0x7f4e6
+   1.07%     ovs-vswitchd  [kernel.kallsyms]   [k] memset
+   1.03%          netperf  [kernel.kallsyms]   [k] __ticket_spin_lock
+   0.92%          swapper  [kernel.kallsyms]   [k] __ticket_spin_lock
...
And after this patch:
Events: 356K cycles
+   6.85%     ovs-vswitchd  ovs-vswitchd        [.] find_match_wc
+   4.63%     ovs-vswitchd  libpthread-2.15.so  [.] pthread_mutex_lock
+   3.06%     ovs-vswitchd  [kernel.kallsyms]   [k] __ticket_spin_lock
+   2.81%     ovs-vswitchd  ovs-vswitchd        [.] flow_hash_in_minimask_range
+   2.51%     ovs-vswitchd  libpthread-2.15.so  [.] pthread_mutex_unlock
+   2.27%     ovs-vswitchd  ovs-vswitchd        [.] classifier_lookup
+   1.84%     ovs-vswitchd  libc-2.15.so        [.] 0x15d30f
+   1.74%     ovs-vswitchd  [kernel.kallsyms]   [k] mutex_spin_on_owner
+   1.47%          swapper  [kernel.kallsyms]   [k] intel_idle
+   1.34%     ovs-vswitchd  ovs-vswitchd        [.] flow_hash_in_minimask
+   1.33%     ovs-vswitchd  ovs-vswitchd        [.] rule_actions_unref
+   1.16%     ovs-vswitchd  ovs-vswitchd        [.] hindex_node_with_hash
+   1.16%     ovs-vswitchd  ovs-vswitchd        [.] do_xlate_actions
+   1.09%     ovs-vswitchd  ovs-vswitchd        [.] ofproto_rule_ref
+   1.01%          netperf  [kernel.kallsyms]   [k] __ticket_spin_lock
...
There is a small increase in kernel spinlock overhead due to the same
spinlock being shared between multiple cores of the same physical CPU,
but that is barely visible in the netperf TCP_CRR test performance
(maybe ~1% performance drop, hard to tell exactly due to variance in
the test results), when testing for kernel module throughput (with no
userspace activity, handful of kernel flows).
On flow setup, a single stats instance is allocated (for the NUMA node
0).  As CPUs from multiple NUMA nodes start updating stats, new
NUMA-node specific stats instances are allocated.  This allocation on
the packet processing code path is made to never block or look for
emergency memory pools, minimizing the allocation latency.  If the
allocation fails, the existing preallocated stats instance is used.
Also, if only CPUs from one NUMA-node are updating the preallocated
stats instance, no additional stats instances are allocated.  This
eliminates the need to pre-allocate stats instances that will not be
used, also relieving the stats reader from the burden of reading stats
that are never used.
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Signed-off-by: Jesse Gross <jesse@nicira.com>
Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
											
										 
											2014-02-18 09:07:03 -08:00
										 |  |  | 	struct flow_stats *stats; | 
					
						
							|  |  |  | 	int node; | 
					
						
							| 
									
										
										
										
											2013-09-11 20:26:11 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	flow = kmem_cache_alloc(flow_cache, GFP_KERNEL); | 
					
						
							|  |  |  | 	if (!flow) | 
					
						
							|  |  |  | 		return ERR_PTR(-ENOMEM); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	flow->sf_acts = NULL; | 
					
						
							|  |  |  | 	flow->mask = NULL; | 
					
						
							| 
									
										
										
										
											2015-02-15 15:01:42 -08:00
										 |  |  | 	flow->id.ufid_len = 0; | 
					
						
							|  |  |  | 	flow->id.unmasked_key = NULL; | 
					
						
							| 
									
										
											  
											
												datapath: Per NUMA node flow stats.
Keep kernel flow stats for each NUMA node rather than each (logical)
CPU.  This avoids using the per-CPU allocator and removes most of the
kernel-side OVS locking overhead otherwise on the top of perf reports
and allows OVS to scale better with higher number of threads.
With 9 handlers and 4 revalidators netperf TCP_CRR test flow setup
rate doubles on a server with two hyper-threaded physical CPUs (16
logical cores each) compared to the current OVS master.  Tested with
non-trivial flow table with a TCP port match rule forcing all new
connections with unique port numbers to OVS userspace.  The IP
addresses are still wildcarded, so the kernel flows are not considered
as exact match 5-tuple flows.  This type of flows can be expected to
appear in large numbers as the result of more effective wildcarding
made possible by improvements in OVS userspace flow classifier.
Perf results for this test (master):
Events: 305K cycles
+   8.43%     ovs-vswitchd  [kernel.kallsyms]   [k] mutex_spin_on_owner
+   5.64%     ovs-vswitchd  [kernel.kallsyms]   [k] __ticket_spin_lock
+   4.75%     ovs-vswitchd  ovs-vswitchd        [.] find_match_wc
+   3.32%     ovs-vswitchd  libpthread-2.15.so  [.] pthread_mutex_lock
+   2.61%     ovs-vswitchd  [kernel.kallsyms]   [k] pcpu_alloc_area
+   2.19%     ovs-vswitchd  ovs-vswitchd        [.] flow_hash_in_minimask_range
+   2.03%          swapper  [kernel.kallsyms]   [k] intel_idle
+   1.84%     ovs-vswitchd  libpthread-2.15.so  [.] pthread_mutex_unlock
+   1.64%     ovs-vswitchd  ovs-vswitchd        [.] classifier_lookup
+   1.58%     ovs-vswitchd  libc-2.15.so        [.] 0x7f4e6
+   1.07%     ovs-vswitchd  [kernel.kallsyms]   [k] memset
+   1.03%          netperf  [kernel.kallsyms]   [k] __ticket_spin_lock
+   0.92%          swapper  [kernel.kallsyms]   [k] __ticket_spin_lock
...
And after this patch:
Events: 356K cycles
+   6.85%     ovs-vswitchd  ovs-vswitchd        [.] find_match_wc
+   4.63%     ovs-vswitchd  libpthread-2.15.so  [.] pthread_mutex_lock
+   3.06%     ovs-vswitchd  [kernel.kallsyms]   [k] __ticket_spin_lock
+   2.81%     ovs-vswitchd  ovs-vswitchd        [.] flow_hash_in_minimask_range
+   2.51%     ovs-vswitchd  libpthread-2.15.so  [.] pthread_mutex_unlock
+   2.27%     ovs-vswitchd  ovs-vswitchd        [.] classifier_lookup
+   1.84%     ovs-vswitchd  libc-2.15.so        [.] 0x15d30f
+   1.74%     ovs-vswitchd  [kernel.kallsyms]   [k] mutex_spin_on_owner
+   1.47%          swapper  [kernel.kallsyms]   [k] intel_idle
+   1.34%     ovs-vswitchd  ovs-vswitchd        [.] flow_hash_in_minimask
+   1.33%     ovs-vswitchd  ovs-vswitchd        [.] rule_actions_unref
+   1.16%     ovs-vswitchd  ovs-vswitchd        [.] hindex_node_with_hash
+   1.16%     ovs-vswitchd  ovs-vswitchd        [.] do_xlate_actions
+   1.09%     ovs-vswitchd  ovs-vswitchd        [.] ofproto_rule_ref
+   1.01%          netperf  [kernel.kallsyms]   [k] __ticket_spin_lock
...
There is a small increase in kernel spinlock overhead due to the same
spinlock being shared between multiple cores of the same physical CPU,
but that is barely visible in the netperf TCP_CRR test performance
(maybe ~1% performance drop, hard to tell exactly due to variance in
the test results), when testing for kernel module throughput (with no
userspace activity, handful of kernel flows).
On flow setup, a single stats instance is allocated (for the NUMA node
0).  As CPUs from multiple NUMA nodes start updating stats, new
NUMA-node specific stats instances are allocated.  This allocation on
the packet processing code path is made to never block or look for
emergency memory pools, minimizing the allocation latency.  If the
allocation fails, the existing preallocated stats instance is used.
Also, if only CPUs from one NUMA-node are updating the preallocated
stats instance, no additional stats instances are allocated.  This
eliminates the need to pre-allocate stats instances that will not be
used, also relieving the stats reader from the burden of reading stats
that are never used.
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Signed-off-by: Jesse Gross <jesse@nicira.com>
Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
											
										 
											2014-02-18 09:07:03 -08:00
										 |  |  | 	flow->stats_last_writer = NUMA_NO_NODE; | 
					
						
							| 
									
										
										
										
											2013-09-11 20:26:11 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
											  
											
												datapath: Per NUMA node flow stats.
Keep kernel flow stats for each NUMA node rather than each (logical)
CPU.  This avoids using the per-CPU allocator and removes most of the
kernel-side OVS locking overhead otherwise on the top of perf reports
and allows OVS to scale better with higher number of threads.
With 9 handlers and 4 revalidators netperf TCP_CRR test flow setup
rate doubles on a server with two hyper-threaded physical CPUs (16
logical cores each) compared to the current OVS master.  Tested with
non-trivial flow table with a TCP port match rule forcing all new
connections with unique port numbers to OVS userspace.  The IP
addresses are still wildcarded, so the kernel flows are not considered
as exact match 5-tuple flows.  This type of flows can be expected to
appear in large numbers as the result of more effective wildcarding
made possible by improvements in OVS userspace flow classifier.
Perf results for this test (master):
Events: 305K cycles
+   8.43%     ovs-vswitchd  [kernel.kallsyms]   [k] mutex_spin_on_owner
+   5.64%     ovs-vswitchd  [kernel.kallsyms]   [k] __ticket_spin_lock
+   4.75%     ovs-vswitchd  ovs-vswitchd        [.] find_match_wc
+   3.32%     ovs-vswitchd  libpthread-2.15.so  [.] pthread_mutex_lock
+   2.61%     ovs-vswitchd  [kernel.kallsyms]   [k] pcpu_alloc_area
+   2.19%     ovs-vswitchd  ovs-vswitchd        [.] flow_hash_in_minimask_range
+   2.03%          swapper  [kernel.kallsyms]   [k] intel_idle
+   1.84%     ovs-vswitchd  libpthread-2.15.so  [.] pthread_mutex_unlock
+   1.64%     ovs-vswitchd  ovs-vswitchd        [.] classifier_lookup
+   1.58%     ovs-vswitchd  libc-2.15.so        [.] 0x7f4e6
+   1.07%     ovs-vswitchd  [kernel.kallsyms]   [k] memset
+   1.03%          netperf  [kernel.kallsyms]   [k] __ticket_spin_lock
+   0.92%          swapper  [kernel.kallsyms]   [k] __ticket_spin_lock
...
And after this patch:
Events: 356K cycles
+   6.85%     ovs-vswitchd  ovs-vswitchd        [.] find_match_wc
+   4.63%     ovs-vswitchd  libpthread-2.15.so  [.] pthread_mutex_lock
+   3.06%     ovs-vswitchd  [kernel.kallsyms]   [k] __ticket_spin_lock
+   2.81%     ovs-vswitchd  ovs-vswitchd        [.] flow_hash_in_minimask_range
+   2.51%     ovs-vswitchd  libpthread-2.15.so  [.] pthread_mutex_unlock
+   2.27%     ovs-vswitchd  ovs-vswitchd        [.] classifier_lookup
+   1.84%     ovs-vswitchd  libc-2.15.so        [.] 0x15d30f
+   1.74%     ovs-vswitchd  [kernel.kallsyms]   [k] mutex_spin_on_owner
+   1.47%          swapper  [kernel.kallsyms]   [k] intel_idle
+   1.34%     ovs-vswitchd  ovs-vswitchd        [.] flow_hash_in_minimask
+   1.33%     ovs-vswitchd  ovs-vswitchd        [.] rule_actions_unref
+   1.16%     ovs-vswitchd  ovs-vswitchd        [.] hindex_node_with_hash
+   1.16%     ovs-vswitchd  ovs-vswitchd        [.] do_xlate_actions
+   1.09%     ovs-vswitchd  ovs-vswitchd        [.] ofproto_rule_ref
+   1.01%          netperf  [kernel.kallsyms]   [k] __ticket_spin_lock
...
There is a small increase in kernel spinlock overhead due to the same
spinlock being shared between multiple cores of the same physical CPU,
but that is barely visible in the netperf TCP_CRR test performance
(maybe ~1% performance drop, hard to tell exactly due to variance in
the test results), when testing for kernel module throughput (with no
userspace activity, handful of kernel flows).
On flow setup, a single stats instance is allocated (for the NUMA node
0).  As CPUs from multiple NUMA nodes start updating stats, new
NUMA-node specific stats instances are allocated.  This allocation on
the packet processing code path is made to never block or look for
emergency memory pools, minimizing the allocation latency.  If the
allocation fails, the existing preallocated stats instance is used.
Also, if only CPUs from one NUMA-node are updating the preallocated
stats instance, no additional stats instances are allocated.  This
eliminates the need to pre-allocate stats instances that will not be
used, also relieving the stats reader from the burden of reading stats
that are never used.
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Signed-off-by: Jesse Gross <jesse@nicira.com>
Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
											
										 
											2014-02-18 09:07:03 -08:00
										 |  |  | 	/* Initialize the default stat node. */ | 
					
						
							|  |  |  | 	stats = kmem_cache_alloc_node(flow_stats_cache, | 
					
						
							|  |  |  | 				      GFP_KERNEL | __GFP_ZERO, 0); | 
					
						
							|  |  |  | 	if (!stats) | 
					
						
							| 
									
										
										
										
											2014-02-18 09:07:03 -08:00
										 |  |  | 		goto err; | 
					
						
							| 
									
										
										
										
											2013-10-23 14:34:39 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
											  
											
												datapath: Per NUMA node flow stats.
Keep kernel flow stats for each NUMA node rather than each (logical)
CPU.  This avoids using the per-CPU allocator and removes most of the
kernel-side OVS locking overhead otherwise on the top of perf reports
and allows OVS to scale better with higher number of threads.
With 9 handlers and 4 revalidators netperf TCP_CRR test flow setup
rate doubles on a server with two hyper-threaded physical CPUs (16
logical cores each) compared to the current OVS master.  Tested with
non-trivial flow table with a TCP port match rule forcing all new
connections with unique port numbers to OVS userspace.  The IP
addresses are still wildcarded, so the kernel flows are not considered
as exact match 5-tuple flows.  This type of flows can be expected to
appear in large numbers as the result of more effective wildcarding
made possible by improvements in OVS userspace flow classifier.
Perf results for this test (master):
Events: 305K cycles
+   8.43%     ovs-vswitchd  [kernel.kallsyms]   [k] mutex_spin_on_owner
+   5.64%     ovs-vswitchd  [kernel.kallsyms]   [k] __ticket_spin_lock
+   4.75%     ovs-vswitchd  ovs-vswitchd        [.] find_match_wc
+   3.32%     ovs-vswitchd  libpthread-2.15.so  [.] pthread_mutex_lock
+   2.61%     ovs-vswitchd  [kernel.kallsyms]   [k] pcpu_alloc_area
+   2.19%     ovs-vswitchd  ovs-vswitchd        [.] flow_hash_in_minimask_range
+   2.03%          swapper  [kernel.kallsyms]   [k] intel_idle
+   1.84%     ovs-vswitchd  libpthread-2.15.so  [.] pthread_mutex_unlock
+   1.64%     ovs-vswitchd  ovs-vswitchd        [.] classifier_lookup
+   1.58%     ovs-vswitchd  libc-2.15.so        [.] 0x7f4e6
+   1.07%     ovs-vswitchd  [kernel.kallsyms]   [k] memset
+   1.03%          netperf  [kernel.kallsyms]   [k] __ticket_spin_lock
+   0.92%          swapper  [kernel.kallsyms]   [k] __ticket_spin_lock
...
And after this patch:
Events: 356K cycles
+   6.85%     ovs-vswitchd  ovs-vswitchd        [.] find_match_wc
+   4.63%     ovs-vswitchd  libpthread-2.15.so  [.] pthread_mutex_lock
+   3.06%     ovs-vswitchd  [kernel.kallsyms]   [k] __ticket_spin_lock
+   2.81%     ovs-vswitchd  ovs-vswitchd        [.] flow_hash_in_minimask_range
+   2.51%     ovs-vswitchd  libpthread-2.15.so  [.] pthread_mutex_unlock
+   2.27%     ovs-vswitchd  ovs-vswitchd        [.] classifier_lookup
+   1.84%     ovs-vswitchd  libc-2.15.so        [.] 0x15d30f
+   1.74%     ovs-vswitchd  [kernel.kallsyms]   [k] mutex_spin_on_owner
+   1.47%          swapper  [kernel.kallsyms]   [k] intel_idle
+   1.34%     ovs-vswitchd  ovs-vswitchd        [.] flow_hash_in_minimask
+   1.33%     ovs-vswitchd  ovs-vswitchd        [.] rule_actions_unref
+   1.16%     ovs-vswitchd  ovs-vswitchd        [.] hindex_node_with_hash
+   1.16%     ovs-vswitchd  ovs-vswitchd        [.] do_xlate_actions
+   1.09%     ovs-vswitchd  ovs-vswitchd        [.] ofproto_rule_ref
+   1.01%          netperf  [kernel.kallsyms]   [k] __ticket_spin_lock
...
There is a small increase in kernel spinlock overhead due to the same
spinlock being shared between multiple cores of the same physical CPU,
but that is barely visible in the netperf TCP_CRR test performance
(maybe ~1% performance drop, hard to tell exactly due to variance in
the test results), when testing for kernel module throughput (with no
userspace activity, handful of kernel flows).
On flow setup, a single stats instance is allocated (for the NUMA node
0).  As CPUs from multiple NUMA nodes start updating stats, new
NUMA-node specific stats instances are allocated.  This allocation on
the packet processing code path is made to never block or look for
emergency memory pools, minimizing the allocation latency.  If the
allocation fails, the existing preallocated stats instance is used.
Also, if only CPUs from one NUMA-node are updating the preallocated
stats instance, no additional stats instances are allocated.  This
eliminates the need to pre-allocate stats instances that will not be
used, also relieving the stats reader from the burden of reading stats
that are never used.
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Signed-off-by: Jesse Gross <jesse@nicira.com>
Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
											
										 
											2014-02-18 09:07:03 -08:00
										 |  |  | 	spin_lock_init(&stats->lock); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	RCU_INIT_POINTER(flow->stats[0], stats); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	for_each_node(node) | 
					
						
							|  |  |  | 		if (node != 0) | 
					
						
							|  |  |  | 			RCU_INIT_POINTER(flow->stats[node], NULL); | 
					
						
							| 
									
										
										
										
											2013-12-05 15:50:27 -08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-09-11 20:26:11 -07:00
										 |  |  | 	return flow; | 
					
						
							| 
									
										
										
										
											2013-12-05 15:50:27 -08:00
										 |  |  | err: | 
					
						
							| 
									
										
										
										
											2014-01-08 06:07:52 -08:00
										 |  |  | 	kmem_cache_free(flow_cache, flow); | 
					
						
							| 
									
										
										
										
											2013-12-05 15:50:27 -08:00
										 |  |  | 	return ERR_PTR(-ENOMEM); | 
					
						
							| 
									
										
										
										
											2013-09-11 20:26:11 -07:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-09-23 16:02:35 +02:00
										 |  |  | int ovs_flow_tbl_count(const struct flow_table *table) | 
					
						
							| 
									
										
										
										
											2013-10-03 13:49:31 -07:00
										 |  |  | { | 
					
						
							|  |  |  | 	return table->count; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-09-11 20:26:11 -07:00
										 |  |  | static struct flex_array *alloc_buckets(unsigned int n_buckets) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	struct flex_array *buckets; | 
					
						
							|  |  |  | 	int i, err; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	buckets = flex_array_alloc(sizeof(struct hlist_head), | 
					
						
							|  |  |  | 				   n_buckets, GFP_KERNEL); | 
					
						
							|  |  |  | 	if (!buckets) | 
					
						
							|  |  |  | 		return NULL; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	err = flex_array_prealloc(buckets, 0, n_buckets, GFP_KERNEL); | 
					
						
							|  |  |  | 	if (err) { | 
					
						
							|  |  |  | 		flex_array_free(buckets); | 
					
						
							|  |  |  | 		return NULL; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	for (i = 0; i < n_buckets; i++) | 
					
						
							|  |  |  | 		INIT_HLIST_HEAD((struct hlist_head *) | 
					
						
							|  |  |  | 					flex_array_get(buckets, i)); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	return buckets; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static void flow_free(struct sw_flow *flow) | 
					
						
							|  |  |  | { | 
					
						
							| 
									
										
											  
											
												datapath: Per NUMA node flow stats.
Keep kernel flow stats for each NUMA node rather than each (logical)
CPU.  This avoids using the per-CPU allocator and removes most of the
kernel-side OVS locking overhead otherwise on the top of perf reports
and allows OVS to scale better with higher number of threads.
With 9 handlers and 4 revalidators netperf TCP_CRR test flow setup
rate doubles on a server with two hyper-threaded physical CPUs (16
logical cores each) compared to the current OVS master.  Tested with
non-trivial flow table with a TCP port match rule forcing all new
connections with unique port numbers to OVS userspace.  The IP
addresses are still wildcarded, so the kernel flows are not considered
as exact match 5-tuple flows.  This type of flows can be expected to
appear in large numbers as the result of more effective wildcarding
made possible by improvements in OVS userspace flow classifier.
Perf results for this test (master):
Events: 305K cycles
+   8.43%     ovs-vswitchd  [kernel.kallsyms]   [k] mutex_spin_on_owner
+   5.64%     ovs-vswitchd  [kernel.kallsyms]   [k] __ticket_spin_lock
+   4.75%     ovs-vswitchd  ovs-vswitchd        [.] find_match_wc
+   3.32%     ovs-vswitchd  libpthread-2.15.so  [.] pthread_mutex_lock
+   2.61%     ovs-vswitchd  [kernel.kallsyms]   [k] pcpu_alloc_area
+   2.19%     ovs-vswitchd  ovs-vswitchd        [.] flow_hash_in_minimask_range
+   2.03%          swapper  [kernel.kallsyms]   [k] intel_idle
+   1.84%     ovs-vswitchd  libpthread-2.15.so  [.] pthread_mutex_unlock
+   1.64%     ovs-vswitchd  ovs-vswitchd        [.] classifier_lookup
+   1.58%     ovs-vswitchd  libc-2.15.so        [.] 0x7f4e6
+   1.07%     ovs-vswitchd  [kernel.kallsyms]   [k] memset
+   1.03%          netperf  [kernel.kallsyms]   [k] __ticket_spin_lock
+   0.92%          swapper  [kernel.kallsyms]   [k] __ticket_spin_lock
...
And after this patch:
Events: 356K cycles
+   6.85%     ovs-vswitchd  ovs-vswitchd        [.] find_match_wc
+   4.63%     ovs-vswitchd  libpthread-2.15.so  [.] pthread_mutex_lock
+   3.06%     ovs-vswitchd  [kernel.kallsyms]   [k] __ticket_spin_lock
+   2.81%     ovs-vswitchd  ovs-vswitchd        [.] flow_hash_in_minimask_range
+   2.51%     ovs-vswitchd  libpthread-2.15.so  [.] pthread_mutex_unlock
+   2.27%     ovs-vswitchd  ovs-vswitchd        [.] classifier_lookup
+   1.84%     ovs-vswitchd  libc-2.15.so        [.] 0x15d30f
+   1.74%     ovs-vswitchd  [kernel.kallsyms]   [k] mutex_spin_on_owner
+   1.47%          swapper  [kernel.kallsyms]   [k] intel_idle
+   1.34%     ovs-vswitchd  ovs-vswitchd        [.] flow_hash_in_minimask
+   1.33%     ovs-vswitchd  ovs-vswitchd        [.] rule_actions_unref
+   1.16%     ovs-vswitchd  ovs-vswitchd        [.] hindex_node_with_hash
+   1.16%     ovs-vswitchd  ovs-vswitchd        [.] do_xlate_actions
+   1.09%     ovs-vswitchd  ovs-vswitchd        [.] ofproto_rule_ref
+   1.01%          netperf  [kernel.kallsyms]   [k] __ticket_spin_lock
...
There is a small increase in kernel spinlock overhead due to the same
spinlock being shared between multiple cores of the same physical CPU,
but that is barely visible in the netperf TCP_CRR test performance
(maybe ~1% performance drop, hard to tell exactly due to variance in
the test results), when testing for kernel module throughput (with no
userspace activity, handful of kernel flows).
On flow setup, a single stats instance is allocated (for the NUMA node
0).  As CPUs from multiple NUMA nodes start updating stats, new
NUMA-node specific stats instances are allocated.  This allocation on
the packet processing code path is made to never block or look for
emergency memory pools, minimizing the allocation latency.  If the
allocation fails, the existing preallocated stats instance is used.
Also, if only CPUs from one NUMA-node are updating the preallocated
stats instance, no additional stats instances are allocated.  This
eliminates the need to pre-allocate stats instances that will not be
used, also relieving the stats reader from the burden of reading stats
that are never used.
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Signed-off-by: Jesse Gross <jesse@nicira.com>
Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
											
										 
											2014-02-18 09:07:03 -08:00
										 |  |  | 	int node; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-01-21 16:42:52 -08:00
										 |  |  | 	if (ovs_identifier_is_key(&flow->id)) | 
					
						
							|  |  |  | 		kfree(flow->id.unmasked_key); | 
					
						
							| 
									
										
										
										
											2014-10-20 15:05:56 -07:00
										 |  |  | 	kfree(rcu_dereference_raw(flow->sf_acts)); | 
					
						
							| 
									
										
											  
											
												datapath: Per NUMA node flow stats.
Keep kernel flow stats for each NUMA node rather than each (logical)
CPU.  This avoids using the per-CPU allocator and removes most of the
kernel-side OVS locking overhead otherwise on the top of perf reports
and allows OVS to scale better with higher number of threads.
With 9 handlers and 4 revalidators netperf TCP_CRR test flow setup
rate doubles on a server with two hyper-threaded physical CPUs (16
logical cores each) compared to the current OVS master.  Tested with
non-trivial flow table with a TCP port match rule forcing all new
connections with unique port numbers to OVS userspace.  The IP
addresses are still wildcarded, so the kernel flows are not considered
as exact match 5-tuple flows.  This type of flows can be expected to
appear in large numbers as the result of more effective wildcarding
made possible by improvements in OVS userspace flow classifier.
Perf results for this test (master):
Events: 305K cycles
+   8.43%     ovs-vswitchd  [kernel.kallsyms]   [k] mutex_spin_on_owner
+   5.64%     ovs-vswitchd  [kernel.kallsyms]   [k] __ticket_spin_lock
+   4.75%     ovs-vswitchd  ovs-vswitchd        [.] find_match_wc
+   3.32%     ovs-vswitchd  libpthread-2.15.so  [.] pthread_mutex_lock
+   2.61%     ovs-vswitchd  [kernel.kallsyms]   [k] pcpu_alloc_area
+   2.19%     ovs-vswitchd  ovs-vswitchd        [.] flow_hash_in_minimask_range
+   2.03%          swapper  [kernel.kallsyms]   [k] intel_idle
+   1.84%     ovs-vswitchd  libpthread-2.15.so  [.] pthread_mutex_unlock
+   1.64%     ovs-vswitchd  ovs-vswitchd        [.] classifier_lookup
+   1.58%     ovs-vswitchd  libc-2.15.so        [.] 0x7f4e6
+   1.07%     ovs-vswitchd  [kernel.kallsyms]   [k] memset
+   1.03%          netperf  [kernel.kallsyms]   [k] __ticket_spin_lock
+   0.92%          swapper  [kernel.kallsyms]   [k] __ticket_spin_lock
...
And after this patch:
Events: 356K cycles
+   6.85%     ovs-vswitchd  ovs-vswitchd        [.] find_match_wc
+   4.63%     ovs-vswitchd  libpthread-2.15.so  [.] pthread_mutex_lock
+   3.06%     ovs-vswitchd  [kernel.kallsyms]   [k] __ticket_spin_lock
+   2.81%     ovs-vswitchd  ovs-vswitchd        [.] flow_hash_in_minimask_range
+   2.51%     ovs-vswitchd  libpthread-2.15.so  [.] pthread_mutex_unlock
+   2.27%     ovs-vswitchd  ovs-vswitchd        [.] classifier_lookup
+   1.84%     ovs-vswitchd  libc-2.15.so        [.] 0x15d30f
+   1.74%     ovs-vswitchd  [kernel.kallsyms]   [k] mutex_spin_on_owner
+   1.47%          swapper  [kernel.kallsyms]   [k] intel_idle
+   1.34%     ovs-vswitchd  ovs-vswitchd        [.] flow_hash_in_minimask
+   1.33%     ovs-vswitchd  ovs-vswitchd        [.] rule_actions_unref
+   1.16%     ovs-vswitchd  ovs-vswitchd        [.] hindex_node_with_hash
+   1.16%     ovs-vswitchd  ovs-vswitchd        [.] do_xlate_actions
+   1.09%     ovs-vswitchd  ovs-vswitchd        [.] ofproto_rule_ref
+   1.01%          netperf  [kernel.kallsyms]   [k] __ticket_spin_lock
...
There is a small increase in kernel spinlock overhead due to the same
spinlock being shared between multiple cores of the same physical CPU,
but that is barely visible in the netperf TCP_CRR test performance
(maybe ~1% performance drop, hard to tell exactly due to variance in
the test results), when testing for kernel module throughput (with no
userspace activity, handful of kernel flows).
On flow setup, a single stats instance is allocated (for the NUMA node
0).  As CPUs from multiple NUMA nodes start updating stats, new
NUMA-node specific stats instances are allocated.  This allocation on
the packet processing code path is made to never block or look for
emergency memory pools, minimizing the allocation latency.  If the
allocation fails, the existing preallocated stats instance is used.
Also, if only CPUs from one NUMA-node are updating the preallocated
stats instance, no additional stats instances are allocated.  This
eliminates the need to pre-allocate stats instances that will not be
used, also relieving the stats reader from the burden of reading stats
that are never used.
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Signed-off-by: Jesse Gross <jesse@nicira.com>
Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
											
										 
											2014-02-18 09:07:03 -08:00
										 |  |  | 	for_each_node(node) | 
					
						
							|  |  |  | 		if (flow->stats[node]) | 
					
						
							|  |  |  | 			kmem_cache_free(flow_stats_cache, | 
					
						
							| 
									
										
										
										
											2014-10-20 15:05:56 -07:00
										 |  |  | 					rcu_dereference_raw(flow->stats[node])); | 
					
						
							| 
									
										
										
										
											2013-09-11 20:26:11 -07:00
										 |  |  | 	kmem_cache_free(flow_cache, flow); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static void rcu_free_flow_callback(struct rcu_head *rcu) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	struct sw_flow *flow = container_of(rcu, struct sw_flow, rcu); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	flow_free(flow); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-10-03 13:54:51 -07:00
										 |  |  | static void rcu_free_sw_flow_mask_cb(struct rcu_head *rcu) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	struct sw_flow_mask *mask = container_of(rcu, struct sw_flow_mask, rcu); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	kfree(mask); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-01-10 15:57:04 -08:00
										 |  |  | void ovs_flow_free(struct sw_flow *flow, bool deferred) | 
					
						
							| 
									
										
										
										
											2013-10-03 13:54:51 -07:00
										 |  |  | { | 
					
						
							| 
									
										
										
										
											2014-01-10 15:57:04 -08:00
										 |  |  | 	if (!flow) | 
					
						
							| 
									
										
										
										
											2013-10-03 13:54:51 -07:00
										 |  |  | 		return; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-09-11 20:26:11 -07:00
										 |  |  | 	if (deferred) | 
					
						
							|  |  |  | 		call_rcu(&flow->rcu, rcu_free_flow_callback); | 
					
						
							|  |  |  | 	else | 
					
						
							|  |  |  | 		flow_free(flow); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static void free_buckets(struct flex_array *buckets) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	flex_array_free(buckets); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-01-10 15:57:04 -08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-10-03 13:49:31 -07:00
										 |  |  | static void __table_instance_destroy(struct table_instance *ti) | 
					
						
							| 
									
										
										
										
											2013-09-11 20:26:11 -07:00
										 |  |  | { | 
					
						
							| 
									
										
										
										
											2013-10-03 13:49:31 -07:00
										 |  |  | 	free_buckets(ti->buckets); | 
					
						
							|  |  |  | 	kfree(ti); | 
					
						
							| 
									
										
										
										
											2013-09-11 20:26:11 -07:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-10-03 13:49:31 -07:00
										 |  |  | static struct table_instance *table_instance_alloc(int new_size) | 
					
						
							| 
									
										
										
										
											2013-09-11 20:26:11 -07:00
										 |  |  | { | 
					
						
							| 
									
										
										
										
											2013-10-03 13:49:31 -07:00
										 |  |  | 	struct table_instance *ti = kmalloc(sizeof(*ti), GFP_KERNEL); | 
					
						
							| 
									
										
										
										
											2013-09-11 20:26:11 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-10-03 13:49:31 -07:00
										 |  |  | 	if (!ti) | 
					
						
							| 
									
										
										
										
											2013-09-11 20:26:11 -07:00
										 |  |  | 		return NULL; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-10-03 13:49:31 -07:00
										 |  |  | 	ti->buckets = alloc_buckets(new_size); | 
					
						
							| 
									
										
										
										
											2013-09-11 20:26:11 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-10-03 13:49:31 -07:00
										 |  |  | 	if (!ti->buckets) { | 
					
						
							|  |  |  | 		kfree(ti); | 
					
						
							| 
									
										
										
										
											2013-09-11 20:26:11 -07:00
										 |  |  | 		return NULL; | 
					
						
							|  |  |  | 	} | 
					
						
							| 
									
										
										
										
											2013-10-03 13:49:31 -07:00
										 |  |  | 	ti->n_buckets = new_size; | 
					
						
							|  |  |  | 	ti->node_ver = 0; | 
					
						
							|  |  |  | 	ti->keep_flows = false; | 
					
						
							|  |  |  | 	get_random_bytes(&ti->hash_seed, sizeof(u32)); | 
					
						
							| 
									
										
										
										
											2013-09-11 20:26:11 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-10-03 13:49:31 -07:00
										 |  |  | 	return ti; | 
					
						
							| 
									
										
										
										
											2013-09-11 20:26:11 -07:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-04-23 08:34:51 -07:00
										 |  |  | static void mask_array_rcu_cb(struct rcu_head *rcu) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	struct mask_array *ma = container_of(rcu, struct mask_array, rcu); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	kfree(ma); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static struct mask_array *tbl_mask_array_alloc(int size) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	struct mask_array *new; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-08-13 00:02:18 -07:00
										 |  |  | 	size = max(MASK_ARRAY_SIZE_MIN, size); | 
					
						
							| 
									
										
										
										
											2014-04-23 08:34:51 -07:00
										 |  |  | 	new = kzalloc(sizeof(struct mask_array) + | 
					
						
							|  |  |  | 		      sizeof(struct sw_flow_mask *) * size, GFP_KERNEL); | 
					
						
							|  |  |  | 	if (!new) | 
					
						
							|  |  |  | 		return NULL; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	new->count = 0; | 
					
						
							|  |  |  | 	new->max = size; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	return new; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int tbl_mask_array_realloc(struct flow_table *tbl, int size) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	struct mask_array *old; | 
					
						
							|  |  |  | 	struct mask_array *new; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	new = tbl_mask_array_alloc(size); | 
					
						
							|  |  |  | 	if (!new) | 
					
						
							|  |  |  | 		return -ENOMEM; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	old = ovsl_dereference(tbl->mask_array); | 
					
						
							|  |  |  | 	if (old) { | 
					
						
							| 
									
										
										
										
											2014-08-13 00:02:18 -07:00
										 |  |  | 		int i, count = 0; | 
					
						
							| 
									
										
										
										
											2014-04-23 08:34:51 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-08-13 00:02:18 -07:00
										 |  |  | 		for (i = 0; i < old->max; i++) { | 
					
						
							|  |  |  | 			if (ovsl_dereference(old->masks[i])) | 
					
						
							|  |  |  | 				new->masks[count++] = old->masks[i]; | 
					
						
							|  |  |  | 		} | 
					
						
							| 
									
										
										
										
											2014-06-16 12:45:04 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-08-13 00:02:18 -07:00
										 |  |  | 		new->count = count; | 
					
						
							| 
									
										
										
										
											2014-04-23 08:34:51 -07:00
										 |  |  | 	} | 
					
						
							|  |  |  | 	rcu_assign_pointer(tbl->mask_array, new); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if (old) | 
					
						
							|  |  |  | 		call_rcu(&old->rcu, mask_array_rcu_cb); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	return 0; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-10-03 13:49:31 -07:00
										 |  |  | int ovs_flow_tbl_init(struct flow_table *table) | 
					
						
							| 
									
										
										
										
											2013-09-11 20:26:11 -07:00
										 |  |  | { | 
					
						
							| 
									
										
										
										
											2015-01-21 16:42:52 -08:00
										 |  |  | 	struct table_instance *ti, *ufid_ti; | 
					
						
							| 
									
										
										
										
											2014-04-23 08:34:51 -07:00
										 |  |  | 	struct mask_array *ma; | 
					
						
							| 
									
										
										
										
											2013-09-11 20:26:11 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-04-23 08:34:08 -07:00
										 |  |  | 	table->mask_cache = __alloc_percpu(sizeof(struct mask_cache_entry) * | 
					
						
							|  |  |  | 					  MC_HASH_ENTRIES, __alignof__(struct mask_cache_entry)); | 
					
						
							|  |  |  | 	if (!table->mask_cache) | 
					
						
							|  |  |  | 		return -ENOMEM; | 
					
						
							| 
									
										
										
										
											2013-09-11 20:26:11 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-04-23 08:34:51 -07:00
										 |  |  | 	ma = tbl_mask_array_alloc(MASK_ARRAY_SIZE_MIN); | 
					
						
							|  |  |  | 	if (!ma) | 
					
						
							|  |  |  | 		goto free_mask_cache; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-04-23 08:34:08 -07:00
										 |  |  | 	ti = table_instance_alloc(TBL_MIN_BUCKETS); | 
					
						
							| 
									
										
										
										
											2014-04-23 08:34:51 -07:00
										 |  |  | 	if (!ti) | 
					
						
							|  |  |  | 		goto free_mask_array; | 
					
						
							| 
									
										
										
										
											2013-09-11 20:26:11 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-01-21 16:42:52 -08:00
										 |  |  | 	ufid_ti = table_instance_alloc(TBL_MIN_BUCKETS); | 
					
						
							|  |  |  | 	if (!ufid_ti) | 
					
						
							|  |  |  | 		goto free_ti; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-10-03 13:49:31 -07:00
										 |  |  | 	rcu_assign_pointer(table->ti, ti); | 
					
						
							| 
									
										
										
										
											2015-01-21 16:42:52 -08:00
										 |  |  | 	rcu_assign_pointer(table->ufid_ti, ufid_ti); | 
					
						
							| 
									
										
										
										
											2014-04-23 08:34:51 -07:00
										 |  |  | 	rcu_assign_pointer(table->mask_array, ma); | 
					
						
							| 
									
										
										
										
											2013-10-03 13:49:31 -07:00
										 |  |  | 	table->last_rehash = jiffies; | 
					
						
							|  |  |  | 	table->count = 0; | 
					
						
							| 
									
										
										
										
											2015-01-21 16:42:52 -08:00
										 |  |  | 	table->ufid_count = 0; | 
					
						
							| 
									
										
										
										
											2013-10-03 13:49:31 -07:00
										 |  |  | 	return 0; | 
					
						
							| 
									
										
										
										
											2014-04-23 08:34:51 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-01-21 16:42:52 -08:00
										 |  |  | free_ti: | 
					
						
							|  |  |  | 	__table_instance_destroy(ti); | 
					
						
							| 
									
										
										
										
											2014-04-23 08:34:51 -07:00
										 |  |  | free_mask_array: | 
					
						
							| 
									
										
										
										
											2014-08-13 00:02:18 -07:00
										 |  |  | 	kfree(ma); | 
					
						
							| 
									
										
										
										
											2014-04-23 08:34:51 -07:00
										 |  |  | free_mask_cache: | 
					
						
							|  |  |  | 	free_percpu(table->mask_cache); | 
					
						
							|  |  |  | 	return -ENOMEM; | 
					
						
							| 
									
										
										
										
											2013-09-11 20:26:11 -07:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static void flow_tbl_destroy_rcu_cb(struct rcu_head *rcu) | 
					
						
							|  |  |  | { | 
					
						
							| 
									
										
										
										
											2013-10-03 13:49:31 -07:00
										 |  |  | 	struct table_instance *ti = container_of(rcu, struct table_instance, rcu); | 
					
						
							| 
									
										
										
										
											2013-09-11 20:26:11 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-10-03 13:49:31 -07:00
										 |  |  | 	__table_instance_destroy(ti); | 
					
						
							| 
									
										
										
										
											2013-09-11 20:26:11 -07:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-01-21 16:42:52 -08:00
										 |  |  | static void table_instance_destroy(struct table_instance *ti, | 
					
						
							|  |  |  | 				   struct table_instance *ufid_ti, | 
					
						
							|  |  |  | 				   bool deferred) | 
					
						
							| 
									
										
										
										
											2013-09-11 20:26:11 -07:00
										 |  |  | { | 
					
						
							| 
									
										
										
										
											2014-01-10 15:57:04 -08:00
										 |  |  | 	int i; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-10-03 13:49:31 -07:00
										 |  |  | 	if (!ti) | 
					
						
							| 
									
										
										
										
											2013-09-11 20:26:11 -07:00
										 |  |  | 		return; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-01-21 16:42:52 -08:00
										 |  |  | 	BUG_ON(!ufid_ti); | 
					
						
							| 
									
										
										
										
											2014-01-10 15:57:04 -08:00
										 |  |  | 	if (ti->keep_flows) | 
					
						
							|  |  |  | 		goto skip_flows; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	for (i = 0; i < ti->n_buckets; i++) { | 
					
						
							|  |  |  | 		struct sw_flow *flow; | 
					
						
							|  |  |  | 		struct hlist_head *head = flex_array_get(ti->buckets, i); | 
					
						
							|  |  |  | 		struct hlist_node *n; | 
					
						
							|  |  |  | 		int ver = ti->node_ver; | 
					
						
							| 
									
										
										
										
											2015-01-21 16:42:52 -08:00
										 |  |  | 		int ufid_ver = ufid_ti->node_ver; | 
					
						
							| 
									
										
										
										
											2014-01-10 15:57:04 -08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-01-21 16:42:52 -08:00
										 |  |  | 		hlist_for_each_entry_safe(flow, n, head, flow_table.node[ver]) { | 
					
						
							|  |  |  | 			hlist_del_rcu(&flow->flow_table.node[ver]); | 
					
						
							|  |  |  | 			if (ovs_identifier_is_ufid(&flow->id)) | 
					
						
							|  |  |  | 				hlist_del_rcu(&flow->ufid_table.node[ufid_ver]); | 
					
						
							| 
									
										
										
										
											2014-01-10 15:57:04 -08:00
										 |  |  | 			ovs_flow_free(flow, deferred); | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | skip_flows: | 
					
						
							| 
									
										
										
										
											2015-01-21 16:42:52 -08:00
										 |  |  | 	if (deferred) { | 
					
						
							| 
									
										
										
										
											2013-10-03 13:49:31 -07:00
										 |  |  | 		call_rcu(&ti->rcu, flow_tbl_destroy_rcu_cb); | 
					
						
							| 
									
										
										
										
											2015-01-21 16:42:52 -08:00
										 |  |  | 		call_rcu(&ufid_ti->rcu, flow_tbl_destroy_rcu_cb); | 
					
						
							|  |  |  | 	} else { | 
					
						
							| 
									
										
										
										
											2013-10-03 13:49:31 -07:00
										 |  |  | 		__table_instance_destroy(ti); | 
					
						
							| 
									
										
										
										
											2015-01-21 16:42:52 -08:00
										 |  |  | 		__table_instance_destroy(ufid_ti); | 
					
						
							|  |  |  | 	} | 
					
						
							| 
									
										
										
										
											2013-10-03 13:49:31 -07:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-04-23 08:33:38 -07:00
										 |  |  | /* No need for locking this function is called from RCU callback or
 | 
					
						
							| 
									
										
										
										
											2014-10-20 15:19:10 -07:00
										 |  |  |  * error path. | 
					
						
							|  |  |  |  */ | 
					
						
							| 
									
										
										
										
											2014-04-23 08:33:38 -07:00
										 |  |  | void ovs_flow_tbl_destroy(struct flow_table *table) | 
					
						
							| 
									
										
										
										
											2013-10-03 13:49:31 -07:00
										 |  |  | { | 
					
						
							| 
									
										
										
										
											2014-10-20 15:05:56 -07:00
										 |  |  | 	struct table_instance *ti = rcu_dereference_raw(table->ti); | 
					
						
							| 
									
										
										
										
											2015-01-21 16:42:52 -08:00
										 |  |  | 	struct table_instance *ufid_ti = rcu_dereference_raw(table->ufid_ti); | 
					
						
							| 
									
										
										
										
											2013-10-03 13:49:31 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-04-23 08:34:08 -07:00
										 |  |  | 	free_percpu(table->mask_cache); | 
					
						
							| 
									
										
										
										
											2014-10-20 15:05:56 -07:00
										 |  |  | 	kfree(rcu_dereference_raw(table->mask_array)); | 
					
						
							| 
									
										
										
										
											2015-01-21 16:42:52 -08:00
										 |  |  | 	table_instance_destroy(ti, ufid_ti, false); | 
					
						
							| 
									
										
										
										
											2013-09-11 20:26:11 -07:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-10-03 13:49:31 -07:00
										 |  |  | struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *ti, | 
					
						
							| 
									
										
										
										
											2013-09-11 20:26:11 -07:00
										 |  |  | 				       u32 *bucket, u32 *last) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	struct sw_flow *flow; | 
					
						
							|  |  |  | 	struct hlist_head *head; | 
					
						
							|  |  |  | 	int ver; | 
					
						
							|  |  |  | 	int i; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-10-03 13:49:31 -07:00
										 |  |  | 	ver = ti->node_ver; | 
					
						
							|  |  |  | 	while (*bucket < ti->n_buckets) { | 
					
						
							| 
									
										
										
										
											2013-09-11 20:26:11 -07:00
										 |  |  | 		i = 0; | 
					
						
							| 
									
										
										
										
											2013-10-03 13:49:31 -07:00
										 |  |  | 		head = flex_array_get(ti->buckets, *bucket); | 
					
						
							| 
									
										
										
										
											2015-01-21 16:42:52 -08:00
										 |  |  | 		hlist_for_each_entry_rcu(flow, head, flow_table.node[ver]) { | 
					
						
							| 
									
										
										
										
											2013-09-11 20:26:11 -07:00
										 |  |  | 			if (i < *last) { | 
					
						
							|  |  |  | 				i++; | 
					
						
							|  |  |  | 				continue; | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 			*last = i + 1; | 
					
						
							|  |  |  | 			return flow; | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		(*bucket)++; | 
					
						
							|  |  |  | 		*last = 0; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	return NULL; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-10-03 13:49:31 -07:00
										 |  |  | static struct hlist_head *find_bucket(struct table_instance *ti, u32 hash) | 
					
						
							| 
									
										
										
										
											2013-09-11 20:26:11 -07:00
										 |  |  | { | 
					
						
							| 
									
										
										
										
											2013-10-03 13:49:31 -07:00
										 |  |  | 	hash = jhash_1word(hash, ti->hash_seed); | 
					
						
							|  |  |  | 	return flex_array_get(ti->buckets, | 
					
						
							|  |  |  | 				(hash & (ti->n_buckets - 1))); | 
					
						
							| 
									
										
										
										
											2013-09-11 20:26:11 -07:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-01-21 16:42:52 -08:00
										 |  |  | static void table_instance_insert(struct table_instance *ti, | 
					
						
							|  |  |  | 				  struct sw_flow *flow) | 
					
						
							| 
									
										
										
										
											2013-09-11 20:26:11 -07:00
										 |  |  | { | 
					
						
							|  |  |  | 	struct hlist_head *head; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-01-21 16:42:52 -08:00
										 |  |  | 	head = find_bucket(ti, flow->flow_table.hash); | 
					
						
							|  |  |  | 	hlist_add_head_rcu(&flow->flow_table.node[ti->node_ver], head); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static void ufid_table_instance_insert(struct table_instance *ti, | 
					
						
							|  |  |  | 				       struct sw_flow *flow) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	struct hlist_head *head; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	head = find_bucket(ti, flow->ufid_table.hash); | 
					
						
							|  |  |  | 	hlist_add_head_rcu(&flow->ufid_table.node[ti->node_ver], head); | 
					
						
							| 
									
										
										
										
											2013-09-11 20:26:11 -07:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-10-03 13:49:31 -07:00
										 |  |  | static void flow_table_copy_flows(struct table_instance *old, | 
					
						
							| 
									
										
										
										
											2015-01-21 16:42:52 -08:00
										 |  |  | 				  struct table_instance *new, bool ufid) | 
					
						
							| 
									
										
										
										
											2013-09-11 20:26:11 -07:00
										 |  |  | { | 
					
						
							|  |  |  | 	int old_ver; | 
					
						
							|  |  |  | 	int i; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	old_ver = old->node_ver; | 
					
						
							|  |  |  | 	new->node_ver = !old_ver; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	/* Insert in new table. */ | 
					
						
							|  |  |  | 	for (i = 0; i < old->n_buckets; i++) { | 
					
						
							|  |  |  | 		struct sw_flow *flow; | 
					
						
							|  |  |  | 		struct hlist_head *head; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		head = flex_array_get(old->buckets, i); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-01-21 16:42:52 -08:00
										 |  |  | 		if (ufid) | 
					
						
							|  |  |  | 			hlist_for_each_entry(flow, head, | 
					
						
							|  |  |  | 					     ufid_table.node[old_ver]) | 
					
						
							|  |  |  | 				ufid_table_instance_insert(new, flow); | 
					
						
							|  |  |  | 		else | 
					
						
							|  |  |  | 			hlist_for_each_entry(flow, head, | 
					
						
							|  |  |  | 					     flow_table.node[old_ver]) | 
					
						
							|  |  |  | 				table_instance_insert(new, flow); | 
					
						
							| 
									
										
										
										
											2013-09-11 20:26:11 -07:00
										 |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	old->keep_flows = true; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-10-03 13:49:31 -07:00
										 /* Build a new table instance with 'n_buckets' buckets holding every flow
 * currently linked in 'ti'.
 *
 * 'ufid' is forwarded to flow_table_copy_flows() to select which linkage is
 * copied.  Returns the new instance, or NULL if it could not be allocated
 * (in which case 'ti' is not modified).
 */
static struct table_instance *table_instance_rehash(struct table_instance *ti,
						    int n_buckets, bool ufid)
{
	struct table_instance *rehashed = table_instance_alloc(n_buckets);

	if (rehashed)
		flow_table_copy_flows(ti, rehashed, ufid);

	return rehashed;
}
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-10-03 13:49:31 -07:00
										 |  |  | int ovs_flow_tbl_flush(struct flow_table *flow_table) | 
					
						
							| 
									
										
										
										
											2013-09-11 20:26:11 -07:00
										 |  |  | { | 
					
						
							| 
									
										
										
										
											2015-01-21 16:42:52 -08:00
										 |  |  | 	struct table_instance *old_ti, *new_ti; | 
					
						
							|  |  |  | 	struct table_instance *old_ufid_ti, *new_ufid_ti; | 
					
						
							| 
									
										
										
										
											2013-09-11 20:26:11 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-10-03 13:49:31 -07:00
										 |  |  | 	new_ti = table_instance_alloc(TBL_MIN_BUCKETS); | 
					
						
							|  |  |  | 	if (!new_ti) | 
					
						
							|  |  |  | 		return -ENOMEM; | 
					
						
							| 
									
										
										
										
											2015-01-21 16:42:52 -08:00
										 |  |  | 	new_ufid_ti = table_instance_alloc(TBL_MIN_BUCKETS); | 
					
						
							|  |  |  | 	if (!new_ufid_ti) | 
					
						
							|  |  |  | 		goto err_free_ti; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	old_ti = ovsl_dereference(flow_table->ti); | 
					
						
							|  |  |  | 	old_ufid_ti = ovsl_dereference(flow_table->ufid_ti); | 
					
						
							| 
									
										
										
										
											2013-10-03 13:49:31 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	rcu_assign_pointer(flow_table->ti, new_ti); | 
					
						
							| 
									
										
										
										
											2015-01-21 16:42:52 -08:00
										 |  |  | 	rcu_assign_pointer(flow_table->ufid_ti, new_ufid_ti); | 
					
						
							| 
									
										
										
										
											2013-10-03 13:49:31 -07:00
										 |  |  | 	flow_table->last_rehash = jiffies; | 
					
						
							|  |  |  | 	flow_table->count = 0; | 
					
						
							| 
									
										
										
										
											2015-01-21 16:42:52 -08:00
										 |  |  | 	flow_table->ufid_count = 0; | 
					
						
							| 
									
										
										
										
											2013-10-03 13:49:31 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-01-21 16:42:52 -08:00
										 |  |  | 	table_instance_destroy(old_ti, old_ufid_ti, true); | 
					
						
							| 
									
										
										
										
											2013-10-03 13:49:31 -07:00
										 |  |  | 	return 0; | 
					
						
							| 
									
										
										
										
											2015-01-21 16:42:52 -08:00
										 |  |  | 
 | 
					
						
							|  |  |  | err_free_ti: | 
					
						
							|  |  |  | 	__table_instance_destroy(new_ti); | 
					
						
							|  |  |  | 	return -ENOMEM; | 
					
						
							| 
									
										
										
										
											2013-09-11 20:26:11 -07:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-01-21 16:42:50 -08:00
										 |  |  | static u32 flow_hash(const struct sw_flow_key *key, | 
					
						
							|  |  |  | 		     const struct sw_flow_key_range *range) | 
					
						
							| 
									
										
										
										
											2013-09-11 20:26:11 -07:00
										 |  |  | { | 
					
						
							| 
									
										
										
										
											2015-01-21 16:42:50 -08:00
										 |  |  | 	int key_start = range->start; | 
					
						
							|  |  |  | 	int key_end = range->end; | 
					
						
							| 
									
										
										
										
											2014-01-23 17:19:29 +01:00
										 |  |  | 	const u32 *hash_key = (const u32 *)((const u8 *)key + key_start); | 
					
						
							| 
									
										
										
										
											2013-09-11 20:26:11 -07:00
										 |  |  | 	int hash_u32s = (key_end - key_start) >> 2; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	/* Make sure number of hash bytes are multiple of u32. */ | 
					
						
							|  |  |  | 	BUILD_BUG_ON(sizeof(long) % sizeof(u32)); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-01-07 12:55:49 +01:00
										 |  |  | 	return jhash2(hash_key, hash_u32s, 0); | 
					
						
							| 
									
										
										
										
											2013-09-11 20:26:11 -07:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int flow_key_start(const struct sw_flow_key *key) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	if (key->tun_key.ipv4_dst) | 
					
						
							|  |  |  | 		return 0; | 
					
						
							|  |  |  | 	else | 
					
						
							|  |  |  | 		return rounddown(offsetof(struct sw_flow_key, phy), | 
					
						
							|  |  |  | 					  sizeof(long)); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static bool cmp_key(const struct sw_flow_key *key1, | 
					
						
							|  |  |  | 		    const struct sw_flow_key *key2, | 
					
						
							|  |  |  | 		    int key_start, int key_end) | 
					
						
							|  |  |  | { | 
					
						
							| 
									
										
										
										
											2014-01-23 17:19:29 +01:00
										 |  |  | 	const long *cp1 = (const long *)((const u8 *)key1 + key_start); | 
					
						
							|  |  |  | 	const long *cp2 = (const long *)((const u8 *)key2 + key_start); | 
					
						
							| 
									
										
										
										
											2013-09-11 20:26:11 -07:00
										 |  |  | 	long diffs = 0; | 
					
						
							|  |  |  | 	int i; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	for (i = key_start; i < key_end;  i += sizeof(long)) | 
					
						
							|  |  |  | 		diffs |= *cp1++ ^ *cp2++; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	return diffs == 0; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static bool flow_cmp_masked_key(const struct sw_flow *flow, | 
					
						
							|  |  |  | 				const struct sw_flow_key *key, | 
					
						
							| 
									
										
										
										
											2015-01-21 16:42:50 -08:00
										 |  |  | 				const struct sw_flow_key_range *range) | 
					
						
							| 
									
										
										
										
											2013-09-11 20:26:11 -07:00
										 |  |  | { | 
					
						
							| 
									
										
										
										
											2015-01-21 16:42:52 -08:00
										 |  |  | 	return cmp_key(&flow->key, key, range->start, range->end); | 
					
						
							| 
									
										
										
										
											2013-09-11 20:26:11 -07:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-05-15 06:32:32 -07:00
										 |  |  | static bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow, | 
					
						
							|  |  |  | 				      const struct sw_flow_match *match) | 
					
						
							| 
									
										
										
										
											2013-09-11 20:26:11 -07:00
										 |  |  | { | 
					
						
							|  |  |  | 	struct sw_flow_key *key = match->key; | 
					
						
							|  |  |  | 	int key_start = flow_key_start(key); | 
					
						
							|  |  |  | 	int key_end = match->range.end; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-01-21 16:42:52 -08:00
										 |  |  | 	BUG_ON(ovs_identifier_is_ufid(&flow->id)); | 
					
						
							|  |  |  | 	return cmp_key(flow->id.unmasked_key, key, key_start, key_end); | 
					
						
							| 
									
										
										
										
											2013-09-11 20:26:11 -07:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-10-03 13:49:31 -07:00
										 |  |  | static struct sw_flow *masked_flow_lookup(struct table_instance *ti, | 
					
						
							| 
									
										
										
										
											2013-09-11 20:26:11 -07:00
										 |  |  | 					  const struct sw_flow_key *unmasked, | 
					
						
							| 
									
										
										
										
											2014-09-23 16:02:35 +02:00
										 |  |  | 					  const struct sw_flow_mask *mask, | 
					
						
							| 
									
										
										
										
											2014-04-23 08:34:08 -07:00
										 |  |  | 					  u32 *n_mask_hit) | 
					
						
							| 
									
										
										
										
											2013-09-11 20:26:11 -07:00
										 |  |  | { | 
					
						
							|  |  |  | 	struct sw_flow *flow; | 
					
						
							|  |  |  | 	struct hlist_head *head; | 
					
						
							|  |  |  | 	u32 hash; | 
					
						
							|  |  |  | 	struct sw_flow_key masked_key; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-09-22 18:13:00 -07:00
										 |  |  | 	ovs_flow_mask_key(&masked_key, unmasked, false, mask); | 
					
						
							| 
									
										
										
										
											2015-01-21 16:42:50 -08:00
										 |  |  | 	hash = flow_hash(&masked_key, &mask->range); | 
					
						
							| 
									
										
										
										
											2013-10-03 13:49:31 -07:00
										 |  |  | 	head = find_bucket(ti, hash); | 
					
						
							| 
									
										
										
										
											2014-04-23 08:34:08 -07:00
										 |  |  | 	(*n_mask_hit)++; | 
					
						
							| 
									
										
										
										
											2015-01-21 16:42:52 -08:00
										 |  |  | 	hlist_for_each_entry_rcu(flow, head, flow_table.node[ti->node_ver]) { | 
					
						
							|  |  |  | 		if (flow->mask == mask && flow->flow_table.hash == hash && | 
					
						
							| 
									
										
										
										
											2015-01-21 16:42:50 -08:00
										 |  |  | 		    flow_cmp_masked_key(flow, &masked_key, &mask->range)) | 
					
						
							| 
									
										
										
										
											2013-09-11 20:26:11 -07:00
										 |  |  | 			return flow; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	return NULL; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-08-05 10:47:23 -07:00
										 |  |  | /* Flow lookup does full lookup on flow table. It starts with
 | 
					
						
							|  |  |  |  * mask from index passed in *index. | 
					
						
							|  |  |  |  */ | 
					
						
							| 
									
										
										
										
											2014-04-23 08:34:08 -07:00
										 |  |  | static struct sw_flow *flow_lookup(struct flow_table *tbl, | 
					
						
							|  |  |  | 				   struct table_instance *ti, | 
					
						
							| 
									
										
										
										
											2014-09-23 16:02:35 +02:00
										 |  |  | 				   const struct mask_array *ma, | 
					
						
							| 
									
										
										
										
											2014-04-23 08:34:08 -07:00
										 |  |  | 				   const struct sw_flow_key *key, | 
					
						
							| 
									
										
										
										
											2014-04-23 08:34:51 -07:00
										 |  |  | 				   u32 *n_mask_hit, | 
					
						
							|  |  |  | 				   u32 *index) | 
					
						
							| 
									
										
										
										
											2013-09-11 20:26:11 -07:00
										 |  |  | { | 
					
						
							| 
									
										
										
										
											2014-08-05 10:47:23 -07:00
										 |  |  | 	struct sw_flow_mask *mask; | 
					
						
							| 
									
										
										
										
											2013-10-03 13:49:31 -07:00
										 |  |  | 	struct sw_flow *flow; | 
					
						
							| 
									
										
										
										
											2014-04-23 08:34:51 -07:00
										 |  |  | 	int i; | 
					
						
							| 
									
										
										
										
											2013-09-11 20:26:11 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-08-05 10:47:23 -07:00
										 |  |  | 	if (*index < ma->max) { | 
					
						
							|  |  |  | 		mask = rcu_dereference_ovsl(ma->masks[*index]); | 
					
						
							|  |  |  | 		if (mask) { | 
					
						
							|  |  |  | 			flow = masked_flow_lookup(ti, key, mask, n_mask_hit); | 
					
						
							|  |  |  | 			if (flow) | 
					
						
							|  |  |  | 				return flow; | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	for (i = 0; i < ma->max; i++)  { | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		if (i == *index) | 
					
						
							|  |  |  | 			continue; | 
					
						
							| 
									
										
										
										
											2014-04-23 08:34:51 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  | 		mask = rcu_dereference_ovsl(ma->masks[i]); | 
					
						
							| 
									
										
										
										
											2014-07-24 13:32:35 -07:00
										 |  |  | 		if (!mask) | 
					
						
							| 
									
										
										
										
											2014-08-13 00:02:18 -07:00
										 |  |  | 			continue; | 
					
						
							| 
									
										
										
										
											2014-07-24 13:32:35 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  | 		flow = masked_flow_lookup(ti, key, mask, n_mask_hit); | 
					
						
							|  |  |  | 		if (flow) { /* Found */ | 
					
						
							|  |  |  | 			*index = i; | 
					
						
							|  |  |  | 			return flow; | 
					
						
							| 
									
										
										
										
											2014-04-23 08:34:51 -07:00
										 |  |  | 		} | 
					
						
							| 
									
										
										
										
											2013-09-11 20:26:11 -07:00
										 |  |  | 	} | 
					
						
							| 
									
										
										
										
											2014-04-23 08:34:51 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-10-03 13:49:31 -07:00
										 |  |  | 	return NULL; | 
					
						
							|  |  |  | } | 
					
						
							| 
									
										
										
										
											2013-09-11 20:26:11 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-04-23 08:34:08 -07:00
										 |  |  | /*
 | 
					
						
							|  |  |  |  * mask_cache maps flow to probable mask. This cache is not tightly | 
					
						
							|  |  |  |  * coupled cache, It means updates to  mask list can result in inconsistent | 
					
						
							|  |  |  |  * cache entry in mask cache. | 
					
						
							|  |  |  |  * This is per cpu cache and is divided in MC_HASH_SEGS segments. | 
					
						
							|  |  |  |  * In case of a hash collision the entry is hashed in next segment. | 
					
						
							| 
									
										
										
										
											2014-10-20 15:19:10 -07:00
										 |  |  |  */ | 
					
						
							| 
									
										
										
										
											2014-04-23 08:34:08 -07:00
										 |  |  | struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl, | 
					
						
							|  |  |  | 					  const struct sw_flow_key *key, | 
					
						
							|  |  |  | 					  u32 skb_hash, | 
					
						
							|  |  |  | 					  u32 *n_mask_hit) | 
					
						
							|  |  |  | { | 
					
						
							| 
									
										
										
										
											2014-07-24 13:32:35 -07:00
										 |  |  | 	struct mask_array *ma = rcu_dereference(tbl->mask_array); | 
					
						
							|  |  |  | 	struct table_instance *ti = rcu_dereference(tbl->ti); | 
					
						
							| 
									
										
										
										
											2014-06-06 13:30:27 -07:00
										 |  |  | 	struct mask_cache_entry *entries, *ce; | 
					
						
							| 
									
										
										
										
											2014-04-23 08:34:08 -07:00
										 |  |  | 	struct sw_flow *flow; | 
					
						
							| 
									
										
										
										
											2015-02-03 13:57:55 -08:00
										 |  |  | 	u32 hash; | 
					
						
							| 
									
										
										
										
											2014-04-23 08:34:08 -07:00
										 |  |  | 	int seg; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	*n_mask_hit = 0; | 
					
						
							| 
									
										
										
										
											2014-04-23 08:34:51 -07:00
										 |  |  | 	if (unlikely(!skb_hash)) { | 
					
						
							| 
									
										
										
										
											2014-08-05 10:47:23 -07:00
										 |  |  | 		u32 mask_index = 0; | 
					
						
							| 
									
										
										
										
											2014-04-23 08:34:51 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  | 		return flow_lookup(tbl, ti, ma, key, n_mask_hit, &mask_index); | 
					
						
							|  |  |  | 	} | 
					
						
							| 
									
										
										
										
											2014-04-23 08:34:08 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-02-03 13:57:55 -08:00
										 |  |  | 	/* Pre and post recirulation flows usually have the same skb_hash
 | 
					
						
							|  |  |  | 	 * value. To avoid hash collisions, rehash the 'skb_hash' with | 
					
						
							|  |  |  | 	 * 'recirc_id'.  */ | 
					
						
							|  |  |  | 	if (key->recirc_id) | 
					
						
							|  |  |  | 		skb_hash = jhash_1word(skb_hash, key->recirc_id); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-06-06 13:30:27 -07:00
										 |  |  | 	ce = NULL; | 
					
						
							| 
									
										
										
										
											2015-02-03 13:57:55 -08:00
										 |  |  | 	hash = skb_hash; | 
					
						
							| 
									
										
										
										
											2014-04-23 08:34:08 -07:00
										 |  |  | 	entries = this_cpu_ptr(tbl->mask_cache); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-06-06 13:30:27 -07:00
										 |  |  | 	/* Find the cache entry 'ce' to operate on. */ | 
					
						
							| 
									
										
										
										
											2014-04-23 08:34:08 -07:00
										 |  |  | 	for (seg = 0; seg < MC_HASH_SEGS; seg++) { | 
					
						
							| 
									
										
										
										
											2014-06-06 13:30:27 -07:00
										 |  |  | 		int index = hash & (MC_HASH_ENTRIES - 1); | 
					
						
							|  |  |  | 		struct mask_cache_entry *e; | 
					
						
							| 
									
										
										
										
											2014-04-23 08:34:08 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-06-06 13:30:27 -07:00
										 |  |  | 		e = &entries[index]; | 
					
						
							|  |  |  | 		if (e->skb_hash == skb_hash) { | 
					
						
							| 
									
										
										
										
											2014-08-05 10:47:23 -07:00
										 |  |  | 			flow = flow_lookup(tbl, ti, ma, key, n_mask_hit, | 
					
						
							|  |  |  | 					   &e->mask_index); | 
					
						
							|  |  |  | 			if (!flow) | 
					
						
							|  |  |  | 				e->skb_hash = 0; | 
					
						
							|  |  |  | 			return flow; | 
					
						
							| 
									
										
										
										
											2014-04-23 08:34:08 -07:00
										 |  |  | 		} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-06-06 13:30:27 -07:00
										 |  |  | 		if (!ce || e->skb_hash < ce->skb_hash) | 
					
						
							|  |  |  | 			ce = e;  /* A better replacement cache candidate. */ | 
					
						
							| 
									
										
										
										
											2014-04-23 08:34:08 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  | 		hash >>= MC_HASH_SHIFT; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-06-06 13:30:27 -07:00
										 |  |  | 	/* Cache miss, do full lookup. */ | 
					
						
							|  |  |  | 	flow = flow_lookup(tbl, ti, ma, key, n_mask_hit, &ce->mask_index); | 
					
						
							| 
									
										
										
										
											2014-04-23 08:34:51 -07:00
										 |  |  | 	if (flow) | 
					
						
							| 
									
										
										
										
											2014-06-06 13:30:27 -07:00
										 |  |  | 		ce->skb_hash = skb_hash; | 
					
						
							| 
									
										
										
										
											2014-04-23 08:34:51 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-04-23 08:34:08 -07:00
										 |  |  | 	return flow; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-11-13 12:04:40 -08:00
										 |  |  | struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *tbl, | 
					
						
							|  |  |  | 				    const struct sw_flow_key *key) | 
					
						
							|  |  |  | { | 
					
						
							| 
									
										
										
										
											2014-04-23 08:34:08 -07:00
										 |  |  | 	struct table_instance *ti = rcu_dereference_ovsl(tbl->ti); | 
					
						
							| 
									
										
										
										
											2014-04-23 08:34:51 -07:00
										 |  |  | 	struct mask_array *ma = rcu_dereference_ovsl(tbl->mask_array); | 
					
						
							| 
									
										
										
										
											2013-11-13 12:04:40 -08:00
										 |  |  | 	u32 __always_unused n_mask_hit; | 
					
						
							| 
									
										
										
										
											2014-08-05 10:47:23 -07:00
										 |  |  | 	u32 index = 0; | 
					
						
							| 
									
										
										
										
											2013-11-13 12:04:40 -08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-04-23 08:34:51 -07:00
										 |  |  | 	return flow_lookup(tbl, ti, ma, key, &n_mask_hit, &index); | 
					
						
							| 
									
										
										
										
											2013-11-13 12:04:40 -08:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-06-30 14:51:02 -07:00
										 |  |  | struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl, | 
					
						
							| 
									
										
										
										
											2014-09-23 16:02:35 +02:00
										 |  |  | 					  const struct sw_flow_match *match) | 
					
						
							| 
									
										
										
										
											2014-06-30 14:51:02 -07:00
										 |  |  | { | 
					
						
							| 
									
										
										
										
											2014-07-25 20:34:48 -07:00
										 |  |  | 	struct mask_array *ma = ovsl_dereference(tbl->mask_array); | 
					
						
							| 
									
										
										
										
											2014-06-30 14:51:02 -07:00
										 |  |  | 	int i; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	/* Always called under ovs-mutex. */ | 
					
						
							| 
									
										
										
										
											2014-08-13 00:02:18 -07:00
										 |  |  | 	for (i = 0; i < ma->max; i++) { | 
					
						
							| 
									
										
										
										
											2014-07-25 20:34:48 -07:00
										 |  |  | 		struct table_instance *ti = ovsl_dereference(tbl->ti); | 
					
						
							|  |  |  | 		u32 __always_unused n_mask_hit; | 
					
						
							| 
									
										
										
										
											2014-06-30 14:51:02 -07:00
										 |  |  | 		struct sw_flow_mask *mask; | 
					
						
							| 
									
										
										
										
											2014-07-25 20:34:48 -07:00
										 |  |  | 		struct sw_flow *flow; | 
					
						
							| 
									
										
										
										
											2014-06-30 14:51:02 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  | 		mask = ovsl_dereference(ma->masks[i]); | 
					
						
							| 
									
										
										
										
											2014-08-13 00:02:18 -07:00
										 |  |  | 		if (!mask) | 
					
						
							|  |  |  | 			continue; | 
					
						
							| 
									
										
										
										
											2014-07-25 20:34:48 -07:00
										 |  |  | 		flow = masked_flow_lookup(ti, match->key, mask, &n_mask_hit); | 
					
						
							| 
									
										
										
										
											2015-01-21 16:42:52 -08:00
										 |  |  | 		if (flow && ovs_identifier_is_key(&flow->id) && | 
					
						
							|  |  |  | 		    ovs_flow_cmp_unmasked_key(flow, match)) | 
					
						
							|  |  |  | 			return flow; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	return NULL; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static u32 ufid_hash(const struct sw_flow_id *sfid) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	return jhash(sfid->ufid, sfid->ufid_len, 0); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static bool ovs_flow_cmp_ufid(const struct sw_flow *flow, | 
					
						
							|  |  |  | 			      const struct sw_flow_id *sfid) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	if (flow->id.ufid_len != sfid->ufid_len) | 
					
						
							|  |  |  | 		return false; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	return !memcmp(flow->id.ufid, sfid->ufid, sfid->ufid_len); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | bool ovs_flow_cmp(const struct sw_flow *flow, const struct sw_flow_match *match) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	if (ovs_identifier_is_ufid(&flow->id)) | 
					
						
							|  |  |  | 		return flow_cmp_masked_key(flow, match->key, &match->range); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	return ovs_flow_cmp_unmasked_key(flow, match); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | struct sw_flow *ovs_flow_tbl_lookup_ufid(struct flow_table *tbl, | 
					
						
							|  |  |  | 					 const struct sw_flow_id *ufid) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	struct table_instance *ti = rcu_dereference_ovsl(tbl->ufid_ti); | 
					
						
							|  |  |  | 	struct sw_flow *flow; | 
					
						
							|  |  |  | 	struct hlist_head *head; | 
					
						
							|  |  |  | 	u32 hash; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	hash = ufid_hash(ufid); | 
					
						
							|  |  |  | 	head = find_bucket(ti, hash); | 
					
						
							|  |  |  | 	hlist_for_each_entry_rcu(flow, head, ufid_table.node[ti->node_ver]) { | 
					
						
							|  |  |  | 		if (flow->ufid_table.hash == hash && | 
					
						
							|  |  |  | 		    ovs_flow_cmp_ufid(flow, ufid)) | 
					
						
							| 
									
										
										
										
											2014-07-25 20:34:48 -07:00
										 |  |  | 			return flow; | 
					
						
							| 
									
										
										
										
											2014-06-30 14:51:02 -07:00
										 |  |  | 	} | 
					
						
							|  |  |  | 	return NULL; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-10-21 14:37:33 -07:00
										 |  |  | int ovs_flow_tbl_num_masks(const struct flow_table *table) | 
					
						
							|  |  |  | { | 
					
						
							| 
									
										
										
										
											2014-04-23 08:34:51 -07:00
										 |  |  | 	struct mask_array *ma; | 
					
						
							| 
									
										
										
										
											2013-10-21 14:37:33 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-04-23 08:34:51 -07:00
										 |  |  | 	ma = rcu_dereference_ovsl(table->mask_array); | 
					
						
							|  |  |  | 	return ma->count; | 
					
						
							| 
									
										
										
										
											2013-10-21 14:37:33 -07:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-01-21 16:42:52 -08:00
										 |  |  | static struct table_instance *table_instance_expand(struct table_instance *ti, | 
					
						
							|  |  |  | 						    bool ufid) | 
					
						
							| 
									
										
										
										
											2013-10-03 13:49:31 -07:00
										 |  |  | { | 
					
						
							| 
									
										
										
										
											2015-01-21 16:42:52 -08:00
										 |  |  | 	return table_instance_rehash(ti, ti->n_buckets * 2, ufid); | 
					
						
							| 
									
										
										
										
											2013-09-11 20:26:11 -07:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-08-13 00:02:18 -07:00
										 |  |  | static void tbl_mask_array_delete_mask(struct mask_array *ma, | 
					
						
							|  |  |  | 				       struct sw_flow_mask *mask) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	int i; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	/* Remove the deleted mask pointers from the array */ | 
					
						
							|  |  |  | 	for (i = 0; i < ma->max; i++) { | 
					
						
							|  |  |  | 		if (mask == ovsl_dereference(ma->masks[i])) { | 
					
						
							|  |  |  | 			RCU_INIT_POINTER(ma->masks[i], NULL); | 
					
						
							|  |  |  | 			ma->count--; | 
					
						
							|  |  |  | 			call_rcu(&mask->rcu, rcu_free_sw_flow_mask_cb); | 
					
						
							|  |  |  | 			return; | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	BUG(); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-03-29 15:52:32 -07:00
										 |  |  | /* Remove 'mask' from the mask list, if it is not needed any more. */ | 
					
						
							|  |  |  | static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	if (mask) { | 
					
						
							|  |  |  | 		/* ovs-lock is required to protect mask-refcount and
 | 
					
						
							|  |  |  | 		 * mask list. | 
					
						
							|  |  |  | 		 */ | 
					
						
							|  |  |  | 		ASSERT_OVSL(); | 
					
						
							|  |  |  | 		BUG_ON(!mask->ref_count); | 
					
						
							|  |  |  | 		mask->ref_count--; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		if (!mask->ref_count) { | 
					
						
							| 
									
										
										
										
											2014-04-23 08:34:51 -07:00
										 |  |  | 			struct mask_array *ma; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 			ma = ovsl_dereference(tbl->mask_array); | 
					
						
							| 
									
										
										
										
											2014-08-13 00:02:18 -07:00
										 |  |  | 			tbl_mask_array_delete_mask(ma, mask); | 
					
						
							| 
									
										
										
										
											2014-06-16 12:45:04 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-08-13 00:02:18 -07:00
										 |  |  | 			/* Shrink the mask array if necessary. */ | 
					
						
							|  |  |  | 			if (ma->max >= (MASK_ARRAY_SIZE_MIN * 2) && | 
					
						
							|  |  |  | 			    ma->count <= (ma->max / 3)) | 
					
						
							| 
									
										
										
										
											2014-06-16 12:45:04 -07:00
										 |  |  | 				tbl_mask_array_realloc(tbl, ma->max / 2); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-03-29 15:52:32 -07:00
										 |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* Must be called with OVS mutex held. */ | 
					
						
							| 
									
										
										
										
											2013-09-11 20:26:11 -07:00
										 |  |  | void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow) | 
					
						
							|  |  |  | { | 
					
						
							| 
									
										
										
										
											2013-10-03 13:49:31 -07:00
										 |  |  | 	struct table_instance *ti = ovsl_dereference(table->ti); | 
					
						
							| 
									
										
										
										
											2015-01-21 16:42:52 -08:00
										 |  |  | 	struct table_instance *ufid_ti = ovsl_dereference(table->ufid_ti); | 
					
						
							| 
									
										
										
										
											2013-10-03 13:49:31 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-09-11 20:26:11 -07:00
										 |  |  | 	BUG_ON(table->count == 0); | 
					
						
							| 
									
										
										
										
											2015-01-21 16:42:52 -08:00
										 |  |  | 	hlist_del_rcu(&flow->flow_table.node[ti->node_ver]); | 
					
						
							| 
									
										
										
										
											2013-09-11 20:26:11 -07:00
										 |  |  | 	table->count--; | 
					
						
							| 
									
										
										
										
											2015-01-21 16:42:52 -08:00
										 |  |  | 	if (ovs_identifier_is_ufid(&flow->id)) { | 
					
						
							|  |  |  | 		hlist_del_rcu(&flow->ufid_table.node[ufid_ti->node_ver]); | 
					
						
							|  |  |  | 		table->ufid_count--; | 
					
						
							|  |  |  | 	} | 
					
						
							| 
									
										
										
										
											2014-03-29 15:52:32 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	/* RCU delete the mask. 'flow->mask' is not NULLed, as it should be
 | 
					
						
							| 
									
										
										
										
											2014-10-20 15:19:10 -07:00
										 |  |  | 	 * accessible as long as the RCU read lock is held. | 
					
						
							|  |  |  | 	 */ | 
					
						
							| 
									
										
										
										
											2014-03-29 15:52:32 -07:00
										 |  |  | 	flow_mask_remove(table, flow->mask); | 
					
						
							| 
									
										
										
										
											2013-09-11 20:26:11 -07:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-10-03 13:54:51 -07:00
										 |  |  | static struct sw_flow_mask *mask_alloc(void) | 
					
						
							| 
									
										
										
										
											2013-09-11 20:26:11 -07:00
										 |  |  | { | 
					
						
							|  |  |  | 	struct sw_flow_mask *mask; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	mask = kmalloc(sizeof(*mask), GFP_KERNEL); | 
					
						
							|  |  |  | 	if (mask) | 
					
						
							| 
									
										
										
										
											2014-01-10 15:57:04 -08:00
										 |  |  | 		mask->ref_count = 1; | 
					
						
							| 
									
										
										
										
											2013-09-11 20:26:11 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	return mask; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static bool mask_equal(const struct sw_flow_mask *a, | 
					
						
							|  |  |  | 		       const struct sw_flow_mask *b) | 
					
						
							|  |  |  | { | 
					
						
							| 
									
										
										
										
											2014-01-23 17:19:29 +01:00
										 |  |  | 	const u8 *a_ = (const u8 *)&a->key + a->range.start; | 
					
						
							|  |  |  | 	const u8 *b_ = (const u8 *)&b->key + b->range.start; | 
					
						
							| 
									
										
										
										
											2013-09-11 20:26:11 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	return  (a->range.end == b->range.end) | 
					
						
							|  |  |  | 		&& (a->range.start == b->range.start) | 
					
						
							|  |  |  | 		&& (memcmp(a_, b_, range_n_bytes(&a->range)) == 0); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-10-03 13:54:51 -07:00
										 |  |  | static struct sw_flow_mask *flow_mask_find(const struct flow_table *tbl, | 
					
						
							| 
									
										
										
										
											2013-09-11 20:26:11 -07:00
										 |  |  | 					   const struct sw_flow_mask *mask) | 
					
						
							|  |  |  | { | 
					
						
							| 
									
										
										
										
											2014-04-23 08:34:51 -07:00
										 |  |  | 	struct mask_array *ma; | 
					
						
							|  |  |  | 	int i; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	ma = ovsl_dereference(tbl->mask_array); | 
					
						
							| 
									
										
										
										
											2014-08-13 00:02:18 -07:00
										 |  |  | 	for (i = 0; i < ma->max; i++) { | 
					
						
							| 
									
										
										
										
											2014-04-23 08:34:51 -07:00
										 |  |  | 		struct sw_flow_mask *t; | 
					
						
							| 
									
										
										
										
											2013-09-11 20:26:11 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-04-23 08:34:51 -07:00
										 |  |  | 		t = ovsl_dereference(ma->masks[i]); | 
					
						
							|  |  |  | 		if (t && mask_equal(mask, t)) | 
					
						
							|  |  |  | 			return t; | 
					
						
							| 
									
										
										
										
											2013-09-11 20:26:11 -07:00
										 |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	return NULL; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-11-13 09:33:26 -08:00
										 |  |  | /* Add 'mask' into the mask list, if it is not already there. */ | 
					
						
							| 
									
										
										
										
											2013-10-03 13:54:51 -07:00
										 |  |  | static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow, | 
					
						
							| 
									
										
										
										
											2014-09-23 16:02:35 +02:00
										 |  |  | 			    const struct sw_flow_mask *new) | 
					
						
							| 
									
										
										
										
											2013-10-03 13:54:51 -07:00
										 |  |  | { | 
					
						
							|  |  |  | 	struct sw_flow_mask *mask; | 
					
						
							| 
									
										
										
										
											2014-04-23 08:34:51 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-10-03 13:54:51 -07:00
										 |  |  | 	mask = flow_mask_find(tbl, new); | 
					
						
							|  |  |  | 	if (!mask) { | 
					
						
							| 
									
										
										
										
											2014-04-23 08:34:51 -07:00
										 |  |  | 		struct mask_array *ma; | 
					
						
							| 
									
										
										
										
											2014-08-13 00:02:18 -07:00
										 |  |  | 		int i; | 
					
						
							| 
									
										
										
										
											2014-04-23 08:34:51 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-10-03 13:54:51 -07:00
										 |  |  | 		/* Allocate a new mask if none exsits. */ | 
					
						
							|  |  |  | 		mask = mask_alloc(); | 
					
						
							|  |  |  | 		if (!mask) | 
					
						
							|  |  |  | 			return -ENOMEM; | 
					
						
							| 
									
										
										
										
											2014-04-23 08:34:51 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-10-03 13:54:51 -07:00
										 |  |  | 		mask->key = new->key; | 
					
						
							|  |  |  | 		mask->range = new->range; | 
					
						
							| 
									
										
										
										
											2014-04-23 08:34:51 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  | 		/* Add mask to mask-list. */ | 
					
						
							|  |  |  | 		ma = ovsl_dereference(tbl->mask_array); | 
					
						
							|  |  |  | 		if (ma->count >= ma->max) { | 
					
						
							|  |  |  | 			int err; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 			err = tbl_mask_array_realloc(tbl, ma->max + | 
					
						
							| 
									
										
										
										
											2014-08-13 00:02:18 -07:00
										 |  |  | 							  MASK_ARRAY_SIZE_MIN); | 
					
						
							| 
									
										
										
										
											2014-04-23 08:34:51 -07:00
										 |  |  | 			if (err) { | 
					
						
							|  |  |  | 				kfree(mask); | 
					
						
							|  |  |  | 				return err; | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 			ma = ovsl_dereference(tbl->mask_array); | 
					
						
							|  |  |  | 		} | 
					
						
							| 
									
										
										
										
											2014-06-16 12:45:04 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-08-13 00:02:18 -07:00
										 |  |  | 		for (i = 0; i < ma->max; i++) { | 
					
						
							|  |  |  | 			struct sw_flow_mask *t; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 			t = ovsl_dereference(ma->masks[i]); | 
					
						
							|  |  |  | 			if (!t) { | 
					
						
							|  |  |  | 				rcu_assign_pointer(ma->masks[i], mask); | 
					
						
							|  |  |  | 				ma->count++; | 
					
						
							|  |  |  | 				break; | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-01-10 15:57:04 -08:00
										 |  |  | 	} else { | 
					
						
							|  |  |  | 		BUG_ON(!mask->ref_count); | 
					
						
							|  |  |  | 		mask->ref_count++; | 
					
						
							| 
									
										
										
										
											2013-10-03 13:54:51 -07:00
										 |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	flow->mask = mask; | 
					
						
							|  |  |  | 	return 0; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-03-29 15:52:32 -07:00
										 |  |  | /* Must be called with OVS mutex held. */ | 
					
						
							| 
									
										
										
										
											2015-01-21 16:42:49 -08:00
										 |  |  | static void flow_key_insert(struct flow_table *table, struct sw_flow *flow) | 
					
						
							| 
									
										
										
										
											2013-09-11 20:26:11 -07:00
										 |  |  | { | 
					
						
							| 
									
										
										
										
											2013-10-03 13:54:51 -07:00
										 |  |  | 	struct table_instance *new_ti = NULL; | 
					
						
							|  |  |  | 	struct table_instance *ti; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-01-21 16:42:52 -08:00
										 |  |  | 	flow->flow_table.hash = flow_hash(&flow->key, &flow->mask->range); | 
					
						
							| 
									
										
										
										
											2013-10-03 13:54:51 -07:00
										 |  |  | 	ti = ovsl_dereference(table->ti); | 
					
						
							|  |  |  | 	table_instance_insert(ti, flow); | 
					
						
							|  |  |  | 	table->count++; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	/* Expand table, if necessary, to make room. */ | 
					
						
							|  |  |  | 	if (table->count > ti->n_buckets) | 
					
						
							| 
									
										
										
										
											2015-01-21 16:42:52 -08:00
										 |  |  | 		new_ti = table_instance_expand(ti, false); | 
					
						
							| 
									
										
										
										
											2013-10-03 13:54:51 -07:00
										 |  |  | 	else if (time_after(jiffies, table->last_rehash + REHASH_INTERVAL)) | 
					
						
							| 
									
										
										
										
											2015-01-21 16:42:52 -08:00
										 |  |  | 		new_ti = table_instance_rehash(ti, ti->n_buckets, false); | 
					
						
							| 
									
										
										
										
											2013-10-03 13:54:51 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	if (new_ti) { | 
					
						
							|  |  |  | 		rcu_assign_pointer(table->ti, new_ti); | 
					
						
							| 
									
										
										
										
											2015-01-21 16:42:52 -08:00
										 |  |  | 		call_rcu(&ti->rcu, flow_tbl_destroy_rcu_cb); | 
					
						
							| 
									
										
										
										
											2013-10-03 13:54:51 -07:00
										 |  |  | 		table->last_rehash = jiffies; | 
					
						
							|  |  |  | 	} | 
					
						
							| 
									
										
										
										
											2015-01-21 16:42:49 -08:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-01-21 16:42:52 -08:00
										 |  |  | /* Must be called with OVS mutex held. */ | 
					
						
							|  |  |  | static void flow_ufid_insert(struct flow_table *table, struct sw_flow *flow) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	struct table_instance *ti; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	flow->ufid_table.hash = ufid_hash(&flow->id); | 
					
						
							|  |  |  | 	ti = ovsl_dereference(table->ufid_ti); | 
					
						
							|  |  |  | 	ufid_table_instance_insert(ti, flow); | 
					
						
							|  |  |  | 	table->ufid_count++; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	/* Expand table, if necessary, to make room. */ | 
					
						
							|  |  |  | 	if (table->ufid_count > ti->n_buckets) { | 
					
						
							|  |  |  | 		struct table_instance *new_ti; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		new_ti = table_instance_expand(ti, true); | 
					
						
							|  |  |  | 		if (new_ti) { | 
					
						
							|  |  |  | 			rcu_assign_pointer(table->ufid_ti, new_ti); | 
					
						
							|  |  |  | 			call_rcu(&ti->rcu, flow_tbl_destroy_rcu_cb); | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-01-21 16:42:49 -08:00
										 |  |  | /* Must be called with OVS mutex held. */ | 
					
						
							|  |  |  | int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow, | 
					
						
							|  |  |  | 			const struct sw_flow_mask *mask) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	int err; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	err = flow_mask_insert(table, flow, mask); | 
					
						
							|  |  |  | 	if (err) | 
					
						
							|  |  |  | 		return err; | 
					
						
							|  |  |  | 	flow_key_insert(table, flow); | 
					
						
							| 
									
										
										
										
											2015-01-21 16:42:52 -08:00
										 |  |  | 	if (ovs_identifier_is_ufid(&flow->id)) | 
					
						
							|  |  |  | 		flow_ufid_insert(table, flow); | 
					
						
							| 
									
										
										
										
											2015-01-21 16:42:49 -08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-10-03 13:54:51 -07:00
										 |  |  | 	return 0; | 
					
						
							| 
									
										
										
										
											2013-09-11 20:26:11 -07:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* Initializes the flow module.
 | 
					
						
							| 
									
										
										
										
											2014-10-20 15:19:10 -07:00
										 |  |  |  * Returns zero if successful or a negative error code. | 
					
						
							|  |  |  |  */ | 
					
						
							| 
									
										
										
										
											2013-09-11 20:26:11 -07:00
										 |  |  | int ovs_flow_init(void) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	BUILD_BUG_ON(__alignof__(struct sw_flow_key) % __alignof__(long)); | 
					
						
							|  |  |  | 	BUILD_BUG_ON(sizeof(struct sw_flow_key) % sizeof(long)); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
											  
											
												datapath: Per NUMA node flow stats.
Keep kernel flow stats for each NUMA node rather than each (logical)
CPU.  This avoids using the per-CPU allocator and removes most of the
kernel-side OVS locking overhead otherwise on the top of perf reports
and allows OVS to scale better with higher number of threads.
With 9 handlers and 4 revalidators netperf TCP_CRR test flow setup
rate doubles on a server with two hyper-threaded physical CPUs (16
logical cores each) compared to the current OVS master.  Tested with
non-trivial flow table with a TCP port match rule forcing all new
connections with unique port numbers to OVS userspace.  The IP
addresses are still wildcarded, so the kernel flows are not considered
as exact match 5-tuple flows.  This type of flows can be expected to
appear in large numbers as the result of more effective wildcarding
made possible by improvements in OVS userspace flow classifier.
Perf results for this test (master):
Events: 305K cycles
+   8.43%     ovs-vswitchd  [kernel.kallsyms]   [k] mutex_spin_on_owner
+   5.64%     ovs-vswitchd  [kernel.kallsyms]   [k] __ticket_spin_lock
+   4.75%     ovs-vswitchd  ovs-vswitchd        [.] find_match_wc
+   3.32%     ovs-vswitchd  libpthread-2.15.so  [.] pthread_mutex_lock
+   2.61%     ovs-vswitchd  [kernel.kallsyms]   [k] pcpu_alloc_area
+   2.19%     ovs-vswitchd  ovs-vswitchd        [.] flow_hash_in_minimask_range
+   2.03%          swapper  [kernel.kallsyms]   [k] intel_idle
+   1.84%     ovs-vswitchd  libpthread-2.15.so  [.] pthread_mutex_unlock
+   1.64%     ovs-vswitchd  ovs-vswitchd        [.] classifier_lookup
+   1.58%     ovs-vswitchd  libc-2.15.so        [.] 0x7f4e6
+   1.07%     ovs-vswitchd  [kernel.kallsyms]   [k] memset
+   1.03%          netperf  [kernel.kallsyms]   [k] __ticket_spin_lock
+   0.92%          swapper  [kernel.kallsyms]   [k] __ticket_spin_lock
...
And after this patch:
Events: 356K cycles
+   6.85%     ovs-vswitchd  ovs-vswitchd        [.] find_match_wc
+   4.63%     ovs-vswitchd  libpthread-2.15.so  [.] pthread_mutex_lock
+   3.06%     ovs-vswitchd  [kernel.kallsyms]   [k] __ticket_spin_lock
+   2.81%     ovs-vswitchd  ovs-vswitchd        [.] flow_hash_in_minimask_range
+   2.51%     ovs-vswitchd  libpthread-2.15.so  [.] pthread_mutex_unlock
+   2.27%     ovs-vswitchd  ovs-vswitchd        [.] classifier_lookup
+   1.84%     ovs-vswitchd  libc-2.15.so        [.] 0x15d30f
+   1.74%     ovs-vswitchd  [kernel.kallsyms]   [k] mutex_spin_on_owner
+   1.47%          swapper  [kernel.kallsyms]   [k] intel_idle
+   1.34%     ovs-vswitchd  ovs-vswitchd        [.] flow_hash_in_minimask
+   1.33%     ovs-vswitchd  ovs-vswitchd        [.] rule_actions_unref
+   1.16%     ovs-vswitchd  ovs-vswitchd        [.] hindex_node_with_hash
+   1.16%     ovs-vswitchd  ovs-vswitchd        [.] do_xlate_actions
+   1.09%     ovs-vswitchd  ovs-vswitchd        [.] ofproto_rule_ref
+   1.01%          netperf  [kernel.kallsyms]   [k] __ticket_spin_lock
...
There is a small increase in kernel spinlock overhead due to the same
spinlock being shared between multiple cores of the same physical CPU,
but that is barely visible in the netperf TCP_CRR test performance
(maybe ~1% performance drop, hard to tell exactly due to variance in
the test results), when testing for kernel module throughput (with no
userspace activity, handful of kernel flows).
On flow setup, a single stats instance is allocated (for the NUMA node
0).  As CPUs from multiple NUMA nodes start updating stats, new
NUMA-node specific stats instances are allocated.  This allocation on
the packet processing code path is made to never block or look for
emergency memory pools, minimizing the allocation latency.  If the
allocation fails, the existing preallocated stats instance is used.
Also, if only CPUs from one NUMA-node are updating the preallocated
stats instance, no additional stats instances are allocated.  This
eliminates the need to pre-allocate stats instances that will not be
used, also relieving the stats reader from the burden of reading stats
that are never used.
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Signed-off-by: Jesse Gross <jesse@nicira.com>
Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
											
										 
											2014-02-18 09:07:03 -08:00
										 |  |  | 	flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow) | 
					
						
							| 
									
										
										
										
											2015-09-18 13:34:22 -07:00
										 |  |  | 				       + (nr_node_ids | 
					
						
							| 
									
										
											  
											
												datapath: Per NUMA node flow stats.
Keep kernel flow stats for each NUMA node rather than each (logical)
CPU.  This avoids using the per-CPU allocator and removes most of the
kernel-side OVS locking overhead otherwise on the top of perf reports
and allows OVS to scale better with higher number of threads.
With 9 handlers and 4 revalidators netperf TCP_CRR test flow setup
rate doubles on a server with two hyper-threaded physical CPUs (16
logical cores each) compared to the current OVS master.  Tested with
non-trivial flow table with a TCP port match rule forcing all new
connections with unique port numbers to OVS userspace.  The IP
addresses are still wildcarded, so the kernel flows are not considered
as exact match 5-tuple flows.  This type of flows can be expected to
appear in large numbers as the result of more effective wildcarding
made possible by improvements in OVS userspace flow classifier.
Perf results for this test (master):
Events: 305K cycles
+   8.43%     ovs-vswitchd  [kernel.kallsyms]   [k] mutex_spin_on_owner
+   5.64%     ovs-vswitchd  [kernel.kallsyms]   [k] __ticket_spin_lock
+   4.75%     ovs-vswitchd  ovs-vswitchd        [.] find_match_wc
+   3.32%     ovs-vswitchd  libpthread-2.15.so  [.] pthread_mutex_lock
+   2.61%     ovs-vswitchd  [kernel.kallsyms]   [k] pcpu_alloc_area
+   2.19%     ovs-vswitchd  ovs-vswitchd        [.] flow_hash_in_minimask_range
+   2.03%          swapper  [kernel.kallsyms]   [k] intel_idle
+   1.84%     ovs-vswitchd  libpthread-2.15.so  [.] pthread_mutex_unlock
+   1.64%     ovs-vswitchd  ovs-vswitchd        [.] classifier_lookup
+   1.58%     ovs-vswitchd  libc-2.15.so        [.] 0x7f4e6
+   1.07%     ovs-vswitchd  [kernel.kallsyms]   [k] memset
+   1.03%          netperf  [kernel.kallsyms]   [k] __ticket_spin_lock
+   0.92%          swapper  [kernel.kallsyms]   [k] __ticket_spin_lock
...
And after this patch:
Events: 356K cycles
+   6.85%     ovs-vswitchd  ovs-vswitchd        [.] find_match_wc
+   4.63%     ovs-vswitchd  libpthread-2.15.so  [.] pthread_mutex_lock
+   3.06%     ovs-vswitchd  [kernel.kallsyms]   [k] __ticket_spin_lock
+   2.81%     ovs-vswitchd  ovs-vswitchd        [.] flow_hash_in_minimask_range
+   2.51%     ovs-vswitchd  libpthread-2.15.so  [.] pthread_mutex_unlock
+   2.27%     ovs-vswitchd  ovs-vswitchd        [.] classifier_lookup
+   1.84%     ovs-vswitchd  libc-2.15.so        [.] 0x15d30f
+   1.74%     ovs-vswitchd  [kernel.kallsyms]   [k] mutex_spin_on_owner
+   1.47%          swapper  [kernel.kallsyms]   [k] intel_idle
+   1.34%     ovs-vswitchd  ovs-vswitchd        [.] flow_hash_in_minimask
+   1.33%     ovs-vswitchd  ovs-vswitchd        [.] rule_actions_unref
+   1.16%     ovs-vswitchd  ovs-vswitchd        [.] hindex_node_with_hash
+   1.16%     ovs-vswitchd  ovs-vswitchd        [.] do_xlate_actions
+   1.09%     ovs-vswitchd  ovs-vswitchd        [.] ofproto_rule_ref
+   1.01%          netperf  [kernel.kallsyms]   [k] __ticket_spin_lock
...
There is a small increase in kernel spinlock overhead due to the same
spinlock being shared between multiple cores of the same physical CPU,
but it is barely visible in netperf TCP_CRR test performance
(maybe a ~1% performance drop; hard to tell exactly due to variance in
the test results) when testing for kernel module throughput (with no
userspace activity and a handful of kernel flows).
On flow setup, a single stats instance is allocated (for the NUMA node
0).  As CPUs from multiple NUMA nodes start updating stats, new
NUMA-node specific stats instances are allocated.  This allocation on
the packet processing code path is made to never block or look for
emergency memory pools, minimizing the allocation latency.  If the
allocation fails, the existing preallocated stats instance is used.
Also, if only CPUs from one NUMA-node are updating the preallocated
stats instance, no additional stats instances are allocated.  This
eliminates the need to pre-allocate stats instances that will not be
used, also relieving the stats reader from the burden of reading stats
that are never used.
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Signed-off-by: Jesse Gross <jesse@nicira.com>
Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
											
										 
											2014-02-18 09:07:03 -08:00
										 |  |  | 					  * sizeof(struct flow_stats *)), | 
					
						
							|  |  |  | 				       0, 0, NULL); | 
					
						
							| 
									
										
										
										
											2013-09-11 20:26:11 -07:00
										 |  |  | 	if (flow_cache == NULL) | 
					
						
							|  |  |  | 		return -ENOMEM; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
											  
											
												datapath: Per NUMA node flow stats.
Keep kernel flow stats for each NUMA node rather than each (logical)
CPU.  This avoids using the per-CPU allocator, removes most of the
kernel-side OVS locking overhead that otherwise tops the perf reports,
and allows OVS to scale better with a higher number of threads.
With 9 handlers and 4 revalidators, the netperf TCP_CRR test flow setup
rate doubles on a server with two hyper-threaded physical CPUs (16
logical cores each) compared to the current OVS master.  Tested with a
non-trivial flow table with a TCP port match rule forcing all new
connections with unique port numbers to OVS userspace.  The IP
addresses are still wildcarded, so the kernel flows are not considered
as exact match 5-tuple flows.  Flows of this type can be expected to
appear in large numbers as the result of more effective wildcarding
made possible by improvements in the OVS userspace flow classifier.
Perf results for this test (master):
Events: 305K cycles
+   8.43%     ovs-vswitchd  [kernel.kallsyms]   [k] mutex_spin_on_owner
+   5.64%     ovs-vswitchd  [kernel.kallsyms]   [k] __ticket_spin_lock
+   4.75%     ovs-vswitchd  ovs-vswitchd        [.] find_match_wc
+   3.32%     ovs-vswitchd  libpthread-2.15.so  [.] pthread_mutex_lock
+   2.61%     ovs-vswitchd  [kernel.kallsyms]   [k] pcpu_alloc_area
+   2.19%     ovs-vswitchd  ovs-vswitchd        [.] flow_hash_in_minimask_range
+   2.03%          swapper  [kernel.kallsyms]   [k] intel_idle
+   1.84%     ovs-vswitchd  libpthread-2.15.so  [.] pthread_mutex_unlock
+   1.64%     ovs-vswitchd  ovs-vswitchd        [.] classifier_lookup
+   1.58%     ovs-vswitchd  libc-2.15.so        [.] 0x7f4e6
+   1.07%     ovs-vswitchd  [kernel.kallsyms]   [k] memset
+   1.03%          netperf  [kernel.kallsyms]   [k] __ticket_spin_lock
+   0.92%          swapper  [kernel.kallsyms]   [k] __ticket_spin_lock
...
And after this patch:
Events: 356K cycles
+   6.85%     ovs-vswitchd  ovs-vswitchd        [.] find_match_wc
+   4.63%     ovs-vswitchd  libpthread-2.15.so  [.] pthread_mutex_lock
+   3.06%     ovs-vswitchd  [kernel.kallsyms]   [k] __ticket_spin_lock
+   2.81%     ovs-vswitchd  ovs-vswitchd        [.] flow_hash_in_minimask_range
+   2.51%     ovs-vswitchd  libpthread-2.15.so  [.] pthread_mutex_unlock
+   2.27%     ovs-vswitchd  ovs-vswitchd        [.] classifier_lookup
+   1.84%     ovs-vswitchd  libc-2.15.so        [.] 0x15d30f
+   1.74%     ovs-vswitchd  [kernel.kallsyms]   [k] mutex_spin_on_owner
+   1.47%          swapper  [kernel.kallsyms]   [k] intel_idle
+   1.34%     ovs-vswitchd  ovs-vswitchd        [.] flow_hash_in_minimask
+   1.33%     ovs-vswitchd  ovs-vswitchd        [.] rule_actions_unref
+   1.16%     ovs-vswitchd  ovs-vswitchd        [.] hindex_node_with_hash
+   1.16%     ovs-vswitchd  ovs-vswitchd        [.] do_xlate_actions
+   1.09%     ovs-vswitchd  ovs-vswitchd        [.] ofproto_rule_ref
+   1.01%          netperf  [kernel.kallsyms]   [k] __ticket_spin_lock
...
There is a small increase in kernel spinlock overhead due to the same
spinlock being shared between multiple cores of the same physical CPU,
but it is barely visible in netperf TCP_CRR test performance
(maybe a ~1% performance drop; hard to tell exactly due to variance in
the test results) when testing for kernel module throughput (with no
userspace activity and a handful of kernel flows).
On flow setup, a single stats instance is allocated (for the NUMA node
0).  As CPUs from multiple NUMA nodes start updating stats, new
NUMA-node specific stats instances are allocated.  This allocation on
the packet processing code path is made to never block or look for
emergency memory pools, minimizing the allocation latency.  If the
allocation fails, the existing preallocated stats instance is used.
Also, if only CPUs from one NUMA-node are updating the preallocated
stats instance, no additional stats instances are allocated.  This
eliminates the need to pre-allocate stats instances that will not be
used, also relieving the stats reader from the burden of reading stats
that are never used.
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Signed-off-by: Jesse Gross <jesse@nicira.com>
Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
											
										 
											2014-02-18 09:07:03 -08:00
										 |  |  | 	flow_stats_cache | 
					
						
							|  |  |  | 		= kmem_cache_create("sw_flow_stats", sizeof(struct flow_stats), | 
					
						
							|  |  |  | 				    0, SLAB_HWCACHE_ALIGN, NULL); | 
					
						
							|  |  |  | 	if (flow_stats_cache == NULL) { | 
					
						
							|  |  |  | 		kmem_cache_destroy(flow_cache); | 
					
						
							|  |  |  | 		flow_cache = NULL; | 
					
						
							|  |  |  | 		return -ENOMEM; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-09-11 20:26:11 -07:00
										 |  |  | 	return 0; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* Uninitializes the flow module by destroying the flow_stats_cache and flow_cache kmem caches, in that order. */ | 
					
						
							|  |  |  | void ovs_flow_exit(void) | 
					
						
							|  |  |  | { | 
					
						
							| 
									
										
											  
											
												datapath: Per NUMA node flow stats.
Keep kernel flow stats for each NUMA node rather than each (logical)
CPU.  This avoids using the per-CPU allocator and removes most of the
kernel-side OVS locking overhead otherwise on the top of perf reports
and allows OVS to scale better with higher number of threads.
With 9 handlers and 4 revalidators netperf TCP_CRR test flow setup
rate doubles on a server with two hyper-threaded physical CPUs (16
logical cores each) compared to the current OVS master.  Tested with
non-trivial flow table with a TCP port match rule forcing all new
connections with unique port numbers to OVS userspace.  The IP
addresses are still wildcarded, so the kernel flows are not considered
as exact match 5-tuple flows.  This type of flows can be expected to
appear in large numbers as the result of more effective wildcarding
made possible by improvements in OVS userspace flow classifier.
Perf results for this test (master):
Events: 305K cycles
+   8.43%     ovs-vswitchd  [kernel.kallsyms]   [k] mutex_spin_on_owner
+   5.64%     ovs-vswitchd  [kernel.kallsyms]   [k] __ticket_spin_lock
+   4.75%     ovs-vswitchd  ovs-vswitchd        [.] find_match_wc
+   3.32%     ovs-vswitchd  libpthread-2.15.so  [.] pthread_mutex_lock
+   2.61%     ovs-vswitchd  [kernel.kallsyms]   [k] pcpu_alloc_area
+   2.19%     ovs-vswitchd  ovs-vswitchd        [.] flow_hash_in_minimask_range
+   2.03%          swapper  [kernel.kallsyms]   [k] intel_idle
+   1.84%     ovs-vswitchd  libpthread-2.15.so  [.] pthread_mutex_unlock
+   1.64%     ovs-vswitchd  ovs-vswitchd        [.] classifier_lookup
+   1.58%     ovs-vswitchd  libc-2.15.so        [.] 0x7f4e6
+   1.07%     ovs-vswitchd  [kernel.kallsyms]   [k] memset
+   1.03%          netperf  [kernel.kallsyms]   [k] __ticket_spin_lock
+   0.92%          swapper  [kernel.kallsyms]   [k] __ticket_spin_lock
...
And after this patch:
Events: 356K cycles
+   6.85%     ovs-vswitchd  ovs-vswitchd        [.] find_match_wc
+   4.63%     ovs-vswitchd  libpthread-2.15.so  [.] pthread_mutex_lock
+   3.06%     ovs-vswitchd  [kernel.kallsyms]   [k] __ticket_spin_lock
+   2.81%     ovs-vswitchd  ovs-vswitchd        [.] flow_hash_in_minimask_range
+   2.51%     ovs-vswitchd  libpthread-2.15.so  [.] pthread_mutex_unlock
+   2.27%     ovs-vswitchd  ovs-vswitchd        [.] classifier_lookup
+   1.84%     ovs-vswitchd  libc-2.15.so        [.] 0x15d30f
+   1.74%     ovs-vswitchd  [kernel.kallsyms]   [k] mutex_spin_on_owner
+   1.47%          swapper  [kernel.kallsyms]   [k] intel_idle
+   1.34%     ovs-vswitchd  ovs-vswitchd        [.] flow_hash_in_minimask
+   1.33%     ovs-vswitchd  ovs-vswitchd        [.] rule_actions_unref
+   1.16%     ovs-vswitchd  ovs-vswitchd        [.] hindex_node_with_hash
+   1.16%     ovs-vswitchd  ovs-vswitchd        [.] do_xlate_actions
+   1.09%     ovs-vswitchd  ovs-vswitchd        [.] ofproto_rule_ref
+   1.01%          netperf  [kernel.kallsyms]   [k] __ticket_spin_lock
...
There is a small increase in kernel spinlock overhead due to the same
spinlock being shared between multiple cores of the same physical CPU,
but that is barely visible in the netperf TCP_CRR test performance
(maybe ~1% performance drop, hard to tell exactly due to variance in
the test results), when testing for kernel module throughput (with no
userspace activity, handful of kernel flows).
On flow setup, a single stats instance is allocated (for the NUMA node
0).  As CPUs from multiple NUMA nodes start updating stats, new
NUMA-node specific stats instances are allocated.  This allocation on
the packet processing code path is made to never block or look for
emergency memory pools, minimizing the allocation latency.  If the
allocation fails, the existing preallocated stats instance is used.
Also, if only CPUs from one NUMA-node are updating the preallocated
stats instance, no additional stats instances are allocated.  This
eliminates the need to pre-allocate stats instances that will not be
used, also relieving the stats reader from the burden of reading stats
that are never used.
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Signed-off-by: Jesse Gross <jesse@nicira.com>
Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
											
										 
											2014-02-18 09:07:03 -08:00
										 |  |  | 	kmem_cache_destroy(flow_stats_cache); | 
					
						
							| 
									
										
										
										
											2013-09-11 20:26:11 -07:00
										 |  |  | 	kmem_cache_destroy(flow_cache); | 
					
						
							|  |  |  | } | 