mirror of
https://github.com/openvswitch/ovs
synced 2025-09-02 07:15:17 +00:00
dpif-netdev: Add DPDK netdev.
Following patch adds DPDK netdev-class to userspace datapath. Now OVS can use DPDK port for IO by just configuring DPDK port and then adding dpdk type port to userspace datapath. Refer to INSTALL.DPDK doc for further info. This is based on a patch from Gerald Rogers. Signed-off-by: Gerald Rogers <gerald.rogers@intel.com> Signed-off-by: Pravin B Shelar <pshelar@nicira.com> Acked-by: Thomas Graf <tgraf@redhat.com>
This commit is contained in:
1
INSTALL
1
INSTALL
@@ -10,6 +10,7 @@ on a specific platform, please see one of these files:
|
||||
- INSTALL.RHEL
|
||||
- INSTALL.XenServer
|
||||
- INSTALL.NetBSD
|
||||
- INSTALL.DPDK
|
||||
|
||||
Build Requirements
|
||||
------------------
|
||||
|
93
INSTALL.DPDK
Normal file
93
INSTALL.DPDK
Normal file
@@ -0,0 +1,93 @@
|
||||
Using Open vSwitch with DPDK
|
||||
============================
|
||||
|
||||
Open vSwitch can use the Intel(R) DPDK lib to operate entirely in
|
||||
userspace. This file explains how to install and use Open vSwitch in
|
||||
such a mode.
|
||||
|
||||
The DPDK support of Open vSwitch is considered experimental.
|
||||
It has not been thoroughly tested.
|
||||
|
||||
This version of Open vSwitch should be built manually with "configure"
|
||||
and "make".
|
||||
|
||||
Building and Installing:
|
||||
------------------------
|
||||
|
||||
DPDK 1.6 is recommended.
|
||||
|
||||
DPDK:
|
||||
cd DPDK
|
||||
Update config/defconfig_x86_64-default-linuxapp-gcc so that DPDK generates a single lib file.
|
||||
CONFIG_RTE_BUILD_COMBINE_LIBS=y
|
||||
|
||||
make install T=x86_64-default-linuxapp-gcc
|
||||
For details refer to http://dpdk.org/
|
||||
|
||||
Linux kernel:
|
||||
Refer to intel-dpdk-getting-started-guide.pdf for understanding
|
||||
DPDK kernel requirements.
|
||||
|
||||
OVS:
|
||||
cd $(OVS_DIR)/openvswitch
|
||||
./boot.sh
|
||||
./configure --with-dpdk=$(DPDK_BUILD)
|
||||
make
|
||||
|
||||
Refer to INSTALL.userspace for general requirements of building
|
||||
userspace OVS.
|
||||
|
||||
Using the DPDK with ovs-vswitchd:
|
||||
---------------------------------
|
||||
|
||||
First setup DPDK devices:
|
||||
- insert uio.ko
|
||||
- insert igb_uio.ko
|
||||
e.g. insmod DPDK/x86_64-default-linuxapp-gcc/kmod/igb_uio.ko
|
||||
- mount hugetlbfs
|
||||
e.g. mount -t hugetlbfs -o pagesize=1G none /mnt/huge/
|
||||
- Bind network device to igb_uio.
|
||||
e.g. DPDK/tools/pci_unbind.py --bind=igb_uio eth1
|
||||
|
||||
Refer to http://www.dpdk.org/doc/quick-start for verifying DPDK setup.
|
||||
|
||||
Start vswitchd:
|
||||
DPDK configuration arguments can be passed to vswitchd via `--dpdk`
|
||||
argument. dpdk arg -c is ignored by ovs-dpdk, but it is a required parameter
|
||||
for dpdk initialization.
|
||||
|
||||
e.g.
|
||||
./vswitchd/ovs-vswitchd --dpdk -c 0x1 -n 4 -- unix:$DB_SOCK --pidfile --detach
|
||||
|
||||
To use ovs-vswitchd with DPDK, create a bridge with datapath_type
|
||||
"netdev" in the configuration database. For example:
|
||||
|
||||
ovs-vsctl add-br br0
|
||||
ovs-vsctl set bridge br0 datapath_type=netdev
|
||||
|
||||
Now you can add dpdk devices. OVS expects DPDK device names to start with dpdk
|
||||
and end with the port id. vswitchd should print the number of dpdk devices found.
|
||||
|
||||
ovs-vsctl add-port br0 dpdk0 -- set Interface dpdk0 type=dpdk
|
||||
|
||||
Once the first DPDK port is added to vswitchd, it creates a polling thread and
|
||||
polls the dpdk device in a continuous loop. Therefore CPU utilization
|
||||
for that thread is always 100%.
|
||||
|
||||
Restrictions:
|
||||
-------------
|
||||
|
||||
- This support is for physical NICs. It has been tested with Intel NICs only.
|
||||
- vswitchd userspace datapath does set the affinity of the polling thread, but it is
|
||||
assumed that devices are on numa node 0. Therefore if a device is
|
||||
attached to a non-zero numa node, switching performance would be
|
||||
suboptimal.
|
||||
- There are a fixed number of polling threads and a fixed number of per
|
||||
device queues configured.
|
||||
- Works only with 1500 MTU; a few changes are needed in the DPDK lib to fix this issue.
|
||||
- Currently the DPDK port does not make use of any offload functionality.
|
||||
|
||||
Bug Reporting:
|
||||
--------------
|
||||
|
||||
Please report problems to bugs@openvswitch.org.
|
@@ -63,6 +63,7 @@ EXTRA_DIST = \
|
||||
FAQ \
|
||||
INSTALL \
|
||||
INSTALL.Debian \
|
||||
INSTALL.DPDK \
|
||||
INSTALL.Fedora \
|
||||
INSTALL.KVM \
|
||||
INSTALL.Libvirt \
|
||||
|
1
NEWS
1
NEWS
@@ -10,6 +10,7 @@ Post-v2.1.0
|
||||
instead.
|
||||
- Support for Linux kernels up to 3.12. On Kernel 3.12 OVS uses tunnel
|
||||
API for GRE and VXLAN.
|
||||
- Added DPDK support.
|
||||
|
||||
|
||||
v2.1.0 - xx xxx xxxx
|
||||
|
26
acinclude.m4
26
acinclude.m4
@@ -157,6 +157,32 @@ AC_DEFUN([OVS_CHECK_LINUX], [
|
||||
AM_CONDITIONAL(LINUX_ENABLED, test -n "$KBUILD")
|
||||
])
|
||||
|
||||
dnl OVS_CHECK_DPDK
|
||||
dnl
|
||||
dnl Configure DPDK source tree
|
||||
AC_DEFUN([OVS_CHECK_DPDK], [
|
||||
AC_ARG_WITH([dpdk],
|
||||
[AC_HELP_STRING([--with-dpdk=/path/to/dpdk],
|
||||
[Specify the DPDK build directory])])
|
||||
|
||||
if test X"$with_dpdk" != X; then
|
||||
RTE_SDK=$with_dpdk
|
||||
|
||||
DPDK_INCLUDE=$RTE_SDK/include
|
||||
DPDK_LIB_DIR=$RTE_SDK/lib
|
||||
DPDK_LIBS="$DPDK_LIB_DIR/libintel_dpdk.a"
|
||||
|
||||
LIBS="$DPDK_LIBS $LIBS"
|
||||
CPPFLAGS="-I$DPDK_INCLUDE $CPPFLAGS"
|
||||
|
||||
AC_DEFINE([DPDK_NETDEV], [1], [System uses the DPDK module.])
|
||||
else
|
||||
RTE_SDK=
|
||||
fi
|
||||
|
||||
AM_CONDITIONAL([DPDK_NETDEV], test -n "$RTE_SDK")
|
||||
])
|
||||
|
||||
dnl OVS_GREP_IFELSE(FILE, REGEX, [IF-MATCH], [IF-NO-MATCH])
|
||||
dnl
|
||||
dnl Greps FILE for REGEX. If it matches, runs IF-MATCH, otherwise IF-NO-MATCH.
|
||||
|
@@ -120,6 +120,7 @@ OVS_ENABLE_SPARSE
|
||||
AC_ARG_VAR(KARCH, [Kernel Architecture String])
|
||||
AC_SUBST(KARCH)
|
||||
OVS_CHECK_LINUX
|
||||
OVS_CHECK_DPDK
|
||||
|
||||
AC_CONFIG_FILES(Makefile)
|
||||
AC_CONFIG_FILES(datapath/Makefile)
|
||||
|
@@ -302,6 +302,12 @@ lib_libopenvswitch_la_SOURCES += \
|
||||
lib/route-table.h
|
||||
endif
|
||||
|
||||
if DPDK_NETDEV
|
||||
lib_libopenvswitch_la_SOURCES += \
|
||||
lib/netdev-dpdk.c \
|
||||
lib/netdev-dpdk.h
|
||||
endif
|
||||
|
||||
if HAVE_POSIX_AIO
|
||||
lib_libopenvswitch_la_SOURCES += lib/async-append-aio.c
|
||||
else
|
||||
|
1198
lib/netdev-dpdk.c
Normal file
1198
lib/netdev-dpdk.c
Normal file
File diff suppressed because it is too large
Load Diff
32
lib/netdev-dpdk.h
Normal file
32
lib/netdev-dpdk.h
Normal file
@@ -0,0 +1,32 @@
|
||||
#ifndef NETDEV_DPDK_H
|
||||
#define NETDEV_DPDK_H
|
||||
|
||||
#ifdef DPDK_NETDEV
|
||||
#include <config.h>
|
||||
|
||||
#include <rte_config.h>
|
||||
#include <rte_eal.h>
|
||||
#include <rte_debug.h>
|
||||
#include <rte_ethdev.h>
|
||||
#include <rte_errno.h>
|
||||
#include <rte_memzone.h>
|
||||
#include <rte_memcpy.h>
|
||||
#include <rte_cycles.h>
|
||||
#include <rte_spinlock.h>
|
||||
#include <rte_launch.h>
|
||||
#include <rte_malloc.h>
|
||||
|
||||
#include "ofpbuf.h"
|
||||
|
||||
int dpdk_init(int argc, char **argv);
|
||||
void netdev_dpdk_register(void);
|
||||
void free_dpdk_buf(struct ofpbuf *);
|
||||
|
||||
#else
|
||||
|
||||
#define dpdk_init(arg1, arg2) (0)
|
||||
#define netdev_dpdk_register()
|
||||
#define free_dpdk_buf(arg)
|
||||
|
||||
#endif /* DPDK_NETDEV */
|
||||
#endif
|
@@ -31,6 +31,7 @@
|
||||
#include "fatal-signal.h"
|
||||
#include "hash.h"
|
||||
#include "list.h"
|
||||
#include "netdev-dpdk.h"
|
||||
#include "netdev-provider.h"
|
||||
#include "netdev-vport.h"
|
||||
#include "ofpbuf.h"
|
||||
@@ -124,6 +125,7 @@ netdev_initialize(void)
|
||||
netdev_register_provider(&netdev_tap_class);
|
||||
netdev_register_provider(&netdev_bsd_class);
|
||||
#endif
|
||||
netdev_dpdk_register();
|
||||
|
||||
ovsthread_once_done(&once);
|
||||
}
|
||||
|
10
lib/ofpbuf.c
10
lib/ofpbuf.c
@@ -19,6 +19,7 @@
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "dynamic-string.h"
|
||||
#include "netdev-dpdk.h"
|
||||
#include "util.h"
|
||||
|
||||
static void
|
||||
@@ -110,8 +111,13 @@ ofpbuf_init(struct ofpbuf *b, size_t size)
|
||||
void
|
||||
ofpbuf_uninit(struct ofpbuf *b)
|
||||
{
|
||||
if (b && b->source == OFPBUF_MALLOC) {
|
||||
free(b->base);
|
||||
if (b) {
|
||||
if (b->source == OFPBUF_MALLOC) {
|
||||
free(b->base);
|
||||
}
|
||||
if (b->source == OFPBUF_DPDK) {
|
||||
free_dpdk_buf(b);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -30,7 +30,8 @@ enum ofpbuf_source {
|
||||
OFPBUF_MALLOC, /* Obtained via malloc(). */
|
||||
OFPBUF_STACK, /* Un-movable stack space or static buffer. */
|
||||
OFPBUF_STUB, /* Starts on stack, may expand into heap. */
|
||||
OFPBUF_DPDK, /* buffer data is from DPDK allocated memory. */
|
||||
OFPBUF_DPDK, /* buffer data is from DPDK allocated memory.
|
||||
ref to build_ofpbuf() in netdev-dpdk. */
|
||||
};
|
||||
|
||||
/* Buffer for holding arbitrary data. An ofpbuf is automatically reallocated
|
||||
|
@@ -48,6 +48,7 @@
|
||||
#include "vconn.h"
|
||||
#include "vlog.h"
|
||||
#include "lib/vswitch-idl.h"
|
||||
#include "lib/netdev-dpdk.h"
|
||||
|
||||
VLOG_DEFINE_THIS_MODULE(vswitchd);
|
||||
|
||||
@@ -69,8 +70,12 @@ main(int argc, char *argv[])
|
||||
bool exiting;
|
||||
int retval;
|
||||
|
||||
proctitle_init(argc, argv);
|
||||
set_program_name(argv[0]);
|
||||
retval = dpdk_init(argc,argv);
|
||||
argc -= retval;
|
||||
argv += retval;
|
||||
|
||||
proctitle_init(argc, argv);
|
||||
service_start(&argc, &argv);
|
||||
remote = parse_options(argc, argv, &unixctl_path);
|
||||
fatal_ignore_sigpipe();
|
||||
@@ -143,7 +148,8 @@ parse_options(int argc, char *argv[], char **unixctl_pathp)
|
||||
OPT_ENABLE_DUMMY,
|
||||
OPT_DISABLE_SYSTEM,
|
||||
OPT_ENABLE_OF14,
|
||||
DAEMON_OPTION_ENUMS
|
||||
DAEMON_OPTION_ENUMS,
|
||||
OPT_DPDK,
|
||||
};
|
||||
static const struct option long_options[] = {
|
||||
{"help", no_argument, NULL, 'h'},
|
||||
@@ -158,6 +164,7 @@ parse_options(int argc, char *argv[], char **unixctl_pathp)
|
||||
{"enable-dummy", optional_argument, NULL, OPT_ENABLE_DUMMY},
|
||||
{"disable-system", no_argument, NULL, OPT_DISABLE_SYSTEM},
|
||||
{"enable-of14", no_argument, NULL, OPT_ENABLE_OF14},
|
||||
{"dpdk", required_argument, NULL, OPT_DPDK},
|
||||
{NULL, 0, NULL, 0},
|
||||
};
|
||||
char *short_options = long_options_to_short_options(long_options);
|
||||
@@ -213,6 +220,9 @@ parse_options(int argc, char *argv[], char **unixctl_pathp)
|
||||
case '?':
|
||||
exit(EXIT_FAILURE);
|
||||
|
||||
case OPT_DPDK:
|
||||
break;
|
||||
|
||||
default:
|
||||
abort();
|
||||
}
|
||||
|
Reference in New Issue
Block a user