2
0
mirror of https://github.com/openvswitch/ovs synced 2025-08-22 01:51:26 +00:00

Merge remote-tracking branch 'origin/master' into ovn

Conflicts:
	tutorial/ovs-sandbox
This commit is contained in:
Justin Pettit 2015-04-12 10:13:59 -07:00
commit daaeeec0bd
160 changed files with 7291 additions and 2102 deletions

View File

@ -48,6 +48,7 @@ function install_dpdk()
fi
find ./ -type f | xargs sed -i 's/max-inline-insns-single=100/max-inline-insns-single=400/'
sed -ri 's,(CONFIG_RTE_BUILD_COMBINE_LIBS=).*,\1y,' config/common_linuxapp
sed -ri 's,(CONFIG_RTE_LIBRTE_VHOST=).*,\1y,' config/common_linuxapp
sed -ri '/CONFIG_RTE_LIBNAME/a CONFIG_RTE_BUILD_FPIC=y' config/common_linuxapp
sed -ri '/EXECENV_CFLAGS = -pthread -fPIC/{s/$/\nelse ifeq ($(CONFIG_RTE_BUILD_FPIC),y)/;s/$/\nEXECENV_CFLAGS = -pthread -fPIC/}' mk/exec-env/linuxapp/rte.vars.mk
make config CC=gcc T=x86_64-native-linuxapp-gcc

View File

@ -3,6 +3,9 @@
sudo apt-get update -qq
sudo apt-get install -qq libssl-dev llvm-dev
sudo apt-get install -qq gcc-multilib
if [ "$DPDK" ]; then
sudo apt-get install -qq libfuse-dev
fi
git clone git://git.kernel.org/pub/scm/devel/sparse/chrisl/sparse.git
cd sparse && make && sudo make install PREFIX=/usr && cd ..

View File

@ -85,12 +85,14 @@ Jesse Gross jesse@nicira.com
Jing Ai jinga@google.com
Joe Perches joe@perches.com
Joe Stringer joestringer@nicira.com
Jonathan Vestin jonavest@kau.se
Jun Nakajima jun.nakajima@intel.com
Justin Pettit jpettit@nicira.com
Keith Amidon keith@nicira.com
Ken Ajiro ajiro@mxw.nes.nec.co.jp
Kenneth Duda kduda@arista.com
Kentaro Ebisawa ebiken.g@gmail.com
Kevin Lo kevlo@FreeBSD.org
Kevin Traynor kevin.traynor@intel.com
Kmindg G kmindg@gmail.com
Krishna Kondaka kkondaka@vmware.com
@ -215,6 +217,7 @@ Brad Hall brad@nicira.com
Brandon Heller brandonh@stanford.edu
Brendan Kelley bkelley@nicira.com
Brent Salisbury brent.salisbury@gmail.com
Brian Field Brian_Field@cable.comcast.com
Bryan Fulton bryan@nicira.com
Bryan Osoro bosoro@nicira.com
Cedric Hobbs cedric@nicira.com
@ -246,6 +249,7 @@ Gur Stavi gstavi@mrv.com
Hari Sasank Bhamidipalli hbhamidi@cisco.com
Hassan Khan hassan.khan@seecs.edu.pk
Hector Oron hector.oron@gmail.com
Hemanth Kumar Mantri mantri@nutanix.com
Henrik Amren henrik@nicira.com
Hiroshi Tanaka htanaka@nicira.com
Hiroshi Miyata miyahiro.dazu@gmail.com
@ -345,6 +349,7 @@ Voravit T. voravit@kth.se
Yeming Zhao zhaoyeming@gmail.com
Ying Chen yingchen@vmware.com
Yongqiang Liu liuyq7809@gmail.com
Zhangguanghui zhang.guanghui@h3c.com
Ziyou Wang ziyouw@vmware.com
Zoltán Balogh zoltan.balogh@ericsson.com
ankur dwivedi ankurengg2003@gmail.com

View File

@ -0,0 +1,2 @@
EXTRA_DIST += \
Documentation/group-selection-method-property.txt

View File

@ -0,0 +1,153 @@
Proposal for Group Selection Method Property
Version: 0.0.3
Author: Simon Horman <simon.horman@netronome.com>, et al.
Initial Public Revision: September 2014
Contents
========
1. Introduction
2. How it Works
3. Experimenter ID
4. Group Experimenter Property
5. History
1. Introduction
===============
This text describes a Netronome Extension to (draft) OpenFlow 1.5 that allows a
controller to provide more information on the selection method for select
groups. This proposal is in the form of an enhanced select group type.
This may subsequently be proposed as an extension or update to
the OpenFlow specification.
2. How it works
===============
A new Netronome group experimenter property is defined which provides
compatibility with the group mod message defined in draft Open Flow 1.5
(also known as ONF EXT-350) and allows parameters for the selection
method of select groups to be passed by the controller. In particular it
allows controllers to:
* Specify the fields used for bucket selection by the select group.
* Designate the selection method used.
* Provide a non-field parameter to the selection method.
3. Experimenter ID
==================
The Experimenter ID of this extension is:
NTR_VENDOR_ID = 0x00001540
4. Group Experimenter Property
==============================
The following group property experimenter type is defined by this extension.
enum ntr_group_mod_subtype {
NTRT_SELECTION_METHOD = 1,
};
Modifications to the group table from the controller may be done with an
OFPT_GROUP_MOD message described in (draft) Open Flow 1.5. Group Entry
Message. Of relevance here is that (draft) Open Flow 1.5 group messages
have properties.
This proposal is defined in terms of an implementation of struct
ofp_group_prop_experimenter which is described in (draft) Open Flow 1.5.
The implementation is:
struct ntr_group_prop_selection_method {
ovs_be16 type; /* OFPGPT_EXPERIMENTER. */
ovs_be16 length; /* Length in bytes of this property. */
ovs_be32 experimenter; /* NTR_VENDOR_ID. */
ovs_be32 exp_type; /* NTRT_SELECTION_METHOD. */
ovs_be32 pad;
char selection_method[NTR_MAX_SELECTION_METHOD_LEN];
/* Null-terminated */
ovs_be64 selection_method_param; /* Non-Field parameter for
* bucket selection. */
/* Followed by:
* - Exactly (length - 40) (possibly 0) bytes containing OXM TLVs, then
* - Exactly ((length + 7)/8*8 - length) (between 0 and 7) bytes of
* all-zero bytes
* In summary, ntr_group_prop_selection_method is padded as needed,
* to make its overall size a multiple of 8, to preserve alignment
* in structures using it.
*/
/* uint8_t field_array[0]; */ /* Zero or more fields encoded as
* OXM TLVs where the has_mask bit must
* be zero and the value it specifies is
* a mask to apply to packet fields and
* then input them to the selection
* method of a select group. */
/* uint8_t pad2[0]; */
};
OFP_ASSERT(sizeof(struct ntr_group_prop_selection_method) == 40);
This property may only be used with group mod messages whose:
* command is OFPGC_ADD or OFPGC_MODIFY; and
* type is OFPGT_SELECT
The type field is the OFPGPT_EXPERIMENTER which is
defined in EXT-350 as 0xffff.
The experimenter field is the Experimenter ID (see 3).
The exp_type field is NTRT_SELECTION_METHOD.
The group selection_method is a null-terminated string which if non-zero
length specifies a selection method known to an underlying layer of the
switch. The value of NTR_MAX_SELECTION_METHOD_LEN is 16.
The group selection_method may be zero-length to request compatibility with
Open Flow 1.4.
The selection_method_param provides a non-field parameter for
the group selection_method. It must be all-zeros unless the
group selection_method is non-zero length.
The selection_method_param may for example be used as an initial value for
the hash of a hash group selection method.
The fields field is an ofp_match structure which includes the fields which
should be used as inputs to bucket selection. ofp_match is described in
Open Flow 1.4 section 7.2.2 Flow Match Structures.
Fields must not be specified unless the group selection_method is non-zero
length.
The pre-requisites for fields specified must be satisfied in the match for
any flow that uses the group.
Masking is allowed but not required for fields whose TLVs allow masking.
The fields may for example be used as the fields that are hashed
by a hash group selection method.
5. History
==========
This proposal has been developed independently of any similar work in this
area. No such work is known.

9
FAQ.md
View File

@ -207,6 +207,7 @@ A: Support for tunnels was added to the upstream Linux kernel module
|:--------:|:-------------:
| GRE | 3.11
| VXLAN | 3.12
| Geneve | 3.18
| LISP | <not upstream>
If you are using a version of the kernel that is older than the one
@ -216,6 +217,14 @@ A: Support for tunnels was added to the upstream Linux kernel module
persist after doing this, check to make sure that the module that is
loaded is the one you expect.
### Q: Why are UDP tunnel checksums not computed for VXLAN or Geneve?
A: Generating outer UDP checksums requires kernel support that was not
part of the initial implementation of these protocols. If using the
upstream Linux Open vSwitch module, you must use kernel 4.0 or
newer. The out-of-tree modules from Open vSwitch release 2.4 and later
support UDP checksums.
### Q: What features are not available when using the userspace datapath?
A: Tunnel virtual ports are not supported, as described in the

View File

@ -16,7 +16,7 @@ OVS needs a system with 1GB hugepages support.
Building and Installing:
------------------------
Required DPDK 1.8.0
Required DPDK 1.8.0, `fuse`, `fuse-devel` (`libfuse-dev` on Debian/Ubuntu)
1. Configure build & install DPDK:
1. Set `$DPDK_DIR`
@ -31,7 +31,12 @@ Required DPDK 1.8.0
`CONFIG_RTE_BUILD_COMBINE_LIBS=y`
Then run `make install` to build and isntall the library.
Update `config/common_linuxapp` so that DPDK is built with vhost
libraries:
`CONFIG_RTE_LIBRTE_VHOST=y`
Then run `make install` to build and install the library.
For default install without IVSHMEM:
`make install T=x86_64-native-linuxapp-gcc`
@ -290,12 +295,256 @@ A general rule of thumb for better performance is that the client
application should not be assigned the same dpdk core mask "-c" as
the vswitchd.
DPDK vhost:
-----------
Only vhost-cuse is supported at present, i.e. not using the standard QEMU
vhost-user interface. It is intended that vhost-user support will be added
in future releases when supported in DPDK and that vhost-cuse will eventually
be deprecated. See [DPDK Docs] for more info on vhost.
Prerequisites:
1. Insert the Cuse module:
`modprobe cuse`
2. Build and insert the `eventfd_link` module:
`cd $DPDK_DIR/lib/librte_vhost/eventfd_link/`
`make`
`insmod $DPDK_DIR/lib/librte_vhost/eventfd_link.ko`
Following the steps above to create a bridge, you can now add DPDK vhost
as a port to the vswitch.
`ovs-vsctl add-port br0 dpdkvhost0 -- set Interface dpdkvhost0 type=dpdkvhost`
Unlike DPDK ring ports, DPDK vhost ports can have arbitrary names:
`ovs-vsctl add-port br0 port123ABC -- set Interface port123ABC type=dpdkvhost`
However, please note that when attaching userspace devices to QEMU, the
name provided during the add-port operation must match the ifname parameter
on the QEMU command line.
DPDK vhost VM configuration:
----------------------------
vhost ports use a Linux* character device to communicate with QEMU.
By default it is set to `/dev/vhost-net`. It is possible to reuse this
standard device for DPDK vhost, which makes setup a little simpler but it
is better practice to specify an alternative character device in order to
avoid any conflicts if kernel vhost is to be used in parallel.
1. This step is only needed if using an alternative character device.
The new character device filename must be specified on the vswitchd
commandline:
`./vswitchd/ovs-vswitchd --dpdk --cuse_dev_name my-vhost-net -c 0x1 ...`
Note that the `--cuse_dev_name` argument and associated string must be the first
arguments after `--dpdk` and come before the EAL arguments. In the example
above, the character device to be used will be `/dev/my-vhost-net`.
2. This step is only needed if reusing the standard character device. It will
conflict with the kernel vhost character device so the user must first
remove it.
`rm -rf /dev/vhost-net`
3a. Configure virtio-net adaptors:
The following parameters must be passed to the QEMU binary:
```
-netdev tap,id=<id>,script=no,downscript=no,ifname=<name>,vhost=on
-device virtio-net-pci,netdev=net1,mac=<mac>
```
Repeat the above parameters for multiple devices.
The DPDK vhost library will negotiate its own features, so they
need not be passed in as command line params. Note that as offloads are
disabled this is the equivalent of setting:
`csum=off,gso=off,guest_tso4=off,guest_tso6=off,guest_ecn=off`
3b. If using an alternative character device, it must also be explicitly
passed to QEMU using the `vhostfd` argument:
```
-netdev tap,id=<id>,script=no,downscript=no,ifname=<name>,vhost=on,
vhostfd=<open_fd>
-device virtio-net-pci,netdev=net1,mac=<mac>
```
The open file descriptor must be passed to QEMU running as a child
process. This could be done with a simple python script.
```
#!/usr/bin/python
fd = os.open("/dev/usvhost", os.O_RDWR)
subprocess.call("qemu-system-x86_64 .... -netdev tap,id=vhostnet0,\
vhost=on,vhostfd=" + fd +"...", shell=True)
```
Alternatively, the `qemu-wrap.py` script can be used to automate the
requirements specified above and can be used in conjunction with libvirt if
desired. See the "DPDK vhost VM configuration with QEMU wrapper" section
below.
4. Configure huge pages:
QEMU must allocate the VM's memory on hugetlbfs. Vhost ports access a
virtio-net device's virtual rings and packet buffers mapping the VM's
physical memory on hugetlbfs. To enable vhost-ports to map the VM's
memory into their process address space, pass the following parameters
to QEMU:
`-object memory-backend-file,id=mem,size=4096M,mem-path=/dev/hugepages,
share=on -numa node,memdev=mem -mem-prealloc`
DPDK vhost VM configuration with QEMU wrapper:
----------------------------------------------
The QEMU wrapper script automatically detects and calls QEMU with the
necessary parameters. It performs the following actions:
* Automatically detects the location of the hugetlbfs and inserts this
into the command line parameters.
* Automatically opens file descriptors for each virtio-net device and
inserts this into the command line parameters.
* Calls QEMU passing both the command line parameters passed to the
script itself and those it has auto-detected.
Before use, you **must** edit the configuration parameters section of the
script to point to the correct emulator location and set additional
settings. Of these settings, `emul_path` and `us_vhost_path` **must** be
set. All other settings are optional.
To use directly from the command line simply pass the wrapper some of the
QEMU parameters: it will configure the rest. For example:
```
qemu-wrap.py -cpu host -boot c -hda <disk image> -m 4096 -smp 4
--enable-kvm -nographic -vnc none -net none -netdev tap,id=net1,
script=no,downscript=no,ifname=if1,vhost=on -device virtio-net-pci,
netdev=net1,mac=00:00:00:00:00:01
```
DPDK vhost VM configuration with libvirt:
-----------------------------------------
If you are using libvirt, you must enable libvirt to access the character
device by adding it to controllers cgroup for libvirtd using the following
steps.
1. In `/etc/libvirt/qemu.conf` add/edit the following lines:
```
1) clear_emulator_capabilities = 0
2) user = "root"
3) group = "root"
4) cgroup_device_acl = [
"/dev/null", "/dev/full", "/dev/zero",
"/dev/random", "/dev/urandom",
"/dev/ptmx", "/dev/kvm", "/dev/kqemu",
"/dev/rtc", "/dev/hpet", "/dev/net/tun",
"/dev/<my-vhost-device>",
"/dev/hugepages"]
```
<my-vhost-device> refers to "vhost-net" if using the `/dev/vhost-net`
device. If you have specified a different name on the ovs-vswitchd
commandline using the "--cuse_dev_name" parameter, please specify that
filename instead.
2. Disable SELinux or set to permissive mode
3. Restart the libvirtd process
For example, on Fedora:
`systemctl restart libvirtd.service`
After successfully editing the configuration, you may launch your
vhost-enabled VM. The XML describing the VM can be configured like so
within the <qemu:commandline> section:
1. Set up shared hugepages:
```
<qemu:arg value='-object'/>
<qemu:arg value='memory-backend-file,id=mem,size=4096M,mem-path=/dev/hugepages,share=on'/>
<qemu:arg value='-numa'/>
<qemu:arg value='node,memdev=mem'/>
<qemu:arg value='-mem-prealloc'/>
```
2. Set up your tap devices:
```
<qemu:arg value='-netdev'/>
<qemu:arg value='type=tap,id=net1,script=no,downscript=no,ifname=vhost0,vhost=on'/>
<qemu:arg value='-device'/>
<qemu:arg value='virtio-net-pci,netdev=net1,mac=00:00:00:00:00:01'/>
```
Repeat for as many devices as are desired, modifying the id, ifname
and mac as necessary.
Again, if you are using an alternative character device (other than
`/dev/vhost-net`), please specify the file descriptor like so:
`<qemu:arg value='type=tap,id=net3,script=no,downscript=no,ifname=vhost0,vhost=on,vhostfd=<open_fd>'/>`
Where <open_fd> refers to the open file descriptor of the character device.
Instructions of how to retrieve the file descriptor can be found in the
"DPDK vhost VM configuration" section.
Alternatively, the process is automated with the qemu-wrap.py script,
detailed in the next section.
Now you may launch your VM using virt-manager, or like so:
`virsh create my_vhost_vm.xml`
DPDK vhost VM configuration with libvirt and QEMU wrapper:
----------------------------------------------------------
To use the qemu-wrapper script in conjunction with libvirt, follow the
steps in the previous section before proceeding with the following steps:
1. Place `qemu-wrap.py` in libvirtd's binary search PATH ($PATH)
Ideally in the same directory that the QEMU binary is located.
2. Ensure that the script has the same owner/group and file permissions
as the QEMU binary.
3. Update the VM xml file using "virsh edit VM.xml"
1. Set the VM to use the launch script.
Set the emulator path contained in the `<emulator></emulator>` tags.
For example, replace:
`<emulator>/usr/bin/qemu-kvm</emulator>`
with:
`<emulator>/usr/bin/qemu-wrap.py</emulator>`
4. Edit the Configuration Parameters section of the script to point to
the correct emulator location and set any additional options. If you are
using an alternative character device name, please set "us_vhost_path" to the
location of that device. The script will automatically detect and insert
the correct "vhostfd" value in the QEMU command line arguments.
5. Use virt-manager to launch the VM
Restrictions:
-------------
- This Support is for Physical NIC. I have tested with Intel NIC only.
- Work with 1500 MTU, needs few changes in DPDK lib to fix this issue.
- Currently DPDK port does not make use of any offload functionality.
- DPDK-vHost support works with 1G huge pages.
ivshmem:
- The shared memory is currently restricted to the use of a 1GB
@ -311,3 +560,4 @@ Please report problems to bugs@openvswitch.org.
[INSTALL.userspace.md]:INSTALL.userspace.md
[INSTALL.md]:INSTALL.md
[DPDK Linux GSG]: http://www.dpdk.org/doc/guides/linux_gsg/build_dpdk.html#binding-and-unbinding-network-ports-to-from-the-igb-uioor-vfio-modules
[DPDK Docs]: http://dpdk.org/doc

View File

@ -35,16 +35,19 @@ install Windows Driver Kit (WDK) 8.1 Update.
It is important to get the Visual Studio related environment variables and to
have the $PATH inside the bash to point to the proper compiler and linker. One
easy way to achieve this is to get into the "Developer Command prompt for visual
studio" and through it enter into the bash shell available from msys.
studio" and through it enter into the bash shell available from msys by typing
'bash --login'.
If after the above step, a 'which link' inside MSYS's bash says,
"/bin/link.exe", rename /bin/link.exe to something else so that the
Visual studio's linker is used.
Visual studio's linker is used. You should also see a 'which sort' report
"/bin/sort.exe".
* For pthread support, install the library, dll and includes of pthreads-win32
project from
ftp://sourceware.org/pub/pthreads-win32/prebuilt-dll-2-9-1-release to a
directory (e.g.: C:/pthread).
directory (e.g.: C:/pthread). You should add the pthread-win32's dll
path (e.g.: C:\pthread\dll\x86) to the Windows' PATH environment variable.
* Get the Open vSwitch sources from either cloning the repo using git
or from a distribution tar ball.
@ -71,10 +74,32 @@ or from a distribution tar ball.
% make
* To run all the unit tests:
For faster compilation, you can pass the '-j' argument to make. For
example, to run 4 jobs simultaneously, run 'make -j4'.
Note: MSYS 1.0.18 has a bug that causes parallel make to hang. You
can overcome this by downgrading to MSYS 1.0.17. A simple way to
downgrade is to exit all MinGW sessions and then run the command
'mingw-get upgrade msys-core-bin=1.0.17-1' from MSVC developers command
prompt.
* To run all the unit tests in Open vSwitch, one at a time:
% make check
To run all the unit tests in Open vSwitch, up to 8 in parallel:
% make check TESTSUITEFLAGS="-j8"
* To install all the compiled executables on the local machine, run:
% make install
The above command will install the Open vSwitch executables in
C:/openvswitch. You can add 'C:\openvswitch\usr\bin' and
'C:\openvswitch\usr\sbin' to Windows' PATH environment variable
for easy access.
OpenSSL, Open vSwitch and Visual C++
------------------------------------
To get SSL support for Open vSwitch on Windows, do the following:
@ -134,128 +159,254 @@ Steps to install the module
---------------------------
01> Run ./uninstall.cmd to remove the old extension.
02> Run ./install.cmd to insert the new one. For this to work you will have to
02> Run ./install.cmd to insert the new one. For this to work you will have to
turn on TESTSIGNING boot option or 'Disable Driver Signature Enforcement'
during boot.
03> In the Virtual Switch Manager configuration you should now see "VMWare OVS
Extension" under 'Virtual Switch Extensions'. Click the check box to enable the
extension.
Steps to run the user processes & configure VXLAN ports
-------------------------------------------------------
03> In the Virtual Switch Manager configuration you can enable the Open vSwitch
Extension on an existing switch or create a new switch. If you are using an
existing switch, make sure to enable the "Allow Management OS" option for VXLAN
to work (covered later).
01> Create the conf db file.
ovsdb\ovsdb-tool.exe create conf.db .\vswitchd\vswitch.ovsschema
The command to create a new switch named 'OVS-Extended-Switch' using a physical
NIC named 'Ethernet 1' is:
% New-VMSwitch "OVS-Extended-Switch" -AllowManagementOS $true \
-NetAdapterName "Ethernet 1"
02> Run ovsdb-server
ovsdb\ovsdb-server.exe -v --remote=ptcp:6640:127.0.0.1 conf.db
Note: you can obtain the list of physical NICs on the host using
'Get-NetAdapter' command.
03> Create integration bridge & pif bridge
utilities\ovs-vsctl.exe --db=tcp:127.0.0.1:6640 add-br br-int
utilities\ovs-vsctl.exe --db=tcp:127.0.0.1:6640 add-br br-pif
04> In the properties of any switch, you should now see "Open
vSwitch Extension" under 'Extensions'. Click the check box to enable the
extension. An alternative way to do the same is to run the following command:
% Enable-VMSwitchExtension "Open vSwitch Extension" OVS-Extended-Switch
04> Dump the ports
utilities\ovs-dpctl.exe show
Note: If you enabled the extension using the command line, a delay of a few
seconds has been observed for the change to be reflected in the UI. This is
not a bug in Open vSwitch.
* Sample output shows up like this. Currently it is not possible to figure out
* the mapping between VIF and VM from the output.
Steps to run the user processes & configure ports
-------------------------------------------------
The following steps assume that you have installed the Open vSwitch
utilities in the local machine via 'make install'.
$ utilities\ovs-dpctl.exe show
2014-06-27T01:55:32Z|00001|socket_util|ERR|4789:0.0.0.0:
socket: Either the application has not called WSAStartup, or WSAStartup failed.
<<< Ignore this error, it is harmless.
system@ovs-system:
lookups: hit:0 missed:0 lost:0
flows: 0
masks: hit:0 total:0 hit/pkt:0.00
port 16777216: internal <<< VTEP created by AllowManagementOS
setting
port 16777225: external.1 <<< Physical NIC
port 16777288: vmNICEmu.1000048 <<< VIF #1
port 16777289: vmNICSyn.1000049 <<< VIF #2
01> Create the database.
% ovsdb-tool create C:\openvswitch\etc\openvswitch\conf.db \
C:\openvswitch\usr\share\openvswitch\vswitch.ovsschema
02> Start the ovsdb-server and initialize the database.
% ovsdb-server -vfile:info --remote=punix:db.sock --log-file --pidfile \
--detach
% ovs-vsctl --no-wait init
05> Add the physical NIC and the internal port to br-pif
utilities\ovs-vsctl.exe --db=tcp:127.0.0.1:6640 add-port br-pif <port name>
If you would like to terminate the started ovsdb-server, run:
% ovs-appctl -t ovsdb-server exit
Eg:
utilities\ovs-vsctl.exe --db=tcp:127.0.0.1:6640 add-port br-pif external.1
utilities\ovs-vsctl.exe --db=tcp:127.0.0.1:6640 add-port br-pif internal
(Note that the logfile is created at C:/openvswitch/var/log/openvswitch/)
06> Add the VIFs to br-int
utilities\ovs-vsctl.exe --db=tcp:127.0.0.1:6640 add-port br-int <port name>
03> Start ovs-vswitchd.
% ovs-vswitchd -vfile:info --log-file --pidfile --detach
Eg:
utilities\ovs-vsctl.exe --db=tcp:127.0.0.1:6640 add-port br-int vmNICEmu.1000048
utilities\ovs-vsctl.exe --db=tcp:127.0.0.1:6640 add-port br-int vmNICSyn.1000049
If you would like to terminate the started ovs-vswitchd, run:
% ovs-appctl exit
07> Verify the status
utilities\ovs-vsctl.exe --db=tcp:127.0.0.1:6640 show
(Note that the logfile is created at C:/openvswitch/var/log/openvswitch/)
Eg:
$ utilities\ovs-vsctl.exe --db=tcp:127.0.0.1:6640 show
4cd86499-74df-48bd-a64d-8d115b12a9f2
Bridge br-pif
Port internal
Interface internal
Port "external.1"
Interface "external.1"
Port br-pif
Interface br-pif
type: internal
Bridge br-int
Port br-int
Interface br-int
type: internal
Port "vmNICEmu.1000048"
Interface "vmNICEmu.1000048"
Port "vmNICSyn.1000049"
Interface "vmNICSyn.1000049"
04> Create integration bridge & pif bridge
% ovs-vsctl add-br br-int
% ovs-vsctl add-br br-pif
NOTE: There's a known bug that running the ovs-vsctl command does not
terminate. This is generally solved by having ovs-vswitchd running. If
you face the issue despite that, hit Ctrl-C to terminate ovs-vsctl and
check the output to see if your command succeeded.
09> Run vswitchd
vswitchd\ovs-vswitchd.exe -v tcp:127.0.0.1:6640
NOTE: There's a known bug that the ports added to OVSDB via ovs-vsctl don't
get to the kernel datapath immediately, ie. they don't show up in the output of
"ovs-dpctl show" even though they show up in output of "ovs-vsctl show".
In order to workaround this issue, restart ovs-vswitchd. (You can terminate
ovs-vswitchd by running 'ovs-appctl exit'.)
10> You can figure out the port name to MAC address mapping now. (optional)
utilities\ovs-vsctl.exe --db=tcp:127.0.0.1:6640 list interface
05> Dump the ports in the kernel datapath
% ovs-dpctl show
//********** VXLAN PORT CONFIGURATION (Supports Multiple ports) ************//
(Remove all patch ports added to create VLAN networks.)
11> Add the vxlan port between 172.168.201.101 <-> 172.168.201.102
utilities\ovs-vsctl.exe --db=tcp:127.0.0.1:6640 add-port br-int vxlan-1
utilities\ovs-vsctl.exe --db=tcp:127.0.0.1:6640 set Interface vxlan-1 type=vxlan
utilities\ovs-vsctl.exe --db=tcp:127.0.0.1:6640 set Interface vxlan-1 options:local_ip=172.168.201.101
utilities\ovs-vsctl.exe --db=tcp:127.0.0.1:6640 set Interface vxlan-1 options:remote_ip=172.168.201.102
utilities\ovs-vsctl.exe --db=tcp:127.0.0.1:6640 set Interface vxlan-1 options:in_key=flow
utilities\ovs-vsctl.exe --db=tcp:127.0.0.1:6640 set Interface vxlan-1 options:out_key=flow
* Sample output is as follows:
12> Add the vxlan port between 172.168.201.101 <-> 172.168.201.105
utilities\ovs-vsctl.exe --db=tcp:127.0.0.1:6640 add-port br-int vxlan-2
utilities\ovs-vsctl.exe --db=tcp:127.0.0.1:6640 set Interface vxlan-2 type=vxlan
utilities\ovs-vsctl.exe --db=tcp:127.0.0.1:6640 set Interface vxlan-2 options:local_ip=172.168.201.102
utilities\ovs-vsctl.exe --db=tcp:127.0.0.1:6640 set Interface vxlan-2 options:remote_ip=172.168.201.105
utilities\ovs-vsctl.exe --db=tcp:127.0.0.1:6640 set Interface vxlan-2 options:in_key=flow
utilities\ovs-vsctl.exe --db=tcp:127.0.0.1:6640 set Interface vxlan-2 options:out_key=flow
% ovs-dpctl show
system@ovs-system:
lookups: hit:0 missed:0 lost:0
flows: 0
port 2: br-pif (internal) <<< internal port on 'br-pif' bridge
port 1: br-int (internal) <<< internal port on 'br-int' bridge
06> Dump the ports in the OVSDB
% ovs-vsctl show
//********** VLAN CONFIGURATION (Using patch ports) ************//
(Remove all VXLAN ports from the configuration.)
13> Add a patch port from br-int to br-pif
utilities/ovs-vsctl.exe -- add-port br-int patch-to-pif
utilities/ovs-vsctl.exe -- set interface patch-to-pif type=patch options:peer=patch-to-int
* Sample output is as follows:
% ovs-vsctl show
a56ec7b5-5b1f-49ec-a795-79f6eb63228b
Bridge br-pif
Port br-pif
Interface br-pif
type: internal
Bridge br-int
Port br-int
Interface br-int
type: internal
14> Add a patch port from br-pif to br-int
utilities/ovs-vsctl.exe -- add-port br-pif patch-to-int
utilities/ovs-vsctl.exe -- set interface patch-to-int type=patch options:peer=patch-to-pif
07> Add the physical NIC and the internal port to br-pif.
15> Re-Add the VIF ports with the VLAN tag
utilities\ovs-vsctl.exe add-port br-int vmNICEmu.1000048 tag=900
utilities\ovs-vsctl.exe add-port br-int vmNICSyn.1000049 tag=900
In OVS for Hyper-V, we use 'external' as a special name to refer to the
physical NICs connected to the Hyper-V switch. An index is added to this
special name to refer to the particular physical NIC. Eg. 'external.1' refers
to the first physical NIC on the Hyper-V switch.
Note: Currently, we assume that the Hyper-V switch on which OVS extension is
enabled has a single physical NIC connected to it.
Internal port is the virtual adapter created on the Hyper-V switch using the
'AllowManagementOS' setting. This has already been setup while creating the
switch using the instructions above. In OVS for Hyper-V, we use 'internal'
as a special name to refer to that adapter.
% ovs-vsctl add-port br-pif external.1
% ovs-vsctl add-port br-pif internal
* Dumping the ports should show the additional ports that were just added.
Sample output shows up as follows:
% ovs-dpctl show
system@ovs-system:
lookups: hit:0 missed:0 lost:0
flows: 0
port 4: internal (internal) <<< 'AllowManagementOS' adapter on
Hyper-V switch
port 2: br-pif (internal)
port 1: br-int (internal)
port 3: external.1 <<< Physical NIC
% ovs-vsctl show
a56ec7b5-5b1f-49ec-a795-79f6eb63228b
Bridge br-pif
Port internal
Interface internal
Port br-pif
Interface br-pif
type: internal
Bridge br-int
Port "external.1"
Interface "external.1"
Port br-int
Interface br-int
type: internal
08> Add the VIFs to br-int
Adding VIFs to openvswitch is a two step procedure. The first step is to
assign an 'OVS port name' which is a unique name across all VIFs on this
Hyper-V. The next step is to add the VIF to the ovsdb using its 'OVS port
name' as key.
08a> Assign a unique 'OVS port name' to the VIF
Note that the VIF needs to have been disconnected from the Hyper-V switch
before assigning an 'OVS port name' to it. In the example below, we assign an
'OVS port name' called 'ovs-port-a' to a VIF on a VM by name 'VM1'. By using
index 0 for '$vnic', the first VIF of the VM is being addressed. After
assigning the name 'ovs-port-a', the VIF is connected back to the Hyper-V
switch with name 'OVS-Extended-Switch', which is assumed to be the Hyper-V switch
with OVS extension enabled.
Eg:
% import-module .\datapath-windows\misc\OVS.psm1
% $vnic = Get-VMNetworkAdapter <Name of the VM>
% Disconnect-VMNetworkAdapter -VMNetworkAdapter $vnic[0]
% $vnic[0] | Set-VMNetworkAdapterOVSPort -OVSPortName ovs-port-a
% Connect-VMNetworkAdapter -VMNetworkAdapter $vnic[0] \
-SwitchName OVS-Extended-Switch
08b> Add the VIFs to br-int in ovsdb
Eg:
% ovs-vsctl add-port br-int ovs-port-a
09> Verify the status
% ovs-dpctl show
system@ovs-system:
lookups: hit:0 missed:0 lost:0
flows: 0
port 4: internal (internal)
port 5: ovs-port-a
port 2: br-pif (internal)
port 1: br-int (internal)
port 3: external.1
% ovs-vsctl show
4cd86499-74df-48bd-a64d-8d115b12a9f2
Bridge br-pif
Port internal
Interface internal
Port "external.1"
Interface "external.1"
Port br-pif
Interface br-pif
type: internal
Bridge br-int
Port br-int
Interface br-int
type: internal
Port "ovs-port-a"
Interface "ovs-port-a"
Steps to configure patch ports and switch VLAN tagging
------------------------------------------------------
The Windows Open vSwitch implementation supports VLAN tagging in the switch.
Switch VLAN tagging along with patch ports between 'br-int' and 'br-pif' is
used to configure VLAN tagging functionality between two VMs on different
Hyper-Vs. The following examples demonstrate how it can be done:
01> Add a patch port from br-int to br-pif
% ovs-vsctl add-port br-int patch-to-pif
% ovs-vsctl set interface patch-to-pif type=patch \
options:peer=patch-to-int
02> Add a patch port from br-pif to br-int
% ovs-vsctl add-port br-pif patch-to-int
% ovs-vsctl set interface patch-to-int type=patch \
options:peer=patch-to-pif
03> Re-Add the VIF ports with the VLAN tag
% ovs-vsctl add-port br-int ovs-port-a tag=900
% ovs-vsctl add-port br-int ovs-port-b tag=900
Steps to add VXLAN tunnels
--------------------------
The Windows Open vSwitch implementation supports VXLAN tunnels. To add VXLAN
tunnels, the following steps serve as examples.
Note that any patch ports created between br-int and br-pif MUST be deleted
prior to adding VXLAN tunnels.
01> Add the vxlan port between 172.168.201.101 <-> 172.168.201.102
% ovs-vsctl add-port br-int vxlan-1
% ovs-vsctl set Interface vxlan-1 type=vxlan
% ovs-vsctl set Interface vxlan-1 options:local_ip=172.168.201.101
% ovs-vsctl set Interface vxlan-1 options:remote_ip=172.168.201.102
% ovs-vsctl set Interface vxlan-1 options:in_key=flow
% ovs-vsctl set Interface vxlan-1 options:out_key=flow
02> Add the vxlan port between 172.168.201.101 <-> 172.168.201.105
% ovs-vsctl add-port br-int vxlan-2
% ovs-vsctl set Interface vxlan-2 type=vxlan
% ovs-vsctl set Interface vxlan-2 options:local_ip=172.168.201.102
% ovs-vsctl set Interface vxlan-2 options:remote_ip=172.168.201.105
% ovs-vsctl set Interface vxlan-2 options:in_key=flow
% ovs-vsctl set Interface vxlan-2 options:out_key=flow
Requirements
------------
* We require that you don't disable the "Allow management operating system to
share this network adapter" under 'Virtual Switch Properties' > 'Connection
type: External network', in the HyperV virtual network switch configuration.
@ -265,9 +416,59 @@ type: External network', in the HyperV virtual network switch configuration.
this is still a work in progress. Until the support is complete, we recommend
disabling TX/RX offloads for both the VMs and the HyperV.
Windows Services
----------------
Open vSwitch daemons come with support to run as a Windows service. The
instructions here assume that you have installed the Open vSwitch utilities
and daemons via 'make install'. The commands shown here can be run from
MSYS bash or Windows command prompt.
* Create the database.
% ovsdb-tool create C:/openvswitch/etc/openvswitch/conf.db \
"C:/openvswitch/usr/share/openvswitch/vswitch.ovsschema"
* Create the ovsdb-server service and start it.
% sc create ovsdb-server binpath="C:/Shares/openvswitch/ovsdb/ovsdb-server.exe C:/openvswitch/etc/openvswitch/conf.db -vfile:info --log-file --pidfile --remote=punix:db.sock --service --service-monitor"
One of the common issues with creating a Windows service is with mangled
paths. You can make sure that the correct path has been registered with
the Windows services manager by running:
% sc qc ovsdb-server
Start the service.
% sc start ovsdb-server
Check that the service is healthy by running:
% sc query ovsdb-server
* Initialize the database.
% ovs-vsctl --no-wait init
* Create the ovs-vswitchd service and start it.
% sc create ovs-vswitchd binpath="C:/Shares/openvswitch/vswitchd/ovs-vswitchd.exe --pidfile -vfile:info --log-file --service --service-monitor"
% sc start ovs-vswitchd
Check that the service is healthy by running:
% sc query ovs-vswitchd
* To stop and delete the services, run:
% sc stop ovs-vswitchd
% sc stop ovsdb-server
% sc delete ovs-vswitchd
% sc delete ovsdb-server
Windows autobuild service
-------------------------
AppVeyor (appveyor.com) provides a free Windows autobuild service for
open source projects. Open vSwitch has integration with AppVeyor for
continuous build. A developer can build and test his changes for Windows by

View File

@ -10,6 +10,7 @@ on a specific platform, please see one of these files:
- [INSTALL.RHEL.md]
- [INSTALL.XenServer.md]
- [INSTALL.NetBSD.md]
- [INSTALL.Windows.md]
- [INSTALL.DPDK.md]
Build Requirements
@ -28,6 +29,9 @@ you will need the following software:
analysis and thread-safety checks. For Ubuntu, there are
nightly built packages available on clang's website.
* MSVC 2013. See [INSTALL.Windows.md] for additional Windows build
instructions.
While OVS may be compatible with other compilers, optimal
support for atomic operations may be missing, making OVS very
slow (see lib/ovs-atomic.h).
@ -192,6 +196,20 @@ To use 'clang' compiler:
`% ./configure CC=clang`
To supply special flags to the C compiler, specify them as CFLAGS on
the configure command line. If you want the default CFLAGS, which
include "-g" to build debug symbols and "-O2" to enable optimizations,
you must include them yourself. For example, to build with the
default CFLAGS plus "-mssse3", you might run configure as follows:
`% ./configure CFLAGS="-g -O2 -mssse3"`
Note that these CFLAGS are not applied when building the Linux
kernel module. Custom CFLAGS for the kernel module are supplied
using the EXTRA_CFLAGS variable when running make. So, for example:
`% make EXTRA_CFLAGS="-Wno-error=date-time"`
To build the Linux kernel module, so that you can run the
kernel-based switch, pass the location of the kernel build
directory on --with-linux. For example, to build for a running
@ -255,6 +273,10 @@ Building the Sources
For improved warnings if you installed "sparse" (see "Prerequisites"),
add C=1 to the command line.
Some versions of Clang and ccache are not completely compatible.
If you see unusual warnings when you use both together, consider
disabling ccache for use with Clang.
2. Consider running the testsuite. Refer to "Running the Testsuite"
below, for instructions.

View File

@ -76,7 +76,7 @@ Other settings
On NetBSD, depending on your network topology and applications, the
following configuration might help. See sysctl(7).
sysctl net.inet.ip.checkinterface=1
sysctl -w net.inet.ip.checkinterface=1
Bug Reporting
-------------

View File

@ -32,6 +32,10 @@ AM_CFLAGS = -Wstrict-prototypes
AM_CFLAGS += $(WARNING_FLAGS)
AM_CFLAGS += $(OVS_CFLAGS)
if DPDK_NETDEV
AM_CFLAGS += -D_FILE_OFFSET_BITS=64
endif
if NDEBUG
AM_CPPFLAGS += -DNDEBUG
AM_CFLAGS += -fomit-frame-pointer
@ -133,6 +137,7 @@ OVSIDL_BUILT =
pkgdata_DATA =
sbin_SCRIPTS =
scripts_SCRIPTS =
completion_SCRIPTS =
scripts_DATA =
SUFFIXES =
check_DATA =
@ -140,6 +145,7 @@ check_SCRIPTS =
pkgconfig_DATA =
scriptsdir = $(pkgdatadir)/scripts
completiondir = $(sysconfdir)/bash_completion.d
pkgconfigdir = $(libdir)/pkgconfig
# This ensures that files added to EXTRA_DIST are always distributed,
@ -196,7 +202,7 @@ dist-hook-git: distfiles
LC_ALL=C sort -u > all-gitfiles; \
LC_ALL=C comm -1 -3 all-distfiles all-gitfiles > missing-distfiles; \
if test -s missing-distfiles; then \
echo "The distribution is missing the following files:"; \
echo "The following files are in git but not the distribution:"; \
cat missing-distfiles; \
exit 1; \
fi; \
@ -352,6 +358,7 @@ dist-docs:
VERSION=$(VERSION) $(srcdir)/build-aux/dist-docs $(srcdir) $(docs)
.PHONY: dist-docs
include Documentation/automake.mk
include m4/automake.mk
include lib/automake.mk
include ofproto/automake.mk

10
NEWS
View File

@ -1,5 +1,6 @@
Post-v2.3.0
---------------------
- Added support for SFQ, FQ_CoDel and CoDel qdiscs.
- Add bash command-line completion support for ovs-vsctl. Please check
utilities/ovs-command-compgen.INSTALL.md for how to use.
- The MAC learning feature now includes per-port fairness to mitigate
@ -37,6 +38,9 @@ Post-v2.3.0
is executed last, and only if the action set has no "output" or "group"
action.
* OpenFlow 1.4+ flow "importance" is now maintained in the flow table.
* A new Netronome extension to OpenFlow 1.5+ allows control over the
fields hashed for OpenFlow select groups. See "selection_method" and
related options in ovs-ofctl(8) for details.
- ovs-pki: Changed message digest algorithm from MD5 to SHA-1 because
MD5 is no longer secure and some operating systems have started to disable
it in OpenSSL.
@ -58,8 +62,8 @@ Post-v2.3.0
- A simple wrapper script, 'ovs-docker', to integrate OVS with Docker
containers. If and when there is a native integration of Open vSwitch
with Docker, the wrapper script will be retired.
- Added support for DPDK Tunneling. VXLAN and GRE are supported protocols.
This is generic tunneling mechanism for userspace datapath.
- Added support for DPDK Tunneling. VXLAN, GRE, and Geneve are supported
protocols. This is a generic tunneling mechanism for the userspace datapath.
- Support for multicast snooping (IGMPv1 and IGMPv2)
- Support for Linux kernels up to 3.19.x
- The documentation now uses the term 'destination' to mean one of syslog,
@ -71,6 +75,8 @@ Post-v2.3.0
Auto-Attach.
- The default OpenFlow and OVSDB ports are now the IANA-assigned
numbers. OpenFlow is 6653 and OVSDB is 6640.
- Support for DPDK vHost.
- Support for outer UDP checksums in Geneve and VXLAN.
v2.3.0 - 14 Aug 2014

View File

@ -1,6 +1,6 @@
# -*- autoconf -*-
# Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc.
# Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015 Nicira, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@ -170,7 +170,8 @@ AC_DEFUN([OVS_CHECK_DPDK], [
DPDK_INCLUDE=$RTE_SDK/include
DPDK_LIB_DIR=$RTE_SDK/lib
DPDK_LIB=-lintel_dpdk
DPDK_LIB="-lintel_dpdk"
DPDK_EXTRA_LIB="-lfuse"
ovs_save_CFLAGS="$CFLAGS"
ovs_save_LDFLAGS="$LDFLAGS"
@ -187,7 +188,7 @@ AC_DEFUN([OVS_CHECK_DPDK], [
found=false
save_LIBS=$LIBS
for extras in "" "-ldl"; do
LIBS="$DPDK_LIB $extras $save_LIBS"
LIBS="$DPDK_LIB $extras $save_LIBS $DPDK_EXTRA_LIB"
AC_LINK_IFELSE(
[AC_LANG_PROGRAM([#include <rte_config.h>
#include <rte_eal.h>],
@ -206,7 +207,7 @@ AC_DEFUN([OVS_CHECK_DPDK], [
OVS_LDFLAGS="$OVS_LDFLAGS -L$DPDK_LIB_DIR"
OVS_CFLAGS="$OVS_CFLAGS -I$DPDK_INCLUDE"
# DPDK 1.7 pmd drivers are not linked unless --whole-archive is used.
# DPDK pmd drivers are not linked unless --whole-archive is used.
#
# This happens because the rest of the DPDK code doesn't use any symbol in
# the pmd driver objects, and the drivers register themselves using an
@ -253,6 +254,37 @@ AC_DEFUN([OVS_GREP_IFELSE], [
fi
])
dnl OVS_FIND_FIELD_IFELSE(FILE, STRUCTURE, REGEX, [IF-MATCH], [IF-NO-MATCH])
dnl
dnl Looks for STRUCTURE in FILE.  If it is found, greps for REGEX within the
dnl structure definition.  If this is successful, runs IF-MATCH, otherwise
dnl IF-NO-MATCH.  If IF-MATCH is empty then it instead runs
dnl OVS_DEFINE(HAVE_<STRUCTURE>_WITH_<REGEX>), with <STRUCTURE> and <REGEX>
dnl translated to uppercase.
dnl
dnl IF-NO-MATCH is also run when FILE does not exist.  Any grep exit status
dnl other than 0 (match) or 1 (no match) aborts configure with an error.
dnl The matched lines themselves are discarded so that they do not end up in
dnl the middle of the "checking whether ..." line; only the exit status of
dnl grep is used.
AC_DEFUN([OVS_FIND_FIELD_IFELSE], [
  AC_MSG_CHECKING([whether $2 has member $3 in $1])
  if test -f $1; then
    awk '/$2.{/,/^}/' $1 2>/dev/null | grep '$3' >/dev/null
    status=$?
    case $status in
      0)
        AC_MSG_RESULT([yes])
        m4_if([$4], [], [OVS_DEFINE([HAVE_]m4_toupper([$2])[_WITH_]m4_toupper([$3]))], [$4])
        ;;
      1)
        AC_MSG_RESULT([no])
        $5
        ;;
      *)
        AC_MSG_ERROR([grep exited with status $status])
        ;;
    esac
  else
    AC_MSG_RESULT([file not found])
    $5
  fi
])
dnl OVS_DEFINE(NAME)
dnl
dnl Defines NAME to 1 in kcompat.h.
@ -293,6 +325,8 @@ AC_DEFUN([OVS_CHECK_LINUX_COMPAT], [
OVS_GREP_IFELSE([$KSRC/include/linux/in.h], [ipv4_is_multicast])
OVS_GREP_IFELSE([$KSRC/include/net/ip.h], [__ip_select_ident.*dst_entry],
[OVS_DEFINE([HAVE_IP_SELECT_IDENT_USING_DST_ENTRY])])
OVS_GREP_IFELSE([$KSRC/include/net/ip.h], [inet_get_local_port_range.*net],
[OVS_DEFINE([HAVE_INET_GET_LOCAL_PORT_RANGE_USING_NET])])
OVS_GREP_IFELSE([$KSRC/include/linux/netdevice.h], [dev_disable_lro])
OVS_GREP_IFELSE([$KSRC/include/linux/netdevice.h], [dev_get_stats])
@ -365,6 +399,12 @@ AC_DEFUN([OVS_CHECK_LINUX_COMPAT], [
OVS_GREP_IFELSE([$KSRC/include/net/genetlink.h], [netlink_has_listeners(net->genl_sock],
[OVS_DEFINE([HAVE_GENL_HAS_LISTENERS_TAKES_NET])])
OVS_GREP_IFELSE([$KSRC/include/net/genetlink.h], [genlmsg_parse])
OVS_GREP_IFELSE([$KSRC/include/net/genetlink.h], [genl_notify.*family],
[OVS_DEFINE([HAVE_GENL_NOTIFY_TAKES_FAMILY])])
OVS_FIND_FIELD_IFELSE([$KSRC/include/net/genetlink.h],
[genl_multicast_group], [id])
OVS_GREP_IFELSE([$KSRC/include/net/gre.h], [gre_cisco_register])
OVS_GREP_IFELSE([$KSRC/include/net/ipv6.h], [IP6_FH_F_SKIP_RH])
OVS_GREP_IFELSE([$KSRC/include/net/netlink.h], [nla_get_be16])
@ -389,9 +429,16 @@ AC_DEFUN([OVS_CHECK_LINUX_COMPAT], [
OVS_GREP_IFELSE([$KSRC/include/net/vxlan.h], [struct vxlan_metadata],
[OVS_DEFINE([HAVE_VXLAN_METADATA])])
OVS_GREP_IFELSE([$KSRC/include/net/udp.h], [udp_flow_src_port],
[OVS_DEFINE([HAVE_UDP_FLOW_SRC_PORT])])
[OVS_GREP_IFELSE([$KSRC/include/net/udp.h], [inet_get_local_port_range(net],
[OVS_DEFINE([HAVE_UDP_FLOW_SRC_PORT])])])
OVS_GREP_IFELSE([$KSRC/include/net/udp.h], [udp_v4_check])
OVS_GREP_IFELSE([$KSRC/include/net/udp.h], [udp_set_csum])
OVS_GREP_IFELSE([$KSRC/include/linux/skbuff.h], [ignore_df:1],
[OVS_DEFINE([HAVE_IGNORE_DF_RENAME])])
OVS_GREP_IFELSE([$KSRC/include/linux/skbuff.h], [SKB_GSO_GRE_CSUM],
[OVS_DEFINE([HAVE_SKB_GSO_GRE_CSUM])])
OVS_GREP_IFELSE([$KSRC/include/linux/skbuff.h], [SKB_GSO_UDP_TUNNEL_CSUM],
[OVS_DEFINE([HAVE_SKB_GSO_UDP_TUNNEL_CSUM])])
OVS_GREP_IFELSE([$KSRC/include/uapi/linux/netdevice.h], [NET_NAME_UNKNOWN],
[OVS_DEFINE([HAVE_NET_NAME_UNKNOWN])])
@ -447,7 +494,13 @@ AC_DEFUN([OVS_CHECK_STRTOK_R],
[AC_LANG_PROGRAM([#include <stdio.h>
#include <string.h>
],
[[char string[] = ":::";
[[#if __GLIBC__ == 2 && __GLIBC_MINOR__ < 8
/* Assume bug is present, because relatively minor
changes in compiler settings (e.g. optimization
level) can make it crop up. */
return 1;
#else
char string[] = ":::";
char *save_ptr = (char *) 0xc0ffee;
char *token1, *token2;
token1 = strtok_r(string, ":", &save_ptr);
@ -455,6 +508,7 @@ AC_DEFUN([OVS_CHECK_STRTOK_R],
freopen ("/dev/null", "w", stdout);
printf ("%s %s\n", token1, token2);
return 0;
#endif
]])],
[ovs_cv_strtok_r_bug=no],
[ovs_cv_strtok_r_bug=yes],

View File

@ -90,7 +90,7 @@ EOF
;;
-O0)
clopt="$clopt ${slash}Ot"
clopt="$clopt ${slash}Od ${slash}D_DEBUG"
;;
-O2)

View File

@ -29,22 +29,27 @@
#define OVS_IOCTL_DEVICE_TYPE 45000
/* We used Direct I/O (zero copy) for the buffers. */
#define OVS_IOCTL_START 0x100
/* We used Direct I/O (zero copy) for the buffers. */
/* Non-Netlink-based IOCTLs. */
#define OVS_IOCTL_GET_PID \
CTL_CODE (OVS_IOCTL_DEVICE_TYPE, OVS_IOCTL_START + 0x0, METHOD_BUFFERED,\
FILE_WRITE_ACCESS)
/* Netlink-based IOCTLs. */
#define OVS_IOCTL_READ \
CTL_CODE (OVS_IOCTL_DEVICE_TYPE, OVS_IOCTL_START + 0x0, METHOD_OUT_DIRECT,\
CTL_CODE (OVS_IOCTL_DEVICE_TYPE, OVS_IOCTL_START + 0x1, METHOD_OUT_DIRECT,\
FILE_READ_ACCESS)
#define OVS_IOCTL_READ_EVENT \
CTL_CODE (OVS_IOCTL_DEVICE_TYPE, OVS_IOCTL_START + 0x1, METHOD_OUT_DIRECT, \
FILE_READ_ACCESS)
#define OVS_IOCTL_READ_PACKET \
CTL_CODE (OVS_IOCTL_DEVICE_TYPE, OVS_IOCTL_START + 0x2, METHOD_OUT_DIRECT, \
FILE_READ_ACCESS)
#define OVS_IOCTL_READ_PACKET \
CTL_CODE (OVS_IOCTL_DEVICE_TYPE, OVS_IOCTL_START + 0x3, METHOD_OUT_DIRECT, \
FILE_READ_ACCESS)
#define OVS_IOCTL_WRITE \
CTL_CODE (OVS_IOCTL_DEVICE_TYPE, OVS_IOCTL_START + 0x3, METHOD_IN_DIRECT,\
CTL_CODE (OVS_IOCTL_DEVICE_TYPE, OVS_IOCTL_START + 0x4, METHOD_IN_DIRECT,\
FILE_READ_ACCESS)
#define OVS_IOCTL_TRANSACT \
CTL_CODE (OVS_IOCTL_DEVICE_TYPE, OVS_IOCTL_START + 0x4, METHOD_OUT_DIRECT,\
CTL_CODE (OVS_IOCTL_DEVICE_TYPE, OVS_IOCTL_START + 0x5, METHOD_OUT_DIRECT,\
FILE_WRITE_ACCESS)
/*
@ -75,7 +80,6 @@
/* Commands available under the OVS_WIN_CONTROL_FAMILY. */
enum ovs_win_control_cmd {
OVS_CTRL_CMD_WIN_GET_PID,
OVS_CTRL_CMD_WIN_PEND_REQ,
OVS_CTRL_CMD_WIN_PEND_PACKET_REQ,
OVS_CTRL_CMD_MC_SUBSCRIBE_REQ,

View File

@ -433,14 +433,14 @@ OvsAllocateMDLAndData(NDIS_HANDLE ndisHandle,
PMDL mdl;
PVOID data;
data = OvsAllocateMemory(dataSize);
data = OvsAllocateMemoryWithTag(dataSize, OVS_MDL_POOL_TAG);
if (data == NULL) {
return NULL;
}
mdl = NdisAllocateMdl(ndisHandle, data, dataSize);
if (mdl == NULL) {
OvsFreeMemory(data);
OvsFreeMemoryWithTag(data, OVS_MDL_POOL_TAG);
}
return mdl;
@ -454,7 +454,7 @@ OvsFreeMDLAndData(PMDL mdl)
data = MmGetMdlVirtualAddress(mdl);
NdisFreeMdl(mdl);
OvsFreeMemory(data);
OvsFreeMemoryWithTag(data, OVS_MDL_POOL_TAG);
}

View File

@ -87,8 +87,7 @@ typedef struct _NETLINK_FAMILY {
} NETLINK_FAMILY, *PNETLINK_FAMILY;
/* Handlers for the various netlink commands. */
static NetlinkCmdHandler OvsGetPidCmdHandler,
OvsPendEventCmdHandler,
static NetlinkCmdHandler OvsPendEventCmdHandler,
OvsPendPacketCmdHandler,
OvsSubscribeEventCmdHandler,
OvsSubscribePacketCmdHandler,
@ -110,6 +109,8 @@ static NTSTATUS HandleGetDpDump(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
UINT32 *replyLen);
static NTSTATUS HandleDpTransactionCommon(
POVS_USER_PARAMS_CONTEXT usrParamsCtx, UINT32 *replyLen);
static NTSTATUS OvsGetPidHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
UINT32 *replyLen);
/*
* The various netlink families, along with the supported commands. Most of
@ -120,11 +121,6 @@ static NTSTATUS HandleDpTransactionCommon(
/* Netlink control family: this is a Windows specific family. */
NETLINK_CMD nlControlFamilyCmdOps[] = {
{ .cmd = OVS_CTRL_CMD_WIN_GET_PID,
.handler = OvsGetPidCmdHandler,
.supportedDevOp = OVS_TRANSACTION_DEV_OP,
.validateDpIndex = FALSE,
},
{ .cmd = OVS_CTRL_CMD_WIN_PEND_REQ,
.handler = OvsPendEventCmdHandler,
.supportedDevOp = OVS_WRITE_DEV_OP,
@ -349,39 +345,54 @@ extern POVS_SWITCH_CONTEXT gOvsSwitchContext;
NDIS_SPIN_LOCK ovsCtrlLockObj;
PNDIS_SPIN_LOCK gOvsCtrlLock;
/*
 * Starts a new dump sequence on 'instance' by storing a private copy of
 * 'ovsMsg' in instance->dumpState.ovsMsg and zeroing the dump index array.
 *
 * The caller must have released any previous dump state first; this is only
 * asserted, not enforced.
 *
 * Returns STATUS_SUCCESS, or STATUS_NO_MEMORY if the copy could not be
 * allocated (in which case instance->dumpState.ovsMsg is left NULL).
 */
NTSTATUS
InitUserDumpState(POVS_OPEN_INSTANCE instance,
                  POVS_MESSAGE ovsMsg)
{
    POVS_MESSAGE msgCopy;

    /* No dump sequence may be in progress, and there must be a message to
     * remember. */
    ASSERT(instance->dumpState.ovsMsg == NULL);
    ASSERT(ovsMsg);

    msgCopy = (POVS_MESSAGE)OvsAllocateMemoryWithTag(sizeof(OVS_MESSAGE),
                                                     OVS_DATAPATH_POOL_TAG);
    instance->dumpState.ovsMsg = msgCopy;
    if (msgCopy == NULL) {
        return STATUS_NO_MEMORY;
    }

    RtlCopyMemory(msgCopy, ovsMsg, sizeof *instance->dumpState.ovsMsg);
    RtlZeroMemory(instance->dumpState.index,
                  sizeof instance->dumpState.index);

    return STATUS_SUCCESS;
}
/*
 * Ends a dump sequence on 'instance': frees the saved message copy, if any,
 * and zeroes the whole dumpState structure.  Does nothing when no message
 * is saved.
 */
VOID
FreeUserDumpState(POVS_OPEN_INSTANCE instance)
{
    if (instance->dumpState.ovsMsg == NULL) {
        return;
    }

    OvsFreeMemoryWithTag(instance->dumpState.ovsMsg,
                         OVS_DATAPATH_POOL_TAG);
    /* Zeroing the structure also resets ovsMsg to NULL. */
    RtlZeroMemory(&instance->dumpState, sizeof instance->dumpState);
}
/*
 * Sets up the control-path globals: allocates the control spin lock,
 * initializes the event queue, and adds the tunnel system provider when a
 * tunnel engine handle could be obtained.
 */
VOID
OvsInit()
{
HANDLE handle = NULL;
/* gOvsCtrlLock points at the file-scope ovsCtrlLockObj storage. */
gOvsCtrlLock = &ovsCtrlLockObj;
NdisAllocateSpinLock(gOvsCtrlLock);
OvsInitEventQueue();
OvsTunnelEngineOpen(&handle);
/* Only add the provider if the open call produced a handle. */
if (handle) {
OvsTunnelAddSystemProvider(handle);
}
/* Called unconditionally, even when 'handle' is still NULL. */
OvsTunnelEngineClose(&handle);
}
/*
 * Tears down the control-path globals: cleans up the event queue, frees the
 * control spin lock if it is set, and removes the tunnel system provider
 * when a tunnel engine handle could be obtained.
 */
VOID
OvsCleanup()
{
HANDLE handle = NULL;
OvsCleanupEventQueue();
/* Clear the global pointer after freeing so the lock is not freed twice. */
if (gOvsCtrlLock) {
NdisFreeSpinLock(gOvsCtrlLock);
gOvsCtrlLock = NULL;
}
OvsTunnelEngineOpen(&handle);
/* Only remove the provider if the open call produced a handle. */
if (handle) {
OvsTunnelRemoveSystemProvider(handle);
}
/* Called unconditionally, even when 'handle' is still NULL. */
OvsTunnelEngineClose(&handle);
}
VOID
@ -448,6 +459,8 @@ OvsCreateDeviceObject(NDIS_HANDLE ovsExtDriverHandle)
if (ovsExt) {
ovsExt->numberOpenInstance = 0;
}
} else {
OvsRegisterSystemProvider((PVOID)gOvsDeviceObject);
}
OVS_LOG_TRACE("DeviceObject: %p", gOvsDeviceObject);
@ -471,6 +484,8 @@ OvsDeleteDeviceObject()
NdisDeregisterDeviceEx(gOvsDeviceHandle);
gOvsDeviceHandle = NULL;
gOvsDeviceObject = NULL;
OvsUnregisterSystemProvider();
}
}
@ -509,7 +524,8 @@ OvsAddOpenInstance(POVS_DEVICE_EXTENSION ovsExt,
PFILE_OBJECT fileObject)
{
POVS_OPEN_INSTANCE instance =
(POVS_OPEN_INSTANCE) OvsAllocateMemory(sizeof (OVS_OPEN_INSTANCE));
(POVS_OPEN_INSTANCE)OvsAllocateMemoryWithTag(sizeof(OVS_OPEN_INSTANCE),
OVS_DATAPATH_POOL_TAG);
UINT32 i;
if (instance == NULL) {
@ -520,7 +536,7 @@ OvsAddOpenInstance(POVS_DEVICE_EXTENSION ovsExt,
if (ovsNumberOfOpenInstances >= OVS_MAX_OPEN_INSTANCES) {
OvsReleaseCtrlLock();
OvsFreeMemory(instance);
OvsFreeMemoryWithTag(instance, OVS_DATAPATH_POOL_TAG);
return STATUS_INSUFFICIENT_RESOURCES;
}
RtlZeroMemory(instance, sizeof (OVS_OPEN_INSTANCE));
@ -571,7 +587,7 @@ OvsRemoveOpenInstance(PFILE_OBJECT fileObject)
OvsReleaseCtrlLock();
ASSERT(instance->eventQueue == NULL);
ASSERT (instance->packetQueue == NULL);
OvsFreeMemory(instance);
OvsFreeMemoryWithTag(instance, OVS_DATAPATH_POOL_TAG);
}
NTSTATUS
@ -701,8 +717,13 @@ OvsDeviceControl(PDEVICE_OBJECT deviceObject,
/* Check if the extension is enabled. */
if (NULL == gOvsSwitchContext) {
status = STATUS_DEVICE_NOT_READY;
goto done;
status = STATUS_NOT_FOUND;
goto exit;
}
if (!OvsAcquireSwitchContext()) {
status = STATUS_NOT_FOUND;
goto exit;
}
/* Concurrent netlink operations are not supported. */
@ -716,6 +737,24 @@ OvsDeviceControl(PDEVICE_OBJECT deviceObject,
* operation.
*/
switch (code) {
case OVS_IOCTL_GET_PID:
/* Both input buffer and output buffer use the same location. */
outputBuffer = irp->AssociatedIrp.SystemBuffer;
if (outputBufferLen != 0) {
InitUserParamsCtx(irp, instance, 0, NULL,
inputBuffer, inputBufferLen,
outputBuffer, outputBufferLen,
&usrParamsCtx);
ASSERT(outputBuffer);
} else {
status = STATUS_NDIS_INVALID_LENGTH;
goto done;
}
status = OvsGetPidHandler(&usrParamsCtx, &replyLen);
goto done;
case OVS_IOCTL_TRANSACT:
/* Both input buffer and output buffer are mandatory. */
if (outputBufferLen != 0) {
@ -874,6 +913,9 @@ OvsDeviceControl(PDEVICE_OBJECT deviceObject,
status = InvokeNetlinkCmdHandler(&usrParamsCtx, nlFamilyOps, &replyLen);
done:
OvsReleaseSwitchContext(gOvsSwitchContext);
exit:
KeMemoryBarrier();
instance->inUse = 0;
@ -927,11 +969,9 @@ ValidateNetlinkCmd(UINT32 devOp,
}
/* Validate the PID. */
if (ovsMsg->genlMsg.cmd != OVS_CTRL_CMD_WIN_GET_PID) {
if (ovsMsg->nlMsg.nlmsgPid != instance->pid) {
status = STATUS_INVALID_PARAMETER;
goto done;
}
if (ovsMsg->nlMsg.nlmsgPid != instance->pid) {
status = STATUS_INVALID_PARAMETER;
goto done;
}
status = STATUS_SUCCESS;
@ -972,38 +1012,33 @@ InvokeNetlinkCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
/*
* --------------------------------------------------------------------------
* Command Handler for 'OVS_CTRL_CMD_WIN_GET_PID'.
* Handler for 'OVS_IOCTL_GET_PID'.
*
* Each handle on the device is assigned a unique PID when the handle is
* created. On platforms that support netlink natively, the PID is available
* to userspace when the netlink socket is created. However, without native
* netlink support on Windows, OVS datapath generates the PID and lets the
* userspace query it.
*
* This function implements the query.
* created. This function passes the PID to userspace using METHOD_BUFFERED
* method.
* --------------------------------------------------------------------------
*/
static NTSTATUS
OvsGetPidCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
UINT32 *replyLen)
OvsGetPidHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
UINT32 *replyLen)
{
POVS_MESSAGE msgIn = (POVS_MESSAGE)usrParamsCtx->inputBuffer;
POVS_MESSAGE msgOut = (POVS_MESSAGE)usrParamsCtx->outputBuffer;
NTSTATUS status = STATUS_SUCCESS;
PUINT32 msgOut = (PUINT32)usrParamsCtx->outputBuffer;
if (usrParamsCtx->outputLength >= sizeof *msgOut) {
POVS_OPEN_INSTANCE instance =
(POVS_OPEN_INSTANCE)usrParamsCtx->ovsInstance;
RtlZeroMemory(msgOut, sizeof *msgOut);
msgOut->nlMsg.nlmsgSeq = msgIn->nlMsg.nlmsgSeq;
msgOut->nlMsg.nlmsgPid = instance->pid;
RtlCopyMemory(msgOut, &instance->pid, sizeof(*msgOut));
*replyLen = sizeof *msgOut;
/* XXX: We might need to return the DP index as well. */
} else {
return STATUS_NDIS_INVALID_LENGTH;
*replyLen = sizeof *msgOut;
status = STATUS_NDIS_INVALID_LENGTH;
}
return STATUS_SUCCESS;
return status;
}
/*

View File

@ -129,35 +129,10 @@ InitUserParamsCtx(PIRP irp,
usrParamsCtx->outputLength = outputLength;
}
static __inline NTSTATUS
InitUserDumpState(POVS_OPEN_INSTANCE instance,
POVS_MESSAGE ovsMsg)
{
/* Clear the dumpState from a previous dump sequence. */
ASSERT(instance->dumpState.ovsMsg == NULL);
ASSERT(ovsMsg);
NTSTATUS InitUserDumpState(POVS_OPEN_INSTANCE instance,
POVS_MESSAGE ovsMsg);
instance->dumpState.ovsMsg =
(POVS_MESSAGE) OvsAllocateMemory(sizeof (OVS_MESSAGE));
if (instance->dumpState.ovsMsg == NULL) {
return STATUS_NO_MEMORY;
}
RtlCopyMemory(instance->dumpState.ovsMsg, ovsMsg,
sizeof *instance->dumpState.ovsMsg);
RtlZeroMemory(instance->dumpState.index,
sizeof instance->dumpState.index);
return STATUS_SUCCESS;
}
static __inline VOID
FreeUserDumpState(POVS_OPEN_INSTANCE instance)
{
if (instance->dumpState.ovsMsg != NULL) {
OvsFreeMemory(instance->dumpState.ovsMsg);
RtlZeroMemory(&instance->dumpState, sizeof instance->dumpState);
}
}
VOID FreeUserDumpState(POVS_OPEN_INSTANCE instance);
NTSTATUS OvsSetupDumpStart(POVS_USER_PARAMS_CONTEXT usrParamsCtx);

View File

@ -96,9 +96,9 @@ OvsCleanupEvent(POVS_OPEN_INSTANCE instance)
LIST_FORALL_SAFE(&queue->elemList, link, next) {
elem = CONTAINING_RECORD(link, OVS_EVENT_QUEUE_ELEM, link);
OvsFreeMemory(elem);
OvsFreeMemoryWithTag(elem, OVS_EVENT_POOL_TAG);
}
OvsFreeMemory(queue);
OvsFreeMemoryWithTag(queue, OVS_EVENT_POOL_TAG);
}
}
@ -139,7 +139,8 @@ OvsPostEvent(UINT32 portNo,
portNo == OVS_DEFAULT_PORT_NO) {
queue->pollAll = TRUE;
} else {
elem = (POVS_EVENT_QUEUE_ELEM)OvsAllocateMemory(sizeof(*elem));
elem = (POVS_EVENT_QUEUE_ELEM)OvsAllocateMemoryWithTag(
sizeof(*elem), OVS_EVENT_POOL_TAG);
if (elem == NULL) {
queue->pollAll = TRUE;
} else {
@ -158,7 +159,7 @@ OvsPostEvent(UINT32 portNo,
LIST_FORALL_SAFE(&queue->elemList, curr, next) {
RemoveEntryList(curr);
elem = CONTAINING_RECORD(curr, OVS_EVENT_QUEUE_ELEM, link);
OvsFreeMemory(elem);
OvsFreeMemoryWithTag(elem, OVS_EVENT_POOL_TAG);
}
queue->numElems = 0;
}
@ -243,7 +244,8 @@ OvsSubscribeEventIoctl(PFILE_OBJECT fileObject,
}
if (request->subscribe) {
queue = (POVS_EVENT_QUEUE)OvsAllocateMemory(sizeof (OVS_EVENT_QUEUE));
queue = (POVS_EVENT_QUEUE)OvsAllocateMemoryWithTag(
sizeof(OVS_EVENT_QUEUE), OVS_EVENT_POOL_TAG);
if (queue == NULL) {
status = STATUS_NO_MEMORY;
OVS_LOG_WARN("Fail to allocate event queue");
@ -284,9 +286,9 @@ done_event_subscribe:
}
LIST_FORALL_SAFE(&queue->elemList, link, next) {
elem = CONTAINING_RECORD(link, OVS_EVENT_QUEUE_ELEM, link);
OvsFreeMemory(elem);
OvsFreeMemoryWithTag(elem, OVS_EVENT_POOL_TAG);
}
OvsFreeMemory(queue);
OvsFreeMemoryWithTag(queue, OVS_EVENT_POOL_TAG);
} else {
OvsReleaseEventQueueLock();
}
@ -446,7 +448,7 @@ OvsRemoveEventEntry(POVS_OPEN_INSTANCE instance,
elem = (POVS_EVENT_QUEUE_ELEM)RemoveHeadList(&queue->elemList);
entry->portNo = elem->portNo;
entry->status = elem->status;
OvsFreeMemory(elem);
OvsFreeMemoryWithTag(elem, OVS_EVENT_POOL_TAG);
queue->numElems--;
status = STATUS_SUCCESS;
}

View File

@ -319,7 +319,7 @@ OvsFlowNlCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
rc = OvsPutFlowIoctl(&mappedFlow, sizeof (struct OvsFlowPut),
&stats);
if (rc != STATUS_SUCCESS) {
OVS_LOG_ERROR("OvsFlowPut failed.");
OVS_LOG_ERROR("OvsPutFlowIoctl failed.");
goto done;
}
@ -1512,7 +1512,7 @@ OvsDeleteFlowTable(OVS_DATAPATH *datapath)
}
DeleteAllFlows(datapath);
OvsFreeMemory(datapath->flowTable);
OvsFreeMemoryWithTag(datapath->flowTable, OVS_FLOW_POOL_TAG);
datapath->flowTable = NULL;
NdisFreeRWLock(datapath->lock);
@ -1534,8 +1534,8 @@ OvsAllocateFlowTable(OVS_DATAPATH *datapath,
PLIST_ENTRY bucket;
int i;
datapath->flowTable = OvsAllocateMemory(OVS_FLOW_TABLE_SIZE *
sizeof (LIST_ENTRY));
datapath->flowTable = OvsAllocateMemoryWithTag(
OVS_FLOW_TABLE_SIZE * sizeof(LIST_ENTRY), OVS_FLOW_POOL_TAG);
if (!datapath->flowTable) {
return NDIS_STATUS_RESOURCES;
}
@ -1976,7 +1976,7 @@ VOID
FreeFlow(OvsFlow *flow)
{
ASSERT(flow);
OvsFreeMemory(flow);
OvsFreeMemoryWithTag(flow, OVS_FLOW_POOL_TAG);
}
NTSTATUS
@ -2259,7 +2259,8 @@ OvsPrepareFlow(OvsFlow **flow,
do {
*flow = localFlow =
OvsAllocateMemory(sizeof(OvsFlow) + put->actionsLen);
OvsAllocateMemoryWithTag(sizeof(OvsFlow) + put->actionsLen,
OVS_FLOW_POOL_TAG);
if (localFlow == NULL) {
status = STATUS_NO_MEMORY;
break;

View File

@ -777,7 +777,8 @@ OvsCreateIPNeighEntry(PMIB_IPNET_ROW2 ipNeigh)
UINT64 timeVal;
ASSERT(ipNeigh != NULL);
entry = (POVS_IPNEIGH_ENTRY)OvsAllocateMemory(sizeof (OVS_IPNEIGH_ENTRY));
entry = (POVS_IPNEIGH_ENTRY)OvsAllocateMemoryWithTag(
sizeof(OVS_IPNEIGH_ENTRY), OVS_IPHELPER_POOL_TAG);
if (entry == NULL) {
return NULL;
}
@ -802,8 +803,8 @@ OvsCreateIPForwardEntry(PMIB_IPFORWARD_ROW2 ipRoute)
ASSERT(ipRoute);
entry =
(POVS_IPFORWARD_ENTRY)OvsAllocateMemory(sizeof (OVS_IPFORWARD_ENTRY));
entry = (POVS_IPFORWARD_ENTRY)OvsAllocateMemoryWithTag(
sizeof(OVS_IPFORWARD_ENTRY), OVS_IPHELPER_POOL_TAG);
if (entry == NULL) {
return NULL;
}
@ -823,7 +824,8 @@ OvsCreateFwdEntry(POVS_FWD_INFO fwdInfo)
{
POVS_FWD_ENTRY entry;
entry = (POVS_FWD_ENTRY)OvsAllocateMemory(sizeof (OVS_FWD_ENTRY));
entry = (POVS_FWD_ENTRY)OvsAllocateMemoryWithTag(
sizeof(OVS_FWD_ENTRY), OVS_IPHELPER_POOL_TAG);
if (entry == NULL) {
return NULL;
}
@ -855,7 +857,7 @@ OvsRemoveFwdEntry(POVS_FWD_ENTRY fwdEntry)
if (ipf->refCount == 0) {
ASSERT(IsListEmpty(&ipf->fwdList));
RemoveEntryList(&ipf->link);
OvsFreeMemory(ipf);
OvsFreeMemoryWithTag(ipf, OVS_IPHELPER_POOL_TAG);
}
if (ipn->refCount == 0) {
@ -864,10 +866,10 @@ OvsRemoveFwdEntry(POVS_FWD_ENTRY fwdEntry)
NdisAcquireSpinLock(&ovsIpHelperLock);
RemoveEntryList(&ipn->slink);
NdisReleaseSpinLock(&ovsIpHelperLock);
OvsFreeMemory(ipn);
OvsFreeMemoryWithTag(ipn, OVS_IPHELPER_POOL_TAG);
}
OvsFreeMemory(fwdEntry);
OvsFreeMemoryWithTag(fwdEntry, OVS_IPHELPER_POOL_TAG);
}
@ -886,7 +888,7 @@ OvsRemoveIPForwardEntry(POVS_IPFORWARD_ENTRY ipf)
ASSERT(ipf->refCount == 1);
RemoveEntryList(&ipf->link);
OvsFreeMemory(ipf);
OvsFreeMemoryWithTag(ipf, OVS_IPHELPER_POOL_TAG);
}
@ -908,7 +910,7 @@ OvsRemoveIPNeighEntry(POVS_IPNEIGH_ENTRY ipn)
NdisAcquireSpinLock(&ovsIpHelperLock);
RemoveEntryList(&ipn->slink);
NdisReleaseSpinLock(&ovsIpHelperLock);
OvsFreeMemory(ipn);
OvsFreeMemoryWithTag(ipn, OVS_IPHELPER_POOL_TAG);
}
}
@ -1041,7 +1043,7 @@ OvsCleanupIpHelperRequestList(VOID)
STATUS_DEVICE_NOT_READY,
NULL);
}
OvsFreeMemory(request);
OvsFreeMemoryWithTag(request, OVS_IPHELPER_POOL_TAG);
}
}
@ -1076,8 +1078,8 @@ OvsInternalAdapterUp(UINT32 portNo,
RtlCopyMemory(&ovsInternalNetCfgId, netCfgInstanceId, sizeof (GUID));
RtlZeroMemory(&ovsInternalRow, sizeof (MIB_IF_ROW2));
request =
(POVS_IP_HELPER_REQUEST)OvsAllocateMemory(sizeof (OVS_IP_HELPER_REQUEST));
request = (POVS_IP_HELPER_REQUEST)OvsAllocateMemoryWithTag(
sizeof(OVS_IP_HELPER_REQUEST), OVS_IPHELPER_POOL_TAG);
if (request == NULL) {
OVS_LOG_ERROR("Fail to initialize Internal Adapter");
return;
@ -1103,7 +1105,7 @@ OvsHandleInternalAdapterUp(POVS_IP_HELPER_REQUEST request)
MIB_UNICASTIPADDRESS_ROW ipEntry;
GUID *netCfgInstanceId = &ovsInternalNetCfgId;
OvsFreeMemory(request);
OvsFreeMemoryWithTag(request, OVS_IPHELPER_POOL_TAG);
status = OvsGetIfEntry(&ovsInternalNetCfgId, &ovsInternalRow);
@ -1161,7 +1163,7 @@ OvsEnqueueIpHelperRequest(POVS_IP_HELPER_REQUEST request)
if (ovsInternalPortNo == OVS_DEFAULT_PORT_NO ||
ovsInternalIPConfigured == FALSE) {
NdisReleaseSpinLock(&ovsIpHelperLock);
OvsFreeMemory(request);
OvsFreeMemoryWithTag(request, OVS_IPHELPER_POOL_TAG);
return STATUS_NDIS_ADAPTER_NOT_READY;
} else {
InsertHeadList(&ovsIpHelperRequestList, &request->link);
@ -1185,8 +1187,8 @@ OvsFwdIPHelperRequest(PNET_BUFFER_LIST nbl,
{
POVS_IP_HELPER_REQUEST request;
request =
(POVS_IP_HELPER_REQUEST)OvsAllocateMemory(sizeof (OVS_IP_HELPER_REQUEST));
request = (POVS_IP_HELPER_REQUEST)OvsAllocateMemoryWithTag(
sizeof(OVS_IP_HELPER_REQUEST), OVS_IPHELPER_POOL_TAG);
if (request == NULL) {
return STATUS_INSUFFICIENT_RESOURCES;
@ -1328,15 +1330,15 @@ fwd_handle_nbl:
if (status != STATUS_SUCCESS) {
if (newFWD) {
ASSERT(fwdEntry != NULL);
OvsFreeMemory(fwdEntry);
OvsFreeMemoryWithTag(fwdEntry, OVS_IPHELPER_POOL_TAG);
}
if (newIPF) {
ASSERT(ipf && ipf->refCount == 0);
OvsFreeMemory(ipf);
OvsFreeMemoryWithTag(ipf, OVS_IPHELPER_POOL_TAG);
}
if (newIPN) {
ASSERT(ipn && ipn->refCount == 0);
OvsFreeMemory(ipn);
OvsFreeMemoryWithTag(ipn, OVS_IPHELPER_POOL_TAG);
}
ipAddr = request->fwdReq.tunnelKey.dst;
OVS_LOG_INFO("Fail to handle IP helper request for dst: %d.%d.%d.%d",
@ -1352,7 +1354,7 @@ fwd_handle_nbl:
status,
status == STATUS_SUCCESS ? &fwdInfo : NULL);
}
OvsFreeMemory(request);
OvsFreeMemoryWithTag(request, OVS_IPHELPER_POOL_TAG);
}
@ -1477,7 +1479,7 @@ OvsStartIpHelper(PVOID data)
OvsHandleFwdRequest(req);
break;
default:
OvsFreeMemory(req);
OvsFreeMemoryWithTag(req, OVS_IPHELPER_POOL_TAG);
}
NdisAcquireSpinLock(&ovsIpHelperLock);
}
@ -1539,14 +1541,14 @@ OvsInitIpHelper(NDIS_HANDLE ndisFilterHandle)
HANDLE threadHandle;
UINT32 i;
ovsFwdHashTable = (PLIST_ENTRY)OvsAllocateMemory(sizeof(LIST_ENTRY) *
OVS_FWD_HASH_TABLE_SIZE);
ovsFwdHashTable = (PLIST_ENTRY)OvsAllocateMemoryWithTag(
sizeof(LIST_ENTRY) * OVS_FWD_HASH_TABLE_SIZE, OVS_IPHELPER_POOL_TAG);
ovsRouteHashTable = (PLIST_ENTRY)OvsAllocateMemory(sizeof(LIST_ENTRY) *
OVS_ROUTE_HASH_TABLE_SIZE);
ovsRouteHashTable = (PLIST_ENTRY)OvsAllocateMemoryWithTag(
sizeof(LIST_ENTRY) * OVS_ROUTE_HASH_TABLE_SIZE, OVS_IPHELPER_POOL_TAG);
ovsNeighHashTable = (PLIST_ENTRY)OvsAllocateMemory(sizeof(LIST_ENTRY) *
OVS_NEIGH_HASH_TABLE_SIZE);
ovsNeighHashTable = (PLIST_ENTRY)OvsAllocateMemoryWithTag(
sizeof(LIST_ENTRY) * OVS_NEIGH_HASH_TABLE_SIZE, OVS_IPHELPER_POOL_TAG);
RtlZeroMemory(&ovsInternalRow, sizeof(MIB_IF_ROW2));
RtlZeroMemory(&ovsInternalIPRow, sizeof (MIB_IPINTERFACE_ROW));
@ -1609,15 +1611,15 @@ init_cleanup:
if (status != STATUS_SUCCESS) {
OvsCancelChangeNotification();
if (ovsFwdHashTable) {
OvsFreeMemory(ovsFwdHashTable);
OvsFreeMemoryWithTag(ovsFwdHashTable, OVS_IPHELPER_POOL_TAG);
ovsFwdHashTable = NULL;
}
if (ovsRouteHashTable) {
OvsFreeMemory(ovsRouteHashTable);
OvsFreeMemoryWithTag(ovsRouteHashTable, OVS_IPHELPER_POOL_TAG);
ovsRouteHashTable = NULL;
}
if (ovsNeighHashTable) {
OvsFreeMemory(ovsNeighHashTable);
OvsFreeMemoryWithTag(ovsNeighHashTable, OVS_IPHELPER_POOL_TAG);
ovsNeighHashTable = NULL;
}
if (ovsTableLock) {
@ -1644,9 +1646,9 @@ OvsCleanupIpHelper(VOID)
KernelMode, FALSE, NULL);
ObDereferenceObject(ovsIpHelperThreadContext.threadObject);
OvsFreeMemory(ovsFwdHashTable);
OvsFreeMemory(ovsRouteHashTable);
OvsFreeMemory(ovsNeighHashTable);
OvsFreeMemoryWithTag(ovsFwdHashTable, OVS_IPHELPER_POOL_TAG);
OvsFreeMemoryWithTag(ovsRouteHashTable, OVS_IPHELPER_POOL_TAG);
OvsFreeMemoryWithTag(ovsNeighHashTable, OVS_IPHELPER_POOL_TAG);
NdisFreeRWLock(ovsTableLock);
NdisFreeSpinLock(&ovsIpHelperLock);
@ -1684,6 +1686,6 @@ OvsCancelFwdIpHelperRequest(PNET_BUFFER_LIST nbl)
STATUS_DEVICE_NOT_READY,
NULL);
}
OvsFreeMemory(req);
OvsFreeMemoryWithTag(req, OVS_IPHELPER_POOL_TAG);
}
}

View File

@ -605,7 +605,7 @@ OvsIssueOidRequest(POVS_SWITCH_CONTEXT switchContext,
NDIS_STATUS status;
PNDIS_OID_REQUEST oidRequest;
POVS_OID_CONTEXT oidContext;
ULONG OvsExtOidRequestId = 'ISVO';
ULONG OvsExtOidRequestId = 'ISVO';
DBG_UNREFERENCED_PARAMETER(inputSize);
DBG_UNREFERENCED_PARAMETER(oidInputBuffer);
@ -617,15 +617,17 @@ OvsIssueOidRequest(POVS_SWITCH_CONTEXT switchContext,
ASSERT(oidOutputBuffer == NULL || outputSize != 0);
ASSERT(KeGetCurrentIrql() == PASSIVE_LEVEL);
oidRequest = OvsAllocateMemory(sizeof *oidRequest);
oidRequest = OvsAllocateMemoryWithTag(sizeof *oidRequest,
OVS_OID_POOL_TAG);
if (!oidRequest) {
status = NDIS_STATUS_RESOURCES;
goto done;
}
oidContext = OvsAllocateMemory(sizeof *oidContext);
oidContext = OvsAllocateMemoryWithTag(sizeof *oidContext,
OVS_OID_POOL_TAG);
if (!oidContext) {
OvsFreeMemory(oidRequest);
OvsFreeMemoryWithTag(oidRequest, OVS_OID_POOL_TAG);
status = NDIS_STATUS_RESOURCES;
goto done;
}
@ -684,8 +686,8 @@ OvsIssueOidRequest(POVS_SWITCH_CONTEXT switchContext,
status = oidContext->status;
ASSERT(status != NDIS_STATUS_PENDING);
OvsFreeMemory(oidRequest);
OvsFreeMemory(oidContext);
OvsFreeMemoryWithTag(oidRequest, OVS_OID_POOL_TAG);
OvsFreeMemoryWithTag(oidContext, OVS_OID_POOL_TAG);
done:
OVS_LOG_TRACE("Exit: status %8x.", status);
@ -710,7 +712,8 @@ OvsQuerySwitchActivationComplete(POVS_SWITCH_CONTEXT switchContext,
OVS_LOG_TRACE("Enter: switchContext: %p, switchActive: %p",
switchContext, switchActive);
switchParams = OvsAllocateMemory(sizeof *switchParams);
switchParams = OvsAllocateMemoryWithTag(sizeof *switchParams,
OVS_OID_POOL_TAG);
if (!switchParams) {
status = NDIS_STATUS_RESOURCES;
goto done;
@ -741,7 +744,7 @@ OvsQuerySwitchActivationComplete(POVS_SWITCH_CONTEXT switchContext,
*switchActive = switchParams->IsActive;
}
OvsFreeMemory(switchParams);
OvsFreeMemoryWithTag(switchParams, OVS_OID_POOL_TAG);
done:
OVS_LOG_TRACE("Exit: status %8x, switchActive: %d.",
@ -769,7 +772,7 @@ OvsGetPortsOnSwitch(POVS_SWITCH_CONTEXT switchContext,
do {
UINT32 reqdArraySize;
portArray = OvsAllocateMemory(arraySize);
portArray = OvsAllocateMemoryWithTag(arraySize, OVS_OID_POOL_TAG);
if (!portArray) {
status = NDIS_STATUS_RESOURCES;
goto done;
@ -794,7 +797,7 @@ OvsGetPortsOnSwitch(POVS_SWITCH_CONTEXT switchContext,
break;
}
OvsFreeMemory(portArray);
OvsFreeMemoryWithTag(portArray, OVS_OID_POOL_TAG);
arraySize = reqdArraySize;
if (status != NDIS_STATUS_INVALID_LENGTH) {
break;
@ -827,7 +830,7 @@ OvsGetNicsOnSwitch(POVS_SWITCH_CONTEXT switchContext,
do {
UINT32 reqdArraySize;
nicArray = OvsAllocateMemory(arraySize);
nicArray = OvsAllocateMemoryWithTag(arraySize, OVS_OID_POOL_TAG);
if (!nicArray) {
status = NDIS_STATUS_RESOURCES;
goto done;
@ -852,7 +855,7 @@ OvsGetNicsOnSwitch(POVS_SWITCH_CONTEXT switchContext,
break;
}
OvsFreeMemory(nicArray);
OvsFreeMemoryWithTag(nicArray, OVS_OID_POOL_TAG);
arraySize = reqdArraySize;
if (status != NDIS_STATUS_INVALID_LENGTH) {
break;
@ -863,3 +866,17 @@ done:
OVS_LOG_TRACE("Exit: status %8x.", status);
return status;
}
/*
 * Releases a switch port array that was allocated with OVS_OID_POOL_TAG
 * (the tag OvsGetPortsOnSwitch() uses for its allocation).
 *
 * A NULL 'portsArray' is accepted and ignored, so callers can invoke this
 * unconditionally from their cleanup paths.
 */
VOID OvsFreeSwitchPortsArray(PNDIS_SWITCH_PORT_ARRAY portsArray)
{
    if (portsArray == NULL) {
        return;
    }

    OvsFreeMemoryWithTag(portsArray, OVS_OID_POOL_TAG);
}
/*
 * Releases a switch NIC array that was allocated with OVS_OID_POOL_TAG
 * (the tag OvsGetNicsOnSwitch() uses for its allocation).
 *
 * A NULL 'nicsArray' is accepted and ignored, so callers can invoke this
 * unconditionally from their cleanup paths.
 */
VOID OvsFreeSwitchNicsArray(PNDIS_SWITCH_NIC_ARRAY nicsArray)
{
    if (nicsArray == NULL) {
        return;
    }

    OvsFreeMemoryWithTag(nicsArray, OVS_OID_POOL_TAG);
}

View File

@ -23,4 +23,7 @@ NDIS_STATUS OvsGetPortsOnSwitch(POVS_SWITCH_CONTEXT switchContext,
PNDIS_SWITCH_PORT_ARRAY *portArrayOut);
NDIS_STATUS OvsGetNicsOnSwitch(POVS_SWITCH_CONTEXT switchContext,
PNDIS_SWITCH_NIC_ARRAY *nicArrayOut);
VOID OvsFreeSwitchPortsArray(PNDIS_SWITCH_PORT_ARRAY portsArray);
VOID OvsFreeSwitchNicsArray(PNDIS_SWITCH_NIC_ARRAY nicsArray);
#endif /* __OID_H_ */

View File

@ -42,6 +42,12 @@ extern PNDIS_SPIN_LOCK gOvsCtrlLock;
extern NDIS_HANDLE gOvsExtDriverHandle;
extern NDIS_HANDLE gOvsExtDriverObject;
/*
* Reference count used to prevent premature deallocation of the global switch
* context structure, gOvsSwitchContext.
*/
volatile LONG gOvsSwitchContextRefCount = 1;
static NDIS_STATUS OvsCreateSwitch(NDIS_HANDLE ndisFilterHandle,
POVS_SWITCH_CONTEXT *switchContextOut);
static NDIS_STATUS OvsInitSwitchContext(POVS_SWITCH_CONTEXT switchContext);
@ -168,8 +174,8 @@ OvsCreateSwitch(NDIS_HANDLE ndisFilterHandle,
OVS_LOG_TRACE("Enter: Create switch object");
switchContext =
(POVS_SWITCH_CONTEXT) OvsAllocateMemory(sizeof(OVS_SWITCH_CONTEXT));
switchContext = (POVS_SWITCH_CONTEXT) OvsAllocateMemoryWithTag(
sizeof(OVS_SWITCH_CONTEXT), OVS_SWITCH_POOL_TAG);
if (switchContext == NULL) {
status = NDIS_STATUS_RESOURCES;
goto create_switch_done;
@ -187,7 +193,7 @@ OvsCreateSwitch(NDIS_HANDLE ndisFilterHandle,
if (status != NDIS_STATUS_SUCCESS) {
OVS_LOG_ERROR("OvsExtAttach: Extension is running in "
"non-switch environment.");
OvsFreeMemory(switchContext);
OvsFreeMemoryWithTag(switchContext, OVS_SWITCH_POOL_TAG);
goto create_switch_done;
}
@ -198,14 +204,14 @@ OvsCreateSwitch(NDIS_HANDLE ndisFilterHandle,
status = OvsInitSwitchContext(switchContext);
if (status != NDIS_STATUS_SUCCESS) {
OvsFreeMemory(switchContext);
OvsFreeMemoryWithTag(switchContext, OVS_SWITCH_POOL_TAG);
goto create_switch_done;
}
status = OvsTunnelFilterInitialize(gOvsExtDriverObject);
if (status != NDIS_STATUS_SUCCESS) {
OvsUninitSwitchContext(switchContext);
OvsFreeMemory(switchContext);
OvsFreeMemoryWithTag(switchContext, OVS_SWITCH_POOL_TAG);
goto create_switch_done;
}
*switchContextOut = switchContext;
@ -264,7 +270,7 @@ OvsDeleteSwitch(POVS_SWITCH_CONTEXT switchContext)
OvsTunnelFilterUninitialize(gOvsExtDriverObject);
OvsClearAllSwitchVports(switchContext);
OvsUninitSwitchContext(switchContext);
OvsFreeMemory(switchContext);
OvsFreeMemoryWithTag(switchContext, OVS_SWITCH_POOL_TAG);
}
OVS_LOG_TRACE("Exit: deleted switch %p dpNo: %d", switchContext, dpNo);
}
@ -358,14 +364,14 @@ OvsInitSwitchContext(POVS_SWITCH_CONTEXT switchContext)
switchContext->dispatchLock =
NdisAllocateRWLock(switchContext->NdisFilterHandle);
switchContext->portNoHashArray = (PLIST_ENTRY)
OvsAllocateMemory(sizeof(LIST_ENTRY) * OVS_MAX_VPORT_ARRAY_SIZE);
switchContext->ovsPortNameHashArray = (PLIST_ENTRY)
OvsAllocateMemory(sizeof (LIST_ENTRY) * OVS_MAX_VPORT_ARRAY_SIZE);
switchContext->portIdHashArray= (PLIST_ENTRY)
OvsAllocateMemory(sizeof (LIST_ENTRY) * OVS_MAX_VPORT_ARRAY_SIZE);
switchContext->pidHashArray = (PLIST_ENTRY)
OvsAllocateMemory(sizeof(LIST_ENTRY) * OVS_MAX_PID_ARRAY_SIZE);
switchContext->portNoHashArray = (PLIST_ENTRY)OvsAllocateMemoryWithTag(
sizeof(LIST_ENTRY) * OVS_MAX_VPORT_ARRAY_SIZE, OVS_SWITCH_POOL_TAG);
switchContext->ovsPortNameHashArray = (PLIST_ENTRY)OvsAllocateMemoryWithTag(
sizeof(LIST_ENTRY) * OVS_MAX_VPORT_ARRAY_SIZE, OVS_SWITCH_POOL_TAG);
switchContext->portIdHashArray= (PLIST_ENTRY)OvsAllocateMemoryWithTag(
sizeof(LIST_ENTRY) * OVS_MAX_VPORT_ARRAY_SIZE, OVS_SWITCH_POOL_TAG);
switchContext->pidHashArray = (PLIST_ENTRY)OvsAllocateMemoryWithTag(
sizeof(LIST_ENTRY) * OVS_MAX_PID_ARRAY_SIZE, OVS_SWITCH_POOL_TAG);
status = OvsAllocateFlowTable(&switchContext->datapath, switchContext);
if (status == NDIS_STATUS_SUCCESS) {
@ -381,17 +387,20 @@ OvsInitSwitchContext(POVS_SWITCH_CONTEXT switchContext)
NdisFreeRWLock(switchContext->dispatchLock);
}
if (switchContext->portNoHashArray) {
OvsFreeMemory(switchContext->portNoHashArray);
OvsFreeMemoryWithTag(switchContext->portNoHashArray,
OVS_SWITCH_POOL_TAG);
}
if (switchContext->ovsPortNameHashArray) {
OvsFreeMemory(switchContext->ovsPortNameHashArray);
OvsFreeMemoryWithTag(switchContext->ovsPortNameHashArray,
OVS_SWITCH_POOL_TAG);
}
if (switchContext->portIdHashArray) {
OvsFreeMemory(switchContext->portIdHashArray);
OvsFreeMemoryWithTag(switchContext->portIdHashArray,
OVS_SWITCH_POOL_TAG);
}
if (switchContext->pidHashArray) {
OvsFreeMemory(switchContext->pidHashArray);
OvsFreeMemoryWithTag(switchContext->pidHashArray,
OVS_SWITCH_POOL_TAG);
}
OvsDeleteFlowTable(&switchContext->datapath);
@ -420,6 +429,7 @@ OvsInitSwitchContext(POVS_SWITCH_CONTEXT switchContext)
switchContext->isActivateFailed = FALSE;
switchContext->dpNo = OVS_DP_NUMBER;
ovsTimeIncrementPerTick = KeQueryTimeIncrement() / 10000;
OVS_LOG_TRACE("Exit: Succesfully initialized switchContext: %p",
switchContext);
return NDIS_STATUS_SUCCESS;
@ -427,6 +437,12 @@ OvsInitSwitchContext(POVS_SWITCH_CONTEXT switchContext)
/*
 * Drops one reference on the global switch context reference count by
 * delegating to OvsReleaseSwitchContext(). The actual teardown
 * (OvsDeleteSwitchContext()) only runs from there, when the count that was
 * observed before the decrement is 1.
 *
 * NOTE(review): callers in OvsCreateSwitch()/OvsDeleteSwitch() free
 * 'switchContext' right after this call; confirm that no other reference
 * holder can still be using the context at that point.
 */
static VOID
OvsUninitSwitchContext(POVS_SWITCH_CONTEXT switchContext)
{
OvsReleaseSwitchContext(switchContext);
}
VOID
OvsDeleteSwitchContext(POVS_SWITCH_CONTEXT switchContext)
{
OVS_LOG_TRACE("Enter: Delete switchContext:%p", switchContext);
@ -437,19 +453,66 @@ OvsUninitSwitchContext(POVS_SWITCH_CONTEXT switchContext)
NdisFreeRWLock(switchContext->dispatchLock);
switchContext->dispatchLock = NULL;
NdisFreeSpinLock(&(switchContext->pidHashLock));
OvsFreeMemory(switchContext->ovsPortNameHashArray);
OvsFreeMemoryWithTag(switchContext->ovsPortNameHashArray,
OVS_SWITCH_POOL_TAG);
switchContext->ovsPortNameHashArray = NULL;
OvsFreeMemory(switchContext->portIdHashArray);
OvsFreeMemoryWithTag(switchContext->portIdHashArray,
OVS_SWITCH_POOL_TAG);
switchContext->portIdHashArray = NULL;
OvsFreeMemory(switchContext->portNoHashArray);
OvsFreeMemoryWithTag(switchContext->portNoHashArray,
OVS_SWITCH_POOL_TAG);
switchContext->portNoHashArray = NULL;
OvsFreeMemory(switchContext->pidHashArray);
OvsFreeMemoryWithTag(switchContext->pidHashArray,
OVS_SWITCH_POOL_TAG);
switchContext->pidHashArray = NULL;
OvsDeleteFlowTable(&switchContext->datapath);
OvsCleanupBufferPool(switchContext);
OVS_LOG_TRACE("Exit: Delete switchContext: %p", switchContext);
}
/*
 * Drops one reference from gOvsSwitchContextRefCount.
 *
 * The decrement is done with a compare-and-exchange retry loop so that the
 * counter never goes below zero: a count of 0 is left at 0. When the value
 * observed just before a successful exchange is 1 (i.e. this call released
 * the last reference), OvsDeleteSwitchContext() is invoked on
 * 'switchContext'.
 */
VOID
OvsReleaseSwitchContext(POVS_SWITCH_CONTEXT switchContext)
{
    LONG observed;

    for (;;) {
        LONG desired;

        observed = gOvsSwitchContextRefCount;
        /* Saturate at zero instead of wrapping to a negative count. */
        desired = (observed != 0) ? observed - 1 : 0;

        if (InterlockedCompareExchange(&gOvsSwitchContextRefCount,
                                       desired,
                                       observed) == observed) {
            /* Nobody changed the counter between the read and the swap. */
            break;
        }
    }

    if (observed == 1) {
        OvsDeleteSwitchContext(switchContext);
    }
}
/*
 * Tries to take one reference on gOvsSwitchContextRefCount.
 *
 * The increment is done with a compare-and-exchange retry loop. A count of
 * 0 is never incremented: in that case the counter is left at 0 and FALSE
 * is returned. Otherwise the counter is bumped by one and TRUE is returned;
 * such a reference is dropped with OvsReleaseSwitchContext().
 */
BOOLEAN
OvsAcquireSwitchContext(VOID)
{
    LONG observed;

    for (;;) {
        LONG desired;

        observed = gOvsSwitchContextRefCount;
        /* Once the count has reached zero it must stay there. */
        desired = (observed != 0) ? observed + 1 : 0;

        if (InterlockedCompareExchange(&gOvsSwitchContextRefCount,
                                       desired,
                                       observed) == observed) {
            /* Nobody changed the counter between the read and the swap. */
            break;
        }
    }

    return (observed != 0) ? TRUE : FALSE;
}
/*
* --------------------------------------------------------------------------
* This function activates the switch by initializing it with all the runtime

View File

@ -202,7 +202,6 @@ OvsAcquireDatapathWrite(OVS_DATAPATH *datapath,
dispatch ? NDIS_RWL_AT_DISPATCH_LEVEL : 0);
}
static __inline VOID
OvsReleaseDatapath(OVS_DATAPATH *datapath,
LOCK_STATE_EX *lockState)
@ -211,6 +210,11 @@ OvsReleaseDatapath(OVS_DATAPATH *datapath,
NdisReleaseRWLock(datapath->lock, lockState);
}
BOOLEAN
OvsAcquireSwitchContext(VOID);
VOID
OvsReleaseSwitchContext(POVS_SWITCH_CONTEXT switchContext);
PVOID OvsGetExternalVport();

View File

@ -111,6 +111,7 @@ DEFINE_GUID(
PDEVICE_OBJECT gDeviceObject;
HANDLE gEngineHandle = NULL;
HANDLE gBfeSubscriptionHandle = NULL;
UINT32 gCalloutIdV4;
@ -173,17 +174,20 @@ OvsTunnelAddSystemProvider(HANDLE handle)
provider.displayData.name = OVS_TUNNEL_PROVIDER_NAME;
provider.displayData.description = OVS_TUNNEL_PROVIDER_DESC;
/*
* Since we always want the provider to be present, it's easiest to add
* it as persistent object during driver load.
*/
* Since we always want the provider to be present, it's easiest to add
* it as persistent object during driver load.
*/
provider.flags = FWPM_PROVIDER_FLAG_PERSISTENT;
status = FwpmProviderAdd(handle,
&provider,
NULL);
if (!NT_SUCCESS(status)) {
OVS_LOG_ERROR("Fail to add WFP provider, status: %x.", status);
break;
if (STATUS_FWP_ALREADY_EXISTS != status) {
OVS_LOG_ERROR("Failed to add WFP provider, status: %x.",
status);
break;
}
}
status = FwpmTransactionCommit(handle);
@ -541,3 +545,88 @@ Exit:
return status;
}
/*
 * Callback registered through FwpmBfeStateSubscribeChanges() by
 * OvsSubscribeBfeStateChanges().
 *
 * Only the FWPM_SERVICE_RUNNING state is acted upon: an engine handle is
 * opened, the OVS tunnel provider is added through it when the open
 * produced a handle, and the handle is closed again. Every other state is
 * ignored. 'context' is unused.
 */
VOID NTAPI
OvsBfeStateChangeCallback(PVOID context,
                          FWPM_SERVICE_STATE bfeState)
{
    HANDLE engine = NULL;

    DBG_UNREFERENCED_PARAMETER(context);

    if (bfeState != FWPM_SERVICE_RUNNING) {
        return;
    }

    OvsTunnelEngineOpen(&engine);
    if (engine != NULL) {
        OvsTunnelAddSystemProvider(engine);
    }
    OvsTunnelEngineClose(&engine);
}
/*
 * Registers OvsBfeStateChangeCallback() for BFE state change notifications
 * and stores the subscription in gBfeSubscriptionHandle.
 *
 * If a subscription handle is already set, nothing is done and
 * STATUS_SUCCESS is returned. Otherwise the status of
 * FwpmBfeStateSubscribeChanges() is returned; a failure is logged.
 */
NTSTATUS
OvsSubscribeBfeStateChanges(PVOID deviceObject)
{
    NTSTATUS result;

    if (gBfeSubscriptionHandle) {
        /* Already subscribed. */
        return STATUS_SUCCESS;
    }

    result = FwpmBfeStateSubscribeChanges(deviceObject,
                                          OvsBfeStateChangeCallback,
                                          NULL,
                                          &gBfeSubscriptionHandle);
    if (!NT_SUCCESS(result)) {
        OVS_LOG_ERROR(
            "Failed to open subscribe BFE state change callback, status: %x.",
            result);
    }

    return result;
}
/*
 * Cancels the BFE state change subscription held in
 * gBfeSubscriptionHandle, if there is one.
 *
 * A failure of FwpmBfeStateUnsubscribeChanges() is logged only; the handle
 * is reset to NULL in either case. Calling this without an active
 * subscription is a no-op.
 */
VOID
OvsUnsubscribeBfeStateChanges()
{
    NTSTATUS result;

    if (!gBfeSubscriptionHandle) {
        return;
    }

    result = FwpmBfeStateUnsubscribeChanges(gBfeSubscriptionHandle);
    if (!NT_SUCCESS(result)) {
        OVS_LOG_ERROR(
            "Failed to open unsubscribe BFE state change callback, status: %x.",
            result);
    }

    gBfeSubscriptionHandle = NULL;
}
/*
 * Adds the OVS tunnel provider, either right away or once the BFE
 * reports that it is running.
 *
 * A BFE state change subscription is set up first. If that fails, nothing
 * else is attempted. If the BFE is already in the FWPM_SERVICE_RUNNING
 * state, the provider is added immediately through a temporary engine
 * handle and the subscription is dropped again. Otherwise the subscription
 * is kept, so OvsBfeStateChangeCallback() can add the provider later.
 */
VOID OvsRegisterSystemProvider(PVOID deviceObject)
{
    HANDLE engine = NULL;

    if (!NT_SUCCESS(OvsSubscribeBfeStateChanges(deviceObject))) {
        return;
    }

    if (FwpmBfeStateGet() != FWPM_SERVICE_RUNNING) {
        /* Not running yet: leave the subscription in place. */
        return;
    }

    OvsTunnelEngineOpen(&engine);
    if (engine != NULL) {
        OvsTunnelAddSystemProvider(engine);
    }
    OvsTunnelEngineClose(&engine);

    OvsUnsubscribeBfeStateChanges();
}
/*
 * Counterpart of OvsRegisterSystemProvider(): opens a temporary engine
 * handle, removes the OVS tunnel provider through it when the open
 * produced a handle, closes the handle, and finally drops any BFE state
 * change subscription that is still active.
 */
VOID OvsUnregisterSystemProvider()
{
HANDLE handle = NULL;
OvsTunnelEngineOpen(&handle);
/* 'handle' stays NULL if the engine could not be opened. */
if (handle) {
OvsTunnelRemoveSystemProvider(handle);
}
OvsTunnelEngineClose(&handle);
OvsUnsubscribeBfeStateChanges();
}

View File

@ -22,12 +22,8 @@ NTSTATUS OvsTunnelFilterInitialize(PDRIVER_OBJECT driverObject);
VOID OvsTunnelFilterUninitialize(PDRIVER_OBJECT driverObject);
NTSTATUS OvsTunnelEngineOpen(HANDLE *handle);
VOID OvsRegisterSystemProvider(PVOID deviceObject);
VOID OvsTunnelEngineClose(HANDLE *handle);
VOID OvsTunnelAddSystemProvider(HANDLE handle);
VOID OvsTunnelRemoveSystemProvider(HANDLE handle);
VOID OvsUnregisterSystemProvider();
#endif /* __TUNNEL_INTF_H_ */

View File

@ -85,7 +85,7 @@ OvsPurgePacketQueue(POVS_USER_PACKET_QUEUE queue,
LIST_FORALL_SAFE(&tmp, link, next) {
RemoveEntryList(link);
elem = CONTAINING_RECORD(link, OVS_PACKET_QUEUE_ELEM, link);
OvsFreeMemory(elem);
OvsFreeMemoryWithTag(elem, OVS_USER_POOL_TAG);
}
}
@ -132,13 +132,13 @@ OvsCleanupPacketQueue(POVS_OPEN_INSTANCE instance)
LIST_FORALL_SAFE(&tmp, link, next) {
RemoveEntryList(link);
elem = CONTAINING_RECORD(link, OVS_PACKET_QUEUE_ELEM, link);
OvsFreeMemory(elem);
OvsFreeMemoryWithTag(elem, OVS_USER_POOL_TAG);
}
if (irp) {
OvsCompleteIrpRequest(irp, 0, STATUS_SUCCESS);
}
if (queue) {
OvsFreeMemory(queue);
OvsFreeMemoryWithTag(queue, OVS_USER_POOL_TAG);
}
/* Verify if gOvsSwitchContext exists. */
@ -170,7 +170,8 @@ OvsSubscribeDpIoctl(PVOID instanceP,
OvsReleasePidHashLock();
} else if (instance->packetQueue == NULL && join) {
queue = (POVS_USER_PACKET_QUEUE) OvsAllocateMemory(sizeof *queue);
queue = (POVS_USER_PACKET_QUEUE) OvsAllocateMemoryWithTag(
sizeof *queue, OVS_USER_POOL_TAG);
if (queue == NULL) {
return STATUS_NO_MEMORY;
}
@ -248,7 +249,7 @@ OvsReadDpIoctl(PFILE_OBJECT fileObject,
}
*replyLen = len;
OvsFreeMemory(elem);
OvsFreeMemoryWithTag(elem, OVS_USER_POOL_TAG);
}
return STATUS_SUCCESS;
}
@ -762,7 +763,7 @@ OvsQueuePackets(PLIST_ENTRY packetList,
while (!IsListEmpty(&dropPackets)) {
link = RemoveHeadList(&dropPackets);
elem = CONTAINING_RECORD(link, OVS_PACKET_QUEUE_ELEM, link);
OvsFreeMemory(elem);
OvsFreeMemoryWithTag(elem, OVS_USER_POOL_TAG);
num++;
}
@ -1060,7 +1061,8 @@ OvsCreateQueueNlPacket(PVOID userData,
dataLen + extraLen);
allocLen = sizeof (OVS_PACKET_QUEUE_ELEM) + nlMsgSize;
elem = (POVS_PACKET_QUEUE_ELEM)OvsAllocateMemory(allocLen);
elem = (POVS_PACKET_QUEUE_ELEM)OvsAllocateMemoryWithTag(allocLen,
OVS_USER_POOL_TAG);
if (elem == NULL) {
ovsUserStats.dropDuetoResource++;
return NULL;
@ -1163,6 +1165,6 @@ OvsCreateQueueNlPacket(PVOID userData,
return elem;
fail:
OvsFreeMemory(elem);
OvsFreeMemoryWithTag(elem, OVS_USER_POOL_TAG);
return NULL;
}

View File

@ -24,6 +24,21 @@
extern NDIS_HANDLE gOvsExtDriverHandle;
/*
 * Allocates 'size' bytes through NdisAllocateMemoryWithTagPriority(), using
 * the extension's driver handle, pool tag 'tag' and NormalPoolPriority.
 *
 * Returns the allocated buffer, or NULL on failure. The NDIS allocator is
 * called with a 32-bit length, so a 'size' that does not fit in a UINT32 is
 * rejected with NULL instead of being silently truncated (which would hand
 * the caller a buffer smaller than requested).
 *
 * Memory obtained here is released with OvsFreeMemoryWithTag().
 */
VOID*
OvsAllocateMemoryWithTag(size_t size, ULONG tag)
{
    UINT32 length = (UINT32)size;

    OVS_VERIFY_IRQL_LE(DISPATCH_LEVEL);

    /* The narrowing cast must round-trip, otherwise 'size' was too large. */
    if ((size_t)length != size) {
        return NULL;
    }

    return NdisAllocateMemoryWithTagPriority(gOvsExtDriverHandle,
                                             length, tag,
                                             NormalPoolPriority);
}
/*
 * Frees 'ptr' through NdisFreeMemoryWithTagPriority(), using the
 * extension's driver handle and pool tag 'tag'.
 *
 * 'ptr' must not be NULL; this is only checked by an ASSERT.
 * NOTE(review): 'tag' is presumably expected to match the tag passed to
 * OvsAllocateMemoryWithTag() for this buffer, as the visible callers do.
 */
VOID
OvsFreeMemoryWithTag(VOID *ptr, ULONG tag)
{
ASSERT(ptr);
NdisFreeMemoryWithTagPriority(gOvsExtDriverHandle, ptr, tag);
}
VOID *
OvsAllocateMemory(size_t size)
{

View File

@ -23,10 +23,22 @@
#define OVS_NBL_ONLY_POOL_TAG 'OSVO'
#define OVS_NET_BUFFER_POOL_TAG 'NSVO'
#define OVS_OTHER_POOL_TAG 'MSVO'
#define OVS_MDL_POOL_TAG 'BSVO'
#define OVS_DATAPATH_POOL_TAG 'DSVO'
#define OVS_EVENT_POOL_TAG 'ESVO'
#define OVS_FLOW_POOL_TAG 'LSVO'
#define OVS_VXLAN_POOL_TAG 'XSVO'
#define OVS_IPHELPER_POOL_TAG 'HSVO'
#define OVS_OID_POOL_TAG 'ASVO'
#define OVS_SWITCH_POOL_TAG 'SSVO'
#define OVS_USER_POOL_TAG 'USVO'
#define OVS_VPORT_POOL_TAG 'PSVO'
VOID *OvsAllocateMemory(size_t size);
VOID *OvsAllocateMemoryWithTag(size_t size, ULONG tag);
VOID *OvsAllocateAlignedMemory(size_t size, UINT16 align);
VOID OvsFreeMemory(VOID *ptr);
VOID OvsFreeMemoryWithTag(VOID *ptr, ULONG tag);
VOID OvsFreeAlignedMemory(VOID *ptr);
#define LIST_FORALL(_headPtr, _itemPtr) \

View File

@ -167,8 +167,8 @@ HvUpdatePort(POVS_SWITCH_CONTEXT switchContext,
* Update properties only for NETDEV ports for supporting PS script
* We don't allow changing the names of the internal or external ports
*/
if (vport == NULL || ( vport->portType != NdisSwitchPortTypeSynthetic) ||
( vport->portType != NdisSwitchPortTypeEmulated)) {
if (vport == NULL || (( vport->portType != NdisSwitchPortTypeSynthetic) &&
( vport->portType != NdisSwitchPortTypeEmulated))) {
goto update_port_done;
}
@ -306,7 +306,7 @@ HvCreateNic(POVS_SWITCH_CONTEXT switchContext,
OvsInitPhysNicVport(vport, virtExtVport, nicParam->NicIndex);
status = InitHvVportCommon(switchContext, vport, TRUE);
if (status != NDIS_STATUS_SUCCESS) {
OvsFreeMemory(vport);
OvsFreeMemoryWithTag(vport, OVS_VPORT_POOL_TAG);
goto add_nic_done;
}
}
@ -404,6 +404,7 @@ HvUpdateNic(POVS_SWITCH_CONTEXT switchContext,
nicParam->PortId,
nicParam->NicIndex);
if (vport == NULL) {
NdisReleaseRWLock(switchContext->dispatchLock, &lockState);
OVS_LOG_WARN("Vport search failed.");
goto update_nic_done;
}
@ -658,7 +659,7 @@ OvsFindVportByHvNameA(POVS_SWITCH_CONTEXT switchContext,
SIZE_T wstrSize = length * sizeof(WCHAR);
UINT i;
PWSTR wsName = OvsAllocateMemory(wstrSize);
PWSTR wsName = OvsAllocateMemoryWithTag(wstrSize, OVS_VPORT_POOL_TAG);
if (!wsName) {
return NULL;
}
@ -666,7 +667,7 @@ OvsFindVportByHvNameA(POVS_SWITCH_CONTEXT switchContext,
wsName[i] = name[i];
}
vport = OvsFindVportByHvNameW(switchContext, wsName, wstrSize);
OvsFreeMemory(wsName);
OvsFreeMemoryWithTag(wsName, OVS_VPORT_POOL_TAG);
return vport;
}
@ -703,7 +704,8 @@ POVS_VPORT_ENTRY
OvsAllocateVport(VOID)
{
POVS_VPORT_ENTRY vport;
vport = (POVS_VPORT_ENTRY)OvsAllocateMemory(sizeof (OVS_VPORT_ENTRY));
vport = (POVS_VPORT_ENTRY)OvsAllocateMemoryWithTag(
sizeof(OVS_VPORT_ENTRY), OVS_VPORT_POOL_TAG);
if (vport == NULL) {
return NULL;
}
@ -1073,7 +1075,7 @@ OvsRemoveAndDeleteVport(POVS_SWITCH_CONTEXT switchContext,
ASSERT(switchContext->numPhysicalNics == 0);
switchContext->virtualExternalPortId = 0;
switchContext->virtualExternalVport = NULL;
OvsFreeMemory(vport);
OvsFreeMemoryWithTag(vport, OVS_VPORT_POOL_TAG);
if (vportDeallocated) {
*vportDeallocated = TRUE;
}
@ -1151,7 +1153,7 @@ OvsRemoveAndDeleteVport(POVS_SWITCH_CONTEXT switchContext,
} else {
switchContext->numNonHvVports--;
}
OvsFreeMemory(vport);
OvsFreeMemoryWithTag(vport, OVS_VPORT_POOL_TAG);
if (vportDeallocated) {
*vportDeallocated = TRUE;
}
@ -1189,19 +1191,20 @@ OvsAddConfiguredSwitchPorts(POVS_SWITCH_CONTEXT switchContext)
OvsInitVportWithPortParam(vport, portParam);
status = InitHvVportCommon(switchContext, vport, TRUE);
if (status != NDIS_STATUS_SUCCESS) {
OvsFreeMemory(vport);
OvsFreeMemoryWithTag(vport, OVS_VPORT_POOL_TAG);
goto cleanup;
}
}
cleanup:
if (status != NDIS_STATUS_SUCCESS) {
OvsClearAllSwitchVports(switchContext);
}
if (portArray != NULL) {
OvsFreeMemory(portArray);
}
OvsFreeSwitchPortsArray(portArray);
OVS_LOG_TRACE("Exit: status: %x", status);
return status;
}
@ -1248,7 +1251,7 @@ OvsInitConfiguredSwitchNics(POVS_SWITCH_CONTEXT switchContext)
nicParam->NicIndex);
status = InitHvVportCommon(switchContext, vport, TRUE);
if (status != NDIS_STATUS_SUCCESS) {
OvsFreeMemory(vport);
OvsFreeMemoryWithTag(vport, OVS_VPORT_POOL_TAG);
vport = NULL;
}
}
@ -1268,9 +1271,8 @@ OvsInitConfiguredSwitchNics(POVS_SWITCH_CONTEXT switchContext)
}
cleanup:
if (nicArray != NULL) {
OvsFreeMemory(nicArray);
}
OvsFreeSwitchNicsArray(nicArray);
OVS_LOG_TRACE("Exit: status: %x", status);
return status;
}
@ -2129,7 +2131,7 @@ Cleanup:
OvsCleanupVxlanTunnel(vport);
}
}
OvsFreeMemory(vport);
OvsFreeMemoryWithTag(vport, OVS_VPORT_POOL_TAG);
}
NlBuildErrorMsg(msgIn, msgError, nlError);

View File

@ -59,7 +59,8 @@ OvsInitVxlanTunnel(POVS_VPORT_ENTRY vport,
{
POVS_VXLAN_VPORT vxlanPort;
vxlanPort = OvsAllocateMemory(sizeof (*vxlanPort));
vxlanPort = OvsAllocateMemoryWithTag(sizeof (*vxlanPort),
OVS_VXLAN_POOL_TAG);
if (vxlanPort == NULL) {
return STATUS_INSUFFICIENT_RESOURCES;
}
@ -86,7 +87,7 @@ OvsCleanupVxlanTunnel(POVS_VPORT_ENTRY vport)
return;
}
OvsFreeMemory(vport->priv);
OvsFreeMemoryWithTag(vport->priv, OVS_VXLAN_POOL_TAG);
vport->priv = NULL;
}

View File

@ -2,7 +2,14 @@
#
# Some modules should be built but not distributed, e.g. third-party
# hwtable modules.
both_modules = openvswitch
build_multi_modules = \
openvswitch
both_modules = \
$(build_multi_modules) \
vport_geneve \
vport_gre \
vport_lisp \
vport_vxlan
build_modules = $(both_modules) # Modules to build
dist_modules = $(both_modules) # Modules to distribute
@ -14,12 +21,13 @@ openvswitch_sources = \
flow_netlink.c \
flow_table.c \
vport.c \
vport-geneve.c \
vport-gre.c \
vport-internal_dev.c \
vport-lisp.c \
vport-netdev.c \
vport-vxlan.c
vport-netdev.c
vport_geneve_sources = vport-geneve.c
vport_vxlan_sources = vport-vxlan.c
vport_gre_sources = vport-gre.c
vport_lisp_sources = vport-lisp.c
openvswitch_headers = \
compat.h \

View File

@ -25,10 +25,10 @@
#include <net/route.h>
#include <net/xfrm.h>
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,13,0)
#define GROUP_ID(grp) 0
#else
#ifdef HAVE_GENL_MULTICAST_GROUP_WITH_ID
#define GROUP_ID(grp) ((grp)->id)
#else
#define GROUP_ID(grp) 0
#endif
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,36)

View File

@ -61,6 +61,7 @@
#include "vport-netdev.h"
int ovs_net_id __read_mostly;
EXPORT_SYMBOL_GPL(ovs_net_id);
static struct genl_family dp_packet_genl_family;
static struct genl_family dp_flow_genl_family;
@ -134,6 +135,7 @@ int lockdep_ovsl_is_held(void)
else
return 1;
}
EXPORT_SYMBOL_GPL(lockdep_ovsl_is_held);
#endif
static int queue_gso_packets(struct datapath *dp, struct sk_buff *,
@ -1900,6 +1902,7 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
return -ENOMEM;
ovs_lock();
restart:
dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
err = -ENODEV;
if (!dp)
@ -1931,8 +1934,11 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
vport = new_vport(&parms);
err = PTR_ERR(vport);
if (IS_ERR(vport))
if (IS_ERR(vport)) {
if (err == -EAGAIN)
goto restart;
goto exit_unlock_free;
}
err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
info->snd_seq, 0, OVS_VPORT_CMD_NEW);
@ -2291,12 +2297,18 @@ static int __init dp_init(void)
if (err)
goto error_netns_exit;
err = ovs_netdev_init();
if (err)
goto error_unreg_notifier;
err = dp_register_genl();
if (err < 0)
goto error_unreg_notifier;
goto error_unreg_netdev;
return 0;
error_unreg_netdev:
ovs_netdev_exit();
error_unreg_notifier:
unregister_netdevice_notifier(&ovs_dp_device_notifier);
error_netns_exit:
@ -2316,6 +2328,7 @@ error:
static void dp_cleanup(void)
{
dp_unregister_genl(ARRAY_SIZE(dp_genl_families));
ovs_netdev_exit();
unregister_netdevice_notifier(&ovs_dp_device_notifier);
unregister_pernet_device(&ovs_net_ops);
rcu_barrier();

View File

@ -18,10 +18,10 @@ ccflags-y += -include $(builddir)/kcompat.h
# right place, even though it's conceptually incorrect.
NOSTDINC_FLAGS += -I$(top_srcdir)/include -I$(srcdir)/compat -I$(srcdir)/compat/include
obj-m := $(patsubst %,%.o,$(build_modules))
obj-m := $(subst _,-,$(patsubst %,%.o,$(build_modules)))
define module_template
$(1)-y = $$(notdir $$(patsubst %.c,%.o,$($(1)_sources)))
endef
$(foreach module,$(build_modules),$(eval $(call module_template,$(module))))
$(foreach module,$(build_multi_modules),$(eval $(call module_template,$(module))))

View File

@ -231,4 +231,5 @@ u32 __skb_get_hash(struct sk_buff *skb)
#endif
return hash;
}
EXPORT_SYMBOL_GPL(__skb_get_hash);
#endif

View File

@ -1,7 +1,7 @@
#include <net/genetlink.h>
#include <linux/version.h>
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,13,0)
#ifndef HAVE_GENL_NOTIFY_TAKES_FAMILY
#undef genl_notify

View File

@ -91,10 +91,6 @@ int geneve_xmit_skb(struct geneve_sock *gs, struct rtable *rt,
int min_headroom;
int err;
skb = udp_tunnel_handle_offloads(skb, csum, (opt_len == 0));
if (IS_ERR(skb))
return PTR_ERR(skb);
min_headroom = LL_RESERVED_SPACE(rt_dst(rt).dev) + rt_dst(rt).header_len
+ GENEVE_BASE_HLEN + opt_len + sizeof(struct iphdr)
+ (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0);
@ -109,6 +105,10 @@ int geneve_xmit_skb(struct geneve_sock *gs, struct rtable *rt,
if (unlikely(!skb))
return -ENOMEM;
skb = udp_tunnel_handle_offloads(skb, csum, (opt_len == 0));
if (IS_ERR(skb))
return PTR_ERR(skb);
gnvh = (struct genevehdr *)__skb_push(skb, sizeof(*gnvh) + opt_len);
geneve_build_header(gnvh, tun_flags, vni, opt_len, opt);
@ -118,6 +118,7 @@ int geneve_xmit_skb(struct geneve_sock *gs, struct rtable *rt,
tos, ttl, df, src_port, dst_port, xnet,
!csum);
}
EXPORT_SYMBOL_GPL(geneve_xmit_skb);
/* Callback from net/ipv4/udp.c to receive packets */
static int geneve_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
@ -226,6 +227,7 @@ struct geneve_sock *geneve_sock_add(struct net *net, __be16 port,
{
return geneve_socket_create(net, port, rcv, data, ipv6);
}
EXPORT_SYMBOL_GPL(geneve_sock_add);
static void rcu_free_gs(struct rcu_head *rcu)
{
@ -239,3 +241,4 @@ void geneve_sock_release(struct geneve_sock *gs)
udp_tunnel_sock_release(gs->sock);
call_rcu(&gs->rcu, rcu_free_gs);
}
EXPORT_SYMBOL_GPL(geneve_sock_release);

View File

@ -250,6 +250,7 @@ int gre_cisco_register(struct gre_cisco_protocol *newp)
return (cmpxchg((struct gre_cisco_protocol **)&gre_cisco_proto, NULL, newp) == NULL) ?
0 : -EBUSY;
}
EXPORT_SYMBOL_GPL(gre_cisco_register);
int gre_cisco_unregister(struct gre_cisco_protocol *proto)
{
@ -265,6 +266,7 @@ int gre_cisco_unregister(struct gre_cisco_protocol *proto)
ret = gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
return ret;
}
EXPORT_SYMBOL_GPL(gre_cisco_unregister);
#endif /* !HAVE_GRE_CISCO_REGISTER */
@ -297,6 +299,7 @@ struct sk_buff *gre_handle_offloads(struct sk_buff *skb, bool gre_csum)
return ovs_iptunnel_handle_offloads(skb, gre_csum, type, fix_segment);
}
EXPORT_SYMBOL_GPL(gre_handle_offloads);
static bool is_gre_gso(struct sk_buff *skb)
{
@ -334,6 +337,7 @@ void gre_build_header(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
ovs_skb_set_inner_protocol(skb, tpi->proto);
}
EXPORT_SYMBOL_GPL(gre_build_header);
#endif /* CONFIG_NET_IPGRE_DEMUX */

View File

@ -14,8 +14,11 @@
#define SKB_GSO_UDP_TUNNEL 0
#endif
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,16,0)
#ifndef HAVE_SKB_GSO_GRE_CSUM
#define SKB_GSO_GRE_CSUM 0
#endif
#ifndef HAVE_SKB_GSO_UDP_TUNNEL_CSUM
#define SKB_GSO_UDP_TUNNEL_CSUM 0
#endif

View File

@ -17,7 +17,7 @@
#define portid pid
#endif
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,13,0)
#ifndef HAVE_GENL_NOTIFY_TAKES_FAMILY
struct rpl_genl_family {
struct genl_family compat_family;
unsigned int id;
@ -122,7 +122,11 @@ static inline int genl_has_listeners(struct genl_family *family,
static inline int rpl_genl_has_listeners(struct genl_family *family,
struct net *net, unsigned int group)
{
#ifdef HAVE_GENL_NOTIFY_TAKES_FAMILY
return genl_has_listeners(family, net->genl_sock, group);
#else
return genl_has_listeners(&family->compat_family, net->genl_sock, group);
#endif
}
#define genl_has_listeners rpl_genl_has_listeners

View File

@ -12,7 +12,7 @@ static inline bool ip_is_fragment(const struct iphdr *iph)
}
#endif
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,13,0)
#ifndef HAVE_INET_GET_LOCAL_PORT_RANGE_USING_NET
static inline void rpl_inet_get_local_port_range(struct net *net, int *low,
int *high)
{

View File

@ -2,11 +2,20 @@
#define __NET_UDP_WRAPPER_H 1
#include <linux/version.h>
#ifdef inet_get_local_port_range
/* RHEL7 backports udp_flow_src_port() using an older version of
* inet_get_local_port_range(). */
#undef inet_get_local_port_range
#include_next <net/udp.h>
#define inet_get_local_port_range rpl_inet_get_local_port_range
#else
#include_next <net/udp.h>
#endif
#ifndef HAVE_UDP_FLOW_SRC_PORT
static inline __be16 udp_flow_src_port(struct net *net, struct sk_buff *skb,
int min, int max, bool use_eth)
static inline __be16 rpl_udp_flow_src_port(struct net *net, struct sk_buff *skb,
int min, int max, bool use_eth)
{
u32 hash;
@ -33,15 +42,19 @@ static inline __be16 udp_flow_src_port(struct net *net, struct sk_buff *skb,
return htons((((u64) hash * (max - min)) >> 32) + min);
}
#define udp_flow_src_port rpl_udp_flow_src_port
#endif
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,16,0)
#ifndef HAVE_UDP_V4_CHECK
static inline __sum16 udp_v4_check(int len, __be32 saddr,
__be32 daddr, __wsum base)
{
return csum_tcpudp_magic(saddr, daddr, len, IPPROTO_UDP, base);
}
#endif
#ifndef HAVE_UDP_SET_CSUM
void udp_set_csum(bool nocheck, struct sk_buff *skb,
__be32 saddr, __be32 daddr, int len);
#endif

View File

@ -49,6 +49,7 @@ struct udp_port_cfg {
use_udp6_rx_checksums:1;
};
#define udp_sock_create rpl_udp_sock_create
int udp_sock_create(struct net *net, struct udp_port_cfg *cfg,
struct socket **sockp);

View File

@ -80,6 +80,7 @@ int rpl_iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
pkt_len = 0;
return pkt_len;
}
EXPORT_SYMBOL_GPL(rpl_iptunnel_xmit);
struct sk_buff *ovs_iptunnel_handle_offloads(struct sk_buff *skb,
bool csum_help, int gso_type_mask,
@ -132,6 +133,7 @@ error:
kfree_skb(skb);
return ERR_PTR(err);
}
EXPORT_SYMBOL_GPL(ovs_iptunnel_handle_offloads);
int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto)
{
@ -166,6 +168,7 @@ int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto)
skb->pkt_type = PACKET_HOST;
return 0;
}
EXPORT_SYMBOL_GPL(iptunnel_pull_header);
#endif
@ -176,3 +179,4 @@ bool skb_is_encapsulated(struct sk_buff *skb)
*/
return ovs_skb_get_inner_protocol(skb) || skb_encapsulation(skb);
}
EXPORT_SYMBOL_GPL(skb_is_encapsulated);

View File

@ -1,6 +1,6 @@
#include <linux/version.h>
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,16,0)
#ifndef HAVE_UDP_SET_CSUM
#include <net/udp.h>

View File

@ -95,6 +95,7 @@ error:
*sockp = NULL;
return err;
}
EXPORT_SYMBOL_GPL(udp_sock_create);
void setup_udp_tunnel_sock(struct net *net, struct socket *sock,
struct udp_tunnel_sock_cfg *cfg)
@ -114,6 +115,7 @@ void setup_udp_tunnel_sock(struct net *net, struct socket *sock,
udp_tunnel_encap_enable(sock);
}
EXPORT_SYMBOL_GPL(setup_udp_tunnel_sock);
void ovs_udp_gso(struct sk_buff *skb)
{
@ -123,6 +125,7 @@ void ovs_udp_gso(struct sk_buff *skb)
uh = udp_hdr(skb);
uh->len = htons(skb->len - udp_offset);
}
EXPORT_SYMBOL_GPL(ovs_udp_gso);
void ovs_udp_csum_gso(struct sk_buff *skb)
{
@ -137,6 +140,7 @@ void ovs_udp_csum_gso(struct sk_buff *skb)
udp_set_csum(true, skb, iph->saddr, iph->daddr,
skb->len - udp_offset);
}
EXPORT_SYMBOL_GPL(ovs_udp_csum_gso);
int udp_tunnel_xmit_skb(struct rtable *rt, struct sk_buff *skb,
__be32 src, __be32 dst, __u8 tos, __u8 ttl,
@ -158,6 +162,7 @@ int udp_tunnel_xmit_skb(struct rtable *rt, struct sk_buff *skb,
return iptunnel_xmit(skb->sk, rt, skb, src, dst, IPPROTO_UDP,
tos, ttl, df, xnet);
}
EXPORT_SYMBOL_GPL(udp_tunnel_xmit_skb);
void udp_tunnel_sock_release(struct socket *sock)
{
@ -165,5 +170,6 @@ void udp_tunnel_sock_release(struct socket *sock)
kernel_sock_shutdown(sock, SHUT_RDWR);
sk_release_kernel(sock->sk);
}
EXPORT_SYMBOL_GPL(udp_tunnel_sock_release);
#endif /* Linux version < 3.20 */

View File

@ -191,10 +191,6 @@ int vxlan_xmit_skb(struct vxlan_sock *vs,
int err;
bool udp_sum = !!(vxflags & VXLAN_F_UDP_CSUM);
skb = udp_tunnel_handle_offloads(skb, udp_sum, true);
if (IS_ERR(skb))
return PTR_ERR(skb);
min_headroom = LL_RESERVED_SPACE(rt_dst(rt).dev) + rt_dst(rt).header_len
+ VXLAN_HLEN + sizeof(struct iphdr)
+ (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0);
@ -210,6 +206,10 @@ int vxlan_xmit_skb(struct vxlan_sock *vs,
if (WARN_ON(!skb))
return -ENOMEM;
skb = udp_tunnel_handle_offloads(skb, udp_sum, true);
if (IS_ERR(skb))
return PTR_ERR(skb);
vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh));
vxh->vx_flags = htonl(VXLAN_HF_VNI);
vxh->vx_vni = md->vni;
@ -225,6 +225,7 @@ int vxlan_xmit_skb(struct vxlan_sock *vs,
ttl, df, src_port, dst_port, xnet,
!udp_sum);
}
EXPORT_SYMBOL_GPL(vxlan_xmit_skb);
static void rcu_free_vs(struct rcu_head *rcu)
{
@ -313,6 +314,7 @@ struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port,
{
return vxlan_socket_create(net, port, rcv, data, flags);
}
EXPORT_SYMBOL_GPL(vxlan_sock_add);
void vxlan_sock_release(struct vxlan_sock *vs)
{
@ -320,5 +322,6 @@ void vxlan_sock_release(struct vxlan_sock *vs)
queue_work(system_wq, &vs->del_work);
}
EXPORT_SYMBOL_GPL(vxlan_sock_release);
#endif /* !USE_UPSTREAM_VXLAN */

View File

@ -27,6 +27,8 @@
#include "datapath.h"
#include "vport.h"
static struct vport_ops ovs_geneve_vport_ops;
/**
* struct geneve_port - Keeps track of open UDP ports
* @gs: The socket created for this port number.
@ -248,7 +250,7 @@ static int geneve_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
IPPROTO_UDP, skb->mark, sport, dport);
}
const struct vport_ops ovs_geneve_vport_ops = {
static struct vport_ops ovs_geneve_vport_ops = {
.type = OVS_VPORT_TYPE_GENEVE,
.create = geneve_tnl_create,
.destroy = geneve_tnl_destroy,
@ -256,4 +258,22 @@ const struct vport_ops ovs_geneve_vport_ops = {
.get_options = geneve_get_options,
.send = geneve_tnl_send,
.get_egress_tun_info = geneve_get_egress_tun_info,
.owner = THIS_MODULE,
};
static int __init ovs_geneve_tnl_init(void)
{
return ovs_vport_ops_register(&ovs_geneve_vport_ops);
}
static void __exit ovs_geneve_tnl_exit(void)
{
ovs_vport_ops_unregister(&ovs_geneve_vport_ops);
}
/* Hook the Geneve vport into module load/unload and describe the module.
 * The "vport-type-5" alias follows the "vport-type-%d" pattern that the core
 * vport code passes to request_module(), which lets this module be loaded on
 * demand (5 is presumably the value of OVS_VPORT_TYPE_GENEVE -- confirm
 * against the vport type enum). */
module_init(ovs_geneve_tnl_init);
module_exit(ovs_geneve_tnl_exit);
MODULE_DESCRIPTION("OVS: Geneve switching port");
MODULE_LICENSE("GPL");
MODULE_ALIAS("vport-type-5");

View File

@ -31,6 +31,7 @@
#include <linux/jhash.h>
#include <linux/list.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/workqueue.h>
#include <linux/rculist.h>
#include <net/net_namespace.h>
@ -47,6 +48,9 @@
#include "datapath.h"
#include "vport.h"
static struct vport_ops ovs_gre_vport_ops;
static struct vport_ops ovs_gre64_vport_ops;
/* Returns the least-significant 32 bits of a __be64. */
static __be32 be64_get_low32(__be64 x)
{
@ -308,13 +312,14 @@ static int gre_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
IPPROTO_GRE, skb->mark, 0, 0);
}
const struct vport_ops ovs_gre_vport_ops = {
static struct vport_ops ovs_gre_vport_ops = {
.type = OVS_VPORT_TYPE_GRE,
.create = gre_create,
.destroy = gre_tnl_destroy,
.get_name = gre_get_name,
.send = gre_send,
.get_egress_tun_info = gre_get_egress_tun_info,
.owner = THIS_MODULE,
};
/* GRE64 vport. */
@ -387,12 +392,42 @@ static int gre64_send(struct vport *vport, struct sk_buff *skb)
return __send(vport, skb, hlen, seq, (TUNNEL_KEY|TUNNEL_SEQ));
}
const struct vport_ops ovs_gre64_vport_ops = {
static struct vport_ops ovs_gre64_vport_ops = {
.type = OVS_VPORT_TYPE_GRE64,
.create = gre64_create,
.destroy = gre64_tnl_destroy,
.get_name = gre_get_name,
.send = gre64_send,
.get_egress_tun_info = gre_get_egress_tun_info,
.owner = THIS_MODULE,
};
/* Module init: registers the GRE vport ops and then the GRE64 vport ops.
 *
 * If the first registration fails its error is returned immediately.  If the
 * second one fails, the first registration is rolled back before the error
 * is returned, so a failed load leaves nothing registered. */
static int __init ovs_gre_tnl_init(void)
{
	int err = ovs_vport_ops_register(&ovs_gre_vport_ops);

	if (err < 0)
		return err;

	err = ovs_vport_ops_register(&ovs_gre64_vport_ops);
	if (err >= 0)
		return err;

	/* GRE64 failed: undo the GRE registration made above. */
	ovs_vport_ops_unregister(&ovs_gre_vport_ops);
	return err;
}
static void __exit ovs_gre_tnl_exit(void)
{
ovs_vport_ops_unregister(&ovs_gre64_vport_ops);
ovs_vport_ops_unregister(&ovs_gre_vport_ops);
}
module_init(ovs_gre_tnl_init);
module_exit(ovs_gre_tnl_exit);
MODULE_DESCRIPTION("OVS: GRE switching port");
MODULE_LICENSE("GPL");
MODULE_ALIAS("vport-type-3");
MODULE_ALIAS("vport-type-104");
#endif

View File

@ -38,6 +38,8 @@ struct internal_dev {
struct vport *vport;
};
static struct vport_ops ovs_internal_vport_ops;
static struct internal_dev *internal_dev_priv(struct net_device *netdev)
{
return netdev_priv(netdev);
@ -285,7 +287,7 @@ static int internal_dev_recv(struct vport *vport, struct sk_buff *skb)
return len;
}
const struct vport_ops ovs_internal_vport_ops = {
static struct vport_ops ovs_internal_vport_ops = {
.type = OVS_VPORT_TYPE_INTERNAL,
.create = internal_dev_create,
.destroy = internal_dev_destroy,
@ -308,10 +310,21 @@ struct vport *ovs_internal_dev_get_vport(struct net_device *netdev)
int ovs_internal_dev_rtnl_link_register(void)
{
return rtnl_link_register(&internal_dev_link_ops);
int err;
err = rtnl_link_register(&internal_dev_link_ops);
if (err < 0)
return err;
err = ovs_vport_ops_register(&ovs_internal_vport_ops);
if (err < 0)
rtnl_link_unregister(&internal_dev_link_ops);
return err;
}
void ovs_internal_dev_rtnl_link_unregister(void)
{
ovs_vport_ops_unregister(&ovs_internal_vport_ops);
rtnl_link_unregister(&internal_dev_link_ops);
}

View File

@ -24,6 +24,7 @@
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/net.h>
#include <linux/module.h>
#include <linux/rculist.h>
#include <linux/udp.h>
@ -110,6 +111,7 @@ struct lisp_port {
};
static LIST_HEAD(lisp_ports);
static struct vport_ops ovs_lisp_vport_ops;
static inline struct lisp_port *lisp_vport(const struct vport *vport)
{
@ -493,7 +495,7 @@ static int lisp_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
lisp_port->dst_port);
}
const struct vport_ops ovs_lisp_vport_ops = {
static struct vport_ops ovs_lisp_vport_ops = {
.type = OVS_VPORT_TYPE_LISP,
.create = lisp_tnl_create,
.destroy = lisp_tnl_destroy,
@ -501,4 +503,22 @@ const struct vport_ops ovs_lisp_vport_ops = {
.get_options = lisp_get_options,
.send = lisp_send,
.get_egress_tun_info = lisp_get_egress_tun_info,
.owner = THIS_MODULE,
};
static int __init ovs_lisp_tnl_init(void)
{
return ovs_vport_ops_register(&ovs_lisp_vport_ops);
}
static void __exit ovs_lisp_tnl_exit(void)
{
ovs_vport_ops_unregister(&ovs_lisp_vport_ops);
}
module_init(ovs_lisp_tnl_init);
module_exit(ovs_lisp_tnl_exit);
MODULE_DESCRIPTION("OVS: LISP switching port");
MODULE_LICENSE("GPL");
MODULE_ALIAS("vport-type-105");

View File

@ -35,6 +35,7 @@
#include "vport-internal_dev.h"
#include "vport-netdev.h"
static struct vport_ops ovs_netdev_vport_ops;
static void netdev_port_receive(struct vport *vport, struct sk_buff *skb);
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,39)
@ -274,7 +275,7 @@ struct vport *ovs_netdev_get_vport(struct net_device *dev)
#endif
}
const struct vport_ops ovs_netdev_vport_ops = {
static struct vport_ops ovs_netdev_vport_ops = {
.type = OVS_VPORT_TYPE_NETDEV,
.create = netdev_create,
.destroy = netdev_destroy,
@ -282,6 +283,16 @@ const struct vport_ops ovs_netdev_vport_ops = {
.send = netdev_send,
};
int __init ovs_netdev_init(void)
{
return ovs_vport_ops_register(&ovs_netdev_vport_ops);
}
void ovs_netdev_exit(void)
{
ovs_vport_ops_unregister(&ovs_netdev_vport_ops);
}
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,36) && \
!defined HAVE_RHEL_OVS_HOOK
/*

View File

@ -41,4 +41,7 @@ netdev_vport_priv(const struct vport *vport)
const char *ovs_netdev_get_name(const struct vport *);
void ovs_netdev_detach_dev(struct vport *);
int __init ovs_netdev_init(void);
void ovs_netdev_exit(void);
#endif /* vport_netdev.h */

View File

@ -26,6 +26,7 @@
#include <linux/net.h>
#include <linux/rculist.h>
#include <linux/udp.h>
#include <linux/module.h>
#include <net/icmp.h>
#include <net/ip.h>
@ -54,6 +55,8 @@ struct vxlan_port {
u32 exts; /* VXLAN_F_* in <net/vxlan.h> */
};
static struct vport_ops ovs_vxlan_vport_ops;
static inline struct vxlan_port *vxlan_vport(const struct vport *vport)
{
return vport_priv(vport);
@ -293,7 +296,7 @@ static const char *vxlan_get_name(const struct vport *vport)
return vxlan_port->name;
}
const struct vport_ops ovs_vxlan_vport_ops = {
static struct vport_ops ovs_vxlan_vport_ops = {
.type = OVS_VPORT_TYPE_VXLAN,
.create = vxlan_tnl_create,
.destroy = vxlan_tnl_destroy,
@ -301,4 +304,22 @@ const struct vport_ops ovs_vxlan_vport_ops = {
.get_options = vxlan_get_options,
.send = vxlan_tnl_send,
.get_egress_tun_info = vxlan_get_egress_tun_info,
.owner = THIS_MODULE,
};
static int __init ovs_vxlan_tnl_init(void)
{
return ovs_vport_ops_register(&ovs_vxlan_vport_ops);
}
static void __exit ovs_vxlan_tnl_exit(void)
{
ovs_vport_ops_unregister(&ovs_vxlan_vport_ops);
}
module_init(ovs_vxlan_tnl_init);
module_exit(ovs_vxlan_tnl_exit);
MODULE_DESCRIPTION("OVS: VXLAN switching port");
MODULE_LICENSE("GPL");
MODULE_ALIAS("vport-type-4");

View File

@ -23,6 +23,7 @@
#include <linux/kconfig.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/percpu.h>
#include <linux/rcupdate.h>
@ -39,20 +40,7 @@
static void ovs_vport_record_error(struct vport *,
enum vport_err_type err_type);
/* List of statically compiled vport implementations. Don't forget to also
* add yours to the list at the bottom of vport.h.
*/
static const struct vport_ops *vport_ops_list[] = {
&ovs_netdev_vport_ops,
&ovs_internal_vport_ops,
&ovs_geneve_vport_ops,
#if IS_ENABLED(CONFIG_NET_IPGRE_DEMUX)
&ovs_gre_vport_ops,
&ovs_gre64_vport_ops,
#endif
&ovs_vxlan_vport_ops,
&ovs_lisp_vport_ops,
};
static LIST_HEAD(vport_ops_list);
/* Protected by RCU read lock for reading, ovs_mutex for writing. */
static struct hlist_head *dev_table;
@ -89,6 +77,32 @@ static struct hlist_head *hash_bucket(const struct net *net, const char *name)
return &dev_table[hash & (VPORT_HASH_BUCKETS - 1)];
}
int ovs_vport_ops_register(struct vport_ops *ops)
{
int err = -EEXIST;
struct vport_ops *o;
ovs_lock();
list_for_each_entry(o, &vport_ops_list, list)
if (ops->type == o->type)
goto errout;
list_add_tail(&ops->list, &vport_ops_list);
err = 0;
errout:
ovs_unlock();
return err;
}
EXPORT_SYMBOL_GPL(ovs_vport_ops_register);
/* Removes 'ops' from the list of registered vport implementations, holding
 * ovs_lock around the list update.
 *
 * No check is made here that 'ops' is actually on the list.
 * NOTE(review): presumably callers must only pass ops that were successfully
 * registered with ovs_vport_ops_register() -- confirm. */
void ovs_vport_ops_unregister(struct vport_ops *ops)
{
ovs_lock();
list_del(&ops->list);
ovs_unlock();
}
EXPORT_SYMBOL_GPL(ovs_vport_ops_unregister);
/**
* ovs_vport_locate - find a port that has already been created
*
@ -154,6 +168,18 @@ struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops,
return vport;
}
EXPORT_SYMBOL_GPL(ovs_vport_alloc);
static struct vport_ops *ovs_vport_lookup(const struct vport_parms *parms)
{
struct vport_ops *ops;
list_for_each_entry(ops, &vport_ops_list, list)
if (ops->type == parms->type)
return ops;
return NULL;
}
/**
* ovs_vport_free - uninitialize and free vport
@ -171,6 +197,7 @@ void ovs_vport_free(struct vport *vport)
free_percpu(vport->percpu_stats);
kfree(vport);
}
EXPORT_SYMBOL_GPL(ovs_vport_free);
/**
* ovs_vport_add - add vport device (for kernel callers)
@ -182,31 +209,40 @@ void ovs_vport_free(struct vport *vport)
*/
struct vport *ovs_vport_add(const struct vport_parms *parms)
{
struct vport_ops *ops;
struct vport *vport;
int err = 0;
int i;
for (i = 0; i < ARRAY_SIZE(vport_ops_list); i++) {
if (vport_ops_list[i]->type == parms->type) {
struct hlist_head *bucket;
ops = ovs_vport_lookup(parms);
if (ops) {
struct hlist_head *bucket;
vport = vport_ops_list[i]->create(parms);
if (IS_ERR(vport)) {
err = PTR_ERR(vport);
goto out;
}
if (!try_module_get(ops->owner))
return ERR_PTR(-EAFNOSUPPORT);
bucket = hash_bucket(ovs_dp_get_net(vport->dp),
vport->ops->get_name(vport));
hlist_add_head_rcu(&vport->hash_node, bucket);
vport = ops->create(parms);
if (IS_ERR(vport)) {
module_put(ops->owner);
return vport;
}
bucket = hash_bucket(ovs_dp_get_net(vport->dp),
vport->ops->get_name(vport));
hlist_add_head_rcu(&vport->hash_node, bucket);
return vport;
}
err = -EAFNOSUPPORT;
/* Unlock to attempt module load and return -EAGAIN if load
* was successful as we need to restart the port addition
* workflow.
*/
ovs_unlock();
request_module("vport-type-%d", parms->type);
ovs_lock();
out:
return ERR_PTR(err);
if (!ovs_vport_lookup(parms))
return ERR_PTR(-EAFNOSUPPORT);
else
return ERR_PTR(-EAGAIN);
}
/**
@ -238,6 +274,7 @@ void ovs_vport_del(struct vport *vport)
ASSERT_OVSL();
hlist_del_rcu(&vport->hash_node);
module_put(vport->ops->owner);
vport->ops->destroy(vport);
}
@ -467,6 +504,7 @@ void ovs_vport_receive(struct vport *vport, struct sk_buff *skb,
ovs_dp_process_packet(skb, &key);
}
EXPORT_SYMBOL_GPL(ovs_vport_receive);
/**
* ovs_vport_send - send a packet on a device
@ -544,6 +582,7 @@ void ovs_vport_deferred_free(struct vport *vport)
call_rcu(&vport->rcu, free_vport_rcu);
}
EXPORT_SYMBOL_GPL(ovs_vport_deferred_free);
int ovs_tunnel_get_egress_info(struct ovs_tunnel_info *egress_tun_info,
struct net *net,
@ -592,6 +631,7 @@ int ovs_tunnel_get_egress_info(struct ovs_tunnel_info *egress_tun_info,
return 0;
}
EXPORT_SYMBOL_GPL(ovs_tunnel_get_egress_info);
int ovs_vport_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
struct ovs_tunnel_info *info)

View File

@ -175,6 +175,9 @@ struct vport_ops {
int (*send)(struct vport *, struct sk_buff *);
int (*get_egress_tun_info)(struct vport *, struct sk_buff *,
struct ovs_tunnel_info *);
struct module *owner;
struct list_head list;
};
enum vport_err_type {
@ -223,16 +226,6 @@ static inline struct vport *vport_from_priv(void *priv)
void ovs_vport_receive(struct vport *, struct sk_buff *,
const struct ovs_tunnel_info *);
/* List of statically compiled vport implementations. Don't forget to also
* add yours to the list at the top of vport.c.
*/
extern const struct vport_ops ovs_netdev_vport_ops;
extern const struct vport_ops ovs_internal_vport_ops;
extern const struct vport_ops ovs_geneve_vport_ops;
extern const struct vport_ops ovs_gre_vport_ops;
extern const struct vport_ops ovs_gre64_vport_ops;
extern const struct vport_ops ovs_vxlan_vport_ops;
extern const struct vport_ops ovs_lisp_vport_ops;
static inline void ovs_skb_postpush_rcsum(struct sk_buff *skb,
const void *start, unsigned int len)
@ -240,4 +233,7 @@ static inline void ovs_skb_postpush_rcsum(struct sk_buff *skb,
if (skb->ip_summed == CHECKSUM_COMPLETE)
skb->csum = csum_add(skb->csum, csum_partial(start, len, 0));
}
int ovs_vport_ops_register(struct vport_ops *ops);
void ovs_vport_ops_unregister(struct vport_ops *ops);
#endif /* vport.h */

View File

@ -1,3 +1,4 @@
etc/bash_completion.d/ovs-appctl-bashcomp.bash
usr/bin/ovs-appctl
usr/bin/ovs-benchmark
usr/bin/ovs-docker

View File

@ -1,3 +1,4 @@
etc/bash_completion.d/ovs-vsctl-bashcomp.bash
usr/bin/ovs-dpctl
usr/bin/ovs-dpctl-top
usr/bin/ovs-pcap

View File

@ -1,5 +1,6 @@
openflowincludedir = $(includedir)/openflow
openflowinclude_HEADERS = \
include/openflow/netronome-ext.h \
include/openflow/nicira-ext.h \
include/openflow/openflow-1.0.h \
include/openflow/openflow-1.1.h \
@ -17,42 +18,10 @@ SUFFIXES += .h .hstamp
$(AM_V_GEN)$(run_python) $(srcdir)/build-aux/check-structs -I$(srcdir)/include $< && \
touch $@
HSTAMP_FILES = \
include/openflow/nicira-ext.hstamp \
include/openflow/openflow-1.0.hstamp \
include/openflow/openflow-1.1.hstamp \
include/openflow/openflow-1.2.hstamp \
include/openflow/openflow-1.3.hstamp \
include/openflow/openflow-1.4.hstamp \
include/openflow/openflow-1.5.hstamp \
include/openflow/openflow-common.hstamp \
include/openflow/openflow.hstamp
HSTAMP_FILES = $(openflowinclude_HEADERS:.h=.hstamp)
CLEANFILES += $(HSTAMP_FILES)
ALL_LOCAL += $(HSTAMP_FILES)
$(HSTAMP_FILES): build-aux/check-structs
include/openflow/openflow-1.0.hstamp: \
include/openflow/openflow-common.h
include/openflow/openflow-1.1.hstamp: \
include/openflow/openflow-common.h
include/openflow/openflow-1.2.hstamp: \
include/openflow/openflow-common.h \
include/openflow/openflow-1.1.h
include/openflow/openflow-1.3.hstamp: \
include/openflow/openflow-common.h \
include/openflow/openflow-1.1.h \
include/openflow/openflow-1.2.h
include/openflow/openflow-1.4.hstamp: \
include/openflow/openflow-1.4.h
include/openflow/openflow-1.5.hstamp: \
include/openflow/openflow-1.5.h
include/openflow/nicira-ext.hstamp: \
include/openflow/openflow.h \
include/openflow/openflow-common.h \
include/openflow/openflow-1.0.h \
include/openflow/openflow-1.1.h \
include/openflow/openflow-1.2.h \
include/openflow/openflow-1.3.h
$(HSTAMP_FILES): build-aux/check-structs $(openflowinclude_HEADERS)
endif
EXTRA_DIST += build-aux/check-structs

View File

@ -0,0 +1,66 @@
/*
* Copyright (c) 2014 Netronome.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef OPENFLOW_NETRONOME_EXT_H
#define OPENFLOW_NETRONOME_EXT_H 1
#include "openflow/openflow.h"
#include "openvswitch/types.h"
/* The following vendor extension, proposed by Netronome, is not yet
* standardized, so it is not included in openflow.h. It may
* be suitable for standardization. */
/* Netronome enhanced select group */
enum ntr_group_mod_subtype {
NTRT_SELECTION_METHOD = 1,
};
#define NTR_MAX_SELECTION_METHOD_LEN 16
/* Netronome experimenter group property that names the selection method of
 * a select group and carries its parameters.
 *
 * The fixed part below is 40 bytes; it may be followed by OXM TLVs and then
 * by zero padding, as described in the trailing comments. */
struct ntr_group_prop_selection_method {
ovs_be16 type; /* OFPGPT15_EXPERIMENTER. */
ovs_be16 length; /* Length in bytes of this property
* excluding trailing padding. */
ovs_be32 experimenter; /* NTR_VENDOR_ID. */
ovs_be32 exp_type; /* NTRT_SELECTION_METHOD. */
ovs_be32 pad;
char selection_method[NTR_MAX_SELECTION_METHOD_LEN];
/* Null-terminated */
ovs_be64 selection_method_param; /* Non-Field parameter for
* bucket selection. */
/* Followed by:
* - Exactly (length - 40) (possibly 0) bytes containing OXM TLVs, then
* - Exactly ((length + 7)/8*8 - length) (between 0 and 7) bytes of
* all-zero bytes
* In summary, ntr_group_prop_selection_method is padded as needed,
* to make its overall size a multiple of 8, to preserve alignment
* in structures using it.
*/
/* uint8_t field_array[0]; */ /* Zero or more fields encoded as
* OXM TLVs where the has_mask bit must
* be zero and the value it specifies is
* a mask to apply to packet fields and
* then input them to the selection
* method of a select group. */
/* uint8_t pad2[0]; */
};
/* Fixed part: 2 + 2 + 4 + 4 + 4 + 16 + 8 = 40 bytes, the same constant used
 * in the "(length - 40)" computation above. */
OFP_ASSERT(sizeof(struct ntr_group_prop_selection_method) == 40);
#endif /* openflow/netronome-ext.h */

View File

@ -103,7 +103,7 @@ enum ofp_version {
*/
#define OF_VENDOR_ID 0
#define HPL_VENDOR_ID 0x000004EA /* HP Labs. */
#define NMX_VENDOR_ID 0x00001540 /* Netronome. */
#define NTR_VENDOR_ID 0x00001540 /* Netronome. */
#define NX_VENDOR_ID 0x00002320 /* Nicira. */
#define ONF_VENDOR_ID 0x4f4e4600 /* Open Networking Foundation. */

View File

@ -29,6 +29,7 @@
#include "hash.h"
#include "hmap.h"
#include "netdev.h"
#include "ovs-atomic.h"
#include "packets.h"
#include "poll-loop.h"
#include "random.h"

View File

@ -465,10 +465,9 @@ dp_packet_to_string(const struct dp_packet *b, size_t maxbytes)
void
dp_packet_list_delete(struct ovs_list *list)
{
struct dp_packet *b, *next;
struct dp_packet *b;
LIST_FOR_EACH_SAFE (b, next, list_node, list) {
list_remove(&b->list_node);
LIST_FOR_EACH_POP (b, list_node, list) {
dp_packet_delete(b);
}
}

View File

@ -220,7 +220,8 @@ static struct dp_netdev_port *dp_netdev_lookup_port(const struct dp_netdev *dp,
odp_port_t);
enum dp_stat_type {
DP_STAT_HIT, /* Packets that matched in the flow table. */
DP_STAT_EXACT_HIT, /* Packets that had an exact match (emc). */
DP_STAT_MASKED_HIT, /* Packets that matched in the flow table. */
DP_STAT_MISS, /* Packets that did not match. */
DP_STAT_LOST, /* Packets not passed up to the client. */
DP_N_STATS
@ -239,10 +240,10 @@ struct dp_netdev_port {
/* Contained by struct dp_netdev_flow's 'stats' member. */
struct dp_netdev_flow_stats {
long long int used; /* Last used time, in monotonic msecs. */
long long int packet_count; /* Number of packets matched. */
long long int byte_count; /* Number of bytes matched. */
uint16_t tcp_flags; /* Bitwise-OR of seen tcp_flags values. */
atomic_llong used; /* Last used time, in monotonic msecs. */
atomic_ullong packet_count; /* Number of packets matched. */
atomic_ullong byte_count; /* Number of bytes matched. */
atomic_uint16_t tcp_flags; /* Bitwise-OR of seen tcp_flags values. */
};
/* A flow in 'dp_netdev_pmd_thread's 'flow_table'.
@ -338,7 +339,7 @@ static void dp_netdev_actions_free(struct dp_netdev_actions *);
/* Contained by struct dp_netdev_pmd_thread's 'stats' member. */
struct dp_netdev_pmd_stats {
/* Indexed by DP_STAT_*. */
unsigned long long int n[DP_N_STATS];
atomic_ullong n[DP_N_STATS];
};
/* PMD: Poll modes drivers. PMD accesses devices via polling to eliminate
@ -746,6 +747,21 @@ dpif_netdev_destroy(struct dpif *dpif)
return 0;
}
/* Adds 'n' to '*var' using a relaxed load followed by a relaxed store.
 *
 * The read-modify-write as a whole is NOT atomic; only the individual load
 * and store are, which is enough to guarantee that a reader never observes
 * an inconsistent value.
 *
 * This is used to update thread local stats counters. */
static void
non_atomic_ullong_add(atomic_ullong *var, unsigned long long n)
{
    unsigned long long current;

    atomic_read_relaxed(var, &current);
    atomic_store_relaxed(var, current + n);
}
static int
dpif_netdev_get_stats(const struct dpif *dpif, struct dpif_dp_stats *stats)
{
@ -754,10 +770,17 @@ dpif_netdev_get_stats(const struct dpif *dpif, struct dpif_dp_stats *stats)
stats->n_flows = stats->n_hit = stats->n_missed = stats->n_lost = 0;
CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
unsigned long long n;
stats->n_flows += cmap_count(&pmd->flow_table);
stats->n_hit += pmd->stats.n[DP_STAT_HIT];
stats->n_missed += pmd->stats.n[DP_STAT_MISS];
stats->n_lost += pmd->stats.n[DP_STAT_LOST];
atomic_read_relaxed(&pmd->stats.n[DP_STAT_MASKED_HIT], &n);
stats->n_hit += n;
atomic_read_relaxed(&pmd->stats.n[DP_STAT_EXACT_HIT], &n);
stats->n_hit += n;
atomic_read_relaxed(&pmd->stats.n[DP_STAT_MISS], &n);
stats->n_missed += n;
atomic_read_relaxed(&pmd->stats.n[DP_STAT_LOST], &n);
stats->n_lost += n;
}
stats->n_masks = UINT32_MAX;
stats->n_mask_hit = UINT64_MAX;
@ -1545,13 +1568,24 @@ dp_netdev_pmd_find_flow(const struct dp_netdev_pmd_thread *pmd,
}
static void
get_dpif_flow_stats(const struct dp_netdev_flow *netdev_flow,
get_dpif_flow_stats(const struct dp_netdev_flow *netdev_flow_,
struct dpif_flow_stats *stats)
{
stats->n_packets = netdev_flow->stats.packet_count;
stats->n_bytes = netdev_flow->stats.byte_count;
stats->used = netdev_flow->stats.used;
stats->tcp_flags = netdev_flow->stats.tcp_flags;
struct dp_netdev_flow *netdev_flow;
unsigned long long n;
long long used;
uint16_t flags;
netdev_flow = CONST_CAST(struct dp_netdev_flow *, netdev_flow_);
atomic_read_relaxed(&netdev_flow->stats.packet_count, &n);
stats->n_packets = n;
atomic_read_relaxed(&netdev_flow->stats.byte_count, &n);
stats->n_bytes = n;
atomic_read_relaxed(&netdev_flow->stats.used, &used);
stats->used = used;
atomic_read_relaxed(&netdev_flow->stats.tcp_flags, &flags);
stats->tcp_flags = flags;
}
/* Converts to the dpif_flow format, using 'key_buf' and 'mask_buf' for
@ -1842,7 +1876,16 @@ dpif_netdev_flow_put(struct dpif *dpif, const struct dpif_flow_put *put)
get_dpif_flow_stats(netdev_flow, put->stats);
}
if (put->flags & DPIF_FP_ZERO_STATS) {
memset(&netdev_flow->stats, 0, sizeof netdev_flow->stats);
/* XXX: The userspace datapath uses thread local statistics
* (for flows), which should be updated only by the owning
* thread. Since we cannot write on stats memory here,
* we choose not to support this flag. Please note:
* - This feature is currently used only by dpctl commands with
* option --clear.
* - Should the need arise, this operation can be implemented
* by keeping a base value (to be updated here) for each
* counter, and subtracting it before outputting the stats */
error = EOPNOTSUPP;
}
ovsrcu_postpone(dp_netdev_actions_free, old_actions);
@ -2669,19 +2712,22 @@ static void
dp_netdev_flow_used(struct dp_netdev_flow *netdev_flow, int cnt, int size,
uint16_t tcp_flags)
{
long long int now = time_msec();
long long now = time_msec();
uint16_t flags;
netdev_flow->stats.used = MAX(now, netdev_flow->stats.used);
netdev_flow->stats.packet_count += cnt;
netdev_flow->stats.byte_count += size;
netdev_flow->stats.tcp_flags |= tcp_flags;
atomic_store_relaxed(&netdev_flow->stats.used, now);
non_atomic_ullong_add(&netdev_flow->stats.packet_count, cnt);
non_atomic_ullong_add(&netdev_flow->stats.byte_count, size);
atomic_read_relaxed(&netdev_flow->stats.tcp_flags, &flags);
flags |= tcp_flags;
atomic_store_relaxed(&netdev_flow->stats.tcp_flags, flags);
}
static void
dp_netdev_count_packet(struct dp_netdev_pmd_thread *pmd,
enum dp_stat_type type, int cnt)
{
pmd->stats.n[type] += cnt;
non_atomic_ullong_add(&pmd->stats.n[type], cnt);
}
static int
@ -2692,10 +2738,6 @@ dp_netdev_upcall(struct dp_netdev_pmd_thread *pmd, struct dp_packet *packet_,
{
struct dp_netdev *dp = pmd->dp;
if (type == DPIF_UC_MISS) {
dp_netdev_count_packet(pmd, DP_STAT_MISS, 1);
}
if (OVS_UNLIKELY(!dp->upcall_cb)) {
return ENODEV;
}
@ -2771,7 +2813,8 @@ packet_batch_init(struct packet_batch *batch, struct dp_netdev_flow *flow)
static inline void
packet_batch_execute(struct packet_batch *batch,
struct dp_netdev_pmd_thread *pmd)
struct dp_netdev_pmd_thread *pmd,
enum dp_stat_type hit_type)
{
struct dp_netdev_actions *actions;
struct dp_netdev_flow *flow = batch->flow;
@ -2784,7 +2827,7 @@ packet_batch_execute(struct packet_batch *batch,
dp_netdev_execute_actions(pmd, batch->packets, batch->packet_count, true,
actions->actions, actions->size);
dp_netdev_count_packet(pmd, DP_STAT_HIT, batch->packet_count);
dp_netdev_count_packet(pmd, hit_type, batch->packet_count);
}
static inline bool
@ -2875,7 +2918,7 @@ emc_processing(struct dp_netdev_pmd_thread *pmd, struct dp_packet **packets,
}
for (i = 0; i < n_batches; i++) {
packet_batch_execute(&batches[i], pmd);
packet_batch_execute(&batches[i], pmd, DP_STAT_EXACT_HIT);
}
return notfound_cnt;
@ -2907,6 +2950,7 @@ fast_path_processing(struct dp_netdev_pmd_thread *pmd,
if (OVS_UNLIKELY(any_miss) && !fat_rwlock_tryrdlock(&dp->upcall_rwlock)) {
uint64_t actions_stub[512 / 8], slow_stub[512 / 8];
struct ofpbuf actions, put_actions;
int miss_cnt = 0, lost_cnt = 0;
ovs_u128 ufid;
ofpbuf_use_stub(&actions, actions_stub, sizeof actions_stub);
@ -2931,6 +2975,8 @@ fast_path_processing(struct dp_netdev_pmd_thread *pmd,
continue;
}
miss_cnt++;
miniflow_expand(&keys[i].mf, &match.flow);
ofpbuf_clear(&actions);
@ -2941,6 +2987,8 @@ fast_path_processing(struct dp_netdev_pmd_thread *pmd,
&ufid, DPIF_UC_MISS, NULL, &actions,
&put_actions);
if (OVS_UNLIKELY(error && error != ENOSPC)) {
dp_packet_delete(packets[i]);
lost_cnt++;
continue;
}
@ -2974,6 +3022,8 @@ fast_path_processing(struct dp_netdev_pmd_thread *pmd,
ofpbuf_uninit(&actions);
ofpbuf_uninit(&put_actions);
fat_rwlock_unlock(&dp->upcall_rwlock);
dp_netdev_count_packet(pmd, DP_STAT_MISS, miss_cnt);
dp_netdev_count_packet(pmd, DP_STAT_LOST, lost_cnt);
} else if (OVS_UNLIKELY(any_miss)) {
int dropped_cnt = 0;
@ -2984,6 +3034,7 @@ fast_path_processing(struct dp_netdev_pmd_thread *pmd,
}
}
dp_netdev_count_packet(pmd, DP_STAT_MISS, dropped_cnt);
dp_netdev_count_packet(pmd, DP_STAT_LOST, dropped_cnt);
}
@ -3004,7 +3055,7 @@ fast_path_processing(struct dp_netdev_pmd_thread *pmd,
}
for (i = 0; i < n_batches; i++) {
packet_batch_execute(&batches[i], pmd);
packet_batch_execute(&batches[i], pmd, DP_STAT_MASKED_HIT);
}
}

View File

@ -859,10 +859,14 @@ flow_format(struct ds *ds, const struct flow *flow)
/* As this function is most often used for formatting a packet in a
* packet-in message, skip formatting the packet context fields that are
* all-zeroes (Openflow spec encourages leaving out all-zeroes context
* fields from the packet-in messages). We make an exception with the
* 'in_port' field, which we always format, as packets usually have an
* in_port, and 0 is a port just like any other port. */
* all-zeroes to make the print-out easier on the eyes. This means that a
* missing context field implies a zero value for that field. This is
* similar to OpenFlow encoding of these fields, as the specification
* states that all-zeroes context fields should not be encoded in the
* packet-in messages. */
if (!flow->in_port.ofp_port) {
WC_UNMASK_FIELD(wc, in_port);
}
if (!flow->skb_priority) {
WC_UNMASK_FIELD(wc, skb_priority);
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2008, 2009, 2010, 2012, 2013 Nicira, Inc.
* Copyright (c) 2008, 2009, 2010, 2012, 2013, 2015 Nicira, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -19,7 +19,6 @@
#include <stdbool.h>
#include <stdlib.h>
#include "ovs-atomic.h"
#include "util.h"
#ifdef __cplusplus
@ -124,6 +123,16 @@ struct hmap_node *hmap_random_node(const struct hmap *);
* iteration).
*
* HASH is only evaluated once.
*
*
* Warning
* -------
*
* When the loop terminates, &NODE->MEMBER will equal NULL. Unless MEMBER is
* the first member in its struct, this means that NODE itself will not be
* NULL.
*
* (This is true for all of the HMAP_FOR_EACH_*() macros.)
*/
#define HMAP_FOR_EACH_WITH_HASH(NODE, MEMBER, HASH, HMAP) \
for (INIT_CONTAINER(NODE, hmap_first_with_hash(HMAP, HASH), MEMBER); \

View File

@ -1063,36 +1063,49 @@ jsonrpc_session_recv_wait(struct jsonrpc_session *s)
}
}
/* Returns true if 's' is currently connected or trying to connect. */
bool
jsonrpc_session_is_alive(const struct jsonrpc_session *s)
{
return s->rpc || s->stream || reconnect_get_max_tries(s->reconnect);
}
/* Returns true if 's' is currently connected. */
bool
jsonrpc_session_is_connected(const struct jsonrpc_session *s)
{
return s->rpc != NULL;
}
/* Returns a sequence number for 's'. The sequence number increments every
* time 's' connects or disconnects. Thus, a caller can use the change (or
* lack of change) in the sequence number to figure out whether the underlying
* connection is the same as before. */
unsigned int
jsonrpc_session_get_seqno(const struct jsonrpc_session *s)
{
return s->seqno;
}
/* Reports the status of the connection underlying 's', as returned by
 * jsonrpc_get_status().  Yields 0 when 's' is NULL or when 's' has no
 * connection at present. */
int
jsonrpc_session_get_status(const struct jsonrpc_session *s)
{
    if (!s || !s->rpc) {
        return 0;
    }
    return jsonrpc_get_status(s->rpc);
}
/* Returns the last error reported on a connection by 's'. The return value is
* 0 only if no connection made by 's' has ever encountered an error. See
* jsonrpc_get_status() for return value interpretation. */
int
jsonrpc_session_get_last_error(const struct jsonrpc_session *s)
{
return s->last_error;
}
/* Populates 'stats' with statistics from 's'. */
void
jsonrpc_session_get_reconnect_stats(const struct jsonrpc_session *s,
struct reconnect_stats *stats)
@ -1100,6 +1113,7 @@ jsonrpc_session_get_reconnect_stats(const struct jsonrpc_session *s,
reconnect_get_stats(s->reconnect, time_msec(), stats);
}
/* Enables 's' to reconnect to the peer if the connection drops. */
void
jsonrpc_session_enable_reconnect(struct jsonrpc_session *s)
{
@ -1108,18 +1122,27 @@ jsonrpc_session_enable_reconnect(struct jsonrpc_session *s)
RECONNECT_DEFAULT_MAX_BACKOFF);
}
/* Makes 's' drop its current connection, if it has one, and connect again,
 * by asking its reconnect state machine to force a reconnection as of the
 * current time. */
void
jsonrpc_session_force_reconnect(struct jsonrpc_session *s)
{
    long long int now = time_msec();

    reconnect_force_reconnect(s->reconnect, now);
}
/* Limits to 'max_backoff' milliseconds the time that 's' waits after a failed
 * connection attempt before it tries to connect again.  The minimum backoff
 * passed to reconnect_set_backoff() is always 0. */
void
jsonrpc_session_set_max_backoff(struct jsonrpc_session *s, int max_backoff)
{
    const int min_backoff = 0;

    reconnect_set_backoff(s->reconnect, min_backoff, max_backoff);
}
/* Sets the "probe interval" for 's' to 'probe_interval', in milliseconds. If
* this is zero, it disables the connection keepalive feature. Otherwise, if
* 's' is idle for 'probe_interval' milliseconds then 's' will send an echo
* request and, if no reply is received within an additional 'probe_interval'
* milliseconds, close the connection (then reconnect, if that feature is
* enabled). */
void
jsonrpc_session_set_probe_interval(struct jsonrpc_session *s,
int probe_interval)
@ -1127,9 +1150,11 @@ jsonrpc_session_set_probe_interval(struct jsonrpc_session *s,
reconnect_set_probe_interval(s->reconnect, probe_interval);
}
/* Sets the DSCP value used for 's''s connection to 'dscp'. If this is
* different from the DSCP value currently in use then the connection is closed
* and reconnected. */
void
jsonrpc_session_set_dscp(struct jsonrpc_session *s,
uint8_t dscp)
jsonrpc_session_set_dscp(struct jsonrpc_session *s, uint8_t dscp)
{
if (s->dscp != dscp) {
pstream_close(s->pstream);

View File

@ -1,4 +1,4 @@
/* Copyright (c) 2011, 2012, 2013, 2014 Nicira, Inc.
/* Copyright (c) 2011, 2012, 2013, 2014, 2015 Nicira, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -23,6 +23,7 @@
#include "hash.h"
#include "hmap.h"
#include "dp-packet.h"
#include "ovs-atomic.h"
#include "packets.h"
#include "poll-loop.h"
#include "seq.h"

View File

@ -73,6 +73,9 @@ static inline bool list_is_short(const struct ovs_list *);
? INIT_CONTAINER(NEXT, (ITER)->MEMBER.next, MEMBER), 1 \
: 0); \
(ITER) = (NEXT))
/* Iterates destructively over LIST: while LIST is not empty, removes its
 * front element with list_pop_front() and points ITER at the structure that
 * contains it through MEMBER.  Each element is therefore already unlinked
 * from LIST when the loop body runs, so the body may free it without calling
 * list_remove().  LIST is evaluated more than once per iteration. */
#define LIST_FOR_EACH_POP(ITER, MEMBER, LIST) \
while (!list_is_empty(LIST) \
&& (INIT_CONTAINER(ITER, list_pop_front(LIST), MEMBER), 1))
/* Inline implementations. */

View File

@ -26,12 +26,14 @@
struct lldp_aa_element_system_id {
uint8_t system_mac[6];
uint16_t conn_type;
uint16_t smlt_id;
uint8_t mlt_id[2];
uint16_t rsvd;
uint8_t rsvd2[2];
};
struct lldpd_aa_element_tlv {
uint16_t type;
uint16_t vlan_tagging;
uint16_t auto_prov_mode;
uint16_t mgmt_vlan;
struct lldp_aa_element_system_id system_id;
};

View File

@ -36,7 +36,7 @@
#define LLDP_TLV_ORG_DOT1 {0x00, 0x80, 0xc2}
#define LLDP_TLV_ORG_DOT3 {0x00, 0x12, 0x0f}
#define LLDP_TLV_ORG_MED {0x00, 0x12, 0xbb}
#define LLDP_TLV_ORG_AVAYA {0x00, 0x40, 0x0D}
#define LLDP_TLV_ORG_AVAYA {0x00, 0x04, 0x0D}
#define LLDP_TLV_ORG_DCBX {0x00, 0x1b, 0x21}
#define LLDP_TLV_DOT1_PVID 1
@ -61,16 +61,26 @@
#define LLDP_TLV_MED_IV_MODEL 10
#define LLDP_TLV_MED_IV_ASSET 11
#define LLDP_TLV_AA_ELEMENT_SUBTYPE 0x08
#define LLDP_TLV_AA_ISID_VLAN_ASGNS_SUBTYPE 0x09
#define LLDP_TLV_AA_ELEMENT_SUBTYPE 0x0b
#define LLDP_TLV_AA_ISID_VLAN_ASGNS_SUBTYPE 0x0c
#define LLDP_TLV_AA_ISID_VLAN_DIGEST_LENGTH 32
#define LLDP_TLV_AA_ELEM_TYPE_UNKNOWN 1
#define LLDP_TLV_AA_ELEM_TYPE_SERVER 2
#define LLDP_TLV_AA_ELEM_TYPE_PROXY 3
#define LLDP_TLV_AA_ELEM_TYPE_UNTAG_CLIENT 4
#define LLDP_TLV_AA_ELEM_TYPE_TAG_CLIENT 5
#define LLDP_TLV_AA_ELEM_TYPE_SERV_NO_AUTH 6
#define LLDP_TLV_AA_ELEM_TYPE_PROXY_NO_AUTH 7
#define LLDP_TLV_AA_ELEM_TYPE_UNKNOWN 1
#define LLDP_TLV_AA_ELEM_TYPE_SERVER 2
#define LLDP_TLV_AA_ELEM_TYPE_PROXY 3
#define LLDP_TLV_AA_ELEM_TYPE_SERV_NO_AUTH 4
#define LLDP_TLV_AA_ELEM_TYPE_PROXY_NO_AUTH 5
#define LLDP_TLV_AA_ELEM_TYPE_CLIENT_WIRELESS_ACCESS_POINT_TYPE1 6
#define LLDP_TLV_AA_ELEM_TYPE_CLIENT_WIRELESS_ACCESS_POINT_TYPE2 7
#define LLDP_TLV_AA_ELEM_TYPE_CLIENT_SWITCH 8
#define LLDP_TLV_AA_ELEM_TYPE_CLIENT_ROUTER 9
#define LLDP_TLV_AA_ELEM_TYPE_CLIENT_IP_PHONE 10
#define LLDP_TLV_AA_ELEM_TYPE_CLIENT_IP_CAMERA 11
#define LLDP_TLV_AA_ELEM_TYPE_CLIENT_IP_VIDEO 12
#define LLDP_TLV_AA_ELEM_TYPE_CLIENT_SECURITY_DEVICE 13
#define LLDP_TLV_AA_ELEM_TYPE_CLIENT_VIRTUAL_SWITCH 14
#define LLDP_TLV_AA_ELEM_TYPE_CLIENT_SERVER_ENDPOINT 15
#define LLDP_TLV_AA_ELEM_CONN_TYPE_SINGLE 0
#define LLDP_TLV_AA_ELEM_CONN_TYPE_MLT 1
#define LLDP_TLV_AA_ELEM_CONN_TYPE_SLT 2

View File

@ -235,53 +235,69 @@ lldp_send(struct lldpd *global OVS_UNUSED,
lldp_tlv_end(p, start);
}
/* Add Auto Attach tlvs to packet */
/* Add Auto Attach tlvs V3.1 to packet. LLDP FA element v3.1 format:
TLV Type[127] TLV Length[50 octets] Avaya OUI[00-04-0D] Subtype[11]
7 bits 9 bits 3 octets 1 octet
HMAC-SHA Digest Element Type State Mgmt VLAN Rsvd System ID
32 octets 6 bits 6 bits 12 bits 1 octet 10 octets
*/
/* AA-ELEMENT */
if (port->p_element.type != 0) {
u_int8_t aa_element_first_byte;
u_int8_t aa_element_second_byte = 0;
u_int16_t aa_element_first_word = 0;
u_int16_t aa_element_second_word = 0;
u_int16_t aa_element_state = 0;
u_int8_t aa_elem_sys_id_first_byte;
u_int8_t aa_elem_sys_id_second_byte;
/* Element type should be first 4 most significant bits, so bitwise OR
* that with the first 4 bits of the 12-bit-wide mgmt_vlan
*/
aa_element_first_byte = (((port->p_element.type & 0xF) << 4) |
((port->p_element.mgmt_vlan >> 8) & 0xF));
/* Link VLAN Tagging Requirements (bit 1),
* Automatic Provisioning Mode (bit 2/3) (left to right, 1 based) */
aa_element_state = ((port->p_element.vlan_tagging & 0x1) << 5) |
((port->p_element.auto_prov_mode & 0x3) << 3);
/* Second byte should just be the remaining 8 bits of .mgmt_vlan */
aa_element_second_byte = port->p_element.mgmt_vlan & 0x0FF;
/* Element first word should be first 6 most significant bits of
* element type, bitwise OR that with the next 6 bits of the state,
* bitwise OR with the first 4 bits of mgmt vlan id.
* Element type should be LLDP_TLV_AA_ELEM_TYPE_VIRTUAL_SWITCH for
* AA client */
aa_element_first_word = (port->p_element.type << 10) |
(aa_element_state << 4) |
((port->p_element.mgmt_vlan & 0x0F00)>> 8);
/* .conn_type should be 4 most sig. bits, so bitwise OR that
* with the first 4 bits of the 12-bit-wide .smlt_id
*/
/* Element second type should be the first 8 most significant bits
* of the remaining 8 bits of mgmt vlan id. */
aa_element_second_word = (port->p_element.mgmt_vlan & 0xFF) << 8;
/* System id first byte should be first 3 most significant bits of
* connection type, bitwise OR that with the device state and bitwise
* OR that with the first 2 most significant bits of rsvd (10 bits). */
aa_elem_sys_id_first_byte =
((port->p_element.system_id.conn_type & 0xF) << 4) |
((port->p_element.system_id.smlt_id >> 8) & 0xF);
((port->p_element.system_id.conn_type & 0x7) << 5) |
((port->p_element.system_id.rsvd >> 8) & 0x3);
/* Second byte should just be the remaining 8 bits of .smlt_id */
aa_elem_sys_id_second_byte = port->p_element.system_id.smlt_id & 0x0FF;
/* Second byte should just be the remaining 8 bits of 10 bits rsvd */
aa_elem_sys_id_second_byte =
(port->p_element.system_id.rsvd & 0xFF);
memset(msg_auth_digest, 0, sizeof msg_auth_digest);
lldp_tlv_start(p, LLDP_TLV_ORG, &start);
dp_packet_put(p, avaya, sizeof avaya);
lldp_tlv_put_u8(p, LLDP_TLV_AA_ELEMENT_SUBTYPE);
lldp_tlv_put_u8(p, aa_element_first_byte);
lldp_tlv_put_u8(p, aa_element_second_byte);
dp_packet_put(p, msg_auth_digest, sizeof msg_auth_digest);
lldp_tlv_put_u16(p, aa_element_first_word);
lldp_tlv_put_u16(p, aa_element_second_word);
dp_packet_put(p, &port->p_element.system_id.system_mac,
sizeof port->p_element.system_id.system_mac);
lldp_tlv_put_u8(p, aa_elem_sys_id_first_byte);
lldp_tlv_put_u8(p, aa_elem_sys_id_second_byte);
dp_packet_put(p, &port->p_element.system_id.mlt_id,
sizeof port->p_element.system_id.mlt_id);
dp_packet_put(p, &port->p_element.system_id.rsvd2,
sizeof port->p_element.system_id.rsvd2);
lldp_tlv_end(p, start);
}
if (!list_is_empty(&port->p_isid_vlan_maps)) {
int j;
for (j = 0; j < LLDP_TLV_AA_ISID_VLAN_DIGEST_LENGTH; j++) {
msg_auth_digest[j] = 0;
}
memset(msg_auth_digest, 0, sizeof msg_auth_digest);
lldp_tlv_start(p, LLDP_TLV_ORG, &start);
dp_packet_put(p, avaya, sizeof avaya);
@ -314,6 +330,7 @@ lldp_send(struct lldpd *global OVS_UNUSED,
if (!hardware->h_lport.p_lastframe
|| hardware->h_lport.p_lastframe->size != lldp_len
|| memcmp(hardware->h_lport.p_lastframe->frame, lldp, lldp_len)) {
struct lldpd_frame *frame = xmalloc(sizeof *frame + lldp_len);
frame->size = lldp_len;
memcpy(frame->frame, lldp, lldp_len);
@ -327,8 +344,8 @@ lldp_send(struct lldpd *global OVS_UNUSED,
int
lldp_decode(struct lldpd *cfg OVS_UNUSED, char *frame, int s,
struct lldpd_hardware *hardware, struct lldpd_chassis **newchassis,
struct lldpd_port **newport)
struct lldpd_hardware *hardware, struct lldpd_chassis **newchassis,
struct lldpd_port **newport)
{
struct lldpd_chassis *chassis;
struct lldpd_port *port;
@ -372,6 +389,7 @@ lldp_decode(struct lldpd *cfg OVS_UNUSED, char *frame, int s,
"received on %s", hardware->h_ifname);
goto malformed;
}
PEEK_DISCARD(ETH_ADDR_LEN); /* Skip source address */
if (PEEK_UINT16 != ETHERTYPE_LLDP) {
VLOG_INFO("non LLDP frame received on %s", hardware->h_ifname);
@ -498,34 +516,57 @@ lldp_decode(struct lldpd *cfg OVS_UNUSED, char *frame, int s,
/* LLDP-MED */
hardware->h_rx_unrecognized_cnt++;
} else if (memcmp(avaya_oid, orgid, sizeof orgid) == 0) {
u_int16_t aa_element_word;
u_int16_t aa_status_vlan_word;
u_int32_t aa_element_dword;
u_int16_t aa_system_id_word;
u_int16_t aa_status_vlan_word;
u_int8_t aa_element_state;
unsigned short num_mappings;
switch(tlv_subtype) {
case LLDP_TLV_AA_ELEMENT_SUBTYPE:
aa_element_word = PEEK_UINT16;
PEEK_BYTES(&msg_auth_digest, sizeof msg_auth_digest);
/* Type is first 4 most-significant bits */
port->p_element.type = aa_element_word >> 12;
aa_element_dword = PEEK_UINT32;
/* mgmt_vlan is last 12 bits */
port->p_element.mgmt_vlan = aa_element_word & 0x0FFF;
VLOG_INFO("Element type: %X, Mgmt vlan: %X",
/* Type is first 6 most-significant bits of
* aa_element_dword */
port->p_element.type = aa_element_dword >> 26;
/* State is 6 most significant bits of aa_element_dword */
aa_element_state = (aa_element_dword >> 20) & 0x3F;
/* vlan tagging requirement is the bit 1(left to right)
* of the 6 bits state (1 based) */
port->p_element.vlan_tagging =
(aa_element_state >> 5) & 0x1;
/* Automatic provision mode is the bit 2/3(left to right)
* of the 6 bits state (1 based) */
port->p_element.auto_prov_mode =
(aa_element_state >> 3) & 0x3;
/* mgmt_vlan is the 12 bits of aa_element_dword from
* bit 12 */
port->p_element.mgmt_vlan =
(aa_element_dword >> 8) & 0xFFF;
VLOG_INFO("Element type: %X, vlan tagging %X, "
"auto prov mode %x, Mgmt vlan: %X",
port->p_element.type,
port->p_element.vlan_tagging,
port->p_element.auto_prov_mode,
port->p_element.mgmt_vlan);
PEEK_BYTES(&port->p_element.system_id.system_mac,
sizeof port->p_element.system_id.system_mac);
VLOG_INFO("System mac: "ETH_ADDR_FMT,
ETH_ADDR_ARGS(port->p_element.system_id.system_mac));
ETH_ADDR_ARGS(port->p_element.system_id.system_mac));
aa_system_id_word = PEEK_UINT16;
port->p_element.system_id.conn_type =
aa_system_id_word >> 12;
port->p_element.system_id.smlt_id =
aa_system_id_word & 0x0FFF;
PEEK_BYTES(&port->p_element.system_id.mlt_id,
sizeof port->p_element.system_id.mlt_id);
aa_system_id_word >> 13;
port->p_element.system_id.rsvd = aa_system_id_word &
0x03FF;
PEEK_BYTES(&port->p_element.system_id.rsvd2,
sizeof port->p_element.system_id.rsvd2);
break;
case LLDP_TLV_AA_ISID_VLAN_ASGNS_SUBTYPE:

View File

@ -28,13 +28,12 @@ VLOG_DEFINE_THIS_MODULE(lldpd_structs);
void
lldpd_chassis_mgmt_cleanup(struct lldpd_chassis *chassis)
{
struct lldpd_mgmt *mgmt, *mgmt_next;
struct lldpd_mgmt *mgmt;
VLOG_DBG("cleanup management addresses for chassis %s",
chassis->c_name ? chassis->c_name : "(unknown)");
LIST_FOR_EACH_SAFE (mgmt, mgmt_next, m_entries, &chassis->c_mgmt) {
list_remove(&mgmt->m_entries);
LIST_FOR_EACH_POP (mgmt, m_entries, &chassis->c_mgmt) {
free(mgmt);
}

View File

@ -164,7 +164,7 @@ static void
lldpd_move_chassis(struct lldpd_chassis *ochassis,
struct lldpd_chassis *chassis)
{
struct lldpd_mgmt *mgmt, *mgmt_next;
struct lldpd_mgmt *mgmt;
int refcount = ochassis->c_refcount;
int index = ochassis->c_index;
struct ovs_list listcopy;
@ -182,8 +182,7 @@ lldpd_move_chassis(struct lldpd_chassis *ochassis,
list_init(&ochassis->c_mgmt);
/* Copy of management addresses */
LIST_FOR_EACH_SAFE (mgmt, mgmt_next, m_entries, &chassis->c_mgmt) {
list_remove(&mgmt->m_entries);
LIST_FOR_EACH_POP (mgmt, m_entries, &chassis->c_mgmt) {
list_insert(&ochassis->c_mgmt, &mgmt->m_entries);
}

View File

@ -82,6 +82,14 @@ lockfile_name(const char *filename_)
* symlink, not one for each. */
filename = follow_symlinks(filename_);
slash = strrchr(filename, '/');
#ifdef _WIN32
char *backslash = strrchr(filename, '\\');
if (backslash && (!slash || backslash > slash)) {
slash = backslash;
}
#endif
lockname = (slash
? xasprintf("%.*s/.%s.~lock~",
(int) (slash - filename), filename, slash + 1)

View File

@ -307,10 +307,9 @@ static void
mcast_snooping_flush_group(struct mcast_snooping *ms, struct mcast_group *grp)
OVS_REQ_WRLOCK(ms->rwlock)
{
struct mcast_group_bundle *b, *next_b;
struct mcast_group_bundle *b;
LIST_FOR_EACH_SAFE (b, next_b, bundle_node, &grp->bundle_lru) {
list_remove(&b->bundle_node);
LIST_FOR_EACH_POP (b, bundle_node, &grp->bundle_lru) {
free(b);
}
mcast_snooping_flush_group__(ms, grp);

View File

@ -352,6 +352,41 @@ mf_mask_field_and_prereqs(const struct mf_field *mf, struct flow *mask)
}
}
/* Sets, in 'bm', the bit for field 'mf' and the bits for the fields that the
 * prerequisites of 'mf' are expressed in terms of.
 *
 * The prerequisite classes are layered, so each group of cases below adds its
 * own bits and then deliberately falls through to the next, lower layer. */
void
mf_bitmap_set_field_and_prereqs(const struct mf_field *mf,
                                struct mf_bitmap *bm)
{
    bitmap_set1(bm->bm, mf->id);

    switch (mf->prereqs) {
    case MFP_ND_ADVERT:
    case MFP_ND_SOLICIT:
    case MFP_ND:
        /* Neighbor discovery additionally depends on the transport ports. */
        bitmap_set1(bm->bm, MFF_TCP_DST);
        bitmap_set1(bm->bm, MFF_TCP_SRC);
        /* Fall through. */
    case MFP_ICMPV6:
    case MFP_ICMPV4:
    case MFP_SCTP:
    case MFP_UDP:
    case MFP_TCP:
        /* nw_frag always unwildcarded. */
        bitmap_set1(bm->bm, MFF_IP_PROTO);
        /* Fall through. */
    case MFP_IP_ANY:
    case MFP_MPLS:
    case MFP_IPV6:
    case MFP_IPV4:
    case MFP_ARP:
        bitmap_set1(bm->bm, MFF_ETH_TYPE);
        break;

    case MFP_VLAN_VID:
        bitmap_set1(bm->bm, MFF_VLAN_TCI);
        break;

    case MFP_NONE:
        /* No prerequisites: only the bit for 'mf' itself is set. */
        break;
    }
}
/* Returns true if 'value' may be a valid value *as part of a masked match*,
* false otherwise.
@ -2278,3 +2313,12 @@ mf_format_subvalue(const union mf_subvalue *subvalue, struct ds *s)
}
ds_put_char(s, '0');
}
/* Stores a copy of '*value' as the value of field 'id' in 'fa' and marks 'id'
 * as used in 'fa->used'.  'id' must be less than MFF_N_IDS; this is enforced
 * with an assertion. */
void
field_array_set(enum mf_field_id id, const union mf_value *value,
                struct field_array *fa)
{
    ovs_assert(id < MFF_N_IDS);

    fa->value[id] = *value;
    bitmap_set1(fa->used.bm, id);
}

View File

@ -1590,6 +1590,12 @@ union mf_subvalue {
};
BUILD_ASSERT_DECL(sizeof(union mf_value) == sizeof (union mf_subvalue));
/* An array of fields with values: one value slot per field ID (MFF_N_IDS
 * slots in total) plus a bitmap recording which slots have been filled in. */
struct field_array {
struct mf_bitmap used; /* Bit 'id' is set by field_array_set(). */
union mf_value value[MFF_N_IDS]; /* Indexed by enum mf_field_id. */
};
/* Finding mf_fields. */
const struct mf_field *mf_from_name(const char *name);
@ -1611,6 +1617,8 @@ void mf_get_mask(const struct mf_field *, const struct flow_wildcards *,
/* Prerequisites. */
bool mf_are_prereqs_ok(const struct mf_field *, const struct flow *);
void mf_mask_field_and_prereqs(const struct mf_field *, struct flow *mask);
void mf_bitmap_set_field_and_prereqs(const struct mf_field *mf, struct
mf_bitmap *bm);
static inline bool
mf_is_l3_or_higher(const struct mf_field *mf)
@ -1668,4 +1676,8 @@ void mf_format(const struct mf_field *,
struct ds *);
void mf_format_subvalue(const union mf_subvalue *subvalue, struct ds *s);
/* Field Arrays. */
void field_array_set(enum mf_field_id id, const union mf_value *,
struct field_array *);
#endif /* meta-flow.h */

View File

@ -1785,7 +1785,7 @@ static int
ifr_get_flags(const struct ifreq *ifr)
{
#ifdef HAVE_STRUCT_IFREQ_IFR_FLAGSHIGH
return (ifr->ifr_flagshigh << 16) | ifr->ifr_flags;
return (ifr->ifr_flagshigh << 16) | (ifr->ifr_flags & 0xffff);
#else
return ifr->ifr_flags;
#endif
@ -1794,9 +1794,11 @@ ifr_get_flags(const struct ifreq *ifr)
static void
ifr_set_flags(struct ifreq *ifr, int flags)
{
ifr->ifr_flags = flags;
#ifdef HAVE_STRUCT_IFREQ_IFR_FLAGSHIGH
ifr->ifr_flags = flags & 0xffff;
ifr->ifr_flagshigh = flags >> 16;
#else
ifr->ifr_flags = flags;
#endif
}

View File

@ -49,6 +49,7 @@
#include "rte_config.h"
#include "rte_mbuf.h"
#include "rte_virtio_net.h"
VLOG_DEFINE_THIS_MODULE(dpdk);
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
@ -67,9 +68,23 @@ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
#define MBUF_SIZE(mtu) (MTU_TO_MAX_LEN(mtu) + (512) + \
sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM)
/* XXX: mempool size should be based on system resources. */
#define NB_MBUF (4096 * 64)
#define MP_CACHE_SZ (256 * 2)
/* Max and min number of packets in the mempool. OVS tries to allocate a
* mempool with MAX_NB_MBUF: if this fails (because the system doesn't have
* enough hugepages) we keep halving the number until the allocation succeeds
* or we reach MIN_NB_MBUF */
#define MAX_NB_MBUF (4096 * 64)
#define MIN_NB_MBUF (4096 * 4)
#define MP_CACHE_SZ RTE_MEMPOOL_CACHE_MAX_SIZE
/* MAX_NB_MBUF can be divided by 2 many times, until MIN_NB_MBUF */
BUILD_ASSERT_DECL(MAX_NB_MBUF % ROUND_DOWN_POW2(MAX_NB_MBUF/MIN_NB_MBUF) == 0);
/* The smallest possible NB_MBUF that we're going to try should be a multiple
* of MP_CACHE_SZ. This is advised by DPDK documentation. */
BUILD_ASSERT_DECL((MAX_NB_MBUF / ROUND_DOWN_POW2(MAX_NB_MBUF/MIN_NB_MBUF))
% MP_CACHE_SZ == 0);
#define SOCKET0 0
#define NIC_PORT_RX_Q_SIZE 2048 /* Size of Physical NIC RX Queue, Max (n+32<=4096)*/
@ -84,6 +99,11 @@ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
#define TX_HTHRESH 0 /* Default values of TX host threshold reg. */
#define TX_WTHRESH 0 /* Default values of TX write-back threshold reg. */
#define MAX_PKT_BURST 32 /* Max burst size for RX/TX */
/* Character device cuse_dev_name. */
char *cuse_dev_name = NULL;
static const struct rte_eth_conf port_conf = {
.rxmode = {
.mq_mode = ETH_MQ_RX_RSS,
@ -131,6 +151,11 @@ enum { DPDK_RING_SIZE = 256 };
BUILD_ASSERT_DECL(IS_POW2(DPDK_RING_SIZE));
enum { DRAIN_TSC = 200000ULL };
/* Kind of device behind a 'struct netdev_dpdk'; stored in its 'type' member
 * and used to skip ethernet-only setup and control for vhost ports. */
enum dpdk_dev_type {
DPDK_DEV_ETH = 0, /* Device set up through dpdk_eth_dev_init(). */
DPDK_DEV_VHOST = 1 /* vhost port, backed by a virtio-net device. */
};
static int rte_eal_init_ret = ENODEV;
static struct ovs_mutex dpdk_mutex = OVS_MUTEX_INITIALIZER;
@ -185,6 +210,7 @@ struct netdev_dpdk {
struct netdev up;
int port_id;
int max_packet_len;
enum dpdk_dev_type type;
struct dpdk_tx_queue *tx_q;
@ -202,9 +228,12 @@ struct netdev_dpdk {
struct rte_eth_link link;
int link_reset_cnt;
/* virtio-net structure for vhost device */
OVSRCU_TYPE(struct virtio_net *) virtio_dev;
/* In dpdk_list. */
struct ovs_list list_node OVS_GUARDED_BY(dpdk_mutex);
rte_spinlock_t dpdkr_tx_lock;
rte_spinlock_t txq_lock;
};
struct netdev_rxq_dpdk {
@ -216,14 +245,16 @@ static bool thread_is_pmd(void);
static int netdev_dpdk_construct(struct netdev *);
struct virtio_net * netdev_dpdk_get_virtio(const struct netdev_dpdk *dev);
static bool
is_dpdk_class(const struct netdev_class *class)
{
return class->construct == netdev_dpdk_construct;
}
/* XXX: use dpdk malloc for entire OVS. infact huge page shld be used
* for all other sengments data, bss and text. */
/* XXX: use dpdk malloc for entire OVS. in fact huge page should be used
* for all other segments data, bss and text. */
static void *
dpdk_rte_mzalloc(size_t sz)
@ -293,6 +324,7 @@ dpdk_mp_get(int socket_id, int mtu) OVS_REQUIRES(dpdk_mutex)
{
struct dpdk_mp *dmp = NULL;
char mp_name[RTE_MEMPOOL_NAMESIZE];
unsigned mp_size;
LIST_FOR_EACH (dmp, list_node, &dpdk_mp_list) {
if (dmp->socket_id == socket_id && dmp->mtu == mtu) {
@ -306,20 +338,25 @@ dpdk_mp_get(int socket_id, int mtu) OVS_REQUIRES(dpdk_mutex)
dmp->mtu = mtu;
dmp->refcount = 1;
if (snprintf(mp_name, RTE_MEMPOOL_NAMESIZE, "ovs_mp_%d_%d", dmp->mtu,
dmp->socket_id) < 0) {
return NULL;
}
mp_size = MAX_NB_MBUF;
do {
if (snprintf(mp_name, RTE_MEMPOOL_NAMESIZE, "ovs_mp_%d_%d_%u",
dmp->mtu, dmp->socket_id, mp_size) < 0) {
return NULL;
}
dmp->mp = rte_mempool_create(mp_name, NB_MBUF, MBUF_SIZE(mtu),
MP_CACHE_SZ,
sizeof(struct rte_pktmbuf_pool_private),
rte_pktmbuf_pool_init, NULL,
ovs_rte_pktmbuf_init, NULL,
socket_id, 0);
dmp->mp = rte_mempool_create(mp_name, mp_size, MBUF_SIZE(mtu),
MP_CACHE_SZ,
sizeof(struct rte_pktmbuf_pool_private),
rte_pktmbuf_pool_init, NULL,
ovs_rte_pktmbuf_init, NULL,
socket_id, 0);
} while (!dmp->mp && rte_errno == ENOMEM && (mp_size /= 2) >= MIN_NB_MBUF);
if (dmp->mp == NULL) {
return NULL;
} else {
VLOG_DBG("Allocated \"%s\" mempool with %u mbufs", mp_name, mp_size );
}
list_push_back(&dpdk_mp_list, &dmp->list_node);
@ -483,7 +520,8 @@ netdev_dpdk_alloc_txq(struct netdev_dpdk *netdev, unsigned int n_txqs)
}
static int
netdev_dpdk_init(struct netdev *netdev_, unsigned int port_no)
netdev_dpdk_init(struct netdev *netdev_, unsigned int port_no,
enum dpdk_dev_type type)
OVS_REQUIRES(dpdk_mutex)
{
struct netdev_dpdk *netdev = netdev_dpdk_cast(netdev_);
@ -491,20 +529,24 @@ netdev_dpdk_init(struct netdev *netdev_, unsigned int port_no)
int err = 0;
ovs_mutex_init(&netdev->mutex);
ovs_mutex_lock(&netdev->mutex);
/* If the 'sid' is negative, it means that the kernel fails
* to obtain the pci numa info. In that situation, always
* use 'SOCKET0'. */
sid = rte_eth_dev_socket_id(port_no);
if (type == DPDK_DEV_ETH) {
sid = rte_eth_dev_socket_id(port_no);
} else {
sid = rte_lcore_to_socket_id(rte_get_master_lcore());
}
netdev->socket_id = sid < 0 ? SOCKET0 : sid;
netdev_dpdk_alloc_txq(netdev, NR_QUEUE);
netdev->port_id = port_no;
netdev->type = type;
netdev->flags = 0;
netdev->mtu = ETHER_MTU;
netdev->max_packet_len = MTU_TO_MAX_LEN(netdev->mtu);
rte_spinlock_init(&netdev->dpdkr_tx_lock);
rte_spinlock_init(&netdev->txq_lock);
netdev->dpdk_mp = dpdk_mp_get(netdev->socket_id, netdev->mtu);
if (!netdev->dpdk_mp) {
@ -514,9 +556,13 @@ netdev_dpdk_init(struct netdev *netdev_, unsigned int port_no)
netdev_->n_txq = NR_QUEUE;
netdev_->n_rxq = NR_QUEUE;
err = dpdk_eth_dev_init(netdev);
if (err) {
goto unlock;
if (type == DPDK_DEV_ETH) {
netdev_dpdk_alloc_txq(netdev, NR_QUEUE);
err = dpdk_eth_dev_init(netdev);
if (err) {
goto unlock;
}
}
list_push_back(&dpdk_list, &netdev->list_node);
@ -544,6 +590,22 @@ dpdk_dev_parse_name(const char dev_name[], const char prefix[],
return 0;
}
/* Constructs 'netdev_' as a vhost port.
 *
 * Returns the saved rte_eal_init() result if it is nonzero.  Otherwise
 * initializes the device as DPDK_DEV_VHOST, with dpdk_mutex held, and returns
 * the result of netdev_dpdk_init(). */
static int
netdev_dpdk_vhost_construct(struct netdev *netdev_)
{
    int err = rte_eal_init_ret;

    if (!err) {
        ovs_mutex_lock(&dpdk_mutex);
        /* vhost ports pass -1 as the port number. */
        err = netdev_dpdk_init(netdev_, -1, DPDK_DEV_VHOST);
        ovs_mutex_unlock(&dpdk_mutex);
    }

    return err;
}
static int
netdev_dpdk_construct(struct netdev *netdev)
{
@ -561,7 +623,7 @@ netdev_dpdk_construct(struct netdev *netdev)
}
ovs_mutex_lock(&dpdk_mutex);
err = netdev_dpdk_init(netdev, port_no);
err = netdev_dpdk_init(netdev, port_no, DPDK_DEV_ETH);
ovs_mutex_unlock(&dpdk_mutex);
return err;
}
@ -580,8 +642,23 @@ netdev_dpdk_destruct(struct netdev *netdev_)
list_remove(&dev->list_node);
dpdk_mp_put(dev->dpdk_mp);
ovs_mutex_unlock(&dpdk_mutex);
}
ovs_mutex_destroy(&dev->mutex);
/* Tears down the vhost port 'netdev_': unlinks it from dpdk_list and drops
 * its reference to the mempool, both with dpdk_mutex held.
 *
 * If a virtio-net device is still attached to the port, this logs an error
 * and returns without doing anything. */
static void
netdev_dpdk_vhost_destruct(struct netdev *netdev_)
{
    struct netdev_dpdk *vhost_dev = netdev_dpdk_cast(netdev_);

    /* Can't remove a port while a guest is attached to it. */
    if (netdev_dpdk_get_virtio(vhost_dev)) {
        VLOG_ERR("Can not remove port, vhost device still attached");
        return;
    }

    ovs_mutex_lock(&dpdk_mutex);
    list_remove(&vhost_dev->list_node);
    dpdk_mp_put(vhost_dev->dpdk_mp);
    ovs_mutex_unlock(&dpdk_mutex);
}
static void
@ -635,6 +712,7 @@ netdev_dpdk_set_multiq(struct netdev *netdev_, unsigned int n_txq,
netdev->up.n_txq = n_txq;
netdev->up.n_rxq = n_rxq;
rte_free(netdev->tx_q);
netdev_dpdk_alloc_txq(netdev, n_txq);
err = dpdk_eth_dev_init(netdev);
@ -645,6 +723,29 @@ netdev_dpdk_set_multiq(struct netdev *netdev_, unsigned int n_txq,
return err;
}
/* Records 'n_txq' and 'n_rxq' as the queue counts of vhost port 'netdev_'.
 *
 * Only the counters in 'up' are updated; no queues are allocated or
 * reconfigured here.  Does nothing if both counts are already in effect.
 * Always returns 0. */
static int
netdev_dpdk_vhost_set_multiq(struct netdev *netdev_, unsigned int n_txq,
                             unsigned int n_rxq)
{
    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev_);

    if (dev->up.n_txq != n_txq || dev->up.n_rxq != n_rxq) {
        ovs_mutex_lock(&dpdk_mutex);
        ovs_mutex_lock(&dev->mutex);

        dev->up.n_txq = n_txq;
        dev->up.n_rxq = n_rxq;

        ovs_mutex_unlock(&dev->mutex);
        ovs_mutex_unlock(&dpdk_mutex);
    }

    return 0;
}
static struct netdev_rxq *
netdev_dpdk_rxq_alloc(void)
{
@ -731,6 +832,43 @@ dpdk_queue_flush(struct netdev_dpdk *dev, int qid)
dpdk_queue_flush__(dev, qid);
}
/* Returns true if 'dev' is non-NULL and has VIRTIO_DEV_RUNNING set in its
 * flags, false otherwise. */
static bool
is_vhost_running(struct virtio_net *dev)
{
    if (!dev) {
        return false;
    }
    return (dev->flags & VIRTIO_DEV_RUNNING) != 0;
}
/*
* The receive path for the vhost port is the TX path out from guest.
*/
static int
netdev_dpdk_vhost_rxq_recv(struct netdev_rxq *rxq_,
struct dp_packet **packets, int *c)
{
struct netdev_rxq_dpdk *rx = netdev_rxq_dpdk_cast(rxq_);
struct netdev *netdev = rx->up.netdev;
struct netdev_dpdk *vhost_dev = netdev_dpdk_cast(netdev);
struct virtio_net *virtio_dev = netdev_dpdk_get_virtio(vhost_dev);
int qid = 1;
uint16_t nb_rx = 0;
if (OVS_UNLIKELY(!is_vhost_running(virtio_dev))) {
return EAGAIN;
}
nb_rx = rte_vhost_dequeue_burst(virtio_dev, qid,
vhost_dev->dpdk_mp->mp,
(struct rte_mbuf **)packets,
MAX_PKT_BURST);
if (!nb_rx) {
return EAGAIN;
}
vhost_dev->stats.rx_packets += (uint64_t)nb_rx;
*c = (int) nb_rx;
return 0;
}
static int
netdev_dpdk_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet **packets,
int *c)
@ -759,6 +897,38 @@ netdev_dpdk_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet **packets,
return 0;
}
/* Transmits the 'cnt' packets in 'pkts' to the virtio-net device attached to
 * vhost port 'netdev'.
 *
 * If no running device is attached, all 'cnt' packets are counted as dropped.
 * Otherwise the packets are enqueued on VIRTIO_RXQ and the tx_packets and
 * tx_dropped counters are updated from the number actually enqueued.
 *
 * If 'may_steal' is true, every packet in 'pkts' is deleted before returning,
 * whether or not it was transmitted. */
static void
__netdev_dpdk_vhost_send(struct netdev *netdev, struct dp_packet **pkts,
                         int cnt, bool may_steal)
{
    struct netdev_dpdk *vhost_dev = netdev_dpdk_cast(netdev);
    struct virtio_net *virtio_dev = netdev_dpdk_get_virtio(vhost_dev);

    if (OVS_UNLIKELY(!is_vhost_running(virtio_dev))) {
        ovs_mutex_lock(&vhost_dev->mutex);
        vhost_dev->stats.tx_dropped += cnt;
        ovs_mutex_unlock(&vhost_dev->mutex);
    } else {
        int n_sent;

        /* There is vHost TX single queue, So we need to lock it for TX. */
        rte_spinlock_lock(&vhost_dev->txq_lock);
        n_sent = rte_vhost_enqueue_burst(virtio_dev, VIRTIO_RXQ,
                                         (struct rte_mbuf **) pkts, cnt);
        vhost_dev->stats.tx_packets += n_sent;
        vhost_dev->stats.tx_dropped += (cnt - n_sent);
        rte_spinlock_unlock(&vhost_dev->txq_lock);
    }

    if (may_steal) {
        int i;

        for (i = 0; i < cnt; i++) {
            dp_packet_delete(pkts[i]);
        }
    }
}
inline static void
dpdk_queue_pkts(struct netdev_dpdk *dev, int qid,
struct rte_mbuf **pkts, int cnt)
@ -790,7 +960,7 @@ dpdk_queue_pkts(struct netdev_dpdk *dev, int qid,
/* Tx function. Transmit packets indefinitely */
static void
dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet ** pkts,
dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet **pkts,
int cnt)
OVS_NO_THREAD_SAFETY_ANALYSIS
{
@ -840,14 +1010,37 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet ** pkts,
ovs_mutex_unlock(&dev->mutex);
}
dpdk_queue_pkts(dev, qid, mbufs, newcnt);
dpdk_queue_flush(dev, qid);
if (dev->type == DPDK_DEV_VHOST) {
__netdev_dpdk_vhost_send(netdev, (struct dp_packet **) mbufs, newcnt, true);
} else {
dpdk_queue_pkts(dev, qid, mbufs, newcnt);
dpdk_queue_flush(dev, qid);
}
if (!thread_is_pmd()) {
ovs_mutex_unlock(&nonpmd_mempool_mutex);
}
}
static int
netdev_dpdk_vhost_send(struct netdev *netdev, int qid OVS_UNUSED, struct dp_packet **pkts,
int cnt, bool may_steal)
{
if (OVS_UNLIKELY(pkts[0]->source != DPBUF_DPDK)) {
int i;
dpdk_do_tx_copy(netdev, qid, pkts, cnt);
if (may_steal) {
for (i = 0; i < cnt; i++) {
dp_packet_delete(pkts[i]);
}
}
} else {
__netdev_dpdk_vhost_send(netdev, pkts, cnt, may_steal);
}
return 0;
}
static inline void
netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
struct dp_packet **pkts, int cnt, bool may_steal)
@ -1001,6 +1194,44 @@ out:
static int
netdev_dpdk_get_carrier(const struct netdev *netdev_, bool *carrier);
/* Fills '*stats' with the statistics of vhost port 'netdev'.
 *
 * Only rx_packets, tx_packets and tx_dropped are taken from the device's own
 * counters.  Every other counter assigned below is unsupported for vhost
 * ports and is reported as UINT64_MAX.  Fields not mentioned at all are left
 * zeroed.
 *
 * Always returns 0. */
static int
netdev_dpdk_vhost_get_stats(const struct netdev *netdev,
                            struct netdev_stats *stats)
{
    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);

    ovs_mutex_lock(&dev->mutex);
    memset(stats, 0, sizeof *stats);

    /* Unsupported Stats */
    stats->rx_errors = UINT64_MAX;
    stats->tx_errors = UINT64_MAX;
    stats->multicast = UINT64_MAX;
    stats->collisions = UINT64_MAX;
    stats->rx_crc_errors = UINT64_MAX;
    stats->rx_fifo_errors = UINT64_MAX;
    stats->rx_frame_errors = UINT64_MAX;
    stats->rx_length_errors = UINT64_MAX;
    stats->rx_missed_errors = UINT64_MAX;
    stats->rx_over_errors = UINT64_MAX;
    stats->tx_aborted_errors = UINT64_MAX;
    stats->tx_carrier_errors = UINT64_MAX;
    stats->tx_fifo_errors = UINT64_MAX;
    stats->tx_heartbeat_errors = UINT64_MAX;
    stats->tx_window_errors = UINT64_MAX;
    stats->rx_bytes = UINT64_MAX;
    stats->rx_dropped = UINT64_MAX;
    stats->tx_bytes = UINT64_MAX;

    /* Supported Stats */
    stats->rx_packets = dev->stats.rx_packets;
    stats->tx_packets = dev->stats.tx_packets;
    stats->tx_dropped = dev->stats.tx_dropped;
    ovs_mutex_unlock(&dev->mutex);

    return 0;
}
static int
netdev_dpdk_get_stats(const struct netdev *netdev, struct netdev_stats *stats)
{
@ -1095,6 +1326,26 @@ netdev_dpdk_get_carrier(const struct netdev *netdev_, bool *carrier)
ovs_mutex_lock(&dev->mutex);
check_link_status(dev);
*carrier = dev->link.link_status;
ovs_mutex_unlock(&dev->mutex);
return 0;
}
static int
netdev_dpdk_vhost_get_carrier(const struct netdev *netdev_, bool *carrier)
{
struct netdev_dpdk *dev = netdev_dpdk_cast(netdev_);
struct virtio_net *virtio_dev = netdev_dpdk_get_virtio(dev);
ovs_mutex_lock(&dev->mutex);
if (is_vhost_running(virtio_dev)) {
*carrier = 1;
} else {
*carrier = 0;
}
ovs_mutex_unlock(&dev->mutex);
return 0;
@ -1139,18 +1390,20 @@ netdev_dpdk_update_flags__(struct netdev_dpdk *dev,
return 0;
}
if (dev->flags & NETDEV_UP) {
err = rte_eth_dev_start(dev->port_id);
if (err)
return -err;
}
if (dev->type == DPDK_DEV_ETH) {
if (dev->flags & NETDEV_UP) {
err = rte_eth_dev_start(dev->port_id);
if (err)
return -err;
}
if (dev->flags & NETDEV_PROMISC) {
rte_eth_promiscuous_enable(dev->port_id);
}
if (dev->flags & NETDEV_PROMISC) {
rte_eth_promiscuous_enable(dev->port_id);
}
if (!(dev->flags & NETDEV_UP)) {
rte_eth_dev_stop(dev->port_id);
if (!(dev->flags & NETDEV_UP)) {
rte_eth_dev_stop(dev->port_id);
}
}
return 0;
@ -1261,6 +1514,139 @@ netdev_dpdk_set_admin_state(struct unixctl_conn *conn, int argc,
unixctl_command_reply(conn, "OK");
}
/*
 * Writes VRING_USED_F_NO_NOTIFY into the used-ring flags of both the RX and
 * the TX virtqueue of 'dev', so that we do not receive interrupts.
 */
static void
set_irq_status(struct virtio_net *dev)
{
    static const int queues[] = { VIRTIO_RXQ, VIRTIO_TXQ };
    unsigned int i;

    for (i = 0; i < sizeof queues / sizeof queues[0]; i++) {
        dev->virtqueue[queues[i]]->used->flags = VRING_USED_F_NO_NOTIFY;
    }
}
/*
 * A new virtio-net device is added to a vhost port.
 *
 * Looks through dpdk_list, with dpdk_mutex held, for the port whose name
 * matches 'dev->ifname' (compared over at most IFNAMSIZ bytes).  On a match,
 * publishes 'dev' as that port's virtio device, marks it
 * VIRTIO_DEV_RUNNING and disables guest notifications.
 *
 * Returns 0 if a matching port was found, -1 otherwise.
 */
static int
new_device(struct virtio_net *dev)
{
    struct netdev_dpdk *netdev;
    bool exists = false;

    ovs_mutex_lock(&dpdk_mutex);
    /* Add device to the vhost port with the same name as that passed down. */
    LIST_FOR_EACH (netdev, list_node, &dpdk_list) {
        if (strncmp(dev->ifname, netdev->up.name, IFNAMSIZ) == 0) {
            ovs_mutex_lock(&netdev->mutex);
            ovsrcu_set(&netdev->virtio_dev, dev);
            ovs_mutex_unlock(&netdev->mutex);
            exists = true;
            dev->flags |= VIRTIO_DEV_RUNNING;
            /* Disable notifications. */
            set_irq_status(dev);
            break;
        }
    }
    ovs_mutex_unlock(&dpdk_mutex);

    /* 'device_fh' is printed through an explicit cast so that the conversion
     * specifier matches the argument on every platform. */
    if (!exists) {
        VLOG_INFO("vHost Device '%s' (%llu) can't be added - name not found",
                  dev->ifname, (unsigned long long) dev->device_fh);

        return -1;
    }

    VLOG_INFO("vHost Device '%s' (%llu) has been added",
              dev->ifname, (unsigned long long) dev->device_fh);
    return 0;
}
/*
 * Remove a virtio-net device from the specific vhost port.
 *
 * For every port in dpdk_list whose virtio device is 'dev', clears
 * VIRTIO_DEV_RUNNING in 'dev->flags' and resets the port's virtio device
 * pointer to NULL so that no more packets are sent to or received from the
 * VM, then waits for the other threads to quiesce so that none of them is
 * still using the old pointer when this function returns.
 */
static void
destroy_device(volatile struct virtio_net *dev)
{
    struct netdev_dpdk *vhost_dev;

    ovs_mutex_lock(&dpdk_mutex);
    LIST_FOR_EACH (vhost_dev, list_node, &dpdk_list) {
        if (netdev_dpdk_get_virtio(vhost_dev) == dev) {

            ovs_mutex_lock(&vhost_dev->mutex);
            dev->flags &= ~VIRTIO_DEV_RUNNING;
            ovsrcu_set(&vhost_dev->virtio_dev, NULL);
            ovs_mutex_unlock(&vhost_dev->mutex);

            /*
             * The virtio_dev pointer has already been cleared above; wait
             * for other threads to quiesce so that none of them still holds
             * the old value.
             */
            ovsrcu_synchronize();
            /*
             * As call to ovsrcu_synchronize() will end the quiescent state,
             * put thread back into quiescent state before returning.
             */
            ovsrcu_quiesce_start();
        }
    }
    ovs_mutex_unlock(&dpdk_mutex);

    /* Logged whether or not a matching port was found.  'device_fh' is
     * printed through an explicit cast so that the conversion specifier
     * matches the argument on every platform. */
    VLOG_INFO("vHost Device '%s' (%llu) has been removed",
              dev->ifname, (unsigned long long) dev->device_fh);
}
/* Returns the virtio-net device pointer currently stored in
 * 'dev->virtio_dev', read through ovsrcu_get(). */
struct virtio_net *
netdev_dpdk_get_virtio(const struct netdev_dpdk *dev)
{
return ovsrcu_get(struct virtio_net *, &dev->virtio_dev);
}
/*
 * These callbacks allow virtio-net devices to be added to vhost ports once
 * their configuration is complete, and to be removed from them again:
 * new_device() attaches a device to the port with the matching name and
 * destroy_device() detaches it.
 */
const struct virtio_net_device_ops virtio_net_device_ops =
{
.new_device = new_device,
.destroy_device = destroy_device,
};
/* Thread entry point for the CUSE session loop.  Detaches the calling thread,
 * puts it into the RCU quiescent state, and then hands control to
 * rte_vhost_driver_session_start().  The argument is unused and the return
 * value is always NULL. */
static void *
start_cuse_session_loop(void *dummy OVS_UNUSED)
{
/* Nobody joins this thread, so detach it. */
pthread_detach(pthread_self());
/* Put the cuse thread into quiescent state. */
ovsrcu_quiesce_start();
rte_vhost_driver_session_start();
return NULL;
}
/* Class initializer for vhost ports.
 *
 * Registers the virtio-net device callbacks, registers the CUSE device named
 * by 'cuse_dev_name' and, if that succeeds, starts "cuse_thread" running
 * start_cuse_session_loop().
 *
 * Returns 0 on success, -1 if the CUSE device could not be registered. */
static int
dpdk_vhost_class_init(void)
{
    rte_vhost_driver_callback_register(&virtio_net_device_ops);

    /* Register CUSE device to handle IOCTLs.
     * Unless otherwise specified on the vswitchd command line, cuse_dev_name
     * is set to vhost-net.
     */
    if (rte_vhost_driver_register(cuse_dev_name) != 0) {
        VLOG_ERR("CUSE device setup failure.");
        return -1;
    }

    ovs_thread_create("cuse_thread", start_cuse_session_loop, NULL);
    return 0;
}
static void
dpdk_common_init(void)
{
@ -1345,7 +1731,7 @@ dpdk_ring_open(const char dev_name[], unsigned int *eth_port_id) OVS_REQUIRES(dp
/* look through our list to find the device */
LIST_FOR_EACH (ivshmem, list_node, &dpdk_ring_list) {
if (ivshmem->user_port_id == port_no) {
VLOG_INFO("Found dpdk ring device %s:\n", dev_name);
VLOG_INFO("Found dpdk ring device %s:", dev_name);
*eth_port_id = ivshmem->eth_port_id; /* really all that is needed */
return 0;
}
@ -1361,9 +1747,9 @@ netdev_dpdk_ring_send(struct netdev *netdev, int qid OVS_UNUSED,
struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
/* DPDK Rings have a single TX queue, Therefore needs locking. */
rte_spinlock_lock(&dev->dpdkr_tx_lock);
rte_spinlock_lock(&dev->txq_lock);
netdev_dpdk_send__(dev, 0, pkts, cnt, may_steal);
rte_spinlock_unlock(&dev->dpdkr_tx_lock);
rte_spinlock_unlock(&dev->txq_lock);
return 0;
}
@ -1384,14 +1770,15 @@ netdev_dpdk_ring_construct(struct netdev *netdev)
goto unlock_dpdk;
}
err = netdev_dpdk_init(netdev, port_no);
err = netdev_dpdk_init(netdev, port_no, DPDK_DEV_ETH);
unlock_dpdk:
ovs_mutex_unlock(&dpdk_mutex);
return err;
}
#define NETDEV_DPDK_CLASS(NAME, INIT, CONSTRUCT, MULTIQ, SEND) \
#define NETDEV_DPDK_CLASS(NAME, INIT, CONSTRUCT, DESTRUCT, MULTIQ, SEND, \
GET_CARRIER, GET_STATS, GET_FEATURES, GET_STATUS, RXQ_RECV) \
{ \
NAME, \
INIT, /* init */ \
@ -1400,14 +1787,14 @@ unlock_dpdk:
\
netdev_dpdk_alloc, \
CONSTRUCT, \
netdev_dpdk_destruct, \
DESTRUCT, \
netdev_dpdk_dealloc, \
netdev_dpdk_get_config, \
NULL, /* netdev_dpdk_set_config */ \
NULL, /* get_tunnel_config */ \
NULL, /* build header */ \
NULL, /* push header */ \
NULL, /* pop header */ \
NULL, /* build header */ \
NULL, /* push header */ \
NULL, /* pop header */ \
netdev_dpdk_get_numa_id, /* get_numa_id */ \
MULTIQ, /* set_multiq */ \
\
@ -1419,11 +1806,11 @@ unlock_dpdk:
netdev_dpdk_get_mtu, \
netdev_dpdk_set_mtu, \
netdev_dpdk_get_ifindex, \
netdev_dpdk_get_carrier, \
GET_CARRIER, \
netdev_dpdk_get_carrier_resets, \
netdev_dpdk_set_miimon, \
netdev_dpdk_get_stats, \
netdev_dpdk_get_features, \
GET_STATS, \
GET_FEATURES, \
NULL, /* set_advertisements */ \
\
NULL, /* set_policing */ \
@ -1445,7 +1832,7 @@ unlock_dpdk:
NULL, /* get_in6 */ \
NULL, /* add_router */ \
NULL, /* get_next_hop */ \
netdev_dpdk_get_status, \
GET_STATUS, \
NULL, /* arp_lookup */ \
\
netdev_dpdk_update_flags, \
@ -1454,7 +1841,7 @@ unlock_dpdk:
netdev_dpdk_rxq_construct, \
netdev_dpdk_rxq_destruct, \
netdev_dpdk_rxq_dealloc, \
netdev_dpdk_rxq_recv, \
RXQ_RECV, \
NULL, /* rx_wait */ \
NULL, /* rxq_drain */ \
}
@ -1463,20 +1850,48 @@ int
dpdk_init(int argc, char **argv)
{
int result;
int base = 0;
char *pragram_name = argv[0];
if (argc < 2 || strcmp(argv[1], "--dpdk"))
return 0;
/* Make sure program name passed to rte_eal_init() is vswitchd. */
argv[1] = argv[0];
/* Remove the --dpdk argument from arg list.*/
argc--;
argv++;
/* If the cuse_dev_name parameter has been provided, set 'cuse_dev_name' to
* this string if it meets the correct criteria. Otherwise, set it to the
* default (vhost-net).
*/
if (!strcmp(argv[1], "--cuse_dev_name") &&
(strlen(argv[2]) <= NAME_MAX)) {
cuse_dev_name = strdup(argv[2]);
/* Remove the cuse_dev_name configuration parameters from the argument
* list, so that the correct elements are passed to the DPDK
* initialization function
*/
argc -= 2;
argv += 2; /* Increment by two to bypass the cuse_dev_name arguments */
base = 2;
VLOG_ERR("User-provided cuse_dev_name in use: /dev/%s", cuse_dev_name);
} else {
cuse_dev_name = "vhost-net";
VLOG_INFO("No cuse_dev_name provided - defaulting to /dev/vhost-net");
}
/* Keep the program name argument as this is needed for call to
* rte_eal_init()
*/
argv[0] = pragram_name;
/* Make sure things are initialized ... */
result = rte_eal_init(argc, argv);
if (result < 0) {
ovs_abort(result, "Cannot init EAL\n");
ovs_abort(result, "Cannot init EAL");
}
rte_memzone_dump(stdout);
@ -1489,7 +1904,7 @@ dpdk_init(int argc, char **argv)
/* We are called from the main thread here */
thread_set_nonpmd();
return result + 1;
return result + 1 + base;
}
const struct netdev_class dpdk_class =
@ -1497,16 +1912,42 @@ const struct netdev_class dpdk_class =
"dpdk",
NULL,
netdev_dpdk_construct,
netdev_dpdk_destruct,
netdev_dpdk_set_multiq,
netdev_dpdk_eth_send);
netdev_dpdk_eth_send,
netdev_dpdk_get_carrier,
netdev_dpdk_get_stats,
netdev_dpdk_get_features,
netdev_dpdk_get_status,
netdev_dpdk_rxq_recv);
const struct netdev_class dpdk_ring_class =
NETDEV_DPDK_CLASS(
"dpdkr",
NULL,
netdev_dpdk_ring_construct,
netdev_dpdk_destruct,
NULL,
netdev_dpdk_ring_send);
netdev_dpdk_ring_send,
netdev_dpdk_get_carrier,
netdev_dpdk_get_stats,
netdev_dpdk_get_features,
netdev_dpdk_get_status,
netdev_dpdk_rxq_recv);
/* netdev class for "dpdkvhost" ports.  Arguments follow the parameter order
 * of NETDEV_DPDK_CLASS: NAME, INIT, CONSTRUCT, DESTRUCT, MULTIQ, SEND,
 * GET_CARRIER, GET_STATS, GET_FEATURES, GET_STATUS, RXQ_RECV. */
const struct netdev_class dpdk_vhost_class =
NETDEV_DPDK_CLASS(
"dpdkvhost",
dpdk_vhost_class_init,
netdev_dpdk_vhost_construct,
netdev_dpdk_vhost_destruct,
netdev_dpdk_vhost_set_multiq,
netdev_dpdk_vhost_send,
netdev_dpdk_vhost_get_carrier,
netdev_dpdk_vhost_get_stats,
NULL, /* GET_FEATURES: not provided for vhost ports. */
NULL, /* GET_STATUS: not provided for vhost ports. */
netdev_dpdk_vhost_rxq_recv);
void
netdev_dpdk_register(void)
@ -1521,6 +1962,7 @@ netdev_dpdk_register(void)
dpdk_common_init();
netdev_register_provider(&dpdk_class);
netdev_register_provider(&dpdk_ring_class);
netdev_register_provider(&dpdk_vhost_class);
ovsthread_once_done(&once);
}
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2010, 2011, 2012, 2013 Nicira, Inc.
* Copyright (c) 2010, 2011, 2012, 2013, 2015 Nicira, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -30,6 +30,7 @@
#include "odp-util.h"
#include "ofp-print.h"
#include "ofpbuf.h"
#include "ovs-atomic.h"
#include "packets.h"
#include "pcap-file.h"
#include "poll-loop.h"

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc.
* Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014, 2015 Nicira, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -36,6 +36,7 @@
#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/utsname.h>
#include <netpacket/packet.h>
#include <net/if.h>
#include <net/if_arp.h>
@ -376,12 +377,18 @@ tc_destroy(struct tc *tc)
static const struct tc_ops tc_ops_htb;
static const struct tc_ops tc_ops_hfsc;
static const struct tc_ops tc_ops_codel;
static const struct tc_ops tc_ops_fqcodel;
static const struct tc_ops tc_ops_sfq;
static const struct tc_ops tc_ops_default;
static const struct tc_ops tc_ops_other;
static const struct tc_ops *const tcs[] = {
&tc_ops_htb, /* Hierarchy token bucket (see tc-htb(8)). */
&tc_ops_hfsc, /* Hierarchical fair service curve. */
&tc_ops_codel, /* Controlled delay */
&tc_ops_fqcodel, /* Fair queue controlled delay */
&tc_ops_sfq, /* Stochastic fair queueing */
&tc_ops_default, /* Default qdisc (see tc-pfifo_fast(8)). */
&tc_ops_other, /* Some other qdisc. */
NULL
@ -399,8 +406,8 @@ static struct tcmsg *tc_make_request(const struct netdev *, int type,
unsigned int flags, struct ofpbuf *);
static int tc_transact(struct ofpbuf *request, struct ofpbuf **replyp);
static int tc_add_del_ingress_qdisc(struct netdev *netdev, bool add);
static int tc_add_policer(struct netdev *netdev, int kbits_rate,
int kbits_burst);
static int tc_add_policer(struct netdev *,
uint32_t kbits_rate, uint32_t kbits_burst);
static int tc_parse_qdisc(const struct ofpbuf *, const char **kind,
struct nlattr **options);
@ -2831,9 +2838,666 @@ const struct netdev_class netdev_internal_class =
NULL, /* get_features */
netdev_internal_get_status);
/* CoDel traffic control class. */
/* Value placed in the n_queues slot of tc_ops_codel. */
#define CODEL_N_QUEUES 0x0000
/* In sufficiently new kernel headers these are defined as enums in
* <linux/pkt_sched.h>. Define them here as macros to help out with older
* kernels. (This overrides any enum definition in the header file but that's
* harmless.) */
#define TCA_CODEL_TARGET 1
#define TCA_CODEL_LIMIT 2
#define TCA_CODEL_INTERVAL 3
/* Cached CoDel configuration.  Each field mirrors the netlink attribute of
 * the same name; units are whatever the kernel expects for that attribute
 * (presumably microseconds for target/interval and packets for limit -
 * TODO confirm). */
struct codel {
struct tc tc;
uint32_t target; /* TCA_CODEL_TARGET; code default is 5000. */
uint32_t limit; /* TCA_CODEL_LIMIT; code default is 10240. */
uint32_t interval; /* TCA_CODEL_INTERVAL; code default is 100000. */
};
static struct codel *
codel_get__(const struct netdev *netdev_)
{
struct netdev_linux *netdev = netdev_linux_cast(netdev_);
return CONTAINER_OF(netdev->tc, struct codel, tc);
}
/* Allocates a new 'struct codel' holding 'target', 'limit' and 'interval',
 * initializes its embedded tc with tc_ops_codel, and makes it 'netdev_''s
 * current tc.  Whatever 'netdev_''s tc pointer held before is overwritten
 * without being freed. */
static void
codel_install__(struct netdev *netdev_, uint32_t target, uint32_t limit,
                uint32_t interval)
{
    struct netdev_linux *linux_dev = netdev_linux_cast(netdev_);
    struct codel *state = xmalloc(sizeof *state);

    tc_init(&state->tc, &tc_ops_codel);
    state->target = target;
    state->limit = limit;
    state->interval = interval;

    linux_dev->tc = &state->tc;
}
/* Replaces 'netdev''s root qdisc with a "codel" qdisc (handle 1:0).
 *
 * A zero 'target', 'limit' or 'interval' selects the built-in default of
 * 5000, 10240 or 100000 respectively.
 *
 * Returns 0 on success, ENODEV if the netlink request could not be built,
 * otherwise the error reported by tc_transact() (which is also logged). */
static int
codel_setup_qdisc__(struct netdev *netdev, uint32_t target, uint32_t limit,
                    uint32_t interval)
{
    const uint32_t eff_target = target ? target : 5000;
    const uint32_t eff_limit = limit ? limit : 10240;
    const uint32_t eff_interval = interval ? interval : 100000;
    struct ofpbuf request;
    struct tcmsg *tcmsg;
    size_t nested_ofs;
    int error;

    /* Remove whatever qdisc is there now; the result is ignored. */
    tc_del_qdisc(netdev);

    tcmsg = tc_make_request(netdev, RTM_NEWQDISC,
                            NLM_F_EXCL | NLM_F_CREATE, &request);
    if (!tcmsg) {
        return ENODEV;
    }
    tcmsg->tcm_handle = tc_make_handle(1, 0);
    tcmsg->tcm_parent = TC_H_ROOT;

    nl_msg_put_string(&request, TCA_KIND, "codel");
    nested_ofs = nl_msg_start_nested(&request, TCA_OPTIONS);
    nl_msg_put_u32(&request, TCA_CODEL_TARGET, eff_target);
    nl_msg_put_u32(&request, TCA_CODEL_LIMIT, eff_limit);
    nl_msg_put_u32(&request, TCA_CODEL_INTERVAL, eff_interval);
    nl_msg_end_nested(&request, nested_ofs);

    error = tc_transact(&request, NULL);
    if (!error) {
        return 0;
    }

    VLOG_WARN_RL(&rl, "failed to replace %s qdisc, "
                 "target %u, limit %u, interval %u error %d(%s)",
                 netdev_get_name(netdev),
                 eff_target, eff_limit, eff_interval,
                 error, ovs_strerror(error));
    return error;
}
/* Fills in 'codel->target', 'codel->limit' and 'codel->interval' from the
 * "target", "limit" and "interval" keys of 'details'.  Values are parsed as
 * base-10 integers; a key that is missing or parses to zero gets the default
 * (5000, 10240 and 100000 respectively).  'codel->tc' is not touched. */
static void
codel_parse_qdisc_details__(struct netdev *netdev OVS_UNUSED,
                            const struct smap *details, struct codel *codel)
{
    const char *value;

    value = smap_get(details, "target");
    codel->target = value ? strtoull(value, NULL, 10) : 0;
    if (codel->target == 0) {
        codel->target = 5000;
    }

    value = smap_get(details, "limit");
    codel->limit = value ? strtoull(value, NULL, 10) : 0;
    if (codel->limit == 0) {
        codel->limit = 10240;
    }

    value = smap_get(details, "interval");
    codel->interval = value ? strtoull(value, NULL, 10) : 0;
    if (codel->interval == 0) {
        codel->interval = 100000;
    }
}
static int
codel_tc_install(struct netdev *netdev, const struct smap *details)
{
int error;
struct codel codel;
codel_parse_qdisc_details__(netdev, details, &codel);
error = codel_setup_qdisc__(netdev, codel.target, codel.limit,
codel.interval);
if (!error) {
codel_install__(netdev, codel.target, codel.limit, codel.interval);
}
return error;
}
/* Extracts the CoDel target, limit and interval from the nested attributes in
 * 'nl_options' into 'codel'.  All three attributes must parse as 32-bit
 * unsigned values under the policy below.
 *
 * Returns 0 on success, EPROTO (after logging) if parsing fails. */
static int
codel_parse_tca_options__(struct nlattr *nl_options, struct codel *codel)
{
    static const struct nl_policy tca_codel_policy[] = {
        [TCA_CODEL_TARGET] = { .type = NL_A_U32 },
        [TCA_CODEL_LIMIT] = { .type = NL_A_U32 },
        [TCA_CODEL_INTERVAL] = { .type = NL_A_U32 }
    };
    struct nlattr *parsed[ARRAY_SIZE(tca_codel_policy)];
    bool ok;

    ok = nl_parse_nested(nl_options, tca_codel_policy,
                         parsed, ARRAY_SIZE(tca_codel_policy));
    if (!ok) {
        VLOG_WARN_RL(&rl, "failed to parse CoDel class options");
        return EPROTO;
    }

    codel->target = nl_attr_get_u32(parsed[TCA_CODEL_TARGET]);
    codel->limit = nl_attr_get_u32(parsed[TCA_CODEL_LIMIT]);
    codel->interval = nl_attr_get_u32(parsed[TCA_CODEL_INTERVAL]);
    return 0;
}
static int
codel_tc_load(struct netdev *netdev, struct ofpbuf *nlmsg)
{
struct nlattr *nlattr;
const char * kind;
int error;
struct codel codel;
error = tc_parse_qdisc(nlmsg, &kind, &nlattr);
if (error != 0) {
return error;
}
error = codel_parse_tca_options__(nlattr, &codel);
if (error != 0) {
return error;
}
codel_install__(netdev, codel.target, codel.limit, codel.interval);
return 0;
}
static void
codel_tc_destroy(struct tc *tc)
{
struct codel *codel = CONTAINER_OF(tc, struct codel, tc);
tc_destroy(tc);
free(codel);
}
/* Adds the cached CoDel "target", "limit" and "interval" of 'netdev' to
 * 'details' as decimal strings.  Always returns 0. */
static int
codel_qdisc_get(const struct netdev *netdev, struct smap *details)
{
    const struct codel *cached = codel_get__(netdev);

    smap_add_format(details, "target", "%u", cached->target);
    smap_add_format(details, "limit", "%u", cached->limit);
    smap_add_format(details, "interval", "%u", cached->interval);

    return 0;
}
static int
codel_qdisc_set(struct netdev *netdev, const struct smap *details)
{
struct codel codel;
codel_parse_qdisc_details__(netdev, details, &codel);
codel_install__(netdev, codel.target, codel.limit, codel.interval);
codel_get__(netdev)->target = codel.target;
codel_get__(netdev)->limit = codel.limit;
codel_get__(netdev)->interval = codel.interval;
return 0;
}
/* Operations table for the CoDel qdisc.  The trailing five slots are left
 * NULL (presumably the per-class/per-queue callbacks, unused because
 * CODEL_N_QUEUES is 0 - confirm against struct tc_ops). */
static const struct tc_ops tc_ops_codel = {
"codel", /* linux_name */
"linux-codel", /* ovs_name */
CODEL_N_QUEUES, /* n_queues */
codel_tc_install,
codel_tc_load,
codel_tc_destroy,
codel_qdisc_get,
codel_qdisc_set,
NULL,
NULL,
NULL,
NULL,
NULL
};
/* FQ-CoDel traffic control class. */
/* Value placed in the n_queues slot of tc_ops_fqcodel. */
#define FQCODEL_N_QUEUES 0x0000
/* In sufficiently new kernel headers these are defined as enums in
* <linux/pkt_sched.h>. Define them here as macros to help out with older
* kernels. (This overrides any enum definition in the header file but that's
* harmless.) */
#define TCA_FQ_CODEL_TARGET 1
#define TCA_FQ_CODEL_LIMIT 2
#define TCA_FQ_CODEL_INTERVAL 3
#define TCA_FQ_CODEL_ECN 4
#define TCA_FQ_CODEL_FLOWS 5
#define TCA_FQ_CODEL_QUANTUM 6
/* Cached FQ-CoDel configuration.  Each field mirrors the netlink attribute of
 * the same name.  TCA_FQ_CODEL_ECN is defined above but has no corresponding
 * field here. */
struct fqcodel {
struct tc tc;
uint32_t target; /* TCA_FQ_CODEL_TARGET */
uint32_t limit; /* TCA_FQ_CODEL_LIMIT */
uint32_t interval; /* TCA_FQ_CODEL_INTERVAL */
uint32_t flows; /* TCA_FQ_CODEL_FLOWS */
uint32_t quantum; /* TCA_FQ_CODEL_QUANTUM */
};
static struct fqcodel *
fqcodel_get__(const struct netdev *netdev_)
{
struct netdev_linux *netdev = netdev_linux_cast(netdev_);
return CONTAINER_OF(netdev->tc, struct fqcodel, tc);
}
/* Allocates a new 'struct fqcodel' holding the given parameters, initializes
 * its embedded tc with tc_ops_fqcodel, and makes it 'netdev_''s current tc.
 * Whatever 'netdev_''s tc pointer held before is overwritten without being
 * freed. */
static void
fqcodel_install__(struct netdev *netdev_, uint32_t target, uint32_t limit,
                  uint32_t interval, uint32_t flows, uint32_t quantum)
{
    struct netdev_linux *linux_dev = netdev_linux_cast(netdev_);
    struct fqcodel *state = xmalloc(sizeof *state);

    tc_init(&state->tc, &tc_ops_fqcodel);
    state->target = target;
    state->limit = limit;
    state->interval = interval;
    state->flows = flows;
    state->quantum = quantum;

    linux_dev->tc = &state->tc;
}
/* Replaces 'netdev''s root qdisc with an "fq_codel" qdisc (handle 1:0).
 *
 * A zero 'target', 'limit', 'interval', 'flows' or 'quantum' selects the
 * built-in default of 5000, 10240, 100000, 1024 or 1514 respectively.
 *
 * Returns 0 on success, ENODEV if the netlink request could not be built,
 * otherwise the error reported by tc_transact() (which is also logged). */
static int
fqcodel_setup_qdisc__(struct netdev *netdev, uint32_t target, uint32_t limit,
                      uint32_t interval, uint32_t flows, uint32_t quantum)
{
    const uint32_t eff_target = target ? target : 5000;
    const uint32_t eff_limit = limit ? limit : 10240;
    const uint32_t eff_interval = interval ? interval : 100000;
    const uint32_t eff_flows = flows ? flows : 1024;
    /* fq_codel default quantum is 1514 not mtu */
    const uint32_t eff_quantum = quantum ? quantum : 1514;
    struct ofpbuf request;
    struct tcmsg *tcmsg;
    size_t nested_ofs;
    int error;

    /* Remove whatever qdisc is there now; the result is ignored. */
    tc_del_qdisc(netdev);

    tcmsg = tc_make_request(netdev, RTM_NEWQDISC,
                            NLM_F_EXCL | NLM_F_CREATE, &request);
    if (!tcmsg) {
        return ENODEV;
    }
    tcmsg->tcm_handle = tc_make_handle(1, 0);
    tcmsg->tcm_parent = TC_H_ROOT;

    nl_msg_put_string(&request, TCA_KIND, "fq_codel");
    nested_ofs = nl_msg_start_nested(&request, TCA_OPTIONS);
    nl_msg_put_u32(&request, TCA_FQ_CODEL_TARGET, eff_target);
    nl_msg_put_u32(&request, TCA_FQ_CODEL_LIMIT, eff_limit);
    nl_msg_put_u32(&request, TCA_FQ_CODEL_INTERVAL, eff_interval);
    nl_msg_put_u32(&request, TCA_FQ_CODEL_FLOWS, eff_flows);
    nl_msg_put_u32(&request, TCA_FQ_CODEL_QUANTUM, eff_quantum);
    nl_msg_end_nested(&request, nested_ofs);

    error = tc_transact(&request, NULL);
    if (!error) {
        return 0;
    }

    VLOG_WARN_RL(&rl, "failed to replace %s qdisc, "
                 "target %u, limit %u, interval %u, flows %u, quantum %u error %d(%s)",
                 netdev_get_name(netdev),
                 eff_target, eff_limit, eff_interval, eff_flows, eff_quantum,
                 error, ovs_strerror(error));
    return error;
}
/* Fills in the FQ-CoDel parameters of 'fqcodel' from the "target", "limit",
 * "interval", "flows" and "quantum" keys of 'details'.  Values are parsed as
 * base-10 integers; a key that is missing or parses to zero gets the default
 * (5000, 10240, 100000, 1024 and 1514 respectively).  'fqcodel->tc' is not
 * touched.
 *
 * The interval default used to be 1000000 here, which disagreed with the
 * 100000 that fqcodel_setup_qdisc__() and the CoDel code use for an
 * unspecified interval; it is now 100000 everywhere. */
static void
fqcodel_parse_qdisc_details__(struct netdev *netdev OVS_UNUSED,
                              const struct smap *details,
                              struct fqcodel *fqcodel)
{
    const char *target_s = smap_get(details, "target");
    const char *limit_s = smap_get(details, "limit");
    const char *interval_s = smap_get(details, "interval");
    const char *flows_s = smap_get(details, "flows");
    const char *quantum_s = smap_get(details, "quantum");

    fqcodel->target = target_s ? strtoull(target_s, NULL, 10) : 0;
    fqcodel->limit = limit_s ? strtoull(limit_s, NULL, 10) : 0;
    fqcodel->interval = interval_s ? strtoull(interval_s, NULL, 10) : 0;
    fqcodel->flows = flows_s ? strtoull(flows_s, NULL, 10) : 0;
    fqcodel->quantum = quantum_s ? strtoull(quantum_s, NULL, 10) : 0;

    /* Zero means "not specified": substitute the defaults. */
    if (!fqcodel->target) {
        fqcodel->target = 5000;
    }
    if (!fqcodel->limit) {
        fqcodel->limit = 10240;
    }
    if (!fqcodel->interval) {
        fqcodel->interval = 100000;
    }
    if (!fqcodel->flows) {
        fqcodel->flows = 1024;
    }
    if (!fqcodel->quantum) {
        fqcodel->quantum = 1514;
    }
}
static int
fqcodel_tc_install(struct netdev *netdev, const struct smap *details)
{
int error;
struct fqcodel fqcodel;
fqcodel_parse_qdisc_details__(netdev, details, &fqcodel);
error = fqcodel_setup_qdisc__(netdev, fqcodel.target, fqcodel.limit,
fqcodel.interval, fqcodel.flows,
fqcodel.quantum);
if (!error) {
fqcodel_install__(netdev, fqcodel.target, fqcodel.limit,
fqcodel.interval, fqcodel.flows, fqcodel.quantum);
}
return error;
}
/* Extracts the FQ-CoDel target, limit, interval, flows and quantum from the
 * nested attributes in 'nl_options' into 'fqcodel'.  All five attributes must
 * parse as 32-bit unsigned values under the policy below.
 *
 * Returns 0 on success, EPROTO (after logging) if parsing fails. */
static int
fqcodel_parse_tca_options__(struct nlattr *nl_options, struct fqcodel *fqcodel)
{
    static const struct nl_policy tca_fqcodel_policy[] = {
        [TCA_FQ_CODEL_TARGET] = { .type = NL_A_U32 },
        [TCA_FQ_CODEL_LIMIT] = { .type = NL_A_U32 },
        [TCA_FQ_CODEL_INTERVAL] = { .type = NL_A_U32 },
        [TCA_FQ_CODEL_FLOWS] = { .type = NL_A_U32 },
        [TCA_FQ_CODEL_QUANTUM] = { .type = NL_A_U32 }
    };
    struct nlattr *parsed[ARRAY_SIZE(tca_fqcodel_policy)];
    bool ok;

    ok = nl_parse_nested(nl_options, tca_fqcodel_policy,
                         parsed, ARRAY_SIZE(tca_fqcodel_policy));
    if (!ok) {
        VLOG_WARN_RL(&rl, "failed to parse FQ_CoDel class options");
        return EPROTO;
    }

    fqcodel->target = nl_attr_get_u32(parsed[TCA_FQ_CODEL_TARGET]);
    fqcodel->limit = nl_attr_get_u32(parsed[TCA_FQ_CODEL_LIMIT]);
    fqcodel->interval = nl_attr_get_u32(parsed[TCA_FQ_CODEL_INTERVAL]);
    fqcodel->flows = nl_attr_get_u32(parsed[TCA_FQ_CODEL_FLOWS]);
    fqcodel->quantum = nl_attr_get_u32(parsed[TCA_FQ_CODEL_QUANTUM]);
    return 0;
}
static int
fqcodel_tc_load(struct netdev *netdev, struct ofpbuf *nlmsg)
{
struct nlattr *nlattr;
const char * kind;
int error;
struct fqcodel fqcodel;
error = tc_parse_qdisc(nlmsg, &kind, &nlattr);
if (error != 0) {
return error;
}
error = fqcodel_parse_tca_options__(nlattr, &fqcodel);
if (error != 0) {
return error;
}
fqcodel_install__(netdev, fqcodel.target, fqcodel.limit, fqcodel.interval,
fqcodel.flows, fqcodel.quantum);
return 0;
}
static void
fqcodel_tc_destroy(struct tc *tc)
{
struct fqcodel *fqcodel = CONTAINER_OF(tc, struct fqcodel, tc);
tc_destroy(tc);
free(fqcodel);
}
/* Adds the cached FQ-CoDel "target", "limit", "interval", "flows" and
 * "quantum" of 'netdev' to 'details' as decimal strings.  Always returns 0. */
static int
fqcodel_qdisc_get(const struct netdev *netdev, struct smap *details)
{
    const struct fqcodel *cached = fqcodel_get__(netdev);

    smap_add_format(details, "target", "%u", cached->target);
    smap_add_format(details, "limit", "%u", cached->limit);
    smap_add_format(details, "interval", "%u", cached->interval);
    smap_add_format(details, "flows", "%u", cached->flows);
    smap_add_format(details, "quantum", "%u", cached->quantum);

    return 0;
}
static int
fqcodel_qdisc_set(struct netdev *netdev, const struct smap *details)
{
struct fqcodel fqcodel;
fqcodel_parse_qdisc_details__(netdev, details, &fqcodel);
fqcodel_install__(netdev, fqcodel.target, fqcodel.limit, fqcodel.interval,
fqcodel.flows, fqcodel.quantum);
fqcodel_get__(netdev)->target = fqcodel.target;
fqcodel_get__(netdev)->limit = fqcodel.limit;
fqcodel_get__(netdev)->interval = fqcodel.interval;
fqcodel_get__(netdev)->flows = fqcodel.flows;
fqcodel_get__(netdev)->quantum = fqcodel.quantum;
return 0;
}
/* Operations table for the FQ-CoDel qdisc.  The trailing five slots are left
 * NULL (presumably the per-class/per-queue callbacks, unused because
 * FQCODEL_N_QUEUES is 0 - confirm against struct tc_ops). */
static const struct tc_ops tc_ops_fqcodel = {
"fq_codel", /* linux_name */
"linux-fq_codel", /* ovs_name */
FQCODEL_N_QUEUES, /* n_queues */
fqcodel_tc_install,
fqcodel_tc_load,
fqcodel_tc_destroy,
fqcodel_qdisc_get,
fqcodel_qdisc_set,
NULL,
NULL,
NULL,
NULL,
NULL
};
/* SFQ traffic control class. */
/* Value placed in the n_queues slot of tc_ops_sfq. */
#define SFQ_N_QUEUES 0x0000
/* Cached SFQ configuration. */
struct sfq {
struct tc tc;
uint32_t quantum; /* Sent to the kernel as tc_sfq_qopt.quantum. */
uint32_t perturb; /* Sent to the kernel as tc_sfq_qopt.perturb_period. */
};
static struct sfq *
sfq_get__(const struct netdev *netdev_)
{
struct netdev_linux *netdev = netdev_linux_cast(netdev_);
return CONTAINER_OF(netdev->tc, struct sfq, tc);
}
/* Allocates a new 'struct sfq' holding 'quantum' and 'perturb', initializes
 * its embedded tc with tc_ops_sfq, and makes it 'netdev_''s current tc.
 * Whatever 'netdev_''s tc pointer held before is overwritten without being
 * freed. */
static void
sfq_install__(struct netdev *netdev_, uint32_t quantum, uint32_t perturb)
{
    struct netdev_linux *linux_dev = netdev_linux_cast(netdev_);
    struct sfq *state = xmalloc(sizeof *state);

    tc_init(&state->tc, &tc_ops_sfq);
    state->quantum = quantum;
    state->perturb = perturb;

    linux_dev->tc = &state->tc;
}
/* Replaces 'netdev''s root qdisc with an "sfq" qdisc (handle 1:0).
 *
 * If 'quantum' is zero the device MTU is used when it can be obtained;
 * otherwise the quantum field is left at zero.  If 'perturb' is zero a
 * perturbation period of 10 is used.
 *
 * Returns 0 on success, ENODEV if the netlink request could not be built,
 * otherwise the error reported by tc_transact() (which is also logged). */
static int
sfq_setup_qdisc__(struct netdev *netdev, uint32_t quantum, uint32_t perturb)
{
    struct tc_sfq_qopt opt;
    struct ofpbuf request;
    struct tcmsg *tcmsg;
    int mtu_error;
    int error;
    int mtu;

    mtu_error = netdev_linux_get_mtu__(netdev_linux_cast(netdev), &mtu);

    /* Remove whatever qdisc is there now; the result is ignored. */
    tc_del_qdisc(netdev);

    tcmsg = tc_make_request(netdev, RTM_NEWQDISC,
                            NLM_F_EXCL | NLM_F_CREATE, &request);
    if (!tcmsg) {
        return ENODEV;
    }
    tcmsg->tcm_handle = tc_make_handle(1, 0);
    tcmsg->tcm_parent = TC_H_ROOT;

    memset(&opt, 0, sizeof opt);
    if (quantum) {
        opt.quantum = quantum;
    } else if (!mtu_error) {
        opt.quantum = mtu; /* if we cannot find mtu, use default */
    }
    if (perturb) {
        opt.perturb_period = perturb;
    } else {
        opt.perturb_period = 10;
    }

    nl_msg_put_string(&request, TCA_KIND, "sfq");
    nl_msg_put_unspec(&request, TCA_OPTIONS, &opt, sizeof opt);

    error = tc_transact(&request, NULL);
    if (!error) {
        return 0;
    }

    VLOG_WARN_RL(&rl, "failed to replace %s qdisc, "
                 "quantum %u, perturb %u error %d(%s)",
                 netdev_get_name(netdev),
                 opt.quantum, opt.perturb_period,
                 error, ovs_strerror(error));
    return error;
}
/* Fills in 'sfq->perturb' and 'sfq->quantum' from the "perturb" and "quantum"
 * keys of 'details', parsed as base-10 integers.
 *
 * A missing or zero perturb becomes 10.  A missing or zero quantum becomes
 * the MTU of 'netdev'; if the MTU cannot be obtained a warning is logged and
 * the quantum is left at zero.  'sfq->tc' is not touched. */
static void
sfq_parse_qdisc_details__(struct netdev *netdev,
                          const struct smap *details, struct sfq *sfq)
{
    const char *perturb_s = smap_get(details, "perturb");
    const char *quantum_s = smap_get(details, "quantum");

    sfq->perturb = perturb_s ? strtoull(perturb_s, NULL, 10) : 0;
    sfq->quantum = quantum_s ? strtoull(quantum_s, NULL, 10) : 0;

    if (sfq->perturb == 0) {
        sfq->perturb = 10;
    }

    if (sfq->quantum == 0) {
        int mtu;

        if (netdev_linux_get_mtu__(netdev_linux_cast(netdev), &mtu)) {
            VLOG_WARN_RL(&rl, "when using SFQ, you must specify quantum on a "
                         "device without mtu");
        } else {
            sfq->quantum = mtu;
        }
    }
}
static int
sfq_tc_install(struct netdev *netdev, const struct smap *details)
{
int error;
struct sfq sfq;
sfq_parse_qdisc_details__(netdev, details, &sfq);
error = sfq_setup_qdisc__(netdev, sfq.quantum, sfq.perturb);
if (!error) {
sfq_install__(netdev, sfq.quantum, sfq.perturb);
}
return error;
}
/* Builds 'netdev''s SFQ tc state from the qdisc description in 'nlmsg'.
 *
 * The options attribute is read as a 'struct tc_sfq_qopt'.  Its 'quantum' and
 * 'perturb_period' members are passed to sfq_install__() in that order, which
 * matches that function's (quantum, perturb) signature; they used to be
 * passed the other way round, so a loaded qdisc reported its two parameters
 * swapped.
 *
 * Returns 0 on success, otherwise the error from tc_parse_qdisc(). */
static int
sfq_tc_load(struct netdev *netdev, struct ofpbuf *nlmsg)
{
    const struct tc_sfq_qopt *sfq;
    struct nlattr *nlattr;
    const char *kind;
    int error;

    error = tc_parse_qdisc(nlmsg, &kind, &nlattr);
    if (error) {
        return error;
    }

    sfq = nl_attr_get(nlattr);
    sfq_install__(netdev, sfq->quantum, sfq->perturb_period);
    return 0;
}
static void
sfq_tc_destroy(struct tc *tc)
{
struct sfq *sfq = CONTAINER_OF(tc, struct sfq, tc);
tc_destroy(tc);
free(sfq);
}
/* Adds the cached SFQ "quantum" and "perturb" of 'netdev' to 'details' as
 * decimal strings.  Always returns 0. */
static int
sfq_qdisc_get(const struct netdev *netdev, struct smap *details)
{
    const struct sfq *cached = sfq_get__(netdev);

    smap_add_format(details, "quantum", "%u", cached->quantum);
    smap_add_format(details, "perturb", "%u", cached->perturb);

    return 0;
}
static int
sfq_qdisc_set(struct netdev *netdev, const struct smap *details)
{
struct sfq sfq;
sfq_parse_qdisc_details__(netdev, details, &sfq);
sfq_install__(netdev, sfq.quantum, sfq.perturb);
sfq_get__(netdev)->quantum = sfq.quantum;
sfq_get__(netdev)->perturb = sfq.perturb;
return 0;
}
/* Operations table for the SFQ qdisc.  The trailing five slots are left NULL
 * (presumably the per-class/per-queue callbacks, unused because SFQ_N_QUEUES
 * is 0 - confirm against struct tc_ops). */
static const struct tc_ops tc_ops_sfq = {
"sfq", /* linux_name */
"linux-sfq", /* ovs_name */
SFQ_N_QUEUES, /* n_queues */
sfq_tc_install,
sfq_tc_load,
sfq_tc_destroy,
sfq_qdisc_get,
sfq_qdisc_set,
NULL,
NULL,
NULL,
NULL,
NULL
};
/* HTB traffic control class. */
#define HTB_N_QUEUES 0xf000
#define HTB_RATE2QUANTUM 10
struct htb {
struct tc tc;
@ -2892,7 +3556,7 @@ htb_setup_qdisc__(struct netdev *netdev)
nl_msg_put_string(&request, TCA_KIND, "htb");
memset(&opt, 0, sizeof opt);
opt.rate2quantum = 10;
opt.rate2quantum = HTB_RATE2QUANTUM;
opt.version = 3;
opt.defcls = 1;
@ -2926,6 +3590,11 @@ htb_setup_class__(struct netdev *netdev, unsigned int handle,
memset(&opt, 0, sizeof opt);
tc_fill_rate(&opt.rate, class->min_rate, mtu);
tc_fill_rate(&opt.ceil, class->max_rate, mtu);
/* Makes sure the quantum is at least MTU. Setting quantum will
* make htb ignore the r2q for this class. */
if ((class->min_rate / HTB_RATE2QUANTUM) < mtu) {
opt.quantum = mtu;
}
opt.buffer = tc_calc_buffer(opt.rate.rate, mtu, class->burst);
opt.cbuffer = tc_calc_buffer(opt.ceil.rate, mtu, class->burst);
opt.prio = class->priority;
@ -4028,12 +4697,13 @@ tc_add_del_ingress_qdisc(struct netdev *netdev, bool add)
* mtu 65535 drop
*
* The configuration and stats may be seen with the following command:
* /sbin/tc -s filter show <devname> eth0 parent ffff:
* /sbin/tc -s filter show dev <devname> parent ffff:
*
* Returns 0 if successful, otherwise a positive errno value.
*/
static int
tc_add_policer(struct netdev *netdev, int kbits_rate, int kbits_burst)
tc_add_policer(struct netdev *netdev,
uint32_t kbits_rate, uint32_t kbits_burst)
{
struct tc_police tc_police;
struct ofpbuf request;
@ -4047,8 +4717,22 @@ tc_add_policer(struct netdev *netdev, int kbits_rate, int kbits_burst)
tc_police.action = TC_POLICE_SHOT;
tc_police.mtu = mtu;
tc_fill_rate(&tc_police.rate, ((uint64_t) kbits_rate * 1000)/8, mtu);
tc_police.burst = tc_bytes_to_ticks(tc_police.rate.rate,
kbits_burst * 1024);
/* The following appears wrong in two ways:
*
* - tc_bytes_to_ticks() should take "bytes" as quantity for both of its
* arguments (or at least consistently "bytes" as both or "bits" as
* both), but this supplies bytes for the first argument and bits for the
* second.
*
* - In networking a kilobit is usually 1000 bits but this uses 1024 bits.
*
* However if you "fix" those problems then "tc filter show ..." shows
* "125000b", meaning 125,000 bits, when OVS configures it for 1000 kbit ==
* 1,000,000 bits, whereas this actually ends up doing the right thing from
* tc's point of view. Whatever. */
tc_police.burst = tc_bytes_to_ticks(
tc_police.rate.rate, MIN(UINT32_MAX / 1024, kbits_burst) * 1024);
tcmsg = tc_make_request(netdev, RTM_NEWTFILTER,
NLM_F_EXCL | NLM_F_CREATE, &request);
@ -4390,6 +5074,31 @@ tc_del_qdisc(struct netdev *netdev_)
return error;
}
/* Returns true if the running kernel is Linux 2.6.35 or later, false if it is
 * older or if its version cannot be determined.  The check is performed once
 * and the result is cached.
 *
 * The release string from uname() is parsed as "major.minor[.patch]".  The
 * previous test compared the second component against 35, which classified
 * every 2.6.x kernel (minor == 6) as older than 2.6.35; the third component
 * is what has to be compared.  A missing third component counts as 0. */
static bool
getqdisc_is_safe(void)
{
    static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
    static bool safe = false;

    if (ovsthread_once_start(&once)) {
        struct utsname utsname;
        int major, minor;

        if (uname(&utsname) == -1) {
            VLOG_WARN("uname failed (%s)", ovs_strerror(errno));
        } else if (!ovs_scan(utsname.release, "%d.%d", &major, &minor)) {
            VLOG_WARN("uname reported bad OS release (%s)", utsname.release);
        } else {
            int major3, minor3, patch;

            /* The patch level only matters for 2.6.x, but parse it
             * unconditionally to keep the comparison below simple. */
            if (!ovs_scan(utsname.release, "%d.%d.%d",
                          &major3, &minor3, &patch)) {
                patch = 0;
            }

            if (major < 2
                || (major == 2 && (minor < 6
                                   || (minor == 6 && patch < 35)))) {
                VLOG_INFO("disabling unsafe RTM_GETQDISC in Linux kernel %s",
                          utsname.release);
            } else {
                safe = true;
            }
        }
        ovsthread_once_done(&once);
    }
    return safe;
}
/* If 'netdev''s qdisc type and parameters are not yet known, queries the
* kernel to determine what they are. Returns 0 if successful, otherwise a
* positive errno value. */
@ -4419,18 +5128,21 @@ tc_query_qdisc(const struct netdev *netdev_)
* create will have a class with handle 1:0. The built-in qdiscs only have
* a class with handle 0:0.
*
* We could check for Linux 2.6.35+ and use a more straightforward method
* there. */
* On Linux 2.6.35+ we use the straightforward method because it allows us
* to handle non-builtin qdiscs without handle 1:0 (e.g. codel). However,
* in such a case we get no response at all from the kernel (!) if a
* builtin qdisc is in use (which is later caught by "!error &&
* !qdisc->size"). */
tcmsg = tc_make_request(netdev_, RTM_GETQDISC, NLM_F_ECHO, &request);
if (!tcmsg) {
return ENODEV;
}
tcmsg->tcm_handle = tc_make_handle(1, 0);
tcmsg->tcm_parent = 0;
tcmsg->tcm_handle = tc_make_handle(getqdisc_is_safe() ? 0 : 1, 0);
tcmsg->tcm_parent = getqdisc_is_safe() ? TC_H_ROOT : 0;
/* Figure out what tc class to instantiate. */
error = tc_transact(&request, &qdisc);
if (!error) {
if (!error && qdisc->size) {
const char *kind;
error = tc_parse_qdisc(qdisc, &kind, NULL);
@ -4440,15 +5152,15 @@ tc_query_qdisc(const struct netdev *netdev_)
ops = tc_lookup_linux_name(kind);
if (!ops) {
static struct vlog_rate_limit rl2 = VLOG_RATE_LIMIT_INIT(1, 1);
VLOG_INFO_RL(&rl2, "unknown qdisc \"%s\"", kind);
VLOG_DBG_RL(&rl2, "unknown qdisc \"%s\"", kind);
ops = &tc_ops_other;
}
}
} else if (error == ENOENT) {
/* Either it's a built-in qdisc, or it's a qdisc set up by some
* other entity that doesn't have a handle 1:0. We will assume
* that it's the system default qdisc. */
} else if ((!error && !qdisc->size) || error == ENOENT) {
/* Either it's a built-in qdisc, or (on Linux pre-2.6.35) it's a qdisc
* set up by some other entity that doesn't have a handle 1:0. We will
* assume that it's the system default qdisc. */
ops = &tc_ops_default;
error = 0;
} else {

View File

@ -257,20 +257,19 @@ struct netdev_class {
/* Build Partial Tunnel header. Ethernet and ip header is already built,
* build_header() is suppose build protocol specific part of header. */
int (*build_header)(const struct netdev *, struct ovs_action_push_tnl *data);
int (*build_header)(const struct netdev *, struct ovs_action_push_tnl *data,
const struct flow *tnl_flow);
/* build_header() can not build entire header for all packets for given
* flow. Push header is called for packet to build header specific to
* a packet on actual transmit. It uses partial header build by
* build_header() which is passed as data. */
int (*push_header)(const struct netdev *netdev,
struct dp_packet **buffers, int cnt,
const struct ovs_action_push_tnl *data);
void (*push_header)(struct dp_packet *packet,
const struct ovs_action_push_tnl *data);
/* Pop tunnel header from packet, build tunnel metadata and resize packet
* for further processing. */
int (*pop_header)(struct netdev *netdev,
struct dp_packet **buffers, int cnt);
int (*pop_header)(struct dp_packet *packet);
/* Returns the id of the numa node the 'netdev' is on. If there is no
* such info, returns NETDEV_NUMA_UNSPEC. */

View File

@ -61,6 +61,11 @@ static struct vlog_rate_limit err_rl = VLOG_RATE_LIMIT_INIT(60, 5);
sizeof(struct udp_header) + \
sizeof(struct vxlanhdr))
#define GENEVE_BASE_HLEN (sizeof(struct eth_header) + \
sizeof(struct ip_header) + \
sizeof(struct udp_header) + \
sizeof(struct genevehdr))
#define DEFAULT_TTL 64
struct netdev_vport {
@ -426,7 +431,8 @@ set_tunnel_config(struct netdev *dev_, const struct smap *args)
struct netdev_tunnel_config tnl_cfg;
struct smap_node *node;
has_csum = strstr(type, "gre");
has_csum = strstr(type, "gre") || strstr(type, "geneve") ||
strstr(type, "vxlan");
ipsec_mech_set = false;
memset(&tnl_cfg, 0, sizeof tnl_cfg);
@ -611,9 +617,11 @@ set_tunnel_config(struct netdev *dev_, const struct smap *args)
&tnl_cfg.out_key_flow);
ovs_mutex_lock(&dev->mutex);
dev->tnl_cfg = tnl_cfg;
tunnel_check_status_change__(dev);
netdev_change_seq_changed(dev_);
if (memcmp(&dev->tnl_cfg, &tnl_cfg, sizeof tnl_cfg)) {
dev->tnl_cfg = tnl_cfg;
tunnel_check_status_change__(dev);
netdev_change_seq_changed(dev_);
}
ovs_mutex_unlock(&dev->mutex);
return 0;
@ -786,9 +794,11 @@ set_patch_config(struct netdev *dev_, const struct smap *args)
}
ovs_mutex_lock(&dev->mutex);
free(dev->peer);
dev->peer = xstrdup(peer);
netdev_change_seq_changed(dev_);
if (!dev->peer || strcmp(dev->peer, peer)) {
free(dev->peer);
dev->peer = xstrdup(peer);
netdev_change_seq_changed(dev_);
}
ovs_mutex_unlock(&dev->mutex);
return 0;
@ -837,6 +847,7 @@ ip_extract_tnl_md(struct dp_packet *packet, struct flow_tnl *tnl)
tnl->ip_src = get_16aligned_be32(&nh->ip_src);
tnl->ip_dst = get_16aligned_be32(&nh->ip_dst);
tnl->ip_tos = nh->ip_tos;
tnl->ip_ttl = nh->ip_ttl;
return l4;
}
@ -870,6 +881,95 @@ push_ip_header(struct dp_packet *packet,
return ip + 1;
}
/* Extracts tunnel metadata from the outer IP and UDP headers of 'packet'
 * into 'tnl'.
 *
 * The IP part is delegated to ip_extract_tnl_md().  If the UDP header carries
 * a nonzero checksum, it is verified over the pseudo-header plus everything
 * from the UDP header to the end of the packet, and FLOW_TNL_F_CSUM is set in
 * 'tnl->flags' on success.  The UDP source and destination ports are copied
 * into 'tnl'.
 *
 * Returns a pointer to the first byte after the UDP header, or NULL if
 * ip_extract_tnl_md() fails or the checksum does not verify. */
static void *
udp_extract_tnl_md(struct dp_packet *packet, struct flow_tnl *tnl)
{
    struct udp_header *udp = ip_extract_tnl_md(packet, tnl);

    if (udp == NULL) {
        return NULL;
    }

    if (udp->udp_csum != 0) {
        const unsigned char *l2_start = dp_packet_l2(packet);
        const unsigned char *udp_start = (const unsigned char *) udp;
        /* Bytes from the start of the UDP header to the end of the packet. */
        size_t udp_len = dp_packet_size(packet) - (udp_start - l2_start);
        uint32_t sum;

        sum = packet_csum_pseudoheader(dp_packet_l3(packet));
        sum = csum_continue(sum, udp, udp_len);
        if (csum_finish(sum) != 0) {
            /* Checksum mismatch: reject the packet. */
            return NULL;
        }
        tnl->flags |= FLOW_TNL_F_CSUM;
    }

    tnl->tp_src = udp->udp_src;
    tnl->tp_dst = udp->udp_dst;

    return udp + 1;
}
/* Chooses a UDP source port for 'packet' from its datapath hash.
 *
 * The 32-bit hash is scaled into the range that starts at 'tnl_udp_port_min'
 * and spans (tnl_udp_port_max - tnl_udp_port_min) ports.  The result is
 * returned in network byte order. */
static ovs_be16
get_src_port(struct dp_packet *packet)
{
    uint32_t dp_hash = dp_packet_get_dp_hash(packet);
    uint64_t offset;

    /* (hash * range) >> 32 maps a uniformly distributed 32-bit hash onto
     * [0, range) without a division. */
    offset = ((uint64_t) dp_hash * (tnl_udp_port_max - tnl_udp_port_min)) >> 32;

    return htons(offset + tnl_udp_port_min);
}
static void
push_udp_header(struct dp_packet *packet,
const struct ovs_action_push_tnl *data)
{
struct udp_header *udp;
int ip_tot_size;
udp = push_ip_header(packet, data->header, data->header_len, &ip_tot_size);
/* set udp src port */
udp->udp_src = get_src_port(packet);
udp->udp_len = htons(ip_tot_size - sizeof (struct ip_header));
if (udp->udp_csum) {
uint32_t csum = packet_csum_pseudoheader(ip_hdr(dp_packet_data(packet)));
csum = csum_continue(csum, udp,
ip_tot_size - sizeof (struct ip_header));
udp->udp_csum = csum_finish(csum);
if (!udp->udp_csum) {
udp->udp_csum = htons(0xffff);
}
}
}
/* Fills in the UDP-related parts of the partial tunnel header in
 * 'data->header'.
 *
 * Sets the IP protocol to UDP and the UDP destination port to
 * 'tnl_cfg->dst_port'.  If 'tnl_flow' requests a tunnel checksum
 * (FLOW_TNL_F_CSUM), the UDP checksum field is set to a placeholder.
 *
 * Returns a pointer to the first byte after the UDP header, where the caller
 * places its protocol-specific header. */
static void *
udp_build_header(struct netdev_tunnel_config *tnl_cfg,
                 const struct flow *tnl_flow,
                 struct ovs_action_push_tnl *data)
{
    struct ip_header *ip = ip_hdr(data->header);
    struct udp_header *udp = (struct udp_header *) (ip + 1);
    bool want_csum = (tnl_flow->tunnel.flags & FLOW_TNL_F_CSUM) != 0;

    ip->ip_proto = IPPROTO_UDP;
    udp->udp_dst = tnl_cfg->dst_port;

    if (want_csum) {
        /* Any nonzero value marks that the checksum should be computed
         * later.  0xffff is convenient because it does not affect the
         * result of the checksum calculation. */
        udp->udp_csum = htons(0xffff);
    }

    return udp + 1;
}
static int
gre_header_len(ovs_be16 flags)
{
@ -905,6 +1005,10 @@ parse_gre_header(struct dp_packet *packet,
return -EINVAL;
}
if (greh->protocol != htons(ETH_TYPE_TEB)) {
return -EINVAL;
}
hlen = gre_header_len(greh->flags);
if (hlen > dp_packet_size(packet)) {
return -EINVAL;
@ -937,14 +1041,8 @@ parse_gre_header(struct dp_packet *packet,
return hlen;
}
static void
reset_tnl_md(struct pkt_metadata *md)
{
memset(&md->tunnel, 0, sizeof(md->tunnel));
}
static void
gre_extract_md(struct dp_packet *packet)
static int
netdev_gre_pop_header(struct dp_packet *packet)
{
struct pkt_metadata *md = &packet->md;
struct flow_tnl *tnl = &md->tunnel;
@ -953,64 +1051,38 @@ gre_extract_md(struct dp_packet *packet)
memset(md, 0, sizeof *md);
if (hlen > dp_packet_size(packet)) {
return;
return EINVAL;
}
hlen = parse_gre_header(packet, tnl);
if (hlen < 0) {
reset_tnl_md(md);
return -hlen;
}
dp_packet_reset_packet(packet, hlen);
}
static int
netdev_gre_pop_header(struct netdev *netdev_ OVS_UNUSED,
struct dp_packet **pkt, int cnt)
{
int i;
for (i = 0; i < cnt; i++) {
gre_extract_md(pkt[i]);
}
return 0;
}
static void
netdev_gre_push_header__(struct dp_packet *packet,
const void *header, int size)
netdev_gre_push_header(struct dp_packet *packet,
const struct ovs_action_push_tnl *data)
{
struct gre_base_hdr *greh;
int ip_tot_size;
greh = push_ip_header(packet, header, size, &ip_tot_size);
greh = push_ip_header(packet, data->header, data->header_len, &ip_tot_size);
if (greh->flags & htons(GRE_CSUM)) {
ovs_16aligned_be32 *options = (ovs_16aligned_be32 *) (greh + 1);
put_16aligned_be32(options,
(OVS_FORCE ovs_be32) csum(greh, ip_tot_size - sizeof (struct ip_header)));
ovs_be16 *csum_opt = (ovs_be16 *) (greh + 1);
*csum_opt = csum(greh, ip_tot_size - sizeof (struct ip_header));
}
}
static int
netdev_gre_push_header(const struct netdev *netdev OVS_UNUSED,
struct dp_packet **packets, int cnt,
const struct ovs_action_push_tnl *data)
{
int i;
for (i = 0; i < cnt; i++) {
netdev_gre_push_header__(packets[i], data->header, data->header_len);
packets[i]->md = PKT_METADATA_INITIALIZER(u32_to_odp(data->out_port));
}
return 0;
}
static int
netdev_gre_build_header(const struct netdev *netdev,
struct ovs_action_push_tnl *data)
struct ovs_action_push_tnl *data,
const struct flow *tnl_flow)
{
struct netdev_vport *dev = netdev_vport_cast(netdev);
struct netdev_tunnel_config *tnl_cfg;
@ -1031,7 +1103,7 @@ netdev_gre_build_header(const struct netdev *netdev,
greh->flags = 0;
options = (ovs_16aligned_be32 *) (greh + 1);
if (tnl_cfg->csum) {
if (tnl_flow->tunnel.flags & FLOW_TNL_F_CSUM) {
greh->flags |= htons(GRE_CSUM);
put_16aligned_be32(options, 0);
options++;
@ -1040,7 +1112,7 @@ netdev_gre_build_header(const struct netdev *netdev,
if (tnl_cfg->out_key_present) {
greh->flags |= htons(GRE_KEY);
put_16aligned_be32(options, (OVS_FORCE ovs_be32)
((OVS_FORCE uint64_t) tnl_cfg->out_key >> 32));
((OVS_FORCE uint64_t) tnl_flow->tunnel.tun_id >> 32));
options++;
}
@ -1054,75 +1126,55 @@ netdev_gre_build_header(const struct netdev *netdev,
return 0;
}
static void
vxlan_extract_md(struct dp_packet *packet)
static int
netdev_vxlan_pop_header(struct dp_packet *packet)
{
struct pkt_metadata *md = &packet->md;
struct flow_tnl *tnl = &md->tunnel;
struct udp_header *udp;
struct vxlanhdr *vxh;
memset(md, 0, sizeof *md);
if (VXLAN_HLEN > dp_packet_size(packet)) {
return;
return EINVAL;
}
udp = ip_extract_tnl_md(packet, tnl);
if (!udp) {
return;
vxh = udp_extract_tnl_md(packet, tnl);
if (!vxh) {
return EINVAL;
}
vxh = (struct vxlanhdr *) (udp + 1);
if (get_16aligned_be32(&vxh->vx_flags) != htonl(VXLAN_FLAGS) ||
(get_16aligned_be32(&vxh->vx_vni) & htonl(0xff))) {
VLOG_WARN_RL(&err_rl, "invalid vxlan flags=%#x vni=%#x\n",
ntohl(get_16aligned_be32(&vxh->vx_flags)),
ntohl(get_16aligned_be32(&vxh->vx_vni)));
reset_tnl_md(md);
return;
return EINVAL;
}
tnl->tp_src = udp->udp_src;
tnl->tp_dst = udp->udp_dst;
tnl->tun_id = htonll(ntohl(get_16aligned_be32(&vxh->vx_vni)) >> 8);
tnl->flags |= FLOW_TNL_F_KEY;
dp_packet_reset_packet(packet, VXLAN_HLEN);
}
static int
netdev_vxlan_pop_header(struct netdev *netdev_ OVS_UNUSED,
struct dp_packet **pkt, int cnt)
{
int i;
for (i = 0; i < cnt; i++) {
vxlan_extract_md(pkt[i]);
}
return 0;
}
static int
netdev_vxlan_build_header(const struct netdev *netdev,
struct ovs_action_push_tnl *data)
struct ovs_action_push_tnl *data,
const struct flow *tnl_flow)
{
struct netdev_vport *dev = netdev_vport_cast(netdev);
struct netdev_tunnel_config *tnl_cfg;
struct ip_header *ip;
struct udp_header *udp;
struct vxlanhdr *vxh;
/* XXX: RCUfy tnl_cfg. */
ovs_mutex_lock(&dev->mutex);
tnl_cfg = &dev->tnl_cfg;
ip = ip_hdr(data->header);
ip->ip_proto = IPPROTO_UDP;
vxh = udp_build_header(tnl_cfg, tnl_flow, data);
udp = (struct udp_header *) (ip + 1);
udp->udp_dst = tnl_cfg->dst_port;
vxh = (struct vxlanhdr *) (udp + 1);
put_16aligned_be32(&vxh->vx_flags, htonl(VXLAN_FLAGS));
put_16aligned_be32(&vxh->vx_vni, htonl(ntohll(tnl_cfg->out_key) << 8));
put_16aligned_be32(&vxh->vx_vni, htonl(ntohll(tnl_flow->tunnel.tun_id) << 8));
ovs_mutex_unlock(&dev->mutex);
data->header_len = VXLAN_HLEN;
@ -1130,44 +1182,81 @@ netdev_vxlan_build_header(const struct netdev *netdev,
return 0;
}
static ovs_be16
get_src_port(struct dp_packet *packet)
static int
netdev_geneve_pop_header(struct dp_packet *packet)
{
uint32_t hash;
struct pkt_metadata *md = &packet->md;
struct flow_tnl *tnl = &md->tunnel;
struct genevehdr *gnh;
unsigned int hlen;
hash = dp_packet_get_dp_hash(packet);
memset(md, 0, sizeof *md);
if (GENEVE_BASE_HLEN > dp_packet_size(packet)) {
VLOG_WARN_RL(&err_rl, "geneve packet too small: min header=%u packet size=%u\n",
(unsigned int)GENEVE_BASE_HLEN, dp_packet_size(packet));
return EINVAL;
}
return htons((((uint64_t) hash * (tnl_udp_port_max - tnl_udp_port_min)) >> 32) +
tnl_udp_port_min);
}
gnh = udp_extract_tnl_md(packet, tnl);
if (!gnh) {
return EINVAL;
}
static void
netdev_vxlan_push_header__(struct dp_packet *packet,
const void *header, int size)
{
struct udp_header *udp;
int ip_tot_size;
hlen = GENEVE_BASE_HLEN + gnh->opt_len * 4;
if (hlen > dp_packet_size(packet)) {
VLOG_WARN_RL(&err_rl, "geneve packet too small: header len=%u packet size=%u\n",
hlen, dp_packet_size(packet));
return EINVAL;
}
udp = push_ip_header(packet, header, size, &ip_tot_size);
if (gnh->ver != 0) {
VLOG_WARN_RL(&err_rl, "unknown geneve version: %"PRIu8"\n", gnh->ver);
return EINVAL;
}
/* set udp src port */
udp->udp_src = get_src_port(packet);
udp->udp_len = htons(ip_tot_size - sizeof (struct ip_header));
/* udp_csum is zero */
if (gnh->opt_len && gnh->critical) {
VLOG_WARN_RL(&err_rl, "unknown geneve critical options: %"PRIu8" bytes\n",
gnh->opt_len * 4);
return EINVAL;
}
if (gnh->proto_type != htons(ETH_TYPE_TEB)) {
VLOG_WARN_RL(&err_rl, "unknown geneve encapsulated protocol: %#x\n",
ntohs(gnh->proto_type));
return EINVAL;
}
tnl->flags |= gnh->oam ? FLOW_TNL_F_OAM : 0;
tnl->tun_id = htonll(ntohl(get_16aligned_be32(&gnh->vni)) >> 8);
tnl->flags |= FLOW_TNL_F_KEY;
dp_packet_reset_packet(packet, hlen);
return 0;
}
static int
netdev_vxlan_push_header(const struct netdev *netdev OVS_UNUSED,
struct dp_packet **packets, int cnt,
const struct ovs_action_push_tnl *data)
netdev_geneve_build_header(const struct netdev *netdev,
struct ovs_action_push_tnl *data,
const struct flow *tnl_flow)
{
int i;
struct netdev_vport *dev = netdev_vport_cast(netdev);
struct netdev_tunnel_config *tnl_cfg;
struct genevehdr *gnh;
for (i = 0; i < cnt; i++) {
netdev_vxlan_push_header__(packets[i],
data->header, VXLAN_HLEN);
packets[i]->md = PKT_METADATA_INITIALIZER(u32_to_odp(data->out_port));
}
/* XXX: RCUfy tnl_cfg. */
ovs_mutex_lock(&dev->mutex);
tnl_cfg = &dev->tnl_cfg;
gnh = udp_build_header(tnl_cfg, tnl_flow, data);
gnh->oam = !!(tnl_flow->tunnel.flags & FLOW_TNL_F_OAM);
gnh->proto_type = htons(ETH_TYPE_TEB);
put_16aligned_be32(&gnh->vni, htonl(ntohll(tnl_flow->tunnel.tun_id) << 8));
ovs_mutex_unlock(&dev->mutex);
data->header_len = GENEVE_BASE_HLEN;
data->tnl_type = OVS_VPORT_TYPE_GENEVE;
return 0;
}
@ -1300,7 +1389,9 @@ netdev_vport_tunnel_register(void)
/* The name of the dpif_port should be short enough to accommodate adding
* a port number to the end if one is necessary. */
static const struct vport_class vport_classes[] = {
TUNNEL_CLASS("geneve", "genev_sys", NULL, NULL, NULL),
TUNNEL_CLASS("geneve", "genev_sys", netdev_geneve_build_header,
push_udp_header,
netdev_geneve_pop_header),
TUNNEL_CLASS("gre", "gre_sys", netdev_gre_build_header,
netdev_gre_push_header,
netdev_gre_pop_header),
@ -1308,7 +1399,7 @@ netdev_vport_tunnel_register(void)
TUNNEL_CLASS("gre64", "gre64_sys", NULL, NULL, NULL),
TUNNEL_CLASS("ipsec_gre64", "gre64_sys", NULL, NULL, NULL),
TUNNEL_CLASS("vxlan", "vxlan_sys", netdev_vxlan_build_header,
netdev_vxlan_push_header,
push_udp_header,
netdev_vxlan_pop_header),
TUNNEL_CLASS("lisp", "lisp_sys", NULL, NULL, NULL)
};

View File

@ -34,6 +34,7 @@
#include "netdev-dpdk.h"
#include "netdev-provider.h"
#include "netdev-vport.h"
#include "odp-netlink.h"
#include "openflow/openflow.h"
#include "packets.h"
#include "poll-loop.h"
@ -43,6 +44,7 @@
#include "sset.h"
#include "svec.h"
#include "openvswitch/vlog.h"
#include "flow.h"
VLOG_DEFINE_THIS_MODULE(netdev);
@ -108,7 +110,8 @@ bool
netdev_is_pmd(const struct netdev *netdev)
{
return (!strcmp(netdev->netdev_class->type, "dpdk") ||
!strcmp(netdev->netdev_class->type, "dpdkr"));
!strcmp(netdev->netdev_class->type, "dpdkr") ||
!strcmp(netdev->netdev_class->type, "dpdkvhost"));
}
static void
@ -733,16 +736,30 @@ netdev_send(struct netdev *netdev, int qid, struct dp_packet **buffers,
int
netdev_pop_header(struct netdev *netdev, struct dp_packet **buffers, int cnt)
{
return (netdev->netdev_class->pop_header
? netdev->netdev_class->pop_header(netdev, buffers, cnt)
: EOPNOTSUPP);
int i;
if (!netdev->netdev_class->pop_header) {
return EOPNOTSUPP;
}
for (i = 0; i < cnt; i++) {
int err;
err = netdev->netdev_class->pop_header(buffers[i]);
if (err) {
dp_packet_clear(buffers[i]);
}
}
return 0;
}
int
netdev_build_header(const struct netdev *netdev, struct ovs_action_push_tnl *data)
netdev_build_header(const struct netdev *netdev, struct ovs_action_push_tnl *data,
const struct flow *tnl_flow)
{
if (netdev->netdev_class->build_header) {
return netdev->netdev_class->build_header(netdev, data);
return netdev->netdev_class->build_header(netdev, data, tnl_flow);
}
return EOPNOTSUPP;
}
@ -752,11 +769,18 @@ netdev_push_header(const struct netdev *netdev,
struct dp_packet **buffers, int cnt,
const struct ovs_action_push_tnl *data)
{
if (netdev->netdev_class->push_header) {
return netdev->netdev_class->push_header(netdev, buffers, cnt, data);
} else {
int i;
if (!netdev->netdev_class->push_header) {
return -EINVAL;
}
for (i = 0; i < cnt; i++) {
netdev->netdev_class->push_header(buffers[i], data);
buffers[i]->md = PKT_METADATA_INITIALIZER(u32_to_odp(data->out_port));
}
return 0;
}
/* Registers with the poll loop to wake up from the next call to poll_block()

View File

@ -22,6 +22,7 @@
#include <stdint.h>
#include "openvswitch/types.h"
#include "packets.h"
#include "flow.h"
#ifdef __cplusplus
extern "C" {
@ -184,7 +185,8 @@ int netdev_send(struct netdev *, int qid, struct dp_packet **, int cnt,
bool may_steal);
void netdev_send_wait(struct netdev *, int qid);
int netdev_build_header(const struct netdev *, struct ovs_action_push_tnl *data);
int netdev_build_header(const struct netdev *, struct ovs_action_push_tnl *data,
const struct flow *tnl_flow);
int netdev_push_header(const struct netdev *netdev,
struct dp_packet **buffers, int cnt,
const struct ovs_action_push_tnl *data);

View File

@ -273,63 +273,27 @@ nl_sock_destroy(struct nl_sock *sock)
#ifdef _WIN32
/* Reads the pid for 'sock' generated in the kernel datapath. The function
* follows a transaction semantic. Eventually this function should call into
* nl_transact. */
* uses a separate IOCTL instead of a transaction semantic to avoid unnecessary
* message overhead. */
static int
get_sock_pid_from_kernel(struct nl_sock *sock)
{
struct nl_transaction txn;
struct ofpbuf request;
uint64_t request_stub[128];
struct ofpbuf reply;
uint64_t reply_stub[128];
struct ovs_header *ovs_header;
struct nlmsghdr *nlmsg;
uint32_t seq;
int retval;
DWORD bytes;
int ovs_msg_size = sizeof (struct nlmsghdr) + sizeof (struct genlmsghdr) +
sizeof (struct ovs_header);
uint32_t pid = 0;
int retval = 0;
DWORD bytes = 0;
ofpbuf_use_stub(&request, request_stub, sizeof request_stub);
txn.request = &request;
ofpbuf_use_stub(&reply, reply_stub, sizeof reply_stub);
txn.reply = &reply;
seq = nl_sock_allocate_seq(sock, 1);
nl_msg_put_genlmsghdr(&request, 0, OVS_WIN_NL_CTRL_FAMILY_ID, 0,
OVS_CTRL_CMD_WIN_GET_PID, OVS_WIN_CONTROL_VERSION);
nlmsg = nl_msg_nlmsghdr(txn.request);
nlmsg->nlmsg_seq = seq;
ovs_header = ofpbuf_put_uninit(&request, sizeof *ovs_header);
ovs_header->dp_ifindex = 0;
ovs_header = ofpbuf_put_uninit(&reply, ovs_msg_size);
if (!DeviceIoControl(sock->handle, OVS_IOCTL_TRANSACT,
txn.request->data, txn.request->size,
txn.reply->data, txn.reply->size,
if (!DeviceIoControl(sock->handle, OVS_IOCTL_GET_PID,
NULL, 0, &pid, sizeof(pid),
&bytes, NULL)) {
retval = EINVAL;
goto done;
} else {
if (bytes < ovs_msg_size) {
if (bytes < sizeof(pid)) {
retval = EINVAL;
goto done;
} else {
sock->pid = pid;
}
nlmsg = nl_msg_nlmsghdr(txn.reply);
if (nlmsg->nlmsg_seq != seq) {
retval = EINVAL;
goto done;
}
sock->pid = nlmsg->nlmsg_pid;
}
retval = 0;
done:
ofpbuf_uninit(&request);
ofpbuf_uninit(&reply);
return retval;
}
#endif /* _WIN32 */

View File

@ -589,6 +589,52 @@ oxm_pull_match_loose(struct ofpbuf *b, struct match *match)
{
return oxm_pull_match__(b, false, match);
}
/* Parses the 'fields_len' bytes at 'fields_data' as a sequence of OXM TLVs
 * and stores each one in 'fa'.
 *
 * The value of each TLV is treated as a mask for its field, so it must be a
 * valid mask for that field.  Unknown fields and fields that appear more than
 * once are rejected.
 *
 * Returns 0 on success, otherwise an OpenFlow error code. */
enum ofperr
oxm_pull_field_array(const void *fields_data, size_t fields_len,
                     struct field_array *fa)
{
    struct ofpbuf b;

    ofpbuf_use_const(&b, fields_data, fields_len);
    while (b.size) {
        const uint8_t *entry_start = b.data;
        const uint8_t *array_start = fields_data;
        const struct mf_field *field;
        union mf_value value;
        uint64_t header;
        enum ofperr error;

        error = nx_pull_entry__(&b, false, &header, &field, &value, NULL);
        if (error) {
            VLOG_DBG_RL(&rl, "error pulling field array field");
            return error;
        }

        if (field && !bitmap_is_set(fa->used.bm, field->id)) {
            /* Known field seen for the first time: validate and record. */
            if (!mf_is_mask_valid(field, &value)) {
                VLOG_DBG_RL(&rl, "bad mask in field array field '%s'", field->name);
                return OFPERR_OFPBMC_BAD_MASK;
            }
            field_array_set(field->id, &value, fa);
            continue;
        }

        if (!field) {
            VLOG_DBG_RL(&rl, "unknown field array field");
            error = OFPERR_OFPBMC_BAD_FIELD;
        } else {
            VLOG_DBG_RL(&rl, "duplicate field array field '%s'", field->name);
            error = OFPERR_OFPBMC_DUP_FIELD;
        }

        VLOG_DBG_RL(&rl, "error parsing OXM at offset %"PRIdPTR" "
                    "within field array (%s)", entry_start - array_start,
                    ofperr_to_string(error));
        return error;
    }

    return 0;
}
/* nx_put_match() and helpers.
*
@ -1021,6 +1067,84 @@ oxm_put_match(struct ofpbuf *b, const struct match *match,
return match_len;
}
/* Appends to 'ds' the name of the field identified by 'id', followed by a
 * comma.  If 'mask' is not all-ones over the field's width, "=" and the
 * formatted mask are inserted between the name and the comma. */
static void
nx_format_mask_tlv(struct ds *ds, enum mf_field_id id,
                   const union mf_value *mask)
{
    const struct mf_field *field = mf_from_id(id);
    bool exact = is_all_ones(mask, field->n_bytes);

    ds_put_format(ds, "%s", field->name);
    if (!exact) {
        ds_put_char(ds, '=');
        mf_format(field, mask, NULL, ds);
    }
    ds_put_char(ds, ',');
}
/* Appends a comma-separated string representation of 'fa' to 'ds'.
 *
 * Each value stored in 'fa' is treated as a mask.  A field whose mask is
 * all-ones is formatted as just its name; otherwise it is formatted as
 * "name=mask".  Nothing is appended if no field is set in 'fa'. */
void
oxm_format_field_array(struct ds *ds, const struct field_array *fa)
{
    const size_t initial_length = ds->length;
    int id;

    for (id = 0; id < MFF_N_IDS; id++) {
        if (!bitmap_is_set(fa->used.bm, id)) {
            continue;
        }
        nx_format_mask_tlv(ds, id, &fa->value[id]);
    }

    /* Every formatted field ends with ','; drop the final one. */
    if (ds->length > initial_length) {
        ds_chomp(ds, ',');
    }
}
/* Appends to 'b' one unmasked OXM TLV for each field that is set in 'fa',
 * using the value stored for that field in 'fa'.
 *
 * OXM differs slightly among versions of OpenFlow.  Specify the OpenFlow
 * version in use as 'version'; it must be at least OpenFlow 1.5.
 *
 * This function can cause 'b''s data to be reallocated.
 *
 * Returns the number of bytes appended to 'b'.  May return zero. */
int
oxm_put_field_array(struct ofpbuf *b, const struct field_array *fa,
                    enum ofp_version version)
{
    const size_t initial_size = b->size;
    int id;

    /* Field arrays are only used with the group selection method property,
     * and group properties are only available in OpenFlow 1.5+, so the
     * following assertion should never fail.
     *
     * If support for older OpenFlow versions is desired then some care
     * will need to be taken of different TLVs that handle the same
     * flow fields.  In particular:
     * - VLAN_TCI, VLAN_VID and MFF_VLAN_PCP
     * - IP_DSCP_MASK and DSCP_SHIFTED
     * - REGS and XREGS
     */
    ovs_assert(version >= OFP15_VERSION);

    for (id = 0; id < MFF_N_IDS; id++) {
        if (!bitmap_is_set(fa->used.bm, id)) {
            continue;
        }
        nxm_put_unmasked(b, id, version, &fa->value[id],
                         mf_from_id(id)->n_bytes);
    }

    return b->size - initial_size;
}
static void
nx_put_header__(struct ofpbuf *b, uint64_t header, bool masked)
{

View File

@ -55,9 +55,14 @@ enum ofperr nx_pull_match_loose(struct ofpbuf *, unsigned int match_len,
ovs_be64 *cookie_mask);
enum ofperr oxm_pull_match(struct ofpbuf *, struct match *);
enum ofperr oxm_pull_match_loose(struct ofpbuf *, struct match *);
enum ofperr oxm_pull_field_array(const void *, size_t fields_len,
struct field_array *);
int nx_put_match(struct ofpbuf *, const struct match *,
ovs_be64 cookie, ovs_be64 cookie_mask);
int oxm_put_match(struct ofpbuf *, const struct match *, enum ofp_version);
void oxm_format_field_array(struct ds *, const struct field_array *);
int oxm_put_field_array(struct ofpbuf *, const struct field_array *,
enum ofp_version version);
/* Decoding and encoding OXM/NXM headers (just a field ID) or entries (a field
* ID followed by a value and possibly a mask). */

View File

@ -510,6 +510,19 @@ format_odp_hash_action(struct ds *ds, const struct ovs_action_hash *hash_act)
ds_put_format(ds, ")");
}
/* Appends to 'ds' a "udp(src=...,dst=...,csum=0x...)," description of the UDP
 * header that immediately follows 'ip'.
 *
 * Returns a pointer to the first byte after the UDP header. */
static const void *
format_udp_tnl_push_header(struct ds *ds, const struct ip_header *ip)
{
    const struct udp_header *udp = (const struct udp_header *) (ip + 1);
    uint16_t src_port = ntohs(udp->udp_src);
    uint16_t dst_port = ntohs(udp->udp_dst);
    uint16_t checksum = ntohs(udp->udp_csum);

    ds_put_format(ds, "udp(src=%"PRIu16",dst=%"PRIu16",csum=0x%"PRIx16"),",
                  src_port, dst_port, checksum);

    return udp + 1;
}
static void
format_odp_tnl_push_header(struct ds *ds, struct ovs_action_push_tnl *data)
{
@ -541,18 +554,20 @@ format_odp_tnl_push_header(struct ds *ds, struct ovs_action_push_tnl *data)
if (data->tnl_type == OVS_VPORT_TYPE_VXLAN) {
const struct vxlanhdr *vxh;
const struct udp_header *udp;
/* UDP */
udp = (const struct udp_header *) (ip + 1);
ds_put_format(ds, "udp(src=%"PRIu16",dst=%"PRIu16"),",
ntohs(udp->udp_src), ntohs(udp->udp_dst));
vxh = format_udp_tnl_push_header(ds, ip);
/* VxLan */
vxh = (const struct vxlanhdr *) (udp + 1);
ds_put_format(ds, "vxlan(flags=0x%"PRIx32",vni=0x%"PRIx32")",
ntohl(get_16aligned_be32(&vxh->vx_flags)),
ntohl(get_16aligned_be32(&vxh->vx_vni)));
ntohl(get_16aligned_be32(&vxh->vx_vni)) >> 8);
} else if (data->tnl_type == OVS_VPORT_TYPE_GENEVE) {
const struct genevehdr *gnh;
gnh = format_udp_tnl_push_header(ds, ip);
ds_put_format(ds, "geneve(%svni=0x%"PRIx32")",
gnh->oam ? "oam," : "",
ntohl(get_16aligned_be32(&gnh->vni)) >> 8);
} else if (data->tnl_type == OVS_VPORT_TYPE_GRE) {
const struct gre_base_hdr *greh;
ovs_16aligned_be32 *options;
@ -562,10 +577,10 @@ format_odp_tnl_push_header(struct ds *ds, struct ovs_action_push_tnl *data)
greh = (const struct gre_base_hdr *) l4;
ds_put_format(ds, "gre((flags=0x%"PRIx16",proto=0x%"PRIx16")",
greh->flags, ntohs(greh->protocol));
ntohs(greh->flags), ntohs(greh->protocol));
options = (ovs_16aligned_be32 *)(greh + 1);
if (greh->flags & htons(GRE_CSUM)) {
ds_put_format(ds, ",csum=0x%"PRIx32, ntohl(get_16aligned_be32(options)));
ds_put_format(ds, ",csum=0x%"PRIx16, ntohs(*((ovs_be16 *)options)));
options++;
}
if (greh->flags & htons(GRE_KEY)) {
@ -840,7 +855,7 @@ ovs_parse_tnl_push(const char *s, struct ovs_action_push_tnl *data)
struct ip_header *ip;
struct udp_header *udp;
struct gre_base_hdr *greh;
uint16_t gre_proto, dl_type, udp_src, udp_dst;
uint16_t gre_proto, gre_flags, dl_type, udp_src, udp_dst, csum;
ovs_be32 sip, dip;
uint32_t tnl_type = 0, header_len = 0;
void *l3, *l4;
@ -885,40 +900,57 @@ ovs_parse_tnl_push(const char *s, struct ovs_action_push_tnl *data)
/* Tunnel header */
udp = (struct udp_header *) l4;
greh = (struct gre_base_hdr *) l4;
if (ovs_scan_len(s, &n, "udp(src=%"SCNi16",dst=%"SCNi16"),",
&udp_src, &udp_dst)) {
struct vxlanhdr *vxh;
uint32_t vx_flags, vx_vni;
if (ovs_scan_len(s, &n, "udp(src=%"SCNi16",dst=%"SCNi16",csum=0x%"SCNx16"),",
&udp_src, &udp_dst, &csum)) {
uint32_t vx_flags, vni;
udp->udp_src = htons(udp_src);
udp->udp_dst = htons(udp_dst);
udp->udp_len = 0;
udp->udp_csum = 0;
udp->udp_csum = htons(csum);
vxh = (struct vxlanhdr *) (udp + 1);
if (!ovs_scan_len(s, &n, "vxlan(flags=0x%"SCNx32",vni=0x%"SCNx32"))",
&vx_flags, &vx_vni)) {
if (ovs_scan_len(s, &n, "vxlan(flags=0x%"SCNx32",vni=0x%"SCNx32"))",
&vx_flags, &vni)) {
struct vxlanhdr *vxh = (struct vxlanhdr *) (udp + 1);
put_16aligned_be32(&vxh->vx_flags, htonl(vx_flags));
put_16aligned_be32(&vxh->vx_vni, htonl(vni << 8));
tnl_type = OVS_VPORT_TYPE_VXLAN;
header_len = sizeof *eth + sizeof *ip +
sizeof *udp + sizeof *vxh;
} else if (ovs_scan_len(s, &n, "geneve(")) {
struct genevehdr *gnh = (struct genevehdr *) (udp + 1);
memset(gnh, 0, sizeof *gnh);
if (ovs_scan_len(s, &n, "oam,")) {
gnh->oam = 1;
}
if (!ovs_scan_len(s, &n, "vni=0x%"SCNx32"))", &vni)) {
return -EINVAL;
}
gnh->proto_type = htons(ETH_TYPE_TEB);
put_16aligned_be32(&gnh->vni, htonl(vni << 8));
tnl_type = OVS_VPORT_TYPE_GENEVE;
header_len = sizeof *eth + sizeof *ip +
sizeof *udp + sizeof *gnh;
} else {
return -EINVAL;
}
put_16aligned_be32(&vxh->vx_flags, htonl(vx_flags));
put_16aligned_be32(&vxh->vx_vni, htonl(vx_vni));
tnl_type = OVS_VPORT_TYPE_VXLAN;
header_len = sizeof *eth + sizeof *ip +
sizeof *udp + sizeof *vxh;
} else if (ovs_scan_len(s, &n, "gre((flags=0x%"SCNx16",proto=0x%"SCNx16")",
&greh->flags, &gre_proto)){
&gre_flags, &gre_proto)){
tnl_type = OVS_VPORT_TYPE_GRE;
greh->flags = htons(gre_flags);
greh->protocol = htons(gre_proto);
ovs_16aligned_be32 *options = (ovs_16aligned_be32 *) (greh + 1);
if (greh->flags & htons(GRE_CSUM)) {
uint32_t csum;
if (!ovs_scan_len(s, &n, ",csum=0x%"SCNx32, &csum)) {
if (!ovs_scan_len(s, &n, ",csum=0x%"SCNx16, &csum)) {
return -EINVAL;
}
put_16aligned_be32(options, htonl(csum));
memset(options, 0, sizeof *options);
*((ovs_be16 *)options) = htons(csum);
options++;
}
if (greh->flags & htons(GRE_KEY)) {

View File

@ -4237,6 +4237,31 @@ format_EXIT(const struct ofpact_null *a OVS_UNUSED, struct ds *s)
ds_put_cstr(s, "exit");
}
/* Unroll xlate action. */
/* Encoder slot for the unroll_xlate action.  It must never be called: the
 * body is an unconditional OVS_NOT_REACHED(). */
static void
encode_UNROLL_XLATE(const struct ofpact_unroll_xlate *unroll OVS_UNUSED,
                    enum ofp_version ofp_version OVS_UNUSED,
                    struct ofpbuf *out OVS_UNUSED)
{
    OVS_NOT_REACHED();
}
/* Parser slot for the unroll_xlate action.  It must never be called: the
 * body is an unconditional OVS_NOT_REACHED().  The trailing return exists
 * only to give the function a value on every path. */
static char * OVS_WARN_UNUSED_RESULT
parse_UNROLL_XLATE(char *arg OVS_UNUSED, struct ofpbuf *ofpacts OVS_UNUSED,
                   enum ofputil_protocol *usable_protocols OVS_UNUSED)
{
    OVS_NOT_REACHED();
    return NULL;
}
/* Appends the fixed text "unroll_xlate" to 's'.  The action's own fields are
 * not printed. */
static void
format_UNROLL_XLATE(const struct ofpact_unroll_xlate *a OVS_UNUSED,
                    struct ds *s)
{
    ds_put_cstr(s, "unroll_xlate");
}
/* Action structure for NXAST_SAMPLE.
*
* Samples matching packets with the given probability and sends them
@ -4726,6 +4751,7 @@ ofpact_is_set_or_move_action(const struct ofpact *a)
case OFPACT_DEC_TTL:
case OFPACT_ENQUEUE:
case OFPACT_EXIT:
case OFPACT_UNROLL_XLATE:
case OFPACT_FIN_TIMEOUT:
case OFPACT_GOTO_TABLE:
case OFPACT_GROUP:
@ -4795,6 +4821,7 @@ ofpact_is_allowed_in_actions_set(const struct ofpact *a)
case OFPACT_CONTROLLER:
case OFPACT_ENQUEUE:
case OFPACT_EXIT:
case OFPACT_UNROLL_XLATE:
case OFPACT_FIN_TIMEOUT:
case OFPACT_LEARN:
case OFPACT_CONJUNCTION:
@ -4868,7 +4895,7 @@ ofpacts_copy_all(struct ofpbuf *out, const struct ofpbuf *in,
* "Action Set" and "Action List" terms used in OpenFlow 1.1+.)
*
* In general this involves appending the last instance of each action that is
* adimissible in the action set in the order described in the OpenFlow
* admissible in the action set in the order described in the OpenFlow
* specification.
*
* Exceptions:
@ -5017,6 +5044,7 @@ ovs_instruction_type_from_ofpact_type(enum ofpact_type type)
case OFPACT_MULTIPATH:
case OFPACT_NOTE:
case OFPACT_EXIT:
case OFPACT_UNROLL_XLATE:
case OFPACT_SAMPLE:
default:
return OVSINST_OFPIT11_APPLY_ACTIONS;
@ -5607,6 +5635,11 @@ ofpact_check__(enum ofputil_protocol *usable_protocols, struct ofpact *a,
case OFPACT_GROUP:
return 0;
case OFPACT_UNROLL_XLATE:
/* UNROLL is an internal action that should never be seen via
* OpenFlow. */
return OFPERR_OFPBAC_BAD_TYPE;
default:
OVS_NOT_REACHED();
}
@ -5998,6 +6031,7 @@ ofpact_outputs_to_port(const struct ofpact *ofpact, ofp_port_t port)
case OFPACT_MULTIPATH:
case OFPACT_NOTE:
case OFPACT_EXIT:
case OFPACT_UNROLL_XLATE:
case OFPACT_PUSH_MPLS:
case OFPACT_POP_MPLS:
case OFPACT_SAMPLE:

View File

@ -105,6 +105,7 @@
OFPACT(NOTE, ofpact_note, data, "note") \
OFPACT(EXIT, ofpact_null, ofpact, "exit") \
OFPACT(SAMPLE, ofpact_sample, ofpact, "sample") \
OFPACT(UNROLL_XLATE, ofpact_unroll_xlate, ofpact, "unroll_xlate") \
\
/* Instructions. */ \
OFPACT(METER, ofpact_meter, ofpact, "meter") \
@ -715,6 +716,17 @@ struct ofpact_group {
uint32_t group_id;
};
/* OFPACT_UNROLL_XLATE.
 *
 * Used only internally.
 *
 * NOTE(review): presumably inserted during flow translation to carry the
 * rule context described below -- confirm against the xlate code. */
struct ofpact_unroll_xlate {
/* Common ofpact header. */
struct ofpact ofpact;
/* Metadata in xlate context, visible to controller via PACKET_INs. */
uint8_t rule_table_id; /* 0xFF if none. */
ovs_be64 rule_cookie; /* OVS_BE64_MAX if none. */
};
/* Converting OpenFlow to ofpacts. */
enum ofperr ofpacts_pull_openflow_actions(struct ofpbuf *openflow,
unsigned int actions_len,

View File

@ -1157,6 +1157,67 @@ parse_bucket_str(struct ofputil_bucket *bucket, char *str_,
return NULL;
}
/* Parses 's' as a list of fields, each optionally followed by a mask, and
 * records them in 'fa'.
 *
 * 's' is tokenized in place with strtok_r(), so it is modified.  A field
 * name ends at '=', ',' or whitespace.  The token after it, ending at ',' or
 * whitespace, is parsed as that field's mask, which must be a valid mask
 * for the field and must not be all-zeros.  If no further token follows, the
 * mask defaults to all-ones.  A field may appear only once.
 *
 * NOTE(review): because the mask is simply "the next token", a name that is
 * not followed by "=mask" but is followed by another name (e.g. "a,b") has
 * that second name parsed as its mask.  Confirm that callers never pass more
 * than one field per string, or tokenize differently here.
 *
 * '*usable_protocols' is narrowed according to whether each mask is exact,
 * CIDR-like, or arbitrary bitwise.
 *
 * Returns NULL on success, otherwise a malloc()'d error string that the
 * caller must free. */
static char * OVS_WARN_UNUSED_RESULT
parse_select_group_field(char *s, struct field_array *fa,
                         enum ofputil_protocol *usable_protocols)
{
    static const char name_delims[] = "=, \t\r\n";
    static const char mask_delims[] = ", \t\r\n";
    char *pos = NULL;
    char *token;

    for (token = strtok_r(s, name_delims, &pos); token != NULL;
         token = strtok_r(NULL, name_delims, &pos)) {
        const struct mf_field *field = mf_from_name(token);
        const char *mask_str;
        union mf_value mask;

        if (field == NULL) {
            return xasprintf("%s: unknown field %s", s, token);
        }
        if (bitmap_is_set(fa->used.bm, field->id)) {
            return xasprintf("%s: duplicate field", token);
        }

        mask_str = strtok_r(NULL, mask_delims, &pos);
        if (mask_str == NULL) {
            /* No mask given: use the whole field. */
            memset(&mask, 0xff, field->n_bytes);
        } else {
            char *error = mf_parse_value(field, mask_str, &mask);

            if (error) {
                return error;
            }

            /* The mask cannot be all-zeros. */
            if (is_all_zeros(&mask, field->n_bytes)) {
                return xasprintf("%s: values are wildcards here "
                                 "and must not be all-zeros", s);
            }

            /* The values parsed are masks for fields used by the selection
             * method. */
            if (!mf_is_mask_valid(field, &mask)) {
                return xasprintf("%s: invalid mask for field %s",
                                 mask_str, field->name);
            }
        }

        field_array_set(field->id, &mask, fa);

        if (is_all_ones(&mask, field->n_bytes)) {
            *usable_protocols &= field->usable_protocols_exact;
        } else if (field->usable_protocols_bitwise
                   == field->usable_protocols_cidr
                   || ip_is_cidr(mask.be32)) {
            *usable_protocols &= field->usable_protocols_cidr;
        } else {
            *usable_protocols &= field->usable_protocols_bitwise;
        }
    }

    return NULL;
}
static char * OVS_WARN_UNUSED_RESULT
parse_ofp_group_mod_str__(struct ofputil_group_mod *gm, uint16_t command,
char *string,
@ -1327,6 +1388,39 @@ parse_ofp_group_mod_str__(struct ofputil_group_mod *gm, uint16_t command,
} else if (!strcmp(name, "bucket")) {
error = xstrdup("bucket is not needed");
goto out;
} else if (!strcmp(name, "selection_method")) {
if (!(fields & F_GROUP_TYPE)) {
error = xstrdup("selection method is not needed");
goto out;
}
if (strlen(value) >= NTR_MAX_SELECTION_METHOD_LEN) {
error = xasprintf("selection method is longer than %u"
" bytes long",
NTR_MAX_SELECTION_METHOD_LEN - 1);
goto out;
}
memset(gm->props.selection_method, '\0',
NTR_MAX_SELECTION_METHOD_LEN);
strcpy(gm->props.selection_method, value);
*usable_protocols &= OFPUTIL_P_OF15_UP;
} else if (!strcmp(name, "selection_method_param")) {
if (!(fields & F_GROUP_TYPE)) {
error = xstrdup("selection method param is not needed");
goto out;
}
error = str_to_u64(value, &gm->props.selection_method_param);
*usable_protocols &= OFPUTIL_P_OF15_UP;
} else if (!strcmp(name, "fields")) {
if (!(fields & F_GROUP_TYPE)) {
error = xstrdup("fields are not needed");
goto out;
}
error = parse_select_group_field(value, &gm->props.fields,
usable_protocols);
if (error) {
goto out;
}
*usable_protocols &= OFPUTIL_P_OF15_UP;
} else {
error = xasprintf("unknown keyword %s", name);
goto out;

View File

@ -2156,8 +2156,8 @@ ofp_print_bucket_id(struct ds *s, const char *label, uint32_t bucket_id,
static void
ofp_print_group(struct ds *s, uint32_t group_id, uint8_t type,
struct ovs_list *p_buckets, enum ofp_version ofp_version,
bool suppress_type)
struct ovs_list *p_buckets, struct ofputil_group_props *props,
enum ofp_version ofp_version, bool suppress_type)
{
struct ofputil_bucket *bucket;
@ -2169,6 +2169,26 @@ ofp_print_group(struct ds *s, uint32_t group_id, uint8_t type,
ds_put_format(s, ",type=%s", type_str[type > 4 ? 4 : type]);
}
if (props->selection_method[0]) {
size_t mark, start;
ds_put_format(s, ",selection_method=%s,", props->selection_method);
if (props->selection_method_param) {
ds_put_format(s, "selection_method_param=%"PRIu64",",
props->selection_method_param);
}
/* Allow rewinding to immediately before the trailing ',' */
mark = s->length - 1;
ds_put_cstr(s, "fields=");
start = s->length;
oxm_format_field_array(s, &props->fields);
if (s->length == start) {
ds_truncate(s, mark);
}
}
if (!p_buckets) {
return;
}
@ -2226,8 +2246,8 @@ ofp_print_group_desc(struct ds *s, const struct ofp_header *oh)
ds_put_char(s, '\n');
ds_put_char(s, ' ');
ofp_print_group(s, gd.group_id, gd.type, &gd.buckets, oh->version,
false);
ofp_print_group(s, gd.group_id, gd.type, &gd.buckets, &gd.props,
oh->version, false);
ofputil_bucket_list_destroy(&gd.buckets);
}
}
@ -2380,8 +2400,8 @@ ofp_print_group_mod(struct ds *s, const struct ofp_header *oh)
gm.command_bucket_id, oh->version);
}
ofp_print_group(s, gm.group_id, gm.type, &gm.buckets, oh->version,
bucket_command);
ofp_print_group(s, gm.group_id, gm.type, &gm.buckets, &gm.props,
oh->version, bucket_command);
ofputil_bucket_list_destroy(&gm.buckets);
}

View File

@ -38,6 +38,7 @@
#include "ofp-msgs.h"
#include "ofp-util.h"
#include "ofpbuf.h"
#include "openflow/netronome-ext.h"
#include "packets.h"
#include "random.h"
#include "unaligned.h"
@ -59,6 +60,14 @@ struct ofp_prop_header {
ovs_be16 len;
};
/* Header shared by experimenter properties: the generic property type and
 * length, followed by a 32-bit experimenter ID and a 32-bit
 * experimenter-defined subtype.  All members are in network byte order. */
struct ofp_prop_experimenter {
ovs_be16 type; /* OFP*_EXPERIMENTER. */
ovs_be16 length; /* Length in bytes of this property. */
ovs_be32 experimenter; /* Experimenter ID which takes the same form as
* in struct ofp_experimenter_header. */
ovs_be32 exp_type; /* Experimenter defined. */
};
/* Pulls a property, beginning with struct ofp_prop_header, from the beginning
* of 'msg'. Stores the type of the property in '*typep' and, if 'property' is
* nonnull, the entire property, including the header, in '*property'. Returns
@ -6877,10 +6886,9 @@ ofputil_decode_port_stats_request(const struct ofp_header *request,
void
ofputil_bucket_list_destroy(struct ovs_list *buckets)
{
struct ofputil_bucket *bucket, *next_bucket;
struct ofputil_bucket *bucket;
LIST_FOR_EACH_SAFE (bucket, next_bucket, list_node, buckets) {
list_remove(&bucket->list_node);
LIST_FOR_EACH_POP (bucket, list_node, buckets) {
free(bucket->ofpacts);
free(bucket);
}
@ -7020,6 +7028,13 @@ ofputil_encode_group_stats_request(enum ofp_version ofp_version,
return request;
}
/* Releases the resources owned by 'gd' without freeing 'gd' itself: every
 * bucket on 'gd->buckets' is destroyed.
 *
 * 'gd->props.fields' is a member embedded inside 'gd', not a separately
 * allocated object, so it must never be handed to free(): that would pass
 * free() a pointer that no allocator returned, which is undefined
 * behavior. */
void
ofputil_uninit_group_desc(struct ofputil_group_desc *gd)
{
    ofputil_bucket_list_destroy(&gd->buckets);
}
/* Decodes the OpenFlow group description request in 'oh', returning the group
* whose description is requested, or OFPG_ALL if stats for all groups was
* requested. */
@ -7411,6 +7426,26 @@ ofputil_put_ofp15_bucket(const struct ofputil_bucket *bucket,
ob->bucket_id = htonl(bucket_id);
}
/* Appends to 'openflow' an NTR selection-method experimenter group property
 * built from 'gp': a fixed-size struct ntr_group_prop_selection_method
 * followed by the field array that oxm_put_field_array() emits for
 * 'gp->fields' and 'ofp_version'. */
static void
ofputil_put_group_prop_ntr_selection_method(enum ofp_version ofp_version,
                                            const struct ofputil_group_props *gp,
                                            struct ofpbuf *openflow)
{
    const size_t prop_ofs = openflow->size;
    struct ntr_group_prop_selection_method *ntr_prop;

    /* Reserve the zero-filled fixed part first, then append the fields. */
    ofpbuf_put_zeros(openflow, sizeof *ntr_prop);
    oxm_put_field_array(openflow, &gp->fields, ofp_version);

    /* The fixed part is looked up by offset only after all appends are done
     * (presumably because appending may relocate the buffer's storage --
     * confirm against ofpbuf). */
    ntr_prop = ofpbuf_at_assert(openflow, prop_ofs, sizeof *ntr_prop);

    ntr_prop->selection_method_param = htonll(gp->selection_method_param);
    strcpy(ntr_prop->selection_method, gp->selection_method);
    ntr_prop->exp_type = htonl(NTRT_SELECTION_METHOD);
    ntr_prop->experimenter = htonl(NTR_VENDOR_ID);
    ntr_prop->type = htons(OFPGPT15_EXPERIMENTER);

    /* NOTE(review): end_property() is expected to finish the property that
     * began at 'prop_ofs' (e.g. fill in its length) -- confirm. */
    end_property(openflow, prop_ofs);
}
static void
ofputil_append_ofp11_group_desc_reply(const struct ofputil_group_desc *gds,
const struct ovs_list *buckets,
@ -7459,6 +7494,12 @@ ofputil_append_ofp15_group_desc_reply(const struct ofputil_group_desc *gds,
ogds->group_id = htonl(gds->group_id);
ogds->bucket_list_len = htons(reply->size - start_buckets);
/* Add group properties */
if (gds->props.selection_method[0]) {
ofputil_put_group_prop_ntr_selection_method(version, &gds->props,
reply);
}
ofpmp_postappend(replies, start_ogds);
}
@ -7721,6 +7762,196 @@ ofputil_pull_ofp15_buckets(struct ofpbuf *msg, size_t buckets_length,
return 0;
}
/* Resets every byte of '*gp' to zero, which leaves 'gp->selection_method' an
 * empty string and 'gp->selection_method_param' equal to 0. */
static void
ofputil_init_group_properties(struct ofputil_group_props *gp)
{
memset(gp, 0, sizeof *gp);
}
/* Parses the NTR selection-method experimenter property held in 'payload'
 * and stores the result in 'gp'.
 *
 * The property is accepted only for select groups ('group_type' ==
 * OFPGT11_SELECT) and only when 'group_cmd' is OFPGC15_ADD or
 * OFPGC15_MODIFY.  "hash" is the only selection method accepted.
 *
 * Returns 0 on success, otherwise an OFPERR_* value.  On failure 'gp' may
 * already have been partially updated, because the method name and parameter
 * are copied before the remaining checks run. */
static enum ofperr
parse_group_prop_ntr_selection_method(struct ofpbuf *payload,
                                      enum ofp11_group_type group_type,
                                      enum ofp15_group_mod_command group_cmd,
                                      struct ofputil_group_props *gp)
{
    struct ntr_group_prop_selection_method *prop = payload->data;
    size_t fields_len, method_len;
    enum ofperr error;

    switch (group_type) {
    case OFPGT11_SELECT:
        break;
    case OFPGT11_ALL:
    case OFPGT11_INDIRECT:
    case OFPGT11_FF:
        log_property(false, "ntr selection method property is only allowed "
                     "for select groups");
        return OFPERR_OFPBPC_BAD_VALUE;
    default:
        OVS_NOT_REACHED();
    }

    switch (group_cmd) {
    case OFPGC15_ADD:
    case OFPGC15_MODIFY:
        break;
    case OFPGC15_DELETE:
    case OFPGC15_INSERT_BUCKET:
    case OFPGC15_REMOVE_BUCKET:
        /* The message names the commands that are actually accepted above
         * (add and modify), not delete. */
        log_property(false, "ntr selection method property is only allowed "
                     "for add and modify group modifications");
        return OFPERR_OFPBPC_BAD_VALUE;
    default:
        OVS_NOT_REACHED();
    }

    /* 'prop' may only be dereferenced once the payload is known to hold the
     * whole fixed-size part. */
    if (payload->size < sizeof *prop) {
        log_property(false, "ntr selection method property length "
                     "%u is not valid", payload->size);
        return OFPERR_OFPBPC_BAD_LEN;
    }

    /* strnlen() bounds the scan; hitting the bound means there is no NUL
     * inside the fixed-size name, so the strcmp()/strcpy() below would be
     * unsafe. */
    method_len = strnlen(prop->selection_method, NTR_MAX_SELECTION_METHOD_LEN);
    if (method_len == NTR_MAX_SELECTION_METHOD_LEN) {
        log_property(false, "ntr selection method is not null terminated");
        return OFPERR_OFPBPC_BAD_VALUE;
    }

    if (strcmp("hash", prop->selection_method)) {
        log_property(false, "ntr selection method '%s' is not supported",
                     prop->selection_method);
        return OFPERR_OFPBPC_BAD_VALUE;
    }

    strcpy(gp->selection_method, prop->selection_method);
    gp->selection_method_param = ntohll(prop->selection_method_param);

    /* An empty method name is already rejected by the "hash" comparison
     * above, so this check and the '!method_len' check further down cannot
     * currently trigger. */
    if (!method_len && gp->selection_method_param) {
        log_property(false, "ntr selection method parameter is non-zero but "
                     "selection method is empty");
        return OFPERR_OFPBPC_BAD_VALUE;
    }

    ofpbuf_pull(payload, sizeof *prop);

    /* Whatever follows the fixed-size part is the field array.
     * NOTE(review): this subtraction relies on prop->length being at least
     * sizeof *prop; presumably ofputil_pull_property() guarantees that the
     * payload size matches the encoded length -- confirm. */
    fields_len = ntohs(prop->length) - sizeof *prop;
    if (!method_len && fields_len) {
        log_property(false, "ntr selection method parameter is zero "
                     "but fields are provided");
        return OFPERR_OFPBPC_BAD_VALUE;
    }

    error = oxm_pull_field_array(payload->data, fields_len,
                                 &gp->fields);
    if (error) {
        log_property(false, "ntr selection method fields are invalid");
        return error;
    }

    return 0;
}
/* Dispatches the NTR (Netronome) experimenter group property in 'payload'
 * according to its experimenter-defined type 'exp_type'.
 *
 * NTRT_SELECTION_METHOD is the only type handled; its result is returned
 * unchanged.  Any other type is logged and yields OFPERR_OFPBPC_BAD_TYPE. */
static enum ofperr
parse_group_prop_ntr(struct ofpbuf *payload, uint32_t exp_type,
                     enum ofp11_group_type group_type,
                     enum ofp15_group_mod_command group_cmd,
                     struct ofputil_group_props *gp)
{
    if (exp_type == NTRT_SELECTION_METHOD) {
        return parse_group_prop_ntr_selection_method(payload, group_type,
                                                     group_cmd, gp);
    }

    log_property(false, "unknown group property ntr experimenter type "
                 "%"PRIu32, exp_type);
    return OFPERR_OFPBPC_BAD_TYPE;
}
/* Parses the experimenter group property in 'payload', which starts with a
 * struct ofp_prop_experimenter, and hands it to the parser for the matching
 * experimenter.
 *
 * Returns OFPERR_OFPBPC_BAD_LEN if 'payload' is too short to hold the
 * experimenter header, OFPERR_OFPBPC_BAD_EXPERIMENTER if the experimenter ID
 * is not recognized, and otherwise whatever the experimenter-specific parser
 * returns (0 on success). */
static enum ofperr
parse_ofp15_group_prop_exp(struct ofpbuf *payload,
                           enum ofp11_group_type group_type,
                           enum ofp15_group_mod_command group_cmd,
                           struct ofputil_group_props *gp)
{
    struct ofp_prop_experimenter *prop = payload->data;
    /* The experimenter ID is a 32-bit wire field; it must be kept at full
     * width so that an ID which merely shares its low 16 bits with a known
     * vendor is not mistaken for that vendor. */
    uint32_t experimenter;
    uint32_t exp_type;
    enum ofperr error;

    /* 'prop' is only dereferenced after this length check. */
    if (payload->size < sizeof *prop) {
        return OFPERR_OFPBPC_BAD_LEN;
    }

    experimenter = ntohl(prop->experimenter);
    exp_type = ntohl(prop->exp_type);

    switch (experimenter) {
    case NTR_VENDOR_ID:
        error = parse_group_prop_ntr(payload, exp_type, group_type,
                                     group_cmd, gp);
        break;

    default:
        log_property(false, "unknown group property experimenter %"PRIu32,
                     experimenter);
        error = OFPERR_OFPBPC_BAD_EXPERIMENTER;
        break;
    }

    return error;
}
static enum ofperr
parse_ofp15_group_properties(struct ofpbuf *msg,
enum ofp11_group_type group_type,
enum ofp15_group_mod_command group_cmd,
struct ofputil_group_props *gp,
size_t properties_len)
{
struct ofpbuf properties;
ofpbuf_use_const(&properties, ofpbuf_pull(msg, properties_len),
properties_len);
while (properties.size > 0) {
struct ofpbuf payload;
enum ofperr error;
uint16_t type;
error = ofputil_pull_property(&properties, &payload, &type);
if (error) {
return error;
}
switch (type) {
case OFPGPT15_EXPERIMENTER:
error = parse_ofp15_group_prop_exp(&payload, group_type,
group_cmd, gp);
break;
default:
log_property(false, "unknown group property %"PRIu16, type);
error = OFPERR_OFPBPC_BAD_TYPE;
break;
}
if (error) {
return error;
}
}
return 0;
}
static int
ofputil_decode_ofp11_group_desc_reply(struct ofputil_group_desc *gd,
struct ofpbuf *msg,
@ -7764,6 +7995,7 @@ ofputil_decode_ofp15_group_desc_reply(struct ofputil_group_desc *gd,
{
struct ofp15_group_desc_stats *ogds;
uint16_t length, bucket_list_len;
int error;
if (!msg->header) {
ofpraw_pull_assert(msg);
@ -7795,9 +8027,22 @@ ofputil_decode_ofp15_group_desc_reply(struct ofputil_group_desc *gd,
"bucket list length %u", bucket_list_len);
return OFPERR_OFPBRC_BAD_LEN;
}
error = ofputil_pull_ofp15_buckets(msg, bucket_list_len, version,
&gd->buckets);
if (error) {
return error;
}
return ofputil_pull_ofp15_buckets(msg, bucket_list_len, version,
&gd->buckets);
/* By definition group desc messages don't have a group mod command.
* However, parse_group_prop_ntr_selection_method() checks to make sure
* that the command is OFPGC15_ADD or OFPGC15_MODIFY to guard
* against group mod messages with other commands supplying
* an NTR selection method group experimenter property.
* Such properties are valid for group desc replies so
* claim that the group mod command is OFPGC15_ADD to
* satisfy the check in parse_group_prop_ntr_selection_method(). */
return parse_ofp15_group_properties(msg, gd->type, OFPGC15_ADD, &gd->props,
msg->size);
}
/* Converts a group description reply in 'msg' into an abstract
@ -7814,6 +8059,8 @@ int
ofputil_decode_group_desc_reply(struct ofputil_group_desc *gd,
struct ofpbuf *msg, enum ofp_version version)
{
ofputil_init_group_properties(&gd->props);
switch (version)
{
case OFP11_VERSION:
@ -7831,6 +8078,12 @@ ofputil_decode_group_desc_reply(struct ofputil_group_desc *gd,
}
}
/* Releases the buckets held on 'gm->buckets' by passing the list to
 * ofputil_bucket_list_destroy().  'gm' itself is not freed here. */
void
ofputil_uninit_group_mod(struct ofputil_group_mod *gm)
{
ofputil_bucket_list_destroy(&gm->buckets);
}
static struct ofpbuf *
ofputil_encode_ofp11_group_mod(enum ofp_version ofp_version,
const struct ofputil_group_mod *gm)
@ -7907,6 +8160,11 @@ ofputil_encode_ofp15_group_mod(enum ofp_version ofp_version,
ogm->command_bucket_id = htonl(gm->command_bucket_id);
ogm->bucket_array_len = htons(b->size - start_ogm - sizeof *ogm);
/* Add group properties */
if (gm->props.selection_method[0]) {
ofputil_put_group_prop_ntr_selection_method(ofp_version, &gm->props, b);
}
id_pool_destroy(bucket_ids);
return b;
}
@ -8064,14 +8322,14 @@ ofputil_pull_ofp15_group_mod(struct ofpbuf *msg, enum ofp_version ofp_version,
}
bucket_list_len = ntohs(ogm->bucket_array_len);
if (bucket_list_len < msg->size) {
VLOG_WARN_RL(&bad_ofmsg_rl, "group has %u trailing bytes",
msg->size - bucket_list_len);
return OFPERR_OFPGMFC_BAD_BUCKET;
error = ofputil_pull_ofp15_buckets(msg, bucket_list_len, ofp_version,
&gm->buckets);
if (error) {
return error;
}
return ofputil_pull_ofp15_buckets(msg, bucket_list_len, ofp_version,
&gm->buckets);
return parse_ofp15_group_properties(msg, gm->type, gm->command, &gm->props,
msg->size);
}
/* Converts OpenFlow group mod message 'oh' into an abstract group mod in
@ -8088,6 +8346,8 @@ ofputil_decode_group_mod(const struct ofp_header *oh,
ofpbuf_use_const(&msg, oh, ntohs(oh->length));
ofpraw_pull_assert(&msg);
ofputil_init_group_properties(&gm->props);
switch (ofp_version)
{
case OFP11_VERSION:

View File

@ -27,6 +27,7 @@
#include "match.h"
#include "meta-flow.h"
#include "netdev.h"
#include "openflow/netronome-ext.h"
#include "openflow/nicira-ext.h"
#include "openvswitch/types.h"
#include "type-props.h"
@ -217,6 +218,8 @@ void ofputil_match_to_ofp10_match(const struct match *, struct ofp10_match *);
/* Work with ofp11_match. */
enum ofperr ofputil_pull_ofp11_match(struct ofpbuf *, struct match *,
uint16_t *padded_match_len);
enum ofperr ofputil_pull_ofp11_mask(struct ofpbuf *, struct match *,
struct mf_bitmap *bm);
enum ofperr ofputil_match_from_ofp11_match(const struct ofp11_match *,
struct match *);
int ofputil_put_ofp11_match(struct ofpbuf *, const struct match *,
@ -993,6 +996,14 @@ struct ofputil_bucket {
struct bucket_counter stats;
};
/* Protocol-independent group properties, embedded in both struct
 * ofputil_group_mod and struct ofputil_group_desc. */
struct ofputil_group_props {
/* NTR selection method */
/* Method name as a C string.  An empty string means that no selection
 * method is set: the encoders and the printer test selection_method[0]. */
char selection_method[NTR_MAX_SELECTION_METHOD_LEN];
/* Parameter that accompanies the selection method (host byte order here;
 * converted with htonll()/ntohll() on the wire). */
uint64_t selection_method_param;
/* Fields associated with the selection method. */
struct field_array fields;
};
/* Protocol-independent group_mod. */
struct ofputil_group_mod {
uint16_t command; /* One of OFPGC15_*. */
@ -1003,6 +1014,7 @@ struct ofputil_group_mod {
* OFPGC15_REMOVE_BUCKET commands
* execution.*/
struct ovs_list buckets; /* Contains "struct ofputil_bucket"s. */
struct ofputil_group_props props; /* Group properties. */
};
/* Group stats reply, independent of protocol. */
@ -1032,6 +1044,7 @@ struct ofputil_group_desc {
uint8_t type; /* One of OFPGT_*. */
uint32_t group_id; /* Group identifier. */
struct ovs_list buckets; /* Contains "struct ofputil_bucket"s. */
struct ofputil_group_props props; /* Group properties. */
};
void ofputil_bucket_list_destroy(struct ovs_list *buckets);
@ -1062,6 +1075,7 @@ struct ofpbuf *ofputil_encode_group_features_reply(
const struct ofputil_group_features *, const struct ofp_header *request);
void ofputil_decode_group_features_reply(const struct ofp_header *,
struct ofputil_group_features *);
void ofputil_uninit_group_mod(struct ofputil_group_mod *gm);
struct ofpbuf *ofputil_encode_group_mod(enum ofp_version ofp_version,
const struct ofputil_group_mod *gm);
@ -1071,6 +1085,7 @@ enum ofperr ofputil_decode_group_mod(const struct ofp_header *,
int ofputil_decode_group_stats_reply(struct ofpbuf *,
struct ofputil_group_stats *);
void ofputil_uninit_group_desc(struct ofputil_group_desc *gd);
uint32_t ofputil_decode_group_desc_request(const struct ofp_header *);
struct ofpbuf *ofputil_encode_group_desc_request(enum ofp_version,
uint32_t group_id);

View File

@ -477,10 +477,9 @@ ofpbuf_to_string(const struct ofpbuf *b, size_t maxbytes)
void
ofpbuf_list_delete(struct ovs_list *list)
{
struct ofpbuf *b, *next;
struct ofpbuf *b;
LIST_FOR_EACH_SAFE (b, next, list_node, list) {
list_remove(&b->list_node);
LIST_FOR_EACH_POP (b, list_node, list) {
ofpbuf_delete(b);
}
}

View File

@ -421,12 +421,9 @@ aa_get_vlan_queued(struct ovs_list *list)
ovs_mutex_lock(&mutex);
HMAP_FOR_EACH (lldp, hmap_node, all_lldps) {
struct bridge_aa_vlan *node, *node_next;
struct bridge_aa_vlan *node;
LIST_FOR_EACH_SAFE (node,
node_next,
list_node,
&lldp->active_mapping_queue) {
LIST_FOR_EACH_POP (node, list_node, &lldp->active_mapping_queue) {
struct bridge_aa_vlan *copy;
copy = xmalloc(sizeof *copy);
@ -437,7 +434,6 @@ aa_get_vlan_queued(struct ovs_list *list)
list_push_back(list, &copy->list_node);
/* Cleanup */
list_remove(&node->list_node);
free(node->port_name);
free(node);
}
@ -483,12 +479,14 @@ aa_configure(const struct aa_settings *s)
LIST_FOR_EACH (chassis, list, &lldp->lldpd->g_chassis) {
/* System Description */
free(chassis->c_descr);
chassis->c_descr = s->system_description[0] ?
chassis->c_descr = s && s->system_description[0] ?
xstrdup(s->system_description) : xstrdup(PACKAGE_STRING);
/* System Name */
free(chassis->c_name);
chassis->c_name = xstrdup(s->system_name);
if (s) {
free(chassis->c_name);
chassis->c_name = xstrdup(s->system_name);
}
}
}
@ -822,16 +820,15 @@ lldp_create(const struct netdev *netdev,
hw->h_lport.p_id_len = strlen(netdev_get_name(netdev));
/* Auto Attach element tlv */
hw->h_lport.p_element.type = LLDP_TLV_AA_ELEM_TYPE_TAG_CLIENT;
hw->h_lport.p_element.type = LLDP_TLV_AA_ELEM_TYPE_CLIENT_VIRTUAL_SWITCH;
hw->h_lport.p_element.mgmt_vlan = 0;
memcpy(&hw->h_lport.p_element.system_id.system_mac,
lchassis->c_id, lchassis->c_id_len);
hw->h_lport.p_element.system_id.conn_type =
LLDP_TLV_AA_ELEM_CONN_TYPE_SINGLE;
hw->h_lport.p_element.system_id.smlt_id = 0;
hw->h_lport.p_element.system_id.mlt_id[0] = 0;
hw->h_lport.p_element.system_id.mlt_id[1] = 0;
hw->h_lport.p_element.system_id.rsvd = 0;
hw->h_lport.p_element.system_id.rsvd2[0] = 0;
hw->h_lport.p_element.system_id.rsvd2[1] = 0;
list_init(&hw->h_lport.p_isid_vlan_maps);
list_init(&lldp->lldpd->g_hardware);
@ -908,15 +905,15 @@ lldp_create_dummy(void)
hw->h_lport.p_id_len = strlen(hw->h_lport.p_id);
/* Auto Attach element tlv */
hw->h_lport.p_element.type = LLDP_TLV_AA_ELEM_TYPE_TAG_CLIENT;
hw->h_lport.p_element.type = LLDP_TLV_AA_ELEM_TYPE_CLIENT_VIRTUAL_SWITCH;
hw->h_lport.p_element.mgmt_vlan = 0;
memcpy(&hw->h_lport.p_element.system_id.system_mac,
lchassis->c_id, lchassis->c_id_len);
hw->h_lport.p_element.system_id.conn_type =
LLDP_TLV_AA_ELEM_CONN_TYPE_SINGLE;
hw->h_lport.p_element.system_id.smlt_id = 0;
hw->h_lport.p_element.system_id.mlt_id[0] = 0;
hw->h_lport.p_element.system_id.mlt_id[1] = 0;
hw->h_lport.p_element.system_id.rsvd = 0;
hw->h_lport.p_element.system_id.rsvd2[0] = 0;
hw->h_lport.p_element.system_id.rsvd2[1] = 0;
list_init(&hw->h_lport.p_isid_vlan_maps);
list_init(&lldp->lldpd->g_hardware);

Some files were not shown because too many files have changed in this diff Show More