openvswitch/datapath/vport-netdev.c
Pravin B Shelar cd2a59e99c datapath: Simplify datapath locking.
Currently OVS uses a combination of the genl and rtnl locks to protect
datapath state.  This was done to fit in with the locking in the
networking stack, but it complicates locking, and there are a few
lock-ordering issues with the new tunneling protocols.
The following patch simplifies locking by introducing a new ovs mutex,
which is now used to protect the entire ovs state.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
2013-04-12 15:57:09 -07:00
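
The patch itself touches datapath.c rather than this file, but the pattern it
describes is simple: one global mutex with small wrapper functions that the
rest of the datapath calls instead of taking genl/rtnl directly. A minimal
sketch, assuming the names from the commit message (the exact definitions in
the patch may differ):

/* Sketch only: the real definitions live in datapath.c, not in this file. */
static DEFINE_MUTEX(ovs_mutex);         /* protects all OVS datapath state */

void ovs_lock(void)
{
        mutex_lock(&ovs_mutex);
}

void ovs_unlock(void)
{
        mutex_unlock(&ovs_mutex);
}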

/*
 * Copyright (c) 2007-2012 Nicira, Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/if_arp.h>
#include <linux/if_bridge.h>
#include <linux/if_vlan.h>
#include <linux/kernel.h>
#include <linux/llc.h>
#include <linux/rtnetlink.h>
#include <linux/skbuff.h>

#include <net/llc.h>

#include "checksum.h"
#include "datapath.h"
#include "vlan.h"
#include "vport-internal_dev.h"
#include "vport-netdev.h"

#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37) && \
        !defined(HAVE_VLAN_BUG_WORKAROUND)
#include <linux/module.h>

static int vlan_tso __read_mostly;
module_param(vlan_tso, int, 0644);
MODULE_PARM_DESC(vlan_tso, "Enable TSO for VLAN packets");
#else
#define vlan_tso true
#endif
static void netdev_port_receive(struct vport *vport, struct sk_buff *skb);

#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,39)
/* Called with rcu_read_lock and bottom-halves disabled. */
static rx_handler_result_t netdev_frame_hook(struct sk_buff **pskb)
{
        struct sk_buff *skb = *pskb;
        struct vport *vport;

        if (unlikely(skb->pkt_type == PACKET_LOOPBACK))
                return RX_HANDLER_PASS;

        vport = ovs_netdev_get_vport(skb->dev);

        netdev_port_receive(vport, skb);

        return RX_HANDLER_CONSUMED;
}
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,36)
/* Called with rcu_read_lock and bottom-halves disabled. */
static struct sk_buff *netdev_frame_hook(struct sk_buff *skb)
{
        struct vport *vport;

        if (unlikely(skb->pkt_type == PACKET_LOOPBACK))
                return skb;

        vport = ovs_netdev_get_vport(skb->dev);

        netdev_port_receive(vport, skb);

        return NULL;
}
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22)
/*
 * Used as br_handle_frame_hook.  (The bridge cannot run at the same time,
 * even on a different set of devices!)
 */
/* Called with rcu_read_lock and bottom-halves disabled. */
static struct sk_buff *netdev_frame_hook(struct net_bridge_port *p,
                                         struct sk_buff *skb)
{
        netdev_port_receive((struct vport *)p, skb);
        return NULL;
}
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
/*
 * Used as br_handle_frame_hook.  (The bridge cannot run at the same time,
 * even on a different set of devices!)
 */
/* Called with rcu_read_lock and bottom-halves disabled. */
static int netdev_frame_hook(struct net_bridge_port *p, struct sk_buff **pskb)
{
        netdev_port_receive((struct vport *)p, *pskb);
        return 1;
}
#else
#error
#endif
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,36)
static int netdev_init(void) { return 0; }
static void netdev_exit(void) { }
#else
static int netdev_init(void)
{
        /* Hook into the callback used by the bridge to intercept packets.
         * Parasites we are. */
        br_handle_frame_hook = netdev_frame_hook;
        return 0;
}

static void netdev_exit(void)
{
        br_handle_frame_hook = NULL;
}
#endif
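
/* Attaches an existing network device to the datapath: looks the device up
 * by name, rejects loopback, non-Ethernet, and OVS internal devices,
 * registers the rx handler under RTNL, and puts the device into promiscuous
 * mode. */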
static struct vport *netdev_create(const struct vport_parms *parms)
{
        struct vport *vport;
        struct netdev_vport *netdev_vport;
        int err;

        vport = ovs_vport_alloc(sizeof(struct netdev_vport),
                                &ovs_netdev_vport_ops, parms);
        if (IS_ERR(vport)) {
                err = PTR_ERR(vport);
                goto error;
        }

        netdev_vport = netdev_vport_priv(vport);

        netdev_vport->dev = dev_get_by_name(ovs_dp_get_net(vport->dp), parms->name);
        if (!netdev_vport->dev) {
                err = -ENODEV;
                goto error_free_vport;
        }

        if (netdev_vport->dev->flags & IFF_LOOPBACK ||
            netdev_vport->dev->type != ARPHRD_ETHER ||
            ovs_is_internal_dev(netdev_vport->dev)) {
                err = -EINVAL;
                goto error_put;
        }

        rtnl_lock();
        err = netdev_rx_handler_register(netdev_vport->dev, netdev_frame_hook,
                                         vport);
        if (err)
                goto error_unlock;

        dev_set_promiscuity(netdev_vport->dev, 1);
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24)
        dev_disable_lro(netdev_vport->dev);
#endif
        netdev_vport->dev->priv_flags |= IFF_OVS_DATAPATH;
        rtnl_unlock();

        return vport;

error_unlock:
        rtnl_unlock();
error_put:
        dev_put(netdev_vport->dev);
error_free_vport:
        ovs_vport_free(vport);
error:
        return ERR_PTR(err);
}
static void free_port_rcu(struct rcu_head *rcu)
{
        struct netdev_vport *netdev_vport = container_of(rcu,
                                        struct netdev_vport, rcu);

        dev_put(netdev_vport->dev);
        ovs_vport_free(vport_from_priv(netdev_vport));
}
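
/* Detaches the device from the datapath under RTNL, then defers freeing the
 * vport to free_port_rcu() after an RCU grace period, since receivers may
 * still be running netdev_frame_hook. */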
static void netdev_destroy(struct vport *vport)
{
        struct netdev_vport *netdev_vport = netdev_vport_priv(vport);

        rtnl_lock();
        netdev_vport->dev->priv_flags &= ~IFF_OVS_DATAPATH;
        netdev_rx_handler_unregister(netdev_vport->dev);
        dev_set_promiscuity(netdev_vport->dev, -1);
        rtnl_unlock();

        call_rcu(&netdev_vport->rcu, free_port_rcu);
}
const char *ovs_netdev_get_name(const struct vport *vport)
{
        const struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
        return netdev_vport->dev->name;
}

int ovs_netdev_get_ifindex(const struct vport *vport)
{
        const struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
        return netdev_vport->dev->ifindex;
}
/* Must be called with rcu_read_lock. */
static void netdev_port_receive(struct vport *vport, struct sk_buff *skb)
{
        if (unlikely(!vport))
                goto error;

        if (unlikely(skb_warn_if_lro(skb)))
                goto error;

        /* Make our own copy of the packet.  Otherwise we will mangle the
         * packet for anyone who came before us (e.g. tcpdump via AF_PACKET).
         * (No one comes after us, since we tell handle_bridge() that we took
         * the packet.) */
        skb = skb_share_check(skb, GFP_ATOMIC);
        if (unlikely(!skb))
                return;

        skb_push(skb, ETH_HLEN);

        if (unlikely(compute_ip_summed(skb, false)))
                goto error;

        vlan_copy_skb_tci(skb);

        ovs_vport_receive(vport, skb);
        return;

error:
        kfree_skb(skb);
}
static unsigned int packet_length(const struct sk_buff *skb)
{
        unsigned int length = skb->len - ETH_HLEN;

        if (skb->protocol == htons(ETH_P_8021Q))
                length -= VLAN_HLEN;

        return length;
}

static bool dev_supports_vlan_tx(struct net_device *dev)
{
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,37)
        /* Software fallback means every device supports vlan_tci on TX. */
        return true;
#elif defined(HAVE_VLAN_BUG_WORKAROUND)
        return dev->features & NETIF_F_HW_VLAN_TX;
#else
        /* Assume that the driver is buggy. */
        return false;
#endif
}
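
/* Transmits the packet on the attached device, software-tagging (and, when
 * necessary, GSO-segmenting) VLAN packets for drivers that cannot insert
 * VLAN tags in hardware.  Returns the number of bytes sent, or 0 on
 * error. */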
static int netdev_send(struct vport *vport, struct sk_buff *skb)
{
        struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
        int mtu = netdev_vport->dev->mtu;
        int len;

        if (unlikely(packet_length(skb) > mtu && !skb_is_gso(skb))) {
                net_warn_ratelimited("%s: dropped over-mtu packet: %d > %d\n",
                                     netdev_vport->dev->name,
                                     packet_length(skb), mtu);
                goto error;
        }

        skb->dev = netdev_vport->dev;
        forward_ip_summed(skb, true);

        if (vlan_tx_tag_present(skb) && !dev_supports_vlan_tx(skb->dev)) {
                int features;

                features = netif_skb_features(skb);

                if (!vlan_tso)
                        features &= ~(NETIF_F_TSO | NETIF_F_TSO6 |
                                      NETIF_F_UFO | NETIF_F_FSO);

                if (netif_needs_gso(skb, features)) {
                        struct sk_buff *nskb;

                        nskb = skb_gso_segment(skb, features);
                        if (!nskb) {
                                if (unlikely(skb_cloned(skb) &&
                                    pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) {
                                        kfree_skb(skb);
                                        return 0;
                                }

                                skb_shinfo(skb)->gso_type &= ~SKB_GSO_DODGY;
                                goto tag;
                        }

                        if (IS_ERR(nskb)) {
                                kfree_skb(skb);
                                return 0;
                        }
                        consume_skb(skb);
                        skb = nskb;

                        len = 0;
                        do {
                                nskb = skb->next;
                                skb->next = NULL;

                                skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb));
                                if (likely(skb)) {
                                        len += skb->len;
                                        vlan_set_tci(skb, 0);
                                        dev_queue_xmit(skb);
                                }

                                skb = nskb;
                        } while (skb);

                        return len;
                }

tag:
                skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb));
                if (unlikely(!skb))
                        return 0;
                vlan_set_tci(skb, 0);
        }

        len = skb->len;
        dev_queue_xmit(skb);

        return len;

error:
        kfree_skb(skb);
        ovs_vport_record_error(vport, VPORT_E_TX_DROPPED);
        return 0;
}
/* Returns null if this device is not attached to a datapath. */
struct vport *ovs_netdev_get_vport(struct net_device *dev)
{
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,36)
#if IFF_OVS_DATAPATH != 0
        if (likely(dev->priv_flags & IFF_OVS_DATAPATH))
#else
        if (likely(rcu_access_pointer(dev->rx_handler) == netdev_frame_hook))
#endif
                return (struct vport *)rcu_dereference_rtnl(dev->rx_handler_data);
        else
                return NULL;
#else
        return (struct vport *)rcu_dereference_rtnl(dev->br_port);
#endif
}
const struct vport_ops ovs_netdev_vport_ops = {
        .type           = OVS_VPORT_TYPE_NETDEV,
        .flags          = VPORT_F_REQUIRED,
        .init           = netdev_init,
        .exit           = netdev_exit,
        .create         = netdev_create,
        .destroy        = netdev_destroy,
        .get_name       = ovs_netdev_get_name,
        .get_ifindex    = ovs_netdev_get_ifindex,
        .send           = netdev_send,
};
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,36)
/*
 * Enforces mutual exclusion with the Linux bridge module by declaring and
 * exporting br_should_route_hook.  Because the bridge module also exports
 * the same symbol, the module loader will refuse to load both modules at
 * the same time (e.g. "bridge: exports duplicate symbol
 * br_should_route_hook (owned by openvswitch)").
 *
 * Before Linux 2.6.36, Open vSwitch cannot safely coexist with the Linux
 * bridge module, so openvswitch uses this declaration in those versions.
 * In Linux 2.6.36 and later, Open vSwitch can coexist with the bridge
 * module.
 *
 * The use of "typeof" here avoids the need to track changes in the type of
 * br_should_route_hook over various kernel versions.
 */
typeof(br_should_route_hook) br_should_route_hook;
EXPORT_SYMBOL(br_should_route_hook);
#endif