mirror of
https://github.com/openvswitch/ovs
synced 2025-10-19 14:37:21 +00:00
Enables checksum offloading, scatter/gather, and TSO on internal devices. While these optimizations were not previously enabled on internal ports, we could already receive these types of packets from Xen guests. This has the obvious performance benefits when these packets can be passed directly to hardware.

There is also a more subtle benefit for GRE on Xen. GRE packets pass through OVS twice - once before encapsulation and once after encapsulation, moving through an internal device in the process. If it is a SG packet (as is common on Xen), a copy was necessary to linearize for the internal device. However, Xen uses the memory allocator to track packets, so when the original packet is freed after the copy, netback notifies the guest that the packet has been sent, despite the fact that it is actually sitting in the transmit queue. The guest then sends packets as fast as the CPU can handle, overflowing the transmit queue. By enabling SG on the internal device, we avoid the copy and keep the accounting correct.

In certain circumstances this patch can decrease performance for TCP. TCP has its own mechanism for tracking in-flight packets and therefore does not benefit from the corrected socket accounting. However, certain NICs do not like SG when it is not being used for TSO (these packets can no longer be handled by TSO after GRE encapsulation). These NICs presumably enable SG even though they can't handle it well because TSO requires SG.

Tested controllers (all 1G):
- Marvell 88E8053 (large performance hit)
- Broadcom BCM5721 (small performance hit)
- Intel 82571EB (no change)
382 lines
9.1 KiB
C
382 lines
9.1 KiB
C
/*
|
|
* Copyright (c) 2010 Nicira Networks.
|
|
* Distributed under the terms of the GNU GPL version 2.
|
|
*
|
|
* Significant portions of this file may be copied from parts of the Linux
|
|
* kernel, by Linus Torvalds and others.
|
|
*/
|
|
|
|
#include <linux/if_arp.h>
|
|
#include <linux/if_bridge.h>
|
|
#include <linux/if_vlan.h>
|
|
#include <linux/kernel.h>
|
|
#include <linux/llc.h>
|
|
#include <linux/rtnetlink.h>
|
|
#include <linux/skbuff.h>
|
|
|
|
#include <net/llc.h>
|
|
|
|
#include "datapath.h"
|
|
#include "vport-internal_dev.h"
|
|
#include "vport-netdev.h"
|
|
|
|
#include "compat.h"
|
|
|
|
/* Operations table for "netdev" vports.  Declared here so that
 * netdev_create() can reference it; the initializer is at the end of this
 * file. */
struct vport_ops netdev_vport_ops;

/* Common receive path invoked by every variant of netdev_frame_hook(). */
static void netdev_port_receive(struct net_bridge_port *p,
				struct sk_buff *skb);
|
|
|
|
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,27)
|
|
/* LLC SAP handle opened by netdev_avoid_bridge_init() and released by
 * netdev_avoid_bridge_exit(); only used on kernels older than 2.6.27. */
static struct llc_sap *netdev_stp_sap;
|
|
|
|
/* Receive callback registered for the STP SAP.
 *
 * STP frames are of no interest here: the SAP is held purely for mutual
 * exclusion with the bridge module, so every frame is freed on arrival.
 * 'dev', 'pt' and 'orig_dev' are unused.  Always returns 0. */
static int netdev_stp_rcv(struct sk_buff *skb, struct net_device *dev,
			  struct packet_type *pt, struct net_device *orig_dev)
{
	kfree_skb(skb);

	return 0;
}
|
|
|
|
static int
|
|
netdev_avoid_bridge_init(void)
|
|
{
|
|
/* Register to receive STP packets because the bridge module also
|
|
* attempts to do so. Since there can only be a single listener for a
|
|
* given protocol, this provides mutual exclusion against the bridge
|
|
* module, preventing both of them from being loaded at the same
|
|
* time. */
|
|
netdev_stp_sap = llc_sap_open(LLC_SAP_BSPAN, netdev_stp_rcv);
|
|
if (!netdev_stp_sap) {
|
|
printk(KERN_ERR "openvswitch: can't register sap for STP (probably the bridge module is loaded)\n");
|
|
return -EADDRINUSE;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
static void
|
|
netdev_avoid_bridge_exit(void)
|
|
{
|
|
llc_sap_put(netdev_stp_sap);
|
|
}
|
|
#else /* Linux 2.6.27 or later. */
|
|
static int
|
|
netdev_avoid_bridge_init(void)
|
|
{
|
|
/* Linux 2.6.27 introduces a way for multiple clients to register for
|
|
* STP packets, which interferes with what we try to do above.
|
|
* Instead, just check whether there's a bridge hook defined. This is
|
|
* not as safe--the bridge module is willing to load over the top of
|
|
* us--but it provides a little bit of protection. */
|
|
if (br_handle_frame_hook) {
|
|
printk(KERN_ERR "openvswitch: bridge module is loaded, cannot load over it\n");
|
|
return -EADDRINUSE;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/* Counterpart of netdev_avoid_bridge_init() for Linux 2.6.27 and later.
 * The init side only inspects br_handle_frame_hook and acquires nothing,
 * so there is nothing to release. */
static void netdev_avoid_bridge_exit(void)
{
}
|
|
#endif /* Linux 2.6.27 or later */
|
|
|
|
/*
 * Used as br_handle_frame_hook.  (The bridge module cannot run at the same
 * time, even on a different set of devices!)
 */
|
|
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22)
|
|
/* Called with rcu_read_lock and bottom-halves disabled. */
|
|
static struct sk_buff *
|
|
netdev_frame_hook(struct net_bridge_port *p, struct sk_buff *skb)
|
|
{
|
|
netdev_port_receive(p, skb);
|
|
return NULL;
|
|
}
|
|
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
|
|
/* Frame hook for Linux 2.6.0 through 2.6.21, where the hook receives a
 * pointer to the skb pointer.
 *
 * Called with rcu_read_lock and bottom-halves disabled.  Passes the packet
 * to netdev_port_receive() and always returns 1. */
static int netdev_frame_hook(struct net_bridge_port *p, struct sk_buff **pskb)
{
	netdev_port_receive(p, *pskb);

	return 1;
}
|
|
#else
|
|
#error
|
|
#endif
|
|
|
|
static int
|
|
netdev_init(void)
|
|
{
|
|
int err;
|
|
|
|
err = netdev_avoid_bridge_init();
|
|
if (err)
|
|
return err;
|
|
|
|
/* Hook into callback used by the bridge to intercept packets.
|
|
* Parasites we are. */
|
|
br_handle_frame_hook = netdev_frame_hook;
|
|
|
|
return 0;
|
|
}
|
|
|
|
static void
|
|
netdev_exit(void)
|
|
{
|
|
br_handle_frame_hook = NULL;
|
|
netdev_avoid_bridge_exit();
|
|
}
|
|
|
|
static struct vport *
|
|
netdev_create(const char *name, const void __user *config)
|
|
{
|
|
struct vport *vport;
|
|
struct netdev_vport *netdev_vport;
|
|
int err;
|
|
|
|
vport = vport_alloc(sizeof(struct netdev_vport), &netdev_vport_ops);
|
|
if (IS_ERR(vport)) {
|
|
err = PTR_ERR(vport);
|
|
goto error;
|
|
}
|
|
|
|
netdev_vport = netdev_vport_priv(vport);
|
|
|
|
netdev_vport->dev = dev_get_by_name(&init_net, name);
|
|
if (!netdev_vport->dev) {
|
|
err = -ENODEV;
|
|
goto error_free_vport;
|
|
}
|
|
|
|
if (netdev_vport->dev->flags & IFF_LOOPBACK ||
|
|
netdev_vport->dev->type != ARPHRD_ETHER ||
|
|
is_internal_dev(netdev_vport->dev)) {
|
|
err = -EINVAL;
|
|
goto error_put;
|
|
}
|
|
|
|
if (netdev_vport->dev->br_port) {
|
|
err = -EBUSY;
|
|
goto error_put;
|
|
}
|
|
|
|
return vport;
|
|
|
|
error_put:
|
|
dev_put(netdev_vport->dev);
|
|
error_free_vport:
|
|
vport_free(vport);
|
|
error:
|
|
return ERR_PTR(err);
|
|
}
|
|
|
|
static int
|
|
netdev_destroy(struct vport *vport)
|
|
{
|
|
struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
|
|
|
|
dev_put(netdev_vport->dev);
|
|
vport_free(vport);
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int
|
|
netdev_attach(struct vport *vport)
|
|
{
|
|
struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
|
|
|
|
dev_set_promiscuity(netdev_vport->dev, 1);
|
|
dev_disable_lro(netdev_vport->dev);
|
|
rcu_assign_pointer(netdev_vport->dev->br_port, (struct net_bridge_port *)vport);
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int
|
|
netdev_detach(struct vport *vport)
|
|
{
|
|
struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
|
|
|
|
rcu_assign_pointer(netdev_vport->dev->br_port, NULL);
|
|
dev_set_promiscuity(netdev_vport->dev, -1);
|
|
|
|
return 0;
|
|
}
|
|
|
|
int
|
|
netdev_set_mtu(struct vport *vport, int mtu)
|
|
{
|
|
struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
|
|
return dev_set_mtu(netdev_vport->dev, mtu);
|
|
}
|
|
|
|
int
|
|
netdev_set_addr(struct vport *vport, const unsigned char *addr)
|
|
{
|
|
struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
|
|
struct sockaddr sa;
|
|
|
|
sa.sa_family = ARPHRD_ETHER;
|
|
memcpy(sa.sa_data, addr, ETH_ALEN);
|
|
|
|
return dev_set_mac_address(netdev_vport->dev, &sa);
|
|
}
|
|
|
|
const char *
|
|
netdev_get_name(const struct vport *vport)
|
|
{
|
|
const struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
|
|
return netdev_vport->dev->name;
|
|
}
|
|
|
|
const unsigned char *
|
|
netdev_get_addr(const struct vport *vport)
|
|
{
|
|
const struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
|
|
return netdev_vport->dev->dev_addr;
|
|
}
|
|
|
|
struct kobject *
|
|
netdev_get_kobj(const struct vport *vport)
|
|
{
|
|
const struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
|
|
return &netdev_vport->dev->NETDEV_DEV_MEMBER.kobj;
|
|
}
|
|
|
|
int
|
|
netdev_get_stats(const struct vport *vport, struct odp_vport_stats *stats)
|
|
{
|
|
const struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
|
|
const struct net_device_stats *netdev_stats;
|
|
|
|
netdev_stats = dev_get_stats(netdev_vport->dev);
|
|
|
|
stats->rx_bytes = netdev_stats->rx_bytes;
|
|
stats->rx_packets = netdev_stats->rx_packets;
|
|
stats->tx_bytes = netdev_stats->tx_bytes;
|
|
stats->tx_packets = netdev_stats->tx_packets;
|
|
stats->rx_dropped = netdev_stats->rx_dropped;
|
|
stats->rx_errors = netdev_stats->rx_errors;
|
|
stats->rx_frame_err = netdev_stats->rx_frame_errors;
|
|
stats->rx_over_err = netdev_stats->rx_over_errors;
|
|
stats->rx_crc_err = netdev_stats->rx_crc_errors;
|
|
stats->tx_dropped = netdev_stats->tx_dropped;
|
|
stats->tx_errors = netdev_stats->tx_errors;
|
|
stats->collisions = netdev_stats->collisions;
|
|
|
|
return 0;
|
|
}
|
|
|
|
unsigned
|
|
netdev_get_dev_flags(const struct vport *vport)
|
|
{
|
|
const struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
|
|
return dev_get_flags(netdev_vport->dev);
|
|
}
|
|
|
|
int
|
|
netdev_is_running(const struct vport *vport)
|
|
{
|
|
const struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
|
|
return netif_running(netdev_vport->dev);
|
|
}
|
|
|
|
unsigned char
|
|
netdev_get_operstate(const struct vport *vport)
|
|
{
|
|
const struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
|
|
return netdev_vport->dev->operstate;
|
|
}
|
|
|
|
int
|
|
netdev_get_ifindex(const struct vport *vport)
|
|
{
|
|
const struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
|
|
return netdev_vport->dev->ifindex;
|
|
}
|
|
|
|
int
|
|
netdev_get_iflink(const struct vport *vport)
|
|
{
|
|
const struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
|
|
return netdev_vport->dev->iflink;
|
|
}
|
|
|
|
int
|
|
netdev_get_mtu(const struct vport *vport)
|
|
{
|
|
const struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
|
|
return netdev_vport->dev->mtu;
|
|
}
|
|
|
|
/* Must be called with rcu_read_lock. */
|
|
static void
|
|
netdev_port_receive(struct net_bridge_port *p, struct sk_buff *skb)
|
|
{
|
|
struct vport *vport = (struct vport *)p;
|
|
|
|
/* Make our own copy of the packet. Otherwise we will mangle the
|
|
* packet for anyone who came before us (e.g. tcpdump via AF_PACKET).
|
|
* (No one comes after us, since we tell handle_bridge() that we took
|
|
* the packet.) */
|
|
skb = skb_share_check(skb, GFP_ATOMIC);
|
|
if (!skb)
|
|
return;
|
|
|
|
/* Push the Ethernet header back on. */
|
|
skb_push(skb, ETH_HLEN);
|
|
skb_reset_mac_header(skb);
|
|
compute_ip_summed(skb, false);
|
|
|
|
vport_receive(vport, skb);
|
|
}
|
|
|
|
static int
|
|
netdev_send(struct vport *vport, struct sk_buff *skb)
|
|
{
|
|
struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
|
|
int len = skb->len;
|
|
|
|
skb->dev = netdev_vport->dev;
|
|
forward_ip_summed(skb);
|
|
dev_queue_xmit(skb);
|
|
|
|
return len;
|
|
}
|
|
|
|
/* Returns null if this device is not attached to a datapath. */
|
|
struct vport *
|
|
netdev_get_vport(struct net_device *dev)
|
|
{
|
|
return (struct vport *)dev->br_port;
|
|
}
|
|
|
|
struct vport_ops netdev_vport_ops = {
|
|
.type = "netdev",
|
|
.flags = VPORT_F_REQUIRED,
|
|
.init = netdev_init,
|
|
.exit = netdev_exit,
|
|
.create = netdev_create,
|
|
.destroy = netdev_destroy,
|
|
.attach = netdev_attach,
|
|
.detach = netdev_detach,
|
|
.set_mtu = netdev_set_mtu,
|
|
.set_addr = netdev_set_addr,
|
|
.get_name = netdev_get_name,
|
|
.get_addr = netdev_get_addr,
|
|
.get_kobj = netdev_get_kobj,
|
|
.get_stats = netdev_get_stats,
|
|
.get_dev_flags = netdev_get_dev_flags,
|
|
.is_running = netdev_is_running,
|
|
.get_operstate = netdev_get_operstate,
|
|
.get_ifindex = netdev_get_ifindex,
|
|
.get_iflink = netdev_get_iflink,
|
|
.get_mtu = netdev_get_mtu,
|
|
.send = netdev_send,
|
|
};
|