mirror of
https://github.com/openvswitch/ovs
synced 2025-08-30 22:05:19 +00:00
datapath: Report kernel's flow key when passing packets up to userspace.
One of the goals for Open vSwitch is to decouple kernel and userspace software, so that either one can be upgraded or rolled back independent of the other. To do this in full generality, it must be possible to change the kernel's idea of the flow key separately from the userspace version. This commit takes one step in that direction by making the kernel report its idea of the flow that a packet belongs to whenever it passes a packet up to userspace. This means that userspace can intelligently figure out what to do: - If userspace's notion of the flow for the packet matches the kernel's, then nothing special is necessary. - If the kernel has a more specific notion for the flow than userspace, for example if the kernel decoded IPv6 headers but userspace stopped at the Ethernet type (because it does not understand IPv6), then again nothing special is necessary: userspace can still set up the flow in the usual way. - If userspace has a more specific notion for the flow than the kernel, for example if userspace decoded an IPv6 header but the kernel stopped at the Ethernet type, then userspace can forward the packet manually, without setting up a flow in the kernel. (This case is bad from a performance point of view, but at least it is correct.) This commit does not actually make userspace flexible enough to handle changes in the kernel flow key structure, although userspace does now have enough information to do that intelligently. This will have to wait for later commits. This commit is bigger than it would otherwise be because it is rolled together with changing "struct odp_msg" to a sequence of Netlink attributes. 
The alternative, to do each of those changes in a separate patch, seemed like overkill because it meant that either we would have to introduce and then kill off Netlink attributes for in_port and tun_id, if Netlink conversion went first, or shove yet another variable-length header into the stuff already after odp_msg, if adding the flow key to odp_msg went first. This commit will slow down performance of checksumming packets sent up to userspace. I'm not entirely pleased with how I did it. I considered a couple of alternatives, but none of them seemed that much better. Suggestions welcome. Not changing anything wasn't an option, unfortunately. At any rate some slowdown will become unavoidable when OVS actually starts using Netlink instead of just Netlink framing. (Actually, I thought of one option where we could avoid that: make userspace do the checksum instead, by passing csum_start and csum_offset as part of what goes to userspace. But that's not perfect either.) Signed-off-by: Ben Pfaff <blp@nicira.com> Acked-by: Jesse Gross <jesse@nicira.com>
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2008, 2009, 2010 Nicira Networks.
|
||||
* Copyright (c) 2008, 2009, 2010, 2011 Nicira Networks.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
@@ -36,8 +36,10 @@
|
||||
#include "dpif-provider.h"
|
||||
#include "netdev.h"
|
||||
#include "netdev-vport.h"
|
||||
#include "netlink.h"
|
||||
#include "ofpbuf.h"
|
||||
#include "openvswitch/tunnel.h"
|
||||
#include "packets.h"
|
||||
#include "poll-loop.h"
|
||||
#include "rtnetlink.h"
|
||||
#include "rtnetlink-link.h"
|
||||
@@ -471,14 +473,61 @@ dpif_linux_queue_to_priority(const struct dpif *dpif OVS_UNUSED,
|
||||
}
|
||||
|
||||
static int
|
||||
dpif_linux_recv(struct dpif *dpif_, struct ofpbuf **bufp)
|
||||
parse_odp_packet(struct ofpbuf *buf, struct dpif_upcall *upcall)
|
||||
{
/* Decodes the Netlink attributes carried in 'buf' after its leading
 * 'struct odp_packet' header and fills in '*upcall' from them.
 *
 * Returns 0 on success.  Returns EINVAL if nl_policy_parse() rejects the
 * attributes against odp_packet_policy; in that case '*upcall' has not been
 * written to.
 *
 * On success 'upcall->packet' is 'buf' itself, with its 'data' and 'size'
 * changed to describe only the ODP_PACKET_ATTR_PACKET payload.
 * 'upcall->key' and 'upcall->actions' are whatever nl_attr_get() returns for
 * their attributes -- presumably pointers into the memory owned by 'buf', so
 * they would stay valid only as long as 'buf' does (TODO confirm against
 * nl_attr_get()). */
|
||||
static const struct nl_policy odp_packet_policy[] = {
|
||||
/* Always present. */
|
||||
[ODP_PACKET_ATTR_TYPE] = { .type = NL_A_U32 },
|
||||
[ODP_PACKET_ATTR_PACKET] = { .type = NL_A_UNSPEC,
|
||||
.min_len = ETH_HEADER_LEN },
|
||||
[ODP_PACKET_ATTR_KEY] = { .type = NL_A_NESTED },
|
||||
|
||||
/* _ODPL_ACTION_NR only. */
|
||||
[ODP_PACKET_ATTR_USERDATA] = { .type = NL_A_U64, .optional = true },
|
||||
|
||||
/* _ODPL_SFLOW_NR only. */
|
||||
[ODP_PACKET_ATTR_SAMPLE_POOL] = { .type = NL_A_U32, .optional = true },
|
||||
[ODP_PACKET_ATTR_ACTIONS] = { .type = NL_A_NESTED, .optional = true },
|
||||
};
|
||||
|
||||
/* 'odp_packet' is used below only for its size: sizeof *odp_packet is handed
 * to nl_policy_parse() so that parsing skips the fixed header. */
struct odp_packet *odp_packet = buf->data;
|
||||
/* One slot per policy entry; slots for optional attributes are tested for
 * null below before being read. */
struct nlattr *a[ARRAY_SIZE(odp_packet_policy)];
|
||||
|
||||
if (!nl_policy_parse(buf, sizeof *odp_packet, odp_packet_policy,
|
||||
a, ARRAY_SIZE(odp_packet_policy))) {
|
||||
return EINVAL;
|
||||
}
|
||||
|
||||
/* Zero everything first, so that fields tied to attributes that are absent
 * (for example 'actions' and 'actions_len') are left as 0/NULL. */
memset(upcall, 0, sizeof *upcall);
|
||||
upcall->type = nl_attr_get_u32(a[ODP_PACKET_ATTR_TYPE]);
|
||||
/* Reuse 'buf' as the packet buffer and repoint it at the packet payload. */
upcall->packet = buf;
|
||||
upcall->packet->data = (void *) nl_attr_get(a[ODP_PACKET_ATTR_PACKET]);
|
||||
upcall->packet->size = nl_attr_get_size(a[ODP_PACKET_ATTR_PACKET]);
|
||||
upcall->key = (void *) nl_attr_get(a[ODP_PACKET_ATTR_KEY]);
|
||||
upcall->key_len = nl_attr_get_size(a[ODP_PACKET_ATTR_KEY]);
|
||||
/* Optional attributes: fall back to 0 when the attribute was not sent. */
upcall->userdata = (a[ODP_PACKET_ATTR_USERDATA]
|
||||
? nl_attr_get_u64(a[ODP_PACKET_ATTR_USERDATA])
|
||||
: 0);
|
||||
upcall->sample_pool = (a[ODP_PACKET_ATTR_SAMPLE_POOL]
|
||||
? nl_attr_get_u32(a[ODP_PACKET_ATTR_SAMPLE_POOL])
|
||||
: 0);
|
||||
if (a[ODP_PACKET_ATTR_ACTIONS]) {
|
||||
upcall->actions = (void *) nl_attr_get(a[ODP_PACKET_ATTR_ACTIONS]);
|
||||
upcall->actions_len = nl_attr_get_size(a[ODP_PACKET_ATTR_ACTIONS]);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
dpif_linux_recv(struct dpif *dpif_, struct dpif_upcall *upcall)
|
||||
{
|
||||
struct dpif_linux *dpif = dpif_linux_cast(dpif_);
|
||||
struct ofpbuf *buf;
|
||||
int retval;
|
||||
int error;
|
||||
|
||||
buf = ofpbuf_new_with_headroom(65536, DPIF_RECV_MSG_PADDING);
|
||||
buf = ofpbuf_new(65536);
|
||||
retval = read(dpif->fd, ofpbuf_tail(buf), ofpbuf_tailroom(buf));
|
||||
if (retval < 0) {
|
||||
error = errno;
|
||||
@@ -486,30 +535,30 @@ dpif_linux_recv(struct dpif *dpif_, struct ofpbuf **bufp)
|
||||
VLOG_WARN_RL(&error_rl, "%s: read failed: %s",
|
||||
dpif_name(dpif_), strerror(error));
|
||||
}
|
||||
} else if (retval >= sizeof(struct odp_msg)) {
|
||||
struct odp_msg *msg = buf->data;
|
||||
if (msg->length <= retval) {
|
||||
buf->size += retval;
|
||||
*bufp = buf;
|
||||
return 0;
|
||||
} else if (retval >= sizeof(struct odp_packet)) {
|
||||
struct odp_packet *odp_packet = buf->data;
|
||||
buf->size += retval;
|
||||
|
||||
if (odp_packet->len <= retval) {
|
||||
error = parse_odp_packet(buf, upcall);
|
||||
} else {
|
||||
VLOG_WARN_RL(&error_rl, "%s: discarding message truncated "
|
||||
"from %"PRIu32" bytes to %d",
|
||||
dpif_name(dpif_), msg->length, retval);
|
||||
dpif_name(dpif_), odp_packet->len, retval);
|
||||
error = ERANGE;
|
||||
}
|
||||
} else if (!retval) {
|
||||
VLOG_WARN_RL(&error_rl, "%s: unexpected end of file", dpif_name(dpif_));
|
||||
error = EPROTO;
|
||||
} else {
|
||||
VLOG_WARN_RL(&error_rl,
|
||||
"%s: discarding too-short message (%d bytes)",
|
||||
VLOG_WARN_RL(&error_rl, "%s: discarding too-short message (%d bytes)",
|
||||
dpif_name(dpif_), retval);
|
||||
error = ERANGE;
|
||||
}
|
||||
|
||||
*bufp = NULL;
|
||||
ofpbuf_delete(buf);
|
||||
if (error) {
|
||||
ofpbuf_delete(buf);
|
||||
}
|
||||
return error;
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user