2
0
mirror of https://github.com/openvswitch/ovs synced 2025-08-30 22:05:19 +00:00

ofproto: Avoid buffer copy in OFPT_PACKET_IN path.

When a dpif passes an odp_msg down to ofproto, and ofproto transforms it
into an ofp_packet_in to send to the controller, until now this always
involved a full copy of the packet inside ofproto.  This commit eliminates
this copy by ensuring that there is always enough headroom in the ofpbuf
that holds the odp_msg to replace it by an ofp_packet_in in-place.

From Jean Tourrilhes <jt@hpl.hp.com>, with some revisions.
This commit is contained in:
Ben Pfaff
2010-04-27 09:40:46 -07:00
parent 1f256c11cd
commit 4325359529
5 changed files with 123 additions and 43 deletions

View File

@@ -460,7 +460,8 @@ dpif_linux_recv(struct dpif *dpif_, struct ofpbuf **bufp)
int retval;
int error;
buf = ofpbuf_new(65536);
buf = ofpbuf_new(65536 + DPIF_RECV_MSG_PADDING);
ofpbuf_reserve(buf, DPIF_RECV_MSG_PADDING);
retval = read(dpif->fd, ofpbuf_tail(buf), ofpbuf_tailroom(buf));
if (retval < 0) {
error = errno;

View File

@@ -1280,7 +1280,8 @@ dp_netdev_output_control(struct dp_netdev *dp, const struct ofpbuf *packet,
}
msg_size = sizeof *header + packet->size;
msg = ofpbuf_new(msg_size);
msg = ofpbuf_new(msg_size + DPIF_RECV_MSG_PADDING);
ofpbuf_reserve(msg, DPIF_RECV_MSG_PADDING);
header = ofpbuf_put_uninit(msg, sizeof *header);
header->type = queue_no;
header->length = msg_size;

View File

@@ -21,7 +21,9 @@
* datapath. */
#include <assert.h>
#include "openflow/openflow.h"
#include "dpif.h"
#include "util.h"
#ifdef __cplusplus
extern "C" {
@@ -300,8 +302,10 @@ struct dpif_class {
/* Attempts to receive a message from 'dpif'. If successful, stores the
* message into '*packetp'. The message, if one is received, must begin
* with 'struct odp_msg' as a header. Only messages of the types selected
* with the set_listen_mask member function should be received.
* with 'struct odp_msg' as a header, and must have at least
* DPIF_RECV_MSG_PADDING bytes of headroom (allocated using
* e.g. ofpbuf_reserve()). Only messages of the types selected with the
* set_listen_mask member function should be received.
*
* This function must not block. If no message is ready to be received
* when it is called, it should return EAGAIN without blocking. */
@@ -312,6 +316,14 @@ struct dpif_class {
void (*recv_wait)(struct dpif *dpif);
};
/* Minimum number of bytes of headroom for a packet returned by the 'recv'
* member function (see above). This headroom allows "struct odp_msg" to be
* replaced by "struct ofp_packet_in" without copying the buffer. */
#define DPIF_RECV_MSG_PADDING (sizeof(struct ofp_packet_in) \
- sizeof(struct odp_msg))
BUILD_ASSERT_DECL(sizeof(struct ofp_packet_in) > sizeof(struct odp_msg));
BUILD_ASSERT_DECL(DPIF_RECV_MSG_PADDING % 4 == 0);
extern const struct dpif_class dpif_linux_class;
extern const struct dpif_class dpif_netdev_class;

View File

@@ -1015,7 +1015,8 @@ dpif_set_sflow_probability(struct dpif *dpif, uint32_t probability)
/* Attempts to receive a message from 'dpif'. If successful, stores the
* message into '*packetp'. The message, if one is received, will begin with
* 'struct odp_msg' as a header. Only messages of the types selected with
* 'struct odp_msg' as a header, and will have at least DPIF_RECV_MSG_PADDING
* bytes of headroom. Only messages of the types selected with
* dpif_set_listen_mask() will ordinarily be received (but if a message type is
* enabled and then later disabled, some stragglers might pop up).
*
@@ -1026,8 +1027,10 @@ dpif_recv(struct dpif *dpif, struct ofpbuf **packetp)
{
int error = dpif->dpif_class->recv(dpif, packetp);
if (!error) {
struct ofpbuf *buf = *packetp;
assert(ofpbuf_headroom(buf) >= DPIF_RECV_MSG_PADDING);
if (VLOG_IS_DBG_ENABLED()) {
struct ofpbuf *buf = *packetp;
struct odp_msg *msg = buf->data;
void *payload = msg + 1;
size_t payload_len = buf->size - sizeof *msg;

View File

@@ -1,5 +1,6 @@
/*
* Copyright (c) 2009, 2010 Nicira Networks.
* Copyright (c) 2010 Jean Tourrilhes - HP-Labs.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -3993,65 +3994,127 @@ update_used(struct ofproto *p)
free(flows);
}
/* pinsched callback for sending 'packet' on 'ofconn'. */
static void
do_send_packet_in(struct ofpbuf *packet, void *ofconn_)
{
struct ofconn *ofconn = ofconn_;
struct ofproto *ofproto = ofconn->ofproto;
struct odp_msg *msg = packet->data;
struct ofpbuf payload;
struct ofpbuf *opi;
uint32_t buffer_id;
int send_len;
/* Extract packet payload from 'msg'. */
payload.data = msg + 1;
payload.size = msg->length - sizeof *msg;
/* Construct packet-in message. */
send_len = INT_MAX;
if (msg->type == _ODPL_ACTION_NR) {
buffer_id = UINT32_MAX;
} else {
if (ofproto->fail_open && fail_open_is_active(ofproto->fail_open)) {
buffer_id = pktbuf_get_null();
} else {
buffer_id = pktbuf_save(ofconn->pktbuf, &payload, msg->port);
}
if (buffer_id != UINT32_MAX) {
send_len = ofconn->miss_send_len;
}
}
opi = make_packet_in(buffer_id, odp_port_to_ofp_port(msg->port),
msg->type, &payload, send_len);
/* Send. */
rconn_send_with_limit(ofconn->rconn, opi, ofconn->packet_in_counter, 100);
ofpbuf_delete(packet);
rconn_send_with_limit(ofconn->rconn, packet,
ofconn->packet_in_counter, 100);
}
/* Takes 'packet', which has been converted with do_convert_to_packet_in(), and
* finalizes its content for sending on 'ofconn', and passes it to 'ofconn''s
* packet scheduler for sending.
*
* If 'clone' is true, the caller retains ownership of 'packet'. Otherwise,
* ownership is transferred to this function. */
static void
schedule_packet_in(struct ofconn *ofconn, struct ofpbuf *packet, bool clone)
{
struct ofproto *ofproto = ofconn->ofproto;
struct ofp_packet_in *opi = packet->data;
uint16_t in_port = ofp_port_to_odp_port(ntohs(opi->in_port));
int send_len, trim_size;
uint32_t buffer_id;
/* Get buffer. */
if (opi->reason == OFPR_ACTION) {
buffer_id = UINT32_MAX;
} else if (ofproto->fail_open && fail_open_is_active(ofproto->fail_open)) {
buffer_id = pktbuf_get_null();
} else {
struct ofpbuf payload;
payload.data = opi->data;
payload.size = packet->size - offsetof(struct ofp_packet_in, data);
buffer_id = pktbuf_save(ofconn->pktbuf, &payload, in_port);
}
/* Figure out how much of the packet to send. */
send_len = ntohs(opi->total_len);
if (buffer_id != UINT32_MAX) {
send_len = MIN(send_len, ofconn->miss_send_len);
}
/* Adjust packet length and clone if necessary. */
trim_size = offsetof(struct ofp_packet_in, data) + send_len;
if (clone) {
packet = ofpbuf_clone_data(packet->data, trim_size);
opi = packet->data;
} else {
packet->size = trim_size;
}
/* Update packet headers. */
opi->buffer_id = htonl(buffer_id);
update_openflow_length(packet);
/* Hand over to packet scheduler. It might immediately call into
* do_send_packet_in() or it might buffer it for a while (until a later
* call to pinsched_run()). */
pinsched_send(ofconn->schedulers[opi->reason], in_port,
packet, do_send_packet_in, ofconn);
}
/* Replace struct odp_msg header in 'packet' by equivalent struct
* ofp_packet_in. The odp_msg must have sufficient headroom to do so (e.g. as
* returned by dpif_recv()).
*
* The conversion is not complete: the caller still needs to trim any unneeded
* payload off the end of the buffer, set the length in the OpenFlow header,
* and set buffer_id. Those require us to know the controller settings and so
* must be done on a per-controller basis. */
static void
do_convert_to_packet_in(struct ofpbuf *packet)
{
struct odp_msg *msg = packet->data;
struct ofp_packet_in *opi;
uint8_t reason;
uint16_t total_len;
uint16_t in_port;
/* Extract relevant header fields */
reason = (msg->type == _ODPL_ACTION_NR ? OFPR_ACTION : OFPR_NO_MATCH);
total_len = msg->length - sizeof *msg;
in_port = odp_port_to_ofp_port(msg->port);
/* Repurpose packet buffer by overwriting header. */
ofpbuf_pull(packet, sizeof(struct odp_msg));
opi = ofpbuf_push_zeros(packet, offsetof(struct ofp_packet_in, data));
opi->header.version = OFP_VERSION;
opi->header.type = OFPT_PACKET_IN;
opi->total_len = htons(total_len);
opi->in_port = htons(in_port);
opi->reason = reason;
}
/* Given 'packet' containing an odp_msg of type _ODPL_ACTION_NR or
* _ODPL_MISS_NR, sends an OFPT_PACKET_IN message to each OpenFlow controller
* as necessary according to their individual configurations.
*
* 'packet' must have sufficient headroom to convert it into a struct
* ofp_packet_in (e.g. as returned by dpif_recv()).
*
* Takes ownership of 'packet'. */
static void
send_packet_in(struct ofproto *ofproto, struct ofpbuf *packet)
{
struct odp_msg *msg = packet->data;
struct ofconn *ofconn, *prev;
assert(msg->type == _ODPL_MISS_NR || msg->type == _ODPL_ACTION_NR);
do_convert_to_packet_in(packet);
prev = NULL;
LIST_FOR_EACH (ofconn, struct ofconn, node, &ofproto->all_conns) {
if (ofconn->role != NX_ROLE_SLAVE) {
if (prev) {
pinsched_send(prev->schedulers[msg->type], msg->port,
ofpbuf_clone(packet), do_send_packet_in, prev);
schedule_packet_in(prev, packet, true);
}
prev = ofconn;
}
}
if (prev) {
pinsched_send(prev->schedulers[msg->type], msg->port,
packet, do_send_packet_in, prev);
schedule_packet_in(prev, packet, false);
} else {
ofpbuf_delete(packet);
}