2
0
mirror of https://github.com/openvswitch/ovs synced 2025-08-31 06:15:47 +00:00

Use batch process recv for tap and raw socket in netdev datapath

Current netdev_linux_rxq_recv_tap and netdev_linux_rxq_recv_sock
just receive single packet, that is very inefficient, per my test
case which adds two tap ports or veth ports into OVS bridge
(datapath_type=netdev) and use iperf3 to do performance test
between two ports (they are set into different network name space).

The result is as below:

  tap:  295 Mbits/sec
  veth: 207 Mbits/sec

After I change netdev_linux_rxq_recv_tap and
netdev_linux_rxq_recv_sock to use batch process, the performance
is boosted by about 7 times, here is the result:

  tap:  1.96 Gbits/sec
  veth: 1.47 Gbits/sec

Undoubtedly this is a huge improvement although it can't match
OVS kernel datapath yet.

FYI: here is thr result for OVS kernel datapath:

  tap:  37.2 Gbits/sec
  veth: 36.3 Gbits/sec

Note: performance result is highly related with your test machine,
you shouldn't expect the same results on your test machine.

Signed-off-by: Yi Yang <yangyi01@inspur.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>
This commit is contained in:
Yi Yang
2019-12-17 21:35:27 -05:00
committed by Ben Pfaff
parent b90189841f
commit 2109841b79

View File

@@ -1151,90 +1151,147 @@ auxdata_has_vlan_tci(const struct tpacket_auxdata *aux)
return aux->tp_vlan_tci || aux->tp_status & TP_STATUS_VLAN_VALID;
}
/*
* Receive packets from raw socket in batch process for better performance,
* it can receive NETDEV_MAX_BURST packets at most once, the received
* packets are added into *batch. The return value is 0 or errno.
*
* It also used recvmmsg to reduce multiple syscalls overhead;
*/
static int
netdev_linux_rxq_recv_sock(int fd, struct dp_packet *buffer)
netdev_linux_batch_rxq_recv_sock(int fd, int mtu,
struct dp_packet_batch *batch)
{
size_t size;
ssize_t retval;
struct iovec iov;
struct iovec iovs[NETDEV_MAX_BURST];
struct cmsghdr *cmsg;
union {
struct cmsghdr cmsg;
char buffer[CMSG_SPACE(sizeof(struct tpacket_auxdata))];
} cmsg_buffer;
struct msghdr msgh;
} cmsg_buffers[NETDEV_MAX_BURST];
struct mmsghdr mmsgs[NETDEV_MAX_BURST];
struct dp_packet *buffers[NETDEV_MAX_BURST];
int i;
/* Reserve headroom for a single VLAN tag */
dp_packet_reserve(buffer, VLAN_HEADER_LEN);
size = dp_packet_tailroom(buffer);
iov.iov_base = dp_packet_data(buffer);
iov.iov_len = size;
msgh.msg_name = NULL;
msgh.msg_namelen = 0;
msgh.msg_iov = &iov;
msgh.msg_iovlen = 1;
msgh.msg_control = &cmsg_buffer;
msgh.msg_controllen = sizeof cmsg_buffer;
msgh.msg_flags = 0;
for (i = 0; i < NETDEV_MAX_BURST; i++) {
buffers[i] = dp_packet_new_with_headroom(VLAN_ETH_HEADER_LEN + mtu,
DP_NETDEV_HEADROOM);
/* Reserve headroom for a single VLAN tag */
dp_packet_reserve(buffers[i], VLAN_HEADER_LEN);
size = dp_packet_tailroom(buffers[i]);
iovs[i].iov_base = dp_packet_data(buffers[i]);
iovs[i].iov_len = size;
mmsgs[i].msg_hdr.msg_name = NULL;
mmsgs[i].msg_hdr.msg_namelen = 0;
mmsgs[i].msg_hdr.msg_iov = &iovs[i];
mmsgs[i].msg_hdr.msg_iovlen = 1;
mmsgs[i].msg_hdr.msg_control = &cmsg_buffers[i];
mmsgs[i].msg_hdr.msg_controllen = sizeof cmsg_buffers[i];
mmsgs[i].msg_hdr.msg_flags = 0;
}
do {
retval = recvmsg(fd, &msgh, MSG_TRUNC);
retval = recvmmsg(fd, mmsgs, NETDEV_MAX_BURST, MSG_TRUNC, NULL);
} while (retval < 0 && errno == EINTR);
if (retval < 0) {
return errno;
} else if (retval > size) {
return EMSGSIZE;
/* Save -errno to retval temporarily */
retval = -errno;
i = 0;
goto free_buffers;
}
dp_packet_set_size(buffer, dp_packet_size(buffer) + retval);
for (cmsg = CMSG_FIRSTHDR(&msgh); cmsg; cmsg = CMSG_NXTHDR(&msgh, cmsg)) {
const struct tpacket_auxdata *aux;
if (cmsg->cmsg_level != SOL_PACKET
|| cmsg->cmsg_type != PACKET_AUXDATA
|| cmsg->cmsg_len < CMSG_LEN(sizeof(struct tpacket_auxdata))) {
continue;
}
aux = ALIGNED_CAST(struct tpacket_auxdata *, CMSG_DATA(cmsg));
if (auxdata_has_vlan_tci(aux)) {
struct eth_header *eth;
bool double_tagged;
if (retval < ETH_HEADER_LEN) {
return EINVAL;
}
eth = dp_packet_data(buffer);
double_tagged = eth->eth_type == htons(ETH_TYPE_VLAN_8021Q);
eth_push_vlan(buffer, auxdata_to_vlan_tpid(aux, double_tagged),
htons(aux->tp_vlan_tci));
for (i = 0; i < retval; i++) {
if (mmsgs[i].msg_len < ETH_HEADER_LEN) {
break;
}
dp_packet_set_size(buffers[i],
dp_packet_size(buffers[i]) + mmsgs[i].msg_len);
for (cmsg = CMSG_FIRSTHDR(&mmsgs[i].msg_hdr); cmsg;
cmsg = CMSG_NXTHDR(&mmsgs[i].msg_hdr, cmsg)) {
const struct tpacket_auxdata *aux;
if (cmsg->cmsg_level != SOL_PACKET
|| cmsg->cmsg_type != PACKET_AUXDATA
|| cmsg->cmsg_len <
CMSG_LEN(sizeof(struct tpacket_auxdata))) {
continue;
}
aux = ALIGNED_CAST(struct tpacket_auxdata *, CMSG_DATA(cmsg));
if (auxdata_has_vlan_tci(aux)) {
struct eth_header *eth;
bool double_tagged;
eth = dp_packet_data(buffers[i]);
double_tagged = eth->eth_type == htons(ETH_TYPE_VLAN_8021Q);
eth_push_vlan(buffers[i],
auxdata_to_vlan_tpid(aux, double_tagged),
htons(aux->tp_vlan_tci));
break;
}
}
dp_packet_batch_add(batch, buffers[i]);
}
free_buffers:
/* Free unused buffers, including buffers whose size is less than
* ETH_HEADER_LEN.
*
* Note: i has been set correctly by the above for loop, so don't
* try to re-initialize it.
*/
for (; i < NETDEV_MAX_BURST; i++) {
dp_packet_delete(buffers[i]);
}
/* netdev_linux_rxq_recv needs it to return 0 or positive errno */
if (retval < 0) {
return -retval;
}
return 0;
}
/*
* Receive packets from tap by batch process for better performance,
* it can receive NETDEV_MAX_BURST packets at most once, the received
* packets are added into *batch. The return value is 0 or errno.
*/
static int
netdev_linux_rxq_recv_tap(int fd, struct dp_packet *buffer)
netdev_linux_batch_rxq_recv_tap(int fd, int mtu, struct dp_packet_batch *batch)
{
struct dp_packet *buffer;
ssize_t retval;
size_t size = dp_packet_tailroom(buffer);
size_t size;
int i;
do {
retval = read(fd, dp_packet_data(buffer), size);
} while (retval < 0 && errno == EINTR);
for (i = 0; i < NETDEV_MAX_BURST; i++) {
/* Assume Ethernet port. No need to set packet_type. */
buffer = dp_packet_new_with_headroom(VLAN_ETH_HEADER_LEN + mtu,
DP_NETDEV_HEADROOM);
size = dp_packet_tailroom(buffer);
do {
retval = read(fd, dp_packet_data(buffer), size);
} while (retval < 0 && errno == EINTR);
if (retval < 0) {
if (retval < 0) {
dp_packet_delete(buffer);
break;
}
dp_packet_set_size(buffer, dp_packet_size(buffer) + retval);
dp_packet_batch_add(batch, buffer);
}
if ((i == 0) && (retval < 0)) {
return errno;
}
dp_packet_set_size(buffer, dp_packet_size(buffer) + retval);
return 0;
}
@@ -1244,7 +1301,6 @@ netdev_linux_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet_batch *batch,
{
struct netdev_rxq_linux *rx = netdev_rxq_linux_cast(rxq_);
struct netdev *netdev = rx->up.netdev;
struct dp_packet *buffer;
ssize_t retval;
int mtu;
@@ -1252,21 +1308,16 @@ netdev_linux_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet_batch *batch,
mtu = ETH_PAYLOAD_MAX;
}
/* Assume Ethernet port. No need to set packet_type. */
buffer = dp_packet_new_with_headroom(VLAN_ETH_HEADER_LEN + mtu,
DP_NETDEV_HEADROOM);
dp_packet_batch_init(batch);
retval = (rx->is_tap
? netdev_linux_rxq_recv_tap(rx->fd, buffer)
: netdev_linux_rxq_recv_sock(rx->fd, buffer));
? netdev_linux_batch_rxq_recv_tap(rx->fd, mtu, batch)
: netdev_linux_batch_rxq_recv_sock(rx->fd, mtu, batch));
if (retval) {
if (retval != EAGAIN && retval != EMSGSIZE) {
VLOG_WARN_RL(&rl, "error receiving Ethernet packet on %s: %s",
netdev_rxq_get_name(rxq_), ovs_strerror(errno));
}
dp_packet_delete(buffer);
} else {
dp_packet_batch_init_packet(batch, buffer);
}
if (qfill) {