netdev-linux: Prepend the std packet in the TSO packet
Usually TSO packets are close to 50k or 60k bytes long, so to copy
fewer bytes when receiving a packet from the kernel, change the
approach. Instead of extending the MTU-sized packet received and
appending the remaining TSO data from the TSO buffer, allocate a
TSO packet with enough headroom to prepend the std packet data.
Fixes: 29cf9c1b3b ("userspace: Add TCP Segmentation Offload support")
Suggested-by: Ben Pfaff <blp@ovn.org>
Signed-off-by: Flavio Leitner <fbl@sysclose.org>
Signed-off-by: Ben Pfaff <blp@ovn.org>
committed by Ben Pfaff
parent 2297cbe6cc
commit 73858f9dbe
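To make the trade-off concrete, here is a minimal standalone sketch of the prepend technique (plain C, not OVS code; `buf`, `buf_new_with_headroom`, and `buf_push` are illustrative names): reserving headroom up front turns "join head and payload" into a small copy of the head instead of a large copy of the TSO payload.

#include <assert.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

struct buf {
    uint8_t *base;   /* Start of the allocation. */
    uint8_t *data;   /* Start of valid data; headroom is data - base. */
    size_t size;     /* Bytes of valid data. */
};

/* Allocate a buffer whose data pointer leaves 'headroom' bytes free. */
static struct buf buf_new_with_headroom(size_t capacity, size_t headroom)
{
    struct buf b;
    b.base = malloc(headroom + capacity);
    b.data = b.base + headroom;
    b.size = 0;
    return b;
}

/* Prepend by stepping the data pointer back: an O(len) copy of the small
 * head instead of an O(payload) copy of the TSO data. */
static void buf_push(struct buf *b, const void *src, size_t len)
{
    assert((size_t) (b->data - b->base) >= len); /* Enough headroom. */
    b->data -= len;
    memcpy(b->data, src, len);
    b->size += len;
}

int main(void)
{
    uint8_t head[64] = { 0 };          /* Stands in for the std packet. */
    struct buf tso = buf_new_with_headroom(65536, sizeof head);

    tso.size = 1400;                   /* Pretend TSO payload was read here. */
    buf_push(&tso, head, sizeof head); /* Prepend the small head. */
    free(tso.base);
    return 0;
}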
lib/dp-packet.c
@@ -243,8 +243,8 @@ dp_packet_copy__(struct dp_packet *b, uint8_t *new_base,
 
 /* Reallocates 'b' so that it has exactly 'new_headroom' and 'new_tailroom'
  * bytes of headroom and tailroom, respectively. */
-static void
-dp_packet_resize__(struct dp_packet *b, size_t new_headroom, size_t new_tailroom)
+void
+dp_packet_resize(struct dp_packet *b, size_t new_headroom, size_t new_tailroom)
 {
     void *new_base, *new_data;
     size_t new_allocated;
@@ -297,7 +297,7 @@ void
 dp_packet_prealloc_tailroom(struct dp_packet *b, size_t size)
 {
     if (size > dp_packet_tailroom(b)) {
-        dp_packet_resize__(b, dp_packet_headroom(b), MAX(size, 64));
+        dp_packet_resize(b, dp_packet_headroom(b), MAX(size, 64));
     }
 }
@@ -308,7 +308,7 @@ void
 dp_packet_prealloc_headroom(struct dp_packet *b, size_t size)
 {
     if (size > dp_packet_headroom(b)) {
-        dp_packet_resize__(b, MAX(size, 64), dp_packet_tailroom(b));
+        dp_packet_resize(b, MAX(size, 64), dp_packet_tailroom(b));
     }
 }
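For reference, a standalone sketch of the contract the now-public function keeps (a simplified model with a single flat allocation; `sbuf` and `sbuf_resize` are illustrative names, not the OVS implementation): the payload is moved so that exactly the requested headroom and tailroom surround it.

#include <stdlib.h>
#include <string.h>

/* Simplified model of a packet buffer: payload sits inside the allocation. */
struct sbuf {
    char *base;     /* Start of allocation. */
    char *data;     /* Start of valid payload. */
    size_t size;    /* Payload bytes. */
};

/* Reallocate so that exactly 'new_headroom' and 'new_tailroom' bytes
 * surround the payload, mirroring the dp_packet_resize() comment above. */
static void
sbuf_resize(struct sbuf *b, size_t new_headroom, size_t new_tailroom)
{
    char *new_base = malloc(new_headroom + b->size + new_tailroom);

    memcpy(new_base + new_headroom, b->data, b->size);
    free(b->base);
    b->base = new_base;
    b->data = new_base + new_headroom;
}

int main(void)
{
    struct sbuf b = { malloc(64), NULL, 5 };
    b.data = b.base + 32;
    memcpy(b.data, "hello", 5);

    sbuf_resize(&b, 128, 0);   /* e.g. normalize headroom after a prepend */
    free(b.base);
    return 0;
}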
lib/dp-packet.h
@@ -152,6 +152,8 @@ struct dp_packet *dp_packet_clone_with_headroom(const struct dp_packet *,
 struct dp_packet *dp_packet_clone_data(const void *, size_t);
 struct dp_packet *dp_packet_clone_data_with_headroom(const void *, size_t,
                                                      size_t headroom);
+void dp_packet_resize(struct dp_packet *b, size_t new_headroom,
+                      size_t new_tailroom);
 static inline void dp_packet_delete(struct dp_packet *);
 
 static inline void *dp_packet_at(const struct dp_packet *, size_t offset,
lib/netdev-linux-private.h
@@ -45,7 +45,8 @@ struct netdev_rxq_linux {
     struct netdev_rxq up;
     bool is_tap;
    int fd;
-    char *aux_bufs[NETDEV_MAX_BURST]; /* Batch of preallocated TSO buffers. */
+    struct dp_packet *aux_bufs[NETDEV_MAX_BURST]; /* Preallocated TSO
+                                                     packets. */
 };
 
 int netdev_linux_construct(struct netdev *);
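Switching aux_bufs from raw char pointers to struct dp_packet pointers lets the receive path promote a preallocated buffer into the delivered packet instead of copying out of it. A standalone sketch of that pool-slot handoff pattern (generic C, not the OVS code; all names are illustrative):

#include <stdlib.h>

struct pool { void *slot[4]; };

/* Take ownership of a preallocated object; the slot is refilled lazily. */
static void *pool_take(struct pool *p, int i)
{
    void *obj = p->slot[i];
    p->slot[i] = NULL;
    return obj;
}

/* Refill only the slots that were consumed since the last call. */
static void pool_refill(struct pool *p, int i, size_t sz)
{
    if (!p->slot[i]) {
        p->slot[i] = malloc(sz);
    }
}

int main(void)
{
    struct pool p = { { 0 } };
    pool_refill(&p, 0, 65536);      /* like rxq_recv() preallocating */
    void *pkt = pool_take(&p, 0);   /* like the TSO branch taking aux_bufs[i] */
    free(pkt);
    return 0;
}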
lib/netdev-linux.c
@@ -1052,15 +1052,6 @@ static struct netdev_rxq *
 netdev_linux_rxq_alloc(void)
 {
     struct netdev_rxq_linux *rx = xzalloc(sizeof *rx);
-    if (userspace_tso_enabled()) {
-        int i;
-
-        /* Allocate auxiliay buffers to receive TSO packets. */
-        for (i = 0; i < NETDEV_MAX_BURST; i++) {
-            rx->aux_bufs[i] = xmalloc(LINUX_RXQ_TSO_MAX_LEN);
-        }
-    }
-
     return &rx->up;
 }
@@ -1172,7 +1163,7 @@ netdev_linux_rxq_destruct(struct netdev_rxq *rxq_)
     }
 
     for (i = 0; i < NETDEV_MAX_BURST; i++) {
-        free(rx->aux_bufs[i]);
+        dp_packet_delete(rx->aux_bufs[i]);
     }
 }
@@ -1238,13 +1229,18 @@ netdev_linux_batch_rxq_recv_sock(struct netdev_rxq_linux *rx, int mtu,
         virtio_net_hdr_size = 0;
     }
 
-    std_len = VLAN_ETH_HEADER_LEN + mtu + virtio_net_hdr_size;
+    /* The length here needs to be accounted in the same way when the
+     * aux_buf is allocated so that it can be prepended to TSO buffer. */
+    std_len = virtio_net_hdr_size + VLAN_ETH_HEADER_LEN + mtu;
     for (i = 0; i < NETDEV_MAX_BURST; i++) {
         buffers[i] = dp_packet_new_with_headroom(std_len, DP_NETDEV_HEADROOM);
         iovs[i][IOV_PACKET].iov_base = dp_packet_data(buffers[i]);
         iovs[i][IOV_PACKET].iov_len = std_len;
-        iovs[i][IOV_AUXBUF].iov_base = rx->aux_bufs[i];
-        iovs[i][IOV_AUXBUF].iov_len = LINUX_RXQ_TSO_MAX_LEN;
+        if (iovlen == IOV_TSO_SIZE) {
+            iovs[i][IOV_AUXBUF].iov_base = dp_packet_data(rx->aux_bufs[i]);
+            iovs[i][IOV_AUXBUF].iov_len = dp_packet_tailroom(rx->aux_bufs[i]);
+        }
+
         mmsgs[i].msg_hdr.msg_name = NULL;
         mmsgs[i].msg_hdr.msg_namelen = 0;
         mmsgs[i].msg_hdr.msg_iov = iovs[i];
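The two-element iovec above is what lets the kernel split a frame across the MTU-sized buffer and the TSO aux_buf in a single system call. A minimal standalone sketch of the same scatter read (a pipe stands in for the packet socket; sizes and names are illustrative):

#include <stdio.h>
#include <sys/uio.h>
#include <unistd.h>

int main(void)
{
    char head[16], aux[64];
    int fds[2];

    if (pipe(fds) || write(fds[1], "0123456789abcdefSPILLOVER", 25) < 0) {
        return 1;
    }

    struct iovec iov[2] = {
        { .iov_base = head, .iov_len = sizeof head },  /* IOV_PACKET  */
        { .iov_base = aux,  .iov_len = sizeof aux  },  /* IOV_AUXBUF  */
    };
    ssize_t n = readv(fds[0], iov, 2);

    /* Anything past sizeof head spilled into aux, like msg_len > std_len. */
    printf("read %zd bytes, %zd spilled into aux\n",
           n, n - (ssize_t) sizeof head);
    return 0;
}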
@@ -1268,6 +1264,8 @@ netdev_linux_batch_rxq_recv_sock(struct netdev_rxq_linux *rx, int mtu,
     }
 
     for (i = 0; i < retval; i++) {
+        struct dp_packet *pkt;
+
         if (mmsgs[i].msg_len < ETH_HEADER_LEN) {
             struct netdev *netdev_ = netdev_rxq_get_netdev(&rx->up);
             struct netdev_linux *netdev = netdev_linux_cast(netdev_);
@@ -1280,29 +1278,29 @@ netdev_linux_batch_rxq_recv_sock(struct netdev_rxq_linux *rx, int mtu,
         }
 
         if (mmsgs[i].msg_len > std_len) {
-            /* Build a single linear TSO packet by expanding the current packet
-             * to append the data received in the aux_buf. */
-            size_t extra_len = mmsgs[i].msg_len - std_len;
-
-            dp_packet_set_size(buffers[i], dp_packet_size(buffers[i])
-                               + std_len);
-            dp_packet_prealloc_tailroom(buffers[i], extra_len);
-            memcpy(dp_packet_tail(buffers[i]), rx->aux_bufs[i], extra_len);
-            dp_packet_set_size(buffers[i], dp_packet_size(buffers[i])
-                               + extra_len);
+            /* Build a single linear TSO packet by prepending the data from
+             * std_len buffer to the aux_buf. */
+            pkt = rx->aux_bufs[i];
+            dp_packet_set_size(pkt, mmsgs[i].msg_len - std_len);
+            dp_packet_push(pkt, dp_packet_data(buffers[i]), std_len);
+            /* The headroom should be the same in buffers[i], pkt and
+             * DP_NETDEV_HEADROOM. */
+            dp_packet_resize(pkt, DP_NETDEV_HEADROOM, 0);
+            dp_packet_delete(buffers[i]);
+            rx->aux_bufs[i] = NULL;
         } else {
-            dp_packet_set_size(buffers[i], dp_packet_size(buffers[i])
-                               + mmsgs[i].msg_len);
+            dp_packet_set_size(buffers[i], mmsgs[i].msg_len);
+            pkt = buffers[i];
         }
 
-        if (virtio_net_hdr_size && netdev_linux_parse_vnet_hdr(buffers[i])) {
+        if (virtio_net_hdr_size && netdev_linux_parse_vnet_hdr(pkt)) {
             struct netdev *netdev_ = netdev_rxq_get_netdev(&rx->up);
             struct netdev_linux *netdev = netdev_linux_cast(netdev_);
 
             /* Unexpected error situation: the virtio header is not present
              * or corrupted. Drop the packet but continue in case next ones
              * are correct. */
-            dp_packet_delete(buffers[i]);
+            dp_packet_delete(pkt);
             netdev->rx_dropped += 1;
             VLOG_WARN_RL(&rl, "%s: Dropped packet: Invalid virtio net header",
                          netdev_get_name(netdev_));
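The size accounting in the new TSO branch is easy to mis-read, so here is a worked example with illustrative numbers (a 12-byte vnet header and an MTU of 1500 are assumptions for the arithmetic, not the exact OVS values):

/* Worked example (illustrative numbers, not the exact OVS constants):
 *
 *   std_len          = vnet_hdr + VLAN_ETH_HEADER_LEN + mtu
 *                    = 12 + 18 + 1500 = 1530
 *   mmsgs[i].msg_len = 60000          (the full TSO frame)
 *
 *   dp_packet_set_size(pkt, 60000 - 1530);  // 58470 payload bytes already
 *                                           // sit in the aux_buf tail
 *   dp_packet_push(pkt, head, 1530);        // copy only the small head
 *   // dp_packet_size(pkt) == 60000: one linear packet, and the memcpy is
 *   // 1530 bytes instead of the old code's 58470-byte append.
 */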
@@ -1325,16 +1323,16 @@ netdev_linux_batch_rxq_recv_sock(struct netdev_rxq_linux *rx, int mtu,
                 struct eth_header *eth;
                 bool double_tagged;
 
-                eth = dp_packet_data(buffers[i]);
+                eth = dp_packet_data(pkt);
                 double_tagged = eth->eth_type == htons(ETH_TYPE_VLAN_8021Q);
 
-                eth_push_vlan(buffers[i],
+                eth_push_vlan(pkt,
                               auxdata_to_vlan_tpid(aux, double_tagged),
                               htons(aux->tp_vlan_tci));
                 break;
             }
         }
-        dp_packet_batch_add(batch, buffers[i]);
+        dp_packet_batch_add(batch, pkt);
     }
 
     /* Delete unused buffers. */
@@ -1354,7 +1352,6 @@ static int
 netdev_linux_batch_rxq_recv_tap(struct netdev_rxq_linux *rx, int mtu,
                                 struct dp_packet_batch *batch)
 {
-    struct dp_packet *buffer;
     int virtio_net_hdr_size;
     ssize_t retval;
     size_t std_len;
@@ -1372,16 +1369,22 @@ netdev_linux_batch_rxq_recv_tap(struct netdev_rxq_linux *rx, int mtu,
         virtio_net_hdr_size = 0;
     }
 
-    std_len = VLAN_ETH_HEADER_LEN + mtu + virtio_net_hdr_size;
+    /* The length here needs to be accounted in the same way when the
+     * aux_buf is allocated so that it can be prepended to TSO buffer. */
+    std_len = virtio_net_hdr_size + VLAN_ETH_HEADER_LEN + mtu;
     for (i = 0; i < NETDEV_MAX_BURST; i++) {
+        struct dp_packet *buffer;
+        struct dp_packet *pkt;
         struct iovec iov[IOV_TSO_SIZE];
 
         /* Assume Ethernet port. No need to set packet_type. */
         buffer = dp_packet_new_with_headroom(std_len, DP_NETDEV_HEADROOM);
         iov[IOV_PACKET].iov_base = dp_packet_data(buffer);
         iov[IOV_PACKET].iov_len = std_len;
-        iov[IOV_AUXBUF].iov_base = rx->aux_bufs[i];
-        iov[IOV_AUXBUF].iov_len = LINUX_RXQ_TSO_MAX_LEN;
+        if (iovlen == IOV_TSO_SIZE) {
+            iov[IOV_AUXBUF].iov_base = dp_packet_data(rx->aux_bufs[i]);
+            iov[IOV_AUXBUF].iov_len = dp_packet_tailroom(rx->aux_bufs[i]);
+        }
 
         do {
             retval = readv(rx->fd, iov, iovlen);
@@ -1393,33 +1396,36 @@ netdev_linux_batch_rxq_recv_tap(struct netdev_rxq_linux *rx, int mtu,
         }
 
         if (retval > std_len) {
-            /* Build a single linear TSO packet by expanding the current packet
-             * to append the data received in the aux_buf. */
-            size_t extra_len = retval - std_len;
-
-            dp_packet_set_size(buffer, dp_packet_size(buffer) + std_len);
-            dp_packet_prealloc_tailroom(buffer, extra_len);
-            memcpy(dp_packet_tail(buffer), rx->aux_bufs[i], extra_len);
-            dp_packet_set_size(buffer, dp_packet_size(buffer) + extra_len);
+            /* Build a single linear TSO packet by prepending the data from
+             * std_len buffer to the aux_buf. */
+            pkt = rx->aux_bufs[i];
+            dp_packet_set_size(pkt, retval - std_len);
+            dp_packet_push(pkt, dp_packet_data(buffer), std_len);
+            /* The headroom should be the same in buffers[i], pkt and
+             * DP_NETDEV_HEADROOM. */
+            dp_packet_resize(pkt, DP_NETDEV_HEADROOM, 0);
+            dp_packet_delete(buffer);
+            rx->aux_bufs[i] = NULL;
         } else {
             dp_packet_set_size(buffer, dp_packet_size(buffer) + retval);
+            pkt = buffer;
         }
 
-        if (virtio_net_hdr_size && netdev_linux_parse_vnet_hdr(buffer)) {
+        if (virtio_net_hdr_size && netdev_linux_parse_vnet_hdr(pkt)) {
             struct netdev *netdev_ = netdev_rxq_get_netdev(&rx->up);
             struct netdev_linux *netdev = netdev_linux_cast(netdev_);
 
             /* Unexpected error situation: the virtio header is not present
              * or corrupted. Drop the packet but continue in case next ones
              * are correct. */
-            dp_packet_delete(buffer);
+            dp_packet_delete(pkt);
             netdev->rx_dropped += 1;
             VLOG_WARN_RL(&rl, "%s: Dropped packet: Invalid virtio net header",
                          netdev_get_name(netdev_));
             continue;
         }
 
-        dp_packet_batch_add(batch, buffer);
+        dp_packet_batch_add(batch, pkt);
     }
 
     if ((i == 0) && (retval < 0)) {
@@ -1442,6 +1448,23 @@ netdev_linux_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet_batch *batch,
         mtu = ETH_PAYLOAD_MAX;
     }
 
+    if (userspace_tso_enabled()) {
+        /* Allocate TSO packets. The packet has enough headroom to store
+         * a full non-TSO packet. When a TSO packet is received, the data
+         * from non-TSO buffer (std_len) is prepended to the TSO packet
+         * (aux_buf). */
+        size_t std_len = sizeof(struct virtio_net_hdr) + VLAN_ETH_HEADER_LEN
+                         + DP_NETDEV_HEADROOM + mtu;
+        size_t data_len = LINUX_RXQ_TSO_MAX_LEN - std_len;
+        for (int i = 0; i < NETDEV_MAX_BURST; i++) {
+            if (rx->aux_bufs[i]) {
+                continue;
+            }
+
+            rx->aux_bufs[i] = dp_packet_new_with_headroom(data_len, std_len);
+        }
+    }
+
     dp_packet_batch_init(batch);
     retval = (rx->is_tap
               ? netdev_linux_batch_rxq_recv_tap(rx, mtu, batch)
               : netdev_linux_batch_rxq_recv_sock(rx, mtu, batch));
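The sizing in this last hunk is what guarantees that the later dp_packet_resize(pkt, DP_NETDEV_HEADROOM, 0) finds the headroom already in place. A standalone arithmetic check (the constants are assumed values for illustration, not the OVS definitions):

#include <stddef.h>

/* Illustrative stand-ins (assumed values, not the OVS definitions). */
enum { VNET_HDR = 12, VLAN_ETH_HEADER_LEN = 18, DP_NETDEV_HEADROOM = 128,
       MTU = 1500, LINUX_RXQ_TSO_MAX_LEN = 65536 };

int main(void)
{
    /* Headroom reserved when the aux_buf is allocated in rxq_recv(). */
    size_t std_len_alloc = VNET_HDR + VLAN_ETH_HEADER_LEN
                           + DP_NETDEV_HEADROOM + MTU;
    /* Bytes actually prepended in the receive fast path. */
    size_t std_len_recv = VNET_HDR + VLAN_ETH_HEADER_LEN + MTU;
    size_t data_len = LINUX_RXQ_TSO_MAX_LEN - std_len_alloc;

    (void) data_len;   /* Tailroom left for the TSO payload itself. */
    /* After pushing std_len_recv bytes, exactly DP_NETDEV_HEADROOM is left
     * in front, so the resize is effectively a no-op unless the MTU or the
     * vnet header size changed between allocation and receive. */
    return std_len_alloc - std_len_recv == DP_NETDEV_HEADROOM ? 0 : 1;
}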