diff --git a/Documentation/intro/install/afxdp.rst b/Documentation/intro/install/afxdp.rst
index 820e9d993..a136db0c9 100644
--- a/Documentation/intro/install/afxdp.rst
+++ b/Documentation/intro/install/afxdp.rst
@@ -176,9 +176,17 @@ in :doc:`general`::
ovs-vswitchd ...
ovs-vsctl -- add-br br0 -- set Bridge br0 datapath_type=netdev
-Make sure your device driver support AF_XDP, and to use 1 PMD (on core 4)
-on 1 queue (queue 0) device, configure these options: **pmd-cpu-mask,
-pmd-rxq-affinity, and n_rxq**. The **xdpmode** can be "drv" or "skb"::
+Make sure your device driver supports AF_XDP. netdev-afxdp supports
+the following additional options (see man ovs-vswitchd.conf.db for
+more details):
+
+ * **xdpmode**: use "drv" for driver mode, or "skb" for skb mode.
+
+ * **use-need-wakeup**: default "true" if libbpf supports it, otherwise false.
+
+For example, to use 1 PMD (on core 4) on 1 queue (queue 0) device,
+configure these options: **pmd-cpu-mask, pmd-rxq-affinity, and n_rxq**.
+The **xdpmode** can be "drv" or "skb"::
ethtool -L enp2s0 combined 1
ovs-vsctl set Open_vSwitch . other_config:pmd-cpu-mask=0x10
diff --git a/NEWS b/NEWS
index 330ab3832..88b818948 100644
--- a/NEWS
+++ b/NEWS
@@ -5,6 +5,9 @@ Post-v2.12.0
separate project. You can find it at
https://github.com/ovn-org/ovn.git
- Userspace datapath:
+ * New option 'use-need-wakeup' for netdev-afxdp to control enabling
+ of the corresponding 'need_wakeup' flag in AF_XDP rings. Enabled by
+ default if supported by libbpf.
* Add option to enable, disable and query TCP sequence checking in
conntrack.
diff --git a/acinclude.m4 b/acinclude.m4
index 2eb8892fb..a0507cfe0 100644
--- a/acinclude.m4
+++ b/acinclude.m4
@@ -276,6 +276,11 @@ AC_DEFUN([OVS_CHECK_LINUX_AF_XDP], [
[Define to 1 if AF_XDP support is available and enabled.])
LIBBPF_LDADD=" -lbpf -lelf"
AC_SUBST([LIBBPF_LDADD])
+
+ AC_CHECK_DECL([xsk_ring_prod__needs_wakeup], [
+ AC_DEFINE([HAVE_XDP_NEED_WAKEUP], [1],
+ [XDP need wakeup support detected in xsk.h.])
+ ], [], [[#include <bpf/xsk.h>]])
fi
AM_CONDITIONAL([HAVE_AF_XDP], test "$AF_XDP_ENABLE" = true)
])
diff --git a/lib/netdev-afxdp.c b/lib/netdev-afxdp.c
index 8eb270c15..af654d498 100644
--- a/lib/netdev-afxdp.c
+++ b/lib/netdev-afxdp.c
@@ -26,6 +26,7 @@
#include
#include
#include
+#include <poll.h>
#include
#include
#include
@@ -67,6 +68,12 @@ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
#define PROD_NUM_DESCS XSK_RING_PROD__DEFAULT_NUM_DESCS
#define CONS_NUM_DESCS XSK_RING_CONS__DEFAULT_NUM_DESCS
+#ifdef HAVE_XDP_NEED_WAKEUP
+#define NEED_WAKEUP_DEFAULT true
+#else
+#define NEED_WAKEUP_DEFAULT false
+#endif
+
/* The worst case is all 4 queues TX/CQ/RX/FILL are full + some packets
* still on processing in threads. Number of packets currently in OVS
* processing is hard to estimate because it depends on number of ports.
@@ -82,7 +89,7 @@ BUILD_ASSERT_DECL(PROD_NUM_DESCS == CONS_NUM_DESCS);
#define UMEM2DESC(elem, base) ((uint64_t)((char *)elem - (char *)base))
static struct xsk_socket_info *xsk_configure(int ifindex, int xdp_queue_id,
- int mode);
+ int mode, bool use_need_wakeup);
static void xsk_remove_xdp_program(uint32_t ifindex, int xdpmode);
static void xsk_destroy(struct xsk_socket_info *xsk);
static int xsk_configure_all(struct netdev *netdev);
@@ -117,6 +124,54 @@ struct xsk_socket_info {
atomic_uint64_t tx_dropped;
};
+#ifdef HAVE_XDP_NEED_WAKEUP
+static inline void
+xsk_rx_wakeup_if_needed(struct xsk_umem_info *umem,
+ struct netdev *netdev, int fd)
+{
+ struct netdev_linux *dev = netdev_linux_cast(netdev);
+ struct pollfd pfd;
+ int ret;
+
+ if (!dev->use_need_wakeup) {
+ return;
+ }
+
+ if (xsk_ring_prod__needs_wakeup(&umem->fq)) {
+ pfd.fd = fd;
+ pfd.events = POLLIN;
+
+ ret = poll(&pfd, 1, 0);
+ if (OVS_UNLIKELY(ret < 0)) {
+ VLOG_WARN_RL(&rl, "%s: error polling rx fd: %s.",
+ netdev_get_name(netdev),
+ ovs_strerror(errno));
+ }
+ }
+}
+
+static inline bool
+xsk_tx_need_wakeup(struct xsk_socket_info *xsk_info)
+{
+ return xsk_ring_prod__needs_wakeup(&xsk_info->tx);
+}
+
+#else /* !HAVE_XDP_NEED_WAKEUP */
+static inline void
+xsk_rx_wakeup_if_needed(struct xsk_umem_info *umem OVS_UNUSED,
+ struct netdev *netdev OVS_UNUSED,
+ int fd OVS_UNUSED)
+{
+ /* Nothing. */
+}
+
+static inline bool
+xsk_tx_need_wakeup(struct xsk_socket_info *xsk_info OVS_UNUSED)
+{
+ return true;
+}
+#endif /* HAVE_XDP_NEED_WAKEUP */
+
static void
netdev_afxdp_cleanup_unused_pool(struct unused_pool *pool)
{
@@ -235,7 +290,7 @@ xsk_configure_umem(void *buffer, uint64_t size, int xdpmode)
static struct xsk_socket_info *
xsk_configure_socket(struct xsk_umem_info *umem, uint32_t ifindex,
- uint32_t queue_id, int xdpmode)
+ uint32_t queue_id, int xdpmode, bool use_need_wakeup)
{
struct xsk_socket_config cfg;
struct xsk_socket_info *xsk;
@@ -258,6 +313,12 @@ xsk_configure_socket(struct xsk_umem_info *umem, uint32_t ifindex,
cfg.xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST | XDP_FLAGS_SKB_MODE;
}
+#ifdef HAVE_XDP_NEED_WAKEUP
+ if (use_need_wakeup) {
+ cfg.bind_flags |= XDP_USE_NEED_WAKEUP;
+ }
+#endif
+
if (if_indextoname(ifindex, devname) == NULL) {
VLOG_ERR("ifindex %d to devname failed (%s)",
ifindex, ovs_strerror(errno));
@@ -268,9 +329,11 @@ xsk_configure_socket(struct xsk_umem_info *umem, uint32_t ifindex,
ret = xsk_socket__create(&xsk->xsk, devname, queue_id, umem->umem,
&xsk->rx, &xsk->tx, &cfg);
if (ret) {
- VLOG_ERR("xsk_socket__create failed (%s) mode: %s qid: %d",
+ VLOG_ERR("xsk_socket__create failed (%s) mode: %s "
+ "use-need-wakeup: %s qid: %d",
ovs_strerror(errno),
xdpmode == XDP_COPY ? "SKB": "DRV",
+ use_need_wakeup ? "true" : "false",
queue_id);
free(xsk);
return NULL;
@@ -312,7 +375,8 @@ xsk_configure_socket(struct xsk_umem_info *umem, uint32_t ifindex,
}
static struct xsk_socket_info *
-xsk_configure(int ifindex, int xdp_queue_id, int xdpmode)
+xsk_configure(int ifindex, int xdp_queue_id, int xdpmode,
+ bool use_need_wakeup)
{
struct xsk_socket_info *xsk;
struct xsk_umem_info *umem;
@@ -335,7 +399,8 @@ xsk_configure(int ifindex, int xdp_queue_id, int xdpmode)
VLOG_DBG("Allocated umem pool at 0x%"PRIxPTR, (uintptr_t) umem);
- xsk = xsk_configure_socket(umem, ifindex, xdp_queue_id, xdpmode);
+ xsk = xsk_configure_socket(umem, ifindex, xdp_queue_id, xdpmode,
+ use_need_wakeup);
if (!xsk) {
/* Clean up umem and xpacket pool. */
if (xsk_umem__delete(umem->umem)) {
@@ -366,9 +431,12 @@ xsk_configure_all(struct netdev *netdev)
/* Configure each queue. */
for (i = 0; i < n_rxq; i++) {
- VLOG_INFO("%s: configure queue %d mode %s", __func__, i,
- dev->xdpmode == XDP_COPY ? "SKB" : "DRV");
- xsk_info = xsk_configure(ifindex, i, dev->xdpmode);
+ VLOG_DBG("%s: configure queue %d mode %s use-need-wakeup %s.",
+ netdev_get_name(netdev), i,
+ dev->xdpmode == XDP_COPY ? "SKB" : "DRV",
+ dev->use_need_wakeup ? "true" : "false");
+ xsk_info = xsk_configure(ifindex, i, dev->xdpmode,
+ dev->use_need_wakeup);
if (!xsk_info) {
VLOG_ERR("Failed to create AF_XDP socket on queue %d.", i);
dev->xsks[i] = NULL;
@@ -460,6 +528,7 @@ netdev_afxdp_set_config(struct netdev *netdev, const struct smap *args,
struct netdev_linux *dev = netdev_linux_cast(netdev);
const char *str_xdpmode;
int xdpmode, new_n_rxq;
+ bool need_wakeup;
ovs_mutex_lock(&dev->mutex);
new_n_rxq = MAX(smap_get_int(args, "n_rxq", NR_QUEUE), 1);
@@ -482,10 +551,20 @@ netdev_afxdp_set_config(struct netdev *netdev, const struct smap *args,
return EINVAL;
}
+ need_wakeup = smap_get_bool(args, "use-need-wakeup", NEED_WAKEUP_DEFAULT);
+#ifndef HAVE_XDP_NEED_WAKEUP
+ if (need_wakeup) {
+ VLOG_WARN("XDP need_wakeup is not supported in libbpf.");
+ need_wakeup = false;
+ }
+#endif
+
if (dev->requested_n_rxq != new_n_rxq
- || dev->requested_xdpmode != xdpmode) {
+ || dev->requested_xdpmode != xdpmode
+ || dev->requested_need_wakeup != need_wakeup) {
dev->requested_n_rxq = new_n_rxq;
dev->requested_xdpmode = xdpmode;
+ dev->requested_need_wakeup = need_wakeup;
netdev_request_reconfigure(netdev);
}
ovs_mutex_unlock(&dev->mutex);
@@ -500,7 +579,9 @@ netdev_afxdp_get_config(const struct netdev *netdev, struct smap *args)
ovs_mutex_lock(&dev->mutex);
smap_add_format(args, "n_rxq", "%d", netdev->n_rxq);
smap_add_format(args, "xdpmode", "%s",
- dev->xdpmode == XDP_ZEROCOPY ? "drv" : "skb");
+ dev->xdpmode == XDP_ZEROCOPY ? "drv" : "skb");
+ smap_add_format(args, "use-need-wakeup", "%s",
+ dev->use_need_wakeup ? "true" : "false");
ovs_mutex_unlock(&dev->mutex);
return 0;
}
@@ -516,6 +597,7 @@ netdev_afxdp_reconfigure(struct netdev *netdev)
if (netdev->n_rxq == dev->requested_n_rxq
&& dev->xdpmode == dev->requested_xdpmode
+ && dev->use_need_wakeup == dev->requested_need_wakeup
&& dev->xsks) {
goto out;
}
@@ -532,6 +614,7 @@ netdev_afxdp_reconfigure(struct netdev *netdev)
if (setrlimit(RLIMIT_MEMLOCK, &r)) {
VLOG_ERR("setrlimit(RLIMIT_MEMLOCK) failed: %s", ovs_strerror(errno));
}
+ dev->use_need_wakeup = dev->requested_need_wakeup;
err = xsk_configure_all(netdev);
if (err) {
@@ -654,6 +737,7 @@ netdev_afxdp_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet_batch *batch,
rcvd = xsk_ring_cons__peek(&xsk_info->rx, BATCH_SIZE, &idx_rx);
if (!rcvd) {
+ xsk_rx_wakeup_if_needed(umem, netdev, rx->fd);
return EAGAIN;
}
@@ -698,11 +782,15 @@ netdev_afxdp_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet_batch *batch,
}
static inline int
-kick_tx(struct xsk_socket_info *xsk_info, int xdpmode)
+kick_tx(struct xsk_socket_info *xsk_info, int xdpmode, bool use_need_wakeup)
{
int ret, retries;
static const int KERNEL_TX_BATCH_SIZE = 16;
+ if (use_need_wakeup && !xsk_tx_need_wakeup(xsk_info)) {
+ return 0;
+ }
+
/* In SKB_MODE packet transmission is synchronous, and the kernel xmits
* only TX_BATCH_SIZE(16) packets for a single sendmsg syscall.
* So, we have to kick the kernel (n_packets / 16) times to be sure that
@@ -874,7 +962,7 @@ __netdev_afxdp_batch_send(struct netdev *netdev, int qid,
&orig);
COVERAGE_INC(afxdp_tx_full);
afxdp_complete_tx(xsk_info);
- kick_tx(xsk_info, dev->xdpmode);
+ kick_tx(xsk_info, dev->xdpmode, dev->use_need_wakeup);
error = ENOMEM;
goto out;
}
@@ -898,7 +986,7 @@ __netdev_afxdp_batch_send(struct netdev *netdev, int qid,
xsk_ring_prod__submit(&xsk_info->tx, dp_packet_batch_size(batch));
xsk_info->outstanding_tx += dp_packet_batch_size(batch);
- ret = kick_tx(xsk_info, dev->xdpmode);
+ ret = kick_tx(xsk_info, dev->xdpmode, dev->use_need_wakeup);
if (OVS_UNLIKELY(ret)) {
VLOG_WARN_RL(&rl, "%s: error sending AF_XDP packet: %s.",
netdev_get_name(netdev), ovs_strerror(ret));
@@ -968,6 +1056,7 @@ netdev_afxdp_construct(struct netdev *netdev)
dev->requested_n_rxq = NR_QUEUE;
dev->requested_xdpmode = XDP_COPY;
+ dev->requested_need_wakeup = NEED_WAKEUP_DEFAULT;
dev->xsks = NULL;
dev->tx_locks = NULL;
diff --git a/lib/netdev-linux-private.h b/lib/netdev-linux-private.h
index a350be151..c14f2fb81 100644
--- a/lib/netdev-linux-private.h
+++ b/lib/netdev-linux-private.h
@@ -102,6 +102,8 @@ struct netdev_linux {
int requested_n_rxq;
int xdpmode; /* AF_XDP running mode: driver or skb. */
int requested_xdpmode;
+ bool use_need_wakeup;
+ bool requested_need_wakeup;
struct ovs_spin *tx_locks; /* spin lock array for TX queues. */
#endif
};
diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml
index 01304a5ed..00c6bd2d4 100644
--- a/vswitchd/vswitch.xml
+++ b/vswitchd/vswitch.xml
@@ -3122,6 +3122,18 @@ ovs-vsctl add-port br0 p0 -- set Interface p0 type=patch options:peer=p1 \
+
+
Specifies whether to use the need_wakeup feature in the afxdp netdev.
If enabled, OVS explicitly wakes up the kernel RX using the poll()
syscall and wakes up TX using the sendto() syscall. For physical
devices, this feature improves performance by avoiding
unnecessary sendto() syscalls.
Defaults to true if supported by libbpf.
+
+
+