diff --git a/Documentation/intro/install/afxdp.rst b/Documentation/intro/install/afxdp.rst index 820e9d993..a136db0c9 100644 --- a/Documentation/intro/install/afxdp.rst +++ b/Documentation/intro/install/afxdp.rst @@ -176,9 +176,17 @@ in :doc:`general`:: ovs-vswitchd ... ovs-vsctl -- add-br br0 -- set Bridge br0 datapath_type=netdev -Make sure your device driver support AF_XDP, and to use 1 PMD (on core 4) -on 1 queue (queue 0) device, configure these options: **pmd-cpu-mask, -pmd-rxq-affinity, and n_rxq**. The **xdpmode** can be "drv" or "skb":: +Make sure your device driver supports AF_XDP. netdev-afxdp supports +the following additional options (see man ovs-vswitchd.conf.db for +more details): + + * **xdpmode**: use "drv" for driver mode, or "skb" for skb mode. + + * **use-need-wakeup**: default "true" if libbpf supports it, otherwise false. + +For example, to use 1 PMD (on core 4) on 1 queue (queue 0) device, +configure these options: **pmd-cpu-mask, pmd-rxq-affinity, and n_rxq**. +The **xdpmode** can be "drv" or "skb":: ethtool -L enp2s0 combined 1 ovs-vsctl set Open_vSwitch . other_config:pmd-cpu-mask=0x10 diff --git a/NEWS b/NEWS index 330ab3832..88b818948 100644 --- a/NEWS +++ b/NEWS @@ -5,6 +5,9 @@ Post-v2.12.0 separate project. You can find it at https://github.com/ovn-org/ovn.git - Userspace datapath: + * New option 'use-need-wakeup' for netdev-afxdp to control enabling + of corresponding 'need_wakeup' flag in AF_XDP rings. Enabled by default + if supported by libbpf. * Add option to enable, disable and query TCP sequence checking in conntrack. 
diff --git a/acinclude.m4 b/acinclude.m4 index 2eb8892fb..a0507cfe0 100644 --- a/acinclude.m4 +++ b/acinclude.m4 @@ -276,6 +276,11 @@ AC_DEFUN([OVS_CHECK_LINUX_AF_XDP], [ [Define to 1 if AF_XDP support is available and enabled.]) LIBBPF_LDADD=" -lbpf -lelf" AC_SUBST([LIBBPF_LDADD]) + + AC_CHECK_DECL([xsk_ring_prod__needs_wakeup], [ + AC_DEFINE([HAVE_XDP_NEED_WAKEUP], [1], + [XDP need wakeup support detected in xsk.h.]) + ], [], [[#include <bpf/xsk.h>]]) fi AM_CONDITIONAL([HAVE_AF_XDP], test "$AF_XDP_ENABLE" = true) ]) diff --git a/lib/netdev-afxdp.c b/lib/netdev-afxdp.c index 8eb270c15..af654d498 100644 --- a/lib/netdev-afxdp.c +++ b/lib/netdev-afxdp.c @@ -26,6 +26,7 @@ #include #include #include +#include <poll.h> #include #include #include @@ -67,6 +68,12 @@ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20); #define PROD_NUM_DESCS XSK_RING_PROD__DEFAULT_NUM_DESCS #define CONS_NUM_DESCS XSK_RING_CONS__DEFAULT_NUM_DESCS +#ifdef HAVE_XDP_NEED_WAKEUP +#define NEED_WAKEUP_DEFAULT true +#else +#define NEED_WAKEUP_DEFAULT false +#endif + /* The worst case is all 4 queues TX/CQ/RX/FILL are full + some packets * still on processing in threads. Number of packets currently in OVS * processing is hard to estimate because it depends on number of ports. 
@@ -82,7 +89,7 @@ BUILD_ASSERT_DECL(PROD_NUM_DESCS == CONS_NUM_DESCS); #define UMEM2DESC(elem, base) ((uint64_t)((char *)elem - (char *)base)) static struct xsk_socket_info *xsk_configure(int ifindex, int xdp_queue_id, - int mode); + int mode, bool use_need_wakeup); static void xsk_remove_xdp_program(uint32_t ifindex, int xdpmode); static void xsk_destroy(struct xsk_socket_info *xsk); static int xsk_configure_all(struct netdev *netdev); @@ -117,6 +124,54 @@ struct xsk_socket_info { atomic_uint64_t tx_dropped; }; +#ifdef HAVE_XDP_NEED_WAKEUP +static inline void +xsk_rx_wakeup_if_needed(struct xsk_umem_info *umem, + struct netdev *netdev, int fd) +{ + struct netdev_linux *dev = netdev_linux_cast(netdev); + struct pollfd pfd; + int ret; + + if (!dev->use_need_wakeup) { + return; + } + + if (xsk_ring_prod__needs_wakeup(&umem->fq)) { + pfd.fd = fd; + pfd.events = POLLIN; + + ret = poll(&pfd, 1, 0); + if (OVS_UNLIKELY(ret < 0)) { + VLOG_WARN_RL(&rl, "%s: error polling rx fd: %s.", + netdev_get_name(netdev), + ovs_strerror(errno)); + } + } +} + +static inline bool +xsk_tx_need_wakeup(struct xsk_socket_info *xsk_info) +{ + return xsk_ring_prod__needs_wakeup(&xsk_info->tx); +} + +#else /* !HAVE_XDP_NEED_WAKEUP */ +static inline void +xsk_rx_wakeup_if_needed(struct xsk_umem_info *umem OVS_UNUSED, + struct netdev *netdev OVS_UNUSED, + int fd OVS_UNUSED) +{ + /* Nothing. 
*/ +} + +static inline bool +xsk_tx_need_wakeup(struct xsk_socket_info *xsk_info OVS_UNUSED) +{ + return true; +} +#endif /* HAVE_XDP_NEED_WAKEUP */ + static void netdev_afxdp_cleanup_unused_pool(struct unused_pool *pool) { @@ -235,7 +290,7 @@ xsk_configure_umem(void *buffer, uint64_t size, int xdpmode) static struct xsk_socket_info * xsk_configure_socket(struct xsk_umem_info *umem, uint32_t ifindex, - uint32_t queue_id, int xdpmode) + uint32_t queue_id, int xdpmode, bool use_need_wakeup) { struct xsk_socket_config cfg; struct xsk_socket_info *xsk; @@ -258,6 +313,12 @@ xsk_configure_socket(struct xsk_umem_info *umem, uint32_t ifindex, cfg.xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST | XDP_FLAGS_SKB_MODE; } +#ifdef HAVE_XDP_NEED_WAKEUP + if (use_need_wakeup) { + cfg.bind_flags |= XDP_USE_NEED_WAKEUP; + } +#endif + if (if_indextoname(ifindex, devname) == NULL) { VLOG_ERR("ifindex %d to devname failed (%s)", ifindex, ovs_strerror(errno)); @@ -268,9 +329,11 @@ xsk_configure_socket(struct xsk_umem_info *umem, uint32_t ifindex, ret = xsk_socket__create(&xsk->xsk, devname, queue_id, umem->umem, &xsk->rx, &xsk->tx, &cfg); if (ret) { - VLOG_ERR("xsk_socket__create failed (%s) mode: %s qid: %d", + VLOG_ERR("xsk_socket__create failed (%s) mode: %s " + "use-need-wakeup: %s qid: %d", ovs_strerror(errno), xdpmode == XDP_COPY ? "SKB": "DRV", + use_need_wakeup ? 
"true" : "false", queue_id); free(xsk); return NULL; @@ -312,7 +375,8 @@ xsk_configure_socket(struct xsk_umem_info *umem, uint32_t ifindex, } static struct xsk_socket_info * -xsk_configure(int ifindex, int xdp_queue_id, int xdpmode) +xsk_configure(int ifindex, int xdp_queue_id, int xdpmode, + bool use_need_wakeup) { struct xsk_socket_info *xsk; struct xsk_umem_info *umem; @@ -335,7 +399,8 @@ xsk_configure(int ifindex, int xdp_queue_id, int xdpmode) VLOG_DBG("Allocated umem pool at 0x%"PRIxPTR, (uintptr_t) umem); - xsk = xsk_configure_socket(umem, ifindex, xdp_queue_id, xdpmode); + xsk = xsk_configure_socket(umem, ifindex, xdp_queue_id, xdpmode, + use_need_wakeup); if (!xsk) { /* Clean up umem and xpacket pool. */ if (xsk_umem__delete(umem->umem)) { @@ -366,9 +431,12 @@ xsk_configure_all(struct netdev *netdev) /* Configure each queue. */ for (i = 0; i < n_rxq; i++) { - VLOG_INFO("%s: configure queue %d mode %s", __func__, i, - dev->xdpmode == XDP_COPY ? "SKB" : "DRV"); - xsk_info = xsk_configure(ifindex, i, dev->xdpmode); + VLOG_DBG("%s: configure queue %d mode %s use-need-wakeup %s.", + netdev_get_name(netdev), i, + dev->xdpmode == XDP_COPY ? "SKB" : "DRV", + dev->use_need_wakeup ? 
"true" : "false"); + xsk_info = xsk_configure(ifindex, i, dev->xdpmode, + dev->use_need_wakeup); if (!xsk_info) { VLOG_ERR("Failed to create AF_XDP socket on queue %d.", i); dev->xsks[i] = NULL; @@ -460,6 +528,7 @@ netdev_afxdp_set_config(struct netdev *netdev, const struct smap *args, struct netdev_linux *dev = netdev_linux_cast(netdev); const char *str_xdpmode; int xdpmode, new_n_rxq; + bool need_wakeup; ovs_mutex_lock(&dev->mutex); new_n_rxq = MAX(smap_get_int(args, "n_rxq", NR_QUEUE), 1); @@ -482,10 +551,20 @@ netdev_afxdp_set_config(struct netdev *netdev, const struct smap *args, return EINVAL; } + need_wakeup = smap_get_bool(args, "use-need-wakeup", NEED_WAKEUP_DEFAULT); +#ifndef HAVE_XDP_NEED_WAKEUP + if (need_wakeup) { + VLOG_WARN("XDP need_wakeup is not supported in libbpf."); + need_wakeup = false; + } +#endif + if (dev->requested_n_rxq != new_n_rxq - || dev->requested_xdpmode != xdpmode) { + || dev->requested_xdpmode != xdpmode + || dev->requested_need_wakeup != need_wakeup) { dev->requested_n_rxq = new_n_rxq; dev->requested_xdpmode = xdpmode; + dev->requested_need_wakeup = need_wakeup; netdev_request_reconfigure(netdev); } ovs_mutex_unlock(&dev->mutex); @@ -500,7 +579,9 @@ netdev_afxdp_get_config(const struct netdev *netdev, struct smap *args) ovs_mutex_lock(&dev->mutex); smap_add_format(args, "n_rxq", "%d", netdev->n_rxq); smap_add_format(args, "xdpmode", "%s", - dev->xdpmode == XDP_ZEROCOPY ? "drv" : "skb"); + dev->xdpmode == XDP_ZEROCOPY ? "drv" : "skb"); + smap_add_format(args, "use-need-wakeup", "%s", + dev->use_need_wakeup ? 
"true" : "false"); ovs_mutex_unlock(&dev->mutex); return 0; } @@ -516,6 +597,7 @@ netdev_afxdp_reconfigure(struct netdev *netdev) if (netdev->n_rxq == dev->requested_n_rxq && dev->xdpmode == dev->requested_xdpmode + && dev->use_need_wakeup == dev->requested_need_wakeup && dev->xsks) { goto out; } @@ -532,6 +614,7 @@ netdev_afxdp_reconfigure(struct netdev *netdev) if (setrlimit(RLIMIT_MEMLOCK, &r)) { VLOG_ERR("setrlimit(RLIMIT_MEMLOCK) failed: %s", ovs_strerror(errno)); } + dev->use_need_wakeup = dev->requested_need_wakeup; err = xsk_configure_all(netdev); if (err) { @@ -654,6 +737,7 @@ netdev_afxdp_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet_batch *batch, rcvd = xsk_ring_cons__peek(&xsk_info->rx, BATCH_SIZE, &idx_rx); if (!rcvd) { + xsk_rx_wakeup_if_needed(umem, netdev, rx->fd); return EAGAIN; } @@ -698,11 +782,15 @@ netdev_afxdp_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet_batch *batch, } static inline int -kick_tx(struct xsk_socket_info *xsk_info, int xdpmode) +kick_tx(struct xsk_socket_info *xsk_info, int xdpmode, bool use_need_wakeup) { int ret, retries; static const int KERNEL_TX_BATCH_SIZE = 16; + if (use_need_wakeup && !xsk_tx_need_wakeup(xsk_info)) { + return 0; + } + /* In SKB_MODE packet transmission is synchronous, and the kernel xmits * only TX_BATCH_SIZE(16) packets for a single sendmsg syscall. 
* So, we have to kick the kernel (n_packets / 16) times to be sure that @@ -874,7 +962,7 @@ __netdev_afxdp_batch_send(struct netdev *netdev, int qid, &orig); COVERAGE_INC(afxdp_tx_full); afxdp_complete_tx(xsk_info); - kick_tx(xsk_info, dev->xdpmode); + kick_tx(xsk_info, dev->xdpmode, dev->use_need_wakeup); error = ENOMEM; goto out; } @@ -898,7 +986,7 @@ __netdev_afxdp_batch_send(struct netdev *netdev, int qid, xsk_ring_prod__submit(&xsk_info->tx, dp_packet_batch_size(batch)); xsk_info->outstanding_tx += dp_packet_batch_size(batch); - ret = kick_tx(xsk_info, dev->xdpmode); + ret = kick_tx(xsk_info, dev->xdpmode, dev->use_need_wakeup); if (OVS_UNLIKELY(ret)) { VLOG_WARN_RL(&rl, "%s: error sending AF_XDP packet: %s.", netdev_get_name(netdev), ovs_strerror(ret)); @@ -968,6 +1056,7 @@ netdev_afxdp_construct(struct netdev *netdev) dev->requested_n_rxq = NR_QUEUE; dev->requested_xdpmode = XDP_COPY; + dev->requested_need_wakeup = NEED_WAKEUP_DEFAULT; dev->xsks = NULL; dev->tx_locks = NULL; diff --git a/lib/netdev-linux-private.h b/lib/netdev-linux-private.h index a350be151..c14f2fb81 100644 --- a/lib/netdev-linux-private.h +++ b/lib/netdev-linux-private.h @@ -102,6 +102,8 @@ struct netdev_linux { int requested_n_rxq; int xdpmode; /* AF_XDP running mode: driver or skb. */ int requested_xdpmode; + bool use_need_wakeup; + bool requested_need_wakeup; struct ovs_spin *tx_locks; /* spin lock array for TX queues. */ #endif }; diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml index 01304a5ed..00c6bd2d4 100644 --- a/vswitchd/vswitch.xml +++ b/vswitchd/vswitch.xml @@ -3122,6 +3122,18 @@ ovs-vsctl add-port br0 p0 -- set Interface p0 type=patch options:peer=p1 \

+ +

+ Specifies whether to use the need_wakeup feature in afxdp netdev. + If enabled, OVS explicitly wakes up the kernel RX using the poll() + syscall and wakes up TX using the sendto() syscall. For physical + devices, this feature improves performance by avoiding + unnecessary sendto() syscalls. + Defaults to true if supported by libbpf. +

+
+