netdev-dpdk: Fix race condition with DPDK mempools in non pmd threads
DPDK mempools rely on rte_lcore_id() to implement a thread-local cache.
All of our non pmd threads had rte_lcore_id() == 0, which allowed concurrent
access to the "thread-local" cache and caused crashes.
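To make the failure mode concrete, here is a minimal sketch of the race,
assuming a deliberately simplified model of the per-lcore cache (a plain array
indexed by a thread-local id, no DPDK calls; all names are illustrative). Two
threads that both report id 0, like our non pmd threads did, update the same
unsynchronized cache concurrently:

/* Simplified model of a per-lcore cache: safe only if each id is used by
 * exactly one thread.  Not DPDK code; names are illustrative. */
#include <pthread.h>
#include <stdio.h>

#define MAX_IDS  8
#define CACHE_SZ 32

struct fake_cache {
    int len;                 /* number of cached objects */
    void *objs[CACHE_SZ];
};

static struct fake_cache caches[MAX_IDS];  /* one cache per "lcore id" */
static __thread unsigned thread_id;        /* models RTE_PER_LCORE(_lcore_id) */

static void
cache_put(void *obj)
{
    struct fake_cache *c = &caches[thread_id];

    if (c->len < CACHE_SZ) {
        c->objs[c->len++] = obj;   /* unsynchronized: two threads with the
                                    * same id race on 'len' and 'objs' */
    }
}

static void *
cache_get(void)
{
    struct fake_cache *c = &caches[thread_id];

    return c->len > 0 ? c->objs[--c->len] : NULL;
}

static void *
worker(void *obj)
{
    int i;

    thread_id = 0;                 /* like every non pmd thread before this fix */
    for (i = 0; i < 1000000; i++) {
        cache_put(obj);
        cache_get();
    }
    return NULL;
}

int
main(void)
{
    int a, b;
    pthread_t t1, t2;

    pthread_create(&t1, NULL, worker, &a);
    pthread_create(&t2, NULL, worker, &b);
    pthread_join(t1, NULL);
    pthread_join(t2, NULL);

    /* Updates to caches[0] interleave, so objects can be lost or duplicated;
     * with real mbufs this corrupts the mempool and crashes the process. */
    printf("final cache length: %d (expected 0)\n", caches[0].len);
    return 0;
}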
This commit resolves the issue with the following changes:
- Every non pmd thread has the same lcore_id (0, for management reasons), which
  is not shared with any pmd thread (lcore_ids for pmd threads now start from 1;
  see the sketch after this list).
- DPDK mbufs must be allocated and freed in pmd threads. When a non pmd thread
  needs to use mempools, as in dpdk_do_tx_copy(), it must hold a mutex.
- The previous change no longer allows us to pass DPDK mbufs to handler
  threads; therefore this commit partially reverts 143859ec63. Packets are now
  copied for upcall processing. The extra memcpy can be removed by processing
  upcalls in the pmd thread itself.
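A standalone sketch of the resulting lcore_id partitioning is below. It only
mirrors the assignment described in the first item (the pmd thread on core N
takes id N + 1, every non pmd thread shares id 0); the helper names and the
thread count are illustrative, while in OVS the ids are set through
RTE_PER_LCORE(_lcore_id) in pmd_thread_setaffinity_cpu() and
thread_set_nonpmd(), as the diff below shows.

/* Standalone sketch of the lcore id scheme after this patch.  Helper names
 * and the thread count are illustrative only. */
#include <stdio.h>

#define N_PMD_THREADS 4            /* arbitrary example value */

static unsigned
pmd_lcore_id(int cpu)
{
    return (unsigned) cpu + 1;     /* id 0 is reserved for non pmd threads */
}

static unsigned
nonpmd_lcore_id(void)
{
    return 0;                      /* shared by all non pmd threads, hence the
                                    * nonpmd_mempool_mutex introduced below */
}

int
main(void)
{
    int cpu;

    printf("non pmd threads -> lcore id %u (shared, mutex protected)\n",
           nonpmd_lcore_id());
    for (cpu = 0; cpu < N_PMD_THREADS; cpu++) {
        printf("pmd thread on core %d -> lcore id %u (private mempool cache)\n",
               cpu, pmd_lcore_id(cpu));
    }
    return 0;
}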
With the introduction of the extra locking, packet throughput will be lower in
the following cases:
- When using internal (tap) devices with DPDK devices on the same datapath.
  In any case, to support internal devices efficiently we would need DPDK KNI
  devices, which will be proper pmd devices and will not need this locking.
- When packets are processed in the slow path by non pmd threads. This overhead
  can be avoided by handling the upcalls directly in pmd threads (a change that
  has already been proposed by Ryan Wilson).
Also, the following two fixes have been introduced:
- In free_dpdk_buf() use rte_pktmbuf_free_seg() instead of rte_mempool_put().
  This allows OVS to run properly with the CONFIG_RTE_LIBRTE_MBUF_DEBUG DPDK
  option.
- Do not bulk free mbufs in a transmission queue: they may belong to different
  mempools (see the sketch after this list).
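The second fix can be sketched with a simplified, DPDK-free model: each buffer
records its owning pool, the way an rte_mbuf records its mempool in its 'pool'
field, so the only safe way to drop an arbitrary set of queued buffers is to
free them one at a time through that pointer (as rte_pktmbuf_free_seg() does)
instead of bulk-returning them all to a single pool. Names below are
illustrative.

/* Simplified model, no DPDK: each buffer points at the pool it came from,
 * like rte_mbuf's 'pool' field, so freeing must go through that pointer. */
#include <stdio.h>

struct pool {
    const char *name;
    int nb_free;                /* objects returned so far */
};

struct buf {
    struct pool *pool;          /* owning pool, set at allocation time */
};

static void
buf_free(struct buf *b)
{
    b->pool->nb_free++;         /* analogous to rte_pktmbuf_free_seg(), which
                                 * returns the segment to its own pool */
}

int
main(void)
{
    struct pool port0_mp = { "port0_mp", 0 };
    struct pool port1_mp = { "port1_mp", 0 };

    /* A tx queue can hold mbufs from different mempools, e.g. packets
     * forwarded from another DPDK port alongside copies made locally. */
    struct buf txq[3] = { { &port0_mp }, { &port1_mp }, { &port0_mp } };
    int i;

    for (i = 0; i < 3; i++) {
        buf_free(&txq[i]);      /* one at a time, each to its own pool */
    }

    printf("%s freed %d, %s freed %d\n",
           port0_mp.name, port0_mp.nb_free, port1_mp.name, port1_mp.nb_free);
    return 0;
}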
Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>
Committed by: Pravin B Shelar
Parent: 1738803acd
Commit: db73f7166a
@@ -138,6 +138,11 @@ static struct list dpdk_list OVS_GUARDED_BY(dpdk_mutex)
 static struct list dpdk_mp_list OVS_GUARDED_BY(dpdk_mutex)
     = LIST_INITIALIZER(&dpdk_mp_list);
 
+/* This mutex must be used by non pmd threads when allocating or freeing
+ * mbufs through mempools. Since dpdk_queue_pkts() and dpdk_queue_flush() may
+ * use mempools, a non pmd thread should hold this mutex while calling them */
+struct ovs_mutex nonpmd_mempool_mutex = OVS_MUTEX_INITIALIZER;
+
 struct dpdk_mp {
     struct rte_mempool *mp;
     int mtu;
@@ -200,6 +205,8 @@ struct netdev_rxq_dpdk {
     int port_id;
 };
 
+static bool thread_is_pmd(void);
+
 static int netdev_dpdk_construct(struct netdev *);
 
 static bool
@@ -223,13 +230,14 @@ dpdk_rte_mzalloc(size_t sz)
     return ptr;
 }
 
+/* XXX this function should be called only by pmd threads (or by non pmd
+ * threads holding the nonpmd_mempool_mutex) */
 void
 free_dpdk_buf(struct dpif_packet *p)
 {
-    struct ofpbuf *buf = &p->ofpbuf;
-    struct rte_mbuf *pkt = (struct rte_mbuf *) buf->dpdk_buf;
+    struct rte_mbuf *pkt = (struct rte_mbuf *) p;
 
-    rte_mempool_put(pkt->pool, pkt);
+    rte_pktmbuf_free_seg(pkt);
 }
 
 static void
@@ -617,10 +625,13 @@ dpdk_queue_flush__(struct netdev_dpdk *dev, int qid)
 
     nb_tx = rte_eth_tx_burst(dev->port_id, qid, txq->burst_pkts, txq->count);
     if (OVS_UNLIKELY(nb_tx != txq->count)) {
-        /* free buffers if we couldn't transmit packets */
-        rte_mempool_put_bulk(dev->dpdk_mp->mp,
-                             (void **) &txq->burst_pkts[nb_tx],
-                             (txq->count - nb_tx));
+        /* free buffers, which we couldn't transmit, one at a time (each
+         * packet could come from a different mempool) */
+        int i;
+
+        for (i = nb_tx; i < txq->count; i++) {
+            rte_pktmbuf_free_seg(txq->burst_pkts[i]);
+        }
     }
     txq->count = 0;
     txq->tsc = rte_get_timer_cycles();
@@ -697,6 +708,7 @@ dpdk_queue_pkts(struct netdev_dpdk *dev, int qid,
 /* Tx function. Transmit packets indefinitely */
 static void
 dpdk_do_tx_copy(struct netdev *netdev, struct dpif_packet ** pkts, int cnt)
+    OVS_NO_THREAD_SAFETY_ANALYSIS
 {
     struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
     struct rte_mbuf *mbufs[cnt];
@@ -704,6 +716,13 @@ dpdk_do_tx_copy(struct netdev *netdev, struct dpif_packet ** pkts, int cnt)
     int newcnt = 0;
     int i;
 
+    /* If we are on a non pmd thread we have to use the mempool mutex, because
+     * every non pmd thread shares the same mempool cache */
+
+    if (!thread_is_pmd()) {
+        ovs_mutex_lock(&nonpmd_mempool_mutex);
+    }
+
     for (i = 0; i < cnt; i++) {
         int size = ofpbuf_size(&pkts[i]->ofpbuf);
 
@@ -739,6 +758,10 @@ dpdk_do_tx_copy(struct netdev *netdev, struct dpif_packet ** pkts, int cnt)
 
     dpdk_queue_pkts(dev, NON_PMD_THREAD_TX_QUEUE, mbufs, newcnt);
     dpdk_queue_flush(dev, NON_PMD_THREAD_TX_QUEUE);
+
+    if (!thread_is_pmd()) {
+        ovs_mutex_unlock(&nonpmd_mempool_mutex);
+    }
 }
 
 static int
@@ -1384,6 +1407,9 @@ dpdk_init(int argc, char **argv)
         argv[result] = argv[0];
     }
 
+    /* We are called from the main thread here */
+    thread_set_nonpmd();
+
     return result + 1;
 }
 
@@ -1429,7 +1455,25 @@ pmd_thread_setaffinity_cpu(int cpu)
         VLOG_ERR("Thread affinity error %d",err);
         return err;
     }
-    RTE_PER_LCORE(_lcore_id) = cpu;
+    /* lcore_id 0 is reseved for use by non pmd threads. */
+    RTE_PER_LCORE(_lcore_id) = cpu + 1;
 
     return 0;
 }
 
+void
+thread_set_nonpmd(void)
+{
+    /* We cannot have RTE_MAX_LCORE pmd threads, because lcore_id 0 is reserved
+     * for non pmd threads */
+    BUILD_ASSERT(NR_PMD_THREADS < RTE_MAX_LCORE);
+    /* We have to use 0 to allow non pmd threads to perform certain DPDK
+     * operations, like rte_eth_dev_configure(). */
+    RTE_PER_LCORE(_lcore_id) = 0;
+}
+
+static bool
+thread_is_pmd(void)
+{
+    return rte_lcore_id() != 0;
+}