mirror of https://github.com/openvswitch/ovs
netdev-dpdk: Use instant sending instead of queueing of packets.
Current implementation of TX packet queueing is broken in several ways:

 * TX queue flushing implemented on receive assumes that all core_id-s
   are sequential and start from zero. This may lead to a situation
   where packets get stuck in a queue forever, and it also hurts latency.

 * For a long time the flushing logic has depended on the uninitialized
   'txq_needs_locking', because that flag is usually calculated after
   'netdev_dpdk_alloc_txq', yet it is used inside that function to
   initialize 'flush_tx'.

Testing shows no performance difference with and without queueing.

Let's remove queueing altogether, since it neither works properly nor
improves performance.

Signed-off-by: Ilya Maximets <i.maximets@samsung.com>
Acked-by: Daniele Di Proietto <diproiettod@vmware.com>
committed by Daniele Di Proietto
parent c9b9d6dfc0
commit b59cc14e03
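For orientation before reading the diff: the patch drops the per-queue packet buffer and sends bursts straight to the NIC. The sketch below is a condensed, illustrative version of what the new netdev_dpdk_eth_tx_burst() in this diff does; the helper name 'eth_tx_burst_sketch' and the plain 'dropped' counter (which stands in for the stats update done under dev->stats_lock) are not part of the patch.

#include <stdint.h>

#include <rte_ethdev.h>
#include <rte_mbuf.h>

/* Condensed sketch of the instant-send path: keep calling
 * rte_eth_tx_burst() until every packet is handed to the NIC or the
 * TX queue stops accepting packets, then free whatever is left over
 * (each mbuf may come from a different mempool) and count it as
 * dropped.  Returns the number of dropped packets. */
static int
eth_tx_burst_sketch(uint16_t port_id, uint16_t qid,
                    struct rte_mbuf **pkts, int cnt)
{
    int nb_tx = 0;
    int dropped = 0;
    int i;

    while (nb_tx != cnt) {
        uint16_t ret = rte_eth_tx_burst(port_id, qid,
                                        pkts + nb_tx, cnt - nb_tx);
        if (!ret) {
            break;      /* NIC TX queue is full; do not retry or queue. */
        }
        nb_tx += ret;
    }

    for (i = nb_tx; i < cnt; i++) {
        rte_pktmbuf_free(pkts[i]);
        dropped++;
    }
    return dropped;
}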
@@ -165,7 +165,6 @@ static const struct rte_eth_conf port_conf = {
     },
 };
 
-enum { MAX_TX_QUEUE_LEN = 384 };
 enum { DPDK_RING_SIZE = 256 };
 BUILD_ASSERT_DECL(IS_POW2(DPDK_RING_SIZE));
 enum { DRAIN_TSC = 200000ULL };
@@ -282,8 +281,7 @@ static struct ovs_list dpdk_mp_list OVS_GUARDED_BY(dpdk_mutex)
     = OVS_LIST_INITIALIZER(&dpdk_mp_list);
 
 /* This mutex must be used by non pmd threads when allocating or freeing
- * mbufs through mempools. Since dpdk_queue_pkts() and dpdk_queue_flush() may
- * use mempools, a non pmd thread should hold this mutex while calling them */
+ * mbufs through mempools. */
 static struct ovs_mutex nonpmd_mempool_mutex = OVS_MUTEX_INITIALIZER;
 
 struct dpdk_mp {
@@ -297,17 +295,12 @@ struct dpdk_mp {
 /* There should be one 'struct dpdk_tx_queue' created for
  * each cpu core. */
 struct dpdk_tx_queue {
-    bool flush_tx;                 /* Set to true to flush queue everytime */
-                                   /* pkts are queued. */
-    int count;
     rte_spinlock_t tx_lock;        /* Protects the members and the NIC queue
                                     * from concurrent access.  It is used only
                                     * if the queue is shared among different
                                     * pmd threads (see 'txq_needs_locking'). */
     int map;                       /* Mapping of configured vhost-user queues
                                     * to enabled by guest. */
-    uint64_t tsc;
-    struct rte_mbuf *burst_pkts[MAX_TX_QUEUE_LEN];
 };
 
 /* dpdk has no way to remove dpdk ring ethernet devices
@@ -720,19 +713,6 @@ netdev_dpdk_alloc_txq(struct netdev_dpdk *dev, unsigned int n_txqs)
 
     dev->tx_q = dpdk_rte_mzalloc(n_txqs * sizeof *dev->tx_q);
     for (i = 0; i < n_txqs; i++) {
-        int numa_id = ovs_numa_get_numa_id(i);
-
-        if (!dev->txq_needs_locking) {
-            /* Each index is considered as a cpu core id, since there should
-             * be one tx queue for each cpu core.  If the corresponding core
-             * is not on the same numa node as 'dev', flags the
-             * 'flush_tx'. */
-            dev->tx_q[i].flush_tx = dev->socket_id == numa_id;
-        } else {
-            /* Queues are shared among CPUs. Always flush */
-            dev->tx_q[i].flush_tx = true;
-        }
-
         /* Initialize map for vhost devices. */
         dev->tx_q[i].map = OVS_VHOST_QUEUE_MAP_UNKNOWN;
         rte_spinlock_init(&dev->tx_q[i].tx_lock);
@@ -1088,16 +1068,15 @@ netdev_dpdk_rxq_dealloc(struct netdev_rxq *rxq)
 }
 
 static inline void
-dpdk_queue_flush__(struct netdev_dpdk *dev, int qid)
+netdev_dpdk_eth_tx_burst(struct netdev_dpdk *dev, int qid,
+                         struct rte_mbuf **pkts, int cnt)
 {
-    struct dpdk_tx_queue *txq = &dev->tx_q[qid];
     uint32_t nb_tx = 0;
 
-    while (nb_tx != txq->count) {
+    while (nb_tx != cnt) {
         uint32_t ret;
 
-        ret = rte_eth_tx_burst(dev->port_id, qid, txq->burst_pkts + nb_tx,
-                               txq->count - nb_tx);
+        ret = rte_eth_tx_burst(dev->port_id, qid, pkts + nb_tx, cnt - nb_tx);
         if (!ret) {
             break;
         }
@@ -1105,32 +1084,18 @@ dpdk_queue_flush__(struct netdev_dpdk *dev, int qid)
         nb_tx += ret;
     }
 
-    if (OVS_UNLIKELY(nb_tx != txq->count)) {
+    if (OVS_UNLIKELY(nb_tx != cnt)) {
         /* free buffers, which we couldn't transmit, one at a time (each
          * packet could come from a different mempool) */
         int i;
 
-        for (i = nb_tx; i < txq->count; i++) {
-            rte_pktmbuf_free(txq->burst_pkts[i]);
+        for (i = nb_tx; i < cnt; i++) {
+            rte_pktmbuf_free(pkts[i]);
         }
         rte_spinlock_lock(&dev->stats_lock);
-        dev->stats.tx_dropped += txq->count-nb_tx;
+        dev->stats.tx_dropped += cnt - nb_tx;
         rte_spinlock_unlock(&dev->stats_lock);
     }
-
-    txq->count = 0;
-    txq->tsc = rte_get_timer_cycles();
-}
-
-static inline void
-dpdk_queue_flush(struct netdev_dpdk *dev, int qid)
-{
-    struct dpdk_tx_queue *txq = &dev->tx_q[qid];
-
-    if (txq->count == 0) {
-        return;
-    }
-    dpdk_queue_flush__(dev, qid);
-}
 
 static inline bool
@@ -1303,15 +1268,6 @@ netdev_dpdk_rxq_recv(struct netdev_rxq *rxq, struct dp_packet **packets,
     int nb_rx;
     int dropped = 0;
 
-    /* There is only one tx queue for this core.  Do not flush other
-     * queues.
-     * Do not flush tx queue which is shared among CPUs
-     * since it is always flushed */
-    if (rxq->queue_id == rte_lcore_id() &&
-        OVS_LIKELY(!dev->txq_needs_locking)) {
-        dpdk_queue_flush(dev, rxq->queue_id);
-    }
-
     nb_rx = rte_eth_rx_burst(rx->port_id, rxq->queue_id,
                              (struct rte_mbuf **) packets,
                              NETDEV_MAX_BURST);
@@ -1433,35 +1389,6 @@ out:
     }
 }
 
-inline static void
-dpdk_queue_pkts(struct netdev_dpdk *dev, int qid,
-               struct rte_mbuf **pkts, int cnt)
-{
-    struct dpdk_tx_queue *txq = &dev->tx_q[qid];
-    uint64_t diff_tsc;
-
-    int i = 0;
-
-    while (i < cnt) {
-        int freeslots = MAX_TX_QUEUE_LEN - txq->count;
-        int tocopy = MIN(freeslots, cnt-i);
-
-        memcpy(&txq->burst_pkts[txq->count], &pkts[i],
-               tocopy * sizeof (struct rte_mbuf *));
-
-        txq->count += tocopy;
-        i += tocopy;
-
-        if (txq->count == MAX_TX_QUEUE_LEN || txq->flush_tx) {
-            dpdk_queue_flush__(dev, qid);
-        }
-        diff_tsc = rte_get_timer_cycles() - txq->tsc;
-        if (diff_tsc >= DRAIN_TSC) {
-            dpdk_queue_flush__(dev, qid);
-        }
-    }
-}
-
 /* Tx function. Transmit packets indefinitely */
 static void
 dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet **pkts,
@@ -1527,8 +1454,7 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet **pkts,
         newcnt = netdev_dpdk_qos_run__(dev, mbufs, newcnt);
 
         dropped += qos_pkts - newcnt;
-        dpdk_queue_pkts(dev, qid, mbufs, newcnt);
-        dpdk_queue_flush(dev, qid);
+        netdev_dpdk_eth_tx_burst(dev, qid, mbufs, newcnt);
     }
 
     if (OVS_UNLIKELY(dropped)) {
@@ -1611,7 +1537,7 @@ netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
                     temp_cnt = netdev_dpdk_qos_run__(dev, (struct rte_mbuf**)pkts,
                                                      temp_cnt);
                     dropped += qos_pkts - temp_cnt;
-                    dpdk_queue_pkts(dev, qid,
+                    netdev_dpdk_eth_tx_burst(dev, qid,
                                     (struct rte_mbuf **)&pkts[next_tx_idx],
                                     temp_cnt);
 
@@ -1631,8 +1557,9 @@ netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
 
         cnt = netdev_dpdk_qos_run__(dev, (struct rte_mbuf**)pkts, cnt);
         dropped += qos_pkts - cnt;
-        dpdk_queue_pkts(dev, qid, (struct rte_mbuf **)&pkts[next_tx_idx],
-                        cnt);
+        netdev_dpdk_eth_tx_burst(dev, qid,
+                                 (struct rte_mbuf **)&pkts[next_tx_idx],
+                                 cnt);
     }
 
     if (OVS_UNLIKELY(dropped)) {