
dpif-netdev: XPS (Transmit Packet Steering) implementation.

If the number of CPUs in pmd-cpu-mask is not divisible by the number of
queues, and in a few more complex situations, TX queue-ids may be
distributed unfairly between PMD threads.

For example, if we have 2 ports with 4 queues each and 6 CPUs in
pmd-cpu-mask, the following distribution is possible:
<------------------------------------------------------------------------>
pmd thread numa_id 0 core_id 13:
        port: vhost-user1       queue-id: 1
        port: dpdk0     queue-id: 3
pmd thread numa_id 0 core_id 14:
        port: vhost-user1       queue-id: 2
pmd thread numa_id 0 core_id 16:
        port: dpdk0     queue-id: 0
pmd thread numa_id 0 core_id 17:
        port: dpdk0     queue-id: 1
pmd thread numa_id 0 core_id 12:
        port: vhost-user1       queue-id: 0
        port: dpdk0     queue-id: 2
pmd thread numa_id 0 core_id 15:
        port: vhost-user1       queue-id: 3
<------------------------------------------------------------------------>

As we can see above, the dpdk0 port is polled by threads on cores
12, 13, 16 and 17.

By design of dpif-netdev, only one TX queue-id is assigned to each PMD
thread. These queue-ids are sequential, similar to core-ids, and a thread
sends packets to the queue with exactly this queue-id regardless of
the port.

In the previous example:

	pmd thread on core 12 will send packets to tx queue 0
	pmd thread on core 13 will send packets to tx queue 1
	...
	pmd thread on core 17 will send packets to tx queue 5

So, for the dpdk0 port, after truncation in netdev-dpdk:

	core 12 --> TX queue-id 0 % 4 == 0
	core 13 --> TX queue-id 1 % 4 == 1
	core 16 --> TX queue-id 4 % 4 == 0
	core 17 --> TX queue-id 5 % 4 == 1

As a result, only 2 of the 4 queues are used.
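
To make the truncation concrete, here is a minimal standalone sketch
(plain C, not the OVS code) that reproduces the mapping above:
<------------------------------------------------------------------------>
#include <stdio.h>

int
main(void)
{
    /* Cores polling dpdk0 and the sequential TX queue-ids of their
     * PMD threads (queue-ids follow core-ids by design). */
    int cores[] = { 12, 13, 16, 17 };
    int qids[]  = {  0,  1,  4,  5 };
    int n_txq = 4;                  /* dpdk0 has only 4 TX queues. */

    for (int i = 0; i < 4; i++) {
        /* netdev-dpdk truncates the static queue-id with a modulo. */
        printf("core %d --> TX queue-id %d %% %d == %d\n",
               cores[i], qids[i], n_txq, qids[i] % n_txq);
    }
    return 0;
}
<------------------------------------------------------------------------>

Only queue-ids 0 and 1 ever appear, so queues 2 and 3 of dpdk0 stay idle.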

To fix this issue, a form of XPS is implemented in the following way
(a simplified sketch of the idea follows the list):

	* TX queue-ids are allocated dynamically.
	* When a PMD thread tries to send packets to a new port for the
	  first time, it allocates the least used TX queue for this port.
	* PMD threads periodically revalidate the allocated TX queue-ids.
	  If a queue wasn't used in the last XPS_TIMEOUT_MS milliseconds,
	  it is freed during revalidation.
	* XPS is not used if there are enough TX queues.
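
A minimal sketch of that idea, with hypothetical names and simplified
structures (the real implementation lives in dpif-netdev and handles
locking and per-port state differently):
<------------------------------------------------------------------------>
#include <stdint.h>

#define XPS_TIMEOUT_MS 500      /* assumed idle timeout for this sketch */

/* Hypothetical, simplified state: a per-port array counting how many
 * threads currently use each TX queue, and a cached queue-id with a
 * last-used timestamp per (PMD thread, port) pair. */
struct xps_port {
    int n_txq;                  /* Number of TX queues of the port. */
    unsigned *txq_used;         /* Users of each TX queue. */
};

struct xps_cached_qid {
    int qid;                    /* Allocated queue-id, -1 if none. */
    int64_t last_used_ms;       /* Time of last transmission. */
};

/* Returns a TX queue-id for this thread/port pair, allocating the
 * least used queue on first use. */
int
xps_get_tx_qid(struct xps_port *port, struct xps_cached_qid *cached,
               int64_t now_ms)
{
    if (cached->qid < 0) {
        int min_qid = 0;

        for (int i = 1; i < port->n_txq; i++) {
            if (port->txq_used[i] < port->txq_used[min_qid]) {
                min_qid = i;
            }
        }
        port->txq_used[min_qid]++;
        cached->qid = min_qid;
    }
    cached->last_used_ms = now_ms;
    return cached->qid;
}

/* Periodic revalidation: frees the mapping if it was idle for longer
 * than XPS_TIMEOUT_MS. */
void
xps_revalidate(struct xps_port *port, struct xps_cached_qid *cached,
               int64_t now_ms)
{
    if (cached->qid >= 0 && now_ms - cached->last_used_ms > XPS_TIMEOUT_MS) {
        port->txq_used[cached->qid]--;
        cached->qid = -1;
    }
}
<------------------------------------------------------------------------>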

Reported-by: Zhihong Wang <zhihong.wang@intel.com>
Signed-off-by: Ilya Maximets <i.maximets@samsung.com>
Signed-off-by: Daniele Di Proietto <diproiettod@vmware.com>
Ilya Maximets, 2016-07-27 17:44:41 +03:00
committed by Daniele Di Proietto
parent 7adc92ac59
commit 324c837485
8 changed files with 200 additions and 74 deletions


@@ -298,7 +298,7 @@ struct dpdk_tx_queue {
     rte_spinlock_t tx_lock;        /* Protects the members and the NIC queue
                                     * from concurrent access.  It is used only
                                     * if the queue is shared among different
-                                    * pmd threads (see 'txq_needs_locking'). */
+                                    * pmd threads (see 'concurrent_txq'). */
     int map;                       /* Mapping of configured vhost-user queues
                                     * to enabled by guest. */
 };
@@ -349,13 +349,6 @@ struct netdev_dpdk {
     struct rte_eth_link link;
     int link_reset_cnt;
 
-    /* Caller of netdev_send() might want to use more txqs than the device has.
-     * For physical NICs, if the 'requested_n_txq' less or equal to 'up.n_txq',
-     * 'txq_needs_locking' is false, otherwise it is true and we will take a
-     * spinlock on transmission. For vhost devices, 'requested_n_txq' is
-     * always true. */
-    bool txq_needs_locking;
-
     /* virtio-net structure for vhost device */
     OVSRCU_TYPE(struct virtio_net *) virtio_dev;
@@ -778,10 +771,8 @@ netdev_dpdk_init(struct netdev *netdev, unsigned int port_no,
             goto unlock;
         }
         netdev_dpdk_alloc_txq(dev, netdev->n_txq);
-        dev->txq_needs_locking = netdev->n_txq < dev->requested_n_txq;
     } else {
         netdev_dpdk_alloc_txq(dev, OVS_VHOST_MAX_QUEUE_NUM);
-        dev->txq_needs_locking = true;
         /* Enable DPDK_DEV_VHOST device and set promiscuous mode flag. */
         dev->flags = NETDEV_UP | NETDEV_PROMISC;
     }
@@ -1468,7 +1459,7 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch *batch)
 static int
 netdev_dpdk_vhost_send(struct netdev *netdev, int qid,
                        struct dp_packet_batch *batch,
-                       bool may_steal)
+                       bool may_steal, bool concurrent_txq OVS_UNUSED)
 {
     if (OVS_UNLIKELY(batch->packets[0]->source != DPBUF_DPDK)) {
@@ -1484,9 +1475,10 @@ netdev_dpdk_vhost_send(struct netdev *netdev, int qid,
 static inline void
 netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
-                   struct dp_packet_batch *batch, bool may_steal)
+                   struct dp_packet_batch *batch, bool may_steal,
+                   bool concurrent_txq)
 {
-    if (OVS_UNLIKELY(dev->txq_needs_locking)) {
+    if (OVS_UNLIKELY(concurrent_txq)) {
         qid = qid % dev->up.n_txq;
         rte_spinlock_lock(&dev->tx_q[qid].tx_lock);
     }
@@ -1551,18 +1543,19 @@ netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
         }
     }
 
-    if (OVS_UNLIKELY(dev->txq_needs_locking)) {
+    if (OVS_UNLIKELY(concurrent_txq)) {
         rte_spinlock_unlock(&dev->tx_q[qid].tx_lock);
     }
 }
 
 static int
 netdev_dpdk_eth_send(struct netdev *netdev, int qid,
-                     struct dp_packet_batch *batch, bool may_steal)
+                     struct dp_packet_batch *batch, bool may_steal,
+                     bool concurrent_txq)
 {
     struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
 
-    netdev_dpdk_send__(dev, qid, batch, may_steal);
+    netdev_dpdk_send__(dev, qid, batch, may_steal, concurrent_txq);
     return 0;
 }
@@ -2533,7 +2526,8 @@ dpdk_ring_open(const char dev_name[], unsigned int *eth_port_id)
 static int
 netdev_dpdk_ring_send(struct netdev *netdev, int qid,
-                      struct dp_packet_batch *batch, bool may_steal)
+                      struct dp_packet_batch *batch, bool may_steal,
+                      bool concurrent_txq)
 {
     struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
     unsigned i;
@@ -2546,7 +2540,7 @@ netdev_dpdk_ring_send(struct netdev *netdev, int qid,
         dp_packet_rss_invalidate(batch->packets[i]);
     }
 
-    netdev_dpdk_send__(dev, qid, batch, may_steal);
+    netdev_dpdk_send__(dev, qid, batch, may_steal, concurrent_txq);
     return 0;
 }
@@ -2823,8 +2817,6 @@ netdev_dpdk_reconfigure(struct netdev *netdev)
     err = dpdk_eth_dev_init(dev);
     netdev_dpdk_alloc_txq(dev, netdev->n_txq);
 
-    dev->txq_needs_locking = netdev->n_txq < dev->requested_n_txq;
-
 out:
     ovs_mutex_unlock(&dev->mutex);