
Merge branch 'dpdk_merge' of https://github.com/darball/ovs.

Ben Pfaff
2017-09-12 07:12:53 -07:00
3 changed files with 156 additions and 125 deletions

Documentation/topics/dpdk/vhost-user.rst

@@ -44,7 +44,7 @@ existing bridge called ``br0``::
 For the above examples to work, an appropriate server socket must be created
 at the paths specified (``/tmp/dpdkvhostclient0`` and
-``/tmp/dpdkvhostclient0``). These sockets can be created with QEMU; see the
+``/tmp/dpdkvhostclient1``). These sockets can be created with QEMU; see the
 :ref:`vhost-user client <dpdk-vhost-user-client>` section for details.

 vhost-user vs. vhost-user-client

lib/dpif-netdev.c

@@ -796,7 +796,7 @@ pmd_info_show_stats(struct ds *reply,
                     unsigned long long stats[DP_N_STATS],
                     uint64_t cycles[PMD_N_CYCLES])
 {
-    unsigned long long total_packets = 0;
+    unsigned long long total_packets;
     uint64_t total_cycles = 0;
     int i;
@@ -812,12 +812,11 @@ pmd_info_show_stats(struct ds *reply,
         } else {
             stats[i] = 0;
         }
-
-        if (i != DP_STAT_LOST) {
-            /* Lost packets are already included in DP_STAT_MISS */
-            total_packets += stats[i];
-        }
     }
 
+    /* Sum of all the matched and not matched packets gives the total. */
+    total_packets = stats[DP_STAT_EXACT_HIT] + stats[DP_STAT_MASKED_HIT]
+                    + stats[DP_STAT_MISS];
 
     for (i = 0; i < PMD_N_CYCLES; i++) {
         if (cycles[i] > pmd->cycles_zero[i]) {
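
The loop above now only subtracts the per-counter baselines; the total is computed explicitly from the three mutually exclusive outcome counters, with DP_STAT_LOST excluded because those packets are already counted inside DP_STAT_MISS. A standalone sketch of the arithmetic (not OVS code; the counter values are invented):

#include <stdio.h>

/* Enum names mirror dpif-netdev.c; the comments paraphrase their roles. */
enum dp_stat_type {
    DP_STAT_EXACT_HIT,   /* Matched in the exact-match cache (EMC). */
    DP_STAT_MASKED_HIT,  /* Matched in the masked classifier. */
    DP_STAT_MISS,        /* Matched no datapath flow. */
    DP_STAT_LOST,        /* Subset of the misses that could not be upcalled. */
    DP_N_STATS
};

int main(void)
{
    unsigned long long stats[DP_N_STATS] = {
        [DP_STAT_EXACT_HIT] = 900,
        [DP_STAT_MASKED_HIT] = 80,
        [DP_STAT_MISS] = 20,
        [DP_STAT_LOST] = 5,   /* Already included in the 20 misses. */
    };

    /* Matched plus not matched covers every packet exactly once. */
    unsigned long long total_packets = stats[DP_STAT_EXACT_HIT]
                                       + stats[DP_STAT_MASKED_HIT]
                                       + stats[DP_STAT_MISS];

    printf("total: %llu\n", total_packets);   /* 1000, not 1005. */
    return 0;
}
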
@@ -3864,8 +3863,9 @@ dpif_netdev_run(struct dpif *dpif)
                 dp_netdev_process_rxq_port(non_pmd,
                                            port->rxqs[i].rx,
                                            port->port_no);
-                cycles_count_intermediate(non_pmd, NULL, process_packets ?
-                                                         PMD_CYCLES_PROCESSING
-                                                       : PMD_CYCLES_IDLE);
+                cycles_count_intermediate(non_pmd, NULL,
+                                          process_packets
+                                          ? PMD_CYCLES_PROCESSING
+                                          : PMD_CYCLES_IDLE);
             }
         }
@@ -4858,8 +4858,11 @@ dp_netdev_queue_batches(struct dp_packet *pkt,
  * The function returns the number of packets that needs to be processed in the
  * 'packets' array (they have been moved to the beginning of the vector).
  *
- * If 'md_is_valid' is false, the metadata in 'packets' is not valid and must
- * be initialized by this function using 'port_no'.
+ * For performance reasons a caller may choose not to initialize the metadata
+ * in 'packets_'. If 'md_is_valid' is false, the metadata in 'packets'
+ * is not valid and must be initialized by this function using 'port_no'.
+ * If 'md_is_valid' is true, the metadata is already valid and 'port_no'
+ * will be ignored.
  */
 static inline size_t
 emc_processing(struct dp_netdev_pmd_thread *pmd,
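
The rewritten comment spells out a lazy-initialization contract: the fast path may hand over packets without metadata, and the function fills it in from 'port_no' on first entry only. A simplified stand-in model (none of these types are the real dpif-netdev ones):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct pkt_metadata {
    uint32_t in_port;
};

struct pkt {
    struct pkt_metadata md;
};

static void
process(struct pkt *p, bool md_is_valid, uint32_t port_no)
{
    if (!md_is_valid) {
        /* First pass: the caller skipped metadata init for speed,
         * so it is filled in here from 'port_no'. */
        p->md.in_port = port_no;
    }
    /* On recirculation 'md_is_valid' is true and 'port_no' is ignored. */
    printf("in_port=%u\n", p->md.in_port);
}

int main(void)
{
    struct pkt p = { { 0 } };
    process(&p, false, 3);   /* Fresh from a port: init from port_no. */
    process(&p, true, 99);   /* Recirculated: 99 is ignored. */
    return 0;
}
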
@@ -4872,13 +4875,13 @@ emc_processing(struct dp_netdev_pmd_thread *pmd,
     struct netdev_flow_key *key = &keys[0];
     size_t n_missed = 0, n_dropped = 0;
     struct dp_packet *packet;
-    const size_t size = dp_packet_batch_size(packets_);
+    const size_t cnt = dp_packet_batch_size(packets_);
     uint32_t cur_min;
     int i;
 
     atomic_read_relaxed(&pmd->dp->emc_insert_min, &cur_min);
 
-    DP_PACKET_BATCH_REFILL_FOR_EACH (i, size, packet, packets_) {
+    DP_PACKET_BATCH_REFILL_FOR_EACH (i, cnt, packet, packets_) {
         struct dp_netdev_flow *flow;
 
         if (OVS_UNLIKELY(dp_packet_size(packet) < ETH_HEADER_LEN)) {
@@ -4887,7 +4890,7 @@ emc_processing(struct dp_netdev_pmd_thread *pmd,
             continue;
         }
 
-        if (i != size - 1) {
+        if (i != cnt - 1) {
             struct dp_packet **packets = packets_->packets;
             /* Prefetch next packet data and metadata. */
             OVS_PREFETCH(dp_packet_data(packets[i+1]));
@@ -4918,7 +4921,7 @@ emc_processing(struct dp_netdev_pmd_thread *pmd,
     }
 
     dp_netdev_count_packet(pmd, DP_STAT_EXACT_HIT,
-                           size - n_dropped - n_missed);
+                           cnt - n_dropped - n_missed);
 
     return dp_packet_batch_size(packets_);
 }
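
The rename from 'size' to 'cnt' keeps the snapshot of the original batch size clearly separate from the live batch count: DP_PACKET_BATCH_REFILL_FOR_EACH (roughly speaking) empties the batch and lets the loop body re-add only the packets that should travel on to the next stage, so the pre-loop snapshot is the only valid loop bound. A simplified model of that refill pattern (not the real macro):

#include <stddef.h>
#include <stdio.h>

#define BATCH_MAX 32

struct batch {
    size_t count;
    int pkts[BATCH_MAX];
};

static void
refill_keep_even(struct batch *b)
{
    const size_t cnt = b->count;   /* Snapshot, like 'cnt' in the patch. */
    size_t kept = 0;

    for (size_t i = 0; i < cnt; i++) {
        int pkt = b->pkts[i];
        if (pkt % 2 == 0) {         /* Stand-in for "missed the EMC". */
            b->pkts[kept++] = pkt;  /* Refill: compact toward the front. */
        }                           /* Odd packets are "consumed" here. */
    }
    b->count = kept;
}

int main(void)
{
    struct batch b = { 5, { 1, 2, 3, 4, 5 } };
    refill_keep_even(&b);
    printf("%zu packets left\n", b.count);   /* 2 */
    return 0;
}
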
@@ -5092,10 +5095,8 @@ fast_path_processing(struct dp_netdev_pmd_thread *pmd,
 /* Packets enter the datapath from a port (or from recirculation) here.
  *
- * For performance reasons a caller may choose not to initialize the metadata
- * in 'packets': in this case 'mdinit' is false and this function needs to
- * initialize it using 'port_no'. If the metadata in 'packets' is already
- * valid, 'md_is_valid' must be true and 'port_no' will be ignored. */
+ * When 'md_is_valid' is true the metadata in 'packets' are already valid.
+ * When false the metadata in 'packets' need to be initialized. */
 static void
 dp_netdev_input__(struct dp_netdev_pmd_thread *pmd,
                   struct dp_packet_batch *packets,

lib/netdev-dpdk.c

@@ -279,7 +279,7 @@ struct dpdk_qos_ops {
      * For all QoS implementations it should always be non-null.
      */
     int (*qos_run)(struct qos_conf *qos_conf, struct rte_mbuf **pkts,
-                   int pkt_cnt);
+                   int pkt_cnt, bool may_steal);
 };
 
 /* dpdk_qos_ops for each type of user space QoS implementation */
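
The new flag encodes ownership: with 'may_steal' true the QoS implementation owns the packets it polices out and must free them; with false the caller keeps ownership, so a dropped packet must be left alone or it would be freed twice. A minimal sketch of the rule, using malloc'd buffers in place of rte_mbufs:

#include <stdbool.h>
#include <stdlib.h>

struct mbuf { int len; };

/* Returns how many of 'pkts' survived policing; survivors are compacted
 * to the front of the array, mirroring netdev_dpdk_policer_run(). */
static int
qos_run_sketch(struct mbuf **pkts, int pkt_cnt, bool may_steal)
{
    int cnt = 0;

    for (int i = 0; i < pkt_cnt; i++) {
        bool pass = pkts[i]->len <= 1500;   /* Stand-in policer check. */
        if (pass) {
            pkts[cnt++] = pkts[i];
        } else if (may_steal) {
            free(pkts[i]);    /* We own it: dropping means freeing. */
        }                     /* else: the caller still owns pkts[i]. */
    }
    return cnt;
}

int main(void)
{
    struct mbuf *a = malloc(sizeof *a), *b = malloc(sizeof *b);
    struct mbuf *pkts[2] = { a, b };
    a->len = 100;
    b->len = 9000;

    int cnt = qos_run_sketch(pkts, 2, false);
    /* cnt == 1; 'b' was policed out but NOT freed: we still own it. */
    free(b);
    free(a);
    return cnt == 1 ? 0 : 1;
}
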
@@ -303,14 +303,12 @@ static struct ovs_list dpdk_list OVS_GUARDED_BY(dpdk_mutex)
 static struct ovs_mutex dpdk_mp_mutex OVS_ACQ_AFTER(dpdk_mutex)
     = OVS_MUTEX_INITIALIZER;
 
-static struct ovs_list dpdk_mp_list OVS_GUARDED_BY(dpdk_mp_mutex)
-    = OVS_LIST_INITIALIZER(&dpdk_mp_list);
-
 struct dpdk_mp {
     struct rte_mempool *mp;
     int mtu;
     int socket_id;
-    int refcount;
+    char if_name[IFNAMSIZ];
+    unsigned mp_size;
     struct ovs_list list_node OVS_GUARDED_BY(dpdk_mp_mutex);
 };
@@ -492,45 +490,79 @@ ovs_rte_pktmbuf_init(struct rte_mempool *mp OVS_UNUSED,
     dp_packet_init_dpdk((struct dp_packet *) pkt, pkt->buf_len);
 }
 
-static struct dpdk_mp *
-dpdk_mp_create(int socket_id, int mtu)
+/*
+ * Full DPDK memory pool name must be unique
+ * and cannot be longer than RTE_MEMPOOL_NAMESIZE
+ */
+static char *
+dpdk_mp_name(struct dpdk_mp *dmp)
 {
-    struct dpdk_mp *dmp;
-    unsigned mp_size;
-    char *mp_name;
+    uint32_t h = hash_string(dmp->if_name, 0);
+    char *mp_name = xcalloc(RTE_MEMPOOL_NAMESIZE, sizeof *mp_name);
+    int ret = snprintf(mp_name, RTE_MEMPOOL_NAMESIZE, "ovs_%x_%d_%u",
+                       h, dmp->mtu, dmp->mp_size);
+    if (ret < 0 || ret >= RTE_MEMPOOL_NAMESIZE) {
+        return NULL;
+    }
+    return mp_name;
+}
 
-    dmp = dpdk_rte_mzalloc(sizeof *dmp);
+static struct dpdk_mp *
+dpdk_mp_create(struct netdev_dpdk *dev, int mtu)
+{
+    struct dpdk_mp *dmp = dpdk_rte_mzalloc(sizeof *dmp);
     if (!dmp) {
         return NULL;
     }
-    dmp->socket_id = socket_id;
+    dmp->socket_id = dev->requested_socket_id;
     dmp->mtu = mtu;
-    dmp->refcount = 1;
-    /* XXX: this is a really rough method of provisioning memory.
-     * It's impossible to determine what the exact memory requirements are
-     * when the number of ports and rxqs that utilize a particular mempool can
-     * change dynamically at runtime. For now, use this rough heurisitic.
+    ovs_strzcpy(dmp->if_name, dev->up.name, IFNAMSIZ);
+
+    /*
+     * XXX: rough estimation of memory required for port:
+     * <packets required to fill the device rxqs>
+     * + <packets that could be stuck on other ports txqs>
+     * + <packets in the pmd threads>
+     * + <additional memory for corner cases>
      */
-    if (mtu >= ETHER_MTU) {
-        mp_size = MAX_NB_MBUF;
-    } else {
-        mp_size = MIN_NB_MBUF;
-    }
+    dmp->mp_size = dev->requested_n_rxq * dev->requested_rxq_size
+                   + dev->requested_n_txq * dev->requested_txq_size
+                   + MIN(RTE_MAX_LCORE, dev->requested_n_rxq) * NETDEV_MAX_BURST
+                   + MIN_NB_MBUF;
+
+    bool mp_exists = false;
 
     do {
-        mp_name = xasprintf("ovs_mp_%d_%d_%u", dmp->mtu, dmp->socket_id,
-                            mp_size);
+        char *mp_name = dpdk_mp_name(dmp);
 
-        dmp->mp = rte_pktmbuf_pool_create(mp_name, mp_size,
+        VLOG_DBG("Requesting a mempool of %u mbufs for netdev %s "
+                 "with %d Rx and %d Tx queues.",
+                 dmp->mp_size, dev->up.name,
+                 dev->requested_n_rxq, dev->requested_n_txq);
+
+        dmp->mp = rte_pktmbuf_pool_create(mp_name, dmp->mp_size,
                                           MP_CACHE_SZ,
                                           sizeof (struct dp_packet)
                                           - sizeof (struct rte_mbuf),
                                           MBUF_SIZE(mtu)
                                           - sizeof(struct dp_packet),
-                                          socket_id);
+                                          dmp->socket_id);
         if (dmp->mp) {
-            VLOG_DBG("Allocated \"%s\" mempool with %u mbufs",
-                     mp_name, mp_size);
+            VLOG_DBG("Allocated \"%s\" mempool with %u mbufs", mp_name,
+                     dmp->mp_size);
+        } else if (rte_errno == EEXIST) {
+            /* A mempool with the same name already exists. We just
+             * retrieve its pointer to be returned to the caller. */
+            dmp->mp = rte_mempool_lookup(mp_name);
+            VLOG_DBG("A mempool with name %s already exists at %p.",
+                     mp_name, dmp->mp);
+            /* As the mempool create returned EEXIST we can expect the
+             * lookup has returned a valid pointer. If for some reason
+             * that's not the case we keep track of it. */
+            mp_exists = true;
+        } else {
+            VLOG_ERR("Failed mempool \"%s\" create request of %u mbufs",
+                     mp_name, dmp->mp_size);
         }
         free(mp_name);
 
         if (dmp->mp) {
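
The name now keys the pool to the port (a hash of the netdev name) plus the attributes that require a fresh pool (MTU and mbuf count), so reconfiguring with unchanged values regenerates the same name and lands in the EEXIST branch above. A standalone illustration (the FNV hash and the 26-byte limit are stand-ins for OVS's hash_string() and DPDK's RTE_MEMPOOL_NAMESIZE):

#include <stdint.h>
#include <stdio.h>

#define MEMPOOL_NAMESIZE 26   /* Illustrative; mirrors RTE_MEMPOOL_NAMESIZE. */

/* Toy FNV-1a, standing in for OVS's hash_string(). */
static uint32_t
hash_name(const char *s)
{
    uint32_t h = 2166136261u;
    while (*s) {
        h = (h ^ (uint8_t) *s++) * 16777619u;
    }
    return h;
}

int main(void)
{
    char mp_name[MEMPOOL_NAMESIZE];
    int mtu = 1500;
    unsigned mp_size = 40000;

    int ret = snprintf(mp_name, sizeof mp_name, "ovs_%x_%d_%u",
                       hash_name("dpdk0"), mtu, mp_size);
    if (ret < 0 || ret >= (int) sizeof mp_name) {
        return 1;   /* Name would have been truncated: fail, as OVS does. */
    }
    printf("%s\n", mp_name);   /* e.g. "ovs_<hash>_1500_40000" */
    return 0;
}
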
@@ -541,31 +573,20 @@ dpdk_mp_create(int socket_id, int mtu)
             rte_mempool_obj_iter(dmp->mp, ovs_rte_pktmbuf_init, NULL);
             return dmp;
         }
-    } while (rte_errno == ENOMEM && (mp_size /= 2) >= MIN_NB_MBUF);
+    } while (!mp_exists &&
+             (rte_errno == ENOMEM && (dmp->mp_size /= 2) >= MIN_NB_MBUF));
 
     rte_free(dmp);
 
     return NULL;
 }
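
A worked example of the per-port sizing heuristic, assuming MIN_NB_MBUF is 16384 and NETDEV_MAX_BURST is 32 (illustrative values; check the tree before relying on them):

#include <stdio.h>

#define MIN_NB_MBUF      16384   /* Assumed; see netdev-dpdk.c. */
#define NETDEV_MAX_BURST 32      /* Assumed; see dp-packet.h. */
#define RTE_MAX_LCORE    128     /* Typical DPDK build-time constant. */

#define MIN(a, b) ((a) < (b) ? (a) : (b))

int main(void)
{
    int n_rxq = 2, rxq_size = 2048;   /* Example port configuration. */
    int n_txq = 3, txq_size = 2048;

    unsigned mp_size = n_rxq * rxq_size   /* Fill the device rxqs. */
        + n_txq * txq_size                /* Packets stuck on txqs. */
        + MIN(RTE_MAX_LCORE, n_rxq) * NETDEV_MAX_BURST  /* In PMD threads. */
        + MIN_NB_MBUF;                    /* Corner-case headroom. */

    printf("mp_size = %u\n", mp_size);   /* 4096 + 6144 + 64 + 16384 = 26688 */
    return 0;
}
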
 static struct dpdk_mp *
-dpdk_mp_get(int socket_id, int mtu)
+dpdk_mp_get(struct netdev_dpdk *dev, int mtu)
 {
     struct dpdk_mp *dmp;
 
     ovs_mutex_lock(&dpdk_mp_mutex);
-    LIST_FOR_EACH (dmp, list_node, &dpdk_mp_list) {
-        if (dmp->socket_id == socket_id && dmp->mtu == mtu) {
-            dmp->refcount++;
-            goto out;
-        }
-    }
-
-    dmp = dpdk_mp_create(socket_id, mtu);
-    if (dmp) {
-        ovs_list_push_back(&dpdk_mp_list, &dmp->list_node);
-    }
-
-out:
+    dmp = dpdk_mp_create(dev, mtu);
     ovs_mutex_unlock(&dpdk_mp_mutex);
 
     return dmp;
@@ -574,18 +595,18 @@ out:
 static void
 dpdk_mp_put(struct dpdk_mp *dmp)
 {
+    char *mp_name;
+
     if (!dmp) {
         return;
     }
 
     ovs_mutex_lock(&dpdk_mp_mutex);
-    ovs_assert(dmp->refcount);
-
-    if (!--dmp->refcount) {
-        ovs_list_remove(&dmp->list_node);
+    mp_name = dpdk_mp_name(dmp);
+    VLOG_DBG("Releasing \"%s\" mempool", mp_name);
+    free(mp_name);
     rte_mempool_free(dmp->mp);
     rte_free(dmp);
-    }
     ovs_mutex_unlock(&dpdk_mp_mutex);
 }
@@ -600,7 +621,7 @@ netdev_dpdk_mempool_configure(struct netdev_dpdk *dev)
     uint32_t buf_size = dpdk_buf_size(dev->requested_mtu);
     struct dpdk_mp *mp;
 
-    mp = dpdk_mp_get(dev->requested_socket_id, FRAME_LEN_TO_MTU(buf_size));
+    mp = dpdk_mp_get(dev, FRAME_LEN_TO_MTU(buf_size));
     if (!mp) {
         VLOG_ERR("Failed to create memory pool for netdev "
                  "%s, with MTU %d on socket %d: %s\n",
@@ -1501,7 +1522,8 @@ netdev_dpdk_policer_pkt_handle(struct rte_meter_srtcm *meter,
 static int
 netdev_dpdk_policer_run(struct rte_meter_srtcm *meter,
-                        struct rte_mbuf **pkts, int pkt_cnt)
+                        struct rte_mbuf **pkts, int pkt_cnt,
+                        bool may_steal)
 {
     int i = 0;
     int cnt = 0;
@@ -1517,21 +1539,24 @@ netdev_dpdk_policer_run(struct rte_meter_srtcm *meter,
             }
             cnt++;
         } else {
+            if (may_steal) {
                 rte_pktmbuf_free(pkt);
+            }
         }
     }
 
     return cnt;
 }
 static int
 ingress_policer_run(struct ingress_policer *policer, struct rte_mbuf **pkts,
-                    int pkt_cnt)
+                    int pkt_cnt, bool may_steal)
 {
     int cnt = 0;
 
     rte_spinlock_lock(&policer->policer_lock);
-    cnt = netdev_dpdk_policer_run(&policer->in_policer, pkts, pkt_cnt);
+    cnt = netdev_dpdk_policer_run(&policer->in_policer, pkts,
+                                  pkt_cnt, may_steal);
     rte_spinlock_unlock(&policer->policer_lock);
 
     return cnt;
@@ -1635,7 +1660,7 @@ netdev_dpdk_vhost_rxq_recv(struct netdev_rxq *rxq,
         dropped = nb_rx;
         nb_rx = ingress_policer_run(policer,
                                     (struct rte_mbuf **) batch->packets,
-                                    nb_rx);
+                                    nb_rx, true);
         dropped -= nb_rx;
     }
@@ -1673,7 +1698,7 @@ netdev_dpdk_rxq_recv(struct netdev_rxq *rxq, struct dp_packet_batch *batch)
         dropped = nb_rx;
         nb_rx = ingress_policer_run(policer,
                                     (struct rte_mbuf **) batch->packets,
-                                    nb_rx);
+                                    nb_rx, true);
         dropped -= nb_rx;
     }
@@ -1692,13 +1717,13 @@ netdev_dpdk_rxq_recv(struct netdev_rxq *rxq, struct dp_packet_batch *batch)
 static inline int
 netdev_dpdk_qos_run(struct netdev_dpdk *dev, struct rte_mbuf **pkts,
-                    int cnt)
+                    int cnt, bool may_steal)
 {
     struct qos_conf *qos_conf = ovsrcu_get(struct qos_conf *, &dev->qos_conf);
 
     if (qos_conf) {
         rte_spinlock_lock(&qos_conf->lock);
-        cnt = qos_conf->ops->qos_run(qos_conf, pkts, cnt);
+        cnt = qos_conf->ops->qos_run(qos_conf, pkts, cnt, may_steal);
         rte_spinlock_unlock(&qos_conf->lock);
     }
@@ -1772,7 +1797,7 @@ __netdev_dpdk_vhost_send(struct netdev *netdev, int qid,
     cnt = netdev_dpdk_filter_packet_len(dev, cur_pkts, cnt);
     /* Check has QoS has been configured for the netdev */
-    cnt = netdev_dpdk_qos_run(dev, cur_pkts, cnt);
+    cnt = netdev_dpdk_qos_run(dev, cur_pkts, cnt, true);
     dropped = total_pkts - cnt;
 
     do {
@@ -1818,51 +1843,56 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch *batch)
#endif
     struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
     struct rte_mbuf *pkts[PKT_ARRAY_SIZE];
-    int dropped = 0;
-    int newcnt = 0;
-    int i;
+    uint32_t cnt = batch->count;
+    uint32_t dropped = 0;
+
+    if (dev->type != DPDK_DEV_VHOST) {
+        /* Check if QoS has been configured for this netdev. */
+        cnt = netdev_dpdk_qos_run(dev, (struct rte_mbuf **) batch->packets,
+                                  cnt, false);
+        dropped += batch->count - cnt;
+    }
 
     dp_packet_batch_apply_cutlen(batch);
 
-    for (i = 0; i < batch->count; i++) {
-        int size = dp_packet_size(batch->packets[i]);
+    uint32_t txcnt = 0;
+
+    for (uint32_t i = 0; i < cnt; i++) {
+        uint32_t size = dp_packet_size(batch->packets[i]);
 
         if (OVS_UNLIKELY(size > dev->max_packet_len)) {
-            VLOG_WARN_RL(&rl, "Too big size %d max_packet_len %d",
-                         (int) size, dev->max_packet_len);
+            VLOG_WARN_RL(&rl, "Too big size %u max_packet_len %d",
+                         size, dev->max_packet_len);
             dropped++;
             continue;
         }
 
-        pkts[newcnt] = rte_pktmbuf_alloc(dev->dpdk_mp->mp);
-
-        if (!pkts[newcnt]) {
-            dropped += batch->count - i;
+        pkts[txcnt] = rte_pktmbuf_alloc(dev->dpdk_mp->mp);
+
+        if (!pkts[txcnt]) {
+            dropped += cnt - i;
             break;
         }
 
         /* We have to do a copy for now */
-        memcpy(rte_pktmbuf_mtod(pkts[newcnt], void *),
+        memcpy(rte_pktmbuf_mtod(pkts[txcnt], void *),
                dp_packet_data(batch->packets[i]), size);
 
-        rte_pktmbuf_data_len(pkts[newcnt]) = size;
-        rte_pktmbuf_pkt_len(pkts[newcnt]) = size;
+        rte_pktmbuf_data_len(pkts[txcnt]) = size;
+        rte_pktmbuf_pkt_len(pkts[txcnt]) = size;
 
-        newcnt++;
+        txcnt++;
     }
 
-    if (dev->type == DPDK_DEV_VHOST) {
-        __netdev_dpdk_vhost_send(netdev, qid, (struct dp_packet **) pkts,
-                                 newcnt);
-    } else {
-        unsigned int qos_pkts = newcnt;
-
-        /* Check if QoS has been configured for this netdev. */
-        newcnt = netdev_dpdk_qos_run(dev, pkts, newcnt);
-
-        dropped += qos_pkts - newcnt;
-        dropped += netdev_dpdk_eth_tx_burst(dev, qid, pkts, newcnt);
+    if (OVS_LIKELY(txcnt)) {
+        if (dev->type == DPDK_DEV_VHOST) {
+            __netdev_dpdk_vhost_send(netdev, qid, (struct dp_packet **) pkts,
+                                     txcnt);
+        } else {
+            dropped += netdev_dpdk_eth_tx_burst(dev, qid, pkts, txcnt);
+        }
     }
 
     if (OVS_UNLIKELY(dropped)) {
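
Running QoS before the copy loop means no mbuf is allocated for a packet the policer would discard, and since the originals in 'batch' still belong to the caller the call must pass may_steal == false. A condensed model of the new control flow (plain ints stand in for packets):

#include <stdbool.h>
#include <stdio.h>

static int
police(int *pkts, int cnt, bool may_steal)
{
    (void) pkts;
    (void) may_steal;          /* false here: never free the originals. */
    return cnt > 2 ? 2 : cnt;  /* Toy rate limit: pass at most 2. */
}

int main(void)
{
    int batch[4] = { 10, 20, 30, 40 };   /* Caller-owned originals. */
    int pkts[4];                         /* Fresh "mbufs" for tx. */
    unsigned dropped = 0;
    int count = 4;

    /* QoS first, on the originals, with may_steal == false. */
    int cnt = police(batch, count, false);
    dropped += count - cnt;

    /* Copy stage runs only for the survivors. */
    int txcnt = 0;
    for (int i = 0; i < cnt; i++) {
        pkts[txcnt++] = batch[i];        /* Stand-in for alloc + memcpy. */
    }

    printf("tx=%d dropped=%u\n", txcnt, dropped);   /* tx=2 dropped=2 */
    return 0;
}
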
if (OVS_UNLIKELY(dropped)) {
@@ -1917,7 +1947,7 @@ netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
     dp_packet_batch_apply_cutlen(batch);
 
     cnt = netdev_dpdk_filter_packet_len(dev, pkts, cnt);
-    cnt = netdev_dpdk_qos_run(dev, pkts, cnt);
+    cnt = netdev_dpdk_qos_run(dev, pkts, cnt, true);
     dropped = batch->count - cnt;
 
     dropped += netdev_dpdk_eth_tx_burst(dev, qid, pkts, cnt);
@@ -3134,13 +3164,15 @@ egress_policer_qos_is_equal(const struct qos_conf *conf,
 }
 
 static int
-egress_policer_run(struct qos_conf *conf, struct rte_mbuf **pkts, int pkt_cnt)
+egress_policer_run(struct qos_conf *conf, struct rte_mbuf **pkts, int pkt_cnt,
+                   bool may_steal)
 {
     int cnt = 0;
     struct egress_policer *policer =
         CONTAINER_OF(conf, struct egress_policer, qos_conf);
 
-    cnt = netdev_dpdk_policer_run(&policer->egress_meter, pkts, pkt_cnt);
+    cnt = netdev_dpdk_policer_run(&policer->egress_meter, pkts,
+                                  pkt_cnt, may_steal);
 
     return cnt;
 }
@@ -3175,13 +3207,10 @@ netdev_dpdk_reconfigure(struct netdev *netdev)
     rte_eth_dev_stop(dev->port_id);
 
-    if (dev->mtu != dev->requested_mtu
-        || dev->socket_id != dev->requested_socket_id) {
-
     err = netdev_dpdk_mempool_configure(dev);
     if (err) {
         goto out;
     }
-    }
 
     netdev->n_txq = dev->requested_n_txq;
     netdev->n_rxq = dev->requested_n_rxq;
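
Since the pool size now depends on the requested queue counts and sizes as well, the old guard on MTU and socket alone no longer covers every relevant input, so the mempool is reconfigured unconditionally and the EEXIST/lookup path above takes care of the no-change case. A sketch of that idempotence (all names here are stand-ins):

#include <stdio.h>
#include <string.h>

static char current_name[64];

static const char *
pool_create(const char *name)
{
    if (strcmp(current_name, name) == 0) {
        return current_name;   /* "EEXIST": reuse the existing pool. */
    }
    snprintf(current_name, sizeof current_name, "%s", name);
    return current_name;       /* Fresh pool. */
}

int main(void)
{
    pool_create("ovs_ab12_1500_26688");
    /* Reconfigure with nothing changed: same name, pool reused. */
    pool_create("ovs_ab12_1500_26688");
    /* MTU changed: different name, new pool created. */
    pool_create("ovs_ab12_9000_26688");
    return 0;
}
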
@@ -3218,18 +3247,19 @@ dpdk_vhost_reconfigure_helper(struct netdev_dpdk *dev)
     netdev_dpdk_remap_txqs(dev);
 
-    if (dev->requested_socket_id != dev->socket_id
-        || dev->requested_mtu != dev->mtu) {
     err = netdev_dpdk_mempool_configure(dev);
     if (err) {
         return err;
     } else {
         netdev_change_seq_changed(&dev->up);
     }
-    }
 
     if (netdev_dpdk_get_vid(dev) >= 0) {
         if (dev->vhost_reconfigured == false) {
             dev->vhost_reconfigured = true;
             /* Carrier status may need updating. */
             netdev_change_seq_changed(&dev->up);
         }
     }
 
     return 0;