/*
 * Copyright (c) 2014, 2015, 2016 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <config.h>

#include <string.h>
#include <signal.h>
#include <stdlib.h>
#include <pthread.h>
#include <errno.h>
#include <sched.h>
#include <unistd.h>
#include <sys/stat.h>
#include <stdio.h>
#include <sys/types.h>
#include <getopt.h>

#include "dirs.h"
#include "dp-packet.h"
#include "dpif-netdev.h"
#include "fatal-signal.h"
#include "netdev-dpdk.h"
#include "netdev-provider.h"
#include "netdev-vport.h"
#include "odp-util.h"
#include "openvswitch/dynamic-string.h"
#include "openvswitch/list.h"
#include "openvswitch/ofp-print.h"
#include "openvswitch/vlog.h"
#include "ovs-numa.h"
#include "ovs-thread.h"
#include "ovs-rcu.h"
#include "packets.h"
#include "shash.h"
#include "smap.h"
#include "sset.h"
#include "unaligned.h"
#include "timeval.h"
#include "unixctl.h"

#include "rte_config.h"
#include "rte_mbuf.h"
#include "rte_meter.h"
#include "rte_virtio_net.h"

VLOG_DEFINE_THIS_MODULE(dpdk);
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);

#define DPDK_PORT_WATCHDOG_INTERVAL 5

#define OVS_CACHE_LINE_SIZE CACHE_LINE_SIZE
#define OVS_VPORT_DPDK "ovs_dpdk"

/*
 * We need to reserve tons of extra space in the mbufs so we can align the
 * DMA addresses to 4KB.
 * The minimum mbuf size is limited to avoid scatter behaviour and drop in
 * performance for standard Ethernet MTU.
 */
#define ETHER_HDR_MAX_LEN           (ETHER_HDR_LEN + ETHER_CRC_LEN \
                                     + (2 * VLAN_HEADER_LEN))
#define MTU_TO_FRAME_LEN(mtu)       ((mtu) + ETHER_HDR_LEN + ETHER_CRC_LEN)
#define MTU_TO_MAX_FRAME_LEN(mtu)   ((mtu) + ETHER_HDR_MAX_LEN)
#define FRAME_LEN_TO_MTU(frame_len) ((frame_len) - ETHER_HDR_LEN \
                                     - ETHER_CRC_LEN)
#define MBUF_SIZE(mtu)              (MTU_TO_MAX_FRAME_LEN(mtu)  \
                                     + sizeof(struct dp_packet) \
                                     + RTE_PKTMBUF_HEADROOM)
#define NETDEV_DPDK_MBUF_ALIGN      1024
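
/* An mbuf therefore holds the struct dp_packet metadata (kept in the mbuf
 * private area), the RTE_PKTMBUF_HEADROOM and room for a maximum-size frame.
 * For example, with the standard 1500 byte MTU, MTU_TO_MAX_FRAME_LEN(1500)
 * is 1526 bytes, which dpdk_buf_size() below rounds up (together with the
 * typical 128 byte headroom) to a 2048 byte buffer. */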

/* Max and min number of packets in the mempool.  OVS tries to allocate a
 * mempool with MAX_NB_MBUF: if this fails (because the system doesn't have
 * enough hugepages) we keep halving the number until the allocation succeeds
 * or we reach MIN_NB_MBUF. */
#define MAX_NB_MBUF          (4096 * 64)
#define MIN_NB_MBUF          (4096 * 4)
#define MP_CACHE_SZ          RTE_MEMPOOL_CACHE_MAX_SIZE

/* MAX_NB_MBUF can be divided by 2 many times, until MIN_NB_MBUF. */
BUILD_ASSERT_DECL(MAX_NB_MBUF % ROUND_DOWN_POW2(MAX_NB_MBUF / MIN_NB_MBUF)
                  == 0);

/* The smallest possible NB_MBUF that we're going to try should be a multiple
 * of MP_CACHE_SZ.  This is advised by DPDK documentation. */
BUILD_ASSERT_DECL((MAX_NB_MBUF / ROUND_DOWN_POW2(MAX_NB_MBUF / MIN_NB_MBUF))
                  % MP_CACHE_SZ == 0);
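
/* With the values above the candidate pool sizes are 262144, 131072, 65536,
 * 32768 and finally 16384 mbufs, i.e. MAX_NB_MBUF halved four times down to
 * MIN_NB_MBUF. */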

/*
 * DPDK XSTATS counter name definitions.
 */
#define XSTAT_RX_64_PACKETS              "rx_size_64_packets"
#define XSTAT_RX_65_TO_127_PACKETS       "rx_size_65_to_127_packets"
#define XSTAT_RX_128_TO_255_PACKETS      "rx_size_128_to_255_packets"
#define XSTAT_RX_256_TO_511_PACKETS      "rx_size_256_to_511_packets"
#define XSTAT_RX_512_TO_1023_PACKETS     "rx_size_512_to_1023_packets"
#define XSTAT_RX_1024_TO_1522_PACKETS    "rx_size_1024_to_1522_packets"
#define XSTAT_RX_1523_TO_MAX_PACKETS     "rx_size_1523_to_max_packets"

#define XSTAT_TX_64_PACKETS              "tx_size_64_packets"
#define XSTAT_TX_65_TO_127_PACKETS       "tx_size_65_to_127_packets"
#define XSTAT_TX_128_TO_255_PACKETS      "tx_size_128_to_255_packets"
#define XSTAT_TX_256_TO_511_PACKETS      "tx_size_256_to_511_packets"
#define XSTAT_TX_512_TO_1023_PACKETS     "tx_size_512_to_1023_packets"
#define XSTAT_TX_1024_TO_1522_PACKETS    "tx_size_1024_to_1522_packets"
#define XSTAT_TX_1523_TO_MAX_PACKETS     "tx_size_1523_to_max_packets"

#define XSTAT_TX_MULTICAST_PACKETS       "tx_multicast_packets"
#define XSTAT_RX_BROADCAST_PACKETS       "rx_broadcast_packets"
#define XSTAT_TX_BROADCAST_PACKETS       "tx_broadcast_packets"
#define XSTAT_RX_UNDERSIZED_ERRORS       "rx_undersized_errors"
#define XSTAT_RX_OVERSIZE_ERRORS         "rx_oversize_errors"
#define XSTAT_RX_FRAGMENTED_ERRORS       "rx_fragmented_errors"
#define XSTAT_RX_JABBER_ERRORS           "rx_jabber_errors"

#define SOCKET0              0

#define NIC_PORT_RX_Q_SIZE 2048  /* Size of Physical NIC RX Queue, Max (n+32<=4096) */
#define NIC_PORT_TX_Q_SIZE 2048  /* Size of Physical NIC TX Queue, Max (n+32<=4096) */

#define OVS_VHOST_MAX_QUEUE_NUM 1024  /* Maximum number of vHost TX queues. */
#define OVS_VHOST_QUEUE_MAP_UNKNOWN (-1) /* Mapping not initialized. */
#define OVS_VHOST_QUEUE_DISABLED    (-2) /* Queue was disabled by guest and not
                                          * yet mapped to another queue. */

#ifdef VHOST_CUSE
static char *cuse_dev_name = NULL;    /* Character device cuse_dev_name. */
#endif
static char *vhost_sock_dir = NULL;   /* Location of vhost-user sockets. */

#define VHOST_ENQ_RETRY_NUM 8
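
/* Default Ethernet device configuration: RSS over IP/UDP/TCP, with header
 * split, checksum offload, VLAN filtering, jumbo frames and CRC stripping
 * all disabled. */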
static const struct rte_eth_conf port_conf = {
    .rxmode = {
        .mq_mode = ETH_MQ_RX_RSS,
        .split_hdr_size = 0,
        .header_split   = 0, /* Header Split disabled */
        .hw_ip_checksum = 0, /* IP checksum offload disabled */
        .hw_vlan_filter = 0, /* VLAN filtering disabled */
        .jumbo_frame    = 0, /* Jumbo Frame Support disabled */
        .hw_strip_crc   = 0,
    },
    .rx_adv_conf = {
        .rss_conf = {
            .rss_key = NULL,
            .rss_hf = ETH_RSS_IP | ETH_RSS_UDP | ETH_RSS_TCP,
        },
    },
    .txmode = {
        .mq_mode = ETH_MQ_TX_NONE,
    },
};

enum { MAX_TX_QUEUE_LEN = 384 };
enum { DPDK_RING_SIZE = 256 };
BUILD_ASSERT_DECL(IS_POW2(DPDK_RING_SIZE));
enum { DRAIN_TSC = 200000ULL };

enum dpdk_dev_type {
    DPDK_DEV_ETH = 0,
    DPDK_DEV_VHOST = 1,
};

static int rte_eal_init_ret = ENODEV;

static struct ovs_mutex dpdk_mutex = OVS_MUTEX_INITIALIZER;

/* Quality of Service */

/* An instance of a QoS configuration.  Always associated with a particular
 * network device.
 *
 * Each QoS implementation subclasses this with whatever additional data it
 * needs.
 */
struct qos_conf {
    const struct dpdk_qos_ops *ops;
};

/* A particular implementation of dpdk QoS operations.
 *
 * The functions below return 0 if successful or a positive errno value on
 * failure, except where otherwise noted.  All of them must be provided,
 * except where otherwise noted.
 */
struct dpdk_qos_ops {

    /* Name of the QoS type */
    const char *qos_name;

    /* Called to construct the QoS implementation on 'netdev'.  The
     * implementation should make the appropriate calls to configure QoS
     * according to 'details'.  The implementation may assume that any current
     * QoS configuration already installed should be destroyed before
     * constructing the new configuration.
     *
     * The contents of 'details' should be documented as valid for 'ovs_name'
     * in the "other_config" column in the "QoS" table in vswitchd/vswitch.xml
     * (which is built as ovs-vswitchd.conf.db(8)).
     *
     * This function must return 0 if and only if it sets 'netdev->qos_conf'
     * to an initialized 'struct qos_conf'.
     *
     * For all QoS implementations it should always be non-null.
     */
    int (*qos_construct)(struct netdev *netdev, const struct smap *details);

    /* Destroys the data structures allocated by the implementation as part of
     * 'qos_conf'.
     *
     * For all QoS implementations it should always be non-null.
     */
    void (*qos_destruct)(struct netdev *netdev, struct qos_conf *conf);

    /* Retrieves details of 'netdev->qos_conf' configuration into 'details'.
     *
     * The contents of 'details' should be documented as valid for 'ovs_name'
     * in the "other_config" column in the "QoS" table in vswitchd/vswitch.xml
     * (which is built as ovs-vswitchd.conf.db(8)).
     */
    int (*qos_get)(const struct netdev *netdev, struct smap *details);

    /* Reconfigures 'netdev->qos_conf' according to 'details', performing any
     * required calls to complete the reconfiguration.
     *
     * The contents of 'details' should be documented as valid for 'ovs_name'
     * in the "other_config" column in the "QoS" table in vswitchd/vswitch.xml
     * (which is built as ovs-vswitchd.conf.db(8)).
     *
     * This function may be null if 'qos_conf' is not configurable.
     */
    int (*qos_set)(struct netdev *netdev, const struct smap *details);

    /* Modifies an array of rte_mbufs.  The modification is specific to
     * each QoS implementation.
     *
     * The function takes an array of mbufs and an int representing the
     * current number of mbufs present in the array.
     *
     * After the function has performed a QoS modification to the array of
     * mbufs it returns an int representing the number of mbufs now present
     * in the array.  This value can then be passed to the port send function
     * along with the modified array for transmission.
     *
     * For all QoS implementations it should always be non-null.
     */
    int (*qos_run)(struct netdev *netdev, struct rte_mbuf **pkts,
                   int pkt_cnt);
};

/* dpdk_qos_ops for each type of user space QoS implementation */
static const struct dpdk_qos_ops egress_policer_ops;

/*
 * Array of dpdk_qos_ops, contains pointer to all supported QoS
 * operations.
 */
static const struct dpdk_qos_ops *const qos_confs[] = {
    &egress_policer_ops,
    NULL
};

/* Contains all 'struct dpdk_dev's. */
static struct ovs_list dpdk_list OVS_GUARDED_BY(dpdk_mutex)
    = OVS_LIST_INITIALIZER(&dpdk_list);

static struct ovs_list dpdk_mp_list OVS_GUARDED_BY(dpdk_mutex)
    = OVS_LIST_INITIALIZER(&dpdk_mp_list);

/* This mutex must be used by non pmd threads when allocating or freeing
 * mbufs through mempools.  Since dpdk_queue_pkts() and dpdk_queue_flush() may
 * use mempools, a non pmd thread should hold this mutex while calling them. */
static struct ovs_mutex nonpmd_mempool_mutex = OVS_MUTEX_INITIALIZER;

struct dpdk_mp {
    struct rte_mempool *mp;
    int mtu;
    int socket_id;
    int refcount;
    struct ovs_list list_node OVS_GUARDED_BY(dpdk_mutex);
};

/* There should be one 'struct dpdk_tx_queue' created for
 * each cpu core. */
struct dpdk_tx_queue {
    bool flush_tx;                 /* Set to true to flush queue every time
                                    * packets are queued. */
    int count;
    rte_spinlock_t tx_lock;        /* Protects the members and the NIC queue
                                    * from concurrent access.  It is used only
                                    * if the queue is shared among different
                                    * pmd threads (see 'txq_needs_locking'). */
    int map;                       /* Mapping of configured vhost-user queues
                                    * to the queues enabled by the guest. */
    uint64_t tsc;
    struct rte_mbuf *burst_pkts[MAX_TX_QUEUE_LEN];
};

/* dpdk has no way to remove dpdk ring ethernet devices,
 * so we have to keep them around once they've been created. */
static struct ovs_list dpdk_ring_list OVS_GUARDED_BY(dpdk_mutex)
    = OVS_LIST_INITIALIZER(&dpdk_ring_list);

struct dpdk_ring {
    /* For the client rings */
    struct rte_ring *cring_tx;
    struct rte_ring *cring_rx;
    unsigned int user_port_id; /* User given port no, parsed from port name */
    int eth_port_id; /* ethernet device port id */
    struct ovs_list list_node OVS_GUARDED_BY(dpdk_mutex);
};
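
/* Ingress policer, backed by DPDK's single-rate three-color marker (srTCM).
 * 'policer_lock' protects the meter state from concurrent use. */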
struct ingress_policer {
    struct rte_meter_srtcm_params app_srtcm_params;
    struct rte_meter_srtcm in_policer;
    rte_spinlock_t policer_lock;
};
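
/* State of a DPDK-backed netdev.  Covers both physical (DPDK_DEV_ETH) and
 * vhost (DPDK_DEV_VHOST) ports. */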
struct netdev_dpdk {
    struct netdev up;
    int port_id;
    int max_packet_len;
    enum dpdk_dev_type type;

    struct dpdk_tx_queue *tx_q;

    struct ovs_mutex mutex OVS_ACQ_AFTER(dpdk_mutex);

    struct dpdk_mp *dpdk_mp;
    int mtu;
    int socket_id;
    int buf_size;
    struct netdev_stats stats;
    /* Protects stats */
    rte_spinlock_t stats_lock;

    struct eth_addr hwaddr;
    enum netdev_flags flags;

    struct rte_eth_link link;
    int link_reset_cnt;

    /* The user might request more txqs than the NIC has.  We remap those
     * ('up.n_txq') on these ('real_n_txq').
     * If the numbers match, 'txq_needs_locking' is false, otherwise it is
     * true and we will take a spinlock on transmission. */
    int real_n_txq;
    int real_n_rxq;
    bool txq_needs_locking;

    /* virtio-net structure for vhost device */
    OVSRCU_TYPE(struct virtio_net *) virtio_dev;

    /* Identifier used to distinguish vhost devices from each other. */
    char vhost_id[PATH_MAX];

    /* In dpdk_list. */
    struct ovs_list list_node OVS_GUARDED_BY(dpdk_mutex);

    /* QoS configuration and lock for the device */
    struct qos_conf *qos_conf;
    rte_spinlock_t qos_lock;

    /* The following properties cannot be changed when a device is running,
     * so we remember the request and update them next time
     * netdev_dpdk*_reconfigure() is called. */
    int requested_n_txq;
    int requested_n_rxq;

    /* Ingress Policer */
    OVSRCU_TYPE(struct ingress_policer *) ingress_policer;
    uint32_t policer_rate;
    uint32_t policer_burst;
};

struct netdev_rxq_dpdk {
    struct netdev_rxq up;
    int port_id;
};

static bool dpdk_thread_is_pmd(void);

static int netdev_dpdk_construct(struct netdev *);

struct virtio_net * netdev_dpdk_get_virtio(const struct netdev_dpdk *dev);

struct ingress_policer *
netdev_dpdk_get_ingress_policer(const struct netdev_dpdk *dev);

static bool
is_dpdk_class(const struct netdev_class *class)
{
    return class->construct == netdev_dpdk_construct;
}

/* DPDK NIC drivers allocate RX buffers at a particular granularity, typically
 * aligned at 1k or less.  If a declared mbuf size is not a multiple of this
 * value, insufficient buffers are allocated to accommodate the packet in its
 * entirety.  Furthermore, certain drivers need to ensure that there is also
 * sufficient space in the Rx buffer to accommodate two VLAN tags (for QinQ
 * frames).  If the RX buffer is too small, then the driver enables scatter RX
 * behaviour, which reduces performance.  To prevent this, use a buffer size
 * that is closest to 'mtu', but which satisfies the aforementioned criteria.
 */
static uint32_t
dpdk_buf_size(int mtu)
{
    return ROUND_UP((MTU_TO_MAX_FRAME_LEN(mtu) + RTE_PKTMBUF_HEADROOM),
                     NETDEV_DPDK_MBUF_ALIGN);
}

/* XXX: use dpdk malloc for entire OVS.  In fact huge pages should be used
 * for all other segments data, bss and text. */
static void *
dpdk_rte_mzalloc(size_t sz)
{
    void *ptr;

    ptr = rte_zmalloc(OVS_VPORT_DPDK, sz, OVS_CACHE_LINE_SIZE);
    if (ptr == NULL) {
        out_of_memory();
    }
    return ptr;
}

/* XXX this function should be called only by pmd threads (or by non pmd
 * threads holding the nonpmd_mempool_mutex). */
void
free_dpdk_buf(struct dp_packet *p)
{
    struct rte_mbuf *pkt = (struct rte_mbuf *) p;

    rte_pktmbuf_free(pkt);
}
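
/* Per-mbuf constructor for OVS mempools: runs DPDK's default
 * rte_pktmbuf_init() and then initializes the struct dp_packet header that
 * OVS keeps in the mbuf private area. */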
static void
ovs_rte_pktmbuf_init(struct rte_mempool *mp,
                     void *opaque_arg OVS_UNUSED,
                     void *_m,
                     unsigned i OVS_UNUSED)
{
    struct rte_mbuf *m = _m;

    rte_pktmbuf_init(mp, opaque_arg, _m, i);

    dp_packet_init_dpdk((struct dp_packet *) m, m->buf_len);
}
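
/* Returns a mempool for ('socket_id', 'mtu'), reusing an existing one when
 * possible.  Allocation starts at MAX_NB_MBUF mbufs and is halved on ENOMEM
 * until it succeeds or would fall below MIN_NB_MBUF. */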
static struct dpdk_mp *
dpdk_mp_get(int socket_id, int mtu) OVS_REQUIRES(dpdk_mutex)
{
    struct dpdk_mp *dmp = NULL;
    char mp_name[RTE_MEMPOOL_NAMESIZE];
    unsigned mp_size;
    struct rte_pktmbuf_pool_private mbp_priv;

    LIST_FOR_EACH (dmp, list_node, &dpdk_mp_list) {
        if (dmp->socket_id == socket_id && dmp->mtu == mtu) {
            dmp->refcount++;
            return dmp;
        }
    }

    dmp = dpdk_rte_mzalloc(sizeof *dmp);
    dmp->socket_id = socket_id;
    dmp->mtu = mtu;
    dmp->refcount = 1;
    mbp_priv.mbuf_data_room_size = MBUF_SIZE(mtu) - sizeof(struct dp_packet);
    mbp_priv.mbuf_priv_size = sizeof(struct dp_packet)
                              - sizeof(struct rte_mbuf);

    mp_size = MAX_NB_MBUF;
    do {
        if (snprintf(mp_name, RTE_MEMPOOL_NAMESIZE, "ovs_mp_%d_%d_%u",
                     dmp->mtu, dmp->socket_id, mp_size) < 0) {
            return NULL;
        }

        dmp->mp = rte_mempool_create(mp_name, mp_size, MBUF_SIZE(mtu),
                                     MP_CACHE_SZ,
                                     sizeof(struct rte_pktmbuf_pool_private),
                                     rte_pktmbuf_pool_init, &mbp_priv,
                                     ovs_rte_pktmbuf_init, NULL,
                                     socket_id, 0);
    } while (!dmp->mp && rte_errno == ENOMEM && (mp_size /= 2) >= MIN_NB_MBUF);

    if (dmp->mp == NULL) {
        return NULL;
    } else {
        VLOG_DBG("Allocated \"%s\" mempool with %u mbufs", mp_name, mp_size);
    }

    ovs_list_push_back(&dpdk_mp_list, &dmp->list_node);
    return dmp;
}

static void
dpdk_mp_put(struct dpdk_mp *dmp)
{

    if (!dmp) {
        return;
    }

    dmp->refcount--;
    ovs_assert(dmp->refcount >= 0);

#if 0
    /* I could not find any API to destroy mp. */
    if (dmp->refcount == 0) {
        list_delete(dmp->list_node);
        /* destroy mp-pool. */
    }
#endif
}
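
/* Refreshes the cached link state of 'dev' from the NIC and, when it has
 * changed, bumps the netdev change sequence number and logs the new state. */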
static void
check_link_status(struct netdev_dpdk *dev)
{
    struct rte_eth_link link;

    rte_eth_link_get_nowait(dev->port_id, &link);

    if (dev->link.link_status != link.link_status) {
        netdev_change_seq_changed(&dev->up);

        dev->link_reset_cnt++;
        dev->link = link;
        if (dev->link.link_status) {
            VLOG_DBG_RL(&rl, "Port %d Link Up - speed %u Mbps - %s",
                        dev->port_id, (unsigned) dev->link.link_speed,
                        (dev->link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
                        ("full-duplex") : ("half-duplex"));
        } else {
            VLOG_DBG_RL(&rl, "Port %d Link Down", dev->port_id);
        }
    }
}
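
/* Watchdog thread: polls the link state of every DPDK port every
 * DPDK_PORT_WATCHDOG_INTERVAL seconds. */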
static void *
dpdk_watchdog(void *dummy OVS_UNUSED)
{
    struct netdev_dpdk *dev;

    pthread_detach(pthread_self());

    for (;;) {
        ovs_mutex_lock(&dpdk_mutex);
        LIST_FOR_EACH (dev, list_node, &dpdk_list) {
            ovs_mutex_lock(&dev->mutex);
            check_link_status(dev);
            ovs_mutex_unlock(&dev->mutex);
        }
        ovs_mutex_unlock(&dpdk_mutex);
        xsleep(DPDK_PORT_WATCHDOG_INTERVAL);
    }

    return NULL;
}

static int
dpdk_eth_dev_queue_setup(struct netdev_dpdk *dev, int n_rxq, int n_txq)
{
    int diag = 0;
    int i;

    /* A device may report more queues than it makes available (this has
     * been observed for Intel xl710, which reserves some of them for
     * SRIOV): rte_eth_*_queue_setup will fail if a queue is not
     * available.  When this happens we can retry the configuration
     * and request fewer queues. */
    while (n_rxq && n_txq) {
        if (diag) {
            VLOG_INFO("Retrying setup with (rxq:%d txq:%d)", n_rxq, n_txq);
        }

        diag = rte_eth_dev_configure(dev->port_id, n_rxq, n_txq, &port_conf);
        if (diag) {
            break;
        }

        for (i = 0; i < n_txq; i++) {
            diag = rte_eth_tx_queue_setup(dev->port_id, i, NIC_PORT_TX_Q_SIZE,
                                          dev->socket_id, NULL);
            if (diag) {
                VLOG_INFO("Interface %s txq(%d) setup error: %s",
                          dev->up.name, i, rte_strerror(-diag));
                break;
            }
        }

        if (i != n_txq) {
            /* Retry with fewer tx queues */
            n_txq = i;
            continue;
        }

        for (i = 0; i < n_rxq; i++) {
            diag = rte_eth_rx_queue_setup(dev->port_id, i, NIC_PORT_RX_Q_SIZE,
                                          dev->socket_id, NULL,
                                          dev->dpdk_mp->mp);
            if (diag) {
                VLOG_INFO("Interface %s rxq(%d) setup error: %s",
                          dev->up.name, i, rte_strerror(-diag));
                break;
            }
        }

        if (i != n_rxq) {
            /* Retry with fewer rx queues */
            n_rxq = i;
            continue;
        }

        dev->up.n_rxq = n_rxq;
        dev->real_n_txq = n_txq;

        return 0;
    }

    return diag;
}

static int
dpdk_eth_dev_init(struct netdev_dpdk *dev) OVS_REQUIRES(dpdk_mutex)
{
    struct rte_pktmbuf_pool_private *mbp_priv;
    struct rte_eth_dev_info info;
    struct ether_addr eth_addr;
    int diag;
    int n_rxq, n_txq;

    if (dev->port_id < 0 || dev->port_id >= rte_eth_dev_count()) {
        return ENODEV;
    }

    rte_eth_dev_info_get(dev->port_id, &info);

    n_rxq = MIN(info.max_rx_queues, dev->up.n_rxq);
    n_txq = MIN(info.max_tx_queues, dev->up.n_txq);

    diag = dpdk_eth_dev_queue_setup(dev, n_rxq, n_txq);
    if (diag) {
        VLOG_ERR("Interface %s(rxq:%d txq:%d) configure error: %s",
                 dev->up.name, n_rxq, n_txq, rte_strerror(-diag));
        return -diag;
    }

    diag = rte_eth_dev_start(dev->port_id);
    if (diag) {
        VLOG_ERR("Interface %s start error: %s", dev->up.name,
                 rte_strerror(-diag));
        return -diag;
    }

    rte_eth_promiscuous_enable(dev->port_id);
    rte_eth_allmulticast_enable(dev->port_id);

    memset(&eth_addr, 0x0, sizeof(eth_addr));
    rte_eth_macaddr_get(dev->port_id, &eth_addr);
    VLOG_INFO_RL(&rl, "Port %d: "ETH_ADDR_FMT,
                 dev->port_id, ETH_ADDR_BYTES_ARGS(eth_addr.addr_bytes));

    memcpy(dev->hwaddr.ea, eth_addr.addr_bytes, ETH_ADDR_LEN);
    rte_eth_link_get_nowait(dev->port_id, &dev->link);

    mbp_priv = rte_mempool_get_priv(dev->dpdk_mp->mp);
    dev->buf_size = mbp_priv->mbuf_data_room_size - RTE_PKTMBUF_HEADROOM;

    dev->flags = NETDEV_UP | NETDEV_PROMISC;
    return 0;
}

static struct netdev_dpdk *
netdev_dpdk_cast(const struct netdev *netdev)
{
    return CONTAINER_OF(netdev, struct netdev_dpdk, up);
}

static struct netdev *
netdev_dpdk_alloc(void)
{
    struct netdev_dpdk *dev;

    if (!rte_eal_init_ret) { /* Only after successful initialization */
        dev = dpdk_rte_mzalloc(sizeof *dev);
        if (dev) {
            return &dev->up;
        }
    }
    return NULL;
}
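
/* Allocates and initializes 'n_txqs' transmit queues for 'dev'. */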
static void
netdev_dpdk_alloc_txq(struct netdev_dpdk *dev, unsigned int n_txqs)
{
    unsigned i;

    dev->tx_q = dpdk_rte_mzalloc(n_txqs * sizeof *dev->tx_q);
    for (i = 0; i < n_txqs; i++) {
        int numa_id = ovs_numa_get_numa_id(i);

        if (!dev->txq_needs_locking) {
            /* Each index is considered as a cpu core id, since there should
             * be one tx queue for each cpu core.  If the corresponding core
             * is not on the same numa node as 'dev', flag 'flush_tx'. */
            dev->tx_q[i].flush_tx = dev->socket_id == numa_id;
        } else {
            /* Queues are shared among CPUs. Always flush */
            dev->tx_q[i].flush_tx = true;
        }

        /* Initialize map for vhost devices. */
        dev->tx_q[i].map = OVS_VHOST_QUEUE_MAP_UNKNOWN;
        rte_spinlock_init(&dev->tx_q[i].tx_lock);
    }
}

static int
netdev_dpdk_init(struct netdev *netdev, unsigned int port_no,
                 enum dpdk_dev_type type)
    OVS_REQUIRES(dpdk_mutex)
{
    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
    int sid;
    int err = 0;
    uint32_t buf_size;

    ovs_mutex_init(&dev->mutex);
    ovs_mutex_lock(&dev->mutex);

    rte_spinlock_init(&dev->stats_lock);

    /* If the 'sid' is negative, it means that the kernel failed
     * to obtain the PCI numa info.  In that situation, always
     * use 'SOCKET0'. */
    if (type == DPDK_DEV_ETH) {
        sid = rte_eth_dev_socket_id(port_no);
    } else {
        sid = rte_lcore_to_socket_id(rte_get_master_lcore());
    }

    dev->socket_id = sid < 0 ? SOCKET0 : sid;
    dev->port_id = port_no;
    dev->type = type;
    dev->flags = 0;
    dev->mtu = ETHER_MTU;
    dev->max_packet_len = MTU_TO_FRAME_LEN(dev->mtu);

    buf_size = dpdk_buf_size(dev->mtu);
    dev->dpdk_mp = dpdk_mp_get(dev->socket_id, FRAME_LEN_TO_MTU(buf_size));
    if (!dev->dpdk_mp) {
        err = ENOMEM;
        goto unlock;
    }

    /* Initialise QoS configuration to NULL and qos lock to unlocked */
    dev->qos_conf = NULL;
    rte_spinlock_init(&dev->qos_lock);

    /* Initialise rcu pointer for ingress policer to NULL */
    ovsrcu_init(&dev->ingress_policer, NULL);
    dev->policer_rate = 0;
    dev->policer_burst = 0;

    netdev->n_txq = NR_QUEUE;
    netdev->n_rxq = NR_QUEUE;
    dev->requested_n_rxq = NR_QUEUE;
    dev->requested_n_txq = NR_QUEUE;
    dev->real_n_txq = NR_QUEUE;

    if (type == DPDK_DEV_ETH) {
        netdev_dpdk_alloc_txq(dev, NR_QUEUE);
        err = dpdk_eth_dev_init(dev);
        if (err) {
            goto unlock;
        }
    } else {
        netdev_dpdk_alloc_txq(dev, OVS_VHOST_MAX_QUEUE_NUM);
        /* Enable DPDK_DEV_VHOST device and set promiscuous mode flag. */
        dev->flags = NETDEV_UP | NETDEV_PROMISC;
    }

    ovs_list_push_back(&dpdk_list, &dev->list_node);

unlock:
    if (err) {
        rte_free(dev->tx_q);
    }
    ovs_mutex_unlock(&dev->mutex);
    return err;
}

/* dev_name must be the prefix followed by a positive decimal number.
 * (no leading + or - signs are allowed) */
static int
dpdk_dev_parse_name(const char dev_name[], const char prefix[],
                    unsigned int *port_no)
{
    const char *cport;

    if (strncmp(dev_name, prefix, strlen(prefix))) {
        return ENODEV;
    }

    cport = dev_name + strlen(prefix);

    if (str_to_uint(cport, 10, port_no)) {
        return 0;
    } else {
        return ENODEV;
    }
}

static int
vhost_construct_helper(struct netdev *netdev) OVS_REQUIRES(dpdk_mutex)
{
    if (rte_eal_init_ret) {
        return rte_eal_init_ret;
    }

    return netdev_dpdk_init(netdev, -1, DPDK_DEV_VHOST);
}

static int
netdev_dpdk_vhost_cuse_construct(struct netdev *netdev)
{
    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
    int err;

    if (rte_eal_init_ret) {
        return rte_eal_init_ret;
    }

    ovs_mutex_lock(&dpdk_mutex);
    strncpy(dev->vhost_id, netdev->name, sizeof(dev->vhost_id));
    err = vhost_construct_helper(netdev);
    ovs_mutex_unlock(&dpdk_mutex);
    return err;
}

static int
netdev_dpdk_vhost_user_construct(struct netdev *netdev)
{
    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
    const char *name = netdev->name;
    int err;

    /* 'name' is appended to 'vhost_sock_dir' and used to create a socket in
     * the file system.  '/' or '\' would traverse directories, so they're not
     * acceptable in 'name'. */
    if (strchr(name, '/') || strchr(name, '\\')) {
        VLOG_ERR("\"%s\" is not a valid name for a vhost-user port. "
                 "A valid name must not include '/' or '\\'",
                 name);
        return EINVAL;
    }

    if (rte_eal_init_ret) {
        return rte_eal_init_ret;
    }

    ovs_mutex_lock(&dpdk_mutex);
    /* Take the name of the vhost-user port and append it to the location
     * where the socket is to be created, then register the socket. */
    snprintf(dev->vhost_id, sizeof(dev->vhost_id), "%s/%s",
             vhost_sock_dir, name);

    err = rte_vhost_driver_register(dev->vhost_id);
    if (err) {
        VLOG_ERR("vhost-user socket device setup failure for socket %s\n",
                 dev->vhost_id);
    } else {
        fatal_signal_add_file_to_unlink(dev->vhost_id);
        VLOG_INFO("Socket %s created for vhost-user port %s\n",
                  dev->vhost_id, name);
        err = vhost_construct_helper(netdev);
    }

    ovs_mutex_unlock(&dpdk_mutex);
    return err;
}

static int
netdev_dpdk_construct(struct netdev *netdev)
{
    unsigned int port_no;
    int err;

    if (rte_eal_init_ret) {
        return rte_eal_init_ret;
    }

    /* Names always start with "dpdk" */
    err = dpdk_dev_parse_name(netdev->name, "dpdk", &port_no);
    if (err) {
        return err;
    }

    ovs_mutex_lock(&dpdk_mutex);
    err = netdev_dpdk_init(netdev, port_no, DPDK_DEV_ETH);
    ovs_mutex_unlock(&dpdk_mutex);
    return err;
}

static void
netdev_dpdk_destruct(struct netdev *netdev)
{
    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);

    ovs_mutex_lock(&dev->mutex);
    rte_eth_dev_stop(dev->port_id);
    free(ovsrcu_get_protected(struct ingress_policer *,
                              &dev->ingress_policer));
    ovs_mutex_unlock(&dev->mutex);

    ovs_mutex_lock(&dpdk_mutex);
    rte_free(dev->tx_q);
    ovs_list_remove(&dev->list_node);
    dpdk_mp_put(dev->dpdk_mp);
    ovs_mutex_unlock(&dpdk_mutex);
}

static void
netdev_dpdk_vhost_destruct(struct netdev *netdev)
{
    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);

    /* Guest becomes an orphan if still attached. */
    if (netdev_dpdk_get_virtio(dev) != NULL) {
        VLOG_ERR("Removing port '%s' while vhost device still attached.",
                 netdev->name);
        VLOG_ERR("To restore connectivity after re-adding of port, VM on "
                 "socket '%s' must be restarted.",
                 dev->vhost_id);
    }

    if (rte_vhost_driver_unregister(dev->vhost_id)) {
        VLOG_ERR("Unable to remove vhost-user socket %s", dev->vhost_id);
    } else {
        fatal_signal_remove_file_to_unlink(dev->vhost_id);
    }

    ovs_mutex_lock(&dev->mutex);
    free(ovsrcu_get_protected(struct ingress_policer *,
                              &dev->ingress_policer));
    ovs_mutex_unlock(&dev->mutex);

    ovs_mutex_lock(&dpdk_mutex);
    rte_free(dev->tx_q);
    ovs_list_remove(&dev->list_node);
    dpdk_mp_put(dev->dpdk_mp);
    ovs_mutex_unlock(&dpdk_mutex);
}

static void
netdev_dpdk_dealloc(struct netdev *netdev)
{
    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);

    rte_free(dev);
}

static int
netdev_dpdk_get_config(const struct netdev *netdev, struct smap *args)
{
    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);

    ovs_mutex_lock(&dev->mutex);

    smap_add_format(args, "requested_rx_queues", "%d", dev->requested_n_rxq);
    smap_add_format(args, "configured_rx_queues", "%d", netdev->n_rxq);
    smap_add_format(args, "requested_tx_queues", "%d", netdev->n_txq);
    smap_add_format(args, "configured_tx_queues", "%d", dev->real_n_txq);
    ovs_mutex_unlock(&dev->mutex);

    return 0;
}
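
/* Only 'n_rxq' can be changed through set_config().  The new value is only
 * recorded in 'requested_n_rxq' here; it takes effect the next time the
 * datapath reconfigures the device. */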
static int
netdev_dpdk_set_config(struct netdev *netdev, const struct smap *args)
{
    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
    int new_n_rxq;

    ovs_mutex_lock(&dev->mutex);
    new_n_rxq = MAX(smap_get_int(args, "n_rxq", dev->requested_n_rxq), 1);
    if (new_n_rxq != dev->requested_n_rxq) {
        dev->requested_n_rxq = new_n_rxq;
        netdev_request_reconfigure(netdev);
    }
    ovs_mutex_unlock(&dev->mutex);

    return 0;
}

static int
netdev_dpdk_get_numa_id(const struct netdev *netdev)
{
    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);

    return dev->socket_id;
}

/* Sets the number of tx queues for the dpdk interface. */
static int
netdev_dpdk_set_tx_multiq(struct netdev *netdev, unsigned int n_txq)
{
    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);

    ovs_mutex_lock(&dev->mutex);

    if (dev->requested_n_txq == n_txq) {
        goto out;
    }

    dev->requested_n_txq = n_txq;
    netdev_request_reconfigure(netdev);

out:
    ovs_mutex_unlock(&dev->mutex);
    return 0;
}

static struct netdev_rxq *
netdev_dpdk_rxq_alloc(void)
{
    struct netdev_rxq_dpdk *rx = dpdk_rte_mzalloc(sizeof *rx);

    return &rx->up;
}

static struct netdev_rxq_dpdk *
netdev_rxq_dpdk_cast(const struct netdev_rxq *rxq)
{
    return CONTAINER_OF(rxq, struct netdev_rxq_dpdk, up);
}

static int
netdev_dpdk_rxq_construct(struct netdev_rxq *rxq)
{
    struct netdev_rxq_dpdk *rx = netdev_rxq_dpdk_cast(rxq);
    struct netdev_dpdk *dev = netdev_dpdk_cast(rxq->netdev);

    ovs_mutex_lock(&dev->mutex);
    rx->port_id = dev->port_id;
    ovs_mutex_unlock(&dev->mutex);

    return 0;
}

static void
netdev_dpdk_rxq_destruct(struct netdev_rxq *rxq OVS_UNUSED)
{
}

static void
netdev_dpdk_rxq_dealloc(struct netdev_rxq *rxq)
{
    struct netdev_rxq_dpdk *rx = netdev_rxq_dpdk_cast(rxq);

    rte_free(rx);
}
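
/* Flushes tx queue 'qid': transmits the queued mbufs with
 * rte_eth_tx_burst(), retrying until the NIC accepts no more, and frees any
 * packets that could not be transmitted (one at a time, since they may come
 * from different mempools). */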
static inline void
dpdk_queue_flush__(struct netdev_dpdk *dev, int qid)
{
    struct dpdk_tx_queue *txq = &dev->tx_q[qid];
    uint32_t nb_tx = 0;

    while (nb_tx != txq->count) {
        uint32_t ret;

        ret = rte_eth_tx_burst(dev->port_id, qid, txq->burst_pkts + nb_tx,
                               txq->count - nb_tx);
        if (!ret) {
            break;
        }

        nb_tx += ret;
    }

    if (OVS_UNLIKELY(nb_tx != txq->count)) {
|
|
|
/* Free the buffers that we couldn't transmit, one at a time (each
|
|
|
|
* packet could come from a different mempool) */
|
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i = nb_tx; i < txq->count; i++) {
|
2016-03-08 09:50:48 +08:00
|
|
|
rte_pktmbuf_free(txq->burst_pkts[i]);
|
2014-07-17 14:29:36 -07:00
|
|
|
}
|
2015-05-22 17:14:21 +01:00
|
|
|
rte_spinlock_lock(&dev->stats_lock);
|
2014-08-12 10:43:36 -07:00
|
|
|
dev->stats.tx_dropped += txq->count - nb_tx;
|
2015-05-22 17:14:21 +01:00
|
|
|
rte_spinlock_unlock(&dev->stats_lock);
|
2014-03-24 19:23:08 -07:00
|
|
|
}
|
2014-08-12 10:43:36 -07:00
|
|
|
|
2014-03-24 19:23:08 -07:00
|
|
|
txq->count = 0;
|
2014-06-26 17:41:46 -07:00
|
|
|
txq->tsc = rte_get_timer_cycles();
|
2014-06-26 17:41:45 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
static inline void
|
|
|
|
dpdk_queue_flush(struct netdev_dpdk *dev, int qid)
|
|
|
|
{
|
|
|
|
struct dpdk_tx_queue *txq = &dev->tx_q[qid];
|
|
|
|
|
|
|
|
if (txq->count == 0) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
dpdk_queue_flush__(dev, qid);
|
2014-03-24 19:23:08 -07:00
|
|
|
}
|
|
|
|
|
2016-05-24 17:36:50 +01:00
|
|
|
static inline bool
|
|
|
|
netdev_dpdk_policer_pkt_handle(struct rte_meter_srtcm *meter,
|
|
|
|
struct rte_mbuf *pkt, uint64_t time)
|
|
|
|
{
|
|
|
|
uint32_t pkt_len = rte_pktmbuf_pkt_len(pkt) - sizeof(struct ether_hdr);
|
|
|
|
|
|
|
|
return rte_meter_srtcm_color_blind_check(meter, time, pkt_len) ==
|
|
|
|
e_RTE_METER_GREEN;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
netdev_dpdk_policer_run(struct rte_meter_srtcm *meter,
|
|
|
|
struct rte_mbuf **pkts, int pkt_cnt)
|
|
|
|
{
|
|
|
|
int i = 0;
|
|
|
|
int cnt = 0;
|
|
|
|
struct rte_mbuf *pkt = NULL;
|
|
|
|
uint64_t current_time = rte_rdtsc();
|
|
|
|
|
|
|
|
for (i = 0; i < pkt_cnt; i++) {
|
|
|
|
pkt = pkts[i];
|
|
|
|
/* Handle current packet */
|
|
|
|
if (netdev_dpdk_policer_pkt_handle(meter, pkt, current_time)) {
|
|
|
|
if (cnt != i) {
|
|
|
|
pkts[cnt] = pkt;
|
|
|
|
}
|
|
|
|
cnt++;
|
|
|
|
} else {
|
|
|
|
rte_pktmbuf_free(pkt);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return cnt;
|
|
|
|
}
|
|
|
|
|
2016-05-24 17:36:51 +01:00
|
|
|
static int
|
|
|
|
ingress_policer_run(struct ingress_policer *policer, struct rte_mbuf **pkts,
|
|
|
|
int pkt_cnt)
|
|
|
|
{
|
|
|
|
int cnt = 0;
|
|
|
|
|
|
|
|
rte_spinlock_lock(&policer->policer_lock);
|
|
|
|
cnt = netdev_dpdk_policer_run(&policer->in_policer, pkts, pkt_cnt);
|
|
|
|
rte_spinlock_unlock(&policer->policer_lock);
|
|
|
|
|
|
|
|
return cnt;
|
|
|
|
}
|
|
|
|
|
2015-03-05 13:42:04 -08:00
|
|
|
static bool
|
2016-03-16 14:44:18 -07:00
|
|
|
is_vhost_running(struct virtio_net *virtio_dev)
|
2015-03-05 13:42:04 -08:00
|
|
|
{
|
2016-03-16 14:44:18 -07:00
|
|
|
return (virtio_dev != NULL && (virtio_dev->flags & VIRTIO_DEV_RUNNING));
|
2015-03-05 13:42:04 -08:00
|
|
|
}
|
|
|
|
|
2016-05-05 09:46:01 +01:00
|
|
|
static inline void
|
|
|
|
netdev_dpdk_vhost_update_rx_size_counters(struct netdev_stats *stats,
|
|
|
|
unsigned int packet_size)
|
|
|
|
{
|
|
|
|
/* Hard-coded search for the size bucket. */
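/* For example, a 60-byte frame lands in rx_1_to_64_packets, a 300-byte
 * frame in rx_256_to_511_packets and a 1600-byte frame in
 * rx_1523_to_max_packets. */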
|
|
|
|
if (packet_size < 256) {
|
|
|
|
if (packet_size >= 128) {
|
|
|
|
stats->rx_128_to_255_packets++;
|
|
|
|
} else if (packet_size <= 64) {
|
|
|
|
stats->rx_1_to_64_packets++;
|
|
|
|
} else {
|
|
|
|
stats->rx_65_to_127_packets++;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
if (packet_size >= 1523) {
|
|
|
|
stats->rx_1523_to_max_packets++;
|
|
|
|
} else if (packet_size >= 1024) {
|
|
|
|
stats->rx_1024_to_1522_packets++;
|
|
|
|
} else if (packet_size < 512) {
|
|
|
|
stats->rx_256_to_511_packets++;
|
|
|
|
} else {
|
|
|
|
stats->rx_512_to_1023_packets++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-07-01 11:49:12 +01:00
|
|
|
static inline void
|
|
|
|
netdev_dpdk_vhost_update_rx_counters(struct netdev_stats *stats,
|
2016-05-24 17:36:51 +01:00
|
|
|
struct dp_packet **packets, int count,
|
|
|
|
int dropped)
|
2015-07-01 11:49:12 +01:00
|
|
|
{
|
|
|
|
int i;
|
2016-05-05 09:46:01 +01:00
|
|
|
unsigned int packet_size;
|
2015-07-01 11:49:12 +01:00
|
|
|
struct dp_packet *packet;
|
|
|
|
|
|
|
|
stats->rx_packets += count;
|
2016-05-24 17:36:51 +01:00
|
|
|
stats->rx_dropped += dropped;
|
2015-07-01 11:49:12 +01:00
|
|
|
for (i = 0; i < count; i++) {
|
|
|
|
packet = packets[i];
|
2016-05-05 09:46:01 +01:00
|
|
|
packet_size = dp_packet_size(packet);
|
2015-07-01 11:49:12 +01:00
|
|
|
|
2016-05-05 09:46:01 +01:00
|
|
|
if (OVS_UNLIKELY(packet_size < ETH_HEADER_LEN)) {
|
2015-07-01 11:49:12 +01:00
|
|
|
/* This only protects the following multicast counting from
|
|
|
|
* too short packets, but it does not stop the packet from
|
|
|
|
* further processing. */
|
|
|
|
stats->rx_errors++;
|
|
|
|
stats->rx_length_errors++;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2016-05-05 09:46:01 +01:00
|
|
|
netdev_dpdk_vhost_update_rx_size_counters(stats, packet_size);
|
|
|
|
|
2015-07-01 11:49:12 +01:00
|
|
|
struct eth_header *eh = (struct eth_header *) dp_packet_data(packet);
|
|
|
|
if (OVS_UNLIKELY(eth_addr_is_multicast(eh->eth_dst))) {
|
|
|
|
stats->multicast++;
|
|
|
|
}
|
|
|
|
|
2016-05-05 09:46:01 +01:00
|
|
|
stats->rx_bytes += packet_size;
|
2015-07-01 11:49:12 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-03-05 13:42:04 -08:00
|
|
|
/*
|
|
|
|
* The receive path for the vhost port is the TX path out from guest.
|
|
|
|
*/
|
|
|
|
static int
|
2016-03-16 14:44:18 -07:00
|
|
|
netdev_dpdk_vhost_rxq_recv(struct netdev_rxq *rxq,
|
2015-03-05 13:42:04 -08:00
|
|
|
struct dp_packet **packets, int *c)
|
|
|
|
{
|
2016-03-16 14:44:18 -07:00
|
|
|
struct netdev_dpdk *dev = netdev_dpdk_cast(rxq->netdev);
|
|
|
|
struct virtio_net *virtio_dev = netdev_dpdk_get_virtio(dev);
|
|
|
|
int qid = rxq->queue_id;
|
2016-05-24 17:36:51 +01:00
|
|
|
struct ingress_policer *policer = netdev_dpdk_get_ingress_policer(dev);
|
2015-03-05 13:42:04 -08:00
|
|
|
uint16_t nb_rx = 0;
|
2016-05-24 17:36:51 +01:00
|
|
|
uint16_t dropped = 0;
|
2015-03-05 13:42:04 -08:00
|
|
|
|
2016-06-02 12:42:39 +00:00
|
|
|
if (OVS_UNLIKELY(!is_vhost_running(virtio_dev)
|
|
|
|
|| !(dev->flags & NETDEV_UP))) {
|
2015-03-05 13:42:04 -08:00
|
|
|
return EAGAIN;
|
|
|
|
}
|
|
|
|
|
2016-03-16 14:44:18 -07:00
|
|
|
if (rxq->queue_id >= dev->real_n_rxq) {
|
2016-01-26 16:58:14 -02:00
|
|
|
return EOPNOTSUPP;
|
|
|
|
}
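/* The dequeue below reads from virtqueue qid * VIRTIO_QNUM + VIRTIO_TXQ,
 * i.e. the guest's TX ring for this queue pair, since packets transmitted
 * by the guest are what the host receives. */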
|
|
|
|
|
|
|
|
nb_rx = rte_vhost_dequeue_burst(virtio_dev, qid * VIRTIO_QNUM + VIRTIO_TXQ,
|
2016-03-16 14:44:18 -07:00
|
|
|
dev->dpdk_mp->mp,
|
2015-03-05 13:42:04 -08:00
|
|
|
(struct rte_mbuf **)packets,
|
2015-05-16 08:18:20 -07:00
|
|
|
NETDEV_MAX_BURST);
|
2015-03-05 13:42:04 -08:00
|
|
|
if (!nb_rx) {
|
|
|
|
return EAGAIN;
|
|
|
|
}
|
|
|
|
|
2016-05-24 17:36:51 +01:00
|
|
|
if (policer) {
|
|
|
|
dropped = nb_rx;
|
|
|
|
nb_rx = ingress_policer_run(policer, (struct rte_mbuf **) packets,
                            nb_rx);
|
|
|
|
dropped -= nb_rx;
|
|
|
|
}
|
|
|
|
|
2016-03-16 14:44:18 -07:00
|
|
|
rte_spinlock_lock(&dev->stats_lock);
|
2016-05-24 17:36:51 +01:00
|
|
|
netdev_dpdk_vhost_update_rx_counters(&dev->stats, packets, nb_rx, dropped);
|
2016-03-16 14:44:18 -07:00
|
|
|
rte_spinlock_unlock(&dev->stats_lock);
|
2015-05-22 17:14:21 +01:00
|
|
|
|
2015-03-05 13:42:04 -08:00
|
|
|
*c = (int) nb_rx;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2014-03-24 19:23:08 -07:00
|
|
|
static int
|
2016-03-16 14:44:18 -07:00
|
|
|
netdev_dpdk_rxq_recv(struct netdev_rxq *rxq, struct dp_packet **packets,
|
2014-06-23 11:43:57 -07:00
|
|
|
int *c)
|
2014-03-24 19:23:08 -07:00
|
|
|
{
|
2016-03-16 14:44:18 -07:00
|
|
|
struct netdev_rxq_dpdk *rx = netdev_rxq_dpdk_cast(rxq);
|
|
|
|
struct netdev_dpdk *dev = netdev_dpdk_cast(rxq->netdev);
|
2016-05-24 17:36:51 +01:00
|
|
|
struct ingress_policer *policer = netdev_dpdk_get_ingress_policer(dev);
|
2014-03-24 19:23:08 -07:00
|
|
|
int nb_rx;
|
2016-05-24 17:36:51 +01:00
|
|
|
int dropped = 0;
|
2014-03-24 19:23:08 -07:00
|
|
|
|
2014-09-08 14:52:54 -07:00
|
|
|
/* There is only one tx queue for this core. Do not flush other
|
2015-06-25 02:45:08 -07:00
|
|
|
* queues.
|
|
|
|
* Do not flush a tx queue that is shared among CPUs,
|
|
|
|
* since it is always flushed anyway. */
|
2016-03-16 14:44:18 -07:00
|
|
|
if (rxq->queue_id == rte_lcore_id() &&
|
2015-06-25 02:45:08 -07:00
|
|
|
OVS_LIKELY(!dev->txq_needs_locking)) {
|
2016-03-16 14:44:18 -07:00
|
|
|
dpdk_queue_flush(dev, rxq->queue_id);
|
2014-09-08 14:52:54 -07:00
|
|
|
}
|
2014-03-24 19:23:08 -07:00
|
|
|
|
2016-03-16 14:44:18 -07:00
|
|
|
nb_rx = rte_eth_rx_burst(rx->port_id, rxq->queue_id,
|
2014-06-03 17:10:52 -07:00
|
|
|
(struct rte_mbuf **) packets,
|
2015-05-16 08:18:20 -07:00
|
|
|
NETDEV_MAX_BURST);
|
2014-03-24 19:23:08 -07:00
|
|
|
if (!nb_rx) {
|
|
|
|
return EAGAIN;
|
|
|
|
}
|
|
|
|
|
2016-05-24 17:36:51 +01:00
|
|
|
if (policer) {
|
|
|
|
dropped = nb_rx;
|
|
|
|
nb_rx = ingress_policer_run(policer, (struct rte_mbuf **) packets,
                            nb_rx);
|
|
|
|
dropped -= nb_rx;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Update stats to reflect dropped packets */
|
|
|
|
if (OVS_UNLIKELY(dropped)) {
|
|
|
|
rte_spinlock_lock(&dev->stats_lock);
|
|
|
|
dev->stats.rx_dropped += dropped;
|
|
|
|
rte_spinlock_unlock(&dev->stats_lock);
|
|
|
|
}
|
|
|
|
|
2014-03-24 19:23:08 -07:00
|
|
|
*c = nb_rx;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2016-03-02 20:35:54 +00:00
|
|
|
static inline int
|
|
|
|
netdev_dpdk_qos_run__(struct netdev_dpdk *dev, struct rte_mbuf **pkts,
|
|
|
|
int cnt)
|
|
|
|
{
|
|
|
|
struct netdev *netdev = &dev->up;
|
|
|
|
|
|
|
|
if (dev->qos_conf != NULL) {
|
|
|
|
rte_spinlock_lock(&dev->qos_lock);
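/* dev->qos_conf is re-checked here because another thread may have
 * cleared it between the unlocked check above and taking qos_lock. */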
|
|
|
|
if (dev->qos_conf != NULL) {
|
|
|
|
cnt = dev->qos_conf->ops->qos_run(netdev, pkts, cnt);
|
|
|
|
}
|
|
|
|
rte_spinlock_unlock(&dev->qos_lock);
|
|
|
|
}
|
|
|
|
|
|
|
|
return cnt;
|
|
|
|
}
|
|
|
|
|
2015-07-01 11:49:12 +01:00
|
|
|
static inline void
|
|
|
|
netdev_dpdk_vhost_update_tx_counters(struct netdev_stats *stats,
|
|
|
|
struct dp_packet **packets,
|
|
|
|
int attempted,
|
|
|
|
int dropped)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
int sent = attempted - dropped;
|
|
|
|
|
|
|
|
stats->tx_packets += sent;
|
|
|
|
stats->tx_dropped += dropped;
|
|
|
|
|
|
|
|
for (i = 0; i < sent; i++) {
|
|
|
|
stats->tx_bytes += dp_packet_size(packets[i]);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-03-05 13:42:04 -08:00
|
|
|
static void
|
2016-01-26 16:58:14 -02:00
|
|
|
__netdev_dpdk_vhost_send(struct netdev *netdev, int qid,
|
|
|
|
struct dp_packet **pkts, int cnt,
|
|
|
|
bool may_steal)
|
2015-03-05 13:42:04 -08:00
|
|
|
{
|
2016-03-16 14:44:18 -07:00
|
|
|
struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
|
|
|
|
struct virtio_net *virtio_dev = netdev_dpdk_get_virtio(dev);
|
2015-05-11 21:58:14 -07:00
|
|
|
struct rte_mbuf **cur_pkts = (struct rte_mbuf **) pkts;
|
|
|
|
unsigned int total_pkts = cnt;
|
2016-03-02 20:35:54 +00:00
|
|
|
unsigned int qos_pkts = cnt;
|
2016-06-10 17:49:38 +01:00
|
|
|
int retries = 0;
|
2015-03-05 13:42:04 -08:00
|
|
|
|
2016-03-16 14:44:18 -07:00
|
|
|
qid = dev->tx_q[qid % dev->real_n_txq].map;
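/* tx_q[].map translates the OVS tx queue id into a queue id that the
 * guest has actually enabled; a negative value (e.g.
 * OVS_VHOST_QUEUE_DISABLED) means there is no usable queue, which the
 * check below counts as a drop. */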
|
2016-02-24 17:14:43 +03:00
|
|
|
|
2016-06-02 12:42:39 +00:00
|
|
|
if (OVS_UNLIKELY(!is_vhost_running(virtio_dev) || qid < 0
|
|
|
|
|| !(dev->flags & NETDEV_UP))) {
|
2016-03-16 14:44:18 -07:00
|
|
|
rte_spinlock_lock(&dev->stats_lock);
|
|
|
|
dev->stats.tx_dropped += cnt;
|
|
|
|
rte_spinlock_unlock(&dev->stats_lock);
|
2015-04-13 06:36:56 -07:00
|
|
|
goto out;
|
2015-03-05 13:42:04 -08:00
|
|
|
}
|
|
|
|
|
2016-03-16 14:44:18 -07:00
|
|
|
rte_spinlock_lock(&dev->tx_q[qid].tx_lock);
|
2015-03-05 13:42:04 -08:00
|
|
|
|
2016-03-02 20:35:54 +00:00
|
|
|
/* Check whether QoS has been configured for the netdev. */
|
2016-03-16 14:44:18 -07:00
|
|
|
cnt = netdev_dpdk_qos_run__(dev, cur_pkts, cnt);
|
2016-03-02 20:35:54 +00:00
|
|
|
qos_pkts -= cnt;
|
|
|
|
|
2015-05-11 21:58:14 -07:00
|
|
|
do {
|
2016-01-26 16:58:14 -02:00
|
|
|
int vhost_qid = qid * VIRTIO_QNUM + VIRTIO_RXQ;
|
2015-05-11 21:58:14 -07:00
|
|
|
unsigned int tx_pkts;
|
|
|
|
|
2016-01-26 16:58:14 -02:00
|
|
|
tx_pkts = rte_vhost_enqueue_burst(virtio_dev, vhost_qid,
|
2015-05-11 21:58:14 -07:00
|
|
|
cur_pkts, cnt);
|
|
|
|
if (OVS_LIKELY(tx_pkts)) {
|
|
|
|
/* Packets have been sent. */
|
|
|
|
cnt -= tx_pkts;
|
2016-06-10 17:49:38 +01:00
|
|
|
/* Prepare for possible retry. */
|
2015-05-11 21:58:14 -07:00
|
|
|
cur_pkts = &cur_pkts[tx_pkts];
|
|
|
|
} else {
|
2016-06-10 17:49:38 +01:00
|
|
|
/* No packets sent - do not retry. */
|
|
|
|
break;
|
2015-05-11 21:58:14 -07:00
|
|
|
}
|
2016-06-10 17:49:38 +01:00
|
|
|
} while (cnt && (retries++ < VHOST_ENQ_RETRY_NUM));
|
2016-01-26 16:58:14 -02:00
|
|
|
|
2016-03-16 14:44:18 -07:00
|
|
|
rte_spinlock_unlock(&dev->tx_q[qid].tx_lock);
|
2015-05-11 21:58:14 -07:00
|
|
|
|
2016-03-16 14:44:18 -07:00
|
|
|
rte_spinlock_lock(&dev->stats_lock);
|
2016-03-02 20:35:54 +00:00
|
|
|
cnt += qos_pkts;
|
2016-03-16 14:44:18 -07:00
|
|
|
netdev_dpdk_vhost_update_tx_counters(&dev->stats, pkts, total_pkts, cnt);
|
|
|
|
rte_spinlock_unlock(&dev->stats_lock);
|
2015-03-05 13:42:04 -08:00
|
|
|
|
|
|
|
out:
|
|
|
|
if (may_steal) {
|
2015-05-11 21:58:14 -07:00
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i = 0; i < total_pkts; i++) {
|
2015-04-13 06:36:56 -07:00
|
|
|
dp_packet_delete(pkts[i]);
|
|
|
|
}
|
2015-03-05 13:42:04 -08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-03-24 19:23:08 -07:00
|
|
|
inline static void
|
2014-06-23 11:43:58 -07:00
|
|
|
dpdk_queue_pkts(struct netdev_dpdk *dev, int qid,
|
|
|
|
struct rte_mbuf **pkts, int cnt)
|
2014-03-24 19:23:08 -07:00
|
|
|
{
|
|
|
|
struct dpdk_tx_queue *txq = &dev->tx_q[qid];
|
|
|
|
uint64_t diff_tsc;
|
|
|
|
|
2014-06-23 11:43:58 -07:00
|
|
|
int i = 0;
|
|
|
|
|
|
|
|
while (i < cnt) {
|
|
|
|
int freeslots = MAX_TX_QUEUE_LEN - txq->count;
|
|
|
|
int tocopy = MIN(freeslots, cnt-i);
|
2014-03-24 19:23:08 -07:00
|
|
|
|
2014-06-23 11:43:58 -07:00
|
|
|
memcpy(&txq->burst_pkts[txq->count], &pkts[i],
|
|
|
|
tocopy * sizeof (struct rte_mbuf *));
|
|
|
|
|
|
|
|
txq->count += tocopy;
|
|
|
|
i += tocopy;
|
|
|
|
|
2014-09-04 13:09:22 -07:00
|
|
|
if (txq->count == MAX_TX_QUEUE_LEN || txq->flush_tx) {
|
2014-06-26 17:41:45 -07:00
|
|
|
dpdk_queue_flush__(dev, qid);
|
2014-06-23 11:43:58 -07:00
|
|
|
}
|
2014-06-26 17:41:46 -07:00
|
|
|
diff_tsc = rte_get_timer_cycles() - txq->tsc;
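/* txq->tsc records when the queue was last flushed; if queued packets
 * have been waiting longer than DRAIN_TSC cycles, flush them now instead
 * of waiting for the queue to fill up. */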
|
2014-06-23 11:43:58 -07:00
|
|
|
if (diff_tsc >= DRAIN_TSC) {
|
2014-06-26 17:41:45 -07:00
|
|
|
dpdk_queue_flush__(dev, qid);
|
2014-06-23 11:43:58 -07:00
|
|
|
}
|
2014-03-24 19:23:08 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Tx function. Transmit packets indefinitely */
|
|
|
|
static void
|
2015-03-05 13:42:04 -08:00
|
|
|
dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet **pkts,
|
2014-09-18 17:02:17 -07:00
|
|
|
int cnt)
|
2014-07-17 14:29:36 -07:00
|
|
|
OVS_NO_THREAD_SAFETY_ANALYSIS
|
2014-03-24 19:23:08 -07:00
|
|
|
{
|
2015-05-18 08:49:24 -07:00
|
|
|
#if !defined(__CHECKER__) && !defined(_WIN32)
|
|
|
|
const size_t PKT_ARRAY_SIZE = cnt;
|
|
|
|
#else
|
|
|
|
/* Sparse or MSVC doesn't like variable length array. */
|
2015-05-16 08:18:20 -07:00
|
|
|
enum { PKT_ARRAY_SIZE = NETDEV_MAX_BURST };
|
2015-05-18 08:49:24 -07:00
|
|
|
#endif
|
2014-03-24 19:23:08 -07:00
|
|
|
struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
|
2015-05-18 08:49:24 -07:00
|
|
|
struct rte_mbuf *mbufs[PKT_ARRAY_SIZE];
|
2014-06-26 18:16:39 -07:00
|
|
|
int dropped = 0;
|
|
|
|
int newcnt = 0;
|
|
|
|
int i;
|
2014-03-24 19:23:08 -07:00
|
|
|
|
2014-07-17 14:29:36 -07:00
|
|
|
/* If we are on a non pmd thread we have to use the mempool mutex, because
|
|
|
|
* every non pmd thread shares the same mempool cache. */
|
|
|
|
|
2016-01-12 11:32:41 -08:00
|
|
|
if (!dpdk_thread_is_pmd()) {
|
2014-07-17 14:29:36 -07:00
|
|
|
ovs_mutex_lock(&nonpmd_mempool_mutex);
|
|
|
|
}
|
|
|
|
|
2014-06-23 11:43:58 -07:00
|
|
|
for (i = 0; i < cnt; i++) {
|
2015-02-22 03:21:09 -08:00
|
|
|
int size = dp_packet_size(pkts[i]);
|
2014-07-11 13:37:11 +01:00
|
|
|
|
2014-06-26 18:16:41 -07:00
|
|
|
if (OVS_UNLIKELY(size > dev->max_packet_len)) {
|
2014-06-23 11:43:58 -07:00
|
|
|
VLOG_WARN_RL(&rl, "Too big size %d max_packet_len %d",
|
|
|
|
(int) size, dev->max_packet_len);
|
|
|
|
|
2014-06-26 18:16:39 -07:00
|
|
|
dropped++;
|
2014-06-23 11:43:58 -07:00
|
|
|
continue;
|
|
|
|
}
|
2014-03-24 19:23:08 -07:00
|
|
|
|
2014-06-23 11:43:58 -07:00
|
|
|
mbufs[newcnt] = rte_pktmbuf_alloc(dev->dpdk_mp->mp);
|
2014-03-24 19:23:08 -07:00
|
|
|
|
2014-06-23 11:43:58 -07:00
|
|
|
if (!mbufs[newcnt]) {
|
2014-06-26 18:16:39 -07:00
|
|
|
dropped += cnt - i;
|
|
|
|
break;
|
2014-06-23 11:43:58 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
/* We have to do a copy for now */
|
2015-02-17 13:20:04 -08:00
|
|
|
memcpy(rte_pktmbuf_mtod(mbufs[newcnt], void *),
       dp_packet_data(pkts[i]), size);
|
2014-06-23 11:43:58 -07:00
|
|
|
|
|
|
|
rte_pktmbuf_data_len(mbufs[newcnt]) = size;
|
|
|
|
rte_pktmbuf_pkt_len(mbufs[newcnt]) = size;
|
|
|
|
|
|
|
|
newcnt++;
|
|
|
|
}
|
2014-03-24 19:23:08 -07:00
|
|
|
|
2015-03-05 13:42:04 -08:00
|
|
|
if (dev->type == DPDK_DEV_VHOST) {
|
2016-01-26 16:58:14 -02:00
|
|
|
__netdev_dpdk_vhost_send(netdev, qid, (struct dp_packet **) mbufs,
                         newcnt, true);
|
2015-03-05 13:42:04 -08:00
|
|
|
} else {
|
2016-03-02 20:35:54 +00:00
|
|
|
unsigned int qos_pkts = newcnt;
|
|
|
|
|
|
|
|
/* Check if QoS has been configured for this netdev. */
|
|
|
|
newcnt = netdev_dpdk_qos_run__(dev, mbufs, newcnt);
|
|
|
|
|
|
|
|
dropped += qos_pkts - newcnt;
|
2015-03-05 13:42:04 -08:00
|
|
|
dpdk_queue_pkts(dev, qid, mbufs, newcnt);
|
|
|
|
dpdk_queue_flush(dev, qid);
|
|
|
|
}
|
2014-07-17 14:29:36 -07:00
|
|
|
|
2016-03-02 20:35:54 +00:00
|
|
|
if (OVS_UNLIKELY(dropped)) {
|
|
|
|
rte_spinlock_lock(&dev->stats_lock);
|
|
|
|
dev->stats.tx_dropped += dropped;
|
|
|
|
rte_spinlock_unlock(&dev->stats_lock);
|
|
|
|
}
|
|
|
|
|
2016-01-12 11:32:41 -08:00
|
|
|
if (!dpdk_thread_is_pmd()) {
|
2014-07-17 14:29:36 -07:00
|
|
|
ovs_mutex_unlock(&nonpmd_mempool_mutex);
|
|
|
|
}
|
2014-03-24 19:23:08 -07:00
|
|
|
}
|
|
|
|
|
2015-03-05 13:42:04 -08:00
|
|
|
static int
|
2016-01-26 16:58:14 -02:00
|
|
|
netdev_dpdk_vhost_send(struct netdev *netdev, int qid, struct dp_packet **pkts,
|
2015-03-05 13:42:04 -08:00
|
|
|
int cnt, bool may_steal)
|
|
|
|
{
|
|
|
|
if (OVS_UNLIKELY(pkts[0]->source != DPBUF_DPDK)) {
|
|
|
|
int i;
|
|
|
|
|
|
|
|
dpdk_do_tx_copy(netdev, qid, pkts, cnt);
|
|
|
|
if (may_steal) {
|
|
|
|
for (i = 0; i < cnt; i++) {
|
|
|
|
dp_packet_delete(pkts[i]);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} else {
|
2016-01-26 16:58:14 -02:00
|
|
|
__netdev_dpdk_vhost_send(netdev, qid, pkts, cnt, may_steal);
|
2015-03-05 13:42:04 -08:00
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2014-10-14 19:01:49 +02:00
|
|
|
static inline void
|
|
|
|
netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
|
2015-02-25 12:01:53 -08:00
|
|
|
struct dp_packet **pkts, int cnt, bool may_steal)
|
2014-03-24 19:23:08 -07:00
|
|
|
{
|
2014-06-23 11:43:58 -07:00
|
|
|
int i;
|
2014-03-24 19:23:08 -07:00
|
|
|
|
2015-05-22 17:14:22 +01:00
|
|
|
if (OVS_UNLIKELY(dev->txq_needs_locking)) {
|
|
|
|
qid = qid % dev->real_n_txq;
|
|
|
|
rte_spinlock_lock(&dev->tx_q[qid].tx_lock);
|
|
|
|
}
|
|
|
|
|
2014-10-14 19:01:49 +02:00
|
|
|
if (OVS_UNLIKELY(!may_steal ||
|
2015-02-22 03:21:09 -08:00
|
|
|
pkts[0]->source != DPBUF_DPDK)) {
|
2014-10-14 19:01:49 +02:00
|
|
|
struct netdev *netdev = &dev->up;
|
|
|
|
|
2014-09-18 17:02:17 -07:00
|
|
|
dpdk_do_tx_copy(netdev, qid, pkts, cnt);
|
2014-03-31 13:17:24 -07:00
|
|
|
|
|
|
|
if (may_steal) {
|
2014-06-23 11:43:58 -07:00
|
|
|
for (i = 0; i < cnt; i++) {
|
2015-02-25 12:01:53 -08:00
|
|
|
dp_packet_delete(pkts[i]);
|
2014-06-23 11:43:58 -07:00
|
|
|
}
|
2014-03-31 13:17:24 -07:00
|
|
|
}
|
2014-03-24 19:23:08 -07:00
|
|
|
} else {
|
2014-06-23 11:43:58 -07:00
|
|
|
int next_tx_idx = 0;
|
|
|
|
int dropped = 0;
|
2016-03-02 20:35:54 +00:00
|
|
|
unsigned int qos_pkts = 0;
|
|
|
|
unsigned int temp_cnt = 0;
|
2014-03-24 19:23:08 -07:00
|
|
|
|
2014-06-23 11:43:58 -07:00
|
|
|
for (i = 0; i < cnt; i++) {
|
2015-02-22 03:21:09 -08:00
|
|
|
int size = dp_packet_size(pkts[i]);
|
2015-04-13 06:36:56 -07:00
|
|
|
|
2014-06-23 11:43:58 -07:00
|
|
|
if (OVS_UNLIKELY(size > dev->max_packet_len)) {
|
|
|
|
if (next_tx_idx != i) {
|
2016-03-02 20:35:54 +00:00
|
|
|
temp_cnt = i - next_tx_idx;
|
|
|
|
qos_pkts = temp_cnt;
|
|
|
|
|
|
|
|
temp_cnt = netdev_dpdk_qos_run__(dev, (struct rte_mbuf**)pkts,
|
|
|
|
temp_cnt);
|
|
|
|
dropped += qos_pkts - temp_cnt;
|
2014-06-23 11:43:58 -07:00
|
|
|
dpdk_queue_pkts(dev, qid,
|
|
|
|
(struct rte_mbuf **)&pkts[next_tx_idx],
|
2016-03-02 20:35:54 +00:00
|
|
|
temp_cnt);
|
|
|
|
|
2014-06-24 16:04:20 -07:00
|
|
|
}
|
2014-06-23 11:43:58 -07:00
|
|
|
|
2014-06-24 16:04:20 -07:00
|
|
|
VLOG_WARN_RL(&rl, "Too big size %d max_packet_len %d",
|
|
|
|
(int) size, dev->max_packet_len);
|
2014-06-23 11:43:58 -07:00
|
|
|
|
2015-02-25 12:01:53 -08:00
|
|
|
dp_packet_delete(pkts[i]);
|
2014-06-24 16:04:20 -07:00
|
|
|
dropped++;
|
2014-06-23 11:43:58 -07:00
|
|
|
next_tx_idx = i + 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (next_tx_idx != cnt) {
|
2016-03-02 20:35:54 +00:00
|
|
|
cnt -= next_tx_idx;
|
|
|
|
qos_pkts = cnt;
|
|
|
|
|
|
|
|
cnt = netdev_dpdk_qos_run__(dev, (struct rte_mbuf**)pkts, cnt);
|
|
|
|
dropped += qos_pkts - cnt;
|
|
|
|
dpdk_queue_pkts(dev, qid, (struct rte_mbuf **)&pkts[next_tx_idx],
|
|
|
|
cnt);
|
2014-06-23 11:43:58 -07:00
|
|
|
}
|
2014-03-24 19:23:08 -07:00
|
|
|
|
2014-06-23 11:43:58 -07:00
|
|
|
if (OVS_UNLIKELY(dropped)) {
|
2015-05-22 17:14:21 +01:00
|
|
|
rte_spinlock_lock(&dev->stats_lock);
|
2014-06-23 11:43:58 -07:00
|
|
|
dev->stats.tx_dropped += dropped;
|
2015-05-22 17:14:21 +01:00
|
|
|
rte_spinlock_unlock(&dev->stats_lock);
|
2014-06-23 11:43:58 -07:00
|
|
|
}
|
2014-03-24 19:23:08 -07:00
|
|
|
}
|
2015-05-22 17:14:22 +01:00
|
|
|
|
|
|
|
if (OVS_UNLIKELY(dev->txq_needs_locking)) {
|
|
|
|
rte_spinlock_unlock(&dev->tx_q[qid].tx_lock);
|
|
|
|
}
|
2014-10-14 19:01:49 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
netdev_dpdk_eth_send(struct netdev *netdev, int qid,
|
2015-02-25 12:01:53 -08:00
|
|
|
struct dp_packet **pkts, int cnt, bool may_steal)
|
2014-10-14 19:01:49 +02:00
|
|
|
{
|
|
|
|
struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
|
2014-03-24 19:23:08 -07:00
|
|
|
|
2014-10-14 19:01:49 +02:00
|
|
|
netdev_dpdk_send__(dev, qid, pkts, cnt, may_steal);
|
|
|
|
return 0;
|
2014-03-24 19:23:08 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
2015-08-28 14:55:11 -07:00
|
|
|
netdev_dpdk_set_etheraddr(struct netdev *netdev, const struct eth_addr mac)
|
2014-03-24 19:23:08 -07:00
|
|
|
{
|
|
|
|
struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
|
|
|
|
|
|
|
|
ovs_mutex_lock(&dev->mutex);
|
|
|
|
if (!eth_addr_equals(dev->hwaddr, mac)) {
|
2015-08-28 14:55:11 -07:00
|
|
|
dev->hwaddr = mac;
|
2014-04-03 09:46:31 -07:00
|
|
|
netdev_change_seq_changed(netdev);
|
2014-03-24 19:23:08 -07:00
|
|
|
}
|
|
|
|
ovs_mutex_unlock(&dev->mutex);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
2015-08-28 14:55:11 -07:00
|
|
|
netdev_dpdk_get_etheraddr(const struct netdev *netdev, struct eth_addr *mac)
|
2014-03-24 19:23:08 -07:00
|
|
|
{
|
|
|
|
struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
|
|
|
|
|
|
|
|
ovs_mutex_lock(&dev->mutex);
|
2015-08-28 14:55:11 -07:00
|
|
|
*mac = dev->hwaddr;
|
2014-03-24 19:23:08 -07:00
|
|
|
ovs_mutex_unlock(&dev->mutex);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
netdev_dpdk_get_mtu(const struct netdev *netdev, int *mtup)
|
|
|
|
{
|
|
|
|
struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
|
|
|
|
|
|
|
|
ovs_mutex_lock(&dev->mutex);
|
|
|
|
*mtup = dev->mtu;
|
|
|
|
ovs_mutex_unlock(&dev->mutex);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
netdev_dpdk_set_mtu(const struct netdev *netdev, int mtu)
|
|
|
|
{
|
|
|
|
struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
|
2016-02-19 11:25:11 +00:00
|
|
|
int old_mtu, err, dpdk_mtu;
|
2014-03-24 19:23:08 -07:00
|
|
|
struct dpdk_mp *old_mp;
|
|
|
|
struct dpdk_mp *mp;
|
2016-02-19 11:25:11 +00:00
|
|
|
uint32_t buf_size;
|
2014-03-24 19:23:08 -07:00
|
|
|
|
|
|
|
ovs_mutex_lock(&dpdk_mutex);
|
|
|
|
ovs_mutex_lock(&dev->mutex);
|
|
|
|
if (dev->mtu == mtu) {
|
|
|
|
err = 0;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
2016-02-19 11:25:11 +00:00
|
|
|
buf_size = dpdk_buf_size(mtu);
|
|
|
|
dpdk_mtu = FRAME_LEN_TO_MTU(buf_size);
|
|
|
|
|
|
|
|
mp = dpdk_mp_get(dev->socket_id, dpdk_mtu);
|
2014-03-24 19:23:08 -07:00
|
|
|
if (!mp) {
|
|
|
|
err = ENOMEM;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
|
|
|
rte_eth_dev_stop(dev->port_id);
|
|
|
|
|
|
|
|
old_mtu = dev->mtu;
|
|
|
|
old_mp = dev->dpdk_mp;
|
|
|
|
dev->dpdk_mp = mp;
|
|
|
|
dev->mtu = mtu;
|
2016-02-19 11:25:11 +00:00
|
|
|
dev->max_packet_len = MTU_TO_FRAME_LEN(dev->mtu);
|
2014-03-24 19:23:08 -07:00
|
|
|
|
|
|
|
err = dpdk_eth_dev_init(dev);
|
|
|
|
if (err) {
|
|
|
|
dpdk_mp_put(mp);
|
|
|
|
dev->mtu = old_mtu;
|
|
|
|
dev->dpdk_mp = old_mp;
|
2016-02-19 11:25:11 +00:00
|
|
|
dev->max_packet_len = MTU_TO_FRAME_LEN(dev->mtu);
|
2014-03-24 19:23:08 -07:00
|
|
|
dpdk_eth_dev_init(dev);
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
|
|
|
dpdk_mp_put(old_mp);
|
2014-04-03 09:46:31 -07:00
|
|
|
netdev_change_seq_changed(netdev);
|
2014-03-24 19:23:08 -07:00
|
|
|
out:
|
|
|
|
ovs_mutex_unlock(&dev->mutex);
|
|
|
|
ovs_mutex_unlock(&dpdk_mutex);
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
2016-03-16 14:44:18 -07:00
|
|
|
netdev_dpdk_get_carrier(const struct netdev *netdev, bool *carrier);
|
2014-03-24 19:23:08 -07:00
|
|
|
|
2015-03-05 13:42:04 -08:00
|
|
|
static int
|
|
|
|
netdev_dpdk_vhost_get_stats(const struct netdev *netdev,
|
|
|
|
struct netdev_stats *stats)
|
|
|
|
{
|
|
|
|
struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
|
|
|
|
|
|
|
|
ovs_mutex_lock(&dev->mutex);
|
|
|
|
|
2015-05-22 17:14:21 +01:00
|
|
|
rte_spinlock_lock(&dev->stats_lock);
|
2015-03-05 13:42:04 -08:00
|
|
|
/* Supported Stats */
|
|
|
|
stats->rx_packets += dev->stats.rx_packets;
|
|
|
|
stats->tx_packets += dev->stats.tx_packets;
|
2016-05-24 17:36:51 +01:00
|
|
|
stats->rx_dropped = dev->stats.rx_dropped;
|
2015-03-05 13:42:04 -08:00
|
|
|
stats->tx_dropped += dev->stats.tx_dropped;
|
2015-07-01 11:49:12 +01:00
|
|
|
stats->multicast = dev->stats.multicast;
|
|
|
|
stats->rx_bytes = dev->stats.rx_bytes;
|
|
|
|
stats->tx_bytes = dev->stats.tx_bytes;
|
|
|
|
stats->rx_errors = dev->stats.rx_errors;
|
|
|
|
stats->rx_length_errors = dev->stats.rx_length_errors;
|
2016-05-05 09:46:01 +01:00
|
|
|
|
|
|
|
stats->rx_1_to_64_packets = dev->stats.rx_1_to_64_packets;
|
|
|
|
stats->rx_65_to_127_packets = dev->stats.rx_65_to_127_packets;
|
|
|
|
stats->rx_128_to_255_packets = dev->stats.rx_128_to_255_packets;
|
|
|
|
stats->rx_256_to_511_packets = dev->stats.rx_256_to_511_packets;
|
|
|
|
stats->rx_512_to_1023_packets = dev->stats.rx_512_to_1023_packets;
|
|
|
|
stats->rx_1024_to_1522_packets = dev->stats.rx_1024_to_1522_packets;
|
|
|
|
stats->rx_1523_to_max_packets = dev->stats.rx_1523_to_max_packets;
|
|
|
|
|
2015-05-22 17:14:21 +01:00
|
|
|
rte_spinlock_unlock(&dev->stats_lock);
|
2015-07-01 11:49:12 +01:00
|
|
|
|
2015-03-05 13:42:04 -08:00
|
|
|
ovs_mutex_unlock(&dev->mutex);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2016-05-05 09:46:01 +01:00
|
|
|
static void
|
|
|
|
netdev_dpdk_convert_xstats(struct netdev_stats *stats,
|
|
|
|
const struct rte_eth_xstats *xstats,
|
|
|
|
const unsigned int size)
|
|
|
|
{
|
|
|
|
/* XXX: The current implementation is a simple search through an array
|
|
|
|
* to find hardcoded counter names. In a future DPDK release (TBD)
|
|
|
|
* the XSTATS API will change so that each counter is represented by
|
|
|
|
* a unique ID instead of a string. */
|
|
|
|
|
|
|
|
for (unsigned int i = 0; i < size; i++) {
|
|
|
|
if (strcmp(XSTAT_RX_64_PACKETS, xstats[i].name) == 0) {
|
|
|
|
stats->rx_1_to_64_packets = xstats[i].value;
|
|
|
|
} else if (strcmp(XSTAT_RX_65_TO_127_PACKETS, xstats[i].name) == 0) {
|
|
|
|
stats->rx_65_to_127_packets = xstats[i].value;
|
|
|
|
} else if (strcmp(XSTAT_RX_128_TO_255_PACKETS, xstats[i].name) == 0) {
|
|
|
|
stats->rx_128_to_255_packets = xstats[i].value;
|
|
|
|
} else if (strcmp(XSTAT_RX_256_TO_511_PACKETS, xstats[i].name) == 0) {
|
|
|
|
stats->rx_256_to_511_packets = xstats[i].value;
|
|
|
|
} else if (strcmp(XSTAT_RX_512_TO_1023_PACKETS,
|
|
|
|
xstats[i].name) == 0) {
|
|
|
|
stats->rx_512_to_1023_packets = xstats[i].value;
|
|
|
|
} else if (strcmp(XSTAT_RX_1024_TO_1522_PACKETS,
|
|
|
|
xstats[i].name) == 0) {
|
|
|
|
stats->rx_1024_to_1522_packets = xstats[i].value;
|
|
|
|
} else if (strcmp(XSTAT_RX_1523_TO_MAX_PACKETS,
|
|
|
|
xstats[i].name) == 0) {
|
|
|
|
stats->rx_1523_to_max_packets = xstats[i].value;
|
|
|
|
} else if (strcmp(XSTAT_TX_64_PACKETS, xstats[i].name) == 0) {
|
|
|
|
stats->tx_1_to_64_packets = xstats[i].value;
|
|
|
|
} else if (strcmp(XSTAT_TX_65_TO_127_PACKETS, xstats[i].name) == 0) {
|
|
|
|
stats->tx_65_to_127_packets = xstats[i].value;
|
|
|
|
} else if (strcmp(XSTAT_TX_128_TO_255_PACKETS, xstats[i].name) == 0) {
|
|
|
|
stats->tx_128_to_255_packets = xstats[i].value;
|
|
|
|
} else if (strcmp(XSTAT_TX_256_TO_511_PACKETS, xstats[i].name) == 0) {
|
|
|
|
stats->tx_256_to_511_packets = xstats[i].value;
|
|
|
|
} else if (strcmp(XSTAT_TX_512_TO_1023_PACKETS,
|
|
|
|
xstats[i].name) == 0) {
|
|
|
|
stats->tx_512_to_1023_packets = xstats[i].value;
|
|
|
|
} else if (strcmp(XSTAT_TX_1024_TO_1522_PACKETS,
|
|
|
|
xstats[i].name) == 0) {
|
|
|
|
stats->tx_1024_to_1522_packets = xstats[i].value;
|
|
|
|
} else if (strcmp(XSTAT_TX_1523_TO_MAX_PACKETS,
|
|
|
|
xstats[i].name) == 0) {
|
|
|
|
stats->tx_1523_to_max_packets = xstats[i].value;
|
|
|
|
} else if (strcmp(XSTAT_TX_MULTICAST_PACKETS, xstats[i].name) == 0) {
|
|
|
|
stats->tx_multicast_packets = xstats[i].value;
|
|
|
|
} else if (strcmp(XSTAT_RX_BROADCAST_PACKETS, xstats[i].name) == 0) {
|
|
|
|
stats->rx_broadcast_packets = xstats[i].value;
|
|
|
|
} else if (strcmp(XSTAT_TX_BROADCAST_PACKETS, xstats[i].name) == 0) {
|
|
|
|
stats->tx_broadcast_packets = xstats[i].value;
|
|
|
|
} else if (strcmp(XSTAT_RX_UNDERSIZED_ERRORS, xstats[i].name) == 0) {
|
|
|
|
stats->rx_undersized_errors = xstats[i].value;
|
|
|
|
} else if (strcmp(XSTAT_RX_FRAGMENTED_ERRORS, xstats[i].name) == 0) {
|
|
|
|
stats->rx_fragmented_errors = xstats[i].value;
|
|
|
|
} else if (strcmp(XSTAT_RX_JABBER_ERRORS, xstats[i].name) == 0) {
|
|
|
|
stats->rx_jabber_errors = xstats[i].value;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-03-24 19:23:08 -07:00
|
|
|
static int
|
|
|
|
netdev_dpdk_get_stats(const struct netdev *netdev, struct netdev_stats *stats)
|
|
|
|
{
|
|
|
|
struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
|
|
|
|
struct rte_eth_stats rte_stats;
|
|
|
|
bool gg;
|
|
|
|
|
|
|
|
netdev_dpdk_get_carrier(netdev, &gg);
|
|
|
|
ovs_mutex_lock(&dev->mutex);
|
|
|
|
|
2016-05-05 09:46:01 +01:00
|
|
|
struct rte_eth_xstats *rte_xstats;
|
|
|
|
int rte_xstats_len, rte_xstats_ret;
|
|
|
|
|
|
|
|
if (rte_eth_stats_get(dev->port_id, &rte_stats)) {
|
|
|
|
VLOG_ERR("Can't get ETH statistics for port: %i.", dev->port_id);
|
2016-05-10 15:50:42 -07:00
|
|
|
ovs_mutex_unlock(&dev->mutex);
|
2016-05-05 09:46:01 +01:00
|
|
|
return EPROTO;
|
|
|
|
}
|
|
|
|
|
|
|
|
rte_xstats_len = rte_eth_xstats_get(dev->port_id, NULL, 0);
|
|
|
|
if (rte_xstats_len > 0) {
|
|
|
|
rte_xstats = dpdk_rte_mzalloc(sizeof(*rte_xstats) * rte_xstats_len);
|
|
|
|
memset(rte_xstats, 0xff, sizeof(*rte_xstats) * rte_xstats_len);
|
|
|
|
rte_xstats_ret = rte_eth_xstats_get(dev->port_id, rte_xstats,
|
|
|
|
rte_xstats_len);
|
|
|
|
if (rte_xstats_ret > 0 && rte_xstats_ret <= rte_xstats_len) {
|
|
|
|
netdev_dpdk_convert_xstats(stats, rte_xstats, rte_xstats_ret);
|
|
|
|
}
|
|
|
|
rte_free(rte_xstats);
|
|
|
|
} else {
|
|
|
|
VLOG_WARN("Can't get XSTATS counters for port: %i.", dev->port_id);
|
|
|
|
}
|
2014-03-24 19:23:08 -07:00
|
|
|
|
2014-09-12 16:00:50 -07:00
|
|
|
stats->rx_packets = rte_stats.ipackets;
|
|
|
|
stats->tx_packets = rte_stats.opackets;
|
|
|
|
stats->rx_bytes = rte_stats.ibytes;
|
|
|
|
stats->tx_bytes = rte_stats.obytes;
|
2015-07-01 11:49:12 +01:00
|
|
|
/* DPDK counts imissed as errors, but we count them here as dropped instead. */
|
|
|
|
stats->rx_errors = rte_stats.ierrors - rte_stats.imissed;
|
2014-09-12 16:00:50 -07:00
|
|
|
stats->tx_errors = rte_stats.oerrors;
|
|
|
|
stats->multicast = rte_stats.imcasts;
|
2014-03-24 19:23:08 -07:00
|
|
|
|
2015-05-22 17:14:21 +01:00
|
|
|
rte_spinlock_lock(&dev->stats_lock);
|
2014-09-12 16:00:50 -07:00
|
|
|
stats->tx_dropped = dev->stats.tx_dropped;
|
2016-05-24 17:36:51 +01:00
|
|
|
stats->rx_dropped = dev->stats.rx_dropped;
|
2015-05-22 17:14:21 +01:00
|
|
|
rte_spinlock_unlock(&dev->stats_lock);
|
2015-07-01 11:49:12 +01:00
|
|
|
|
|
|
|
/* These are the available DPDK counters for packets not received due to
|
|
|
|
* local resource constraints in DPDK and the NIC, respectively. */
|
2016-05-24 17:36:51 +01:00
|
|
|
stats->rx_dropped += rte_stats.rx_nombuf + rte_stats.imissed;
|
2015-07-01 11:49:12 +01:00
|
|
|
stats->rx_missed_errors = rte_stats.imissed;
|
|
|
|
|
2014-03-24 19:23:08 -07:00
|
|
|
ovs_mutex_unlock(&dev->mutex);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
2016-03-16 14:44:18 -07:00
|
|
|
netdev_dpdk_get_features(const struct netdev *netdev,
|
2014-03-24 19:23:08 -07:00
|
|
|
enum netdev_features *current,
|
|
|
|
enum netdev_features *advertised OVS_UNUSED,
|
|
|
|
enum netdev_features *supported OVS_UNUSED,
|
|
|
|
enum netdev_features *peer OVS_UNUSED)
|
|
|
|
{
|
2016-03-16 14:44:18 -07:00
|
|
|
struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
|
2014-03-24 19:23:08 -07:00
|
|
|
struct rte_eth_link link;
|
|
|
|
|
|
|
|
ovs_mutex_lock(&dev->mutex);
|
|
|
|
link = dev->link;
|
|
|
|
ovs_mutex_unlock(&dev->mutex);
|
|
|
|
|
2016-04-14 17:40:06 +01:00
|
|
|
if (link.link_duplex == ETH_LINK_HALF_DUPLEX) {
|
|
|
|
if (link.link_speed == ETH_SPEED_NUM_10M) {
|
2014-03-24 19:23:08 -07:00
|
|
|
*current = NETDEV_F_10MB_HD;
|
|
|
|
}
|
2016-04-14 17:40:06 +01:00
|
|
|
if (link.link_speed == ETH_SPEED_NUM_100M) {
|
2014-03-24 19:23:08 -07:00
|
|
|
*current = NETDEV_F_100MB_HD;
|
|
|
|
}
|
2016-04-14 17:40:06 +01:00
|
|
|
if (link.link_speed == ETH_SPEED_NUM_1G) {
|
2014-03-24 19:23:08 -07:00
|
|
|
*current = NETDEV_F_1GB_HD;
|
|
|
|
}
|
|
|
|
} else if (link.link_duplex == ETH_LINK_FULL_DUPLEX) {
|
2016-04-14 17:40:06 +01:00
|
|
|
if (link.link_speed == ETH_SPEED_NUM_10M) {
|
2014-03-24 19:23:08 -07:00
|
|
|
*current = NETDEV_F_10MB_FD;
|
|
|
|
}
|
2016-04-14 17:40:06 +01:00
|
|
|
if (link.link_speed == ETH_SPEED_NUM_100M) {
|
2014-03-24 19:23:08 -07:00
|
|
|
*current = NETDEV_F_100MB_FD;
|
|
|
|
}
|
2016-04-14 17:40:06 +01:00
|
|
|
if (link.link_speed == ETH_SPEED_NUM_1G) {
|
2014-03-24 19:23:08 -07:00
|
|
|
*current = NETDEV_F_1GB_FD;
|
|
|
|
}
|
2016-04-14 17:40:06 +01:00
|
|
|
if (link.link_speed == ETH_SPEED_NUM_10G) {
|
2014-03-24 19:23:08 -07:00
|
|
|
*current = NETDEV_F_10GB_FD;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-04-14 17:40:06 +01:00
|
|
|
if (link.link_autoneg) {
|
|
|
|
*current |= NETDEV_F_AUTONEG;
|
|
|
|
}
|
|
|
|
|
2014-03-24 19:23:08 -07:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2016-05-24 17:36:51 +01:00
|
|
|
static struct ingress_policer *
|
|
|
|
netdev_dpdk_policer_construct(uint32_t rate, uint32_t burst)
|
|
|
|
{
|
|
|
|
struct ingress_policer *policer = NULL;
|
|
|
|
uint64_t rate_bytes;
|
|
|
|
uint64_t burst_bytes;
|
|
|
|
int err = 0;
|
|
|
|
|
|
|
|
policer = xmalloc(sizeof *policer);
|
|
|
|
rte_spinlock_init(&policer->policer_lock);
|
|
|
|
|
|
|
|
/* rte_meter requires bytes so convert kbits rate and burst to bytes. */
|
|
|
|
rate_bytes = rate * 1000 / 8;
|
|
|
|
burst_bytes = burst * 1000 / 8;
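/* For example, a rate of 10000 kbit/s becomes a cir of 1250000 bytes/s
 * in the srTCM configuration below. */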
|
|
|
|
|
|
|
|
policer->app_srtcm_params.cir = rate_bytes;
|
|
|
|
policer->app_srtcm_params.cbs = burst_bytes;
|
|
|
|
policer->app_srtcm_params.ebs = 0;
|
|
|
|
err = rte_meter_srtcm_config(&policer->in_policer,
|
|
|
|
&policer->app_srtcm_params);
|
|
|
|
if (err) {
|
|
|
|
VLOG_ERR("Could not create rte meter for ingress policer");
|
|
|
|
free(policer);
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
return policer;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
netdev_dpdk_set_policing(struct netdev* netdev, uint32_t policer_rate,
|
|
|
|
uint32_t policer_burst)
|
|
|
|
{
|
|
|
|
struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
|
|
|
|
struct ingress_policer *policer;
|
|
|
|
|
|
|
|
/* Force to 0 if no rate specified,
|
|
|
|
* default to 8000 kbits if burst is 0,
|
|
|
|
* else stick with user-specified value.
|
|
|
|
*/
|
|
|
|
policer_burst = (!policer_rate ? 0
|
|
|
|
: !policer_burst ? 8000
|
|
|
|
: policer_burst);
|
|
|
|
|
|
|
|
ovs_mutex_lock(&dev->mutex);
|
|
|
|
|
|
|
|
policer = ovsrcu_get_protected(struct ingress_policer *,
|
|
|
|
&dev->ingress_policer);
|
|
|
|
|
|
|
|
if (dev->policer_rate == policer_rate &&
|
|
|
|
dev->policer_burst == policer_burst) {
|
|
|
|
/* Assume that settings haven't changed since we last set them. */
|
|
|
|
ovs_mutex_unlock(&dev->mutex);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Destroy any existing ingress policer for the device if one exists */
|
|
|
|
if (policer) {
|
|
|
|
ovsrcu_postpone(free, policer);
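/* pmd threads may still be using the old policer through ovsrcu_get(),
 * so the free is deferred until after an RCU grace period rather than
 * done immediately. */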
|
|
|
|
}
|
|
|
|
|
|
|
|
if (policer_rate != 0) {
|
|
|
|
policer = netdev_dpdk_policer_construct(policer_rate, policer_burst);
|
|
|
|
} else {
|
|
|
|
policer = NULL;
|
|
|
|
}
|
|
|
|
ovsrcu_set(&dev->ingress_policer, policer);
|
|
|
|
dev->policer_rate = policer_rate;
|
|
|
|
dev->policer_burst = policer_burst;
|
|
|
|
ovs_mutex_unlock(&dev->mutex);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2014-03-24 19:23:08 -07:00
|
|
|
static int
|
|
|
|
netdev_dpdk_get_ifindex(const struct netdev *netdev)
|
|
|
|
{
|
|
|
|
struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
|
|
|
|
int ifindex;
|
|
|
|
|
|
|
|
ovs_mutex_lock(&dev->mutex);
|
|
|
|
ifindex = dev->port_id;
|
|
|
|
ovs_mutex_unlock(&dev->mutex);
|
|
|
|
|
|
|
|
return ifindex;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
2016-03-16 14:44:18 -07:00
|
|
|
netdev_dpdk_get_carrier(const struct netdev *netdev, bool *carrier)
|
2014-03-24 19:23:08 -07:00
|
|
|
{
|
2016-03-16 14:44:18 -07:00
|
|
|
struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
|
2014-03-24 19:23:08 -07:00
|
|
|
|
|
|
|
ovs_mutex_lock(&dev->mutex);
|
|
|
|
check_link_status(dev);
|
|
|
|
*carrier = dev->link.link_status;
|
2015-03-05 13:42:04 -08:00
|
|
|
|
|
|
|
ovs_mutex_unlock(&dev->mutex);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
2016-03-16 14:44:18 -07:00
|
|
|
netdev_dpdk_vhost_get_carrier(const struct netdev *netdev, bool *carrier)
|
2015-03-05 13:42:04 -08:00
|
|
|
{
|
2016-03-16 14:44:18 -07:00
|
|
|
struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
|
2015-03-05 13:42:04 -08:00
|
|
|
struct virtio_net *virtio_dev = netdev_dpdk_get_virtio(dev);
|
|
|
|
|
|
|
|
ovs_mutex_lock(&dev->mutex);
|
|
|
|
|
|
|
|
if (is_vhost_running(virtio_dev)) {
|
|
|
|
*carrier = 1;
|
|
|
|
} else {
|
|
|
|
*carrier = 0;
|
|
|
|
}
|
|
|
|
|
2014-03-24 19:23:08 -07:00
|
|
|
ovs_mutex_unlock(&dev->mutex);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static long long int
|
2016-03-16 14:44:18 -07:00
|
|
|
netdev_dpdk_get_carrier_resets(const struct netdev *netdev)
|
2014-03-24 19:23:08 -07:00
|
|
|
{
|
2016-03-16 14:44:18 -07:00
|
|
|
struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
|
2014-03-24 19:23:08 -07:00
|
|
|
long long int carrier_resets;
|
|
|
|
|
|
|
|
ovs_mutex_lock(&dev->mutex);
|
|
|
|
carrier_resets = dev->link_reset_cnt;
|
|
|
|
ovs_mutex_unlock(&dev->mutex);
|
|
|
|
|
|
|
|
return carrier_resets;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
2016-03-16 14:44:18 -07:00
|
|
|
netdev_dpdk_set_miimon(struct netdev *netdev OVS_UNUSED,
|
2014-03-24 19:23:08 -07:00
|
|
|
long long int interval OVS_UNUSED)
|
|
|
|
{
|
2015-02-13 10:00:58 +00:00
|
|
|
return EOPNOTSUPP;
|
2014-03-24 19:23:08 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
netdev_dpdk_update_flags__(struct netdev_dpdk *dev,
|
|
|
|
enum netdev_flags off, enum netdev_flags on,
|
2014-07-11 13:37:11 +01:00
|
|
|
enum netdev_flags *old_flagsp) OVS_REQUIRES(dev->mutex)
|
2014-03-24 19:23:08 -07:00
|
|
|
{
|
|
|
|
int err;
|
|
|
|
|
|
|
|
if ((off | on) & ~(NETDEV_UP | NETDEV_PROMISC)) {
|
|
|
|
return EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
*old_flagsp = dev->flags;
|
|
|
|
dev->flags |= on;
|
|
|
|
dev->flags &= ~off;
|
|
|
|
|
|
|
|
if (dev->flags == *old_flagsp) {
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2015-03-05 13:42:04 -08:00
|
|
|
if (dev->type == DPDK_DEV_ETH) {
|
|
|
|
if (dev->flags & NETDEV_UP) {
|
|
|
|
err = rte_eth_dev_start(dev->port_id);
|
|
|
|
if (err) {
|
|
|
|
return -err;
            }
|
|
|
|
}
|
2014-03-24 19:23:08 -07:00
|
|
|
|
2015-03-05 13:42:04 -08:00
|
|
|
if (dev->flags & NETDEV_PROMISC) {
|
|
|
|
rte_eth_promiscuous_enable(dev->port_id);
|
|
|
|
}
|
2014-03-24 19:23:08 -07:00
|
|
|
|
2015-03-05 13:42:04 -08:00
|
|
|
if (!(dev->flags & NETDEV_UP)) {
|
|
|
|
rte_eth_dev_stop(dev->port_id);
|
|
|
|
}
|
2016-06-02 12:42:39 +00:00
|
|
|
} else {
|
|
|
|
/* If DPDK_DEV_VHOST device's NETDEV_UP flag was changed and vhost is
|
|
|
|
* running then change netdev's change_seq to trigger link state
|
|
|
|
* update. */
|
|
|
|
struct virtio_net *virtio_dev = netdev_dpdk_get_virtio(dev);
|
|
|
|
|
|
|
|
if ((NETDEV_UP & ((*old_flagsp ^ on) | (*old_flagsp ^ off)))
|
|
|
|
&& is_vhost_running(virtio_dev)) {
|
|
|
|
netdev_change_seq_changed(&dev->up);
|
|
|
|
|
|
|
|
/* Clear statistics if device is getting up. */
|
|
|
|
if (NETDEV_UP & on) {
|
|
|
|
rte_spinlock_lock(&dev->stats_lock);
|
|
|
|
memset(&dev->stats, 0, sizeof(dev->stats));
|
|
|
|
rte_spinlock_unlock(&dev->stats_lock);
|
|
|
|
}
|
|
|
|
}
|
2014-03-24 19:23:08 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
2016-03-16 14:44:18 -07:00
|
|
|
netdev_dpdk_update_flags(struct netdev *netdev,
|
2014-03-24 19:23:08 -07:00
|
|
|
enum netdev_flags off, enum netdev_flags on,
|
|
|
|
enum netdev_flags *old_flagsp)
|
|
|
|
{
|
2016-03-16 14:44:18 -07:00
|
|
|
struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
|
2014-03-24 19:23:08 -07:00
|
|
|
int error;
|
|
|
|
|
2016-03-16 14:44:18 -07:00
|
|
|
ovs_mutex_lock(&dev->mutex);
|
|
|
|
error = netdev_dpdk_update_flags__(dev, off, on, old_flagsp);
|
|
|
|
ovs_mutex_unlock(&dev->mutex);
|
2014-03-24 19:23:08 -07:00
|
|
|
|
|
|
|
return error;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
2016-03-16 14:44:18 -07:00
|
|
|
netdev_dpdk_get_status(const struct netdev *netdev, struct smap *args)
|
2014-03-24 19:23:08 -07:00
|
|
|
{
|
2016-03-16 14:44:18 -07:00
|
|
|
struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
|
2014-03-24 19:23:08 -07:00
|
|
|
struct rte_eth_dev_info dev_info;
|
|
|
|
|
2014-08-21 15:53:15 -07:00
|
|
|
if (dev->port_id < 0) {
|
2014-03-24 19:23:08 -07:00
|
|
|
return ENODEV;
    }
|
|
|
|
|
|
|
|
ovs_mutex_lock(&dev->mutex);
|
|
|
|
rte_eth_dev_info_get(dev->port_id, &dev_info);
|
|
|
|
ovs_mutex_unlock(&dev->mutex);
|
|
|
|
|
|
|
|
smap_add_format(args, "driver_name", "%s", dev_info.driver_name);
|
|
|
|
|
2014-07-11 13:37:11 +01:00
|
|
|
smap_add_format(args, "port_no", "%d", dev->port_id);
|
2014-03-24 19:23:08 -07:00
|
|
|
smap_add_format(args, "numa_id", "%d", rte_eth_dev_socket_id(dev->port_id));
|
|
|
|
|
|
|
|
smap_add_format(args, "min_rx_bufsize", "%u", dev_info.min_rx_bufsize);
|
2016-02-19 11:25:11 +00:00
|
|
|
smap_add_format(args, "max_rx_pktlen", "%u", dev->max_packet_len);
|
2014-03-24 19:23:08 -07:00
|
|
|
smap_add_format(args, "max_rx_queues", "%u", dev_info.max_rx_queues);
|
|
|
|
smap_add_format(args, "max_tx_queues", "%u", dev_info.max_tx_queues);
|
|
|
|
smap_add_format(args, "max_mac_addrs", "%u", dev_info.max_mac_addrs);
|
|
|
|
smap_add_format(args, "max_hash_mac_addrs", "%u", dev_info.max_hash_mac_addrs);
|
|
|
|
smap_add_format(args, "max_vfs", "%u", dev_info.max_vfs);
|
|
|
|
smap_add_format(args, "max_vmdq_pools", "%u", dev_info.max_vmdq_pools);
|
|
|
|
|
2015-12-02 23:30:16 -08:00
|
|
|
if (dev_info.pci_dev) {
|
|
|
|
smap_add_format(args, "pci-vendor_id", "0x%x",
|
|
|
|
dev_info.pci_dev->id.vendor_id);
|
|
|
|
smap_add_format(args, "pci-device_id", "0x%x",
|
|
|
|
dev_info.pci_dev->id.device_id);
|
|
|
|
}
|
2014-03-24 19:23:08 -07:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
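/* Brings the interface administratively up or down by setting or clearing
 * NETDEV_UP through netdev_dpdk_update_flags__(). */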
static void
|
|
|
|
netdev_dpdk_set_admin_state__(struct netdev_dpdk *dev, bool admin_state)
|
|
|
|
OVS_REQUIRES(dev->mutex)
|
|
|
|
{
|
|
|
|
enum netdev_flags old_flags;
|
|
|
|
|
|
|
|
if (admin_state) {
|
|
|
|
netdev_dpdk_update_flags__(dev, 0, NETDEV_UP, &old_flags);
|
|
|
|
} else {
|
|
|
|
netdev_dpdk_update_flags__(dev, NETDEV_UP, 0, &old_flags);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
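/* unixctl handler for "netdev-dpdk/set-admin-state".  When a netdev name is
 * supplied it changes only that DPDK port, otherwise the state is applied to
 * every DPDK port.  Invoked, for example, as
 * "ovs-appctl netdev-dpdk/set-admin-state dpdk0 up" (the port name here is
 * only an example). */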
static void
|
|
|
|
netdev_dpdk_set_admin_state(struct unixctl_conn *conn, int argc,
|
|
|
|
const char *argv[], void *aux OVS_UNUSED)
|
|
|
|
{
|
|
|
|
bool up;
|
|
|
|
|
|
|
|
if (!strcasecmp(argv[argc - 1], "up")) {
|
|
|
|
up = true;
|
|
|
|
} else if (!strcasecmp(argv[argc - 1], "down")) {
|
|
|
|
up = false;
|
|
|
|
} else {
|
|
|
|
unixctl_command_reply_error(conn, "Invalid Admin State");
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (argc > 2) {
|
|
|
|
struct netdev *netdev = netdev_from_name(argv[1]);
|
|
|
|
if (netdev && is_dpdk_class(netdev->netdev_class)) {
|
|
|
|
struct netdev_dpdk *dpdk_dev = netdev_dpdk_cast(netdev);
|
|
|
|
|
|
|
|
ovs_mutex_lock(&dpdk_dev->mutex);
|
|
|
|
netdev_dpdk_set_admin_state__(dpdk_dev, up);
|
|
|
|
ovs_mutex_unlock(&dpdk_dev->mutex);
|
|
|
|
|
|
|
|
netdev_close(netdev);
|
|
|
|
} else {
|
|
|
|
unixctl_command_reply_error(conn, "Not a DPDK Interface");
|
|
|
|
netdev_close(netdev);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
struct netdev_dpdk *netdev;
|
|
|
|
|
|
|
|
ovs_mutex_lock(&dpdk_mutex);
|
|
|
|
LIST_FOR_EACH (netdev, list_node, &dpdk_list) {
|
|
|
|
ovs_mutex_lock(&netdev->mutex);
|
|
|
|
netdev_dpdk_set_admin_state__(netdev, up);
|
|
|
|
ovs_mutex_unlock(&netdev->mutex);
|
|
|
|
}
|
|
|
|
ovs_mutex_unlock(&dpdk_mutex);
|
|
|
|
}
|
|
|
|
unixctl_command_reply(conn, "OK");
|
|
|
|
}
|
|
|
|
|
2015-03-05 13:42:04 -08:00
|
|
|
/*
|
|
|
|
* Set virtqueue flags so that we do not receive interrupts.
|
|
|
|
*/
|
|
|
|
static void
|
2016-03-16 14:44:18 -07:00
|
|
|
set_irq_status(struct virtio_net *virtio_dev)
|
2015-03-05 13:42:04 -08:00
|
|
|
{
|
2016-01-26 16:58:14 -02:00
|
|
|
uint32_t i;
|
|
|
|
uint64_t idx;
|
|
|
|
|
2016-03-16 14:44:18 -07:00
|
|
|
for (i = 0; i < virtio_dev->virt_qp_nb; i++) {
|
2016-01-26 16:58:14 -02:00
|
|
|
idx = i * VIRTIO_QNUM;
|
2016-03-16 14:44:18 -07:00
|
|
|
rte_vhost_enable_guest_notification(virtio_dev, idx + VIRTIO_RXQ, 0);
|
|
|
|
rte_vhost_enable_guest_notification(virtio_dev, idx + VIRTIO_TXQ, 0);
|
2016-01-26 16:58:14 -02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-02-24 17:14:43 +03:00
|
|
|
/*
|
|
|
|
* Fixes mapping for vhost-user tx queues. Must be called after each
|
|
|
|
* enabling/disabling of queues and real_n_txq modifications.
|
|
|
|
*/
|
|
|
|
static void
|
2016-03-16 14:44:18 -07:00
|
|
|
netdev_dpdk_remap_txqs(struct netdev_dpdk *dev)
|
|
|
|
OVS_REQUIRES(dev->mutex)
|
2016-02-24 17:14:43 +03:00
|
|
|
{
|
|
|
|
int *enabled_queues, n_enabled = 0;
|
2016-03-16 14:44:18 -07:00
|
|
|
int i, k, total_txqs = dev->real_n_txq;
|
2016-02-24 17:14:43 +03:00
|
|
|
|
|
|
|
enabled_queues = dpdk_rte_mzalloc(total_txqs * sizeof *enabled_queues);
|
|
|
|
|
|
|
|
for (i = 0; i < total_txqs; i++) {
|
|
|
|
/* Enabled queues are always mapped to themselves. */
|
2016-03-16 14:44:18 -07:00
|
|
|
if (dev->tx_q[i].map == i) {
|
2016-02-24 17:14:43 +03:00
|
|
|
enabled_queues[n_enabled++] = i;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (n_enabled == 0 && total_txqs != 0) {
|
2016-03-29 09:20:41 +03:00
|
|
|
enabled_queues[0] = OVS_VHOST_QUEUE_DISABLED;
|
2016-02-24 17:14:43 +03:00
|
|
|
n_enabled = 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
k = 0;
|
|
|
|
for (i = 0; i < total_txqs; i++) {
|
2016-03-16 14:44:18 -07:00
|
|
|
if (dev->tx_q[i].map != i) {
|
|
|
|
dev->tx_q[i].map = enabled_queues[k];
|
2016-02-24 17:14:43 +03:00
|
|
|
k = (k + 1) % n_enabled;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-03-16 14:44:18 -07:00
|
|
|
VLOG_DBG("TX queue mapping for %s\n", dev->vhost_id);
|
2016-02-24 17:14:43 +03:00
|
|
|
for (i = 0; i < total_txqs; i++) {
|
2016-03-16 14:44:18 -07:00
|
|
|
VLOG_DBG("%2d --> %2d", i, dev->tx_q[i].map);
|
2016-02-24 17:14:43 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
rte_free(enabled_queues);
|
|
|
|
}
|
2016-01-26 16:58:14 -02:00
|
|
|
|
|
|
|
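/* Adopts the queue-pair count advertised by the virtio device: the device is
 * rejected if it exposes more queue pairs than the configured n_rxq,
 * otherwise real_n_rxq/real_n_txq are updated and the tx queues remapped. */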
static int
|
2016-03-16 14:44:18 -07:00
|
|
|
netdev_dpdk_vhost_set_queues(struct netdev_dpdk *dev, struct virtio_net *virtio_dev)
|
|
|
|
OVS_REQUIRES(dev->mutex)
|
2016-01-26 16:58:14 -02:00
|
|
|
{
|
|
|
|
uint32_t qp_num;
|
|
|
|
|
2016-03-16 14:44:18 -07:00
|
|
|
qp_num = virtio_dev->virt_qp_nb;
|
|
|
|
if (qp_num > dev->up.n_rxq) {
|
2016-01-26 16:58:14 -02:00
|
|
|
VLOG_ERR("vHost Device '%s' %"PRIu64" can't be added - "
|
2016-03-16 14:44:18 -07:00
|
|
|
"too many queues %d > %d", virtio_dev->ifname, virtio_dev->device_fh,
|
|
|
|
qp_num, dev->up.n_rxq);
|
2016-01-26 16:58:14 -02:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2016-03-16 14:44:18 -07:00
|
|
|
dev->real_n_rxq = qp_num;
|
|
|
|
dev->real_n_txq = qp_num;
|
|
|
|
dev->txq_needs_locking = true;
|
2016-03-29 09:20:41 +03:00
|
|
|
/* Enable TX queue 0 by default if it wasn't disabled. */
|
2016-03-16 14:44:18 -07:00
|
|
|
if (dev->tx_q[0].map == OVS_VHOST_QUEUE_MAP_UNKNOWN) {
|
|
|
|
dev->tx_q[0].map = 0;
|
2016-03-29 09:20:41 +03:00
|
|
|
}
|
2016-02-24 17:14:43 +03:00
|
|
|
|
2016-03-16 14:44:18 -07:00
|
|
|
netdev_dpdk_remap_txqs(dev);
|
2016-01-26 16:58:14 -02:00
|
|
|
|
|
|
|
return 0;
|
2015-03-05 13:42:04 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* A new virtio-net device is added to a vhost port.
|
|
|
|
*/
|
|
|
|
static int
|
2016-03-16 14:44:18 -07:00
|
|
|
new_device(struct virtio_net *virtio_dev)
|
2015-03-05 13:42:04 -08:00
|
|
|
{
|
2016-03-16 14:44:18 -07:00
|
|
|
struct netdev_dpdk *dev;
|
2015-03-05 13:42:04 -08:00
|
|
|
bool exists = false;
|
|
|
|
|
|
|
|
ovs_mutex_lock(&dpdk_mutex);
|
|
|
|
/* Add device to the vhost port with the same name as that passed down. */
|
2016-03-16 14:44:18 -07:00
|
|
|
LIST_FOR_EACH(dev, list_node, &dpdk_list) {
|
|
|
|
if (strncmp(virtio_dev->ifname, dev->vhost_id, IF_NAME_SZ) == 0) {
|
|
|
|
ovs_mutex_lock(&dev->mutex);
|
|
|
|
if (netdev_dpdk_vhost_set_queues(dev, virtio_dev)) {
|
|
|
|
ovs_mutex_unlock(&dev->mutex);
|
2016-01-26 16:58:14 -02:00
|
|
|
ovs_mutex_unlock(&dpdk_mutex);
|
|
|
|
return -1;
|
|
|
|
}
|
2016-03-16 14:44:18 -07:00
|
|
|
ovsrcu_set(&dev->virtio_dev, virtio_dev);
|
2015-03-05 13:42:04 -08:00
|
|
|
exists = true;
|
2016-03-16 14:44:18 -07:00
|
|
|
virtio_dev->flags |= VIRTIO_DEV_RUNNING;
|
2015-03-05 13:42:04 -08:00
|
|
|
/* Disable notifications. */
|
2016-03-16 14:44:18 -07:00
|
|
|
set_irq_status(virtio_dev);
|
2016-06-02 12:42:39 +00:00
|
|
|
netdev_change_seq_changed(&dev->up);
|
2016-03-16 14:44:18 -07:00
|
|
|
ovs_mutex_unlock(&dev->mutex);
|
2015-03-05 13:42:04 -08:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
ovs_mutex_unlock(&dpdk_mutex);
|
|
|
|
|
|
|
|
if (!exists) {
|
2015-09-21 16:01:23 -07:00
|
|
|
VLOG_INFO("vHost Device '%s' %"PRIu64" can't be added - name not "
|
2016-03-16 14:44:18 -07:00
|
|
|
"found", virtio_dev->ifname, virtio_dev->device_fh);
|
2015-03-05 13:42:04 -08:00
|
|
|
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2016-03-16 14:44:18 -07:00
|
|
|
VLOG_INFO("vHost Device '%s' %"PRIu64" has been added", virtio_dev->ifname,
|
|
|
|
virtio_dev->device_fh);
|
2015-03-05 13:42:04 -08:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2016-03-29 09:20:41 +03:00
|
|
|
/* Clears mapping for all available queues of vhost interface. */
|
|
|
|
static void
|
|
|
|
netdev_dpdk_txq_map_clear(struct netdev_dpdk *dev)
|
|
|
|
OVS_REQUIRES(dev->mutex)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i = 0; i < dev->real_n_txq; i++) {
|
|
|
|
dev->tx_q[i].map = OVS_VHOST_QUEUE_MAP_UNKNOWN;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-03-05 13:42:04 -08:00
|
|
|
/*
|
|
|
|
* Remove a virtio-net device from the specific vhost port. Use dev->remove
|
|
|
|
* flag to stop any more packets from being sent or received to/from a VM and
|
|
|
|
* ensure all currently queued packets have been sent/received before removing
|
|
|
|
* the device.
|
|
|
|
*/
|
|
|
|
static void
|
2016-03-16 14:44:18 -07:00
|
|
|
destroy_device(volatile struct virtio_net *virtio_dev)
|
2015-03-05 13:42:04 -08:00
|
|
|
{
|
2016-03-16 14:44:18 -07:00
|
|
|
struct netdev_dpdk *dev;
|
2016-02-05 17:07:16 +00:00
|
|
|
bool exists = false;
|
2015-03-05 13:42:04 -08:00
|
|
|
|
|
|
|
ovs_mutex_lock(&dpdk_mutex);
|
2016-03-16 14:44:18 -07:00
|
|
|
LIST_FOR_EACH (dev, list_node, &dpdk_list) {
|
|
|
|
if (netdev_dpdk_get_virtio(dev) == virtio_dev) {
|
2015-03-05 13:42:04 -08:00
|
|
|
|
2016-03-16 14:44:18 -07:00
|
|
|
ovs_mutex_lock(&dev->mutex);
|
|
|
|
virtio_dev->flags &= ~VIRTIO_DEV_RUNNING;
|
|
|
|
ovsrcu_set(&dev->virtio_dev, NULL);
|
|
|
|
netdev_dpdk_txq_map_clear(dev);
|
2016-02-05 17:07:16 +00:00
|
|
|
exists = true;
|
2016-06-02 12:42:39 +00:00
|
|
|
netdev_change_seq_changed(&dev->up);
|
2016-03-16 14:44:18 -07:00
|
|
|
ovs_mutex_unlock(&dev->mutex);
|
2016-02-05 17:07:16 +00:00
|
|
|
break;
|
2015-03-05 13:42:04 -08:00
|
|
|
}
|
|
|
|
}
|
2016-02-05 17:07:16 +00:00
|
|
|
|
2015-03-05 13:42:04 -08:00
|
|
|
ovs_mutex_unlock(&dpdk_mutex);
|
|
|
|
|
2016-02-05 17:07:16 +00:00
|
|
|
if (exists == true) {
|
|
|
|
/*
|
|
|
|
* Wait for other threads to quiesce after setting the 'virtio_dev'
|
|
|
|
* to NULL, before returning.
|
|
|
|
*/
|
|
|
|
ovsrcu_synchronize();
|
|
|
|
/*
|
|
|
|
* As call to ovsrcu_synchronize() will end the quiescent state,
|
|
|
|
* put thread back into quiescent state before returning.
|
|
|
|
*/
|
|
|
|
ovsrcu_quiesce_start();
|
2016-03-16 14:44:18 -07:00
|
|
|
VLOG_INFO("vHost Device '%s' %"PRIu64" has been removed",
|
|
|
|
virtio_dev->ifname, virtio_dev->device_fh);
|
2016-02-05 17:07:16 +00:00
|
|
|
} else {
|
2016-03-16 14:44:18 -07:00
|
|
|
VLOG_INFO("vHost Device '%s' %"PRIu64" not found", virtio_dev->ifname,
|
|
|
|
virtio_dev->device_fh);
|
2016-02-05 17:07:16 +00:00
|
|
|
}
|
2015-03-05 13:42:04 -08:00
|
|
|
}
|
|
|
|
|
2016-02-24 17:14:43 +03:00
|
|
|
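/* vhost callback invoked when the guest enables or disables a virtqueue.
 * Changes to guest TX rings are ignored; for guest RX rings the matching host
 * tx_q map entry is updated and the tx queue mapping is recomputed. */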
static int
|
2016-03-16 14:44:18 -07:00
|
|
|
vring_state_changed(struct virtio_net *virtio_dev, uint16_t queue_id,
|
|
|
|
int enable)
|
2016-02-24 17:14:43 +03:00
|
|
|
{
|
2016-03-16 14:44:18 -07:00
|
|
|
struct netdev_dpdk *dev;
|
2016-02-24 17:14:43 +03:00
|
|
|
bool exists = false;
|
|
|
|
int qid = queue_id / VIRTIO_QNUM;
|
|
|
|
|
|
|
|
if (queue_id % VIRTIO_QNUM == VIRTIO_TXQ) {
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
ovs_mutex_lock(&dpdk_mutex);
|
2016-03-16 14:44:18 -07:00
|
|
|
LIST_FOR_EACH (dev, list_node, &dpdk_list) {
|
|
|
|
if (strncmp(virtio_dev->ifname, dev->vhost_id, IF_NAME_SZ) == 0) {
|
|
|
|
ovs_mutex_lock(&dev->mutex);
|
2016-02-24 17:14:43 +03:00
|
|
|
if (enable) {
|
2016-03-16 14:44:18 -07:00
|
|
|
dev->tx_q[qid].map = qid;
|
2016-02-24 17:14:43 +03:00
|
|
|
} else {
|
2016-03-16 14:44:18 -07:00
|
|
|
dev->tx_q[qid].map = OVS_VHOST_QUEUE_DISABLED;
|
2016-02-24 17:14:43 +03:00
|
|
|
}
|
2016-03-16 14:44:18 -07:00
|
|
|
netdev_dpdk_remap_txqs(dev);
|
2016-02-24 17:14:43 +03:00
|
|
|
exists = true;
|
2016-03-16 14:44:18 -07:00
|
|
|
ovs_mutex_unlock(&dev->mutex);
|
2016-02-24 17:14:43 +03:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
ovs_mutex_unlock(&dpdk_mutex);
|
|
|
|
|
|
|
|
if (exists) {
|
|
|
|
VLOG_INFO("State of queue %d ( tx_qid %d ) of vhost device '%s' %"
|
2016-03-16 14:44:18 -07:00
|
|
|
PRIu64" changed to \'%s\'", queue_id, qid,
|
|
|
|
virtio_dev->ifname, virtio_dev->device_fh,
|
|
|
|
(enable == 1) ? "enabled" : "disabled");
|
2016-02-24 17:14:43 +03:00
|
|
|
} else {
|
2016-03-16 14:44:18 -07:00
|
|
|
VLOG_INFO("vHost Device '%s' %"PRIu64" not found", virtio_dev->ifname,
|
|
|
|
virtio_dev->device_fh);
|
2016-02-24 17:14:43 +03:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2015-03-05 13:42:04 -08:00
|
|
|
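/* RCU-protected accessor for the virtio device attached to a vhost port;
 * returns NULL when no guest is currently connected. */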
struct virtio_net *
|
|
|
|
netdev_dpdk_get_virtio(const struct netdev_dpdk *dev)
|
|
|
|
{
|
|
|
|
return ovsrcu_get(struct virtio_net *, &dev->virtio_dev);
|
|
|
|
}
|
|
|
|
|
2016-05-24 17:36:51 +01:00
|
|
|
struct ingress_policer *
|
|
|
|
netdev_dpdk_get_ingress_policer(const struct netdev_dpdk *dev)
|
|
|
|
{
|
|
|
|
return ovsrcu_get(struct ingress_policer *, &dev->ingress_policer);
|
|
|
|
}
|
|
|
|
|
2015-03-05 13:42:04 -08:00
|
|
|
/*
|
|
|
|
* These callbacks allow virtio-net devices to be added to vhost ports when
|
|
|
|
* configuration has been fully completed.
|
|
|
|
*/
|
2015-05-18 08:49:24 -07:00
|
|
|
static const struct virtio_net_device_ops virtio_net_device_ops =
|
2015-03-05 13:42:04 -08:00
|
|
|
{
|
|
|
|
.new_device = new_device,
|
|
|
|
.destroy_device = destroy_device,
|
2016-02-24 17:14:43 +03:00
|
|
|
.vring_state_changed = vring_state_changed
|
2015-03-05 13:42:04 -08:00
|
|
|
};
|
|
|
|
|
|
|
|
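/* Entry point of the detached "vhost_thread"; it simply runs the blocking
 * rte_vhost_driver_session_start() event loop. */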
static void *
|
2015-06-04 06:51:40 -07:00
|
|
|
start_vhost_loop(void *dummy OVS_UNUSED)
|
2015-03-05 13:42:04 -08:00
|
|
|
{
|
|
|
|
pthread_detach(pthread_self());
|
2015-03-27 11:06:57 -07:00
|
|
|
/* Put the cuse thread into quiescent state. */
|
|
|
|
ovsrcu_quiesce_start();
|
2015-03-05 13:42:04 -08:00
|
|
|
rte_vhost_driver_session_start();
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
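/* Common vhost initialization: registers the virtio-net callbacks, disables
 * guest offload features that OVS cannot handle (TSO and checksum offload)
 * and starts the vhost session thread. */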
static int
|
|
|
|
dpdk_vhost_class_init(void)
|
2015-06-04 06:51:40 -07:00
|
|
|
{
|
|
|
|
rte_vhost_driver_callback_register(&virtio_net_device_ops);
|
2016-04-14 17:40:06 +01:00
|
|
|
rte_vhost_feature_disable(1ULL << VIRTIO_NET_F_HOST_TSO4
|
|
|
|
| 1ULL << VIRTIO_NET_F_HOST_TSO6
|
|
|
|
| 1ULL << VIRTIO_NET_F_CSUM);
|
|
|
|
|
2015-06-04 06:51:40 -07:00
|
|
|
ovs_thread_create("vhost_thread", start_vhost_loop, NULL);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
dpdk_vhost_cuse_class_init(void)
|
2015-03-05 13:42:04 -08:00
|
|
|
{
|
2015-06-04 06:51:40 -07:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
dpdk_vhost_user_class_init(void)
|
|
|
|
{
|
2015-03-27 11:06:57 -07:00
|
|
|
return 0;
|
2015-03-05 13:42:04 -08:00
|
|
|
}
|
|
|
|
|
2014-07-16 17:10:59 -07:00
|
|
|
static void
|
|
|
|
dpdk_common_init(void)
|
|
|
|
{
|
|
|
|
unixctl_command_register("netdev-dpdk/set-admin-state",
|
|
|
|
"[netdev] up|down", 1, 2,
|
|
|
|
netdev_dpdk_set_admin_state, NULL);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
2014-07-11 13:37:11 +01:00
|
|
|
/* Client Rings */
|
|
|
|
|
|
|
|
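/* Creates the shared "dpdkr" rings for 'dev_name': a single-producer tx ring
 * and a single-consumer rx ring wrapped into an ethdev via
 * rte_eth_from_rings().  The new entry is added to 'dpdk_ring_list'. */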
static int
|
|
|
|
dpdk_ring_create(const char dev_name[], unsigned int port_no,
|
|
|
|
unsigned int *eth_port_id)
|
|
|
|
{
|
|
|
|
struct dpdk_ring *ivshmem;
|
2016-01-23 22:20:13 -05:00
|
|
|
char ring_name[RTE_RING_NAMESIZE];
|
2014-07-11 13:37:11 +01:00
|
|
|
int err;
|
|
|
|
|
|
|
|
ivshmem = dpdk_rte_mzalloc(sizeof *ivshmem);
|
|
|
|
if (ivshmem == NULL) {
|
|
|
|
return ENOMEM;
|
|
|
|
}
|
|
|
|
|
2014-10-14 19:01:49 +02:00
|
|
|
/* XXX: Add support for multiqueue ring. */
|
2016-01-23 22:20:13 -05:00
|
|
|
err = snprintf(ring_name, sizeof(ring_name), "%s_tx", dev_name);
|
2014-07-11 13:37:11 +01:00
|
|
|
if (err < 0) {
|
|
|
|
return -err;
|
|
|
|
}
|
|
|
|
|
2015-11-16 23:24:47 +01:00
|
|
|
/* Create single producer tx ring, netdev does explicit locking. */
|
2014-10-14 19:01:49 +02:00
|
|
|
ivshmem->cring_tx = rte_ring_create(ring_name, DPDK_RING_SIZE, SOCKET0,
|
2015-11-16 23:24:47 +01:00
|
|
|
RING_F_SP_ENQ);
|
2014-07-11 13:37:11 +01:00
|
|
|
if (ivshmem->cring_tx == NULL) {
|
|
|
|
rte_free(ivshmem);
|
|
|
|
return ENOMEM;
|
|
|
|
}
|
|
|
|
|
2016-01-23 22:20:13 -05:00
|
|
|
err = snprintf(ring_name, sizeof(ring_name), "%s_rx", dev_name);
|
2014-07-11 13:37:11 +01:00
|
|
|
if (err < 0) {
|
|
|
|
return -err;
|
|
|
|
}
|
|
|
|
|
2015-11-16 23:24:47 +01:00
|
|
|
/* Create single consumer rx ring, netdev does explicit locking. */
|
2014-10-14 19:01:49 +02:00
|
|
|
ivshmem->cring_rx = rte_ring_create(ring_name, DPDK_RING_SIZE, SOCKET0,
|
2015-11-16 23:24:47 +01:00
|
|
|
RING_F_SC_DEQ);
|
2014-07-11 13:37:11 +01:00
|
|
|
if (ivshmem->cring_rx == NULL) {
|
|
|
|
rte_free(ivshmem);
|
|
|
|
return ENOMEM;
|
|
|
|
}
|
|
|
|
|
2014-08-12 10:43:35 -07:00
|
|
|
err = rte_eth_from_rings(dev_name, &ivshmem->cring_rx, 1,
|
|
|
|
&ivshmem->cring_tx, 1, SOCKET0);
|
|
|
|
|
2014-07-11 13:37:11 +01:00
|
|
|
if (err < 0) {
|
|
|
|
rte_free(ivshmem);
|
|
|
|
return ENODEV;
|
|
|
|
}
|
|
|
|
|
|
|
|
ivshmem->user_port_id = port_no;
|
|
|
|
ivshmem->eth_port_id = rte_eth_dev_count() - 1;
|
2016-03-25 14:10:22 -07:00
|
|
|
ovs_list_push_back(&dpdk_ring_list, &ivshmem->list_node);
|
2014-07-11 13:37:11 +01:00
|
|
|
|
|
|
|
*eth_port_id = ivshmem->eth_port_id;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
dpdk_ring_open(const char dev_name[], unsigned int *eth_port_id) OVS_REQUIRES(dpdk_mutex)
|
|
|
|
{
|
|
|
|
struct dpdk_ring *ivshmem;
|
|
|
|
unsigned int port_no;
|
|
|
|
int err = 0;
|
|
|
|
|
|
|
|
/* Names always start with "dpdkr" */
|
|
|
|
err = dpdk_dev_parse_name(dev_name, "dpdkr", &port_no);
|
|
|
|
if (err) {
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* look through our list to find the device */
|
|
|
|
LIST_FOR_EACH (ivshmem, list_node, &dpdk_ring_list) {
|
|
|
|
if (ivshmem->user_port_id == port_no) {
|
2015-03-05 13:42:04 -08:00
|
|
|
VLOG_INFO("Found dpdk ring device %s:", dev_name);
|
2014-07-11 13:37:11 +01:00
|
|
|
*eth_port_id = ivshmem->eth_port_id; /* really all that is needed */
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
/* Need to create the device rings */
|
|
|
|
return dpdk_ring_create(dev_name, port_no, eth_port_id);
|
|
|
|
}
|
|
|
|
|
2014-10-14 19:01:49 +02:00
|
|
|
static int
|
2016-03-16 14:44:18 -07:00
|
|
|
netdev_dpdk_ring_send(struct netdev *netdev, int qid,
|
2015-02-25 12:01:53 -08:00
|
|
|
struct dp_packet **pkts, int cnt, bool may_steal)
|
2014-10-14 19:01:49 +02:00
|
|
|
{
|
2016-03-16 14:44:18 -07:00
|
|
|
struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
|
2015-04-13 06:36:56 -07:00
|
|
|
unsigned i;
|
|
|
|
|
|
|
|
/* When using 'dpdkr' and sending to a DPDK ring, we want to ensure that the
|
|
|
|
* rss hash field is clear. This is because the same mbuf may be modified by
|
|
|
|
* the consumer of the ring and returned to the datapath without recalculating
|
|
|
|
* the RSS hash. */
|
|
|
|
for (i = 0; i < cnt; i++) {
|
2015-06-16 19:16:24 +01:00
|
|
|
dp_packet_rss_invalidate(pkts[i]);
|
2015-04-13 06:36:56 -07:00
|
|
|
}
|
2014-10-14 19:01:49 +02:00
|
|
|
|
2016-03-16 14:44:18 -07:00
|
|
|
netdev_dpdk_send__(dev, qid, pkts, cnt, may_steal);
|
2014-10-14 19:01:49 +02:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2014-07-11 13:37:11 +01:00
|
|
|
static int
|
|
|
|
netdev_dpdk_ring_construct(struct netdev *netdev)
|
|
|
|
{
|
|
|
|
unsigned int port_no = 0;
|
|
|
|
int err = 0;
|
|
|
|
|
|
|
|
if (rte_eal_init_ret) {
|
|
|
|
return rte_eal_init_ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
ovs_mutex_lock(&dpdk_mutex);
|
|
|
|
|
|
|
|
err = dpdk_ring_open(netdev->name, &port_no);
|
|
|
|
if (err) {
|
|
|
|
goto unlock_dpdk;
|
|
|
|
}
|
|
|
|
|
2015-03-05 13:42:04 -08:00
|
|
|
err = netdev_dpdk_init(netdev, port_no, DPDK_DEV_ETH);
|
2014-07-11 13:37:11 +01:00
|
|
|
|
|
|
|
unlock_dpdk:
|
|
|
|
ovs_mutex_unlock(&dpdk_mutex);
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
2016-03-02 20:35:54 +00:00
|
|
|
/* QoS Functions */
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Initialize QoS configuration operations.
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
qos_conf_init(struct qos_conf *conf, const struct dpdk_qos_ops *ops)
|
|
|
|
{
|
|
|
|
conf->ops = ops;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Search the existing QoS operations in 'qos_confs' and compare each set's
|
|
|
|
* qos_name against 'name'.  Return a pointer to the matching dpdk_qos_ops,
|
|
|
|
* otherwise return NULL.
|
|
|
|
*/
|
|
|
|
static const struct dpdk_qos_ops *
|
|
|
|
qos_lookup_name(const char *name)
|
|
|
|
{
|
|
|
|
const struct dpdk_qos_ops *const *opsp;
|
|
|
|
|
|
|
|
for (opsp = qos_confs; *opsp != NULL; opsp++) {
|
|
|
|
const struct dpdk_qos_ops *ops = *opsp;
|
|
|
|
if (!strcmp(name, ops->qos_name)) {
|
|
|
|
return ops;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Call qos_destruct to clean up items associated with the netdev's
|
|
|
|
* qos_conf.  Set the netdev's qos_conf to NULL.
|
|
|
|
*/
|
|
|
|
static void
|
2016-03-16 14:44:18 -07:00
|
|
|
qos_delete_conf(struct netdev *netdev)
|
2016-03-02 20:35:54 +00:00
|
|
|
{
|
2016-03-16 14:44:18 -07:00
|
|
|
struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
|
2016-03-02 20:35:54 +00:00
|
|
|
|
2016-03-16 14:44:18 -07:00
|
|
|
rte_spinlock_lock(&dev->qos_lock);
|
|
|
|
if (dev->qos_conf) {
|
|
|
|
if (dev->qos_conf->ops->qos_destruct) {
|
|
|
|
dev->qos_conf->ops->qos_destruct(netdev, dev->qos_conf);
|
2016-03-02 20:35:54 +00:00
|
|
|
}
|
2016-03-16 14:44:18 -07:00
|
|
|
dev->qos_conf = NULL;
|
2016-03-02 20:35:54 +00:00
|
|
|
}
|
2016-03-16 14:44:18 -07:00
|
|
|
rte_spinlock_unlock(&dev->qos_lock);
|
2016-03-02 20:35:54 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
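/* Adds to 'types' the name of every registered QoS implementation that
 * provides a constructor. */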
static int
|
|
|
|
netdev_dpdk_get_qos_types(const struct netdev *netdev OVS_UNUSED,
|
|
|
|
struct sset *types)
|
|
|
|
{
|
|
|
|
const struct dpdk_qos_ops *const *opsp;
|
|
|
|
|
|
|
|
for (opsp = qos_confs; *opsp != NULL; opsp++) {
|
|
|
|
const struct dpdk_qos_ops *ops = *opsp;
|
|
|
|
if (ops->qos_construct && ops->qos_name[0] != '\0') {
|
|
|
|
sset_add(types, ops->qos_name);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
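/* Reports the type name and, when supported, the details of the QoS
 * configuration currently installed on 'netdev'; '*typep' is left untouched
 * when no QoS is configured. */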
static int
|
2016-03-16 14:44:18 -07:00
|
|
|
netdev_dpdk_get_qos(const struct netdev *netdev,
|
2016-03-02 20:35:54 +00:00
|
|
|
const char **typep, struct smap *details)
|
|
|
|
{
|
2016-03-16 14:44:18 -07:00
|
|
|
struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
|
2016-03-02 20:35:54 +00:00
|
|
|
int error = 0;
|
|
|
|
|
2016-03-16 14:44:18 -07:00
|
|
|
ovs_mutex_lock(&dev->mutex);
|
|
|
|
if (dev->qos_conf) {
|
|
|
|
*typep = dev->qos_conf->ops->qos_name;
|
|
|
|
error = (dev->qos_conf->ops->qos_get
|
|
|
|
? dev->qos_conf->ops->qos_get(netdev, details): 0);
|
2016-03-02 20:35:54 +00:00
|
|
|
}
|
2016-03-16 14:44:18 -07:00
|
|
|
ovs_mutex_unlock(&dev->mutex);
|
2016-03-02 20:35:54 +00:00
|
|
|
|
|
|
|
return error;
|
|
|
|
}
|
|
|
|
|
|
|
|
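/* Installs, updates or removes a QoS configuration.  An empty or unsupported
 * 'type' deletes the current configuration and returns EOPNOTSUPP; otherwise
 * the existing configuration is updated in place or replaced with a newly
 * constructed one. */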
static int
|
2016-03-16 14:44:18 -07:00
|
|
|
netdev_dpdk_set_qos(struct netdev *netdev,
|
2016-03-02 20:35:54 +00:00
|
|
|
const char *type, const struct smap *details)
|
|
|
|
{
|
2016-03-16 14:44:18 -07:00
|
|
|
struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
|
2016-03-02 20:35:54 +00:00
|
|
|
const struct dpdk_qos_ops *new_ops = NULL;
|
|
|
|
int error = 0;
|
|
|
|
|
|
|
|
/* If type is empty or unsupported then the current QoS configuration
|
|
|
|
* for the dpdk-netdev can be destroyed */
|
|
|
|
new_ops = qos_lookup_name(type);
|
|
|
|
|
|
|
|
if (type[0] == '\0' || !new_ops || !new_ops->qos_construct) {
|
2016-03-16 14:44:18 -07:00
|
|
|
qos_delete_conf(netdev);
|
2016-03-02 20:35:54 +00:00
|
|
|
return EOPNOTSUPP;
|
|
|
|
}
|
|
|
|
|
2016-03-16 14:44:18 -07:00
|
|
|
ovs_mutex_lock(&dev->mutex);
|
2016-03-02 20:35:54 +00:00
|
|
|
|
2016-03-16 14:44:18 -07:00
|
|
|
if (dev->qos_conf) {
|
|
|
|
if (new_ops == dev->qos_conf->ops) {
|
|
|
|
error = new_ops->qos_set ? new_ops->qos_set(netdev, details) : 0;
|
2016-03-02 20:35:54 +00:00
|
|
|
} else {
|
|
|
|
/* Delete existing QoS configuration. */
|
2016-03-16 14:44:18 -07:00
|
|
|
qos_delete_conf(netdev);
|
|
|
|
ovs_assert(dev->qos_conf == NULL);
|
2016-03-02 20:35:54 +00:00
|
|
|
|
|
|
|
/* Install new QoS configuration. */
|
2016-03-16 14:44:18 -07:00
|
|
|
error = new_ops->qos_construct(netdev, details);
|
|
|
|
ovs_assert((error == 0) == (dev->qos_conf != NULL));
|
2016-03-02 20:35:54 +00:00
|
|
|
}
|
|
|
|
} else {
|
2016-03-16 14:44:18 -07:00
|
|
|
error = new_ops->qos_construct(netdev, details);
|
|
|
|
ovs_assert((error == 0) == (dev->qos_conf != NULL));
|
2016-03-02 20:35:54 +00:00
|
|
|
}
|
|
|
|
|
2016-03-16 14:44:18 -07:00
|
|
|
ovs_mutex_unlock(&dev->mutex);
|
2016-03-02 20:35:54 +00:00
|
|
|
return error;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* egress-policer details */
|
|
|
|
|
|
|
|
struct egress_policer {
|
|
|
|
struct qos_conf qos_conf;
|
|
|
|
struct rte_meter_srtcm_params app_srtcm_params;
|
|
|
|
struct rte_meter_srtcm egress_meter;
|
|
|
|
};
|
|
|
|
|
|
|
|
static struct egress_policer *
|
2016-03-16 14:44:18 -07:00
|
|
|
egress_policer_get__(const struct netdev *netdev)
|
2016-03-02 20:35:54 +00:00
|
|
|
{
|
2016-03-16 14:44:18 -07:00
|
|
|
struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
|
|
|
|
return CONTAINER_OF(dev->qos_conf, struct egress_policer, qos_conf);
|
2016-03-02 20:35:54 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
2016-03-16 14:44:18 -07:00
|
|
|
egress_policer_qos_construct(struct netdev *netdev,
|
|
|
|
const struct smap *details)
|
2016-03-02 20:35:54 +00:00
|
|
|
{
|
2016-03-16 14:44:18 -07:00
|
|
|
struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
|
2016-03-02 20:35:54 +00:00
|
|
|
struct egress_policer *policer;
|
|
|
|
const char *cir_s;
|
|
|
|
const char *cbs_s;
|
|
|
|
int err = 0;
|
|
|
|
|
2016-03-16 14:44:18 -07:00
|
|
|
rte_spinlock_lock(&dev->qos_lock);
|
2016-03-02 20:35:54 +00:00
|
|
|
policer = xmalloc(sizeof *policer);
|
|
|
|
qos_conf_init(&policer->qos_conf, &egress_policer_ops);
|
2016-03-16 14:44:18 -07:00
|
|
|
dev->qos_conf = &policer->qos_conf;
|
2016-03-02 20:35:54 +00:00
|
|
|
cir_s = smap_get(details, "cir");
|
|
|
|
cbs_s = smap_get(details, "cbs");
|
|
|
|
policer->app_srtcm_params.cir = cir_s ? strtoull(cir_s, NULL, 10) : 0;
|
|
|
|
policer->app_srtcm_params.cbs = cbs_s ? strtoull(cbs_s, NULL, 10) : 0;
|
|
|
|
policer->app_srtcm_params.ebs = 0;
|
|
|
|
err = rte_meter_srtcm_config(&policer->egress_meter,
|
|
|
|
&policer->app_srtcm_params);
|
2016-03-16 14:44:18 -07:00
|
|
|
rte_spinlock_unlock(&dev->qos_lock);
|
2016-03-02 20:35:54 +00:00
|
|
|
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
2016-03-16 14:44:18 -07:00
|
|
|
egress_policer_qos_destruct(struct netdev *netdev OVS_UNUSED,
|
2016-03-02 20:35:54 +00:00
|
|
|
struct qos_conf *conf)
|
|
|
|
{
|
|
|
|
struct egress_policer *policer = CONTAINER_OF(conf, struct egress_policer,
|
|
|
|
qos_conf);
|
|
|
|
free(policer);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
egress_policer_qos_get(const struct netdev *netdev, struct smap *details)
|
|
|
|
{
|
|
|
|
struct egress_policer *policer = egress_policer_get__(netdev);
|
|
|
|
smap_add_format(details, "cir", "%llu",
|
|
|
|
1ULL * policer->app_srtcm_params.cir);
|
|
|
|
smap_add_format(details, "cbs", "%llu",
|
|
|
|
1ULL * policer->app_srtcm_params.cbs);
|
2016-02-26 15:58:24 -08:00
|
|
|
|
2016-03-02 20:35:54 +00:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
2016-03-16 14:44:18 -07:00
|
|
|
egress_policer_qos_set(struct netdev *netdev, const struct smap *details)
|
2016-03-02 20:35:54 +00:00
|
|
|
{
|
|
|
|
struct egress_policer *policer;
|
|
|
|
const char *cir_s;
|
|
|
|
const char *cbs_s;
|
|
|
|
int err = 0;
|
|
|
|
|
2016-03-16 14:44:18 -07:00
|
|
|
policer = egress_policer_get__(netdev);
|
2016-03-02 20:35:54 +00:00
|
|
|
cir_s = smap_get(details, "cir");
|
|
|
|
cbs_s = smap_get(details, "cbs");
|
|
|
|
policer->app_srtcm_params.cir = cir_s ? strtoull(cir_s, NULL, 10) : 0;
|
|
|
|
policer->app_srtcm_params.cbs = cbs_s ? strtoull(cbs_s, NULL, 10) : 0;
|
|
|
|
policer->app_srtcm_params.ebs = 0;
|
|
|
|
err = rte_meter_srtcm_config(&policer->egress_meter,
|
|
|
|
&policer->app_srtcm_params);
|
|
|
|
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
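/* Egress QoS hook: runs the configured srTCM meter over 'pkts' and returns
 * the number of packets that remain after policing. */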
static int
|
2016-05-24 17:36:50 +01:00
|
|
|
egress_policer_run(struct netdev *netdev, struct rte_mbuf **pkts, int pkt_cnt)
|
2016-03-02 20:35:54 +00:00
|
|
|
{
|
|
|
|
int cnt = 0;
|
2016-03-16 14:44:18 -07:00
|
|
|
struct egress_policer *policer = egress_policer_get__(netdev);
|
2016-03-02 20:35:54 +00:00
|
|
|
|
2016-05-24 17:36:50 +01:00
|
|
|
cnt = netdev_dpdk_policer_run(&policer->egress_meter, pkts, pkt_cnt);
|
2016-03-02 20:35:54 +00:00
|
|
|
|
|
|
|
return cnt;
|
|
|
|
}
|
|
|
|
|
|
|
|
static const struct dpdk_qos_ops egress_policer_ops = {
|
|
|
|
"egress-policer", /* qos_name */
|
|
|
|
egress_policer_qos_construct,
|
|
|
|
egress_policer_qos_destruct,
|
|
|
|
egress_policer_qos_get,
|
|
|
|
egress_policer_qos_set,
|
|
|
|
egress_policer_run
|
|
|
|
};
|
|
|
|
|
2016-02-26 15:58:24 -08:00
|
|
|
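/* Applies deferred queue configuration for a physical DPDK port.  If the
 * requested rx/tx queue counts differ from the current ones, the port is
 * stopped, re-initialized with the new counts and its tx queue array is
 * reallocated. */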
static int
|
|
|
|
netdev_dpdk_reconfigure(struct netdev *netdev)
|
|
|
|
{
|
|
|
|
struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
|
|
|
|
int err = 0;
|
|
|
|
|
|
|
|
ovs_mutex_lock(&dpdk_mutex);
|
|
|
|
ovs_mutex_lock(&dev->mutex);
|
|
|
|
|
|
|
|
if (netdev->n_txq == dev->requested_n_txq
|
|
|
|
&& netdev->n_rxq == dev->requested_n_rxq) {
|
|
|
|
/* Reconfiguration is unnecessary */
|
|
|
|
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
|
|
|
rte_eth_dev_stop(dev->port_id);
|
|
|
|
|
|
|
|
netdev->n_txq = dev->requested_n_txq;
|
|
|
|
netdev->n_rxq = dev->requested_n_rxq;
|
|
|
|
|
|
|
|
rte_free(dev->tx_q);
|
|
|
|
err = dpdk_eth_dev_init(dev);
|
|
|
|
netdev_dpdk_alloc_txq(dev, dev->real_n_txq);
|
|
|
|
|
|
|
|
dev->txq_needs_locking = dev->real_n_txq != netdev->n_txq;
|
|
|
|
|
|
|
|
out:
|
|
|
|
|
|
|
|
ovs_mutex_unlock(&dev->mutex);
|
|
|
|
ovs_mutex_unlock(&dpdk_mutex);
|
|
|
|
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
netdev_dpdk_vhost_user_reconfigure(struct netdev *netdev)
|
|
|
|
{
|
|
|
|
struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
|
|
|
|
|
|
|
|
ovs_mutex_lock(&dpdk_mutex);
|
|
|
|
ovs_mutex_lock(&dev->mutex);
|
|
|
|
|
|
|
|
netdev->n_txq = dev->requested_n_txq;
|
|
|
|
netdev->n_rxq = dev->requested_n_rxq;
|
|
|
|
|
|
|
|
ovs_mutex_unlock(&dev->mutex);
|
|
|
|
ovs_mutex_unlock(&dpdk_mutex);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
netdev_dpdk_vhost_cuse_reconfigure(struct netdev *netdev)
|
|
|
|
{
|
|
|
|
struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
|
|
|
|
|
|
|
|
ovs_mutex_lock(&dpdk_mutex);
|
|
|
|
ovs_mutex_lock(&dev->mutex);
|
|
|
|
|
|
|
|
netdev->n_txq = dev->requested_n_txq;
|
|
|
|
dev->real_n_txq = 1;
|
|
|
|
netdev->n_rxq = 1;
|
|
|
|
dev->txq_needs_locking = dev->real_n_txq != netdev->n_txq;
|
|
|
|
|
|
|
|
ovs_mutex_unlock(&dev->mutex);
|
|
|
|
ovs_mutex_unlock(&dpdk_mutex);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
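/* Template that fills in a struct netdev_class for each DPDK port type.  The
 * operations shared by all variants are listed directly; the per-variant
 * hooks (init, construct/destruct, send, carrier, stats, features, status,
 * reconfigure and rxq receive) come from the macro arguments. */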
#define NETDEV_DPDK_CLASS(NAME, INIT, CONSTRUCT, DESTRUCT, SEND, \
|
|
|
|
GET_CARRIER, GET_STATS, GET_FEATURES, \
|
|
|
|
GET_STATUS, RECONFIGURE, RXQ_RECV) \
|
2014-07-11 13:37:11 +01:00
|
|
|
{ \
|
|
|
|
NAME, \
|
2016-03-14 18:18:11 +03:00
|
|
|
true, /* is_pmd */ \
|
2014-07-11 13:37:11 +01:00
|
|
|
INIT, /* init */ \
|
|
|
|
NULL, /* netdev_dpdk_run */ \
|
|
|
|
NULL, /* netdev_dpdk_wait */ \
|
|
|
|
\
|
|
|
|
netdev_dpdk_alloc, \
|
|
|
|
CONSTRUCT, \
|
2015-03-05 13:42:04 -08:00
|
|
|
DESTRUCT, \
|
2014-07-11 13:37:11 +01:00
|
|
|
netdev_dpdk_dealloc, \
|
|
|
|
netdev_dpdk_get_config, \
|
2016-01-21 17:15:18 +03:00
|
|
|
netdev_dpdk_set_config, \
|
2014-07-11 13:37:11 +01:00
|
|
|
NULL, /* get_tunnel_config */ \
|
2015-03-05 13:42:04 -08:00
|
|
|
NULL, /* build header */ \
|
|
|
|
NULL, /* push header */ \
|
|
|
|
NULL, /* pop header */ \
|
2014-06-11 16:33:08 -07:00
|
|
|
netdev_dpdk_get_numa_id, /* get_numa_id */ \
|
2016-02-26 15:58:24 -08:00
|
|
|
netdev_dpdk_set_tx_multiq, \
|
2014-07-11 13:37:11 +01:00
|
|
|
\
|
2014-10-14 19:01:49 +02:00
|
|
|
SEND, /* send */ \
|
2014-07-11 13:37:11 +01:00
|
|
|
NULL, /* send_wait */ \
|
|
|
|
\
|
|
|
|
netdev_dpdk_set_etheraddr, \
|
|
|
|
netdev_dpdk_get_etheraddr, \
|
|
|
|
netdev_dpdk_get_mtu, \
|
|
|
|
netdev_dpdk_set_mtu, \
|
|
|
|
netdev_dpdk_get_ifindex, \
|
2015-03-05 13:42:04 -08:00
|
|
|
GET_CARRIER, \
|
2014-07-11 13:37:11 +01:00
|
|
|
netdev_dpdk_get_carrier_resets, \
|
|
|
|
netdev_dpdk_set_miimon, \
|
2015-03-05 13:42:04 -08:00
|
|
|
GET_STATS, \
|
|
|
|
GET_FEATURES, \
|
2014-07-11 13:37:11 +01:00
|
|
|
NULL, /* set_advertisements */ \
|
|
|
|
\
|
2016-05-24 17:36:51 +01:00
|
|
|
netdev_dpdk_set_policing, \
|
2016-03-02 20:35:54 +00:00
|
|
|
netdev_dpdk_get_qos_types, \
|
2014-07-11 13:37:11 +01:00
|
|
|
NULL, /* get_qos_capabilities */ \
|
2016-03-02 20:35:54 +00:00
|
|
|
netdev_dpdk_get_qos, \
|
|
|
|
netdev_dpdk_set_qos, \
|
2014-07-11 13:37:11 +01:00
|
|
|
NULL, /* get_queue */ \
|
|
|
|
NULL, /* set_queue */ \
|
|
|
|
NULL, /* delete_queue */ \
|
|
|
|
NULL, /* get_queue_stats */ \
|
|
|
|
NULL, /* queue_dump_start */ \
|
|
|
|
NULL, /* queue_dump_next */ \
|
|
|
|
NULL, /* queue_dump_done */ \
|
|
|
|
NULL, /* dump_queue_stats */ \
|
|
|
|
\
|
|
|
|
NULL, /* set_in4 */ \
|
2016-03-24 09:30:57 -07:00
|
|
|
NULL, /* get_addr_list */ \
|
2014-07-11 13:37:11 +01:00
|
|
|
NULL, /* add_router */ \
|
|
|
|
NULL, /* get_next_hop */ \
|
2015-03-05 13:42:04 -08:00
|
|
|
GET_STATUS, \
|
2014-07-11 13:37:11 +01:00
|
|
|
NULL, /* arp_lookup */ \
|
|
|
|
\
|
|
|
|
netdev_dpdk_update_flags, \
|
2016-02-26 15:58:24 -08:00
|
|
|
RECONFIGURE, \
|
2014-07-11 13:37:11 +01:00
|
|
|
\
|
|
|
|
netdev_dpdk_rxq_alloc, \
|
|
|
|
netdev_dpdk_rxq_construct, \
|
|
|
|
netdev_dpdk_rxq_destruct, \
|
|
|
|
netdev_dpdk_rxq_dealloc, \
|
2015-03-05 13:42:04 -08:00
|
|
|
RXQ_RECV, \
|
2014-07-11 13:37:11 +01:00
|
|
|
NULL, /* rx_wait */ \
|
|
|
|
NULL, /* rxq_drain */ \
|
|
|
|
}
|
2014-03-24 19:23:08 -07:00
|
|
|
|
2015-06-04 06:51:40 -07:00
|
|
|
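/* Reads the vhost-related 'flag' from 'ovs_other_config'.  Returns 1 and
 * stores a copy of the user-supplied value in '*new_val' if it is present and
 * fits in 'size', otherwise returns 0 and falls back to 'default_val'. */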
static int
|
|
|
|
process_vhost_flags(char *flag, char *default_val, int size,
|
2016-04-29 13:44:01 -04:00
|
|
|
const struct smap *ovs_other_config,
|
|
|
|
char **new_val)
|
2015-06-04 06:51:40 -07:00
|
|
|
{
|
2016-04-29 13:44:01 -04:00
|
|
|
const char *val;
|
2015-06-04 06:51:40 -07:00
|
|
|
int changed = 0;
|
|
|
|
|
2016-04-29 13:44:01 -04:00
|
|
|
val = smap_get(ovs_other_config, flag);
|
|
|
|
|
2015-06-04 06:51:40 -07:00
|
|
|
/* Depending on which version of vhost is in use, process the vhost-specific
|
2016-04-29 13:44:01 -04:00
|
|
|
* flag if it is provided; otherwise fall back to the default value.
|
2015-06-04 06:51:40 -07:00
|
|
|
*/
|
2016-04-29 13:44:01 -04:00
|
|
|
if (val && (strlen(val) <= size)) {
|
2015-06-04 06:51:40 -07:00
|
|
|
changed = 1;
|
2016-04-29 13:44:01 -04:00
|
|
|
*new_val = xstrdup(val);
|
2015-06-04 06:51:40 -07:00
|
|
|
VLOG_INFO("User-provided %s in use: %s", flag, *new_val);
|
|
|
|
} else {
|
|
|
|
VLOG_INFO("No %s provided - defaulting to %s", flag, default_val);
|
|
|
|
*new_val = default_val;
|
|
|
|
}
|
|
|
|
|
|
|
|
return changed;
|
|
|
|
}
|
|
|
|
|
2016-04-29 13:44:01 -04:00
|
|
|
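/* Grows '*argv' so it can hold 'grow_by' pointers beyond 'cur_siz' and
 * returns the reallocated array. */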
static char **
|
|
|
|
grow_argv(char ***argv, size_t cur_siz, size_t grow_by)
|
2014-03-24 19:23:08 -07:00
|
|
|
{
|
2016-04-29 13:44:01 -04:00
|
|
|
return xrealloc(*argv, sizeof(char *) * (cur_siz + grow_by));
|
|
|
|
}
|
2014-03-24 19:23:08 -07:00
|
|
|
|
2016-04-29 13:44:01 -04:00
|
|
|
static void
|
|
|
|
dpdk_option_extend(char ***argv, int argc, const char *option,
|
|
|
|
const char *value)
|
|
|
|
{
|
|
|
|
char **newargv = grow_argv(argv, argc, 2);
|
|
|
|
*argv = newargv;
|
|
|
|
newargv[argc] = xstrdup(option);
|
|
|
|
newargv[argc+1] = xstrdup(value);
|
|
|
|
}
|
2014-03-24 19:23:08 -07:00
|
|
|
|
2016-04-29 13:44:05 -04:00
|
|
|
static char **
|
|
|
|
move_argv(char ***argv, size_t cur_size, char **src_argv, size_t src_argc)
|
|
|
|
{
|
|
|
|
char **newargv = grow_argv(argv, cur_size, src_argc);
|
|
|
|
while (src_argc--) {
|
|
|
|
newargv[cur_size+src_argc] = src_argv[src_argc];
|
|
|
|
src_argv[src_argc] = NULL;
|
|
|
|
}
|
|
|
|
return newargv;
|
|
|
|
}
|
|
|
|
|
2016-04-29 13:44:04 -04:00
|
|
|
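/* Splits the "dpdk-extra" configuration string on spaces and appends each
 * token to 'argv'.  Returns the new argument count. */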
static int
|
|
|
|
extra_dpdk_args(const char *ovs_extra_config, char ***argv, int argc)
|
|
|
|
{
|
|
|
|
int ret = argc;
|
|
|
|
char *release_tok = xstrdup(ovs_extra_config);
|
|
|
|
char *tok = release_tok, *endptr = NULL;
|
|
|
|
|
|
|
|
for (tok = strtok_r(release_tok, " ", &endptr); tok != NULL;
|
|
|
|
tok = strtok_r(NULL, " ", &endptr)) {
|
|
|
|
char **newarg = grow_argv(argv, ret, 1);
|
|
|
|
*argv = newarg;
|
|
|
|
newarg[ret++] = xstrdup(tok);
|
|
|
|
}
|
|
|
|
free(release_tok);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2016-04-29 13:44:05 -04:00
|
|
|
static bool
|
|
|
|
argv_contains(char **argv_haystack, const size_t argc_haystack,
|
|
|
|
const char *needle)
|
|
|
|
{
|
|
|
|
for (size_t i = 0; i < argc_haystack; ++i) {
|
|
|
|
if (!strcmp(argv_haystack[i], needle)) {
|
|
|
|
return true;
}
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2016-04-29 13:44:01 -04:00
|
|
|
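/* Maps simple one-value database keys (dpdk-lcore-mask, dpdk-hugepage-dir)
 * to their EAL command-line equivalents, skipping any option that was already
 * supplied through "dpdk-extra". */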
static int
|
|
|
|
construct_dpdk_options(const struct smap *ovs_other_config,
|
2016-04-29 13:44:05 -04:00
|
|
|
char ***argv, const int initial_size,
|
|
|
|
char **extra_args, const size_t extra_argc)
|
2016-04-29 13:44:01 -04:00
|
|
|
{
|
|
|
|
struct dpdk_options_map {
|
|
|
|
const char *ovs_configuration;
|
|
|
|
const char *dpdk_option;
|
|
|
|
bool default_enabled;
|
|
|
|
const char *default_value;
|
|
|
|
} opts[] = {
|
|
|
|
{"dpdk-lcore-mask", "-c", false, NULL},
|
|
|
|
{"dpdk-hugepage-dir", "--huge-dir", false, NULL},
|
|
|
|
};
|
|
|
|
|
|
|
|
int i, ret = initial_size;
|
|
|
|
|
|
|
|
/* First, construct from the flat options (non-mutually-exclusive ones). */
|
|
|
|
for (i = 0; i < ARRAY_SIZE(opts); ++i) {
|
|
|
|
const char *lookup = smap_get(ovs_other_config,
|
|
|
|
opts[i].ovs_configuration);
|
|
|
|
if (!lookup && opts[i].default_enabled) {
|
|
|
|
lookup = opts[i].default_value;
|
|
|
|
}
|
2014-03-24 19:23:08 -07:00
|
|
|
|
2016-04-29 13:44:01 -04:00
|
|
|
if (lookup) {
|
2016-04-29 13:44:05 -04:00
|
|
|
if (!argv_contains(extra_args, extra_argc, opts[i].dpdk_option)) {
|
|
|
|
dpdk_option_extend(argv, ret, opts[i].dpdk_option, lookup);
|
|
|
|
ret += 2;
|
|
|
|
} else {
|
|
|
|
VLOG_WARN("Ignoring database defined option '%s' due to "
|
|
|
|
"dpdk_extras config", opts[i].dpdk_option);
|
|
|
|
}
|
2016-04-29 13:44:01 -04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
#define MAX_DPDK_EXCL_OPTS 10
|
|
|
|
|
|
|
|
static int
|
|
|
|
construct_dpdk_mutex_options(const struct smap *ovs_other_config,
|
2016-04-29 13:44:05 -04:00
|
|
|
char ***argv, const int initial_size,
|
|
|
|
char **extra_args, const size_t extra_argc)
|
2016-04-29 13:44:01 -04:00
|
|
|
{
|
|
|
|
struct dpdk_exclusive_options_map {
|
|
|
|
const char *category;
|
|
|
|
const char *ovs_dpdk_options[MAX_DPDK_EXCL_OPTS];
|
|
|
|
const char *eal_dpdk_options[MAX_DPDK_EXCL_OPTS];
|
|
|
|
const char *default_value;
|
|
|
|
int default_option;
|
|
|
|
} excl_opts[] = {
|
|
|
|
{"memory type",
|
|
|
|
{"dpdk-alloc-mem", "dpdk-socket-mem", NULL,},
|
|
|
|
{"-m", "--socket-mem", NULL,},
|
|
|
|
"1024,0", 1
|
|
|
|
},
|
|
|
|
};
|
|
|
|
|
|
|
|
int i, ret = initial_size;
|
|
|
|
for (i = 0; i < ARRAY_SIZE(excl_opts); ++i) {
|
|
|
|
int found_opts = 0, scan, found_pos = -1;
|
|
|
|
const char *found_value;
|
|
|
|
struct dpdk_exclusive_options_map *popt = &excl_opts[i];
|
|
|
|
|
|
|
|
for (scan = 0; scan < MAX_DPDK_EXCL_OPTS
|
|
|
|
&& popt->ovs_dpdk_options[scan]; ++scan) {
|
|
|
|
const char *lookup = smap_get(ovs_other_config,
|
|
|
|
popt->ovs_dpdk_options[scan]);
|
|
|
|
if (lookup && strlen(lookup)) {
|
|
|
|
found_opts++;
|
|
|
|
found_pos = scan;
|
|
|
|
found_value = lookup;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!found_opts) {
|
|
|
|
if (popt->default_option) {
|
|
|
|
found_pos = popt->default_option;
|
|
|
|
found_value = popt->default_value;
|
|
|
|
} else {
|
|
|
|
continue;
|
|
|
|
}
|
2015-09-21 15:06:00 -07:00
|
|
|
}
|
2016-04-29 13:44:01 -04:00
|
|
|
|
|
|
|
if (found_opts > 1) {
|
|
|
|
VLOG_ERR("Multiple defined options for %s. Please check your"
|
|
|
|
" database settings and reconfigure if necessary.",
|
|
|
|
popt->category);
|
|
|
|
}
|
|
|
|
|
2016-04-29 13:44:05 -04:00
|
|
|
if (!argv_contains(extra_args, extra_argc,
|
|
|
|
popt->eal_dpdk_options[found_pos])) {
|
|
|
|
dpdk_option_extend(argv, ret, popt->eal_dpdk_options[found_pos],
|
|
|
|
found_value);
|
|
|
|
ret += 2;
|
|
|
|
} else {
|
|
|
|
VLOG_WARN("Ignoring database defined option '%s' due to "
|
|
|
|
"dpdk_extras config", popt->eal_dpdk_options[found_pos]);
|
|
|
|
}
|
2016-04-29 13:44:01 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
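/* Builds the complete EAL argument vector from the other_config column:
 * database-derived options first, followed by anything given verbatim in
 * "dpdk-extra". */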
static int
|
2016-04-29 13:44:03 -04:00
|
|
|
get_dpdk_args(const struct smap *ovs_other_config, char ***argv,
|
|
|
|
int argc)
|
2016-04-29 13:44:01 -04:00
|
|
|
{
|
2016-04-29 13:44:04 -04:00
|
|
|
const char *extra_configuration;
|
2016-04-29 13:44:05 -04:00
|
|
|
char **extra_args = NULL;
|
|
|
|
int i;
|
|
|
|
size_t extra_argc = 0;
|
2016-04-29 13:44:04 -04:00
|
|
|
|
|
|
|
extra_configuration = smap_get(ovs_other_config, "dpdk-extra");
|
|
|
|
if (extra_configuration) {
|
2016-04-29 13:44:05 -04:00
|
|
|
extra_argc = extra_dpdk_args(extra_configuration, &extra_args, 0);
|
2016-04-29 13:44:04 -04:00
|
|
|
}
|
2016-04-29 13:44:05 -04:00
|
|
|
|
|
|
|
i = construct_dpdk_options(ovs_other_config, argv, argc, extra_args,
|
|
|
|
extra_argc);
|
|
|
|
i = construct_dpdk_mutex_options(ovs_other_config, argv, i, extra_args,
|
|
|
|
extra_argc);
|
|
|
|
|
|
|
|
if (extra_configuration) {
|
|
|
|
*argv = move_argv(argv, i, extra_args, extra_argc);
|
|
|
|
}
|
|
|
|
|
|
|
|
return i + extra_argc;
|
2016-04-29 13:44:01 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
static char **dpdk_argv;
|
|
|
|
static int dpdk_argc;
|
|
|
|
|
|
|
|
static void
|
|
|
|
deferred_argv_release(void)
|
|
|
|
{
|
|
|
|
int result;
|
|
|
|
for (result = 0; result < dpdk_argc; ++result) {
|
|
|
|
free(dpdk_argv[result]);
|
|
|
|
}
|
|
|
|
|
|
|
|
free(dpdk_argv);
|
|
|
|
}
|
|
|
|
|
|
|
|
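/* One-time DPDK initialization driven by the other_config column: resolves
 * the vhost socket directory (or CUSE device name), assembles the EAL
 * argument vector, calls rte_eal_init() and registers the DPDK netdev
 * classes.  Typically enabled with something like (values illustrative):
 *   ovs-vsctl --no-wait set Open_vSwitch . other_config:dpdk-init=true \
 *       other_config:dpdk-lcore-mask=0x2 other_config:dpdk-socket-mem=1024,0
 */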
static void
|
|
|
|
dpdk_init__(const struct smap *ovs_other_config)
|
|
|
|
{
|
|
|
|
char **argv = NULL;
|
|
|
|
int result;
|
2016-04-29 13:44:03 -04:00
|
|
|
int argc, argc_tmp;
|
|
|
|
bool auto_determine = true;
|
2016-05-19 13:51:32 +01:00
|
|
|
int err = 0;
|
2016-04-29 13:44:01 -04:00
|
|
|
cpu_set_t cpuset;
|
2016-04-29 13:44:02 -04:00
|
|
|
#ifndef VHOST_CUSE
|
|
|
|
char *sock_dir_subcomponent;
|
|
|
|
#endif
|
2016-04-29 13:44:01 -04:00
|
|
|
|
|
|
|
if (!smap_get_bool(ovs_other_config, "dpdk-init", false)) {
|
|
|
|
VLOG_INFO("DPDK Disabled - to change this requires a restart.\n");
|
|
|
|
return;
|
2015-09-21 15:06:00 -07:00
|
|
|
}
|
|
|
|
|
2016-04-29 13:44:01 -04:00
|
|
|
VLOG_INFO("DPDK Enabled, initializing");
|
|
|
|
|
2015-06-04 06:51:40 -07:00
|
|
|
#ifdef VHOST_CUSE
|
2016-04-29 13:44:01 -04:00
|
|
|
if (process_vhost_flags("cuse-dev-name", xstrdup("vhost-net"),
|
|
|
|
PATH_MAX, ovs_other_config, &cuse_dev_name)) {
|
2015-06-04 06:51:40 -07:00
|
|
|
#else
|
2016-05-06 11:20:34 +01:00
|
|
|
if (process_vhost_flags("vhost-sock-dir", xstrdup(ovs_rundir()),
|
2016-04-29 13:44:02 -04:00
|
|
|
NAME_MAX, ovs_other_config,
|
|
|
|
&sock_dir_subcomponent)) {
|
2015-06-04 06:51:40 -07:00
|
|
|
struct stat s;
|
2016-04-29 13:44:02 -04:00
|
|
|
if (!strstr(sock_dir_subcomponent, "..")) {
|
|
|
|
vhost_sock_dir = xasprintf("%s/%s", ovs_rundir(),
|
|
|
|
sock_dir_subcomponent);
|
|
|
|
|
|
|
|
err = stat(vhost_sock_dir, &s);
|
|
|
|
if (err) {
|
|
|
|
VLOG_ERR("vhost-user sock directory '%s' does not exist.",
|
|
|
|
vhost_sock_dir);
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
vhost_sock_dir = xstrdup(ovs_rundir());
|
|
|
|
VLOG_ERR("vhost-user sock directory request '%s/%s' has invalid"
|
|
|
|
"characters '..' - using %s instead.",
|
|
|
|
ovs_rundir(), sock_dir_subcomponent, ovs_rundir());
|
2015-06-04 06:51:40 -07:00
|
|
|
}
|
2016-04-29 13:44:02 -04:00
|
|
|
free(sock_dir_subcomponent);
|
|
|
|
} else {
|
2016-05-06 11:20:34 +01:00
|
|
|
vhost_sock_dir = sock_dir_subcomponent;
|
2015-06-04 06:51:40 -07:00
|
|
|
#endif
|
2015-03-05 13:42:04 -08:00
|
|
|
}
|
|
|
|
|
2016-04-29 13:44:01 -04:00
|
|
|
argv = grow_argv(&argv, 0, 1);
|
2016-04-29 13:44:03 -04:00
|
|
|
argc = 1;
|
2016-04-29 13:44:01 -04:00
|
|
|
argv[0] = xstrdup(ovs_get_program_name());
|
2016-04-29 13:44:03 -04:00
|
|
|
argc_tmp = get_dpdk_args(ovs_other_config, &argv, argc);
|
|
|
|
|
|
|
|
while (argc_tmp != argc) {
|
|
|
|
if (!strcmp("-c", argv[argc]) || !strcmp("-l", argv[argc])) {
|
|
|
|
auto_determine = false;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
argc++;
|
|
|
|
}
|
|
|
|
argc = argc_tmp;
|
|
|
|
|
|
|
|
/**
|
|
|
|
* NOTE: This is an unsophisticated mechanism for determining the DPDK
|
|
|
|
* lcore for the DPDK Master.
|
|
|
|
*/
|
|
|
|
if (auto_determine) {
|
|
|
|
int i;
|
2016-05-19 13:51:32 +01:00
|
|
|
/* Get the main thread affinity */
|
|
|
|
CPU_ZERO(&cpuset);
|
|
|
|
err = pthread_getaffinity_np(pthread_self(), sizeof(cpu_set_t),
|
|
|
|
&cpuset);
|
|
|
|
if (!err) {
|
|
|
|
for (i = 0; i < CPU_SETSIZE; i++) {
|
|
|
|
if (CPU_ISSET(i, &cpuset)) {
|
|
|
|
argv = grow_argv(&argv, argc, 2);
|
|
|
|
argv[argc++] = xstrdup("-c");
|
|
|
|
argv[argc++] = xasprintf("0x%08llX", (1ULL<<i));
|
|
|
|
i = CPU_SETSIZE;
|
|
|
|
}
|
2016-04-29 13:44:03 -04:00
|
|
|
}
|
2016-05-19 13:51:32 +01:00
|
|
|
} else {
|
|
|
|
VLOG_ERR("Thread getaffinity error %d. Using core 0x1", err);
|
|
|
|
/* User did not set dpdk-lcore-mask and unable to get current
|
|
|
|
* thread affinity - default to core 0x1 */
|
|
|
|
argv = grow_argv(&argv, argc, 2);
|
|
|
|
argv[argc++] = xstrdup("-c");
|
|
|
|
argv[argc++] = xasprintf("0x%X", 1);
|
2016-04-29 13:44:03 -04:00
|
|
|
}
|
|
|
|
}
|
2016-04-29 13:44:01 -04:00
|
|
|
|
|
|
|
argv = grow_argv(&argv, argc, 1);
|
|
|
|
argv[argc] = NULL;
|
|
|
|
|
|
|
|
optind = 1;
|
2015-03-05 13:42:04 -08:00
|
|
|
|
2016-04-29 13:44:04 -04:00
|
|
|
if (VLOG_IS_INFO_ENABLED()) {
|
|
|
|
struct ds eal_args;
|
|
|
|
int opt;
|
|
|
|
ds_init(&eal_args);
|
|
|
|
ds_put_cstr(&eal_args, "EAL ARGS:");
|
|
|
|
for (opt = 0; opt < argc; ++opt) {
|
|
|
|
ds_put_cstr(&eal_args, " ");
|
|
|
|
ds_put_cstr(&eal_args, argv[opt]);
|
|
|
|
}
|
|
|
|
VLOG_INFO("%s", ds_cstr_ro(&eal_args));
|
|
|
|
ds_destroy(&eal_args);
|
|
|
|
}
|
|
|
|
|
2014-03-24 19:23:08 -07:00
|
|
|
/* Make sure things are initialized ... */
|
|
|
|
result = rte_eal_init(argc, argv);
|
2014-06-24 08:54:56 -07:00
|
|
|
if (result < 0) {
|
2015-03-05 13:42:04 -08:00
|
|
|
ovs_abort(result, "Cannot init EAL");
|
2014-06-24 08:54:56 -07:00
|
|
|
}
|
2014-03-24 19:23:08 -07:00
|
|
|
|
2016-04-29 13:44:00 -04:00
|
|
|
/* Set the main thread affinity back to pre rte_eal_init() value */
|
2016-05-19 13:51:32 +01:00
|
|
|
if (auto_determine && !err) {
|
2016-04-29 13:44:01 -04:00
|
|
|
err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t),
|
|
|
|
&cpuset);
|
|
|
|
if (err) {
|
|
|
|
VLOG_ERR("Thread setaffinity error %d", err);
|
|
|
|
}
|
2016-04-29 13:44:00 -04:00
|
|
|
}
|
|
|
|
|
2016-04-29 13:44:01 -04:00
|
|
|
dpdk_argv = argv;
|
|
|
|
dpdk_argc = argc;
|
|
|
|
|
|
|
|
atexit(deferred_argv_release);
|
|
|
|
|
2014-08-12 10:43:35 -07:00
|
|
|
rte_memzone_dump(stdout);
|
2014-03-24 19:23:08 -07:00
|
|
|
rte_eal_init_ret = 0;
|
|
|
|
|
netdev-dpdk: Fix race condition with DPDK mempools in non pmd threads
DPDK mempools rely on rte_lcore_id() to implement a thread-local cache.
Our non pmd threads had rte_lcore_id() == 0. This allowed concurrent access to
the "thread-local" cache, causing crashes.
This commit resolves the issue with the following changes:
- Every non pmd thread has the same lcore_id (0, for management reasons), which
is not shared with any pmd thread (lcore_id for pmd threads now start from 1)
- DPDK mbufs must be allocated/freed in pmd threads. When there is the need to
use mempools in non pmd threads, like in dpdk_do_tx_copy(), a mutex must be
held.
- The previous change does not allow us anymore to pass DPDK mbufs to handler
threads: therefore this commit partially revert 143859ec63d45e. Now packets
are copied for upcall processing. We can remove the extra memcpy by
processing upcalls in the pmd thread itself.
With the introduction of the extra locking, the packet throughput will be lower
in the following cases:
- When using internal (tap) devices with DPDK devices on the same datapath.
Anyway, to support internal devices efficiently, we needed DPDK KNI devices,
which will be proper pmd devices and will not need this locking.
- When packets are processed in the slow path by non pmd threads. This overhead
can be avoided by handling the upcalls directly in pmd threads (a change that
has already been proposed by Ryan Wilson)
Also, the following two fixes have been introduced:
- In dpdk_free_buf() use rte_pktmbuf_free_seg() instead of rte_mempool_put().
This allows OVS to run properly with CONFIG_RTE_LIBRTE_MBUF_DEBUG DPDK option
- Do not bulk free mbufs in a transmission queue. They may belong to different
mempools
Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>
2014-07-17 14:29:36 -07:00
|
|
|
/* We are called from the main thread here */
|
2015-05-22 17:14:20 +01:00
|
|
|
RTE_PER_LCORE(_lcore_id) = NON_PMD_CORE_ID;
|
2014-07-17 14:29:36 -07:00
|
|
|
|
2016-04-29 13:44:01 -04:00
|
|
|
ovs_thread_create("dpdk_watchdog", dpdk_watchdog, NULL);
|
|
|
|
|
|
|
|
#ifdef VHOST_CUSE
|
|
|
|
/* Register CUSE device to handle IOCTLs.
|
|
|
|
* Unless otherwise specified, cuse_dev_name is set to vhost-net.
|
|
|
|
*/
|
|
|
|
err = rte_vhost_driver_register(cuse_dev_name);
|
|
|
|
|
|
|
|
if (err != 0) {
|
|
|
|
VLOG_ERR("CUSE device setup failure.");
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
dpdk_vhost_class_init();
|
|
|
|
|
|
|
|
/* Finally, register the dpdk classes */
|
|
|
|
netdev_dpdk_register();
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
dpdk_init(const struct smap *ovs_other_config)
|
|
|
|
{
|
|
|
|
static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
|
|
|
|
|
|
|
|
if (ovs_other_config && ovsthread_once_start(&once)) {
|
|
|
|
dpdk_init__(ovs_other_config);
|
|
|
|
ovsthread_once_done(&once);
|
|
|
|
}
|
2014-03-24 19:23:08 -07:00
|
|
|
}
|
|
|
|
|
2015-05-18 08:49:24 -07:00
|
|
|
static const struct netdev_class dpdk_class =
|
2014-07-11 13:37:11 +01:00
|
|
|
NETDEV_DPDK_CLASS(
|
|
|
|
"dpdk",
|
2015-02-17 13:20:04 -08:00
|
|
|
NULL,
|
2014-09-08 14:52:54 -07:00
|
|
|
netdev_dpdk_construct,
|
2015-03-05 13:42:04 -08:00
|
|
|
netdev_dpdk_destruct,
|
|
|
|
netdev_dpdk_eth_send,
|
|
|
|
netdev_dpdk_get_carrier,
|
|
|
|
netdev_dpdk_get_stats,
|
|
|
|
netdev_dpdk_get_features,
|
|
|
|
netdev_dpdk_get_status,
|
2016-02-26 15:58:24 -08:00
|
|
|
netdev_dpdk_reconfigure,
|
2015-03-05 13:42:04 -08:00
|
|
|
netdev_dpdk_rxq_recv);
|
2014-07-11 13:37:11 +01:00
|
|
|
|
2015-05-18 08:49:24 -07:00
|
|
|
static const struct netdev_class dpdk_ring_class =
|
2014-07-11 13:37:11 +01:00
|
|
|
NETDEV_DPDK_CLASS(
|
|
|
|
"dpdkr",
|
2014-07-16 17:10:59 -07:00
|
|
|
NULL,
|
2014-09-08 14:52:54 -07:00
|
|
|
netdev_dpdk_ring_construct,
|
2015-03-05 13:42:04 -08:00
|
|
|
netdev_dpdk_destruct,
|
|
|
|
netdev_dpdk_ring_send,
|
|
|
|
netdev_dpdk_get_carrier,
|
|
|
|
netdev_dpdk_get_stats,
|
|
|
|
netdev_dpdk_get_features,
|
|
|
|
netdev_dpdk_get_status,
|
2016-02-26 15:58:24 -08:00
|
|
|
netdev_dpdk_reconfigure,
|
2015-03-05 13:42:04 -08:00
|
|
|
netdev_dpdk_rxq_recv);
|
|
|
|
|
2015-06-24 08:55:39 -07:00
|
|
|
static const struct netdev_class OVS_UNUSED dpdk_vhost_cuse_class =
|
2015-03-05 13:42:04 -08:00
|
|
|
NETDEV_DPDK_CLASS(
|
2015-06-04 06:51:40 -07:00
|
|
|
"dpdkvhostcuse",
|
|
|
|
dpdk_vhost_cuse_class_init,
|
|
|
|
netdev_dpdk_vhost_cuse_construct,
|
|
|
|
netdev_dpdk_vhost_destruct,
|
|
|
|
netdev_dpdk_vhost_send,
|
|
|
|
netdev_dpdk_vhost_get_carrier,
|
|
|
|
netdev_dpdk_vhost_get_stats,
|
|
|
|
NULL,
|
|
|
|
NULL,
|
2016-02-26 15:58:24 -08:00
|
|
|
netdev_dpdk_vhost_cuse_reconfigure,
|
2015-06-04 06:51:40 -07:00
|
|
|
netdev_dpdk_vhost_rxq_recv);
|
|
|
|
|
2015-06-24 08:55:39 -07:00
|
|
|
static const struct netdev_class OVS_UNUSED dpdk_vhost_user_class =
|
2015-06-04 06:51:40 -07:00
|
|
|
NETDEV_DPDK_CLASS(
|
|
|
|
"dpdkvhostuser",
|
|
|
|
dpdk_vhost_user_class_init,
|
|
|
|
netdev_dpdk_vhost_user_construct,
|
2015-03-05 13:42:04 -08:00
|
|
|
netdev_dpdk_vhost_destruct,
|
|
|
|
netdev_dpdk_vhost_send,
|
|
|
|
netdev_dpdk_vhost_get_carrier,
|
|
|
|
netdev_dpdk_vhost_get_stats,
|
|
|
|
NULL,
|
2014-10-14 19:01:49 +02:00
|
|
|
NULL,
|
2016-02-26 15:58:24 -08:00
|
|
|
netdev_dpdk_vhost_user_reconfigure,
|
2015-03-05 13:42:04 -08:00
|
|
|
netdev_dpdk_vhost_rxq_recv);
|
2014-07-11 13:37:11 +01:00
|
|
|
|
2014-03-24 19:23:08 -07:00
|
|
|
void
|
|
|
|
netdev_dpdk_register(void)
|
|
|
|
{
|
2016-04-29 13:44:01 -04:00
|
|
|
dpdk_common_init();
|
|
|
|
netdev_register_provider(&dpdk_class);
|
|
|
|
netdev_register_provider(&dpdk_ring_class);
|
2015-06-04 06:51:40 -07:00
|
|
|
#ifdef VHOST_CUSE
|
2016-04-29 13:44:01 -04:00
|
|
|
netdev_register_provider(&dpdk_vhost_cuse_class);
|
2015-06-04 06:51:40 -07:00
|
|
|
#else
|
2016-04-29 13:44:01 -04:00
|
|
|
netdev_register_provider(&dpdk_vhost_user_class);
|
2015-06-04 06:51:40 -07:00
|
|
|
#endif
|
2014-03-24 19:23:08 -07:00
|
|
|
}
|
2014-03-20 22:07:44 -07:00
|
|
|
|
2016-06-06 17:05:49 -07:00
|
|
|
void
|
|
|
|
dpdk_set_lcore_id(unsigned cpu)
|
2014-03-20 22:07:44 -07:00
|
|
|
{
|
2015-02-03 17:08:13 -08:00
|
|
|
/* NON_PMD_CORE_ID is reserved for use by non pmd threads. */
|
|
|
|
ovs_assert(cpu != NON_PMD_CORE_ID);
|
2014-09-05 14:14:20 -07:00
|
|
|
RTE_PER_LCORE(_lcore_id) = cpu;
|
2014-03-20 22:07:44 -07:00
|
|
|
}
|
2014-07-17 14:29:36 -07:00
|
|
|
|
|
|
|
static bool
|
2016-01-12 11:32:41 -08:00
|
|
|
dpdk_thread_is_pmd(void)
|
2014-07-17 14:29:36 -07:00
|
|
|
{
    return rte_lcore_id() != NON_PMD_CORE_ID;
}