2
0
mirror of https://github.com/openvswitch/ovs synced 2025-09-02 07:15:17 +00:00

netdev-dpdk: clean up mbuf initialization

Current mbuf initialization relies on magic numbers and does not
accommodate mbufs of different sizes.

Resolve this issue by ensuring that mbufs are always aligned to a 1k
boundary (a typical DPDK NIC Rx buffer alignment).

Signed-off-by: Mark Kavanagh <mark.b.kavanagh@intel.com>
Acked-by: Flavio Leitner <fbl@sysclose.org>
Signed-off-by: Daniele Di Proietto <diproiettod@vmware.com>
This commit is contained in:
Mark Kavanagh
2016-02-19 11:25:11 +00:00
committed by Daniele Di Proietto
parent 7ae1f322d7
commit 4be4d22c33

View File

@@ -69,14 +69,14 @@ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
* The minimum mbuf size is limited to avoid scatter behaviour and drop in * The minimum mbuf size is limited to avoid scatter behaviour and drop in
* performance for standard Ethernet MTU. * performance for standard Ethernet MTU.
*/ */
#define MTU_TO_MAX_LEN(mtu) ((mtu) + ETHER_HDR_LEN + ETHER_CRC_LEN) #define ETHER_HDR_MAX_LEN (ETHER_HDR_LEN + ETHER_CRC_LEN + (2 * VLAN_HEADER_LEN))
#define MBUF_SIZE_MTU(mtu) (MTU_TO_MAX_LEN(mtu) \ #define MTU_TO_FRAME_LEN(mtu) ((mtu) + ETHER_HDR_LEN + ETHER_CRC_LEN)
#define MTU_TO_MAX_FRAME_LEN(mtu) ((mtu) + ETHER_HDR_MAX_LEN)
#define FRAME_LEN_TO_MTU(frame_len) ((frame_len)- ETHER_HDR_LEN - ETHER_CRC_LEN)
#define MBUF_SIZE(mtu) ( MTU_TO_MAX_FRAME_LEN(mtu) \
+ sizeof(struct dp_packet) \ + sizeof(struct dp_packet) \
+ RTE_PKTMBUF_HEADROOM) + RTE_PKTMBUF_HEADROOM)
#define MBUF_SIZE_DRIVER (2048 \ #define NETDEV_DPDK_MBUF_ALIGN 1024
+ sizeof (struct rte_mbuf) \
+ RTE_PKTMBUF_HEADROOM)
#define MBUF_SIZE(mtu) MAX(MBUF_SIZE_MTU(mtu), MBUF_SIZE_DRIVER)
/* Max and min number of packets in the mempool. OVS tries to allocate a /* Max and min number of packets in the mempool. OVS tries to allocate a
* mempool with MAX_NB_MBUF: if this fails (because the system doesn't have * mempool with MAX_NB_MBUF: if this fails (because the system doesn't have
@@ -256,6 +256,22 @@ is_dpdk_class(const struct netdev_class *class)
return class->construct == netdev_dpdk_construct; return class->construct == netdev_dpdk_construct;
} }
/* DPDK NIC drivers allocate RX buffers at a particular granularity, typically
* aligned at 1k or less. If a declared mbuf size is not a multiple of this
* value, insufficient buffers are allocated to accommodate the packet in its
* entirety. Furthermore, certain drivers need to ensure that there is also
* sufficient space in the Rx buffer to accommodate two VLAN tags (for QinQ
* frames). If the RX buffer is too small, then the driver enables scatter RX
* behaviour, which reduces performance. To prevent this, use a buffer size that
* is closest to 'mtu', but which satisfies the aforementioned criteria.
*
* Returns the buffer size to use for 'mtu': the maximum frame length for that
* MTU (MTU_TO_MAX_FRAME_LEN(), which allows for two VLAN headers) plus
* RTE_PKTMBUF_HEADROOM, rounded up with ROUND_UP() to
* NETDEV_DPDK_MBUF_ALIGN.
*/
static uint32_t
dpdk_buf_size(int mtu)
{
return ROUND_UP((MTU_TO_MAX_FRAME_LEN(mtu) + RTE_PKTMBUF_HEADROOM),
NETDEV_DPDK_MBUF_ALIGN);
}
/* XXX: use dpdk malloc for entire OVS. in fact huge page should be used /* XXX: use dpdk malloc for entire OVS. in fact huge page should be used
* for all other segments data, bss and text. */ * for all other segments data, bss and text. */
@@ -281,34 +297,6 @@ free_dpdk_buf(struct dp_packet *p)
rte_pktmbuf_free_seg(pkt); rte_pktmbuf_free_seg(pkt);
} }
/* Initializes one element '_m' of mempool 'mp' as an mbuf whose data buffer
* follows a 'struct dp_packet' at the head of the element.
*
* The whole element ('mp->elt_size' bytes) is zeroed first, then the buffer
* address, buffer length, data offset and a few constant fields are filled
* in. 'opaque_arg' and 'i' are not used.
*/
static void
__rte_pktmbuf_init(struct rte_mempool *mp,
void *opaque_arg OVS_UNUSED,
void *_m,
unsigned i OVS_UNUSED)
{
struct rte_mbuf *m = _m;
/* Space left in the element once the leading struct dp_packet is skipped.
* NOTE(review): this is computed before the size assertion below, so it
* relies on that assertion holding -- confirm elt_size is never smaller. */
uint32_t buf_len = mp->elt_size - sizeof(struct dp_packet);
RTE_MBUF_ASSERT(mp->elt_size >= sizeof(struct dp_packet));
/* Clear the entire element, metadata and data area alike. */
memset(m, 0, mp->elt_size);
/* start of buffer is just after the struct dp_packet at the head of the
* element; the physical address is offset by the same amount */
m->buf_addr = (char *)m + sizeof(struct dp_packet);
m->buf_physaddr = rte_mempool_virt2phy(mp, m) +
sizeof(struct dp_packet);
/* The length is narrowed to 16 bits by the cast. */
m->buf_len = (uint16_t)buf_len;
/* keep some headroom between start of buffer and data, but never more
* than the buffer itself holds */
m->data_off = RTE_MIN(RTE_PKTMBUF_HEADROOM, m->buf_len);
/* init some constant fields */
m->pool = mp;
m->nb_segs = 1;
/* NOTE(review): 0xff is presumably a "no port" placeholder -- confirm. */
m->port = 0xff;
}
static void static void
ovs_rte_pktmbuf_init(struct rte_mempool *mp, ovs_rte_pktmbuf_init(struct rte_mempool *mp,
void *opaque_arg OVS_UNUSED, void *opaque_arg OVS_UNUSED,
@@ -317,7 +305,7 @@ ovs_rte_pktmbuf_init(struct rte_mempool *mp,
{ {
struct rte_mbuf *m = _m; struct rte_mbuf *m = _m;
__rte_pktmbuf_init(mp, opaque_arg, _m, i); rte_pktmbuf_init(mp, opaque_arg, _m, i);
dp_packet_init_dpdk((struct dp_packet *) m, m->buf_len); dp_packet_init_dpdk((struct dp_packet *) m, m->buf_len);
} }
@@ -328,6 +316,7 @@ dpdk_mp_get(int socket_id, int mtu) OVS_REQUIRES(dpdk_mutex)
struct dpdk_mp *dmp = NULL; struct dpdk_mp *dmp = NULL;
char mp_name[RTE_MEMPOOL_NAMESIZE]; char mp_name[RTE_MEMPOOL_NAMESIZE];
unsigned mp_size; unsigned mp_size;
struct rte_pktmbuf_pool_private mbp_priv;
LIST_FOR_EACH (dmp, list_node, &dpdk_mp_list) { LIST_FOR_EACH (dmp, list_node, &dpdk_mp_list) {
if (dmp->socket_id == socket_id && dmp->mtu == mtu) { if (dmp->socket_id == socket_id && dmp->mtu == mtu) {
@@ -340,6 +329,8 @@ dpdk_mp_get(int socket_id, int mtu) OVS_REQUIRES(dpdk_mutex)
dmp->socket_id = socket_id; dmp->socket_id = socket_id;
dmp->mtu = mtu; dmp->mtu = mtu;
dmp->refcount = 1; dmp->refcount = 1;
mbp_priv.mbuf_data_room_size = MBUF_SIZE(mtu) - sizeof(struct dp_packet);
mbp_priv.mbuf_priv_size = sizeof (struct dp_packet) - sizeof (struct rte_mbuf);
mp_size = MAX_NB_MBUF; mp_size = MAX_NB_MBUF;
do { do {
@@ -351,7 +342,7 @@ dpdk_mp_get(int socket_id, int mtu) OVS_REQUIRES(dpdk_mutex)
dmp->mp = rte_mempool_create(mp_name, mp_size, MBUF_SIZE(mtu), dmp->mp = rte_mempool_create(mp_name, mp_size, MBUF_SIZE(mtu),
MP_CACHE_SZ, MP_CACHE_SZ,
sizeof(struct rte_pktmbuf_pool_private), sizeof(struct rte_pktmbuf_pool_private),
rte_pktmbuf_pool_init, NULL, rte_pktmbuf_pool_init, &mbp_priv,
ovs_rte_pktmbuf_init, NULL, ovs_rte_pktmbuf_init, NULL,
socket_id, 0); socket_id, 0);
} while (!dmp->mp && rte_errno == ENOMEM && (mp_size /= 2) >= MIN_NB_MBUF); } while (!dmp->mp && rte_errno == ENOMEM && (mp_size /= 2) >= MIN_NB_MBUF);
@@ -591,6 +582,7 @@ netdev_dpdk_init(struct netdev *netdev_, unsigned int port_no,
struct netdev_dpdk *netdev = netdev_dpdk_cast(netdev_); struct netdev_dpdk *netdev = netdev_dpdk_cast(netdev_);
int sid; int sid;
int err = 0; int err = 0;
uint32_t buf_size;
ovs_mutex_init(&netdev->mutex); ovs_mutex_init(&netdev->mutex);
ovs_mutex_lock(&netdev->mutex); ovs_mutex_lock(&netdev->mutex);
@@ -611,9 +603,10 @@ netdev_dpdk_init(struct netdev *netdev_, unsigned int port_no,
netdev->type = type; netdev->type = type;
netdev->flags = 0; netdev->flags = 0;
netdev->mtu = ETHER_MTU; netdev->mtu = ETHER_MTU;
netdev->max_packet_len = MTU_TO_MAX_LEN(netdev->mtu); netdev->max_packet_len = MTU_TO_FRAME_LEN(netdev->mtu);
netdev->dpdk_mp = dpdk_mp_get(netdev->socket_id, netdev->mtu); buf_size = dpdk_buf_size(netdev->mtu);
netdev->dpdk_mp = dpdk_mp_get(netdev->socket_id, FRAME_LEN_TO_MTU(buf_size));
if (!netdev->dpdk_mp) { if (!netdev->dpdk_mp) {
err = ENOMEM; err = ENOMEM;
goto unlock; goto unlock;
@@ -1439,9 +1432,10 @@ static int
netdev_dpdk_set_mtu(const struct netdev *netdev, int mtu) netdev_dpdk_set_mtu(const struct netdev *netdev, int mtu)
{ {
struct netdev_dpdk *dev = netdev_dpdk_cast(netdev); struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
int old_mtu, err; int old_mtu, err, dpdk_mtu;
struct dpdk_mp *old_mp; struct dpdk_mp *old_mp;
struct dpdk_mp *mp; struct dpdk_mp *mp;
uint32_t buf_size;
ovs_mutex_lock(&dpdk_mutex); ovs_mutex_lock(&dpdk_mutex);
ovs_mutex_lock(&dev->mutex); ovs_mutex_lock(&dev->mutex);
@@ -1450,7 +1444,10 @@ netdev_dpdk_set_mtu(const struct netdev *netdev, int mtu)
goto out; goto out;
} }
mp = dpdk_mp_get(dev->socket_id, dev->mtu); buf_size = dpdk_buf_size(mtu);
dpdk_mtu = FRAME_LEN_TO_MTU(buf_size);
mp = dpdk_mp_get(dev->socket_id, dpdk_mtu);
if (!mp) { if (!mp) {
err = ENOMEM; err = ENOMEM;
goto out; goto out;
@@ -1462,14 +1459,14 @@ netdev_dpdk_set_mtu(const struct netdev *netdev, int mtu)
old_mp = dev->dpdk_mp; old_mp = dev->dpdk_mp;
dev->dpdk_mp = mp; dev->dpdk_mp = mp;
dev->mtu = mtu; dev->mtu = mtu;
dev->max_packet_len = MTU_TO_MAX_LEN(dev->mtu); dev->max_packet_len = MTU_TO_FRAME_LEN(dev->mtu);
err = dpdk_eth_dev_init(dev); err = dpdk_eth_dev_init(dev);
if (err) { if (err) {
dpdk_mp_put(mp); dpdk_mp_put(mp);
dev->mtu = old_mtu; dev->mtu = old_mtu;
dev->dpdk_mp = old_mp; dev->dpdk_mp = old_mp;
dev->max_packet_len = MTU_TO_MAX_LEN(dev->mtu); dev->max_packet_len = MTU_TO_FRAME_LEN(dev->mtu);
dpdk_eth_dev_init(dev); dpdk_eth_dev_init(dev);
goto out; goto out;
} }
@@ -1758,7 +1755,7 @@ netdev_dpdk_get_status(const struct netdev *netdev_, struct smap *args)
smap_add_format(args, "numa_id", "%d", rte_eth_dev_socket_id(dev->port_id)); smap_add_format(args, "numa_id", "%d", rte_eth_dev_socket_id(dev->port_id));
smap_add_format(args, "driver_name", "%s", dev_info.driver_name); smap_add_format(args, "driver_name", "%s", dev_info.driver_name);
smap_add_format(args, "min_rx_bufsize", "%u", dev_info.min_rx_bufsize); smap_add_format(args, "min_rx_bufsize", "%u", dev_info.min_rx_bufsize);
smap_add_format(args, "max_rx_pktlen", "%u", dev_info.max_rx_pktlen); smap_add_format(args, "max_rx_pktlen", "%u", dev->max_packet_len);
smap_add_format(args, "max_rx_queues", "%u", dev_info.max_rx_queues); smap_add_format(args, "max_rx_queues", "%u", dev_info.max_rx_queues);
smap_add_format(args, "max_tx_queues", "%u", dev_info.max_tx_queues); smap_add_format(args, "max_tx_queues", "%u", dev_info.max_tx_queues);
smap_add_format(args, "max_mac_addrs", "%u", dev_info.max_mac_addrs); smap_add_format(args, "max_mac_addrs", "%u", dev_info.max_mac_addrs);