2
0
mirror of https://github.com/openvswitch/ovs synced 2025-09-01 14:55:18 +00:00

netdev-dpdk: fix mbuf sizing

There are numerous factors that must be considered when calculating
the size of an mbuf:
- the data portion of the mbuf must be sized in accordance with Rx
  buffer alignment (typically 1024B). So, for example, in order to
  successfully receive and capture a 1500B packet, mbufs with a
  data portion of size 2048B must be used.
- in OvS, the elements that comprise an mbuf are:
  * the dp_packet, which includes a struct rte_mbuf (704B)
  * RTE_PKTMBUF_HEADROOM (128B)
  * packet data (aligned to 1k, as previously described)
  * RTE_PKTMBUF_TAILROOM (typically 0)

Some PMDs require that the total mbuf size (i.e. the total sum of all
of the above-listed components' lengths) is cache-aligned. To satisfy
this requirement, it may be necessary to round up the total mbuf size
with respect to cacheline size. In doing so, it's possible that the
dp_packet's data portion is inadvertently increased in size, such that
it no longer adheres to Rx buffer alignment. Consequently, the
following property of the mbuf no longer holds true:

    mbuf.data_len == mbuf.buf_len - mbuf.data_off

This creates a problem in the case of multi-segment mbufs, where that
property is assumed to hold for all but the final segment in an
mbuf chain. Resolve this issue by adjusting the size of the mbuf's
private data portion, as opposed to the packet data portion, when
aligning mbuf size to cachelines.

Co-authored-by: Tiago Lam <tiago.lam@intel.com>

Fixes: 4be4d22 ("netdev-dpdk: clean up mbuf initialization")
Fixes: 31b88c9 ("netdev-dpdk: round up mbuf_size to cache_line_size")
CC: Santosh Shukla <santosh.shukla@caviumnetworks.com>
Signed-off-by: Mark Kavanagh <mark.b.kavanagh@intel.com>
Signed-off-by: Tiago Lam <tiago.lam@intel.com>
Acked-by: Santosh Shukla <santosh.shukla@caviumnetworks.com>
Acked-by: Eelco Chaudron <echaudro@redhat.com>
Signed-off-by: Ian Stokes <ian.stokes@intel.com>
This commit is contained in:
Mark Kavanagh
2018-11-02 09:06:32 +00:00
committed by Ian Stokes
parent 31154f9523
commit dfaf00e8c3
2 changed files with 50 additions and 31 deletions

View File

@@ -107,8 +107,8 @@ Example 1
MTU = 1500 Bytes
Number of mbufs = 262144
Mbuf size = 3008 Bytes
Memory required = 262144 * 3008 = 788 MB
Mbuf size = 2752 Bytes
Memory required = 262144 * 2752 = 721 MB
Example 2
+++++++++
@@ -116,8 +116,8 @@ Example 2
MTU = 1800 Bytes
Number of mbufs = 262144
Mbuf size = 3008 Bytes
Memory required = 262144 * 3008 = 788 MB
Mbuf size = 2752 Bytes
Memory required = 262144 * 2752 = 721 MB
.. note::
@@ -130,8 +130,8 @@ Example 3
MTU = 6000 Bytes
Number of mbufs = 262144
Mbuf size = 8128 Bytes
Memory required = 262144 * 8128 = 2130 MB
Mbuf size = 8000 Bytes
Memory required = 262144 * 8000 = 2097 MB
Example 4
+++++++++
@@ -139,8 +139,8 @@ Example 4
MTU = 9000 Bytes
Number of mbufs = 262144
Mbuf size = 10176 Bytes
Memory required = 262144 * 10176 = 2667 MB
Mbuf size = 10048 Bytes
Memory required = 262144 * 10048 = 2634 MB
Per Port Memory Calculations
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -194,8 +194,8 @@ Example 1: (1 rxq, 1 PMD, 1500 MTU)
MTU = 1500
Number of mbufs = (1 * 2048) + (2 * 2048) + (1 * 32) + (16384) = 22560
Mbuf size = 3008 Bytes
Memory required = 22560 * 3008 = 67 MB
Mbuf size = 2752 Bytes
Memory required = 22560 * 2752 = 62 MB
Example 2: (1 rxq, 2 PMD, 6000 MTU)
+++++++++++++++++++++++++++++++++++
@@ -203,8 +203,8 @@ Example 2: (1 rxq, 2 PMD, 6000 MTU)
MTU = 6000
Number of mbufs = (1 * 2048) + (3 * 2048) + (1 * 32) + (16384) = 24608
Mbuf size = 8128 Bytes
Memory required = 24608 * 8128 = 200 MB
Mbuf size = 8000 Bytes
Memory required = 24608 * 8000 = 196 MB
Example 3: (2 rxq, 2 PMD, 9000 MTU)
+++++++++++++++++++++++++++++++++++
@@ -212,5 +212,5 @@ Example 3: (2 rxq, 2 PMD, 9000 MTU)
MTU = 9000
Number of mbufs = (2 * 2048) + (3 * 2048) + (1 * 32) + (16384) = 26656
Mbuf size = 10176 Bytes
Memory required = 26656 * 10176 = 271 MB
Mbuf size = 10048 Bytes
Memory required = 26656 * 10048 = 267 MB

View File

@@ -88,10 +88,6 @@ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
#define MTU_TO_MAX_FRAME_LEN(mtu) ((mtu) + ETHER_HDR_MAX_LEN)
#define FRAME_LEN_TO_MTU(frame_len) ((frame_len) \
- ETHER_HDR_LEN - ETHER_CRC_LEN)
#define MBUF_SIZE(mtu) ROUND_UP((MTU_TO_MAX_FRAME_LEN(mtu) \
+ sizeof(struct dp_packet) \
+ RTE_PKTMBUF_HEADROOM), \
RTE_CACHE_LINE_SIZE)
#define NETDEV_DPDK_MBUF_ALIGN 1024
#define NETDEV_DPDK_MAX_PKT_LEN 9728
@@ -637,7 +633,11 @@ dpdk_mp_create(struct netdev_dpdk *dev, int mtu, bool per_port_mp)
char mp_name[RTE_MEMPOOL_NAMESIZE];
const char *netdev_name = netdev_get_name(&dev->up);
int socket_id = dev->requested_socket_id;
uint32_t n_mbufs;
uint32_t n_mbufs = 0;
uint32_t mbuf_size = 0;
uint32_t aligned_mbuf_size = 0;
uint32_t mbuf_priv_data_len = 0;
uint32_t pkt_size = 0;
uint32_t hash = hash_string(netdev_name, 0);
struct dpdk_mp *dmp = NULL;
int ret;
@@ -650,6 +650,9 @@ dpdk_mp_create(struct netdev_dpdk *dev, int mtu, bool per_port_mp)
dmp->mtu = mtu;
dmp->refcount = 1;
/* Get the size of each mbuf, based on the MTU */
mbuf_size = MTU_TO_FRAME_LEN(mtu);
n_mbufs = dpdk_calculate_mbufs(dev, mtu, per_port_mp);
do {
@@ -661,8 +664,8 @@ dpdk_mp_create(struct netdev_dpdk *dev, int mtu, bool per_port_mp)
* so this is not an issue for tasks such as debugging.
*/
ret = snprintf(mp_name, RTE_MEMPOOL_NAMESIZE,
"ovs%08x%02d%05d%07u",
hash, socket_id, mtu, n_mbufs);
"ovs%08x%02d%05d%07u",
hash, socket_id, mtu, n_mbufs);
if (ret < 0 || ret >= RTE_MEMPOOL_NAMESIZE) {
VLOG_DBG("snprintf returned %d. "
"Failed to generate a mempool name for \"%s\". "
@@ -671,17 +674,33 @@ dpdk_mp_create(struct netdev_dpdk *dev, int mtu, bool per_port_mp)
break;
}
VLOG_DBG("Port %s: Requesting a mempool of %u mbufs "
"on socket %d for %d Rx and %d Tx queues.",
netdev_name, n_mbufs, socket_id,
dev->requested_n_rxq, dev->requested_n_txq);
VLOG_DBG("Port %s: Requesting a mempool of %u mbufs of size %u "
"on socket %d for %d Rx and %d Tx queues, "
"cache line size of %u",
netdev_name, n_mbufs, mbuf_size, socket_id,
dev->requested_n_rxq, dev->requested_n_txq,
RTE_CACHE_LINE_SIZE);
dmp->mp = rte_pktmbuf_pool_create(mp_name, n_mbufs,
MP_CACHE_SZ,
sizeof (struct dp_packet)
- sizeof (struct rte_mbuf),
MBUF_SIZE(mtu)
- sizeof(struct dp_packet),
mbuf_priv_data_len = sizeof(struct dp_packet) -
sizeof(struct rte_mbuf);
/* The size of the entire dp_packet. */
pkt_size = sizeof(struct dp_packet) + mbuf_size;
/* mbuf size, rounded up to cacheline size. */
aligned_mbuf_size = ROUND_UP(pkt_size, RTE_CACHE_LINE_SIZE);
/* If there is a size discrepancy, add padding to mbuf_priv_data_len.
* This maintains mbuf size cache alignment, while also honoring RX
* buffer alignment in the data portion of the mbuf. If this adjustment
* is not made, there is a possibility later on that, for an element of
* the mempool, buf, buf->data_len < (buf->buf_len - buf->data_off).
* This is problematic in the case of multi-segment mbufs, particularly
* when an mbuf segment needs to be resized (when pushing or popping a VLAN
* header, for example).
*/
mbuf_priv_data_len += (aligned_mbuf_size - pkt_size);
dmp->mp = rte_pktmbuf_pool_create(mp_name, n_mbufs, MP_CACHE_SZ,
mbuf_priv_data_len,
mbuf_size,
socket_id);
if (dmp->mp) {