Discussion:
[dpdk-dev] [RFC PATCH 4/6] mempool: add a function to flush default cache
Andrew Rybchenko
2017-11-24 16:06:29 UTC
From: "Artem V. Andreev" <***@oktetlabs.ru>

The mempool get/put API takes care of the cache itself, but sometimes it is
required to flush the cache explicitly.

Also, a dedicated API allows decoupling it from the block get API (to be
added) and provides more fine-grained control.

Signed-off-by: Artem V. Andreev <***@oktetlabs.ru>
Signed-off-by: Andrew Rybchenko <***@solarflare.com>
---
lib/librte_mempool/rte_mempool.h | 16 ++++++++++++++++
1 file changed, 16 insertions(+)

diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
index 9bcb8b7..3a52b93 100644
--- a/lib/librte_mempool/rte_mempool.h
+++ b/lib/librte_mempool/rte_mempool.h
@@ -1161,6 +1161,22 @@ rte_mempool_default_cache(struct rte_mempool *mp, unsigned lcore_id)
}

/**
+ * Ensure that a default per-lcore mempool cache is flushed, if it is present
+ *
+ * @param mp
+ * A pointer to the mempool structure.
+ */
+static __rte_always_inline void
+rte_mempool_ensure_cache_flushed(struct rte_mempool *mp)
+{
+ struct rte_mempool_cache *cache;
+ cache = rte_mempool_default_cache(mp, rte_lcore_id());
+ if (cache != NULL && cache->len > 0)
+ rte_mempool_cache_flush(cache, mp);
+}
+
+
+/**
* @internal Put several objects back in the mempool; used internally.
* @param mp
* A pointer to the mempool structure.
--
2.7.4
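
A minimal usage sketch of the proposed helper (not part of the patch; the wrapper function and its name are illustrative assumptions):

#include <stdio.h>
#include <rte_mempool.h>

/* Sketch: return any objects parked in this lcore's default cache to
 * the mempool driver, e.g. before a dequeue path that bypasses the
 * cache (such as the block get API mentioned in the commit message),
 * then report the pool's available object count.
 */
static void
flush_before_block_get(struct rte_mempool *mp)
{
	rte_mempool_ensure_cache_flushed(mp);

	printf("%s: %u objects available after flush\n",
	       mp->name, rte_mempool_avail_count(mp));
}
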
Andrew Rybchenko
2017-11-24 16:06:26 UTC
From: "Artem V. Andreev" <***@oktetlabs.ru>

Primarily, it is intended as a way for the mempool driver to provide
additional information on how it lays out objects inside the mempool.

Signed-off-by: Artem V. Andreev <***@oktetlabs.ru>
Signed-off-by: Andrew Rybchenko <***@solarflare.com>
---
lib/librte_mempool/rte_mempool.h | 31 +++++++++++++++++++++++++++++++
lib/librte_mempool/rte_mempool_ops.c | 15 +++++++++++++++
2 files changed, 46 insertions(+)

diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
index 721227f..3c59d36 100644
--- a/lib/librte_mempool/rte_mempool.h
+++ b/lib/librte_mempool/rte_mempool.h
@@ -217,6 +217,11 @@ struct rte_mempool_memhdr {
void *opaque; /**< Argument passed to the free callback */
};

+/*
+ * Additional information about the mempool
+ */
+struct rte_mempool_info;
+
/**
* The RTE mempool structure.
*/
@@ -422,6 +427,12 @@ typedef int (*rte_mempool_get_capabilities_t)(const struct rte_mempool *mp,
unsigned int *flags);

/**
+ * Get some additional information about a mempool.
+ */
+typedef int (*rte_mempool_get_info_t)(const struct rte_mempool *mp,
+ struct rte_mempool_info *info);
+
+/**
* Notify new memory area to mempool.
*/
typedef int (*rte_mempool_ops_register_memory_area_t)
@@ -443,6 +454,10 @@ struct rte_mempool_ops {
* Notify new memory area to mempool
*/
rte_mempool_ops_register_memory_area_t register_memory_area;
+ /**
+ * Get mempool info
+ */
+ rte_mempool_get_info_t get_info;
} __rte_cache_aligned;

#define RTE_MEMPOOL_MAX_OPS_IDX 16 /**< Max registered ops structs */
@@ -592,6 +607,22 @@ rte_mempool_ops_register_memory_area(const struct rte_mempool *mp,
char *vaddr, rte_iova_t iova, size_t len);

/**
+ * @internal wrapper for mempool_ops get_info callback.
+ *
+ * @param mp [in]
+ * Pointer to the memory pool.
+ * @param info [out]
+ * Pointer to the rte_mempool_info structure
+ * @return
+ * - 0: Success; the mempool driver supports retrieving supplementary
+ * mempool information
+ * - -ENOTSUP: the mempool driver does not support get_info (valid case).
+ */
+int
+rte_mempool_ops_get_info(const struct rte_mempool *mp,
+ struct rte_mempool_info *info);
+
+/**
* @internal wrapper for mempool_ops free callback.
*
* @param mp
diff --git a/lib/librte_mempool/rte_mempool_ops.c b/lib/librte_mempool/rte_mempool_ops.c
index 92b9f90..23de4db 100644
--- a/lib/librte_mempool/rte_mempool_ops.c
+++ b/lib/librte_mempool/rte_mempool_ops.c
@@ -88,6 +88,7 @@ rte_mempool_register_ops(const struct rte_mempool_ops *h)
ops->get_count = h->get_count;
ops->get_capabilities = h->get_capabilities;
ops->register_memory_area = h->register_memory_area;
+ ops->get_info = h->get_info;

rte_spinlock_unlock(&rte_mempool_ops_table.sl);

@@ -152,6 +153,20 @@ rte_mempool_ops_register_memory_area(const struct rte_mempool *mp, char *vaddr,
return ops->register_memory_area(mp, vaddr, iova, len);
}

+/* wrapper to get additional mempool info */
+int
+rte_mempool_ops_get_info(const struct rte_mempool *mp,
+ struct rte_mempool_info *info)
+{
+ struct rte_mempool_ops *ops;
+
+ ops = rte_mempool_get_ops(mp->ops_index);
+
+ RTE_FUNC_PTR_OR_ERR_RET(ops->get_info, -ENOTSUP);
+ return ops->get_info(mp, info);
+}
+
+
/* sets mempool ops previously registered by rte_mempool_register_ops. */
int
rte_mempool_set_ops_byname(struct rte_mempool *mp, const char *name,
--
2.7.4
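
For illustration, a hedged sketch of how a driver could hook the new callback and how a caller would use the wrapper; the driver-side callback is hypothetical and the rte_mempool_info fields are only defined later in the series:

#include <errno.h>
#include <rte_mempool.h>

/* Hypothetical driver-side callback, registered via the .get_info field
 * of the driver's struct rte_mempool_ops; it would fill in
 * rte_mempool_info once the structure gets a definition.
 */
static int
example_get_info(const struct rte_mempool *mp __rte_unused,
		 struct rte_mempool_info *info __rte_unused)
{
	/* e.g. info->cluster_size = ...; (field added later in the series) */
	return 0;
}

/* Caller side: -ENOTSUP is a valid outcome and simply means the driver
 * provides no supplementary information.
 */
static void
query_info(const struct rte_mempool *mp, struct rte_mempool_info *info)
{
	int rc = rte_mempool_ops_get_info(mp, info);

	if (rc == -ENOTSUP)
		return;
	/* else: use the fields of *info */
}
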
Olivier MATZ
2017-12-14 13:36:41 UTC
Post by Andrew Rybchenko
Primarily, it is intended as a way for the mempool driver to provide
additional information on how it lays out objects inside the mempool.
---
lib/librte_mempool/rte_mempool.h | 31 +++++++++++++++++++++++++++++++
lib/librte_mempool/rte_mempool_ops.c | 15 +++++++++++++++
2 files changed, 46 insertions(+)
diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
index 721227f..3c59d36 100644
--- a/lib/librte_mempool/rte_mempool.h
+++ b/lib/librte_mempool/rte_mempool.h
@@ -217,6 +217,11 @@ struct rte_mempool_memhdr {
void *opaque; /**< Argument passed to the free callback */
};
+/*
+ * Additional information about the mempool
+ */
+struct rte_mempool_info;
+
While there is no compilation issue, I find it a bit strange to define this
API without defining the content of rte_mempool_info.
Andrew Rybchenko
2018-01-17 15:03:33 UTC
Post by Olivier MATZ
Post by Andrew Rybchenko
Primarily, it is intended as a way for the mempool driver to provide
additional information on how it lays out objects inside the mempool.
---
lib/librte_mempool/rte_mempool.h | 31 +++++++++++++++++++++++++++++++
lib/librte_mempool/rte_mempool_ops.c | 15 +++++++++++++++
2 files changed, 46 insertions(+)
diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
index 721227f..3c59d36 100644
--- a/lib/librte_mempool/rte_mempool.h
+++ b/lib/librte_mempool/rte_mempool.h
@@ -217,6 +217,11 @@ struct rte_mempool_memhdr {
void *opaque; /**< Argument passed to the free callback */
};
+/*
+ * Additional information about the mempool
+ */
+struct rte_mempool_info;
+
While there is no compilation issue, I find it a bit strange to define this
API without defining the content of rte_mempool_info.
Agree. Mainly it was an attempt to fit the required way of storing objects
in memory into the existing approach. I agree that it is significantly
better to solve it in a different way, as you suggested. So, the patch
will go away.
Andrew Rybchenko
2017-11-24 16:06:28 UTC
From: "Artem V. Andreev" <***@oktetlabs.ru>

The manager provides a way to allocate a physically and virtually
contiguous set of objects.

Note: due to the way objects are organized in the bucket manager,
get_avail_count may return fewer objects than were enqueued.
That breaks the expectation of the mempool and mempool_perf tests.

Signed-off-by: Artem V. Andreev <***@oktetlabs.ru>
Signed-off-by: Andrew Rybchenko <***@solarflare.com>
---
MAINTAINERS | 9 +
config/common_base | 2 +
drivers/mempool/Makefile | 1 +
drivers/mempool/bucket/Makefile | 49 ++
drivers/mempool/bucket/rte_mempool_bucket.c | 521 +++++++++++++++++++++
.../mempool/bucket/rte_mempool_bucket_version.map | 4 +
mk/rte.app.mk | 1 +
7 files changed, 587 insertions(+)
create mode 100644 drivers/mempool/bucket/Makefile
create mode 100644 drivers/mempool/bucket/rte_mempool_bucket.c
create mode 100644 drivers/mempool/bucket/rte_mempool_bucket_version.map

diff --git a/MAINTAINERS b/MAINTAINERS
index f0baeb4..144fd1d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -293,6 +293,15 @@ F: test/test/test_event_eth_rx_adapter.c
F: doc/guides/prog_guide/event_ethernet_rx_adapter.rst


+Memory Pool Drivers
+-------------------
+
+Bucket memory pool
+M: Artem V. Andreev <***@oktetlabs.ru>
+M: Andrew Rybchenko <***@solarflare.com>
+F: drivers/mempool/bucket/
+
+
Bus Drivers
-----------

diff --git a/config/common_base b/config/common_base
index e74febe..8793699 100644
--- a/config/common_base
+++ b/config/common_base
@@ -608,6 +608,8 @@ CONFIG_RTE_LIBRTE_MEMPOOL_DEBUG=n
#
# Compile Mempool drivers
#
+CONFIG_RTE_DRIVER_MEMPOOL_BUCKET=y
+CONFIG_RTE_DRIVER_MEMPOOL_BUCKET_SIZE_KB=32
CONFIG_RTE_DRIVER_MEMPOOL_RING=y
CONFIG_RTE_DRIVER_MEMPOOL_STACK=y

diff --git a/drivers/mempool/Makefile b/drivers/mempool/Makefile
index f656c56..9de0783 100644
--- a/drivers/mempool/Makefile
+++ b/drivers/mempool/Makefile
@@ -30,6 +30,7 @@

include $(RTE_SDK)/mk/rte.vars.mk

+DIRS-$(CONFIG_RTE_DRIVER_MEMPOOL_BUCKET) += bucket
DIRS-$(CONFIG_RTE_LIBRTE_DPAA_MEMPOOL) += dpaa
DIRS-$(CONFIG_RTE_LIBRTE_DPAA2_MEMPOOL) += dpaa2
DIRS-$(CONFIG_RTE_DRIVER_MEMPOOL_RING) += ring
diff --git a/drivers/mempool/bucket/Makefile b/drivers/mempool/bucket/Makefile
new file mode 100644
index 0000000..06ddd31
--- /dev/null
+++ b/drivers/mempool/bucket/Makefile
@@ -0,0 +1,49 @@
+#
+# BSD LICENSE
+#
+# Copyright (c) 2017 Solarflare Communications Inc.
+# All rights reserved.
+#
+# This software was jointly developed between OKTET Labs (under contract
+# for Solarflare) and Solarflare Communications, Inc.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+#
+# library name
+#
+LIB = librte_mempool_bucket.a
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+
+LDLIBS += -lrte_eal -lrte_mempool -lrte_ring
+
+EXPORT_MAP := rte_mempool_bucket_version.map
+
+LIBABIVER := 1
+
+SRCS-$(CONFIG_RTE_DRIVER_MEMPOOL_BUCKET) += rte_mempool_bucket.c
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/drivers/mempool/bucket/rte_mempool_bucket.c b/drivers/mempool/bucket/rte_mempool_bucket.c
new file mode 100644
index 0000000..4063d2c
--- /dev/null
+++ b/drivers/mempool/bucket/rte_mempool_bucket.c
@@ -0,0 +1,521 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) 2017 Solarflare Communications Inc.
+ * All rights reserved.
+ *
+ * This software was jointly developed between OKTET Labs (under contract
+ * for Solarflare) and Solarflare Communications, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdbool.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <rte_errno.h>
+#include <rte_ring.h>
+#include <rte_mempool.h>
+#include <rte_malloc.h>
+
+/*
+ * The general idea of the bucket mempool driver is as follows.
+ * We keep track of physically contiguous groups (buckets) of objects
+ * of a certain size. Every such group has a counter that is
+ * incremented every time an object from that group is enqueued.
+ * Until the bucket is full, no objects from it are eligible for allocation.
+ * If a request is made to dequeue a multiple of the bucket size, it is
+ * satisfied by returning whole buckets, instead of separate objects.
+ */
+
+#define BUCKET_MEM_SIZE (RTE_DRIVER_MEMPOOL_BUCKET_SIZE_KB * 1024)
+
+struct bucket_header {
+ unsigned int lcore_id;
+ uint8_t fill_cnt;
+};
+
+struct bucket_stack {
+ unsigned int top;
+ unsigned int limit;
+ void *objects[];
+};
+
+struct bucket_data {
+ unsigned int header_size;
+ unsigned int chunk_size;
+ unsigned int bucket_size;
+ uintptr_t bucket_page_mask;
+ struct rte_ring *shared_bucket_ring;
+ struct bucket_stack *buckets[RTE_MAX_LCORE];
+ /*
+ * Multi-producer single-consumer ring to hold objects that are
+ * returned to the mempool at a different lcore than initially
+ * dequeued
+ */
+ struct rte_ring *adoption_buffer_rings[RTE_MAX_LCORE];
+ struct rte_ring *shared_orphan_ring;
+ struct rte_mempool *pool;
+
+};
+
+static struct bucket_stack *
+bucket_stack_create(const struct rte_mempool *mp, unsigned int n_elts)
+{
+ struct bucket_stack *stack;
+
+ stack = rte_zmalloc_socket("bucket_stack",
+ sizeof(struct bucket_stack) +
+ n_elts * sizeof(void *),
+ RTE_CACHE_LINE_SIZE,
+ mp->socket_id);
+ if (stack == NULL)
+ return NULL;
+ stack->limit = n_elts;
+ stack->top = 0;
+
+ return stack;
+}
+
+static void
+bucket_stack_push(struct bucket_stack *stack, void *obj)
+{
+ RTE_ASSERT(stack->top < stack->limit);
+ stack->objects[stack->top++] = obj;
+}
+
+static void *
+bucket_stack_pop_unsafe(struct bucket_stack *stack)
+{
+ RTE_ASSERT(stack->top > 0);
+ return stack->objects[--stack->top];
+}
+
+static void *
+bucket_stack_pop(struct bucket_stack *stack)
+{
+ if (stack->top == 0)
+ return NULL;
+ return bucket_stack_pop_unsafe(stack);
+}
+
+static int
+bucket_enqueue_single(struct bucket_data *data, void *obj)
+{
+ int rc = 0;
+ uintptr_t addr = (uintptr_t)obj;
+ struct bucket_header *hdr;
+ unsigned int lcore_id = rte_lcore_id();
+
+ addr &= data->bucket_page_mask;
+ hdr = (struct bucket_header *)addr;
+
+ if (likely(hdr->lcore_id == lcore_id)) {
+ if (hdr->fill_cnt < data->bucket_size - 1) {
+ hdr->fill_cnt++;
+ } else {
+ hdr->fill_cnt = 0;
+ /* Stack is big enough to put all buckets */
+ bucket_stack_push(data->buckets[lcore_id], hdr);
+ }
+ } else if (hdr->lcore_id != LCORE_ID_ANY) {
+ struct rte_ring *adopt_ring =
+ data->adoption_buffer_rings[hdr->lcore_id];
+
+ rc = rte_ring_enqueue(adopt_ring, obj);
+ /* Ring is big enough to put all objects */
+ RTE_ASSERT(rc == 0);
+ } else if (hdr->fill_cnt < data->bucket_size - 1) {
+ hdr->fill_cnt++;
+ } else {
+ hdr->fill_cnt = 0;
+ rc = rte_ring_enqueue(data->shared_bucket_ring, hdr);
+ /* Ring is big enough to put all buckets */
+ RTE_ASSERT(rc == 0);
+ }
+
+ return rc;
+}
+
+static int
+bucket_enqueue(struct rte_mempool *mp, void * const *obj_table,
+ unsigned int n)
+{
+ struct bucket_data *data = mp->pool_data;
+ unsigned int i;
+ int rc = 0;
+
+ for (i = 0; i < n; i++) {
+ rc = bucket_enqueue_single(data, obj_table[i]);
+ RTE_ASSERT(rc == 0);
+ }
+ return rc;
+}
+
+static void **
+bucket_fill_obj_table(const struct bucket_data *data, void **pstart,
+ void **obj_table, unsigned int n)
+{
+ unsigned int i;
+ uint8_t *objptr = *pstart;
+
+ for (objptr += data->header_size, i = 0; i < n; i++,
+ objptr += data->chunk_size)
+ *obj_table++ = objptr;
+ *pstart = objptr;
+ return obj_table;
+}
+
+static int
+bucket_dequeue_orphans(struct bucket_data *data, void **obj_table,
+ unsigned int n_orphans)
+{
+ unsigned int i;
+ int rc;
+ uint8_t *objptr;
+
+ rc = rte_ring_dequeue_bulk(data->shared_orphan_ring, obj_table,
+ n_orphans, NULL);
+ if (unlikely(rc != (int)n_orphans)) {
+ struct bucket_header *hdr;
+
+ objptr = bucket_stack_pop(data->buckets[rte_lcore_id()]);
+ hdr = (struct bucket_header *)objptr;
+
+ if (objptr == NULL) {
+ rc = rte_ring_dequeue(data->shared_bucket_ring,
+ (void **)&objptr);
+ if (rc != 0) {
+ rte_errno = ENOBUFS;
+ return -rte_errno;
+ }
+ hdr = (struct bucket_header *)objptr;
+ hdr->lcore_id = rte_lcore_id();
+ }
+ hdr->fill_cnt = 0;
+ bucket_fill_obj_table(data, (void **)&objptr, obj_table,
+ n_orphans);
+ for (i = n_orphans; i < data->bucket_size; i++,
+ objptr += data->chunk_size) {
+ rc = rte_ring_enqueue(data->shared_orphan_ring,
+ objptr);
+ if (rc != 0) {
+ RTE_ASSERT(0);
+ rte_errno = -rc;
+ return rc;
+ }
+ }
+ }
+
+ return 0;
+}
+
+static int
+bucket_dequeue_buckets(struct bucket_data *data, void **obj_table,
+ unsigned int n_buckets)
+{
+ struct bucket_stack *cur_stack = data->buckets[rte_lcore_id()];
+ unsigned int n_buckets_from_stack = RTE_MIN(n_buckets, cur_stack->top);
+ void **obj_table_base = obj_table;
+
+ n_buckets -= n_buckets_from_stack;
+ while (n_buckets_from_stack-- > 0) {
+ void *obj = bucket_stack_pop_unsafe(cur_stack);
+
+ obj_table = bucket_fill_obj_table(data, &obj, obj_table,
+ data->bucket_size);
+ }
+ while (n_buckets-- > 0) {
+ struct bucket_header *hdr;
+
+ if (unlikely(rte_ring_dequeue(data->shared_bucket_ring,
+ (void **)&hdr) != 0)) {
+ /* Return the already-dequeued buffers
+ * back to the mempool
+ */
+ bucket_enqueue(data->pool, obj_table_base,
+ obj_table - obj_table_base);
+ rte_errno = ENOBUFS;
+ return -rte_errno;
+ }
+ hdr->lcore_id = rte_lcore_id();
+ obj_table = bucket_fill_obj_table(data, (void **)&hdr,
+ obj_table, data->bucket_size);
+ }
+
+ return 0;
+}
+
+static int
+bucket_adopt_orphans(struct bucket_data *data)
+{
+ int rc = 0;
+ struct rte_ring *adopt_ring =
+ data->adoption_buffer_rings[rte_lcore_id()];
+
+ if (unlikely(!rte_ring_empty(adopt_ring))) {
+ void *orphan;
+
+ while (rte_ring_sc_dequeue(adopt_ring, &orphan) == 0) {
+ rc = bucket_enqueue_single(data, orphan);
+ RTE_ASSERT(rc == 0);
+ }
+ }
+ return rc;
+}
+
+static int
+bucket_dequeue(struct rte_mempool *mp, void **obj_table, unsigned int n)
+{
+ struct bucket_data *data = mp->pool_data;
+ unsigned int n_buckets = n / data->bucket_size;
+ unsigned int n_orphans = n - n_buckets * data->bucket_size;
+ int rc = 0;
+
+ bucket_adopt_orphans(data);
+
+ if (unlikely(n_orphans > 0)) {
+ rc = bucket_dequeue_orphans(data, obj_table +
+ (n_buckets * data->bucket_size),
+ n_orphans);
+ if (rc != 0)
+ return rc;
+ }
+
+ if (likely(n_buckets > 0)) {
+ rc = bucket_dequeue_buckets(data, obj_table, n_buckets);
+ if (unlikely(rc != 0) && n_orphans > 0) {
+ rte_ring_enqueue_bulk(data->shared_orphan_ring,
+ obj_table + (n_buckets *
+ data->bucket_size),
+ n_orphans, NULL);
+ }
+ }
+
+ return rc;
+}
+
+static unsigned int
+bucket_get_count(const struct rte_mempool *mp)
+{
+ const struct bucket_data *data = mp->pool_data;
+ const struct bucket_stack *local_bucket_stack =
+ data->buckets[rte_lcore_id()];
+
+ return data->bucket_size * local_bucket_stack->top +
+ data->bucket_size * rte_ring_count(data->shared_bucket_ring) +
+ rte_ring_count(data->shared_orphan_ring);
+}
+
+static int
+bucket_alloc(struct rte_mempool *mp)
+{
+ int rg_flags = 0;
+ int rc = 0;
+ char rg_name[RTE_RING_NAMESIZE];
+ struct bucket_data *data;
+ unsigned int i;
+
+ data = rte_zmalloc_socket("bucket_pool", sizeof(*data),
+ RTE_CACHE_LINE_SIZE, mp->socket_id);
+ if (data == NULL) {
+ rc = -ENOMEM;
+ goto no_mem_for_data;
+ }
+ data->pool = mp;
+ data->header_size = mp->header_size;
+ RTE_VERIFY(sizeof(struct bucket_header) +
+ sizeof(struct rte_mempool_objhdr) <= mp->header_size);
+ data->chunk_size = mp->header_size + mp->elt_size + mp->trailer_size;
+ data->bucket_size = BUCKET_MEM_SIZE / data->chunk_size;
+ data->bucket_page_mask = ~(rte_align64pow2(BUCKET_MEM_SIZE) - 1);
+
+ if (mp->flags & MEMPOOL_F_SP_PUT)
+ rg_flags |= RING_F_SP_ENQ;
+ if (mp->flags & MEMPOOL_F_SC_GET)
+ rg_flags |= RING_F_SC_DEQ;
+
+ for (i = 0; i < RTE_MAX_LCORE; i++) {
+ if (!rte_lcore_is_enabled(i))
+ continue;
+ data->buckets[i] =
+ bucket_stack_create(mp, mp->size / data->bucket_size);
+ if (data->buckets[i] == NULL) {
+ rc = -ENOMEM;
+ goto no_mem_for_stacks;
+ }
+ rc = snprintf(rg_name, sizeof(rg_name),
+ RTE_MEMPOOL_MZ_FORMAT ".a%u", mp->name, i);
+ if (rc < 0 || rc >= (int)sizeof(rg_name)) {
+ rc = -ENAMETOOLONG;
+ goto no_mem_for_stacks;
+ }
+ data->adoption_buffer_rings[i] =
+ rte_ring_create(rg_name, rte_align32pow2(mp->size + 1),
+ mp->socket_id,
+ rg_flags | RING_F_SC_DEQ);
+ if (data->adoption_buffer_rings[i] == NULL) {
+ rc = -rte_errno;
+ goto no_mem_for_stacks;
+ }
+ }
+
+ rc = snprintf(rg_name, sizeof(rg_name),
+ RTE_MEMPOOL_MZ_FORMAT ".0", mp->name);
+ if (rc < 0 || rc >= (int)sizeof(rg_name)) {
+ rc = -ENAMETOOLONG;
+ goto invalid_shared_orphan_ring;
+ }
+ data->shared_orphan_ring =
+ rte_ring_create(rg_name, rte_align32pow2(mp->size + 1),
+ mp->socket_id, rg_flags);
+ if (data->shared_orphan_ring == NULL) {
+ rc = -rte_errno;
+ goto cannot_create_shared_orphan_ring;
+ }
+
+ rc = snprintf(rg_name, sizeof(rg_name),
+ RTE_MEMPOOL_MZ_FORMAT ".1", mp->name);
+ if (rc < 0 || rc >= (int)sizeof(rg_name)) {
+ rc = -ENAMETOOLONG;
+ goto invalid_shared_bucket_ring;
+ }
+ data->shared_bucket_ring =
+ rte_ring_create(rg_name,
+ rte_align32pow2((mp->size /
+ data->bucket_size) + 1),
+ mp->socket_id, rg_flags);
+ if (data->shared_bucket_ring == NULL) {
+ rc = -rte_errno;
+ goto cannot_create_shared_bucket_ring;
+ }
+
+ mp->pool_data = data;
+
+ return 0;
+
+cannot_create_shared_bucket_ring:
+invalid_shared_bucket_ring:
+ rte_ring_free(data->shared_orphan_ring);
+cannot_create_shared_orphan_ring:
+invalid_shared_orphan_ring:
+no_mem_for_stacks:
+ for (i = 0; i < RTE_MAX_LCORE; i++) {
+ rte_free(data->buckets[i]);
+ rte_ring_free(data->adoption_buffer_rings[i]);
+ }
+ rte_free(data);
+no_mem_for_data:
+ rte_errno = -rc;
+ return rc;
+}
+
+static void
+bucket_free(struct rte_mempool *mp)
+{
+ unsigned int i;
+ struct bucket_data *data = mp->pool_data;
+
+ if (data == NULL)
+ return;
+
+ for (i = 0; i < RTE_MAX_LCORE; i++) {
+ rte_free(data->buckets[i]);
+ rte_ring_free(data->adoption_buffer_rings[i]);
+ }
+
+ rte_ring_free(data->shared_orphan_ring);
+ rte_ring_free(data->shared_bucket_ring);
+
+ rte_free(data);
+}
+
+static int
+bucket_get_capabilities(__rte_unused const struct rte_mempool *mp,
+ unsigned int *flags)
+{
+ *flags |= MEMPOOL_F_CAPA_PHYS_CONTIG |
+ MEMPOOL_F_CAPA_ALLOCATE_IN_CLUSTERS;
+ return 0;
+}
+
+static int
+bucket_get_info(__rte_unused const struct rte_mempool *mp,
+ struct rte_mempool_info *info)
+{
+ /* mp->pool_data may be still uninitialized at this point */
+ unsigned int chunk_size = mp->header_size + mp->elt_size +
+ mp->trailer_size;
+
+ info->cluster_size = BUCKET_MEM_SIZE / chunk_size;
+ return 0;
+}
+
+static int
+bucket_register_memory_area(__rte_unused const struct rte_mempool *mp,
+ char *vaddr, __rte_unused phys_addr_t paddr,
+ size_t len)
+{
+ /* mp->pool_data may be still uninitialized at this point */
+ unsigned int chunk_size = mp->header_size + mp->elt_size +
+ mp->trailer_size;
+ unsigned int bucket_mem_size =
+ (BUCKET_MEM_SIZE / chunk_size) * chunk_size;
+ unsigned int bucket_page_sz = rte_align32pow2(bucket_mem_size);
+ uintptr_t align;
+ char *iter;
+
+ align = RTE_PTR_ALIGN_CEIL(vaddr, bucket_page_sz) - vaddr;
+
+ for (iter = vaddr + align; iter < vaddr + len; iter += bucket_page_sz) {
+ /* librte_mempool uses the header part for its own bookkeeping,
+ * but the librte_mempool's object header is adjacent to the
+ * data; it is small enough and the header is guaranteed to be
+ * at least CACHE_LINE_SIZE (i.e. 64) bytes, so we do have
+ * plenty of space at the start of the header. So the layout
+ * looks like this:
+ * [bucket_header] ... unused ... [rte_mempool_objhdr] [data...]
+ */
+ struct bucket_header *hdr = (struct bucket_header *)iter;
+
+ hdr->fill_cnt = 0;
+ hdr->lcore_id = LCORE_ID_ANY;
+ }
+
+ return 0;
+}
+
+static const struct rte_mempool_ops ops_bucket = {
+ .name = "bucket",
+ .alloc = bucket_alloc,
+ .free = bucket_free,
+ .enqueue = bucket_enqueue,
+ .dequeue = bucket_dequeue,
+ .get_count = bucket_get_count,
+ .get_capabilities = bucket_get_capabilities,
+ .register_memory_area = bucket_register_memory_area,
+ .get_info = bucket_get_info,
+};
+
+
+MEMPOOL_REGISTER_OPS(ops_bucket);
diff --git a/drivers/mempool/bucket/rte_mempool_bucket_version.map b/drivers/mempool/bucket/rte_mempool_bucket_version.map
new file mode 100644
index 0000000..179140f
--- /dev/null
+++ b/drivers/mempool/bucket/rte_mempool_bucket_version.map
@@ -0,0 +1,4 @@
+DPDK_18.02 {
+
+ local: *;
+};
diff --git a/mk/rte.app.mk b/mk/rte.app.mk
index 6a6a745..d99181f 100644
--- a/mk/rte.app.mk
+++ b/mk/rte.app.mk
@@ -115,6 +115,7 @@ _LDLIBS-$(CONFIG_RTE_LIBRTE_VDEV_BUS) += -lrte_bus_vdev
ifeq ($(CONFIG_RTE_BUILD_SHARED_LIB),n)
# plugins (link only if static libraries)

+_LDLIBS-$(CONFIG_RTE_DRIVER_MEMPOOL_BUCKET) += -lrte_mempool_bucket
_LDLIBS-$(CONFIG_RTE_DRIVER_MEMPOOL_STACK) += -lrte_mempool_stack

_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_AF_PACKET) += -lrte_pmd_af_packet
--
2.7.4
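
To make the address arithmetic in bucket_enqueue_single() easier to follow, here is a standalone hedged sketch of the object-to-bucket-header mapping; the sizes and addresses are illustrative only:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Buckets start at addresses aligned to the (power-of-two) bucket size,
 * so clearing the low bits of any object address yields the start of
 * its bucket, where struct bucket_header lives.  32 KB matches the
 * default RTE_DRIVER_MEMPOOL_BUCKET_SIZE_KB above.
 */
#define EXAMPLE_BUCKET_MEM_SIZE (32u * 1024u)

static uintptr_t
bucket_header_addr(uintptr_t obj_addr)
{
	uintptr_t bucket_page_mask = ~((uintptr_t)EXAMPLE_BUCKET_MEM_SIZE - 1);

	return obj_addr & bucket_page_mask;
}

int
main(void)
{
	/* An object 0x1234 bytes into the bucket that starts at 0x100000. */
	uintptr_t obj = 0x100000 + 0x1234;

	printf("object %#" PRIxPTR " -> bucket header %#" PRIxPTR "\n",
	       obj, bucket_header_addr(obj));
	return 0;
}
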
Olivier MATZ
2017-12-14 13:38:22 UTC
Post by Andrew Rybchenko
The manager provides a way to allocate a physically and virtually
contiguous set of objects.
Note: due to the way objects are organized in the bucket manager,
get_avail_count may return fewer objects than were enqueued.
That breaks the expectation of the mempool and mempool_perf tests.
To me, this can be problematic. The driver should respect the
API, or it will trigger hard-to-debug issues in applications. Can't
this be fixed in one way or another?

[...]
Post by Andrew Rybchenko
--- a/config/common_base
+++ b/config/common_base
@@ -608,6 +608,8 @@ CONFIG_RTE_LIBRTE_MEMPOOL_DEBUG=n
#
# Compile Mempool drivers
#
+CONFIG_RTE_DRIVER_MEMPOOL_BUCKET=y
+CONFIG_RTE_DRIVER_MEMPOOL_BUCKET_SIZE_KB=32
CONFIG_RTE_DRIVER_MEMPOOL_RING=y
CONFIG_RTE_DRIVER_MEMPOOL_STACK=y
Why 32KB?
Why not more, or less?
Can it be a runtime parameter?
I guess it won't work with too large objects.

[...]
Post by Andrew Rybchenko
+struct bucket_data {
+ unsigned int header_size;
+ unsigned int chunk_size;
+ unsigned int bucket_size;
+ uintptr_t bucket_page_mask;
+ struct rte_ring *shared_bucket_ring;
+ struct bucket_stack *buckets[RTE_MAX_LCORE];
+ /*
+ * Multi-producer single-consumer ring to hold objects that are
+ * returned to the mempool at a different lcore than initially
+ * dequeued
+ */
+ struct rte_ring *adoption_buffer_rings[RTE_MAX_LCORE];
+ struct rte_ring *shared_orphan_ring;
+ struct rte_mempool *pool;
+
+};
I'm seeing per-core structures. Will it work on non-dataplane cores?
For instance, if a control thread wants to allocate a mbuf?

If possible, these fields should be more documented (or just renamed).
For instance, I suggest chunk_size could be called obj_per_bucket, which
better describes the content of the field.

[...]
Post by Andrew Rybchenko
+static int
+bucket_enqueue_single(struct bucket_data *data, void *obj)
+{
+ int rc = 0;
+ uintptr_t addr = (uintptr_t)obj;
+ struct bucket_header *hdr;
+ unsigned int lcore_id = rte_lcore_id();
+
+ addr &= data->bucket_page_mask;
+ hdr = (struct bucket_header *)addr;
+
+ if (likely(hdr->lcore_id == lcore_id)) {
+ if (hdr->fill_cnt < data->bucket_size - 1) {
+ hdr->fill_cnt++;
+ } else {
+ hdr->fill_cnt = 0;
+ /* Stack is big enough to put all buckets */
+ bucket_stack_push(data->buckets[lcore_id], hdr);
+ }
+ } else if (hdr->lcore_id != LCORE_ID_ANY) {
+ struct rte_ring *adopt_ring =
+ data->adoption_buffer_rings[hdr->lcore_id];
+
+ rc = rte_ring_enqueue(adopt_ring, obj);
+ /* Ring is big enough to put all objects */
+ RTE_ASSERT(rc == 0);
+ } else if (hdr->fill_cnt < data->bucket_size - 1) {
+ hdr->fill_cnt++;
+ } else {
+ hdr->fill_cnt = 0;
+ rc = rte_ring_enqueue(data->shared_bucket_ring, hdr);
+ /* Ring is big enough to put all buckets */
+ RTE_ASSERT(rc == 0);
+ }
+
+ return rc;
+}
[...]
Post by Andrew Rybchenko
+static int
+bucket_dequeue_buckets(struct bucket_data *data, void **obj_table,
+ unsigned int n_buckets)
+{
+ struct bucket_stack *cur_stack = data->buckets[rte_lcore_id()];
+ unsigned int n_buckets_from_stack = RTE_MIN(n_buckets, cur_stack->top);
+ void **obj_table_base = obj_table;
+
+ n_buckets -= n_buckets_from_stack;
+ while (n_buckets_from_stack-- > 0) {
+ void *obj = bucket_stack_pop_unsafe(cur_stack);
+
+ obj_table = bucket_fill_obj_table(data, &obj, obj_table,
+ data->bucket_size);
+ }
+ while (n_buckets-- > 0) {
+ struct bucket_header *hdr;
+
+ if (unlikely(rte_ring_dequeue(data->shared_bucket_ring,
+ (void **)&hdr) != 0)) {
+ /* Return the already-dequeued buffers
+ * back to the mempool
+ */
+ bucket_enqueue(data->pool, obj_table_base,
+ obj_table - obj_table_base);
+ rte_errno = ENOBUFS;
+ return -rte_errno;
+ }
+ hdr->lcore_id = rte_lcore_id();
+ obj_table = bucket_fill_obj_table(data, (void **)&hdr,
+ obj_table, data->bucket_size);
+ }
+
+ return 0;
+}
[...]
Post by Andrew Rybchenko
+static int
+bucket_dequeue(struct rte_mempool *mp, void **obj_table, unsigned int n)
+{
+ struct bucket_data *data = mp->pool_data;
+ unsigned int n_buckets = n / data->bucket_size;
+ unsigned int n_orphans = n - n_buckets * data->bucket_size;
+ int rc = 0;
+
+ bucket_adopt_orphans(data);
+
+ if (unlikely(n_orphans > 0)) {
+ rc = bucket_dequeue_orphans(data, obj_table +
+ (n_buckets * data->bucket_size),
+ n_orphans);
+ if (rc != 0)
+ return rc;
+ }
+
+ if (likely(n_buckets > 0)) {
+ rc = bucket_dequeue_buckets(data, obj_table, n_buckets);
+ if (unlikely(rc != 0) && n_orphans > 0) {
+ rte_ring_enqueue_bulk(data->shared_orphan_ring,
+ obj_table + (n_buckets *
+ data->bucket_size),
+ n_orphans, NULL);
+ }
+ }
+
+ return rc;
+}
If my understanding is correct, at initialization, all full buckets will
go to the data->shared_bucket_ring ring, with lcore_id == ANY (this is
done in register_mem).

(note: I feel 'data' is not an ideal name for bucket_data)

If core 0 allocates all the mbufs and then frees them all, they
will be stored in the per-core stack, with hdr->lcore_id == 0. Is that
right?

If yes, can core 1 allocate a mbuf after that?
Post by Andrew Rybchenko
+static unsigned int
+bucket_get_count(const struct rte_mempool *mp)
+{
+ const struct bucket_data *data = mp->pool_data;
+ const struct bucket_stack *local_bucket_stack =
+ data->buckets[rte_lcore_id()];
+
+ return data->bucket_size * local_bucket_stack->top +
+ data->bucket_size * rte_ring_count(data->shared_bucket_ring) +
+ rte_ring_count(data->shared_orphan_ring);
+}
It looks like get_count only relies on the current core's stack usage
and ignores the other cores' stacks.

[...]
Post by Andrew Rybchenko
+static int
+bucket_register_memory_area(__rte_unused const struct rte_mempool *mp,
+ char *vaddr, __rte_unused phys_addr_t paddr,
+ size_t len)
+{
+ /* mp->pool_data may be still uninitialized at this point */
+ unsigned int chunk_size = mp->header_size + mp->elt_size +
+ mp->trailer_size;
+ unsigned int bucket_mem_size =
+ (BUCKET_MEM_SIZE / chunk_size) * chunk_size;
+ unsigned int bucket_page_sz = rte_align32pow2(bucket_mem_size);
+ uintptr_t align;
+ char *iter;
+
+ align = RTE_PTR_ALIGN_CEIL(vaddr, bucket_page_sz) - vaddr;
+
+ for (iter = vaddr + align; iter < vaddr + len; iter += bucket_page_sz) {
+ /* librte_mempool uses the header part for its own bookkeeping,
+ * but the librte_mempool's object header is adjacent to the
+ * data; it is small enough and the header is guaranteed to be
+ * at least CACHE_LINE_SIZE (i.e. 64) bytes, so we do have
+ * plenty of space at the start of the header. So the layout
+ * [bucket_header] ... unused ... [rte_mempool_objhdr] [data...]
+ */
This is not always true.
If a user creates a mempool with the NO_CACHE_ALIGN flag, the header will
be small, without padding.
Andrew Rybchenko
2018-01-17 15:06:20 UTC
Post by Olivier MATZ
Post by Andrew Rybchenko
The manager provides a way to allocate a physically and virtually
contiguous set of objects.
Note: due to the way objects are organized in the bucket manager,
get_avail_count may return fewer objects than were enqueued.
That breaks the expectation of the mempool and mempool_perf tests.
To me, this can be problematic. The driver should respect the
API, or it will trigger hard-to-debug issues in applications. Can't
this be fixed in one way or another?
As I understand it, there are no requirements on how fast get_count
works. If so, it is doable and we'll fix it in RFCv2.
Post by Olivier MATZ
[...]
Post by Andrew Rybchenko
--- a/config/common_base
+++ b/config/common_base
@@ -608,6 +608,8 @@ CONFIG_RTE_LIBRTE_MEMPOOL_DEBUG=n
#
# Compile Mempool drivers
#
+CONFIG_RTE_DRIVER_MEMPOOL_BUCKET=y
+CONFIG_RTE_DRIVER_MEMPOOL_BUCKET_SIZE_KB=32
CONFIG_RTE_DRIVER_MEMPOOL_RING=y
CONFIG_RTE_DRIVER_MEMPOOL_STACK=y
Why 32KB?
Why not more, or less?
Can it be a runtime parameter?
I guess it won't work with too large objects.
We have no good understanding of how driver-specific parameters
should be passed on mempool creation. We've simply kept it for the
future since it looks like a separate task.
If you have ideas, please share - we'll be thankful.
Post by Olivier MATZ
[...]
Post by Andrew Rybchenko
+struct bucket_data {
+ unsigned int header_size;
+ unsigned int chunk_size;
+ unsigned int bucket_size;
+ uintptr_t bucket_page_mask;
+ struct rte_ring *shared_bucket_ring;
+ struct bucket_stack *buckets[RTE_MAX_LCORE];
+ /*
+ * Multi-producer single-consumer ring to hold objects that are
+ * returned to the mempool at a different lcore than initially
+ * dequeued
+ */
+ struct rte_ring *adoption_buffer_rings[RTE_MAX_LCORE];
+ struct rte_ring *shared_orphan_ring;
+ struct rte_mempool *pool;
+
+};
I'm seeing per-core structures. Will it work on non-dataplane cores?
For instance, if a control thread wants to allocate a mbuf?
Maybe I don't understand something. Does the control thread have a
valid rte_lcore_id()?
Post by Olivier MATZ
If possible, these fields should be more documented (or just renamed).
For instance, I suggest chunk_size could be called obj_per_bucket, which
better describes the content of the field.
Thanks, we'll do.
Post by Olivier MATZ
[...]
Post by Andrew Rybchenko
+static int
+bucket_enqueue_single(struct bucket_data *data, void *obj)
+{
+ int rc = 0;
+ uintptr_t addr = (uintptr_t)obj;
+ struct bucket_header *hdr;
+ unsigned int lcore_id = rte_lcore_id();
+
+ addr &= data->bucket_page_mask;
+ hdr = (struct bucket_header *)addr;
+
+ if (likely(hdr->lcore_id == lcore_id)) {
+ if (hdr->fill_cnt < data->bucket_size - 1) {
+ hdr->fill_cnt++;
+ } else {
+ hdr->fill_cnt = 0;
+ /* Stack is big enough to put all buckets */
+ bucket_stack_push(data->buckets[lcore_id], hdr);
+ }
+ } else if (hdr->lcore_id != LCORE_ID_ANY) {
+ struct rte_ring *adopt_ring =
+ data->adoption_buffer_rings[hdr->lcore_id];
+
+ rc = rte_ring_enqueue(adopt_ring, obj);
+ /* Ring is big enough to put all objects */
+ RTE_ASSERT(rc == 0);
+ } else if (hdr->fill_cnt < data->bucket_size - 1) {
+ hdr->fill_cnt++;
+ } else {
+ hdr->fill_cnt = 0;
+ rc = rte_ring_enqueue(data->shared_bucket_ring, hdr);
+ /* Ring is big enough to put all buckets */
+ RTE_ASSERT(rc == 0);
+ }
+
+ return rc;
+}
[...]
Post by Andrew Rybchenko
+static int
+bucket_dequeue_buckets(struct bucket_data *data, void **obj_table,
+ unsigned int n_buckets)
+{
+ struct bucket_stack *cur_stack = data->buckets[rte_lcore_id()];
+ unsigned int n_buckets_from_stack = RTE_MIN(n_buckets, cur_stack->top);
+ void **obj_table_base = obj_table;
+
+ n_buckets -= n_buckets_from_stack;
+ while (n_buckets_from_stack-- > 0) {
+ void *obj = bucket_stack_pop_unsafe(cur_stack);
+
+ obj_table = bucket_fill_obj_table(data, &obj, obj_table,
+ data->bucket_size);
+ }
+ while (n_buckets-- > 0) {
+ struct bucket_header *hdr;
+
+ if (unlikely(rte_ring_dequeue(data->shared_bucket_ring,
+ (void **)&hdr) != 0)) {
+ /* Return the already-dequeued buffers
+ * back to the mempool
+ */
+ bucket_enqueue(data->pool, obj_table_base,
+ obj_table - obj_table_base);
+ rte_errno = ENOBUFS;
+ return -rte_errno;
+ }
+ hdr->lcore_id = rte_lcore_id();
+ obj_table = bucket_fill_obj_table(data, (void **)&hdr,
+ obj_table, data->bucket_size);
+ }
+
+ return 0;
+}
[...]
Post by Andrew Rybchenko
+static int
+bucket_dequeue(struct rte_mempool *mp, void **obj_table, unsigned int n)
+{
+ struct bucket_data *data = mp->pool_data;
+ unsigned int n_buckets = n / data->bucket_size;
+ unsigned int n_orphans = n - n_buckets * data->bucket_size;
+ int rc = 0;
+
+ bucket_adopt_orphans(data);
+
+ if (unlikely(n_orphans > 0)) {
+ rc = bucket_dequeue_orphans(data, obj_table +
+ (n_buckets * data->bucket_size),
+ n_orphans);
+ if (rc != 0)
+ return rc;
+ }
+
+ if (likely(n_buckets > 0)) {
+ rc = bucket_dequeue_buckets(data, obj_table, n_buckets);
+ if (unlikely(rc != 0) && n_orphans > 0) {
+ rte_ring_enqueue_bulk(data->shared_orphan_ring,
+ obj_table + (n_buckets *
+ data->bucket_size),
+ n_orphans, NULL);
+ }
+ }
+
+ return rc;
+}
If my understanding is correct, at initialization, all full buckets will
go to the data->shared_bucket_ring ring, with lcore_id == ANY (this is
done in register_mem).
(note: I feel 'data' is not an ideal name for bucket_data)
Yes, agree. We'll rename it. It is really too generic.
Post by Olivier MATZ
If core 0 allocates all the mbufs and then frees them all, they
will be stored in the per-core stack, with hdr->lcore_id == 0. Is that
right?
Right.
Post by Olivier MATZ
If yes, can core 1 allocate a mbuf after that?
We'll add a threshold for the per-core stack. If it is exceeded, buckets
will be flushed into the shared ring.
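
A hedged sketch of what such a threshold could look like on the enqueue path, reusing the driver's internal structures from the patch; the stack_threshold field is hypothetical:

/* Hypothetical helper for bucket_enqueue_single(): once the per-lcore
 * stack holds more than stack_threshold full buckets, push the excess
 * to the shared ring so that other lcores can allocate them.
 */
static void
bucket_stack_trim(struct bucket_data *data, unsigned int lcore_id)
{
	struct bucket_stack *stack = data->buckets[lcore_id];

	while (stack->top > data->stack_threshold) { /* hypothetical field */
		void *hdr = bucket_stack_pop_unsafe(stack);

		/* The shared ring is sized for all buckets, so this
		 * enqueue cannot fail.
		 */
		rte_ring_enqueue(data->shared_bucket_ring, hdr);
	}
}
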
Post by Olivier MATZ
Post by Andrew Rybchenko
+static unsigned int
+bucket_get_count(const struct rte_mempool *mp)
+{
+ const struct bucket_data *data = mp->pool_data;
+ const struct bucket_stack *local_bucket_stack =
+ data->buckets[rte_lcore_id()];
+
+ return data->bucket_size * local_bucket_stack->top +
+ data->bucket_size * rte_ring_count(data->shared_bucket_ring) +
+ rte_ring_count(data->shared_orphan_ring);
+}
It looks like get_count only relies on the current core's stack usage
and ignores the other cores' stacks.
We'll fix it to provide a more accurate return value, which is required
to pass the self-test and make it usable for debugging.
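
A hedged sketch of such a more accurate count, summing all enabled lcores' stacks and adoption rings in addition to the shared rings (still approximate, as partially filled buckets are not counted):

static unsigned int
bucket_get_count_accurate(const struct rte_mempool *mp)
{
	const struct bucket_data *data = mp->pool_data;
	unsigned int count;
	unsigned int i;

	count = data->bucket_size * rte_ring_count(data->shared_bucket_ring) +
		rte_ring_count(data->shared_orphan_ring);

	for (i = 0; i < RTE_MAX_LCORE; i++) {
		if (data->buckets[i] == NULL)
			continue;
		count += data->bucket_size * data->buckets[i]->top +
			rte_ring_count(data->adoption_buffer_rings[i]);
	}

	return count;
}
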
Post by Olivier MATZ
[...]
Post by Andrew Rybchenko
+static int
+bucket_register_memory_area(__rte_unused const struct rte_mempool *mp,
+ char *vaddr, __rte_unused phys_addr_t paddr,
+ size_t len)
+{
+ /* mp->pool_data may be still uninitialized at this point */
+ unsigned int chunk_size = mp->header_size + mp->elt_size +
+ mp->trailer_size;
+ unsigned int bucket_mem_size =
+ (BUCKET_MEM_SIZE / chunk_size) * chunk_size;
+ unsigned int bucket_page_sz = rte_align32pow2(bucket_mem_size);
+ uintptr_t align;
+ char *iter;
+
+ align = RTE_PTR_ALIGN_CEIL(vaddr, bucket_page_sz) - vaddr;
+
+ for (iter = vaddr + align; iter < vaddr + len; iter += bucket_page_sz) {
+ /* librte_mempool uses the header part for its own bookkeeping,
+ * but the librte_mempool's object header is adjacent to the
+ * data; it is small enough and the header is guaranteed to be
+ * at least CACHE_LINE_SIZE (i.e. 64) bytes, so we do have
+ * plenty of space at the start of the header. So the layout
+ * [bucket_header] ... unused ... [rte_mempool_objhdr] [data...]
+ */
This is not always true.
If a user creates a mempool with the NO_CACHE_ALIGN flag, the header will
be small, without padding.
Thanks. I think it can be handled when the bucket mempool implements its
own callback to populate objects.
Andrew Rybchenko
2017-11-24 16:06:30 UTC
From: "Artem V. Andreev" <***@oktetlabs.ru>

If the mempool manager supports object blocks (physically and virtually
contiguous sets of objects), it is sufficient to get only the first
object, and the function allows avoiding filling in information
about each block member.

Signed-off-by: Artem V. Andreev <***@oktetlabs.ru>
Signed-off-by: Andrew Rybchenko <***@solarflare.com>
---
lib/librte_mempool/rte_mempool.c | 4 +-
lib/librte_mempool/rte_mempool.h | 111 +++++++++++++++++++++++++++++++++++
lib/librte_mempool/rte_mempool_ops.c | 1 +
3 files changed, 115 insertions(+), 1 deletion(-)

diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c
index 43455a3..6850d6e 100644
--- a/lib/librte_mempool/rte_mempool.c
+++ b/lib/librte_mempool/rte_mempool.c
@@ -603,8 +603,10 @@ rte_mempool_populate_default(struct rte_mempool *mp)
ret = rte_mempool_ops_get_info(mp, &mp->info);
if ((ret < 0) && (ret != -ENOTSUP))
return ret;
- if (ret == -ENOTSUP)
+ if (ret == -ENOTSUP) {
mp->info.cluster_size = 0;
+ mp->info.contig_block_size = 0;
+ }

if ((mp->info.cluster_size == 0) &&
(mp_flags & MEMPOOL_F_CAPA_ALLOCATE_IN_CLUSTERS))
diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
index 3a52b93..4575eb2 100644
--- a/lib/librte_mempool/rte_mempool.h
+++ b/lib/librte_mempool/rte_mempool.h
@@ -223,6 +223,8 @@ struct rte_mempool_memhdr {
struct rte_mempool_info {
/** Number of objects in a cluster */
unsigned int cluster_size;
+ /** Number of objects in the contiguous block */
+ unsigned int contig_block_size;
};

/**
@@ -431,6 +433,12 @@ typedef int (*rte_mempool_dequeue_t)(struct rte_mempool *mp,
void **obj_table, unsigned int n);

/**
+ * Dequeue a number of contiguous object blocks from the external pool.
+ */
+typedef int (*rte_mempool_dequeue_contig_blocks_t)(struct rte_mempool *mp,
+ void **first_obj_table, unsigned int n);
+
+/**
* Return the number of available objects in the external pool.
*/
typedef unsigned (*rte_mempool_get_count)(const struct rte_mempool *mp);
@@ -473,6 +481,10 @@ struct rte_mempool_ops {
* Get mempool info
*/
rte_mempool_get_info_t get_info;
+ /**
+ * Dequeue a number of contiguous object blocks.
+ */
+ rte_mempool_dequeue_contig_blocks_t dequeue_contig_blocks;
} __rte_cache_aligned;

#define RTE_MEMPOOL_MAX_OPS_IDX 16 /**< Max registered ops structs */
@@ -551,6 +563,30 @@ rte_mempool_ops_dequeue_bulk(struct rte_mempool *mp,
}

/**
+ * @internal Wrapper for mempool_ops dequeue_contig_blocks callback.
+ *
+ * @param mp
+ * Pointer to the memory pool.
+ * @param first_obj_table
+ * Pointer to a table of void * pointers (first objects).
+ * @param n
+ * Number of blocks to get.
+ * @return
+ * - 0: Success; got n objects.
+ * - <0: Error; code of dequeue function.
+ */
+static inline int
+rte_mempool_ops_dequeue_contig_blocks(struct rte_mempool *mp,
+ void **first_obj_table, unsigned int n)
+{
+ struct rte_mempool_ops *ops;
+
+ ops = rte_mempool_get_ops(mp->ops_index);
+ RTE_ASSERT(ops->dequeue_contig_blocks != NULL);
+ return ops->dequeue_contig_blocks(mp, first_obj_table, n);
+}
+
+/**
* @internal wrapper for mempool_ops enqueue callback.
*
* @param mp
@@ -1456,6 +1492,81 @@ rte_mempool_get(struct rte_mempool *mp, void **obj_p)
}

/**
+ * @internal Get contiguous blocks of objects from the pool. Used internally.
+ * @param mp
+ * A pointer to the mempool structure.
+ * @param first_obj_table
+ * A pointer to a pointer to the first object in each block.
+ * @param n
+ * A number of blocks to get.
+ * @return
+ * - >0: Success
+ * - <0: Error
+ */
+static __rte_always_inline int
+__mempool_generic_get_contig_blocks(struct rte_mempool *mp,
+ void **first_obj_table, unsigned int n)
+{
+ int ret;
+
+ ret = rte_mempool_ops_dequeue_contig_blocks(mp, first_obj_table, n);
+ if (ret < 0)
+ __MEMPOOL_STAT_ADD(mp, get_fail,
+ n * mp->info.contig_block_size);
+ else
+ __MEMPOOL_STAT_ADD(mp, get_success,
+ n * mp->info.contig_block_size);
+
+ return ret;
+}
+
+/**
+ * Get contiguous blocks of objects from the mempool.
+ *
+ * If cache is enabled, consider flushing it first, to reuse objects
+ * as soon as possible.
+ *
+ * @param mp
+ * A pointer to the mempool structure.
+ * @param first_obj_table
+ * A pointer to a pointer to the first object in each block.
+ * @param n
+ * The number of blocks to get from mempool.
+ * @return
+ * - >0: the size of the block
+ * - -ENOBUFS: Not enough entries in the mempool; no object is retrieved.
+ * - -EOPNOTSUPP: The mempool driver does not support block dequeue
+ */
+static __rte_always_inline int
+rte_mempool_get_contig_blocks(struct rte_mempool *mp,
+ void **first_obj_table, unsigned int n)
+{
+ int ret;
+
+ ret = __mempool_generic_get_contig_blocks(mp, first_obj_table, n);
+#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
+ if (ret == 0) {
+ const size_t total_elt_sz =
+ mp->header_size + mp->elt_size + mp->trailer_size;
+ unsigned int i, j;
+
+ for (i = 0; i < n; ++i) {
+ void *first_obj = first_obj_table[i];
+
+ for (j = 0; j < mp->info.contig_block_size; ++j) {
+ void *obj;
+
+ obj = (void *)((uintptr_t)first_obj +
+ j * total_elt_sz);
+ rte_mempool_check_cookies(mp, &obj, 1, 1);
+ }
+ }
+ }
+#endif
+ return ret;
+}
+
+/**
* Return the number of entries in the mempool.
*
* When cache is enabled, this function has to browse the length of
diff --git a/lib/librte_mempool/rte_mempool_ops.c b/lib/librte_mempool/rte_mempool_ops.c
index 23de4db..cc38761 100644
--- a/lib/librte_mempool/rte_mempool_ops.c
+++ b/lib/librte_mempool/rte_mempool_ops.c
@@ -89,6 +89,7 @@ rte_mempool_register_ops(const struct rte_mempool_ops *h)
ops->get_capabilities = h->get_capabilities;
ops->register_memory_area = h->register_memory_area;
ops->get_info = h->get_info;
+ ops->dequeue_contig_blocks = h->dequeue_contig_blocks;

rte_spinlock_unlock(&rte_mempool_ops_table.sl);
--
2.7.4
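
From the application's point of view, a hedged usage sketch of the new call; the pool is assumed to be backed by a driver implementing dequeue_contig_blocks (such as the bucket driver) and the block count is arbitrary:

#include <stdint.h>
#include <rte_mempool.h>

/* Sketch: fetch a few contiguous blocks and derive the address of the
 * second object of the first block from its first object.
 */
static int
grab_blocks(struct rte_mempool *mp)
{
	void *first_obj[4];
	size_t total_elt_sz = mp->header_size + mp->elt_size +
			      mp->trailer_size;
	void *second_obj;
	int rc;

	rc = rte_mempool_get_contig_blocks(mp, first_obj, RTE_DIM(first_obj));
	if (rc < 0)
		return rc; /* -ENOBUFS or -EOPNOTSUPP */

	/* Objects inside a block are laid out back to back. */
	second_obj = (void *)((uintptr_t)first_obj[0] + total_elt_sz);
	(void)second_obj;

	return 0;
}
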
Olivier MATZ
2017-12-14 13:38:58 UTC
Post by Andrew Rybchenko
If the mempool manager supports object blocks (physically and virtually
contiguous sets of objects), it is sufficient to get only the first
object, and the function allows avoiding filling in information
about each block member.
This can be a good idea. A use case and some performance numbers would
be welcome to demonstrate it :)
Andrew Rybchenko
2017-11-24 16:06:27 UTC
From: "Artem V. Andreev" <***@oktetlabs.ru>

Clustered allocation is required to simplify packaging objects into
buckets and looking up the bucket control structure from an object.

Signed-off-by: Artem V. Andreev <***@oktetlabs.ru>
Signed-off-by: Andrew Rybchenko <***@solarflare.com>
---
lib/librte_mempool/rte_mempool.c | 39 +++++++++++++++++++++++++++++++++++----
lib/librte_mempool/rte_mempool.h | 23 +++++++++++++++++++++--
test/test/test_mempool.c | 2 +-
3 files changed, 57 insertions(+), 7 deletions(-)

diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c
index d50dba4..43455a3 100644
--- a/lib/librte_mempool/rte_mempool.c
+++ b/lib/librte_mempool/rte_mempool.c
@@ -239,7 +239,8 @@ rte_mempool_calc_obj_size(uint32_t elt_size, uint32_t flags,
*/
size_t
rte_mempool_xmem_size(uint32_t elt_num, size_t total_elt_sz, uint32_t pg_shift,
- unsigned int flags)
+ unsigned int flags,
+ const struct rte_mempool_info *info)
{
size_t obj_per_page, pg_num, pg_sz;
unsigned int mask;
@@ -252,6 +253,17 @@ rte_mempool_xmem_size(uint32_t elt_num, size_t total_elt_sz, uint32_t pg_shift,
if (total_elt_sz == 0)
return 0;

+ if (flags & MEMPOOL_F_CAPA_ALLOCATE_IN_CLUSTERS) {
+ unsigned int align_shift =
+ rte_bsf32(
+ rte_align32pow2(total_elt_sz *
+ info->cluster_size));
+ if (pg_shift < align_shift) {
+ return ((elt_num / info->cluster_size) + 2)
+ << align_shift;
+ }
+ }
+
if (pg_shift == 0)
return total_elt_sz * elt_num;

@@ -362,6 +374,7 @@ rte_mempool_populate_iova(struct rte_mempool *mp, char *vaddr,
void *opaque)
{
unsigned total_elt_sz;
+ unsigned int page_align_size = 0;
unsigned i = 0;
size_t off;
struct rte_mempool_memhdr *memhdr;
@@ -407,7 +420,11 @@ rte_mempool_populate_iova(struct rte_mempool *mp, char *vaddr,
memhdr->free_cb = free_cb;
memhdr->opaque = opaque;

- if (mp->flags & MEMPOOL_F_CAPA_BLK_ALIGNED_OBJECTS)
+ if (mp->flags & MEMPOOL_F_CAPA_ALLOCATE_IN_CLUSTERS) {
+ page_align_size = rte_align32pow2(total_elt_sz *
+ mp->info.cluster_size);
+ off = RTE_PTR_ALIGN_CEIL(vaddr, page_align_size) - vaddr;
+ } else if (mp->flags & MEMPOOL_F_CAPA_BLK_ALIGNED_OBJECTS)
/* align object start address to a multiple of total_elt_sz */
off = total_elt_sz - ((uintptr_t)vaddr % total_elt_sz);
else if (mp->flags & MEMPOOL_F_NO_CACHE_ALIGN)
@@ -424,6 +441,10 @@ rte_mempool_populate_iova(struct rte_mempool *mp, char *vaddr,
mempool_add_elem(mp, (char *)vaddr + off, iova + off);
off += mp->elt_size + mp->trailer_size;
i++;
+ if ((mp->flags & MEMPOOL_F_CAPA_ALLOCATE_IN_CLUSTERS) &&
+ (i % mp->info.cluster_size) == 0)
+ off = RTE_PTR_ALIGN_CEIL((char *)vaddr + off,
+ page_align_size) - vaddr;
}

/* not enough room to store one object */
@@ -579,6 +600,16 @@ rte_mempool_populate_default(struct rte_mempool *mp)
if ((ret < 0) && (ret != -ENOTSUP))
return ret;

+ ret = rte_mempool_ops_get_info(mp, &mp->info);
+ if ((ret < 0) && (ret != -ENOTSUP))
+ return ret;
+ if (ret == -ENOTSUP)
+ mp->info.cluster_size = 0;
+
+ if ((mp->info.cluster_size == 0) &&
+ (mp_flags & MEMPOOL_F_CAPA_ALLOCATE_IN_CLUSTERS))
+ return -EINVAL;
+
/* update mempool capabilities */
mp->flags |= mp_flags;

@@ -595,7 +626,7 @@ rte_mempool_populate_default(struct rte_mempool *mp)
total_elt_sz = mp->header_size + mp->elt_size + mp->trailer_size;
for (mz_id = 0, n = mp->size; n > 0; mz_id++, n -= ret) {
size = rte_mempool_xmem_size(n, total_elt_sz, pg_shift,
- mp->flags);
+ mp->flags, &mp->info);

ret = snprintf(mz_name, sizeof(mz_name),
RTE_MEMPOOL_MZ_FORMAT "_%d", mp->name, mz_id);
@@ -653,7 +684,7 @@ get_anon_size(const struct rte_mempool *mp)
pg_shift = rte_bsf32(pg_sz);
total_elt_sz = mp->header_size + mp->elt_size + mp->trailer_size;
size = rte_mempool_xmem_size(mp->size, total_elt_sz, pg_shift,
- mp->flags);
+ mp->flags, &mp->info);

return size;
}
diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
index 3c59d36..9bcb8b7 100644
--- a/lib/librte_mempool/rte_mempool.h
+++ b/lib/librte_mempool/rte_mempool.h
@@ -220,7 +220,10 @@ struct rte_mempool_memhdr {
/*
* Additional information about the mempool
*/
-struct rte_mempool_info;
+struct rte_mempool_info {
+ /** Number of objects in a cluster */
+ unsigned int cluster_size;
+};

/**
* The RTE mempool structure.
@@ -265,6 +268,7 @@ struct rte_mempool {
struct rte_mempool_objhdr_list elt_list; /**< List of objects in pool */
uint32_t nb_mem_chunks; /**< Number of memory chunks */
struct rte_mempool_memhdr_list mem_list; /**< List of memory chunks */
+ struct rte_mempool_info info; /**< Additional mempool info */

#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
/** Per-lcore statistics. */
@@ -298,6 +302,17 @@ struct rte_mempool {
#define MEMPOOL_F_CAPA_BLK_ALIGNED_OBJECTS 0x0080

/**
+ * This capability flag is advertised by a mempool handler. It is used for
+ * the case where the mempool driver wants clusters of objects to start at
+ * a power-of-two boundary.
+ *
+ * Note:
+ * - This flag should not be passed by the application.
+ * It is used by the mempool driver only.
+ */
+#define MEMPOOL_F_CAPA_ALLOCATE_IN_CLUSTERS 0x0100
+
+/**
* @internal When debug is enabled, store some statistics.
*
* @param mp
@@ -1605,11 +1620,15 @@ uint32_t rte_mempool_calc_obj_size(uint32_t elt_size, uint32_t flags,
* LOG2 of the physical pages size. If set to 0, ignore page boundaries.
* @param flags
* The mempool flags.
+ * @param info
+ * A pointer to the mempool's additional info (may be NULL unless
+ * MEMPOOL_F_CAPA_ALLOCATE_IN_CLUSTERS is set in @arg flags)
* @return
* Required memory size aligned at page boundary.
*/
size_t rte_mempool_xmem_size(uint32_t elt_num, size_t total_elt_sz,
- uint32_t pg_shift, unsigned int flags);
+ uint32_t pg_shift, unsigned int flags,
+ const struct rte_mempool_info *info);

/**
* Get the size of memory required to store mempool elements.
diff --git a/test/test/test_mempool.c b/test/test/test_mempool.c
index 37ead50..f4bb9a9 100644
--- a/test/test/test_mempool.c
+++ b/test/test/test_mempool.c
@@ -485,7 +485,7 @@ test_mempool_xmem_misc(void)
elt_num = MAX_KEEP;
total_size = rte_mempool_calc_obj_size(MEMPOOL_ELT_SIZE, 0, NULL);
sz = rte_mempool_xmem_size(elt_num, total_size, MEMPOOL_PG_SHIFT_MAX,
- 0);
+ 0, NULL);

usz = rte_mempool_xmem_usage(NULL, elt_num, total_size, 0, 1,
MEMPOOL_PG_SHIFT_MAX, 0);
--
2.7.4
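
As a worked example of the clustered branch of rte_mempool_xmem_size() above (the element size, cluster size and object count are illustrative assumptions):

#include <stdio.h>
#include <rte_common.h>

int
main(void)
{
	unsigned int total_elt_sz = 2176;  /* header + elt + trailer */
	unsigned int cluster_size = 15;    /* objects per cluster */
	unsigned int elt_num = 1024;
	unsigned int align_shift =
		rte_bsf32(rte_align32pow2(total_elt_sz * cluster_size));
	size_t sz = ((size_t)(elt_num / cluster_size) + 2) << align_shift;

	/* 2176 * 15 = 32640, rounded up to 32768, so align_shift = 15;
	 * (1024 / 15 + 2) << 15 = 70 * 32768 = 2293760 bytes (~2.2 MB).
	 */
	printf("required size: %zu bytes\n", sz);
	return 0;
}
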
Olivier MATZ
2017-12-14 13:37:24 UTC
Post by Andrew Rybchenko
Clustered allocation is required to simplify packaging objects into
buckets and looking up the bucket control structure from an object.
---
lib/librte_mempool/rte_mempool.c | 39 +++++++++++++++++++++++++++++++++++----
lib/librte_mempool/rte_mempool.h | 23 +++++++++++++++++++++--
test/test/test_mempool.c | 2 +-
3 files changed, 57 insertions(+), 7 deletions(-)
diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c
index d50dba4..43455a3 100644
--- a/lib/librte_mempool/rte_mempool.c
+++ b/lib/librte_mempool/rte_mempool.c
@@ -239,7 +239,8 @@ rte_mempool_calc_obj_size(uint32_t elt_size, uint32_t flags,
*/
size_t
rte_mempool_xmem_size(uint32_t elt_num, size_t total_elt_sz, uint32_t pg_shift,
- unsigned int flags)
+ unsigned int flags,
+ const struct rte_mempool_info *info)
{
size_t obj_per_page, pg_num, pg_sz;
unsigned int mask;
@@ -252,6 +253,17 @@ rte_mempool_xmem_size(uint32_t elt_num, size_t total_elt_sz, uint32_t pg_shift,
if (total_elt_sz == 0)
return 0;
+ if (flags & MEMPOOL_F_CAPA_ALLOCATE_IN_CLUSTERS) {
+ unsigned int align_shift =
+ rte_bsf32(
+ rte_align32pow2(total_elt_sz *
+ info->cluster_size));
+ if (pg_shift < align_shift) {
+ return ((elt_num / info->cluster_size) + 2)
+ << align_shift;
+ }
+ }
+
+Cc Santosh for this

To be honest, that was my fear when introducing
MEMPOOL_F_CAPA_BLK_ALIGNED_OBJECTS and MEMPOOL_F_CAPA_PHYS_CONTIG: to see more
and more specific flags in generic code.

I feel that the hidden meaning of these flags is more "if driver == foo",
which shows that something is wrong in the current design.

We have to think about another way to do it. Let me try to propose
something (to be deepened).

The standard way to create a mempool is:

mp = create_empty(...)
set_ops_by_name(mp, "my-driver") // optional
populate_default(mp) // or populate_*()
obj_iter(mp, callback, arg) // optional, to init objects
// and optional local func to init mempool priv

First, we can consider deprecating some APIs like:
- rte_mempool_xmem_create()
- rte_mempool_xmem_size()
- rte_mempool_xmem_usage()
- rte_mempool_populate_iova_tab()

These functions were introduced for xen, which was recently
removed. They are complex to use, and are not used anywhere else in
DPDK.

Then, instead of having flags (quite hard to understand without knowing
the underlying driver), we can let the mempool drivers do the
populate_default() operation. For that we can add a populate_default
field in mempool ops. Same for populate_virt(), populate_anon(), and
populate_phys() which can return -ENOTSUP if this is not
implemented/implementable on a specific driver, or if flags
(NO_CACHE_ALIGN, NO_SPREAD, ...) are not supported. If the function
pointer is NULL, use the generic function.

Thanks to this, the generic code would remain understandable and won't
have to care about how memory should be allocated for a specific driver.

Thoughts?
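
A hedged sketch of what this could look like in the ops structure; the callback names and signatures are illustrative only, not an agreed API:

struct rte_mempool; /* from rte_mempool.h */

/* Hypothetical per-driver populate hook; a NULL pointer or an -ENOTSUP
 * return would fall back to the existing generic implementation.
 */
typedef int (*rte_mempool_populate_default_sketch_t)(struct rte_mempool *mp);

struct rte_mempool_ops_sketch {
	/* ... existing callbacks (alloc, free, enqueue, dequeue, ...) ... */
	rte_mempool_populate_default_sketch_t populate_default;
	/* populate_virt / populate_anon / populate_phys could follow
	 * the same pattern.
	 */
};
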


[...]
Post by Andrew Rybchenko
diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
index 3c59d36..9bcb8b7 100644
--- a/lib/librte_mempool/rte_mempool.h
+++ b/lib/librte_mempool/rte_mempool.h
@@ -220,7 +220,10 @@ struct rte_mempool_memhdr {
/*
* Additional information about the mempool
*/
-struct rte_mempool_info;
+struct rte_mempool_info {
+ /** Number of objects in a cluster */
+ unsigned int cluster_size;
+};
I think what I'm proposing would also avoid introducing this
structure, which is generic but only applies to this driver.
Andrew Rybchenko
2018-01-17 15:03:52 UTC
Post by Olivier MATZ
Post by Andrew Rybchenko
Clustered allocation is required to simplify packaging objects into
buckets and looking up the bucket control structure from an object.
---
lib/librte_mempool/rte_mempool.c | 39 +++++++++++++++++++++++++++++++++++----
lib/librte_mempool/rte_mempool.h | 23 +++++++++++++++++++++--
test/test/test_mempool.c | 2 +-
3 files changed, 57 insertions(+), 7 deletions(-)
diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c
index d50dba4..43455a3 100644
--- a/lib/librte_mempool/rte_mempool.c
+++ b/lib/librte_mempool/rte_mempool.c
@@ -239,7 +239,8 @@ rte_mempool_calc_obj_size(uint32_t elt_size, uint32_t flags,
*/
size_t
rte_mempool_xmem_size(uint32_t elt_num, size_t total_elt_sz, uint32_t pg_shift,
- unsigned int flags)
+ unsigned int flags,
+ const struct rte_mempool_info *info)
{
size_t obj_per_page, pg_num, pg_sz;
unsigned int mask;
@@ -252,6 +253,17 @@ rte_mempool_xmem_size(uint32_t elt_num, size_t total_elt_sz, uint32_t pg_shift,
if (total_elt_sz == 0)
return 0;
+ if (flags & MEMPOOL_F_CAPA_ALLOCATE_IN_CLUSTERS) {
+ unsigned int align_shift =
+ rte_bsf32(
+ rte_align32pow2(total_elt_sz *
+ info->cluster_size));
+ if (pg_shift < align_shift) {
+ return ((elt_num / info->cluster_size) + 2)
+ << align_shift;
+ }
+ }
+
+Cc Santosh for this
To be honnest, that was my fear when introducing
MEMPOOL_F_CAPA_BLK_ALIGNED_OBJECTS and MEMPOOL_F_CAPA_PHYS_CONTIG to see more
and more specific flags in generic code.
I feel that the hidden meaning of these flags is more "if driver == foo",
which shows that something is wrong is the current design.
We have to think about another way to do. Let me try to propose
something (to be deepen).
mp = create_empty(...)
set_ops_by_name(mp, "my-driver") // optional
populate_default(mp) // or populate_*()
obj_iter(mp, callback, arg) // optional, to init objects
// and optional local func to init mempool priv
- rte_mempool_xmem_create()
- rte_mempool_xmem_size()
- rte_mempool_xmem_usage()
- rte_mempool_populate_iova_tab()
These functions were introduced for xen, which was recently
removed. They are complex to use, and are not used anywhere else in
DPDK.
Then, instead of having flags (quite hard to understand without knowing
the underlying driver), we can let the mempool drivers do the
populate_default() operation. For that we can add a populate_default
field in mempool ops. Same for populate_virt(), populate_anon(), and
populate_phys() which can return -ENOTSUP if this is not
implemented/implementable on a specific driver, or if flags
(NO_CACHE_ALIGN, NO_SPREAD, ...) are not supported. If the function
pointer is NULL, use the generic function.
Thanks to this, the generic code would remain understandable and won't
have to care about how memory should be allocated for a specific driver.
Thoughts?
Yes, I agree. This week we'll provide an updated version of the RFC which
covers it, including the transition of mempool/octeontx. I think it is
sufficient to introduce two new ops:
 1. To calculate the memory space required to store a specified number of objects
 2. To populate objects in the provided memory chunk (the op will be called
     from rte_mempool_populate_iova(), which is a leaf function for all
     rte_mempool_populate_*() calls).
It will allow us to avoid duplication and keep memory chunk housekeeping inside
the mempool library.
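For reference, a sketch of what the two ops could look like: the calc_mem_size
prototype matches the default implementation added later in this series, while
the populate prototype and the callback type name are assumptions at this point.

/* Op 1: report how much memory is needed for obj_num objects, plus the
 * minimum usable chunk size and the required alignment. */
typedef ssize_t (*rte_mempool_calc_mem_size_t)(const struct rte_mempool *mp,
                uint32_t obj_num, uint32_t pg_shift,
                size_t *min_chunk_size, size_t *align);

/* Per-object callback invoked for every object laid out by the driver
 * (type name assumed here). */
typedef void (rte_mempool_populate_obj_cb_t)(struct rte_mempool *mp,
                void *opaque, void *vaddr, rte_iova_t iova);

/* Op 2: lay out up to max_objs objects in the chunk [vaddr, vaddr + len),
 * calling obj_cb for each of them; invoked from rte_mempool_populate_iova(). */
typedef int (*rte_mempool_populate_t)(struct rte_mempool *mp,
                unsigned int max_objs, void *vaddr, rte_iova_t iova,
                size_t len, rte_mempool_populate_obj_cb_t *obj_cb,
                void *obj_cb_arg);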
Post by Olivier MATZ
[...]
Post by Andrew Rybchenko
diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
index 3c59d36..9bcb8b7 100644
--- a/lib/librte_mempool/rte_mempool.h
+++ b/lib/librte_mempool/rte_mempool.h
@@ -220,7 +220,10 @@ struct rte_mempool_memhdr {
/*
* Additional information about the mempool
*/
-struct rte_mempool_info;
+struct rte_mempool_info {
+ /** Number of objects in a cluster */
+ unsigned int cluster_size;
+};
I think what I'm proposing would also prevent to introduce this
structure, which is generic but only applies to this driver.
Yes
santosh
2018-01-17 15:55:43 UTC
Permalink
Post by Andrew Rybchenko
Post by Olivier MATZ
Post by Andrew Rybchenko
Clustered allocation is required to simplify packaging objects into
buckets and search of the bucket control structure by an object.
---
  lib/librte_mempool/rte_mempool.c | 39 +++++++++++++++++++++++++++++++++++----
  lib/librte_mempool/rte_mempool.h | 23 +++++++++++++++++++++--
  test/test/test_mempool.c         |  2 +-
  3 files changed, 57 insertions(+), 7 deletions(-)
diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c
index d50dba4..43455a3 100644
--- a/lib/librte_mempool/rte_mempool.c
+++ b/lib/librte_mempool/rte_mempool.c
@@ -239,7 +239,8 @@ rte_mempool_calc_obj_size(uint32_t elt_size, uint32_t flags,
   */
  size_t
  rte_mempool_xmem_size(uint32_t elt_num, size_t total_elt_sz, uint32_t pg_shift,
-              unsigned int flags)
+              unsigned int flags,
+              const struct rte_mempool_info *info)
  {
      size_t obj_per_page, pg_num, pg_sz;
      unsigned int mask;
@@ -252,6 +253,17 @@ rte_mempool_xmem_size(uint32_t elt_num, size_t total_elt_sz, uint32_t pg_shift,
      if (total_elt_sz == 0)
          return 0;
  +    if (flags & MEMPOOL_F_CAPA_ALLOCATE_IN_CLUSTERS) {
+        unsigned int align_shift =
+            rte_bsf32(
+                rte_align32pow2(total_elt_sz *
+                        info->cluster_size));
+        if (pg_shift < align_shift) {
+            return ((elt_num / info->cluster_size) + 2)
+                << align_shift;
+        }
+    }
+
+Cc Santosh for this
To be honnest, that was my fear when introducing
MEMPOOL_F_CAPA_BLK_ALIGNED_OBJECTS and MEMPOOL_F_CAPA_PHYS_CONTIG to see more
and more specific flags in generic code.
I feel that the hidden meaning of these flags is more "if driver == foo",
which shows that something is wrong is the current design.
We have to think about another way to do. Let me try to propose
something (to be deepen).
   mp = create_empty(...)
   set_ops_by_name(mp, "my-driver")    // optional
   populate_default(mp)                // or populate_*()
   obj_iter(mp, callback, arg)         // optional, to init objects
   // and optional local func to init mempool priv
  - rte_mempool_xmem_create()
  - rte_mempool_xmem_size()
  - rte_mempool_xmem_usage()
  - rte_mempool_populate_iova_tab()
These functions were introduced for xen, which was recently
removed. They are complex to use, and are not used anywhere else in
DPDK.
Then, instead of having flags (quite hard to understand without knowing
the underlying driver), we can let the mempool drivers do the
populate_default() operation. For that we can add a populate_default
field in mempool ops. Same for populate_virt(), populate_anon(), and
populate_phys() which can return -ENOTSUP if this is not
implemented/implementable on a specific driver, or if flags
(NO_CACHE_ALIGN, NO_SPREAD, ...) are not supported. If the function
pointer is NULL, use the generic function.
Thanks to this, the generic code would remain understandable and won't
have to care about how memory should be allocated for a specific driver.
Thoughts?
Yes, I agree. This week we'll provide updated version of the RFC which
covers it including transition of the mempool/octeontx. I think it is sufficient
 1. To calculate memory space required to store specified number of objects
 2. To populate objects in the provided memory chunk (the op will be called
     from rte_mempool_populate_iova() which is a leaf function for all
     rte_mempool_populate_*() calls.
It will allow to avoid duplication and keep memchunks housekeeping inside
mempool library.
There is also a downside of letting the mempool driver populate, which was raised in another thread.
http://dpdk.org/dev/patchwork/patch/31943/

Thanks.
Andrew Rybchenko
2018-01-17 16:37:34 UTC
Permalink
Post by santosh
Post by Andrew Rybchenko
Post by Olivier MATZ
Post by Andrew Rybchenko
Clustered allocation is required to simplify packaging objects into
buckets and search of the bucket control structure by an object.
---
  lib/librte_mempool/rte_mempool.c | 39 +++++++++++++++++++++++++++++++++++----
  lib/librte_mempool/rte_mempool.h | 23 +++++++++++++++++++++--
  test/test/test_mempool.c         |  2 +-
  3 files changed, 57 insertions(+), 7 deletions(-)
diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c
index d50dba4..43455a3 100644
--- a/lib/librte_mempool/rte_mempool.c
+++ b/lib/librte_mempool/rte_mempool.c
@@ -239,7 +239,8 @@ rte_mempool_calc_obj_size(uint32_t elt_size, uint32_t flags,
   */
  size_t
  rte_mempool_xmem_size(uint32_t elt_num, size_t total_elt_sz, uint32_t pg_shift,
-              unsigned int flags)
+              unsigned int flags,
+              const struct rte_mempool_info *info)
  {
      size_t obj_per_page, pg_num, pg_sz;
      unsigned int mask;
@@ -252,6 +253,17 @@ rte_mempool_xmem_size(uint32_t elt_num, size_t total_elt_sz, uint32_t pg_shift,
      if (total_elt_sz == 0)
          return 0;
  +    if (flags & MEMPOOL_F_CAPA_ALLOCATE_IN_CLUSTERS) {
+        unsigned int align_shift =
+            rte_bsf32(
+                rte_align32pow2(total_elt_sz *
+                        info->cluster_size));
+        if (pg_shift < align_shift) {
+            return ((elt_num / info->cluster_size) + 2)
+                << align_shift;
+        }
+    }
+
+Cc Santosh for this
To be honnest, that was my fear when introducing
MEMPOOL_F_CAPA_BLK_ALIGNED_OBJECTS and MEMPOOL_F_CAPA_PHYS_CONTIG to see more
and more specific flags in generic code.
I feel that the hidden meaning of these flags is more "if driver == foo",
which shows that something is wrong is the current design.
We have to think about another way to do. Let me try to propose
something (to be deepen).
   mp = create_empty(...)
   set_ops_by_name(mp, "my-driver")    // optional
   populate_default(mp)                // or populate_*()
   obj_iter(mp, callback, arg)         // optional, to init objects
   // and optional local func to init mempool priv
  - rte_mempool_xmem_create()
  - rte_mempool_xmem_size()
  - rte_mempool_xmem_usage()
  - rte_mempool_populate_iova_tab()
These functions were introduced for xen, which was recently
removed. They are complex to use, and are not used anywhere else in
DPDK.
Then, instead of having flags (quite hard to understand without knowing
the underlying driver), we can let the mempool drivers do the
populate_default() operation. For that we can add a populate_default
field in mempool ops. Same for populate_virt(), populate_anon(), and
populate_phys() which can return -ENOTSUP if this is not
implemented/implementable on a specific driver, or if flags
(NO_CACHE_ALIGN, NO_SPREAD, ...) are not supported. If the function
pointer is NULL, use the generic function.
Thanks to this, the generic code would remain understandable and won't
have to care about how memory should be allocated for a specific driver.
Thoughts?
Yes, I agree. This week we'll provide updated version of the RFC which
covers it including transition of the mempool/octeontx. I think it is sufficient
 1. To calculate memory space required to store specified number of objects
 2. To populate objects in the provided memory chunk (the op will be called
     from rte_mempool_populate_iova() which is a leaf function for all
     rte_mempool_populate_*() calls.
It will allow to avoid duplication and keep memchunks housekeeping inside
mempool library.
There is also a downside of letting mempool driver to populate, which was raised in other thread.
http://dpdk.org/dev/patchwork/patch/31943/
I've seen the note about code duplication. Let's discuss it when v2 is sent.
I think our approach minimizes it and allows keeping only the driver-specific
code in the driver callback.
Andrew Rybchenko
2017-11-24 16:06:31 UTC
Permalink
From: "Artem V. Andreev" <***@oktetlabs.ru>

Signed-off-by: Artem V. Andreev <***@oktetlabs.ru>
Signed-off-by: Andrew Rybchenko <***@solarflare.com>
---
drivers/mempool/bucket/rte_mempool_bucket.c | 38 +++++++++++++++++++++++++++++
1 file changed, 38 insertions(+)

diff --git a/drivers/mempool/bucket/rte_mempool_bucket.c b/drivers/mempool/bucket/rte_mempool_bucket.c
index 4063d2c..ee5a6cf 100644
--- a/drivers/mempool/bucket/rte_mempool_bucket.c
+++ b/drivers/mempool/bucket/rte_mempool_bucket.c
@@ -315,6 +315,42 @@ bucket_dequeue(struct rte_mempool *mp, void **obj_table, unsigned int n)
return rc;
}

+static int
+bucket_dequeue_contig_blocks(struct rte_mempool *mp, void **first_obj_table,
+ unsigned int n)
+{
+ struct bucket_data *data = mp->pool_data;
+ const uint32_t header_size = data->header_size;
+ struct bucket_stack *cur_stack = data->buckets[rte_lcore_id()];
+ unsigned int n_buckets_from_stack = RTE_MIN(n, cur_stack->top);
+ struct bucket_header *hdr;
+ void **first_objp = first_obj_table;
+
+ bucket_adopt_orphans(data);
+
+ n -= n_buckets_from_stack;
+ while (n_buckets_from_stack-- > 0) {
+ hdr = bucket_stack_pop_unsafe(cur_stack);
+ *first_objp++ = (uint8_t *)hdr + header_size;
+ }
+ while (n-- > 0) {
+ if (unlikely(rte_ring_dequeue(data->shared_bucket_ring,
+ (void **)&hdr) != 0)) {
+ /* Return the already dequeued buckets */
+ while (first_objp-- != first_obj_table) {
+ bucket_stack_push(cur_stack,
+ (uint8_t *)*first_objp - header_size);
+ }
+ rte_errno = ENOBUFS;
+ return -rte_errno;
+ }
+ hdr->lcore_id = rte_lcore_id();
+ *first_objp++ = (uint8_t *)hdr + header_size;
+ }
+
+ return 0;
+}
+
static unsigned int
bucket_get_count(const struct rte_mempool *mp)
{
@@ -468,6 +504,7 @@ bucket_get_info(__rte_unused const struct rte_mempool *mp,
mp->trailer_size;

info->cluster_size = BUCKET_MEM_SIZE / chunk_size;
+ info->contig_block_size = info->cluster_size;
return 0;
}

@@ -515,6 +552,7 @@ static const struct rte_mempool_ops ops_bucket = {
.get_capabilities = bucket_get_capabilities,
.register_memory_area = bucket_register_memory_area,
.get_info = bucket_get_info,
+ .dequeue_contig_blocks = bucket_dequeue_contig_blocks,
};
--
2.7.4
Olivier MATZ
2017-12-14 13:36:00 UTC
Permalink
Hi Andrew,

Please find some comments about this patchset below.
I'll also send some comments as replies to the specific patch.
The patch series adds bucket mempool driver which allows to allocate
(both physically and virtually) contiguous blocks of objects and adds
mempool API to do it. It is still capable to provide separate objects,
but it is definitely more heavy-weight than ring/stack drivers.
The target usecase is dequeue in blocks and enqueue separate objects
back (which are collected in buckets to be dequeued). So, the memory
pool with bucket driver is created by an application and provided to
networking PMD receive queue. The choice of bucket driver is done using
rte_eth_dev_pool_ops_supported(). A PMD that relies upon contiguous
block allocation should report the bucket driver as the only supported
and preferred one.
So, you are planning to use this driver for a future/existing PMD?

Do you have numbers about the performance gain, in which conditions,
etc... ? And are there conditions where there is a performance loss ?
The number of objects in the contiguous block is a function of bucket
memory size (.config option) and total element size.
The size of the bucket memory is hardcoded to 32KB.
Why this value ?
Won't that be an issue if the user wants to use larger objects?
As I understand it breaks ABI so it requires 3 acks in accordance with
policy, deprecation notice and mempool shared library version bump.
If there is a way to avoid ABI breakage, please, let us know.
If my understanding is correct, the ABI breakage is caused by the
addition of the new block dequeue operation, right?


Thanks
Olivier
Andrew Rybchenko
2018-01-17 15:03:11 UTC
Permalink
Hi Olivier,

first of all many thanks for the review. See my replies/comments below.
Also I'll reply to the the specific patch mails as well.
Post by Olivier MATZ
Hi Andrew,
Please find some comments about this patchset below.
I'll also send some comments as replies to the specific patch.
The patch series adds bucket mempool driver which allows to allocate
(both physically and virtually) contiguous blocks of objects and adds
mempool API to do it. It is still capable to provide separate objects,
but it is definitely more heavy-weight than ring/stack drivers.
The target usecase is dequeue in blocks and enqueue separate objects
back (which are collected in buckets to be dequeued). So, the memory
pool with bucket driver is created by an application and provided to
networking PMD receive queue. The choice of bucket driver is done using
rte_eth_dev_pool_ops_supported(). A PMD that relies upon contiguous
block allocation should report the bucket driver as the only supported
and preferred one.
So, you are planning to use this driver for a future/existing PMD?
Yes, we're going to use it in the sfc PMD in the case of a dedicated FW
variant which utilizes the bucketing.
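A minimal sketch of the application-side choice described above ("bucket" is
the assumed ops name; only the sign of the rte_eth_dev_pool_ops_supported()
return value is relied upon, a negative value meaning the ops cannot be used
with the port):

#include <rte_eal.h>
#include <rte_ethdev.h>
#include <rte_lcore.h>
#include <rte_mbuf.h>

static struct rte_mempool *
example_create_rx_pool(uint16_t port_id, unsigned int nb_mbufs)
{
        const char *ops_name = "bucket";        /* assumed driver name */

        /* fall back to the platform default ops if the port cannot use
         * the bucket driver */
        if (rte_eth_dev_pool_ops_supported(port_id, ops_name) < 0)
                ops_name = rte_eal_mbuf_default_mempool_ops();

        return rte_pktmbuf_pool_create_by_ops("example_rx_pool", nb_mbufs,
                                              256, 0,
                                              RTE_MBUF_DEFAULT_BUF_SIZE,
                                              rte_socket_id(), ops_name);
}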
Post by Olivier MATZ
Do you have numbers about the performance gain, in which conditions,
etc... ? And are there conditions where there is a performance loss ?
Our idea here is to use it together with HW/FW which understands the bucketing.
It adds some load on the CPU to track buckets, but block/bucket dequeue allows
us to compensate for it. We'll try to prepare performance figures when we have
a solution close to final. Hopefully pretty soon.
Post by Olivier MATZ
The number of objects in the contiguous block is a function of bucket
memory size (.config option) and total element size.
The size of the bucket memory is hardcoded to 32KB.
Why this value ?
It is just an example. In fact we test mainly with 64K and 128K.
Post by Olivier MATZ
Won't that be an issue if the user wants to use larger objects?
Ideally it should be start-time configurable, but that requires a way
to specify driver-specific parameters passed to the mempool on allocation.
Right now we decided to keep the task for the future since there is
no clear understanding of how it should look.
If you have ideas, please share them; we would be thankful.
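One possible direction for such parameters (a sketch only, not part of this
series): the pool_config argument of rte_mempool_set_ops_byname() is stored in
mp->pool_config and is visible to the driver's callbacks, so a driver-defined
structure could carry things like the bucket memory size. The structure and
field below are invented for illustration.

#include <rte_mempool.h>

/* Hypothetical driver-specific creation parameters. */
struct bucket_pool_params {
        size_t bucket_mem_size;         /* e.g. 64 KiB instead of the
                                         * build-time constant */
};

static struct bucket_pool_params example_params = {
        .bucket_mem_size = 64 * 1024,
};

static int
example_select_bucket_ops(struct rte_mempool *mp)
{
        /* the driver's alloc callback can later read mp->pool_config */
        return rte_mempool_set_ops_byname(mp, "bucket", &example_params);
}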
Post by Olivier MATZ
As I understand it breaks ABI so it requires 3 acks in accordance with
policy, deprecation notice and mempool shared library version bump.
If there is a way to avoid ABI breakage, please, let us know.
If my understanding is correct, the ABI breakage is caused by the
addition of the new block dequeue operation, right?
Yes, and we'll have more ops to make the population of objects customizable.

Thanks,
Andrew.
Olivier MATZ
2017-12-14 13:38:38 UTC
Permalink
Post by Andrew Rybchenko
Mempool get/put API cares about cache itself, but sometimes it is
required to flush the cache explicitly.
I don't disagree, but do you have some use-case in mind?
Post by Andrew Rybchenko
Also dedicated API allows to decouple it from block get API (to be
added) and provides more fine-grained control.
---
lib/librte_mempool/rte_mempool.h | 16 ++++++++++++++++
1 file changed, 16 insertions(+)
diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
index 9bcb8b7..3a52b93 100644
--- a/lib/librte_mempool/rte_mempool.h
+++ b/lib/librte_mempool/rte_mempool.h
@@ -1161,6 +1161,22 @@ rte_mempool_default_cache(struct rte_mempool *mp, unsigned lcore_id)
}
/**
+ * Ensure that a default per-lcore mempool cache is flushed, if it is present
+ *
+ * A pointer to the mempool structure.
+ */
+static __rte_always_inline void
+rte_mempool_ensure_cache_flushed(struct rte_mempool *mp)
+{
+ struct rte_mempool_cache *cache;
+ cache = rte_mempool_default_cache(mp, rte_lcore_id());
+ if (cache != NULL && cache->len > 0)
+ rte_mempool_cache_flush(cache, mp);
+}
+
We already have rte_mempool_cache_flush().
Why not just extend it instead of adding a new function?

I mean:

static __rte_always_inline void
rte_mempool_cache_flush(struct rte_mempool_cache *cache,
struct rte_mempool *mp)
{
+ if (cache == NULL)
+ cache = rte_mempool_default_cache(mp, rte_lcore_id());
+ if (cache == NULL || cache->len == 0)
+ return;
rte_mempool_ops_enqueue_bulk(mp, cache->objs, cache->len);
cache->len = 0;
}
Andrew Rybchenko
2018-01-17 15:07:00 UTC
Permalink
Post by Olivier MATZ
Post by Andrew Rybchenko
Mempool get/put API cares about cache itself, but sometimes it is
required to flush the cache explicitly.
I don't disagree, but do you have some use-case in mind?
Ideally mempool objects should be reused ASAP. Block/bucket dequeue
bypasses the cache, since the cache is not block-aware. So, the cache should be
flushed before a block dequeue. Initially we had the cache flush inside the block
dequeue wrapper, but decoupling it gives more freedom for optimizations.
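With the extended flush suggested above, the caller side could stay as simple
as this sketch (rte_mempool_get_contig_blocks() is a hypothetical name for the
block get wrapper, which is not part of this patch yet):

/* Dequeue n contiguous blocks of objects; flush this lcore's default
 * cache first so that cached objects can rejoin their buckets. */
static int
example_get_blocks(struct rte_mempool *mp, void **first_obj_table,
                   unsigned int n)
{
        rte_mempool_cache_flush(NULL, mp);      /* NULL = default cache, per
                                                 * the suggestion above */
        return rte_mempool_get_contig_blocks(mp, first_obj_table, n);
}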
Post by Olivier MATZ
Post by Andrew Rybchenko
Also dedicated API allows to decouple it from block get API (to be
added) and provides more fine-grained control.
---
lib/librte_mempool/rte_mempool.h | 16 ++++++++++++++++
1 file changed, 16 insertions(+)
diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
index 9bcb8b7..3a52b93 100644
--- a/lib/librte_mempool/rte_mempool.h
+++ b/lib/librte_mempool/rte_mempool.h
@@ -1161,6 +1161,22 @@ rte_mempool_default_cache(struct rte_mempool *mp, unsigned lcore_id)
}
/**
+ * Ensure that a default per-lcore mempool cache is flushed, if it is present
+ *
+ * A pointer to the mempool structure.
+ */
+static __rte_always_inline void
+rte_mempool_ensure_cache_flushed(struct rte_mempool *mp)
+{
+ struct rte_mempool_cache *cache;
+ cache = rte_mempool_default_cache(mp, rte_lcore_id());
+ if (cache != NULL && cache->len > 0)
+ rte_mempool_cache_flush(cache, mp);
+}
+
We already have rte_mempool_cache_flush().
Why not just extending it instead of adding a new function?
static __rte_always_inline void
rte_mempool_cache_flush(struct rte_mempool_cache *cache,
struct rte_mempool *mp)
{
+ if (cache == NULL)
+ cache = rte_mempool_default_cache(mp, rte_lcore_id());
+ if (cache == NULL || cache->len == 0)
+ return;
rte_mempool_ops_enqueue_bulk(mp, cache->objs, cache->len);
cache->len = 0;
}
Thanks, good idea.
Andrew Rybchenko
2018-01-23 13:15:56 UTC
Permalink
There is no specified dependency between rte_mempool_populate_default()
and rte_mempool_populate_iova(). So, the second should not rely on the
fact that the first adds capability flags to the mempool flags.

Fixes: 65cf769f5e6a ("mempool: detect physical contiguous objects")
Cc: ***@dpdk.org

Signed-off-by: Andrew Rybchenko <***@solarflare.com>
---
lib/librte_mempool/rte_mempool.c | 11 +++++++++--
1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c
index 6d17022..e783b9a 100644
--- a/lib/librte_mempool/rte_mempool.c
+++ b/lib/librte_mempool/rte_mempool.c
@@ -362,6 +362,7 @@ rte_mempool_populate_iova(struct rte_mempool *mp, char *vaddr,
void *opaque)
{
unsigned total_elt_sz;
+ unsigned int mp_cap_flags;
unsigned i = 0;
size_t off;
struct rte_mempool_memhdr *memhdr;
@@ -386,8 +387,14 @@ rte_mempool_populate_iova(struct rte_mempool *mp, char *vaddr,

total_elt_sz = mp->header_size + mp->elt_size + mp->trailer_size;

+ /* Get mempool capabilities */
+ mp_cap_flags = 0;
+ ret = rte_mempool_ops_get_capabilities(mp, &mp_cap_flags);
+ if ((ret < 0) && (ret != -ENOTSUP))
+ return ret;
+
/* Detect pool area has sufficient space for elements */
- if (mp->flags & MEMPOOL_F_CAPA_PHYS_CONTIG) {
+ if (mp_cap_flags & MEMPOOL_F_CAPA_PHYS_CONTIG) {
if (len < total_elt_sz * mp->size) {
RTE_LOG(ERR, MEMPOOL,
"pool area %" PRIx64 " not enough\n",
@@ -407,7 +414,7 @@ rte_mempool_populate_iova(struct rte_mempool *mp, char *vaddr,
memhdr->free_cb = free_cb;
memhdr->opaque = opaque;

- if (mp->flags & MEMPOOL_F_CAPA_BLK_ALIGNED_OBJECTS)
+ if (mp_cap_flags & MEMPOOL_F_CAPA_BLK_ALIGNED_OBJECTS)
/* align object start address to a multiple of total_elt_sz */
off = total_elt_sz - ((uintptr_t)vaddr % total_elt_sz);
else if (mp->flags & MEMPOOL_F_NO_CACHE_ALIGN)
--
2.7.4
Olivier Matz
2018-01-31 16:45:04 UTC
Permalink
Post by Andrew Rybchenko
There is not specified dependency between rte_mempool_populate_default()
and rte_mempool_populate_iova(). So, the second should not rely on the
fact that the first adds capability flags to the mempool flags.
Fixes: 65cf769f5e6a ("mempool: detect physical contiguous objects")
Looks good to me. I agree it's strange that the mp->flags are
updated with capabilities only in rte_mempool_populate_default().
I see that this behavior is removed later in the patchset since the
get_capa() is removed!

However maybe this single patch could go in 18.02.
+Santosh +Jerin since it's mostly about Octeon.
santosh
2018-02-01 05:05:45 UTC
Permalink
Post by Olivier Matz
Post by Andrew Rybchenko
There is not specified dependency between rte_mempool_populate_default()
and rte_mempool_populate_iova(). So, the second should not rely on the
fact that the first adds capability flags to the mempool flags.
Fixes: 65cf769f5e6a ("mempool: detect physical contiguous objects")
Looks good to me. I agree it's strange that the mp->flags are
updated with capabilities only in rte_mempool_populate_default().
I see that this behavior is removed later in the patchset since the
get_capa() is removed!
However maybe this single patch could go in 18.02.
+Santosh +Jerin since it's mostly about Octeon.
rte_mempool_xmem_size should return the correct size if the MEMPOOL_F_CAPA_BLK_ALIGNED_OBJECTS flag
is set in 'mp->flags'. That's why _ops_get_capabilities() is called in _populate_default() but not
in _populate_iova().
I think this patch alone may break the octeontx mempool.
Andrew Rybchenko
2018-02-01 06:54:35 UTC
Permalink
Post by santosh
Post by Olivier Matz
Post by Andrew Rybchenko
There is not specified dependency between rte_mempool_populate_default()
and rte_mempool_populate_iova(). So, the second should not rely on the
fact that the first adds capability flags to the mempool flags.
Fixes: 65cf769f5e6a ("mempool: detect physical contiguous objects")
Looks good to me. I agree it's strange that the mp->flags are
updated with capabilities only in rte_mempool_populate_default().
I see that this behavior is removed later in the patchset since the
get_capa() is removed!
However maybe this single patch could go in 18.02.
+Santosh +Jerin since it's mostly about Octeon.
rte_mempool_xmem_size should return correct size if MEMPOOL_F_CAPA_BLK_ALIGNED_OBJECTS flag
is set in 'mp->flags'. Thats why _ops_get_capabilities() called in _populate_default() but not
at _populate_iova().
I think, this 'alone' patch may break octeontx mempool.
The patch does not touch rte_mempool_populate_default().
_ops_get_capabilities() is still called there before
rte_mempool_xmem_size(). The theoretical problem which
the patch tries to fix is the case when
rte_mempool_populate_default() is not called at all. I.e. the application
calls _ops_get_capabilities() to get the flags, then, together with
mp->flags, calls rte_mempool_xmem_size() directly, allocates the
calculated amount of memory and calls _populate_iova().

Since later patches of the series reconsider memory size
calculation etc, it is up to you if it makes sense to apply it
in 18.02 as a fix.
santosh
2018-02-01 09:09:49 UTC
Permalink
Post by Andrew Rybchenko
Post by santosh
Post by Olivier Matz
Post by Andrew Rybchenko
There is not specified dependency between rte_mempool_populate_default()
and rte_mempool_populate_iova(). So, the second should not rely on the
fact that the first adds capability flags to the mempool flags.
Fixes: 65cf769f5e6a ("mempool: detect physical contiguous objects")
Looks good to me. I agree it's strange that the mp->flags are
updated with capabilities only in rte_mempool_populate_default().
I see that this behavior is removed later in the patchset since the
get_capa() is removed!
However maybe this single patch could go in 18.02.
+Santosh +Jerin since it's mostly about Octeon.
rte_mempool_xmem_size should return correct size if MEMPOOL_F_CAPA_BLK_ALIGNED_OBJECTS flag
is set in 'mp->flags'. Thats why _ops_get_capabilities() called in _populate_default() but not
at _populate_iova().
I think, this 'alone' patch may break octeontx mempool.
The patch does not touch rte_mempool_populate_default().
_ops_get_capabilities() is still called there before
rte_mempool_xmem_size(). The theoretical problem which
the patch tries to fix is the case when
rte_mempool_populate_default() is not called at all. I.e. application
calls _ops_get_capabilities() to get flags, then, together with
mp->flags, calls rte_mempool_xmem_size() directly, allocates
calculated amount of memory and calls _populate_iova().
In that case, the application does something like below:

/* Get mempool capabilities */
mp_flags = 0;
ret = rte_mempool_ops_get_capabilities(mp, &mp_flags);
if ((ret < 0) && (ret != -ENOTSUP))
return ret;

/* update mempool capabilities */
mp->flags |= mp_flags;

/* calc xmem sz */
size = rte_mempool_xmem_size(n, total_elt_sz, pg_shift,
mp->flags);

/* rsrv memory */
mz = rte_memzone_reserve_aligned(mz_name, size,...);

/* now populate iova */
ret = rte_mempool_populate_iova(mp,,..);

won't it work?

However, I understand that clubbing `_get_ops_capa() + flag update` into _populate_iova() is
perhaps better from the user's PoV.
Post by Andrew Rybchenko
Since later patches of the series reconsider memory size
calculation etc, it is up to you if it makes sense to apply it
in 18.02 as a fix.
Andrew Rybchenko
2018-02-01 09:18:36 UTC
Permalink
Post by Andrew Rybchenko
Post by Andrew Rybchenko
Post by santosh
Post by Olivier Matz
Post by Andrew Rybchenko
There is not specified dependency between rte_mempool_populate_default()
and rte_mempool_populate_iova(). So, the second should not rely on the
fact that the first adds capability flags to the mempool flags.
Fixes: 65cf769f5e6a ("mempool: detect physical contiguous objects")
Looks good to me. I agree it's strange that the mp->flags are
updated with capabilities only in rte_mempool_populate_default().
I see that this behavior is removed later in the patchset since the
get_capa() is removed!
However maybe this single patch could go in 18.02.
+Santosh +Jerin since it's mostly about Octeon.
rte_mempool_xmem_size should return correct size if MEMPOOL_F_CAPA_BLK_ALIGNED_OBJECTS flag
is set in 'mp->flags'. Thats why _ops_get_capabilities() called in _populate_default() but not
at _populate_iova().
I think, this 'alone' patch may break octeontx mempool.
The patch does not touch rte_mempool_populate_default().
_ops_get_capabilities() is still called there before
rte_mempool_xmem_size(). The theoretical problem which
the patch tries to fix is the case when
rte_mempool_populate_default() is not called at all. I.e. application
calls _ops_get_capabilities() to get flags, then, together with
mp->flags, calls rte_mempool_xmem_size() directly, allocates
calculated amount of memory and calls _populate_iova().
/* Get mempool capabilities */
mp_flags = 0;
ret = rte_mempool_ops_get_capabilities(mp, &mp_flags);
if ((ret < 0) && (ret != -ENOTSUP))
return ret;
/* update mempool capabilities */
mp->flags |= mp_flags;
The above line is not mandatory. "mp->flags | mp_flags" could simply be
passed to rte_mempool_xmem_size() below.
Post by Andrew Rybchenko
/* calc xmem sz */
size = rte_mempool_xmem_size(n, total_elt_sz, pg_shift,
mp->flags);
/* rsrv memory */
mz = rte_memzone_reserve_aligned(mz_name, size,...);
/* now populate iova */
ret = rte_mempool_populate_iova(mp,,..);
won't it work?
However I understand that clubbing `_get_ops_capa() + flag-updation` into _populate_iova()
perhaps better from user PoV.
Post by Andrew Rybchenko
Since later patches of the series reconsider memory size
calculation etc, it is up to you if it makes sense to apply it
in 18.02 as a fix.
santosh
2018-02-01 09:30:00 UTC
Permalink
Post by Andrew Rybchenko
Post by Andrew Rybchenko
Post by santosh
Post by Olivier Matz
Post by Andrew Rybchenko
There is not specified dependency between rte_mempool_populate_default()
and rte_mempool_populate_iova(). So, the second should not rely on the
fact that the first adds capability flags to the mempool flags.
Fixes: 65cf769f5e6a ("mempool: detect physical contiguous objects")
Looks good to me. I agree it's strange that the mp->flags are
updated with capabilities only in rte_mempool_populate_default().
I see that this behavior is removed later in the patchset since the
get_capa() is removed!
However maybe this single patch could go in 18.02.
+Santosh +Jerin since it's mostly about Octeon.
rte_mempool_xmem_size should return correct size if MEMPOOL_F_CAPA_BLK_ALIGNED_OBJECTS flag
is set in 'mp->flags'. Thats why _ops_get_capabilities() called in _populate_default() but not
at _populate_iova().
I think, this 'alone' patch may break octeontx mempool.
The patch does not touch rte_mempool_populate_default().
_ops_get_capabilities() is still called there before
rte_mempool_xmem_size(). The theoretical problem which
the patch tries to fix is the case when
rte_mempool_populate_default() is not called at all. I.e. application
calls _ops_get_capabilities() to get flags, then, together with
mp->flags, calls rte_mempool_xmem_size() directly, allocates
calculated amount of memory and calls _populate_iova().
    /* Get mempool capabilities */
    mp_flags = 0;
    ret = rte_mempool_ops_get_capabilities(mp, &mp_flags);
    if ((ret < 0) && (ret != -ENOTSUP))
        return ret;
    /* update mempool capabilities */
    mp->flags |= mp_flags;
Above line is not mandatory. "mp->flags | mp_flags" could be simply
passed to  rte_mempool_xmem_size() below.
That depends and is again up to the application requirement: if the app further down
wants to refer to mp->flags for _align/_contig, then it is better to update mp->flags.

But that wasn't the point of discussion; I'm trying to understand what
the application-level problem could be without this patch.
Andrew Rybchenko
2018-02-01 10:00:12 UTC
Permalink
Post by santosh
Post by Andrew Rybchenko
Post by Andrew Rybchenko
Post by santosh
Post by Olivier Matz
Post by Andrew Rybchenko
There is not specified dependency between rte_mempool_populate_default()
and rte_mempool_populate_iova(). So, the second should not rely on the
fact that the first adds capability flags to the mempool flags.
Fixes: 65cf769f5e6a ("mempool: detect physical contiguous objects")
Looks good to me. I agree it's strange that the mp->flags are
updated with capabilities only in rte_mempool_populate_default().
I see that this behavior is removed later in the patchset since the
get_capa() is removed!
However maybe this single patch could go in 18.02.
+Santosh +Jerin since it's mostly about Octeon.
rte_mempool_xmem_size should return correct size if MEMPOOL_F_CAPA_BLK_ALIGNED_OBJECTS flag
is set in 'mp->flags'. Thats why _ops_get_capabilities() called in _populate_default() but not
at _populate_iova().
I think, this 'alone' patch may break octeontx mempool.
The patch does not touch rte_mempool_populate_default().
_ops_get_capabilities() is still called there before
rte_mempool_xmem_size(). The theoretical problem which
the patch tries to fix is the case when
rte_mempool_populate_default() is not called at all. I.e. application
calls _ops_get_capabilities() to get flags, then, together with
mp->flags, calls rte_mempool_xmem_size() directly, allocates
calculated amount of memory and calls _populate_iova().
    /* Get mempool capabilities */
    mp_flags = 0;
    ret = rte_mempool_ops_get_capabilities(mp, &mp_flags);
    if ((ret < 0) && (ret != -ENOTSUP))
        return ret;
    /* update mempool capabilities */
    mp->flags |= mp_flags;
Above line is not mandatory. "mp->flags | mp_flags" could be simply
passed to  rte_mempool_xmem_size() below.
That depends and again upto application requirement, if app further down
wants to refer mp->flags for _align/_contig then better update to mp->flags.
But that wasn't the point of discussion, I'm trying to understand that
w/o this patch, whats could be the application level problem?
The problem is that it is fragile. If the application does not use
rte_mempool_populate_default(), it has to take care of adding
the mempool capability flags to the mempool flags. If that is not done,
the rte_mempool_populate_iova/virt/iova_tab() functions will work
incorrectly since F_CAPA_PHYS_CONTIG and
F_CAPA_BLK_ALIGNED_OBJECTS are missing.

The idea of the patch is to make it a bit more robust. I have no
idea how it can break something. If the capability flags are already
there - no problem. If not, just make sure that we have them locally.
Olivier Matz
2018-02-01 10:14:23 UTC
Permalink
Post by Andrew Rybchenko
Post by santosh
Post by Andrew Rybchenko
Post by Andrew Rybchenko
Post by santosh
Post by Olivier Matz
Post by Andrew Rybchenko
There is not specified dependency between rte_mempool_populate_default()
and rte_mempool_populate_iova(). So, the second should not rely on the
fact that the first adds capability flags to the mempool flags.
Fixes: 65cf769f5e6a ("mempool: detect physical contiguous objects")
Looks good to me. I agree it's strange that the mp->flags are
updated with capabilities only in rte_mempool_populate_default().
I see that this behavior is removed later in the patchset since the
get_capa() is removed!
However maybe this single patch could go in 18.02.
+Santosh +Jerin since it's mostly about Octeon.
rte_mempool_xmem_size should return correct size if MEMPOOL_F_CAPA_BLK_ALIGNED_OBJECTS flag
is set in 'mp->flags'. Thats why _ops_get_capabilities() called in _populate_default() but not
at _populate_iova().
I think, this 'alone' patch may break octeontx mempool.
The patch does not touch rte_mempool_populate_default().
_ops_get_capabilities() is still called there before
rte_mempool_xmem_size(). The theoretical problem which
the patch tries to fix is the case when
rte_mempool_populate_default() is not called at all. I.e. application
calls _ops_get_capabilities() to get flags, then, together with
mp->flags, calls rte_mempool_xmem_size() directly, allocates
calculated amount of memory and calls _populate_iova().
    /* Get mempool capabilities */
    mp_flags = 0;
    ret = rte_mempool_ops_get_capabilities(mp, &mp_flags);
    if ((ret < 0) && (ret != -ENOTSUP))
        return ret;
    /* update mempool capabilities */
    mp->flags |= mp_flags;
Above line is not mandatory. "mp->flags | mp_flags" could be simply
passed to  rte_mempool_xmem_size() below.
That depends and again upto application requirement, if app further down
wants to refer mp->flags for _align/_contig then better update to mp->flags.
But that wasn't the point of discussion, I'm trying to understand that
w/o this patch, whats could be the application level problem?
The problem that it is fragile. If application does not use
rte_mempool_populate_default() it has to care about addition
of mempool capability flags into mempool flags. If it is not done,
rte_mempool_populate_iova/virt/iova_tab() functions will work
incorrectly since F_CAPA_PHYS_CONTIG and
F_CAPA_BLK_ALIGNED_OBJECTS are missing.
The idea of the patch is to make it a bit more robust. I have no
idea how it can break something. If capability flags are already
there - no problem. If no, just make sure that we locally have them.
The example given by Santosh will work, but it is *not* the role of the
application to update the mempool flags. And nothing says that it is mandatory
to call rte_mempool_ops_get_capabilities() before the populate functions.

For instance, testpmd calls rte_mempool_populate_anon() when using
anonymous memory. The capabilities will never be updated in mp->flags.
santosh
2018-02-01 10:33:32 UTC
Permalink
Post by Olivier Matz
Post by Andrew Rybchenko
Post by santosh
Post by Andrew Rybchenko
Post by Andrew Rybchenko
Post by santosh
Post by Olivier Matz
Post by Andrew Rybchenko
There is not specified dependency between rte_mempool_populate_default()
and rte_mempool_populate_iova(). So, the second should not rely on the
fact that the first adds capability flags to the mempool flags.
Fixes: 65cf769f5e6a ("mempool: detect physical contiguous objects")
Looks good to me. I agree it's strange that the mp->flags are
updated with capabilities only in rte_mempool_populate_default().
I see that this behavior is removed later in the patchset since the
get_capa() is removed!
However maybe this single patch could go in 18.02.
+Santosh +Jerin since it's mostly about Octeon.
rte_mempool_xmem_size should return correct size if MEMPOOL_F_CAPA_BLK_ALIGNED_OBJECTS flag
is set in 'mp->flags'. Thats why _ops_get_capabilities() called in _populate_default() but not
at _populate_iova().
I think, this 'alone' patch may break octeontx mempool.
The patch does not touch rte_mempool_populate_default().
_ops_get_capabilities() is still called there before
rte_mempool_xmem_size(). The theoretical problem which
the patch tries to fix is the case when
rte_mempool_populate_default() is not called at all. I.e. application
calls _ops_get_capabilities() to get flags, then, together with
mp->flags, calls rte_mempool_xmem_size() directly, allocates
calculated amount of memory and calls _populate_iova().
    /* Get mempool capabilities */
    mp_flags = 0;
    ret = rte_mempool_ops_get_capabilities(mp, &mp_flags);
    if ((ret < 0) && (ret != -ENOTSUP))
        return ret;
    /* update mempool capabilities */
    mp->flags |= mp_flags;
Above line is not mandatory. "mp->flags | mp_flags" could be simply
passed to  rte_mempool_xmem_size() below.
That depends and again upto application requirement, if app further down
wants to refer mp->flags for _align/_contig then better update to mp->flags.
But that wasn't the point of discussion, I'm trying to understand that
w/o this patch, whats could be the application level problem?
The problem that it is fragile. If application does not use
rte_mempool_populate_default() it has to care about addition
of mempool capability flags into mempool flags. If it is not done,
rte_mempool_populate_iova/virt/iova_tab() functions will work
incorrectly since F_CAPA_PHYS_CONTIG and
F_CAPA_BLK_ALIGNED_OBJECTS are missing.
The idea of the patch is to make it a bit more robust. I have no
idea how it can break something. If capability flags are already
there - no problem. If no, just make sure that we locally have them.
The example given by Santosh will work, but it is *not* the role of the
application to update the mempool flags. And nothing says that it is mandatory
to call rte_mempool_ops_get_capabilities() before the populate functions.
For instance, in testpmd it calls rte_mempool_populate_anon() when using
anonymous memory. The capabilities will never be updated in mp->flags.
A valid case, and I agree with your example and explanation.
With this nit change:
mp->flags |= mp_capa_flags;

Acked-by: Santosh Shukla <***@caviumnetworks.com>
Andrew Rybchenko
2018-02-01 14:02:19 UTC
Permalink
Post by santosh
Post by Olivier Matz
Post by Andrew Rybchenko
Post by santosh
Post by Andrew Rybchenko
Post by Andrew Rybchenko
Post by santosh
Post by Olivier Matz
Post by Andrew Rybchenko
There is not specified dependency between rte_mempool_populate_default()
and rte_mempool_populate_iova(). So, the second should not rely on the
fact that the first adds capability flags to the mempool flags.
Fixes: 65cf769f5e6a ("mempool: detect physical contiguous objects")
Looks good to me. I agree it's strange that the mp->flags are
updated with capabilities only in rte_mempool_populate_default().
I see that this behavior is removed later in the patchset since the
get_capa() is removed!
However maybe this single patch could go in 18.02.
+Santosh +Jerin since it's mostly about Octeon.
rte_mempool_xmem_size should return correct size if MEMPOOL_F_CAPA_BLK_ALIGNED_OBJECTS flag
is set in 'mp->flags'. Thats why _ops_get_capabilities() called in _populate_default() but not
at _populate_iova().
I think, this 'alone' patch may break octeontx mempool.
The patch does not touch rte_mempool_populate_default().
_ops_get_capabilities() is still called there before
rte_mempool_xmem_size(). The theoretical problem which
the patch tries to fix is the case when
rte_mempool_populate_default() is not called at all. I.e. application
calls _ops_get_capabilities() to get flags, then, together with
mp->flags, calls rte_mempool_xmem_size() directly, allocates
calculated amount of memory and calls _populate_iova().
    /* Get mempool capabilities */
    mp_flags = 0;
    ret = rte_mempool_ops_get_capabilities(mp, &mp_flags);
    if ((ret < 0) && (ret != -ENOTSUP))
        return ret;
    /* update mempool capabilities */
    mp->flags |= mp_flags;
Above line is not mandatory. "mp->flags | mp_flags" could be simply
passed to  rte_mempool_xmem_size() below.
That depends and again upto application requirement, if app further down
wants to refer mp->flags for _align/_contig then better update to mp->flags.
But that wasn't the point of discussion, I'm trying to understand that
w/o this patch, whats could be the application level problem?
The problem that it is fragile. If application does not use
rte_mempool_populate_default() it has to care about addition
of mempool capability flags into mempool flags. If it is not done,
rte_mempool_populate_iova/virt/iova_tab() functions will work
incorrectly since F_CAPA_PHYS_CONTIG and
F_CAPA_BLK_ALIGNED_OBJECTS are missing.
The idea of the patch is to make it a bit more robust. I have no
idea how it can break something. If capability flags are already
there - no problem. If no, just make sure that we locally have them.
The example given by Santosh will work, but it is *not* the role of the
application to update the mempool flags. And nothing says that it is mandatory
to call rte_mempool_ops_get_capabilities() before the populate functions.
For instance, in testpmd it calls rte_mempool_populate_anon() when using
anonymous memory. The capabilities will never be updated in mp->flags.
Valid case and I agree with your example and explanation.
mp->flags |= mp_capa_flags;
I'll submit the patch separately with this minor change. Thanks.
santosh
2018-02-01 10:17:29 UTC
Permalink
Post by Andrew Rybchenko
Post by santosh
Post by Andrew Rybchenko
Post by Andrew Rybchenko
Post by santosh
Post by Olivier Matz
Post by Andrew Rybchenko
There is not specified dependency between rte_mempool_populate_default()
and rte_mempool_populate_iova(). So, the second should not rely on the
fact that the first adds capability flags to the mempool flags.
Fixes: 65cf769f5e6a ("mempool: detect physical contiguous objects")
Looks good to me. I agree it's strange that the mp->flags are
updated with capabilities only in rte_mempool_populate_default().
I see that this behavior is removed later in the patchset since the
get_capa() is removed!
However maybe this single patch could go in 18.02.
+Santosh +Jerin since it's mostly about Octeon.
rte_mempool_xmem_size should return correct size if MEMPOOL_F_CAPA_BLK_ALIGNED_OBJECTS flag
is set in 'mp->flags'. Thats why _ops_get_capabilities() called in _populate_default() but not
at _populate_iova().
I think, this 'alone' patch may break octeontx mempool.
The patch does not touch rte_mempool_populate_default().
_ops_get_capabilities() is still called there before
rte_mempool_xmem_size(). The theoretical problem which
the patch tries to fix is the case when
rte_mempool_populate_default() is not called at all. I.e. application
calls _ops_get_capabilities() to get flags, then, together with
mp->flags, calls rte_mempool_xmem_size() directly, allocates
calculated amount of memory and calls _populate_iova().
     /* Get mempool capabilities */
     mp_flags = 0;
     ret = rte_mempool_ops_get_capabilities(mp, &mp_flags);
     if ((ret < 0) && (ret != -ENOTSUP))
         return ret;
     /* update mempool capabilities */
     mp->flags |= mp_flags;
Above line is not mandatory. "mp->flags | mp_flags" could be simply
passed to  rte_mempool_xmem_size() below.
That depends and again upto application requirement, if app further down
wants to refer mp->flags for _align/_contig then better update to mp->flags.
But that wasn't the point of discussion, I'm trying to understand that
w/o this patch, whats could be the application level problem?
The problem that it is fragile. If application does not use
rte_mempool_populate_default() it has to care about addition
of mempool capability flags into mempool flags. If it is not done,
Capability flags should get updated into the mempool flags. Or else
_get_ops_capabilities() should update the capability flags in the mempool flags internally;
I recall that I proposed the same in the past.

[...]
Post by Andrew Rybchenko
The idea of the patch is to make it a bit more robust. I have no
idea how it can break something. If capability flags are already
there - no problem. If no, just make sure that we locally have them.
I would prefer that _get_ops_capabilities() updates the capability flags in mp->flags once,
rather than doing (mp->flags | mp_flags) across mempool functions.
Andrew Rybchenko
2018-02-01 14:02:23 UTC
Permalink
There is no specified dependency between rte_mempool_populate_default()
and rte_mempool_populate_iova(). So, the second should not rely on the
fact that the first adds capability flags to the mempool flags.

Fixes: 65cf769f5e6a ("mempool: detect physical contiguous objects")
Cc: ***@dpdk.org

Signed-off-by: Andrew Rybchenko <***@solarflare.com>
Acked-by: Santosh Shukla <***@caviumnetworks.com>
---
lib/librte_mempool/rte_mempool.c | 14 ++++++++++++--
1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c
index 6fdb723..54f7f4b 100644
--- a/lib/librte_mempool/rte_mempool.c
+++ b/lib/librte_mempool/rte_mempool.c
@@ -333,6 +333,7 @@ rte_mempool_populate_iova(struct rte_mempool *mp, char *vaddr,
void *opaque)
{
unsigned total_elt_sz;
+ unsigned int mp_capa_flags;
unsigned i = 0;
size_t off;
struct rte_mempool_memhdr *memhdr;
@@ -357,8 +358,17 @@ rte_mempool_populate_iova(struct rte_mempool *mp, char *vaddr,

total_elt_sz = mp->header_size + mp->elt_size + mp->trailer_size;

+ /* Get mempool capabilities */
+ mp_capa_flags = 0;
+ ret = rte_mempool_ops_get_capabilities(mp, &mp_capa_flags);
+ if ((ret < 0) && (ret != -ENOTSUP))
+ return ret;
+
+ /* update mempool capabilities */
+ mp->flags |= mp_capa_flags;
+
/* Detect pool area has sufficient space for elements */
- if (mp->flags & MEMPOOL_F_CAPA_PHYS_CONTIG) {
+ if (mp_capa_flags & MEMPOOL_F_CAPA_PHYS_CONTIG) {
if (len < total_elt_sz * mp->size) {
RTE_LOG(ERR, MEMPOOL,
"pool area %" PRIx64 " not enough\n",
@@ -378,7 +388,7 @@ rte_mempool_populate_iova(struct rte_mempool *mp, char *vaddr,
memhdr->free_cb = free_cb;
memhdr->opaque = opaque;

- if (mp->flags & MEMPOOL_F_CAPA_BLK_ALIGNED_OBJECTS)
+ if (mp_capa_flags & MEMPOOL_F_CAPA_BLK_ALIGNED_OBJECTS)
/* align object start address to a multiple of total_elt_sz */
off = total_elt_sz - ((uintptr_t)vaddr % total_elt_sz);
else if (mp->flags & MEMPOOL_F_NO_CACHE_ALIGN)
--
2.7.4
Thomas Monjalon
2018-02-05 23:53:13 UTC
Permalink
Post by Andrew Rybchenko
There is not specified dependency between rte_mempool_populate_default()
and rte_mempool_populate_iova(). So, the second should not rely on the
fact that the first adds capability flags to the mempool flags.
Fixes: 65cf769f5e6a ("mempool: detect physical contiguous objects")
Applied, thanks
Andrew Rybchenko
2018-01-23 13:16:05 UTC
Permalink
The callback is not required anymore since there is a new callback
to populate objects using the provided memory area, which provides
the same information.

Signed-off-by: Andrew Rybchenko <***@solarflare.com>
---
lib/librte_mempool/rte_mempool.c | 5 -----
lib/librte_mempool/rte_mempool.h | 31 ------------------------------
lib/librte_mempool/rte_mempool_ops.c | 14 --------------
lib/librte_mempool/rte_mempool_version.map | 1 -
4 files changed, 51 deletions(-)

diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c
index 32b3f94..fc9c95a 100644
--- a/lib/librte_mempool/rte_mempool.c
+++ b/lib/librte_mempool/rte_mempool.c
@@ -416,11 +416,6 @@ rte_mempool_populate_iova(struct rte_mempool *mp, char *vaddr,
mp->flags |= MEMPOOL_F_POOL_CREATED;
}

- /* Notify memory area to mempool */
- ret = rte_mempool_ops_register_memory_area(mp, vaddr, iova, len);
- if (ret != -ENOTSUP && ret < 0)
- return ret;
-
/* mempool is already populated */
if (mp->populated_size >= mp->size)
return -ENOSPC;
diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
index e95b1a7..6a0039d 100644
--- a/lib/librte_mempool/rte_mempool.h
+++ b/lib/librte_mempool/rte_mempool.h
@@ -399,12 +399,6 @@ typedef int (*rte_mempool_dequeue_t)(struct rte_mempool *mp,
typedef unsigned (*rte_mempool_get_count)(const struct rte_mempool *mp);

/**
- * Notify new memory area to mempool.
- */
-typedef int (*rte_mempool_ops_register_memory_area_t)
-(const struct rte_mempool *mp, char *vaddr, rte_iova_t iova, size_t len);
-
-/**
* Calculate memory size required to store specified number of objects.
*
* Note that if object size is bigger then page size, then it assumes
@@ -499,10 +493,6 @@ struct rte_mempool_ops {
rte_mempool_dequeue_t dequeue; /**< Dequeue an object. */
rte_mempool_get_count get_count; /**< Get qty of available objs. */
/**
- * Notify new memory area to mempool
- */
- rte_mempool_ops_register_memory_area_t register_memory_area;
- /**
* Optional callback to calculate memory size required to
* store specified number of objects.
*/
@@ -624,27 +614,6 @@ unsigned
rte_mempool_ops_get_count(const struct rte_mempool *mp);

/**
- * @internal wrapper for mempool_ops register_memory_area callback.
- * API to notify the mempool handler when a new memory area is added to pool.
- *
- * @param mp
- * Pointer to the memory pool.
- * @param vaddr
- * Pointer to the buffer virtual address.
- * @param iova
- * Pointer to the buffer IO address.
- * @param len
- * Pool size.
- * @return
- * - 0: Success;
- * - -ENOTSUP - doesn't support register_memory_area ops (valid error case).
- * - Otherwise, rte_mempool_populate_phys fails thus pool create fails.
- */
-int
-rte_mempool_ops_register_memory_area(const struct rte_mempool *mp,
- char *vaddr, rte_iova_t iova, size_t len);
-
-/**
* @internal wrapper for mempool_ops calc_mem_size callback.
* API to calculate size of memory required to store specified number of
* object.
diff --git a/lib/librte_mempool/rte_mempool_ops.c b/lib/librte_mempool/rte_mempool_ops.c
index 5ab643b..37b0802 100644
--- a/lib/librte_mempool/rte_mempool_ops.c
+++ b/lib/librte_mempool/rte_mempool_ops.c
@@ -86,7 +86,6 @@ rte_mempool_register_ops(const struct rte_mempool_ops *h)
ops->enqueue = h->enqueue;
ops->dequeue = h->dequeue;
ops->get_count = h->get_count;
- ops->register_memory_area = h->register_memory_area;
ops->calc_mem_size = h->calc_mem_size;
ops->populate = h->populate;

@@ -128,19 +127,6 @@ rte_mempool_ops_get_count(const struct rte_mempool *mp)
}

/* wrapper to notify new memory area to external mempool */
-int
-rte_mempool_ops_register_memory_area(const struct rte_mempool *mp, char *vaddr,
- rte_iova_t iova, size_t len)
-{
- struct rte_mempool_ops *ops;
-
- ops = rte_mempool_get_ops(mp->ops_index);
-
- RTE_FUNC_PTR_OR_ERR_RET(ops->register_memory_area, -ENOTSUP);
- return ops->register_memory_area(mp, vaddr, iova, len);
-}
-
-/* wrapper to notify new memory area to external mempool */
ssize_t
rte_mempool_ops_calc_mem_size(const struct rte_mempool *mp,
uint32_t obj_num, uint32_t pg_shift,
diff --git a/lib/librte_mempool/rte_mempool_version.map b/lib/librte_mempool/rte_mempool_version.map
index ab30b16..4f7e2b2 100644
--- a/lib/librte_mempool/rte_mempool_version.map
+++ b/lib/librte_mempool/rte_mempool_version.map
@@ -45,7 +45,6 @@ DPDK_16.07 {
DPDK_17.11 {
global:

- rte_mempool_ops_register_memory_area;
rte_mempool_populate_iova;
rte_mempool_populate_iova_tab;
--
2.7.4
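For a driver that only used register_memory_area() to learn about the memory
area, the conversion is mechanical: the same vaddr/iova/len now arrive in the
populate callback, which can record them and then delegate the object layout
to the library. A rough sketch only; the populate prototype, the default-layout
helper and the record function below are assumptions, not code from this series.

/* hypothetical driver helper that stores the area for later lookups */
static void example_record_memory_area(void *priv, void *vaddr,
                                       rte_iova_t iova, size_t len);

static int
example_populate(struct rte_mempool *mp, unsigned int max_objs, void *vaddr,
                 rte_iova_t iova, size_t len,
                 rte_mempool_populate_obj_cb_t *obj_cb, void *obj_cb_arg)
{
        /* the information register_memory_area() used to deliver */
        example_record_memory_area(mp->pool_data, vaddr, iova, len);

        /* lay the objects out exactly as the generic code would */
        return rte_mempool_op_populate_default(mp, max_objs, vaddr, iova, len,
                                               obj_cb, obj_cb_arg);
}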
Andrew Rybchenko
2018-01-23 13:15:57 UTC
Permalink
The size of the memory chunk required to populate mempool objects depends
on how the objects are stored in memory. Different mempool drivers
may have different requirements, and a new operation allows the
memory size to be calculated in accordance with driver requirements and
the requirements on minimum memory chunk size and alignment to be advertised
in a generic way.

Suggested-by: Olivier Matz <***@6wind.com>
Signed-off-by: Andrew Rybchenko <***@solarflare.com>
---
lib/librte_mempool/rte_mempool.c | 95 ++++++++++++++++++++++--------
lib/librte_mempool/rte_mempool.h | 63 +++++++++++++++++++-
lib/librte_mempool/rte_mempool_ops.c | 18 ++++++
lib/librte_mempool/rte_mempool_version.map | 8 +++
4 files changed, 159 insertions(+), 25 deletions(-)

diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c
index e783b9a..1f54f95 100644
--- a/lib/librte_mempool/rte_mempool.c
+++ b/lib/librte_mempool/rte_mempool.c
@@ -233,13 +233,14 @@ rte_mempool_calc_obj_size(uint32_t elt_size, uint32_t flags,
return sz->total_size;
}

-
/*
- * Calculate maximum amount of memory required to store given number of objects.
+ * Internal function to calculate required memory chunk size shared
+ * by default implementation of the corresponding callback and
+ * deprecated external function.
*/
-size_t
-rte_mempool_xmem_size(uint32_t elt_num, size_t total_elt_sz, uint32_t pg_shift,
- unsigned int flags)
+static size_t
+rte_mempool_xmem_size_int(uint32_t elt_num, size_t total_elt_sz,
+ uint32_t pg_shift, unsigned int flags)
{
size_t obj_per_page, pg_num, pg_sz;
unsigned int mask;
@@ -264,6 +265,49 @@ rte_mempool_xmem_size(uint32_t elt_num, size_t total_elt_sz, uint32_t pg_shift,
return pg_num << pg_shift;
}

+ssize_t
+rte_mempool_calc_mem_size_def(const struct rte_mempool *mp,
+ uint32_t obj_num, uint32_t pg_shift,
+ size_t *min_chunk_size,
+ __rte_unused size_t *align)
+{
+ unsigned int mp_flags;
+ int ret;
+ size_t total_elt_sz;
+ size_t mem_size;
+
+ /* Get mempool capabilities */
+ mp_flags = 0;
+ ret = rte_mempool_ops_get_capabilities(mp, &mp_flags);
+ if ((ret < 0) && (ret != -ENOTSUP))
+ return ret;
+
+ total_elt_sz = mp->header_size + mp->elt_size + mp->trailer_size;
+
+ mem_size = rte_mempool_xmem_size_int(obj_num, total_elt_sz, pg_shift,
+ mp->flags | mp_flags);
+
+ if (mp_flags & MEMPOOL_F_CAPA_PHYS_CONTIG)
+ *min_chunk_size = mem_size;
+ else
+ *min_chunk_size = RTE_MAX((size_t)1 << pg_shift, total_elt_sz);
+
+ /* No extra align requirements by default */
+
+ return mem_size;
+}
+
+/*
+ * Calculate maximum amount of memory required to store given number of objects.
+ */
+size_t
+rte_mempool_xmem_size(uint32_t elt_num, size_t total_elt_sz, uint32_t pg_shift,
+ unsigned int flags)
+{
+ return rte_mempool_xmem_size_int(elt_num, total_elt_sz, pg_shift,
+ flags);
+}
+
/*
* Calculate how much memory would be actually required with the
* given memory footprint to store required number of elements.
@@ -570,25 +614,16 @@ rte_mempool_populate_default(struct rte_mempool *mp)
unsigned int mz_flags = RTE_MEMZONE_1GB|RTE_MEMZONE_SIZE_HINT_ONLY;
char mz_name[RTE_MEMZONE_NAMESIZE];
const struct rte_memzone *mz;
- size_t size, total_elt_sz, align, pg_sz, pg_shift;
+ ssize_t mem_size;
+ size_t align, pg_sz, pg_shift;
rte_iova_t iova;
unsigned mz_id, n;
- unsigned int mp_flags;
int ret;

/* mempool must not be populated */
if (mp->nb_mem_chunks != 0)
return -EEXIST;

- /* Get mempool capabilities */
- mp_flags = 0;
- ret = rte_mempool_ops_get_capabilities(mp, &mp_flags);
- if ((ret < 0) && (ret != -ENOTSUP))
- return ret;
-
- /* update mempool capabilities */
- mp->flags |= mp_flags;
-
if (rte_eal_has_hugepages()) {
pg_shift = 0; /* not needed, zone is physically contiguous */
pg_sz = 0;
@@ -599,10 +634,15 @@ rte_mempool_populate_default(struct rte_mempool *mp)
align = pg_sz;
}

- total_elt_sz = mp->header_size + mp->elt_size + mp->trailer_size;
for (mz_id = 0, n = mp->size; n > 0; mz_id++, n -= ret) {
- size = rte_mempool_xmem_size(n, total_elt_sz, pg_shift,
- mp->flags);
+ size_t min_chunk_size;
+
+ mem_size = rte_mempool_ops_calc_mem_size(mp, n, pg_shift,
+ &min_chunk_size, &align);
+ if (mem_size < 0) {
+ ret = mem_size;
+ goto fail;
+ }

ret = snprintf(mz_name, sizeof(mz_name),
RTE_MEMPOOL_MZ_FORMAT "_%d", mp->name, mz_id);
@@ -611,7 +651,7 @@ rte_mempool_populate_default(struct rte_mempool *mp)
goto fail;
}

- mz = rte_memzone_reserve_aligned(mz_name, size,
+ mz = rte_memzone_reserve_aligned(mz_name, mem_size,
mp->socket_id, mz_flags, align);
/* not enough memory, retry with the biggest zone we have */
if (mz == NULL)
@@ -622,6 +662,12 @@ rte_mempool_populate_default(struct rte_mempool *mp)
goto fail;
}

+ if (mz->len < min_chunk_size) {
+ rte_memzone_free(mz);
+ ret = -ENOMEM;
+ goto fail;
+ }
+
if (mp->flags & MEMPOOL_F_NO_PHYS_CONTIG)
iova = RTE_BAD_IOVA;
else
@@ -654,13 +700,14 @@ rte_mempool_populate_default(struct rte_mempool *mp)
static size_t
get_anon_size(const struct rte_mempool *mp)
{
- size_t size, total_elt_sz, pg_sz, pg_shift;
+ size_t size, pg_sz, pg_shift;
+ size_t min_chunk_size;
+ size_t align;

pg_sz = getpagesize();
pg_shift = rte_bsf32(pg_sz);
- total_elt_sz = mp->header_size + mp->elt_size + mp->trailer_size;
- size = rte_mempool_xmem_size(mp->size, total_elt_sz, pg_shift,
- mp->flags);
+ size = rte_mempool_ops_calc_mem_size(mp, mp->size, pg_shift,
+ &min_chunk_size, &align);

return size;
}
diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
index e21026a..be8a371 100644
--- a/lib/librte_mempool/rte_mempool.h
+++ b/lib/librte_mempool/rte_mempool.h
@@ -428,6 +428,39 @@ typedef int (*rte_mempool_get_capabilities_t)(const struct rte_mempool *mp,
typedef int (*rte_mempool_ops_register_memory_area_t)
(const struct rte_mempool *mp, char *vaddr, rte_iova_t iova, size_t len);

+/**
+ * Calculate memory size required to store specified number of objects.
+ *
+ * Note that if object size is bigger then page size, then it assumes
+ * that pages are grouped in subsets of physically continuous pages big
+ * enough to store at least one object.
+ *
+ * @param mp
+ * Pointer to the memory pool.
+ * @param obj_num
+ * Number of objects.
+ * @param pg_shift
+ * LOG2 of the physical pages size. If set to 0, ignore page boundaries.
+ * @param min_chunk_size
+ * Location for minimum size of the memory chunk which may be used to
+ * store memory pool objects.
+ * @param align
+ * Location with required memory chunk alignment.
+ * @return
+ * Required memory size aligned at page boundary.
+ */
+typedef ssize_t (*rte_mempool_calc_mem_size_t)(const struct rte_mempool *mp,
+ uint32_t obj_num, uint32_t pg_shift,
+ size_t *min_chunk_size, size_t *align);
+
+/**
+ * Default way to calculate memory size required to store specified
+ * number of objects.
+ */
+ssize_t rte_mempool_calc_mem_size_def(const struct rte_mempool *mp,
+ uint32_t obj_num, uint32_t pg_shift,
+ size_t *min_chunk_size, size_t *align);
+
/** Structure defining mempool operations structure */
struct rte_mempool_ops {
char name[RTE_MEMPOOL_OPS_NAMESIZE]; /**< Name of mempool ops struct. */
@@ -444,6 +477,11 @@ struct rte_mempool_ops {
* Notify new memory area to mempool
*/
rte_mempool_ops_register_memory_area_t register_memory_area;
+ /**
+ * Optional callback to calculate memory size required to
+ * store specified number of objects.
+ */
+ rte_mempool_calc_mem_size_t calc_mem_size;
} __rte_cache_aligned;

#define RTE_MEMPOOL_MAX_OPS_IDX 16 /**< Max registered ops structs */
@@ -593,6 +631,29 @@ rte_mempool_ops_register_memory_area(const struct rte_mempool *mp,
char *vaddr, rte_iova_t iova, size_t len);

/**
+ * @internal wrapper for mempool_ops calc_mem_size callback.
+ * API to calculate size of memory required to store specified number of
+ * object.
+ *
+ * @param mp
+ * Pointer to the memory pool.
+ * @param obj_num
+ * Number of objects.
+ * @param pg_shift
+ * LOG2 of the physical pages size. If set to 0, ignore page boundaries.
+ * @param min_chunk_size
+ * Location for minimum size of the memory chunk which may be used to
+ * store memory pool objects.
+ * @param align
+ * Location with required memory chunk alignment.
+ * @return
+ * Required memory size aligned at page boundary.
+ */
+ssize_t rte_mempool_ops_calc_mem_size(const struct rte_mempool *mp,
+ uint32_t obj_num, uint32_t pg_shift,
+ size_t *min_chunk_size, size_t *align);
+
+/**
* @internal wrapper for mempool_ops free callback.
*
* @param mp
@@ -1562,7 +1623,7 @@ uint32_t rte_mempool_calc_obj_size(uint32_t elt_size, uint32_t flags,
* of objects. Assume that the memory buffer will be aligned at page
* boundary.
*
- * Note that if object size is bigger then page size, then it assumes
+ * Note that if object size is bigger than page size, then it assumes
* that pages are grouped in subsets of physically continuous pages big
* enough to store at least one object.
*
diff --git a/lib/librte_mempool/rte_mempool_ops.c b/lib/librte_mempool/rte_mempool_ops.c
index 92b9f90..d048b37 100644
--- a/lib/librte_mempool/rte_mempool_ops.c
+++ b/lib/librte_mempool/rte_mempool_ops.c
@@ -88,6 +88,7 @@ rte_mempool_register_ops(const struct rte_mempool_ops *h)
ops->get_count = h->get_count;
ops->get_capabilities = h->get_capabilities;
ops->register_memory_area = h->register_memory_area;
+ ops->calc_mem_size = h->calc_mem_size;

rte_spinlock_unlock(&rte_mempool_ops_table.sl);

@@ -152,6 +153,23 @@ rte_mempool_ops_register_memory_area(const struct rte_mempool *mp, char *vaddr,
return ops->register_memory_area(mp, vaddr, iova, len);
}

+/* wrapper to notify new memory area to external mempool */
+ssize_t
+rte_mempool_ops_calc_mem_size(const struct rte_mempool *mp,
+ uint32_t obj_num, uint32_t pg_shift,
+ size_t *min_chunk_size, size_t *align)
+{
+ struct rte_mempool_ops *ops;
+
+ ops = rte_mempool_get_ops(mp->ops_index);
+
+ if (ops->calc_mem_size == NULL)
+ return rte_mempool_calc_mem_size_def(mp, obj_num, pg_shift,
+ min_chunk_size, align);
+
+ return ops->calc_mem_size(mp, obj_num, pg_shift, min_chunk_size, align);
+}
+
/* sets mempool ops previously registered by rte_mempool_register_ops. */
int
rte_mempool_set_ops_byname(struct rte_mempool *mp, const char *name,
diff --git a/lib/librte_mempool/rte_mempool_version.map b/lib/librte_mempool/rte_mempool_version.map
index 62b76f9..9fa7270 100644
--- a/lib/librte_mempool/rte_mempool_version.map
+++ b/lib/librte_mempool/rte_mempool_version.map
@@ -51,3 +51,11 @@ DPDK_17.11 {
rte_mempool_populate_iova_tab;

} DPDK_16.07;
+
+DPDK_18.05 {
+ global:
+
+ rte_mempool_calc_mem_size_def;
+
+} DPDK_17.11;
+
--
2.7.4
Olivier Matz
2018-01-31 16:45:19 UTC
Permalink
Post by Andrew Rybchenko
Size of memory chunk required to populate mempool objects depends
on how objects are stored in the memory. Different mempool drivers
may have different requirements and a new operation allows to
calculate memory size in accordance with driver requirements and
advertise requirements on minimum memory chunk size and alignment
in a generic way.
The general idea is fine. A few small comments below.

[...]
Post by Andrew Rybchenko
---
lib/librte_mempool/rte_mempool.c | 95 ++++++++++++++++++++++--------
lib/librte_mempool/rte_mempool.h | 63 +++++++++++++++++++-
lib/librte_mempool/rte_mempool_ops.c | 18 ++++++
lib/librte_mempool/rte_mempool_version.map | 8 +++
4 files changed, 159 insertions(+), 25 deletions(-)
diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c
index e783b9a..1f54f95 100644
--- a/lib/librte_mempool/rte_mempool.c
+++ b/lib/librte_mempool/rte_mempool.c
@@ -233,13 +233,14 @@ rte_mempool_calc_obj_size(uint32_t elt_size, uint32_t flags,
return sz->total_size;
}
-
/*
- * Calculate maximum amount of memory required to store given number of objects.
+ * Internal function to calculate required memory chunk size shared
+ * by default implementation of the corresponding callback and
+ * deprecated external function.
*/
-size_t
-rte_mempool_xmem_size(uint32_t elt_num, size_t total_elt_sz, uint32_t pg_shift,
- unsigned int flags)
+static size_t
+rte_mempool_xmem_size_int(uint32_t elt_num, size_t total_elt_sz,
+ uint32_t pg_shift, unsigned int flags)
{
I'm not getting why the function is changed to a static function
in this patch, given that rte_mempool_xmem_size() is redefined
below as a simple wrapper.
Post by Andrew Rybchenko
size_t obj_per_page, pg_num, pg_sz;
unsigned int mask;
@@ -264,6 +265,49 @@ rte_mempool_xmem_size(uint32_t elt_num, size_t total_elt_sz, uint32_t pg_shift,
return pg_num << pg_shift;
}
+ssize_t
+rte_mempool_calc_mem_size_def(const struct rte_mempool *mp,
+ uint32_t obj_num, uint32_t pg_shift,
+ size_t *min_chunk_size,
+ __rte_unused size_t *align)
+{
+ unsigned int mp_flags;
+ int ret;
+ size_t total_elt_sz;
+ size_t mem_size;
+
+ /* Get mempool capabilities */
+ mp_flags = 0;
+ ret = rte_mempool_ops_get_capabilities(mp, &mp_flags);
+ if ((ret < 0) && (ret != -ENOTSUP))
+ return ret;
+
+ total_elt_sz = mp->header_size + mp->elt_size + mp->trailer_size;
+
+ mem_size = rte_mempool_xmem_size_int(obj_num, total_elt_sz, pg_shift,
+ mp->flags | mp_flags);
+
+ if (mp_flags & MEMPOOL_F_CAPA_PHYS_CONTIG)
+ *min_chunk_size = mem_size;
+ else
+ *min_chunk_size = RTE_MAX((size_t)1 << pg_shift, total_elt_sz);
+
+ /* No extra align requirements by default */
maybe set *align = 0 ?
I think it's not sane to keep the variable uninitialized.

[...]
Post by Andrew Rybchenko
+/**
+ * Calculate memory size required to store specified number of objects.
+ *
+ * Note that if object size is bigger then page size, then it assumes
+ * that pages are grouped in subsets of physically continuous pages big
+ * enough to store at least one object.
+ *
+ * Pointer to the memory pool.
+ * Number of objects.
+ * LOG2 of the physical pages size. If set to 0, ignore page boundaries.
+ * Location for minimum size of the memory chunk which may be used to
+ * store memory pool objects.
+ * Location with required memory chunk alignment.
+ * Required memory size aligned at page boundary.
+ */
+typedef ssize_t (*rte_mempool_calc_mem_size_t)(const struct rte_mempool *mp,
+ uint32_t obj_num, uint32_t pg_shift,
+ size_t *min_chunk_size, size_t *align);
The API comment can be enhanced by saying that min_chunk_size and align
are output only parameters. For align, the '0' value could be described
as well.
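
A possible wording for that part of the callback documentation (just an
illustration of the suggestion above, not the final text):

/**
 * ...
 * @param min_chunk_size
 *   Location for minimum size of the memory chunk which may be used to
 *   store memory pool objects (output only).
 * @param align
 *   Location for required memory chunk alignment (output only);
 *   0 means no extra alignment requirement.
 * @return
 *   Required memory size aligned at page boundary.
 */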
Post by Andrew Rybchenko
+
+/**
+ * Default way to calculate memory size required to store specified
+ * number of objects.
+ */
+ssize_t rte_mempool_calc_mem_size_def(const struct rte_mempool *mp,
+ uint32_t obj_num, uint32_t pg_shift,
+ size_t *min_chunk_size, size_t *align);
+
The behavior of the default function could be better explained.
I would prefer "default" instead of "def".
Andrew Rybchenko
2018-02-01 07:15:47 UTC
Permalink
Post by Olivier Matz
Post by Andrew Rybchenko
diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c
index e783b9a..1f54f95 100644
--- a/lib/librte_mempool/rte_mempool.c
+++ b/lib/librte_mempool/rte_mempool.c
@@ -233,13 +233,14 @@ rte_mempool_calc_obj_size(uint32_t elt_size, uint32_t flags,
return sz->total_size;
}
-
/*
- * Calculate maximum amount of memory required to store given number of objects.
+ * Internal function to calculate required memory chunk size shared
+ * by default implementation of the corresponding callback and
+ * deprecated external function.
*/
-size_t
-rte_mempool_xmem_size(uint32_t elt_num, size_t total_elt_sz, uint32_t pg_shift,
- unsigned int flags)
+static size_t
+rte_mempool_xmem_size_int(uint32_t elt_num, size_t total_elt_sz,
+ uint32_t pg_shift, unsigned int flags)
{
I'm not getting why the function is changed to a static function
in this patch, given that rte_mempool_xmem_size() is redefined
below as a simple wrapper.
rte_mempool_xmem_size() is deprecated in the subsequent patch.
This static function is created to reuse the code and to avoid calling
the deprecated function from non-deprecated code. Yes, it is definitely
unclear here, and now I think it is better to make the change in the
patch which deprecates rte_mempool_xmem_size().
Post by Olivier Matz
Post by Andrew Rybchenko
size_t obj_per_page, pg_num, pg_sz;
unsigned int mask;
@@ -264,6 +265,49 @@ rte_mempool_xmem_size(uint32_t elt_num, size_t total_elt_sz, uint32_t pg_shift,
return pg_num << pg_shift;
}
+ssize_t
+rte_mempool_calc_mem_size_def(const struct rte_mempool *mp,
+ uint32_t obj_num, uint32_t pg_shift,
+ size_t *min_chunk_size,
+ __rte_unused size_t *align)
+{
+ unsigned int mp_flags;
+ int ret;
+ size_t total_elt_sz;
+ size_t mem_size;
+
+ /* Get mempool capabilities */
+ mp_flags = 0;
+ ret = rte_mempool_ops_get_capabilities(mp, &mp_flags);
+ if ((ret < 0) && (ret != -ENOTSUP))
+ return ret;
+
+ total_elt_sz = mp->header_size + mp->elt_size + mp->trailer_size;
+
+ mem_size = rte_mempool_xmem_size_int(obj_num, total_elt_sz, pg_shift,
+ mp->flags | mp_flags);
+
+ if (mp_flags & MEMPOOL_F_CAPA_PHYS_CONTIG)
+ *min_chunk_size = mem_size;
+ else
+ *min_chunk_size = RTE_MAX((size_t)1 << pg_shift, total_elt_sz);
+
+ /* No extra align requirements by default */
maybe set *align = 0 ?
I think it's not sane to keep the variable uninitialized.
Right now align is in/out. On input it is either the cache line size
(when hugepages are available) or the page size. These external
constraints could be important for size calculations.
_ops_calc_mem_size() is only allowed to strengthen the alignment.
If that is acceptable, I'll try to highlight it in the description
and check it in the code.
Maybe a more transparent solution is to make it input-only and
highlight that it is the callback's full responsibility to take care of
all alignment requirements (e.g. those imposed by the absence of
hugepages). What do you think?
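
For reference, under the in/out interpretation discussed here a driver
callback would only ever increase the alignment it is given, along the
lines of this sketch (my_calc_mem_size and MY_BLOCK_ALIGN are
hypothetical, shown only to illustrate the contract):

static ssize_t
my_calc_mem_size(const struct rte_mempool *mp, uint32_t obj_num,
		 uint32_t pg_shift, size_t *min_chunk_size, size_t *align)
{
	ssize_t mem_size;

	mem_size = rte_mempool_calc_mem_size_def(mp, obj_num, pg_shift,
						 min_chunk_size, align);
	if (mem_size < 0)
		return mem_size;

	/* Strengthen, never weaken, the externally imposed alignment. */
	*align = RTE_MAX(*align, (size_t)MY_BLOCK_ALIGN);

	return mem_size;
}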
Post by Olivier Matz
[...]
Post by Andrew Rybchenko
+/**
+ * Calculate memory size required to store specified number of objects.
+ *
+ * Note that if object size is bigger then page size, then it assumes
+ * that pages are grouped in subsets of physically continuous pages big
+ * enough to store at least one object.
+ *
+ * Pointer to the memory pool.
+ * Number of objects.
+ * LOG2 of the physical pages size. If set to 0, ignore page boundaries.
+ * Location for minimum size of the memory chunk which may be used to
+ * store memory pool objects.
+ * Location with required memory chunk alignment.
+ * Required memory size aligned at page boundary.
+ */
+typedef ssize_t (*rte_mempool_calc_mem_size_t)(const struct rte_mempool *mp,
+ uint32_t obj_num, uint32_t pg_shift,
+ size_t *min_chunk_size, size_t *align);
The API comment can be enhanced by saying that min_chunk_size and align
are output only parameters. For align, the '0' value could be described
as well.
OK, will fix as soon as we decide if align is input only or input/output.
Post by Olivier Matz
Post by Andrew Rybchenko
+
+/**
+ * Default way to calculate memory size required to store specified
+ * number of objects.
+ */
+ssize_t rte_mempool_calc_mem_size_def(const struct rte_mempool *mp,
+ uint32_t obj_num, uint32_t pg_shift,
+ size_t *min_chunk_size, size_t *align);
+
The behavior of the default function could be better explained.
I would prefer "default" instead of "def".
Will do.
Andrew Rybchenko
2018-01-23 13:16:06 UTC
Permalink
From: "Artem V. Andreev" <***@oktetlabs.ru>

The callback to calculate the required memory area size may require
mempool driver data to be already allocated and initialized.

Signed-off-by: Artem V. Andreev <***@oktetlabs.ru>
Signed-off-by: Andrew Rybchenko <***@solarflare.com>
---
lib/librte_mempool/rte_mempool.c | 29 ++++++++++++++++++++++-------
1 file changed, 22 insertions(+), 7 deletions(-)

diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c
index fc9c95a..cbb4dd5 100644
--- a/lib/librte_mempool/rte_mempool.c
+++ b/lib/librte_mempool/rte_mempool.c
@@ -370,6 +370,21 @@ rte_mempool_free_memchunks(struct rte_mempool *mp)
}
}

+static int
+mempool_maybe_initialize(struct rte_mempool *mp)
+{
+ int ret;
+
+ /* create the internal ring if not already done */
+ if ((mp->flags & MEMPOOL_F_POOL_CREATED) == 0) {
+ ret = rte_mempool_ops_alloc(mp);
+ if (ret != 0)
+ return ret;
+ mp->flags |= MEMPOOL_F_POOL_CREATED;
+ }
+ return 0;
+}
+
int
rte_mempool_populate_one_by_one(struct rte_mempool *mp, unsigned int max_objs,
void *vaddr, rte_iova_t iova, size_t len,
@@ -408,13 +423,9 @@ rte_mempool_populate_iova(struct rte_mempool *mp, char *vaddr,
struct rte_mempool_memhdr *memhdr;
int ret;

- /* create the internal ring if not already done */
- if ((mp->flags & MEMPOOL_F_POOL_CREATED) == 0) {
- ret = rte_mempool_ops_alloc(mp);
- if (ret != 0)
- return ret;
- mp->flags |= MEMPOOL_F_POOL_CREATED;
- }
+ ret = mempool_maybe_initialize(mp);
+ if (ret != 0)
+ return ret;

/* mempool is already populated */
if (mp->populated_size >= mp->size)
@@ -587,6 +598,10 @@ rte_mempool_populate_default(struct rte_mempool *mp)
unsigned mz_id, n;
int ret;

+ ret = mempool_maybe_initialize(mp);
+ if (ret != 0)
+ return ret;
+
/* mempool must not be populated */
if (mp->nb_mem_chunks != 0)
return -EEXIST;
--
2.7.4
Olivier Matz
2018-01-31 16:45:39 UTC
Permalink
Post by Andrew Rybchenko
Callback to calculate required memory area size may require mempool
driver data to be already allocated and initialized.
---
lib/librte_mempool/rte_mempool.c | 29 ++++++++++++++++++++++-------
1 file changed, 22 insertions(+), 7 deletions(-)
diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c
index fc9c95a..cbb4dd5 100644
--- a/lib/librte_mempool/rte_mempool.c
+++ b/lib/librte_mempool/rte_mempool.c
@@ -370,6 +370,21 @@ rte_mempool_free_memchunks(struct rte_mempool *mp)
}
}
+static int
+mempool_maybe_initialize(struct rte_mempool *mp)
+{
+ int ret;
+
+ /* create the internal ring if not already done */
+ if ((mp->flags & MEMPOOL_F_POOL_CREATED) == 0) {
+ ret = rte_mempool_ops_alloc(mp);
+ if (ret != 0)
+ return ret;
+ mp->flags |= MEMPOOL_F_POOL_CREATED;
+ }
+ return 0;
+}
mempool_ops_alloc_once() ?
Andrew Rybchenko
2018-02-01 08:53:07 UTC
Permalink
Post by Olivier Matz
Post by Andrew Rybchenko
Callback to calculate required memory area size may require mempool
driver data to be already allocated and initialized.
---
lib/librte_mempool/rte_mempool.c | 29 ++++++++++++++++++++++-------
1 file changed, 22 insertions(+), 7 deletions(-)
diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c
index fc9c95a..cbb4dd5 100644
--- a/lib/librte_mempool/rte_mempool.c
+++ b/lib/librte_mempool/rte_mempool.c
@@ -370,6 +370,21 @@ rte_mempool_free_memchunks(struct rte_mempool *mp)
}
}
+static int
+mempool_maybe_initialize(struct rte_mempool *mp)
+{
+ int ret;
+
+ /* create the internal ring if not already done */
+ if ((mp->flags & MEMPOOL_F_POOL_CREATED) == 0) {
+ ret = rte_mempool_ops_alloc(mp);
+ if (ret != 0)
+ return ret;
+ mp->flags |= MEMPOOL_F_POOL_CREATED;
+ }
+ return 0;
+}
mempool_ops_alloc_once() ?
Yes, I like it. Will fix.
Andrew Rybchenko
2018-01-23 13:15:59 UTC
Permalink
The callback allows customizing how objects are stored in the
memory chunk. A default implementation of the callback, which simply
puts objects one by one, is available.

Suggested-by: Olivier Matz <***@6wind.com>
Signed-off-by: Andrew Rybchenko <***@solarflare.com>
---
lib/librte_mempool/rte_mempool.c | 44 +++++++++++-----
lib/librte_mempool/rte_mempool.h | 83 ++++++++++++++++++++++++++++++
lib/librte_mempool/rte_mempool_ops.c | 18 +++++++
lib/librte_mempool/rte_mempool_version.map | 1 +
4 files changed, 133 insertions(+), 13 deletions(-)

diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c
index 1f54f95..c5003a9 100644
--- a/lib/librte_mempool/rte_mempool.c
+++ b/lib/librte_mempool/rte_mempool.c
@@ -145,9 +145,6 @@ mempool_add_elem(struct rte_mempool *mp, void *obj, rte_iova_t iova)
tlr = __mempool_get_trailer(obj);
tlr->cookie = RTE_MEMPOOL_TRAILER_COOKIE;
#endif
-
- /* enqueue in ring */
- rte_mempool_ops_enqueue_bulk(mp, &obj, 1);
}

/* call obj_cb() for each mempool element */
@@ -396,6 +393,30 @@ rte_mempool_free_memchunks(struct rte_mempool *mp)
}
}

+int
+rte_mempool_populate_one_by_one(struct rte_mempool *mp, unsigned int max_objs,
+ void *vaddr, rte_iova_t iova, size_t len,
+ rte_mempool_populate_obj_cb_t *obj_cb)
+{
+ size_t total_elt_sz;
+ size_t off;
+ unsigned int i;
+ void *obj;
+
+ total_elt_sz = mp->header_size + mp->elt_size + mp->trailer_size;
+
+ for (off = 0, i = 0; off + total_elt_sz <= len && i < max_objs; i++) {
+ off += mp->header_size;
+ obj = (char *)vaddr + off;
+ obj_cb(mp, obj,
+ (iova == RTE_BAD_IOVA) ? RTE_BAD_IOVA : (iova + off));
+ rte_mempool_ops_enqueue_bulk(mp, &obj, 1);
+ off += mp->elt_size + mp->trailer_size;
+ }
+
+ return i;
+}
+
/* Add objects in the pool, using a physically contiguous memory
* zone. Return the number of objects added, or a negative value
* on error.
@@ -466,16 +487,13 @@ rte_mempool_populate_iova(struct rte_mempool *mp, char *vaddr,
else
off = RTE_PTR_ALIGN_CEIL(vaddr, RTE_CACHE_LINE_SIZE) - vaddr;

- while (off + total_elt_sz <= len && mp->populated_size < mp->size) {
- off += mp->header_size;
- if (iova == RTE_BAD_IOVA)
- mempool_add_elem(mp, (char *)vaddr + off,
- RTE_BAD_IOVA);
- else
- mempool_add_elem(mp, (char *)vaddr + off, iova + off);
- off += mp->elt_size + mp->trailer_size;
- i++;
- }
+ if (off > len)
+ return -EINVAL;
+
+ i = rte_mempool_ops_populate(mp, mp->size - mp->populated_size,
+ (char *)vaddr + off,
+ (iova == RTE_BAD_IOVA) ? RTE_BAD_IOVA : (iova + off),
+ len - off, mempool_add_elem);

/* not enough room to store one object */
if (i == 0)
diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
index be8a371..f6ffab9 100644
--- a/lib/librte_mempool/rte_mempool.h
+++ b/lib/librte_mempool/rte_mempool.h
@@ -461,6 +461,59 @@ ssize_t rte_mempool_calc_mem_size_def(const struct rte_mempool *mp,
uint32_t obj_num, uint32_t pg_shift,
size_t *min_chunk_size, size_t *align);

+/**
+ * Function to be called for each populated object.
+ *
+ * @param mp
+ * A pointer to the mempool structure.
+ * @param vaddr
+ * Object virtual address.
+ * @param iova
+ * Input/output virtual addresss of the object or #RTE_BAD_IOVA.
+ */
+typedef void (rte_mempool_populate_obj_cb_t)(struct rte_mempool *mp,
+ void *vaddr, rte_iova_t iova);
+
+/**
+ * Populate memory pool objects using provided memory chunk.
+ *
+ * Populated objects should be enqueued to the pool, e.g. using
+ * rte_mempool_ops_enqueue_bulk().
+ *
+ * If the given IO address is unknown (iova = RTE_BAD_IOVA),
+ * the chunk doesn't need to be physically contiguous (only virtually),
+ * and allocated objects may span two pages.
+ *
+ * @param mp
+ * A pointer to the mempool structure.
+ * @param max_objs
+ * Maximum number of objects to be populated.
+ * @param vaddr
+ * The virtual address of memory that should be used to store objects.
+ * @param iova
+ * The IO address
+ * @param len
+ * The length of memory in bytes.
+ * @param obj_cb
+ * Callback function to be executed for each populated object.
+ * @return
+ * The number of objects added on success.
+ * On error, no objects are populated and a negative errno is returned.
+ */
+typedef int (*rte_mempool_populate_t)(struct rte_mempool *mp,
+ unsigned int max_objs,
+ void *vaddr, rte_iova_t iova, size_t len,
+ rte_mempool_populate_obj_cb_t *obj_cb);
+
+/**
+ * Default way to populate memory pool object using provided memory
+ * chunk: just slice objects one by one.
+ */
+int rte_mempool_populate_one_by_one(struct rte_mempool *mp,
+ unsigned int max_objs,
+ void *vaddr, rte_iova_t iova, size_t len,
+ rte_mempool_populate_obj_cb_t *obj_cb);
+
/** Structure defining mempool operations structure */
struct rte_mempool_ops {
char name[RTE_MEMPOOL_OPS_NAMESIZE]; /**< Name of mempool ops struct. */
@@ -482,6 +535,11 @@ struct rte_mempool_ops {
* store specified number of objects.
*/
rte_mempool_calc_mem_size_t calc_mem_size;
+ /**
+ * Optional callback to populate mempool objects using
+ * provided memory chunk.
+ */
+ rte_mempool_populate_t populate;
} __rte_cache_aligned;

#define RTE_MEMPOOL_MAX_OPS_IDX 16 /**< Max registered ops structs */
@@ -654,6 +712,31 @@ ssize_t rte_mempool_ops_calc_mem_size(const struct rte_mempool *mp,
size_t *min_chunk_size, size_t *align);

/**
+ * @internal wrapper for mempool_ops populate callback.
+ *
+ * Populate memory pool objects using provided memory chunk.
+ *
+ * @param mp
+ * A pointer to the mempool structure.
+ * @param max_objs
+ * Maximum number of objects to be populated.
+ * @param vaddr
+ * The virtual address of memory that should be used to store objects.
+ * @param iova
+ * The IO address
+ * @param len
+ * The length of memory in bytes.
+ * @param obj_cb
+ * Callback function to be executed for each populated object.
+ * @return
+ * The number of objects added on success.
+ * On error, no objects are populated and a negative errno is returned.
+ */
+int rte_mempool_ops_populate(struct rte_mempool *mp, unsigned int max_objs,
+ void *vaddr, rte_iova_t iova, size_t len,
+ rte_mempool_populate_obj_cb_t *obj_cb);
+
+/**
* @internal wrapper for mempool_ops free callback.
*
* @param mp
diff --git a/lib/librte_mempool/rte_mempool_ops.c b/lib/librte_mempool/rte_mempool_ops.c
index d048b37..7c4a22b 100644
--- a/lib/librte_mempool/rte_mempool_ops.c
+++ b/lib/librte_mempool/rte_mempool_ops.c
@@ -89,6 +89,7 @@ rte_mempool_register_ops(const struct rte_mempool_ops *h)
ops->get_capabilities = h->get_capabilities;
ops->register_memory_area = h->register_memory_area;
ops->calc_mem_size = h->calc_mem_size;
+ ops->populate = h->populate;

rte_spinlock_unlock(&rte_mempool_ops_table.sl);

@@ -170,6 +171,23 @@ rte_mempool_ops_calc_mem_size(const struct rte_mempool *mp,
return ops->calc_mem_size(mp, obj_num, pg_shift, min_chunk_size, align);
}

+/* wrapper to populate memory pool objects using provided memory chunk */
+int
+rte_mempool_ops_populate(struct rte_mempool *mp, unsigned int max_objs,
+ void *vaddr, rte_iova_t iova, size_t len,
+ rte_mempool_populate_obj_cb_t *obj_cb)
+{
+ struct rte_mempool_ops *ops;
+
+ ops = rte_mempool_get_ops(mp->ops_index);
+
+ if (ops->populate == NULL)
+ return rte_mempool_populate_one_by_one(mp, max_objs, vaddr,
+ iova, len, obj_cb);
+
+ return ops->populate(mp, max_objs, vaddr, iova, len, obj_cb);
+}
+
/* sets mempool ops previously registered by rte_mempool_register_ops. */
int
rte_mempool_set_ops_byname(struct rte_mempool *mp, const char *name,
diff --git a/lib/librte_mempool/rte_mempool_version.map b/lib/librte_mempool/rte_mempool_version.map
index 9fa7270..00288de 100644
--- a/lib/librte_mempool/rte_mempool_version.map
+++ b/lib/librte_mempool/rte_mempool_version.map
@@ -56,6 +56,7 @@ DPDK_18.05 {
global:

rte_mempool_calc_mem_size_def;
+ rte_mempool_populate_one_by_one;

} DPDK_17.11;
--
2.7.4
Olivier Matz
2018-01-31 16:45:29 UTC
Permalink
Post by Andrew Rybchenko
The callback allows to customize how objects are stored in the
memory chunk. Default implementation of the callback which simply
puts objects one by one is available.
...
Post by Andrew Rybchenko
+int
+rte_mempool_populate_one_by_one(struct rte_mempool *mp, unsigned int max_objs,
+ void *vaddr, rte_iova_t iova, size_t len,
+ rte_mempool_populate_obj_cb_t *obj_cb)
We shall find a better name for this function.
Unfortunately rte_mempool_populate_default() already exists...

I'm also wondering if having a file rte_mempool_ops_default.c
with all the default behaviors would make sense?

...
Post by Andrew Rybchenko
@@ -466,16 +487,13 @@ rte_mempool_populate_iova(struct rte_mempool *mp, char *vaddr,
else
off = RTE_PTR_ALIGN_CEIL(vaddr, RTE_CACHE_LINE_SIZE) - vaddr;
- while (off + total_elt_sz <= len && mp->populated_size < mp->size) {
- off += mp->header_size;
- if (iova == RTE_BAD_IOVA)
- mempool_add_elem(mp, (char *)vaddr + off,
- RTE_BAD_IOVA);
- else
- mempool_add_elem(mp, (char *)vaddr + off, iova + off);
- off += mp->elt_size + mp->trailer_size;
- i++;
- }
+ if (off > len)
+ return -EINVAL;
+
+ i = rte_mempool_ops_populate(mp, mp->size - mp->populated_size,
+ (char *)vaddr + off,
+ (iova == RTE_BAD_IOVA) ? RTE_BAD_IOVA : (iova + off),
+ len - off, mempool_add_elem);
My initial idea was to provide populate_iova(), populate_virt(), ...
as mempool ops. I don't see any strong requirement for doing it now, but
on the other hand it would break the API to do it later. What's
your opinion?

Also, I see that mempool_add_elem() is passed as a callback to
rte_mempool_ops_populate(). Instead, would it make sense to
export mempool_add_elem() and let the implementation of the populate()
ops call it?
Andrew Rybchenko
2018-02-01 08:51:27 UTC
Permalink
Post by Olivier Matz
Post by Andrew Rybchenko
The callback allows to customize how objects are stored in the
memory chunk. Default implementation of the callback which simply
puts objects one by one is available.
...
Post by Andrew Rybchenko
+int
+rte_mempool_populate_one_by_one(struct rte_mempool *mp, unsigned int max_objs,
+ void *vaddr, rte_iova_t iova, size_t len,
+ rte_mempool_populate_obj_cb_t *obj_cb)
We shall find a better name for this function.
Unfortunately rte_mempool_populate_default() already exists...
I have no better idea right now, but we'll try in the next version.
Maybe rte_mempool_op_populate_default()?
Post by Olivier Matz
I'm also wondering if having a file rte_mempool_ops_default.c
with all the default behaviors would make sense?
I think it is a good idea. Will do.
Post by Olivier Matz
...
Post by Andrew Rybchenko
@@ -466,16 +487,13 @@ rte_mempool_populate_iova(struct rte_mempool *mp, char *vaddr,
else
off = RTE_PTR_ALIGN_CEIL(vaddr, RTE_CACHE_LINE_SIZE) - vaddr;
- while (off + total_elt_sz <= len && mp->populated_size < mp->size) {
- off += mp->header_size;
- if (iova == RTE_BAD_IOVA)
- mempool_add_elem(mp, (char *)vaddr + off,
- RTE_BAD_IOVA);
- else
- mempool_add_elem(mp, (char *)vaddr + off, iova + off);
- off += mp->elt_size + mp->trailer_size;
- i++;
- }
+ if (off > len)
+ return -EINVAL;
+
+ i = rte_mempool_ops_populate(mp, mp->size - mp->populated_size,
+ (char *)vaddr + off,
+ (iova == RTE_BAD_IOVA) ? RTE_BAD_IOVA : (iova + off),
+ len - off, mempool_add_elem);
My initial idea was to provide populate_iova(), populate_virt(), ...
as mempool ops. I don't see any strong requirement for doing it now, but
on the other hand it would break the API to do it later. What's
your opinion?
The suggested solution keeps only generic housekeeping inside
rte_mempool_populate_iova() (driver-data alloc/init, a generic
check whether the pool is already populated, maintenance of the memory
chunk list and object cache-alignment requirements). I think that
only the last item is questionable, but cache-line alignment is
hard-wired in the object size calculation as well, which is not
customizable yet. Maybe we should add a callback for object size
calculation with a default fallback and move object cache-line
alignment into the populate() callback.

As for populate_virt() etc., right now all these functions finally
come down to populate_iova(). I have no customization use cases
for these functions in mind, so it is hard to guess the required
set of parameters. That's why I kept it as is for now.
(In general I prefer to avoid overkill solutions, since the chances
of success, i.e. guessing the prototype 100% correctly, are small.)

Maybe someone else on the list has use cases in mind?
Post by Olivier Matz
Also, I see that mempool_add_elem() is passed as a callback to
rte_mempool_ops_populate(). Instead, would it make sense to
export mempool_add_elem() and let the implementation of populate()
ops to call it?
I think the callback gives a bit more freedom and allows passing one's
own function which performs some per-object actions (e.g. filtering).
In fact I think an opaque parameter should be added to the callback
prototype to make it really useful for customization (to provide
specific context and make it possible to chain callbacks).
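
As a rough illustration of that freedom, a driver populate() op could
skip objects that would otherwise cross a page boundary before handing
them to the provided obj_cb. The sketch below is hypothetical
(my_populate is not part of the series) and assumes total_elt_sz fits
within one page and that getpagesize() from <unistd.h> is available:

static int
my_populate(struct rte_mempool *mp, unsigned int max_objs, void *vaddr,
	    rte_iova_t iova, size_t len,
	    rte_mempool_populate_obj_cb_t *obj_cb)
{
	size_t total_elt_sz = mp->header_size + mp->elt_size +
			      mp->trailer_size;
	size_t pg_sz = getpagesize();
	size_t off = 0;
	unsigned int i;
	void *obj;

	for (i = 0; off + total_elt_sz <= len && i < max_objs; i++) {
		/* Move to the next page if the object would cross one. */
		if (iova != RTE_BAD_IOVA &&
		    (iova + off) / pg_sz !=
		    (iova + off + total_elt_sz - 1) / pg_sz)
			off = RTE_ALIGN_CEIL(iova + off, pg_sz) - iova;
		if (off + total_elt_sz > len)
			break;
		off += mp->header_size;
		obj = (char *)vaddr + off;
		obj_cb(mp, obj,
		       (iova == RTE_BAD_IOVA) ? RTE_BAD_IOVA : iova + off);
		rte_mempool_ops_enqueue_bulk(mp, &obj, 1);
		off += mp->elt_size + mp->trailer_size;
	}

	return i;
}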
Andrew Rybchenko
2018-01-23 13:16:02 UTC
Permalink
Suggested-by: Olivier Matz <***@6wind.com>
Signed-off-by: Andrew Rybchenko <***@solarflare.com>
---
lib/librte_mempool/rte_mempool.h | 3 +++
1 file changed, 3 insertions(+)

diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
index 697d618..e95b1a7 100644
--- a/lib/librte_mempool/rte_mempool.h
+++ b/lib/librte_mempool/rte_mempool.h
@@ -916,6 +916,7 @@ rte_mempool_create(const char *name, unsigned n, unsigned elt_size,
* The pointer to the new allocated mempool, on success. NULL on error
* with rte_errno set appropriately. See rte_mempool_create() for details.
*/
+__rte_deprecated
struct rte_mempool *
rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
unsigned cache_size, unsigned private_data_size,
@@ -1678,6 +1679,7 @@ uint32_t rte_mempool_calc_obj_size(uint32_t elt_size, uint32_t flags,
* @return
* Required memory size aligned at page boundary.
*/
+__rte_deprecated
size_t rte_mempool_xmem_size(uint32_t elt_num, size_t total_elt_sz,
uint32_t pg_shift, unsigned int flags);

@@ -1709,6 +1711,7 @@ size_t rte_mempool_xmem_size(uint32_t elt_num, size_t total_elt_sz,
* buffer is too small, return a negative value whose absolute value
* is the actual number of elements that can be stored in that buffer.
*/
+__rte_deprecated
ssize_t rte_mempool_xmem_usage(void *vaddr, uint32_t elt_num,
size_t total_elt_sz, const rte_iova_t iova[], uint32_t pg_num,
uint32_t pg_shift, unsigned int flags);
--
2.7.4
Andrew Rybchenko
2018-01-23 13:16:00 UTC
Permalink
A custom callback is required to fulfil the requirement to align
the object virtual address to the total object size.

Signed-off-by: Andrew Rybchenko <***@solarflare.com>
---
drivers/mempool/octeontx/rte_mempool_octeontx.c | 28 +++++++++++++++++++++++++
1 file changed, 28 insertions(+)

diff --git a/drivers/mempool/octeontx/rte_mempool_octeontx.c b/drivers/mempool/octeontx/rte_mempool_octeontx.c
index 4ec5efe..6563e80 100644
--- a/drivers/mempool/octeontx/rte_mempool_octeontx.c
+++ b/drivers/mempool/octeontx/rte_mempool_octeontx.c
@@ -174,6 +174,33 @@ octeontx_fpavf_register_memory_area(const struct rte_mempool *mp,
return octeontx_fpavf_pool_set_range(pool_bar, len, vaddr, gpool);
}

+static int
+octeontx_fpavf_populate(struct rte_mempool *mp, unsigned int max_objs,
+ void *vaddr, rte_iova_t iova, size_t len,
+ rte_mempool_populate_obj_cb_t *obj_cb)
+{
+ size_t total_elt_sz;
+ size_t off;
+
+ if (iova == RTE_BAD_IOVA)
+ return -EINVAL;
+
+ total_elt_sz = mp->header_size + mp->elt_size + mp->trailer_size;
+
+ /* align object start address to a multiple of total_elt_sz */
+ off = total_elt_sz - ((uintptr_t)vaddr % total_elt_sz);
+
+ if (len < off)
+ return -EINVAL;
+
+ vaddr = (char *)vaddr + off;
+ iova += off;
+ len -= off;
+
+ return rte_mempool_populate_one_by_one(mp, max_objs, vaddr, iova, len,
+ obj_cb);
+}
+
static struct rte_mempool_ops octeontx_fpavf_ops = {
.name = "octeontx_fpavf",
.alloc = octeontx_fpavf_alloc,
@@ -184,6 +211,7 @@ static struct rte_mempool_ops octeontx_fpavf_ops = {
.get_capabilities = octeontx_fpavf_get_capabilities,
.register_memory_area = octeontx_fpavf_register_memory_area,
.calc_mem_size = octeontx_fpavf_calc_mem_size,
+ .populate = octeontx_fpavf_populate,
};

MEMPOOL_REGISTER_OPS(octeontx_fpavf_ops);
--
2.7.4
Andrew Rybchenko
2018-01-23 13:15:58 UTC
Permalink
The driver requires one and only one physically contiguous
memory chunk for all objects.

Signed-off-by: Andrew Rybchenko <***@solarflare.com>
---
drivers/mempool/octeontx/rte_mempool_octeontx.c | 25 +++++++++++++++++++++++++
1 file changed, 25 insertions(+)

diff --git a/drivers/mempool/octeontx/rte_mempool_octeontx.c b/drivers/mempool/octeontx/rte_mempool_octeontx.c
index d143d05..4ec5efe 100644
--- a/drivers/mempool/octeontx/rte_mempool_octeontx.c
+++ b/drivers/mempool/octeontx/rte_mempool_octeontx.c
@@ -136,6 +136,30 @@ octeontx_fpavf_get_capabilities(const struct rte_mempool *mp,
return 0;
}

+static ssize_t
+octeontx_fpavf_calc_mem_size(const struct rte_mempool *mp,
+ uint32_t obj_num, uint32_t pg_shift,
+ size_t *min_chunk_size, size_t *align)
+{
+ ssize_t mem_size;
+
+ /*
+ * Simply need space for one more object to be able to
+ * fullfil alignment requirements.
+ */
+ mem_size = rte_mempool_calc_mem_size_def(mp, obj_num + 1, pg_shift,
+ min_chunk_size, align);
+ if (mem_size >= 0) {
+ /*
+ * The whole memory area containing the objects must be
+ * physically contiguous.
+ */
+ *min_chunk_size = mem_size;
+ }
+
+ return mem_size;
+}
+
static int
octeontx_fpavf_register_memory_area(const struct rte_mempool *mp,
char *vaddr, rte_iova_t paddr, size_t len)
@@ -159,6 +183,7 @@ static struct rte_mempool_ops octeontx_fpavf_ops = {
.get_count = octeontx_fpavf_get_count,
.get_capabilities = octeontx_fpavf_get_capabilities,
.register_memory_area = octeontx_fpavf_register_memory_area,
+ .calc_mem_size = octeontx_fpavf_calc_mem_size,
};

MEMPOOL_REGISTER_OPS(octeontx_fpavf_ops);
--
2.7.4
santosh
2018-02-01 10:01:24 UTC
Permalink
Hi Andrew,
Post by Andrew Rybchenko
The driver requires one and only one physically contiguous
memory chunk for all objects.
---
drivers/mempool/octeontx/rte_mempool_octeontx.c | 25 +++++++++++++++++++++++++
1 file changed, 25 insertions(+)
diff --git a/drivers/mempool/octeontx/rte_mempool_octeontx.c
b/drivers/mempool/octeontx/rte_mempool_octeontx.c
index d143d05..4ec5efe 100644
--- a/drivers/mempool/octeontx/rte_mempool_octeontx.c
+++ b/drivers/mempool/octeontx/rte_mempool_octeontx.c
@@ -136,6 +136,30 @@ octeontx_fpavf_get_capabilities(const struct rte_mempool *mp,
return 0;
}
+static ssize_t
+octeontx_fpavf_calc_mem_size(const struct rte_mempool *mp,
+ uint32_t obj_num, uint32_t pg_shift,
+ size_t *min_chunk_size, size_t *align)
+{
+ ssize_t mem_size;
+
+ /*
+ * Simply need space for one more object to be able to
+ * fullfil alignment requirements.
+ */
+ mem_size = rte_mempool_calc_mem_size_def(mp, obj_num + 1, pg_shift,
+
I think you don't need that (obj_num + 1), because
rte_xmem_calc_int() will be checking flags for
_ALIGNED + _CAPA_PHYS_CONFIG, i.e.:

mask = MEMPOOL_F_CAPA_BLK_ALIGNED_OBJECTS | MEMPOOL_F_CAPA_PHYS_CONTIG;
if ((flags & mask) == mask)
/* alignment need one additional object */
elt_num += 1;
Post by Andrew Rybchenko
min_chunk_size, align);
+ if (mem_size >= 0) {
+ /*
+ * The whole memory area containing the objects must be
+ * physically contiguous.
+ */
+ *min_chunk_size = mem_size;
+ }
+
+ return mem_size;
+}
+
static int
octeontx_fpavf_register_memory_area(const struct rte_mempool *mp,
char *vaddr, rte_iova_t paddr, size_t len)
@@ -159,6 +183,7 @@ static struct rte_mempool_ops octeontx_fpavf_ops = {
.get_count = octeontx_fpavf_get_count,
.get_capabilities = octeontx_fpavf_get_capabilities,
.register_memory_area = octeontx_fpavf_register_memory_area,
+ .calc_mem_size = octeontx_fpavf_calc_mem_size,
};
MEMPOOL_REGISTER_OPS(octeontx_fpavf_ops);
--
2.7.4
santosh
2018-02-01 13:40:43 UTC
Permalink
Post by Olivier MATZ
Hi Andrew,
Post by Andrew Rybchenko
The driver requires one and only one physically contiguous
memory chunk for all objects.
---
drivers/mempool/octeontx/rte_mempool_octeontx.c | 25 +++++++++++++++++++++++++
1 file changed, 25 insertions(+)
diff --git a/drivers/mempool/octeontx/rte_mempool_octeontx.c
b/drivers/mempool/octeontx/rte_mempool_octeontx.c
index d143d05..4ec5efe 100644
--- a/drivers/mempool/octeontx/rte_mempool_octeontx.c
+++ b/drivers/mempool/octeontx/rte_mempool_octeontx.c
@@ -136,6 +136,30 @@ octeontx_fpavf_get_capabilities(const struct rte_mempool *mp,
return 0;
}
+static ssize_t
+octeontx_fpavf_calc_mem_size(const struct rte_mempool *mp,
+ uint32_t obj_num, uint32_t pg_shift,
+ size_t *min_chunk_size, size_t *align)
+{
+ ssize_t mem_size;
+
+ /*
+ * Simply need space for one more object to be able to
+ * fullfil alignment requirements.
+ */
+ mem_size = rte_mempool_calc_mem_size_def(mp, obj_num + 1, pg_shift,
+
I think, you don't need that (obj_num + 1) as because
rte_xmem_calc_int() will be checking flags for
_ALIGNED + _CAPA_PHYS_CONFIG i.e..
mask = MEMPOOL_F_CAPA_BLK_ALIGNED_OBJECTS | MEMPOOL_F_CAPA_PHYS_CONTIG;
if ((flags & mask) == mask)
/* alignment need one additional object */
elt_num += 1;
OK, you are removing the above check in v2 06/17, so ignore the above comment.
I suggest moving this patch so that it comes after 06/17. Or perhaps keep the
common mempool changes first, followed by the driver-specific changes, in your
v3 series.

Thanks.
Andrew Rybchenko
2018-03-10 15:49:41 UTC
Permalink
Hi Santosh,
Post by santosh
Post by Olivier MATZ
Hi Andrew,
Post by Andrew Rybchenko
The driver requires one and only one physically contiguous
memory chunk for all objects.
---
drivers/mempool/octeontx/rte_mempool_octeontx.c | 25 +++++++++++++++++++++++++
1 file changed, 25 insertions(+)
diff --git a/drivers/mempool/octeontx/rte_mempool_octeontx.c
b/drivers/mempool/octeontx/rte_mempool_octeontx.c
index d143d05..4ec5efe 100644
--- a/drivers/mempool/octeontx/rte_mempool_octeontx.c
+++ b/drivers/mempool/octeontx/rte_mempool_octeontx.c
@@ -136,6 +136,30 @@ octeontx_fpavf_get_capabilities(const struct rte_mempool *mp,
return 0;
}
+static ssize_t
+octeontx_fpavf_calc_mem_size(const struct rte_mempool *mp,
+ uint32_t obj_num, uint32_t pg_shift,
+ size_t *min_chunk_size, size_t *align)
+{
+ ssize_t mem_size;
+
+ /*
+ * Simply need space for one more object to be able to
+ * fullfil alignment requirements.
+ */
+ mem_size = rte_mempool_calc_mem_size_def(mp, obj_num + 1, pg_shift,
+
I think, you don't need that (obj_num + 1) as because
rte_xmem_calc_int() will be checking flags for
_ALIGNED + _CAPA_PHYS_CONFIG i.e..
mask = MEMPOOL_F_CAPA_BLK_ALIGNED_OBJECTS | MEMPOOL_F_CAPA_PHYS_CONTIG;
if ((flags & mask) == mask)
/* alignment need one additional object */
elt_num += 1;
ok, You are removing above check in v2- 06/17, so ignore above comment.
I suggest to move this patch and keep it after 06/17. Or perhaps keep
common mempool changes first then followed by driver specifics changes in your
v3 series.
Finally I've decided to include these changes in the patch which
removes get_capabilities [1]. Please take a look at the suggested version.
I think it is the most transparent solution. Otherwise it is hard
to avoid the issue you found above.

I'm sorry, I forgot to include you in CC.

[1] https://dpdk.org/dev/patchwork/patch/35934/

Thanks,
Andrew.
santosh
2018-03-11 06:31:48 UTC
Permalink
Hi Andrew,
Post by Andrew Rybchenko
Hi Santosh,
Post by santosh
Post by Olivier MATZ
Hi Andrew,
Post by Andrew Rybchenko
The driver requires one and only one physically contiguous
memory chunk for all objects.
---
   drivers/mempool/octeontx/rte_mempool_octeontx.c | 25 +++++++++++++++++++++++++
   1 file changed, 25 insertions(+)
diff --git a/drivers/mempool/octeontx/rte_mempool_octeontx.c
b/drivers/mempool/octeontx/rte_mempool_octeontx.c
index d143d05..4ec5efe 100644
--- a/drivers/mempool/octeontx/rte_mempool_octeontx.c
+++ b/drivers/mempool/octeontx/rte_mempool_octeontx.c
@@ -136,6 +136,30 @@ octeontx_fpavf_get_capabilities(const struct rte_mempool *mp,
           return 0;
   }
+static ssize_t
+octeontx_fpavf_calc_mem_size(const struct rte_mempool *mp,
+                            uint32_t obj_num, uint32_t pg_shift,
+                            size_t *min_chunk_size, size_t *align)
+{
+       ssize_t mem_size;
+
+       /*
+        * Simply need space for one more object to be able to
+        * fullfil alignment requirements.
+        */
+       mem_size = rte_mempool_calc_mem_size_def(mp, obj_num + 1, pg_shift,
+
I think, you don't need that (obj_num + 1) as because
rte_xmem_calc_int() will be checking flags for
_ALIGNED + _CAPA_PHYS_CONFIG i.e..
    mask = MEMPOOL_F_CAPA_BLK_ALIGNED_OBJECTS | MEMPOOL_F_CAPA_PHYS_CONTIG;
    if ((flags & mask) == mask)
        /* alignment need one additional object */
        elt_num += 1;
ok, You are removing above check in v2- 06/17, so ignore above comment.
I suggest to move this patch and keep it after 06/17. Or perhaps keep
common mempool changes first then followed by driver specifics changes in your
v3 series.
Finally I've decided to include these changes into the patch which
removes get_capabilities [1]. Please, take a look at suggested version.
I think it is the most transparent solution. Otherwise it is hard
to avoid the issue found by you above.
Sure. I'll review.
Post by Andrew Rybchenko
I'm sorry, I've forgot to include you in CC.
NP,

Thanks.
Post by Andrew Rybchenko
[1] https://dpdk.org/dev/patchwork/patch/35934/
Thanks,
Andrew.
Andrew Rybchenko
2018-01-23 13:16:04 UTC
Permalink
The populate mempool driver callback is executed a bit later than
register memory area, provides the same information and will
substitute the latter since it gives more flexibility: in addition
to notifying about the memory area, it allows customizing how mempool
objects are stored in memory.

Signed-off-by: Andrew Rybchenko <***@solarflare.com>
---
drivers/mempool/dpaa/dpaa_mempool.c | 13 +++++++------
1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/drivers/mempool/dpaa/dpaa_mempool.c b/drivers/mempool/dpaa/dpaa_mempool.c
index ddc4e47..a179804 100644
--- a/drivers/mempool/dpaa/dpaa_mempool.c
+++ b/drivers/mempool/dpaa/dpaa_mempool.c
@@ -260,10 +260,9 @@ dpaa_mbuf_get_count(const struct rte_mempool *mp)
}

static int
-dpaa_register_memory_area(const struct rte_mempool *mp,
- char *vaddr __rte_unused,
- rte_iova_t paddr __rte_unused,
- size_t len)
+dpaa_populate(const struct rte_mempool *mp, unsigned int max_objs,
+ char *vaddr, rte_iova_t paddr, size_t len,
+ rte_mempool_populate_obj_cb_t *obj_cb)
{
struct dpaa_bp_info *bp_info;
unsigned int total_elt_sz;
@@ -286,7 +285,9 @@ dpaa_register_memory_area(const struct rte_mempool *mp,
/* Else, Memory will be allocated from multiple memzones */
bp_info->flags |= DPAA_MPOOL_MULTI_MEMZONE;

- return 0;
+ return rte_mempool_populate_one_by_one(mp, max_objs, vaddr, paddr, len,
+ obj_cb);
+
}

struct rte_mempool_ops dpaa_mpool_ops = {
@@ -296,7 +297,7 @@ struct rte_mempool_ops dpaa_mpool_ops = {
.enqueue = dpaa_mbuf_free_bulk,
.dequeue = dpaa_mbuf_alloc_bulk,
.get_count = dpaa_mbuf_get_count,
- .register_memory_area = dpaa_register_memory_area,
+ .populate = dpaa_populate,
};

MEMPOOL_REGISTER_OPS(dpaa_mpool_ops);
--
2.7.4
Andrew Rybchenko
2018-01-23 13:16:08 UTC
Permalink
From: "Artem V. Andreev" <***@oktetlabs.ru>

Mempool get/put API cares about cache itself, but sometimes it is
required to flush the cache explicitly.

The function is moved in the file since it now requires
rte_mempool_default_cache().
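
With this change the flush helper can also be used without a
user-owned cache; for example (illustrative usage only):

	/* Flush this lcore's default cache, if the mempool has one. */
	rte_mempool_cache_flush(NULL, mp);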

Signed-off-by: Artem V. Andreev <***@oktetlabs.ru>
Signed-off-by: Andrew Rybchenko <***@solarflare.com>
---
lib/librte_mempool/rte_mempool.h | 36 ++++++++++++++++++++----------------
1 file changed, 20 insertions(+), 16 deletions(-)

diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
index 6a0039d..16d95ae 100644
--- a/lib/librte_mempool/rte_mempool.h
+++ b/lib/librte_mempool/rte_mempool.h
@@ -1148,22 +1148,6 @@ void
rte_mempool_cache_free(struct rte_mempool_cache *cache);

/**
- * Flush a user-owned mempool cache to the specified mempool.
- *
- * @param cache
- * A pointer to the mempool cache.
- * @param mp
- * A pointer to the mempool.
- */
-static __rte_always_inline void
-rte_mempool_cache_flush(struct rte_mempool_cache *cache,
- struct rte_mempool *mp)
-{
- rte_mempool_ops_enqueue_bulk(mp, cache->objs, cache->len);
- cache->len = 0;
-}
-
-/**
* Get a pointer to the per-lcore default mempool cache.
*
* @param mp
@@ -1186,6 +1170,26 @@ rte_mempool_default_cache(struct rte_mempool *mp, unsigned lcore_id)
}

/**
+ * Flush a user-owned mempool cache to the specified mempool.
+ *
+ * @param cache
+ * A pointer to the mempool cache.
+ * @param mp
+ * A pointer to the mempool.
+ */
+static __rte_always_inline void
+rte_mempool_cache_flush(struct rte_mempool_cache *cache,
+ struct rte_mempool *mp)
+{
+ if (cache == NULL)
+ cache = rte_mempool_default_cache(mp, rte_lcore_id());
+ if (cache == NULL || cache->len == 0)
+ return;
+ rte_mempool_ops_enqueue_bulk(mp, cache->objs, cache->len);
+ cache->len = 0;
+}
+
+/**
* @internal Put several objects back in the mempool; used internally.
* @param mp
* A pointer to the mempool structure.
--
2.7.4
Andrew Rybchenko
2018-01-23 13:16:01 UTC
Permalink
The callback was introduced to let generic code know the octeontx
mempool driver requirements: use a single physically contiguous
memory chunk to store all objects and align object addresses to the
total object size. Now these requirements are met using the new
callbacks to calculate the required memory chunk size and to populate
objects using the provided memory chunk.

These capability flags are not used anywhere else.

Restricting capabilities to flags is not generic and likely to
be insufficient to describe mempool driver features. If required
in the future, an API which returns structured information may be
added.
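
In other words, a driver that previously advertised the capability flags
now expresses the same requirements through the dedicated callbacks; a
hypothetical ops structure (names are illustrative only) would change
roughly like this:

static struct rte_mempool_ops my_driver_ops = {
	.name		= "my_driver",
	.alloc		= my_alloc,
	.free		= my_free,
	.enqueue	= my_enqueue,
	.dequeue	= my_dequeue,
	.get_count	= my_get_count,
	/* was: .get_capabilities = my_get_capabilities, */
	.calc_mem_size	= my_calc_mem_size,	/* contiguity/size needs */
	.populate	= my_populate,		/* object placement needs */
};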

Signed-off-by: Andrew Rybchenko <***@solarflare.com>
---
drivers/mempool/octeontx/rte_mempool_octeontx.c | 11 -----
lib/librte_mempool/rte_mempool.c | 56 +++----------------------
lib/librte_mempool/rte_mempool.h | 44 -------------------
lib/librte_mempool/rte_mempool_ops.c | 14 -------
lib/librte_mempool/rte_mempool_version.map | 1 -
5 files changed, 5 insertions(+), 121 deletions(-)

diff --git a/drivers/mempool/octeontx/rte_mempool_octeontx.c b/drivers/mempool/octeontx/rte_mempool_octeontx.c
index 6563e80..36cc23b 100644
--- a/drivers/mempool/octeontx/rte_mempool_octeontx.c
+++ b/drivers/mempool/octeontx/rte_mempool_octeontx.c
@@ -126,16 +126,6 @@ octeontx_fpavf_get_count(const struct rte_mempool *mp)
return octeontx_fpa_bufpool_free_count(pool);
}

-static int
-octeontx_fpavf_get_capabilities(const struct rte_mempool *mp,
- unsigned int *flags)
-{
- RTE_SET_USED(mp);
- *flags |= (MEMPOOL_F_CAPA_PHYS_CONTIG |
- MEMPOOL_F_CAPA_BLK_ALIGNED_OBJECTS);
- return 0;
-}
-
static ssize_t
octeontx_fpavf_calc_mem_size(const struct rte_mempool *mp,
uint32_t obj_num, uint32_t pg_shift,
@@ -208,7 +198,6 @@ static struct rte_mempool_ops octeontx_fpavf_ops = {
.enqueue = octeontx_fpavf_enqueue,
.dequeue = octeontx_fpavf_dequeue,
.get_count = octeontx_fpavf_get_count,
- .get_capabilities = octeontx_fpavf_get_capabilities,
.register_memory_area = octeontx_fpavf_register_memory_area,
.calc_mem_size = octeontx_fpavf_calc_mem_size,
.populate = octeontx_fpavf_populate,
diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c
index c5003a9..32b3f94 100644
--- a/lib/librte_mempool/rte_mempool.c
+++ b/lib/librte_mempool/rte_mempool.c
@@ -237,15 +237,9 @@ rte_mempool_calc_obj_size(uint32_t elt_size, uint32_t flags,
*/
static size_t
rte_mempool_xmem_size_int(uint32_t elt_num, size_t total_elt_sz,
- uint32_t pg_shift, unsigned int flags)
+ uint32_t pg_shift, __rte_unused unsigned int flags)
{
size_t obj_per_page, pg_num, pg_sz;
- unsigned int mask;
-
- mask = MEMPOOL_F_CAPA_BLK_ALIGNED_OBJECTS | MEMPOOL_F_CAPA_PHYS_CONTIG;
- if ((flags & mask) == mask)
- /* alignment need one additional object */
- elt_num += 1;

if (total_elt_sz == 0)
return 0;
@@ -268,26 +262,15 @@ rte_mempool_calc_mem_size_def(const struct rte_mempool *mp,
size_t *min_chunk_size,
__rte_unused size_t *align)
{
- unsigned int mp_flags;
- int ret;
size_t total_elt_sz;
size_t mem_size;

- /* Get mempool capabilities */
- mp_flags = 0;
- ret = rte_mempool_ops_get_capabilities(mp, &mp_flags);
- if ((ret < 0) && (ret != -ENOTSUP))
- return ret;
-
total_elt_sz = mp->header_size + mp->elt_size + mp->trailer_size;

mem_size = rte_mempool_xmem_size_int(obj_num, total_elt_sz, pg_shift,
- mp->flags | mp_flags);
+ mp->flags);

- if (mp_flags & MEMPOOL_F_CAPA_PHYS_CONTIG)
- *min_chunk_size = mem_size;
- else
- *min_chunk_size = RTE_MAX((size_t)1 << pg_shift, total_elt_sz);
+ *min_chunk_size = RTE_MAX((size_t)1 << pg_shift, total_elt_sz);

/* No extra align requirements by default */

@@ -312,18 +295,12 @@ rte_mempool_xmem_size(uint32_t elt_num, size_t total_elt_sz, uint32_t pg_shift,
ssize_t
rte_mempool_xmem_usage(__rte_unused void *vaddr, uint32_t elt_num,
size_t total_elt_sz, const rte_iova_t iova[], uint32_t pg_num,
- uint32_t pg_shift, unsigned int flags)
+ uint32_t pg_shift, __rte_unused unsigned int flags)
{
uint32_t elt_cnt = 0;
rte_iova_t start, end;
uint32_t iova_idx;
size_t pg_sz = (size_t)1 << pg_shift;
- unsigned int mask;
-
- mask = MEMPOOL_F_CAPA_BLK_ALIGNED_OBJECTS | MEMPOOL_F_CAPA_PHYS_CONTIG;
- if ((flags & mask) == mask)
- /* alignment need one additional object */
- elt_num += 1;

/* if iova is NULL, assume contiguous memory */
if (iova == NULL) {
@@ -426,8 +403,6 @@ rte_mempool_populate_iova(struct rte_mempool *mp, char *vaddr,
rte_iova_t iova, size_t len, rte_mempool_memchunk_free_cb_t *free_cb,
void *opaque)
{
- unsigned total_elt_sz;
- unsigned int mp_cap_flags;
unsigned i = 0;
size_t off;
struct rte_mempool_memhdr *memhdr;
@@ -450,24 +425,6 @@ rte_mempool_populate_iova(struct rte_mempool *mp, char *vaddr,
if (mp->populated_size >= mp->size)
return -ENOSPC;

- total_elt_sz = mp->header_size + mp->elt_size + mp->trailer_size;
-
- /* Get mempool capabilities */
- mp_cap_flags = 0;
- ret = rte_mempool_ops_get_capabilities(mp, &mp_cap_flags);
- if ((ret < 0) && (ret != -ENOTSUP))
- return ret;
-
- /* Detect pool area has sufficient space for elements */
- if (mp_cap_flags & MEMPOOL_F_CAPA_PHYS_CONTIG) {
- if (len < total_elt_sz * mp->size) {
- RTE_LOG(ERR, MEMPOOL,
- "pool area %" PRIx64 " not enough\n",
- (uint64_t)len);
- return -ENOSPC;
- }
- }
-
memhdr = rte_zmalloc("MEMPOOL_MEMHDR", sizeof(*memhdr), 0);
if (memhdr == NULL)
return -ENOMEM;
@@ -479,10 +436,7 @@ rte_mempool_populate_iova(struct rte_mempool *mp, char *vaddr,
memhdr->free_cb = free_cb;
memhdr->opaque = opaque;

- if (mp_cap_flags & MEMPOOL_F_CAPA_BLK_ALIGNED_OBJECTS)
- /* align object start address to a multiple of total_elt_sz */
- off = total_elt_sz - ((uintptr_t)vaddr % total_elt_sz);
- else if (mp->flags & MEMPOOL_F_NO_CACHE_ALIGN)
+ if (mp->flags & MEMPOOL_F_NO_CACHE_ALIGN)
off = RTE_PTR_ALIGN_CEIL(vaddr, 8) - vaddr;
else
off = RTE_PTR_ALIGN_CEIL(vaddr, RTE_CACHE_LINE_SIZE) - vaddr;
diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
index f6ffab9..697d618 100644
--- a/lib/librte_mempool/rte_mempool.h
+++ b/lib/librte_mempool/rte_mempool.h
@@ -274,24 +274,6 @@ struct rte_mempool {
#define MEMPOOL_F_SC_GET 0x0008 /**< Default get is "single-consumer".*/
#define MEMPOOL_F_POOL_CREATED 0x0010 /**< Internal: pool is created. */
#define MEMPOOL_F_NO_PHYS_CONTIG 0x0020 /**< Don't need physically contiguous objs. */
-/**
- * This capability flag is advertised by a mempool handler, if the whole
- * memory area containing the objects must be physically contiguous.
- * Note: This flag should not be passed by application.
- */
-#define MEMPOOL_F_CAPA_PHYS_CONTIG 0x0040
-/**
- * This capability flag is advertised by a mempool handler. Used for a case
- * where mempool driver wants object start address(vaddr) aligned to block
- * size(/ total element size).
- *
- * Note:
- * - This flag should not be passed by application.
- * Flag used for mempool driver only.
- * - Mempool driver must also set MEMPOOL_F_CAPA_PHYS_CONTIG flag along with
- * MEMPOOL_F_CAPA_BLK_ALIGNED_OBJECTS.
- */
-#define MEMPOOL_F_CAPA_BLK_ALIGNED_OBJECTS 0x0080

/**
* @internal When debug is enabled, store some statistics.
@@ -417,12 +399,6 @@ typedef int (*rte_mempool_dequeue_t)(struct rte_mempool *mp,
typedef unsigned (*rte_mempool_get_count)(const struct rte_mempool *mp);

/**
- * Get the mempool capabilities.
- */
-typedef int (*rte_mempool_get_capabilities_t)(const struct rte_mempool *mp,
- unsigned int *flags);
-
-/**
* Notify new memory area to mempool.
*/
typedef int (*rte_mempool_ops_register_memory_area_t)
@@ -523,10 +499,6 @@ struct rte_mempool_ops {
rte_mempool_dequeue_t dequeue; /**< Dequeue an object. */
rte_mempool_get_count get_count; /**< Get qty of available objs. */
/**
- * Get the mempool capabilities
- */
- rte_mempool_get_capabilities_t get_capabilities;
- /**
* Notify new memory area to mempool
*/
rte_mempool_ops_register_memory_area_t register_memory_area;
@@ -652,22 +624,6 @@ unsigned
rte_mempool_ops_get_count(const struct rte_mempool *mp);

/**
- * @internal wrapper for mempool_ops get_capabilities callback.
- *
- * @param mp [in]
- * Pointer to the memory pool.
- * @param flags [out]
- * Pointer to the mempool flags.
- * @return
- * - 0: Success; The mempool driver has advertised his pool capabilities in
- * flags param.
- * - -ENOTSUP - doesn't support get_capabilities ops (valid case).
- * - Otherwise, pool create fails.
- */
-int
-rte_mempool_ops_get_capabilities(const struct rte_mempool *mp,
- unsigned int *flags);
-/**
* @internal wrapper for mempool_ops register_memory_area callback.
* API to notify the mempool handler when a new memory area is added to pool.
*
diff --git a/lib/librte_mempool/rte_mempool_ops.c b/lib/librte_mempool/rte_mempool_ops.c
index 7c4a22b..5ab643b 100644
--- a/lib/librte_mempool/rte_mempool_ops.c
+++ b/lib/librte_mempool/rte_mempool_ops.c
@@ -86,7 +86,6 @@ rte_mempool_register_ops(const struct rte_mempool_ops *h)
ops->enqueue = h->enqueue;
ops->dequeue = h->dequeue;
ops->get_count = h->get_count;
- ops->get_capabilities = h->get_capabilities;
ops->register_memory_area = h->register_memory_area;
ops->calc_mem_size = h->calc_mem_size;
ops->populate = h->populate;
@@ -128,19 +127,6 @@ rte_mempool_ops_get_count(const struct rte_mempool *mp)
return ops->get_count(mp);
}

-/* wrapper to get external mempool capabilities. */
-int
-rte_mempool_ops_get_capabilities(const struct rte_mempool *mp,
- unsigned int *flags)
-{
- struct rte_mempool_ops *ops;
-
- ops = rte_mempool_get_ops(mp->ops_index);
-
- RTE_FUNC_PTR_OR_ERR_RET(ops->get_capabilities, -ENOTSUP);
- return ops->get_capabilities(mp, flags);
-}
-
/* wrapper to notify new memory area to external mempool */
int
rte_mempool_ops_register_memory_area(const struct rte_mempool *mp, char *vaddr,
diff --git a/lib/librte_mempool/rte_mempool_version.map b/lib/librte_mempool/rte_mempool_version.map
index 00288de..ab30b16 100644
--- a/lib/librte_mempool/rte_mempool_version.map
+++ b/lib/librte_mempool/rte_mempool_version.map
@@ -45,7 +45,6 @@ DPDK_16.07 {
DPDK_17.11 {
global:

- rte_mempool_ops_get_capabilities;
rte_mempool_ops_register_memory_area;
rte_mempool_populate_iova;
rte_mempool_populate_iova_tab;
--
2.7.4
Andrew Rybchenko
2018-01-23 13:16:03 UTC
Permalink
The callback to populate pool objects has all the required information and
is executed a bit later than the register memory area callback.

Signed-off-by: Andrew Rybchenko <***@solarflare.com>
---
drivers/mempool/octeontx/rte_mempool_octeontx.c | 25 ++++++++++---------------
1 file changed, 10 insertions(+), 15 deletions(-)

diff --git a/drivers/mempool/octeontx/rte_mempool_octeontx.c b/drivers/mempool/octeontx/rte_mempool_octeontx.c
index 36cc23b..8700bfb 100644
--- a/drivers/mempool/octeontx/rte_mempool_octeontx.c
+++ b/drivers/mempool/octeontx/rte_mempool_octeontx.c
@@ -151,26 +151,15 @@ octeontx_fpavf_calc_mem_size(const struct rte_mempool *mp,
}

static int
-octeontx_fpavf_register_memory_area(const struct rte_mempool *mp,
- char *vaddr, rte_iova_t paddr, size_t len)
-{
- RTE_SET_USED(paddr);
- uint8_t gpool;
- uintptr_t pool_bar;
-
- gpool = octeontx_fpa_bufpool_gpool(mp->pool_id);
- pool_bar = mp->pool_id & ~(uint64_t)FPA_GPOOL_MASK;
-
- return octeontx_fpavf_pool_set_range(pool_bar, len, vaddr, gpool);
-}
-
-static int
octeontx_fpavf_populate(struct rte_mempool *mp, unsigned int max_objs,
void *vaddr, rte_iova_t iova, size_t len,
rte_mempool_populate_obj_cb_t *obj_cb)
{
size_t total_elt_sz;
size_t off;
+ uint8_t gpool;
+ uintptr_t pool_bar;
+ int ret;

if (iova == RTE_BAD_IOVA)
return -EINVAL;
@@ -187,6 +176,13 @@ octeontx_fpavf_populate(struct rte_mempool *mp, unsigned int max_objs,
iova += off;
len -= off;

+ gpool = octeontx_fpa_bufpool_gpool(mp->pool_id);
+ pool_bar = mp->pool_id & ~(uint64_t)FPA_GPOOL_MASK;
+
+ ret = octeontx_fpavf_pool_set_range(pool_bar, len, vaddr, gpool);
+ if (ret < 0)
+ return ret;
+
return rte_mempool_populate_one_by_one(mp, max_objs, vaddr, iova, len,
obj_cb);
}
@@ -198,7 +194,6 @@ static struct rte_mempool_ops octeontx_fpavf_ops = {
.enqueue = octeontx_fpavf_enqueue,
.dequeue = octeontx_fpavf_dequeue,
.get_count = octeontx_fpavf_get_count,
- .register_memory_area = octeontx_fpavf_register_memory_area,
.calc_mem_size = octeontx_fpavf_calc_mem_size,
.populate = octeontx_fpavf_populate,
};
--
2.7.4
Andrew Rybchenko
2018-01-23 13:16:09 UTC
Permalink
From: "Artem V. Andreev" <***@oktetlabs.ru>

Primarily, it is intended as a way for the mempool driver to provide
additional information on how it lays out objects inside the mempool.
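
As an illustration (not part of the patch), a minimal sketch of how a user
of the API might query this information once a driver implements the
callback; the contig_block_size field is only added by a later patch in this
series, and -ENOTSUP simply means the driver has no get_info callback:

#include <rte_mempool.h>

static unsigned int
mempool_contig_block_size(const struct rte_mempool *mp)
{
	struct rte_mempool_info info;

	/* -ENOTSUP is a valid case: the driver has no get_info callback */
	if (rte_mempool_ops_get_info(mp, &info) != 0)
		return 0;

	return info.contig_block_size;
}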

Signed-off-by: Artem V. Andreev <***@oktetlabs.ru>
Signed-off-by: Andrew Rybchenko <***@solarflare.com>
---
lib/librte_mempool/rte_mempool.h | 31 +++++++++++++++++++++++++++++++
lib/librte_mempool/rte_mempool_ops.c | 15 +++++++++++++++
2 files changed, 46 insertions(+)

diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
index 16d95ae..75630e6 100644
--- a/lib/librte_mempool/rte_mempool.h
+++ b/lib/librte_mempool/rte_mempool.h
@@ -218,6 +218,11 @@ struct rte_mempool_memhdr {
void *opaque; /**< Argument passed to the free callback */
};

+/*
+ * Additional information about the mempool
+ */
+struct rte_mempool_info;
+
/**
* The RTE mempool structure.
*/
@@ -484,6 +489,13 @@ int rte_mempool_populate_one_by_one(struct rte_mempool *mp,
void *vaddr, rte_iova_t iova, size_t len,
rte_mempool_populate_obj_cb_t *obj_cb);

+/**
+ * Get some additional information about a mempool.
+ */
+typedef int (*rte_mempool_get_info_t)(const struct rte_mempool *mp,
+ struct rte_mempool_info *info);
+
+
/** Structure defining mempool operations structure */
struct rte_mempool_ops {
char name[RTE_MEMPOOL_OPS_NAMESIZE]; /**< Name of mempool ops struct. */
@@ -502,6 +514,10 @@ struct rte_mempool_ops {
* provided memory chunk.
*/
rte_mempool_populate_t populate;
+ /**
+ * Get mempool info
+ */
+ rte_mempool_get_info_t get_info;
} __rte_cache_aligned;

#define RTE_MEMPOOL_MAX_OPS_IDX 16 /**< Max registered ops structs */
@@ -662,6 +678,21 @@ int rte_mempool_ops_populate(struct rte_mempool *mp, unsigned int max_objs,
rte_mempool_populate_obj_cb_t *obj_cb);

/**
+ * @internal wrapper for mempool_ops get_info callback.
+ *
+ * @param mp [in]
+ * Pointer to the memory pool.
+ * @param info [out]
+ * Pointer to the rte_mempool_info structure
+ * @return
+ * - 0: Success; The mempool driver supports retrieving supplementary
+ * mempool information
+ * - -ENOTSUP - doesn't support get_info ops (valid case).
+ */
+int rte_mempool_ops_get_info(const struct rte_mempool *mp,
+ struct rte_mempool_info *info);
+
+/**
* @internal wrapper for mempool_ops free callback.
*
* @param mp
diff --git a/lib/librte_mempool/rte_mempool_ops.c b/lib/librte_mempool/rte_mempool_ops.c
index 37b0802..949ab43 100644
--- a/lib/librte_mempool/rte_mempool_ops.c
+++ b/lib/librte_mempool/rte_mempool_ops.c
@@ -88,6 +88,7 @@ rte_mempool_register_ops(const struct rte_mempool_ops *h)
ops->get_count = h->get_count;
ops->calc_mem_size = h->calc_mem_size;
ops->populate = h->populate;
+ ops->get_info = h->get_info;

rte_spinlock_unlock(&rte_mempool_ops_table.sl);

@@ -160,6 +161,20 @@ rte_mempool_ops_populate(struct rte_mempool *mp, unsigned int max_objs,
return ops->populate(mp, max_objs, vaddr, iova, len, obj_cb);
}

+/* wrapper to get additional mempool info */
+int
+rte_mempool_ops_get_info(const struct rte_mempool *mp,
+ struct rte_mempool_info *info)
+{
+ struct rte_mempool_ops *ops;
+
+ ops = rte_mempool_get_ops(mp->ops_index);
+
+ RTE_FUNC_PTR_OR_ERR_RET(ops->get_info, -ENOTSUP);
+ return ops->get_info(mp, info);
+}
+
+
/* sets mempool ops previously registered by rte_mempool_register_ops. */
int
rte_mempool_set_ops_byname(struct rte_mempool *mp, const char *name,
--
2.7.4
Andrew Rybchenko
2018-01-23 13:16:12 UTC
Permalink
From: "Artem V. Andreev" <***@oktetlabs.ru>

Signed-off-by: Artem V. Andreev <***@oktetlabs.ru>
Signed-off-by: Andrew Rybchenko <***@solarflare.com>
---
drivers/mempool/bucket/rte_mempool_bucket.c | 13 +++++++++++++
1 file changed, 13 insertions(+)

diff --git a/drivers/mempool/bucket/rte_mempool_bucket.c b/drivers/mempool/bucket/rte_mempool_bucket.c
index 03fccf1..d1e7c27 100644
--- a/drivers/mempool/bucket/rte_mempool_bucket.c
+++ b/drivers/mempool/bucket/rte_mempool_bucket.c
@@ -42,6 +42,7 @@ struct bucket_data {
unsigned int header_size;
unsigned int total_elt_size;
unsigned int obj_per_bucket;
+ unsigned int bucket_stack_thresh;
uintptr_t bucket_page_mask;
struct rte_ring *shared_bucket_ring;
struct bucket_stack *buckets[RTE_MAX_LCORE];
@@ -139,6 +140,7 @@ bucket_enqueue(struct rte_mempool *mp, void * const *obj_table,
unsigned int n)
{
struct bucket_data *bd = mp->pool_data;
+ struct bucket_stack *local_stack = bd->buckets[rte_lcore_id()];
unsigned int i;
int rc = 0;

@@ -146,6 +148,15 @@ bucket_enqueue(struct rte_mempool *mp, void * const *obj_table,
rc = bucket_enqueue_single(bd, obj_table[i]);
RTE_ASSERT(rc == 0);
}
+ if (local_stack->top > bd->bucket_stack_thresh) {
+ rte_ring_enqueue_bulk(bd->shared_bucket_ring,
+ &local_stack->objects
+ [bd->bucket_stack_thresh],
+ local_stack->top -
+ bd->bucket_stack_thresh,
+ NULL);
+ local_stack->top = bd->bucket_stack_thresh;
+ }
return rc;
}

@@ -408,6 +419,8 @@ bucket_alloc(struct rte_mempool *mp)
bd->obj_per_bucket = (bd->bucket_mem_size - bucket_header_size) /
bd->total_elt_size;
bd->bucket_page_mask = ~(rte_align64pow2(bd->bucket_mem_size) - 1);
+ /* eventually this should be a tunable parameter */
+ bd->bucket_stack_thresh = (mp->size / bd->obj_per_bucket) * 4 / 3;

if (mp->flags & MEMPOOL_F_SP_PUT)
rg_flags |= RING_F_SP_ENQ;
--
2.7.4
Andrew Rybchenko
2018-01-23 13:16:10 UTC
Permalink
From: "Artem V. Andreev" <***@oktetlabs.ru>

If the mempool manager supports object blocks (physically and virtually
contiguous sets of objects), it is sufficient to get only the first
object, and the function avoids filling in information about each
block member.
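
As an illustration (not part of the patch), a rough sketch of how the new
block dequeue could be used together with get_info; the helper below is
hypothetical and assumes that 0 is the success return value, matching the
debug path in the patch:

#include <errno.h>
#include <rte_common.h>
#include <rte_mempool.h>

static int
dequeue_blocks_example(struct rte_mempool *mp)
{
	struct rte_mempool_info info;
	void *blocks[16];	/* first object of each dequeued block */
	int ret;

	if (rte_mempool_ops_get_info(mp, &info) != 0 ||
	    info.contig_block_size == 0)
		return -ENOTSUP;	/* driver cannot dequeue blocks */

	ret = rte_mempool_get_contig_blocks(mp, blocks, RTE_DIM(blocks));
	if (ret < 0)
		return ret;		/* e.g. -ENOBUFS */

	/*
	 * Each blocks[i] is the first of info.contig_block_size objects
	 * laid out contiguously; objects are later returned to the pool
	 * through the usual put API, separately or in bulks.
	 */
	return 0;
}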

Signed-off-by: Artem V. Andreev <***@oktetlabs.ru>
Signed-off-by: Andrew Rybchenko <***@solarflare.com>
---
lib/librte_mempool/rte_mempool.h | 125 ++++++++++++++++++++++++++++++++++-
lib/librte_mempool/rte_mempool_ops.c | 1 +
2 files changed, 125 insertions(+), 1 deletion(-)

diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
index 75630e6..fa216db 100644
--- a/lib/librte_mempool/rte_mempool.h
+++ b/lib/librte_mempool/rte_mempool.h
@@ -221,7 +221,10 @@ struct rte_mempool_memhdr {
/*
* Additional information about the mempool
*/
-struct rte_mempool_info;
+struct rte_mempool_info {
+ /** Number of objects in the contiguous block */
+ unsigned int contig_block_size;
+};

/**
* The RTE mempool structure.
@@ -399,6 +402,12 @@ typedef int (*rte_mempool_dequeue_t)(struct rte_mempool *mp,
void **obj_table, unsigned int n);

/**
* Dequeue a number of contiguous object blocks from the external pool.
+ */
+typedef int (*rte_mempool_dequeue_contig_blocks_t)(struct rte_mempool *mp,
+ void **first_obj_table, unsigned int n);
+
+/**
* Return the number of available objects in the external pool.
*/
typedef unsigned (*rte_mempool_get_count)(const struct rte_mempool *mp);
@@ -518,6 +527,10 @@ struct rte_mempool_ops {
* Get mempool info
*/
rte_mempool_get_info_t get_info;
+ /**
+ * Dequeue a number of contiguous object blocks.
+ */
+ rte_mempool_dequeue_contig_blocks_t dequeue_contig_blocks;
} __rte_cache_aligned;

#define RTE_MEMPOOL_MAX_OPS_IDX 16 /**< Max registered ops structs */
@@ -596,6 +609,30 @@ rte_mempool_ops_dequeue_bulk(struct rte_mempool *mp,
}

/**
+ * @internal Wrapper for mempool_ops dequeue_contig_blocks callback.
+ *
+ * @param mp
+ * Pointer to the memory pool.
+ * @param first_obj_table
+ * Pointer to a table of void * pointers (first objects).
+ * @param n
+ * Number of blocks to get.
+ * @return
+ * - 0: Success; got n objects.
+ * - <0: Error; code of dequeue function.
+ */
+static inline int
+rte_mempool_ops_dequeue_contig_blocks(struct rte_mempool *mp,
+ void **first_obj_table, unsigned int n)
+{
+ struct rte_mempool_ops *ops;
+
+ ops = rte_mempool_get_ops(mp->ops_index);
+ RTE_ASSERT(ops->dequeue_contig_blocks != NULL);
+ return ops->dequeue_contig_blocks(mp, first_obj_table, n);
+}
+
+/**
* @internal wrapper for mempool_ops enqueue callback.
*
* @param mp
@@ -1500,6 +1537,92 @@ rte_mempool_get(struct rte_mempool *mp, void **obj_p)
}

/**
+ * @internal Get contiguous blocks of objects from the pool. Used internally.
+ * @param mp
+ * A pointer to the mempool structure.
+ * @param first_obj_table
+ * A pointer to a pointer to the first object in each block.
+ * @param n
+ * A number of blocks to get.
+ * @return
+ * - >0: Success
+ * - <0: Error
+ */
+static __rte_always_inline int
+__mempool_generic_get_contig_blocks(struct rte_mempool *mp,
+ void **first_obj_table, unsigned int n)
+{
+ int ret;
+#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
+ struct rte_mempool_info info;
+ rte_mempool_ops_get_info(mp, &info);
+#endif
+
+ ret = rte_mempool_ops_dequeue_contig_blocks(mp, first_obj_table, n);
+ if (ret < 0)
+ __MEMPOOL_STAT_ADD(mp, get_fail,
+ n * info.contig_block_size);
+ else
+ __MEMPOOL_STAT_ADD(mp, get_success,
+ n * info.contig_block_size);
+
+ return ret;
+}
+
+/**
+ * Get a contiguous blocks of objects from the mempool.
+ *
+ * If cache is enabled, consider flushing it first, to reuse objects
+ * as soon as possible.
+ *
+ * The application should check that the driver supports the operation
+ * by calling rte_mempool_ops_get_info() and checking that `contig_block_size`
+ * is not zero.
+ *
+ * @param mp
+ * A pointer to the mempool structure.
+ * @param first_obj_table
+ * A pointer to a pointer to the first object in each block.
+ * @param n
+ * The number of blocks to get from mempool.
+ * @return
+ * - >0: the size of the block
+ * - -ENOBUFS: Not enough entries in the mempool; no object is retrieved.
+ * - -EOPNOTSUPP: The mempool driver does not support block dequeue
+ */
+static __rte_always_inline int
+rte_mempool_get_contig_blocks(struct rte_mempool *mp,
+ void **first_obj_table, unsigned int n)
+{
+ int ret;
+
+ ret = __mempool_generic_get_contig_blocks(mp, first_obj_table, n);
+#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
+ if (ret == 0) {
+ struct rte_mempool_info info;
+ const size_t total_elt_sz =
+ mp->header_size + mp->elt_size + mp->trailer_size;
+ unsigned int i, j;
+
+ rte_mempool_ops_get_info(mp, &info);
+
+ for (i = 0; i < n; ++i) {
+ void *first_obj = first_obj_table[i];
+
+ for (j = 0; j < info.contig_block_size; ++j) {
+ void *obj;
+
+ obj = (void *)((uintptr_t)first_obj +
+ j * total_elt_sz);
+ rte_mempool_check_cookies(mp, &obj, 1, 1);
+ }
+ }
+ }
+#endif
+ return ret;
+}
+
+/**
* Return the number of entries in the mempool.
*
* When cache is enabled, this function has to browse the length of
diff --git a/lib/librte_mempool/rte_mempool_ops.c b/lib/librte_mempool/rte_mempool_ops.c
index 949ab43..9fa8c23 100644
--- a/lib/librte_mempool/rte_mempool_ops.c
+++ b/lib/librte_mempool/rte_mempool_ops.c
@@ -89,6 +89,7 @@ rte_mempool_register_ops(const struct rte_mempool_ops *h)
ops->calc_mem_size = h->calc_mem_size;
ops->populate = h->populate;
ops->get_info = h->get_info;
+ ops->dequeue_contig_blocks = h->dequeue_contig_blocks;

rte_spinlock_unlock(&rte_mempool_ops_table.sl);
--
2.7.4
Andrew Rybchenko
2018-01-23 13:16:07 UTC
Permalink
From: "Artem V. Andreev" <***@oktetlabs.ru>

The manager provides a way to allocate a physically and virtually
contiguous set of objects.

Signed-off-by: Artem V. Andreev <***@oktetlabs.ru>
Signed-off-by: Andrew Rybchenko <***@solarflare.com>
---
MAINTAINERS | 9 +
config/common_base | 2 +
drivers/mempool/Makefile | 1 +
drivers/mempool/bucket/Makefile | 27 +
drivers/mempool/bucket/rte_mempool_bucket.c | 561 +++++++++++++++++++++
.../mempool/bucket/rte_mempool_bucket_version.map | 4 +
mk/rte.app.mk | 1 +
7 files changed, 605 insertions(+)
create mode 100644 drivers/mempool/bucket/Makefile
create mode 100644 drivers/mempool/bucket/rte_mempool_bucket.c
create mode 100644 drivers/mempool/bucket/rte_mempool_bucket_version.map

diff --git a/MAINTAINERS b/MAINTAINERS
index 5788ea0..9df2cf5 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -303,6 +303,15 @@ F: test/test/test_event_eth_rx_adapter.c
F: doc/guides/prog_guide/event_ethernet_rx_adapter.rst


+Memory Pool Drivers
+-------------------
+
+Bucket memory pool
+M: Artem V. Andreev <***@oktetlabs.ru>
+M: Andrew Rybchenko <***@solarflare.com>
+F: drivers/mempool/bucket/
+
+
Bus Drivers
-----------

diff --git a/config/common_base b/config/common_base
index 170a389..4fe42f6 100644
--- a/config/common_base
+++ b/config/common_base
@@ -622,6 +622,8 @@ CONFIG_RTE_LIBRTE_MEMPOOL_DEBUG=n
#
# Compile Mempool drivers
#
+CONFIG_RTE_DRIVER_MEMPOOL_BUCKET=y
+CONFIG_RTE_DRIVER_MEMPOOL_BUCKET_SIZE_KB=64
CONFIG_RTE_DRIVER_MEMPOOL_RING=y
CONFIG_RTE_DRIVER_MEMPOOL_STACK=y

diff --git a/drivers/mempool/Makefile b/drivers/mempool/Makefile
index aae2cb1..45fca04 100644
--- a/drivers/mempool/Makefile
+++ b/drivers/mempool/Makefile
@@ -3,6 +3,7 @@

include $(RTE_SDK)/mk/rte.vars.mk

+DIRS-$(CONFIG_RTE_DRIVER_MEMPOOL_BUCKET) += bucket
DIRS-$(CONFIG_RTE_LIBRTE_DPAA_MEMPOOL) += dpaa
DIRS-$(CONFIG_RTE_LIBRTE_DPAA2_MEMPOOL) += dpaa2
DIRS-$(CONFIG_RTE_DRIVER_MEMPOOL_RING) += ring
diff --git a/drivers/mempool/bucket/Makefile b/drivers/mempool/bucket/Makefile
new file mode 100644
index 0000000..7364916
--- /dev/null
+++ b/drivers/mempool/bucket/Makefile
@@ -0,0 +1,27 @@
+# SPDX-License-Identifier: BSD-3-Clause
+#
+# Copyright (c) 2017-2018 Solarflare Communications Inc.
+# All rights reserved.
+#
+# This software was jointly developed between OKTET Labs (under contract
+# for Solarflare) and Solarflare Communications, Inc.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+#
+# library name
+#
+LIB = librte_mempool_bucket.a
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+
+LDLIBS += -lrte_eal -lrte_mempool -lrte_ring
+
+EXPORT_MAP := rte_mempool_bucket_version.map
+
+LIBABIVER := 1
+
+SRCS-$(CONFIG_RTE_DRIVER_MEMPOOL_BUCKET) += rte_mempool_bucket.c
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/drivers/mempool/bucket/rte_mempool_bucket.c b/drivers/mempool/bucket/rte_mempool_bucket.c
new file mode 100644
index 0000000..dc4e1dc
--- /dev/null
+++ b/drivers/mempool/bucket/rte_mempool_bucket.c
@@ -0,0 +1,561 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Copyright (c) 2017-2018 Solarflare Communications Inc.
+ * All rights reserved.
+ *
+ * This software was jointly developed between OKTET Labs (under contract
+ * for Solarflare) and Solarflare Communications, Inc.
+ */
+
+#include <stdbool.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <rte_errno.h>
+#include <rte_ring.h>
+#include <rte_mempool.h>
+#include <rte_malloc.h>
+
+/*
+ * The general idea of the bucket mempool driver is as follows.
+ * We keep track of physically contiguous groups (buckets) of objects
+ * of a certain size. Every such group has a counter that is
+ * incremented every time an object from that group is enqueued.
+ * Until the bucket is full, no objects from it are eligible for allocation.
+ * If a request is made to dequeue a multiple of the bucket size, it is
+ * satisfied by returning whole buckets instead of separate objects.
+ */
+
+
+struct bucket_header {
+ unsigned int lcore_id;
+ uint8_t fill_cnt;
+};
+
+struct bucket_stack {
+ unsigned int top;
+ unsigned int limit;
+ void *objects[];
+};
+
+struct bucket_data {
+ unsigned int header_size;
+ unsigned int total_elt_size;
+ unsigned int obj_per_bucket;
+ uintptr_t bucket_page_mask;
+ struct rte_ring *shared_bucket_ring;
+ struct bucket_stack *buckets[RTE_MAX_LCORE];
+ /*
+ * Multi-producer single-consumer ring to hold objects that are
+ * returned to the mempool at a different lcore than initially
+ * dequeued
+ */
+ struct rte_ring *adoption_buffer_rings[RTE_MAX_LCORE];
+ struct rte_ring *shared_orphan_ring;
+ struct rte_mempool *pool;
+ unsigned int bucket_mem_size;
+};
+
+static struct bucket_stack *
+bucket_stack_create(const struct rte_mempool *mp, unsigned int n_elts)
+{
+ struct bucket_stack *stack;
+
+ stack = rte_zmalloc_socket("bucket_stack",
+ sizeof(struct bucket_stack) +
+ n_elts * sizeof(void *),
+ RTE_CACHE_LINE_SIZE,
+ mp->socket_id);
+ if (stack == NULL)
+ return NULL;
+ stack->limit = n_elts;
+ stack->top = 0;
+
+ return stack;
+}
+
+static void
+bucket_stack_push(struct bucket_stack *stack, void *obj)
+{
+ RTE_ASSERT(stack->top < stack->limit);
+ stack->objects[stack->top++] = obj;
+}
+
+static void *
+bucket_stack_pop_unsafe(struct bucket_stack *stack)
+{
+ RTE_ASSERT(stack->top > 0);
+ return stack->objects[--stack->top];
+}
+
+static void *
+bucket_stack_pop(struct bucket_stack *stack)
+{
+ if (stack->top == 0)
+ return NULL;
+ return bucket_stack_pop_unsafe(stack);
+}
+
+static int
+bucket_enqueue_single(struct bucket_data *bd, void *obj)
+{
+ int rc = 0;
+ uintptr_t addr = (uintptr_t)obj;
+ struct bucket_header *hdr;
+ unsigned int lcore_id = rte_lcore_id();
+
+ addr &= bd->bucket_page_mask;
+ hdr = (struct bucket_header *)addr;
+
+ if (likely(hdr->lcore_id == lcore_id)) {
+ if (hdr->fill_cnt < bd->obj_per_bucket - 1) {
+ hdr->fill_cnt++;
+ } else {
+ hdr->fill_cnt = 0;
+ /* Stack is big enough to put all buckets */
+ bucket_stack_push(bd->buckets[lcore_id], hdr);
+ }
+ } else if (hdr->lcore_id != LCORE_ID_ANY) {
+ struct rte_ring *adopt_ring =
+ bd->adoption_buffer_rings[hdr->lcore_id];
+
+ rc = rte_ring_enqueue(adopt_ring, obj);
+ /* Ring is big enough to put all objects */
+ RTE_ASSERT(rc == 0);
+ } else if (hdr->fill_cnt < bd->obj_per_bucket - 1) {
+ hdr->fill_cnt++;
+ } else {
+ hdr->fill_cnt = 0;
+ rc = rte_ring_enqueue(bd->shared_bucket_ring, hdr);
+ /* Ring is big enough to put all buckets */
+ RTE_ASSERT(rc == 0);
+ }
+
+ return rc;
+}
+
+static int
+bucket_enqueue(struct rte_mempool *mp, void * const *obj_table,
+ unsigned int n)
+{
+ struct bucket_data *bd = mp->pool_data;
+ unsigned int i;
+ int rc = 0;
+
+ for (i = 0; i < n; i++) {
+ rc = bucket_enqueue_single(bd, obj_table[i]);
+ RTE_ASSERT(rc == 0);
+ }
+ return rc;
+}
+
+static void **
+bucket_fill_obj_table(const struct bucket_data *bd, void **pstart,
+ void **obj_table, unsigned int n)
+{
+ unsigned int i;
+ uint8_t *objptr = *pstart;
+
+ for (objptr += bd->header_size, i = 0; i < n;
+ i++, objptr += bd->total_elt_size)
+ *obj_table++ = objptr;
+ *pstart = objptr;
+ return obj_table;
+}
+
+static int
+bucket_dequeue_orphans(struct bucket_data *bd, void **obj_table,
+ unsigned int n_orphans)
+{
+ unsigned int i;
+ int rc;
+ uint8_t *objptr;
+
+ rc = rte_ring_dequeue_bulk(bd->shared_orphan_ring, obj_table,
+ n_orphans, NULL);
+ if (unlikely(rc != (int)n_orphans)) {
+ struct bucket_header *hdr;
+
+ objptr = bucket_stack_pop(bd->buckets[rte_lcore_id()]);
+ hdr = (struct bucket_header *)objptr;
+
+ if (objptr == NULL) {
+ rc = rte_ring_dequeue(bd->shared_bucket_ring,
+ (void **)&objptr);
+ if (rc != 0) {
+ rte_errno = ENOBUFS;
+ return -rte_errno;
+ }
+ hdr = (struct bucket_header *)objptr;
+ hdr->lcore_id = rte_lcore_id();
+ }
+ hdr->fill_cnt = 0;
+ bucket_fill_obj_table(bd, (void **)&objptr, obj_table,
+ n_orphans);
+ for (i = n_orphans; i < bd->obj_per_bucket; i++,
+ objptr += bd->total_elt_size) {
+ rc = rte_ring_enqueue(bd->shared_orphan_ring,
+ objptr);
+ if (rc != 0) {
+ RTE_ASSERT(0);
+ rte_errno = -rc;
+ return rc;
+ }
+ }
+ }
+
+ return 0;
+}
+
+static int
+bucket_dequeue_buckets(struct bucket_data *bd, void **obj_table,
+ unsigned int n_buckets)
+{
+ struct bucket_stack *cur_stack = bd->buckets[rte_lcore_id()];
+ unsigned int n_buckets_from_stack = RTE_MIN(n_buckets, cur_stack->top);
+ void **obj_table_base = obj_table;
+
+ n_buckets -= n_buckets_from_stack;
+ while (n_buckets_from_stack-- > 0) {
+ void *obj = bucket_stack_pop_unsafe(cur_stack);
+
+ obj_table = bucket_fill_obj_table(bd, &obj, obj_table,
+ bd->obj_per_bucket);
+ }
+ while (n_buckets-- > 0) {
+ struct bucket_header *hdr;
+
+ if (unlikely(rte_ring_dequeue(bd->shared_bucket_ring,
+ (void **)&hdr) != 0)) {
+ /*
+ * Return the already-dequeued buffers
+ * back to the mempool
+ */
+ bucket_enqueue(bd->pool, obj_table_base,
+ obj_table - obj_table_base);
+ rte_errno = ENOBUFS;
+ return -rte_errno;
+ }
+ hdr->lcore_id = rte_lcore_id();
+ obj_table = bucket_fill_obj_table(bd, (void **)&hdr,
+ obj_table,
+ bd->obj_per_bucket);
+ }
+
+ return 0;
+}
+
+static int
+bucket_adopt_orphans(struct bucket_data *bd)
+{
+ int rc = 0;
+ struct rte_ring *adopt_ring =
+ bd->adoption_buffer_rings[rte_lcore_id()];
+
+ if (unlikely(!rte_ring_empty(adopt_ring))) {
+ void *orphan;
+
+ while (rte_ring_sc_dequeue(adopt_ring, &orphan) == 0) {
+ rc = bucket_enqueue_single(bd, orphan);
+ RTE_ASSERT(rc == 0);
+ }
+ }
+ return rc;
+}
+
+static int
+bucket_dequeue(struct rte_mempool *mp, void **obj_table, unsigned int n)
+{
+ struct bucket_data *bd = mp->pool_data;
+ unsigned int n_buckets = n / bd->obj_per_bucket;
+ unsigned int n_orphans = n - n_buckets * bd->obj_per_bucket;
+ int rc = 0;
+
+ bucket_adopt_orphans(bd);
+
+ if (unlikely(n_orphans > 0)) {
+ rc = bucket_dequeue_orphans(bd, obj_table +
+ (n_buckets * bd->obj_per_bucket),
+ n_orphans);
+ if (rc != 0)
+ return rc;
+ }
+
+ if (likely(n_buckets > 0)) {
+ rc = bucket_dequeue_buckets(bd, obj_table, n_buckets);
+ if (unlikely(rc != 0) && n_orphans > 0) {
+ rte_ring_enqueue_bulk(bd->shared_orphan_ring,
+ obj_table + (n_buckets *
+ bd->obj_per_bucket),
+ n_orphans, NULL);
+ }
+ }
+
+ return rc;
+}
+
+static void
+count_underfilled_buckets(struct rte_mempool *mp,
+ void *opaque,
+ struct rte_mempool_memhdr *memhdr,
+ __rte_unused unsigned int mem_idx)
+{
+ unsigned int *pcount = opaque;
+ const struct bucket_data *bd = mp->pool_data;
+ unsigned int bucket_page_sz =
+ (unsigned int)(~bd->bucket_page_mask + 1);
+ uintptr_t align;
+ uint8_t *iter;
+
+ align = (uintptr_t)RTE_PTR_ALIGN_CEIL(memhdr->addr, bucket_page_sz) -
+ (uintptr_t)memhdr->addr;
+
+ for (iter = (uint8_t *)memhdr->addr + align;
+ iter < (uint8_t *)memhdr->addr + memhdr->len;
+ iter += bucket_page_sz) {
+ struct bucket_header *hdr = (struct bucket_header *)iter;
+
+ *pcount += hdr->fill_cnt;
+ }
+}
+
+static unsigned int
+bucket_get_count(const struct rte_mempool *mp)
+{
+ const struct bucket_data *bd = mp->pool_data;
+ unsigned int count =
+ bd->obj_per_bucket * rte_ring_count(bd->shared_bucket_ring) +
+ rte_ring_count(bd->shared_orphan_ring);
+ unsigned int i;
+
+ for (i = 0; i < RTE_MAX_LCORE; i++) {
+ if (!rte_lcore_is_enabled(i))
+ continue;
+ count += bd->obj_per_bucket * bd->buckets[i]->top;
+ }
+
+ rte_mempool_mem_iter((struct rte_mempool *)(uintptr_t)mp,
+ count_underfilled_buckets, &count);
+
+ return count;
+}
+
+static int
+bucket_alloc(struct rte_mempool *mp)
+{
+ int rg_flags = 0;
+ int rc = 0;
+ char rg_name[RTE_RING_NAMESIZE];
+ struct bucket_data *bd;
+ unsigned int i;
+ unsigned int bucket_header_size;
+
+ bd = rte_zmalloc_socket("bucket_pool", sizeof(*bd),
+ RTE_CACHE_LINE_SIZE, mp->socket_id);
+ if (bd == NULL) {
+ rc = -ENOMEM;
+ goto no_mem_for_data;
+ }
+ bd->pool = mp;
+ if (mp->flags & MEMPOOL_F_NO_CACHE_ALIGN)
+ bucket_header_size = sizeof(struct bucket_header);
+ else
+ bucket_header_size = RTE_CACHE_LINE_SIZE;
+ RTE_BUILD_BUG_ON(sizeof(struct bucket_header) > RTE_CACHE_LINE_SIZE);
+ bd->header_size = mp->header_size + bucket_header_size;
+ bd->total_elt_size = mp->header_size + mp->elt_size + mp->trailer_size;
+ bd->bucket_mem_size = RTE_DRIVER_MEMPOOL_BUCKET_SIZE_KB * 1024;
+ bd->obj_per_bucket = (bd->bucket_mem_size - bucket_header_size) /
+ bd->total_elt_size;
+ bd->bucket_page_mask = ~(rte_align64pow2(bd->bucket_mem_size) - 1);
+
+ if (mp->flags & MEMPOOL_F_SP_PUT)
+ rg_flags |= RING_F_SP_ENQ;
+ if (mp->flags & MEMPOOL_F_SC_GET)
+ rg_flags |= RING_F_SC_DEQ;
+
+ for (i = 0; i < RTE_MAX_LCORE; i++) {
+ if (!rte_lcore_is_enabled(i))
+ continue;
+ bd->buckets[i] =
+ bucket_stack_create(mp, mp->size / bd->obj_per_bucket);
+ if (bd->buckets[i] == NULL) {
+ rc = -ENOMEM;
+ goto no_mem_for_stacks;
+ }
+ rc = snprintf(rg_name, sizeof(rg_name),
+ RTE_MEMPOOL_MZ_FORMAT ".a%u", mp->name, i);
+ if (rc < 0 || rc >= (int)sizeof(rg_name)) {
+ rc = -ENAMETOOLONG;
+ goto no_mem_for_stacks;
+ }
+ bd->adoption_buffer_rings[i] =
+ rte_ring_create(rg_name, rte_align32pow2(mp->size + 1),
+ mp->socket_id,
+ rg_flags | RING_F_SC_DEQ);
+ if (bd->adoption_buffer_rings[i] == NULL) {
+ rc = -rte_errno;
+ goto no_mem_for_stacks;
+ }
+ }
+
+ rc = snprintf(rg_name, sizeof(rg_name),
+ RTE_MEMPOOL_MZ_FORMAT ".0", mp->name);
+ if (rc < 0 || rc >= (int)sizeof(rg_name)) {
+ rc = -ENAMETOOLONG;
+ goto invalid_shared_orphan_ring;
+ }
+ bd->shared_orphan_ring =
+ rte_ring_create(rg_name, rte_align32pow2(mp->size + 1),
+ mp->socket_id, rg_flags);
+ if (bd->shared_orphan_ring == NULL) {
+ rc = -rte_errno;
+ goto cannot_create_shared_orphan_ring;
+ }
+
+ rc = snprintf(rg_name, sizeof(rg_name),
+ RTE_MEMPOOL_MZ_FORMAT ".1", mp->name);
+ if (rc < 0 || rc >= (int)sizeof(rg_name)) {
+ rc = -ENAMETOOLONG;
+ goto invalid_shared_bucket_ring;
+ }
+ bd->shared_bucket_ring =
+ rte_ring_create(rg_name,
+ rte_align32pow2((mp->size + 1) /
+ bd->obj_per_bucket),
+ mp->socket_id, rg_flags);
+ if (bd->shared_bucket_ring == NULL) {
+ rc = -rte_errno;
+ goto cannot_create_shared_bucket_ring;
+ }
+
+ mp->pool_data = bd;
+
+ return 0;
+
+cannot_create_shared_bucket_ring:
+invalid_shared_bucket_ring:
+ rte_ring_free(bd->shared_orphan_ring);
+cannot_create_shared_orphan_ring:
+invalid_shared_orphan_ring:
+no_mem_for_stacks:
+ for (i = 0; i < RTE_MAX_LCORE; i++) {
+ rte_free(bd->buckets[i]);
+ rte_ring_free(bd->adoption_buffer_rings[i]);
+ }
+ rte_free(bd);
+no_mem_for_data:
+ rte_errno = -rc;
+ return rc;
+}
+
+static void
+bucket_free(struct rte_mempool *mp)
+{
+ unsigned int i;
+ struct bucket_data *bd = mp->pool_data;
+
+ if (bd == NULL)
+ return;
+
+ for (i = 0; i < RTE_MAX_LCORE; i++) {
+ rte_free(bd->buckets[i]);
+ rte_ring_free(bd->adoption_buffer_rings[i]);
+ }
+
+ rte_ring_free(bd->shared_orphan_ring);
+ rte_ring_free(bd->shared_bucket_ring);
+
+ rte_free(bd);
+}
+
+static ssize_t
+bucket_calc_mem_size(const struct rte_mempool *mp, uint32_t obj_num,
+ __rte_unused uint32_t pg_shift, size_t *min_total_elt_size,
+ size_t *align)
+{
+ struct bucket_data *bd = mp->pool_data;
+ unsigned int bucket_page_sz;
+
+ if (bd == NULL)
+ return -EINVAL;
+
+ bucket_page_sz = rte_align32pow2(bd->bucket_mem_size);
+ *align = bucket_page_sz;
+ *min_total_elt_size = bucket_page_sz;
+ /*
+ * Each bucket occupies its own block aligned to
+ * bucket_page_sz, so the required amount of memory is
+ * a multiple of bucket_page_sz.
+ * We also need extra space for a bucket header
+ */
+ return ((obj_num + bd->obj_per_bucket - 1) /
+ bd->obj_per_bucket) * bucket_page_sz;
+}
+
+static int
+bucket_populate(struct rte_mempool *mp, unsigned int max_objs,
+ void *vaddr, rte_iova_t iova, size_t len,
+ rte_mempool_populate_obj_cb_t *obj_cb)
+{
+ struct bucket_data *bd = mp->pool_data;
+ unsigned int bucket_page_sz;
+ unsigned int bucket_header_sz;
+ unsigned int n_objs;
+ uintptr_t align;
+ uint8_t *iter;
+ int rc;
+
+ if (bd == NULL)
+ return -EINVAL;
+
+ bucket_page_sz = rte_align32pow2(bd->bucket_mem_size);
+ align = RTE_PTR_ALIGN_CEIL((uintptr_t)vaddr, bucket_page_sz) -
+ (uintptr_t)vaddr;
+
+ bucket_header_sz = bd->header_size - mp->header_size;
+ if (iova != RTE_BAD_IOVA)
+ iova += align + bucket_header_sz;
+
+ for (iter = (uint8_t *)vaddr + align, n_objs = 0;
+ iter < (uint8_t *)vaddr + len && n_objs < max_objs;
+ iter += bucket_page_sz) {
+ struct bucket_header *hdr = (struct bucket_header *)iter;
+ unsigned int chunk_len = bd->bucket_mem_size;
+
+ if ((size_t)(iter - (uint8_t *)vaddr) + chunk_len > len)
+ chunk_len = len - (iter - (uint8_t *)vaddr);
+ if (chunk_len <= bucket_header_sz)
+ break;
+ chunk_len -= bucket_header_sz;
+
+ hdr->fill_cnt = 0;
+ hdr->lcore_id = LCORE_ID_ANY;
+ rc = rte_mempool_populate_one_by_one(mp,
+ RTE_MIN(bd->obj_per_bucket,
+ max_objs - n_objs),
+ iter + bucket_header_sz,
+ iova, chunk_len, obj_cb);
+ if (rc < 0)
+ return rc;
+ n_objs += rc;
+ if (iova != RTE_BAD_IOVA)
+ iova += bucket_page_sz;
+ }
+
+ return n_objs;
+}
+
+static const struct rte_mempool_ops ops_bucket = {
+ .name = "bucket",
+ .alloc = bucket_alloc,
+ .free = bucket_free,
+ .enqueue = bucket_enqueue,
+ .dequeue = bucket_dequeue,
+ .get_count = bucket_get_count,
+ .calc_mem_size = bucket_calc_mem_size,
+ .populate = bucket_populate,
+};
+
+
+MEMPOOL_REGISTER_OPS(ops_bucket);
diff --git a/drivers/mempool/bucket/rte_mempool_bucket_version.map b/drivers/mempool/bucket/rte_mempool_bucket_version.map
new file mode 100644
index 0000000..9b9ab1a
--- /dev/null
+++ b/drivers/mempool/bucket/rte_mempool_bucket_version.map
@@ -0,0 +1,4 @@
+DPDK_18.05 {
+
+ local: *;
+};
diff --git a/mk/rte.app.mk b/mk/rte.app.mk
index 0169f3f..405785d 100644
--- a/mk/rte.app.mk
+++ b/mk/rte.app.mk
@@ -116,6 +116,7 @@ _LDLIBS-$(CONFIG_RTE_LIBRTE_VDEV_BUS) += -lrte_bus_vdev
ifeq ($(CONFIG_RTE_BUILD_SHARED_LIB),n)
# plugins (link only if static libraries)

+_LDLIBS-$(CONFIG_RTE_DRIVER_MEMPOOL_BUCKET) += -lrte_mempool_bucket
_LDLIBS-$(CONFIG_RTE_DRIVER_MEMPOOL_STACK) += -lrte_mempool_stack

_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_AF_PACKET) += -lrte_pmd_af_packet
--
2.7.4
Andrew Rybchenko
2018-01-23 13:16:11 UTC
Permalink
From: "Artem V. Andreev" <***@oktetlabs.ru>

Signed-off-by: Artem V. Andreev <***@oktetlabs.ru>
Signed-off-by: Andrew Rybchenko <***@solarflare.com>
---
drivers/mempool/bucket/rte_mempool_bucket.c | 52 +++++++++++++++++++++++++++++
1 file changed, 52 insertions(+)

diff --git a/drivers/mempool/bucket/rte_mempool_bucket.c b/drivers/mempool/bucket/rte_mempool_bucket.c
index dc4e1dc..03fccf1 100644
--- a/drivers/mempool/bucket/rte_mempool_bucket.c
+++ b/drivers/mempool/bucket/rte_mempool_bucket.c
@@ -294,6 +294,46 @@ bucket_dequeue(struct rte_mempool *mp, void **obj_table, unsigned int n)
return rc;
}

+static int
+bucket_dequeue_contig_blocks(struct rte_mempool *mp, void **first_obj_table,
+ unsigned int n)
+{
+ struct bucket_data *bd = mp->pool_data;
+ const uint32_t header_size = bd->header_size;
+ struct bucket_stack *cur_stack = bd->buckets[rte_lcore_id()];
+ unsigned int n_buckets_from_stack = RTE_MIN(n, cur_stack->top);
+ struct bucket_header *hdr;
+ void **first_objp = first_obj_table;
+
+ bucket_adopt_orphans(bd);
+
+ n -= n_buckets_from_stack;
+ while (n_buckets_from_stack-- > 0) {
+ hdr = bucket_stack_pop_unsafe(cur_stack);
+ *first_objp++ = (uint8_t *)hdr + header_size;
+ }
+ if (n > 0) {
+ if (unlikely(rte_ring_dequeue_bulk(bd->shared_bucket_ring,
+ first_objp, n, NULL) != n)) {
+ /* Return the already dequeued buckets */
+ while (first_objp-- != first_obj_table) {
+ bucket_stack_push(cur_stack,
+ (uint8_t *)*first_objp -
+ header_size);
+ }
+ rte_errno = ENOBUFS;
+ return -rte_errno;
+ }
+ while (n-- > 0) {
+ hdr = (struct bucket_header *)*first_objp;
+ hdr->lcore_id = rte_lcore_id();
+ *first_objp++ = (uint8_t *)hdr + header_size;
+ }
+ }
+
+ return 0;
+}
+
static void
count_underfilled_buckets(struct rte_mempool *mp,
void *opaque,
@@ -546,6 +586,16 @@ bucket_populate(struct rte_mempool *mp, unsigned int max_objs,
return n_objs;
}

+static int
+bucket_get_info(const struct rte_mempool *mp, struct rte_mempool_info *info)
+{
+ struct bucket_data *bd = mp->pool_data;
+
+ info->contig_block_size = bd->obj_per_bucket;
+ return 0;
+}
+
+
static const struct rte_mempool_ops ops_bucket = {
.name = "bucket",
.alloc = bucket_alloc,
@@ -555,6 +605,8 @@ static const struct rte_mempool_ops ops_bucket = {
.get_count = bucket_get_count,
.calc_mem_size = bucket_calc_mem_size,
.populate = bucket_populate,
+ .get_info = bucket_get_info,
+ .dequeue_contig_blocks = bucket_dequeue_contig_blocks,
};
--
2.7.4
Olivier Matz
2018-01-31 16:44:40 UTC
Permalink
Hi,
The patch series starts from generic enhancements suggested by Olivier.
Basically it adds driver callbacks to calculate required memory size and
to populate objects using a provided memory area. It allows removing the
so-called capability flags used before to tell the generic code how to
allocate and slice allocated memory into mempool objects.
The clean-up which removes get_capabilities and register_memory_area is
not strictly required, but I think it is the right thing to do.
Existing mempool drivers are updated.
I've kept rte_mempool_populate_iova_tab() intact since it does not seem
to be directly related to the XMEM API functions.
The patch series adds a bucket mempool driver which allows allocating
(both physically and virtually) contiguous blocks of objects and adds
a mempool API to do it. It is still capable of providing separate objects,
but it is definitely more heavy-weight than the ring/stack drivers.
The driver will be used by the future Solarflare driver enhancements
which allow utilizing physically contiguous blocks in the NIC
hardware/firmware.
The target use case is to dequeue in blocks and enqueue separate objects
back (which are collected in buckets to be dequeued). So, the memory
pool with the bucket driver is created by an application and provided to
a networking PMD receive queue. The choice of the bucket driver is done
using rte_eth_dev_pool_ops_supported(). A PMD that relies upon contiguous
block allocation should report the bucket driver as the only supported
and preferred one.
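For illustration only (not part of the series), an application could follow
this scheme roughly as below; port_id, pool sizes and the mbuf data room are
placeholders, and rte_pktmbuf_pool_create_by_ops() is the existing
convenience wrapper that lets the caller name the mempool ops:

#include <rte_ethdev.h>
#include <rte_mbuf.h>
#include <rte_mempool.h>

static struct rte_mempool *
create_rx_pool(uint16_t port_id, unsigned int nb_mbufs, int socket_id)
{
	const char *ops = "ring_mp_mc";	/* fallback ops */

	/* a non-negative return means the PMD supports these ops */
	if (rte_eth_dev_pool_ops_supported(port_id, "bucket") >= 0)
		ops = "bucket";

	return rte_pktmbuf_pool_create_by_ops("rx_pool", nb_mbufs, 0, 0,
					      RTE_MBUF_DEFAULT_BUF_SIZE,
					      socket_id, ops);
}
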
Introduction of the contiguous block dequeue operation is justified by the
performance measurements below; the mempool performance test was modified
as follows:
- in the original test, bulks are powers of two, which is unacceptable
for us, so they are changed to multiples of contig_block_size;
- the test code is duplicated to support plain dequeue and
dequeue_contig_blocks;
- all the extra test variations (with/without cache etc) are eliminated;
- a fake read from the dequeued buffer is added (in both cases) to
simulate mbufs access.
start performance test for bucket (without cache)
mempool_autotest cache= 0 cores= 1 n_get_bulk= 15 n_put_bulk= 1 n_keep= 30 Srate_persec= 111935488
mempool_autotest cache= 0 cores= 1 n_get_bulk= 15 n_put_bulk= 1 n_keep= 60 Srate_persec= 115290931
mempool_autotest cache= 0 cores= 1 n_get_bulk= 15 n_put_bulk= 15 n_keep= 30 Srate_persec= 353055539
mempool_autotest cache= 0 cores= 1 n_get_bulk= 15 n_put_bulk= 15 n_keep= 60 Srate_persec= 353330790
mempool_autotest cache= 0 cores= 2 n_get_bulk= 15 n_put_bulk= 1 n_keep= 30 Srate_persec= 224657407
mempool_autotest cache= 0 cores= 2 n_get_bulk= 15 n_put_bulk= 1 n_keep= 60 Srate_persec= 230411468
mempool_autotest cache= 0 cores= 2 n_get_bulk= 15 n_put_bulk= 15 n_keep= 30 Srate_persec= 706700902
mempool_autotest cache= 0 cores= 2 n_get_bulk= 15 n_put_bulk= 15 n_keep= 60 Srate_persec= 703673139
mempool_autotest cache= 0 cores= 4 n_get_bulk= 15 n_put_bulk= 1 n_keep= 30 Srate_persec= 425236887
mempool_autotest cache= 0 cores= 4 n_get_bulk= 15 n_put_bulk= 1 n_keep= 60 Srate_persec= 437295512
mempool_autotest cache= 0 cores= 4 n_get_bulk= 15 n_put_bulk= 15 n_keep= 30 Srate_persec= 1343409356
mempool_autotest cache= 0 cores= 4 n_get_bulk= 15 n_put_bulk= 15 n_keep= 60 Srate_persec= 1336567397
start performance test for bucket (without cache + contiguous dequeue)
mempool_autotest cache= 0 cores= 1 n_get_bulk= 15 n_put_bulk= 1 n_keep= 30 Crate_persec= 122945536
mempool_autotest cache= 0 cores= 1 n_get_bulk= 15 n_put_bulk= 1 n_keep= 60 Crate_persec= 126458265
mempool_autotest cache= 0 cores= 1 n_get_bulk= 15 n_put_bulk= 15 n_keep= 30 Crate_persec= 374262988
mempool_autotest cache= 0 cores= 1 n_get_bulk= 15 n_put_bulk= 15 n_keep= 60 Crate_persec= 377316966
mempool_autotest cache= 0 cores= 2 n_get_bulk= 15 n_put_bulk= 1 n_keep= 30 Crate_persec= 244842496
mempool_autotest cache= 0 cores= 2 n_get_bulk= 15 n_put_bulk= 1 n_keep= 60 Crate_persec= 251618917
mempool_autotest cache= 0 cores= 2 n_get_bulk= 15 n_put_bulk= 15 n_keep= 30 Crate_persec= 751226060
mempool_autotest cache= 0 cores= 2 n_get_bulk= 15 n_put_bulk= 15 n_keep= 60 Crate_persec= 756233010
mempool_autotest cache= 0 cores= 4 n_get_bulk= 15 n_put_bulk= 1 n_keep= 30 Crate_persec= 462068120
mempool_autotest cache= 0 cores= 4 n_get_bulk= 15 n_put_bulk= 1 n_keep= 60 Crate_persec= 476997221
mempool_autotest cache= 0 cores= 4 n_get_bulk= 15 n_put_bulk= 15 n_keep= 30 Crate_persec= 1432171313
mempool_autotest cache= 0 cores= 4 n_get_bulk= 15 n_put_bulk= 15 n_keep= 60 Crate_persec= 1438829771
The number of objects in the contiguous block is a function of bucket
memory size (a .config option) and the total element size. In the future,
an additional API with the possibility to pass parameters on mempool
allocation may be added.
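To give a rough feel for the numbers (purely illustrative values, not taken
from the patches): with CONFIG_RTE_DRIVER_MEMPOOL_BUCKET_SIZE_KB=64 a bucket
spans 65536 bytes, one cache line (64 bytes on a typical target) of which is
the bucket header; assuming a total element size of 2432 bytes, the driver
would pack (65536 - 64) / 2432 = 26 objects per bucket, i.e.
contig_block_size would be 26.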
It breaks the ABI since it changes rte_mempool_ops. It also removes
rte_mempool_ops_register_memory_area() and
rte_mempool_ops_get_capabilities() since the corresponding callbacks are
removed.
The target DPDK release is 18.05.
- add driver ops to calculate required memory size and populate
mempool objects, remove extra flags which were required before
to control it
- transition of octeontx and dpaa drivers to the new callbacks
- change the info API to get information from the driver that the
API user needs to know the contiguous block size
- remove get_capabilities (not required any more and may be
substituted with more information in the info get API)
- remove register_memory_area since it is substituted with
populate callback which can do more
- use SPDX tags
- avoid all objects affinity to single lcore
- fix bucket get_count
- deprecate XMEM API
- avoid introduction of a new function to flush cache
- fix NO_CACHE_ALIGN case in bucket mempool
mempool: fix phys contig check if populate default skipped
mempool: add op to calculate memory size to be allocated
mempool/octeontx: add callback to calculate memory size
mempool: add op to populate objects using provided memory
mempool/octeontx: implement callback to populate objects
mempool: remove callback to get capabilities
mempool: deprecate xmem functions
mempool/octeontx: prepare to remove register memory area op
mempool/dpaa: convert to use populate driver op
mempool: remove callback to register memory area
mempool: ensure the mempool is initialized before populating
mempool/bucket: implement bucket mempool manager
mempool: support flushing the default cache of the mempool
mempool: implement abstract mempool info API
mempool: support block dequeue operation
mempool/bucket: implement block dequeue operation
mempool/bucket: do not allow one lcore to grab all buckets
MAINTAINERS | 9 +
config/common_base | 2 +
drivers/mempool/Makefile | 1 +
drivers/mempool/bucket/Makefile | 27 +
drivers/mempool/bucket/rte_mempool_bucket.c | 626 +++++++++++++++++++++
.../mempool/bucket/rte_mempool_bucket_version.map | 4 +
drivers/mempool/dpaa/dpaa_mempool.c | 13 +-
drivers/mempool/octeontx/rte_mempool_octeontx.c | 63 ++-
lib/librte_mempool/rte_mempool.c | 192 ++++---
lib/librte_mempool/rte_mempool.h | 366 +++++++++---
lib/librte_mempool/rte_mempool_ops.c | 48 +-
lib/librte_mempool/rte_mempool_version.map | 11 +-
mk/rte.app.mk | 1 +
13 files changed, 1184 insertions(+), 179 deletions(-)
create mode 100644 drivers/mempool/bucket/Makefile
create mode 100644 drivers/mempool/bucket/rte_mempool_bucket.c
create mode 100644 drivers/mempool/bucket/rte_mempool_bucket_version.map
Globally, the RFC looks fine to me. Thanks for this good work.

I didn't review the mempool/bucket part like I did last time. About the
changes to the mempool API, I think it's a good enhancement: it makes
things more flexible and removes complexity in the common code. Some
points may still need some discussions, for instance how the PMDs and
applications take advantage of block dequeue operations and get_info().

I have some specific comments that are sent directly as replies to the
patches.

Since it changes dpaa and octeontx, having feedback from people from NXP
and Cavium Networks would be good.

Thanks,
Olivier
Andrew Rybchenko
2018-03-10 15:39:33 UTC
Permalink
The initial patch series [1] is split into two to simplify processing.
The second series relies on this one and will add the bucket mempool driver
and related ops.

The patch series has generic enhancements suggested by Olivier.
Basically it adds driver callbacks to calculate required memory size and
to populate objects using a provided memory area. It allows removing the
so-called capability flags used before to tell the generic code how to
allocate and slice allocated memory into mempool objects.
The clean-up which removes get_capabilities and register_memory_area is
not strictly required, but I think it is the right thing to do.
Existing mempool drivers are updated.

I've kept rte_mempool_populate_iova_tab() intact since it does not seem
to be directly related to the XMEM API functions.

It breaks the ABI since it changes rte_mempool_ops. It also removes
rte_mempool_ops_register_memory_area() and
rte_mempool_ops_get_capabilities() since the corresponding callbacks are
removed.

Internal global functions are not listed in the map file since they are not
a part of the external API.

[1] http://dpdk.org/ml/archives/dev/2018-January/088698.html

RFCv1 -> RFCv2:
- add driver ops to calculate required memory size and populate
mempool objects, remove extra flags which were required before
to control it
- transition of octeontx and dpaa drivers to the new callbacks
- change the info API to get information from the driver that the
API user needs to know the contiguous block size
- remove get_capabilities (not required any more and may be
substituted with more information in the info get API)
- remove register_memory_area since it is substituted with
populate callback which can do more
- use SPDX tags
- avoid all objects affinity to single lcore
- fix bucket get_count
- deprecate XMEM API
- avoid introduction of a new function to flush cache
- fix NO_CACHE_ALIGN case in bucket mempool

RFCv2 -> v1:
- split the series in two
- squash octeontx patches which implement calc_mem_size and populate
callbacks into the patch which removes get_capabilities since it is
the easiest way to untangle the tangle of tightly related library
functions and flags advertised by the driver
- consistently name default callbacks
- move default callbacks to dedicated file
- see detailed description in patches

Andrew Rybchenko (7):
mempool: add op to calculate memory size to be allocated
mempool: add op to populate objects using provided memory
mempool: remove callback to get capabilities
mempool: deprecate xmem functions
mempool/octeontx: prepare to remove register memory area op
mempool/dpaa: prepare to remove register memory area op
mempool: remove callback to register memory area

Artem V. Andreev (2):
mempool: ensure the mempool is initialized before populating
mempool: support flushing the default cache of the mempool

doc/guides/rel_notes/deprecation.rst | 12 +-
doc/guides/rel_notes/release_18_05.rst | 32 ++-
drivers/mempool/dpaa/dpaa_mempool.c | 13 +-
drivers/mempool/octeontx/rte_mempool_octeontx.c | 64 ++++--
lib/librte_mempool/Makefile | 3 +-
lib/librte_mempool/meson.build | 5 +-
lib/librte_mempool/rte_mempool.c | 159 +++++++--------
lib/librte_mempool/rte_mempool.h | 260 +++++++++++++++++-------
lib/librte_mempool/rte_mempool_ops.c | 37 ++--
lib/librte_mempool/rte_mempool_ops_default.c | 51 +++++
lib/librte_mempool/rte_mempool_version.map | 11 +-
test/test/test_mempool.c | 31 ---
12 files changed, 437 insertions(+), 241 deletions(-)
create mode 100644 lib/librte_mempool/rte_mempool_ops_default.c
--
2.7.4
Andrew Rybchenko
2018-03-10 15:39:34 UTC
Permalink
The size of the memory chunk required to populate mempool objects depends
on how the objects are stored in memory. Different mempool drivers
may have different requirements, and a new operation allows calculating
the memory size in accordance with driver requirements and advertising
the requirements on minimum memory chunk size and alignment
in a generic way.
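
As an illustration only (not part of the patch), a driver that needs the
whole pool to reside in one physically contiguous chunk could implement
the new callback roughly as follows; the name is hypothetical and the
calculation is deliberately simplified:

static ssize_t
xxx_calc_mem_size(const struct rte_mempool *mp, uint32_t obj_num,
		  __rte_unused uint32_t pg_shift, size_t *min_chunk_size,
		  size_t *align)
{
	size_t total_elt_sz = mp->header_size + mp->elt_size +
			      mp->trailer_size;
	ssize_t mem_size = (ssize_t)total_elt_sz * obj_num;

	/* the whole area must be allocated as a single chunk */
	*min_chunk_size = mem_size;
	*align = RTE_CACHE_LINE_SIZE;

	return mem_size;
}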

Bump ABI version since the patch breaks it.

Signed-off-by: Andrew Rybchenko <***@solarflare.com>
---
RFCv2 -> v1:
- move default calc_mem_size callback to rte_mempool_ops_default.c
- add ABI changes to release notes
- name default callback consistently: rte_mempool_op_<callback>_default()
- bump ABI version since it is the first patch which breaks ABI
- describe default callback behaviour in details
- avoid introduction of an internal function to cope with deprecation
(leave it to the deprecation patch)
- move cache-line or page boundary chunk alignment to default callback
- highlight that min_chunk_size and align parameters are output only

doc/guides/rel_notes/deprecation.rst | 3 +-
doc/guides/rel_notes/release_18_05.rst | 7 ++-
lib/librte_mempool/Makefile | 3 +-
lib/librte_mempool/meson.build | 5 +-
lib/librte_mempool/rte_mempool.c | 43 +++++++--------
lib/librte_mempool/rte_mempool.h | 80 +++++++++++++++++++++++++++-
lib/librte_mempool/rte_mempool_ops.c | 18 +++++++
lib/librte_mempool/rte_mempool_ops_default.c | 38 +++++++++++++
lib/librte_mempool/rte_mempool_version.map | 8 +++
9 files changed, 177 insertions(+), 28 deletions(-)
create mode 100644 lib/librte_mempool/rte_mempool_ops_default.c

diff --git a/doc/guides/rel_notes/deprecation.rst b/doc/guides/rel_notes/deprecation.rst
index 6594585..e02d4ca 100644
--- a/doc/guides/rel_notes/deprecation.rst
+++ b/doc/guides/rel_notes/deprecation.rst
@@ -72,8 +72,7 @@ Deprecation Notices

- removal of ``get_capabilities`` mempool ops and related flags.
- substitute ``register_memory_area`` with ``populate`` ops.
- - addition of new ops to customize required memory chunk calculation,
- customize objects population and allocate contiguous
+ - addition of new ops to customize objects population and allocate contiguous
block of objects if underlying driver supports it.

* mbuf: The control mbuf API will be removed in v18.05. The impacted
diff --git a/doc/guides/rel_notes/release_18_05.rst b/doc/guides/rel_notes/release_18_05.rst
index f2525bb..59583ea 100644
--- a/doc/guides/rel_notes/release_18_05.rst
+++ b/doc/guides/rel_notes/release_18_05.rst
@@ -80,6 +80,11 @@ ABI Changes
Also, make sure to start the actual text at the margin.
=========================================================

+* **Changed rte_mempool_ops structure.**
+
+ A new callback ``calc_mem_size`` has been added to ``rte_mempool_ops``
+ to allow to customize required memory size calculation.
+

Removed Items
-------------
@@ -152,7 +157,7 @@ The libraries prepended with a plus sign were incremented in this version.
librte_latencystats.so.1
librte_lpm.so.2
librte_mbuf.so.3
- librte_mempool.so.3
+ + librte_mempool.so.4
+ librte_meter.so.2
librte_metrics.so.1
librte_net.so.1
diff --git a/lib/librte_mempool/Makefile b/lib/librte_mempool/Makefile
index 24e735a..072740f 100644
--- a/lib/librte_mempool/Makefile
+++ b/lib/librte_mempool/Makefile
@@ -11,11 +11,12 @@ LDLIBS += -lrte_eal -lrte_ring

EXPORT_MAP := rte_mempool_version.map

-LIBABIVER := 3
+LIBABIVER := 4

# all source are stored in SRCS-y
SRCS-$(CONFIG_RTE_LIBRTE_MEMPOOL) += rte_mempool.c
SRCS-$(CONFIG_RTE_LIBRTE_MEMPOOL) += rte_mempool_ops.c
+SRCS-$(CONFIG_RTE_LIBRTE_MEMPOOL) += rte_mempool_ops_default.c
# install includes
SYMLINK-$(CONFIG_RTE_LIBRTE_MEMPOOL)-include := rte_mempool.h

diff --git a/lib/librte_mempool/meson.build b/lib/librte_mempool/meson.build
index 7a4f3da..9e3b527 100644
--- a/lib/librte_mempool/meson.build
+++ b/lib/librte_mempool/meson.build
@@ -1,7 +1,8 @@
# SPDX-License-Identifier: BSD-3-Clause
# Copyright(c) 2017 Intel Corporation

-version = 2
-sources = files('rte_mempool.c', 'rte_mempool_ops.c')
+version = 4
+sources = files('rte_mempool.c', 'rte_mempool_ops.c',
+ 'rte_mempool_ops_default.c')
headers = files('rte_mempool.h')
deps += ['ring']
diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c
index 54f7f4b..3bfb36e 100644
--- a/lib/librte_mempool/rte_mempool.c
+++ b/lib/librte_mempool/rte_mempool.c
@@ -544,39 +544,33 @@ rte_mempool_populate_default(struct rte_mempool *mp)
unsigned int mz_flags = RTE_MEMZONE_1GB|RTE_MEMZONE_SIZE_HINT_ONLY;
char mz_name[RTE_MEMZONE_NAMESIZE];
const struct rte_memzone *mz;
- size_t size, total_elt_sz, align, pg_sz, pg_shift;
+ ssize_t mem_size;
+ size_t align, pg_sz, pg_shift;
rte_iova_t iova;
unsigned mz_id, n;
- unsigned int mp_flags;
int ret;

/* mempool must not be populated */
if (mp->nb_mem_chunks != 0)
return -EEXIST;

- /* Get mempool capabilities */
- mp_flags = 0;
- ret = rte_mempool_ops_get_capabilities(mp, &mp_flags);
- if ((ret < 0) && (ret != -ENOTSUP))
- return ret;
-
- /* update mempool capabilities */
- mp->flags |= mp_flags;
-
if (rte_eal_has_hugepages()) {
pg_shift = 0; /* not needed, zone is physically contiguous */
pg_sz = 0;
- align = RTE_CACHE_LINE_SIZE;
} else {
pg_sz = getpagesize();
pg_shift = rte_bsf32(pg_sz);
- align = pg_sz;
}

- total_elt_sz = mp->header_size + mp->elt_size + mp->trailer_size;
for (mz_id = 0, n = mp->size; n > 0; mz_id++, n -= ret) {
- size = rte_mempool_xmem_size(n, total_elt_sz, pg_shift,
- mp->flags);
+ size_t min_chunk_size;
+
+ mem_size = rte_mempool_ops_calc_mem_size(mp, n, pg_shift,
+ &min_chunk_size, &align);
+ if (mem_size < 0) {
+ ret = mem_size;
+ goto fail;
+ }

ret = snprintf(mz_name, sizeof(mz_name),
RTE_MEMPOOL_MZ_FORMAT "_%d", mp->name, mz_id);
@@ -585,7 +579,7 @@ rte_mempool_populate_default(struct rte_mempool *mp)
goto fail;
}

- mz = rte_memzone_reserve_aligned(mz_name, size,
+ mz = rte_memzone_reserve_aligned(mz_name, mem_size,
mp->socket_id, mz_flags, align);
/* not enough memory, retry with the biggest zone we have */
if (mz == NULL)
@@ -596,6 +590,12 @@ rte_mempool_populate_default(struct rte_mempool *mp)
goto fail;
}

+ if (mz->len < min_chunk_size) {
+ rte_memzone_free(mz);
+ ret = -ENOMEM;
+ goto fail;
+ }
+
if (mp->flags & MEMPOOL_F_NO_PHYS_CONTIG)
iova = RTE_BAD_IOVA;
else
@@ -628,13 +628,14 @@ rte_mempool_populate_default(struct rte_mempool *mp)
static size_t
get_anon_size(const struct rte_mempool *mp)
{
- size_t size, total_elt_sz, pg_sz, pg_shift;
+ size_t size, pg_sz, pg_shift;
+ size_t min_chunk_size;
+ size_t align;

pg_sz = getpagesize();
pg_shift = rte_bsf32(pg_sz);
- total_elt_sz = mp->header_size + mp->elt_size + mp->trailer_size;
- size = rte_mempool_xmem_size(mp->size, total_elt_sz, pg_shift,
- mp->flags);
+ size = rte_mempool_ops_calc_mem_size(mp, mp->size, pg_shift,
+ &min_chunk_size, &align);

return size;
}
diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
index 8b1b7f7..0151f6c 100644
--- a/lib/librte_mempool/rte_mempool.h
+++ b/lib/librte_mempool/rte_mempool.h
@@ -399,6 +399,56 @@ typedef int (*rte_mempool_get_capabilities_t)(const struct rte_mempool *mp,
typedef int (*rte_mempool_ops_register_memory_area_t)
(const struct rte_mempool *mp, char *vaddr, rte_iova_t iova, size_t len);

+/**
+ * Calculate memory size required to store given number of objects.
+ *
+ * @param[in] mp
+ * Pointer to the memory pool.
+ * @param[in] obj_num
+ * Number of objects.
+ * @param[in] pg_shift
+ * LOG2 of the physical pages size. If set to 0, ignore page boundaries.
+ * @param[out] min_chunk_size
+ * Location for minimum size of the memory chunk which may be used to
+ * store memory pool objects.
+ * @param[out] align
+ * Location with required memory chunk alignment.
+ * @return
+ * Required memory size aligned at page boundary.
+ */
+typedef ssize_t (*rte_mempool_calc_mem_size_t)(const struct rte_mempool *mp,
+ uint32_t obj_num, uint32_t pg_shift,
+ size_t *min_chunk_size, size_t *align);
+
+/**
+ * Default way to calculate memory size required to store given number of
+ * objects.
+ *
+ * If page boundaries may be ignored, it is just a product of total
+ * object size including header and trailer and number of objects.
+ * Otherwise, it is a number of pages required to store given number of
+ * objects without crossing page boundary.
+ *
+ * Note that if object size is bigger than page size, then it assumes
+ * that pages are grouped in subsets of physically continuous pages big
+ * enough to store at least one object.
+ *
+ * If mempool driver requires object addresses to be block size aligned
+ * (MEMPOOL_F_CAPA_BLK_ALIGNED_OBJECTS), space for one extra element is
+ * reserved to be able to meet the requirement.
+ *
+ * Minimum size of memory chunk is either all required space, if
+ * capabilities say that whole memory area must be physically contiguous
+ * (MEMPOOL_F_CAPA_PHYS_CONTIG), or a maximum of the page size and total
+ * element size.
+ *
+ * Required memory chunk alignment is a maximum of page size and cache
+ * line size.
+ */
+ssize_t rte_mempool_op_calc_mem_size_default(const struct rte_mempool *mp,
+ uint32_t obj_num, uint32_t pg_shift,
+ size_t *min_chunk_size, size_t *align);
+
/** Structure defining mempool operations structure */
struct rte_mempool_ops {
char name[RTE_MEMPOOL_OPS_NAMESIZE]; /**< Name of mempool ops struct. */
@@ -415,6 +465,11 @@ struct rte_mempool_ops {
* Notify new memory area to mempool
*/
rte_mempool_ops_register_memory_area_t register_memory_area;
+ /**
+ * Optional callback to calculate memory size required to
+ * store specified number of objects.
+ */
+ rte_mempool_calc_mem_size_t calc_mem_size;
} __rte_cache_aligned;

#define RTE_MEMPOOL_MAX_OPS_IDX 16 /**< Max registered ops structs */
@@ -564,6 +619,29 @@ rte_mempool_ops_register_memory_area(const struct rte_mempool *mp,
char *vaddr, rte_iova_t iova, size_t len);

/**
+ * @internal wrapper for mempool_ops calc_mem_size callback.
+ * API to calculate size of memory required to store specified number of
+ * object.
+ *
+ * @param[in] mp
+ * Pointer to the memory pool.
+ * @param[in] obj_num
+ * Number of objects.
+ * @param[in] pg_shift
+ * LOG2 of the physical pages size. If set to 0, ignore page boundaries.
+ * @param[out] min_chunk_size
+ * Location for minimum size of the memory chunk which may be used to
+ * store memory pool objects.
+ * @param[out] align
+ * Location with required memory chunk alignment.
+ * @return
+ * Required memory size aligned at page boundary.
+ */
+ssize_t rte_mempool_ops_calc_mem_size(const struct rte_mempool *mp,
+ uint32_t obj_num, uint32_t pg_shift,
+ size_t *min_chunk_size, size_t *align);
+
+/**
* @internal wrapper for mempool_ops free callback.
*
* @param mp
@@ -1533,7 +1611,7 @@ uint32_t rte_mempool_calc_obj_size(uint32_t elt_size, uint32_t flags,
* of objects. Assume that the memory buffer will be aligned at page
* boundary.
*
- * Note that if object size is bigger then page size, then it assumes
+ * Note that if object size is bigger than page size, then it assumes
* that pages are grouped in subsets of physically continuous pages big
* enough to store at least one object.
*
diff --git a/lib/librte_mempool/rte_mempool_ops.c b/lib/librte_mempool/rte_mempool_ops.c
index 0732255..26908cc 100644
--- a/lib/librte_mempool/rte_mempool_ops.c
+++ b/lib/librte_mempool/rte_mempool_ops.c
@@ -59,6 +59,7 @@ rte_mempool_register_ops(const struct rte_mempool_ops *h)
ops->get_count = h->get_count;
ops->get_capabilities = h->get_capabilities;
ops->register_memory_area = h->register_memory_area;
+ ops->calc_mem_size = h->calc_mem_size;

rte_spinlock_unlock(&rte_mempool_ops_table.sl);

@@ -123,6 +124,23 @@ rte_mempool_ops_register_memory_area(const struct rte_mempool *mp, char *vaddr,
return ops->register_memory_area(mp, vaddr, iova, len);
}

+/* wrapper to calculate the memory size required to store given number of objects */
+ssize_t
+rte_mempool_ops_calc_mem_size(const struct rte_mempool *mp,
+ uint32_t obj_num, uint32_t pg_shift,
+ size_t *min_chunk_size, size_t *align)
+{
+ struct rte_mempool_ops *ops;
+
+ ops = rte_mempool_get_ops(mp->ops_index);
+
+ if (ops->calc_mem_size == NULL)
+ return rte_mempool_op_calc_mem_size_default(mp, obj_num,
+ pg_shift, min_chunk_size, align);
+
+ return ops->calc_mem_size(mp, obj_num, pg_shift, min_chunk_size, align);
+}
+
/* sets mempool ops previously registered by rte_mempool_register_ops. */
int
rte_mempool_set_ops_byname(struct rte_mempool *mp, const char *name,
diff --git a/lib/librte_mempool/rte_mempool_ops_default.c b/lib/librte_mempool/rte_mempool_ops_default.c
new file mode 100644
index 0000000..57fe79b
--- /dev/null
+++ b/lib/librte_mempool/rte_mempool_ops_default.c
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2016 Intel Corporation.
+ * Copyright(c) 2016 6WIND S.A.
+ * Copyright(c) 2018 Solarflare Communications Inc.
+ */
+
+#include <rte_mempool.h>
+
+ssize_t
+rte_mempool_op_calc_mem_size_default(const struct rte_mempool *mp,
+ uint32_t obj_num, uint32_t pg_shift,
+ size_t *min_chunk_size, size_t *align)
+{
+ unsigned int mp_flags;
+ int ret;
+ size_t total_elt_sz;
+ size_t mem_size;
+
+ /* Get mempool capabilities */
+ mp_flags = 0;
+ ret = rte_mempool_ops_get_capabilities(mp, &mp_flags);
+ if ((ret < 0) && (ret != -ENOTSUP))
+ return ret;
+
+ total_elt_sz = mp->header_size + mp->elt_size + mp->trailer_size;
+
+ mem_size = rte_mempool_xmem_size(obj_num, total_elt_sz, pg_shift,
+ mp->flags | mp_flags);
+
+ if (mp_flags & MEMPOOL_F_CAPA_PHYS_CONTIG)
+ *min_chunk_size = mem_size;
+ else
+ *min_chunk_size = RTE_MAX((size_t)1 << pg_shift, total_elt_sz);
+
+ *align = RTE_MAX((size_t)RTE_CACHE_LINE_SIZE, (size_t)1 << pg_shift);
+
+ return mem_size;
+}
diff --git a/lib/librte_mempool/rte_mempool_version.map b/lib/librte_mempool/rte_mempool_version.map
index 62b76f9..e2a054b 100644
--- a/lib/librte_mempool/rte_mempool_version.map
+++ b/lib/librte_mempool/rte_mempool_version.map
@@ -51,3 +51,11 @@ DPDK_17.11 {
rte_mempool_populate_iova_tab;

} DPDK_16.07;
+
+DPDK_18.05 {
+ global:
+
+ rte_mempool_op_calc_mem_size_default;
+
+} DPDK_17.11;
+
--
2.7.4
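For illustration, here is a minimal sketch (not part of the patch) of how a
mempool driver could hook the new op: it reuses the default helper introduced
above and then tightens min_chunk_size so that the whole area has to be
provided as a single chunk. The name my_calc_mem_size is hypothetical.

#include <rte_mempool.h>

/* Hypothetical driver callback: delegate the size calculation to the
 * default implementation, then require the whole area in one chunk. */
static ssize_t
my_calc_mem_size(const struct rte_mempool *mp, uint32_t obj_num,
		 uint32_t pg_shift, size_t *min_chunk_size, size_t *align)
{
	ssize_t mem_size;

	mem_size = rte_mempool_op_calc_mem_size_default(mp, obj_num, pg_shift,
							min_chunk_size, align);
	if (mem_size >= 0)
		*min_chunk_size = mem_size;

	return mem_size;
}

The generic code in rte_mempool_populate_default() then reserves a memzone of
mem_size bytes with the returned alignment and rejects it if mz->len ends up
below min_chunk_size.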
santosh
2018-03-11 12:51:33 UTC
Permalink
Hi Andrew,
Post by Andrew Rybchenko
Size of memory chunk required to populate mempool objects depends
on how objects are stored in the memory. Different mempool drivers
may have different requirements and a new operation allows to
calculate memory size in accordance with driver requirements and
advertise requirements on minimum memory chunk size and alignment
in a generic way.
Bump ABI version since the patch breaks it.
---
- move default calc_mem_size callback to rte_mempool_ops_default.c
- add ABI changes to release notes
- name default callback consistently: rte_mempool_op_<callback>_default()
- bump ABI version since it is the first patch which breaks ABI
- describe default callback behaviour in details
- avoid introduction of internal function to cope with depration
(keep it to deprecation patch)
- move cache-line or page boundary chunk alignment to default callback
- highlight that min_chunk_size and align parameters are output only
[...]
Post by Andrew Rybchenko
diff --git a/lib/librte_mempool/rte_mempool_ops_default.c b/lib/librte_mempool/rte_mempool_ops_default.c
new file mode 100644
index 0000000..57fe79b
--- /dev/null
+++ b/lib/librte_mempool/rte_mempool_ops_default.c
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2016 Intel Corporation.
+ * Copyright(c) 2016 6WIND S.A.
+ * Copyright(c) 2018 Solarflare Communications Inc.
+ */
+
+#include <rte_mempool.h>
+
+ssize_t
+rte_mempool_op_calc_mem_size_default(const struct rte_mempool *mp,
+ uint32_t obj_num, uint32_t pg_shift,
+ size_t *min_chunk_size, size_t *align)
+{
+ unsigned int mp_flags;
+ int ret;
+ size_t total_elt_sz;
+ size_t mem_size;
+
+ /* Get mempool capabilities */
+ mp_flags = 0;
+ ret = rte_mempool_ops_get_capabilities(mp, &mp_flags);
+ if ((ret < 0) && (ret != -ENOTSUP))
+ return ret;
+
+ total_elt_sz = mp->header_size + mp->elt_size + mp->trailer_size;
+
+ mem_size = rte_mempool_xmem_size(obj_num, total_elt_sz, pg_shift,
+ mp->flags | mp_flags);
+
Looks ok to me except a nit:
the (mp->flags | mp_flags) style expression is to differentiate that
mp_flags holds driver-specific flags like BLK_ALIGN and mp->flags
holds application-specific flags.. is that so? If not, then why not simply
do:
mp->flags |= mp_flags;

Thanks.
Andrew Rybchenko
2018-03-12 06:53:26 UTC
Permalink
Post by santosh
Hi Andrew,
Post by Andrew Rybchenko
Size of memory chunk required to populate mempool objects depends
on how objects are stored in the memory. Different mempool drivers
may have different requirements and a new operation allows to
calculate memory size in accordance with driver requirements and
advertise requirements on minimum memory chunk size and alignment
in a generic way.
Bump ABI version since the patch breaks it.
---
- move default calc_mem_size callback to rte_mempool_ops_default.c
- add ABI changes to release notes
- name default callback consistently: rte_mempool_op_<callback>_default()
- bump ABI version since it is the first patch which breaks ABI
- describe default callback behaviour in details
- avoid introduction of internal function to cope with depration
(keep it to deprecation patch)
- move cache-line or page boundary chunk alignment to default callback
- highlight that min_chunk_size and align parameters are output only
[...]
Post by Andrew Rybchenko
diff --git a/lib/librte_mempool/rte_mempool_ops_default.c b/lib/librte_mempool/rte_mempool_ops_default.c
new file mode 100644
index 0000000..57fe79b
--- /dev/null
+++ b/lib/librte_mempool/rte_mempool_ops_default.c
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2016 Intel Corporation.
+ * Copyright(c) 2016 6WIND S.A.
+ * Copyright(c) 2018 Solarflare Communications Inc.
+ */
+
+#include <rte_mempool.h>
+
+ssize_t
+rte_mempool_op_calc_mem_size_default(const struct rte_mempool *mp,
+ uint32_t obj_num, uint32_t pg_shift,
+ size_t *min_chunk_size, size_t *align)
+{
+ unsigned int mp_flags;
+ int ret;
+ size_t total_elt_sz;
+ size_t mem_size;
+
+ /* Get mempool capabilities */
+ mp_flags = 0;
+ ret = rte_mempool_ops_get_capabilities(mp, &mp_flags);
+ if ((ret < 0) && (ret != -ENOTSUP))
+ return ret;
+
+ total_elt_sz = mp->header_size + mp->elt_size + mp->trailer_size;
+
+ mem_size = rte_mempool_xmem_size(obj_num, total_elt_sz, pg_shift,
+ mp->flags | mp_flags);
+
The (mp->flags | mp_flags) style expression is to differentiate that
mp_flags holds driver-specific flags like BLK_ALIGN and mp->flags
holds application-specific flags.. is that so? If not, then why not simply
mp->flags |= mp_flags;
In fact it does not matter a lot since the code is removed in patch 3.
Here it is required just for consistency. Also, the mp argument is const,
which does not allow changing its members.
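As a tiny illustration (not from the patch) of that last point: with a
const-qualified mp, the capability flags can only be combined into a local
value, not written back. The helper name below is made up.

#include <rte_mempool.h>

/* Hypothetical helper: combine driver capability flags with the pool
 * flags without modifying the const mempool structure. */
static unsigned int
effective_flags(const struct rte_mempool *mp, unsigned int mp_flags)
{
	/* mp->flags |= mp_flags;  -- would not compile: mp is const */
	return mp->flags | mp_flags;
}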
Olivier Matz
2018-03-19 17:03:52 UTC
Permalink
Post by Andrew Rybchenko
Size of memory chunk required to populate mempool objects depends
on how objects are stored in the memory. Different mempool drivers
may have different requirements and a new operation allows to
calculate memory size in accordance with driver requirements and
advertise requirements on minimum memory chunk size and alignment
in a generic way.
Bump ABI version since the patch breaks it.
Looks good to me. Just see below for few minor comments.
Post by Andrew Rybchenko
---
- move default calc_mem_size callback to rte_mempool_ops_default.c
- add ABI changes to release notes
- name default callback consistently: rte_mempool_op_<callback>_default()
- bump ABI version since it is the first patch which breaks ABI
- describe default callback behaviour in details
- avoid introduction of internal function to cope with depration
typo (depration)
Post by Andrew Rybchenko
(keep it to deprecation patch)
- move cache-line or page boundary chunk alignment to default callback
- highlight that min_chunk_size and align parameters are output only
[...]
Post by Andrew Rybchenko
--- a/lib/librte_mempool/Makefile
+++ b/lib/librte_mempool/Makefile
@@ -11,11 +11,12 @@ LDLIBS += -lrte_eal -lrte_ring
EXPORT_MAP := rte_mempool_version.map
-LIBABIVER := 3
+LIBABIVER := 4
# all source are stored in SRCS-y
SRCS-$(CONFIG_RTE_LIBRTE_MEMPOOL) += rte_mempool.c
SRCS-$(CONFIG_RTE_LIBRTE_MEMPOOL) += rte_mempool_ops.c
+SRCS-$(CONFIG_RTE_LIBRTE_MEMPOOL) += rte_mempool_ops_default.c
# install includes
SYMLINK-$(CONFIG_RTE_LIBRTE_MEMPOOL)-include := rte_mempool.h
diff --git a/lib/librte_mempool/meson.build b/lib/librte_mempool/meson.build
index 7a4f3da..9e3b527 100644
--- a/lib/librte_mempool/meson.build
+++ b/lib/librte_mempool/meson.build
@@ -1,7 +1,8 @@
# SPDX-License-Identifier: BSD-3-Clause
# Copyright(c) 2017 Intel Corporation
-version = 2
-sources = files('rte_mempool.c', 'rte_mempool_ops.c')
+version = 4
+sources = files('rte_mempool.c', 'rte_mempool_ops.c',
+ 'rte_mempool_ops_default.c')
headers = files('rte_mempool.h')
deps += ['ring']
It's strange to see that meson does not have the same
.so version as the legacy build system.

+CC Bruce in case he wants to fix this issue separately.

[...]
Post by Andrew Rybchenko
--- a/lib/librte_mempool/rte_mempool_version.map
+++ b/lib/librte_mempool/rte_mempool_version.map
@@ -51,3 +51,11 @@ DPDK_17.11 {
rte_mempool_populate_iova_tab;
} DPDK_16.07;
+
+DPDK_18.05 {
+
+ rte_mempool_op_calc_mem_size_default;
+
+} DPDK_17.11;
+
Another minor comment. When applying the patch with git am:

Applying: mempool: add op to calculate memory size to be allocated
.git/rebase-apply/patch:399: new blank line at EOF.
+
warning: 1 line adds whitespace errors.
Andrew Rybchenko
2018-03-20 10:29:11 UTC
Permalink
Post by Olivier Matz
Post by Andrew Rybchenko
--- a/lib/librte_mempool/Makefile
+++ b/lib/librte_mempool/Makefile
@@ -11,11 +11,12 @@ LDLIBS += -lrte_eal -lrte_ring
EXPORT_MAP := rte_mempool_version.map
-LIBABIVER := 3
+LIBABIVER := 4
# all source are stored in SRCS-y
SRCS-$(CONFIG_RTE_LIBRTE_MEMPOOL) += rte_mempool.c
SRCS-$(CONFIG_RTE_LIBRTE_MEMPOOL) += rte_mempool_ops.c
+SRCS-$(CONFIG_RTE_LIBRTE_MEMPOOL) += rte_mempool_ops_default.c
# install includes
SYMLINK-$(CONFIG_RTE_LIBRTE_MEMPOOL)-include := rte_mempool.h
diff --git a/lib/librte_mempool/meson.build b/lib/librte_mempool/meson.build
index 7a4f3da..9e3b527 100644
--- a/lib/librte_mempool/meson.build
+++ b/lib/librte_mempool/meson.build
@@ -1,7 +1,8 @@
# SPDX-License-Identifier: BSD-3-Clause
# Copyright(c) 2017 Intel Corporation
-version = 2
-sources = files('rte_mempool.c', 'rte_mempool_ops.c')
+version = 4
+sources = files('rte_mempool.c', 'rte_mempool_ops.c',
+ 'rte_mempool_ops_default.c')
headers = files('rte_mempool.h')
deps += ['ring']
It's strange to see that meson does not have the same
.so version as the legacy build system.
+CC Bruce in case he wants to fix this issue separately.
I'll make a patchset to fix all similar issues. It should definitely be
separate since it should be backported to 18.02.

I think the main problem here is the version=1 default in the case of meson.
There are already many examples without a version, and the version bump is
simply lost/forgotten when a new library is added to meson.
Bruce Richardson
2018-03-20 14:41:42 UTC
Permalink
Post by Olivier Matz
Post by Andrew Rybchenko
Size of memory chunk required to populate mempool objects depends
on how objects are stored in the memory. Different mempool drivers
may have different requirements and a new operation allows to
calculate memory size in accordance with driver requirements and
advertise requirements on minimum memory chunk size and alignment
in a generic way.
Bump ABI version since the patch breaks it.
Looks good to me. Just see below for few minor comments.
Post by Andrew Rybchenko
---
- move default calc_mem_size callback to rte_mempool_ops_default.c
- add ABI changes to release notes
- name default callback consistently: rte_mempool_op_<callback>_default()
- bump ABI version since it is the first patch which breaks ABI
- describe default callback behaviour in details
- avoid introduction of internal function to cope with depration
typo (depration)
Post by Andrew Rybchenko
(keep it to deprecation patch)
- move cache-line or page boundary chunk alignment to default callback
- highlight that min_chunk_size and align parameters are output only
[...]
Post by Andrew Rybchenko
--- a/lib/librte_mempool/Makefile
+++ b/lib/librte_mempool/Makefile
@@ -11,11 +11,12 @@ LDLIBS += -lrte_eal -lrte_ring
EXPORT_MAP := rte_mempool_version.map
-LIBABIVER := 3
+LIBABIVER := 4
# all source are stored in SRCS-y
SRCS-$(CONFIG_RTE_LIBRTE_MEMPOOL) += rte_mempool.c
SRCS-$(CONFIG_RTE_LIBRTE_MEMPOOL) += rte_mempool_ops.c
+SRCS-$(CONFIG_RTE_LIBRTE_MEMPOOL) += rte_mempool_ops_default.c
# install includes
SYMLINK-$(CONFIG_RTE_LIBRTE_MEMPOOL)-include := rte_mempool.h
diff --git a/lib/librte_mempool/meson.build b/lib/librte_mempool/meson.build
index 7a4f3da..9e3b527 100644
--- a/lib/librte_mempool/meson.build
+++ b/lib/librte_mempool/meson.build
@@ -1,7 +1,8 @@
# SPDX-License-Identifier: BSD-3-Clause
# Copyright(c) 2017 Intel Corporation
-version = 2
-sources = files('rte_mempool.c', 'rte_mempool_ops.c')
+version = 4
+sources = files('rte_mempool.c', 'rte_mempool_ops.c',
+ 'rte_mempool_ops_default.c')
headers = files('rte_mempool.h')
deps += ['ring']
It's strange to see that meson does not have the same
.so version as the legacy build system.
+CC Bruce in case he wants to fix this issue separately.
The .so version drift occurred during the development of the next-build
tree, sadly. While initially all versions were correct, as the patches
flowed into mainline I wasn't able to keep up with all the version changes.
:-(
Since nobody is actually using meson for packaging (yet), I'm not sure this
is critical, so I don't mind whether it's fixed in a separate patch or not.

/Bruce
Andrew Rybchenko
2018-03-10 15:39:35 UTC
Permalink
The callback allows to customize how objects are stored in the
memory chunk. Default implementation of the callback which simply
puts objects one by one is available.

Signed-off-by: Andrew Rybchenko <***@solarflare.com>
---
RFCv2 -> v1:
- advertise ABI changes in release notes
- use consistent name for default callback:
rte_mempool_op_<callback>_default()
- add opaque data pointer to populated object callback
- move default callback to dedicated file

doc/guides/rel_notes/deprecation.rst | 2 +-
doc/guides/rel_notes/release_18_05.rst | 2 +
lib/librte_mempool/rte_mempool.c | 23 +++----
lib/librte_mempool/rte_mempool.h | 90 ++++++++++++++++++++++++++++
lib/librte_mempool/rte_mempool_ops.c | 21 +++++++
lib/librte_mempool/rte_mempool_ops_default.c | 24 ++++++++
lib/librte_mempool/rte_mempool_version.map | 1 +
7 files changed, 148 insertions(+), 15 deletions(-)

diff --git a/doc/guides/rel_notes/deprecation.rst b/doc/guides/rel_notes/deprecation.rst
index e02d4ca..c06fc67 100644
--- a/doc/guides/rel_notes/deprecation.rst
+++ b/doc/guides/rel_notes/deprecation.rst
@@ -72,7 +72,7 @@ Deprecation Notices

- removal of ``get_capabilities`` mempool ops and related flags.
- substitute ``register_memory_area`` with ``populate`` ops.
- - addition of new ops to customize objects population and allocate contiguous
+ - addition of new op to allocate contiguous
block of objects if underlying driver supports it.

* mbuf: The control mbuf API will be removed in v18.05. The impacted
diff --git a/doc/guides/rel_notes/release_18_05.rst b/doc/guides/rel_notes/release_18_05.rst
index 59583ea..abaefe5 100644
--- a/doc/guides/rel_notes/release_18_05.rst
+++ b/doc/guides/rel_notes/release_18_05.rst
@@ -84,6 +84,8 @@ ABI Changes

A new callback ``calc_mem_size`` has been added to ``rte_mempool_ops``
to allow to customize required memory size calculation.
+ A new callback ``populate`` has been added to ``rte_mempool_ops``
+ to allow to customize objects population.


Removed Items
diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c
index 3bfb36e..ed0e982 100644
--- a/lib/librte_mempool/rte_mempool.c
+++ b/lib/librte_mempool/rte_mempool.c
@@ -99,7 +99,8 @@ static unsigned optimize_object_size(unsigned obj_size)
}

static void
-mempool_add_elem(struct rte_mempool *mp, void *obj, rte_iova_t iova)
+mempool_add_elem(struct rte_mempool *mp, __rte_unused void *opaque,
+ void *obj, rte_iova_t iova)
{
struct rte_mempool_objhdr *hdr;
struct rte_mempool_objtlr *tlr __rte_unused;
@@ -116,9 +117,6 @@ mempool_add_elem(struct rte_mempool *mp, void *obj, rte_iova_t iova)
tlr = __mempool_get_trailer(obj);
tlr->cookie = RTE_MEMPOOL_TRAILER_COOKIE;
#endif
-
- /* enqueue in ring */
- rte_mempool_ops_enqueue_bulk(mp, &obj, 1);
}

/* call obj_cb() for each mempool element */
@@ -396,16 +394,13 @@ rte_mempool_populate_iova(struct rte_mempool *mp, char *vaddr,
else
off = RTE_PTR_ALIGN_CEIL(vaddr, RTE_CACHE_LINE_SIZE) - vaddr;

- while (off + total_elt_sz <= len && mp->populated_size < mp->size) {
- off += mp->header_size;
- if (iova == RTE_BAD_IOVA)
- mempool_add_elem(mp, (char *)vaddr + off,
- RTE_BAD_IOVA);
- else
- mempool_add_elem(mp, (char *)vaddr + off, iova + off);
- off += mp->elt_size + mp->trailer_size;
- i++;
- }
+ if (off > len)
+ return -EINVAL;
+
+ i = rte_mempool_ops_populate(mp, mp->size - mp->populated_size,
+ (char *)vaddr + off,
+ (iova == RTE_BAD_IOVA) ? RTE_BAD_IOVA : (iova + off),
+ len - off, mempool_add_elem, NULL);

/* not enough room to store one object */
if (i == 0)
diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
index 0151f6c..49083bd 100644
--- a/lib/librte_mempool/rte_mempool.h
+++ b/lib/librte_mempool/rte_mempool.h
@@ -449,6 +449,63 @@ ssize_t rte_mempool_op_calc_mem_size_default(const struct rte_mempool *mp,
uint32_t obj_num, uint32_t pg_shift,
size_t *min_chunk_size, size_t *align);

+/**
+ * Function to be called for each populated object.
+ *
+ * @param[in] mp
+ * A pointer to the mempool structure.
+ * @param[in] opaque
+ * An opaque pointer passed to iterator.
+ * @param[in] vaddr
+ * Object virtual address.
+ * @param[in] iova
+ * Input/output virtual address of the object or RTE_BAD_IOVA.
+ */
+typedef void (rte_mempool_populate_obj_cb_t)(struct rte_mempool *mp,
+ void *opaque, void *vaddr, rte_iova_t iova);
+
+/**
+ * Populate memory pool objects using provided memory chunk.
+ *
+ * Populated objects should be enqueued to the pool, e.g. using
+ * rte_mempool_ops_enqueue_bulk().
+ *
+ * If the given IO address is unknown (iova = RTE_BAD_IOVA),
+ * the chunk doesn't need to be physically contiguous (only virtually),
+ * and allocated objects may span two pages.
+ *
+ * @param[in] mp
+ * A pointer to the mempool structure.
+ * @param[in] max_objs
+ * Maximum number of objects to be populated.
+ * @param[in] vaddr
+ * The virtual address of memory that should be used to store objects.
+ * @param[in] iova
+ * The IO address
+ * @param[in] len
+ * The length of memory in bytes.
+ * @param[in] obj_cb
+ * Callback function to be executed for each populated object.
+ * @param[in] obj_cb_arg
+ * An opaque pointer passed to the callback function.
+ * @return
+ * The number of objects added on success.
+ * On error, no objects are populated and a negative errno is returned.
+ */
+typedef int (*rte_mempool_populate_t)(struct rte_mempool *mp,
+ unsigned int max_objs,
+ void *vaddr, rte_iova_t iova, size_t len,
+ rte_mempool_populate_obj_cb_t *obj_cb, void *obj_cb_arg);
+
+/**
+ * Default way to populate memory pool object using provided memory
+ * chunk: just slice objects one by one.
+ */
+int rte_mempool_op_populate_default(struct rte_mempool *mp,
+ unsigned int max_objs,
+ void *vaddr, rte_iova_t iova, size_t len,
+ rte_mempool_populate_obj_cb_t *obj_cb, void *obj_cb_arg);
+
/** Structure defining mempool operations structure */
struct rte_mempool_ops {
char name[RTE_MEMPOOL_OPS_NAMESIZE]; /**< Name of mempool ops struct. */
@@ -470,6 +527,11 @@ struct rte_mempool_ops {
* store specified number of objects.
*/
rte_mempool_calc_mem_size_t calc_mem_size;
+ /**
+ * Optional callback to populate mempool objects using
+ * provided memory chunk.
+ */
+ rte_mempool_populate_t populate;
} __rte_cache_aligned;

#define RTE_MEMPOOL_MAX_OPS_IDX 16 /**< Max registered ops structs */
@@ -642,6 +704,34 @@ ssize_t rte_mempool_ops_calc_mem_size(const struct rte_mempool *mp,
size_t *min_chunk_size, size_t *align);

/**
+ * @internal wrapper for mempool_ops populate callback.
+ *
+ * Populate memory pool objects using provided memory chunk.
+ *
+ * @param[in] mp
+ * A pointer to the mempool structure.
+ * @param[in] max_objs
+ * Maximum number of objects to be populated.
+ * @param[in] vaddr
+ * The virtual address of memory that should be used to store objects.
+ * @param[in] iova
+ * The IO address
+ * @param[in] len
+ * The length of memory in bytes.
+ * @param[in] obj_cb
+ * Callback function to be executed for each populated object.
+ * @param[in] obj_cb_arg
+ * An opaque pointer passed to the callback function.
+ * @return
+ * The number of objects added on success.
+ * On error, no objects are populated and a negative errno is returned.
+ */
+int rte_mempool_ops_populate(struct rte_mempool *mp, unsigned int max_objs,
+ void *vaddr, rte_iova_t iova, size_t len,
+ rte_mempool_populate_obj_cb_t *obj_cb,
+ void *obj_cb_arg);
+
+/**
* @internal wrapper for mempool_ops free callback.
*
* @param mp
diff --git a/lib/librte_mempool/rte_mempool_ops.c b/lib/librte_mempool/rte_mempool_ops.c
index 26908cc..1a7f39f 100644
--- a/lib/librte_mempool/rte_mempool_ops.c
+++ b/lib/librte_mempool/rte_mempool_ops.c
@@ -60,6 +60,7 @@ rte_mempool_register_ops(const struct rte_mempool_ops *h)
ops->get_capabilities = h->get_capabilities;
ops->register_memory_area = h->register_memory_area;
ops->calc_mem_size = h->calc_mem_size;
+ ops->populate = h->populate;

rte_spinlock_unlock(&rte_mempool_ops_table.sl);

@@ -141,6 +142,26 @@ rte_mempool_ops_calc_mem_size(const struct rte_mempool *mp,
return ops->calc_mem_size(mp, obj_num, pg_shift, min_chunk_size, align);
}

+/* wrapper to populate memory pool objects using provided memory chunk */
+int
+rte_mempool_ops_populate(struct rte_mempool *mp, unsigned int max_objs,
+ void *vaddr, rte_iova_t iova, size_t len,
+ rte_mempool_populate_obj_cb_t *obj_cb,
+ void *obj_cb_arg)
+{
+ struct rte_mempool_ops *ops;
+
+ ops = rte_mempool_get_ops(mp->ops_index);
+
+ if (ops->populate == NULL)
+ return rte_mempool_op_populate_default(mp, max_objs, vaddr,
+ iova, len, obj_cb,
+ obj_cb_arg);
+
+ return ops->populate(mp, max_objs, vaddr, iova, len, obj_cb,
+ obj_cb_arg);
+}
+
/* sets mempool ops previously registered by rte_mempool_register_ops. */
int
rte_mempool_set_ops_byname(struct rte_mempool *mp, const char *name,
diff --git a/lib/librte_mempool/rte_mempool_ops_default.c b/lib/librte_mempool/rte_mempool_ops_default.c
index 57fe79b..57295f7 100644
--- a/lib/librte_mempool/rte_mempool_ops_default.c
+++ b/lib/librte_mempool/rte_mempool_ops_default.c
@@ -36,3 +36,27 @@ rte_mempool_op_calc_mem_size_default(const struct rte_mempool *mp,

return mem_size;
}
+
+int
+rte_mempool_op_populate_default(struct rte_mempool *mp, unsigned int max_objs,
+ void *vaddr, rte_iova_t iova, size_t len,
+ rte_mempool_populate_obj_cb_t *obj_cb, void *obj_cb_arg)
+{
+ size_t total_elt_sz;
+ size_t off;
+ unsigned int i;
+ void *obj;
+
+ total_elt_sz = mp->header_size + mp->elt_size + mp->trailer_size;
+
+ for (off = 0, i = 0; off + total_elt_sz <= len && i < max_objs; i++) {
+ off += mp->header_size;
+ obj = (char *)vaddr + off;
+ obj_cb(mp, obj_cb_arg, obj,
+ (iova == RTE_BAD_IOVA) ? RTE_BAD_IOVA : (iova + off));
+ rte_mempool_ops_enqueue_bulk(mp, &obj, 1);
+ off += mp->elt_size + mp->trailer_size;
+ }
+
+ return i;
+}
diff --git a/lib/librte_mempool/rte_mempool_version.map b/lib/librte_mempool/rte_mempool_version.map
index e2a054b..90e79ec 100644
--- a/lib/librte_mempool/rte_mempool_version.map
+++ b/lib/librte_mempool/rte_mempool_version.map
@@ -56,6 +56,7 @@ DPDK_18.05 {
global:

rte_mempool_op_calc_mem_size_default;
+ rte_mempool_op_populate_default;

} DPDK_17.11;
--
2.7.4
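To show how a driver is expected to use the new op, here is a rough sketch
(not from the patch) of a populate callback that keeps the first cache line
of each chunk for driver-private data and lets the default implementation lay
out the objects in the remainder. The name my_populate is hypothetical; a
real driver would also account for the reserved bytes in its calc_mem_size
callback.

#include <errno.h>
#include <rte_mempool.h>

static int
my_populate(struct rte_mempool *mp, unsigned int max_objs,
	    void *vaddr, rte_iova_t iova, size_t len,
	    rte_mempool_populate_obj_cb_t *obj_cb, void *obj_cb_arg)
{
	size_t off = RTE_CACHE_LINE_SIZE;	/* driver-private header */

	if (len < off)
		return -EINVAL;

	return rte_mempool_op_populate_default(mp, max_objs,
			(char *)vaddr + off,
			(iova == RTE_BAD_IOVA) ? RTE_BAD_IOVA : iova + off,
			len - off, obj_cb, obj_cb_arg);
}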
Olivier Matz
2018-03-19 17:04:20 UTC
Permalink
Post by Andrew Rybchenko
The callback allows to customize how objects are stored in the
memory chunk. Default implementation of the callback which simply
puts objects one by one is available.
[...]
Post by Andrew Rybchenko
--- a/lib/librte_mempool/rte_mempool.c
+++ b/lib/librte_mempool/rte_mempool.c
@@ -99,7 +99,8 @@ static unsigned optimize_object_size(unsigned obj_size)
}
static void
-mempool_add_elem(struct rte_mempool *mp, void *obj, rte_iova_t iova)
+mempool_add_elem(struct rte_mempool *mp, __rte_unused void *opaque,
+ void *obj, rte_iova_t iova)
{
struct rte_mempool_objhdr *hdr;
struct rte_mempool_objtlr *tlr __rte_unused;
@@ -116,9 +117,6 @@ mempool_add_elem(struct rte_mempool *mp, void *obj, rte_iova_t iova)
tlr = __mempool_get_trailer(obj);
tlr->cookie = RTE_MEMPOOL_TRAILER_COOKIE;
#endif
-
- /* enqueue in ring */
- rte_mempool_ops_enqueue_bulk(mp, &obj, 1);
}
/* call obj_cb() for each mempool element */
Before this patch, the purpose of mempool_add_elem() was to add
an object into a mempool:
1- write object header and trailers
2- chain it into the list of objects
3- add it into the ring/stack/whatever (=enqueue)

Now, the enqueue is done in rte_mempool_op_populate_default() or will be
done in the driver. I'm not sure it's a good idea to separate 3- from
2-, because an object that is chained into the list is expected to be
in the ring/stack too.

This risk of mis-synchronization is also reinforced by the fact that
ops->populate() can be provided by the driver and mempool_add_elem() is
passed as a callback pointer.

It's not clear to me why rte_mempool_ops_enqueue_bulk() is
removed from mempool_add_elem().
Post by Andrew Rybchenko
@@ -396,16 +394,13 @@ rte_mempool_populate_iova(struct rte_mempool *mp, char *vaddr,
else
off = RTE_PTR_ALIGN_CEIL(vaddr, RTE_CACHE_LINE_SIZE) - vaddr;
- while (off + total_elt_sz <= len && mp->populated_size < mp->size) {
- off += mp->header_size;
- if (iova == RTE_BAD_IOVA)
- mempool_add_elem(mp, (char *)vaddr + off,
- RTE_BAD_IOVA);
- else
- mempool_add_elem(mp, (char *)vaddr + off, iova + off);
- off += mp->elt_size + mp->trailer_size;
- i++;
- }
+ if (off > len)
+ return -EINVAL;
I think there is a memory leak here (memhdr), but it's my fault ;)
I introduced similar code in commit 84121f1971:

if (i == 0)
return -EINVAL;

I can send a patch for it if you want.
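For readers following along, this is a sketch of the kind of fix being
discussed, not the actual patch; it assumes the locals of
rte_mempool_populate_iova() shown above (memhdr, off, len, vaddr, iova).

	if (off > len) {
		rte_free(memhdr);	/* do not leak the header on early exit */
		return -EINVAL;
	}

	i = rte_mempool_ops_populate(mp, mp->size - mp->populated_size,
			(char *)vaddr + off,
			(iova == RTE_BAD_IOVA) ? RTE_BAD_IOVA : (iova + off),
			len - off, mempool_add_elem, NULL);

	/* not enough room to store one object */
	if (i == 0) {
		rte_free(memhdr);
		return -EINVAL;
	}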
Andrew Rybchenko
2018-03-21 07:05:54 UTC
Permalink
Post by Olivier Matz
Post by Andrew Rybchenko
The callback allows to customize how objects are stored in the
memory chunk. Default implementation of the callback which simply
puts objects one by one is available.
[...]
Post by Andrew Rybchenko
--- a/lib/librte_mempool/rte_mempool.c
+++ b/lib/librte_mempool/rte_mempool.c
@@ -99,7 +99,8 @@ static unsigned optimize_object_size(unsigned obj_size)
}
static void
-mempool_add_elem(struct rte_mempool *mp, void *obj, rte_iova_t iova)
+mempool_add_elem(struct rte_mempool *mp, __rte_unused void *opaque,
+ void *obj, rte_iova_t iova)
{
struct rte_mempool_objhdr *hdr;
struct rte_mempool_objtlr *tlr __rte_unused;
@@ -116,9 +117,6 @@ mempool_add_elem(struct rte_mempool *mp, void *obj, rte_iova_t iova)
tlr = __mempool_get_trailer(obj);
tlr->cookie = RTE_MEMPOOL_TRAILER_COOKIE;
#endif
-
- /* enqueue in ring */
- rte_mempool_ops_enqueue_bulk(mp, &obj, 1);
}
/* call obj_cb() for each mempool element */
Before this patch, the purpose of mempool_add_elem() was to add
an object into a mempool:
1- write object header and trailers
2- chain it into the list of objects
3- add it into the ring/stack/whatever (=enqueue)
Now, the enqueue is done in rte_mempool_op_populate_default() or will be
done in the driver. I'm not sure it's a good idea to separate 3- from
2-, because an object that is chained into the list is expected to be
in the ring/stack too.
When an object is dequeued, it is still chained into the list, but not in
the ring/stack. The separation is so that the callback does only generic
mempool housekeeping, while enqueue is a driver-specific operation.
Post by Olivier Matz
This risk of mis-synchronization is also reinforced by the fact that
ops->populate() can be provided by the driver and mempool_add_elem() is
passed as a callback pointer.
It's not clear to me why rte_mempool_ops_enqueue_bulk() is
removed from mempool_add_elem().
The idea was that it could be more efficient (and probably the only way)
to do the initial enqueue inside the driver. In theory the bucket mempool
could init and enqueue full buckets instead of objects one-by-one.
However, in the end it appears to be easier to reuse the default populate
callback and enqueue operation.
So, now I have no strong opinion and agree with your arguments;
that's why I've tried to highlight it in the rte_mempool_populate_t
description. Even an explicit description does not always help...
So, should I return the enqueue to the callback or leave it as is in my patches?
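To make the bulk-enqueue idea above concrete, here is a rough sketch (not
from the patches) of a populate callback that batches the final enqueue
instead of pushing objects one by one. The function name and batch size are
arbitrary, and error handling of the enqueue is omitted.

#include <rte_common.h>
#include <rte_mempool.h>

static int
example_bulk_populate(struct rte_mempool *mp, unsigned int max_objs,
		      void *vaddr, rte_iova_t iova, size_t len,
		      rte_mempool_populate_obj_cb_t *obj_cb, void *obj_cb_arg)
{
	size_t total_elt_sz = mp->header_size + mp->elt_size + mp->trailer_size;
	void *objs[64];			/* arbitrary batch size */
	unsigned int i, n = 0;
	size_t off;

	for (off = 0, i = 0; off + total_elt_sz <= len && i < max_objs; i++) {
		off += mp->header_size;
		objs[n] = (char *)vaddr + off;
		obj_cb(mp, obj_cb_arg, objs[n],
		       (iova == RTE_BAD_IOVA) ? RTE_BAD_IOVA : (iova + off));
		off += mp->elt_size + mp->trailer_size;
		if (++n == RTE_DIM(objs)) {
			rte_mempool_ops_enqueue_bulk(mp, objs, n);
			n = 0;
		}
	}
	if (n != 0)
		rte_mempool_ops_enqueue_bulk(mp, objs, n);

	return i;
}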
Post by Olivier Matz
Post by Andrew Rybchenko
@@ -396,16 +394,13 @@ rte_mempool_populate_iova(struct rte_mempool *mp, char *vaddr,
else
off = RTE_PTR_ALIGN_CEIL(vaddr, RTE_CACHE_LINE_SIZE) - vaddr;
- while (off + total_elt_sz <= len && mp->populated_size < mp->size) {
- off += mp->header_size;
- if (iova == RTE_BAD_IOVA)
- mempool_add_elem(mp, (char *)vaddr + off,
- RTE_BAD_IOVA);
- else
- mempool_add_elem(mp, (char *)vaddr + off, iova + off);
- off += mp->elt_size + mp->trailer_size;
- i++;
- }
+ if (off > len)
+ return -EINVAL;
I think there is a memory leak here (memhdr), but it's my fault ;)
if (i == 0)
return -EINVAL;
I can send a patch for it if you want.
This one is yours, above is mine :)
Don't worry, I'll submit a separate pre-patch to fix it with the appropriate
Fixes and Cc.
Andrew Rybchenko
2018-03-10 15:39:36 UTC
Permalink
The callback was introduced to let generic code know the octeontx
mempool driver requirements to use a single physically contiguous
memory chunk to store all objects and to align object addresses to
the total object size. Now these requirements are met using new
callbacks to calculate the required memory chunk size and to populate
objects using the provided memory chunk.

These capability flags are not used anywhere else.

Restricting capabilities to flags is not generic and likely to
be insufficient to describe mempool driver features. If required
in the future, API which returns structured information may be
added.

Signed-off-by: Andrew Rybchenko <***@solarflare.com>
---
RFCv2 -> v1:
- squash mempool/octeontx patches to add calc_mem_size and populate
callbacks to this one in order to avoid breakages in the middle of
patchset
- advertise API changes in release notes

doc/guides/rel_notes/deprecation.rst | 1 -
doc/guides/rel_notes/release_18_05.rst | 11 +++++
drivers/mempool/octeontx/rte_mempool_octeontx.c | 59 +++++++++++++++++++++----
lib/librte_mempool/rte_mempool.c | 44 ++----------------
lib/librte_mempool/rte_mempool.h | 52 +---------------------
lib/librte_mempool/rte_mempool_ops.c | 14 ------
lib/librte_mempool/rte_mempool_ops_default.c | 15 +------
lib/librte_mempool/rte_mempool_version.map | 1 -
8 files changed, 68 insertions(+), 129 deletions(-)

diff --git a/doc/guides/rel_notes/deprecation.rst b/doc/guides/rel_notes/deprecation.rst
index c06fc67..4deed9a 100644
--- a/doc/guides/rel_notes/deprecation.rst
+++ b/doc/guides/rel_notes/deprecation.rst
@@ -70,7 +70,6 @@ Deprecation Notices

The following changes are planned:

- - removal of ``get_capabilities`` mempool ops and related flags.
- substitute ``register_memory_area`` with ``populate`` ops.
- addition of new op to allocate contiguous
block of objects if underlying driver supports it.
diff --git a/doc/guides/rel_notes/release_18_05.rst b/doc/guides/rel_notes/release_18_05.rst
index abaefe5..c50f26c 100644
--- a/doc/guides/rel_notes/release_18_05.rst
+++ b/doc/guides/rel_notes/release_18_05.rst
@@ -66,6 +66,14 @@ API Changes
Also, make sure to start the actual text at the margin.
=========================================================

+* **Removed mempool capability flags and related functions.**
+
+ Flags ``MEMPOOL_F_CAPA_PHYS_CONTIG`` and
+ ``MEMPOOL_F_CAPA_BLK_ALIGNED_OBJECTS`` were used by octeontx mempool
+ driver to customize generic mempool library behaviour.
+ Now the new driver callbacks ``calc_mem_size`` and ``populate`` may be
+ used to achieve it without specific knowledge in the generic code.
+

ABI Changes
-----------
@@ -86,6 +94,9 @@ ABI Changes
to allow to customize required memory size calculation.
A new callback ``populate`` has been added to ``rte_mempool_ops``
to allow to customize objects population.
+ Callback ``get_capabilities`` has been removed from ``rte_mempool_ops``
+ since its features are covered by ``calc_mem_size`` and ``populate``
+ callbacks.


Removed Items
diff --git a/drivers/mempool/octeontx/rte_mempool_octeontx.c b/drivers/mempool/octeontx/rte_mempool_octeontx.c
index d143d05..f2c4f6a 100644
--- a/drivers/mempool/octeontx/rte_mempool_octeontx.c
+++ b/drivers/mempool/octeontx/rte_mempool_octeontx.c
@@ -126,14 +126,29 @@ octeontx_fpavf_get_count(const struct rte_mempool *mp)
return octeontx_fpa_bufpool_free_count(pool);
}

-static int
-octeontx_fpavf_get_capabilities(const struct rte_mempool *mp,
- unsigned int *flags)
+static ssize_t
+octeontx_fpavf_calc_mem_size(const struct rte_mempool *mp,
+ uint32_t obj_num, uint32_t pg_shift,
+ size_t *min_chunk_size, size_t *align)
{
- RTE_SET_USED(mp);
- *flags |= (MEMPOOL_F_CAPA_PHYS_CONTIG |
- MEMPOOL_F_CAPA_BLK_ALIGNED_OBJECTS);
- return 0;
+ ssize_t mem_size;
+
+ /*
+ * Simply need space for one more object to be able to
+	 * fulfill alignment requirements.
+ */
+ mem_size = rte_mempool_op_calc_mem_size_default(mp, obj_num + 1,
+ pg_shift,
+ min_chunk_size, align);
+ if (mem_size >= 0) {
+ /*
+ * Memory area which contains objects must be physically
+ * contiguous.
+ */
+ *min_chunk_size = mem_size;
+ }
+
+ return mem_size;
}

static int
@@ -150,6 +165,33 @@ octeontx_fpavf_register_memory_area(const struct rte_mempool *mp,
return octeontx_fpavf_pool_set_range(pool_bar, len, vaddr, gpool);
}

+static int
+octeontx_fpavf_populate(struct rte_mempool *mp, unsigned int max_objs,
+ void *vaddr, rte_iova_t iova, size_t len,
+ rte_mempool_populate_obj_cb_t *obj_cb, void *obj_cb_arg)
+{
+ size_t total_elt_sz;
+ size_t off;
+
+ if (iova == RTE_BAD_IOVA)
+ return -EINVAL;
+
+ total_elt_sz = mp->header_size + mp->elt_size + mp->trailer_size;
+
+ /* align object start address to a multiple of total_elt_sz */
+ off = total_elt_sz - ((uintptr_t)vaddr % total_elt_sz);
+
+ if (len < off)
+ return -EINVAL;
+
+ vaddr = (char *)vaddr + off;
+ iova += off;
+ len -= off;
+
+ return rte_mempool_op_populate_default(mp, max_objs, vaddr, iova, len,
+ obj_cb, obj_cb_arg);
+}
+
static struct rte_mempool_ops octeontx_fpavf_ops = {
.name = "octeontx_fpavf",
.alloc = octeontx_fpavf_alloc,
@@ -157,8 +199,9 @@ static struct rte_mempool_ops octeontx_fpavf_ops = {
.enqueue = octeontx_fpavf_enqueue,
.dequeue = octeontx_fpavf_dequeue,
.get_count = octeontx_fpavf_get_count,
- .get_capabilities = octeontx_fpavf_get_capabilities,
.register_memory_area = octeontx_fpavf_register_memory_area,
+ .calc_mem_size = octeontx_fpavf_calc_mem_size,
+ .populate = octeontx_fpavf_populate,
};

MEMPOOL_REGISTER_OPS(octeontx_fpavf_ops);
diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c
index ed0e982..fdcda45 100644
--- a/lib/librte_mempool/rte_mempool.c
+++ b/lib/librte_mempool/rte_mempool.c
@@ -208,15 +208,9 @@ rte_mempool_calc_obj_size(uint32_t elt_size, uint32_t flags,
*/
size_t
rte_mempool_xmem_size(uint32_t elt_num, size_t total_elt_sz, uint32_t pg_shift,
- unsigned int flags)
+ __rte_unused unsigned int flags)
{
size_t obj_per_page, pg_num, pg_sz;
- unsigned int mask;
-
- mask = MEMPOOL_F_CAPA_BLK_ALIGNED_OBJECTS | MEMPOOL_F_CAPA_PHYS_CONTIG;
- if ((flags & mask) == mask)
- /* alignment need one additional object */
- elt_num += 1;

if (total_elt_sz == 0)
return 0;
@@ -240,18 +234,12 @@ rte_mempool_xmem_size(uint32_t elt_num, size_t total_elt_sz, uint32_t pg_shift,
ssize_t
rte_mempool_xmem_usage(__rte_unused void *vaddr, uint32_t elt_num,
size_t total_elt_sz, const rte_iova_t iova[], uint32_t pg_num,
- uint32_t pg_shift, unsigned int flags)
+ uint32_t pg_shift, __rte_unused unsigned int flags)
{
uint32_t elt_cnt = 0;
rte_iova_t start, end;
uint32_t iova_idx;
size_t pg_sz = (size_t)1 << pg_shift;
- unsigned int mask;
-
- mask = MEMPOOL_F_CAPA_BLK_ALIGNED_OBJECTS | MEMPOOL_F_CAPA_PHYS_CONTIG;
- if ((flags & mask) == mask)
- /* alignment need one additional object */
- elt_num += 1;

/* if iova is NULL, assume contiguous memory */
if (iova == NULL) {
@@ -330,8 +318,6 @@ rte_mempool_populate_iova(struct rte_mempool *mp, char *vaddr,
rte_iova_t iova, size_t len, rte_mempool_memchunk_free_cb_t *free_cb,
void *opaque)
{
- unsigned total_elt_sz;
- unsigned int mp_capa_flags;
unsigned i = 0;
size_t off;
struct rte_mempool_memhdr *memhdr;
@@ -354,27 +340,6 @@ rte_mempool_populate_iova(struct rte_mempool *mp, char *vaddr,
if (mp->populated_size >= mp->size)
return -ENOSPC;

- total_elt_sz = mp->header_size + mp->elt_size + mp->trailer_size;
-
- /* Get mempool capabilities */
- mp_capa_flags = 0;
- ret = rte_mempool_ops_get_capabilities(mp, &mp_capa_flags);
- if ((ret < 0) && (ret != -ENOTSUP))
- return ret;
-
- /* update mempool capabilities */
- mp->flags |= mp_capa_flags;
-
- /* Detect pool area has sufficient space for elements */
- if (mp_capa_flags & MEMPOOL_F_CAPA_PHYS_CONTIG) {
- if (len < total_elt_sz * mp->size) {
- RTE_LOG(ERR, MEMPOOL,
- "pool area %" PRIx64 " not enough\n",
- (uint64_t)len);
- return -ENOSPC;
- }
- }
-
memhdr = rte_zmalloc("MEMPOOL_MEMHDR", sizeof(*memhdr), 0);
if (memhdr == NULL)
return -ENOMEM;
@@ -386,10 +351,7 @@ rte_mempool_populate_iova(struct rte_mempool *mp, char *vaddr,
memhdr->free_cb = free_cb;
memhdr->opaque = opaque;

- if (mp_capa_flags & MEMPOOL_F_CAPA_BLK_ALIGNED_OBJECTS)
- /* align object start address to a multiple of total_elt_sz */
- off = total_elt_sz - ((uintptr_t)vaddr % total_elt_sz);
- else if (mp->flags & MEMPOOL_F_NO_CACHE_ALIGN)
+ if (mp->flags & MEMPOOL_F_NO_CACHE_ALIGN)
off = RTE_PTR_ALIGN_CEIL(vaddr, 8) - vaddr;
else
off = RTE_PTR_ALIGN_CEIL(vaddr, RTE_CACHE_LINE_SIZE) - vaddr;
diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
index 49083bd..cd3b229 100644
--- a/lib/librte_mempool/rte_mempool.h
+++ b/lib/librte_mempool/rte_mempool.h
@@ -245,24 +245,6 @@ struct rte_mempool {
#define MEMPOOL_F_SC_GET 0x0008 /**< Default get is "single-consumer".*/
#define MEMPOOL_F_POOL_CREATED 0x0010 /**< Internal: pool is created. */
#define MEMPOOL_F_NO_PHYS_CONTIG 0x0020 /**< Don't need physically contiguous objs. */
-/**
- * This capability flag is advertised by a mempool handler, if the whole
- * memory area containing the objects must be physically contiguous.
- * Note: This flag should not be passed by application.
- */
-#define MEMPOOL_F_CAPA_PHYS_CONTIG 0x0040
-/**
- * This capability flag is advertised by a mempool handler. Used for a case
- * where mempool driver wants object start address(vaddr) aligned to block
- * size(/ total element size).
- *
- * Note:
- * - This flag should not be passed by application.
- * Flag used for mempool driver only.
- * - Mempool driver must also set MEMPOOL_F_CAPA_PHYS_CONTIG flag along with
- * MEMPOOL_F_CAPA_BLK_ALIGNED_OBJECTS.
- */
-#define MEMPOOL_F_CAPA_BLK_ALIGNED_OBJECTS 0x0080

/**
* @internal When debug is enabled, store some statistics.
@@ -388,12 +370,6 @@ typedef int (*rte_mempool_dequeue_t)(struct rte_mempool *mp,
typedef unsigned (*rte_mempool_get_count)(const struct rte_mempool *mp);

/**
- * Get the mempool capabilities.
- */
-typedef int (*rte_mempool_get_capabilities_t)(const struct rte_mempool *mp,
- unsigned int *flags);
-
-/**
* Notify new memory area to mempool.
*/
typedef int (*rte_mempool_ops_register_memory_area_t)
@@ -433,13 +409,7 @@ typedef ssize_t (*rte_mempool_calc_mem_size_t)(const struct rte_mempool *mp,
* that pages are grouped in subsets of physically continuous pages big
* enough to store at least one object.
*
- * If mempool driver requires object addresses to be block size aligned
- * (MEMPOOL_F_CAPA_BLK_ALIGNED_OBJECTS), space for one extra element is
- * reserved to be able to meet the requirement.
- *
- * Minimum size of memory chunk is either all required space, if
- * capabilities say that whole memory area must be physically contiguous
- * (MEMPOOL_F_CAPA_PHYS_CONTIG), or a maximum of the page size and total
+ * Minimum size of memory chunk is a maximum of the page size and total
* element size.
*
* Required memory chunk alignment is a maximum of page size and cache
@@ -515,10 +485,6 @@ struct rte_mempool_ops {
rte_mempool_dequeue_t dequeue; /**< Dequeue an object. */
rte_mempool_get_count get_count; /**< Get qty of available objs. */
/**
- * Get the mempool capabilities
- */
- rte_mempool_get_capabilities_t get_capabilities;
- /**
* Notify new memory area to mempool
*/
rte_mempool_ops_register_memory_area_t register_memory_area;
@@ -644,22 +610,6 @@ unsigned
rte_mempool_ops_get_count(const struct rte_mempool *mp);

/**
- * @internal wrapper for mempool_ops get_capabilities callback.
- *
- * @param mp [in]
- * Pointer to the memory pool.
- * @param flags [out]
- * Pointer to the mempool flags.
- * @return
- * - 0: Success; The mempool driver has advertised his pool capabilities in
- * flags param.
- * - -ENOTSUP - doesn't support get_capabilities ops (valid case).
- * - Otherwise, pool create fails.
- */
-int
-rte_mempool_ops_get_capabilities(const struct rte_mempool *mp,
- unsigned int *flags);
-/**
* @internal wrapper for mempool_ops register_memory_area callback.
* API to notify the mempool handler when a new memory area is added to pool.
*
diff --git a/lib/librte_mempool/rte_mempool_ops.c b/lib/librte_mempool/rte_mempool_ops.c
index 1a7f39f..6ac669a 100644
--- a/lib/librte_mempool/rte_mempool_ops.c
+++ b/lib/librte_mempool/rte_mempool_ops.c
@@ -57,7 +57,6 @@ rte_mempool_register_ops(const struct rte_mempool_ops *h)
ops->enqueue = h->enqueue;
ops->dequeue = h->dequeue;
ops->get_count = h->get_count;
- ops->get_capabilities = h->get_capabilities;
ops->register_memory_area = h->register_memory_area;
ops->calc_mem_size = h->calc_mem_size;
ops->populate = h->populate;
@@ -99,19 +98,6 @@ rte_mempool_ops_get_count(const struct rte_mempool *mp)
return ops->get_count(mp);
}

-/* wrapper to get external mempool capabilities. */
-int
-rte_mempool_ops_get_capabilities(const struct rte_mempool *mp,
- unsigned int *flags)
-{
- struct rte_mempool_ops *ops;
-
- ops = rte_mempool_get_ops(mp->ops_index);
-
- RTE_FUNC_PTR_OR_ERR_RET(ops->get_capabilities, -ENOTSUP);
- return ops->get_capabilities(mp, flags);
-}
-
/* wrapper to notify new memory area to external mempool */
int
rte_mempool_ops_register_memory_area(const struct rte_mempool *mp, char *vaddr,
diff --git a/lib/librte_mempool/rte_mempool_ops_default.c b/lib/librte_mempool/rte_mempool_ops_default.c
index 57295f7..3defc15 100644
--- a/lib/librte_mempool/rte_mempool_ops_default.c
+++ b/lib/librte_mempool/rte_mempool_ops_default.c
@@ -11,26 +11,15 @@ rte_mempool_op_calc_mem_size_default(const struct rte_mempool *mp,
uint32_t obj_num, uint32_t pg_shift,
size_t *min_chunk_size, size_t *align)
{
- unsigned int mp_flags;
- int ret;
size_t total_elt_sz;
size_t mem_size;

- /* Get mempool capabilities */
- mp_flags = 0;
- ret = rte_mempool_ops_get_capabilities(mp, &mp_flags);
- if ((ret < 0) && (ret != -ENOTSUP))
- return ret;
-
total_elt_sz = mp->header_size + mp->elt_size + mp->trailer_size;

mem_size = rte_mempool_xmem_size(obj_num, total_elt_sz, pg_shift,
- mp->flags | mp_flags);
+ mp->flags);

- if (mp_flags & MEMPOOL_F_CAPA_PHYS_CONTIG)
- *min_chunk_size = mem_size;
- else
- *min_chunk_size = RTE_MAX((size_t)1 << pg_shift, total_elt_sz);
+ *min_chunk_size = RTE_MAX((size_t)1 << pg_shift, total_elt_sz);

*align = RTE_MAX((size_t)RTE_CACHE_LINE_SIZE, (size_t)1 << pg_shift);

diff --git a/lib/librte_mempool/rte_mempool_version.map b/lib/librte_mempool/rte_mempool_version.map
index 90e79ec..42ca4df 100644
--- a/lib/librte_mempool/rte_mempool_version.map
+++ b/lib/librte_mempool/rte_mempool_version.map
@@ -45,7 +45,6 @@ DPDK_16.07 {
DPDK_17.11 {
global:

- rte_mempool_ops_get_capabilities;
rte_mempool_ops_register_memory_area;
rte_mempool_populate_iova;
rte_mempool_populate_iova_tab;
--
2.7.4
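As a small worked example of the off computation used in
octeontx_fpavf_populate() above (illustrative values only, not part of the
patch):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uintptr_t vaddr = 0x10001040;	/* arbitrary chunk start address */
	size_t total_elt_sz = 0x800;	/* header + object + trailer */
	size_t off = total_elt_sz - (vaddr % total_elt_sz);

	/* prints: skip 1984 bytes, first slot at 0x10001800 */
	printf("skip %zu bytes, first slot at %#lx\n",
	       off, (unsigned long)(vaddr + off));
	return 0;
}

Note that when vaddr is already a multiple of total_elt_sz this computation
skips a full element, which is why octeontx_fpavf_calc_mem_size() asks the
default helper for obj_num + 1 objects.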
Burakov, Anatoly
2018-03-14 14:40:24 UTC
Permalink
Post by Andrew Rybchenko
The callback was introduced to let generic code to know octeontx
mempool driver requirements to use single physically contiguous
memory chunk to store all objects and align object address to
total object size. Now these requirements are met using a new
callbacks to calculate required memory chunk size and to populate
objects using provided memory chunk.
These capability flags are not used anywhere else.
Restricting capabilities to flags is not generic and likely to
be insufficient to describe mempool driver features. If required
in the future, API which returns structured information may be
added.
---
Just a general comment - it is not enough to describe minimum memchunk
requirements. With the memory hotplug patchset that's hopefully getting
merged in 18.05, memzones will no longer be guaranteed to be
IOVA-contiguous. So, if a driver requires its mempool to not only be
populated from a single memzone, but a single *physically contiguous*
memzone, going by only callbacks will not do, because whether or not
something should be a single memzone says nothing about whether this
memzone has to also be IOVA-contiguous.

So I believe this needs to stay in one form or another.

(also it would be nice to have a flag that a user could pass to
mempool_create that would force memzone reservation to be IOVA-contiguous,
but that's a topic for another conversation. The prime user for this would
be KNI.)
--
Thanks,
Anatoly
Andrew Rybchenko
2018-03-14 16:12:27 UTC
Permalink
Post by Burakov, Anatoly
Post by Andrew Rybchenko
The callback was introduced to let generic code to know octeontx
mempool driver requirements to use single physically contiguous
memory chunk to store all objects and align object address to
total object size. Now these requirements are met using a new
callbacks to calculate required memory chunk size and to populate
objects using provided memory chunk.
These capability flags are not used anywhere else.
Restricting capabilities to flags is not generic and likely to
be insufficient to describe mempool driver features. If required
in the future, API which returns structured information may be
added.
---
Just a general comment - it is not enough to describe minimum memchunk
requirements. With memory hotplug patchset that's hopefully getting
merged in 18.05, memzones will no longer be guaranteed to be
IOVA-contiguous. So, if a driver requires its mempool to not only be
populated from a single memzone, but a single *physically contiguous*
memzone, going by only callbacks will not do, because whether or not
something should be a single memzone says nothing about whether this
memzone has to also be IOVA-contiguous.
So i believe this needs to stay in one form or another.
(also it would be nice to have a flag that a user could pass to
mempool_create that would force memzone reservation be
IOVA-contiguous, but that's a topic for another conversation. prime
user for this would be KNI.)
I think that min_chunk_size should be treated as IOVA-contiguous. So, we
have 4 levels:
 - MEMPOOL_F_NO_PHYS_CONTIG (min_chunk_size == 0) -- IOVA-contiguous
   is not required at all
 - no MEMPOOL_F_NO_PHYS_CONTIG (min_chunk_size == total_obj_size) --
   each object should be IOVA-contiguous
 - min_chunk_size > total_obj_size -- a group of objects should be
   IOVA-contiguous
 - min_chunk_size == <all-objects-size> -- all objects should be
   IOVA-contiguous

If so, how should allocation be implemented?
 1. if (min_chunk_size > min_page_size)
    a. try all contiguous
    b. if cannot, do by min_chunk_size contiguous
 2. else allocate non-contiguous

--
Andrew.
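A minimal C sketch of the allocation order Andrew outlines above, assuming
the generic code would first attempt one fully IOVA-contiguous area and fall
back to min_chunk_size-sized chunks; the names are hypothetical and this is
not code from the patches.

#include <stddef.h>
#include <unistd.h>

enum alloc_strategy {
	ALLOC_ALL_IOVA_CONTIG,		/* 1a. try one IOVA-contiguous area */
	ALLOC_CHUNK_IOVA_CONTIG,	/* 1b. fall back to min_chunk_size chunks */
	ALLOC_ANY			/* 2.  page-level contiguity is enough */
};

/* Hypothetical helper: map the driver-reported min_chunk_size to the
 * allocation strategy described in the message above. */
static enum alloc_strategy
pick_strategy(size_t min_chunk_size, int all_contig_failed)
{
	size_t pg_sz = (size_t)getpagesize();

	if (min_chunk_size > pg_sz)
		return all_contig_failed ? ALLOC_CHUNK_IOVA_CONTIG :
					   ALLOC_ALL_IOVA_CONTIG;
	return ALLOC_ANY;
}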
Burakov, Anatoly
2018-03-14 16:53:34 UTC
Permalink
Post by Andrew Rybchenko
Post by Burakov, Anatoly
Post by Andrew Rybchenko
The callback was introduced to let generic code to know octeontx
mempool driver requirements to use single physically contiguous
memory chunk to store all objects and align object address to
total object size. Now these requirements are met using a new
callbacks to calculate required memory chunk size and to populate
objects using provided memory chunk.
These capability flags are not used anywhere else.
Restricting capabilities to flags is not generic and likely to
be insufficient to describe mempool driver features. If required
in the future, API which returns structured information may be
added.
---
Just a general comment - it is not enough to describe minimum memchunk
requirements. With memory hotplug patchset that's hopefully getting
merged in 18.05, memzones will no longer be guaranteed to be
IOVA-contiguous. So, if a driver requires its mempool to not only be
populated from a single memzone, but a single *physically contiguous*
memzone, going by only callbacks will not do, because whether or not
something should be a single memzone says nothing about whether this
memzone has to also be IOVA-contiguous.
So i believe this needs to stay in one form or another.
(also it would be nice to have a flag that a user could pass to
mempool_create that would force memzone reservation be
IOVA-contiguous, but that's a topic for another conversation. prime
user for this would be KNI.)
I think that min_chunk_size should be treated as IOVA-contiguous.
Why? It's perfectly reasonable to e.g. implement a software mempool
driver that would perform some optimizations due to all objects being in
the same VA-contiguous memzone, yet not be dependent on underlying
physical memory layout. These are two separate concerns IMO.
Post by Andrew Rybchenko
So, we
 - MEMPOOL_F_NO_PHYS_CONTIG (min_chunk_size == 0) -- IOVA-contiguous
   is not required at all
 - no MEMPOOL_F_NO_PHYS_CONTIG (min_chunk_size == total_obj_size) --
   object should be IOVA-contiguous
 - min_chunk_size > total_obj_size -- group of objects should be
   IOVA-contiguous
 - min_chunk_size == <all-objects-size> -- all objects should be
   IOVA-contiguous
I don't think this "automagic" decision on what should be
IOVA-contiguous or not is the way to go. It needlessly complicates
things, when all it takes is another flag passed to mempool allocator
somewhere.

I'm not sure what is the best solution here. Perhaps another option
would be to let mempool drivers allocate their memory as well? I.e.
leave current behavior as default, as it's likely that it would be
suitable for nearly all use cases, but provide another option to
override memory allocation completely, so that e.g. octeontx could just
do a memzone_reserve_contig() without regard for default allocation
settings. I think this could be the cleanest solution.
Post by Andrew Rybchenko
If so, how should allocation be implemented?
 1. if (min_chunk_size > min_page_size)
    a. try all contiguous
    b. if cannot, do by min_chunk_size contiguous
 2. else allocate non-contiguous
--
Andrew.
--
Thanks,
Anatoly
Andrew Rybchenko
2018-03-14 17:24:16 UTC
Permalink
Post by Burakov, Anatoly
Post by Andrew Rybchenko
Post by Burakov, Anatoly
Post by Andrew Rybchenko
The callback was introduced to let generic code to know octeontx
mempool driver requirements to use single physically contiguous
memory chunk to store all objects and align object address to
total object size. Now these requirements are met using a new
callbacks to calculate required memory chunk size and to populate
objects using provided memory chunk.
These capability flags are not used anywhere else.
Restricting capabilities to flags is not generic and likely to
be insufficient to describe mempool driver features. If required
in the future, API which returns structured information may be
added.
---
Just a general comment - it is not enough to describe minimum
memchunk requirements. With memory hotplug patchset that's hopefully
getting merged in 18.05, memzones will no longer be guaranteed to be
IOVA-contiguous. So, if a driver requires its mempool to not only be
populated from a single memzone, but a single *physically
contiguous* memzone, going by only callbacks will not do, because
whether or not something should be a single memzone says nothing
about whether this memzone has to also be IOVA-contiguous.
So i believe this needs to stay in one form or another.
(also it would be nice to have a flag that a user could pass to
mempool_create that would force memzone reservation be
IOVA-contiguous, but that's a topic for another conversation. prime
user for this would be KNI.)
I think that min_chunk_size should be treated as IOVA-contiguous.
Why? It's perfectly reasonable to e.g. implement a software mempool
driver that would perform some optimizations due to all objects being
in the same VA-contiguous memzone, yet not be dependent on underlying
physical memory layout. These are two separate concerns IMO.
It looks like there is some misunderstanding here, or I simply don't
understand your point.
Above I mean that the driver should be able to advertise its
requirements on IOVA-contiguous regions.
If the driver does not care about physical memory layout, no problem.
Post by Burakov, Anatoly
Post by Andrew Rybchenko
So, we
  - MEMPOOL_F_NO_PHYS_CONTIG (min_chunk_size == 0) --
IOVA-contiguous is not required at all
  - no MEMPOOL_F_NO_PHYS_CONTIG (min_chunk_size == total_obj_size) --
object should be IOVA-contiguous
  - min_chunk_size > total_obj_size  -- group of objects should be
IOVA-contiguous
  - min_chunk_size == <all-objects-size> -- all objects should be
IOVA-contiguous
I don't think this "automagic" decision on what should be
IOVA-contiguous or not is the way to go. It needlessly complicates
things, when all it takes is another flag passed to mempool allocator
somewhere.
No, it is not just one flag. We really need option (3) above: a group
of objects IOVA-contiguous, in [1].
Of course, it is possible to use option (4) instead: everything
IOVA-contiguous, but I think it is bad - it may be very big and
hard/impossible to allocate due to fragmentation.
Post by Burakov, Anatoly
I'm not sure what is the best solution here. Perhaps another option
would be to let mempool drivers allocate their memory as well? I.e.
leave current behavior as default, as it's likely that it would be
suitable for nearly all use cases, but provide another option to
override memory allocation completely, so that e.g. octeontx could
just do a memzone_reserve_contig() without regard for default
allocation settings. I think this could be the cleanest solution.
For me it is hard to say. I don't know DPDK history well enough to say
why there is a mempool API to populate objects on externally provided
memory. If it may be removed, it is OK for me to do memory allocation
inside rte_mempool or mempool drivers. Otherwise, if it is still allowed
to allocate memory externally and pass it to the mempool, there must be
a way to express IOVA-contiguous requirements.

[1] https://dpdk.org/dev/patchwork/patch/34338/
Post by Burakov, Anatoly
Post by Andrew Rybchenko
If so, how allocation should be implemented?
  1. if (min_chunk_size > min_page_size)
     a. try all contiguous
     b. if cannot, do by mem_chunk_size contiguous
  2. else allocate non-contiguous
--
Andrew.
Burakov, Anatoly
2018-03-15 09:48:41 UTC
Permalink
Post by Andrew Rybchenko
Post by Burakov, Anatoly
Post by Andrew Rybchenko
Post by Burakov, Anatoly
Post by Andrew Rybchenko
The callback was introduced to let generic code to know octeontx
mempool driver requirements to use single physically contiguous
memory chunk to store all objects and align object address to
total object size. Now these requirements are met using a new
callbacks to calculate required memory chunk size and to populate
objects using provided memory chunk.
These capability flags are not used anywhere else.
Restricting capabilities to flags is not generic and likely to
be insufficient to describe mempool driver features. If required
in the future, API which returns structured information may be
added.
---
Just a general comment - it is not enough to describe minimum
memchunk requirements. With memory hotplug patchset that's hopefully
getting merged in 18.05, memzones will no longer be guaranteed to be
IOVA-contiguous. So, if a driver requires its mempool to not only be
populated from a single memzone, but a single *physically
contiguous* memzone, going by only callbacks will not do, because
whether or not something should be a single memzone says nothing
about whether this memzone has to also be IOVA-contiguous.
So i believe this needs to stay in one form or another.
(also it would be nice to have a flag that a user could pass to
mempool_create that would force memzone reservation be
IOVA-contiguous, but that's a topic for another conversation. prime
user for this would be KNI.)
I think that min_chunk_size should be treated as IOVA-contiguous.
Why? It's perfectly reasonable to e.g. implement a software mempool
driver that would perform some optimizations due to all objects being
in the same VA-contiguous memzone, yet not be dependent on underlying
physical memory layout. These are two separate concerns IMO.
It looks like there is some misunderstanding here or I simply don't
understand your point.
Above I mean that driver should be able to advertise its requirements on
IOVA-contiguous regions.
If driver do not care about physical memory layout, no problem.
Please correct me if i'm wrong, but my understanding was that you wanted
to use min_chunk as a way to express minimum requirements for
IOVA-contiguous memory. If i understood you correctly, i don't think
that's the way to go because there could be valid use cases where a
mempool driver would like to advertise min_chunk_size to be equal to its
total size (i.e. allocate everything in a single memzone), yet not
require that memzone to be IOVA-contiguous. I think these are two
different concerns, and one does not, and should not imply the other.
Post by Andrew Rybchenko
Post by Burakov, Anatoly
Post by Andrew Rybchenko
So, we
  - MEMPOOL_F_NO_PHYS_CONTIG (min_chunk_size == 0) --
IOVA-contiguous is not required at all
  - no MEMPOOL_F_NO_PHYS_CONTIG (min_chunk_size == total_obj_size) --
object should be IOVA-contiguous
  - min_chunk_size > total_obj_size  -- group of objects should be
IOVA-contiguous
  - min_chunk_size == <all-objects-size> -- all objects should be
IOVA-contiguous
I don't think this "automagic" decision on what should be
IOVA-contiguous or not is the way to go. It needlessly complicates
things, when all it takes is another flag passed to mempool allocator
somewhere.
No, it is not just one flag. We really need option (3) above: group of
objects IOVA-contiguous in [1].
Of course, it is possible to use option (4) instead: everything
IOVA-contiguous, but I think it is bad - it may be very big and
hard/impossible to allocate due to fragmentation.
Exactly: we shouldn't be forcing IOVA-contiguous memory just because
mempool requested a big min_chunk_size, nor do i think it is wise to
encode such heuristics (referring to your 4 "levels" quoted above) into
the mempool allocator.
Post by Andrew Rybchenko
Post by Burakov, Anatoly
I'm not sure what is the best solution here. Perhaps another option
would be to let mempool drivers allocate their memory as well? I.e.
leave current behavior as default, as it's likely that it would be
suitable for nearly all use cases, but provide another option to
override memory allocation completely, so that e.g. octeontx could
just do a memzone_reserve_contig() without regard for default
allocation settings. I think this could be the cleanest solution.
For me it is hard to say. I don't know DPDK history good enough to say
why there is a mempool API to populate objects on externally provided
memory. If it may be removed, it is OK for me to do memory allocation
inside rte_mempool or mempool drivers. Otherwise, if it is still allowed
to allocate memory externally and pass it to mempool, it must be a way
to express IOVA-contiguous requirements.
[1] https://dpdk.org/dev/patchwork/patch/34338/
Populating mempool objects is not the same as reserving memory where
those objects would reside. The closest to "allocate memory externally"
we have is rte_mempool_xmem_create(), which you are removing in this
patchset.
Post by Andrew Rybchenko
Post by Burakov, Anatoly
Post by Andrew Rybchenko
If so, how allocation should be implemented?
  1. if (min_chunk_size > min_page_size)
     a. try all contiguous
     b. if cannot, do by mem_chunk_size contiguous
  2. else allocate non-contiguous
--
Andrew.
--
Thanks,
Anatoly
Andrew Rybchenko
2018-03-15 11:49:46 UTC
Permalink
Post by Burakov, Anatoly
Post by Andrew Rybchenko
Post by Burakov, Anatoly
Post by Andrew Rybchenko
Post by Burakov, Anatoly
Post by Andrew Rybchenko
The callback was introduced to let generic code to know octeontx
mempool driver requirements to use single physically contiguous
memory chunk to store all objects and align object address to
total object size. Now these requirements are met using a new
callbacks to calculate required memory chunk size and to populate
objects using provided memory chunk.
These capability flags are not used anywhere else.
Restricting capabilities to flags is not generic and likely to
be insufficient to describe mempool driver features. If required
in the future, API which returns structured information may be
added.
---
Just a general comment - it is not enough to describe minimum
memchunk requirements. With memory hotplug patchset that's
hopefully getting merged in 18.05, memzones will no longer be
guaranteed to be IOVA-contiguous. So, if a driver requires its
mempool to not only be populated from a single memzone, but a
single *physically contiguous* memzone, going by only callbacks
will not do, because whether or not something should be a single
memzone says nothing about whether this memzone has to also be
IOVA-contiguous.
So i believe this needs to stay in one form or another.
(also it would be nice to have a flag that a user could pass to
mempool_create that would force memzone reservation be
IOVA-contiguous, but that's a topic for another conversation.
prime user for this would be KNI.)
I think that min_chunk_size should be treated as IOVA-contiguous.
Why? It's perfectly reasonable to e.g. implement a software mempool
driver that would perform some optimizations due to all objects
being in the same VA-contiguous memzone, yet not be dependent on
underlying physical memory layout. These are two separate concerns IMO.
It looks like there is some misunderstanding here or I simply don't
understand your point.
Above I mean that driver should be able to advertise its requirements
on IOVA-contiguous regions.
If driver do not care about physical memory layout, no problem.
Please correct me if i'm wrong, but my understanding was that you
wanted to use min_chunk as a way to express minimum requirements for
IOVA-contiguous memory. If i understood you correctly, i don't think
that's the way to go because there could be valid use cases where a
mempool driver would like to advertise min_chunk_size to be equal to
its total size (i.e. allocate everything in a single memzone), yet not
require that memzone to be IOVA-contiguous. I think these are two
different concerns, and one does not, and should not imply the other.
Aha, you're saying that virtual-contiguous and IOVA-contiguous
requirements are different things, and that there could be use cases
where virtual contiguity is important but IOVA contiguity is not
required. That is perfectly fine.
As I understand it, IOVA-contiguous (physical) typically implies
virtual-contiguous as well. Requirements to have everything virtually
contiguous but only some blocks physically contiguous are unlikely. So,
it may be reduced to either virtually or physically contiguous. If the
mempool does not care about physical contiguity at all, the
MEMPOOL_F_NO_PHYS_CONTIG flag should be used and min_chunk_size should
express the virtual-contiguity requirement. If the mempool requires
physically contiguous objects, there is *no* MEMPOOL_F_NO_PHYS_CONTIG
flag and min_chunk_size expresses the physical-contiguity requirement.
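To make that concrete, a minimal sketch (illustrative only, not code
from this series; the helper name and output parameters are made up) of
how generic allocation code could interpret the flag together with
min_chunk_size:

#include <stddef.h>
#include <rte_mempool.h>

/*
 * Illustrative helper: derive per-chunk requirements from the
 * semantics described above.
 */
static void
chunk_requirements(const struct rte_mempool *mp, size_t min_chunk_size,
                   size_t *chunk_len, int *iova_contig)
{
        /* every memory chunk handed to populate must be at least this big */
        *chunk_len = min_chunk_size;
        /*
         * Without MEMPOOL_F_NO_PHYS_CONTIG the chunk must also be
         * IOVA-contiguous; with the flag only virtual contiguity matters.
         */
        *iova_contig = (mp->flags & MEMPOOL_F_NO_PHYS_CONTIG) == 0;
}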
Post by Burakov, Anatoly
Post by Andrew Rybchenko
Post by Burakov, Anatoly
Post by Andrew Rybchenko
So, we
  - MEMPOOL_F_NO_PHYS_CONTIG (min_chunk_size == 0) --
IOVA-contiguous is not required at all
  - no MEMPOOL_F_NO_PHYS_CONTIG (min_chunk_size == total_obj_size)
-- object should be IOVA-contiguous
  - min_chunk_size > total_obj_size  -- group of objects should be
IOVA-contiguous
  - min_chunk_size == <all-objects-size> -- all objects should be
IOVA-contiguous
I don't think this "automagic" decision on what should be
IOVA-contiguous or not is the way to go. It needlessly complicates
things, when all it takes is another flag passed to mempool
allocator somewhere.
No, it is not just one flag. We really need option (3) above: group
of objects IOVA-contiguous in [1].
Of course, it is possible to use option (4) instead: everything
IOVA-contiguous, but I think it is bad - it may be very big and
hard/impossible to allocate due to fragmentation.
Exactly: we shouldn't be forcing IOVA-contiguous memory just because
mempool requested a big min_chunk_size, nor do i think it is wise to
encode such heuristics (referring to your 4 "levels" quoted above)
into the mempool allocator.
Post by Andrew Rybchenko
Post by Burakov, Anatoly
I'm not sure what is the best solution here. Perhaps another option
would be to let mempool drivers allocate their memory as well? I.e.
leave current behavior as default, as it's likely that it would be
suitable for nearly all use cases, but provide another option to
override memory allocation completely, so that e.g. octeontx could
just do a memzone_reserve_contig() without regard for default
allocation settings. I think this could be the cleanest solution.
For me it is hard to say. I don't know DPDK history good enough to
say why there is a mempool API to populate objects on externally
provided memory. If it may be removed, it is OK for me to do memory
allocation inside rte_mempool or mempool drivers. Otherwise, if it is
still allowed to allocate memory externally and pass it to mempool,
it must be a way to express IOVA-contiguous requirements.
[1] https://dpdk.org/dev/patchwork/patch/34338/
Populating mempool objects is not the same as reserving memory where
those objects would reside. The closest to "allocate memory
externally" we have is rte_mempool_xmem_create(), which you are
removing in this patchset.
It is not the only function. Other functions remain:
rte_mempool_populate_iova, rte_mempool_populate_iova_tab and
rte_mempool_populate_virt. These functions may be used to add memory
areas to a mempool in order to populate objects. So, the memory is
allocated externally and the external entity needs to know the
requirements on memory allocation: the size and whether it must be
virtually or both virtually/physically contiguous.
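For illustration only (my sketch, not part of this series), such an
external allocator could query those requirements through the
calc_mem_size wrapper added by this series and then populate the pool
itself. The function name and the 4K page-size assumption below are
mine, the anonymous mapping is just an example, and the
min_chunk_size/align and IOVA-contiguity requirements are deliberately
not honoured here for brevity:

#include <errno.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <rte_mempool.h>

/*
 * Sketch: ask the driver how much memory is needed, allocate it
 * externally, and populate the mempool from it. A real caller must
 * also honour min_chunk_size/align and, unless
 * MEMPOOL_F_NO_PHYS_CONTIG is set, IOVA contiguity.
 */
static int
populate_externally(struct rte_mempool *mp)
{
        const uint32_t pg_shift = 12;   /* assume 4K pages for the sketch */
        size_t min_chunk_size, align;
        ssize_t mem_size;
        void *addr;

        mem_size = rte_mempool_ops_calc_mem_size(mp, mp->size, pg_shift,
                                                 &min_chunk_size, &align);
        if (mem_size < 0)
                return (int)mem_size;

        addr = mmap(NULL, mem_size, PROT_READ | PROT_WRITE,
                    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        if (addr == MAP_FAILED)
                return -ENOMEM;

        return rte_mempool_populate_virt(mp, addr, mem_size, 1 << pg_shift,
                                         NULL, NULL);
}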
Post by Burakov, Anatoly
Post by Andrew Rybchenko
Post by Burakov, Anatoly
Post by Andrew Rybchenko
If so, how allocation should be implemented?
  1. if (min_chunk_size > min_page_size)
     a. try all contiguous
     b. if cannot, do by mem_chunk_size contiguous
  2. else allocate non-contiguous
--
Andrew.
Burakov, Anatoly
2018-03-15 12:00:08 UTC
Permalink
Post by Andrew Rybchenko
Post by Burakov, Anatoly
Post by Andrew Rybchenko
Post by Burakov, Anatoly
Post by Andrew Rybchenko
Post by Burakov, Anatoly
Post by Andrew Rybchenko
The callback was introduced to let generic code to know octeontx
mempool driver requirements to use single physically contiguous
memory chunk to store all objects and align object address to
total object size. Now these requirements are met using a new
callbacks to calculate required memory chunk size and to populate
objects using provided memory chunk.
These capability flags are not used anywhere else.
Restricting capabilities to flags is not generic and likely to
be insufficient to describe mempool driver features. If required
in the future, API which returns structured information may be
added.
---
Just a general comment - it is not enough to describe minimum
memchunk requirements. With memory hotplug patchset that's
hopefully getting merged in 18.05, memzones will no longer be
guaranteed to be IOVA-contiguous. So, if a driver requires its
mempool to not only be populated from a single memzone, but a
single *physically contiguous* memzone, going by only callbacks
will not do, because whether or not something should be a single
memzone says nothing about whether this memzone has to also be
IOVA-contiguous.
So i believe this needs to stay in one form or another.
(also it would be nice to have a flag that a user could pass to
mempool_create that would force memzone reservation be
IOVA-contiguous, but that's a topic for another conversation.
prime user for this would be KNI.)
I think that min_chunk_size should be treated as IOVA-contiguous.
Why? It's perfectly reasonable to e.g. implement a software mempool
driver that would perform some optimizations due to all objects
being in the same VA-contiguous memzone, yet not be dependent on
underlying physical memory layout. These are two separate concerns IMO.
It looks like there is some misunderstanding here or I simply don't
understand your point.
Above I mean that driver should be able to advertise its requirements
on IOVA-contiguous regions.
If driver do not care about physical memory layout, no problem.
Please correct me if i'm wrong, but my understanding was that you
wanted to use min_chunk as a way to express minimum requirements for
IOVA-contiguous memory. If i understood you correctly, i don't think
that's the way to go because there could be valid use cases where a
mempool driver would like to advertise min_chunk_size to be equal to
its total size (i.e. allocate everything in a single memzone), yet not
require that memzone to be IOVA-contiguous. I think these are two
different concerns, and one does not, and should not imply the other.
Aha, you're saying that virtual-contiguous and IOVA-contiguous
requirements are different things that it could be usecases where
virtual contiguous is important but IOVA-contiguous is not required. It
is perfectly fine.
As I understand IOVA-contiguous (physical) typically means
virtual-contiguous as well. Requirements to have everything virtually
contiguous and some blocks physically contiguous are unlikely. So, it
may be reduced to either virtual or physical contiguous. If mempool does
not care about physical contiguous at all, MEMPOOL_F_NO_PHYS_CONTIG flag
should be used and min_chunk_size should mean virtual contiguous
requirements. If mempool requires physical contiguous objects, there is
*no* MEMPOOL_F_NO_PHYS_CONTIG flag and min_chunk_size means physical
contiguous requirements.
Fair point. I think we're in agreement now :) This will need to be
documented then.
--
Thanks,
Anatoly
Andrew Rybchenko
2018-03-15 12:44:34 UTC
Permalink
Post by Burakov, Anatoly
Post by Andrew Rybchenko
Post by Burakov, Anatoly
Post by Andrew Rybchenko
Post by Burakov, Anatoly
Post by Andrew Rybchenko
Post by Burakov, Anatoly
Post by Andrew Rybchenko
The callback was introduced to let generic code to know octeontx
mempool driver requirements to use single physically contiguous
memory chunk to store all objects and align object address to
total object size. Now these requirements are met using a new
callbacks to calculate required memory chunk size and to populate
objects using provided memory chunk.
These capability flags are not used anywhere else.
Restricting capabilities to flags is not generic and likely to
be insufficient to describe mempool driver features. If required
in the future, API which returns structured information may be
added.
---
Just a general comment - it is not enough to describe minimum
memchunk requirements. With memory hotplug patchset that's
hopefully getting merged in 18.05, memzones will no longer be
guaranteed to be IOVA-contiguous. So, if a driver requires its
mempool to not only be populated from a single memzone, but a
single *physically contiguous* memzone, going by only callbacks
will not do, because whether or not something should be a single
memzone says nothing about whether this memzone has to also be
IOVA-contiguous.
So i believe this needs to stay in one form or another.
(also it would be nice to have a flag that a user could pass to
mempool_create that would force memzone reservation be
IOVA-contiguous, but that's a topic for another conversation.
prime user for this would be KNI.)
I think that min_chunk_size should be treated as IOVA-contiguous.
Why? It's perfectly reasonable to e.g. implement a software
mempool driver that would perform some optimizations due to all
objects being in the same VA-contiguous memzone, yet not be
dependent on underlying physical memory layout. These are two
separate concerns IMO.
It looks like there is some misunderstanding here or I simply don't
understand your point.
Above I mean that driver should be able to advertise its
requirements on IOVA-contiguous regions.
If driver do not care about physical memory layout, no problem.
Please correct me if i'm wrong, but my understanding was that you
wanted to use min_chunk as a way to express minimum requirements for
IOVA-contiguous memory. If i understood you correctly, i don't think
that's the way to go because there could be valid use cases where a
mempool driver would like to advertise min_chunk_size to be equal to
its total size (i.e. allocate everything in a single memzone), yet
not require that memzone to be IOVA-contiguous. I think these are
two different concerns, and one does not, and should not imply the
other.
Aha, you're saying that virtual-contiguous and IOVA-contiguous
requirements are different things that it could be usecases where
virtual contiguous is important but IOVA-contiguous is not required.
It is perfectly fine.
As I understand IOVA-contiguous (physical) typically means
virtual-contiguous as well. Requirements to have everything virtually
contiguous and some blocks physically contiguous are unlikely. So, it
may be reduced to either virtual or physical contiguous. If mempool
does not care about physical contiguous at all,
MEMPOOL_F_NO_PHYS_CONTIG flag should be used and min_chunk_size
should mean virtual contiguous requirements. If mempool requires
physical contiguous objects, there is *no* MEMPOOL_F_NO_PHYS_CONTIG
flag and min_chunk_size means physical contiguous requirements.
Fair point. I think we're in agreement now :) This will need to be
documented then.
OK, I'll do that. I don't mind rebasing my patch series on top of yours,
but I'd like to do it a bit later, when yours is closer to its final
version or even applied - it has really many prerequisites (pre-series)
which should be collected first. They are really major changes.
Olivier Matz
2018-03-19 17:05:19 UTC
Permalink
Hi,

On Thu, Mar 15, 2018 at 03:44:34PM +0300, Andrew Rybchenko wrote:

[...]
Post by Andrew Rybchenko
Aha, you're saying that virtual-contiguous and IOVA-contiguous
requirements are different things that it could be usecases where
virtual contiguous is important but IOVA-contiguous is not required.
It is perfectly fine.
As I understand IOVA-contiguous (physical) typically means
virtual-contiguous as well. Requirements to have everything
virtually contiguous and some blocks physically contiguous are
unlikely. So, it may be reduced to either virtual or physical
contiguous. If mempool does not care about physical contiguous at
all, MEMPOOL_F_NO_PHYS_CONTIG flag should be used and min_chunk_size
should mean virtual contiguous requirements. If mempool requires
physical contiguous objects, there is *no* MEMPOOL_F_NO_PHYS_CONTIG
flag and min_chunk_size means physical contiguous requirements.
Just as a side note, from what I understood, having VA="contiguous" and
IOVA="don't care" would be helpful for mbuf pools with mellanox drivers,
because they perform better in that case.
Olivier Matz
2018-03-19 17:06:11 UTC
Permalink
Post by Andrew Rybchenko
The callback was introduced to let generic code to know octeontx
mempool driver requirements to use single physically contiguous
memory chunk to store all objects and align object address to
total object size. Now these requirements are met using a new
callbacks to calculate required memory chunk size and to populate
objects using provided memory chunk.
These capability flags are not used anywhere else.
Restricting capabilities to flags is not generic and likely to
be insufficient to describe mempool driver features. If required
in the future, API which returns structured information may be
added.
Looks fine...
Post by Andrew Rybchenko
--- a/drivers/mempool/octeontx/rte_mempool_octeontx.c
+++ b/drivers/mempool/octeontx/rte_mempool_octeontx.c
@@ -126,14 +126,29 @@ octeontx_fpavf_get_count(const struct rte_mempool *mp)
return octeontx_fpa_bufpool_free_count(pool);
}
-static int
-octeontx_fpavf_get_capabilities(const struct rte_mempool *mp,
- unsigned int *flags)
+static ssize_t
+octeontx_fpavf_calc_mem_size(const struct rte_mempool *mp,
+ uint32_t obj_num, uint32_t pg_shift,
+ size_t *min_chunk_size, size_t *align)
{
- RTE_SET_USED(mp);
- *flags |= (MEMPOOL_F_CAPA_PHYS_CONTIG |
- MEMPOOL_F_CAPA_BLK_ALIGNED_OBJECTS);
- return 0;
+ ssize_t mem_size;
+
+ /*
+ * Simply need space for one more object to be able to
+ * fullfil alignment requirements.
+ */
...ah, just one typo:

fullfil -> fulfil or fulfill
Andrew Rybchenko
2018-03-10 15:39:37 UTC
Permalink
Move the rte_mempool_xmem_size() code to an internal helper function
since it is required in two places: the deprecated
rte_mempool_xmem_size() and the non-deprecated
rte_mempool_op_calc_mem_size_default().

Signed-off-by: Andrew Rybchenko <***@solarflare.com>
---
RFCv2 -> v1:
- advertise deprecation in release notes
- factor out default memory size calculation into non-deprecated
internal function to avoid usage of deprecated function internally
- remove test for deprecated functions to address build issue because
of usage of deprecated functions (it is easy to allow usage of
deprecated function in Makefile, but very complicated in meson)

doc/guides/rel_notes/deprecation.rst | 7 -------
doc/guides/rel_notes/release_18_05.rst | 10 +++++++++
lib/librte_mempool/rte_mempool.c | 19 ++++++++++++++---
lib/librte_mempool/rte_mempool.h | 25 ++++++++++++++++++++++
lib/librte_mempool/rte_mempool_ops_default.c | 4 ++--
test/test/test_mempool.c | 31 ----------------------------
6 files changed, 53 insertions(+), 43 deletions(-)

diff --git a/doc/guides/rel_notes/deprecation.rst b/doc/guides/rel_notes/deprecation.rst
index 4deed9a..473330d 100644
--- a/doc/guides/rel_notes/deprecation.rst
+++ b/doc/guides/rel_notes/deprecation.rst
@@ -60,13 +60,6 @@ Deprecation Notices
- ``rte_eal_mbuf_default_mempool_ops``

* mempool: several API and ABI changes are planned in v18.05.
- The following functions, introduced for Xen, which is not supported
- anymore since v17.11, are hard to use, not used anywhere else in DPDK.
- Therefore they will be deprecated in v18.05 and removed in v18.08:
-
- - ``rte_mempool_xmem_create``
- - ``rte_mempool_xmem_size``
- - ``rte_mempool_xmem_usage``

The following changes are planned:

diff --git a/doc/guides/rel_notes/release_18_05.rst b/doc/guides/rel_notes/release_18_05.rst
index c50f26c..0244f91 100644
--- a/doc/guides/rel_notes/release_18_05.rst
+++ b/doc/guides/rel_notes/release_18_05.rst
@@ -74,6 +74,16 @@ API Changes
Now the new driver callbacks ``calc_mem_size`` and ``populate`` may be
used to achieve it without specific knowledge in the generic code.

+* **Deprecated mempool xmem functions.**
+
+ The following functions, introduced for Xen, which is not supported
+ anymore since v17.11, are hard to use, not used anywhere else in DPDK.
+ Therefore they were deprecated in v18.05 and will be removed in v18.08:
+
+ - ``rte_mempool_xmem_create``
+ - ``rte_mempool_xmem_size``
+ - ``rte_mempool_xmem_usage``
+

ABI Changes
-----------
diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c
index fdcda45..b57ba2a 100644
--- a/lib/librte_mempool/rte_mempool.c
+++ b/lib/librte_mempool/rte_mempool.c
@@ -204,11 +204,13 @@ rte_mempool_calc_obj_size(uint32_t elt_size, uint32_t flags,


/*
- * Calculate maximum amount of memory required to store given number of objects.
+ * Internal function to calculate required memory chunk size shared
+ * by default implementation of the corresponding callback and
+ * deprecated external function.
*/
size_t
-rte_mempool_xmem_size(uint32_t elt_num, size_t total_elt_sz, uint32_t pg_shift,
- __rte_unused unsigned int flags)
+rte_mempool_calc_mem_size_helper(uint32_t elt_num, size_t total_elt_sz,
+ uint32_t pg_shift)
{
size_t obj_per_page, pg_num, pg_sz;

@@ -228,6 +230,17 @@ rte_mempool_xmem_size(uint32_t elt_num, size_t total_elt_sz, uint32_t pg_shift,
}

/*
+ * Calculate maximum amount of memory required to store given number of objects.
+ */
+size_t
+rte_mempool_xmem_size(uint32_t elt_num, size_t total_elt_sz, uint32_t pg_shift,
+ __rte_unused unsigned int flags)
+{
+ return rte_mempool_calc_mem_size_helper(elt_num, total_elt_sz,
+ pg_shift);
+}
+
+/*
* Calculate how much memory would be actually required with the
* given memory footprint to store required number of elements.
*/
diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
index cd3b229..ebfc95c 100644
--- a/lib/librte_mempool/rte_mempool.h
+++ b/lib/librte_mempool/rte_mempool.h
@@ -420,6 +420,28 @@ ssize_t rte_mempool_op_calc_mem_size_default(const struct rte_mempool *mp,
size_t *min_chunk_size, size_t *align);

/**
+ * @internal Helper function to calculate memory size required to store
+ * specified number of objects in assumption that the memory buffer will
+ * be aligned at page boundary.
+ *
+ * Note that if object size is bigger than page size, then it assumes
+ * that pages are grouped in subsets of physically continuous pages big
+ * enough to store at least one object.
+ *
+ * @param elt_num
+ * Number of elements.
+ * @param total_elt_sz
+ * The size of each element, including header and trailer, as returned
+ * by rte_mempool_calc_obj_size().
+ * @param pg_shift
+ * LOG2 of the physical pages size. If set to 0, ignore page boundaries.
+ * @return
+ * Required memory size aligned at page boundary.
+ */
+size_t rte_mempool_calc_mem_size_helper(uint32_t elt_num, size_t total_elt_sz,
+ uint32_t pg_shift);
+
+/**
* Function to be called for each populated object.
*
* @param[in] mp
@@ -905,6 +927,7 @@ rte_mempool_create(const char *name, unsigned n, unsigned elt_size,
* The pointer to the new allocated mempool, on success. NULL on error
* with rte_errno set appropriately. See rte_mempool_create() for details.
*/
+__rte_deprecated
struct rte_mempool *
rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
unsigned cache_size, unsigned private_data_size,
@@ -1667,6 +1690,7 @@ uint32_t rte_mempool_calc_obj_size(uint32_t elt_size, uint32_t flags,
* @return
* Required memory size aligned at page boundary.
*/
+__rte_deprecated
size_t rte_mempool_xmem_size(uint32_t elt_num, size_t total_elt_sz,
uint32_t pg_shift, unsigned int flags);

@@ -1698,6 +1722,7 @@ size_t rte_mempool_xmem_size(uint32_t elt_num, size_t total_elt_sz,
* buffer is too small, return a negative value whose absolute value
* is the actual number of elements that can be stored in that buffer.
*/
+__rte_deprecated
ssize_t rte_mempool_xmem_usage(void *vaddr, uint32_t elt_num,
size_t total_elt_sz, const rte_iova_t iova[], uint32_t pg_num,
uint32_t pg_shift, unsigned int flags);
diff --git a/lib/librte_mempool/rte_mempool_ops_default.c b/lib/librte_mempool/rte_mempool_ops_default.c
index 3defc15..fd63ca1 100644
--- a/lib/librte_mempool/rte_mempool_ops_default.c
+++ b/lib/librte_mempool/rte_mempool_ops_default.c
@@ -16,8 +16,8 @@ rte_mempool_op_calc_mem_size_default(const struct rte_mempool *mp,

total_elt_sz = mp->header_size + mp->elt_size + mp->trailer_size;

- mem_size = rte_mempool_xmem_size(obj_num, total_elt_sz, pg_shift,
- mp->flags);
+ mem_size = rte_mempool_calc_mem_size_helper(obj_num, total_elt_sz,
+ pg_shift);

*min_chunk_size = RTE_MAX((size_t)1 << pg_shift, total_elt_sz);

diff --git a/test/test/test_mempool.c b/test/test/test_mempool.c
index 63f921e..8d29af2 100644
--- a/test/test/test_mempool.c
+++ b/test/test/test_mempool.c
@@ -444,34 +444,6 @@ test_mempool_same_name_twice_creation(void)
return 0;
}

-/*
- * Basic test for mempool_xmem functions.
- */
-static int
-test_mempool_xmem_misc(void)
-{
- uint32_t elt_num, total_size;
- size_t sz;
- ssize_t usz;
-
- elt_num = MAX_KEEP;
- total_size = rte_mempool_calc_obj_size(MEMPOOL_ELT_SIZE, 0, NULL);
- sz = rte_mempool_xmem_size(elt_num, total_size, MEMPOOL_PG_SHIFT_MAX,
- 0);
-
- usz = rte_mempool_xmem_usage(NULL, elt_num, total_size, 0, 1,
- MEMPOOL_PG_SHIFT_MAX, 0);
-
- if (sz != (size_t)usz) {
- printf("failure @ %s: rte_mempool_xmem_usage(%u, %u) "
- "returns: %#zx, while expected: %#zx;\n",
- __func__, elt_num, total_size, sz, (size_t)usz);
- return -1;
- }
-
- return 0;
-}
-
static void
walk_cb(struct rte_mempool *mp, void *userdata __rte_unused)
{
@@ -596,9 +568,6 @@ test_mempool(void)
if (test_mempool_same_name_twice_creation() < 0)
goto err;

- if (test_mempool_xmem_misc() < 0)
- goto err;
-
/* test the stack handler */
if (test_mempool_basic(mp_stack, 1) < 0)
goto err;
--
2.7.4
Andrew Rybchenko
2018-03-10 15:39:38 UTC
Permalink
The callback to populate pool objects has all required information and
is executed a bit later than the register memory area callback.

Signed-off-by: Andrew Rybchenko <***@solarflare.com>
---
drivers/mempool/octeontx/rte_mempool_octeontx.c | 25 ++++++++++---------------
1 file changed, 10 insertions(+), 15 deletions(-)

diff --git a/drivers/mempool/octeontx/rte_mempool_octeontx.c b/drivers/mempool/octeontx/rte_mempool_octeontx.c
index f2c4f6a..ae038d3 100644
--- a/drivers/mempool/octeontx/rte_mempool_octeontx.c
+++ b/drivers/mempool/octeontx/rte_mempool_octeontx.c
@@ -152,26 +152,15 @@ octeontx_fpavf_calc_mem_size(const struct rte_mempool *mp,
}

static int
-octeontx_fpavf_register_memory_area(const struct rte_mempool *mp,
- char *vaddr, rte_iova_t paddr, size_t len)
-{
- RTE_SET_USED(paddr);
- uint8_t gpool;
- uintptr_t pool_bar;
-
- gpool = octeontx_fpa_bufpool_gpool(mp->pool_id);
- pool_bar = mp->pool_id & ~(uint64_t)FPA_GPOOL_MASK;
-
- return octeontx_fpavf_pool_set_range(pool_bar, len, vaddr, gpool);
-}
-
-static int
octeontx_fpavf_populate(struct rte_mempool *mp, unsigned int max_objs,
void *vaddr, rte_iova_t iova, size_t len,
rte_mempool_populate_obj_cb_t *obj_cb, void *obj_cb_arg)
{
size_t total_elt_sz;
size_t off;
+ uint8_t gpool;
+ uintptr_t pool_bar;
+ int ret;

if (iova == RTE_BAD_IOVA)
return -EINVAL;
@@ -188,6 +177,13 @@ octeontx_fpavf_populate(struct rte_mempool *mp, unsigned int max_objs,
iova += off;
len -= off;

+ gpool = octeontx_fpa_bufpool_gpool(mp->pool_id);
+ pool_bar = mp->pool_id & ~(uint64_t)FPA_GPOOL_MASK;
+
+ ret = octeontx_fpavf_pool_set_range(pool_bar, len, vaddr, gpool);
+ if (ret < 0)
+ return ret;
+
return rte_mempool_op_populate_default(mp, max_objs, vaddr, iova, len,
obj_cb, obj_cb_arg);
}
@@ -199,7 +195,6 @@ static struct rte_mempool_ops octeontx_fpavf_ops = {
.enqueue = octeontx_fpavf_enqueue,
.dequeue = octeontx_fpavf_dequeue,
.get_count = octeontx_fpavf_get_count,
- .register_memory_area = octeontx_fpavf_register_memory_area,
.calc_mem_size = octeontx_fpavf_calc_mem_size,
.populate = octeontx_fpavf_populate,
};
--
2.7.4
Andrew Rybchenko
2018-03-10 15:39:39 UTC
Permalink
The populate mempool driver callback is executed a bit later than
register memory area, provides the same information and will
substitute the latter, since it gives more flexibility: in addition to
notifying about the memory area, it allows customizing how mempool
objects are stored in memory.

Signed-off-by: Andrew Rybchenko <***@solarflare.com>
---
drivers/mempool/dpaa/dpaa_mempool.c | 13 +++++++------
1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/drivers/mempool/dpaa/dpaa_mempool.c b/drivers/mempool/dpaa/dpaa_mempool.c
index fb3b6ba..a2bbb39 100644
--- a/drivers/mempool/dpaa/dpaa_mempool.c
+++ b/drivers/mempool/dpaa/dpaa_mempool.c
@@ -264,10 +264,9 @@ dpaa_mbuf_get_count(const struct rte_mempool *mp)
}

static int
-dpaa_register_memory_area(const struct rte_mempool *mp,
- char *vaddr __rte_unused,
- rte_iova_t paddr __rte_unused,
- size_t len)
+dpaa_populate(const struct rte_mempool *mp, unsigned int max_objs,
+ char *vaddr, rte_iova_t paddr, size_t len,
+ rte_mempool_populate_obj_cb_t *obj_cb, void *obj_cb_arg)
{
struct dpaa_bp_info *bp_info;
unsigned int total_elt_sz;
@@ -289,7 +288,9 @@ dpaa_register_memory_area(const struct rte_mempool *mp,
if (len >= total_elt_sz * mp->size)
bp_info->flags |= DPAA_MPOOL_SINGLE_SEGMENT;

- return 0;
+ return rte_mempool_op_populate_default(mp, max_objs, vaddr, paddr, len,
+ obj_cb, obj_cb_arg);
+
}

struct rte_mempool_ops dpaa_mpool_ops = {
@@ -299,7 +300,7 @@ struct rte_mempool_ops dpaa_mpool_ops = {
.enqueue = dpaa_mbuf_free_bulk,
.dequeue = dpaa_mbuf_alloc_bulk,
.get_count = dpaa_mbuf_get_count,
- .register_memory_area = dpaa_register_memory_area,
+ .populate = dpaa_populate,
};

MEMPOOL_REGISTER_OPS(dpaa_mpool_ops);
--
2.7.4
Andrew Rybchenko
2018-03-10 15:39:40 UTC
Permalink
The callback is not required any more since there is a new callback
to populate objects using a provided memory area, which provides
the same information.

Signed-off-by: Andrew Rybchenko <***@solarflare.com>
---
RFCv2 -> v1:
- advertise ABI changes in release notes

doc/guides/rel_notes/deprecation.rst | 1 -
doc/guides/rel_notes/release_18_05.rst | 2 ++
lib/librte_mempool/rte_mempool.c | 5 -----
lib/librte_mempool/rte_mempool.h | 31 ------------------------------
lib/librte_mempool/rte_mempool_ops.c | 14 --------------
lib/librte_mempool/rte_mempool_version.map | 1 -
6 files changed, 2 insertions(+), 52 deletions(-)

diff --git a/doc/guides/rel_notes/deprecation.rst b/doc/guides/rel_notes/deprecation.rst
index 473330d..5301259 100644
--- a/doc/guides/rel_notes/deprecation.rst
+++ b/doc/guides/rel_notes/deprecation.rst
@@ -63,7 +63,6 @@ Deprecation Notices

The following changes are planned:

- - substitute ``register_memory_area`` with ``populate`` ops.
- addition of new op to allocate contiguous
block of objects if underlying driver supports it.

diff --git a/doc/guides/rel_notes/release_18_05.rst b/doc/guides/rel_notes/release_18_05.rst
index 0244f91..9d40db1 100644
--- a/doc/guides/rel_notes/release_18_05.rst
+++ b/doc/guides/rel_notes/release_18_05.rst
@@ -107,6 +107,8 @@ ABI Changes
Callback ``get_capabilities`` has been removed from ``rte_mempool_ops``
since its features are covered by ``calc_mem_size`` and ``populate``
callbacks.
+ Callback ``register_memory_area`` has been removed from ``rte_mempool_ops``
+ since the new callback ``populate`` may be used instead of it.


Removed Items
diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c
index b57ba2a..844d907 100644
--- a/lib/librte_mempool/rte_mempool.c
+++ b/lib/librte_mempool/rte_mempool.c
@@ -344,11 +344,6 @@ rte_mempool_populate_iova(struct rte_mempool *mp, char *vaddr,
mp->flags |= MEMPOOL_F_POOL_CREATED;
}

- /* Notify memory area to mempool */
- ret = rte_mempool_ops_register_memory_area(mp, vaddr, iova, len);
- if (ret != -ENOTSUP && ret < 0)
- return ret;
-
/* mempool is already populated */
if (mp->populated_size >= mp->size)
return -ENOSPC;
diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
index ebfc95c..5f63f86 100644
--- a/lib/librte_mempool/rte_mempool.h
+++ b/lib/librte_mempool/rte_mempool.h
@@ -370,12 +370,6 @@ typedef int (*rte_mempool_dequeue_t)(struct rte_mempool *mp,
typedef unsigned (*rte_mempool_get_count)(const struct rte_mempool *mp);

/**
- * Notify new memory area to mempool.
- */
-typedef int (*rte_mempool_ops_register_memory_area_t)
-(const struct rte_mempool *mp, char *vaddr, rte_iova_t iova, size_t len);
-
-/**
* Calculate memory size required to store given number of objects.
*
* @param[in] mp
@@ -507,10 +501,6 @@ struct rte_mempool_ops {
rte_mempool_dequeue_t dequeue; /**< Dequeue an object. */
rte_mempool_get_count get_count; /**< Get qty of available objs. */
/**
- * Notify new memory area to mempool
- */
- rte_mempool_ops_register_memory_area_t register_memory_area;
- /**
* Optional callback to calculate memory size required to
* store specified number of objects.
*/
@@ -632,27 +622,6 @@ unsigned
rte_mempool_ops_get_count(const struct rte_mempool *mp);

/**
- * @internal wrapper for mempool_ops register_memory_area callback.
- * API to notify the mempool handler when a new memory area is added to pool.
- *
- * @param mp
- * Pointer to the memory pool.
- * @param vaddr
- * Pointer to the buffer virtual address.
- * @param iova
- * Pointer to the buffer IO address.
- * @param len
- * Pool size.
- * @return
- * - 0: Success;
- * - -ENOTSUP - doesn't support register_memory_area ops (valid error case).
- * - Otherwise, rte_mempool_populate_phys fails thus pool create fails.
- */
-int
-rte_mempool_ops_register_memory_area(const struct rte_mempool *mp,
- char *vaddr, rte_iova_t iova, size_t len);
-
-/**
* @internal wrapper for mempool_ops calc_mem_size callback.
* API to calculate size of memory required to store specified number of
* object.
diff --git a/lib/librte_mempool/rte_mempool_ops.c b/lib/librte_mempool/rte_mempool_ops.c
index 6ac669a..ea9be1e 100644
--- a/lib/librte_mempool/rte_mempool_ops.c
+++ b/lib/librte_mempool/rte_mempool_ops.c
@@ -57,7 +57,6 @@ rte_mempool_register_ops(const struct rte_mempool_ops *h)
ops->enqueue = h->enqueue;
ops->dequeue = h->dequeue;
ops->get_count = h->get_count;
- ops->register_memory_area = h->register_memory_area;
ops->calc_mem_size = h->calc_mem_size;
ops->populate = h->populate;

@@ -99,19 +98,6 @@ rte_mempool_ops_get_count(const struct rte_mempool *mp)
}

/* wrapper to notify new memory area to external mempool */
-int
-rte_mempool_ops_register_memory_area(const struct rte_mempool *mp, char *vaddr,
- rte_iova_t iova, size_t len)
-{
- struct rte_mempool_ops *ops;
-
- ops = rte_mempool_get_ops(mp->ops_index);
-
- RTE_FUNC_PTR_OR_ERR_RET(ops->register_memory_area, -ENOTSUP);
- return ops->register_memory_area(mp, vaddr, iova, len);
-}
-
-/* wrapper to notify new memory area to external mempool */
ssize_t
rte_mempool_ops_calc_mem_size(const struct rte_mempool *mp,
uint32_t obj_num, uint32_t pg_shift,
diff --git a/lib/librte_mempool/rte_mempool_version.map b/lib/librte_mempool/rte_mempool_version.map
index 42ca4df..f539a5a 100644
--- a/lib/librte_mempool/rte_mempool_version.map
+++ b/lib/librte_mempool/rte_mempool_version.map
@@ -45,7 +45,6 @@ DPDK_16.07 {
DPDK_17.11 {
global:

- rte_mempool_ops_register_memory_area;
rte_mempool_populate_iova;
rte_mempool_populate_iova_tab;
--
2.7.4
Andrew Rybchenko
2018-03-10 15:39:41 UTC
Permalink
From: "Artem V. Andreev" <***@oktetlabs.ru>

Callback to calculate required memory area size may require mempool
driver data to be already allocated and initialized.

Signed-off-by: Artem V. Andreev <***@oktetlabs.ru>
Signed-off-by: Andrew Rybchenko <***@solarflare.com>
---
RFCv2 -> v1:
- rename helper function as mempool_ops_alloc_once()

lib/librte_mempool/rte_mempool.c | 29 ++++++++++++++++++++++-------
1 file changed, 22 insertions(+), 7 deletions(-)

diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c
index 844d907..12085cd 100644
--- a/lib/librte_mempool/rte_mempool.c
+++ b/lib/librte_mempool/rte_mempool.c
@@ -322,6 +322,21 @@ rte_mempool_free_memchunks(struct rte_mempool *mp)
}
}

+static int
+mempool_ops_alloc_once(struct rte_mempool *mp)
+{
+ int ret;
+
+ /* create the internal ring if not already done */
+ if ((mp->flags & MEMPOOL_F_POOL_CREATED) == 0) {
+ ret = rte_mempool_ops_alloc(mp);
+ if (ret != 0)
+ return ret;
+ mp->flags |= MEMPOOL_F_POOL_CREATED;
+ }
+ return 0;
+}
+
/* Add objects in the pool, using a physically contiguous memory
* zone. Return the number of objects added, or a negative value
* on error.
@@ -336,13 +351,9 @@ rte_mempool_populate_iova(struct rte_mempool *mp, char *vaddr,
struct rte_mempool_memhdr *memhdr;
int ret;

- /* create the internal ring if not already done */
- if ((mp->flags & MEMPOOL_F_POOL_CREATED) == 0) {
- ret = rte_mempool_ops_alloc(mp);
- if (ret != 0)
- return ret;
- mp->flags |= MEMPOOL_F_POOL_CREATED;
- }
+ ret = mempool_ops_alloc_once(mp);
+ if (ret != 0)
+ return ret;

/* mempool is already populated */
if (mp->populated_size >= mp->size)
@@ -515,6 +526,10 @@ rte_mempool_populate_default(struct rte_mempool *mp)
unsigned mz_id, n;
int ret;

+ ret = mempool_ops_alloc_once(mp);
+ if (ret != 0)
+ return ret;
+
/* mempool must not be populated */
if (mp->nb_mem_chunks != 0)
return -EEXIST;
--
2.7.4
Olivier Matz
2018-03-19 17:06:57 UTC
Permalink
Post by Andrew Rybchenko
Callback to calculate required memory area size may require mempool
driver data to be already allocated and initialized.
---
- rename helper function as mempool_ops_alloc_once()
lib/librte_mempool/rte_mempool.c | 29 ++++++++++++++++++++++-------
1 file changed, 22 insertions(+), 7 deletions(-)
diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c
index 844d907..12085cd 100644
--- a/lib/librte_mempool/rte_mempool.c
+++ b/lib/librte_mempool/rte_mempool.c
@@ -322,6 +322,21 @@ rte_mempool_free_memchunks(struct rte_mempool *mp)
}
}
+static int
+mempool_ops_alloc_once(struct rte_mempool *mp)
+{
+ int ret;
+
+ /* create the internal ring if not already done */
+ if ((mp->flags & MEMPOOL_F_POOL_CREATED) == 0) {
+ ret = rte_mempool_ops_alloc(mp);
+ if (ret != 0)
+ return ret;
+ mp->flags |= MEMPOOL_F_POOL_CREATED;
+ }
+ return 0;
+}
+
/* Add objects in the pool, using a physically contiguous memory
* zone. Return the number of objects added, or a negative value
* on error.
@@ -336,13 +351,9 @@ rte_mempool_populate_iova(struct rte_mempool *mp, char *vaddr,
struct rte_mempool_memhdr *memhdr;
int ret;
- /* create the internal ring if not already done */
- if ((mp->flags & MEMPOOL_F_POOL_CREATED) == 0) {
- ret = rte_mempool_ops_alloc(mp);
- if (ret != 0)
- return ret;
- mp->flags |= MEMPOOL_F_POOL_CREATED;
- }
+ ret = mempool_ops_alloc_once(mp);
+ if (ret != 0)
+ return ret;
/* mempool is already populated */
if (mp->populated_size >= mp->size)
@@ -515,6 +526,10 @@ rte_mempool_populate_default(struct rte_mempool *mp)
unsigned mz_id, n;
int ret;
+ ret = mempool_ops_alloc_once(mp);
+ if (ret != 0)
+ return ret;
+
/* mempool must not be populated */
if (mp->nb_mem_chunks != 0)
return -EEXIST;
Is there a reason why we need to add it in
rte_mempool_populate_default() but not in rte_mempool_populate_virt() and
rte_mempool_populate_iova_tab()?
Andrew Rybchenko
2018-03-20 13:32:04 UTC
Permalink
Post by Olivier Matz
Post by Andrew Rybchenko
Callback to calculate required memory area size may require mempool
driver data to be already allocated and initialized.
---
- rename helper function as mempool_ops_alloc_once()
lib/librte_mempool/rte_mempool.c | 29 ++++++++++++++++++++++-------
1 file changed, 22 insertions(+), 7 deletions(-)
diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c
index 844d907..12085cd 100644
--- a/lib/librte_mempool/rte_mempool.c
+++ b/lib/librte_mempool/rte_mempool.c
@@ -322,6 +322,21 @@ rte_mempool_free_memchunks(struct rte_mempool *mp)
}
}
+static int
+mempool_ops_alloc_once(struct rte_mempool *mp)
+{
+ int ret;
+
+ /* create the internal ring if not already done */
+ if ((mp->flags & MEMPOOL_F_POOL_CREATED) == 0) {
+ ret = rte_mempool_ops_alloc(mp);
+ if (ret != 0)
+ return ret;
+ mp->flags |= MEMPOOL_F_POOL_CREATED;
+ }
+ return 0;
+}
+
/* Add objects in the pool, using a physically contiguous memory
* zone. Return the number of objects added, or a negative value
* on error.
@@ -336,13 +351,9 @@ rte_mempool_populate_iova(struct rte_mempool *mp, char *vaddr,
struct rte_mempool_memhdr *memhdr;
int ret;
- /* create the internal ring if not already done */
- if ((mp->flags & MEMPOOL_F_POOL_CREATED) == 0) {
- ret = rte_mempool_ops_alloc(mp);
- if (ret != 0)
- return ret;
- mp->flags |= MEMPOOL_F_POOL_CREATED;
- }
+ ret = mempool_ops_alloc_once(mp);
+ if (ret != 0)
+ return ret;
/* mempool is already populated */
if (mp->populated_size >= mp->size)
@@ -515,6 +526,10 @@ rte_mempool_populate_default(struct rte_mempool *mp)
unsigned mz_id, n;
int ret;
+ ret = mempool_ops_alloc_once(mp);
+ if (ret != 0)
+ return ret;
+
/* mempool must not be populated */
if (mp->nb_mem_chunks != 0)
return -EEXIST;
Is there a reason why we need to add it in
rte_mempool_populate_default() but not in rte_mempool_populate_virt() and
rte_mempool_populate_iova_tab()?
The reason is the rte_mempool_ops_calc_mem_size() call
from rte_mempool_populate_default(). rte_mempool_ops_*() are not
called directly from rte_mempool_populate_virt() and
rte_mempool_populate_iova_tab().

In fact I've found out that rte_mempool_ops_calc_mem_size() is called
from get_anon_size(), which is called from rte_mempool_populate_anon().
So, we need to add it to get_anon_size() as well.

Maybe it is even better to make this patch the first in the series to
make sure that it is already OK when rte_mempool_ops_calc_mem_size()
is added. What do you think?
Olivier Matz
2018-03-20 16:57:00 UTC
Permalink
Post by Andrew Rybchenko
Post by Olivier Matz
Post by Andrew Rybchenko
Callback to calculate required memory area size may require mempool
driver data to be already allocated and initialized.
---
- rename helper function as mempool_ops_alloc_once()
lib/librte_mempool/rte_mempool.c | 29 ++++++++++++++++++++++-------
1 file changed, 22 insertions(+), 7 deletions(-)
diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c
index 844d907..12085cd 100644
--- a/lib/librte_mempool/rte_mempool.c
+++ b/lib/librte_mempool/rte_mempool.c
@@ -322,6 +322,21 @@ rte_mempool_free_memchunks(struct rte_mempool *mp)
}
}
+static int
+mempool_ops_alloc_once(struct rte_mempool *mp)
+{
+ int ret;
+
+ /* create the internal ring if not already done */
+ if ((mp->flags & MEMPOOL_F_POOL_CREATED) == 0) {
+ ret = rte_mempool_ops_alloc(mp);
+ if (ret != 0)
+ return ret;
+ mp->flags |= MEMPOOL_F_POOL_CREATED;
+ }
+ return 0;
+}
+
/* Add objects in the pool, using a physically contiguous memory
* zone. Return the number of objects added, or a negative value
* on error.
@@ -336,13 +351,9 @@ rte_mempool_populate_iova(struct rte_mempool *mp, char *vaddr,
struct rte_mempool_memhdr *memhdr;
int ret;
- /* create the internal ring if not already done */
- if ((mp->flags & MEMPOOL_F_POOL_CREATED) == 0) {
- ret = rte_mempool_ops_alloc(mp);
- if (ret != 0)
- return ret;
- mp->flags |= MEMPOOL_F_POOL_CREATED;
- }
+ ret = mempool_ops_alloc_once(mp);
+ if (ret != 0)
+ return ret;
/* mempool is already populated */
if (mp->populated_size >= mp->size)
@@ -515,6 +526,10 @@ rte_mempool_populate_default(struct rte_mempool *mp)
unsigned mz_id, n;
int ret;
+ ret = mempool_ops_alloc_once(mp);
+ if (ret != 0)
+ return ret;
+
/* mempool must not be populated */
if (mp->nb_mem_chunks != 0)
return -EEXIST;
Is there a reason why we need to add it in
rte_mempool_populate_default() but not in rte_mempool_populate_virt() and
rte_mempool_populate_iova_tab()?
The reason is rte_mempool_ops_calc_mem_size() call
from rte_mempool_populate_default(). rte_mempool_ops_*() are not
called directly from rte_mempool_populate_virt() and
rte_mempool_populate_iova_tab().
In fact I've found out that rte_mempool_ops_calc_mem_size() is called
from get_anon_size() which is called from rte_mempool_populate_anon().
So, we need to add to get_anon_size() as well.
May be it is even better to make the patch the first in series to make
sure that it is already OK when rte_mempool_ops_calc_mem_size()
is added. What do you think?
Yes, sounds good.


Olivier
Andrew Rybchenko
2018-03-10 15:39:42 UTC
Permalink
From: "Artem V. Andreev" <***@oktetlabs.ru>

Mempool get/put API cares about cache itself, but sometimes it is
required to flush the cache explicitly.

The function is moved within the file since it now requires
rte_mempool_default_cache().

Signed-off-by: Artem V. Andreev <***@oktetlabs.ru>
Signed-off-by: Andrew Rybchenko <***@solarflare.com>
---
lib/librte_mempool/rte_mempool.h | 36 ++++++++++++++++++++----------------
1 file changed, 20 insertions(+), 16 deletions(-)

diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
index 5f63f86..4ecb2f6 100644
--- a/lib/librte_mempool/rte_mempool.h
+++ b/lib/librte_mempool/rte_mempool.h
@@ -1159,22 +1159,6 @@ void
rte_mempool_cache_free(struct rte_mempool_cache *cache);

/**
- * Flush a user-owned mempool cache to the specified mempool.
- *
- * @param cache
- * A pointer to the mempool cache.
- * @param mp
- * A pointer to the mempool.
- */
-static __rte_always_inline void
-rte_mempool_cache_flush(struct rte_mempool_cache *cache,
- struct rte_mempool *mp)
-{
- rte_mempool_ops_enqueue_bulk(mp, cache->objs, cache->len);
- cache->len = 0;
-}
-
-/**
* Get a pointer to the per-lcore default mempool cache.
*
* @param mp
@@ -1197,6 +1181,26 @@ rte_mempool_default_cache(struct rte_mempool *mp, unsigned lcore_id)
}

/**
+ * Flush a user-owned mempool cache to the specified mempool.
+ *
+ * @param cache
+ * A pointer to the mempool cache.
+ * @param mp
+ * A pointer to the mempool.
+ */
+static __rte_always_inline void
+rte_mempool_cache_flush(struct rte_mempool_cache *cache,
+ struct rte_mempool *mp)
+{
+ if (cache == NULL)
+ cache = rte_mempool_default_cache(mp, rte_lcore_id());
+ if (cache == NULL || cache->len == 0)
+ return;
+ rte_mempool_ops_enqueue_bulk(mp, cache->objs, cache->len);
+ cache->len = 0;
+}
+
+/**
* @internal Put several objects back in the mempool; used internally.
* @param mp
* A pointer to the mempool structure.
--
2.7.4
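As a usage note (my example, not part of the patch): with the reworked
flush above, a caller no longer needs a private cache pointer to drain
the calling lcore's default cache, e.g. before checking whether the
pool is full:

        /* flush objects cached for the calling lcore back into the pool */
        rte_mempool_cache_flush(NULL, mp);

        /* with a user-owned cache the call is unchanged */
        rte_mempool_cache_flush(cache, mp);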
santosh
2018-03-14 15:49:13 UTC
Permalink
Hi Andrew,


On Saturday 10 March 2018 09:09 PM, Andrew Rybchenko wrote:

[...]
- add driver ops to calculate required memory size and populate
mempool objects, remove extra flags which were required before
to control it
- transition of octeontx and dpaa drivers to the new callbacks
- change info API to get information from driver required to
API user to know contiguous block size
- remove get_capabilities (not required any more and may be
substituted with more in info get API)
- remove register_memory_area since it is substituted with
populate callback which can do more
- use SPDX tags
- avoid all objects affinity to single lcore
- fix bucket get_count
- deprecate XMEM API
- avoid introduction of a new function to flush cache
- fix NO_CACHE_ALIGN case in bucket mempool
I'm evaluating your series on the octeontx platform.
I noticed a build break for the dpaa platform:
CC dpaa_mempool.o
/home/ubuntu/83xx/dpdk/drivers/mempool/dpaa/dpaa_mempool.c: In function ‘dpaa_populate’:
/home/ubuntu/83xx/dpdk/drivers/mempool/dpaa/dpaa_mempool.c:291:41: error: passing argument 1 of ‘rte_mempool_op_populate_default’ discards ‘const’ qualifier from pointer target type [-Werror=discarded-qualifiers]
return rte_mempool_op_populate_default(mp, max_objs, vaddr, paddr, len,
^
In file included from /home/ubuntu/83xx/dpdk/drivers/mempool/dpaa/dpaa_mempool.h:15:0,
from /home/ubuntu/83xx/dpdk/drivers/mempool/dpaa/dpaa_mempool.c:28:
/home/ubuntu/83xx/dpdk/build/include/rte_mempool.h:490:5: note: expected ‘struct rte_mempool *’ but argument is of type ‘const struct rte_mempool *’
int rte_mempool_op_populate_default(struct rte_mempool *mp,
^
/home/ubuntu/83xx/dpdk/drivers/mempool/dpaa/dpaa_mempool.c: At top level:
/home/ubuntu/83xx/dpdk/drivers/mempool/dpaa/dpaa_mempool.c:303:14: error: initialization from incompatible pointer type [-Werror=incompatible-pointer-types]
.populate = dpaa_populate,
^
/home/ubuntu/83xx/dpdk/drivers/mempool/dpaa/dpaa_mempool.c:303:14: note: (near initialization for ‘dpaa_mpool_ops.populate’)
cc1: all warnings being treated as errors

Maybe consider adding this for the dpaa platform:
diff --git a/drivers/mempool/dpaa/dpaa_mempool.c b/drivers/mempool/dpaa/dpaa_mempool.c
index a2bbb392a..ce5050627 100644
--- a/drivers/mempool/dpaa/dpaa_mempool.c
+++ b/drivers/mempool/dpaa/dpaa_mempool.c
@@ -263,8 +263,8 @@ dpaa_mbuf_get_count(const struct rte_mempool *mp)
return bman_query_free_buffers(bp_info->bp);
}

-static int
-dpaa_populate(const struct rte_mempool *mp, unsigned int max_objs,
+static int __rte_unused
+dpaa_populate(struct rte_mempool *mp, unsigned int max_objs,
char *vaddr, rte_iova_t paddr, size_t len,
rte_mempool_populate_obj_cb_t *obj_cb, void *obj_cb_arg)
{

I will share test and review feedback for the octeontx platform soon.

[...]
Andrew Rybchenko
2018-03-14 15:57:34 UTC
Permalink
Hi Santosh,
Post by Olivier MATZ
Hi Andrew,
[...]
- add driver ops to calculate required memory size and populate
mempool objects, remove extra flags which were required before
to control it
- transition of octeontx and dpaa drivers to the new callbacks
- change info API to get information from driver required to
API user to know contiguous block size
- remove get_capabilities (not required any more and may be
substituted with more in info get API)
- remove register_memory_area since it is substituted with
populate callback which can do more
- use SPDX tags
- avoid all objects affinity to single lcore
- fix bucket get_count
- deprecate XMEM API
- avoid introduction of a new function to flush cache
- fix NO_CACHE_ALIGN case in bucket mempool
I'm evaluating your series in octeontx platform.
CC dpaa_mempool.o
/home/ubuntu/83xx/dpdk/drivers/mempool/dpaa/dpaa_mempool.c:291:41: error: passing argument 1 of ‘rte_mempool_op_populate_default’ discards ‘const’ qualifier from pointer target type [-Werror=discarded-qualifiers]
return rte_mempool_op_populate_default(mp, max_objs, vaddr, paddr, len,
^
In file included from /home/ubuntu/83xx/dpdk/drivers/mempool/dpaa/dpaa_mempool.h:15:0,
/home/ubuntu/83xx/dpdk/build/include/rte_mempool.h:490:5: note: expected ‘struct rte_mempool *’ but argument is of type ‘const struct rte_mempool *’
int rte_mempool_op_populate_default(struct rte_mempool *mp,
^
/home/ubuntu/83xx/dpdk/drivers/mempool/dpaa/dpaa_mempool.c:303:14: error: initialization from incompatible pointer type [-Werror=incompatible-pointer-types]
.populate = dpaa_populate,
^
/home/ubuntu/83xx/dpdk/drivers/mempool/dpaa/dpaa_mempool.c:303:14: note: (near initialization for ‘dpaa_mpool_ops.populate’)
cc1: all warnings being treated as errors
Yes, my bad: the const should simply be removed to match the prototype
(the mempool is actually modified, since it is being populated). Will fix.
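For clarity, a minimal sketch of the fix described above (assumptions: the
body is illustrative only and not the actual dpaa driver code; only the
signature change matters):

#include <rte_mempool.h>

/* Sketch only: same prototype as the populate op in this series, with the
 * const dropped from the mempool pointer. A real driver would also register
 * the area with its hardware pool before placing objects. */
static int
dpaa_populate(struct rte_mempool *mp, unsigned int max_objs,
	      char *vaddr, rte_iova_t paddr, size_t len,
	      rte_mempool_populate_obj_cb_t *obj_cb, void *obj_cb_arg)
{
	return rte_mempool_op_populate_default(mp, max_objs, vaddr, paddr,
					       len, obj_cb, obj_cb_arg);
}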

Many thanks,
Andrew.
Olivier Matz
2018-03-19 17:03:22 UTC
Permalink
Hi Andrew,

Thank you for this nice rework.
Globally, the patchset looks good to me. I'm sending some comments
as replies to the specific patches.
Post by Andrew Rybchenko
The initial patch series [1] is split into two to simplify processing.
The second series relies on this one and will add bucket mempool driver
and related ops.
The patch series has generic enhancements suggested by Olivier.
Basically, it adds driver callbacks to calculate the required memory size
and to populate objects using the provided memory area. This allows
removing the so-called capability flags that were used before to tell the
generic code how to allocate and slice the allocated memory into mempool
objects.
The clean-up which removes get_capabilities and register_memory_area is
not strictly required, but I think it is the right thing to do.
Existing mempool drivers are updated.
I've kept rte_mempool_populate_iova_tab() intact since it does not seem
to be directly related to the XMEM API functions.
The function rte_mempool_populate_iova_tab() (actually, it was
rte_mempool_populate_phys_tab()) was introduced to support the XMEM
API. In my opinion, it can also be deprecated.
Post by Andrew Rybchenko
It breaks the ABI since it changes rte_mempool_ops. It also removes
rte_mempool_ops_register_memory_area() and
rte_mempool_ops_get_capabilities() since the corresponding callbacks are
removed.
Internal global functions are not listed in the map file since they are
not part of the external API.
[1] http://dpdk.org/ml/archives/dev/2018-January/088698.html
- add driver ops to calculate the required memory size and to populate
mempool objects, and remove the extra flags which were previously
required to control it
- transition the octeontx and dpaa drivers to the new callbacks
- change the info API so that the driver can report the information an
API user needs to know the contiguous block size
- remove get_capabilities (no longer required; it may be substituted
with more information in the info get API)
- remove register_memory_area since it is superseded by the populate
callback, which can do more
- use SPDX tags
- avoid pinning all objects to a single lcore
- fix bucket get_count
- deprecate the XMEM API
- avoid introducing a new function to flush the cache
- fix the NO_CACHE_ALIGN case in the bucket mempool
- split the series in two
- squash octeontx patches which implement calc_mem_size and populate
callbacks into the patch which removes get_capabilities since it is
the easiest way to untangle the tangle of tightly related library
functions and flags advertised by the driver
- consistently name default callbacks
- move default callbacks to dedicated file
- see detailed description in patches
mempool: add op to calculate memory size to be allocated
mempool: add op to populate objects using provided memory
mempool: remove callback to get capabilities
mempool: deprecate xmem functions
mempool/octeontx: prepare to remove register memory area op
mempool/dpaa: prepare to remove register memory area op
mempool: remove callback to register memory area
mempool: ensure the mempool is initialized before populating
mempool: support flushing the default cache of the mempool
doc/guides/rel_notes/deprecation.rst | 12 +-
doc/guides/rel_notes/release_18_05.rst | 32 ++-
drivers/mempool/dpaa/dpaa_mempool.c | 13 +-
drivers/mempool/octeontx/rte_mempool_octeontx.c | 64 ++++--
lib/librte_mempool/Makefile | 3 +-
lib/librte_mempool/meson.build | 5 +-
lib/librte_mempool/rte_mempool.c | 159 +++++++--------
lib/librte_mempool/rte_mempool.h | 260 +++++++++++++++++-------
lib/librte_mempool/rte_mempool_ops.c | 37 ++--
lib/librte_mempool/rte_mempool_ops_default.c | 51 +++++
lib/librte_mempool/rte_mempool_version.map | 11 +-
test/test/test_mempool.c | 31 ---
12 files changed, 437 insertions(+), 241 deletions(-)
create mode 100644 lib/librte_mempool/rte_mempool_ops_default.c
--
2.7.4
Andrew Rybchenko
2018-03-20 10:09:33 UTC
Permalink
Post by Olivier Matz
I've kept rte_mempool_populate_iova_tab() intact since it does not seem
to be directly related to the XMEM API functions.
The function rte_mempool_populate_iova_tab() (actually, it was
rte_mempool_populate_phys_tab()) was introduced to support the XMEM
API. In my opinion, it can also be deprecated.
CC Thomas

Definitely OK for me. It is not listed in the deprecation notice included
in 18.02, but I think it is OK to deprecate it in 18.05 (since we're not
removing it, just deprecating it).
Thomas Monjalon
2018-03-20 11:04:54 UTC
Permalink
Post by Andrew Rybchenko
Post by Olivier Matz
I've kept rte_mempool_populate_iova_tab() intact since it does not seem
to be directly related to the XMEM API functions.
The function rte_mempool_populate_iova_tab() (actually, it was
rte_mempool_populate_phys_tab()) was introduced to support the XMEM
API. In my opinion, it can also be deprecated.
CC Thomas
Definitely OK for me. It is not listed in the deprecation notice included
in 18.02, but I think it is OK to deprecate it in 18.05 (since we're not
removing it, just deprecating it).
Yes, it is OK to deprecate this function in addition to the other mempool
ones already listed as planned for deprecation.
Andrew Rybchenko
2018-03-25 16:20:44 UTC
Permalink
The patch series should be applied on top of [7].

The initial patch series [1] is split into two to simplify processing.
The second series relies on this one and will add the bucket mempool
driver and related ops.

The patch series has the generic enhancements suggested by Olivier.
Basically, it adds driver callbacks to calculate the required memory size
and to populate objects using the provided memory area. This allows
removing the so-called capability flags that were used before to tell the
generic code how to allocate and slice the allocated memory into mempool
objects.
The clean-up which removes get_capabilities and register_memory_area is
not strictly required, but I think it is the right thing to do.
Existing mempool drivers are updated.
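
As an illustration of the new model (a hedged sketch, not code from this
series; the driver name is invented and the mandatory basic ops are
omitted), a driver now describes its layout through optional callbacks
rather than capability flags:

#include <rte_mempool.h>

static ssize_t
sketch_calc_mem_size(const struct rte_mempool *mp, uint32_t obj_num,
		     uint32_t pg_shift, size_t *min_chunk_size,
		     size_t *align)
{
	/* No special layout requirement: defer to the library default. */
	return rte_mempool_op_calc_mem_size_default(mp, obj_num, pg_shift,
						    min_chunk_size, align);
}

static int
sketch_populate(struct rte_mempool *mp, unsigned int max_objs, char *vaddr,
		rte_iova_t iova, size_t len,
		rte_mempool_populate_obj_cb_t *obj_cb, void *obj_cb_arg)
{
	/* A hardware driver would program its pool with the area here. */
	return rte_mempool_op_populate_default(mp, max_objs, vaddr, iova,
					       len, obj_cb, obj_cb_arg);
}

static const struct rte_mempool_ops sketch_ops = {
	.name = "sketch_driver",
	/* .alloc, .free, .enqueue, .dequeue and .get_count are mandatory
	 * and omitted here for brevity; a real driver must provide them. */
	.calc_mem_size = sketch_calc_mem_size,
	.populate = sketch_populate,
};

MEMPOOL_REGISTER_OPS(sketch_ops);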

rte_mempool_populate_iova_tab() is also deprecated in v2, as agreed in [2].
Unfortunately, this requires adding the -Wno-deprecated-declarations flag
to librte_mempool, since the function is used by the earlier-deprecated
rte_mempool_populate_phys_tab(). If the latter can be removed in this
release, we can avoid adding the flag that allows usage of deprecated
functions.
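
For readers who wonder why the extra flag is needed at all, a generic
standalone illustration (plain GCC attribute, nothing DPDK-specific): a
still-compiled function that calls a deprecated one trips
-Wdeprecated-declarations, which -Werror turns into a build failure unless
the warning is suppressed for that library.

/* Generic illustration only. */
__attribute__((deprecated))
static int
old_populate_tab(void)
{
	return 0;
}

static int
still_supported_wrapper(void)
{
	/* The call below emits -Wdeprecated-declarations. */
	return old_populate_tab();
}

int
main(void)
{
	return still_supported_wrapper();
}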

One open question remains from the previous review [3].

The patch series interferes with memory hotplug for DPDK [4] ([5] to be
precise), so a rebase may be required.

A new patch is added to the series to rename MEMPOOL_F_NO_PHYS_CONTIG
to MEMPOOL_F_NO_IOVA_CONTIG, as agreed in [6].
MEMPOOL_F_CAPA_PHYS_CONTIG is not renamed since it is removed in this
patchset.

It breaks the ABI since it changes rte_mempool_ops. It also removes
rte_mempool_ops_register_memory_area() and
rte_mempool_ops_get_capabilities() since the corresponding callbacks are
removed.

Internal global functions are not listed in the map file since they are
not part of the external API.

[1] https://dpdk.org/ml/archives/dev/2018-January/088698.html
[2] https://dpdk.org/ml/archives/dev/2018-March/093186.html
[3] https://dpdk.org/ml/archives/dev/2018-March/093329.html
[4] https://dpdk.org/ml/archives/dev/2018-March/092070.html
[5] https://dpdk.org/ml/archives/dev/2018-March/092088.html
[6] https://dpdk.org/ml/archives/dev/2018-March/093345.html
[7] https://dpdk.org/ml/archives/dev/2018-March/093196.html

v1 -> v2:
- deprecate rte_mempool_populate_iova_tab()
- add patch to fix memory leak if no objects are populated
- add patch to rename MEMPOOL_F_NO_PHYS_CONTIG
- minor fixes (typos, blank line at the end of file)
- highlight the meaning of min_chunk_size (whether it must be virtually
or physically contiguous)
- make sure that the mempool is initialized in rte_mempool_populate_anon()
- move the patch that ensures the mempool is initialized to earlier in
the series

RFCv2 -> v1:
- split the series in two
- squash octeontx patches which implement calc_mem_size and populate
callbacks into the patch which removes get_capabilities since it is
the easiest way to untangle the tangle of tightly related library
functions and flags advertised by the driver
- consistently name default callbacks
- move default callbacks to dedicated file
- see detailed description in patches

RFCv1 -> RFCv2:
- add driver ops to calculate the required memory size and to populate
mempool objects, and remove the extra flags which were previously
required to control it
- transition the octeontx and dpaa drivers to the new callbacks
- change the info API so that the driver can report the information an
API user needs to know the contiguous block size
- remove get_capabilities (no longer required; it may be substituted
with more information in the info get API)
- remove register_memory_area since it is superseded by the populate
callback, which can do more
- use SPDX tags
- avoid pinning all objects to a single lcore
- fix bucket get_count
- deprecate the XMEM API
- avoid introducing a new function to flush the cache
- fix the NO_CACHE_ALIGN case in the bucket mempool


Andrew Rybchenko (9):
mempool: fix memhdr leak when no objects are populated
mempool: rename flag to control IOVA-contiguous objects
mempool: add op to calculate memory size to be allocated
mempool: add op to populate objects using provided memory
mempool: remove callback to get capabilities
mempool: deprecate xmem functions
mempool/octeontx: prepare to remove register memory area op
mempool/dpaa: prepare to remove register memory area op
mempool: remove callback to register memory area

Artem V. Andreev (2):
mempool: ensure the mempool is initialized before populating
mempool: support flushing the default cache of the mempool

doc/guides/rel_notes/deprecation.rst | 12 +-
doc/guides/rel_notes/release_18_05.rst | 33 ++-
drivers/mempool/dpaa/dpaa_mempool.c | 13 +-
drivers/mempool/octeontx/rte_mempool_octeontx.c | 64 ++++--
drivers/net/thunderx/nicvf_ethdev.c | 2 +-
lib/librte_mempool/Makefile | 6 +-
lib/librte_mempool/meson.build | 17 +-
lib/librte_mempool/rte_mempool.c | 179 ++++++++-------
lib/librte_mempool/rte_mempool.h | 280 +++++++++++++++++-------
lib/librte_mempool/rte_mempool_ops.c | 37 ++--
lib/librte_mempool/rte_mempool_ops_default.c | 51 +++++
lib/librte_mempool/rte_mempool_version.map | 10 +-
test/test/test_mempool.c | 31 ---
13 files changed, 485 insertions(+), 250 deletions(-)
create mode 100644 lib/librte_mempool/rte_mempool_ops_default.c
--
2.7.4
Andrew Rybchenko
2018-03-25 16:20:45 UTC
Permalink
Fixes: 84121f197187 ("mempool: store memory chunks in a list")
Cc: ***@dpdk.org

Suggested-by: Olivier Matz <***@6wind.com>
Signed-off-by: Andrew Rybchenko <***@solarflare.com>
---
v1 -> v2:
- added in v2 as discussed in [1]

[1] https://dpdk.org/ml/archives/dev/2018-March/093329.html

lib/librte_mempool/rte_mempool.c | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c
index 54f7f4b..80bf941 100644
--- a/lib/librte_mempool/rte_mempool.c
+++ b/lib/librte_mempool/rte_mempool.c
@@ -408,12 +408,18 @@ rte_mempool_populate_iova(struct rte_mempool *mp, char *vaddr,
}

/* not enough room to store one object */
- if (i == 0)
- return -EINVAL;
+ if (i == 0) {
+ ret = -EINVAL;
+ goto fail;
+ }

STAILQ_INSERT_TAIL(&mp->mem_list, memhdr, next);
mp->nb_mem_chunks++;
return i;
+
+fail:
+ rte_free(memhdr);
+ return ret;
}

int
--
2.7.4
Andrew Rybchenko
2018-03-25 16:20:46 UTC
Permalink
The flag MEMPOOL_F_NO_PHYS_CONTIG is renamed to MEMPOOL_F_NO_IOVA_CONTIG
to follow the IO memory contiguity terminology.
MEMPOOL_F_NO_PHYS_CONTIG is kept for backward compatibility and is
deprecated.
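
A hedged usage sketch (the pool name and sizing below are arbitrary
examples, not from this patch): an application that only touches objects
by virtual address can request the renamed flag at creation time.

#include <rte_memory.h>
#include <rte_mempool.h>

/* 4096 objects of 256 bytes with a per-lcore cache of 64 objects; the
 * objects are not required to be IOVA-contiguous. */
static struct rte_mempool *
create_sw_only_pool(void)
{
	return rte_mempool_create("sw_only_pool", 4096, 256, 64, 0,
				  NULL, NULL,	/* no pool constructor */
				  NULL, NULL,	/* no per-object init */
				  SOCKET_ID_ANY,
				  MEMPOOL_F_NO_IOVA_CONTIG);
}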

Suggested-by: Olivier Matz <***@6wind.com>
Signed-off-by: Andrew Rybchenko <***@solarflare.com>
---
v1 -> v2:
- added in v2 as discussed in [1]

[1] https://dpdk.org/ml/archives/dev/2018-March/093345.html

drivers/net/thunderx/nicvf_ethdev.c | 2 +-
lib/librte_mempool/rte_mempool.c | 6 +++---
lib/librte_mempool/rte_mempool.h | 9 +++++----
3 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/drivers/net/thunderx/nicvf_ethdev.c b/drivers/net/thunderx/nicvf_ethdev.c
index 067f224..f3be744 100644
--- a/drivers/net/thunderx/nicvf_ethdev.c
+++ b/drivers/net/thunderx/nicvf_ethdev.c
@@ -1308,7 +1308,7 @@ nicvf_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t qidx,
}

/* Mempool memory must be physically contiguous */
- if (mp->flags & MEMPOOL_F_NO_PHYS_CONTIG) {
+ if (mp->flags & MEMPOOL_F_NO_IOVA_CONTIG) {
PMD_INIT_LOG(ERR, "Mempool memory must be physically contiguous");
return -EINVAL;
}
diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c
index 80bf941..6ffa795 100644
--- a/lib/librte_mempool/rte_mempool.c
+++ b/lib/librte_mempool/rte_mempool.c
@@ -446,7 +446,7 @@ rte_mempool_populate_iova_tab(struct rte_mempool *mp, char *vaddr,
if (mp->nb_mem_chunks != 0)
return -EEXIST;

- if (mp->flags & MEMPOOL_F_NO_PHYS_CONTIG)
+ if (mp->flags & MEMPOOL_F_NO_IOVA_CONTIG)
return rte_mempool_populate_iova(mp, vaddr, RTE_BAD_IOVA,
pg_num * pg_sz, free_cb, opaque);

@@ -500,7 +500,7 @@ rte_mempool_populate_virt(struct rte_mempool *mp, char *addr,
if (RTE_ALIGN_CEIL(len, pg_sz) != len)
return -EINVAL;

- if (mp->flags & MEMPOOL_F_NO_PHYS_CONTIG)
+ if (mp->flags & MEMPOOL_F_NO_IOVA_CONTIG)
return rte_mempool_populate_iova(mp, addr, RTE_BAD_IOVA,
len, free_cb, opaque);

@@ -602,7 +602,7 @@ rte_mempool_populate_default(struct rte_mempool *mp)
goto fail;
}

- if (mp->flags & MEMPOOL_F_NO_PHYS_CONTIG)
+ if (mp->flags & MEMPOOL_F_NO_IOVA_CONTIG)
iova = RTE_BAD_IOVA;
else
iova = mz->iova;
diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
index 8b1b7f7..e531a15 100644
--- a/lib/librte_mempool/rte_mempool.h
+++ b/lib/librte_mempool/rte_mempool.h
@@ -244,7 +244,8 @@ struct rte_mempool {
#define MEMPOOL_F_SP_PUT 0x0004 /**< Default put is "single-producer".*/
#define MEMPOOL_F_SC_GET 0x0008 /**< Default get is "single-consumer".*/
#define MEMPOOL_F_POOL_CREATED 0x0010 /**< Internal: pool is created. */
-#define MEMPOOL_F_NO_PHYS_CONTIG 0x0020 /**< Don't need physically contiguous objs. */
+#define MEMPOOL_F_NO_IOVA_CONTIG 0x0020 /**< Don't need IOVA contiguous objs. */
+#define MEMPOOL_F_NO_PHYS_CONTIG MEMPOOL_F_NO_IOVA_CONTIG /* deprecated */
/**
* This capability flag is advertised by a mempool handler, if the whole
* memory area containing the objects must be physically contiguous.
@@ -710,8 +711,8 @@ typedef void (rte_mempool_ctor_t)(struct rte_mempool *, void *);
* - MEMPOOL_F_SC_GET: If this flag is set, the default behavior
* when using rte_mempool_get() or rte_mempool_get_bulk() is
* "single-consumer". Otherwise, it is "multi-consumers".
- * - MEMPOOL_F_NO_PHYS_CONTIG: If set, allocated objects won't
- * necessarily be contiguous in physical memory.
+ * - MEMPOOL_F_NO_IOVA_CONTIG: If set, allocated objects won't
+ * necessarily be contiguous in IO memory.
* @return
* The pointer to the new allocated mempool, on success. NULL on error
* with rte_errno set appropriately. Possible rte_errno values include:
@@ -1439,7 +1440,7 @@ rte_mempool_empty(const struct rte_mempool *mp)
* A pointer (virtual address) to the element of the pool.
* @return
* The IO address of the elt element.
- * If the mempool was created with MEMPOOL_F_NO_PHYS_CONTIG, the
+ * If the mempool was created with MEMPOOL_F_NO_IOVA_CONTIG, the
* returned value is RTE_BAD_IOVA.
*/
static inline rte_iova_t
--
2.7.4
Andrew Rybchenko
2018-03-25 16:20:47 UTC
Permalink
From: "Artem V. Andreev" <***@oktetlabs.ru>

The callback to calculate the required memory area size may require the
mempool driver data to be already allocated and initialized.

Signed-off-by: Artem V. Andreev <***@oktetlabs.ru>
Signed-off-by: Andrew Rybchenko <***@solarflare.com>
---
v1 -> v2:
- add init check to mempool_ops_alloc_once()
- move earlier in the patch series since it is required when driver
ops are called, and it is better to have it before the new ops are added

RFCv2 -> v1:
- rename helper function as mempool_ops_alloc_once()

lib/librte_mempool/rte_mempool.c | 33 ++++++++++++++++++++++++++-------
1 file changed, 26 insertions(+), 7 deletions(-)

diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c
index 6ffa795..d8e3720 100644
--- a/lib/librte_mempool/rte_mempool.c
+++ b/lib/librte_mempool/rte_mempool.c
@@ -323,6 +323,21 @@ rte_mempool_free_memchunks(struct rte_mempool *mp)
}
}

+static int
+mempool_ops_alloc_once(struct rte_mempool *mp)
+{
+ int ret;
+
+ /* create the internal ring if not already done */
+ if ((mp->flags & MEMPOOL_F_POOL_CREATED) == 0) {
+ ret = rte_mempool_ops_alloc(mp);
+ if (ret != 0)
+ return ret;
+ mp->flags |= MEMPOOL_F_POOL_CREATED;
+ }
+ return 0;
+}
+
/* Add objects in the pool, using a physically contiguous memory
* zone. Return the number of objects added, or a negative value
* on error.
@@ -339,13 +354,9 @@ rte_mempool_populate_iova(struct rte_mempool *mp, char *vaddr,
struct rte_mempool_memhdr *memhdr;
int ret;

- /* create the internal ring if not already done */
- if ((mp->flags & MEMPOOL_F_POOL_CREATED) == 0) {
- ret = rte_mempool_ops_alloc(mp);
- if (ret != 0)
- return ret;
- mp->flags |= MEMPOOL_F_POOL_CREATED;
- }
+ ret = mempool_ops_alloc_once(mp);
+ if (ret != 0)
+ return ret;

/* Notify memory area to mempool */
ret = rte_mempool_ops_register_memory_area(mp, vaddr, iova, len);
@@ -556,6 +567,10 @@ rte_mempool_populate_default(struct rte_mempool *mp)
unsigned int mp_flags;
int ret;

+ ret = mempool_ops_alloc_once(mp);
+ if (ret != 0)
+ return ret;
+
/* mempool must not be populated */
if (mp->nb_mem_chunks != 0)
return -EEXIST;
@@ -667,6 +682,10 @@ rte_mempool_populate_anon(struct rte_mempool *mp)
return 0;
}

+ ret = mempool_ops_alloc_once(mp);
+ if (ret != 0)
+ return ret;
+
/* get chunk of virtually continuous memory */
size = get_anon_size(mp);
addr = mmap(NULL, size, PROT_READ | PROT_WRITE,
--
2.7.4
Andrew Rybchenko
2018-03-25 16:20:48 UTC
Permalink
The size of the memory chunk required to populate mempool objects depends
on how the objects are stored in memory. Different mempool drivers may
have different requirements, and a new operation allows calculating the
memory size in accordance with the driver requirements and advertising
the requirements on minimum memory chunk size and alignment in a
generic way.
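
To make the contract concrete, a sketch of what a driver with special
layout needs might implement (hypothetical: the bucket size and layout are
invented for illustration and are not the octeontx or bucket driver code):

#include <errno.h>
#include <rte_mempool.h>

#define SKETCH_BUCKET_SIZE (64 * 1024)	/* assumed contiguous bucket size */

static ssize_t
sketch_bucket_calc_mem_size(const struct rte_mempool *mp, uint32_t obj_num,
			    uint32_t pg_shift, size_t *min_chunk_size,
			    size_t *align)
{
	size_t total_elt_sz = mp->header_size + mp->elt_size +
			      mp->trailer_size;
	size_t objs_per_bucket = SKETCH_BUCKET_SIZE / total_elt_sz;
	size_t nb_buckets;

	if (objs_per_bucket == 0)
		return -EINVAL;

	nb_buckets = (obj_num + objs_per_bucket - 1) / objs_per_bucket;

	/* Each bucket must be one contiguous, bucket-aligned memory chunk. */
	*min_chunk_size = SKETCH_BUCKET_SIZE;
	*align = SKETCH_BUCKET_SIZE;

	(void)pg_shift;	/* page boundaries do not matter for this layout */

	return (ssize_t)(nb_buckets * SKETCH_BUCKET_SIZE);
}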

Bump ABI version since the patch breaks it.

Suggested-by: Olivier Matz <***@6wind.com>
Signed-off-by: Andrew Rybchenko <***@solarflare.com>
---
v1 -> v2:
- clarify min_chunk_size meaning
- rebase on top of patch series which fixes library version in meson
build

RFCv2 -> v1:
- move default calc_mem_size callback to rte_mempool_ops_default.c
- add ABI changes to release notes
- name default callback consistently: rte_mempool_op_<callback>_default()
- bump ABI version since it is the first patch which breaks ABI
- describe default callback behaviour in details
- avoid introduction of internal function to cope with deprecation
(keep it to deprecation patch)
- move cache-line or page boundary chunk alignment to default callback
- highlight that min_chunk_size and align parameters are output only

doc/guides/rel_notes/deprecation.rst | 3 +-
doc/guides/rel_notes/release_18_05.rst | 7 ++-
lib/librte_mempool/Makefile | 3 +-
lib/librte_mempool/meson.build | 5 +-
lib/librte_mempool/rte_mempool.c | 43 +++++++-------
lib/librte_mempool/rte_mempool.h | 86 +++++++++++++++++++++++++++-
lib/librte_mempool/rte_mempool_ops.c | 18 ++++++
lib/librte_mempool/rte_mempool_ops_default.c | 38 ++++++++++++
lib/librte_mempool/rte_mempool_version.map | 7 +++
9 files changed, 182 insertions(+), 28 deletions(-)
create mode 100644 lib/librte_mempool/rte_mempool_ops_default.c

diff --git a/doc/guides/rel_notes/deprecation.rst b/doc/guides/rel_notes/deprecation.rst
index 6594585..e02d4ca 100644
--- a/doc/guides/rel_notes/deprecation.rst
+++ b/doc/guides/rel_notes/deprecation.rst
@@ -72,8 +72,7 @@ Deprecation Notices

- removal of ``get_capabilities`` mempool ops and related flags.
- substitute ``register_memory_area`` with ``populate`` ops.
- - addition of new ops to customize required memory chunk calculation,
- customize objects population and allocate contiguous
+ - addition of new ops to customize objects population and allocate contiguous
block of objects if underlying driver supports it.

* mbuf: The control mbuf API will be removed in v18.05. The impacted
diff --git a/doc/guides/rel_notes/release_18_05.rst b/doc/guides/rel_notes/release_18_05.rst
index f2525bb..59583ea 100644
--- a/doc/guides/rel_notes/release_18_05.rst
+++ b/doc/guides/rel_notes/release_18_05.rst
@@ -80,6 +80,11 @@ ABI Changes
Also, make sure to start the actual text at the margin.
=========================================================

+* **Changed rte_mempool_ops structure.**
+
+ A new callback ``calc_mem_size`` has been added to ``rte_mempool_ops``
+ to allow to customize required memory size calculation.
+

Removed Items
-------------
@@ -152,7 +157,7 @@ The libraries prepended with a plus sign were incremented in this version.
librte_latencystats.so.1
librte_lpm.so.2
librte_mbuf.so.3
- librte_mempool.so.3
+ + librte_mempool.so.4
+ librte_meter.so.2
librte_metrics.so.1
librte_net.so.1
diff --git a/lib/librte_mempool/Makefile b/lib/librte_mempool/Makefile
index 24e735a..072740f 100644
--- a/lib/librte_mempool/Makefile
+++ b/lib/librte_mempool/Makefile
@@ -11,11 +11,12 @@ LDLIBS += -lrte_eal -lrte_ring

EXPORT_MAP := rte_mempool_version.map

-LIBABIVER := 3
+LIBABIVER := 4

# all source are stored in SRCS-y
SRCS-$(CONFIG_RTE_LIBRTE_MEMPOOL) += rte_mempool.c
SRCS-$(CONFIG_RTE_LIBRTE_MEMPOOL) += rte_mempool_ops.c
+SRCS-$(CONFIG_RTE_LIBRTE_MEMPOOL) += rte_mempool_ops_default.c
# install includes
SYMLINK-$(CONFIG_RTE_LIBRTE_MEMPOOL)-include := rte_mempool.h

diff --git a/lib/librte_mempool/meson.build b/lib/librte_mempool/meson.build
index 712720f..9e3b527 100644
--- a/lib/librte_mempool/meson.build
+++ b/lib/librte_mempool/meson.build
@@ -1,7 +1,8 @@
# SPDX-License-Identifier: BSD-3-Clause
# Copyright(c) 2017 Intel Corporation

-version = 3
-sources = files('rte_mempool.c', 'rte_mempool_ops.c')
+version = 4
+sources = files('rte_mempool.c', 'rte_mempool_ops.c',
+ 'rte_mempool_ops_default.c')
headers = files('rte_mempool.h')
deps += ['ring']
diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c
index d8e3720..dd2d0fe 100644
--- a/lib/librte_mempool/rte_mempool.c
+++ b/lib/librte_mempool/rte_mempool.c
@@ -561,10 +561,10 @@ rte_mempool_populate_default(struct rte_mempool *mp)
unsigned int mz_flags = RTE_MEMZONE_1GB|RTE_MEMZONE_SIZE_HINT_ONLY;
char mz_name[RTE_MEMZONE_NAMESIZE];
const struct rte_memzone *mz;
- size_t size, total_elt_sz, align, pg_sz, pg_shift;
+ ssize_t mem_size;
+ size_t align, pg_sz, pg_shift;
rte_iova_t iova;
unsigned mz_id, n;
- unsigned int mp_flags;
int ret;

ret = mempool_ops_alloc_once(mp);
@@ -575,29 +575,23 @@ rte_mempool_populate_default(struct rte_mempool *mp)
if (mp->nb_mem_chunks != 0)
return -EEXIST;

- /* Get mempool capabilities */
- mp_flags = 0;
- ret = rte_mempool_ops_get_capabilities(mp, &mp_flags);
- if ((ret < 0) && (ret != -ENOTSUP))
- return ret;
-
- /* update mempool capabilities */
- mp->flags |= mp_flags;
-
if (rte_eal_has_hugepages()) {
pg_shift = 0; /* not needed, zone is physically contiguous */
pg_sz = 0;
- align = RTE_CACHE_LINE_SIZE;
} else {
pg_sz = getpagesize();
pg_shift = rte_bsf32(pg_sz);
- align = pg_sz;
}

- total_elt_sz = mp->header_size + mp->elt_size + mp->trailer_size;
for (mz_id = 0, n = mp->size; n > 0; mz_id++, n -= ret) {
- size = rte_mempool_xmem_size(n, total_elt_sz, pg_shift,
- mp->flags);
+ size_t min_chunk_size;
+
+ mem_size = rte_mempool_ops_calc_mem_size(mp, n, pg_shift,
+ &min_chunk_size, &align);
+ if (mem_size < 0) {
+ ret = mem_size;
+ goto fail;
+ }

ret = snprintf(mz_name, sizeof(mz_name),
RTE_MEMPOOL_MZ_FORMAT "_%d", mp->name, mz_id);
@@ -606,7 +600,7 @@ rte_mempool_populate_default(struct rte_mempool *mp)
goto fail;
}

- mz = rte_memzone_reserve_aligned(mz_name, size,
+ mz = rte_memzone_reserve_aligned(mz_name, mem_size,
mp->socket_id, mz_flags, align);
/* not enough memory, retry with the biggest zone we have */
if (mz == NULL)
@@ -617,6 +611,12 @@ rte_mempool_populate_default(struct rte_mempool *mp)
goto fail;
}

+ if (mz->len < min_chunk_size) {
+ rte_memzone_free(mz);
+ ret = -ENOMEM;
+ goto fail;
+ }
+
if (mp->flags & MEMPOOL_F_NO_IOVA_CONTIG)
iova = RTE_BAD_IOVA;
else
@@ -649,13 +649,14 @@ rte_mempool_populate_default(struct rte_mempool *mp)
static size_t
get_anon_size(const struct rte_mempool *mp)
{
- size_t size, total_elt_sz, pg_sz, pg_shift;
+ size_t size, pg_sz, pg_shift;
+ size_t min_chunk_size;
+ size_t align;

pg_sz = getpagesize();
pg_shift = rte_bsf32(pg_sz);
- total_elt_sz = mp->header_size + mp->elt_size + mp->trailer_size;
- size = rte_mempool_xmem_size(mp->size, total_elt_sz, pg_shift,
- mp->flags);
+ size = rte_mempool_ops_calc_mem_size(mp, mp->size, pg_shift,
+ &min_chunk_size, &align);

return size;
}
diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
index e531a15..191255d 100644
--- a/lib/librte_mempool/rte_mempool.h
+++ b/lib/librte_mempool/rte_mempool.h
@@ -400,6 +400,62 @@ typedef int (*rte_mempool_get_capabilities_t)(const struct rte_mempool *mp,
typedef int (*rte_mempool_ops_register_memory_area_t)
(const struct rte_mempool *mp, char *vaddr, rte_iova_t iova, size_t len);

+/**
+ * Calculate memory size required to store given number of objects.
+ *
+ * If mempool objects are not required to be IOVA-contiguous
+ * (the flag MEMPOOL_F_NO_IOVA_CONTIG is set), min_chunk_size defines
+ * virtually contiguous chunk size. Otherwise, if mempool objects must
+ * be IOVA-contiguous (the flag MEMPOOL_F_NO_IOVA_CONTIG is clear),
+ * min_chunk_size defines IOVA-contiguous chunk size.
+ *
+ * @param[in] mp
+ * Pointer to the memory pool.
+ * @param[in] obj_num
+ * Number of objects.
+ * @param[in] pg_shift
+ * LOG2 of the physical pages size. If set to 0, ignore page boundaries.
+ * @param[out] min_chunk_size
+ * Location for minimum size of the memory chunk which may be used to
+ * store memory pool objects.
+ * @param[out] align
+ * Location for required memory chunk alignment.
+ * @return
+ * Required memory size aligned at page boundary.
+ */
+typedef ssize_t (*rte_mempool_calc_mem_size_t)(const struct rte_mempool *mp,
+ uint32_t obj_num, uint32_t pg_shift,
+ size_t *min_chunk_size, size_t *align);
+
+/**
+ * Default way to calculate memory size required to store given number of
+ * objects.
+ *
+ * If page boundaries may be ignored, it is just a product of total
+ * object size including header and trailer and number of objects.
+ * Otherwise, it is a number of pages required to store given number of
+ * objects without crossing page boundary.
+ *
+ * Note that if object size is bigger than page size, then it assumes
+ * that pages are grouped in subsets of physically continuous pages big
+ * enough to store at least one object.
+ *
+ * If mempool driver requires object addresses to be block size aligned
+ * (MEMPOOL_F_CAPA_BLK_ALIGNED_OBJECTS), space for one extra element is
+ * reserved to be able to meet the requirement.
+ *
+ * Minimum size of memory chunk is either all required space, if
+ * capabilities say that whole memory area must be physically contiguous
+ * (MEMPOOL_F_CAPA_PHYS_CONTIG), or a maximum of the page size and total
+ * element size.
+ *
+ * Required memory chunk alignment is a maximum of page size and cache
+ * line size.
+ */
+ssize_t rte_mempool_op_calc_mem_size_default(const struct rte_mempool *mp,
+ uint32_t obj_num, uint32_t pg_shift,
+ size_t *min_chunk_size, size_t *align);
+
/** Structure defining mempool operations structure */
struct rte_mempool_ops {
char name[RTE_MEMPOOL_OPS_NAMESIZE]; /**< Name of mempool ops struct. */
@@ -416,6 +472,11 @@ struct rte_mempool_ops {
* Notify new memory area to mempool
*/
rte_mempool_ops_register_memory_area_t register_memory_area;
+ /**
+ * Optional callback to calculate memory size required to
+ * store specified number of objects.
+ */
+ rte_mempool_calc_mem_size_t calc_mem_size;
} __rte_cache_aligned;

#define RTE_MEMPOOL_MAX_OPS_IDX 16 /**< Max registered ops structs */
@@ -565,6 +626,29 @@ rte_mempool_ops_register_memory_area(const struct rte_mempool *mp,
char *vaddr, rte_iova_t iova, size_t len);

/**
+ * @internal wrapper for mempool_ops calc_mem_size callback.
+ * API to calculate size of memory required to store specified number of
+ * object.
+ *
+ * @param[in] mp
+ * Pointer to the memory pool.
+ * @param[in] obj_num
+ * Number of objects.
+ * @param[in] pg_shift
+ * LOG2 of the physical pages size. If set to 0, ignore page boundaries.
+ * @param[out] min_chunk_size
+ * Location for minimum size of the memory chunk which may be used to
+ * store memory pool objects.
+ * @param[out] align
+ * Location for required memory chunk alignment.
+ * @return
+ * Required memory size aligned at page boundary.
+ */
+ssize_t rte_mempool_ops_calc_mem_size(const struct rte_mempool *mp,
+ uint32_t obj_num, uint32_t pg_shift,
+ size_t *min_chunk_size, size_t *align);
+
+/**
* @internal wrapper for mempool_ops free callback.
*
* @param mp
@@ -1534,7 +1618,7 @@ uint32_t rte_mempool_calc_obj_size(uint32_t elt_size, uint32_t flags,
* of objects. Assume that the memory buffer will be aligned at page
* boundary.
*
- * Note that if object size is bigger then page size, then it assumes
+ * Note that if object size is bigger than page size, then it assumes
* that pages are grouped in subsets of physically continuous pages big
* enough to store at least one object.
*
diff --git a/lib/librte_mempool/rte_mempool_ops.c b/lib/librte_mempool/rte_mempool_ops.c
index 0732255..26908cc 100644
--- a/lib/librte_mempool/rte_mempool_ops.c
+++ b/lib/librte_mempool/rte_mempool_ops.c
@@ -59,6 +59,7 @@ rte_mempool_register_ops(const struct rte_mempool_ops *h)
ops->get_count = h->get_count;
ops->get_capabilities = h->get_capabilities;
ops->register_memory_area = h->register_memory_area;
+ ops->calc_mem_size = h->calc_mem_size;

rte_spinlock_unlock(&rte_mempool_ops_table.sl);

@@ -123,6 +124,23 @@ rte_mempool_ops_register_memory_area(const struct rte_mempool *mp, char *vaddr,
return ops->register_memory_area(mp, vaddr, iova, len);
}

+/* wrapper to calculate the memory size required to store given number of objects */
+ssize_t
+rte_mempool_ops_calc_mem_size(const struct rte_mempool *mp,
+ uint32_t obj_num, uint32_t pg_shift,
+ size_t *min_chunk_size, size_t *align)
+{
+ struct rte_mempool_ops *ops;
+
+ ops = rte_mempool_get_ops(mp->ops_index);
+
+ if (ops->calc_mem_size == NULL)
+ return rte_mempool_op_calc_mem_size_default(mp, obj_num,
+ pg_shift, min_chunk_size, align);
+
+ return ops->calc_mem_size(mp, obj_num, pg_shift, min_chunk_size, align);
+}
+
/* sets mempool ops previously registered by rte_mempool_register_ops. */
int
rte_mempool_set_ops_byname(struct rte_mempool *mp, const char *name,
diff --git a/lib/librte_mempool/rte_mempool_ops_default.c b/lib/librte_mempool/rte_mempool_ops_default.c
new file mode 100644
index 0000000..57fe79b
--- /dev/null
+++ b/lib/librte_mempool/rte_mempool_ops_default.c
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2016 Intel Corporation.
+ * Copyright(c) 2016 6WIND S.A.
+ * Copyright(c) 2018 Solarflare Communications Inc.
+ */
+
+#include <rte_mempool.h>
+
+ssize_t
+rte_mempool_op_calc_mem_size_default(const struct rte_mempool *mp,
+ uint32_t obj_num, uint32_t pg_shift,
+ size_t *min_chunk_size, size_t *align)
+{
+ unsigned int mp_flags;
+ int ret;
+ size_t total_elt_sz;
+ size_t mem_size;
+
+ /* Get mempool capabilities */
+ mp_flags = 0;
+ ret = rte_mempool_ops_get_capabilities(mp, &mp_flags);
+ if ((ret < 0) && (ret != -ENOTSUP))
+ return ret;
+
+ total_elt_sz = mp->header_size + mp->elt_size + mp->trailer_size;
+
+ mem_size = rte_mempool_xmem_size(obj_num, total_elt_sz, pg_shift,
+ mp->flags | mp_flags);
+
+ if (mp_flags & MEMPOOL_F_CAPA_PHYS_CONTIG)
+ *min_chunk_size = mem_size;
+ else
+ *min_chunk_size = RTE_MAX((size_t)1 << pg_shift, total_elt_sz);
+
+ *align = RTE_MAX((size_t)RTE_CACHE_LINE_SIZE, (size_t)1 << pg_shift);
+
+ return mem_size;
+}
diff --git a/lib/librte_mempool/rte_mempool_version.map b/lib/librte_mempool/rte_mempool_version.map
index 62b76f9..cb38189 100644
--- a/lib/librte_mempool/rte_mempool_version.map
+++ b/lib/librte_mempool/rte_mempool_version.map
@@ -51,3 +51,10 @@ DPDK_17.11 {
rte_mempool_populate_iova_tab;

} DPDK_16.07;
+
+DPDK_18.05 {
+ global:
+
+ rte_mempool_op_calc_mem_size_default;
+
+} DPDK_17.11;
--
2.7.4