Discussion:
[dpdk-dev] [PATCH 11/22] net/fm10k: enable port detach on secondary process
Qi Zhang
2018-06-07 12:38:38 UTC
Permalink
Previously, detaching a port on a secondary process would interfere with
the primary process and prevent the same device from being attached
again. By taking advantage of rte_eth_dev_release_port_local, we can
support this with a minor change.

Signed-off-by: Qi Zhang <***@intel.com>
---
drivers/net/fm10k/fm10k_ethdev.c | 9 +++++++++
1 file changed, 9 insertions(+)

diff --git a/drivers/net/fm10k/fm10k_ethdev.c b/drivers/net/fm10k/fm10k_ethdev.c
index 3ff1b0e0f..c7042be4e 100644
--- a/drivers/net/fm10k/fm10k_ethdev.c
+++ b/drivers/net/fm10k/fm10k_ethdev.c
@@ -3264,6 +3264,15 @@ static int eth_fm10k_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,

static int eth_fm10k_pci_remove(struct rte_pci_device *pci_dev)
{
+ struct rte_eth_dev *ethdev =
+ rte_eth_dev_allocated(pci_dev->device.name);
+
+ if (!ethdev)
+ return -ENODEV;
+
+ if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+ return rte_eth_dev_release_port_local(ethdev);
+
return rte_eth_dev_pci_generic_remove(pci_dev, eth_fm10k_dev_uninit);
}
--
2.13.6
Qi Zhang
2018-06-07 12:38:43 UTC
Permalink
Previously, detaching a port on a secondary process would interfere with
the primary process and prevent the same device from being attached
again. By taking advantage of rte_eth_dev_release_port_local, we can
support this with a minor change.

Signed-off-by: Qi Zhang <***@intel.com>
---
drivers/net/null/rte_eth_null.c | 16 +++++++++++++++-
1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/drivers/net/null/rte_eth_null.c b/drivers/net/null/rte_eth_null.c
index 1d2e6b9e9..e5b8d2f03 100644
--- a/drivers/net/null/rte_eth_null.c
+++ b/drivers/net/null/rte_eth_null.c
@@ -623,6 +623,7 @@ rte_pmd_null_probe(struct rte_vdev_device *dev)
}
/* TODO: request info from primary to set up Rx and Tx */
eth_dev->dev_ops = &ops;
+ eth_dev->device = &dev->device;
rte_eth_dev_probing_finish(eth_dev);
return 0;
}
@@ -667,18 +668,31 @@ static int
rte_pmd_null_remove(struct rte_vdev_device *dev)
{
struct rte_eth_dev *eth_dev = NULL;
+ const char *name;

if (!dev)
return -EINVAL;

+ name = rte_vdev_device_name(dev);
+
PMD_LOG(INFO, "Closing null ethdev on numa socket %u",
rte_socket_id());

/* find the ethdev entry */
- eth_dev = rte_eth_dev_allocated(rte_vdev_device_name(dev));
+ eth_dev = rte_eth_dev_allocated(name);
if (eth_dev == NULL)
return -1;

+ if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+ /* detach device on local pprocess only */
+ if (strlen(rte_vdev_device_args(dev)) == 0)
+ return rte_eth_dev_release_port_local(eth_dev);
+ /**
+ * else this is a private device for current process
+ * so continue with normal detach scenario
+ */
+ }
+
rte_free(eth_dev->data->dev_private);

rte_eth_dev_release_port(eth_dev);
--
2.13.6
Qi Zhang
2018-06-07 12:38:34 UTC
Permalink
Previously, detaching a port on a secondary process would interfere with
the primary process and prevent the same device from being attached
again. By taking advantage of rte_eth_dev_release_port_local, we can
support this with a minor change.

Signed-off-by: Qi Zhang <***@intel.com>
---
drivers/net/i40e/i40e_ethdev.c | 2 ++
drivers/net/i40e/i40e_ethdev_vf.c | 9 +++++++++
2 files changed, 11 insertions(+)

diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c
index 13c5d3296..1de3c1499 100644
--- a/drivers/net/i40e/i40e_ethdev.c
+++ b/drivers/net/i40e/i40e_ethdev.c
@@ -678,6 +678,8 @@ static int eth_i40e_pci_remove(struct rte_pci_device *pci_dev)
if (!ethdev)
return -ENODEV;

+ if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+ return rte_eth_dev_release_port_local(ethdev);

if (ethdev->data->dev_flags & RTE_ETH_DEV_REPRESENTOR)
return rte_eth_dev_destroy(ethdev, i40e_vf_representor_uninit);
diff --git a/drivers/net/i40e/i40e_ethdev_vf.c b/drivers/net/i40e/i40e_ethdev_vf.c
index 804e44530..2b1ece851 100644
--- a/drivers/net/i40e/i40e_ethdev_vf.c
+++ b/drivers/net/i40e/i40e_ethdev_vf.c
@@ -1500,6 +1500,15 @@ static int eth_i40evf_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,

static int eth_i40evf_pci_remove(struct rte_pci_device *pci_dev)
{
+ struct rte_eth_dev *ethdev;
+ ethdev = rte_eth_dev_allocated(pci_dev->device.name);
+
+ if (!ethdev)
+ return -ENODEV;
+
+ if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+ return rte_eth_dev_release_port_local(ethdev);
+
return rte_eth_dev_pci_generic_remove(pci_dev, i40evf_dev_uninit);
}
--
2.13.6
Qi Zhang
2018-06-07 12:38:47 UTC
Permalink
Previously, detaching a port on a secondary process would interfere with
the primary process and prevent the same device from being attached
again. By taking advantage of rte_eth_dev_release_port_local, we can
support this with a minor change.

Signed-off-by: Qi Zhang <***@intel.com>
---
drivers/net/tap/rte_eth_tap.c | 17 +++++++++++++++--
1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
index 5531fe9d9..56d3b6cc9 100644
--- a/drivers/net/tap/rte_eth_tap.c
+++ b/drivers/net/tap/rte_eth_tap.c
@@ -1759,6 +1759,7 @@ rte_pmd_tap_probe(struct rte_vdev_device *dev)
}
/* TODO: request info from primary to set up Rx and Tx */
eth_dev->dev_ops = &ops;
+ eth_dev->device = &dev->device;
rte_eth_dev_probing_finish(eth_dev);
return 0;
}
@@ -1827,12 +1828,24 @@ rte_pmd_tap_remove(struct rte_vdev_device *dev)
{
struct rte_eth_dev *eth_dev = NULL;
struct pmd_internals *internals;
+ const char *name;
int i;

+ name = rte_vdev_device_name(dev);
/* find the ethdev entry */
- eth_dev = rte_eth_dev_allocated(rte_vdev_device_name(dev));
+ eth_dev = rte_eth_dev_allocated(name);
if (!eth_dev)
- return 0;
+ return -ENODEV;
+
+ if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+ /* detach device on local pprocess only */
+ if (strlen(rte_vdev_device_args(dev)) == 0)
+ return rte_eth_dev_release_port_local(eth_dev);
+ /**
+ * else this is a private device for current process
+ * so continue with normal detach scenario
+ */
+ }

internals = eth_dev->data->dev_private;
--
2.13.6
Wiles, Keith
2018-06-07 19:01:09 UTC
Permalink
Post by Qi Zhang
Previously, detach port on secondary process will mess primary
process and cause same device can't be attached again, by take
advantage of rte_eth_release_port_local, we can support this with
minor change.
Previously, detach ports on secondary process will mess with the primary
process and cause the device to be not able to attach again. Taking
advantage of the rte_eth_release_port_local call we can fix the problem
with minor changes.
Post by Qi Zhang
---
drivers/net/tap/rte_eth_tap.c | 17 +++++++++++++++--
1 file changed, 15 insertions(+), 2 deletions(-)
diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
index 5531fe9d9..56d3b6cc9 100644
--- a/drivers/net/tap/rte_eth_tap.c
+++ b/drivers/net/tap/rte_eth_tap.c
@@ -1759,6 +1759,7 @@ rte_pmd_tap_probe(struct rte_vdev_device *dev)
}
/* TODO: request info from primary to set up Rx and Tx */
eth_dev->dev_ops = &ops;
+ eth_dev->device = &dev->device;
rte_eth_dev_probing_finish(eth_dev);
return 0;
}
@@ -1827,12 +1828,24 @@ rte_pmd_tap_remove(struct rte_vdev_device *dev)
{
struct rte_eth_dev *eth_dev = NULL;
struct pmd_internals *internals;
+ const char *name;
int i;
+ name = rte_vdev_device_name(dev);
/* find the ethdev entry */
- eth_dev = rte_eth_dev_allocated(rte_vdev_device_name(dev));
+ eth_dev = rte_eth_dev_allocated(name);
if (!eth_dev)
- return 0;
+ return -ENODEV;
+
+ if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+ /* detach device on local pprocess only */
/pprocess/process/
Post by Qi Zhang
+ if (strlen(rte_vdev_device_args(dev)) == 0)
What does strlen() do with a null string returned by rte_vdev_device_args()? I believe it just returns zero, but we need to make sure. If it does not, then we must protect the strlen() call.
Post by Qi Zhang
+ return rte_eth_dev_release_port_local(eth_dev);
+ /**
+ * else this is a private device for current process
+ * so continue with normal detach scenario
+ */
+ }
internals = eth_dev->data->dev_private;
--
2.13.6
Regards,
Keith
Qi Zhang
2018-06-07 12:38:42 UTC
Permalink
Previously, detaching a port on a secondary process would interfere with
the primary process and prevent the same device from being attached
again. By taking advantage of rte_eth_dev_release_port_local, we can
support this with a minor change.

Signed-off-by: Qi Zhang <***@intel.com>
---
drivers/net/kni/rte_eth_kni.c | 11 +++++++++++
1 file changed, 11 insertions(+)

diff --git a/drivers/net/kni/rte_eth_kni.c b/drivers/net/kni/rte_eth_kni.c
index ab63ea427..3ee527ab2 100644
--- a/drivers/net/kni/rte_eth_kni.c
+++ b/drivers/net/kni/rte_eth_kni.c
@@ -419,6 +419,7 @@ eth_kni_probe(struct rte_vdev_device *vdev)
}
/* TODO: request info from primary to set up Rx and Tx */
eth_dev->dev_ops = &eth_kni_ops;
+ eth_dev->device = &vdev->device;
rte_eth_dev_probing_finish(eth_dev);
return 0;
}
@@ -463,6 +464,16 @@ eth_kni_remove(struct rte_vdev_device *vdev)
if (eth_dev == NULL)
return -1;

+ if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+ /* detach device on local pprocess only */
+ if (strlen(rte_vdev_device_args(vdev)) == 0)
+ return rte_eth_dev_release_port_local(eth_dev);
+ /**
+ * else this is a private device for current process
+ * so continue with normal detach scenario
+ */
+ }
+
eth_kni_dev_stop(eth_dev);

internals = eth_dev->data->dev_private;
--
2.13.6
Qi Zhang
2018-06-07 12:38:31 UTC
Permalink
This patch introduces a solution for handling the different hotplug cases
in a multi-process situation. It covers the following scenarios:

1. Attach a shared device from primary
2. Detach a shared device from primary
3. Attach a shared device from secondary
4. Detach a shared device from secondary
5. Attach a private device from secondary
6. Detach a private device from secondary
7. Detach a shared device from secondary privately
8. Attach a shared device from secondary privately

In the primary-secondary process model, we assume a device is shared by
default. That means attaching or detaching a device on any process is
broadcast to all other processes through the mp channel, so that device
information is synchronized across all processes.

Any failure during the attach process will cause inconsistent status
between processes, so a proper rollback action should be considered.
Also, it is not safe to detach a shared device while another process is
still using it, so a handshake mechanism is introduced; it will be
implemented in a following separate patch.

Scenario for Case 1, 2:

attach device
a) The primary attaches the new device; on failure, go to h).
b) The primary sends an attach sync request to all secondaries.
c) Each secondary receives the request, attaches the device, and sends a reply.
d) The primary checks the replies; if all succeeded, go to i).
e) The primary sends an attach rollback sync request to all secondaries.
f) Each secondary receives the request, detaches the device, and sends a reply.
g) The primary receives the replies and detaches the device as a rollback action.
h) Attach fails.
i) Attach succeeds.

detach device
a) The primary performs a pre-detach check; if the device is locked, go to i).
b) The primary sends a pre-detach sync request to all secondaries.
c) Each secondary performs a pre-detach check and sends a reply.
d) The primary checks the replies; if any failed, go to i).
e) The primary sends a detach sync request to all secondaries.
f) Each secondary detaches the device and sends a reply (assumed not to fail).
g) The primary detaches the device.
h) Detach succeeds.
i) Detach fails.

Case 3, 4:
This will be implemented in following patch.

Case 5, 6:
A secondary process can attach a private device which is only visible to
itself; in this case no IPC is involved. The primary process is not
allowed to have private devices so far.

Case 7, 8:
A secondary process can also temporarily detach a shared device "privately"
and then attach it back later; this action does not impact other processes.

API changes:

rte_eth_dev_attach and rte_eth_dev_detach are extended to support
shared device attach/detach in the primary-secondary process model; they
will be called in cases 1, 2, 3 and 4.

New APIs rte_eth_dev_attach_private and rte_eth_dev_detach_private are
introduced to cover cases 5, 6, 7 and 8; these APIs can only be invoked
in a secondary process.

Signed-off-by: Qi Zhang <***@intel.com>
---
lib/librte_eal/common/eal_private.h | 8 ++
lib/librte_eal/linuxapp/eal/eal.c | 6 ++
lib/librte_ethdev/Makefile | 1 +
lib/librte_ethdev/rte_ethdev.c | 183 ++++++++++++++++++++++++++++---
lib/librte_ethdev/rte_ethdev.h | 37 +++++++
lib/librte_ethdev/rte_ethdev_core.h | 5 +
lib/librte_ethdev/rte_ethdev_driver.h | 27 +++++
lib/librte_ethdev/rte_ethdev_mp.c | 195 ++++++++++++++++++++++++++++++++++
lib/librte_ethdev/rte_ethdev_mp.h | 44 ++++++++
9 files changed, 489 insertions(+), 17 deletions(-)
create mode 100644 lib/librte_ethdev/rte_ethdev_mp.c
create mode 100644 lib/librte_ethdev/rte_ethdev_mp.h

diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h
index bdadc4d50..92fa59bed 100644
--- a/lib/librte_eal/common/eal_private.h
+++ b/lib/librte_eal/common/eal_private.h
@@ -258,4 +258,12 @@ int rte_mp_channel_init(void);
*/
void dev_callback_process(char *device_name, enum rte_dev_event_type event);

+/**
+ * Register mp channel callback functions of ethdev layer.
+ *
+ * @return
+ * 0 on success.
+ * (<0) on failure.
+ */
+int rte_eth_dev_mp_init(void);
#endif /* _EAL_PRIVATE_H_ */
diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c
index 8655b8691..b276e1caa 100644
--- a/lib/librte_eal/linuxapp/eal/eal.c
+++ b/lib/librte_eal/linuxapp/eal/eal.c
@@ -1041,6 +1041,12 @@ rte_eal_init(int argc, char **argv)

rte_eal_mcfg_complete();

+ if (rte_eth_dev_mp_init()) {
+ rte_eal_init_alert("rte_eth_dev_mp_init() failed\n");
+ rte_errno = ENOEXEC;
+ return -1;
+ }
+
return fctret;
}

diff --git a/lib/librte_ethdev/Makefile b/lib/librte_ethdev/Makefile
index c2f2f7d82..04e93f337 100644
--- a/lib/librte_ethdev/Makefile
+++ b/lib/librte_ethdev/Makefile
@@ -19,6 +19,7 @@ EXPORT_MAP := rte_ethdev_version.map
LIBABIVER := 9

SRCS-y += rte_ethdev.c
+SRCS-y += rte_ethdev_mp.c
SRCS-y += rte_flow.c
SRCS-y += rte_tm.c
SRCS-y += rte_mtr.c
diff --git a/lib/librte_ethdev/rte_ethdev.c b/lib/librte_ethdev/rte_ethdev.c
index ec14adb91..24360f522 100644
--- a/lib/librte_ethdev/rte_ethdev.c
+++ b/lib/librte_ethdev/rte_ethdev.c
@@ -41,11 +41,12 @@
#include "rte_ethdev.h"
#include "rte_ethdev_driver.h"
#include "ethdev_profile.h"
+#include "rte_ethdev_mp.h"

-static int ethdev_logtype;
+int ethdev_logtype;

-#define ethdev_log(level, fmt, ...) \
- rte_log(RTE_LOG_ ## level, ethdev_logtype, fmt "\n", ## __VA_ARGS__)
+#define RTE_ETH_MP_ACTION_REQUEST "rte_eth_mp_request"
+#define RTE_ETH_MP_ACTION_RESPONSE "rte_eth_mp_response"

static const char *MZ_RTE_ETH_DEV_DATA = "rte_eth_dev_data";
struct rte_eth_dev rte_eth_devices[RTE_MAX_ETHPORTS];
@@ -656,9 +657,8 @@ eth_err(uint16_t port_id, int ret)
return ret;
}

-/* attach the new device, then store port_id of the device */
int
-rte_eth_dev_attach(const char *devargs, uint16_t *port_id)
+do_eth_dev_attach(const char *devargs, uint16_t *port_id)
{
int current = rte_eth_dev_count_total();
struct rte_devargs da;
@@ -703,14 +703,104 @@ rte_eth_dev_attach(const char *devargs, uint16_t *port_id)
return ret;
}

-/* detach the device, then store the name of the device */
int
-rte_eth_dev_detach(uint16_t port_id, char *name __rte_unused)
+do_eth_dev_detach(uint16_t port_id)
{
struct rte_device *dev;
struct rte_bus *bus;
+ int ret = 0;
+
+ dev = rte_eth_devices[port_id].device;
+ if (dev == NULL)
+ return -EINVAL;
+
+ bus = rte_bus_find_by_device(dev);
+ if (bus == NULL)
+ return -ENOENT;
+
+ ret = rte_eal_hotplug_remove(bus->name, dev->name);
+ if (ret < 0)
+ return ret;
+
+ rte_eth_dev_release_port(&rte_eth_devices[port_id]);
+ return ret;
+
+}
+
+/* attach the new device, then store port_id of the device */
+int
+rte_eth_dev_attach(const char *devargs, uint16_t *port_id)
+{
+ struct eth_dev_mp_req req;
+ int ret;
+
+ if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+
+ /**
+ * If secondary process, we just send request to primray
+ * to start the process.
+ */
+ req.t = REQ_TYPE_ATTACH;
+ strlcpy(req.devargs, devargs, MAX_DEV_ARGS_LEN);
+
+ ret = rte_eth_dev_request_to_primary(&req);
+ if (ret) {
+ ethdev_log(ERR, "Failed to send device attach request to primary\n");
+ return ret;
+ }
+
+ *port_id = req.port_id;
+ return req.result;
+ }
+
+ ret = do_eth_dev_attach(devargs, port_id);
+ if (ret)
+ return ret;
+
+ /* send attach request to seoncary */
+ req.t = REQ_TYPE_ATTACH;
+ strlcpy(req.devargs, devargs, MAX_DEV_ARGS_LEN);
+ req.port_id = *port_id;
+ ret = rte_eth_dev_request_to_secondary(&req);
+ if (ret) {
+ ethdev_log(ERR, "Failed to send device attach request to secondary\n");
+ goto rollback;
+ }
+
+ if (req.result)
+ goto rollback;
+
+ return 0;
+
+rollback:
+ /* send rollback request to secondary since some one fail to attach */
+ req.t = REQ_TYPE_ATTACH_ROLLBACK;
+ req.port_id = *port_id;
+ rte_eth_dev_request_to_secondary(&req);
+
+ do_eth_dev_detach(*port_id);
+
+ return -ENODEV;
+}
+
+/* attach the new device, then store port_id of the device */
+int
+rte_eth_dev_attach_private(const char *devargs, uint16_t *port_id)
+{
+
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY)
+ return -ENOTSUP;
+
+ return do_eth_dev_attach(devargs, port_id);
+}
+
+/* detach the device, then store the name of the device */
+int
+rte_eth_dev_detach(uint16_t port_id, char *name __rte_unused)
+{
+ struct eth_dev_mp_req req = {0};
+ int ret;
uint32_t dev_flags;
- int ret = -1;

RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);

@@ -721,22 +811,81 @@ rte_eth_dev_detach(uint16_t port_id, char *name __rte_unused)
return -ENOTSUP;
}

- dev = rte_eth_devices[port_id].device;
- if (dev == NULL)
- return -EINVAL;
+ if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+ req.t = REQ_TYPE_DETACH;
+ req.port_id = port_id;

- bus = rte_bus_find_by_device(dev);
- if (bus == NULL)
- return -ENOENT;
+ /**
+ * If secondary process, we just send request to primray
+ * to start the process.
+ */
+ ret = rte_eth_dev_request_to_primary(&req);
+ if (ret) {
+ ethdev_log(ERR, "Failed to send device detach request to primary\n");
+ return ret;
+ }

- ret = rte_eal_hotplug_remove(bus->name, dev->name);
- if (ret < 0)
+ return req.result;
+ }
+
+ /* check pre_detach */
+ req.t = REQ_TYPE_PRE_DETACH;
+ req.port_id = port_id;
+ ret = rte_eth_dev_request_to_secondary(&req);
+ if (ret) {
+ ethdev_log(ERR, "Failed to send device pre-detach request to secondary\n");
+ return ret;
+ }
+
+ if (req.result) {
+ ethdev_log(ERR, "Device is busy on secondary, can't be detached\n");
+ return req.result;
+ }
+
+ /* detach on seconary first */
+ req.t = REQ_TYPE_DETACH;
+ ret = rte_eth_dev_request_to_secondary(&req);
+ if (ret) {
+ ethdev_log(ERR, "Failed to send device detach request to secondary\n");
+ return ret;
+ }
+
+ if (req.result)
+ /**
+ * this should rarely happen, something wrong in secondary
+ * process, will not block primary detach.
+ */
+ ethdev_log(ERR, "Failed to detach device on secondary process\n");
+
+ /* detach on primary */
+ ret = do_eth_dev_detach(port_id);
+ if (ret)
return ret;

- rte_eth_dev_release_port(&rte_eth_devices[port_id]);
return 0;
}

+/* detach the device, then store the name of the device */
+int
+rte_eth_dev_detach_private(uint16_t port_id, char *name __rte_unused)
+{
+ uint32_t dev_flags;
+
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY)
+ return -ENOTSUP;
+
+ RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
+
+ dev_flags = rte_eth_devices[port_id].data->dev_flags;
+ if (dev_flags & RTE_ETH_DEV_BONDED_SLAVE) {
+ ethdev_log(ERR,
+ "Port %" PRIu16 " is bonded, cannot detach", port_id);
+ return -ENOTSUP;
+ }
+
+ return do_eth_dev_detach(port_id);
+}
+
static int
rte_eth_dev_rx_queue_config(struct rte_eth_dev *dev, uint16_t nb_queues)
{
diff --git a/lib/librte_ethdev/rte_ethdev.h b/lib/librte_ethdev/rte_ethdev.h
index 36e3984ea..bb03d613b 100644
--- a/lib/librte_ethdev/rte_ethdev.h
+++ b/lib/librte_ethdev/rte_ethdev.h
@@ -1462,6 +1462,9 @@ uint16_t __rte_experimental rte_eth_dev_count_total(void);

/**
* Attach a new Ethernet device specified by arguments.
+ * In multi-process mode, it will sync with other process
+ * to make sure all processes attach the device, any
+ * failure on other process will rollback the action.
*
* @param devargs
* A pointer to a strings array describing the new device
@@ -1475,9 +1478,27 @@ uint16_t __rte_experimental rte_eth_dev_count_total(void);
int rte_eth_dev_attach(const char *devargs, uint16_t *port_id);

/**
+ * Attach a private Ethernet device specified by arguments.
+ * A private device is invisible to other process.
+ * Can only be invoked in secondary process.
+ *
+ * @param devargs
+ * A pointer to a strings array describing the new device
+ * to be attached. The strings should be a pci address like
+ * '0000:01:00.0' or virtual device name like 'net_pcap0'.
+ * @param port_id
+ * A pointer to a port identifier actually attached.
+ * @return
+ * 0 on success and port_id is filled, negative on error
+ */
+int rte_eth_dev_attach_private(const char *devargs, uint16_t *port_id);
+
+/**
* Detach a Ethernet device specified by port identifier.
* This function must be called when the device is in the
* closed state.
+ * In multi-process mode, it will sync with other process
+ * to detach the device.
*
* @param port_id
* The port identifier of the device to detach.
@@ -1490,6 +1511,22 @@ int rte_eth_dev_attach(const char *devargs, uint16_t *port_id);
int rte_eth_dev_detach(uint16_t port_id, char *devname);

/**
+ * Detach a private Ethernet device specified by port identifier
+ * This function must be called when the device is in the
+ * closed state.
+ * Can only be invoked in secondary process.
+ *
+ * @param port_id
+ * The port identifier of the device to detach.
+ * @param devname
+ * A pointer to a buffer that will be filled with the device name.
+ * This buffer must be at least RTE_DEV_NAME_MAX_LEN long.
+ * @return
+ * 0 on success and devname is filled, negative on error
+ */
+int rte_eth_dev_detach_private(uint16_t port_id, char *devname);
+
+/**
* Convert a numerical speed in Mbps to a bitmap flag that can be used in
* the bitmap link_speeds of the struct rte_eth_conf
*
diff --git a/lib/librte_ethdev/rte_ethdev_core.h b/lib/librte_ethdev/rte_ethdev_core.h
index 33d12b3a2..2cb6de745 100644
--- a/lib/librte_ethdev/rte_ethdev_core.h
+++ b/lib/librte_ethdev/rte_ethdev_core.h
@@ -622,4 +622,9 @@ struct rte_eth_dev_data {
*/
extern struct rte_eth_dev rte_eth_devices[];

+extern int ethdev_logtype;
+#define ethdev_log(level, fmt, ...) \
+ rte_log(RTE_LOG_ ## level, ethdev_logtype, fmt "\n", ## __VA_ARGS__)
+
+
#endif /* _RTE_ETHDEV_CORE_H_ */
diff --git a/lib/librte_ethdev/rte_ethdev_driver.h b/lib/librte_ethdev/rte_ethdev_driver.h
index 261335426..616add313 100644
--- a/lib/librte_ethdev/rte_ethdev_driver.h
+++ b/lib/librte_ethdev/rte_ethdev_driver.h
@@ -338,6 +338,33 @@ typedef int (*ethdev_uninit_t)(struct rte_eth_dev *ethdev);
int __rte_experimental
rte_eth_dev_destroy(struct rte_eth_dev *ethdev, ethdev_uninit_t ethdev_uninit);

+/**
+ * Attach a new Ethernet device in current process.
+ *
+ * @param devargs
+ * A pointer to a strings array describing the new device
+ * to be attached. The strings should be a pci address like
+ * '0000:01:00.0' or virtual device name like 'net_pcap0'.
+ * @param port_id
+ * A pointer to a port identifier actually attached.
+ * @return
+ * 0 on success and port_id is filled, negative on error
+ */
+int do_eth_dev_attach(const char *devargs, uint16_t *port_id);
+
+/**
+ * Detach a Ethernet device in current process.
+ *
+ * @param port_id
+ * The port identifier of the device to detach.
+ * @param devname
+ * A pointer to a buffer that will be filled with the device name.
+ * This buffer must be at least RTE_DEV_NAME_MAX_LEN long.
+ * @return
+ * 0 on success and devname is filled, negative on error
+ */
+int do_eth_dev_detach(uint16_t port_id);
+
#ifdef __cplusplus
}
#endif
diff --git a/lib/librte_ethdev/rte_ethdev_mp.c b/lib/librte_ethdev/rte_ethdev_mp.c
new file mode 100644
index 000000000..8ede8151d
--- /dev/null
+++ b/lib/librte_ethdev/rte_ethdev_mp.c
@@ -0,0 +1,195 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2018 Intel Corporation
+ */
+
+#include "rte_ethdev_driver.h"
+#include "rte_ethdev_mp.h"
+
+static int detach_on_secondary(uint16_t port_id)
+{
+ struct rte_device *dev;
+ struct rte_bus *bus;
+ int ret = 0;
+
+ if (rte_eth_devices[port_id].state == RTE_ETH_DEV_UNUSED) {
+ ethdev_log(ERR, "detach on secondary: invalid port %d\n",
+ port_id);
+ return -ENODEV;
+ }
+
+ dev = rte_eth_devices[port_id].device;
+ if (dev == NULL)
+ return -EINVAL;
+
+ bus = rte_bus_find_by_device(dev);
+ if (bus == NULL)
+ return -ENOENT;
+
+ ret = rte_eal_hotplug_remove(bus->name, dev->name);
+ if (ret) {
+ ethdev_log(ERR, "failed to hot unplug bus: %s, device:%s\n",
+ bus->name, dev->name);
+ return ret;
+ }
+
+ rte_eth_dev_release_port_local(&rte_eth_devices[port_id]);
+ return ret;
+}
+
+static int attach_on_secondary(const char *devargs, uint16_t port_id)
+{
+ struct rte_devargs da;
+ int ret;
+
+ if (rte_eth_devices[port_id].state != RTE_ETH_DEV_UNUSED) {
+ ethdev_log(ERR, "port %d already in used, failed to attach\n",
+ port_id);
+ return -EINVAL;
+ }
+
+ memset(&da, 0, sizeof(da));
+
+ if (rte_devargs_parse(&da, "%s", devargs)) {
+ ethdev_log(ERR, "failed to parse devargs %s\n", devargs);
+ return -EINVAL;
+ }
+
+ ret = rte_eal_hotplug_add(da.bus->name, da.name, "");
+ if (ret) {
+ ethdev_log(ERR, "failed to hotplug bus:%s, device:%s\n",
+ da.bus->name, da.name);
+ free(da.args);
+ return ret;
+ }
+
+ if (rte_eth_devices[port_id].state == RTE_ETH_DEV_UNUSED) {
+ ethdev_log(ERR, "failed to attach to port %d, this is a pmd issue\n",
+ port_id);
+ return -ENODEV;
+ }
+ free(da.args);
+ return 0;
+}
+
+static int handle_secondary_request(const struct rte_mp_msg *msg, const void *peer)
+{
+ (void)msg;
+ (void)(peer);
+ return -ENOTSUP;
+}
+
+static int handle_primary_response(const struct rte_mp_msg *msg, const void *peer)
+{
+ (void)msg;
+ (void)(peer);
+ return -ENOTSUP;
+}
+
+static int handle_primary_request(const struct rte_mp_msg *msg, const void *peer)
+{
+ const struct eth_dev_mp_req *req =
+ (const struct eth_dev_mp_req *)msg->param;
+ struct rte_mp_msg mp_resp = {0};
+ struct eth_dev_mp_req *resp =
+ (struct eth_dev_mp_req *)mp_resp.param;
+ int ret = 0;
+
+ memset(&mp_resp, 0, sizeof(mp_resp));
+
+ switch (req->t) {
+ case REQ_TYPE_ATTACH:
+ ret = attach_on_secondary(req->devargs, req->port_id);
+ break;
+ case REQ_TYPE_PRE_DETACH:
+ ret = 0;
+ break;
+ case REQ_TYPE_DETACH:
+ case REQ_TYPE_ATTACH_ROLLBACK:
+ ret = detach_on_secondary(req->port_id);
+ break;
+ default:
+ ret = -EINVAL;
+ }
+
+ strcpy(mp_resp.name, ETH_DEV_MP_ACTION_REQUEST);
+ mp_resp.len_param = sizeof(*req);
+ memcpy(resp, req, sizeof(*resp));
+ resp->result = ret;
+ if (rte_mp_reply(&mp_resp, peer) < 0) {
+ ethdev_log(ERR, "failed to send reply to primary request\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+int rte_eth_dev_request_to_primary(struct eth_dev_mp_req *req)
+{
+ (void)req;
+ return -ENOTSUP;
+}
+
+/**
+ * Request from primary to secondary.
+ *
+ * Be invoked when try to attach or detach a share device
+ * from primary process.
+ */
+int rte_eth_dev_request_to_secondary(struct eth_dev_mp_req *req)
+{
+ struct rte_mp_msg mp_req = {0};
+ struct rte_mp_reply mp_reply;
+ struct timespec ts = {.tv_sec = 5, .tv_nsec = 0};
+ int ret;
+ int i;
+
+ memcpy(mp_req.param, req, sizeof(*req));
+ mp_req.len_param = sizeof(*req);
+ strcpy(mp_req.name, ETH_DEV_MP_ACTION_REQUEST);
+
+ ret = rte_mp_request_sync(&mp_req, &mp_reply, &ts);
+ if (ret) {
+ ethdev_log(ERR, "rte_mp_request_sync failed\n");
+ return ret;
+ }
+
+ req->result = 0;
+ for (i = 0; i < mp_reply.nb_received; i++) {
+ struct eth_dev_mp_req *resp =
+ (struct eth_dev_mp_req *)mp_reply.msgs[i].param;
+ if (resp->result) {
+ req->result = resp->result;
+ break;
+ }
+ }
+
+ return 0;
+}
+
+int rte_eth_dev_mp_init(void)
+{
+
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+ if (rte_mp_action_register(ETH_DEV_MP_ACTION_REQUEST,
+ handle_secondary_request)) {
+ RTE_LOG(ERR, EAL, "Couldn't register '%s' action\n",
+ ETH_DEV_MP_ACTION_REQUEST);
+ return -1;
+ }
+ } else {
+ if (rte_mp_action_register(ETH_DEV_MP_ACTION_RESPONSE,
+ handle_primary_response)) {
+ RTE_LOG(ERR, EAL, "Couldn't register '%s' action\n",
+ ETH_DEV_MP_ACTION_RESPONSE);
+ return -1;
+ }
+ if (rte_mp_action_register(ETH_DEV_MP_ACTION_REQUEST,
+ handle_primary_request)) {
+ RTE_LOG(ERR, EAL, "Couldn't register '%s' action\n",
+ ETH_DEV_MP_ACTION_REQUEST);
+ }
+ }
+
+ return 0;
+}
+
diff --git a/lib/librte_ethdev/rte_ethdev_mp.h b/lib/librte_ethdev/rte_ethdev_mp.h
new file mode 100644
index 000000000..c3e55dfec
--- /dev/null
+++ b/lib/librte_ethdev/rte_ethdev_mp.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#ifndef _RTE_ETHDEV_MP_H_
+#define _RTE_ETHDEV_MP_H_
+
+#define MAX_DEV_ARGS_LEN 0x80
+
+#define ETH_DEV_MP_ACTION_REQUEST "eth_dev_mp_request"
+#define ETH_DEV_MP_ACTION_RESPONSE "eth_dev_mp_response"
+
+enum eth_dev_req_type {
+ REQ_TYPE_ATTACH,
+ REQ_TYPE_PRE_DETACH,
+ REQ_TYPE_DETACH,
+ REQ_TYPE_ATTACH_ROLLBACK,
+};
+
+struct eth_dev_mp_req {
+ enum eth_dev_req_type t;
+ char devargs[MAX_DEV_ARGS_LEN];
+ uint16_t port_id;
+ int result;
+};
+
+/**
+ * This is a synchronous wrapper used by a secondary process to send
+ * a request to the primary process. It is invoked when an attach
+ * or detach request is issued from a secondary process.
+ */
+int rte_eth_dev_request_to_primary(struct eth_dev_mp_req *req);
+
+/**
+ * this is a synchronous wrapper for primary process send
+ * request to secondary process, this is invoked when an attach
+ * or detach request issued from secondary process.
+ */
+int rte_eth_dev_request_to_secondary(struct eth_dev_mp_req *req);
+
+/* Register mp channel callback functions of ethdev layer.*/
+int rte_eth_dev_mp_init(void);
+
+#endif
--
2.13.6
Burakov, Anatoly
2018-06-15 15:44:43 UTC
Permalink
Post by Qi Zhang
The patch introduce the solution to handle different hotplug cases in
1. Attach a share device from primary
2. Detach a share device from primary
3. Attach a share device from secondary
4. Detach a share device from secondary
5. Attach a private device from secondary
6. Detach a private device from secondary
7. Detach a share device from secondary privately
8. Attach a share device from secondary privately
In primary-secondary process model, we assume device is shared by default.
that means attach or detach a device on any process will broadcast to
all other processes through mp channel then device information will be
synchronized on all processes.
Any failure during attaching process will cause inconsistent status
between processes, so proper rollback action should be considered.
Also it is not safe to detach a share device when other process still use
it, so a handshake mechanism is introduced, it will be implemented in
following separate patch.
attach device
a) primary attach the new device if failed goto h).
b) primary send attach sync request to all secondary.
c) secondary receive request and attach device and send reply.
d) primary check the reply if all success go to i).
e) primary send attach rollback sync request to all secondary.
f) secondary receive the request and detach device and send reply.
g) primary receive the reply and detach device as rollback action.
h) attach fail
i) attach success
detach device
a) primary perform pre-detach check, if device is locked, goto i).
b) primary send pre-detach sync request to all secondary.
c) secondary perform pre-detach check and send reply.
d) primary check the reply if any fail goto i).
e) primary send detach sync request to all secondary
f) secondary detach the device and send reply (assume no fail)
g) primary detach the device.
h) detach success
i) detach failed
This will be implemented in following patch.
Secondary process can attach private device which only visible to itself,
in this case no IPC is involved, primary process is not allowd to have
private device so far.
Secondary process can also temporally to detach a share device "privately"
then attach it back later, this action also not impact other processes.
rte_eth_dev_attach and rte_eth_dev_detach are extended to support
share device attach/detach in primary-secondary process model, it will
be called in case 1,2,3,4.
New API rte_eth_dev_attach_private and rte_eth_dev_detach_private are
introduced to cover case 5,6,7,8, this API can only be invoked in secondary
process.
---
lib/librte_eal/common/eal_private.h | 8 ++
lib/librte_eal/linuxapp/eal/eal.c | 6 ++
lib/librte_ethdev/Makefile | 1 +
lib/librte_ethdev/rte_ethdev.c | 183 ++++++++++++++++++++++++++++---
lib/librte_ethdev/rte_ethdev.h | 37 +++++++
lib/librte_ethdev/rte_ethdev_core.h | 5 +
lib/librte_ethdev/rte_ethdev_driver.h | 27 +++++
lib/librte_ethdev/rte_ethdev_mp.c | 195 ++++++++++++++++++++++++++++++++++
lib/librte_ethdev/rte_ethdev_mp.h | 44 ++++++++
9 files changed, 489 insertions(+), 17 deletions(-)
create mode 100644 lib/librte_ethdev/rte_ethdev_mp.c
create mode 100644 lib/librte_ethdev/rte_ethdev_mp.h
Haven't looked at the code yet, but general comment: please don't prefix
internal-only files with rte_, it makes it look like they are part of
external API.
--
Thanks,
Anatoly
Burakov, Anatoly
2018-06-18 08:18:17 UTC
Permalink
Post by Qi Zhang
The patch introduce the solution to handle different hotplug cases in
1. Attach a share device from primary
2. Detach a share device from primary
3. Attach a share device from secondary
4. Detach a share device from secondary
5. Attach a private device from secondary
6. Detach a private device from secondary
7. Detach a share device from secondary privately
8. Attach a share device from secondary privately
In primary-secondary process model, we assume device is shared by default.
that means attach or detach a device on any process will broadcast to
all other processes through mp channel then device information will be
synchronized on all processes.
Any failure during attaching process will cause inconsistent status
between processes, so proper rollback action should be considered.
Also it is not safe to detach a share device when other process still use
it, so a handshake mechanism is introduced, it will be implemented in
following separate patch.
attach device
a) primary attach the new device if failed goto h).
b) primary send attach sync request to all secondary.
c) secondary receive request and attach device and send reply.
d) primary check the reply if all success go to i).
e) primary send attach rollback sync request to all secondary.
f) secondary receive the request and detach device and send reply.
g) primary receive the reply and detach device as rollback action.
h) attach fail
i) attach success
detach device
a) primary perform pre-detach check, if device is locked, goto i).
b) primary send pre-detach sync request to all secondary.
c) secondary perform pre-detach check and send reply.
d) primary check the reply if any fail goto i).
e) primary send detach sync request to all secondary
f) secondary detach the device and send reply (assume no fail)
g) primary detach the device.
h) detach success
i) detach failed
This will be implemented in following patch.
If these will be implemented in following patch, why spend half the
commit message talking about it? :) This commit doesn't implement
secondary process functionality at all, so the commit message should
probably be reworded to only include primary process logic, no?
Post by Qi Zhang
Secondary process can attach private device which only visible to itself,
in this case no IPC is involved, primary process is not allowd to have
private device so far.
Secondary process can also temporally to detach a share device "privately"
then attach it back later, this action also not impact other processes.
Multiple typos - "chenages", "temporally", "allowd", etc.
Post by Qi Zhang
rte_eth_dev_attach and rte_eth_dev_detach are extended to support
share device attach/detach in primary-secondary process model, it will
be called in case 1,2,3,4.
New API rte_eth_dev_attach_private and rte_eth_dev_detach_private are
introduced to cover case 5,6,7,8, this API can only be invoked in secondary
process.
---
<snip>
Post by Qi Zhang
rte_eal_mcfg_complete();
+ if (rte_eth_dev_mp_init()) {
+ rte_eal_init_alert("rte_eth_dev_mp_init() failed\n");
+ rte_errno = ENOEXEC;
+ return -1;
+ }
+
Why is this done after the end of init? rte_eal_mcfg_complete() makes it
so that secondaries can initialize, at that point all initialization
should have been finished. I would expect this to be called after
(before?) bus probe, since this is device-related.
Post by Qi Zhang
return fctret;
}
diff --git a/lib/librte_ethdev/Makefile b/lib/librte_ethdev/Makefile
index c2f2f7d82..04e93f337 100644
--- a/lib/librte_ethdev/Makefile
+++ b/lib/librte_ethdev/Makefile
@@ -19,6 +19,7 @@ EXPORT_MAP := rte_ethdev_version.map
LIBABIVER := 9
<snip>
Post by Qi Zhang
+ if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+
+ /**
+ * If secondary process, we just send request to primary
+ * to start the process.
+ */
+ req.t = REQ_TYPE_ATTACH;
+ strlcpy(req.devargs, devargs, MAX_DEV_ARGS_LEN);
+
+ ret = rte_eth_dev_request_to_primary(&req);
+ if (ret) {
+ ethdev_log(ERR, "Failed to send device attach request to primary\n");
The log message is a little misleading. It can be that secondary has
failed to send request. It can also be that it succeeded, but the attach
itself has failed. I think a better message would be "attach request has
failed" or something to that effect.
Post by Qi Zhang
+ return ret;
+ }
+
+ *port_id = req.port_id;
+ return req.result;
+ }
+
+ ret = do_eth_dev_attach(devargs, port_id);
+ if (ret)
+ return ret;
+
+ /* send attach request to secondary */
+ req.t = REQ_TYPE_ATTACH;
+ strlcpy(req.devargs, devargs, MAX_DEV_ARGS_LEN);
+ req.port_id = *port_id;
+ ret = rte_eth_dev_request_to_secondary(&req);
+ if (ret) {
+ ethdev_log(ERR, "Failed to send device attach request to secondary\n");
Same as above - log message can/might be misleading. There are a few
other places where similar log message is present, those should be
corrected too.
Post by Qi Zhang
+ goto rollback;
+ }
+
+ if (req.result)
+ goto rollback;
+
+ return 0;
<snip>
Post by Qi Zhang
+{
+ uint32_t dev_flags;
+
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY)
+ return -ENOTSUP;
+
+ RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
+
+ dev_flags = rte_eth_devices[port_id].data->dev_flags;
+ if (dev_flags & RTE_ETH_DEV_BONDED_SLAVE) {
+ ethdev_log(ERR,
+ "Port %" PRIu16 " is bonded, cannot detach", port_id);
+ return -ENOTSUP;
+ }
Do we have to do a similar check for failsafe devices?
Post by Qi Zhang
+
+ return do_eth_dev_detach(port_id);
+}
+
static int
rte_eth_dev_rx_queue_config(struct rte_eth_dev *dev, uint16_t nb_queues)
{
diff --git a/lib/librte_ethdev/rte_ethdev.h b/lib/librte_ethdev/rte_ethdev.h
index 36e3984ea..bb03d613b 100644
--- a/lib/librte_ethdev/rte_ethdev.h
+++ b/lib/librte_ethdev/rte_ethdev.h
<snip>
Post by Qi Zhang
/**
+ * Attach a private Ethernet device specified by arguments.
+ * A private device is invisible to other process.
+ * Can only be invoked in secondary process.
+ *
+ * A pointer to a strings array describing the new device
+ * to be attached. The strings should be a pci address like
+ * '0000:01:00.0' or virtual device name like 'net_pcap0'.
+ * A pointer to a port identifier actually attached.
+ * 0 on success and port_id is filled, negative on error
+ */
+int rte_eth_dev_attach_private(const char *devargs, uint16_t *port_id);
New API's should be marked as __rte_experimental.
Post by Qi Zhang
+
+/**
* Detach a Ethernet device specified by port identifier.
* This function must be called when the device is in the
* closed state.
+ * In multi-process mode, it will sync with other process
+ * to detach the device.
*
* The port identifier of the device to detach.
@@ -1490,6 +1511,22 @@ int rte_eth_dev_attach(const char *devargs, uint16_t *port_id);
<snip>
Post by Qi Zhang
+ * Detach a Ethernet device in current process.
+ *
+ * The port identifier of the device to detach.
+ * A pointer to a buffer that will be filled with the device name.
+ * This buffer must be at least RTE_DEV_NAME_MAX_LEN long.
+ * 0 on success and devname is filled, negative on error
+ */
+int do_eth_dev_detach(uint16_t port_id);
+
Why is this made part of an external API? You should have a separate,
private header file for these.
Post by Qi Zhang
#ifdef __cplusplus
}
#endif
diff --git a/lib/librte_ethdev/rte_ethdev_mp.c b/lib/librte_ethdev/rte_ethdev_mp.c
new file mode 100644
index 000000000..8ede8151d
--- /dev/null
+++ b/lib/librte_ethdev/rte_ethdev_mp.c
@@ -0,0 +1,195 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2018 Intel Corporation
+ */
+
+#include "rte_ethdev_driver.h"
+#include "rte_ethdev_mp.h"
+
+static int detach_on_secondary(uint16_t port_id)
<snip>
Post by Qi Zhang
+ free(da.args);
+ return 0;
+}
+
+static int handle_secondary_request(const struct rte_mp_msg *msg, const void *peer)
+{
+ (void)msg;
+ (void)(peer);
+ return -ENOTSUP;
Please either mark arguments as __rte_unused, or use RTE_SET_USED(blah)
macro. Same in other similar places.
Post by Qi Zhang
+}
+
+static int handle_primary_response(const struct rte_mp_msg *msg, const void *peer)
+{
+ (void)msg;
+ (void)(peer);
+ return -ENOTSUP;
+}
+
+static int handle_primary_request(const struct rte_mp_msg *msg, const void *peer)
+{
+ const struct eth_dev_mp_req *req =
+ (const struct eth_dev_mp_req *)msg->param;
<snip>
Post by Qi Zhang
+ ret = detach_on_secondary(req->port_id);
+ break;
+ ret = -EINVAL;
+ }
+
+ strcpy(mp_resp.name, ETH_DEV_MP_ACTION_REQUEST);
Here and in other places: rte_strlcpy?
--
Thanks,
Anatoly
Zhang, Qi Z
2018-06-19 03:22:53 UTC
Permalink
-----Original Message-----
From: Burakov, Anatoly
Sent: Monday, June 18, 2018 4:18 PM
Subject: Re: [PATCH 04/22] ethdev: enable hotplug on multi-process
Post by Qi Zhang
The patch introduce the solution to handle different hotplug cases in
1. Attach a share device from primary
2. Detach a share device from primary
3. Attach a share device from secondary 4. Detach a share device from
secondary 5. Attach a private device from secondary 6. Detach a
private device from secondary 7. Detach a share device from secondary
privately 8. Attach a share device from secondary privately
In primary-secondary process model, we assume device is shared by
default.
Post by Qi Zhang
that means attach or detach a device on any process will broadcast to
all other processes through mp channel then device information will be
synchronized on all processes.
Any failure during attaching process will cause inconsistent status
between processes, so proper rollback action should be considered.
Also it is not safe to detach a share device when other process still
use it, so a handshake mechanism is introduced, it will be implemented
in following separate patch.
attach device
a) primary attach the new device if failed goto h).
b) primary send attach sync request to all secondary.
c) secondary receive request and attach device and send reply.
d) primary check the reply if all success go to i).
e) primary send attach rollback sync request to all secondary.
f) secondary receive the request and detach device and send reply.
g) primary receive the reply and detach device as rollback action.
h) attach fail
i) attach success
detach device
a) primary perform pre-detach check, if device is locked, goto i).
b) primary send pre-detach sync request to all secondary.
c) secondary perform pre-detach check and send reply.
d) primary check the reply if any fail goto i).
e) primary send detach sync request to all secondary
f) secondary detach the device and send reply (assume no fail)
g) primary detach the device.
h) detach success
i) detach failed
This will be implemented in following patch.
If these will be implemented in following patch, why spend half the commit
message talking about it? :)
Sorry, I didn't get your point about "see half commit to talk about it" :)
This patch covered an overview, and also the implementation of case 1,2,5,6,7,8

For case 3, 4, just below 4 lines to describe it

3. Attach a share device from secondary.
4. Detach a share device from secondary.
Case 3, 4:
This will be implemented in following patch.
This commit doesn't implement secondary
process functionality at all, so the commit message should probably be
reworded to only include primary process logic, no?
OK, I will reword it to highlight the patch's scope as description at above.
Post by Qi Zhang
Secondary process can attach private device which only visible to itself,
in this case no IPC is involved, primary process is not allowed to have
private device so far.
Secondary process can also temporally to detach a share device "privately"
then attach it back later, this action also not impact other processes.
Multiple typos - "chenages", "temporally", "allowd", etc.
Thanks
Post by Qi Zhang
rte_eth_dev_attach and rte_eth_dev_detach are extended to support
share device attach/detach in primary-secondary process model, it will
be called in case 1,2,3,4.
New API rte_eth_dev_attach_private and rte_eth_dev_detach_private are
introduced to cover case 5,6,7,8, this API can only be invoked in secondary
process.
---
<snip>
Post by Qi Zhang
rte_eal_mcfg_complete();
+ if (rte_eth_dev_mp_init()) {
+ rte_eal_init_alert("rte_eth_dev_mp_init() failed\n");
+ rte_errno = ENOEXEC;
+ return -1;
+ }
+
Why is this done after the end of init? rte_eal_mcfg_complete() makes it
so that secondaries can initialize, at that point all initialization
should have been finished. I would expect this to be called after
(before?) bus probe, since this is device-related.
OK will move ahead.
Post by Qi Zhang
return fctret;
}
diff --git a/lib/librte_ethdev/Makefile b/lib/librte_ethdev/Makefile
index c2f2f7d82..04e93f337 100644
--- a/lib/librte_ethdev/Makefile
+++ b/lib/librte_ethdev/Makefile
@@ -19,6 +19,7 @@ EXPORT_MAP := rte_ethdev_version.map
LIBABIVER := 9
<snip>
Post by Qi Zhang
+ if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+
+ /**
+ * If secondary process, we just send request to primary
+ * to start the process.
+ */
+ req.t = REQ_TYPE_ATTACH;
+ strlcpy(req.devargs, devargs, MAX_DEV_ARGS_LEN);
+
+ ret = rte_eth_dev_request_to_primary(&req);
+ if (ret) {
+ ethdev_log(ERR, "Failed to send device attach request to
primary\n");
The log message is a little misleading. It can be that secondary has
failed to send request. It can also be that it succeeded, but the attach
itself has failed. I think a better message would be "attach request has
failed" or something to that effect.
The return value of rte_eth_dev_request_to_primary only indicates a communication failure
(the message could not be sent, or no reply was received in time),
not a failure of the attach/detach itself (that comes from req->result).
Post by Qi Zhang
+ return ret;
+ }
+
+ *port_id = req.port_id;
+ return req.result;
+ }
+
+ ret = do_eth_dev_attach(devargs, port_id);
+ if (ret)
+ return ret;
+
+ /* send attach request to secondary */
+ req.t = REQ_TYPE_ATTACH;
+ strlcpy(req.devargs, devargs, MAX_DEV_ARGS_LEN);
+ req.port_id = *port_id;
+ ret = rte_eth_dev_request_to_secondary(&req);
+ if (ret) {
+ ethdev_log(ERR, "Failed to send device attach request to
secondary\n");
Same as above - log message can/might be misleading. There are a few
other places where similar log message is present, those should be
corrected too.
Same as above
Post by Qi Zhang
+ goto rollback;
+ }
+
+ if (req.result)
+ goto rollback;
+
+ return 0;
<snip>
Post by Qi Zhang
+{
+ uint32_t dev_flags;
+
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY)
+ return -ENOTSUP;
+
+ RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
+
+ dev_flags = rte_eth_devices[port_id].data->dev_flags;
+ if (dev_flags & RTE_ETH_DEV_BONDED_SLAVE) {
+ ethdev_log(ERR,
+ "Port %" PRIu16 " is bonded, cannot detach", port_id);
+ return -ENOTSUP;
+ }
Do we have to do a similar check for failsafe devices?
Just keep it same logic as before, it could be a separate patch to fix I guess.
Post by Qi Zhang
+
+ return do_eth_dev_detach(port_id);
+}
+
static int
rte_eth_dev_rx_queue_config(struct rte_eth_dev *dev, uint16_t
nb_queues)
Post by Qi Zhang
{
diff --git a/lib/librte_ethdev/rte_ethdev.h b/lib/librte_ethdev/rte_ethdev.h
index 36e3984ea..bb03d613b 100644
--- a/lib/librte_ethdev/rte_ethdev.h
+++ b/lib/librte_ethdev/rte_ethdev.h
<snip>
Post by Qi Zhang
/**
+ * Attach a private Ethernet device specified by arguments.
+ * A private device is invisible to other process.
+ * Can only be invoked in secondary process.
+ *
+ * A pointer to a strings array describing the new device
+ * to be attached. The strings should be a pci address like
+ * '0000:01:00.0' or virtual device name like 'net_pcap0'.
+ * A pointer to a port identifier actually attached.
+ * 0 on success and port_id is filled, negative on error
+ */
+int rte_eth_dev_attach_private(const char *devargs, uint16_t *port_id);
New API's should be marked as __rte_experimental.
OK
Post by Qi Zhang
+
+/**
* Detach a Ethernet device specified by port identifier.
* This function must be called when the device is in the
* closed state.
+ * In multi-process mode, it will sync with other process
+ * to detach the device.
*
* The port identifier of the device to detach.
@@ -1490,6 +1511,22 @@ int rte_eth_dev_attach(const char *devargs,
uint16_t *port_id);
<snip>
Post by Qi Zhang
+ * Detach a Ethernet device in current process.
+ *
+ * The port identifier of the device to detach.
+ * A pointer to a buffer that will be filled with the device name.
+ * This buffer must be at least RTE_DEV_NAME_MAX_LEN long.
+ * 0 on success and devname is filled, negative on error
+ */
+int do_eth_dev_detach(uint16_t port_id);
+
Why is this made part of an external API? You should have a separate,
private header file for these.
OK, will add to ethdev_private.h in v2.
Post by Qi Zhang
#ifdef __cplusplus
}
#endif
diff --git a/lib/librte_ethdev/rte_ethdev_mp.c
b/lib/librte_ethdev/rte_ethdev_mp.c
Post by Qi Zhang
new file mode 100644
index 000000000..8ede8151d
--- /dev/null
+++ b/lib/librte_ethdev/rte_ethdev_mp.c
@@ -0,0 +1,195 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2018 Intel Corporation
+ */
+
+#include "rte_ethdev_driver.h"
+#include "rte_ethdev_mp.h"
+
+static int detach_on_secondary(uint16_t port_id)
<snip>
Post by Qi Zhang
+ free(da.args);
+ return 0;
+}
+
+static int handle_secondary_request(const struct rte_mp_msg *msg, const
void *peer)
Post by Qi Zhang
+{
+ (void)msg;
+ (void)(peer);
+ return -ENOTSUP;
Please either mark arguments as __rte_unused, or use RTE_SET_USED(blah)
macro. Same in other similar places.
OK.
Post by Qi Zhang
+}
+
+static int handle_primary_response(const struct rte_mp_msg *msg, const
void *peer)
Post by Qi Zhang
+{
+ (void)msg;
+ (void)(peer);
+ return -ENOTSUP;
+}
+
+static int handle_primary_request(const struct rte_mp_msg *msg, const
void *peer)
Post by Qi Zhang
+{
+ const struct eth_dev_mp_req *req =
+ (const struct eth_dev_mp_req *)msg->param;
<snip>
Post by Qi Zhang
+ ret = detach_on_secondary(req->port_id);
+ break;
+ ret = -EINVAL;
+ }
+
+ strcpy(mp_resp.name, ETH_DEV_MP_ACTION_REQUEST);
Here and in other places: rte_strlcpy?
OK

Thanks!
Qi
--
Thanks,
A
Burakov, Anatoly
2018-06-19 08:37:08 UTC
Permalink
Post by Zhang, Qi Z
-----Original Message-----
From: Burakov, Anatoly
Sent: Monday, June 18, 2018 4:18 PM
Subject: Re: [PATCH 04/22] ethdev: enable hotplug on multi-process
Post by Qi Zhang
The patch introduce the solution to handle different hotplug cases in
1. Attach a share device from primary
2. Detach a share device from primary
3. Attach a share device from secondary 4. Detach a share device from
secondary 5. Attach a private device from secondary 6. Detach a
private device from secondary 7. Detach a share device from secondary
privately 8. Attach a share device from secondary privately
In primary-secondary process model, we assume device is shared by
default.
Post by Qi Zhang
that means attach or detach a device on any process will broadcast to
all other processes through mp channel then device information will be
synchronized on all processes.
Any failure during attaching process will cause inconsistent status
between processes, so proper rollback action should be considered.
Also it is not safe to detach a share device when other process still
use it, so a handshake mechanism is introduced, it will be implemented
in following separate patch.
attach device
a) primary attach the new device if failed goto h).
b) primary send attach sync request to all secondary.
c) secondary receive request and attach device and send reply.
d) primary check the reply if all success go to i).
e) primary send attach rollback sync request to all secondary.
f) secondary receive the request and detach device and send reply.
g) primary receive the reply and detach device as rollback action.
h) attach fail
i) attach success
detach device
a) primary perform pre-detach check, if device is locked, goto i).
b) primary send pre-detach sync request to all secondary.
c) secondary perform pre-detach check and send reply.
d) primary check the reply if any fail goto i).
e) primary send detach sync request to all secondary
f) secondary detach the device and send reply (assume no fail)
g) primary detach the device.
h) detach success
i) detach failed
This will be implemented in following patch.
If these will be implemented in following patch, why spend half the commit
message talking about it? :)
Sorry, I didn't get your point about "see half commit to talk about it" :)
This patch covered an overview, and also the implementation of case 1,2,5,6,7,8
For case 3, 4, just below 4 lines to describe it
3. Attach a share device from secondary.
4. Detach a share device from secondary.
This will be implemented in following patch.
This commit doesn't implement secondary
process functionality at all, so the commit message should probably be
reworded to only include primary process logic, no?
OK, I will reword it to highlight the patch's scope as description at above.
Thanks!

<snip>
Post by Zhang, Qi Z
The return value of rte_eth_dev_request_to_primary only indicates a communication failure
(the message could not be sent, or no reply was received in time),
not a failure of the attach/detach itself (that comes from req->result).
Ah, yes, my apologies, you're right! The log message is fine then.

<snip>
Post by Zhang, Qi Z
Do we have to do a similar check for failsafe devices?
Just keep it same logic as before, it could be a separate patch to fix I guess.
Sure.

<snip>
Post by Zhang, Qi Z
Here and in other places: rte_strlcpy?
OK
Apologies, this should read strlcpy, not rte_strlcpy.
--
Thanks,
Anatoly
Qi Zhang
2018-06-07 12:38:32 UTC
Permalink
Introduce the APIs rte_eth_dev_lock and rte_eth_dev_unlock to let an
application lock or unlock a specific ethdev. A locked device
can't be detached; this helps the application prevent unexpected
device detaching, especially in a multi-process environment.

Also, the new API lets an application register a callback function
which will be invoked before a device is detached.
The return value of the callback decides whether the device will
be detached or not, which lets the application do a condition check
at runtime.

Signed-off-by: Qi Zhang <***@intel.com>
---
lib/librte_ethdev/Makefile | 1 +
lib/librte_ethdev/rte_ethdev.c | 41 ++++++++++++++-
lib/librte_ethdev/rte_ethdev.h | 64 ++++++++++++++++++++++
lib/librte_ethdev/rte_ethdev_lock.c | 102 ++++++++++++++++++++++++++++++++++++
lib/librte_ethdev/rte_ethdev_lock.h | 23 ++++++++
lib/librte_ethdev/rte_ethdev_mp.c | 3 +-
6 files changed, 232 insertions(+), 2 deletions(-)
create mode 100644 lib/librte_ethdev/rte_ethdev_lock.c
create mode 100644 lib/librte_ethdev/rte_ethdev_lock.h

diff --git a/lib/librte_ethdev/Makefile b/lib/librte_ethdev/Makefile
index 04e93f337..5c4646469 100644
--- a/lib/librte_ethdev/Makefile
+++ b/lib/librte_ethdev/Makefile
@@ -20,6 +20,7 @@ LIBABIVER := 9

SRCS-y += rte_ethdev.c
SRCS-y += rte_ethdev_mp.c
+SRCS-y += rte_ethdev_lock.c
SRCS-y += rte_flow.c
SRCS-y += rte_tm.c
SRCS-y += rte_mtr.c
diff --git a/lib/librte_ethdev/rte_ethdev.c b/lib/librte_ethdev/rte_ethdev.c
index 24360f522..6494e71a4 100644
--- a/lib/librte_ethdev/rte_ethdev.c
+++ b/lib/librte_ethdev/rte_ethdev.c
@@ -42,6 +42,7 @@
#include "rte_ethdev_driver.h"
#include "ethdev_profile.h"
#include "rte_ethdev_mp.h"
+#include "rte_ethdev_lock.h"

int ethdev_logtype;

@@ -787,7 +788,6 @@ rte_eth_dev_attach(const char *devargs, uint16_t *port_id)
int
rte_eth_dev_attach_private(const char *devargs, uint16_t *port_id)
{
-
if (rte_eal_process_type() == RTE_PROC_PRIMARY)
return -ENOTSUP;

@@ -828,6 +828,10 @@ rte_eth_dev_detach(uint16_t port_id, char *name __rte_unused)
return req.result;
}

+ ret = process_lock_callbacks(port_id);
+ if (ret)
+ return ret;
+
/* check pre_detach */
req.t = REQ_TYPE_PRE_DETACH;
req.port_id = port_id;
@@ -870,6 +874,7 @@ int
rte_eth_dev_detach_private(uint16_t port_id, char *name __rte_unused)
{
uint32_t dev_flags;
+ int ret;

if (rte_eal_process_type() == RTE_PROC_PRIMARY)
return -ENOTSUP;
@@ -883,6 +888,10 @@ rte_eth_dev_detach_private(uint16_t port_id, char *name __rte_unused)
return -ENOTSUP;
}

+ ret = process_lock_callbacks(port_id);
+ if (ret)
+ return ret;
+
return do_eth_dev_detach(port_id);
}

@@ -4683,6 +4692,36 @@ rte_eth_devargs_parse(const char *dargs, struct rte_eth_devargs *eth_da)
return result;
}

+static int
+dev_is_busy(uint16_t port_id __rte_unused, void *user_args __rte_unused)
+{
+ return -EBUSY;
+}
+
+int
+rte_eth_dev_lock(uint16_t port_id, rte_eth_dev_lock_callback_t callback,
+ void *user_args)
+{
+ RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+
+ if (callback == NULL)
+ return register_lock_callback(port_id, dev_is_busy, NULL);
+ else
+ return register_lock_callback(port_id, callback, user_args);
+}
+
+int
+rte_eth_dev_unlock(uint16_t port_id, rte_eth_dev_lock_callback_t callback,
+ void *user_args)
+{
+ RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+
+ if (callback == NULL)
+ return unregister_lock_callback(port_id, dev_is_busy, NULL);
+ else
+ return unregister_lock_callback(port_id, callback, user_args);
+}
+
RTE_INIT(ethdev_init_log);
static void
ethdev_init_log(void)
diff --git a/lib/librte_ethdev/rte_ethdev.h b/lib/librte_ethdev/rte_ethdev.h
index bb03d613b..506b6acdd 100644
--- a/lib/librte_ethdev/rte_ethdev.h
+++ b/lib/librte_ethdev/rte_ethdev.h
@@ -4356,6 +4356,70 @@ rte_eth_tx_buffer(uint16_t port_id, uint16_t queue_id,
return rte_eth_tx_buffer_flush(port_id, queue_id, buffer);
}

+/**
+ * Callback function before device is detached.
+ *
+ * This type of function will be added into a function list, and will be
+ * invoked before a device is detached. An application can register a callback
+ * function so it can be notified and do some cleanup before detach happens.
+ * Also, any callback function returning a non-zero value will prevent the
+ * device from being detached (ref. rte_eth_dev_lock and rte_eth_dev_unlock).
+ *
+ * @param port_id
+ * The port identifier of the Ethernet device.
+ * @param user_args
+ * The "user_args" parameter that was saved when the callback function
+ * was registered (rte_eth_dev_lock).
+ *
+ * @return
+ * 0 device is allowed to be detached.
+ * !0 device is not allowed to be detached.
+ */
+typedef int (*rte_eth_dev_lock_callback_t)(uint16_t port_id, void *user_args);
+
+/**
+ * Lock an Ethernet device directly or register a callback function
+ * for a condition check at runtime. This helps an application prevent
+ * a device from being detached unexpectedly.
+ * NOTE: Locking a device multiple times with the same parameters increases
+ * a ref_count, and each corresponding unlock decreases the ref_count; the
+ * device will be unlocked when the ref_count reaches 0.
+ *
+ * @param port_id
+ * The port identifier of the Ethernet device.
+ * @param callback
+ * !NULL the callback function will be added into a pre-detach list;
+ * it will be invoked when a device is going to be detached. The
+ * return value decides whether to continue detaching the device.
+ * NULL lock the device directly; basically this just registers an empty
+ * callback function (dev_is_busy) that returns -EBUSY, so we can
+ * handle the pre-detach check in a unified way.
+ * @param user_args
+ * parameter that will be passed to the callback function, only valid when
+ * callback != NULL.
+ * @return
+ * 0 on success, negative on error.
+ */
+int rte_eth_dev_lock(uint16_t port_id, rte_eth_dev_lock_callback_t callback,
+ void *user_args);
+
+/**
+ * Reverse operation of rte_eth_dev_lock.
+ *
+ * @param port_id
+ * The port identifier of the Ethernet device.
+ * @param callback
+ * NULL decrease the ref_count of default callback function.
+ * !NULL decrease the ref_count of specific callback with matched
+ * user_args.
+ * @param user_args
+ * parameter to match, only valid when callback != NULL.
+ * @return
+ * 0 on success, negative on error.
+ */
+int rte_eth_dev_unlock(uint16_t port_id, rte_eth_dev_lock_callback_t callback,
+ void *user_args);
+
#ifdef __cplusplus
}
#endif
diff --git a/lib/librte_ethdev/rte_ethdev_lock.c b/lib/librte_ethdev/rte_ethdev_lock.c
new file mode 100644
index 000000000..688d1d70a
--- /dev/null
+++ b/lib/librte_ethdev/rte_ethdev_lock.c
@@ -0,0 +1,102 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+#include "rte_ethdev_lock.h"
+
+struct lock_entry {
+ TAILQ_ENTRY(lock_entry) next;
+ rte_eth_dev_lock_callback_t callback;
+ uint16_t port_id;
+ void *user_args;
+ int ref_count;
+};
+
+TAILQ_HEAD(lock_entry_list, lock_entry);
+static struct lock_entry_list lock_entry_list =
+ TAILQ_HEAD_INITIALIZER(lock_entry_list);
+static rte_spinlock_t lock_entry_lock = RTE_SPINLOCK_INITIALIZER;
+
+int
+register_lock_callback(uint16_t port_id,
+ rte_eth_dev_lock_callback_t callback,
+ void *user_args)
+{
+ struct lock_entry *le;
+
+ rte_spinlock_lock(&lock_entry_lock);
+
+ TAILQ_FOREACH(le, &lock_entry_list, next) {
+ if (le->port_id == port_id &&
+ le->callback == callback &&
+ le->user_args == user_args)
+ break;
+ }
+
+ if (!le) {
+ le = calloc(1, sizeof(struct lock_entry));
+ if (!le) {
+ rte_spinlock_unlock(&lock_entry_lock);
+ return -ENOMEM;
+ }
+ le->callback = callback;
+ le->port_id = port_id;
+ le->user_args = user_args;
+ TAILQ_INSERT_TAIL(&lock_entry_list, le, next);
+ }
+ le->ref_count++;
+
+ rte_spinlock_unlock(&lock_entry_lock);
+ return 0;
+}
+
+int
+unregister_lock_callback(uint16_t port_id,
+ rte_eth_dev_lock_callback_t callback,
+ void *user_args)
+{
+ struct lock_entry *le;
+ int ret = 0;
+
+ rte_spinlock_lock(&lock_entry_lock);
+
+ TAILQ_FOREACH(le, &lock_entry_list, next) {
+ if (le->port_id == port_id &&
+ le->callback == callback &&
+ le->user_args == user_args)
+ break;
+ }
+
+ if (le) {
+ le->ref_count--;
+ if (!le->ref_count) {
+ TAILQ_REMOVE(&lock_entry_list, le, next);
+ free(le);
+ }
+ } else {
+ ret = -ENOENT;
+ }
+
+ rte_spinlock_unlock(&lock_entry_lock);
+ return ret;
+}
+
+int
+process_lock_callbacks(uint16_t port_id)
+{
+ struct lock_entry *le;
+
+ rte_spinlock_lock(&lock_entry_lock);
+
+ TAILQ_FOREACH(le, &lock_entry_list, next) {
+ if (le->port_id != port_id)
+ continue;
+
+ if (le->callback(port_id, le->user_args)) {
+ rte_spinlock_unlock(&lock_entry_lock);
+ return -EBUSY;
+ }
+ }
+
+ rte_spinlock_unlock(&lock_entry_lock);
+ return 0;
+}
diff --git a/lib/librte_ethdev/rte_ethdev_lock.h b/lib/librte_ethdev/rte_ethdev_lock.h
new file mode 100644
index 000000000..7b370c926
--- /dev/null
+++ b/lib/librte_ethdev/rte_ethdev_lock.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#ifndef _RTE_ETHDEV_LOCK_H_
+#define _RTE_ETHDEV_LOCK_H_
+
+#include "rte_ethdev.h"
+
+int
+register_lock_callback(uint16_t port_id,
+ rte_eth_dev_lock_callback_t callback,
+ void *user_args);
+
+int
+unregister_lock_callback(uint16_t port_id,
+ rte_eth_dev_lock_callback_t callback,
+ void *user_args);
+
+int
+process_lock_callbacks(uint16_t port_id);
+
+#endif
diff --git a/lib/librte_ethdev/rte_ethdev_mp.c b/lib/librte_ethdev/rte_ethdev_mp.c
index 8ede8151d..e23c8b010 100644
--- a/lib/librte_ethdev/rte_ethdev_mp.c
+++ b/lib/librte_ethdev/rte_ethdev_mp.c
@@ -4,6 +4,7 @@

#include "rte_ethdev_driver.h"
#include "rte_ethdev_mp.h"
+#include "rte_ethdev_lock.h"

static int detach_on_secondary(uint16_t port_id)
{
@@ -101,7 +102,7 @@ static int handle_primary_request(const struct rte_mp_msg *msg, const void *peer
ret = attach_on_secondary(req->devargs, req->port_id);
break;
case REQ_TYPE_PRE_DETACH:
- ret = 0;
+ ret = process_lock_callbacks(req->port_id);
break;
case REQ_TYPE_DETACH:
case REQ_TYPE_ATTACH_ROLLBACK:
--
2.13.6
Burakov, Anatoly
2018-06-15 15:42:33 UTC
Permalink
Post by Qi Zhang
Introduce API rte_eth_dev_lock and rte_eth_dev_unlock to let
application lock or unlock on specific ethdev, a locked device
can't be detached, this help application to prevent unexpected
device detaching, especially in multi-process environment.
Also the new API let application to register a callback function
which will be invoked before a device is going to be detached,
the return value of the function will decide if device will continue
be detached or not, this support application to do condition check
at runtime.
---
<snip>
Post by Qi Zhang
+
+int
+rte_eth_dev_lock(uint16_t port_id, rte_eth_dev_lock_callback_t callback,
+ void *user_args)
+{
+ RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+
+ if (callback == NULL)
+ return register_lock_callback(port_id, dev_is_busy, NULL);
+ else
+ return register_lock_callback(port_id, callback, user_args);
As much as i don't like seeing negative errno values as return, the rest
of ethdev library uses those, so this is OK :)
Post by Qi Zhang
+}
+
+int
+rte_eth_dev_unlock(uint16_t port_id, rte_eth_dev_lock_callback_t callback,
+ void *user_args)
+{
+ RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+
<snip>
Post by Qi Zhang
+ * Also, any callback function return !0 value will prevent device be
+ * detached(ref. rte_eth_dev_lock and rte_eth_dev_unlock).
+ *
+ * The port identifier of the Ethernet device.
+ * This is parameter "user_args" be saved when callback function is
+ * registered(rte_dev_eth_lock).
+ *
+ * 0 device is allowed be detached.
+ * !0 device is not allowed be detached.
!0 can be negative or positive. Are we expecting positive return values
from this API?
Post by Qi Zhang
+ */
+typedef int (*rte_eth_dev_lock_callback_t)(uint16_t port_id, void *user_args);
+
+/**
+ * Lock an Ethernet Device directly or register a callback function
+ * for condition check at runtime, this help application to prevent
+ * a device be detached unexpectly.
+ * NOTE: Lock a device mutliple times with same parmeter will increase
+ * a ref_count, and coresponding unlock decrease the ref_count, the
+ * device will be unlocked when ref_count reach 0.
Nitpick: "note" sections should be done with @note marker.

Also, i would mention that this is a per-process lock that does not
affect other processes (assuming i understood the code correctly, of
course...).
Post by Qi Zhang
+ *
+ * The port identifier of the Ethernet device.
+ * !NULL the callback function will be added into a pre-detach list,
+ * it will be invoked when a device is going to be detached. The
+ * return value will decide if continue detach the device or not.
+ * NULL lock the device directly, basically this just regiter a empty
+ * callback function(dev_is_busy) that return -EBUSY, so we can
+ * handle the pre-detach check in unified way.
+ * parameter will be parsed to callback function, only valid when
+ * callback != NULL.
+ * 0 on success, negative on error.
+ */
+int rte_eth_dev_lock(uint16_t port_id, rte_eth_dev_lock_callback_t callback,
+ void *user_args);
Nitpicks: DPDK style guide discourages using spaces as indentation
(other parts of this patch, and other patches have this issue as well).
Post by Qi Zhang
+
+/**
+ * Reverse operation of rte_eth_dev_lock.
+ *
+ * The port identifier of the Ethernet device.
+ * NULL decrease the ref_count of default callback function.
+ * !NULL decrease the ref_count of specific callback with matched
+ * user_args.
+ * parameter to match, only valid when callback != NULL.
+ * 0 on success, negative on error.
+ */
+int rte_eth_dev_unlock(uint16_t port_id, rte_eth_dev_lock_callback_t callback,
+ void *user_args);
+
#ifdef __cplusplus
}
#endif
diff --git a/lib/librte_ethdev/rte_ethdev_lock.c b/lib/librte_ethdev/rte_ethdev_lock.c
rte_ethdev_lock.* seem to be internal-only files. Perhaps you should
name them without the rte_ prefix to indicate that they're not exported?
Post by Qi Zhang
new file mode 100644
index 000000000..688d1d70a
--- /dev/null
+++ b/lib/librte_ethdev/rte_ethdev_lock.c
@@ -0,0 +1,102 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+#include "rte_ethdev_lock.h"
+
+struct lock_entry {
+ TAILQ_ENTRY(lock_entry) next;
+ rte_eth_dev_lock_callback_t callback;
+ uint16_t port_id;
<snip>
Post by Qi Zhang
+register_lock_callback(uint16_t port_id,
+ rte_eth_dev_lock_callback_t callback,
+ void *user_args)
+{
+ struct lock_entry *le;
+
+ rte_spinlock_lock(&lock_entry_lock);
+
+ TAILQ_FOREACH(le, &lock_entry_list, next) {
+ if (le->port_id == port_id &&
+ le->callback == callback &&
+ le->user_args == user_args)
+ break;
+ }
+
+ if (!le) {
+ le = calloc(1, sizeof(struct lock_entry));
+ if (!le) {
Nitpick: generally, DPDK style guide prefers "if (value)" or "if
(!value)" to only be reserved for boolean values, and use explicit
comparison (e.g. "if (value == NULL)" or "if (value == 0)") for all
other cases.
--
Thanks,
Anatoly
Zhang, Qi Z
2018-06-20 04:00:01 UTC
Permalink
Hi Anatoly,
Sorry for missing this email and replying late.
-----Original Message-----
From: Burakov, Anatoly
Sent: Friday, June 15, 2018 11:43 PM
Subject: Re: [PATCH 05/22] ethdev: introduce device lock
Post by Qi Zhang
Introduce API rte_eth_dev_lock and rte_eth_dev_unlock to let
application lock or unlock on specific ethdev, a locked device can't
be detached, this help application to prevent unexpected device
detaching, especially in multi-process environment.
Also the new API let application to register a callback function which
will be invoked before a device is going to be detached, the return
value of the function will decide if device will continue be detached
or not, this support application to do condition check at runtime.
---
<snip>
Post by Qi Zhang
+
+int
+rte_eth_dev_lock(uint16_t port_id, rte_eth_dev_lock_callback_t callback,
+ void *user_args)
+{
+ RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+
+ if (callback == NULL)
+ return register_lock_callback(port_id, dev_is_busy, NULL);
+ else
+ return register_lock_callback(port_id, callback, user_args);
As much as i don't like seeing negative errno values as return, the rest of
ethdev library uses those, so this is OK :)
Post by Qi Zhang
+}
+
+int
+rte_eth_dev_unlock(uint16_t port_id, rte_eth_dev_lock_callback_t callback,
+ void *user_args)
+{
+ RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+
<snip>
Post by Qi Zhang
+ * Also, any callback function return !0 value will prevent device be
+ * detached(ref. rte_eth_dev_lock and rte_eth_dev_unlock).
+ *
+ * The port identifier of the Ethernet device.
+ * This is parameter "user_args" be saved when callback function is
+ * registered(rte_dev_eth_lock).
+ *
+ * 0 device is allowed be detached.
+ * !0 device is not allowed be detached.
!0 can be negative or positive. Are we expecting positive return values from
this API?
I have no strong opinion, but if you think the option below (or another one) is better, I can change it:
=0 device is allowed to be detached.
<0 device is not allowed to be detached.
Post by Qi Zhang
+ */
+typedef int (*rte_eth_dev_lock_callback_t)(uint16_t port_id, void
+*user_args);
+
+/**
+ * Lock an Ethernet Device directly or register a callback function
+ * for condition check at runtime, this help application to prevent
+ * a device be detached unexpectedly
+ * NOTE: Lock a device multiple times with same parmeter will
+increase
+ * a ref_count, and corresponding unlock decrease the ref_count, the
+ * device will be unlocked when ref_count reach 0.
Also, i would mention that this is a per-process lock that does not affect other
processes (assuming i understood the code correctly, of course...).
OK, I will add more comments to explain this.
Post by Qi Zhang
+ *
+ * The port identifier of the Ethernet device.
+ * !NULL the callback function will be added into a pre-detach list,
+ * it will be invoked when a device is going to be detached. The
+ * return value will decide if continue detach the device or not.
+ * NULL lock the device directly, basically this just regiter a empty
+ * callback function(dev_is_busy) that return -EBUSY, so we can
+ * handle the pre-detach check in unified way.
+ * parameter will be parsed to callback function, only valid when
+ * callback != NULL.
+ * 0 on success, negative on error.
+ */
+int rte_eth_dev_lock(uint16_t port_id, rte_eth_dev_lock_callback_t
callback,
Post by Qi Zhang
+ void *user_args);
Nitpicks: DPDK style guide discourages using spaces as indentation (other parts
of this patch, and other patches have this issue as well).
OK, will fix all.
Post by Qi Zhang
+
+/**
+ * Reverse operation of rte_eth_dev_lock.
+ *
+ * The port identifier of the Ethernet device.
+ * NULL decrease the ref_count of default callback function.
+ * !NULL decrease the ref_count of specific callback with matched
+ * user_args.
+ * parameter to match, only valid when callback != NULL.
+ * 0 on success, negative on error.
+ */
+int rte_eth_dev_unlock(uint16_t port_id, rte_eth_dev_lock_callback_t
callback,
Post by Qi Zhang
+ void *user_args);
+
#ifdef __cplusplus
}
#endif
diff --git a/lib/librte_ethdev/rte_ethdev_lock.c
b/lib/librte_ethdev/rte_ethdev_lock.c
rte_ethdev_lock.* seem to be internal-only files. Perhaps you should name
them without the rte_ prefix to indicate that they're not exported?
Post by Qi Zhang
new file mode 100644
index 000000000..688d1d70a
--- /dev/null
+++ b/lib/librte_ethdev/rte_ethdev_lock.c
@@ -0,0 +1,102 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation */ #include
+"rte_ethdev_lock.h"
+
+struct lock_entry {
+ TAILQ_ENTRY(lock_entry) next;
+ rte_eth_dev_lock_callback_t callback;
+ uint16_t port_id;
<snip>
Post by Qi Zhang
+register_lock_callback(uint16_t port_id,
+ rte_eth_dev_lock_callback_t callback,
+ void *user_args)
+{
+ struct lock_entry *le;
+
+ rte_spinlock_lock(&lock_entry_lock);
+
+ TAILQ_FOREACH(le, &lock_entry_list, next) {
+ if (le->port_id == port_id &&
+ le->callback == callback &&
+ le->user_args == user_args)
+ break;
+ }
+
+ if (!le) {
+ le = calloc(1, sizeof(struct lock_entry));
+ if (!le) {
Nitpick: generally, DPDK style guide prefers "if (value)" or "if (!value)" to only
be reserved for boolean values, and use explicit comparison (e.g. "if (value ==
NULL)" or "if (value == 0)") for all other cases.
Stephen Hemminger
2018-06-15 16:09:03 UTC
Permalink
On Thu, 7 Jun 2018 20:38:32 +0800
Post by Qi Zhang
+/**
+ * Lock an Ethernet Device directly or register a callback function
+ * for condition check at runtime, this help application to prevent
+ * a device be detached unexpectly.
+ * NOTE: Lock a device mutliple times with same parmeter will increase
+ * a ref_count, and coresponding unlock decrease the ref_count, the
+ * device will be unlocked when ref_count reach 0.
+ *
+ * The port identifier of the Ethernet device.
+ * !NULL the callback function will be added into a pre-detach list,
+ * it will be invoked when a device is going to be detached. The
+ * return value will decide if continue detach the device or not.
+ * NULL lock the device directly, basically this just regiter a empty
+ * callback function(dev_is_busy) that return -EBUSY, so we can
+ * handle the pre-detach check in unified way.
+ * parameter will be parsed to callback function, only valid when
+ * callback != NULL.
+ * 0 on success, negative on error.
+ */
+int rte_eth_dev_lock(uint16_t port_id, rte_eth_dev_lock_callback_t callback,
+ void *user_args);
I prefer API's that do one thing with one function.
Why not
rte_eth_dev_lock(uint16_t port_id);
rte_eth_dev_ondetach(uint16_t port_id, rte_eth_dev_lock_callback_t callback,
void *user_args);
Zhang, Qi Z
2018-06-19 14:16:30 UTC
Permalink
-----Original Message-----
Sent: Saturday, June 16, 2018 12:09 AM
Subject: Re: [dpdk-dev] [PATCH 05/22] ethdev: introduce device lock
On Thu, 7 Jun 2018 20:38:32 +0800
Post by Qi Zhang
+/**
+ * Lock an Ethernet Device directly or register a callback function
+ * for condition check at runtime, this help application to prevent
+ * a device be detached unexpectly.
+ * NOTE: Lock a device mutliple times with same parmeter will increase
+ * a ref_count, and coresponding unlock decrease the ref_count, the
+ * device will be unlocked when ref_count reach 0.
+ *
+ * The port identifier of the Ethernet device.
+ * !NULL the callback function will be added into a pre-detach list,
+ * it will be invoked when a device is going to be detached. The
+ * return value will decide if continue detach the device or not.
+ * NULL lock the device directly, basically this just regiter a empty
+ * callback function(dev_is_busy) that return -EBUSY, so we can
+ * handle the pre-detach check in unified way.
+ * parameter will be parsed to callback function, only valid when
+ * callback != NULL.
+ * 0 on success, negative on error.
+ */
+int rte_eth_dev_lock(uint16_t port_id, rte_eth_dev_lock_callback_t
callback,
Post by Qi Zhang
+ void *user_args);
I prefer API's that do one thing with one function.
Agree
Why not
rte_eth_dev_lock(uint16_t port_id);
rte_eth_dev_ondetach(uint16_t port_id, rte_eth_dev_lock_callback_t callback,
void *user_args);
rte_eth_dev_ondetach looks like the name of a callback function,
but this is the function that registers a condition check.
How about rte_eth_dev_lock and rte_eth_dev_lock_with_cond?

Thanks
Qi
Qi Zhang
2018-06-07 12:38:30 UTC
Permalink
Add driver API rte_eth_dev_release_port_local to support the case
where an ethdev is released only on a secondary process: only the
local state is set to unused, and the shared data is not reset, so
the primary process can still use it.

Signed-off-by: Qi Zhang <***@intel.com>
---
lib/librte_ethdev/rte_ethdev.c | 24 +++++++++++++++++++++---
lib/librte_ethdev/rte_ethdev_driver.h | 13 +++++++++++++
2 files changed, 34 insertions(+), 3 deletions(-)

diff --git a/lib/librte_ethdev/rte_ethdev.c b/lib/librte_ethdev/rte_ethdev.c
index cd4bfd3c6..ec14adb91 100644
--- a/lib/librte_ethdev/rte_ethdev.c
+++ b/lib/librte_ethdev/rte_ethdev.c
@@ -359,6 +359,23 @@ rte_eth_dev_attach_secondary(const char *name)
}

int
+rte_eth_dev_release_port_local(struct rte_eth_dev *eth_dev)
+{
+ if (eth_dev == NULL)
+ return -EINVAL;
+
+ _rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_DESTROY, NULL);
+
+ rte_spinlock_lock(&rte_eth_dev_shared_data->ownership_lock);
+
+ eth_dev->state = RTE_ETH_DEV_UNUSED;
+
+ rte_spinlock_unlock(&rte_eth_dev_shared_data->ownership_lock);
+
+ return 0;
+}
+
+int
rte_eth_dev_release_port(struct rte_eth_dev *eth_dev)
{
if (eth_dev == NULL)
@@ -370,9 +387,10 @@ rte_eth_dev_release_port(struct rte_eth_dev *eth_dev)

rte_spinlock_lock(&rte_eth_dev_shared_data->ownership_lock);

- eth_dev->state = RTE_ETH_DEV_UNUSED;
-
- memset(eth_dev->data, 0, sizeof(struct rte_eth_dev_data));
+ if (eth_dev->state != RTE_ETH_DEV_UNUSED) {
+ eth_dev->state = RTE_ETH_DEV_UNUSED;
+ memset(eth_dev->data, 0, sizeof(struct rte_eth_dev_data));
+ }

rte_spinlock_unlock(&rte_eth_dev_shared_data->ownership_lock);

diff --git a/lib/librte_ethdev/rte_ethdev_driver.h b/lib/librte_ethdev/rte_ethdev_driver.h
index c9c825e3f..261335426 100644
--- a/lib/librte_ethdev/rte_ethdev_driver.h
+++ b/lib/librte_ethdev/rte_ethdev_driver.h
@@ -70,6 +70,19 @@ int rte_eth_dev_release_port(struct rte_eth_dev *eth_dev);

/**
* @internal
+ * Release the specified ethdev port in the local process only: set the ethdev
+ * state to unused but do not reset the shared data, since other processes are
+ * assumed to still be using it. Typically called by a secondary process.
+ *
+ * @param eth_dev
+ * The *eth_dev* pointer is the address of the *rte_eth_dev* structure.
+ * @return
+ * - 0 on success, negative on error
+ */
+int rte_eth_dev_release_port_local(struct rte_eth_dev *eth_dev);
+
+/**
+ * @internal
* Release device queues and clear its configuration to force the user
* application to reconfigure it. It is for internal use only.
*
--
2.13.6
Qi Zhang
2018-06-07 12:38:37 UTC
Permalink
Previously, detaching a port on a secondary process would corrupt the
primary process's state and prevent the same device from being attached
again. By taking advantage of rte_eth_dev_release_port_local, we can
support this with a minor change.

Signed-off-by: Qi Zhang <***@intel.com>
---
drivers/net/e1000/igb_ethdev.c | 9 +++++++++
1 file changed, 9 insertions(+)

diff --git a/drivers/net/e1000/igb_ethdev.c b/drivers/net/e1000/igb_ethdev.c
index edc7be319..bd2b2d218 100644
--- a/drivers/net/e1000/igb_ethdev.c
+++ b/drivers/net/e1000/igb_ethdev.c
@@ -1089,6 +1089,15 @@ static int eth_igb_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,

static int eth_igb_pci_remove(struct rte_pci_device *pci_dev)
{
+ struct rte_eth_dev *ethdev =
+ rte_eth_dev_allocated(pci_dev->device.name);
+
+ if (!ethdev)
+ return -ENODEV;
+
+ if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+ return rte_eth_dev_release_port_local(ethdev);
+
return rte_eth_dev_pci_generic_remove(pci_dev, eth_igb_dev_uninit);
}
--
2.13.6
Qi Zhang
2018-06-07 12:38:45 UTC
Permalink
Previously, detaching a port on a secondary process would corrupt the
primary process's state and prevent the same device from being attached
again. By taking advantage of rte_eth_dev_release_port_local, we can
support this with a minor change.

Signed-off-by: Qi Zhang <***@intel.com>
---
drivers/net/pcap/rte_eth_pcap.c | 15 ++++++++++++++-
1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/drivers/net/pcap/rte_eth_pcap.c b/drivers/net/pcap/rte_eth_pcap.c
index 6bd4a7d79..4c366a92b 100644
--- a/drivers/net/pcap/rte_eth_pcap.c
+++ b/drivers/net/pcap/rte_eth_pcap.c
@@ -925,6 +925,7 @@ pmd_pcap_probe(struct rte_vdev_device *dev)
}
/* TODO: request info from primary to set up Rx and Tx */
eth_dev->dev_ops = &ops;
+ eth_dev->device = &dev->device;
rte_eth_dev_probing_finish(eth_dev);
return 0;
}
@@ -1016,6 +1017,7 @@ static int
pmd_pcap_remove(struct rte_vdev_device *dev)
{
struct rte_eth_dev *eth_dev = NULL;
+ const char *name;

PMD_LOG(INFO, "Closing pcap ethdev on numa socket %d",
rte_socket_id());
@@ -1023,11 +1025,22 @@ pmd_pcap_remove(struct rte_vdev_device *dev)
if (!dev)
return -1;

+ name = rte_vdev_device_name(dev);
/* reserve an ethdev entry */
- eth_dev = rte_eth_dev_allocated(rte_vdev_device_name(dev));
+ eth_dev = rte_eth_dev_allocated(name);
if (eth_dev == NULL)
return -1;

+ if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+ /* detach device on local pprocess only */
+ if (strlen(rte_vdev_device_args(dev)) == 0)
+ return rte_eth_dev_release_port_local(eth_dev);
+ /**
+ * else this is a private device for current process
+ * so continue with normal detach scenario
+ */
+ }
+
rte_free(eth_dev->data->dev_private);

rte_eth_dev_release_port(eth_dev);
--
2.13.6
Qi Zhang
2018-06-07 12:38:33 UTC
Permalink
This patch covers the multi-process hotplug case where an attach/detach
request for a shared device is issued from a secondary process. The
implementation is modeled on malloc_mp.c.

device attach on secondary:
a) secondary sends an async request to primary and waits on a condition
which will be released by the matching response from primary.
b) primary receives the request and attaches the new device; if that
fails, goto i).
c) primary forwards the attach request to all secondaries as an async
request (because this runs in the mp thread context, a sync request
would deadlock).
d) secondary receives the request, attaches the device and sends a reply.
e) primary checks the replies; if all succeeded, goto j).
f) primary sends an attach rollback async request to all secondaries.
g) secondary receives the request, detaches the device and sends a reply.
h) primary receives the replies and detaches the device as the rollback
action.
i) primary sends a fail response to secondary, goto k).
j) primary sends a success response to secondary.
k) secondary process receives the response and returns.

device detach on secondary:
a) secondary sends an async request to primary and waits on a condition
which will be released by the matching response from primary.
b) primary receives the request and performs the pre-detach check; if the
device is locked, goto j).
c) primary sends a pre-detach async request to all secondaries.
d) secondary performs the pre-detach check and sends a reply.
e) primary checks the replies; if any failed, goto j).
f) primary sends a detach async request to all secondaries.
g) secondary detaches the device and sends a reply.
h) primary detaches the device.
i) primary sends a success response to secondary, goto k).
j) primary sends a fail response to secondary.
k) secondary process receives the response and returns.

Signed-off-by: Qi Zhang <***@intel.com>
---
lib/librte_ethdev/rte_ethdev_mp.c | 477 +++++++++++++++++++++++++++++++++++++-
lib/librte_ethdev/rte_ethdev_mp.h | 1 +
2 files changed, 468 insertions(+), 10 deletions(-)

diff --git a/lib/librte_ethdev/rte_ethdev_mp.c b/lib/librte_ethdev/rte_ethdev_mp.c
index e23c8b010..6dbd23fa5 100644
--- a/lib/librte_ethdev/rte_ethdev_mp.c
+++ b/lib/librte_ethdev/rte_ethdev_mp.c
@@ -2,10 +2,69 @@
* Copyright(c) 2010-2018 Intel Corporation
*/

+#include <sys/time.h>
+
#include "rte_ethdev_driver.h"
#include "rte_ethdev_mp.h"
#include "rte_ethdev_lock.h"

+enum req_state {
+ REQ_STATE_INACTIVE = 0,
+ REQ_STATE_ACTIVE,
+ REQ_STATE_COMPLETE
+};
+
+struct mp_request {
+ TAILQ_ENTRY(mp_request) next;
+ struct eth_dev_mp_req user_req; /**< contents of request */
+ pthread_cond_t cond; /**< variable we use to time out on this request */
+ enum req_state state; /**< indicate status of this request */
+};
+
+/*
+ * We could've used just a single request, but it may be possible for
+ * secondaries to timeout earlier than the primary, and send a new request while
+ * primary is still expecting replies to the old one. Therefore, each new
+ * request will get assigned a new ID, which is how we will distinguish between
+ * expected and unexpected messages.
+ */
+TAILQ_HEAD(mp_request_list, mp_request);
+static struct {
+ struct mp_request_list list;
+ pthread_mutex_t lock;
+} mp_request_list = {
+ .list = TAILQ_HEAD_INITIALIZER(mp_request_list.list),
+ .lock = PTHREAD_MUTEX_INITIALIZER
+};
+
+#define MP_TIMEOUT_S 5 /**< 5 seconds timeouts */
+
+static struct mp_request *
+find_request_by_id(uint64_t id)
+{
+ struct mp_request *req;
+
+ TAILQ_FOREACH(req, &mp_request_list.list, next) {
+ if (req->user_req.id == id)
+ break;
+ }
+ return req;
+}
+
+static uint64_t
+get_unique_id(void)
+{
+ uint64_t id;
+
+ do {
+ id = rte_rand();
+ } while (find_request_by_id(id) != NULL);
+ return id;
+}
+
+static int
+send_request_to_secondary_async(const struct eth_dev_mp_req *req);
+
static int detach_on_secondary(uint16_t port_id)
{
struct rte_device *dev;
@@ -72,18 +131,325 @@ static int attach_on_secondary(const char *devargs, uint16_t port_id)
return 0;
}

-static int handle_secondary_request(const struct rte_mp_msg *msg, const void *peer)
+static int
+check_reply(const struct eth_dev_mp_req *req,
+ const struct rte_mp_reply *reply)
+{
+ struct eth_dev_mp_req *resp;
+ int i;
+
+ if (reply->nb_received != reply->nb_sent)
+ return -EINVAL;
+
+ for (i = 0; i < reply->nb_received; i++) {
+ resp = (struct eth_dev_mp_req *)reply->msgs[i].param;
+
+ if (resp->t != req->t) {
+ ethdev_log(ERR, "Unexpected response to async request\n");
+ return -EINVAL;
+ }
+
+ if (resp->id != req->id) {
+ ethdev_log(ERR, "response to wrong async request\n");
+ return -EINVAL;
+ }
+
+ if (resp->result)
+ return resp->result;
+ }
+
+ return 0;
+}
+
+static int
+send_response_to_secondary(const struct eth_dev_mp_req *req, int result)
+{
+ struct rte_mp_msg resp_msg = {0};
+ struct eth_dev_mp_req *resp =
+ (struct eth_dev_mp_req *)resp_msg.param;
+ int ret = 0;
+
+ resp_msg.len_param = sizeof(*resp);
+ strcpy(resp_msg.name, ETH_DEV_MP_ACTION_RESPONSE);
+ memcpy(resp, req, sizeof(*req));
+ resp->result = result;
+
+ ret = rte_mp_sendmsg(&resp_msg);
+ if (ret)
+ ethdev_log(ERR, "failed to send response to secondary\n");
+
+ return ret;
+}
+
+static int
+handle_async_attach_response(const struct rte_mp_msg *request,
+ const struct rte_mp_reply *reply)
+{
+ struct mp_request *entry;
+ const struct eth_dev_mp_req *req =
+ (const struct eth_dev_mp_req *)request->param;
+ struct eth_dev_mp_req tmp_req;
+ int ret = 0;
+
+ pthread_mutex_lock(&mp_request_list.lock);
+
+ entry = find_request_by_id(req->id);
+ if (!entry) {
+ ethdev_log(ERR, "wrong request ID\n");
+ ret = -EINVAL;
+ goto finish;
+ }
+
+ ret = check_reply(req, reply);
+ if (ret) {
+ tmp_req = *req;
+ tmp_req.t = REQ_TYPE_ATTACH_ROLLBACK;
+
+ ret = send_request_to_secondary_async(&tmp_req);
+ if (ret) {
+ ethdev_log(ERR, "couldn't send async request\n");
+ TAILQ_REMOVE(&mp_request_list.list, entry, next);
+ free(entry);
+ }
+ } else {
+ send_response_to_secondary(req, 0);
+ TAILQ_REMOVE(&mp_request_list.list, entry, next);
+ free(entry);
+ }
+
+finish:
+ pthread_mutex_unlock(&mp_request_list.lock);
+ return ret;
+}
+
+static int
+handle_async_detach_response(const struct rte_mp_msg *request,
+ const struct rte_mp_reply *reply)
+{
+ struct mp_request *entry;
+ const struct eth_dev_mp_req *req =
+ (const struct eth_dev_mp_req *)request->param;
+ int ret = 0;
+
+ pthread_mutex_lock(&mp_request_list.lock);
+
+ entry = find_request_by_id(req->id);
+ if (!entry) {
+ ethdev_log(ERR, "wrong request ID\n");
+ ret = -EINVAL;
+ goto finish;
+ }
+
+ ret = check_reply(req, reply);
+ if (ret) {
+ send_response_to_secondary(req, ret);
+ } else {
+ do_eth_dev_detach(req->port_id);
+ send_response_to_secondary(req, 0);
+ }
+ TAILQ_REMOVE(&mp_request_list.list, entry, next);
+ free(entry);
+
+finish:
+ pthread_mutex_unlock(&mp_request_list.lock);
+ return ret;
+}
+
+static int
+handle_async_pre_detach_response(const struct rte_mp_msg *request,
+ const struct rte_mp_reply *reply)
+{
+ struct mp_request *entry;
+ const struct eth_dev_mp_req *req =
+ (const struct eth_dev_mp_req *)request->param;
+ struct eth_dev_mp_req tmp_req;
+ int ret = 0;
+
+ pthread_mutex_lock(&mp_request_list.lock);
+
+ entry = find_request_by_id(req->id);
+ if (!entry) {
+ ethdev_log(ERR, "wrong request ID\n");
+ ret = -EINVAL;
+ goto finish;
+ }
+
+ ret = check_reply(req, reply);
+ if (!ret) {
+ tmp_req = *req;
+ tmp_req.t = REQ_TYPE_DETACH;
+
+ ret = send_request_to_secondary_async(&tmp_req);
+ if (ret) {
+ ethdev_log(ERR, "couldn't send async request\n");
+ TAILQ_REMOVE(&mp_request_list.list, entry, next);
+ free(entry);
+ }
+ } else {
+ send_response_to_secondary(req, ret);
+ TAILQ_REMOVE(&mp_request_list.list, entry, next);
+ free(entry);
+ }
+
+finish:
+ pthread_mutex_unlock(&mp_request_list.lock);
+ return 0;
+}
+
+static int
+handle_async_rollback_response(const struct rte_mp_msg *request,
+ const struct rte_mp_reply *reply __rte_unused)
+{
+ struct mp_request *entry;
+ const struct eth_dev_mp_req *req =
+ (const struct eth_dev_mp_req *)request->param;
+ int ret = 0;
+
+ pthread_mutex_lock(&mp_request_list.lock);
+
+ entry = find_request_by_id(req->id);
+ if (!entry) {
+ ethdev_log(ERR, "wrong request ID\n");
+ ret = -EINVAL;
+ goto finish;
+ }
+
+ /* we have nothing to do if rollback still fail, just detach */
+ do_eth_dev_detach(req->port_id);
+ /* send response to secondary with the reason of rollback */
+ send_response_to_secondary(req, req->result);
+ TAILQ_REMOVE(&mp_request_list.list, entry, next);
+ free(entry);
+
+finish:
+ pthread_mutex_unlock(&mp_request_list.lock);
+ return ret;
+}
+
+static int
+send_request_to_secondary_async(const struct eth_dev_mp_req *req)
{
- (void)msg;
- (void)(peer);
- return -ENOTSUP;
+ struct rte_mp_msg mp_req = {0};
+ struct timespec ts = {.tv_sec = MP_TIMEOUT_S, .tv_nsec = 0};
+ rte_mp_async_reply_t clb;
+ struct mp_request *entry;
+ int ret = 0;
+
+ memcpy(mp_req.param, req, sizeof(*req));
+ mp_req.len_param = sizeof(*req);
+ strcpy(mp_req.name, ETH_DEV_MP_ACTION_REQUEST);
+
+ if (req->t == REQ_TYPE_ATTACH)
+ clb = handle_async_attach_response;
+ else if (req->t == REQ_TYPE_PRE_DETACH)
+ clb = handle_async_pre_detach_response;
+ else if (req->t == REQ_TYPE_DETACH)
+ clb = handle_async_detach_response;
+ else if (req->t == REQ_TYPE_ATTACH_ROLLBACK)
+ clb = handle_async_rollback_response;
+ else
+ return -1;
+ do {
+ ret = rte_mp_request_async(&mp_req, &ts, clb);
+ } while (ret != 0 && rte_errno == EEXIST);
+
+ if (ret)
+ ethdev_log(ERR, "couldn't send async request\n");
+ entry = find_request_by_id(req->id);
+ (void)entry;
+ return ret;
+}
+
+static int handle_secondary_request(const struct rte_mp_msg *msg,
+ const void *peer __rte_unused)
+{
+ const struct eth_dev_mp_req *req =
+ (const struct eth_dev_mp_req *)msg->param;
+ struct eth_dev_mp_req tmp_req;
+ struct mp_request *entry;
+ uint16_t port_id;
+ int ret = 0;
+
+ pthread_mutex_lock(&mp_request_list.lock);
+
+ entry = find_request_by_id(req->id);
+ if (entry) {
+ ethdev_log(ERR, "duplicate request id\n");
+ ret = -EEXIST;
+ goto finish;
+ }
+
+ entry = malloc(sizeof(*entry));
+ if (entry == NULL) {
+ ethdev_log(ERR, "not enough memory to allocate request entry\n");
+ ret = -ENOMEM;
+ goto finish;
+ }
+
+ if (req->t == REQ_TYPE_ATTACH) {
+ ret = do_eth_dev_attach(req->devargs, &port_id);
+ if (!ret) {
+ tmp_req = *req;
+ tmp_req.port_id = port_id;
+ ret = send_request_to_secondary_async(&tmp_req);
+ }
+ } else if (req->t == REQ_TYPE_DETACH) {
+ if (!rte_eth_dev_is_valid_port(req->port_id))
+ ret = -EINVAL;
+ if (!ret)
+ ret = process_lock_callbacks(req->port_id);
+ if (!ret) {
+ tmp_req = *req;
+ tmp_req.t = REQ_TYPE_PRE_DETACH;
+ ret = send_request_to_secondary_async(&tmp_req);
+ }
+ } else {
+ ethdev_log(ERR, "unsupported secondary to primary request\n");
+ ret = -ENOTSUP;
+ goto finish;
+ }
+
+ if (ret) {
+ ret = send_response_to_secondary(req, ret);
+ if (ret) {
+ ethdev_log(ERR, "failed to send response to secondary\n");
+ goto finish;
+ }
+ } else {
+ memcpy(&entry->user_req, req, sizeof(*req));
+ entry->state = REQ_STATE_ACTIVE;
+ TAILQ_INSERT_TAIL(&mp_request_list.list, entry, next);
+ entry = NULL;
+ }
+
+finish:
+ pthread_mutex_unlock(&mp_request_list.lock);
+ if (entry)
+ free(entry);
+ return ret;
}

-static int handle_primary_response(const struct rte_mp_msg *msg, const void *peer)
+static int handle_primary_response(const struct rte_mp_msg *msg,
+ const void *peer __rte_unused)
{
- (void)msg;
- (void)(peer);
- return -ENOTSUP;
+ const struct eth_dev_mp_req *req =
+ (const struct eth_dev_mp_req *)msg->param;
+ struct mp_request *entry;
+
+ pthread_mutex_lock(&mp_request_list.lock);
+
+ entry = find_request_by_id(req->id);
+ if (entry) {
+ entry->user_req.result = req->result;
+ entry->user_req.port_id = req->port_id;
+ entry->state = REQ_STATE_COMPLETE;
+
+ pthread_cond_signal(&entry->cond);
+ }
+
+ pthread_mutex_unlock(&mp_request_list.lock);
+
+ return 0;
}

static int handle_primary_request(const struct rte_mp_msg *msg, const void *peer)
@@ -124,10 +490,101 @@ static int handle_primary_request(const struct rte_mp_msg *msg, const void *peer
return 0;
}

+/**
+ * secondary to primary request.
+ *
+ * device attach:
+ * a) secondary send request to primary.
+ * b) primary attach the new device if failed goto i).
+ * c) primary forward attach request to all secondary.
+ * d) secondary receive request and attach device and send reply.
+ * e) primary check the reply if all success go to j).
+ * f) primary send attach rollback request to all secondary.
+ * g) secondary receive the request and detach device and send reply.
+ * h) primary receive the reply and detach device as rollback action.
+ * i) send fail response to secondary, goto k).
+ * j) send success response to secondary.
+ * k) end.
+
+ * device detach:
+ * a) secondary send request to primary.
+ * b) primary perform pre-detach check, if device is locked, goto j).
+ * c) primary send pre-detach check request to all secondary.
+ * d) secondary perform pre-detach check and send reply.
+ * e) primary check the reply if any fail goto j).
+ * f) primary send detach request to all secondary
+ * g) secondary detach the device and send reply
+ * h) primary detach the device.
+ * i) send success response to secondary, goto k).
+ * j) send fail response to secondary.
+ * k) end.
+ */
int rte_eth_dev_request_to_primary(struct eth_dev_mp_req *req)
{
- (void)req;
- return -ENOTSUP;
+ struct rte_mp_msg msg = {0};
+ struct eth_dev_mp_req *msg_req = (struct eth_dev_mp_req *)msg.param;
+ struct mp_request *entry;
+ struct timespec ts = {0};
+ struct timeval now;
+ int ret = 0;
+
+ entry = malloc(sizeof(*entry));
+ if (entry == NULL) {
+ ethdev_log(ERR, "not enough memory to allocate request entry\n");
+ return -ENOMEM;
+ }
+
+ pthread_mutex_lock(&mp_request_list.lock);
+
+ ret = gettimeofday(&now, NULL);
+ if (ret) {
+ ethdev_log(ERR, "cannot get current time\n");
+ ret = -EINVAL;
+ goto finish;
+ }
+
+ ts.tv_nsec = (now.tv_usec * 1000) % 1000000000;
+ ts.tv_sec = now.tv_sec + MP_TIMEOUT_S +
+ (now.tv_usec * 1000) / 1000000000;
+
+ pthread_cond_init(&entry->cond, NULL);
+
+ msg.len_param = sizeof(*req);
+ strcpy(msg.name, ETH_DEV_MP_ACTION_REQUEST);
+
+ req->id = get_unique_id();
+
+ memcpy(msg_req, req, sizeof(*req));
+
+ ret = rte_mp_sendmsg(&msg);
+ if (ret) {
+ ethdev_log(ERR, "cannot send message to primary");
+ goto finish;
+ }
+
+ memcpy(&entry->user_req, req, sizeof(*req));
+
+ entry->state = REQ_STATE_ACTIVE;
+
+ TAILQ_INSERT_TAIL(&mp_request_list.list, entry, next);
+
+ do {
+ ret = pthread_cond_timedwait(&entry->cond,
+ &mp_request_list.lock, &ts);
+ } while (ret != 0 && ret != ETIMEDOUT);
+
+ if (entry->state != REQ_STATE_COMPLETE) {
+ RTE_LOG(ERR, EAL, "request time out\n");
+ ret = -ETIMEDOUT;
+ } else {
+ req->result = entry->user_req.result;
+ }
+ TAILQ_REMOVE(&mp_request_list.list, entry, next);
+
+finish:
+ pthread_mutex_unlock(&mp_request_list.lock);
+ free(entry);
+ return ret;
}

/**
diff --git a/lib/librte_ethdev/rte_ethdev_mp.h b/lib/librte_ethdev/rte_ethdev_mp.h
index c3e55dfec..6d10dfdad 100644
--- a/lib/librte_ethdev/rte_ethdev_mp.h
+++ b/lib/librte_ethdev/rte_ethdev_mp.h
@@ -18,6 +18,7 @@ enum eth_dev_req_type {
};

struct eth_dev_mp_req {
+ uint64_t id;
enum eth_dev_req_type t;
char devargs[MAX_DEV_ARGS_LEN];
uint16_t port_id;
--
2.13.6
Burakov, Anatoly
2018-06-18 08:51:09 UTC
Permalink
Post by Qi Zhang
This patch covers the multi-process hotplug case where a shared device
attach/detach request is issued from a secondary process; the implementation
uses malloc_mp.c as a reference.
device attach:
a) secondary sends an async request to primary and waits on a condition
which will be released by the matching response from primary.
b) primary receive the request and attach the new device if failed
goto i).
c) primary forward attach request to all secondary as async request
(because this in mp thread context, use sync request will deadlock)
d) secondary receive request and attach device and send reply.
e) primary check the reply if all success go to j).
f) primary send attach rollback async request to all secondary.
g) secondary receive the request and detach device and send reply.
h) primary receive the reply and detach device as rollback action.
i) send fail response to secondary, goto k).
j) send success response to secondary.
k) secondary process receive response and return.
device detach:
a) secondary sends an async request to primary and waits on a condition
which will be released by the matching response from primary.
b) primary receive the request and perform pre-detach check, if device
is locked, goto j).
c) primary send pre-detach async request to all secondary.
d) secondary perform pre-detach check and send reply.
e) primary check the reply if any fail goto j).
f) primary send detach async request to all secondary
g) secondary detach the device and send reply
h) primary detach the device.
i) send success response to secondary, goto k).
j) send fail response to secondary.
k) secondary process receive response and return.
---
<snip>
Post by Qi Zhang
+TAILQ_HEAD(mp_request_list, mp_request);
+static struct {
+ struct mp_request_list list;
+ pthread_mutex_t lock;
+} mp_request_list = {
+ .list = TAILQ_HEAD_INITIALIZER(mp_request_list.list),
+ .lock = PTHREAD_MUTEX_INITIALIZER
+};
+
+#define MP_TIMEOUT_S 5 /**< 5 seconds timeouts */
Patch number 4 should've used this #define to set up its timeout.
Post by Qi Zhang
+
+static struct mp_request *
+find_request_by_id(uint64_t id)
+{
+ struct mp_request *req;
+
+ TAILQ_FOREACH(req, &mp_request_list.list, next) {
+ if (req->user_req.id == id)
+ break;
+ }
+ return req;
+}
+
<snip>
Post by Qi Zhang
+ if (resp->result)
+ return resp->result;
+ }
+
+ return 0;
+}
+
+static int
+send_response_to_secondary(const struct eth_dev_mp_req *req, int result)
+{
+ struct rte_mp_msg resp_msg = {0};
I've been bitten by this in the past - some compilers (*cough* clang
*cough*) don't like this kind of zero-initialization, depending on which
type of member comes first in the structure, so I would refrain from
using it and use memset() to zero the structure instead.
Post by Qi Zhang
+ struct eth_dev_mp_req *resp =
+ (struct eth_dev_mp_req *)resp_msg.param;
+ int ret = 0;
+
+ resp_msg.len_param = sizeof(*resp);
+ strcpy(resp_msg.name, ETH_DEV_MP_ACTION_RESPONSE);
here and in other places - strlcpy()?
Post by Qi Zhang
+ memcpy(resp, req, sizeof(*req));
+ resp->result = result;
+
+ ret = rte_mp_sendmsg(&resp_msg);
+ if (ret)
+ ethdev_log(ERR, "failed to send response to secondary\n");
+
+ return ret;
+}
+
+static int
+handle_async_attach_response(const struct rte_mp_msg *request,
+ const struct rte_mp_reply *reply)
+{
<snip>
Post by Qi Zhang
+ else
+ return -1;
+ do {
+ ret = rte_mp_request_async(&mp_req, &ts, clb);
+ } while (ret != 0 && rte_errno == EEXIST);
+
+ if (ret)
+ ethdev_log(ERR, "couldn't send async request\n");
+ entry = find_request_by_id(req->id);
+ (void)entry;
Why did you look up entry and then mark it as used without checking
the return value? Leftover? Some code missing?
Post by Qi Zhang
+ return ret;
+}
+
+static int handle_secondary_request(const struct rte_mp_msg *msg,
+ const void *peer __rte_unused)
+{
+ const struct eth_dev_mp_req *req =
+ (const struct eth_dev_mp_req *)msg->param;
+ struct eth_dev_mp_req tmp_req;
<snip>
Post by Qi Zhang
@@ -124,10 +490,101 @@ static int handle_primary_request(const struct rte_mp_msg *msg, const void *peer
return 0;
}
+/**
+ * secondary to primary request.
+ *
+ * a) seconary send request to primary.
+ * b) primary attach the new device if failed goto i).
+ * c) primary forward attach request to all secondary.
+ * d) secondary receive request and attach device and send reply.
+ * e) primary check the reply if all success go to j).
+ * f) primary send attach rollback request to all secondary.
+ * g) secondary receive the request and detach device and send reply.
+ * h) primary receive the reply and detach device as rollback action.
+ * i) send fail response to secondary, goto k).
+ * j) send success response to secondary.
+ * k) end.
+
+ * a) secondary send request to primary.
+ * b) primary perform pre-detach check, if device is locked, got j).
+ * c) primary send pre-detach check request to all secondary.
+ * d) secondary perform pre-detach check and send reply.
+ * e) primary check the reply if any fail goto j).
+ * f) primary send detach request to all secondary
+ * g) secondary detach the device and send reply
+ * h) primary detach the device.
+ * i) send success response to secondary, goto k).
+ * j) send fail response to secondary.
+ * k) end.
+ */
I think this comment should be at the top of this file.
--
Thanks,
Anatoly
Zhang, Qi Z
2018-06-19 03:33:32 UTC
Permalink
-----Original Message-----
From: Burakov, Anatoly
Sent: Monday, June 18, 2018 4:51 PM
Subject: Re: [PATCH 06/22] ethdev: support attach or detach share device
from secondary
Post by Qi Zhang
+ else
+ return -1;
+ do {
+ ret = rte_mp_request_async(&mp_req, &ts, clb);
+ } while (ret != 0 && rte_errno == EEXIST);
+
+ if (ret)
+ ethdev_log(ERR, "couldn't send async request\n");
+ entry = find_request_by_id(req->id);
+ (void)entry;
Why did you look up entry and then mark it as used without checking the
return value? Leftover? Some code missing?
Some debug code that I forgot to remove :)

BTW, I also accept all the other comments.
Qi Zhang
2018-06-07 12:38:35 UTC
Permalink
Previously, detaching a port in a secondary process would mess up the
primary process and prevent the same device from being attached again.
By taking advantage of rte_eth_dev_release_port_local, we can support
this with a minor change.

Signed-off-by: Qi Zhang <***@intel.com>
---
drivers/net/ixgbe/ixgbe_ethdev.c | 12 ++++++++++++
1 file changed, 12 insertions(+)

diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c b/drivers/net/ixgbe/ixgbe_ethdev.c
index 87d2ad090..260640e50 100644
--- a/drivers/net/ixgbe/ixgbe_ethdev.c
+++ b/drivers/net/ixgbe/ixgbe_ethdev.c
@@ -1792,6 +1792,9 @@ static int eth_ixgbe_pci_remove(struct rte_pci_device *pci_dev)
if (!ethdev)
return -ENODEV;

+ if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+ return rte_eth_dev_release_port_local(ethdev);
+
if (ethdev->data->dev_flags & RTE_ETH_DEV_REPRESENTOR)
return rte_eth_dev_destroy(ethdev, ixgbe_vf_representor_uninit);
else
@@ -1809,6 +1812,15 @@ static struct rte_pci_driver rte_ixgbe_pmd = {
static int eth_ixgbevf_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
struct rte_pci_device *pci_dev)
{
+ struct rte_eth_dev *ethdev;
+
+ ethdev = rte_eth_dev_allocated(pci_dev->device.name);
+ if (!ethdev)
+ return -ENODEV;
+
+ if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+ return rte_eth_dev_release_port_local(ethdev);
+
return rte_eth_dev_pci_generic_probe(pci_dev,
sizeof(struct ixgbe_adapter), eth_ixgbevf_dev_init);
}
--
2.13.6
Qi Zhang
2018-06-07 12:38:40 UTC
Permalink
Previously, detach port on secondary process will mess primary
process and cause same device can't be attached again, by take
advantage of rte_eth_release_port_local, we can support this with
minor change.

Signed-off-by: Qi Zhang <***@intel.com>
---
drivers/net/bonding/rte_eth_bond_pmd.c | 11 +++++++++++
1 file changed, 11 insertions(+)

diff --git a/drivers/net/bonding/rte_eth_bond_pmd.c b/drivers/net/bonding/rte_eth_bond_pmd.c
index 02d94b1b1..1221f62b2 100644
--- a/drivers/net/bonding/rte_eth_bond_pmd.c
+++ b/drivers/net/bonding/rte_eth_bond_pmd.c
@@ -3065,6 +3065,7 @@ bond_probe(struct rte_vdev_device *dev)
}
/* TODO: request info from primary to set up Rx and Tx */
eth_dev->dev_ops = &default_dev_ops;
+ eth_dev->device = &dev->device;
rte_eth_dev_probing_finish(eth_dev);
return 0;
}
@@ -3171,6 +3172,16 @@ bond_remove(struct rte_vdev_device *dev)
if (eth_dev == NULL)
return -ENODEV;

+ if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+ /* detach device on local pprocess only */
+ if (strlen(rte_vdev_device_args(dev)) == 0)
+ return rte_eth_dev_release_port_local(eth_dev);
+ /**
+ * else this is a private device for current process
+ * so continue with normal detach scenario
+ */
+ }
+
RTE_ASSERT(eth_dev->device == &dev->device);

internals = eth_dev->data->dev_private;
--
2.13.6
Chas Williams
2018-06-07 14:21:45 UTC
Permalink
Post by Qi Zhang
Previously, detach port on secondary process will mess primary
process and cause same device can't be attached again, by take
advantage of rte_eth_release_port_local, we can support this with
minor change.
This commit message needs some work. Otherwise, I think this particular
patch is fine.
Post by Qi Zhang
---
drivers/net/bonding/rte_eth_bond_pmd.c | 11 +++++++++++
1 file changed, 11 insertions(+)
diff --git a/drivers/net/bonding/rte_eth_bond_pmd.c
b/drivers/net/bonding/rte_eth_bond_pmd.c
index 02d94b1b1..1221f62b2 100644
--- a/drivers/net/bonding/rte_eth_bond_pmd.c
+++ b/drivers/net/bonding/rte_eth_bond_pmd.c
@@ -3065,6 +3065,7 @@ bond_probe(struct rte_vdev_device *dev)
}
/* TODO: request info from primary to set up Rx and Tx */
eth_dev->dev_ops = &default_dev_ops;
+ eth_dev->device = &dev->device;
rte_eth_dev_probing_finish(eth_dev);
return 0;
}
@@ -3171,6 +3172,16 @@ bond_remove(struct rte_vdev_device *dev)
if (eth_dev == NULL)
return -ENODEV;
+ if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+ /* detach device on local pprocess only */
process?
Post by Qi Zhang
+ if (strlen(rte_vdev_device_args(dev)) == 0)
+ return rte_eth_dev_release_port_local(eth_dev);
+ /**
+ * else this is a private device for current process
+ * so continue with normal detach scenario
+ */
+ }
+
RTE_ASSERT(eth_dev->device == &dev->device);
internals = eth_dev->data->dev_private;
--
2.13.6
Qi Zhang
2018-06-07 12:38:29 UTC
Permalink
Implemented the bus ops scan_one, besides this improve the scan
efficiency in hotplug case, it aslo avoid sync IPC invoke (which
happens in vdev->scan on secondary process). The benifit is it
removes the potiential deadlock in the case when secondary process
receive a request from primary process to attach a new device, since
vdev->scan will be invoked on mp thread itself at this case.

Signed-off-by: Qi Zhang <***@intel.com>
---
drivers/bus/vdev/vdev.c | 30 ++++++++++++++++++++++++++++++
1 file changed, 30 insertions(+)

diff --git a/drivers/bus/vdev/vdev.c b/drivers/bus/vdev/vdev.c
index 6139dd551..cdbd77df0 100644
--- a/drivers/bus/vdev/vdev.c
+++ b/drivers/bus/vdev/vdev.c
@@ -467,6 +467,35 @@ vdev_scan(void)
return 0;
}

+static struct rte_device *vdev_scan_one(struct rte_devargs *devargs)
+{
+ struct rte_vdev_device *dev = NULL;
+
+ dev = calloc(1, sizeof(*dev));
+ if (!dev) {
+ VDEV_LOG(ERR, "failed to allocate memory for new device");
+ return NULL;
+ }
+
+ rte_spinlock_recursive_lock(&vdev_device_list_lock);
+
+ if (find_vdev(devargs->name)) {
+ VDEV_LOG(ERR, "device %s already exist", devargs->name);
+ free(dev);
+ rte_spinlock_recursive_unlock(&vdev_device_list_lock);
+ return NULL;
+ }
+
+ dev->device.devargs = devargs;
+ dev->device.numa_node = SOCKET_ID_ANY;
+ dev->device.name = devargs->name;
+ TAILQ_INSERT_TAIL(&vdev_device_list, dev, next);
+
+ rte_spinlock_recursive_unlock(&vdev_device_list_lock);
+
+ return &dev->device;
+}
+
static int
vdev_probe(void)
{
@@ -531,6 +560,7 @@ vdev_unplug(struct rte_device *dev)

static struct rte_bus rte_vdev_bus = {
.scan = vdev_scan,
+ .scan_one = vdev_scan_one,
.probe = vdev_probe,
.find_device = vdev_find_device,
.plug = vdev_plug,
--
2.13.6
Shreyansh Jain
2018-06-08 12:08:20 UTC
Permalink
Post by Qi Zhang
Implemented the bus ops scan_one, besides this improve the scan
efficiency in hotplug case, it aslo avoid sync IPC invoke (which
^^^^
also
Post by Qi Zhang
happens in vdev->scan on secondary process). The benifit is it
^^^^^^^
benefit
Post by Qi Zhang
removes the potiential deadlock in the case when secondary process
^^^^^^^^^^
potential
Post by Qi Zhang
receive a request from primary process to attach a new device, since
vdev->scan will be invoked on mp thread itself at this case.
^^^^^^^
in that


Besides the above spells, is it possible to re-write the commit?
You mention it "...improves the scan efficiency..." - how? Is that an
implicit output of introducing the new scan_one for vdev?
Post by Qi Zhang
---
drivers/bus/vdev/vdev.c | 30 ++++++++++++++++++++++++++++++
1 file changed, 30 insertions(+)
diff --git a/drivers/bus/vdev/vdev.c b/drivers/bus/vdev/vdev.c
index 6139dd551..cdbd77df0 100644
--- a/drivers/bus/vdev/vdev.c
+++ b/drivers/bus/vdev/vdev.c
@@ -467,6 +467,35 @@ vdev_scan(void)
return 0;
}
[...]
Zhang, Qi Z
2018-06-13 13:32:48 UTC
Permalink
-----Original Message-----
Sent: Friday, June 8, 2018 8:08 PM
Subject: Re: [dpdk-dev] [PATCH 02/22] bus/vdev: enable one device scan
Post by Qi Zhang
Implemented the bus ops scan_one, besides this improve the scan
efficiency in hotplug case, it aslo avoid sync IPC invoke (which
^^^^
also
Post by Qi Zhang
happens in vdev->scan on secondary process). The benifit is it
^^^^^^^
benefit
Post by Qi Zhang
removes the potiential deadlock in the case when secondary process
^^^^^^^^^^
potential
Post by Qi Zhang
receive a request from primary process to attach a new device, since
vdev->scan will be invoked on mp thread itself at this case.
^^^^^^^
in that
Besides the above spells, is it possible to re-write the commit?
You mention it "...improves the scan efficiency..." - how? Is that an implicit
output of introducing the new scan_one for vdev?
"Improve scan efficiency" should be general to all buses in hot plug case.
Compared to bus->scan, bus->scan_one does not need to iterate over all devargs.
It's not the original purpose of this patch set, but it is a bonus.

I will rewrite the commit message in the format below to make it clearer:
The patch implements the bus op scan_one for vdev, which gives two benefits:
1. improve scan efficiency ....
2. avoid sync IPC invoke .....

Regards
Qi
Post by Qi Zhang
---
drivers/bus/vdev/vdev.c | 30 ++++++++++++++++++++++++++++++
1 file changed, 30 insertions(+)
diff --git a/drivers/bus/vdev/vdev.c b/drivers/bus/vdev/vdev.c index
6139dd551..cdbd77df0 100644
--- a/drivers/bus/vdev/vdev.c
+++ b/drivers/bus/vdev/vdev.c
@@ -467,6 +467,35 @@ vdev_scan(void)
Qi Zhang
2018-06-07 12:38:44 UTC
Permalink
Previously, detaching a port in a secondary process would mess up the
primary process and prevent the same device from being attached again.
By taking advantage of rte_eth_dev_release_port_local, we can support
this with a minor change.

Signed-off-by: Qi Zhang <***@intel.com>
---
drivers/net/octeontx/octeontx_ethdev.c | 16 ++++++++++++++++
1 file changed, 16 insertions(+)

diff --git a/drivers/net/octeontx/octeontx_ethdev.c b/drivers/net/octeontx/octeontx_ethdev.c
index 1eb453b21..b42b69896 100644
--- a/drivers/net/octeontx/octeontx_ethdev.c
+++ b/drivers/net/octeontx/octeontx_ethdev.c
@@ -1016,6 +1016,7 @@ octeontx_create(struct rte_vdev_device *dev, int port, uint8_t evdev,

eth_dev->tx_pkt_burst = octeontx_xmit_pkts;
eth_dev->rx_pkt_burst = octeontx_recv_pkts;
+ eth_dev->device = &dev->device;
rte_eth_dev_probing_finish(eth_dev);
return 0;
}
@@ -1138,6 +1139,18 @@ octeontx_remove(struct rte_vdev_device *dev)
if (eth_dev == NULL)
return -ENODEV;

+ if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+ /* detach device on local process only */
+ if (strlen(rte_vdev_device_args(dev)) == 0) {
+ rte_eth_dev_release_port_local(eth_dev);
+ continue;
+ }
+ /**
+ * else this is a private device for current process
+ * so continue with normal detach scenario
+ */
+ }
+
nic = octeontx_pmd_priv(eth_dev);
rte_event_dev_stop(nic->evdev);
PMD_INIT_LOG(INFO, "Closing octeontx device %s", octtx_name);
@@ -1148,6 +1161,9 @@ octeontx_remove(struct rte_vdev_device *dev)
rte_event_dev_close(nic->evdev);
}

+ if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+ return 0;
+
/* Free FC resource */
octeontx_pko_fc_free();
--
2.13.6
Qi Zhang
2018-06-07 12:38:49 UTC
Permalink
The sample code demonstrates device (ethdev only) management
in a multi-process environment. The user can attach/detach a device
on the primary process and see it synced to the secondary process
automatically; the user can also lock a device to prevent it from being
detached, or unlock it to go back to the default behaviour.

How to start?
./devmgm_mp --proc-type=auto
help
list
/* attach a af_packet vdev */
attach net_af_packet,iface=eth0
/* detach port 0 */
detach 0
/* attach a private af_packet vdev (secondary process only)*/
attachp net_af_packet,iface=eth0
/* detach a private device (secondary process only) */
detachp 0
/* lock port 0 */
lock 0
/* unlock port 0 */
unlock 0
Signed-off-by: Qi Zhang <***@intel.com>
---
examples/devmgm_mp/Makefile | 64 +++++++
examples/devmgm_mp/commands.c | 383 +++++++++++++++++++++++++++++++++++++++++
examples/devmgm_mp/commands.h | 10 ++
examples/devmgm_mp/main.c | 41 +++++
examples/devmgm_mp/meson.build | 11 ++
5 files changed, 509 insertions(+)
create mode 100644 examples/devmgm_mp/Makefile
create mode 100644 examples/devmgm_mp/commands.c
create mode 100644 examples/devmgm_mp/commands.h
create mode 100644 examples/devmgm_mp/main.c
create mode 100644 examples/devmgm_mp/meson.build

diff --git a/examples/devmgm_mp/Makefile b/examples/devmgm_mp/Makefile
new file mode 100644
index 000000000..e6c0cb0c5
--- /dev/null
+++ b/examples/devmgm_mp/Makefile
@@ -0,0 +1,64 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2010-2014 Intel Corporation
+
+# binary name
+APP = devmgm_mp
+
+# all source are stored in SRCS-y
+SRCS-y := main.c commands.c
+
+# Build using pkg-config variables if possible
+$(shell pkg-config --exists libdpdk)
+ifeq ($(.SHELLSTATUS),0)
+
+all: shared
+.PHONY: shared static
+shared: build/$(APP)-shared
+ ln -sf $(APP)-shared build/$(APP)
+static: build/$(APP)-static
+ ln -sf $(APP)-static build/$(APP)
+
+PC_FILE := $(shell pkg-config --path libdpdk)
+CFLAGS += -O3 $(shell pkg-config --cflags libdpdk)
+LDFLAGS_SHARED = $(shell pkg-config --libs libdpdk)
+LDFLAGS_STATIC = -Wl,-Bstatic $(shell pkg-config --static --libs libdpdk)
+
+build/$(APP)-shared: $(SRCS-y) Makefile $(PC_FILE) | build
+ $(CC) $(CFLAGS) $(SRCS-y) -o $@ $(LDFLAGS) $(LDFLAGS_SHARED)
+
+build/$(APP)-static: $(SRCS-y) Makefile $(PC_FILE) | build
+ $(CC) $(CFLAGS) $(SRCS-y) -o $@ $(LDFLAGS) $(LDFLAGS_STATIC)
+
+build:
+ @mkdir -p $@
+
+.PHONY: clean
+clean:
+ rm -f build/$(APP) build/$(APP)-static build/$(APP)-shared
+ rmdir --ignore-fail-on-non-empty build
+
+else
+
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+# Default target, can be overridden by command line or environment
+RTE_TARGET ?= x86_64-native-linuxapp-gcc
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# binary name
+APP = devmgm_mp
+
+# all source are stored in SRCS-y
+SRCS-y := main.c commands.c
+
+CFLAGS += -O3
+CFLAGS += -DALLOW_EXPERIMENTAL_API
+CFLAGS += $(WERROR_FLAGS)
+CFLAGS_parse_obj_list.o := -D_GNU_SOURCE
+
+include $(RTE_SDK)/mk/rte.extapp.mk
+
+endif
diff --git a/examples/devmgm_mp/commands.c b/examples/devmgm_mp/commands.c
new file mode 100644
index 000000000..145cb766e
--- /dev/null
+++ b/examples/devmgm_mp/commands.c
@@ -0,0 +1,383 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation.
+ * Copyright (c) 2009, Olivier MATZ <***@droids-corp.org>
+ * All rights reserved.
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <errno.h>
+#include <netinet/in.h>
+#include <termios.h>
+#ifndef __linux__
+ #ifdef __FreeBSD__
+ #include <sys/socket.h>
+ #else
+ #include <net/socket.h>
+ #endif
+#endif
+
+#include <cmdline_rdline.h>
+#include <cmdline_parse.h>
+#include <cmdline_parse_ipaddr.h>
+#include <cmdline_parse_num.h>
+#include <cmdline_parse_string.h>
+#include <cmdline.h>
+#include <rte_ethdev.h>
+
+/**********************************************************/
+
+struct cmd_help_result {
+ cmdline_fixed_string_t help;
+};
+
+static void cmd_help_parsed(__attribute__((unused)) void *parsed_result,
+ struct cmdline *cl,
+ __attribute__((unused)) void *data)
+{
+ cmdline_printf(cl,
+ "commands:\n"
+ "- attach <devargs>\n"
+ "- detach <port_id>\n"
+ "- attachp <devargs>\n"
+ "- detachp <port_id>\n"
+ "- lock <port_id>\n"
+ "- unlock <port_id>\n"
+ "- list\n\n");
+}
+
+cmdline_parse_token_string_t cmd_help_help =
+ TOKEN_STRING_INITIALIZER(struct cmd_help_result, help, "help");
+
+cmdline_parse_inst_t cmd_help = {
+ .f = cmd_help_parsed, /* function to call */
+ .data = NULL, /* 2nd arg of func */
+ .help_str = "show help",
+ .tokens = { /* token list, NULL terminated */
+ (void *)&cmd_help_help,
+ NULL,
+ },
+};
+
+/**********************************************************/
+
+struct cmd_quit_result {
+ cmdline_fixed_string_t quit;
+};
+
+static void cmd_quit_parsed(__attribute__((unused)) void *parsed_result,
+ struct cmdline *cl,
+ __attribute__((unused)) void *data)
+{
+ cmdline_quit(cl);
+}
+
+cmdline_parse_token_string_t cmd_quit_quit =
+ TOKEN_STRING_INITIALIZER(struct cmd_quit_result, quit, "quit");
+
+cmdline_parse_inst_t cmd_quit = {
+ .f = cmd_quit_parsed, /* function to call */
+ .data = NULL, /* 2nd arg of func */
+ .help_str = "quit",
+ .tokens = { /* token list, NULL terminated */
+ (void *)&cmd_quit_quit,
+ NULL,
+ },
+};
+
+/**********************************************************/
+
+struct cmd_list_result {
+ cmdline_fixed_string_t list;
+};
+
+static void cmd_list_parsed(__attribute__((unused)) void *parsed_result,
+ struct cmdline *cl,
+ __attribute__((unused)) void *data)
+{
+ uint16_t port_id;
+ char dev_name[RTE_DEV_NAME_MAX_LEN];
+
+ cmdline_printf(cl, "list all etherdev\n");
+
+ RTE_ETH_FOREACH_DEV(port_id) {
+ rte_eth_dev_get_name_by_port(port_id, dev_name);
+ /* Secondary process's ethdev->state may not be
+ * updated after detach on primary process, but
+ * ethdev->data should already be reset, so
+ * use strlen(dev_name) == 0 to know the port is
+ * not used.
+ *
+ * TODO: Secondary process should be informed when a
+ * port is released on primary through mp channel.
+ */
+ if (strlen(dev_name) > 0)
+ cmdline_printf(cl, "%d\t%s\n", port_id, dev_name);
+ else
+ printf("empty dev_name is not expected!\n");
+ }
+}
+
+cmdline_parse_token_string_t cmd_list_list =
+ TOKEN_STRING_INITIALIZER(struct cmd_list_result, list, "list");
+
+cmdline_parse_inst_t cmd_list = {
+ .f = cmd_list_parsed, /* function to call */
+ .data = NULL, /* 2nd arg of func */
+ .help_str = "list all devices",
+ .tokens = { /* token list, NULL terminated */
+ (void *)&cmd_list_list,
+ NULL,
+ },
+};
+
+/**********************************************************/
+
+struct cmd_dev_attach_result {
+ cmdline_fixed_string_t attach;
+ cmdline_fixed_string_t device;
+};
+
+static void cmd_dev_attach_parsed(void *parsed_result,
+ struct cmdline *cl,
+ __attribute__((unused)) void *data)
+{
+ struct cmd_dev_attach_result *res = parsed_result;
+ uint16_t port_id;
+
+ if (!rte_eth_dev_attach(res->device, &port_id))
+ cmdline_printf(cl, "attached device %s at port %d\n",
+ res->device, port_id);
+ else
+ cmdline_printf(cl, "failed to attached device %s\n",
+ res->device);
+}
+
+cmdline_parse_token_string_t cmd_dev_attach_attach =
+ TOKEN_STRING_INITIALIZER(struct cmd_dev_attach_result, attach,
+ "attach");
+cmdline_parse_token_string_t cmd_dev_attach_device =
+ TOKEN_STRING_INITIALIZER(struct cmd_dev_attach_result, device, NULL);
+
+cmdline_parse_inst_t cmd_attach_device = {
+ .f = cmd_dev_attach_parsed, /* function to call */
+ .data = NULL, /* 2nd arg of func */
+ .help_str = "attach a device",
+ .tokens = { /* token list, NULL terminated */
+ (void *)&cmd_dev_attach_attach,
+ (void *)&cmd_dev_attach_device,
+ NULL,
+ },
+};
+
+/**********************************************************/
+
+struct cmd_dev_attachp_result {
+ cmdline_fixed_string_t attachp;
+ cmdline_fixed_string_t device;
+};
+
+static void cmd_dev_attachp_parsed(void *parsed_result,
+ struct cmdline *cl,
+ __attribute__((unused)) void *data)
+{
+ struct cmd_dev_attachp_result *res = parsed_result;
+ uint16_t port_id;
+
+ if (!rte_eth_dev_attach_private(res->device, &port_id))
+ cmdline_printf(cl, "attached prviate device %s at port %d\n",
+ res->device, port_id);
+ else
+ cmdline_printf(cl, "failed to attached private device %s\n",
+ res->device);
+}
+
+cmdline_parse_token_string_t cmd_dev_attachp_attachp =
+ TOKEN_STRING_INITIALIZER(struct cmd_dev_attachp_result, attachp,
+ "attachp");
+cmdline_parse_token_string_t cmd_dev_attachp_device =
+ TOKEN_STRING_INITIALIZER(struct cmd_dev_attachp_result, device, NULL);
+
+cmdline_parse_inst_t cmd_attachp_device = {
+ .f = cmd_dev_attachp_parsed, /* function to call */
+ .data = NULL, /* 2nd arg of func */
+ .help_str = "attach a private device",
+ .tokens = { /* token list, NULL terminated */
+ (void *)&cmd_dev_attachp_attachp,
+ (void *)&cmd_dev_attachp_device,
+ NULL,
+ },
+};
+
+/**********************************************************/
+
+struct cmd_dev_detach_result {
+ cmdline_fixed_string_t detach;
+ uint16_t port_id;
+};
+
+static void cmd_dev_detach_parsed(void *parsed_result,
+ struct cmdline *cl,
+ __attribute__((unused)) void *data)
+{
+ struct cmd_dev_detach_result *res = parsed_result;
+ uint16_t port_id = res->port_id;
+ char dev_name[RTE_DEV_NAME_MAX_LEN];
+
+ printf("detaching...\n");
+ if (!rte_eth_dev_detach(port_id, dev_name))
+ cmdline_printf(cl, "detached device at port %d\n",
+ port_id);
+ else
+ cmdline_printf(cl, "failed to dettached at port %d\n",
+ port_id);
+}
+
+cmdline_parse_token_string_t cmd_dev_detach_detach =
+ TOKEN_STRING_INITIALIZER(struct cmd_dev_detach_result, detach,
+ "detach");
+cmdline_parse_token_num_t cmd_dev_detach_port_id =
+ TOKEN_NUM_INITIALIZER(struct cmd_dev_detach_result, port_id, UINT16);
+
+cmdline_parse_inst_t cmd_detach_device = {
+ .f = cmd_dev_detach_parsed, /* function to call */
+ .data = NULL, /* 2nd arg of func */
+ .help_str = "detach a device",
+ .tokens = { /* token list, NULL terminated */
+ (void *)&cmd_dev_detach_detach,
+ (void *)&cmd_dev_detach_port_id,
+ NULL,
+ },
+};
+
+/**********************************************************/
+
+struct cmd_dev_detachp_result {
+ cmdline_fixed_string_t detachp;
+ uint16_t port_id;
+};
+
+static void cmd_dev_detachp_parsed(void *parsed_result,
+ struct cmdline *cl,
+ __attribute__((unused)) void *data)
+{
+ struct cmd_dev_detachp_result *res = parsed_result;
+ uint16_t port_id = res->port_id;
+ char dev_name[RTE_DEV_NAME_MAX_LEN];
+
+ printf("detaching...\n");
+ if (!rte_eth_dev_detach_private(port_id, dev_name))
+ cmdline_printf(cl, "detached private device at port %d\n",
+ port_id);
+ else
+ cmdline_printf(cl, "failed to detach private device at port %d\n",
+ port_id);
+}
+
+cmdline_parse_token_string_t cmd_dev_detachp_detachp =
+ TOKEN_STRING_INITIALIZER(struct cmd_dev_detachp_result, detachp,
+ "detachp");
+cmdline_parse_token_num_t cmd_dev_detachp_port_id =
+ TOKEN_NUM_INITIALIZER(struct cmd_dev_detachp_result, port_id, UINT16);
+
+cmdline_parse_inst_t cmd_detachp_device = {
+ .f = cmd_dev_detachp_parsed, /* function to call */
+ .data = NULL, /* 2nd arg of func */
+ .help_str = "detach a private device",
+ .tokens = { /* token list, NULL terminated */
+ (void *)&cmd_dev_detachp_detachp,
+ (void *)&cmd_dev_detachp_port_id,
+ NULL,
+ },
+};
+
+/**********************************************************/
+
+struct cmd_dev_lock_result {
+ cmdline_fixed_string_t lock;
+ uint16_t port_id;
+};
+
+static void cmd_dev_lock_parsed(void *parsed_result,
+ struct cmdline *cl,
+ __attribute__((unused)) void *data)
+{
+ struct cmd_dev_lock_result *res = parsed_result;
+ uint16_t port_id = res->port_id;
+ int ret = 0;
+
+ ret = rte_eth_dev_lock(res->port_id, NULL, NULL);
+ cmdline_printf(cl, "lock port %d, ret = %d\n", port_id, ret);
+}
+
+cmdline_parse_token_string_t cmd_dev_lock_lock =
+ TOKEN_STRING_INITIALIZER(struct cmd_dev_lock_result, lock, "lock");
+cmdline_parse_token_num_t cmd_dev_lock_port_id =
+ TOKEN_NUM_INITIALIZER(struct cmd_dev_lock_result, port_id, UINT16);
+
+cmdline_parse_inst_t cmd_lock_device = {
+ .f = cmd_dev_lock_parsed, /* function to call */
+ .data = NULL, /* 2nd arg of func */
+ .help_str = "lock a device",
+ .tokens = { /* token list, NULL terminated */
+ (void *)&cmd_dev_lock_lock,
+ (void *)&cmd_dev_lock_port_id,
+ NULL,
+ },
+};
+
+/**********************************************************/
+
+struct cmd_dev_unlock_result {
+ cmdline_fixed_string_t unlock;
+ uint16_t port_id;
+};
+
+static void cmd_dev_unlock_parsed(void *parsed_result,
+ struct cmdline *cl,
+ __attribute__((unused)) void *data)
+{
+ struct cmd_dev_unlock_result *res = parsed_result;
+ uint16_t port_id = res->port_id;
+ int ret = 0;
+
+ ret = rte_eth_dev_unlock(res->port_id, NULL, NULL);
+ cmdline_printf(cl, "unlock port %d, ret = %d\n", port_id, ret);
+}
+
+cmdline_parse_token_string_t cmd_dev_unlock_unlock =
+ TOKEN_STRING_INITIALIZER(struct cmd_dev_unlock_result, unlock,
+ "unlock");
+cmdline_parse_token_num_t cmd_dev_unlock_port_id =
+ TOKEN_NUM_INITIALIZER(struct cmd_dev_unlock_result, port_id, UINT16);
+
+cmdline_parse_inst_t cmd_unlock_device = {
+ .f = cmd_dev_unlock_parsed, /* function to call */
+ .data = NULL, /* 2nd arg of func */
+ .help_str = "unlock a device",
+ .tokens = { /* token list, NULL terminated */
+ (void *)&cmd_dev_unlock_unlock,
+ (void *)&cmd_dev_unlock_port_id,
+ NULL,
+ },
+};
+
+/**********************************************************/
+/**********************************************************/
+/****** CONTEXT (list of instruction) */
+
+cmdline_parse_ctx_t main_ctx[] = {
+ (cmdline_parse_inst_t *)&cmd_help,
+ (cmdline_parse_inst_t *)&cmd_quit,
+ (cmdline_parse_inst_t *)&cmd_list,
+ (cmdline_parse_inst_t *)&cmd_attach_device,
+ (cmdline_parse_inst_t *)&cmd_detach_device,
+ (cmdline_parse_inst_t *)&cmd_attachp_device,
+ (cmdline_parse_inst_t *)&cmd_detachp_device,
+ (cmdline_parse_inst_t *)&cmd_lock_device,
+ (cmdline_parse_inst_t *)&cmd_unlock_device,
+ NULL,
+};
diff --git a/examples/devmgm_mp/commands.h b/examples/devmgm_mp/commands.h
new file mode 100644
index 000000000..791204547
--- /dev/null
+++ b/examples/devmgm_mp/commands.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#ifndef _COMMANDS_H_
+#define _COMMANDS_H_
+
+extern cmdline_parse_ctx_t main_ctx[];
+
+#endif /* _COMMANDS_H_ */
diff --git a/examples/devmgm_mp/main.c b/examples/devmgm_mp/main.c
new file mode 100644
index 000000000..f2f2e5a2f
--- /dev/null
+++ b/examples/devmgm_mp/main.c
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation.
+ * Copyright (c) 2009, Olivier MATZ <***@droids-corp.org>
+ * All rights reserved.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <stdint.h>
+#include <errno.h>
+#include <termios.h>
+#include <sys/queue.h>
+
+#include <cmdline_rdline.h>
+#include <cmdline_parse.h>
+#include <cmdline_socket.h>
+#include <cmdline.h>
+
+#include <rte_memory.h>
+#include <rte_eal.h>
+#include <rte_debug.h>
+
+#include "commands.h"
+
+int main(int argc, char **argv)
+{
+ int ret;
+ struct cmdline *cl;
+
+ ret = rte_eal_init(argc, argv);
+ if (ret < 0)
+ rte_panic("Cannot init EAL\n");
+
+ cl = cmdline_stdin_new(main_ctx, "example> ");
+ if (cl == NULL)
+ rte_panic("Cannot create cmdline instance\n");
+ cmdline_interact(cl);
+ cmdline_stdin_exit(cl);
+
+ return 0;
+}
diff --git a/examples/devmgm_mp/meson.build b/examples/devmgm_mp/meson.build
new file mode 100644
index 000000000..f916eb9af
--- /dev/null
+++ b/examples/devmgm_mp/meson.build
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2017 Intel Corporation
+
+# meson file, for building this example as part of a main DPDK build.
+#
+# To build this example as a standalone application with an already-installed
+# DPDK instance, use 'make'
+
+sources = files(
+ 'commands.c', 'main.c'
+)
--
2.13.6
Burakov, Anatoly
2018-06-18 10:36:15 UTC
Permalink
Post by Qi Zhang
The sample code demonstrates device (ethdev only) management
in a multi-process environment. The user can attach/detach a device
on the primary process and see it synced to the secondary process
automatically. The user can also lock a device to prevent it from being
detached, or unlock it to go back to the default behaviour.
How to start?
./devmgm_mp --proc-type=auto
help
list
/* attach a af_packet vdev */
attach net_af_packet,iface=eth0
/* detach port 0 */
detach 0
/* attach a private af_packet vdev (secondary process only)*/
attachp net_af_packet,iface=eth0
/* detach a private device (secondary process only) */
detachp 0
/* lock port 0 */
lock 0
/* unlock port 0 */
unlock 0
---
I think the "devmgm_mp" is not a descriptive enough name. What this
example demonstrates, is device hotplug. So how about naming the example
app "hotplug"? (or "mp_hotplug" to indicate that it specifically sets
out to demonstrate multiprocess hotplug)
Post by Qi Zhang
examples/devmgm_mp/Makefile | 64 +++++++
examples/devmgm_mp/commands.c | 383 +++++++++++++++++++++++++++++++++++++++++
examples/devmgm_mp/commands.h | 10 ++
examples/devmgm_mp/main.c | 41 +++++
examples/devmgm_mp/meson.build | 11 ++
5 files changed, 509 insertions(+)
create mode 100644 examples/devmgm_mp/Makefile
create mode 100644 examples/devmgm_mp/commands.c
create mode 100644 examples/devmgm_mp/commands.h
create mode 100644 examples/devmgm_mp/main.c
create mode 100644 examples/devmgm_mp/meson.build
<snip>
Post by Qi Zhang
+#include <stdio.h>
+#include <stdint.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <errno.h>
+#include <netinet/in.h>
+#include <termios.h>
+#ifndef __linux__
+ #ifdef __FreeBSD__
+ #include <sys/socket.h>
+ #else
+ #include <net/socket.h>
+ #endif
+#endif
This seems like a weird define. Care to elaborate why are we checking
for __linux__ not being defined?

If you're trying to differentiate between Linux and FreeBSD, there are
readily available RTE_EXEC_ENV_* config options, e.g.

#ifdef RTE_EXEC_ENV_LINUXAPP
// linux defines
#endif
#ifdef RTE_EXEC_ENV_BSDAPP
// bsd defines
#endif

or something to that effect.
Post by Qi Zhang
+
+#include <cmdline_rdline.h>
+#include <cmdline_parse.h>
+#include <cmdline_parse_ipaddr.h>
+#include <cmdline_parse_num.h>
+#include <cmdline_parse_string.h>
+#include <cmdline.h>
+#include <rte_ethdev.h>
Generally (and as per DPDK coding guidelines), we prefer includes ordered
as follows:

1) system includes enclosed in brackets
2) DPDK includes (rte_blah) enclosed in brackets
3) private/application-specific includes enclosed in quotes.

All three groups should be separated by a newline.

So, these includes should've read as:

#include <stdblah.h>
#include <sys/blah.h>

#include <rte_blah.h>
#include <rte_foo.h>

#include "cmdline_blah.h"
#include "cmdline_foo.h"
Post by Qi Zhang
+
+/**********************************************************/
+
+struct cmd_help_result {
+ cmdline_fixed_string_t help;
+};
+
+static void cmd_help_parsed(__attribute__((unused)) void *parsed_result,
<snip>
Post by Qi Zhang
+{
+ uint16_t port_id;
+ char dev_name[RTE_DEV_NAME_MAX_LEN];
+
+ cmdline_printf(cl, "list all etherdev\n");
+
+ RTE_ETH_FOREACH_DEV(port_id) {
+ rte_eth_dev_get_name_by_port(port_id, dev_name);
+ /* Secondary process's ethdev->state may not be
+ * updated after detach on primary process, but
+ * ethdev->data should already be reset, so
+ * use strlen(dev_name) == 0 to know the port is
+ * not used.
+ *
+ * TODO: Secondary process should be informed when a
+ * port is released on primary through mp channel.
+ */
That seems like a weird thing to leave out for TODO - it looks like an
API deficiency. Can this be automatically updated on multiprocess
hotplug sync, or somehow managed inside RTE_ETH_FOREACH_DEV?

As i understand, per-process ethdev list is not protected by any locks,
so doing this is racy. Since this is a multiprocess hotplug example app,
it should demonstrate best practices. So, either RTE_ETH_FOREACH_DEV
should be fixed to handle this case, or the application should
demonstrate how to properly synchronize access to local device list. The
latter is probably better as adding locking around ethdev device list is
outside the scope of this patchset.
Post by Qi Zhang
+ if (strlen(dev_name) > 0)
+ cmdline_printf(cl, "%d\t%s\n", port_id, dev_name);
+ else
+ printf("empty dev_name is not expected!\n");
+ }
<snip>
Post by Qi Zhang
+#include "commands.h"
+
+int main(int argc, char **argv)
+{
+ int ret;
+ struct cmdline *cl;
+
+ ret = rte_eal_init(argc, argv);
+ if (ret < 0)
+ rte_panic("Cannot init EAL\n");
+
+ cl = cmdline_stdin_new(main_ctx, "example> ");
+ if (cl == NULL)
+ rte_panic("Cannot create cmdline instance\n");
+ cmdline_interact(cl);
+ cmdline_stdin_exit(cl);
+
+ return 0;
Application should call rte_eal_cleanup() before exit. Otherwise, each
secondary started and stopped will leak memory.
--
Thanks,
Anatoly
Zhang, Qi Z
2018-06-22 06:49:56 UTC
Permalink
-----Original Message-----
From: Burakov, Anatoly
Sent: Monday, June 18, 2018 6:36 PM
Subject: Re: [PATCH 22/22] examples/devmgm_mp: add simple device
management sample
The sample code demonstrates device (ethdev only) management in a
multi-process environment. The user can attach/detach a device on the
primary process and see it synced to the secondary process automatically.
The user can also lock a device to prevent it from being detached, or
unlock it to go back to the default behaviour.
How to start?
./devmgm_mp --proc-type=auto
help
list
/* attach a af_packet vdev */
attach net_af_packet,iface=eth0
/* detach port 0 */
detach 0
/* attach a private af_packet vdev (secondary process only)*/
attachp net_af_packet,iface=eth0
/* detach a private device (secondary process only) */
detachp 0
/* lock port 0 */
lock 0
/* unlock port 0 */
unlock 0
---
I think the "devmgm_mp" is not a descriptive enough name. What this
example demonstrates, is device hotplug. So how about naming the example
app "hotplug"? (or "mp_hotplug" to indicate that it specifically sets out to
demonstrate multiprocess hotplug)
OK, I see that all the multi-process samples are in examples/multi_process, so I think that is the right place to add it.
It could be named "hotplug_mp" to follow the naming convention of the other samples.
examples/devmgm_mp/Makefile | 64 +++++++
examples/devmgm_mp/commands.c | 383
+++++++++++++++++++++++++++++++++++++++++
examples/devmgm_mp/commands.h | 10 ++
examples/devmgm_mp/main.c | 41 +++++
examples/devmgm_mp/meson.build | 11 ++
5 files changed, 509 insertions(+)
create mode 100644 examples/devmgm_mp/Makefile
create mode 100644 examples/devmgm_mp/commands.c
create mode 100644 examples/devmgm_mp/commands.h
create mode 100644 examples/devmgm_mp/main.c
create mode 100644 examples/devmgm_mp/meson.build
<snip>
+#include <stdio.h>
+#include <stdint.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <errno.h>
+#include <netinet/in.h>
+#include <termios.h>
+#ifndef __linux__
+ #ifdef __FreeBSD__
+ #include <sys/socket.h>
+ #else
+ #include <net/socket.h>
+ #endif
+#endif
This seems like a weird define. Care to elaborate why are we checking for
__linux__ not being defined?
OK, this was copied from existing sample code :). I will clean up the header file in v3.
If you're trying to differentiate between Linux and FreeBSD, there are readily
available RTE_EXEC_ENV_* config options, e.g.
#ifdef RTE_EXEC_ENV_LINUXAPP
// linux defines
#endif
#ifdef RTE_EXEC_ENV_BSDAPP
// bsd defines
#endif
or something to that effect.
+
+#include <cmdline_rdline.h>
+#include <cmdline_parse.h>
+#include <cmdline_parse_ipaddr.h>
+#include <cmdline_parse_num.h>
+#include <cmdline_parse_string.h>
+#include <cmdline.h>
+#include <rte_ethdev.h>
Generally (and as per DPDK coding guidelines), we prefer includes ordered as
1) system includes enclosed in brackets
2) DPDK includes (rte_blah) enclosed in brackets
3) private/application-specific includes enclosed in quotes.
All three groups should be separated by a newline.
#include <stdblah.h>
#include <sys/blah.h>
#include <rte_blah.h>
#include <rte_foo.h>
#include "cmdline_blah.h"
#include "cmdline_foo.h"
Got it, thanks
+
+/**********************************************************/
+
+struct cmd_help_result {
+ cmdline_fixed_string_t help;
+};
+
+static void cmd_help_parsed(__attribute__((unused)) void
+*parsed_result,
<snip>
+{
+ uint16_t port_id;
+ char dev_name[RTE_DEV_NAME_MAX_LEN];
+
+ cmdline_printf(cl, "list all etherdev\n");
+
+ RTE_ETH_FOREACH_DEV(port_id) {
+ rte_eth_dev_get_name_by_port(port_id, dev_name);
+ /* Secondary process's ethdev->state may not be
+ * updated after detach on primary process, but
+ * ethdev->data should already be reset, so
+ * use strlen(dev_name) == 0 to know the port is
+ * not used.
+ *
+ * TODO: Secondary process should be informed when a
+ * port is released on primary through mp channel.
+ */
That seems like a weird thing to leave out for TODO - it looks like an API
deficiency. Can this be automatically updated on multiprocess hotplug sync, or
somehow managed inside RTE_ETH_FOREACH_DEV?
As i understand, per-process ethdev list is not protected by any locks, so doing
this is racy. Since this is a multiprocess hotplug example app, it should
demonstrate best practices. So, either RTE_ETH_FOREACH_DEV should be
fixed to handle this case, or the application should demonstrate how to
properly synchronize access to local device list. The latter is probably better as
adding locking around ethdev device list is outside the scope of this patchset.
This whole comment should be removed since the TODO is already done :)
Actually, we guarantee that a device is detached from the secondary processes before the primary.
+ if (strlen(dev_name) > 0)
+ cmdline_printf(cl, "%d\t%s\n", port_id, dev_name);
+ else
+ printf("empty dev_name is not expected!\n");
+ }
<snip>
+#include "commands.h"
+
+int main(int argc, char **argv)
+{
+ int ret;
+ struct cmdline *cl;
+
+ ret = rte_eal_init(argc, argv);
+ if (ret < 0)
+ rte_panic("Cannot init EAL\n");
+
+ cl = cmdline_stdin_new(main_ctx, "example> ");
+ if (cl == NULL)
+ rte_panic("Cannot create cmdline instance\n");
+ cmdline_interact(cl);
+ cmdline_stdin_exit(cl);
+
+ return 0;
Application should call rte_eal_cleanup() before exit. Otherwise, each
secondary started and stopped will leak memory.
OK, will add it.
Qi Zhang
2018-06-07 12:38:41 UTC
Permalink
Previously, detaching a port on a secondary process would mess up the
primary process and prevent the same device from being attached again.
By taking advantage of rte_eth_dev_release_port_local, we can support
this with a minor change.

Signed-off-by: Qi Zhang <***@intel.com>
---
drivers/net/failsafe/failsafe.c | 16 ++++++++++++++++
1 file changed, 16 insertions(+)

diff --git a/drivers/net/failsafe/failsafe.c b/drivers/net/failsafe/failsafe.c
index eafbb75df..aa676069d 100644
--- a/drivers/net/failsafe/failsafe.c
+++ b/drivers/net/failsafe/failsafe.c
@@ -328,6 +328,7 @@ rte_pmd_failsafe_probe(struct rte_vdev_device *vdev)
}
/* TODO: request info from primary to set up Rx and Tx */
eth_dev->dev_ops = &failsafe_ops;
+ eth_dev->device = &vdev->device;
rte_eth_dev_probing_finish(eth_dev);
return 0;
}
@@ -338,10 +339,25 @@ rte_pmd_failsafe_probe(struct rte_vdev_device *vdev)
static int
rte_pmd_failsafe_remove(struct rte_vdev_device *vdev)
{
+ struct rte_eth_dev *eth_dev;
const char *name;

name = rte_vdev_device_name(vdev);
INFO("Uninitializing " FAILSAFE_DRIVER_NAME " for %s", name);
+
+ eth_dev = rte_eth_dev_allocated(name);
+ if (!eth_dev)
+ return -ENODEV;
+ if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+ /* detach device on local pprocess only */
+ if (strlen(rte_vdev_device_args(vdev)) == 0)
+ return rte_eth_dev_release_port_local(eth_dev);
+ /**
+ * else this is a private device for current process
+ * so continue with normal detach scenario.
+ */
+ }
+
return fs_rte_eth_free(name);
}
--
2.13.6
Qi Zhang
2018-06-07 12:38:39 UTC
Permalink
Previously, detaching a port on a secondary process would mess up the
primary process and prevent the same device from being attached again.
By taking advantage of rte_eth_dev_release_port_local, we can support
this with a minor change.

Signed-off-by: Qi Zhang <***@intel.com>
---
drivers/net/af_packet/rte_eth_af_packet.c | 11 +++++++++++
1 file changed, 11 insertions(+)

diff --git a/drivers/net/af_packet/rte_eth_af_packet.c b/drivers/net/af_packet/rte_eth_af_packet.c
index ea47abbf8..e1afbfc14 100644
--- a/drivers/net/af_packet/rte_eth_af_packet.c
+++ b/drivers/net/af_packet/rte_eth_af_packet.c
@@ -935,6 +935,7 @@ rte_pmd_af_packet_probe(struct rte_vdev_device *dev)
}
/* TODO: request info from primary to set up Rx and Tx */
eth_dev->dev_ops = &ops;
+ eth_dev->device = &dev->device;
rte_eth_dev_probing_finish(eth_dev);
return 0;
}
@@ -986,6 +987,16 @@ rte_pmd_af_packet_remove(struct rte_vdev_device *dev)
if (eth_dev == NULL)
return -1;

+ if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+ /* detach device on local pprocess only */
+ if (strlen(rte_vdev_device_args(dev)) == 0)
+ return rte_eth_dev_release_port_local(eth_dev);
+ /**
+ * else this is a private device for current process
+ * so continue with normal detach scenario
+ */
+ }
+
internals = eth_dev->data->dev_private;
for (q = 0; q < internals->nb_queues; q++) {
rte_free(internals->rx_queue[q].rd);
--
2.13.6
Qi Zhang
2018-06-07 12:38:46 UTC
Permalink
Previously, detaching a port on a secondary process would mess up the
primary process and prevent the same device from being attached again.
By taking advantage of rte_eth_dev_release_port_local, we can support
this with a minor change.

Signed-off-by: Qi Zhang <***@intel.com>
---
drivers/net/softnic/rte_eth_softnic.c | 19 ++++++++++++++++---
1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/drivers/net/softnic/rte_eth_softnic.c b/drivers/net/softnic/rte_eth_softnic.c
index 6b3c13e5c..fdb2f0825 100644
--- a/drivers/net/softnic/rte_eth_softnic.c
+++ b/drivers/net/softnic/rte_eth_softnic.c
@@ -750,6 +750,7 @@ pmd_probe(struct rte_vdev_device *vdev)
}
/* TODO: request info from primary to set up Rx and Tx */
eth_dev->dev_ops = &pmd_ops;
+ eth_dev->device = &vdev->device;
rte_eth_dev_probing_finish(eth_dev);
return 0;
}
@@ -803,17 +804,29 @@ pmd_remove(struct rte_vdev_device *vdev)
{
struct rte_eth_dev *dev = NULL;
struct pmd_internals *p;
+ const char *name;

if (!vdev)
return -EINVAL;

- PMD_LOG(INFO, "Removing device \"%s\"",
- rte_vdev_device_name(vdev));
+ name = rte_vdev_device_name(vdev);
+ PMD_LOG(INFO, "Removing device \"%s\"", name);

/* Find the ethdev entry */
- dev = rte_eth_dev_allocated(rte_vdev_device_name(vdev));
+ dev = rte_eth_dev_allocated(name);
if (dev == NULL)
return -ENODEV;
+
+ if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+ /* detach device on local pprocess only */
+ if (strlen(rte_vdev_device_args(vdev)) == 0)
+ return rte_eth_dev_release_port_local(dev);
+ /**
+ * else this is a private device for current process
+ * so continue with normal detach scenario
+ */
+ }
+
p = dev->data->dev_private;

/* Free device data structures*/
--
2.13.6
Qi Zhang
2018-06-07 12:38:36 UTC
Permalink
Previously, detaching a port on a secondary process would mess up the
primary process and prevent the same device from being attached again.
By taking advantage of rte_eth_dev_release_port_local, we can support
this with a minor change.

Signed-off-by: Qi Zhang <***@intel.com>
---
drivers/net/e1000/em_ethdev.c | 9 +++++++++
1 file changed, 9 insertions(+)

diff --git a/drivers/net/e1000/em_ethdev.c b/drivers/net/e1000/em_ethdev.c
index 7039dc100..e626cb10c 100644
--- a/drivers/net/e1000/em_ethdev.c
+++ b/drivers/net/e1000/em_ethdev.c
@@ -349,6 +349,15 @@ static int eth_em_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,

static int eth_em_pci_remove(struct rte_pci_device *pci_dev)
{
+ struct rte_eth_dev *ethdev =
+ rte_eth_dev_allocated(pci_dev->device.name);
+
+ if (!ethdev)
+ return -ENODEV;
+
+ if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+ return rte_eth_dev_release_port_local(ethdev);
+
return rte_eth_dev_pci_generic_remove(pci_dev, eth_em_dev_uninit);
}
--
2.13.6
Qi Zhang
2018-06-07 12:38:28 UTC
Permalink
When hot plug a new device, it is not necessary to scan everything
on the bus since the devname and devargs are already there. So new
rte_bus ops "scan_one" is introduced, bus driver can implement this
function to simply the hotplug process.

Signed-off-by: Qi Zhang <***@intel.com>
---
lib/librte_eal/common/eal_common_dev.c | 17 +++++++++++++----
lib/librte_eal/common/include/rte_bus.h | 4 ++++
2 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/lib/librte_eal/common/eal_common_dev.c b/lib/librte_eal/common/eal_common_dev.c
index 61cb3b162..1ad033536 100644
--- a/lib/librte_eal/common/eal_common_dev.c
+++ b/lib/librte_eal/common/eal_common_dev.c
@@ -147,11 +147,20 @@ int __rte_experimental rte_eal_hotplug_add(const char *busname, const char *devn
if (ret)
goto err_devarg;

- ret = bus->scan();
- if (ret)
- goto err_devarg;
+ /**
+ * if bus support to scan specific device by devargs,
+ * we don't need to scan all devices on the bus.
+ */
+ if (bus->scan_one) {
+ dev = bus->scan_one(da);
+ } else {
+ ret = bus->scan();
+ if (ret)
+ goto err_devarg;
+
+ dev = bus->find_device(NULL, cmp_detached_dev_name, devname);
+ }

- dev = bus->find_device(NULL, cmp_detached_dev_name, devname);
if (dev == NULL) {
RTE_LOG(ERR, EAL, "Cannot find unplugged device (%s)\n",
devname);
diff --git a/lib/librte_eal/common/include/rte_bus.h b/lib/librte_eal/common/include/rte_bus.h
index eb9eded4e..b15cff892 100644
--- a/lib/librte_eal/common/include/rte_bus.h
+++ b/lib/librte_eal/common/include/rte_bus.h
@@ -83,6 +83,7 @@ enum rte_iova_mode {
*/
typedef int (*rte_bus_scan_t)(void);

+typedef struct rte_device *(*rte_bus_scan_one_t)(struct rte_devargs *);
/**
* Implementation specific probe function which is responsible for linking
* devices on that bus with applicable drivers.
@@ -95,6 +96,8 @@ typedef int (*rte_bus_scan_t)(void);
*/
typedef int (*rte_bus_probe_t)(void);

+
+
/**
* Device iterator to find a device on a bus.
*
@@ -204,6 +207,7 @@ struct rte_bus {
TAILQ_ENTRY(rte_bus) next; /**< Next bus object in linked list */
const char *name; /**< Name of the bus */
rte_bus_scan_t scan; /**< Scan for devices attached to bus */
+ rte_bus_scan_one_t scan_one; /**< Scan one device by devargs */
rte_bus_probe_t probe; /**< Probe devices on bus */
rte_bus_find_device_t find_device; /**< Find a device on the bus */
rte_bus_plug_t plug; /**< Probe single device for drivers */
--
2.13.6
Shreyansh Jain
2018-06-08 11:12:24 UTC
Permalink
Post by Qi Zhang
When hot plug a new device, it is not necessary to scan everything
on the bus since the devname and devargs are already there. So new
rte_bus ops "scan_one" is introduced, bus driver can implement this
function to simply the hotplug process.
^^^^^^^^^
simplify
Post by Qi Zhang
---
lib/librte_eal/common/eal_common_dev.c | 17 +++++++++++++----
lib/librte_eal/common/include/rte_bus.h | 4 ++++
2 files changed, 17 insertions(+), 4 deletions(-)
diff --git a/lib/librte_eal/common/eal_common_dev.c b/lib/librte_eal/common/eal_common_dev.c
index 61cb3b162..1ad033536 100644
--- a/lib/librte_eal/common/eal_common_dev.c
+++ b/lib/librte_eal/common/eal_common_dev.c
@@ -147,11 +147,20 @@ int __rte_experimental rte_eal_hotplug_add(const char *busname, const char *devn
if (ret)
goto err_devarg;
- ret = bus->scan();
- if (ret)
- goto err_devarg;
+ /**
+ * if bus support to scan specific device by devargs,
+ * we don't need to scan all devices on the bus.
+ */
+ if (bus->scan_one) {
+ dev = bus->scan_one(da);
+ } else {
+ ret = bus->scan();
+ if (ret)
+ goto err_devarg;
+
+ dev = bus->find_device(NULL, cmp_detached_dev_name, devname);
+ }
- dev = bus->find_device(NULL, cmp_detached_dev_name, devname);
if (dev == NULL) {
RTE_LOG(ERR, EAL, "Cannot find unplugged device (%s)\n",
devname);
diff --git a/lib/librte_eal/common/include/rte_bus.h b/lib/librte_eal/common/include/rte_bus.h
index eb9eded4e..b15cff892 100644
--- a/lib/librte_eal/common/include/rte_bus.h
+++ b/lib/librte_eal/common/include/rte_bus.h
@@ -83,6 +83,7 @@ enum rte_iova_mode {
*/
typedef int (*rte_bus_scan_t)(void);
+typedef struct rte_device *(*rte_bus_scan_one_t)(struct rte_devargs *);
You should add comments over the declaration, just like the other
similar declarations.
And, a new line should be here.
Post by Qi Zhang
/**
* Implementation specific probe function which is responsible for linking
* devices on that bus with applicable drivers.
@@ -95,6 +96,8 @@ typedef int (*rte_bus_scan_t)(void);
*/
typedef int (*rte_bus_probe_t)(void);
+
+
And please remove the extra lines added above in next version of patch.
Post by Qi Zhang
/**
* Device iterator to find a device on a bus.
*
@@ -204,6 +207,7 @@ struct rte_bus {
TAILQ_ENTRY(rte_bus) next; /**< Next bus object in linked list */
const char *name; /**< Name of the bus */
rte_bus_scan_t scan; /**< Scan for devices attached to bus */
+ rte_bus_scan_one_t scan_one; /**< Scan one device by devargs */
I think you mean "Scan one device using devargs" rather than "Scan one
device by devargs".
Post by Qi Zhang
rte_bus_probe_t probe; /**< Probe devices on bus */
rte_bus_find_device_t find_device; /**< Find a device on the bus */
rte_bus_plug_t plug; /**< Probe single device for drivers */
Zhang, Qi Z
2018-06-13 13:32:38 UTC
Permalink
Hi Shreyansh:
Thanks for your review.
Will fix based on your comments in v2.
Regards
Qi
-----Original Message-----
Sent: Friday, June 8, 2018 7:12 PM
Subject: Re: [dpdk-dev] [PATCH 01/22] eal: introduce one device scan
When hot plug a new device, it is not necessary to scan everything on
the bus since the devname and devargs are already there. So new
rte_bus ops "scan_one" is introduced, bus driver can implement this
function to simply the hotplug process.
^^^^^^^^^
simplify
---
lib/librte_eal/common/eal_common_dev.c | 17 +++++++++++++----
lib/librte_eal/common/include/rte_bus.h | 4 ++++
2 files changed, 17 insertions(+), 4 deletions(-)
diff --git a/lib/librte_eal/common/eal_common_dev.c
b/lib/librte_eal/common/eal_common_dev.c
index 61cb3b162..1ad033536 100644
--- a/lib/librte_eal/common/eal_common_dev.c
+++ b/lib/librte_eal/common/eal_common_dev.c
@@ -147,11 +147,20 @@ int __rte_experimental
rte_eal_hotplug_add(const char *busname, const char *devn
if (ret)
goto err_devarg;
- ret = bus->scan();
- if (ret)
- goto err_devarg;
+ /**
+ * if bus support to scan specific device by devargs,
+ * we don't need to scan all devices on the bus.
+ */
+ if (bus->scan_one) {
+ dev = bus->scan_one(da);
+ } else {
+ ret = bus->scan();
+ if (ret)
+ goto err_devarg;
+
+ dev = bus->find_device(NULL, cmp_detached_dev_name,
devname);
+ }
- dev = bus->find_device(NULL, cmp_detached_dev_name, devname);
if (dev == NULL) {
RTE_LOG(ERR, EAL, "Cannot find unplugged device (%s)\n",
devname);
diff --git a/lib/librte_eal/common/include/rte_bus.h
b/lib/librte_eal/common/include/rte_bus.h
index eb9eded4e..b15cff892 100644
--- a/lib/librte_eal/common/include/rte_bus.h
+++ b/lib/librte_eal/common/include/rte_bus.h
@@ -83,6 +83,7 @@ enum rte_iova_mode {
*/
typedef int (*rte_bus_scan_t)(void);
+typedef struct rte_device *(*rte_bus_scan_one_t)(struct rte_devargs
+*);
You should add comments over the declaration, just like the other similar
declarations.
And, a new line should be here.
/**
* Implementation specific probe function which is responsible for
linking
* devices on that bus with applicable drivers.
@@ -95,6 +96,8 @@ typedef int (*rte_bus_scan_t)(void);
*/
typedef int (*rte_bus_probe_t)(void);
+
+
And please remove the extra lines added above in next version of patch.
/**
* Device iterator to find a device on a bus.
*
@@ -204,6 +207,7 @@ struct rte_bus {
TAILQ_ENTRY(rte_bus) next; /**< Next bus object in linked list
*/
const char *name; /**< Name of the bus */
rte_bus_scan_t scan; /**< Scan for devices attached to
bus */
+ rte_bus_scan_one_t scan_one; /**< Scan one device by devargs */
I think you mean "Scan one device using devargs" rather than "Scan one
device by devargs".
rte_bus_probe_t probe; /**< Probe devices on bus */
rte_bus_find_device_t find_device; /**< Find a device on the bus
*/
rte_bus_plug_t plug; /**< Probe single device for dri
Qi Zhang
2018-06-07 12:38:48 UTC
Permalink
Previously, detaching a port on a secondary process would mess up the
primary process and prevent the same device from being attached again.
By taking advantage of rte_eth_dev_release_port_local, we can support
this with a minor change.

Signed-off-by: Qi Zhang <***@intel.com>
---
drivers/net/vhost/rte_eth_vhost.c | 11 +++++++++++
1 file changed, 11 insertions(+)

diff --git a/drivers/net/vhost/rte_eth_vhost.c b/drivers/net/vhost/rte_eth_vhost.c
index ba9d768a0..4ab34cefd 100644
--- a/drivers/net/vhost/rte_eth_vhost.c
+++ b/drivers/net/vhost/rte_eth_vhost.c
@@ -1353,6 +1353,7 @@ rte_pmd_vhost_probe(struct rte_vdev_device *dev)
}
/* TODO: request info from primary to set up Rx and Tx */
eth_dev->dev_ops = &ops;
+ eth_dev->device = &dev->device;
rte_eth_dev_probing_finish(eth_dev);
return 0;
}
@@ -1435,6 +1436,16 @@ rte_pmd_vhost_remove(struct rte_vdev_device *dev)
if (eth_dev == NULL)
return -ENODEV;

+ if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+ /* detach device on local pprocess only */
+ if (strlen(rte_vdev_device_args(dev)) == 0)
+ return rte_eth_dev_release_port_local(eth_dev);
+ /**
+ * else this is a private device for current process
+ * so continue with normal detach scenario
+ */
+ }
+
eth_dev_close(eth_dev);

rte_free(vring_states[eth_dev->data->port_id]);
--
2.13.6
Burakov, Anatoly
2018-06-15 15:16:00 UTC
Permalink
Hi Qi,

I haven't read the code yet, and I'll be the first to admit that I'm not
too well versed in how shared/private device data works, so my apologies
in advance if all of the comments below are addressed by implementation
details or are way off base!
Post by Qi Zhang
1. Attach a shared device from primary
2. Detach a shared device from primary
3. Attach a shared device from secondary
4. Detach a shared device from secondary
5. Attach a private device from secondary
6. Detach a private device from secondary
7. Detach a shared device from secondary privately
8. Attach a shared device from secondary privately
<...>
Post by Qi Zhang
A secondary process can also temporarily detach a shared device
"privately" and then attach it back later; this action also does not
impact other processes.
Do we really need to implement these cases? It seems to me that this
"reattach it later" introduces unnecessary complexity. If secondary has
detached the device, i think it is safer if we cannot reattach it,
period, because it was a shared device. What if we try to attach it when
a handshake has already completed and all other processes expect to
detach it?

(in fact, do we differentiate between non-existent device and shared
device that has been "privately detached"? I would expect that we keep
the device as detached as opposed to forgetting about it, so that, come
handshake, we can safely reply "yeah, we can detach the device", but
maybe it's OK to not treat request to detach a non-existent device as an
error... thoughts? am i getting something wrong?)
Post by Qi Zhang
==============
rte_eth_dev_attach and rte_eth_dev_detach are extended to support
shared device attach/detach in the primary-secondary process model; they
will be called in cases 1, 2, 3 and 4.
New APIs rte_eth_dev_attach_private and rte_eth_dev_detach_private are
introduced to cover cases 5, 6, 7 and 8; these APIs can only be invoked
in a secondary process.
New APIs rte_eth_dev_lock and rte_eth_dev_unlock are introduced to let
the application lock or unlock a specific ethdev; a locked device
can't be detached. This helps the application prevent unexpected
device detaching, especially in a multi-process environment.
The new API also lets the application register a callback function
which will be invoked before a device is detached;
the return value of the function decides whether the device will
be detached or not, which lets the application do a condition check
at runtime.
I assume that you've added device locking to avoid having to do
handshake before detach, correct? Is this a shared lock of some kind, or
is it a private lock? If it's shared lock, what happens if the process
holding that lock crashes?
Post by Qi Zhang
===========
Currently device removal is not handled well in secondary processes by most
PMD drivers: rte_eth_dev_release_port will be invoked and will mess up the
primary process since it resets all shared data. So we introduced a new API,
rte_eth_dev_release_port_local, which only resets the ethdev's state to unused
but does not touch shared data, so other processes will not be impacted.
Since not all device drivers are targeted to support the primary-secondary
process model, the patch set only fixes this on all Intel devices and
vdevs; it can be referenced by other drivers when an equivalent fix is required.
Nitpick - why the naming mismatch between *_private() and *_local()?
Post by Qi Zhang
===========
The solution does not cover the case where the primary process exits while
secondary processes are still active. Though this is not a typical use
1. a secondary process can't attach / detach any shared device since no
primary exists.
2. a secondary process can still attach / detach a private device.
3. a secondary process can still detach a shared device privately but may
not be able to attach it back; that ethdev slot will become a zombie slot.
I think this should be explicit and by design. Shared devices can only
be communicated to all secondaries through a primary process. No primary
- no shared devices. I don't think we can do anything about it unless we
implement some kind of peer-to-peer IPC (which isn't happening as far as
i'm aware).



Thanks for your work on this patchset!
--
Thanks,
Anatoly
Zhang, Qi Z
2018-06-19 02:43:34 UTC
Permalink
Hi Anatoly:

Thanks for the review, see my reply in inline.
-----Original Message-----
From: Burakov, Anatoly
Sent: Friday, June 15, 2018 11:16 PM
Subject: Re: [PATCH 00/22] enable hotplug on multi-process
Hi Qi,
I haven't read the code yet, and I'll be the first to admit that I'm not too well
versed in how shared/private device data works, so my apologies in advance
if all of the comments below are addressed by implementation details or are way
off base!
Post by Qi Zhang
1. Attach a shared device from primary
2. Detach a shared device from primary
3. Attach a shared device from secondary
4. Detach a shared device from secondary
5. Attach a private device from secondary
6. Detach a private device from secondary
7. Detach a shared device from secondary privately
8. Attach a shared device from secondary privately
<...>
Post by Qi Zhang
A secondary process can also temporarily detach a shared device
"privately" and then attach it back later; this action also does not impact other
processes.
Do we really need to implement these cases? It seems to me that this
"reattach it later" introduces unnecessary complexity.
I agree it's not necessary, but this looks like a free feature based on current implementation :)
If secondary has
detached the device, I think it is safer if we cannot reattach it,
period, because it was a shared device. What if we try to attach it when
a handshake has already completed and all other processes expect to
detach it?
In that case, attaching back a shared device that has already been detached will fail, as expected.

For PCI devices, it will fail at driver probe.
For vdev, it will fail at rte_eth_dev_attach_secondary.
(in fact, do we differentiate between non-existent device and shared
device that has been "privately detached"? I would expect that we keep
the device as detached as opposed to forgetting about it, so that, come
handshake, we can safely reply "yeah, we can detach the device", but
maybe it's OK to not treat request to detach a non-existent device as an
error... thoughts? am i getting something wrong?)
Post by Qi Zhang
==============
rte_eth_dev_attach and rte_eth_dev_detach are extended to support
shared device attach/detach in the primary-secondary process model; they will
be called in cases 1, 2, 3 and 4.
New API rte_eth_dev_attach_private and rte_eth_dev_detach_private are
introduced to cover case 5,6,7,8, this API can only be invoked in
secondary process.
New APIs rte_eth_dev_lock and rte_eth_dev_unlock are introduced to let an
application lock or unlock a specific ethdev; a locked device
can't be detached. This helps the application prevent unexpected
device detaching, especially in a multi-process environment.
Also, the new API lets the application register a callback function
which will be invoked before a device is going to be detached;
the return value of the function decides whether the device will continue
to be detached or not. This allows the application to do a condition check
at runtime.
I assume that you've added device locking to avoid having to do
handshake before detach, correct?
Yes.
Is this a shared lock of some kind, or
is it a private lock? If it's shared lock, what happens if the process
holding that lock crashes?
It's a process-private lock, but a shared device that is locked by any process cannot be detached.
Post by Qi Zhang
===========
Currently device removing is not handled well in secondary process on
most
Post by Qi Zhang
pmd drivers, rte_eth_dev_release_port will be invoked and will mess up
primary process since it reset all shared data. So we introduced new API
rte_eth_dev_release_port_local which only reset ethdev's state to unused
but
Post by Qi Zhang
not touch shared data so other process will not be impacted.
Since not all device drivers are targeted to support the primary-secondary
process model, the patch set only fixes this on all Intel devices and
vdev; it can be referenced by other drivers when an equivalent fix is required.
Nitpick - why the naming mismatch between *_private() and *_local()?
Agree.
Using "private" makes the API naming more consistent.
Post by Qi Zhang
===========
The solution does not cover the case where the primary process exits while
secondary processes are still active. Though this is not a typical use
1. secondary process can't attach / detach any shared device since no
primary exist.
2. secondary process still can attach / detach private device.
3. secondary process still can detach a share device privately but may
not attach it back, that ethdev slot will become zombie slot.
I think this should be explicit and by design. Shared devices can only
be communicated to all secondaries through a primary process. No primary
- no shared devices. I don't think we can do anything about it unless we
implement some kind of peer-to-peer IPC (which isn't happening as far as
i'm aware).
Agree.
Thanks for your work on this patchset!
Thanks for the design review and all the helpful inputs.

Regards
Qi
--
Thanks,
Anatoly
Qi Zhang
2018-06-21 02:00:39 UTC
Permalink
The patch implements the scan_one op for the vdev bus. It gives two benefits:
1. It improves scan efficiency when a device is attached via hotplug, since there is
no need to populate a new device by iterating all devargs in devargs_list.
2. It also avoids a synchronous IPC invocation (which happens in vdev->scan on a
secondary process). This removes the potential deadlock in the case
when a secondary process receives a request from the primary process to attach a
new device, since vdev->scan would be invoked on the mp thread itself in that
case.

Signed-off-by: Qi Zhang <***@intel.com>
---

v2:
- improve commit log

drivers/bus/vdev/vdev.c | 30 ++++++++++++++++++++++++++++++
1 file changed, 30 insertions(+)

diff --git a/drivers/bus/vdev/vdev.c b/drivers/bus/vdev/vdev.c
index 6139dd551..cdbd77df0 100644
--- a/drivers/bus/vdev/vdev.c
+++ b/drivers/bus/vdev/vdev.c
@@ -467,6 +467,35 @@ vdev_scan(void)
return 0;
}

+static struct rte_device *vdev_scan_one(struct rte_devargs *devargs)
+{
+ struct rte_vdev_device *dev = NULL;
+
+ dev = calloc(1, sizeof(*dev));
+ if (!dev) {
+ VDEV_LOG(ERR, "failed to allocate memory for new device");
+ return NULL;
+ }
+
+ rte_spinlock_recursive_lock(&vdev_device_list_lock);
+
+ if (find_vdev(devargs->name)) {
+ VDEV_LOG(ERR, "device %s already exist", devargs->name);
+ free(dev);
+ rte_spinlock_recursive_unlock(&vdev_device_list_lock);
+ return NULL;
+ }
+
+ dev->device.devargs = devargs;
+ dev->device.numa_node = SOCKET_ID_ANY;
+ dev->device.name = devargs->name;
+ TAILQ_INSERT_TAIL(&vdev_device_list, dev, next);
+
+ rte_spinlock_recursive_unlock(&vdev_device_list_lock);
+
+ return &dev->device;
+}
+
static int
vdev_probe(void)
{
@@ -531,6 +560,7 @@ vdev_unplug(struct rte_device *dev)

static struct rte_bus rte_vdev_bus = {
.scan = vdev_scan,
+ .scan_one = vdev_scan_one,
.probe = vdev_probe,
.find_device = vdev_find_device,
.plug = vdev_plug,
--
2.13.6
Qi Zhang
2018-06-21 02:00:38 UTC
Permalink
When hot plug a new device, it is not necessary to scan everything
on the bus since the devname and devargs are already there. So new
rte_bus ops "scan_one" is introduced, bus driver can implement this
function to simplify the hotplug process.

Signed-off-by: Qi Zhang <***@intel.com>
---

v2:
- fix spelling
- add missing comments.

lib/librte_eal/common/eal_common_dev.c | 17 +++++++++++++----
lib/librte_eal/common/include/rte_bus.h | 16 ++++++++++++++++
2 files changed, 29 insertions(+), 4 deletions(-)

diff --git a/lib/librte_eal/common/eal_common_dev.c b/lib/librte_eal/common/eal_common_dev.c
index 61cb3b162..1ad033536 100644
--- a/lib/librte_eal/common/eal_common_dev.c
+++ b/lib/librte_eal/common/eal_common_dev.c
@@ -147,11 +147,20 @@ int __rte_experimental rte_eal_hotplug_add(const char *busname, const char *devn
if (ret)
goto err_devarg;

- ret = bus->scan();
- if (ret)
- goto err_devarg;
+ /**
+ * if bus support to scan specific device by devargs,
+ * we don't need to scan all devices on the bus.
+ */
+ if (bus->scan_one) {
+ dev = bus->scan_one(da);
+ } else {
+ ret = bus->scan();
+ if (ret)
+ goto err_devarg;
+
+ dev = bus->find_device(NULL, cmp_detached_dev_name, devname);
+ }

- dev = bus->find_device(NULL, cmp_detached_dev_name, devname);
if (dev == NULL) {
RTE_LOG(ERR, EAL, "Cannot find unplugged device (%s)\n",
devname);
diff --git a/lib/librte_eal/common/include/rte_bus.h b/lib/librte_eal/common/include/rte_bus.h
index eb9eded4e..3269ef78b 100644
--- a/lib/librte_eal/common/include/rte_bus.h
+++ b/lib/librte_eal/common/include/rte_bus.h
@@ -84,6 +84,21 @@ enum rte_iova_mode {
typedef int (*rte_bus_scan_t)(void);

/**
+ * Bus specific scan for one specific device attached on the bus.
+ * For each bus object, the scan would be responsible for finding the specific
+ * device and adding it to its private device list, and the device object will
+ * be return also.
+ *
+ * @param devargs
+ * Device arguments be used to identify the device.
+ *
+ * @return
+ * !NULL for successful scan
+ * NULL for unsuccessful scan
+ */
+typedef struct rte_device *(*rte_bus_scan_one_t)(struct rte_devargs *devargs);
+
+/**
* Implementation specific probe function which is responsible for linking
* devices on that bus with applicable drivers.
*
@@ -204,6 +219,7 @@ struct rte_bus {
TAILQ_ENTRY(rte_bus) next; /**< Next bus object in linked list */
const char *name; /**< Name of the bus */
rte_bus_scan_t scan; /**< Scan for devices attached to bus */
+ rte_bus_scan_one_t scan_one; /**< Scan one device using devargs */
rte_bus_probe_t probe; /**< Probe devices on bus */
rte_bus_find_device_t find_device; /**< Find a device on the bus */
rte_bus_plug_t plug; /**< Probe single device for drivers */
--
2.13.6
Burakov, Anatoly
2018-06-21 07:56:21 UTC
Permalink
Post by Qi Zhang
When hot plug a new device, it is not necessary to scan everything
on the bus since the devname and devargs are already there. So new
rte_bus ops "scan_one" is introduced, bus driver can implement this
function to simplify the hotplug process.
---
<snip>
Post by Qi Zhang
+/**
* Implementation specific probe function which is responsible for linking
* devices on that bus with applicable drivers.
*
@@ -204,6 +219,7 @@ struct rte_bus {
TAILQ_ENTRY(rte_bus) next; /**< Next bus object in linked list */
const char *name; /**< Name of the bus */
rte_bus_scan_t scan; /**< Scan for devices attached to bus */
+ rte_bus_scan_one_t scan_one; /**< Scan one device using devargs */
rte_bus_probe_t probe; /**< Probe devices on bus */
rte_bus_find_device_t find_device; /**< Find a device on the bus */
rte_bus_plug_t plug; /**< Probe single device for drivers */
Does this break ABI for bus?
--
Thanks,
Anatoly
Qi Zhang
2018-06-21 02:00:40 UTC
Permalink
Add driver API rte_eth_dev_release_port_private to support the
requirement that an ethdev only be released on the secondary process,
so only the local state is set to unused; shared data will not be
reset, so the primary process can still use it.

Signed-off-by: Qi Zhang <***@intel.com>
---

v2:
- rename rte_eth_release_port_local to rte_eth_release_port_private.

lib/librte_ethdev/rte_ethdev.c | 24 +++++++++++++++++++++---
lib/librte_ethdev/rte_ethdev_driver.h | 13 +++++++++++++
2 files changed, 34 insertions(+), 3 deletions(-)

diff --git a/lib/librte_ethdev/rte_ethdev.c b/lib/librte_ethdev/rte_ethdev.c
index a9977df97..205b2ee33 100644
--- a/lib/librte_ethdev/rte_ethdev.c
+++ b/lib/librte_ethdev/rte_ethdev.c
@@ -359,6 +359,23 @@ rte_eth_dev_attach_secondary(const char *name)
}

int
+rte_eth_dev_release_port_private(struct rte_eth_dev *eth_dev)
+{
+ if (eth_dev == NULL)
+ return -EINVAL;
+
+ _rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_DESTROY, NULL);
+
+ rte_spinlock_lock(&rte_eth_dev_shared_data->ownership_lock);
+
+ eth_dev->state = RTE_ETH_DEV_UNUSED;
+
+ rte_spinlock_unlock(&rte_eth_dev_shared_data->ownership_lock);
+
+ return 0;
+}
+
+int
rte_eth_dev_release_port(struct rte_eth_dev *eth_dev)
{
if (eth_dev == NULL)
@@ -370,9 +387,10 @@ rte_eth_dev_release_port(struct rte_eth_dev *eth_dev)

rte_spinlock_lock(&rte_eth_dev_shared_data->ownership_lock);

- eth_dev->state = RTE_ETH_DEV_UNUSED;
-
- memset(eth_dev->data, 0, sizeof(struct rte_eth_dev_data));
+ if (eth_dev->state != RTE_ETH_DEV_UNUSED) {
+ eth_dev->state = RTE_ETH_DEV_UNUSED;
+ memset(eth_dev->data, 0, sizeof(struct rte_eth_dev_data));
+ }

rte_spinlock_unlock(&rte_eth_dev_shared_data->ownership_lock);

diff --git a/lib/librte_ethdev/rte_ethdev_driver.h b/lib/librte_ethdev/rte_ethdev_driver.h
index c9c825e3f..49c27223d 100644
--- a/lib/librte_ethdev/rte_ethdev_driver.h
+++ b/lib/librte_ethdev/rte_ethdev_driver.h
@@ -70,6 +70,19 @@ int rte_eth_dev_release_port(struct rte_eth_dev *eth_dev);

/**
* @internal
+ * Release the specified ethdev port in local process, only set to ethdev
+ * state to unused, but not reset share data since it assume other process
+ * is still using it, typically it is called by secondary process.
+ *
+ * @param eth_dev
+ * The *eth_dev* pointer is the address of the *rte_eth_dev* structure.
+ * @return
+ * - 0 on success, negative on error
+ */
+int rte_eth_dev_release_port_private(struct rte_eth_dev *eth_dev);
+
+/**
+ * @internal
* Release device queues and clear its configuration to force the user
* application to reconfigure it. It is for internal use only.
*
--
2.13.6
Burakov, Anatoly
2018-06-21 08:06:17 UTC
Permalink
Post by Qi Zhang
Add driver API rte_eth_release_port_private to support the
requirement that an ethdev only be released on secondary process,
so only local state be set to unused , share data will not be
reset so primary process can still use it.
---
<snip>
Post by Qi Zhang
/**
+ * Release the specified ethdev port in local process, only set to ethdev
+ * state to unused, but not reset share data since it assume other process
+ * is still using it, typically it is called by secondary process.
+ *
+ * The *eth_dev* pointer is the address of the *rte_eth_dev* structure.
+ * - 0 on success, negative on error
+ */
+int rte_eth_dev_release_port_private(struct rte_eth_dev *eth_dev);
+
As far as i can tell, even though the function is marked as internal, it
should still be exported in the .map file (see rte_eth_dev_allocate()
for example).

Thomas and others, does this count as new API? Should this be marked as
__rte_experimental? Presumably, we guarantee ABI stability for internal
functions too, so my expectation would be yes.
Post by Qi Zhang
+/**
* Release device queues and clear its configuration to force the user
* application to reconfigure it. It is for internal use only.
*
--
Thanks,
Anatoly
Thomas Monjalon
2018-06-21 08:21:10 UTC
Permalink
Post by Burakov, Anatoly
Post by Qi Zhang
Add driver API rte_eth_release_port_private to support the
requirement that an ethdev only be released on secondary process,
so only local state be set to unused , share data will not be
reset so primary process can still use it.
---
<snip>
Post by Qi Zhang
/**
+ * Release the specified ethdev port in local process, only set to ethdev
+ * state to unused, but not reset share data since it assume other process
+ * is still using it, typically it is called by secondary process.
+ *
+ * The *eth_dev* pointer is the address of the *rte_eth_dev* structure.
+ * - 0 on success, negative on error
+ */
+int rte_eth_dev_release_port_private(struct rte_eth_dev *eth_dev);
+
As far as i can tell, even though the function is marked as internal, it
should still be exported in the .map file (see rte_eth_dev_allocate()
for example).
Thomas and others, does this count as new API? Should this be marked as
__rte_experimental? Presumably, we guarantee ABI stability for internal
functions too, so my expectation would be yes.
You know the A in ABI stands for Application :)
If it is not called by application, it has no impact on ABI.

However, I am not sure about having this function at all.
Who is calling it?
Zhang, Qi Z
2018-06-21 08:21:21 UTC
Permalink
-----Original Message-----
From: Burakov, Anatoly
Sent: Thursday, June 21, 2018 4:06 PM
Subject: Re: [PATCH v2 03/22] ethdev: add function to release port in local
process
Add driver API rte_eth_release_port_private to support the requirement
that an ethdev only be released on secondary process, so only local
state be set to unused , share data will not be reset so primary
process can still use it.
---
<snip>
/**
+ * Release the specified ethdev port in local process, only set to
+ethdev
+ * state to unused, but not reset share data since it assume other
+process
+ * is still using it, typically it is called by secondary process.
+ *
+ * The *eth_dev* pointer is the address of the *rte_eth_dev* structure.
+ * - 0 on success, negative on error
+ */
+int rte_eth_dev_release_port_private(struct rte_eth_dev *eth_dev);
+
As far as i can tell, even though the function is marked as internal, it should still
be exported in the .map file (see rte_eth_dev_allocate() for example).
Thomas and others, does this count as new API? Should this be marked as
__rte_experimental? Presumably, we guarantee ABI stability for internal
functions too, so my expectation would be yes.
Sorry, I did not intend to mark this as experimental; I must have forgotten to remove this.
It should be rte_eth_dev_attach/detach_private and rte_eth_dev_lock/unlock.

I guess it is not necessary for an internal API to have this.
I will remove it in v3.

Thanks
Qi
+/**
* Release device queues and clear its configuration to force the user
* application to reconfigure it. It is for internal use only.
*
-
Qi Zhang
2018-06-21 02:00:50 UTC
Permalink
Previously, detach port on secondary process will mess primary
process and cause same device can't be attached again, by take
advantage of rte_eth_release_port_private, we can support this
with minor change.

Signed-off-by: Qi Zhang <***@intel.com>
---
drivers/net/bonding/rte_eth_bond_pmd.c | 11 +++++++++++
1 file changed, 11 insertions(+)

diff --git a/drivers/net/bonding/rte_eth_bond_pmd.c b/drivers/net/bonding/rte_eth_bond_pmd.c
index f155ff779..da45ba9ba 100644
--- a/drivers/net/bonding/rte_eth_bond_pmd.c
+++ b/drivers/net/bonding/rte_eth_bond_pmd.c
@@ -3062,6 +3062,7 @@ bond_probe(struct rte_vdev_device *dev)
}
/* TODO: request info from primary to set up Rx and Tx */
eth_dev->dev_ops = &default_dev_ops;
+ eth_dev->device = &dev->device;
rte_eth_dev_probing_finish(eth_dev);
return 0;
}
@@ -3168,6 +3169,16 @@ bond_remove(struct rte_vdev_device *dev)
if (eth_dev == NULL)
return -ENODEV;

+ if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+ /* detach device on local pprocess only */
+ if (strlen(rte_vdev_device_args(dev)) == 0)
+ return rte_eth_dev_release_port_private(eth_dev);
+ /**
+ * else this is a private device for current process
+ * so continue with normal detach scenario
+ */
+ }
+
RTE_ASSERT(eth_dev->device == &dev->device);

internals = eth_dev->data->dev_private;
--
2.13.6
Qi Zhang
2018-06-21 02:00:51 UTC
Permalink
Previously, detach port on secondary process will mess primary
process and cause same device can't be attached again, by take
advantage of rte_eth_release_port_private, we can support this
with minor change.

Signed-off-by: Qi Zhang <***@intel.com>
---
drivers/net/failsafe/failsafe.c | 16 ++++++++++++++++
1 file changed, 16 insertions(+)

diff --git a/drivers/net/failsafe/failsafe.c b/drivers/net/failsafe/failsafe.c
index eafbb75df..c5e8651f6 100644
--- a/drivers/net/failsafe/failsafe.c
+++ b/drivers/net/failsafe/failsafe.c
@@ -328,6 +328,7 @@ rte_pmd_failsafe_probe(struct rte_vdev_device *vdev)
}
/* TODO: request info from primary to set up Rx and Tx */
eth_dev->dev_ops = &failsafe_ops;
+ eth_dev->device = &vdev->device;
rte_eth_dev_probing_finish(eth_dev);
return 0;
}
@@ -338,10 +339,25 @@ rte_pmd_failsafe_probe(struct rte_vdev_device *vdev)
static int
rte_pmd_failsafe_remove(struct rte_vdev_device *vdev)
{
+ struct rte_eth_dev *eth_dev;
const char *name;

name = rte_vdev_device_name(vdev);
INFO("Uninitializing " FAILSAFE_DRIVER_NAME " for %s", name);
+
+ eth_dev = rte_eth_dev_allocated(name);
+ if (!eth_dev)
+ return -ENODEV;
+ if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+ /* detach device on local pprocess only */
+ if (strlen(rte_vdev_device_args(vdev)) == 0)
+ return rte_eth_dev_release_port_private(eth_dev);
+ /**
+ * else this is a private device for current process
+ * so continue with normal detach scenario.
+ */
+ }
+
return fs_rte_eth_free(name);
}
--
2.13.6
Qi Zhang
2018-06-21 02:00:52 UTC
Permalink
Previously, detach port on secondary process will mess primary
process and cause same device can't be attached again, by take
advantage of rte_eth_release_port_private, we can support this
with minor change.

Signed-off-by: Qi Zhang <***@intel.com>
---
drivers/net/kni/rte_eth_kni.c | 11 +++++++++++
1 file changed, 11 insertions(+)

diff --git a/drivers/net/kni/rte_eth_kni.c b/drivers/net/kni/rte_eth_kni.c
index ab63ea427..e5679c76a 100644
--- a/drivers/net/kni/rte_eth_kni.c
+++ b/drivers/net/kni/rte_eth_kni.c
@@ -419,6 +419,7 @@ eth_kni_probe(struct rte_vdev_device *vdev)
}
/* TODO: request info from primary to set up Rx and Tx */
eth_dev->dev_ops = &eth_kni_ops;
+ eth_dev->device = &vdev->device;
rte_eth_dev_probing_finish(eth_dev);
return 0;
}
@@ -463,6 +464,16 @@ eth_kni_remove(struct rte_vdev_device *vdev)
if (eth_dev == NULL)
return -1;

+ if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+ /* detach device on local pprocess only */
+ if (strlen(rte_vdev_device_args(vdev)) == 0)
+ return rte_eth_dev_release_port_private(eth_dev);
+ /**
+ * else this is a private device for current process
+ * so continue with normal detach scenario
+ */
+ }
+
eth_kni_dev_stop(eth_dev);

internals = eth_dev->data->dev_private;
--
2.13.6
Qi Zhang
2018-06-21 02:00:53 UTC
Permalink
Previously, detach port on secondary process will mess primary
process and cause same device can't be attached again, by take
advantage of rte_eth_release_port_private, we can support this
with minor change.

Signed-off-by: Qi Zhang <***@intel.com>
---
drivers/net/null/rte_eth_null.c | 16 +++++++++++++++-
1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/drivers/net/null/rte_eth_null.c b/drivers/net/null/rte_eth_null.c
index 1d2e6b9e9..2f040729b 100644
--- a/drivers/net/null/rte_eth_null.c
+++ b/drivers/net/null/rte_eth_null.c
@@ -623,6 +623,7 @@ rte_pmd_null_probe(struct rte_vdev_device *dev)
}
/* TODO: request info from primary to set up Rx and Tx */
eth_dev->dev_ops = &ops;
+ eth_dev->device = &dev->device;
rte_eth_dev_probing_finish(eth_dev);
return 0;
}
@@ -667,18 +668,31 @@ static int
rte_pmd_null_remove(struct rte_vdev_device *dev)
{
struct rte_eth_dev *eth_dev = NULL;
+ const char *name;

if (!dev)
return -EINVAL;

+ name = rte_vdev_device_name(dev);
+
PMD_LOG(INFO, "Closing null ethdev on numa socket %u",
rte_socket_id());

/* find the ethdev entry */
- eth_dev = rte_eth_dev_allocated(rte_vdev_device_name(dev));
+ eth_dev = rte_eth_dev_allocated(name);
if (eth_dev == NULL)
return -1;

+ if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+ /* detach device on local pprocess only */
+ if (strlen(rte_vdev_device_args(dev)) == 0)
+ return rte_eth_dev_release_port_private(eth_dev);
+ /**
+ * else this is a private device for current process
+ * so continue with normal detach scenario
+ */
+ }
+
rte_free(eth_dev->data->dev_private);

rte_eth_dev_release_port(eth_dev);
--
2.13.6
Qi Zhang
2018-06-21 02:00:54 UTC
Permalink
Previously, detach port on secondary process will mess primary
process and cause same device can't be attached again, by take
advantage of rte_eth_release_port_private, we can support this
with minor change.

Signed-off-by: Qi Zhang <***@intel.com>
---
drivers/net/octeontx/octeontx_ethdev.c | 16 ++++++++++++++++
1 file changed, 16 insertions(+)

diff --git a/drivers/net/octeontx/octeontx_ethdev.c b/drivers/net/octeontx/octeontx_ethdev.c
index 1eb453b21..497bacdc6 100644
--- a/drivers/net/octeontx/octeontx_ethdev.c
+++ b/drivers/net/octeontx/octeontx_ethdev.c
@@ -1016,6 +1016,7 @@ octeontx_create(struct rte_vdev_device *dev, int port, uint8_t evdev,

eth_dev->tx_pkt_burst = octeontx_xmit_pkts;
eth_dev->rx_pkt_burst = octeontx_recv_pkts;
+ eth_dev->device = &dev->device;
rte_eth_dev_probing_finish(eth_dev);
return 0;
}
@@ -1138,6 +1139,18 @@ octeontx_remove(struct rte_vdev_device *dev)
if (eth_dev == NULL)
return -ENODEV;

+ if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+ /* detach device on local pprocess only */
+ if (strlen(rte_vdev_device_args(dev)) == 0) {
+ rte_eth_dev_release_port_private(eth_dev);
+ continue;
+ }
+ /**
+ * else this is a private device for current process
+ * so continue with normal detach scenario
+ */
+ }
+
nic = octeontx_pmd_priv(eth_dev);
rte_event_dev_stop(nic->evdev);
PMD_INIT_LOG(INFO, "Closing octeontx device %s", octtx_name);
@@ -1148,6 +1161,9 @@ octeontx_remove(struct rte_vdev_device *dev)
rte_event_dev_close(nic->evdev);
}

+ if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+ return 0;
+
/* Free FC resource */
octeontx_pko_fc_free();
--
2.13.6
Qi Zhang
2018-06-21 02:00:58 UTC
Permalink
Previously, detach port on secondary process will mess primary
process and cause same device can't be attached again, by take
advantage of rte_eth_release_port_private, we can support this
with minor change.

Signed-off-by: Qi Zhang <***@intel.com>
---
drivers/net/vhost/rte_eth_vhost.c | 11 +++++++++++
1 file changed, 11 insertions(+)

diff --git a/drivers/net/vhost/rte_eth_vhost.c b/drivers/net/vhost/rte_eth_vhost.c
index ba9d768a0..f773711b4 100644
--- a/drivers/net/vhost/rte_eth_vhost.c
+++ b/drivers/net/vhost/rte_eth_vhost.c
@@ -1353,6 +1353,7 @@ rte_pmd_vhost_probe(struct rte_vdev_device *dev)
}
/* TODO: request info from primary to set up Rx and Tx */
eth_dev->dev_ops = &ops;
+ eth_dev->device = &dev->device;
rte_eth_dev_probing_finish(eth_dev);
return 0;
}
@@ -1435,6 +1436,16 @@ rte_pmd_vhost_remove(struct rte_vdev_device *dev)
if (eth_dev == NULL)
return -ENODEV;

+ if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+ /* detach device on local pprocess only */
+ if (strlen(rte_vdev_device_args(dev)) == 0)
+ return rte_eth_dev_release_port_private(eth_dev);
+ /**
+ * else this is a private device for current process
+ * so continue with normal detach scenario
+ */
+ }
+
eth_dev_close(eth_dev);

rte_free(vring_states[eth_dev->data->port_id]);
--
2.13.6
Qi Zhang
2018-06-21 02:00:59 UTC
Permalink
The sample code demonstrates device (ethdev only) management
in a multi-process environment. A user can attach/detach a device
on the primary process and see it synced on the secondary process
automatically; the user can also lock a device to prevent it from being
detached, or unlock it to go back to the default behaviour.

How to start?
./devmgm_mp --proc-type=auto
help
list
/* attach a af_packet vdev */
attach net_af_packet,iface=eth0
/* detach port 0 */
detach 0
/* attach a private af_packet vdev (secondary process only)*/
attachp net_af_packet,iface=eth0
/* detach a private device (secondary process only) */
detachp 0
/* lock port 0 */
lock 0
/* unlock port 0 */
unlock 0
Signed-off-by: Qi Zhang <***@intel.com>
---
examples/devmgm_mp/Makefile | 64 +++++++
examples/devmgm_mp/commands.c | 381 +++++++++++++++++++++++++++++++++++++++++
examples/devmgm_mp/commands.h | 10 ++
examples/devmgm_mp/main.c | 39 +++++
examples/devmgm_mp/meson.build | 11 ++
5 files changed, 505 insertions(+)
create mode 100644 examples/devmgm_mp/Makefile
create mode 100644 examples/devmgm_mp/commands.c
create mode 100644 examples/devmgm_mp/commands.h
create mode 100644 examples/devmgm_mp/main.c
create mode 100644 examples/devmgm_mp/meson.build

diff --git a/examples/devmgm_mp/Makefile b/examples/devmgm_mp/Makefile
new file mode 100644
index 000000000..0ad3ec80e
--- /dev/null
+++ b/examples/devmgm_mp/Makefile
@@ -0,0 +1,64 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Intel Corporation
+
+# binary name
+APP = devmgm_mp
+
+# all source are stored in SRCS-y
+SRCS-y := main.c commands.c
+
+# Build using pkg-config variables if possible
+$(shell pkg-config --exists libdpdk)
+ifeq ($(.SHELLSTATUS),0)
+
+all: shared
+.PHONY: shared static
+shared: build/$(APP)-shared
+ ln -sf $(APP)-shared build/$(APP)
+static: build/$(APP)-static
+ ln -sf $(APP)-static build/$(APP)
+
+PC_FILE := $(shell pkg-config --path libdpdk)
+CFLAGS += -O3 $(shell pkg-config --cflags libdpdk)
+LDFLAGS_SHARED = $(shell pkg-config --libs libdpdk)
+LDFLAGS_STATIC = -Wl,-Bstatic $(shell pkg-config --static --libs libdpdk)
+
+build/$(APP)-shared: $(SRCS-y) Makefile $(PC_FILE) | build
+ $(CC) $(CFLAGS) $(SRCS-y) -o $@ $(LDFLAGS) $(LDFLAGS_SHARED)
+
+build/$(APP)-static: $(SRCS-y) Makefile $(PC_FILE) | build
+ $(CC) $(CFLAGS) $(SRCS-y) -o $@ $(LDFLAGS) $(LDFLAGS_STATIC)
+
+build:
+ @mkdir -p $@
+
+.PHONY: clean
+clean:
+ rm -f build/$(APP) build/$(APP)-static build/$(APP)-shared
+ rmdir --ignore-fail-on-non-empty build
+
+else
+
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+# Default target, can be overridden by command line or environment
+RTE_TARGET ?= x86_64-native-linuxapp-gcc
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# binary name
+APP = devmgm_mp
+
+# all source are stored in SRCS-y
+SRCS-y := main.c commands.c
+
+CFLAGS += -O3
+CFLAGS += -DALLOW_EXPERIMENTAL_API
+CFLAGS += $(WERROR_FLAGS)
+CFLAGS_parse_obj_list.o := -D_GNU_SOURCE
+
+include $(RTE_SDK)/mk/rte.extapp.mk
+
+endif
diff --git a/examples/devmgm_mp/commands.c b/examples/devmgm_mp/commands.c
new file mode 100644
index 000000000..11eb66730
--- /dev/null
+++ b/examples/devmgm_mp/commands.c
@@ -0,0 +1,381 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation.
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <errno.h>
+#include <netinet/in.h>
+#include <termios.h>
+#ifndef __linux__
+ #ifdef __FreeBSD__
+ #include <sys/socket.h>
+ #else
+ #include <net/socket.h>
+ #endif
+#endif
+
+#include <cmdline_rdline.h>
+#include <cmdline_parse.h>
+#include <cmdline_parse_ipaddr.h>
+#include <cmdline_parse_num.h>
+#include <cmdline_parse_string.h>
+#include <cmdline.h>
+#include <rte_ethdev.h>
+
+/**********************************************************/
+
+struct cmd_help_result {
+ cmdline_fixed_string_t help;
+};
+
+/*
+ * Handler for the "help" command: print the list of device management
+ * commands to the cmdline. The parsed result and user data are unused.
+ */
+static void cmd_help_parsed(__attribute__((unused)) void *parsed_result,
+ struct cmdline *cl,
+ __attribute__((unused)) void *data)
+{
+ cmdline_printf(cl,
+ "commands:\n"
+ "- attach <devargs>\n"
+ "- detach <port_id>\n"
+ "- attachp <devargs>\n"
+ "- detachp <port_id>\n"
+ "- lock <port_id>\n"
+ "- unlock <port_id>\n"
+ "- list\n\n");
+}
+
+cmdline_parse_token_string_t cmd_help_help =
+ TOKEN_STRING_INITIALIZER(struct cmd_help_result, help, "help");
+
+cmdline_parse_inst_t cmd_help = {
+ .f = cmd_help_parsed, /* function to call */
+ .data = NULL, /* 2nd arg of func */
+ .help_str = "show help",
+ .tokens = { /* token list, NULL terminated */
+ (void *)&cmd_help_help,
+ NULL,
+ },
+};
+
+/**********************************************************/
+
+struct cmd_quit_result {
+ cmdline_fixed_string_t quit;
+};
+
+/*
+ * Handler for the "quit" command: ask the cmdline to leave its
+ * interactive loop via cmdline_quit().
+ */
+static void cmd_quit_parsed(__attribute__((unused)) void *parsed_result,
+ struct cmdline *cl,
+ __attribute__((unused)) void *data)
+{
+ cmdline_quit(cl);
+}
+
+cmdline_parse_token_string_t cmd_quit_quit =
+ TOKEN_STRING_INITIALIZER(struct cmd_quit_result, quit, "quit");
+
+cmdline_parse_inst_t cmd_quit = {
+ .f = cmd_quit_parsed, /* function to call */
+ .data = NULL, /* 2nd arg of func */
+ .help_str = "quit",
+ .tokens = { /* token list, NULL terminated */
+ (void *)&cmd_quit_quit,
+ NULL,
+ },
+};
+
+/**********************************************************/
+
+struct cmd_list_result {
+ cmdline_fixed_string_t list;
+};
+
+/*
+ * Handler for the "list" command: print every ethdev port visible to
+ * this process as "<port_id>\t<device name>".
+ */
+static void cmd_list_parsed(__attribute__((unused)) void *parsed_result,
+			    struct cmdline *cl,
+			    __attribute__((unused)) void *data)
+{
+	uint16_t port_id;
+	char dev_name[RTE_DEV_NAME_MAX_LEN];
+
+	cmdline_printf(cl, "list all etherdev\n");
+
+	RTE_ETH_FOREACH_DEV(port_id) {
+		/* Start from an empty name so that a failed lookup can
+		 * never be read back as an uninitialized buffer.
+		 */
+		dev_name[0] = '\0';
+		if (rte_eth_dev_get_name_by_port(port_id, dev_name) != 0) {
+			cmdline_printf(cl, "failed to get name of port %d\n",
+				       port_id);
+			continue;
+		}
+		/* Secondary process's ethdev->state may not be
+		 * updated after detach on primary process, but
+		 * ethdev->data should already be reset, so
+		 * use strlen(dev_name) == 0 to know the port is
+		 * not used.
+		 *
+		 * TODO: Secondary process should be informed when a
+		 * port is released on primary through mp channel.
+		 */
+		if (strlen(dev_name) > 0)
+			cmdline_printf(cl, "%d\t%s\n", port_id, dev_name);
+		else
+			cmdline_printf(cl, "empty dev_name is not expected!\n");
+	}
+}
+
+cmdline_parse_token_string_t cmd_list_list =
+ TOKEN_STRING_INITIALIZER(struct cmd_list_result, list, "list");
+
+cmdline_parse_inst_t cmd_list = {
+ .f = cmd_list_parsed, /* function to call */
+ .data = NULL, /* 2nd arg of func */
+ .help_str = "list all devices",
+ .tokens = { /* token list, NULL terminated */
+ (void *)&cmd_list_list,
+ NULL,
+ },
+};
+
+/**********************************************************/
+
+struct cmd_dev_attach_result {
+ cmdline_fixed_string_t attach;
+ cmdline_fixed_string_t device;
+};
+
+/*
+ * Handler for "attach <devargs>": attach the device described by the
+ * devargs string through rte_eth_dev_attach() and report the port id
+ * it was assigned, or the error code on failure.
+ */
+static void cmd_dev_attach_parsed(void *parsed_result,
+				  struct cmdline *cl,
+				  __attribute__((unused)) void *data)
+{
+	struct cmd_dev_attach_result *res = parsed_result;
+	uint16_t port_id;
+	int ret;
+
+	ret = rte_eth_dev_attach(res->device, &port_id);
+	if (ret == 0)
+		cmdline_printf(cl, "attached device %s at port %d\n",
+			       res->device, port_id);
+	else
+		cmdline_printf(cl, "failed to attach device %s, ret = %d\n",
+			       res->device, ret);
+}
+
+cmdline_parse_token_string_t cmd_dev_attach_attach =
+ TOKEN_STRING_INITIALIZER(struct cmd_dev_attach_result, attach,
+ "attach");
+cmdline_parse_token_string_t cmd_dev_attach_device =
+ TOKEN_STRING_INITIALIZER(struct cmd_dev_attach_result, device, NULL);
+
+cmdline_parse_inst_t cmd_attach_device = {
+ .f = cmd_dev_attach_parsed, /* function to call */
+ .data = NULL, /* 2nd arg of func */
+ .help_str = "attach a device",
+ .tokens = { /* token list, NULL terminated */
+ (void *)&cmd_dev_attach_attach,
+ (void *)&cmd_dev_attach_device,
+ NULL,
+ },
+};
+
+/**********************************************************/
+
+struct cmd_dev_attachp_result {
+ cmdline_fixed_string_t attachp;
+ cmdline_fixed_string_t device;
+};
+
+/*
+ * Handler for "attachp <devargs>": attach a private device through
+ * rte_eth_dev_attach_private() and report the port id it was assigned,
+ * or the error code on failure.
+ */
+static void cmd_dev_attachp_parsed(void *parsed_result,
+				   struct cmdline *cl,
+				   __attribute__((unused)) void *data)
+{
+	struct cmd_dev_attachp_result *res = parsed_result;
+	uint16_t port_id;
+	int ret;
+
+	ret = rte_eth_dev_attach_private(res->device, &port_id);
+	if (ret == 0)
+		cmdline_printf(cl, "attached private device %s at port %d\n",
+			       res->device, port_id);
+	else
+		cmdline_printf(cl,
+			       "failed to attach private device %s, ret = %d\n",
+			       res->device, ret);
+}
+
+cmdline_parse_token_string_t cmd_dev_attachp_attachp =
+ TOKEN_STRING_INITIALIZER(struct cmd_dev_attachp_result, attachp,
+ "attachp");
+cmdline_parse_token_string_t cmd_dev_attachp_device =
+ TOKEN_STRING_INITIALIZER(struct cmd_dev_attachp_result, device, NULL);
+
+cmdline_parse_inst_t cmd_attachp_device = {
+ .f = cmd_dev_attachp_parsed, /* function to call */
+ .data = NULL, /* 2nd arg of func */
+ .help_str = "attach a private device",
+ .tokens = { /* token list, NULL terminated */
+ (void *)&cmd_dev_attachp_attachp,
+ (void *)&cmd_dev_attachp_device,
+ NULL,
+ },
+};
+
+/**********************************************************/
+
+struct cmd_dev_detach_result {
+ cmdline_fixed_string_t detach;
+ uint16_t port_id;
+};
+
+/*
+ * Handler for "detach <port_id>": detach the device behind the given
+ * port through rte_eth_dev_detach() and report the outcome.
+ */
+static void cmd_dev_detach_parsed(void *parsed_result,
+				  struct cmdline *cl,
+				  __attribute__((unused)) void *data)
+{
+	struct cmd_dev_detach_result *res = parsed_result;
+	uint16_t port_id = res->port_id;
+	/* Name buffer required by the API signature; its content is not used. */
+	char dev_name[RTE_DEV_NAME_MAX_LEN];
+	int ret;
+
+	printf("detaching...\n");
+	ret = rte_eth_dev_detach(port_id, dev_name);
+	if (ret == 0)
+		cmdline_printf(cl, "detached device at port %d\n",
+			       port_id);
+	else
+		cmdline_printf(cl,
+			       "failed to detach device at port %d, ret = %d\n",
+			       port_id, ret);
+}
+
+cmdline_parse_token_string_t cmd_dev_detach_detach =
+ TOKEN_STRING_INITIALIZER(struct cmd_dev_detach_result, detach,
+ "detach");
+cmdline_parse_token_num_t cmd_dev_detach_port_id =
+ TOKEN_NUM_INITIALIZER(struct cmd_dev_detach_result, port_id, UINT16);
+
+cmdline_parse_inst_t cmd_detach_device = {
+ .f = cmd_dev_detach_parsed, /* function to call */
+ .data = NULL, /* 2nd arg of func */
+ .help_str = "detach a device",
+ .tokens = { /* token list, NULL terminated */
+ (void *)&cmd_dev_detach_detach,
+ (void *)&cmd_dev_detach_port_id,
+ NULL,
+ },
+};
+
+/**********************************************************/
+
+struct cmd_dev_detachp_result {
+ cmdline_fixed_string_t detachp;
+ uint16_t port_id;
+};
+
+/*
+ * Handler for "detachp <port_id>": detach the device behind the given
+ * port through rte_eth_dev_detach_private() and report the outcome.
+ */
+static void cmd_dev_detachp_parsed(void *parsed_result,
+				   struct cmdline *cl,
+				   __attribute__((unused)) void *data)
+{
+	struct cmd_dev_detachp_result *args = parsed_result;
+	char name_buf[RTE_DEV_NAME_MAX_LEN];
+	int rc;
+
+	printf("detaching...\n");
+
+	rc = rte_eth_dev_detach_private(args->port_id, name_buf);
+	if (rc != 0) {
+		cmdline_printf(cl, "failed to detach private device at port %d\n",
+			       args->port_id);
+		return;
+	}
+
+	cmdline_printf(cl, "detached private device at port %d\n",
+		       args->port_id);
+}
+
+cmdline_parse_token_string_t cmd_dev_detachp_detachp =
+ TOKEN_STRING_INITIALIZER(struct cmd_dev_detachp_result, detachp,
+ "detachp");
+cmdline_parse_token_num_t cmd_dev_detachp_port_id =
+ TOKEN_NUM_INITIALIZER(struct cmd_dev_detachp_result, port_id, UINT16);
+
+cmdline_parse_inst_t cmd_detachp_device = {
+ .f = cmd_dev_detachp_parsed, /* function to call */
+ .data = NULL, /* 2nd arg of func */
+ .help_str = "detach a private device",
+ .tokens = { /* token list, NULL terminated */
+ (void *)&cmd_dev_detachp_detachp,
+ (void *)&cmd_dev_detachp_port_id,
+ NULL,
+ },
+};
+
+/**********************************************************/
+
+struct cmd_dev_lock_result {
+ cmdline_fixed_string_t lock;
+ uint16_t port_id;
+};
+
+/*
+ * Handler for "lock <port_id>": call rte_eth_dev_lock() on the port
+ * and print its return code.
+ */
+static void cmd_dev_lock_parsed(void *parsed_result,
+				struct cmdline *cl,
+				__attribute__((unused)) void *data)
+{
+	struct cmd_dev_lock_result *args = parsed_result;
+	int status = rte_eth_dev_lock(args->port_id);
+
+	cmdline_printf(cl, "lock port %d, ret = %d\n", args->port_id, status);
+}
+
+cmdline_parse_token_string_t cmd_dev_lock_lock =
+ TOKEN_STRING_INITIALIZER(struct cmd_dev_lock_result, lock, "lock");
+cmdline_parse_token_num_t cmd_dev_lock_port_id =
+ TOKEN_NUM_INITIALIZER(struct cmd_dev_lock_result, port_id, UINT16);
+
+cmdline_parse_inst_t cmd_lock_device = {
+ .f = cmd_dev_lock_parsed, /* function to call */
+ .data = NULL, /* 2nd arg of func */
+ .help_str = "lock a device",
+ .tokens = { /* token list, NULL terminated */
+ (void *)&cmd_dev_lock_lock,
+ (void *)&cmd_dev_lock_port_id,
+ NULL,
+ },
+};
+
+/**********************************************************/
+
+struct cmd_dev_unlock_result {
+ cmdline_fixed_string_t unlock;
+ uint16_t port_id;
+};
+
+/*
+ * Handler for "unlock <port_id>": call rte_eth_dev_unlock() on the port
+ * and print its return code.
+ */
+static void cmd_dev_unlock_parsed(void *parsed_result,
+				  struct cmdline *cl,
+				  __attribute__((unused)) void *data)
+{
+	struct cmd_dev_unlock_result *args = parsed_result;
+	int status = rte_eth_dev_unlock(args->port_id);
+
+	cmdline_printf(cl, "unlock port %d, ret = %d\n", args->port_id, status);
+}
+
+cmdline_parse_token_string_t cmd_dev_unlock_unlock =
+ TOKEN_STRING_INITIALIZER(struct cmd_dev_unlock_result, unlock,
+ "unlock");
+cmdline_parse_token_num_t cmd_dev_unlock_port_id =
+ TOKEN_NUM_INITIALIZER(struct cmd_dev_unlock_result, port_id, UINT16);
+
+cmdline_parse_inst_t cmd_unlock_device = {
+ .f = cmd_dev_unlock_parsed, /* function to call */
+ .data = NULL, /* 2nd arg of func */
+ .help_str = "unlock a device",
+ .tokens = { /* token list, NULL terminated */
+ (void *)&cmd_dev_unlock_unlock,
+ (void *)&cmd_dev_unlock_port_id,
+ NULL,
+ },
+};
+
+/**********************************************************/
+/**********************************************************/
+/****** CONTEXT (list of instruction) */
+
+cmdline_parse_ctx_t main_ctx[] = {
+ (cmdline_parse_inst_t *)&cmd_help,
+ (cmdline_parse_inst_t *)&cmd_quit,
+ (cmdline_parse_inst_t *)&cmd_list,
+ (cmdline_parse_inst_t *)&cmd_attach_device,
+ (cmdline_parse_inst_t *)&cmd_detach_device,
+ (cmdline_parse_inst_t *)&cmd_attachp_device,
+ (cmdline_parse_inst_t *)&cmd_detachp_device,
+ (cmdline_parse_inst_t *)&cmd_lock_device,
+ (cmdline_parse_inst_t *)&cmd_unlock_device,
+ NULL,
+};
diff --git a/examples/devmgm_mp/commands.h b/examples/devmgm_mp/commands.h
new file mode 100644
index 000000000..afcf177db
--- /dev/null
+++ b/examples/devmgm_mp/commands.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#ifndef _COMMANDS_H_
+#define _COMMANDS_H_
+
+extern cmdline_parse_ctx_t main_ctx[];
+
+#endif /* _COMMANDS_H_ */
diff --git a/examples/devmgm_mp/main.c b/examples/devmgm_mp/main.c
new file mode 100644
index 000000000..9bf0c9962
--- /dev/null
+++ b/examples/devmgm_mp/main.c
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <stdint.h>
+#include <errno.h>
+#include <termios.h>
+#include <sys/queue.h>
+
+#include <cmdline_rdline.h>
+#include <cmdline_parse.h>
+#include <cmdline_socket.h>
+#include <cmdline.h>
+
+#include <rte_memory.h>
+#include <rte_eal.h>
+#include <rte_debug.h>
+
+#include "commands.h"
+
+/*
+ * Initialize the EAL, then run an interactive command line on stdin
+ * using the commands in main_ctx until the user quits.
+ */
+int main(int argc, char **argv)
+{
+	struct cmdline *shell;
+
+	if (rte_eal_init(argc, argv) < 0)
+		rte_panic("Cannot init EAL\n");
+
+	shell = cmdline_stdin_new(main_ctx, "example> ");
+	if (!shell)
+		rte_panic("Cannot create cmdline instance\n");
+
+	cmdline_interact(shell);
+	cmdline_stdin_exit(shell);
+
+	return 0;
+}
diff --git a/examples/devmgm_mp/meson.build b/examples/devmgm_mp/meson.build
new file mode 100644
index 000000000..cec7be717
--- /dev/null
+++ b/examples/devmgm_mp/meson.build
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Intel Corporation
+
+# meson file, for building this example as part of a main DPDK build.
+#
+# To build this example as a standalone application with an already-installed
+# DPDK instance, use 'make'
+
+sources = files(
+ 'commands.c', 'main.c'
+)
--
2.13.6
Burakov, Anatoly
2018-06-21 07:54:05 UTC
Permalink
Post by Qi Zhang
The sample code demonstrates device (ethdev only) management
in a multi-process environment. The user can attach/detach a device
on the primary process and see it synced to the secondary process
automatically; the user can also lock a device to prevent it from being
detached, or unlock it to go back to the default behaviour.
How to start?
./devmgm_mp --proc-type=auto
help
list
/* attach a af_packet vdev */
attach net_af_packet,iface=eth0
/* detach port 0 */
detach 0
/* attach a private af_packet vdev (secondary process only)*/
attachp net_af_packet,iface=eth0
/* detach a private device (secondary process only) */
detachp 0
/* lock port 0 */
lock 0
/* unlock port 0 */
unlock 0
---
Hi Qi,

I believe you've missed my comments for v1 of this patch.
--
Thanks,
Anatoly
Qi Zhang
2018-06-21 02:00:41 UTC
Permalink
We are going to introduce a solution to handle the different hotplug
cases in a multi-process situation. It covers the scenarios below:

1. Attach a shared device from primary
2. Detach a shared device from primary
3. Attach a shared device from secondary
4. Detach a shared device from secondary
5. Attach a private device from secondary
6. Detach a private device from secondary
7. Detach a shared device from secondary privately
8. Attach a shared device from secondary privately

In the primary-secondary process model, we assume a device is shared by
default. That means an attach or detach of a device on any process is
broadcast to all other processes through the mp channel, so that device
information is synchronized on all processes.

Any failure during the attach process will cause inconsistent status
between processes, so a proper rollback action should be considered.
Also, it is not safe to detach a shared device while another process still
uses it, so a handshake mechanism is introduced.

This patch covers the implementation of case 1,2,5,6,7,8.
Case 3,4 will be implemented on separate patch as well as handshake
mechanism.

Scenario for Case 1, 2:

attach device
a) primary attach the new device if failed goto h).
b) primary send attach sync request to all secondary.
c) secondary receive request and attach device and send reply.
d) primary check the reply if all success go to i).
e) primary send attach rollback sync request to all secondary.
f) secondary receive the request and detach device and send reply.
g) primary receive the reply and detach device as rollback action.
h) attach fail
i) attach success

detach device
a) primary perform pre-detach check, if device is locked, goto i).
b) primary send pre-detach sync request to all secondary.
c) secondary perform pre-detach check and send reply.
d) primary check the reply if any fail goto i).
e) primary send detach sync request to all secondary
f) secondary detach the device and send reply (assume no fail)
g) primary detach the device.
h) detach success
i) detach failed

Case 5, 6:
Secondary process can attach private device which only visible to itself,
in this case no IPC is involved, primary process is not allowed to have
private device so far.

Case 7, 8:
A secondary process can also temporarily detach a shared device "privately"
and attach it back later; this action does not impact other processes either.

APIs changes:

rte_eth_dev_attach and rte_eth_dev_detach are extended to support
shared device attach/detach in the primary-secondary process model; they
will be called in cases 1, 2, 3 and 4.

New API rte_eth_dev_attach_private and rte_eth_dev_detach_private are
introduced to cover case 5,6,7,8, this API can only be invoked in secondary
process.

Signed-off-by: Qi Zhang <***@intel.com>
---

v2:
- rename rte_ethdev_mp.* to ethdev_mp.*
- add experimental tag for rte_eth_dev_attach_private and
rte_eth_dev_detach_private.
- move do_eth_dev_attach and do_eth_dev_detach to ethdev_private.h
- move rte_eth_dev_mp_init before rte_eal_mcfg_complete.
- fix meson.build.
- improve commit log.

lib/librte_eal/common/eal_private.h | 8 ++
lib/librte_eal/linuxapp/eal/eal.c | 7 ++
lib/librte_ethdev/Makefile | 1 +
lib/librte_ethdev/ethdev_mp.c | 198 ++++++++++++++++++++++++++++++++++++
lib/librte_ethdev/ethdev_mp.h | 44 ++++++++
lib/librte_ethdev/ethdev_private.h | 39 +++++++
lib/librte_ethdev/meson.build | 1 +
lib/librte_ethdev/rte_ethdev.c | 184 +++++++++++++++++++++++++++++----
lib/librte_ethdev/rte_ethdev.h | 45 ++++++++
lib/librte_ethdev/rte_ethdev_core.h | 5 +
10 files changed, 515 insertions(+), 17 deletions(-)
create mode 100644 lib/librte_ethdev/ethdev_mp.c
create mode 100644 lib/librte_ethdev/ethdev_mp.h
create mode 100644 lib/librte_ethdev/ethdev_private.h

diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h
index bdadc4d50..92fa59bed 100644
--- a/lib/librte_eal/common/eal_private.h
+++ b/lib/librte_eal/common/eal_private.h
@@ -258,4 +258,12 @@ int rte_mp_channel_init(void);
*/
void dev_callback_process(char *device_name, enum rte_dev_event_type event);

+/**
+ * Register mp channel callback functions of ethdev layer.
+ *
+ * @return
+ * 0 on success.
+ * (<0) on failure.
+ */
+int rte_eth_dev_mp_init(void);
#endif /* _EAL_PRIVATE_H_ */
diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c
index 8655b8691..b7788c42d 100644
--- a/lib/librte_eal/linuxapp/eal/eal.c
+++ b/lib/librte_eal/linuxapp/eal/eal.c
@@ -1024,6 +1024,13 @@ rte_eal_init(int argc, char **argv)
return -1;
}

+ /* Initialize mp channel for ethdev layer */
+ if (rte_eth_dev_mp_init()) {
+ rte_eal_init_alert("rte_eth_dev_mp_init() failed\n");
+ rte_errno = ENOEXEC;
+ return -1;
+ }
+
#ifdef VFIO_PRESENT
/* Register mp action after probe() so that we got enough info */
if (rte_vfio_is_enabled("vfio") && vfio_mp_sync_setup() < 0)
diff --git a/lib/librte_ethdev/Makefile b/lib/librte_ethdev/Makefile
index c2f2f7d82..d0a059b83 100644
--- a/lib/librte_ethdev/Makefile
+++ b/lib/librte_ethdev/Makefile
@@ -19,6 +19,7 @@ EXPORT_MAP := rte_ethdev_version.map
LIBABIVER := 9

SRCS-y += rte_ethdev.c
+SRCS-y += ethdev_mp.c
SRCS-y += rte_flow.c
SRCS-y += rte_tm.c
SRCS-y += rte_mtr.c
diff --git a/lib/librte_ethdev/ethdev_mp.c b/lib/librte_ethdev/ethdev_mp.c
new file mode 100644
index 000000000..73dd36485
--- /dev/null
+++ b/lib/librte_ethdev/ethdev_mp.c
@@ -0,0 +1,198 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2018 Intel Corporation
+ */
+
+#include <rte_string_fns.h>
+#include "rte_ethdev_driver.h"
+#include "ethdev_mp.h"
+
+#define MP_TIMEOUT_S 5 /**< 5 seconds timeouts */
+
+/*
+ * Detach the device behind port_id in this (secondary) process:
+ * hot-unplug it from its bus, then release the local port entry.
+ *
+ * Returns 0 on success, -ENODEV if the port is unused, -EINVAL if the
+ * port has no device, -ENOENT if the device's bus cannot be found, or
+ * the error returned by rte_eal_hotplug_remove().
+ */
+static int detach_on_secondary(uint16_t port_id)
+{
+	struct rte_eth_dev *ethdev = &rte_eth_devices[port_id];
+	struct rte_device *rdev;
+	struct rte_bus *owner;
+	int rc;
+
+	if (ethdev->state == RTE_ETH_DEV_UNUSED) {
+		ethdev_log(ERR, "detach on secondary: invalid port %d\n",
+			   port_id);
+		return -ENODEV;
+	}
+
+	rdev = ethdev->device;
+	if (rdev == NULL)
+		return -EINVAL;
+
+	owner = rte_bus_find_by_device(rdev);
+	if (owner == NULL)
+		return -ENOENT;
+
+	rc = rte_eal_hotplug_remove(owner->name, rdev->name);
+	if (rc != 0) {
+		ethdev_log(ERR, "failed to hot unplug bus: %s, device:%s\n",
+			   owner->name, rdev->name);
+		return rc;
+	}
+
+	rte_eth_dev_release_port_private(ethdev);
+	return 0;
+}
+
+/*
+ * Attach the device described by devargs in this (secondary) process
+ * and check that it landed on the port id chosen by the primary.
+ *
+ * Returns 0 on success, -EINVAL if the port is already in use or the
+ * devargs cannot be parsed, the error from rte_eal_hotplug_add() if
+ * hotplug fails, or -ENODEV if the port is still unused after hotplug.
+ */
+static int attach_on_secondary(const char *devargs, uint16_t port_id)
+{
+	struct rte_devargs da;
+	int ret;
+
+	if (rte_eth_devices[port_id].state != RTE_ETH_DEV_UNUSED) {
+		ethdev_log(ERR, "port %d already in used, failed to attach\n",
+			   port_id);
+		return -EINVAL;
+	}
+
+	memset(&da, 0, sizeof(da));
+
+	if (rte_devargs_parse(&da, "%s", devargs)) {
+		ethdev_log(ERR, "failed to parse devargs %s\n", devargs);
+		return -EINVAL;
+	}
+
+	/* NOTE(review): the parsed da.args are not forwarded here (an empty
+	 * string is passed instead) - confirm this is intended.
+	 */
+	ret = rte_eal_hotplug_add(da.bus->name, da.name, "");
+	if (ret) {
+		ethdev_log(ERR, "failed to hotplug bus:%s, device:%s\n",
+			   da.bus->name, da.name);
+		goto out;
+	}
+
+	if (rte_eth_devices[port_id].state == RTE_ETH_DEV_UNUSED) {
+		ethdev_log(ERR, "failed to attach to port %d, this is a pmd issue\n",
+			   port_id);
+		ret = -ENODEV;
+	}
+
+out:
+	/* Single exit after a successful parse so da.args is always freed. */
+	free(da.args);
+	return ret;
+}
+
+/*
+ * Action registered in the primary process for ETH_DEV_MP_ACTION_REQUEST.
+ * Not implemented yet: ignores both arguments and returns -ENOTSUP.
+ */
+static int handle_secondary_request(const struct rte_mp_msg *msg, const void *peer)
+{
+ RTE_SET_USED(msg);
+ RTE_SET_USED(peer);
+ return -ENOTSUP;
+}
+
+/*
+ * Action registered in secondary processes for ETH_DEV_MP_ACTION_RESPONSE.
+ * Not implemented yet: ignores both arguments and returns -ENOTSUP.
+ */
+static int handle_primary_response(const struct rte_mp_msg *msg, const void *peer)
+{
+ RTE_SET_USED(msg);
+ RTE_SET_USED(peer);
+ return -ENOTSUP;
+}
+
+/*
+ * Action run in a secondary process when the primary sends an
+ * ETH_DEV_MP_ACTION_REQUEST message: perform the requested attach or
+ * detach step locally, then reply with a copy of the request whose
+ * 'result' field holds the local status.
+ *
+ * Returns 0 once the reply has been sent, -1 if sending it failed.
+ */
+static int handle_primary_request(const struct rte_mp_msg *msg, const void *peer)
+{
+	const struct eth_dev_mp_req *request =
+		(const struct eth_dev_mp_req *)msg->param;
+	struct rte_mp_msg reply;
+	struct eth_dev_mp_req *answer;
+	int status;
+
+	if (request->t == REQ_TYPE_ATTACH)
+		status = attach_on_secondary(request->devargs,
+					     request->port_id);
+	else if (request->t == REQ_TYPE_PRE_DETACH)
+		/* no pre-detach check is performed here: always accept */
+		status = 0;
+	else if (request->t == REQ_TYPE_DETACH ||
+		 request->t == REQ_TYPE_ATTACH_ROLLBACK)
+		status = detach_on_secondary(request->port_id);
+	else
+		status = -EINVAL;
+
+	/* Build the reply: echo the request and attach the local status. */
+	memset(&reply, 0, sizeof(reply));
+	strlcpy(reply.name, ETH_DEV_MP_ACTION_REQUEST, sizeof(reply.name));
+	reply.len_param = sizeof(*request);
+	answer = (struct eth_dev_mp_req *)reply.param;
+	memcpy(answer, request, sizeof(*answer));
+	answer->result = status;
+
+	if (rte_mp_reply(&reply, peer) < 0) {
+		ethdev_log(ERR, "failed to send reply to primary request\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Send a request from a secondary process to the primary process.
+ * Not implemented yet: ignores the request and returns -ENOTSUP.
+ */
+int rte_eth_dev_request_to_primary(struct eth_dev_mp_req *req)
+{
+ RTE_SET_USED(req);
+ return -ENOTSUP;
+}
+
+/**
+ * Request from primary to secondary.
+ *
+ * Invoked when the primary process attaches or detaches a shared device.
+ * The request is sent synchronously over the mp channel, with a timeout
+ * of MP_TIMEOUT_S seconds.
+ *
+ * @param req
+ *   Request to send. On success, req->result is set to 0 if every reply
+ *   reported success, otherwise to the first non-zero result received.
+ *
+ * @return
+ *   0 if the request was sent and the replies were collected, otherwise
+ *   the error returned by rte_mp_request_sync().
+ */
+int rte_eth_dev_request_to_secondary(struct eth_dev_mp_req *req)
+{
+	struct rte_mp_msg mp_req;
+	struct rte_mp_reply mp_reply;
+	struct timespec ts = {.tv_sec = MP_TIMEOUT_S, .tv_nsec = 0};
+	int ret;
+	int i;
+
+	memset(&mp_req, 0, sizeof(mp_req));
+	memcpy(mp_req.param, req, sizeof(*req));
+	mp_req.len_param = sizeof(*req);
+	strlcpy(mp_req.name, ETH_DEV_MP_ACTION_REQUEST, sizeof(mp_req.name));
+
+	ret = rte_mp_request_sync(&mp_req, &mp_reply, &ts);
+	if (ret) {
+		ethdev_log(ERR, "rte_mp_request_sync failed\n");
+		return ret;
+	}
+
+	req->result = 0;
+	for (i = 0; i < mp_reply.nb_received; i++) {
+		struct eth_dev_mp_req *resp =
+			(struct eth_dev_mp_req *)mp_reply.msgs[i].param;
+		if (resp->result) {
+			req->result = resp->result;
+			break;
+		}
+	}
+
+	/* The reply array is allocated by rte_mp_request_sync() and must be
+	 * freed by the caller.
+	 */
+	free(mp_reply.msgs);
+
+	return 0;
+}
+
+/*
+ * Register the ethdev mp channel actions for the current process type:
+ * the primary registers the handler for secondary requests; a secondary
+ * registers the handlers for primary responses and primary requests.
+ *
+ * Returns 0 on success, -1 if any action could not be registered.
+ */
+int rte_eth_dev_mp_init(void)
+{
+
+	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+		if (rte_mp_action_register(ETH_DEV_MP_ACTION_REQUEST,
+					   handle_secondary_request)) {
+			RTE_LOG(ERR, EAL, "Couldn't register '%s' action\n",
+				ETH_DEV_MP_ACTION_REQUEST);
+			return -1;
+		}
+	} else {
+		if (rte_mp_action_register(ETH_DEV_MP_ACTION_RESPONSE,
+					   handle_primary_response)) {
+			RTE_LOG(ERR, EAL, "Couldn't register '%s' action\n",
+				ETH_DEV_MP_ACTION_RESPONSE);
+			return -1;
+		}
+		if (rte_mp_action_register(ETH_DEV_MP_ACTION_REQUEST,
+					   handle_primary_request)) {
+			RTE_LOG(ERR, EAL, "Couldn't register '%s' action\n",
+				ETH_DEV_MP_ACTION_REQUEST);
+			/* Report the failure like the other registrations do
+			 * instead of silently returning success.
+			 */
+			return -1;
+		}
+	}
+
+	return 0;
+}
diff --git a/lib/librte_ethdev/ethdev_mp.h b/lib/librte_ethdev/ethdev_mp.h
new file mode 100644
index 000000000..c3e55dfec
--- /dev/null
+++ b/lib/librte_ethdev/ethdev_mp.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#ifndef _RTE_ETHDEV_MP_H_
+#define _RTE_ETHDEV_MP_H_
+
+#define MAX_DEV_ARGS_LEN 0x80
+
+#define ETH_DEV_MP_ACTION_REQUEST "eth_dev_mp_request"
+#define ETH_DEV_MP_ACTION_RESPONSE "eth_dev_mp_response"
+
+/* Request types carried in struct eth_dev_mp_req over the mp channel. */
+enum eth_dev_req_type {
+ REQ_TYPE_ATTACH, /* attach the device described by devargs */
+ REQ_TYPE_PRE_DETACH, /* pre-detach check, sent before a detach */
+ REQ_TYPE_DETACH, /* detach the device at port_id */
+ REQ_TYPE_ATTACH_ROLLBACK, /* undo an attach: detach the device at port_id */
+};
+
+/*
+ * Payload of an ethdev mp request; the same layout is echoed back as
+ * the reply with 'result' filled in by the responder.
+ */
+struct eth_dev_mp_req {
+ enum eth_dev_req_type t; /* what the receiver is asked to do */
+ char devargs[MAX_DEV_ARGS_LEN]; /* device arguments string, used by attach */
+ uint16_t port_id; /* port the request applies to */
+ int result; /* status reported by the responder, 0 on success */
+};
+
+/**
+ * Synchronous wrapper used by a secondary process to send a
+ * request to the primary process. It is invoked when an attach
+ * or detach request is issued from a secondary process.
+ */
+int rte_eth_dev_request_to_primary(struct eth_dev_mp_req *req);
+
+/**
+ * Synchronous wrapper used by the primary process to send a
+ * request to the secondary processes. It is invoked when an attach
+ * or detach request is issued from the primary process.
+ */
+int rte_eth_dev_request_to_secondary(struct eth_dev_mp_req *req);
+
+/* Register mp channel callback functions of ethdev layer.*/
+int rte_eth_dev_mp_init(void);
+
+#endif
diff --git a/lib/librte_ethdev/ethdev_private.h b/lib/librte_ethdev/ethdev_private.h
new file mode 100644
index 000000000..981e7de8a
--- /dev/null
+++ b/lib/librte_ethdev/ethdev_private.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2018 Intel Corporation
+ */
+
+#ifndef _ETHDEV_PRIVATE_H_
+#define _ETHDEV_PRIVATE_H_
+
+/**
+ * Attach a new Ethernet device in current process.
+ *
+ * @param devargs
+ * A pointer to a strings array describing the new device
+ * to be attached. The strings should be a pci address like
+ * '0000:01:00.0' or virtual device name like 'net_pcap0'.
+ *
+ * @param port_id
+ * A pointer to a port identifier actually attached.
+ *
+ * @return
+ * 0 on success and port_id is filled, negative on error
+ */
+int do_eth_dev_attach(const char *devargs, uint16_t *port_id);
+
+/**
+ * Detach an Ethernet device in the current process.
+ *
+ * @param port_id
+ * The port identifier of the device to detach.
+ *
+ * @return
+ * 0 on success, negative on error
+ */
+int do_eth_dev_detach(uint16_t port_id);
+
+#endif
diff --git a/lib/librte_ethdev/meson.build b/lib/librte_ethdev/meson.build
index aed5d2265..b60256855 100644
--- a/lib/librte_ethdev/meson.build
+++ b/lib/librte_ethdev/meson.build
@@ -5,6 +5,7 @@ name = 'ethdev'
version = 9
allow_experimental_apis = true
sources = files('ethdev_profile.c',
+ 'ethdev_mp.c',
'rte_ethdev.c',
'rte_flow.c',
'rte_mtr.c',
diff --git a/lib/librte_ethdev/rte_ethdev.c b/lib/librte_ethdev/rte_ethdev.c
index 205b2ee33..77f53a634 100644
--- a/lib/librte_ethdev/rte_ethdev.c
+++ b/lib/librte_ethdev/rte_ethdev.c
@@ -41,11 +41,13 @@
#include "rte_ethdev.h"
#include "rte_ethdev_driver.h"
#include "ethdev_profile.h"
+#include "ethdev_mp.h"
+#include "ethdev_private.h"

-static int ethdev_logtype;
+int ethdev_logtype;

-#define ethdev_log(level, fmt, ...) \
- rte_log(RTE_LOG_ ## level, ethdev_logtype, fmt "\n", ## __VA_ARGS__)
+#define RTE_ETH_MP_ACTION_REQUEST "rte_eth_mp_request"
+#define RTE_ETH_MP_ACTION_RESPONSE "rte_eth_mp_response"

static const char *MZ_RTE_ETH_DEV_DATA = "rte_eth_dev_data";
struct rte_eth_dev rte_eth_devices[RTE_MAX_ETHPORTS];
@@ -656,9 +658,8 @@ eth_err(uint16_t port_id, int ret)
return ret;
}

-/* attach the new device, then store port_id of the device */
int
-rte_eth_dev_attach(const char *devargs, uint16_t *port_id)
+do_eth_dev_attach(const char *devargs, uint16_t *port_id)
{
int current = rte_eth_dev_count_total();
struct rte_devargs da;
@@ -703,14 +704,104 @@ rte_eth_dev_attach(const char *devargs, uint16_t *port_id)
return ret;
}

-/* detach the device, then store the name of the device */
int
-rte_eth_dev_detach(uint16_t port_id, char *name __rte_unused)
+do_eth_dev_detach(uint16_t port_id)
{
struct rte_device *dev;
struct rte_bus *bus;
+ int ret = 0;
+
+ dev = rte_eth_devices[port_id].device;
+ if (dev == NULL)
+ return -EINVAL;
+
+ bus = rte_bus_find_by_device(dev);
+ if (bus == NULL)
+ return -ENOENT;
+
+ ret = rte_eal_hotplug_remove(bus->name, dev->name);
+ if (ret < 0)
+ return ret;
+
+ rte_eth_dev_release_port(&rte_eth_devices[port_id]);
+ return ret;
+
+}
+
+/*
+ * Attach the new device, then store the port_id of the device.
+ *
+ * In a secondary process the request is forwarded to the primary. In the
+ * primary process the device is attached locally first, then an attach
+ * request is sent to the secondary processes; if sending it fails or any
+ * secondary reports a failure, the attach is rolled back everywhere and
+ * -ENODEV is returned.
+ */
+int
+rte_eth_dev_attach(const char *devargs, uint16_t *port_id)
+{
+	/* Zero-initialized, as in rte_eth_dev_detach():
+	 * rte_eth_dev_request_to_secondary() copies the whole structure into
+	 * the IPC message, so no field may be left holding stack garbage.
+	 */
+	struct eth_dev_mp_req req = {0};
+	int ret;
+
+	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+
+		/**
+		 * If secondary process, we just send request to primary
+		 * to start the process.
+		 */
+		req.t = REQ_TYPE_ATTACH;
+		strlcpy(req.devargs, devargs, MAX_DEV_ARGS_LEN);
+
+		ret = rte_eth_dev_request_to_primary(&req);
+		if (ret) {
+			ethdev_log(ERR, "Failed to send device attach request to primary\n");
+			return ret;
+		}
+
+		*port_id = req.port_id;
+		return req.result;
+	}
+
+	ret = do_eth_dev_attach(devargs, port_id);
+	if (ret)
+		return ret;
+
+	/* send attach request to secondary */
+	req.t = REQ_TYPE_ATTACH;
+	strlcpy(req.devargs, devargs, MAX_DEV_ARGS_LEN);
+	req.port_id = *port_id;
+	ret = rte_eth_dev_request_to_secondary(&req);
+	if (ret) {
+		ethdev_log(ERR, "Failed to send device attach request to secondary\n");
+		goto rollback;
+	}
+
+	if (req.result)
+		goto rollback;
+
+	return 0;
+
+rollback:
+	/* send rollback request to secondary since some one fail to attach */
+	req.t = REQ_TYPE_ATTACH_ROLLBACK;
+	req.port_id = *port_id;
+	/* best effort: the attach has already failed, so the outcome of the
+	 * rollback request is not checked.
+	 */
+	rte_eth_dev_request_to_secondary(&req);
+
+	do_eth_dev_detach(*port_id);
+
+	return -ENODEV;
+}
+
+/*
+ * Attach a device in the calling process only and store its port_id.
+ * Not supported in the primary process (-ENOTSUP).
+ */
+int
+rte_eth_dev_attach_private(const char *devargs, uint16_t *port_id)
+{
+	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+		return do_eth_dev_attach(devargs, port_id);
+
+	return -ENOTSUP;
+}
+
+/* detach the device, then store the name of the device */
+int
+rte_eth_dev_detach(uint16_t port_id, char *name __rte_unused)
+{
+ struct eth_dev_mp_req req = {0};
+ int ret;
uint32_t dev_flags;
- int ret = -1;

RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);

@@ -721,22 +812,81 @@ rte_eth_dev_detach(uint16_t port_id, char *name __rte_unused)
return -ENOTSUP;
}

- dev = rte_eth_devices[port_id].device;
- if (dev == NULL)
- return -EINVAL;
+ if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+ req.t = REQ_TYPE_DETACH;
+ req.port_id = port_id;

- bus = rte_bus_find_by_device(dev);
- if (bus == NULL)
- return -ENOENT;
+ /**
+ * If secondary process, we just send request to primary
+ * to start the process.
+ */
+ ret = rte_eth_dev_request_to_primary(&req);
+ if (ret) {
+ ethdev_log(ERR, "Failed to send device detach request to primary\n");
+ return ret;
+ }

- ret = rte_eal_hotplug_remove(bus->name, dev->name);
- if (ret < 0)
+ return req.result;
+ }
+
+ /* check pre_detach */
+ req.t = REQ_TYPE_PRE_DETACH;
+ req.port_id = port_id;
+ ret = rte_eth_dev_request_to_secondary(&req);
+ if (ret) {
+ ethdev_log(ERR, "Failed to send device pre-detach request to secondary\n");
+ return ret;
+ }
+
+ if (req.result) {
+ ethdev_log(ERR, "Device is busy on secondary, can't be detached\n");
+ return req.result;
+ }
+
+ /* detach on seconary first */
+ req.t = REQ_TYPE_DETACH;
+ ret = rte_eth_dev_request_to_secondary(&req);
+ if (ret) {
+ ethdev_log(ERR, "Failed to send device detach request to secondary\n");
+ return ret;
+ }
+
+ if (req.result)
+ /**
+ * this should rarely happen, something wrong in secondary
+ * process, will not block primary detach.
+ */
+ ethdev_log(ERR, "Failed to detach device on secondary process\n");
+
+ /* detach on primary */
+ ret = do_eth_dev_detach(port_id);
+ if (ret)
return ret;

- rte_eth_dev_release_port(&rte_eth_devices[port_id]);
return 0;
}

+/*
+ * Detach a device in the calling process only. The name argument is
+ * unused. Not supported in the primary process or for a bonded slave
+ * port (-ENOTSUP); an invalid port id yields -EINVAL.
+ */
+int
+rte_eth_dev_detach_private(uint16_t port_id, char *name __rte_unused)
+{
+	const struct rte_eth_dev_data *port_data;
+
+	if (rte_eal_process_type() == RTE_PROC_PRIMARY)
+		return -ENOTSUP;
+
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
+
+	port_data = rte_eth_devices[port_id].data;
+	if ((port_data->dev_flags & RTE_ETH_DEV_BONDED_SLAVE) != 0) {
+		ethdev_log(ERR,
+			   "Port %" PRIu16 " is bonded, cannot detach", port_id);
+		return -ENOTSUP;
+	}
+
+	return do_eth_dev_detach(port_id);
+}
+
static int
rte_eth_dev_rx_queue_config(struct rte_eth_dev *dev, uint16_t nb_queues)
{
diff --git a/lib/librte_ethdev/rte_ethdev.h b/lib/librte_ethdev/rte_ethdev.h
index 36e3984ea..813806e3c 100644
--- a/lib/librte_ethdev/rte_ethdev.h
+++ b/lib/librte_ethdev/rte_ethdev.h
@@ -1462,6 +1462,9 @@ uint16_t __rte_experimental rte_eth_dev_count_total(void);

/**
* Attach a new Ethernet device specified by arguments.
+ * In multi-process mode, it will sync with other process
+ * to make sure all processes attach the device, any
+ * failure on other process will rollback the action.
*
* @param devargs
* A pointer to a strings array describing the new device
@@ -1475,9 +1478,31 @@ uint16_t __rte_experimental rte_eth_dev_count_total(void);
int rte_eth_dev_attach(const char *devargs, uint16_t *port_id);

/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Attach a private Ethernet device specified by arguments.
+ * A private device is invisible to other processes.
+ * Can only be invoked in a secondary process; the implementation
+ * returns -ENOTSUP when called from the primary process.
+ *
+ * @param devargs
+ * A pointer to a string describing the new device
+ * to be attached. The string should be a PCI address like
+ * '0000:01:00.0' or a virtual device name like 'net_pcap0'.
+ * @param port_id
+ * A pointer to a port identifier actually attached.
+ * @return
+ * 0 on success and port_id is filled, negative on error
+ */
+int __rte_experimental
+rte_eth_dev_attach_private(const char *devargs, uint16_t *port_id);
+
+/**
* Detach a Ethernet device specified by port identifier.
* This function must be called when the device is in the
* closed state.
+ * In multi-process mode, it will sync with other process
+ * to detach the device.
*
* @param port_id
* The port identifier of the device to detach.
@@ -1490,6 +1515,26 @@ int rte_eth_dev_attach(const char *devargs, uint16_t *port_id);
int rte_eth_dev_detach(uint16_t port_id, char *devname);

/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Detach a private Ethernet device specified by port identifier.
+ * This function must be called when the device is in the
+ * closed state.
+ * Can only be invoked in a secondary process; the implementation
+ * returns -ENOTSUP from the primary process and for a bonded slave port.
+ *
+ * @param port_id
+ * The port identifier of the device to detach.
+ * @param devname
+ * A pointer to a buffer that will be filled with the device name.
+ * This buffer must be at least RTE_DEV_NAME_MAX_LEN long.
+ * NOTE(review): the definition in rte_ethdev.c marks this parameter
+ * __rte_unused, so the buffer does not appear to be written - confirm
+ * the intended contract.
+ * @return
+ * 0 on success and devname is filled, negative on error
+ */
+int __rte_experimental
+rte_eth_dev_detach_private(uint16_t port_id, char *devname);
+
+/**
* Convert a numerical speed in Mbps to a bitmap flag that can be used in
* the bitmap link_speeds of the struct rte_eth_conf
*
diff --git a/lib/librte_ethdev/rte_ethdev_core.h b/lib/librte_ethdev/rte_ethdev_core.h
index 33d12b3a2..2cb6de745 100644
--- a/lib/librte_ethdev/rte_ethdev_core.h
+++ b/lib/librte_ethdev/rte_ethdev_core.h
@@ -622,4 +622,9 @@ struct rte_eth_dev_data {
*/
extern struct rte_eth_dev rte_eth_devices[];

+/* Log type passed to rte_log() by ethdev_log(). */
+extern int ethdev_logtype;
+/*
+ * Log a message at RTE_LOG_<level> with the ethdev log type.
+ *
+ * fmt is concatenated with "\n" at compile time, so it must be a string
+ * literal and should NOT end with its own newline (a trailing "\n" in
+ * fmt produces an empty line after the message).
+ */
+#define ethdev_log(level, fmt, ...) \
+ rte_log(RTE_LOG_ ## level, ethdev_logtype, fmt "\n", ## __VA_ARGS__)
+
+
#endif /* _RTE_ETHDEV_CORE_H_ */
--
2.13.6
Burakov, Anatoly
2018-06-21 08:36:50 UTC
Permalink
Post by Qi Zhang
We are going to introduce the solution to handle different hotplug
1. Attach a share device from primary
2. Detach a share device from primary
3. Attach a share device from secondary
4. Detach a share device from secondary
5. Attach a private device from secondary
6. Detach a private device from secondary
7. Detach a share device from secondary privately
8. Attach a share device from secondary privately
In primary-secondary process model, we assume device is shared by default.
that means attach or detach a device on any process will broadcast to
all other processes through mp channel then device information will be
synchronized on all processes.
Any failure during attaching process will cause inconsistent status
between processes, so proper rollback action should be considered.
Also it is not safe to detach a share device when other process still use
it, so a handshake mechanism is introduced.
This patch covers the implementation of case 1,2,5,6,7,8.
Case 3,4 will be implemented on separate patch as well as handshake
mechanism.
attach device
a) primary attach the new device if failed goto h).
b) primary send attach sync request to all secondary.
c) secondary receive request and attach device and send reply.
d) primary check the reply if all success go to i).
e) primary send attach rollback sync request to all secondary.
f) secondary receive the request and detach device and send reply.
g) primary receive the reply and detach device as rollback action.
h) attach fail
i) attach success
detach device
a) primary perform pre-detach check, if device is locked, goto i).
b) primary send pre-detach sync request to all secondary.
c) secondary perform pre-detach check and send reply.
d) primary check the reply if any fail goto i).
e) primary send detach sync request to all secondary
f) secondary detach the device and send reply (assume no fail)
g) primary detach the device.
h) detach success
i) detach failed
Secondary process can attach private device which only visible to itself,
in this case no IPC is involved, primary process is not allowed to have
private device so far.
Secondary process can also temporally to detach a share device "privately"
then attach it back later, this action also not impact other processes.
rte_eth_dev_attach and rte_eth_dev_detach are extended to support
shared device attach/detach in the primary-secondary process model; they
will be called in cases 1, 2, 3 and 4.
New API rte_eth_dev_attach_private and rte_eth_dev_detach_private are
introduced to cover case 5,6,7,8, this API can only be invoked in secondary
process.
---
<snip>
Post by Qi Zhang
+ memset(&da, 0, sizeof(da));
+
+ if (rte_devargs_parse(&da, "%s", devargs)) {
+ ethdev_log(ERR, "failed to parse devargs %s\n", devargs);
+ return -EINVAL;
+ }
+
+ ret = rte_eal_hotplug_add(da.bus->name, da.name, "");
+ if (ret) {
+ ethdev_log(ERR, "failed to hotplug bus:%s, device:%s\n",
+ da.bus->name, da.name);
+ free(da.args);
+ return ret;
+ }
+
+ if (rte_eth_devices[port_id].state == RTE_ETH_DEV_UNUSED) {
+ ethdev_log(ERR, "failed to attach to port %d, this is a pmd issue\n",
+ port_id);
+ return -ENODEV;
^^^ Leaking da.args here?
Post by Qi Zhang
+ }
+ free(da.args);
+ return 0;
+}
+
+static int handle_secondary_request(const struct rte_mp_msg *msg, const void *peer)
+{
+ RTE_SET_USED(msg);
+ RTE_SET_USED(peer);
+ return -ENOTSUP;
+}
+
+static int handle_primary_response(const struct rte_mp_msg *msg, const void *peer)
+{
<snip>
Post by Qi Zhang
+ ret = rte_mp_request_sync(&mp_req, &mp_reply, &ts);
+ if (ret) {
+ ethdev_log(ERR, "rte_mp_request_sync failed\n");
+ return ret;
+ }
+
+ req->result = 0;
+ for (i = 0; i < mp_reply.nb_received; i++) {
+ struct eth_dev_mp_req *resp =
+ (struct eth_dev_mp_req *)mp_reply.msgs[i].param;
+ if (resp->result) {
+ req->result = resp->result;
+ break;
+ }
+ }
Do we care if nb_sent != nb_received?
Post by Qi Zhang
+
+ return 0;
+}
+
+int rte_eth_dev_mp_init(void)
+{
+
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+ if (rte_mp_action_register(ETH_DEV_MP_ACTION_REQUEST,
<snip>
Post by Qi Zhang
+/**
+ * Synchronous wrapper used by a secondary process to send a
+ * request to the primary process; it is invoked when an attach
+ * or detach request is issued from a secondary process.
+ */
+int rte_eth_dev_request_to_primary(struct eth_dev_mp_req *req);
+
+/**
+ * Synchronous wrapper used by the primary process to send a
+ * request to the secondary processes; it is invoked when an attach
+ * or detach request is issued from the primary process.
+ */
+int rte_eth_dev_request_to_secondary(struct eth_dev_mp_req *req);
Nitpicking, but the two above functions aren't used outside ethdev
library. You can probably drop the rte_ prefix.
Post by Qi Zhang
+
+/* Register mp channel callback functions of ethdev layer.*/
+int rte_eth_dev_mp_init(void);
I don't quite understand what you're doing here. (Or rather, i
understand the intention, but i don't understand the implementation :) )

This function is meant to be called from EAL at startup. First of all,
why is it declared twice (once in eal_private, once in ethdev_private)?

Second of all, ethdev is a library, but this function is called from
EAL. Which means it cannot be in a private header (nor should it be
declared in EAL), and you cannot even call it from EAL because that
would introduce a circular dependency between EAL and ethdev.

So, this needs to be redone the other way around - have ethdev register
itself with EAL, and get called at some point, in a generic way (e.g.
see how bus probe works for example). I don't know what this would look
like - maybe some kind of generic multiprocess init?
--
Thanks,
Anatoly
Zhang, Qi Z
2018-06-21 09:14:39 UTC
Permalink
-----Original Message-----
From: Burakov, Anatoly
Sent: Thursday, June 21, 2018 4:37 PM
Subject: Re: [PATCH v2 04/22] ethdev: enable hotplug on multi-process
Post by Qi Zhang
We are going to introduce the solution to handle different hotplug
1. Attach a share device from primary
2. Detach a share device from primary
3. Attach a share device from secondary
4. Detach a share device from secondary
5. Attach a private device from secondary
6. Detach a private device from secondary
7. Detach a share device from secondary privately
8. Attach a share device from secondary privately
In primary-secondary process model, we assume device is shared by default.
that means attach or detach a device on any process will broadcast to
all other processes through mp channel then device information will be
synchronized on all processes.
Any failure during attaching process will cause inconsistent status
between processes, so proper rollback action should be considered.
Also it is not safe to detach a share device when other process still
use it, so a handshake mechanism is introduced.
This patch covers the implementation of case 1,2,5,6,7,8.
Case 3,4 will be implemented on separate patch as well as handshake
mechanism.
attach device
a) primary attach the new device if failed goto h).
b) primary send attach sync request to all secondary.
c) secondary receive request and attach device and send reply.
d) primary check the reply if all success go to i).
e) primary send attach rollback sync request to all secondary.
f) secondary receive the request and detach device and send reply.
g) primary receive the reply and detach device as rollback action.
h) attach fail
i) attach success
detach device
a) primary perform pre-detach check, if device is locked, goto i).
b) primary send pre-detach sync request to all secondary.
c) secondary perform pre-detach check and send reply.
d) primary check the reply if any fail goto i).
e) primary send detach sync request to all secondary
f) secondary detach the device and send reply (assume no fail)
g) primary detach the device.
h) detach success
i) detach failed
Secondary process can attach private device which only visible to
itself, in this case no IPC is involved, primary process is not
allowed to have private device so far.
Secondary process can also temporally to detach a share device "privately"
then attach it back later, this action also not impact other processes.
rte_eth_dev_attach and rte_eth_dev_detach are extended to support
shared device attach/detach in the primary-secondary process model; they
will be called in cases 1, 2, 3 and 4.
New API rte_eth_dev_attach_private and rte_eth_dev_detach_private are
introduced to cover case 5,6,7,8, this API can only be invoked in
secondary process.
---
<snip>
Post by Qi Zhang
+ memset(&da, 0, sizeof(da));
+
+ if (rte_devargs_parse(&da, "%s", devargs)) {
+ ethdev_log(ERR, "failed to parse devargs %s\n", devargs);
+ return -EINVAL;
+ }
+
+ ret = rte_eal_hotplug_add(da.bus->name, da.name, "");
+ if (ret) {
+ ethdev_log(ERR, "failed to hotplug bus:%s, device:%s\n",
+ da.bus->name, da.name);
+ free(da.args);
+ return ret;
+ }
+
+ if (rte_eth_devices[port_id].state == RTE_ETH_DEV_UNUSED) {
+ ethdev_log(ERR, "failed to attach to port %d, this is a pmd issue\n",
+ port_id);
+ return -ENODEV;
^^^ Leaking da.args here?
Post by Qi Zhang
+ }
+ free(da.args);
+ return 0;
+}
+
+static int handle_secondary_request(const struct rte_mp_msg *msg,
+const void *peer) {
+ RTE_SET_USED(msg);
+ RTE_SET_USED(peer);
+ return -ENOTSUP;
+}
+
+static int handle_primary_response(const struct rte_mp_msg *msg,
+const void *peer) {
<snip>
Post by Qi Zhang
+ ret = rte_mp_request_sync(&mp_req, &mp_reply, &ts);
+ if (ret) {
+ ethdev_log(ERR, "rte_mp_request_sync failed\n");
+ return ret;
+ }
+
+ req->result = 0;
+ for (i = 0; i < mp_reply.nb_received; i++) {
+ struct eth_dev_mp_req *resp =
+ (struct eth_dev_mp_req *)mp_reply.msgs[i].param;
+ if (resp->result) {
+ req->result = resp->result;
+ break;
+ }
+ }
Do we care if nb_sent != nb_received?
Post by Qi Zhang
+
+ return 0;
+}
+
+int rte_eth_dev_mp_init(void)
+{
+
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+ if (rte_mp_action_register(ETH_DEV_MP_ACTION_REQUEST,
<snip>
Post by Qi Zhang
+/**
+ * Synchronous wrapper used by a secondary process to send a
+ * request to the primary process; it is invoked when an attach
+ * or detach request is issued from a secondary process.
+ */
+int rte_eth_dev_request_to_primary(struct eth_dev_mp_req *req);
+
+/**
+ * Synchronous wrapper used by the primary process to send a
+ * request to the secondary processes; it is invoked when an attach
+ * or detach request is issued from the primary process.
+ */
+int rte_eth_dev_request_to_secondary(struct eth_dev_mp_req *req);
Nitpicking, but the two above functions aren't used outside ethdev library.
You can probably drop the rte_ prefix.
Post by Qi Zhang
+
+/* Register mp channel callback functions of ethdev layer.*/ int
+rte_eth_dev_mp_init(void);
I don't quite understand what you're doing here. (Or rather, i understand the
intention, but i don't understand the implementation :) )
This function is meant to be called from EAL at startup. First of all, why is it
declared twice (once in eal_private, once in ethdev_private)?
Ah, I forgot this, this is something in mess.
Second of all, ethdev is a library, but this function is called from EAL. Which
means it cannot be in a private header (nor should it be declared in EAL), and
you cannot even call it from EAL because that would introduce a circular
dependency between EAL and ethdev.
So, this needs to be redone the other way around - have ethdev register itself
with EAL, and get called at some point, in a generic way (e.g.
see how bus probe works for example). I don't know what this would look like
- maybe some kind of generic multiprocess init?
Yes, properly like this. I will re-work it.

Thank
Andrew Rybchenko
2018-06-22 13:54:21 UTC
Permalink
Post by Qi Zhang
We are going to introduce the solution to handle different hotplug
1. Attach a share device from primary
2. Detach a share device from primary
3. Attach a share device from secondary
4. Detach a share device from secondary
5. Attach a private device from secondary
6. Detach a private device from secondary
7. Detach a share device from secondary privately
8. Attach a share device from secondary privately
In primary-secondary process model, we assume device is shared by default.
that means attach or detach a device on any process will broadcast to
all other processes through mp channel then device information will be
synchronized on all processes.
Any failure during attaching process will cause inconsistent status
between processes, so proper rollback action should be considered.
Also it is not safe to detach a share device when other process still use
it, so a handshake mechanism is introduced.
This patch covers the implementation of case 1,2,5,6,7,8.
Case 3,4 will be implemented on separate patch as well as handshake
mechanism.
attach device
a) primary attach the new device if failed goto h).
b) primary send attach sync request to all secondary.
c) secondary receive request and attach device and send reply.
d) primary check the reply if all success go to i).
e) primary send attach rollback sync request to all secondary.
f) secondary receive the request and detach device and send reply.
g) primary receive the reply and detach device as rollback action.
h) attach fail
i) attach success
detach device
a) primary perform pre-detach check, if device is locked, goto i).
b) primary send pre-detach sync request to all secondary.
c) secondary perform pre-detach check and send reply.
d) primary check the reply if any fail goto i).
e) primary send detach sync request to all secondary
f) secondary detach the device and send reply (assume no fail)
g) primary detach the device.
h) detach success
i) detach failed
Secondary process can attach private device which only visible to itself,
in this case no IPC is involved, primary process is not allowed to have
private device so far.
Secondary process can also temporally to detach a share device "privately"
then attach it back later, this action also not impact other processes.
rte_eth_dev_attach and rte_eth_dev_detach are extended to support
shared device attach/detach in the primary-secondary process model; they
will be called in cases 1, 2, 3 and 4.
New API rte_eth_dev_attach_private and rte_eth_dev_detach_private are
introduced to cover case 5,6,7,8, this API can only be invoked in secondary
process.
---
- rename rte_ethdev_mp.* to ethdev_mp.*
- add experimental tag for rte_eth_dev_attach_private and
rte_ethdev_detach_private.
- move do_eth_dev_attach and do_eth_dev_detach to ethdev_private.h
- move rte_eth_dev_mp_init before rte_eal_mcfg_complete.
- fix meson.build.
- improve commit log.
lib/librte_eal/common/eal_private.h | 8 ++
lib/librte_eal/linuxapp/eal/eal.c | 7 ++
lib/librte_ethdev/Makefile | 1 +
lib/librte_ethdev/ethdev_mp.c | 198 ++++++++++++++++++++++++++++++++++++
lib/librte_ethdev/ethdev_mp.h | 44 ++++++++
lib/librte_ethdev/ethdev_private.h | 39 +++++++
lib/librte_ethdev/meson.build | 1 +
lib/librte_ethdev/rte_ethdev.c | 184 +++++++++++++++++++++++++++++----
lib/librte_ethdev/rte_ethdev.h | 45 ++++++++
lib/librte_ethdev/rte_ethdev_core.h | 5 +
10 files changed, 515 insertions(+), 17 deletions(-)
create mode 100644 lib/librte_ethdev/ethdev_mp.c
create mode 100644 lib/librte_ethdev/ethdev_mp.h
create mode 100644 lib/librte_ethdev/ethdev_private.h
diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h
index bdadc4d50..92fa59bed 100644
--- a/lib/librte_eal/common/eal_private.h
+++ b/lib/librte_eal/common/eal_private.h
@@ -258,4 +258,12 @@ int rte_mp_channel_init(void);
*/
void dev_callback_process(char *device_name, enum rte_dev_event_type event);
+/**
+ * Register mp channel callback functions of ethdev layer.
+ *
+ * 0 on success.
+ * (<0) on failure.
+ */
+int rte_eth_dev_mp_init(void);
It looks like it makes cross-dependency between EAL and ethdev.
As far as I can see EAL does not have references to rte_eth_dev
functions yet. It looks really suspicious. The function is declared in
EAL, but implemented in ethdev.
Moreover, it is declared once again in ethdev_mp.h.

<...>
Post by Qi Zhang
diff --git a/lib/librte_ethdev/rte_ethdev_core.h b/lib/librte_ethdev/rte_ethdev_core.h
index 33d12b3a2..2cb6de745 100644
--- a/lib/librte_ethdev/rte_ethdev_core.h
+++ b/lib/librte_ethdev/rte_ethdev_core.h
@@ -622,4 +622,9 @@ struct rte_eth_dev_data {
*/
extern struct rte_eth_dev rte_eth_devices[];
+extern int ethdev_logtype;
+#define ethdev_log(level, fmt, ...) \
+ rte_log(RTE_LOG_ ## level, ethdev_logtype, fmt "\n", ## __VA_ARGS__)
It looks like it clashes with ethdev logging changes submitted by Ferruh.

Andrew.
Qi Zhang
2018-06-21 02:00:42 UTC
Permalink
Introduce API rte_eth_dev_lock and rte_eth_dev_unlock to let
application lock or unlock on specific ethdev, a locked device
can't be detached, this help application to prevent unexpected
device detaching, especially in multi-process environment.

Also introduce the new APIs rte_eth_dev_lock_with_callback and
rte_eth_dev_unlock_with_callback to let an application register
a callback function which will be invoked before a device is going
to be detached. The return value of the function decides whether the
device will continue to be detached or not; this lets the application
do a condition check at runtime.

Signed-off-by: Qi Zhang <***@intel.com>
---

v2:
- rename rte_ethdev_lock.* to ethdev_lock.*
- separate rte_eth_dev_[un]lock into rte_eth_dev_[un]lock and
rte_eth_dev_[un]lock_with_callback
- lock callbacks will be removed automatically after device is detached.
- add experimental tag for all new APIs.
- fix meson.build

lib/librte_ethdev/Makefile | 1 +
lib/librte_ethdev/ethdev_lock.c | 139 ++++++++++++++++++++++++++++++++++++++++
lib/librte_ethdev/ethdev_lock.h | 31 +++++++++
lib/librte_ethdev/ethdev_mp.c | 3 +-
lib/librte_ethdev/meson.build | 1 +
lib/librte_ethdev/rte_ethdev.c | 60 ++++++++++++++++-
lib/librte_ethdev/rte_ethdev.h | 124 +++++++++++++++++++++++++++++++++++
7 files changed, 357 insertions(+), 2 deletions(-)
create mode 100644 lib/librte_ethdev/ethdev_lock.c
create mode 100644 lib/librte_ethdev/ethdev_lock.h

diff --git a/lib/librte_ethdev/Makefile b/lib/librte_ethdev/Makefile
index d0a059b83..62bef03fc 100644
--- a/lib/librte_ethdev/Makefile
+++ b/lib/librte_ethdev/Makefile
@@ -20,6 +20,7 @@ LIBABIVER := 9

SRCS-y += rte_ethdev.c
SRCS-y += ethdev_mp.c
+SRCS-y += ethdev_lock.c
SRCS-y += rte_flow.c
SRCS-y += rte_tm.c
SRCS-y += rte_mtr.c
diff --git a/lib/librte_ethdev/ethdev_lock.c b/lib/librte_ethdev/ethdev_lock.c
new file mode 100644
index 000000000..6c13e87a9
--- /dev/null
+++ b/lib/librte_ethdev/ethdev_lock.c
@@ -0,0 +1,139 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+#include "ethdev_lock.h"
+
+/*
+ * One registered lock on a port. Entries are keyed by the triple
+ * <port_id, callback, user_args>; registering the same triple again only
+ * bumps ref_count.
+ */
+struct lock_entry {
+ /* linkage in lock_entry_list */
+ TAILQ_ENTRY(lock_entry) next;
+ /* invoked by process_lock_callbacks(); non-zero return blocks detach */
+ rte_eth_dev_lock_callback_t callback;
+ /* port this lock applies to */
+ uint16_t port_id;
+ /* opaque argument handed back to callback */
+ void *user_args;
+ /* number of outstanding registrations of this triple */
+ int ref_count;
+};
+
+/* File-scope list of all lock entries, for every port. */
+TAILQ_HEAD(lock_entry_list, lock_entry);
+static struct lock_entry_list lock_entry_list =
+ TAILQ_HEAD_INITIALIZER(lock_entry_list);
+/* Taken by every function in this file that reads or modifies lock_entry_list. */
+static rte_spinlock_t lock_entry_lock = RTE_SPINLOCK_INITIALIZER;
+
+/*
+ * Take one reference on the lock entry <port_id, callback, user_args>,
+ * creating the entry at the tail of the list if it does not exist yet.
+ *
+ * Returns 0 on success, -ENOMEM if a new entry cannot be allocated.
+ */
+int
+register_lock_callback(uint16_t port_id,
+		rte_eth_dev_lock_callback_t callback,
+		void *user_args)
+{
+	struct lock_entry *entry;
+	int rc = 0;
+
+	rte_spinlock_lock(&lock_entry_lock);
+
+	TAILQ_FOREACH(entry, &lock_entry_list, next) {
+		if (entry->port_id != port_id)
+			continue;
+		if (entry->callback == callback &&
+				entry->user_args == user_args)
+			break;
+	}
+
+	if (entry != NULL) {
+		/* same triple registered before: just count it */
+		entry->ref_count++;
+		goto unlock;
+	}
+
+	entry = calloc(1, sizeof(struct lock_entry));
+	if (entry == NULL) {
+		rc = -ENOMEM;
+		goto unlock;
+	}
+
+	entry->port_id = port_id;
+	entry->callback = callback;
+	entry->user_args = user_args;
+	entry->ref_count = 1;
+	TAILQ_INSERT_TAIL(&lock_entry_list, entry, next);
+
+unlock:
+	rte_spinlock_unlock(&lock_entry_lock);
+	return rc;
+}
+
+/*
+ * Drop one reference on the lock entry <port_id, callback, user_args>;
+ * the entry is unlinked and freed when its last reference goes away.
+ *
+ * Returns 0 on success, -ENOENT if no such entry is registered.
+ */
+int
+unregister_lock_callback(uint16_t port_id,
+		rte_eth_dev_lock_callback_t callback,
+		void *user_args)
+{
+	struct lock_entry *entry;
+	int rc = -ENOENT;
+
+	rte_spinlock_lock(&lock_entry_lock);
+
+	TAILQ_FOREACH(entry, &lock_entry_list, next) {
+		if (entry->port_id != port_id)
+			continue;
+		if (entry->callback == callback &&
+				entry->user_args == user_args)
+			break;
+	}
+
+	if (entry != NULL) {
+		rc = 0;
+		entry->ref_count--;
+		if (entry->ref_count == 0) {
+			TAILQ_REMOVE(&lock_entry_list, entry, next);
+			free(entry);
+		}
+	}
+
+	rte_spinlock_unlock(&lock_entry_lock);
+	return rc;
+}
+
+/*
+ * Remove every lock entry registered on port_id, whatever its ref_count.
+ * Called once a device has been detached, so no stale callback survives
+ * for a port id that may later be reused.
+ *
+ * The list lock is taken once for the whole sweep instead of once per
+ * dropped reference, and an entry is freed as soon as it is met, so the
+ * sweep is a single pass and does not depend on ref_count being positive.
+ */
+void clean_lock_callback(uint16_t port_id)
+{
+	struct lock_entry *le;
+	struct lock_entry *succ;
+
+	rte_spinlock_lock(&lock_entry_lock);
+
+	/* read the successor before the current entry may be freed */
+	for (le = TAILQ_FIRST(&lock_entry_list); le != NULL; le = succ) {
+		succ = TAILQ_NEXT(le, next);
+		if (le->port_id != port_id)
+			continue;
+		TAILQ_REMOVE(&lock_entry_list, le, next);
+		free(le);
+	}
+
+	rte_spinlock_unlock(&lock_entry_lock);
+}
+
+/*
+ * Run the callbacks registered on port_id, in list order, stopping at the
+ * first one that returns non-zero.
+ *
+ * Returns 0 when every callback allows the detach (or none is registered),
+ * -EBUSY as soon as one callback objects.
+ *
+ * Callbacks are invoked with lock_entry_lock held, so a callback must not
+ * call register_lock_callback()/unregister_lock_callback() itself.
+ * NOTE(review): rte_spinlock_t is presumably non-recursive, in which case
+ * such a call would deadlock - confirm.
+ */
+int process_lock_callbacks(uint16_t port_id)
+{
+	struct lock_entry *entry;
+	int rc = 0;
+
+	rte_spinlock_lock(&lock_entry_lock);
+
+	TAILQ_FOREACH(entry, &lock_entry_list, next) {
+		/* the callback is only evaluated for entries of this port */
+		if (entry->port_id == port_id &&
+				entry->callback(port_id, entry->user_args)) {
+			rc = -EBUSY;
+			break;
+		}
+	}
+
+	rte_spinlock_unlock(&lock_entry_lock);
+	return rc;
+}
diff --git a/lib/librte_ethdev/ethdev_lock.h b/lib/librte_ethdev/ethdev_lock.h
new file mode 100644
index 000000000..82132eb0c
--- /dev/null
+++ b/lib/librte_ethdev/ethdev_lock.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#ifndef _RTE_ETHDEV_LOCK_H_
+#define _RTE_ETHDEV_LOCK_H_
+
+#include "rte_ethdev.h"
+
+/*
+ * Register a lock callback on a specific port.
+ * Registering the same <port_id, callback, user_args> again only
+ * increases that entry's reference count.
+ * Returns 0 on success, -ENOMEM if the entry cannot be allocated.
+ */
+int
+register_lock_callback(uint16_t port_id,
+ rte_eth_dev_lock_callback_t callback,
+ void *user_args);
+
+/*
+ * Unregister a lock callback on a specific port.
+ * Drops one reference; the entry is removed when the count reaches 0.
+ * Returns 0 on success, -ENOENT if no matching entry exists.
+ */
+int
+unregister_lock_callback(uint16_t port_id,
+ rte_eth_dev_lock_callback_t callback,
+ void *user_args);
+
+/**
+ * Unregister all callback functions on a specific port.
+ * This will be called when a device is detached.
+ */
+void clean_lock_callback(uint16_t port_id);
+
+/*
+ * Run the callbacks registered on the port one by one.
+ * Returns 0 if all of them return 0, -EBUSY at the first one that
+ * returns non-zero (the remaining callbacks are not run).
+ */
+int process_lock_callbacks(uint16_t port_id);
+
+#endif
diff --git a/lib/librte_ethdev/ethdev_mp.c b/lib/librte_ethdev/ethdev_mp.c
index 73dd36485..10c03d25f 100644
--- a/lib/librte_ethdev/ethdev_mp.c
+++ b/lib/librte_ethdev/ethdev_mp.c
@@ -5,6 +5,7 @@
#include <rte_string_fns.h>
#include "rte_ethdev_driver.h"
#include "ethdev_mp.h"
+#include "ethdev_lock.h"

#define MP_TIMEOUT_S 5 /**< 5 seconds timeouts */

@@ -104,7 +105,7 @@ static int handle_primary_request(const struct rte_mp_msg *msg, const void *peer
ret = attach_on_secondary(req->devargs, req->port_id);
break;
case REQ_TYPE_PRE_DETACH:
- ret = 0;
+ ret = process_lock_callbacks(req->port_id);
break;
case REQ_TYPE_DETACH:
case REQ_TYPE_ATTACH_ROLLBACK:
diff --git a/lib/librte_ethdev/meson.build b/lib/librte_ethdev/meson.build
index b60256855..9bb0aec7f 100644
--- a/lib/librte_ethdev/meson.build
+++ b/lib/librte_ethdev/meson.build
@@ -6,6 +6,7 @@ version = 9
allow_experimental_apis = true
sources = files('ethdev_profile.c',
'ethdev_mp.c'
+ 'ethdev_lock.c'
'rte_ethdev.c',
'rte_flow.c',
'rte_mtr.c',
diff --git a/lib/librte_ethdev/rte_ethdev.c b/lib/librte_ethdev/rte_ethdev.c
index 77f53a634..b98ce6766 100644
--- a/lib/librte_ethdev/rte_ethdev.c
+++ b/lib/librte_ethdev/rte_ethdev.c
@@ -43,6 +43,7 @@
#include "ethdev_profile.h"
#include "ethdev_mp.h"
#include "ethdev_private.h"
+#include "ethdev_lock.h"

int ethdev_logtype;

@@ -723,6 +724,7 @@ do_eth_dev_detach(uint16_t port_id)
if (ret < 0)
return ret;

+ clean_lock_callback(port_id);
rte_eth_dev_release_port(&rte_eth_devices[port_id]);
return ret;

@@ -788,7 +790,6 @@ rte_eth_dev_attach(const char *devargs, uint16_t *port_id)
int
rte_eth_dev_attach_private(const char *devargs, uint16_t *port_id)
{
-
if (rte_eal_process_type() == RTE_PROC_PRIMARY)
return -ENOTSUP;

@@ -829,6 +830,10 @@ rte_eth_dev_detach(uint16_t port_id, char *name __rte_unused)
return req.result;
}

+ ret = process_lock_callbacks(port_id);
+ if (ret)
+ return ret;
+
/* check pre_detach */
req.t = REQ_TYPE_PRE_DETACH;
req.port_id = port_id;
@@ -871,6 +876,7 @@ int
rte_eth_dev_detach_private(uint16_t port_id, char *name __rte_unused)
{
uint32_t dev_flags;
+ int ret;

if (rte_eal_process_type() == RTE_PROC_PRIMARY)
return -ENOTSUP;
@@ -884,6 +890,10 @@ rte_eth_dev_detach_private(uint16_t port_id, char *name __rte_unused)
return -ENOTSUP;
}

+ ret = process_lock_callbacks(port_id);
+ if (ret)
+ return ret;
+
return do_eth_dev_detach(port_id);
}

@@ -4686,6 +4696,54 @@ rte_eth_devargs_parse(const char *dargs, struct rte_eth_devargs *eth_da)
return result;
}

+/*
+ * Lock callback registered by rte_eth_dev_lock(): unconditionally
+ * reports the device as busy, so a locked port is never detached.
+ * Both arguments are ignored.
+ */
+static int
+dev_is_busy(uint16_t port_id __rte_unused, void *user_args __rte_unused)
+{
+ return -EBUSY;
+}
+
+/*
+ * Lock a port against detach by registering the always-busy callback
+ * dev_is_busy with NULL user_args. Returns the result of
+ * register_lock_callback().
+ */
+int
+rte_eth_dev_lock(uint16_t port_id)
+{
+ /* port validation; the macro is given -ENODEV as its error value */
+ RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+
+ return register_lock_callback(port_id, dev_is_busy, NULL);
+}
+
+/*
+ * Lock a port with a caller-supplied pre-detach callback.
+ * Returns -EINVAL for a NULL callback, otherwise the result of
+ * register_lock_callback().
+ */
+int
+rte_eth_dev_lock_with_callback(uint16_t port_id,
+ rte_eth_dev_lock_callback_t callback,
+ void *user_args)
+{
+ /* port validation; the macro is given -ENODEV as its error value */
+ RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+
+ if (callback == NULL)
+ return -EINVAL;
+
+ return register_lock_callback(port_id, callback, user_args);
+}
+
+/*
+ * Undo one rte_eth_dev_lock() call by dropping a reference on the
+ * <port_id, dev_is_busy, NULL> entry. Returns the result of
+ * unregister_lock_callback().
+ */
+int
+rte_eth_dev_unlock(uint16_t port_id)
+{
+ /* port validation; the macro is given -ENODEV as its error value */
+ RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+
+ return unregister_lock_callback(port_id, dev_is_busy, NULL);
+}
+
+/*
+ * Undo one rte_eth_dev_lock_with_callback() call for the same
+ * <callback, user_args> pair. Returns -EINVAL for a NULL callback,
+ * otherwise the result of unregister_lock_callback().
+ */
+int
+rte_eth_dev_unlock_with_callback(uint16_t port_id,
+ rte_eth_dev_lock_callback_t callback,
+ void *user_args)
+{
+ /* port validation; the macro is given -ENODEV as its error value */
+ RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+
+ if (callback == NULL)
+ return -EINVAL;
+
+ return unregister_lock_callback(port_id, callback, user_args);
+}
+
RTE_INIT(ethdev_init_log);
static void
ethdev_init_log(void)
diff --git a/lib/librte_ethdev/rte_ethdev.h b/lib/librte_ethdev/rte_ethdev.h
index 813806e3c..1596b6e2b 100644
--- a/lib/librte_ethdev/rte_ethdev.h
+++ b/lib/librte_ethdev/rte_ethdev.h
@@ -4364,6 +4364,130 @@ rte_eth_tx_buffer(uint16_t port_id, uint16_t queue_id,
return rte_eth_tx_buffer_flush(port_id, queue_id, buffer);
}

+/**
+ * Callback function invoked before a device is detached.
+ *
+ * Functions of this type are added to a list and are invoked before
+ * the device is detached. An application can register a callback
+ * function so it is notified and can do some cleanup before the detach
+ * happens. Any callback function returning a non-zero value prevents
+ * the device from being detached (ref. rte_eth_dev_lock_with_callback
+ * and rte_eth_dev_unlock_with_callback).
+ *
+ * @param port_id
+ * The port identifier of the Ethernet device.
+ * @param user_args
+ * The "user_args" parameter that was saved when the callback function
+ * was registered (rte_eth_dev_lock_with_callback).
+ *
+ * @return
+ * 0 device is allowed to be detached.
+ * !0 device is not allowed to be detached.
+ */
+typedef int (*rte_eth_dev_lock_callback_t)(uint16_t port_id, void *user_args);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Lock an Ethernet device; this helps an application prevent a device
+ * from being detached unexpectedly.
+ *
+ * @note
+ * In a multi-process situation, any process locking a shared device
+ * prevents it from being detached from all processes. This is also a
+ * per-process lock, which means unlocking a device from process A has
+ * no effect if the device is locked from process B.
+ *
+ * @note
+ * Locking a device multiple times increases a ref_count, and each
+ * corresponding unlock decreases the ref_count; the device is
+ * unlocked when the ref_count reaches 0.
+ *
+ * @param port_id
+ * The port identifier of the Ethernet device.
+ *
+ * @return
+ * 0 on success, negative on error.
+ */
+int __rte_experimental rte_eth_dev_lock(uint16_t port_id);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Lock an Ethernet device based on a callback function which can perform
+ * a condition check at the moment before the device is detached. If the
+ * condition check does not pass, the device will not be detached;
+ * otherwise, whether the detach continues depends on the return values
+ * of the other callbacks on the same port.
+ *
+ * @note
+ * Same as rte_eth_dev_lock, it is a per-process lock.
+ *
+ * @note
+ * Locking a device with a different callback or user_args adds a
+ * different lock entry (<callback, user_args> pair) to a list. Locking
+ * a device multiple times with the same callback and args only increases
+ * the ref_count of that lock entry, and each corresponding unlock
+ * decreases the ref_count; an entry is removed when its ref_count
+ * reaches 0.
+ *
+ * @note
+ * All callbacks attached to a specific port are removed
+ * automatically when the device is detached.
+ *
+ * @param port_id
+ * The port identifier of the Ethernet device.
+ * @param callback
+ * The callback function to be added to a pre-detach list;
+ * it will be invoked when the device is going to be detached. The
+ * return value decides whether to continue detaching the device or not.
+ * @param user_args
+ * Parameter that will be passed to the callback function.
+ *
+ * @return
+ * 0 on success, negative on error.
+ */
+int __rte_experimental
+rte_eth_dev_lock_with_callback(uint16_t port_id,
+ rte_eth_dev_lock_callback_t callback,
+ void *user_args);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Reverse operation of rte_eth_dev_lock: drops one reference taken by
+ * a previous rte_eth_dev_lock call on the same port.
+ *
+ * @param port_id
+ * The port identifier of the Ethernet device.
+ *
+ * @return
+ * 0 on success, negative on error.
+ */
+int __rte_experimental rte_eth_dev_unlock(uint16_t port_id);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Reverse operation of rte_eth_dev_lock_with_callback: drops one
+ * reference on the lock entry matching <callback, user_args>.
+ *
+ * @param port_id
+ * The port identifier of the Ethernet device.
+ * @param callback
+ * Callback function used to match a lock entry.
+ * @param user_args
+ * User argument used to match a lock entry.
+ *
+ * @return
+ * 0 on success, negative on error.
+ */
+int __rte_experimental
+rte_eth_dev_unlock_with_callback(uint16_t port_id,
+ rte_eth_dev_lock_callback_t callback,
+ void *user_args);
+
#ifdef __cplusplus
}
#endif
--
2.13.6
Burakov, Anatoly
2018-06-21 08:51:22 UTC
Permalink
Post by Qi Zhang
Introduce API rte_eth_dev_lock and rte_eth_dev_unlock to let
application lock or unlock on specific ethdev, a locked device
can't be detached, this help application to prevent unexpected
device detaching, especially in multi-process environment.
Also introduce the new API rte_eth_dev_lock_with_callback and
rte_eth_dev_unlock_with callback to let application to register
a callback function which will be invoked before a device is going
to be detached, the return value of the function will decide if
device will continue be detached or not, this support application
to do condition check at runtime.
---
<snip>
Post by Qi Zhang
+
+static int clean_lock_callback_one(uint16_t port_id)
+{
+ struct lock_entry *le;
+ int ret = 0;
+
+ rte_spinlock_lock(&lock_entry_lock);
+
+ TAILQ_FOREACH(le, &lock_entry_list, next) {
+ if (le->port_id == port_id)
+ break;
+ }
+
+ if (le != NULL) {
+ le->ref_count--;
+ if (le->ref_count == 0) {
+ TAILQ_REMOVE(&lock_entry_list, le, next);
+ free(le);
+ }
+ } else {
+ ret = -ENOENT;
+ }
+
+ rte_spinlock_unlock(&lock_entry_lock);
+ return ret;
+
+}
+
+void clean_lock_callback(uint16_t port_id)
+{
+ int ret;
+
+ for (;;) {
+ ret = clean_lock_callback_one(port_id);
+ if (ret == -ENOENT)
+ break;
+ }
+}
Why not lock/unlock the list in clean_lock_callback() and proceed to
cleaning callbacks one by one, instead of locking-and-unlocking the list
over and over again?
--
Thanks,
Anatoly
Zhang, Qi Z
2018-06-21 09:16:22 UTC
Permalink
-----Original Message-----
From: Burakov, Anatoly
Sent: Thursday, June 21, 2018 4:51 PM
Subject: Re: [PATCH v2 05/22] ethdev: introduce device lock
Post by Qi Zhang
Introduce API rte_eth_dev_lock and rte_eth_dev_unlock to let
application lock or unlock on specific ethdev, a locked device can't
be detached, this help application to prevent unexpected device
detaching, especially in multi-process environment.
Also introduce the new API rte_eth_dev_lock_with_callback and
rte_eth_dev_unlock_with callback to let application to register a
callback function which will be invoked before a device is going to be
detached, the return value of the function will decide if device will
continue be detached or not, this support application to do condition
check at runtime.
---
<snip>
Post by Qi Zhang
+
+static int clean_lock_callback_one(uint16_t port_id) {
+ struct lock_entry *le;
+ int ret = 0;
+
+ rte_spinlock_lock(&lock_entry_lock);
+
+ TAILQ_FOREACH(le, &lock_entry_list, next) {
+ if (le->port_id == port_id)
+ break;
+ }
+
+ if (le != NULL) {
+ le->ref_count--;
+ if (le->ref_count == 0) {
+ TAILQ_REMOVE(&lock_entry_list, le, next);
+ free(le);
+ }
+ } else {
+ ret = -ENOENT;
+ }
+
+ rte_spinlock_unlock(&lock_entry_lock);
+ return ret;
+
+}
+
+void clean_lock_callback(uint16_t port_id) {
+ int ret;
+
+ for (;;) {
+ ret = clean_lock_callback_one(port_id);
+ if (ret == -ENOENT)
+ break;
+ }
+}
Why not lock/unlock the list in clean_lock_callback() and proceed to cleaning
callbacks one by one, instead of locking-and-unlocking the list over and over
again?
Definitely!
Qi Zhang
2018-06-21 02:00:43 UTC
Permalink
This patch covers the multi-process hotplug case in which an attach/detach
request for a shared device is issued from a secondary process. The
implementation is modeled on malloc_mp.c.

device attach on secondary:
a) the secondary sends an async request to the primary and waits on a
condition that is released by the matching response from the primary.
b) the primary receives the request and attaches the new device; on
failure, goto i).
c) the primary forwards the attach request to all secondaries as an async
request (this runs in the mp thread context, so a sync request would deadlock).
d) each secondary receives the request, attaches the device and sends a reply.
e) the primary checks the replies; if all succeeded, goto j).
f) the primary sends an attach-rollback async request to all secondaries.
g) each secondary receives the request, detaches the device and sends a reply.
h) the primary receives the replies and detaches the device as the rollback action.
i) the primary sends a failure response to the secondary, goto k).
j) the primary sends a success response to the secondary.
k) the secondary receives the response and returns.

device detach on secondary:
a) the secondary sends an async request to the primary and waits on a
condition that is released by the matching response from the primary.
b) the primary receives the request and performs the pre-detach check; if
the device is locked, goto j).
c) the primary sends a pre-detach async request to all secondaries.
d) each secondary performs the pre-detach check and sends a reply.
e) the primary checks the replies; if any failed, goto j).
f) the primary sends a detach async request to all secondaries.
g) each secondary detaches the device and sends a reply.
h) the primary detaches the device.
i) the primary sends a success response to the secondary, goto k).
j) the primary sends a failure response to the secondary.
k) the secondary receives the response and returns.

Signed-off-by: Qi Zhang <***@intel.com>
---

v2:
- fix coding style.
- improve comments.
- remove debug code.

lib/librte_ethdev/ethdev_mp.c | 485 +++++++++++++++++++++++++++++++++++++++++-
lib/librte_ethdev/ethdev_mp.h | 1 +
2 files changed, 475 insertions(+), 11 deletions(-)

diff --git a/lib/librte_ethdev/ethdev_mp.c b/lib/librte_ethdev/ethdev_mp.c
index 10c03d25f..f2ea53fd6 100644
--- a/lib/librte_ethdev/ethdev_mp.c
+++ b/lib/librte_ethdev/ethdev_mp.c
@@ -3,12 +3,101 @@
*/

#include <rte_string_fns.h>
+#include <sys/time.h>
+
#include "rte_ethdev_driver.h"
#include "ethdev_mp.h"
#include "ethdev_lock.h"
+#include "ethdev_private.h"
+
+/**
+ * secondary to primary request.
+ * start from function rte_eth_dev_request_to_primary.
+ *
+ * device attach:
+ * a) secondary send request to primary.
+ * b) primary attach the new device if failed goto i).
+ * c) primary forward attach request to all secondary.
+ * d) secondary receive request and attach device and send reply.
+ * e) primary check the reply if all success go to j).
+ * f) primary send attach rollback request to all secondary.
+ * g) secondary receive the request and detach device and send reply.
+ * h) primary receive the reply and detach device as rollback action.
+ * i) send fail response to secondary, goto k).
+ * j) send success response to secondary.
+ * k) end.
+ *
+ * device detach:
+ * a) secondary send request to primary.
+ * b) primary perform pre-detach check, if device is locked, goto j).
+ * c) primary send pre-detach check request to all secondary.
+ * d) secondary perform pre-detach check and send reply.
+ * e) primary check the reply if any fail goto j).
+ * f) primary send detach request to all secondary
+ * g) secondary detach the device and send reply
+ * h) primary detach the device.
+ * i) send success response to secondary, goto k).
+ * j) send fail response to secondary.
+ * k) end.
+ */
+
+enum req_state {
+ REQ_STATE_INACTIVE = 0,
+ REQ_STATE_ACTIVE,
+ REQ_STATE_COMPLETE
+};
+
+struct mp_request {
+ TAILQ_ENTRY(mp_request) next;
+ struct eth_dev_mp_req user_req; /**< contents of request */
+ pthread_cond_t cond; /**< variable we use to time out on this request */
+ enum req_state state; /**< indicate status of this request */
+};
+
+/*
+ * We could've used just a single request, but it may be possible for
+ * secondaries to timeout earlier than the primary, and send a new request while
+ * primary is still expecting replies to the old one. Therefore, each new
+ * request will get assigned a new ID, which is how we will distinguish between
+ * expected and unexpected messages.
+ */
+TAILQ_HEAD(mp_request_list, mp_request);
+static struct {
+ struct mp_request_list list;
+ pthread_mutex_t lock;
+} mp_request_list = {
+ .list = TAILQ_HEAD_INITIALIZER(mp_request_list.list),
+ .lock = PTHREAD_MUTEX_INITIALIZER
+};

#define MP_TIMEOUT_S 5 /**< 5 seconds timeouts */

+static struct mp_request *
+find_request_by_id(uint64_t id)
+{
+ struct mp_request *req;
+
+ TAILQ_FOREACH(req, &mp_request_list.list, next) {
+ if (req->user_req.id == id)
+ break;
+ }
+ return req;
+}
+
+static uint64_t
+get_unique_id(void)
+{
+ uint64_t id;
+
+ do {
+ id = rte_rand();
+ } while (find_request_by_id(id) != NULL);
+ return id;
+}
+
+static int
+send_request_to_secondary_async(const struct eth_dev_mp_req *req);
+
static int detach_on_secondary(uint16_t port_id)
{
struct rte_device *dev;
@@ -75,21 +164,330 @@ static int attach_on_secondary(const char *devargs, uint16_t port_id)
return 0;
}

-static int handle_secondary_request(const struct rte_mp_msg *msg, const void *peer)
+static int
+check_reply(const struct eth_dev_mp_req *req, const struct rte_mp_reply *reply)
+{
+ struct eth_dev_mp_req *resp;
+ int i;
+
+ if (reply->nb_received != reply->nb_sent)
+ return -EINVAL;
+
+ for (i = 0; i < reply->nb_received; i++) {
+ resp = (struct eth_dev_mp_req *)reply->msgs[i].param;
+
+ if (resp->t != req->t) {
+ ethdev_log(ERR, "Unexpected response to async request\n");
+ return -EINVAL;
+ }
+
+ if (resp->id != req->id) {
+ ethdev_log(ERR, "response to wrong async request\n");
+ return -EINVAL;
+ }
+
+ if (resp->result)
+ return resp->result;
+ }
+
+ return 0;
+}
+
+static int
+send_response_to_secondary(const struct eth_dev_mp_req *req, int result)
+{
+ struct rte_mp_msg resp_msg;
+ struct eth_dev_mp_req *resp =
+ (struct eth_dev_mp_req *)resp_msg.param;
+ int ret = 0;
+
+ memset(&resp_msg, 0, sizeof(resp_msg));
+ resp_msg.len_param = sizeof(*resp);
+ strcpy(resp_msg.name, ETH_DEV_MP_ACTION_RESPONSE);
+ memcpy(resp, req, sizeof(*req));
+ resp->result = result;
+
+ ret = rte_mp_sendmsg(&resp_msg);
+ if (ret)
+ ethdev_log(ERR, "failed to send response to secondary\n");
+
+ return ret;
+}
+
+static int
+handle_async_attach_response(const struct rte_mp_msg *request,
+ const struct rte_mp_reply *reply)
+{
+ const struct eth_dev_mp_req *req =
+ (const struct eth_dev_mp_req *)request->param;
+ struct mp_request *entry;
+ struct eth_dev_mp_req tmp_req;
+ int ret = 0;
+
+ pthread_mutex_lock(&mp_request_list.lock);
+
+ entry = find_request_by_id(req->id);
+ if (!entry) {
+ ethdev_log(ERR, "wrong request ID\n");
+ ret = -EINVAL;
+ goto finish;
+ }
+
+ ret = check_reply(req, reply);
+ if (ret) {
+ tmp_req = *req;
+ tmp_req.t = REQ_TYPE_ATTACH_ROLLBACK;
+
+ ret = send_request_to_secondary_async(&tmp_req);
+ if (ret) {
+ ethdev_log(ERR, "couldn't send async request\n");
+ TAILQ_REMOVE(&mp_request_list.list, entry, next);
+ free(entry);
+ }
+ } else {
+ send_response_to_secondary(req, 0);
+ TAILQ_REMOVE(&mp_request_list.list, entry, next);
+ free(entry);
+ }
+
+finish:
+ pthread_mutex_unlock(&mp_request_list.lock);
+ return ret;
+}
+
+static int
+handle_async_detach_response(const struct rte_mp_msg *request,
+ const struct rte_mp_reply *reply)
{
- RTE_SET_USED(msg);
- RTE_SET_USED(peer);
- return -ENOTSUP;
+ const struct eth_dev_mp_req *req =
+ (const struct eth_dev_mp_req *)request->param;
+ struct mp_request *entry;
+ int ret = 0;
+
+ pthread_mutex_lock(&mp_request_list.lock);
+
+ entry = find_request_by_id(req->id);
+ if (!entry) {
+ ethdev_log(ERR, "wrong request ID\n");
+ ret = -EINVAL;
+ goto finish;
+ }
+
+ ret = check_reply(req, reply);
+ if (ret) {
+ send_response_to_secondary(req, ret);
+ } else {
+ do_eth_dev_detach(req->port_id);
+ send_response_to_secondary(req, 0);
+ }
+ TAILQ_REMOVE(&mp_request_list.list, entry, next);
+ free(entry);
+
+finish:
+ pthread_mutex_unlock(&mp_request_list.lock);
+ return ret;
}

-static int handle_primary_response(const struct rte_mp_msg *msg, const void *peer)
+static int
+handle_async_pre_detach_response(const struct rte_mp_msg *request,
+ const struct rte_mp_reply *reply)
{
- RTE_SET_USED(msg);
- RTE_SET_USED(peer);
- return -ENOTSUP;
+ const struct eth_dev_mp_req *req =
+ (const struct eth_dev_mp_req *)request->param;
+ struct eth_dev_mp_req tmp_req;
+ struct mp_request *entry;
+ int ret = 0;
+
+ pthread_mutex_lock(&mp_request_list.lock);
+
+ entry = find_request_by_id(req->id);
+ if (!entry) {
+ ethdev_log(ERR, "wrong request ID\n");
+ ret = -EINVAL;
+ goto finish;
+ }
+
+ ret = check_reply(req, reply);
+ if (!ret) {
+ tmp_req = *req;
+ tmp_req.t = REQ_TYPE_DETACH;
+
+ ret = send_request_to_secondary_async(&tmp_req);
+ if (ret) {
+ ethdev_log(ERR, "couldn't send async request\n");
+ TAILQ_REMOVE(&mp_request_list.list, entry, next);
+ free(entry);
+ }
+ } else {
+ send_response_to_secondary(req, ret);
+ TAILQ_REMOVE(&mp_request_list.list, entry, next);
+ free(entry);
+ }
+
+finish:
+ pthread_mutex_unlock(&mp_request_list.lock);
+ return 0;
}

-static int handle_primary_request(const struct rte_mp_msg *msg, const void *peer)
+static int
+handle_async_rollback_response(const struct rte_mp_msg *request,
+ const struct rte_mp_reply *reply __rte_unused)
+{
+ const struct eth_dev_mp_req *req =
+ (const struct eth_dev_mp_req *)request->param;
+ struct mp_request *entry;
+ int ret = 0;
+
+ pthread_mutex_lock(&mp_request_list.lock);
+
+ entry = find_request_by_id(req->id);
+ if (!entry) {
+ ethdev_log(ERR, "wrong request ID\n");
+ ret = -EINVAL;
+ goto finish;
+ }
+
+ /* we have nothing to do if rollback still fail, just detach */
+ do_eth_dev_detach(req->port_id);
+ /* send response to secondary with the reason of rollback */
+ send_response_to_secondary(req, req->result);
+ TAILQ_REMOVE(&mp_request_list.list, entry, next);
+ free(entry);
+
+finish:
+ pthread_mutex_unlock(&mp_request_list.lock);
+ return ret;
+}
+
+static int
+send_request_to_secondary_async(const struct eth_dev_mp_req *req)
+{
+ struct timespec ts = {.tv_sec = MP_TIMEOUT_S, .tv_nsec = 0};
+ struct rte_mp_msg mp_req;
+ rte_mp_async_reply_t clb;
+ int ret = 0;
+
+ memset(&mp_req, 0, sizeof(mp_req));
+ memcpy(mp_req.param, req, sizeof(*req));
+ mp_req.len_param = sizeof(*req);
+ strcpy(mp_req.name, ETH_DEV_MP_ACTION_REQUEST);
+
+ if (req->t == REQ_TYPE_ATTACH)
+ clb = handle_async_attach_response;
+ else if (req->t == REQ_TYPE_PRE_DETACH)
+ clb = handle_async_pre_detach_response;
+ else if (req->t == REQ_TYPE_DETACH)
+ clb = handle_async_detach_response;
+ else if (req->t == REQ_TYPE_ATTACH_ROLLBACK)
+ clb = handle_async_rollback_response;
+ else
+ return -1;
+ do {
+ ret = rte_mp_request_async(&mp_req, &ts, clb);
+ } while (ret != 0 && rte_errno == EEXIST);
+
+ if (ret)
+ ethdev_log(ERR, "couldn't send async request\n");
+
+ return ret;
+}
+
+static int
+handle_secondary_request(const struct rte_mp_msg *msg,
+ const void *peer __rte_unused)
+{
+ const struct eth_dev_mp_req *req =
+ (const struct eth_dev_mp_req *)msg->param;
+ struct eth_dev_mp_req tmp_req;
+ struct mp_request *entry;
+ uint16_t port_id;
+ int ret = 0;
+
+ pthread_mutex_lock(&mp_request_list.lock);
+
+ entry = find_request_by_id(req->id);
+ if (entry) {
+ ethdev_log(ERR, "duplicate request id\n");
+ ret = -EEXIST;
+ goto finish;
+ }
+
+ entry = malloc(sizeof(*entry));
+ if (entry == NULL) {
+ ethdev_log(ERR, "not enough memory to allocate request entry\n");
+ ret = -ENOMEM;
+ goto finish;
+ }
+
+ if (req->t == REQ_TYPE_ATTACH) {
+ ret = do_eth_dev_attach(req->devargs, &port_id);
+ if (!ret) {
+ tmp_req = *req;
+ tmp_req.port_id = port_id;
+ ret = send_request_to_secondary_async(&tmp_req);
+ }
+ } else if (req->t == REQ_TYPE_DETACH) {
+ if (!rte_eth_dev_is_valid_port(req->port_id))
+ ret = -EINVAL;
+ if (!ret)
+ ret = process_lock_callbacks(req->port_id);
+ if (!ret) {
+ tmp_req = *req;
+ tmp_req.t = REQ_TYPE_PRE_DETACH;
+ ret = send_request_to_secondary_async(&tmp_req);
+ }
+ } else {
+ ethdev_log(ERR, "unsupported secondary to primary request\n");
+ ret = -ENOTSUP;
+ goto finish;
+ }
+
+ if (ret) {
+ ret = send_response_to_secondary(req, ret);
+ if (ret) {
+ ethdev_log(ERR, "failed to send response to secondary\n");
+ goto finish;
+ }
+ } else {
+ memcpy(&entry->user_req, req, sizeof(*req));
+ entry->state = REQ_STATE_ACTIVE;
+ TAILQ_INSERT_TAIL(&mp_request_list.list, entry, next);
+ entry = NULL;
+ }
+
+finish:
+ pthread_mutex_unlock(&mp_request_list.lock);
+ if (entry)
+ free(entry);
+ return ret;
+}
+
+static int
+handle_primary_response(const struct rte_mp_msg *msg,
+ const void *peer __rte_unused)
+{
+ const struct eth_dev_mp_req *req =
+ (const struct eth_dev_mp_req *)msg->param;
+ struct mp_request *entry;
+
+ pthread_mutex_lock(&mp_request_list.lock);
+
+ entry = find_request_by_id(req->id);
+ if (entry) {
+ entry->user_req.result = req->result;
+ entry->user_req.port_id = req->port_id;
+ entry->state = REQ_STATE_COMPLETE;
+
+ pthread_cond_signal(&entry->cond);
+ }
+
+ pthread_mutex_unlock(&mp_request_list.lock);
+
+ return 0;
+}
+
+static int
+handle_primary_request(const struct rte_mp_msg *msg, const void *peer)
{
const struct eth_dev_mp_req *req =
(const struct eth_dev_mp_req *)msg->param;
@@ -129,8 +527,73 @@ static int handle_primary_request(const struct rte_mp_msg *msg, const void *peer

int rte_eth_dev_request_to_primary(struct eth_dev_mp_req *req)
{
- RTE_SET_USED(req);
- return -ENOTSUP;
+ struct rte_mp_msg msg;
+ struct eth_dev_mp_req *msg_req = (struct eth_dev_mp_req *)msg.param;
+ struct mp_request *entry;
+ struct timespec ts;
+ struct timeval now;
+ int ret = 0;
+
+ memset(&msg, 0, sizeof(msg));
+ memset(&ts, 0, sizeof(ts));
+
+ entry = malloc(sizeof(*entry));
+ if (entry == NULL) {
+ ethdev_log(ERR, "not enough memory to allocate request entry\n");
+ return -ENOMEM;
+ }
+
+ pthread_mutex_lock(&mp_request_list.lock);
+
+ ret = gettimeofday(&now, NULL);
+ if (ret) {
+ ethdev_log(ERR, "cannot get current time\n");
+ ret = -EINVAL;
+ goto finish;
+ }
+
+ ts.tv_nsec = (now.tv_usec * 1000) % 1000000000;
+ ts.tv_sec = now.tv_sec + MP_TIMEOUT_S +
+ (now.tv_usec * 1000) / 1000000000;
+
+ pthread_cond_init(&entry->cond, NULL);
+
+ msg.len_param = sizeof(*req);
+ strcpy(msg.name, ETH_DEV_MP_ACTION_REQUEST);
+
+ req->id = get_unique_id();
+
+ memcpy(msg_req, req, sizeof(*req));
+
+ ret = rte_mp_sendmsg(&msg);
+ if (ret) {
+ ethdev_log(ERR, "cannot send message to primary");
+ goto finish;
+ }
+
+ memcpy(&entry->user_req, req, sizeof(*req));
+
+ entry->state = REQ_STATE_ACTIVE;
+
+ TAILQ_INSERT_TAIL(&mp_request_list.list, entry, next);
+
+ do {
+ ret = pthread_cond_timedwait(&entry->cond,
+ &mp_request_list.lock, &ts);
+ } while (ret != 0 && ret != ETIMEDOUT);
+
+ if (entry->state != REQ_STATE_COMPLETE) {
+ RTE_LOG(ERR, EAL, "request time out\n");
+ ret = -ETIMEDOUT;
+ } else {
+ req->result = entry->user_req.result;
+ }
+ TAILQ_REMOVE(&mp_request_list.list, entry, next);
+
+finish:
+ pthread_mutex_unlock(&mp_request_list.lock);
+ free(entry);
+ return ret;
}

/**
diff --git a/lib/librte_ethdev/ethdev_mp.h b/lib/librte_ethdev/ethdev_mp.h
index c3e55dfec..6d10dfdad 100644
--- a/lib/librte_ethdev/ethdev_mp.h
+++ b/lib/librte_ethdev/ethdev_mp.h
@@ -18,6 +18,7 @@ enum eth_dev_req_type {
};

struct eth_dev_mp_req {
+ uint64_t id;
enum eth_dev_req_type t;
char devargs[MAX_DEV_ARGS_LEN];
uint16_t port_id;
--
2.13.6
Burakov, Anatoly
2018-06-21 09:06:20 UTC
Permalink
Post by Qi Zhang
This patch cover the multi-process hotplug case when a share device
attach/detach request be issued from secondary process, the implementation
references malloc_mp.c.
a) secondary send async request to primary and wait on a condition
which will be released by matched response from primary.
b) primary receive the request and attach the new device if failed
goto i).
c) primary forward attach request to all secondary as async request
(because this in mp thread context, use sync request will deadlock)
d) secondary receive request and attach device and send reply.
e) primary check the reply if all success go to j).
f) primary send attach rollback async request to all secondary.
g) secondary receive the request and detach device and send reply.
h) primary receive the reply and detach device as rollback action.
i) send fail response to secondary, goto k).
j) send success response to secondary.
k) secondary process receive response and return.
a) secondary send async request to primary and wait on a condition
which will be released by matched response from primary.
b) primary receive the request and perform pre-detach check, if device
is locked, goto j).
c) primary send pre-detach async request to all secondary.
d) secondary perform pre-detach check and send reply.
e) primary check the reply if any fail goto j).
f) primary send detach async request to all secondary
g) secondary detach the device and send reply
h) primary detach the device.
i) send success response to secondary, goto k).
j) send fail response to secondary.
k) secondary process receive response and return.
---
<snip>
Post by Qi Zhang
-static int handle_secondary_request(const struct rte_mp_msg *msg, const void *peer)
+static int
+check_reply(const struct eth_dev_mp_req *req, const struct rte_mp_reply *reply)
+{
+ struct eth_dev_mp_req *resp;
+ int i;
+
+ if (reply->nb_received != reply->nb_sent)
+ return -EINVAL;
+
+ for (i = 0; i < reply->nb_received; i++) {
+ resp = (struct eth_dev_mp_req *)reply->msgs[i].param;
+
+ if (resp->t != req->t) {
+ ethdev_log(ERR, "Unexpected response to async request\n");
+ return -EINVAL;
+ }
+
+ if (resp->id != req->id) {
+ ethdev_log(ERR, "response to wrong async request\n");
+ return -EINVAL;
+ }
+
+ if (resp->result)
+ return resp->result;
+ }
+
+ return 0;
+}
As far as I understand, return values from this will propagate all the
way up to user return value. How would a user differentiate between
-EINVAL returned from invalid parameters, and -EINVAL from failed reply?
I think this error code should be different (don't know which one though
:) ).

(as a side note, you keep returning -EINVAL all over the place, even
when problem is not in user's arguments - you should probably fix those
too. for example, if request ID not found, return code should probably
be something like -ENOENT)
--
Thanks,
Anatoly
Zhang, Qi Z
2018-06-21 12:50:02 UTC
Permalink
-----Original Message-----
From: Burakov, Anatoly
Sent: Thursday, June 21, 2018 5:06 PM
Subject: Re: [PATCH v2 06/22] ethdev: support attach or detach share device
from secondary
Post by Qi Zhang
This patch cover the multi-process hotplug case when a share device
attach/detach request be issued from secondary process, the
implementation references malloc_mp.c.
a) secondary send async request to primary and wait on a condition
which will be released by matched response from primary.
b) primary receive the request and attach the new device if failed
goto i).
c) primary forward attach request to all secondary as async request
(because this in mp thread context, use sync request will deadlock)
d) secondary receive request and attach device and send reply.
e) primary check the reply if all success go to j).
f) primary send attach rollback async request to all secondary.
g) secondary receive the request and detach device and send reply.
h) primary receive the reply and detach device as rollback action.
i) send fail response to secondary, goto k).
j) send success response to secondary.
k) secondary process receive response and return.
a) secondary send async request to primary and wait on a condition
which will be released by matched response from primary.
b) primary receive the request and perform pre-detach check, if device
is locked, goto j).
c) primary send pre-detach async request to all secondary.
d) secondary perform pre-detach check and send reply.
e) primary check the reply if any fail goto j).
f) primary send detach async request to all secondary
g) secondary detach the device and send reply
h) primary detach the device.
i) send success response to secondary, goto k).
j) send fail response to secondary.
k) secondary process receive response and return.
---
<snip>
Post by Qi Zhang
-static int handle_secondary_request(const struct rte_mp_msg *msg, const void *peer)
+static int
+check_reply(const struct eth_dev_mp_req *req, const struct
+rte_mp_reply *reply) {
+ struct eth_dev_mp_req *resp;
+ int i;
+
+ if (reply->nb_received != reply->nb_sent)
+ return -EINVAL;
+
+ for (i = 0; i < reply->nb_received; i++) {
+ resp = (struct eth_dev_mp_req *)reply->msgs[i].param;
+
+ if (resp->t != req->t) {
+ ethdev_log(ERR, "Unexpected response to async request\n");
+ return -EINVAL;
+ }
+
+ if (resp->id != req->id) {
+ ethdev_log(ERR, "response to wrong async request\n");
+ return -EINVAL;
+ }
+
+ if (resp->result)
+ return resp->result;
+ }
+
+ return 0;
+}
As far as i understand, return values from this will propagate all the way up to
user return value.
Yes
How would a user differentiate between -EINVAL returned
from invalid parameters, and -EINVAL from failed reply?
My understanding is that (resp->t != req->t) or (resp->id != req->id) is
not expected to happen under any condition; if it does happen, it indicates a bug.
So the return value does not need to distinguish these cases.
Am I right?
I think this error code should be different (don't know which one though
:) ).
(as a side note, you keep returning -EINVAL all over the place, even when
problem is not in user's arguments - you should probably fix those too. for
example, if request ID not found, return code should probably be something
like -ENOENT)
Yes, -ENOENT is better than -EINVAL for id mismatch?
Burakov, Anatoly
2018-06-21 12:56:32 UTC
Permalink
Post by Zhang, Qi Z
-----Original Message-----
From: Burakov, Anatoly
Sent: Thursday, June 21, 2018 5:06 PM
Subject: Re: [PATCH v2 06/22] ethdev: support attach or detach share device
from secondary
Post by Qi Zhang
This patch cover the multi-process hotplug case when a share device
attach/detach request be issued from secondary process, the
implementation references malloc_mp.c.
a) secondary send async request to primary and wait on a condition
which will be released by matched response from primary.
b) primary receive the request and attach the new device if failed
goto i).
c) primary forward attach request to all secondary as async request
(because this in mp thread context, use sync request will deadlock)
d) secondary receive request and attach device and send reply.
e) primary check the reply if all success go to j).
f) primary send attach rollback async request to all secondary.
g) secondary receive the request and detach device and send reply.
h) primary receive the reply and detach device as rollback action.
i) send fail response to secondary, goto k).
j) send success response to secondary.
k) secondary process receive response and return.
a) secondary send async request to primary and wait on a condition
which will be released by matched response from primary.
b) primary receive the request and perform pre-detach check, if device
is locked, goto j).
c) primary send pre-detach async request to all secondary.
d) secondary perform pre-detach check and send reply.
e) primary check the reply if any fail goto j).
f) primary send detach async request to all secondary
g) secondary detach the device and send reply
h) primary detach the device.
i) send success response to secondary, goto k).
j) send fail response to secondary.
k) secondary process receive response and return.
---
<snip>
Post by Qi Zhang
-static int handle_secondary_request(const struct rte_mp_msg *msg, const void *peer)
+static int
+check_reply(const struct eth_dev_mp_req *req, const struct
+rte_mp_reply *reply) {
+ struct eth_dev_mp_req *resp;
+ int i;
+
+ if (reply->nb_received != reply->nb_sent)
+ return -EINVAL;
+
+ for (i = 0; i < reply->nb_received; i++) {
+ resp = (struct eth_dev_mp_req *)reply->msgs[i].param;
+
+ if (resp->t != req->t) {
+ ethdev_log(ERR, "Unexpected response to async request\n");
+ return -EINVAL;
+ }
+
+ if (resp->id != req->id) {
+ ethdev_log(ERR, "response to wrong async request\n");
+ return -EINVAL;
+ }
+
+ if (resp->result)
+ return resp->result;
+ }
+
+ return 0;
+}
As far as i understand, return values from this will propagate all the way up to
user return value.
Yes
How would a user differentiate between -EINVAL returned
from invalid parameters, and -EINVAL from failed reply?
My understanding is if
(resp->t != req->t) or (resp->id != req->id) is not expected to happen at any condition.
there should be a bug if it does happen.
So the return value is not necessary to be sensitive.
Am I right?
You're right, it won't happen under normal conditions. However, on the
off-chance that it does, the error return should still be meaningful.
Under normal conditions, malloc() doesn't fail either :)
--
Thanks,
Anatoly
Qi Zhang
2018-06-21 02:00:44 UTC
Permalink
Previously, detaching a port in a secondary process would disrupt the
primary process and prevent the same device from being attached again. By
taking advantage of rte_eth_dev_release_port_private, we can support this
with a minor change.

Signed-off-by: Qi Zhang <***@intel.com>
---
drivers/net/i40e/i40e_ethdev.c | 2 ++
drivers/net/i40e/i40e_ethdev_vf.c | 9 +++++++++
2 files changed, 11 insertions(+)

diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c
index 13c5d3296..7d1f98422 100644
--- a/drivers/net/i40e/i40e_ethdev.c
+++ b/drivers/net/i40e/i40e_ethdev.c
@@ -678,6 +678,8 @@ static int eth_i40e_pci_remove(struct rte_pci_device *pci_dev)
if (!ethdev)
return -ENODEV;

+ if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+ return rte_eth_dev_release_port_private(ethdev);

if (ethdev->data->dev_flags & RTE_ETH_DEV_REPRESENTOR)
return rte_eth_dev_destroy(ethdev, i40e_vf_representor_uninit);
diff --git a/drivers/net/i40e/i40e_ethdev_vf.c b/drivers/net/i40e/i40e_ethdev_vf.c
index 804e44530..fc6f079d5 100644
--- a/drivers/net/i40e/i40e_ethdev_vf.c
+++ b/drivers/net/i40e/i40e_ethdev_vf.c
@@ -1500,6 +1500,15 @@ static int eth_i40evf_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,

static int eth_i40evf_pci_remove(struct rte_pci_device *pci_dev)
{
+ struct rte_eth_dev *ethdev;
+ ethdev = rte_eth_dev_allocated(pci_dev->device.name);
+
+ if (!ethdev)
+ return -ENODEV;
+
+ if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+ return rte_eth_dev_release_port_private(ethdev);
+
return rte_eth_dev_pci_generic_remove(pci_dev, i40evf_dev_uninit);
}
--
2.13.6
Andrew Rybchenko
2018-06-22 13:57:23 UTC
Permalink
Post by Qi Zhang
Previously, detach port on secondary process will mess primary
process and cause same device can't be attached again, by take
advantage of rte_eth_release_port_private, we can support this
with minor change.
---
drivers/net/i40e/i40e_ethdev.c | 2 ++
drivers/net/i40e/i40e_ethdev_vf.c | 9 +++++++++
2 files changed, 11 insertions(+)
diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c
index 13c5d3296..7d1f98422 100644
--- a/drivers/net/i40e/i40e_ethdev.c
+++ b/drivers/net/i40e/i40e_ethdev.c
@@ -678,6 +678,8 @@ static int eth_i40e_pci_remove(struct rte_pci_device *pci_dev)
if (!ethdev)
return -ENODEV;
+ if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+ return rte_eth_dev_release_port_private(ethdev);
if (ethdev->data->dev_flags & RTE_ETH_DEV_REPRESENTOR)
return rte_eth_dev_destroy(ethdev, i40e_vf_representor_uninit);
diff --git a/drivers/net/i40e/i40e_ethdev_vf.c b/drivers/net/i40e/i40e_ethdev_vf.c
index 804e44530..fc6f079d5 100644
--- a/drivers/net/i40e/i40e_ethdev_vf.c
+++ b/drivers/net/i40e/i40e_ethdev_vf.c
@@ -1500,6 +1500,15 @@ static int eth_i40evf_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
static int eth_i40evf_pci_remove(struct rte_pci_device *pci_dev)
{
+ struct rte_eth_dev *ethdev;
+ ethdev = rte_eth_dev_allocated(pci_dev->device.name);
+
+ if (!ethdev)
+ return -ENODEV;
+
+ if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+ return rte_eth_dev_release_port_private(ethdev);
+
return rte_eth_dev_pci_generic_remove(pci_dev, i40evf_dev_uninit);
}
As far as I can see, similar changes are made in a great many files.
Is it possible to avoid the duplication?
Could it be part of rte_eth_dev_pci_generic_remove() or an appropriately
named wrapper?

Andrew.
Qi Zhang
2018-06-21 02:00:45 UTC
Permalink
Previously, detaching a port in a secondary process would disrupt the
primary process and prevent the same device from being attached again. By
taking advantage of rte_eth_dev_release_port_private, we can support this
with a minor change.

Signed-off-by: Qi Zhang <***@intel.com>
---
drivers/net/ixgbe/ixgbe_ethdev.c | 12 ++++++++++++
1 file changed, 12 insertions(+)

diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c b/drivers/net/ixgbe/ixgbe_ethdev.c
index 87d2ad090..f9d560835 100644
--- a/drivers/net/ixgbe/ixgbe_ethdev.c
+++ b/drivers/net/ixgbe/ixgbe_ethdev.c
@@ -1792,6 +1792,9 @@ static int eth_ixgbe_pci_remove(struct rte_pci_device *pci_dev)
if (!ethdev)
return -ENODEV;

+ if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+ return rte_eth_dev_release_port_private(ethdev);
+
if (ethdev->data->dev_flags & RTE_ETH_DEV_REPRESENTOR)
return rte_eth_dev_destroy(ethdev, ixgbe_vf_representor_uninit);
else
@@ -1809,6 +1812,15 @@ static struct rte_pci_driver rte_ixgbe_pmd = {
static int eth_ixgbevf_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
struct rte_pci_device *pci_dev)
{
+ struct rte_eth_dev *ethdev;
+
+ ethdev = rte_eth_dev_allocated(pci_dev->device.name);
+ if (!ethdev)
+ return -ENODEV;
+
+ if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+ return rte_eth_dev_release_port_private(ethdev);
+
return rte_eth_dev_pci_generic_probe(pci_dev,
sizeof(struct ixgbe_adapter), eth_ixgbevf_dev_init);
}
--
2.13.6
Qi Zhang
2018-06-21 02:00:46 UTC
Permalink
Previously, detaching a port on a secondary process would mess up the
primary process and prevent the same device from being attached again.
By taking advantage of rte_eth_dev_release_port_private, we can support
this with a minor change.

Signed-off-by: Qi Zhang <***@intel.com>
---
drivers/net/e1000/em_ethdev.c | 9 +++++++++
1 file changed, 9 insertions(+)

diff --git a/drivers/net/e1000/em_ethdev.c b/drivers/net/e1000/em_ethdev.c
index 7039dc100..e6b7ce63a 100644
--- a/drivers/net/e1000/em_ethdev.c
+++ b/drivers/net/e1000/em_ethdev.c
@@ -349,6 +349,15 @@ static int eth_em_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,

static int eth_em_pci_remove(struct rte_pci_device *pci_dev)
{
+ struct rte_eth_dev *ethdev =
+ rte_eth_dev_allocated(pci_dev->device.name);
+
+ if (!ethdev)
+ return -ENODEV;
+
+ if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+ return rte_eth_dev_release_port_private(ethdev);
+
return rte_eth_dev_pci_generic_remove(pci_dev, eth_em_dev_uninit);
}
--
2.13.6
Qi Zhang
2018-06-21 02:00:47 UTC
Permalink
Previously, detaching a port on a secondary process would mess up the
primary process and prevent the same device from being attached again.
By taking advantage of rte_eth_dev_release_port_private, we can support
this with a minor change.

Signed-off-by: Qi Zhang <***@intel.com>
---
drivers/net/e1000/igb_ethdev.c | 9 +++++++++
1 file changed, 9 insertions(+)

diff --git a/drivers/net/e1000/igb_ethdev.c b/drivers/net/e1000/igb_ethdev.c
index edc7be319..db07a83e3 100644
--- a/drivers/net/e1000/igb_ethdev.c
+++ b/drivers/net/e1000/igb_ethdev.c
@@ -1089,6 +1089,15 @@ static int eth_igb_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,

static int eth_igb_pci_remove(struct rte_pci_device *pci_dev)
{
+ struct rte_eth_dev *ethdev =
+ rte_eth_dev_allocated(pci_dev->device.name);
+
+ if (!ethdev)
+ return -ENODEV;
+
+ if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+ return rte_eth_dev_release_port_private(ethdev);
+
return rte_eth_dev_pci_generic_remove(pci_dev, eth_igb_dev_uninit);
}
--
2.13.6
Qi Zhang
2018-06-21 02:00:48 UTC
Permalink
Previously, detaching a port on a secondary process would mess up the
primary process and prevent the same device from being attached again.
By taking advantage of rte_eth_dev_release_port_private, we can support
this with a minor change.

Signed-off-by: Qi Zhang <***@intel.com>
---
drivers/net/fm10k/fm10k_ethdev.c | 9 +++++++++
1 file changed, 9 insertions(+)

diff --git a/drivers/net/fm10k/fm10k_ethdev.c b/drivers/net/fm10k/fm10k_ethdev.c
index 3ff1b0e0f..f73301182 100644
--- a/drivers/net/fm10k/fm10k_ethdev.c
+++ b/drivers/net/fm10k/fm10k_ethdev.c
@@ -3264,6 +3264,15 @@ static int eth_fm10k_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,

static int eth_fm10k_pci_remove(struct rte_pci_device *pci_dev)
{
+ struct rte_eth_dev *ethdev =
+ rte_eth_dev_allocated(pci_dev->device.name);
+
+ if (!ethdev)
+ return -ENODEV;
+
+ if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+ return rte_eth_dev_release_port_private(ethdev);
+
return rte_eth_dev_pci_generic_remove(pci_dev, eth_fm10k_dev_uninit);
}
--
2.13.6
Qi Zhang
2018-06-21 02:00:49 UTC
Permalink
Previously, detaching a port on a secondary process would mess up the
primary process and prevent the same device from being attached again.
By taking advantage of rte_eth_dev_release_port_private, we can support
this with a minor change.

Signed-off-by: Qi Zhang <***@intel.com>
---
drivers/net/af_packet/rte_eth_af_packet.c | 11 +++++++++++
1 file changed, 11 insertions(+)

diff --git a/drivers/net/af_packet/rte_eth_af_packet.c b/drivers/net/af_packet/rte_eth_af_packet.c
index ea47abbf8..33ac19de8 100644
--- a/drivers/net/af_packet/rte_eth_af_packet.c
+++ b/drivers/net/af_packet/rte_eth_af_packet.c
@@ -935,6 +935,7 @@ rte_pmd_af_packet_probe(struct rte_vdev_device *dev)
}
/* TODO: request info from primary to set up Rx and Tx */
eth_dev->dev_ops = &ops;
+ eth_dev->device = &dev->device;
rte_eth_dev_probing_finish(eth_dev);
return 0;
}
@@ -986,6 +987,16 @@ rte_pmd_af_packet_remove(struct rte_vdev_device *dev)
if (eth_dev == NULL)
return -1;

+ if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+ /* detach device on local pprocess only */
+ if (strlen(rte_vdev_device_args(dev)) == 0)
+ return rte_eth_dev_release_port_private(eth_dev);
+ /**
+ * else this is a private device for current process
+ * so continue with normal detach scenario
+ */
+ }
+
internals = eth_dev->data->dev_private;
for (q = 0; q < internals->nb_queues; q++) {
rte_free(internals->rx_queue[q].rd);
--
2.13.6
Qi Zhang
2018-06-21 02:00:55 UTC
Permalink
Previously, detaching a port on a secondary process would mess up the
primary process and prevent the same device from being attached again.
By taking advantage of rte_eth_dev_release_port_private, we can support
this with a minor change.

Signed-off-by: Qi Zhang <***@intel.com>
---
drivers/net/pcap/rte_eth_pcap.c | 15 ++++++++++++++-
1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/drivers/net/pcap/rte_eth_pcap.c b/drivers/net/pcap/rte_eth_pcap.c
index 6bd4a7d79..6cc20c2b2 100644
--- a/drivers/net/pcap/rte_eth_pcap.c
+++ b/drivers/net/pcap/rte_eth_pcap.c
@@ -925,6 +925,7 @@ pmd_pcap_probe(struct rte_vdev_device *dev)
}
/* TODO: request info from primary to set up Rx and Tx */
eth_dev->dev_ops = &ops;
+ eth_dev->device = &dev->device;
rte_eth_dev_probing_finish(eth_dev);
return 0;
}
@@ -1016,6 +1017,7 @@ static int
pmd_pcap_remove(struct rte_vdev_device *dev)
{
struct rte_eth_dev *eth_dev = NULL;
+ const char *name;

PMD_LOG(INFO, "Closing pcap ethdev on numa socket %d",
rte_socket_id());
@@ -1023,11 +1025,22 @@ pmd_pcap_remove(struct rte_vdev_device *dev)
if (!dev)
return -1;

+ name = rte_vdev_device_name(dev);
/* reserve an ethdev entry */
- eth_dev = rte_eth_dev_allocated(rte_vdev_device_name(dev));
+ eth_dev = rte_eth_dev_allocated(name);
if (eth_dev == NULL)
return -1;

+ if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+ /* detach device on local pprocess only */
+ if (strlen(rte_vdev_device_args(dev)) == 0)
+ return rte_eth_dev_release_port_private(eth_dev);
+ /**
+ * else this is a private device for current process
+ * so continue with normal detach scenario
+ */
+ }
+
rte_free(eth_dev->data->dev_private);

rte_eth_dev_release_port(eth_dev);
--
2.13.6
Qi Zhang
2018-06-21 02:00:56 UTC
Permalink
Previously, detaching a port on a secondary process would mess up the
primary process and prevent the same device from being attached again.
By taking advantage of rte_eth_dev_release_port_private, we can support
this with a minor change.

Signed-off-by: Qi Zhang <***@intel.com>
---
drivers/net/softnic/rte_eth_softnic.c | 19 ++++++++++++++++---
1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/drivers/net/softnic/rte_eth_softnic.c b/drivers/net/softnic/rte_eth_softnic.c
index 6b3c13e5c..a45a7b0dd 100644
--- a/drivers/net/softnic/rte_eth_softnic.c
+++ b/drivers/net/softnic/rte_eth_softnic.c
@@ -750,6 +750,7 @@ pmd_probe(struct rte_vdev_device *vdev)
}
/* TODO: request info from primary to set up Rx and Tx */
eth_dev->dev_ops = &pmd_ops;
+ eth_dev->device = &vdev->device;
rte_eth_dev_probing_finish(eth_dev);
return 0;
}
@@ -803,17 +804,29 @@ pmd_remove(struct rte_vdev_device *vdev)
{
struct rte_eth_dev *dev = NULL;
struct pmd_internals *p;
+ const char *name;

if (!vdev)
return -EINVAL;

- PMD_LOG(INFO, "Removing device \"%s\"",
- rte_vdev_device_name(vdev));
+ name = rte_vdev_device_name(vdev);
+ PMD_LOG(INFO, "Removing device \"%s\"", name);

/* Find the ethdev entry */
- dev = rte_eth_dev_allocated(rte_vdev_device_name(vdev));
+ dev = rte_eth_dev_allocated(name);
if (dev == NULL)
return -ENODEV;
+
+ if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+ /* detach device on local pprocess only */
+ if (strlen(rte_vdev_device_args(vdev)) == 0)
+ return rte_eth_dev_release_port_private(dev);
+ /**
+ * else this is a private device for current process
+ * so continue with normal detach scenario
+ */
+ }
+
p = dev->data->dev_private;

/* Free device data structures*/
--
2.13.6
Qi Zhang
2018-06-21 02:00:57 UTC
Permalink
Previously, detaching a port on a secondary process would mess up the
primary process and prevent the same device from being attached again.
By taking advantage of rte_eth_dev_release_port_private, we can support
this with a minor change.

Signed-off-by: Qi Zhang <***@intel.com>
---
drivers/net/tap/rte_eth_tap.c | 17 +++++++++++++++--
1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
index df396bfde..bb5f20b01 100644
--- a/drivers/net/tap/rte_eth_tap.c
+++ b/drivers/net/tap/rte_eth_tap.c
@@ -1759,6 +1759,7 @@ rte_pmd_tap_probe(struct rte_vdev_device *dev)
}
/* TODO: request info from primary to set up Rx and Tx */
eth_dev->dev_ops = &ops;
+ eth_dev->device = &dev->device;
rte_eth_dev_probing_finish(eth_dev);
return 0;
}
@@ -1827,12 +1828,24 @@ rte_pmd_tap_remove(struct rte_vdev_device *dev)
{
struct rte_eth_dev *eth_dev = NULL;
struct pmd_internals *internals;
+ const char *name;
int i;

+ name = rte_vdev_device_name(dev);
/* find the ethdev entry */
- eth_dev = rte_eth_dev_allocated(rte_vdev_device_name(dev));
+ eth_dev = rte_eth_dev_allocated(name);
if (!eth_dev)
- return 0;
+ return -ENODEV;
+
+ if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+ /* detach device on local pprocess only */
+ if (strlen(rte_vdev_device_args(dev)) == 0)
+ return rte_eth_dev_release_port_private(eth_dev);
+ /**
+ * else this is a private device for current process
+ * so continue with normal detach scenario
+ */
+ }

internals = eth_dev->data->dev_private;
--
2.13.6
Wiles, Keith
2018-06-21 12:39:55 UTC
Permalink
Post by Qi Zhang
Previously, detach port on secondary process will mess primary
process and cause same device can't be attached again, by take
advantage of rte_eth_release_port_private, we can support this
with minor change.
---
drivers/net/tap/rte_eth_tap.c | 17 +++++++++++++++--
1 file changed, 15 insertions(+), 2 deletions(-)
diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
index df396bfde..bb5f20b01 100644
--- a/drivers/net/tap/rte_eth_tap.c
+++ b/drivers/net/tap/rte_eth_tap.c
@@ -1759,6 +1759,7 @@ rte_pmd_tap_probe(struct rte_vdev_device *dev)
}
/* TODO: request info from primary to set up Rx and Tx */
eth_dev->dev_ops = &ops;
+ eth_dev->device = &dev->device;
rte_eth_dev_probing_finish(eth_dev);
return 0;
}
@@ -1827,12 +1828,24 @@ rte_pmd_tap_remove(struct rte_vdev_device *dev)
{
struct rte_eth_dev *eth_dev = NULL;
struct pmd_internals *internals;
+ const char *name;
int i;
+ name = rte_vdev_device_name(dev);
/* find the ethdev entry */
- eth_dev = rte_eth_dev_allocated(rte_vdev_device_name(dev));
+ eth_dev = rte_eth_dev_allocated(name);
if (!eth_dev)
- return 0;
+ return -ENODEV;
+
+ if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+ /* detach device on local pprocess only */
+ if (strlen(rte_vdev_device_args(dev)) == 0)
+ return rte_eth_dev_release_port_private(eth_dev);
+ /**
+ * else this is a private device for current process
+ * so continue with normal detach scenario
+ */
+ }
internals = eth_dev->data->dev_private;
--
2.13.6
Regards,
Keith
Qi Zhang
2018-06-25 07:17:22 UTC
Permalink
v3:
- enable mp init callback register to help non-eal module to initialize
mp channel during rte_eal_init
- fix when attach share device from secondary.
1) deadlock caused by sync IPC being invoked from rte_malloc in the
primary process while handling a secondary's request to attach a
device; the solution is for the primary process to issue shared
device attach/detach in the interrupt thread.
2) incorrect port_id being returned.
- check nb_sent and nb_received in sync IPC.
- fix memory leak during error handling at attach_on_secondary.
- improve clean_lock_callback to only lock/unlock spinlock once
- improve error code return in check-reply during async IPC.
- remove rte_ prefix of internal function in ethdev_mp.c
- sample code improvement.
1) rename sample to "hotplug_mp", and move to example/multi-process.
2) cleanup header include.
3) call rte_eal_cleanup before exit.

v2:
- rename rte_ethdev_mp.* to ethdev_mp.*
- rename rte_ethdev_lock.* to ethdev_lock.*
- move internal function to ethdev_private.h
- separate rte_eth_dev_[un]lock into rte_eth_dev_[un]lock and
rte_eth_dev_[un]lock_with_callback
- lock callbacks will be removed automatically after device is detached.
- add experimental tag for all new APIs.
- fix coding style issue.
- fix wrong license header in sample code.
- fix spelling
- fix meson.build.
- improve comments.

Background:
===========

Currently a secondary process only syncs ethdevs from the primary
process at the init stage; it is not aware of devices being
attached/detached on the primary process at runtime.

However, there is a requirement from applications that use the
primary-secondary process model: the primary process works as a
resource management process that creates/destroys virtual devices
at runtime, while the secondary processes deal with the network stuff
on these devices.

Solution:
=========

The original intention was to fix this gap, but beyond that the
patch set provides a more comprehensive solution to handle different
hotplug cases in a multi-process situation. It covers the scenarios
below:

1. Attach a share device from primary
2. Detach a share device from primary
3. Attach a share device from secondary
4. Detach a share device from secondary
5. Attach a private device from secondary
6. Detach a private device from secondary
7. Detach a share device from secondary privately
8. Attach a share device from secondary privately

In the primary-secondary process model, we assume ethernet devices are
shared by default. That means attaching or detaching a device on any
process will be broadcast to all other processes through the mp channel,
and device information will then be synchronized on all processes.

Any failure during the attach process will cause inconsistent status
between processes, so a proper rollback action should be considered.
Also, it is not safe to detach a shared device while another process is
still using it, so a handshake mechanism is introduced.

Scenario for Case 1, 2:

attach device from primary
a) primary attach the new device if failed goto h).
b) primary send attach sync request to all secondary.
c) secondary receive request and attach device and send reply.
d) primary check the reply if all success go to i).
e) primary send attach rollback sync request to all secondary.
f) secondary receive the request and detach device and send reply.
g) primary receive the reply and detach device as rollback action.
h) attach fail
i) attach success

detach device from primary
a) primary perform pre-detach check, if device is locked, goto i).
b) primary send pre-detach sync request to all secondary.
c) secondary perform pre-detach check and send reply.
d) primary check the reply if any fail goto i).
e) primary send detach sync request to all secondary
f) secondary detach the device and send reply (assume no fail)
g) primary detach the device.
h) detach success
i) detach failed

Scenario for case 3, 4:

attach device from secondary:
a) secondary send async request to primary and wait on a condition
which will be released by matched response from primary.
b) primary receive the request and attach the new device if failed
goto i).
c) primary forward attach request to all secondary as async request
(because this in mp thread context, use sync request will deadlock,
same reason for all following async request.)
d) secondary receive request and attach device and send reply.
e) primary check the reply if all success go to j).
f) primary send attach rollback async request to all secondary.
g) secondary receive the request and detach device and send reply.
h) primary receive the reply and detach device as rollback action.
i) send fail response to secondary, goto k).
j) send success response to secondary.
k) secondary process receive response and return.

detach device from secondary:
a) secondary send async request to primary and wait on a condition
which will be released by matched response from primary.
b) primary receive the request and perform pre-detach check, if device
is locked, goto j).
c) primary send pre-detach async request to all secondary.
d) secondary perform pre-detach check and send reply.
e) primary check the reply if any fail goto j).
f) primary send detach async request to all secondary
g) secondary detach the device and send reply
h) primary detach the device.
i) send success response to secondary, goto k).
j) send fail response to secondary.
k) secondary process receive response and return.

Case 5, 6:
Secondary process can attach private device which only visible to
itself, in this case no IPC is involved, primary process is not allowed
to have private device so far.

Case 7, 8:
A secondary process can also temporarily detach a shared device
"privately" and then attach it back later; this action also does not
impact other processes.

API changes:
============

rte_eth_dev_attach and rte_eth_dev_detach are extended to support
shared device attach/detach in the primary-secondary process model; they
will be called in cases 1,2,3,4.

New APIs rte_eth_dev_attach_private and rte_eth_dev_detach_private are
introduced to cover cases 5,6,7,8; these APIs can only be invoked in a
secondary process.

New APIs rte_eth_dev_lock and rte_eth_dev_unlock are introduced to let
an application lock or unlock a specific ethdev; a locked device
can't be detached. This helps the application prevent unexpected
device detaching, especially in a multi-process environment.
Also, the new API lets the application register a callback function
which will be invoked before a device is going to be detached;
the return value of the function decides whether the device will
be detached or not, which allows the application to do condition checks
at runtime.

PMD Impact:
===========

Currently device removal is not handled well in secondary processes by
most PMD drivers: rte_eth_dev_release_port will be invoked and will mess up
the primary process since it resets all shared data. So we introduce a new API,
rte_eth_dev_release_port_private, which only resets the ethdev's state to unused
but does not touch shared data, so other processes will not be impacted.
Since not all device drivers are targeted to support the primary-secondary
process model, the patch set only fixes this on all Intel devices and
vdevs; it can be referenced by other drivers when an equivalent fix is
required.

Limitation:
===========

1. The solution does not cover the case where the primary process exits
while secondary processes are still active. This is not a typical use
case, but if it happens:
a. secondary process can't attach / detach any shared device since no
primary exist.
b. secondary process still can attach / detach private device.
c. secondary process still can detach a share device privately but may
not attach it back, that ethdev slot will become zombie slot.

2. So far, cases 5 and 6 are not supported for the PCI bus. The PCI bus
scan/probe mechanism can be improved to support attaching a private
device on a secondary process, but this is outside the scope of this
patchset.

Example:
========

The patchset also contains an example to demonstrate device hotplug
in the multi-process model; below are detailed instructions.

/* start sample code as primary then secondary */
./hotplug_mp --proc-type=auto
help
list
/* attach a af_packet vdev */
attach net_af_packet,iface=eth0
/* detach port 0 */
detach 0
/* attach a private af_packet vdev (secondary process only)*/
attachp net_af_packet,iface=eth0
/* detach a private device (secondary process only) */
detachp 0
/* lock port 0 */
lock 0
/* unlock port 0 */
unlock 0
Qi Zhang (23):
eal: introduce one device scan
bus/vdev: enable one device scan
ethdev: add function to release port in local process
eal: enable multi process init callback
ethdev: enable hotplug on multi-process
ethdev: introduce device lock
ethdev: support attach or detach share device from secondary
net/i40e: enable port detach on secondary process
net/ixgbe: enable port detach on secondary process
net/e1000: enable port detach on secondary process
net/igb: enable port detach on secondary process
net/fm10k: enable port detach on secondary process
net/af_packet: enable port detach on secondary process
net/bonding: enable port detach on secondary process
net/failsafe: enable port detach on secondary process
net/kni: enable port detach on secondary process
net/null: enable port detach on secondary process
net/octeontx: enable port detach on secondary process
net/pcap: enable port detach on secondary process
net/softnic: enable port detach on secondary process
net/tap: enable port detach on secondary process
net/vhost: enable port detach on secondary process
examples/multi_process: add hotplug sample

drivers/bus/vdev/vdev.c | 30 ++
drivers/net/af_packet/rte_eth_af_packet.c | 11 +
drivers/net/bonding/rte_eth_bond_pmd.c | 11 +
drivers/net/e1000/em_ethdev.c | 9 +
drivers/net/e1000/igb_ethdev.c | 9 +
drivers/net/failsafe/failsafe.c | 16 +
drivers/net/fm10k/fm10k_ethdev.c | 9 +
drivers/net/i40e/i40e_ethdev.c | 2 +
drivers/net/i40e/i40e_ethdev_vf.c | 9 +
drivers/net/ixgbe/ixgbe_ethdev.c | 12 +
drivers/net/kni/rte_eth_kni.c | 11 +
drivers/net/null/rte_eth_null.c | 16 +-
drivers/net/octeontx/octeontx_ethdev.c | 16 +
drivers/net/pcap/rte_eth_pcap.c | 15 +-
drivers/net/softnic/rte_eth_softnic.c | 19 +-
drivers/net/tap/rte_eth_tap.c | 17 +-
drivers/net/vhost/rte_eth_vhost.c | 11 +
examples/multi_process/Makefile | 1 +
examples/multi_process/hotplug_mp/Makefile | 23 +
examples/multi_process/hotplug_mp/commands.c | 356 +++++++++++++
examples/multi_process/hotplug_mp/commands.h | 10 +
examples/multi_process/hotplug_mp/main.c | 41 ++
lib/librte_eal/common/eal_common_dev.c | 17 +-
lib/librte_eal/common/eal_common_proc.c | 51 +-
lib/librte_eal/common/eal_private.h | 5 +
lib/librte_eal/common/include/rte_bus.h | 16 +
lib/librte_eal/common/include/rte_eal.h | 34 ++
lib/librte_eal/linuxapp/eal/eal.c | 2 +
lib/librte_ethdev/Makefile | 2 +
lib/librte_ethdev/ethdev_lock.c | 140 ++++++
lib/librte_ethdev/ethdev_lock.h | 31 ++
lib/librte_ethdev/ethdev_mp.c | 714 +++++++++++++++++++++++++++
lib/librte_ethdev/ethdev_mp.h | 42 ++
lib/librte_ethdev/ethdev_private.h | 39 ++
lib/librte_ethdev/meson.build | 2 +
lib/librte_ethdev/rte_ethdev.c | 272 +++++++++-
lib/librte_ethdev/rte_ethdev.h | 169 +++++++
lib/librte_ethdev/rte_ethdev_core.h | 5 +
lib/librte_ethdev/rte_ethdev_driver.h | 13 +
39 files changed, 2176 insertions(+), 32 deletions(-)
create mode 100644 examples/multi_process/hotplug_mp/Makefile
create mode 100644 examples/multi_process/hotplug_mp/commands.c
create mode 100644 examples/multi_process/hotplug_mp/commands.h
create mode 100644 examples/multi_process/hotplug_mp/main.c
create mode 100644 lib/librte_ethdev/ethdev_lock.c
create mode 100644 lib/librte_ethdev/ethdev_lock.h
create mode 100644 lib/librte_ethdev/ethdev_mp.c
create mode 100644 lib/librte_ethdev/ethdev_mp.h
create mode 100644 lib/librte_ethdev/ethdev_private.h
--
2.13.6
Qi Zhang
2018-06-25 07:17:23 UTC
Permalink
When hot-plugging a new device, it is not necessary to scan everything
on the bus since the devname and devargs are already there. So a new
rte_bus op "scan_one" is introduced; a bus driver can implement this
function to simplify the hotplug process.

Signed-off-by: Qi Zhang <***@intel.com>
---
lib/librte_eal/common/eal_common_dev.c | 17 +++++++++++++----
lib/librte_eal/common/include/rte_bus.h | 16 ++++++++++++++++
2 files changed, 29 insertions(+), 4 deletions(-)

diff --git a/lib/librte_eal/common/eal_common_dev.c b/lib/librte_eal/common/eal_common_dev.c
index 61cb3b162..1ad033536 100644
--- a/lib/librte_eal/common/eal_common_dev.c
+++ b/lib/librte_eal/common/eal_common_dev.c
@@ -147,11 +147,20 @@ int __rte_experimental rte_eal_hotplug_add(const char *busname, const char *devn
if (ret)
goto err_devarg;

- ret = bus->scan();
- if (ret)
- goto err_devarg;
+ /**
+ * if bus support to scan specific device by devargs,
+ * we don't need to scan all devices on the bus.
+ */
+ if (bus->scan_one) {
+ dev = bus->scan_one(da);
+ } else {
+ ret = bus->scan();
+ if (ret)
+ goto err_devarg;
+
+ dev = bus->find_device(NULL, cmp_detached_dev_name, devname);
+ }

- dev = bus->find_device(NULL, cmp_detached_dev_name, devname);
if (dev == NULL) {
RTE_LOG(ERR, EAL, "Cannot find unplugged device (%s)\n",
devname);
diff --git a/lib/librte_eal/common/include/rte_bus.h b/lib/librte_eal/common/include/rte_bus.h
index eb9eded4e..3269ef78b 100644
--- a/lib/librte_eal/common/include/rte_bus.h
+++ b/lib/librte_eal/common/include/rte_bus.h
@@ -84,6 +84,21 @@ enum rte_iova_mode {
typedef int (*rte_bus_scan_t)(void);

/**
+ * Bus specific scan for one specific device attached on the bus.
+ * For each bus object, the scan would be responsible for finding the specific
+ * device and adding it to its private device list, and the device object will
+ * be return also.
+ *
+ * @param devargs
+ * Device arguments be used to identify the device.
+ *
+ * @return
+ * !NULL for successful scan
+ * NULL for unsuccessful scan
+ */
+typedef struct rte_device *(*rte_bus_scan_one_t)(struct rte_devargs *devargs);
+
+/**
* Implementation specific probe function which is responsible for linking
* devices on that bus with applicable drivers.
*
@@ -204,6 +219,7 @@ struct rte_bus {
TAILQ_ENTRY(rte_bus) next; /**< Next bus object in linked list */
const char *name; /**< Name of the bus */
rte_bus_scan_t scan; /**< Scan for devices attached to bus */
+ rte_bus_scan_one_t scan_one; /**< Scan one device using devargs */
rte_bus_probe_t probe; /**< Probe devices on bus */
rte_bus_find_device_t find_device; /**< Find a device on the bus */
rte_bus_plug_t plug; /**< Probe single device for drivers */
--
2.13.6
Qi Zhang
2018-06-25 07:17:24 UTC
Permalink
The patch implements the scan_one op for the vdev bus, which gives two benefits:
1. It improves scan efficiency when a device is attached via hotplug, since
there is no need to populate a new device by iterating all devargs in
devargs_list.
2. It also avoids invoking sync IPC (which happens in vdev->scan on a secondary
process). The benefit is that this removes the potential deadlock in the case
where a secondary process receives a request from the primary process to attach
a new device, since vdev->scan would be invoked on the mp thread itself in that
case.
Signed-off-by: Qi Zhang <***@intel.com>
---
drivers/bus/vdev/vdev.c | 30 ++++++++++++++++++++++++++++++
1 file changed, 30 insertions(+)

diff --git a/drivers/bus/vdev/vdev.c b/drivers/bus/vdev/vdev.c
index 6139dd551..cdbd77df0 100644
--- a/drivers/bus/vdev/vdev.c
+++ b/drivers/bus/vdev/vdev.c
@@ -467,6 +467,35 @@ vdev_scan(void)
return 0;
}

+static struct rte_device *vdev_scan_one(struct rte_devargs *devargs)
+{
+ struct rte_vdev_device *dev = NULL;
+
+ dev = calloc(1, sizeof(*dev));
+ if (!dev) {
+ VDEV_LOG(ERR, "failed to allocate memory for new device");
+ return NULL;
+ }
+
+ rte_spinlock_recursive_lock(&vdev_device_list_lock);
+
+ if (find_vdev(devargs->name)) {
+ VDEV_LOG(ERR, "device %s already exist", devargs->name);
+ free(dev);
+ rte_spinlock_recursive_unlock(&vdev_device_list_lock);
+ return NULL;
+ }
+
+ dev->device.devargs = devargs;
+ dev->device.numa_node = SOCKET_ID_ANY;
+ dev->device.name = devargs->name;
+ TAILQ_INSERT_TAIL(&vdev_device_list, dev, next);
+
+ rte_spinlock_recursive_unlock(&vdev_device_list_lock);
+
+ return &dev->device;
+}
+
static int
vdev_probe(void)
{
@@ -531,6 +560,7 @@ vdev_unplug(struct rte_device *dev)

static struct rte_bus rte_vdev_bus = {
.scan = vdev_scan,
+ .scan_one = vdev_scan_one,
.probe = vdev_probe,
.find_device = vdev_find_device,
.plug = vdev_plug,
--
2.13.6
Qi Zhang
2018-06-25 07:17:25 UTC
Permalink
Add driver API rte_eth_dev_release_port_private to support the
requirement that an ethdev be released only on a secondary process:
only the local state is set to unused, and shared data is not
reset, so the primary process can still use it.

Signed-off-by: Qi Zhang <***@intel.com>
---
lib/librte_ethdev/rte_ethdev.c | 24 +++++++++++++++++++++---
lib/librte_ethdev/rte_ethdev_driver.h | 13 +++++++++++++
2 files changed, 34 insertions(+), 3 deletions(-)

diff --git a/lib/librte_ethdev/rte_ethdev.c b/lib/librte_ethdev/rte_ethdev.c
index a9977df97..205b2ee33 100644
--- a/lib/librte_ethdev/rte_ethdev.c
+++ b/lib/librte_ethdev/rte_ethdev.c
@@ -359,6 +359,23 @@ rte_eth_dev_attach_secondary(const char *name)
}

int
+rte_eth_dev_release_port_private(struct rte_eth_dev *eth_dev)
+{
+ if (eth_dev == NULL)
+ return -EINVAL;
+
+ _rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_DESTROY, NULL);
+
+ rte_spinlock_lock(&rte_eth_dev_shared_data->ownership_lock);
+
+ eth_dev->state = RTE_ETH_DEV_UNUSED;
+
+ rte_spinlock_unlock(&rte_eth_dev_shared_data->ownership_lock);
+
+ return 0;
+}
+
+int
rte_eth_dev_release_port(struct rte_eth_dev *eth_dev)
{
if (eth_dev == NULL)
@@ -370,9 +387,10 @@ rte_eth_dev_release_port(struct rte_eth_dev *eth_dev)

rte_spinlock_lock(&rte_eth_dev_shared_data->ownership_lock);

- eth_dev->state = RTE_ETH_DEV_UNUSED;
-
- memset(eth_dev->data, 0, sizeof(struct rte_eth_dev_data));
+ if (eth_dev->state != RTE_ETH_DEV_UNUSED) {
+ eth_dev->state = RTE_ETH_DEV_UNUSED;
+ memset(eth_dev->data, 0, sizeof(struct rte_eth_dev_data));
+ }

rte_spinlock_unlock(&rte_eth_dev_shared_data->ownership_lock);

diff --git a/lib/librte_ethdev/rte_ethdev_driver.h b/lib/librte_ethdev/rte_ethdev_driver.h
index c9c825e3f..49c27223d 100644
--- a/lib/librte_ethdev/rte_ethdev_driver.h
+++ b/lib/librte_ethdev/rte_ethdev_driver.h
@@ -70,6 +70,19 @@ int rte_eth_dev_release_port(struct rte_eth_dev *eth_dev);

/**
* @internal
+ * Release the specified ethdev port in local process, only set to ethdev
+ * state to unused, but not reset share data since it assume other process
+ * is still using it, typically it is called by secondary process.
+ *
+ * @param eth_dev
+ * The *eth_dev* pointer is the address of the *rte_eth_dev* structure.
+ * @return
+ * - 0 on success, negative on error
+ */
+int rte_eth_dev_release_port_private(struct rte_eth_dev *eth_dev);
+
+/**
+ * @internal
* Release device queues and clear its configuration to force the user
* application to reconfigure it. It is for internal use only.
*
--
2.13.6
Qi Zhang
2018-06-25 07:17:26 UTC
Permalink
Introduce new API rte_eal_register_mp_init, which registers
a callback function that will be invoked right after the multi-process
channel is established (rte_mp_channel_init). Typically the API
will be used by other modules that want their mp channel action
callbacks to be registered automatically during rte_eal_init.

Signed-off-by: Qi Zhang <***@intel.com>
---
lib/librte_eal/common/eal_common_proc.c | 51 ++++++++++++++++++++++++++++++++-
lib/librte_eal/common/eal_private.h | 5 ++++
lib/librte_eal/common/include/rte_eal.h | 34 ++++++++++++++++++++++
lib/librte_eal/linuxapp/eal/eal.c | 2 ++
4 files changed, 91 insertions(+), 1 deletion(-)

diff --git a/lib/librte_eal/common/eal_common_proc.c b/lib/librte_eal/common/eal_common_proc.c
index 707d8ab30..fc0eb4d17 100644
--- a/lib/librte_eal/common/eal_common_proc.c
+++ b/lib/librte_eal/common/eal_common_proc.c
@@ -619,6 +619,42 @@ unlink_sockets(const char *filter)
return 0;
}

+struct mp_init_entry {
+ TAILQ_ENTRY(mp_init_entry) next;
+ rte_eal_mp_init_callback_t callback;
+};
+
+TAILQ_HEAD(mp_init_entry_list, mp_init_entry);
+static struct mp_init_entry_list mp_init_entry_list =
+ TAILQ_HEAD_INITIALIZER(mp_init_entry_list);
+
+static int process_mp_init_callbacks(void)
+{
+ struct mp_init_entry *entry;
+ int ret;
+
+ TAILQ_FOREACH(entry, &mp_init_entry_list, next) {
+ ret = entry->callback();
+ if (ret)
+ return ret;
+ }
+ return 0;
+}
+
+int __rte_experimental
+rte_eal_register_mp_init(rte_eal_mp_init_callback_t callback)
+{
+ struct mp_init_entry *entry = calloc(1, sizeof(struct mp_init_entry));
+
+ if (entry == NULL)
+ return -ENOMEM;
+
+ entry->callback = callback;
+ TAILQ_INSERT_TAIL(&mp_init_entry_list, entry, next);
+
+ return 0;
+}
+
int
rte_mp_channel_init(void)
{
@@ -686,7 +722,20 @@ rte_mp_channel_init(void)
flock(dir_fd, LOCK_UN);
close(dir_fd);

- return 0;
+ return process_mp_init_callbacks();
+}
+
+void rte_mp_init_callback_cleanup(void)
+{
+ struct mp_init_entry *entry;
+
+ while (!TAILQ_EMPTY(&mp_init_entry_list)) {
+ TAILQ_FOREACH(entry, &mp_init_entry_list, next) {
+ TAILQ_REMOVE(&mp_init_entry_list, entry, next);
+ free(entry);
+ break;
+ }
+ }
}

/**
diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h
index bdadc4d50..bc230ee23 100644
--- a/lib/librte_eal/common/eal_private.h
+++ b/lib/librte_eal/common/eal_private.h
@@ -247,6 +247,11 @@ struct rte_bus *rte_bus_find_by_device_name(const char *str);
int rte_mp_channel_init(void);

/**
+ * Cleanup all mp channel init callbacks.
+ */
+void rte_mp_init_callback_cleanup(void);
+
+/**
* Internal Executes all the user application registered callbacks for
* the specific device. It is for DPDK internal user only. User
* application should not call it directly.
diff --git a/lib/librte_eal/common/include/rte_eal.h b/lib/librte_eal/common/include/rte_eal.h
index 8de5d69e8..506f17f34 100644
--- a/lib/librte_eal/common/include/rte_eal.h
+++ b/lib/librte_eal/common/include/rte_eal.h
@@ -512,6 +512,40 @@ __rte_deprecated
const char *
rte_eal_mbuf_default_mempool_ops(void);

+/**
+ * Callback function invoked right after the multi-process channel is
+ * established. A typical implementation of such a function registers
+ * mp channel action callbacks.
+ *
+ * @return
+ * - 0 on success.
+ * - (<0) on failure.
+ */
+typedef int (*rte_eal_mp_init_callback_t)(void);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Register a callback function that will be invoked right after the
+ * multi-process channel is established (rte_mp_channel_init). Typically
+ * the function is used by other modules that want their mp channel
+ * action callbacks to be registered automatically during rte_eal_init.
+ *
+ * @note
+ * This function only takes effect when called before rte_eal_init,
+ * and all registered callbacks are cleared during rte_eal_cleanup.
+ *
+ * @param callback
+ * The function to be called at that moment.
+ *
+ * @return
+ * - 0 on success.
+ * - (<0) on failure.
+ */
+int __rte_experimental
+rte_eal_register_mp_init(rte_eal_mp_init_callback_t callback);
+
#ifdef __cplusplus
}
#endif
diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c
index 8655b8691..45cccff7e 100644
--- a/lib/librte_eal/linuxapp/eal/eal.c
+++ b/lib/librte_eal/linuxapp/eal/eal.c
@@ -1048,6 +1048,8 @@ int __rte_experimental
rte_eal_cleanup(void)
{
rte_service_finalize();
+ rte_mp_init_callback_cleanup();
+
return 0;
}
--
2.13.6
Qi Zhang
2018-06-25 07:17:27 UTC
Permalink
We are going to introduce a solution to handle the different hotplug
cases in a multi-process situation. It includes the scenarios below:

1. Attach a shared device from primary
2. Detach a shared device from primary
3. Attach a shared device from secondary
4. Detach a shared device from secondary
5. Attach a private device from secondary
6. Detach a private device from secondary
7. Detach a shared device from secondary privately
8. Attach a shared device from secondary privately

In the primary-secondary process model, we assume a device is shared by
default. That means attaching or detaching a device in any process will be
broadcast to all other processes through the mp channel, so that device
information is synchronized across all processes.

Any failure during the attach process will cause inconsistent status
between processes, so a proper rollback action should be considered.
Also, it is not safe to detach a shared device while another process is
still using it, so a handshake mechanism is introduced.

This patch covers the implementation of cases 1, 2, 5, 6, 7, 8.
Cases 3 and 4, as well as the handshake mechanism, will be implemented
in a separate patch.

Scenario for Case 1, 2:

attach device
a) primary attach the new device if failed goto h).
b) primary send attach sync request to all secondary.
c) secondary receive request and attach device and send reply.
d) primary check the reply if all success go to i).
e) primary send attach rollback sync request to all secondary.
f) secondary receive the request and detach device and send reply.
g) primary receive the reply and detach device as rollback action.
h) attach fail
i) attach success

detach device
a) primary perform pre-detach check, if device is locked, goto i).
b) primary send pre-detach sync request to all secondary.
c) secondary perform pre-detach check and send reply.
d) primary check the reply if any fail goto i).
e) primary send detach sync request to all secondary
f) secondary detach the device and send reply (assume no fail)
g) primary detach the device.
h) detach success
i) detach failed

Case 5, 6:
A secondary process can attach a private device which is only visible to
itself. In this case no IPC is involved. The primary process is not allowed
to have private devices so far.

Case 7, 8:
A secondary process can also temporarily detach a shared device "privately"
and then attach it back later. This action does not impact other processes
either.

APIs changes:

rte_eth_dev_attach and rte_eth_dev_detach are extended to support
shared device attach/detach in the primary-secondary process model; they
will be called in cases 1, 2, 3, 4.

New APIs rte_eth_dev_attach_private and rte_eth_dev_detach_private are
introduced to cover cases 5, 6, 7, 8; these APIs can only be invoked in a
secondary process.

Signed-off-by: Qi Zhang <***@intel.com>
---
lib/librte_ethdev/Makefile | 1 +
lib/librte_ethdev/ethdev_mp.c | 220 ++++++++++++++++++++++++++++++++++++
lib/librte_ethdev/ethdev_mp.h | 41 +++++++
lib/librte_ethdev/ethdev_private.h | 39 +++++++
lib/librte_ethdev/meson.build | 1 +
lib/librte_ethdev/rte_ethdev.c | 190 ++++++++++++++++++++++++++++---
lib/librte_ethdev/rte_ethdev.h | 45 ++++++++
lib/librte_ethdev/rte_ethdev_core.h | 5 +
8 files changed, 525 insertions(+), 17 deletions(-)
create mode 100644 lib/librte_ethdev/ethdev_mp.c
create mode 100644 lib/librte_ethdev/ethdev_mp.h
create mode 100644 lib/librte_ethdev/ethdev_private.h

diff --git a/lib/librte_ethdev/Makefile b/lib/librte_ethdev/Makefile
index c2f2f7d82..d0a059b83 100644
--- a/lib/librte_ethdev/Makefile
+++ b/lib/librte_ethdev/Makefile
@@ -19,6 +19,7 @@ EXPORT_MAP := rte_ethdev_version.map
LIBABIVER := 9

SRCS-y += rte_ethdev.c
+SRCS-y += ethdev_mp.c
SRCS-y += rte_flow.c
SRCS-y += rte_tm.c
SRCS-y += rte_mtr.c
diff --git a/lib/librte_ethdev/ethdev_mp.c b/lib/librte_ethdev/ethdev_mp.c
new file mode 100644
index 000000000..7b1e89a91
--- /dev/null
+++ b/lib/librte_ethdev/ethdev_mp.c
@@ -0,0 +1,220 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2018 Intel Corporation
+ */
+
+#include <rte_string_fns.h>
+#include "rte_ethdev_driver.h"
+#include "ethdev_mp.h"
+
+#define MP_TIMEOUT_S 5 /**< 5 seconds timeouts */
+
+static int detach_on_secondary(uint16_t port_id)
+{
+ struct rte_device *dev;
+ struct rte_bus *bus;
+ int ret = 0;
+
+ if (rte_eth_devices[port_id].state == RTE_ETH_DEV_UNUSED) {
+ ethdev_log(ERR, "detach on secondary: invalid port %d\n",
+ port_id);
+ return -ENODEV;
+ }
+
+ dev = rte_eth_devices[port_id].device;
+ if (dev == NULL)
+ return -EINVAL;
+
+ bus = rte_bus_find_by_device(dev);
+ if (bus == NULL)
+ return -ENOENT;
+
+ ret = rte_eal_hotplug_remove(bus->name, dev->name);
+ if (ret) {
+ ethdev_log(ERR, "failed to hot unplug bus: %s, device:%s\n",
+ bus->name, dev->name);
+ return ret;
+ }
+
+ rte_eth_dev_release_port_private(&rte_eth_devices[port_id]);
+ return ret;
+}
+
+static int attach_on_secondary(const char *devargs, uint16_t port_id)
+{
+ struct rte_devargs da;
+ int ret;
+
+ if (rte_eth_devices[port_id].state != RTE_ETH_DEV_UNUSED) {
+ ethdev_log(ERR, "port %d already in used, failed to attach\n",
+ port_id);
+ return -EINVAL;
+ }
+
+ memset(&da, 0, sizeof(da));
+
+ if (rte_devargs_parse(&da, "%s", devargs)) {
+ ethdev_log(ERR, "failed to parse devargs %s\n", devargs);
+ return -EINVAL;
+ }
+
+ ret = rte_eal_hotplug_add(da.bus->name, da.name, "");
+ if (ret) {
+ ethdev_log(ERR, "failed to hotplug bus:%s, device:%s\n",
+ da.bus->name, da.name);
+ free(da.args);
+ return ret;
+ }
+
+ if (rte_eth_devices[port_id].state == RTE_ETH_DEV_UNUSED) {
+ ethdev_log(ERR,
+ "failed to attach to port %d, this is a pmd issue\n",
+ port_id);
+ free(da.args);
+ return -ENODEV;
+ }
+ free(da.args);
+ return 0;
+}
+
+static int
+handle_secondary_request(const struct rte_mp_msg *msg, const void *peer)
+{
+ RTE_SET_USED(msg);
+ RTE_SET_USED(peer);
+ return -ENOTSUP;
+}
+
+static int
+handle_primary_response(const struct rte_mp_msg *msg, const void *peer)
+{
+ RTE_SET_USED(msg);
+ RTE_SET_USED(peer);
+ return -ENOTSUP;
+}
+
+static int
+handle_primary_request(const struct rte_mp_msg *msg, const void *peer)
+{
+ const struct eth_dev_mp_req *req =
+ (const struct eth_dev_mp_req *)msg->param;
+ struct rte_mp_msg mp_resp;
+ struct eth_dev_mp_req *resp =
+ (struct eth_dev_mp_req *)mp_resp.param;
+ int ret = 0;
+
+ memset(&mp_resp, 0, sizeof(mp_resp));
+
+ switch (req->t) {
+ case REQ_TYPE_ATTACH:
+ ret = attach_on_secondary(req->devargs, req->port_id);
+ break;
+ case REQ_TYPE_PRE_DETACH:
+ ret = 0;
+ break;
+ case REQ_TYPE_DETACH:
+ case REQ_TYPE_ATTACH_ROLLBACK:
+ ret = detach_on_secondary(req->port_id);
+ break;
+ default:
+ ret = -EINVAL;
+ }
+
+ strlcpy(mp_resp.name, ETH_DEV_MP_ACTION_REQUEST, sizeof(mp_resp.name));
+ mp_resp.len_param = sizeof(*req);
+ memcpy(resp, req, sizeof(*resp));
+ resp->result = ret;
+ if (rte_mp_reply(&mp_resp, peer) < 0) {
+ ethdev_log(ERR, "failed to send reply to primary request\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+int eth_dev_request_to_primary(struct eth_dev_mp_req *req)
+{
+ RTE_SET_USED(req);
+ return -ENOTSUP;
+}
+
+/**
+ * Request from primary to secondary.
+ *
+ * Invoked when trying to attach or detach a shared device
+ * from the primary process.
+ */
+int eth_dev_request_to_secondary(struct eth_dev_mp_req *req)
+{
+ struct rte_mp_msg mp_req;
+ struct rte_mp_reply mp_reply;
+ struct timespec ts = {.tv_sec = MP_TIMEOUT_S, .tv_nsec = 0};
+ int ret;
+ int i;
+
+ memset(&mp_req, 0, sizeof(mp_req));
+ memcpy(mp_req.param, req, sizeof(*req));
+ mp_req.len_param = sizeof(*req);
+ strlcpy(mp_req.name, ETH_DEV_MP_ACTION_REQUEST, sizeof(mp_req.name));
+
+ ret = rte_mp_request_sync(&mp_req, &mp_reply, &ts);
+ if (ret) {
+ ethdev_log(ERR, "rte_mp_request_sync failed\n");
+ return ret;
+ }
+
+ if (mp_reply.nb_sent != mp_reply.nb_received) {
+ ethdev_log(ERR, "not all secondary reply\n");
+ return -1;
+ }
+
+ req->result = 0;
+ for (i = 0; i < mp_reply.nb_received; i++) {
+ struct eth_dev_mp_req *resp =
+ (struct eth_dev_mp_req *)mp_reply.msgs[i].param;
+ if (resp->result) {
+ req->result = resp->result;
+ break;
+ }
+ }
+
+ return 0;
+}
+
+static int on_mp_init(void)
+{
+ int ret;
+
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+ ret = rte_mp_action_register(ETH_DEV_MP_ACTION_REQUEST,
+ handle_secondary_request);
+ if (ret) {
+ RTE_LOG(ERR, EAL, "Couldn't register '%s' action\n",
+ ETH_DEV_MP_ACTION_REQUEST);
+ return ret;
+ }
+ } else {
+ ret = rte_mp_action_register(ETH_DEV_MP_ACTION_RESPONSE,
+ handle_primary_response);
+ if (ret) {
+ RTE_LOG(ERR, EAL, "Couldn't register '%s' action\n",
+ ETH_DEV_MP_ACTION_RESPONSE);
+ return ret;
+ }
+
+ ret = rte_mp_action_register(ETH_DEV_MP_ACTION_REQUEST,
+ handle_primary_request);
+ if (ret) {
+ RTE_LOG(ERR, EAL, "Couldn't register '%s' action\n",
+ ETH_DEV_MP_ACTION_REQUEST);
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+RTE_INIT(ethdev_mp_init)
+{
+ if (rte_eal_register_mp_init(on_mp_init))
+ RTE_LOG(ERR, EAL, "ethdev mp channel init failed\n");
+}
diff --git a/lib/librte_ethdev/ethdev_mp.h b/lib/librte_ethdev/ethdev_mp.h
new file mode 100644
index 000000000..40be46c89
--- /dev/null
+++ b/lib/librte_ethdev/ethdev_mp.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#ifndef _RTE_ETHDEV_MP_H_
+#define _RTE_ETHDEV_MP_H_
+
+#define MAX_DEV_ARGS_LEN 0x80
+
+#define ETH_DEV_MP_ACTION_REQUEST "eth_dev_mp_request"
+#define ETH_DEV_MP_ACTION_RESPONSE "eth_dev_mp_response"
+
+enum eth_dev_req_type {
+ REQ_TYPE_ATTACH,
+ REQ_TYPE_PRE_DETACH,
+ REQ_TYPE_DETACH,
+ REQ_TYPE_ATTACH_ROLLBACK,
+};
+
+struct eth_dev_mp_req {
+ enum eth_dev_req_type t;
+ char devargs[MAX_DEV_ARGS_LEN];
+ uint16_t port_id;
+ int result;
+};
+
+/**
+ * This is a synchronous wrapper for a secondary process to send a
+ * request to the primary process. It is invoked when an attach
+ * or detach request is issued from a secondary process.
+ */
+int eth_dev_request_to_primary(struct eth_dev_mp_req *req);
+
+/**
+ * This is a synchronous wrapper for the primary process to send a
+ * request to the secondary processes. It is invoked when an attach
+ * or detach request is issued from the primary process.
+ */
+int eth_dev_request_to_secondary(struct eth_dev_mp_req *req);
+
+#endif
diff --git a/lib/librte_ethdev/ethdev_private.h b/lib/librte_ethdev/ethdev_private.h
new file mode 100644
index 000000000..981e7de8a
--- /dev/null
+++ b/lib/librte_ethdev/ethdev_private.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2018 Intel Corporation
+ */
+
+#ifndef _ETHDEV_PRIVATE_H_
+#define _ETHDEV_PRIVATE_H_
+
+/**
+ * Attach a new Ethernet device in current process.
+ *
+ * @param devargs
+ * A pointer to a strings array describing the new device
+ * to be attached. The strings should be a pci address like
+ * '0000:01:00.0' or virtual device name like 'net_pcap0'.
+ *
+ * @param port_id
+ * A pointer to a port identifier actually attached.
+ *
+ * @return
+ * 0 on success and port_id is filled, negative on error
+ */
+int do_eth_dev_attach(const char *devargs, uint16_t *port_id);
+
+/**
+ * Detach an Ethernet device in the current process.
+ *
+ * @param port_id
+ * The port identifier of the device to detach.
+ *
+ * @note
+ * Unlike rte_eth_dev_detach(), this function takes no devname
+ * buffer, so no device name is reported back to the caller.
+ *
+ * @return
+ * 0 on success, negative on error
+ */
+int do_eth_dev_detach(uint16_t port_id);
+
+#endif
diff --git a/lib/librte_ethdev/meson.build b/lib/librte_ethdev/meson.build
index aed5d2265..b60256855 100644
--- a/lib/librte_ethdev/meson.build
+++ b/lib/librte_ethdev/meson.build
@@ -5,6 +5,7 @@ name = 'ethdev'
version = 9
allow_experimental_apis = true
sources = files('ethdev_profile.c',
+ 'ethdev_mp.c'
'rte_ethdev.c',
'rte_flow.c',
'rte_mtr.c',
diff --git a/lib/librte_ethdev/rte_ethdev.c b/lib/librte_ethdev/rte_ethdev.c
index 205b2ee33..1a5861f30 100644
--- a/lib/librte_ethdev/rte_ethdev.c
+++ b/lib/librte_ethdev/rte_ethdev.c
@@ -41,11 +41,13 @@
#include "rte_ethdev.h"
#include "rte_ethdev_driver.h"
#include "ethdev_profile.h"
+#include "ethdev_mp.h"
+#include "ethdev_private.h"

-static int ethdev_logtype;
+int ethdev_logtype;

-#define ethdev_log(level, fmt, ...) \
- rte_log(RTE_LOG_ ## level, ethdev_logtype, fmt "\n", ## __VA_ARGS__)
+#define RTE_ETH_MP_ACTION_REQUEST "rte_eth_mp_request"
+#define RTE_ETH_MP_ACTION_RESPONSE "rte_eth_mp_response"

static const char *MZ_RTE_ETH_DEV_DATA = "rte_eth_dev_data";
struct rte_eth_dev rte_eth_devices[RTE_MAX_ETHPORTS];
@@ -656,9 +658,8 @@ eth_err(uint16_t port_id, int ret)
return ret;
}

-/* attach the new device, then store port_id of the device */
int
-rte_eth_dev_attach(const char *devargs, uint16_t *port_id)
+do_eth_dev_attach(const char *devargs, uint16_t *port_id)
{
int current = rte_eth_dev_count_total();
struct rte_devargs da;
@@ -703,14 +704,105 @@ rte_eth_dev_attach(const char *devargs, uint16_t *port_id)
return ret;
}

-/* detach the device, then store the name of the device */
int
-rte_eth_dev_detach(uint16_t port_id, char *name __rte_unused)
+do_eth_dev_detach(uint16_t port_id)
{
struct rte_device *dev;
struct rte_bus *bus;
+ int ret = 0;
+
+ dev = rte_eth_devices[port_id].device;
+ if (dev == NULL)
+ return -EINVAL;
+
+ bus = rte_bus_find_by_device(dev);
+ if (bus == NULL)
+ return -ENOENT;
+
+ ret = rte_eal_hotplug_remove(bus->name, dev->name);
+ if (ret < 0)
+ return ret;
+
+ rte_eth_dev_release_port(&rte_eth_devices[port_id]);
+ return ret;
+
+}
+
+/* attach the new device, then store port_id of the device */
+int
+rte_eth_dev_attach(const char *devargs, uint16_t *port_id)
+{
+ struct eth_dev_mp_req req;
+ int ret;
+
+ if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+
+ /**
+ * If secondary process, we just send request to primary
+ * to start the process.
+ */
+ req.t = REQ_TYPE_ATTACH;
+ strlcpy(req.devargs, devargs, MAX_DEV_ARGS_LEN);
+
+ ret = eth_dev_request_to_primary(&req);
+ if (ret) {
+ ethdev_log(ERR,
+ "Failed to send device attach request to primary\n");
+ return ret;
+ }
+
+ *port_id = req.port_id;
+ return req.result;
+ }
+
+ ret = do_eth_dev_attach(devargs, port_id);
+ if (ret)
+ return ret;
+
+ /* send attach request to secondary */
+ req.t = REQ_TYPE_ATTACH;
+ strlcpy(req.devargs, devargs, MAX_DEV_ARGS_LEN);
+ req.port_id = *port_id;
+ ret = eth_dev_request_to_secondary(&req);
+ if (ret) {
+ ethdev_log(ERR, "Failed to send device attach request to secondary\n");
+ goto rollback;
+ }
+
+ if (req.result)
+ goto rollback;
+
+ return 0;
+
+rollback:
+ /* send rollback request to secondary since at least one failed to attach */
+ req.t = REQ_TYPE_ATTACH_ROLLBACK;
+ req.port_id = *port_id;
+ eth_dev_request_to_secondary(&req);
+
+ do_eth_dev_detach(*port_id);
+
+ return -ENODEV;
+}
+
+/* attach the new device, then store port_id of the device */
+int
+rte_eth_dev_attach_private(const char *devargs, uint16_t *port_id)
+{
+
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY)
+ return -ENOTSUP;
+
+ return do_eth_dev_attach(devargs, port_id);
+}
+
+/* detach the device, then store the name of the device */
+int
+rte_eth_dev_detach(uint16_t port_id, char *name __rte_unused)
+{
+ struct eth_dev_mp_req req = {0};
+ int ret;
uint32_t dev_flags;
- int ret = -1;

RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);

@@ -721,22 +813,86 @@ rte_eth_dev_detach(uint16_t port_id, char *name __rte_unused)
return -ENOTSUP;
}

- dev = rte_eth_devices[port_id].device;
- if (dev == NULL)
- return -EINVAL;
+ if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+ req.t = REQ_TYPE_DETACH;
+ req.port_id = port_id;

- bus = rte_bus_find_by_device(dev);
- if (bus == NULL)
- return -ENOENT;
+ /**
+ * If secondary process, we just send request to primary
+ * to start the process.
+ */
+ ret = eth_dev_request_to_primary(&req);
+ if (ret) {
+ ethdev_log(ERR,
+ "Failed to send device detach request to primary\n");
+ return ret;
+ }

- ret = rte_eal_hotplug_remove(bus->name, dev->name);
- if (ret < 0)
+ return req.result;
+ }
+
+ /* check pre_detach */
+ req.t = REQ_TYPE_PRE_DETACH;
+ req.port_id = port_id;
+ ret = eth_dev_request_to_secondary(&req);
+ if (ret) {
+ ethdev_log(ERR,
+ "Failed to send device pre-detach request to secondary\n");
+ return ret;
+ }
+
+ if (req.result) {
+ ethdev_log(ERR,
+ "Device is busy on secondary, can't be detached\n");
+ return req.result;
+ }
+
+ /* detach on secondary first */
+ req.t = REQ_TYPE_DETACH;
+ ret = eth_dev_request_to_secondary(&req);
+ if (ret) {
+ ethdev_log(ERR,
+ "Failed to send device detach request to secondary\n");
+ return ret;
+ }
+
+ if (req.result)
+ /**
+ * this should rarely happen, something wrong in secondary
+ * process, will not block primary detach.
+ */
+ ethdev_log(ERR,
+ "Failed to detach device on secondary process\n");
+
+ /* detach on primary */
+ ret = do_eth_dev_detach(port_id);
+ if (ret)
return ret;

- rte_eth_dev_release_port(&rte_eth_devices[port_id]);
return 0;
}

+/* detach the device, then store the name of the device */
+int
+rte_eth_dev_detach_private(uint16_t port_id, char *name __rte_unused)
+{
+ uint32_t dev_flags;
+
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY)
+ return -ENOTSUP;
+
+ RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
+
+ dev_flags = rte_eth_devices[port_id].data->dev_flags;
+ if (dev_flags & RTE_ETH_DEV_BONDED_SLAVE) {
+ ethdev_log(ERR,
+ "Port %" PRIu16 " is bonded, cannot detach", port_id);
+ return -ENOTSUP;
+ }
+
+ return do_eth_dev_detach(port_id);
+}
+
static int
rte_eth_dev_rx_queue_config(struct rte_eth_dev *dev, uint16_t nb_queues)
{
diff --git a/lib/librte_ethdev/rte_ethdev.h b/lib/librte_ethdev/rte_ethdev.h
index 36e3984ea..813806e3c 100644
--- a/lib/librte_ethdev/rte_ethdev.h
+++ b/lib/librte_ethdev/rte_ethdev.h
@@ -1462,6 +1462,9 @@ uint16_t __rte_experimental rte_eth_dev_count_total(void);

/**
* Attach a new Ethernet device specified by arguments.
+ * In multi-process mode, it will sync with the other processes
+ * to make sure all processes attach the device; any failure
+ * in another process will roll back the action.
*
* @param devargs
* A pointer to a strings array describing the new device
@@ -1475,9 +1478,31 @@ uint16_t __rte_experimental rte_eth_dev_count_total(void);
int rte_eth_dev_attach(const char *devargs, uint16_t *port_id);

/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Attach a private Ethernet device specified by arguments.
+ * A private device is invisible to other processes.
+ * Can only be invoked in a secondary process.
+ *
+ * @param devargs
+ * A pointer to a strings array describing the new device
+ * to be attached. The strings should be a pci address like
+ * '0000:01:00.0' or virtual device name like 'net_pcap0'.
+ * @param port_id
+ * A pointer to a port identifier actually attached.
+ * @return
+ * 0 on success and port_id is filled, negative on error
+ */
+int __rte_experimental
+rte_eth_dev_attach_private(const char *devargs, uint16_t *port_id);
+
+/**
* Detach a Ethernet device specified by port identifier.
* This function must be called when the device is in the
* closed state.
+ * In multi-process mode, it will sync with the other processes
+ * to detach the device.
*
* @param port_id
* The port identifier of the device to detach.
@@ -1490,6 +1515,26 @@ int rte_eth_dev_attach(const char *devargs, uint16_t *port_id);
int rte_eth_dev_detach(uint16_t port_id, char *devname);

/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Detach a private Ethernet device specified by port identifier.
+ * This function must be called when the device is in the
+ * closed state.
+ * Can only be invoked in a secondary process.
+ *
+ * @param port_id
+ * The port identifier of the device to detach.
+ * @param devname
+ * A pointer to a buffer that will be filled with the device name.
+ * This buffer must be at least RTE_DEV_NAME_MAX_LEN long.
+ * @return
+ * 0 on success and devname is filled, negative on error
+ */
+int __rte_experimental
+rte_eth_dev_detach_private(uint16_t port_id, char *devname);
+
+/**
* Convert a numerical speed in Mbps to a bitmap flag that can be used in
* the bitmap link_speeds of the struct rte_eth_conf
*
diff --git a/lib/librte_ethdev/rte_ethdev_core.h b/lib/librte_ethdev/rte_ethdev_core.h
index 33d12b3a2..2cb6de745 100644
--- a/lib/librte_ethdev/rte_ethdev_core.h
+++ b/lib/librte_ethdev/rte_ethdev_core.h
@@ -622,4 +622,9 @@ struct rte_eth_dev_data {
*/
extern struct rte_eth_dev rte_eth_devices[];

+extern int ethdev_logtype;
+#define ethdev_log(level, fmt, ...) \
+ rte_log(RTE_LOG_ ## level, ethdev_logtype, fmt "\n", ## __VA_ARGS__)
+
+
#endif /* _RTE_ETHDEV_CORE_H_ */
--
2.13.6
Qi Zhang
2018-06-25 07:17:28 UTC
Permalink
Introduce the APIs rte_eth_dev_lock and rte_eth_dev_unlock to let an
application lock or unlock a specific ethdev. A locked device
can't be detached; this helps the application prevent unexpected
device detaching, especially in a multi-process environment.

Also introduce the new APIs rte_eth_dev_lock_with_callback and
rte_eth_dev_unlock_with_callback to let an application register
a callback function which will be invoked before a device is going
to be detached. The return value of the function decides whether the
device will continue to be detached or not; this allows the application
to do a condition check at runtime.

Signed-off-by: Qi Zhang <***@intel.com>
---
lib/librte_ethdev/Makefile | 1 +
lib/librte_ethdev/ethdev_lock.c | 140 ++++++++++++++++++++++++++++++++++++++++
lib/librte_ethdev/ethdev_lock.h | 31 +++++++++
lib/librte_ethdev/ethdev_mp.c | 3 +-
lib/librte_ethdev/meson.build | 1 +
lib/librte_ethdev/rte_ethdev.c | 60 ++++++++++++++++-
lib/librte_ethdev/rte_ethdev.h | 124 +++++++++++++++++++++++++++++++++++
7 files changed, 358 insertions(+), 2 deletions(-)
create mode 100644 lib/librte_ethdev/ethdev_lock.c
create mode 100644 lib/librte_ethdev/ethdev_lock.h

diff --git a/lib/librte_ethdev/Makefile b/lib/librte_ethdev/Makefile
index d0a059b83..62bef03fc 100644
--- a/lib/librte_ethdev/Makefile
+++ b/lib/librte_ethdev/Makefile
@@ -20,6 +20,7 @@ LIBABIVER := 9

SRCS-y += rte_ethdev.c
SRCS-y += ethdev_mp.c
+SRCS-y += ethdev_lock.c
SRCS-y += rte_flow.c
SRCS-y += rte_tm.c
SRCS-y += rte_mtr.c
diff --git a/lib/librte_ethdev/ethdev_lock.c b/lib/librte_ethdev/ethdev_lock.c
new file mode 100644
index 000000000..6379519e3
--- /dev/null
+++ b/lib/librte_ethdev/ethdev_lock.c
@@ -0,0 +1,140 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+#include "ethdev_lock.h"
+
+struct lock_entry {
+ TAILQ_ENTRY(lock_entry) next;
+ rte_eth_dev_lock_callback_t callback;
+ uint16_t port_id;
+ void *user_args;
+ int ref_count;
+};
+
+TAILQ_HEAD(lock_entry_list, lock_entry);
+static struct lock_entry_list lock_entry_list =
+ TAILQ_HEAD_INITIALIZER(lock_entry_list);
+static rte_spinlock_t lock_entry_lock = RTE_SPINLOCK_INITIALIZER;
+
+int
+register_lock_callback(uint16_t port_id,
+ rte_eth_dev_lock_callback_t callback,
+ void *user_args)
+{
+ struct lock_entry *le;
+
+ rte_spinlock_lock(&lock_entry_lock);
+
+ TAILQ_FOREACH(le, &lock_entry_list, next) {
+ if (le->port_id == port_id &&
+ le->callback == callback &&
+ le->user_args == user_args)
+ break;
+ }
+
+ if (le == NULL) {
+ le = calloc(1, sizeof(struct lock_entry));
+ if (le == NULL) {
+ rte_spinlock_unlock(&lock_entry_lock);
+ return -ENOMEM;
+ }
+ le->callback = callback;
+ le->port_id = port_id;
+ le->user_args = user_args;
+ TAILQ_INSERT_TAIL(&lock_entry_list, le, next);
+ }
+ le->ref_count++;
+
+ rte_spinlock_unlock(&lock_entry_lock);
+ return 0;
+}
+
+int
+unregister_lock_callback(uint16_t port_id,
+ rte_eth_dev_lock_callback_t callback,
+ void *user_args)
+{
+ struct lock_entry *le;
+ int ret = 0;
+
+ rte_spinlock_lock(&lock_entry_lock);
+
+ TAILQ_FOREACH(le, &lock_entry_list, next) {
+ if (le->port_id == port_id &&
+ le->callback == callback &&
+ le->user_args == user_args)
+ break;
+ }
+
+ if (le != NULL) {
+ le->ref_count--;
+ if (le->ref_count == 0) {
+ TAILQ_REMOVE(&lock_entry_list, le, next);
+ free(le);
+ }
+ } else {
+ ret = -ENOENT;
+ }
+
+ rte_spinlock_unlock(&lock_entry_lock);
+ return ret;
+}
+
+static int clean_lock_callback_one(uint16_t port_id)
+{
+ struct lock_entry *le;
+ int ret = 0;
+
+ TAILQ_FOREACH(le, &lock_entry_list, next) {
+ if (le->port_id == port_id)
+ break;
+ }
+
+ if (le != NULL) {
+ le->ref_count--;
+ if (le->ref_count == 0) {
+ TAILQ_REMOVE(&lock_entry_list, le, next);
+ free(le);
+ }
+ } else {
+ ret = -ENOENT;
+ }
+
+ return ret;
+
+}
+
+void clean_lock_callback(uint16_t port_id)
+{
+ int ret;
+
+ rte_spinlock_lock(&lock_entry_lock);
+
+ for (;;) {
+ ret = clean_lock_callback_one(port_id);
+ if (ret == -ENOENT)
+ break;
+ }
+
+ rte_spinlock_unlock(&lock_entry_lock);
+}
+
+int process_lock_callbacks(uint16_t port_id)
+{
+ struct lock_entry *le;
+
+ rte_spinlock_lock(&lock_entry_lock);
+
+ TAILQ_FOREACH(le, &lock_entry_list, next) {
+ if (le->port_id != port_id)
+ continue;
+
+ if (le->callback(port_id, le->user_args)) {
+ rte_spinlock_unlock(&lock_entry_lock);
+ return -EBUSY;
+ }
+ }
+
+ rte_spinlock_unlock(&lock_entry_lock);
+ return 0;
+}
diff --git a/lib/librte_ethdev/ethdev_lock.h b/lib/librte_ethdev/ethdev_lock.h
new file mode 100644
index 000000000..82132eb0c
--- /dev/null
+++ b/lib/librte_ethdev/ethdev_lock.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#ifndef _RTE_ETHDEV_LOCK_H_
+#define _RTE_ETHDEV_LOCK_H_
+
+#include "rte_ethdev.h"
+
+/* Register lock callback function on specific port */
+int
+register_lock_callback(uint16_t port_id,
+ rte_eth_dev_lock_callback_t callback,
+ void *user_args);
+
+/* Unregister lock callback function on specific port */
+int
+unregister_lock_callback(uint16_t port_id,
+ rte_eth_dev_lock_callback_t callback,
+ void *user_args);
+
+/**
+ * Unregister all callback function on specific port.
+ * This will be called when a device is detached.
+ */
+void clean_lock_callback(uint16_t port_id);
+
+/* Run each callback one by one. */
+int process_lock_callbacks(uint16_t port_id);
+
+#endif
diff --git a/lib/librte_ethdev/ethdev_mp.c b/lib/librte_ethdev/ethdev_mp.c
index 7b1e89a91..b00c05c23 100644
--- a/lib/librte_ethdev/ethdev_mp.c
+++ b/lib/librte_ethdev/ethdev_mp.c
@@ -5,6 +5,7 @@
#include <rte_string_fns.h>
#include "rte_ethdev_driver.h"
#include "ethdev_mp.h"
+#include "ethdev_lock.h"

#define MP_TIMEOUT_S 5 /**< 5 seconds timeouts */

@@ -109,7 +110,7 @@ handle_primary_request(const struct rte_mp_msg *msg, const void *peer)
ret = attach_on_secondary(req->devargs, req->port_id);
break;
case REQ_TYPE_PRE_DETACH:
- ret = 0;
+ ret = process_lock_callbacks(req->port_id);
break;
case REQ_TYPE_DETACH:
case REQ_TYPE_ATTACH_ROLLBACK:
diff --git a/lib/librte_ethdev/meson.build b/lib/librte_ethdev/meson.build
index b60256855..9bb0aec7f 100644
--- a/lib/librte_ethdev/meson.build
+++ b/lib/librte_ethdev/meson.build
@@ -6,6 +6,7 @@ version = 9
allow_experimental_apis = true
sources = files('ethdev_profile.c',
'ethdev_mp.c'
+ 'ethdev_lock.c'
'rte_ethdev.c',
'rte_flow.c',
'rte_mtr.c',
diff --git a/lib/librte_ethdev/rte_ethdev.c b/lib/librte_ethdev/rte_ethdev.c
index 1a5861f30..575dd40ae 100644
--- a/lib/librte_ethdev/rte_ethdev.c
+++ b/lib/librte_ethdev/rte_ethdev.c
@@ -43,6 +43,7 @@
#include "ethdev_profile.h"
#include "ethdev_mp.h"
#include "ethdev_private.h"
+#include "ethdev_lock.h"

int ethdev_logtype;

@@ -723,6 +724,7 @@ do_eth_dev_detach(uint16_t port_id)
if (ret < 0)
return ret;

+ clean_lock_callback(port_id);
rte_eth_dev_release_port(&rte_eth_devices[port_id]);
return ret;

@@ -789,7 +791,6 @@ rte_eth_dev_attach(const char *devargs, uint16_t *port_id)
int
rte_eth_dev_attach_private(const char *devargs, uint16_t *port_id)
{
-
if (rte_eal_process_type() == RTE_PROC_PRIMARY)
return -ENOTSUP;

@@ -831,6 +832,10 @@ rte_eth_dev_detach(uint16_t port_id, char *name __rte_unused)
return req.result;
}

+ ret = process_lock_callbacks(port_id);
+ if (ret)
+ return ret;
+
/* check pre_detach */
req.t = REQ_TYPE_PRE_DETACH;
req.port_id = port_id;
@@ -877,6 +882,7 @@ int
rte_eth_dev_detach_private(uint16_t port_id, char *name __rte_unused)
{
uint32_t dev_flags;
+ int ret;

if (rte_eal_process_type() == RTE_PROC_PRIMARY)
return -ENOTSUP;
@@ -890,6 +896,10 @@ rte_eth_dev_detach_private(uint16_t port_id, char *name __rte_unused)
return -ENOTSUP;
}

+ ret = process_lock_callbacks(port_id);
+ if (ret)
+ return ret;
+
return do_eth_dev_detach(port_id);
}

@@ -4692,6 +4702,54 @@ rte_eth_devargs_parse(const char *dargs, struct rte_eth_devargs *eth_da)
return result;
}

+static int
+dev_is_busy(uint16_t port_id __rte_unused, void *user_args __rte_unused)
+{
+ return -EBUSY;
+}
+
+int
+rte_eth_dev_lock(uint16_t port_id)
+{
+ RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+
+ return register_lock_callback(port_id, dev_is_busy, NULL);
+}
+
+int
+rte_eth_dev_lock_with_callback(uint16_t port_id,
+ rte_eth_dev_lock_callback_t callback,
+ void *user_args)
+{
+ RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+
+ if (callback == NULL)
+ return -EINVAL;
+
+ return register_lock_callback(port_id, callback, user_args);
+}
+
+int
+rte_eth_dev_unlock(uint16_t port_id)
+{
+ RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+
+ return unregister_lock_callback(port_id, dev_is_busy, NULL);
+}
+
+int
+rte_eth_dev_unlock_with_callback(uint16_t port_id,
+ rte_eth_dev_lock_callback_t callback,
+ void *user_args)
+{
+ RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+
+ if (callback == NULL)
+ return -EINVAL;
+
+ return unregister_lock_callback(port_id, callback, user_args);
+}
+
RTE_INIT(ethdev_init_log);
static void
ethdev_init_log(void)
diff --git a/lib/librte_ethdev/rte_ethdev.h b/lib/librte_ethdev/rte_ethdev.h
index 813806e3c..1596b6e2b 100644
--- a/lib/librte_ethdev/rte_ethdev.h
+++ b/lib/librte_ethdev/rte_ethdev.h
@@ -4364,6 +4364,130 @@ rte_eth_tx_buffer(uint16_t port_id, uint16_t queue_id,
return rte_eth_tx_buffer_flush(port_id, queue_id, buffer);
}

+/**
+ * Callback function before device is detached.
+ *
+ * This type of function will be added into a function list, and will be
+ * invoked before device be detached. Application can register a callback
+ * function so it can be notified and do some cleanup before detach happen.
+ * Also, any callback function return !0 value will prevent device be
+ * detached (ref. rte_eth_dev_lock_with_callback and
+ * rte_eth_dev_unlock_with_callback).
+ *
+ * @param port_id
+ * The port identifier of the Ethernet device.
+ * @param user_args
+ * This is parameter "user_args" be saved when callback function is
+ * registered(rte_dev_eth_lock).
+ *
+ * @return
+ * 0 device is allowed be detached.
+ * !0 device is not allowed be detached.
+ */
+typedef int (*rte_eth_dev_lock_callback_t)(uint16_t port_id, void *user_args);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Lock an Ethernet Device, this help application to prevent a device
+ * be detached unexpectedly.
+ *
+ * @note
+ * In multi-process situation, any process lock a share device will
+ * prevent it be detached from all process. Also this is per-process
+ * lock, which means unlock a device from process A take no effect
+ * if the device is locked from process B.
+ *
+ * @note
+ * Lock a device multiple times will increase a ref_count, and
+ * corresponding unlock decrease the ref_count, the device will be
+ * unlocked when ref_count reach 0.
+ *
+ * @param port_id
+ * The port identifier of the Ethernet device.
+ *
+ * @return
+ * 0 on success, negative on error.
+ */
+int __rte_experimental rte_eth_dev_lock(uint16_t port_id);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Lock an Ethernet device base on a callback function which can performs
+ * condition check at the moment before device be detached. if the
+ * condition check not pass, the device will not be detached, else,
+ * continue to detach or not rely on return value of other callbacks
+ * on the same port.
+ *
+ * @note
+ * Same as rte_eth_dev_lock, it is per-process lock.
+ *
+ * @note
+ * Lock a device with different callback or user_args will add different
+ * lock entries (<callback, user_args> pair) in a list. Lock a device
+ * multiple times with same callback and args will only increase a
+ * ref_count of specific lock entry, and corresponding unlock decrease
+ * the ref_count, an entry will be removed if its ref_count reach 0.
+ *
+ * @note
+ * All callbacks be attached to specific port will be removed
+ * automatically if the device is detached.
+ *
+ * @param port_id
+ * The port identifier of the Ethernet device.
+ * @param callback
+ * the callback function will be added into a pre-detach list,
+ * it will be invoked when a device is going to be detached. The
+ * return value will decide if continue detach the device or not.
+ * @param user_args
+ * parameter will be parsed to callback function.
+ *
+ * @return
+ * 0 on success, negative on error.
+ */
+int __rte_experimental
+rte_eth_dev_lock_with_callback(uint16_t port_id,
+ rte_eth_dev_lock_callback_t callback,
+ void *user_args);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Reverse operation of rte_eth_dev_lock.
+ *
+ * @param port_id
+ * The port identifier of the Ethernet device.
+ *
+ * @return
+ * 0 on success, negative on error.
+ */
+int __rte_experimental rte_eth_dev_unlock(uint16_t port_id);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Reverse operation of rte_eth_dev_lock_with_callback.
+ *
+ * @param port_id
+ * The port identifier of the Ethernet device.
+ * @param callback
+ * parameter to match a lock entry.
+ * @param user_args
+ * parameter to match a lock entry.
+ *
+ * @return
+ * 0 on success, negative on error.
+ */
+int __rte_experimental
+rte_eth_dev_unlock_with_callback(uint16_t port_id,
+ rte_eth_dev_lock_callback_t callback,
+ void *user_args);
+
#ifdef __cplusplus
}
#endif
--
2.13.6
Qi Zhang
2018-06-25 07:17:29 UTC
Permalink
This patch covers the multi-process hotplug case in which an attach/detach
request for a shared device is issued from a secondary process. The
implementation is modeled on malloc_mp.c.

device attach on secondary:
a) secondary sends an async request to primary and waits on a condition
which will be released by the matching response from primary.
b) primary receives the request and attaches the new device; if that
fails, goto i).
c) primary forwards the attach request to all secondaries as an async request
(because this runs in mp thread context, a sync request would deadlock).
d) secondaries receive the request, attach the device and send a reply.
e) primary checks the replies; if all succeeded, goto j).
f) primary sends an attach rollback async request to all secondaries.
g) secondaries receive the request, detach the device and send a reply.
h) primary receives the replies and detaches the device as rollback action.
i) send fail response to secondary, goto k).
j) send success response to secondary.
k) secondary process receives the response and returns.

device detach on secondary:
a) secondary sends an async request to primary and waits on a condition
which will be released by the matching response from primary.
b) primary receives the request and performs the pre-detach check; if the
device is locked, goto j).
c) primary sends a pre-detach async request to all secondaries.
d) secondaries perform the pre-detach check and send a reply.
e) primary checks the replies; if any failed, goto j).
f) primary sends a detach async request to all secondaries.
g) secondaries detach the device and send a reply.
h) primary detaches the device.
i) send success response to secondary, goto k).
j) send fail response to secondary.
k) secondary process receives the response and returns.

Signed-off-by: Qi Zhang <***@intel.com>
---
lib/librte_ethdev/ethdev_mp.c | 513 +++++++++++++++++++++++++++++++++++++++++-
lib/librte_ethdev/ethdev_mp.h | 1 +
2 files changed, 504 insertions(+), 10 deletions(-)

diff --git a/lib/librte_ethdev/ethdev_mp.c b/lib/librte_ethdev/ethdev_mp.c
index b00c05c23..af8cec8c0 100644
--- a/lib/librte_ethdev/ethdev_mp.c
+++ b/lib/librte_ethdev/ethdev_mp.c
@@ -3,12 +3,103 @@
*/

#include <rte_string_fns.h>
+#include <sys/time.h>
+
+#include <rte_alarm.h>
+
#include "rte_ethdev_driver.h"
#include "ethdev_mp.h"
#include "ethdev_lock.h"
+#include "ethdev_private.h"
+
+/**
+ * secondary to primary request.
+ * start from function eth_dev_request_to_primary.
+ *
+ * device attach:
+ * a) seconary send request to primary.
+ * b) primary attach the new device if failed goto i).
+ * c) primary forward attach request to all secondary.
+ * d) secondary receive request and attach device and send reply.
+ * e) primary check the reply if all success go to j).
+ * f) primary send attach rollback request to all secondary.
+ * g) secondary receive the request and detach device and send reply.
+ * h) primary receive the reply and detach device as rollback action.
+ * i) send fail response to secondary, goto k).
+ * j) send success response to secondary.
+ * k) end.
+
+ * device detach:
+ * a) secondary send request to primary.
+ * b) primary perform pre-detach check, if device is locked, got j).
+ * c) primary send pre-detach check request to all secondary.
+ * d) secondary perform pre-detach check and send reply.
+ * e) primary check the reply if any fail goto j).
+ * f) primary send detach request to all secondary
+ * g) secondary detach the device and send reply
+ * h) primary detach the device.
+ * i) send success response to secondary, goto k).
+ * j) send fail response to secondary.
+ * k) end.
+ */
+
+enum req_state {
+ REQ_STATE_INACTIVE = 0,
+ REQ_STATE_ACTIVE,
+ REQ_STATE_COMPLETE
+};
+
+struct mp_request {
+ TAILQ_ENTRY(mp_request) next;
+ struct eth_dev_mp_req user_req; /**< contents of request */
+ pthread_cond_t cond; /**< variable we use to time out on this request */
+ enum req_state state; /**< indicate status of this request */
+};
+
+/*
+ * We could've used just a single request, but it may be possible for
+ * secondaries to timeout earlier than the primary, and send a new request while
+ * primary is still expecting replies to the old one. Therefore, each new
+ * request will get assigned a new ID, which is how we will distinguish between
+ * expected and unexpected messages.
+ */
+TAILQ_HEAD(mp_request_list, mp_request);
+static struct {
+ struct mp_request_list list;
+ pthread_mutex_t lock;
+} mp_request_list = {
+ .list = TAILQ_HEAD_INITIALIZER(mp_request_list.list),
+ .lock = PTHREAD_MUTEX_INITIALIZER
+};

#define MP_TIMEOUT_S 5 /**< 5 seconds timeouts */

+static struct mp_request *
+find_request_by_id(uint64_t id)
+{
+ struct mp_request *req;
+
+ TAILQ_FOREACH(req, &mp_request_list.list, next) {
+ if (req->user_req.id == id)
+ break;
+ }
+ return req;
+}
+
+static uint64_t
+get_unique_id(void)
+{
+ uint64_t id;
+
+ do {
+ id = rte_rand();
+ } while (find_request_by_id(id) != NULL);
+ return id;
+}
+
+static int
+send_request_to_secondary_async(const struct eth_dev_mp_req *req);
+
static int detach_on_secondary(uint16_t port_id)
{
struct rte_device *dev;
@@ -78,19 +169,355 @@ static int attach_on_secondary(const char *devargs, uint16_t port_id)
}

static int
-handle_secondary_request(const struct rte_mp_msg *msg, const void *peer)
+check_reply(const struct eth_dev_mp_req *req, const struct rte_mp_reply *reply)
+{
+ struct eth_dev_mp_req *resp;
+ int i;
+
+ if (reply->nb_received != reply->nb_sent)
+ return -EINVAL;
+
+ for (i = 0; i < reply->nb_received; i++) {
+ resp = (struct eth_dev_mp_req *)reply->msgs[i].param;
+
+ if (resp->t != req->t) {
+ ethdev_log(ERR, "Unexpected response to async request\n");
+ return -EINVAL;
+ }
+
+ if (resp->id != req->id) {
+ ethdev_log(ERR, "response to wrong async request\n");
+ return -ENOENT;
+ }
+
+ if (resp->result)
+ return resp->result;
+ }
+
+ return 0;
+}
+
+static int
+send_response_to_secondary(const struct eth_dev_mp_req *req, int result)
+{
+ struct rte_mp_msg resp_msg;
+ struct eth_dev_mp_req *resp =
+ (struct eth_dev_mp_req *)resp_msg.param;
+ int ret = 0;
+
+ memset(&resp_msg, 0, sizeof(resp_msg));
+ resp_msg.len_param = sizeof(*resp);
+ strcpy(resp_msg.name, ETH_DEV_MP_ACTION_RESPONSE);
+ memcpy(resp, req, sizeof(*req));
+ resp->result = result;
+
+ ret = rte_mp_sendmsg(&resp_msg);
+ if (ret)
+ ethdev_log(ERR, "failed to send response to secondary\n");
+
+ return ret;
+}
+
+static int
+handle_async_attach_response(const struct rte_mp_msg *request,
+ const struct rte_mp_reply *reply)
+{
+ const struct eth_dev_mp_req *req =
+ (const struct eth_dev_mp_req *)request->param;
+ struct mp_request *entry;
+ struct eth_dev_mp_req tmp_req;
+ int ret = 0;
+
+ pthread_mutex_lock(&mp_request_list.lock);
+
+ entry = find_request_by_id(req->id);
+ if (!entry) {
+ ethdev_log(ERR, "wrong request ID\n");
+ ret = -EINVAL;
+ goto finish;
+ }
+
+ ret = check_reply(req, reply);
+ if (ret) {
+ tmp_req = *req;
+ tmp_req.t = REQ_TYPE_ATTACH_ROLLBACK;
+
+ ret = send_request_to_secondary_async(&tmp_req);
+ if (ret) {
+ ethdev_log(ERR, "couldn't send async request\n");
+ TAILQ_REMOVE(&mp_request_list.list, entry, next);
+ free(entry);
+ }
+ } else {
+ send_response_to_secondary(req, 0);
+ TAILQ_REMOVE(&mp_request_list.list, entry, next);
+ free(entry);
+ }
+
+finish:
+ pthread_mutex_unlock(&mp_request_list.lock);
+ return ret;
+}
+
+static int
+handle_async_detach_response(const struct rte_mp_msg *request,
+ const struct rte_mp_reply *reply)
{
- RTE_SET_USED(msg);
- RTE_SET_USED(peer);
- return -ENOTSUP;
+ const struct eth_dev_mp_req *req =
+ (const struct eth_dev_mp_req *)request->param;
+ struct mp_request *entry;
+ int ret = 0;
+
+ pthread_mutex_lock(&mp_request_list.lock);
+
+ entry = find_request_by_id(req->id);
+ if (!entry) {
+ ethdev_log(ERR, "wrong request ID\n");
+ ret = -EINVAL;
+ goto finish;
+ }
+
+ ret = check_reply(req, reply);
+ if (ret) {
+ send_response_to_secondary(req, ret);
+ } else {
+ do_eth_dev_detach(req->port_id);
+ send_response_to_secondary(req, 0);
+ }
+ TAILQ_REMOVE(&mp_request_list.list, entry, next);
+ free(entry);
+
+finish:
+ pthread_mutex_unlock(&mp_request_list.lock);
+ return ret;
}

static int
-handle_primary_response(const struct rte_mp_msg *msg, const void *peer)
+handle_async_pre_detach_response(const struct rte_mp_msg *request,
+ const struct rte_mp_reply *reply)
{
- RTE_SET_USED(msg);
- RTE_SET_USED(peer);
- return -ENOTSUP;
+ const struct eth_dev_mp_req *req =
+ (const struct eth_dev_mp_req *)request->param;
+ struct eth_dev_mp_req tmp_req;
+ struct mp_request *entry;
+ int ret = 0;
+
+ pthread_mutex_lock(&mp_request_list.lock);
+
+ entry = find_request_by_id(req->id);
+ if (!entry) {
+ ethdev_log(ERR, "wrong request ID\n");
+ ret = -EINVAL;
+ goto finish;
+ }
+
+ ret = check_reply(req, reply);
+ if (!ret) {
+ tmp_req = *req;
+ tmp_req.t = REQ_TYPE_DETACH;
+
+ ret = send_request_to_secondary_async(&tmp_req);
+ if (ret) {
+ ethdev_log(ERR, "couldn't send async request\n");
+ TAILQ_REMOVE(&mp_request_list.list, entry, next);
+ free(entry);
+ }
+ } else {
+ send_response_to_secondary(req, ret);
+ TAILQ_REMOVE(&mp_request_list.list, entry, next);
+ free(entry);
+ }
+
+finish:
+ pthread_mutex_unlock(&mp_request_list.lock);
+ return 0;
+}
+
+static int
+handle_async_rollback_response(const struct rte_mp_msg *request,
+ const struct rte_mp_reply *reply __rte_unused)
+{
+ const struct eth_dev_mp_req *req =
+ (const struct eth_dev_mp_req *)request->param;
+ struct mp_request *entry;
+ int ret = 0;
+
+ pthread_mutex_lock(&mp_request_list.lock);
+
+ entry = find_request_by_id(req->id);
+ if (!entry) {
+ ethdev_log(ERR, "wrong request ID\n");
+ ret = -EINVAL;
+ goto finish;
+ }
+
+ /* we have nothing to do if rollback still fail, just detach */
+ do_eth_dev_detach(req->port_id);
+ /* send response to secondary with the reason of rollback */
+ send_response_to_secondary(req, req->result);
+ TAILQ_REMOVE(&mp_request_list.list, entry, next);
+ free(entry);
+
+finish:
+ pthread_mutex_unlock(&mp_request_list.lock);
+ return ret;
+}
+
+static int
+send_request_to_secondary_async(const struct eth_dev_mp_req *req)
+{
+ struct timespec ts = {.tv_sec = MP_TIMEOUT_S, .tv_nsec = 0};
+ struct rte_mp_msg mp_req;
+ rte_mp_async_reply_t clb;
+ int ret = 0;
+
+ memset(&mp_req, 0, sizeof(mp_req));
+ memcpy(mp_req.param, req, sizeof(*req));
+ mp_req.len_param = sizeof(*req);
+ strcpy(mp_req.name, ETH_DEV_MP_ACTION_REQUEST);
+
+ if (req->t == REQ_TYPE_ATTACH)
+ clb = handle_async_attach_response;
+ else if (req->t == REQ_TYPE_PRE_DETACH)
+ clb = handle_async_pre_detach_response;
+ else if (req->t == REQ_TYPE_DETACH)
+ clb = handle_async_detach_response;
+ else if (req->t == REQ_TYPE_ATTACH_ROLLBACK)
+ clb = handle_async_rollback_response;
+ else
+ return -1;
+ do {
+ ret = rte_mp_request_async(&mp_req, &ts, clb);
+ } while (ret != 0 && rte_errno == EEXIST);
+
+ if (ret)
+ ethdev_log(ERR, "couldn't send async request\n");
+
+ return ret;
+}
+
+static void
+__handle_secondary_request(void *param)
+{
+ struct rte_mp_msg *msg = param;
+ const struct eth_dev_mp_req *req =
+ (const struct eth_dev_mp_req *)msg->param;
+ struct eth_dev_mp_req tmp_req;
+ struct mp_request *entry;
+ uint16_t port_id;
+ int ret = 0;
+
+ pthread_mutex_lock(&mp_request_list.lock);
+
+ entry = find_request_by_id(req->id);
+ if (entry) {
+ ethdev_log(ERR, "duplicate request id\n");
+ ret = -EEXIST;
+ goto finish;
+ }
+
+ entry = malloc(sizeof(*entry));
+ if (entry == NULL) {
+ ethdev_log(ERR, "not enough memory to allocate request entry\n");
+ ret = -ENOMEM;
+ goto finish;
+ }
+
+ if (req->t == REQ_TYPE_ATTACH) {
+ ret = do_eth_dev_attach(req->devargs, &port_id);
+ if (!ret) {
+ tmp_req = *req;
+ tmp_req.port_id = port_id;
+ ret = send_request_to_secondary_async(&tmp_req);
+ }
+ } else if (req->t == REQ_TYPE_DETACH) {
+ if (!rte_eth_dev_is_valid_port(req->port_id))
+ ret = -EINVAL;
+ if (!ret)
+ ret = process_lock_callbacks(req->port_id);
+ if (!ret) {
+ tmp_req = *req;
+ tmp_req.t = REQ_TYPE_PRE_DETACH;
+ ret = send_request_to_secondary_async(&tmp_req);
+ }
+ } else {
+ ethdev_log(ERR, "unsupported secondary to primary request\n");
+ ret = -ENOTSUP;
+ goto finish;
+ }
+
+ if (ret) {
+ ret = send_response_to_secondary(req, ret);
+ if (ret) {
+ ethdev_log(ERR, "failed to send response to secondary\n");
+ goto finish;
+ }
+ } else {
+ memcpy(&entry->user_req, req, sizeof(*req));
+ entry->state = REQ_STATE_ACTIVE;
+ TAILQ_INSERT_TAIL(&mp_request_list.list, entry, next);
+ entry = NULL;
+ }
+
+finish:
+ pthread_mutex_unlock(&mp_request_list.lock);
+ if (entry)
+ free(entry);
+ free(msg);
+}
+
+static int
+handle_secondary_request(const struct rte_mp_msg *msg,
+ const void *peer __rte_unused)
+{
+ struct rte_mp_msg *msg_cpy;
+ const struct eth_dev_mp_req *req =
+ (const struct eth_dev_mp_req *)msg->param;
+ int ret = 0;
+
+ msg_cpy = malloc(sizeof(*msg_cpy));
+ if (msg_cpy == NULL) {
+ ethdev_log(ERR, "not enough memory\n");
+ return send_response_to_secondary(req, -ENOMEM);
+ }
+
+ memcpy(msg_cpy, msg, sizeof(*msg_cpy));
+
+ /**
+ * We can't handle the secondary request in mp callback because
+ * we are running in primary process, we are going to invoke SYNC IPC
+ * in rte_malloc.
+ */
+ ret = rte_eal_alarm_set(1, __handle_secondary_request, msg_cpy);
+ if (ret) {
+ ethdev_log(ERR, "failed to set alarm callback\n");
+ return send_response_to_secondary(req, ret);
+ }
+ return 0;
+}
+
+static int
+handle_primary_response(const struct rte_mp_msg *msg,
+ const void *peer __rte_unused)
+{
+ const struct eth_dev_mp_req *req =
+ (const struct eth_dev_mp_req *)msg->param;
+ struct mp_request *entry;
+
+ pthread_mutex_lock(&mp_request_list.lock);
+
+ entry = find_request_by_id(req->id);
+ if (entry) {
+ entry->user_req.result = req->result;
+ entry->user_req.port_id = req->port_id;
+ entry->state = REQ_STATE_COMPLETE;
+
+ pthread_cond_signal(&entry->cond);
+ }
+
+ pthread_mutex_unlock(&mp_request_list.lock);
+
+ return 0;
}

static int
@@ -134,8 +561,74 @@ handle_primary_request(const struct rte_mp_msg *msg, const void *peer)

int eth_dev_request_to_primary(struct eth_dev_mp_req *req)
{
- RTE_SET_USED(req);
- return -ENOTSUP;
+ struct rte_mp_msg msg;
+ struct eth_dev_mp_req *msg_req = (struct eth_dev_mp_req *)msg.param;
+ struct mp_request *entry;
+ struct timespec ts;
+ struct timeval now;
+ int ret = 0;
+
+ memset(&msg, 0, sizeof(msg));
+ memset(&ts, 0, sizeof(ts));
+
+ entry = malloc(sizeof(*entry));
+ if (entry == NULL) {
+ ethdev_log(ERR, "not enough memory to allocate request entry\n");
+ return -ENOMEM;
+ }
+
+ pthread_mutex_lock(&mp_request_list.lock);
+
+ ret = gettimeofday(&now, NULL);
+ if (ret) {
+ ethdev_log(ERR, "cannot get current time\n");
+ ret = -EINVAL;
+ goto finish;
+ }
+
+ ts.tv_nsec = (now.tv_usec * 1000) % 1000000000;
+ ts.tv_sec = now.tv_sec + MP_TIMEOUT_S +
+ (now.tv_usec * 1000) / 1000000000;
+
+ pthread_cond_init(&entry->cond, NULL);
+
+ msg.len_param = sizeof(*req);
+ strcpy(msg.name, ETH_DEV_MP_ACTION_REQUEST);
+
+ req->id = get_unique_id();
+
+ memcpy(msg_req, req, sizeof(*req));
+
+ ret = rte_mp_sendmsg(&msg);
+ if (ret) {
+ ethdev_log(ERR, "cannot send message to primary");
+ goto finish;
+ }
+
+ memcpy(&entry->user_req, req, sizeof(*req));
+
+ entry->state = REQ_STATE_ACTIVE;
+
+ TAILQ_INSERT_TAIL(&mp_request_list.list, entry, next);
+
+ do {
+ ret = pthread_cond_timedwait(&entry->cond,
+ &mp_request_list.lock, &ts);
+ } while (ret != 0 && ret != ETIMEDOUT);
+
+ if (entry->state != REQ_STATE_COMPLETE) {
+ RTE_LOG(ERR, EAL, "request time out\n");
+ ret = -ETIMEDOUT;
+ } else {
+ req->port_id = entry->user_req.port_id;
+ req->result = entry->user_req.result;
+ }
+ TAILQ_REMOVE(&mp_request_list.list, entry, next);
+
+finish:
+ pthread_mutex_unlock(&mp_request_list.lock);
+ free(entry);
+ return ret;
}

/**
diff --git a/lib/librte_ethdev/ethdev_mp.h b/lib/librte_ethdev/ethdev_mp.h
index 40be46c89..94ff21cdd 100644
--- a/lib/librte_ethdev/ethdev_mp.h
+++ b/lib/librte_ethdev/ethdev_mp.h
@@ -18,6 +18,7 @@ enum eth_dev_req_type {
};

struct eth_dev_mp_req {
+ uint64_t id;
enum eth_dev_req_type t;
char devargs[MAX_DEV_ARGS_LEN];
uint16_t port_id;
--
2.13.6
Qi Zhang
2018-06-25 07:17:31 UTC
Permalink
Previously, detaching a port in a secondary process would interfere with
the primary process and prevent the same device from being attached again.
By taking advantage of rte_eth_dev_release_port_private, we can support
this with a minor change.

Signed-off-by: Qi Zhang <***@intel.com>
---
drivers/net/ixgbe/ixgbe_ethdev.c | 12 ++++++++++++
1 file changed, 12 insertions(+)

diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c b/drivers/net/ixgbe/ixgbe_ethdev.c
index 87d2ad090..f9d560835 100644
--- a/drivers/net/ixgbe/ixgbe_ethdev.c
+++ b/drivers/net/ixgbe/ixgbe_ethdev.c
@@ -1792,6 +1792,9 @@ static int eth_ixgbe_pci_remove(struct rte_pci_device *pci_dev)
if (!ethdev)
return -ENODEV;

+ if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+ return rte_eth_dev_release_port_private(ethdev);
+
if (ethdev->data->dev_flags & RTE_ETH_DEV_REPRESENTOR)
return rte_eth_dev_destroy(ethdev, ixgbe_vf_representor_uninit);
else
@@ -1809,6 +1812,15 @@ static struct rte_pci_driver rte_ixgbe_pmd = {
static int eth_ixgbevf_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
struct rte_pci_device *pci_dev)
{
+ struct rte_eth_dev *ethdev;
+
+ ethdev = rte_eth_dev_allocated(pci_dev->device.name);
+ if (!ethdev)
+ return -ENODEV;
+
+ if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+ return rte_eth_dev_release_port_private(ethdev);
+
return rte_eth_dev_pci_generic_probe(pci_dev,
sizeof(struct ixgbe_adapter), eth_ixgbevf_dev_init);
}
--
2.13.6
Qi Zhang
2018-06-25 07:17:32 UTC
Permalink
Previously, detaching a port in a secondary process would interfere with
the primary process and prevent the same device from being attached again.
By taking advantage of rte_eth_dev_release_port_private, we can support
this with a minor change.

Signed-off-by: Qi Zhang <***@intel.com>
---
drivers/net/e1000/em_ethdev.c | 9 +++++++++
1 file changed, 9 insertions(+)

diff --git a/drivers/net/e1000/em_ethdev.c b/drivers/net/e1000/em_ethdev.c
index 7039dc100..e6b7ce63a 100644
--- a/drivers/net/e1000/em_ethdev.c
+++ b/drivers/net/e1000/em_ethdev.c
@@ -349,6 +349,15 @@ static int eth_em_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,

static int eth_em_pci_remove(struct rte_pci_device *pci_dev)
{
+ struct rte_eth_dev *ethdev =
+ rte_eth_dev_allocated(pci_dev->device.name);
+
+ if (!ethdev)
+ return -ENODEV;
+
+ if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+ return rte_eth_dev_release_port_private(ethdev);
+
return rte_eth_dev_pci_generic_remove(pci_dev, eth_em_dev_uninit);
}
--
2.13.6
Qi Zhang
2018-06-25 07:17:30 UTC
Permalink
Previously, detaching a port in a secondary process would interfere with
the primary process and prevent the same device from being attached again.
By taking advantage of rte_eth_dev_release_port_private, we can support
this with a minor change.

Signed-off-by: Qi Zhang <***@intel.com>
---
drivers/net/i40e/i40e_ethdev.c | 2 ++
drivers/net/i40e/i40e_ethdev_vf.c | 9 +++++++++
2 files changed, 11 insertions(+)

diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c
index 13c5d3296..7d1f98422 100644
--- a/drivers/net/i40e/i40e_ethdev.c
+++ b/drivers/net/i40e/i40e_ethdev.c
@@ -678,6 +678,8 @@ static int eth_i40e_pci_remove(struct rte_pci_device *pci_dev)
if (!ethdev)
return -ENODEV;

+ if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+ return rte_eth_dev_release_port_private(ethdev);

if (ethdev->data->dev_flags & RTE_ETH_DEV_REPRESENTOR)
return rte_eth_dev_destroy(ethdev, i40e_vf_representor_uninit);
diff --git a/drivers/net/i40e/i40e_ethdev_vf.c b/drivers/net/i40e/i40e_ethdev_vf.c
index 804e44530..fc6f079d5 100644
--- a/drivers/net/i40e/i40e_ethdev_vf.c
+++ b/drivers/net/i40e/i40e_ethdev_vf.c
@@ -1500,6 +1500,15 @@ static int eth_i40evf_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,

static int eth_i40evf_pci_remove(struct rte_pci_device *pci_dev)
{
+ struct rte_eth_dev *ethdev;
+ ethdev = rte_eth_dev_allocated(pci_dev->device.name);
+
+ if (!ethdev)
+ return -ENODEV;
+
+ if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+ return rte_eth_dev_release_port_private(ethdev);
+
return rte_eth_dev_pci_generic_remove(pci_dev, i40evf_dev_uninit);
}
--
2.13.6
Qi Zhang
2018-06-25 07:17:33 UTC
Permalink
Previously, detaching a port in a secondary process would interfere with
the primary process and prevent the same device from being attached again.
By taking advantage of rte_eth_dev_release_port_private, we can support
this with a minor change.

Signed-off-by: Qi Zhang <***@intel.com>
---
drivers/net/e1000/igb_ethdev.c | 9 +++++++++
1 file changed, 9 insertions(+)

diff --git a/drivers/net/e1000/igb_ethdev.c b/drivers/net/e1000/igb_ethdev.c
index edc7be319..db07a83e3 100644
--- a/drivers/net/e1000/igb_ethdev.c
+++ b/drivers/net/e1000/igb_ethdev.c
@@ -1089,6 +1089,15 @@ static int eth_igb_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,

static int eth_igb_pci_remove(struct rte_pci_device *pci_dev)
{
+ struct rte_eth_dev *ethdev =
+ rte_eth_dev_allocated(pci_dev->device.name);
+
+ if (!ethdev)
+ return -ENODEV;
+
+ if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+ return rte_eth_dev_release_port_private(ethdev);
+
return rte_eth_dev_pci_generic_remove(pci_dev, eth_igb_dev_uninit);
}
--
2.13.6
Qi Zhang
2018-06-25 07:17:35 UTC
Permalink
Previously, detaching a port in a secondary process would interfere with
the primary process and prevent the same device from being attached again.
By taking advantage of rte_eth_dev_release_port_private, we can support
this with a minor change.

Signed-off-by: Qi Zhang <***@intel.com>
---
drivers/net/af_packet/rte_eth_af_packet.c | 11 +++++++++++
1 file changed, 11 insertions(+)

diff --git a/drivers/net/af_packet/rte_eth_af_packet.c b/drivers/net/af_packet/rte_eth_af_packet.c
index ea47abbf8..33ac19de8 100644
--- a/drivers/net/af_packet/rte_eth_af_packet.c
+++ b/drivers/net/af_packet/rte_eth_af_packet.c
@@ -935,6 +935,7 @@ rte_pmd_af_packet_probe(struct rte_vdev_device *dev)
}
/* TODO: request info from primary to set up Rx and Tx */
eth_dev->dev_ops = &ops;
+ eth_dev->device = &dev->device;
rte_eth_dev_probing_finish(eth_dev);
return 0;
}
@@ -986,6 +987,16 @@ rte_pmd_af_packet_remove(struct rte_vdev_device *dev)
if (eth_dev == NULL)
return -1;

+ if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+ /* detach device on local pprocess only */
+ if (strlen(rte_vdev_device_args(dev)) == 0)
+ return rte_eth_dev_release_port_private(eth_dev);
+ /**
+ * else this is a private device for current process
+ * so continue with normal detach scenario
+ */
+ }
+
internals = eth_dev->data->dev_private;
for (q = 0; q < internals->nb_queues; q++) {
rte_free(internals->rx_queue[q].rd);
--
2.13.6
Qi Zhang
2018-06-25 07:17:37 UTC
Permalink
Previously, detaching a port in a secondary process would interfere with
the primary process and prevent the same device from being attached again.
By taking advantage of rte_eth_dev_release_port_private, we can support
this with a minor change.

Signed-off-by: Qi Zhang <***@intel.com>
---
drivers/net/failsafe/failsafe.c | 16 ++++++++++++++++
1 file changed, 16 insertions(+)

diff --git a/drivers/net/failsafe/failsafe.c b/drivers/net/failsafe/failsafe.c
index eafbb75df..c5e8651f6 100644
--- a/drivers/net/failsafe/failsafe.c
+++ b/drivers/net/failsafe/failsafe.c
@@ -328,6 +328,7 @@ rte_pmd_failsafe_probe(struct rte_vdev_device *vdev)
}
/* TODO: request info from primary to set up Rx and Tx */
eth_dev->dev_ops = &failsafe_ops;
+ eth_dev->device = &vdev->device;
rte_eth_dev_probing_finish(eth_dev);
return 0;
}
@@ -338,10 +339,25 @@ rte_pmd_failsafe_probe(struct rte_vdev_device *vdev)
static int
rte_pmd_failsafe_remove(struct rte_vdev_device *vdev)
{
+ struct rte_eth_dev *eth_dev;
const char *name;

name = rte_vdev_device_name(vdev);
INFO("Uninitializing " FAILSAFE_DRIVER_NAME " for %s", name);
+
+ eth_dev = rte_eth_dev_allocated(name);
+ if (!eth_dev)
+ return -ENODEV;
+ if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+ /* detach device on local pprocess only */
+ if (strlen(rte_vdev_device_args(vdev)) == 0)
+ return rte_eth_dev_release_port_private(eth_dev);
+ /**
+ * else this is a private device for current process
+ * so continue with normal detach scenario.
+ */
+ }
+
return fs_rte_eth_free(name);
}
--
2.13.6
Qi Zhang
2018-06-25 07:17:34 UTC
Permalink
Previously, detaching a port in a secondary process would interfere with
the primary process and prevent the same device from being attached again.
By taking advantage of rte_eth_dev_release_port_private, we can support
this with a minor change.

Signed-off-by: Qi Zhang <***@intel.com>
---
drivers/net/fm10k/fm10k_ethdev.c | 9 +++++++++
1 file changed, 9 insertions(+)

diff --git a/drivers/net/fm10k/fm10k_ethdev.c b/drivers/net/fm10k/fm10k_ethdev.c
index 3ff1b0e0f..f73301182 100644
--- a/drivers/net/fm10k/fm10k_ethdev.c
+++ b/drivers/net/fm10k/fm10k_ethdev.c
@@ -3264,6 +3264,15 @@ static int eth_fm10k_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,

static int eth_fm10k_pci_remove(struct rte_pci_device *pci_dev)
{
+ struct rte_eth_dev *ethdev =
+ rte_eth_dev_allocated(pci_dev->device.name);
+
+ if (!ethdev)
+ return -ENODEV;
+
+ if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+ return rte_eth_dev_release_port_private(ethdev);
+
return rte_eth_dev_pci_generic_remove(pci_dev, eth_fm10k_dev_uninit);
}
--
2.13.6
Qi Zhang
2018-06-25 07:17:36 UTC
Permalink
Previously, detaching a port in a secondary process would interfere with
the primary process and prevent the same device from being attached again.
By taking advantage of rte_eth_dev_release_port_private, we can support
this with a minor change.

Signed-off-by: Qi Zhang <***@intel.com>
---
drivers/net/bonding/rte_eth_bond_pmd.c | 11 +++++++++++
1 file changed, 11 insertions(+)

diff --git a/drivers/net/bonding/rte_eth_bond_pmd.c b/drivers/net/bonding/rte_eth_bond_pmd.c
index f155ff779..da45ba9ba 100644
--- a/drivers/net/bonding/rte_eth_bond_pmd.c
+++ b/drivers/net/bonding/rte_eth_bond_pmd.c
@@ -3062,6 +3062,7 @@ bond_probe(struct rte_vdev_device *dev)
}
/* TODO: request info from primary to set up Rx and Tx */
eth_dev->dev_ops = &default_dev_ops;
+ eth_dev->device = &dev->device;
rte_eth_dev_probing_finish(eth_dev);
return 0;
}
@@ -3168,6 +3169,16 @@ bond_remove(struct rte_vdev_device *dev)
if (eth_dev == NULL)
return -ENODEV;

+ if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+ /* detach device on local process only */
+ if (strlen(rte_vdev_device_args(dev)) == 0)
+ return rte_eth_dev_release_port_private(eth_dev);
+ /**
+ * else this is a private device for current process
+ * so continue with normal detach scenario
+ */
+ }
+
RTE_ASSERT(eth_dev->device == &dev->device);

internals = eth_dev->data->dev_private;
--
2.13.6
Qi Zhang
2018-06-25 07:17:38 UTC
Permalink
Previously, detaching a port in a secondary process would disrupt the
primary process and prevent the same device from being attached again.
By taking advantage of rte_eth_dev_release_port_private(), we can
support this with a minor change.

Signed-off-by: Qi Zhang <***@intel.com>
---
drivers/net/kni/rte_eth_kni.c | 11 +++++++++++
1 file changed, 11 insertions(+)

diff --git a/drivers/net/kni/rte_eth_kni.c b/drivers/net/kni/rte_eth_kni.c
index ab63ea427..e5679c76a 100644
--- a/drivers/net/kni/rte_eth_kni.c
+++ b/drivers/net/kni/rte_eth_kni.c
@@ -419,6 +419,7 @@ eth_kni_probe(struct rte_vdev_device *vdev)
}
/* TODO: request info from primary to set up Rx and Tx */
eth_dev->dev_ops = &eth_kni_ops;
+ eth_dev->device = &vdev->device;
rte_eth_dev_probing_finish(eth_dev);
return 0;
}
@@ -463,6 +464,16 @@ eth_kni_remove(struct rte_vdev_device *vdev)
if (eth_dev == NULL)
return -1;

+ if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+ /* detach device on local process only */
+ if (strlen(rte_vdev_device_args(vdev)) == 0)
+ return rte_eth_dev_release_port_private(eth_dev);
+ /**
+ * else this is a private device for current process
+ * so continue with normal detach scenario
+ */
+ }
+
eth_kni_dev_stop(eth_dev);

internals = eth_dev->data->dev_private;
--
2.13.6
Qi Zhang
2018-06-25 07:17:39 UTC
Permalink
Previously, detaching a port in a secondary process would disrupt the
primary process and prevent the same device from being attached again.
By taking advantage of rte_eth_dev_release_port_private(), we can
support this with a minor change.

Signed-off-by: Qi Zhang <***@intel.com>
---
drivers/net/null/rte_eth_null.c | 16 +++++++++++++++-
1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/drivers/net/null/rte_eth_null.c b/drivers/net/null/rte_eth_null.c
index 1d2e6b9e9..2f040729b 100644
--- a/drivers/net/null/rte_eth_null.c
+++ b/drivers/net/null/rte_eth_null.c
@@ -623,6 +623,7 @@ rte_pmd_null_probe(struct rte_vdev_device *dev)
}
/* TODO: request info from primary to set up Rx and Tx */
eth_dev->dev_ops = &ops;
+ eth_dev->device = &dev->device;
rte_eth_dev_probing_finish(eth_dev);
return 0;
}
@@ -667,18 +668,31 @@ static int
rte_pmd_null_remove(struct rte_vdev_device *dev)
{
struct rte_eth_dev *eth_dev = NULL;
+ const char *name;

if (!dev)
return -EINVAL;

+ name = rte_vdev_device_name(dev);
+
PMD_LOG(INFO, "Closing null ethdev on numa socket %u",
rte_socket_id());

/* find the ethdev entry */
- eth_dev = rte_eth_dev_allocated(rte_vdev_device_name(dev));
+ eth_dev = rte_eth_dev_allocated(name);
if (eth_dev == NULL)
return -1;

+ if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+ /* detach device on local process only */
+ if (strlen(rte_vdev_device_args(dev)) == 0)
+ return rte_eth_dev_release_port_private(eth_dev);
+ /**
+ * else this is a private device for current process
+ * so continue with normal detach scenario
+ */
+ }
+
rte_free(eth_dev->data->dev_private);

rte_eth_dev_release_port(eth_dev);
--
2.13.6
Qi Zhang
2018-06-25 07:17:44 UTC
Permalink
Previously, detaching a port in a secondary process would disrupt the
primary process and prevent the same device from being attached again.
By taking advantage of rte_eth_dev_release_port_private(), we can
support this with a minor change.

Signed-off-by: Qi Zhang <***@intel.com>
---
drivers/net/vhost/rte_eth_vhost.c | 11 +++++++++++
1 file changed, 11 insertions(+)

diff --git a/drivers/net/vhost/rte_eth_vhost.c b/drivers/net/vhost/rte_eth_vhost.c
index ba9d768a0..f773711b4 100644
--- a/drivers/net/vhost/rte_eth_vhost.c
+++ b/drivers/net/vhost/rte_eth_vhost.c
@@ -1353,6 +1353,7 @@ rte_pmd_vhost_probe(struct rte_vdev_device *dev)
}
/* TODO: request info from primary to set up Rx and Tx */
eth_dev->dev_ops = &ops;
+ eth_dev->device = &dev->device;
rte_eth_dev_probing_finish(eth_dev);
return 0;
}
@@ -1435,6 +1436,16 @@ rte_pmd_vhost_remove(struct rte_vdev_device *dev)
if (eth_dev == NULL)
return -ENODEV;

+ if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+ /* detach device on local process only */
+ if (strlen(rte_vdev_device_args(dev)) == 0)
+ return rte_eth_dev_release_port_private(eth_dev);
+ /**
+ * else this is a private device for current process
+ * so continue with normal detach scenario
+ */
+ }
+
eth_dev_close(eth_dev);

rte_free(vring_states[eth_dev->data->port_id]);
--
2.13.6
Qi Zhang
2018-06-25 07:17:40 UTC
Permalink
Previously, detaching a port in a secondary process would disrupt the
primary process and prevent the same device from being attached again.
By taking advantage of rte_eth_dev_release_port_private(), we can
support this with a minor change.

Signed-off-by: Qi Zhang <***@intel.com>
---
drivers/net/octeontx/octeontx_ethdev.c | 16 ++++++++++++++++
1 file changed, 16 insertions(+)

diff --git a/drivers/net/octeontx/octeontx_ethdev.c b/drivers/net/octeontx/octeontx_ethdev.c
index 1eb453b21..497bacdc6 100644
--- a/drivers/net/octeontx/octeontx_ethdev.c
+++ b/drivers/net/octeontx/octeontx_ethdev.c
@@ -1016,6 +1016,7 @@ octeontx_create(struct rte_vdev_device *dev, int port, uint8_t evdev,

eth_dev->tx_pkt_burst = octeontx_xmit_pkts;
eth_dev->rx_pkt_burst = octeontx_recv_pkts;
+ eth_dev->device = &dev->device;
rte_eth_dev_probing_finish(eth_dev);
return 0;
}
@@ -1138,6 +1139,18 @@ octeontx_remove(struct rte_vdev_device *dev)
if (eth_dev == NULL)
return -ENODEV;

+ if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+ /* detach device on local process only */
+ if (strlen(rte_vdev_device_args(dev)) == 0) {
+ rte_eth_dev_release_port_private(eth_dev);
+ continue;
+ }
+ /**
+ * else this is a private device for current process
+ * so continue with normal detach scenario
+ */
+ }
+
nic = octeontx_pmd_priv(eth_dev);
rte_event_dev_stop(nic->evdev);
PMD_INIT_LOG(INFO, "Closing octeontx device %s", octtx_name);
@@ -1148,6 +1161,9 @@ octeontx_remove(struct rte_vdev_device *dev)
rte_event_dev_close(nic->evdev);
}

+ if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+ return 0;
+
/* Free FC resource */
octeontx_pko_fc_free();
--
2.13.6
Qi Zhang
2018-06-25 07:17:41 UTC
Permalink
Previously, detaching a port in a secondary process would disrupt the
primary process and prevent the same device from being attached again.
By taking advantage of rte_eth_dev_release_port_private(), we can
support this with a minor change.

Signed-off-by: Qi Zhang <***@intel.com>
---
drivers/net/pcap/rte_eth_pcap.c | 15 ++++++++++++++-
1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/drivers/net/pcap/rte_eth_pcap.c b/drivers/net/pcap/rte_eth_pcap.c
index 6bd4a7d79..6cc20c2b2 100644
--- a/drivers/net/pcap/rte_eth_pcap.c
+++ b/drivers/net/pcap/rte_eth_pcap.c
@@ -925,6 +925,7 @@ pmd_pcap_probe(struct rte_vdev_device *dev)
}
/* TODO: request info from primary to set up Rx and Tx */
eth_dev->dev_ops = &ops;
+ eth_dev->device = &dev->device;
rte_eth_dev_probing_finish(eth_dev);
return 0;
}
@@ -1016,6 +1017,7 @@ static int
pmd_pcap_remove(struct rte_vdev_device *dev)
{
struct rte_eth_dev *eth_dev = NULL;
+ const char *name;

PMD_LOG(INFO, "Closing pcap ethdev on numa socket %d",
rte_socket_id());
@@ -1023,11 +1025,22 @@ pmd_pcap_remove(struct rte_vdev_device *dev)
if (!dev)
return -1;

+ name = rte_vdev_device_name(dev);
/* reserve an ethdev entry */
- eth_dev = rte_eth_dev_allocated(rte_vdev_device_name(dev));
+ eth_dev = rte_eth_dev_allocated(name);
if (eth_dev == NULL)
return -1;

+ if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+ /* detach device on local process only */
+ if (strlen(rte_vdev_device_args(dev)) == 0)
+ return rte_eth_dev_release_port_private(eth_dev);
+ /**
+ * else this is a private device for current process
+ * so continue with normal detach scenario
+ */
+ }
+
rte_free(eth_dev->data->dev_private);

rte_eth_dev_release_port(eth_dev);
--
2.13.6
Loading...