For PCI hotplug interface, the partner device will be added or
removed if configured with the primary device add / remove.
Whereas for PHB interface, drmgr notifies the user to add / remove
partner device if configured with the primary device. So there is
a possibility of partner PCI node exists in the device tree when
PCI interface for ADD is executed. The current ADD code in
drslot_chrp_pci.c does not check whether the partner device node
is present and may add / enable partner device again which may give
EEH errors.
See the following sequence to get this scenario:
drmgr -r -c phb -s "PHB 1336" --> Remove primary PHB node
drmgr -r -c pci -s "U50EE.001.WZS000E-P3-C24-R2" --> Remove
partner PCI node
drmgr -a -c phb -s "PHB 1336" --> Add primary PHB node
drmgr -a -c pci -s "U50EE.001.WZS000E-P3-C24-R2" --> Add partner
PCI node and can try to add primary PCI node again. In this case
"U50EE.001.WZS000E-P3-C24-R1".
This patch fixes the issue by checking the device node in the
device tree and add the device if does not present and remove
only if the corresponding device node exists.
Fixes: 4e6670df0da2b92 ("drmgr/pci: Add multipath partner device support for hotplug add")
Signed-off-by: Haren Myneni <
ha...@linux.ibm.com>
---
src/drmgr/drslot_chrp_pci.c | 72 ++++++++++++++++++++++++++-----------
1 file changed, 52 insertions(+), 20 deletions(-)
diff --git a/src/drmgr/drslot_chrp_pci.c b/src/drmgr/drslot_chrp_pci.c
index ec3c77c..ed420d7 100644
--- a/src/drmgr/drslot_chrp_pci.c
+++ b/src/drmgr/drslot_chrp_pci.c
@@ -536,12 +536,15 @@ static int do_insert_card_work(struct dr_node *node, bool partner_device)
* Find the partner DRC index and retrieve the partner node.
*/
static struct dr_node *
-find_partner_node(struct dr_node *node, struct dr_node *all_nodes)
+find_partner_node(struct dr_node *node, struct dr_node *all_nodes,
+ int *dt_entry_present)
{
struct dr_node *partner_node = NULL;
uint32_t partner_index = 0;
int rc;
+ *dt_entry_present = 0;
+
/*
* Expect the partner device only for the PCI node
*/
@@ -550,8 +553,20 @@ find_partner_node(struct dr_node *node, struct dr_node *all_nodes)
/* Find the multipath partner device index if available */
rc = get_my_partner_drc_index(node->children, &partner_index);
- if (!rc)
+ if (!rc) {
partner_node = find_slot(NULL, partner_index, all_nodes, 1);
+ /*
+ * Do not add if the partner entry is already present.
+ * Nothing to remove if the partner node does not exists.
+ */
+ if (partner_node && partner_node->children) {
+ uint32_t index;
+ rc = get_my_drc_index(partner_node->children->ofdt_path,
+ &index);
+ if (!rc && (partner_index == index))
+ *dt_entry_present = 1;
+ }
+ }
return partner_node;
}
@@ -626,6 +641,7 @@ static int insert_add_work(struct dr_node *node, bool partner_device)
static int do_add(struct dr_node *all_nodes)
{
struct dr_node *node, *partner_node = NULL;
+ int dt_entry;
int rc;
node = find_slot(usr_drc_name, 0, all_nodes, 0);
@@ -641,8 +657,8 @@ static int do_add(struct dr_node *all_nodes)
if (rc <= 0)
return rc;
- partner_node = find_partner_node(node, all_nodes);
- if (partner_node) {
+ partner_node = find_partner_node(node, all_nodes, &dt_entry);
+ if (partner_node && !dt_entry) {
printf("<%s> and <%s> are\nmultipath partner devices. "
"So <%s> is\nalso added.\n", node->drc_name,
partner_node->drc_name, partner_node->drc_name);
@@ -839,16 +855,25 @@ static int remove_work(struct dr_node *node, bool partner_device)
static int do_remove(struct dr_node *all_nodes)
{
struct dr_node *node, *partner_node = NULL;
+ int dt_entry;
node = find_slot(usr_drc_name, 0, all_nodes, 0);
if (node == NULL)
return -1;
- partner_node = find_partner_node(node, all_nodes);
- if (partner_node)
- printf("<%s> and <%s> are\nmultipath partner devices. "
- "So <%s> will\nbe also removed.\n", node->drc_name,
- partner_node->drc_name, partner_node->drc_name);
+ partner_node = find_partner_node(node, all_nodes, &dt_entry);
+ if (partner_node) {
+ /*
+ * Remove partner device if exists in the device tree.
+ */
+ if (dt_entry)
+ printf("<%s> and <%s> are\nmultipath partner devices. "
+ "So <%s> will\nbe also removed.\n",
+ node->drc_name, partner_node->drc_name,
+ partner_node->drc_name);
+ else
+ partner_node = NULL;
+ }
/* Remove the specified slot and update the device-tree */
if (remove_work(node, false))
@@ -939,18 +964,23 @@ static int replace_add_work(struct dr_node *node, bool partner_device)
static int do_replace(struct dr_node *all_nodes)
{
struct dr_node *repl_node, *partner_node = NULL;
+ int dt_entry;
int rc;
repl_node = find_slot(usr_drc_name, 0, all_nodes, 0);
if (repl_node == NULL)
return -1;
- partner_node = find_partner_node(repl_node, all_nodes);
- if (partner_node)
- printf("<%s> and <%s> are\nmultipath partner devices. "
- "So <%s> will\nbe also replaced.\n",
- repl_node->drc_name, partner_node->drc_name,
- partner_node->drc_name);
+ partner_node = find_partner_node(repl_node, all_nodes, &dt_entry);
+ if (partner_node) {
+ if (dt_entry)
+ printf("<%s> and <%s> are\nmultipath partner devices. "
+ "So <%s> will\nbe also replaced.\n",
+ repl_node->drc_name, partner_node->drc_name,
+ partner_node->drc_name);
+ else
+ partner_node = NULL;
+ }
/* Call the routine which does the work of getting the node info,
* then removing it from the OF device tree.
@@ -972,9 +1002,11 @@ static int do_replace(struct dr_node *all_nodes)
if (rc <= 0)
return rc;
- rc = replace_add_work(partner_node, true);
- if (rc <= 0)
- return rc;
+ if (partner_node) {
+ rc = replace_add_work(partner_node, true);
+ if (rc <= 0)
+ return rc;
+ }
if (repl_node->post_replace_processing) {
int prompt_save = usr_prompt;
@@ -988,8 +1020,8 @@ static int do_replace(struct dr_node *all_nodes)
if (remove_work(repl_node, false))
return -1;
- partner_node = find_partner_node(repl_node, node);
- if (partner_node) {
+ partner_node = find_partner_node(repl_node, node, &dt_entry);
+ if (partner_node && dt_entry) {
if (remove_work(partner_node, true))
return -1;
}
--
2.31.1