[PATCH 6/6] habanalabs/gaudi2: verify return code after scrubbing ARCs DCCMs
Oded Gabbay
ogabbay at kernel.org
Mon Feb 27 11:13:06 UTC 2023
From: Koby Elbaz <kelbaz at habana.ai>
In case the KDMA fails scrubbing the DCCMs (following a soft-reset
upon device release), the driver will only print failure until reset
flow ends, rather than escalating it into a hard-reset.
Signed-off-by: Koby Elbaz <kelbaz at habana.ai>
Reviewed-by: Oded Gabbay <ogabbay at kernel.org>
Signed-off-by: Oded Gabbay <ogabbay at kernel.org>
---
drivers/accel/habanalabs/gaudi2/gaudi2.c | 26 ++++++++++++++++++++----
1 file changed, 22 insertions(+), 4 deletions(-)
diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2.c b/drivers/accel/habanalabs/gaudi2/gaudi2.c
index f01fa4bca381..2186f8bd547e 100644
--- a/drivers/accel/habanalabs/gaudi2/gaudi2.c
+++ b/drivers/accel/habanalabs/gaudi2/gaudi2.c
@@ -3024,16 +3024,21 @@ static int gaudi2_scrub_arc_dccm(struct hl_device *hdev, u32 cpu_id)
return 0;
}
-static void gaudi2_scrub_arcs_dccm(struct hl_device *hdev)
+static int gaudi2_scrub_arcs_dccm(struct hl_device *hdev)
{
u16 arc_id;
+ int rc;
for (arc_id = CPU_ID_SCHED_ARC0 ; arc_id < CPU_ID_MAX ; arc_id++) {
if (!gaudi2_is_arc_enabled(hdev, arc_id))
continue;
- gaudi2_scrub_arc_dccm(hdev, arc_id);
+ rc = gaudi2_scrub_arc_dccm(hdev, arc_id);
+ if (rc)
+ return rc;
}
+
+ return 0;
}
static int gaudi2_late_init(struct hl_device *hdev)
@@ -3057,7 +3062,13 @@ static int gaudi2_late_init(struct hl_device *hdev)
}
gaudi2_init_arcs(hdev);
- gaudi2_scrub_arcs_dccm(hdev);
+
+ rc = gaudi2_scrub_arcs_dccm(hdev);
+ if (rc) {
+ dev_err(hdev->dev, "Failed to scrub arcs DCCM\n");
+ goto disable_pci_access;
+ }
+
gaudi2_init_security(hdev);
return 0;
@@ -6643,12 +6654,19 @@ static int gaudi2_compute_reset_late_init(struct hl_device *hdev)
{
struct gaudi2_device *gaudi2 = hdev->asic_specific;
size_t irq_arr_size;
+ int rc;
/* TODO: missing gaudi2_nic_resume.
* Until implemented nic_hw_cap_initialized will remain zeroed
*/
gaudi2_init_arcs(hdev);
- gaudi2_scrub_arcs_dccm(hdev);
+
+ rc = gaudi2_scrub_arcs_dccm(hdev);
+ if (rc) {
+ dev_err(hdev->dev, "Failed to scrub arcs DCCM\n");
+ return rc;
+ }
+
gaudi2_init_security(hdev);
/* Unmask all IRQs since some could have been received during the soft reset */
--
2.39.2
More information about the dri-devel
mailing list