From d84106477733cb155c5dcaea664ddf120bf69eb7 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Sat, 5 Sep 2009 20:36:16 -0700 Subject: IB/mthca: Don't allow userspace open while recovering from catastrophic error Userspace apps are supposed to release all ib device resources if they receive a fatal async event (IBV_EVENT_DEVICE_FATAL). However, the app has no way of knowing when the device has come back up, except to repeatedly attempt ibv_open_device() until it succeeds. However, currently there is no protection against the open succeeding while the device is in being removed following the fatal event. In this case, the open will succeed, but as a result the device waits in the middle of its removal until the new app releases its resources -- and the new app will not do so, since the open succeeded at a point following the fatal event generation. This patch adds an "active" flag to the device. The active flag is set to false (in the fatal event flow) before the "fatal" event is generated, so any subsequent ibv_dev_open() call to the device will fail until the device comes back up, thus preventing the above deadlock. Signed-off-by: Jack Morgenstein Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mthca/mthca_catas.c | 1 + drivers/infiniband/hw/mthca/mthca_dev.h | 1 + drivers/infiniband/hw/mthca/mthca_main.c | 2 ++ drivers/infiniband/hw/mthca/mthca_provider.c | 3 +++ 4 files changed, 7 insertions(+) (limited to 'drivers/infiniband/hw/mthca') diff --git a/drivers/infiniband/hw/mthca/mthca_catas.c b/drivers/infiniband/hw/mthca/mthca_catas.c index 65ad359fdf1..056b2a4c697 100644 --- a/drivers/infiniband/hw/mthca/mthca_catas.c +++ b/drivers/infiniband/hw/mthca/mthca_catas.c @@ -88,6 +88,7 @@ static void handle_catas(struct mthca_dev *dev) event.device = &dev->ib_dev; event.event = IB_EVENT_DEVICE_FATAL; event.element.port_num = 0; + dev->active = false; ib_dispatch_event(&event); diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h index 9ef611f6dd3..7e6a6d64ad4 100644 --- a/drivers/infiniband/hw/mthca/mthca_dev.h +++ b/drivers/infiniband/hw/mthca/mthca_dev.h @@ -357,6 +357,7 @@ struct mthca_dev { struct ib_ah *sm_ah[MTHCA_MAX_PORTS]; spinlock_t sm_lock; u8 rate[MTHCA_MAX_PORTS]; + bool active; }; #ifdef CONFIG_INFINIBAND_MTHCA_DEBUG diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c index 13da9f1d24c..518cc540e51 100644 --- a/drivers/infiniband/hw/mthca/mthca_main.c +++ b/drivers/infiniband/hw/mthca/mthca_main.c @@ -1116,6 +1116,8 @@ static int __mthca_init_one(struct pci_dev *pdev, int hca_type) pci_set_drvdata(pdev, mdev); mdev->hca_type = hca_type; + mdev->active = true; + return 0; err_unregister: diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index 87ad889e367..bcf7a401482 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -334,6 +334,9 @@ static struct ib_ucontext *mthca_alloc_ucontext(struct ib_device *ibdev, struct mthca_ucontext *context; int err; + if (!(to_mdev(ibdev)->active)) + return ERR_PTR(-EAGAIN); + memset(&uresp, 0, sizeof uresp); uresp.qp_tab_size = to_mdev(ibdev)->limits.num_qps; -- cgit v1.2.3