From 7c1eb45a22d76bb99236e7485958f87ef7c449cf Mon Sep 17 00:00:00 2001 From: Haggai Eran Date: Thu, 30 Jul 2015 17:50:14 +0300 Subject: IB/core: lock client data with lists_rwsem An ib_client callback that is called with the lists_rwsem locked only for read is protected from changes to the IB client lists, but not from ib_unregister_device() freeing its client data. This is because ib_unregister_device() will remove the device from the device list with lists_rwsem locked for write, but perform the rest of the cleanup, including the call to remove() without that lock. Mark client data that is undergoing de-registration with a new going_down flag in the client data context. Lock the client data list with lists_rwsem for write in addition to using the spinlock, so that functions calling the callback would be able to lock only lists_rwsem for read and let callbacks sleep. Since ib_unregister_client() now marks the client data context, no need for remove() to search the context again, so pass the client data directly to remove() callbacks. Reviewed-by: Jason Gunthorpe Signed-off-by: Haggai Eran Signed-off-by: Doug Ledford --- net/rds/ib.c | 5 ++--- net/rds/iw.c | 5 ++--- 2 files changed, 4 insertions(+), 6 deletions(-) (limited to 'net/rds') diff --git a/net/rds/ib.c b/net/rds/ib.c index ba2dffeff608..348ac37c1161 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -230,11 +230,10 @@ struct rds_ib_device *rds_ib_get_client_data(struct ib_device *device) * * This can be called at any time and can be racing with any other RDS path. */ -static void rds_ib_remove_one(struct ib_device *device) +static void rds_ib_remove_one(struct ib_device *device, void *client_data) { - struct rds_ib_device *rds_ibdev; + struct rds_ib_device *rds_ibdev = client_data; - rds_ibdev = ib_get_client_data(device, &rds_ib_client); if (!rds_ibdev) return; diff --git a/net/rds/iw.c b/net/rds/iw.c index 589935661d66..7cc2f32a0cb3 100644 --- a/net/rds/iw.c +++ b/net/rds/iw.c @@ -125,12 +125,11 @@ free_attr: kfree(dev_attr); } -static void rds_iw_remove_one(struct ib_device *device) +static void rds_iw_remove_one(struct ib_device *device, void *client_data) { - struct rds_iw_device *rds_iwdev; + struct rds_iw_device *rds_iwdev = client_data; struct rds_iw_cm_id *i_cm_id, *next; - rds_iwdev = ib_get_client_data(device, &rds_iw_client); if (!rds_iwdev) return; -- cgit v1.2.3 From fc27995942960c894bc4725435dce8750e44cd64 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 30 Jul 2015 10:32:40 +0300 Subject: RDS: Convert to ib_alloc_mr Signed-off-by: Sagi Grimberg Signed-off-by: Doug Ledford --- net/rds/iw_rdma.c | 5 +++-- net/rds/iw_send.c | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'net/rds') diff --git a/net/rds/iw_rdma.c b/net/rds/iw_rdma.c index dba8d0864f18..6a8fbd6e69e7 100644 --- a/net/rds/iw_rdma.c +++ b/net/rds/iw_rdma.c @@ -667,11 +667,12 @@ static int rds_iw_init_fastreg(struct rds_iw_mr_pool *pool, struct ib_mr *mr; int err; - mr = ib_alloc_fast_reg_mr(rds_iwdev->pd, pool->max_message_size); + mr = ib_alloc_mr(rds_iwdev->pd, IB_MR_TYPE_MEM_REG, + pool->max_message_size); if (IS_ERR(mr)) { err = PTR_ERR(mr); - printk(KERN_WARNING "RDS/IW: ib_alloc_fast_reg_mr failed (err=%d)\n", err); + printk(KERN_WARNING "RDS/IW: ib_alloc_mr failed (err=%d)\n", err); return err; } diff --git a/net/rds/iw_send.c b/net/rds/iw_send.c index 334fe98c5084..86152ec3b887 100644 --- a/net/rds/iw_send.c +++ b/net/rds/iw_send.c @@ -153,9 +153,10 @@ void rds_iw_send_init_ring(struct rds_iw_connection *ic) sge->length = sizeof(struct rds_header); sge->lkey = 0; - send->s_mr = ib_alloc_fast_reg_mr(ic->i_pd, fastreg_message_size); + send->s_mr = ib_alloc_mr(ic->i_pd, IB_MR_TYPE_MEM_REG, + fastreg_message_size); if (IS_ERR(send->s_mr)) { - printk(KERN_WARNING "RDS/IW: ib_alloc_fast_reg_mr failed\n"); + printk(KERN_WARNING "RDS/IW: ib_alloc_mr failed\n"); break; } -- cgit v1.2.3 From e5580242aa8fab292579a1661463f7479275f7ff Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 30 Jul 2015 17:22:26 -0600 Subject: rds/ib: Remove ib_get_dma_mr calls The pd now has a local_dma_lkey member which completely replaces ib_get_dma_mr, use it instead. Signed-off-by: Jason Gunthorpe Signed-off-by: Doug Ledford --- net/rds/ib.c | 8 -------- net/rds/ib.h | 2 -- net/rds/ib_cm.c | 4 +--- net/rds/ib_recv.c | 6 +++--- net/rds/ib_send.c | 8 ++++---- 5 files changed, 8 insertions(+), 20 deletions(-) (limited to 'net/rds') diff --git a/net/rds/ib.c b/net/rds/ib.c index 348ac37c1161..c6f95b9494ae 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -99,8 +99,6 @@ static void rds_ib_dev_free(struct work_struct *work) if (rds_ibdev->mr_pool) rds_ib_destroy_mr_pool(rds_ibdev->mr_pool); - if (rds_ibdev->mr) - ib_dereg_mr(rds_ibdev->mr); if (rds_ibdev->pd) ib_dealloc_pd(rds_ibdev->pd); @@ -164,12 +162,6 @@ static void rds_ib_add_one(struct ib_device *device) goto put_dev; } - rds_ibdev->mr = ib_get_dma_mr(rds_ibdev->pd, IB_ACCESS_LOCAL_WRITE); - if (IS_ERR(rds_ibdev->mr)) { - rds_ibdev->mr = NULL; - goto put_dev; - } - rds_ibdev->mr_pool = rds_ib_create_mr_pool(rds_ibdev); if (IS_ERR(rds_ibdev->mr_pool)) { rds_ibdev->mr_pool = NULL; diff --git a/net/rds/ib.h b/net/rds/ib.h index 86d88ec5d556..36f7d808ffaa 100644 --- a/net/rds/ib.h +++ b/net/rds/ib.h @@ -100,7 +100,6 @@ struct rds_ib_connection { /* alphabet soup, IBTA style */ struct rdma_cm_id *i_cm_id; struct ib_pd *i_pd; - struct ib_mr *i_mr; struct ib_cq *i_send_cq; struct ib_cq *i_recv_cq; @@ -173,7 +172,6 @@ struct rds_ib_device { struct list_head conn_list; struct ib_device *dev; struct ib_pd *pd; - struct ib_mr *mr; struct rds_ib_mr_pool *mr_pool; unsigned int fmr_max_remaps; unsigned int max_fmrs; diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c index 0da2a45b33bd..a75e8832bc23 100644 --- a/net/rds/ib_cm.c +++ b/net/rds/ib_cm.c @@ -269,7 +269,6 @@ static int rds_ib_setup_qp(struct rds_connection *conn) /* Protection domain and memory range */ ic->i_pd = rds_ibdev->pd; - ic->i_mr = rds_ibdev->mr; cq_attr.cqe = ic->i_send_ring.w_nr + 1; ic->i_send_cq = ib_create_cq(dev, rds_ib_send_cq_comp_handler, @@ -375,7 +374,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn) rds_ib_recv_init_ack(ic); - rdsdebug("conn %p pd %p mr %p cq %p %p\n", conn, ic->i_pd, ic->i_mr, + rdsdebug("conn %p pd %p cq %p %p\n", conn, ic->i_pd, ic->i_send_cq, ic->i_recv_cq); out: @@ -678,7 +677,6 @@ void rds_ib_conn_shutdown(struct rds_connection *conn) ic->i_cm_id = NULL; ic->i_pd = NULL; - ic->i_mr = NULL; ic->i_send_cq = NULL; ic->i_recv_cq = NULL; ic->i_send_hdrs = NULL; diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c index cac5b4506ee3..0ceb4c60d2a3 100644 --- a/net/rds/ib_recv.c +++ b/net/rds/ib_recv.c @@ -62,12 +62,12 @@ void rds_ib_recv_init_ring(struct rds_ib_connection *ic) sge = &recv->r_sge[0]; sge->addr = ic->i_recv_hdrs_dma + (i * sizeof(struct rds_header)); sge->length = sizeof(struct rds_header); - sge->lkey = ic->i_mr->lkey; + sge->lkey = ic->i_pd->local_dma_lkey; sge = &recv->r_sge[1]; sge->addr = 0; sge->length = RDS_FRAG_SIZE; - sge->lkey = ic->i_mr->lkey; + sge->lkey = ic->i_pd->local_dma_lkey; } } @@ -520,7 +520,7 @@ void rds_ib_recv_init_ack(struct rds_ib_connection *ic) sge->addr = ic->i_ack_dma; sge->length = sizeof(struct rds_header); - sge->lkey = ic->i_mr->lkey; + sge->lkey = ic->i_pd->local_dma_lkey; wr->sg_list = sge; wr->num_sge = 1; diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c index 5d0a704fa039..f6c829d43373 100644 --- a/net/rds/ib_send.c +++ b/net/rds/ib_send.c @@ -202,9 +202,9 @@ void rds_ib_send_init_ring(struct rds_ib_connection *ic) sge = &send->s_sge[0]; sge->addr = ic->i_send_hdrs_dma + (i * sizeof(struct rds_header)); sge->length = sizeof(struct rds_header); - sge->lkey = ic->i_mr->lkey; + sge->lkey = ic->i_pd->local_dma_lkey; - send->s_sge[1].lkey = ic->i_mr->lkey; + send->s_sge[1].lkey = ic->i_pd->local_dma_lkey; } } @@ -813,7 +813,7 @@ int rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op) /* Convert our struct scatterlist to struct ib_sge */ send->s_sge[0].addr = ib_sg_dma_address(ic->i_cm_id->device, op->op_sg); send->s_sge[0].length = ib_sg_dma_len(ic->i_cm_id->device, op->op_sg); - send->s_sge[0].lkey = ic->i_mr->lkey; + send->s_sge[0].lkey = ic->i_pd->local_dma_lkey; rdsdebug("rva %Lx rpa %Lx len %u\n", op->op_remote_addr, send->s_sge[0].addr, send->s_sge[0].length); @@ -927,7 +927,7 @@ int rds_ib_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op) send->s_sge[j].addr = ib_sg_dma_address(ic->i_cm_id->device, scat); send->s_sge[j].length = len; - send->s_sge[j].lkey = ic->i_mr->lkey; + send->s_sge[j].lkey = ic->i_pd->local_dma_lkey; sent += len; rdsdebug("ic %p sent %d remote_addr %llu\n", ic, sent, remote_addr); -- cgit v1.2.3 From 7dd78647a2c2c224e376fc72797d411a3a0bb047 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 5 Aug 2015 14:34:31 -0600 Subject: IB/core: Make ib_dealloc_pd return void The majority of callers never check the return value, and even if they did, they can't do anything about a failure. All possible failure cases represent a bug in the caller, so just WARN_ON inside the function instead. This fixes a few random errors: net/rd/iw.c infinite loops while it fails. (racing with EBUSY?) This also lays the ground work to get rid of error return from the drivers. Most drivers do not error, the few that do are broken since it cannot be handled. Since uverbs can legitimately make use of EBUSY, open code the check. Signed-off-by: Jason Gunthorpe Reviewed-by: Chuck Lever Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs_cmd.c | 22 ++++++++++++++++------ drivers/infiniband/core/verbs.c | 26 ++++++++++++++++++++------ drivers/infiniband/ulp/ipoib/ipoib_verbs.c | 4 +--- drivers/infiniband/ulp/iser/iser_verbs.c | 2 +- include/rdma/ib_verbs.h | 6 +----- net/rds/iw.c | 5 +---- net/sunrpc/xprtrdma/verbs.c | 2 +- 7 files changed, 41 insertions(+), 26 deletions(-) (limited to 'net/rds') diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 258485ee46b2..4c98696e3626 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -606,6 +606,7 @@ ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file, { struct ib_uverbs_dealloc_pd cmd; struct ib_uobject *uobj; + struct ib_pd *pd; int ret; if (copy_from_user(&cmd, buf, sizeof cmd)) @@ -614,15 +615,20 @@ ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file, uobj = idr_write_uobj(&ib_uverbs_pd_idr, cmd.pd_handle, file->ucontext); if (!uobj) return -EINVAL; + pd = uobj->object; - ret = ib_dealloc_pd(uobj->object); - if (!ret) - uobj->live = 0; - - put_uobj_write(uobj); + if (atomic_read(&pd->usecnt)) { + ret = -EBUSY; + goto err_put; + } + ret = pd->device->dealloc_pd(uobj->object); + WARN_ONCE(ret, "Infiniband HW driver failed dealloc_pd"); if (ret) - return ret; + goto err_put; + + uobj->live = 0; + put_uobj_write(uobj); idr_remove_uobj(&ib_uverbs_pd_idr, uobj); @@ -633,6 +639,10 @@ ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file, put_uobj(uobj); return in_len; + +err_put: + put_uobj_write(uobj); + return ret; } struct xrcd_table_entry { diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 2e5fd89a8929..aad8b3ce66cc 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -260,18 +260,32 @@ struct ib_pd *ib_alloc_pd(struct ib_device *device) } EXPORT_SYMBOL(ib_alloc_pd); -int ib_dealloc_pd(struct ib_pd *pd) +/** + * ib_dealloc_pd - Deallocates a protection domain. + * @pd: The protection domain to deallocate. + * + * It is an error to call this function while any resources in the pd still + * exist. The caller is responsible to synchronously destroy them and + * guarantee no new allocations will happen. + */ +void ib_dealloc_pd(struct ib_pd *pd) { + int ret; + if (pd->local_mr) { - if (ib_dereg_mr(pd->local_mr)) - return -EBUSY; + ret = ib_dereg_mr(pd->local_mr); + WARN_ON(ret); pd->local_mr = NULL; } - if (atomic_read(&pd->usecnt)) - return -EBUSY; + /* uverbs manipulates usecnt with proper locking, while the kabi + requires the caller to guarantee we can't race here. */ + WARN_ON(atomic_read(&pd->usecnt)); - return pd->device->dealloc_pd(pd); + /* Making delalloc_pd a void return is a WIP, no driver should return + an error here. */ + ret = pd->device->dealloc_pd(pd); + WARN_ONCE(ret, "Infiniband HW driver failed dealloc_pd"); } EXPORT_SYMBOL(ib_dealloc_pd); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c index 8c451983d8a5..78845b6e8b81 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c @@ -280,9 +280,7 @@ void ipoib_transport_dev_cleanup(struct net_device *dev) priv->wq = NULL; } - if (ib_dealloc_pd(priv->pd)) - ipoib_warn(priv, "ib_dealloc_pd failed\n"); - + ib_dealloc_pd(priv->pd); } void ipoib_event(struct ib_event_handler *handler, diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index ad2d2b50cd7f..ae70cc1463ac 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -185,7 +185,7 @@ static void iser_free_device_ib_res(struct iser_device *device) (void)ib_unregister_event_handler(&device->event_handler); (void)ib_dereg_mr(device->mr); - (void)ib_dealloc_pd(device->pd); + ib_dealloc_pd(device->pd); kfree(device->comps); device->comps = NULL; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 09400512d579..128abf2888ab 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2196,11 +2196,7 @@ int ib_find_pkey(struct ib_device *device, struct ib_pd *ib_alloc_pd(struct ib_device *device); -/** - * ib_dealloc_pd - Deallocates a protection domain. - * @pd: The protection domain to deallocate. - */ -int ib_dealloc_pd(struct ib_pd *pd); +void ib_dealloc_pd(struct ib_pd *pd); /** * ib_create_ah - Creates an address handle for the given address vector. diff --git a/net/rds/iw.c b/net/rds/iw.c index 7cc2f32a0cb3..c7dcddbf17cb 100644 --- a/net/rds/iw.c +++ b/net/rds/iw.c @@ -148,10 +148,7 @@ static void rds_iw_remove_one(struct ib_device *device, void *client_data) if (rds_iwdev->mr) ib_dereg_mr(rds_iwdev->mr); - while (ib_dealloc_pd(rds_iwdev->pd)) { - rdsdebug("Failed to dealloc pd %p\n", rds_iwdev->pd); - msleep(1); - } + ib_dealloc_pd(rds_iwdev->pd); list_del(&rds_iwdev->list); kfree(rds_iwdev); diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 891c4ede2c20..afd504375a9a 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -624,7 +624,7 @@ rpcrdma_ia_close(struct rpcrdma_ia *ia) /* If the pd is still busy, xprtrdma missed freeing a resource */ if (ia->ri_pd && !IS_ERR(ia->ri_pd)) - WARN_ON(ib_dealloc_pd(ia->ri_pd)); + ib_dealloc_pd(ia->ri_pd); } /* -- cgit v1.2.3