diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2013-02-26 20:41:08 +0100 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-02-26 20:41:08 +0100 |
commit | 70a3a06d01ed9ca887316a881813cdefb8a20170 (patch) | |
tree | fbdb7982040ba77818e4b738d76eef8bb06fb47f | |
parent | Merge tag 'iommu-updates-v3.9' of git://git.kernel.org/pub/scm/linux/kernel/g... (diff) | |
parent | Merge branches 'core', 'cxgb4', 'ipoib', 'iser', 'misc', 'mlx4', 'qib' and 's... (diff) | |
download | linux-70a3a06d01ed9ca887316a881813cdefb8a20170.tar.xz linux-70a3a06d01ed9ca887316a881813cdefb8a20170.zip |
Merge tag 'rdma-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband
Pull infiniband update from Roland Dreier:
"Main batch of InfiniBand/RDMA changes for 3.9:
- SRP error handling fixes from Bart Van Assche
- Implementation of memory windows for mlx4 from Shani Michaeli
- Lots of cxgb4 HW driver fixes from Vipul Pandya
- Make iSER work for virtual functions, other fixes from Or Gerlitz
- Fix for bug in qib HW driver from Mike Marciniszyn
- IPoIB fixes from me, Itai Garbi, Shlomo Pongratz, Yan Burman
- Various cleanups and warning fixes from Julia Lawall, Paul Bolle,
Wei Yongjun"
* tag 'rdma-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband: (41 commits)
IB/mlx4: Advertise MW support
IB/mlx4: Support memory window binding
mlx4: Implement memory windows allocation and deallocation
mlx4_core: Enable memory windows in {INIT, QUERY}_HCA
mlx4_core: Disable memory windows for virtual functions
IPoIB: Free ipoib neigh on path record failure so path rec queries are retried
IB/srp: Fail I/O requests if the transport is offline
IB/srp: Avoid endless SCSI error handling loop
IB/srp: Avoid sending a task management function needlessly
IB/srp: Track connection state properly
IB/mlx4: Remove redundant NULL check before kfree
IB/mlx4: Fix compiler warning about uninitialized 'vlan' variable
IB/mlx4: Convert is_xxx variables in build_mlx_header() to bool
IB/iser: Enable iser when FMRs are not supported
IB/iser: Avoid error prints on EAGAIN registration failures
IB/iser: Use proper define for the commands per LUN value advertised to SCSI ML
IB/uverbs: Implement memory windows support in uverbs
IB/core: Add "type 2" memory windows support
mlx4_core: Propagate MR deregistration failures to caller
mlx4_core: Rename MPT-related functions to have mpt_ prefix
...
43 files changed, 885 insertions, 253 deletions
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index 5bcb2afd3dcb..0fcd7aa26fa2 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -188,6 +188,8 @@ IB_UVERBS_DECLARE_CMD(alloc_pd); IB_UVERBS_DECLARE_CMD(dealloc_pd); IB_UVERBS_DECLARE_CMD(reg_mr); IB_UVERBS_DECLARE_CMD(dereg_mr); +IB_UVERBS_DECLARE_CMD(alloc_mw); +IB_UVERBS_DECLARE_CMD(dealloc_mw); IB_UVERBS_DECLARE_CMD(create_comp_channel); IB_UVERBS_DECLARE_CMD(create_cq); IB_UVERBS_DECLARE_CMD(resize_cq); diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 0cb0007724a2..3983a0552775 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -48,6 +48,7 @@ struct uverbs_lock_class { static struct uverbs_lock_class pd_lock_class = { .name = "PD-uobj" }; static struct uverbs_lock_class mr_lock_class = { .name = "MR-uobj" }; +static struct uverbs_lock_class mw_lock_class = { .name = "MW-uobj" }; static struct uverbs_lock_class cq_lock_class = { .name = "CQ-uobj" }; static struct uverbs_lock_class qp_lock_class = { .name = "QP-uobj" }; static struct uverbs_lock_class ah_lock_class = { .name = "AH-uobj" }; @@ -1049,6 +1050,126 @@ ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file, return in_len; } +ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_alloc_mw cmd; + struct ib_uverbs_alloc_mw_resp resp; + struct ib_uobject *uobj; + struct ib_pd *pd; + struct ib_mw *mw; + int ret; + + if (out_len < sizeof(resp)) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof(cmd))) + return -EFAULT; + + uobj = kmalloc(sizeof(*uobj), GFP_KERNEL); + if (!uobj) + return -ENOMEM; + + init_uobj(uobj, 0, file->ucontext, &mw_lock_class); + down_write(&uobj->mutex); + + pd = idr_read_pd(cmd.pd_handle, file->ucontext); + if (!pd) { + ret = -EINVAL; + goto err_free; + } + + mw = pd->device->alloc_mw(pd, cmd.mw_type); + if (IS_ERR(mw)) { + ret = PTR_ERR(mw); + goto err_put; + } + + mw->device = pd->device; + mw->pd = pd; + mw->uobject = uobj; + atomic_inc(&pd->usecnt); + + uobj->object = mw; + ret = idr_add_uobj(&ib_uverbs_mw_idr, uobj); + if (ret) + goto err_unalloc; + + memset(&resp, 0, sizeof(resp)); + resp.rkey = mw->rkey; + resp.mw_handle = uobj->id; + + if (copy_to_user((void __user *)(unsigned long)cmd.response, + &resp, sizeof(resp))) { + ret = -EFAULT; + goto err_copy; + } + + put_pd_read(pd); + + mutex_lock(&file->mutex); + list_add_tail(&uobj->list, &file->ucontext->mw_list); + mutex_unlock(&file->mutex); + + uobj->live = 1; + + up_write(&uobj->mutex); + + return in_len; + +err_copy: + idr_remove_uobj(&ib_uverbs_mw_idr, uobj); + +err_unalloc: + ib_dealloc_mw(mw); + +err_put: + put_pd_read(pd); + +err_free: + put_uobj_write(uobj); + return ret; +} + +ssize_t ib_uverbs_dealloc_mw(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_dealloc_mw cmd; + struct ib_mw *mw; + struct ib_uobject *uobj; + int ret = -EINVAL; + + if (copy_from_user(&cmd, buf, sizeof(cmd))) + return -EFAULT; + + uobj = idr_write_uobj(&ib_uverbs_mw_idr, cmd.mw_handle, file->ucontext); + if (!uobj) + return -EINVAL; + + mw = uobj->object; + + ret = ib_dealloc_mw(mw); + if (!ret) + uobj->live = 0; + + put_uobj_write(uobj); + + if (ret) + return ret; + + idr_remove_uobj(&ib_uverbs_mw_idr, uobj); + + mutex_lock(&file->mutex); + list_del(&uobj->list); + mutex_unlock(&file->mutex); + + put_uobj(uobj); + + return in_len; +} + ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 6f2ce6fa98f8..2c6f0f2ecd9d 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -87,6 +87,8 @@ static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file, [IB_USER_VERBS_CMD_DEALLOC_PD] = ib_uverbs_dealloc_pd, [IB_USER_VERBS_CMD_REG_MR] = ib_uverbs_reg_mr, [IB_USER_VERBS_CMD_DEREG_MR] = ib_uverbs_dereg_mr, + [IB_USER_VERBS_CMD_ALLOC_MW] = ib_uverbs_alloc_mw, + [IB_USER_VERBS_CMD_DEALLOC_MW] = ib_uverbs_dealloc_mw, [IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL] = ib_uverbs_create_comp_channel, [IB_USER_VERBS_CMD_CREATE_CQ] = ib_uverbs_create_cq, [IB_USER_VERBS_CMD_RESIZE_CQ] = ib_uverbs_resize_cq, @@ -201,6 +203,15 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file, kfree(uobj); } + /* Remove MWs before QPs, in order to support type 2A MWs. */ + list_for_each_entry_safe(uobj, tmp, &context->mw_list, list) { + struct ib_mw *mw = uobj->object; + + idr_remove_uobj(&ib_uverbs_mw_idr, uobj); + ib_dealloc_mw(mw); + kfree(uobj); + } + list_for_each_entry_safe(uobj, tmp, &context->qp_list, list) { struct ib_qp *qp = uobj->object; struct ib_uqp_object *uqp = @@ -240,8 +251,6 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file, kfree(uevent); } - /* XXX Free MWs */ - list_for_each_entry_safe(uobj, tmp, &context->mr_list, list) { struct ib_mr *mr = uobj->object; diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 30f199e8579f..a8fdd3381405 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -1099,18 +1099,19 @@ EXPORT_SYMBOL(ib_free_fast_reg_page_list); /* Memory windows */ -struct ib_mw *ib_alloc_mw(struct ib_pd *pd) +struct ib_mw *ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type) { struct ib_mw *mw; if (!pd->device->alloc_mw) return ERR_PTR(-ENOSYS); - mw = pd->device->alloc_mw(pd); + mw = pd->device->alloc_mw(pd, type); if (!IS_ERR(mw)) { mw->device = pd->device; mw->pd = pd; mw->uobject = NULL; + mw->type = type; atomic_inc(&pd->usecnt); } diff --git a/drivers/infiniband/hw/amso1100/c2.c b/drivers/infiniband/hw/amso1100/c2.c index 7275e727e0f5..d53cf519f42a 100644 --- a/drivers/infiniband/hw/amso1100/c2.c +++ b/drivers/infiniband/hw/amso1100/c2.c @@ -1238,15 +1238,4 @@ static struct pci_driver c2_pci_driver = { .remove = c2_remove, }; -static int __init c2_init_module(void) -{ - return pci_register_driver(&c2_pci_driver); -} - -static void __exit c2_exit_module(void) -{ - pci_unregister_driver(&c2_pci_driver); -} - -module_init(c2_init_module); -module_exit(c2_exit_module); +module_pci_driver(c2_pci_driver); diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index 145d82a64d0a..9c12da0cbd32 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -738,7 +738,7 @@ static struct ib_mr *iwch_get_dma_mr(struct ib_pd *pd, int acc) return ibmr; } -static struct ib_mw *iwch_alloc_mw(struct ib_pd *pd) +static struct ib_mw *iwch_alloc_mw(struct ib_pd *pd, enum ib_mw_type type) { struct iwch_dev *rhp; struct iwch_pd *php; @@ -747,6 +747,9 @@ static struct ib_mw *iwch_alloc_mw(struct ib_pd *pd) u32 stag = 0; int ret; + if (type != IB_MW_TYPE_1) + return ERR_PTR(-EINVAL); + php = to_iwch_pd(pd); rhp = php->rhp; mhp = kzalloc(sizeof(*mhp), GFP_KERNEL); diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c index 6de8463f453b..e5649e8b215d 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_qp.c +++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c @@ -567,18 +567,19 @@ int iwch_bind_mw(struct ib_qp *qp, if (mw_bind->send_flags & IB_SEND_SIGNALED) t3_wr_flags = T3_COMPLETION_FLAG; - sgl.addr = mw_bind->addr; - sgl.lkey = mw_bind->mr->lkey; - sgl.length = mw_bind->length; + sgl.addr = mw_bind->bind_info.addr; + sgl.lkey = mw_bind->bind_info.mr->lkey; + sgl.length = mw_bind->bind_info.length; wqe->bind.reserved = 0; wqe->bind.type = TPT_VATO; /* TBD: check perms */ - wqe->bind.perms = iwch_ib_to_tpt_bind_access(mw_bind->mw_access_flags); - wqe->bind.mr_stag = cpu_to_be32(mw_bind->mr->lkey); + wqe->bind.perms = iwch_ib_to_tpt_bind_access( + mw_bind->bind_info.mw_access_flags); + wqe->bind.mr_stag = cpu_to_be32(mw_bind->bind_info.mr->lkey); wqe->bind.mw_stag = cpu_to_be32(mw->rkey); - wqe->bind.mw_len = cpu_to_be32(mw_bind->length); - wqe->bind.mw_va = cpu_to_be64(mw_bind->addr); + wqe->bind.mw_len = cpu_to_be32(mw_bind->bind_info.length); + wqe->bind.mw_va = cpu_to_be64(mw_bind->bind_info.addr); err = iwch_sgl2pbl_map(rhp, &sgl, 1, &pbl_addr, &page_size); if (err) { spin_unlock_irqrestore(&qhp->lock, flag); diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index c13745cde7fa..565bfb161c1a 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -143,14 +143,28 @@ static void connect_reply_upcall(struct c4iw_ep *ep, int status); static LIST_HEAD(timeout_list); static spinlock_t timeout_lock; +static void deref_qp(struct c4iw_ep *ep) +{ + c4iw_qp_rem_ref(&ep->com.qp->ibqp); + clear_bit(QP_REFERENCED, &ep->com.flags); +} + +static void ref_qp(struct c4iw_ep *ep) +{ + set_bit(QP_REFERENCED, &ep->com.flags); + c4iw_qp_add_ref(&ep->com.qp->ibqp); +} + static void start_ep_timer(struct c4iw_ep *ep) { PDBG("%s ep %p\n", __func__, ep); if (timer_pending(&ep->timer)) { - PDBG("%s stopped / restarted timer ep %p\n", __func__, ep); - del_timer_sync(&ep->timer); - } else - c4iw_get_ep(&ep->com); + pr_err("%s timer already started! ep %p\n", + __func__, ep); + return; + } + clear_bit(TIMEOUT, &ep->com.flags); + c4iw_get_ep(&ep->com); ep->timer.expires = jiffies + ep_timeout_secs * HZ; ep->timer.data = (unsigned long)ep; ep->timer.function = ep_timeout; @@ -159,14 +173,10 @@ static void start_ep_timer(struct c4iw_ep *ep) static void stop_ep_timer(struct c4iw_ep *ep) { - PDBG("%s ep %p\n", __func__, ep); - if (!timer_pending(&ep->timer)) { - WARN(1, "%s timer stopped when its not running! " - "ep %p state %u\n", __func__, ep, ep->com.state); - return; - } + PDBG("%s ep %p stopping\n", __func__, ep); del_timer_sync(&ep->timer); - c4iw_put_ep(&ep->com); + if (!test_and_set_bit(TIMEOUT, &ep->com.flags)) + c4iw_put_ep(&ep->com); } static int c4iw_l2t_send(struct c4iw_rdev *rdev, struct sk_buff *skb, @@ -271,11 +281,13 @@ void _c4iw_free_ep(struct kref *kref) ep = container_of(kref, struct c4iw_ep, com.kref); PDBG("%s ep %p state %s\n", __func__, ep, states[state_read(&ep->com)]); + if (test_bit(QP_REFERENCED, &ep->com.flags)) + deref_qp(ep); if (test_bit(RELEASE_RESOURCES, &ep->com.flags)) { + remove_handle(ep->com.dev, &ep->com.dev->hwtid_idr, ep->hwtid); cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, ep->hwtid); dst_release(ep->dst); cxgb4_l2t_release(ep->l2t); - remove_handle(ep->com.dev, &ep->com.dev->hwtid_idr, ep->hwtid); } kfree(ep); } @@ -687,7 +699,7 @@ static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen) memset(mpa, 0, sizeof(*mpa)); memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key)); mpa->flags = MPA_REJECT; - mpa->revision = mpa_rev; + mpa->revision = ep->mpa_attr.version; mpa->private_data_size = htons(plen); if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) { @@ -863,7 +875,6 @@ static void close_complete_upcall(struct c4iw_ep *ep) ep->com.cm_id->event_handler(ep->com.cm_id, &event); ep->com.cm_id->rem_ref(ep->com.cm_id); ep->com.cm_id = NULL; - ep->com.qp = NULL; set_bit(CLOSE_UPCALL, &ep->com.history); } } @@ -906,7 +917,6 @@ static void peer_abort_upcall(struct c4iw_ep *ep) ep->com.cm_id->event_handler(ep->com.cm_id, &event); ep->com.cm_id->rem_ref(ep->com.cm_id); ep->com.cm_id = NULL; - ep->com.qp = NULL; set_bit(ABORT_UPCALL, &ep->com.history); } } @@ -946,7 +956,6 @@ static void connect_reply_upcall(struct c4iw_ep *ep, int status) if (status < 0) { ep->com.cm_id->rem_ref(ep->com.cm_id); ep->com.cm_id = NULL; - ep->com.qp = NULL; } } @@ -1291,11 +1300,13 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb) if (mpa->revision > mpa_rev) { printk(KERN_ERR MOD "%s MPA version mismatch. Local = %d," " Received = %d\n", __func__, mpa_rev, mpa->revision); + stop_ep_timer(ep); abort_connection(ep, skb, GFP_KERNEL); return; } if (memcmp(mpa->key, MPA_KEY_REQ, sizeof(mpa->key))) { + stop_ep_timer(ep); abort_connection(ep, skb, GFP_KERNEL); return; } @@ -1306,6 +1317,7 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb) * Fail if there's too much private data. */ if (plen > MPA_MAX_PRIVATE_DATA) { + stop_ep_timer(ep); abort_connection(ep, skb, GFP_KERNEL); return; } @@ -1314,6 +1326,7 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb) * If plen does not account for pkt size */ if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) { + stop_ep_timer(ep); abort_connection(ep, skb, GFP_KERNEL); return; } @@ -1391,30 +1404,33 @@ static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb) skb_pull(skb, sizeof(*hdr)); skb_trim(skb, dlen); - ep->rcv_seq += dlen; - BUG_ON(ep->rcv_seq != (ntohl(hdr->seq) + dlen)); - /* update RX credits */ update_rx_credits(ep, dlen); switch (state_read(&ep->com)) { case MPA_REQ_SENT: + ep->rcv_seq += dlen; process_mpa_reply(ep, skb); break; case MPA_REQ_WAIT: + ep->rcv_seq += dlen; process_mpa_request(ep, skb); break; - case MPA_REP_SENT: + case FPDU_MODE: { + struct c4iw_qp_attributes attrs; + BUG_ON(!ep->com.qp); + if (status) + pr_err("%s Unexpected streaming data." \ + " qpid %u ep %p state %d tid %u status %d\n", + __func__, ep->com.qp->wq.sq.qid, ep, + state_read(&ep->com), ep->hwtid, status); + attrs.next_state = C4IW_QP_STATE_ERROR; + c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, + C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); + c4iw_ep_disconnect(ep, 1, GFP_KERNEL); break; + } default: - pr_err("%s Unexpected streaming data." \ - " ep %p state %d tid %u status %d\n", - __func__, ep, state_read(&ep->com), ep->hwtid, status); - - /* - * The ep will timeout and inform the ULP of the failure. - * See ep_timeout(). - */ break; } return 0; @@ -1437,6 +1453,7 @@ static int abort_rpl(struct c4iw_dev *dev, struct sk_buff *skb) mutex_lock(&ep->com.mutex); switch (ep->com.state) { case ABORTING: + c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET); __state_set(&ep->com, DEAD); release = 1; break; @@ -1475,11 +1492,11 @@ static void send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid) V_FW_OFLD_CONNECTION_WR_ASTID(atid)); req->tcb.cplrxdataack_cplpassacceptrpl = htons(F_FW_OFLD_CONNECTION_WR_CPLRXDATAACK); - req->tcb.tx_max = jiffies; + req->tcb.tx_max = (__force __be32) jiffies; req->tcb.rcv_adv = htons(1); cxgb4_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx); wscale = compute_wscale(rcv_win); - req->tcb.opt0 = TCAM_BYPASS(1) | + req->tcb.opt0 = (__force __be64) (TCAM_BYPASS(1) | (nocong ? NO_CONG(1) : 0) | KEEP_ALIVE(1) | DELACK(1) | @@ -1490,20 +1507,20 @@ static void send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid) SMAC_SEL(ep->smac_idx) | DSCP(ep->tos) | ULP_MODE(ULP_MODE_TCPDDP) | - RCV_BUFSIZ(rcv_win >> 10); - req->tcb.opt2 = PACE(1) | + RCV_BUFSIZ(rcv_win >> 10)); + req->tcb.opt2 = (__force __be32) (PACE(1) | TX_QUEUE(ep->com.dev->rdev.lldi.tx_modq[ep->tx_chan]) | RX_CHANNEL(0) | CCTRL_ECN(enable_ecn) | - RSS_QUEUE_VALID | RSS_QUEUE(ep->rss_qid); + RSS_QUEUE_VALID | RSS_QUEUE(ep->rss_qid)); if (enable_tcp_timestamps) - req->tcb.opt2 |= TSTAMPS_EN(1); + req->tcb.opt2 |= (__force __be32) TSTAMPS_EN(1); if (enable_tcp_sack) - req->tcb.opt2 |= SACK_EN(1); + req->tcb.opt2 |= (__force __be32) SACK_EN(1); if (wscale && enable_tcp_window_scaling) - req->tcb.opt2 |= WND_SCALE_EN(1); - req->tcb.opt0 = cpu_to_be64(req->tcb.opt0); - req->tcb.opt2 = cpu_to_be32(req->tcb.opt2); + req->tcb.opt2 |= (__force __be32) WND_SCALE_EN(1); + req->tcb.opt0 = cpu_to_be64((__force u64) req->tcb.opt0); + req->tcb.opt2 = cpu_to_be32((__force u32) req->tcb.opt2); set_wr_txq(skb, CPL_PRIORITY_CONTROL, ep->ctrlq_idx); set_bit(ACT_OFLD_CONN, &ep->com.history); c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); @@ -1993,6 +2010,7 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) init_timer(&child_ep->timer); cxgb4_insert_tid(t, child_ep, hwtid); + insert_handle(dev, &dev->hwtid_idr, child_ep, child_ep->hwtid); accept_cr(child_ep, peer_ip, skb, req); set_bit(PASS_ACCEPT_REQ, &child_ep->com.history); goto out; @@ -2018,7 +2036,6 @@ static int pass_establish(struct c4iw_dev *dev, struct sk_buff *skb) ntohs(req->tcp_opt)); set_emss(ep, ntohs(req->tcp_opt)); - insert_handle(dev, &dev->hwtid_idr, ep, ep->hwtid); dst_confirm(ep->dst); state_set(&ep->com, MPA_REQ_WAIT); @@ -2163,7 +2180,7 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb) break; case MPA_REQ_SENT: stop_ep_timer(ep); - if (mpa_rev == 2 && ep->tried_with_mpa_v1) + if (mpa_rev == 1 || (mpa_rev == 2 && ep->tried_with_mpa_v1)) connect_reply_upcall(ep, -ECONNRESET); else { /* @@ -2235,9 +2252,8 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb) out: if (release) release_ep_resources(ep); - - /* retry with mpa-v1 */ - if (ep && ep->retry_with_mpa_v1) { + else if (ep->retry_with_mpa_v1) { + remove_handle(ep->com.dev, &ep->com.dev->hwtid_idr, ep->hwtid); cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, ep->hwtid); dst_release(ep->dst); cxgb4_l2t_release(ep->l2t); @@ -2430,6 +2446,7 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) cm_id->add_ref(cm_id); ep->com.cm_id = cm_id; ep->com.qp = qp; + ref_qp(ep); /* bind QP to EP and move to RTS */ attrs.mpa_attr = ep->mpa_attr; @@ -2460,7 +2477,6 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) return 0; err1: ep->com.cm_id = NULL; - ep->com.qp = NULL; cm_id->rem_ref(cm_id); err: c4iw_put_ep(&ep->com); @@ -2501,6 +2517,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) ep->com.cm_id = cm_id; ep->com.qp = get_qhp(dev, conn_param->qpn); BUG_ON(!ep->com.qp); + ref_qp(ep); PDBG("%s qpn 0x%x qp %p cm_id %p\n", __func__, conn_param->qpn, ep->com.qp, cm_id); @@ -2756,7 +2773,8 @@ static void active_ofld_conn_reply(struct c4iw_dev *dev, struct sk_buff *skb, struct c4iw_ep *ep; int atid = be32_to_cpu(req->tid); - ep = (struct c4iw_ep *)lookup_atid(dev->rdev.lldi.tids, req->tid); + ep = (struct c4iw_ep *)lookup_atid(dev->rdev.lldi.tids, + (__force u32) req->tid); if (!ep) return; @@ -2800,7 +2818,7 @@ static void passive_ofld_conn_reply(struct c4iw_dev *dev, struct sk_buff *skb, struct cpl_pass_accept_req *cpl; int ret; - rpl_skb = (struct sk_buff *)cpu_to_be64(req->cookie); + rpl_skb = (struct sk_buff *)(unsigned long)req->cookie; BUG_ON(!rpl_skb); if (req->retval) { PDBG("%s passive open failure %d\n", __func__, req->retval); @@ -2811,7 +2829,8 @@ static void passive_ofld_conn_reply(struct c4iw_dev *dev, struct sk_buff *skb, } else { cpl = (struct cpl_pass_accept_req *)cplhdr(rpl_skb); OPCODE_TID(cpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_REQ, - htonl(req->tid))); + (__force u32) htonl( + (__force u32) req->tid))); ret = pass_accept_req(dev, rpl_skb); if (!ret) kfree_skb(rpl_skb); @@ -2857,10 +2876,10 @@ static void build_cpl_pass_accept_req(struct sk_buff *skb, int stid , u8 tos) struct tcp_options_received tmp_opt; /* Store values from cpl_rx_pkt in temporary location. */ - vlantag = cpl->vlan; - len = cpl->len; - l2info = cpl->l2info; - hdr_len = cpl->hdr_len; + vlantag = (__force u16) cpl->vlan; + len = (__force u16) cpl->len; + l2info = (__force u32) cpl->l2info; + hdr_len = (__force u16) cpl->hdr_len; intf = cpl->iff; __skb_pull(skb, sizeof(*req) + sizeof(struct rss_header)); @@ -2871,19 +2890,24 @@ static void build_cpl_pass_accept_req(struct sk_buff *skb, int stid , u8 tos) */ memset(&tmp_opt, 0, sizeof(tmp_opt)); tcp_clear_options(&tmp_opt); - tcp_parse_options(skb, &tmp_opt, 0, 0, NULL); + tcp_parse_options(skb, &tmp_opt, NULL, 0, NULL); req = (struct cpl_pass_accept_req *)__skb_push(skb, sizeof(*req)); memset(req, 0, sizeof(*req)); req->l2info = cpu_to_be16(V_SYN_INTF(intf) | - V_SYN_MAC_IDX(G_RX_MACIDX(htonl(l2info))) | + V_SYN_MAC_IDX(G_RX_MACIDX( + (__force int) htonl(l2info))) | F_SYN_XACT_MATCH); - req->hdr_len = cpu_to_be32(V_SYN_RX_CHAN(G_RX_CHAN(htonl(l2info))) | - V_TCP_HDR_LEN(G_RX_TCPHDR_LEN(htons(hdr_len))) | - V_IP_HDR_LEN(G_RX_IPHDR_LEN(htons(hdr_len))) | - V_ETH_HDR_LEN(G_RX_ETHHDR_LEN(htonl(l2info)))); - req->vlan = vlantag; - req->len = len; + req->hdr_len = cpu_to_be32(V_SYN_RX_CHAN(G_RX_CHAN( + (__force int) htonl(l2info))) | + V_TCP_HDR_LEN(G_RX_TCPHDR_LEN( + (__force int) htons(hdr_len))) | + V_IP_HDR_LEN(G_RX_IPHDR_LEN( + (__force int) htons(hdr_len))) | + V_ETH_HDR_LEN(G_RX_ETHHDR_LEN( + (__force int) htonl(l2info)))); + req->vlan = (__force __be16) vlantag; + req->len = (__force __be16) len; req->tos_stid = cpu_to_be32(PASS_OPEN_TID(stid) | PASS_OPEN_TOS(tos)); req->tcpopt.mss = htons(tmp_opt.mss_clamp); @@ -2912,7 +2936,7 @@ static void send_fw_pass_open_req(struct c4iw_dev *dev, struct sk_buff *skb, req->op_compl = htonl(V_WR_OP(FW_OFLD_CONNECTION_WR) | FW_WR_COMPL(1)); req->len16_pkd = htonl(FW_WR_LEN16(DIV_ROUND_UP(sizeof(*req), 16))); req->le.version_cpl = htonl(F_FW_OFLD_CONNECTION_WR_CPL); - req->le.filter = filter; + req->le.filter = (__force __be32) filter; req->le.lport = lport; req->le.pport = rport; req->le.u.ipv4.lip = laddr; @@ -2938,7 +2962,7 @@ static void send_fw_pass_open_req(struct c4iw_dev *dev, struct sk_buff *skb, * TP will ignore any value > 0 for MSS index. */ req->tcb.opt0 = cpu_to_be64(V_MSS_IDX(0xF)); - req->cookie = cpu_to_be64((u64)skb); + req->cookie = (unsigned long)skb; set_wr_txq(req_skb, CPL_PRIORITY_CONTROL, port_id); cxgb4_ofld_send(dev->rdev.lldi.ports[0], req_skb); @@ -2988,7 +3012,8 @@ static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb) /* * Calculate the server tid from filter hit index from cpl_rx_pkt. */ - stid = cpu_to_be32(rss->hash_val) - dev->rdev.lldi.tids->sftid_base + stid = (__force int) cpu_to_be32((__force u32) rss->hash_val) + - dev->rdev.lldi.tids->sftid_base + dev->rdev.lldi.tids->nstids; lep = (struct c4iw_ep *)lookup_stid(dev->rdev.lldi.tids, stid); @@ -3049,10 +3074,10 @@ static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb) step = dev->rdev.lldi.nrxq / dev->rdev.lldi.nchan; rss_qid = dev->rdev.lldi.rxq_ids[pi->port_id * step]; - window = htons(tcph->window); + window = (__force u16) htons((__force u16)tcph->window); /* Calcuate filter portion for LE region. */ - filter = cpu_to_be32(select_ntuple(dev, dst, e)); + filter = (__force unsigned int) cpu_to_be32(select_ntuple(dev, dst, e)); /* * Synthesize the cpl_pass_accept_req. We have everything except the @@ -3175,11 +3200,16 @@ static DECLARE_WORK(skb_work, process_work); static void ep_timeout(unsigned long arg) { struct c4iw_ep *ep = (struct c4iw_ep *)arg; + int kickit = 0; spin_lock(&timeout_lock); - list_add_tail(&ep->entry, &timeout_list); + if (!test_and_set_bit(TIMEOUT, &ep->com.flags)) { + list_add_tail(&ep->entry, &timeout_list); + kickit = 1; + } spin_unlock(&timeout_lock); - queue_work(workq, &skb_work); + if (kickit) + queue_work(workq, &skb_work); } /* @@ -3268,8 +3298,14 @@ static int peer_abort_intr(struct c4iw_dev *dev, struct sk_buff *skb) /* * Wake up any threads in rdma_init() or rdma_fini(). + * However, if we are on MPAv2 and want to retry with MPAv1 + * then, don't wake up yet. */ - c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET); + if (mpa_rev == 2 && !ep->tried_with_mpa_v1) { + if (ep->com.state != MPA_REQ_SENT) + c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET); + } else + c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET); sched(dev, skb); return 0; } diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index ba11c76c0b5a..80069ad595c1 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -533,7 +533,7 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev) PDBG("udb len 0x%x udb base %p db_reg %p gts_reg %p qpshift %lu " "qpmask 0x%x cqshift %lu cqmask 0x%x\n", (unsigned)pci_resource_len(rdev->lldi.pdev, 2), - (void *)pci_resource_start(rdev->lldi.pdev, 2), + (void *)(unsigned long)pci_resource_start(rdev->lldi.pdev, 2), rdev->lldi.db_reg, rdev->lldi.gts_reg, rdev->qpshift, rdev->qpmask, @@ -797,7 +797,8 @@ static int c4iw_uld_rx_handler(void *handle, const __be64 *rsp, "RSS %#llx, FL %#llx, len %u\n", pci_name(ctx->lldi.pdev), gl->va, (unsigned long long)be64_to_cpu(*rsp), - (unsigned long long)be64_to_cpu(*(u64 *)gl->va), + (unsigned long long)be64_to_cpu( + *(__force __be64 *)gl->va), gl->tot_len); return 0; diff --git a/drivers/infiniband/hw/cxgb4/ev.c b/drivers/infiniband/hw/cxgb4/ev.c index cf2f6b47617a..1a840b2211dd 100644 --- a/drivers/infiniband/hw/cxgb4/ev.c +++ b/drivers/infiniband/hw/cxgb4/ev.c @@ -46,9 +46,11 @@ static void post_qp_event(struct c4iw_dev *dev, struct c4iw_cq *chp, if ((qhp->attr.state == C4IW_QP_STATE_ERROR) || (qhp->attr.state == C4IW_QP_STATE_TERMINATE)) { - PDBG("%s AE received after RTS - " - "qp state %d qpid 0x%x status 0x%x\n", __func__, - qhp->attr.state, qhp->wq.sq.qid, CQE_STATUS(err_cqe)); + pr_err("%s AE after RTS - qpid 0x%x opcode %d status 0x%x "\ + "type %d wrid.hi 0x%x wrid.lo 0x%x\n", + __func__, CQE_QPID(err_cqe), CQE_OPCODE(err_cqe), + CQE_STATUS(err_cqe), CQE_TYPE(err_cqe), + CQE_WRID_HI(err_cqe), CQE_WRID_LOW(err_cqe)); return; } diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index 9c1644fb0259..4c07fc069766 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -716,6 +716,8 @@ enum c4iw_ep_flags { ABORT_REQ_IN_PROGRESS = 1, RELEASE_RESOURCES = 2, CLOSE_SENT = 3, + TIMEOUT = 4, + QP_REFERENCED = 5, }; enum c4iw_ep_history { @@ -866,7 +868,7 @@ struct ib_fast_reg_page_list *c4iw_alloc_fastreg_pbl( int page_list_len); struct ib_mr *c4iw_alloc_fast_reg_mr(struct ib_pd *pd, int pbl_depth); int c4iw_dealloc_mw(struct ib_mw *mw); -struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd); +struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type); struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt, int acc, struct ib_udata *udata); diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c index afd81790ab3c..903a92d6f91d 100644 --- a/drivers/infiniband/hw/cxgb4/mem.c +++ b/drivers/infiniband/hw/cxgb4/mem.c @@ -650,7 +650,7 @@ err: return ERR_PTR(err); } -struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd) +struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type) { struct c4iw_dev *rhp; struct c4iw_pd *php; @@ -659,6 +659,9 @@ struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd) u32 stag = 0; int ret; + if (type != IB_MW_TYPE_1) + return ERR_PTR(-EINVAL); + php = to_c4iw_pd(pd); rhp = php->rhp; mhp = kzalloc(sizeof(*mhp), GFP_KERNEL); diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index 05bfe53bff64..17ba4f8bc12d 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -1383,6 +1383,7 @@ err: qhp->ep = NULL; set_state(qhp, C4IW_QP_STATE_ERROR); free = 1; + abort = 1; wake_up(&qhp->wait); BUG_ON(!ep); flush_qp(qhp); diff --git a/drivers/infiniband/hw/ehca/ehca_iverbs.h b/drivers/infiniband/hw/ehca/ehca_iverbs.h index 8f7f282ead65..22f79afa7fc1 100644 --- a/drivers/infiniband/hw/ehca/ehca_iverbs.h +++ b/drivers/infiniband/hw/ehca/ehca_iverbs.h @@ -95,7 +95,7 @@ int ehca_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr); int ehca_dereg_mr(struct ib_mr *mr); -struct ib_mw *ehca_alloc_mw(struct ib_pd *pd); +struct ib_mw *ehca_alloc_mw(struct ib_pd *pd, enum ib_mw_type type); int ehca_bind_mw(struct ib_qp *qp, struct ib_mw *mw, struct ib_mw_bind *mw_bind); diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.c b/drivers/infiniband/hw/ehca/ehca_mrmw.c index 87844869dcc2..bcfb0c183620 100644 --- a/drivers/infiniband/hw/ehca/ehca_mrmw.c +++ b/drivers/infiniband/hw/ehca/ehca_mrmw.c @@ -688,7 +688,7 @@ dereg_mr_exit0: /*----------------------------------------------------------------------*/ -struct ib_mw *ehca_alloc_mw(struct ib_pd *pd) +struct ib_mw *ehca_alloc_mw(struct ib_pd *pd, enum ib_mw_type type) { struct ib_mw *ib_mw; u64 h_ret; @@ -698,6 +698,9 @@ struct ib_mw *ehca_alloc_mw(struct ib_pd *pd) container_of(pd->device, struct ehca_shca, ib_device); struct ehca_mw_hipzout_parms hipzout; + if (type != IB_MW_TYPE_1) + return ERR_PTR(-EINVAL); + e_mw = ehca_mw_new(); if (!e_mw) { ib_mw = ERR_PTR(-ENOMEM); diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 0a903c129f0a..934792c477bc 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -1999,16 +1999,17 @@ int mlx4_ib_init_sriov(struct mlx4_ib_dev *dev) goto demux_err; err = mlx4_ib_alloc_demux_ctx(dev, &dev->sriov.demux[i], i + 1); if (err) - goto demux_err; + goto free_pv; } mlx4_ib_master_tunnels(dev, 1); return 0; +free_pv: + free_pv_object(dev, mlx4_master_func_num(dev->dev), i + 1); demux_err: - while (i > 0) { + while (--i >= 0) { free_pv_object(dev, mlx4_master_func_num(dev->dev), i + 1); mlx4_ib_free_demux_ctx(&dev->sriov.demux[i]); - --i; } mlx4_ib_device_unregister_sysfs(dev); diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index e7d81c0d1ac5..23d734349d8e 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -137,6 +137,14 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS; if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) props->device_cap_flags |= IB_DEVICE_XRC; + if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW) + props->device_cap_flags |= IB_DEVICE_MEM_WINDOW; + if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN) { + if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_WIN_TYPE_2B) + props->device_cap_flags |= IB_DEVICE_MEM_WINDOW_TYPE_2B; + else + props->device_cap_flags |= IB_DEVICE_MEM_WINDOW_TYPE_2A; + } props->vendor_id = be32_to_cpup((__be32 *) (out_mad->data + 36)) & 0xffffff; @@ -1434,6 +1442,17 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ibdev->ib_dev.dealloc_fmr = mlx4_ib_fmr_dealloc; } + if (dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW || + dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN) { + ibdev->ib_dev.alloc_mw = mlx4_ib_alloc_mw; + ibdev->ib_dev.bind_mw = mlx4_ib_bind_mw; + ibdev->ib_dev.dealloc_mw = mlx4_ib_dealloc_mw; + + ibdev->ib_dev.uverbs_cmd_mask |= + (1ull << IB_USER_VERBS_CMD_ALLOC_MW) | + (1ull << IB_USER_VERBS_CMD_DEALLOC_MW); + } + if (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) { ibdev->ib_dev.alloc_xrcd = mlx4_ib_alloc_xrcd; ibdev->ib_dev.dealloc_xrcd = mlx4_ib_dealloc_xrcd; @@ -1601,8 +1620,7 @@ static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init) spin_unlock_irqrestore(&ibdev->sriov.going_down_lock, flags); } out: - if (dm) - kfree(dm); + kfree(dm); return; } diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index dcd845bc30f0..f61ec26500c4 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -116,6 +116,11 @@ struct mlx4_ib_mr { struct ib_umem *umem; }; +struct mlx4_ib_mw { + struct ib_mw ibmw; + struct mlx4_mw mmw; +}; + struct mlx4_ib_fast_reg_page_list { struct ib_fast_reg_page_list ibfrpl; __be64 *mapped_page_list; @@ -533,6 +538,11 @@ static inline struct mlx4_ib_mr *to_mmr(struct ib_mr *ibmr) return container_of(ibmr, struct mlx4_ib_mr, ibmr); } +static inline struct mlx4_ib_mw *to_mmw(struct ib_mw *ibmw) +{ + return container_of(ibmw, struct mlx4_ib_mw, ibmw); +} + static inline struct mlx4_ib_fast_reg_page_list *to_mfrpl(struct ib_fast_reg_page_list *ibfrpl) { return container_of(ibfrpl, struct mlx4_ib_fast_reg_page_list, ibfrpl); @@ -581,6 +591,10 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_udata *udata); int mlx4_ib_dereg_mr(struct ib_mr *mr); +struct ib_mw *mlx4_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type); +int mlx4_ib_bind_mw(struct ib_qp *qp, struct ib_mw *mw, + struct ib_mw_bind *mw_bind); +int mlx4_ib_dealloc_mw(struct ib_mw *mw); struct ib_mr *mlx4_ib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len); struct ib_fast_reg_page_list *mlx4_ib_alloc_fast_reg_page_list(struct ib_device *ibdev, @@ -652,12 +666,12 @@ int __mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index, int mlx4_ib_resolve_grh(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah_attr, u8 *mac, int *is_mcast, u8 port); -static inline int mlx4_ib_ah_grh_present(struct mlx4_ib_ah *ah) +static inline bool mlx4_ib_ah_grh_present(struct mlx4_ib_ah *ah) { u8 port = be32_to_cpu(ah->av.ib.port_pd) >> 24 & 3; if (rdma_port_get_link_layer(ah->ibah.device, port) == IB_LINK_LAYER_ETHERNET) - return 1; + return true; return !!(ah->av.ib.g_slid & 0x80); } diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c index bbaf6176f207..e471f089ff00 100644 --- a/drivers/infiniband/hw/mlx4/mr.c +++ b/drivers/infiniband/hw/mlx4/mr.c @@ -41,9 +41,19 @@ static u32 convert_access(int acc) (acc & IB_ACCESS_REMOTE_WRITE ? MLX4_PERM_REMOTE_WRITE : 0) | (acc & IB_ACCESS_REMOTE_READ ? MLX4_PERM_REMOTE_READ : 0) | (acc & IB_ACCESS_LOCAL_WRITE ? MLX4_PERM_LOCAL_WRITE : 0) | + (acc & IB_ACCESS_MW_BIND ? MLX4_PERM_BIND_MW : 0) | MLX4_PERM_LOCAL_READ; } +static enum mlx4_mw_type to_mlx4_type(enum ib_mw_type type) +{ + switch (type) { + case IB_MW_TYPE_1: return MLX4_MW_TYPE_1; + case IB_MW_TYPE_2: return MLX4_MW_TYPE_2; + default: return -1; + } +} + struct ib_mr *mlx4_ib_get_dma_mr(struct ib_pd *pd, int acc) { struct mlx4_ib_mr *mr; @@ -68,7 +78,7 @@ struct ib_mr *mlx4_ib_get_dma_mr(struct ib_pd *pd, int acc) return &mr->ibmr; err_mr: - mlx4_mr_free(to_mdev(pd->device)->dev, &mr->mmr); + (void) mlx4_mr_free(to_mdev(pd->device)->dev, &mr->mmr); err_free: kfree(mr); @@ -163,7 +173,7 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, return &mr->ibmr; err_mr: - mlx4_mr_free(to_mdev(pd->device)->dev, &mr->mmr); + (void) mlx4_mr_free(to_mdev(pd->device)->dev, &mr->mmr); err_umem: ib_umem_release(mr->umem); @@ -177,8 +187,11 @@ err_free: int mlx4_ib_dereg_mr(struct ib_mr *ibmr) { struct mlx4_ib_mr *mr = to_mmr(ibmr); + int ret; - mlx4_mr_free(to_mdev(ibmr->device)->dev, &mr->mmr); + ret = mlx4_mr_free(to_mdev(ibmr->device)->dev, &mr->mmr); + if (ret) + return ret; if (mr->umem) ib_umem_release(mr->umem); kfree(mr); @@ -186,6 +199,70 @@ int mlx4_ib_dereg_mr(struct ib_mr *ibmr) return 0; } +struct ib_mw *mlx4_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type) +{ + struct mlx4_ib_dev *dev = to_mdev(pd->device); + struct mlx4_ib_mw *mw; + int err; + + mw = kmalloc(sizeof(*mw), GFP_KERNEL); + if (!mw) + return ERR_PTR(-ENOMEM); + + err = mlx4_mw_alloc(dev->dev, to_mpd(pd)->pdn, + to_mlx4_type(type), &mw->mmw); + if (err) + goto err_free; + + err = mlx4_mw_enable(dev->dev, &mw->mmw); + if (err) + goto err_mw; + + mw->ibmw.rkey = mw->mmw.key; + + return &mw->ibmw; + +err_mw: + mlx4_mw_free(dev->dev, &mw->mmw); + +err_free: + kfree(mw); + + return ERR_PTR(err); +} + +int mlx4_ib_bind_mw(struct ib_qp *qp, struct ib_mw *mw, + struct ib_mw_bind *mw_bind) +{ + struct ib_send_wr wr; + struct ib_send_wr *bad_wr; + int ret; + + memset(&wr, 0, sizeof(wr)); + wr.opcode = IB_WR_BIND_MW; + wr.wr_id = mw_bind->wr_id; + wr.send_flags = mw_bind->send_flags; + wr.wr.bind_mw.mw = mw; + wr.wr.bind_mw.bind_info = mw_bind->bind_info; + wr.wr.bind_mw.rkey = ib_inc_rkey(mw->rkey); + + ret = mlx4_ib_post_send(qp, &wr, &bad_wr); + if (!ret) + mw->rkey = wr.wr.bind_mw.rkey; + + return ret; +} + +int mlx4_ib_dealloc_mw(struct ib_mw *ibmw) +{ + struct mlx4_ib_mw *mw = to_mmw(ibmw); + + mlx4_mw_free(to_mdev(ibmw->device)->dev, &mw->mmw); + kfree(mw); + + return 0; +} + struct ib_mr *mlx4_ib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len) { @@ -212,7 +289,7 @@ struct ib_mr *mlx4_ib_alloc_fast_reg_mr(struct ib_pd *pd, return &mr->ibmr; err_mr: - mlx4_mr_free(dev->dev, &mr->mmr); + (void) mlx4_mr_free(dev->dev, &mr->mmr); err_free: kfree(mr); @@ -291,7 +368,7 @@ struct ib_fmr *mlx4_ib_fmr_alloc(struct ib_pd *pd, int acc, return &fmr->ibfmr; err_mr: - mlx4_mr_free(to_mdev(pd->device)->dev, &fmr->mfmr.mr); + (void) mlx4_mr_free(to_mdev(pd->device)->dev, &fmr->mfmr.mr); err_free: kfree(fmr); diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 19e0637220b9..35cced2a4da8 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -104,6 +104,7 @@ static const __be32 mlx4_ib_opcode[] = { [IB_WR_FAST_REG_MR] = cpu_to_be32(MLX4_OPCODE_FMR), [IB_WR_MASKED_ATOMIC_CMP_AND_SWP] = cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_CS), [IB_WR_MASKED_ATOMIC_FETCH_AND_ADD] = cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_FA), + [IB_WR_BIND_MW] = cpu_to_be32(MLX4_OPCODE_BIND_MW), }; static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp) @@ -1746,11 +1747,11 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, int header_size; int spc; int i; - int is_eth; - int is_vlan = 0; - int is_grh; - u16 vlan; int err = 0; + u16 vlan = 0xffff; + bool is_eth; + bool is_vlan = false; + bool is_grh; send_size = 0; for (i = 0; i < wr->num_sge; ++i) @@ -1953,9 +1954,12 @@ static int mlx4_wq_overflow(struct mlx4_ib_wq *wq, int nreq, struct ib_cq *ib_cq static __be32 convert_access(int acc) { - return (acc & IB_ACCESS_REMOTE_ATOMIC ? cpu_to_be32(MLX4_WQE_FMR_PERM_ATOMIC) : 0) | - (acc & IB_ACCESS_REMOTE_WRITE ? cpu_to_be32(MLX4_WQE_FMR_PERM_REMOTE_WRITE) : 0) | - (acc & IB_ACCESS_REMOTE_READ ? cpu_to_be32(MLX4_WQE_FMR_PERM_REMOTE_READ) : 0) | + return (acc & IB_ACCESS_REMOTE_ATOMIC ? + cpu_to_be32(MLX4_WQE_FMR_AND_BIND_PERM_ATOMIC) : 0) | + (acc & IB_ACCESS_REMOTE_WRITE ? + cpu_to_be32(MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_WRITE) : 0) | + (acc & IB_ACCESS_REMOTE_READ ? + cpu_to_be32(MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_READ) : 0) | (acc & IB_ACCESS_LOCAL_WRITE ? cpu_to_be32(MLX4_WQE_FMR_PERM_LOCAL_WRITE) : 0) | cpu_to_be32(MLX4_WQE_FMR_PERM_LOCAL_READ); } @@ -1981,12 +1985,28 @@ static void set_fmr_seg(struct mlx4_wqe_fmr_seg *fseg, struct ib_send_wr *wr) fseg->reserved[1] = 0; } +static void set_bind_seg(struct mlx4_wqe_bind_seg *bseg, struct ib_send_wr *wr) +{ + bseg->flags1 = + convert_access(wr->wr.bind_mw.bind_info.mw_access_flags) & + cpu_to_be32(MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_READ | + MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_WRITE | + MLX4_WQE_FMR_AND_BIND_PERM_ATOMIC); + bseg->flags2 = 0; + if (wr->wr.bind_mw.mw->type == IB_MW_TYPE_2) + bseg->flags2 |= cpu_to_be32(MLX4_WQE_BIND_TYPE_2); + if (wr->wr.bind_mw.bind_info.mw_access_flags & IB_ZERO_BASED) + bseg->flags2 |= cpu_to_be32(MLX4_WQE_BIND_ZERO_BASED); + bseg->new_rkey = cpu_to_be32(wr->wr.bind_mw.rkey); + bseg->lkey = cpu_to_be32(wr->wr.bind_mw.bind_info.mr->lkey); + bseg->addr = cpu_to_be64(wr->wr.bind_mw.bind_info.addr); + bseg->length = cpu_to_be64(wr->wr.bind_mw.bind_info.length); +} + static void set_local_inv_seg(struct mlx4_wqe_local_inval_seg *iseg, u32 rkey) { - iseg->flags = 0; - iseg->mem_key = cpu_to_be32(rkey); - iseg->guest_id = 0; - iseg->pa = 0; + memset(iseg, 0, sizeof(*iseg)); + iseg->mem_key = cpu_to_be32(rkey); } static __always_inline void set_raddr_seg(struct mlx4_wqe_raddr_seg *rseg, @@ -2291,6 +2311,13 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, size += sizeof (struct mlx4_wqe_fmr_seg) / 16; break; + case IB_WR_BIND_MW: + ctrl->srcrb_flags |= + cpu_to_be32(MLX4_WQE_CTRL_STRONG_ORDER); + set_bind_seg(wqe, wr); + wqe += sizeof(struct mlx4_wqe_bind_seg); + size += sizeof(struct mlx4_wqe_bind_seg) / 16; + break; default: /* No extra segments required for sends */ break; diff --git a/drivers/infiniband/hw/mlx4/sysfs.c b/drivers/infiniband/hw/mlx4/sysfs.c index 5b2a01dfb907..97516eb363b7 100644 --- a/drivers/infiniband/hw/mlx4/sysfs.c +++ b/drivers/infiniband/hw/mlx4/sysfs.c @@ -732,7 +732,7 @@ int mlx4_ib_device_register_sysfs(struct mlx4_ib_dev *dev) dev->ports_parent = kobject_create_and_add("ports", kobject_get(dev->iov_parent)); - if (!dev->iov_parent) { + if (!dev->ports_parent) { ret = -ENOMEM; goto err_ports; } diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index 07e4fbad987a..8f67fe2e91e6 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -55,7 +55,8 @@ static void nes_unregister_ofa_device(struct nes_ib_device *nesibdev); /** * nes_alloc_mw */ -static struct ib_mw *nes_alloc_mw(struct ib_pd *ibpd) { +static struct ib_mw *nes_alloc_mw(struct ib_pd *ibpd, enum ib_mw_type type) +{ struct nes_pd *nespd = to_nespd(ibpd); struct nes_vnic *nesvnic = to_nesvnic(ibpd->device); struct nes_device *nesdev = nesvnic->nesdev; @@ -71,6 +72,9 @@ static struct ib_mw *nes_alloc_mw(struct ib_pd *ibpd) { u32 driver_key = 0; u8 stag_key = 0; + if (type != IB_MW_TYPE_1) + return ERR_PTR(-EINVAL); + get_random_bytes(&next_stag_index, sizeof(next_stag_index)); stag_key = (u8)next_stag_index; @@ -244,20 +248,19 @@ static int nes_bind_mw(struct ib_qp *ibqp, struct ib_mw *ibmw, if (ibmw_bind->send_flags & IB_SEND_SIGNALED) wqe_misc |= NES_IWARP_SQ_WQE_SIGNALED_COMPL; - if (ibmw_bind->mw_access_flags & IB_ACCESS_REMOTE_WRITE) { + if (ibmw_bind->bind_info.mw_access_flags & IB_ACCESS_REMOTE_WRITE) wqe_misc |= NES_CQP_STAG_RIGHTS_REMOTE_WRITE; - } - if (ibmw_bind->mw_access_flags & IB_ACCESS_REMOTE_READ) { + if (ibmw_bind->bind_info.mw_access_flags & IB_ACCESS_REMOTE_READ) wqe_misc |= NES_CQP_STAG_RIGHTS_REMOTE_READ; - } set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_MISC_IDX, wqe_misc); - set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_BIND_WQE_MR_IDX, ibmw_bind->mr->lkey); + set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_BIND_WQE_MR_IDX, + ibmw_bind->bind_info.mr->lkey); set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_BIND_WQE_MW_IDX, ibmw->rkey); set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_BIND_WQE_LENGTH_LOW_IDX, - ibmw_bind->length); + ibmw_bind->bind_info.length); wqe->wqe_words[NES_IWARP_SQ_BIND_WQE_LENGTH_HIGH_IDX] = 0; - u64temp = (u64)ibmw_bind->addr; + u64temp = (u64)ibmw_bind->bind_info.addr; set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_BIND_WQE_VA_FBO_LOW_IDX, u64temp); head++; diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c index 35275099cafd..a6a2cc2ba260 100644 --- a/drivers/infiniband/hw/qib/qib_qp.c +++ b/drivers/infiniband/hw/qib/qib_qp.c @@ -268,8 +268,9 @@ static void remove_qp(struct qib_ibdev *dev, struct qib_qp *qp) qpp = &q->next) if (q == qp) { atomic_dec(&qp->refcount); - *qpp = qp->next; - rcu_assign_pointer(qp->next, NULL); + rcu_assign_pointer(*qpp, + rcu_dereference_protected(qp->next, + lockdep_is_held(&dev->qpt_lock))); break; } } diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 07ca6fd5546b..eb71aaa26a9a 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -117,6 +117,8 @@ enum { #define IPOIB_OP_CM (0) #endif +#define IPOIB_QPN_MASK ((__force u32) cpu_to_be32(0xFFFFFF)) + /* structs */ struct ipoib_header { @@ -760,4 +762,6 @@ extern int ipoib_debug_level; #define IPOIB_QPN(ha) (be32_to_cpup((__be32 *) ha) & 0xffffff) +extern const char ipoib_driver_version[]; + #endif /* _IPOIB_H */ diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c index ca131335417b..c4b3940845e6 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c @@ -39,7 +39,24 @@ static void ipoib_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo) { - strlcpy(drvinfo->driver, "ipoib", sizeof(drvinfo->driver)); + struct ipoib_dev_priv *priv = netdev_priv(netdev); + struct ib_device_attr *attr; + + attr = kmalloc(sizeof(*attr), GFP_KERNEL); + if (attr && !ib_query_device(priv->ca, attr)) + snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), + "%d.%d.%d", (int)(attr->fw_ver >> 32), + (int)(attr->fw_ver >> 16) & 0xffff, + (int)attr->fw_ver & 0xffff); + kfree(attr); + + strlcpy(drvinfo->bus_info, dev_name(priv->ca->dma_device), + sizeof(drvinfo->bus_info)); + + strlcpy(drvinfo->version, ipoib_driver_version, + sizeof(drvinfo->version)); + + strlcpy(drvinfo->driver, "ib_ipoib", sizeof(drvinfo->driver)); } static int ipoib_get_coalesce(struct net_device *dev, diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 6fdc9e78da0d..8534afd04e7c 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -49,9 +49,14 @@ #include <linux/jhash.h> #include <net/arp.h> +#define DRV_VERSION "1.0.0" + +const char ipoib_driver_version[] = DRV_VERSION; + MODULE_AUTHOR("Roland Dreier"); MODULE_DESCRIPTION("IP-over-InfiniBand net driver"); MODULE_LICENSE("Dual BSD/GPL"); +MODULE_VERSION(DRV_VERSION); int ipoib_sendq_size __read_mostly = IPOIB_TX_RING_SIZE; int ipoib_recvq_size __read_mostly = IPOIB_RX_RING_SIZE; @@ -505,6 +510,9 @@ static void path_rec_completion(int status, spin_unlock_irqrestore(&priv->lock, flags); + if (IS_ERR_OR_NULL(ah)) + ipoib_del_neighs_by_gid(dev, path->pathrec.dgid.raw); + if (old_ah) ipoib_put_ah(old_ah); @@ -844,10 +852,10 @@ static u32 ipoib_addr_hash(struct ipoib_neigh_hash *htbl, u8 *daddr) * different subnets. */ /* qpn octets[1:4) & port GUID octets[12:20) */ - u32 *daddr_32 = (u32 *) daddr; + u32 *d32 = (u32 *) daddr; u32 hv; - hv = jhash_3words(daddr_32[3], daddr_32[4], 0xFFFFFF & daddr_32[0], 0); + hv = jhash_3words(d32[3], d32[4], IPOIB_QPN_MASK & d32[0], 0); return hv & htbl->mask; } @@ -1688,6 +1696,8 @@ static void ipoib_remove_one(struct ib_device *device) return; dev_list = ib_get_client_data(device, &ipoib_client); + if (!dev_list) + return; list_for_each_entry_safe(priv, tmp, dev_list, list) { ib_unregister_event_handler(&priv->event_handler); diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index ef7d3be46c31..5babdb35bda7 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -94,7 +94,7 @@ /* support up to 512KB in one RDMA */ #define ISCSI_ISER_SG_TABLESIZE (0x80000 >> SHIFT_4K) -#define ISER_DEF_CMD_PER_LUN 128 +#define ISER_DEF_CMD_PER_LUN ISCSI_DEF_XMIT_CMDS_MAX /* QP settings */ /* Maximal bounds on received asynchronous PDUs */ diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index 2033a928d34d..be1edb04b085 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -369,10 +369,11 @@ int iser_reg_rdma_mem(struct iscsi_iser_task *iser_task, regd_buf = &iser_task->rdma_regd[cmd_dir]; aligned_len = iser_data_buf_aligned_len(mem, ibdev); - if (aligned_len != mem->dma_nents) { + if (aligned_len != mem->dma_nents || + (!ib_conn->fmr_pool && mem->dma_nents > 1)) { iscsi_conn->fmr_unalign_cnt++; - iser_warn("rdma alignment violation %d/%d aligned\n", - aligned_len, mem->size); + iser_warn("rdma alignment violation (%d/%d aligned) or FMR not supported\n", + aligned_len, mem->size); iser_data_buf_dump(mem, ibdev); /* unmap the command data before accessing it */ @@ -404,7 +405,7 @@ int iser_reg_rdma_mem(struct iscsi_iser_task *iser_task, } else { /* use FMR for multiple dma entries */ iser_page_vec_build(mem, ib_conn->page_vec, ibdev); err = iser_reg_page_vec(ib_conn, ib_conn->page_vec, ®d_buf->reg); - if (err) { + if (err && err != -EAGAIN) { iser_data_buf_dump(mem, ibdev); iser_err("mem->dma_nents = %d (dlength = 0x%x)\n", mem->dma_nents, diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index 95a49affee44..4debadc53106 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -242,10 +242,14 @@ static int iser_create_ib_conn_res(struct iser_conn *ib_conn) IB_ACCESS_REMOTE_READ); ib_conn->fmr_pool = ib_create_fmr_pool(device->pd, ¶ms); - if (IS_ERR(ib_conn->fmr_pool)) { - ret = PTR_ERR(ib_conn->fmr_pool); + ret = PTR_ERR(ib_conn->fmr_pool); + if (IS_ERR(ib_conn->fmr_pool) && ret != -ENOSYS) { ib_conn->fmr_pool = NULL; goto out_err; + } else if (ret == -ENOSYS) { + ib_conn->fmr_pool = NULL; + iser_warn("FMRs are not supported, using unaligned mode\n"); + ret = 0; } memset(&init_attr, 0, sizeof init_attr); diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index d5088ce78290..7ccf3284dda3 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -700,23 +700,24 @@ static int srp_reconnect_target(struct srp_target_port *target) struct Scsi_Host *shost = target->scsi_host; int i, ret; - if (target->state != SRP_TARGET_LIVE) - return -EAGAIN; - scsi_target_block(&shost->shost_gendev); srp_disconnect_target(target); /* - * Now get a new local CM ID so that we avoid confusing the - * target in case things are really fouled up. + * Now get a new local CM ID so that we avoid confusing the target in + * case things are really fouled up. Doing so also ensures that all CM + * callbacks will have finished before a new QP is allocated. */ ret = srp_new_cm_id(target); - if (ret) - goto unblock; - - ret = srp_create_target_ib(target); - if (ret) - goto unblock; + /* + * Whether or not creating a new CM ID succeeded, create a new + * QP. This guarantees that all completion callback function + * invocations have finished before request resetting starts. + */ + if (ret == 0) + ret = srp_create_target_ib(target); + else + srp_create_target_ib(target); for (i = 0; i < SRP_CMD_SQ_SIZE; ++i) { struct srp_request *req = &target->req_ring[i]; @@ -728,11 +729,12 @@ static int srp_reconnect_target(struct srp_target_port *target) for (i = 0; i < SRP_SQ_SIZE; ++i) list_add(&target->tx_ring[i]->list, &target->free_tx); - ret = srp_connect_target(target); + if (ret == 0) + ret = srp_connect_target(target); -unblock: scsi_target_unblock(&shost->shost_gendev, ret == 0 ? SDEV_RUNNING : SDEV_TRANSPORT_OFFLINE); + target->transport_offline = !!ret; if (ret) goto err; @@ -1352,6 +1354,12 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd) unsigned long flags; int len; + if (unlikely(target->transport_offline)) { + scmnd->result = DID_NO_CONNECT << 16; + scmnd->scsi_done(scmnd); + return 0; + } + spin_lock_irqsave(&target->lock, flags); iu = __srp_get_tx_iu(target, SRP_IU_CMD); if (!iu) @@ -1695,6 +1703,9 @@ static int srp_send_tsk_mgmt(struct srp_target_port *target, struct srp_iu *iu; struct srp_tsk_mgmt *tsk_mgmt; + if (!target->connected || target->qp_in_error) + return -1; + init_completion(&target->tsk_mgmt_done); spin_lock_irq(&target->lock); @@ -1736,7 +1747,7 @@ static int srp_abort(struct scsi_cmnd *scmnd) shost_printk(KERN_ERR, target->scsi_host, "SRP abort called\n"); - if (!req || target->qp_in_error || !srp_claim_req(target, req, scmnd)) + if (!req || !srp_claim_req(target, req, scmnd)) return FAILED; srp_send_tsk_mgmt(target, req->index, scmnd->device->lun, SRP_TSK_ABORT_TASK); @@ -1754,8 +1765,6 @@ static int srp_reset_device(struct scsi_cmnd *scmnd) shost_printk(KERN_ERR, target->scsi_host, "SRP reset_device called\n"); - if (target->qp_in_error) - return FAILED; if (srp_send_tsk_mgmt(target, SRP_TAG_NO_REQ, scmnd->device->lun, SRP_TSK_LUN_RESET)) return FAILED; @@ -1972,7 +1981,6 @@ static int srp_add_target(struct srp_host *host, struct srp_target_port *target) spin_unlock(&host->target_lock); target->state = SRP_TARGET_LIVE; - target->connected = false; scsi_scan_target(&target->scsi_host->shost_gendev, 0, target->scsi_id, SCAN_WILD_CARD, 0); diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h index de2d0b3c0bfe..66fbedda4571 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.h +++ b/drivers/infiniband/ulp/srp/ib_srp.h @@ -140,6 +140,7 @@ struct srp_target_port { unsigned int cmd_sg_cnt; unsigned int indirect_size; bool allow_ext_sg; + bool transport_offline; /* Everything above this point is used in the hot path of * command processing. Try to keep them packed into cachelines. diff --git a/drivers/net/ethernet/mellanox/mlx4/en_main.c b/drivers/net/ethernet/mellanox/mlx4/en_main.c index b2cca58de910..fc27800e9c38 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_main.c @@ -198,7 +198,7 @@ static void mlx4_en_remove(struct mlx4_dev *dev, void *endev_ptr) flush_workqueue(mdev->workqueue); destroy_workqueue(mdev->workqueue); - mlx4_mr_free(dev, &mdev->mr); + (void) mlx4_mr_free(dev, &mdev->mr); iounmap(mdev->uar_map); mlx4_uar_free(dev, &mdev->priv_uar); mlx4_pd_free(dev, mdev->priv_pdn); @@ -303,7 +303,7 @@ static void *mlx4_en_add(struct mlx4_dev *dev) return mdev; err_mr: - mlx4_mr_free(dev, &mdev->mr); + (void) mlx4_mr_free(dev, &mdev->mr); err_map: if (!mdev->uar_map) iounmap(mdev->uar_map); diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index 38b62c78d5da..50917eb3013e 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -762,15 +762,19 @@ int mlx4_QUERY_DEV_CAP_wrapper(struct mlx4_dev *dev, int slave, u64 flags; int err = 0; u8 field; + u32 bmme_flags; err = mlx4_cmd_box(dev, 0, outbox->dma, 0, 0, MLX4_CMD_QUERY_DEV_CAP, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); if (err) return err; - /* add port mng change event capability unconditionally to slaves */ + /* add port mng change event capability and disable mw type 1 + * unconditionally to slaves + */ MLX4_GET(flags, outbox->buf, QUERY_DEV_CAP_EXT_FLAGS_OFFSET); flags |= MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV; + flags &= ~MLX4_DEV_CAP_FLAG_MEM_WINDOW; MLX4_PUT(outbox->buf, flags, QUERY_DEV_CAP_EXT_FLAGS_OFFSET); /* For guests, report Blueflame disabled */ @@ -778,6 +782,11 @@ int mlx4_QUERY_DEV_CAP_wrapper(struct mlx4_dev *dev, int slave, field &= 0x7f; MLX4_PUT(outbox->buf, field, QUERY_DEV_CAP_BF_OFFSET); + /* For guests, disable mw type 2 */ + MLX4_GET(bmme_flags, outbox, QUERY_DEV_CAP_BMME_FLAGS_OFFSET); + bmme_flags &= ~MLX4_BMME_FLAG_TYPE_2_WIN; + MLX4_PUT(outbox->buf, bmme_flags, QUERY_DEV_CAP_BMME_FLAGS_OFFSET); + return 0; } @@ -1203,6 +1212,7 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param) #define INIT_HCA_FS_IB_NUM_ADDRS_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x26) #define INIT_HCA_TPT_OFFSET 0x0f0 #define INIT_HCA_DMPT_BASE_OFFSET (INIT_HCA_TPT_OFFSET + 0x00) +#define INIT_HCA_TPT_MW_OFFSET (INIT_HCA_TPT_OFFSET + 0x08) #define INIT_HCA_LOG_MPT_SZ_OFFSET (INIT_HCA_TPT_OFFSET + 0x0b) #define INIT_HCA_MTT_BASE_OFFSET (INIT_HCA_TPT_OFFSET + 0x10) #define INIT_HCA_CMPT_BASE_OFFSET (INIT_HCA_TPT_OFFSET + 0x18) @@ -1319,6 +1329,7 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param) /* TPT attributes */ MLX4_PUT(inbox, param->dmpt_base, INIT_HCA_DMPT_BASE_OFFSET); + MLX4_PUT(inbox, param->mw_enabled, INIT_HCA_TPT_MW_OFFSET); MLX4_PUT(inbox, param->log_mpt_sz, INIT_HCA_LOG_MPT_SZ_OFFSET); MLX4_PUT(inbox, param->mtt_base, INIT_HCA_MTT_BASE_OFFSET); MLX4_PUT(inbox, param->cmpt_base, INIT_HCA_CMPT_BASE_OFFSET); @@ -1415,6 +1426,7 @@ int mlx4_QUERY_HCA(struct mlx4_dev *dev, /* TPT attributes */ MLX4_GET(param->dmpt_base, outbox, INIT_HCA_DMPT_BASE_OFFSET); + MLX4_GET(param->mw_enabled, outbox, INIT_HCA_TPT_MW_OFFSET); MLX4_GET(param->log_mpt_sz, outbox, INIT_HCA_LOG_MPT_SZ_OFFSET); MLX4_GET(param->mtt_base, outbox, INIT_HCA_MTT_BASE_OFFSET); MLX4_GET(param->cmpt_base, outbox, INIT_HCA_CMPT_BASE_OFFSET); diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.h b/drivers/net/ethernet/mellanox/mlx4/fw.h index 3af33ff669cc..151c2bb380a6 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.h +++ b/drivers/net/ethernet/mellanox/mlx4/fw.h @@ -170,6 +170,7 @@ struct mlx4_init_hca_param { u8 log_mc_table_sz; u8 log_mpt_sz; u8 log_uar_sz; + u8 mw_enabled; /* Enable memory windows */ u8 uar_page_sz; /* log pg sz in 4k chunks */ u8 steering_mode; /* for QUERY_HCA */ u64 dev_cap_enabled; diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index b9dde139dac5..d180bc46826a 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -1431,6 +1431,10 @@ static int mlx4_init_hca(struct mlx4_dev *dev) init_hca.log_uar_sz = ilog2(dev->caps.num_uars); init_hca.uar_page_sz = PAGE_SHIFT - 12; + init_hca.mw_enabled = 0; + if (dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW || + dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN) + init_hca.mw_enabled = INIT_HCA_TPT_MW_ENABLE; err = mlx4_init_icm(dev, &dev_cap, &init_hca, icm_size); if (err) diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index ed4a6959e828..cf883345af88 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -60,6 +60,8 @@ #define MLX4_FS_MGM_LOG_ENTRY_SIZE 7 #define MLX4_FS_NUM_MCG (1 << 17) +#define INIT_HCA_TPT_MW_ENABLE (1 << 7) + #define MLX4_NUM_UP 8 #define MLX4_NUM_TC 8 #define MLX4_RATELIMIT_UNITS 3 /* 100 Mbps */ @@ -113,10 +115,10 @@ enum { MLX4_NUM_CMPTS = MLX4_CMPT_NUM_TYPE << MLX4_CMPT_SHIFT }; -enum mlx4_mr_state { - MLX4_MR_DISABLED = 0, - MLX4_MR_EN_HW, - MLX4_MR_EN_SW +enum mlx4_mpt_state { + MLX4_MPT_DISABLED = 0, + MLX4_MPT_EN_HW, + MLX4_MPT_EN_SW }; #define MLX4_COMM_TIME 10000 @@ -263,6 +265,22 @@ struct mlx4_icm_table { struct mlx4_icm **icm; }; +#define MLX4_MPT_FLAG_SW_OWNS (0xfUL << 28) +#define MLX4_MPT_FLAG_FREE (0x3UL << 28) +#define MLX4_MPT_FLAG_MIO (1 << 17) +#define MLX4_MPT_FLAG_BIND_ENABLE (1 << 15) +#define MLX4_MPT_FLAG_PHYSICAL (1 << 9) +#define MLX4_MPT_FLAG_REGION (1 << 8) + +#define MLX4_MPT_PD_FLAG_FAST_REG (1 << 27) +#define MLX4_MPT_PD_FLAG_RAE (1 << 28) +#define MLX4_MPT_PD_FLAG_EN_INV (3 << 24) + +#define MLX4_MPT_QP_FLAG_BOUND_QP (1 << 7) + +#define MLX4_MPT_STATUS_SW 0xF0 +#define MLX4_MPT_STATUS_HW 0x00 + /* * Must be packed because mtt_seg is 64 bits but only aligned to 32 bits. */ @@ -863,10 +881,10 @@ int __mlx4_cq_alloc_icm(struct mlx4_dev *dev, int *cqn); void __mlx4_cq_free_icm(struct mlx4_dev *dev, int cqn); int __mlx4_srq_alloc_icm(struct mlx4_dev *dev, int *srqn); void __mlx4_srq_free_icm(struct mlx4_dev *dev, int srqn); -int __mlx4_mr_reserve(struct mlx4_dev *dev); -void __mlx4_mr_release(struct mlx4_dev *dev, u32 index); -int __mlx4_mr_alloc_icm(struct mlx4_dev *dev, u32 index); -void __mlx4_mr_free_icm(struct mlx4_dev *dev, u32 index); +int __mlx4_mpt_reserve(struct mlx4_dev *dev); +void __mlx4_mpt_release(struct mlx4_dev *dev, u32 index); +int __mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index); +void __mlx4_mpt_free_icm(struct mlx4_dev *dev, u32 index); u32 __mlx4_alloc_mtt_range(struct mlx4_dev *dev, int order); void __mlx4_free_mtt_range(struct mlx4_dev *dev, u32 first_seg, int order); diff --git a/drivers/net/ethernet/mellanox/mlx4/mr.c b/drivers/net/ethernet/mellanox/mlx4/mr.c index c202d3ad2a0e..602ca9bf78e4 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mr.c +++ b/drivers/net/ethernet/mellanox/mlx4/mr.c @@ -44,20 +44,6 @@ #include "mlx4.h" #include "icm.h" -#define MLX4_MPT_FLAG_SW_OWNS (0xfUL << 28) -#define MLX4_MPT_FLAG_FREE (0x3UL << 28) -#define MLX4_MPT_FLAG_MIO (1 << 17) -#define MLX4_MPT_FLAG_BIND_ENABLE (1 << 15) -#define MLX4_MPT_FLAG_PHYSICAL (1 << 9) -#define MLX4_MPT_FLAG_REGION (1 << 8) - -#define MLX4_MPT_PD_FLAG_FAST_REG (1 << 27) -#define MLX4_MPT_PD_FLAG_RAE (1 << 28) -#define MLX4_MPT_PD_FLAG_EN_INV (3 << 24) - -#define MLX4_MPT_STATUS_SW 0xF0 -#define MLX4_MPT_STATUS_HW 0x00 - static u32 mlx4_buddy_alloc(struct mlx4_buddy *buddy, int order) { int o; @@ -321,7 +307,7 @@ static int mlx4_mr_alloc_reserved(struct mlx4_dev *dev, u32 mridx, u32 pd, mr->size = size; mr->pd = pd; mr->access = access; - mr->enabled = MLX4_MR_DISABLED; + mr->enabled = MLX4_MPT_DISABLED; mr->key = hw_index_to_key(mridx); return mlx4_mtt_init(dev, npages, page_shift, &mr->mtt); @@ -335,14 +321,14 @@ static int mlx4_WRITE_MTT(struct mlx4_dev *dev, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); } -int __mlx4_mr_reserve(struct mlx4_dev *dev) +int __mlx4_mpt_reserve(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); return mlx4_bitmap_alloc(&priv->mr_table.mpt_bitmap); } -static int mlx4_mr_reserve(struct mlx4_dev *dev) +static int mlx4_mpt_reserve(struct mlx4_dev *dev) { u64 out_param; @@ -353,17 +339,17 @@ static int mlx4_mr_reserve(struct mlx4_dev *dev) return -1; return get_param_l(&out_param); } - return __mlx4_mr_reserve(dev); + return __mlx4_mpt_reserve(dev); } -void __mlx4_mr_release(struct mlx4_dev *dev, u32 index) +void __mlx4_mpt_release(struct mlx4_dev *dev, u32 index) { struct mlx4_priv *priv = mlx4_priv(dev); mlx4_bitmap_free(&priv->mr_table.mpt_bitmap, index); } -static void mlx4_mr_release(struct mlx4_dev *dev, u32 index) +static void mlx4_mpt_release(struct mlx4_dev *dev, u32 index) { u64 in_param; @@ -376,17 +362,17 @@ static void mlx4_mr_release(struct mlx4_dev *dev, u32 index) index); return; } - __mlx4_mr_release(dev, index); + __mlx4_mpt_release(dev, index); } -int __mlx4_mr_alloc_icm(struct mlx4_dev *dev, u32 index) +int __mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index) { struct mlx4_mr_table *mr_table = &mlx4_priv(dev)->mr_table; return mlx4_table_get(dev, &mr_table->dmpt_table, index); } -static int mlx4_mr_alloc_icm(struct mlx4_dev *dev, u32 index) +static int mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index) { u64 param; @@ -397,17 +383,17 @@ static int mlx4_mr_alloc_icm(struct mlx4_dev *dev, u32 index) MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); } - return __mlx4_mr_alloc_icm(dev, index); + return __mlx4_mpt_alloc_icm(dev, index); } -void __mlx4_mr_free_icm(struct mlx4_dev *dev, u32 index) +void __mlx4_mpt_free_icm(struct mlx4_dev *dev, u32 index) { struct mlx4_mr_table *mr_table = &mlx4_priv(dev)->mr_table; mlx4_table_put(dev, &mr_table->dmpt_table, index); } -static void mlx4_mr_free_icm(struct mlx4_dev *dev, u32 index) +static void mlx4_mpt_free_icm(struct mlx4_dev *dev, u32 index) { u64 in_param; @@ -420,7 +406,7 @@ static void mlx4_mr_free_icm(struct mlx4_dev *dev, u32 index) index); return; } - return __mlx4_mr_free_icm(dev, index); + return __mlx4_mpt_free_icm(dev, index); } int mlx4_mr_alloc(struct mlx4_dev *dev, u32 pd, u64 iova, u64 size, u32 access, @@ -429,41 +415,52 @@ int mlx4_mr_alloc(struct mlx4_dev *dev, u32 pd, u64 iova, u64 size, u32 access, u32 index; int err; - index = mlx4_mr_reserve(dev); + index = mlx4_mpt_reserve(dev); if (index == -1) return -ENOMEM; err = mlx4_mr_alloc_reserved(dev, index, pd, iova, size, access, npages, page_shift, mr); if (err) - mlx4_mr_release(dev, index); + mlx4_mpt_release(dev, index); return err; } EXPORT_SYMBOL_GPL(mlx4_mr_alloc); -static void mlx4_mr_free_reserved(struct mlx4_dev *dev, struct mlx4_mr *mr) +static int mlx4_mr_free_reserved(struct mlx4_dev *dev, struct mlx4_mr *mr) { int err; - if (mr->enabled == MLX4_MR_EN_HW) { + if (mr->enabled == MLX4_MPT_EN_HW) { err = mlx4_HW2SW_MPT(dev, NULL, key_to_hw_index(mr->key) & (dev->caps.num_mpts - 1)); - if (err) - mlx4_warn(dev, "xxx HW2SW_MPT failed (%d)\n", err); + if (err) { + mlx4_warn(dev, "HW2SW_MPT failed (%d),", err); + mlx4_warn(dev, "MR has MWs bound to it.\n"); + return err; + } - mr->enabled = MLX4_MR_EN_SW; + mr->enabled = MLX4_MPT_EN_SW; } mlx4_mtt_cleanup(dev, &mr->mtt); + + return 0; } -void mlx4_mr_free(struct mlx4_dev *dev, struct mlx4_mr *mr) +int mlx4_mr_free(struct mlx4_dev *dev, struct mlx4_mr *mr) { - mlx4_mr_free_reserved(dev, mr); + int ret; + + ret = mlx4_mr_free_reserved(dev, mr); + if (ret) + return ret; if (mr->enabled) - mlx4_mr_free_icm(dev, key_to_hw_index(mr->key)); - mlx4_mr_release(dev, key_to_hw_index(mr->key)); + mlx4_mpt_free_icm(dev, key_to_hw_index(mr->key)); + mlx4_mpt_release(dev, key_to_hw_index(mr->key)); + + return 0; } EXPORT_SYMBOL_GPL(mlx4_mr_free); @@ -473,7 +470,7 @@ int mlx4_mr_enable(struct mlx4_dev *dev, struct mlx4_mr *mr) struct mlx4_mpt_entry *mpt_entry; int err; - err = mlx4_mr_alloc_icm(dev, key_to_hw_index(mr->key)); + err = mlx4_mpt_alloc_icm(dev, key_to_hw_index(mr->key)); if (err) return err; @@ -520,7 +517,7 @@ int mlx4_mr_enable(struct mlx4_dev *dev, struct mlx4_mr *mr) mlx4_warn(dev, "SW2HW_MPT failed (%d)\n", err); goto err_cmd; } - mr->enabled = MLX4_MR_EN_HW; + mr->enabled = MLX4_MPT_EN_HW; mlx4_free_cmd_mailbox(dev, mailbox); @@ -530,7 +527,7 @@ err_cmd: mlx4_free_cmd_mailbox(dev, mailbox); err_table: - mlx4_mr_free_icm(dev, key_to_hw_index(mr->key)); + mlx4_mpt_free_icm(dev, key_to_hw_index(mr->key)); return err; } EXPORT_SYMBOL_GPL(mlx4_mr_enable); @@ -657,6 +654,101 @@ int mlx4_buf_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt, } EXPORT_SYMBOL_GPL(mlx4_buf_write_mtt); +int mlx4_mw_alloc(struct mlx4_dev *dev, u32 pd, enum mlx4_mw_type type, + struct mlx4_mw *mw) +{ + u32 index; + + if ((type == MLX4_MW_TYPE_1 && + !(dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW)) || + (type == MLX4_MW_TYPE_2 && + !(dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN))) + return -ENOTSUPP; + + index = mlx4_mpt_reserve(dev); + if (index == -1) + return -ENOMEM; + + mw->key = hw_index_to_key(index); + mw->pd = pd; + mw->type = type; + mw->enabled = MLX4_MPT_DISABLED; + + return 0; +} +EXPORT_SYMBOL_GPL(mlx4_mw_alloc); + +int mlx4_mw_enable(struct mlx4_dev *dev, struct mlx4_mw *mw) +{ + struct mlx4_cmd_mailbox *mailbox; + struct mlx4_mpt_entry *mpt_entry; + int err; + + err = mlx4_mpt_alloc_icm(dev, key_to_hw_index(mw->key)); + if (err) + return err; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) { + err = PTR_ERR(mailbox); + goto err_table; + } + mpt_entry = mailbox->buf; + + memset(mpt_entry, 0, sizeof(*mpt_entry)); + + /* Note that the MLX4_MPT_FLAG_REGION bit in mpt_entry->flags is turned + * off, thus creating a memory window and not a memory region. + */ + mpt_entry->key = cpu_to_be32(key_to_hw_index(mw->key)); + mpt_entry->pd_flags = cpu_to_be32(mw->pd); + if (mw->type == MLX4_MW_TYPE_2) { + mpt_entry->flags |= cpu_to_be32(MLX4_MPT_FLAG_FREE); + mpt_entry->qpn = cpu_to_be32(MLX4_MPT_QP_FLAG_BOUND_QP); + mpt_entry->pd_flags |= cpu_to_be32(MLX4_MPT_PD_FLAG_EN_INV); + } + + err = mlx4_SW2HW_MPT(dev, mailbox, + key_to_hw_index(mw->key) & + (dev->caps.num_mpts - 1)); + if (err) { + mlx4_warn(dev, "SW2HW_MPT failed (%d)\n", err); + goto err_cmd; + } + mw->enabled = MLX4_MPT_EN_HW; + + mlx4_free_cmd_mailbox(dev, mailbox); + + return 0; + +err_cmd: + mlx4_free_cmd_mailbox(dev, mailbox); + +err_table: + mlx4_mpt_free_icm(dev, key_to_hw_index(mw->key)); + return err; +} +EXPORT_SYMBOL_GPL(mlx4_mw_enable); + +void mlx4_mw_free(struct mlx4_dev *dev, struct mlx4_mw *mw) +{ + int err; + + if (mw->enabled == MLX4_MPT_EN_HW) { + err = mlx4_HW2SW_MPT(dev, NULL, + key_to_hw_index(mw->key) & + (dev->caps.num_mpts - 1)); + if (err) + mlx4_warn(dev, "xxx HW2SW_MPT failed (%d)\n", err); + + mw->enabled = MLX4_MPT_EN_SW; + } + if (mw->enabled) + mlx4_mpt_free_icm(dev, key_to_hw_index(mw->key)); + mlx4_mpt_release(dev, key_to_hw_index(mw->key)); +} +EXPORT_SYMBOL_GPL(mlx4_mw_free); + int mlx4_init_mr_table(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); @@ -831,7 +923,7 @@ int mlx4_fmr_alloc(struct mlx4_dev *dev, u32 pd, u32 access, int max_pages, return 0; err_free: - mlx4_mr_free(dev, &fmr->mr); + (void) mlx4_mr_free(dev, &fmr->mr); return err; } EXPORT_SYMBOL_GPL(mlx4_fmr_alloc); @@ -882,17 +974,21 @@ void mlx4_fmr_unmap(struct mlx4_dev *dev, struct mlx4_fmr *fmr, err); return; } - fmr->mr.enabled = MLX4_MR_EN_SW; + fmr->mr.enabled = MLX4_MPT_EN_SW; } EXPORT_SYMBOL_GPL(mlx4_fmr_unmap); int mlx4_fmr_free(struct mlx4_dev *dev, struct mlx4_fmr *fmr) { + int ret; + if (fmr->maps) return -EBUSY; - mlx4_mr_free(dev, &fmr->mr); - fmr->mr.enabled = MLX4_MR_DISABLED; + ret = mlx4_mr_free(dev, &fmr->mr); + if (ret) + return ret; + fmr->mr.enabled = MLX4_MPT_DISABLED; return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 5997adc943d0..083fb48dc3d7 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -1231,14 +1231,14 @@ static int mpt_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, switch (op) { case RES_OP_RESERVE: - index = __mlx4_mr_reserve(dev); + index = __mlx4_mpt_reserve(dev); if (index == -1) break; id = index & mpt_mask(dev); err = add_res_range(dev, slave, id, 1, RES_MPT, index); if (err) { - __mlx4_mr_release(dev, index); + __mlx4_mpt_release(dev, index); break; } set_param_l(out_param, index); @@ -1251,7 +1251,7 @@ static int mpt_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, if (err) return err; - err = __mlx4_mr_alloc_icm(dev, mpt->key); + err = __mlx4_mpt_alloc_icm(dev, mpt->key); if (err) { res_abort_move(dev, slave, RES_MPT, id); return err; @@ -1586,7 +1586,7 @@ static int mpt_free_res(struct mlx4_dev *dev, int slave, int op, int cmd, err = rem_res_range(dev, slave, id, 1, RES_MPT, 0); if (err) break; - __mlx4_mr_release(dev, index); + __mlx4_mpt_release(dev, index); break; case RES_OP_MAP_ICM: index = get_param_l(&in_param); @@ -1596,7 +1596,7 @@ static int mpt_free_res(struct mlx4_dev *dev, int slave, int op, int cmd, if (err) return err; - __mlx4_mr_free_icm(dev, mpt->key); + __mlx4_mpt_free_icm(dev, mpt->key); res_end_move(dev, slave, RES_MPT, id); return err; break; @@ -1796,6 +1796,26 @@ static int mr_get_mtt_size(struct mlx4_mpt_entry *mpt) return be32_to_cpu(mpt->mtt_sz); } +static u32 mr_get_pd(struct mlx4_mpt_entry *mpt) +{ + return be32_to_cpu(mpt->pd_flags) & 0x00ffffff; +} + +static int mr_is_fmr(struct mlx4_mpt_entry *mpt) +{ + return be32_to_cpu(mpt->pd_flags) & MLX4_MPT_PD_FLAG_FAST_REG; +} + +static int mr_is_bind_enabled(struct mlx4_mpt_entry *mpt) +{ + return be32_to_cpu(mpt->flags) & MLX4_MPT_FLAG_BIND_ENABLE; +} + +static int mr_is_region(struct mlx4_mpt_entry *mpt) +{ + return be32_to_cpu(mpt->flags) & MLX4_MPT_FLAG_REGION; +} + static int qp_get_mtt_addr(struct mlx4_qp_context *qpc) { return be32_to_cpu(qpc->mtt_base_addr_l) & 0xfffffff8; @@ -1856,12 +1876,41 @@ int mlx4_SW2HW_MPT_wrapper(struct mlx4_dev *dev, int slave, int mtt_base = mr_get_mtt_addr(inbox->buf) / dev->caps.mtt_entry_sz; int phys; int id; + u32 pd; + int pd_slave; id = index & mpt_mask(dev); err = mr_res_start_move_to(dev, slave, id, RES_MPT_HW, &mpt); if (err) return err; + /* Disable memory windows for VFs. */ + if (!mr_is_region(inbox->buf)) { + err = -EPERM; + goto ex_abort; + } + + /* Make sure that the PD bits related to the slave id are zeros. */ + pd = mr_get_pd(inbox->buf); + pd_slave = (pd >> 17) & 0x7f; + if (pd_slave != 0 && pd_slave != slave) { + err = -EPERM; + goto ex_abort; + } + + if (mr_is_fmr(inbox->buf)) { + /* FMR and Bind Enable are forbidden in slave devices. */ + if (mr_is_bind_enabled(inbox->buf)) { + err = -EPERM; + goto ex_abort; + } + /* FMR and Memory Windows are also forbidden. */ + if (!mr_is_region(inbox->buf)) { + err = -EPERM; + goto ex_abort; + } + } + phys = mr_phys_mpt(inbox->buf); if (!phys) { err = get_res(dev, slave, mtt_base, RES_MTT, &mtt); @@ -3480,7 +3529,7 @@ static void rem_slave_mrs(struct mlx4_dev *dev, int slave) while (state != 0) { switch (state) { case RES_MPT_RESERVED: - __mlx4_mr_release(dev, mpt->key); + __mlx4_mpt_release(dev, mpt->key); spin_lock_irq(mlx4_tlock(dev)); rb_erase(&mpt->com.node, &tracker->res_tree[RES_MPT]); @@ -3491,7 +3540,7 @@ static void rem_slave_mrs(struct mlx4_dev *dev, int slave) break; case RES_MPT_MAPPED: - __mlx4_mr_free_icm(dev, mpt->key); + __mlx4_mpt_free_icm(dev, mpt->key); state = RES_MPT_RESERVED; break; diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 6d48fce06b4a..811f91cf5e8c 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -171,6 +171,7 @@ enum { #define MLX4_ATTR_EXTENDED_PORT_INFO cpu_to_be16(0xff90) enum { + MLX4_BMME_FLAG_WIN_TYPE_2B = 1 << 1, MLX4_BMME_FLAG_LOCAL_INV = 1 << 6, MLX4_BMME_FLAG_REMOTE_INV = 1 << 7, MLX4_BMME_FLAG_TYPE_2_WIN = 1 << 9, @@ -238,7 +239,8 @@ enum { MLX4_PERM_LOCAL_WRITE = 1 << 11, MLX4_PERM_REMOTE_READ = 1 << 12, MLX4_PERM_REMOTE_WRITE = 1 << 13, - MLX4_PERM_ATOMIC = 1 << 14 + MLX4_PERM_ATOMIC = 1 << 14, + MLX4_PERM_BIND_MW = 1 << 15, }; enum { @@ -504,6 +506,18 @@ struct mlx4_mr { int enabled; }; +enum mlx4_mw_type { + MLX4_MW_TYPE_1 = 1, + MLX4_MW_TYPE_2 = 2, +}; + +struct mlx4_mw { + u32 key; + u32 pd; + enum mlx4_mw_type type; + int enabled; +}; + struct mlx4_fmr { struct mlx4_mr mr; struct mlx4_mpt_entry *mpt; @@ -802,8 +816,12 @@ u64 mlx4_mtt_addr(struct mlx4_dev *dev, struct mlx4_mtt *mtt); int mlx4_mr_alloc(struct mlx4_dev *dev, u32 pd, u64 iova, u64 size, u32 access, int npages, int page_shift, struct mlx4_mr *mr); -void mlx4_mr_free(struct mlx4_dev *dev, struct mlx4_mr *mr); +int mlx4_mr_free(struct mlx4_dev *dev, struct mlx4_mr *mr); int mlx4_mr_enable(struct mlx4_dev *dev, struct mlx4_mr *mr); +int mlx4_mw_alloc(struct mlx4_dev *dev, u32 pd, enum mlx4_mw_type type, + struct mlx4_mw *mw); +void mlx4_mw_free(struct mlx4_dev *dev, struct mlx4_mw *mw); +int mlx4_mw_enable(struct mlx4_dev *dev, struct mlx4_mw *mw); int mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt, int start_index, int npages, u64 *page_list); int mlx4_buf_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt, diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h index 4b4ad6ffef92..67f46ad6920a 100644 --- a/include/linux/mlx4/qp.h +++ b/include/linux/mlx4/qp.h @@ -265,6 +265,11 @@ struct mlx4_wqe_lso_seg { __be32 header[0]; }; +enum mlx4_wqe_bind_seg_flags2 { + MLX4_WQE_BIND_ZERO_BASED = (1 << 30), + MLX4_WQE_BIND_TYPE_2 = (1 << 31), +}; + struct mlx4_wqe_bind_seg { __be32 flags1; __be32 flags2; @@ -277,9 +282,9 @@ struct mlx4_wqe_bind_seg { enum { MLX4_WQE_FMR_PERM_LOCAL_READ = 1 << 27, MLX4_WQE_FMR_PERM_LOCAL_WRITE = 1 << 28, - MLX4_WQE_FMR_PERM_REMOTE_READ = 1 << 29, - MLX4_WQE_FMR_PERM_REMOTE_WRITE = 1 << 30, - MLX4_WQE_FMR_PERM_ATOMIC = 1 << 31 + MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_READ = 1 << 29, + MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_WRITE = 1 << 30, + MLX4_WQE_FMR_AND_BIND_PERM_ATOMIC = 1 << 31 }; struct mlx4_wqe_fmr_seg { @@ -304,12 +309,10 @@ struct mlx4_wqe_fmr_ext_seg { }; struct mlx4_wqe_local_inval_seg { - __be32 flags; - u32 reserved1; + u64 reserved1; __be32 mem_key; - u32 reserved2[2]; - __be32 guest_id; - __be64 pa; + u32 reserved2; + u64 reserved3[2]; }; struct mlx4_wqe_raddr_seg { diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 46bc045bbe15..98cc4b29fc5b 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -115,6 +115,8 @@ enum ib_device_cap_flags { IB_DEVICE_XRC = (1<<20), IB_DEVICE_MEM_MGT_EXTENSIONS = (1<<21), IB_DEVICE_BLOCK_MULTICAST_LOOPBACK = (1<<22), + IB_DEVICE_MEM_WINDOW_TYPE_2A = (1<<23), + IB_DEVICE_MEM_WINDOW_TYPE_2B = (1<<24) }; enum ib_atomic_cap { @@ -715,6 +717,11 @@ enum ib_mig_state { IB_MIG_ARMED }; +enum ib_mw_type { + IB_MW_TYPE_1 = 1, + IB_MW_TYPE_2 = 2 +}; + struct ib_qp_attr { enum ib_qp_state qp_state; enum ib_qp_state cur_qp_state; @@ -758,6 +765,7 @@ enum ib_wr_opcode { IB_WR_FAST_REG_MR, IB_WR_MASKED_ATOMIC_CMP_AND_SWP, IB_WR_MASKED_ATOMIC_FETCH_AND_ADD, + IB_WR_BIND_MW, }; enum ib_send_flags { @@ -780,6 +788,23 @@ struct ib_fast_reg_page_list { unsigned int max_page_list_len; }; +/** + * struct ib_mw_bind_info - Parameters for a memory window bind operation. + * @mr: A memory region to bind the memory window to. + * @addr: The address where the memory window should begin. + * @length: The length of the memory window, in bytes. + * @mw_access_flags: Access flags from enum ib_access_flags for the window. + * + * This struct contains the shared parameters for type 1 and type 2 + * memory window bind operations. + */ +struct ib_mw_bind_info { + struct ib_mr *mr; + u64 addr; + u64 length; + int mw_access_flags; +}; + struct ib_send_wr { struct ib_send_wr *next; u64 wr_id; @@ -823,6 +848,12 @@ struct ib_send_wr { int access_flags; u32 rkey; } fast_reg; + struct { + struct ib_mw *mw; + /* The new rkey for the memory window. */ + u32 rkey; + struct ib_mw_bind_info bind_info; + } bind_mw; } wr; u32 xrc_remote_srq_num; /* XRC TGT QPs only */ }; @@ -839,7 +870,8 @@ enum ib_access_flags { IB_ACCESS_REMOTE_WRITE = (1<<1), IB_ACCESS_REMOTE_READ = (1<<2), IB_ACCESS_REMOTE_ATOMIC = (1<<3), - IB_ACCESS_MW_BIND = (1<<4) + IB_ACCESS_MW_BIND = (1<<4), + IB_ZERO_BASED = (1<<5) }; struct ib_phys_buf { @@ -862,13 +894,16 @@ enum ib_mr_rereg_flags { IB_MR_REREG_ACCESS = (1<<2) }; +/** + * struct ib_mw_bind - Parameters for a type 1 memory window bind operation. + * @wr_id: Work request id. + * @send_flags: Flags from ib_send_flags enum. + * @bind_info: More parameters of the bind operation. + */ struct ib_mw_bind { - struct ib_mr *mr; - u64 wr_id; - u64 addr; - u32 length; - int send_flags; - int mw_access_flags; + u64 wr_id; + int send_flags; + struct ib_mw_bind_info bind_info; }; struct ib_fmr_attr { @@ -991,6 +1026,7 @@ struct ib_mw { struct ib_pd *pd; struct ib_uobject *uobject; u32 rkey; + enum ib_mw_type type; }; struct ib_fmr { @@ -1202,7 +1238,8 @@ struct ib_device { int num_phys_buf, int mr_access_flags, u64 *iova_start); - struct ib_mw * (*alloc_mw)(struct ib_pd *pd); + struct ib_mw * (*alloc_mw)(struct ib_pd *pd, + enum ib_mw_type type); int (*bind_mw)(struct ib_qp *qp, struct ib_mw *mw, struct ib_mw_bind *mw_bind); @@ -2019,6 +2056,8 @@ int ib_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr); * ib_dereg_mr - Deregisters a memory region and removes it from the * HCA translation table. * @mr: The memory region to deregister. + * + * This function can fail, if the memory region has memory windows bound to it. */ int ib_dereg_mr(struct ib_mr *mr); @@ -2071,10 +2110,22 @@ static inline void ib_update_fast_reg_key(struct ib_mr *mr, u8 newkey) } /** + * ib_inc_rkey - increments the key portion of the given rkey. Can be used + * for calculating a new rkey for type 2 memory windows. + * @rkey - the rkey to increment. + */ +static inline u32 ib_inc_rkey(u32 rkey) +{ + const u32 mask = 0x000000ff; + return ((rkey + 1) & mask) | (rkey & ~mask); +} + +/** * ib_alloc_mw - Allocates a memory window. * @pd: The protection domain associated with the memory window. + * @type: The type of the memory window (1 or 2). */ -struct ib_mw *ib_alloc_mw(struct ib_pd *pd); +struct ib_mw *ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type); /** * ib_bind_mw - Posts a work request to the send queue of the specified @@ -2084,6 +2135,10 @@ struct ib_mw *ib_alloc_mw(struct ib_pd *pd); * @mw: The memory window to bind. * @mw_bind: Specifies information about the memory window, including * its address range, remote access rights, and associated memory region. + * + * If there is no immediate error, the function will update the rkey member + * of the mw parameter to its new value. The bind operation can still fail + * asynchronously. */ static inline int ib_bind_mw(struct ib_qp *qp, struct ib_mw *mw, diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h index 81aba3a73aa3..805711ea2005 100644 --- a/include/uapi/rdma/ib_user_verbs.h +++ b/include/uapi/rdma/ib_user_verbs.h @@ -261,6 +261,22 @@ struct ib_uverbs_dereg_mr { __u32 mr_handle; }; +struct ib_uverbs_alloc_mw { + __u64 response; + __u32 pd_handle; + __u8 mw_type; + __u8 reserved[3]; +}; + +struct ib_uverbs_alloc_mw_resp { + __u32 mw_handle; + __u32 rkey; +}; + +struct ib_uverbs_dealloc_mw { + __u32 mw_handle; +}; + struct ib_uverbs_create_comp_channel { __u64 response; }; diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 745973b729af..93726560eaa8 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -1086,7 +1086,7 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, case RPCRDMA_MEMWINDOWS: /* Allocate one extra request's worth, for full cycling */ for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) { - r->r.mw = ib_alloc_mw(ia->ri_pd); + r->r.mw = ib_alloc_mw(ia->ri_pd, IB_MW_TYPE_1); if (IS_ERR(r->r.mw)) { rc = PTR_ERR(r->r.mw); dprintk("RPC: %s: ib_alloc_mw" @@ -1673,12 +1673,12 @@ rpcrdma_register_memwin_external(struct rpcrdma_mr_seg *seg, *nsegs = 1; rpcrdma_map_one(ia, seg, writing); - param.mr = ia->ri_bind_mem; + param.bind_info.mr = ia->ri_bind_mem; param.wr_id = 0ULL; /* no send cookie */ - param.addr = seg->mr_dma; - param.length = seg->mr_len; + param.bind_info.addr = seg->mr_dma; + param.bind_info.length = seg->mr_len; param.send_flags = 0; - param.mw_access_flags = mem_priv; + param.bind_info.mw_access_flags = mem_priv; DECR_CQCOUNT(&r_xprt->rx_ep); rc = ib_bind_mw(ia->ri_id->qp, seg->mr_chunk.rl_mw->r.mw, ¶m); @@ -1690,7 +1690,7 @@ rpcrdma_register_memwin_external(struct rpcrdma_mr_seg *seg, rpcrdma_unmap_one(ia, seg); } else { seg->mr_rkey = seg->mr_chunk.rl_mw->r.mw->rkey; - seg->mr_base = param.addr; + seg->mr_base = param.bind_info.addr; seg->mr_nsegs = 1; } return rc; @@ -1706,10 +1706,10 @@ rpcrdma_deregister_memwin_external(struct rpcrdma_mr_seg *seg, int rc; BUG_ON(seg->mr_nsegs != 1); - param.mr = ia->ri_bind_mem; - param.addr = 0ULL; /* unbind */ - param.length = 0; - param.mw_access_flags = 0; + param.bind_info.mr = ia->ri_bind_mem; + param.bind_info.addr = 0ULL; /* unbind */ + param.bind_info.length = 0; + param.bind_info.mw_access_flags = 0; if (*r) { param.wr_id = (u64) (unsigned long) *r; param.send_flags = IB_SEND_SIGNALED; |