author     Linus Torvalds <torvalds@linux-foundation.org>   2017-11-15 23:54:53 +0100
committer  Linus Torvalds <torvalds@linux-foundation.org>   2017-11-15 23:54:53 +0100
commit     ad0835a93008e5901415a0a27847d6a27649aa3a (patch)
tree       e48be396ebfbb4f1fb02e7ca76461bdb1427490d /drivers/infiniband/ulp
parent     Merge branch 'for-4.15' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/c... (diff)
parent     RDMA/core: Rename kernel modify_cq to better describe its usage (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma
Pull rdma updates from Doug Ledford:
"This is a fairly plain pull request. Lots of driver updates across the
stack, a huge number of static analysis cleanups including a close-to-50-patch
series from Bart Van Assche, and a number of new features
inside the stack such as general CQ moderation support.
Nothing really stands out, but there might be a few conflicts as you
take things in. In particular, the cleanups touched some of the same
lines as the new timer_setup changes.
Everything in this pull request has been through 0day and at least two
days of linux-next (since Stephen doesn't necessarily flag new
errors/warnings until day 2). A few more items (about 30 patches) from
Intel and Mellanox showed up on the list on Tuesday. I've excluded
those from this pull request, and I'm sure some of them qualify as
fixes suitable to send any time, but I still have to review them
fully. If they contain mostly fixes and little or no new development,
then I will probably send them through by the end of the week just to
get them out of the way.
There was a break in my acceptance of patches which coincides with the
computer problems I had, and then when I got things mostly back under
control I had a backlog of patches to process, which I did mostly last
Friday and Monday. So there is a larger number of patches processed in
that timeframe than I was striving for.
Summary:
- Add iWARP support to qedr driver
- Lots of misc fixes across subsystem
- Multiple update series to hns roce driver
- Multiple update series to hfi1 driver
- Updates to vnic driver
- Add kref to wait struct in cxgb4 driver
- Updates to i40iw driver
- Mellanox shared pull request
- timer_setup changes
- massive cleanup series from Bart Van Assche
- Two series of SRP/SRPT changes from Bart Van Assche
- Core updates from Mellanox
- i40iw updates
- IPoIB updates
- mlx5 updates
- mlx4 updates
- hns updates
- bnxt_re fixes
- PCI write padding support
- Sparse/Smatch/warning cleanups/fixes
- CQ moderation support
- SRQ support in vmw_pvrdma"
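
The "CQ moderation support" item above corresponds to the kernel-side rename of
ib_modify_cq() to rdma_set_cq_moderation(), which is visible in the ipoib_ethtool.c
hunk further down. Below is a minimal sketch of how a ULP's ethtool set_coalesce
handler might use it; struct my_priv, my_set_coalesce() and the recv_cq field are
hypothetical stand-ins, not the actual ipoib code:

```c
#include <linux/ethtool.h>
#include <linux/netdevice.h>
#include <rdma/ib_verbs.h>

/* Hypothetical per-device private data. */
struct my_priv {
	struct ib_cq *recv_cq;
};

static int my_set_coalesce(struct net_device *dev,
			   struct ethtool_coalesce *coal)
{
	struct my_priv *priv = netdev_priv(dev);
	int ret;

	/* rdma_set_cq_moderation() is the renamed kernel-side ib_modify_cq():
	 * it limits how many completions (count) or how much time (usecs)
	 * may pass before the HCA raises a completion interrupt. */
	ret = rdma_set_cq_moderation(priv->recv_cq,
				     coal->rx_max_coalesced_frames,
				     coal->rx_coalesce_usecs);
	if (ret && ret != -ENOSYS)
		return ret;

	return 0;
}
```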
* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma: (296 commits)
RDMA/core: Rename kernel modify_cq to better describe its usage
IB/mlx5: Add CQ moderation capability to query_device
IB/mlx4: Add CQ moderation capability to query_device
IB/uverbs: Add CQ moderation capability to query_device
IB/mlx5: Exposing modify CQ callback to uverbs layer
IB/mlx4: Exposing modify CQ callback to uverbs layer
IB/uverbs: Allow CQ moderation with modify CQ
iw_cxgb4: atomically flush the qp
iw_cxgb4: only call the cq comp_handler when the cq is armed
iw_cxgb4: Fix possible circular dependency locking warning
RDMA/bnxt_re: report vlan_id and sl in qp1 recv completion
IB/core: Only maintain real QPs in the security lists
IB/ocrdma_hw: remove unnecessary code in ocrdma_mbx_dealloc_lkey
RDMA/core: Make function rdma_copy_addr return void
RDMA/vmw_pvrdma: Add shared receive queue support
RDMA/core: avoid uninitialized variable warning in create_udata
RDMA/bnxt_re: synchronize poll_cq and req_notify_cq verbs
RDMA/bnxt_re: Flush CQ notification Work Queue before destroying QP
RDMA/bnxt_re: Set QP state in case of response completion errors
RDMA/bnxt_re: Add memory barriers when processing CQ/EQ entries
...
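
Several of the conflicts Doug mentions come from the tree-wide timer API conversion
("timer_setup changes" in the summary). For reference, the conversion pattern looks
roughly like the sketch below; struct my_priv and my_timeout() are hypothetical, and
the ipoib diff further down actually deletes its poll_timer entirely rather than
converting it:

```c
#include <linux/timer.h>

struct my_priv {
	struct timer_list poll_timer;
	/* ... */
};

/* New-style callback: receives the timer_list pointer instead of an
 * opaque unsigned long, and recovers its container via from_timer(). */
static void my_timeout(struct timer_list *t)
{
	struct my_priv *priv = from_timer(priv, t, poll_timer);

	/* handle the expiry using priv ... */
}

static void my_init(struct my_priv *priv)
{
	/* Old API: setup_timer(&priv->poll_timer, fn, (unsigned long)priv);
	 * New API: the context is implied by the embedded timer_list. */
	timer_setup(&priv->poll_timer, my_timeout, 0);
}
```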
Diffstat (limited to 'drivers/infiniband/ulp')
 drivers/infiniband/ulp/ipoib/ipoib.h                  |  16
 drivers/infiniband/ulp/ipoib/ipoib_cm.c               |  56
 drivers/infiniband/ulp/ipoib/ipoib_ethtool.c          |   5
 drivers/infiniband/ulp/ipoib/ipoib_ib.c               | 135
 drivers/infiniband/ulp/ipoib/ipoib_main.c             |  29
 drivers/infiniband/ulp/ipoib/ipoib_verbs.c            |  17
 drivers/infiniband/ulp/iser/iser_verbs.c              |   2
 drivers/infiniband/ulp/isert/ib_isert.c               |  14
 drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c      |  42
 drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h      |  22
 drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h   |   7
 drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c     |  44
 drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c       |   1
 drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c |  22
 drivers/infiniband/ulp/srp/ib_srp.c                   |  90
 drivers/infiniband/ulp/srp/ib_srp.h                   |   3
 drivers/infiniband/ulp/srpt/ib_srpt.c                 | 331
 drivers/infiniband/ulp/srpt/ib_srpt.h                 |   9
18 files changed, 577 insertions, 268 deletions
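
The bulk of the ipoib changes below replace the single NAPI context (and the TX drain
timer) with separate send/recv NAPI instances, each driven by its own CQ. A minimal
sketch of that pattern for the receive side is shown here, using hypothetical my_*
names rather than the actual ipoib symbols:

```c
#include <linux/netdevice.h>
#include <rdma/ib_verbs.h>

#define MY_CQ_BATCH 16

/* Hypothetical private data: one NAPI context per CQ. */
struct my_priv {
	struct net_device *dev;
	struct ib_cq *recv_cq;
	struct napi_struct recv_napi;
	struct napi_struct send_napi;
	struct ib_wc wc[MY_CQ_BATCH];
};

/* CQ completion callback (passed to ib_create_cq() as the comp handler):
 * it only kicks NAPI, all polling runs in softirq context. */
static void my_rx_completion(struct ib_cq *cq, void *ctx)
{
	struct my_priv *priv = ctx;

	napi_schedule(&priv->recv_napi);
}

/* NAPI poll: drain the CQ, then re-arm it only once work drops below budget. */
static int my_rx_poll(struct napi_struct *napi, int budget)
{
	struct my_priv *priv = container_of(napi, struct my_priv, recv_napi);
	int done = 0, n, i;

	while (done < budget) {
		n = ib_poll_cq(priv->recv_cq,
			       min(budget - done, MY_CQ_BATCH), priv->wc);
		if (n <= 0)
			break;
		for (i = 0; i < n; i++, done++)
			; /* handle priv->wc[i]: build the skb, napi_gro_receive(), ... */
	}

	if (done < budget && napi_complete_done(napi, done) &&
	    ib_req_notify_cq(priv->recv_cq, IB_CQ_NEXT_COMP |
			     IB_CQ_REPORT_MISSED_EVENTS) > 0)
		napi_reschedule(napi);

	return done;
}

static void my_napi_add(struct my_priv *priv)
{
	netif_napi_add(priv->dev, &priv->recv_napi, my_rx_poll, NAPI_POLL_WEIGHT);
	/* a my_tx_poll() would be registered on send_napi the same way */
}
```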
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 4a5c7a07a631..8033a006277f 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -111,7 +111,7 @@ enum { IPOIB_MCAST_FLAG_BUSY = 2, IPOIB_MCAST_FLAG_ATTACHED = 3, - MAX_SEND_CQE = 16, + MAX_SEND_CQE = 64, IPOIB_CM_COPYBREAK = 256, IPOIB_NON_CHILD = 0, @@ -331,7 +331,8 @@ struct ipoib_dev_priv { struct net_device *dev; - struct napi_struct napi; + struct napi_struct send_napi; + struct napi_struct recv_napi; unsigned long flags; @@ -381,7 +382,6 @@ struct ipoib_dev_priv { unsigned tx_tail; struct ib_sge tx_sge[MAX_SKB_FRAGS + 1]; struct ib_ud_wr tx_wr; - unsigned tx_outstanding; struct ib_wc send_wc[MAX_SEND_CQE]; struct ib_recv_wr rx_wr; @@ -409,7 +409,6 @@ struct ipoib_dev_priv { #endif u64 hca_caps; struct ipoib_ethtool_st ethtool; - struct timer_list poll_timer; unsigned max_send_sge; bool sm_fullmember_sendonly_support; const struct net_device_ops *rn_ops; @@ -476,9 +475,10 @@ extern struct workqueue_struct *ipoib_workqueue; /* functions */ -int ipoib_poll(struct napi_struct *napi, int budget); -void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr); -void ipoib_send_comp_handler(struct ib_cq *cq, void *dev_ptr); +int ipoib_rx_poll(struct napi_struct *napi, int budget); +int ipoib_tx_poll(struct napi_struct *napi, int budget); +void ipoib_ib_rx_completion(struct ib_cq *cq, void *ctx_ptr); +void ipoib_ib_tx_completion(struct ib_cq *cq, void *ctx_ptr); struct ipoib_ah *ipoib_create_ah(struct net_device *dev, struct ib_pd *pd, struct rdma_ah_attr *attr); @@ -500,7 +500,7 @@ void ipoib_mark_paths_invalid(struct net_device *dev); void ipoib_flush_paths(struct net_device *dev); struct ipoib_dev_priv *ipoib_intf_alloc(struct ib_device *hca, u8 port, const char *format); -void ipoib_ib_tx_timer_func(unsigned long ctx); +void ipoib_ib_tx_timer_func(struct timer_list *t); void ipoib_ib_dev_flush_light(struct work_struct *work); void ipoib_ib_dev_flush_normal(struct work_struct *work); void ipoib_ib_dev_flush_heavy(struct work_struct *work); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 7774654c2ccb..87f4bd99cdf7 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -594,9 +594,9 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) skb = rx_ring[wr_id].skb; if (unlikely(wc->status != IB_WC_SUCCESS)) { - ipoib_dbg(priv, "cm recv error " - "(status=%d, wrid=%d vend_err %x)\n", - wc->status, wr_id, wc->vendor_err); + ipoib_dbg(priv, + "cm recv error (status=%d, wrid=%d vend_err %#x)\n", + wc->status, wr_id, wc->vendor_err); ++dev->stats.rx_dropped; if (has_srq) goto repost; @@ -757,30 +757,35 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_ return; } + if ((priv->tx_head - priv->tx_tail) == ipoib_sendq_size - 1) { + ipoib_dbg(priv, "TX ring 0x%x full, stopping kernel net queue\n", + tx->qp->qp_num); + netif_stop_queue(dev); + } + skb_orphan(skb); skb_dst_drop(skb); + if (netif_queue_stopped(dev)) + if (ib_req_notify_cq(priv->send_cq, IB_CQ_NEXT_COMP | + IB_CQ_REPORT_MISSED_EVENTS)) { + ipoib_warn(priv, "IPoIB/CM:request notify on send CQ failed\n"); + napi_schedule(&priv->send_napi); + } + rc = post_send(priv, tx, tx->tx_head & (ipoib_sendq_size - 1), tx_req); if (unlikely(rc)) { - ipoib_warn(priv, "post_send failed, error %d\n", rc); + ipoib_warn(priv, "IPoIB/CM:post_send failed, error %d\n", rc); 
++dev->stats.tx_errors; ipoib_dma_unmap_tx(priv, tx_req); dev_kfree_skb_any(skb); + + if (netif_queue_stopped(dev)) + netif_wake_queue(dev); } else { netif_trans_update(dev); ++tx->tx_head; - - if (++priv->tx_outstanding == ipoib_sendq_size) { - ipoib_dbg(priv, "TX ring 0x%x full, stopping kernel net queue\n", - tx->qp->qp_num); - netif_stop_queue(dev); - rc = ib_req_notify_cq(priv->send_cq, - IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS); - if (rc < 0) - ipoib_warn(priv, "request notify on send CQ failed\n"); - else if (rc) - ipoib_send_comp_handler(priv->send_cq, dev); - } + ++priv->tx_head; } } @@ -814,9 +819,11 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) netif_tx_lock(dev); ++tx->tx_tail; - if (unlikely(--priv->tx_outstanding == ipoib_sendq_size >> 1) && - netif_queue_stopped(dev) && - test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) + ++priv->tx_tail; + + if (unlikely(netif_queue_stopped(dev) && + (priv->tx_head - priv->tx_tail) <= ipoib_sendq_size >> 1 && + test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))) netif_wake_queue(dev); if (wc->status != IB_WC_SUCCESS && @@ -829,11 +836,11 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) if (wc->status == IB_WC_RNR_RETRY_EXC_ERR || wc->status == IB_WC_RETRY_EXC_ERR) ipoib_dbg(priv, - "%s: failed cm send event (status=%d, wrid=%d vend_err 0x%x)\n", + "%s: failed cm send event (status=%d, wrid=%d vend_err %#x)\n", __func__, wc->status, wr_id, wc->vendor_err); else ipoib_warn(priv, - "%s: failed cm send event (status=%d, wrid=%d vend_err 0x%x)\n", + "%s: failed cm send event (status=%d, wrid=%d vend_err %#x)\n", __func__, wc->status, wr_id, wc->vendor_err); spin_lock_irqsave(&priv->lock, flags); @@ -1045,7 +1052,7 @@ static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_ { struct ipoib_dev_priv *priv = ipoib_priv(dev); struct ib_qp_init_attr attr = { - .send_cq = priv->recv_cq, + .send_cq = priv->send_cq, .recv_cq = priv->recv_cq, .srq = priv->cm.srq, .cap.max_send_wr = ipoib_sendq_size, @@ -1219,9 +1226,10 @@ timeout: tx_req = &p->tx_ring[p->tx_tail & (ipoib_sendq_size - 1)]; ipoib_dma_unmap_tx(priv, tx_req); dev_kfree_skb_any(tx_req->skb); - ++p->tx_tail; netif_tx_lock_bh(p->dev); - if (unlikely(--priv->tx_outstanding == ipoib_sendq_size >> 1) && + ++p->tx_tail; + ++priv->tx_tail; + if (unlikely(priv->tx_head - priv->tx_tail == ipoib_sendq_size >> 1) && netif_queue_stopped(p->dev) && test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) netif_wake_queue(p->dev); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c index 8dc1e6225cc8..2706bf26cbac 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c @@ -99,8 +99,9 @@ static int ipoib_set_coalesce(struct net_device *dev, coal->rx_max_coalesced_frames > 0xffff) return -EINVAL; - ret = ib_modify_cq(priv->recv_cq, coal->rx_max_coalesced_frames, - coal->rx_coalesce_usecs); + ret = rdma_set_cq_moderation(priv->recv_cq, + coal->rx_max_coalesced_frames, + coal->rx_coalesce_usecs); if (ret && ret != -ENOSYS) { ipoib_warn(priv, "failed modifying CQ (%d)\n", ret); return ret; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index fe690f82af29..3b96cdaf9a83 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -192,8 +192,8 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) if (unlikely(wc->status != IB_WC_SUCCESS)) { if 
(wc->status != IB_WC_WR_FLUSH_ERR) - ipoib_warn(priv, "failed recv event " - "(status=%d, wrid=%d vend_err %x)\n", + ipoib_warn(priv, + "failed recv event (status=%d, wrid=%d vend_err %#x)\n", wc->status, wr_id, wc->vendor_err); ipoib_ud_dma_unmap_rx(priv, priv->rx_ring[wr_id].mapping); dev_kfree_skb_any(skb); @@ -264,7 +264,7 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) likely(wc->wc_flags & IB_WC_IP_CSUM_OK)) skb->ip_summed = CHECKSUM_UNNECESSARY; - napi_gro_receive(&priv->napi, skb); + napi_gro_receive(&priv->recv_napi, skb); repost: if (unlikely(ipoib_ib_post_receive(dev, wr_id))) @@ -406,16 +406,17 @@ static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) dev_kfree_skb_any(tx_req->skb); ++priv->tx_tail; - if (unlikely(--priv->tx_outstanding == ipoib_sendq_size >> 1) && - netif_queue_stopped(dev) && - test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) + + if (unlikely(netif_queue_stopped(dev) && + ((priv->tx_head - priv->tx_tail) <= ipoib_sendq_size >> 1) && + test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))) netif_wake_queue(dev); if (wc->status != IB_WC_SUCCESS && wc->status != IB_WC_WR_FLUSH_ERR) { struct ipoib_qp_state_validate *qp_work; - ipoib_warn(priv, "failed send event " - "(status=%d, wrid=%d vend_err %x)\n", + ipoib_warn(priv, + "failed send event (status=%d, wrid=%d vend_err %#x)\n", wc->status, wr_id, wc->vendor_err); qp_work = kzalloc(sizeof(*qp_work), GFP_ATOMIC); if (!qp_work) @@ -430,17 +431,23 @@ static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) static int poll_tx(struct ipoib_dev_priv *priv) { int n, i; + struct ib_wc *wc; n = ib_poll_cq(priv->send_cq, MAX_SEND_CQE, priv->send_wc); - for (i = 0; i < n; ++i) - ipoib_ib_handle_tx_wc(priv->dev, priv->send_wc + i); - + for (i = 0; i < n; ++i) { + wc = priv->send_wc + i; + if (wc->wr_id & IPOIB_OP_CM) + ipoib_cm_handle_tx_wc(priv->dev, priv->send_wc + i); + else + ipoib_ib_handle_tx_wc(priv->dev, priv->send_wc + i); + } return n == MAX_SEND_CQE; } -int ipoib_poll(struct napi_struct *napi, int budget) +int ipoib_rx_poll(struct napi_struct *napi, int budget) { - struct ipoib_dev_priv *priv = container_of(napi, struct ipoib_dev_priv, napi); + struct ipoib_dev_priv *priv = + container_of(napi, struct ipoib_dev_priv, recv_napi); struct net_device *dev = priv->dev; int done; int t; @@ -464,8 +471,9 @@ poll_more: ipoib_cm_handle_rx_wc(dev, wc); else ipoib_ib_handle_rx_wc(dev, wc); - } else - ipoib_cm_handle_tx_wc(priv->dev, wc); + } else { + pr_warn("%s: Got unexpected wqe id\n", __func__); + } } if (n != t) @@ -484,33 +492,47 @@ poll_more: return done; } -void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr) +int ipoib_tx_poll(struct napi_struct *napi, int budget) { - struct net_device *dev = dev_ptr; - struct ipoib_dev_priv *priv = ipoib_priv(dev); + struct ipoib_dev_priv *priv = container_of(napi, struct ipoib_dev_priv, + send_napi); + struct net_device *dev = priv->dev; + int n, i; + struct ib_wc *wc; - napi_schedule(&priv->napi); -} +poll_more: + n = ib_poll_cq(priv->send_cq, MAX_SEND_CQE, priv->send_wc); -static void drain_tx_cq(struct net_device *dev) -{ - struct ipoib_dev_priv *priv = ipoib_priv(dev); + for (i = 0; i < n; i++) { + wc = priv->send_wc + i; + if (wc->wr_id & IPOIB_OP_CM) + ipoib_cm_handle_tx_wc(dev, wc); + else + ipoib_ib_handle_tx_wc(dev, wc); + } - netif_tx_lock(dev); - while (poll_tx(priv)) - ; /* nothing */ + if (n < budget) { + napi_complete(napi); + if (unlikely(ib_req_notify_cq(priv->send_cq, IB_CQ_NEXT_COMP | + 
IB_CQ_REPORT_MISSED_EVENTS)) && + napi_reschedule(napi)) + goto poll_more; + } + return n < 0 ? 0 : n; +} - if (netif_queue_stopped(dev)) - mod_timer(&priv->poll_timer, jiffies + 1); +void ipoib_ib_rx_completion(struct ib_cq *cq, void *ctx_ptr) +{ + struct ipoib_dev_priv *priv = ctx_ptr; - netif_tx_unlock(dev); + napi_schedule(&priv->recv_napi); } -void ipoib_send_comp_handler(struct ib_cq *cq, void *dev_ptr) +void ipoib_ib_tx_completion(struct ib_cq *cq, void *ctx_ptr) { - struct ipoib_dev_priv *priv = ipoib_priv(dev_ptr); + struct ipoib_dev_priv *priv = ctx_ptr; - mod_timer(&priv->poll_timer, jiffies); + napi_schedule(&priv->send_napi); } static inline int post_send(struct ipoib_dev_priv *priv, @@ -611,23 +633,25 @@ int ipoib_send(struct net_device *dev, struct sk_buff *skb, priv->tx_wr.wr.send_flags |= IB_SEND_IP_CSUM; else priv->tx_wr.wr.send_flags &= ~IB_SEND_IP_CSUM; - - if (++priv->tx_outstanding == ipoib_sendq_size) { + /* increase the tx_head after send success, but use it for queue state */ + if (priv->tx_head - priv->tx_tail == ipoib_sendq_size - 1) { ipoib_dbg(priv, "TX ring full, stopping kernel net queue\n"); - if (ib_req_notify_cq(priv->send_cq, IB_CQ_NEXT_COMP)) - ipoib_warn(priv, "request notify on send CQ failed\n"); netif_stop_queue(dev); } skb_orphan(skb); skb_dst_drop(skb); + if (netif_queue_stopped(dev)) + if (ib_req_notify_cq(priv->send_cq, IB_CQ_NEXT_COMP | + IB_CQ_REPORT_MISSED_EVENTS)) + ipoib_warn(priv, "request notify on send CQ failed\n"); + rc = post_send(priv, priv->tx_head & (ipoib_sendq_size - 1), address, dqpn, tx_req, phead, hlen); if (unlikely(rc)) { ipoib_warn(priv, "post_send failed, error %d\n", rc); ++dev->stats.tx_errors; - --priv->tx_outstanding; ipoib_dma_unmap_tx(priv, tx_req); dev_kfree_skb_any(skb); if (netif_queue_stopped(dev)) @@ -639,11 +663,6 @@ int ipoib_send(struct net_device *dev, struct sk_buff *skb, rc = priv->tx_head; ++priv->tx_head; } - - if (unlikely(priv->tx_outstanding > MAX_SEND_CQE)) - while (poll_tx(priv)) - ; /* nothing */ - return rc; } @@ -732,6 +751,22 @@ static void check_qp_movement_and_print(struct ipoib_dev_priv *priv, new_state, qp_attr.qp_state); } +static void ipoib_napi_enable(struct net_device *dev) +{ + struct ipoib_dev_priv *priv = ipoib_priv(dev); + + napi_enable(&priv->recv_napi); + napi_enable(&priv->send_napi); +} + +static void ipoib_napi_disable(struct net_device *dev) +{ + struct ipoib_dev_priv *priv = ipoib_priv(dev); + + napi_disable(&priv->recv_napi); + napi_disable(&priv->send_napi); +} + int ipoib_ib_dev_stop_default(struct net_device *dev) { struct ipoib_dev_priv *priv = ipoib_priv(dev); @@ -741,7 +776,7 @@ int ipoib_ib_dev_stop_default(struct net_device *dev) int i; if (test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags)) - napi_disable(&priv->napi); + ipoib_napi_disable(dev); ipoib_cm_dev_stop(dev); @@ -773,7 +808,6 @@ int ipoib_ib_dev_stop_default(struct net_device *dev) ipoib_dma_unmap_tx(priv, tx_req); dev_kfree_skb_any(tx_req->skb); ++priv->tx_tail; - --priv->tx_outstanding; } for (i = 0; i < ipoib_recvq_size; ++i) { @@ -799,7 +833,6 @@ int ipoib_ib_dev_stop_default(struct net_device *dev) ipoib_dbg(priv, "All sends and receives done.\n"); timeout: - del_timer_sync(&priv->poll_timer); qp_attr.qp_state = IB_QPS_RESET; if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE)) ipoib_warn(priv, "Failed to modify QP to RESET state\n"); @@ -821,11 +854,6 @@ int ipoib_ib_dev_stop(struct net_device *dev) return 0; } -void ipoib_ib_tx_timer_func(unsigned long ctx) -{ - drain_tx_cq((struct net_device *)ctx); -} - 
int ipoib_ib_dev_open_default(struct net_device *dev) { struct ipoib_dev_priv *priv = ipoib_priv(dev); @@ -850,7 +878,7 @@ int ipoib_ib_dev_open_default(struct net_device *dev) } if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags)) - napi_enable(&priv->napi); + ipoib_napi_enable(dev); return 0; out: @@ -965,8 +993,9 @@ void ipoib_drain_cq(struct net_device *dev) ipoib_cm_handle_rx_wc(dev, priv->ibwc + i); else ipoib_ib_handle_rx_wc(dev, priv->ibwc + i); - } else - ipoib_cm_handle_tx_wc(dev, priv->ibwc + i); + } else { + pr_warn("%s: Got unexpected wqe id\n", __func__); + } } } while (n == IPOIB_NUM_WC); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index dcc77014018d..12b7f911f0e5 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -51,7 +51,6 @@ #include <net/addrconf.h> #include <linux/inetdevice.h> #include <rdma/ib_cache.h> -#include <linux/pci.h> #define DRV_VERSION "1.0.0" @@ -1617,13 +1616,29 @@ static void ipoib_neigh_hash_uninit(struct net_device *dev) wait_for_completion(&priv->ntbl.deleted); } +static void ipoib_napi_add(struct net_device *dev) +{ + struct ipoib_dev_priv *priv = ipoib_priv(dev); + + netif_napi_add(dev, &priv->recv_napi, ipoib_rx_poll, IPOIB_NUM_WC); + netif_napi_add(dev, &priv->send_napi, ipoib_tx_poll, MAX_SEND_CQE); +} + +static void ipoib_napi_del(struct net_device *dev) +{ + struct ipoib_dev_priv *priv = ipoib_priv(dev); + + netif_napi_del(&priv->recv_napi); + netif_napi_del(&priv->send_napi); +} + static void ipoib_dev_uninit_default(struct net_device *dev) { struct ipoib_dev_priv *priv = ipoib_priv(dev); ipoib_transport_dev_cleanup(dev); - netif_napi_del(&priv->napi); + ipoib_napi_del(dev); ipoib_cm_dev_cleanup(dev); @@ -1638,7 +1653,7 @@ static int ipoib_dev_init_default(struct net_device *dev) { struct ipoib_dev_priv *priv = ipoib_priv(dev); - netif_napi_add(dev, &priv->napi, ipoib_poll, NAPI_POLL_WEIGHT); + ipoib_napi_add(dev); /* Allocate RX/TX "rings" to hold queued skbs */ priv->rx_ring = kzalloc(ipoib_recvq_size * sizeof *priv->rx_ring, @@ -1666,9 +1681,6 @@ static int ipoib_dev_init_default(struct net_device *dev) priv->dev->dev_addr[2] = (priv->qp->qp_num >> 8) & 0xff; priv->dev->dev_addr[3] = (priv->qp->qp_num) & 0xff; - setup_timer(&priv->poll_timer, ipoib_ib_tx_timer_func, - (unsigned long)dev); - return 0; out_tx_ring_cleanup: @@ -1678,7 +1690,7 @@ out_rx_ring_cleanup: kfree(priv->rx_ring); out: - netif_napi_del(&priv->napi); + ipoib_napi_del(dev); return -ENOMEM; } @@ -2314,7 +2326,8 @@ static void ipoib_add_one(struct ib_device *device) } if (!count) { - kfree(dev_list); + pr_err("Failed to init port, removing it\n"); + ipoib_remove_one(device, dev_list); return; } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c index bb64baf25309..a1ed25422b72 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c @@ -156,7 +156,7 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) }; struct ib_cq_init_attr cq_attr = {}; - int ret, size; + int ret, size, req_vec; int i; size = ipoib_recvq_size + 1; @@ -171,17 +171,21 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) if (ret != -ENOSYS) return -ENODEV; + req_vec = (priv->port - 1) * 2; + cq_attr.cqe = size; - priv->recv_cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, - dev, &cq_attr); + cq_attr.comp_vector = req_vec % 
priv->ca->num_comp_vectors; + priv->recv_cq = ib_create_cq(priv->ca, ipoib_ib_rx_completion, NULL, + priv, &cq_attr); if (IS_ERR(priv->recv_cq)) { printk(KERN_WARNING "%s: failed to create receive CQ\n", ca->name); goto out_cm_dev_cleanup; } cq_attr.cqe = ipoib_sendq_size; - priv->send_cq = ib_create_cq(priv->ca, ipoib_send_comp_handler, NULL, - dev, &cq_attr); + cq_attr.comp_vector = (req_vec + 1) % priv->ca->num_comp_vectors; + priv->send_cq = ib_create_cq(priv->ca, ipoib_ib_tx_completion, NULL, + priv, &cq_attr); if (IS_ERR(priv->send_cq)) { printk(KERN_WARNING "%s: failed to create send CQ\n", ca->name); goto out_free_recv_cq; @@ -208,6 +212,9 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) goto out_free_send_cq; } + if (ib_req_notify_cq(priv->send_cq, IB_CQ_NEXT_COMP)) + goto out_free_send_cq; + for (i = 0; i < MAX_SKB_FRAGS + 1; ++i) priv->tx_sge[i].lkey = priv->pd->local_dma_lkey; diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index 55a73b0ed4c6..56b7240a3fc3 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -1146,7 +1146,7 @@ void iser_err_comp(struct ib_wc *wc, const char *type) if (wc->status != IB_WC_WR_FLUSH_ERR) { struct iser_conn *iser_conn = to_iser_conn(wc->qp->qp_context); - iser_err("%s failure: %s (%d) vend_err %x\n", type, + iser_err("%s failure: %s (%d) vend_err %#x\n", type, ib_wc_status_msg(wc->status), wc->status, wc->vendor_err); diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index ceabdb85df8b..720dfb3a1ac2 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -788,10 +788,11 @@ isert_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) * the rdma cm id */ return 1; - case RDMA_CM_EVENT_REJECTED: /* FALLTHRU */ + case RDMA_CM_EVENT_REJECTED: isert_info("Connection rejected: %s\n", rdma_reject_msg(cma_id, event->status)); - case RDMA_CM_EVENT_UNREACHABLE: /* FALLTHRU */ + /* fall through */ + case RDMA_CM_EVENT_UNREACHABLE: case RDMA_CM_EVENT_CONNECT_ERROR: ret = isert_connect_error(cma_id); break; @@ -1569,9 +1570,7 @@ isert_put_cmd(struct isert_cmd *isert_cmd, bool comp_err) transport_generic_free_cmd(&cmd->se_cmd, 0); break; } - /* - * Fall-through - */ + /* fall through */ default: iscsit_release_cmd(cmd); break; @@ -1749,8 +1748,9 @@ isert_do_control_comp(struct work_struct *work) switch (cmd->i_state) { case ISTATE_SEND_TASKMGTRSP: iscsit_tmr_post_handler(cmd, cmd->conn); - case ISTATE_SEND_REJECT: /* FALLTHRU */ - case ISTATE_SEND_TEXTRSP: /* FALLTHRU */ + /* fall through */ + case ISTATE_SEND_REJECT: + case ISTATE_SEND_TEXTRSP: cmd->i_state = ISTATE_SENT_STATUS; isert_completion_put(&isert_cmd->tx_desc, isert_cmd, ib_dev, false); diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c index afa938bd26d6..4be3aef40bd2 100644 --- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c +++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c @@ -139,6 +139,7 @@ void opa_vnic_release_mac_tbl(struct opa_vnic_adapter *adapter) rcu_assign_pointer(adapter->mactbl, NULL); synchronize_rcu(); opa_vnic_free_mac_tbl(mactbl); + adapter->info.vport.mac_tbl_digest = 0; mutex_unlock(&adapter->mactbl_lock); } @@ -405,6 +406,42 @@ u8 opa_vnic_get_vl(struct opa_vnic_adapter *adapter, struct sk_buff *skb) return vl; } +/* opa_vnic_get_rc - return the routing control */ +static u8 
opa_vnic_get_rc(struct __opa_veswport_info *info, + struct sk_buff *skb) +{ + u8 proto, rout_ctrl; + + switch (vlan_get_protocol(skb)) { + case htons(ETH_P_IPV6): + proto = ipv6_hdr(skb)->nexthdr; + if (proto == IPPROTO_TCP) + rout_ctrl = OPA_VNIC_ENCAP_RC_EXT(info->vesw.rc, + IPV6_TCP); + else if (proto == IPPROTO_UDP) + rout_ctrl = OPA_VNIC_ENCAP_RC_EXT(info->vesw.rc, + IPV6_UDP); + else + rout_ctrl = OPA_VNIC_ENCAP_RC_EXT(info->vesw.rc, IPV6); + break; + case htons(ETH_P_IP): + proto = ip_hdr(skb)->protocol; + if (proto == IPPROTO_TCP) + rout_ctrl = OPA_VNIC_ENCAP_RC_EXT(info->vesw.rc, + IPV4_TCP); + else if (proto == IPPROTO_UDP) + rout_ctrl = OPA_VNIC_ENCAP_RC_EXT(info->vesw.rc, + IPV4_UDP); + else + rout_ctrl = OPA_VNIC_ENCAP_RC_EXT(info->vesw.rc, IPV4); + break; + default: + rout_ctrl = OPA_VNIC_ENCAP_RC_EXT(info->vesw.rc, DEFAULT); + } + + return rout_ctrl; +} + /* opa_vnic_calc_entropy - calculate the packet entropy */ u8 opa_vnic_calc_entropy(struct opa_vnic_adapter *adapter, struct sk_buff *skb) { @@ -447,7 +484,7 @@ void opa_vnic_encap_skb(struct opa_vnic_adapter *adapter, struct sk_buff *skb) { struct __opa_veswport_info *info = &adapter->info; struct opa_vnic_skb_mdata *mdata; - u8 def_port, sc, entropy, *hdr; + u8 def_port, sc, rc, entropy, *hdr; u16 len, l4_hdr; u32 dlid; @@ -458,6 +495,7 @@ void opa_vnic_encap_skb(struct opa_vnic_adapter *adapter, struct sk_buff *skb) len = opa_vnic_wire_length(skb); dlid = opa_vnic_get_dlid(adapter, skb, def_port); sc = opa_vnic_get_sc(info, skb); + rc = opa_vnic_get_rc(info, skb); l4_hdr = info->vesw.vesw_id; mdata = skb_push(skb, sizeof(*mdata)); @@ -470,6 +508,6 @@ void opa_vnic_encap_skb(struct opa_vnic_adapter *adapter, struct sk_buff *skb) } opa_vnic_make_header(hdr, info->vport.encap_slid, dlid, len, - info->vesw.pkey, entropy, sc, 0, + info->vesw.pkey, entropy, sc, rc, OPA_VNIC_L4_ETHR, l4_hdr); } diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h index 4c434b9dd84c..e4c9bf2ef7e2 100644 --- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h +++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h @@ -103,6 +103,17 @@ #define OPA_VNIC_ETH_LINK_UP 1 #define OPA_VNIC_ETH_LINK_DOWN 2 +/* routing control */ +#define OPA_VNIC_ENCAP_RC_DEFAULT 0 +#define OPA_VNIC_ENCAP_RC_IPV4 4 +#define OPA_VNIC_ENCAP_RC_IPV4_UDP 8 +#define OPA_VNIC_ENCAP_RC_IPV4_TCP 12 +#define OPA_VNIC_ENCAP_RC_IPV6 16 +#define OPA_VNIC_ENCAP_RC_IPV6_TCP 20 +#define OPA_VNIC_ENCAP_RC_IPV6_UDP 24 + +#define OPA_VNIC_ENCAP_RC_EXT(w, b) (((w) >> OPA_VNIC_ENCAP_RC_ ## b) & 0x7) + /** * struct opa_vesw_info - OPA vnic switch information * @fabric_id: 10-bit fabric id @@ -111,8 +122,8 @@ * @pkey: partition key * @u_mcast_dlid: unknown multicast dlid * @u_ucast_dlid: array of unknown unicast dlids - * @eth_mtu: MTUs for each vlan PCP - * @eth_mtu_non_vlan: MTU for non vlan packets + * @rc: routing control + * @eth_mtu: Ethernet MTU */ struct opa_vesw_info { __be16 fabric_id; @@ -128,9 +139,10 @@ struct opa_vesw_info { __be32 u_mcast_dlid; __be32 u_ucast_dlid[OPA_VESW_MAX_NUM_DEF_PORT]; - u8 rsvd3[44]; - __be16 eth_mtu[OPA_VNIC_MAX_NUM_PCP]; - __be16 eth_mtu_non_vlan; + __be32 rc; + + u8 rsvd3[56]; + __be16 eth_mtu; u8 rsvd4[2]; } __packed; diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h b/drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h index ca29e6d5aedc..afd95f432262 100644 --- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h +++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h @@ 
-89,9 +89,10 @@ struct __opa_vesw_info { u32 u_mcast_dlid; u32 u_ucast_dlid[OPA_VESW_MAX_NUM_DEF_PORT]; - u8 rsvd3[44]; - u16 eth_mtu[OPA_VNIC_MAX_NUM_PCP]; - u16 eth_mtu_non_vlan; + u32 rc; + + u8 rsvd3[56]; + u16 eth_mtu; u8 rsvd4[2]; } __packed; diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c index 1a3c25364b64..ce57e0f10289 100644 --- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c +++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c @@ -112,6 +112,27 @@ static u16 opa_vnic_select_queue(struct net_device *netdev, struct sk_buff *skb, return rc; } +static void opa_vnic_update_state(struct opa_vnic_adapter *adapter, bool up) +{ + struct __opa_veswport_info *info = &adapter->info; + + mutex_lock(&adapter->lock); + /* Operational state can only be DROP_ALL or FORWARDING */ + if ((info->vport.config_state == OPA_VNIC_STATE_FORWARDING) && up) { + info->vport.oper_state = OPA_VNIC_STATE_FORWARDING; + info->vport.eth_link_status = OPA_VNIC_ETH_LINK_UP; + } else { + info->vport.oper_state = OPA_VNIC_STATE_DROP_ALL; + info->vport.eth_link_status = OPA_VNIC_ETH_LINK_DOWN; + } + + if (info->vport.config_state == OPA_VNIC_STATE_FORWARDING) + netif_dormant_off(adapter->netdev); + else + netif_dormant_on(adapter->netdev); + mutex_unlock(&adapter->lock); +} + /* opa_vnic_process_vema_config - process vema configuration updates */ void opa_vnic_process_vema_config(struct opa_vnic_adapter *adapter) { @@ -130,7 +151,7 @@ void opa_vnic_process_vema_config(struct opa_vnic_adapter *adapter) memcpy(saddr.sa_data, info->vport.base_mac_addr, ARRAY_SIZE(info->vport.base_mac_addr)); mutex_lock(&adapter->lock); - eth_mac_addr(netdev, &saddr); + eth_commit_mac_addr_change(netdev, &saddr); memcpy(adapter->vema_mac_addr, info->vport.base_mac_addr, ETH_ALEN); mutex_unlock(&adapter->lock); @@ -140,7 +161,7 @@ void opa_vnic_process_vema_config(struct opa_vnic_adapter *adapter) /* Handle MTU limit change */ rtnl_lock(); - netdev->max_mtu = max_t(unsigned int, info->vesw.eth_mtu_non_vlan, + netdev->max_mtu = max_t(unsigned int, info->vesw.eth_mtu, netdev->min_mtu); if (netdev->mtu > netdev->max_mtu) dev_set_mtu(netdev, netdev->max_mtu); @@ -164,14 +185,8 @@ void opa_vnic_process_vema_config(struct opa_vnic_adapter *adapter) adapter->flow_tbl[i] = port_count ? 
port_num[i % port_count] : OPA_VNIC_INVALID_PORT; - /* Operational state can only be DROP_ALL or FORWARDING */ - if (info->vport.config_state == OPA_VNIC_STATE_FORWARDING) { - info->vport.oper_state = OPA_VNIC_STATE_FORWARDING; - netif_dormant_off(netdev); - } else { - info->vport.oper_state = OPA_VNIC_STATE_DROP_ALL; - netif_dormant_on(netdev); - } + /* update state */ + opa_vnic_update_state(adapter, !!(netdev->flags & IFF_UP)); } /* @@ -183,6 +198,7 @@ static inline void opa_vnic_set_pod_values(struct opa_vnic_adapter *adapter) adapter->info.vport.max_smac_ent = OPA_VNIC_MAX_SMAC_LIMIT; adapter->info.vport.config_state = OPA_VNIC_STATE_DROP_ALL; adapter->info.vport.eth_link_status = OPA_VNIC_ETH_LINK_DOWN; + adapter->info.vesw.eth_mtu = ETH_DATA_LEN; } /* opa_vnic_set_mac_addr - change mac address */ @@ -268,8 +284,8 @@ static int opa_netdev_open(struct net_device *netdev) return rc; } - /* Update eth link status and send trap */ - adapter->info.vport.eth_link_status = OPA_VNIC_ETH_LINK_UP; + /* Update status and send trap */ + opa_vnic_update_state(adapter, true); opa_vnic_vema_report_event(adapter, OPA_VESWPORT_TRAP_ETH_LINK_STATUS_CHANGE); return 0; @@ -287,8 +303,8 @@ static int opa_netdev_close(struct net_device *netdev) return rc; } - /* Update eth link status and send trap */ - adapter->info.vport.eth_link_status = OPA_VNIC_ETH_LINK_DOWN; + /* Update status and send trap */ + opa_vnic_update_state(adapter, false); opa_vnic_vema_report_event(adapter, OPA_VESWPORT_TRAP_ETH_LINK_STATUS_CHANGE); return 0; diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c index 21f0b481edcc..4b615c1451e7 100644 --- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c +++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c @@ -186,6 +186,7 @@ static inline void vema_get_pod_values(struct opa_veswport_info *port_info) cpu_to_be16(OPA_VNIC_MAX_SMAC_LIMIT); port_info->vport.oper_state = OPA_VNIC_STATE_DROP_ALL; port_info->vport.config_state = OPA_VNIC_STATE_DROP_ALL; + port_info->vesw.eth_mtu = cpu_to_be16(ETH_DATA_LEN); } /** diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c index c2733964379c..868b5aec1537 100644 --- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c +++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c @@ -176,11 +176,10 @@ void opa_vnic_get_vesw_info(struct opa_vnic_adapter *adapter, for (i = 0; i < OPA_VESW_MAX_NUM_DEF_PORT; i++) info->u_ucast_dlid[i] = cpu_to_be32(src->u_ucast_dlid[i]); - memcpy(info->rsvd3, src->rsvd3, ARRAY_SIZE(src->rsvd3)); - for (i = 0; i < OPA_VNIC_MAX_NUM_PCP; i++) - info->eth_mtu[i] = cpu_to_be16(src->eth_mtu[i]); + info->rc = cpu_to_be32(src->rc); - info->eth_mtu_non_vlan = cpu_to_be16(src->eth_mtu_non_vlan); + memcpy(info->rsvd3, src->rsvd3, ARRAY_SIZE(src->rsvd3)); + info->eth_mtu = cpu_to_be16(src->eth_mtu); memcpy(info->rsvd4, src->rsvd4, ARRAY_SIZE(src->rsvd4)); } @@ -211,11 +210,10 @@ void opa_vnic_set_vesw_info(struct opa_vnic_adapter *adapter, for (i = 0; i < OPA_VESW_MAX_NUM_DEF_PORT; i++) dst->u_ucast_dlid[i] = be32_to_cpu(info->u_ucast_dlid[i]); - memcpy(dst->rsvd3, info->rsvd3, ARRAY_SIZE(info->rsvd3)); - for (i = 0; i < OPA_VNIC_MAX_NUM_PCP; i++) - dst->eth_mtu[i] = be16_to_cpu(info->eth_mtu[i]); + dst->rc = be32_to_cpu(info->rc); - dst->eth_mtu_non_vlan = be16_to_cpu(info->eth_mtu_non_vlan); + memcpy(dst->rsvd3, info->rsvd3, ARRAY_SIZE(info->rsvd3)); + dst->eth_mtu = be16_to_cpu(info->eth_mtu); 
memcpy(dst->rsvd4, info->rsvd4, ARRAY_SIZE(info->rsvd4)); } @@ -348,7 +346,7 @@ void opa_vnic_query_mcast_macs(struct opa_vnic_adapter *adapter, void opa_vnic_query_ucast_macs(struct opa_vnic_adapter *adapter, struct opa_veswport_iface_macs *macs) { - u16 start_idx, tot_macs, num_macs, idx = 0, count = 0; + u16 start_idx, tot_macs, num_macs, idx = 0, count = 0, em_macs = 0; struct netdev_hw_addr *ha; start_idx = be16_to_cpu(macs->start_idx); @@ -359,8 +357,10 @@ void opa_vnic_query_ucast_macs(struct opa_vnic_adapter *adapter, /* Do not include EM specified MAC address */ if (!memcmp(adapter->info.vport.base_mac_addr, ha->addr, - ARRAY_SIZE(adapter->info.vport.base_mac_addr))) + ARRAY_SIZE(adapter->info.vport.base_mac_addr))) { + em_macs++; continue; + } if (start_idx > idx++) continue; @@ -383,7 +383,7 @@ void opa_vnic_query_ucast_macs(struct opa_vnic_adapter *adapter, } tot_macs = netdev_hw_addr_list_count(&adapter->netdev->dev_addrs) + - netdev_uc_count(adapter->netdev); + netdev_uc_count(adapter->netdev) - em_macs; macs->tot_macs_in_lst = cpu_to_be16(tot_macs); macs->num_macs_in_msg = cpu_to_be16(count); macs->gen_count = cpu_to_be16(adapter->info.vport.uc_macs_gen_count); diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index fa5ccdb3bb2a..972d4b3c5223 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -464,20 +464,20 @@ static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target) /** * srp_destroy_qp() - destroy an RDMA queue pair - * @qp: RDMA queue pair. + * @ch: SRP RDMA channel. * * Drain the qp before destroying it. This avoids that the receive * completion handler can access the queue pair while it is * being destroyed. */ -static void srp_destroy_qp(struct srp_rdma_ch *ch, struct ib_qp *qp) +static void srp_destroy_qp(struct srp_rdma_ch *ch) { spin_lock_irq(&ch->lock); ib_process_cq_direct(ch->send_cq, -1); spin_unlock_irq(&ch->lock); - ib_drain_qp(qp); - ib_destroy_qp(qp); + ib_drain_qp(ch->qp); + ib_destroy_qp(ch->qp); } static int srp_create_ch_ib(struct srp_rdma_ch *ch) @@ -550,7 +550,7 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch) } if (ch->qp) - srp_destroy_qp(ch, ch->qp); + srp_destroy_qp(ch); if (ch->recv_cq) ib_free_cq(ch->recv_cq); if (ch->send_cq) @@ -617,7 +617,7 @@ static void srp_free_ch_ib(struct srp_target_port *target, ib_destroy_fmr_pool(ch->fmr_pool); } - srp_destroy_qp(ch, ch->qp); + srp_destroy_qp(ch); ib_free_cq(ch->send_cq); ib_free_cq(ch->recv_cq); @@ -665,12 +665,19 @@ static void srp_path_rec_completion(int status, static int srp_lookup_path(struct srp_rdma_ch *ch) { struct srp_target_port *target = ch->target; - int ret; + int ret = -ENODEV; ch->path.numb_path = 1; init_completion(&ch->done); + /* + * Avoid that the SCSI host can be removed by srp_remove_target() + * before srp_path_rec_completion() is called. 
+ */ + if (!scsi_host_get(target->scsi_host)) + goto out; + ch->path_query_id = ib_sa_path_rec_get(&srp_sa_client, target->srp_host->srp_dev->dev, target->srp_host->port, @@ -684,18 +691,41 @@ static int srp_lookup_path(struct srp_rdma_ch *ch) GFP_KERNEL, srp_path_rec_completion, ch, &ch->path_query); - if (ch->path_query_id < 0) - return ch->path_query_id; + ret = ch->path_query_id; + if (ret < 0) + goto put; ret = wait_for_completion_interruptible(&ch->done); if (ret < 0) - return ret; + goto put; - if (ch->status < 0) + ret = ch->status; + if (ret < 0) shost_printk(KERN_WARNING, target->scsi_host, PFX "Path record query failed\n"); - return ch->status; +put: + scsi_host_put(target->scsi_host); + +out: + return ret; +} + +static u8 srp_get_subnet_timeout(struct srp_host *host) +{ + struct ib_port_attr attr; + int ret; + u8 subnet_timeout = 18; + + ret = ib_query_port(host->srp_dev->dev, host->port, &attr); + if (ret == 0) + subnet_timeout = attr.subnet_timeout; + + if (unlikely(subnet_timeout < 15)) + pr_warn("%s: subnet timeout %d may cause SRP login to fail.\n", + dev_name(&host->srp_dev->dev->dev), subnet_timeout); + + return subnet_timeout; } static int srp_send_req(struct srp_rdma_ch *ch, bool multich) @@ -706,6 +736,9 @@ static int srp_send_req(struct srp_rdma_ch *ch, bool multich) struct srp_login_req priv; } *req = NULL; int status; + u8 subnet_timeout; + + subnet_timeout = srp_get_subnet_timeout(target->srp_host); req = kzalloc(sizeof *req, GFP_KERNEL); if (!req) @@ -728,8 +761,8 @@ static int srp_send_req(struct srp_rdma_ch *ch, bool multich) * module parameters if anyone cared about setting them. */ req->param.responder_resources = 4; - req->param.remote_cm_response_timeout = 20; - req->param.local_cm_response_timeout = 20; + req->param.remote_cm_response_timeout = subnet_timeout + 2; + req->param.local_cm_response_timeout = subnet_timeout + 2; req->param.retry_count = target->tl_retry_count; req->param.rnr_retry_count = 7; req->param.max_cm_retries = 15; @@ -1279,7 +1312,6 @@ static int srp_map_finish_fmr(struct srp_map_state *state, { struct srp_target_port *target = ch->target; struct srp_device *dev = target->srp_host->srp_dev; - struct ib_pd *pd = target->pd; struct ib_pool_fmr *fmr; u64 io_addr = 0; @@ -1295,9 +1327,9 @@ static int srp_map_finish_fmr(struct srp_map_state *state, if (state->npages == 0) return 0; - if (state->npages == 1 && (pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY)) { + if (state->npages == 1 && target->global_rkey) { srp_map_desc(state, state->base_dma_addr, state->dma_len, - pd->unsafe_global_rkey); + target->global_rkey); goto reset_state; } @@ -1337,7 +1369,6 @@ static int srp_map_finish_fr(struct srp_map_state *state, { struct srp_target_port *target = ch->target; struct srp_device *dev = target->srp_host->srp_dev; - struct ib_pd *pd = target->pd; struct ib_send_wr *bad_wr; struct ib_reg_wr wr; struct srp_fr_desc *desc; @@ -1353,12 +1384,12 @@ static int srp_map_finish_fr(struct srp_map_state *state, WARN_ON_ONCE(!dev->use_fast_reg); - if (sg_nents == 1 && (pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY)) { + if (sg_nents == 1 && target->global_rkey) { unsigned int sg_offset = sg_offset_p ? 
*sg_offset_p : 0; srp_map_desc(state, sg_dma_address(state->sg) + sg_offset, sg_dma_len(state->sg) - sg_offset, - pd->unsafe_global_rkey); + target->global_rkey); if (sg_offset_p) *sg_offset_p = 0; return 1; @@ -1520,7 +1551,7 @@ static int srp_map_sg_dma(struct srp_map_state *state, struct srp_rdma_ch *ch, for_each_sg(scat, sg, count, i) { srp_map_desc(state, ib_sg_dma_address(dev->dev, sg), ib_sg_dma_len(dev->dev, sg), - target->pd->unsafe_global_rkey); + target->global_rkey); } return 0; @@ -1618,7 +1649,6 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch, struct srp_request *req) { struct srp_target_port *target = ch->target; - struct ib_pd *pd = target->pd; struct scatterlist *scat; struct srp_cmd *cmd = req->cmd->buf; int len, nents, count, ret; @@ -1654,7 +1684,7 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch, fmt = SRP_DATA_DESC_DIRECT; len = sizeof (struct srp_cmd) + sizeof (struct srp_direct_buf); - if (count == 1 && (pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY)) { + if (count == 1 && target->global_rkey) { /* * The midlayer only generated a single gather/scatter * entry, or DMA mapping coalesced everything to a @@ -1664,7 +1694,7 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch, struct srp_direct_buf *buf = (void *) cmd->add_data; buf->va = cpu_to_be64(ib_sg_dma_address(ibdev, scat)); - buf->key = cpu_to_be32(pd->unsafe_global_rkey); + buf->key = cpu_to_be32(target->global_rkey); buf->len = cpu_to_be32(ib_sg_dma_len(ibdev, scat)); req->nmdesc = 0; @@ -1735,14 +1765,14 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch, memcpy(indirect_hdr->desc_list, req->indirect_desc, count * sizeof (struct srp_direct_buf)); - if (!(pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY)) { + if (!target->global_rkey) { ret = srp_map_idb(ch, req, state.gen.next, state.gen.end, idb_len, &idb_rkey); if (ret < 0) goto unmap; req->nmdesc++; } else { - idb_rkey = cpu_to_be32(pd->unsafe_global_rkey); + idb_rkey = cpu_to_be32(target->global_rkey); } indirect_hdr->table_desc.va = cpu_to_be64(req->indirect_dma_addr); @@ -2403,7 +2433,7 @@ static void srp_cm_rej_handler(struct ib_cm_id *cm_id, switch (event->param.rej_rcvd.reason) { case IB_CM_REJ_PORT_CM_REDIRECT: cpi = event->param.rej_rcvd.ari; - sa_path_set_dlid(&ch->path, htonl(ntohs(cpi->redirect_lid))); + sa_path_set_dlid(&ch->path, ntohs(cpi->redirect_lid)); ch->path.pkey = cpi->redirect_pkey; cm_id->remote_cm_qpn = be32_to_cpu(cpi->redirect_qp) & 0x00ffffff; memcpy(ch->path.dgid.raw, cpi->redirect_gid, 16); @@ -3318,8 +3348,8 @@ static ssize_t srp_create_target(struct device *dev, target->io_class = SRP_REV16A_IB_IO_CLASS; target->scsi_host = target_host; target->srp_host = host; - target->pd = host->srp_dev->pd; target->lkey = host->srp_dev->pd->local_dma_lkey; + target->global_rkey = host->srp_dev->global_rkey; target->cmd_sg_cnt = cmd_sg_entries; target->sg_tablesize = indirect_sg_entries ? 
: cmd_sg_entries; target->allow_ext_sg = allow_ext_sg; @@ -3638,6 +3668,10 @@ static void srp_add_one(struct ib_device *device) if (IS_ERR(srp_dev->pd)) goto free_dev; + if (flags & IB_PD_UNSAFE_GLOBAL_RKEY) { + srp_dev->global_rkey = srp_dev->pd->unsafe_global_rkey; + WARN_ON_ONCE(srp_dev->global_rkey == 0); + } for (p = rdma_start_port(device); p <= rdma_end_port(device); ++p) { host = srp_add_port(srp_dev, p); diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h index ab9077b81d5a..a814f5ef16f9 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.h +++ b/drivers/infiniband/ulp/srp/ib_srp.h @@ -90,6 +90,7 @@ struct srp_device { struct list_head dev_list; struct ib_device *dev; struct ib_pd *pd; + u32 global_rkey; u64 mr_page_mask; int mr_page_size; int mr_max_size; @@ -179,7 +180,7 @@ struct srp_target_port { spinlock_t lock; /* read only in the hot path */ - struct ib_pd *pd; + u32 global_rkey; struct srp_rdma_ch *ch; u32 ch_count; u32 lkey; diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 9612e5bdfb00..8a1bd354b1cc 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -295,6 +295,7 @@ static void srpt_get_ioc(struct srpt_port *sport, u32 slot, { struct srpt_device *sdev = sport->sdev; struct ib_dm_ioc_profile *iocp; + int send_queue_depth; iocp = (struct ib_dm_ioc_profile *)mad->data; @@ -310,6 +311,12 @@ static void srpt_get_ioc(struct srpt_port *sport, u32 slot, return; } + if (sdev->use_srq) + send_queue_depth = sdev->srq_size; + else + send_queue_depth = min(SRPT_RQ_SIZE, + sdev->device->attrs.max_qp_wr); + memset(iocp, 0, sizeof(*iocp)); strcpy(iocp->id_string, SRPT_ID_STRING); iocp->guid = cpu_to_be64(srpt_service_guid); @@ -322,7 +329,7 @@ static void srpt_get_ioc(struct srpt_port *sport, u32 slot, iocp->io_subclass = cpu_to_be16(SRP_IO_SUBCLASS); iocp->protocol = cpu_to_be16(SRP_PROTOCOL); iocp->protocol_version = cpu_to_be16(SRP_PROTOCOL_VERSION); - iocp->send_queue_depth = cpu_to_be16(sdev->srq_size); + iocp->send_queue_depth = cpu_to_be16(send_queue_depth); iocp->rdma_read_depth = 4; iocp->send_size = cpu_to_be32(srp_max_req_size); iocp->rdma_size = cpu_to_be32(min(sport->port_attrib.srp_max_rdma_size, @@ -686,6 +693,9 @@ static void srpt_free_ioctx_ring(struct srpt_ioctx **ioctx_ring, { int i; + if (!ioctx_ring) + return; + for (i = 0; i < ring_size; ++i) srpt_free_ioctx(sdev, ioctx_ring[i], dma_size, dir); kfree(ioctx_ring); @@ -757,7 +767,7 @@ static bool srpt_test_and_set_cmd_state(struct srpt_send_ioctx *ioctx, /** * srpt_post_recv() - Post an IB receive request. 
*/ -static int srpt_post_recv(struct srpt_device *sdev, +static int srpt_post_recv(struct srpt_device *sdev, struct srpt_rdma_ch *ch, struct srpt_recv_ioctx *ioctx) { struct ib_sge list; @@ -766,7 +776,7 @@ static int srpt_post_recv(struct srpt_device *sdev, BUG_ON(!sdev); list.addr = ioctx->ioctx.dma; list.length = srp_max_req_size; - list.lkey = sdev->pd->local_dma_lkey; + list.lkey = sdev->lkey; ioctx->ioctx.cqe.done = srpt_recv_done; wr.wr_cqe = &ioctx->ioctx.cqe; @@ -774,7 +784,10 @@ static int srpt_post_recv(struct srpt_device *sdev, wr.sg_list = &list; wr.num_sge = 1; - return ib_post_srq_recv(sdev->srq, &wr, &bad_wr); + if (sdev->use_srq) + return ib_post_srq_recv(sdev->srq, &wr, &bad_wr); + else + return ib_post_recv(ch->qp, &wr, &bad_wr); } /** @@ -1517,7 +1530,7 @@ static void srpt_handle_new_iu(struct srpt_rdma_ch *ch, break; } - srpt_post_recv(ch->sport->sdev, recv_ioctx); + srpt_post_recv(ch->sport->sdev, ch, recv_ioctx); return; out_wait: @@ -1616,7 +1629,7 @@ static int srpt_create_ch_ib(struct srpt_rdma_ch *ch) struct srpt_device *sdev = sport->sdev; const struct ib_device_attr *attrs = &sdev->device->attrs; u32 srp_sq_size = sport->port_attrib.srp_sq_size; - int ret; + int i, ret; WARN_ON(ch->rq_size < 1); @@ -1640,7 +1653,6 @@ retry: = (void(*)(struct ib_event *, void*))srpt_qp_event; qp_init->send_cq = ch->cq; qp_init->recv_cq = ch->cq; - qp_init->srq = sdev->srq; qp_init->sq_sig_type = IB_SIGNAL_REQ_WR; qp_init->qp_type = IB_QPT_RC; /* @@ -1650,10 +1662,16 @@ retry: * both both, as RDMA contexts will also post completions for the * RDMA READ case. */ - qp_init->cap.max_send_wr = srp_sq_size / 2; + qp_init->cap.max_send_wr = min(srp_sq_size / 2, attrs->max_qp_wr + 0U); qp_init->cap.max_rdma_ctxs = srp_sq_size / 2; qp_init->cap.max_send_sge = min(attrs->max_sge, SRPT_MAX_SG_PER_WQE); qp_init->port_num = ch->sport->port; + if (sdev->use_srq) { + qp_init->srq = sdev->srq; + } else { + qp_init->cap.max_recv_wr = ch->rq_size; + qp_init->cap.max_recv_sge = qp_init->cap.max_send_sge; + } ch->qp = ib_create_qp(sdev->pd, qp_init); if (IS_ERR(ch->qp)) { @@ -1679,6 +1697,10 @@ retry: if (ret) goto err_destroy_qp; + if (!sdev->use_srq) + for (i = 0; i < ch->rq_size; i++) + srpt_post_recv(sdev, ch, ch->ioctx_recv_ring[i]); + out: kfree(qp_init); return ret; @@ -1765,19 +1787,65 @@ static int srpt_disconnect_ch(struct srpt_rdma_ch *ch) return ret; } -static void __srpt_close_all_ch(struct srpt_device *sdev) +/* + * Send DREQ and wait for DREP. Return true if and only if this function + * changed the state of @ch. 
+ */ +static bool srpt_disconnect_ch_sync(struct srpt_rdma_ch *ch) + __must_hold(&sdev->mutex) { + DECLARE_COMPLETION_ONSTACK(release_done); + struct srpt_device *sdev = ch->sport->sdev; + bool wait; + + lockdep_assert_held(&sdev->mutex); + + pr_debug("ch %s-%d state %d\n", ch->sess_name, ch->qp->qp_num, + ch->state); + + WARN_ON(ch->release_done); + ch->release_done = &release_done; + wait = !list_empty(&ch->list); + srpt_disconnect_ch(ch); + mutex_unlock(&sdev->mutex); + + if (!wait) + goto out; + + while (wait_for_completion_timeout(&release_done, 180 * HZ) == 0) + pr_info("%s(%s-%d state %d): still waiting ...\n", __func__, + ch->sess_name, ch->qp->qp_num, ch->state); + +out: + mutex_lock(&sdev->mutex); + return wait; +} + +static void srpt_set_enabled(struct srpt_port *sport, bool enabled) + __must_hold(&sdev->mutex) +{ + struct srpt_device *sdev = sport->sdev; struct srpt_rdma_ch *ch; lockdep_assert_held(&sdev->mutex); + if (sport->enabled == enabled) + return; + sport->enabled = enabled; + if (sport->enabled) + return; + +again: list_for_each_entry(ch, &sdev->rch_list, list) { - if (srpt_disconnect_ch(ch) >= 0) - pr_info("Closing channel %s-%d because target %s has been disabled\n", - ch->sess_name, ch->qp->qp_num, - sdev->device->name); - srpt_close_ch(ch); + if (ch->sport == sport) { + pr_info("%s: closing channel %s-%d\n", + sdev->device->name, ch->sess_name, + ch->qp->qp_num); + if (srpt_disconnect_ch_sync(ch)) + goto again; + } } + } static void srpt_free_ch(struct kref *kref) @@ -1818,6 +1886,10 @@ static void srpt_release_channel_work(struct work_struct *w) ch->sport->sdev, ch->rq_size, ch->rsp_size, DMA_TO_DEVICE); + srpt_free_ioctx_ring((struct srpt_ioctx **)ch->ioctx_recv_ring, + sdev, ch->rq_size, + srp_max_req_size, DMA_FROM_DEVICE); + mutex_lock(&sdev->mutex); list_del_init(&ch->list); if (ch->release_done) @@ -1953,10 +2025,11 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id, ch->cm_id = cm_id; cm_id->context = ch; /* - * Avoid QUEUE_FULL conditions by limiting the number of buffers used - * for the SRP protocol to the command queue size. + * ch->rq_size should be at least as large as the initiator queue + * depth to avoid that the initiator driver has to report QUEUE_FULL + * to the SCSI mid-layer. 
*/ - ch->rq_size = SRPT_RQ_SIZE; + ch->rq_size = min(SRPT_RQ_SIZE, sdev->device->attrs.max_qp_wr); spin_lock_init(&ch->spinlock); ch->state = CH_CONNECTING; INIT_LIST_HEAD(&ch->cmd_wait_list); @@ -1974,6 +2047,19 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id, ch->ioctx_ring[i]->ch = ch; list_add_tail(&ch->ioctx_ring[i]->free_list, &ch->free_list); } + if (!sdev->use_srq) { + ch->ioctx_recv_ring = (struct srpt_recv_ioctx **) + srpt_alloc_ioctx_ring(ch->sport->sdev, ch->rq_size, + sizeof(*ch->ioctx_recv_ring[0]), + srp_max_req_size, + DMA_FROM_DEVICE); + if (!ch->ioctx_recv_ring) { + pr_err("rejected SRP_LOGIN_REQ because creating a new QP RQ ring failed.\n"); + rej->reason = + cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); + goto free_ring; + } + } ret = srpt_create_ch_ib(ch); if (ret) { @@ -1981,7 +2067,7 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id, SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); pr_err("rejected SRP_LOGIN_REQ because creating" " a new RDMA channel failed.\n"); - goto free_ring; + goto free_recv_ring; } ret = srpt_ch_qp_rtr(ch, ch->qp); @@ -2072,6 +2158,11 @@ release_channel: destroy_ib: srpt_destroy_ch_ib(ch); +free_recv_ring: + srpt_free_ioctx_ring((struct srpt_ioctx **)ch->ioctx_recv_ring, + ch->sport->sdev, ch->rq_size, + srp_max_req_size, DMA_FROM_DEVICE); + free_ring: srpt_free_ioctx_ring((struct srpt_ioctx **)ch->ioctx_ring, ch->sport->sdev, ch->rq_size, @@ -2342,7 +2433,7 @@ static void srpt_queue_response(struct se_cmd *cmd) sge.addr = ioctx->ioctx.dma; sge.length = resp_len; - sge.lkey = sdev->pd->local_dma_lkey; + sge.lkey = sdev->lkey; ioctx->ioctx.cqe.done = srpt_send_done; send_wr.next = NULL; @@ -2417,8 +2508,7 @@ static int srpt_release_sdev(struct srpt_device *sdev) mutex_lock(&sdev->mutex); for (i = 0; i < ARRAY_SIZE(sdev->port); i++) - sdev->port[i].enabled = false; - __srpt_close_all_ch(sdev); + srpt_set_enabled(&sdev->port[i], false); mutex_unlock(&sdev->mutex); res = wait_event_interruptible(sdev->ch_releaseQ, @@ -2465,6 +2555,74 @@ static struct se_wwn *srpt_lookup_wwn(const char *name) return wwn; } +static void srpt_free_srq(struct srpt_device *sdev) +{ + if (!sdev->srq) + return; + + ib_destroy_srq(sdev->srq); + srpt_free_ioctx_ring((struct srpt_ioctx **)sdev->ioctx_ring, sdev, + sdev->srq_size, srp_max_req_size, DMA_FROM_DEVICE); + sdev->srq = NULL; +} + +static int srpt_alloc_srq(struct srpt_device *sdev) +{ + struct ib_srq_init_attr srq_attr = { + .event_handler = srpt_srq_event, + .srq_context = (void *)sdev, + .attr.max_wr = sdev->srq_size, + .attr.max_sge = 1, + .srq_type = IB_SRQT_BASIC, + }; + struct ib_device *device = sdev->device; + struct ib_srq *srq; + int i; + + WARN_ON_ONCE(sdev->srq); + srq = ib_create_srq(sdev->pd, &srq_attr); + if (IS_ERR(srq)) { + pr_debug("ib_create_srq() failed: %ld\n", PTR_ERR(srq)); + return PTR_ERR(srq); + } + + pr_debug("create SRQ #wr= %d max_allow=%d dev= %s\n", sdev->srq_size, + sdev->device->attrs.max_srq_wr, device->name); + + sdev->ioctx_ring = (struct srpt_recv_ioctx **) + srpt_alloc_ioctx_ring(sdev, sdev->srq_size, + sizeof(*sdev->ioctx_ring[0]), + srp_max_req_size, DMA_FROM_DEVICE); + if (!sdev->ioctx_ring) { + ib_destroy_srq(srq); + return -ENOMEM; + } + + sdev->use_srq = true; + sdev->srq = srq; + + for (i = 0; i < sdev->srq_size; ++i) + srpt_post_recv(sdev, NULL, sdev->ioctx_ring[i]); + + return 0; +} + +static int srpt_use_srq(struct srpt_device *sdev, bool use_srq) +{ + struct ib_device *device = sdev->device; + int ret = 0; + + if (!use_srq) { + srpt_free_srq(sdev); + 
sdev->use_srq = false; + } else if (use_srq && !sdev->srq) { + ret = srpt_alloc_srq(sdev); + } + pr_debug("%s(%s): use_srq = %d; ret = %d\n", __func__, device->name, + sdev->use_srq, ret); + return ret; +} + /** * srpt_add_one() - Infiniband device addition callback function. */ @@ -2472,7 +2630,6 @@ static void srpt_add_one(struct ib_device *device) { struct srpt_device *sdev; struct srpt_port *sport; - struct ib_srq_init_attr srq_attr; int i; pr_debug("device = %p\n", device); @@ -2490,29 +2647,18 @@ static void srpt_add_one(struct ib_device *device) if (IS_ERR(sdev->pd)) goto free_dev; - sdev->srq_size = min(srpt_srq_size, sdev->device->attrs.max_srq_wr); - - srq_attr.event_handler = srpt_srq_event; - srq_attr.srq_context = (void *)sdev; - srq_attr.attr.max_wr = sdev->srq_size; - srq_attr.attr.max_sge = 1; - srq_attr.attr.srq_limit = 0; - srq_attr.srq_type = IB_SRQT_BASIC; + sdev->lkey = sdev->pd->local_dma_lkey; - sdev->srq = ib_create_srq(sdev->pd, &srq_attr); - if (IS_ERR(sdev->srq)) - goto err_pd; + sdev->srq_size = min(srpt_srq_size, sdev->device->attrs.max_srq_wr); - pr_debug("%s: create SRQ #wr= %d max_allow=%d dev= %s\n", - __func__, sdev->srq_size, sdev->device->attrs.max_srq_wr, - device->name); + srpt_use_srq(sdev, sdev->port[0].port_attrib.use_srq); if (!srpt_service_guid) srpt_service_guid = be64_to_cpu(device->node_guid); sdev->cm_id = ib_create_cm_id(device, srpt_cm_handler, sdev); if (IS_ERR(sdev->cm_id)) - goto err_srq; + goto err_ring; /* print out target login information */ pr_debug("Target login info: id_ext=%016llx,ioc_guid=%016llx," @@ -2532,16 +2678,6 @@ static void srpt_add_one(struct ib_device *device) srpt_event_handler); ib_register_event_handler(&sdev->event_handler); - sdev->ioctx_ring = (struct srpt_recv_ioctx **) - srpt_alloc_ioctx_ring(sdev, sdev->srq_size, - sizeof(*sdev->ioctx_ring[0]), - srp_max_req_size, DMA_FROM_DEVICE); - if (!sdev->ioctx_ring) - goto err_event; - - for (i = 0; i < sdev->srq_size; ++i) - srpt_post_recv(sdev, sdev->ioctx_ring[i]); - WARN_ON(sdev->device->phys_port_cnt > ARRAY_SIZE(sdev->port)); for (i = 1; i <= sdev->device->phys_port_cnt; i++) { @@ -2551,12 +2687,13 @@ static void srpt_add_one(struct ib_device *device) sport->port_attrib.srp_max_rdma_size = DEFAULT_MAX_RDMA_SIZE; sport->port_attrib.srp_max_rsp_size = DEFAULT_MAX_RSP_SIZE; sport->port_attrib.srp_sq_size = DEF_SRPT_SQ_SIZE; + sport->port_attrib.use_srq = false; INIT_WORK(&sport->work, srpt_refresh_port_work); if (srpt_refresh_port(sport)) { pr_err("MAD registration failed for %s-%d.\n", sdev->device->name, i); - goto err_ring; + goto err_event; } } @@ -2569,17 +2706,12 @@ out: pr_debug("added %s.\n", device->name); return; -err_ring: - srpt_free_ioctx_ring((struct srpt_ioctx **)sdev->ioctx_ring, sdev, - sdev->srq_size, srp_max_req_size, - DMA_FROM_DEVICE); err_event: ib_unregister_event_handler(&sdev->event_handler); err_cm: ib_destroy_cm_id(sdev->cm_id); -err_srq: - ib_destroy_srq(sdev->srq); -err_pd: +err_ring: + srpt_free_srq(sdev); ib_dealloc_pd(sdev->pd); free_dev: kfree(sdev); @@ -2622,12 +2754,10 @@ static void srpt_remove_one(struct ib_device *device, void *client_data) spin_unlock(&srpt_dev_lock); srpt_release_sdev(sdev); - ib_destroy_srq(sdev->srq); + srpt_free_srq(sdev); + ib_dealloc_pd(sdev->pd); - srpt_free_ioctx_ring((struct srpt_ioctx **)sdev->ioctx_ring, sdev, - sdev->srq_size, srp_max_req_size, DMA_FROM_DEVICE); - sdev->ioctx_ring = NULL; kfree(sdev); } @@ -2706,27 +2836,12 @@ static void srpt_release_cmd(struct se_cmd *se_cmd) */ static void 
srpt_close_session(struct se_session *se_sess) { - DECLARE_COMPLETION_ONSTACK(release_done); struct srpt_rdma_ch *ch = se_sess->fabric_sess_ptr; struct srpt_device *sdev = ch->sport->sdev; - bool wait; - - pr_debug("ch %s-%d state %d\n", ch->sess_name, ch->qp->qp_num, - ch->state); mutex_lock(&sdev->mutex); - BUG_ON(ch->release_done); - ch->release_done = &release_done; - wait = !list_empty(&ch->list); - srpt_disconnect_ch(ch); + srpt_disconnect_ch_sync(ch); mutex_unlock(&sdev->mutex); - - if (!wait) - return; - - while (wait_for_completion_timeout(&release_done, 180 * HZ) == 0) - pr_info("%s(%s-%d state %d): still waiting ...\n", __func__, - ch->sess_name, ch->qp->qp_num, ch->state); } /** @@ -2777,7 +2892,7 @@ static int srpt_parse_i_port_id(u8 i_port_id[16], const char *name) { const char *p; unsigned len, count, leading_zero_bytes; - int ret, rc; + int ret; p = name; if (strncasecmp(p, "0x", 2) == 0) @@ -2789,10 +2904,9 @@ static int srpt_parse_i_port_id(u8 i_port_id[16], const char *name) count = min(len / 2, 16U); leading_zero_bytes = 16 - count; memset(i_port_id, 0, leading_zero_bytes); - rc = hex2bin(i_port_id + leading_zero_bytes, p, count); - if (rc < 0) - pr_debug("hex2bin failed for srpt_parse_i_port_id: %d\n", rc); - ret = 0; + ret = hex2bin(i_port_id + leading_zero_bytes, p, count); + if (ret < 0) + pr_debug("hex2bin failed for srpt_parse_i_port_id: %d\n", ret); out: return ret; } @@ -2926,14 +3040,55 @@ static ssize_t srpt_tpg_attrib_srp_sq_size_store(struct config_item *item, return count; } +static ssize_t srpt_tpg_attrib_use_srq_show(struct config_item *item, + char *page) +{ + struct se_portal_group *se_tpg = attrib_to_tpg(item); + struct srpt_port *sport = srpt_tpg_to_sport(se_tpg); + + return sprintf(page, "%d\n", sport->port_attrib.use_srq); +} + +static ssize_t srpt_tpg_attrib_use_srq_store(struct config_item *item, + const char *page, size_t count) +{ + struct se_portal_group *se_tpg = attrib_to_tpg(item); + struct srpt_port *sport = srpt_tpg_to_sport(se_tpg); + struct srpt_device *sdev = sport->sdev; + unsigned long val; + bool enabled; + int ret; + + ret = kstrtoul(page, 0, &val); + if (ret < 0) + return ret; + if (val != !!val) + return -EINVAL; + + ret = mutex_lock_interruptible(&sdev->mutex); + if (ret < 0) + return ret; + enabled = sport->enabled; + /* Log out all initiator systems before changing 'use_srq'. 
*/ + srpt_set_enabled(sport, false); + sport->port_attrib.use_srq = val; + srpt_use_srq(sdev, sport->port_attrib.use_srq); + srpt_set_enabled(sport, enabled); + mutex_unlock(&sdev->mutex); + + return count; +} + CONFIGFS_ATTR(srpt_tpg_attrib_, srp_max_rdma_size); CONFIGFS_ATTR(srpt_tpg_attrib_, srp_max_rsp_size); CONFIGFS_ATTR(srpt_tpg_attrib_, srp_sq_size); +CONFIGFS_ATTR(srpt_tpg_attrib_, use_srq); static struct configfs_attribute *srpt_tpg_attrib_attrs[] = { &srpt_tpg_attrib_attr_srp_max_rdma_size, &srpt_tpg_attrib_attr_srp_max_rsp_size, &srpt_tpg_attrib_attr_srp_sq_size, + &srpt_tpg_attrib_attr_use_srq, NULL, }; @@ -2951,7 +3106,6 @@ static ssize_t srpt_tpg_enable_store(struct config_item *item, struct se_portal_group *se_tpg = to_tpg(item); struct srpt_port *sport = srpt_tpg_to_sport(se_tpg); struct srpt_device *sdev = sport->sdev; - struct srpt_rdma_ch *ch; unsigned long tmp; int ret; @@ -2965,24 +3119,11 @@ static ssize_t srpt_tpg_enable_store(struct config_item *item, pr_err("Illegal value for srpt_tpg_store_enable: %lu\n", tmp); return -EINVAL; } - if (sport->enabled == tmp) - goto out; - sport->enabled = tmp; - if (sport->enabled) - goto out; mutex_lock(&sdev->mutex); - list_for_each_entry(ch, &sdev->rch_list, list) { - if (ch->sport == sport) { - pr_debug("%s: ch %p %s-%d\n", __func__, ch, - ch->sess_name, ch->qp->qp_num); - srpt_disconnect_ch(ch); - srpt_close_ch(ch); - } - } + srpt_set_enabled(sport, tmp); mutex_unlock(&sdev->mutex); -out: return count; } diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.h b/drivers/infiniband/ulp/srpt/ib_srpt.h index 1b817e51b84b..673387d365a3 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.h +++ b/drivers/infiniband/ulp/srpt/ib_srpt.h @@ -252,6 +252,7 @@ enum rdma_ch_state { * @free_list: Head of list with free send I/O contexts. * @state: channel state. See also enum rdma_ch_state. * @ioctx_ring: Send ring. + * @ioctx_recv_ring: Receive I/O context ring. * @list: Node for insertion in the srpt_device.rch_list list. * @cmd_wait_list: List of SCSI commands that arrived before the RTU event. This * list contains struct srpt_ioctx elements and is protected @@ -281,6 +282,7 @@ struct srpt_rdma_ch { struct list_head free_list; enum rdma_ch_state state; struct srpt_send_ioctx **ioctx_ring; + struct srpt_recv_ioctx **ioctx_recv_ring; struct list_head list; struct list_head cmd_wait_list; struct se_session *sess; @@ -295,11 +297,13 @@ struct srpt_rdma_ch { * @srp_max_rdma_size: Maximum size of SRP RDMA transfers for new connections. * @srp_max_rsp_size: Maximum size of SRP response messages in bytes. * @srp_sq_size: Shared receive queue (SRQ) size. + * @use_srq: Whether or not to use SRQ. */ struct srpt_port_attrib { u32 srp_max_rdma_size; u32 srp_max_rsp_size; u32 srp_sq_size; + bool use_srq; }; /** @@ -343,10 +347,11 @@ struct srpt_port { * struct srpt_device - Information associated by SRPT with a single HCA. * @device: Backpointer to the struct ib_device managed by the IB core. * @pd: IB protection domain. - * @mr: L_Key (local key) with write access to all local memory. + * @lkey: L_Key (local key) with write access to all local memory. * @srq: Per-HCA SRQ (shared receive queue). * @cm_id: Connection identifier. * @srq_size: SRQ size. + * @use_srq: Whether or not to use SRQ. * @ioctx_ring: Per-HCA SRQ. * @rch_list: Per-device channel list -- see also srpt_rdma_ch.list. * @ch_releaseQ: Enables waiting for removal from rch_list. 
@@ -358,9 +363,11 @@ struct srpt_port { struct srpt_device { struct ib_device *device; struct ib_pd *pd; + u32 lkey; struct ib_srq *srq; struct ib_cm_id *cm_id; int srq_size; + bool use_srq; struct srpt_recv_ioctx **ioctx_ring; struct list_head rch_list; wait_queue_head_t ch_releaseQ; |
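
The series above turns SRQ usage into a per-port policy rather than a hard requirement: clearing the new use_srq attribute (exposed through the target portal group's attrib/ directory in configfs) tears down the per-HCA SRQ via srpt_free_srq(), and every subsequent login then allocates its own receive ring, sized min(SRPT_RQ_SIZE, sdev->device->attrs.max_qp_wr), in srpt_cm_req_recv(). Below is a minimal sketch of that prefer-SRQ-with-fallback pattern, detached from the srpt data structures; the example_* names are illustrative, and only ib_create_srq()/ib_destroy_srq() are real verbs calls.

/*
 * Simplified sketch, not the srpt code: keep at most one SRQ per device
 * and let the caller fall back to per-connection receive rings when the
 * SRQ is disabled or cannot be created.
 */
#include <linux/err.h>
#include <rdma/ib_verbs.h>

struct example_dev {
	struct ib_pd	*pd;
	struct ib_srq	*srq;		/* NULL when per-QP rings are used */
	int		srq_size;
	bool		use_srq;
};

static int example_setup_recv_path(struct example_dev *edev, bool want_srq)
{
	struct ib_srq_init_attr srq_attr = {
		.attr.max_wr	= edev->srq_size,
		.attr.max_sge	= 1,
		.srq_type	= IB_SRQT_BASIC,
	};
	struct ib_srq *srq;

	if (!want_srq) {
		/* Drop the SRQ; receive buffers move to per-QP rings. */
		if (edev->srq) {
			ib_destroy_srq(edev->srq);
			edev->srq = NULL;
		}
		edev->use_srq = false;
		return 0;
	}

	if (edev->srq)
		return 0;		/* already using an SRQ */

	srq = ib_create_srq(edev->pd, &srq_attr);
	if (IS_ERR(srq))
		return PTR_ERR(srq);	/* caller may fall back to per-QP rings */

	edev->srq = srq;
	edev->use_srq = true;
	return 0;
}

The trade-off is the one the patch leaves to the administrator: an SRQ amortizes a single receive ring across all logins on the HCA, while the per-channel fallback costs one ring per login but sidesteps SRQ on hardware or workloads where it is unwanted. Note also that the use_srq store handler quiesces the port with srpt_set_enabled(sport, false) before flipping the flag and re-enables it afterwards, so existing sessions never mix the two receive models.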