Diffstat (limited to 'drivers/infiniband')
-rw-r--r--  drivers/infiniband/core/cma.c | 7
-rw-r--r--  drivers/infiniband/core/uverbs_cmd.c | 7
-rw-r--r--  drivers/infiniband/core/uverbs_main.c | 12
-rw-r--r--  drivers/infiniband/hw/bnxt_re/bnxt_re.h | 7
-rw-r--r--  drivers/infiniband/hw/bnxt_re/ib_verbs.c | 303
-rw-r--r--  drivers/infiniband/hw/bnxt_re/ib_verbs.h | 19
-rw-r--r--  drivers/infiniband/hw/bnxt_re/main.c | 198
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_fp.c | 115
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_fp.h | 2
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_rcfw.c | 664
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_rcfw.h | 52
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_res.c | 189
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_res.h | 34
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_sp.c | 26
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_sp.h | 5
-rw-r--r--  drivers/infiniband/hw/efa/efa_verbs.c | 2
-rw-r--r--  drivers/infiniband/hw/erdma/erdma.h | 16
-rw-r--r--  drivers/infiniband/hw/erdma/erdma_hw.h | 64
-rw-r--r--  drivers/infiniband/hw/erdma/erdma_main.c | 53
-rw-r--r--  drivers/infiniband/hw/erdma/erdma_verbs.c | 178
-rw-r--r--  drivers/infiniband/hw/erdma/erdma_verbs.h | 13
-rw-r--r--  drivers/infiniband/hw/hfi1/ipoib_tx.c | 4
-rw-r--r--  drivers/infiniband/hw/hfi1/mmu_rb.c | 108
-rw-r--r--  drivers/infiniband/hw/hfi1/mmu_rb.h | 10
-rw-r--r--  drivers/infiniband/hw/hfi1/sdma.c | 23
-rw-r--r--  drivers/infiniband/hw/hfi1/sdma.h | 47
-rw-r--r--  drivers/infiniband/hw/hfi1/sdma_txreq.h | 2
-rw-r--r--  drivers/infiniband/hw/hfi1/trace_mmu.h | 48
-rw-r--r--  drivers/infiniband/hw/hfi1/user_sdma.c | 137
-rw-r--r--  drivers/infiniband/hw/hfi1/user_sdma.h | 1
-rw-r--r--  drivers/infiniband/hw/hfi1/vnic_sdma.c | 4
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_hem.c | 51
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 49
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 2
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_mr.c | 43
-rw-r--r--  drivers/infiniband/hw/irdma/uk.c | 10
-rw-r--r--  drivers/infiniband/hw/irdma/verbs.c | 53
-rw-r--r--  drivers/infiniband/hw/mana/qp.c | 5
-rw-r--r--  drivers/infiniband/hw/mlx5/counters.c | 89
-rw-r--r--  drivers/infiniband/hw/mlx5/fs.c | 276
-rw-r--r--  drivers/infiniband/hw/mlx5/fs.h | 16
-rw-r--r--  drivers/infiniband/hw/mlx5/ib_rep.c | 103
-rw-r--r--  drivers/infiniband/hw/mlx5/main.c | 3
-rw-r--r--  drivers/infiniband/hw/mlx5/mlx5_ib.h | 15
-rw-r--r--  drivers/infiniband/hw/mlx5/qp.c | 3
-rw-r--r--  drivers/infiniband/hw/mlx5/qp.h | 12
-rw-r--r--  drivers/infiniband/hw/mlx5/qpc.c | 93
-rw-r--r--  drivers/infiniband/hw/qib/qib_user_pages.c | 2
-rw-r--r--  drivers/infiniband/hw/usnic/usnic_uiom.c | 2
-rw-r--r--  drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c | 8
-rw-r--r--  drivers/infiniband/sw/rxe/rxe.c | 9
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_comp.c | 28
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_cq.c | 9
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_loc.h | 2
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_mr.c | 21
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_mw.c | 22
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_net.c | 13
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_opcode.h | 3
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_param.h | 2
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_qp.c | 51
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_recv.c | 9
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_req.c | 32
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_resp.c | 42
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_task.c | 110
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_task.h | 6
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_verbs.c | 72
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_verbs.h | 21
-rw-r--r--  drivers/infiniband/sw/siw/siw_mem.c | 2
-rw-r--r--  drivers/infiniband/sw/siw/siw_qp_tx.c | 16
-rw-r--r--  drivers/infiniband/ulp/isert/ib_isert.c | 16
-rw-r--r--  drivers/infiniband/ulp/rtrs/rtrs-clt.c | 56
-rw-r--r--  drivers/infiniband/ulp/rtrs/rtrs.c | 4
72 files changed, 2607 insertions(+), 1124 deletions(-)
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 93a1c48d0c32..1ee87c3aaeab 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -3295,7 +3295,7 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
route->path_rec->traffic_class = tos;
route->path_rec->mtu = iboe_get_mtu(ndev->mtu);
route->path_rec->rate_selector = IB_SA_EQ;
- route->path_rec->rate = iboe_get_rate(ndev);
+ route->path_rec->rate = IB_RATE_PORT_CURRENT;
dev_put(ndev);
route->path_rec->packet_life_time_selector = IB_SA_EQ;
/* In case ACK timeout is set, use this value to calculate
@@ -4805,8 +4805,7 @@ static void cma_make_mc_event(int status, struct rdma_id_private *id_priv,
event->param.ud.qkey = id_priv->qkey;
out:
- if (ndev)
- dev_put(ndev);
+ dev_put(ndev);
}
static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
@@ -4964,7 +4963,7 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
if (!ndev)
return -ENODEV;
- ib.rec.rate = iboe_get_rate(ndev);
+ ib.rec.rate = IB_RATE_PORT_CURRENT;
ib.rec.hop_limit = 1;
ib.rec.mtu = iboe_get_mtu(ndev->mtu);
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 4796f6a8828c..e836c9c477f6 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -1850,8 +1850,13 @@ static int modify_qp(struct uverbs_attr_bundle *attrs,
attr->path_mtu = cmd->base.path_mtu;
if (cmd->base.attr_mask & IB_QP_PATH_MIG_STATE)
attr->path_mig_state = cmd->base.path_mig_state;
- if (cmd->base.attr_mask & IB_QP_QKEY)
+ if (cmd->base.attr_mask & IB_QP_QKEY) {
+ if (cmd->base.qkey & IB_QP_SET_QKEY && !capable(CAP_NET_RAW)) {
+ ret = -EPERM;
+ goto release_qp;
+ }
attr->qkey = cmd->base.qkey;
+ }
if (cmd->base.attr_mask & IB_QP_RQ_PSN)
attr->rq_psn = cmd->base.rq_psn;
if (cmd->base.attr_mask & IB_QP_SQ_PSN)
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index fbace69672ca..7c9c79c13941 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -222,8 +222,12 @@ static ssize_t ib_uverbs_event_read(struct ib_uverbs_event_queue *ev_queue,
spin_lock_irq(&ev_queue->lock);
while (list_empty(&ev_queue->event_list)) {
- spin_unlock_irq(&ev_queue->lock);
+ if (ev_queue->is_closed) {
+ spin_unlock_irq(&ev_queue->lock);
+ return -EIO;
+ }
+ spin_unlock_irq(&ev_queue->lock);
if (filp->f_flags & O_NONBLOCK)
return -EAGAIN;
@@ -233,12 +237,6 @@ static ssize_t ib_uverbs_event_read(struct ib_uverbs_event_queue *ev_queue,
return -ERESTARTSYS;
spin_lock_irq(&ev_queue->lock);
-
- /* If device was disassociated and no event exists set an error */
- if (list_empty(&ev_queue->event_list) && ev_queue->is_closed) {
- spin_unlock_irq(&ev_queue->lock);
- return -EIO;
- }
}
event = list_entry(ev_queue->event_list.next, struct ib_uverbs_event, list);
diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
index 5a2baf49ecaa..ea81b2497511 100644
--- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h
+++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
@@ -39,6 +39,7 @@
#ifndef __BNXT_RE_H__
#define __BNXT_RE_H__
+#include <rdma/uverbs_ioctl.h>
#include "hw_counters.h"
#define ROCE_DRV_MODULE_NAME "bnxt_re"
@@ -135,8 +136,6 @@ struct bnxt_re_dev {
struct delayed_work worker;
u8 cur_prio_map;
- u16 active_speed;
- u8 active_width;
/* FP Notification Queue (CQ & SRQ) */
struct tasklet_struct nq_task;
@@ -181,10 +180,14 @@ struct bnxt_re_dev {
#define BNXT_RE_ROCEV2_IPV4_PACKET 2
#define BNXT_RE_ROCEV2_IPV6_PACKET 3
+#define BNXT_RE_CHECK_RC(x) ((x) && ((x) != -ETIMEDOUT))
+
static inline struct device *rdev_to_dev(struct bnxt_re_dev *rdev)
{
if (rdev)
return &rdev->ibdev.dev;
return NULL;
}
+
+extern const struct uapi_definition bnxt_re_uapi_defs[];
#endif
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index e86afecfbe46..abef0b8baa7c 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -61,6 +61,15 @@
#include "bnxt_re.h"
#include "ib_verbs.h"
+
+#include <rdma/uverbs_types.h>
+#include <rdma/uverbs_std_types.h>
+
+#include <rdma/ib_user_ioctl_cmds.h>
+
+#define UVERBS_MODULE_NAME bnxt_re
+#include <rdma/uverbs_named_ioctl.h>
+
#include <rdma/bnxt_re-abi.h>
static int __from_ib_access_flags(int iflags)
@@ -199,6 +208,7 @@ int bnxt_re_query_port(struct ib_device *ibdev, u32 port_num,
{
struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev);
struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr;
+ int rc;
memset(port_attr, 0, sizeof(*port_attr));
@@ -228,10 +238,10 @@ int bnxt_re_query_port(struct ib_device *ibdev, u32 port_num,
port_attr->sm_sl = 0;
port_attr->subnet_timeout = 0;
port_attr->init_type_reply = 0;
- port_attr->active_speed = rdev->active_speed;
- port_attr->active_width = rdev->active_width;
+ rc = ib_get_eth_speed(&rdev->ibdev, port_num, &port_attr->active_speed,
+ &port_attr->active_width);
- return 0;
+ return rc;
}
int bnxt_re_get_port_immutable(struct ib_device *ibdev, u32 port_num,
@@ -533,12 +543,57 @@ fail:
return rc;
}
+static struct bnxt_re_user_mmap_entry*
+bnxt_re_mmap_entry_insert(struct bnxt_re_ucontext *uctx, u64 mem_offset,
+ enum bnxt_re_mmap_flag mmap_flag, u64 *offset)
+{
+ struct bnxt_re_user_mmap_entry *entry;
+ int ret;
+
+ entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+ if (!entry)
+ return NULL;
+
+ entry->mem_offset = mem_offset;
+ entry->mmap_flag = mmap_flag;
+ entry->uctx = uctx;
+
+ switch (mmap_flag) {
+ case BNXT_RE_MMAP_SH_PAGE:
+ ret = rdma_user_mmap_entry_insert_exact(&uctx->ib_uctx,
+ &entry->rdma_entry, PAGE_SIZE, 0);
+ break;
+ case BNXT_RE_MMAP_UC_DB:
+ case BNXT_RE_MMAP_WC_DB:
+ ret = rdma_user_mmap_entry_insert(&uctx->ib_uctx,
+ &entry->rdma_entry, PAGE_SIZE);
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ if (ret) {
+ kfree(entry);
+ return NULL;
+ }
+ if (offset)
+ *offset = rdma_user_mmap_get_offset(&entry->rdma_entry);
+
+ return entry;
+}
+
/* Protection Domains */
int bnxt_re_dealloc_pd(struct ib_pd *ib_pd, struct ib_udata *udata)
{
struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
struct bnxt_re_dev *rdev = pd->rdev;
+ if (udata) {
+ rdma_user_mmap_entry_remove(pd->pd_db_mmap);
+ pd->pd_db_mmap = NULL;
+ }
+
bnxt_re_destroy_fence_mr(pd);
if (pd->qplib_pd.id) {
@@ -557,7 +612,8 @@ int bnxt_re_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
struct bnxt_re_ucontext *ucntx = rdma_udata_to_drv_context(
udata, struct bnxt_re_ucontext, ib_uctx);
struct bnxt_re_pd *pd = container_of(ibpd, struct bnxt_re_pd, ib_pd);
- int rc;
+ struct bnxt_re_user_mmap_entry *entry = NULL;
+ int rc = 0;
pd->rdev = rdev;
if (bnxt_qplib_alloc_pd(&rdev->qplib_res.pd_tbl, &pd->qplib_pd)) {
@@ -567,15 +623,15 @@ int bnxt_re_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
}
if (udata) {
- struct bnxt_re_pd_resp resp;
+ struct bnxt_re_pd_resp resp = {};
if (!ucntx->dpi.dbr) {
/* Allocate DPI in alloc_pd to avoid failing of
* ibv_devinfo and family of application when DPIs
* are depleted.
*/
- if (bnxt_qplib_alloc_dpi(&rdev->qplib_res.dpi_tbl,
- &ucntx->dpi, ucntx)) {
+ if (bnxt_qplib_alloc_dpi(&rdev->qplib_res,
+ &ucntx->dpi, ucntx, BNXT_QPLIB_DPI_TYPE_UC)) {
rc = -ENOMEM;
goto dbfail;
}
@@ -584,12 +640,21 @@ int bnxt_re_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
resp.pdid = pd->qplib_pd.id;
/* Still allow mapping this DBR to the new user PD. */
resp.dpi = ucntx->dpi.dpi;
- resp.dbr = (u64)ucntx->dpi.umdbr;
- rc = ib_copy_to_udata(udata, &resp, sizeof(resp));
+ entry = bnxt_re_mmap_entry_insert(ucntx, (u64)ucntx->dpi.umdbr,
+ BNXT_RE_MMAP_UC_DB, &resp.dbr);
+
+ if (!entry) {
+ rc = -ENOMEM;
+ goto dbfail;
+ }
+
+ pd->pd_db_mmap = &entry->rdma_entry;
+
+ rc = ib_copy_to_udata(udata, &resp, min(sizeof(resp), udata->outlen));
if (rc) {
- ibdev_err(&rdev->ibdev,
- "Failed to copy user response\n");
+ rdma_user_mmap_entry_remove(pd->pd_db_mmap);
+ rc = -EFAULT;
goto dbfail;
}
}
@@ -613,12 +678,20 @@ int bnxt_re_destroy_ah(struct ib_ah *ib_ah, u32 flags)
{
struct bnxt_re_ah *ah = container_of(ib_ah, struct bnxt_re_ah, ib_ah);
struct bnxt_re_dev *rdev = ah->rdev;
+ bool block = true;
+ int rc = 0;
- bnxt_qplib_destroy_ah(&rdev->qplib_res, &ah->qplib_ah,
- !(flags & RDMA_DESTROY_AH_SLEEPABLE));
+ block = !(flags & RDMA_DESTROY_AH_SLEEPABLE);
+ rc = bnxt_qplib_destroy_ah(&rdev->qplib_res, &ah->qplib_ah, block);
+ if (BNXT_RE_CHECK_RC(rc)) {
+ if (rc == -ETIMEDOUT)
+ rc = 0;
+ else
+ goto fail;
+ }
atomic_dec(&rdev->ah_count);
-
- return 0;
+fail:
+ return rc;
}
static u8 bnxt_re_stack_to_dev_nw_type(enum rdma_network_type ntype)
@@ -3341,9 +3414,7 @@ static int bnxt_re_process_raw_qp_pkt_rx(struct bnxt_re_qp *gsi_qp,
udwr.remote_qkey = gsi_sqp->qplib_qp.qkey;
/* post data received in the send queue */
- rc = bnxt_re_post_send_shadow_qp(rdev, gsi_sqp, swr);
-
- return 0;
+ return bnxt_re_post_send_shadow_qp(rdev, gsi_sqp, swr);
}
static void bnxt_re_process_res_rawqp1_wc(struct ib_wc *wc,
@@ -3956,6 +4027,7 @@ int bnxt_re_alloc_ucontext(struct ib_ucontext *ctx, struct ib_udata *udata)
container_of(ctx, struct bnxt_re_ucontext, ib_uctx);
struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev);
struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr;
+ struct bnxt_re_user_mmap_entry *entry;
struct bnxt_re_uctx_resp resp = {};
u32 chip_met_rev_num = 0;
int rc;
@@ -3994,6 +4066,16 @@ int bnxt_re_alloc_ucontext(struct ib_ucontext *ctx, struct ib_udata *udata)
resp.comp_mask |= BNXT_RE_UCNTX_CMASK_HAVE_MODE;
resp.mode = rdev->chip_ctx->modes.wqe_mode;
+ if (rdev->chip_ctx->modes.db_push)
+ resp.comp_mask |= BNXT_RE_UCNTX_CMASK_WC_DPI_ENABLED;
+
+ entry = bnxt_re_mmap_entry_insert(uctx, 0, BNXT_RE_MMAP_SH_PAGE, NULL);
+ if (!entry) {
+ rc = -ENOMEM;
+ goto cfail;
+ }
+ uctx->shpage_mmap = &entry->rdma_entry;
+
rc = ib_copy_to_udata(udata, &resp, min(udata->outlen, sizeof(resp)));
if (rc) {
ibdev_err(ibdev, "Failed to copy user context");
@@ -4017,6 +4099,8 @@ void bnxt_re_dealloc_ucontext(struct ib_ucontext *ib_uctx)
struct bnxt_re_dev *rdev = uctx->rdev;
+ rdma_user_mmap_entry_remove(uctx->shpage_mmap);
+ uctx->shpage_mmap = NULL;
if (uctx->shpg)
free_page((unsigned long)uctx->shpg);
@@ -4024,8 +4108,7 @@ void bnxt_re_dealloc_ucontext(struct ib_ucontext *ib_uctx)
/* Free DPI only if this is the first PD allocated by the
* application and mark the context dpi as NULL
*/
- bnxt_qplib_dealloc_dpi(&rdev->qplib_res,
- &rdev->qplib_res.dpi_tbl, &uctx->dpi);
+ bnxt_qplib_dealloc_dpi(&rdev->qplib_res, &uctx->dpi);
uctx->dpi.dbr = NULL;
}
}
@@ -4036,27 +4119,177 @@ int bnxt_re_mmap(struct ib_ucontext *ib_uctx, struct vm_area_struct *vma)
struct bnxt_re_ucontext *uctx = container_of(ib_uctx,
struct bnxt_re_ucontext,
ib_uctx);
- struct bnxt_re_dev *rdev = uctx->rdev;
+ struct bnxt_re_user_mmap_entry *bnxt_entry;
+ struct rdma_user_mmap_entry *rdma_entry;
+ int ret = 0;
u64 pfn;
- if (vma->vm_end - vma->vm_start != PAGE_SIZE)
+ rdma_entry = rdma_user_mmap_entry_get(&uctx->ib_uctx, vma);
+ if (!rdma_entry)
return -EINVAL;
- if (vma->vm_pgoff) {
- vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
- if (io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
- PAGE_SIZE, vma->vm_page_prot)) {
- ibdev_err(&rdev->ibdev, "Failed to map DPI");
- return -EAGAIN;
- }
- } else {
- pfn = virt_to_phys(uctx->shpg) >> PAGE_SHIFT;
- if (remap_pfn_range(vma, vma->vm_start,
- pfn, PAGE_SIZE, vma->vm_page_prot)) {
- ibdev_err(&rdev->ibdev, "Failed to map shared page");
- return -EAGAIN;
+ bnxt_entry = container_of(rdma_entry, struct bnxt_re_user_mmap_entry,
+ rdma_entry);
+
+ switch (bnxt_entry->mmap_flag) {
+ case BNXT_RE_MMAP_WC_DB:
+ pfn = bnxt_entry->mem_offset >> PAGE_SHIFT;
+ ret = rdma_user_mmap_io(ib_uctx, vma, pfn, PAGE_SIZE,
+ pgprot_writecombine(vma->vm_page_prot),
+ rdma_entry);
+ break;
+ case BNXT_RE_MMAP_UC_DB:
+ pfn = bnxt_entry->mem_offset >> PAGE_SHIFT;
+ ret = rdma_user_mmap_io(ib_uctx, vma, pfn, PAGE_SIZE,
+ pgprot_noncached(vma->vm_page_prot),
+ rdma_entry);
+ break;
+ case BNXT_RE_MMAP_SH_PAGE:
+ ret = vm_insert_page(vma, vma->vm_start, virt_to_page(uctx->shpg));
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ rdma_user_mmap_entry_put(rdma_entry);
+ return ret;
+}
+
+void bnxt_re_mmap_free(struct rdma_user_mmap_entry *rdma_entry)
+{
+ struct bnxt_re_user_mmap_entry *bnxt_entry;
+
+ bnxt_entry = container_of(rdma_entry, struct bnxt_re_user_mmap_entry,
+ rdma_entry);
+
+ kfree(bnxt_entry);
+}
+
+static int UVERBS_HANDLER(BNXT_RE_METHOD_ALLOC_PAGE)(struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs, BNXT_RE_ALLOC_PAGE_HANDLE);
+ enum bnxt_re_alloc_page_type alloc_type;
+ struct bnxt_re_user_mmap_entry *entry;
+ enum bnxt_re_mmap_flag mmap_flag;
+ struct bnxt_qplib_chip_ctx *cctx;
+ struct bnxt_re_ucontext *uctx;
+ struct bnxt_re_dev *rdev;
+ u64 mmap_offset;
+ u32 length;
+ u32 dpi;
+ u64 dbr;
+ int err;
+
+ uctx = container_of(ib_uverbs_get_ucontext(attrs), struct bnxt_re_ucontext, ib_uctx);
+ if (IS_ERR(uctx))
+ return PTR_ERR(uctx);
+
+ err = uverbs_get_const(&alloc_type, attrs, BNXT_RE_ALLOC_PAGE_TYPE);
+ if (err)
+ return err;
+
+ rdev = uctx->rdev;
+ cctx = rdev->chip_ctx;
+
+ switch (alloc_type) {
+ case BNXT_RE_ALLOC_WC_PAGE:
+ if (cctx->modes.db_push) {
+ if (bnxt_qplib_alloc_dpi(&rdev->qplib_res, &uctx->wcdpi,
+ uctx, BNXT_QPLIB_DPI_TYPE_WC))
+ return -ENOMEM;
+ length = PAGE_SIZE;
+ dpi = uctx->wcdpi.dpi;
+ dbr = (u64)uctx->wcdpi.umdbr;
+ mmap_flag = BNXT_RE_MMAP_WC_DB;
+ } else {
+ return -EINVAL;
}
+
+ break;
+
+ default:
+ return -EOPNOTSUPP;
}
+ entry = bnxt_re_mmap_entry_insert(uctx, dbr, mmap_flag, &mmap_offset);
+ if (!entry)
+ return -ENOMEM;
+
+ uobj->object = entry;
+ uverbs_finalize_uobj_create(attrs, BNXT_RE_ALLOC_PAGE_HANDLE);
+ err = uverbs_copy_to(attrs, BNXT_RE_ALLOC_PAGE_MMAP_OFFSET,
+ &mmap_offset, sizeof(mmap_offset));
+ if (err)
+ return err;
+
+ err = uverbs_copy_to(attrs, BNXT_RE_ALLOC_PAGE_MMAP_LENGTH,
+ &length, sizeof(length));
+ if (err)
+ return err;
+
+ err = uverbs_copy_to(attrs, BNXT_RE_ALLOC_PAGE_DPI,
+ &dpi, sizeof(length));
+ if (err)
+ return err;
+
+ return 0;
+}
+
+static int alloc_page_obj_cleanup(struct ib_uobject *uobject,
+ enum rdma_remove_reason why,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct bnxt_re_user_mmap_entry *entry = uobject->object;
+ struct bnxt_re_ucontext *uctx = entry->uctx;
+
+ switch (entry->mmap_flag) {
+ case BNXT_RE_MMAP_WC_DB:
+ if (uctx && uctx->wcdpi.dbr) {
+ struct bnxt_re_dev *rdev = uctx->rdev;
+
+ bnxt_qplib_dealloc_dpi(&rdev->qplib_res, &uctx->wcdpi);
+ uctx->wcdpi.dbr = NULL;
+ }
+ break;
+ default:
+ goto exit;
+ }
+ rdma_user_mmap_entry_remove(&entry->rdma_entry);
+exit:
return 0;
}
+
+DECLARE_UVERBS_NAMED_METHOD(BNXT_RE_METHOD_ALLOC_PAGE,
+ UVERBS_ATTR_IDR(BNXT_RE_ALLOC_PAGE_HANDLE,
+ BNXT_RE_OBJECT_ALLOC_PAGE,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY),
+ UVERBS_ATTR_CONST_IN(BNXT_RE_ALLOC_PAGE_TYPE,
+ enum bnxt_re_alloc_page_type,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(BNXT_RE_ALLOC_PAGE_MMAP_OFFSET,
+ UVERBS_ATTR_TYPE(u64),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(BNXT_RE_ALLOC_PAGE_MMAP_LENGTH,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(BNXT_RE_ALLOC_PAGE_DPI,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(BNXT_RE_METHOD_DESTROY_PAGE,
+ UVERBS_ATTR_IDR(BNXT_RE_DESTROY_PAGE_HANDLE,
+ BNXT_RE_OBJECT_ALLOC_PAGE,
+ UVERBS_ACCESS_DESTROY,
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_OBJECT(BNXT_RE_OBJECT_ALLOC_PAGE,
+ UVERBS_TYPE_ALLOC_IDR(alloc_page_obj_cleanup),
+ &UVERBS_METHOD(BNXT_RE_METHOD_ALLOC_PAGE),
+ &UVERBS_METHOD(BNXT_RE_METHOD_DESTROY_PAGE));
+
+const struct uapi_definition bnxt_re_uapi_defs[] = {
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(BNXT_RE_OBJECT_ALLOC_PAGE),
+ {}
+};
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
index 31f7e34040f7..32d9e9d09791 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
@@ -60,6 +60,8 @@ struct bnxt_re_pd {
struct bnxt_re_dev *rdev;
struct bnxt_qplib_pd qplib_pd;
struct bnxt_re_fence_data fence;
+ struct rdma_user_mmap_entry *pd_db_mmap;
+ struct rdma_user_mmap_entry *pd_wcdb_mmap;
};
struct bnxt_re_ah {
@@ -134,8 +136,23 @@ struct bnxt_re_ucontext {
struct ib_ucontext ib_uctx;
struct bnxt_re_dev *rdev;
struct bnxt_qplib_dpi dpi;
+ struct bnxt_qplib_dpi wcdpi;
void *shpg;
spinlock_t sh_lock; /* protect shpg */
+ struct rdma_user_mmap_entry *shpage_mmap;
+};
+
+enum bnxt_re_mmap_flag {
+ BNXT_RE_MMAP_SH_PAGE,
+ BNXT_RE_MMAP_UC_DB,
+ BNXT_RE_MMAP_WC_DB,
+};
+
+struct bnxt_re_user_mmap_entry {
+ struct rdma_user_mmap_entry rdma_entry;
+ struct bnxt_re_ucontext *uctx;
+ u64 mem_offset;
+ u8 mmap_flag;
};
static inline u16 bnxt_re_get_swqe_size(int nsge)
@@ -213,6 +230,8 @@ struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
int bnxt_re_alloc_ucontext(struct ib_ucontext *ctx, struct ib_udata *udata);
void bnxt_re_dealloc_ucontext(struct ib_ucontext *context);
int bnxt_re_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
+void bnxt_re_mmap_free(struct rdma_user_mmap_entry *rdma_entry);
+
unsigned long bnxt_re_lock_cqs(struct bnxt_re_qp *qp);
void bnxt_re_unlock_cqs(struct bnxt_re_qp *qp, unsigned long flags);
diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c
index b9e2f89337e8..b42166fe7454 100644
--- a/drivers/infiniband/hw/bnxt_re/main.c
+++ b/drivers/infiniband/hw/bnxt_re/main.c
@@ -83,6 +83,45 @@ static int bnxt_re_netdev_event(struct notifier_block *notifier,
unsigned long event, void *ptr);
static struct bnxt_re_dev *bnxt_re_from_netdev(struct net_device *netdev);
static void bnxt_re_dev_uninit(struct bnxt_re_dev *rdev);
+static int bnxt_re_hwrm_qcaps(struct bnxt_re_dev *rdev);
+
+static int bnxt_re_hwrm_qcfg(struct bnxt_re_dev *rdev, u32 *db_len,
+ u32 *offset);
+static void bnxt_re_set_db_offset(struct bnxt_re_dev *rdev)
+{
+ struct bnxt_qplib_chip_ctx *cctx;
+ struct bnxt_en_dev *en_dev;
+ struct bnxt_qplib_res *res;
+ u32 l2db_len = 0;
+ u32 offset = 0;
+ u32 barlen;
+ int rc;
+
+ res = &rdev->qplib_res;
+ en_dev = rdev->en_dev;
+ cctx = rdev->chip_ctx;
+
+ /* Issue qcfg */
+ rc = bnxt_re_hwrm_qcfg(rdev, &l2db_len, &offset);
+ if (rc)
+ dev_info(rdev_to_dev(rdev),
+ "Couldn't get DB bar size, Low latency framework is disabled\n");
+ /* set register offsets for both UC and WC */
+ res->dpi_tbl.ucreg.offset = res->is_vf ? BNXT_QPLIB_DBR_VF_DB_OFFSET :
+ BNXT_QPLIB_DBR_PF_DB_OFFSET;
+ res->dpi_tbl.wcreg.offset = res->dpi_tbl.ucreg.offset;
+
+ /* If WC mapping is disabled by L2 driver then en_dev->l2_db_size
+ * is equal to the DB-Bar actual size. This indicates that L2
+ * is mapping entire bar as UC-. RoCE driver can't enable WC mapping
+ * in such cases and DB-push will be disabled.
+ */
+ barlen = pci_resource_len(res->pdev, RCFW_DBR_PCI_BAR_REGION);
+ if (cctx->modes.db_push && l2db_len && en_dev->l2_db_size != barlen) {
+ res->dpi_tbl.wcreg.offset = en_dev->l2_db_size;
+ dev_info(rdev_to_dev(rdev), "Low latency framework is enabled\n");
+ }
+}
static void bnxt_re_set_drv_mode(struct bnxt_re_dev *rdev, u8 mode)
{
@@ -91,6 +130,9 @@ static void bnxt_re_set_drv_mode(struct bnxt_re_dev *rdev, u8 mode)
cctx = rdev->chip_ctx;
cctx->modes.wqe_mode = bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx) ?
mode : BNXT_QPLIB_WQE_MODE_STATIC;
+ if (bnxt_re_hwrm_qcaps(rdev))
+ dev_err(rdev_to_dev(rdev),
+ "Failed to query hwrm qcaps\n");
}
static void bnxt_re_destroy_chip_ctx(struct bnxt_re_dev *rdev)
@@ -112,6 +154,7 @@ static int bnxt_re_setup_chip_ctx(struct bnxt_re_dev *rdev, u8 wqe_mode)
{
struct bnxt_qplib_chip_ctx *chip_ctx;
struct bnxt_en_dev *en_dev;
+ int rc;
en_dev = rdev->en_dev;
@@ -130,6 +173,12 @@ static int bnxt_re_setup_chip_ctx(struct bnxt_re_dev *rdev, u8 wqe_mode)
rdev->qplib_res.is_vf = BNXT_EN_VF(en_dev);
bnxt_re_set_drv_mode(rdev, wqe_mode);
+
+ bnxt_re_set_db_offset(rdev);
+ rc = bnxt_qplib_map_db_bar(&rdev->qplib_res);
+ if (rc)
+ return rc;
+
if (bnxt_qplib_determine_atomics(en_dev->pdev))
ibdev_info(&rdev->ibdev,
"platform doesn't support global atomics.");
@@ -283,15 +332,21 @@ static void bnxt_re_start_irq(void *handle, struct bnxt_msix_entry *ent)
for (indx = 0; indx < rdev->num_msix; indx++)
rdev->en_dev->msix_entries[indx].vector = ent[indx].vector;
- bnxt_qplib_rcfw_start_irq(rcfw, msix_ent[BNXT_RE_AEQ_IDX].vector,
- false);
+ rc = bnxt_qplib_rcfw_start_irq(rcfw, msix_ent[BNXT_RE_AEQ_IDX].vector,
+ false);
+ if (rc) {
+ ibdev_warn(&rdev->ibdev, "Failed to reinit CREQ\n");
+ return;
+ }
for (indx = BNXT_RE_NQ_IDX ; indx < rdev->num_msix; indx++) {
nq = &rdev->nq[indx - 1];
rc = bnxt_qplib_nq_start_irq(nq, indx - 1,
msix_ent[indx].vector, false);
- if (rc)
+ if (rc) {
ibdev_warn(&rdev->ibdev, "Failed to reinit NQ index %d\n",
indx - 1);
+ return;
+ }
}
}
@@ -315,12 +370,11 @@ static int bnxt_re_register_netdev(struct bnxt_re_dev *rdev)
return rc;
}
-static void bnxt_re_init_hwrm_hdr(struct bnxt_re_dev *rdev, struct input *hdr,
- u16 opcd, u16 crid, u16 trid)
+static void bnxt_re_init_hwrm_hdr(struct input *hdr, u16 opcd)
{
hdr->req_type = cpu_to_le16(opcd);
- hdr->cmpl_ring = cpu_to_le16(crid);
- hdr->target_id = cpu_to_le16(trid);
+ hdr->cmpl_ring = cpu_to_le16(-1);
+ hdr->target_id = cpu_to_le16(-1);
}
static void bnxt_re_fill_fw_msg(struct bnxt_fw_msg *fw_msg, void *msg,
@@ -334,13 +388,60 @@ static void bnxt_re_fill_fw_msg(struct bnxt_fw_msg *fw_msg, void *msg,
fw_msg->timeout = timeout;
}
+/* Query device config using common hwrm */
+static int bnxt_re_hwrm_qcfg(struct bnxt_re_dev *rdev, u32 *db_len,
+ u32 *offset)
+{
+ struct bnxt_en_dev *en_dev = rdev->en_dev;
+ struct hwrm_func_qcfg_output resp = {0};
+ struct hwrm_func_qcfg_input req = {0};
+ struct bnxt_fw_msg fw_msg;
+ int rc;
+
+ memset(&fw_msg, 0, sizeof(fw_msg));
+ bnxt_re_init_hwrm_hdr((void *)&req, HWRM_FUNC_QCFG);
+ req.fid = cpu_to_le16(0xffff);
+ bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
+ sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
+ rc = bnxt_send_msg(en_dev, &fw_msg);
+ if (!rc) {
+ *db_len = PAGE_ALIGN(le16_to_cpu(resp.l2_doorbell_bar_size_kb) * 1024);
+ *offset = PAGE_ALIGN(le16_to_cpu(resp.legacy_l2_db_size_kb) * 1024);
+ }
+ return rc;
+}
+
+/* Query function capabilities using common hwrm */
+int bnxt_re_hwrm_qcaps(struct bnxt_re_dev *rdev)
+{
+ struct bnxt_en_dev *en_dev = rdev->en_dev;
+ struct hwrm_func_qcaps_output resp = {};
+ struct hwrm_func_qcaps_input req = {};
+ struct bnxt_qplib_chip_ctx *cctx;
+ struct bnxt_fw_msg fw_msg = {};
+ int rc;
+
+ cctx = rdev->chip_ctx;
+ bnxt_re_init_hwrm_hdr((void *)&req, HWRM_FUNC_QCAPS);
+ req.fid = cpu_to_le16(0xffff);
+ bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
+ sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
+
+ rc = bnxt_send_msg(en_dev, &fw_msg);
+ if (rc)
+ return rc;
+ cctx->modes.db_push = le32_to_cpu(resp.flags) & FUNC_QCAPS_RESP_FLAGS_WCB_PUSH_MODE;
+
+ return 0;
+}
+
static int bnxt_re_net_ring_free(struct bnxt_re_dev *rdev,
u16 fw_ring_id, int type)
{
struct bnxt_en_dev *en_dev;
- struct hwrm_ring_free_input req = {0};
+ struct hwrm_ring_free_input req = {};
struct hwrm_ring_free_output resp;
- struct bnxt_fw_msg fw_msg;
+ struct bnxt_fw_msg fw_msg = {};
int rc = -EINVAL;
if (!rdev)
@@ -354,9 +455,7 @@ static int bnxt_re_net_ring_free(struct bnxt_re_dev *rdev,
if (test_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags))
return 0;
- memset(&fw_msg, 0, sizeof(fw_msg));
-
- bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_RING_FREE, -1, -1);
+ bnxt_re_init_hwrm_hdr((void *)&req, HWRM_RING_FREE);
req.ring_type = type;
req.ring_id = cpu_to_le16(fw_ring_id);
bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
@@ -373,16 +472,15 @@ static int bnxt_re_net_ring_alloc(struct bnxt_re_dev *rdev,
u16 *fw_ring_id)
{
struct bnxt_en_dev *en_dev = rdev->en_dev;
- struct hwrm_ring_alloc_input req = {0};
+ struct hwrm_ring_alloc_input req = {};
struct hwrm_ring_alloc_output resp;
- struct bnxt_fw_msg fw_msg;
+ struct bnxt_fw_msg fw_msg = {};
int rc = -EINVAL;
if (!en_dev)
return rc;
- memset(&fw_msg, 0, sizeof(fw_msg));
- bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_RING_ALLOC, -1, -1);
+ bnxt_re_init_hwrm_hdr((void *)&req, HWRM_RING_ALLOC);
req.enables = 0;
req.page_tbl_addr = cpu_to_le64(ring_attr->dma_arr[0]);
if (ring_attr->pages > 1) {
@@ -411,7 +509,7 @@ static int bnxt_re_net_stats_ctx_free(struct bnxt_re_dev *rdev,
struct bnxt_en_dev *en_dev = rdev->en_dev;
struct hwrm_stat_ctx_free_input req = {};
struct hwrm_stat_ctx_free_output resp = {};
- struct bnxt_fw_msg fw_msg;
+ struct bnxt_fw_msg fw_msg = {};
int rc = -EINVAL;
if (!en_dev)
@@ -420,9 +518,7 @@ static int bnxt_re_net_stats_ctx_free(struct bnxt_re_dev *rdev,
if (test_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags))
return 0;
- memset(&fw_msg, 0, sizeof(fw_msg));
-
- bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_STAT_CTX_FREE, -1, -1);
+ bnxt_re_init_hwrm_hdr((void *)&req, HWRM_STAT_CTX_FREE);
req.stat_ctx_id = cpu_to_le32(fw_stats_ctx_id);
bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
@@ -439,10 +535,10 @@ static int bnxt_re_net_stats_ctx_alloc(struct bnxt_re_dev *rdev,
u32 *fw_stats_ctx_id)
{
struct bnxt_qplib_chip_ctx *chip_ctx = rdev->chip_ctx;
- struct hwrm_stat_ctx_alloc_output resp = {0};
- struct hwrm_stat_ctx_alloc_input req = {0};
+ struct hwrm_stat_ctx_alloc_output resp = {};
+ struct hwrm_stat_ctx_alloc_input req = {};
struct bnxt_en_dev *en_dev = rdev->en_dev;
- struct bnxt_fw_msg fw_msg;
+ struct bnxt_fw_msg fw_msg = {};
int rc = -EINVAL;
*fw_stats_ctx_id = INVALID_STATS_CTX_ID;
@@ -450,9 +546,7 @@ static int bnxt_re_net_stats_ctx_alloc(struct bnxt_re_dev *rdev,
if (!en_dev)
return rc;
- memset(&fw_msg, 0, sizeof(fw_msg));
-
- bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_STAT_CTX_ALLOC, -1, -1);
+ bnxt_re_init_hwrm_hdr((void *)&req, HWRM_STAT_CTX_ALLOC);
req.update_period_ms = cpu_to_le32(1000);
req.stats_dma_addr = cpu_to_le64(dma_map);
req.stats_dma_length = cpu_to_le16(chip_ctx->hw_stats_size);
@@ -466,6 +560,10 @@ static int bnxt_re_net_stats_ctx_alloc(struct bnxt_re_dev *rdev,
return rc;
}
+static void bnxt_re_disassociate_ucontext(struct ib_ucontext *ibcontext)
+{
+}
+
/* Device */
static struct bnxt_re_dev *bnxt_re_from_netdev(struct net_device *netdev)
@@ -532,6 +630,7 @@ static const struct ib_device_ops bnxt_re_dev_ops = {
.destroy_qp = bnxt_re_destroy_qp,
.destroy_srq = bnxt_re_destroy_srq,
.device_group = &bnxt_re_dev_attr_group,
+ .disassociate_ucontext = bnxt_re_disassociate_ucontext,
.get_dev_fw_str = bnxt_re_query_fw_str,
.get_dma_mr = bnxt_re_get_dma_mr,
.get_hw_stats = bnxt_re_ib_get_hw_stats,
@@ -539,6 +638,7 @@ static const struct ib_device_ops bnxt_re_dev_ops = {
.get_port_immutable = bnxt_re_get_port_immutable,
.map_mr_sg = bnxt_re_map_mr_sg,
.mmap = bnxt_re_mmap,
+ .mmap_free = bnxt_re_mmap_free,
.modify_qp = bnxt_re_modify_qp,
.modify_srq = bnxt_re_modify_srq,
.poll_cq = bnxt_re_poll_cq,
@@ -579,6 +679,9 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev)
ibdev->dev.parent = &rdev->en_dev->pdev->dev;
ibdev->local_dma_lkey = BNXT_QPLIB_RSVD_LKEY;
+ if (IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS))
+ ibdev->driver_def = bnxt_re_uapi_defs;
+
ib_set_device_ops(ibdev, &bnxt_re_dev_ops);
ret = ib_device_set_netdev(&rdev->ibdev, rdev->netdev, 1);
if (ret)
@@ -822,7 +925,6 @@ static void bnxt_re_free_res(struct bnxt_re_dev *rdev)
if (rdev->qplib_res.dpi_tbl.max) {
bnxt_qplib_dealloc_dpi(&rdev->qplib_res,
- &rdev->qplib_res.dpi_tbl,
&rdev->dpi_privileged);
}
if (rdev->qplib_res.rcfw) {
@@ -850,9 +952,9 @@ static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev)
if (rc)
goto fail;
- rc = bnxt_qplib_alloc_dpi(&rdev->qplib_res.dpi_tbl,
+ rc = bnxt_qplib_alloc_dpi(&rdev->qplib_res,
&rdev->dpi_privileged,
- rdev);
+ rdev, BNXT_QPLIB_DPI_TYPE_KERNEL);
if (rc)
goto dealloc_res;
@@ -892,7 +994,6 @@ free_nq:
bnxt_qplib_free_nq(&rdev->nq[i]);
}
bnxt_qplib_dealloc_dpi(&rdev->qplib_res,
- &rdev->qplib_res.dpi_tbl,
&rdev->dpi_privileged);
dealloc_res:
bnxt_qplib_free_res(&rdev->qplib_res);
@@ -963,12 +1064,6 @@ static int bnxt_re_update_gid(struct bnxt_re_dev *rdev)
if (!ib_device_try_get(&rdev->ibdev))
return 0;
- if (!sgid_tbl) {
- ibdev_err(&rdev->ibdev, "QPLIB: SGID table not allocated");
- rc = -EINVAL;
- goto out;
- }
-
for (index = 0; index < sgid_tbl->active; index++) {
gid_idx = sgid_tbl->hw_id[index];
@@ -986,7 +1081,7 @@ static int bnxt_re_update_gid(struct bnxt_re_dev *rdev)
rc = bnxt_qplib_update_sgid(sgid_tbl, &gid, gid_idx,
rdev->qplib_res.netdev->dev_addr);
}
-out:
+
ib_device_put(&rdev->ibdev);
return rc;
}
@@ -1039,14 +1134,13 @@ static int bnxt_re_setup_qos(struct bnxt_re_dev *rdev)
static void bnxt_re_query_hwrm_intf_version(struct bnxt_re_dev *rdev)
{
struct bnxt_en_dev *en_dev = rdev->en_dev;
- struct hwrm_ver_get_output resp = {0};
- struct hwrm_ver_get_input req = {0};
- struct bnxt_fw_msg fw_msg;
+ struct hwrm_ver_get_output resp = {};
+ struct hwrm_ver_get_input req = {};
+ struct bnxt_qplib_chip_ctx *cctx;
+ struct bnxt_fw_msg fw_msg = {};
int rc = 0;
- memset(&fw_msg, 0, sizeof(fw_msg));
- bnxt_re_init_hwrm_hdr(rdev, (void *)&req,
- HWRM_VER_GET, -1, -1);
+ bnxt_re_init_hwrm_hdr((void *)&req, HWRM_VER_GET);
req.hwrm_intf_maj = HWRM_VERSION_MAJOR;
req.hwrm_intf_min = HWRM_VERSION_MINOR;
req.hwrm_intf_upd = HWRM_VERSION_UPDATE;
@@ -1058,11 +1152,18 @@ static void bnxt_re_query_hwrm_intf_version(struct bnxt_re_dev *rdev)
rc);
return;
}
- rdev->qplib_ctx.hwrm_intf_ver =
+
+ cctx = rdev->chip_ctx;
+ cctx->hwrm_intf_ver =
(u64)le16_to_cpu(resp.hwrm_intf_major) << 48 |
(u64)le16_to_cpu(resp.hwrm_intf_minor) << 32 |
(u64)le16_to_cpu(resp.hwrm_intf_build) << 16 |
le16_to_cpu(resp.hwrm_intf_patch);
+
+ cctx->hwrm_cmd_max_timeout = le16_to_cpu(resp.max_req_timeout);
+
+ if (!cctx->hwrm_cmd_max_timeout)
+ cctx->hwrm_cmd_max_timeout = RCFW_FW_STALL_MAX_TIMEOUT;
}
static int bnxt_re_ib_init(struct bnxt_re_dev *rdev)
@@ -1077,8 +1178,6 @@ static int bnxt_re_ib_init(struct bnxt_re_dev *rdev)
return rc;
}
dev_info(rdev_to_dev(rdev), "Device registered with IB successfully");
- ib_get_eth_speed(&rdev->ibdev, 1, &rdev->active_speed,
- &rdev->active_width);
set_bit(BNXT_RE_FLAG_ISSUE_ROCE_STATS, &rdev->flags);
event = netif_running(rdev->netdev) && netif_carrier_ok(rdev->netdev) ?
@@ -1202,7 +1301,7 @@ static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 wqe_mode)
db_offt = bnxt_re_get_nqdb_offset(rdev, BNXT_RE_AEQ_IDX);
vid = rdev->en_dev->msix_entries[BNXT_RE_AEQ_IDX].vector;
rc = bnxt_qplib_enable_rcfw_channel(&rdev->rcfw,
- vid, db_offt, rdev->is_virtfn,
+ vid, db_offt,
&bnxt_re_aeq_handler);
if (rc) {
ibdev_err(&rdev->ibdev, "Failed to enable RCFW channel: %#x\n",
@@ -1336,6 +1435,10 @@ static void bnxt_re_setup_cc(struct bnxt_re_dev *rdev, bool enable)
{
struct bnxt_qplib_cc_param cc_param = {};
+ /* Do not enable congestion control on VFs */
+ if (rdev->is_virtfn)
+ return;
+
/* Currently enabling only for GenP5 adapters */
if (!bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx))
return;
@@ -1495,6 +1598,7 @@ static int bnxt_re_suspend(struct auxiliary_device *adev, pm_message_t state)
*/
set_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags);
set_bit(ERR_DEVICE_DETACHED, &rdev->rcfw.cmdq.flags);
+ wake_up_all(&rdev->rcfw.cmdq.waitq);
mutex_unlock(&bnxt_re_mutex);
return 0;
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
index f139d4cd1712..91aed77ce40d 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
@@ -399,6 +399,9 @@ static irqreturn_t bnxt_qplib_nq_irq(int irq, void *dev_instance)
void bnxt_qplib_nq_stop_irq(struct bnxt_qplib_nq *nq, bool kill)
{
+ if (!nq->requested)
+ return;
+
tasklet_disable(&nq->nq_tasklet);
/* Mask h/w interrupt */
bnxt_qplib_ring_nq_db(&nq->nq_db.dbinfo, nq->res->cctx, false);
@@ -406,11 +409,12 @@ void bnxt_qplib_nq_stop_irq(struct bnxt_qplib_nq *nq, bool kill)
synchronize_irq(nq->msix_vec);
if (kill)
tasklet_kill(&nq->nq_tasklet);
- if (nq->requested) {
- irq_set_affinity_hint(nq->msix_vec, NULL);
- free_irq(nq->msix_vec, nq);
- nq->requested = false;
- }
+
+ irq_set_affinity_hint(nq->msix_vec, NULL);
+ free_irq(nq->msix_vec, nq);
+ kfree(nq->name);
+ nq->name = NULL;
+ nq->requested = false;
}
void bnxt_qplib_disable_nq(struct bnxt_qplib_nq *nq)
@@ -436,6 +440,7 @@ void bnxt_qplib_disable_nq(struct bnxt_qplib_nq *nq)
int bnxt_qplib_nq_start_irq(struct bnxt_qplib_nq *nq, int nq_indx,
int msix_vector, bool need_init)
{
+ struct bnxt_qplib_res *res = nq->res;
int rc;
if (nq->requested)
@@ -447,10 +452,17 @@ int bnxt_qplib_nq_start_irq(struct bnxt_qplib_nq *nq, int nq_indx,
else
tasklet_enable(&nq->nq_tasklet);
- snprintf(nq->name, sizeof(nq->name), "bnxt_qplib_nq-%d", nq_indx);
+ nq->name = kasprintf(GFP_KERNEL, "bnxt_re-nq-%d@pci:%s",
+ nq_indx, pci_name(res->pdev));
+ if (!nq->name)
+ return -ENOMEM;
rc = request_irq(nq->msix_vec, bnxt_qplib_nq_irq, 0, nq->name, nq);
- if (rc)
+ if (rc) {
+ kfree(nq->name);
+ nq->name = NULL;
+ tasklet_disable(&nq->nq_tasklet);
return rc;
+ }
cpumask_clear(&nq->mask);
cpumask_set_cpu(nq_indx, &nq->mask);
@@ -461,7 +473,7 @@ int bnxt_qplib_nq_start_irq(struct bnxt_qplib_nq *nq, int nq_indx,
nq->msix_vec, nq_indx);
}
nq->requested = true;
- bnxt_qplib_ring_nq_db(&nq->nq_db.dbinfo, nq->res->cctx, true);
+ bnxt_qplib_ring_nq_db(&nq->nq_db.dbinfo, res->cctx, true);
return rc;
}
@@ -471,7 +483,6 @@ static int bnxt_qplib_map_nq_db(struct bnxt_qplib_nq *nq, u32 reg_offt)
resource_size_t reg_base;
struct bnxt_qplib_nq_db *nq_db;
struct pci_dev *pdev;
- int rc = 0;
pdev = nq->pdev;
nq_db = &nq->nq_db;
@@ -481,8 +492,7 @@ static int bnxt_qplib_map_nq_db(struct bnxt_qplib_nq *nq, u32 reg_offt)
if (!nq_db->reg.bar_base) {
dev_err(&pdev->dev, "QPLIB: NQ BAR region %d resc start is 0!",
nq_db->reg.bar_id);
- rc = -ENOMEM;
- goto fail;
+ return -ENOMEM;
}
reg_base = nq_db->reg.bar_base + reg_offt;
@@ -492,15 +502,14 @@ static int bnxt_qplib_map_nq_db(struct bnxt_qplib_nq *nq, u32 reg_offt)
if (!nq_db->reg.bar_reg) {
dev_err(&pdev->dev, "QPLIB: NQ BAR region %d mapping failed",
nq_db->reg.bar_id);
- rc = -ENOMEM;
- goto fail;
+ return -ENOMEM;
}
nq_db->dbinfo.db = nq_db->reg.bar_reg;
nq_db->dbinfo.hwq = &nq->hwq;
nq_db->dbinfo.xid = nq->ring_id;
-fail:
- return rc;
+
+ return 0;
}
int bnxt_qplib_enable_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq,
@@ -614,7 +623,7 @@ int bnxt_qplib_create_srq(struct bnxt_qplib_res *res,
hwq_attr.type = HWQ_TYPE_QUEUE;
rc = bnxt_qplib_alloc_init_hwq(&srq->hwq, &hwq_attr);
if (rc)
- goto exit;
+ return rc;
srq->swq = kcalloc(srq->hwq.max_elements, sizeof(*srq->swq),
GFP_KERNEL);
@@ -659,7 +668,7 @@ int bnxt_qplib_create_srq(struct bnxt_qplib_res *res,
srq->dbinfo.xid = srq->id;
srq->dbinfo.db = srq->dpi->dbr;
srq->dbinfo.max_slot = 1;
- srq->dbinfo.priv_db = res->dpi_tbl.dbr_bar_reg_iomem;
+ srq->dbinfo.priv_db = res->dpi_tbl.priv_db;
if (srq->threshold)
bnxt_qplib_armen_db(&srq->dbinfo, DBC_DBC_TYPE_SRQ_ARMENA);
srq->arm_req = false;
@@ -668,7 +677,7 @@ int bnxt_qplib_create_srq(struct bnxt_qplib_res *res,
fail:
bnxt_qplib_free_hwq(res, &srq->hwq);
kfree(srq->swq);
-exit:
+
return rc;
}
@@ -732,15 +741,14 @@ int bnxt_qplib_post_srq_recv(struct bnxt_qplib_srq *srq,
struct rq_wqe *srqe;
struct sq_sge *hw_sge;
u32 sw_prod, sw_cons, count = 0;
- int i, rc = 0, next;
+ int i, next;
spin_lock(&srq_hwq->lock);
if (srq->start_idx == srq->last_idx) {
dev_err(&srq_hwq->pdev->dev,
"FP: SRQ (0x%x) is full!\n", srq->id);
- rc = -EINVAL;
spin_unlock(&srq_hwq->lock);
- goto done;
+ return -EINVAL;
}
next = srq->start_idx;
srq->start_idx = srq->swq[next].next_idx;
@@ -781,22 +789,19 @@ int bnxt_qplib_post_srq_recv(struct bnxt_qplib_srq *srq,
srq->arm_req = false;
bnxt_qplib_srq_arm_db(&srq->dbinfo, srq->threshold);
}
-done:
- return rc;
+
+ return 0;
}
/* QP */
static int bnxt_qplib_alloc_init_swq(struct bnxt_qplib_q *que)
{
- int rc = 0;
int indx;
que->swq = kcalloc(que->max_wqe, sizeof(*que->swq), GFP_KERNEL);
- if (!que->swq) {
- rc = -ENOMEM;
- goto out;
- }
+ if (!que->swq)
+ return -ENOMEM;
que->swq_start = 0;
que->swq_last = que->max_wqe - 1;
@@ -804,8 +809,8 @@ static int bnxt_qplib_alloc_init_swq(struct bnxt_qplib_q *que)
que->swq[indx].next_idx = indx + 1;
que->swq[que->swq_last].next_idx = 0; /* Make it circular */
que->swq_last = 0;
-out:
- return rc;
+
+ return 0;
}
int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
@@ -839,7 +844,7 @@ int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
hwq_attr.type = HWQ_TYPE_QUEUE;
rc = bnxt_qplib_alloc_init_hwq(&sq->hwq, &hwq_attr);
if (rc)
- goto exit;
+ return rc;
rc = bnxt_qplib_alloc_init_swq(sq);
if (rc)
@@ -927,7 +932,6 @@ sq_swq:
kfree(sq->swq);
fail_sq:
bnxt_qplib_free_hwq(res, &sq->hwq);
-exit:
return rc;
}
@@ -992,7 +996,7 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
hwq_attr.type = HWQ_TYPE_QUEUE;
rc = bnxt_qplib_alloc_init_hwq(&sq->hwq, &hwq_attr);
if (rc)
- goto exit;
+ return rc;
rc = bnxt_qplib_alloc_init_swq(sq);
if (rc)
@@ -1140,7 +1144,6 @@ sq_swq:
kfree(sq->swq);
fail_sq:
bnxt_qplib_free_hwq(res, &sq->hwq);
-exit:
return rc;
}
@@ -1614,7 +1617,7 @@ static int bnxt_qplib_put_inline(struct bnxt_qplib_qp *qp,
il_src = (void *)wqe->sg_list[indx].addr;
t_len += len;
if (t_len > qp->max_inline_data)
- goto bad;
+ return -ENOMEM;
while (len) {
if (pull_dst) {
pull_dst = false;
@@ -1638,8 +1641,6 @@ static int bnxt_qplib_put_inline(struct bnxt_qplib_qp *qp,
}
return t_len;
-bad:
- return -ENOMEM;
}
static u32 bnxt_qplib_put_sges(struct bnxt_qplib_hwq *hwq,
@@ -2056,6 +2057,12 @@ int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq)
u32 pg_sz_lvl;
int rc;
+ if (!cq->dpi) {
+ dev_err(&rcfw->pdev->dev,
+ "FP: CREATE_CQ failed due to NULL DPI\n");
+ return -EINVAL;
+ }
+
hwq_attr.res = res;
hwq_attr.depth = cq->max_wqe;
hwq_attr.stride = sizeof(struct cq_base);
@@ -2063,17 +2070,12 @@ int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq)
hwq_attr.sginfo = &cq->sg_info;
rc = bnxt_qplib_alloc_init_hwq(&cq->hwq, &hwq_attr);
if (rc)
- goto exit;
+ return rc;
bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req,
CMDQ_BASE_OPCODE_CREATE_CQ,
sizeof(req));
- if (!cq->dpi) {
- dev_err(&rcfw->pdev->dev,
- "FP: CREATE_CQ failed due to NULL DPI\n");
- return -EINVAL;
- }
req.dpi = cpu_to_le32(cq->dpi->dpi);
req.cq_handle = cpu_to_le64(cq->cq_handle);
req.cq_size = cpu_to_le32(cq->hwq.max_elements);
@@ -2103,7 +2105,7 @@ int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq)
cq->dbinfo.hwq = &cq->hwq;
cq->dbinfo.xid = cq->id;
cq->dbinfo.db = cq->dpi->dbr;
- cq->dbinfo.priv_db = res->dpi_tbl.dbr_bar_reg_iomem;
+ cq->dbinfo.priv_db = res->dpi_tbl.priv_db;
bnxt_qplib_armen_db(&cq->dbinfo, DBC_DBC_TYPE_CQ_ARMENA);
@@ -2111,7 +2113,6 @@ int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq)
fail:
bnxt_qplib_free_hwq(res, &cq->hwq);
-exit:
return rc;
}
@@ -2504,7 +2505,6 @@ static int bnxt_qplib_cq_process_res_rc(struct bnxt_qplib_cq *cq,
struct bnxt_qplib_qp *qp;
struct bnxt_qplib_q *rq;
u32 wr_id_idx;
- int rc = 0;
qp = (struct bnxt_qplib_qp *)((unsigned long)
le64_to_cpu(hwcqe->qp_handle));
@@ -2515,7 +2515,7 @@ static int bnxt_qplib_cq_process_res_rc(struct bnxt_qplib_cq *cq,
if (qp->rq.flushed) {
dev_dbg(&cq->hwq.pdev->dev,
"%s: QP in Flush QP = %p\n", __func__, qp);
- goto done;
+ return 0;
}
cqe = *pcqe;
@@ -2571,8 +2571,7 @@ static int bnxt_qplib_cq_process_res_rc(struct bnxt_qplib_cq *cq,
}
}
-done:
- return rc;
+ return 0;
}
static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq,
@@ -2585,7 +2584,6 @@ static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq,
struct bnxt_qplib_qp *qp;
struct bnxt_qplib_q *rq;
u32 wr_id_idx;
- int rc = 0;
qp = (struct bnxt_qplib_qp *)((unsigned long)
le64_to_cpu(hwcqe->qp_handle));
@@ -2596,7 +2594,7 @@ static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq,
if (qp->rq.flushed) {
dev_dbg(&cq->hwq.pdev->dev,
"%s: QP in Flush QP = %p\n", __func__, qp);
- goto done;
+ return 0;
}
cqe = *pcqe;
cqe->opcode = hwcqe->cqe_type_toggle & CQ_BASE_CQE_TYPE_MASK;
@@ -2658,8 +2656,8 @@ static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq,
bnxt_qplib_add_flush_qp(qp);
}
}
-done:
- return rc;
+
+ return 0;
}
bool bnxt_qplib_is_cq_empty(struct bnxt_qplib_cq *cq)
@@ -2686,7 +2684,6 @@ static int bnxt_qplib_cq_process_res_raweth_qp1(struct bnxt_qplib_cq *cq,
struct bnxt_qplib_srq *srq;
struct bnxt_qplib_cqe *cqe;
u32 wr_id_idx;
- int rc = 0;
qp = (struct bnxt_qplib_qp *)((unsigned long)
le64_to_cpu(hwcqe->qp_handle));
@@ -2697,7 +2694,7 @@ static int bnxt_qplib_cq_process_res_raweth_qp1(struct bnxt_qplib_cq *cq,
if (qp->rq.flushed) {
dev_dbg(&cq->hwq.pdev->dev,
"%s: QP in Flush QP = %p\n", __func__, qp);
- goto done;
+ return 0;
}
cqe = *pcqe;
cqe->opcode = hwcqe->cqe_type_toggle & CQ_BASE_CQE_TYPE_MASK;
@@ -2766,8 +2763,7 @@ static int bnxt_qplib_cq_process_res_raweth_qp1(struct bnxt_qplib_cq *cq,
}
}
-done:
- return rc;
+ return 0;
}
static int bnxt_qplib_cq_process_terminal(struct bnxt_qplib_cq *cq,
@@ -2789,11 +2785,8 @@ static int bnxt_qplib_cq_process_terminal(struct bnxt_qplib_cq *cq,
qp = (struct bnxt_qplib_qp *)((unsigned long)
le64_to_cpu(hwcqe->qp_handle));
- if (!qp) {
- dev_err(&cq->hwq.pdev->dev,
- "FP: CQ Process terminal qp is NULL\n");
+ if (!qp)
return -EINVAL;
- }
/* Must block new posting of SQ and RQ */
qp->state = CMDQ_MODIFY_QP_NEW_STATE_ERR;
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.h b/drivers/infiniband/hw/bnxt_re/qplib_fp.h
index d74d5ead2e32..a42820821c47 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_fp.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.h
@@ -472,7 +472,7 @@ typedef int (*srqn_handler_t)(struct bnxt_qplib_nq *nq,
struct bnxt_qplib_nq {
struct pci_dev *pdev;
struct bnxt_qplib_res *res;
- char name[32];
+ char *name;
struct bnxt_qplib_hwq hwq;
struct bnxt_qplib_nq_db nq_db;
u16 ring_id;
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
index de9069103177..b30e66b64827 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
@@ -53,112 +53,289 @@
static void bnxt_qplib_service_creq(struct tasklet_struct *t);
-/* Hardware communication channel */
+/**
+ * bnxt_qplib_map_rc - map return type based on opcode
+ * @opcode - roce slow path opcode
+ *
+ * case #1
+ * Firmware initiated error recovery is a safe state machine and
+ * driver can consider all the underlying rdma resources are free.
+ * In this state, it is safe to return success for opcodes related to
+ * destroying rdma resources (like destroy qp, destroy cq etc.).
+ *
+ * case #2
+ * If driver detect potential firmware stall, it is not safe state machine
+ * and the driver can not consider all the underlying rdma resources are
+ * freed.
+ * In this state, it is not safe to return success for opcodes related to
+ * destroying rdma resources (like destroy qp, destroy cq etc.).
+ *
+ * Scope of this helper function is only for case #1.
+ *
+ * Returns:
+ * 0 to communicate success to caller.
+ * Non zero error code to communicate failure to caller.
+ */
+static int bnxt_qplib_map_rc(u8 opcode)
+{
+ switch (opcode) {
+ case CMDQ_BASE_OPCODE_DESTROY_QP:
+ case CMDQ_BASE_OPCODE_DESTROY_SRQ:
+ case CMDQ_BASE_OPCODE_DESTROY_CQ:
+ case CMDQ_BASE_OPCODE_DEALLOCATE_KEY:
+ case CMDQ_BASE_OPCODE_DEREGISTER_MR:
+ case CMDQ_BASE_OPCODE_DELETE_GID:
+ case CMDQ_BASE_OPCODE_DESTROY_QP1:
+ case CMDQ_BASE_OPCODE_DESTROY_AH:
+ case CMDQ_BASE_OPCODE_DEINITIALIZE_FW:
+ case CMDQ_BASE_OPCODE_MODIFY_ROCE_CC:
+ case CMDQ_BASE_OPCODE_SET_LINK_AGGR_MODE:
+ return 0;
+ default:
+ return -ETIMEDOUT;
+ }
+}
+
+/**
+ * bnxt_re_is_fw_stalled - Check firmware health
+ * @rcfw - rcfw channel instance of rdev
+ * @cookie - cookie to track the command
+ *
+ * If firmware has not responded any rcfw command within
+ * rcfw->max_timeout, consider firmware as stalled.
+ *
+ * Returns:
+ * 0 if firmware is responding
+ * -ENODEV if firmware is not responding
+ */
+static int bnxt_re_is_fw_stalled(struct bnxt_qplib_rcfw *rcfw,
+ u16 cookie)
+{
+ struct bnxt_qplib_cmdq_ctx *cmdq;
+ struct bnxt_qplib_crsqe *crsqe;
+
+ crsqe = &rcfw->crsqe_tbl[cookie];
+ cmdq = &rcfw->cmdq;
+
+ if (time_after(jiffies, cmdq->last_seen +
+ (rcfw->max_timeout * HZ))) {
+ dev_warn_ratelimited(&rcfw->pdev->dev,
+ "%s: FW STALL Detected. cmdq[%#x]=%#x waited (%d > %d) msec active %d ",
+ __func__, cookie, crsqe->opcode,
+ jiffies_to_msecs(jiffies - cmdq->last_seen),
+ rcfw->max_timeout * 1000,
+ crsqe->is_in_used);
+ return -ENODEV;
+ }
+
+ return 0;
+}
+
+/**
+ * __wait_for_resp - Don't hold the cpu context and wait for response
+ * @rcfw - rcfw channel instance of rdev
+ * @cookie - cookie to track the command
+ *
+ * Wait for command completion in sleepable context.
+ *
+ * Returns:
+ * 0 if command is completed by firmware.
+ * Non zero error code for rest of the case.
+ */
static int __wait_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie)
{
struct bnxt_qplib_cmdq_ctx *cmdq;
- u16 cbit;
- int rc;
+ struct bnxt_qplib_crsqe *crsqe;
+ int ret;
cmdq = &rcfw->cmdq;
- cbit = cookie % rcfw->cmdq_depth;
- rc = wait_event_timeout(cmdq->waitq,
- !test_bit(cbit, cmdq->cmdq_bitmap),
- msecs_to_jiffies(RCFW_CMD_WAIT_TIME_MS));
- return rc ? 0 : -ETIMEDOUT;
+ crsqe = &rcfw->crsqe_tbl[cookie];
+
+ do {
+ if (test_bit(ERR_DEVICE_DETACHED, &cmdq->flags))
+ return bnxt_qplib_map_rc(crsqe->opcode);
+ if (test_bit(FIRMWARE_STALL_DETECTED, &cmdq->flags))
+ return -ETIMEDOUT;
+
+ wait_event_timeout(cmdq->waitq,
+ !crsqe->is_in_used ||
+ test_bit(ERR_DEVICE_DETACHED, &cmdq->flags),
+ msecs_to_jiffies(rcfw->max_timeout * 1000));
+
+ if (!crsqe->is_in_used)
+ return 0;
+
+ bnxt_qplib_service_creq(&rcfw->creq.creq_tasklet);
+
+ if (!crsqe->is_in_used)
+ return 0;
+
+ ret = bnxt_re_is_fw_stalled(rcfw, cookie);
+ if (ret)
+ return ret;
+
+ } while (true);
};
+/**
+ * __block_for_resp - hold the cpu context and wait for response
+ * @rcfw - rcfw channel instance of rdev
+ * @cookie - cookie to track the command
+ *
+ * This function will hold the cpu (non-sleepable context) and
+ * wait for command completion. Maximum holding interval is 8 second.
+ *
+ * Returns:
+ * -ETIMEOUT if command is not completed in specific time interval.
+ * 0 if command is completed by firmware.
+ */
static int __block_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie)
{
- u32 count = RCFW_BLOCKED_CMD_WAIT_COUNT;
- struct bnxt_qplib_cmdq_ctx *cmdq;
- u16 cbit;
+ struct bnxt_qplib_cmdq_ctx *cmdq = &rcfw->cmdq;
+ struct bnxt_qplib_crsqe *crsqe;
+ unsigned long issue_time = 0;
+
+ issue_time = jiffies;
+ crsqe = &rcfw->crsqe_tbl[cookie];
- cmdq = &rcfw->cmdq;
- cbit = cookie % rcfw->cmdq_depth;
- if (!test_bit(cbit, cmdq->cmdq_bitmap))
- goto done;
do {
+ if (test_bit(ERR_DEVICE_DETACHED, &cmdq->flags))
+ return bnxt_qplib_map_rc(crsqe->opcode);
+ if (test_bit(FIRMWARE_STALL_DETECTED, &cmdq->flags))
+ return -ETIMEDOUT;
+
udelay(1);
+
bnxt_qplib_service_creq(&rcfw->creq.creq_tasklet);
- } while (test_bit(cbit, cmdq->cmdq_bitmap) && --count);
-done:
- return count ? 0 : -ETIMEDOUT;
+ if (!crsqe->is_in_used)
+ return 0;
+
+ } while (time_before(jiffies, issue_time + (8 * HZ)));
+
+ return -ETIMEDOUT;
};
-static int __send_message(struct bnxt_qplib_rcfw *rcfw,
- struct bnxt_qplib_cmdqmsg *msg)
+/* __send_message_no_waiter - get cookie and post the message.
+ * @rcfw - rcfw channel instance of rdev
+ * @msg - qplib message internal
+ *
+ * This function will just post and don't bother about completion.
+ * Current design of this function is -
+ * user must hold the completion queue hwq->lock.
+ * user must have used existing completion and free the resources.
+ * this function will not check queue full condition.
+ * this function will explicitly set is_waiter_alive=false.
+ * current use case is - send destroy_ah if create_ah is return
+ * after waiter of create_ah is lost. It can be extended for other
+ * use case as well.
+ *
+ * Returns: Nothing
+ *
+ */
+static void __send_message_no_waiter(struct bnxt_qplib_rcfw *rcfw,
+ struct bnxt_qplib_cmdqmsg *msg)
{
struct bnxt_qplib_cmdq_ctx *cmdq = &rcfw->cmdq;
struct bnxt_qplib_hwq *hwq = &cmdq->hwq;
struct bnxt_qplib_crsqe *crsqe;
struct bnxt_qplib_cmdqe *cmdqe;
u32 sw_prod, cmdq_prod;
- struct pci_dev *pdev;
- unsigned long flags;
- u32 bsize, opcode;
- u16 cookie, cbit;
+ u16 cookie;
+ u32 bsize;
u8 *preq;
- pdev = rcfw->pdev;
+ cookie = cmdq->seq_num & RCFW_MAX_COOKIE_VALUE;
+ __set_cmdq_base_cookie(msg->req, msg->req_sz, cpu_to_le16(cookie));
+ crsqe = &rcfw->crsqe_tbl[cookie];
- opcode = __get_cmdq_base_opcode(msg->req, msg->req_sz);
- if (!test_bit(FIRMWARE_INITIALIZED_FLAG, &cmdq->flags) &&
- (opcode != CMDQ_BASE_OPCODE_QUERY_FUNC &&
- opcode != CMDQ_BASE_OPCODE_INITIALIZE_FW &&
- opcode != CMDQ_BASE_OPCODE_QUERY_VERSION)) {
- dev_err(&pdev->dev,
- "RCFW not initialized, reject opcode 0x%x\n", opcode);
- return -EINVAL;
- }
+ /* Set cmd_size in terms of 16B slots in req. */
+ bsize = bnxt_qplib_set_cmd_slots(msg->req);
+ /* GET_CMD_SIZE would return number of slots in either case of tlv
+ * and non-tlv commands after call to bnxt_qplib_set_cmd_slots()
+ */
+ crsqe->is_internal_cmd = true;
+ crsqe->is_waiter_alive = false;
+ crsqe->is_in_used = true;
+ crsqe->req_size = __get_cmdq_base_cmd_size(msg->req, msg->req_sz);
- if (test_bit(FIRMWARE_INITIALIZED_FLAG, &cmdq->flags) &&
- opcode == CMDQ_BASE_OPCODE_INITIALIZE_FW) {
- dev_err(&pdev->dev, "RCFW already initialized!\n");
- return -EINVAL;
- }
+ preq = (u8 *)msg->req;
+ do {
+ /* Locate the next cmdq slot */
+ sw_prod = HWQ_CMP(hwq->prod, hwq);
+ cmdqe = bnxt_qplib_get_qe(hwq, sw_prod, NULL);
+ /* Copy a segment of the req cmd to the cmdq */
+ memset(cmdqe, 0, sizeof(*cmdqe));
+ memcpy(cmdqe, preq, min_t(u32, bsize, sizeof(*cmdqe)));
+ preq += min_t(u32, bsize, sizeof(*cmdqe));
+ bsize -= min_t(u32, bsize, sizeof(*cmdqe));
+ hwq->prod++;
+ } while (bsize > 0);
+ cmdq->seq_num++;
- if (test_bit(FIRMWARE_TIMED_OUT, &cmdq->flags))
- return -ETIMEDOUT;
+ cmdq_prod = hwq->prod;
+ atomic_inc(&rcfw->timeout_send);
+ /* ring CMDQ DB */
+ wmb();
+ writel(cmdq_prod, cmdq->cmdq_mbox.prod);
+ writel(RCFW_CMDQ_TRIG_VAL, cmdq->cmdq_mbox.db);
+}
+
+static int __send_message(struct bnxt_qplib_rcfw *rcfw,
+ struct bnxt_qplib_cmdqmsg *msg, u8 opcode)
+{
+ u32 bsize, free_slots, required_slots;
+ struct bnxt_qplib_cmdq_ctx *cmdq;
+ struct bnxt_qplib_crsqe *crsqe;
+ struct bnxt_qplib_cmdqe *cmdqe;
+ struct bnxt_qplib_hwq *hwq;
+ u32 sw_prod, cmdq_prod;
+ struct pci_dev *pdev;
+ unsigned long flags;
+ u16 cookie;
+ u8 *preq;
+
+ cmdq = &rcfw->cmdq;
+ hwq = &cmdq->hwq;
+ pdev = rcfw->pdev;
/* Cmdq are in 16-byte units, each request can consume 1 or more
* cmdqe
*/
spin_lock_irqsave(&hwq->lock, flags);
- if (msg->req->cmd_size >= HWQ_FREE_SLOTS(hwq)) {
- dev_err(&pdev->dev, "RCFW: CMDQ is full!\n");
+ required_slots = bnxt_qplib_get_cmd_slots(msg->req);
+ free_slots = HWQ_FREE_SLOTS(hwq);
+ cookie = cmdq->seq_num & RCFW_MAX_COOKIE_VALUE;
+ crsqe = &rcfw->crsqe_tbl[cookie];
+
+ if (required_slots >= free_slots) {
+ dev_info_ratelimited(&pdev->dev,
+ "CMDQ is full req/free %d/%d!",
+ required_slots, free_slots);
spin_unlock_irqrestore(&hwq->lock, flags);
return -EAGAIN;
}
-
-
- cookie = cmdq->seq_num & RCFW_MAX_COOKIE_VALUE;
- cbit = cookie % rcfw->cmdq_depth;
if (msg->block)
cookie |= RCFW_CMD_IS_BLOCKING;
-
- set_bit(cbit, cmdq->cmdq_bitmap);
__set_cmdq_base_cookie(msg->req, msg->req_sz, cpu_to_le16(cookie));
- crsqe = &rcfw->crsqe_tbl[cbit];
- if (crsqe->resp) {
- spin_unlock_irqrestore(&hwq->lock, flags);
- return -EBUSY;
- }
- /* change the cmd_size to the number of 16byte cmdq unit.
- * req->cmd_size is modified here
- */
bsize = bnxt_qplib_set_cmd_slots(msg->req);
-
- memset(msg->resp, 0, sizeof(*msg->resp));
+ crsqe->free_slots = free_slots;
crsqe->resp = (struct creq_qp_event *)msg->resp;
crsqe->resp->cookie = cpu_to_le16(cookie);
+ crsqe->is_internal_cmd = false;
+ crsqe->is_waiter_alive = true;
+ crsqe->is_in_used = true;
+ crsqe->opcode = opcode;
+
crsqe->req_size = __get_cmdq_base_cmd_size(msg->req, msg->req_sz);
if (__get_cmdq_base_resp_size(msg->req, msg->req_sz) && msg->sb) {
struct bnxt_qplib_rcfw_sbuf *sbuf = msg->sb;
- __set_cmdq_base_resp_addr(msg->req, msg->req_sz, cpu_to_le64(sbuf->dma_addr));
+
+ __set_cmdq_base_resp_addr(msg->req, msg->req_sz,
+ cpu_to_le64(sbuf->dma_addr));
__set_cmdq_base_resp_size(msg->req, msg->req_sz,
- ALIGN(sbuf->size, BNXT_QPLIB_CMDQE_UNITS));
+ ALIGN(sbuf->size,
+ BNXT_QPLIB_CMDQE_UNITS));
}
preq = (u8 *)msg->req;
@@ -166,11 +343,6 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw,
/* Locate the next cmdq slot */
sw_prod = HWQ_CMP(hwq->prod, hwq);
cmdqe = bnxt_qplib_get_qe(hwq, sw_prod, NULL);
- if (!cmdqe) {
- dev_err(&pdev->dev,
- "RCFW request failed with no cmdqe!\n");
- goto done;
- }
/* Copy a segment of the req cmd to the cmdq */
memset(cmdqe, 0, sizeof(*cmdqe));
memcpy(cmdqe, preq, min_t(u32, bsize, sizeof(*cmdqe)));
@@ -180,7 +352,7 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw,
} while (bsize > 0);
cmdq->seq_num++;
- cmdq_prod = hwq->prod;
+ cmdq_prod = hwq->prod & 0xFFFF;
if (test_bit(FIRMWARE_FIRST_FLAG, &cmdq->flags)) {
/* The very first doorbell write
* is required to set this flag
@@ -194,51 +366,158 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw,
wmb();
writel(cmdq_prod, cmdq->cmdq_mbox.prod);
writel(RCFW_CMDQ_TRIG_VAL, cmdq->cmdq_mbox.db);
-done:
spin_unlock_irqrestore(&hwq->lock, flags);
/* Return the CREQ response pointer */
return 0;
}
-int bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw,
- struct bnxt_qplib_cmdqmsg *msg)
+/**
+ * __poll_for_resp - self poll completion for rcfw command
+ * @rcfw:   rcfw channel instance of rdev
+ * @cookie: cookie to track the command
+ *
+ * It works the same as __wait_for_resp except this function will
+ * do self polling in short intervals since the interrupt is disabled.
+ * This function cannot be called from a non-sleepable context.
+ *
+ * Returns:
+ * -ETIMEDOUT if the command is not completed within the timeout interval.
+ * 0 if the command is completed by firmware.
+ */
+static int __poll_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie)
+{
+ struct bnxt_qplib_cmdq_ctx *cmdq = &rcfw->cmdq;
+ struct bnxt_qplib_crsqe *crsqe;
+ unsigned long issue_time;
+ int ret;
+
+ issue_time = jiffies;
+ crsqe = &rcfw->crsqe_tbl[cookie];
+
+ do {
+ if (test_bit(ERR_DEVICE_DETACHED, &cmdq->flags))
+ return bnxt_qplib_map_rc(crsqe->opcode);
+ if (test_bit(FIRMWARE_STALL_DETECTED, &cmdq->flags))
+ return -ETIMEDOUT;
+
+ usleep_range(1000, 1001);
+
+ bnxt_qplib_service_creq(&rcfw->creq.creq_tasklet);
+ if (!crsqe->is_in_used)
+ return 0;
+ if (jiffies_to_msecs(jiffies - issue_time) >
+ (rcfw->max_timeout * 1000)) {
+ ret = bnxt_re_is_fw_stalled(rcfw, cookie);
+ if (ret)
+ return ret;
+ }
+ } while (true);
+}
+
+static int __send_message_basic_sanity(struct bnxt_qplib_rcfw *rcfw,
+ struct bnxt_qplib_cmdqmsg *msg,
+ u8 opcode)
+{
+ struct bnxt_qplib_cmdq_ctx *cmdq;
+
+ cmdq = &rcfw->cmdq;
+
+ /* Prevent posting if f/w is not in a state to process */
+ if (test_bit(ERR_DEVICE_DETACHED, &rcfw->cmdq.flags))
+ return bnxt_qplib_map_rc(opcode);
+ if (test_bit(FIRMWARE_STALL_DETECTED, &cmdq->flags))
+ return -ETIMEDOUT;
+
+ if (test_bit(FIRMWARE_INITIALIZED_FLAG, &cmdq->flags) &&
+ opcode == CMDQ_BASE_OPCODE_INITIALIZE_FW) {
+ dev_err(&rcfw->pdev->dev, "QPLIB: RCFW already initialized!");
+ return -EINVAL;
+ }
+
+ if (!test_bit(FIRMWARE_INITIALIZED_FLAG, &cmdq->flags) &&
+ (opcode != CMDQ_BASE_OPCODE_QUERY_FUNC &&
+ opcode != CMDQ_BASE_OPCODE_INITIALIZE_FW &&
+ opcode != CMDQ_BASE_OPCODE_QUERY_VERSION)) {
+ dev_err(&rcfw->pdev->dev,
+ "QPLIB: RCFW not initialized, reject opcode 0x%x",
+ opcode);
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+/* This function just posts the command and does not wait for completion */
+static void __destroy_timedout_ah(struct bnxt_qplib_rcfw *rcfw,
+ struct creq_create_ah_resp *create_ah_resp)
+{
+ struct bnxt_qplib_cmdqmsg msg = {};
+ struct cmdq_destroy_ah req = {};
+
+ bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req,
+ CMDQ_BASE_OPCODE_DESTROY_AH,
+ sizeof(req));
+ req.ah_cid = create_ah_resp->xid;
+ msg.req = (struct cmdq_base *)&req;
+ msg.req_sz = sizeof(req);
+ __send_message_no_waiter(rcfw, &msg);
+ dev_info_ratelimited(&rcfw->pdev->dev,
+ "From %s: ah_cid = %d timeout_send %d\n",
+ __func__, req.ah_cid,
+ atomic_read(&rcfw->timeout_send));
+}
+
+/**
+ * __bnxt_qplib_rcfw_send_message - qplib interface to send
+ * and complete rcfw command.
+ * @rcfw: rcfw channel instance of rdev
+ * @msg: qplib message internal
+ *
+ * This function does not account for the shadow queue depth. It will send
+ * every command unconditionally as long as the send queue is not full.
+ *
+ * Returns:
+ * 0 if the command is completed by firmware.
+ * Non-zero if the command is not completed by firmware.
+ */
+static int __bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw,
+ struct bnxt_qplib_cmdqmsg *msg)
{
struct creq_qp_event *evnt = (struct creq_qp_event *)msg->resp;
+ struct bnxt_qplib_crsqe *crsqe;
+ unsigned long flags;
u16 cookie;
- u8 opcode, retry_cnt = 0xFF;
int rc = 0;
+ u8 opcode;
- /* Prevent posting if f/w is not in a state to process */
- if (test_bit(ERR_DEVICE_DETACHED, &rcfw->cmdq.flags))
- return 0;
+ opcode = __get_cmdq_base_opcode(msg->req, msg->req_sz);
- do {
- opcode = __get_cmdq_base_opcode(msg->req, msg->req_sz);
- rc = __send_message(rcfw, msg);
- cookie = le16_to_cpu(__get_cmdq_base_cookie(msg->req, msg->req_sz)) &
- RCFW_MAX_COOKIE_VALUE;
- if (!rc)
- break;
- if (!retry_cnt || (rc != -EAGAIN && rc != -EBUSY)) {
- /* send failed */
- dev_err(&rcfw->pdev->dev, "cmdq[%#x]=%#x send failed\n",
- cookie, opcode);
- return rc;
- }
- msg->block ? mdelay(1) : usleep_range(500, 1000);
+ rc = __send_message_basic_sanity(rcfw, msg, opcode);
+ if (rc)
+ return rc;
- } while (retry_cnt--);
+ rc = __send_message(rcfw, msg, opcode);
+ if (rc)
+ return rc;
+
+ cookie = le16_to_cpu(__get_cmdq_base_cookie(msg->req, msg->req_sz))
+ & RCFW_MAX_COOKIE_VALUE;
if (msg->block)
rc = __block_for_resp(rcfw, cookie);
- else
+ else if (atomic_read(&rcfw->rcfw_intr_enabled))
rc = __wait_for_resp(rcfw, cookie);
+ else
+ rc = __poll_for_resp(rcfw, cookie);
+
if (rc) {
- /* timed out */
- dev_err(&rcfw->pdev->dev, "cmdq[%#x]=%#x timedout (%d)msec\n",
- cookie, opcode, RCFW_CMD_WAIT_TIME_MS);
- set_bit(FIRMWARE_TIMED_OUT, &rcfw->cmdq.flags);
- return rc;
+ spin_lock_irqsave(&rcfw->cmdq.hwq.lock, flags);
+ crsqe = &rcfw->crsqe_tbl[cookie];
+ crsqe->is_waiter_alive = false;
+ if (rc == -ENODEV)
+ set_bit(FIRMWARE_STALL_DETECTED, &rcfw->cmdq.flags);
+ spin_unlock_irqrestore(&rcfw->cmdq.hwq.lock, flags);
+ return -ETIMEDOUT;
}
if (evnt->status) {
@@ -250,6 +529,48 @@ int bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw,
return rc;
}
+
+/**
+ * bnxt_qplib_rcfw_send_message - qplib interface to send
+ * and complete rcfw command.
+ * @rcfw: rcfw channel instance of rdev
+ * @msg: qplib message internal
+ *
+ * The driver interacts with firmware through the rcfw channel/slow path
+ * in two ways:
+ * a. Blocking rcfw command send. In this path, the driver cannot hold
+ * the context for a long period since it holds the CPU until the
+ * command completes.
+ * b. Non-blocking rcfw command send. In this path, the driver can hold
+ * the context for a longer period. Many commands may be pending for
+ * completion because of the non-blocking nature.
+ *
+ * The driver uses a shadow queue depth. The current queue depth of 8K
+ * (given the rcfw message size, only about ~4K rcfw commands can actually
+ * be outstanding) is not optimal for rcfw command processing in firmware.
+ *
+ * Restrict non-blocking rcfw commands to at most
+ * #RCFW_CMD_NON_BLOCKING_SHADOW_QD. Allow all blocking commands as long
+ * as the queue is not full.
+ *
+ * Returns:
+ * 0 if the command is completed by firmware.
+ * Non-zero if the command is not completed by firmware.
+ */
+int bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw,
+ struct bnxt_qplib_cmdqmsg *msg)
+{
+ int ret;
+
+ if (!msg->block) {
+ down(&rcfw->rcfw_inflight);
+ ret = __bnxt_qplib_rcfw_send_message(rcfw, msg);
+ up(&rcfw->rcfw_inflight);
+ } else {
+ ret = __bnxt_qplib_rcfw_send_message(rcfw, msg);
+ }
+
+ return ret;
+}
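
A minimal caller sketch (illustrative only, not part of the patch) of how a
slow-path command is issued through this interface, modeled on the destroy-AH
helpers visible elsewhere in this series; example_destroy_ah() and its
parameters are hypothetical names:

	static int example_destroy_ah(struct bnxt_qplib_rcfw *rcfw,
				      __le32 ah_cid, bool block)
	{
		struct creq_destroy_ah_resp resp = {};
		struct bnxt_qplib_cmdqmsg msg = {};
		struct cmdq_destroy_ah req = {};

		bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req,
					 CMDQ_BASE_OPCODE_DESTROY_AH,
					 sizeof(req));
		req.ah_cid = ah_cid;
		bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, NULL, sizeof(req),
					sizeof(resp), block);
		/* non-blocking callers are throttled by the rcfw_inflight semaphore */
		return bnxt_qplib_rcfw_send_message(rcfw, &msg);
	}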
+
/* Completions */
static int bnxt_qplib_process_func_event(struct bnxt_qplib_rcfw *rcfw,
struct creq_func_event *func_event)
@@ -295,19 +616,20 @@ static int bnxt_qplib_process_func_event(struct bnxt_qplib_rcfw *rcfw,
}
static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw,
- struct creq_qp_event *qp_event)
+ struct creq_qp_event *qp_event,
+ u32 *num_wait)
{
struct creq_qp_error_notification *err_event;
struct bnxt_qplib_hwq *hwq = &rcfw->cmdq.hwq;
struct bnxt_qplib_crsqe *crsqe;
+ u32 qp_id, tbl_indx, req_size;
struct bnxt_qplib_qp *qp;
- u16 cbit, blocked = 0;
+ u16 cookie, blocked = 0;
+ bool is_waiter_alive;
struct pci_dev *pdev;
unsigned long flags;
- __le16 mcookie;
- u16 cookie;
+ u32 wait_cmds = 0;
int rc = 0;
- u32 qp_id, tbl_indx;
pdev = rcfw->pdev;
switch (qp_event->event) {
@@ -339,33 +661,60 @@ static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw,
spin_lock_irqsave_nested(&hwq->lock, flags,
SINGLE_DEPTH_NESTING);
cookie = le16_to_cpu(qp_event->cookie);
- mcookie = qp_event->cookie;
blocked = cookie & RCFW_CMD_IS_BLOCKING;
cookie &= RCFW_MAX_COOKIE_VALUE;
- cbit = cookie % rcfw->cmdq_depth;
- crsqe = &rcfw->crsqe_tbl[cbit];
- if (crsqe->resp &&
- crsqe->resp->cookie == mcookie) {
- memcpy(crsqe->resp, qp_event, sizeof(*qp_event));
- crsqe->resp = NULL;
- } else {
- if (crsqe->resp && crsqe->resp->cookie)
- dev_err(&pdev->dev,
- "CMD %s cookie sent=%#x, recd=%#x\n",
- crsqe->resp ? "mismatch" : "collision",
- crsqe->resp ? crsqe->resp->cookie : 0,
- mcookie);
+ crsqe = &rcfw->crsqe_tbl[cookie];
+ crsqe->is_in_used = false;
+
+ if (WARN_ONCE(test_bit(FIRMWARE_STALL_DETECTED,
+ &rcfw->cmdq.flags),
+		      "QPLIB: Unresponsive rcfw channel detected!")) {
+		dev_info(&pdev->dev,
+			 "rcfw timed out: cookie = %#x, free_slots = %d",
+ cookie, crsqe->free_slots);
+ spin_unlock_irqrestore(&hwq->lock, flags);
+ return rc;
}
- if (!test_and_clear_bit(cbit, rcfw->cmdq.cmdq_bitmap))
- dev_warn(&pdev->dev,
- "CMD bit %d was not requested\n", cbit);
- hwq->cons += crsqe->req_size;
+
+ if (crsqe->is_internal_cmd && !qp_event->status)
+ atomic_dec(&rcfw->timeout_send);
+
+ if (crsqe->is_waiter_alive) {
+ if (crsqe->resp)
+ memcpy(crsqe->resp, qp_event, sizeof(*qp_event));
+ if (!blocked)
+ wait_cmds++;
+ }
+
+ req_size = crsqe->req_size;
+ is_waiter_alive = crsqe->is_waiter_alive;
+
crsqe->req_size = 0;
+ if (!is_waiter_alive)
+ crsqe->resp = NULL;
- if (!blocked)
- wake_up(&rcfw->cmdq.waitq);
+ hwq->cons += req_size;
+
+	/* This is a case to handle the following scenario -
+	 * Create AH is completed successfully by firmware,
+	 * but the completion took more time and the driver already lost
+	 * the context of create_ah from the caller.
+	 * We have already returned failure for the create_ah verb,
+	 * so let's destroy the same address vector since it is
+	 * no longer used by the stack. We don't care about completion
+	 * in __send_message_no_waiter.
+	 * If destroy_ah fails in firmware, there will be an AH resource
+	 * leak, but that is a relatively non-critical and unlikely
+	 * scenario. The current design does not handle such a case.
+	 */
+ if (!is_waiter_alive && !qp_event->status &&
+ qp_event->event == CREQ_QP_EVENT_EVENT_CREATE_AH)
+ __destroy_timedout_ah(rcfw,
+ (struct creq_create_ah_resp *)
+ qp_event);
spin_unlock_irqrestore(&hwq->lock, flags);
}
+ *num_wait += wait_cmds;
return rc;
}
@@ -379,6 +728,7 @@ static void bnxt_qplib_service_creq(struct tasklet_struct *t)
struct creq_base *creqe;
u32 sw_cons, raw_cons;
unsigned long flags;
+ u32 num_wakeup = 0;
/* Service the CREQ until budget is over */
spin_lock_irqsave(&hwq->lock, flags);
@@ -392,12 +742,14 @@ static void bnxt_qplib_service_creq(struct tasklet_struct *t)
* reading any further.
*/
dma_rmb();
+ rcfw->cmdq.last_seen = jiffies;
type = creqe->type & CREQ_BASE_TYPE_MASK;
switch (type) {
case CREQ_BASE_TYPE_QP_EVENT:
bnxt_qplib_process_qp_event
- (rcfw, (struct creq_qp_event *)creqe);
+ (rcfw, (struct creq_qp_event *)creqe,
+ &num_wakeup);
creq->stats.creq_qp_event_processed++;
break;
case CREQ_BASE_TYPE_FUNC_EVENT:
@@ -425,6 +777,8 @@ static void bnxt_qplib_service_creq(struct tasklet_struct *t)
rcfw->res->cctx, true);
}
spin_unlock_irqrestore(&hwq->lock, flags);
+ if (num_wakeup)
+ wake_up_nr(&rcfw->cmdq.waitq, num_wakeup);
}
static irqreturn_t bnxt_qplib_creq_irq(int irq, void *dev_instance)
@@ -556,7 +910,6 @@ skip_ctx_setup:
void bnxt_qplib_free_rcfw_channel(struct bnxt_qplib_rcfw *rcfw)
{
- bitmap_free(rcfw->cmdq.cmdq_bitmap);
kfree(rcfw->qp_tbl);
kfree(rcfw->crsqe_tbl);
bnxt_qplib_free_hwq(rcfw->res, &rcfw->cmdq.hwq);
@@ -593,13 +946,11 @@ int bnxt_qplib_alloc_rcfw_channel(struct bnxt_qplib_res *res,
"HW channel CREQ allocation failed\n");
goto fail;
}
- if (ctx->hwrm_intf_ver < HWRM_VERSION_RCFW_CMDQ_DEPTH_CHECK)
- rcfw->cmdq_depth = BNXT_QPLIB_CMDQE_MAX_CNT_256;
- else
- rcfw->cmdq_depth = BNXT_QPLIB_CMDQE_MAX_CNT_8192;
+
+ rcfw->cmdq_depth = BNXT_QPLIB_CMDQE_MAX_CNT;
sginfo.pgsize = bnxt_qplib_cmdqe_page_size(rcfw->cmdq_depth);
- hwq_attr.depth = rcfw->cmdq_depth;
+ hwq_attr.depth = rcfw->cmdq_depth & 0x7FFFFFFF;
hwq_attr.stride = BNXT_QPLIB_CMDQE_UNITS;
hwq_attr.type = HWQ_TYPE_CTX;
if (bnxt_qplib_alloc_init_hwq(&cmdq->hwq, &hwq_attr)) {
@@ -613,10 +964,6 @@ int bnxt_qplib_alloc_rcfw_channel(struct bnxt_qplib_res *res,
if (!rcfw->crsqe_tbl)
goto fail;
- cmdq->cmdq_bitmap = bitmap_zalloc(rcfw->cmdq_depth, GFP_KERNEL);
- if (!cmdq->cmdq_bitmap)
- goto fail;
-
/* Allocate one extra to hold the QP1 entries */
rcfw->qp_tbl_size = qp_tbl_sz + 1;
rcfw->qp_tbl = kcalloc(rcfw->qp_tbl_size, sizeof(struct bnxt_qplib_qp_node),
@@ -624,6 +971,8 @@ int bnxt_qplib_alloc_rcfw_channel(struct bnxt_qplib_res *res,
if (!rcfw->qp_tbl)
goto fail;
+ rcfw->max_timeout = res->cctx->hwrm_cmd_max_timeout;
+
return 0;
fail:
@@ -636,6 +985,10 @@ void bnxt_qplib_rcfw_stop_irq(struct bnxt_qplib_rcfw *rcfw, bool kill)
struct bnxt_qplib_creq_ctx *creq;
creq = &rcfw->creq;
+
+ if (!creq->requested)
+ return;
+
tasklet_disable(&creq->creq_tasklet);
/* Mask h/w interrupts */
bnxt_qplib_ring_nq_db(&creq->creq_db.dbinfo, rcfw->res->cctx, false);
@@ -644,17 +997,17 @@ void bnxt_qplib_rcfw_stop_irq(struct bnxt_qplib_rcfw *rcfw, bool kill)
if (kill)
tasklet_kill(&creq->creq_tasklet);
- if (creq->requested) {
- free_irq(creq->msix_vec, rcfw);
- creq->requested = false;
- }
+ free_irq(creq->msix_vec, rcfw);
+ kfree(creq->irq_name);
+ creq->irq_name = NULL;
+ creq->requested = false;
+ atomic_set(&rcfw->rcfw_intr_enabled, 0);
}
void bnxt_qplib_disable_rcfw_channel(struct bnxt_qplib_rcfw *rcfw)
{
struct bnxt_qplib_creq_ctx *creq;
struct bnxt_qplib_cmdq_ctx *cmdq;
- unsigned long indx;
creq = &rcfw->creq;
cmdq = &rcfw->cmdq;
@@ -664,11 +1017,6 @@ void bnxt_qplib_disable_rcfw_channel(struct bnxt_qplib_rcfw *rcfw)
iounmap(cmdq->cmdq_mbox.reg.bar_reg);
iounmap(creq->creq_db.reg.bar_reg);
- indx = find_first_bit(cmdq->cmdq_bitmap, rcfw->cmdq_depth);
- if (indx != rcfw->cmdq_depth)
- dev_err(&rcfw->pdev->dev,
- "disabling RCFW with pending cmd-bit %lx\n", indx);
-
cmdq->cmdq_mbox.reg.bar_reg = NULL;
creq->creq_db.reg.bar_reg = NULL;
creq->aeq_handler = NULL;
@@ -679,9 +1027,11 @@ int bnxt_qplib_rcfw_start_irq(struct bnxt_qplib_rcfw *rcfw, int msix_vector,
bool need_init)
{
struct bnxt_qplib_creq_ctx *creq;
+ struct bnxt_qplib_res *res;
int rc;
creq = &rcfw->creq;
+ res = rcfw->res;
if (creq->requested)
return -EFAULT;
@@ -691,24 +1041,32 @@ int bnxt_qplib_rcfw_start_irq(struct bnxt_qplib_rcfw *rcfw, int msix_vector,
tasklet_setup(&creq->creq_tasklet, bnxt_qplib_service_creq);
else
tasklet_enable(&creq->creq_tasklet);
+
+ creq->irq_name = kasprintf(GFP_KERNEL, "bnxt_re-creq@pci:%s",
+ pci_name(res->pdev));
+ if (!creq->irq_name)
+ return -ENOMEM;
rc = request_irq(creq->msix_vec, bnxt_qplib_creq_irq, 0,
- "bnxt_qplib_creq", rcfw);
- if (rc)
+ creq->irq_name, rcfw);
+ if (rc) {
+ kfree(creq->irq_name);
+ creq->irq_name = NULL;
+ tasklet_disable(&creq->creq_tasklet);
return rc;
+ }
creq->requested = true;
- bnxt_qplib_ring_nq_db(&creq->creq_db.dbinfo, rcfw->res->cctx, true);
+ bnxt_qplib_ring_nq_db(&creq->creq_db.dbinfo, res->cctx, true);
+ atomic_inc(&rcfw->rcfw_intr_enabled);
return 0;
}
-static int bnxt_qplib_map_cmdq_mbox(struct bnxt_qplib_rcfw *rcfw, bool is_vf)
+static int bnxt_qplib_map_cmdq_mbox(struct bnxt_qplib_rcfw *rcfw)
{
struct bnxt_qplib_cmdq_mbox *mbox;
resource_size_t bar_reg;
struct pci_dev *pdev;
- u16 prod_offt;
- int rc = 0;
pdev = rcfw->pdev;
mbox = &rcfw->cmdq.cmdq_mbox;
@@ -733,11 +1091,10 @@ static int bnxt_qplib_map_cmdq_mbox(struct bnxt_qplib_rcfw *rcfw, bool is_vf)
return -ENOMEM;
}
- prod_offt = is_vf ? RCFW_VF_COMM_PROD_OFFSET :
- RCFW_PF_COMM_PROD_OFFSET;
- mbox->prod = (void __iomem *)(mbox->reg.bar_reg + prod_offt);
+ mbox->prod = (void __iomem *)(mbox->reg.bar_reg +
+ RCFW_PF_VF_COMM_PROD_OFFSET);
mbox->db = (void __iomem *)(mbox->reg.bar_reg + RCFW_COMM_TRIG_OFFSET);
- return rc;
+ return 0;
}
static int bnxt_qplib_map_creq_db(struct bnxt_qplib_rcfw *rcfw, u32 reg_offt)
@@ -798,7 +1155,7 @@ static void bnxt_qplib_start_rcfw(struct bnxt_qplib_rcfw *rcfw)
int bnxt_qplib_enable_rcfw_channel(struct bnxt_qplib_rcfw *rcfw,
int msix_vector,
- int cp_bar_reg_off, int virt_fn,
+ int cp_bar_reg_off,
aeq_handler_t aeq_handler)
{
struct bnxt_qplib_cmdq_ctx *cmdq;
@@ -818,7 +1175,7 @@ int bnxt_qplib_enable_rcfw_channel(struct bnxt_qplib_rcfw *rcfw,
creq->stats.creq_func_event_processed = 0;
creq->aeq_handler = aeq_handler;
- rc = bnxt_qplib_map_cmdq_mbox(rcfw, virt_fn);
+ rc = bnxt_qplib_map_cmdq_mbox(rcfw);
if (rc)
return rc;
@@ -834,6 +1191,7 @@ int bnxt_qplib_enable_rcfw_channel(struct bnxt_qplib_rcfw *rcfw,
return rc;
}
+ sema_init(&rcfw->rcfw_inflight, RCFW_CMD_NON_BLOCKING_SHADOW_QD);
bnxt_qplib_start_rcfw(rcfw);
return 0;
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
index dd5651478bbb..7b31bee3e000 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
@@ -45,13 +45,13 @@
#define RCFW_COMM_PCI_BAR_REGION 0
#define RCFW_COMM_CONS_PCI_BAR_REGION 2
#define RCFW_COMM_BASE_OFFSET 0x600
-#define RCFW_PF_COMM_PROD_OFFSET 0xc
-#define RCFW_VF_COMM_PROD_OFFSET 0xc
+#define RCFW_PF_VF_COMM_PROD_OFFSET 0xc
#define RCFW_COMM_TRIG_OFFSET 0x100
#define RCFW_COMM_SIZE 0x104
#define RCFW_DBR_PCI_BAR_REGION 2
#define RCFW_DBR_BASE_PAGE_SHIFT 12
+#define RCFW_FW_STALL_MAX_TIMEOUT 40
/* Cmdq contains a fix number of a 16-Byte slots */
struct bnxt_qplib_cmdqe {
@@ -67,11 +67,12 @@ static inline void bnxt_qplib_rcfw_cmd_prep(struct cmdq_base *req,
req->cmd_size = cmd_size;
}
+/* Shadow queue depth for non-blocking commands */
+#define RCFW_CMD_NON_BLOCKING_SHADOW_QD 64
#define RCFW_CMD_WAIT_TIME_MS 20000 /* 20 Seconds timeout */
/* CMDQ elements */
-#define BNXT_QPLIB_CMDQE_MAX_CNT_256 256
-#define BNXT_QPLIB_CMDQE_MAX_CNT_8192 8192
+#define BNXT_QPLIB_CMDQE_MAX_CNT 8192
#define BNXT_QPLIB_CMDQE_BYTES(depth) ((depth) * BNXT_QPLIB_CMDQE_UNITS)
static inline u32 bnxt_qplib_cmdqe_npages(u32 depth)
@@ -89,6 +90,26 @@ static inline u32 bnxt_qplib_cmdqe_page_size(u32 depth)
return (bnxt_qplib_cmdqe_npages(depth) * PAGE_SIZE);
}
+/* Get the number of command units required for the req. The
+ * function returns the correct value only if called before
+ * bnxt_qplib_set_cmd_slots() rewrites req->cmd_size.
+ */
+static inline u32 bnxt_qplib_get_cmd_slots(struct cmdq_base *req)
+{
+ u32 cmd_units = 0;
+
+ if (HAS_TLV_HEADER(req)) {
+ struct roce_tlv *tlv_req = (struct roce_tlv *)req;
+
+ cmd_units = tlv_req->total_size;
+ } else {
+ cmd_units = (req->cmd_size + BNXT_QPLIB_CMDQE_UNITS - 1) /
+ BNXT_QPLIB_CMDQE_UNITS;
+ }
+
+ return cmd_units;
+}
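
A usage sketch (assumed from how __send_message() in qplib_rcfw.c pairs the
two helpers, with variables as declared there): read the slot count first,
because bnxt_qplib_set_cmd_slots() below rewrites req->cmd_size:

	required_slots = bnxt_qplib_get_cmd_slots(msg->req);
	free_slots = HWQ_FREE_SLOTS(hwq);
	if (required_slots >= free_slots)
		return -EAGAIN;				/* CMDQ full, retry */
	bsize = bnxt_qplib_set_cmd_slots(msg->req);	/* byte count to copy */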
+
static inline u32 bnxt_qplib_set_cmd_slots(struct cmdq_base *req)
{
u32 cmd_byte = 0;
@@ -106,11 +127,10 @@ static inline u32 bnxt_qplib_set_cmd_slots(struct cmdq_base *req)
return cmd_byte;
}
-#define RCFW_MAX_COOKIE_VALUE 0x7FFF
+#define RCFW_MAX_COOKIE_VALUE (BNXT_QPLIB_CMDQE_MAX_CNT - 1)
#define RCFW_CMD_IS_BLOCKING 0x8000
-#define RCFW_BLOCKED_CMD_WAIT_COUNT 20000000UL /* 20 sec */
-#define HWRM_VERSION_RCFW_CMDQ_DEPTH_CHECK 0x1000900020011ULL
+#define HWRM_VERSION_DEV_ATTR_MAX_DPI 0x1000A0000000DULL
/* Crsq buf is 1024-Byte */
struct bnxt_qplib_crsbe {
@@ -132,6 +152,12 @@ typedef int (*aeq_handler_t)(struct bnxt_qplib_rcfw *, void *, void *);
struct bnxt_qplib_crsqe {
struct creq_qp_event *resp;
u32 req_size;
+ /* Free slots at the time of submission */
+ u32 free_slots;
+ u8 opcode;
+ bool is_waiter_alive;
+ bool is_internal_cmd;
+ bool is_in_used;
};
struct bnxt_qplib_rcfw_sbuf {
@@ -149,7 +175,7 @@ struct bnxt_qplib_qp_node {
#define FIRMWARE_INITIALIZED_FLAG (0)
#define FIRMWARE_FIRST_FLAG (31)
-#define FIRMWARE_TIMED_OUT (3)
+#define FIRMWARE_STALL_DETECTED (3)
#define ERR_DEVICE_DETACHED (4)
struct bnxt_qplib_cmdq_mbox {
@@ -163,7 +189,7 @@ struct bnxt_qplib_cmdq_ctx {
struct bnxt_qplib_cmdq_mbox cmdq_mbox;
wait_queue_head_t waitq;
unsigned long flags;
- unsigned long *cmdq_bitmap;
+ unsigned long last_seen;
u32 seq_num;
};
@@ -186,6 +212,7 @@ struct bnxt_qplib_creq_ctx {
u16 ring_id;
int msix_vec;
bool requested; /*irq handler installed */
+ char *irq_name;
};
/* RCFW Communication Channels */
@@ -200,6 +227,11 @@ struct bnxt_qplib_rcfw {
u64 oos_prev;
u32 init_oos_stats;
u32 cmdq_depth;
+ atomic_t rcfw_intr_enabled;
+ struct semaphore rcfw_inflight;
+ atomic_t timeout_send;
+ /* cached from chip cctx for quick reference in slow path */
+ u16 max_timeout;
};
struct bnxt_qplib_cmdqmsg {
@@ -234,7 +266,7 @@ int bnxt_qplib_rcfw_start_irq(struct bnxt_qplib_rcfw *rcfw, int msix_vector,
bool need_init);
int bnxt_qplib_enable_rcfw_channel(struct bnxt_qplib_rcfw *rcfw,
int msix_vector,
- int cp_bar_reg_off, int virt_fn,
+ int cp_bar_reg_off,
aeq_handler_t aeq_handler);
struct bnxt_qplib_rcfw_sbuf *bnxt_qplib_rcfw_alloc_sbuf(
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.c b/drivers/infiniband/hw/bnxt_re/qplib_res.c
index 126d4f26f75a..5fd8f7c90bb0 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_res.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_res.c
@@ -215,17 +215,9 @@ int bnxt_qplib_alloc_init_hwq(struct bnxt_qplib_hwq *hwq,
return -EINVAL;
hwq_attr->sginfo->npages = npages;
} else {
- unsigned long sginfo_num_pages = ib_umem_num_dma_blocks(
- hwq_attr->sginfo->umem, hwq_attr->sginfo->pgsize);
-
+ npages = ib_umem_num_dma_blocks(hwq_attr->sginfo->umem,
+ hwq_attr->sginfo->pgsize);
hwq->is_user = true;
- npages = sginfo_num_pages;
- npages = (npages * PAGE_SIZE) /
- BIT_ULL(hwq_attr->sginfo->pgshft);
- if ((sginfo_num_pages * PAGE_SIZE) %
- BIT_ULL(hwq_attr->sginfo->pgshft))
- if (!npages)
- npages++;
}
if (npages == MAX_PBL_LVL_0_PGS && !hwq_attr->sginfo->nopte) {
@@ -704,44 +696,76 @@ static int bnxt_qplib_alloc_pd_tbl(struct bnxt_qplib_res *res,
}
/* DPIs */
-int bnxt_qplib_alloc_dpi(struct bnxt_qplib_dpi_tbl *dpit,
- struct bnxt_qplib_dpi *dpi,
- void *app)
+int bnxt_qplib_alloc_dpi(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_dpi *dpi,
+ void *app, u8 type)
{
+ struct bnxt_qplib_dpi_tbl *dpit = &res->dpi_tbl;
+ struct bnxt_qplib_reg_desc *reg;
u32 bit_num;
+ u64 umaddr;
+
+ reg = &dpit->wcreg;
+ mutex_lock(&res->dpi_tbl_lock);
bit_num = find_first_bit(dpit->tbl, dpit->max);
- if (bit_num == dpit->max)
+ if (bit_num == dpit->max) {
+ mutex_unlock(&res->dpi_tbl_lock);
return -ENOMEM;
+ }
/* Found unused DPI */
clear_bit(bit_num, dpit->tbl);
dpit->app_tbl[bit_num] = app;
- dpi->dpi = bit_num;
- dpi->dbr = dpit->dbr_bar_reg_iomem + (bit_num * PAGE_SIZE);
- dpi->umdbr = dpit->unmapped_dbr + (bit_num * PAGE_SIZE);
+ dpi->bit = bit_num;
+ dpi->dpi = bit_num + (reg->offset - dpit->ucreg.offset) / PAGE_SIZE;
+
+ umaddr = reg->bar_base + reg->offset + bit_num * PAGE_SIZE;
+ dpi->umdbr = umaddr;
+
+ switch (type) {
+ case BNXT_QPLIB_DPI_TYPE_KERNEL:
+		/* privileged dbr was already mapped; just initialize it. */
+ dpi->umdbr = dpit->ucreg.bar_base +
+ dpit->ucreg.offset + bit_num * PAGE_SIZE;
+ dpi->dbr = dpit->priv_db;
+ dpi->dpi = dpi->bit;
+ break;
+ case BNXT_QPLIB_DPI_TYPE_WC:
+ dpi->dbr = ioremap_wc(umaddr, PAGE_SIZE);
+ break;
+ default:
+ dpi->dbr = ioremap(umaddr, PAGE_SIZE);
+ break;
+ }
+ dpi->type = type;
+ mutex_unlock(&res->dpi_tbl_lock);
return 0;
+
}
int bnxt_qplib_dealloc_dpi(struct bnxt_qplib_res *res,
- struct bnxt_qplib_dpi_tbl *dpit,
- struct bnxt_qplib_dpi *dpi)
+ struct bnxt_qplib_dpi *dpi)
{
- if (dpi->dpi >= dpit->max) {
- dev_warn(&res->pdev->dev, "Invalid DPI? dpi = %d\n", dpi->dpi);
- return -EINVAL;
- }
- if (test_and_set_bit(dpi->dpi, dpit->tbl)) {
- dev_warn(&res->pdev->dev, "Freeing an unused DPI? dpi = %d\n",
- dpi->dpi);
+ struct bnxt_qplib_dpi_tbl *dpit = &res->dpi_tbl;
+
+ mutex_lock(&res->dpi_tbl_lock);
+ if (dpi->dpi && dpi->type != BNXT_QPLIB_DPI_TYPE_KERNEL)
+ pci_iounmap(res->pdev, dpi->dbr);
+
+ if (test_and_set_bit(dpi->bit, dpit->tbl)) {
+ dev_warn(&res->pdev->dev,
+ "Freeing an unused DPI? dpi = %d, bit = %d\n",
+ dpi->dpi, dpi->bit);
+ mutex_unlock(&res->dpi_tbl_lock);
return -EINVAL;
}
if (dpit->app_tbl)
- dpit->app_tbl[dpi->dpi] = NULL;
+ dpit->app_tbl[dpi->bit] = NULL;
memset(dpi, 0, sizeof(*dpi));
-
+ mutex_unlock(&res->dpi_tbl_lock);
return 0;
}
@@ -750,52 +774,38 @@ static void bnxt_qplib_free_dpi_tbl(struct bnxt_qplib_res *res,
{
kfree(dpit->tbl);
kfree(dpit->app_tbl);
- if (dpit->dbr_bar_reg_iomem)
- pci_iounmap(res->pdev, dpit->dbr_bar_reg_iomem);
- memset(dpit, 0, sizeof(*dpit));
+ dpit->tbl = NULL;
+ dpit->app_tbl = NULL;
+ dpit->max = 0;
}
-static int bnxt_qplib_alloc_dpi_tbl(struct bnxt_qplib_res *res,
- struct bnxt_qplib_dpi_tbl *dpit,
- u32 dbr_offset)
+static int bnxt_qplib_alloc_dpi_tbl(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_dev_attr *dev_attr)
{
- u32 dbr_bar_reg = RCFW_DBR_PCI_BAR_REGION;
- resource_size_t bar_reg_base;
- u32 dbr_len, bytes;
-
- if (dpit->dbr_bar_reg_iomem) {
- dev_err(&res->pdev->dev, "DBR BAR region %d already mapped\n",
- dbr_bar_reg);
- return -EALREADY;
- }
+ struct bnxt_qplib_dpi_tbl *dpit;
+ struct bnxt_qplib_reg_desc *reg;
+ unsigned long bar_len;
+ u32 dbr_offset;
+ u32 bytes;
- bar_reg_base = pci_resource_start(res->pdev, dbr_bar_reg);
- if (!bar_reg_base) {
- dev_err(&res->pdev->dev, "BAR region %d resc start failed\n",
- dbr_bar_reg);
- return -ENOMEM;
- }
+ dpit = &res->dpi_tbl;
+ reg = &dpit->wcreg;
- dbr_len = pci_resource_len(res->pdev, dbr_bar_reg) - dbr_offset;
- if (!dbr_len || ((dbr_len & (PAGE_SIZE - 1)) != 0)) {
- dev_err(&res->pdev->dev, "Invalid DBR length %d\n", dbr_len);
- return -ENOMEM;
+ if (!bnxt_qplib_is_chip_gen_p5(res->cctx)) {
+		/* Offset should come from the L2 driver */
+ dbr_offset = dev_attr->l2_db_size;
+ dpit->ucreg.offset = dbr_offset;
+ dpit->wcreg.offset = dbr_offset;
}
- dpit->dbr_bar_reg_iomem = ioremap(bar_reg_base + dbr_offset,
- dbr_len);
- if (!dpit->dbr_bar_reg_iomem) {
- dev_err(&res->pdev->dev,
- "FP: DBR BAR region %d mapping failed\n", dbr_bar_reg);
- return -ENOMEM;
- }
+ bar_len = pci_resource_len(res->pdev, reg->bar_id);
+ dpit->max = (bar_len - reg->offset) / PAGE_SIZE;
+ if (dev_attr->max_dpi)
+ dpit->max = min_t(u32, dpit->max, dev_attr->max_dpi);
- dpit->unmapped_dbr = bar_reg_base + dbr_offset;
- dpit->max = dbr_len / PAGE_SIZE;
-
- dpit->app_tbl = kcalloc(dpit->max, sizeof(void *), GFP_KERNEL);
+ dpit->app_tbl = kcalloc(dpit->max, sizeof(void *), GFP_KERNEL);
if (!dpit->app_tbl)
- goto unmap_io;
+ return -ENOMEM;
bytes = dpit->max >> 3;
if (!bytes)
@@ -805,17 +815,14 @@ static int bnxt_qplib_alloc_dpi_tbl(struct bnxt_qplib_res *res,
if (!dpit->tbl) {
kfree(dpit->app_tbl);
dpit->app_tbl = NULL;
- goto unmap_io;
+ return -ENOMEM;
}
memset((u8 *)dpit->tbl, 0xFF, bytes);
+ dpit->priv_db = dpit->ucreg.bar_reg + dpit->ucreg.offset;
return 0;
-unmap_io:
- pci_iounmap(res->pdev, dpit->dbr_bar_reg_iomem);
- dpit->dbr_bar_reg_iomem = NULL;
- return -ENOMEM;
}
/* Stats */
@@ -882,7 +889,7 @@ int bnxt_qplib_alloc_res(struct bnxt_qplib_res *res, struct pci_dev *pdev,
if (rc)
goto fail;
- rc = bnxt_qplib_alloc_dpi_tbl(res, &res->dpi_tbl, dev_attr->l2_db_size);
+ rc = bnxt_qplib_alloc_dpi_tbl(res, dev_attr);
if (rc)
goto fail;
@@ -892,6 +899,46 @@ fail:
return rc;
}
+void bnxt_qplib_unmap_db_bar(struct bnxt_qplib_res *res)
+{
+ struct bnxt_qplib_reg_desc *reg;
+
+ reg = &res->dpi_tbl.ucreg;
+ if (reg->bar_reg)
+ pci_iounmap(res->pdev, reg->bar_reg);
+ reg->bar_reg = NULL;
+ reg->bar_base = 0;
+ reg->len = 0;
+ reg->bar_id = 0;
+}
+
+int bnxt_qplib_map_db_bar(struct bnxt_qplib_res *res)
+{
+ struct bnxt_qplib_reg_desc *ucreg;
+ struct bnxt_qplib_reg_desc *wcreg;
+
+ wcreg = &res->dpi_tbl.wcreg;
+ wcreg->bar_id = RCFW_DBR_PCI_BAR_REGION;
+ wcreg->bar_base = pci_resource_start(res->pdev, wcreg->bar_id);
+
+ ucreg = &res->dpi_tbl.ucreg;
+ ucreg->bar_id = RCFW_DBR_PCI_BAR_REGION;
+ ucreg->bar_base = pci_resource_start(res->pdev, ucreg->bar_id);
+ ucreg->len = ucreg->offset + PAGE_SIZE;
+ if (!ucreg->len || ((ucreg->len & (PAGE_SIZE - 1)) != 0)) {
+ dev_err(&res->pdev->dev, "QPLIB: invalid dbr length %d",
+ (int)ucreg->len);
+ return -EINVAL;
+ }
+ ucreg->bar_reg = ioremap(ucreg->bar_base, ucreg->len);
+ if (!ucreg->bar_reg) {
+ dev_err(&res->pdev->dev, "privileged dpi map failed!");
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
int bnxt_qplib_determine_atomics(struct pci_dev *dev)
{
int comp;
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h
index 982e2c96dac2..d850a553821e 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_res.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h
@@ -47,7 +47,7 @@ extern const struct bnxt_qplib_gid bnxt_qplib_gid_zero;
struct bnxt_qplib_drv_modes {
u8 wqe_mode;
- /* Other modes to follow here */
+ bool db_push;
};
struct bnxt_qplib_chip_ctx {
@@ -55,9 +55,14 @@ struct bnxt_qplib_chip_ctx {
u8 chip_rev;
u8 chip_metal;
u16 hw_stats_size;
+ u16 hwrm_cmd_max_timeout;
struct bnxt_qplib_drv_modes modes;
+ u64 hwrm_intf_ver;
};
+#define BNXT_QPLIB_DBR_PF_DB_OFFSET 0x10000
+#define BNXT_QPLIB_DBR_VF_DB_OFFSET 0x4000
+
#define PTR_CNT_PER_PG (PAGE_SIZE / sizeof(void *))
#define PTR_MAX_IDX_PER_PG (PTR_CNT_PER_PG - 1)
#define PTR_PG(x) (((x) & ~PTR_MAX_IDX_PER_PG) / PTR_CNT_PER_PG)
@@ -109,6 +114,7 @@ enum bnxt_qplib_hwrm_pg_size {
struct bnxt_qplib_reg_desc {
u8 bar_id;
resource_size_t bar_base;
+ unsigned long offset;
void __iomem *bar_reg;
size_t len;
};
@@ -185,18 +191,27 @@ struct bnxt_qplib_sgid_tbl {
u8 *vlan;
};
+enum {
+ BNXT_QPLIB_DPI_TYPE_KERNEL = 0,
+ BNXT_QPLIB_DPI_TYPE_UC = 1,
+ BNXT_QPLIB_DPI_TYPE_WC = 2
+};
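
A sketch of how the type argument is expected to be used with the reworked
bnxt_qplib_alloc_dpi() prototype below (the real call sites are in the
ib_verbs/main changes of this series; treat this as illustrative only):

	/* privileged (kernel) DPI for driver-owned queues */
	rc = bnxt_qplib_alloc_dpi(res, &dpi, app, BNXT_QPLIB_DPI_TYPE_KERNEL);

	/* write-combining DPI for a user context, when a WC mapping is usable */
	rc = bnxt_qplib_alloc_dpi(res, &dpi, app, BNXT_QPLIB_DPI_TYPE_WC);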
+
struct bnxt_qplib_dpi {
u32 dpi;
+ u32 bit;
void __iomem *dbr;
u64 umdbr;
+ u8 type;
};
struct bnxt_qplib_dpi_tbl {
void **app_tbl;
unsigned long *tbl;
u16 max;
- void __iomem *dbr_bar_reg_iomem;
- u64 unmapped_dbr;
+	struct bnxt_qplib_reg_desc ucreg; /* Holds the entire DB BAR */
+ struct bnxt_qplib_reg_desc wcreg;
+ void __iomem *priv_db;
};
struct bnxt_qplib_stats {
@@ -241,7 +256,6 @@ struct bnxt_qplib_ctx {
struct bnxt_qplib_tqm_ctx tqm_ctx;
struct bnxt_qplib_stats stats;
struct bnxt_qplib_vf_res vf_res;
- u64 hwrm_intf_ver;
};
struct bnxt_qplib_res {
@@ -253,6 +267,8 @@ struct bnxt_qplib_res {
struct bnxt_qplib_pd_tbl pd_tbl;
struct bnxt_qplib_sgid_tbl sgid_tbl;
struct bnxt_qplib_dpi_tbl dpi_tbl;
+ /* To protect the dpi table bit map */
+ struct mutex dpi_tbl_lock;
bool prio;
bool is_vf;
};
@@ -344,11 +360,10 @@ int bnxt_qplib_alloc_pd(struct bnxt_qplib_pd_tbl *pd_tbl,
int bnxt_qplib_dealloc_pd(struct bnxt_qplib_res *res,
struct bnxt_qplib_pd_tbl *pd_tbl,
struct bnxt_qplib_pd *pd);
-int bnxt_qplib_alloc_dpi(struct bnxt_qplib_dpi_tbl *dpit,
- struct bnxt_qplib_dpi *dpi,
- void *app);
+int bnxt_qplib_alloc_dpi(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_dpi *dpi,
+ void *app, u8 type);
int bnxt_qplib_dealloc_dpi(struct bnxt_qplib_res *res,
- struct bnxt_qplib_dpi_tbl *dpi_tbl,
struct bnxt_qplib_dpi *dpi);
void bnxt_qplib_cleanup_res(struct bnxt_qplib_res *res);
int bnxt_qplib_init_res(struct bnxt_qplib_res *res);
@@ -361,6 +376,9 @@ void bnxt_qplib_free_ctx(struct bnxt_qplib_res *res,
int bnxt_qplib_alloc_ctx(struct bnxt_qplib_res *res,
struct bnxt_qplib_ctx *ctx,
bool virt_fn, bool is_p5);
+int bnxt_qplib_map_db_bar(struct bnxt_qplib_res *res);
+void bnxt_qplib_unmap_db_bar(struct bnxt_qplib_res *res);
+
int bnxt_qplib_determine_atomics(struct pci_dev *dev);
static inline void bnxt_qplib_hwq_incr_prod(struct bnxt_qplib_hwq *hwq, u32 cnt)
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c
index 1714a1e23113..ab45f9d4bb02 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c
@@ -170,6 +170,9 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw,
attr->tqm_alloc_reqs[i * 4 + 3] = *(++tqm_alloc);
}
+ if (rcfw->res->cctx->hwrm_intf_ver >= HWRM_VERSION_DEV_ATTR_MAX_DPI)
+ attr->max_dpi = le32_to_cpu(sb->max_dpi);
+
attr->is_atomic = bnxt_qplib_is_atomic_cap(rcfw);
bail:
bnxt_qplib_rcfw_free_sbuf(rcfw, sbuf);
@@ -233,10 +236,6 @@ int bnxt_qplib_del_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
struct bnxt_qplib_rcfw *rcfw = res->rcfw;
int index;
- if (!sgid_tbl) {
- dev_err(&res->pdev->dev, "SGID table not allocated\n");
- return -EINVAL;
- }
/* Do we need a sgid_lock here? */
if (!sgid_tbl->active) {
dev_err(&res->pdev->dev, "SGID table has no active entries\n");
@@ -297,10 +296,6 @@ int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
struct bnxt_qplib_rcfw *rcfw = res->rcfw;
int i, free_idx;
- if (!sgid_tbl) {
- dev_err(&res->pdev->dev, "SGID table not allocated\n");
- return -EINVAL;
- }
/* Do we need a sgid_lock here? */
if (sgid_tbl->active == sgid_tbl->max) {
dev_err(&res->pdev->dev, "SGID table is full\n");
@@ -468,13 +463,14 @@ int bnxt_qplib_create_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah,
return 0;
}
-void bnxt_qplib_destroy_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah,
- bool block)
+int bnxt_qplib_destroy_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah,
+ bool block)
{
struct bnxt_qplib_rcfw *rcfw = res->rcfw;
struct creq_destroy_ah_resp resp = {};
struct bnxt_qplib_cmdqmsg msg = {};
struct cmdq_destroy_ah req = {};
+ int rc;
/* Clean up the AH table in the device */
bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req,
@@ -485,7 +481,8 @@ void bnxt_qplib_destroy_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah,
bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, NULL, sizeof(req),
sizeof(resp), block);
- bnxt_qplib_rcfw_send_message(rcfw, &msg);
+ rc = bnxt_qplib_rcfw_send_message(rcfw, &msg);
+ return rc;
}
/* MRW */
@@ -617,16 +614,15 @@ int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr,
/* Free the hwq if it already exist, must be a rereg */
if (mr->hwq.max_elements)
bnxt_qplib_free_hwq(res, &mr->hwq);
- /* Use system PAGE_SIZE */
hwq_attr.res = res;
hwq_attr.depth = pages;
- hwq_attr.stride = buf_pg_size;
+ hwq_attr.stride = sizeof(dma_addr_t);
hwq_attr.type = HWQ_TYPE_MR;
hwq_attr.sginfo = &sginfo;
hwq_attr.sginfo->umem = umem;
hwq_attr.sginfo->npages = pages;
- hwq_attr.sginfo->pgsize = PAGE_SIZE;
- hwq_attr.sginfo->pgshft = PAGE_SHIFT;
+ hwq_attr.sginfo->pgsize = buf_pg_size;
+ hwq_attr.sginfo->pgshft = ilog2(buf_pg_size);
rc = bnxt_qplib_alloc_init_hwq(&mr->hwq, &hwq_attr);
if (rc) {
dev_err(&res->pdev->dev,
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.h b/drivers/infiniband/hw/bnxt_re/qplib_sp.h
index 5de874659cdf..264ef3cedc45 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_sp.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.h
@@ -72,6 +72,7 @@ struct bnxt_qplib_dev_attr {
u8 tqm_alloc_reqs[MAX_TQM_ALLOC_REQ];
bool is_atomic;
u16 dev_cap_flags;
+ u32 max_dpi;
};
struct bnxt_qplib_pd {
@@ -327,8 +328,8 @@ int bnxt_qplib_set_func_resources(struct bnxt_qplib_res *res,
struct bnxt_qplib_ctx *ctx);
int bnxt_qplib_create_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah,
bool block);
-void bnxt_qplib_destroy_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah,
- bool block);
+int bnxt_qplib_destroy_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah,
+ bool block);
int bnxt_qplib_alloc_mrw(struct bnxt_qplib_res *res,
struct bnxt_qplib_mrw *mrw);
int bnxt_qplib_dereg_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw,
diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c
index 8eca6c14d0cf..2a195c4b0f17 100644
--- a/drivers/infiniband/hw/efa/efa_verbs.c
+++ b/drivers/infiniband/hw/efa/efa_verbs.c
@@ -1403,7 +1403,7 @@ static int pbl_continuous_initialize(struct efa_dev *dev,
*/
static int pbl_indirect_initialize(struct efa_dev *dev, struct pbl_context *pbl)
{
- u32 size_in_pages = DIV_ROUND_UP(pbl->pbl_buf_size_in_bytes, PAGE_SIZE);
+ u32 size_in_pages = DIV_ROUND_UP(pbl->pbl_buf_size_in_bytes, EFA_CHUNK_PAYLOAD_SIZE);
struct scatterlist *sgl;
int sg_dma_cnt, err;
diff --git a/drivers/infiniband/hw/erdma/erdma.h b/drivers/infiniband/hw/erdma/erdma.h
index e819e4032490..f190111840e9 100644
--- a/drivers/infiniband/hw/erdma/erdma.h
+++ b/drivers/infiniband/hw/erdma/erdma.h
@@ -128,13 +128,8 @@ struct erdma_devattr {
int numa_node;
enum erdma_cc_alg cc;
- u32 grp_num;
u32 irq_num;
- bool disable_dwqe;
- u16 dwqe_pages;
- u16 dwqe_entries;
-
u32 max_qp;
u32 max_send_wr;
u32 max_recv_wr;
@@ -215,15 +210,6 @@ struct erdma_dev {
u32 next_alloc_qpn;
u32 next_alloc_cqn;
- spinlock_t db_bitmap_lock;
- /* We provide max 64 uContexts that each has one SQ doorbell Page. */
- DECLARE_BITMAP(sdb_page, ERDMA_DWQE_TYPE0_CNT);
- /*
- * We provide max 496 uContexts that each has one SQ normal Db,
- * and one directWQE db.
- */
- DECLARE_BITMAP(sdb_entry, ERDMA_DWQE_TYPE1_CNT);
-
atomic_t num_ctx;
struct list_head cep_list;
};
@@ -268,6 +254,8 @@ static inline u32 erdma_reg_read32_filed(struct erdma_dev *dev, u32 reg,
return FIELD_GET(filed_mask, val);
}
+#define ERDMA_GET(val, name) FIELD_GET(ERDMA_CMD_##name##_MASK, val)
+
int erdma_cmdq_init(struct erdma_dev *dev);
void erdma_finish_cmdq_init(struct erdma_dev *dev);
void erdma_cmdq_destroy(struct erdma_dev *dev);
diff --git a/drivers/infiniband/hw/erdma/erdma_hw.h b/drivers/infiniband/hw/erdma/erdma_hw.h
index 76ce2856be28..a882b57aa118 100644
--- a/drivers/infiniband/hw/erdma/erdma_hw.h
+++ b/drivers/infiniband/hw/erdma/erdma_hw.h
@@ -82,19 +82,6 @@
#define ERDMA_BAR_CQDB_SPACE_OFFSET \
(ERDMA_BAR_RQDB_SPACE_OFFSET + ERDMA_BAR_RQDB_SPACE_SIZE)
-/* Doorbell page resources related. */
-/*
- * Max # of parallelly issued directSQE is 3072 per device,
- * hardware organizes this into 24 group, per group has 128 credits.
- */
-#define ERDMA_DWQE_MAX_GRP_CNT 24
-#define ERDMA_DWQE_NUM_PER_GRP 128
-
-#define ERDMA_DWQE_TYPE0_CNT 64
-#define ERDMA_DWQE_TYPE1_CNT 496
-/* type1 DB contains 2 DBs, takes 256Byte. */
-#define ERDMA_DWQE_TYPE1_CNT_PER_PAGE 16
-
#define ERDMA_SDB_SHARED_PAGE_INDEX 95
/* Doorbell related. */
@@ -134,7 +121,7 @@
/* CMDQ related. */
#define ERDMA_CMDQ_MAX_OUTSTANDING 128
-#define ERDMA_CMDQ_SQE_SIZE 64
+#define ERDMA_CMDQ_SQE_SIZE 128
/* cmdq sub module definition. */
enum CMDQ_WQE_SUB_MOD {
@@ -159,6 +146,9 @@ enum CMDQ_COMMON_OPCODE {
CMDQ_OPCODE_DESTROY_EQ = 1,
CMDQ_OPCODE_QUERY_FW_INFO = 2,
CMDQ_OPCODE_CONF_MTU = 3,
+ CMDQ_OPCODE_CONF_DEVICE = 5,
+ CMDQ_OPCODE_ALLOC_DB = 8,
+ CMDQ_OPCODE_FREE_DB = 9,
};
/* cmdq-SQE HDR */
@@ -196,11 +186,41 @@ struct erdma_cmdq_destroy_eq_req {
u8 qtype;
};
+/* config device cfg */
+#define ERDMA_CMD_CONFIG_DEVICE_PS_EN_MASK BIT(31)
+#define ERDMA_CMD_CONFIG_DEVICE_PGSHIFT_MASK GENMASK(4, 0)
+
+struct erdma_cmdq_config_device_req {
+ u64 hdr;
+ u32 cfg;
+ u32 rsvd[5];
+};
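
A sketch of filling this request, mirroring erdma_device_config() later in
this patch:

	struct erdma_cmdq_config_device_req req = {};

	erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_COMMON,
				CMDQ_OPCODE_CONF_DEVICE);
	req.cfg = FIELD_PREP(ERDMA_CMD_CONFIG_DEVICE_PGSHIFT_MASK, PAGE_SHIFT) |
		  FIELD_PREP(ERDMA_CMD_CONFIG_DEVICE_PS_EN_MASK, 1);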
+
struct erdma_cmdq_config_mtu_req {
u64 hdr;
u32 mtu;
};
+/* ext db requests (alloc and free) cfg */
+#define ERDMA_CMD_EXT_DB_CQ_EN_MASK BIT(2)
+#define ERDMA_CMD_EXT_DB_RQ_EN_MASK BIT(1)
+#define ERDMA_CMD_EXT_DB_SQ_EN_MASK BIT(0)
+
+struct erdma_cmdq_ext_db_req {
+ u64 hdr;
+ u32 cfg;
+ u16 rdb_off;
+ u16 sdb_off;
+ u16 rsvd0;
+ u16 cdb_off;
+ u32 rsvd1[3];
+};
+
+/* alloc db response qword 0 definition */
+#define ERDMA_CMD_ALLOC_DB_RESP_RDB_MASK GENMASK_ULL(63, 48)
+#define ERDMA_CMD_ALLOC_DB_RESP_CDB_MASK GENMASK_ULL(47, 32)
+#define ERDMA_CMD_ALLOC_DB_RESP_SDB_MASK GENMASK_ULL(15, 0)
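
The response qword is decoded with the ERDMA_GET() helper added in erdma.h; a
sketch mirroring alloc_db_resources() later in this patch, where val0 is the
first response qword returned by erdma_post_cmd_wait():

	sdb_off = ERDMA_GET(val0, ALLOC_DB_RESP_SDB);
	rdb_off = ERDMA_GET(val0, ALLOC_DB_RESP_RDB);
	cdb_off = ERDMA_GET(val0, ALLOC_DB_RESP_CDB);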
+
/* create_cq cfg0 */
#define ERDMA_CMD_CREATE_CQ_DEPTH_MASK GENMASK(31, 24)
#define ERDMA_CMD_CREATE_CQ_PAGESIZE_MASK GENMASK(23, 20)
@@ -209,8 +229,12 @@ struct erdma_cmdq_config_mtu_req {
/* create_cq cfg1 */
#define ERDMA_CMD_CREATE_CQ_MTT_CNT_MASK GENMASK(31, 16)
#define ERDMA_CMD_CREATE_CQ_MTT_TYPE_MASK BIT(15)
+#define ERDMA_CMD_CREATE_CQ_MTT_DB_CFG_MASK BIT(11)
#define ERDMA_CMD_CREATE_CQ_EQN_MASK GENMASK(9, 0)
+/* create_cq cfg2 */
+#define ERDMA_CMD_CREATE_CQ_DB_CFG_MASK GENMASK(15, 0)
+
struct erdma_cmdq_create_cq_req {
u64 hdr;
u32 cfg0;
@@ -219,6 +243,7 @@ struct erdma_cmdq_create_cq_req {
u32 cfg1;
u64 cq_db_info_addr;
u32 first_page_offset;
+ u32 cfg2;
};
/* regmr/deregmr cfg0 */
@@ -278,6 +303,7 @@ struct erdma_cmdq_modify_qp_req {
/* create qp cqn_mtt_cfg */
#define ERDMA_CMD_CREATE_QP_PAGE_SIZE_MASK GENMASK(31, 28)
+#define ERDMA_CMD_CREATE_QP_DB_CFG_MASK BIT(25)
#define ERDMA_CMD_CREATE_QP_CQN_MASK GENMASK(23, 0)
/* create qp mtt_cfg */
@@ -285,6 +311,10 @@ struct erdma_cmdq_modify_qp_req {
#define ERDMA_CMD_CREATE_QP_MTT_CNT_MASK GENMASK(11, 1)
#define ERDMA_CMD_CREATE_QP_MTT_TYPE_MASK BIT(0)
+/* create qp db cfg */
+#define ERDMA_CMD_CREATE_QP_SQDB_CFG_MASK GENMASK(31, 16)
+#define ERDMA_CMD_CREATE_QP_RQDB_CFG_MASK GENMASK(15, 0)
+
#define ERDMA_CMDQ_CREATE_QP_RESP_COOKIE_MASK GENMASK_ULL(31, 0)
struct erdma_cmdq_create_qp_req {
@@ -299,6 +329,11 @@ struct erdma_cmdq_create_qp_req {
u32 rq_mtt_cfg;
u64 sq_db_info_dma_addr;
u64 rq_db_info_dma_addr;
+
+ u64 sq_mtt_entry[3];
+ u64 rq_mtt_entry[3];
+
+ u32 db_cfg;
};
struct erdma_cmdq_destroy_qp_req {
@@ -329,6 +364,7 @@ struct erdma_cmdq_reflush_req {
enum {
ERDMA_DEV_CAP_FLAGS_ATOMIC = 1 << 7,
+ ERDMA_DEV_CAP_FLAGS_EXTEND_DB = 1 << 3,
};
#define ERDMA_CMD_INFO0_FW_VER_MASK GENMASK_ULL(31, 0)
diff --git a/drivers/infiniband/hw/erdma/erdma_main.c b/drivers/infiniband/hw/erdma/erdma_main.c
index 7c74abeee864..0880c79a978c 100644
--- a/drivers/infiniband/hw/erdma/erdma_main.c
+++ b/drivers/infiniband/hw/erdma/erdma_main.c
@@ -130,33 +130,6 @@ static irqreturn_t erdma_comm_irq_handler(int irq, void *data)
return IRQ_HANDLED;
}
-static void erdma_dwqe_resource_init(struct erdma_dev *dev)
-{
- int total_pages, type0, type1;
-
- dev->attrs.grp_num = erdma_reg_read32(dev, ERDMA_REGS_GRP_NUM_REG);
-
- if (dev->attrs.grp_num < 4)
- dev->attrs.disable_dwqe = true;
- else
- dev->attrs.disable_dwqe = false;
-
- /* One page contains 4 goups. */
- total_pages = dev->attrs.grp_num * 4;
-
- if (dev->attrs.grp_num >= ERDMA_DWQE_MAX_GRP_CNT) {
- dev->attrs.grp_num = ERDMA_DWQE_MAX_GRP_CNT;
- type0 = ERDMA_DWQE_TYPE0_CNT;
- type1 = ERDMA_DWQE_TYPE1_CNT / ERDMA_DWQE_TYPE1_CNT_PER_PAGE;
- } else {
- type1 = total_pages / 3;
- type0 = total_pages - type1 - 1;
- }
-
- dev->attrs.dwqe_pages = type0;
- dev->attrs.dwqe_entries = type1 * ERDMA_DWQE_TYPE1_CNT_PER_PAGE;
-}
-
static int erdma_request_vectors(struct erdma_dev *dev)
{
int expect_irq_num = min(num_possible_cpus() + 1, ERDMA_NUM_MSIX_VEC);
@@ -199,8 +172,6 @@ static int erdma_device_init(struct erdma_dev *dev, struct pci_dev *pdev)
{
int ret;
- erdma_dwqe_resource_init(dev);
-
ret = dma_set_mask_and_coherent(&pdev->dev,
DMA_BIT_MASK(ERDMA_PCI_WIDTH));
if (ret)
@@ -426,6 +397,22 @@ static int erdma_dev_attrs_init(struct erdma_dev *dev)
return err;
}
+static int erdma_device_config(struct erdma_dev *dev)
+{
+ struct erdma_cmdq_config_device_req req = {};
+
+ if (!(dev->attrs.cap_flags & ERDMA_DEV_CAP_FLAGS_EXTEND_DB))
+ return 0;
+
+ erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_COMMON,
+ CMDQ_OPCODE_CONF_DEVICE);
+
+ req.cfg = FIELD_PREP(ERDMA_CMD_CONFIG_DEVICE_PGSHIFT_MASK, PAGE_SHIFT) |
+ FIELD_PREP(ERDMA_CMD_CONFIG_DEVICE_PS_EN_MASK, 1);
+
+ return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
+}
+
static int erdma_res_cb_init(struct erdma_dev *dev)
{
int i, j;
@@ -512,6 +499,10 @@ static int erdma_ib_device_add(struct pci_dev *pdev)
if (ret)
return ret;
+ ret = erdma_device_config(dev);
+ if (ret)
+ return ret;
+
ibdev->node_type = RDMA_NODE_RNIC;
memcpy(ibdev->node_desc, ERDMA_NODE_DESC, sizeof(ERDMA_NODE_DESC));
@@ -537,10 +528,6 @@ static int erdma_ib_device_add(struct pci_dev *pdev)
if (ret)
return ret;
- spin_lock_init(&dev->db_bitmap_lock);
- bitmap_zero(dev->sdb_page, ERDMA_DWQE_TYPE0_CNT);
- bitmap_zero(dev->sdb_entry, ERDMA_DWQE_TYPE1_CNT);
-
atomic_set(&dev->num_ctx, 0);
mac = erdma_reg_read32(dev, ERDMA_REGS_NETDEV_MAC_L_REG);
diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.c b/drivers/infiniband/hw/erdma/erdma_verbs.c
index 83e1b0d55977..517676fbb8b1 100644
--- a/drivers/infiniband/hw/erdma/erdma_verbs.c
+++ b/drivers/infiniband/hw/erdma/erdma_verbs.c
@@ -19,10 +19,11 @@
#include "erdma_cm.h"
#include "erdma_verbs.h"
-static int create_qp_cmd(struct erdma_dev *dev, struct erdma_qp *qp)
+static int create_qp_cmd(struct erdma_ucontext *uctx, struct erdma_qp *qp)
{
- struct erdma_cmdq_create_qp_req req;
+ struct erdma_dev *dev = to_edev(qp->ibqp.device);
struct erdma_pd *pd = to_epd(qp->ibqp.pd);
+ struct erdma_cmdq_create_qp_req req;
struct erdma_uqp *user_qp;
u64 resp0, resp1;
int err;
@@ -93,6 +94,16 @@ static int create_qp_cmd(struct erdma_dev *dev, struct erdma_qp *qp)
req.sq_db_info_dma_addr = user_qp->sq_db_info_dma_addr;
req.rq_db_info_dma_addr = user_qp->rq_db_info_dma_addr;
+
+ if (uctx->ext_db.enable) {
+ req.sq_cqn_mtt_cfg |=
+ FIELD_PREP(ERDMA_CMD_CREATE_QP_DB_CFG_MASK, 1);
+ req.db_cfg =
+ FIELD_PREP(ERDMA_CMD_CREATE_QP_SQDB_CFG_MASK,
+ uctx->ext_db.sdb_off) |
+ FIELD_PREP(ERDMA_CMD_CREATE_QP_RQDB_CFG_MASK,
+ uctx->ext_db.rdb_off);
+ }
}
err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), &resp0,
@@ -146,11 +157,12 @@ post_cmd:
return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
}
-static int create_cq_cmd(struct erdma_dev *dev, struct erdma_cq *cq)
+static int create_cq_cmd(struct erdma_ucontext *uctx, struct erdma_cq *cq)
{
+ struct erdma_dev *dev = to_edev(cq->ibcq.device);
struct erdma_cmdq_create_cq_req req;
- u32 page_size;
struct erdma_mem *mtt;
+ u32 page_size;
erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
CMDQ_OPCODE_CREATE_CQ);
@@ -192,6 +204,13 @@ static int create_cq_cmd(struct erdma_dev *dev, struct erdma_cq *cq)
req.first_page_offset = mtt->page_offset;
req.cq_db_info_addr = cq->user_cq.db_info_dma_addr;
+
+ if (uctx->ext_db.enable) {
+ req.cfg1 |= FIELD_PREP(
+ ERDMA_CMD_CREATE_CQ_MTT_DB_CFG_MASK, 1);
+ req.cfg2 = FIELD_PREP(ERDMA_CMD_CREATE_CQ_DB_CFG_MASK,
+ uctx->ext_db.cdb_off);
+ }
}
return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
@@ -753,7 +772,7 @@ int erdma_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs,
qp->attrs.state = ERDMA_QP_STATE_IDLE;
INIT_DELAYED_WORK(&qp->reflush_dwork, erdma_flush_worker);
- ret = create_qp_cmd(dev, qp);
+ ret = create_qp_cmd(uctx, qp);
if (ret)
goto err_out_cmd;
@@ -1130,62 +1149,73 @@ void erdma_mmap_free(struct rdma_user_mmap_entry *rdma_entry)
kfree(entry);
}
-#define ERDMA_SDB_PAGE 0
-#define ERDMA_SDB_ENTRY 1
-#define ERDMA_SDB_SHARED 2
-
-static void alloc_db_resources(struct erdma_dev *dev,
- struct erdma_ucontext *ctx)
+static int alloc_db_resources(struct erdma_dev *dev, struct erdma_ucontext *ctx,
+ bool ext_db_en)
{
- u32 bitmap_idx;
- struct erdma_devattr *attrs = &dev->attrs;
-
- if (attrs->disable_dwqe)
- goto alloc_normal_db;
-
- /* Try to alloc independent SDB page. */
- spin_lock(&dev->db_bitmap_lock);
- bitmap_idx = find_first_zero_bit(dev->sdb_page, attrs->dwqe_pages);
- if (bitmap_idx != attrs->dwqe_pages) {
- set_bit(bitmap_idx, dev->sdb_page);
- spin_unlock(&dev->db_bitmap_lock);
-
- ctx->sdb_type = ERDMA_SDB_PAGE;
- ctx->sdb_idx = bitmap_idx;
- ctx->sdb_page_idx = bitmap_idx;
- ctx->sdb = dev->func_bar_addr + ERDMA_BAR_SQDB_SPACE_OFFSET +
- (bitmap_idx << PAGE_SHIFT);
- ctx->sdb_page_off = 0;
+ struct erdma_cmdq_ext_db_req req = {};
+ u64 val0, val1;
+ int ret;
- return;
+ /*
+	 * CAP_SYS_RAWIO is required if the hardware does not support the
+	 * extended doorbell mechanism.
+ */
+ if (!ext_db_en && !capable(CAP_SYS_RAWIO))
+ return -EPERM;
+
+ if (!ext_db_en) {
+ ctx->sdb = dev->func_bar_addr + ERDMA_BAR_SQDB_SPACE_OFFSET;
+ ctx->rdb = dev->func_bar_addr + ERDMA_BAR_RQDB_SPACE_OFFSET;
+ ctx->cdb = dev->func_bar_addr + ERDMA_BAR_CQDB_SPACE_OFFSET;
+ return 0;
}
- bitmap_idx = find_first_zero_bit(dev->sdb_entry, attrs->dwqe_entries);
- if (bitmap_idx != attrs->dwqe_entries) {
- set_bit(bitmap_idx, dev->sdb_entry);
- spin_unlock(&dev->db_bitmap_lock);
+ erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_COMMON,
+ CMDQ_OPCODE_ALLOC_DB);
+
+ req.cfg = FIELD_PREP(ERDMA_CMD_EXT_DB_CQ_EN_MASK, 1) |
+ FIELD_PREP(ERDMA_CMD_EXT_DB_RQ_EN_MASK, 1) |
+ FIELD_PREP(ERDMA_CMD_EXT_DB_SQ_EN_MASK, 1);
+
+ ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), &val0, &val1);
+ if (ret)
+ return ret;
- ctx->sdb_type = ERDMA_SDB_ENTRY;
- ctx->sdb_idx = bitmap_idx;
- ctx->sdb_page_idx = attrs->dwqe_pages +
- bitmap_idx / ERDMA_DWQE_TYPE1_CNT_PER_PAGE;
- ctx->sdb_page_off = bitmap_idx % ERDMA_DWQE_TYPE1_CNT_PER_PAGE;
+ ctx->ext_db.enable = true;
+ ctx->ext_db.sdb_off = ERDMA_GET(val0, ALLOC_DB_RESP_SDB);
+ ctx->ext_db.rdb_off = ERDMA_GET(val0, ALLOC_DB_RESP_RDB);
+ ctx->ext_db.cdb_off = ERDMA_GET(val0, ALLOC_DB_RESP_CDB);
- ctx->sdb = dev->func_bar_addr + ERDMA_BAR_SQDB_SPACE_OFFSET +
- (ctx->sdb_page_idx << PAGE_SHIFT);
+ ctx->sdb = dev->func_bar_addr + (ctx->ext_db.sdb_off << PAGE_SHIFT);
+	ctx->cdb = dev->func_bar_addr + (ctx->ext_db.cdb_off << PAGE_SHIFT);
+	ctx->rdb = dev->func_bar_addr + (ctx->ext_db.rdb_off << PAGE_SHIFT);
+
+ return 0;
+}
+static void free_db_resources(struct erdma_dev *dev, struct erdma_ucontext *ctx)
+{
+ struct erdma_cmdq_ext_db_req req = {};
+ int ret;
+
+ if (!ctx->ext_db.enable)
return;
- }
- spin_unlock(&dev->db_bitmap_lock);
+ erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_COMMON,
+ CMDQ_OPCODE_FREE_DB);
+
+ req.cfg = FIELD_PREP(ERDMA_CMD_EXT_DB_CQ_EN_MASK, 1) |
+ FIELD_PREP(ERDMA_CMD_EXT_DB_RQ_EN_MASK, 1) |
+ FIELD_PREP(ERDMA_CMD_EXT_DB_SQ_EN_MASK, 1);
-alloc_normal_db:
- ctx->sdb_type = ERDMA_SDB_SHARED;
- ctx->sdb_idx = 0;
- ctx->sdb_page_idx = ERDMA_SDB_SHARED_PAGE_INDEX;
- ctx->sdb_page_off = 0;
+ req.sdb_off = ctx->ext_db.sdb_off;
+ req.rdb_off = ctx->ext_db.rdb_off;
+ req.cdb_off = ctx->ext_db.cdb_off;
- ctx->sdb = dev->func_bar_addr + (ctx->sdb_page_idx << PAGE_SHIFT);
+ ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
+ if (ret)
+ ibdev_err_ratelimited(&dev->ibdev,
+ "free db resources failed %d", ret);
}
static void erdma_uctx_user_mmap_entries_remove(struct erdma_ucontext *uctx)
@@ -1207,71 +1237,67 @@ int erdma_alloc_ucontext(struct ib_ucontext *ibctx, struct ib_udata *udata)
goto err_out;
}
- INIT_LIST_HEAD(&ctx->dbrecords_page_list);
- mutex_init(&ctx->dbrecords_page_mutex);
-
- alloc_db_resources(dev, ctx);
-
- ctx->rdb = dev->func_bar_addr + ERDMA_BAR_RQDB_SPACE_OFFSET;
- ctx->cdb = dev->func_bar_addr + ERDMA_BAR_CQDB_SPACE_OFFSET;
-
if (udata->outlen < sizeof(uresp)) {
ret = -EINVAL;
goto err_out;
}
+ INIT_LIST_HEAD(&ctx->dbrecords_page_list);
+ mutex_init(&ctx->dbrecords_page_mutex);
+
+ ret = alloc_db_resources(dev, ctx,
+ !!(dev->attrs.cap_flags &
+ ERDMA_DEV_CAP_FLAGS_EXTEND_DB));
+ if (ret)
+ goto err_out;
+
ctx->sq_db_mmap_entry = erdma_user_mmap_entry_insert(
ctx, (void *)ctx->sdb, PAGE_SIZE, ERDMA_MMAP_IO_NC, &uresp.sdb);
if (!ctx->sq_db_mmap_entry) {
ret = -ENOMEM;
- goto err_out;
+ goto err_free_ext_db;
}
ctx->rq_db_mmap_entry = erdma_user_mmap_entry_insert(
ctx, (void *)ctx->rdb, PAGE_SIZE, ERDMA_MMAP_IO_NC, &uresp.rdb);
if (!ctx->rq_db_mmap_entry) {
ret = -EINVAL;
- goto err_out;
+ goto err_put_mmap_entries;
}
ctx->cq_db_mmap_entry = erdma_user_mmap_entry_insert(
ctx, (void *)ctx->cdb, PAGE_SIZE, ERDMA_MMAP_IO_NC, &uresp.cdb);
if (!ctx->cq_db_mmap_entry) {
ret = -EINVAL;
- goto err_out;
+ goto err_put_mmap_entries;
}
uresp.dev_id = dev->pdev->device;
- uresp.sdb_type = ctx->sdb_type;
- uresp.sdb_offset = ctx->sdb_page_off;
ret = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
if (ret)
- goto err_out;
+ goto err_put_mmap_entries;
return 0;
-err_out:
+err_put_mmap_entries:
erdma_uctx_user_mmap_entries_remove(ctx);
+
+err_free_ext_db:
+ free_db_resources(dev, ctx);
+
+err_out:
atomic_dec(&dev->num_ctx);
return ret;
}
void erdma_dealloc_ucontext(struct ib_ucontext *ibctx)
{
- struct erdma_ucontext *ctx = to_ectx(ibctx);
struct erdma_dev *dev = to_edev(ibctx->device);
-
- spin_lock(&dev->db_bitmap_lock);
- if (ctx->sdb_type == ERDMA_SDB_PAGE)
- clear_bit(ctx->sdb_idx, dev->sdb_page);
- else if (ctx->sdb_type == ERDMA_SDB_ENTRY)
- clear_bit(ctx->sdb_idx, dev->sdb_entry);
+ struct erdma_ucontext *ctx = to_ectx(ibctx);
erdma_uctx_user_mmap_entries_remove(ctx);
-
- spin_unlock(&dev->db_bitmap_lock);
-
+ free_db_resources(dev, ctx);
atomic_dec(&dev->num_ctx);
}
@@ -1438,7 +1464,7 @@ int erdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
goto err_out_xa;
}
- ret = create_cq_cmd(dev, cq);
+ ret = create_cq_cmd(ctx, cq);
if (ret)
goto err_free_res;
diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.h b/drivers/infiniband/hw/erdma/erdma_verbs.h
index 131cf5f40982..429fc3063f98 100644
--- a/drivers/infiniband/hw/erdma/erdma_verbs.h
+++ b/drivers/infiniband/hw/erdma/erdma_verbs.h
@@ -31,13 +31,18 @@ struct erdma_user_mmap_entry {
u8 mmap_flag;
};
+struct erdma_ext_db_info {
+ bool enable;
+ u16 sdb_off;
+ u16 rdb_off;
+ u16 cdb_off;
+};
+
struct erdma_ucontext {
struct ib_ucontext ibucontext;
- u32 sdb_type;
- u32 sdb_idx;
- u32 sdb_page_idx;
- u32 sdb_page_off;
+ struct erdma_ext_db_info ext_db;
+
u64 sdb;
u64 rdb;
u64 cdb;
diff --git a/drivers/infiniband/hw/hfi1/ipoib_tx.c b/drivers/infiniband/hw/hfi1/ipoib_tx.c
index 8973a081d641..e7d831330278 100644
--- a/drivers/infiniband/hw/hfi1/ipoib_tx.c
+++ b/drivers/infiniband/hw/hfi1/ipoib_tx.c
@@ -215,11 +215,11 @@ static int hfi1_ipoib_build_ulp_payload(struct ipoib_txreq *tx,
const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
ret = sdma_txadd_page(dd,
- NULL,
txreq,
skb_frag_page(frag),
frag->bv_offset,
- skb_frag_size(frag));
+ skb_frag_size(frag),
+ NULL, NULL, NULL);
if (unlikely(ret))
break;
}
diff --git a/drivers/infiniband/hw/hfi1/mmu_rb.c b/drivers/infiniband/hw/hfi1/mmu_rb.c
index 1cea8b0c78e0..7a51f7d73b61 100644
--- a/drivers/infiniband/hw/hfi1/mmu_rb.c
+++ b/drivers/infiniband/hw/hfi1/mmu_rb.c
@@ -19,8 +19,7 @@ static int mmu_notifier_range_start(struct mmu_notifier *,
const struct mmu_notifier_range *);
static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *,
unsigned long, unsigned long);
-static void do_remove(struct mmu_rb_handler *handler,
- struct list_head *del_list);
+static void release_immediate(struct kref *refcount);
static void handle_remove(struct work_struct *work);
static const struct mmu_notifier_ops mn_opts = {
@@ -106,7 +105,11 @@ void hfi1_mmu_rb_unregister(struct mmu_rb_handler *handler)
}
spin_unlock_irqrestore(&handler->lock, flags);
- do_remove(handler, &del_list);
+ while (!list_empty(&del_list)) {
+ rbnode = list_first_entry(&del_list, struct mmu_rb_node, list);
+ list_del(&rbnode->list);
+ kref_put(&rbnode->refcount, release_immediate);
+ }
/* Now the mm may be freed. */
mmdrop(handler->mn.mm);
@@ -121,7 +124,7 @@ int hfi1_mmu_rb_insert(struct mmu_rb_handler *handler,
unsigned long flags;
int ret = 0;
- trace_hfi1_mmu_rb_insert(mnode->addr, mnode->len);
+ trace_hfi1_mmu_rb_insert(mnode);
if (current->mm != handler->mn.mm)
return -EPERM;
@@ -134,12 +137,6 @@ int hfi1_mmu_rb_insert(struct mmu_rb_handler *handler,
}
__mmu_int_rb_insert(mnode, &handler->root);
list_add_tail(&mnode->list, &handler->lru_list);
-
- ret = handler->ops->insert(handler->ops_arg, mnode);
- if (ret) {
- __mmu_int_rb_remove(mnode, &handler->root);
- list_del(&mnode->list); /* remove from LRU list */
- }
mnode->handler = handler;
unlock:
spin_unlock_irqrestore(&handler->lock, flags);
@@ -183,6 +180,49 @@ static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *handler,
return node;
}
+/*
+ * Must NOT be called while holding mnode->handler->lock.
+ * mnode->handler->ops->remove() may sleep and mnode->handler->lock is a
+ * spinlock.
+ */
+static void release_immediate(struct kref *refcount)
+{
+ struct mmu_rb_node *mnode =
+ container_of(refcount, struct mmu_rb_node, refcount);
+ trace_hfi1_mmu_release_node(mnode);
+ mnode->handler->ops->remove(mnode->handler->ops_arg, mnode);
+}
+
+/* Caller must hold mnode->handler->lock */
+static void release_nolock(struct kref *refcount)
+{
+ struct mmu_rb_node *mnode =
+ container_of(refcount, struct mmu_rb_node, refcount);
+ list_move(&mnode->list, &mnode->handler->del_list);
+ queue_work(mnode->handler->wq, &mnode->handler->del_work);
+}
+
+/*
+ * struct mmu_rb_node->refcount kref_put() callback.
+ * Adds mmu_rb_node to mmu_rb_node->handler->del_list and queues
+ * handler->del_work on handler->wq.
+ * Does not remove mmu_rb_node from handler->lru_list or handler->rb_root.
+ * Acquires mmu_rb_node->handler->lock; do not call while already holding
+ * handler->lock.
+ */
+void hfi1_mmu_rb_release(struct kref *refcount)
+{
+ struct mmu_rb_node *mnode =
+ container_of(refcount, struct mmu_rb_node, refcount);
+ struct mmu_rb_handler *handler = mnode->handler;
+ unsigned long flags;
+
+ spin_lock_irqsave(&handler->lock, flags);
+ list_move(&mnode->list, &mnode->handler->del_list);
+ spin_unlock_irqrestore(&handler->lock, flags);
+ queue_work(handler->wq, &handler->del_work);
+}
+
void hfi1_mmu_rb_evict(struct mmu_rb_handler *handler, void *evict_arg)
{
struct mmu_rb_node *rbnode, *ptr;
@@ -197,6 +237,10 @@ void hfi1_mmu_rb_evict(struct mmu_rb_handler *handler, void *evict_arg)
spin_lock_irqsave(&handler->lock, flags);
list_for_each_entry_safe(rbnode, ptr, &handler->lru_list, list) {
+ /* refcount == 1 implies mmu_rb_handler has only rbnode ref */
+ if (kref_read(&rbnode->refcount) > 1)
+ continue;
+
if (handler->ops->evict(handler->ops_arg, rbnode, evict_arg,
&stop)) {
__mmu_int_rb_remove(rbnode, &handler->root);
@@ -209,7 +253,8 @@ void hfi1_mmu_rb_evict(struct mmu_rb_handler *handler, void *evict_arg)
spin_unlock_irqrestore(&handler->lock, flags);
list_for_each_entry_safe(rbnode, ptr, &del_list, list) {
- handler->ops->remove(handler->ops_arg, rbnode);
+ trace_hfi1_mmu_rb_evict(rbnode);
+ kref_put(&rbnode->refcount, release_immediate);
}
}
@@ -221,7 +266,6 @@ static int mmu_notifier_range_start(struct mmu_notifier *mn,
struct rb_root_cached *root = &handler->root;
struct mmu_rb_node *node, *ptr = NULL;
unsigned long flags;
- bool added = false;
spin_lock_irqsave(&handler->lock, flags);
for (node = __mmu_int_rb_iter_first(root, range->start, range->end-1);
@@ -229,40 +273,18 @@ static int mmu_notifier_range_start(struct mmu_notifier *mn,
/* Guard against node removal. */
ptr = __mmu_int_rb_iter_next(node, range->start,
range->end - 1);
- trace_hfi1_mmu_mem_invalidate(node->addr, node->len);
- if (handler->ops->invalidate(handler->ops_arg, node)) {
- __mmu_int_rb_remove(node, root);
- /* move from LRU list to delete list */
- list_move(&node->list, &handler->del_list);
- added = true;
- }
+ trace_hfi1_mmu_mem_invalidate(node);
+ /* Remove from rb tree and lru_list. */
+ __mmu_int_rb_remove(node, root);
+ list_del_init(&node->list);
+ kref_put(&node->refcount, release_nolock);
}
spin_unlock_irqrestore(&handler->lock, flags);
- if (added)
- queue_work(handler->wq, &handler->del_work);
-
return 0;
}
/*
- * Call the remove function for the given handler and the list. This
- * is expected to be called with a delete list extracted from handler.
- * The caller should not be holding the handler lock.
- */
-static void do_remove(struct mmu_rb_handler *handler,
- struct list_head *del_list)
-{
- struct mmu_rb_node *node;
-
- while (!list_empty(del_list)) {
- node = list_first_entry(del_list, struct mmu_rb_node, list);
- list_del(&node->list);
- handler->ops->remove(handler->ops_arg, node);
- }
-}
-
-/*
* Work queue function to remove all nodes that have been queued up to
* be removed. The key feature is that mm->mmap_lock is not being held
* and the remove callback can sleep while taking it, if needed.
@@ -274,11 +296,17 @@ static void handle_remove(struct work_struct *work)
del_work);
struct list_head del_list;
unsigned long flags;
+ struct mmu_rb_node *node;
/* remove anything that is queued to get removed */
spin_lock_irqsave(&handler->lock, flags);
list_replace_init(&handler->del_list, &del_list);
spin_unlock_irqrestore(&handler->lock, flags);
- do_remove(handler, &del_list);
+ while (!list_empty(&del_list)) {
+ node = list_first_entry(&del_list, struct mmu_rb_node, list);
+ list_del(&node->list);
+ trace_hfi1_mmu_release_node(node);
+ handler->ops->remove(handler->ops_arg, node);
+ }
}
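
The mmu_rb.c hunks above replace the explicit do_remove() pass with kref_put() and caller-chosen release callbacks. A minimal userspace sketch of that pattern follows; a single atomic stands in for struct kref, and struct node, node_put() and release_immediate() are illustrative names, not hfi1 code:

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct node {
    atomic_int refcount;
};

/* Model of kref_put(): drop one reference and invoke the caller-chosen
 * release callback only when the count reaches zero. */
static void node_put(struct node *n, void (*release)(struct node *))
{
    if (atomic_fetch_sub(&n->refcount, 1) == 1)
        release(n);
}

static void release_immediate(struct node *n)
{
    printf("releasing node %p\n", (void *)n);
    free(n);
}

int main(void)
{
    struct node *n = malloc(sizeof(*n));

    atomic_init(&n->refcount, 1);       /* reference held by the cache */
    atomic_fetch_add(&n->refcount, 1);  /* extra "safety" reference */

    node_put(n, release_immediate);     /* cache still holds a reference */
    node_put(n, release_immediate);     /* last reference: release runs */
    return 0;
}

The three kernel callbacks in the patch differ only in locking context: release_immediate() calls ops->remove() directly, while release_nolock() and hfi1_mmu_rb_release() defer the removal to a workqueue because ops->remove() may sleep and the handler lock is a spinlock.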
diff --git a/drivers/infiniband/hw/hfi1/mmu_rb.h b/drivers/infiniband/hw/hfi1/mmu_rb.h
index c4da064188c9..751dc3fe1e02 100644
--- a/drivers/infiniband/hw/hfi1/mmu_rb.h
+++ b/drivers/infiniband/hw/hfi1/mmu_rb.h
@@ -16,18 +16,14 @@ struct mmu_rb_node {
struct rb_node node;
struct mmu_rb_handler *handler;
struct list_head list;
+ struct kref refcount;
};
-/*
- * NOTE: filter, insert, invalidate, and evict must not sleep. Only remove is
- * allowed to sleep.
- */
+/* filter and evict must not sleep. Only remove is allowed to sleep. */
struct mmu_rb_ops {
bool (*filter)(struct mmu_rb_node *node, unsigned long addr,
unsigned long len);
- int (*insert)(void *ops_arg, struct mmu_rb_node *mnode);
void (*remove)(void *ops_arg, struct mmu_rb_node *mnode);
- int (*invalidate)(void *ops_arg, struct mmu_rb_node *node);
int (*evict)(void *ops_arg, struct mmu_rb_node *mnode,
void *evict_arg, bool *stop);
};
@@ -61,6 +57,8 @@ int hfi1_mmu_rb_register(void *ops_arg,
void hfi1_mmu_rb_unregister(struct mmu_rb_handler *handler);
int hfi1_mmu_rb_insert(struct mmu_rb_handler *handler,
struct mmu_rb_node *mnode);
+void hfi1_mmu_rb_release(struct kref *refcount);
+
void hfi1_mmu_rb_evict(struct mmu_rb_handler *handler, void *evict_arg);
struct mmu_rb_node *hfi1_mmu_rb_get_first(struct mmu_rb_handler *handler,
unsigned long addr,
diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c
index bb2552dd29c1..26c62162759b 100644
--- a/drivers/infiniband/hw/hfi1/sdma.c
+++ b/drivers/infiniband/hw/hfi1/sdma.c
@@ -1593,7 +1593,20 @@ static inline void sdma_unmap_desc(
struct hfi1_devdata *dd,
struct sdma_desc *descp)
{
- system_descriptor_complete(dd, descp);
+ switch (sdma_mapping_type(descp)) {
+ case SDMA_MAP_SINGLE:
+ dma_unmap_single(&dd->pcidev->dev, sdma_mapping_addr(descp),
+ sdma_mapping_len(descp), DMA_TO_DEVICE);
+ break;
+ case SDMA_MAP_PAGE:
+ dma_unmap_page(&dd->pcidev->dev, sdma_mapping_addr(descp),
+ sdma_mapping_len(descp), DMA_TO_DEVICE);
+ break;
+ }
+
+ if (descp->pinning_ctx && descp->ctx_put)
+ descp->ctx_put(descp->pinning_ctx);
+ descp->pinning_ctx = NULL;
}
/*
@@ -3113,8 +3126,8 @@ int ext_coal_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx,
/* Add descriptor for coalesce buffer */
tx->desc_limit = MAX_DESC;
- return _sdma_txadd_daddr(dd, SDMA_MAP_SINGLE, NULL, tx,
- addr, tx->tlen);
+ return _sdma_txadd_daddr(dd, SDMA_MAP_SINGLE, tx,
+ addr, tx->tlen, NULL, NULL, NULL);
}
return 1;
@@ -3157,9 +3170,9 @@ int _pad_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx)
make_tx_sdma_desc(
tx,
SDMA_MAP_NONE,
- NULL,
dd->sdma_pad_phys,
- sizeof(u32) - (tx->packet_len & (sizeof(u32) - 1)));
+ sizeof(u32) - (tx->packet_len & (sizeof(u32) - 1)),
+ NULL, NULL, NULL);
tx->num_desc++;
_sdma_close_tx(dd, tx);
return rval;
diff --git a/drivers/infiniband/hw/hfi1/sdma.h b/drivers/infiniband/hw/hfi1/sdma.h
index 95aaec14c6c2..7fdebab202c4 100644
--- a/drivers/infiniband/hw/hfi1/sdma.h
+++ b/drivers/infiniband/hw/hfi1/sdma.h
@@ -594,9 +594,11 @@ static inline dma_addr_t sdma_mapping_addr(struct sdma_desc *d)
static inline void make_tx_sdma_desc(
struct sdma_txreq *tx,
int type,
- void *pinning_ctx,
dma_addr_t addr,
- size_t len)
+ size_t len,
+ void *pinning_ctx,
+ void (*ctx_get)(void *),
+ void (*ctx_put)(void *))
{
struct sdma_desc *desc = &tx->descp[tx->num_desc];
@@ -613,7 +615,11 @@ static inline void make_tx_sdma_desc(
<< SDMA_DESC0_PHY_ADDR_SHIFT) |
(((u64)len & SDMA_DESC0_BYTE_COUNT_MASK)
<< SDMA_DESC0_BYTE_COUNT_SHIFT);
+
desc->pinning_ctx = pinning_ctx;
+ desc->ctx_put = ctx_put;
+ if (pinning_ctx && ctx_get)
+ ctx_get(pinning_ctx);
}
/* helper to extend txreq */
@@ -645,18 +651,20 @@ static inline void _sdma_close_tx(struct hfi1_devdata *dd,
static inline int _sdma_txadd_daddr(
struct hfi1_devdata *dd,
int type,
- void *pinning_ctx,
struct sdma_txreq *tx,
dma_addr_t addr,
- u16 len)
+ u16 len,
+ void *pinning_ctx,
+ void (*ctx_get)(void *),
+ void (*ctx_put)(void *))
{
int rval = 0;
make_tx_sdma_desc(
tx,
type,
- pinning_ctx,
- addr, len);
+ addr, len,
+ pinning_ctx, ctx_get, ctx_put);
WARN_ON(len > tx->tlen);
tx->num_desc++;
tx->tlen -= len;
@@ -676,11 +684,18 @@ static inline int _sdma_txadd_daddr(
/**
* sdma_txadd_page() - add a page to the sdma_txreq
* @dd: the device to use for mapping
- * @pinning_ctx: context to be released at descriptor retirement
* @tx: tx request to which the page is added
* @page: page to map
* @offset: offset within the page
* @len: length in bytes
+ * @pinning_ctx: context to be stored on struct sdma_desc .pinning_ctx. Not
+ * added if coalesce buffer is used. E.g. pointer to pinned-page
+ * cache entry for the sdma_desc.
+ * @ctx_get: optional function to take reference to @pinning_ctx. Not called if
+ * @pinning_ctx is NULL.
+ * @ctx_put: optional function to release reference to @pinning_ctx after
+ * sdma_desc completes. May be called in interrupt context so must
+ * not sleep. Not called if @pinning_ctx is NULL.
*
* This is used to add a page/offset/length descriptor.
*
@@ -692,11 +707,13 @@ static inline int _sdma_txadd_daddr(
*/
static inline int sdma_txadd_page(
struct hfi1_devdata *dd,
- void *pinning_ctx,
struct sdma_txreq *tx,
struct page *page,
unsigned long offset,
- u16 len)
+ u16 len,
+ void *pinning_ctx,
+ void (*ctx_get)(void *),
+ void (*ctx_put)(void *))
{
dma_addr_t addr;
int rval;
@@ -720,7 +737,8 @@ static inline int sdma_txadd_page(
return -ENOSPC;
}
- return _sdma_txadd_daddr(dd, SDMA_MAP_PAGE, pinning_ctx, tx, addr, len);
+ return _sdma_txadd_daddr(dd, SDMA_MAP_PAGE, tx, addr, len,
+ pinning_ctx, ctx_get, ctx_put);
}
/**
@@ -754,8 +772,8 @@ static inline int sdma_txadd_daddr(
return rval;
}
- return _sdma_txadd_daddr(dd, SDMA_MAP_NONE, NULL, tx,
- addr, len);
+ return _sdma_txadd_daddr(dd, SDMA_MAP_NONE, tx, addr, len,
+ NULL, NULL, NULL);
}
/**
@@ -801,7 +819,8 @@ static inline int sdma_txadd_kvaddr(
return -ENOSPC;
}
- return _sdma_txadd_daddr(dd, SDMA_MAP_SINGLE, NULL, tx, addr, len);
+ return _sdma_txadd_daddr(dd, SDMA_MAP_SINGLE, tx, addr, len,
+ NULL, NULL, NULL);
}
struct iowait_work;
@@ -1034,6 +1053,4 @@ u16 sdma_get_descq_cnt(void);
extern uint mod_num_sdma;
void sdma_update_lmc(struct hfi1_devdata *dd, u64 mask, u32 lid);
-
-void system_descriptor_complete(struct hfi1_devdata *dd, struct sdma_desc *descp);
#endif
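
The sdma.h/sdma.c changes above thread an optional pinning context plus ctx_get/ctx_put callbacks through every descriptor: the reference is taken when the descriptor is built and dropped when it retires. A small userspace model of that lifecycle, with all identifiers hypothetical rather than taken from the driver:

#include <stdatomic.h>
#include <stdio.h>

/* Model of a descriptor that optionally pins a context: ctx_get runs
 * when the descriptor is built, ctx_put when it completes (possibly in
 * interrupt context, so it must not sleep). */
struct desc {
    void *pinning_ctx;
    void (*ctx_put)(void *ctx);
};

static void desc_init(struct desc *d, void *ctx,
                      void (*get)(void *), void (*put)(void *))
{
    d->pinning_ctx = ctx;
    d->ctx_put = put;
    if (ctx && get)
        get(ctx);                       /* reference for the descriptor */
}

static void desc_complete(struct desc *d)
{
    if (d->pinning_ctx && d->ctx_put)
        d->ctx_put(d->pinning_ctx);     /* drop it at retirement */
    d->pinning_ctx = NULL;
}

struct ctx { atomic_int refs; };

static void ctx_get(void *p) { atomic_fetch_add(&((struct ctx *)p)->refs, 1); }
static void ctx_put(void *p)
{
    struct ctx *c = p;

    if (atomic_fetch_sub(&c->refs, 1) == 1)
        printf("last reference dropped\n");
}

int main(void)
{
    struct ctx c;
    struct desc d;

    atomic_init(&c.refs, 1);
    desc_init(&d, &c, ctx_get, ctx_put);  /* refs: 2 */
    desc_complete(&d);                    /* refs: 1 */
    ctx_put(&c);                          /* refs: 0 -> message */
    return 0;
}

Because ctx_put() can run from the descriptor-completion path, i.e. interrupt context, it must not sleep; the hfi1 callback therefore only performs a kref_put() that defers the heavy work to a workqueue.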
diff --git a/drivers/infiniband/hw/hfi1/sdma_txreq.h b/drivers/infiniband/hw/hfi1/sdma_txreq.h
index fad946cb5e0d..85ae7293c274 100644
--- a/drivers/infiniband/hw/hfi1/sdma_txreq.h
+++ b/drivers/infiniband/hw/hfi1/sdma_txreq.h
@@ -20,6 +20,8 @@ struct sdma_desc {
/* private: don't use directly */
u64 qw[2];
void *pinning_ctx;
+ /* Release reference to @pinning_ctx. May be called in interrupt context. Must not sleep. */
+ void (*ctx_put)(void *ctx);
};
/**
diff --git a/drivers/infiniband/hw/hfi1/trace_mmu.h b/drivers/infiniband/hw/hfi1/trace_mmu.h
index 57900ebb7702..82cc12aa3fb8 100644
--- a/drivers/infiniband/hw/hfi1/trace_mmu.h
+++ b/drivers/infiniband/hw/hfi1/trace_mmu.h
@@ -15,31 +15,53 @@
#define TRACE_SYSTEM hfi1_mmu
DECLARE_EVENT_CLASS(hfi1_mmu_rb_template,
- TP_PROTO(unsigned long addr, unsigned long len),
- TP_ARGS(addr, len),
+ TP_PROTO(struct mmu_rb_node *node),
+ TP_ARGS(node),
TP_STRUCT__entry(__field(unsigned long, addr)
__field(unsigned long, len)
+ __field(unsigned int, refcount)
),
- TP_fast_assign(__entry->addr = addr;
- __entry->len = len;
+ TP_fast_assign(__entry->addr = node->addr;
+ __entry->len = node->len;
+ __entry->refcount = kref_read(&node->refcount);
),
- TP_printk("MMU node addr 0x%lx, len %lu",
+ TP_printk("MMU node addr 0x%lx, len %lu, refcount %u",
__entry->addr,
- __entry->len
+ __entry->len,
+ __entry->refcount
)
);
DEFINE_EVENT(hfi1_mmu_rb_template, hfi1_mmu_rb_insert,
- TP_PROTO(unsigned long addr, unsigned long len),
- TP_ARGS(addr, len));
+ TP_PROTO(struct mmu_rb_node *node),
+ TP_ARGS(node));
-DEFINE_EVENT(hfi1_mmu_rb_template, hfi1_mmu_rb_search,
- TP_PROTO(unsigned long addr, unsigned long len),
- TP_ARGS(addr, len));
+TRACE_EVENT(hfi1_mmu_rb_search,
+ TP_PROTO(unsigned long addr, unsigned long len),
+ TP_ARGS(addr, len),
+ TP_STRUCT__entry(__field(unsigned long, addr)
+ __field(unsigned long, len)
+ ),
+ TP_fast_assign(__entry->addr = addr;
+ __entry->len = len;
+ ),
+ TP_printk("MMU node addr 0x%lx, len %lu",
+ __entry->addr,
+ __entry->len
+ )
+);
DEFINE_EVENT(hfi1_mmu_rb_template, hfi1_mmu_mem_invalidate,
- TP_PROTO(unsigned long addr, unsigned long len),
- TP_ARGS(addr, len));
+ TP_PROTO(struct mmu_rb_node *node),
+ TP_ARGS(node));
+
+DEFINE_EVENT(hfi1_mmu_rb_template, hfi1_mmu_rb_evict,
+ TP_PROTO(struct mmu_rb_node *node),
+ TP_ARGS(node));
+
+DEFINE_EVENT(hfi1_mmu_rb_template, hfi1_mmu_release_node,
+ TP_PROTO(struct mmu_rb_node *node),
+ TP_ARGS(node));
#endif /* __HFI1_TRACE_RC_H */
diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c
index ae58b48afe07..02bd62b857b7 100644
--- a/drivers/infiniband/hw/hfi1/user_sdma.c
+++ b/drivers/infiniband/hw/hfi1/user_sdma.c
@@ -62,18 +62,14 @@ static int defer_packet_queue(
static void activate_packet_queue(struct iowait *wait, int reason);
static bool sdma_rb_filter(struct mmu_rb_node *node, unsigned long addr,
unsigned long len);
-static int sdma_rb_insert(void *arg, struct mmu_rb_node *mnode);
static int sdma_rb_evict(void *arg, struct mmu_rb_node *mnode,
void *arg2, bool *stop);
static void sdma_rb_remove(void *arg, struct mmu_rb_node *mnode);
-static int sdma_rb_invalidate(void *arg, struct mmu_rb_node *mnode);
static struct mmu_rb_ops sdma_rb_ops = {
.filter = sdma_rb_filter,
- .insert = sdma_rb_insert,
.evict = sdma_rb_evict,
.remove = sdma_rb_remove,
- .invalidate = sdma_rb_invalidate
};
static int add_system_pages_to_sdma_packet(struct user_sdma_request *req,
@@ -247,14 +243,14 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd,
spin_unlock(&fd->pq_rcu_lock);
synchronize_srcu(&fd->pq_srcu);
/* at this point there can be no more new requests */
- if (pq->handler)
- hfi1_mmu_rb_unregister(pq->handler);
iowait_sdma_drain(&pq->busy);
/* Wait until all requests have been freed. */
wait_event_interruptible(
pq->wait,
!atomic_read(&pq->n_reqs));
kfree(pq->reqs);
+ if (pq->handler)
+ hfi1_mmu_rb_unregister(pq->handler);
bitmap_free(pq->req_in_use);
kmem_cache_destroy(pq->txreq_cache);
flush_pq_iowait(pq);
@@ -1275,25 +1271,17 @@ static void free_system_node(struct sdma_mmu_node *node)
kfree(node);
}
-static inline void acquire_node(struct sdma_mmu_node *node)
-{
- atomic_inc(&node->refcount);
- WARN_ON(atomic_read(&node->refcount) < 0);
-}
-
-static inline void release_node(struct mmu_rb_handler *handler,
- struct sdma_mmu_node *node)
-{
- atomic_dec(&node->refcount);
- WARN_ON(atomic_read(&node->refcount) < 0);
-}
-
+/*
+ * kref_get()'s an additional kref on the returned rb_node to prevent rb_node
+ * from being released until after rb_node is assigned to an SDMA descriptor
+ * (struct sdma_desc) under add_system_iovec_to_sdma_packet(), even if the
+ * virtual address range for rb_node is invalidated between now and then.
+ */
static struct sdma_mmu_node *find_system_node(struct mmu_rb_handler *handler,
unsigned long start,
unsigned long end)
{
struct mmu_rb_node *rb_node;
- struct sdma_mmu_node *node;
unsigned long flags;
spin_lock_irqsave(&handler->lock, flags);
@@ -1302,11 +1290,12 @@ static struct sdma_mmu_node *find_system_node(struct mmu_rb_handler *handler,
spin_unlock_irqrestore(&handler->lock, flags);
return NULL;
}
- node = container_of(rb_node, struct sdma_mmu_node, rb);
- acquire_node(node);
+
+ /* "safety" kref to prevent release before add_system_iovec_to_sdma_packet() */
+ kref_get(&rb_node->refcount);
spin_unlock_irqrestore(&handler->lock, flags);
- return node;
+ return container_of(rb_node, struct sdma_mmu_node, rb);
}
static int pin_system_pages(struct user_sdma_request *req,
@@ -1355,6 +1344,13 @@ retry:
return 0;
}
+/*
+ * kref refcount on *node_p will be 2 on successful addition: one kref from
+ * kref_init() for mmu_rb_handler and one kref to prevent *node_p from being
+ * released until after *node_p is assigned to an SDMA descriptor (struct
+ * sdma_desc) under add_system_iovec_to_sdma_packet(), even if the virtual
+ * address range for *node_p is invalidated between now and then.
+ */
static int add_system_pinning(struct user_sdma_request *req,
struct sdma_mmu_node **node_p,
unsigned long start, unsigned long len)
@@ -1368,6 +1364,12 @@ static int add_system_pinning(struct user_sdma_request *req,
if (!node)
return -ENOMEM;
+ /* First kref "moves" to mmu_rb_handler */
+ kref_init(&node->rb.refcount);
+
+ /* "safety" kref to prevent release before add_system_iovec_to_sdma_packet() */
+ kref_get(&node->rb.refcount);
+
node->pq = pq;
ret = pin_system_pages(req, start, len, node, PFN_DOWN(len));
if (ret == 0) {
@@ -1431,15 +1433,15 @@ static int get_system_cache_entry(struct user_sdma_request *req,
return 0;
}
- SDMA_DBG(req, "prepend: node->rb.addr %lx, node->refcount %d",
- node->rb.addr, atomic_read(&node->refcount));
+ SDMA_DBG(req, "prepend: node->rb.addr %lx, node->rb.refcount %d",
+ node->rb.addr, kref_read(&node->rb.refcount));
prepend_len = node->rb.addr - start;
/*
* This node will not be returned, instead a new node
* will be. So release the reference.
*/
- release_node(handler, node);
+ kref_put(&node->rb.refcount, hfi1_mmu_rb_release);
/* Prepend a node to cover the beginning of the allocation */
ret = add_system_pinning(req, node_p, start, prepend_len);
@@ -1451,6 +1453,20 @@ static int get_system_cache_entry(struct user_sdma_request *req,
}
}
+static void sdma_mmu_rb_node_get(void *ctx)
+{
+ struct mmu_rb_node *node = ctx;
+
+ kref_get(&node->refcount);
+}
+
+static void sdma_mmu_rb_node_put(void *ctx)
+{
+ struct sdma_mmu_node *node = ctx;
+
+ kref_put(&node->rb.refcount, hfi1_mmu_rb_release);
+}
+
static int add_mapping_to_sdma_packet(struct user_sdma_request *req,
struct user_sdma_txreq *tx,
struct sdma_mmu_node *cache_entry,
@@ -1494,9 +1510,12 @@ static int add_mapping_to_sdma_packet(struct user_sdma_request *req,
ctx = cache_entry;
}
- ret = sdma_txadd_page(pq->dd, ctx, &tx->txreq,
+ ret = sdma_txadd_page(pq->dd, &tx->txreq,
cache_entry->pages[page_index],
- page_offset, from_this_page);
+ page_offset, from_this_page,
+ ctx,
+ sdma_mmu_rb_node_get,
+ sdma_mmu_rb_node_put);
if (ret) {
/*
* When there's a failure, the entire request is freed by
@@ -1518,8 +1537,6 @@ static int add_system_iovec_to_sdma_packet(struct user_sdma_request *req,
struct user_sdma_iovec *iovec,
size_t from_this_iovec)
{
- struct mmu_rb_handler *handler = req->pq->handler;
-
while (from_this_iovec > 0) {
struct sdma_mmu_node *cache_entry;
size_t from_this_cache_entry;
@@ -1540,15 +1557,15 @@ static int add_system_iovec_to_sdma_packet(struct user_sdma_request *req,
ret = add_mapping_to_sdma_packet(req, tx, cache_entry, start,
from_this_cache_entry);
+
+ /*
+ * Done adding cache_entry to zero or more sdma_desc. Can
+ * kref_put() the "safety" kref taken under
+ * get_system_cache_entry().
+ */
+ kref_put(&cache_entry->rb.refcount, hfi1_mmu_rb_release);
+
if (ret) {
- /*
- * We're guaranteed that there will be no descriptor
- * completion callback that releases this node
- * because only the last descriptor referencing it
- * has a context attached, and a failure means the
- * last descriptor was never added.
- */
- release_node(handler, cache_entry);
SDMA_DBG(req, "add system segment failed %d", ret);
return ret;
}
@@ -1599,42 +1616,12 @@ static int add_system_pages_to_sdma_packet(struct user_sdma_request *req,
return 0;
}
-void system_descriptor_complete(struct hfi1_devdata *dd,
- struct sdma_desc *descp)
-{
- switch (sdma_mapping_type(descp)) {
- case SDMA_MAP_SINGLE:
- dma_unmap_single(&dd->pcidev->dev, sdma_mapping_addr(descp),
- sdma_mapping_len(descp), DMA_TO_DEVICE);
- break;
- case SDMA_MAP_PAGE:
- dma_unmap_page(&dd->pcidev->dev, sdma_mapping_addr(descp),
- sdma_mapping_len(descp), DMA_TO_DEVICE);
- break;
- }
-
- if (descp->pinning_ctx) {
- struct sdma_mmu_node *node = descp->pinning_ctx;
-
- release_node(node->rb.handler, node);
- }
-}
-
static bool sdma_rb_filter(struct mmu_rb_node *node, unsigned long addr,
unsigned long len)
{
return (bool)(node->addr == addr);
}
-static int sdma_rb_insert(void *arg, struct mmu_rb_node *mnode)
-{
- struct sdma_mmu_node *node =
- container_of(mnode, struct sdma_mmu_node, rb);
-
- atomic_inc(&node->refcount);
- return 0;
-}
-
/*
* Return 1 to remove the node from the rb tree and call the remove op.
*
@@ -1647,10 +1634,6 @@ static int sdma_rb_evict(void *arg, struct mmu_rb_node *mnode,
container_of(mnode, struct sdma_mmu_node, rb);
struct evict_data *evict_data = evict_arg;
- /* is this node still being used? */
- if (atomic_read(&node->refcount))
- return 0; /* keep this node */
-
/* this node will be evicted, add its pages to our count */
evict_data->cleared += node->npages;
@@ -1668,13 +1651,3 @@ static void sdma_rb_remove(void *arg, struct mmu_rb_node *mnode)
free_system_node(node);
}
-
-static int sdma_rb_invalidate(void *arg, struct mmu_rb_node *mnode)
-{
- struct sdma_mmu_node *node =
- container_of(mnode, struct sdma_mmu_node, rb);
-
- if (!atomic_read(&node->refcount))
- return 1;
- return 0;
-}
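
Taken together, the user_sdma.c changes above impose a simple reference protocol on each cache entry: one reference owned by the rb-tree cache, one temporary "safety" reference held while a request is assembled, and one reference per descriptor that points at the entry. A compact model of the ordering, using plain atomics and illustrative names only:

#include <stdatomic.h>
#include <stdio.h>

static atomic_int refs;

static void get(void) { atomic_fetch_add(&refs, 1); }
static void put(void)
{
    if (atomic_fetch_sub(&refs, 1) == 1)
        puts("entry freed");
}

int main(void)
{
    atomic_init(&refs, 1);   /* reference owned by the cache */

    get();                   /* "safety" ref before building descriptors */
    get();                   /* ref taken by a descriptor (ctx_get) */
    put();                   /* invalidation drops the cache's ref */
    put();                   /* safety ref dropped after assembly */
    puts("entry still alive for the in-flight descriptor");
    put();                   /* descriptor completion (ctx_put) frees it */
    return 0;
}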
diff --git a/drivers/infiniband/hw/hfi1/user_sdma.h b/drivers/infiniband/hw/hfi1/user_sdma.h
index a241836371dc..548347d4c5bc 100644
--- a/drivers/infiniband/hw/hfi1/user_sdma.h
+++ b/drivers/infiniband/hw/hfi1/user_sdma.h
@@ -104,7 +104,6 @@ struct hfi1_user_sdma_comp_q {
struct sdma_mmu_node {
struct mmu_rb_node rb;
struct hfi1_user_sdma_pkt_q *pq;
- atomic_t refcount;
struct page **pages;
unsigned int npages;
};
diff --git a/drivers/infiniband/hw/hfi1/vnic_sdma.c b/drivers/infiniband/hw/hfi1/vnic_sdma.c
index 727eedfba332..cc6324d2d1dd 100644
--- a/drivers/infiniband/hw/hfi1/vnic_sdma.c
+++ b/drivers/infiniband/hw/hfi1/vnic_sdma.c
@@ -64,11 +64,11 @@ static noinline int build_vnic_ulp_payload(struct sdma_engine *sde,
/* combine physically continuous fragments later? */
ret = sdma_txadd_page(sde->dd,
- NULL,
&tx->txreq,
skb_frag_page(frag),
skb_frag_off(frag),
- skb_frag_size(frag));
+ skb_frag_size(frag),
+ NULL, NULL, NULL);
if (unlikely(ret))
goto bail_txadd;
}
diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c
index aa8a08d1c014..47c0efed1821 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hem.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hem.c
@@ -595,11 +595,12 @@ int hns_roce_table_get(struct hns_roce_dev *hr_dev,
}
/* Set HEM base address(128K/page, pa) to Hardware */
- if (hr_dev->hw->set_hem(hr_dev, table, obj, HEM_HOP_STEP_DIRECT)) {
+ ret = hr_dev->hw->set_hem(hr_dev, table, obj, HEM_HOP_STEP_DIRECT);
+ if (ret) {
hns_roce_free_hem(hr_dev, table->hem[i]);
table->hem[i] = NULL;
- ret = -ENODEV;
- dev_err(dev, "set HEM base address to HW failed.\n");
+ dev_err(dev, "set HEM base address to HW failed, ret = %d.\n",
+ ret);
goto out;
}
@@ -618,6 +619,7 @@ static void clear_mhop_hem(struct hns_roce_dev *hr_dev,
u32 hop_num = mhop->hop_num;
u32 chunk_ba_num;
u32 step_idx;
+ int ret;
index->inited = HEM_INDEX_BUF;
chunk_ba_num = mhop->bt_chunk_size / BA_BYTE_LEN;
@@ -641,16 +643,24 @@ static void clear_mhop_hem(struct hns_roce_dev *hr_dev,
else
step_idx = hop_num;
- if (hr_dev->hw->clear_hem(hr_dev, table, obj, step_idx))
- ibdev_warn(ibdev, "failed to clear hop%u HEM.\n", hop_num);
-
- if (index->inited & HEM_INDEX_L1)
- if (hr_dev->hw->clear_hem(hr_dev, table, obj, 1))
- ibdev_warn(ibdev, "failed to clear HEM step 1.\n");
+ ret = hr_dev->hw->clear_hem(hr_dev, table, obj, step_idx);
+ if (ret)
+ ibdev_warn(ibdev, "failed to clear hop%u HEM, ret = %d.\n",
+ hop_num, ret);
+
+ if (index->inited & HEM_INDEX_L1) {
+ ret = hr_dev->hw->clear_hem(hr_dev, table, obj, 1);
+ if (ret)
+ ibdev_warn(ibdev, "failed to clear HEM step 1, ret = %d.\n",
+ ret);
+ }
- if (index->inited & HEM_INDEX_L0)
- if (hr_dev->hw->clear_hem(hr_dev, table, obj, 0))
- ibdev_warn(ibdev, "failed to clear HEM step 0.\n");
+ if (index->inited & HEM_INDEX_L0) {
+ ret = hr_dev->hw->clear_hem(hr_dev, table, obj, 0);
+ if (ret)
+ ibdev_warn(ibdev, "failed to clear HEM step 0, ret = %d.\n",
+ ret);
+ }
}
}
@@ -687,6 +697,7 @@ void hns_roce_table_put(struct hns_roce_dev *hr_dev,
{
struct device *dev = hr_dev->dev;
unsigned long i;
+ int ret;
if (hns_roce_check_whether_mhop(hr_dev, table->type)) {
hns_roce_table_mhop_put(hr_dev, table, obj, 1);
@@ -699,8 +710,10 @@ void hns_roce_table_put(struct hns_roce_dev *hr_dev,
&table->mutex))
return;
- if (hr_dev->hw->clear_hem(hr_dev, table, obj, HEM_HOP_STEP_DIRECT))
- dev_warn(dev, "failed to clear HEM base address.\n");
+ ret = hr_dev->hw->clear_hem(hr_dev, table, obj, HEM_HOP_STEP_DIRECT);
+ if (ret)
+ dev_warn(dev, "failed to clear HEM base address, ret = %d.\n",
+ ret);
hns_roce_free_hem(hr_dev, table->hem[i]);
table->hem[i] = NULL;
@@ -916,6 +929,8 @@ void hns_roce_cleanup_hem_table(struct hns_roce_dev *hr_dev,
{
struct device *dev = hr_dev->dev;
unsigned long i;
+ int obj;
+ int ret;
if (hns_roce_check_whether_mhop(hr_dev, table->type)) {
hns_roce_cleanup_mhop_hem_table(hr_dev, table);
@@ -924,9 +939,11 @@ void hns_roce_cleanup_hem_table(struct hns_roce_dev *hr_dev,
for (i = 0; i < table->num_hem; ++i)
if (table->hem[i]) {
- if (hr_dev->hw->clear_hem(hr_dev, table,
- i * table->table_chunk_size / table->obj_size, 0))
- dev_err(dev, "clear HEM base address failed.\n");
+ obj = i * table->table_chunk_size / table->obj_size;
+ ret = hr_dev->hw->clear_hem(hr_dev, table, obj, 0);
+ if (ret)
+ dev_err(dev, "clear HEM base address failed, ret = %d.\n",
+ ret);
hns_roce_free_hem(hr_dev, table->hem[i]);
}
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index 84f1167de1d9..8f7eb11066b4 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -373,17 +373,10 @@ static int check_send_valid(struct hns_roce_dev *hr_dev,
struct hns_roce_qp *hr_qp)
{
struct ib_device *ibdev = &hr_dev->ib_dev;
- struct ib_qp *ibqp = &hr_qp->ibqp;
- if (unlikely(ibqp->qp_type != IB_QPT_RC &&
- ibqp->qp_type != IB_QPT_GSI &&
- ibqp->qp_type != IB_QPT_UD)) {
- ibdev_err(ibdev, "not supported QP(0x%x)type!\n",
- ibqp->qp_type);
- return -EOPNOTSUPP;
- } else if (unlikely(hr_qp->state == IB_QPS_RESET ||
- hr_qp->state == IB_QPS_INIT ||
- hr_qp->state == IB_QPS_RTR)) {
+ if (unlikely(hr_qp->state == IB_QPS_RESET ||
+ hr_qp->state == IB_QPS_INIT ||
+ hr_qp->state == IB_QPS_RTR)) {
ibdev_err(ibdev, "failed to post WQE, QP state %u!\n",
hr_qp->state);
return -EINVAL;
@@ -771,17 +764,6 @@ out:
static int check_recv_valid(struct hns_roce_dev *hr_dev,
struct hns_roce_qp *hr_qp)
{
- struct ib_device *ibdev = &hr_dev->ib_dev;
- struct ib_qp *ibqp = &hr_qp->ibqp;
-
- if (unlikely(ibqp->qp_type != IB_QPT_RC &&
- ibqp->qp_type != IB_QPT_GSI &&
- ibqp->qp_type != IB_QPT_UD)) {
- ibdev_err(ibdev, "unsupported qp type, qp_type = %d.\n",
- ibqp->qp_type);
- return -EOPNOTSUPP;
- }
-
if (unlikely(hr_dev->state >= HNS_ROCE_DEVICE_STATE_RST_DOWN))
return -EIO;
@@ -4583,11 +4565,9 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
mtu = ib_mtu_enum_to_int(ib_mtu);
if (WARN_ON(mtu <= 0))
return -EINVAL;
-#define MAX_LP_MSG_LEN 16384
- /* MTU * (2 ^ LP_PKTN_INI) shouldn't be bigger than 16KB */
- lp_pktn_ini = ilog2(MAX_LP_MSG_LEN / mtu);
- if (WARN_ON(lp_pktn_ini >= 0xF))
- return -EINVAL;
+#define MIN_LP_MSG_LEN 1024
+ /* mtu * (2 ^ lp_pktn_ini) should be in the range of 1024 to mtu */
+ lp_pktn_ini = ilog2(max(mtu, MIN_LP_MSG_LEN) / mtu);
if (attr_mask & IB_QP_PATH_MTU) {
hr_reg_write(context, QPC_MTU, ib_mtu);
@@ -5012,7 +4992,6 @@ static int hns_roce_v2_set_abs_fields(struct ib_qp *ibqp,
static bool check_qp_timeout_cfg_range(struct hns_roce_dev *hr_dev, u8 *timeout)
{
#define QP_ACK_TIMEOUT_MAX_HIP08 20
-#define QP_ACK_TIMEOUT_OFFSET 10
#define QP_ACK_TIMEOUT_MAX 31
if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) {
@@ -5021,7 +5000,7 @@ static bool check_qp_timeout_cfg_range(struct hns_roce_dev *hr_dev, u8 *timeout)
"local ACK timeout shall be 0 to 20.\n");
return false;
}
- *timeout += QP_ACK_TIMEOUT_OFFSET;
+ *timeout += HNS_ROCE_V2_QP_ACK_TIMEOUT_OFS_HIP08;
} else if (hr_dev->pci_dev->revision > PCI_REVISION_ID_HIP08) {
if (*timeout > QP_ACK_TIMEOUT_MAX) {
ibdev_warn(&hr_dev->ib_dev,
@@ -5307,6 +5286,18 @@ out:
return ret;
}
+static u8 get_qp_timeout_attr(struct hns_roce_dev *hr_dev,
+ struct hns_roce_v2_qp_context *context)
+{
+ u8 timeout;
+
+ timeout = (u8)hr_reg_read(context, QPC_AT);
+ if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08)
+ timeout -= HNS_ROCE_V2_QP_ACK_TIMEOUT_OFS_HIP08;
+
+ return timeout;
+}
+
static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
int qp_attr_mask,
struct ib_qp_init_attr *qp_init_attr)
@@ -5384,7 +5375,7 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
qp_attr->max_dest_rd_atomic = 1 << hr_reg_read(&context, QPC_RR_MAX);
qp_attr->min_rnr_timer = (u8)hr_reg_read(&context, QPC_MIN_RNR_TIME);
- qp_attr->timeout = (u8)hr_reg_read(&context, QPC_AT);
+ qp_attr->timeout = get_qp_timeout_attr(hr_dev, &context);
qp_attr->retry_cnt = hr_reg_read(&context, QPC_RETRY_NUM_INIT);
qp_attr->rnr_retry = hr_reg_read(&context, QPC_RNR_NUM_INIT);
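
For the lp_pktn_ini rework above, the new rule is that mtu * 2^lp_pktn_ini should reach at least 1024 bytes instead of being capped at 16 KB. A small standalone calculation, assuming a local ilog2() helper, shows the resulting values for common MTUs:

#include <stdio.h>

#define MIN_LP_MSG_LEN 1024

static unsigned int ilog2(unsigned int v)
{
    unsigned int r = 0;

    while (v >>= 1)
        r++;
    return r;
}

int main(void)
{
    int mtus[] = { 256, 512, 1024, 2048, 4096 };

    for (int i = 0; i < 5; i++) {
        int mtu = mtus[i];
        int len = mtu > MIN_LP_MSG_LEN ? mtu : MIN_LP_MSG_LEN;

        printf("mtu %4d -> lp_pktn_ini %u\n", mtu, ilog2(len / mtu));
    }
    return 0;
}

For MTUs of 1024 and above the field is 0 (one packet per loopback message); for a 256-byte MTU four packets are grouped (lp_pktn_ini = 2).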
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
index 1b44d2434ab4..7033eae2407c 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
@@ -44,6 +44,8 @@
#define HNS_ROCE_V2_MAX_XRCD_NUM 0x1000000
#define HNS_ROCE_V2_RSV_XRCD_NUM 0
+#define HNS_ROCE_V2_QP_ACK_TIMEOUT_OFS_HIP08 10
+
#define HNS_ROCE_V3_SCCC_SZ 64
#define HNS_ROCE_V3_GMV_ENTRY_SZ 32
diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c
index 37a5cf62f88b..14376490ac22 100644
--- a/drivers/infiniband/hw/hns/hns_roce_mr.c
+++ b/drivers/infiniband/hw/hns/hns_roce_mr.c
@@ -33,6 +33,7 @@
#include <linux/vmalloc.h>
#include <rdma/ib_umem.h>
+#include <linux/math.h>
#include "hns_roce_device.h"
#include "hns_roce_cmd.h"
#include "hns_roce_hem.h"
@@ -909,6 +910,44 @@ static int mtr_init_buf_cfg(struct hns_roce_dev *hr_dev,
return page_cnt;
}
+static u64 cal_pages_per_l1ba(unsigned int ba_per_bt, unsigned int hopnum)
+{
+ return int_pow(ba_per_bt, hopnum - 1);
+}
+
+static unsigned int cal_best_bt_pg_sz(struct hns_roce_dev *hr_dev,
+ struct hns_roce_mtr *mtr,
+ unsigned int pg_shift)
+{
+ unsigned long cap = hr_dev->caps.page_size_cap;
+ struct hns_roce_buf_region *re;
+ unsigned int pgs_per_l1ba;
+ unsigned int ba_per_bt;
+ unsigned int ba_num;
+ int i;
+
+ for_each_set_bit_from(pg_shift, &cap, sizeof(cap) * BITS_PER_BYTE) {
+ if (!(BIT(pg_shift) & cap))
+ continue;
+
+ ba_per_bt = BIT(pg_shift) / BA_BYTE_LEN;
+ ba_num = 0;
+ for (i = 0; i < mtr->hem_cfg.region_count; i++) {
+ re = &mtr->hem_cfg.region[i];
+ if (re->hopnum == 0)
+ continue;
+
+ pgs_per_l1ba = cal_pages_per_l1ba(ba_per_bt, re->hopnum);
+ ba_num += DIV_ROUND_UP(re->count, pgs_per_l1ba);
+ }
+
+ if (ba_num <= ba_per_bt)
+ return pg_shift;
+ }
+
+ return 0;
+}
+
static int mtr_alloc_mtt(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
unsigned int ba_page_shift)
{
@@ -917,6 +956,10 @@ static int mtr_alloc_mtt(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
hns_roce_hem_list_init(&mtr->hem_list);
if (!cfg->is_direct) {
+ ba_page_shift = cal_best_bt_pg_sz(hr_dev, mtr, ba_page_shift);
+ if (!ba_page_shift)
+ return -ERANGE;
+
ret = hns_roce_hem_list_request(hr_dev, &mtr->hem_list,
cfg->region, cfg->region_count,
ba_page_shift);
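
cal_best_bt_pg_sz() above picks the smallest supported base-address-table page size whose single BT page can reference every region's level-1 BAs. The sketch below reproduces the arithmetic in userspace; the region layout, the BA_BYTE_LEN value and the helper names are assumptions for the demo, not values read from a device:

#include <stdio.h>
#include <stdint.h>

#define BA_BYTE_LEN 8

struct region { unsigned int hopnum; unsigned int count; };

static uint64_t int_pow(uint64_t base, unsigned int exp)
{
    uint64_t r = 1;

    while (exp--)
        r *= base;
    return r;
}

static unsigned int best_bt_pg_shift(unsigned long page_size_cap,
                                     const struct region *re, int nregions,
                                     unsigned int pg_shift)
{
    for (; pg_shift < 8 * sizeof(page_size_cap); pg_shift++) {
        unsigned int ba_per_bt, ba_num = 0;

        if (!(page_size_cap & (1UL << pg_shift)))
            continue;

        ba_per_bt = (1U << pg_shift) / BA_BYTE_LEN;
        for (int i = 0; i < nregions; i++) {
            uint64_t pgs_per_l1ba;

            if (!re[i].hopnum)
                continue;
            pgs_per_l1ba = int_pow(ba_per_bt, re[i].hopnum - 1);
            ba_num += (re[i].count + pgs_per_l1ba - 1) / pgs_per_l1ba;
        }

        if (ba_num <= ba_per_bt)
            return pg_shift;    /* all level-1 BAs fit in one BT page */
    }
    return 0;                   /* nothing fits: caller returns -ERANGE */
}

int main(void)
{
    struct region regions[] = { { 2, 4096 }, { 1, 16 } };

    printf("chosen shift: %u\n",
           best_bt_pg_shift(0xFFFFF000UL, regions, 2, 12));
    return 0;
}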
diff --git a/drivers/infiniband/hw/irdma/uk.c b/drivers/infiniband/hw/irdma/uk.c
index 16183e894da7..dd428d915c17 100644
--- a/drivers/infiniband/hw/irdma/uk.c
+++ b/drivers/infiniband/hw/irdma/uk.c
@@ -93,16 +93,18 @@ static int irdma_nop_1(struct irdma_qp_uk *qp)
*/
void irdma_clr_wqes(struct irdma_qp_uk *qp, u32 qp_wqe_idx)
{
- __le64 *wqe;
+ struct irdma_qp_quanta *sq;
u32 wqe_idx;
if (!(qp_wqe_idx & 0x7F)) {
wqe_idx = (qp_wqe_idx + 128) % qp->sq_ring.size;
- wqe = qp->sq_base[wqe_idx].elem;
+ sq = qp->sq_base + wqe_idx;
if (wqe_idx)
- memset(wqe, qp->swqe_polarity ? 0 : 0xFF, 0x1000);
+ memset(sq, qp->swqe_polarity ? 0 : 0xFF,
+ 128 * sizeof(*sq));
else
- memset(wqe, qp->swqe_polarity ? 0xFF : 0, 0x1000);
+ memset(sq, qp->swqe_polarity ? 0xFF : 0,
+ 128 * sizeof(*sq));
}
}
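
The irdma_clr_wqes() change above replaces the hard-coded 0x1000 byte count with 128 * sizeof(*sq), keeping the memset tied to the actual quanta size. A trivial illustration, with the 32-byte quanta size assumed for the example:

#include <stdio.h>
#include <string.h>

struct quanta { unsigned long long elem[4]; };   /* 32-byte quanta, assumed */

int main(void)
{
    static struct quanta sq[256];
    struct quanta *start = sq + 128;

    /* Clear 128 quanta; the length follows the type, not a magic number. */
    memset(start, 0xFF, 128 * sizeof(*start));
    printf("cleared %zu bytes\n", 128 * sizeof(*start));
    return 0;
}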
diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c
index ab5cdf782785..9c4fe4fa9001 100644
--- a/drivers/infiniband/hw/irdma/verbs.c
+++ b/drivers/infiniband/hw/irdma/verbs.c
@@ -522,11 +522,6 @@ static int irdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
if (!iwqp->user_mode)
cancel_delayed_work_sync(&iwqp->dwork_flush);
- irdma_qp_rem_ref(&iwqp->ibqp);
- wait_for_completion(&iwqp->free_qp);
- irdma_free_lsmm_rsrc(iwqp);
- irdma_cqp_qp_destroy_cmd(&iwdev->rf->sc_dev, &iwqp->sc_qp);
-
if (!iwqp->user_mode) {
if (iwqp->iwscq) {
irdma_clean_cqes(iwqp, iwqp->iwscq);
@@ -534,6 +529,12 @@ static int irdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
irdma_clean_cqes(iwqp, iwqp->iwrcq);
}
}
+
+ irdma_qp_rem_ref(&iwqp->ibqp);
+ wait_for_completion(&iwqp->free_qp);
+ irdma_free_lsmm_rsrc(iwqp);
+ irdma_cqp_qp_destroy_cmd(&iwdev->rf->sc_dev, &iwqp->sc_qp);
+
irdma_remove_push_mmap_entries(iwqp);
irdma_free_qp_rsrc(iwqp);
@@ -3291,6 +3292,7 @@ static int irdma_post_send(struct ib_qp *ibqp,
break;
case IB_WR_LOCAL_INV:
info.op_type = IRDMA_OP_TYPE_INV_STAG;
+ info.local_fence = info.read_fence;
info.op.inv_local_stag.target_stag = ib_wr->ex.invalidate_rkey;
err = irdma_uk_stag_local_invalidate(ukqp, &info, true);
break;
@@ -4450,8 +4452,16 @@ static const struct ib_device_ops irdma_roce_dev_ops = {
};
static const struct ib_device_ops irdma_iw_dev_ops = {
- .modify_qp = irdma_modify_qp,
.get_port_immutable = irdma_iw_port_immutable,
+ .iw_accept = irdma_accept,
+ .iw_add_ref = irdma_qp_add_ref,
+ .iw_connect = irdma_connect,
+ .iw_create_listen = irdma_create_listen,
+ .iw_destroy_listen = irdma_destroy_listen,
+ .iw_get_qp = irdma_get_qp,
+ .iw_reject = irdma_reject,
+ .iw_rem_ref = irdma_qp_rem_ref,
+ .modify_qp = irdma_modify_qp,
.query_gid = irdma_query_gid,
};
@@ -4515,50 +4525,35 @@ static void irdma_init_roce_device(struct irdma_device *iwdev)
* irdma_init_iw_device - initialization of iwarp rdma device
* @iwdev: irdma device
*/
-static int irdma_init_iw_device(struct irdma_device *iwdev)
+static void irdma_init_iw_device(struct irdma_device *iwdev)
{
struct net_device *netdev = iwdev->netdev;
iwdev->ibdev.node_type = RDMA_NODE_RNIC;
addrconf_addr_eui48((u8 *)&iwdev->ibdev.node_guid,
netdev->dev_addr);
- iwdev->ibdev.ops.iw_add_ref = irdma_qp_add_ref;
- iwdev->ibdev.ops.iw_rem_ref = irdma_qp_rem_ref;
- iwdev->ibdev.ops.iw_get_qp = irdma_get_qp;
- iwdev->ibdev.ops.iw_connect = irdma_connect;
- iwdev->ibdev.ops.iw_accept = irdma_accept;
- iwdev->ibdev.ops.iw_reject = irdma_reject;
- iwdev->ibdev.ops.iw_create_listen = irdma_create_listen;
- iwdev->ibdev.ops.iw_destroy_listen = irdma_destroy_listen;
memcpy(iwdev->ibdev.iw_ifname, netdev->name,
sizeof(iwdev->ibdev.iw_ifname));
ib_set_device_ops(&iwdev->ibdev, &irdma_iw_dev_ops);
-
- return 0;
}
/**
* irdma_init_rdma_device - initialization of rdma device
* @iwdev: irdma device
*/
-static int irdma_init_rdma_device(struct irdma_device *iwdev)
+static void irdma_init_rdma_device(struct irdma_device *iwdev)
{
struct pci_dev *pcidev = iwdev->rf->pcidev;
- int ret;
- if (iwdev->roce_mode) {
+ if (iwdev->roce_mode)
irdma_init_roce_device(iwdev);
- } else {
- ret = irdma_init_iw_device(iwdev);
- if (ret)
- return ret;
- }
+ else
+ irdma_init_iw_device(iwdev);
+
iwdev->ibdev.phys_port_cnt = 1;
iwdev->ibdev.num_comp_vectors = iwdev->rf->ceqs_count;
iwdev->ibdev.dev.parent = &pcidev->dev;
ib_set_device_ops(&iwdev->ibdev, &irdma_dev_ops);
-
- return 0;
}
/**
@@ -4596,9 +4591,7 @@ int irdma_ib_register_device(struct irdma_device *iwdev)
{
int ret;
- ret = irdma_init_rdma_device(iwdev);
- if (ret)
- return ret;
+ irdma_init_rdma_device(iwdev);
ret = ib_device_set_netdev(&iwdev->ibdev, iwdev->netdev, 1);
if (ret)
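
The irdma hunk above moves the iWARP callbacks into the const irdma_iw_dev_ops table instead of assigning each ops pointer at init time. A generic sketch of that pattern, with illustrative types and names rather than the rdma-core API:

#include <stdio.h>

struct dev_ops {
    void (*connect)(void);
    void (*accept)(void);
};

struct device {
    struct dev_ops ops;
};

static void iw_connect(void) { puts("connect"); }
static void iw_accept(void)  { puts("accept"); }

static const struct dev_ops iw_dev_ops = {
    .connect = iw_connect,
    .accept  = iw_accept,
};

/* Model of ib_set_device_ops(): copy only the callbacks that are set. */
static void set_device_ops(struct device *d, const struct dev_ops *ops)
{
    if (ops->connect)
        d->ops.connect = ops->connect;
    if (ops->accept)
        d->ops.accept = ops->accept;
}

int main(void)
{
    struct device d = { { 0 } };

    set_device_ops(&d, &iw_dev_ops);
    d.ops.connect();
    d.ops.accept();
    return 0;
}

Keeping the table const and applying it in one shot is also what lets irdma_init_iw_device() stop returning an error code in this patch.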
diff --git a/drivers/infiniband/hw/mana/qp.c b/drivers/infiniband/hw/mana/qp.c
index 54b61930a7fd..4b3b5b274e84 100644
--- a/drivers/infiniband/hw/mana/qp.c
+++ b/drivers/infiniband/hw/mana/qp.c
@@ -13,7 +13,7 @@ static int mana_ib_cfg_vport_steering(struct mana_ib_dev *dev,
u8 *rx_hash_key)
{
struct mana_port_context *mpc = netdev_priv(ndev);
- struct mana_cfg_rx_steer_req *req = NULL;
+ struct mana_cfg_rx_steer_req_v2 *req;
struct mana_cfg_rx_steer_resp resp = {};
mana_handle_t *req_indir_tab;
struct gdma_context *gc;
@@ -33,6 +33,8 @@ static int mana_ib_cfg_vport_steering(struct mana_ib_dev *dev,
mana_gd_init_req_hdr(&req->hdr, MANA_CONFIG_VPORT_RX, req_buf_size,
sizeof(resp));
+ req->hdr.req.msg_version = GDMA_MESSAGE_V2;
+
req->vport = mpc->port_handle;
req->rx_enable = 1;
req->update_default_rxobj = 1;
@@ -46,6 +48,7 @@ static int mana_ib_cfg_vport_steering(struct mana_ib_dev *dev,
req->num_indir_entries = MANA_INDIRECT_TABLE_SIZE;
req->indir_tab_offset = sizeof(*req);
req->update_indir_tab = true;
+ req->cqe_coalescing_enable = 1;
req_indir_tab = (mana_handle_t *)(req + 1);
/* The ind table passed to the hardware must have
diff --git a/drivers/infiniband/hw/mlx5/counters.c b/drivers/infiniband/hw/mlx5/counters.c
index 1c06920505d2..93257fa5aae8 100644
--- a/drivers/infiniband/hw/mlx5/counters.c
+++ b/drivers/infiniband/hw/mlx5/counters.c
@@ -209,7 +209,8 @@ static const struct mlx5_ib_counters *get_counters(struct mlx5_ib_dev *dev,
!vport_qcounters_supported(dev)) || !port_num)
return &dev->port[0].cnts;
- return &dev->port[port_num - 1].cnts;
+ return is_mdev_switchdev_mode(dev->mdev) ?
+ &dev->port[1].cnts : &dev->port[port_num - 1].cnts;
}
/**
@@ -262,7 +263,7 @@ static struct rdma_hw_stats *
mlx5_ib_alloc_hw_port_stats(struct ib_device *ibdev, u32 port_num)
{
struct mlx5_ib_dev *dev = to_mdev(ibdev);
- const struct mlx5_ib_counters *cnts = &dev->port[port_num - 1].cnts;
+ const struct mlx5_ib_counters *cnts = get_counters(dev, port_num);
return do_alloc_stats(cnts);
}
@@ -329,6 +330,7 @@ static int mlx5_ib_query_q_counters_vport(struct mlx5_ib_dev *dev,
{
u32 out[MLX5_ST_SZ_DW(query_q_counter_out)] = {};
u32 in[MLX5_ST_SZ_DW(query_q_counter_in)] = {};
+ struct mlx5_core_dev *mdev;
__be32 val;
int ret, i;
@@ -336,12 +338,16 @@ static int mlx5_ib_query_q_counters_vport(struct mlx5_ib_dev *dev,
dev->port[port_num].rep->vport == MLX5_VPORT_UPLINK)
return 0;
+ mdev = mlx5_eswitch_get_core_dev(dev->port[port_num].rep->esw);
+ if (!mdev)
+ return -EOPNOTSUPP;
+
MLX5_SET(query_q_counter_in, in, opcode, MLX5_CMD_OP_QUERY_Q_COUNTER);
MLX5_SET(query_q_counter_in, in, other_vport, 1);
MLX5_SET(query_q_counter_in, in, vport_number,
dev->port[port_num].rep->vport);
MLX5_SET(query_q_counter_in, in, aggregate, 1);
- ret = mlx5_cmd_exec_inout(dev->mdev, query_q_counter, in, out);
+ ret = mlx5_cmd_exec_inout(mdev, query_q_counter, in, out);
if (ret)
return ret;
@@ -575,43 +581,53 @@ static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev,
bool is_vport = is_mdev_switchdev_mode(dev->mdev) &&
port_num != MLX5_VPORT_PF;
const struct mlx5_ib_counter *names;
- int j = 0, i;
+ int j = 0, i, size;
names = is_vport ? vport_basic_q_cnts : basic_q_cnts;
- for (i = 0; i < ARRAY_SIZE(basic_q_cnts); i++, j++) {
+ size = is_vport ? ARRAY_SIZE(vport_basic_q_cnts) :
+ ARRAY_SIZE(basic_q_cnts);
+ for (i = 0; i < size; i++, j++) {
descs[j].name = names[i].name;
- offsets[j] = basic_q_cnts[i].offset;
+ offsets[j] = names[i].offset;
}
names = is_vport ? vport_out_of_seq_q_cnts : out_of_seq_q_cnts;
+ size = is_vport ? ARRAY_SIZE(vport_out_of_seq_q_cnts) :
+ ARRAY_SIZE(out_of_seq_q_cnts);
if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt)) {
- for (i = 0; i < ARRAY_SIZE(out_of_seq_q_cnts); i++, j++) {
+ for (i = 0; i < size; i++, j++) {
descs[j].name = names[i].name;
- offsets[j] = out_of_seq_q_cnts[i].offset;
+ offsets[j] = names[i].offset;
}
}
names = is_vport ? vport_retrans_q_cnts : retrans_q_cnts;
+ size = is_vport ? ARRAY_SIZE(vport_retrans_q_cnts) :
+ ARRAY_SIZE(retrans_q_cnts);
if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) {
- for (i = 0; i < ARRAY_SIZE(retrans_q_cnts); i++, j++) {
+ for (i = 0; i < size; i++, j++) {
descs[j].name = names[i].name;
- offsets[j] = retrans_q_cnts[i].offset;
+ offsets[j] = names[i].offset;
}
}
names = is_vport ? vport_extended_err_cnts : extended_err_cnts;
+ size = is_vport ? ARRAY_SIZE(vport_extended_err_cnts) :
+ ARRAY_SIZE(extended_err_cnts);
if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters)) {
- for (i = 0; i < ARRAY_SIZE(extended_err_cnts); i++, j++) {
+ for (i = 0; i < size; i++, j++) {
descs[j].name = names[i].name;
- offsets[j] = extended_err_cnts[i].offset;
+ offsets[j] = names[i].offset;
}
}
names = is_vport ? vport_roce_accl_cnts : roce_accl_cnts;
+ size = is_vport ? ARRAY_SIZE(vport_roce_accl_cnts) :
+ ARRAY_SIZE(roce_accl_cnts);
if (MLX5_CAP_GEN(dev->mdev, roce_accl)) {
- for (i = 0; i < ARRAY_SIZE(roce_accl_cnts); i++, j++) {
+ for (i = 0; i < size; i++, j++) {
descs[j].name = names[i].name;
- offsets[j] = roce_accl_cnts[i].offset;
+ offsets[j] = names[i].offset;
}
}
@@ -661,25 +677,37 @@ static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev,
static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev,
struct mlx5_ib_counters *cnts, u32 port_num)
{
- u32 num_counters, num_op_counters = 0;
+ bool is_vport = is_mdev_switchdev_mode(dev->mdev) &&
+ port_num != MLX5_VPORT_PF;
+ u32 num_counters, num_op_counters = 0, size;
- num_counters = ARRAY_SIZE(basic_q_cnts);
+ size = is_vport ? ARRAY_SIZE(vport_basic_q_cnts) :
+ ARRAY_SIZE(basic_q_cnts);
+ num_counters = size;
+ size = is_vport ? ARRAY_SIZE(vport_out_of_seq_q_cnts) :
+ ARRAY_SIZE(out_of_seq_q_cnts);
if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt))
- num_counters += ARRAY_SIZE(out_of_seq_q_cnts);
+ num_counters += size;
+ size = is_vport ? ARRAY_SIZE(vport_retrans_q_cnts) :
+ ARRAY_SIZE(retrans_q_cnts);
if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters))
- num_counters += ARRAY_SIZE(retrans_q_cnts);
+ num_counters += size;
+ size = is_vport ? ARRAY_SIZE(vport_extended_err_cnts) :
+ ARRAY_SIZE(extended_err_cnts);
if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters))
- num_counters += ARRAY_SIZE(extended_err_cnts);
+ num_counters += size;
+ size = is_vport ? ARRAY_SIZE(vport_roce_accl_cnts) :
+ ARRAY_SIZE(roce_accl_cnts);
if (MLX5_CAP_GEN(dev->mdev, roce_accl))
- num_counters += ARRAY_SIZE(roce_accl_cnts);
+ num_counters += size;
cnts->num_q_counters = num_counters;
- if (is_mdev_switchdev_mode(dev->mdev) && port_num != MLX5_VPORT_PF)
+ if (is_vport)
goto skip_non_qcounters;
if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
@@ -725,11 +753,11 @@ err:
static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev)
{
u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {};
- int num_cnt_ports;
+ int num_cnt_ports = dev->num_ports;
int i, j;
- num_cnt_ports = (!is_mdev_switchdev_mode(dev->mdev) ||
- vport_qcounters_supported(dev)) ? dev->num_ports : 1;
+ if (is_mdev_switchdev_mode(dev->mdev))
+ num_cnt_ports = min(2, num_cnt_ports);
MLX5_SET(dealloc_q_counter_in, in, opcode,
MLX5_CMD_OP_DEALLOC_Q_COUNTER);
@@ -761,15 +789,22 @@ static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev)
{
u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {};
u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {};
- int num_cnt_ports;
+ int num_cnt_ports = dev->num_ports;
int err = 0;
int i;
bool is_shared;
MLX5_SET(alloc_q_counter_in, in, opcode, MLX5_CMD_OP_ALLOC_Q_COUNTER);
is_shared = MLX5_CAP_GEN(dev->mdev, log_max_uctx) != 0;
- num_cnt_ports = (!is_mdev_switchdev_mode(dev->mdev) ||
- vport_qcounters_supported(dev)) ? dev->num_ports : 1;
+
+ /*
+ * In switchdev we need to allocate two ports, one that is used for
+ * the device Q_counters and it is essentially the real Q_counters of
+ * this device, while the other is used as a helper for PF to be able to
+ * query all other vports.
+ */
+ if (is_mdev_switchdev_mode(dev->mdev))
+ num_cnt_ports = min(2, num_cnt_ports);
for (i = 0; i < num_cnt_ports; i++) {
err = __mlx5_ib_alloc_counters(dev, &dev->port[i].cnts, i);
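
With the counters changes above, switchdev mode allocates only two Q-counter sets: set 0 for the device itself and set 1 shared by all vport representors, while native mode keeps one set per port. A simplified selection model (hypothetical helper, not the driver function):

#include <stdio.h>

static int counter_set_index(int switchdev, int port_num)
{
    if (port_num == 0)
        return 0;              /* no port given: fall back to set 0 */
    if (switchdev)
        return 1;              /* all representors share set 1 */
    return port_num - 1;       /* native mode: one set per port */
}

int main(void)
{
    for (int port = 1; port <= 4; port++)
        printf("switchdev port %d -> counter set %d\n",
               port, counter_set_index(1, port));
    return 0;
}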
diff --git a/drivers/infiniband/hw/mlx5/fs.c b/drivers/infiniband/hw/mlx5/fs.c
index 3008632a6c20..1e419e080b53 100644
--- a/drivers/infiniband/hw/mlx5/fs.c
+++ b/drivers/infiniband/hw/mlx5/fs.c
@@ -695,8 +695,6 @@ static struct mlx5_ib_flow_prio *_get_prio(struct mlx5_ib_dev *dev,
struct mlx5_flow_table_attr ft_attr = {};
struct mlx5_flow_table *ft;
- if (mlx5_ib_shared_ft_allowed(&dev->ib_dev))
- ft_attr.uid = MLX5_SHARED_RESOURCE_UID;
ft_attr.prio = priority;
ft_attr.max_fte = num_entries;
ft_attr.flags = flags;
@@ -2025,6 +2023,237 @@ static int flow_matcher_cleanup(struct ib_uobject *uobject,
return 0;
}
+static int steering_anchor_create_ft(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_flow_prio *ft_prio,
+ enum mlx5_flow_namespace_type ns_type)
+{
+ struct mlx5_flow_table_attr ft_attr = {};
+ struct mlx5_flow_namespace *ns;
+ struct mlx5_flow_table *ft;
+
+ if (ft_prio->anchor.ft)
+ return 0;
+
+ ns = mlx5_get_flow_namespace(dev->mdev, ns_type);
+ if (!ns)
+ return -EOPNOTSUPP;
+
+ ft_attr.flags = MLX5_FLOW_TABLE_UNMANAGED;
+ ft_attr.uid = MLX5_SHARED_RESOURCE_UID;
+ ft_attr.prio = 0;
+ ft_attr.max_fte = 2;
+ ft_attr.level = 1;
+
+ ft = mlx5_create_flow_table(ns, &ft_attr);
+ if (IS_ERR(ft))
+ return PTR_ERR(ft);
+
+ ft_prio->anchor.ft = ft;
+
+ return 0;
+}
+
+static void steering_anchor_destroy_ft(struct mlx5_ib_flow_prio *ft_prio)
+{
+ if (ft_prio->anchor.ft) {
+ mlx5_destroy_flow_table(ft_prio->anchor.ft);
+ ft_prio->anchor.ft = NULL;
+ }
+}
+
+static int
+steering_anchor_create_fg_drop(struct mlx5_ib_flow_prio *ft_prio)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_flow_group *fg;
+ void *flow_group_in;
+ int err = 0;
+
+ if (ft_prio->anchor.fg_drop)
+ return 0;
+
+ flow_group_in = kvzalloc(inlen, GFP_KERNEL);
+ if (!flow_group_in)
+ return -ENOMEM;
+
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1);
+
+ fg = mlx5_create_flow_group(ft_prio->anchor.ft, flow_group_in);
+ if (IS_ERR(fg)) {
+ err = PTR_ERR(fg);
+ goto out;
+ }
+
+ ft_prio->anchor.fg_drop = fg;
+
+out:
+ kvfree(flow_group_in);
+
+ return err;
+}
+
+static void
+steering_anchor_destroy_fg_drop(struct mlx5_ib_flow_prio *ft_prio)
+{
+ if (ft_prio->anchor.fg_drop) {
+ mlx5_destroy_flow_group(ft_prio->anchor.fg_drop);
+ ft_prio->anchor.fg_drop = NULL;
+ }
+}
+
+static int
+steering_anchor_create_fg_goto_table(struct mlx5_ib_flow_prio *ft_prio)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_flow_group *fg;
+ void *flow_group_in;
+ int err = 0;
+
+ if (ft_prio->anchor.fg_goto_table)
+ return 0;
+
+ flow_group_in = kvzalloc(inlen, GFP_KERNEL);
+ if (!flow_group_in)
+ return -ENOMEM;
+
+ fg = mlx5_create_flow_group(ft_prio->anchor.ft, flow_group_in);
+ if (IS_ERR(fg)) {
+ err = PTR_ERR(fg);
+ goto out;
+ }
+ ft_prio->anchor.fg_goto_table = fg;
+
+out:
+ kvfree(flow_group_in);
+
+ return err;
+}
+
+static void
+steering_anchor_destroy_fg_goto_table(struct mlx5_ib_flow_prio *ft_prio)
+{
+ if (ft_prio->anchor.fg_goto_table) {
+ mlx5_destroy_flow_group(ft_prio->anchor.fg_goto_table);
+ ft_prio->anchor.fg_goto_table = NULL;
+ }
+}
+
+static int
+steering_anchor_create_rule_drop(struct mlx5_ib_flow_prio *ft_prio)
+{
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5_flow_handle *handle;
+
+ if (ft_prio->anchor.rule_drop)
+ return 0;
+
+ flow_act.fg = ft_prio->anchor.fg_drop;
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP;
+
+ handle = mlx5_add_flow_rules(ft_prio->anchor.ft, NULL, &flow_act,
+ NULL, 0);
+ if (IS_ERR(handle))
+ return PTR_ERR(handle);
+
+ ft_prio->anchor.rule_drop = handle;
+
+ return 0;
+}
+
+static void steering_anchor_destroy_rule_drop(struct mlx5_ib_flow_prio *ft_prio)
+{
+ if (ft_prio->anchor.rule_drop) {
+ mlx5_del_flow_rules(ft_prio->anchor.rule_drop);
+ ft_prio->anchor.rule_drop = NULL;
+ }
+}
+
+static int
+steering_anchor_create_rule_goto_table(struct mlx5_ib_flow_prio *ft_prio)
+{
+ struct mlx5_flow_destination dest = {};
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5_flow_handle *handle;
+
+ if (ft_prio->anchor.rule_goto_table)
+ return 0;
+
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
+ flow_act.fg = ft_prio->anchor.fg_goto_table;
+
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ dest.ft = ft_prio->flow_table;
+
+ handle = mlx5_add_flow_rules(ft_prio->anchor.ft, NULL, &flow_act,
+ &dest, 1);
+ if (IS_ERR(handle))
+ return PTR_ERR(handle);
+
+ ft_prio->anchor.rule_goto_table = handle;
+
+ return 0;
+}
+
+static void
+steering_anchor_destroy_rule_goto_table(struct mlx5_ib_flow_prio *ft_prio)
+{
+ if (ft_prio->anchor.rule_goto_table) {
+ mlx5_del_flow_rules(ft_prio->anchor.rule_goto_table);
+ ft_prio->anchor.rule_goto_table = NULL;
+ }
+}
+
+static int steering_anchor_create_res(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_flow_prio *ft_prio,
+ enum mlx5_flow_namespace_type ns_type)
+{
+ int err;
+
+ err = steering_anchor_create_ft(dev, ft_prio, ns_type);
+ if (err)
+ return err;
+
+ err = steering_anchor_create_fg_drop(ft_prio);
+ if (err)
+ goto destroy_ft;
+
+ err = steering_anchor_create_fg_goto_table(ft_prio);
+ if (err)
+ goto destroy_fg_drop;
+
+ err = steering_anchor_create_rule_drop(ft_prio);
+ if (err)
+ goto destroy_fg_goto_table;
+
+ err = steering_anchor_create_rule_goto_table(ft_prio);
+ if (err)
+ goto destroy_rule_drop;
+
+ return 0;
+
+destroy_rule_drop:
+ steering_anchor_destroy_rule_drop(ft_prio);
+destroy_fg_goto_table:
+ steering_anchor_destroy_fg_goto_table(ft_prio);
+destroy_fg_drop:
+ steering_anchor_destroy_fg_drop(ft_prio);
+destroy_ft:
+ steering_anchor_destroy_ft(ft_prio);
+
+ return err;
+}
+
+static void mlx5_steering_anchor_destroy_res(struct mlx5_ib_flow_prio *ft_prio)
+{
+ steering_anchor_destroy_rule_goto_table(ft_prio);
+ steering_anchor_destroy_rule_drop(ft_prio);
+ steering_anchor_destroy_fg_goto_table(ft_prio);
+ steering_anchor_destroy_fg_drop(ft_prio);
+ steering_anchor_destroy_ft(ft_prio);
+}
+
static int steering_anchor_cleanup(struct ib_uobject *uobject,
enum rdma_remove_reason why,
struct uverbs_attr_bundle *attrs)
@@ -2035,6 +2264,9 @@ static int steering_anchor_cleanup(struct ib_uobject *uobject,
return -EBUSY;
mutex_lock(&obj->dev->flow_db->lock);
+ if (!--obj->ft_prio->anchor.rule_goto_table_ref)
+ steering_anchor_destroy_rule_goto_table(obj->ft_prio);
+
put_flow_table(obj->dev, obj->ft_prio, true);
mutex_unlock(&obj->dev->flow_db->lock);
@@ -2042,6 +2274,24 @@ static int steering_anchor_cleanup(struct ib_uobject *uobject,
return 0;
}
+static void fs_cleanup_anchor(struct mlx5_ib_flow_prio *prio,
+ int count)
+{
+ while (count--)
+ mlx5_steering_anchor_destroy_res(&prio[count]);
+}
+
+void mlx5_ib_fs_cleanup_anchor(struct mlx5_ib_dev *dev)
+{
+ fs_cleanup_anchor(dev->flow_db->prios, MLX5_IB_NUM_FLOW_FT);
+ fs_cleanup_anchor(dev->flow_db->egress_prios, MLX5_IB_NUM_FLOW_FT);
+ fs_cleanup_anchor(dev->flow_db->sniffer, MLX5_IB_NUM_SNIFFER_FTS);
+ fs_cleanup_anchor(dev->flow_db->egress, MLX5_IB_NUM_EGRESS_FTS);
+ fs_cleanup_anchor(dev->flow_db->fdb, MLX5_IB_NUM_FDB_FTS);
+ fs_cleanup_anchor(dev->flow_db->rdma_rx, MLX5_IB_NUM_FLOW_FT);
+ fs_cleanup_anchor(dev->flow_db->rdma_tx, MLX5_IB_NUM_FLOW_FT);
+}
+
static int mlx5_ib_matcher_ns(struct uverbs_attr_bundle *attrs,
struct mlx5_ib_flow_matcher *obj)
{
@@ -2182,21 +2432,31 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_STEERING_ANCHOR_CREATE)(
return -ENOMEM;
mutex_lock(&dev->flow_db->lock);
+
ft_prio = _get_flow_table(dev, priority, ns_type, 0);
if (IS_ERR(ft_prio)) {
- mutex_unlock(&dev->flow_db->lock);
err = PTR_ERR(ft_prio);
goto free_obj;
}
ft_prio->refcount++;
- ft_id = mlx5_flow_table_id(ft_prio->flow_table);
- mutex_unlock(&dev->flow_db->lock);
+
+ if (!ft_prio->anchor.rule_goto_table_ref) {
+ err = steering_anchor_create_res(dev, ft_prio, ns_type);
+ if (err)
+ goto put_flow_table;
+ }
+
+ ft_prio->anchor.rule_goto_table_ref++;
+
+ ft_id = mlx5_flow_table_id(ft_prio->anchor.ft);
err = uverbs_copy_to(attrs, MLX5_IB_ATTR_STEERING_ANCHOR_FT_ID,
&ft_id, sizeof(ft_id));
if (err)
- goto put_flow_table;
+ goto destroy_res;
+
+ mutex_unlock(&dev->flow_db->lock);
uobj->object = obj;
obj->dev = dev;
@@ -2205,8 +2465,10 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_STEERING_ANCHOR_CREATE)(
return 0;
+destroy_res:
+ --ft_prio->anchor.rule_goto_table_ref;
+ mlx5_steering_anchor_destroy_res(ft_prio);
put_flow_table:
- mutex_lock(&dev->flow_db->lock);
put_flow_table(dev, ft_prio, true);
mutex_unlock(&dev->flow_db->lock);
free_obj:
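
steering_anchor_create_res() above uses the usual create-then-unwind idiom: each step that fails jumps to a label that tears down everything created so far, in reverse order. A self-contained sketch of the idiom with placeholder resources:

#include <stdio.h>

static int create_a(void) { return 0; }
static int create_b(void) { return 0; }
static int create_c(void) { return -1; }   /* pretend this step fails */
static void destroy_a(void) { puts("destroy a"); }
static void destroy_b(void) { puts("destroy b"); }

static int create_all(void)
{
    int err;

    err = create_a();
    if (err)
        return err;

    err = create_b();
    if (err)
        goto destroy_a;

    err = create_c();
    if (err)
        goto destroy_b;

    return 0;

destroy_b:
    destroy_b();
destroy_a:
    destroy_a();
    return err;
}

int main(void)
{
    printf("create_all() = %d\n", create_all());
    return 0;
}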
diff --git a/drivers/infiniband/hw/mlx5/fs.h b/drivers/infiniband/hw/mlx5/fs.h
index ad320adaf321..b9734904f5f0 100644
--- a/drivers/infiniband/hw/mlx5/fs.h
+++ b/drivers/infiniband/hw/mlx5/fs.h
@@ -10,6 +10,7 @@
#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
int mlx5_ib_fs_init(struct mlx5_ib_dev *dev);
+void mlx5_ib_fs_cleanup_anchor(struct mlx5_ib_dev *dev);
#else
static inline int mlx5_ib_fs_init(struct mlx5_ib_dev *dev)
{
@@ -21,9 +22,24 @@ static inline int mlx5_ib_fs_init(struct mlx5_ib_dev *dev)
mutex_init(&dev->flow_db->lock);
return 0;
}
+
+static inline void mlx5_ib_fs_cleanup_anchor(struct mlx5_ib_dev *dev) {}
#endif
+
static inline void mlx5_ib_fs_cleanup(struct mlx5_ib_dev *dev)
{
+ /* When a steering anchor is created, a special flow table is also
+ * created for the user to reference. Since the user can reference it,
+ * the kernel cannot trust that when the user destroys the steering
+ * anchor, they no longer reference the flow table.
+ *
+ * To address this issue, when a user destroys a steering anchor, only
+ * the flow steering rule in the table is destroyed, but the table
+ * itself is kept to deal with the above scenario. The remaining
+ * resources are only removed when the RDMA device is destroyed, which
+ * is a safe assumption that all references are gone.
+ */
+ mlx5_ib_fs_cleanup_anchor(dev);
kfree(dev->flow_db);
}
#endif /* _MLX5_IB_FS_H */
diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c
index ddcfc116b19a..c7a4ee896121 100644
--- a/drivers/infiniband/hw/mlx5/ib_rep.c
+++ b/drivers/infiniband/hw/mlx5/ib_rep.c
@@ -30,45 +30,65 @@ mlx5_ib_set_vport_rep(struct mlx5_core_dev *dev,
static void mlx5_ib_register_peer_vport_reps(struct mlx5_core_dev *mdev);
+static void mlx5_ib_num_ports_update(struct mlx5_core_dev *dev, u32 *num_ports)
+{
+ struct mlx5_core_dev *peer_dev;
+ int i;
+
+ mlx5_lag_for_each_peer_mdev(dev, peer_dev, i) {
+ u32 peer_num_ports = mlx5_eswitch_get_total_vports(peer_dev);
+
+ if (mlx5_lag_is_mpesw(peer_dev))
+ *num_ports += peer_num_ports;
+ else
+ /* Only 1 ib port is the representor for all uplinks */
+ *num_ports += peer_num_ports - 1;
+ }
+}
+
static int
mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
{
u32 num_ports = mlx5_eswitch_get_total_vports(dev);
+ struct mlx5_core_dev *lag_master = dev;
const struct mlx5_ib_profile *profile;
struct mlx5_core_dev *peer_dev;
struct mlx5_ib_dev *ibdev;
- int second_uplink = false;
- u32 peer_num_ports;
+ int new_uplink = false;
int vport_index;
int ret;
+ int i;
vport_index = rep->vport_index;
if (mlx5_lag_is_shared_fdb(dev)) {
- peer_dev = mlx5_lag_get_peer_mdev(dev);
- peer_num_ports = mlx5_eswitch_get_total_vports(peer_dev);
if (mlx5_lag_is_master(dev)) {
- if (mlx5_lag_is_mpesw(dev))
- num_ports += peer_num_ports;
- else
- num_ports += peer_num_ports - 1;
-
+ mlx5_ib_num_ports_update(dev, &num_ports);
} else {
if (rep->vport == MLX5_VPORT_UPLINK) {
if (!mlx5_lag_is_mpesw(dev))
return 0;
- second_uplink = true;
+ new_uplink = true;
}
+ mlx5_lag_for_each_peer_mdev(dev, peer_dev, i) {
+ u32 peer_n_ports = mlx5_eswitch_get_total_vports(peer_dev);
+
+ if (mlx5_lag_is_master(peer_dev))
+ lag_master = peer_dev;
+ else if (!mlx5_lag_is_mpesw(dev))
+ /* Only 1 ib port is the representor for all uplinks */
+ peer_n_ports--;
- vport_index += peer_num_ports;
- dev = peer_dev;
+ if (mlx5_get_dev_index(peer_dev) < mlx5_get_dev_index(dev))
+ vport_index += peer_n_ports;
+ }
}
}
- if (rep->vport == MLX5_VPORT_UPLINK && !second_uplink)
+ if (rep->vport == MLX5_VPORT_UPLINK && !new_uplink)
profile = &raw_eth_profile;
else
- return mlx5_ib_set_vport_rep(dev, rep, vport_index);
+ return mlx5_ib_set_vport_rep(lag_master, rep, vport_index);
ibdev = ib_alloc_device(mlx5_ib_dev, ib_dev);
if (!ibdev)
@@ -85,8 +105,8 @@ mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
vport_index = rep->vport_index;
ibdev->port[vport_index].rep = rep;
ibdev->port[vport_index].roce.netdev =
- mlx5_ib_get_rep_netdev(dev->priv.eswitch, rep->vport);
- ibdev->mdev = dev;
+ mlx5_ib_get_rep_netdev(lag_master->priv.eswitch, rep->vport);
+ ibdev->mdev = lag_master;
ibdev->num_ports = num_ports;
ret = __mlx5_ib_add(ibdev, profile);
@@ -94,8 +114,8 @@ mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
goto fail_add;
rep->rep_data[REP_IB].priv = ibdev;
- if (mlx5_lag_is_shared_fdb(dev))
- mlx5_ib_register_peer_vport_reps(dev);
+ if (mlx5_lag_is_shared_fdb(lag_master))
+ mlx5_ib_register_peer_vport_reps(lag_master);
return 0;
@@ -118,23 +138,27 @@ mlx5_ib_vport_rep_unload(struct mlx5_eswitch_rep *rep)
struct mlx5_ib_dev *dev = mlx5_ib_rep_to_dev(rep);
int vport_index = rep->vport_index;
struct mlx5_ib_port *port;
+ int i;
if (WARN_ON(!mdev))
return;
+ if (!dev)
+ return;
+
if (mlx5_lag_is_shared_fdb(mdev) &&
!mlx5_lag_is_master(mdev)) {
- struct mlx5_core_dev *peer_mdev;
-
- if (rep->vport == MLX5_VPORT_UPLINK)
+ if (rep->vport == MLX5_VPORT_UPLINK && !mlx5_lag_is_mpesw(mdev))
return;
- peer_mdev = mlx5_lag_get_peer_mdev(mdev);
- vport_index += mlx5_eswitch_get_total_vports(peer_mdev);
+ for (i = 0; i < dev->num_ports; i++) {
+ if (dev->port[i].rep == rep)
+ break;
+ }
+ if (WARN_ON(i == dev->num_ports))
+ return;
+ vport_index = i;
}
- if (!dev)
- return;
-
port = &dev->port[vport_index];
write_lock(&port->roce.netdev_lock);
port->roce.netdev = NULL;
@@ -143,13 +167,18 @@ mlx5_ib_vport_rep_unload(struct mlx5_eswitch_rep *rep)
port->rep = NULL;
if (rep->vport == MLX5_VPORT_UPLINK) {
- struct mlx5_core_dev *peer_mdev;
- struct mlx5_eswitch *esw;
+
+ if (mlx5_lag_is_shared_fdb(mdev) && !mlx5_lag_is_master(mdev))
+ return;
if (mlx5_lag_is_shared_fdb(mdev)) {
- peer_mdev = mlx5_lag_get_peer_mdev(mdev);
- esw = peer_mdev->priv.eswitch;
- mlx5_eswitch_unregister_vport_reps(esw, REP_IB);
+ struct mlx5_core_dev *peer_mdev;
+ struct mlx5_eswitch *esw;
+
+ mlx5_lag_for_each_peer_mdev(mdev, peer_mdev, i) {
+ esw = peer_mdev->priv.eswitch;
+ mlx5_eswitch_unregister_vport_reps(esw, REP_IB);
+ }
}
__mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX);
}
@@ -163,14 +192,14 @@ static const struct mlx5_eswitch_rep_ops rep_ops = {
static void mlx5_ib_register_peer_vport_reps(struct mlx5_core_dev *mdev)
{
- struct mlx5_core_dev *peer_mdev = mlx5_lag_get_peer_mdev(mdev);
+ struct mlx5_core_dev *peer_mdev;
struct mlx5_eswitch *esw;
+ int i;
- if (!peer_mdev)
- return;
-
- esw = peer_mdev->priv.eswitch;
- mlx5_eswitch_register_vport_reps(esw, &rep_ops, REP_IB);
+ mlx5_lag_for_each_peer_mdev(mdev, peer_mdev, i) {
+ esw = peer_mdev->priv.eswitch;
+ mlx5_eswitch_register_vport_reps(esw, &rep_ops, REP_IB);
+ }
}
struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw,
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 5d45de223c43..f0b394ed7452 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -4275,6 +4275,9 @@ const struct mlx5_ib_profile raw_eth_profile = {
STAGE_CREATE(MLX5_IB_STAGE_POST_IB_REG_UMR,
mlx5_ib_stage_post_ib_reg_umr_init,
NULL),
+ STAGE_CREATE(MLX5_IB_STAGE_DELAY_DROP,
+ mlx5_ib_stage_delay_drop_init,
+ mlx5_ib_stage_delay_drop_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_RESTRACK,
mlx5_ib_restrack_init,
NULL),
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index efa4dc6e7dee..9c33d960af3c 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -25,6 +25,7 @@
#include <rdma/mlx5_user_ioctl_verbs.h>
#include "srq.h"
+#include "qp.h"
#define mlx5_ib_dbg(_dev, format, arg...) \
dev_dbg(&(_dev)->ib_dev.dev, "%s:%d:(pid %d): " format, __func__, \
@@ -237,8 +238,19 @@ enum {
#define MLX5_IB_NUM_SNIFFER_FTS 2
#define MLX5_IB_NUM_EGRESS_FTS 1
#define MLX5_IB_NUM_FDB_FTS MLX5_BY_PASS_NUM_REGULAR_PRIOS
+
+struct mlx5_ib_anchor {
+ struct mlx5_flow_table *ft;
+ struct mlx5_flow_group *fg_goto_table;
+ struct mlx5_flow_group *fg_drop;
+ struct mlx5_flow_handle *rule_goto_table;
+ struct mlx5_flow_handle *rule_drop;
+ unsigned int rule_goto_table_ref;
+};
+
struct mlx5_ib_flow_prio {
struct mlx5_flow_table *flow_table;
+ struct mlx5_ib_anchor anchor;
unsigned int refcount;
};
@@ -1587,6 +1599,9 @@ static inline bool mlx5_ib_lag_should_assign_affinity(struct mlx5_ib_dev *dev)
MLX5_CAP_PORT_SELECTION(dev->mdev, port_select_flow_table_bypass))
return 0;
+ if (mlx5_lag_is_lacp_owner(dev->mdev) && !dev->lag_active)
+ return 0;
+
return dev->lag_active ||
(MLX5_CAP_GEN(dev->mdev, num_lag_ports) > 1 &&
MLX5_CAP_GEN(dev->mdev, lag_tx_port_affinity));
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 70ca8ffa9256..78b96bfb4e6a 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -1237,6 +1237,9 @@ static int create_raw_packet_qp_tis(struct mlx5_ib_dev *dev,
MLX5_SET(create_tis_in, in, uid, to_mpd(pd)->uid);
MLX5_SET(tisc, tisc, transport_domain, tdn);
+ if (!mlx5_ib_lag_should_assign_affinity(dev) &&
+ mlx5_lag_is_lacp_owner(dev->mdev))
+ MLX5_SET(tisc, tisc, strict_lag_tx_port_affinity, 1);
if (qp->flags & IB_QP_CREATE_SOURCE_QPN)
MLX5_SET(tisc, tisc, underlay_qpn, qp->underlay_qpn);
diff --git a/drivers/infiniband/hw/mlx5/qp.h b/drivers/infiniband/hw/mlx5/qp.h
index 77f9b4a54816..b6ee7c3ee1ca 100644
--- a/drivers/infiniband/hw/mlx5/qp.h
+++ b/drivers/infiniband/hw/mlx5/qp.h
@@ -6,7 +6,17 @@
#ifndef _MLX5_IB_QP_H
#define _MLX5_IB_QP_H
-#include "mlx5_ib.h"
+struct mlx5_ib_dev;
+
+struct mlx5_qp_table {
+ struct notifier_block nb;
+ struct xarray dct_xa;
+
+ /* protect radix tree
+ */
+ spinlock_t lock;
+ struct radix_tree_root tree;
+};
int mlx5_init_qp_table(struct mlx5_ib_dev *dev);
void mlx5_cleanup_qp_table(struct mlx5_ib_dev *dev);
diff --git a/drivers/infiniband/hw/mlx5/qpc.c b/drivers/infiniband/hw/mlx5/qpc.c
index bae0334d6e7f..d9cf6982d645 100644
--- a/drivers/infiniband/hw/mlx5/qpc.c
+++ b/drivers/infiniband/hw/mlx5/qpc.c
@@ -88,23 +88,35 @@ static bool is_event_type_allowed(int rsc_type, int event_type)
}
}
+static int dct_event_notifier(struct mlx5_ib_dev *dev, struct mlx5_eqe *eqe)
+{
+ struct mlx5_core_dct *dct;
+ unsigned long flags;
+ u32 qpn;
+
+ qpn = be32_to_cpu(eqe->data.dct.dctn) & 0xFFFFFF;
+ xa_lock_irqsave(&dev->qp_table.dct_xa, flags);
+ dct = xa_load(&dev->qp_table.dct_xa, qpn);
+ if (dct)
+ complete(&dct->drained);
+ xa_unlock_irqrestore(&dev->qp_table.dct_xa, flags);
+ return NOTIFY_OK;
+}
+
static int rsc_event_notifier(struct notifier_block *nb,
unsigned long type, void *data)
{
+ struct mlx5_ib_dev *dev =
+ container_of(nb, struct mlx5_ib_dev, qp_table.nb);
struct mlx5_core_rsc_common *common;
- struct mlx5_qp_table *table;
- struct mlx5_core_dct *dct;
+ struct mlx5_eqe *eqe = data;
u8 event_type = (u8)type;
struct mlx5_core_qp *qp;
- struct mlx5_eqe *eqe;
u32 rsn;
switch (event_type) {
case MLX5_EVENT_TYPE_DCT_DRAINED:
- eqe = data;
- rsn = be32_to_cpu(eqe->data.dct.dctn) & 0xffffff;
- rsn |= (MLX5_RES_DCT << MLX5_USER_INDEX_LEN);
- break;
+ return dct_event_notifier(dev, eqe);
case MLX5_EVENT_TYPE_PATH_MIG:
case MLX5_EVENT_TYPE_COMM_EST:
case MLX5_EVENT_TYPE_SQ_DRAINED:
@@ -113,7 +125,6 @@ static int rsc_event_notifier(struct notifier_block *nb,
case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
- eqe = data;
rsn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff;
rsn |= (eqe->data.qp_srq.type << MLX5_USER_INDEX_LEN);
break;
@@ -121,8 +132,7 @@ static int rsc_event_notifier(struct notifier_block *nb,
return NOTIFY_DONE;
}
- table = container_of(nb, struct mlx5_qp_table, nb);
- common = mlx5_get_rsc(table, rsn);
+ common = mlx5_get_rsc(&dev->qp_table, rsn);
if (!common)
return NOTIFY_OK;
@@ -137,11 +147,6 @@ static int rsc_event_notifier(struct notifier_block *nb,
qp->event(qp, event_type);
/* Need to put resource in event handler */
return NOTIFY_OK;
- case MLX5_RES_DCT:
- dct = (struct mlx5_core_dct *)common;
- if (event_type == MLX5_EVENT_TYPE_DCT_DRAINED)
- complete(&dct->drained);
- break;
default:
break;
}
@@ -188,28 +193,15 @@ static void destroy_resource_common(struct mlx5_ib_dev *dev,
}
static int _mlx5_core_destroy_dct(struct mlx5_ib_dev *dev,
- struct mlx5_core_dct *dct, bool need_cleanup)
+ struct mlx5_core_dct *dct)
{
u32 in[MLX5_ST_SZ_DW(destroy_dct_in)] = {};
struct mlx5_core_qp *qp = &dct->mqp;
- int err;
- err = mlx5_core_drain_dct(dev, dct);
- if (err) {
- if (dev->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
- goto destroy;
-
- return err;
- }
- wait_for_completion(&dct->drained);
-destroy:
- if (need_cleanup)
- destroy_resource_common(dev, &dct->mqp);
MLX5_SET(destroy_dct_in, in, opcode, MLX5_CMD_OP_DESTROY_DCT);
MLX5_SET(destroy_dct_in, in, dctn, qp->qpn);
MLX5_SET(destroy_dct_in, in, uid, qp->uid);
- err = mlx5_cmd_exec_in(dev->mdev, destroy_dct, in);
- return err;
+ return mlx5_cmd_exec_in(dev->mdev, destroy_dct, in);
}
int mlx5_core_create_dct(struct mlx5_ib_dev *dev, struct mlx5_core_dct *dct,
@@ -227,13 +219,13 @@ int mlx5_core_create_dct(struct mlx5_ib_dev *dev, struct mlx5_core_dct *dct,
qp->qpn = MLX5_GET(create_dct_out, out, dctn);
qp->uid = MLX5_GET(create_dct_in, in, uid);
- err = create_resource_common(dev, qp, MLX5_RES_DCT);
+ err = xa_err(xa_store_irq(&dev->qp_table.dct_xa, qp->qpn, dct, GFP_KERNEL));
if (err)
goto err_cmd;
return 0;
err_cmd:
- _mlx5_core_destroy_dct(dev, dct, false);
+ _mlx5_core_destroy_dct(dev, dct);
return err;
}
@@ -284,7 +276,31 @@ static int mlx5_core_drain_dct(struct mlx5_ib_dev *dev,
int mlx5_core_destroy_dct(struct mlx5_ib_dev *dev,
struct mlx5_core_dct *dct)
{
- return _mlx5_core_destroy_dct(dev, dct, true);
+ struct mlx5_qp_table *table = &dev->qp_table;
+ struct mlx5_core_dct *tmp;
+ int err;
+
+ err = mlx5_core_drain_dct(dev, dct);
+ if (err) {
+ if (dev->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
+ goto destroy;
+
+ return err;
+ }
+ wait_for_completion(&dct->drained);
+
+destroy:
+ tmp = xa_cmpxchg_irq(&table->dct_xa, dct->mqp.qpn, dct, XA_ZERO_ENTRY, GFP_KERNEL);
+ if (WARN_ON(tmp != dct))
+ return xa_err(tmp) ?: -EINVAL;
+
+ err = _mlx5_core_destroy_dct(dev, dct);
+ if (err) {
+ xa_cmpxchg_irq(&table->dct_xa, dct->mqp.qpn, XA_ZERO_ENTRY, dct, 0);
+ return err;
+ }
+ xa_erase_irq(&table->dct_xa, dct->mqp.qpn);
+ return 0;
}
int mlx5_core_destroy_qp(struct mlx5_ib_dev *dev, struct mlx5_core_qp *qp)
@@ -298,8 +314,7 @@ int mlx5_core_destroy_qp(struct mlx5_ib_dev *dev, struct mlx5_core_qp *qp)
MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
MLX5_SET(destroy_qp_in, in, qpn, qp->qpn);
MLX5_SET(destroy_qp_in, in, uid, qp->uid);
- mlx5_cmd_exec_in(dev->mdev, destroy_qp, in);
- return 0;
+ return mlx5_cmd_exec_in(dev->mdev, destroy_qp, in);
}
int mlx5_core_set_delay_drop(struct mlx5_ib_dev *dev,
@@ -488,6 +503,7 @@ int mlx5_init_qp_table(struct mlx5_ib_dev *dev)
spin_lock_init(&table->lock);
INIT_RADIX_TREE(&table->tree, GFP_ATOMIC);
+ xa_init(&table->dct_xa);
mlx5_qp_debugfs_init(dev->mdev);
table->nb.notifier_call = rsc_event_notifier;
@@ -551,14 +567,14 @@ int mlx5_core_xrcd_dealloc(struct mlx5_ib_dev *dev, u32 xrcdn)
return mlx5_cmd_exec_in(dev->mdev, dealloc_xrcd, in);
}
-static void destroy_rq_tracked(struct mlx5_ib_dev *dev, u32 rqn, u16 uid)
+static int destroy_rq_tracked(struct mlx5_ib_dev *dev, u32 rqn, u16 uid)
{
u32 in[MLX5_ST_SZ_DW(destroy_rq_in)] = {};
MLX5_SET(destroy_rq_in, in, opcode, MLX5_CMD_OP_DESTROY_RQ);
MLX5_SET(destroy_rq_in, in, rqn, rqn);
MLX5_SET(destroy_rq_in, in, uid, uid);
- mlx5_cmd_exec_in(dev->mdev, destroy_rq, in);
+ return mlx5_cmd_exec_in(dev->mdev, destroy_rq, in);
}
int mlx5_core_create_rq_tracked(struct mlx5_ib_dev *dev, u32 *in, int inlen,
@@ -589,8 +605,7 @@ int mlx5_core_destroy_rq_tracked(struct mlx5_ib_dev *dev,
struct mlx5_core_qp *rq)
{
destroy_resource_common(dev, rq);
- destroy_rq_tracked(dev, rq->qpn, rq->uid);
- return 0;
+ return destroy_rq_tracked(dev, rq->qpn, rq->uid);
}
static void destroy_sq_tracked(struct mlx5_ib_dev *dev, u32 sqn, u16 uid)
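
For reference, a minimal standalone sketch of the dct_xa removal pattern used
in mlx5_core_destroy_dct() above, written against the generic xarray API. The
tracked_obj type and fw_destroy callback are hypothetical stand-ins: the slot
is first parked as XA_ZERO_ENTRY so the event handler can no longer find the
object, the firmware command is issued, and the entry is either erased on
success or restored on failure.

#include <linux/bug.h>
#include <linux/types.h>
#include <linux/xarray.h>

struct tracked_obj {
	u32 id;
};

static int tracked_destroy(struct xarray *xa, struct tracked_obj *obj,
			   int (*fw_destroy)(struct tracked_obj *obj))
{
	struct tracked_obj *tmp;
	int err;

	/* hide the object from lookups while the command is in flight */
	tmp = xa_cmpxchg_irq(xa, obj->id, obj, XA_ZERO_ENTRY, GFP_KERNEL);
	if (WARN_ON(tmp != obj))
		return xa_err(tmp) ?: -EINVAL;

	err = fw_destroy(obj);		/* may fail */
	if (err) {
		/* undo: make the object visible to lookups again */
		xa_cmpxchg_irq(xa, obj->id, XA_ZERO_ENTRY, obj, 0);
		return err;
	}

	xa_erase_irq(xa, obj->id);
	return 0;
}
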
diff --git a/drivers/infiniband/hw/qib/qib_user_pages.c b/drivers/infiniband/hw/qib/qib_user_pages.c
index f693bc753b6b..1bb7507325bc 100644
--- a/drivers/infiniband/hw/qib/qib_user_pages.c
+++ b/drivers/infiniband/hw/qib/qib_user_pages.c
@@ -111,7 +111,7 @@ int qib_get_user_pages(unsigned long start_page, size_t num_pages,
ret = pin_user_pages(start_page + got * PAGE_SIZE,
num_pages - got,
FOLL_LONGTERM | FOLL_WRITE,
- p + got, NULL);
+ p + got);
if (ret < 0) {
mmap_read_unlock(current->mm);
goto bail_release;
diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c
index 2a5cac2658ec..84e0f41e7dfa 100644
--- a/drivers/infiniband/hw/usnic/usnic_uiom.c
+++ b/drivers/infiniband/hw/usnic/usnic_uiom.c
@@ -140,7 +140,7 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable,
ret = pin_user_pages(cur_base,
min_t(unsigned long, npages,
PAGE_SIZE / sizeof(struct page *)),
- gup_flags, page_list, NULL);
+ gup_flags, page_list);
if (ret < 0)
goto out;
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
index f83cd4a9d992..98b2a0090bf2 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
@@ -709,14 +709,6 @@ int pvrdma_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
goto out;
}
- if (unlikely(wr->opcode < 0)) {
- dev_warn_ratelimited(&dev->pdev->dev,
- "invalid send opcode\n");
- *bad_wr = wr;
- ret = -EINVAL;
- goto out;
- }
-
/*
* Only support UD, RC.
* Need to check opcode table for thorough checking.
diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c
index 7a7e713de52d..54c723a6edda 100644
--- a/drivers/infiniband/sw/rxe/rxe.c
+++ b/drivers/infiniband/sw/rxe/rxe.c
@@ -212,10 +212,16 @@ static int __init rxe_module_init(void)
{
int err;
- err = rxe_net_init();
+ err = rxe_alloc_wq();
if (err)
return err;
+ err = rxe_net_init();
+ if (err) {
+ rxe_destroy_wq();
+ return err;
+ }
+
rdma_link_register(&rxe_link_ops);
pr_info("loaded\n");
return 0;
@@ -226,6 +232,7 @@ static void __exit rxe_module_exit(void)
rdma_link_unregister(&rxe_link_ops);
ib_unregister_driver(RDMA_DRIVER_RXE);
rxe_net_exit();
+ rxe_destroy_wq();
pr_info("unloaded\n");
}
diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c
index db18ace74d2b..5111735aafae 100644
--- a/drivers/infiniband/sw/rxe/rxe_comp.c
+++ b/drivers/infiniband/sw/rxe/rxe_comp.c
@@ -115,15 +115,16 @@ static enum ib_wc_opcode wr_to_wc_opcode(enum ib_wr_opcode opcode)
void retransmit_timer(struct timer_list *t)
{
struct rxe_qp *qp = from_timer(qp, t, retrans_timer);
+ unsigned long flags;
rxe_dbg_qp(qp, "retransmit timer fired\n");
- spin_lock_bh(&qp->state_lock);
+ spin_lock_irqsave(&qp->state_lock, flags);
if (qp->valid) {
qp->comp.timeout = 1;
rxe_sched_task(&qp->comp.task);
}
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
}
void rxe_comp_queue_pkt(struct rxe_qp *qp, struct sk_buff *skb)
@@ -481,11 +482,13 @@ static void do_complete(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
static void comp_check_sq_drain_done(struct rxe_qp *qp)
{
- spin_lock_bh(&qp->state_lock);
+ unsigned long flags;
+
+ spin_lock_irqsave(&qp->state_lock, flags);
if (unlikely(qp_state(qp) == IB_QPS_SQD)) {
if (qp->attr.sq_draining && qp->comp.psn == qp->req.psn) {
qp->attr.sq_draining = 0;
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
if (qp->ibqp.event_handler) {
struct ib_event ev;
@@ -499,7 +502,7 @@ static void comp_check_sq_drain_done(struct rxe_qp *qp)
return;
}
}
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
}
static inline enum comp_state complete_ack(struct rxe_qp *qp,
@@ -625,13 +628,15 @@ static void free_pkt(struct rxe_pkt_info *pkt)
*/
static void reset_retry_timer(struct rxe_qp *qp)
{
+ unsigned long flags;
+
if (qp_type(qp) == IB_QPT_RC && qp->qp_timeout_jiffies) {
- spin_lock_bh(&qp->state_lock);
+ spin_lock_irqsave(&qp->state_lock, flags);
if (qp_state(qp) >= IB_QPS_RTS &&
psn_compare(qp->req.psn, qp->comp.psn) > 0)
mod_timer(&qp->retrans_timer,
jiffies + qp->qp_timeout_jiffies);
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
}
}
@@ -643,18 +648,19 @@ int rxe_completer(struct rxe_qp *qp)
struct rxe_pkt_info *pkt = NULL;
enum comp_state state;
int ret;
+ unsigned long flags;
- spin_lock_bh(&qp->state_lock);
+ spin_lock_irqsave(&qp->state_lock, flags);
if (!qp->valid || qp_state(qp) == IB_QPS_ERR ||
qp_state(qp) == IB_QPS_RESET) {
bool notify = qp->valid && (qp_state(qp) == IB_QPS_ERR);
drain_resp_pkts(qp);
flush_send_queue(qp, notify);
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
goto exit;
}
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
if (qp->comp.timeout) {
qp->comp.timeout_retry = 1;
@@ -826,7 +832,7 @@ int rxe_completer(struct rxe_qp *qp)
}
/* A non-zero return value will cause rxe_do_task to
- * exit its loop and end the tasklet. A zero return
+ * exit its loop and end the work item. A zero return
* will continue looping and return to rxe_completer
*/
done:
diff --git a/drivers/infiniband/sw/rxe/rxe_cq.c b/drivers/infiniband/sw/rxe/rxe_cq.c
index 20ff0c0c4605..d5486cbb3f10 100644
--- a/drivers/infiniband/sw/rxe/rxe_cq.c
+++ b/drivers/infiniband/sw/rxe/rxe_cq.c
@@ -113,15 +113,14 @@ int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited)
queue_advance_producer(cq->queue, QUEUE_TYPE_TO_CLIENT);
- spin_unlock_irqrestore(&cq->cq_lock, flags);
-
- if ((cq->notify == IB_CQ_NEXT_COMP) ||
- (cq->notify == IB_CQ_SOLICITED && solicited)) {
+ if ((cq->notify & IB_CQ_NEXT_COMP) ||
+ (cq->notify & IB_CQ_SOLICITED && solicited)) {
cq->notify = 0;
-
cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
}
+ spin_unlock_irqrestore(&cq->cq_lock, flags);
+
return 0;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
index 804b15e929dd..666e06a82bc9 100644
--- a/drivers/infiniband/sw/rxe/rxe_loc.h
+++ b/drivers/infiniband/sw/rxe/rxe_loc.h
@@ -31,8 +31,6 @@ int rxe_cq_resize_queue(struct rxe_cq *cq, int new_cqe,
int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited);
-void rxe_cq_disable(struct rxe_cq *cq);
-
void rxe_cq_cleanup(struct rxe_pool_elem *elem);
/* rxe_mcast.c */
diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c
index 0e538fafcc20..f54042e9aeb2 100644
--- a/drivers/infiniband/sw/rxe/rxe_mr.c
+++ b/drivers/infiniband/sw/rxe/rxe_mr.c
@@ -45,22 +45,17 @@ int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length)
}
}
-#define IB_ACCESS_REMOTE (IB_ACCESS_REMOTE_READ \
- | IB_ACCESS_REMOTE_WRITE \
- | IB_ACCESS_REMOTE_ATOMIC)
-
static void rxe_mr_init(int access, struct rxe_mr *mr)
{
- u32 lkey = mr->elem.index << 8 | rxe_get_next_key(-1);
- u32 rkey = (access & IB_ACCESS_REMOTE) ? lkey : 0;
+ u32 key = mr->elem.index << 8 | rxe_get_next_key(-1);
/* set ibmr->l/rkey and also copy into private l/rkey
* for user MRs these will always be the same
* for cases where caller 'owns' the key portion
* they may be different until REG_MR WQE is executed.
*/
- mr->lkey = mr->ibmr.lkey = lkey;
- mr->rkey = mr->ibmr.rkey = rkey;
+ mr->lkey = mr->ibmr.lkey = key;
+ mr->rkey = mr->ibmr.rkey = key;
mr->access = access;
mr->ibmr.page_size = PAGE_SIZE;
@@ -195,7 +190,7 @@ int rxe_mr_init_fast(int max_pages, struct rxe_mr *mr)
int err;
/* always allow remote access for FMRs */
- rxe_mr_init(IB_ACCESS_REMOTE, mr);
+ rxe_mr_init(RXE_ACCESS_REMOTE, mr);
err = rxe_mr_alloc(mr, max_pages);
if (err)
@@ -644,6 +639,7 @@ int rxe_invalidate_mr(struct rxe_qp *qp, u32 key)
{
struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
struct rxe_mr *mr;
+ int remote;
int ret;
mr = rxe_pool_get_index(&rxe->mr_pool, key >> 8);
@@ -653,9 +649,10 @@ int rxe_invalidate_mr(struct rxe_qp *qp, u32 key)
goto err;
}
- if (mr->rkey ? (key != mr->rkey) : (key != mr->lkey)) {
+ remote = mr->access & RXE_ACCESS_REMOTE;
+ if (remote ? (key != mr->rkey) : (key != mr->lkey)) {
rxe_dbg_mr(mr, "wr key (%#x) doesn't match mr key (%#x)\n",
- key, (mr->rkey ? mr->rkey : mr->lkey));
+ key, (remote ? mr->rkey : mr->lkey));
ret = -EINVAL;
goto err_drop_ref;
}
@@ -715,7 +712,7 @@ int rxe_reg_fast_mr(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
mr->access = access;
mr->lkey = key;
- mr->rkey = (access & IB_ACCESS_REMOTE) ? key : 0;
+ mr->rkey = key;
mr->ibmr.iova = wqe->wr.wr.reg.mr->iova;
mr->state = RXE_MR_STATE_VALID;
diff --git a/drivers/infiniband/sw/rxe/rxe_mw.c b/drivers/infiniband/sw/rxe/rxe_mw.c
index afa5ce1a7116..d8a43d87de93 100644
--- a/drivers/infiniband/sw/rxe/rxe_mw.c
+++ b/drivers/infiniband/sw/rxe/rxe_mw.c
@@ -48,7 +48,7 @@ int rxe_dealloc_mw(struct ib_mw *ibmw)
}
static int rxe_check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
- struct rxe_mw *mw, struct rxe_mr *mr)
+ struct rxe_mw *mw, struct rxe_mr *mr, int access)
{
if (mw->ibmw.type == IB_MW_TYPE_1) {
if (unlikely(mw->state != RXE_MW_STATE_VALID)) {
@@ -58,7 +58,7 @@ static int rxe_check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
}
/* o10-36.2.2 */
- if (unlikely((mw->access & IB_ZERO_BASED))) {
+ if (unlikely((access & IB_ZERO_BASED))) {
rxe_dbg_mw(mw, "attempt to bind a zero based type 1 MW\n");
return -EINVAL;
}
@@ -104,7 +104,7 @@ static int rxe_check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
}
/* C10-74 */
- if (unlikely((mw->access &
+ if (unlikely((access &
(IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_ATOMIC)) &&
!(mr->access & IB_ACCESS_LOCAL_WRITE))) {
rxe_dbg_mw(mw,
@@ -113,7 +113,7 @@ static int rxe_check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
}
/* C10-75 */
- if (mw->access & IB_ZERO_BASED) {
+ if (access & IB_ZERO_BASED) {
if (unlikely(wqe->wr.wr.mw.length > mr->ibmr.length)) {
rxe_dbg_mw(mw,
"attempt to bind a ZB MW outside of the MR\n");
@@ -133,12 +133,12 @@ static int rxe_check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
}
static void rxe_do_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
- struct rxe_mw *mw, struct rxe_mr *mr)
+ struct rxe_mw *mw, struct rxe_mr *mr, int access)
{
u32 key = wqe->wr.wr.mw.rkey & 0xff;
mw->rkey = (mw->rkey & ~0xff) | key;
- mw->access = wqe->wr.wr.mw.access;
+ mw->access = access;
mw->state = RXE_MW_STATE_VALID;
mw->addr = wqe->wr.wr.mw.addr;
mw->length = wqe->wr.wr.mw.length;
@@ -169,6 +169,7 @@ int rxe_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
u32 mw_rkey = wqe->wr.wr.mw.mw_rkey;
u32 mr_lkey = wqe->wr.wr.mw.mr_lkey;
+ int access = wqe->wr.wr.mw.access;
mw = rxe_pool_get_index(&rxe->mw_pool, mw_rkey >> 8);
if (unlikely(!mw)) {
@@ -196,13 +197,18 @@ int rxe_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
mr = NULL;
}
+ if (access & ~RXE_ACCESS_SUPPORTED_MW) {
+ rxe_err_mw(mw, "access %#x not supported", access);
+ return -EOPNOTSUPP;
+ }
+
spin_lock_bh(&mw->lock);
- ret = rxe_check_bind_mw(qp, wqe, mw, mr);
+ ret = rxe_check_bind_mw(qp, wqe, mw, mr, access);
if (ret)
goto err_unlock;
- rxe_do_bind_mw(qp, wqe, mw, mr);
+ rxe_do_bind_mw(qp, wqe, mw, mr, access);
err_unlock:
spin_unlock_bh(&mw->lock);
err_drop_mr:
diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
index 2bc7361152ea..cd59666158b1 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.c
+++ b/drivers/infiniband/sw/rxe/rxe_net.c
@@ -159,6 +159,9 @@ static int rxe_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
pkt->mask = RXE_GRH_MASK;
pkt->paylen = be16_to_cpu(udph->len) - sizeof(*udph);
+ /* remove udp header */
+ skb_pull(skb, sizeof(struct udphdr));
+
rxe_rcv(skb);
return 0;
@@ -401,6 +404,9 @@ static int rxe_loopback(struct sk_buff *skb, struct rxe_pkt_info *pkt)
return -EIO;
}
+ /* remove udp header */
+ skb_pull(skb, sizeof(struct udphdr));
+
rxe_rcv(skb);
return 0;
@@ -412,15 +418,16 @@ int rxe_xmit_packet(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
int err;
int is_request = pkt->mask & RXE_REQ_MASK;
struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
+ unsigned long flags;
- spin_lock_bh(&qp->state_lock);
+ spin_lock_irqsave(&qp->state_lock, flags);
if ((is_request && (qp_state(qp) < IB_QPS_RTS)) ||
(!is_request && (qp_state(qp) < IB_QPS_RTR))) {
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
rxe_dbg_qp(qp, "Packet dropped. QP is not in ready state\n");
goto drop;
}
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
rxe_icrc_generate(skb, pkt);
diff --git a/drivers/infiniband/sw/rxe/rxe_opcode.h b/drivers/infiniband/sw/rxe/rxe_opcode.h
index cea4e0a63919..5686b691d6b8 100644
--- a/drivers/infiniband/sw/rxe/rxe_opcode.h
+++ b/drivers/infiniband/sw/rxe/rxe_opcode.h
@@ -91,6 +91,9 @@ enum rxe_hdr_mask {
RXE_READ_OR_ATOMIC_MASK = (RXE_READ_MASK | RXE_ATOMIC_MASK),
RXE_WRITE_OR_SEND_MASK = (RXE_WRITE_MASK | RXE_SEND_MASK),
RXE_READ_OR_WRITE_MASK = (RXE_READ_MASK | RXE_WRITE_MASK),
+ RXE_RDMA_OP_MASK = (RXE_READ_MASK | RXE_WRITE_MASK |
+ RXE_ATOMIC_WRITE_MASK | RXE_FLUSH_MASK |
+ RXE_ATOMIC_MASK),
};
#define OPCODE_NONE (-1)
diff --git a/drivers/infiniband/sw/rxe/rxe_param.h b/drivers/infiniband/sw/rxe/rxe_param.h
index 7b41d79e72b2..d2f57ead78ad 100644
--- a/drivers/infiniband/sw/rxe/rxe_param.h
+++ b/drivers/infiniband/sw/rxe/rxe_param.h
@@ -112,7 +112,7 @@ enum rxe_device_param {
RXE_INFLIGHT_SKBS_PER_QP_HIGH = 64,
RXE_INFLIGHT_SKBS_PER_QP_LOW = 16,
- /* Max number of interations of each tasklet
+ /* Max number of iterations of each work item
* before yielding the cpu to let other
* work make progress
*/
diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c
index c5451a4488ca..a569b111a9d2 100644
--- a/drivers/infiniband/sw/rxe/rxe_qp.c
+++ b/drivers/infiniband/sw/rxe/rxe_qp.c
@@ -176,6 +176,9 @@ static void rxe_qp_init_misc(struct rxe_dev *rxe, struct rxe_qp *qp,
spin_lock_init(&qp->rq.producer_lock);
spin_lock_init(&qp->rq.consumer_lock);
+ skb_queue_head_init(&qp->req_pkts);
+ skb_queue_head_init(&qp->resp_pkts);
+
atomic_set(&qp->ssn, 0);
atomic_set(&qp->skb_out, 0);
}
@@ -234,8 +237,6 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp,
qp->req.opcode = -1;
qp->comp.opcode = -1;
- skb_queue_head_init(&qp->req_pkts);
-
rxe_init_task(&qp->req.task, qp, rxe_requester);
rxe_init_task(&qp->comp.task, qp, rxe_completer);
@@ -279,8 +280,6 @@ static int rxe_qp_init_resp(struct rxe_dev *rxe, struct rxe_qp *qp,
}
}
- skb_queue_head_init(&qp->resp_pkts);
-
rxe_init_task(&qp->resp.task, qp, rxe_responder);
qp->resp.opcode = OPCODE_NONE;
@@ -300,6 +299,7 @@ int rxe_qp_from_init(struct rxe_dev *rxe, struct rxe_qp *qp, struct rxe_pd *pd,
struct rxe_cq *rcq = to_rcq(init->recv_cq);
struct rxe_cq *scq = to_rcq(init->send_cq);
struct rxe_srq *srq = init->srq ? to_rsrq(init->srq) : NULL;
+ unsigned long flags;
rxe_get(pd);
rxe_get(rcq);
@@ -325,10 +325,10 @@ int rxe_qp_from_init(struct rxe_dev *rxe, struct rxe_qp *qp, struct rxe_pd *pd,
if (err)
goto err2;
- spin_lock_bh(&qp->state_lock);
+ spin_lock_irqsave(&qp->state_lock, flags);
qp->attr.qp_state = IB_QPS_RESET;
qp->valid = 1;
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
return 0;
@@ -392,6 +392,13 @@ int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp,
if (mask & IB_QP_CAP && rxe_qp_chk_cap(rxe, &attr->cap, !!qp->srq))
goto err1;
+ if (mask & IB_QP_ACCESS_FLAGS) {
+ if (!(qp_type(qp) == IB_QPT_RC || qp_type(qp) == IB_QPT_UC))
+ goto err1;
+ if (attr->qp_access_flags & ~RXE_ACCESS_SUPPORTED_QP)
+ goto err1;
+ }
+
if (mask & IB_QP_AV && rxe_av_chk_attr(qp, &attr->ah_attr))
goto err1;
@@ -492,24 +499,28 @@ static void rxe_qp_reset(struct rxe_qp *qp)
/* move the qp to the error state */
void rxe_qp_error(struct rxe_qp *qp)
{
- spin_lock_bh(&qp->state_lock);
+ unsigned long flags;
+
+ spin_lock_irqsave(&qp->state_lock, flags);
qp->attr.qp_state = IB_QPS_ERR;
/* drain work and packet queues */
rxe_sched_task(&qp->resp.task);
rxe_sched_task(&qp->comp.task);
rxe_sched_task(&qp->req.task);
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
}
static void rxe_qp_sqd(struct rxe_qp *qp, struct ib_qp_attr *attr,
int mask)
{
- spin_lock_bh(&qp->state_lock);
+ unsigned long flags;
+
+ spin_lock_irqsave(&qp->state_lock, flags);
qp->attr.sq_draining = 1;
rxe_sched_task(&qp->comp.task);
rxe_sched_task(&qp->req.task);
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
}
/* caller should hold qp->state_lock */
@@ -555,14 +566,16 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask,
qp->attr.cur_qp_state = attr->qp_state;
if (mask & IB_QP_STATE) {
- spin_lock_bh(&qp->state_lock);
+ unsigned long flags;
+
+ spin_lock_irqsave(&qp->state_lock, flags);
err = __qp_chk_state(qp, attr, mask);
if (!err) {
qp->attr.qp_state = attr->qp_state;
rxe_dbg_qp(qp, "state -> %s\n",
qps2str[attr->qp_state]);
}
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
if (err)
return err;
@@ -688,6 +701,8 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask,
/* called by the query qp verb */
int rxe_qp_to_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask)
{
+ unsigned long flags;
+
*attr = qp->attr;
attr->rq_psn = qp->resp.psn;
@@ -708,12 +723,13 @@ int rxe_qp_to_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask)
/* Applications that get this state typically spin on it.
* Yield the processor
*/
- spin_lock_bh(&qp->state_lock);
+ spin_lock_irqsave(&qp->state_lock, flags);
if (qp->attr.sq_draining) {
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
cond_resched();
+ } else {
+ spin_unlock_irqrestore(&qp->state_lock, flags);
}
- spin_unlock_bh(&qp->state_lock);
return 0;
}
@@ -736,10 +752,11 @@ int rxe_qp_chk_destroy(struct rxe_qp *qp)
static void rxe_qp_do_cleanup(struct work_struct *work)
{
struct rxe_qp *qp = container_of(work, typeof(*qp), cleanup_work.work);
+ unsigned long flags;
- spin_lock_bh(&qp->state_lock);
+ spin_lock_irqsave(&qp->state_lock, flags);
qp->valid = 0;
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
qp->qp_timeout_jiffies = 0;
if (qp_type(qp) == IB_QPT_RC) {
diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c
index 2f953cc74256..5861e4244049 100644
--- a/drivers/infiniband/sw/rxe/rxe_recv.c
+++ b/drivers/infiniband/sw/rxe/rxe_recv.c
@@ -14,6 +14,7 @@ static int check_type_state(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
struct rxe_qp *qp)
{
unsigned int pkt_type;
+ unsigned long flags;
if (unlikely(!qp->valid))
return -EINVAL;
@@ -38,19 +39,19 @@ static int check_type_state(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
return -EINVAL;
}
- spin_lock_bh(&qp->state_lock);
+ spin_lock_irqsave(&qp->state_lock, flags);
if (pkt->mask & RXE_REQ_MASK) {
if (unlikely(qp_state(qp) < IB_QPS_RTR)) {
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
return -EINVAL;
}
} else {
if (unlikely(qp_state(qp) < IB_QPS_RTS)) {
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
return -EINVAL;
}
}
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
return 0;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c
index 65134a9aefe7..2171f19494bc 100644
--- a/drivers/infiniband/sw/rxe/rxe_req.c
+++ b/drivers/infiniband/sw/rxe/rxe_req.c
@@ -99,17 +99,18 @@ static void req_retry(struct rxe_qp *qp)
void rnr_nak_timer(struct timer_list *t)
{
struct rxe_qp *qp = from_timer(qp, t, rnr_nak_timer);
+ unsigned long flags;
rxe_dbg_qp(qp, "nak timer fired\n");
- spin_lock_bh(&qp->state_lock);
+ spin_lock_irqsave(&qp->state_lock, flags);
if (qp->valid) {
/* request a send queue retry */
qp->req.need_retry = 1;
qp->req.wait_for_rnr_timer = 0;
rxe_sched_task(&qp->req.task);
}
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
}
static void req_check_sq_drain_done(struct rxe_qp *qp)
@@ -118,8 +119,9 @@ static void req_check_sq_drain_done(struct rxe_qp *qp)
unsigned int index;
unsigned int cons;
struct rxe_send_wqe *wqe;
+ unsigned long flags;
- spin_lock_bh(&qp->state_lock);
+ spin_lock_irqsave(&qp->state_lock, flags);
if (qp_state(qp) == IB_QPS_SQD) {
q = qp->sq.queue;
index = qp->req.wqe_index;
@@ -140,7 +142,7 @@ static void req_check_sq_drain_done(struct rxe_qp *qp)
break;
qp->attr.sq_draining = 0;
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
if (qp->ibqp.event_handler) {
struct ib_event ev;
@@ -154,7 +156,7 @@ static void req_check_sq_drain_done(struct rxe_qp *qp)
return;
} while (0);
}
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
}
static struct rxe_send_wqe *__req_next_wqe(struct rxe_qp *qp)
@@ -173,6 +175,7 @@ static struct rxe_send_wqe *__req_next_wqe(struct rxe_qp *qp)
static struct rxe_send_wqe *req_next_wqe(struct rxe_qp *qp)
{
struct rxe_send_wqe *wqe;
+ unsigned long flags;
req_check_sq_drain_done(qp);
@@ -180,13 +183,13 @@ static struct rxe_send_wqe *req_next_wqe(struct rxe_qp *qp)
if (wqe == NULL)
return NULL;
- spin_lock_bh(&qp->state_lock);
+ spin_lock_irqsave(&qp->state_lock, flags);
if (unlikely((qp_state(qp) == IB_QPS_SQD) &&
(wqe->state != wqe_state_processing))) {
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
return NULL;
}
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
wqe->mask = wr_opcode_mask(wqe->wr.opcode, qp);
return wqe;
@@ -676,16 +679,17 @@ int rxe_requester(struct rxe_qp *qp)
struct rxe_queue *q = qp->sq.queue;
struct rxe_ah *ah;
struct rxe_av *av;
+ unsigned long flags;
- spin_lock_bh(&qp->state_lock);
+ spin_lock_irqsave(&qp->state_lock, flags);
if (unlikely(!qp->valid)) {
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
goto exit;
}
if (unlikely(qp_state(qp) == IB_QPS_ERR)) {
wqe = __req_next_wqe(qp);
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
if (wqe)
goto err;
else
@@ -700,10 +704,10 @@ int rxe_requester(struct rxe_qp *qp)
qp->req.wait_psn = 0;
qp->req.need_retry = 0;
qp->req.wait_for_rnr_timer = 0;
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
goto exit;
}
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
/* we come here if the retransmit timer has fired
* or if the rnr timer has fired. If the retransmit
@@ -853,7 +857,7 @@ int rxe_requester(struct rxe_qp *qp)
update_state(qp, &pkt);
/* A non-zero return value will cause rxe_do_task to
- * exit its loop and end the tasklet. A zero return
+ * exit its loop and end the work item. A zero return
* will continue looping and return to rxe_requester
*/
done:
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index 68f6cd188d8e..64c64f5f36a8 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -387,7 +387,10 @@ static enum resp_states rxe_resp_check_length(struct rxe_qp *qp,
}
}
- return RESPST_CHK_RKEY;
+ if (pkt->mask & RXE_RDMA_OP_MASK)
+ return RESPST_CHK_RKEY;
+ else
+ return RESPST_EXECUTE;
}
/* if the reth length field is zero we can assume nothing
@@ -434,6 +437,10 @@ static enum resp_states check_rkey(struct rxe_qp *qp,
enum resp_states state;
int access = 0;
+ /* parse RETH or ATMETH header for first/only packets
+ * for va, length, rkey, etc. or use current value for
+ * middle/last packets.
+ */
if (pkt->mask & (RXE_READ_OR_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) {
if (pkt->mask & RXE_RETH_MASK)
qp_resp_from_reth(qp, pkt);
@@ -454,7 +461,8 @@ static enum resp_states check_rkey(struct rxe_qp *qp,
qp_resp_from_atmeth(qp, pkt);
access = IB_ACCESS_REMOTE_ATOMIC;
} else {
- return RESPST_EXECUTE;
+ /* shouldn't happen */
+ WARN_ON(1);
}
/* A zero-byte read or write op is not required to
@@ -489,8 +497,9 @@ static enum resp_states check_rkey(struct rxe_qp *qp,
if (mw->access & IB_ZERO_BASED)
qp->resp.offset = mw->addr;
- rxe_put(mw);
rxe_get(mr);
+ rxe_put(mw);
+ mw = NULL;
} else {
mr = lookup_mr(qp->pd, access, rkey, RXE_LOOKUP_REMOTE);
if (!mr) {
@@ -1047,6 +1056,7 @@ static enum resp_states do_complete(struct rxe_qp *qp,
struct ib_uverbs_wc *uwc = &cqe.uibwc;
struct rxe_recv_wqe *wqe = qp->resp.wqe;
struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
+ unsigned long flags;
if (!wqe)
goto finish;
@@ -1137,12 +1147,12 @@ static enum resp_states do_complete(struct rxe_qp *qp,
return RESPST_ERR_CQ_OVERFLOW;
finish:
- spin_lock_bh(&qp->state_lock);
+ spin_lock_irqsave(&qp->state_lock, flags);
if (unlikely(qp_state(qp) == IB_QPS_ERR)) {
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
return RESPST_CHK_RESOURCE;
}
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
if (unlikely(!pkt))
return RESPST_DONE;
@@ -1447,8 +1457,17 @@ static void flush_recv_queue(struct rxe_qp *qp, bool notify)
struct rxe_recv_wqe *wqe;
int err;
- if (qp->srq)
+ if (qp->srq) {
+ if (notify && qp->ibqp.event_handler) {
+ struct ib_event ev;
+
+ ev.device = qp->ibqp.device;
+ ev.element.qp = &qp->ibqp;
+ ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
+ qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
+ }
return;
+ }
while ((wqe = queue_head(q, q->type))) {
if (notify) {
@@ -1468,18 +1487,19 @@ int rxe_responder(struct rxe_qp *qp)
enum resp_states state;
struct rxe_pkt_info *pkt = NULL;
int ret;
+ unsigned long flags;
- spin_lock_bh(&qp->state_lock);
+ spin_lock_irqsave(&qp->state_lock, flags);
if (!qp->valid || qp_state(qp) == IB_QPS_ERR ||
qp_state(qp) == IB_QPS_RESET) {
bool notify = qp->valid && (qp_state(qp) == IB_QPS_ERR);
drain_req_pkts(qp);
flush_recv_queue(qp, notify);
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
goto exit;
}
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
qp->resp.aeth_syndrome = AETH_ACK_UNLIMITED;
@@ -1654,7 +1674,7 @@ int rxe_responder(struct rxe_qp *qp)
}
/* A non-zero return value will cause rxe_do_task to
- * exit its loop and end the tasklet. A zero return
+ * exit its loop and end the work item. A zero return
* will continue looping and return to rxe_responder
*/
done:
diff --git a/drivers/infiniband/sw/rxe/rxe_task.c b/drivers/infiniband/sw/rxe/rxe_task.c
index fb9a6bc8e620..1501120d4f52 100644
--- a/drivers/infiniband/sw/rxe/rxe_task.c
+++ b/drivers/infiniband/sw/rxe/rxe_task.c
@@ -6,8 +6,24 @@
#include "rxe.h"
+static struct workqueue_struct *rxe_wq;
+
+int rxe_alloc_wq(void)
+{
+ rxe_wq = alloc_workqueue("rxe_wq", WQ_UNBOUND, WQ_MAX_ACTIVE);
+ if (!rxe_wq)
+ return -ENOMEM;
+
+ return 0;
+}
+
+void rxe_destroy_wq(void)
+{
+ destroy_workqueue(rxe_wq);
+}
+
/* Check if task is idle i.e. not running, not scheduled in
- * tasklet queue and not draining. If so move to busy to
+ * work queue and not draining. If so move to busy to
* reserve a slot in do_task() by setting to busy and taking
* a qp reference to cover the gap from now until the task finishes.
* state will move out of busy if task returns a non zero value
@@ -21,9 +37,6 @@ static bool __reserve_if_idle(struct rxe_task *task)
{
WARN_ON(rxe_read(task->qp) <= 0);
- if (task->tasklet.state & BIT(TASKLET_STATE_SCHED))
- return false;
-
if (task->state == TASK_STATE_IDLE) {
rxe_get(task->qp);
task->state = TASK_STATE_BUSY;
@@ -38,7 +51,7 @@ static bool __reserve_if_idle(struct rxe_task *task)
}
/* check if task is idle or drained and not currently
- * scheduled in the tasklet queue. This routine is
+ * scheduled in the work queue. This routine is
* called by rxe_cleanup_task or rxe_disable_task to
* see if the queue is empty.
* Context: caller should hold task->lock.
@@ -46,7 +59,7 @@ static bool __reserve_if_idle(struct rxe_task *task)
*/
static bool __is_done(struct rxe_task *task)
{
- if (task->tasklet.state & BIT(TASKLET_STATE_SCHED))
+ if (work_pending(&task->work))
return false;
if (task->state == TASK_STATE_IDLE ||
@@ -77,23 +90,23 @@ static bool is_done(struct rxe_task *task)
* schedules the task. They must call __reserve_if_idle to
* move the task to busy before calling or scheduling.
* The task can also be moved to drained or invalid
- * by calls to rxe-cleanup_task or rxe_disable_task.
+ * by calls to rxe_cleanup_task or rxe_disable_task.
* In that case tasks which get here are not executed but
* just flushed. The tasks are designed to look to see if
- * there is work to do and do part of it before returning
+ * there is work to do and then do part of it before returning
* here with a return value of zero until all the work
- * has been consumed then it retuens a non-zero value.
+ * has been consumed then it returns a non-zero value.
* The number of times the task can be run is limited by
* max iterations so one task cannot hold the cpu forever.
+ * If the limit is hit and work remains the task is rescheduled.
*/
-static void do_task(struct tasklet_struct *t)
+static void do_task(struct rxe_task *task)
{
- int cont;
- int ret;
- struct rxe_task *task = from_tasklet(task, t, tasklet);
unsigned int iterations;
unsigned long flags;
int resched = 0;
+ int cont;
+ int ret;
WARN_ON(rxe_read(task->qp) <= 0);
@@ -115,25 +128,22 @@ static void do_task(struct tasklet_struct *t)
} while (ret == 0 && iterations-- > 0);
spin_lock_irqsave(&task->lock, flags);
+ /* we're not done yet but we ran out of iterations.
+ * yield the cpu and reschedule the task
+ */
+ if (!ret) {
+ task->state = TASK_STATE_IDLE;
+ resched = 1;
+ goto exit;
+ }
+
switch (task->state) {
case TASK_STATE_BUSY:
- if (ret) {
- task->state = TASK_STATE_IDLE;
- } else {
- /* This can happen if the client
- * can add work faster than the
- * tasklet can finish it.
- * Reschedule the tasklet and exit
- * the loop to give up the cpu
- */
- task->state = TASK_STATE_IDLE;
- resched = 1;
- }
+ task->state = TASK_STATE_IDLE;
break;
- /* someone tried to run the task since the last time we called
- * func, so we will call one more time regardless of the
- * return value
+ /* someone tried to schedule the task while we
+ * were running, keep going
*/
case TASK_STATE_ARMED:
task->state = TASK_STATE_BUSY;
@@ -141,22 +151,24 @@ static void do_task(struct tasklet_struct *t)
break;
case TASK_STATE_DRAINING:
- if (ret)
- task->state = TASK_STATE_DRAINED;
- else
- cont = 1;
+ task->state = TASK_STATE_DRAINED;
break;
default:
WARN_ON(1);
- rxe_info_qp(task->qp, "unexpected task state = %d", task->state);
+ rxe_dbg_qp(task->qp, "unexpected task state = %d",
+ task->state);
+ task->state = TASK_STATE_IDLE;
}
+exit:
if (!cont) {
task->num_done++;
if (WARN_ON(task->num_done != task->num_sched))
- rxe_err_qp(task->qp, "%ld tasks scheduled, %ld tasks done",
- task->num_sched, task->num_done);
+ rxe_dbg_qp(
+ task->qp,
+ "%ld tasks scheduled, %ld tasks done",
+ task->num_sched, task->num_done);
}
spin_unlock_irqrestore(&task->lock, flags);
} while (cont);
@@ -169,6 +181,12 @@ static void do_task(struct tasklet_struct *t)
rxe_put(task->qp);
}
+/* wrapper around do_task to fix argument for work queue */
+static void do_work(struct work_struct *work)
+{
+ do_task(container_of(work, struct rxe_task, work));
+}
+
int rxe_init_task(struct rxe_task *task, struct rxe_qp *qp,
int (*func)(struct rxe_qp *))
{
@@ -176,11 +194,9 @@ int rxe_init_task(struct rxe_task *task, struct rxe_qp *qp,
task->qp = qp;
task->func = func;
-
- tasklet_setup(&task->tasklet, do_task);
-
task->state = TASK_STATE_IDLE;
spin_lock_init(&task->lock);
+ INIT_WORK(&task->work, do_work);
return 0;
}
@@ -213,8 +229,6 @@ void rxe_cleanup_task(struct rxe_task *task)
while (!is_done(task))
cond_resched();
- tasklet_kill(&task->tasklet);
-
spin_lock_irqsave(&task->lock, flags);
task->state = TASK_STATE_INVALID;
spin_unlock_irqrestore(&task->lock, flags);
@@ -226,7 +240,7 @@ void rxe_cleanup_task(struct rxe_task *task)
void rxe_run_task(struct rxe_task *task)
{
unsigned long flags;
- int run;
+ bool run;
WARN_ON(rxe_read(task->qp) <= 0);
@@ -235,11 +249,11 @@ void rxe_run_task(struct rxe_task *task)
spin_unlock_irqrestore(&task->lock, flags);
if (run)
- do_task(&task->tasklet);
+ do_task(task);
}
-/* schedule the task to run later as a tasklet.
- * the tasklet)schedule call can be called holding
+/* schedule the task to run later as a work queue entry.
+ * the queue_work call can be called holding
* the lock.
*/
void rxe_sched_task(struct rxe_task *task)
@@ -250,7 +264,7 @@ void rxe_sched_task(struct rxe_task *task)
spin_lock_irqsave(&task->lock, flags);
if (__reserve_if_idle(task))
- tasklet_schedule(&task->tasklet);
+ queue_work(rxe_wq, &task->work);
spin_unlock_irqrestore(&task->lock, flags);
}
@@ -277,7 +291,9 @@ void rxe_disable_task(struct rxe_task *task)
while (!is_done(task))
cond_resched();
- tasklet_disable(&task->tasklet);
+ spin_lock_irqsave(&task->lock, flags);
+ task->state = TASK_STATE_DRAINED;
+ spin_unlock_irqrestore(&task->lock, flags);
}
void rxe_enable_task(struct rxe_task *task)
@@ -291,7 +307,7 @@ void rxe_enable_task(struct rxe_task *task)
spin_unlock_irqrestore(&task->lock, flags);
return;
}
+
task->state = TASK_STATE_IDLE;
- tasklet_enable(&task->tasklet);
spin_unlock_irqrestore(&task->lock, flags);
}
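
For illustration, a minimal standalone sketch of the tasklet-to-workqueue
conversion applied in rxe_task.c above. The demo_* names are hypothetical; the
shape is the same: one unbound workqueue for the module, a work_struct embedded
in the per-object task, and queue_work() where tasklet_schedule() used to be.

#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/workqueue.h>

static struct workqueue_struct *demo_wq;

struct demo_task {
	struct work_struct work;
	int (*func)(struct demo_task *task);	/* returns nonzero when done */
};

static void demo_do_work(struct work_struct *work)
{
	struct demo_task *task = container_of(work, struct demo_task, work);

	/* the real driver bounds this loop and reschedules if work remains */
	while (task->func(task) == 0)
		cond_resched();
}

static int demo_wq_init(void)
{
	demo_wq = alloc_workqueue("demo_wq", WQ_UNBOUND, WQ_MAX_ACTIVE);
	return demo_wq ? 0 : -ENOMEM;
}

static void demo_task_init(struct demo_task *task,
			   int (*func)(struct demo_task *task))
{
	task->func = func;
	INIT_WORK(&task->work, demo_do_work);
}

static void demo_task_sched(struct demo_task *task)
{
	queue_work(demo_wq, &task->work);	/* replaces tasklet_schedule() */
}
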
diff --git a/drivers/infiniband/sw/rxe/rxe_task.h b/drivers/infiniband/sw/rxe/rxe_task.h
index facb7c8e3729..a63e258b3d66 100644
--- a/drivers/infiniband/sw/rxe/rxe_task.h
+++ b/drivers/infiniband/sw/rxe/rxe_task.h
@@ -22,7 +22,7 @@ enum {
* called again.
*/
struct rxe_task {
- struct tasklet_struct tasklet;
+ struct work_struct work;
int state;
spinlock_t lock;
struct rxe_qp *qp;
@@ -32,6 +32,10 @@ struct rxe_task {
long num_done;
};
+int rxe_alloc_wq(void);
+
+void rxe_destroy_wq(void);
+
/*
* init rxe_task structure
* qp => parameter to pass to func
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index dea605b7f683..903f0b71447e 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -904,10 +904,10 @@ static int rxe_post_send_kernel(struct rxe_qp *qp,
if (!err)
rxe_sched_task(&qp->req.task);
- spin_lock_bh(&qp->state_lock);
+ spin_lock_irqsave(&qp->state_lock, flags);
if (qp_state(qp) == IB_QPS_ERR)
rxe_sched_task(&qp->comp.task);
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
return err;
}
@@ -917,22 +917,23 @@ static int rxe_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
{
struct rxe_qp *qp = to_rqp(ibqp);
int err;
+ unsigned long flags;
- spin_lock_bh(&qp->state_lock);
+ spin_lock_irqsave(&qp->state_lock, flags);
/* caller has already called destroy_qp */
if (WARN_ON_ONCE(!qp->valid)) {
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
rxe_err_qp(qp, "qp has been destroyed");
return -EINVAL;
}
if (unlikely(qp_state(qp) < IB_QPS_RTS)) {
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
*bad_wr = wr;
rxe_err_qp(qp, "qp not ready to send");
return -EINVAL;
}
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
if (qp->is_user) {
/* Utilize process context to do protocol processing */
@@ -1008,22 +1009,22 @@ static int rxe_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
struct rxe_rq *rq = &qp->rq;
unsigned long flags;
- spin_lock_bh(&qp->state_lock);
+ spin_lock_irqsave(&qp->state_lock, flags);
/* caller has already called destroy_qp */
if (WARN_ON_ONCE(!qp->valid)) {
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
rxe_err_qp(qp, "qp has been destroyed");
return -EINVAL;
}
/* see C10-97.2.1 */
if (unlikely((qp_state(qp) < IB_QPS_INIT))) {
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
*bad_wr = wr;
rxe_dbg_qp(qp, "qp not ready to post recv");
return -EINVAL;
}
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
if (unlikely(qp->srq)) {
*bad_wr = wr;
@@ -1044,10 +1045,10 @@ static int rxe_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
spin_unlock_irqrestore(&rq->producer_lock, flags);
- spin_lock_bh(&qp->state_lock);
+ spin_lock_irqsave(&qp->state_lock, flags);
if (qp_state(qp) == IB_QPS_ERR)
rxe_sched_task(&qp->resp.task);
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
return err;
}
@@ -1181,9 +1182,7 @@ static int rxe_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
unsigned long irq_flags;
spin_lock_irqsave(&cq->cq_lock, irq_flags);
- if (cq->notify != IB_CQ_NEXT_COMP)
- cq->notify = flags & IB_CQ_SOLICITED_MASK;
-
+ cq->notify |= flags & IB_CQ_SOLICITED_MASK;
empty = queue_empty(cq->queue, QUEUE_TYPE_TO_ULP);
if ((flags & IB_CQ_REPORT_MISSED_EVENTS) && !empty)
@@ -1260,6 +1259,12 @@ static struct ib_mr *rxe_reg_user_mr(struct ib_pd *ibpd, u64 start,
struct rxe_mr *mr;
int err, cleanup_err;
+ if (access & ~RXE_ACCESS_SUPPORTED_MR) {
+ rxe_err_pd(pd, "access = %#x not supported (%#x)", access,
+ RXE_ACCESS_SUPPORTED_MR);
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+
mr = kzalloc(sizeof(*mr), GFP_KERNEL);
if (!mr)
return ERR_PTR(-ENOMEM);
@@ -1293,6 +1298,40 @@ err_free:
return ERR_PTR(err);
}
+static struct ib_mr *rxe_rereg_user_mr(struct ib_mr *ibmr, int flags,
+ u64 start, u64 length, u64 iova,
+ int access, struct ib_pd *ibpd,
+ struct ib_udata *udata)
+{
+ struct rxe_mr *mr = to_rmr(ibmr);
+ struct rxe_pd *old_pd = to_rpd(ibmr->pd);
+ struct rxe_pd *pd = to_rpd(ibpd);
+
+ /* for now only support the two easy cases:
+ * rereg_pd and rereg_access
+ */
+ if (flags & ~RXE_MR_REREG_SUPPORTED) {
+ rxe_err_mr(mr, "flags = %#x not supported", flags);
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+
+ if (flags & IB_MR_REREG_PD) {
+ rxe_put(old_pd);
+ rxe_get(pd);
+ mr->ibmr.pd = ibpd;
+ }
+
+ if (flags & IB_MR_REREG_ACCESS) {
+ if (access & ~RXE_ACCESS_SUPPORTED_MR) {
+ rxe_err_mr(mr, "access = %#x not supported", access);
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+ mr->access = access;
+ }
+
+ return NULL;
+}
+
static struct ib_mr *rxe_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type,
u32 max_num_sg)
{
@@ -1356,7 +1395,7 @@ static int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
if (cleanup_err)
rxe_err_mr(mr, "cleanup failed, err = %d", cleanup_err);
- kfree_rcu(mr);
+ kfree_rcu_mightsleep(mr);
return 0;
err_out:
@@ -1445,6 +1484,7 @@ static const struct ib_device_ops rxe_dev_ops = {
.query_srq = rxe_query_srq,
.reg_user_mr = rxe_reg_user_mr,
.req_notify_cq = rxe_req_notify_cq,
+ .rereg_user_mr = rxe_rereg_user_mr,
.resize_cq = rxe_resize_cq,
INIT_RDMA_OBJ_SIZE(ib_ah, rxe_ah, ibah),
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h
index 26a20f088692..ccb9d19ffe8a 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.h
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
@@ -253,6 +253,22 @@ struct rxe_qp {
struct execute_work cleanup_work;
};
+enum {
+ RXE_ACCESS_REMOTE = IB_ACCESS_REMOTE_READ
+ | IB_ACCESS_REMOTE_WRITE
+ | IB_ACCESS_REMOTE_ATOMIC,
+ RXE_ACCESS_SUPPORTED_MR = RXE_ACCESS_REMOTE
+ | IB_ACCESS_LOCAL_WRITE
+ | IB_ACCESS_MW_BIND
+ | IB_ACCESS_ON_DEMAND
+ | IB_ACCESS_FLUSH_GLOBAL
+ | IB_ACCESS_FLUSH_PERSISTENT
+ | IB_ACCESS_OPTIONAL,
+ RXE_ACCESS_SUPPORTED_QP = RXE_ACCESS_SUPPORTED_MR,
+ RXE_ACCESS_SUPPORTED_MW = RXE_ACCESS_SUPPORTED_MR
+ | IB_ZERO_BASED,
+};
+
enum rxe_mr_state {
RXE_MR_STATE_INVALID,
RXE_MR_STATE_FREE,
@@ -269,6 +285,11 @@ enum rxe_mr_lookup_type {
RXE_LOOKUP_REMOTE,
};
+enum rxe_rereg {
+ RXE_MR_REREG_SUPPORTED = IB_MR_REREG_PD
+ | IB_MR_REREG_ACCESS,
+};
+
static inline int rkey_is_mw(u32 rkey)
{
u32 index = rkey >> 8;
diff --git a/drivers/infiniband/sw/siw/siw_mem.c b/drivers/infiniband/sw/siw/siw_mem.c
index f51ab2ccf151..e6e25f15567d 100644
--- a/drivers/infiniband/sw/siw/siw_mem.c
+++ b/drivers/infiniband/sw/siw/siw_mem.c
@@ -422,7 +422,7 @@ struct siw_umem *siw_umem_get(u64 start, u64 len, bool writable)
umem->page_chunk[i].plist = plist;
while (nents) {
rv = pin_user_pages(first_page_va, nents, foll_flags,
- plist, NULL);
+ plist);
if (rv < 0)
goto out_sem_up;
diff --git a/drivers/infiniband/sw/siw/siw_qp_tx.c b/drivers/infiniband/sw/siw/siw_qp_tx.c
index 4b292e0504f1..7c7a51d36d0c 100644
--- a/drivers/infiniband/sw/siw/siw_qp_tx.c
+++ b/drivers/infiniband/sw/siw/siw_qp_tx.c
@@ -312,7 +312,7 @@ static int siw_tx_ctrl(struct siw_iwarp_tx *c_tx, struct socket *s,
}
/*
- * 0copy TCP transmit interface: Use do_tcp_sendpages.
+ * 0copy TCP transmit interface: Use MSG_SPLICE_PAGES.
*
* Using sendpage to push page by page appears to be less efficient
* than using sendmsg, even if data are copied.
@@ -323,20 +323,26 @@ static int siw_tx_ctrl(struct siw_iwarp_tx *c_tx, struct socket *s,
static int siw_tcp_sendpages(struct socket *s, struct page **page, int offset,
size_t size)
{
+ struct bio_vec bvec;
+ struct msghdr msg = {
+ .msg_flags = (MSG_MORE | MSG_DONTWAIT | MSG_SPLICE_PAGES),
+ };
struct sock *sk = s->sk;
- int i = 0, rv = 0, sent = 0,
- flags = MSG_MORE | MSG_DONTWAIT | MSG_SENDPAGE_NOTLAST;
+ int i = 0, rv = 0, sent = 0;
while (size) {
size_t bytes = min_t(size_t, PAGE_SIZE - offset, size);
if (size + offset <= PAGE_SIZE)
- flags = MSG_MORE | MSG_DONTWAIT;
+ msg.msg_flags &= ~MSG_MORE;
tcp_rate_check_app_limited(sk);
+ bvec_set_page(&bvec, page[i], bytes, offset);
+ iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, size);
+
try_page_again:
lock_sock(sk);
- rv = do_tcp_sendpages(sk, page[i], offset, bytes, flags);
+ rv = tcp_sendmsg_locked(sk, &msg, size);
release_sock(sk);
if (rv > 0) {
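
For illustration, a minimal standalone sketch of the MSG_SPLICE_PAGES transmit
path that replaces do_tcp_sendpages() above. demo_send_page() is hypothetical;
the bvec_set_page()/iov_iter_bvec()/tcp_sendmsg_locked() calls mirror the siw
change: wrap the page in a bio_vec, point msg_iter at it, and go through the
ordinary sendmsg path.

#include <linux/bvec.h>
#include <linux/socket.h>
#include <linux/uio.h>
#include <net/sock.h>
#include <net/tcp.h>

static int demo_send_page(struct sock *sk, struct page *page,
			  int offset, size_t len, bool more)
{
	struct bio_vec bvec;
	struct msghdr msg = {
		.msg_flags = MSG_DONTWAIT | MSG_SPLICE_PAGES,
	};
	int rv;

	if (more)
		msg.msg_flags |= MSG_MORE;	/* more data will follow */

	bvec_set_page(&bvec, page, len, offset);
	iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, len);

	lock_sock(sk);
	rv = tcp_sendmsg_locked(sk, &msg, len);
	release_sock(sk);

	return rv;	/* bytes queued, or negative errno */
}
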
diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c
index f290cd49698e..92e1e7587af8 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.c
+++ b/drivers/infiniband/ulp/isert/ib_isert.c
@@ -657,9 +657,13 @@ static int
isert_connect_error(struct rdma_cm_id *cma_id)
{
struct isert_conn *isert_conn = cma_id->qp->qp_context;
+ struct isert_np *isert_np = cma_id->context;
ib_drain_qp(isert_conn->qp);
+
+ mutex_lock(&isert_np->mutex);
list_del_init(&isert_conn->node);
+ mutex_unlock(&isert_np->mutex);
isert_conn->cm_id = NULL;
isert_put_conn(isert_conn);
@@ -2431,6 +2435,7 @@ isert_free_np(struct iscsi_np *np)
{
struct isert_np *isert_np = np->np_context;
struct isert_conn *isert_conn, *n;
+ LIST_HEAD(drop_conn_list);
if (isert_np->cm_id)
rdma_destroy_id(isert_np->cm_id);
@@ -2450,7 +2455,7 @@ isert_free_np(struct iscsi_np *np)
node) {
isert_info("cleaning isert_conn %p state (%d)\n",
isert_conn, isert_conn->state);
- isert_connect_release(isert_conn);
+ list_move_tail(&isert_conn->node, &drop_conn_list);
}
}
@@ -2461,11 +2466,16 @@ isert_free_np(struct iscsi_np *np)
node) {
isert_info("cleaning isert_conn %p state (%d)\n",
isert_conn, isert_conn->state);
- isert_connect_release(isert_conn);
+ list_move_tail(&isert_conn->node, &drop_conn_list);
}
}
mutex_unlock(&isert_np->mutex);
+ list_for_each_entry_safe(isert_conn, n, &drop_conn_list, node) {
+ list_del_init(&isert_conn->node);
+ isert_connect_release(isert_conn);
+ }
+
np->np_context = NULL;
kfree(isert_np);
}
@@ -2560,8 +2570,6 @@ static void isert_wait_conn(struct iscsit_conn *conn)
isert_put_unsol_pending_cmds(conn);
isert_wait4cmds(conn);
isert_wait4logout(isert_conn);
-
- queue_work(isert_release_wq, &isert_conn->release_work);
}
static void isert_free_conn(struct iscsit_conn *conn)
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c
index edb2e3a25880..b32941dd67cb 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c
+++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c
@@ -1710,7 +1710,6 @@ static int create_con_cq_qp(struct rtrs_clt_con *con)
return -ENOMEM;
con->queue_num = cq_num;
}
- cq_num = max_send_wr + max_recv_wr;
cq_vector = con->cpu % clt_path->s.dev->ib_dev->num_comp_vectors;
if (con->c.cid >= clt_path->s.irq_con_num)
err = rtrs_cq_qp_create(&clt_path->s, &con->c, max_send_sge,
@@ -2040,6 +2039,7 @@ static int rtrs_clt_rdma_cm_handler(struct rdma_cm_id *cm_id,
return 0;
}
+/* The caller should do the cleanup in case of error */
static int create_cm(struct rtrs_clt_con *con)
{
struct rtrs_path *s = con->c.path;
@@ -2062,14 +2062,14 @@ static int create_cm(struct rtrs_clt_con *con)
err = rdma_set_reuseaddr(cm_id, 1);
if (err != 0) {
rtrs_err(s, "Set address reuse failed, err: %d\n", err);
- goto destroy_cm;
+ return err;
}
err = rdma_resolve_addr(cm_id, (struct sockaddr *)&clt_path->s.src_addr,
(struct sockaddr *)&clt_path->s.dst_addr,
RTRS_CONNECT_TIMEOUT_MS);
if (err) {
rtrs_err(s, "Failed to resolve address, err: %d\n", err);
- goto destroy_cm;
+ return err;
}
/*
* Combine connection status and session events. This is needed
@@ -2084,29 +2084,15 @@ static int create_cm(struct rtrs_clt_con *con)
if (err == 0)
err = -ETIMEDOUT;
/* Timedout or interrupted */
- goto errr;
- }
- if (con->cm_err < 0) {
- err = con->cm_err;
- goto errr;
+ return err;
}
- if (READ_ONCE(clt_path->state) != RTRS_CLT_CONNECTING) {
+ if (con->cm_err < 0)
+ return con->cm_err;
+ if (READ_ONCE(clt_path->state) != RTRS_CLT_CONNECTING)
/* Device removal */
- err = -ECONNABORTED;
- goto errr;
- }
+ return -ECONNABORTED;
return 0;
-
-errr:
- stop_cm(con);
- mutex_lock(&con->con_mutex);
- destroy_con_cq_qp(con);
- mutex_unlock(&con->con_mutex);
-destroy_cm:
- destroy_cm(con);
-
- return err;
}
static void rtrs_clt_path_up(struct rtrs_clt_path *clt_path)
@@ -2334,7 +2320,7 @@ static void rtrs_clt_close_work(struct work_struct *work)
static int init_conns(struct rtrs_clt_path *clt_path)
{
unsigned int cid;
- int err;
+ int err, i;
/*
* On every new session connections increase reconnect counter
@@ -2350,10 +2336,8 @@ static int init_conns(struct rtrs_clt_path *clt_path)
goto destroy;
err = create_cm(to_clt_con(clt_path->s.con[cid]));
- if (err) {
- destroy_con(to_clt_con(clt_path->s.con[cid]));
+ if (err)
goto destroy;
- }
}
err = alloc_path_reqs(clt_path);
if (err)
@@ -2364,15 +2348,21 @@ static int init_conns(struct rtrs_clt_path *clt_path)
return 0;
destroy:
- while (cid--) {
- struct rtrs_clt_con *con = to_clt_con(clt_path->s.con[cid]);
+ /* Make sure we do the cleanup in the order the connections were created */
+ for (i = 0; i <= cid; i++) {
+ struct rtrs_clt_con *con;
- stop_cm(con);
+ if (!clt_path->s.con[i])
+ break;
- mutex_lock(&con->con_mutex);
- destroy_con_cq_qp(con);
- mutex_unlock(&con->con_mutex);
- destroy_cm(con);
+ con = to_clt_con(clt_path->s.con[i]);
+ if (con->c.cm_id) {
+ stop_cm(con);
+ mutex_lock(&con->con_mutex);
+ destroy_con_cq_qp(con);
+ mutex_unlock(&con->con_mutex);
+ destroy_cm(con);
+ }
destroy_con(con);
}
/*
diff --git a/drivers/infiniband/ulp/rtrs/rtrs.c b/drivers/infiniband/ulp/rtrs/rtrs.c
index 4bf9d868cc52..3696f367ff51 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs.c
+++ b/drivers/infiniband/ulp/rtrs/rtrs.c
@@ -37,8 +37,10 @@ struct rtrs_iu *rtrs_iu_alloc(u32 iu_num, size_t size, gfp_t gfp_mask,
goto err;
iu->dma_addr = ib_dma_map_single(dma_dev, iu->buf, size, dir);
- if (ib_dma_mapping_error(dma_dev, iu->dma_addr))
+ if (ib_dma_mapping_error(dma_dev, iu->dma_addr)) {
+ kfree(iu->buf);
goto err;
+ }
iu->cqe.done = done;
iu->size = size;