diff options
Diffstat (limited to 'drivers/infiniband/hw/mlx5')
-rw-r--r-- | drivers/infiniband/hw/mlx5/cq.c | 59 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/ib_rep.c | 2 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/ib_rep.h | 2 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/main.c | 511 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/mlx5_ib.h | 36 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/qp.c | 43 |
6 files changed, 560 insertions, 93 deletions
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 77d257ec899b..ad39d64b8108 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -637,7 +637,7 @@ repoll: } static int poll_soft_wc(struct mlx5_ib_cq *cq, int num_entries, - struct ib_wc *wc) + struct ib_wc *wc, bool is_fatal_err) { struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device); struct mlx5_ib_wc *soft_wc, *next; @@ -650,6 +650,10 @@ static int poll_soft_wc(struct mlx5_ib_cq *cq, int num_entries, mlx5_ib_dbg(dev, "polled software generated completion on CQ 0x%x\n", cq->mcq.cqn); + if (unlikely(is_fatal_err)) { + soft_wc->wc.status = IB_WC_WR_FLUSH_ERR; + soft_wc->wc.vendor_err = MLX5_CQE_SYNDROME_WR_FLUSH_ERR; + } wc[npolled++] = soft_wc->wc; list_del(&soft_wc->list); kfree(soft_wc); @@ -670,12 +674,17 @@ int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) spin_lock_irqsave(&cq->lock, flags); if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { - mlx5_ib_poll_sw_comp(cq, num_entries, wc, &npolled); + /* make sure no soft wqe's are waiting */ + if (unlikely(!list_empty(&cq->wc_list))) + soft_polled = poll_soft_wc(cq, num_entries, wc, true); + + mlx5_ib_poll_sw_comp(cq, num_entries - soft_polled, + wc + soft_polled, &npolled); goto out; } if (unlikely(!list_empty(&cq->wc_list))) - soft_polled = poll_soft_wc(cq, num_entries, wc); + soft_polled = poll_soft_wc(cq, num_entries, wc, false); for (npolled = 0; npolled < num_entries - soft_polled; npolled++) { if (mlx5_poll_one(cq, &cur_qp, wc + soft_polled + npolled)) @@ -742,6 +751,28 @@ static int alloc_cq_frag_buf(struct mlx5_ib_dev *dev, return 0; } +enum { + MLX5_CQE_RES_FORMAT_HASH = 0, + MLX5_CQE_RES_FORMAT_CSUM = 1, + MLX5_CQE_RES_FORMAT_CSUM_STRIDX = 3, +}; + +static int mini_cqe_res_format_to_hw(struct mlx5_ib_dev *dev, u8 format) +{ + switch (format) { + case MLX5_IB_CQE_RES_FORMAT_HASH: + return MLX5_CQE_RES_FORMAT_HASH; + case MLX5_IB_CQE_RES_FORMAT_CSUM: + return MLX5_CQE_RES_FORMAT_CSUM; + case MLX5_IB_CQE_RES_FORMAT_CSUM_STRIDX: + if (MLX5_CAP_GEN(dev->mdev, mini_cqe_resp_stride_index)) + return MLX5_CQE_RES_FORMAT_CSUM_STRIDX; + return -EOPNOTSUPP; + default: + return -EINVAL; + } +} + static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, struct ib_ucontext *context, struct mlx5_ib_cq *cq, int entries, u32 **cqb, @@ -807,6 +838,8 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, *index = to_mucontext(context)->bfregi.sys_pages[0]; if (ucmd.cqe_comp_en == 1) { + int mini_cqe_format; + if (!((*cqe_size == 128 && MLX5_CAP_GEN(dev->mdev, cqe_compression_128)) || (*cqe_size == 64 && @@ -817,20 +850,18 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, goto err_cqb; } - if (unlikely(!ucmd.cqe_comp_res_format || - !(ucmd.cqe_comp_res_format < - MLX5_IB_CQE_RES_RESERVED) || - (ucmd.cqe_comp_res_format & - (ucmd.cqe_comp_res_format - 1)))) { - err = -EOPNOTSUPP; - mlx5_ib_warn(dev, "CQE compression res format %d is not supported!\n", - ucmd.cqe_comp_res_format); + mini_cqe_format = + mini_cqe_res_format_to_hw(dev, + ucmd.cqe_comp_res_format); + if (mini_cqe_format < 0) { + err = mini_cqe_format; + mlx5_ib_dbg(dev, "CQE compression res format %d error: %d\n", + ucmd.cqe_comp_res_format, err); goto err_cqb; } MLX5_SET(cqc, cqc, cqe_comp_en, 1); - MLX5_SET(cqc, cqc, mini_cqe_res_format, - ilog2(ucmd.cqe_comp_res_format)); + MLX5_SET(cqc, cqc, mini_cqe_res_format, mini_cqe_format); } if (ucmd.flags & MLX5_IB_CREATE_CQ_FLAGS_CQE_128B_PAD) { @@ -849,7 +880,7 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, return 0; err_cqb: - kfree(*cqb); + kvfree(*cqb); err_db: mlx5_ib_db_unmap_user(to_mucontext(context), &cq->db); diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c index 0e04fdddf670..35a0e04c38f2 100644 --- a/drivers/infiniband/hw/mlx5/ib_rep.c +++ b/drivers/infiniband/hw/mlx5/ib_rep.c @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* * Copyright (c) 2018 Mellanox Technologies. All rights reserved. */ diff --git a/drivers/infiniband/hw/mlx5/ib_rep.h b/drivers/infiniband/hw/mlx5/ib_rep.h index 046fd942fd46..2ba73636a2fb 100644 --- a/drivers/infiniband/hw/mlx5/ib_rep.h +++ b/drivers/infiniband/hw/mlx5/ib_rep.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* * Copyright (c) 2018 Mellanox Technologies. All rights reserved. */ diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 69716a7ea993..e52dd21519b4 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -982,13 +982,21 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, } if (field_avail(typeof(resp), cqe_comp_caps, uhw->outlen)) { - resp.cqe_comp_caps.max_num = - MLX5_CAP_GEN(dev->mdev, cqe_compression) ? - MLX5_CAP_GEN(dev->mdev, cqe_compression_max_num) : 0; - resp.cqe_comp_caps.supported_format = - MLX5_IB_CQE_RES_FORMAT_HASH | - MLX5_IB_CQE_RES_FORMAT_CSUM; resp.response_length += sizeof(resp.cqe_comp_caps); + + if (MLX5_CAP_GEN(dev->mdev, cqe_compression)) { + resp.cqe_comp_caps.max_num = + MLX5_CAP_GEN(dev->mdev, + cqe_compression_max_num); + + resp.cqe_comp_caps.supported_format = + MLX5_IB_CQE_RES_FORMAT_HASH | + MLX5_IB_CQE_RES_FORMAT_CSUM; + + if (MLX5_CAP_GEN(dev->mdev, mini_cqe_resp_stride_index)) + resp.cqe_comp_caps.supported_format |= + MLX5_IB_CQE_RES_FORMAT_CSUM_STRIDX; + } } if (field_avail(typeof(resp), packet_pacing_caps, uhw->outlen) && @@ -1084,6 +1092,14 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, if (MLX5_CAP_ETH(mdev, tunnel_stateless_gre)) resp.tunnel_offloads_caps |= MLX5_IB_TUNNELED_OFFLOADS_GRE; + if (MLX5_CAP_GEN(mdev, flex_parser_protocols) & + MLX5_FLEX_PROTO_CW_MPLS_GRE) + resp.tunnel_offloads_caps |= + MLX5_IB_TUNNELED_OFFLOADS_MPLS_GRE; + if (MLX5_CAP_GEN(mdev, flex_parser_protocols) & + MLX5_FLEX_PROTO_CW_MPLS_UDP) + resp.tunnel_offloads_caps |= + MLX5_IB_TUNNELED_OFFLOADS_MPLS_UDP; } if (uhw->outlen) { @@ -1953,49 +1969,15 @@ static int mlx5_ib_set_vma_data(struct vm_area_struct *vma, static void mlx5_ib_disassociate_ucontext(struct ib_ucontext *ibcontext) { - int ret; struct vm_area_struct *vma; struct mlx5_ib_vma_private_data *vma_private, *n; struct mlx5_ib_ucontext *context = to_mucontext(ibcontext); - struct task_struct *owning_process = NULL; - struct mm_struct *owning_mm = NULL; - owning_process = get_pid_task(ibcontext->tgid, PIDTYPE_PID); - if (!owning_process) - return; - - owning_mm = get_task_mm(owning_process); - if (!owning_mm) { - pr_info("no mm, disassociate ucontext is pending task termination\n"); - while (1) { - put_task_struct(owning_process); - usleep_range(1000, 2000); - owning_process = get_pid_task(ibcontext->tgid, - PIDTYPE_PID); - if (!owning_process || - owning_process->state == TASK_DEAD) { - pr_info("disassociate ucontext done, task was terminated\n"); - /* in case task was dead need to release the - * task struct. - */ - if (owning_process) - put_task_struct(owning_process); - return; - } - } - } - - /* need to protect from a race on closing the vma as part of - * mlx5_ib_vma_close. - */ - down_write(&owning_mm->mmap_sem); mutex_lock(&context->vma_private_list_mutex); list_for_each_entry_safe(vma_private, n, &context->vma_private_list, list) { vma = vma_private->vma; - ret = zap_vma_ptes(vma, vma->vm_start, - PAGE_SIZE); - WARN_ONCE(ret, "%s: zap_vma_ptes failed", __func__); + zap_vma_ptes(vma, vma->vm_start, PAGE_SIZE); /* context going to be destroyed, should * not access ops any more. */ @@ -2005,9 +1987,6 @@ static void mlx5_ib_disassociate_ucontext(struct ib_ucontext *ibcontext) kfree(vma_private); } mutex_unlock(&context->vma_private_list_mutex); - up_write(&owning_mm->mmap_sem); - mmput(owning_mm); - put_task_struct(owning_process); } static inline char *mmap_cmd2str(enum mlx5_ib_mmap_cmd cmd) @@ -2051,10 +2030,6 @@ static int mlx5_ib_mmap_clock_info_page(struct mlx5_ib_dev *dev, if (err) return err; - mlx5_ib_dbg(dev, "mapped clock info at 0x%lx, PA 0x%llx\n", - vma->vm_start, - (unsigned long long)pfn << PAGE_SHIFT); - return mlx5_ib_set_vma_data(vma, context); } @@ -2149,15 +2124,14 @@ static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd, err = io_remap_pfn_range(vma, vma->vm_start, pfn, PAGE_SIZE, vma->vm_page_prot); if (err) { - mlx5_ib_err(dev, "io_remap_pfn_range failed with error=%d, vm_start=0x%lx, pfn=%pa, mmap_cmd=%s\n", - err, vma->vm_start, &pfn, mmap_cmd2str(cmd)); + mlx5_ib_err(dev, + "io_remap_pfn_range failed with error=%d, mmap_cmd=%s\n", + err, mmap_cmd2str(cmd)); err = -EAGAIN; goto err; } pa = pfn << PAGE_SHIFT; - mlx5_ib_dbg(dev, "mapped %s at 0x%lx, PA %pa\n", mmap_cmd2str(cmd), - vma->vm_start, &pa); err = mlx5_ib_set_vma_data(vma, context); if (err) @@ -2243,10 +2217,6 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm if (io_remap_pfn_range(vma, vma->vm_start, pfn, PAGE_SIZE, vma->vm_page_prot)) return -EAGAIN; - - mlx5_ib_dbg(dev, "mapped internal timer at 0x%lx, PA 0x%llx\n", - vma->vm_start, - (unsigned long long)pfn << PAGE_SHIFT); break; case MLX5_IB_MMAP_CLOCK_INFO: return mlx5_ib_mmap_clock_info_page(dev, vma, context); @@ -2386,7 +2356,8 @@ static int mlx5_ib_dealloc_pd(struct ib_pd *pd) enum { MATCH_CRITERIA_ENABLE_OUTER_BIT, MATCH_CRITERIA_ENABLE_MISC_BIT, - MATCH_CRITERIA_ENABLE_INNER_BIT + MATCH_CRITERIA_ENABLE_INNER_BIT, + MATCH_CRITERIA_ENABLE_MISC2_BIT }; #define HEADER_IS_ZERO(match_criteria, headers) \ @@ -2406,6 +2377,9 @@ static u8 get_match_criteria_enable(u32 *match_criteria) match_criteria_enable |= (!HEADER_IS_ZERO(match_criteria, inner_headers)) << MATCH_CRITERIA_ENABLE_INNER_BIT; + match_criteria_enable |= + (!HEADER_IS_ZERO(match_criteria, misc_parameters_2)) << + MATCH_CRITERIA_ENABLE_MISC2_BIT; return match_criteria_enable; } @@ -2440,6 +2414,27 @@ static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val) MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_dscp, val >> 2); } +static int check_mpls_supp_fields(u32 field_support, const __be32 *set_mask) +{ + if (MLX5_GET(fte_match_mpls, set_mask, mpls_label) && + !(field_support & MLX5_FIELD_SUPPORT_MPLS_LABEL)) + return -EOPNOTSUPP; + + if (MLX5_GET(fte_match_mpls, set_mask, mpls_exp) && + !(field_support & MLX5_FIELD_SUPPORT_MPLS_EXP)) + return -EOPNOTSUPP; + + if (MLX5_GET(fte_match_mpls, set_mask, mpls_s_bos) && + !(field_support & MLX5_FIELD_SUPPORT_MPLS_S_BOS)) + return -EOPNOTSUPP; + + if (MLX5_GET(fte_match_mpls, set_mask, mpls_ttl) && + !(field_support & MLX5_FIELD_SUPPORT_MPLS_TTL)) + return -EOPNOTSUPP; + + return 0; +} + #define LAST_ETH_FIELD vlan_tag #define LAST_IB_FIELD sl #define LAST_IPV4_FIELD tos @@ -2448,6 +2443,7 @@ static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val) #define LAST_TUNNEL_FIELD tunnel_id #define LAST_FLOW_TAG_FIELD tag_id #define LAST_DROP_FIELD size +#define LAST_COUNTERS_FIELD counters /* Field is the last supported field */ #define FIELDS_NOT_SUPPORTED(filter, field)\ @@ -2479,12 +2475,16 @@ static int parse_flow_flow_action(const union ib_flow_spec *ib_spec, static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c, u32 *match_v, const union ib_flow_spec *ib_spec, const struct ib_flow_attr *flow_attr, - struct mlx5_flow_act *action) + struct mlx5_flow_act *action, u32 prev_type) { void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c, misc_parameters); void *misc_params_v = MLX5_ADDR_OF(fte_match_param, match_v, misc_parameters); + void *misc_params2_c = MLX5_ADDR_OF(fte_match_param, match_c, + misc_parameters_2); + void *misc_params2_v = MLX5_ADDR_OF(fte_match_param, match_v, + misc_parameters_2); void *headers_c; void *headers_v; int match_ipv; @@ -2689,6 +2689,93 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c, MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport, ntohs(ib_spec->tcp_udp.val.dst_port)); break; + case IB_FLOW_SPEC_GRE: + if (ib_spec->gre.mask.c_ks_res0_ver) + return -EOPNOTSUPP; + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol, + 0xff); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, + IPPROTO_GRE); + + MLX5_SET(fte_match_set_misc, misc_params_c, gre_protocol, + 0xffff); + MLX5_SET(fte_match_set_misc, misc_params_v, gre_protocol, + ntohs(ib_spec->gre.val.protocol)); + + memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_c, + gre_key_h), + &ib_spec->gre.mask.key, + sizeof(ib_spec->gre.mask.key)); + memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_v, + gre_key_h), + &ib_spec->gre.val.key, + sizeof(ib_spec->gre.val.key)); + break; + case IB_FLOW_SPEC_MPLS: + switch (prev_type) { + case IB_FLOW_SPEC_UDP: + if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev, + ft_field_support.outer_first_mpls_over_udp), + &ib_spec->mpls.mask.tag)) + return -EOPNOTSUPP; + + memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v, + outer_first_mpls_over_udp), + &ib_spec->mpls.val.tag, + sizeof(ib_spec->mpls.val.tag)); + memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c, + outer_first_mpls_over_udp), + &ib_spec->mpls.mask.tag, + sizeof(ib_spec->mpls.mask.tag)); + break; + case IB_FLOW_SPEC_GRE: + if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev, + ft_field_support.outer_first_mpls_over_gre), + &ib_spec->mpls.mask.tag)) + return -EOPNOTSUPP; + + memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v, + outer_first_mpls_over_gre), + &ib_spec->mpls.val.tag, + sizeof(ib_spec->mpls.val.tag)); + memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c, + outer_first_mpls_over_gre), + &ib_spec->mpls.mask.tag, + sizeof(ib_spec->mpls.mask.tag)); + break; + default: + if (ib_spec->type & IB_FLOW_SPEC_INNER) { + if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev, + ft_field_support.inner_first_mpls), + &ib_spec->mpls.mask.tag)) + return -EOPNOTSUPP; + + memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v, + inner_first_mpls), + &ib_spec->mpls.val.tag, + sizeof(ib_spec->mpls.val.tag)); + memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c, + inner_first_mpls), + &ib_spec->mpls.mask.tag, + sizeof(ib_spec->mpls.mask.tag)); + } else { + if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev, + ft_field_support.outer_first_mpls), + &ib_spec->mpls.mask.tag)) + return -EOPNOTSUPP; + + memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v, + outer_first_mpls), + &ib_spec->mpls.val.tag, + sizeof(ib_spec->mpls.val.tag)); + memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c, + outer_first_mpls), + &ib_spec->mpls.mask.tag, + sizeof(ib_spec->mpls.mask.tag)); + } + } + break; case IB_FLOW_SPEC_VXLAN_TUNNEL: if (FIELDS_NOT_SUPPORTED(ib_spec->tunnel.mask, LAST_TUNNEL_FIELD)) @@ -2720,6 +2807,18 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c, if (ret) return ret; break; + case IB_FLOW_SPEC_ACTION_COUNT: + if (FIELDS_NOT_SUPPORTED(ib_spec->flow_count, + LAST_COUNTERS_FIELD)) + return -EOPNOTSUPP; + + /* for now support only one counters spec per flow */ + if (action->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) + return -EINVAL; + + action->counters = ib_spec->flow_count.counters; + action->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; + break; default: return -EINVAL; } @@ -2867,6 +2966,17 @@ static void put_flow_table(struct mlx5_ib_dev *dev, } } +static void counters_clear_description(struct ib_counters *counters) +{ + struct mlx5_ib_mcounters *mcounters = to_mcounters(counters); + + mutex_lock(&mcounters->mcntrs_mutex); + kfree(mcounters->counters_data); + mcounters->counters_data = NULL; + mcounters->cntrs_max_index = 0; + mutex_unlock(&mcounters->mcntrs_mutex); +} + static int mlx5_ib_destroy_flow(struct ib_flow *flow_id) { struct mlx5_ib_dev *dev = to_mdev(flow_id->qp->device); @@ -2886,8 +2996,11 @@ static int mlx5_ib_destroy_flow(struct ib_flow *flow_id) mlx5_del_flow_rules(handler->rule); put_flow_table(dev, handler->prio, true); - mutex_unlock(&dev->flow_db->lock); + if (handler->ibcounters && + atomic_read(&handler->ibcounters->usecnt) == 1) + counters_clear_description(handler->ibcounters); + mutex_unlock(&dev->flow_db->lock); kfree(handler); return 0; @@ -3007,21 +3120,143 @@ static void set_underlay_qp(struct mlx5_ib_dev *dev, } } +static int read_flow_counters(struct ib_device *ibdev, + struct mlx5_read_counters_attr *read_attr) +{ + struct mlx5_fc *fc = read_attr->hw_cntrs_hndl; + struct mlx5_ib_dev *dev = to_mdev(ibdev); + + return mlx5_fc_query(dev->mdev, fc, + &read_attr->out[IB_COUNTER_PACKETS], + &read_attr->out[IB_COUNTER_BYTES]); +} + +/* flow counters currently expose two counters packets and bytes */ +#define FLOW_COUNTERS_NUM 2 +static int counters_set_description(struct ib_counters *counters, + enum mlx5_ib_counters_type counters_type, + struct mlx5_ib_flow_counters_desc *desc_data, + u32 ncounters) +{ + struct mlx5_ib_mcounters *mcounters = to_mcounters(counters); + u32 cntrs_max_index = 0; + int i; + + if (counters_type != MLX5_IB_COUNTERS_FLOW) + return -EINVAL; + + /* init the fields for the object */ + mcounters->type = counters_type; + mcounters->read_counters = read_flow_counters; + mcounters->counters_num = FLOW_COUNTERS_NUM; + mcounters->ncounters = ncounters; + /* each counter entry have both description and index pair */ + for (i = 0; i < ncounters; i++) { + if (desc_data[i].description > IB_COUNTER_BYTES) + return -EINVAL; + + if (cntrs_max_index <= desc_data[i].index) + cntrs_max_index = desc_data[i].index + 1; + } + + mutex_lock(&mcounters->mcntrs_mutex); + mcounters->counters_data = desc_data; + mcounters->cntrs_max_index = cntrs_max_index; + mutex_unlock(&mcounters->mcntrs_mutex); + + return 0; +} + +#define MAX_COUNTERS_NUM (USHRT_MAX / (sizeof(u32) * 2)) +static int flow_counters_set_data(struct ib_counters *ibcounters, + struct mlx5_ib_create_flow *ucmd) +{ + struct mlx5_ib_mcounters *mcounters = to_mcounters(ibcounters); + struct mlx5_ib_flow_counters_data *cntrs_data = NULL; + struct mlx5_ib_flow_counters_desc *desc_data = NULL; + bool hw_hndl = false; + int ret = 0; + + if (ucmd && ucmd->ncounters_data != 0) { + cntrs_data = ucmd->data; + if (cntrs_data->ncounters > MAX_COUNTERS_NUM) + return -EINVAL; + + desc_data = kcalloc(cntrs_data->ncounters, + sizeof(*desc_data), + GFP_KERNEL); + if (!desc_data) + return -ENOMEM; + + if (copy_from_user(desc_data, + u64_to_user_ptr(cntrs_data->counters_data), + sizeof(*desc_data) * cntrs_data->ncounters)) { + ret = -EFAULT; + goto free; + } + } + + if (!mcounters->hw_cntrs_hndl) { + mcounters->hw_cntrs_hndl = mlx5_fc_create( + to_mdev(ibcounters->device)->mdev, false); + if (!mcounters->hw_cntrs_hndl) { + ret = -ENOMEM; + goto free; + } + hw_hndl = true; + } + + if (desc_data) { + /* counters already bound to at least one flow */ + if (mcounters->cntrs_max_index) { + ret = -EINVAL; + goto free_hndl; + } + + ret = counters_set_description(ibcounters, + MLX5_IB_COUNTERS_FLOW, + desc_data, + cntrs_data->ncounters); + if (ret) + goto free_hndl; + + } else if (!mcounters->cntrs_max_index) { + /* counters not bound yet, must have udata passed */ + ret = -EINVAL; + goto free_hndl; + } + + return 0; + +free_hndl: + if (hw_hndl) { + mlx5_fc_destroy(to_mdev(ibcounters->device)->mdev, + mcounters->hw_cntrs_hndl); + mcounters->hw_cntrs_hndl = NULL; + } +free: + kfree(desc_data); + return ret; +} + static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev, struct mlx5_ib_flow_prio *ft_prio, const struct ib_flow_attr *flow_attr, struct mlx5_flow_destination *dst, - u32 underlay_qpn) + u32 underlay_qpn, + struct mlx5_ib_create_flow *ucmd) { struct mlx5_flow_table *ft = ft_prio->flow_table; struct mlx5_ib_flow_handler *handler; struct mlx5_flow_act flow_act = {.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG}; struct mlx5_flow_spec *spec; - struct mlx5_flow_destination *rule_dst = dst; + struct mlx5_flow_destination dest_arr[2] = {}; + struct mlx5_flow_destination *rule_dst = dest_arr; const void *ib_flow = (const void *)flow_attr + sizeof(*flow_attr); unsigned int spec_index; + u32 prev_type = 0; int err = 0; - int dest_num = 1; + int dest_num = 0; bool is_egress = flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS; if (!is_valid_attr(dev->mdev, flow_attr)) @@ -3035,14 +3270,20 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev, } INIT_LIST_HEAD(&handler->list); + if (dst) { + memcpy(&dest_arr[0], dst, sizeof(*dst)); + dest_num++; + } for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) { err = parse_flow_attr(dev->mdev, spec->match_criteria, spec->match_value, - ib_flow, flow_attr, &flow_act); + ib_flow, flow_attr, &flow_act, + prev_type); if (err < 0) goto free; + prev_type = ((union ib_flow_spec *)ib_flow)->type; ib_flow += ((union ib_flow_spec *)ib_flow)->size; } @@ -3069,15 +3310,30 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev, goto free; } + if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { + err = flow_counters_set_data(flow_act.counters, ucmd); + if (err) + goto free; + + handler->ibcounters = flow_act.counters; + dest_arr[dest_num].type = + MLX5_FLOW_DESTINATION_TYPE_COUNTER; + dest_arr[dest_num].counter = + to_mcounters(flow_act.counters)->hw_cntrs_hndl; + dest_num++; + } + if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DROP) { - rule_dst = NULL; - dest_num = 0; + if (!(flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT)) { + rule_dst = NULL; + dest_num = 0; + } } else { if (is_egress) flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW; else flow_act.action |= - dst ? MLX5_FLOW_CONTEXT_ACTION_FWD_DEST : + dest_num ? MLX5_FLOW_CONTEXT_ACTION_FWD_DEST : MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO; } @@ -3103,8 +3359,12 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev, ft_prio->flow_table = ft; free: - if (err) + if (err && handler) { + if (handler->ibcounters && + atomic_read(&handler->ibcounters->usecnt) == 1) + counters_clear_description(handler->ibcounters); kfree(handler); + } kvfree(spec); return err ? ERR_PTR(err) : handler; } @@ -3114,7 +3374,7 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev, const struct ib_flow_attr *flow_attr, struct mlx5_flow_destination *dst) { - return _create_flow_rule(dev, ft_prio, flow_attr, dst, 0); + return _create_flow_rule(dev, ft_prio, flow_attr, dst, 0, NULL); } static struct mlx5_ib_flow_handler *create_dont_trap_rule(struct mlx5_ib_dev *dev, @@ -3244,7 +3504,8 @@ err: static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_attr, - int domain) + int domain, + struct ib_udata *udata) { struct mlx5_ib_dev *dev = to_mdev(qp->device); struct mlx5_ib_qp *mqp = to_mqp(qp); @@ -3253,9 +3514,44 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp, struct mlx5_ib_flow_prio *ft_prio_tx = NULL; struct mlx5_ib_flow_prio *ft_prio; bool is_egress = flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS; + struct mlx5_ib_create_flow *ucmd = NULL, ucmd_hdr; + size_t min_ucmd_sz, required_ucmd_sz; int err; int underlay_qpn; + if (udata && udata->inlen) { + min_ucmd_sz = offsetof(typeof(ucmd_hdr), reserved) + + sizeof(ucmd_hdr.reserved); + if (udata->inlen < min_ucmd_sz) + return ERR_PTR(-EOPNOTSUPP); + + err = ib_copy_from_udata(&ucmd_hdr, udata, min_ucmd_sz); + if (err) + return ERR_PTR(err); + + /* currently supports only one counters data */ + if (ucmd_hdr.ncounters_data > 1) + return ERR_PTR(-EINVAL); + + required_ucmd_sz = min_ucmd_sz + + sizeof(struct mlx5_ib_flow_counters_data) * + ucmd_hdr.ncounters_data; + if (udata->inlen > required_ucmd_sz && + !ib_is_udata_cleared(udata, required_ucmd_sz, + udata->inlen - required_ucmd_sz)) + return ERR_PTR(-EOPNOTSUPP); + + ucmd = kzalloc(required_ucmd_sz, GFP_KERNEL); + if (!ucmd) + return ERR_PTR(-ENOMEM); + + err = ib_copy_from_udata(ucmd, udata, required_ucmd_sz); + if (err) { + kfree(ucmd); + return ERR_PTR(err); + } + } + if (flow_attr->priority > MLX5_IB_FLOW_LAST_PRIO) return ERR_PTR(-ENOMEM); @@ -3309,7 +3605,7 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp, underlay_qpn = (mqp->flags & MLX5_IB_QP_UNDERLAY) ? mqp->underlay_qpn : 0; handler = _create_flow_rule(dev, ft_prio, flow_attr, - dst, underlay_qpn); + dst, underlay_qpn, ucmd); } } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT || flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) { @@ -3330,6 +3626,7 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp, mutex_unlock(&dev->flow_db->lock); kfree(dst); + kfree(ucmd); return &handler->ibflow; @@ -3340,6 +3637,7 @@ destroy_ft: unlock: mutex_unlock(&dev->flow_db->lock); kfree(dst); + kfree(ucmd); kfree(handler); return ERR_PTR(err); } @@ -5000,6 +5298,76 @@ static void depopulate_specs_root(struct mlx5_ib_dev *dev) uverbs_free_spec_tree(dev->ib_dev.specs_root); } +static int mlx5_ib_read_counters(struct ib_counters *counters, + struct ib_counters_read_attr *read_attr, + struct uverbs_attr_bundle *attrs) +{ + struct mlx5_ib_mcounters *mcounters = to_mcounters(counters); + struct mlx5_read_counters_attr mread_attr = {}; + struct mlx5_ib_flow_counters_desc *desc; + int ret, i; + + mutex_lock(&mcounters->mcntrs_mutex); + if (mcounters->cntrs_max_index > read_attr->ncounters) { + ret = -EINVAL; + goto err_bound; + } + + mread_attr.out = kcalloc(mcounters->counters_num, sizeof(u64), + GFP_KERNEL); + if (!mread_attr.out) { + ret = -ENOMEM; + goto err_bound; + } + + mread_attr.hw_cntrs_hndl = mcounters->hw_cntrs_hndl; + mread_attr.flags = read_attr->flags; + ret = mcounters->read_counters(counters->device, &mread_attr); + if (ret) + goto err_read; + + /* do the pass over the counters data array to assign according to the + * descriptions and indexing pairs + */ + desc = mcounters->counters_data; + for (i = 0; i < mcounters->ncounters; i++) + read_attr->counters_buff[desc[i].index] += mread_attr.out[desc[i].description]; + +err_read: + kfree(mread_attr.out); +err_bound: + mutex_unlock(&mcounters->mcntrs_mutex); + return ret; +} + +static int mlx5_ib_destroy_counters(struct ib_counters *counters) +{ + struct mlx5_ib_mcounters *mcounters = to_mcounters(counters); + + counters_clear_description(counters); + if (mcounters->hw_cntrs_hndl) + mlx5_fc_destroy(to_mdev(counters->device)->mdev, + mcounters->hw_cntrs_hndl); + + kfree(mcounters); + + return 0; +} + +static struct ib_counters *mlx5_ib_create_counters(struct ib_device *device, + struct uverbs_attr_bundle *attrs) +{ + struct mlx5_ib_mcounters *mcounters; + + mcounters = kzalloc(sizeof(*mcounters), GFP_KERNEL); + if (!mcounters) + return ERR_PTR(-ENOMEM); + + mutex_init(&mcounters->mcntrs_mutex); + + return &mcounters->ibcntrs; +} + void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev) { mlx5_ib_cleanup_multiport_master(dev); @@ -5243,6 +5611,9 @@ int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev) dev->ib_dev.destroy_flow_action = mlx5_ib_destroy_flow_action; dev->ib_dev.modify_flow_action_esp = mlx5_ib_modify_flow_action_esp; dev->ib_dev.driver_id = RDMA_DRIVER_MLX5; + dev->ib_dev.create_counters = mlx5_ib_create_counters; + dev->ib_dev.destroy_counters = mlx5_ib_destroy_counters; + dev->ib_dev.read_counters = mlx5_ib_read_counters; err = init_node_data(dev); if (err) diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 49a1aa0ff429..d89c8fe626f6 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -175,6 +175,7 @@ struct mlx5_ib_flow_handler { struct ib_flow ibflow; struct mlx5_ib_flow_prio *prio; struct mlx5_flow_handle *rule; + struct ib_counters *ibcounters; }; struct mlx5_ib_flow_db { @@ -813,6 +814,41 @@ struct mlx5_memic { DECLARE_BITMAP(memic_alloc_pages, MLX5_MAX_MEMIC_PAGES); }; +struct mlx5_read_counters_attr { + struct mlx5_fc *hw_cntrs_hndl; + u64 *out; + u32 flags; +}; + +enum mlx5_ib_counters_type { + MLX5_IB_COUNTERS_FLOW, +}; + +struct mlx5_ib_mcounters { + struct ib_counters ibcntrs; + enum mlx5_ib_counters_type type; + /* number of counters supported for this counters type */ + u32 counters_num; + struct mlx5_fc *hw_cntrs_hndl; + /* read function for this counters type */ + int (*read_counters)(struct ib_device *ibdev, + struct mlx5_read_counters_attr *read_attr); + /* max index set as part of create_flow */ + u32 cntrs_max_index; + /* number of counters data entries (<description,index> pair) */ + u32 ncounters; + /* counters data array for descriptions and indexes */ + struct mlx5_ib_flow_counters_desc *counters_data; + /* protects access to mcounters internal data */ + struct mutex mcntrs_mutex; +}; + +static inline struct mlx5_ib_mcounters * +to_mcounters(struct ib_counters *ibcntrs) +{ + return container_of(ibcntrs, struct mlx5_ib_mcounters, ibcntrs); +} + struct mlx5_ib_dev { struct ib_device ib_dev; struct mlx5_core_dev *mdev; diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 2193dc1765fb..a4f1f638509f 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -54,6 +54,7 @@ enum { enum { MLX5_IB_SQ_STRIDE = 6, + MLX5_IB_SQ_UMR_INLINE_THRESHOLD = 64, }; static const u32 mlx5_ib_opcode[] = { @@ -302,7 +303,9 @@ static int sq_overhead(struct ib_qp_init_attr *attr) max(sizeof(struct mlx5_wqe_atomic_seg) + sizeof(struct mlx5_wqe_raddr_seg), sizeof(struct mlx5_wqe_umr_ctrl_seg) + - sizeof(struct mlx5_mkey_seg)); + sizeof(struct mlx5_mkey_seg) + + MLX5_IB_SQ_UMR_INLINE_THRESHOLD / + MLX5_IB_UMR_OCTOWORD); break; case IB_QPT_XRC_TGT: @@ -3641,13 +3644,15 @@ static __be64 sig_mkey_mask(void) } static void set_reg_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr, - struct mlx5_ib_mr *mr) + struct mlx5_ib_mr *mr, bool umr_inline) { int size = mr->ndescs * mr->desc_size; memset(umr, 0, sizeof(*umr)); umr->flags = MLX5_UMR_CHECK_NOT_FREE; + if (umr_inline) + umr->flags |= MLX5_UMR_INLINE; umr->xlt_octowords = cpu_to_be16(get_xlt_octo(size)); umr->mkey_mask = frwr_mkey_mask(); } @@ -3831,6 +3836,24 @@ static void set_reg_data_seg(struct mlx5_wqe_data_seg *dseg, dseg->lkey = cpu_to_be32(pd->ibpd.local_dma_lkey); } +static void set_reg_umr_inline_seg(void *seg, struct mlx5_ib_qp *qp, + struct mlx5_ib_mr *mr, int mr_list_size) +{ + void *qend = qp->sq.qend; + void *addr = mr->descs; + int copy; + + if (unlikely(seg + mr_list_size > qend)) { + copy = qend - seg; + memcpy(seg, addr, copy); + addr += copy; + mr_list_size -= copy; + seg = mlx5_get_send_wqe(qp, 0); + } + memcpy(seg, addr, mr_list_size); + seg += mr_list_size; +} + static __be32 send_ieth(struct ib_send_wr *wr) { switch (wr->opcode) { @@ -4225,6 +4248,8 @@ static int set_reg_wr(struct mlx5_ib_qp *qp, { struct mlx5_ib_mr *mr = to_mmr(wr->mr); struct mlx5_ib_pd *pd = to_mpd(qp->ibqp.pd); + int mr_list_size = mr->ndescs * mr->desc_size; + bool umr_inline = mr_list_size <= MLX5_IB_SQ_UMR_INLINE_THRESHOLD; if (unlikely(wr->wr.send_flags & IB_SEND_INLINE)) { mlx5_ib_warn(to_mdev(qp->ibqp.device), @@ -4232,7 +4257,7 @@ static int set_reg_wr(struct mlx5_ib_qp *qp, return -EINVAL; } - set_reg_umr_seg(*seg, mr); + set_reg_umr_seg(*seg, mr, umr_inline); *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; if (unlikely((*seg == qp->sq.qend))) @@ -4244,10 +4269,14 @@ static int set_reg_wr(struct mlx5_ib_qp *qp, if (unlikely((*seg == qp->sq.qend))) *seg = mlx5_get_send_wqe(qp, 0); - set_reg_data_seg(*seg, mr, pd); - *seg += sizeof(struct mlx5_wqe_data_seg); - *size += (sizeof(struct mlx5_wqe_data_seg) / 16); - + if (umr_inline) { + set_reg_umr_inline_seg(*seg, qp, mr, mr_list_size); + *size += get_xlt_octo(mr_list_size); + } else { + set_reg_data_seg(*seg, mr, pd); + *seg += sizeof(struct mlx5_wqe_data_seg); + *size += (sizeof(struct mlx5_wqe_data_seg) / 16); + } return 0; } |