summaryrefslogtreecommitdiffstats
path: root/drivers/infiniband/hw
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband/hw')
-rw-r--r--drivers/infiniband/hw/cxgb4/Kconfig1
-rw-r--r--drivers/infiniband/hw/cxgb4/Makefile1
-rw-r--r--drivers/infiniband/hw/cxgb4/cm.c294
-rw-r--r--drivers/infiniband/hw/cxgb4/device.c9
-rw-r--r--drivers/infiniband/hw/cxgb4/iw_cxgb4.h10
-rw-r--r--drivers/infiniband/hw/cxgb4/qp.c2
-rw-r--r--drivers/infiniband/hw/hfi1/affinity.c203
-rw-r--r--drivers/infiniband/hw/hfi1/affinity.h3
-rw-r--r--drivers/infiniband/hw/hfi1/chip.c139
-rw-r--r--drivers/infiniband/hw/hfi1/chip.h9
-rw-r--r--drivers/infiniband/hw/hfi1/common.h8
-rw-r--r--drivers/infiniband/hw/hfi1/debugfs.c170
-rw-r--r--drivers/infiniband/hw/hfi1/driver.c35
-rw-r--r--drivers/infiniband/hw/hfi1/eprom.c185
-rw-r--r--drivers/infiniband/hw/hfi1/eprom.h4
-rw-r--r--drivers/infiniband/hw/hfi1/file_ops.c59
-rw-r--r--drivers/infiniband/hw/hfi1/hfi.h26
-rw-r--r--drivers/infiniband/hw/hfi1/init.c48
-rw-r--r--drivers/infiniband/hw/hfi1/mad.c19
-rw-r--r--drivers/infiniband/hw/hfi1/pio.c20
-rw-r--r--drivers/infiniband/hw/hfi1/pio.h2
-rw-r--r--drivers/infiniband/hw/hfi1/pio_copy.c258
-rw-r--r--drivers/infiniband/hw/hfi1/platform.c32
-rw-r--r--drivers/infiniband/hw/hfi1/qp.c32
-rw-r--r--drivers/infiniband/hw/hfi1/qsfp.c2
-rw-r--r--drivers/infiniband/hw/hfi1/rc.c146
-rw-r--r--drivers/infiniband/hw/hfi1/ruc.c10
-rw-r--r--drivers/infiniband/hw/hfi1/sdma.c377
-rw-r--r--drivers/infiniband/hw/hfi1/sdma.h13
-rw-r--r--drivers/infiniband/hw/hfi1/sysfs.c103
-rw-r--r--drivers/infiniband/hw/hfi1/trace.c31
-rw-r--r--drivers/infiniband/hw/hfi1/trace_ctxts.h13
-rw-r--r--drivers/infiniband/hw/hfi1/trace_ibhdrs.h14
-rw-r--r--drivers/infiniband/hw/hfi1/trace_rx.h4
-rw-r--r--drivers/infiniband/hw/hfi1/uc.c15
-rw-r--r--drivers/infiniband/hw/hfi1/ud.c61
-rw-r--r--drivers/infiniband/hw/hfi1/user_sdma.c45
-rw-r--r--drivers/infiniband/hw/hfi1/verbs.c60
-rw-r--r--drivers/infiniband/hw/hfi1/verbs.h93
-rw-r--r--drivers/infiniband/hw/hfi1/verbs_txreq.c2
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_hw.c1
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_main.c8
-rw-r--r--drivers/infiniband/hw/mlx4/cq.c26
-rw-r--r--drivers/infiniband/hw/mlx4/mad.c23
-rw-r--r--drivers/infiniband/hw/mlx4/main.c3
-rw-r--r--drivers/infiniband/hw/mlx4/mcg.c14
-rw-r--r--drivers/infiniband/hw/mlx4/mlx4_ib.h2
-rw-r--r--drivers/infiniband/hw/mlx4/qp.c37
-rw-r--r--drivers/infiniband/hw/mlx5/cq.c132
-rw-r--r--drivers/infiniband/hw/mlx5/main.c30
-rw-r--r--drivers/infiniband/hw/mlx5/mem.c6
-rw-r--r--drivers/infiniband/hw/mlx5/mlx5_ib.h3
-rw-r--r--drivers/infiniband/hw/mlx5/mr.c184
-rw-r--r--drivers/infiniband/hw/mlx5/qp.c202
-rw-r--r--drivers/infiniband/hw/qib/qib.h2
-rw-r--r--drivers/infiniband/hw/qib/qib_driver.c7
-rw-r--r--drivers/infiniband/hw/qib/qib_iba7322.c2
-rw-r--r--drivers/infiniband/hw/qib/qib_qp.c13
-rw-r--r--drivers/infiniband/hw/qib/qib_rc.c73
-rw-r--r--drivers/infiniband/hw/qib/qib_ruc.c4
-rw-r--r--drivers/infiniband/hw/qib/qib_uc.c6
-rw-r--r--drivers/infiniband/hw/qib/qib_ud.c6
-rw-r--r--drivers/infiniband/hw/qib/qib_verbs.c16
-rw-r--r--drivers/infiniband/hw/qib/qib_verbs.h94
64 files changed, 1969 insertions, 1483 deletions
diff --git a/drivers/infiniband/hw/cxgb4/Kconfig b/drivers/infiniband/hw/cxgb4/Kconfig
index 23f38cf2c5cd..afe8b28e0878 100644
--- a/drivers/infiniband/hw/cxgb4/Kconfig
+++ b/drivers/infiniband/hw/cxgb4/Kconfig
@@ -1,6 +1,7 @@
config INFINIBAND_CXGB4
tristate "Chelsio T4/T5 RDMA Driver"
depends on CHELSIO_T4 && INET && (IPV6 || IPV6=n)
+ select CHELSIO_LIB
select GENERIC_ALLOCATOR
---help---
This is an iWARP/RDMA driver for the Chelsio T4 and T5
diff --git a/drivers/infiniband/hw/cxgb4/Makefile b/drivers/infiniband/hw/cxgb4/Makefile
index e11cf7299945..fa40b685831b 100644
--- a/drivers/infiniband/hw/cxgb4/Makefile
+++ b/drivers/infiniband/hw/cxgb4/Makefile
@@ -1,4 +1,5 @@
ccflags-y := -Idrivers/net/ethernet/chelsio/cxgb4
+ccflags-y += -Idrivers/net/ethernet/chelsio/libcxgb
obj-$(CONFIG_INFINIBAND_CXGB4) += iw_cxgb4.o
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index b6a953aed7e8..71c8867ef66b 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -49,6 +49,7 @@
#include <rdma/ib_addr.h>
+#include <libcxgb_cm.h>
#include "iw_cxgb4.h"
#include "clip_tbl.h"
@@ -239,15 +240,13 @@ int c4iw_ofld_send(struct c4iw_rdev *rdev, struct sk_buff *skb)
static void release_tid(struct c4iw_rdev *rdev, u32 hwtid, struct sk_buff *skb)
{
- struct cpl_tid_release *req;
+ u32 len = roundup(sizeof(struct cpl_tid_release), 16);
- skb = get_skb(skb, sizeof *req, GFP_KERNEL);
+ skb = get_skb(skb, len, GFP_KERNEL);
if (!skb)
return;
- req = (struct cpl_tid_release *) skb_put(skb, sizeof(*req));
- INIT_TP_WR(req, hwtid);
- OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_TID_RELEASE, hwtid));
- set_wr_txq(skb, CPL_PRIORITY_SETUP, 0);
+
+ cxgb_mk_tid_release(skb, len, hwtid, 0);
c4iw_ofld_send(rdev, skb);
return;
}
@@ -333,6 +332,8 @@ static void remove_ep_tid(struct c4iw_ep *ep)
spin_lock_irqsave(&ep->com.dev->lock, flags);
_remove_handle(ep->com.dev, &ep->com.dev->hwtid_idr, ep->hwtid, 0);
+ if (idr_is_empty(&ep->com.dev->hwtid_idr))
+ wake_up(&ep->com.dev->wait);
spin_unlock_irqrestore(&ep->com.dev->lock, flags);
}
@@ -464,72 +465,6 @@ static struct net_device *get_real_dev(struct net_device *egress_dev)
return rdma_vlan_dev_real_dev(egress_dev) ? : egress_dev;
}
-static int our_interface(struct c4iw_dev *dev, struct net_device *egress_dev)
-{
- int i;
-
- egress_dev = get_real_dev(egress_dev);
- for (i = 0; i < dev->rdev.lldi.nports; i++)
- if (dev->rdev.lldi.ports[i] == egress_dev)
- return 1;
- return 0;
-}
-
-static struct dst_entry *find_route6(struct c4iw_dev *dev, __u8 *local_ip,
- __u8 *peer_ip, __be16 local_port,
- __be16 peer_port, u8 tos,
- __u32 sin6_scope_id)
-{
- struct dst_entry *dst = NULL;
-
- if (IS_ENABLED(CONFIG_IPV6)) {
- struct flowi6 fl6;
-
- memset(&fl6, 0, sizeof(fl6));
- memcpy(&fl6.daddr, peer_ip, 16);
- memcpy(&fl6.saddr, local_ip, 16);
- if (ipv6_addr_type(&fl6.daddr) & IPV6_ADDR_LINKLOCAL)
- fl6.flowi6_oif = sin6_scope_id;
- dst = ip6_route_output(&init_net, NULL, &fl6);
- if (!dst)
- goto out;
- if (!our_interface(dev, ip6_dst_idev(dst)->dev) &&
- !(ip6_dst_idev(dst)->dev->flags & IFF_LOOPBACK)) {
- dst_release(dst);
- dst = NULL;
- }
- }
-
-out:
- return dst;
-}
-
-static struct dst_entry *find_route(struct c4iw_dev *dev, __be32 local_ip,
- __be32 peer_ip, __be16 local_port,
- __be16 peer_port, u8 tos)
-{
- struct rtable *rt;
- struct flowi4 fl4;
- struct neighbour *n;
-
- rt = ip_route_output_ports(&init_net, &fl4, NULL, peer_ip, local_ip,
- peer_port, local_port, IPPROTO_TCP,
- tos, 0);
- if (IS_ERR(rt))
- return NULL;
- n = dst_neigh_lookup(&rt->dst, &peer_ip);
- if (!n)
- return NULL;
- if (!our_interface(dev, n->dev) &&
- !(n->dev->flags & IFF_LOOPBACK)) {
- neigh_release(n);
- dst_release(&rt->dst);
- return NULL;
- }
- neigh_release(n);
- return &rt->dst;
-}
-
static void arp_failure_discard(void *handle, struct sk_buff *skb)
{
pr_err(MOD "ARP failure\n");
@@ -704,56 +639,32 @@ static int send_flowc(struct c4iw_ep *ep)
static int send_halfclose(struct c4iw_ep *ep)
{
- struct cpl_close_con_req *req;
struct sk_buff *skb = skb_dequeue(&ep->com.ep_skb_list);
- int wrlen = roundup(sizeof *req, 16);
+ u32 wrlen = roundup(sizeof(struct cpl_close_con_req), 16);
PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
if (WARN_ON(!skb))
return -ENOMEM;
- set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
- t4_set_arp_err_handler(skb, NULL, arp_failure_discard);
- req = (struct cpl_close_con_req *) skb_put(skb, wrlen);
- memset(req, 0, wrlen);
- INIT_TP_WR(req, ep->hwtid);
- OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_CLOSE_CON_REQ,
- ep->hwtid));
+ cxgb_mk_close_con_req(skb, wrlen, ep->hwtid, ep->txq_idx,
+ NULL, arp_failure_discard);
+
return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
}
static int send_abort(struct c4iw_ep *ep)
{
- struct cpl_abort_req *req;
- int wrlen = roundup(sizeof *req, 16);
+ u32 wrlen = roundup(sizeof(struct cpl_abort_req), 16);
struct sk_buff *req_skb = skb_dequeue(&ep->com.ep_skb_list);
PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
if (WARN_ON(!req_skb))
return -ENOMEM;
- set_wr_txq(req_skb, CPL_PRIORITY_DATA, ep->txq_idx);
- t4_set_arp_err_handler(req_skb, ep, abort_arp_failure);
- req = (struct cpl_abort_req *)skb_put(req_skb, wrlen);
- memset(req, 0, wrlen);
- INIT_TP_WR(req, ep->hwtid);
- OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_ABORT_REQ, ep->hwtid));
- req->cmd = CPL_ABORT_SEND_RST;
- return c4iw_l2t_send(&ep->com.dev->rdev, req_skb, ep->l2t);
-}
+ cxgb_mk_abort_req(req_skb, wrlen, ep->hwtid, ep->txq_idx,
+ ep, abort_arp_failure);
-static void best_mtu(const unsigned short *mtus, unsigned short mtu,
- unsigned int *idx, int use_ts, int ipv6)
-{
- unsigned short hdr_size = (ipv6 ?
- sizeof(struct ipv6hdr) :
- sizeof(struct iphdr)) +
- sizeof(struct tcphdr) +
- (use_ts ?
- round_up(TCPOLEN_TIMESTAMP, 4) : 0);
- unsigned short data_size = mtu - hdr_size;
-
- cxgb4_best_aligned_mtu(mtus, hdr_size, data_size, 8, idx);
+ return c4iw_l2t_send(&ep->com.dev->rdev, req_skb, ep->l2t);
}
static int send_connect(struct c4iw_ep *ep)
@@ -768,7 +679,7 @@ static int send_connect(struct c4iw_ep *ep)
u64 opt0;
u32 opt2;
unsigned int mtu_idx;
- int wscale;
+ u32 wscale;
int win, sizev4, sizev6, wrlen;
struct sockaddr_in *la = (struct sockaddr_in *)
&ep->com.local_addr;
@@ -815,10 +726,10 @@ static int send_connect(struct c4iw_ep *ep)
}
set_wr_txq(skb, CPL_PRIORITY_SETUP, ep->ctrlq_idx);
- best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
- enable_tcp_timestamps,
- (AF_INET == ep->com.remote_addr.ss_family) ? 0 : 1);
- wscale = compute_wscale(rcv_win);
+ cxgb_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
+ enable_tcp_timestamps,
+ (ep->com.remote_addr.ss_family == AF_INET) ? 0 : 1);
+ wscale = cxgb_compute_wscale(rcv_win);
/*
* Specify the largest window that will fit in opt0. The
@@ -1445,9 +1356,9 @@ static void established_upcall(struct c4iw_ep *ep)
static int update_rx_credits(struct c4iw_ep *ep, u32 credits)
{
- struct cpl_rx_data_ack *req;
struct sk_buff *skb;
- int wrlen = roundup(sizeof *req, 16);
+ u32 wrlen = roundup(sizeof(struct cpl_rx_data_ack), 16);
+ u32 credit_dack;
PDBG("%s ep %p tid %u credits %u\n", __func__, ep, ep->hwtid, credits);
skb = get_skb(NULL, wrlen, GFP_KERNEL);
@@ -1464,15 +1375,12 @@ static int update_rx_credits(struct c4iw_ep *ep, u32 credits)
if (ep->rcv_win > RCV_BUFSIZ_M * 1024)
credits += ep->rcv_win - RCV_BUFSIZ_M * 1024;
- req = (struct cpl_rx_data_ack *) skb_put(skb, wrlen);
- memset(req, 0, wrlen);
- INIT_TP_WR(req, ep->hwtid);
- OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_RX_DATA_ACK,
- ep->hwtid));
- req->credit_dack = cpu_to_be32(credits | RX_FORCE_ACK_F |
- RX_DACK_CHANGE_F |
- RX_DACK_MODE_V(dack_mode));
- set_wr_txq(skb, CPL_PRIORITY_ACK, ep->ctrlq_idx);
+ credit_dack = credits | RX_FORCE_ACK_F | RX_DACK_CHANGE_F |
+ RX_DACK_MODE_V(dack_mode);
+
+ cxgb_mk_rx_data_ack(skb, wrlen, ep->hwtid, ep->ctrlq_idx,
+ credit_dack);
+
c4iw_ofld_send(&ep->com.dev->rdev, skb);
return credits;
}
@@ -1970,7 +1878,7 @@ static int send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid)
struct sk_buff *skb;
struct fw_ofld_connection_wr *req;
unsigned int mtu_idx;
- int wscale;
+ u32 wscale;
struct sockaddr_in *sin;
int win;
@@ -1995,10 +1903,10 @@ static int send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid)
htons(FW_OFLD_CONNECTION_WR_CPLRXDATAACK_F);
req->tcb.tx_max = (__force __be32) jiffies;
req->tcb.rcv_adv = htons(1);
- best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
- enable_tcp_timestamps,
- (AF_INET == ep->com.remote_addr.ss_family) ? 0 : 1);
- wscale = compute_wscale(rcv_win);
+ cxgb_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
+ enable_tcp_timestamps,
+ (ep->com.remote_addr.ss_family == AF_INET) ? 0 : 1);
+ wscale = cxgb_compute_wscale(rcv_win);
/*
* Specify the largest window that will fit in opt0. The
@@ -2052,15 +1960,6 @@ static inline int act_open_has_tid(int status)
status != CPL_ERR_CONN_EXIST);
}
-/* Returns whether a CPL status conveys negative advice.
- */
-static int is_neg_adv(unsigned int status)
-{
- return status == CPL_ERR_RTX_NEG_ADVICE ||
- status == CPL_ERR_PERSIST_NEG_ADVICE ||
- status == CPL_ERR_KEEPALV_NEG_ADVICE;
-}
-
static char *neg_adv_str(unsigned int status)
{
switch (status) {
@@ -2117,8 +2016,10 @@ static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip,
}
ep->l2t = cxgb4_l2t_get(cdev->rdev.lldi.l2t,
n, pdev, rt_tos2priority(tos));
- if (!ep->l2t)
+ if (!ep->l2t) {
+ dev_put(pdev);
goto out;
+ }
ep->mtu = pdev->mtu;
ep->tx_chan = cxgb4_port_chan(pdev);
ep->smac_idx = cxgb4_tp_smt_idx(adapter_type,
@@ -2214,16 +2115,21 @@ static int c4iw_reconnect(struct c4iw_ep *ep)
/* find a route */
if (ep->com.cm_id->m_local_addr.ss_family == AF_INET) {
- ep->dst = find_route(ep->com.dev, laddr->sin_addr.s_addr,
- raddr->sin_addr.s_addr, laddr->sin_port,
- raddr->sin_port, ep->com.cm_id->tos);
+ ep->dst = cxgb_find_route(&ep->com.dev->rdev.lldi, get_real_dev,
+ laddr->sin_addr.s_addr,
+ raddr->sin_addr.s_addr,
+ laddr->sin_port,
+ raddr->sin_port, ep->com.cm_id->tos);
iptype = 4;
ra = (__u8 *)&raddr->sin_addr;
} else {
- ep->dst = find_route6(ep->com.dev, laddr6->sin6_addr.s6_addr,
- raddr6->sin6_addr.s6_addr,
- laddr6->sin6_port, raddr6->sin6_port, 0,
- raddr6->sin6_scope_id);
+ ep->dst = cxgb_find_route6(&ep->com.dev->rdev.lldi,
+ get_real_dev,
+ laddr6->sin6_addr.s6_addr,
+ raddr6->sin6_addr.s6_addr,
+ laddr6->sin6_port,
+ raddr6->sin6_port, 0,
+ raddr6->sin6_scope_id);
iptype = 6;
ra = (__u8 *)&raddr6->sin6_addr;
}
@@ -2295,7 +2201,7 @@ static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
PDBG("%s ep %p atid %u status %u errno %d\n", __func__, ep, atid,
status, status2errno(status));
- if (is_neg_adv(status)) {
+ if (cxgb_is_neg_adv(status)) {
PDBG("%s Connection problems for atid %u status %u (%s)\n",
__func__, atid, status, neg_adv_str(status));
ep->stats.connect_neg_adv++;
@@ -2422,7 +2328,7 @@ static int accept_cr(struct c4iw_ep *ep, struct sk_buff *skb,
unsigned int mtu_idx;
u64 opt0;
u32 opt2;
- int wscale;
+ u32 wscale;
struct cpl_t5_pass_accept_rpl *rpl5 = NULL;
int win;
enum chip_type adapter_type = ep->com.dev->rdev.lldi.adapter_type;
@@ -2443,10 +2349,10 @@ static int accept_cr(struct c4iw_ep *ep, struct sk_buff *skb,
OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL,
ep->hwtid));
- best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
- enable_tcp_timestamps && req->tcpopt.tstamp,
- (AF_INET == ep->com.remote_addr.ss_family) ? 0 : 1);
- wscale = compute_wscale(rcv_win);
+ cxgb_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
+ enable_tcp_timestamps && req->tcpopt.tstamp,
+ (ep->com.remote_addr.ss_family == AF_INET) ? 0 : 1);
+ wscale = cxgb_compute_wscale(rcv_win);
/*
* Specify the largest window that will fit in opt0. The
@@ -2518,42 +2424,6 @@ static void reject_cr(struct c4iw_dev *dev, u32 hwtid, struct sk_buff *skb)
return;
}
-static void get_4tuple(struct cpl_pass_accept_req *req, enum chip_type type,
- int *iptype, __u8 *local_ip, __u8 *peer_ip,
- __be16 *local_port, __be16 *peer_port)
-{
- int eth_len = (CHELSIO_CHIP_VERSION(type) <= CHELSIO_T5) ?
- ETH_HDR_LEN_G(be32_to_cpu(req->hdr_len)) :
- T6_ETH_HDR_LEN_G(be32_to_cpu(req->hdr_len));
- int ip_len = (CHELSIO_CHIP_VERSION(type) <= CHELSIO_T5) ?
- IP_HDR_LEN_G(be32_to_cpu(req->hdr_len)) :
- T6_IP_HDR_LEN_G(be32_to_cpu(req->hdr_len));
- struct iphdr *ip = (struct iphdr *)((u8 *)(req + 1) + eth_len);
- struct ipv6hdr *ip6 = (struct ipv6hdr *)((u8 *)(req + 1) + eth_len);
- struct tcphdr *tcp = (struct tcphdr *)
- ((u8 *)(req + 1) + eth_len + ip_len);
-
- if (ip->version == 4) {
- PDBG("%s saddr 0x%x daddr 0x%x sport %u dport %u\n", __func__,
- ntohl(ip->saddr), ntohl(ip->daddr), ntohs(tcp->source),
- ntohs(tcp->dest));
- *iptype = 4;
- memcpy(peer_ip, &ip->saddr, 4);
- memcpy(local_ip, &ip->daddr, 4);
- } else {
- PDBG("%s saddr %pI6 daddr %pI6 sport %u dport %u\n", __func__,
- ip6->saddr.s6_addr, ip6->daddr.s6_addr, ntohs(tcp->source),
- ntohs(tcp->dest));
- *iptype = 6;
- memcpy(peer_ip, ip6->saddr.s6_addr, 16);
- memcpy(local_ip, ip6->daddr.s6_addr, 16);
- }
- *peer_port = tcp->source;
- *local_port = tcp->dest;
-
- return;
-}
-
static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
{
struct c4iw_ep *child_ep = NULL, *parent_ep;
@@ -2582,8 +2452,8 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
goto reject;
}
- get_4tuple(req, parent_ep->com.dev->rdev.lldi.adapter_type, &iptype,
- local_ip, peer_ip, &local_port, &peer_port);
+ cxgb_get_4tuple(req, parent_ep->com.dev->rdev.lldi.adapter_type,
+ &iptype, local_ip, peer_ip, &local_port, &peer_port);
/* Find output route */
if (iptype == 4) {
@@ -2591,18 +2461,19 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
, __func__, parent_ep, hwtid,
local_ip, peer_ip, ntohs(local_port),
ntohs(peer_port), peer_mss);
- dst = find_route(dev, *(__be32 *)local_ip, *(__be32 *)peer_ip,
- local_port, peer_port,
- tos);
+ dst = cxgb_find_route(&dev->rdev.lldi, get_real_dev,
+ *(__be32 *)local_ip, *(__be32 *)peer_ip,
+ local_port, peer_port, tos);
} else {
PDBG("%s parent ep %p hwtid %u laddr %pI6 raddr %pI6 lport %d rport %d peer_mss %d\n"
, __func__, parent_ep, hwtid,
local_ip, peer_ip, ntohs(local_port),
ntohs(peer_port), peer_mss);
- dst = find_route6(dev, local_ip, peer_ip, local_port, peer_port,
- PASS_OPEN_TOS_G(ntohl(req->tos_stid)),
- ((struct sockaddr_in6 *)
- &parent_ep->com.local_addr)->sin6_scope_id);
+ dst = cxgb_find_route6(&dev->rdev.lldi, get_real_dev,
+ local_ip, peer_ip, local_port, peer_port,
+ PASS_OPEN_TOS_G(ntohl(req->tos_stid)),
+ ((struct sockaddr_in6 *)
+ &parent_ep->com.local_addr)->sin6_scope_id);
}
if (!dst) {
printk(KERN_ERR MOD "%s - failed to find dst entry!\n",
@@ -2835,18 +2706,18 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
{
struct cpl_abort_req_rss *req = cplhdr(skb);
struct c4iw_ep *ep;
- struct cpl_abort_rpl *rpl;
struct sk_buff *rpl_skb;
struct c4iw_qp_attributes attrs;
int ret;
int release = 0;
unsigned int tid = GET_TID(req);
+ u32 len = roundup(sizeof(struct cpl_abort_rpl), 16);
ep = get_ep_from_tid(dev, tid);
if (!ep)
return 0;
- if (is_neg_adv(req->status)) {
+ if (cxgb_is_neg_adv(req->status)) {
PDBG("%s Negative advice on abort- tid %u status %d (%s)\n",
__func__, ep->hwtid, req->status,
neg_adv_str(req->status));
@@ -2939,11 +2810,9 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
release = 1;
goto out;
}
- set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
- rpl = (struct cpl_abort_rpl *) skb_put(rpl_skb, sizeof(*rpl));
- INIT_TP_WR(rpl, ep->hwtid);
- OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_ABORT_RPL, ep->hwtid));
- rpl->cmd = CPL_ABORT_NO_RST;
+
+ cxgb_mk_abort_rpl(rpl_skb, len, ep->hwtid, ep->txq_idx);
+
c4iw_ofld_send(&ep->com.dev->rdev, rpl_skb);
out:
if (release)
@@ -3375,9 +3244,11 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
PDBG("%s saddr %pI4 sport 0x%x raddr %pI4 rport 0x%x\n",
__func__, &laddr->sin_addr, ntohs(laddr->sin_port),
ra, ntohs(raddr->sin_port));
- ep->dst = find_route(dev, laddr->sin_addr.s_addr,
- raddr->sin_addr.s_addr, laddr->sin_port,
- raddr->sin_port, cm_id->tos);
+ ep->dst = cxgb_find_route(&dev->rdev.lldi, get_real_dev,
+ laddr->sin_addr.s_addr,
+ raddr->sin_addr.s_addr,
+ laddr->sin_port,
+ raddr->sin_port, cm_id->tos);
} else {
iptype = 6;
ra = (__u8 *)&raddr6->sin6_addr;
@@ -3396,10 +3267,12 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
__func__, laddr6->sin6_addr.s6_addr,
ntohs(laddr6->sin6_port),
raddr6->sin6_addr.s6_addr, ntohs(raddr6->sin6_port));
- ep->dst = find_route6(dev, laddr6->sin6_addr.s6_addr,
- raddr6->sin6_addr.s6_addr,
- laddr6->sin6_port, raddr6->sin6_port, 0,
- raddr6->sin6_scope_id);
+ ep->dst = cxgb_find_route6(&dev->rdev.lldi, get_real_dev,
+ laddr6->sin6_addr.s6_addr,
+ raddr6->sin6_addr.s6_addr,
+ laddr6->sin6_port,
+ raddr6->sin6_port, 0,
+ raddr6->sin6_scope_id);
}
if (!ep->dst) {
printk(KERN_ERR MOD "%s - cannot find route.\n", __func__);
@@ -4041,8 +3914,9 @@ static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb)
ntohl(iph->daddr), ntohs(tcph->dest), ntohl(iph->saddr),
ntohs(tcph->source), iph->tos);
- dst = find_route(dev, iph->daddr, iph->saddr, tcph->dest, tcph->source,
- iph->tos);
+ dst = cxgb_find_route(&dev->rdev.lldi, get_real_dev,
+ iph->daddr, iph->saddr, tcph->dest,
+ tcph->source, iph->tos);
if (!dst) {
pr_err("%s - failed to find dst entry!\n",
__func__);
@@ -4317,7 +4191,7 @@ static int peer_abort_intr(struct c4iw_dev *dev, struct sk_buff *skb)
kfree_skb(skb);
return 0;
}
- if (is_neg_adv(req->status)) {
+ if (cxgb_is_neg_adv(req->status)) {
PDBG("%s Negative advice on abort- tid %u status %d (%s)\n",
__func__, ep->hwtid, req->status,
neg_adv_str(req->status));
diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c
index 071d7332ec06..93e3d270a98a 100644
--- a/drivers/infiniband/hw/cxgb4/device.c
+++ b/drivers/infiniband/hw/cxgb4/device.c
@@ -872,9 +872,13 @@ static void c4iw_rdev_close(struct c4iw_rdev *rdev)
static void c4iw_dealloc(struct uld_ctx *ctx)
{
c4iw_rdev_close(&ctx->dev->rdev);
+ WARN_ON_ONCE(!idr_is_empty(&ctx->dev->cqidr));
idr_destroy(&ctx->dev->cqidr);
+ WARN_ON_ONCE(!idr_is_empty(&ctx->dev->qpidr));
idr_destroy(&ctx->dev->qpidr);
+ WARN_ON_ONCE(!idr_is_empty(&ctx->dev->mmidr));
idr_destroy(&ctx->dev->mmidr);
+ wait_event(ctx->dev->wait, idr_is_empty(&ctx->dev->hwtid_idr));
idr_destroy(&ctx->dev->hwtid_idr);
idr_destroy(&ctx->dev->stid_idr);
idr_destroy(&ctx->dev->atid_idr);
@@ -992,6 +996,7 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop)
mutex_init(&devp->rdev.stats.lock);
mutex_init(&devp->db_mutex);
INIT_LIST_HEAD(&devp->db_fc_list);
+ init_waitqueue_head(&devp->wait);
devp->avail_ird = devp->rdev.lldi.max_ird_adapter;
if (c4iw_debugfs_root) {
@@ -1475,6 +1480,10 @@ static int c4iw_uld_control(void *handle, enum cxgb4_control control, ...)
static struct cxgb4_uld_info c4iw_uld_info = {
.name = DRV_NAME,
+ .nrxq = MAX_ULD_QSETS,
+ .rxq_size = 511,
+ .ciq = true,
+ .lro = false,
.add = c4iw_uld_add,
.rx_handler = c4iw_uld_rx_handler,
.state_change = c4iw_uld_state_change,
diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
index aa47e0ae80bc..cdcf3eeb6f4a 100644
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -263,6 +263,7 @@ struct c4iw_dev {
struct idr stid_idr;
struct list_head db_fc_list;
u32 avail_ird;
+ wait_queue_head_t wait;
};
static inline struct c4iw_dev *to_c4iw_dev(struct ib_device *ibdev)
@@ -881,15 +882,6 @@ static inline struct c4iw_listen_ep *to_listen_ep(struct iw_cm_id *cm_id)
return cm_id->provider_data;
}
-static inline int compute_wscale(int win)
-{
- int wscale = 0;
-
- while (wscale < 14 && (65535<<wscale) < win)
- wscale++;
- return wscale;
-}
-
static inline int ocqp_supported(const struct cxgb4_lld_info *infop)
{
#if defined(__i386__) || defined(__x86_64__) || defined(CONFIG_PPC64)
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index edb1172b6f54..690435229be7 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -683,7 +683,7 @@ static int build_inv_stag(union t4_wr *wqe, struct ib_send_wr *wr,
return 0;
}
-void _free_qp(struct kref *kref)
+static void _free_qp(struct kref *kref)
{
struct c4iw_qp *qhp;
diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c
index 0566393e5aba..a26a9a0bfc41 100644
--- a/drivers/infiniband/hw/hfi1/affinity.c
+++ b/drivers/infiniband/hw/hfi1/affinity.c
@@ -47,6 +47,7 @@
#include <linux/topology.h>
#include <linux/cpumask.h>
#include <linux/module.h>
+#include <linux/interrupt.h>
#include "hfi.h"
#include "affinity.h"
@@ -55,7 +56,7 @@
struct hfi1_affinity_node_list node_affinity = {
.list = LIST_HEAD_INIT(node_affinity.list),
- .lock = __SPIN_LOCK_UNLOCKED(&node_affinity.lock),
+ .lock = __MUTEX_INITIALIZER(node_affinity.lock)
};
/* Name of IRQ types, indexed by enum irq_type */
@@ -159,14 +160,14 @@ void node_affinity_destroy(void)
struct list_head *pos, *q;
struct hfi1_affinity_node *entry;
- spin_lock(&node_affinity.lock);
+ mutex_lock(&node_affinity.lock);
list_for_each_safe(pos, q, &node_affinity.list) {
entry = list_entry(pos, struct hfi1_affinity_node,
list);
list_del(pos);
kfree(entry);
}
- spin_unlock(&node_affinity.lock);
+ mutex_unlock(&node_affinity.lock);
kfree(hfi1_per_node_cntr);
}
@@ -233,9 +234,8 @@ int hfi1_dev_affinity_init(struct hfi1_devdata *dd)
if (cpumask_first(local_mask) >= nr_cpu_ids)
local_mask = topology_core_cpumask(0);
- spin_lock(&node_affinity.lock);
+ mutex_lock(&node_affinity.lock);
entry = node_affinity_lookup(dd->node);
- spin_unlock(&node_affinity.lock);
/*
* If this is the first time this NUMA node's affinity is used,
@@ -246,6 +246,7 @@ int hfi1_dev_affinity_init(struct hfi1_devdata *dd)
if (!entry) {
dd_dev_err(dd,
"Unable to allocate global affinity node\n");
+ mutex_unlock(&node_affinity.lock);
return -ENOMEM;
}
init_cpu_mask_set(&entry->def_intr);
@@ -302,15 +303,113 @@ int hfi1_dev_affinity_init(struct hfi1_devdata *dd)
&entry->general_intr_mask);
}
- spin_lock(&node_affinity.lock);
node_affinity_add_tail(entry);
- spin_unlock(&node_affinity.lock);
}
-
+ mutex_unlock(&node_affinity.lock);
return 0;
}
-int hfi1_get_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix)
+/*
+ * Function updates the irq affinity hint for msix after it has been changed
+ * by the user using the /proc/irq interface. This function only accepts
+ * one cpu in the mask.
+ */
+static void hfi1_update_sdma_affinity(struct hfi1_msix_entry *msix, int cpu)
+{
+ struct sdma_engine *sde = msix->arg;
+ struct hfi1_devdata *dd = sde->dd;
+ struct hfi1_affinity_node *entry;
+ struct cpu_mask_set *set;
+ int i, old_cpu;
+
+ if (cpu > num_online_cpus() || cpu == sde->cpu)
+ return;
+
+ mutex_lock(&node_affinity.lock);
+ entry = node_affinity_lookup(dd->node);
+ if (!entry)
+ goto unlock;
+
+ old_cpu = sde->cpu;
+ sde->cpu = cpu;
+ cpumask_clear(&msix->mask);
+ cpumask_set_cpu(cpu, &msix->mask);
+ dd_dev_dbg(dd, "IRQ vector: %u, type %s engine %u -> cpu: %d\n",
+ msix->msix.vector, irq_type_names[msix->type],
+ sde->this_idx, cpu);
+ irq_set_affinity_hint(msix->msix.vector, &msix->mask);
+
+ /*
+ * Set the new cpu in the hfi1_affinity_node and clean
+ * the old cpu if it is not used by any other IRQ
+ */
+ set = &entry->def_intr;
+ cpumask_set_cpu(cpu, &set->mask);
+ cpumask_set_cpu(cpu, &set->used);
+ for (i = 0; i < dd->num_msix_entries; i++) {
+ struct hfi1_msix_entry *other_msix;
+
+ other_msix = &dd->msix_entries[i];
+ if (other_msix->type != IRQ_SDMA || other_msix == msix)
+ continue;
+
+ if (cpumask_test_cpu(old_cpu, &other_msix->mask))
+ goto unlock;
+ }
+ cpumask_clear_cpu(old_cpu, &set->mask);
+ cpumask_clear_cpu(old_cpu, &set->used);
+unlock:
+ mutex_unlock(&node_affinity.lock);
+}
+
+static void hfi1_irq_notifier_notify(struct irq_affinity_notify *notify,
+ const cpumask_t *mask)
+{
+ int cpu = cpumask_first(mask);
+ struct hfi1_msix_entry *msix = container_of(notify,
+ struct hfi1_msix_entry,
+ notify);
+
+ /* Only one CPU configuration supported currently */
+ hfi1_update_sdma_affinity(msix, cpu);
+}
+
+static void hfi1_irq_notifier_release(struct kref *ref)
+{
+ /*
+ * This is required by affinity notifier. We don't have anything to
+ * free here.
+ */
+}
+
+static void hfi1_setup_sdma_notifier(struct hfi1_msix_entry *msix)
+{
+ struct irq_affinity_notify *notify = &msix->notify;
+
+ notify->irq = msix->msix.vector;
+ notify->notify = hfi1_irq_notifier_notify;
+ notify->release = hfi1_irq_notifier_release;
+
+ if (irq_set_affinity_notifier(notify->irq, notify))
+ pr_err("Failed to register sdma irq affinity notifier for irq %d\n",
+ notify->irq);
+}
+
+static void hfi1_cleanup_sdma_notifier(struct hfi1_msix_entry *msix)
+{
+ struct irq_affinity_notify *notify = &msix->notify;
+
+ if (irq_set_affinity_notifier(notify->irq, NULL))
+ pr_err("Failed to cleanup sdma irq affinity notifier for irq %d\n",
+ notify->irq);
+}
+
+/*
+ * Function sets the irq affinity for msix.
+ * It *must* be called with node_affinity.lock held.
+ */
+static int get_irq_affinity(struct hfi1_devdata *dd,
+ struct hfi1_msix_entry *msix)
{
int ret;
cpumask_var_t diff;
@@ -328,9 +427,7 @@ int hfi1_get_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix)
if (!ret)
return -ENOMEM;
- spin_lock(&node_affinity.lock);
entry = node_affinity_lookup(dd->node);
- spin_unlock(&node_affinity.lock);
switch (msix->type) {
case IRQ_SDMA:
@@ -360,7 +457,6 @@ int hfi1_get_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix)
* finds its CPU here.
*/
if (cpu == -1 && set) {
- spin_lock(&node_affinity.lock);
if (cpumask_equal(&set->mask, &set->used)) {
/*
* We've used up all the CPUs, bump up the generation
@@ -372,17 +468,6 @@ int hfi1_get_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix)
cpumask_andnot(diff, &set->mask, &set->used);
cpu = cpumask_first(diff);
cpumask_set_cpu(cpu, &set->used);
- spin_unlock(&node_affinity.lock);
- }
-
- switch (msix->type) {
- case IRQ_SDMA:
- sde->cpu = cpu;
- break;
- case IRQ_GENERAL:
- case IRQ_RCVCTXT:
- case IRQ_OTHER:
- break;
}
cpumask_set_cpu(cpu, &msix->mask);
@@ -391,10 +476,25 @@ int hfi1_get_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix)
extra, cpu);
irq_set_affinity_hint(msix->msix.vector, &msix->mask);
+ if (msix->type == IRQ_SDMA) {
+ sde->cpu = cpu;
+ hfi1_setup_sdma_notifier(msix);
+ }
+
free_cpumask_var(diff);
return 0;
}
+int hfi1_get_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix)
+{
+ int ret;
+
+ mutex_lock(&node_affinity.lock);
+ ret = get_irq_affinity(dd, msix);
+ mutex_unlock(&node_affinity.lock);
+ return ret;
+}
+
void hfi1_put_irq_affinity(struct hfi1_devdata *dd,
struct hfi1_msix_entry *msix)
{
@@ -402,13 +502,13 @@ void hfi1_put_irq_affinity(struct hfi1_devdata *dd,
struct hfi1_ctxtdata *rcd;
struct hfi1_affinity_node *entry;
- spin_lock(&node_affinity.lock);
+ mutex_lock(&node_affinity.lock);
entry = node_affinity_lookup(dd->node);
- spin_unlock(&node_affinity.lock);
switch (msix->type) {
case IRQ_SDMA:
set = &entry->def_intr;
+ hfi1_cleanup_sdma_notifier(msix);
break;
case IRQ_GENERAL:
/* Don't do accounting for general contexts */
@@ -420,21 +520,21 @@ void hfi1_put_irq_affinity(struct hfi1_devdata *dd,
set = &entry->rcv_intr;
break;
default:
+ mutex_unlock(&node_affinity.lock);
return;
}
if (set) {
- spin_lock(&node_affinity.lock);
cpumask_andnot(&set->used, &set->used, &msix->mask);
if (cpumask_empty(&set->used) && set->gen) {
set->gen--;
cpumask_copy(&set->used, &set->mask);
}
- spin_unlock(&node_affinity.lock);
}
irq_set_affinity_hint(msix->msix.vector, NULL);
cpumask_clear(&msix->mask);
+ mutex_unlock(&node_affinity.lock);
}
/* This should be called with node_affinity.lock held */
@@ -535,7 +635,7 @@ int hfi1_get_proc_affinity(int node)
if (!ret)
goto free_available_mask;
- spin_lock(&affinity->lock);
+ mutex_lock(&affinity->lock);
/*
* If we've used all available HW threads, clear the mask and start
* overloading.
@@ -643,7 +743,8 @@ int hfi1_get_proc_affinity(int node)
cpu = -1;
else
cpumask_set_cpu(cpu, &set->used);
- spin_unlock(&affinity->lock);
+
+ mutex_unlock(&affinity->lock);
hfi1_cdbg(PROC, "Process assigned to CPU %d", cpu);
free_cpumask_var(intrs_mask);
@@ -664,19 +765,17 @@ void hfi1_put_proc_affinity(int cpu)
if (cpu < 0)
return;
- spin_lock(&affinity->lock);
+
+ mutex_lock(&affinity->lock);
cpumask_clear_cpu(cpu, &set->used);
hfi1_cdbg(PROC, "Returning CPU %d for future process assignment", cpu);
if (cpumask_empty(&set->used) && set->gen) {
set->gen--;
cpumask_copy(&set->used, &set->mask);
}
- spin_unlock(&affinity->lock);
+ mutex_unlock(&affinity->lock);
}
-/* Prevents concurrent reads and writes of the sdma_affinity attrib */
-static DEFINE_MUTEX(sdma_affinity_mutex);
-
int hfi1_set_sdma_affinity(struct hfi1_devdata *dd, const char *buf,
size_t count)
{
@@ -684,16 +783,19 @@ int hfi1_set_sdma_affinity(struct hfi1_devdata *dd, const char *buf,
cpumask_var_t mask;
int ret, i;
- spin_lock(&node_affinity.lock);
+ mutex_lock(&node_affinity.lock);
entry = node_affinity_lookup(dd->node);
- spin_unlock(&node_affinity.lock);
- if (!entry)
- return -EINVAL;
+ if (!entry) {
+ ret = -EINVAL;
+ goto unlock;
+ }
ret = zalloc_cpumask_var(&mask, GFP_KERNEL);
- if (!ret)
- return -ENOMEM;
+ if (!ret) {
+ ret = -ENOMEM;
+ goto unlock;
+ }
ret = cpulist_parse(buf, mask);
if (ret)
@@ -705,13 +807,11 @@ int hfi1_set_sdma_affinity(struct hfi1_devdata *dd, const char *buf,
goto out;
}
- mutex_lock(&sdma_affinity_mutex);
/* reset the SDMA interrupt affinity details */
init_cpu_mask_set(&entry->def_intr);
cpumask_copy(&entry->def_intr.mask, mask);
- /*
- * Reassign the affinity for each SDMA interrupt.
- */
+
+ /* Reassign the affinity for each SDMA interrupt. */
for (i = 0; i < dd->num_msix_entries; i++) {
struct hfi1_msix_entry *msix;
@@ -719,14 +819,15 @@ int hfi1_set_sdma_affinity(struct hfi1_devdata *dd, const char *buf,
if (msix->type != IRQ_SDMA)
continue;
- ret = hfi1_get_irq_affinity(dd, msix);
+ ret = get_irq_affinity(dd, msix);
if (ret)
break;
}
- mutex_unlock(&sdma_affinity_mutex);
out:
free_cpumask_var(mask);
+unlock:
+ mutex_unlock(&node_affinity.lock);
return ret ? ret : strnlen(buf, PAGE_SIZE);
}
@@ -734,15 +835,15 @@ int hfi1_get_sdma_affinity(struct hfi1_devdata *dd, char *buf)
{
struct hfi1_affinity_node *entry;
- spin_lock(&node_affinity.lock);
+ mutex_lock(&node_affinity.lock);
entry = node_affinity_lookup(dd->node);
- spin_unlock(&node_affinity.lock);
- if (!entry)
+ if (!entry) {
+ mutex_unlock(&node_affinity.lock);
return -EINVAL;
+ }
- mutex_lock(&sdma_affinity_mutex);
cpumap_print_to_pagebuf(true, buf, &entry->def_intr.mask);
- mutex_unlock(&sdma_affinity_mutex);
+ mutex_unlock(&node_affinity.lock);
return strnlen(buf, PAGE_SIZE);
}
diff --git a/drivers/infiniband/hw/hfi1/affinity.h b/drivers/infiniband/hw/hfi1/affinity.h
index 8879cf7a8cac..b89ea3c0ee1a 100644
--- a/drivers/infiniband/hw/hfi1/affinity.h
+++ b/drivers/infiniband/hw/hfi1/affinity.h
@@ -121,8 +121,7 @@ struct hfi1_affinity_node_list {
int num_core_siblings;
int num_online_nodes;
int num_online_cpus;
- /* protect affinity node list */
- spinlock_t lock;
+ struct mutex lock; /* protects affinity nodes */
};
int node_affinity_init(void);
diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index b32638d58ae8..9bf5f23544d4 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -971,7 +971,9 @@ static struct flag_table dc8051_info_err_flags[] = {
FLAG_ENTRY0("Failed LNI(VerifyCap_1)", FAILED_LNI_VERIFY_CAP1),
FLAG_ENTRY0("Failed LNI(VerifyCap_2)", FAILED_LNI_VERIFY_CAP2),
FLAG_ENTRY0("Failed LNI(ConfigLT)", FAILED_LNI_CONFIGLT),
- FLAG_ENTRY0("Host Handshake Timeout", HOST_HANDSHAKE_TIMEOUT)
+ FLAG_ENTRY0("Host Handshake Timeout", HOST_HANDSHAKE_TIMEOUT),
+ FLAG_ENTRY0("External Device Request Timeout",
+ EXTERNAL_DEVICE_REQ_TIMEOUT),
};
/*
@@ -6825,7 +6827,6 @@ void handle_link_up(struct work_struct *work)
set_link_down_reason(ppd, OPA_LINKDOWN_REASON_SPEED_POLICY, 0,
OPA_LINKDOWN_REASON_SPEED_POLICY);
set_link_state(ppd, HLS_DN_OFFLINE);
- tune_serdes(ppd);
start_link(ppd);
}
}
@@ -6998,12 +6999,10 @@ void handle_link_down(struct work_struct *work)
* If there is no cable attached, turn the DC off. Otherwise,
* start the link bring up.
*/
- if (ppd->port_type == PORT_TYPE_QSFP && !qsfp_mod_present(ppd)) {
+ if (ppd->port_type == PORT_TYPE_QSFP && !qsfp_mod_present(ppd))
dc_shutdown(ppd->dd);
- } else {
- tune_serdes(ppd);
+ else
start_link(ppd);
- }
}
void handle_link_bounce(struct work_struct *work)
@@ -7016,7 +7015,6 @@ void handle_link_bounce(struct work_struct *work)
*/
if (ppd->host_link_state & HLS_UP) {
set_link_state(ppd, HLS_DN_OFFLINE);
- tune_serdes(ppd);
start_link(ppd);
} else {
dd_dev_info(ppd->dd, "%s: link not up (%s), nothing to do\n",
@@ -7531,7 +7529,6 @@ done:
set_link_down_reason(ppd, OPA_LINKDOWN_REASON_WIDTH_POLICY, 0,
OPA_LINKDOWN_REASON_WIDTH_POLICY);
set_link_state(ppd, HLS_DN_OFFLINE);
- tune_serdes(ppd);
start_link(ppd);
}
}
@@ -9161,6 +9158,12 @@ set_local_link_attributes_fail:
*/
int start_link(struct hfi1_pportdata *ppd)
{
+ /*
+ * Tune the SerDes to a ballpark setting for optimal signal and bit
+ * error rate. Needs to be done before starting the link.
+ */
+ tune_serdes(ppd);
+
if (!ppd->link_enabled) {
dd_dev_info(ppd->dd,
"%s: stopping link start because link is disabled\n",
@@ -9401,8 +9404,6 @@ void qsfp_event(struct work_struct *work)
*/
set_qsfp_int_n(ppd, 1);
- tune_serdes(ppd);
-
start_link(ppd);
}
@@ -9490,6 +9491,73 @@ static void init_lcb(struct hfi1_devdata *dd)
write_csr(dd, DC_LCB_CFG_TX_FIFOS_RESET, 0x00);
}
+/*
+ * Perform a test read on the QSFP. Return 0 on success, -ERRNO
+ * on error.
+ */
+static int test_qsfp_read(struct hfi1_pportdata *ppd)
+{
+ int ret;
+ u8 status;
+
+ /* report success if not a QSFP */
+ if (ppd->port_type != PORT_TYPE_QSFP)
+ return 0;
+
+ /* read byte 2, the status byte */
+ ret = one_qsfp_read(ppd, ppd->dd->hfi1_id, 2, &status, 1);
+ if (ret < 0)
+ return ret;
+ if (ret != 1)
+ return -EIO;
+
+ return 0; /* success */
+}
+
+/*
+ * Values for QSFP retry.
+ *
+ * Give up after 10s (20 x 500ms). The overall timeout was empirically
+ * arrived at from experience on a large cluster.
+ */
+#define MAX_QSFP_RETRIES 20
+#define QSFP_RETRY_WAIT 500 /* msec */
+
+/*
+ * Try a QSFP read. If it fails, schedule a retry for later.
+ * Called on first link activation after driver load.
+ */
+static void try_start_link(struct hfi1_pportdata *ppd)
+{
+ if (test_qsfp_read(ppd)) {
+ /* read failed */
+ if (ppd->qsfp_retry_count >= MAX_QSFP_RETRIES) {
+ dd_dev_err(ppd->dd, "QSFP not responding, giving up\n");
+ return;
+ }
+ dd_dev_info(ppd->dd,
+ "QSFP not responding, waiting and retrying %d\n",
+ (int)ppd->qsfp_retry_count);
+ ppd->qsfp_retry_count++;
+ queue_delayed_work(ppd->hfi1_wq, &ppd->start_link_work,
+ msecs_to_jiffies(QSFP_RETRY_WAIT));
+ return;
+ }
+ ppd->qsfp_retry_count = 0;
+
+ start_link(ppd);
+}
+
+/*
+ * Workqueue function to start the link after a delay.
+ */
+void handle_start_link(struct work_struct *work)
+{
+ struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
+ start_link_work.work);
+ try_start_link(ppd);
+}
+
int bringup_serdes(struct hfi1_pportdata *ppd)
{
struct hfi1_devdata *dd = ppd->dd;
@@ -9525,14 +9593,8 @@ int bringup_serdes(struct hfi1_pportdata *ppd)
set_qsfp_int_n(ppd, 1);
}
- /*
- * Tune the SerDes to a ballpark setting for
- * optimal signal and bit error rate
- * Needs to be done before starting the link
- */
- tune_serdes(ppd);
-
- return start_link(ppd);
+ try_start_link(ppd);
+ return 0;
}
void hfi1_quiet_serdes(struct hfi1_pportdata *ppd)
@@ -9549,6 +9611,10 @@ void hfi1_quiet_serdes(struct hfi1_pportdata *ppd)
ppd->driver_link_ready = 0;
ppd->link_enabled = 0;
+ ppd->qsfp_retry_count = MAX_QSFP_RETRIES; /* prevent more retries */
+ flush_delayed_work(&ppd->start_link_work);
+ cancel_delayed_work_sync(&ppd->start_link_work);
+
ppd->offline_disabled_reason =
HFI1_ODR_MASK(OPA_LINKDOWN_REASON_SMA_DISABLED);
set_link_down_reason(ppd, OPA_LINKDOWN_REASON_SMA_DISABLED, 0,
@@ -9648,12 +9714,12 @@ void hfi1_clear_tids(struct hfi1_ctxtdata *rcd)
hfi1_put_tid(dd, i, PT_INVALID, 0, 0);
}
-struct hfi1_message_header *hfi1_get_msgheader(
- struct hfi1_devdata *dd, __le32 *rhf_addr)
+struct ib_header *hfi1_get_msgheader(
+ struct hfi1_devdata *dd, __le32 *rhf_addr)
{
u32 offset = rhf_hdrq_offset(rhf_to_cpu(rhf_addr));
- return (struct hfi1_message_header *)
+ return (struct ib_header *)
(rhf_addr - dd->rhf_offset + offset);
}
@@ -11489,10 +11555,10 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt)
!(rcvctrl & RCV_CTXT_CTRL_ENABLE_SMASK)) {
/* reset the tail and hdr addresses, and sequence count */
write_kctxt_csr(dd, ctxt, RCV_HDR_ADDR,
- rcd->rcvhdrq_phys);
+ rcd->rcvhdrq_dma);
if (HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL))
write_kctxt_csr(dd, ctxt, RCV_HDR_TAIL_ADDR,
- rcd->rcvhdrqtailaddr_phys);
+ rcd->rcvhdrqtailaddr_dma);
rcd->seq_cnt = 1;
/* reset the cached receive header queue head value */
@@ -11557,9 +11623,9 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt)
* update with a dummy tail address and then disable
* receive context.
*/
- if (dd->rcvhdrtail_dummy_physaddr) {
+ if (dd->rcvhdrtail_dummy_dma) {
write_kctxt_csr(dd, ctxt, RCV_HDR_TAIL_ADDR,
- dd->rcvhdrtail_dummy_physaddr);
+ dd->rcvhdrtail_dummy_dma);
/* Enabling RcvCtxtCtrl.TailUpd is intentional. */
rcvctrl |= RCV_CTXT_CTRL_TAIL_UPD_SMASK;
}
@@ -11570,7 +11636,7 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt)
rcvctrl |= RCV_CTXT_CTRL_INTR_AVAIL_SMASK;
if (op & HFI1_RCVCTRL_INTRAVAIL_DIS)
rcvctrl &= ~RCV_CTXT_CTRL_INTR_AVAIL_SMASK;
- if (op & HFI1_RCVCTRL_TAILUPD_ENB && rcd->rcvhdrqtailaddr_phys)
+ if (op & HFI1_RCVCTRL_TAILUPD_ENB && rcd->rcvhdrqtailaddr_dma)
rcvctrl |= RCV_CTXT_CTRL_TAIL_UPD_SMASK;
if (op & HFI1_RCVCTRL_TAILUPD_DIS) {
/* See comment on RcvCtxtCtrl.TailUpd above */
@@ -11642,7 +11708,7 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt)
* so it doesn't contain an address that is invalid.
*/
write_kctxt_csr(dd, ctxt, RCV_HDR_TAIL_ADDR,
- dd->rcvhdrtail_dummy_physaddr);
+ dd->rcvhdrtail_dummy_dma);
}
u32 hfi1_read_cntrs(struct hfi1_devdata *dd, char **namep, u64 **cntrp)
@@ -12865,7 +12931,7 @@ fail:
*/
static int set_up_context_variables(struct hfi1_devdata *dd)
{
- int num_kernel_contexts;
+ unsigned long num_kernel_contexts;
int total_contexts;
int ret;
unsigned ngroups;
@@ -12894,9 +12960,9 @@ static int set_up_context_variables(struct hfi1_devdata *dd)
*/
if (num_kernel_contexts > (dd->chip_send_contexts - num_vls - 1)) {
dd_dev_err(dd,
- "Reducing # kernel rcv contexts to: %d, from %d\n",
+ "Reducing # kernel rcv contexts to: %d, from %lu\n",
(int)(dd->chip_send_contexts - num_vls - 1),
- (int)num_kernel_contexts);
+ num_kernel_contexts);
num_kernel_contexts = dd->chip_send_contexts - num_vls - 1;
}
/*
@@ -13319,9 +13385,9 @@ static void init_rbufs(struct hfi1_devdata *dd)
/*
* Give up after 1ms - maximum wait time.
*
- * RBuf size is 148KiB. Slowest possible is PCIe Gen1 x1 at
+ * RBuf size is 136KiB. Slowest possible is PCIe Gen1 x1 at
* 250MB/s bandwidth. Lower rate to 66% for overhead to get:
- * 148 KB / (66% * 250MB/s) = 920us
+ * 136 KB / (66% * 250MB/s) = 844us
*/
if (count++ > 500) {
dd_dev_err(dd,
@@ -14500,6 +14566,11 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
if (ret)
goto bail_cleanup;
+ /* call before get_platform_config(), after init_chip_resources() */
+ ret = eprom_init(dd);
+ if (ret)
+ goto bail_free_rcverr;
+
/* Needs to be called before hfi1_firmware_init */
get_platform_config(dd);
@@ -14620,10 +14691,6 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
if (ret)
goto bail_free_cntrs;
- ret = eprom_init(dd);
- if (ret)
- goto bail_free_rcverr;
-
goto bail;
bail_free_rcverr:
diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h
index ed11107c50fe..92345259a8f4 100644
--- a/drivers/infiniband/hw/hfi1/chip.h
+++ b/drivers/infiniband/hw/hfi1/chip.h
@@ -82,7 +82,7 @@
*/
#define CM_VAU 3
/* HFI link credit count, AKA receive buffer depth (RBUF_DEPTH) */
-#define CM_GLOBAL_CREDITS 0x940
+#define CM_GLOBAL_CREDITS 0x880
/* Number of PKey entries in the HW */
#define MAX_PKEY_VALUES 16
@@ -254,12 +254,14 @@
#define FAILED_LNI_VERIFY_CAP2 BIT(10)
#define FAILED_LNI_CONFIGLT BIT(11)
#define HOST_HANDSHAKE_TIMEOUT BIT(12)
+#define EXTERNAL_DEVICE_REQ_TIMEOUT BIT(13)
#define FAILED_LNI (FAILED_LNI_POLLING | FAILED_LNI_DEBOUNCE \
| FAILED_LNI_ESTBCOMM | FAILED_LNI_OPTEQ \
| FAILED_LNI_VERIFY_CAP1 \
| FAILED_LNI_VERIFY_CAP2 \
- | FAILED_LNI_CONFIGLT | HOST_HANDSHAKE_TIMEOUT)
+ | FAILED_LNI_CONFIGLT | HOST_HANDSHAKE_TIMEOUT \
+ | EXTERNAL_DEVICE_REQ_TIMEOUT)
/* DC_DC8051_DBG_ERR_INFO_SET_BY_8051.HOST_MSG - host message flags */
#define HOST_REQ_DONE BIT(0)
@@ -706,6 +708,7 @@ void handle_link_up(struct work_struct *work);
void handle_link_down(struct work_struct *work);
void handle_link_downgrade(struct work_struct *work);
void handle_link_bounce(struct work_struct *work);
+void handle_start_link(struct work_struct *work);
void handle_sma_message(struct work_struct *work);
void reset_qsfp(struct hfi1_pportdata *ppd);
void qsfp_event(struct work_struct *work);
@@ -1335,7 +1338,7 @@ enum {
u64 get_all_cpu_total(u64 __percpu *cntr);
void hfi1_start_cleanup(struct hfi1_devdata *dd);
void hfi1_clear_tids(struct hfi1_ctxtdata *rcd);
-struct hfi1_message_header *hfi1_get_msgheader(
+struct ib_header *hfi1_get_msgheader(
struct hfi1_devdata *dd, __le32 *rhf_addr);
int hfi1_init_ctxt(struct send_context *sc);
void hfi1_put_tid(struct hfi1_devdata *dd, u32 index,
diff --git a/drivers/infiniband/hw/hfi1/common.h b/drivers/infiniband/hw/hfi1/common.h
index fcc9c217a97a..da7be21bedb4 100644
--- a/drivers/infiniband/hw/hfi1/common.h
+++ b/drivers/infiniband/hw/hfi1/common.h
@@ -320,14 +320,6 @@ struct diag_pkt {
/* RHF receive type error - bypass packet errors */
#define RHF_RTE_BYPASS_NO_ERR 0x0
-/*
- * This structure contains the first field common to all protocols
- * that employ this chip.
- */
-struct hfi1_message_header {
- __be16 lrh[4];
-};
-
/* IB - LRH header constants */
#define HFI1_LRH_GRH 0x0003 /* 1. word of IB LRH - next header: GRH */
#define HFI1_LRH_BTH 0x0002 /* 1. word of IB LRH - next header: BTH */
diff --git a/drivers/infiniband/hw/hfi1/debugfs.c b/drivers/infiniband/hw/hfi1/debugfs.c
index a49cc88f08a2..632ba21759ab 100644
--- a/drivers/infiniband/hw/hfi1/debugfs.c
+++ b/drivers/infiniband/hw/hfi1/debugfs.c
@@ -59,6 +59,40 @@
static struct dentry *hfi1_dbg_root;
+/* wrappers to enforce srcu in seq file */
+static ssize_t hfi1_seq_read(
+ struct file *file,
+ char __user *buf,
+ size_t size,
+ loff_t *ppos)
+{
+ struct dentry *d = file->f_path.dentry;
+ int srcu_idx;
+ ssize_t r;
+
+ r = debugfs_use_file_start(d, &srcu_idx);
+ if (likely(!r))
+ r = seq_read(file, buf, size, ppos);
+ debugfs_use_file_finish(srcu_idx);
+ return r;
+}
+
+static loff_t hfi1_seq_lseek(
+ struct file *file,
+ loff_t offset,
+ int whence)
+{
+ struct dentry *d = file->f_path.dentry;
+ int srcu_idx;
+ loff_t r;
+
+ r = debugfs_use_file_start(d, &srcu_idx);
+ if (likely(!r))
+ r = seq_lseek(file, offset, whence);
+ debugfs_use_file_finish(srcu_idx);
+ return r;
+}
+
#define private2dd(file) (file_inode(file)->i_private)
#define private2ppd(file) (file_inode(file)->i_private)
@@ -87,8 +121,8 @@ static int _##name##_open(struct inode *inode, struct file *s) \
static const struct file_operations _##name##_file_ops = { \
.owner = THIS_MODULE, \
.open = _##name##_open, \
- .read = seq_read, \
- .llseek = seq_lseek, \
+ .read = hfi1_seq_read, \
+ .llseek = hfi1_seq_lseek, \
.release = seq_release \
}
@@ -105,11 +139,9 @@ do { \
DEBUGFS_FILE_CREATE(#name, parent, data, &_##name##_file_ops, S_IRUGO)
static void *_opcode_stats_seq_start(struct seq_file *s, loff_t *pos)
-__acquires(RCU)
{
struct hfi1_opcode_stats_perctx *opstats;
- rcu_read_lock();
if (*pos >= ARRAY_SIZE(opstats->stats))
return NULL;
return pos;
@@ -126,9 +158,7 @@ static void *_opcode_stats_seq_next(struct seq_file *s, void *v, loff_t *pos)
}
static void _opcode_stats_seq_stop(struct seq_file *s, void *v)
-__releases(RCU)
{
- rcu_read_unlock();
}
static int _opcode_stats_seq_show(struct seq_file *s, void *v)
@@ -285,12 +315,10 @@ DEBUGFS_SEQ_FILE_OPEN(qp_stats)
DEBUGFS_FILE_OPS(qp_stats);
static void *_sdes_seq_start(struct seq_file *s, loff_t *pos)
-__acquires(RCU)
{
struct hfi1_ibdev *ibd;
struct hfi1_devdata *dd;
- rcu_read_lock();
ibd = (struct hfi1_ibdev *)s->private;
dd = dd_from_dev(ibd);
if (!dd->per_sdma || *pos >= dd->num_sdma)
@@ -310,9 +338,7 @@ static void *_sdes_seq_next(struct seq_file *s, void *v, loff_t *pos)
}
static void _sdes_seq_stop(struct seq_file *s, void *v)
-__releases(RCU)
{
- rcu_read_unlock();
}
static int _sdes_seq_show(struct seq_file *s, void *v)
@@ -339,11 +365,9 @@ static ssize_t dev_counters_read(struct file *file, char __user *buf,
struct hfi1_devdata *dd;
ssize_t rval;
- rcu_read_lock();
dd = private2dd(file);
avail = hfi1_read_cntrs(dd, NULL, &counters);
rval = simple_read_from_buffer(buf, count, ppos, counters, avail);
- rcu_read_unlock();
return rval;
}
@@ -356,11 +380,9 @@ static ssize_t dev_names_read(struct file *file, char __user *buf,
struct hfi1_devdata *dd;
ssize_t rval;
- rcu_read_lock();
dd = private2dd(file);
avail = hfi1_read_cntrs(dd, &names, NULL);
rval = simple_read_from_buffer(buf, count, ppos, names, avail);
- rcu_read_unlock();
return rval;
}
@@ -383,11 +405,9 @@ static ssize_t portnames_read(struct file *file, char __user *buf,
struct hfi1_devdata *dd;
ssize_t rval;
- rcu_read_lock();
dd = private2dd(file);
avail = hfi1_read_portcntrs(dd->pport, &names, NULL);
rval = simple_read_from_buffer(buf, count, ppos, names, avail);
- rcu_read_unlock();
return rval;
}
@@ -400,11 +420,9 @@ static ssize_t portcntrs_debugfs_read(struct file *file, char __user *buf,
struct hfi1_pportdata *ppd;
ssize_t rval;
- rcu_read_lock();
ppd = private2ppd(file);
avail = hfi1_read_portcntrs(ppd, NULL, &counters);
rval = simple_read_from_buffer(buf, count, ppos, counters, avail);
- rcu_read_unlock();
return rval;
}
@@ -434,16 +452,13 @@ static ssize_t asic_flags_read(struct file *file, char __user *buf,
int used;
int i;
- rcu_read_lock();
ppd = private2ppd(file);
dd = ppd->dd;
size = PAGE_SIZE;
used = 0;
tmp = kmalloc(size, GFP_KERNEL);
- if (!tmp) {
- rcu_read_unlock();
+ if (!tmp)
return -ENOMEM;
- }
scratch0 = read_csr(dd, ASIC_CFG_SCRATCH);
used += scnprintf(tmp + used, size - used,
@@ -470,7 +485,6 @@ static ssize_t asic_flags_read(struct file *file, char __user *buf,
used += scnprintf(tmp + used, size - used, "Write bits to clear\n");
ret = simple_read_from_buffer(buf, count, ppos, tmp, used);
- rcu_read_unlock();
kfree(tmp);
return ret;
}
@@ -486,15 +500,12 @@ static ssize_t asic_flags_write(struct file *file, const char __user *buf,
u64 scratch0;
u64 clear;
- rcu_read_lock();
ppd = private2ppd(file);
dd = ppd->dd;
buff = kmalloc(count + 1, GFP_KERNEL);
- if (!buff) {
- ret = -ENOMEM;
- goto do_return;
- }
+ if (!buff)
+ return -ENOMEM;
ret = copy_from_user(buff, buf, count);
if (ret > 0) {
@@ -527,8 +538,6 @@ static ssize_t asic_flags_write(struct file *file, const char __user *buf,
do_free:
kfree(buff);
- do_return:
- rcu_read_unlock();
return ret;
}
@@ -542,18 +551,14 @@ static ssize_t qsfp_debugfs_dump(struct file *file, char __user *buf,
char *tmp;
int ret;
- rcu_read_lock();
ppd = private2ppd(file);
tmp = kmalloc(PAGE_SIZE, GFP_KERNEL);
- if (!tmp) {
- rcu_read_unlock();
+ if (!tmp)
return -ENOMEM;
- }
ret = qsfp_dump(ppd, tmp, PAGE_SIZE);
if (ret > 0)
ret = simple_read_from_buffer(buf, count, ppos, tmp, ret);
- rcu_read_unlock();
kfree(tmp);
return ret;
}
@@ -569,7 +574,6 @@ static ssize_t __i2c_debugfs_write(struct file *file, const char __user *buf,
int offset;
int total_written;
- rcu_read_lock();
ppd = private2ppd(file);
/* byte offset format: [offsetSize][i2cAddr][offsetHigh][offsetLow] */
@@ -577,16 +581,12 @@ static ssize_t __i2c_debugfs_write(struct file *file, const char __user *buf,
offset = *ppos & 0xffff;
/* explicitly reject invalid address 0 to catch cp and cat */
- if (i2c_addr == 0) {
- ret = -EINVAL;
- goto _return;
- }
+ if (i2c_addr == 0)
+ return -EINVAL;
buff = kmalloc(count, GFP_KERNEL);
- if (!buff) {
- ret = -ENOMEM;
- goto _return;
- }
+ if (!buff)
+ return -ENOMEM;
ret = copy_from_user(buff, buf, count);
if (ret > 0) {
@@ -606,8 +606,6 @@ static ssize_t __i2c_debugfs_write(struct file *file, const char __user *buf,
_free:
kfree(buff);
- _return:
- rcu_read_unlock();
return ret;
}
@@ -636,7 +634,6 @@ static ssize_t __i2c_debugfs_read(struct file *file, char __user *buf,
int offset;
int total_read;
- rcu_read_lock();
ppd = private2ppd(file);
/* byte offset format: [offsetSize][i2cAddr][offsetHigh][offsetLow] */
@@ -644,16 +641,12 @@ static ssize_t __i2c_debugfs_read(struct file *file, char __user *buf,
offset = *ppos & 0xffff;
/* explicitly reject invalid address 0 to catch cp and cat */
- if (i2c_addr == 0) {
- ret = -EINVAL;
- goto _return;
- }
+ if (i2c_addr == 0)
+ return -EINVAL;
buff = kmalloc(count, GFP_KERNEL);
- if (!buff) {
- ret = -ENOMEM;
- goto _return;
- }
+ if (!buff)
+ return -ENOMEM;
total_read = i2c_read(ppd, target, i2c_addr, offset, buff, count);
if (total_read < 0) {
@@ -673,8 +666,6 @@ static ssize_t __i2c_debugfs_read(struct file *file, char __user *buf,
_free:
kfree(buff);
- _return:
- rcu_read_unlock();
return ret;
}
@@ -701,26 +692,20 @@ static ssize_t __qsfp_debugfs_write(struct file *file, const char __user *buf,
int ret;
int total_written;
- rcu_read_lock();
- if (*ppos + count > QSFP_PAGESIZE * 4) { /* base page + page00-page03 */
- ret = -EINVAL;
- goto _return;
- }
+ if (*ppos + count > QSFP_PAGESIZE * 4) /* base page + page00-page03 */
+ return -EINVAL;
ppd = private2ppd(file);
buff = kmalloc(count, GFP_KERNEL);
- if (!buff) {
- ret = -ENOMEM;
- goto _return;
- }
+ if (!buff)
+ return -ENOMEM;
ret = copy_from_user(buff, buf, count);
if (ret > 0) {
ret = -EFAULT;
goto _free;
}
-
total_written = qsfp_write(ppd, target, *ppos, buff, count);
if (total_written < 0) {
ret = total_written;
@@ -733,8 +718,6 @@ static ssize_t __qsfp_debugfs_write(struct file *file, const char __user *buf,
_free:
kfree(buff);
- _return:
- rcu_read_unlock();
return ret;
}
@@ -761,7 +744,6 @@ static ssize_t __qsfp_debugfs_read(struct file *file, char __user *buf,
int ret;
int total_read;
- rcu_read_lock();
if (*ppos + count > QSFP_PAGESIZE * 4) { /* base page + page00-page03 */
ret = -EINVAL;
goto _return;
@@ -794,7 +776,6 @@ static ssize_t __qsfp_debugfs_read(struct file *file, char __user *buf,
_free:
kfree(buff);
_return:
- rcu_read_unlock();
return ret;
}
@@ -952,6 +933,43 @@ static const struct counter_info port_cntr_ops[] = {
DEBUGFS_OPS("asic_flags", asic_flags_read, asic_flags_write),
};
+static void *_sdma_cpu_list_seq_start(struct seq_file *s, loff_t *pos)
+{
+ if (*pos >= num_online_cpus())
+ return NULL;
+
+ return pos;
+}
+
+static void *_sdma_cpu_list_seq_next(struct seq_file *s, void *v, loff_t *pos)
+{
+ ++*pos;
+ if (*pos >= num_online_cpus())
+ return NULL;
+
+ return pos;
+}
+
+static void _sdma_cpu_list_seq_stop(struct seq_file *s, void *v)
+{
+ /* nothing allocated */
+}
+
+static int _sdma_cpu_list_seq_show(struct seq_file *s, void *v)
+{
+ struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private;
+ struct hfi1_devdata *dd = dd_from_dev(ibd);
+ loff_t *spos = v;
+ loff_t i = *spos;
+
+ sdma_seqfile_dump_cpu_list(s, dd, (unsigned long)i);
+ return 0;
+}
+
+DEBUGFS_SEQ_FILE_OPS(sdma_cpu_list);
+DEBUGFS_SEQ_FILE_OPEN(sdma_cpu_list)
+DEBUGFS_FILE_OPS(sdma_cpu_list);
+
void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd)
{
char name[sizeof("port0counters") + 1];
@@ -980,6 +998,7 @@ void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd)
DEBUGFS_SEQ_FILE_CREATE(ctx_stats, ibd->hfi1_ibdev_dbg, ibd);
DEBUGFS_SEQ_FILE_CREATE(qp_stats, ibd->hfi1_ibdev_dbg, ibd);
DEBUGFS_SEQ_FILE_CREATE(sdes, ibd->hfi1_ibdev_dbg, ibd);
+ DEBUGFS_SEQ_FILE_CREATE(sdma_cpu_list, ibd->hfi1_ibdev_dbg, ibd);
/* dev counter files */
for (i = 0; i < ARRAY_SIZE(cntr_ops); i++)
DEBUGFS_FILE_CREATE(cntr_ops[i].name,
@@ -1010,7 +1029,6 @@ void hfi1_dbg_ibdev_exit(struct hfi1_ibdev *ibd)
debugfs_remove_recursive(ibd->hfi1_ibdev_dbg);
out:
ibd->hfi1_ibdev_dbg = NULL;
- synchronize_rcu();
}
/*
@@ -1035,9 +1053,7 @@ static const char * const hfi1_statnames[] = {
};
static void *_driver_stats_names_seq_start(struct seq_file *s, loff_t *pos)
-__acquires(RCU)
{
- rcu_read_lock();
if (*pos >= ARRAY_SIZE(hfi1_statnames))
return NULL;
return pos;
@@ -1055,9 +1071,7 @@ static void *_driver_stats_names_seq_next(
}
static void _driver_stats_names_seq_stop(struct seq_file *s, void *v)
-__releases(RCU)
{
- rcu_read_unlock();
}
static int _driver_stats_names_seq_show(struct seq_file *s, void *v)
@@ -1073,9 +1087,7 @@ DEBUGFS_SEQ_FILE_OPEN(driver_stats_names)
DEBUGFS_FILE_OPS(driver_stats_names);
static void *_driver_stats_seq_start(struct seq_file *s, loff_t *pos)
-__acquires(RCU)
{
- rcu_read_lock();
if (*pos >= ARRAY_SIZE(hfi1_statnames))
return NULL;
return pos;
@@ -1090,9 +1102,7 @@ static void *_driver_stats_seq_next(struct seq_file *s, void *v, loff_t *pos)
}
static void _driver_stats_seq_stop(struct seq_file *s, void *v)
-__releases(RCU)
{
- rcu_read_unlock();
}
static u64 hfi1_sps_ints(void)
diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c
index 303f10555729..6563e4d38b80 100644
--- a/drivers/infiniband/hw/hfi1/driver.c
+++ b/drivers/infiniband/hw/hfi1/driver.c
@@ -276,7 +276,7 @@ inline int hfi1_rcvbuf_validate(u32 size, u8 type, u16 *encoded)
static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
struct hfi1_packet *packet)
{
- struct hfi1_message_header *rhdr = packet->hdr;
+ struct ib_header *rhdr = packet->hdr;
u32 rte = rhf_rcv_type_err(packet->rhf);
int lnh = be16_to_cpu(rhdr->lrh[0]) & 3;
struct hfi1_ibport *ibp = &ppd->ibport_data;
@@ -288,10 +288,9 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
if (packet->rhf & RHF_TID_ERR) {
/* For TIDERR and RC QPs preemptively schedule a NAK */
- struct hfi1_ib_header *hdr = (struct hfi1_ib_header *)rhdr;
- struct hfi1_other_headers *ohdr = NULL;
+ struct ib_other_headers *ohdr = NULL;
u32 tlen = rhf_pkt_len(packet->rhf); /* in bytes */
- u16 lid = be16_to_cpu(hdr->lrh[1]);
+ u16 lid = be16_to_cpu(rhdr->lrh[1]);
u32 qp_num;
u32 rcv_flags = 0;
@@ -301,14 +300,14 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
/* Check for GRH */
if (lnh == HFI1_LRH_BTH) {
- ohdr = &hdr->u.oth;
+ ohdr = &rhdr->u.oth;
} else if (lnh == HFI1_LRH_GRH) {
u32 vtf;
- ohdr = &hdr->u.l.oth;
- if (hdr->u.l.grh.next_hdr != IB_GRH_NEXT_HDR)
+ ohdr = &rhdr->u.l.oth;
+ if (rhdr->u.l.grh.next_hdr != IB_GRH_NEXT_HDR)
goto drop;
- vtf = be32_to_cpu(hdr->u.l.grh.version_tclass_flow);
+ vtf = be32_to_cpu(rhdr->u.l.grh.version_tclass_flow);
if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION)
goto drop;
rcv_flags |= HFI1_HAS_GRH;
@@ -344,7 +343,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
case IB_QPT_RC:
hfi1_rc_hdrerr(
rcd,
- hdr,
+ rhdr,
rcv_flags,
qp);
break;
@@ -452,8 +451,8 @@ void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt,
bool do_cnp)
{
struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
- struct hfi1_ib_header *hdr = pkt->hdr;
- struct hfi1_other_headers *ohdr = pkt->ohdr;
+ struct ib_header *hdr = pkt->hdr;
+ struct ib_other_headers *ohdr = pkt->ohdr;
struct ib_grh *grh = NULL;
u32 rqpn = 0, bth1;
u16 rlid, dlid = be16_to_cpu(hdr->lrh[1]);
@@ -487,7 +486,7 @@ void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt,
return;
}
- sc = hdr2sc((struct hfi1_message_header *)hdr, pkt->rhf);
+ sc = hdr2sc(hdr, pkt->rhf);
bth1 = be32_to_cpu(ohdr->bth[1]);
if (do_cnp && (bth1 & HFI1_FECN_SMASK)) {
@@ -599,8 +598,8 @@ static void __prescan_rxq(struct hfi1_packet *packet)
__le32 *rhf_addr = (__le32 *)rcd->rcvhdrq + mdata.ps_head +
dd->rhf_offset;
struct rvt_qp *qp;
- struct hfi1_ib_header *hdr;
- struct hfi1_other_headers *ohdr;
+ struct ib_header *hdr;
+ struct ib_other_headers *ohdr;
struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
u64 rhf = rhf_to_cpu(rhf_addr);
u32 etype = rhf_rcv_type(rhf), qpn, bth1;
@@ -616,8 +615,8 @@ static void __prescan_rxq(struct hfi1_packet *packet)
if (etype != RHF_RCV_TYPE_IB)
goto next;
- hdr = (struct hfi1_ib_header *)
- hfi1_get_msgheader(dd, rhf_addr);
+ hdr = hfi1_get_msgheader(dd, rhf_addr);
+
lnh = be16_to_cpu(hdr->lrh[0]) & 3;
if (lnh == HFI1_LRH_BTH)
@@ -892,8 +891,8 @@ static inline int set_armed_to_active(struct hfi1_ctxtdata *rcd,
struct hfi1_devdata *dd)
{
struct work_struct *lsaw = &rcd->ppd->linkstate_active_work;
- struct hfi1_message_header *hdr = hfi1_get_msgheader(packet->rcd->dd,
- packet->rhf_addr);
+ struct ib_header *hdr = hfi1_get_msgheader(packet->rcd->dd,
+ packet->rhf_addr);
u8 etype = rhf_rcv_type(packet->rhf);
if (etype == RHF_RCV_TYPE_IB && hdr2sc(hdr, packet->rhf) != 0xf) {
diff --git a/drivers/infiniband/hw/hfi1/eprom.c b/drivers/infiniband/hw/hfi1/eprom.c
index 36b77943cbfd..e70c223801b4 100644
--- a/drivers/infiniband/hw/hfi1/eprom.c
+++ b/drivers/infiniband/hw/hfi1/eprom.c
@@ -49,7 +49,26 @@
#include "common.h"
#include "eprom.h"
+/*
+ * The EPROM is logically divided into three partitions:
+ * partition 0: the first 128K, visible from PCI ROM BAR
+ * partition 1: 4K config file (sector size)
+ * partition 2: the rest
+ */
+#define P0_SIZE (128 * 1024)
+#define P1_SIZE (4 * 1024)
+#define P1_START P0_SIZE
+#define P2_START (P0_SIZE + P1_SIZE)
+
+/* controller page size, in bytes */
+#define EP_PAGE_SIZE 256
+#define EP_PAGE_MASK (EP_PAGE_SIZE - 1)
+#define EP_PAGE_DWORDS (EP_PAGE_SIZE / sizeof(u32))
+
+/* controller commands */
#define CMD_SHIFT 24
+#define CMD_NOP (0)
+#define CMD_READ_DATA(addr) ((0x03 << CMD_SHIFT) | addr)
#define CMD_RELEASE_POWERDOWN_NOID ((0xab << CMD_SHIFT))
/* controller interface speeds */
@@ -61,6 +80,90 @@
* Double it for safety.
*/
#define EPROM_TIMEOUT 80000 /* ms */
+
+/*
+ * Read a 256 byte (64 dword) EPROM page.
+ * All callers have verified the offset is at a page boundary.
+ */
+static void read_page(struct hfi1_devdata *dd, u32 offset, u32 *result)
+{
+ int i;
+
+ write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_READ_DATA(offset));
+ for (i = 0; i < EP_PAGE_DWORDS; i++)
+ result[i] = (u32)read_csr(dd, ASIC_EEP_DATA);
+ write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_NOP); /* close open page */
+}
+
+/*
+ * Read length bytes starting at offset from the start of the EPROM.
+ */
+static int read_length(struct hfi1_devdata *dd, u32 start, u32 len, void *dest)
+{
+ u32 buffer[EP_PAGE_DWORDS];
+ u32 end;
+ u32 start_offset;
+ u32 read_start;
+ u32 bytes;
+
+ if (len == 0)
+ return 0;
+
+ end = start + len;
+
+ /*
+ * Make sure the read range is not outside of the controller read
+ * command address range. Note that '>' is correct below - the end
+ * of the range is OK if it stops at the limit, but no higher.
+ */
+ if (end > (1 << CMD_SHIFT))
+ return -EINVAL;
+
+ /* read the first partial page */
+ start_offset = start & EP_PAGE_MASK;
+ if (start_offset) {
+ /* partial starting page */
+
+ /* align and read the page that contains the start */
+ read_start = start & ~EP_PAGE_MASK;
+ read_page(dd, read_start, buffer);
+
+ /* the rest of the page is available data */
+ bytes = EP_PAGE_SIZE - start_offset;
+
+ if (len <= bytes) {
+ /* end is within this page */
+ memcpy(dest, (u8 *)buffer + start_offset, len);
+ return 0;
+ }
+
+ memcpy(dest, (u8 *)buffer + start_offset, bytes);
+
+ start += bytes;
+ len -= bytes;
+ dest += bytes;
+ }
+ /* start is now page aligned */
+
+ /* read whole pages */
+ while (len >= EP_PAGE_SIZE) {
+ read_page(dd, start, buffer);
+ memcpy(dest, buffer, EP_PAGE_SIZE);
+
+ start += EP_PAGE_SIZE;
+ len -= EP_PAGE_SIZE;
+ dest += EP_PAGE_SIZE;
+ }
+
+ /* read the last partial page */
+ if (len) {
+ read_page(dd, start, buffer);
+ memcpy(dest, buffer, len);
+ }
+
+ return 0;
+}
+
/*
* Initialize the EPROM handler.
*/
@@ -100,3 +203,85 @@ int eprom_init(struct hfi1_devdata *dd)
done_asic:
return ret;
}
+
+/* magic character sequence that trails an image */
+#define IMAGE_TRAIL_MAGIC "egamiAPO"
+
+/*
+ * Read all of partition 1. The actual file is at the front. Adjust
+ * the returned size if a trailing image magic is found.
+ */
+static int read_partition_platform_config(struct hfi1_devdata *dd, void **data,
+ u32 *size)
+{
+ void *buffer;
+ void *p;
+ u32 length;
+ int ret;
+
+ buffer = kmalloc(P1_SIZE, GFP_KERNEL);
+ if (!buffer)
+ return -ENOMEM;
+
+ ret = read_length(dd, P1_START, P1_SIZE, buffer);
+ if (ret) {
+ kfree(buffer);
+ return ret;
+ }
+
+ /* scan for image magic that may trail the actual data */
+ p = strnstr(buffer, IMAGE_TRAIL_MAGIC, P1_SIZE);
+ if (p)
+ length = p - buffer;
+ else
+ length = P1_SIZE;
+
+ *data = buffer;
+ *size = length;
+ return 0;
+}
+
+/*
+ * Read the platform configuration file from the EPROM.
+ *
+ * On success, an allocated buffer containing the data and its size are
+ * returned. It is up to the caller to free this buffer.
+ *
+ * Return value:
+ * 0 - success
+ * -ENXIO - no EPROM is available
+ * -EBUSY - not able to acquire access to the EPROM
+ * -ENOENT - no recognizable file written
+ * -ENOMEM - buffer could not be allocated
+ */
+int eprom_read_platform_config(struct hfi1_devdata *dd, void **data, u32 *size)
+{
+ u32 directory[EP_PAGE_DWORDS]; /* aligned buffer */
+ int ret;
+
+ if (!dd->eprom_available)
+ return -ENXIO;
+
+ ret = acquire_chip_resource(dd, CR_EPROM, EPROM_TIMEOUT);
+ if (ret)
+ return -EBUSY;
+
+ /* read the last page of P0 for the EPROM format magic */
+ ret = read_length(dd, P1_START - EP_PAGE_SIZE, EP_PAGE_SIZE, directory);
+ if (ret)
+ goto done;
+
+ /* last dword of P0 contains a magic indicator */
+ if (directory[EP_PAGE_DWORDS - 1] == 0) {
+ /* partition format */
+ ret = read_partition_platform_config(dd, data, size);
+ goto done;
+ }
+
+ /* nothing recognized */
+ ret = -ENOENT;
+
+done:
+ release_chip_resource(dd, CR_EPROM);
+ return ret;
+}
diff --git a/drivers/infiniband/hw/hfi1/eprom.h b/drivers/infiniband/hw/hfi1/eprom.h
index d41f0b1afb15..e774184f1643 100644
--- a/drivers/infiniband/hw/hfi1/eprom.h
+++ b/drivers/infiniband/hw/hfi1/eprom.h
@@ -45,8 +45,8 @@
*
*/
-struct hfi1_cmd;
struct hfi1_devdata;
int eprom_init(struct hfi1_devdata *dd);
-int handle_eprom_command(struct file *fp, const struct hfi1_cmd *cmd);
+int eprom_read_platform_config(struct hfi1_devdata *dd, void **buf_ret,
+ u32 *size_ret);
diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c
index 7e03ccd2554d..677efa0e8cd6 100644
--- a/drivers/infiniband/hw/hfi1/file_ops.c
+++ b/drivers/infiniband/hw/hfi1/file_ops.c
@@ -58,7 +58,6 @@
#include "trace.h"
#include "user_sdma.h"
#include "user_exp_rcv.h"
-#include "eprom.h"
#include "aspm.h"
#include "mmu_rb.h"
@@ -440,9 +439,10 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma)
struct hfi1_filedata *fd = fp->private_data;
struct hfi1_ctxtdata *uctxt = fd->uctxt;
struct hfi1_devdata *dd;
- unsigned long flags, pfn;
+ unsigned long flags;
u64 token = vma->vm_pgoff << PAGE_SHIFT,
memaddr = 0;
+ void *memvirt = NULL;
u8 subctxt, mapio = 0, vmf = 0, type;
ssize_t memlen = 0;
int ret = 0;
@@ -493,7 +493,8 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma)
* second or third page allocated for credit returns (if number
* of enabled contexts > 64 and 128 respectively).
*/
- memaddr = dd->cr_base[uctxt->numa_id].pa +
+ memvirt = dd->cr_base[uctxt->numa_id].va;
+ memaddr = virt_to_phys(memvirt) +
(((u64)uctxt->sc->hw_free -
(u64)dd->cr_base[uctxt->numa_id].va) & PAGE_MASK);
memlen = PAGE_SIZE;
@@ -508,8 +509,8 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma)
mapio = 1;
break;
case RCV_HDRQ:
- memaddr = uctxt->rcvhdrq_phys;
memlen = uctxt->rcvhdrq_size;
+ memvirt = uctxt->rcvhdrq;
break;
case RCV_EGRBUF: {
unsigned long addr;
@@ -533,14 +534,21 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma)
vma->vm_flags &= ~VM_MAYWRITE;
addr = vma->vm_start;
for (i = 0 ; i < uctxt->egrbufs.numbufs; i++) {
+ memlen = uctxt->egrbufs.buffers[i].len;
+ memvirt = uctxt->egrbufs.buffers[i].addr;
ret = remap_pfn_range(
vma, addr,
- uctxt->egrbufs.buffers[i].phys >> PAGE_SHIFT,
- uctxt->egrbufs.buffers[i].len,
+ /*
+ * virt_to_pfn() does the same, but
+ * it's not available on x86_64
+ * when CONFIG_MMU is enabled.
+ */
+ PFN_DOWN(__pa(memvirt)),
+ memlen,
vma->vm_page_prot);
if (ret < 0)
goto done;
- addr += uctxt->egrbufs.buffers[i].len;
+ addr += memlen;
}
ret = 0;
goto done;
@@ -596,8 +604,8 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma)
ret = -EPERM;
goto done;
}
- memaddr = uctxt->rcvhdrqtailaddr_phys;
memlen = PAGE_SIZE;
+ memvirt = (void *)uctxt->rcvhdrtail_kvaddr;
flags &= ~VM_MAYWRITE;
break;
case SUBCTXT_UREGS:
@@ -650,16 +658,24 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma)
"%u:%u type:%u io/vf:%d/%d, addr:0x%llx, len:%lu(%lu), flags:0x%lx\n",
ctxt, subctxt, type, mapio, vmf, memaddr, memlen,
vma->vm_end - vma->vm_start, vma->vm_flags);
- pfn = (unsigned long)(memaddr >> PAGE_SHIFT);
if (vmf) {
- vma->vm_pgoff = pfn;
+ vma->vm_pgoff = PFN_DOWN(memaddr);
vma->vm_ops = &vm_ops;
ret = 0;
} else if (mapio) {
- ret = io_remap_pfn_range(vma, vma->vm_start, pfn, memlen,
+ ret = io_remap_pfn_range(vma, vma->vm_start,
+ PFN_DOWN(memaddr),
+ memlen,
vma->vm_page_prot);
+ } else if (memvirt) {
+ ret = remap_pfn_range(vma, vma->vm_start,
+ PFN_DOWN(__pa(memvirt)),
+ memlen,
+ vma->vm_page_prot);
} else {
- ret = remap_pfn_range(vma, vma->vm_start, pfn, memlen,
+ ret = remap_pfn_range(vma, vma->vm_start,
+ PFN_DOWN(memaddr),
+ memlen,
vma->vm_page_prot);
}
done:
@@ -961,14 +977,16 @@ static int allocate_ctxt(struct file *fp, struct hfi1_devdata *dd,
*/
uctxt->sc = sc_alloc(dd, SC_USER, uctxt->rcvhdrqentsize,
uctxt->dd->node);
- if (!uctxt->sc)
- return -ENOMEM;
-
+ if (!uctxt->sc) {
+ ret = -ENOMEM;
+ goto ctxdata_free;
+ }
hfi1_cdbg(PROC, "allocated send context %u(%u)\n", uctxt->sc->sw_index,
uctxt->sc->hw_context);
ret = sc_enable(uctxt->sc);
if (ret)
- return ret;
+ goto ctxdata_free;
+
/*
* Setup shared context resources if the user-level has requested
* shared contexts and this is the 'master' process.
@@ -982,7 +1000,7 @@ static int allocate_ctxt(struct file *fp, struct hfi1_devdata *dd,
* send context because it will be done during file close
*/
if (ret)
- return ret;
+ goto ctxdata_free;
}
uctxt->userversion = uinfo->userversion;
uctxt->flags = hfi1_cap_mask; /* save current flag state */
@@ -1002,6 +1020,11 @@ static int allocate_ctxt(struct file *fp, struct hfi1_devdata *dd,
fd->uctxt = uctxt;
return 0;
+
+ctxdata_free:
+ dd->rcd[ctxt] = NULL;
+ hfi1_free_ctxtdata(dd, uctxt);
+ return ret;
}
static int init_subctxts(struct hfi1_ctxtdata *uctxt,
@@ -1260,7 +1283,7 @@ static int get_base_info(struct file *fp, void __user *ubase, __u32 len)
uctxt->rcvhdrq);
binfo.rcvegr_bufbase = HFI1_MMAP_TOKEN(RCV_EGRBUF, uctxt->ctxt,
fd->subctxt,
- uctxt->egrbufs.rcvtids[0].phys);
+ uctxt->egrbufs.rcvtids[0].dma);
binfo.sdma_comp_bufbase = HFI1_MMAP_TOKEN(SDMA_COMP, uctxt->ctxt,
fd->subctxt, 0);
/*
diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
index a021e660d482..7eef11b316ff 100644
--- a/drivers/infiniband/hw/hfi1/hfi.h
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -64,6 +64,8 @@
#include <linux/kthread.h>
#include <linux/i2c.h>
#include <linux/i2c-algo-bit.h>
+#include <rdma/ib_hdrs.h>
+#include <linux/rhashtable.h>
#include <rdma/rdma_vt.h>
#include "chip_registers.h"
@@ -171,12 +173,12 @@ struct ctxt_eager_bufs {
u32 threshold; /* head update threshold */
struct eager_buffer {
void *addr;
- dma_addr_t phys;
+ dma_addr_t dma;
ssize_t len;
} *buffers;
struct {
void *addr;
- dma_addr_t phys;
+ dma_addr_t dma;
} *rcvtids;
};
@@ -207,8 +209,8 @@ struct hfi1_ctxtdata {
/* size of each of the rcvhdrq entries */
u16 rcvhdrqentsize;
/* mmap of hdrq, must fit in 44 bits */
- dma_addr_t rcvhdrq_phys;
- dma_addr_t rcvhdrqtailaddr_phys;
+ dma_addr_t rcvhdrq_dma;
+ dma_addr_t rcvhdrqtailaddr_dma;
struct ctxt_eager_bufs egrbufs;
/* this receive context's assigned PIO ACK send context */
struct send_context *sc;
@@ -350,7 +352,7 @@ struct hfi1_packet {
struct hfi1_ctxtdata *rcd;
__le32 *rhf_addr;
struct rvt_qp *qp;
- struct hfi1_other_headers *ohdr;
+ struct ib_other_headers *ohdr;
u64 rhf;
u32 maxcnt;
u32 rhqoff;
@@ -529,6 +531,7 @@ struct hfi1_msix_entry {
void *arg;
char name[MAX_NAME_SIZE];
cpumask_t mask;
+ struct irq_affinity_notify notify;
};
/* per-SL CCA information */
@@ -605,6 +608,7 @@ struct hfi1_pportdata {
struct work_struct freeze_work;
struct work_struct link_downgrade_work;
struct work_struct link_bounce_work;
+ struct delayed_work start_link_work;
/* host link state variables */
struct mutex hls_lock;
u32 host_link_state;
@@ -659,6 +663,7 @@ struct hfi1_pportdata {
u8 linkinit_reason;
u8 local_tx_rate; /* rate given to 8051 firmware */
u8 last_pstate; /* info only */
+ u8 qsfp_retry_count;
/* placeholders for IB MAD packet settings */
u8 overrun_threshold;
@@ -1058,8 +1063,6 @@ struct hfi1_devdata {
u8 psxmitwait_supported;
/* cycle length of PS* counters in HW (in picoseconds) */
u16 psxmitwait_check_rate;
- /* high volume overflow errors deferred to tasklet */
- struct tasklet_struct error_tasklet;
/* MSI-X information */
struct hfi1_msix_entry *msix_entries;
@@ -1162,7 +1165,7 @@ struct hfi1_devdata {
/* receive context tail dummy address */
__le64 *rcvhdrtail_dummy_kvaddr;
- dma_addr_t rcvhdrtail_dummy_physaddr;
+ dma_addr_t rcvhdrtail_dummy_dma;
bool eprom_available; /* true if EPROM is available for this device */
bool aspm_supported; /* Does HW support ASPM */
@@ -1173,6 +1176,7 @@ struct hfi1_devdata {
atomic_t aspm_disabled_cnt;
struct hfi1_affinity *affinity;
+ struct rhashtable sdma_rht;
struct kobject kobj;
};
@@ -1266,7 +1270,7 @@ static inline u32 driver_lstate(struct hfi1_pportdata *ppd)
void receive_interrupt_work(struct work_struct *work);
/* extract service channel from header and rhf */
-static inline int hdr2sc(struct hfi1_message_header *hdr, u64 rhf)
+static inline int hdr2sc(struct ib_header *hdr, u64 rhf)
{
return ((be16_to_cpu(hdr->lrh[0]) >> 12) & 0xf) |
((!!(rhf_dc_info(rhf))) << 4);
@@ -1601,7 +1605,7 @@ void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt,
static inline bool process_ecn(struct rvt_qp *qp, struct hfi1_packet *pkt,
bool do_cnp)
{
- struct hfi1_other_headers *ohdr = pkt->ohdr;
+ struct ib_other_headers *ohdr = pkt->ohdr;
u32 bth1;
bth1 = be32_to_cpu(ohdr->bth[1]);
@@ -1804,7 +1808,7 @@ extern unsigned int hfi1_max_mtu;
extern unsigned int hfi1_cu;
extern unsigned int user_credit_return_threshold;
extern int num_user_contexts;
-extern unsigned n_krcvqs;
+extern unsigned long n_krcvqs;
extern uint krcvqs[];
extern int krcvqsset;
extern uint kdeth_qp;
diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c
index b7935451093c..60db61536fed 100644
--- a/drivers/infiniband/hw/hfi1/init.c
+++ b/drivers/infiniband/hw/hfi1/init.c
@@ -94,7 +94,7 @@ module_param_array(krcvqs, uint, &krcvqsset, S_IRUGO);
MODULE_PARM_DESC(krcvqs, "Array of the number of non-control kernel receive queues by VL");
/* computed based on above array */
-unsigned n_krcvqs;
+unsigned long n_krcvqs;
static unsigned hfi1_rcvarr_split = 25;
module_param_named(rcvarr_split, hfi1_rcvarr_split, uint, S_IRUGO);
@@ -336,6 +336,7 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt,
}
return rcd;
bail:
+ dd->rcd[ctxt] = NULL;
kfree(rcd->egrbufs.rcvtids);
kfree(rcd->egrbufs.buffers);
kfree(rcd);
@@ -500,6 +501,7 @@ void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd,
INIT_WORK(&ppd->link_downgrade_work, handle_link_downgrade);
INIT_WORK(&ppd->sma_message_work, handle_sma_message);
INIT_WORK(&ppd->link_bounce_work, handle_link_bounce);
+ INIT_DELAYED_WORK(&ppd->start_link_work, handle_start_link);
INIT_WORK(&ppd->linkstate_active_work, receive_interrupt_work);
INIT_WORK(&ppd->qsfp_info.qsfp_work, qsfp_event);
@@ -708,7 +710,7 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit)
/* allocate dummy tail memory for all receive contexts */
dd->rcvhdrtail_dummy_kvaddr = dma_zalloc_coherent(
&dd->pcidev->dev, sizeof(u64),
- &dd->rcvhdrtail_dummy_physaddr,
+ &dd->rcvhdrtail_dummy_dma,
GFP_KERNEL);
if (!dd->rcvhdrtail_dummy_kvaddr) {
@@ -941,12 +943,12 @@ void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
if (rcd->rcvhdrq) {
dma_free_coherent(&dd->pcidev->dev, rcd->rcvhdrq_size,
- rcd->rcvhdrq, rcd->rcvhdrq_phys);
+ rcd->rcvhdrq, rcd->rcvhdrq_dma);
rcd->rcvhdrq = NULL;
if (rcd->rcvhdrtail_kvaddr) {
dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE,
(void *)rcd->rcvhdrtail_kvaddr,
- rcd->rcvhdrqtailaddr_phys);
+ rcd->rcvhdrqtailaddr_dma);
rcd->rcvhdrtail_kvaddr = NULL;
}
}
@@ -955,11 +957,11 @@ void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
kfree(rcd->egrbufs.rcvtids);
for (e = 0; e < rcd->egrbufs.alloced; e++) {
- if (rcd->egrbufs.buffers[e].phys)
+ if (rcd->egrbufs.buffers[e].dma)
dma_free_coherent(&dd->pcidev->dev,
rcd->egrbufs.buffers[e].len,
rcd->egrbufs.buffers[e].addr,
- rcd->egrbufs.buffers[e].phys);
+ rcd->egrbufs.buffers[e].dma);
}
kfree(rcd->egrbufs.buffers);
@@ -1353,7 +1355,7 @@ static void cleanup_device_data(struct hfi1_devdata *dd)
if (dd->rcvhdrtail_dummy_kvaddr) {
dma_free_coherent(&dd->pcidev->dev, sizeof(u64),
(void *)dd->rcvhdrtail_dummy_kvaddr,
- dd->rcvhdrtail_dummy_physaddr);
+ dd->rcvhdrtail_dummy_dma);
dd->rcvhdrtail_dummy_kvaddr = NULL;
}
@@ -1576,7 +1578,7 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
u64 reg;
if (!rcd->rcvhdrq) {
- dma_addr_t phys_hdrqtail;
+ dma_addr_t dma_hdrqtail;
gfp_t gfp_flags;
/*
@@ -1589,7 +1591,7 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
gfp_flags = (rcd->ctxt >= dd->first_user_ctxt) ?
GFP_USER : GFP_KERNEL;
rcd->rcvhdrq = dma_zalloc_coherent(
- &dd->pcidev->dev, amt, &rcd->rcvhdrq_phys,
+ &dd->pcidev->dev, amt, &rcd->rcvhdrq_dma,
gfp_flags | __GFP_COMP);
if (!rcd->rcvhdrq) {
@@ -1601,11 +1603,11 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
if (HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL)) {
rcd->rcvhdrtail_kvaddr = dma_zalloc_coherent(
- &dd->pcidev->dev, PAGE_SIZE, &phys_hdrqtail,
+ &dd->pcidev->dev, PAGE_SIZE, &dma_hdrqtail,
gfp_flags);
if (!rcd->rcvhdrtail_kvaddr)
goto bail_free;
- rcd->rcvhdrqtailaddr_phys = phys_hdrqtail;
+ rcd->rcvhdrqtailaddr_dma = dma_hdrqtail;
}
rcd->rcvhdrq_size = amt;
@@ -1633,7 +1635,7 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
* before enabling any receive context
*/
write_kctxt_csr(dd, rcd->ctxt, RCV_HDR_TAIL_ADDR,
- dd->rcvhdrtail_dummy_physaddr);
+ dd->rcvhdrtail_dummy_dma);
return 0;
@@ -1644,7 +1646,7 @@ bail_free:
vfree(rcd->user_event_mask);
rcd->user_event_mask = NULL;
dma_free_coherent(&dd->pcidev->dev, amt, rcd->rcvhdrq,
- rcd->rcvhdrq_phys);
+ rcd->rcvhdrq_dma);
rcd->rcvhdrq = NULL;
bail:
return -ENOMEM;
@@ -1705,15 +1707,15 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd)
rcd->egrbufs.buffers[idx].addr =
dma_zalloc_coherent(&dd->pcidev->dev,
rcd->egrbufs.rcvtid_size,
- &rcd->egrbufs.buffers[idx].phys,
+ &rcd->egrbufs.buffers[idx].dma,
gfp_flags);
if (rcd->egrbufs.buffers[idx].addr) {
rcd->egrbufs.buffers[idx].len =
rcd->egrbufs.rcvtid_size;
rcd->egrbufs.rcvtids[rcd->egrbufs.alloced].addr =
rcd->egrbufs.buffers[idx].addr;
- rcd->egrbufs.rcvtids[rcd->egrbufs.alloced].phys =
- rcd->egrbufs.buffers[idx].phys;
+ rcd->egrbufs.rcvtids[rcd->egrbufs.alloced].dma =
+ rcd->egrbufs.buffers[idx].dma;
rcd->egrbufs.alloced++;
alloced_bytes += rcd->egrbufs.rcvtid_size;
idx++;
@@ -1754,14 +1756,14 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd)
for (i = 0, j = 0, offset = 0; j < idx; i++) {
if (i >= rcd->egrbufs.count)
break;
- rcd->egrbufs.rcvtids[i].phys =
- rcd->egrbufs.buffers[j].phys + offset;
+ rcd->egrbufs.rcvtids[i].dma =
+ rcd->egrbufs.buffers[j].dma + offset;
rcd->egrbufs.rcvtids[i].addr =
rcd->egrbufs.buffers[j].addr + offset;
rcd->egrbufs.alloced++;
- if ((rcd->egrbufs.buffers[j].phys + offset +
+ if ((rcd->egrbufs.buffers[j].dma + offset +
new_size) ==
- (rcd->egrbufs.buffers[j].phys +
+ (rcd->egrbufs.buffers[j].dma +
rcd->egrbufs.buffers[j].len)) {
j++;
offset = 0;
@@ -1813,7 +1815,7 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd)
for (idx = 0; idx < rcd->egrbufs.alloced; idx++) {
hfi1_put_tid(dd, rcd->eager_base + idx, PT_EAGER,
- rcd->egrbufs.rcvtids[idx].phys, order);
+ rcd->egrbufs.rcvtids[idx].dma, order);
cond_resched();
}
goto bail;
@@ -1825,9 +1827,9 @@ bail_rcvegrbuf_phys:
dma_free_coherent(&dd->pcidev->dev,
rcd->egrbufs.buffers[idx].len,
rcd->egrbufs.buffers[idx].addr,
- rcd->egrbufs.buffers[idx].phys);
+ rcd->egrbufs.buffers[idx].dma);
rcd->egrbufs.buffers[idx].addr = NULL;
- rcd->egrbufs.buffers[idx].phys = 0;
+ rcd->egrbufs.buffers[idx].dma = 0;
rcd->egrbufs.buffers[idx].len = 0;
}
bail:
diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c
index 39e42c373a01..9487c9bb8920 100644
--- a/drivers/infiniband/hw/hfi1/mad.c
+++ b/drivers/infiniband/hw/hfi1/mad.c
@@ -1013,7 +1013,6 @@ static int set_port_states(struct hfi1_pportdata *ppd, struct opa_smp *smp,
* offline.
*/
set_link_state(ppd, HLS_DN_OFFLINE);
- tune_serdes(ppd);
start_link(ppd);
} else {
set_link_state(ppd, link_state);
@@ -1407,12 +1406,6 @@ static int set_pkeys(struct hfi1_devdata *dd, u8 port, u16 *pkeys)
if (key == okey)
continue;
/*
- * Don't update pkeys[2], if an HFI port without MgmtAllowed
- * by neighbor is a switch.
- */
- if (i == 2 && !ppd->mgmt_allowed && ppd->neighbor_type == 1)
- continue;
- /*
* The SM gives us the complete PKey table. We have
* to ensure that we put the PKeys in the matching
* slots.
@@ -2604,7 +2597,7 @@ static int pma_get_opa_datacounters(struct opa_pma_mad *pmp,
u8 lq, num_vls;
u8 res_lli, res_ler;
u64 port_mask;
- unsigned long port_num;
+ u8 port_num;
unsigned long vl;
u32 vl_select_mask;
int vfi;
@@ -2638,9 +2631,9 @@ static int pma_get_opa_datacounters(struct opa_pma_mad *pmp,
*/
port_mask = be64_to_cpu(req->port_select_mask[3]);
port_num = find_first_bit((unsigned long *)&port_mask,
- sizeof(port_mask));
+ sizeof(port_mask) * 8);
- if ((u8)port_num != port) {
+ if (port_num != port) {
pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
return reply((struct ib_mad_hdr *)pmp);
}
@@ -2842,7 +2835,7 @@ static int pma_get_opa_porterrors(struct opa_pma_mad *pmp,
*/
port_mask = be64_to_cpu(req->port_select_mask[3]);
port_num = find_first_bit((unsigned long *)&port_mask,
- sizeof(port_mask));
+ sizeof(port_mask) * 8);
if (port_num != port) {
pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
@@ -3015,7 +3008,7 @@ static int pma_get_opa_errorinfo(struct opa_pma_mad *pmp,
*/
port_mask = be64_to_cpu(req->port_select_mask[3]);
port_num = find_first_bit((unsigned long *)&port_mask,
- sizeof(port_mask));
+ sizeof(port_mask) * 8);
if (port_num != port) {
pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
@@ -3252,7 +3245,7 @@ static int pma_set_opa_errorinfo(struct opa_pma_mad *pmp,
*/
port_mask = be64_to_cpu(req->port_select_mask[3]);
port_num = find_first_bit((unsigned long *)&port_mask,
- sizeof(port_mask));
+ sizeof(port_mask) * 8);
if (port_num != port) {
pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c
index ac1bf4a73571..50a3a36d9363 100644
--- a/drivers/infiniband/hw/hfi1/pio.c
+++ b/drivers/infiniband/hw/hfi1/pio.c
@@ -551,11 +551,11 @@ static inline u32 group_size(u32 group)
}
/*
- * Obtain the credit return addresses, kernel virtual and physical, for the
+ * Obtain the credit return addresses, kernel virtual and bus, for the
* given sc.
*
* To understand this routine:
- * o va and pa are arrays of struct credit_return. One for each physical
+ * o va and dma are arrays of struct credit_return. One for each physical
* send context, per NUMA.
* o Each send context always looks in its relative location in a struct
* credit_return for its credit return.
@@ -563,14 +563,14 @@ static inline u32 group_size(u32 group)
* with the same value. Use the address of the first send context in the
* group.
*/
-static void cr_group_addresses(struct send_context *sc, dma_addr_t *pa)
+static void cr_group_addresses(struct send_context *sc, dma_addr_t *dma)
{
u32 gc = group_context(sc->hw_context, sc->group);
u32 index = sc->hw_context & 0x7;
sc->hw_free = &sc->dd->cr_base[sc->node].va[gc].cr[index];
- *pa = (unsigned long)
- &((struct credit_return *)sc->dd->cr_base[sc->node].pa)[gc];
+ *dma = (unsigned long)
+ &((struct credit_return *)sc->dd->cr_base[sc->node].dma)[gc];
}
/*
@@ -710,7 +710,7 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type,
{
struct send_context_info *sci;
struct send_context *sc = NULL;
- dma_addr_t pa;
+ dma_addr_t dma;
unsigned long flags;
u64 reg;
u32 thresh;
@@ -763,7 +763,7 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type,
sc->sw_index = sw_index;
sc->hw_context = hw_context;
- cr_group_addresses(sc, &pa);
+ cr_group_addresses(sc, &dma);
sc->credits = sci->credits;
/* PIO Send Memory Address details */
@@ -805,7 +805,7 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type,
((u64)opval << SC(CHECK_OPCODE_VALUE_SHIFT)));
/* set up credit return */
- reg = pa & SC(CREDIT_RETURN_ADDR_ADDRESS_SMASK);
+ reg = dma & SC(CREDIT_RETURN_ADDR_ADDRESS_SMASK);
write_kctxt_csr(dd, hw_context, SC(CREDIT_RETURN_ADDR), reg);
/*
@@ -2064,7 +2064,7 @@ int init_credit_return(struct hfi1_devdata *dd)
dd->cr_base[i].va = dma_zalloc_coherent(
&dd->pcidev->dev,
bytes,
- &dd->cr_base[i].pa,
+ &dd->cr_base[i].dma,
GFP_KERNEL);
if (!dd->cr_base[i].va) {
set_dev_node(&dd->pcidev->dev, dd->node);
@@ -2097,7 +2097,7 @@ void free_credit_return(struct hfi1_devdata *dd)
TXE_NUM_CONTEXTS *
sizeof(struct credit_return),
dd->cr_base[i].va,
- dd->cr_base[i].pa);
+ dd->cr_base[i].dma);
}
}
kfree(dd->cr_base);
diff --git a/drivers/infiniband/hw/hfi1/pio.h b/drivers/infiniband/hw/hfi1/pio.h
index 464cbd27b975..e709eaf743b5 100644
--- a/drivers/infiniband/hw/hfi1/pio.h
+++ b/drivers/infiniband/hw/hfi1/pio.h
@@ -154,7 +154,7 @@ struct credit_return {
/* NUMA indexed credit return array */
struct credit_return_base {
struct credit_return *va;
- dma_addr_t pa;
+ dma_addr_t dma;
};
/* send context configuration sizes (one per type) */
diff --git a/drivers/infiniband/hw/hfi1/pio_copy.c b/drivers/infiniband/hw/hfi1/pio_copy.c
index 8c25e1b58849..aa7773643107 100644
--- a/drivers/infiniband/hw/hfi1/pio_copy.c
+++ b/drivers/infiniband/hw/hfi1/pio_copy.c
@@ -165,9 +165,6 @@ void pio_copy(struct hfi1_devdata *dd, struct pio_buf *pbuf, u64 pbc,
preempt_enable();
}
-/* USE_SHIFTS is faster in user-space tests on a Xeon X5570 @ 2.93GHz */
-#define USE_SHIFTS 1
-#ifdef USE_SHIFTS
/*
* Handle carry bytes using shifts and masks.
*
@@ -187,150 +184,6 @@ void pio_copy(struct hfi1_devdata *dd, struct pio_buf *pbuf, u64 pbc,
#define mshift(x) (8 * (x))
/*
- * Read nbytes bytes from "from" and return them in the LSB bytes
- * of pbuf->carry. Other bytes are zeroed. Any previous value
- * pbuf->carry is lost.
- *
- * NOTES:
- * o do not read from from if nbytes is zero
- * o from may _not_ be u64 aligned
- * o nbytes must not span a QW boundary
- */
-static inline void read_low_bytes(struct pio_buf *pbuf, const void *from,
- unsigned int nbytes)
-{
- unsigned long off;
-
- if (nbytes == 0) {
- pbuf->carry.val64 = 0;
- } else {
- /* align our pointer */
- off = (unsigned long)from & 0x7;
- from = (void *)((unsigned long)from & ~0x7l);
- pbuf->carry.val64 = ((*(u64 *)from)
- << zshift(nbytes + off))/* zero upper bytes */
- >> zshift(nbytes); /* place at bottom */
- }
- pbuf->carry_bytes = nbytes;
-}
-
-/*
- * Read nbytes bytes from "from" and put them at the next significant bytes
- * of pbuf->carry. Unused bytes are zeroed. It is expected that the extra
- * read does not overfill carry.
- *
- * NOTES:
- * o from may _not_ be u64 aligned
- * o nbytes may span a QW boundary
- */
-static inline void read_extra_bytes(struct pio_buf *pbuf,
- const void *from, unsigned int nbytes)
-{
- unsigned long off = (unsigned long)from & 0x7;
- unsigned int room, xbytes;
-
- /* align our pointer */
- from = (void *)((unsigned long)from & ~0x7l);
-
- /* check count first - don't read anything if count is zero */
- while (nbytes) {
- /* find the number of bytes in this u64 */
- room = 8 - off; /* this u64 has room for this many bytes */
- xbytes = min(room, nbytes);
-
- /*
- * shift down to zero lower bytes, shift up to zero upper
- * bytes, shift back down to move into place
- */
- pbuf->carry.val64 |= (((*(u64 *)from)
- >> mshift(off))
- << zshift(xbytes))
- >> zshift(xbytes + pbuf->carry_bytes);
- off = 0;
- pbuf->carry_bytes += xbytes;
- nbytes -= xbytes;
- from += sizeof(u64);
- }
-}
-
-/*
- * Zero extra bytes from the end of pbuf->carry.
- *
- * NOTES:
- * o zbytes <= old_bytes
- */
-static inline void zero_extra_bytes(struct pio_buf *pbuf, unsigned int zbytes)
-{
- unsigned int remaining;
-
- if (zbytes == 0) /* nothing to do */
- return;
-
- remaining = pbuf->carry_bytes - zbytes; /* remaining bytes */
-
- /* NOTE: zshift only guaranteed to work if remaining != 0 */
- if (remaining)
- pbuf->carry.val64 = (pbuf->carry.val64 << zshift(remaining))
- >> zshift(remaining);
- else
- pbuf->carry.val64 = 0;
- pbuf->carry_bytes = remaining;
-}
-
-/*
- * Write a quad word using parts of pbuf->carry and the next 8 bytes of src.
- * Put the unused part of the next 8 bytes of src into the LSB bytes of
- * pbuf->carry with the upper bytes zeroed..
- *
- * NOTES:
- * o result must keep unused bytes zeroed
- * o src must be u64 aligned
- */
-static inline void merge_write8(
- struct pio_buf *pbuf,
- void __iomem *dest,
- const void *src)
-{
- u64 new, temp;
-
- new = *(u64 *)src;
- temp = pbuf->carry.val64 | (new << mshift(pbuf->carry_bytes));
- writeq(temp, dest);
- pbuf->carry.val64 = new >> zshift(pbuf->carry_bytes);
-}
-
-/*
- * Write a quad word using all bytes of carry.
- */
-static inline void carry8_write8(union mix carry, void __iomem *dest)
-{
- writeq(carry.val64, dest);
-}
-
-/*
- * Write a quad word using all the valid bytes of carry. If carry
- * has zero valid bytes, nothing is written.
- * Returns 0 on nothing written, non-zero on quad word written.
- */
-static inline int carry_write8(struct pio_buf *pbuf, void __iomem *dest)
-{
- if (pbuf->carry_bytes) {
- /* unused bytes are always kept zeroed, so just write */
- writeq(pbuf->carry.val64, dest);
- return 1;
- }
-
- return 0;
-}
-
-#else /* USE_SHIFTS */
-/*
- * Handle carry bytes using byte copies.
- *
- * NOTE: the value the unused portion of carry is left uninitialized.
- */
-
-/*
* Jump copy - no-loop copy for < 8 bytes.
*/
static inline void jcopy(u8 *dest, const u8 *src, u32 n)
@@ -338,18 +191,25 @@ static inline void jcopy(u8 *dest, const u8 *src, u32 n)
switch (n) {
case 7:
*dest++ = *src++;
+ /* fall through */
case 6:
*dest++ = *src++;
+ /* fall through */
case 5:
*dest++ = *src++;
+ /* fall through */
case 4:
*dest++ = *src++;
+ /* fall through */
case 3:
*dest++ = *src++;
+ /* fall through */
case 2:
*dest++ = *src++;
+ /* fall through */
case 1:
*dest++ = *src++;
+ /* fall through */
}
}
@@ -365,6 +225,7 @@ static inline void jcopy(u8 *dest, const u8 *src, u32 n)
static inline void read_low_bytes(struct pio_buf *pbuf, const void *from,
unsigned int nbytes)
{
+ pbuf->carry.val64 = 0;
jcopy(&pbuf->carry.val8[0], from, nbytes);
pbuf->carry_bytes = nbytes;
}
@@ -385,40 +246,31 @@ static inline void read_extra_bytes(struct pio_buf *pbuf,
}
/*
- * Zero extra bytes from the end of pbuf->carry.
- *
- * We do not care about the value of unused bytes in carry, so just
- * reduce the byte count.
+ * Write a quad word using parts of pbuf->carry and the next 8 bytes of src.
+ * Put the unused part of the next 8 bytes of src into the LSB bytes of
+ * pbuf->carry with the upper bytes zeroed..
*
* NOTES:
- * o zbytes <= old_bytes
- */
-static inline void zero_extra_bytes(struct pio_buf *pbuf, unsigned int zbytes)
-{
- pbuf->carry_bytes -= zbytes;
-}
-
-/*
- * Write a quad word using parts of pbuf->carry and the next 8 bytes of src.
- * Put the unused part of the next 8 bytes of src into the low bytes of
- * pbuf->carry.
+ * o result must keep unused bytes zeroed
+ * o src must be u64 aligned
*/
static inline void merge_write8(
struct pio_buf *pbuf,
- void *dest,
+ void __iomem *dest,
const void *src)
{
- u32 remainder = 8 - pbuf->carry_bytes;
+ u64 new, temp;
- jcopy(&pbuf->carry.val8[pbuf->carry_bytes], src, remainder);
- writeq(pbuf->carry.val64, dest);
- jcopy(&pbuf->carry.val8[0], src + remainder, pbuf->carry_bytes);
+ new = *(u64 *)src;
+ temp = pbuf->carry.val64 | (new << mshift(pbuf->carry_bytes));
+ writeq(temp, dest);
+ pbuf->carry.val64 = new >> zshift(pbuf->carry_bytes);
}
/*
* Write a quad word using all bytes of carry.
*/
-static inline void carry8_write8(union mix carry, void *dest)
+static inline void carry8_write8(union mix carry, void __iomem *dest)
{
writeq(carry.val64, dest);
}
@@ -428,20 +280,16 @@ static inline void carry8_write8(union mix carry, void *dest)
* has zero valid bytes, nothing is written.
* Returns 0 on nothing written, non-zero on quad word written.
*/
-static inline int carry_write8(struct pio_buf *pbuf, void *dest)
+static inline int carry_write8(struct pio_buf *pbuf, void __iomem *dest)
{
if (pbuf->carry_bytes) {
- u64 zero = 0;
-
- jcopy(&pbuf->carry.val8[pbuf->carry_bytes], (u8 *)&zero,
- 8 - pbuf->carry_bytes);
+ /* unused bytes are always kept zeroed, so just write */
writeq(pbuf->carry.val64, dest);
return 1;
}
return 0;
}
-#endif /* USE_SHIFTS */
/*
* Segmented PIO Copy - start
@@ -550,8 +398,8 @@ static void mid_copy_mix(struct pio_buf *pbuf, const void *from, size_t nbytes)
{
void __iomem *dest = pbuf->start + (pbuf->qw_written * sizeof(u64));
void __iomem *dend; /* 8-byte data end */
- unsigned long qw_to_write = (pbuf->carry_bytes + nbytes) >> 3;
- unsigned long bytes_left = (pbuf->carry_bytes + nbytes) & 0x7;
+ unsigned long qw_to_write = nbytes >> 3;
+ unsigned long bytes_left = nbytes & 0x7;
/* calculate 8-byte data end */
dend = dest + (qw_to_write * sizeof(u64));
@@ -621,16 +469,46 @@ static void mid_copy_mix(struct pio_buf *pbuf, const void *from, size_t nbytes)
dest += sizeof(u64);
}
- /* adjust carry */
- if (pbuf->carry_bytes < bytes_left) {
- /* need to read more */
- read_extra_bytes(pbuf, from, bytes_left - pbuf->carry_bytes);
+ pbuf->qw_written += qw_to_write;
+
+ /* handle carry and left-over bytes */
+ if (pbuf->carry_bytes + bytes_left >= 8) {
+ unsigned long nread;
+
+ /* there is enough to fill another qw - fill carry */
+ nread = 8 - pbuf->carry_bytes;
+ read_extra_bytes(pbuf, from, nread);
+
+ /*
+ * One more write - but need to make sure dest is correct.
+ * Check for wrap and the possibility the write
+ * should be in SOP space.
+ *
+ * The two checks immediately below cannot both be true, hence
+ * the else. If we have wrapped, we cannot still be within the
+ * first block. Conversely, if we are still in the first block,
+ * we cannot have wrapped. We do the wrap check first as that
+ * is more likely.
+ */
+ /* adjust if we have wrapped */
+ if (dest >= pbuf->end)
+ dest -= pbuf->size;
+ /* jump to the SOP range if within the first block */
+ else if (pbuf->qw_written < PIO_BLOCK_QWS)
+ dest += SOP_DISTANCE;
+
+ /* flush out full carry */
+ carry8_write8(pbuf->carry, dest);
+ pbuf->qw_written++;
+
+ /* now adjust and read the rest of the bytes into carry */
+ bytes_left -= nread;
+ from += nread; /* from is now not aligned */
+ read_low_bytes(pbuf, from, bytes_left);
} else {
- /* remove invalid bytes */
- zero_extra_bytes(pbuf, pbuf->carry_bytes - bytes_left);
+ /* not enough to fill another qw, append the rest to carry */
+ read_extra_bytes(pbuf, from, bytes_left);
}
-
- pbuf->qw_written += qw_to_write;
}
/*
@@ -771,6 +649,9 @@ void seg_pio_copy_mid(struct pio_buf *pbuf, const void *from, size_t nbytes)
read_extra_bytes(pbuf, from, to_fill);
from += to_fill;
nbytes -= to_fill;
+ /* may not be enough valid bytes left to align */
+ if (extra > nbytes)
+ extra = nbytes;
/* ...now write carry */
dest = pbuf->start + (pbuf->qw_written * sizeof(u64));
@@ -798,6 +679,15 @@ void seg_pio_copy_mid(struct pio_buf *pbuf, const void *from, size_t nbytes)
read_low_bytes(pbuf, from, extra);
from += extra;
nbytes -= extra;
+ /*
+ * If no bytes are left, return early - we are done.
+ * NOTE: This short-circuit is *required* because
+ * "extra" may have been reduced in size and "from"
+ * is not aligned, as required when leaving this
+ * if block.
+ */
+ if (nbytes == 0)
+ return;
}
/* at this point, from is QW aligned */
diff --git a/drivers/infiniband/hw/hfi1/platform.c b/drivers/infiniband/hw/hfi1/platform.c
index 965c8aef0c60..202433178864 100644
--- a/drivers/infiniband/hw/hfi1/platform.c
+++ b/drivers/infiniband/hw/hfi1/platform.c
@@ -47,29 +47,39 @@
#include "hfi.h"
#include "efivar.h"
+#include "eprom.h"
void get_platform_config(struct hfi1_devdata *dd)
{
int ret = 0;
unsigned long size = 0;
u8 *temp_platform_config = NULL;
+ u32 esize;
+
+ ret = eprom_read_platform_config(dd, (void **)&temp_platform_config,
+ &esize);
+ if (!ret) {
+ /* success */
+ size = esize;
+ goto success;
+ }
+ /* fail, try EFI variable */
ret = read_hfi1_efi_var(dd, "configuration", &size,
(void **)&temp_platform_config);
- if (ret) {
- dd_dev_info(dd,
- "%s: Failed to get platform config from UEFI, falling back to request firmware\n",
- __func__);
- /* fall back to request firmware */
- platform_config_load = 1;
- goto bail;
- }
+ if (!ret)
+ goto success;
+
+ dd_dev_info(dd,
+ "%s: Failed to get platform config from UEFI, falling back to request firmware\n",
+ __func__);
+ /* fall back to request firmware */
+ platform_config_load = 1;
+ return;
+success:
dd->platform_config.data = temp_platform_config;
dd->platform_config.size = size;
-
-bail:
- /* exit */;
}
void free_platform_config(struct hfi1_devdata *dd)
diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c
index 4e4d8317c281..9fc75e7e8781 100644
--- a/drivers/infiniband/hw/hfi1/qp.c
+++ b/drivers/infiniband/hw/hfi1/qp.c
@@ -202,8 +202,7 @@ static void flush_iowait(struct rvt_qp *qp)
write_seqlock_irqsave(&dev->iowait_lock, flags);
if (!list_empty(&priv->s_iowait.list)) {
list_del_init(&priv->s_iowait.list);
- if (atomic_dec_and_test(&qp->refcount))
- wake_up(&qp->wait);
+ rvt_put_qp(qp);
}
write_sequnlock_irqrestore(&dev->iowait_lock, flags);
}
@@ -450,13 +449,14 @@ static void qp_pio_drain(struct rvt_qp *qp)
*/
void hfi1_schedule_send(struct rvt_qp *qp)
{
+ lockdep_assert_held(&qp->s_lock);
if (hfi1_send_ok(qp))
_hfi1_schedule_send(qp);
}
/**
- * hfi1_get_credit - flush the send work queue of a QP
- * @qp: the qp who's send work queue to flush
+ * hfi1_get_credit - handle credit in aeth
+ * @qp: the qp
* @aeth: the Acknowledge Extended Transport Header
*
* The QP s_lock should be held.
@@ -465,6 +465,7 @@ void hfi1_get_credit(struct rvt_qp *qp, u32 aeth)
{
u32 credit = (aeth >> HFI1_AETH_CREDIT_SHIFT) & HFI1_AETH_CREDIT_MASK;
+ lockdep_assert_held(&qp->s_lock);
/*
* If the credit is invalid, we can send
* as many packets as we like. Otherwise, we have to
@@ -503,8 +504,7 @@ void hfi1_qp_wakeup(struct rvt_qp *qp, u32 flag)
}
spin_unlock_irqrestore(&qp->s_lock, flags);
/* Notify hfi1_destroy_qp() if it is waiting. */
- if (atomic_dec_and_test(&qp->refcount))
- wake_up(&qp->wait);
+ rvt_put_qp(qp);
}
static int iowait_sleep(
@@ -544,7 +544,7 @@ static int iowait_sleep(
qp->s_flags |= RVT_S_WAIT_DMA_DESC;
list_add_tail(&priv->s_iowait.list, &sde->dmawait);
trace_hfi1_qpsleep(qp, RVT_S_WAIT_DMA_DESC);
- atomic_inc(&qp->refcount);
+ rvt_get_qp(qp);
}
write_sequnlock(&dev->iowait_lock);
qp->s_flags &= ~RVT_S_BUSY;
@@ -808,6 +808,13 @@ void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp,
kfree(priv);
return ERR_PTR(-ENOMEM);
}
+ iowait_init(
+ &priv->s_iowait,
+ 1,
+ _hfi1_do_send,
+ iowait_sleep,
+ iowait_wakeup,
+ iowait_sdma_drained);
setup_timer(&priv->s_rnr_timer, hfi1_rc_rnr_retry, (unsigned long)qp);
qp->s_timer.function = hfi1_rc_timeout;
return priv;
@@ -848,6 +855,7 @@ unsigned free_all_qps(struct rvt_dev_info *rdi)
void flush_qp_waiters(struct rvt_qp *qp)
{
+ lockdep_assert_held(&qp->s_lock);
flush_iowait(qp);
hfi1_stop_rc_timers(qp);
}
@@ -873,13 +881,6 @@ void notify_qp_reset(struct rvt_qp *qp)
{
struct hfi1_qp_priv *priv = qp->priv;
- iowait_init(
- &priv->s_iowait,
- 1,
- _hfi1_do_send,
- iowait_sleep,
- iowait_wakeup,
- iowait_sdma_drained);
priv->r_adefered = 0;
clear_ahg(qp);
}
@@ -963,8 +964,7 @@ void notify_error_qp(struct rvt_qp *qp)
if (!list_empty(&priv->s_iowait.list) && !(qp->s_flags & RVT_S_BUSY)) {
qp->s_flags &= ~RVT_S_ANY_WAIT_IO;
list_del_init(&priv->s_iowait.list);
- if (atomic_dec_and_test(&qp->refcount))
- wake_up(&qp->wait);
+ rvt_put_qp(qp);
}
write_sequnlock(&dev->iowait_lock);
diff --git a/drivers/infiniband/hw/hfi1/qsfp.c b/drivers/infiniband/hw/hfi1/qsfp.c
index 4e95ad810847..1869f639c3ae 100644
--- a/drivers/infiniband/hw/hfi1/qsfp.c
+++ b/drivers/infiniband/hw/hfi1/qsfp.c
@@ -161,7 +161,7 @@ static struct hfi1_i2c_bus *init_i2c_bus(struct hfi1_devdata *dd,
bus->algo.getsda = hfi1_getsda;
bus->algo.getscl = hfi1_getscl;
bus->algo.udelay = 5;
- bus->algo.timeout = usecs_to_jiffies(50);
+ bus->algo.timeout = usecs_to_jiffies(100000);
bus->algo.data = bus;
bus->adapter.owner = THIS_MODULE;
diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c
index 5da190e6011b..8bc5013f39a1 100644
--- a/drivers/infiniband/hw/hfi1/rc.c
+++ b/drivers/infiniband/hw/hfi1/rc.c
@@ -55,7 +55,7 @@
#include "trace.h"
/* cut down ridiculously long IB macro names */
-#define OP(x) IB_OPCODE_RC_##x
+#define OP(x) RC_OP(x)
/**
* hfi1_add_retry_timer - add/start a retry timer
@@ -68,6 +68,7 @@ static inline void hfi1_add_retry_timer(struct rvt_qp *qp)
struct ib_qp *ibqp = &qp->ibqp;
struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
+ lockdep_assert_held(&qp->s_lock);
qp->s_flags |= RVT_S_TIMER;
/* 4.096 usec. * (1 << qp->timeout) */
qp->s_timer.expires = jiffies + qp->timeout_jiffies +
@@ -86,6 +87,7 @@ void hfi1_add_rnr_timer(struct rvt_qp *qp, u32 to)
{
struct hfi1_qp_priv *priv = qp->priv;
+ lockdep_assert_held(&qp->s_lock);
qp->s_flags |= RVT_S_WAIT_RNR;
qp->s_timer.expires = jiffies + usecs_to_jiffies(to);
add_timer(&priv->s_rnr_timer);
@@ -103,6 +105,7 @@ static inline void hfi1_mod_retry_timer(struct rvt_qp *qp)
struct ib_qp *ibqp = &qp->ibqp;
struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
+ lockdep_assert_held(&qp->s_lock);
qp->s_flags |= RVT_S_TIMER;
/* 4.096 usec. * (1 << qp->timeout) */
mod_timer(&qp->s_timer, jiffies + qp->timeout_jiffies +
@@ -120,6 +123,7 @@ static inline int hfi1_stop_retry_timer(struct rvt_qp *qp)
{
int rval = 0;
+ lockdep_assert_held(&qp->s_lock);
/* Remove QP from retry */
if (qp->s_flags & RVT_S_TIMER) {
qp->s_flags &= ~RVT_S_TIMER;
@@ -138,6 +142,7 @@ void hfi1_stop_rc_timers(struct rvt_qp *qp)
{
struct hfi1_qp_priv *priv = qp->priv;
+ lockdep_assert_held(&qp->s_lock);
/* Remove QP from all timers */
if (qp->s_flags & (RVT_S_TIMER | RVT_S_WAIT_RNR)) {
qp->s_flags &= ~(RVT_S_TIMER | RVT_S_WAIT_RNR);
@@ -158,6 +163,7 @@ static inline int hfi1_stop_rnr_timer(struct rvt_qp *qp)
int rval = 0;
struct hfi1_qp_priv *priv = qp->priv;
+ lockdep_assert_held(&qp->s_lock);
/* Remove QP from rnr timer */
if (qp->s_flags & RVT_S_WAIT_RNR) {
qp->s_flags &= ~RVT_S_WAIT_RNR;
@@ -178,18 +184,6 @@ void hfi1_del_timers_sync(struct rvt_qp *qp)
del_timer_sync(&priv->s_rnr_timer);
}
-/* only opcode mask for adaptive pio */
-const u32 rc_only_opcode =
- BIT(OP(SEND_ONLY) & 0x1f) |
- BIT(OP(SEND_ONLY_WITH_IMMEDIATE & 0x1f)) |
- BIT(OP(RDMA_WRITE_ONLY & 0x1f)) |
- BIT(OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE & 0x1f)) |
- BIT(OP(RDMA_READ_REQUEST & 0x1f)) |
- BIT(OP(ACKNOWLEDGE & 0x1f)) |
- BIT(OP(ATOMIC_ACKNOWLEDGE & 0x1f)) |
- BIT(OP(COMPARE_SWAP & 0x1f)) |
- BIT(OP(FETCH_ADD & 0x1f));
-
static u32 restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe,
u32 psn, u32 pmtu)
{
@@ -216,7 +210,7 @@ static u32 restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe,
* Note the QP s_lock must be held.
*/
static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
- struct hfi1_other_headers *ohdr,
+ struct ib_other_headers *ohdr,
struct hfi1_pkt_state *ps)
{
struct rvt_ack_entry *e;
@@ -228,6 +222,7 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
u32 pmtu = qp->pmtu;
struct hfi1_qp_priv *priv = qp->priv;
+ lockdep_assert_held(&qp->s_lock);
/* Don't send an ACK if we aren't supposed to. */
if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK))
goto bail;
@@ -299,10 +294,7 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
len = 0;
qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE);
ohdr->u.at.aeth = hfi1_compute_aeth(qp);
- ohdr->u.at.atomic_ack_eth[0] =
- cpu_to_be32(e->atomic_data >> 32);
- ohdr->u.at.atomic_ack_eth[1] =
- cpu_to_be32(e->atomic_data);
+ ib_u64_put(e->atomic_data, &ohdr->u.at.atomic_ack_eth);
hwords += sizeof(ohdr->u.at) / sizeof(u32);
bth2 = mask_psn(e->psn);
e->sent = 1;
@@ -390,7 +382,7 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
{
struct hfi1_qp_priv *priv = qp->priv;
struct hfi1_ibdev *dev = to_idev(qp->ibqp.device);
- struct hfi1_other_headers *ohdr;
+ struct ib_other_headers *ohdr;
struct rvt_sge_state *ss;
struct rvt_swqe *wqe;
/* header size in 32-bit words LRH+BTH = (8+12)/4. */
@@ -403,6 +395,7 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
int middle = 0;
int delta;
+ lockdep_assert_held(&qp->s_lock);
ps->s_txreq = get_txreq(ps->dev, qp);
if (IS_ERR(ps->s_txreq))
goto bail_no_tx;
@@ -566,8 +559,9 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
qp->s_flags |= RVT_S_WAIT_SSN_CREDIT;
goto bail;
}
- ohdr->u.rc.reth.vaddr =
- cpu_to_be64(wqe->rdma_wr.remote_addr);
+ put_ib_reth_vaddr(
+ wqe->rdma_wr.remote_addr,
+ &ohdr->u.rc.reth);
ohdr->u.rc.reth.rkey =
cpu_to_be32(wqe->rdma_wr.rkey);
ohdr->u.rc.reth.length = cpu_to_be32(len);
@@ -608,8 +602,9 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
qp->s_lsn++;
}
- ohdr->u.rc.reth.vaddr =
- cpu_to_be64(wqe->rdma_wr.remote_addr);
+ put_ib_reth_vaddr(
+ wqe->rdma_wr.remote_addr,
+ &ohdr->u.rc.reth);
ohdr->u.rc.reth.rkey =
cpu_to_be32(wqe->rdma_wr.rkey);
ohdr->u.rc.reth.length = cpu_to_be32(len);
@@ -640,20 +635,18 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
}
if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
qp->s_state = OP(COMPARE_SWAP);
- ohdr->u.atomic_eth.swap_data = cpu_to_be64(
- wqe->atomic_wr.swap);
- ohdr->u.atomic_eth.compare_data = cpu_to_be64(
- wqe->atomic_wr.compare_add);
+ put_ib_ateth_swap(wqe->atomic_wr.swap,
+ &ohdr->u.atomic_eth);
+ put_ib_ateth_compare(wqe->atomic_wr.compare_add,
+ &ohdr->u.atomic_eth);
} else {
qp->s_state = OP(FETCH_ADD);
- ohdr->u.atomic_eth.swap_data = cpu_to_be64(
- wqe->atomic_wr.compare_add);
- ohdr->u.atomic_eth.compare_data = 0;
+ put_ib_ateth_swap(wqe->atomic_wr.compare_add,
+ &ohdr->u.atomic_eth);
+ put_ib_ateth_compare(0, &ohdr->u.atomic_eth);
}
- ohdr->u.atomic_eth.vaddr[0] = cpu_to_be32(
- wqe->atomic_wr.remote_addr >> 32);
- ohdr->u.atomic_eth.vaddr[1] = cpu_to_be32(
- wqe->atomic_wr.remote_addr);
+ put_ib_ateth_vaddr(wqe->atomic_wr.remote_addr,
+ &ohdr->u.atomic_eth);
ohdr->u.atomic_eth.rkey = cpu_to_be32(
wqe->atomic_wr.rkey);
hwords += sizeof(struct ib_atomic_eth) / sizeof(u32);
@@ -779,8 +772,9 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
* See restart_rc().
*/
len = (delta_psn(qp->s_psn, wqe->psn)) * pmtu;
- ohdr->u.rc.reth.vaddr =
- cpu_to_be64(wqe->rdma_wr.remote_addr + len);
+ put_ib_reth_vaddr(
+ wqe->rdma_wr.remote_addr + len,
+ &ohdr->u.rc.reth);
ohdr->u.rc.reth.rkey =
cpu_to_be32(wqe->rdma_wr.rkey);
ohdr->u.rc.reth.length = cpu_to_be32(wqe->length - len);
@@ -841,7 +835,7 @@ bail_no_tx:
*
* This is called from hfi1_rc_rcv() and handle_receive_interrupt().
* Note that RDMA reads and atomics are handled in the
- * send side QP state and tasklet.
+ * send side QP state and send engine.
*/
void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp,
int is_fecn)
@@ -856,8 +850,8 @@ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp,
u32 vl, plen;
struct send_context *sc;
struct pio_buf *pbuf;
- struct hfi1_ib_header hdr;
- struct hfi1_other_headers *ohdr;
+ struct ib_header hdr;
+ struct ib_other_headers *ohdr;
unsigned long flags;
/* Don't send ACK or NAK if a RDMA read or atomic is pending. */
@@ -917,7 +911,7 @@ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp,
if (!pbuf) {
/*
* We have no room to send at the moment. Pass
- * responsibility for sending the ACK to the send tasklet
+ * responsibility for sending the ACK to the send engine
* so that when enough buffer space becomes available,
* the ACK is sent ahead of other outgoing packets.
*/
@@ -932,16 +926,19 @@ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp,
return;
queue_ack:
- this_cpu_inc(*ibp->rvp.rc_qacks);
spin_lock_irqsave(&qp->s_lock, flags);
+ if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK))
+ goto unlock;
+ this_cpu_inc(*ibp->rvp.rc_qacks);
qp->s_flags |= RVT_S_ACK_PENDING | RVT_S_RESP_PENDING;
qp->s_nak_state = qp->r_nak_state;
qp->s_ack_psn = qp->r_ack_psn;
if (is_fecn)
qp->s_flags |= RVT_S_ECN;
- /* Schedule the send tasklet. */
+ /* Schedule the send engine. */
hfi1_schedule_send(qp);
+unlock:
spin_unlock_irqrestore(&qp->s_lock, flags);
}
@@ -960,6 +957,7 @@ static void reset_psn(struct rvt_qp *qp, u32 psn)
struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, n);
u32 opcode;
+ lockdep_assert_held(&qp->s_lock);
qp->s_cur = n;
/*
@@ -1027,7 +1025,7 @@ done:
qp->s_psn = psn;
/*
* Set RVT_S_WAIT_PSN as rc_complete() may start the timer
- * asynchronously before the send tasklet can get scheduled.
+ * asynchronously before the send engine can get scheduled.
* Doing it in hfi1_make_rc_req() is too late.
*/
if ((cmp_psn(qp->s_psn, qp->s_sending_hpsn) <= 0) &&
@@ -1045,6 +1043,8 @@ static void restart_rc(struct rvt_qp *qp, u32 psn, int wait)
struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
struct hfi1_ibport *ibp;
+ lockdep_assert_held(&qp->r_lock);
+ lockdep_assert_held(&qp->s_lock);
if (qp->s_retry == 0) {
if (qp->s_mig_state == IB_MIG_ARMED) {
hfi1_migrate_qp(qp);
@@ -1121,6 +1121,7 @@ static void reset_sending_psn(struct rvt_qp *qp, u32 psn)
struct rvt_swqe *wqe;
u32 n = qp->s_last;
+ lockdep_assert_held(&qp->s_lock);
/* Find the work request corresponding to the given PSN. */
for (;;) {
wqe = rvt_get_swqe_ptr(qp, n);
@@ -1141,15 +1142,16 @@ static void reset_sending_psn(struct rvt_qp *qp, u32 psn)
/*
* This should be called with the QP s_lock held and interrupts disabled.
*/
-void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_ib_header *hdr)
+void hfi1_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr)
{
- struct hfi1_other_headers *ohdr;
+ struct ib_other_headers *ohdr;
struct rvt_swqe *wqe;
struct ib_wc wc;
unsigned i;
u32 opcode;
u32 psn;
+ lockdep_assert_held(&qp->s_lock);
if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND))
return;
@@ -1241,6 +1243,7 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
struct ib_wc wc;
unsigned i;
+ lockdep_assert_held(&qp->s_lock);
/*
* Don't decrement refcount and don't generate a
* completion if the SWQE is being resent until the send
@@ -1340,6 +1343,7 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
int diff;
unsigned long to;
+ lockdep_assert_held(&qp->s_lock);
/*
* Note that NAKs implicitly ACK outstanding SEND and RDMA write
* requests and implicitly NAK RDMA read and atomic requests issued
@@ -1389,7 +1393,7 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
restart_rc(qp, qp->s_last_psn + 1, 0);
if (list_empty(&qp->rspwait)) {
qp->r_flags |= RVT_R_RSP_SEND;
- atomic_inc(&qp->refcount);
+ rvt_get_qp(qp);
list_add_tail(&qp->rspwait,
&rcd->qp_wait_list);
}
@@ -1555,6 +1559,7 @@ static void rdma_seq_err(struct rvt_qp *qp, struct hfi1_ibport *ibp, u32 psn,
{
struct rvt_swqe *wqe;
+ lockdep_assert_held(&qp->s_lock);
/* Remove QP from retry timer */
hfi1_stop_rc_timers(qp);
@@ -1573,7 +1578,7 @@ static void rdma_seq_err(struct rvt_qp *qp, struct hfi1_ibport *ibp, u32 psn,
restart_rc(qp, qp->s_last_psn + 1, 0);
if (list_empty(&qp->rspwait)) {
qp->r_flags |= RVT_R_RSP_SEND;
- atomic_inc(&qp->refcount);
+ rvt_get_qp(qp);
list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
}
}
@@ -1595,7 +1600,7 @@ static void rdma_seq_err(struct rvt_qp *qp, struct hfi1_ibport *ibp, u32 psn,
* Called at interrupt level.
*/
static void rc_rcv_resp(struct hfi1_ibport *ibp,
- struct hfi1_other_headers *ohdr,
+ struct ib_other_headers *ohdr,
void *data, u32 tlen, struct rvt_qp *qp,
u32 opcode, u32 psn, u32 hdrsize, u32 pmtu,
struct hfi1_ctxtdata *rcd)
@@ -1649,14 +1654,10 @@ static void rc_rcv_resp(struct hfi1_ibport *ibp,
case OP(ATOMIC_ACKNOWLEDGE):
case OP(RDMA_READ_RESPONSE_FIRST):
aeth = be32_to_cpu(ohdr->u.aeth);
- if (opcode == OP(ATOMIC_ACKNOWLEDGE)) {
- __be32 *p = ohdr->u.at.atomic_ack_eth;
-
- val = ((u64)be32_to_cpu(p[0]) << 32) |
- be32_to_cpu(p[1]);
- } else {
+ if (opcode == OP(ATOMIC_ACKNOWLEDGE))
+ val = ib_u64_get(&ohdr->u.at.atomic_ack_eth);
+ else
val = 0;
- }
if (!do_rc_ack(qp, aeth, psn, opcode, val, rcd) ||
opcode != OP(RDMA_READ_RESPONSE_FIRST))
goto ack_done;
@@ -1782,7 +1783,7 @@ static inline void rc_defered_ack(struct hfi1_ctxtdata *rcd,
{
if (list_empty(&qp->rspwait)) {
qp->r_flags |= RVT_R_RSP_NAK;
- atomic_inc(&qp->refcount);
+ rvt_get_qp(qp);
list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
}
}
@@ -1796,8 +1797,7 @@ static inline void rc_cancel_ack(struct rvt_qp *qp)
return;
list_del_init(&qp->rspwait);
qp->r_flags &= ~RVT_R_RSP_NAK;
- if (atomic_dec_and_test(&qp->refcount))
- wake_up(&qp->wait);
+ rvt_put_qp(qp);
}
/**
@@ -1815,7 +1815,7 @@ static inline void rc_cancel_ack(struct rvt_qp *qp)
* Return 1 if no more processing is needed; otherwise return 0 to
* schedule a response to be sent.
*/
-static noinline int rc_rcv_error(struct hfi1_other_headers *ohdr, void *data,
+static noinline int rc_rcv_error(struct ib_other_headers *ohdr, void *data,
struct rvt_qp *qp, u32 opcode, u32 psn,
int diff, struct hfi1_ctxtdata *rcd)
{
@@ -1923,7 +1923,7 @@ static noinline int rc_rcv_error(struct hfi1_other_headers *ohdr, void *data,
}
if (len != 0) {
u32 rkey = be32_to_cpu(reth->rkey);
- u64 vaddr = be64_to_cpu(reth->vaddr);
+ u64 vaddr = get_ib_reth_vaddr(reth);
int ok;
ok = rvt_rkey_ok(qp, &e->rdma_sge, len, vaddr, rkey,
@@ -1946,7 +1946,7 @@ static noinline int rc_rcv_error(struct hfi1_other_headers *ohdr, void *data,
case OP(FETCH_ADD): {
/*
* If we didn't find the atomic request in the ack queue
- * or the send tasklet is already backed up to send an
+ * or the send engine is already backed up to send an
* earlier entry, we can ignore this request.
*/
if (!e || e->opcode != (u8)opcode || old_req)
@@ -2123,13 +2123,13 @@ void process_becn(struct hfi1_pportdata *ppd, u8 sl, u16 rlid, u32 lqpn,
void hfi1_rc_rcv(struct hfi1_packet *packet)
{
struct hfi1_ctxtdata *rcd = packet->rcd;
- struct hfi1_ib_header *hdr = packet->hdr;
+ struct ib_header *hdr = packet->hdr;
u32 rcv_flags = packet->rcv_flags;
void *data = packet->ebuf;
u32 tlen = packet->tlen;
struct rvt_qp *qp = packet->qp;
struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
- struct hfi1_other_headers *ohdr = packet->ohdr;
+ struct ib_other_headers *ohdr = packet->ohdr;
u32 bth0, opcode;
u32 hdrsize = packet->hlen;
u32 psn;
@@ -2143,6 +2143,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet)
int copy_last = 0;
u32 rkey;
+ lockdep_assert_held(&qp->r_lock);
bth0 = be32_to_cpu(ohdr->bth[0]);
if (hfi1_ruc_check_hdr(ibp, hdr, rcv_flags & HFI1_HAS_GRH, qp, bth0))
return;
@@ -2342,7 +2343,7 @@ send_last:
qp->r_sge.sg_list = NULL;
if (qp->r_len != 0) {
u32 rkey = be32_to_cpu(reth->rkey);
- u64 vaddr = be64_to_cpu(reth->vaddr);
+ u64 vaddr = get_ib_reth_vaddr(reth);
int ok;
/* Check rkey & NAK */
@@ -2397,7 +2398,7 @@ send_last:
len = be32_to_cpu(reth->length);
if (len) {
u32 rkey = be32_to_cpu(reth->rkey);
- u64 vaddr = be64_to_cpu(reth->vaddr);
+ u64 vaddr = get_ib_reth_vaddr(reth);
int ok;
/* Check rkey & NAK */
@@ -2432,7 +2433,7 @@ send_last:
qp->r_nak_state = 0;
qp->r_head_ack_queue = next;
- /* Schedule the send tasklet. */
+ /* Schedule the send engine. */
qp->s_flags |= RVT_S_RESP_PENDING;
hfi1_schedule_send(qp);
@@ -2469,8 +2470,7 @@ send_last:
e->rdma_sge.mr = NULL;
}
ateth = &ohdr->u.atomic_eth;
- vaddr = ((u64)be32_to_cpu(ateth->vaddr[0]) << 32) |
- be32_to_cpu(ateth->vaddr[1]);
+ vaddr = get_ib_ateth_vaddr(ateth);
if (unlikely(vaddr & (sizeof(u64) - 1)))
goto nack_inv_unlck;
rkey = be32_to_cpu(ateth->rkey);
@@ -2481,11 +2481,11 @@ send_last:
goto nack_acc_unlck;
/* Perform atomic OP and save result. */
maddr = (atomic64_t *)qp->r_sge.sge.vaddr;
- sdata = be64_to_cpu(ateth->swap_data);
+ sdata = get_ib_ateth_swap(ateth);
e->atomic_data = (opcode == OP(FETCH_ADD)) ?
(u64)atomic64_add_return(sdata, maddr) - sdata :
(u64)cmpxchg((u64 *)qp->r_sge.sge.vaddr,
- be64_to_cpu(ateth->compare_data),
+ get_ib_ateth_compare(ateth),
sdata);
rvt_put_mr(qp->r_sge.sge.mr);
qp->r_sge.num_sge = 0;
@@ -2499,7 +2499,7 @@ send_last:
qp->r_nak_state = 0;
qp->r_head_ack_queue = next;
- /* Schedule the send tasklet. */
+ /* Schedule the send engine. */
qp->s_flags |= RVT_S_RESP_PENDING;
hfi1_schedule_send(qp);
@@ -2575,12 +2575,12 @@ send_ack:
void hfi1_rc_hdrerr(
struct hfi1_ctxtdata *rcd,
- struct hfi1_ib_header *hdr,
+ struct ib_header *hdr,
u32 rcv_flags,
struct rvt_qp *qp)
{
int has_grh = rcv_flags & HFI1_HAS_GRH;
- struct hfi1_other_headers *ohdr;
+ struct ib_other_headers *ohdr;
struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
int diff;
u32 opcode;
diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c
index 48d5094f98e2..a1576aea4756 100644
--- a/drivers/infiniband/hw/hfi1/ruc.c
+++ b/drivers/infiniband/hw/hfi1/ruc.c
@@ -262,7 +262,7 @@ static int gid_ok(union ib_gid *gid, __be64 gid_prefix, __be64 id)
*
* The s_lock will be acquired around the hfi1_migrate_qp() call.
*/
-int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_ib_header *hdr,
+int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct ib_header *hdr,
int has_grh, struct rvt_qp *qp, u32 bth0)
{
__be64 guid;
@@ -352,7 +352,7 @@ err:
*
* This is called from hfi1_do_send() to
* forward a WQE addressed to the same HFI.
- * Note that although we are single threaded due to the tasklet, we still
+ * Note that although we are single threaded due to the send engine, we still
* have to protect against post_send(). We don't have to worry about
* receive interrupts since this is a connected protocol and all packets
* will pass through here.
@@ -765,7 +765,7 @@ static inline void build_ahg(struct rvt_qp *qp, u32 npsn)
}
}
-void hfi1_make_ruc_header(struct rvt_qp *qp, struct hfi1_other_headers *ohdr,
+void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr,
u32 bth0, u32 bth2, int middle,
struct hfi1_pkt_state *ps)
{
@@ -846,7 +846,7 @@ void _hfi1_do_send(struct work_struct *work)
* @work: contains a pointer to the QP
*
* Process entries in the send work queue until credit or queue is
- * exhausted. Only allow one CPU to send a packet per QP (tasklet).
+ * exhausted. Only allow one CPU to send a packet per QP.
* Otherwise, two threads could send packets out of order.
*/
void hfi1_do_send(struct rvt_qp *qp)
@@ -909,7 +909,7 @@ void hfi1_do_send(struct rvt_qp *qp)
spin_unlock_irqrestore(&qp->s_lock, ps.flags);
/*
* If the packet cannot be sent now, return and
- * the send tasklet will be woken up later.
+ * the send engine will be woken up later.
*/
if (hfi1_verbs_send(qp, &ps))
return;
diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c
index f9befc05b349..fd39bcaa062d 100644
--- a/drivers/infiniband/hw/hfi1/sdma.c
+++ b/drivers/infiniband/hw/hfi1/sdma.c
@@ -726,6 +726,34 @@ u16 sdma_get_descq_cnt(void)
}
/**
+ * sdma_engine_get_vl() - return vl for a given sdma engine
+ * @sde: sdma engine
+ *
+ * This function returns the vl mapped to a given engine, or an error if
+ * the mapping can't be found. The mapping fields are protected by RCU.
+ */
+int sdma_engine_get_vl(struct sdma_engine *sde)
+{
+ struct hfi1_devdata *dd = sde->dd;
+ struct sdma_vl_map *m;
+ u8 vl;
+
+ if (sde->this_idx >= TXE_NUM_SDMA_ENGINES)
+ return -EINVAL;
+
+ rcu_read_lock();
+ m = rcu_dereference(dd->sdma_map);
+ if (unlikely(!m)) {
+ rcu_read_unlock();
+ return -EINVAL;
+ }
+ vl = m->engine_to_vl[sde->this_idx];
+ rcu_read_unlock();
+
+ return vl;
+}
+
+/**
* sdma_select_engine_vl() - select sdma engine
* @dd: devdata
* @selector: a spreading factor
@@ -788,6 +816,326 @@ struct sdma_engine *sdma_select_engine_sc(
return sdma_select_engine_vl(dd, selector, vl);
}
+struct sdma_rht_map_elem {
+ u32 mask;
+ u8 ctr;
+ struct sdma_engine *sde[0];
+};
+
+struct sdma_rht_node {
+ unsigned long cpu_id;
+ struct sdma_rht_map_elem *map[HFI1_MAX_VLS_SUPPORTED];
+ struct rhash_head node;
+};
+
+#define NR_CPUS_HINT 192
+
+static const struct rhashtable_params sdma_rht_params = {
+ .nelem_hint = NR_CPUS_HINT,
+ .head_offset = offsetof(struct sdma_rht_node, node),
+ .key_offset = offsetof(struct sdma_rht_node, cpu_id),
+ .key_len = FIELD_SIZEOF(struct sdma_rht_node, cpu_id),
+ .max_size = NR_CPUS,
+ .min_size = 8,
+ .automatic_shrinking = true,
+};
+
+/*
+ * sdma_select_user_engine() - select sdma engine based on user setup
+ * @dd: devdata
+ * @selector: a spreading factor
+ * @vl: this vl
+ *
+ * This function returns an sdma engine for a user sdma request.
+ * User defined sdma engine affinity setting is honored when applicable,
+ * otherwise system default sdma engine mapping is used. To ensure correct
+ * ordering, the mapping from <selector, vl> to sde must remain unchanged.
+ */
+struct sdma_engine *sdma_select_user_engine(struct hfi1_devdata *dd,
+ u32 selector, u8 vl)
+{
+ struct sdma_rht_node *rht_node;
+ struct sdma_engine *sde = NULL;
+ const struct cpumask *current_mask = tsk_cpus_allowed(current);
+ unsigned long cpu_id;
+
+ /*
+ * To ensure that always the same sdma engine(s) will be
+ * selected make sure the process is pinned to this CPU only.
+ */
+ if (cpumask_weight(current_mask) != 1)
+ goto out;
+
+ cpu_id = smp_processor_id();
+ rcu_read_lock();
+ rht_node = rhashtable_lookup_fast(&dd->sdma_rht, &cpu_id,
+ sdma_rht_params);
+
+ if (rht_node && rht_node->map[vl]) {
+ struct sdma_rht_map_elem *map = rht_node->map[vl];
+
+ sde = map->sde[selector & map->mask];
+ }
+ rcu_read_unlock();
+
+ if (sde)
+ return sde;
+
+out:
+ return sdma_select_engine_vl(dd, selector, vl);
+}
+
+static void sdma_populate_sde_map(struct sdma_rht_map_elem *map)
+{
+ int i;
+
+ for (i = 0; i < roundup_pow_of_two(map->ctr ? : 1) - map->ctr; i++)
+ map->sde[map->ctr + i] = map->sde[i];
+}
+
+static void sdma_cleanup_sde_map(struct sdma_rht_map_elem *map,
+ struct sdma_engine *sde)
+{
+ unsigned int i, pow;
+
+ /* only need to check the first ctr entries for a match */
+ for (i = 0; i < map->ctr; i++) {
+ if (map->sde[i] == sde) {
+ memmove(&map->sde[i], &map->sde[i + 1],
+ (map->ctr - i - 1) * sizeof(map->sde[0]));
+ map->ctr--;
+ pow = roundup_pow_of_two(map->ctr ? : 1);
+ map->mask = pow - 1;
+ sdma_populate_sde_map(map);
+ break;
+ }
+ }
+}
+
+/*
+ * Prevents concurrent reads and writes of the sdma engine cpu_mask
+ */
+static DEFINE_MUTEX(process_to_sde_mutex);
+
+ssize_t sdma_set_cpu_to_sde_map(struct sdma_engine *sde, const char *buf,
+ size_t count)
+{
+ struct hfi1_devdata *dd = sde->dd;
+ cpumask_var_t mask, new_mask;
+ unsigned long cpu;
+ int ret, vl, sz;
+
+ vl = sdma_engine_get_vl(sde);
+ if (unlikely(vl < 0))
+ return -EINVAL;
+
+ ret = zalloc_cpumask_var(&mask, GFP_KERNEL);
+ if (!ret)
+ return -ENOMEM;
+
+ ret = zalloc_cpumask_var(&new_mask, GFP_KERNEL);
+ if (!ret) {
+ free_cpumask_var(mask);
+ return -ENOMEM;
+ }
+ ret = cpulist_parse(buf, mask);
+ if (ret)
+ goto out_free;
+
+ if (!cpumask_subset(mask, cpu_online_mask)) {
+ dd_dev_warn(sde->dd, "Invalid CPU mask\n");
+ ret = -EINVAL;
+ goto out_free;
+ }
+
+ sz = sizeof(struct sdma_rht_map_elem) +
+ (TXE_NUM_SDMA_ENGINES * sizeof(struct sdma_engine *));
+
+ mutex_lock(&process_to_sde_mutex);
+
+ for_each_cpu(cpu, mask) {
+ struct sdma_rht_node *rht_node;
+
+ /* Check if we have this already mapped */
+ if (cpumask_test_cpu(cpu, &sde->cpu_mask)) {
+ cpumask_set_cpu(cpu, new_mask);
+ continue;
+ }
+
+ rht_node = rhashtable_lookup_fast(&dd->sdma_rht, &cpu,
+ sdma_rht_params);
+ if (!rht_node) {
+ rht_node = kzalloc(sizeof(*rht_node), GFP_KERNEL);
+ if (!rht_node) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ rht_node->map[vl] = kzalloc(sz, GFP_KERNEL);
+ if (!rht_node->map[vl]) {
+ kfree(rht_node);
+ ret = -ENOMEM;
+ goto out;
+ }
+ rht_node->cpu_id = cpu;
+ rht_node->map[vl]->mask = 0;
+ rht_node->map[vl]->ctr = 1;
+ rht_node->map[vl]->sde[0] = sde;
+
+ ret = rhashtable_insert_fast(&dd->sdma_rht,
+ &rht_node->node,
+ sdma_rht_params);
+ if (ret) {
+ kfree(rht_node->map[vl]);
+ kfree(rht_node);
+ dd_dev_err(sde->dd, "Failed to set process to sde affinity for cpu %lu\n",
+ cpu);
+ goto out;
+ }
+
+ } else {
+ int ctr, pow;
+
+ /* Add new user mappings */
+ if (!rht_node->map[vl])
+ rht_node->map[vl] = kzalloc(sz, GFP_KERNEL);
+
+ if (!rht_node->map[vl]) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ rht_node->map[vl]->ctr++;
+ ctr = rht_node->map[vl]->ctr;
+ rht_node->map[vl]->sde[ctr - 1] = sde;
+ pow = roundup_pow_of_two(ctr);
+ rht_node->map[vl]->mask = pow - 1;
+
+ /* Populate the sde map table */
+ sdma_populate_sde_map(rht_node->map[vl]);
+ }
+ cpumask_set_cpu(cpu, new_mask);
+ }
+
+ /* Clean up old mappings */
+ for_each_cpu(cpu, cpu_online_mask) {
+ struct sdma_rht_node *rht_node;
+
+ /* Don't cleanup sdes that are set in the new mask */
+ if (cpumask_test_cpu(cpu, mask))
+ continue;
+
+ rht_node = rhashtable_lookup_fast(&dd->sdma_rht, &cpu,
+ sdma_rht_params);
+ if (rht_node) {
+ bool empty = true;
+ int i;
+
+ /* Remove mappings for old sde */
+ for (i = 0; i < HFI1_MAX_VLS_SUPPORTED; i++)
+ if (rht_node->map[i])
+ sdma_cleanup_sde_map(rht_node->map[i],
+ sde);
+
+ /* Free empty hash table entries */
+ for (i = 0; i < HFI1_MAX_VLS_SUPPORTED; i++) {
+ if (!rht_node->map[i])
+ continue;
+
+ if (rht_node->map[i]->ctr) {
+ empty = false;
+ break;
+ }
+ }
+
+ if (empty) {
+ ret = rhashtable_remove_fast(&dd->sdma_rht,
+ &rht_node->node,
+ sdma_rht_params);
+ WARN_ON(ret);
+
+ for (i = 0; i < HFI1_MAX_VLS_SUPPORTED; i++)
+ kfree(rht_node->map[i]);
+
+ kfree(rht_node);
+ }
+ }
+ }
+
+ cpumask_copy(&sde->cpu_mask, new_mask);
+out:
+ mutex_unlock(&process_to_sde_mutex);
+out_free:
+ free_cpumask_var(mask);
+ free_cpumask_var(new_mask);
+ return ret ? : strnlen(buf, PAGE_SIZE);
+}
+
+ssize_t sdma_get_cpu_to_sde_map(struct sdma_engine *sde, char *buf)
+{
+ mutex_lock(&process_to_sde_mutex);
+ if (cpumask_empty(&sde->cpu_mask))
+ snprintf(buf, PAGE_SIZE, "%s\n", "empty");
+ else
+ cpumap_print_to_pagebuf(true, buf, &sde->cpu_mask);
+ mutex_unlock(&process_to_sde_mutex);
+ return strnlen(buf, PAGE_SIZE);
+}
+
+static void sdma_rht_free(void *ptr, void *arg)
+{
+ struct sdma_rht_node *rht_node = ptr;
+ int i;
+
+ for (i = 0; i < HFI1_MAX_VLS_SUPPORTED; i++)
+ kfree(rht_node->map[i]);
+
+ kfree(rht_node);
+}
+
+/**
+ * sdma_seqfile_dump_cpu_list() - debugfs dump the cpu to sdma mappings
+ * @s: seq file
+ * @dd: hfi1_devdata
+ * @cpuid: cpu id
+ *
+ * This routine dumps the process to sde mappings per cpu
+ */
+void sdma_seqfile_dump_cpu_list(struct seq_file *s,
+ struct hfi1_devdata *dd,
+ unsigned long cpuid)
+{
+ struct sdma_rht_node *rht_node;
+ int i, j;
+
+ rht_node = rhashtable_lookup_fast(&dd->sdma_rht, &cpuid,
+ sdma_rht_params);
+ if (!rht_node)
+ return;
+
+ seq_printf(s, "cpu%3lu: ", cpuid);
+ for (i = 0; i < HFI1_MAX_VLS_SUPPORTED; i++) {
+ if (!rht_node->map[i] || !rht_node->map[i]->ctr)
+ continue;
+
+ seq_printf(s, " vl%d: [", i);
+
+ for (j = 0; j < rht_node->map[i]->ctr; j++) {
+ if (!rht_node->map[i]->sde[j])
+ continue;
+
+ if (j > 0)
+ seq_puts(s, ",");
+
+ seq_printf(s, " sdma%2d",
+ rht_node->map[i]->sde[j]->this_idx);
+ }
+ seq_puts(s, " ]");
+ }
+
+ seq_puts(s, "\n");
+}
+
/*
* Free the indicated map struct
*/
@@ -1161,6 +1509,10 @@ int sdma_init(struct hfi1_devdata *dd, u8 port)
dd->num_sdma = num_engines;
if (sdma_map_init(dd, port, ppd->vls_operational, NULL))
goto bail;
+
+ if (rhashtable_init(&dd->sdma_rht, &sdma_rht_params))
+ goto bail;
+
dd_dev_info(dd, "SDMA num_sdma: %u\n", dd->num_sdma);
return 0;
@@ -1252,6 +1604,7 @@ void sdma_exit(struct hfi1_devdata *dd)
sdma_finalput(&sde->state);
}
sdma_clean(dd, dd->num_sdma);
+ rhashtable_free_and_destroy(&dd->sdma_rht, sdma_rht_free, NULL);
}
/*
@@ -2086,6 +2439,11 @@ nodesc:
* @sde: sdma engine to use
* @wait: wait structure to use when full (may be NULL)
* @tx_list: list of sdma_txreqs to submit
+ * @count: pointer to a u32 which, after return will contain the total number of
+ * sdma_txreqs removed from the tx_list. This will include sdma_txreqs
+ * whose SDMA descriptors are submitted to the ring and the sdma_txreqs
+ * which are added to SDMA engine flush list if the SDMA engine state is
+ * not running.
*
* The call submits the list into the ring.
*
@@ -2100,18 +2458,18 @@ nodesc:
* side locking.
*
* Return:
- * > 0 - Success (value is number of sdma_txreq's submitted),
+ * 0 - Success,
* -EINVAL - sdma_txreq incomplete, -EBUSY - no space in ring (wait == NULL)
* -EIOCBQUEUED - tx queued to iowait, -ECOMM bad sdma state
*/
int sdma_send_txlist(struct sdma_engine *sde, struct iowait *wait,
- struct list_head *tx_list)
+ struct list_head *tx_list, u32 *count_out)
{
struct sdma_txreq *tx, *tx_next;
int ret = 0;
unsigned long flags;
u16 tail = INVALID_TAIL;
- int count = 0;
+ u32 submit_count = 0, flush_count = 0, total_count;
spin_lock_irqsave(&sde->tail_lock, flags);
retry:
@@ -2127,33 +2485,34 @@ retry:
}
list_del_init(&tx->list);
tail = submit_tx(sde, tx);
- count++;
+ submit_count++;
if (tail != INVALID_TAIL &&
- (count & SDMA_TAIL_UPDATE_THRESH) == 0) {
+ (submit_count & SDMA_TAIL_UPDATE_THRESH) == 0) {
sdma_update_tail(sde, tail);
tail = INVALID_TAIL;
}
}
update_tail:
+ total_count = submit_count + flush_count;
if (wait)
- iowait_sdma_add(wait, count);
+ iowait_sdma_add(wait, total_count);
if (tail != INVALID_TAIL)
sdma_update_tail(sde, tail);
spin_unlock_irqrestore(&sde->tail_lock, flags);
- return ret == 0 ? count : ret;
+ *count_out = total_count;
+ return ret;
unlock_noconn:
spin_lock(&sde->flushlist_lock);
list_for_each_entry_safe(tx, tx_next, tx_list, list) {
tx->wait = wait;
list_del_init(&tx->list);
- if (wait)
- iowait_sdma_inc(wait);
tx->next_descq_idx = 0;
#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
tx->sn = sde->tail_sn++;
trace_hfi1_sdma_in_sn(sde, tx->sn);
#endif
list_add_tail(&tx->list, &sde->flushlist);
+ flush_count++;
if (wait) {
wait->tx_count++;
wait->count += tx->num_desc;
diff --git a/drivers/infiniband/hw/hfi1/sdma.h b/drivers/infiniband/hw/hfi1/sdma.h
index 8f50c99fe711..56257ea3598f 100644
--- a/drivers/infiniband/hw/hfi1/sdma.h
+++ b/drivers/infiniband/hw/hfi1/sdma.h
@@ -413,6 +413,8 @@ struct sdma_engine {
spinlock_t flushlist_lock;
/* private: */
struct list_head flushlist;
+ struct cpumask cpu_mask;
+ struct kobject kobj;
};
int sdma_init(struct hfi1_devdata *dd, u8 port);
@@ -847,7 +849,8 @@ int sdma_send_txreq(struct sdma_engine *sde,
struct sdma_txreq *tx);
int sdma_send_txlist(struct sdma_engine *sde,
struct iowait *wait,
- struct list_head *tx_list);
+ struct list_head *tx_list,
+ u32 *count);
int sdma_ahg_alloc(struct sdma_engine *sde);
void sdma_ahg_free(struct sdma_engine *sde, int ahg_index);
@@ -1058,7 +1061,15 @@ struct sdma_engine *sdma_select_engine_vl(
u32 selector,
u8 vl);
+struct sdma_engine *sdma_select_user_engine(struct hfi1_devdata *dd,
+ u32 selector, u8 vl);
+ssize_t sdma_get_cpu_to_sde_map(struct sdma_engine *sde, char *buf);
+ssize_t sdma_set_cpu_to_sde_map(struct sdma_engine *sde, const char *buf,
+ size_t count);
+int sdma_engine_get_vl(struct sdma_engine *sde);
void sdma_seqfile_dump_sde(struct seq_file *s, struct sdma_engine *);
+void sdma_seqfile_dump_cpu_list(struct seq_file *s, struct hfi1_devdata *dd,
+ unsigned long cpuid);
#ifdef CONFIG_SDMA_VERBOSITY
void sdma_dumpstate(struct sdma_engine *);
diff --git a/drivers/infiniband/hw/hfi1/sysfs.c b/drivers/infiniband/hw/hfi1/sysfs.c
index 74c84c655f7e..edba22461a9c 100644
--- a/drivers/infiniband/hw/hfi1/sysfs.c
+++ b/drivers/infiniband/hw/hfi1/sysfs.c
@@ -766,13 +766,95 @@ bail:
return ret;
}
+struct sde_attribute {
+ struct attribute attr;
+ ssize_t (*show)(struct sdma_engine *sde, char *buf);
+ ssize_t (*store)(struct sdma_engine *sde, const char *buf, size_t cnt);
+};
+
+static ssize_t sde_show(struct kobject *kobj, struct attribute *attr, char *buf)
+{
+ struct sde_attribute *sde_attr =
+ container_of(attr, struct sde_attribute, attr);
+ struct sdma_engine *sde =
+ container_of(kobj, struct sdma_engine, kobj);
+
+ if (!sde_attr->show)
+ return -EINVAL;
+
+ return sde_attr->show(sde, buf);
+}
+
+static ssize_t sde_store(struct kobject *kobj, struct attribute *attr,
+ const char *buf, size_t count)
+{
+ struct sde_attribute *sde_attr =
+ container_of(attr, struct sde_attribute, attr);
+ struct sdma_engine *sde =
+ container_of(kobj, struct sdma_engine, kobj);
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if (!sde_attr->store)
+ return -EINVAL;
+
+ return sde_attr->store(sde, buf, count);
+}
+
+static const struct sysfs_ops sde_sysfs_ops = {
+ .show = sde_show,
+ .store = sde_store,
+};
+
+static struct kobj_type sde_ktype = {
+ .sysfs_ops = &sde_sysfs_ops,
+};
+
+#define SDE_ATTR(_name, _mode, _show, _store) \
+ struct sde_attribute sde_attr_##_name = \
+ __ATTR(_name, _mode, _show, _store)
+
+static ssize_t sde_show_cpu_to_sde_map(struct sdma_engine *sde, char *buf)
+{
+ return sdma_get_cpu_to_sde_map(sde, buf);
+}
+
+static ssize_t sde_store_cpu_to_sde_map(struct sdma_engine *sde,
+ const char *buf, size_t count)
+{
+ return sdma_set_cpu_to_sde_map(sde, buf, count);
+}
+
+static ssize_t sde_show_vl(struct sdma_engine *sde, char *buf)
+{
+ int vl;
+
+ vl = sdma_engine_get_vl(sde);
+ if (vl < 0)
+ return vl;
+
+ return snprintf(buf, PAGE_SIZE, "%d\n", vl);
+}
+
+static SDE_ATTR(cpu_list, S_IWUSR | S_IRUGO,
+ sde_show_cpu_to_sde_map,
+ sde_store_cpu_to_sde_map);
+static SDE_ATTR(vl, S_IRUGO, sde_show_vl, NULL);
+
+static struct sde_attribute *sde_attribs[] = {
+ &sde_attr_cpu_list,
+ &sde_attr_vl
+};
+
/*
* Register and create our files in /sys/class/infiniband.
*/
int hfi1_verbs_register_sysfs(struct hfi1_devdata *dd)
{
struct ib_device *dev = &dd->verbs_dev.rdi.ibdev;
- int i, ret;
+ struct device *class_dev = &dev->dev;
+ int i, j, ret;
for (i = 0; i < ARRAY_SIZE(hfi1_attributes); ++i) {
ret = device_create_file(&dev->dev, hfi1_attributes[i]);
@@ -780,10 +862,29 @@ int hfi1_verbs_register_sysfs(struct hfi1_devdata *dd)
goto bail;
}
+ for (i = 0; i < dd->num_sdma; i++) {
+ ret = kobject_init_and_add(&dd->per_sdma[i].kobj,
+ &sde_ktype, &class_dev->kobj,
+ "sdma%d", i);
+ if (ret)
+ goto bail;
+
+ for (j = 0; j < ARRAY_SIZE(sde_attribs); j++) {
+ ret = sysfs_create_file(&dd->per_sdma[i].kobj,
+ &sde_attribs[j]->attr);
+ if (ret)
+ goto bail;
+ }
+ }
+
return 0;
bail:
for (i = 0; i < ARRAY_SIZE(hfi1_attributes); ++i)
device_remove_file(&dev->dev, hfi1_attributes[i]);
+
+ for (i = 0; i < dd->num_sdma; i++)
+ kobject_del(&dd->per_sdma[i].kobj);
+
return ret;
}
diff --git a/drivers/infiniband/hw/hfi1/trace.c b/drivers/infiniband/hw/hfi1/trace.c
index 4cfb13771897..01f525cd985a 100644
--- a/drivers/infiniband/hw/hfi1/trace.c
+++ b/drivers/infiniband/hw/hfi1/trace.c
@@ -47,9 +47,9 @@
#define CREATE_TRACE_POINTS
#include "trace.h"
-u8 ibhdr_exhdr_len(struct hfi1_ib_header *hdr)
+u8 ibhdr_exhdr_len(struct ib_header *hdr)
{
- struct hfi1_other_headers *ohdr;
+ struct ib_other_headers *ohdr;
u8 opcode;
u8 lnh = (u8)(be16_to_cpu(hdr->lrh[0]) & 3);
@@ -67,16 +67,11 @@ u8 ibhdr_exhdr_len(struct hfi1_ib_header *hdr)
#define AETH_PRN "aeth syn 0x%.2x %s msn 0x%.8x"
#define DETH_PRN "deth qkey 0x%.8x sqpn 0x%.6x"
#define IETH_PRN "ieth rkey 0x%.8x"
-#define ATOMICACKETH_PRN "origdata %lld"
-#define ATOMICETH_PRN "vaddr 0x%llx rkey 0x%.8x sdata %lld cdata %lld"
+#define ATOMICACKETH_PRN "origdata %llx"
+#define ATOMICETH_PRN "vaddr 0x%llx rkey 0x%.8x sdata %llx cdata %llx"
#define OP(transport, op) IB_OPCODE_## transport ## _ ## op
-static u64 ib_u64_get(__be32 *p)
-{
- return ((u64)be32_to_cpu(p[0]) << 32) | be32_to_cpu(p[1]);
-}
-
static const char *parse_syndrome(u8 syndrome)
{
switch (syndrome >> 5) {
@@ -113,8 +108,7 @@ const char *parse_everbs_hdrs(
case OP(RC, RDMA_WRITE_ONLY_WITH_IMMEDIATE):
case OP(UC, RDMA_WRITE_ONLY_WITH_IMMEDIATE):
trace_seq_printf(p, RETH_PRN " " IMM_PRN,
- (unsigned long long)ib_u64_get(
- (__be32 *)&eh->rc.reth.vaddr),
+ get_ib_reth_vaddr(&eh->rc.reth),
be32_to_cpu(eh->rc.reth.rkey),
be32_to_cpu(eh->rc.reth.length),
be32_to_cpu(eh->rc.imm_data));
@@ -126,8 +120,7 @@ const char *parse_everbs_hdrs(
case OP(RC, RDMA_WRITE_ONLY):
case OP(UC, RDMA_WRITE_ONLY):
trace_seq_printf(p, RETH_PRN,
- (unsigned long long)ib_u64_get(
- (__be32 *)&eh->rc.reth.vaddr),
+ get_ib_reth_vaddr(&eh->rc.reth),
be32_to_cpu(eh->rc.reth.rkey),
be32_to_cpu(eh->rc.reth.length));
break;
@@ -145,20 +138,16 @@ const char *parse_everbs_hdrs(
be32_to_cpu(eh->at.aeth) >> 24,
parse_syndrome(be32_to_cpu(eh->at.aeth) >> 24),
be32_to_cpu(eh->at.aeth) & HFI1_MSN_MASK,
- (unsigned long long)
- ib_u64_get(eh->at.atomic_ack_eth));
+ ib_u64_get(&eh->at.atomic_ack_eth));
break;
/* atomiceth */
case OP(RC, COMPARE_SWAP):
case OP(RC, FETCH_ADD):
trace_seq_printf(p, ATOMICETH_PRN,
- (unsigned long long)ib_u64_get(
- eh->atomic_eth.vaddr),
+ get_ib_ateth_vaddr(&eh->atomic_eth),
eh->atomic_eth.rkey,
- (unsigned long long)ib_u64_get(
- (__be32 *)&eh->atomic_eth.swap_data),
- (unsigned long long)ib_u64_get(
- (__be32 *)&eh->atomic_eth.compare_data));
+ get_ib_ateth_swap(&eh->atomic_eth),
+ get_ib_ateth_compare(&eh->atomic_eth));
break;
/* deth */
case OP(UD, SEND_ONLY):
diff --git a/drivers/infiniband/hw/hfi1/trace_ctxts.h b/drivers/infiniband/hw/hfi1/trace_ctxts.h
index 31654bbac1cf..26ae789e47cf 100644
--- a/drivers/infiniband/hw/hfi1/trace_ctxts.h
+++ b/drivers/infiniband/hw/hfi1/trace_ctxts.h
@@ -67,9 +67,9 @@ TRACE_EVENT(hfi1_uctxtdata,
__field(u64, hw_free)
__field(void __iomem *, piobase)
__field(u16, rcvhdrq_cnt)
- __field(u64, rcvhdrq_phys)
+ __field(u64, rcvhdrq_dma)
__field(u32, eager_cnt)
- __field(u64, rcvegr_phys)
+ __field(u64, rcvegr_dma)
),
TP_fast_assign(DD_DEV_ASSIGN(dd);
__entry->ctxt = uctxt->ctxt;
@@ -77,10 +77,9 @@ TRACE_EVENT(hfi1_uctxtdata,
__entry->hw_free = le64_to_cpu(*uctxt->sc->hw_free);
__entry->piobase = uctxt->sc->base_addr;
__entry->rcvhdrq_cnt = uctxt->rcvhdrq_cnt;
- __entry->rcvhdrq_phys = uctxt->rcvhdrq_phys;
+ __entry->rcvhdrq_dma = uctxt->rcvhdrq_dma;
__entry->eager_cnt = uctxt->egrbufs.alloced;
- __entry->rcvegr_phys =
- uctxt->egrbufs.rcvtids[0].phys;
+ __entry->rcvegr_dma = uctxt->egrbufs.rcvtids[0].dma;
),
TP_printk("[%s] ctxt %u " UCTXT_FMT,
__get_str(dev),
@@ -89,9 +88,9 @@ TRACE_EVENT(hfi1_uctxtdata,
__entry->hw_free,
__entry->piobase,
__entry->rcvhdrq_cnt,
- __entry->rcvhdrq_phys,
+ __entry->rcvhdrq_dma,
__entry->eager_cnt,
- __entry->rcvegr_phys
+ __entry->rcvegr_dma
)
);
diff --git a/drivers/infiniband/hw/hfi1/trace_ibhdrs.h b/drivers/infiniband/hw/hfi1/trace_ibhdrs.h
index c3e41aed0034..382fcda3a5f6 100644
--- a/drivers/infiniband/hw/hfi1/trace_ibhdrs.h
+++ b/drivers/infiniband/hw/hfi1/trace_ibhdrs.h
@@ -55,7 +55,7 @@
#undef TRACE_SYSTEM
#define TRACE_SYSTEM hfi1_ibhdrs
-u8 ibhdr_exhdr_len(struct hfi1_ib_header *hdr);
+u8 ibhdr_exhdr_len(struct ib_header *hdr);
const char *parse_everbs_hdrs(struct trace_seq *p, u8 opcode, void *ehdrs);
#define __parse_ib_ehdrs(op, ehdrs) parse_everbs_hdrs(p, op, ehdrs)
@@ -74,7 +74,7 @@ __print_symbolic(lrh, \
DECLARE_EVENT_CLASS(hfi1_ibhdr_template,
TP_PROTO(struct hfi1_devdata *dd,
- struct hfi1_ib_header *hdr),
+ struct ib_header *hdr),
TP_ARGS(dd, hdr),
TP_STRUCT__entry(
DD_DEV_ENTRY(dd)
@@ -102,7 +102,7 @@ DECLARE_EVENT_CLASS(hfi1_ibhdr_template,
__dynamic_array(u8, ehdrs, ibhdr_exhdr_len(hdr))
),
TP_fast_assign(
- struct hfi1_other_headers *ohdr;
+ struct ib_other_headers *ohdr;
DD_DEV_ASSIGN(dd);
/* LRH */
@@ -185,19 +185,19 @@ DECLARE_EVENT_CLASS(hfi1_ibhdr_template,
);
DEFINE_EVENT(hfi1_ibhdr_template, input_ibhdr,
- TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr),
+ TP_PROTO(struct hfi1_devdata *dd, struct ib_header *hdr),
TP_ARGS(dd, hdr));
DEFINE_EVENT(hfi1_ibhdr_template, pio_output_ibhdr,
- TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr),
+ TP_PROTO(struct hfi1_devdata *dd, struct ib_header *hdr),
TP_ARGS(dd, hdr));
DEFINE_EVENT(hfi1_ibhdr_template, ack_output_ibhdr,
- TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr),
+ TP_PROTO(struct hfi1_devdata *dd, struct ib_header *hdr),
TP_ARGS(dd, hdr));
DEFINE_EVENT(hfi1_ibhdr_template, sdma_output_ibhdr,
- TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr),
+ TP_PROTO(struct hfi1_devdata *dd, struct ib_header *hdr),
TP_ARGS(dd, hdr));
#endif /* __HFI1_TRACE_IBHDRS_H */
diff --git a/drivers/infiniband/hw/hfi1/trace_rx.h b/drivers/infiniband/hw/hfi1/trace_rx.h
index 9ba1f615ec95..11e02b228922 100644
--- a/drivers/infiniband/hw/hfi1/trace_rx.h
+++ b/drivers/infiniband/hw/hfi1/trace_rx.h
@@ -260,7 +260,7 @@ TRACE_EVENT(hfi1_mmu_invalidate,
TRACE_EVENT(snoop_capture,
TP_PROTO(struct hfi1_devdata *dd,
int hdr_len,
- struct hfi1_ib_header *hdr,
+ struct ib_header *hdr,
int data_len,
void *data),
TP_ARGS(dd, hdr_len, hdr, data_len, data),
@@ -279,7 +279,7 @@ TRACE_EVENT(snoop_capture,
__dynamic_array(u8, raw_pkt, data_len)
),
TP_fast_assign(
- struct hfi1_other_headers *ohdr;
+ struct ib_other_headers *ohdr;
__entry->lnh = (u8)(be16_to_cpu(hdr->lrh[0]) & 3);
if (__entry->lnh == HFI1_LRH_BTH)
diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c
index a726d96d185f..5e6d1bac4914 100644
--- a/drivers/infiniband/hw/hfi1/uc.c
+++ b/drivers/infiniband/hw/hfi1/uc.c
@@ -50,14 +50,7 @@
#include "qp.h"
/* cut down ridiculously long IB macro names */
-#define OP(x) IB_OPCODE_UC_##x
-
-/* only opcode mask for adaptive pio */
-const u32 uc_only_opcode =
- BIT(OP(SEND_ONLY) & 0x1f) |
- BIT(OP(SEND_ONLY_WITH_IMMEDIATE & 0x1f)) |
- BIT(OP(RDMA_WRITE_ONLY & 0x1f)) |
- BIT(OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE & 0x1f));
+#define OP(x) UC_OP(x)
/**
* hfi1_make_uc_req - construct a request packet (SEND, RDMA write)
@@ -70,7 +63,7 @@ const u32 uc_only_opcode =
int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
{
struct hfi1_qp_priv *priv = qp->priv;
- struct hfi1_other_headers *ohdr;
+ struct ib_other_headers *ohdr;
struct rvt_swqe *wqe;
u32 hwords = 5;
u32 bth0 = 0;
@@ -304,12 +297,12 @@ bail_no_tx:
void hfi1_uc_rcv(struct hfi1_packet *packet)
{
struct hfi1_ibport *ibp = &packet->rcd->ppd->ibport_data;
- struct hfi1_ib_header *hdr = packet->hdr;
+ struct ib_header *hdr = packet->hdr;
u32 rcv_flags = packet->rcv_flags;
void *data = packet->ebuf;
u32 tlen = packet->tlen;
struct rvt_qp *qp = packet->qp;
- struct hfi1_other_headers *ohdr = packet->ohdr;
+ struct ib_other_headers *ohdr = packet->ohdr;
u32 bth0, opcode;
u32 hdrsize = packet->hlen;
u32 psn;
diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c
index f01e8e1d62d3..97ae24b6314c 100644
--- a/drivers/infiniband/hw/hfi1/ud.c
+++ b/drivers/infiniband/hw/hfi1/ud.c
@@ -271,7 +271,7 @@ drop:
int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
{
struct hfi1_qp_priv *priv = qp->priv;
- struct hfi1_other_headers *ohdr;
+ struct ib_other_headers *ohdr;
struct ib_ah_attr *ah_attr;
struct hfi1_pportdata *ppd;
struct hfi1_ibport *ibp;
@@ -510,8 +510,8 @@ void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn,
u32 bth0, plen, vl, hwords = 5;
u16 lrh0;
u8 sl = ibp->sc_to_sl[sc5];
- struct hfi1_ib_header hdr;
- struct hfi1_other_headers *ohdr;
+ struct ib_header hdr;
+ struct ib_other_headers *ohdr;
struct pio_buf *pbuf;
struct send_context *ctxt = qp_to_send_context(qp, sc5);
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
@@ -559,8 +559,8 @@ void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn,
/*
* opa_smp_check() - Do the regular pkey checking, and the additional
- * checks for SMPs specified in OPAv1 rev 0.90, section 9.10.26
- * ("SMA Packet Checks").
+ * checks for SMPs specified in OPAv1 rev 1.0, 9/19/2016 update, section
+ * 9.10.25 ("SMA Packet Checks").
*
* Note that:
* - Checks are done using the pkey directly from the packet's BTH,
@@ -603,23 +603,28 @@ static int opa_smp_check(struct hfi1_ibport *ibp, u16 pkey, u8 sc5,
/*
* SMPs fall into one of four (disjoint) categories:
- * SMA request, SMA response, trap, or trap repress.
- * Our response depends, in part, on which type of
- * SMP we're processing.
+ * SMA request, SMA response, SMA trap, or SMA trap repress.
+ * Our response depends, in part, on which type of SMP we're
+ * processing.
*
- * If this is not an SMA request, or trap repress:
- * - accept MAD if the port is running an SM
- * - pkey == FULL_MGMT_P_KEY =>
- * reply with unsupported method (i.e., just mark
- * the smp's status field here, and let it be
- * processed normally)
- * - pkey != LIM_MGMT_P_KEY =>
- * increment port recv constraint errors, drop MAD
- * If this is an SMA request or trap repress:
+ * If this is an SMA response, skip the check here.
+ *
+ * If this is an SMA request or SMA trap repress:
* - pkey != FULL_MGMT_P_KEY =>
* increment port recv constraint errors, drop MAD
+ *
+ * Otherwise:
+ * - accept if the port is running an SM
+ * - drop MAD if it's an SMA trap
+ * - pkey == FULL_MGMT_P_KEY =>
+ * reply with unsupported method
+ * - pkey != FULL_MGMT_P_KEY =>
+ * increment port recv constraint errors, drop MAD
*/
switch (smp->method) {
+ case IB_MGMT_METHOD_GET_RESP:
+ case IB_MGMT_METHOD_REPORT_RESP:
+ break;
case IB_MGMT_METHOD_GET:
case IB_MGMT_METHOD_SET:
case IB_MGMT_METHOD_REPORT:
@@ -629,23 +634,17 @@ static int opa_smp_check(struct hfi1_ibport *ibp, u16 pkey, u8 sc5,
return 1;
}
break;
- case IB_MGMT_METHOD_SEND:
- case IB_MGMT_METHOD_TRAP:
- case IB_MGMT_METHOD_GET_RESP:
- case IB_MGMT_METHOD_REPORT_RESP:
+ default:
if (ibp->rvp.port_cap_flags & IB_PORT_SM)
return 0;
+ if (smp->method == IB_MGMT_METHOD_TRAP)
+ return 1;
if (pkey == FULL_MGMT_P_KEY) {
smp->status |= IB_SMP_UNSUP_METHOD;
return 0;
}
- if (pkey != LIM_MGMT_P_KEY) {
- ingress_pkey_table_fail(ppd, pkey, slid);
- return 1;
- }
- break;
- default:
- break;
+ ingress_pkey_table_fail(ppd, pkey, slid);
+ return 1;
}
return 0;
}
@@ -665,7 +664,7 @@ static int opa_smp_check(struct hfi1_ibport *ibp, u16 pkey, u8 sc5,
*/
void hfi1_ud_rcv(struct hfi1_packet *packet)
{
- struct hfi1_other_headers *ohdr = packet->ohdr;
+ struct ib_other_headers *ohdr = packet->ohdr;
int opcode;
u32 hdrsize = packet->hlen;
struct ib_wc wc;
@@ -675,13 +674,13 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
int mgmt_pkey_idx = -1;
struct hfi1_ibport *ibp = &packet->rcd->ppd->ibport_data;
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
- struct hfi1_ib_header *hdr = packet->hdr;
+ struct ib_header *hdr = packet->hdr;
u32 rcv_flags = packet->rcv_flags;
void *data = packet->ebuf;
u32 tlen = packet->tlen;
struct rvt_qp *qp = packet->qp;
bool has_grh = rcv_flags & HFI1_HAS_GRH;
- u8 sc5 = hdr2sc((struct hfi1_message_header *)hdr, packet->rhf);
+ u8 sc5 = hdr2sc(hdr, packet->rhf);
u32 bth1;
u8 sl_from_sc, sl;
u16 slid;
diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c
index 0ecf27903dc2..a761f804111e 100644
--- a/drivers/infiniband/hw/hfi1/user_sdma.c
+++ b/drivers/infiniband/hw/hfi1/user_sdma.c
@@ -114,6 +114,8 @@ MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 12
#define KDETH_HCRC_LOWER_SHIFT 24
#define KDETH_HCRC_LOWER_MASK 0xff
+#define AHG_KDETH_INTR_SHIFT 12
+
#define PBC2LRH(x) ((((x) & 0xfff) << 2) - 4)
#define LRH2PBC(x) ((((x) >> 2) + 1) & 0xfff)
@@ -546,7 +548,7 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
u8 opcode, sc, vl;
int req_queued = 0;
u16 dlid;
- u8 selector;
+ u32 selector;
if (iovec[idx].iov_len < sizeof(info) + sizeof(req->hdr)) {
hfi1_cdbg(
@@ -751,12 +753,9 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
dlid = be16_to_cpu(req->hdr.lrh[1]);
selector = dlid_to_selector(dlid);
+ selector += uctxt->ctxt + fd->subctxt;
+ req->sde = sdma_select_user_engine(dd, selector, vl);
- /* Have to select the engine */
- req->sde = sdma_select_engine_vl(dd,
- (u32)(uctxt->ctxt + fd->subctxt +
- selector),
- vl);
if (!req->sde || !sdma_running(req->sde)) {
ret = -ECOMM;
goto free_req;
@@ -892,7 +891,7 @@ static inline u32 get_lrh_len(struct hfi1_pkt_header hdr, u32 len)
static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
{
- int ret = 0;
+ int ret = 0, count;
unsigned npkts = 0;
struct user_sdma_txreq *tx = NULL;
struct hfi1_user_sdma_pkt_q *pq = NULL;
@@ -1088,23 +1087,18 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
npkts++;
}
dosend:
- ret = sdma_send_txlist(req->sde, &pq->busy, &req->txps);
- if (list_empty(&req->txps)) {
- req->seqsubmitted = req->seqnum;
- if (req->seqnum == req->info.npkts) {
- set_bit(SDMA_REQ_SEND_DONE, &req->flags);
- /*
- * The txreq has already been submitted to the HW queue
- * so we can free the AHG entry now. Corruption will not
- * happen due to the sequential manner in which
- * descriptors are processed.
- */
- if (test_bit(SDMA_REQ_HAVE_AHG, &req->flags))
- sdma_ahg_free(req->sde, req->ahg_idx);
- }
- } else if (ret > 0) {
- req->seqsubmitted += ret;
- ret = 0;
+ ret = sdma_send_txlist(req->sde, &pq->busy, &req->txps, &count);
+ req->seqsubmitted += count;
+ if (req->seqsubmitted == req->info.npkts) {
+ set_bit(SDMA_REQ_SEND_DONE, &req->flags);
+ /*
+ * The txreq has already been submitted to the HW queue
+ * so we can free the AHG entry now. Corruption will not
+ * happen due to the sequential manner in which
+ * descriptors are processed.
+ */
+ if (test_bit(SDMA_REQ_HAVE_AHG, &req->flags))
+ sdma_ahg_free(req->sde, req->ahg_idx);
}
return ret;
@@ -1480,7 +1474,8 @@ static int set_txreq_header_ahg(struct user_sdma_request *req,
/* Clear KDETH.SH on last packet */
if (unlikely(tx->flags & TXREQ_FLAGS_REQ_LAST_PKT)) {
val |= cpu_to_le16(KDETH_GET(hdr->kdeth.ver_tid_offset,
- INTR) >> 16);
+ INTR) <<
+ AHG_KDETH_INTR_SHIFT);
val &= cpu_to_le16(~(1U << 13));
AHG_HEADER_SET(req->ahg, diff, 7, 16, 14, val);
} else {
diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c
index 2b359540901d..f2f6b5a78e0e 100644
--- a/drivers/infiniband/hw/hfi1/verbs.c
+++ b/drivers/infiniband/hw/hfi1/verbs.c
@@ -76,7 +76,7 @@ static unsigned int hfi1_max_ahs = 0xFFFF;
module_param_named(max_ahs, hfi1_max_ahs, uint, S_IRUGO);
MODULE_PARM_DESC(max_ahs, "Maximum number of address handles to support");
-unsigned int hfi1_max_cqes = 0x2FFFF;
+unsigned int hfi1_max_cqes = 0x2FFFFF;
module_param_named(max_cqes, hfi1_max_cqes, uint, S_IRUGO);
MODULE_PARM_DESC(max_cqes,
"Maximum number of completion queue entries to support");
@@ -89,7 +89,7 @@ unsigned int hfi1_max_qp_wrs = 0x3FFF;
module_param_named(max_qp_wrs, hfi1_max_qp_wrs, uint, S_IRUGO);
MODULE_PARM_DESC(max_qp_wrs, "Maximum number of QP WRs to support");
-unsigned int hfi1_max_qps = 16384;
+unsigned int hfi1_max_qps = 32768;
module_param_named(max_qps, hfi1_max_qps, uint, S_IRUGO);
MODULE_PARM_DESC(max_qps, "Maximum number of QPs to support");
@@ -335,7 +335,7 @@ const u8 hdr_len_by_opcode[256] = {
[IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST] = 12 + 8 + 4,
[IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY] = 12 + 8 + 4,
[IB_OPCODE_RC_ACKNOWLEDGE] = 12 + 8 + 4,
- [IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE] = 12 + 8 + 4,
+ [IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE] = 12 + 8 + 4 + 8,
[IB_OPCODE_RC_COMPARE_SWAP] = 12 + 8 + 28,
[IB_OPCODE_RC_FETCH_ADD] = 12 + 8 + 28,
[IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE] = 12 + 8 + 4,
@@ -403,6 +403,28 @@ static const opcode_handler opcode_handler_tbl[256] = {
[IB_OPCODE_CNP] = &hfi1_cnp_rcv
};
+#define OPMASK 0x1f
+
+static const u32 pio_opmask[BIT(3)] = {
+ /* RC */
+ [IB_OPCODE_RC >> 5] =
+ BIT(RC_OP(SEND_ONLY) & OPMASK) |
+ BIT(RC_OP(SEND_ONLY_WITH_IMMEDIATE) & OPMASK) |
+ BIT(RC_OP(RDMA_WRITE_ONLY) & OPMASK) |
+ BIT(RC_OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE) & OPMASK) |
+ BIT(RC_OP(RDMA_READ_REQUEST) & OPMASK) |
+ BIT(RC_OP(ACKNOWLEDGE) & OPMASK) |
+ BIT(RC_OP(ATOMIC_ACKNOWLEDGE) & OPMASK) |
+ BIT(RC_OP(COMPARE_SWAP) & OPMASK) |
+ BIT(RC_OP(FETCH_ADD) & OPMASK),
+ /* UC */
+ [IB_OPCODE_UC >> 5] =
+ BIT(UC_OP(SEND_ONLY) & OPMASK) |
+ BIT(UC_OP(SEND_ONLY_WITH_IMMEDIATE) & OPMASK) |
+ BIT(UC_OP(RDMA_WRITE_ONLY) & OPMASK) |
+ BIT(UC_OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE) & OPMASK),
+};
+
/*
* System image GUID.
*/
@@ -567,7 +589,7 @@ static inline opcode_handler qp_ok(int opcode, struct hfi1_packet *packet)
void hfi1_ib_rcv(struct hfi1_packet *packet)
{
struct hfi1_ctxtdata *rcd = packet->rcd;
- struct hfi1_ib_header *hdr = packet->hdr;
+ struct ib_header *hdr = packet->hdr;
u32 tlen = packet->tlen;
struct hfi1_pportdata *ppd = rcd->ppd;
struct hfi1_ibport *ibp = &ppd->ibport_data;
@@ -719,7 +741,7 @@ static void verbs_sdma_complete(
if (tx->wqe) {
hfi1_send_complete(qp, tx->wqe, IB_WC_SUCCESS);
} else if (qp->ibqp.qp_type == IB_QPT_RC) {
- struct hfi1_ib_header *hdr;
+ struct ib_header *hdr;
hdr = &tx->phdr.hdr;
hfi1_rc_send_complete(qp, hdr);
@@ -748,7 +770,7 @@ static int wait_kmem(struct hfi1_ibdev *dev,
qp->s_flags |= RVT_S_WAIT_KMEM;
list_add_tail(&priv->s_iowait.list, &dev->memwait);
trace_hfi1_qpsleep(qp, RVT_S_WAIT_KMEM);
- atomic_inc(&qp->refcount);
+ rvt_get_qp(qp);
}
write_sequnlock(&dev->iowait_lock);
qp->s_flags &= ~RVT_S_BUSY;
@@ -959,7 +981,7 @@ static int pio_wait(struct rvt_qp *qp,
was_empty = list_empty(&sc->piowait);
list_add_tail(&priv->s_iowait.list, &sc->piowait);
trace_hfi1_qpsleep(qp, RVT_S_WAIT_PIO);
- atomic_inc(&qp->refcount);
+ rvt_get_qp(qp);
/* counting: only call wantpiobuf_intr if first user */
if (was_empty)
hfi1_sc_wantpiobuf_intr(sc, 1);
@@ -1200,7 +1222,7 @@ static inline send_routine get_send_routine(struct rvt_qp *qp,
{
struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
struct hfi1_qp_priv *priv = qp->priv;
- struct hfi1_ib_header *h = &tx->phdr.hdr;
+ struct ib_header *h = &tx->phdr.hdr;
if (unlikely(!(dd->flags & HFI1_HAS_SEND_DMA)))
return dd->process_pio_send;
@@ -1210,22 +1232,18 @@ static inline send_routine get_send_routine(struct rvt_qp *qp,
case IB_QPT_GSI:
case IB_QPT_UD:
break;
- case IB_QPT_RC:
- if (piothreshold &&
- qp->s_cur_size <= min(piothreshold, qp->pmtu) &&
- (BIT(get_opcode(h) & 0x1f) & rc_only_opcode) &&
- iowait_sdma_pending(&priv->s_iowait) == 0 &&
- !sdma_txreq_built(&tx->txreq))
- return dd->process_pio_send;
- break;
case IB_QPT_UC:
+ case IB_QPT_RC: {
+ u8 op = get_opcode(h);
+
if (piothreshold &&
qp->s_cur_size <= min(piothreshold, qp->pmtu) &&
- (BIT(get_opcode(h) & 0x1f) & uc_only_opcode) &&
+ (BIT(op & OPMASK) & pio_opmask[op >> 5]) &&
iowait_sdma_pending(&priv->s_iowait) == 0 &&
!sdma_txreq_built(&tx->txreq))
return dd->process_pio_send;
break;
+ }
default:
break;
}
@@ -1244,8 +1262,8 @@ int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
{
struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
struct hfi1_qp_priv *priv = qp->priv;
- struct hfi1_other_headers *ohdr;
- struct hfi1_ib_header *hdr;
+ struct ib_other_headers *ohdr;
+ struct ib_header *hdr;
send_routine sr;
int ret;
u8 lnh;
@@ -1754,7 +1772,7 @@ void hfi1_cnp_rcv(struct hfi1_packet *packet)
{
struct hfi1_ibport *ibp = &packet->rcd->ppd->ibport_data;
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
- struct hfi1_ib_header *hdr = packet->hdr;
+ struct ib_header *hdr = packet->hdr;
struct rvt_qp *qp = packet->qp;
u32 lqpn, rqpn = 0;
u16 rlid = 0;
@@ -1781,7 +1799,7 @@ void hfi1_cnp_rcv(struct hfi1_packet *packet)
return;
}
- sc5 = hdr2sc((struct hfi1_message_header *)hdr, packet->rhf);
+ sc5 = hdr2sc(hdr, packet->rhf);
sl = ibp->sc_to_sl[sc5];
lqpn = qp->ibqp.qp_num;
diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h
index d1b101c54828..1c3815d89eb7 100644
--- a/drivers/infiniband/hw/hfi1/verbs.h
+++ b/drivers/infiniband/hw/hfi1/verbs.h
@@ -60,6 +60,7 @@
#include <rdma/ib_pack.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/ib_mad.h>
+#include <rdma/ib_hdrs.h>
#include <rdma/rdma_vt.h>
#include <rdma/rdmavt_qp.h>
#include <rdma/rdmavt_cq.h>
@@ -80,16 +81,6 @@ struct hfi1_packet;
*/
#define HFI1_UVERBS_ABI_VERSION 2
-#define IB_SEQ_NAK (3 << 29)
-
-/* AETH NAK opcode values */
-#define IB_RNR_NAK 0x20
-#define IB_NAK_PSN_ERROR 0x60
-#define IB_NAK_INVALID_REQUEST 0x61
-#define IB_NAK_REMOTE_ACCESS_ERROR 0x62
-#define IB_NAK_REMOTE_OPERATIONAL_ERROR 0x63
-#define IB_NAK_INVALID_RD_REQUEST 0x64
-
/* IB Performance Manager status values */
#define IB_PMA_SAMPLE_STATUS_DONE 0x00
#define IB_PMA_SAMPLE_STATUS_STARTED 0x01
@@ -104,80 +95,16 @@ struct hfi1_packet;
#define HFI1_VENDOR_IPG cpu_to_be16(0xFFA0)
-#define IB_BTH_REQ_ACK BIT(31)
-#define IB_BTH_SOLICITED BIT(23)
-#define IB_BTH_MIG_REQ BIT(22)
-
-#define IB_GRH_VERSION 6
-#define IB_GRH_VERSION_MASK 0xF
-#define IB_GRH_VERSION_SHIFT 28
-#define IB_GRH_TCLASS_MASK 0xFF
-#define IB_GRH_TCLASS_SHIFT 20
-#define IB_GRH_FLOW_MASK 0xFFFFF
-#define IB_GRH_FLOW_SHIFT 0
-#define IB_GRH_NEXT_HDR 0x1B
-
#define IB_DEFAULT_GID_PREFIX cpu_to_be64(0xfe80000000000000ULL)
+#define RC_OP(x) IB_OPCODE_RC_##x
+#define UC_OP(x) IB_OPCODE_UC_##x
+
/* flags passed by hfi1_ib_rcv() */
enum {
HFI1_HAS_GRH = (1 << 0),
};
-struct ib_reth {
- __be64 vaddr;
- __be32 rkey;
- __be32 length;
-} __packed;
-
-struct ib_atomic_eth {
- __be32 vaddr[2]; /* unaligned so access as 2 32-bit words */
- __be32 rkey;
- __be64 swap_data;
- __be64 compare_data;
-} __packed;
-
-union ib_ehdrs {
- struct {
- __be32 deth[2];
- __be32 imm_data;
- } ud;
- struct {
- struct ib_reth reth;
- __be32 imm_data;
- } rc;
- struct {
- __be32 aeth;
- __be32 atomic_ack_eth[2];
- } at;
- __be32 imm_data;
- __be32 aeth;
- __be32 ieth;
- struct ib_atomic_eth atomic_eth;
-} __packed;
-
-struct hfi1_other_headers {
- __be32 bth[3];
- union ib_ehdrs u;
-} __packed;
-
-/*
- * Note that UD packets with a GRH header are 8+40+12+8 = 68 bytes
- * long (72 w/ imm_data). Only the first 56 bytes of the IB header
- * will be in the eager header buffer. The remaining 12 or 16 bytes
- * are in the data buffer.
- */
-struct hfi1_ib_header {
- __be16 lrh[4];
- union {
- struct {
- struct ib_grh grh;
- struct hfi1_other_headers oth;
- } l;
- struct hfi1_other_headers oth;
- } u;
-} __packed;
-
struct hfi1_ahg_info {
u32 ahgdesc[2];
u16 tx_flags;
@@ -187,7 +114,7 @@ struct hfi1_ahg_info {
struct hfi1_sdma_header {
__le64 pbc;
- struct hfi1_ib_header hdr;
+ struct ib_header hdr;
} __packed;
/*
@@ -386,7 +313,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet);
void hfi1_rc_hdrerr(
struct hfi1_ctxtdata *rcd,
- struct hfi1_ib_header *hdr,
+ struct ib_header *hdr,
u32 rcv_flags,
struct rvt_qp *qp);
@@ -400,7 +327,7 @@ void hfi1_rc_timeout(unsigned long arg);
void hfi1_del_timers_sync(struct rvt_qp *qp);
void hfi1_stop_rc_timers(struct rvt_qp *qp);
-void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_ib_header *hdr);
+void hfi1_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr);
void hfi1_rc_error(struct rvt_qp *qp, enum ib_wc_status err);
@@ -423,7 +350,7 @@ int hfi1_check_send_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe);
extern const u32 rc_only_opcode;
extern const u32 uc_only_opcode;
-static inline u8 get_opcode(struct hfi1_ib_header *h)
+static inline u8 get_opcode(struct ib_header *h)
{
u16 lnh = be16_to_cpu(h->lrh[0]) & 3;
@@ -433,13 +360,13 @@ static inline u8 get_opcode(struct hfi1_ib_header *h)
return be32_to_cpu(h->u.l.oth.bth[0]) >> 24;
}
-int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_ib_header *hdr,
+int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct ib_header *hdr,
int has_grh, struct rvt_qp *qp, u32 bth0);
u32 hfi1_make_grh(struct hfi1_ibport *ibp, struct ib_grh *hdr,
struct ib_global_route *grh, u32 hwords, u32 nwords);
-void hfi1_make_ruc_header(struct rvt_qp *qp, struct hfi1_other_headers *ohdr,
+void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr,
u32 bth0, u32 bth2, int middle,
struct hfi1_pkt_state *ps);
diff --git a/drivers/infiniband/hw/hfi1/verbs_txreq.c b/drivers/infiniband/hw/hfi1/verbs_txreq.c
index d8fb056526f8..094ab829ec42 100644
--- a/drivers/infiniband/hw/hfi1/verbs_txreq.c
+++ b/drivers/infiniband/hw/hfi1/verbs_txreq.c
@@ -109,7 +109,7 @@ struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev,
qp->s_flags |= RVT_S_WAIT_TX;
list_add_tail(&priv->s_iowait.list, &dev->txwait);
trace_hfi1_qpsleep(qp, RVT_S_WAIT_TX);
- atomic_inc(&qp->refcount);
+ rvt_get_qp(qp);
}
qp->s_flags &= ~RVT_S_BUSY;
}
diff --git a/drivers/infiniband/hw/i40iw/i40iw_hw.c b/drivers/infiniband/hw/i40iw/i40iw_hw.c
index 3ee0cad96bc6..0c92a40b3e86 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_hw.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_hw.c
@@ -265,6 +265,7 @@ void i40iw_next_iw_state(struct i40iw_qp *iwqp,
info.dont_send_fin = false;
if (iwqp->sc_qp.term_flags && (state == I40IW_QP_STATE_ERROR))
info.reset_tcp_conn = true;
+ iwqp->hw_iwarp_state = state;
i40iw_hw_modify_qp(iwqp->iwdev, iwqp, &info, 0);
}
diff --git a/drivers/infiniband/hw/i40iw/i40iw_main.c b/drivers/infiniband/hw/i40iw/i40iw_main.c
index 0cbbe4038298..445e230d5ff8 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_main.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_main.c
@@ -100,7 +100,7 @@ static struct notifier_block i40iw_net_notifier = {
.notifier_call = i40iw_net_event
};
-static int i40iw_notifiers_registered;
+static atomic_t i40iw_notifiers_registered;
/**
* i40iw_find_i40e_handler - find a handler given a client info
@@ -1342,12 +1342,11 @@ exit:
*/
static void i40iw_register_notifiers(void)
{
- if (!i40iw_notifiers_registered) {
+ if (atomic_inc_return(&i40iw_notifiers_registered) == 1) {
register_inetaddr_notifier(&i40iw_inetaddr_notifier);
register_inet6addr_notifier(&i40iw_inetaddr6_notifier);
register_netevent_notifier(&i40iw_net_notifier);
}
- i40iw_notifiers_registered++;
}
/**
@@ -1429,8 +1428,7 @@ static void i40iw_deinit_device(struct i40iw_device *iwdev, bool reset, bool del
i40iw_del_macip_entry(iwdev, (u8)iwdev->mac_ip_table_idx);
/* fallthrough */
case INET_NOTIFIER:
- if (i40iw_notifiers_registered > 0) {
- i40iw_notifiers_registered--;
+ if (!atomic_dec_return(&i40iw_notifiers_registered)) {
unregister_netevent_notifier(&i40iw_net_notifier);
unregister_inetaddr_notifier(&i40iw_inetaddr_notifier);
unregister_inet6addr_notifier(&i40iw_inetaddr6_notifier);
diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index 006db6436e3b..5df63dacaaa3 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -687,12 +687,6 @@ repoll:
is_error = (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) ==
MLX4_CQE_OPCODE_ERROR;
- if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == MLX4_OPCODE_NOP &&
- is_send)) {
- pr_warn("Completion for NOP opcode detected!\n");
- return -EAGAIN;
- }
-
/* Resize CQ in progress */
if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == MLX4_CQE_OPCODE_RESIZE)) {
if (cq->resize_buf) {
@@ -718,12 +712,6 @@ repoll:
*/
mqp = __mlx4_qp_lookup(to_mdev(cq->ibcq.device)->dev,
be32_to_cpu(cqe->vlan_my_qpn));
- if (unlikely(!mqp)) {
- pr_warn("CQ %06x with entry for unknown QPN %06x\n",
- cq->mcq.cqn, be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK);
- return -EAGAIN;
- }
-
*cur_qp = to_mibqp(mqp);
}
@@ -736,11 +724,6 @@ repoll:
/* SRQ is also in the radix tree */
msrq = mlx4_srq_lookup(to_mdev(cq->ibcq.device)->dev,
srq_num);
- if (unlikely(!msrq)) {
- pr_warn("CQ %06x with entry for unknown SRQN %06x\n",
- cq->mcq.cqn, srq_num);
- return -EAGAIN;
- }
}
if (is_send) {
@@ -891,7 +874,6 @@ int mlx4_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
struct mlx4_ib_qp *cur_qp = NULL;
unsigned long flags;
int npolled;
- int err = 0;
struct mlx4_ib_dev *mdev = to_mdev(cq->ibcq.device);
spin_lock_irqsave(&cq->lock, flags);
@@ -901,8 +883,7 @@ int mlx4_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
}
for (npolled = 0; npolled < num_entries; ++npolled) {
- err = mlx4_ib_poll_one(cq, &cur_qp, wc + npolled);
- if (err)
+ if (mlx4_ib_poll_one(cq, &cur_qp, wc + npolled))
break;
}
@@ -911,10 +892,7 @@ int mlx4_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
out:
spin_unlock_irqrestore(&cq->lock, flags);
- if (err == 0 || err == -EAGAIN)
- return npolled;
- else
- return err;
+ return npolled;
}
int mlx4_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c
index 9c2e53d28f98..0f21c3a25552 100644
--- a/drivers/infiniband/hw/mlx4/mad.c
+++ b/drivers/infiniband/hw/mlx4/mad.c
@@ -1128,6 +1128,27 @@ void handle_port_mgmt_change_event(struct work_struct *work)
/* Generate GUID changed event */
if (changed_attr & MLX4_EQ_PORT_INFO_GID_PFX_CHANGE_MASK) {
+ if (mlx4_is_master(dev->dev)) {
+ union ib_gid gid;
+ int err = 0;
+
+ if (!eqe->event.port_mgmt_change.params.port_info.gid_prefix)
+ err = __mlx4_ib_query_gid(&dev->ib_dev, port, 0, &gid, 1);
+ else
+ gid.global.subnet_prefix =
+ eqe->event.port_mgmt_change.params.port_info.gid_prefix;
+ if (err) {
+ pr_warn("Could not change QP1 subnet prefix for port %d: query_gid error (%d)\n",
+ port, err);
+ } else {
+ pr_debug("Changing QP1 subnet prefix for port %d. old=0x%llx. new=0x%llx\n",
+ port,
+ (u64)atomic64_read(&dev->sriov.demux[port - 1].subnet_prefix),
+ be64_to_cpu(gid.global.subnet_prefix));
+ atomic64_set(&dev->sriov.demux[port - 1].subnet_prefix,
+ be64_to_cpu(gid.global.subnet_prefix));
+ }
+ }
mlx4_ib_dispatch_event(dev, port, IB_EVENT_GID_CHANGE);
/*if master, notify all slaves*/
if (mlx4_is_master(dev->dev))
@@ -2202,6 +2223,8 @@ int mlx4_ib_init_sriov(struct mlx4_ib_dev *dev)
if (err)
goto demux_err;
dev->sriov.demux[i].guid_cache[0] = gid.global.interface_id;
+ atomic64_set(&dev->sriov.demux[i].subnet_prefix,
+ be64_to_cpu(gid.global.subnet_prefix));
err = alloc_pv_object(dev, mlx4_master_func_num(dev->dev), i + 1,
&dev->sriov.sqps[i]);
if (err)
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 2af44c2de262..87ba9bca4181 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -2202,6 +2202,9 @@ static int mlx4_ib_alloc_diag_counters(struct mlx4_ib_dev *ibdev)
bool per_port = !!(ibdev->dev->caps.flags2 &
MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT);
+ if (mlx4_is_slave(ibdev->dev))
+ return 0;
+
for (i = 0; i < MLX4_DIAG_COUNTERS_TYPES; i++) {
/* i == 1 means we are building port counters */
if (i && !per_port)
diff --git a/drivers/infiniband/hw/mlx4/mcg.c b/drivers/infiniband/hw/mlx4/mcg.c
index 8f7ad07915b0..097bfcc4ee99 100644
--- a/drivers/infiniband/hw/mlx4/mcg.c
+++ b/drivers/infiniband/hw/mlx4/mcg.c
@@ -489,7 +489,7 @@ static u8 get_leave_state(struct mcast_group *group)
if (!group->members[i])
leave_state |= (1 << i);
- return leave_state & (group->rec.scope_join_state & 7);
+ return leave_state & (group->rec.scope_join_state & 0xf);
}
static int join_group(struct mcast_group *group, int slave, u8 join_mask)
@@ -564,8 +564,8 @@ static void mlx4_ib_mcg_timeout_handler(struct work_struct *work)
} else
mcg_warn_group(group, "DRIVER BUG\n");
} else if (group->state == MCAST_LEAVE_SENT) {
- if (group->rec.scope_join_state & 7)
- group->rec.scope_join_state &= 0xf8;
+ if (group->rec.scope_join_state & 0xf)
+ group->rec.scope_join_state &= 0xf0;
group->state = MCAST_IDLE;
mutex_unlock(&group->lock);
if (release_group(group, 1))
@@ -605,7 +605,7 @@ static int handle_leave_req(struct mcast_group *group, u8 leave_mask,
static int handle_join_req(struct mcast_group *group, u8 join_mask,
struct mcast_req *req)
{
- u8 group_join_state = group->rec.scope_join_state & 7;
+ u8 group_join_state = group->rec.scope_join_state & 0xf;
int ref = 0;
u16 status;
struct ib_sa_mcmember_data *sa_data = (struct ib_sa_mcmember_data *)req->sa_mad.data;
@@ -690,8 +690,8 @@ static void mlx4_ib_mcg_work_handler(struct work_struct *work)
u8 cur_join_state;
resp_join_state = ((struct ib_sa_mcmember_data *)
- group->response_sa_mad.data)->scope_join_state & 7;
- cur_join_state = group->rec.scope_join_state & 7;
+ group->response_sa_mad.data)->scope_join_state & 0xf;
+ cur_join_state = group->rec.scope_join_state & 0xf;
if (method == IB_MGMT_METHOD_GET_RESP) {
/* successfull join */
@@ -710,7 +710,7 @@ process_requests:
req = list_first_entry(&group->pending_list, struct mcast_req,
group_list);
sa_data = (struct ib_sa_mcmember_data *)req->sa_mad.data;
- req_join_state = sa_data->scope_join_state & 0x7;
+ req_join_state = sa_data->scope_join_state & 0xf;
/* For a leave request, we will immediately answer the VF, and
* update our internal counters. The actual leave will be sent
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index 7c5832ede4bd..686ab48ff644 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -448,7 +448,7 @@ struct mlx4_ib_demux_ctx {
struct workqueue_struct *wq;
struct workqueue_struct *ud_wq;
spinlock_t ud_lock;
- __be64 subnet_prefix;
+ atomic64_t subnet_prefix;
__be64 guid_cache[128];
struct mlx4_ib_dev *dev;
/* the following lock protects both mcg_table and mcg_mgid0_list */
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 768085f59566..7fb9629bd12b 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -2493,24 +2493,27 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
sqp->ud_header.grh.flow_label =
ah->av.ib.sl_tclass_flowlabel & cpu_to_be32(0xfffff);
sqp->ud_header.grh.hop_limit = ah->av.ib.hop_limit;
- if (is_eth)
+ if (is_eth) {
memcpy(sqp->ud_header.grh.source_gid.raw, sgid.raw, 16);
- else {
- if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) {
- /* When multi-function is enabled, the ib_core gid
- * indexes don't necessarily match the hw ones, so
- * we must use our own cache */
- sqp->ud_header.grh.source_gid.global.subnet_prefix =
- to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1].
- subnet_prefix;
- sqp->ud_header.grh.source_gid.global.interface_id =
- to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1].
- guid_cache[ah->av.ib.gid_index];
- } else
- ib_get_cached_gid(ib_dev,
- be32_to_cpu(ah->av.ib.port_pd) >> 24,
- ah->av.ib.gid_index,
- &sqp->ud_header.grh.source_gid, NULL);
+ } else {
+ if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) {
+ /* When multi-function is enabled, the ib_core gid
+ * indexes don't necessarily match the hw ones, so
+ * we must use our own cache
+ */
+ sqp->ud_header.grh.source_gid.global.subnet_prefix =
+ cpu_to_be64(atomic64_read(&(to_mdev(ib_dev)->sriov.
+ demux[sqp->qp.port - 1].
+ subnet_prefix)));
+ sqp->ud_header.grh.source_gid.global.interface_id =
+ to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1].
+ guid_cache[ah->av.ib.gid_index];
+ } else {
+ ib_get_cached_gid(ib_dev,
+ be32_to_cpu(ah->av.ib.port_pd) >> 24,
+ ah->av.ib.gid_index,
+ &sqp->ud_header.grh.source_gid, NULL);
+ }
}
memcpy(sqp->ud_header.grh.destination_gid.raw,
ah->av.ib.dgid, 16);
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index 308a358e5b46..5de9a65f53bc 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -553,12 +553,6 @@ repoll:
* from the table.
*/
mqp = __mlx5_qp_lookup(dev->mdev, qpn);
- if (unlikely(!mqp)) {
- mlx5_ib_warn(dev, "CQE@CQ %06x for unknown QPN %6x\n",
- cq->mcq.cqn, qpn);
- return -EINVAL;
- }
-
*cur_qp = to_mibqp(mqp);
}
@@ -619,13 +613,6 @@ repoll:
read_lock(&dev->mdev->priv.mkey_table.lock);
mmkey = __mlx5_mr_lookup(dev->mdev,
mlx5_base_mkey(be32_to_cpu(sig_err_cqe->mkey)));
- if (unlikely(!mmkey)) {
- read_unlock(&dev->mdev->priv.mkey_table.lock);
- mlx5_ib_warn(dev, "CQE@CQ %06x for unknown MR %6x\n",
- cq->mcq.cqn, be32_to_cpu(sig_err_cqe->mkey));
- return -EINVAL;
- }
-
mr = to_mibmr(mmkey);
get_sig_err_item(sig_err_cqe, &mr->sig->err_item);
mr->sig->sig_err_exists = true;
@@ -676,7 +663,6 @@ int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
unsigned long flags;
int soft_polled = 0;
int npolled;
- int err = 0;
spin_lock_irqsave(&cq->lock, flags);
if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
@@ -688,8 +674,7 @@ int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
soft_polled = poll_soft_wc(cq, num_entries, wc);
for (npolled = 0; npolled < num_entries - soft_polled; npolled++) {
- err = mlx5_poll_one(cq, &cur_qp, wc + soft_polled + npolled);
- if (err)
+ if (mlx5_poll_one(cq, &cur_qp, wc + soft_polled + npolled))
break;
}
@@ -698,10 +683,7 @@ int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
out:
spin_unlock_irqrestore(&cq->lock, flags);
- if (err == 0 || err == -EAGAIN)
- return soft_polled + npolled;
- else
- return err;
+ return soft_polled + npolled;
}
int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
@@ -747,14 +729,16 @@ static int alloc_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf,
static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,
struct ib_ucontext *context, struct mlx5_ib_cq *cq,
- int entries, struct mlx5_create_cq_mbox_in **cqb,
+ int entries, u32 **cqb,
int *cqe_size, int *index, int *inlen)
{
struct mlx5_ib_create_cq ucmd;
size_t ucmdlen;
int page_shift;
+ __be64 *pas;
int npages;
int ncont;
+ void *cqc;
int err;
ucmdlen =
@@ -792,14 +776,20 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,
mlx5_ib_dbg(dev, "addr 0x%llx, size %u, npages %d, page_shift %d, ncont %d\n",
ucmd.buf_addr, entries * ucmd.cqe_size, npages, page_shift, ncont);
- *inlen = sizeof(**cqb) + sizeof(*(*cqb)->pas) * ncont;
+ *inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
+ MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * ncont;
*cqb = mlx5_vzalloc(*inlen);
if (!*cqb) {
err = -ENOMEM;
goto err_db;
}
- mlx5_ib_populate_pas(dev, cq->buf.umem, page_shift, (*cqb)->pas, 0);
- (*cqb)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
+
+ pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, *cqb, pas);
+ mlx5_ib_populate_pas(dev, cq->buf.umem, page_shift, pas, 0);
+
+ cqc = MLX5_ADDR_OF(create_cq_in, *cqb, cq_context);
+ MLX5_SET(cqc, cqc, log_page_size,
+ page_shift - MLX5_ADAPTER_PAGE_SHIFT);
*index = to_mucontext(context)->uuari.uars[0].index;
@@ -834,9 +824,10 @@ static void init_cq_buf(struct mlx5_ib_cq *cq, struct mlx5_ib_cq_buf *buf)
static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
int entries, int cqe_size,
- struct mlx5_create_cq_mbox_in **cqb,
- int *index, int *inlen)
+ u32 **cqb, int *index, int *inlen)
{
+ __be64 *pas;
+ void *cqc;
int err;
err = mlx5_db_alloc(dev->mdev, &cq->db);
@@ -853,15 +844,21 @@ static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
init_cq_buf(cq, &cq->buf);
- *inlen = sizeof(**cqb) + sizeof(*(*cqb)->pas) * cq->buf.buf.npages;
+ *inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
+ MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * cq->buf.buf.npages;
*cqb = mlx5_vzalloc(*inlen);
if (!*cqb) {
err = -ENOMEM;
goto err_buf;
}
- mlx5_fill_page_array(&cq->buf.buf, (*cqb)->pas);
- (*cqb)->ctx.log_pg_sz = cq->buf.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT;
+ pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, *cqb, pas);
+ mlx5_fill_page_array(&cq->buf.buf, pas);
+
+ cqc = MLX5_ADDR_OF(create_cq_in, *cqb, cq_context);
+ MLX5_SET(cqc, cqc, log_page_size,
+ cq->buf.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
+
*index = dev->mdev->priv.uuari.uars[0].index;
return 0;
@@ -895,11 +892,12 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
{
int entries = attr->cqe;
int vector = attr->comp_vector;
- struct mlx5_create_cq_mbox_in *cqb = NULL;
struct mlx5_ib_dev *dev = to_mdev(ibdev);
struct mlx5_ib_cq *cq;
int uninitialized_var(index);
int uninitialized_var(inlen);
+ u32 *cqb = NULL;
+ void *cqc;
int cqe_size;
unsigned int irqn;
int eqn;
@@ -945,19 +943,20 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
INIT_WORK(&cq->notify_work, notify_soft_wc_handler);
}
- cq->cqe_size = cqe_size;
- cqb->ctx.cqe_sz_flags = cqe_sz_to_mlx_sz(cqe_size) << 5;
-
- if (cq->create_flags & IB_CQ_FLAGS_IGNORE_OVERRUN)
- cqb->ctx.cqe_sz_flags |= (1 << 1);
-
- cqb->ctx.log_sz_usr_page = cpu_to_be32((ilog2(entries) << 24) | index);
err = mlx5_vector2eqn(dev->mdev, vector, &eqn, &irqn);
if (err)
goto err_cqb;
- cqb->ctx.c_eqn = cpu_to_be16(eqn);
- cqb->ctx.db_record_addr = cpu_to_be64(cq->db.dma);
+ cq->cqe_size = cqe_size;
+
+ cqc = MLX5_ADDR_OF(create_cq_in, cqb, cq_context);
+ MLX5_SET(cqc, cqc, cqe_sz, cqe_sz_to_mlx_sz(cqe_size));
+ MLX5_SET(cqc, cqc, log_cq_size, ilog2(entries));
+ MLX5_SET(cqc, cqc, uar_page, index);
+ MLX5_SET(cqc, cqc, c_eqn, eqn);
+ MLX5_SET64(cqc, cqc, dbr_addr, cq->db.dma);
+ if (cq->create_flags & IB_CQ_FLAGS_IGNORE_OVERRUN)
+ MLX5_SET(cqc, cqc, oi, 1);
err = mlx5_core_create_cq(dev->mdev, &cq->mcq, cqb, inlen);
if (err)
@@ -1088,27 +1087,15 @@ void mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq)
int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
{
- struct mlx5_modify_cq_mbox_in *in;
struct mlx5_ib_dev *dev = to_mdev(cq->device);
struct mlx5_ib_cq *mcq = to_mcq(cq);
int err;
- u32 fsel;
if (!MLX5_CAP_GEN(dev->mdev, cq_moderation))
return -ENOSYS;
- in = kzalloc(sizeof(*in), GFP_KERNEL);
- if (!in)
- return -ENOMEM;
-
- in->cqn = cpu_to_be32(mcq->mcq.cqn);
- fsel = (MLX5_CQ_MODIFY_PERIOD | MLX5_CQ_MODIFY_COUNT);
- in->ctx.cq_period = cpu_to_be16(cq_period);
- in->ctx.cq_max_count = cpu_to_be16(cq_count);
- in->field_select = cpu_to_be32(fsel);
- err = mlx5_core_modify_cq(dev->mdev, &mcq->mcq, in, sizeof(*in));
- kfree(in);
-
+ err = mlx5_core_modify_cq_moderation(dev->mdev, &mcq->mcq,
+ cq_period, cq_count);
if (err)
mlx5_ib_warn(dev, "modify cq 0x%x failed\n", mcq->mcq.cqn);
@@ -1241,9 +1228,11 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
{
struct mlx5_ib_dev *dev = to_mdev(ibcq->device);
struct mlx5_ib_cq *cq = to_mcq(ibcq);
- struct mlx5_modify_cq_mbox_in *in;
+ void *cqc;
+ u32 *in;
int err;
int npas;
+ __be64 *pas;
int page_shift;
int inlen;
int uninitialized_var(cqe_size);
@@ -1285,28 +1274,37 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
if (err)
goto ex;
- inlen = sizeof(*in) + npas * sizeof(in->pas[0]);
+ inlen = MLX5_ST_SZ_BYTES(modify_cq_in) +
+ MLX5_FLD_SZ_BYTES(modify_cq_in, pas[0]) * npas;
+
in = mlx5_vzalloc(inlen);
if (!in) {
err = -ENOMEM;
goto ex_resize;
}
+ pas = (__be64 *)MLX5_ADDR_OF(modify_cq_in, in, pas);
if (udata)
mlx5_ib_populate_pas(dev, cq->resize_umem, page_shift,
- in->pas, 0);
+ pas, 0);
else
- mlx5_fill_page_array(&cq->resize_buf->buf, in->pas);
-
- in->field_select = cpu_to_be32(MLX5_MODIFY_CQ_MASK_LOG_SIZE |
- MLX5_MODIFY_CQ_MASK_PG_OFFSET |
- MLX5_MODIFY_CQ_MASK_PG_SIZE);
- in->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
- in->ctx.cqe_sz_flags = cqe_sz_to_mlx_sz(cqe_size) << 5;
- in->ctx.page_offset = 0;
- in->ctx.log_sz_usr_page = cpu_to_be32(ilog2(entries) << 24);
- in->hdr.opmod = cpu_to_be16(MLX5_CQ_OPMOD_RESIZE);
- in->cqn = cpu_to_be32(cq->mcq.cqn);
+ mlx5_fill_page_array(&cq->resize_buf->buf, pas);
+
+ MLX5_SET(modify_cq_in, in,
+ modify_field_select_resize_field_select.resize_field_select.resize_field_select,
+ MLX5_MODIFY_CQ_MASK_LOG_SIZE |
+ MLX5_MODIFY_CQ_MASK_PG_OFFSET |
+ MLX5_MODIFY_CQ_MASK_PG_SIZE);
+
+ cqc = MLX5_ADDR_OF(modify_cq_in, in, cq_context);
+
+ MLX5_SET(cqc, cqc, log_page_size,
+ page_shift - MLX5_ADAPTER_PAGE_SHIFT);
+ MLX5_SET(cqc, cqc, cqe_sz, cqe_sz_to_mlx_sz(cqe_size));
+ MLX5_SET(cqc, cqc, log_cq_size, ilog2(entries));
+
+ MLX5_SET(modify_cq_in, in, op_mod, MLX5_CQ_OPMOD_RESIZE);
+ MLX5_SET(modify_cq_in, in, cqn, cq->mcq.cqn);
err = mlx5_core_modify_cq(dev->mdev, &cq->mcq, in, inlen);
if (err)
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 1b4094baa2de..551aa0e789aa 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -232,23 +232,19 @@ static int set_roce_addr(struct ib_device *device, u8 port_num,
const union ib_gid *gid,
const struct ib_gid_attr *attr)
{
- struct mlx5_ib_dev *dev = to_mdev(device);
- u32 in[MLX5_ST_SZ_DW(set_roce_address_in)];
- u32 out[MLX5_ST_SZ_DW(set_roce_address_out)];
+ struct mlx5_ib_dev *dev = to_mdev(device);
+ u32 in[MLX5_ST_SZ_DW(set_roce_address_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(set_roce_address_out)] = {0};
void *in_addr = MLX5_ADDR_OF(set_roce_address_in, in, roce_address);
enum rdma_link_layer ll = mlx5_ib_port_link_layer(device, port_num);
if (ll != IB_LINK_LAYER_ETHERNET)
return -EINVAL;
- memset(in, 0, sizeof(in));
-
ib_gid_to_mlx5_roce_addr(gid, attr, in_addr);
MLX5_SET(set_roce_address_in, in, roce_address_index, index);
MLX5_SET(set_roce_address_in, in, opcode, MLX5_CMD_OP_SET_ROCE_ADDRESS);
-
- memset(out, 0, sizeof(out));
return mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out));
}
@@ -288,7 +284,9 @@ __be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num,
static int mlx5_use_mad_ifc(struct mlx5_ib_dev *dev)
{
- return !MLX5_CAP_GEN(dev->mdev, ib_virt);
+ if (MLX5_CAP_GEN(dev->mdev, port_type) == MLX5_CAP_PORT_TYPE_IB)
+ return !MLX5_CAP_GEN(dev->mdev, ib_virt);
+ return 0;
}
enum {
@@ -751,8 +749,7 @@ static int mlx5_query_hca_port(struct ib_device *ibdev, u8 port,
&props->active_width);
if (err)
goto out;
- err = mlx5_query_port_proto_oper(mdev, &props->active_speed, MLX5_PTYS_IB,
- port);
+ err = mlx5_query_port_ib_proto_oper(mdev, &props->active_speed, port);
if (err)
goto out;
@@ -1428,6 +1425,13 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v,
dmac_47_16),
ib_spec->eth.val.dst_mac);
+ ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
+ smac_47_16),
+ ib_spec->eth.mask.src_mac);
+ ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v,
+ smac_47_16),
+ ib_spec->eth.val.src_mac);
+
if (ib_spec->eth.mask.vlan_tag) {
MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
vlan_tag, 1);
@@ -1849,6 +1853,7 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
int domain)
{
struct mlx5_ib_dev *dev = to_mdev(qp->device);
+ struct mlx5_ib_qp *mqp = to_mqp(qp);
struct mlx5_ib_flow_handler *handler = NULL;
struct mlx5_flow_destination *dst = NULL;
struct mlx5_ib_flow_prio *ft_prio;
@@ -1875,7 +1880,10 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
}
dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR;
- dst->tir_num = to_mqp(qp)->raw_packet_qp.rq.tirn;
+ if (mqp->flags & MLX5_IB_QP_RSS)
+ dst->tir_num = mqp->rss_qp.tirn;
+ else
+ dst->tir_num = mqp->raw_packet_qp.rq.tirn;
if (flow_attr->type == IB_FLOW_ATTR_NORMAL) {
if (flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP) {
diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c
index 40df2cca0609..996b54e366b0 100644
--- a/drivers/infiniband/hw/mlx5/mem.c
+++ b/drivers/infiniband/hw/mlx5/mem.c
@@ -71,7 +71,7 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift,
addr = addr >> page_shift;
tmp = (unsigned long)addr;
- m = find_first_bit(&tmp, sizeof(tmp));
+ m = find_first_bit(&tmp, BITS_PER_LONG);
skip = 1 << m;
mask = skip - 1;
i = 0;
@@ -81,7 +81,7 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift,
for (k = 0; k < len; k++) {
if (!(i & mask)) {
tmp = (unsigned long)pfn;
- m = min_t(unsigned long, m, find_first_bit(&tmp, sizeof(tmp)));
+ m = min_t(unsigned long, m, find_first_bit(&tmp, BITS_PER_LONG));
skip = 1 << m;
mask = skip - 1;
base = pfn;
@@ -89,7 +89,7 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift,
} else {
if (base + p != pfn) {
tmp = (unsigned long)p;
- m = find_first_bit(&tmp, sizeof(tmp));
+ m = find_first_bit(&tmp, BITS_PER_LONG);
skip = 1 << m;
mask = skip - 1;
base = pfn;
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 372385d0f993..67cc7416fdff 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -402,6 +402,7 @@ enum mlx5_ib_qp_flags {
/* QP uses 1 as its source QP number */
MLX5_IB_QP_SQPN_QP1 = 1 << 6,
MLX5_IB_QP_CAP_SCATTER_FCS = 1 << 7,
+ MLX5_IB_QP_RSS = 1 << 8,
};
struct mlx5_umr_wr {
@@ -504,7 +505,7 @@ struct mlx5_ib_mr {
int umred;
int npages;
struct mlx5_ib_dev *dev;
- struct mlx5_create_mkey_mbox_out out;
+ u32 out[MLX5_ST_SZ_DW(create_mkey_out)];
struct mlx5_core_sig_ctx *sig;
int live;
void *descs_alloc;
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 4b021305c321..6f7e34753abc 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -135,20 +135,10 @@ static void reg_mr_callback(int status, void *context)
return;
}
- if (mr->out.hdr.status) {
- mlx5_ib_warn(dev, "failed - status %d, syndorme 0x%x\n",
- mr->out.hdr.status,
- be32_to_cpu(mr->out.hdr.syndrome));
- kfree(mr);
- dev->fill_delay = 1;
- mod_timer(&dev->delay_timer, jiffies + HZ);
- return;
- }
-
spin_lock_irqsave(&dev->mdev->priv.mkey_lock, flags);
key = dev->mdev->priv.mkey_key++;
spin_unlock_irqrestore(&dev->mdev->priv.mkey_lock, flags);
- mr->mmkey.key = mlx5_idx_to_mkey(be32_to_cpu(mr->out.mkey) & 0xffffff) | key;
+ mr->mmkey.key = mlx5_idx_to_mkey(MLX5_GET(create_mkey_out, mr->out, mkey_index)) | key;
cache->last_add = jiffies;
@@ -170,16 +160,19 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
{
struct mlx5_mr_cache *cache = &dev->cache;
struct mlx5_cache_ent *ent = &cache->ent[c];
- struct mlx5_create_mkey_mbox_in *in;
+ int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
struct mlx5_ib_mr *mr;
int npages = 1 << ent->order;
+ void *mkc;
+ u32 *in;
int err = 0;
int i;
- in = kzalloc(sizeof(*in), GFP_KERNEL);
+ in = kzalloc(inlen, GFP_KERNEL);
if (!in)
return -ENOMEM;
+ mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
for (i = 0; i < num; i++) {
if (ent->pending >= MAX_PENDING_REG_MR) {
err = -EAGAIN;
@@ -194,18 +187,22 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
mr->order = ent->order;
mr->umred = 1;
mr->dev = dev;
- in->seg.status = MLX5_MKEY_STATUS_FREE;
- in->seg.xlt_oct_size = cpu_to_be32((npages + 1) / 2);
- in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
- in->seg.flags = MLX5_ACCESS_MODE_MTT | MLX5_PERM_UMR_EN;
- in->seg.log2_page_size = 12;
+
+ MLX5_SET(mkc, mkc, free, 1);
+ MLX5_SET(mkc, mkc, umr_en, 1);
+ MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_MTT);
+
+ MLX5_SET(mkc, mkc, qpn, 0xffffff);
+ MLX5_SET(mkc, mkc, translations_octword_size, (npages + 1) / 2);
+ MLX5_SET(mkc, mkc, log_page_size, 12);
spin_lock_irq(&ent->lock);
ent->pending++;
spin_unlock_irq(&ent->lock);
- err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in,
- sizeof(*in), reg_mr_callback,
- mr, &mr->out);
+ err = mlx5_core_create_mkey_cb(dev->mdev, &mr->mmkey,
+ in, inlen,
+ mr->out, sizeof(mr->out),
+ reg_mr_callback, mr);
if (err) {
spin_lock_irq(&ent->lock);
ent->pending--;
@@ -670,30 +667,38 @@ int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev)
struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
struct mlx5_core_dev *mdev = dev->mdev;
- struct mlx5_create_mkey_mbox_in *in;
- struct mlx5_mkey_seg *seg;
struct mlx5_ib_mr *mr;
+ void *mkc;
+ u32 *in;
int err;
mr = kzalloc(sizeof(*mr), GFP_KERNEL);
if (!mr)
return ERR_PTR(-ENOMEM);
- in = kzalloc(sizeof(*in), GFP_KERNEL);
+ in = kzalloc(inlen, GFP_KERNEL);
if (!in) {
err = -ENOMEM;
goto err_free;
}
- seg = &in->seg;
- seg->flags = convert_access(acc) | MLX5_ACCESS_MODE_PA;
- seg->flags_pd = cpu_to_be32(to_mpd(pd)->pdn | MLX5_MKEY_LEN64);
- seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
- seg->start_addr = 0;
+ mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
+
+ MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_PA);
+ MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC));
+ MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE));
+ MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ));
+ MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE));
+ MLX5_SET(mkc, mkc, lr, 1);
- err = mlx5_core_create_mkey(mdev, &mr->mmkey, in, sizeof(*in), NULL, NULL,
- NULL);
+ MLX5_SET(mkc, mkc, length64, 1);
+ MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
+ MLX5_SET(mkc, mkc, qpn, 0xffffff);
+ MLX5_SET64(mkc, mkc, start_addr, 0);
+
+ err = mlx5_core_create_mkey(mdev, &mr->mmkey, in, inlen);
if (err)
goto err_in;
@@ -1063,9 +1068,11 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,
int page_shift, int access_flags)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
- struct mlx5_create_mkey_mbox_in *in;
struct mlx5_ib_mr *mr;
+ __be64 *pas;
+ void *mkc;
int inlen;
+ u32 *in;
int err;
bool pg_cap = !!(MLX5_CAP_GEN(dev->mdev, pg));
@@ -1073,31 +1080,41 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,
if (!mr)
return ERR_PTR(-ENOMEM);
- inlen = sizeof(*in) + sizeof(*in->pas) * ((npages + 1) / 2) * 2;
+ inlen = MLX5_ST_SZ_BYTES(create_mkey_in) +
+ sizeof(*pas) * ((npages + 1) / 2) * 2;
in = mlx5_vzalloc(inlen);
if (!in) {
err = -ENOMEM;
goto err_1;
}
- mlx5_ib_populate_pas(dev, umem, page_shift, in->pas,
+ pas = (__be64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
+ mlx5_ib_populate_pas(dev, umem, page_shift, pas,
pg_cap ? MLX5_IB_MTT_PRESENT : 0);
- /* The MLX5_MKEY_INBOX_PG_ACCESS bit allows setting the access flags
+ /* The pg_access bit allows setting the access flags
* in the page list submitted with the command. */
- in->flags = pg_cap ? cpu_to_be32(MLX5_MKEY_INBOX_PG_ACCESS) : 0;
- in->seg.flags = convert_access(access_flags) |
- MLX5_ACCESS_MODE_MTT;
- in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
- in->seg.start_addr = cpu_to_be64(virt_addr);
- in->seg.len = cpu_to_be64(length);
- in->seg.bsfs_octo_size = 0;
- in->seg.xlt_oct_size = cpu_to_be32(get_octo_len(virt_addr, length, 1 << page_shift));
- in->seg.log2_page_size = page_shift;
- in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
- in->xlat_oct_act_size = cpu_to_be32(get_octo_len(virt_addr, length,
- 1 << page_shift));
- err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen, NULL,
- NULL, NULL);
+ MLX5_SET(create_mkey_in, in, pg_access, !!(pg_cap));
+
+ mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
+ MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_MTT);
+ MLX5_SET(mkc, mkc, a, !!(access_flags & IB_ACCESS_REMOTE_ATOMIC));
+ MLX5_SET(mkc, mkc, rw, !!(access_flags & IB_ACCESS_REMOTE_WRITE));
+ MLX5_SET(mkc, mkc, rr, !!(access_flags & IB_ACCESS_REMOTE_READ));
+ MLX5_SET(mkc, mkc, lw, !!(access_flags & IB_ACCESS_LOCAL_WRITE));
+ MLX5_SET(mkc, mkc, lr, 1);
+
+ MLX5_SET64(mkc, mkc, start_addr, virt_addr);
+ MLX5_SET64(mkc, mkc, len, length);
+ MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
+ MLX5_SET(mkc, mkc, bsf_octword_size, 0);
+ MLX5_SET(mkc, mkc, translations_octword_size,
+ get_octo_len(virt_addr, length, 1 << page_shift));
+ MLX5_SET(mkc, mkc, log_page_size, page_shift);
+ MLX5_SET(mkc, mkc, qpn, 0xffffff);
+ MLX5_SET(create_mkey_in, in, translations_octword_actual_size,
+ get_octo_len(virt_addr, length, 1 << page_shift));
+
+ err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen);
if (err) {
mlx5_ib_warn(dev, "create mkey failed\n");
goto err_2;
@@ -1523,30 +1540,32 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
u32 max_num_sg)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
- struct mlx5_create_mkey_mbox_in *in;
- struct mlx5_ib_mr *mr;
+ int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
int ndescs = ALIGN(max_num_sg, 4);
+ struct mlx5_ib_mr *mr;
+ void *mkc;
+ u32 *in;
int err;
mr = kzalloc(sizeof(*mr), GFP_KERNEL);
if (!mr)
return ERR_PTR(-ENOMEM);
- in = kzalloc(sizeof(*in), GFP_KERNEL);
+ in = kzalloc(inlen, GFP_KERNEL);
if (!in) {
err = -ENOMEM;
goto err_free;
}
- in->seg.status = MLX5_MKEY_STATUS_FREE;
- in->seg.xlt_oct_size = cpu_to_be32(ndescs);
- in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
- in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
+ mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
+ MLX5_SET(mkc, mkc, free, 1);
+ MLX5_SET(mkc, mkc, translations_octword_size, ndescs);
+ MLX5_SET(mkc, mkc, qpn, 0xffffff);
+ MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
if (mr_type == IB_MR_TYPE_MEM_REG) {
- mr->access_mode = MLX5_ACCESS_MODE_MTT;
- in->seg.log2_page_size = PAGE_SHIFT;
-
+ mr->access_mode = MLX5_MKC_ACCESS_MODE_MTT;
+ MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT);
err = mlx5_alloc_priv_descs(pd->device, mr,
ndescs, sizeof(u64));
if (err)
@@ -1555,7 +1574,7 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
mr->desc_size = sizeof(u64);
mr->max_descs = ndescs;
} else if (mr_type == IB_MR_TYPE_SG_GAPS) {
- mr->access_mode = MLX5_ACCESS_MODE_KLM;
+ mr->access_mode = MLX5_MKC_ACCESS_MODE_KLMS;
err = mlx5_alloc_priv_descs(pd->device, mr,
ndescs, sizeof(struct mlx5_klm));
@@ -1566,9 +1585,8 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
} else if (mr_type == IB_MR_TYPE_SIGNATURE) {
u32 psv_index[2];
- in->seg.flags_pd = cpu_to_be32(be32_to_cpu(in->seg.flags_pd) |
- MLX5_MKEY_BSF_EN);
- in->seg.bsfs_octo_size = cpu_to_be32(MLX5_MKEY_BSF_OCTO_SIZE);
+ MLX5_SET(mkc, mkc, bsf_en, 1);
+ MLX5_SET(mkc, mkc, bsf_octword_size, MLX5_MKEY_BSF_OCTO_SIZE);
mr->sig = kzalloc(sizeof(*mr->sig), GFP_KERNEL);
if (!mr->sig) {
err = -ENOMEM;
@@ -1581,7 +1599,7 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
if (err)
goto err_free_sig;
- mr->access_mode = MLX5_ACCESS_MODE_KLM;
+ mr->access_mode = MLX5_MKC_ACCESS_MODE_KLMS;
mr->sig->psv_memory.psv_idx = psv_index[0];
mr->sig->psv_wire.psv_idx = psv_index[1];
@@ -1595,9 +1613,10 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
goto err_free_in;
}
- in->seg.flags = MLX5_PERM_UMR_EN | mr->access_mode;
- err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, sizeof(*in),
- NULL, NULL, NULL);
+ MLX5_SET(mkc, mkc, access_mode, mr->access_mode);
+ MLX5_SET(mkc, mkc, umr_en, 1);
+
+ err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen);
if (err)
goto err_destroy_psv;
@@ -1633,8 +1652,10 @@ struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
struct ib_udata *udata)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
- struct mlx5_create_mkey_mbox_in *in = NULL;
+ int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
struct mlx5_ib_mw *mw = NULL;
+ u32 *in = NULL;
+ void *mkc;
int ndescs;
int err;
struct mlx5_ib_alloc_mw req = {};
@@ -1658,23 +1679,24 @@ struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
ndescs = req.num_klms ? roundup(req.num_klms, 4) : roundup(1, 4);
mw = kzalloc(sizeof(*mw), GFP_KERNEL);
- in = kzalloc(sizeof(*in), GFP_KERNEL);
+ in = kzalloc(inlen, GFP_KERNEL);
if (!mw || !in) {
err = -ENOMEM;
goto free;
}
- in->seg.status = MLX5_MKEY_STATUS_FREE;
- in->seg.xlt_oct_size = cpu_to_be32(ndescs);
- in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
- in->seg.flags = MLX5_PERM_UMR_EN | MLX5_ACCESS_MODE_KLM |
- MLX5_PERM_LOCAL_READ;
- if (type == IB_MW_TYPE_2)
- in->seg.flags_pd |= cpu_to_be32(MLX5_MKEY_REMOTE_INVAL);
- in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
-
- err = mlx5_core_create_mkey(dev->mdev, &mw->mmkey, in, sizeof(*in),
- NULL, NULL, NULL);
+ mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
+
+ MLX5_SET(mkc, mkc, free, 1);
+ MLX5_SET(mkc, mkc, translations_octword_size, ndescs);
+ MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
+ MLX5_SET(mkc, mkc, umr_en, 1);
+ MLX5_SET(mkc, mkc, lr, 1);
+ MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_KLMS);
+ MLX5_SET(mkc, mkc, en_rinval, !!((type == IB_MW_TYPE_2)));
+ MLX5_SET(mkc, mkc, qpn, 0xffffff);
+
+ err = mlx5_core_create_mkey(dev->mdev, &mw->mmkey, in, inlen);
if (err)
goto free;
@@ -1811,7 +1833,7 @@ int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
mr->desc_size * mr->max_descs,
DMA_TO_DEVICE);
- if (mr->access_mode == MLX5_ACCESS_MODE_KLM)
+ if (mr->access_mode == MLX5_MKC_ACCESS_MODE_KLMS)
n = mlx5_ib_sg_to_klms(mr, sg, sg_nents, sg_offset);
else
n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset,
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 0dd7d93cac95..9529b464fbdc 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -726,7 +726,7 @@ err_umem:
static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
struct mlx5_ib_qp *qp, struct ib_udata *udata,
struct ib_qp_init_attr *attr,
- struct mlx5_create_qp_mbox_in **in,
+ u32 **in,
struct mlx5_ib_create_qp_resp *resp, int *inlen,
struct mlx5_ib_qp_base *base)
{
@@ -739,6 +739,8 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
u32 offset = 0;
int uuarn;
int ncont = 0;
+ __be64 *pas;
+ void *qpc;
int err;
err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd));
@@ -795,20 +797,24 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
ubuffer->umem = NULL;
}
- *inlen = sizeof(**in) + sizeof(*(*in)->pas) * ncont;
+ *inlen = MLX5_ST_SZ_BYTES(create_qp_in) +
+ MLX5_FLD_SZ_BYTES(create_qp_in, pas[0]) * ncont;
*in = mlx5_vzalloc(*inlen);
if (!*in) {
err = -ENOMEM;
goto err_umem;
}
+
+ pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, *in, pas);
if (ubuffer->umem)
- mlx5_ib_populate_pas(dev, ubuffer->umem, page_shift,
- (*in)->pas, 0);
- (*in)->ctx.log_pg_sz_remote_qpn =
- cpu_to_be32((page_shift - MLX5_ADAPTER_PAGE_SHIFT) << 24);
- (*in)->ctx.params2 = cpu_to_be32(offset << 6);
+ mlx5_ib_populate_pas(dev, ubuffer->umem, page_shift, pas, 0);
+
+ qpc = MLX5_ADDR_OF(create_qp_in, *in, qpc);
+
+ MLX5_SET(qpc, qpc, log_page_size, page_shift - MLX5_ADAPTER_PAGE_SHIFT);
+ MLX5_SET(qpc, qpc, page_offset, offset);
- (*in)->ctx.qp_counter_set_usr_page = cpu_to_be32(uar_index);
+ MLX5_SET(qpc, qpc, uar_page, uar_index);
resp->uuar_index = uuarn;
qp->uuarn = uuarn;
@@ -857,12 +863,13 @@ static void destroy_qp_user(struct ib_pd *pd, struct mlx5_ib_qp *qp,
static int create_kernel_qp(struct mlx5_ib_dev *dev,
struct ib_qp_init_attr *init_attr,
struct mlx5_ib_qp *qp,
- struct mlx5_create_qp_mbox_in **in, int *inlen,
+ u32 **in, int *inlen,
struct mlx5_ib_qp_base *base)
{
enum mlx5_ib_latency_class lc = MLX5_IB_LATENCY_CLASS_LOW;
struct mlx5_uuar_info *uuari;
int uar_index;
+ void *qpc;
int uuarn;
int err;
@@ -902,25 +909,29 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev,
}
qp->sq.qend = mlx5_get_send_wqe(qp, qp->sq.wqe_cnt);
- *inlen = sizeof(**in) + sizeof(*(*in)->pas) * qp->buf.npages;
+ *inlen = MLX5_ST_SZ_BYTES(create_qp_in) +
+ MLX5_FLD_SZ_BYTES(create_qp_in, pas[0]) * qp->buf.npages;
*in = mlx5_vzalloc(*inlen);
if (!*in) {
err = -ENOMEM;
goto err_buf;
}
- (*in)->ctx.qp_counter_set_usr_page = cpu_to_be32(uar_index);
- (*in)->ctx.log_pg_sz_remote_qpn =
- cpu_to_be32((qp->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT) << 24);
+
+ qpc = MLX5_ADDR_OF(create_qp_in, *in, qpc);
+ MLX5_SET(qpc, qpc, uar_page, uar_index);
+ MLX5_SET(qpc, qpc, log_page_size, qp->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
+
/* Set "fast registration enabled" for all kernel QPs */
- (*in)->ctx.params1 |= cpu_to_be32(1 << 11);
- (*in)->ctx.sq_crq_size |= cpu_to_be16(1 << 4);
+ MLX5_SET(qpc, qpc, fre, 1);
+ MLX5_SET(qpc, qpc, rlky, 1);
if (init_attr->create_flags & mlx5_ib_create_qp_sqpn_qp1()) {
- (*in)->ctx.deth_sqpn = cpu_to_be32(1);
+ MLX5_SET(qpc, qpc, deth_sqpn, 1);
qp->flags |= MLX5_IB_QP_SQPN_QP1;
}
- mlx5_fill_page_array(&qp->buf, (*in)->pas);
+ mlx5_fill_page_array(&qp->buf,
+ (__be64 *)MLX5_ADDR_OF(create_qp_in, *in, pas));
err = mlx5_db_alloc(dev->mdev, &qp->db);
if (err) {
@@ -974,15 +985,15 @@ static void destroy_qp_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
free_uuar(&dev->mdev->priv.uuari, qp->bf->uuarn);
}
-static __be32 get_rx_type(struct mlx5_ib_qp *qp, struct ib_qp_init_attr *attr)
+static u32 get_rx_type(struct mlx5_ib_qp *qp, struct ib_qp_init_attr *attr)
{
if (attr->srq || (attr->qp_type == IB_QPT_XRC_TGT) ||
(attr->qp_type == IB_QPT_XRC_INI))
- return cpu_to_be32(MLX5_SRQ_RQ);
+ return MLX5_SRQ_RQ;
else if (!qp->has_rq)
- return cpu_to_be32(MLX5_ZERO_LEN_RQ);
+ return MLX5_ZERO_LEN_RQ;
else
- return cpu_to_be32(MLX5_NON_ZERO_RQ);
+ return MLX5_NON_ZERO_RQ;
}
static int is_connected(enum ib_qp_type qp_type)
@@ -996,13 +1007,10 @@ static int is_connected(enum ib_qp_type qp_type)
static int create_raw_packet_qp_tis(struct mlx5_ib_dev *dev,
struct mlx5_ib_sq *sq, u32 tdn)
{
- u32 in[MLX5_ST_SZ_DW(create_tis_in)];
+ u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {0};
void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
- memset(in, 0, sizeof(in));
-
MLX5_SET(tisc, tisc, transport_domain, tdn);
-
return mlx5_core_create_tis(dev->mdev, in, sizeof(in), &sq->tisn);
}
@@ -1191,7 +1199,7 @@ static void destroy_raw_packet_qp_tir(struct mlx5_ib_dev *dev,
}
static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
- struct mlx5_create_qp_mbox_in *in,
+ u32 *in,
struct ib_pd *pd)
{
struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp;
@@ -1449,6 +1457,7 @@ create_tir:
kvfree(in);
/* qpn is reserved for that QP */
qp->trans_qp.base.mqp.qpn = 0;
+ qp->flags |= MLX5_IB_QP_RSS;
return 0;
err:
@@ -1461,18 +1470,18 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
struct ib_udata *udata, struct mlx5_ib_qp *qp)
{
struct mlx5_ib_resources *devr = &dev->devr;
+ int inlen = MLX5_ST_SZ_BYTES(create_qp_in);
struct mlx5_core_dev *mdev = dev->mdev;
- struct mlx5_ib_qp_base *base;
struct mlx5_ib_create_qp_resp resp;
- struct mlx5_create_qp_mbox_in *in;
- struct mlx5_ib_create_qp ucmd;
struct mlx5_ib_cq *send_cq;
struct mlx5_ib_cq *recv_cq;
unsigned long flags;
- int inlen = sizeof(*in);
- int err;
u32 uidx = MLX5_IB_DEFAULT_UIDX;
+ struct mlx5_ib_create_qp ucmd;
+ struct mlx5_ib_qp_base *base;
void *qpc;
+ u32 *in;
+ int err;
base = init_attr->qp_type == IB_QPT_RAW_PACKET ?
&qp->raw_packet_qp.rq.base :
@@ -1600,7 +1609,7 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
if (err)
return err;
} else {
- in = mlx5_vzalloc(sizeof(*in));
+ in = mlx5_vzalloc(inlen);
if (!in)
return -ENOMEM;
@@ -1610,26 +1619,29 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
if (is_sqp(init_attr->qp_type))
qp->port = init_attr->port_num;
- in->ctx.flags = cpu_to_be32(to_mlx5_st(init_attr->qp_type) << 16 |
- MLX5_QP_PM_MIGRATED << 11);
+ qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
+
+ MLX5_SET(qpc, qpc, st, to_mlx5_st(init_attr->qp_type));
+ MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
if (init_attr->qp_type != MLX5_IB_QPT_REG_UMR)
- in->ctx.flags_pd = cpu_to_be32(to_mpd(pd ? pd : devr->p0)->pdn);
+ MLX5_SET(qpc, qpc, pd, to_mpd(pd ? pd : devr->p0)->pdn);
else
- in->ctx.flags_pd = cpu_to_be32(MLX5_QP_LAT_SENSITIVE);
+ MLX5_SET(qpc, qpc, latency_sensitive, 1);
+
if (qp->wq_sig)
- in->ctx.flags_pd |= cpu_to_be32(MLX5_QP_ENABLE_SIG);
+ MLX5_SET(qpc, qpc, wq_signature, 1);
if (qp->flags & MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK)
- in->ctx.flags_pd |= cpu_to_be32(MLX5_QP_BLOCK_MCAST);
+ MLX5_SET(qpc, qpc, block_lb_mc, 1);
if (qp->flags & MLX5_IB_QP_CROSS_CHANNEL)
- in->ctx.params2 |= cpu_to_be32(MLX5_QP_BIT_CC_MASTER);
+ MLX5_SET(qpc, qpc, cd_master, 1);
if (qp->flags & MLX5_IB_QP_MANAGED_SEND)
- in->ctx.params2 |= cpu_to_be32(MLX5_QP_BIT_CC_SLAVE_SEND);
+ MLX5_SET(qpc, qpc, cd_slave_send, 1);
if (qp->flags & MLX5_IB_QP_MANAGED_RECV)
- in->ctx.params2 |= cpu_to_be32(MLX5_QP_BIT_CC_SLAVE_RECV);
+ MLX5_SET(qpc, qpc, cd_slave_receive, 1);
if (qp->scat_cqe && is_connected(init_attr->qp_type)) {
int rcqe_sz;
@@ -1639,71 +1651,68 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
scqe_sz = mlx5_ib_get_cqe_size(dev, init_attr->send_cq);
if (rcqe_sz == 128)
- in->ctx.cs_res = MLX5_RES_SCAT_DATA64_CQE;
+ MLX5_SET(qpc, qpc, cs_res, MLX5_RES_SCAT_DATA64_CQE);
else
- in->ctx.cs_res = MLX5_RES_SCAT_DATA32_CQE;
+ MLX5_SET(qpc, qpc, cs_res, MLX5_RES_SCAT_DATA32_CQE);
if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) {
if (scqe_sz == 128)
- in->ctx.cs_req = MLX5_REQ_SCAT_DATA64_CQE;
+ MLX5_SET(qpc, qpc, cs_req, MLX5_REQ_SCAT_DATA64_CQE);
else
- in->ctx.cs_req = MLX5_REQ_SCAT_DATA32_CQE;
+ MLX5_SET(qpc, qpc, cs_req, MLX5_REQ_SCAT_DATA32_CQE);
}
}
if (qp->rq.wqe_cnt) {
- in->ctx.rq_size_stride = (qp->rq.wqe_shift - 4);
- in->ctx.rq_size_stride |= ilog2(qp->rq.wqe_cnt) << 3;
+ MLX5_SET(qpc, qpc, log_rq_stride, qp->rq.wqe_shift - 4);
+ MLX5_SET(qpc, qpc, log_rq_size, ilog2(qp->rq.wqe_cnt));
}
- in->ctx.rq_type_srqn = get_rx_type(qp, init_attr);
+ MLX5_SET(qpc, qpc, rq_type, get_rx_type(qp, init_attr));
if (qp->sq.wqe_cnt)
- in->ctx.sq_crq_size |= cpu_to_be16(ilog2(qp->sq.wqe_cnt) << 11);
+ MLX5_SET(qpc, qpc, log_sq_size, ilog2(qp->sq.wqe_cnt));
else
- in->ctx.sq_crq_size |= cpu_to_be16(0x8000);
+ MLX5_SET(qpc, qpc, no_sq, 1);
/* Set default resources */
switch (init_attr->qp_type) {
case IB_QPT_XRC_TGT:
- in->ctx.cqn_recv = cpu_to_be32(to_mcq(devr->c0)->mcq.cqn);
- in->ctx.cqn_send = cpu_to_be32(to_mcq(devr->c0)->mcq.cqn);
- in->ctx.rq_type_srqn |= cpu_to_be32(to_msrq(devr->s0)->msrq.srqn);
- in->ctx.xrcd = cpu_to_be32(to_mxrcd(init_attr->xrcd)->xrcdn);
+ MLX5_SET(qpc, qpc, cqn_rcv, to_mcq(devr->c0)->mcq.cqn);
+ MLX5_SET(qpc, qpc, cqn_snd, to_mcq(devr->c0)->mcq.cqn);
+ MLX5_SET(qpc, qpc, srqn_rmpn_xrqn, to_msrq(devr->s0)->msrq.srqn);
+ MLX5_SET(qpc, qpc, xrcd, to_mxrcd(init_attr->xrcd)->xrcdn);
break;
case IB_QPT_XRC_INI:
- in->ctx.cqn_recv = cpu_to_be32(to_mcq(devr->c0)->mcq.cqn);
- in->ctx.xrcd = cpu_to_be32(to_mxrcd(devr->x1)->xrcdn);
- in->ctx.rq_type_srqn |= cpu_to_be32(to_msrq(devr->s0)->msrq.srqn);
+ MLX5_SET(qpc, qpc, cqn_rcv, to_mcq(devr->c0)->mcq.cqn);
+ MLX5_SET(qpc, qpc, xrcd, to_mxrcd(devr->x1)->xrcdn);
+ MLX5_SET(qpc, qpc, srqn_rmpn_xrqn, to_msrq(devr->s0)->msrq.srqn);
break;
default:
if (init_attr->srq) {
- in->ctx.xrcd = cpu_to_be32(to_mxrcd(devr->x0)->xrcdn);
- in->ctx.rq_type_srqn |= cpu_to_be32(to_msrq(init_attr->srq)->msrq.srqn);
+ MLX5_SET(qpc, qpc, xrcd, to_mxrcd(devr->x0)->xrcdn);
+ MLX5_SET(qpc, qpc, srqn_rmpn_xrqn, to_msrq(init_attr->srq)->msrq.srqn);
} else {
- in->ctx.xrcd = cpu_to_be32(to_mxrcd(devr->x1)->xrcdn);
- in->ctx.rq_type_srqn |=
- cpu_to_be32(to_msrq(devr->s1)->msrq.srqn);
+ MLX5_SET(qpc, qpc, xrcd, to_mxrcd(devr->x1)->xrcdn);
+ MLX5_SET(qpc, qpc, srqn_rmpn_xrqn, to_msrq(devr->s1)->msrq.srqn);
}
}
if (init_attr->send_cq)
- in->ctx.cqn_send = cpu_to_be32(to_mcq(init_attr->send_cq)->mcq.cqn);
+ MLX5_SET(qpc, qpc, cqn_snd, to_mcq(init_attr->send_cq)->mcq.cqn);
if (init_attr->recv_cq)
- in->ctx.cqn_recv = cpu_to_be32(to_mcq(init_attr->recv_cq)->mcq.cqn);
+ MLX5_SET(qpc, qpc, cqn_rcv, to_mcq(init_attr->recv_cq)->mcq.cqn);
- in->ctx.db_rec_addr = cpu_to_be64(qp->db.dma);
+ MLX5_SET64(qpc, qpc, dbr_addr, qp->db.dma);
- if (MLX5_CAP_GEN(mdev, cqe_version) == MLX5_CQE_VERSION_V1) {
- qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
- /* 0xffffff means we ask to work with cqe version 0 */
+ /* 0xffffff means we ask to work with cqe version 0 */
+ if (MLX5_CAP_GEN(mdev, cqe_version) == MLX5_CQE_VERSION_V1)
MLX5_SET(qpc, qpc, user_index, uidx);
- }
+
/* we use IB_QP_CREATE_IPOIB_UD_LSO to indicates ipoib qp */
if (init_attr->qp_type == IB_QPT_UD &&
(init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO)) {
- qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
MLX5_SET(qpc, qpc, ulp_stateless_offload_mode, 1);
qp->flags |= MLX5_IB_QP_LSO;
}
@@ -1860,7 +1869,6 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
{
struct mlx5_ib_cq *send_cq, *recv_cq;
struct mlx5_ib_qp_base *base = &qp->trans_qp.base;
- struct mlx5_modify_qp_mbox_in *in;
unsigned long flags;
int err;
@@ -1873,16 +1881,12 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
&qp->raw_packet_qp.rq.base :
&qp->trans_qp.base;
- in = kzalloc(sizeof(*in), GFP_KERNEL);
- if (!in)
- return;
-
if (qp->state != IB_QPS_RESET) {
if (qp->ibqp.qp_type != IB_QPT_RAW_PACKET) {
mlx5_ib_qp_disable_pagefaults(qp);
err = mlx5_core_qp_modify(dev->mdev,
- MLX5_CMD_OP_2RST_QP, in, 0,
- &base->mqp);
+ MLX5_CMD_OP_2RST_QP, 0,
+ NULL, &base->mqp);
} else {
err = modify_raw_packet_qp(dev, qp,
MLX5_CMD_OP_2RST_QP);
@@ -1924,8 +1928,6 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
base->mqp.qpn);
}
- kfree(in);
-
if (qp->create_type == MLX5_QP_KERNEL)
destroy_qp_kernel(dev, qp);
else if (qp->create_type == MLX5_QP_USER)
@@ -2511,7 +2513,6 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
struct mlx5_ib_qp_base *base = &qp->trans_qp.base;
struct mlx5_ib_cq *send_cq, *recv_cq;
struct mlx5_qp_context *context;
- struct mlx5_modify_qp_mbox_in *in;
struct mlx5_ib_pd *pd;
enum mlx5_qp_state mlx5_cur, mlx5_new;
enum mlx5_qp_optpar optpar;
@@ -2520,11 +2521,10 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
int err;
u16 op;
- in = kzalloc(sizeof(*in), GFP_KERNEL);
- if (!in)
+ context = kzalloc(sizeof(*context), GFP_KERNEL);
+ if (!context)
return -ENOMEM;
- context = &in->ctx;
err = to_mlx5_st(ibqp->qp_type);
if (err < 0) {
mlx5_ib_dbg(dev, "unsupported qp type %d\n", ibqp->qp_type);
@@ -2689,12 +2689,11 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
op = optab[mlx5_cur][mlx5_new];
optpar = ib_mask_to_mlx5_opt(attr_mask);
optpar &= opt_mask[mlx5_cur][mlx5_new][mlx5_st];
- in->optparam = cpu_to_be32(optpar);
if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET)
err = modify_raw_packet_qp(dev, qp, op);
else
- err = mlx5_core_qp_modify(dev->mdev, op, in, sqd_event,
+ err = mlx5_core_qp_modify(dev->mdev, op, optpar, context,
&base->mqp);
if (err)
goto out;
@@ -2735,7 +2734,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
}
out:
- kfree(in);
+ kfree(context);
return err;
}
@@ -2968,7 +2967,7 @@ static void set_reg_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr,
memset(umr, 0, sizeof(*umr));
- if (mr->access_mode == MLX5_ACCESS_MODE_KLM)
+ if (mr->access_mode == MLX5_MKC_ACCESS_MODE_KLMS)
/* KLMs take twice the size of MTTs */
ndescs *= 2;
@@ -3111,9 +3110,9 @@ static void set_reg_mkey_seg(struct mlx5_mkey_seg *seg,
memset(seg, 0, sizeof(*seg));
- if (mr->access_mode == MLX5_ACCESS_MODE_MTT)
+ if (mr->access_mode == MLX5_MKC_ACCESS_MODE_MTT)
seg->log2_page_size = ilog2(mr->ibmr.page_size);
- else if (mr->access_mode == MLX5_ACCESS_MODE_KLM)
+ else if (mr->access_mode == MLX5_MKC_ACCESS_MODE_KLMS)
/* KLMs take twice the size of MTTs */
ndescs *= 2;
@@ -3454,7 +3453,7 @@ static void set_sig_mkey_segment(struct mlx5_mkey_seg *seg,
memset(seg, 0, sizeof(*seg));
seg->flags = get_umr_flags(wr->access_flags) |
- MLX5_ACCESS_MODE_KLM;
+ MLX5_MKC_ACCESS_MODE_KLMS;
seg->qpn_mkey7_0 = cpu_to_be32((sig_key & 0xff) | 0xffffff00);
seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL | sigerr << 26 |
MLX5_MKEY_BSF_EN | pdn);
@@ -3658,12 +3657,8 @@ static int begin_wqe(struct mlx5_ib_qp *qp, void **seg,
struct ib_send_wr *wr, unsigned *idx,
int *size, int nreq)
{
- int err = 0;
-
- if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq))) {
- err = -ENOMEM;
- return err;
- }
+ if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)))
+ return -ENOMEM;
*idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1);
*seg = mlx5_get_send_wqe(qp, *idx);
@@ -3679,7 +3674,7 @@ static int begin_wqe(struct mlx5_ib_qp *qp, void **seg,
*seg += sizeof(**ctrl);
*size = sizeof(**ctrl) / 16;
- return err;
+ return 0;
}
static void finish_wqe(struct mlx5_ib_qp *qp,
@@ -3758,7 +3753,7 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
num_sge = wr->num_sge;
if (unlikely(num_sge > qp->sq.max_gs)) {
mlx5_ib_warn(dev, "\n");
- err = -ENOMEM;
+ err = -EINVAL;
*bad_wr = wr;
goto out;
}
@@ -4320,21 +4315,24 @@ static int query_raw_packet_qp_state(struct mlx5_ib_dev *dev,
static int query_qp_attr(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
struct ib_qp_attr *qp_attr)
{
- struct mlx5_query_qp_mbox_out *outb;
+ int outlen = MLX5_ST_SZ_BYTES(query_qp_out);
struct mlx5_qp_context *context;
int mlx5_state;
+ u32 *outb;
int err = 0;
- outb = kzalloc(sizeof(*outb), GFP_KERNEL);
+ outb = kzalloc(outlen, GFP_KERNEL);
if (!outb)
return -ENOMEM;
- context = &outb->ctx;
err = mlx5_core_qp_query(dev->mdev, &qp->trans_qp.base.mqp, outb,
- sizeof(*outb));
+ outlen);
if (err)
goto out;
+ /* FIXME: use MLX5_GET rather than mlx5_qp_context manual struct */
+ context = (struct mlx5_qp_context *)MLX5_ADDR_OF(query_qp_out, outb, qpc);
+
mlx5_state = be32_to_cpu(context->flags) >> 28;
qp->state = to_ib_qp_state(mlx5_state);
diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h
index bbf0a163aeab..a3e21a25cea5 100644
--- a/drivers/infiniband/hw/qib/qib.h
+++ b/drivers/infiniband/hw/qib/qib.h
@@ -52,6 +52,7 @@
#include <linux/kref.h>
#include <linux/sched.h>
#include <linux/kthread.h>
+#include <rdma/ib_hdrs.h>
#include <rdma/rdma_vt.h>
#include "qib_common.h"
@@ -1131,7 +1132,6 @@ extern spinlock_t qib_devs_lock;
extern struct qib_devdata *qib_lookup(int unit);
extern u32 qib_cpulist_count;
extern unsigned long *qib_cpulist;
-extern u16 qpt_mask;
extern unsigned qib_cc_table_size;
int qib_init(struct qib_devdata *, int);
diff --git a/drivers/infiniband/hw/qib/qib_driver.c b/drivers/infiniband/hw/qib/qib_driver.c
index 67ee6438cf59..728e0a030d2e 100644
--- a/drivers/infiniband/hw/qib/qib_driver.c
+++ b/drivers/infiniband/hw/qib/qib_driver.c
@@ -319,8 +319,8 @@ static u32 qib_rcv_hdrerr(struct qib_ctxtdata *rcd, struct qib_pportdata *ppd,
ret = 1;
else if (eflags == QLOGIC_IB_RHF_H_TIDERR) {
/* For TIDERR and RC QPs premptively schedule a NAK */
- struct qib_ib_header *hdr = (struct qib_ib_header *) rhdr;
- struct qib_other_headers *ohdr = NULL;
+ struct ib_header *hdr = (struct ib_header *)rhdr;
+ struct ib_other_headers *ohdr = NULL;
struct qib_ibport *ibp = &ppd->ibport_data;
struct qib_devdata *dd = ppd->dd;
struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
@@ -588,8 +588,7 @@ move_along:
qib_schedule_send(qp);
spin_unlock_irqrestore(&qp->s_lock, flags);
}
- if (atomic_dec_and_test(&qp->refcount))
- wake_up(&qp->wait);
+ rvt_put_qp(qp);
}
bail:
diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c
index ce4034071f9c..ded27172320e 100644
--- a/drivers/infiniband/hw/qib/qib_iba7322.c
+++ b/drivers/infiniband/hw/qib/qib_iba7322.c
@@ -1415,7 +1415,7 @@ static void flush_fifo(struct qib_pportdata *ppd)
u32 *hdr;
u64 pbc;
const unsigned hdrwords = 7;
- static struct qib_ib_header ibhdr = {
+ static struct ib_header ibhdr = {
.lrh[0] = cpu_to_be16(0xF000 | QIB_LRH_BTH),
.lrh[1] = IB_LID_PERMISSIVE,
.lrh[2] = cpu_to_be16(hdrwords + SIZE_OF_CRC),
diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c
index f9b8cd2354d1..99d31efe4c2f 100644
--- a/drivers/infiniband/hw/qib/qib_qp.c
+++ b/drivers/infiniband/hw/qib/qib_qp.c
@@ -41,14 +41,6 @@
#include "qib.h"
-/*
- * mask field which was present in now deleted qib_qpn_table
- * is not present in rvt_qpn_table. Defining the same field
- * as qpt_mask here instead of adding the mask field to
- * rvt_qpn_table.
- */
-u16 qpt_mask;
-
static inline unsigned mk_qpn(struct rvt_qpn_table *qpt,
struct rvt_qpn_map *map, unsigned off)
{
@@ -57,7 +49,7 @@ static inline unsigned mk_qpn(struct rvt_qpn_table *qpt,
static inline unsigned find_next_offset(struct rvt_qpn_table *qpt,
struct rvt_qpn_map *map, unsigned off,
- unsigned n)
+ unsigned n, u16 qpt_mask)
{
if (qpt_mask) {
off++;
@@ -179,6 +171,7 @@ int qib_alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt,
struct qib_ibdev *verbs_dev = container_of(rdi, struct qib_ibdev, rdi);
struct qib_devdata *dd = container_of(verbs_dev, struct qib_devdata,
verbs_dev);
+ u16 qpt_mask = dd->qpn_mask;
if (type == IB_QPT_SMI || type == IB_QPT_GSI) {
unsigned n;
@@ -215,7 +208,7 @@ int qib_alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt,
goto bail;
}
offset = find_next_offset(qpt, map, offset,
- dd->n_krcv_queues);
+ dd->n_krcv_queues, qpt_mask);
qpn = mk_qpn(qpt, map, offset);
/*
* This test differs from alloc_pidmap().
diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c
index 444028a3582a..2097512e75aa 100644
--- a/drivers/infiniband/hw/qib/qib_rc.c
+++ b/drivers/infiniband/hw/qib/qib_rc.c
@@ -75,7 +75,7 @@ static void start_timer(struct rvt_qp *qp)
* Note the QP s_lock must be held.
*/
static int qib_make_rc_ack(struct qib_ibdev *dev, struct rvt_qp *qp,
- struct qib_other_headers *ohdr, u32 pmtu)
+ struct ib_other_headers *ohdr, u32 pmtu)
{
struct rvt_ack_entry *e;
u32 hwords;
@@ -154,10 +154,7 @@ static int qib_make_rc_ack(struct qib_ibdev *dev, struct rvt_qp *qp,
len = 0;
qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE);
ohdr->u.at.aeth = qib_compute_aeth(qp);
- ohdr->u.at.atomic_ack_eth[0] =
- cpu_to_be32(e->atomic_data >> 32);
- ohdr->u.at.atomic_ack_eth[1] =
- cpu_to_be32(e->atomic_data);
+ ib_u64_put(e->atomic_data, &ohdr->u.at.atomic_ack_eth);
hwords += sizeof(ohdr->u.at) / sizeof(u32);
bth2 = e->psn & QIB_PSN_MASK;
e->sent = 1;
@@ -234,7 +231,7 @@ int qib_make_rc_req(struct rvt_qp *qp, unsigned long *flags)
{
struct qib_qp_priv *priv = qp->priv;
struct qib_ibdev *dev = to_idev(qp->ibqp.device);
- struct qib_other_headers *ohdr;
+ struct ib_other_headers *ohdr;
struct rvt_sge_state *ss;
struct rvt_swqe *wqe;
u32 hwords;
@@ -444,20 +441,18 @@ int qib_make_rc_req(struct rvt_qp *qp, unsigned long *flags)
}
if (wqe->atomic_wr.wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
qp->s_state = OP(COMPARE_SWAP);
- ohdr->u.atomic_eth.swap_data = cpu_to_be64(
- wqe->atomic_wr.swap);
- ohdr->u.atomic_eth.compare_data = cpu_to_be64(
- wqe->atomic_wr.compare_add);
+ put_ib_ateth_swap(wqe->atomic_wr.swap,
+ &ohdr->u.atomic_eth);
+ put_ib_ateth_swap(wqe->atomic_wr.compare_add,
+ &ohdr->u.atomic_eth);
} else {
qp->s_state = OP(FETCH_ADD);
- ohdr->u.atomic_eth.swap_data = cpu_to_be64(
- wqe->atomic_wr.compare_add);
- ohdr->u.atomic_eth.compare_data = 0;
+ put_ib_ateth_swap(wqe->atomic_wr.compare_add,
+ &ohdr->u.atomic_eth);
+ put_ib_ateth_swap(0, &ohdr->u.atomic_eth);
}
- ohdr->u.atomic_eth.vaddr[0] = cpu_to_be32(
- wqe->atomic_wr.remote_addr >> 32);
- ohdr->u.atomic_eth.vaddr[1] = cpu_to_be32(
- wqe->atomic_wr.remote_addr);
+ put_ib_ateth_vaddr(wqe->atomic_wr.remote_addr,
+ &ohdr->u.atomic_eth);
ohdr->u.atomic_eth.rkey = cpu_to_be32(
wqe->atomic_wr.rkey);
hwords += sizeof(struct ib_atomic_eth) / sizeof(u32);
@@ -632,8 +627,8 @@ void qib_send_rc_ack(struct rvt_qp *qp)
u32 hwords;
u32 pbufn;
u32 __iomem *piobuf;
- struct qib_ib_header hdr;
- struct qib_other_headers *ohdr;
+ struct ib_header hdr;
+ struct ib_other_headers *ohdr;
u32 control;
unsigned long flags;
@@ -942,9 +937,9 @@ static void reset_sending_psn(struct rvt_qp *qp, u32 psn)
/*
* This should be called with the QP s_lock held and interrupts disabled.
*/
-void qib_rc_send_complete(struct rvt_qp *qp, struct qib_ib_header *hdr)
+void qib_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr)
{
- struct qib_other_headers *ohdr;
+ struct ib_other_headers *ohdr;
struct rvt_swqe *wqe;
struct ib_wc wc;
unsigned i;
@@ -1177,7 +1172,7 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
qib_restart_rc(qp, qp->s_last_psn + 1, 0);
if (list_empty(&qp->rspwait)) {
qp->r_flags |= RVT_R_RSP_SEND;
- atomic_inc(&qp->refcount);
+ rvt_get_qp(qp);
list_add_tail(&qp->rspwait,
&rcd->qp_wait_list);
}
@@ -1361,7 +1356,7 @@ static void rdma_seq_err(struct rvt_qp *qp, struct qib_ibport *ibp, u32 psn,
qib_restart_rc(qp, qp->s_last_psn + 1, 0);
if (list_empty(&qp->rspwait)) {
qp->r_flags |= RVT_R_RSP_SEND;
- atomic_inc(&qp->refcount);
+ rvt_get_qp(qp);
list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
}
}
@@ -1383,7 +1378,7 @@ static void rdma_seq_err(struct rvt_qp *qp, struct qib_ibport *ibp, u32 psn,
* Called at interrupt level.
*/
static void qib_rc_rcv_resp(struct qib_ibport *ibp,
- struct qib_other_headers *ohdr,
+ struct ib_other_headers *ohdr,
void *data, u32 tlen,
struct rvt_qp *qp,
u32 opcode,
@@ -1463,12 +1458,9 @@ static void qib_rc_rcv_resp(struct qib_ibport *ibp,
case OP(ATOMIC_ACKNOWLEDGE):
case OP(RDMA_READ_RESPONSE_FIRST):
aeth = be32_to_cpu(ohdr->u.aeth);
- if (opcode == OP(ATOMIC_ACKNOWLEDGE)) {
- __be32 *p = ohdr->u.at.atomic_ack_eth;
-
- val = ((u64) be32_to_cpu(p[0]) << 32) |
- be32_to_cpu(p[1]);
- } else
+ if (opcode == OP(ATOMIC_ACKNOWLEDGE))
+ val = ib_u64_get(&ohdr->u.at.atomic_ack_eth);
+ else
val = 0;
if (!do_rc_ack(qp, aeth, psn, opcode, val, rcd) ||
opcode != OP(RDMA_READ_RESPONSE_FIRST))
@@ -1608,7 +1600,7 @@ bail:
* Return 1 if no more processing is needed; otherwise return 0 to
* schedule a response to be sent.
*/
-static int qib_rc_rcv_error(struct qib_other_headers *ohdr,
+static int qib_rc_rcv_error(struct ib_other_headers *ohdr,
void *data,
struct rvt_qp *qp,
u32 opcode,
@@ -1640,7 +1632,7 @@ static int qib_rc_rcv_error(struct qib_other_headers *ohdr,
*/
if (list_empty(&qp->rspwait)) {
qp->r_flags |= RVT_R_RSP_NAK;
- atomic_inc(&qp->refcount);
+ rvt_get_qp(qp);
list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
}
}
@@ -1848,11 +1840,11 @@ static inline void qib_update_ack_queue(struct rvt_qp *qp, unsigned n)
* for the given QP.
* Called at interrupt level.
*/
-void qib_rc_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr,
+void qib_rc_rcv(struct qib_ctxtdata *rcd, struct ib_header *hdr,
int has_grh, void *data, u32 tlen, struct rvt_qp *qp)
{
struct qib_ibport *ibp = &rcd->ppd->ibport_data;
- struct qib_other_headers *ohdr;
+ struct ib_other_headers *ohdr;
u32 opcode;
u32 hdrsize;
u32 psn;
@@ -2177,8 +2169,7 @@ send_last:
e->rdma_sge.mr = NULL;
}
ateth = &ohdr->u.atomic_eth;
- vaddr = ((u64) be32_to_cpu(ateth->vaddr[0]) << 32) |
- be32_to_cpu(ateth->vaddr[1]);
+ vaddr = get_ib_ateth_vaddr(ateth);
if (unlikely(vaddr & (sizeof(u64) - 1)))
goto nack_inv_unlck;
rkey = be32_to_cpu(ateth->rkey);
@@ -2189,11 +2180,11 @@ send_last:
goto nack_acc_unlck;
/* Perform atomic OP and save result. */
maddr = (atomic64_t *) qp->r_sge.sge.vaddr;
- sdata = be64_to_cpu(ateth->swap_data);
+ sdata = get_ib_ateth_swap(ateth);
e->atomic_data = (opcode == OP(FETCH_ADD)) ?
(u64) atomic64_add_return(sdata, maddr) - sdata :
(u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
- be64_to_cpu(ateth->compare_data),
+ get_ib_ateth_compare(ateth),
sdata);
rvt_put_mr(qp->r_sge.sge.mr);
qp->r_sge.num_sge = 0;
@@ -2233,7 +2224,7 @@ rnr_nak:
/* Queue RNR NAK for later */
if (list_empty(&qp->rspwait)) {
qp->r_flags |= RVT_R_RSP_NAK;
- atomic_inc(&qp->refcount);
+ rvt_get_qp(qp);
list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
}
return;
@@ -2245,7 +2236,7 @@ nack_op_err:
/* Queue NAK for later */
if (list_empty(&qp->rspwait)) {
qp->r_flags |= RVT_R_RSP_NAK;
- atomic_inc(&qp->refcount);
+ rvt_get_qp(qp);
list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
}
return;
@@ -2259,7 +2250,7 @@ nack_inv:
/* Queue NAK for later */
if (list_empty(&qp->rspwait)) {
qp->r_flags |= RVT_R_RSP_NAK;
- atomic_inc(&qp->refcount);
+ rvt_get_qp(qp);
list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
}
return;
diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c
index b67779256297..de1bde5950f5 100644
--- a/drivers/infiniband/hw/qib/qib_ruc.c
+++ b/drivers/infiniband/hw/qib/qib_ruc.c
@@ -265,7 +265,7 @@ static int gid_ok(union ib_gid *gid, __be64 gid_prefix, __be64 id)
*
* The s_lock will be acquired around the qib_migrate_qp() call.
*/
-int qib_ruc_check_hdr(struct qib_ibport *ibp, struct qib_ib_header *hdr,
+int qib_ruc_check_hdr(struct qib_ibport *ibp, struct ib_header *hdr,
int has_grh, struct rvt_qp *qp, u32 bth0)
{
__be64 guid;
@@ -680,7 +680,7 @@ u32 qib_make_grh(struct qib_ibport *ibp, struct ib_grh *hdr,
return sizeof(struct ib_grh) / sizeof(u32);
}
-void qib_make_ruc_header(struct rvt_qp *qp, struct qib_other_headers *ohdr,
+void qib_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr,
u32 bth0, u32 bth2)
{
struct qib_qp_priv *priv = qp->priv;
diff --git a/drivers/infiniband/hw/qib/qib_uc.c b/drivers/infiniband/hw/qib/qib_uc.c
index 1d61bd04f449..5b2d483451ad 100644
--- a/drivers/infiniband/hw/qib/qib_uc.c
+++ b/drivers/infiniband/hw/qib/qib_uc.c
@@ -48,7 +48,7 @@
int qib_make_uc_req(struct rvt_qp *qp, unsigned long *flags)
{
struct qib_qp_priv *priv = qp->priv;
- struct qib_other_headers *ohdr;
+ struct ib_other_headers *ohdr;
struct rvt_swqe *wqe;
u32 hwords;
u32 bth0;
@@ -236,10 +236,10 @@ bail:
* for the given QP.
* Called at interrupt level.
*/
-void qib_uc_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
+void qib_uc_rcv(struct qib_ibport *ibp, struct ib_header *hdr,
int has_grh, void *data, u32 tlen, struct rvt_qp *qp)
{
- struct qib_other_headers *ohdr;
+ struct ib_other_headers *ohdr;
u32 opcode;
u32 hdrsize;
u32 psn;
diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c
index 10d062561bd9..f45cad1198b0 100644
--- a/drivers/infiniband/hw/qib/qib_ud.c
+++ b/drivers/infiniband/hw/qib/qib_ud.c
@@ -245,7 +245,7 @@ drop:
int qib_make_ud_req(struct rvt_qp *qp, unsigned long *flags)
{
struct qib_qp_priv *priv = qp->priv;
- struct qib_other_headers *ohdr;
+ struct ib_other_headers *ohdr;
struct ib_ah_attr *ah_attr;
struct qib_pportdata *ppd;
struct qib_ibport *ibp;
@@ -435,10 +435,10 @@ static unsigned qib_lookup_pkey(struct qib_ibport *ibp, u16 pkey)
* for the given QP.
* Called at interrupt level.
*/
-void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
+void qib_ud_rcv(struct qib_ibport *ibp, struct ib_header *hdr,
int has_grh, void *data, u32 tlen, struct rvt_qp *qp)
{
- struct qib_other_headers *ohdr;
+ struct ib_other_headers *ohdr;
int opcode;
u32 hdrsize;
u32 pad;
diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c
index fd1dfbce5539..876ebb442d38 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.c
+++ b/drivers/infiniband/hw/qib/qib_verbs.c
@@ -313,7 +313,7 @@ static void qib_copy_from_sge(void *data, struct rvt_sge_state *ss, u32 length)
* for the given QP.
* Called at interrupt level.
*/
-static void qib_qp_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr,
+static void qib_qp_rcv(struct qib_ctxtdata *rcd, struct ib_header *hdr,
int has_grh, void *data, u32 tlen, struct rvt_qp *qp)
{
struct qib_ibport *ibp = &rcd->ppd->ibport_data;
@@ -366,10 +366,10 @@ void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen)
{
struct qib_pportdata *ppd = rcd->ppd;
struct qib_ibport *ibp = &ppd->ibport_data;
- struct qib_ib_header *hdr = rhdr;
+ struct ib_header *hdr = rhdr;
struct qib_devdata *dd = ppd->dd;
struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
- struct qib_other_headers *ohdr;
+ struct ib_other_headers *ohdr;
struct rvt_qp *qp;
u32 qp_num;
int lnh;
@@ -841,7 +841,7 @@ static void sdma_complete(struct qib_sdma_txreq *cookie, int status)
if (tx->wqe)
qib_send_complete(qp, tx->wqe, IB_WC_SUCCESS);
else if (qp->ibqp.qp_type == IB_QPT_RC) {
- struct qib_ib_header *hdr;
+ struct ib_header *hdr;
if (tx->txreq.flags & QIB_SDMA_TXREQ_F_FREEBUF)
hdr = &tx->align_buf->hdr;
@@ -889,7 +889,7 @@ static int wait_kmem(struct qib_ibdev *dev, struct rvt_qp *qp)
return ret;
}
-static int qib_verbs_send_dma(struct rvt_qp *qp, struct qib_ib_header *hdr,
+static int qib_verbs_send_dma(struct rvt_qp *qp, struct ib_header *hdr,
u32 hdrwords, struct rvt_sge_state *ss, u32 len,
u32 plen, u32 dwords)
{
@@ -1025,7 +1025,7 @@ static int no_bufs_available(struct rvt_qp *qp)
return ret;
}
-static int qib_verbs_send_pio(struct rvt_qp *qp, struct qib_ib_header *ibhdr,
+static int qib_verbs_send_pio(struct rvt_qp *qp, struct ib_header *ibhdr,
u32 hdrwords, struct rvt_sge_state *ss, u32 len,
u32 plen, u32 dwords)
{
@@ -1133,7 +1133,7 @@ done:
* Return zero if packet is sent or queued OK.
* Return non-zero and clear qp->s_flags RVT_S_BUSY otherwise.
*/
-int qib_verbs_send(struct rvt_qp *qp, struct qib_ib_header *hdr,
+int qib_verbs_send(struct rvt_qp *qp, struct ib_header *hdr,
u32 hdrwords, struct rvt_sge_state *ss, u32 len)
{
struct qib_devdata *dd = dd_from_ibdev(qp->ibqp.device);
@@ -1606,8 +1606,6 @@ int qib_register_ib_device(struct qib_devdata *dd)
/* Only need to initialize non-zero fields. */
setup_timer(&dev->mem_timer, mem_timer, (unsigned long)dev);
- qpt_mask = dd->qpn_mask;
-
INIT_LIST_HEAD(&dev->piowait);
INIT_LIST_HEAD(&dev->dmawait);
INIT_LIST_HEAD(&dev->txwait);
diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h
index 736ced684842..94fd30fdedac 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.h
+++ b/drivers/infiniband/hw/qib/qib_verbs.h
@@ -45,6 +45,7 @@
#include <linux/completion.h>
#include <rdma/ib_pack.h>
#include <rdma/ib_user_verbs.h>
+#include <rdma/ib_hdrs.h>
#include <rdma/rdma_vt.h>
#include <rdma/rdmavt_cq.h>
@@ -63,16 +64,6 @@ struct qib_verbs_txreq;
*/
#define QIB_UVERBS_ABI_VERSION 2
-#define IB_SEQ_NAK (3 << 29)
-
-/* AETH NAK opcode values */
-#define IB_RNR_NAK 0x20
-#define IB_NAK_PSN_ERROR 0x60
-#define IB_NAK_INVALID_REQUEST 0x61
-#define IB_NAK_REMOTE_ACCESS_ERROR 0x62
-#define IB_NAK_REMOTE_OPERATIONAL_ERROR 0x63
-#define IB_NAK_INVALID_RD_REQUEST 0x64
-
/* IB Performance Manager status values */
#define IB_PMA_SAMPLE_STATUS_DONE 0x00
#define IB_PMA_SAMPLE_STATUS_STARTED 0x01
@@ -87,22 +78,9 @@ struct qib_verbs_txreq;
#define QIB_VENDOR_IPG cpu_to_be16(0xFFA0)
-#define IB_BTH_REQ_ACK (1 << 31)
-#define IB_BTH_SOLICITED (1 << 23)
-#define IB_BTH_MIG_REQ (1 << 22)
-
/* XXX Should be defined in ib_verbs.h enum ib_port_cap_flags */
#define IB_PORT_OTHER_LOCAL_CHANGES_SUP (1 << 26)
-#define IB_GRH_VERSION 6
-#define IB_GRH_VERSION_MASK 0xF
-#define IB_GRH_VERSION_SHIFT 28
-#define IB_GRH_TCLASS_MASK 0xFF
-#define IB_GRH_TCLASS_SHIFT 20
-#define IB_GRH_FLOW_MASK 0xFFFFF
-#define IB_GRH_FLOW_SHIFT 0
-#define IB_GRH_NEXT_HDR 0x1B
-
#define IB_DEFAULT_GID_PREFIX cpu_to_be64(0xfe80000000000000ULL)
/* Values for set/get portinfo VLCap OperationalVLs */
@@ -129,61 +107,9 @@ static inline int qib_num_vls(int vls)
}
}
-struct ib_reth {
- __be64 vaddr;
- __be32 rkey;
- __be32 length;
-} __packed;
-
-struct ib_atomic_eth {
- __be32 vaddr[2]; /* unaligned so access as 2 32-bit words */
- __be32 rkey;
- __be64 swap_data;
- __be64 compare_data;
-} __packed;
-
-struct qib_other_headers {
- __be32 bth[3];
- union {
- struct {
- __be32 deth[2];
- __be32 imm_data;
- } ud;
- struct {
- struct ib_reth reth;
- __be32 imm_data;
- } rc;
- struct {
- __be32 aeth;
- __be32 atomic_ack_eth[2];
- } at;
- __be32 imm_data;
- __be32 aeth;
- __be32 ieth;
- struct ib_atomic_eth atomic_eth;
- } u;
-} __packed;
-
-/*
- * Note that UD packets with a GRH header are 8+40+12+8 = 68 bytes
- * long (72 w/ imm_data). Only the first 56 bytes of the IB header
- * will be in the eager header buffer. The remaining 12 or 16 bytes
- * are in the data buffer.
- */
-struct qib_ib_header {
- __be16 lrh[4];
- union {
- struct {
- struct ib_grh grh;
- struct qib_other_headers oth;
- } l;
- struct qib_other_headers oth;
- } u;
-} __packed;
-
struct qib_pio_header {
__le32 pbc[2];
- struct qib_ib_header hdr;
+ struct ib_header hdr;
} __packed;
/*
@@ -191,7 +117,7 @@ struct qib_pio_header {
* is made common.
*/
struct qib_qp_priv {
- struct qib_ib_header *s_hdr; /* next packet header to send */
+ struct ib_header *s_hdr; /* next packet header to send */
struct list_head iowait; /* link for wait PIO buf */
atomic_t s_dma_busy;
struct qib_verbs_txreq *s_tx;
@@ -376,7 +302,7 @@ void qib_verbs_sdma_desc_avail(struct qib_pportdata *ppd, unsigned avail);
void qib_put_txreq(struct qib_verbs_txreq *tx);
-int qib_verbs_send(struct rvt_qp *qp, struct qib_ib_header *hdr,
+int qib_verbs_send(struct rvt_qp *qp, struct ib_header *hdr,
u32 hdrwords, struct rvt_sge_state *ss, u32 len);
void qib_copy_sge(struct rvt_sge_state *ss, void *data, u32 length,
@@ -384,10 +310,10 @@ void qib_copy_sge(struct rvt_sge_state *ss, void *data, u32 length,
void qib_skip_sge(struct rvt_sge_state *ss, u32 length, int release);
-void qib_uc_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
+void qib_uc_rcv(struct qib_ibport *ibp, struct ib_header *hdr,
int has_grh, void *data, u32 tlen, struct rvt_qp *qp);
-void qib_rc_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr,
+void qib_rc_rcv(struct qib_ctxtdata *rcd, struct ib_header *hdr,
int has_grh, void *data, u32 tlen, struct rvt_qp *qp);
int qib_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr);
@@ -398,13 +324,13 @@ struct ib_ah *qib_create_qp0_ah(struct qib_ibport *ibp, u16 dlid);
void qib_rc_rnr_retry(unsigned long arg);
-void qib_rc_send_complete(struct rvt_qp *qp, struct qib_ib_header *hdr);
+void qib_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr);
void qib_rc_error(struct rvt_qp *qp, enum ib_wc_status err);
int qib_post_ud_send(struct rvt_qp *qp, struct ib_send_wr *wr);
-void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
+void qib_ud_rcv(struct qib_ibport *ibp, struct ib_header *hdr,
int has_grh, void *data, u32 tlen, struct rvt_qp *qp);
void mr_rcu_callback(struct rcu_head *list);
@@ -413,13 +339,13 @@ int qib_get_rwqe(struct rvt_qp *qp, int wr_id_only);
void qib_migrate_qp(struct rvt_qp *qp);
-int qib_ruc_check_hdr(struct qib_ibport *ibp, struct qib_ib_header *hdr,
+int qib_ruc_check_hdr(struct qib_ibport *ibp, struct ib_header *hdr,
int has_grh, struct rvt_qp *qp, u32 bth0);
u32 qib_make_grh(struct qib_ibport *ibp, struct ib_grh *hdr,
struct ib_global_route *grh, u32 hwords, u32 nwords);
-void qib_make_ruc_header(struct rvt_qp *qp, struct qib_other_headers *ohdr,
+void qib_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr,
u32 bth0, u32 bth2);
void _qib_do_send(struct work_struct *work);