From 615399896ca3787728c56c499c99be79e40ac125 Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 31 Mar 2020 09:49:46 -0700 Subject: nvme-fc: Sync header to FC-NVME-2 rev 1.08 A couple of minor changes occurred between 1.06 and 1.08: - Addition of NVME_SR_RSP opcode - Change of SR_RSP status code 1 to Reserved Signed-off-by: James Smart Reviewed-by: Sagi Grimberg Reviewed-by: Himanshu Madhani Reviewed-by: Hannes Reinecke Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/nvme-fc.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/include/linux/nvme-fc.h b/include/linux/nvme-fc.h index e8c30b39bb27..51fe44e0328b 100644 --- a/include/linux/nvme-fc.h +++ b/include/linux/nvme-fc.h @@ -4,8 +4,8 @@ */ /* - * This file contains definitions relative to FC-NVME-2 r1.06 - * (T11-2019-00210-v001). + * This file contains definitions relative to FC-NVME-2 r1.08 + * (T11-2019-00210-v004). */ #ifndef _NVME_FC_H @@ -81,7 +81,8 @@ struct nvme_fc_ersp_iu { }; -#define FCNVME_NVME_SR_OPCODE 0x01 +#define FCNVME_NVME_SR_OPCODE 0x01 +#define FCNVME_NVME_SR_RSP_OPCODE 0x02 struct nvme_fc_nvme_sr_iu { __u8 fc_id; @@ -94,7 +95,7 @@ struct nvme_fc_nvme_sr_iu { enum { FCNVME_SRSTAT_ACC = 0x0, - FCNVME_SRSTAT_INV_FCID = 0x1, + /* reserved 0x1 */ /* reserved 0x2 */ FCNVME_SRSTAT_LOGICAL_ERR = 0x3, FCNVME_SRSTAT_INV_QUALIF = 0x4, @@ -397,7 +398,7 @@ struct fcnvme_ls_disconnect_conn_rqst { struct fcnvme_ls_rqst_w0 w0; __be32 desc_list_len; struct fcnvme_lsdesc_assoc_id associd; - struct fcnvme_lsdesc_disconn_cmd connectid; + struct fcnvme_lsdesc_conn_id connectid; }; struct fcnvme_ls_disconnect_conn_acc { -- cgit v1.2.3 From 72e6329f86c714785ac195d293cb19dd24507880 Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 31 Mar 2020 09:49:47 -0700 Subject: nvme-fc and nvmet-fc: revise LLDD api for LS reception and LS request The current LLDD api has: nvme-fc: contains an api for the transport to do LS requests (and aborts of them). However, there is no interface for reception of LS's and sending responses for them. nvmet-fc: contains an api for the transport to do reception of LS's and sending of responses for them. However, there is no interface for doing LS requests. Revise the apis so that both nvme-fc and nvmet-fc can send LS's, as well as receive LS's and send their responses. Change the name of the rcv_ls_req struct to better reflect its generic use as a context used to send an LS response. Specifically: nvmefc_tgt_ls_req -> nvmefc_ls_rsp nvmefc_tgt_ls_req.nvmet_fc_private -> nvmefc_ls_rsp.nvme_fc_private Change the nvmet_fc_rcv_ls_req() calling sequence to provide a handle that can be used by the transport in later LS request sequences for an association. nvme-fc nvmet_fc nvme_fcloop: Revise to adapt to changed names in the api header. Change the calling sequence to nvmet_fc_rcv_ls_req() for hosthandle. Add stubs for new interfaces: host/fc.c: nvme_fc_rcv_ls_req() target/fc.c: nvmet_fc_invalidate_host() lpfc: Revise to adapt code to changed names in the api header. Change the calling sequence to nvmet_fc_rcv_ls_req() for hosthandle.
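The structural pattern the revised api relies on: the LLDD embeds struct nvmefc_ls_rsp inside its own per-exchange context and recovers that context with container_of() when the transport asks for the response to be transmitted (the fcloop and lpfc hunks below do exactly this). A minimal, self-contained userspace sketch of the pattern, assuming stand-in names for everything except nvmefc_ls_rsp and container_of():

/*
 * Sketch only: models the embed-and-recover pattern in plain C.
 * struct lldd_ls_ctx, lldd_xmt_ls_rsp() and the field values are
 * illustrative, not from the patch.
 */
#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct nvmefc_ls_rsp {
	void *rspbuf;
	unsigned short rsplen;
	void (*done)(struct nvmefc_ls_rsp *rsp);
	void *nvme_fc_private;		/* transport-owned; LLDD must not touch */
};

struct lldd_ls_ctx {			/* hypothetical LLDD exchange context */
	int oxid;			/* FC exchange id the LS arrived on */
	struct nvmefc_ls_rsp ls_rsp;	/* handed to the transport at rcv time */
};

static void lldd_xmt_ls_rsp(struct nvmefc_ls_rsp *lsrsp)
{
	/* map the transport-supplied pointer back to the LLDD context */
	struct lldd_ls_ctx *ctx =
		container_of(lsrsp, struct lldd_ls_ctx, ls_rsp);

	printf("xmt %u rsp bytes on exchange x%x\n",
	       (unsigned)lsrsp->rsplen, ctx->oxid);
	lsrsp->done(lsrsp);		/* transport releases its resources here */
}

static void done_stub(struct nvmefc_ls_rsp *rsp) { (void)rsp; }

int main(void)
{
	struct lldd_ls_ctx ctx = { .oxid = 0x1234 };

	ctx.ls_rsp.rsplen = 64;
	ctx.ls_rsp.done = done_stub;
	lldd_xmt_ls_rsp(&ctx.ls_rsp);
	return 0;
}

Because the transport only ever hands back the same pointer it was given, the LLDD needs no lookup table to find its exchange state.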
Signed-off-by: James Smart Reviewed-by: Sagi Grimberg Reviewed-by: Himanshu Madhani Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/fc.c | 35 ++++ drivers/nvme/target/fc.c | 77 ++++++--- drivers/nvme/target/fcloop.c | 20 +-- drivers/scsi/lpfc/lpfc_nvmet.c | 10 +- drivers/scsi/lpfc/lpfc_nvmet.h | 2 +- include/linux/nvme-fc-driver.h | 368 ++++++++++++++++++++++++++++++----------- 6 files changed, 378 insertions(+), 134 deletions(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 7dfc4a2ecf1e..8012099fc3ee 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -1464,6 +1464,41 @@ nvme_fc_xmt_disconnect_assoc(struct nvme_fc_ctrl *ctrl) kfree(lsop); } +/** + * nvme_fc_rcv_ls_req - transport entry point called by an LLDD + * upon the reception of a NVME LS request. + * + * The nvme-fc layer will copy payload to an internal structure for + * processing. As such, upon completion of the routine, the LLDD may + * immediately free/reuse the LS request buffer passed in the call. + * + * If this routine returns error, the LLDD should abort the exchange. + * + * @remoteport: pointer to the (registered) remote port that the LS + * was received from. The remoteport is associated with + * a specific localport. + * @lsrsp: pointer to a nvmefc_ls_rsp response structure to be + * used to reference the exchange corresponding to the LS + * when issuing an ls response. + * @lsreqbuf: pointer to the buffer containing the LS Request + * @lsreqbuf_len: length, in bytes, of the received LS request + */ +int +nvme_fc_rcv_ls_req(struct nvme_fc_remote_port *portptr, + struct nvmefc_ls_rsp *lsrsp, + void *lsreqbuf, u32 lsreqbuf_len) +{ + struct nvme_fc_rport *rport = remoteport_to_rport(portptr); + struct nvme_fc_lport *lport = rport->lport; + + /* validate there's a routine to transmit a response */ + if (!lport->ops->xmt_ls_rsp) + return(-EINVAL); + + return 0; +} +EXPORT_SYMBOL_GPL(nvme_fc_rcv_ls_req); + /* *********************** NVME Ctrl Routines **************************** */ diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index a8ceb7721640..aac7869a70bb 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -28,7 +28,7 @@ struct nvmet_fc_tgtport; struct nvmet_fc_tgt_assoc; struct nvmet_fc_ls_iod { - struct nvmefc_tgt_ls_req *lsreq; + struct nvmefc_ls_rsp *lsrsp; struct nvmefc_tgt_fcp_req *fcpreq; /* only if RS */ struct list_head ls_list; /* tgtport->ls_list */ @@ -1146,6 +1146,42 @@ __nvmet_fc_free_assocs(struct nvmet_fc_tgtport *tgtport) spin_unlock_irqrestore(&tgtport->lock, flags); } +/** + * nvmet_fc_invalidate_host - transport entry point called by an LLDD + * to remove references to a hosthandle for LS's. + * + * The nvmet-fc layer ensures that any references to the hosthandle + * on the targetport are forgotten (set to NULL). The LLDD will + * typically call this when a login with a remote host port has been + * lost, thus LS's for the remote host port are no longer possible. + * + * If an LS request is outstanding to the targetport/hosthandle (or + * issued concurrently with the call to invalidate the host), the + * LLDD is responsible for terminating/aborting the LS and completing + * the LS request. It is recommended that these terminations/aborts + * occur after calling to invalidate the host handle to avoid additional + * retries by the nvmet-fc transport. The nvmet-fc transport may + * continue to reference host handle while it cleans up outstanding + * NVME associations. 
The nvmet-fc transport will call the + * ops->host_release() callback to notify the LLDD that all references + * are complete and the related host handle can be recovered. + * Note: if there are no references, the callback may be called before + * the invalidate host call returns. + * + * @target_port: pointer to the (registered) target port that a prior + * LS was received on and which supplied the transport the + * hosthandle. + * @hosthandle: the handle (pointer) that represents the host port + * that no longer has connectivity and that LS's should + * no longer be directed to. + */ +void +nvmet_fc_invalidate_host(struct nvmet_fc_target_port *target_port, + void *hosthandle) +{ +} +EXPORT_SYMBOL_GPL(nvmet_fc_invalidate_host); + /* * nvmet layer has called to terminate an association */ @@ -1371,7 +1407,7 @@ nvmet_fc_ls_create_association(struct nvmet_fc_tgtport *tgtport, dev_err(tgtport->dev, "Create Association LS failed: %s\n", validation_errors[ret]); - iod->lsreq->rsplen = nvmet_fc_format_rjt(acc, + iod->lsrsp->rsplen = nvmet_fc_format_rjt(acc, NVME_FC_MAX_LS_BUFFER_SIZE, rqst->w0.ls_cmd, FCNVME_RJT_RC_LOGIC, FCNVME_RJT_EXP_NONE, 0); @@ -1384,7 +1420,7 @@ nvmet_fc_ls_create_association(struct nvmet_fc_tgtport *tgtport, /* format a response */ - iod->lsreq->rsplen = sizeof(*acc); + iod->lsrsp->rsplen = sizeof(*acc); nvmet_fc_format_rsp_hdr(acc, FCNVME_LS_ACC, fcnvme_lsdesc_len( @@ -1462,7 +1498,7 @@ nvmet_fc_ls_create_connection(struct nvmet_fc_tgtport *tgtport, dev_err(tgtport->dev, "Create Connection LS failed: %s\n", validation_errors[ret]); - iod->lsreq->rsplen = nvmet_fc_format_rjt(acc, + iod->lsrsp->rsplen = nvmet_fc_format_rjt(acc, NVME_FC_MAX_LS_BUFFER_SIZE, rqst->w0.ls_cmd, (ret == VERR_NO_ASSOC) ? FCNVME_RJT_RC_INV_ASSOC : @@ -1477,7 +1513,7 @@ nvmet_fc_ls_create_connection(struct nvmet_fc_tgtport *tgtport, /* format a response */ - iod->lsreq->rsplen = sizeof(*acc); + iod->lsrsp->rsplen = sizeof(*acc); nvmet_fc_format_rsp_hdr(acc, FCNVME_LS_ACC, fcnvme_lsdesc_len(sizeof(struct fcnvme_ls_cr_conn_acc)), @@ -1542,7 +1578,7 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport, dev_err(tgtport->dev, "Disconnect LS failed: %s\n", validation_errors[ret]); - iod->lsreq->rsplen = nvmet_fc_format_rjt(acc, + iod->lsrsp->rsplen = nvmet_fc_format_rjt(acc, NVME_FC_MAX_LS_BUFFER_SIZE, rqst->w0.ls_cmd, (ret == VERR_NO_ASSOC) ? 
FCNVME_RJT_RC_INV_ASSOC : @@ -1555,7 +1591,7 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport, /* format a response */ - iod->lsreq->rsplen = sizeof(*acc); + iod->lsrsp->rsplen = sizeof(*acc); nvmet_fc_format_rsp_hdr(acc, FCNVME_LS_ACC, fcnvme_lsdesc_len( @@ -1577,9 +1613,9 @@ static void nvmet_fc_fcp_nvme_cmd_done(struct nvmet_req *nvme_req); static const struct nvmet_fabrics_ops nvmet_fc_tgt_fcp_ops; static void -nvmet_fc_xmt_ls_rsp_done(struct nvmefc_tgt_ls_req *lsreq) +nvmet_fc_xmt_ls_rsp_done(struct nvmefc_ls_rsp *lsrsp) { - struct nvmet_fc_ls_iod *iod = lsreq->nvmet_fc_private; + struct nvmet_fc_ls_iod *iod = lsrsp->nvme_fc_private; struct nvmet_fc_tgtport *tgtport = iod->tgtport; fc_dma_sync_single_for_cpu(tgtport->dev, iod->rspdma, @@ -1597,9 +1633,9 @@ nvmet_fc_xmt_ls_rsp(struct nvmet_fc_tgtport *tgtport, fc_dma_sync_single_for_device(tgtport->dev, iod->rspdma, NVME_FC_MAX_LS_BUFFER_SIZE, DMA_TO_DEVICE); - ret = tgtport->ops->xmt_ls_rsp(&tgtport->fc_target_port, iod->lsreq); + ret = tgtport->ops->xmt_ls_rsp(&tgtport->fc_target_port, iod->lsrsp); if (ret) - nvmet_fc_xmt_ls_rsp_done(iod->lsreq); + nvmet_fc_xmt_ls_rsp_done(iod->lsrsp); } /* @@ -1612,12 +1648,12 @@ nvmet_fc_handle_ls_rqst(struct nvmet_fc_tgtport *tgtport, struct fcnvme_ls_rqst_w0 *w0 = (struct fcnvme_ls_rqst_w0 *)iod->rqstbuf; - iod->lsreq->nvmet_fc_private = iod; - iod->lsreq->rspbuf = iod->rspbuf; - iod->lsreq->rspdma = iod->rspdma; - iod->lsreq->done = nvmet_fc_xmt_ls_rsp_done; + iod->lsrsp->nvme_fc_private = iod; + iod->lsrsp->rspbuf = iod->rspbuf; + iod->lsrsp->rspdma = iod->rspdma; + iod->lsrsp->done = nvmet_fc_xmt_ls_rsp_done; /* Be preventative. handlers will later set to valid length */ - iod->lsreq->rsplen = 0; + iod->lsrsp->rsplen = 0; iod->assoc = NULL; @@ -1640,7 +1676,7 @@ nvmet_fc_handle_ls_rqst(struct nvmet_fc_tgtport *tgtport, nvmet_fc_ls_disconnect(tgtport, iod); break; default: - iod->lsreq->rsplen = nvmet_fc_format_rjt(iod->rspbuf, + iod->lsrsp->rsplen = nvmet_fc_format_rjt(iod->rspbuf, NVME_FC_MAX_LS_BUFFER_SIZE, w0->ls_cmd, FCNVME_RJT_RC_INVAL, FCNVME_RJT_EXP_NONE, 0); } @@ -1674,14 +1710,15 @@ nvmet_fc_handle_ls_rqst_work(struct work_struct *work) * * @target_port: pointer to the (registered) target port the LS was * received on. - * @lsreq: pointer to a lsreq request structure to be used to reference + * @lsrsp: pointer to a lsrsp structure to be used to reference * the exchange corresponding to the LS. 
* @lsreqbuf: pointer to the buffer containing the LS Request * @lsreqbuf_len: length, in bytes, of the received LS request */ int nvmet_fc_rcv_ls_req(struct nvmet_fc_target_port *target_port, - struct nvmefc_tgt_ls_req *lsreq, + void *hosthandle, + struct nvmefc_ls_rsp *lsrsp, void *lsreqbuf, u32 lsreqbuf_len) { struct nvmet_fc_tgtport *tgtport = targetport_to_tgtport(target_port); @@ -1699,7 +1736,7 @@ nvmet_fc_rcv_ls_req(struct nvmet_fc_target_port *target_port, return -ENOENT; } - iod->lsreq = lsreq; + iod->lsrsp = lsrsp; iod->fcpreq = NULL; memcpy(iod->rqstbuf, lsreqbuf, lsreqbuf_len); iod->rqstdatalen = lsreqbuf_len; diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c index f69ce66e2d44..c11805a155e8 100644 --- a/drivers/nvme/target/fcloop.c +++ b/drivers/nvme/target/fcloop.c @@ -228,7 +228,7 @@ struct fcloop_nport { struct fcloop_lsreq { struct nvmefc_ls_req *lsreq; - struct nvmefc_tgt_ls_req tgt_ls_req; + struct nvmefc_ls_rsp ls_rsp; int status; struct list_head ls_list; /* fcloop_rport->ls_list */ }; @@ -267,9 +267,9 @@ struct fcloop_ini_fcpreq { }; static inline struct fcloop_lsreq * -tgt_ls_req_to_lsreq(struct nvmefc_tgt_ls_req *tgt_lsreq) +ls_rsp_to_lsreq(struct nvmefc_ls_rsp *lsrsp) { - return container_of(tgt_lsreq, struct fcloop_lsreq, tgt_ls_req); + return container_of(lsrsp, struct fcloop_lsreq, ls_rsp); } static inline struct fcloop_fcpreq * @@ -344,7 +344,7 @@ fcloop_ls_req(struct nvme_fc_local_port *localport, } tls_req->status = 0; - ret = nvmet_fc_rcv_ls_req(rport->targetport, &tls_req->tgt_ls_req, + ret = nvmet_fc_rcv_ls_req(rport->targetport, NULL, &tls_req->ls_rsp, lsreq->rqstaddr, lsreq->rqstlen); return ret; @@ -352,19 +352,19 @@ fcloop_ls_req(struct nvme_fc_local_port *localport, static int fcloop_xmt_ls_rsp(struct nvmet_fc_target_port *targetport, - struct nvmefc_tgt_ls_req *tgt_lsreq) + struct nvmefc_ls_rsp *lsrsp) { - struct fcloop_lsreq *tls_req = tgt_ls_req_to_lsreq(tgt_lsreq); + struct fcloop_lsreq *tls_req = ls_rsp_to_lsreq(lsrsp); struct nvmefc_ls_req *lsreq = tls_req->lsreq; struct fcloop_tport *tport = targetport->private; struct nvme_fc_remote_port *remoteport = tport->remoteport; struct fcloop_rport *rport; - memcpy(lsreq->rspaddr, tgt_lsreq->rspbuf, - ((lsreq->rsplen < tgt_lsreq->rsplen) ? - lsreq->rsplen : tgt_lsreq->rsplen)); + memcpy(lsreq->rspaddr, lsrsp->rspbuf, + ((lsreq->rsplen < lsrsp->rsplen) ? 
+ lsreq->rsplen : lsrsp->rsplen)); - tgt_lsreq->done(tgt_lsreq); + lsrsp->done(lsrsp); if (remoteport) { rport = remoteport->private; diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c index 565419bf8d74..3b25bcb150ea 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.c +++ b/drivers/scsi/lpfc/lpfc_nvmet.c @@ -302,7 +302,7 @@ lpfc_nvmet_xmt_ls_rsp_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, struct lpfc_wcqe_complete *wcqe) { struct lpfc_nvmet_tgtport *tgtp; - struct nvmefc_tgt_ls_req *rsp; + struct nvmefc_ls_rsp *rsp; struct lpfc_nvmet_rcv_ctx *ctxp; uint32_t status, result; @@ -335,7 +335,7 @@ lpfc_nvmet_xmt_ls_rsp_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, } out: - rsp = &ctxp->ctx.ls_req; + rsp = &ctxp->ctx.ls_rsp; lpfc_nvmeio_data(phba, "NVMET LS CMPL: xri x%x stat x%x result x%x\n", ctxp->oxid, status, result); @@ -828,10 +828,10 @@ lpfc_nvmet_xmt_fcp_op_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, static int lpfc_nvmet_xmt_ls_rsp(struct nvmet_fc_target_port *tgtport, - struct nvmefc_tgt_ls_req *rsp) + struct nvmefc_ls_rsp *rsp) { struct lpfc_nvmet_rcv_ctx *ctxp = - container_of(rsp, struct lpfc_nvmet_rcv_ctx, ctx.ls_req); + container_of(rsp, struct lpfc_nvmet_rcv_ctx, ctx.ls_rsp); struct lpfc_hba *phba = ctxp->phba; struct hbq_dmabuf *nvmebuf = (struct hbq_dmabuf *)ctxp->rqb_buffer; @@ -1999,7 +1999,7 @@ dropit: * lpfc_nvmet_xmt_ls_rsp_cmp should free the allocated ctxp. */ atomic_inc(&tgtp->rcv_ls_req_in); - rc = nvmet_fc_rcv_ls_req(phba->targetport, &ctxp->ctx.ls_req, + rc = nvmet_fc_rcv_ls_req(phba->targetport, NULL, &ctxp->ctx.ls_rsp, payload, size); lpfc_printf_log(phba, KERN_INFO, LOG_NVME_DISC, diff --git a/drivers/scsi/lpfc/lpfc_nvmet.h b/drivers/scsi/lpfc/lpfc_nvmet.h index b80b1639b9a7..f0196f3ef90d 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.h +++ b/drivers/scsi/lpfc/lpfc_nvmet.h @@ -105,7 +105,7 @@ struct lpfc_nvmet_ctx_info { struct lpfc_nvmet_rcv_ctx { union { - struct nvmefc_tgt_ls_req ls_req; + struct nvmefc_ls_rsp ls_rsp; struct nvmefc_tgt_fcp_req fcp_req; } ctx; struct list_head list; diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h index 10f81629b9ce..41e7795a3ee4 100644 --- a/include/linux/nvme-fc-driver.h +++ b/include/linux/nvme-fc-driver.h @@ -10,47 +10,26 @@ /* - * ********************** LLDD FC-NVME Host API ******************** + * ********************** FC-NVME LS API ******************** * - * For FC LLDD's that are the NVME Host role. + * Data structures used by both FC-NVME hosts and FC-NVME + * targets to perform FC-NVME LS requests or transmit + * responses. * - * ****************************************************************** + * *********************************************************** */ - - /** - * struct nvme_fc_port_info - port-specific ids and FC connection-specific - * data element used during NVME Host role - * registrations - * - * Static fields describing the port being registered: - * @node_name: FC WWNN for the port - * @port_name: FC WWPN for the port - * @port_role: What NVME roles are supported (see FC_PORT_ROLE_xxx) - * @dev_loss_tmo: maximum delay for reconnects to an association on - * this device. Used only on a remoteport. + * struct nvmefc_ls_req - Request structure passed from the transport + * to the LLDD to perform a NVME-FC LS request and obtain + * a response. + * Used by nvme-fc transport (host) to send LS's such as + * Create Association, Create Connection and Disconnect + * Association. 
+ * Used by the nvmet-fc transport (controller) to send + * LS's such as Disconnect Association. * - * Initialization values for dynamic port fields: - * @port_id: FC N_Port_ID currently assigned the port. Upper 8 bits must - * be set to 0. - */ -struct nvme_fc_port_info { - u64 node_name; - u64 port_name; - u32 port_role; - u32 port_id; - u32 dev_loss_tmo; -}; - - -/** - * struct nvmefc_ls_req - Request structure passed from NVME-FC transport - * to LLDD in order to perform a NVME FC-4 LS - * request and obtain a response. - * - * Values set by the NVME-FC layer prior to calling the LLDD ls_req - * entrypoint. + * Values set by the requestor prior to calling the LLDD ls_req entrypoint: * @rqstaddr: pointer to request buffer * @rqstdma: PCI DMA address of request buffer * @rqstlen: Length, in bytes, of request buffer @@ -63,8 +42,8 @@ struct nvme_fc_port_info { * @private: pointer to memory allocated alongside the ls request structure * that is specifically for the LLDD to use while processing the * request. The length of the buffer corresponds to the - * lsrqst_priv_sz value specified in the nvme_fc_port_template - * supplied by the LLDD. + * lsrqst_priv_sz value specified in the xxx_template supplied + * by the LLDD. * @done: The callback routine the LLDD is to invoke upon completion of * the LS request. req argument is the pointer to the original LS * request structure. Status argument must be 0 upon success, a @@ -86,6 +65,101 @@ struct nvmefc_ls_req { } __aligned(sizeof(u64)); /* alignment for other things alloc'd with */ +/** + * struct nvmefc_ls_rsp - Structure passed from the transport to the LLDD + * to request the transmit of the NVME-FC LS response to a + * NVME-FC LS request. The structure originates in the LLDD + * and is given to the transport via the xxx_rcv_ls_req() + * transport routine. As such, the structure represents the + * FC exchange context for the NVME-FC LS request that was + * received and which the response is to be sent for. + * Used by the LLDD to pass to the nvmet-fc transport (controller) + * received LS's such as Create Association, Create Connection + * and Disconnect Association. + * Used by the LLDD to pass to the nvme-fc transport (host) + * received LS's such as Disconnect Association or Disconnect + * Connection. + * + * The structure is allocated by the LLDD whenever a LS Request is received + * from the FC link. The address of the structure is passed to the nvmet-fc + * or nvme-fc layer via the xxx_rcv_ls_req() transport routines. + * + * The address of the structure is to be passed back to the LLDD + * when the response is to be transmitted. The LLDD will use the address to + * map back to the LLDD exchange structure which maintains information such + * as the remote N_Port that sent the LS as well as any FC exchange context. + * Upon completion of the LS response transmit, the LLDD will pass the + * address of the structure back to the transport LS rsp done() routine, + * allowing the transport to release dma resources. Upon completion of + * the done() routine, no further access to the structure will be made by + * the transport and the LLDD can de-allocate the structure. + * + * Field initialization: + * At the time of the xxx_rcv_ls_req() call, there is no content that + * is valid in the structure. + * + * When the structure is used for the LLDD->xmt_ls_rsp() call, the + * transport layer will fully set the fields in order to specify the + * response payload buffer and its length as well as the done routine + * to be called upon completion of the transmit.
The transport layer + * will also set a private pointer for its own use in the done routine. + * + * Values set by the transport layer prior to calling the LLDD xmt_ls_rsp + * entrypoint: + * @rspbuf: pointer to the LS response buffer + * @rspdma: PCI DMA address of the LS response buffer + * @rsplen: Length, in bytes, of the LS response buffer + * @done: The callback routine the LLDD is to invoke upon completion of + * transmitting the LS response. req argument is the pointer to + * the original ls request. + * @nvme_fc_private: pointer to an internal transport-specific structure + * used as part of the transport done() processing. The LLDD is + * not to access this pointer. + */ +struct nvmefc_ls_rsp { + void *rspbuf; + dma_addr_t rspdma; + u16 rsplen; + + void (*done)(struct nvmefc_ls_rsp *rsp); + void *nvme_fc_private; /* LLDD is not to access !! */ +}; + + + +/* + * ********************** LLDD FC-NVME Host API ******************** + * + * For FC LLDD's that are the NVME Host role. + * + * ****************************************************************** + */ + + +/** + * struct nvme_fc_port_info - port-specific ids and FC connection-specific + * data element used during NVME Host role + * registrations + * + * Static fields describing the port being registered: + * @node_name: FC WWNN for the port + * @port_name: FC WWPN for the port + * @port_role: What NVME roles are supported (see FC_PORT_ROLE_xxx) + * @dev_loss_tmo: maximum delay for reconnects to an association on + * this device. Used only on a remoteport. + * + * Initialization values for dynamic port fields: + * @port_id: FC N_Port_ID currently assigned the port. Upper 8 bits must + * be set to 0. + */ +struct nvme_fc_port_info { + u64 node_name; + u64 port_name; + u32 port_role; + u32 port_id; + u32 dev_loss_tmo; +}; + enum nvmefc_fcp_datadir { NVMEFC_FCP_NODATA, /* payload_length and sg_cnt will be zero */ NVMEFC_FCP_WRITE, @@ -337,6 +411,21 @@ struct nvme_fc_remote_port { * indicating an FC transport Aborted status. * Entrypoint is Mandatory. * + * @xmt_ls_rsp: Called to transmit the response to a FC-NVME FC-4 LS service. + * The nvmefc_ls_rsp structure is the same LLDD-supplied exchange + * structure specified in the nvme_fc_rcv_ls_req() call made when + * the LS request was received. The structure will fully describe + * the buffers for the response payload and the dma address of the + * payload. The LLDD is to transmit the response (or return a + * non-zero errno status), and upon completion of the transmit, call + * the "done" routine specified in the nvmefc_ls_rsp structure + * (argument to done is the address of the nvmefc_ls_rsp structure + * itself). Upon the completion of the done routine, the LLDD shall + * consider the LS handling complete and the nvmefc_ls_rsp structure + * may be freed/released. + * Entrypoint is mandatory if the LLDD calls the nvme_fc_rcv_ls_req() + * entrypoint. + * * @max_hw_queues: indicates the maximum number of hw queues the LLDD * supports for cpu affinitization. * Value is Mandatory. Must be at least 1. @@ -371,7 +460,7 @@ struct nvme_fc_remote_port { * @lsrqst_priv_sz: The LLDD sets this field to the amount of additional * memory that it would like fc nvme layer to allocate on the LLDD's * behalf whenever a ls request structure is allocated. The additional - * memory area solely for the of the LLDD and its location is + * memory area is solely for use by the LLDD and its location is * specified by the ls_request->private pointer. * Value is Mandatory. Allowed to be zero. 
* @@ -405,6 +494,9 @@ struct nvme_fc_port_template { struct nvme_fc_remote_port *, void *hw_queue_handle, struct nvmefc_fcp_req *); + int (*xmt_ls_rsp)(struct nvme_fc_local_port *localport, + struct nvme_fc_remote_port *rport, + struct nvmefc_ls_rsp *ls_rsp); u32 max_hw_queues; u16 max_sgl_segments; @@ -441,6 +533,34 @@ void nvme_fc_rescan_remoteport(struct nvme_fc_remote_port *remoteport); int nvme_fc_set_remoteport_devloss(struct nvme_fc_remote_port *remoteport, u32 dev_loss_tmo); +/* + * Routine called to pass a NVME-FC LS request, received by the lldd, + * to the nvme-fc transport. + * + * If the return value is zero: the LS was successfully accepted by the + * transport. + * If the return value is non-zero: the transport has not accepted the + * LS. The lldd should ABTS-LS the LS. + * + * Note: if the LLDD receives an ABTS for the LS prior to the transport + * calling the ops->xmt_ls_rsp() routine to transmit a response, the LLDD + * shall mark the LS as aborted, and when the xmt_ls_rsp() is called: the + * response shall not be transmitted and the struct nvmefc_ls_rsp done + * routine shall be called. The LLDD may transmit the ABTS response as + * soon as the LS was marked or can delay until the xmt_ls_rsp() call is + * made. + * Note: if an RCV LS was successfully posted to the transport and the + * remoteport is then unregistered before xmt_ls_rsp() was called for + * the lsrsp structure, the transport will still call xmt_ls_rsp() + * afterward to clean up the outstanding lsrsp structure. The LLDD should + * noop the transmission of the rsp and call the lsrsp->done() routine + * to allow the lsrsp structure to be released. + */ +int nvme_fc_rcv_ls_req(struct nvme_fc_remote_port *remoteport, + struct nvmefc_ls_rsp *lsrsp, + void *lsreqbuf, u32 lsreqbuf_len); + + /* + * *************** LLDD FC-NVME Target/Subsystem API *************** @@ -470,55 +590,6 @@ struct nvmet_fc_port_info { }; -/** - * struct nvmefc_tgt_ls_req - Structure used between LLDD and NVMET-FC - * layer to represent the exchange context for - * a FC-NVME Link Service (LS). - * - * The structure is allocated by the LLDD whenever a LS Request is received - * from the FC link. The address of the structure is passed to the nvmet-fc - * layer via the nvmet_fc_rcv_ls_req() call. The address of the structure - * will be passed back to the LLDD when the response is to be transmit. - * The LLDD is to use the address to map back to the LLDD exchange structure - * which maintains information such as the targetport the LS was received - * on, the remote FC NVME initiator that sent the LS, and any FC exchange - * context. Upon completion of the LS response transmit, the address of the - * structure will be passed back to the LS rsp done() routine, allowing the - * nvmet-fc layer to release dma resources. Upon completion of the done() - * routine, no further access will be made by the nvmet-fc layer and the - * LLDD can de-allocate the structure. - * - * Field initialization: - * At the time of the nvmet_fc_rcv_ls_req() call, there is no content that - * is valid in the structure. - * - * When the structure is used for the LLDD->xmt_ls_rsp() call, the nvmet-fc - * layer will fully set the fields in order to specify the response - * payload buffer and its length as well as the done routine to be called - * upon compeletion of the transmit. The nvmet-fc layer will also set a - * private pointer for its own use in the done routine. - * - * Values set by the NVMET-FC layer prior to calling the LLDD xmt_ls_rsp - * entrypoint.
- * @rspbuf: pointer to the LS response buffer - * @rspdma: PCI DMA address of the LS response buffer - * @rsplen: Length, in bytes, of the LS response buffer - * @done: The callback routine the LLDD is to invoke upon completion of - * transmitting the LS response. req argument is the pointer to - * the original ls request. - * @nvmet_fc_private: pointer to an internal NVMET-FC layer structure used - * as part of the NVMET-FC processing. The LLDD is not to access - * this pointer. - */ -struct nvmefc_tgt_ls_req { - void *rspbuf; - dma_addr_t rspdma; - u16 rsplen; - - void (*done)(struct nvmefc_tgt_ls_req *req); - void *nvmet_fc_private; /* LLDD is not to access !! */ -}; - /* Operations that NVME-FC layer may request the LLDD to perform for FCP */ enum { NVMET_FCOP_READDATA = 1, /* xmt data to initiator */ @@ -693,17 +764,19 @@ struct nvmet_fc_target_port { * Entrypoint is Mandatory. * * @xmt_ls_rsp: Called to transmit the response to a FC-NVME FC-4 LS service. - * The nvmefc_tgt_ls_req structure is the same LLDD-supplied exchange + * The nvmefc_ls_rsp structure is the same LLDD-supplied exchange * structure specified in the nvmet_fc_rcv_ls_req() call made when - * the LS request was received. The structure will fully describe + * the LS request was received. The structure will fully describe * the buffers for the response payload and the dma address of the - * payload. The LLDD is to transmit the response (or return a non-zero - * errno status), and upon completion of the transmit, call the - * "done" routine specified in the nvmefc_tgt_ls_req structure - * (argument to done is the ls reqwuest structure itself). - * After calling the done routine, the LLDD shall consider the - * LS handling complete and the nvmefc_tgt_ls_req structure may - * be freed/released. + * payload. The LLDD is to transmit the response (or return a + * non-zero errno status), and upon completion of the transmit, call + * the "done" routine specified in the nvmefc_ls_rsp structure + * (argument to done is the address of the nvmefc_ls_rsp structure + * itself). Upon the completion of the done() routine, the LLDD shall + * consider the LS handling complete and the nvmefc_ls_rsp structure + * may be freed/released. + * The transport will always call the xmt_ls_rsp() routine for any + * LS received. * Entrypoint is Mandatory. * * @fcp_op: Called to perform a data transfer or transmit a response. @@ -798,6 +871,39 @@ struct nvmet_fc_target_port { * should cause the initiator to rescan the discovery controller * on the targetport. * + * @ls_req: Called to issue a FC-NVME FC-4 LS service request. + * The nvme_fc_ls_req structure will fully describe the buffers for + * the request payload and where to place the response payload. + * The targetport that is to issue the LS request is identified by + * the targetport argument. The remote port that is to receive the + * LS request is identified by the hosthandle argument. The nvmet-fc + * transport is only allowed to issue FC-NVME LS's on behalf of an + * association that was created prior by a Create Association LS. + * The hosthandle will originate from the LLDD in the struct + * nvmefc_ls_rsp structure for the Create Association LS that + * was delivered to the transport. The transport will save the + * hosthandle as an attribute of the association. If the LLDD + * loses connectivity with the remote port, it must call the + * nvmet_fc_invalidate_host() routine to remove any references to + * the remote port in the transport. 
+ * The LLDD is to allocate an exchange, issue the LS request, obtain + * the LS response, and call the "done" routine specified in the + * request structure (argument to done is the ls request structure + * itself). + * Entrypoint is Optional - but highly recommended. + * + * @ls_abort: called to request the LLDD to abort the indicated ls request. + * The call may return before the abort has completed. After aborting + * the request, the LLDD must still call the ls request done routine + * indicating an FC transport Aborted status. + * Entrypoint is Mandatory if the ls_req entry point is specified. + * + * @host_release: called to inform the LLDD that the request to invalidate + * the host port indicated by the hosthandle has been fully completed. + * No associations exist with the host port and there will be no + * further references to hosthandle. + * Entrypoint is Mandatory if the lldd calls nvmet_fc_invalidate_host(). + * + * @max_hw_queues: indicates the maximum number of hw queues the LLDD + * supports for cpu affinitization. + * Value is Mandatory. Must be at least 1. @@ -826,11 +932,19 @@ struct nvmet_fc_target_port { * area solely for the of the LLDD and its location is specified by * the targetport->private pointer. * Value is Mandatory. Allowed to be zero. + * + * @lsrqst_priv_sz: The LLDD sets this field to the amount of additional + * memory that it would like the nvmet-fc layer to allocate on the LLDD's + * behalf whenever a ls request structure is allocated. The additional + * memory area is solely for use by the LLDD and its location is + * specified by the ls_request->private pointer. + * Value is Mandatory. Allowed to be zero. + * */ struct nvmet_fc_target_template { void (*targetport_delete)(struct nvmet_fc_target_port *tgtport); int (*xmt_ls_rsp)(struct nvmet_fc_target_port *tgtport, - struct nvmefc_tgt_ls_req *tls_req); + struct nvmefc_ls_rsp *ls_rsp); int (*fcp_op)(struct nvmet_fc_target_port *tgtport, struct nvmefc_tgt_fcp_req *fcpreq); void (*fcp_abort)(struct nvmet_fc_target_port *tgtport, @@ -840,6 +954,11 @@ struct nvmet_fc_target_template { void (*defer_rcv)(struct nvmet_fc_target_port *tgtport, struct nvmefc_tgt_fcp_req *fcpreq); void (*discovery_event)(struct nvmet_fc_target_port *tgtport); + int (*ls_req)(struct nvmet_fc_target_port *targetport, + void *hosthandle, struct nvmefc_ls_req *lsreq); + void (*ls_abort)(struct nvmet_fc_target_port *targetport, + void *hosthandle, struct nvmefc_ls_req *lsreq); + void (*host_release)(void *hosthandle); u32 max_hw_queues; u16 max_sgl_segments; @@ -848,7 +967,9 @@ struct nvmet_fc_target_template { u32 target_features; + /* sizes of additional private data for data structures */ u32 target_priv_sz; + u32 lsrqst_priv_sz; }; @@ -859,10 +980,61 @@ int nvmet_fc_register_targetport(struct nvmet_fc_port_info *portinfo, int nvmet_fc_unregister_targetport(struct nvmet_fc_target_port *tgtport); +/* + * Routine called to pass a NVME-FC LS request, received by the lldd, + * to the nvmet-fc transport. + * + * If the return value is zero: the LS was successfully accepted by the + * transport. + * If the return value is non-zero: the transport has not accepted the + * LS. The lldd should ABTS-LS the LS. + * + * Note: if the LLDD receives an ABTS for the LS prior to the transport + * calling the ops->xmt_ls_rsp() routine to transmit a response, the LLDD + * shall mark the LS as aborted, and when the xmt_ls_rsp() is called: the + * response shall not be transmitted and the struct nvmefc_ls_rsp done + * routine shall be called.
The LLDD may transmit the ABTS response as + * soon as the LS was marked or can delay until the xmt_ls_rsp() call is + * made. + * Note: if an RCV LS was successfully posted to the transport and the + * targetport is then unregistered before xmt_ls_rsp() was called for + * the lsrsp structure, the transport will still call xmt_ls_rsp() + * afterward to clean up the outstanding lsrsp structure. The LLDD should + * noop the transmission of the rsp and call the lsrsp->done() routine + * to allow the lsrsp structure to be released. + */ int nvmet_fc_rcv_ls_req(struct nvmet_fc_target_port *tgtport, - struct nvmefc_tgt_ls_req *lsreq, + void *hosthandle, + struct nvmefc_ls_rsp *rsp, void *lsreqbuf, u32 lsreqbuf_len); +/* + * Routine called by the LLDD whenever it has a logout or loss of + * connectivity to a NVME-FC host port for which there had been active + * NVMe controllers. The host port is indicated by the + * hosthandle. The hosthandle is given to the nvmet-fc transport + * when a NVME LS was received, typically to create a new association. + * The nvmet-fc transport will cache the hostport value with the + * association for use in LS requests for the association. + * When the LLDD calls this routine, the nvmet-fc transport will + * immediately terminate all associations that were created with + * the hosthandle host port. + * The LLDD, after calling this routine and having control returned, + * must assume the transport may subsequently utilize hosthandle as + * part of sending LS's to terminate the association. The LLDD + * should reject the LS's if they are attempted. + * Once the last association has terminated for the hosthandle host + * port, the nvmet-fc transport will call the ops->host_release() + * callback. As of the callback, the nvmet-fc transport will no + * longer reference hosthandle. + */ +void nvmet_fc_invalidate_host(struct nvmet_fc_target_port *tgtport, + void *hosthandle); + +/* + * If nvmet_fc_rcv_fcp_req returns non-zero, the transport has not accepted + * the FCP cmd. The lldd should ABTS-LS the cmd. + */ int nvmet_fc_rcv_fcp_req(struct nvmet_fc_target_port *tgtport, struct nvmefc_tgt_fcp_req *fcpreq, void *cmdiubuf, u32 cmdiubuf_len); -- cgit v1.2.3 From ca19bcd086331ec2fa182ad8cd589014beb931be Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 31 Mar 2020 09:49:48 -0700 Subject: nvme-fc nvmet-fc: refactor for common LS definitions Routines in the target will want to be used in the host as well. Error definitions should now be shared as both sides will process requests and responses to requests. Move common declarations to a new fc.h header kept in the host subdirectory.
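One reason the enum of validation-error indexes and its string table move together into the shared fc.h is that the two must stay in lockstep. A standalone sketch of that pairing (only a subset of the real values; the designated initializers here make the index/string coupling explicit, which the kernel table achieves by ordering):

#include <stdio.h>

/* subset of the shared table; index values match the enum in the new fc.h */
enum { VERR_NO_ERROR = 0, VERR_CR_ASSOC_LEN = 1, VERR_NO_ASSOC = 12 };

static const char *validation_errors[] = {
	[VERR_NO_ERROR]		= "OK",
	[VERR_CR_ASSOC_LEN]	= "Bad CR_ASSOC Length",
	[VERR_NO_ASSOC]		= "No Association",
};

int main(void)
{
	int ret = VERR_NO_ASSOC;	/* as an LS validation path would set */

	if (ret)
		printf("Disconnect LS failed: %s\n", validation_errors[ret]);
	return 0;
}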
Signed-off-by: James Smart Reviewed-by: Hannes Reinecke Reviewed-by: Himanshu Madhani Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/fc.c | 36 +------------ drivers/nvme/host/fc.h | 133 +++++++++++++++++++++++++++++++++++++++++++++++ drivers/nvme/target/fc.c | 115 ++++------------------------------------ 3 files changed, 143 insertions(+), 141 deletions(-) create mode 100644 drivers/nvme/host/fc.h diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 8012099fc3ee..83f9b2ac7c55 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -14,6 +14,7 @@ #include "fabrics.h" #include #include +#include "fc.h" #include /* *************************** Data Structures/Defines ****************** */ @@ -1140,41 +1141,6 @@ nvme_fc_send_ls_req_async(struct nvme_fc_rport *rport, return __nvme_fc_send_ls_req(rport, lsop, done); } -/* Validation Error indexes into the string table below */ -enum { - VERR_NO_ERROR = 0, - VERR_LSACC = 1, - VERR_LSDESC_RQST = 2, - VERR_LSDESC_RQST_LEN = 3, - VERR_ASSOC_ID = 4, - VERR_ASSOC_ID_LEN = 5, - VERR_CONN_ID = 6, - VERR_CONN_ID_LEN = 7, - VERR_CR_ASSOC = 8, - VERR_CR_ASSOC_ACC_LEN = 9, - VERR_CR_CONN = 10, - VERR_CR_CONN_ACC_LEN = 11, - VERR_DISCONN = 12, - VERR_DISCONN_ACC_LEN = 13, -}; - -static char *validation_errors[] = { - "OK", - "Not LS_ACC", - "Not LSDESC_RQST", - "Bad LSDESC_RQST Length", - "Not Association ID", - "Bad Association ID Length", - "Not Connection ID", - "Bad Connection ID Length", - "Not CR_ASSOC Rqst", - "Bad CR_ASSOC ACC Length", - "Not CR_CONN Rqst", - "Bad CR_CONN ACC Length", - "Not Disconnect Rqst", - "Bad Disconnect ACC Length", -}; - static int nvme_fc_connect_admin_queue(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, u16 qsize, u16 ersp_ratio) diff --git a/drivers/nvme/host/fc.h b/drivers/nvme/host/fc.h new file mode 100644 index 000000000000..d2861cdd58ee --- /dev/null +++ b/drivers/nvme/host/fc.h @@ -0,0 +1,133 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2016, Avago Technologies + */ + +#ifndef _NVME_FC_TRANSPORT_H +#define _NVME_FC_TRANSPORT_H 1 + + +/* + * Common definitions between the nvme_fc (host) transport and + * nvmet_fc (target) transport implementation. 
+ */ + +/* + * ****************** FC-NVME LS HANDLING ****************** + */ + +static inline void +nvme_fc_format_rsp_hdr(void *buf, u8 ls_cmd, __be32 desc_len, u8 rqst_ls_cmd) +{ + struct fcnvme_ls_acc_hdr *acc = buf; + + acc->w0.ls_cmd = ls_cmd; + acc->desc_list_len = desc_len; + acc->rqst.desc_tag = cpu_to_be32(FCNVME_LSDESC_RQST); + acc->rqst.desc_len = + fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_rqst)); + acc->rqst.w0.ls_cmd = rqst_ls_cmd; +} + +static inline int +nvme_fc_format_rjt(void *buf, u16 buflen, u8 ls_cmd, + u8 reason, u8 explanation, u8 vendor) +{ + struct fcnvme_ls_rjt *rjt = buf; + + nvme_fc_format_rsp_hdr(buf, FCNVME_LSDESC_RQST, + fcnvme_lsdesc_len(sizeof(struct fcnvme_ls_rjt)), + ls_cmd); + rjt->rjt.desc_tag = cpu_to_be32(FCNVME_LSDESC_RJT); + rjt->rjt.desc_len = fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_rjt)); + rjt->rjt.reason_code = reason; + rjt->rjt.reason_explanation = explanation; + rjt->rjt.vendor = vendor; + + return sizeof(struct fcnvme_ls_rjt); +} + +/* Validation Error indexes into the string table below */ +enum { + VERR_NO_ERROR = 0, + VERR_CR_ASSOC_LEN = 1, + VERR_CR_ASSOC_RQST_LEN = 2, + VERR_CR_ASSOC_CMD = 3, + VERR_CR_ASSOC_CMD_LEN = 4, + VERR_ERSP_RATIO = 5, + VERR_ASSOC_ALLOC_FAIL = 6, + VERR_QUEUE_ALLOC_FAIL = 7, + VERR_CR_CONN_LEN = 8, + VERR_CR_CONN_RQST_LEN = 9, + VERR_ASSOC_ID = 10, + VERR_ASSOC_ID_LEN = 11, + VERR_NO_ASSOC = 12, + VERR_CONN_ID = 13, + VERR_CONN_ID_LEN = 14, + VERR_INVAL_CONN = 15, + VERR_CR_CONN_CMD = 16, + VERR_CR_CONN_CMD_LEN = 17, + VERR_DISCONN_LEN = 18, + VERR_DISCONN_RQST_LEN = 19, + VERR_DISCONN_CMD = 20, + VERR_DISCONN_CMD_LEN = 21, + VERR_DISCONN_SCOPE = 22, + VERR_RS_LEN = 23, + VERR_RS_RQST_LEN = 24, + VERR_RS_CMD = 25, + VERR_RS_CMD_LEN = 26, + VERR_RS_RCTL = 27, + VERR_RS_RO = 28, + VERR_LSACC = 29, + VERR_LSDESC_RQST = 30, + VERR_LSDESC_RQST_LEN = 31, + VERR_CR_ASSOC = 32, + VERR_CR_ASSOC_ACC_LEN = 33, + VERR_CR_CONN = 34, + VERR_CR_CONN_ACC_LEN = 35, + VERR_DISCONN = 36, + VERR_DISCONN_ACC_LEN = 37, +}; + +static char *validation_errors[] = { + "OK", + "Bad CR_ASSOC Length", + "Bad CR_ASSOC Rqst Length", + "Not CR_ASSOC Cmd", + "Bad CR_ASSOC Cmd Length", + "Bad Ersp Ratio", + "Association Allocation Failed", + "Queue Allocation Failed", + "Bad CR_CONN Length", + "Bad CR_CONN Rqst Length", + "Not Association ID", + "Bad Association ID Length", + "No Association", + "Not Connection ID", + "Bad Connection ID Length", + "Invalid Connection ID", + "Not CR_CONN Cmd", + "Bad CR_CONN Cmd Length", + "Bad DISCONN Length", + "Bad DISCONN Rqst Length", + "Not DISCONN Cmd", + "Bad DISCONN Cmd Length", + "Bad Disconnect Scope", + "Bad RS Length", + "Bad RS Rqst Length", + "Not RS Cmd", + "Bad RS Cmd Length", + "Bad RS R_CTL", + "Bad RS Relative Offset", + "Not LS_ACC", + "Not LSDESC_RQST", + "Bad LSDESC_RQST Length", + "Not CR_ASSOC Rqst", + "Bad CR_ASSOC ACC Length", + "Not CR_CONN Rqst", + "Bad CR_CONN ACC Length", + "Not Disconnect Rqst", + "Bad Disconnect ACC Length", +}; + +#endif /* _NVME_FC_TRANSPORT_H */ diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index aac7869a70bb..1f3118a3b0a3 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -14,6 +14,7 @@ #include "nvmet.h" #include #include +#include "../host/fc.h" /* *************************** Data Structures/Defines ****************** */ @@ -1257,102 +1258,6 @@ EXPORT_SYMBOL_GPL(nvmet_fc_unregister_targetport); /* *********************** FC-NVME LS Handling **************************** */ -static void 
-nvmet_fc_format_rsp_hdr(void *buf, u8 ls_cmd, __be32 desc_len, u8 rqst_ls_cmd) -{ - struct fcnvme_ls_acc_hdr *acc = buf; - - acc->w0.ls_cmd = ls_cmd; - acc->desc_list_len = desc_len; - acc->rqst.desc_tag = cpu_to_be32(FCNVME_LSDESC_RQST); - acc->rqst.desc_len = - fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_rqst)); - acc->rqst.w0.ls_cmd = rqst_ls_cmd; -} - -static int -nvmet_fc_format_rjt(void *buf, u16 buflen, u8 ls_cmd, - u8 reason, u8 explanation, u8 vendor) -{ - struct fcnvme_ls_rjt *rjt = buf; - - nvmet_fc_format_rsp_hdr(buf, FCNVME_LSDESC_RQST, - fcnvme_lsdesc_len(sizeof(struct fcnvme_ls_rjt)), - ls_cmd); - rjt->rjt.desc_tag = cpu_to_be32(FCNVME_LSDESC_RJT); - rjt->rjt.desc_len = fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_rjt)); - rjt->rjt.reason_code = reason; - rjt->rjt.reason_explanation = explanation; - rjt->rjt.vendor = vendor; - - return sizeof(struct fcnvme_ls_rjt); -} - -/* Validation Error indexes into the string table below */ -enum { - VERR_NO_ERROR = 0, - VERR_CR_ASSOC_LEN = 1, - VERR_CR_ASSOC_RQST_LEN = 2, - VERR_CR_ASSOC_CMD = 3, - VERR_CR_ASSOC_CMD_LEN = 4, - VERR_ERSP_RATIO = 5, - VERR_ASSOC_ALLOC_FAIL = 6, - VERR_QUEUE_ALLOC_FAIL = 7, - VERR_CR_CONN_LEN = 8, - VERR_CR_CONN_RQST_LEN = 9, - VERR_ASSOC_ID = 10, - VERR_ASSOC_ID_LEN = 11, - VERR_NO_ASSOC = 12, - VERR_CONN_ID = 13, - VERR_CONN_ID_LEN = 14, - VERR_NO_CONN = 15, - VERR_CR_CONN_CMD = 16, - VERR_CR_CONN_CMD_LEN = 17, - VERR_DISCONN_LEN = 18, - VERR_DISCONN_RQST_LEN = 19, - VERR_DISCONN_CMD = 20, - VERR_DISCONN_CMD_LEN = 21, - VERR_DISCONN_SCOPE = 22, - VERR_RS_LEN = 23, - VERR_RS_RQST_LEN = 24, - VERR_RS_CMD = 25, - VERR_RS_CMD_LEN = 26, - VERR_RS_RCTL = 27, - VERR_RS_RO = 28, -}; - -static char *validation_errors[] = { - "OK", - "Bad CR_ASSOC Length", - "Bad CR_ASSOC Rqst Length", - "Not CR_ASSOC Cmd", - "Bad CR_ASSOC Cmd Length", - "Bad Ersp Ratio", - "Association Allocation Failed", - "Queue Allocation Failed", - "Bad CR_CONN Length", - "Bad CR_CONN Rqst Length", - "Not Association ID", - "Bad Association ID Length", - "No Association", - "Not Connection ID", - "Bad Connection ID Length", - "No Connection", - "Not CR_CONN Cmd", - "Bad CR_CONN Cmd Length", - "Bad DISCONN Length", - "Bad DISCONN Rqst Length", - "Not DISCONN Cmd", - "Bad DISCONN Cmd Length", - "Bad Disconnect Scope", - "Bad RS Length", - "Bad RS Rqst Length", - "Not RS Cmd", - "Bad RS Cmd Length", - "Bad RS R_CTL", - "Bad RS Relative Offset", -}; - static void nvmet_fc_ls_create_association(struct nvmet_fc_tgtport *tgtport, struct nvmet_fc_ls_iod *iod) @@ -1407,7 +1312,7 @@ nvmet_fc_ls_create_association(struct nvmet_fc_tgtport *tgtport, dev_err(tgtport->dev, "Create Association LS failed: %s\n", validation_errors[ret]); - iod->lsrsp->rsplen = nvmet_fc_format_rjt(acc, + iod->lsrsp->rsplen = nvme_fc_format_rjt(acc, NVME_FC_MAX_LS_BUFFER_SIZE, rqst->w0.ls_cmd, FCNVME_RJT_RC_LOGIC, FCNVME_RJT_EXP_NONE, 0); @@ -1422,7 +1327,7 @@ nvmet_fc_ls_create_association(struct nvmet_fc_tgtport *tgtport, iod->lsrsp->rsplen = sizeof(*acc); - nvmet_fc_format_rsp_hdr(acc, FCNVME_LS_ACC, + nvme_fc_format_rsp_hdr(acc, FCNVME_LS_ACC, fcnvme_lsdesc_len( sizeof(struct fcnvme_ls_cr_assoc_acc)), FCNVME_LS_CREATE_ASSOCIATION); @@ -1498,7 +1403,7 @@ nvmet_fc_ls_create_connection(struct nvmet_fc_tgtport *tgtport, dev_err(tgtport->dev, "Create Connection LS failed: %s\n", validation_errors[ret]); - iod->lsrsp->rsplen = nvmet_fc_format_rjt(acc, + iod->lsrsp->rsplen = nvme_fc_format_rjt(acc, NVME_FC_MAX_LS_BUFFER_SIZE, rqst->w0.ls_cmd, (ret == VERR_NO_ASSOC) ? 
FCNVME_RJT_RC_INV_ASSOC : @@ -1515,7 +1420,7 @@ nvmet_fc_ls_create_connection(struct nvmet_fc_tgtport *tgtport, iod->lsrsp->rsplen = sizeof(*acc); - nvmet_fc_format_rsp_hdr(acc, FCNVME_LS_ACC, + nvme_fc_format_rsp_hdr(acc, FCNVME_LS_ACC, fcnvme_lsdesc_len(sizeof(struct fcnvme_ls_cr_conn_acc)), FCNVME_LS_CREATE_CONNECTION); acc->connectid.desc_tag = cpu_to_be32(FCNVME_LSDESC_CONN_ID); @@ -1578,13 +1483,11 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport, dev_err(tgtport->dev, "Disconnect LS failed: %s\n", validation_errors[ret]); - iod->lsrsp->rsplen = nvmet_fc_format_rjt(acc, + iod->lsrsp->rsplen = nvme_fc_format_rjt(acc, NVME_FC_MAX_LS_BUFFER_SIZE, rqst->w0.ls_cmd, (ret == VERR_NO_ASSOC) ? FCNVME_RJT_RC_INV_ASSOC : - (ret == VERR_NO_CONN) ? - FCNVME_RJT_RC_INV_CONN : - FCNVME_RJT_RC_LOGIC, + FCNVME_RJT_RC_LOGIC, FCNVME_RJT_EXP_NONE, 0); return; } @@ -1593,7 +1496,7 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport, iod->lsrsp->rsplen = sizeof(*acc); - nvmet_fc_format_rsp_hdr(acc, FCNVME_LS_ACC, + nvme_fc_format_rsp_hdr(acc, FCNVME_LS_ACC, fcnvme_lsdesc_len( sizeof(struct fcnvme_ls_disconnect_assoc_acc)), FCNVME_LS_DISCONNECT_ASSOC); @@ -1676,7 +1579,7 @@ nvmet_fc_handle_ls_rqst(struct nvmet_fc_tgtport *tgtport, nvmet_fc_ls_disconnect(tgtport, iod); break; default: - iod->lsrsp->rsplen = nvmet_fc_format_rjt(iod->rspbuf, + iod->lsrsp->rsplen = nvme_fc_format_rjt(iod->rspbuf, NVME_FC_MAX_LS_BUFFER_SIZE, w0->ls_cmd, FCNVME_RJT_RC_INVAL, FCNVME_RJT_EXP_NONE, 0); } -- cgit v1.2.3 From 3b8281b02bdc6fc7bed6f20af6fd7933a86b94e2 Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 31 Mar 2020 09:49:49 -0700 Subject: nvmet-fc: Better size LS buffers Current code uses NVME_FC_MAX_LS_BUFFER_SIZE (2KB) when allocating buffers for LS requests and responses. This is considerable overkill for what is actually defined. Rework code to have unions for all possible requests and responses and size based on the unions. Remove NVME_FC_MAX_LS_BUFFER_SIZE. 
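The sizing mechanism is plain C: a union is exactly as large as its largest member, so sizeof on the union replaces the 2KB guess with the true worst case. A standalone sketch with made-up payload sizes (the real fcnvme_ls_* structs live in include/linux/nvme-fc.h):

#include <stdio.h>

/* stand-in request payloads; sizes are illustrative only */
struct cr_assoc_rqst { char payload[312]; };
struct cr_conn_rqst { char payload[88]; };
struct dis_assoc_rqst { char payload[48]; };

union ls_requests {
	struct cr_assoc_rqst rq_cr_assoc;
	struct cr_conn_rqst rq_cr_conn;
	struct dis_assoc_rqst rq_dis_assoc;
} __attribute__((aligned(128)));	/* as the patch's __aligned(128) does */

int main(void)
{
	/* one buffer of this size holds any request; no 2048-byte overshoot */
	printf("bytes needed: %zu (was 2048)\n", sizeof(union ls_requests));
	return 0;
}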
Signed-off-by: James Smart Reviewed-by: Sagi Grimberg Reviewed-by: Hannes Reinecke Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/fc.h | 15 ++++++++++++++ drivers/nvme/target/fc.c | 53 +++++++++++++++++++++--------------------------- 2 files changed, 38 insertions(+), 30 deletions(-) diff --git a/drivers/nvme/host/fc.h b/drivers/nvme/host/fc.h index d2861cdd58ee..08fa88381d45 100644 --- a/drivers/nvme/host/fc.h +++ b/drivers/nvme/host/fc.h @@ -16,6 +16,21 @@ * ****************** FC-NVME LS HANDLING ****************** */ +union nvmefc_ls_requests { + struct fcnvme_ls_cr_assoc_rqst rq_cr_assoc; + struct fcnvme_ls_cr_conn_rqst rq_cr_conn; + struct fcnvme_ls_disconnect_assoc_rqst rq_dis_assoc; + struct fcnvme_ls_disconnect_conn_rqst rq_dis_conn; +} __aligned(128); /* alignment for other things alloc'd with */ + +union nvmefc_ls_responses { + struct fcnvme_ls_rjt rsp_rjt; + struct fcnvme_ls_cr_assoc_acc rsp_cr_assoc; + struct fcnvme_ls_cr_conn_acc rsp_cr_conn; + struct fcnvme_ls_disconnect_assoc_acc rsp_dis_assoc; + struct fcnvme_ls_disconnect_conn_acc rsp_dis_conn; +} __aligned(128); /* alignment for other things alloc'd with */ + static inline void nvme_fc_format_rsp_hdr(void *buf, u8 ls_cmd, __be32 desc_len, u8 rqst_ls_cmd) { diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 1f3118a3b0a3..66de6bd8f4fd 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -22,9 +22,6 @@ #define NVMET_LS_CTX_COUNT 256 -/* for this implementation, assume small single frame rqst/rsp */ -#define NVME_FC_MAX_LS_BUFFER_SIZE 2048 - struct nvmet_fc_tgtport; struct nvmet_fc_tgt_assoc; @@ -37,8 +34,8 @@ struct nvmet_fc_ls_iod { struct nvmet_fc_tgtport *tgtport; struct nvmet_fc_tgt_assoc *assoc; - u8 *rqstbuf; - u8 *rspbuf; + union nvmefc_ls_requests *rqstbuf; + union nvmefc_ls_responses *rspbuf; u16 rqstdatalen; dma_addr_t rspdma; @@ -340,15 +337,16 @@ nvmet_fc_alloc_ls_iodlist(struct nvmet_fc_tgtport *tgtport) iod->tgtport = tgtport; list_add_tail(&iod->ls_list, &tgtport->ls_list); - iod->rqstbuf = kcalloc(2, NVME_FC_MAX_LS_BUFFER_SIZE, - GFP_KERNEL); + iod->rqstbuf = kzalloc(sizeof(union nvmefc_ls_requests) + + sizeof(union nvmefc_ls_responses), + GFP_KERNEL); if (!iod->rqstbuf) goto out_fail; - iod->rspbuf = iod->rqstbuf + NVME_FC_MAX_LS_BUFFER_SIZE; + iod->rspbuf = (union nvmefc_ls_responses *)&iod->rqstbuf[1]; iod->rspdma = fc_dma_map_single(tgtport->dev, iod->rspbuf, - NVME_FC_MAX_LS_BUFFER_SIZE, + sizeof(*iod->rspbuf), DMA_TO_DEVICE); if (fc_dma_mapping_error(tgtport->dev, iod->rspdma)) goto out_fail; @@ -361,7 +359,7 @@ out_fail: list_del(&iod->ls_list); for (iod--, i--; i >= 0; iod--, i--) { fc_dma_unmap_single(tgtport->dev, iod->rspdma, - NVME_FC_MAX_LS_BUFFER_SIZE, DMA_TO_DEVICE); + sizeof(*iod->rspbuf), DMA_TO_DEVICE); kfree(iod->rqstbuf); list_del(&iod->ls_list); } @@ -379,7 +377,7 @@ nvmet_fc_free_ls_iodlist(struct nvmet_fc_tgtport *tgtport) for (i = 0; i < NVMET_LS_CTX_COUNT; iod++, i++) { fc_dma_unmap_single(tgtport->dev, - iod->rspdma, NVME_FC_MAX_LS_BUFFER_SIZE, + iod->rspdma, sizeof(*iod->rspbuf), DMA_TO_DEVICE); kfree(iod->rqstbuf); list_del(&iod->ls_list); @@ -1262,10 +1260,8 @@ static void nvmet_fc_ls_create_association(struct nvmet_fc_tgtport *tgtport, struct nvmet_fc_ls_iod *iod) { - struct fcnvme_ls_cr_assoc_rqst *rqst = - (struct fcnvme_ls_cr_assoc_rqst *)iod->rqstbuf; - struct fcnvme_ls_cr_assoc_acc *acc = - (struct fcnvme_ls_cr_assoc_acc *)iod->rspbuf; + struct fcnvme_ls_cr_assoc_rqst *rqst = &iod->rqstbuf->rq_cr_assoc; 
+ struct fcnvme_ls_cr_assoc_acc *acc = &iod->rspbuf->rsp_cr_assoc; struct nvmet_fc_tgt_queue *queue; int ret = 0; @@ -1313,7 +1309,7 @@ nvmet_fc_ls_create_association(struct nvmet_fc_tgtport *tgtport, "Create Association LS failed: %s\n", validation_errors[ret]); iod->lsrsp->rsplen = nvme_fc_format_rjt(acc, - NVME_FC_MAX_LS_BUFFER_SIZE, rqst->w0.ls_cmd, + sizeof(*acc), rqst->w0.ls_cmd, FCNVME_RJT_RC_LOGIC, FCNVME_RJT_EXP_NONE, 0); return; @@ -1348,10 +1344,8 @@ static void nvmet_fc_ls_create_connection(struct nvmet_fc_tgtport *tgtport, struct nvmet_fc_ls_iod *iod) { - struct fcnvme_ls_cr_conn_rqst *rqst = - (struct fcnvme_ls_cr_conn_rqst *)iod->rqstbuf; - struct fcnvme_ls_cr_conn_acc *acc = - (struct fcnvme_ls_cr_conn_acc *)iod->rspbuf; + struct fcnvme_ls_cr_conn_rqst *rqst = &iod->rqstbuf->rq_cr_conn; + struct fcnvme_ls_cr_conn_acc *acc = &iod->rspbuf->rsp_cr_conn; struct nvmet_fc_tgt_queue *queue; int ret = 0; @@ -1404,7 +1398,7 @@ nvmet_fc_ls_create_connection(struct nvmet_fc_tgtport *tgtport, "Create Connection LS failed: %s\n", validation_errors[ret]); iod->lsrsp->rsplen = nvme_fc_format_rjt(acc, - NVME_FC_MAX_LS_BUFFER_SIZE, rqst->w0.ls_cmd, + sizeof(*acc), rqst->w0.ls_cmd, (ret == VERR_NO_ASSOC) ? FCNVME_RJT_RC_INV_ASSOC : FCNVME_RJT_RC_LOGIC, @@ -1437,9 +1431,9 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport, struct nvmet_fc_ls_iod *iod) { struct fcnvme_ls_disconnect_assoc_rqst *rqst = - (struct fcnvme_ls_disconnect_assoc_rqst *)iod->rqstbuf; + &iod->rqstbuf->rq_dis_assoc; struct fcnvme_ls_disconnect_assoc_acc *acc = - (struct fcnvme_ls_disconnect_assoc_acc *)iod->rspbuf; + &iod->rspbuf->rsp_dis_assoc; struct nvmet_fc_tgt_assoc *assoc; int ret = 0; @@ -1484,7 +1478,7 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport, "Disconnect LS failed: %s\n", validation_errors[ret]); iod->lsrsp->rsplen = nvme_fc_format_rjt(acc, - NVME_FC_MAX_LS_BUFFER_SIZE, rqst->w0.ls_cmd, + sizeof(*acc), rqst->w0.ls_cmd, (ret == VERR_NO_ASSOC) ? 
FCNVME_RJT_RC_INV_ASSOC : FCNVME_RJT_RC_LOGIC, @@ -1522,7 +1516,7 @@ nvmet_fc_xmt_ls_rsp_done(struct nvmefc_ls_rsp *lsrsp) struct nvmet_fc_tgtport *tgtport = iod->tgtport; fc_dma_sync_single_for_cpu(tgtport->dev, iod->rspdma, - NVME_FC_MAX_LS_BUFFER_SIZE, DMA_TO_DEVICE); + sizeof(*iod->rspbuf), DMA_TO_DEVICE); nvmet_fc_free_ls_iod(tgtport, iod); nvmet_fc_tgtport_put(tgtport); } @@ -1534,7 +1528,7 @@ nvmet_fc_xmt_ls_rsp(struct nvmet_fc_tgtport *tgtport, int ret; fc_dma_sync_single_for_device(tgtport->dev, iod->rspdma, - NVME_FC_MAX_LS_BUFFER_SIZE, DMA_TO_DEVICE); + sizeof(*iod->rspbuf), DMA_TO_DEVICE); ret = tgtport->ops->xmt_ls_rsp(&tgtport->fc_target_port, iod->lsrsp); if (ret) @@ -1548,8 +1542,7 @@ static void nvmet_fc_handle_ls_rqst(struct nvmet_fc_tgtport *tgtport, struct nvmet_fc_ls_iod *iod) { - struct fcnvme_ls_rqst_w0 *w0 = - (struct fcnvme_ls_rqst_w0 *)iod->rqstbuf; + struct fcnvme_ls_rqst_w0 *w0 = &iod->rqstbuf->rq_cr_assoc.w0; iod->lsrsp->nvme_fc_private = iod; iod->lsrsp->rspbuf = iod->rspbuf; @@ -1580,7 +1573,7 @@ nvmet_fc_handle_ls_rqst(struct nvmet_fc_tgtport *tgtport, break; default: iod->lsrsp->rsplen = nvme_fc_format_rjt(iod->rspbuf, - NVME_FC_MAX_LS_BUFFER_SIZE, w0->ls_cmd, + sizeof(*iod->rspbuf), w0->ls_cmd, FCNVME_RJT_RC_INVAL, FCNVME_RJT_EXP_NONE, 0); } @@ -1627,7 +1620,7 @@ nvmet_fc_rcv_ls_req(struct nvmet_fc_target_port *target_port, struct nvmet_fc_tgtport *tgtport = targetport_to_tgtport(target_port); struct nvmet_fc_ls_iod *iod; - if (lsreqbuf_len > NVME_FC_MAX_LS_BUFFER_SIZE) + if (lsreqbuf_len > sizeof(union nvmefc_ls_requests)) return -E2BIG; if (!nvmet_fc_tgtport_get(tgtport)) -- cgit v1.2.3 From f56bf76f79f3dc15f17433dda1b567d34f18e699 Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 31 Mar 2020 09:49:50 -0700 Subject: nvme-fc: Ensure private pointers are NULL if no data Ensure that when allocations are done, and the lldd options indicate no private data is needed, that private pointers will be set to NULL (catching driver errors where the private data size was not set). Slightly reorganize the allocations so that private data follows the allocations for the LS request/response buffers. This ensures better alignment for the buffers as well as the private pointer.
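The reorganized single-allocation layout is lsop | request buffer | accept buffer | optional LLDD private area, with the private pointer left NULL when lsrqst_priv_sz is zero. A userspace sketch of the idea (struct names and sizes are placeholders, not the kernel's):

#include <stdio.h>
#include <stdlib.h>

struct lsop { int state; };		/* placeholder op-tracking struct */
struct rqst { char bytes[64]; };	/* placeholder request payload */
struct acc { char bytes[32]; };		/* placeholder accept payload */

int main(void)
{
	size_t lsrqst_priv_sz = 0;	/* LLDD asked for no private data */
	struct lsop *op = calloc(1, sizeof(*op) + sizeof(struct rqst) +
				 sizeof(struct acc) + lsrqst_priv_sz);
	if (!op)
		return 1;

	struct rqst *rq = (struct rqst *)&op[1];	/* follows lsop */
	struct acc *ac = (struct acc *)&rq[1];		/* follows rqst */
	void *private = lsrqst_priv_sz ? (void *)&ac[1] : NULL;

	/* a NULL private faults loudly if a driver forgot its priv_sz */
	printf("rq=%p ac=%p private=%p\n", (void *)rq, (void *)ac, private);
	free(op);
	return 0;
}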
Signed-off-by: James Smart Reviewed-by: Sagi Grimberg Reviewed-by: Hannes Reinecke Reviewed-by: Himanshu Madhani Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/fc.c | 81 ++++++++++++++++++++++++++++++------------------ drivers/nvme/target/fc.c | 5 ++- 2 files changed, 54 insertions(+), 32 deletions(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 83f9b2ac7c55..bf80b941d739 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -395,7 +395,10 @@ nvme_fc_register_localport(struct nvme_fc_port_info *pinfo, newrec->ops = template; newrec->dev = dev; ida_init(&newrec->endp_cnt); - newrec->localport.private = &newrec[1]; + if (template->local_priv_sz) + newrec->localport.private = &newrec[1]; + else + newrec->localport.private = NULL; newrec->localport.node_name = pinfo->node_name; newrec->localport.port_name = pinfo->port_name; newrec->localport.port_role = pinfo->port_role; @@ -704,7 +707,10 @@ nvme_fc_register_remoteport(struct nvme_fc_local_port *localport, newrec->remoteport.localport = &lport->localport; newrec->dev = lport->dev; newrec->lport = lport; - newrec->remoteport.private = &newrec[1]; + if (lport->ops->remote_priv_sz) + newrec->remoteport.private = &newrec[1]; + else + newrec->remoteport.private = NULL; newrec->remoteport.port_role = pinfo->port_role; newrec->remoteport.node_name = pinfo->node_name; newrec->remoteport.port_name = pinfo->port_name; @@ -1152,18 +1158,23 @@ nvme_fc_connect_admin_queue(struct nvme_fc_ctrl *ctrl, int ret, fcret = 0; lsop = kzalloc((sizeof(*lsop) + - ctrl->lport->ops->lsrqst_priv_sz + - sizeof(*assoc_rqst) + sizeof(*assoc_acc)), GFP_KERNEL); + sizeof(*assoc_rqst) + sizeof(*assoc_acc) + + ctrl->lport->ops->lsrqst_priv_sz), GFP_KERNEL); if (!lsop) { + dev_info(ctrl->ctrl.device, + "NVME-FC{%d}: send Create Association failed: ENOMEM\n", + ctrl->cnum); ret = -ENOMEM; goto out_no_memory; } - lsreq = &lsop->ls_req; - lsreq->private = (void *)&lsop[1]; - assoc_rqst = (struct fcnvme_ls_cr_assoc_rqst *) - (lsreq->private + ctrl->lport->ops->lsrqst_priv_sz); + assoc_rqst = (struct fcnvme_ls_cr_assoc_rqst *)&lsop[1]; assoc_acc = (struct fcnvme_ls_cr_assoc_acc *)&assoc_rqst[1]; + lsreq = &lsop->ls_req; + if (ctrl->lport->ops->lsrqst_priv_sz) + lsreq->private = &assoc_acc[1]; + else + lsreq->private = NULL; assoc_rqst->w0.ls_cmd = FCNVME_LS_CREATE_ASSOCIATION; assoc_rqst->desc_list_len = @@ -1261,18 +1272,23 @@ nvme_fc_connect_queue(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, int ret, fcret = 0; lsop = kzalloc((sizeof(*lsop) + - ctrl->lport->ops->lsrqst_priv_sz + - sizeof(*conn_rqst) + sizeof(*conn_acc)), GFP_KERNEL); + sizeof(*conn_rqst) + sizeof(*conn_acc) + + ctrl->lport->ops->lsrqst_priv_sz), GFP_KERNEL); if (!lsop) { + dev_info(ctrl->ctrl.device, + "NVME-FC{%d}: send Create Connection failed: ENOMEM\n", + ctrl->cnum); ret = -ENOMEM; goto out_no_memory; } - lsreq = &lsop->ls_req; - lsreq->private = (void *)&lsop[1]; - conn_rqst = (struct fcnvme_ls_cr_conn_rqst *) - (lsreq->private + ctrl->lport->ops->lsrqst_priv_sz); + conn_rqst = (struct fcnvme_ls_cr_conn_rqst *)&lsop[1]; conn_acc = (struct fcnvme_ls_cr_conn_acc *)&conn_rqst[1]; + lsreq = &lsop->ls_req; + if (ctrl->lport->ops->lsrqst_priv_sz) + lsreq->private = (void *)&conn_acc[1]; + else + lsreq->private = NULL; conn_rqst->w0.ls_cmd = FCNVME_LS_CREATE_CONNECTION; conn_rqst->desc_list_len = cpu_to_be32( @@ -1386,19 +1402,23 @@ nvme_fc_xmt_disconnect_assoc(struct nvme_fc_ctrl *ctrl) int ret; lsop = kzalloc((sizeof(*lsop) + - 
ctrl->lport->ops->lsrqst_priv_sz + - sizeof(*discon_rqst) + sizeof(*discon_acc)), - GFP_KERNEL); - if (!lsop) - /* couldn't sent it... too bad */ + sizeof(*discon_rqst) + sizeof(*discon_acc) + + ctrl->lport->ops->lsrqst_priv_sz), GFP_KERNEL); + if (!lsop) { + dev_info(ctrl->ctrl.device, + "NVME-FC{%d}: send Disconnect Association " + "failed: ENOMEM\n", + ctrl->cnum); return; + } - lsreq = &lsop->ls_req; - - lsreq->private = (void *)&lsop[1]; - discon_rqst = (struct fcnvme_ls_disconnect_assoc_rqst *) - (lsreq->private + ctrl->lport->ops->lsrqst_priv_sz); + discon_rqst = (struct fcnvme_ls_disconnect_assoc_rqst *)&lsop[1]; discon_acc = (struct fcnvme_ls_disconnect_assoc_acc *)&discon_rqst[1]; + lsreq = &lsop->ls_req; + if (ctrl->lport->ops->lsrqst_priv_sz) + lsreq->private = (void *)&discon_acc[1]; + else + lsreq->private = NULL; discon_rqst->w0.ls_cmd = FCNVME_LS_DISCONNECT_ASSOC; discon_rqst->desc_list_len = cpu_to_be32( @@ -1784,15 +1804,17 @@ nvme_fc_init_aen_ops(struct nvme_fc_ctrl *ctrl) struct nvme_fc_fcp_op *aen_op; struct nvme_fc_cmd_iu *cmdiu; struct nvme_command *sqe; - void *private; + void *private = NULL; int i, ret; aen_op = ctrl->aen_ops; for (i = 0; i < NVME_NR_AEN_COMMANDS; i++, aen_op++) { - private = kzalloc(ctrl->lport->ops->fcprqst_priv_sz, + if (ctrl->lport->ops->fcprqst_priv_sz) { + private = kzalloc(ctrl->lport->ops->fcprqst_priv_sz, GFP_KERNEL); - if (!private) - return -ENOMEM; + if (!private) + return -ENOMEM; + } cmdiu = &aen_op->cmd_iu; sqe = &cmdiu->sqe; @@ -1823,9 +1845,6 @@ nvme_fc_term_aen_ops(struct nvme_fc_ctrl *ctrl) aen_op = ctrl->aen_ops; for (i = 0; i < NVME_NR_AEN_COMMANDS; i++, aen_op++) { - if (!aen_op->fcp_req.private) - continue; - __nvme_fc_exit_request(ctrl, aen_op); kfree(aen_op->fcp_req.private); diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 66de6bd8f4fd..66a60a218994 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -1047,7 +1047,10 @@ nvmet_fc_register_targetport(struct nvmet_fc_port_info *pinfo, newrec->fc_target_port.node_name = pinfo->node_name; newrec->fc_target_port.port_name = pinfo->port_name; - newrec->fc_target_port.private = &newrec[1]; + if (template->target_priv_sz) + newrec->fc_target_port.private = &newrec[1]; + else + newrec->fc_target_port.private = NULL; newrec->fc_target_port.port_id = pinfo->port_id; newrec->fc_target_port.port_num = idx; INIT_LIST_HEAD(&newrec->tgt_list); -- cgit v1.2.3 From eb4ee8f125157926cf36a3c275b04825f1bf8cfa Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 31 Mar 2020 09:49:51 -0700 Subject: nvme-fc: convert assoc_active flag to bit op Convert the assoc_active boolean flag to a bitop on the flags field. The bit ops will provide atomicity. To make this change, the flags field was converted to a long type, which also affects the FCCTRL_TERMIO flag. Both FCCTRL_TERMIO and now ASSOC_ACTIVE flags are set/cleared by bit operations. 
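As a rough userspace model of what the bit ops buy here, the sketch below mimics test_and_set_bit()/test_bit() with C11 atomics. The kernel helpers differ in implementation; only the atomicity property being relied on is shown (assumed semantics, not the kernel code):

#include <stdio.h>
#include <stdatomic.h>

#define ASSOC_ACTIVE	0
#define FCCTRL_TERMIO	1

/* model of test_and_set_bit()/test_bit() on an atomic flags word */
static int model_test_and_set_bit(int nr, atomic_ulong *flags)
{
	unsigned long old = atomic_fetch_or(flags, 1UL << nr);
	return (old >> nr) & 1;
}

static int model_test_bit(int nr, atomic_ulong *flags)
{
	return (atomic_load(flags) >> nr) & 1;
}

int main(void)
{
	atomic_ulong flags = 0;

	/* first caller wins the association; the second sees it set */
	printf("first:  %d\n", model_test_and_set_bit(ASSOC_ACTIVE, &flags));
	printf("second: %d\n", model_test_and_set_bit(ASSOC_ACTIVE, &flags));
	printf("termio: %d\n", model_test_bit(FCCTRL_TERMIO, &flags));
	return 0;
}

The atomic read-modify-write is what lets nvme_fc_ctlr_active_on_rport() drop the spinlocked bool check, as the diff below shows.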
Signed-off-by: James Smart Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/fc.c | 37 +++++++++++++------------------------ 1 file changed, 13 insertions(+), 24 deletions(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index bf80b941d739..0ac246603063 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -127,9 +127,9 @@ struct nvme_fc_rport { unsigned long dev_loss_end; } __aligned(sizeof(u64)); /* alignment for other things alloc'd with */ -enum nvme_fcctrl_flags { - FCCTRL_TERMIO = (1 << 0), -}; +/* fc_ctrl flags values - specified as bit positions */ +#define ASSOC_ACTIVE 0 +#define FCCTRL_TERMIO 1 struct nvme_fc_ctrl { spinlock_t lock; @@ -140,7 +140,6 @@ struct nvme_fc_ctrl { u32 cnum; bool ioq_live; - bool assoc_active; atomic_t err_work_active; u64 association_id; @@ -153,7 +152,7 @@ struct nvme_fc_ctrl { struct work_struct err_work; struct kref ref; - u32 flags; + unsigned long flags; u32 iocnt; wait_queue_head_t ioabort_wait; @@ -1521,7 +1520,7 @@ __nvme_fc_abort_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_fcp_op *op) opstate = atomic_xchg(&op->state, FCPOP_STATE_ABORTED); if (opstate != FCPOP_STATE_ACTIVE) atomic_set(&op->state, opstate); - else if (ctrl->flags & FCCTRL_TERMIO) + else if (test_bit(FCCTRL_TERMIO, &ctrl->flags)) ctrl->iocnt++; spin_unlock_irqrestore(&ctrl->lock, flags); @@ -1558,7 +1557,7 @@ __nvme_fc_fcpop_chk_teardowns(struct nvme_fc_ctrl *ctrl, if (opstate == FCPOP_STATE_ABORTED) { spin_lock_irqsave(&ctrl->lock, flags); - if (ctrl->flags & FCCTRL_TERMIO) { + if (test_bit(FCCTRL_TERMIO, &ctrl->flags)) { if (!--ctrl->iocnt) wake_up(&ctrl->ioabort_wait); } @@ -2386,16 +2385,9 @@ nvme_fc_submit_async_event(struct nvme_ctrl *arg) { struct nvme_fc_ctrl *ctrl = to_fc_ctrl(arg); struct nvme_fc_fcp_op *aen_op; - unsigned long flags; - bool terminating = false; blk_status_t ret; - spin_lock_irqsave(&ctrl->lock, flags); - if (ctrl->flags & FCCTRL_TERMIO) - terminating = true; - spin_unlock_irqrestore(&ctrl->lock, flags); - - if (terminating) + if (test_bit(FCCTRL_TERMIO, &ctrl->flags)) return; aen_op = &ctrl->aen_ops[0]; @@ -2604,10 +2596,9 @@ nvme_fc_ctlr_active_on_rport(struct nvme_fc_ctrl *ctrl) struct nvme_fc_rport *rport = ctrl->rport; u32 cnt; - if (ctrl->assoc_active) + if (test_and_set_bit(ASSOC_ACTIVE, &ctrl->flags)) return 1; - ctrl->assoc_active = true; cnt = atomic_inc_return(&rport->act_ctrl_cnt); if (cnt == 1) nvme_fc_rport_active_on_lport(rport); @@ -2622,7 +2613,7 @@ nvme_fc_ctlr_inactive_on_rport(struct nvme_fc_ctrl *ctrl) struct nvme_fc_lport *lport = rport->lport; u32 cnt; - /* ctrl->assoc_active=false will be set independently */ + /* clearing of ctrl->flags ASSOC_ACTIVE bit is in association delete */ cnt = atomic_dec_return(&rport->act_ctrl_cnt); if (cnt == 0) { @@ -2764,7 +2755,7 @@ out_delete_hw_queue: __nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[0], 0); out_free_queue: nvme_fc_free_queue(&ctrl->queues[0]); - ctrl->assoc_active = false; + clear_bit(ASSOC_ACTIVE, &ctrl->flags); nvme_fc_ctlr_inactive_on_rport(ctrl); return ret; @@ -2781,12 +2772,11 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl) { unsigned long flags; - if (!ctrl->assoc_active) + if (!test_and_clear_bit(ASSOC_ACTIVE, &ctrl->flags)) return; - ctrl->assoc_active = false; spin_lock_irqsave(&ctrl->lock, flags); - ctrl->flags |= FCCTRL_TERMIO; + set_bit(FCCTRL_TERMIO, &ctrl->flags); ctrl->iocnt = 0; spin_unlock_irqrestore(&ctrl->lock, flags); @@ -2837,7 +2827,7 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl) 
/* wait for all io that had to be aborted */ spin_lock_irq(&ctrl->lock); wait_event_lock_irq(ctrl->ioabort_wait, ctrl->iocnt == 0, ctrl->lock); - ctrl->flags &= ~FCCTRL_TERMIO; + clear_bit(FCCTRL_TERMIO, &ctrl->flags); spin_unlock_irq(&ctrl->lock); nvme_fc_term_aen_ops(ctrl); @@ -3109,7 +3099,6 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, ctrl->dev = lport->dev; ctrl->cnum = idx; ctrl->ioq_live = false; - ctrl->assoc_active = false; atomic_set(&ctrl->err_work_active, 0); init_waitqueue_head(&ctrl->ioabort_wait); -- cgit v1.2.3 From fd5a5f2213048b012bc7e19e832e9ae0ec1a2c4a Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 31 Mar 2020 09:49:52 -0700 Subject: nvme-fc: Update header and host for common definitions for LS handling Given that both host and target now generate and receive LS's, create a single table definition for LS names. Each transport half will have a local version of the table. As Create Association LS is issued by both sides, and received by both sides, create common routines to format the LS and to validate the LS. Convert the host side transport to use the new common Create Association LS formatting routine. Signed-off-by: James Smart Reviewed-by: Hannes Reinecke Reviewed-by: Himanshu Madhani Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/fc.c | 25 ++-------------- drivers/nvme/host/fc.h | 79 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 81 insertions(+), 23 deletions(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 0ac246603063..c069ab056202 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -1419,29 +1419,8 @@ nvme_fc_xmt_disconnect_assoc(struct nvme_fc_ctrl *ctrl) else lsreq->private = NULL; - discon_rqst->w0.ls_cmd = FCNVME_LS_DISCONNECT_ASSOC; - discon_rqst->desc_list_len = cpu_to_be32( - sizeof(struct fcnvme_lsdesc_assoc_id) + - sizeof(struct fcnvme_lsdesc_disconn_cmd)); - - discon_rqst->associd.desc_tag = cpu_to_be32(FCNVME_LSDESC_ASSOC_ID); - discon_rqst->associd.desc_len = - fcnvme_lsdesc_len( - sizeof(struct fcnvme_lsdesc_assoc_id)); - - discon_rqst->associd.association_id = cpu_to_be64(ctrl->association_id); - - discon_rqst->discon_cmd.desc_tag = cpu_to_be32( - FCNVME_LSDESC_DISCONN_CMD); - discon_rqst->discon_cmd.desc_len = - fcnvme_lsdesc_len( - sizeof(struct fcnvme_lsdesc_disconn_cmd)); - - lsreq->rqstaddr = discon_rqst; - lsreq->rqstlen = sizeof(*discon_rqst); - lsreq->rspaddr = discon_acc; - lsreq->rsplen = sizeof(*discon_acc); - lsreq->timeout = NVME_FC_LS_TIMEOUT_SEC; + nvmefc_fmt_lsreq_discon_assoc(lsreq, discon_rqst, discon_acc, + ctrl->association_id); ret = nvme_fc_send_ls_req_async(ctrl->rport, lsop, nvme_fc_disconnect_assoc_done); diff --git a/drivers/nvme/host/fc.h b/drivers/nvme/host/fc.h index 08fa88381d45..05ce566f2caf 100644 --- a/drivers/nvme/host/fc.h +++ b/drivers/nvme/host/fc.h @@ -17,6 +17,7 @@ */ union nvmefc_ls_requests { + struct fcnvme_ls_rqst_w0 w0; struct fcnvme_ls_cr_assoc_rqst rq_cr_assoc; struct fcnvme_ls_cr_conn_rqst rq_cr_conn; struct fcnvme_ls_disconnect_assoc_rqst rq_dis_assoc; @@ -145,4 +146,82 @@ static char *validation_errors[] = { "Bad Disconnect ACC Length", }; +#define NVME_FC_LAST_LS_CMD_VALUE FCNVME_LS_DISCONNECT_CONN + +static char *nvmefc_ls_names[] = { + "Reserved (0)", + "RJT (1)", + "ACC (2)", + "Create Association", + "Create Connection", + "Disconnect Association", + "Disconnect Connection", +}; + +static inline void +nvmefc_fmt_lsreq_discon_assoc(struct nvmefc_ls_req *lsreq, + struct
fcnvme_ls_disconnect_assoc_rqst *discon_rqst, + struct fcnvme_ls_disconnect_assoc_acc *discon_acc, + u64 association_id) +{ + lsreq->rqstaddr = discon_rqst; + lsreq->rqstlen = sizeof(*discon_rqst); + lsreq->rspaddr = discon_acc; + lsreq->rsplen = sizeof(*discon_acc); + lsreq->timeout = NVME_FC_LS_TIMEOUT_SEC; + + discon_rqst->w0.ls_cmd = FCNVME_LS_DISCONNECT_ASSOC; + discon_rqst->desc_list_len = cpu_to_be32( + sizeof(struct fcnvme_lsdesc_assoc_id) + + sizeof(struct fcnvme_lsdesc_disconn_cmd)); + + discon_rqst->associd.desc_tag = cpu_to_be32(FCNVME_LSDESC_ASSOC_ID); + discon_rqst->associd.desc_len = + fcnvme_lsdesc_len( + sizeof(struct fcnvme_lsdesc_assoc_id)); + + discon_rqst->associd.association_id = cpu_to_be64(association_id); + + discon_rqst->discon_cmd.desc_tag = cpu_to_be32( + FCNVME_LSDESC_DISCONN_CMD); + discon_rqst->discon_cmd.desc_len = + fcnvme_lsdesc_len( + sizeof(struct fcnvme_lsdesc_disconn_cmd)); +} + +static inline int +nvmefc_vldt_lsreq_discon_assoc(u32 rqstlen, + struct fcnvme_ls_disconnect_assoc_rqst *rqst) +{ + int ret = 0; + + if (rqstlen < sizeof(struct fcnvme_ls_disconnect_assoc_rqst)) + ret = VERR_DISCONN_LEN; + else if (rqst->desc_list_len != + fcnvme_lsdesc_len( + sizeof(struct fcnvme_ls_disconnect_assoc_rqst))) + ret = VERR_DISCONN_RQST_LEN; + else if (rqst->associd.desc_tag != cpu_to_be32(FCNVME_LSDESC_ASSOC_ID)) + ret = VERR_ASSOC_ID; + else if (rqst->associd.desc_len != + fcnvme_lsdesc_len( + sizeof(struct fcnvme_lsdesc_assoc_id))) + ret = VERR_ASSOC_ID_LEN; + else if (rqst->discon_cmd.desc_tag != + cpu_to_be32(FCNVME_LSDESC_DISCONN_CMD)) + ret = VERR_DISCONN_CMD; + else if (rqst->discon_cmd.desc_len != + fcnvme_lsdesc_len( + sizeof(struct fcnvme_lsdesc_disconn_cmd))) + ret = VERR_DISCONN_CMD_LEN; + /* + * As the standard changed on the LS, check if old format and scope + * something other than Association (e.g. 0). + */ + else if (rqst->discon_cmd.rsvd8[0]) + ret = VERR_DISCONN_SCOPE; + + return ret; +} + #endif /* _NVME_FC_TRANSPORT_H */ -- cgit v1.2.3 From ec3b0e3cc393dee1ad3f4bd1026f2c1f8b1c1ffb Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 31 Mar 2020 09:49:53 -0700 Subject: nvmet-fc: Update target for common definitions for LS handling Given that both host and target now generate and receive LS's, create a single table definition for LS names. Each transport half will have a local version of the table. Convert the target side transport to use the new common Create Association LS validation routine.
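A small sketch of how such a shared name table is typically consumed, with the bounds check both halves use before indexing (values and names mirror the header above; the lookup helper itself is illustrative, not the transport's):

#include <stdio.h>

#define NVME_FC_LAST_LS_CMD_VALUE 6	/* FCNVME_LS_DISCONNECT_CONN */

static const char *ls_names[] = {
	"Reserved (0)", "RJT (1)", "ACC (2)",
	"Create Association", "Create Connection",
	"Disconnect Association", "Disconnect Connection",
};

/* bounds-checked lookup, mirroring how the transports print LS names */
static const char *ls_name(unsigned int ls_cmd)
{
	return ls_cmd <= NVME_FC_LAST_LS_CMD_VALUE ? ls_names[ls_cmd] : "";
}

int main(void)
{
	printf("%s\n", ls_name(5));	/* Disconnect Association */
	printf("[%s]\n", ls_name(42));	/* out of table -> empty */
	return 0;
}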
Signed-off-by: James Smart Reviewed-by: Hannes Reinecke Reviewed-by: Himanshu Madhani Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/target/fc.c | 28 ++-------------------------- 1 file changed, 2 insertions(+), 26 deletions(-) diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 66a60a218994..5739df7edc59 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -1442,32 +1442,8 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport, memset(acc, 0, sizeof(*acc)); - if (iod->rqstdatalen < sizeof(struct fcnvme_ls_disconnect_assoc_rqst)) - ret = VERR_DISCONN_LEN; - else if (rqst->desc_list_len != - fcnvme_lsdesc_len( - sizeof(struct fcnvme_ls_disconnect_assoc_rqst))) - ret = VERR_DISCONN_RQST_LEN; - else if (rqst->associd.desc_tag != cpu_to_be32(FCNVME_LSDESC_ASSOC_ID)) - ret = VERR_ASSOC_ID; - else if (rqst->associd.desc_len != - fcnvme_lsdesc_len( - sizeof(struct fcnvme_lsdesc_assoc_id))) - ret = VERR_ASSOC_ID_LEN; - else if (rqst->discon_cmd.desc_tag != - cpu_to_be32(FCNVME_LSDESC_DISCONN_CMD)) - ret = VERR_DISCONN_CMD; - else if (rqst->discon_cmd.desc_len != - fcnvme_lsdesc_len( - sizeof(struct fcnvme_lsdesc_disconn_cmd))) - ret = VERR_DISCONN_CMD_LEN; - /* - * As the standard changed on the LS, check if old format and scope - * something other than Association (e.g. 0). - */ - else if (rqst->discon_cmd.rsvd8[0]) - ret = VERR_DISCONN_SCOPE; - else { + ret = nvmefc_vldt_lsreq_discon_assoc(iod->rqstdatalen, rqst); + if (!ret) { /* match an active association */ assoc = nvmet_fc_find_target_assoc(tgtport, be64_to_cpu(rqst->associd.association_id)); -- cgit v1.2.3 From 14fd1e98afafc0027a6a86ea1962e31dceafb400 Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 31 Mar 2020 09:49:54 -0700 Subject: nvme-fc: Add Disconnect Association Rcv support The nvme-fc host transport did not support the reception of a FC-NVME LS. Reception is necessary to implement full compliance with FC-NVME-2. Populate the LS receive handler, and specifically the handling of a Disconnect Association LS. The response to the LS, if it matched a controller, must be sent after the aborts for any I/O on any connection have been sent. 
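The deferral relies on stashing the received LS under the controller lock and failing any previously stashed one. A compilable userspace sketch of that swap pattern, with a pthread mutex standing in for the kernel spinlock (all names are illustrative):

#include <stdio.h>
#include <stddef.h>
#include <pthread.h>

struct ls_rcv_op { int id; };

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static struct ls_rcv_op *rcv_disconn;	/* response deferred until teardown */

/*
 * Save the newest Disconnect LS under the lock; hand back any older
 * pending one so the caller can fail it immediately.
 */
static struct ls_rcv_op *stash_disconn(struct ls_rcv_op *newop)
{
	struct ls_rcv_op *oldls;

	pthread_mutex_lock(&lock);
	oldls = rcv_disconn;
	rcv_disconn = newop;
	pthread_mutex_unlock(&lock);
	return oldls;
}

int main(void)
{
	struct ls_rcv_op a = { 1 }, b = { 2 };

	stash_disconn(&a);
	struct ls_rcv_op *old = stash_disconn(&b);	/* duplicate arrives */
	if (old)
		printf("reject older LS %d, answer %d after aborts\n",
		       old->id, rcv_disconn->id);
	return 0;
}

The stashed pointer is only drained once association teardown has issued all aborts, which is how the ordering requirement is met.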
Signed-off-by: James Smart Reviewed-by: Hannes Reinecke Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/fc.c | 363 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 359 insertions(+), 4 deletions(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index c069ab056202..1921d2195541 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -62,6 +62,17 @@ struct nvmefc_ls_req_op { bool req_queued; }; +struct nvmefc_ls_rcv_op { + struct nvme_fc_rport *rport; + struct nvmefc_ls_rsp *lsrsp; + union nvmefc_ls_requests *rqstbuf; + union nvmefc_ls_responses *rspbuf; + u16 rqstdatalen; + bool handled; + dma_addr_t rspdma; + struct list_head lsrcv_list; /* rport->ls_rcv_list */ +} __aligned(sizeof(u64)); /* alignment for other things alloc'd with */ + enum nvme_fcpop_state { FCPOP_STATE_UNINIT = 0, FCPOP_STATE_IDLE = 1, @@ -118,6 +129,7 @@ struct nvme_fc_rport { struct list_head endp_list; /* for lport->endp_list */ struct list_head ctrl_list; struct list_head ls_req_list; + struct list_head ls_rcv_list; struct list_head disc_list; struct device *dev; /* physical device for dma */ struct nvme_fc_lport *lport; @@ -125,6 +137,7 @@ struct nvme_fc_rport { struct kref ref; atomic_t act_ctrl_cnt; unsigned long dev_loss_end; + struct work_struct lsrcv_work; } __aligned(sizeof(u64)); /* alignment for other things alloc'd with */ /* fc_ctrl flags values - specified as bit positions */ @@ -142,6 +155,7 @@ struct nvme_fc_ctrl { bool ioq_live; atomic_t err_work_active; u64 association_id; + struct nvmefc_ls_rcv_op *rcv_disconn; struct list_head ctrl_list; /* rport->ctrl_list */ @@ -219,6 +233,9 @@ static struct device *fc_udev_device; static void __nvme_fc_delete_hw_queue(struct nvme_fc_ctrl *, struct nvme_fc_queue *, unsigned int); +static void nvme_fc_handle_ls_rqst_work(struct work_struct *work); + + static void nvme_fc_free_lport(struct kref *ref) { @@ -704,6 +721,7 @@ nvme_fc_register_remoteport(struct nvme_fc_local_port *localport, atomic_set(&newrec->act_ctrl_cnt, 0); spin_lock_init(&newrec->lock); newrec->remoteport.localport = &lport->localport; + INIT_LIST_HEAD(&newrec->ls_rcv_list); newrec->dev = lport->dev; newrec->lport = lport; if (lport->ops->remote_priv_sz) @@ -717,6 +735,7 @@ nvme_fc_register_remoteport(struct nvme_fc_local_port *localport, newrec->remoteport.port_state = FC_OBJSTATE_ONLINE; newrec->remoteport.port_num = idx; __nvme_fc_set_dev_loss_tmo(newrec, pinfo); + INIT_WORK(&newrec->lsrcv_work, nvme_fc_handle_ls_rqst_work); spin_lock_irqsave(&nvme_fc_lock, flags); list_add_tail(&newrec->endp_list, &lport->endp_list); @@ -1006,6 +1025,7 @@ fc_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, static void nvme_fc_ctrl_put(struct nvme_fc_ctrl *); static int nvme_fc_ctrl_get(struct nvme_fc_ctrl *); +static void nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg); static void __nvme_fc_finish_ls_req(struct nvmefc_ls_req_op *lsop) @@ -1154,6 +1174,7 @@ nvme_fc_connect_admin_queue(struct nvme_fc_ctrl *ctrl, struct nvmefc_ls_req *lsreq; struct fcnvme_ls_cr_assoc_rqst *assoc_rqst; struct fcnvme_ls_cr_assoc_acc *assoc_acc; + unsigned long flags; int ret, fcret = 0; lsop = kzalloc((sizeof(*lsop) + @@ -1243,11 +1264,13 @@ nvme_fc_connect_admin_queue(struct nvme_fc_ctrl *ctrl, "q %d Create Association LS failed: %s\n", queue->qnum, validation_errors[fcret]); } else { + spin_lock_irqsave(&ctrl->lock, flags); ctrl->association_id = be64_to_cpu(assoc_acc->associd.association_id); queue->connection_id = 
be64_to_cpu(assoc_acc->connectid.connection_id); set_bit(NVME_FC_Q_CONNECTED, &queue->flags); + spin_unlock_irqrestore(&ctrl->lock, flags); } out_free_buffer: @@ -1428,6 +1451,247 @@ nvme_fc_xmt_disconnect_assoc(struct nvme_fc_ctrl *ctrl) kfree(lsop); } +static void +nvme_fc_xmt_ls_rsp_done(struct nvmefc_ls_rsp *lsrsp) +{ + struct nvmefc_ls_rcv_op *lsop = lsrsp->nvme_fc_private; + struct nvme_fc_rport *rport = lsop->rport; + struct nvme_fc_lport *lport = rport->lport; + unsigned long flags; + + spin_lock_irqsave(&rport->lock, flags); + list_del(&lsop->lsrcv_list); + spin_unlock_irqrestore(&rport->lock, flags); + + fc_dma_sync_single_for_cpu(lport->dev, lsop->rspdma, + sizeof(*lsop->rspbuf), DMA_TO_DEVICE); + fc_dma_unmap_single(lport->dev, lsop->rspdma, + sizeof(*lsop->rspbuf), DMA_TO_DEVICE); + + kfree(lsop); + + nvme_fc_rport_put(rport); +} + +static void +nvme_fc_xmt_ls_rsp(struct nvmefc_ls_rcv_op *lsop) +{ + struct nvme_fc_rport *rport = lsop->rport; + struct nvme_fc_lport *lport = rport->lport; + struct fcnvme_ls_rqst_w0 *w0 = &lsop->rqstbuf->w0; + int ret; + + fc_dma_sync_single_for_device(lport->dev, lsop->rspdma, + sizeof(*lsop->rspbuf), DMA_TO_DEVICE); + + ret = lport->ops->xmt_ls_rsp(&lport->localport, &rport->remoteport, + lsop->lsrsp); + if (ret) { + dev_warn(lport->dev, + "LLDD rejected LS RSP xmt: LS %d status %d\n", + w0->ls_cmd, ret); + nvme_fc_xmt_ls_rsp_done(lsop->lsrsp); + return; + } +} + +static struct nvme_fc_ctrl * +nvme_fc_match_disconn_ls(struct nvme_fc_rport *rport, + struct nvmefc_ls_rcv_op *lsop) +{ + struct fcnvme_ls_disconnect_assoc_rqst *rqst = + &lsop->rqstbuf->rq_dis_assoc; + struct nvme_fc_ctrl *ctrl, *ret = NULL; + struct nvmefc_ls_rcv_op *oldls = NULL; + u64 association_id = be64_to_cpu(rqst->associd.association_id); + unsigned long flags; + + spin_lock_irqsave(&rport->lock, flags); + + list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list) { + if (!nvme_fc_ctrl_get(ctrl)) + continue; + spin_lock(&ctrl->lock); + if (association_id == ctrl->association_id) { + oldls = ctrl->rcv_disconn; + ctrl->rcv_disconn = lsop; + ret = ctrl; + } + spin_unlock(&ctrl->lock); + if (ret) + /* leave the ctrl get reference */ + break; + nvme_fc_ctrl_put(ctrl); + } + + spin_unlock_irqrestore(&rport->lock, flags); + + /* transmit a response for anything that was pending */ + if (oldls) { + dev_info(rport->lport->dev, + "NVME-FC{%d}: Multiple Disconnect Association " + "LS's received\n", ctrl->cnum); + /* overwrite good response with bogus failure */ + oldls->lsrsp->rsplen = nvme_fc_format_rjt(oldls->rspbuf, + sizeof(*oldls->rspbuf), + rqst->w0.ls_cmd, + FCNVME_RJT_RC_UNAB, + FCNVME_RJT_EXP_NONE, 0); + nvme_fc_xmt_ls_rsp(oldls); + } + + return ret; +} + +/* + * returns true to mean LS handled and ls_rsp can be sent + * returns false to defer ls_rsp xmt (will be done as part of + * association termination) + */ +static bool +nvme_fc_ls_disconnect_assoc(struct nvmefc_ls_rcv_op *lsop) +{ + struct nvme_fc_rport *rport = lsop->rport; + struct fcnvme_ls_disconnect_assoc_rqst *rqst = + &lsop->rqstbuf->rq_dis_assoc; + struct fcnvme_ls_disconnect_assoc_acc *acc = + &lsop->rspbuf->rsp_dis_assoc; + struct nvme_fc_ctrl *ctrl = NULL; + int ret = 0; + + memset(acc, 0, sizeof(*acc)); + + ret = nvmefc_vldt_lsreq_discon_assoc(lsop->rqstdatalen, rqst); + if (!ret) { + /* match an active association */ + ctrl = nvme_fc_match_disconn_ls(rport, lsop); + if (!ctrl) + ret = VERR_NO_ASSOC; + } + + if (ret) { + dev_info(rport->lport->dev, + "Disconnect LS failed: %s\n", + validation_errors[ret]); + 
lsop->lsrsp->rsplen = nvme_fc_format_rjt(acc, + sizeof(*acc), rqst->w0.ls_cmd, + (ret == VERR_NO_ASSOC) ? + FCNVME_RJT_RC_INV_ASSOC : + FCNVME_RJT_RC_LOGIC, + FCNVME_RJT_EXP_NONE, 0); + return true; + } + + /* format an ACCept response */ + + lsop->lsrsp->rsplen = sizeof(*acc); + + nvme_fc_format_rsp_hdr(acc, FCNVME_LS_ACC, + fcnvme_lsdesc_len( + sizeof(struct fcnvme_ls_disconnect_assoc_acc)), + FCNVME_LS_DISCONNECT_ASSOC); + + /* + * the transmit of the response will occur after the exchanges + * for the association have been ABTS'd by + * nvme_fc_delete_association(). + */ + + /* fail the association */ + nvme_fc_error_recovery(ctrl, "Disconnect Association LS received"); + + /* release the reference taken by nvme_fc_match_disconn_ls() */ + nvme_fc_ctrl_put(ctrl); + + return false; +} + +/* + * Actual Processing routine for received FC-NVME LS Requests from the LLD + * returns true if a response should be sent afterward, false if rsp will + * be sent asynchronously. + */ +static bool +nvme_fc_handle_ls_rqst(struct nvmefc_ls_rcv_op *lsop) +{ + struct fcnvme_ls_rqst_w0 *w0 = &lsop->rqstbuf->w0; + bool ret = true; + + lsop->lsrsp->nvme_fc_private = lsop; + lsop->lsrsp->rspbuf = lsop->rspbuf; + lsop->lsrsp->rspdma = lsop->rspdma; + lsop->lsrsp->done = nvme_fc_xmt_ls_rsp_done; + /* Be preventative. handlers will later set to valid length */ + lsop->lsrsp->rsplen = 0; + + /* + * handlers: + * parse request input, execute the request, and format the + * LS response + */ + switch (w0->ls_cmd) { + case FCNVME_LS_DISCONNECT_ASSOC: + ret = nvme_fc_ls_disconnect_assoc(lsop); + break; + case FCNVME_LS_DISCONNECT_CONN: + lsop->lsrsp->rsplen = nvme_fc_format_rjt(lsop->rspbuf, + sizeof(*lsop->rspbuf), w0->ls_cmd, + FCNVME_RJT_RC_UNSUP, FCNVME_RJT_EXP_NONE, 0); + break; + case FCNVME_LS_CREATE_ASSOCIATION: + case FCNVME_LS_CREATE_CONNECTION: + lsop->lsrsp->rsplen = nvme_fc_format_rjt(lsop->rspbuf, + sizeof(*lsop->rspbuf), w0->ls_cmd, + FCNVME_RJT_RC_LOGIC, FCNVME_RJT_EXP_NONE, 0); + break; + default: + lsop->lsrsp->rsplen = nvme_fc_format_rjt(lsop->rspbuf, + sizeof(*lsop->rspbuf), w0->ls_cmd, + FCNVME_RJT_RC_INVAL, FCNVME_RJT_EXP_NONE, 0); + break; + } + + return(ret); +} + +static void +nvme_fc_handle_ls_rqst_work(struct work_struct *work) +{ + struct nvme_fc_rport *rport = + container_of(work, struct nvme_fc_rport, lsrcv_work); + struct fcnvme_ls_rqst_w0 *w0; + struct nvmefc_ls_rcv_op *lsop; + unsigned long flags; + bool sendrsp; + +restart: + sendrsp = true; + spin_lock_irqsave(&rport->lock, flags); + list_for_each_entry(lsop, &rport->ls_rcv_list, lsrcv_list) { + if (lsop->handled) + continue; + + lsop->handled = true; + if (rport->remoteport.port_state == FC_OBJSTATE_ONLINE) { + spin_unlock_irqrestore(&rport->lock, flags); + sendrsp = nvme_fc_handle_ls_rqst(lsop); + } else { + spin_unlock_irqrestore(&rport->lock, flags); + w0 = &lsop->rqstbuf->w0; + lsop->lsrsp->rsplen = nvme_fc_format_rjt( + lsop->rspbuf, + sizeof(*lsop->rspbuf), + w0->ls_cmd, + FCNVME_RJT_RC_UNAB, + FCNVME_RJT_EXP_NONE, 0); + } + if (sendrsp) + nvme_fc_xmt_ls_rsp(lsop); + goto restart; + } + spin_unlock_irqrestore(&rport->lock, flags); +} + /** * nvme_fc_rcv_ls_req - transport entry point called by an LLDD * upon the reception of a NVME LS request. 
@@ -1454,20 +1718,92 @@ nvme_fc_rcv_ls_req(struct nvme_fc_remote_port *portptr, { struct nvme_fc_rport *rport = remoteport_to_rport(portptr); struct nvme_fc_lport *lport = rport->lport; + struct fcnvme_ls_rqst_w0 *w0 = (struct fcnvme_ls_rqst_w0 *)lsreqbuf; + struct nvmefc_ls_rcv_op *lsop; + unsigned long flags; + int ret; + + nvme_fc_rport_get(rport); /* validate there's a routine to transmit a response */ - if (!lport->ops->xmt_ls_rsp) - return(-EINVAL); + if (!lport->ops->xmt_ls_rsp) { + dev_info(lport->dev, + "RCV %s LS failed: no LLDD xmt_ls_rsp\n", + (w0->ls_cmd <= NVME_FC_LAST_LS_CMD_VALUE) ? + nvmefc_ls_names[w0->ls_cmd] : ""); + ret = -EINVAL; + goto out_put; + } + + if (lsreqbuf_len > sizeof(union nvmefc_ls_requests)) { + dev_info(lport->dev, + "RCV %s LS failed: payload too large\n", + (w0->ls_cmd <= NVME_FC_LAST_LS_CMD_VALUE) ? + nvmefc_ls_names[w0->ls_cmd] : ""); + ret = -E2BIG; + goto out_put; + } + + lsop = kzalloc(sizeof(*lsop) + + sizeof(union nvmefc_ls_requests) + + sizeof(union nvmefc_ls_responses), + GFP_KERNEL); + if (!lsop) { + dev_info(lport->dev, + "RCV %s LS failed: No memory\n", + (w0->ls_cmd <= NVME_FC_LAST_LS_CMD_VALUE) ? + nvmefc_ls_names[w0->ls_cmd] : ""); + ret = -ENOMEM; + goto out_put; + } + lsop->rqstbuf = (union nvmefc_ls_requests *)&lsop[1]; + lsop->rspbuf = (union nvmefc_ls_responses *)&lsop->rqstbuf[1]; + + lsop->rspdma = fc_dma_map_single(lport->dev, lsop->rspbuf, + sizeof(*lsop->rspbuf), + DMA_TO_DEVICE); + if (fc_dma_mapping_error(lport->dev, lsop->rspdma)) { + dev_info(lport->dev, + "RCV %s LS failed: DMA mapping failure\n", + (w0->ls_cmd <= NVME_FC_LAST_LS_CMD_VALUE) ? + nvmefc_ls_names[w0->ls_cmd] : ""); + ret = -EFAULT; + goto out_free; + } + + lsop->rport = rport; + lsop->lsrsp = lsrsp; + + memcpy(lsop->rqstbuf, lsreqbuf, lsreqbuf_len); + lsop->rqstdatalen = lsreqbuf_len; + + spin_lock_irqsave(&rport->lock, flags); + if (rport->remoteport.port_state != FC_OBJSTATE_ONLINE) { + spin_unlock_irqrestore(&rport->lock, flags); + ret = -ENOTCONN; + goto out_unmap; + } + list_add_tail(&lsop->lsrcv_list, &rport->ls_rcv_list); + spin_unlock_irqrestore(&rport->lock, flags); + + schedule_work(&rport->lsrcv_work); return 0; + +out_unmap: + fc_dma_unmap_single(lport->dev, lsop->rspdma, + sizeof(*lsop->rspbuf), DMA_TO_DEVICE); +out_free: + kfree(lsop); +out_put: + nvme_fc_rport_put(rport); + return ret; } EXPORT_SYMBOL_GPL(nvme_fc_rcv_ls_req); /* *********************** NVME Ctrl Routines **************************** */ -static void nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg); - static void __nvme_fc_exit_request(struct nvme_fc_ctrl *ctrl, struct nvme_fc_fcp_op *op) @@ -2612,6 +2948,8 @@ static int nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) { struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; + struct nvmefc_ls_rcv_op *disls = NULL; + unsigned long flags; int ret; bool changed; @@ -2729,7 +3067,13 @@ out_term_aen_ops: out_disconnect_admin_queue: /* send a Disconnect(association) LS to fc-nvme target */ nvme_fc_xmt_disconnect_assoc(ctrl); + spin_lock_irqsave(&ctrl->lock, flags); ctrl->association_id = 0; + disls = ctrl->rcv_disconn; + ctrl->rcv_disconn = NULL; + spin_unlock_irqrestore(&ctrl->lock, flags); + if (disls) + nvme_fc_xmt_ls_rsp(disls); out_delete_hw_queue: __nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[0], 0); out_free_queue: @@ -2749,6 +3093,7 @@ out_free_queue: static void nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl) { + struct nvmefc_ls_rcv_op *disls = NULL; unsigned long flags; if 
(!test_and_clear_bit(ASSOC_ACTIVE, &ctrl->flags)) @@ -2820,7 +3165,17 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl) if (ctrl->association_id) nvme_fc_xmt_disconnect_assoc(ctrl); + spin_lock_irqsave(&ctrl->lock, flags); ctrl->association_id = 0; + disls = ctrl->rcv_disconn; + ctrl->rcv_disconn = NULL; + spin_unlock_irqrestore(&ctrl->lock, flags); + if (disls) + /* + * if a Disconnect Request was waiting for a response, send + * now that all ABTS's have been issued (and are complete). + */ + nvme_fc_xmt_ls_rsp(disls); if (ctrl->ctrl.tagset) { nvme_fc_delete_hw_io_queues(ctrl); -- cgit v1.2.3 From a5c2b4f633cf06df62d24b0ef11f824e8da646a5 Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 31 Mar 2020 09:49:55 -0700 Subject: nvmet-fc: add LS failure messages Add LS reception failure messages Signed-off-by: James Smart Reviewed-by: Hannes Reinecke Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/target/fc.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 5739df7edc59..a91c443c9098 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -1598,15 +1598,31 @@ nvmet_fc_rcv_ls_req(struct nvmet_fc_target_port *target_port, { struct nvmet_fc_tgtport *tgtport = targetport_to_tgtport(target_port); struct nvmet_fc_ls_iod *iod; - - if (lsreqbuf_len > sizeof(union nvmefc_ls_requests)) + struct fcnvme_ls_rqst_w0 *w0 = (struct fcnvme_ls_rqst_w0 *)lsreqbuf; + + if (lsreqbuf_len > sizeof(union nvmefc_ls_requests)) { + dev_info(tgtport->dev, + "RCV %s LS failed: payload too large (%d)\n", + (w0->ls_cmd <= NVME_FC_LAST_LS_CMD_VALUE) ? + nvmefc_ls_names[w0->ls_cmd] : "", + lsreqbuf_len); return -E2BIG; + } - if (!nvmet_fc_tgtport_get(tgtport)) + if (!nvmet_fc_tgtport_get(tgtport)) { + dev_info(tgtport->dev, + "RCV %s LS failed: target deleting\n", + (w0->ls_cmd <= NVME_FC_LAST_LS_CMD_VALUE) ? + nvmefc_ls_names[w0->ls_cmd] : ""); return -ESHUTDOWN; + } iod = nvmet_fc_alloc_ls_iod(tgtport); if (!iod) { + dev_info(tgtport->dev, + "RCV %s LS failed: context allocation failed\n", + (w0->ls_cmd <= NVME_FC_LAST_LS_CMD_VALUE) ? + nvmefc_ls_names[w0->ls_cmd] : ""); nvmet_fc_tgtport_put(tgtport); return -ENOENT; } -- cgit v1.2.3 From 0dfb992e0ec2e7b9e5ccf92d2261aaa6b5cc57a8 Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 31 Mar 2020 09:49:56 -0700 Subject: nvmet-fc: perform small cleanups on unneeded checks While code reviewing saw a couple of items that can be cleaned up: - In nvmet_fc_delete_target_queue(), the routine unlocks, then checks and relocks. Reorganize to avoid the unlock/relock. - In nvmet_fc_delete_target_queue(), there's a check on the disconnect state that is unnecessary as the routine validates the state before starting any action. 
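A compilable userspace sketch of the resulting locking shape, with a pthread mutex standing in for fod->flock (field and function names are illustrative): the lock is taken once, the decision is made while it is held, and each path releases it exactly once.

#include <pthread.h>
#include <stdbool.h>

struct fod {
	pthread_mutex_t flock;
	bool abort, aborted, writedataactive;
};

/*
 * Pattern after the cleanup: take the lock once, decide under it, and
 * release it on whichever path no longer needs the protected state.
 */
static void abort_fod(struct fod *fod)
{
	pthread_mutex_lock(&fod->flock);
	fod->abort = true;
	if (fod->writedataactive) {
		fod->aborted = true;
		pthread_mutex_unlock(&fod->flock);
		/* lldd abort callout would go here */
	} else {
		pthread_mutex_unlock(&fod->flock);
	}
}

int main(void)
{
	struct fod f = { PTHREAD_MUTEX_INITIALIZER, false, false, true };

	abort_fod(&f);
	return 0;
}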
Signed-off-by: James Smart Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/target/fc.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index a91c443c9098..01488fc35d46 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -677,7 +677,7 @@ nvmet_fc_delete_target_queue(struct nvmet_fc_tgt_queue *queue) struct nvmet_fc_fcp_iod *fod = queue->fod; struct nvmet_fc_defer_fcp_req *deferfcp, *tempptr; unsigned long flags; - int i, writedataactive; + int i; bool disconnect; disconnect = atomic_xchg(&queue->connected, 0); @@ -688,20 +688,18 @@ nvmet_fc_delete_target_queue(struct nvmet_fc_tgt_queue *queue) if (fod->active) { spin_lock(&fod->flock); fod->abort = true; - writedataactive = fod->writedataactive; - spin_unlock(&fod->flock); /* * only call lldd abort routine if waiting for * writedata. other outstanding ops should finish * on their own. */ - if (writedataactive) { - spin_lock(&fod->flock); + if (fod->writedataactive) { fod->aborted = true; spin_unlock(&fod->flock); tgtport->ops->fcp_abort( &tgtport->fc_target_port, fod->fcpreq); - } + } else + spin_unlock(&fod->flock); } } @@ -741,8 +739,7 @@ nvmet_fc_delete_target_queue(struct nvmet_fc_tgt_queue *queue) flush_workqueue(queue->work_q); - if (disconnect) - nvmet_sq_destroy(&queue->nvme_sq); + nvmet_sq_destroy(&queue->nvme_sq); nvmet_fc_tgt_q_put(queue); } -- cgit v1.2.3 From 58ab8ff9dca2142ba16ea02f7db9ba06eebbc37c Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 31 Mar 2020 09:49:57 -0700 Subject: nvmet-fc: track hostport handle for associations In preparation for sending LS requests for an association that terminates, save and track the hosthandle that is part of the LS's that are received to create associations. Support consists of: - Create a hostport structure that will be 1:1 mapped to a host port handle. The hostport structure is specific to a targetport. - Whenever an association is created, create a host port for the hosthandle the Create Association LS was received from. There will be only 1 hostport structure created, with all associations that have the same hosthandle sharing the hostport structure. - When the association is terminated, the hostport reference will be removed. After the last association for the host port is removed, the hostport will be deleted. - Add support for the new nvmet_fc_invalidate_host() interface. In the past, the LLDD didn't notify loss of connectivity to host ports - the LLDD would simply reject new requests and wait for the kato timeout to kill the association. Now, when host port connectivity is lost, the LLDD can notify the transport. The transport will initiate the termination of all associations for that host port. When the last association has been terminated and the hosthandle is no longer referenced, the new host_release callback will be made to the lldd. - For compatibility with prior behavior which didn't report the hosthandle: the LLDD must set hosthandle to NULL. In these cases, no LS request will be made, and no host_release callbacks will be made either.
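The hostport lookup follows a common kernel shape: allocate optimistically, then search under the lock and discard the allocation if a match is found. A simplified, compilable userspace sketch of that get-or-create pattern, with a plain counter standing in for the kref (all names illustrative; the kernel version searches before and after allocating to cover the failure path as well):

#include <stdlib.h>
#include <pthread.h>

struct hostport {
	void *hosthandle;
	int ref;
	struct hostport *next;
};

static pthread_mutex_t tlock = PTHREAD_MUTEX_INITIALIZER;
static struct hostport *host_list;

/* allocate outside the lock, then re-check for an existing entry */
static struct hostport *get_hostport(void *hosthandle)
{
	struct hostport *newhost = calloc(1, sizeof(*newhost));
	struct hostport *h, *match = NULL;

	pthread_mutex_lock(&tlock);
	for (h = host_list; h; h = h->next) {
		if (h->hosthandle == hosthandle) {
			h->ref++;
			match = h;
			break;
		}
	}
	if (!match && newhost) {
		newhost->hosthandle = hosthandle;
		newhost->ref = 1;
		newhost->next = host_list;
		host_list = newhost;
	}
	pthread_mutex_unlock(&tlock);

	if (match) {		/* found existing: drop our allocation */
		free(newhost);
		return match;
	}
	return newhost;
}

int main(void)
{
	int handle;
	struct hostport *a = get_hostport(&handle);
	struct hostport *b = get_hostport(&handle);

	/* same handle must yield the same hostport, twice referenced */
	return !(a && a == b && a->ref == 2);
}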
Signed-off-by: James Smart Reviewed-by: Hannes Reinecke Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/target/fc.c | 177 +++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 170 insertions(+), 7 deletions(-) diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 01488fc35d46..3ccf27c328b2 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -33,6 +33,7 @@ struct nvmet_fc_ls_iod { struct nvmet_fc_tgtport *tgtport; struct nvmet_fc_tgt_assoc *assoc; + void *hosthandle; union nvmefc_ls_requests *rqstbuf; union nvmefc_ls_responses *rspbuf; @@ -81,7 +82,6 @@ struct nvmet_fc_fcp_iod { }; struct nvmet_fc_tgtport { - struct nvmet_fc_target_port fc_target_port; struct list_head tgt_list; /* nvmet_fc_target_list */ @@ -93,6 +93,7 @@ struct nvmet_fc_tgtport { struct list_head ls_list; struct list_head ls_busylist; struct list_head assoc_list; + struct list_head host_list; struct ida assoc_cnt; struct nvmet_fc_port_entry *pe; struct kref ref; @@ -134,14 +135,24 @@ struct nvmet_fc_tgt_queue { struct nvmet_fc_fcp_iod fod[]; /* array of fcp_iods */ } __aligned(sizeof(unsigned long long)); +struct nvmet_fc_hostport { + struct nvmet_fc_tgtport *tgtport; + void *hosthandle; + struct list_head host_list; + struct kref ref; + u8 invalid; +}; + struct nvmet_fc_tgt_assoc { u64 association_id; u32 a_id; struct nvmet_fc_tgtport *tgtport; + struct nvmet_fc_hostport *hostport; struct list_head a_list; struct nvmet_fc_tgt_queue *queues[NVMET_NR_QUEUES + 1]; struct kref ref; struct work_struct del_work; + atomic_t del_work_active; }; @@ -773,6 +784,102 @@ nvmet_fc_find_target_queue(struct nvmet_fc_tgtport *tgtport, return NULL; } +static void +nvmet_fc_hostport_free(struct kref *ref) +{ + struct nvmet_fc_hostport *hostport = + container_of(ref, struct nvmet_fc_hostport, ref); + struct nvmet_fc_tgtport *tgtport = hostport->tgtport; + unsigned long flags; + + spin_lock_irqsave(&tgtport->lock, flags); + list_del(&hostport->host_list); + spin_unlock_irqrestore(&tgtport->lock, flags); + if (tgtport->ops->host_release && hostport->invalid) + tgtport->ops->host_release(hostport->hosthandle); + kfree(hostport); + nvmet_fc_tgtport_put(tgtport); +} + +static void +nvmet_fc_hostport_put(struct nvmet_fc_hostport *hostport) +{ + kref_put(&hostport->ref, nvmet_fc_hostport_free); +} + +static int +nvmet_fc_hostport_get(struct nvmet_fc_hostport *hostport) +{ + return kref_get_unless_zero(&hostport->ref); +} + +static void +nvmet_fc_free_hostport(struct nvmet_fc_hostport *hostport) +{ + /* if LLDD not implemented, leave as NULL */ + if (!hostport->hosthandle) + return; + + nvmet_fc_hostport_put(hostport); +} + +static struct nvmet_fc_hostport * +nvmet_fc_alloc_hostport(struct nvmet_fc_tgtport *tgtport, void *hosthandle) +{ + struct nvmet_fc_hostport *newhost, *host, *match = NULL; + unsigned long flags; + + /* if LLDD not implemented, leave as NULL */ + if (!hosthandle) + return NULL; + + /* take reference for what will be the newly allocated hostport */ + if (!nvmet_fc_tgtport_get(tgtport)) + return ERR_PTR(-EINVAL); + + newhost = kzalloc(sizeof(*newhost), GFP_KERNEL); + if (!newhost) { + spin_lock_irqsave(&tgtport->lock, flags); + list_for_each_entry(host, &tgtport->host_list, host_list) { + if (host->hosthandle == hosthandle && !host->invalid) { + if (nvmet_fc_hostport_get(host)) { + match = host; + break; + } + } + } + spin_unlock_irqrestore(&tgtport->lock, flags); + /* no allocation - release reference */ + nvmet_fc_tgtport_put(tgtport); + return (match) ? 
match : ERR_PTR(-ENOMEM); + } + + newhost->tgtport = tgtport; + newhost->hosthandle = hosthandle; + INIT_LIST_HEAD(&newhost->host_list); + kref_init(&newhost->ref); + + spin_lock_irqsave(&tgtport->lock, flags); + list_for_each_entry(host, &tgtport->host_list, host_list) { + if (host->hosthandle == hosthandle && !host->invalid) { + if (nvmet_fc_hostport_get(host)) { + match = host; + break; + } + } + } + if (match) { + kfree(newhost); + newhost = NULL; + /* releasing allocation - release reference */ + nvmet_fc_tgtport_put(tgtport); + } else + list_add_tail(&newhost->host_list, &tgtport->host_list); + spin_unlock_irqrestore(&tgtport->lock, flags); + + return (match) ? match : newhost; +} + static void nvmet_fc_delete_assoc(struct work_struct *work) { @@ -780,11 +887,12 @@ nvmet_fc_delete_assoc(struct work_struct *work) container_of(work, struct nvmet_fc_tgt_assoc, del_work); nvmet_fc_delete_target_assoc(assoc); + atomic_set(&assoc->del_work_active, 0); nvmet_fc_tgt_a_put(assoc); } static struct nvmet_fc_tgt_assoc * -nvmet_fc_alloc_target_assoc(struct nvmet_fc_tgtport *tgtport) +nvmet_fc_alloc_target_assoc(struct nvmet_fc_tgtport *tgtport, void *hosthandle) { struct nvmet_fc_tgt_assoc *assoc, *tmpassoc; unsigned long flags; @@ -801,13 +909,18 @@ nvmet_fc_alloc_target_assoc(struct nvmet_fc_tgtport *tgtport) goto out_free_assoc; if (!nvmet_fc_tgtport_get(tgtport)) - goto out_ida_put; + goto out_ida; + + assoc->hostport = nvmet_fc_alloc_hostport(tgtport, hosthandle); + if (IS_ERR(assoc->hostport)) + goto out_put; assoc->tgtport = tgtport; assoc->a_id = idx; INIT_LIST_HEAD(&assoc->a_list); kref_init(&assoc->ref); INIT_WORK(&assoc->del_work, nvmet_fc_delete_assoc); + atomic_set(&assoc->del_work_active, 0); while (needrandom) { get_random_bytes(&ran, sizeof(ran) - BYTES_FOR_QID); @@ -829,7 +942,9 @@ nvmet_fc_alloc_target_assoc(struct nvmet_fc_tgtport *tgtport) return assoc; -out_ida_put: +out_put: + nvmet_fc_tgtport_put(tgtport); +out_ida: ida_simple_remove(&tgtport->assoc_cnt, idx); out_free_assoc: kfree(assoc); @@ -844,6 +959,7 @@ nvmet_fc_target_assoc_free(struct kref *ref) struct nvmet_fc_tgtport *tgtport = assoc->tgtport; unsigned long flags; + nvmet_fc_free_hostport(assoc->hostport); spin_lock_irqsave(&tgtport->lock, flags); list_del(&assoc->a_list); spin_unlock_irqrestore(&tgtport->lock, flags); @@ -1057,6 +1173,7 @@ nvmet_fc_register_targetport(struct nvmet_fc_port_info *pinfo, INIT_LIST_HEAD(&newrec->ls_list); INIT_LIST_HEAD(&newrec->ls_busylist); INIT_LIST_HEAD(&newrec->assoc_list); + INIT_LIST_HEAD(&newrec->host_list); kref_init(&newrec->ref); ida_init(&newrec->assoc_cnt); newrec->max_sg_cnt = template->max_sgl_segments; @@ -1133,14 +1250,21 @@ __nvmet_fc_free_assocs(struct nvmet_fc_tgtport *tgtport) { struct nvmet_fc_tgt_assoc *assoc, *next; unsigned long flags; + int ret; spin_lock_irqsave(&tgtport->lock, flags); list_for_each_entry_safe(assoc, next, &tgtport->assoc_list, a_list) { if (!nvmet_fc_tgt_a_get(assoc)) continue; - if (!schedule_work(&assoc->del_work)) + ret = atomic_cmpxchg(&assoc->del_work_active, 0, 1); + if (ret == 0) { + if (!schedule_work(&assoc->del_work)) + nvmet_fc_tgt_a_put(assoc); + } else { + /* already deleting - release local reference */ nvmet_fc_tgt_a_put(assoc); + } } spin_unlock_irqrestore(&tgtport->lock, flags); } @@ -1178,6 +1302,36 @@ void nvmet_fc_invalidate_host(struct nvmet_fc_target_port *target_port, void *hosthandle) { + struct nvmet_fc_tgtport *tgtport = targetport_to_tgtport(target_port); + struct nvmet_fc_tgt_assoc *assoc, *next; + unsigned long 
flags; + bool noassoc = true; + int ret; + + spin_lock_irqsave(&tgtport->lock, flags); + list_for_each_entry_safe(assoc, next, + &tgtport->assoc_list, a_list) { + if (!assoc->hostport || + assoc->hostport->hosthandle != hosthandle) + continue; + if (!nvmet_fc_tgt_a_get(assoc)) + continue; + assoc->hostport->invalid = 1; + noassoc = false; + ret = atomic_cmpxchg(&assoc->del_work_active, 0, 1); + if (ret == 0) { + if (!schedule_work(&assoc->del_work)) + nvmet_fc_tgt_a_put(assoc); + } else { + /* already deleting - release local reference */ + nvmet_fc_tgt_a_put(assoc); + } + } + spin_unlock_irqrestore(&tgtport->lock, flags); + + /* if there's nothing to wait for - call the callback */ + if (noassoc && tgtport->ops->host_release) + tgtport->ops->host_release(hosthandle); } EXPORT_SYMBOL_GPL(nvmet_fc_invalidate_host); @@ -1192,6 +1346,7 @@ nvmet_fc_delete_ctrl(struct nvmet_ctrl *ctrl) struct nvmet_fc_tgt_queue *queue; unsigned long flags; bool found_ctrl = false; + int ret; /* this is a bit ugly, but don't want to make locks layered */ spin_lock_irqsave(&nvmet_fc_tgtlock, flags); @@ -1215,8 +1370,14 @@ nvmet_fc_delete_ctrl(struct nvmet_ctrl *ctrl) nvmet_fc_tgtport_put(tgtport); if (found_ctrl) { - if (!schedule_work(&assoc->del_work)) + ret = atomic_cmpxchg(&assoc->del_work_active, 0, 1); + if (ret == 0) { + if (!schedule_work(&assoc->del_work)) + nvmet_fc_tgt_a_put(assoc); + } else { + /* already deleting - release local reference */ nvmet_fc_tgt_a_put(assoc); + } return; } @@ -1293,7 +1454,8 @@ nvmet_fc_ls_create_association(struct nvmet_fc_tgtport *tgtport, else { /* new association w/ admin queue */ - iod->assoc = nvmet_fc_alloc_target_assoc(tgtport); + iod->assoc = nvmet_fc_alloc_target_assoc( + tgtport, iod->hosthandle); if (!iod->assoc) ret = VERR_ASSOC_ALLOC_FAIL; else { @@ -1628,6 +1790,7 @@ nvmet_fc_rcv_ls_req(struct nvmet_fc_target_port *target_port, iod->fcpreq = NULL; memcpy(iod->rqstbuf, lsreqbuf, lsreqbuf_len); iod->rqstdatalen = lsreqbuf_len; + iod->hosthandle = hosthandle; schedule_work(&iod->work); -- cgit v1.2.3 From 349c694ee71ce0dfe4b9ccfac76ef5c1efb476cf Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 31 Mar 2020 09:49:58 -0700 Subject: nvmet-fc: rename ls_list to ls_rcv_list In preparation to add ls request support, rename the current ls_list, which is RCV LS request only, to ls_rcv_list. 
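After the rename, the naming leaves room for the send-side list the next patch introduces. A minimal sketch of the resulting book-keeping on the target port (ls_req_list is the list added by the following patch; the struct and helpers here are illustrative, not the driver's):

#include <stdio.h>

struct list_head { struct list_head *next, *prev; };

#define INIT_LIST_HEAD(h) ((h)->next = (h)->prev = (h))

/* one port can now track both directions of LS traffic */
struct tgtport_ls_state {
	struct list_head ls_rcv_list;	/* LS's received from the host */
	struct list_head ls_busylist;	/* rcv contexts currently in use */
	struct list_head ls_req_list;	/* LS's this side originates */
};

int main(void)
{
	struct tgtport_ls_state s;

	INIT_LIST_HEAD(&s.ls_rcv_list);
	INIT_LIST_HEAD(&s.ls_busylist);
	INIT_LIST_HEAD(&s.ls_req_list);
	printf("rcv list empty: %d\n", s.ls_rcv_list.next == &s.ls_rcv_list);
	return 0;
}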
Signed-off-by: James Smart Reviewed-by: Hannes Reinecke Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/target/fc.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 3ccf27c328b2..6a5af99f19ba 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -29,7 +29,7 @@ struct nvmet_fc_ls_iod { struct nvmefc_ls_rsp *lsrsp; struct nvmefc_tgt_fcp_req *fcpreq; /* only if RS */ - struct list_head ls_list; /* tgtport->ls_list */ + struct list_head ls_rcv_list; /* tgtport->ls_rcv_list */ struct nvmet_fc_tgtport *tgtport; struct nvmet_fc_tgt_assoc *assoc; @@ -90,7 +90,7 @@ struct nvmet_fc_tgtport { struct nvmet_fc_ls_iod *iod; spinlock_t lock; - struct list_head ls_list; + struct list_head ls_rcv_list; struct list_head ls_busylist; struct list_head assoc_list; struct list_head host_list; @@ -346,7 +346,7 @@ nvmet_fc_alloc_ls_iodlist(struct nvmet_fc_tgtport *tgtport) for (i = 0; i < NVMET_LS_CTX_COUNT; iod++, i++) { INIT_WORK(&iod->work, nvmet_fc_handle_ls_rqst_work); iod->tgtport = tgtport; - list_add_tail(&iod->ls_list, &tgtport->ls_list); + list_add_tail(&iod->ls_rcv_list, &tgtport->ls_rcv_list); iod->rqstbuf = kzalloc(sizeof(union nvmefc_ls_requests) + sizeof(union nvmefc_ls_responses), @@ -367,12 +367,12 @@ nvmet_fc_alloc_ls_iodlist(struct nvmet_fc_tgtport *tgtport) out_fail: kfree(iod->rqstbuf); - list_del(&iod->ls_list); + list_del(&iod->ls_rcv_list); for (iod--, i--; i >= 0; iod--, i--) { fc_dma_unmap_single(tgtport->dev, iod->rspdma, sizeof(*iod->rspbuf), DMA_TO_DEVICE); kfree(iod->rqstbuf); - list_del(&iod->ls_list); + list_del(&iod->ls_rcv_list); } kfree(iod); @@ -391,7 +391,7 @@ nvmet_fc_free_ls_iodlist(struct nvmet_fc_tgtport *tgtport) iod->rspdma, sizeof(*iod->rspbuf), DMA_TO_DEVICE); kfree(iod->rqstbuf); - list_del(&iod->ls_list); + list_del(&iod->ls_rcv_list); } kfree(tgtport->iod); } @@ -403,10 +403,10 @@ nvmet_fc_alloc_ls_iod(struct nvmet_fc_tgtport *tgtport) unsigned long flags; spin_lock_irqsave(&tgtport->lock, flags); - iod = list_first_entry_or_null(&tgtport->ls_list, - struct nvmet_fc_ls_iod, ls_list); + iod = list_first_entry_or_null(&tgtport->ls_rcv_list, + struct nvmet_fc_ls_iod, ls_rcv_list); if (iod) - list_move_tail(&iod->ls_list, &tgtport->ls_busylist); + list_move_tail(&iod->ls_rcv_list, &tgtport->ls_busylist); spin_unlock_irqrestore(&tgtport->lock, flags); return iod; } @@ -419,7 +419,7 @@ nvmet_fc_free_ls_iod(struct nvmet_fc_tgtport *tgtport, unsigned long flags; spin_lock_irqsave(&tgtport->lock, flags); - list_move(&iod->ls_list, &tgtport->ls_list); + list_move(&iod->ls_rcv_list, &tgtport->ls_rcv_list); spin_unlock_irqrestore(&tgtport->lock, flags); } @@ -1170,7 +1170,7 @@ nvmet_fc_register_targetport(struct nvmet_fc_port_info *pinfo, newrec->dev = dev; newrec->ops = template; spin_lock_init(&newrec->lock); - INIT_LIST_HEAD(&newrec->ls_list); + INIT_LIST_HEAD(&newrec->ls_rcv_list); INIT_LIST_HEAD(&newrec->ls_busylist); INIT_LIST_HEAD(&newrec->assoc_list); INIT_LIST_HEAD(&newrec->host_list); -- cgit v1.2.3 From 47bf3241064498878ffed10a69131be9154201eb Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 31 Mar 2020 09:49:59 -0700 Subject: nvmet-fc: Add Disconnect Association Xmt support As part of FC-NVME-2 (and ammendment on FC-NVME), the target is to send a Disconnect LS after an association is terminated and any exchanges for the association have been ABTS'd. 
The target is also not to send the receipt to any Disconnect Association LS, received to initiate the association termination or received while the association is terminating, until the Disconnect LS has been transmit. Add support for sending Disconnect Association LS after all I/O's complete (which is after ABTS'd certainly). Utilizes the new LLDD api to send ls requests. There is no need to track the Disconnect LS response or to retry after timeout. All spec requirements will have been met by waiting for i/o completion to initiate the transmission. Add support for tracking the reception of Disconnect Association and defering the response transmission until after the Disconnect Association LS has been transmit. Signed-off-by: James Smart Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/target/fc.c | 296 +++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 286 insertions(+), 10 deletions(-) diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 6a5af99f19ba..02d9751bb7ee 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -25,7 +25,7 @@ struct nvmet_fc_tgtport; struct nvmet_fc_tgt_assoc; -struct nvmet_fc_ls_iod { +struct nvmet_fc_ls_iod { /* for an LS RQST RCV */ struct nvmefc_ls_rsp *lsrsp; struct nvmefc_tgt_fcp_req *fcpreq; /* only if RS */ @@ -45,6 +45,18 @@ struct nvmet_fc_ls_iod { struct work_struct work; } __aligned(sizeof(unsigned long long)); +struct nvmet_fc_ls_req_op { /* for an LS RQST XMT */ + struct nvmefc_ls_req ls_req; + + struct nvmet_fc_tgtport *tgtport; + void *hosthandle; + + int ls_error; + struct list_head lsreq_list; /* tgtport->ls_req_list */ + bool req_queued; +}; + + /* desired maximum for a single sequence - if sg list allows it */ #define NVMET_FC_MAX_SEQ_LENGTH (256 * 1024) @@ -91,6 +103,7 @@ struct nvmet_fc_tgtport { struct nvmet_fc_ls_iod *iod; spinlock_t lock; struct list_head ls_rcv_list; + struct list_head ls_req_list; struct list_head ls_busylist; struct list_head assoc_list; struct list_head host_list; @@ -146,8 +159,10 @@ struct nvmet_fc_hostport { struct nvmet_fc_tgt_assoc { u64 association_id; u32 a_id; + atomic_t terminating; struct nvmet_fc_tgtport *tgtport; struct nvmet_fc_hostport *hostport; + struct nvmet_fc_ls_iod *rcv_disconn; struct list_head a_list; struct nvmet_fc_tgt_queue *queues[NVMET_NR_QUEUES + 1]; struct kref ref; @@ -236,6 +251,8 @@ static int nvmet_fc_tgtport_get(struct nvmet_fc_tgtport *tgtport); static void nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport, struct nvmet_fc_fcp_iod *fod); static void nvmet_fc_delete_target_assoc(struct nvmet_fc_tgt_assoc *assoc); +static void nvmet_fc_xmt_ls_rsp(struct nvmet_fc_tgtport *tgtport, + struct nvmet_fc_ls_iod *iod); /* *********************** FC-NVME DMA Handling **************************** */ @@ -327,6 +344,188 @@ fc_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, } +/* ********************** FC-NVME LS XMT Handling ************************* */ + + +static void +__nvmet_fc_finish_ls_req(struct nvmet_fc_ls_req_op *lsop) +{ + struct nvmet_fc_tgtport *tgtport = lsop->tgtport; + struct nvmefc_ls_req *lsreq = &lsop->ls_req; + unsigned long flags; + + spin_lock_irqsave(&tgtport->lock, flags); + + if (!lsop->req_queued) { + spin_unlock_irqrestore(&tgtport->lock, flags); + return; + } + + list_del(&lsop->lsreq_list); + + lsop->req_queued = false; + + spin_unlock_irqrestore(&tgtport->lock, flags); + + fc_dma_unmap_single(tgtport->dev, lsreq->rqstdma, + (lsreq->rqstlen + lsreq->rsplen), + 
DMA_BIDIRECTIONAL); + + nvmet_fc_tgtport_put(tgtport); +} + +static int +__nvmet_fc_send_ls_req(struct nvmet_fc_tgtport *tgtport, + struct nvmet_fc_ls_req_op *lsop, + void (*done)(struct nvmefc_ls_req *req, int status)) +{ + struct nvmefc_ls_req *lsreq = &lsop->ls_req; + unsigned long flags; + int ret = 0; + + if (!tgtport->ops->ls_req) + return -EOPNOTSUPP; + + if (!nvmet_fc_tgtport_get(tgtport)) + return -ESHUTDOWN; + + lsreq->done = done; + lsop->req_queued = false; + INIT_LIST_HEAD(&lsop->lsreq_list); + + lsreq->rqstdma = fc_dma_map_single(tgtport->dev, lsreq->rqstaddr, + lsreq->rqstlen + lsreq->rsplen, + DMA_BIDIRECTIONAL); + if (fc_dma_mapping_error(tgtport->dev, lsreq->rqstdma)) { + ret = -EFAULT; + goto out_puttgtport; + } + lsreq->rspdma = lsreq->rqstdma + lsreq->rqstlen; + + spin_lock_irqsave(&tgtport->lock, flags); + + list_add_tail(&lsop->lsreq_list, &tgtport->ls_req_list); + + lsop->req_queued = true; + + spin_unlock_irqrestore(&tgtport->lock, flags); + + ret = tgtport->ops->ls_req(&tgtport->fc_target_port, lsop->hosthandle, + lsreq); + if (ret) + goto out_unlink; + + return 0; + +out_unlink: + lsop->ls_error = ret; + spin_lock_irqsave(&tgtport->lock, flags); + lsop->req_queued = false; + list_del(&lsop->lsreq_list); + spin_unlock_irqrestore(&tgtport->lock, flags); + fc_dma_unmap_single(tgtport->dev, lsreq->rqstdma, + (lsreq->rqstlen + lsreq->rsplen), + DMA_BIDIRECTIONAL); +out_puttgtport: + nvmet_fc_tgtport_put(tgtport); + + return ret; +} + +static int +nvmet_fc_send_ls_req_async(struct nvmet_fc_tgtport *tgtport, + struct nvmet_fc_ls_req_op *lsop, + void (*done)(struct nvmefc_ls_req *req, int status)) +{ + /* don't wait for completion */ + + return __nvmet_fc_send_ls_req(tgtport, lsop, done); +} + +static void +nvmet_fc_disconnect_assoc_done(struct nvmefc_ls_req *lsreq, int status) +{ + struct nvmet_fc_ls_req_op *lsop = + container_of(lsreq, struct nvmet_fc_ls_req_op, ls_req); + + __nvmet_fc_finish_ls_req(lsop); + + /* fc-nvme target doesn't care about success or failure of cmd */ + + kfree(lsop); +} + +/* + * This routine sends a FC-NVME LS to disconnect (aka terminate) + * the FC-NVME Association. Terminating the association also + * terminates the FC-NVME connections (per queue, both admin and io + * queues) that are part of the association. E.g. things are torn + * down, and the related FC-NVME Association ID and Connection IDs + * become invalid. + * + * The behavior of the fc-nvme target is such that it's + * understanding of the association and connections will implicitly + * be torn down. The action is implicit as it may be due to a loss of + * connectivity with the fc-nvme host, so the target may never get a + * response even if it tried. As such, the action of this routine + * is to asynchronously send the LS, ignore any results of the LS, and + * continue on with terminating the association. If the fc-nvme host + * is present and receives the LS, it too can tear down. + */ +static void +nvmet_fc_xmt_disconnect_assoc(struct nvmet_fc_tgt_assoc *assoc) +{ + struct nvmet_fc_tgtport *tgtport = assoc->tgtport; + struct fcnvme_ls_disconnect_assoc_rqst *discon_rqst; + struct fcnvme_ls_disconnect_assoc_acc *discon_acc; + struct nvmet_fc_ls_req_op *lsop; + struct nvmefc_ls_req *lsreq; + int ret; + + /* + * If ls_req is NULL or no hosthandle, it's an older lldd and no + * message is normal. Otherwise, send unless the hostport has + * already been invalidated by the lldd. 
+ */ + if (!tgtport->ops->ls_req || !assoc->hostport || + assoc->hostport->invalid) + return; + + lsop = kzalloc((sizeof(*lsop) + + sizeof(*discon_rqst) + sizeof(*discon_acc) + + tgtport->ops->lsrqst_priv_sz), GFP_KERNEL); + if (!lsop) { + dev_info(tgtport->dev, + "{%d:%d} send Disconnect Association failed: ENOMEM\n", + tgtport->fc_target_port.port_num, assoc->a_id); + return; + } + + discon_rqst = (struct fcnvme_ls_disconnect_assoc_rqst *)&lsop[1]; + discon_acc = (struct fcnvme_ls_disconnect_assoc_acc *)&discon_rqst[1]; + lsreq = &lsop->ls_req; + if (tgtport->ops->lsrqst_priv_sz) + lsreq->private = (void *)&discon_acc[1]; + else + lsreq->private = NULL; + + lsop->tgtport = tgtport; + lsop->hosthandle = assoc->hostport->hosthandle; + + nvmefc_fmt_lsreq_discon_assoc(lsreq, discon_rqst, discon_acc, + assoc->association_id); + + ret = nvmet_fc_send_ls_req_async(tgtport, lsop, + nvmet_fc_disconnect_assoc_done); + if (ret) { + dev_info(tgtport->dev, + "{%d:%d} XMT Disconnect Association failed: %d\n", + tgtport->fc_target_port.port_num, assoc->a_id, ret); + kfree(lsop); + } +} + + /* *********************** FC-NVME Port Management ************************ */ @@ -693,6 +892,10 @@ nvmet_fc_delete_target_queue(struct nvmet_fc_tgt_queue *queue) disconnect = atomic_xchg(&queue->connected, 0); + /* if not connected, nothing to do */ + if (!disconnect) + return; + spin_lock_irqsave(&queue->qlock, flags); /* abort outstanding io's */ for (i = 0; i < queue->sqsize; fod++, i++) { @@ -921,6 +1124,7 @@ nvmet_fc_alloc_target_assoc(struct nvmet_fc_tgtport *tgtport, void *hosthandle) kref_init(&assoc->ref); INIT_WORK(&assoc->del_work, nvmet_fc_delete_assoc); atomic_set(&assoc->del_work_active, 0); + atomic_set(&assoc->terminating, 0); while (needrandom) { get_random_bytes(&ran, sizeof(ran) - BYTES_FOR_QID); @@ -957,13 +1161,24 @@ nvmet_fc_target_assoc_free(struct kref *ref) struct nvmet_fc_tgt_assoc *assoc = container_of(ref, struct nvmet_fc_tgt_assoc, ref); struct nvmet_fc_tgtport *tgtport = assoc->tgtport; + struct nvmet_fc_ls_iod *oldls; unsigned long flags; + /* Send Disconnect now that all i/o has completed */ + nvmet_fc_xmt_disconnect_assoc(assoc); + nvmet_fc_free_hostport(assoc->hostport); spin_lock_irqsave(&tgtport->lock, flags); list_del(&assoc->a_list); + oldls = assoc->rcv_disconn; spin_unlock_irqrestore(&tgtport->lock, flags); + /* if pending Rcv Disconnect Association LS, send rsp now */ + if (oldls) + nvmet_fc_xmt_ls_rsp(tgtport, oldls); ida_simple_remove(&tgtport->assoc_cnt, assoc->a_id); + dev_info(tgtport->dev, + "{%d:%d} Association freed\n", + tgtport->fc_target_port.port_num, assoc->a_id); kfree(assoc); nvmet_fc_tgtport_put(tgtport); } @@ -986,7 +1201,13 @@ nvmet_fc_delete_target_assoc(struct nvmet_fc_tgt_assoc *assoc) struct nvmet_fc_tgtport *tgtport = assoc->tgtport; struct nvmet_fc_tgt_queue *queue; unsigned long flags; - int i; + int i, terminating; + + terminating = atomic_xchg(&assoc->terminating, 1); + + /* if already terminating, do nothing */ + if (terminating) + return; spin_lock_irqsave(&tgtport->lock, flags); for (i = NVMET_NR_QUEUES; i >= 0; i--) { @@ -1002,6 +1223,10 @@ nvmet_fc_delete_target_assoc(struct nvmet_fc_tgt_assoc *assoc) } spin_unlock_irqrestore(&tgtport->lock, flags); + dev_info(tgtport->dev, + "{%d:%d} Association deleted\n", + tgtport->fc_target_port.port_num, assoc->a_id); + nvmet_fc_tgt_a_put(assoc); } @@ -1171,6 +1396,7 @@ nvmet_fc_register_targetport(struct nvmet_fc_port_info *pinfo, newrec->ops = template; spin_lock_init(&newrec->lock); 
INIT_LIST_HEAD(&newrec->ls_rcv_list); + INIT_LIST_HEAD(&newrec->ls_req_list); INIT_LIST_HEAD(&newrec->ls_busylist); INIT_LIST_HEAD(&newrec->assoc_list); INIT_LIST_HEAD(&newrec->host_list); @@ -1407,6 +1633,13 @@ nvmet_fc_unregister_targetport(struct nvmet_fc_target_port *target_port) /* terminate any outstanding associations */ __nvmet_fc_free_assocs(tgtport); + /* + * should terminate LS's as well. However, LS's will be generated + * at the tail end of association termination, so they likely don't + * exist yet. And even if they did, it's worthwhile to just let + * them finish and targetport ref counting will clean things up. + */ + nvmet_fc_tgtport_put(tgtport); return 0; @@ -1414,7 +1647,7 @@ nvmet_fc_unregister_targetport(struct nvmet_fc_target_port *target_port) EXPORT_SYMBOL_GPL(nvmet_fc_unregister_targetport); -/* *********************** FC-NVME LS Handling **************************** */ +/* ********************** FC-NVME LS RCV Handling ************************* */ static void @@ -1481,6 +1714,10 @@ nvmet_fc_ls_create_association(struct nvmet_fc_tgtport *tgtport, atomic_set(&queue->connected, 1); queue->sqhd = 0; /* best place to init value */ + dev_info(tgtport->dev, + "{%d:%d} Association created\n", + tgtport->fc_target_port.port_num, iod->assoc->a_id); + /* format a response */ iod->lsrsp->rsplen = sizeof(*acc); @@ -1588,7 +1825,11 @@ nvmet_fc_ls_create_connection(struct nvmet_fc_tgtport *tgtport, be16_to_cpu(rqst->connect_cmd.qid))); } -static void +/* + * Returns true if the LS response is to be transmit + * Returns false if the LS response is to be delayed + */ +static int nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport, struct nvmet_fc_ls_iod *iod) { @@ -1597,13 +1838,15 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport, struct fcnvme_ls_disconnect_assoc_acc *acc = &iod->rspbuf->rsp_dis_assoc; struct nvmet_fc_tgt_assoc *assoc; + struct nvmet_fc_ls_iod *oldls = NULL; + unsigned long flags; int ret = 0; memset(acc, 0, sizeof(*acc)); ret = nvmefc_vldt_lsreq_discon_assoc(iod->rqstdatalen, rqst); if (!ret) { - /* match an active association */ + /* match an active association - takes an assoc ref if !NULL */ assoc = nvmet_fc_find_target_assoc(tgtport, be64_to_cpu(rqst->associd.association_id)); iod->assoc = assoc; @@ -1621,7 +1864,7 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport, FCNVME_RJT_RC_INV_ASSOC : FCNVME_RJT_RC_LOGIC, FCNVME_RJT_EXP_NONE, 0); - return; + return true; } /* format a response */ @@ -1634,9 +1877,40 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport, FCNVME_LS_DISCONNECT_ASSOC); /* release get taken in nvmet_fc_find_target_assoc */ - nvmet_fc_tgt_a_put(iod->assoc); + nvmet_fc_tgt_a_put(assoc); + + /* + * The rules for LS response says the response cannot + * go back until ABTS's have been sent for all outstanding + * I/O and a Disconnect Association LS has been sent. + * So... save off the Disconnect LS to send the response + * later. If there was a prior LS already saved, replace + * it with the newer one and send a can't perform reject + * on the older one. 
+ */ + spin_lock_irqsave(&tgtport->lock, flags); + oldls = assoc->rcv_disconn; + assoc->rcv_disconn = iod; + spin_unlock_irqrestore(&tgtport->lock, flags); - nvmet_fc_delete_target_assoc(iod->assoc); + nvmet_fc_delete_target_assoc(assoc); + + if (oldls) { + dev_info(tgtport->dev, + "{%d:%d} Multiple Disconnect Association LS's " + "received\n", + tgtport->fc_target_port.port_num, assoc->a_id); + /* overwrite good response with bogus failure */ + oldls->lsrsp->rsplen = nvme_fc_format_rjt(oldls->rspbuf, + sizeof(*iod->rspbuf), + /* ok to use rqst, LS is same */ + rqst->w0.ls_cmd, + FCNVME_RJT_RC_UNAB, + FCNVME_RJT_EXP_NONE, 0); + nvmet_fc_xmt_ls_rsp(tgtport, oldls); + } + + return false; } @@ -1681,6 +1955,7 @@ nvmet_fc_handle_ls_rqst(struct nvmet_fc_tgtport *tgtport, struct nvmet_fc_ls_iod *iod) { struct fcnvme_ls_rqst_w0 *w0 = &iod->rqstbuf->rq_cr_assoc.w0; + bool sendrsp = true; iod->lsrsp->nvme_fc_private = iod; iod->lsrsp->rspbuf = iod->rspbuf; @@ -1707,7 +1982,7 @@ nvmet_fc_handle_ls_rqst(struct nvmet_fc_tgtport *tgtport, break; case FCNVME_LS_DISCONNECT_ASSOC: /* Terminate a Queue/Connection or the Association */ - nvmet_fc_ls_disconnect(tgtport, iod); + sendrsp = nvmet_fc_ls_disconnect(tgtport, iod); break; default: iod->lsrsp->rsplen = nvme_fc_format_rjt(iod->rspbuf, @@ -1715,7 +1990,8 @@ nvmet_fc_handle_ls_rqst(struct nvmet_fc_tgtport *tgtport, FCNVME_RJT_RC_INVAL, FCNVME_RJT_EXP_NONE, 0); } - nvmet_fc_xmt_ls_rsp(tgtport, iod); + if (sendrsp) + nvmet_fc_xmt_ls_rsp(tgtport, iod); } /* -- cgit v1.2.3 From ea39765843faf5f4426ffda000b0ca02217a1eeb Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 31 Mar 2020 09:50:00 -0700 Subject: nvme-fcloop: refactor to enable target to host LS Currently nvmefc-loop only sends LS's from host to target. Slightly rework data structures and routine names to reflect this path. Allows a straight-forward conversion to be used by ls's from target to host. 
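The refactor boils down to tagging each fcloop LS request with its direction so a later patch can route target-to-host LS's separately from host-to-target ones. A minimal sketch of that idea, with types reduced to essentials (the enum values match the patch; the demo_* names are illustrative stand-ins, not driver symbols):

    #include <linux/list.h>

    enum {
        H2T = 0,    /* LS issued by host, received by target */
        T2H = 1,    /* LS issued by target, received by host */
    };

    /* reduced stand-in for struct fcloop_lsreq */
    struct demo_lsreq {
        int                 lsdir;      /* H2T or T2H */
        int                 status;
        struct list_head    ls_list;
    };

    /* queue a completed LS on the completion list for its direction */
    static void demo_queue_done(struct demo_lsreq *lsr,
                                struct list_head *h2t_done,
                                struct list_head *t2h_done)
    {
        list_add_tail(&lsr->ls_list,
                      lsr->lsdir == H2T ? h2t_done : t2h_done);
    }

With the direction recorded at submission time, the completion side needs no knowledge of which template callback originated the LS.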
Signed-off-by: James Smart Reviewed-by: Hannes Reinecke Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/target/fcloop.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c index c11805a155e8..fac7dbe572db 100644 --- a/drivers/nvme/target/fcloop.c +++ b/drivers/nvme/target/fcloop.c @@ -226,9 +226,15 @@ struct fcloop_nport { u32 port_id; }; +enum { + H2T = 0, + T2H = 1, +}; + struct fcloop_lsreq { struct nvmefc_ls_req *lsreq; struct nvmefc_ls_rsp ls_rsp; + int lsdir; /* H2T or T2H */ int status; struct list_head ls_list; /* fcloop_rport->ls_list */ }; @@ -323,7 +329,7 @@ fcloop_rport_lsrqst_work(struct work_struct *work) } static int -fcloop_ls_req(struct nvme_fc_local_port *localport, +fcloop_h2t_ls_req(struct nvme_fc_local_port *localport, struct nvme_fc_remote_port *remoteport, struct nvmefc_ls_req *lsreq) { @@ -331,6 +337,7 @@ fcloop_ls_req(struct nvme_fc_local_port *localport, struct fcloop_rport *rport = remoteport->private; int ret = 0; + tls_req->lsdir = H2T; tls_req->lsreq = lsreq; INIT_LIST_HEAD(&tls_req->ls_list); @@ -351,7 +358,7 @@ fcloop_ls_req(struct nvme_fc_local_port *localport, } static int -fcloop_xmt_ls_rsp(struct nvmet_fc_target_port *targetport, +fcloop_h2t_xmt_ls_rsp(struct nvmet_fc_target_port *targetport, struct nvmefc_ls_rsp *lsrsp) { struct fcloop_lsreq *tls_req = ls_rsp_to_lsreq(lsrsp); @@ -762,7 +769,7 @@ fcloop_fcp_req_release(struct nvmet_fc_target_port *tgtport, } static void -fcloop_ls_abort(struct nvme_fc_local_port *localport, +fcloop_h2t_ls_abort(struct nvme_fc_local_port *localport, struct nvme_fc_remote_port *remoteport, struct nvmefc_ls_req *lsreq) { @@ -879,9 +886,9 @@ static struct nvme_fc_port_template fctemplate = { .remoteport_delete = fcloop_remoteport_delete, .create_queue = fcloop_create_queue, .delete_queue = fcloop_delete_queue, - .ls_req = fcloop_ls_req, + .ls_req = fcloop_h2t_ls_req, .fcp_io = fcloop_fcp_req, - .ls_abort = fcloop_ls_abort, + .ls_abort = fcloop_h2t_ls_abort, .fcp_abort = fcloop_fcp_abort, .max_hw_queues = FCLOOP_HW_QUEUES, .max_sgl_segments = FCLOOP_SGL_SEGS, @@ -896,7 +903,7 @@ static struct nvme_fc_port_template fctemplate = { static struct nvmet_fc_target_template tgttemplate = { .targetport_delete = fcloop_targetport_delete, - .xmt_ls_rsp = fcloop_xmt_ls_rsp, + .xmt_ls_rsp = fcloop_h2t_xmt_ls_rsp, .fcp_op = fcloop_fcp_op, .fcp_abort = fcloop_tgt_fcp_abort, .fcp_req_release = fcloop_fcp_req_release, -- cgit v1.2.3 From 437c0b824dbd05dbdab772ed1e0f69ffec76119d Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 31 Mar 2020 09:50:01 -0700 Subject: nvme-fcloop: add target to host LS request support Add support for performing LS requests from target to host. Include sending request from targetport, reception into host, host sending ls rsp. 
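At the heart of the new path is the tport-side completion worker: target-to-host LS completions are queued on tport->ls_list under tport->lock and drained by ls_work, with the lock dropped around each done() callback because the callee may free the request. A reduced, self-contained sketch of that pattern, assuming simplified types (demo_* names are illustrative; only the list/work plumbing is shown):

    #include <linux/kernel.h>
    #include <linux/list.h>
    #include <linux/spinlock.h>
    #include <linux/workqueue.h>
    #include <linux/nvme-fc-driver.h>

    /* reduced stand-in for struct fcloop_lsreq */
    struct demo_lsreq {
        struct nvmefc_ls_req    *lsreq;
        int                     status;
        struct list_head        ls_list;
    };

    /* reduced stand-in for struct fcloop_tport */
    struct demo_tport {
        spinlock_t              lock;
        struct list_head        ls_list;    /* completed t2h LS's */
        struct work_struct      ls_work;
    };

    static void demo_tport_lsrqst_work(struct work_struct *work)
    {
        struct demo_tport *tport =
                container_of(work, struct demo_tport, ls_work);
        struct demo_lsreq *tls_req;

        spin_lock(&tport->lock);
        for (;;) {
                tls_req = list_first_entry_or_null(&tport->ls_list,
                                struct demo_lsreq, ls_list);
                if (!tls_req)
                        break;

                list_del(&tls_req->ls_list);
                spin_unlock(&tport->lock);

                /* callee may free tls_req; don't touch it afterwards */
                tls_req->lsreq->done(tls_req->lsreq, tls_req->status);

                spin_lock(&tport->lock);
        }
        spin_unlock(&tport->lock);
    }

The submission side then only needs INIT_WORK() at port creation and schedule_work() after queuing a completion, which is what the diff below adds in fcloop_create_target_port() and the t2h request/response routines.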
Signed-off-by: James Smart Reviewed-by: Hannes Reinecke Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/target/fcloop.c | 130 +++++++++++++++++++++++++++++++++++++++---- 1 file changed, 118 insertions(+), 12 deletions(-) diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c index fac7dbe572db..2ff1d1334a03 100644 --- a/drivers/nvme/target/fcloop.c +++ b/drivers/nvme/target/fcloop.c @@ -208,10 +208,13 @@ struct fcloop_rport { }; struct fcloop_tport { - struct nvmet_fc_target_port *targetport; - struct nvme_fc_remote_port *remoteport; - struct fcloop_nport *nport; - struct fcloop_lport *lport; + struct nvmet_fc_target_port *targetport; + struct nvme_fc_remote_port *remoteport; + struct fcloop_nport *nport; + struct fcloop_lport *lport; + spinlock_t lock; + struct list_head ls_list; + struct work_struct ls_work; }; struct fcloop_nport { @@ -226,11 +229,6 @@ struct fcloop_nport { u32 port_id; }; -enum { - H2T = 0, - T2H = 1, -}; - struct fcloop_lsreq { struct nvmefc_ls_req *lsreq; struct nvmefc_ls_rsp ls_rsp; @@ -337,7 +335,6 @@ fcloop_h2t_ls_req(struct nvme_fc_local_port *localport, struct fcloop_rport *rport = remoteport->private; int ret = 0; - tls_req->lsdir = H2T; tls_req->lsreq = lsreq; INIT_LIST_HEAD(&tls_req->ls_list); @@ -351,8 +348,9 @@ fcloop_h2t_ls_req(struct nvme_fc_local_port *localport, } tls_req->status = 0; - ret = nvmet_fc_rcv_ls_req(rport->targetport, NULL, &tls_req->ls_rsp, - lsreq->rqstaddr, lsreq->rqstlen); + ret = nvmet_fc_rcv_ls_req(rport->targetport, rport, + &tls_req->ls_rsp, + lsreq->rqstaddr, lsreq->rqstlen); return ret; } @@ -384,6 +382,99 @@ fcloop_h2t_xmt_ls_rsp(struct nvmet_fc_target_port *targetport, return 0; } +static void +fcloop_tport_lsrqst_work(struct work_struct *work) +{ + struct fcloop_tport *tport = + container_of(work, struct fcloop_tport, ls_work); + struct fcloop_lsreq *tls_req; + + spin_lock(&tport->lock); + for (;;) { + tls_req = list_first_entry_or_null(&tport->ls_list, + struct fcloop_lsreq, ls_list); + if (!tls_req) + break; + + list_del(&tls_req->ls_list); + spin_unlock(&tport->lock); + + tls_req->lsreq->done(tls_req->lsreq, tls_req->status); + /* + * callee may free memory containing tls_req. + * do not reference lsreq after this. + */ + + spin_lock(&tport->lock); + } + spin_unlock(&tport->lock); +} + +static int +fcloop_t2h_ls_req(struct nvmet_fc_target_port *targetport, void *hosthandle, + struct nvmefc_ls_req *lsreq) +{ + struct fcloop_lsreq *tls_req = lsreq->private; + struct fcloop_tport *tport = targetport->private; + int ret = 0; + + /* + * hosthandle should be the dst.rport value. 
+ * hosthandle ignored as fcloop currently is + * 1:1 tgtport vs remoteport + */ + tls_req->lsreq = lsreq; + INIT_LIST_HEAD(&tls_req->ls_list); + + if (!tport->remoteport) { + tls_req->status = -ECONNREFUSED; + spin_lock(&tport->lock); + list_add_tail(&tport->ls_list, &tls_req->ls_list); + spin_unlock(&tport->lock); + schedule_work(&tport->ls_work); + return ret; + } + + tls_req->status = 0; + ret = nvme_fc_rcv_ls_req(tport->remoteport, &tls_req->ls_rsp, + lsreq->rqstaddr, lsreq->rqstlen); + + return ret; +} + +static int +fcloop_t2h_xmt_ls_rsp(struct nvme_fc_local_port *localport, + struct nvme_fc_remote_port *remoteport, + struct nvmefc_ls_rsp *lsrsp) +{ + struct fcloop_lsreq *tls_req = ls_rsp_to_lsreq(lsrsp); + struct nvmefc_ls_req *lsreq = tls_req->lsreq; + struct fcloop_rport *rport = remoteport->private; + struct nvmet_fc_target_port *targetport = rport->targetport; + struct fcloop_tport *tport; + + memcpy(lsreq->rspaddr, lsrsp->rspbuf, + ((lsreq->rsplen < lsrsp->rsplen) ? + lsreq->rsplen : lsrsp->rsplen)); + lsrsp->done(lsrsp); + + if (targetport) { + tport = targetport->private; + spin_lock(&tport->lock); + list_add_tail(&tport->ls_list, &tls_req->ls_list); + spin_unlock(&tport->lock); + schedule_work(&tport->ls_work); + } + + return 0; +} + +static void +fcloop_t2h_host_release(void *hosthandle) +{ + /* host handle ignored for now */ +} + /* * Simulate reception of RSCN and converting it to a initiator transport * call to rescan a remote port. @@ -775,6 +866,12 @@ fcloop_h2t_ls_abort(struct nvme_fc_local_port *localport, { } +static void +fcloop_t2h_ls_abort(struct nvmet_fc_target_port *targetport, + void *hosthandle, struct nvmefc_ls_req *lsreq) +{ +} + static void fcloop_fcp_abort(struct nvme_fc_local_port *localport, struct nvme_fc_remote_port *remoteport, @@ -874,6 +971,7 @@ fcloop_targetport_delete(struct nvmet_fc_target_port *targetport) { struct fcloop_tport *tport = targetport->private; + flush_work(&tport->ls_work); fcloop_nport_put(tport->nport); } @@ -890,6 +988,7 @@ static struct nvme_fc_port_template fctemplate = { .fcp_io = fcloop_fcp_req, .ls_abort = fcloop_h2t_ls_abort, .fcp_abort = fcloop_fcp_abort, + .xmt_ls_rsp = fcloop_t2h_xmt_ls_rsp, .max_hw_queues = FCLOOP_HW_QUEUES, .max_sgl_segments = FCLOOP_SGL_SEGS, .max_dif_sgl_segments = FCLOOP_SGL_SEGS, @@ -908,6 +1007,9 @@ static struct nvmet_fc_target_template tgttemplate = { .fcp_abort = fcloop_tgt_fcp_abort, .fcp_req_release = fcloop_fcp_req_release, .discovery_event = fcloop_tgt_discovery_evt, + .ls_req = fcloop_t2h_ls_req, + .ls_abort = fcloop_t2h_ls_abort, + .host_release = fcloop_t2h_host_release, .max_hw_queues = FCLOOP_HW_QUEUES, .max_sgl_segments = FCLOOP_SGL_SEGS, .max_dif_sgl_segments = FCLOOP_SGL_SEGS, @@ -916,6 +1018,7 @@ static struct nvmet_fc_target_template tgttemplate = { .target_features = 0, /* sizes of additional private data for data structures */ .target_priv_sz = sizeof(struct fcloop_tport), + .lsrqst_priv_sz = sizeof(struct fcloop_lsreq), }; static ssize_t @@ -1265,6 +1368,9 @@ fcloop_create_target_port(struct device *dev, struct device_attribute *attr, tport->nport = nport; tport->lport = nport->lport; nport->tport = tport; + spin_lock_init(&tport->lock); + INIT_WORK(&tport->ls_work, fcloop_tport_lsrqst_work); + INIT_LIST_HEAD(&tport->ls_list); return count; } -- cgit v1.2.3 From 2a1160a03ac477b95d596bc4a0955ee3d7d0f3c9 Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 31 Mar 2020 09:50:02 -0700 Subject: lpfc: Refactor lpfc nvme headers A lot of files in lpfc include nvme headers, 
building up relationships that require a file to change for its headers when there is no other change necessary. It would be better to localize the nvme headers. There is also no need for separate nvme (initiator) and nvmet (tgt) header files. Refactor the inclusion of nvme headers so that all nvme items are included by lpfc_nvme.h Merge lpfc_nvmet.h into lpfc_nvme.h so that there is a single header used by both the nvme and nvmet sides. This prepares for structure sharing between the two roles. Prep to add shared function prototypes for upcoming shared routines. Signed-off-by: Paul Ely Signed-off-by: James Smart Reviewed-by: Hannes Reinecke Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/scsi/lpfc/lpfc_attr.c | 3 - drivers/scsi/lpfc/lpfc_ct.c | 1 - drivers/scsi/lpfc/lpfc_debugfs.c | 3 - drivers/scsi/lpfc/lpfc_hbadisc.c | 2 - drivers/scsi/lpfc/lpfc_init.c | 3 - drivers/scsi/lpfc/lpfc_mem.c | 4 - drivers/scsi/lpfc/lpfc_nportdisc.c | 2 - drivers/scsi/lpfc/lpfc_nvme.c | 3 - drivers/scsi/lpfc/lpfc_nvme.h | 146 ++++++++++++++++++++++++++++++++++ drivers/scsi/lpfc/lpfc_nvmet.c | 5 -- drivers/scsi/lpfc/lpfc_nvmet.h | 158 ------------------------------------- drivers/scsi/lpfc/lpfc_sli.c | 3 - 12 files changed, 146 insertions(+), 187 deletions(-) delete mode 100644 drivers/scsi/lpfc/lpfc_nvmet.h diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c index 1354c141d614..f089867674cb 100644 --- a/drivers/scsi/lpfc/lpfc_attr.c +++ b/drivers/scsi/lpfc/lpfc_attr.c @@ -37,8 +37,6 @@ #include #include -#include - #include "lpfc_hw4.h" #include "lpfc_hw.h" #include "lpfc_sli.h" @@ -48,7 +46,6 @@ #include "lpfc.h" #include "lpfc_scsi.h" #include "lpfc_nvme.h" -#include "lpfc_nvmet.h" #include "lpfc_logmsg.h" #include "lpfc_version.h" #include "lpfc_compat.h" diff --git a/drivers/scsi/lpfc/lpfc_ct.c b/drivers/scsi/lpfc/lpfc_ct.c index 2aa578d20f8c..196f6ae9952e 100644 --- a/drivers/scsi/lpfc/lpfc_ct.c +++ b/drivers/scsi/lpfc/lpfc_ct.c @@ -44,7 +44,6 @@ #include "lpfc_disc.h" #include "lpfc.h" #include "lpfc_scsi.h" -#include "lpfc_nvme.h" #include "lpfc_logmsg.h" #include "lpfc_crtn.h" #include "lpfc_version.h" diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c index 8a6e02aa553f..8e78e49e3f9f 100644 --- a/drivers/scsi/lpfc/lpfc_debugfs.c +++ b/drivers/scsi/lpfc/lpfc_debugfs.c @@ -39,8 +39,6 @@ #include #include -#include - #include "lpfc_hw4.h" #include "lpfc_hw.h" #include "lpfc_sli.h" @@ -50,7 +48,6 @@ #include "lpfc.h" #include "lpfc_scsi.h" #include "lpfc_nvme.h" -#include "lpfc_nvmet.h" #include "lpfc_logmsg.h" #include "lpfc_crtn.h" #include "lpfc_vport.h" diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c index 789eecbf32eb..8dec7b7c06d1 100644 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c @@ -36,8 +36,6 @@ #include #include -#include - #include "lpfc_hw4.h" #include "lpfc_hw.h" #include "lpfc_nl.h" diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index 4104bdcdbb6f..a29566010d94 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -50,8 +50,6 @@ #include #include -#include - #include "lpfc_hw4.h" #include "lpfc_hw.h" #include "lpfc_sli.h" @@ -61,7 +59,6 @@ #include "lpfc.h" #include "lpfc_scsi.h" #include "lpfc_nvme.h" -#include "lpfc_nvmet.h" #include "lpfc_logmsg.h" #include "lpfc_crtn.h" #include "lpfc_vport.h" diff --git a/drivers/scsi/lpfc/lpfc_mem.c b/drivers/scsi/lpfc/lpfc_mem.c index 
7082279e4c01..726f6619230f 100644 --- a/drivers/scsi/lpfc/lpfc_mem.c +++ b/drivers/scsi/lpfc/lpfc_mem.c @@ -31,8 +31,6 @@ #include #include -#include - #include "lpfc_hw4.h" #include "lpfc_hw.h" #include "lpfc_sli.h" @@ -41,8 +39,6 @@ #include "lpfc_disc.h" #include "lpfc.h" #include "lpfc_scsi.h" -#include "lpfc_nvme.h" -#include "lpfc_nvmet.h" #include "lpfc_crtn.h" #include "lpfc_logmsg.h" diff --git a/drivers/scsi/lpfc/lpfc_nportdisc.c b/drivers/scsi/lpfc/lpfc_nportdisc.c index a024e5a3918f..81f4ba1c24b4 100644 --- a/drivers/scsi/lpfc/lpfc_nportdisc.c +++ b/drivers/scsi/lpfc/lpfc_nportdisc.c @@ -32,8 +32,6 @@ #include #include -#include - #include "lpfc_hw4.h" #include "lpfc_hw.h" #include "lpfc_sli.h" diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c index a45936e08031..6045000477ac 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.c +++ b/drivers/scsi/lpfc/lpfc_nvme.c @@ -36,9 +36,6 @@ #include #include -#include -#include -#include #include "lpfc_version.h" #include "lpfc_hw4.h" #include "lpfc_hw.h" diff --git a/drivers/scsi/lpfc/lpfc_nvme.h b/drivers/scsi/lpfc/lpfc_nvme.h index 593c48ff634e..e59dc1f5c18a 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.h +++ b/drivers/scsi/lpfc/lpfc_nvme.h @@ -21,6 +21,10 @@ * included with this package. * ********************************************************************/ +#include +#include +#include + #define LPFC_NVME_DEFAULT_SEGS (64 + 1) /* 256K IOs */ #define LPFC_NVME_ERSP_LEN 0x20 @@ -74,3 +78,145 @@ struct lpfc_nvme_rport { struct lpfc_nvme_fcpreq_priv { struct lpfc_io_buf *nvme_buf; }; + + +#define LPFC_NVMET_DEFAULT_SEGS (64 + 1) /* 256K IOs */ +#define LPFC_NVMET_RQE_MIN_POST 128 +#define LPFC_NVMET_RQE_DEF_POST 512 +#define LPFC_NVMET_RQE_DEF_COUNT 2048 +#define LPFC_NVMET_SUCCESS_LEN 12 + +#define LPFC_NVMET_MRQ_AUTO 0 +#define LPFC_NVMET_MRQ_MAX 16 + +#define LPFC_NVMET_WAIT_TMO (5 * MSEC_PER_SEC) + +/* Used for NVME Target */ +struct lpfc_nvmet_tgtport { + struct lpfc_hba *phba; + struct completion *tport_unreg_cmp; + + /* Stats counters - lpfc_nvmet_unsol_ls_buffer */ + atomic_t rcv_ls_req_in; + atomic_t rcv_ls_req_out; + atomic_t rcv_ls_req_drop; + atomic_t xmt_ls_abort; + atomic_t xmt_ls_abort_cmpl; + + /* Stats counters - lpfc_nvmet_xmt_ls_rsp */ + atomic_t xmt_ls_rsp; + atomic_t xmt_ls_drop; + + /* Stats counters - lpfc_nvmet_xmt_ls_rsp_cmp */ + atomic_t xmt_ls_rsp_error; + atomic_t xmt_ls_rsp_aborted; + atomic_t xmt_ls_rsp_xb_set; + atomic_t xmt_ls_rsp_cmpl; + + /* Stats counters - lpfc_nvmet_unsol_fcp_buffer */ + atomic_t rcv_fcp_cmd_in; + atomic_t rcv_fcp_cmd_out; + atomic_t rcv_fcp_cmd_drop; + atomic_t rcv_fcp_cmd_defer; + atomic_t xmt_fcp_release; + + /* Stats counters - lpfc_nvmet_xmt_fcp_op */ + atomic_t xmt_fcp_drop; + atomic_t xmt_fcp_read_rsp; + atomic_t xmt_fcp_read; + atomic_t xmt_fcp_write; + atomic_t xmt_fcp_rsp; + + /* Stats counters - lpfc_nvmet_xmt_fcp_op_cmp */ + atomic_t xmt_fcp_rsp_xb_set; + atomic_t xmt_fcp_rsp_cmpl; + atomic_t xmt_fcp_rsp_error; + atomic_t xmt_fcp_rsp_aborted; + atomic_t xmt_fcp_rsp_drop; + + /* Stats counters - lpfc_nvmet_xmt_fcp_abort */ + atomic_t xmt_fcp_xri_abort_cqe; + atomic_t xmt_fcp_abort; + atomic_t xmt_fcp_abort_cmpl; + atomic_t xmt_abort_sol; + atomic_t xmt_abort_unsol; + atomic_t xmt_abort_rsp; + atomic_t xmt_abort_rsp_error; + + /* Stats counters - defer IO */ + atomic_t defer_ctx; + atomic_t defer_fod; + atomic_t defer_wqfull; +}; + +struct lpfc_nvmet_ctx_info { + struct list_head nvmet_ctx_list; + spinlock_t nvmet_ctx_list_lock; /* lock per CPU */ + struct 
lpfc_nvmet_ctx_info *nvmet_ctx_next_cpu; + struct lpfc_nvmet_ctx_info *nvmet_ctx_start_cpu; + uint16_t nvmet_ctx_list_cnt; + char pad[16]; /* pad to a cache-line */ +}; + +/* This retrieves the context info associated with the specified cpu / mrq */ +#define lpfc_get_ctx_list(phba, cpu, mrq) \ + (phba->sli4_hba.nvmet_ctx_info + ((cpu * phba->cfg_nvmet_mrq) + mrq)) + +struct lpfc_nvmet_rcv_ctx { + union { + struct nvmefc_ls_rsp ls_rsp; + struct nvmefc_tgt_fcp_req fcp_req; + } ctx; + struct list_head list; + struct lpfc_hba *phba; + struct lpfc_iocbq *wqeq; + struct lpfc_iocbq *abort_wqeq; + spinlock_t ctxlock; /* protect flag access */ + uint32_t sid; + uint32_t offset; + uint16_t oxid; + uint16_t size; + uint16_t entry_cnt; + uint16_t cpu; + uint16_t idx; + uint16_t state; + /* States */ +#define LPFC_NVMET_STE_LS_RCV 1 +#define LPFC_NVMET_STE_LS_ABORT 2 +#define LPFC_NVMET_STE_LS_RSP 3 +#define LPFC_NVMET_STE_RCV 4 +#define LPFC_NVMET_STE_DATA 5 +#define LPFC_NVMET_STE_ABORT 6 +#define LPFC_NVMET_STE_DONE 7 +#define LPFC_NVMET_STE_FREE 0xff + uint16_t flag; +#define LPFC_NVMET_IO_INP 0x1 /* IO is in progress on exchange */ +#define LPFC_NVMET_ABORT_OP 0x2 /* Abort WQE issued on exchange */ +#define LPFC_NVMET_XBUSY 0x4 /* XB bit set on IO cmpl */ +#define LPFC_NVMET_CTX_RLS 0x8 /* ctx free requested */ +#define LPFC_NVMET_ABTS_RCV 0x10 /* ABTS received on exchange */ +#define LPFC_NVMET_CTX_REUSE_WQ 0x20 /* ctx reused via WQ */ +#define LPFC_NVMET_DEFER_WQFULL 0x40 /* Waiting on a free WQE */ +#define LPFC_NVMET_TNOTIFY 0x80 /* notify transport of abts */ + struct rqb_dmabuf *rqb_buffer; + struct lpfc_nvmet_ctxbuf *ctxbuf; + struct lpfc_sli4_hdw_queue *hdwq; + +#ifdef CONFIG_SCSI_LPFC_DEBUG_FS + uint64_t ts_isr_cmd; + uint64_t ts_cmd_nvme; + uint64_t ts_nvme_data; + uint64_t ts_data_wqput; + uint64_t ts_isr_data; + uint64_t ts_data_nvme; + uint64_t ts_nvme_status; + uint64_t ts_status_wqput; + uint64_t ts_isr_status; + uint64_t ts_status_nvme; +#endif +}; + + +/* routines found in lpfc_nvme.c */ + +/* routines found in lpfc_nvmet.c */ diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c index 3b25bcb150ea..9576bc30ec85 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.c +++ b/drivers/scsi/lpfc/lpfc_nvmet.c @@ -36,10 +36,6 @@ #include #include -#include -#include -#include - #include "lpfc_version.h" #include "lpfc_hw4.h" #include "lpfc_hw.h" @@ -50,7 +46,6 @@ #include "lpfc.h" #include "lpfc_scsi.h" #include "lpfc_nvme.h" -#include "lpfc_nvmet.h" #include "lpfc_logmsg.h" #include "lpfc_crtn.h" #include "lpfc_vport.h" diff --git a/drivers/scsi/lpfc/lpfc_nvmet.h b/drivers/scsi/lpfc/lpfc_nvmet.h deleted file mode 100644 index f0196f3ef90d..000000000000 --- a/drivers/scsi/lpfc/lpfc_nvmet.h +++ /dev/null @@ -1,158 +0,0 @@ -/******************************************************************* - * This file is part of the Emulex Linux Device Driver for * - * Fibre Channel Host Bus Adapters. * - * Copyright (C) 2017-2019 Broadcom. All Rights Reserved. The term * - * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries. * - * Copyright (C) 2004-2016 Emulex. All rights reserved. * - * EMULEX and SLI are trademarks of Emulex. * - * www.broadcom.com * - * Portions Copyright (C) 2004-2005 Christoph Hellwig * - * * - * This program is free software; you can redistribute it and/or * - * modify it under the terms of version 2 of the GNU General * - * Public License as published by the Free Software Foundation. * - * This program is distributed in the hope that it will be useful. 
* - * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND * - * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, * - * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE * - * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD * - * TO BE LEGALLY INVALID. See the GNU General Public License for * - * more details, a copy of which can be found in the file COPYING * - * included with this package. * - ********************************************************************/ - -#define LPFC_NVMET_DEFAULT_SEGS (64 + 1) /* 256K IOs */ -#define LPFC_NVMET_RQE_MIN_POST 128 -#define LPFC_NVMET_RQE_DEF_POST 512 -#define LPFC_NVMET_RQE_DEF_COUNT 2048 -#define LPFC_NVMET_SUCCESS_LEN 12 - -#define LPFC_NVMET_MRQ_AUTO 0 -#define LPFC_NVMET_MRQ_MAX 16 - -#define LPFC_NVMET_WAIT_TMO (5 * MSEC_PER_SEC) - -/* Used for NVME Target */ -struct lpfc_nvmet_tgtport { - struct lpfc_hba *phba; - struct completion *tport_unreg_cmp; - - /* Stats counters - lpfc_nvmet_unsol_ls_buffer */ - atomic_t rcv_ls_req_in; - atomic_t rcv_ls_req_out; - atomic_t rcv_ls_req_drop; - atomic_t xmt_ls_abort; - atomic_t xmt_ls_abort_cmpl; - - /* Stats counters - lpfc_nvmet_xmt_ls_rsp */ - atomic_t xmt_ls_rsp; - atomic_t xmt_ls_drop; - - /* Stats counters - lpfc_nvmet_xmt_ls_rsp_cmp */ - atomic_t xmt_ls_rsp_error; - atomic_t xmt_ls_rsp_aborted; - atomic_t xmt_ls_rsp_xb_set; - atomic_t xmt_ls_rsp_cmpl; - - /* Stats counters - lpfc_nvmet_unsol_fcp_buffer */ - atomic_t rcv_fcp_cmd_in; - atomic_t rcv_fcp_cmd_out; - atomic_t rcv_fcp_cmd_drop; - atomic_t rcv_fcp_cmd_defer; - atomic_t xmt_fcp_release; - - /* Stats counters - lpfc_nvmet_xmt_fcp_op */ - atomic_t xmt_fcp_drop; - atomic_t xmt_fcp_read_rsp; - atomic_t xmt_fcp_read; - atomic_t xmt_fcp_write; - atomic_t xmt_fcp_rsp; - - /* Stats counters - lpfc_nvmet_xmt_fcp_op_cmp */ - atomic_t xmt_fcp_rsp_xb_set; - atomic_t xmt_fcp_rsp_cmpl; - atomic_t xmt_fcp_rsp_error; - atomic_t xmt_fcp_rsp_aborted; - atomic_t xmt_fcp_rsp_drop; - - /* Stats counters - lpfc_nvmet_xmt_fcp_abort */ - atomic_t xmt_fcp_xri_abort_cqe; - atomic_t xmt_fcp_abort; - atomic_t xmt_fcp_abort_cmpl; - atomic_t xmt_abort_sol; - atomic_t xmt_abort_unsol; - atomic_t xmt_abort_rsp; - atomic_t xmt_abort_rsp_error; - - /* Stats counters - defer IO */ - atomic_t defer_ctx; - atomic_t defer_fod; - atomic_t defer_wqfull; -}; - -struct lpfc_nvmet_ctx_info { - struct list_head nvmet_ctx_list; - spinlock_t nvmet_ctx_list_lock; /* lock per CPU */ - struct lpfc_nvmet_ctx_info *nvmet_ctx_next_cpu; - struct lpfc_nvmet_ctx_info *nvmet_ctx_start_cpu; - uint16_t nvmet_ctx_list_cnt; - char pad[16]; /* pad to a cache-line */ -}; - -/* This retrieves the context info associated with the specified cpu / mrq */ -#define lpfc_get_ctx_list(phba, cpu, mrq) \ - (phba->sli4_hba.nvmet_ctx_info + ((cpu * phba->cfg_nvmet_mrq) + mrq)) - -struct lpfc_nvmet_rcv_ctx { - union { - struct nvmefc_ls_rsp ls_rsp; - struct nvmefc_tgt_fcp_req fcp_req; - } ctx; - struct list_head list; - struct lpfc_hba *phba; - struct lpfc_iocbq *wqeq; - struct lpfc_iocbq *abort_wqeq; - spinlock_t ctxlock; /* protect flag access */ - uint32_t sid; - uint32_t offset; - uint16_t oxid; - uint16_t size; - uint16_t entry_cnt; - uint16_t cpu; - uint16_t idx; - uint16_t state; - /* States */ -#define LPFC_NVMET_STE_LS_RCV 1 -#define LPFC_NVMET_STE_LS_ABORT 2 -#define LPFC_NVMET_STE_LS_RSP 3 -#define LPFC_NVMET_STE_RCV 4 -#define LPFC_NVMET_STE_DATA 5 -#define LPFC_NVMET_STE_ABORT 6 -#define LPFC_NVMET_STE_DONE 7 -#define LPFC_NVMET_STE_FREE 0xff - uint16_t 
flag; -#define LPFC_NVMET_IO_INP 0x1 /* IO is in progress on exchange */ -#define LPFC_NVMET_ABORT_OP 0x2 /* Abort WQE issued on exchange */ -#define LPFC_NVMET_XBUSY 0x4 /* XB bit set on IO cmpl */ -#define LPFC_NVMET_CTX_RLS 0x8 /* ctx free requested */ -#define LPFC_NVMET_ABTS_RCV 0x10 /* ABTS received on exchange */ -#define LPFC_NVMET_CTX_REUSE_WQ 0x20 /* ctx reused via WQ */ -#define LPFC_NVMET_DEFER_WQFULL 0x40 /* Waiting on a free WQE */ -#define LPFC_NVMET_TNOTIFY 0x80 /* notify transport of abts */ - struct rqb_dmabuf *rqb_buffer; - struct lpfc_nvmet_ctxbuf *ctxbuf; - struct lpfc_sli4_hdw_queue *hdwq; - -#ifdef CONFIG_SCSI_LPFC_DEBUG_FS - uint64_t ts_isr_cmd; - uint64_t ts_cmd_nvme; - uint64_t ts_nvme_data; - uint64_t ts_data_wqput; - uint64_t ts_isr_data; - uint64_t ts_data_nvme; - uint64_t ts_nvme_status; - uint64_t ts_status_wqput; - uint64_t ts_isr_status; - uint64_t ts_status_nvme; -#endif -}; diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index b6fb665e6ec4..d57918dec15c 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -39,8 +39,6 @@ #include #endif -#include - #include "lpfc_hw4.h" #include "lpfc_hw.h" #include "lpfc_sli.h" @@ -50,7 +48,6 @@ #include "lpfc.h" #include "lpfc_scsi.h" #include "lpfc_nvme.h" -#include "lpfc_nvmet.h" #include "lpfc_crtn.h" #include "lpfc_logmsg.h" #include "lpfc_compat.h" -- cgit v1.2.3 From 7cacae2ad04762803ad93bdf08dc482106817ec7 Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 31 Mar 2020 09:50:03 -0700 Subject: lpfc: Refactor nvmet_rcv_ctx to create lpfc_async_xchg_ctx To support FC-NVME-2 (actually FC-NVME (rev 1) with Amendment 1), both the nvme (host) and nvmet (controller/target) sides will need to be able to receive LS requests. Currently, this support exists on the nvmet side only. To prepare for both sides supporting LS receive, rename lpfc_nvmet_rcv_ctx to lpfc_async_xchg_ctx and commonize the definition.
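In reduced form, the commonized context this patch creates looks roughly as below: the union keeps only the handler-specific FCP request, while the LS request/response fields move to the top level so either role (nvme or nvmet) can use them. This is a sketch only; the real struct lpfc_async_xchg_ctx carries many more fields, as the diff that follows shows:

    #include <linux/types.h>
    #include <linux/nvme-fc-driver.h>

    /* reduced sketch of struct lpfc_async_xchg_ctx after this patch */
    struct demo_async_xchg_ctx {
        union {
                struct nvmefc_tgt_fcp_req fcp_req;  /* nvmet FCP command */
        } hdlrctx;                      /* handler-specific part */
        struct nvmefc_ls_req    *ls_req;    /* LS issued by this side */
        struct nvmefc_ls_rsp    ls_rsp;     /* rsp for a received LS */
        uint16_t                state;      /* exchange state machine */
        uint16_t                flag;
    };

Pulling ls_rsp out of the union is what lets a single context type back both a received LS awaiting a response and an FCP exchange, regardless of which side owns it.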
Signed-off-by: Paul Ely Signed-off-by: James Smart Reviewed-by: Hannes Reinecke Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/scsi/lpfc/lpfc.h | 2 +- drivers/scsi/lpfc/lpfc_crtn.h | 1 - drivers/scsi/lpfc/lpfc_debugfs.c | 2 +- drivers/scsi/lpfc/lpfc_init.c | 2 +- drivers/scsi/lpfc/lpfc_nvme.h | 7 +-- drivers/scsi/lpfc/lpfc_nvmet.c | 109 ++++++++++++++++++++------------------- drivers/scsi/lpfc/lpfc_sli.c | 2 +- 7 files changed, 63 insertions(+), 62 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h index 8e2a356911a9..62e96d4fdcc6 100644 --- a/drivers/scsi/lpfc/lpfc.h +++ b/drivers/scsi/lpfc/lpfc.h @@ -143,7 +143,7 @@ struct lpfc_dmabuf { struct lpfc_nvmet_ctxbuf { struct list_head list; - struct lpfc_nvmet_rcv_ctx *context; + struct lpfc_async_xchg_ctx *context; struct lpfc_iocbq *iocbq; struct lpfc_sglq *sglq; struct work_struct defer_work; diff --git a/drivers/scsi/lpfc/lpfc_crtn.h b/drivers/scsi/lpfc/lpfc_crtn.h index 76dc8d9493d2..635df4a0188c 100644 --- a/drivers/scsi/lpfc/lpfc_crtn.h +++ b/drivers/scsi/lpfc/lpfc_crtn.h @@ -24,7 +24,6 @@ typedef int (*node_filter)(struct lpfc_nodelist *, void *); struct fc_rport; struct fc_frame_header; -struct lpfc_nvmet_rcv_ctx; void lpfc_down_link(struct lpfc_hba *, LPFC_MBOXQ_t *); void lpfc_sli_read_link_ste(struct lpfc_hba *); void lpfc_dump_mem(struct lpfc_hba *, LPFC_MBOXQ_t *, uint16_t, uint16_t); diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c index 8e78e49e3f9f..4e94148b9352 100644 --- a/drivers/scsi/lpfc/lpfc_debugfs.c +++ b/drivers/scsi/lpfc/lpfc_debugfs.c @@ -1032,7 +1032,7 @@ lpfc_debugfs_nvmestat_data(struct lpfc_vport *vport, char *buf, int size) { struct lpfc_hba *phba = vport->phba; struct lpfc_nvmet_tgtport *tgtp; - struct lpfc_nvmet_rcv_ctx *ctxp, *next_ctxp; + struct lpfc_async_xchg_ctx *ctxp, *next_ctxp; struct nvme_fc_local_port *localport; struct lpfc_fc4_ctrl_stat *cstat; struct lpfc_nvme_lport *lport; diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index a29566010d94..0e0b40710ea9 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -1029,7 +1029,7 @@ static int lpfc_hba_down_post_s4(struct lpfc_hba *phba) { struct lpfc_io_buf *psb, *psb_next; - struct lpfc_nvmet_rcv_ctx *ctxp, *ctxp_next; + struct lpfc_async_xchg_ctx *ctxp, *ctxp_next; struct lpfc_sli4_hdw_queue *qp; LIST_HEAD(aborts); LIST_HEAD(nvme_aborts); diff --git a/drivers/scsi/lpfc/lpfc_nvme.h b/drivers/scsi/lpfc/lpfc_nvme.h index e59dc1f5c18a..be7d262f8cf3 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.h +++ b/drivers/scsi/lpfc/lpfc_nvme.h @@ -162,13 +162,14 @@ struct lpfc_nvmet_ctx_info { #define lpfc_get_ctx_list(phba, cpu, mrq) \ (phba->sli4_hba.nvmet_ctx_info + ((cpu * phba->cfg_nvmet_mrq) + mrq)) -struct lpfc_nvmet_rcv_ctx { +struct lpfc_async_xchg_ctx { union { - struct nvmefc_ls_rsp ls_rsp; struct nvmefc_tgt_fcp_req fcp_req; - } ctx; + } hdlrctx; struct list_head list; struct lpfc_hba *phba; + struct nvmefc_ls_req *ls_req; + struct nvmefc_ls_rsp ls_rsp; struct lpfc_iocbq *wqeq; struct lpfc_iocbq *abort_wqeq; spinlock_t ctxlock; /* protect flag access */ diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c index 9576bc30ec85..03e8010564af 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.c +++ b/drivers/scsi/lpfc/lpfc_nvmet.c @@ -52,22 +52,22 @@ #include "lpfc_debugfs.h" static struct lpfc_iocbq *lpfc_nvmet_prep_ls_wqe(struct lpfc_hba *, - struct lpfc_nvmet_rcv_ctx *, + struct lpfc_async_xchg_ctx *, dma_addr_t 
rspbuf, uint16_t rspsize); static struct lpfc_iocbq *lpfc_nvmet_prep_fcp_wqe(struct lpfc_hba *, - struct lpfc_nvmet_rcv_ctx *); + struct lpfc_async_xchg_ctx *); static int lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *, - struct lpfc_nvmet_rcv_ctx *, + struct lpfc_async_xchg_ctx *, uint32_t, uint16_t); static int lpfc_nvmet_unsol_fcp_issue_abort(struct lpfc_hba *, - struct lpfc_nvmet_rcv_ctx *, + struct lpfc_async_xchg_ctx *, uint32_t, uint16_t); static int lpfc_nvmet_unsol_ls_issue_abort(struct lpfc_hba *, - struct lpfc_nvmet_rcv_ctx *, + struct lpfc_async_xchg_ctx *, uint32_t, uint16_t); static void lpfc_nvmet_wqfull_flush(struct lpfc_hba *, struct lpfc_queue *, - struct lpfc_nvmet_rcv_ctx *); + struct lpfc_async_xchg_ctx *); static void lpfc_nvmet_fcp_rqst_defer_work(struct work_struct *); static void lpfc_nvmet_process_rcv_fcp_req(struct lpfc_nvmet_ctxbuf *ctx_buf); @@ -216,10 +216,10 @@ lpfc_nvmet_cmd_template(void) } #if (IS_ENABLED(CONFIG_NVME_TARGET_FC)) -static struct lpfc_nvmet_rcv_ctx * +static struct lpfc_async_xchg_ctx * lpfc_nvmet_get_ctx_for_xri(struct lpfc_hba *phba, u16 xri) { - struct lpfc_nvmet_rcv_ctx *ctxp; + struct lpfc_async_xchg_ctx *ctxp; unsigned long iflag; bool found = false; @@ -238,10 +238,10 @@ lpfc_nvmet_get_ctx_for_xri(struct lpfc_hba *phba, u16 xri) return NULL; } -static struct lpfc_nvmet_rcv_ctx * +static struct lpfc_async_xchg_ctx * lpfc_nvmet_get_ctx_for_oxid(struct lpfc_hba *phba, u16 oxid, u32 sid) { - struct lpfc_nvmet_rcv_ctx *ctxp; + struct lpfc_async_xchg_ctx *ctxp; unsigned long iflag; bool found = false; @@ -262,7 +262,8 @@ lpfc_nvmet_get_ctx_for_oxid(struct lpfc_hba *phba, u16 oxid, u32 sid) #endif static void -lpfc_nvmet_defer_release(struct lpfc_hba *phba, struct lpfc_nvmet_rcv_ctx *ctxp) +lpfc_nvmet_defer_release(struct lpfc_hba *phba, + struct lpfc_async_xchg_ctx *ctxp) { lockdep_assert_held(&ctxp->ctxlock); @@ -298,7 +299,7 @@ lpfc_nvmet_xmt_ls_rsp_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, { struct lpfc_nvmet_tgtport *tgtp; struct nvmefc_ls_rsp *rsp; - struct lpfc_nvmet_rcv_ctx *ctxp; + struct lpfc_async_xchg_ctx *ctxp; uint32_t status, result; status = bf_get(lpfc_wcqe_c_status, wcqe); @@ -330,7 +331,7 @@ lpfc_nvmet_xmt_ls_rsp_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, } out: - rsp = &ctxp->ctx.ls_rsp; + rsp = &ctxp->ls_rsp; lpfc_nvmeio_data(phba, "NVMET LS CMPL: xri x%x stat x%x result x%x\n", ctxp->oxid, status, result); @@ -364,7 +365,7 @@ void lpfc_nvmet_ctxbuf_post(struct lpfc_hba *phba, struct lpfc_nvmet_ctxbuf *ctx_buf) { #if (IS_ENABLED(CONFIG_NVME_TARGET_FC)) - struct lpfc_nvmet_rcv_ctx *ctxp = ctx_buf->context; + struct lpfc_async_xchg_ctx *ctxp = ctx_buf->context; struct lpfc_nvmet_tgtport *tgtp; struct fc_frame_header *fc_hdr; struct rqb_dmabuf *nvmebuf; @@ -416,7 +417,7 @@ lpfc_nvmet_ctxbuf_post(struct lpfc_hba *phba, struct lpfc_nvmet_ctxbuf *ctx_buf) size = nvmebuf->bytes_recv; sid = sli4_sid_from_fc_hdr(fc_hdr); - ctxp = (struct lpfc_nvmet_rcv_ctx *)ctx_buf->context; + ctxp = (struct lpfc_async_xchg_ctx *)ctx_buf->context; ctxp->wqeq = NULL; ctxp->offset = 0; ctxp->phba = phba; @@ -490,7 +491,7 @@ lpfc_nvmet_ctxbuf_post(struct lpfc_hba *phba, struct lpfc_nvmet_ctxbuf *ctx_buf) #ifdef CONFIG_SCSI_LPFC_DEBUG_FS static void lpfc_nvmet_ktime(struct lpfc_hba *phba, - struct lpfc_nvmet_rcv_ctx *ctxp) + struct lpfc_async_xchg_ctx *ctxp) { uint64_t seg1, seg2, seg3, seg4, seg5; uint64_t seg6, seg7, seg8, seg9, seg10; @@ -699,7 +700,7 @@ lpfc_nvmet_xmt_fcp_op_cmp(struct lpfc_hba *phba, struct lpfc_iocbq 
*cmdwqe, { struct lpfc_nvmet_tgtport *tgtp; struct nvmefc_tgt_fcp_req *rsp; - struct lpfc_nvmet_rcv_ctx *ctxp; + struct lpfc_async_xchg_ctx *ctxp; uint32_t status, result, op, start_clean, logerr; #ifdef CONFIG_SCSI_LPFC_DEBUG_FS int id; @@ -708,7 +709,7 @@ lpfc_nvmet_xmt_fcp_op_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, ctxp = cmdwqe->context2; ctxp->flag &= ~LPFC_NVMET_IO_INP; - rsp = &ctxp->ctx.fcp_req; + rsp = &ctxp->hdlrctx.fcp_req; op = rsp->op; status = bf_get(lpfc_wcqe_c_status, wcqe); @@ -825,8 +826,8 @@ static int lpfc_nvmet_xmt_ls_rsp(struct nvmet_fc_target_port *tgtport, struct nvmefc_ls_rsp *rsp) { - struct lpfc_nvmet_rcv_ctx *ctxp = - container_of(rsp, struct lpfc_nvmet_rcv_ctx, ctx.ls_rsp); + struct lpfc_async_xchg_ctx *ctxp = + container_of(rsp, struct lpfc_async_xchg_ctx, ls_rsp); struct lpfc_hba *phba = ctxp->phba; struct hbq_dmabuf *nvmebuf = (struct hbq_dmabuf *)ctxp->rqb_buffer; @@ -916,8 +917,8 @@ lpfc_nvmet_xmt_fcp_op(struct nvmet_fc_target_port *tgtport, struct nvmefc_tgt_fcp_req *rsp) { struct lpfc_nvmet_tgtport *lpfc_nvmep = tgtport->private; - struct lpfc_nvmet_rcv_ctx *ctxp = - container_of(rsp, struct lpfc_nvmet_rcv_ctx, ctx.fcp_req); + struct lpfc_async_xchg_ctx *ctxp = + container_of(rsp, struct lpfc_async_xchg_ctx, hdlrctx.fcp_req); struct lpfc_hba *phba = ctxp->phba; struct lpfc_queue *wq; struct lpfc_iocbq *nvmewqeq; @@ -1051,8 +1052,8 @@ lpfc_nvmet_xmt_fcp_abort(struct nvmet_fc_target_port *tgtport, struct nvmefc_tgt_fcp_req *req) { struct lpfc_nvmet_tgtport *lpfc_nvmep = tgtport->private; - struct lpfc_nvmet_rcv_ctx *ctxp = - container_of(req, struct lpfc_nvmet_rcv_ctx, ctx.fcp_req); + struct lpfc_async_xchg_ctx *ctxp = + container_of(req, struct lpfc_async_xchg_ctx, hdlrctx.fcp_req); struct lpfc_hba *phba = ctxp->phba; struct lpfc_queue *wq; unsigned long flags; @@ -1113,8 +1114,8 @@ lpfc_nvmet_xmt_fcp_release(struct nvmet_fc_target_port *tgtport, struct nvmefc_tgt_fcp_req *rsp) { struct lpfc_nvmet_tgtport *lpfc_nvmep = tgtport->private; - struct lpfc_nvmet_rcv_ctx *ctxp = - container_of(rsp, struct lpfc_nvmet_rcv_ctx, ctx.fcp_req); + struct lpfc_async_xchg_ctx *ctxp = + container_of(rsp, struct lpfc_async_xchg_ctx, hdlrctx.fcp_req); struct lpfc_hba *phba = ctxp->phba; unsigned long flags; bool aborting = false; @@ -1156,8 +1157,8 @@ lpfc_nvmet_defer_rcv(struct nvmet_fc_target_port *tgtport, struct nvmefc_tgt_fcp_req *rsp) { struct lpfc_nvmet_tgtport *tgtp; - struct lpfc_nvmet_rcv_ctx *ctxp = - container_of(rsp, struct lpfc_nvmet_rcv_ctx, ctx.fcp_req); + struct lpfc_async_xchg_ctx *ctxp = + container_of(rsp, struct lpfc_async_xchg_ctx, hdlrctx.fcp_req); struct rqb_dmabuf *nvmebuf = ctxp->rqb_buffer; struct lpfc_hba *phba = ctxp->phba; unsigned long iflag; @@ -1563,7 +1564,7 @@ lpfc_sli4_nvmet_xri_aborted(struct lpfc_hba *phba, #if (IS_ENABLED(CONFIG_NVME_TARGET_FC)) uint16_t xri = bf_get(lpfc_wcqe_xa_xri, axri); uint16_t rxid = bf_get(lpfc_wcqe_xa_remote_xid, axri); - struct lpfc_nvmet_rcv_ctx *ctxp, *next_ctxp; + struct lpfc_async_xchg_ctx *ctxp, *next_ctxp; struct lpfc_nvmet_tgtport *tgtp; struct nvmefc_tgt_fcp_req *req = NULL; struct lpfc_nodelist *ndlp; @@ -1649,7 +1650,7 @@ lpfc_sli4_nvmet_xri_aborted(struct lpfc_hba *phba, "NVMET ABTS RCV: xri x%x CPU %02x rjt %d\n", xri, raw_smp_processor_id(), 0); - req = &ctxp->ctx.fcp_req; + req = &ctxp->hdlrctx.fcp_req; if (req) nvmet_fc_rcv_fcp_abort(phba->targetport, req); } @@ -1662,7 +1663,7 @@ lpfc_nvmet_rcv_unsol_abort(struct lpfc_vport *vport, { #if (IS_ENABLED(CONFIG_NVME_TARGET_FC)) struct 
lpfc_hba *phba = vport->phba; - struct lpfc_nvmet_rcv_ctx *ctxp, *next_ctxp; + struct lpfc_async_xchg_ctx *ctxp, *next_ctxp; struct nvmefc_tgt_fcp_req *rsp; uint32_t sid; uint16_t oxid, xri; @@ -1695,7 +1696,7 @@ lpfc_nvmet_rcv_unsol_abort(struct lpfc_vport *vport, lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS, "6319 NVMET Rcv ABTS:acc xri x%x\n", xri); - rsp = &ctxp->ctx.fcp_req; + rsp = &ctxp->hdlrctx.fcp_req; nvmet_fc_rcv_fcp_abort(phba->targetport, rsp); /* Respond with BA_ACC accordingly */ @@ -1769,7 +1770,7 @@ lpfc_nvmet_rcv_unsol_abort(struct lpfc_vport *vport, if (ctxp->flag & LPFC_NVMET_TNOTIFY) { /* Notify the transport */ nvmet_fc_rcv_fcp_abort(phba->targetport, - &ctxp->ctx.fcp_req); + &ctxp->hdlrctx.fcp_req); } else { cancel_work_sync(&ctxp->ctxbuf->defer_work); spin_lock_irqsave(&ctxp->ctxlock, iflag); @@ -1797,7 +1798,7 @@ lpfc_nvmet_rcv_unsol_abort(struct lpfc_vport *vport, static void lpfc_nvmet_wqfull_flush(struct lpfc_hba *phba, struct lpfc_queue *wq, - struct lpfc_nvmet_rcv_ctx *ctxp) + struct lpfc_async_xchg_ctx *ctxp) { struct lpfc_sli_ring *pring; struct lpfc_iocbq *nvmewqeq; @@ -1848,7 +1849,7 @@ lpfc_nvmet_wqfull_process(struct lpfc_hba *phba, #if (IS_ENABLED(CONFIG_NVME_TARGET_FC)) struct lpfc_sli_ring *pring; struct lpfc_iocbq *nvmewqeq; - struct lpfc_nvmet_rcv_ctx *ctxp; + struct lpfc_async_xchg_ctx *ctxp; unsigned long iflags; int rc; @@ -1862,7 +1863,7 @@ lpfc_nvmet_wqfull_process(struct lpfc_hba *phba, list_remove_head(&wq->wqfull_list, nvmewqeq, struct lpfc_iocbq, list); spin_unlock_irqrestore(&pring->ring_lock, iflags); - ctxp = (struct lpfc_nvmet_rcv_ctx *)nvmewqeq->context2; + ctxp = (struct lpfc_async_xchg_ctx *)nvmewqeq->context2; rc = lpfc_sli4_issue_wqe(phba, ctxp->hdwq, nvmewqeq); spin_lock_irqsave(&pring->ring_lock, iflags); if (rc == -EBUSY) { @@ -1874,7 +1875,7 @@ lpfc_nvmet_wqfull_process(struct lpfc_hba *phba, if (rc == WQE_SUCCESS) { #ifdef CONFIG_SCSI_LPFC_DEBUG_FS if (ctxp->ts_cmd_nvme) { - if (ctxp->ctx.fcp_req.op == NVMET_FCOP_RSP) + if (ctxp->hdlrctx.fcp_req.op == NVMET_FCOP_RSP) ctxp->ts_status_wqput = ktime_get_ns(); else ctxp->ts_data_wqput = ktime_get_ns(); @@ -1940,7 +1941,7 @@ lpfc_nvmet_unsol_ls_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, #if (IS_ENABLED(CONFIG_NVME_TARGET_FC)) struct lpfc_nvmet_tgtport *tgtp; struct fc_frame_header *fc_hdr; - struct lpfc_nvmet_rcv_ctx *ctxp; + struct lpfc_async_xchg_ctx *ctxp; uint32_t *payload; uint32_t size, oxid, sid, rc; @@ -1963,7 +1964,7 @@ lpfc_nvmet_unsol_ls_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, size = bf_get(lpfc_rcqe_length, &nvmebuf->cq_event.cqe.rcqe_cmpl); sid = sli4_sid_from_fc_hdr(fc_hdr); - ctxp = kzalloc(sizeof(struct lpfc_nvmet_rcv_ctx), GFP_ATOMIC); + ctxp = kzalloc(sizeof(struct lpfc_async_xchg_ctx), GFP_ATOMIC); if (ctxp == NULL) { atomic_inc(&tgtp->rcv_ls_req_drop); lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, @@ -1994,7 +1995,7 @@ dropit: * lpfc_nvmet_xmt_ls_rsp_cmp should free the allocated ctxp. 
*/ atomic_inc(&tgtp->rcv_ls_req_in); - rc = nvmet_fc_rcv_ls_req(phba->targetport, NULL, &ctxp->ctx.ls_rsp, + rc = nvmet_fc_rcv_ls_req(phba->targetport, NULL, &ctxp->ls_rsp, payload, size); lpfc_printf_log(phba, KERN_INFO, LOG_NVME_DISC, @@ -2028,7 +2029,7 @@ static void lpfc_nvmet_process_rcv_fcp_req(struct lpfc_nvmet_ctxbuf *ctx_buf) { #if (IS_ENABLED(CONFIG_NVME_TARGET_FC)) - struct lpfc_nvmet_rcv_ctx *ctxp = ctx_buf->context; + struct lpfc_async_xchg_ctx *ctxp = ctx_buf->context; struct lpfc_hba *phba = ctxp->phba; struct rqb_dmabuf *nvmebuf = ctxp->rqb_buffer; struct lpfc_nvmet_tgtport *tgtp; @@ -2072,7 +2073,7 @@ lpfc_nvmet_process_rcv_fcp_req(struct lpfc_nvmet_ctxbuf *ctx_buf) * A buffer has already been reposted for this IO, so just free * the nvmebuf. */ - rc = nvmet_fc_rcv_fcp_req(phba->targetport, &ctxp->ctx.fcp_req, + rc = nvmet_fc_rcv_fcp_req(phba->targetport, &ctxp->hdlrctx.fcp_req, payload, ctxp->size); /* Process FCP command */ if (rc == 0) { @@ -2219,7 +2220,7 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba, uint64_t isr_timestamp, uint8_t cqflag) { - struct lpfc_nvmet_rcv_ctx *ctxp; + struct lpfc_async_xchg_ctx *ctxp; struct lpfc_nvmet_tgtport *tgtp; struct fc_frame_header *fc_hdr; struct lpfc_nvmet_ctxbuf *ctx_buf; @@ -2301,7 +2302,7 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba, sid = sli4_sid_from_fc_hdr(fc_hdr); - ctxp = (struct lpfc_nvmet_rcv_ctx *)ctx_buf->context; + ctxp = (struct lpfc_async_xchg_ctx *)ctx_buf->context; spin_lock_irqsave(&phba->sli4_hba.t_active_list_lock, iflag); list_add_tail(&ctxp->list, &phba->sli4_hba.t_active_ctx_list); spin_unlock_irqrestore(&phba->sli4_hba.t_active_list_lock, iflag); @@ -2457,7 +2458,7 @@ lpfc_nvmet_unsol_fcp_event(struct lpfc_hba *phba, **/ static struct lpfc_iocbq * lpfc_nvmet_prep_ls_wqe(struct lpfc_hba *phba, - struct lpfc_nvmet_rcv_ctx *ctxp, + struct lpfc_async_xchg_ctx *ctxp, dma_addr_t rspbuf, uint16_t rspsize) { struct lpfc_nodelist *ndlp; @@ -2579,9 +2580,9 @@ nvme_wqe_free_wqeq_exit: static struct lpfc_iocbq * lpfc_nvmet_prep_fcp_wqe(struct lpfc_hba *phba, - struct lpfc_nvmet_rcv_ctx *ctxp) + struct lpfc_async_xchg_ctx *ctxp) { - struct nvmefc_tgt_fcp_req *rsp = &ctxp->ctx.fcp_req; + struct nvmefc_tgt_fcp_req *rsp = &ctxp->hdlrctx.fcp_req; struct lpfc_nvmet_tgtport *tgtp; struct sli4_sge *sgl; struct lpfc_nodelist *ndlp; @@ -2926,7 +2927,7 @@ static void lpfc_nvmet_sol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, struct lpfc_wcqe_complete *wcqe) { - struct lpfc_nvmet_rcv_ctx *ctxp; + struct lpfc_async_xchg_ctx *ctxp; struct lpfc_nvmet_tgtport *tgtp; uint32_t result; unsigned long flags; @@ -2995,7 +2996,7 @@ static void lpfc_nvmet_unsol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, struct lpfc_wcqe_complete *wcqe) { - struct lpfc_nvmet_rcv_ctx *ctxp; + struct lpfc_async_xchg_ctx *ctxp; struct lpfc_nvmet_tgtport *tgtp; unsigned long flags; uint32_t result; @@ -3076,7 +3077,7 @@ static void lpfc_nvmet_xmt_ls_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, struct lpfc_wcqe_complete *wcqe) { - struct lpfc_nvmet_rcv_ctx *ctxp; + struct lpfc_async_xchg_ctx *ctxp; struct lpfc_nvmet_tgtport *tgtp; uint32_t result; @@ -3117,7 +3118,7 @@ lpfc_nvmet_xmt_ls_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, static int lpfc_nvmet_unsol_issue_abort(struct lpfc_hba *phba, - struct lpfc_nvmet_rcv_ctx *ctxp, + struct lpfc_async_xchg_ctx *ctxp, uint32_t sid, uint16_t xri) { struct lpfc_nvmet_tgtport *tgtp; @@ -3212,7 +3213,7 @@ lpfc_nvmet_unsol_issue_abort(struct 
lpfc_hba *phba, static int lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba, - struct lpfc_nvmet_rcv_ctx *ctxp, + struct lpfc_async_xchg_ctx *ctxp, uint32_t sid, uint16_t xri) { struct lpfc_nvmet_tgtport *tgtp; @@ -3338,7 +3339,7 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba, static int lpfc_nvmet_unsol_fcp_issue_abort(struct lpfc_hba *phba, - struct lpfc_nvmet_rcv_ctx *ctxp, + struct lpfc_async_xchg_ctx *ctxp, uint32_t sid, uint16_t xri) { struct lpfc_nvmet_tgtport *tgtp; @@ -3403,7 +3404,7 @@ aerr: static int lpfc_nvmet_unsol_ls_issue_abort(struct lpfc_hba *phba, - struct lpfc_nvmet_rcv_ctx *ctxp, + struct lpfc_async_xchg_ctx *ctxp, uint32_t sid, uint16_t xri) { struct lpfc_nvmet_tgtport *tgtp; diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index d57918dec15c..d4d810cb8944 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -19888,7 +19888,7 @@ lpfc_sli4_issue_wqe(struct lpfc_hba *phba, struct lpfc_sli4_hdw_queue *qp, struct lpfc_iocbq *pwqe) { union lpfc_wqe128 *wqe = &pwqe->wqe; - struct lpfc_nvmet_rcv_ctx *ctxp; + struct lpfc_async_xchg_ctx *ctxp; struct lpfc_queue *wq; struct lpfc_sglq *sglq; struct lpfc_sli_ring *pring; -- cgit v1.2.3 From 7b7f551b0403e0f740c6af2553b46ba2d3531c80 Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 31 Mar 2020 09:50:04 -0700 Subject: lpfc: Commonize lpfc_async_xchg_ctx state and flag definitions The last step of commonization is to remove the 'T' suffix from state and flag field definitions. This is minor, but removes the mental association that it solely applies to nvmet use. Signed-off-by: Paul Ely Signed-off-by: James Smart Reviewed-by: Hannes Reinecke Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/scsi/lpfc/lpfc_init.c | 2 +- drivers/scsi/lpfc/lpfc_nvme.h | 37 +++++----- drivers/scsi/lpfc/lpfc_nvmet.c | 158 ++++++++++++++++++++--------------------- 3 files changed, 100 insertions(+), 97 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index 0e0b40710ea9..ea99483345f2 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -1096,7 +1096,7 @@ lpfc_hba_down_post_s4(struct lpfc_hba *phba) &nvmet_aborts); spin_unlock_irq(&phba->sli4_hba.abts_nvmet_buf_list_lock); list_for_each_entry_safe(ctxp, ctxp_next, &nvmet_aborts, list) { - ctxp->flag &= ~(LPFC_NVMET_XBUSY | LPFC_NVMET_ABORT_OP); + ctxp->flag &= ~(LPFC_NVME_XBUSY | LPFC_NVME_ABORT_OP); lpfc_nvmet_ctxbuf_post(phba, ctxp->ctxbuf); } } diff --git a/drivers/scsi/lpfc/lpfc_nvme.h b/drivers/scsi/lpfc/lpfc_nvme.h index be7d262f8cf3..220b51af43da 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.h +++ b/drivers/scsi/lpfc/lpfc_nvme.h @@ -162,6 +162,26 @@ struct lpfc_nvmet_ctx_info { #define lpfc_get_ctx_list(phba, cpu, mrq) \ (phba->sli4_hba.nvmet_ctx_info + ((cpu * phba->cfg_nvmet_mrq) + mrq)) +/* Values for state field of struct lpfc_async_xchg_ctx */ +#define LPFC_NVME_STE_LS_RCV 1 +#define LPFC_NVME_STE_LS_ABORT 2 +#define LPFC_NVME_STE_LS_RSP 3 +#define LPFC_NVME_STE_RCV 4 +#define LPFC_NVME_STE_DATA 5 +#define LPFC_NVME_STE_ABORT 6 +#define LPFC_NVME_STE_DONE 7 +#define LPFC_NVME_STE_FREE 0xff + +/* Values for flag field of struct lpfc_async_xchg_ctx */ +#define LPFC_NVME_IO_INP 0x1 /* IO is in progress on exchange */ +#define LPFC_NVME_ABORT_OP 0x2 /* Abort WQE issued on exchange */ +#define LPFC_NVME_XBUSY 0x4 /* XB bit set on IO cmpl */ +#define LPFC_NVME_CTX_RLS 0x8 /* ctx free requested */ +#define LPFC_NVME_ABTS_RCV 0x10 /* ABTS received on 
exchange */ +#define LPFC_NVME_CTX_REUSE_WQ 0x20 /* ctx reused via WQ */ +#define LPFC_NVME_DEFER_WQFULL 0x40 /* Waiting on a free WQE */ +#define LPFC_NVME_TNOTIFY 0x80 /* notify transport of abts */ + struct lpfc_async_xchg_ctx { union { struct nvmefc_tgt_fcp_req fcp_req; @@ -181,24 +201,7 @@ struct lpfc_async_xchg_ctx { uint16_t cpu; uint16_t idx; uint16_t state; - /* States */ -#define LPFC_NVMET_STE_LS_RCV 1 -#define LPFC_NVMET_STE_LS_ABORT 2 -#define LPFC_NVMET_STE_LS_RSP 3 -#define LPFC_NVMET_STE_RCV 4 -#define LPFC_NVMET_STE_DATA 5 -#define LPFC_NVMET_STE_ABORT 6 -#define LPFC_NVMET_STE_DONE 7 -#define LPFC_NVMET_STE_FREE 0xff uint16_t flag; -#define LPFC_NVMET_IO_INP 0x1 /* IO is in progress on exchange */ -#define LPFC_NVMET_ABORT_OP 0x2 /* Abort WQE issued on exchange */ -#define LPFC_NVMET_XBUSY 0x4 /* XB bit set on IO cmpl */ -#define LPFC_NVMET_CTX_RLS 0x8 /* ctx free requested */ -#define LPFC_NVMET_ABTS_RCV 0x10 /* ABTS received on exchange */ -#define LPFC_NVMET_CTX_REUSE_WQ 0x20 /* ctx reused via WQ */ -#define LPFC_NVMET_DEFER_WQFULL 0x40 /* Waiting on a free WQE */ -#define LPFC_NVMET_TNOTIFY 0x80 /* notify transport of abts */ struct rqb_dmabuf *rqb_buffer; struct lpfc_nvmet_ctxbuf *ctxbuf; struct lpfc_sli4_hdw_queue *hdwq; diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c index 03e8010564af..e0d5be4617ac 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.c +++ b/drivers/scsi/lpfc/lpfc_nvmet.c @@ -271,10 +271,10 @@ lpfc_nvmet_defer_release(struct lpfc_hba *phba, "6313 NVMET Defer ctx release oxid x%x flg x%x\n", ctxp->oxid, ctxp->flag); - if (ctxp->flag & LPFC_NVMET_CTX_RLS) + if (ctxp->flag & LPFC_NVME_CTX_RLS) return; - ctxp->flag |= LPFC_NVMET_CTX_RLS; + ctxp->flag |= LPFC_NVME_CTX_RLS; spin_lock(&phba->sli4_hba.t_active_list_lock); list_del(&ctxp->list); spin_unlock(&phba->sli4_hba.t_active_list_lock); @@ -306,7 +306,7 @@ lpfc_nvmet_xmt_ls_rsp_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, result = wcqe->parameter; ctxp = cmdwqe->context2; - if (ctxp->state != LPFC_NVMET_STE_LS_RSP || ctxp->entry_cnt != 2) { + if (ctxp->state != LPFC_NVME_STE_LS_RSP || ctxp->entry_cnt != 2) { lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, "6410 NVMET LS cmpl state mismatch IO x%x: " "%d %d\n", @@ -374,7 +374,7 @@ lpfc_nvmet_ctxbuf_post(struct lpfc_hba *phba, struct lpfc_nvmet_ctxbuf *ctx_buf) int cpu; unsigned long iflag; - if (ctxp->state == LPFC_NVMET_STE_FREE) { + if (ctxp->state == LPFC_NVME_STE_FREE) { lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, "6411 NVMET free, already free IO x%x: %d %d\n", ctxp->oxid, ctxp->state, ctxp->entry_cnt); @@ -386,8 +386,8 @@ lpfc_nvmet_ctxbuf_post(struct lpfc_hba *phba, struct lpfc_nvmet_ctxbuf *ctx_buf) /* check if freed in another path whilst acquiring lock */ if (nvmebuf) { ctxp->rqb_buffer = NULL; - if (ctxp->flag & LPFC_NVMET_CTX_REUSE_WQ) { - ctxp->flag &= ~LPFC_NVMET_CTX_REUSE_WQ; + if (ctxp->flag & LPFC_NVME_CTX_REUSE_WQ) { + ctxp->flag &= ~LPFC_NVME_CTX_REUSE_WQ; spin_unlock_irqrestore(&ctxp->ctxlock, iflag); nvmebuf->hrq->rqbp->rqb_free_buffer(phba, nvmebuf); @@ -400,7 +400,7 @@ lpfc_nvmet_ctxbuf_post(struct lpfc_hba *phba, struct lpfc_nvmet_ctxbuf *ctx_buf) spin_unlock_irqrestore(&ctxp->ctxlock, iflag); } } - ctxp->state = LPFC_NVMET_STE_FREE; + ctxp->state = LPFC_NVME_STE_FREE; spin_lock_irqsave(&phba->sli4_hba.nvmet_io_wait_lock, iflag); if (phba->sli4_hba.nvmet_io_wait_cnt) { @@ -424,7 +424,7 @@ lpfc_nvmet_ctxbuf_post(struct lpfc_hba *phba, struct lpfc_nvmet_ctxbuf *ctx_buf) ctxp->size = size; 
ctxp->oxid = oxid; ctxp->sid = sid; - ctxp->state = LPFC_NVMET_STE_RCV; + ctxp->state = LPFC_NVME_STE_RCV; ctxp->entry_cnt = 1; ctxp->flag = 0; ctxp->ctxbuf = ctx_buf; @@ -449,7 +449,7 @@ lpfc_nvmet_ctxbuf_post(struct lpfc_hba *phba, struct lpfc_nvmet_ctxbuf *ctx_buf) /* Indicate that a replacement buffer has been posted */ spin_lock_irqsave(&ctxp->ctxlock, iflag); - ctxp->flag |= LPFC_NVMET_CTX_REUSE_WQ; + ctxp->flag |= LPFC_NVME_CTX_REUSE_WQ; spin_unlock_irqrestore(&ctxp->ctxlock, iflag); if (!queue_work(phba->wq, &ctx_buf->defer_work)) { @@ -707,7 +707,7 @@ lpfc_nvmet_xmt_fcp_op_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, #endif ctxp = cmdwqe->context2; - ctxp->flag &= ~LPFC_NVMET_IO_INP; + ctxp->flag &= ~LPFC_NVME_IO_INP; rsp = &ctxp->hdlrctx.fcp_req; op = rsp->op; @@ -736,13 +736,13 @@ lpfc_nvmet_xmt_fcp_op_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, /* pick up SLI4 exhange busy condition */ if (bf_get(lpfc_wcqe_c_xb, wcqe)) { - ctxp->flag |= LPFC_NVMET_XBUSY; + ctxp->flag |= LPFC_NVME_XBUSY; logerr |= LOG_NVME_ABTS; if (tgtp) atomic_inc(&tgtp->xmt_fcp_rsp_xb_set); } else { - ctxp->flag &= ~LPFC_NVMET_XBUSY; + ctxp->flag &= ~LPFC_NVME_XBUSY; } lpfc_printf_log(phba, KERN_INFO, logerr, @@ -764,7 +764,7 @@ lpfc_nvmet_xmt_fcp_op_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, if ((op == NVMET_FCOP_READDATA_RSP) || (op == NVMET_FCOP_RSP)) { /* Sanity check */ - ctxp->state = LPFC_NVMET_STE_DONE; + ctxp->state = LPFC_NVME_STE_DONE; ctxp->entry_cnt++; #ifdef CONFIG_SCSI_LPFC_DEBUG_FS @@ -846,14 +846,14 @@ lpfc_nvmet_xmt_ls_rsp(struct nvmet_fc_target_port *tgtport, lpfc_printf_log(phba, KERN_INFO, LOG_NVME_DISC, "6023 NVMET LS rsp oxid x%x\n", ctxp->oxid); - if ((ctxp->state != LPFC_NVMET_STE_LS_RCV) || + if ((ctxp->state != LPFC_NVME_STE_LS_RCV) || (ctxp->entry_cnt != 1)) { lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, "6412 NVMET LS rsp state mismatch " "oxid x%x: %d %d\n", ctxp->oxid, ctxp->state, ctxp->entry_cnt); } - ctxp->state = LPFC_NVMET_STE_LS_RSP; + ctxp->state = LPFC_NVME_STE_LS_RSP; ctxp->entry_cnt++; nvmewqeq = lpfc_nvmet_prep_ls_wqe(phba, ctxp, rsp->rspdma, @@ -964,8 +964,8 @@ lpfc_nvmet_xmt_fcp_op(struct nvmet_fc_target_port *tgtport, #endif /* Sanity check */ - if ((ctxp->flag & LPFC_NVMET_ABTS_RCV) || - (ctxp->state == LPFC_NVMET_STE_ABORT)) { + if ((ctxp->flag & LPFC_NVME_ABTS_RCV) || + (ctxp->state == LPFC_NVME_STE_ABORT)) { atomic_inc(&lpfc_nvmep->xmt_fcp_drop); lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, "6102 IO oxid x%x aborted\n", @@ -993,7 +993,7 @@ lpfc_nvmet_xmt_fcp_op(struct nvmet_fc_target_port *tgtport, lpfc_nvmeio_data(phba, "NVMET FCP CMND: xri x%x op x%x len x%x\n", ctxp->oxid, rsp->op, rsp->rsplen); - ctxp->flag |= LPFC_NVMET_IO_INP; + ctxp->flag |= LPFC_NVME_IO_INP; rc = lpfc_sli4_issue_wqe(phba, ctxp->hdwq, nvmewqeq); if (rc == WQE_SUCCESS) { #ifdef CONFIG_SCSI_LPFC_DEBUG_FS @@ -1012,7 +1012,7 @@ lpfc_nvmet_xmt_fcp_op(struct nvmet_fc_target_port *tgtport, * WQ was full, so queue nvmewqeq to be sent after * WQE release CQE */ - ctxp->flag |= LPFC_NVMET_DEFER_WQFULL; + ctxp->flag |= LPFC_NVME_DEFER_WQFULL; wq = ctxp->hdwq->io_wq; pring = wq->pring; spin_lock_irqsave(&pring->ring_lock, iflags); @@ -1081,13 +1081,13 @@ lpfc_nvmet_xmt_fcp_abort(struct nvmet_fc_target_port *tgtport, /* Since iaab/iaar are NOT set, we need to check * if the firmware is in process of aborting IO */ - if (ctxp->flag & (LPFC_NVMET_XBUSY | LPFC_NVMET_ABORT_OP)) { + if (ctxp->flag & (LPFC_NVME_XBUSY | LPFC_NVME_ABORT_OP)) { 
spin_unlock_irqrestore(&ctxp->ctxlock, flags); return; } - ctxp->flag |= LPFC_NVMET_ABORT_OP; + ctxp->flag |= LPFC_NVME_ABORT_OP; - if (ctxp->flag & LPFC_NVMET_DEFER_WQFULL) { + if (ctxp->flag & LPFC_NVME_DEFER_WQFULL) { spin_unlock_irqrestore(&ctxp->ctxlock, flags); lpfc_nvmet_unsol_fcp_issue_abort(phba, ctxp, ctxp->sid, ctxp->oxid); @@ -1097,11 +1097,11 @@ lpfc_nvmet_xmt_fcp_abort(struct nvmet_fc_target_port *tgtport, } spin_unlock_irqrestore(&ctxp->ctxlock, flags); - /* An state of LPFC_NVMET_STE_RCV means we have just received + /* A state of LPFC_NVME_STE_RCV means we have just received * the NVME command and have not started processing it. * (by issuing any IO WQEs on this exchange yet) */ - if (ctxp->state == LPFC_NVMET_STE_RCV) + if (ctxp->state == LPFC_NVME_STE_RCV) lpfc_nvmet_unsol_fcp_issue_abort(phba, ctxp, ctxp->sid, ctxp->oxid); else @@ -1121,19 +1121,19 @@ lpfc_nvmet_xmt_fcp_release(struct nvmet_fc_target_port *tgtport, bool aborting = false; spin_lock_irqsave(&ctxp->ctxlock, flags); - if (ctxp->flag & LPFC_NVMET_XBUSY) + if (ctxp->flag & LPFC_NVME_XBUSY) lpfc_printf_log(phba, KERN_INFO, LOG_NVME_IOERR, "6027 NVMET release with XBUSY flag x%x" " oxid x%x\n", ctxp->flag, ctxp->oxid); - else if (ctxp->state != LPFC_NVMET_STE_DONE && - ctxp->state != LPFC_NVMET_STE_ABORT) + else if (ctxp->state != LPFC_NVME_STE_DONE && + ctxp->state != LPFC_NVME_STE_ABORT) lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, "6413 NVMET release bad state %d %d oxid x%x\n", ctxp->state, ctxp->entry_cnt, ctxp->oxid); - if ((ctxp->flag & LPFC_NVMET_ABORT_OP) || - (ctxp->flag & LPFC_NVMET_XBUSY)) { + if ((ctxp->flag & LPFC_NVME_ABORT_OP) || + (ctxp->flag & LPFC_NVME_XBUSY)) { aborting = true; /* let the abort path do the real release */ lpfc_nvmet_defer_release(phba, ctxp); @@ -1144,7 +1144,7 @@ lpfc_nvmet_xmt_fcp_release(struct nvmet_fc_target_port *tgtport, ctxp->state, aborting); atomic_inc(&lpfc_nvmep->xmt_fcp_release); - ctxp->flag &= ~LPFC_NVMET_TNOTIFY; + ctxp->flag &= ~LPFC_NVME_TNOTIFY; if (aborting) return; @@ -1364,7 +1364,7 @@ lpfc_nvmet_setup_io_context(struct lpfc_hba *phba) return -ENOMEM; } ctx_buf->context->ctxbuf = ctx_buf; - ctx_buf->context->state = LPFC_NVMET_STE_FREE; + ctx_buf->context->state = LPFC_NVME_STE_FREE; ctx_buf->iocbq = lpfc_sli_get_iocbq(phba); if (!ctx_buf->iocbq) { @@ -1595,12 +1595,12 @@ lpfc_sli4_nvmet_xri_aborted(struct lpfc_hba *phba, /* Check if we already received a free context call * and we have completed processing an abort situation. 
*/ - if (ctxp->flag & LPFC_NVMET_CTX_RLS && - !(ctxp->flag & LPFC_NVMET_ABORT_OP)) { + if (ctxp->flag & LPFC_NVME_CTX_RLS && + !(ctxp->flag & LPFC_NVME_ABORT_OP)) { list_del_init(&ctxp->list); released = true; } - ctxp->flag &= ~LPFC_NVMET_XBUSY; + ctxp->flag &= ~LPFC_NVME_XBUSY; spin_unlock(&ctxp->ctxlock); spin_unlock(&phba->sli4_hba.abts_nvmet_buf_list_lock); @@ -1642,8 +1642,8 @@ lpfc_sli4_nvmet_xri_aborted(struct lpfc_hba *phba, rxid); spin_lock_irqsave(&ctxp->ctxlock, iflag); - ctxp->flag |= LPFC_NVMET_ABTS_RCV; - ctxp->state = LPFC_NVMET_STE_ABORT; + ctxp->flag |= LPFC_NVME_ABTS_RCV; + ctxp->state = LPFC_NVME_STE_ABORT; spin_unlock_irqrestore(&ctxp->ctxlock, iflag); lpfc_nvmeio_data(phba, @@ -1686,7 +1686,7 @@ lpfc_nvmet_rcv_unsol_abort(struct lpfc_vport *vport, spin_unlock_irqrestore(&phba->hbalock, iflag); spin_lock_irqsave(&ctxp->ctxlock, iflag); - ctxp->flag |= LPFC_NVMET_ABTS_RCV; + ctxp->flag |= LPFC_NVME_ABTS_RCV; spin_unlock_irqrestore(&ctxp->ctxlock, iflag); lpfc_nvmeio_data(phba, @@ -1755,7 +1755,7 @@ lpfc_nvmet_rcv_unsol_abort(struct lpfc_vport *vport, xri = ctxp->ctxbuf->sglq->sli4_xritag; spin_lock_irqsave(&ctxp->ctxlock, iflag); - ctxp->flag |= (LPFC_NVMET_ABTS_RCV | LPFC_NVMET_ABORT_OP); + ctxp->flag |= (LPFC_NVME_ABTS_RCV | LPFC_NVME_ABORT_OP); spin_unlock_irqrestore(&ctxp->ctxlock, iflag); lpfc_nvmeio_data(phba, @@ -1767,7 +1767,7 @@ lpfc_nvmet_rcv_unsol_abort(struct lpfc_vport *vport, "flag x%x state x%x\n", ctxp->oxid, xri, ctxp->flag, ctxp->state); - if (ctxp->flag & LPFC_NVMET_TNOTIFY) { + if (ctxp->flag & LPFC_NVME_TNOTIFY) { /* Notify the transport */ nvmet_fc_rcv_fcp_abort(phba->targetport, &ctxp->hdlrctx.fcp_req); @@ -1982,7 +1982,7 @@ dropit: ctxp->oxid = oxid; ctxp->sid = sid; ctxp->wqeq = NULL; - ctxp->state = LPFC_NVMET_STE_LS_RCV; + ctxp->state = LPFC_NVME_STE_LS_RCV; ctxp->entry_cnt = 1; ctxp->rqb_buffer = (void *)nvmebuf; ctxp->hdwq = &phba->sli4_hba.hdwq[0]; @@ -2050,7 +2050,7 @@ lpfc_nvmet_process_rcv_fcp_req(struct lpfc_nvmet_ctxbuf *ctx_buf) return; } - if (ctxp->flag & LPFC_NVMET_ABTS_RCV) { + if (ctxp->flag & LPFC_NVME_ABTS_RCV) { lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, "6324 IO oxid x%x aborted\n", ctxp->oxid); @@ -2059,7 +2059,7 @@ lpfc_nvmet_process_rcv_fcp_req(struct lpfc_nvmet_ctxbuf *ctx_buf) payload = (uint32_t *)(nvmebuf->dbuf.virt); tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; - ctxp->flag |= LPFC_NVMET_TNOTIFY; + ctxp->flag |= LPFC_NVME_TNOTIFY; #ifdef CONFIG_SCSI_LPFC_DEBUG_FS if (ctxp->ts_isr_cmd) ctxp->ts_cmd_nvme = ktime_get_ns(); @@ -2079,7 +2079,7 @@ lpfc_nvmet_process_rcv_fcp_req(struct lpfc_nvmet_ctxbuf *ctx_buf) if (rc == 0) { atomic_inc(&tgtp->rcv_fcp_cmd_out); spin_lock_irqsave(&ctxp->ctxlock, iflags); - if ((ctxp->flag & LPFC_NVMET_CTX_REUSE_WQ) || + if ((ctxp->flag & LPFC_NVME_CTX_REUSE_WQ) || (nvmebuf != ctxp->rqb_buffer)) { spin_unlock_irqrestore(&ctxp->ctxlock, iflags); return; @@ -2098,7 +2098,7 @@ lpfc_nvmet_process_rcv_fcp_req(struct lpfc_nvmet_ctxbuf *ctx_buf) atomic_inc(&tgtp->rcv_fcp_cmd_out); atomic_inc(&tgtp->defer_fod); spin_lock_irqsave(&ctxp->ctxlock, iflags); - if (ctxp->flag & LPFC_NVMET_CTX_REUSE_WQ) { + if (ctxp->flag & LPFC_NVME_CTX_REUSE_WQ) { spin_unlock_irqrestore(&ctxp->ctxlock, iflags); return; } @@ -2113,7 +2113,7 @@ lpfc_nvmet_process_rcv_fcp_req(struct lpfc_nvmet_ctxbuf *ctx_buf) phba->sli4_hba.nvmet_mrq_data[qno], 1, qno); return; } - ctxp->flag &= ~LPFC_NVMET_TNOTIFY; + ctxp->flag &= ~LPFC_NVME_TNOTIFY; atomic_inc(&tgtp->rcv_fcp_cmd_drop); lpfc_printf_log(phba, 
KERN_ERR, LOG_NVME_IOERR, "2582 FCP Drop IO x%x: err x%x: x%x x%x x%x\n", @@ -2306,7 +2306,7 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba, spin_lock_irqsave(&phba->sli4_hba.t_active_list_lock, iflag); list_add_tail(&ctxp->list, &phba->sli4_hba.t_active_ctx_list); spin_unlock_irqrestore(&phba->sli4_hba.t_active_list_lock, iflag); - if (ctxp->state != LPFC_NVMET_STE_FREE) { + if (ctxp->state != LPFC_NVME_STE_FREE) { lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, "6414 NVMET Context corrupt %d %d oxid x%x\n", ctxp->state, ctxp->entry_cnt, ctxp->oxid); @@ -2318,7 +2318,7 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba, ctxp->oxid = oxid; ctxp->sid = sid; ctxp->idx = idx; - ctxp->state = LPFC_NVMET_STE_RCV; + ctxp->state = LPFC_NVME_STE_RCV; ctxp->entry_cnt = 1; ctxp->flag = 0; ctxp->ctxbuf = ctx_buf; @@ -2643,9 +2643,9 @@ lpfc_nvmet_prep_fcp_wqe(struct lpfc_hba *phba, } /* Sanity check */ - if (((ctxp->state == LPFC_NVMET_STE_RCV) && + if (((ctxp->state == LPFC_NVME_STE_RCV) && (ctxp->entry_cnt == 1)) || - (ctxp->state == LPFC_NVMET_STE_DATA)) { + (ctxp->state == LPFC_NVME_STE_DATA)) { wqe = &nvmewqe->wqe; } else { lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, @@ -2908,7 +2908,7 @@ lpfc_nvmet_prep_fcp_wqe(struct lpfc_hba *phba, sgl++; ctxp->offset += cnt; } - ctxp->state = LPFC_NVMET_STE_DATA; + ctxp->state = LPFC_NVME_STE_DATA; ctxp->entry_cnt++; return nvmewqe; } @@ -2937,23 +2937,23 @@ lpfc_nvmet_sol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, result = wcqe->parameter; tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; - if (ctxp->flag & LPFC_NVMET_ABORT_OP) + if (ctxp->flag & LPFC_NVME_ABORT_OP) atomic_inc(&tgtp->xmt_fcp_abort_cmpl); spin_lock_irqsave(&ctxp->ctxlock, flags); - ctxp->state = LPFC_NVMET_STE_DONE; + ctxp->state = LPFC_NVME_STE_DONE; /* Check if we already received a free context call * and we have completed processing an abort situation. */ - if ((ctxp->flag & LPFC_NVMET_CTX_RLS) && - !(ctxp->flag & LPFC_NVMET_XBUSY)) { + if ((ctxp->flag & LPFC_NVME_CTX_RLS) && + !(ctxp->flag & LPFC_NVME_XBUSY)) { spin_lock(&phba->sli4_hba.abts_nvmet_buf_list_lock); list_del_init(&ctxp->list); spin_unlock(&phba->sli4_hba.abts_nvmet_buf_list_lock); released = true; } - ctxp->flag &= ~LPFC_NVMET_ABORT_OP; + ctxp->flag &= ~LPFC_NVME_ABORT_OP; spin_unlock_irqrestore(&ctxp->ctxlock, flags); atomic_inc(&tgtp->xmt_abort_rsp); @@ -2977,7 +2977,7 @@ lpfc_nvmet_sol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, lpfc_sli_release_iocbq(phba, cmdwqe); /* Since iaab/iaar are NOT set, there is no work left. - * For LPFC_NVMET_XBUSY, lpfc_sli4_nvmet_xri_aborted + * For LPFC_NVME_XBUSY, lpfc_sli4_nvmet_xri_aborted * should have been called already. */ } @@ -3016,11 +3016,11 @@ lpfc_nvmet_unsol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; spin_lock_irqsave(&ctxp->ctxlock, flags); - if (ctxp->flag & LPFC_NVMET_ABORT_OP) + if (ctxp->flag & LPFC_NVME_ABORT_OP) atomic_inc(&tgtp->xmt_fcp_abort_cmpl); /* Sanity check */ - if (ctxp->state != LPFC_NVMET_STE_ABORT) { + if (ctxp->state != LPFC_NVME_STE_ABORT) { lpfc_printf_log(phba, KERN_ERR, LOG_NVME_ABTS, "6112 ABTS Wrong state:%d oxid x%x\n", ctxp->state, ctxp->oxid); @@ -3029,15 +3029,15 @@ lpfc_nvmet_unsol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, /* Check if we already received a free context call * and we have completed processing an abort situation. 
*/ - ctxp->state = LPFC_NVMET_STE_DONE; - if ((ctxp->flag & LPFC_NVMET_CTX_RLS) && - !(ctxp->flag & LPFC_NVMET_XBUSY)) { + ctxp->state = LPFC_NVME_STE_DONE; + if ((ctxp->flag & LPFC_NVME_CTX_RLS) && + !(ctxp->flag & LPFC_NVME_XBUSY)) { spin_lock(&phba->sli4_hba.abts_nvmet_buf_list_lock); list_del_init(&ctxp->list); spin_unlock(&phba->sli4_hba.abts_nvmet_buf_list_lock); released = true; } - ctxp->flag &= ~LPFC_NVMET_ABORT_OP; + ctxp->flag &= ~LPFC_NVME_ABORT_OP; spin_unlock_irqrestore(&ctxp->ctxlock, flags); atomic_inc(&tgtp->xmt_abort_rsp); @@ -3058,7 +3058,7 @@ lpfc_nvmet_unsol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, lpfc_nvmet_ctxbuf_post(phba, ctxp->ctxbuf); /* Since iaab/iaar are NOT set, there is no work left. - * For LPFC_NVMET_XBUSY, lpfc_sli4_nvmet_xri_aborted + * For LPFC_NVME_XBUSY, lpfc_sli4_nvmet_xri_aborted * should have been called already. */ } @@ -3103,7 +3103,7 @@ lpfc_nvmet_xmt_ls_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, return; } - if (ctxp->state != LPFC_NVMET_STE_LS_ABORT) { + if (ctxp->state != LPFC_NVME_STE_LS_ABORT) { lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, "6416 NVMET LS abort cmpl state mismatch: " "oxid x%x: %d %d\n", @@ -3240,7 +3240,7 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba, /* No failure to an ABTS request. */ spin_lock_irqsave(&ctxp->ctxlock, flags); - ctxp->flag &= ~LPFC_NVMET_ABORT_OP; + ctxp->flag &= ~LPFC_NVME_ABORT_OP; spin_unlock_irqrestore(&ctxp->ctxlock, flags); return 0; } @@ -3254,13 +3254,13 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba, "6161 ABORT failed: No wqeqs: " "xri: x%x\n", ctxp->oxid); /* No failure to an ABTS request. */ - ctxp->flag &= ~LPFC_NVMET_ABORT_OP; + ctxp->flag &= ~LPFC_NVME_ABORT_OP; spin_unlock_irqrestore(&ctxp->ctxlock, flags); return 0; } abts_wqeq = ctxp->abort_wqeq; - ctxp->state = LPFC_NVMET_STE_ABORT; - opt = (ctxp->flag & LPFC_NVMET_ABTS_RCV) ? INHIBIT_ABORT : 0; + ctxp->state = LPFC_NVME_STE_ABORT; + opt = (ctxp->flag & LPFC_NVME_ABTS_RCV) ? INHIBIT_ABORT : 0; spin_unlock_irqrestore(&ctxp->ctxlock, flags); /* Announce entry to new IO submit field. 
*/ @@ -3283,7 +3283,7 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba, phba->hba_flag, ctxp->oxid); lpfc_sli_release_iocbq(phba, abts_wqeq); spin_lock_irqsave(&ctxp->ctxlock, flags); - ctxp->flag &= ~LPFC_NVMET_ABORT_OP; + ctxp->flag &= ~LPFC_NVME_ABORT_OP; spin_unlock_irqrestore(&ctxp->ctxlock, flags); return 0; } @@ -3298,7 +3298,7 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba, ctxp->oxid); lpfc_sli_release_iocbq(phba, abts_wqeq); spin_lock_irqsave(&ctxp->ctxlock, flags); - ctxp->flag &= ~LPFC_NVMET_ABORT_OP; + ctxp->flag &= ~LPFC_NVME_ABORT_OP; spin_unlock_irqrestore(&ctxp->ctxlock, flags); return 0; } @@ -3327,7 +3327,7 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba, atomic_inc(&tgtp->xmt_abort_rsp_error); spin_lock_irqsave(&ctxp->ctxlock, flags); - ctxp->flag &= ~LPFC_NVMET_ABORT_OP; + ctxp->flag &= ~LPFC_NVME_ABORT_OP; spin_unlock_irqrestore(&ctxp->ctxlock, flags); lpfc_sli_release_iocbq(phba, abts_wqeq); lpfc_printf_log(phba, KERN_ERR, LOG_NVME_ABTS, @@ -3354,14 +3354,14 @@ lpfc_nvmet_unsol_fcp_issue_abort(struct lpfc_hba *phba, ctxp->wqeq->hba_wqidx = 0; } - if (ctxp->state == LPFC_NVMET_STE_FREE) { + if (ctxp->state == LPFC_NVME_STE_FREE) { lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, "6417 NVMET ABORT ctx freed %d %d oxid x%x\n", ctxp->state, ctxp->entry_cnt, ctxp->oxid); rc = WQE_BUSY; goto aerr; } - ctxp->state = LPFC_NVMET_STE_ABORT; + ctxp->state = LPFC_NVME_STE_ABORT; ctxp->entry_cnt++; rc = lpfc_nvmet_unsol_issue_abort(phba, ctxp, sid, xri); if (rc == 0) @@ -3383,13 +3383,13 @@ lpfc_nvmet_unsol_fcp_issue_abort(struct lpfc_hba *phba, aerr: spin_lock_irqsave(&ctxp->ctxlock, flags); - if (ctxp->flag & LPFC_NVMET_CTX_RLS) { + if (ctxp->flag & LPFC_NVME_CTX_RLS) { spin_lock(&phba->sli4_hba.abts_nvmet_buf_list_lock); list_del_init(&ctxp->list); spin_unlock(&phba->sli4_hba.abts_nvmet_buf_list_lock); released = true; } - ctxp->flag &= ~(LPFC_NVMET_ABORT_OP | LPFC_NVMET_CTX_RLS); + ctxp->flag &= ~(LPFC_NVME_ABORT_OP | LPFC_NVME_CTX_RLS); spin_unlock_irqrestore(&ctxp->ctxlock, flags); atomic_inc(&tgtp->xmt_abort_rsp_error); @@ -3412,16 +3412,16 @@ lpfc_nvmet_unsol_ls_issue_abort(struct lpfc_hba *phba, unsigned long flags; int rc; - if ((ctxp->state == LPFC_NVMET_STE_LS_RCV && ctxp->entry_cnt == 1) || - (ctxp->state == LPFC_NVMET_STE_LS_RSP && ctxp->entry_cnt == 2)) { - ctxp->state = LPFC_NVMET_STE_LS_ABORT; + if ((ctxp->state == LPFC_NVME_STE_LS_RCV && ctxp->entry_cnt == 1) || + (ctxp->state == LPFC_NVME_STE_LS_RSP && ctxp->entry_cnt == 2)) { + ctxp->state = LPFC_NVME_STE_LS_ABORT; ctxp->entry_cnt++; } else { lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, "6418 NVMET LS abort state mismatch " "IO x%x: %d %d\n", ctxp->oxid, ctxp->state, ctxp->entry_cnt); - ctxp->state = LPFC_NVMET_STE_LS_ABORT; + ctxp->state = LPFC_NVME_STE_LS_ABORT; } tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; -- cgit v1.2.3 From 3a8070c567aaaa6038b52113ce01527992604c40 Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 31 Mar 2020 09:50:05 -0700 Subject: lpfc: Refactor NVME LS receive handling In preparation for supporting both initiator mode and target mode receiving NVME LS's, commonize the existing NVME LS request receive handling found in the base driver and in the nvmet side. Using the original lpfc_nvmet_unsol_ls_event() and lpfc_nvmet_unsol_ls_buffer() routines as templates, commonize the reception of an NVME LS request.
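For orientation, a rough sketch of the receive-and-dispatch shape this refactor creates is shown below. It is a simplified, compilable model, not driver code from the patch: the handler names mirror the patch, but every type, the dispatch wrapper, and the printed messages are hypothetical stand-ins.

/*
 * Simplified model of the commonized LS receive path: validate once,
 * then route by port role. All types here are stand-ins, not the
 * real lpfc structures.
 */
#include <stdio.h>

struct lpfc_async_xchg_ctx {
	unsigned int oxid;	/* originator exchange id of the LS */
	unsigned int sid;	/* source N_Port ID the LS came from */
	unsigned int size;	/* LS payload length */
};

struct lpfc_hba {
	int nvmet_support;	/* nonzero: port is an NVME target */
};

/* host-side handler: stubbed by this patch, filled in later */
static int lpfc_nvme_handle_lsreq(struct lpfc_hba *phba,
				  struct lpfc_async_xchg_ctx *axchg)
{
	(void)phba;
	printf("nvme (host) handler gets LS oxid x%x\n", axchg->oxid);
	return 1;		/* 1 = not handled; caller drops/aborts */
}

/* target-side handler: wraps nvmet_fc_rcv_ls_req() in the real driver */
static int lpfc_nvmet_handle_lsreq(struct lpfc_hba *phba,
				   struct lpfc_async_xchg_ctx *axchg)
{
	(void)phba;
	printf("nvmet (target) handler gets LS oxid x%x\n", axchg->oxid);
	return 0;		/* 0 = handled, delivered to transport */
}

/* commonized entry point: one validated path, then a role-based split */
static void unsol_ls_dispatch(struct lpfc_hba *phba,
			      struct lpfc_async_xchg_ctx *axchg)
{
	int ret;

	if (phba->nvmet_support)
		ret = lpfc_nvmet_handle_lsreq(phba, axchg);
	else
		ret = lpfc_nvme_handle_lsreq(phba, axchg);

	if (ret)		/* non-zero: LS not handled, drop it */
		printf("LS oxid x%x dropped\n", axchg->oxid);
}

int main(void)
{
	struct lpfc_hba hba = { .nvmet_support = 1 };
	struct lpfc_async_xchg_ctx ls = { .oxid = 0x1234 };

	unsol_ls_dispatch(&hba, &ls);
	return 0;
}

The key point is the single validated entry path with a role-based split, which is what lets the nvmet handler be tied back in while an nvme handler is stubbed for later.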
The common routine will validate the LS request, verify that it was received from a logged-in node, and allocate a lpfc_async_xchg_ctx that is used to manage the LS request. The role of the port is then inspected to determine which handler is to receive the LS - nvme or nvmet. As such, the nvmet handler is tied back in. A handler is created in nvme and is stubbed out. Signed-off-by: Paul Ely Signed-off-by: James Smart Reviewed-by: Hannes Reinecke Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/scsi/lpfc/lpfc_crtn.h | 6 +- drivers/scsi/lpfc/lpfc_nvme.c | 19 +++++ drivers/scsi/lpfc/lpfc_nvme.h | 5 ++ drivers/scsi/lpfc/lpfc_nvmet.c | 163 ++++++++++------------------------------- drivers/scsi/lpfc/lpfc_sli.c | 121 +++++++++++++++++++++++++++++- 5 files changed, 184 insertions(+), 130 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_crtn.h b/drivers/scsi/lpfc/lpfc_crtn.h index 635df4a0188c..07b4f0fd3fe2 100644 --- a/drivers/scsi/lpfc/lpfc_crtn.h +++ b/drivers/scsi/lpfc/lpfc_crtn.h @@ -563,8 +563,10 @@ void lpfc_nvme_update_localport(struct lpfc_vport *vport); int lpfc_nvmet_create_targetport(struct lpfc_hba *phba); int lpfc_nvmet_update_targetport(struct lpfc_hba *phba); void lpfc_nvmet_destroy_targetport(struct lpfc_hba *phba); -void lpfc_nvmet_unsol_ls_event(struct lpfc_hba *phba, - struct lpfc_sli_ring *pring, struct lpfc_iocbq *piocb); +int lpfc_nvme_handle_lsreq(struct lpfc_hba *phba, + struct lpfc_async_xchg_ctx *axchg); +int lpfc_nvmet_handle_lsreq(struct lpfc_hba *phba, + struct lpfc_async_xchg_ctx *axchg); void lpfc_nvmet_unsol_fcp_event(struct lpfc_hba *phba, uint32_t idx, struct rqb_dmabuf *nvmebuf, uint64_t isr_ts, uint8_t cqflag); diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c index 6045000477ac..c2113f6c76c1 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.c +++ b/drivers/scsi/lpfc/lpfc_nvme.c @@ -393,6 +393,25 @@ lpfc_nvme_remoteport_delete(struct nvme_fc_remote_port *remoteport) return; } +/** + * lpfc_nvme_handle_lsreq - Process an unsolicited NVME LS request + * @phba: pointer to lpfc hba data structure. + * @axchg: pointer to exchange context for the NVME LS request + * + * This routine is used for processing an asynchronously received NVME LS + * request. Any remaining validation is done and the LS is then forwarded + * to the nvme-fc transport via nvme_fc_rcv_ls_req().
+ * + * Returns 0 if LS was handled and delivered to the transport + * Returns 1 if LS failed to be handled and should be dropped + */ +int +lpfc_nvme_handle_lsreq(struct lpfc_hba *phba, + struct lpfc_async_xchg_ctx *axchg) +{ + return 1; +} + static void lpfc_nvme_cmpl_gen_req(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, struct lpfc_wcqe_complete *wcqe) diff --git a/drivers/scsi/lpfc/lpfc_nvme.h b/drivers/scsi/lpfc/lpfc_nvme.h index 220b51af43da..10e8d868608e 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.h +++ b/drivers/scsi/lpfc/lpfc_nvme.h @@ -188,6 +188,7 @@ struct lpfc_async_xchg_ctx { } hdlrctx; struct list_head list; struct lpfc_hba *phba; + struct lpfc_nodelist *ndlp; struct nvmefc_ls_req *ls_req; struct nvmefc_ls_rsp ls_rsp; struct lpfc_iocbq *wqeq; @@ -202,6 +203,7 @@ struct lpfc_async_xchg_ctx { uint16_t idx; uint16_t state; uint16_t flag; + void *payload; struct rqb_dmabuf *rqb_buffer; struct lpfc_nvmet_ctxbuf *ctxbuf; struct lpfc_sli4_hdw_queue *hdwq; @@ -224,3 +226,6 @@ struct lpfc_async_xchg_ctx { /* routines found in lpfc_nvme.c */ /* routines found in lpfc_nvmet.c */ +int lpfc_nvme_unsol_ls_issue_abort(struct lpfc_hba *phba, + struct lpfc_async_xchg_ctx *ctxp, uint32_t sid, + uint16_t xri); diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c index e0d5be4617ac..a95c8ef6373a 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.c +++ b/drivers/scsi/lpfc/lpfc_nvmet.c @@ -63,9 +63,6 @@ static int lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *, static int lpfc_nvmet_unsol_fcp_issue_abort(struct lpfc_hba *, struct lpfc_async_xchg_ctx *, uint32_t, uint16_t); -static int lpfc_nvmet_unsol_ls_issue_abort(struct lpfc_hba *, - struct lpfc_async_xchg_ctx *, - uint32_t, uint16_t); static void lpfc_nvmet_wqfull_flush(struct lpfc_hba *, struct lpfc_queue *, struct lpfc_async_xchg_ctx *); static void lpfc_nvmet_fcp_rqst_defer_work(struct work_struct *); @@ -865,7 +862,7 @@ lpfc_nvmet_xmt_ls_rsp(struct nvmet_fc_target_port *tgtport, ctxp->oxid); lpfc_in_buf_free(phba, &nvmebuf->dbuf); atomic_inc(&nvmep->xmt_ls_abort); - lpfc_nvmet_unsol_ls_issue_abort(phba, ctxp, + lpfc_nvme_unsol_ls_issue_abort(phba, ctxp, ctxp->sid, ctxp->oxid); return -ENOMEM; } @@ -908,7 +905,7 @@ lpfc_nvmet_xmt_ls_rsp(struct nvmet_fc_target_port *tgtport, lpfc_in_buf_free(phba, &nvmebuf->dbuf); atomic_inc(&nvmep->xmt_ls_abort); - lpfc_nvmet_unsol_ls_issue_abort(phba, ctxp, ctxp->sid, ctxp->oxid); + lpfc_nvme_unsol_ls_issue_abort(phba, ctxp, ctxp->sid, ctxp->oxid); return -ENXIO; } @@ -1922,107 +1919,49 @@ lpfc_nvmet_destroy_targetport(struct lpfc_hba *phba) } /** - * lpfc_nvmet_unsol_ls_buffer - Process an unsolicited event data buffer + * lpfc_nvmet_handle_lsreq - Process an NVME LS request * @phba: pointer to lpfc hba data structure. - * @pring: pointer to a SLI ring. - * @nvmebuf: pointer to lpfc nvme command HBQ data structure. + * @axchg: pointer to exchange context for the NVME LS request * - * This routine is used for processing the WQE associated with a unsolicited - * event. It first determines whether there is an existing ndlp that matches - * the DID from the unsolicited WQE. If not, it will create a new one with - * the DID from the unsolicited WQE. The ELS command from the unsolicited - * WQE is then used to invoke the proper routine and to set up proper state - * of the discovery state machine. 
- **/ -static void -lpfc_nvmet_unsol_ls_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, - struct hbq_dmabuf *nvmebuf) + * This routine is used for processing an asynchronously received NVME LS + * request. Any remaining validation is done and the LS is then forwarded + * to the nvmet-fc transport via nvmet_fc_rcv_ls_req(). + * + * The calling sequence should be: nvmet_fc_rcv_ls_req() -> (processing) + * -> lpfc_nvmet_xmt_ls_rsp/cmp -> req->done. + * lpfc_nvme_xmt_ls_rsp_cmp should free the allocated axchg. + * + * Returns 0 if LS was handled and delivered to the transport + * Returns 1 if LS failed to be handled and should be dropped + */ +int +lpfc_nvmet_handle_lsreq(struct lpfc_hba *phba, + struct lpfc_async_xchg_ctx *axchg) { #if (IS_ENABLED(CONFIG_NVME_TARGET_FC)) - struct lpfc_nvmet_tgtport *tgtp; - struct fc_frame_header *fc_hdr; - struct lpfc_async_xchg_ctx *ctxp; - uint32_t *payload; - uint32_t size, oxid, sid, rc; - - - if (!nvmebuf || !phba->targetport) { - lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, - "6154 LS Drop IO\n"); - oxid = 0; - size = 0; - sid = 0; - ctxp = NULL; - goto dropit; - } - - fc_hdr = (struct fc_frame_header *)(nvmebuf->hbuf.virt); - oxid = be16_to_cpu(fc_hdr->fh_ox_id); - - tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; - payload = (uint32_t *)(nvmebuf->dbuf.virt); - size = bf_get(lpfc_rcqe_length, &nvmebuf->cq_event.cqe.rcqe_cmpl); - sid = sli4_sid_from_fc_hdr(fc_hdr); - - ctxp = kzalloc(sizeof(struct lpfc_async_xchg_ctx), GFP_ATOMIC); - if (ctxp == NULL) { - atomic_inc(&tgtp->rcv_ls_req_drop); - lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, - "6155 LS Drop IO x%x: Alloc\n", - oxid); -dropit: - lpfc_nvmeio_data(phba, "NVMET LS DROP: " - "xri x%x sz %d from %06x\n", - oxid, size, sid); - lpfc_in_buf_free(phba, &nvmebuf->dbuf); - return; - } - ctxp->phba = phba; - ctxp->size = size; - ctxp->oxid = oxid; - ctxp->sid = sid; - ctxp->wqeq = NULL; - ctxp->state = LPFC_NVME_STE_LS_RCV; - ctxp->entry_cnt = 1; - ctxp->rqb_buffer = (void *)nvmebuf; - ctxp->hdwq = &phba->sli4_hba.hdwq[0]; + struct lpfc_nvmet_tgtport *tgtp = phba->targetport->private; + uint32_t *payload = axchg->payload; + int rc; - lpfc_nvmeio_data(phba, "NVMET LS RCV: xri x%x sz %d from %06x\n", - oxid, size, sid); - /* - * The calling sequence should be: - * nvmet_fc_rcv_ls_req -> lpfc_nvmet_xmt_ls_rsp/cmp ->_req->done - * lpfc_nvmet_xmt_ls_rsp_cmp should free the allocated ctxp.
- */ atomic_inc(&tgtp->rcv_ls_req_in); - rc = nvmet_fc_rcv_ls_req(phba->targetport, NULL, &ctxp->ls_rsp, - payload, size); + + rc = nvmet_fc_rcv_ls_req(phba->targetport, NULL, &axchg->ls_rsp, + axchg->payload, axchg->size); lpfc_printf_log(phba, KERN_INFO, LOG_NVME_DISC, "6037 NVMET Unsol rcv: sz %d rc %d: %08x %08x %08x " - "%08x %08x %08x\n", size, rc, + "%08x %08x %08x\n", axchg->size, rc, *payload, *(payload+1), *(payload+2), *(payload+3), *(payload+4), *(payload+5)); - if (rc == 0) { + if (!rc) { atomic_inc(&tgtp->rcv_ls_req_out); - return; + return 0; } - lpfc_nvmeio_data(phba, "NVMET LS DROP: xri x%x sz %d from %06x\n", - oxid, size, sid); - atomic_inc(&tgtp->rcv_ls_req_drop); - lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, - "6156 LS Drop IO x%x: nvmet_fc_rcv_ls_req %d\n", - ctxp->oxid, rc); - - /* We assume a rcv'ed cmd ALWAYs fits into 1 buffer */ - lpfc_in_buf_free(phba, &nvmebuf->dbuf); - - atomic_inc(&tgtp->xmt_ls_abort); - lpfc_nvmet_unsol_ls_issue_abort(phba, ctxp, sid, oxid); #endif + return 1; } static void @@ -2364,40 +2303,6 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba, } } -/** - * lpfc_nvmet_unsol_ls_event - Process an unsolicited event from an nvme nport - * @phba: pointer to lpfc hba data structure. - * @pring: pointer to a SLI ring. - * @nvmebuf: pointer to received nvme data structure. - * - * This routine is used to process an unsolicited event received from a SLI - * (Service Level Interface) ring. The actual processing of the data buffer - * associated with the unsolicited event is done by invoking the routine - * lpfc_nvmet_unsol_ls_buffer() after properly set up the buffer from the - * SLI RQ on which the unsolicited event was received. - **/ -void -lpfc_nvmet_unsol_ls_event(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, - struct lpfc_iocbq *piocb) -{ - struct lpfc_dmabuf *d_buf; - struct hbq_dmabuf *nvmebuf; - - d_buf = piocb->context2; - nvmebuf = container_of(d_buf, struct hbq_dmabuf, dbuf); - - if (!nvmebuf) { - lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, - "3015 LS Drop IO\n"); - return; - } - if (phba->nvmet_support == 0) { - lpfc_in_buf_free(phba, &nvmebuf->dbuf); - return; - } - lpfc_nvmet_unsol_ls_buffer(phba, pring, nvmebuf); -} - /** * lpfc_nvmet_unsol_fcp_event - Process an unsolicited event from an nvme nport * @phba: pointer to lpfc hba data structure. @@ -3402,8 +3307,16 @@ aerr: return 1; } -static int -lpfc_nvmet_unsol_ls_issue_abort(struct lpfc_hba *phba, +/** + * lpfc_nvme_unsol_ls_issue_abort - issue ABTS on an exchange received + * via async frame receive where the frame is not handled. + * @phba: pointer to adapter structure + * @ctxp: pointer to the asynchronously received sequence + * @sid: address of the remote port to send the ABTS to + * @xri: oxid value for the ABTS (other side's exchange id). + **/ +int +lpfc_nvme_unsol_ls_issue_abort(struct lpfc_hba *phba, struct lpfc_async_xchg_ctx *ctxp, uint32_t sid, uint16_t xri) { diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index d4d810cb8944..1aaf40081e21 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -2792,6 +2792,121 @@ lpfc_sli_get_buff(struct lpfc_hba *phba, return &hbq_entry->dbuf; } +/** + * lpfc_nvme_unsol_ls_handler - Process an unsolicited event data buffer + * containing a NVME LS request. + * @phba: pointer to lpfc hba data structure. + * @piocb: pointer to the iocbq struct representing the sequence starting + * frame.
+ * + * This routine initially validates the NVME LS, validates there is a login + * with the port that sent the LS, and then calls the appropriate nvme host + * or target LS request handler. + **/ +static void +lpfc_nvme_unsol_ls_handler(struct lpfc_hba *phba, struct lpfc_iocbq *piocb) +{ + struct lpfc_nodelist *ndlp; + struct lpfc_dmabuf *d_buf; + struct hbq_dmabuf *nvmebuf; + struct fc_frame_header *fc_hdr; + struct lpfc_async_xchg_ctx *axchg = NULL; + char *failwhy = NULL; + uint32_t oxid, sid, did, fctl, size; + int ret; + + d_buf = piocb->context2; + + nvmebuf = container_of(d_buf, struct hbq_dmabuf, dbuf); + fc_hdr = nvmebuf->hbuf.virt; + oxid = be16_to_cpu(fc_hdr->fh_ox_id); + sid = sli4_sid_from_fc_hdr(fc_hdr); + did = sli4_did_from_fc_hdr(fc_hdr); + fctl = (fc_hdr->fh_f_ctl[0] << 16 | + fc_hdr->fh_f_ctl[1] << 8 | + fc_hdr->fh_f_ctl[2]); + size = bf_get(lpfc_rcqe_length, &nvmebuf->cq_event.cqe.rcqe_cmpl); + + lpfc_nvmeio_data(phba, "NVME LS RCV: xri x%x sz %d from %06x\n", + oxid, size, sid); + + if (phba->pport->load_flag & FC_UNLOADING) { + failwhy = "Driver Unloading"; + } else if (!(phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME)) { + failwhy = "NVME FC4 Disabled"; + } else if (!phba->nvmet_support && !phba->pport->localport) { + failwhy = "No Localport"; + } else if (phba->nvmet_support && !phba->targetport) { + failwhy = "No Targetport"; + } else if (unlikely(fc_hdr->fh_r_ctl != FC_RCTL_ELS4_REQ)) { + failwhy = "Bad NVME LS R_CTL"; + } else if (unlikely((fctl & 0x00FF0000) != + (FC_FC_FIRST_SEQ | FC_FC_END_SEQ | FC_FC_SEQ_INIT))) { + failwhy = "Bad NVME LS F_CTL"; + } else { + axchg = kzalloc(sizeof(*axchg), GFP_ATOMIC); + if (!axchg) + failwhy = "No CTX memory"; + } + + if (unlikely(failwhy)) { + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_DISC | LOG_NVME_IOERR, + "6154 Drop NVME LS: SID %06X OXID x%X: %s\n", + sid, oxid, failwhy); + goto out_fail; + } + + /* validate the source of the LS is logged in */ + ndlp = lpfc_findnode_did(phba->pport, sid); + if (!ndlp || !NLP_CHK_NODE_ACT(ndlp) || + ((ndlp->nlp_state != NLP_STE_UNMAPPED_NODE) && + (ndlp->nlp_state != NLP_STE_MAPPED_NODE))) { + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_DISC, + "6216 NVME Unsol rcv: No ndlp: " + "NPort_ID x%x oxid x%x\n", + sid, oxid); + goto out_fail; + } + + axchg->phba = phba; + axchg->ndlp = ndlp; + axchg->size = size; + axchg->oxid = oxid; + axchg->sid = sid; + axchg->wqeq = NULL; + axchg->state = LPFC_NVME_STE_LS_RCV; + axchg->entry_cnt = 1; + axchg->rqb_buffer = (void *)nvmebuf; + axchg->hdwq = &phba->sli4_hba.hdwq[0]; + axchg->payload = nvmebuf->dbuf.virt; + INIT_LIST_HEAD(&axchg->list); + + if (phba->nvmet_support) + ret = lpfc_nvmet_handle_lsreq(phba, axchg); + else + ret = lpfc_nvme_handle_lsreq(phba, axchg); + + /* if zero, LS was successfully handled. If non-zero, LS not handled */ + if (!ret) + return; + + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_DISC | LOG_NVME_IOERR, + "6155 Drop NVME LS from DID %06X: SID %06X OXID x%X " + "NVMe%s handler failed %d\n", + did, sid, oxid, + (phba->nvmet_support) ? "T" : "I", ret); + +out_fail: + kfree(axchg); + + /* recycle receive buffer */ + lpfc_in_buf_free(phba, &nvmebuf->dbuf); + + /* If start of new exchange, abort it */ + if (fctl & FC_FC_FIRST_SEQ && !(fctl & FC_FC_EX_CTX)) + lpfc_nvme_unsol_ls_issue_abort(phba, axchg, sid, oxid); +} + /** * lpfc_complete_unsol_iocb - Complete an unsolicited sequence * @phba: Pointer to HBA context object. 
@@ -2813,7 +2928,7 @@ lpfc_complete_unsol_iocb(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, switch (fch_type) { case FC_TYPE_NVME: - lpfc_nvmet_unsol_ls_event(phba, pring, saveq); + lpfc_nvme_unsol_ls_handler(phba, saveq); return 1; default: break; } @@ -13978,8 +14093,8 @@ lpfc_sli4_nvmet_handle_rcqe(struct lpfc_hba *phba, struct lpfc_queue *cq, /* Just some basic sanity checks on FCP Command frame */ fctl = (fc_hdr->fh_f_ctl[0] << 16 | - fc_hdr->fh_f_ctl[1] << 8 | - fc_hdr->fh_f_ctl[2]); + fc_hdr->fh_f_ctl[1] << 8 | + fc_hdr->fh_f_ctl[2]); if (((fctl & (FC_FC_FIRST_SEQ | FC_FC_END_SEQ | FC_FC_SEQ_INIT)) != (FC_FC_FIRST_SEQ | FC_FC_END_SEQ | FC_FC_SEQ_INIT)) || -- cgit v1.2.3 From 6514b25d3fba0610cd6c42aa36e34937bed0e4d8 Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 31 Mar 2020 09:50:06 -0700 Subject: lpfc: Refactor Send LS Request support Currently, the ability to send an NVME LS request is limited to the nvme (host) side of the driver. In preparation of both the nvme and nvmet sides supporting Send LS Request, rework the existing send ls_req and ls_req completion routines such that there is common code that can be used by both sides. Signed-off-by: Paul Ely Signed-off-by: James Smart Reviewed-by: Hannes Reinecke Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/scsi/lpfc/lpfc_nvme.c | 289 +++++++++++++++++++++++++----------------- drivers/scsi/lpfc/lpfc_nvme.h | 13 ++ 2 files changed, 184 insertions(+), 118 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c index c2113f6c76c1..1b7651c24f06 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.c +++ b/drivers/scsi/lpfc/lpfc_nvme.c @@ -412,43 +412,43 @@ lpfc_nvme_handle_lsreq(struct lpfc_hba *phba, return 1; } -static void -lpfc_nvme_cmpl_gen_req(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, - struct lpfc_wcqe_complete *wcqe) +/** + * __lpfc_nvme_ls_req_cmp - Generic completion handler for a NVME + * LS request. + * @phba: Pointer to HBA context object + * @vport: The local port that issued the LS + * @cmdwqe: Pointer to driver command WQE object. + * @wcqe: Pointer to driver response CQE object. + * + * This function is the generic completion handler for NVME LS requests. + * The function updates any states and statistics, calls the transport + * ls_req done() routine, then tears down the command and buffers used + * for the LS request.
+ **/ +void +__lpfc_nvme_ls_req_cmp(struct lpfc_hba *phba, struct lpfc_vport *vport, + struct lpfc_iocbq *cmdwqe, + struct lpfc_wcqe_complete *wcqe) { - struct lpfc_vport *vport = cmdwqe->vport; - struct lpfc_nvme_lport *lport; - uint32_t status; struct nvmefc_ls_req *pnvme_lsreq; struct lpfc_dmabuf *buf_ptr; struct lpfc_nodelist *ndlp; + uint32_t status; pnvme_lsreq = (struct nvmefc_ls_req *)cmdwqe->context2; + ndlp = (struct lpfc_nodelist *)cmdwqe->context1; status = bf_get(lpfc_wcqe_c_status, wcqe) & LPFC_IOCB_STATUS_MASK; - if (vport->localport) { - lport = (struct lpfc_nvme_lport *)vport->localport->private; - if (lport) { - atomic_inc(&lport->fc4NvmeLsCmpls); - if (status) { - if (bf_get(lpfc_wcqe_c_xb, wcqe)) - atomic_inc(&lport->cmpl_ls_xb); - atomic_inc(&lport->cmpl_ls_err); - } - } - } - - ndlp = (struct lpfc_nodelist *)cmdwqe->context1; lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_DISC, - "6047 nvme cmpl Enter " - "Data %px DID %x Xri: %x status %x reason x%x " - "cmd:x%px lsreg:x%px bmp:x%px ndlp:x%px\n", + "6047 NVMEx LS REQ %px cmpl DID %x Xri: %x " + "status %x reason x%x cmd:x%px lsreg:x%px bmp:x%px " + "ndlp:x%px\n", pnvme_lsreq, ndlp ? ndlp->nlp_DID : 0, cmdwqe->sli4_xritag, status, (wcqe->parameter & 0xffff), cmdwqe, pnvme_lsreq, cmdwqe->context3, ndlp); - lpfc_nvmeio_data(phba, "NVME LS CMPL: xri x%x stat x%x parm x%x\n", + lpfc_nvmeio_data(phba, "NVMEx LS CMPL: xri x%x stat x%x parm x%x\n", cmdwqe->sli4_xritag, status, wcqe->parameter); if (cmdwqe->context3) { @@ -461,7 +461,7 @@ lpfc_nvme_cmpl_gen_req(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, pnvme_lsreq->done(pnvme_lsreq, status); else lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_DISC, - "6046 nvme cmpl without done call back? " + "6046 NVMEx cmpl without done call back? " "Data %px DID %x Xri: %x status %x\n", pnvme_lsreq, ndlp ? 
ndlp->nlp_DID : 0, cmdwqe->sli4_xritag, status); @@ -472,6 +472,31 @@ lpfc_nvme_cmpl_gen_req(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, lpfc_sli_release_iocbq(phba, cmdwqe); } +static void +lpfc_nvme_ls_req_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, + struct lpfc_wcqe_complete *wcqe) +{ + struct lpfc_vport *vport = cmdwqe->vport; + struct lpfc_nvme_lport *lport; + uint32_t status; + + status = bf_get(lpfc_wcqe_c_status, wcqe) & LPFC_IOCB_STATUS_MASK; + + if (vport->localport) { + lport = (struct lpfc_nvme_lport *)vport->localport->private; + if (lport) { + atomic_inc(&lport->fc4NvmeLsCmpls); + if (status) { + if (bf_get(lpfc_wcqe_c_xb, wcqe)) + atomic_inc(&lport->cmpl_ls_xb); + atomic_inc(&lport->cmpl_ls_err); + } + } + } + + __lpfc_nvme_ls_req_cmp(phba, vport, cmdwqe, wcqe); +} + static int lpfc_nvme_gen_req(struct lpfc_vport *vport, struct lpfc_dmabuf *bmp, struct lpfc_dmabuf *inp, @@ -573,13 +598,6 @@ lpfc_nvme_gen_req(struct lpfc_vport *vport, struct lpfc_dmabuf *bmp, /* Issue GEN REQ WQE for NPORT */ - lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS, - "6050 Issue GEN REQ WQE to NPORT x%x " - "Data: x%x x%x wq:x%px lsreq:x%px bmp:x%px " - "xmit:%d 1st:%d\n", - ndlp->nlp_DID, genwqe->iotag, - vport->port_state, - genwqe, pnvme_lsreq, bmp, xmit_len, first_len); genwqe->wqe_cmpl = cmpl; genwqe->iocb_cmpl = NULL; genwqe->drvrTimeout = tmo + LPFC_DRVR_TIMEOUT; @@ -591,105 +609,108 @@ lpfc_nvme_gen_req(struct lpfc_vport *vport, struct lpfc_dmabuf *bmp, rc = lpfc_sli4_issue_wqe(phba, &phba->sli4_hba.hdwq[0], genwqe); if (rc) { - lpfc_printf_vlog(vport, KERN_ERR, LOG_ELS, + lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_DISC | LOG_ELS, "6045 Issue GEN REQ WQE to NPORT x%x " - "Data: x%x x%x\n", + "Data: x%x x%x rc x%x\n", ndlp->nlp_DID, genwqe->iotag, - vport->port_state); + vport->port_state, rc); lpfc_sli_release_iocbq(phba, genwqe); return 1; } + + lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_DISC | LOG_ELS, + "6050 Issue GEN REQ WQE to NPORT x%x " + "Data: oxid: x%x state: x%x wq:x%px lsreq:x%px " + "bmp:x%px xmit:%d 1st:%d\n", + ndlp->nlp_DID, genwqe->sli4_xritag, + vport->port_state, + genwqe, pnvme_lsreq, bmp, xmit_len, first_len); return 0; } + /** - * lpfc_nvme_ls_req - Issue an Link Service request - * @lpfc_pnvme: Pointer to the driver's nvme instance data - * @lpfc_nvme_lport: Pointer to the driver's local port data - * @lpfc_nvme_rport: Pointer to the rport getting the @lpfc_nvme_ereq + * __lpfc_nvme_ls_req - Generic service routine to issue an NVME LS request + * @vport: The local port issuing the LS + * @ndlp: The remote port to send the LS to + * @pnvme_lsreq: Pointer to LS request structure from the transport * - * Driver registers this routine to handle any link service request - * from the nvme_fc transport to a remote nvme-aware port. + * Routine validates the ndlp, builds buffers and sends a GEN_REQUEST + * WQE to perform the LS operation. * * Return value : * 0 - Success - * TODO: What are the failure codes. 
+ * non-zero: various error codes, in form of -Exxx **/ -static int -lpfc_nvme_ls_req(struct nvme_fc_local_port *pnvme_lport, - struct nvme_fc_remote_port *pnvme_rport, - struct nvmefc_ls_req *pnvme_lsreq) +int +__lpfc_nvme_ls_req(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, + struct nvmefc_ls_req *pnvme_lsreq, + void (*gen_req_cmp)(struct lpfc_hba *phba, + struct lpfc_iocbq *cmdwqe, + struct lpfc_wcqe_complete *wcqe)) { - int ret = 0; - struct lpfc_nvme_lport *lport; - struct lpfc_nvme_rport *rport; - struct lpfc_vport *vport; - struct lpfc_nodelist *ndlp; - struct ulp_bde64 *bpl; struct lpfc_dmabuf *bmp; + struct ulp_bde64 *bpl; + int ret; uint16_t ntype, nstate; - /* there are two dma buf in the request, actually there is one and - * the second one is just the start address + cmd size. - * Before calling lpfc_nvme_gen_req these buffers need to be wrapped - * in a lpfc_dmabuf struct. When freeing we just free the wrapper - * because the nvem layer owns the data bufs. - * We do not have to break these packets open, we don't care what is in - * them. And we do not have to look at the resonse data, we only care - * that we got a response. All of the caring is going to happen in the - * nvme-fc layer. - */ - - lport = (struct lpfc_nvme_lport *)pnvme_lport->private; - rport = (struct lpfc_nvme_rport *)pnvme_rport->private; - if (unlikely(!lport) || unlikely(!rport)) - return -EINVAL; - - vport = lport->vport; - - if (vport->load_flag & FC_UNLOADING) - return -ENODEV; - - /* Need the ndlp. It is stored in the driver's rport. */ - ndlp = rport->ndlp; if (!ndlp || !NLP_CHK_NODE_ACT(ndlp)) { - lpfc_printf_vlog(vport, KERN_ERR, LOG_NODE | LOG_NVME_IOERR, - "6051 Remoteport x%px, rport has invalid ndlp. " - "Failing LS Req\n", pnvme_rport); + lpfc_printf_vlog(vport, KERN_ERR, + LOG_NVME_DISC | LOG_NODE | LOG_NVME_IOERR, + "6051 NVMEx LS REQ: Bad NDLP x%px, Failing " + "LS Req\n", + ndlp); return -ENODEV; } - /* The remote node has to be a mapped nvme target or an - * unmapped nvme initiator or it's an error. - */ ntype = ndlp->nlp_type; nstate = ndlp->nlp_state; if ((ntype & NLP_NVME_TARGET && nstate != NLP_STE_MAPPED_NODE) || (ntype & NLP_NVME_INITIATOR && nstate != NLP_STE_UNMAPPED_NODE)) { - lpfc_printf_vlog(vport, KERN_ERR, LOG_NODE | LOG_NVME_IOERR, - "6088 DID x%06x not ready for " - "IO. State x%x, Type x%x\n", - pnvme_rport->port_id, - ndlp->nlp_state, ndlp->nlp_type); + lpfc_printf_vlog(vport, KERN_ERR, + LOG_NVME_DISC | LOG_NODE | LOG_NVME_IOERR, + "6088 NVMEx LS REQ: Fail DID x%06x not " + "ready for IO. Type x%x, State x%x\n", + ndlp->nlp_DID, ntype, nstate); return -ENODEV; } - bmp = kmalloc(sizeof(struct lpfc_dmabuf), GFP_KERNEL); + + /* + * there are two dma buf in the request, actually there is one and + * the second one is just the start address + cmd size. + * Before calling lpfc_nvme_gen_req these buffers need to be wrapped + * in a lpfc_dmabuf struct. When freeing we just free the wrapper + * because the nvme layer owns the data bufs. + * We do not have to break these packets open, we don't care what is + * in them. And we do not have to look at the response data, we only + * care that we got a response. All of the caring is going to happen + * in the nvme-fc layer.
+ */ + + bmp = kmalloc(sizeof(*bmp), GFP_KERNEL); if (!bmp) { - lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_DISC, - "6044 Could not find node for DID %x\n", - pnvme_rport->port_id); - return 2; + lpfc_printf_vlog(vport, KERN_ERR, + LOG_NVME_DISC | LOG_NVME_IOERR, + "6044 NVMEx LS REQ: Could not alloc LS buf " + "for DID %x\n", + ndlp->nlp_DID); + return -ENOMEM; } - INIT_LIST_HEAD(&bmp->list); + bmp->virt = lpfc_mbuf_alloc(vport->phba, MEM_PRI, &(bmp->phys)); if (!bmp->virt) { - lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_DISC, - "6042 Could not find node for DID %x\n", - pnvme_rport->port_id); + lpfc_printf_vlog(vport, KERN_ERR, + LOG_NVME_DISC | LOG_NVME_IOERR, + "6042 NVMEx LS REQ: Could not alloc mbuf " + "for DID %x\n", + ndlp->nlp_DID); kfree(bmp); - return 3; + return -ENOMEM; } + + INIT_LIST_HEAD(&bmp->list); + bpl = (struct ulp_bde64 *)bmp->virt; bpl->addrHigh = le32_to_cpu(putPaddrHigh(pnvme_lsreq->rqstdma)); bpl->addrLow = le32_to_cpu(putPaddrLow(pnvme_lsreq->rqstdma)); @@ -704,37 +725,69 @@ lpfc_nvme_ls_req(struct nvme_fc_local_port *pnvme_lport, bpl->tus.f.bdeSize = pnvme_lsreq->rsplen; bpl->tus.w = le32_to_cpu(bpl->tus.w); - /* Expand print to include key fields. */ lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_DISC, - "6149 Issue LS Req to DID 0x%06x lport x%px, " - "rport x%px lsreq x%px rqstlen:%d rsplen:%d " - "%pad %pad\n", - ndlp->nlp_DID, pnvme_lport, pnvme_rport, - pnvme_lsreq, pnvme_lsreq->rqstlen, - pnvme_lsreq->rsplen, &pnvme_lsreq->rqstdma, - &pnvme_lsreq->rspdma); - - atomic_inc(&lport->fc4NvmeLsRequests); + "6149 NVMEx LS REQ: Issue to DID 0x%06x lsreq x%px, " + "rqstlen:%d rsplen:%d %pad %pad\n", + ndlp->nlp_DID, pnvme_lsreq, pnvme_lsreq->rqstlen, + pnvme_lsreq->rsplen, &pnvme_lsreq->rqstdma, + &pnvme_lsreq->rspdma); - /* Hardcode the wait to 30 seconds. Connections are failing otherwise. - * This code allows it all to work. - */ ret = lpfc_nvme_gen_req(vport, bmp, pnvme_lsreq->rqstaddr, - pnvme_lsreq, lpfc_nvme_cmpl_gen_req, - ndlp, 2, 30, 0); + pnvme_lsreq, gen_req_cmp, ndlp, 2, + LPFC_NVME_LS_TIMEOUT, 0); if (ret != WQE_SUCCESS) { - atomic_inc(&lport->xmt_ls_err); - lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_DISC, - "6052 EXIT. issue ls wqe failed lport x%px, " - "rport x%px lsreq x%px Status %x DID %x\n", - pnvme_lport, pnvme_rport, pnvme_lsreq, - ret, ndlp->nlp_DID); + lpfc_printf_vlog(vport, KERN_ERR, + LOG_NVME_DISC | LOG_NVME_IOERR, + "6052 NVMEx REQ: EXIT. issue ls wqe failed " + "lsreq x%px Status %x DID %x\n", + pnvme_lsreq, ret, ndlp->nlp_DID); lpfc_mbuf_free(vport->phba, bmp->virt, bmp->phys); kfree(bmp); - return ret; + return -EIO; } - /* Stub in routine and return 0 for now. */ + return 0; +} + +/** + * lpfc_nvme_ls_req - Issue an NVME Link Service request + * @lpfc_nvme_lport: Transport localport that LS is to be issued from. + * @lpfc_nvme_rport: Transport remoteport that LS is to be sent to. + * @pnvme_lsreq - the transport nvme_ls_req structure for the LS + * + * Driver registers this routine to handle any link service request + * from the nvme_fc transport to a remote nvme-aware port. 
+ * + * Return value : + * 0 - Success + * non-zero: various error codes, in form of -Exxx **/ +static int +lpfc_nvme_ls_req(struct nvme_fc_local_port *pnvme_lport, + struct nvme_fc_remote_port *pnvme_rport, + struct nvmefc_ls_req *pnvme_lsreq) +{ + struct lpfc_nvme_lport *lport; + struct lpfc_nvme_rport *rport; + struct lpfc_vport *vport; + int ret; + + lport = (struct lpfc_nvme_lport *)pnvme_lport->private; + rport = (struct lpfc_nvme_rport *)pnvme_rport->private; + if (unlikely(!lport) || unlikely(!rport)) + return -EINVAL; + + vport = lport->vport; + if (vport->load_flag & FC_UNLOADING) + return -ENODEV; + + atomic_inc(&lport->fc4NvmeLsRequests); + + ret = __lpfc_nvme_ls_req(vport, rport->ndlp, pnvme_lsreq, + lpfc_nvme_ls_req_cmp); + if (ret) + atomic_inc(&lport->xmt_ls_err); + + return ret; } diff --git a/drivers/scsi/lpfc/lpfc_nvme.h b/drivers/scsi/lpfc/lpfc_nvme.h index 10e8d868608e..6f8d44aa47b2 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.h +++ b/drivers/scsi/lpfc/lpfc_nvme.h @@ -79,6 +79,12 @@ struct lpfc_nvme_fcpreq_priv { struct lpfc_io_buf *nvme_buf; }; +/* + * set NVME LS request timeouts to 30s. It is larger than the 2*R_A_TOV + * set by the spec, which appears to have issues with some devices. + */ +#define LPFC_NVME_LS_TIMEOUT 30 + #define LPFC_NVMET_DEFAULT_SEGS (64 + 1) /* 256K IOs */ #define LPFC_NVMET_RQE_MIN_POST 128 @@ -224,6 +230,13 @@ struct lpfc_async_xchg_ctx { /* routines found in lpfc_nvme.c */ +int __lpfc_nvme_ls_req(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, + struct nvmefc_ls_req *pnvme_lsreq, + void (*gen_req_cmp)(struct lpfc_hba *phba, + struct lpfc_iocbq *cmdwqe, + struct lpfc_wcqe_complete *wcqe)); +void __lpfc_nvme_ls_req_cmp(struct lpfc_hba *phba, struct lpfc_vport *vport, + struct lpfc_iocbq *cmdwqe, struct lpfc_wcqe_complete *wcqe); /* routines found in lpfc_nvmet.c */ int lpfc_nvme_unsol_ls_issue_abort(struct lpfc_hba *phba, -- cgit v1.2.3 From e96a22b0b7c252295180c12128af380282e3b8c5 Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 31 Mar 2020 09:50:07 -0700 Subject: lpfc: Refactor Send LS Abort support Send LS Abort support is needed when Send LS Request is supported. Currently, the ability to abort an NVME LS request is limited to the nvme (host) side of the driver. In preparation of both the nvme and nvmet sides supporting Send LS Abort, rework the existing ls_req abort routines such that there is common code that can be used by both sides. While refactoring, it was seen that the logic in the abort routine was incorrect. It attempted to abort all NVME LS's on the indicated port. As such, the routine was reworked to abort only the NVME LS request that was specified.
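A minimal, compilable model of the corrected matching logic follows. The list walk and flag are stand-ins for the driver's txcmplq and iocb_flag handling (locking omitted); the point is that only the wqe whose context matches the requested LS is marked and aborted.

#include <stdbool.h>
#include <stdio.h>

#define LPFC_DRIVER_ABORTED 0x1	/* stand-in flag value */

struct wqe {
	void *context2;		/* the nvmefc_ls_req this WQE carries */
	unsigned int iocb_flag;
	struct wqe *next;
};

/* returns 0 if the specified LS was found and aborted, 1 otherwise */
static int ls_abort(struct wqe *txcmplq, void *pnvme_lsreq)
{
	struct wqe *w;
	bool foundit = false;

	for (w = txcmplq; w; w = w->next) {
		if (w->context2 == pnvme_lsreq) {
			w->iocb_flag |= LPFC_DRIVER_ABORTED;
			foundit = true;
			break;
		}
	}

	if (foundit) {
		printf("abort issued for the matching WQE only\n");
		return 0;
	}
	return 1;
}

int main(void)
{
	int ls_a, ls_b;		/* stand-ins for two in-flight LS requests */
	struct wqe w2 = { .context2 = &ls_b, .iocb_flag = 0, .next = NULL };
	struct wqe w1 = { .context2 = &ls_a, .iocb_flag = 0, .next = &w2 };

	return ls_abort(&w1, &ls_b);	/* only w2 is marked and aborted */
}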
Signed-off-by: Paul Ely Signed-off-by: James Smart Reviewed-by: Hannes Reinecke Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/scsi/lpfc/lpfc_nvme.c | 125 +++++++++++++++++++++++++----------------- drivers/scsi/lpfc/lpfc_nvme.h | 2 + 2 files changed, 77 insertions(+), 50 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c index 1b7651c24f06..406f71b327a1 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.c +++ b/drivers/scsi/lpfc/lpfc_nvme.c @@ -792,83 +792,108 @@ lpfc_nvme_ls_req(struct nvme_fc_local_port *pnvme_lport, } /** - * lpfc_nvme_ls_abort - Issue an Link Service request - * @lpfc_pnvme: Pointer to the driver's nvme instance data - * @lpfc_nvme_lport: Pointer to the driver's local port data - * @lpfc_nvme_rport: Pointer to the rport getting the @lpfc_nvme_ereq + * __lpfc_nvme_ls_abort - Generic service routine to abort a prior + * NVME LS request + * @vport: The local port that issued the LS + * @ndlp: The remote port the LS was sent to + * @pnvme_lsreq: Pointer to LS request structure from the transport * - * Driver registers this routine to handle any link service request - * from the nvme_fc transport to a remote nvme-aware port. + * The driver validates the ndlp, looks for the LS, and aborts the + * LS if found. * - * Return value : - * 0 - Success - * TODO: What are the failure codes. + * Returns: + * 0 : if LS found and aborted + * non-zero: various error conditions in form -Exxx **/ -static void -lpfc_nvme_ls_abort(struct nvme_fc_local_port *pnvme_lport, - struct nvme_fc_remote_port *pnvme_rport, - struct nvmefc_ls_req *pnvme_lsreq) +int +__lpfc_nvme_ls_abort(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, + struct nvmefc_ls_req *pnvme_lsreq) { - struct lpfc_nvme_lport *lport; - struct lpfc_vport *vport; - struct lpfc_hba *phba; - struct lpfc_nodelist *ndlp; - LIST_HEAD(abort_list); + struct lpfc_hba *phba = vport->phba; struct lpfc_sli_ring *pring; struct lpfc_iocbq *wqe, *next_wqe; + bool foundit = false; - lport = (struct lpfc_nvme_lport *)pnvme_lport->private; - if (unlikely(!lport)) - return; - vport = lport->vport; - phba = vport->phba; - - if (vport->load_flag & FC_UNLOADING) - return; - - ndlp = lpfc_findnode_did(vport, pnvme_rport->port_id); if (!ndlp) { - lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_ABTS, - "6049 Could not find node for DID %x\n", - pnvme_rport->port_id); - return; + lpfc_printf_log(phba, KERN_ERR, + LOG_NVME_DISC | LOG_NODE | + LOG_NVME_IOERR | LOG_NVME_ABTS, + "6049 NVMEx LS REQ Abort: Bad NDLP x%px DID " + "x%06x, Failing LS Req\n", + ndlp, ndlp ? ndlp->nlp_DID : 0); + return -EINVAL; } - /* Expand print to include key fields. */ - lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_ABTS, - "6040 ENTER. lport x%px, rport x%px lsreq x%px rqstlen:%d " - "rsplen:%d %pad %pad\n", - pnvme_lport, pnvme_rport, + lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_DISC | LOG_NVME_ABTS, + "6040 NVMEx LS REQ Abort: Issue LS_ABORT for lsreq " + "x%p rqstlen:%d rsplen:%d %pad %pad\n", pnvme_lsreq, pnvme_lsreq->rqstlen, pnvme_lsreq->rsplen, &pnvme_lsreq->rqstdma, &pnvme_lsreq->rspdma); /* - * Lock the ELS ring txcmplq and build a local list of all ELS IOs - * that need an ABTS. The IOs need to stay on the txcmplq so that - * the abort operation completes them successfully. + * Lock the ELS ring txcmplq and look for the wqe that matches + * this ELS. If found, issue an abort on the wqe. 
*/ pring = phba->sli4_hba.nvmels_wq->pring; spin_lock_irq(&phba->hbalock); spin_lock(&pring->ring_lock); list_for_each_entry_safe(wqe, next_wqe, &pring->txcmplq, list) { - /* Add to abort_list on on NDLP match. */ - if (lpfc_check_sli_ndlp(phba, pring, wqe, ndlp)) { + if (wqe->context2 == pnvme_lsreq) { wqe->iocb_flag |= LPFC_DRIVER_ABORTED; - list_add_tail(&wqe->dlist, &abort_list); + foundit = true; + break; } } spin_unlock(&pring->ring_lock); + + if (foundit) + lpfc_sli_issue_abort_iotag(phba, pring, wqe); spin_unlock_irq(&phba->hbalock); - /* Abort the targeted IOs and remove them from the abort list. */ - list_for_each_entry_safe(wqe, next_wqe, &abort_list, dlist) { + if (foundit) + return 0; + + lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_DISC | LOG_NVME_ABTS, + "6213 NVMEx LS REQ Abort: Unable to locate req x%p\n", + pnvme_lsreq); + return 1; +} + +/** + * lpfc_nvme_ls_abort - Abort a prior NVME LS request + * @lpfc_nvme_lport: Transport localport that LS is to be issued from. + * @lpfc_nvme_rport: Transport remoteport that LS is to be sent to. + * @pnvme_lsreq - the transport nvme_ls_req structure for the LS + * + * Driver registers this routine to abort a NVME LS request that is + * in progress (from the transports perspective). + **/ +static void +lpfc_nvme_ls_abort(struct nvme_fc_local_port *pnvme_lport, + struct nvme_fc_remote_port *pnvme_rport, + struct nvmefc_ls_req *pnvme_lsreq) +{ + struct lpfc_nvme_lport *lport; + struct lpfc_vport *vport; + struct lpfc_hba *phba; + struct lpfc_nodelist *ndlp; + int ret; + + lport = (struct lpfc_nvme_lport *)pnvme_lport->private; + if (unlikely(!lport)) + return; + vport = lport->vport; + phba = vport->phba; + + if (vport->load_flag & FC_UNLOADING) + return; + + ndlp = lpfc_findnode_did(vport, pnvme_rport->port_id); + + ret = __lpfc_nvme_ls_abort(vport, ndlp, pnvme_lsreq); + if (!ret) atomic_inc(&lport->xmt_ls_abort); - spin_lock_irq(&phba->hbalock); - list_del_init(&wqe->dlist); - lpfc_sli_issue_abort_iotag(phba, pring, wqe); - spin_unlock_irq(&phba->hbalock); - } } /* Fix up the existing sgls for NVME IO. */ diff --git a/drivers/scsi/lpfc/lpfc_nvme.h b/drivers/scsi/lpfc/lpfc_nvme.h index 6f8d44aa47b2..a9ad81d0c182 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.h +++ b/drivers/scsi/lpfc/lpfc_nvme.h @@ -237,6 +237,8 @@ int __lpfc_nvme_ls_req(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, struct lpfc_wcqe_complete *wcqe)); void __lpfc_nvme_ls_req_cmp(struct lpfc_hba *phba, struct lpfc_vport *vport, struct lpfc_iocbq *cmdwqe, struct lpfc_wcqe_complete *wcqe); +int __lpfc_nvme_ls_abort(struct lpfc_vport *vport, + struct lpfc_nodelist *ndlp, struct nvmefc_ls_req *pnvme_lsreq); /* routines found in lpfc_nvmet.c */ int lpfc_nvme_unsol_ls_issue_abort(struct lpfc_hba *phba, -- cgit v1.2.3 From fe1bedec5b9ce741fd6d4ebd6ede56e2c429467b Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 31 Mar 2020 09:50:08 -0700 Subject: lpfc: Refactor Send LS Response support Currently, the ability to send an NVME LS response is limited to the nvmet (controller/target) side of the driver. In preparation of both the nvme and nvmet sides supporting Send LS Response, rework the existing send ls_rsp and ls_rsp completion routines such that there is common code that can be used by both sides. 
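A small, compilable model of the shape of this refactor follows: one common send routine takes the completion handler as a parameter, so the host and target wrappers can each layer their own statistics onto a shared generic completion. All names and types here are illustrative stand-ins, not the driver's.

#include <stdio.h>

struct cmd {
	unsigned int oxid;	/* exchange id the LS rsp rides on */
};

/* generic completion: tear down resources, call the transport's done() */
static void generic_rsp_cmp(struct cmd *c)
{
	printf("LS rsp cmpl oxid x%x: tear down, call done()\n", c->oxid);
}

/* target-side completion: update nvmet statistics, then the generic path */
static void nvmet_rsp_cmp(struct cmd *c)
{
	printf("nvmet stats updated for oxid x%x\n", c->oxid);
	generic_rsp_cmp(c);
}

/* common send routine, parameterized by the completion to invoke */
static int xmt_ls_rsp(struct cmd *c, void (*cmp)(struct cmd *))
{
	/* ... the real code builds and issues an XMIT_SEQUENCE WQE ... */
	cmp(c);			/* fires when the WQE would complete */
	return 0;
}

int main(void)
{
	struct cmd c = { .oxid = 0x77 };

	/* the nvmet wrapper passes its own completion into the common code */
	return xmt_ls_rsp(&c, nvmet_rsp_cmp);
}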
Signed-off-by: Paul Ely Signed-off-by: James Smart Reviewed-by: Hannes Reinecke Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/scsi/lpfc/lpfc_nvme.h | 7 ++ drivers/scsi/lpfc/lpfc_nvmet.c | 255 ++++++++++++++++++++++++++++------------- 2 files changed, 184 insertions(+), 78 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_nvme.h b/drivers/scsi/lpfc/lpfc_nvme.h index a9ad81d0c182..f28a6789e252 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.h +++ b/drivers/scsi/lpfc/lpfc_nvme.h @@ -244,3 +244,10 @@ int __lpfc_nvme_ls_abort(struct lpfc_vport *vport, int lpfc_nvme_unsol_ls_issue_abort(struct lpfc_hba *phba, struct lpfc_async_xchg_ctx *ctxp, uint32_t sid, uint16_t xri); +int __lpfc_nvme_xmt_ls_rsp(struct lpfc_async_xchg_ctx *axchg, + struct nvmefc_ls_rsp *ls_rsp, + void (*xmt_ls_rsp_cmp)(struct lpfc_hba *phba, + struct lpfc_iocbq *cmdwqe, + struct lpfc_wcqe_complete *wcqe)); +void __lpfc_nvme_xmt_ls_rsp_cmp(struct lpfc_hba *phba, + struct lpfc_iocbq *cmdwqe, struct lpfc_wcqe_complete *wcqe); diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c index a95c8ef6373a..6a1365df6c65 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.c +++ b/drivers/scsi/lpfc/lpfc_nvmet.c @@ -280,6 +280,53 @@ lpfc_nvmet_defer_release(struct lpfc_hba *phba, spin_unlock(&phba->sli4_hba.abts_nvmet_buf_list_lock); } +/** + * __lpfc_nvme_xmt_ls_rsp_cmp - Generic completion handler for the + * transmission of an NVME LS response. + * @phba: Pointer to HBA context object. + * @cmdwqe: Pointer to driver command WQE object. + * @wcqe: Pointer to driver response CQE object. + * + * The function is called from SLI ring event handler with no + * lock held. The function frees memory resources used for the command + * used to send the NVME LS RSP. + **/ +void +__lpfc_nvme_xmt_ls_rsp_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, + struct lpfc_wcqe_complete *wcqe) +{ + struct lpfc_async_xchg_ctx *axchg = cmdwqe->context2; + struct nvmefc_ls_rsp *ls_rsp = &axchg->ls_rsp; + uint32_t status, result; + + status = bf_get(lpfc_wcqe_c_status, wcqe) & LPFC_IOCB_STATUS_MASK; + result = wcqe->parameter; + + if (axchg->state != LPFC_NVME_STE_LS_RSP || axchg->entry_cnt != 2) { + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_DISC | LOG_NVME_IOERR, + "6410 NVMEx LS cmpl state mismatch IO x%x: " + "%d %d\n", + axchg->oxid, axchg->state, axchg->entry_cnt); + } + + lpfc_nvmeio_data(phba, "NVMEx LS CMPL: xri x%x stat x%x result x%x\n", + axchg->oxid, status, result); + + lpfc_printf_log(phba, KERN_INFO, LOG_NVME_DISC, + "6038 NVMEx LS rsp cmpl: %d %d oxid x%x\n", + status, result, axchg->oxid); + + lpfc_nlp_put(cmdwqe->context1); + cmdwqe->context2 = NULL; + cmdwqe->context3 = NULL; + lpfc_sli_release_iocbq(phba, cmdwqe); + ls_rsp->done(ls_rsp); + lpfc_printf_log(phba, KERN_INFO, LOG_NVME_DISC, + "6200 NVMEx LS rsp cmpl done status %d oxid x%x\n", + status, axchg->oxid); + kfree(axchg); +} + /** * lpfc_nvmet_xmt_ls_rsp_cmp - Completion handler for LS Response * @phba: Pointer to HBA context object. @@ -288,33 +335,23 @@ lpfc_nvmet_defer_release(struct lpfc_hba *phba, * * The function is called from SLI ring event handler with no * lock held. This function is the completion handler for NVME LS commands - * The function frees memory resources used for the NVME commands. + * The function updates any states and statistics, then calls the + * generic completion handler to free resources. 
**/ static void lpfc_nvmet_xmt_ls_rsp_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, struct lpfc_wcqe_complete *wcqe) { struct lpfc_nvmet_tgtport *tgtp; - struct nvmefc_ls_rsp *rsp; - struct lpfc_async_xchg_ctx *ctxp; uint32_t status, result; - status = bf_get(lpfc_wcqe_c_status, wcqe); - result = wcqe->parameter; - ctxp = cmdwqe->context2; - - if (ctxp->state != LPFC_NVME_STE_LS_RSP || ctxp->entry_cnt != 2) { - lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, - "6410 NVMET LS cmpl state mismatch IO x%x: " - "%d %d\n", - ctxp->oxid, ctxp->state, ctxp->entry_cnt); - } - if (!phba->targetport) - goto out; + goto finish; - tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; + status = bf_get(lpfc_wcqe_c_status, wcqe) & LPFC_IOCB_STATUS_MASK; + result = wcqe->parameter; + tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; if (tgtp) { if (status) { atomic_inc(&tgtp->xmt_ls_rsp_error); @@ -327,22 +364,8 @@ lpfc_nvmet_xmt_ls_rsp_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, } } -out: - rsp = &ctxp->ls_rsp; - - lpfc_nvmeio_data(phba, "NVMET LS CMPL: xri x%x stat x%x result x%x\n", - ctxp->oxid, status, result); - - lpfc_printf_log(phba, KERN_INFO, LOG_NVME_DISC, - "6038 NVMET LS rsp cmpl: %d %d oxid x%x\n", - status, result, ctxp->oxid); - - lpfc_nlp_put(cmdwqe->context1); - cmdwqe->context2 = NULL; - cmdwqe->context3 = NULL; - lpfc_sli_release_iocbq(phba, cmdwqe); - rsp->done(rsp); - kfree(ctxp); +finish: + __lpfc_nvme_xmt_ls_rsp_cmp(phba, cmdwqe, wcqe); } /** @@ -819,52 +842,61 @@ lpfc_nvmet_xmt_fcp_op_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, #endif } -static int -lpfc_nvmet_xmt_ls_rsp(struct nvmet_fc_target_port *tgtport, - struct nvmefc_ls_rsp *rsp) +/** + * __lpfc_nvme_xmt_ls_rsp - Generic service routine to transmit + * an NVME LS rsp for a prior NVME LS request that was received. + * @axchg: pointer to exchange context for the NVME LS request the response + * is for. + * @ls_rsp: pointer to the transport LS RSP that is to be sent + * @xmt_ls_rsp_cmp: completion routine to call upon RSP transmit done + * + * This routine is used to format and send a WQE to transmit a NVME LS + * Response. The response is for a prior NVME LS request that was + * received and posted to the transport. + * + * Returns: + * 0 : if response successfully transmitted + * non-zero : if response failed to transmit, of the form -Exxx.
+ **/ +int +__lpfc_nvme_xmt_ls_rsp(struct lpfc_async_xchg_ctx *axchg, + struct nvmefc_ls_rsp *ls_rsp, + void (*xmt_ls_rsp_cmp)(struct lpfc_hba *phba, + struct lpfc_iocbq *cmdwqe, + struct lpfc_wcqe_complete *wcqe)) { - struct lpfc_async_xchg_ctx *ctxp = - container_of(rsp, struct lpfc_async_xchg_ctx, ls_rsp); - struct lpfc_hba *phba = ctxp->phba; - struct hbq_dmabuf *nvmebuf = - (struct hbq_dmabuf *)ctxp->rqb_buffer; + struct lpfc_hba *phba = axchg->phba; + struct hbq_dmabuf *nvmebuf = (struct hbq_dmabuf *)axchg->rqb_buffer; struct lpfc_iocbq *nvmewqeq; - struct lpfc_nvmet_tgtport *nvmep = tgtport->private; struct lpfc_dmabuf dmabuf; struct ulp_bde64 bpl; int rc; - if (phba->pport->load_flag & FC_UNLOADING) - return -ENODEV; - if (phba->pport->load_flag & FC_UNLOADING) return -ENODEV; lpfc_printf_log(phba, KERN_INFO, LOG_NVME_DISC, - "6023 NVMET LS rsp oxid x%x\n", ctxp->oxid); + "6023 NVMEx LS rsp oxid x%x\n", axchg->oxid); - if ((ctxp->state != LPFC_NVME_STE_LS_RCV) || - (ctxp->entry_cnt != 1)) { - lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, - "6412 NVMET LS rsp state mismatch " + if (axchg->state != LPFC_NVME_STE_LS_RCV || axchg->entry_cnt != 1) { + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_DISC | LOG_NVME_IOERR, + "6412 NVMEx LS rsp state mismatch " "oxid x%x: %d %d\n", - ctxp->oxid, ctxp->state, ctxp->entry_cnt); + axchg->oxid, axchg->state, axchg->entry_cnt); + return -EALREADY; } - ctxp->state = LPFC_NVME_STE_LS_RSP; - ctxp->entry_cnt++; + axchg->state = LPFC_NVME_STE_LS_RSP; + axchg->entry_cnt++; - nvmewqeq = lpfc_nvmet_prep_ls_wqe(phba, ctxp, rsp->rspdma, - rsp->rsplen); + nvmewqeq = lpfc_nvmet_prep_ls_wqe(phba, axchg, ls_rsp->rspdma, + ls_rsp->rsplen); if (nvmewqeq == NULL) { - atomic_inc(&nvmep->xmt_ls_drop); - lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, - "6150 LS Drop IO x%x: Prep\n", - ctxp->oxid); - lpfc_in_buf_free(phba, &nvmebuf->dbuf); - atomic_inc(&nvmep->xmt_ls_abort); - lpfc_nvme_unsol_ls_issue_abort(phba, ctxp, - ctxp->sid, ctxp->oxid); - return -ENOMEM; + lpfc_printf_log(phba, KERN_ERR, + LOG_NVME_DISC | LOG_NVME_IOERR | LOG_NVME_ABTS, + "6150 NVMEx LS Drop Rsp x%x: Prep\n", + axchg->oxid); + rc = -ENOMEM; + goto out_free_buf; } /* Save numBdes for bpl2sgl */ @@ -874,39 +906,106 @@ lpfc_nvmet_xmt_ls_rsp(struct nvmet_fc_target_port *tgtport, dmabuf.virt = &bpl; bpl.addrLow = nvmewqeq->wqe.xmit_sequence.bde.addrLow; bpl.addrHigh = nvmewqeq->wqe.xmit_sequence.bde.addrHigh; - bpl.tus.f.bdeSize = rsp->rsplen; + bpl.tus.f.bdeSize = ls_rsp->rsplen; bpl.tus.f.bdeFlags = 0; bpl.tus.w = le32_to_cpu(bpl.tus.w); + /* + * Note: although we're using stack space for the dmabuf, the + * call to lpfc_sli4_issue_wqe is synchronous, so it will not + * be referenced after it returns back to this routine. + */ - nvmewqeq->wqe_cmpl = lpfc_nvmet_xmt_ls_rsp_cmp; + nvmewqeq->wqe_cmpl = xmt_ls_rsp_cmp; nvmewqeq->iocb_cmpl = NULL; - nvmewqeq->context2 = ctxp; + nvmewqeq->context2 = axchg; - lpfc_nvmeio_data(phba, "NVMET LS RESP: xri x%x wqidx x%x len x%x\n", - ctxp->oxid, nvmewqeq->hba_wqidx, rsp->rsplen); + lpfc_nvmeio_data(phba, "NVMEx LS RSP: xri x%x wqidx x%x len x%x\n", + axchg->oxid, nvmewqeq->hba_wqidx, ls_rsp->rsplen); + + rc = lpfc_sli4_issue_wqe(phba, axchg->hdwq, nvmewqeq); + + /* clear to be sure there's no reference */ + nvmewqeq->context3 = NULL; - rc = lpfc_sli4_issue_wqe(phba, ctxp->hdwq, nvmewqeq); if (rc == WQE_SUCCESS) { /* * Okay to repost buffer here, but wait till cmpl * before freeing ctxp and iocbq. 
*/ lpfc_in_buf_free(phba, &nvmebuf->dbuf); - atomic_inc(&nvmep->xmt_ls_rsp); return 0; } - /* Give back resources */ - atomic_inc(&nvmep->xmt_ls_drop); - lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, - "6151 LS Drop IO x%x: Issue %d\n", - ctxp->oxid, rc); + + lpfc_printf_log(phba, KERN_ERR, + LOG_NVME_DISC | LOG_NVME_IOERR | LOG_NVME_ABTS, + "6151 NVMEx LS RSP x%x: failed to transmit %d\n", + axchg->oxid, rc); + + rc = -ENXIO; lpfc_nlp_put(nvmewqeq->context1); +out_free_buf: + /* Give back resources */ lpfc_in_buf_free(phba, &nvmebuf->dbuf); - atomic_inc(&nvmep->xmt_ls_abort); - lpfc_nvme_unsol_ls_issue_abort(phba, ctxp, ctxp->sid, ctxp->oxid); - return -ENXIO; + + /* + * As transport doesn't track completions of responses, if the rsp + * fails to send, the transport will effectively ignore the rsp + * and consider the LS done. However, the driver has an active + * exchange open for the LS - so be sure to abort the exchange + * if the response isn't sent. + */ + lpfc_nvme_unsol_ls_issue_abort(phba, axchg, axchg->sid, axchg->oxid); + return rc; +} + +/** + * lpfc_nvmet_xmt_ls_rsp - Transmit NVME LS response + * @tgtport: pointer to target port that NVME LS is to be transmit from. + * @ls_rsp: pointer to the transport LS RSP that is to be sent + * + * Driver registers this routine to transmit responses for received NVME + * LS requests. + * + * This routine is used to format and send a WQE to transmit a NVME LS + * Response. The ls_rsp is used to reverse-map the LS to the original + * NVME LS request sequence, which provides addressing information for + * the remote port the LS to be sent to, as well as the exchange id + * that is the LS is bound to. + * + * Returns: + * 0 : if response successfully transmit + * non-zero : if response failed to transmit, of the form -Exxx. + **/ +static int +lpfc_nvmet_xmt_ls_rsp(struct nvmet_fc_target_port *tgtport, + struct nvmefc_ls_rsp *ls_rsp) +{ + struct lpfc_async_xchg_ctx *axchg = + container_of(ls_rsp, struct lpfc_async_xchg_ctx, ls_rsp); + struct lpfc_nvmet_tgtport *nvmep = tgtport->private; + int rc; + + if (axchg->phba->pport->load_flag & FC_UNLOADING) + return -ENODEV; + + rc = __lpfc_nvme_xmt_ls_rsp(axchg, ls_rsp, lpfc_nvmet_xmt_ls_rsp_cmp); + + if (rc) { + atomic_inc(&nvmep->xmt_ls_drop); + /* + * unless the failure is due to having already sent + * the response, an abort will be generated for the + * exchange if the rsp can't be sent. + */ + if (rc != -EALREADY) + atomic_inc(&nvmep->xmt_ls_abort); + return rc; + } + + atomic_inc(&nvmep->xmt_ls_rsp); + return 0; } static int -- cgit v1.2.3 From 9aa09e98b288649544c74d1a7b88223f36e4bffd Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 31 Mar 2020 09:50:09 -0700 Subject: lpfc: nvme: Add Receive LS Request and Send LS Response support to nvme Now that common helpers exist, add the ability to receive NVME LS requests to the driver. New requests will be delivered to the transport by nvme_fc_rcv_ls_req(). In order to complete the LS, add support for Send LS Response and send LS response completion handling to the driver. 
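For orientation, the round trip these hooks create can be outlined as follows. This is an editor's illustrative sketch built only from entry points named in the patches above (nvme_fc_rcv_ls_req, the xmt_ls_rsp template op, the done callback); it is not code from the series itself:

    1) The LLDD receives an unsolicited NVME LS and hands it to the transport
       together with a response context:
           rc = nvme_fc_rcv_ls_req(remoteport, &axchg->ls_rsp,
                                   axchg->payload, axchg->size);
    2) The transport processes the LS and invokes the driver's new template op
       (.xmt_ls_rsp = lpfc_nvme_xmt_ls_rsp), which wraps the common
       __lpfc_nvme_xmt_ls_rsp() helper to build and issue the response WQE.
    3) When the response WQE completes, __lpfc_nvme_xmt_ls_rsp_cmp() releases
       the iocbq, calls ls_rsp->done(ls_rsp) so the transport can retire the
       exchange, and frees the axchg context.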
Signed-off-by: Paul Ely Signed-off-by: James Smart Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/scsi/lpfc/lpfc_nvme.c | 71 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c index 406f71b327a1..21bbccf0dc31 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.c +++ b/drivers/scsi/lpfc/lpfc_nvme.c @@ -402,6 +402,10 @@ lpfc_nvme_remoteport_delete(struct nvme_fc_remote_port *remoteport) * request. Any remaining validation is done and the LS is then forwarded * to the nvme-fc transport via nvme_fc_rcv_ls_req(). * + * The calling sequence should be: nvme_fc_rcv_ls_req() -> (processing) + * -> lpfc_nvme_xmt_ls_rsp/cmp -> req->done. + * __lpfc_nvme_xmt_ls_rsp_cmp should free the allocated axchg. + * * Returns 0 if LS was handled and delivered to the transport * Returns 1 if LS failed to be handled and should be dropped */ @@ -409,6 +413,40 @@ int lpfc_nvme_handle_lsreq(struct lpfc_hba *phba, struct lpfc_async_xchg_ctx *axchg) { +#if (IS_ENABLED(CONFIG_NVME_FC)) + struct lpfc_vport *vport; + struct lpfc_nvme_rport *lpfc_rport; + struct nvme_fc_remote_port *remoteport; + struct lpfc_nvme_lport *lport; + uint32_t *payload = axchg->payload; + int rc; + + vport = axchg->ndlp->vport; + lpfc_rport = axchg->ndlp->nrport; + if (!lpfc_rport) + return -EINVAL; + + remoteport = lpfc_rport->remoteport; + if (!vport->localport) + return -EINVAL; + + lport = vport->localport->private; + if (!lport) + return -EINVAL; + + rc = nvme_fc_rcv_ls_req(remoteport, &axchg->ls_rsp, axchg->payload, + axchg->size); + + lpfc_printf_log(phba, KERN_INFO, LOG_NVME_DISC, + "6205 NVME Unsol rcv: sz %d rc %d: %08x %08x %08x " + "%08x %08x %08x\n", + axchg->size, rc, + *payload, *(payload+1), *(payload+2), + *(payload+3), *(payload+4), *(payload+5)); + + if (!rc) + return 0; +#endif return 1; } @@ -860,6 +898,37 @@ __lpfc_nvme_ls_abort(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, return 1; } +static int +lpfc_nvme_xmt_ls_rsp(struct nvme_fc_local_port *localport, + struct nvme_fc_remote_port *remoteport, + struct nvmefc_ls_rsp *ls_rsp) +{ + struct lpfc_async_xchg_ctx *axchg = + container_of(ls_rsp, struct lpfc_async_xchg_ctx, ls_rsp); + struct lpfc_nvme_lport *lport; + int rc; + + if (axchg->phba->pport->load_flag & FC_UNLOADING) + return -ENODEV; + + lport = (struct lpfc_nvme_lport *)localport->private; + + rc = __lpfc_nvme_xmt_ls_rsp(axchg, ls_rsp, __lpfc_nvme_xmt_ls_rsp_cmp); + + if (rc) { + /* + * unless the failure is due to having already sent + * the response, an abort will be generated for the + * exchange if the rsp can't be sent. + */ + if (rc != -EALREADY) + atomic_inc(&lport->xmt_ls_abort); + return rc; + } + + return 0; +} + /** * lpfc_nvme_ls_abort - Abort a prior NVME LS request * @lpfc_nvme_lport: Transport localport that LS is to be issued from. 
@@ -2005,6 +2074,7 @@ static struct nvme_fc_port_template lpfc_nvme_template = { .fcp_io = lpfc_nvme_fcp_io_submit, .ls_abort = lpfc_nvme_ls_abort, .fcp_abort = lpfc_nvme_fcp_abort, + .xmt_ls_rsp = lpfc_nvme_xmt_ls_rsp, .max_hw_queues = 1, .max_sgl_segments = LPFC_NVME_DEFAULT_SEGS, @@ -2200,6 +2270,7 @@ lpfc_nvme_create_localport(struct lpfc_vport *vport) atomic_set(&lport->cmpl_fcp_err, 0); atomic_set(&lport->cmpl_ls_xb, 0); atomic_set(&lport->cmpl_ls_err, 0); + atomic_set(&lport->fc4NvmeLsRequests, 0); atomic_set(&lport->fc4NvmeLsCmpls, 0); } -- cgit v1.2.3 From 4c2805aab519a39e8adf281afcef40174d48fd3f Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 31 Mar 2020 09:50:10 -0700 Subject: lpfc: nvmet: Add support for NVME LS request hosthandle As the nvmet layer does not have the concept of a remoteport object, which can be used to identify the entity on the other end of the fabric that is to receive an LS, the hosthandle was introduced. The driver passes the hosthandle, a value representative of the remote port, with a ls request receive. The LS request will create the association. The transport will remember the hosthandle for the association, and if there is a need to initiate a LS request to the remote port for the association, the hosthandle will be used. When the driver loses connectivity with the remote port, it needs to notify the transport that the hosthandle is no longer valid, allowing the transport to terminate associations related to the hosthandle. This patch adds support to the driver for the hosthandle. The driver will use the ndlp pointer of the remote port for the hosthandle in calls to nvmet_fc_rcv_ls_req(). The discovery engine is updated to invalidate the hosthandle whenever connectivity with the remote port is lost. Signed-off-by: Paul Ely Signed-off-by: James Smart Reviewed-by: Hannes Reinecke Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/scsi/lpfc/lpfc_crtn.h | 2 ++ drivers/scsi/lpfc/lpfc_hbadisc.c | 6 +++++ drivers/scsi/lpfc/lpfc_nportdisc.c | 11 ++++++++ drivers/scsi/lpfc/lpfc_nvme.h | 3 +++ drivers/scsi/lpfc/lpfc_nvmet.c | 53 +++++++++++++++++++++++++++++++++++++- 5 files changed, 74 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/lpfc/lpfc_crtn.h b/drivers/scsi/lpfc/lpfc_crtn.h index 07b4f0fd3fe2..9ee6b930a655 100644 --- a/drivers/scsi/lpfc/lpfc_crtn.h +++ b/drivers/scsi/lpfc/lpfc_crtn.h @@ -571,6 +571,8 @@ void lpfc_nvmet_unsol_fcp_event(struct lpfc_hba *phba, uint32_t idx, struct rqb_dmabuf *nvmebuf, uint64_t isr_ts, uint8_t cqflag); void lpfc_nvme_mod_param_dep(struct lpfc_hba *phba); +void lpfc_nvmet_invalidate_host(struct lpfc_hba *phba, + struct lpfc_nodelist *ndlp); void lpfc_nvme_abort_fcreq_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, struct lpfc_wcqe_complete *abts_cmpl); diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c index 8dec7b7c06d1..f5952f8cd4b5 100644 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c @@ -823,6 +823,12 @@ lpfc_cleanup_rpis(struct lpfc_vport *vport, int remove) if ((phba->sli_rev < LPFC_SLI_REV4) && (!remove && ndlp->nlp_type & NLP_FABRIC)) continue; + + /* Notify transport of connectivity loss to trigger cleanup. */ + if (phba->nvmet_support && + ndlp->nlp_state == NLP_STE_UNMAPPED_NODE) + lpfc_nvmet_invalidate_host(phba, ndlp); + lpfc_disc_state_machine(vport, ndlp, NULL, remove ? 
NLP_EVT_DEVICE_RM diff --git a/drivers/scsi/lpfc/lpfc_nportdisc.c b/drivers/scsi/lpfc/lpfc_nportdisc.c index 81f4ba1c24b4..d8501bd959e7 100644 --- a/drivers/scsi/lpfc/lpfc_nportdisc.c +++ b/drivers/scsi/lpfc/lpfc_nportdisc.c @@ -489,6 +489,11 @@ lpfc_rcv_plogi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, (unsigned long long) wwn_to_u64(sp->portName.u.wwn)); + /* Notify transport of connectivity loss to trigger cleanup. */ + if (phba->nvmet_support && + ndlp->nlp_state == NLP_STE_UNMAPPED_NODE) + lpfc_nvmet_invalidate_host(phba, ndlp); + ndlp->nlp_prev_state = ndlp->nlp_state; /* rport needs to be unregistered first */ lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE); @@ -839,6 +844,12 @@ lpfc_rcv_logo(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, lpfc_els_rsp_acc(vport, ELS_CMD_PRLO, cmdiocb, ndlp, NULL); else lpfc_els_rsp_acc(vport, ELS_CMD_ACC, cmdiocb, ndlp, NULL); + + /* Notify transport of connectivity loss to trigger cleanup. */ + if (phba->nvmet_support && + ndlp->nlp_state == NLP_STE_UNMAPPED_NODE) + lpfc_nvmet_invalidate_host(phba, ndlp); + if (ndlp->nlp_DID == Fabric_DID) { if (vport->port_state <= LPFC_FDISC) goto out; diff --git a/drivers/scsi/lpfc/lpfc_nvme.h b/drivers/scsi/lpfc/lpfc_nvme.h index f28a6789e252..4a4c3f780e1f 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.h +++ b/drivers/scsi/lpfc/lpfc_nvme.h @@ -98,9 +98,12 @@ struct lpfc_nvme_fcpreq_priv { #define LPFC_NVMET_WAIT_TMO (5 * MSEC_PER_SEC) /* Used for NVME Target */ +#define LPFC_NVMET_INV_HOST_ACTIVE 1 + struct lpfc_nvmet_tgtport { struct lpfc_hba *phba; struct completion *tport_unreg_cmp; + atomic_t state; /* tracks nvmet hosthandle invalidation */ /* Stats counters - lpfc_nvmet_unsol_ls_buffer */ atomic_t rcv_ls_req_in; diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c index 6a1365df6c65..5584eaafbfb2 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.c +++ b/drivers/scsi/lpfc/lpfc_nvmet.c @@ -1282,6 +1282,24 @@ lpfc_nvmet_defer_rcv(struct nvmet_fc_target_port *tgtport, spin_unlock_irqrestore(&ctxp->ctxlock, iflag); } +static void +lpfc_nvmet_host_release(void *hosthandle) +{ + struct lpfc_nodelist *ndlp = hosthandle; + struct lpfc_hba *phba = NULL; + struct lpfc_nvmet_tgtport *tgtp; + + phba = ndlp->phba; + if (!phba->targetport || !phba->targetport->private) + return; + + lpfc_printf_log(phba, KERN_ERR, LOG_NVME, + "6202 NVMET XPT releasing hosthandle x%px\n", + hosthandle); + tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; + atomic_set(&tgtp->state, 0); +} + static void lpfc_nvmet_discovery_event(struct nvmet_fc_target_port *tgtport) { @@ -1306,6 +1324,7 @@ static struct nvmet_fc_target_template lpfc_tgttemplate = { .fcp_req_release = lpfc_nvmet_xmt_fcp_release, .defer_rcv = lpfc_nvmet_defer_rcv, .discovery_event = lpfc_nvmet_discovery_event, + .host_release = lpfc_nvmet_host_release, .max_hw_queues = 1, .max_sgl_segments = LPFC_NVMET_DEFAULT_SEGS, @@ -2044,7 +2063,12 @@ lpfc_nvmet_handle_lsreq(struct lpfc_hba *phba, atomic_inc(&tgtp->rcv_ls_req_in); - rc = nvmet_fc_rcv_ls_req(phba->targetport, NULL, &axchg->ls_rsp, + /* + * Driver passes the ndlp as the hosthandle argument allowing + * the transport to generate LS requests for any associations + * that are created. + */ + rc = nvmet_fc_rcv_ls_req(phba->targetport, axchg->ndlp, &axchg->ls_rsp, axchg->payload, axchg->size); lpfc_printf_log(phba, KERN_INFO, LOG_NVME_DISC, @@ -3476,3 +3500,30 @@ out: "6056 Failed to Issue ABTS.
Status x%x\n", rc); return 0; } + +/** + * lpfc_nvmet_invalidate_host + * + * @phba - pointer to the driver instance bound to an adapter port. + * @ndlp - pointer to an lpfc_nodelist type + * + * This routine upcalls the nvmet transport to invalidate an NVME + * host to which this target instance had active connections. + */ +void +lpfc_nvmet_invalidate_host(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp) +{ + struct lpfc_nvmet_tgtport *tgtp; + + lpfc_printf_log(phba, KERN_INFO, LOG_NVME | LOG_NVME_ABTS, + "6203 Invalidating hosthandle x%px\n", + ndlp); + + tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; + atomic_set(&tgtp->state, LPFC_NVMET_INV_HOST_ACTIVE); + +#if (IS_ENABLED(CONFIG_NVME_TARGET_FC)) + /* Need to get the nvmet_fc_target_port pointer here.*/ + nvmet_fc_invalidate_host(phba->targetport, ndlp); +#endif +} -- cgit v1.2.3 From 54840bed372c7779f23ece8514853fa83887b02e Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 31 Mar 2020 09:50:11 -0700 Subject: lpfc: nvmet: Add Send LS Request and Abort LS Request support Now that common helpers exist, add the ability to Send an NVME LS Request and to Abort an outstanding LS Request to the nvmet side of the driver. Signed-off-by: Paul Ely Signed-off-by: James Smart Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/scsi/lpfc/lpfc_nvmet.c | 95 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 95 insertions(+) diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c index 5584eaafbfb2..1c6bbbba70b5 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.c +++ b/drivers/scsi/lpfc/lpfc_nvmet.c @@ -1282,6 +1282,98 @@ lpfc_nvmet_defer_rcv(struct nvmet_fc_target_port *tgtport, spin_unlock_irqrestore(&ctxp->ctxlock, iflag); } +/** + * lpfc_nvmet_ls_req_cmp - completion handler for an NVME LS request + * @phba: Pointer to HBA context object + * @cmdwqe: Pointer to driver command WQE object. + * @wcqe: Pointer to driver response CQE object. + * + * This function is the completion handler for NVME LS requests. + * The function updates any states and statistics, then calls the + * generic completion handler to finish completion of the request. + **/ +static void +lpfc_nvmet_ls_req_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, + struct lpfc_wcqe_complete *wcqe) +{ + __lpfc_nvme_ls_req_cmp(phba, cmdwqe->vport, cmdwqe, wcqe); +} + +/** + * lpfc_nvmet_ls_req - Issue a Link Service request + * @targetport - pointer to target instance registered with nvmet transport. + * @hosthandle - hosthandle set by the driver in a prior ls_rqst_rcv. + * Driver sets this value to the ndlp pointer. + * @pnvme_lsreq - the transport nvme_ls_req structure for the LS + * + * Driver registers this routine to handle any link service request + * from the nvme_fc transport to a remote nvme-aware port.
+ * + * Return value : + * 0 - Success + * non-zero: various error codes, in form of -Exxx + **/ +static int +lpfc_nvmet_ls_req(struct nvmet_fc_target_port *targetport, + void *hosthandle, + struct nvmefc_ls_req *pnvme_lsreq) +{ + struct lpfc_nvmet_tgtport *lpfc_nvmet = targetport->private; + struct lpfc_hba *phba; + struct lpfc_nodelist *ndlp; + int ret; + u32 hstate; + + if (!lpfc_nvmet) + return -EINVAL; + + phba = lpfc_nvmet->phba; + if (phba->pport->load_flag & FC_UNLOADING) + return -EINVAL; + + hstate = atomic_read(&lpfc_nvmet->state); + if (hstate == LPFC_NVMET_INV_HOST_ACTIVE) + return -EACCES; + + ndlp = (struct lpfc_nodelist *)hosthandle; + + ret = __lpfc_nvme_ls_req(phba->pport, ndlp, pnvme_lsreq, + lpfc_nvmet_ls_req_cmp); + + return ret; +} + +/** + * lpfc_nvmet_ls_abort - Abort a prior NVME LS request + * @targetport: Transport targetport that LS was issued from. + * @hosthandle - hosthandle set by the driver in a prior ls_rqst_rcv. + * Driver sets this value to the ndlp pointer. + * @pnvme_lsreq - the transport nvme_ls_req structure for LS to be aborted + * + * Driver registers this routine to abort an NVME LS request that is + * in progress (from the transport's perspective). + **/ +static void +lpfc_nvmet_ls_abort(struct nvmet_fc_target_port *targetport, + void *hosthandle, + struct nvmefc_ls_req *pnvme_lsreq) +{ + struct lpfc_nvmet_tgtport *lpfc_nvmet = targetport->private; + struct lpfc_hba *phba; + struct lpfc_nodelist *ndlp; + int ret; + + phba = lpfc_nvmet->phba; + if (phba->pport->load_flag & FC_UNLOADING) + return; + + ndlp = (struct lpfc_nodelist *)hosthandle; + + ret = __lpfc_nvme_ls_abort(phba->pport, ndlp, pnvme_lsreq); + if (!ret) + atomic_inc(&lpfc_nvmet->xmt_ls_abort); +} + static void lpfc_nvmet_host_release(void *hosthandle) { @@ -1324,6 +1416,8 @@ static struct nvmet_fc_target_template lpfc_tgttemplate = { .fcp_req_release = lpfc_nvmet_xmt_fcp_release, .defer_rcv = lpfc_nvmet_defer_rcv, .discovery_event = lpfc_nvmet_discovery_event, + .ls_req = lpfc_nvmet_ls_req, + .ls_abort = lpfc_nvmet_ls_abort, .host_release = lpfc_nvmet_host_release, .max_hw_queues = 1, @@ -1335,6 +1429,7 @@ static struct nvmet_fc_target_template lpfc_tgttemplate = { .target_features = 0, /* sizes of additional private data for data structures */ .target_priv_sz = sizeof(struct lpfc_nvmet_tgtport), + .lsrqst_priv_sz = 0, }; static void -- cgit v1.2.3 From 71fb90eb71d76089115aedf942533bcb3fd5c7f9 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 3 Apr 2020 09:24:01 -0700 Subject: nvme: provide num dword helper Various nvme commands use a zero-based number of dwords field. Create a helper function to convert byte lengths to this format.
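The conversion itself is just a divide by four with NVMe's zero-based adjustment applied. As a sanity check on the arithmetic, here is a minimal userspace model of the helper (an editor's illustrative test program, not part of the patch):

#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Mirrors nvme_bytes_to_numd(): byte length -> 0-based dword count. */
static uint32_t bytes_to_numd(size_t len)
{
	return (len >> 2) - 1;
}

int main(void)
{
	/* 32-byte structure -> 8 dwords -> NUMD field of 7 */
	assert(bytes_to_numd(32) == 7);
	/* 4096-byte log page -> 1024 dwords -> NUMD field of 1023 */
	assert(bytes_to_numd(4096) == 1023);
	printf("numd(32)=%u numd(4096)=%u\n",
	       bytes_to_numd(32), bytes_to_numd(4096));
	return 0;
}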
Signed-off-by: Keith Busch Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 4 ++-- drivers/nvme/host/nvme.h | 8 ++++++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index f3c037f5a9ba..6da941b6c67c 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -530,7 +530,7 @@ static int nvme_get_stream_params(struct nvme_ctrl *ctrl, c.directive.opcode = nvme_admin_directive_recv; c.directive.nsid = cpu_to_le32(nsid); - c.directive.numd = cpu_to_le32((sizeof(*s) >> 2) - 1); + c.directive.numd = cpu_to_le32(nvme_bytes_to_numd(sizeof(*s))); c.directive.doper = NVME_DIR_RCV_ST_OP_PARAM; c.directive.dtype = NVME_DIR_STREAMS; @@ -2746,7 +2746,7 @@ int nvme_get_log(struct nvme_ctrl *ctrl, u32 nsid, u8 log_page, u8 lsp, void *log, size_t size, u64 offset) { struct nvme_command c = { }; - unsigned long dwlen = size / 4 - 1; + u32 dwlen = nvme_bytes_to_numd(size); c.get_log_page.opcode = nvme_admin_get_log_page; c.get_log_page.nsid = cpu_to_le32(nsid); diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 2e04a36296d9..36f44b79bb3b 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -449,6 +449,14 @@ static inline sector_t nvme_lba_to_sect(struct nvme_ns *ns, u64 lba) return lba << (ns->lba_shift - SECTOR_SHIFT); } +/* + * Convert byte length to nvme's 0-based num dwords + */ +static inline u32 nvme_bytes_to_numd(size_t len) +{ + return (len >> 2) - 1; +} + static inline void nvme_end_request(struct request *req, __le16 status, union nvme_result result) { -- cgit v1.2.3 From 03f8cebc127fa285874074ec314b76b1333f6c43 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 3 Apr 2020 09:24:09 -0700 Subject: nvme: remove unused parameter nvme_alloc_ns_head() doesn't use the 'struct nvme_id_ns' parameter. Remove it, and update caller accordingly. Signed-off-by: Keith Busch Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 6da941b6c67c..3d0c7d90ad25 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3410,8 +3410,7 @@ static int __nvme_check_ids(struct nvme_subsystem *subsys, } static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl, - unsigned nsid, struct nvme_id_ns *id, - struct nvme_ns_ids *ids) + unsigned nsid, struct nvme_ns_ids *ids) { struct nvme_ns_head *head; size_t size = sizeof(*head); @@ -3482,7 +3481,7 @@ static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid, if (is_shared) head = nvme_find_ns_head(ctrl->subsys, nsid); if (!head) { - head = nvme_alloc_ns_head(ctrl, nsid, id, &ids); + head = nvme_alloc_ns_head(ctrl, nsid, &ids); if (IS_ERR(head)) { ret = PTR_ERR(head); goto out_unlock; -- cgit v1.2.3 From b0012dd397155438c61b0c1b52ceec1f1366b3cc Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Wed, 19 Apr 2017 11:56:57 +0300 Subject: nvmet-rdma: use SRQ per completion vector In order to save resource allocation and utilize the completion locality in a better way (compared to SRQ per device that exist today), allocate Shared Receive Queues (SRQs) per completion vector. Associate each created QP/CQ with an appropriate SRQ according to the queue index. This association will reduce the lock contention in the fast path (compared to SRQ per device solution) and increase the locality in memory buffers. 
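The spreading policy described above reduces to two modulo steps. The following toy program (an editor's sketch with made-up counts, not driver code; the real mapping is in the diff further below) shows how queues land on completion vectors and how vectors land on SRQs:

#include <stdio.h>

/* Toy model of the mapping: admin queues (host_qid == 0) stay on
 * vector 0, io queues spread across vectors, and each queue uses the
 * SRQ associated with its vector (modulo the SRQ count). */
int main(void)
{
	const int num_comp_vectors = 4;	/* assumed device property */
	const int srq_count = 4;	/* min(num_comp_vectors, max_srq) */

	for (int idx = 0; idx < 8; idx++) {
		int host_qid = idx;	/* 0 == admin queue */
		int comp_vector = !host_qid ? 0 : idx % num_comp_vectors;
		int srq = comp_vector % srq_count;

		printf("queue %d -> vector %d -> srq %d\n",
		       idx, comp_vector, srq);
	}
	return 0;
}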
Add a new module parameter for the SRQ size so it can be adjusted according to the expected load. Users should make sure the size is >= 256 to avoid a lack of resources. Also reduce the debug level of the "last WQE reached" event that is raised when a QP using an SRQ is destroyed, to relieve the log. Signed-off-by: Max Gurtovoy Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/target/rdma.c | 178 ++++++++++++++++++++++++++++++++++----------- 1 file changed, 136 insertions(+), 42 deletions(-) diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index fd47de0e4e4e..7a90b10359bb 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -34,6 +34,8 @@ /* Assume mpsmin == device_page_size == 4KB */ #define NVMET_RDMA_MAX_MDTS 8 +struct nvmet_rdma_srq; + struct nvmet_rdma_cmd { struct ib_sge sge[NVMET_RDMA_MAX_INLINE_SGE + 1]; struct ib_cqe cqe; @@ -41,6 +43,7 @@ struct nvmet_rdma_cmd { struct scatterlist inline_sg[NVMET_RDMA_MAX_INLINE_SGE]; struct nvme_command *nvme_cmd; struct nvmet_rdma_queue *queue; + struct nvmet_rdma_srq *nsrq; }; enum { @@ -83,6 +86,7 @@ struct nvmet_rdma_queue { struct ib_cq *cq; atomic_t sq_wr_avail; struct nvmet_rdma_device *dev; + struct nvmet_rdma_srq *nsrq; spinlock_t state_lock; enum nvmet_rdma_queue_state state; struct nvmet_cq nvme_cq; @@ -100,6 +104,7 @@ struct nvmet_rdma_queue { int idx; int host_qid; + int comp_vector; int recv_queue_size; int send_queue_size; @@ -113,11 +118,17 @@ struct nvmet_rdma_port { struct delayed_work repair_work; }; +struct nvmet_rdma_srq { + struct ib_srq *srq; + struct nvmet_rdma_cmd *cmds; + struct nvmet_rdma_device *ndev; +}; + struct nvmet_rdma_device { struct ib_device *device; struct ib_pd *pd; - struct ib_srq *srq; - struct nvmet_rdma_cmd *srq_cmds; + struct nvmet_rdma_srq **srqs; + int srq_count; size_t srq_size; struct kref ref; struct list_head entry; @@ -129,6 +140,16 @@ static bool nvmet_rdma_use_srq; module_param_named(use_srq, nvmet_rdma_use_srq, bool, 0444); MODULE_PARM_DESC(use_srq, "Use shared receive queue."); +static int srq_size_set(const char *val, const struct kernel_param *kp); +static const struct kernel_param_ops srq_size_ops = { + .set = srq_size_set, + .get = param_get_int, +}; + +static int nvmet_rdma_srq_size = 1024; +module_param_cb(srq_size, &srq_size_ops, &nvmet_rdma_srq_size, 0644); +MODULE_PARM_DESC(srq_size, "set Shared Receive Queue (SRQ) size, should >= 256 (default: 1024)"); + static DEFINE_IDA(nvmet_rdma_queue_ida); static LIST_HEAD(nvmet_rdma_queue_list); static DEFINE_MUTEX(nvmet_rdma_queue_mutex); @@ -149,6 +170,17 @@ static int nvmet_rdma_alloc_rsp(struct nvmet_rdma_device *ndev, static const struct nvmet_fabrics_ops nvmet_rdma_ops; +static int srq_size_set(const char *val, const struct kernel_param *kp) +{ + int n = 0, ret; + + ret = kstrtoint(val, 10, &n); + if (ret != 0 || n < 256) + return -EINVAL; + + return param_set_int(val, kp); +} + static int num_pages(int len) { return 1 + (((len - 1) & PAGE_MASK) >> PAGE_SHIFT); @@ -466,8 +498,8 @@ static int nvmet_rdma_post_recv(struct nvmet_rdma_device *ndev, cmd->sge[0].addr, cmd->sge[0].length, DMA_FROM_DEVICE); - if (ndev->srq) - ret = ib_post_srq_recv(ndev->srq, &cmd->wr, NULL); + if (cmd->nsrq) + ret = ib_post_srq_recv(cmd->nsrq->srq, &cmd->wr, NULL); else ret = ib_post_recv(cmd->queue->qp, &cmd->wr, NULL); @@ -845,23 +877,40 @@ static void nvmet_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc) nvmet_rdma_handle_command(queue, rsp); } -static void
nvmet_rdma_destroy_srq(struct nvmet_rdma_device *ndev) +static void nvmet_rdma_destroy_srq(struct nvmet_rdma_srq *nsrq) +{ + nvmet_rdma_free_cmds(nsrq->ndev, nsrq->cmds, nsrq->ndev->srq_size, + false); + ib_destroy_srq(nsrq->srq); + + kfree(nsrq); +} + +static void nvmet_rdma_destroy_srqs(struct nvmet_rdma_device *ndev) { - if (!ndev->srq) + int i; + + if (!ndev->srqs) return; - nvmet_rdma_free_cmds(ndev, ndev->srq_cmds, ndev->srq_size, false); - ib_destroy_srq(ndev->srq); + for (i = 0; i < ndev->srq_count; i++) + nvmet_rdma_destroy_srq(ndev->srqs[i]); + + kfree(ndev->srqs); } -static int nvmet_rdma_init_srq(struct nvmet_rdma_device *ndev) +static struct nvmet_rdma_srq * +nvmet_rdma_init_srq(struct nvmet_rdma_device *ndev) { struct ib_srq_init_attr srq_attr = { NULL, }; + size_t srq_size = ndev->srq_size; + struct nvmet_rdma_srq *nsrq; struct ib_srq *srq; - size_t srq_size; int ret, i; - srq_size = 4095; /* XXX: tune */ + nsrq = kzalloc(sizeof(*nsrq), GFP_KERNEL); + if (!nsrq) + return ERR_PTR(-ENOMEM); srq_attr.attr.max_wr = srq_size; srq_attr.attr.max_sge = 1 + ndev->inline_page_count; @@ -869,35 +918,73 @@ static int nvmet_rdma_init_srq(struct nvmet_rdma_device *ndev) srq_attr.srq_type = IB_SRQT_BASIC; srq = ib_create_srq(ndev->pd, &srq_attr); if (IS_ERR(srq)) { - /* - * If SRQs aren't supported we just go ahead and use normal - * non-shared receive queues. - */ - pr_info("SRQ requested but not supported.\n"); - return 0; + ret = PTR_ERR(srq); + goto out_free; } - ndev->srq_cmds = nvmet_rdma_alloc_cmds(ndev, srq_size, false); - if (IS_ERR(ndev->srq_cmds)) { - ret = PTR_ERR(ndev->srq_cmds); + nsrq->cmds = nvmet_rdma_alloc_cmds(ndev, srq_size, false); + if (IS_ERR(nsrq->cmds)) { + ret = PTR_ERR(nsrq->cmds); goto out_destroy_srq; } - ndev->srq = srq; - ndev->srq_size = srq_size; + nsrq->srq = srq; + nsrq->ndev = ndev; for (i = 0; i < srq_size; i++) { - ret = nvmet_rdma_post_recv(ndev, &ndev->srq_cmds[i]); + nsrq->cmds[i].nsrq = nsrq; + ret = nvmet_rdma_post_recv(ndev, &nsrq->cmds[i]); if (ret) goto out_free_cmds; } - return 0; + return nsrq; out_free_cmds: - nvmet_rdma_free_cmds(ndev, ndev->srq_cmds, ndev->srq_size, false); + nvmet_rdma_free_cmds(ndev, nsrq->cmds, srq_size, false); out_destroy_srq: ib_destroy_srq(srq); +out_free: + kfree(nsrq); + return ERR_PTR(ret); +} + +static int nvmet_rdma_init_srqs(struct nvmet_rdma_device *ndev) +{ + int i, ret; + + if (!ndev->device->attrs.max_srq_wr || !ndev->device->attrs.max_srq) { + /* + * If SRQs aren't supported we just go ahead and use normal + * non-shared receive queues. 
+ */ + pr_info("SRQ requested but not supported.\n"); + return 0; + } + + ndev->srq_size = min(ndev->device->attrs.max_srq_wr, + nvmet_rdma_srq_size); + ndev->srq_count = min(ndev->device->num_comp_vectors, + ndev->device->attrs.max_srq); + + ndev->srqs = kcalloc(ndev->srq_count, sizeof(*ndev->srqs), GFP_KERNEL); + if (!ndev->srqs) + return -ENOMEM; + + for (i = 0; i < ndev->srq_count; i++) { + ndev->srqs[i] = nvmet_rdma_init_srq(ndev); + if (IS_ERR(ndev->srqs[i])) { + ret = PTR_ERR(ndev->srqs[i]); + goto err_srq; + } + } + + return 0; + +err_srq: + while (--i >= 0) + nvmet_rdma_destroy_srq(ndev->srqs[i]); + kfree(ndev->srqs); return ret; } @@ -910,7 +997,7 @@ static void nvmet_rdma_free_dev(struct kref *ref) list_del(&ndev->entry); mutex_unlock(&device_list_mutex); - nvmet_rdma_destroy_srq(ndev); + nvmet_rdma_destroy_srqs(ndev); ib_dealloc_pd(ndev->pd); kfree(ndev); @@ -957,7 +1044,7 @@ nvmet_rdma_find_get_device(struct rdma_cm_id *cm_id) goto out_free_dev; if (nvmet_rdma_use_srq) { - ret = nvmet_rdma_init_srq(ndev); + ret = nvmet_rdma_init_srqs(ndev); if (ret) goto out_free_pd; } @@ -981,14 +1068,7 @@ static int nvmet_rdma_create_queue_ib(struct nvmet_rdma_queue *queue) { struct ib_qp_init_attr qp_attr; struct nvmet_rdma_device *ndev = queue->dev; - int comp_vector, nr_cqe, ret, i, factor; - - /* - * Spread the io queues across completion vectors, - * but still keep all admin queues on vector 0. - */ - comp_vector = !queue->host_qid ? 0 : - queue->idx % ndev->device->num_comp_vectors; + int nr_cqe, ret, i, factor; /* * Reserve CQ slots for RECV + RDMA_READ/RDMA_WRITE + RDMA_SEND. @@ -996,7 +1076,7 @@ static int nvmet_rdma_create_queue_ib(struct nvmet_rdma_queue *queue) nr_cqe = queue->recv_queue_size + 2 * queue->send_queue_size; queue->cq = ib_alloc_cq(ndev->device, queue, - nr_cqe + 1, comp_vector, + nr_cqe + 1, queue->comp_vector, IB_POLL_WORKQUEUE); if (IS_ERR(queue->cq)) { ret = PTR_ERR(queue->cq); @@ -1020,8 +1100,8 @@ static int nvmet_rdma_create_queue_ib(struct nvmet_rdma_queue *queue) qp_attr.cap.max_send_sge = max(ndev->device->attrs.max_sge_rd, ndev->device->attrs.max_send_sge); - if (ndev->srq) { - qp_attr.srq = ndev->srq; + if (queue->nsrq) { + qp_attr.srq = queue->nsrq->srq; } else { /* +1 for drain */ qp_attr.cap.max_recv_wr = 1 + queue->recv_queue_size; @@ -1041,7 +1121,7 @@ static int nvmet_rdma_create_queue_ib(struct nvmet_rdma_queue *queue) __func__, queue->cq->cqe, qp_attr.cap.max_send_sge, qp_attr.cap.max_send_wr, queue->cm_id); - if (!ndev->srq) { + if (!queue->nsrq) { for (i = 0; i < queue->recv_queue_size; i++) { queue->cmds[i].queue = queue; ret = nvmet_rdma_post_recv(ndev, &queue->cmds[i]); @@ -1076,7 +1156,7 @@ static void nvmet_rdma_free_queue(struct nvmet_rdma_queue *queue) nvmet_sq_destroy(&queue->nvme_sq); nvmet_rdma_destroy_queue_ib(queue); - if (!queue->dev->srq) { + if (!queue->nsrq) { nvmet_rdma_free_cmds(queue->dev, queue->cmds, queue->recv_queue_size, !queue->host_qid); @@ -1188,13 +1268,23 @@ nvmet_rdma_alloc_queue(struct nvmet_rdma_device *ndev, goto out_destroy_sq; } + /* + * Spread the io queues across completion vectors, + * but still keep all admin queues on vector 0. + */ + queue->comp_vector = !queue->host_qid ? 
0 : queue->idx % ndev->device->num_comp_vectors; + + ret = nvmet_rdma_alloc_rsps(queue); if (ret) { ret = NVME_RDMA_CM_NO_RSC; goto out_ida_remove; } - if (!ndev->srq) { + if (ndev->srqs) { + queue->nsrq = ndev->srqs[queue->comp_vector % ndev->srq_count]; + } else { queue->cmds = nvmet_rdma_alloc_cmds(ndev, queue->recv_queue_size, !queue->host_qid); @@ -1215,7 +1305,7 @@ nvmet_rdma_alloc_queue(struct nvmet_rdma_device *ndev, return queue; out_free_cmds: - if (!ndev->srq) { + if (!queue->nsrq) { nvmet_rdma_free_cmds(queue->dev, queue->cmds, queue->recv_queue_size, !queue->host_qid); @@ -1241,6 +1331,10 @@ static void nvmet_rdma_qp_event(struct ib_event *event, void *priv) case IB_EVENT_COMM_EST: rdma_notify(queue->cm_id, event->event); break; + case IB_EVENT_QP_LAST_WQE_REACHED: + pr_debug("received last WQE reached event for queue=0x%p\n", + queue); + break; default: pr_err("received IB QP event: %s (%d)\n", ib_event_msg(event->event), event->event); -- cgit v1.2.3 From e4fcc72c1a420bdbe425530dd19724214ceb44ec Mon Sep 17 00:00:00 2001 From: James Smart Date: Mon, 6 Apr 2020 16:55:34 -0700 Subject: nvmet-fc: slight cleanup for kbuild test warnings The kbuild test robot flagged the following 3 issues: Case 1) >> drivers/nvme/target/fc.c:1201:37: warning: Either the condition >> '!assoc' is redundant or there is possible null pointer dereference: >> assoc. [nullPointerRedundantCheck] >> struct nvmet_fc_tgtport *tgtport = assoc->tgtport; ^ >> drivers/nvme/target/fc.c:1853:7: note: Assuming that condition '!assoc' >> is not redundant >> if (!assoc) ^ >> drivers/nvme/target/fc.c:1850:37: note: Assignment >> 'assoc=nvmet_fc_find_target_assoc(tgtport,be64_to_cpu( >> rqst->associd.association_id))', assigned value is 0 >> assoc = nvmet_fc_find_target_assoc(tgtport, ^ >> drivers/nvme/target/fc.c:1896:31: note: Calling function >> 'nvmet_fc_delete_target_assoc', 1st argument 'assoc' value is 0 >> nvmet_fc_delete_target_assoc(assoc); ^ The tool isn't smart enough to see that line 1854 sets a ret value which thereafter causes the routine to exit. This occurs before any of the assoc references, so it is not an issue. There are two more reports of this same failure. To quiet the tool, rework the if test that does the exit to also reference assoc. No change in logic otherwise. Case 2) drivers/nvme/target/fc.c:1202:29: warning: The scope of the variable 'queue' can be reduced. [variableScope] struct nvmet_fc_tgt_queue *queue; ^ The tool is requesting the variable be declared within the code block that utilizes it. Ignoring this report as the existing code style is fine. Case 3) drivers/nvme/target/fc.c:1137:16: warning: Variable 'needrandom' is assigned a value that is never used. [unreadVariable] needrandom = true; ^ Another parsing issue with the tool. Given that braces were not used with the list_for_each_entry() check, it inadvertently thinks the break exits the outer while loop, not the inner for loop. This is not an error, but braces were added to the inner list_for_each_entry() to quiet the tool, as it is also better coding style.
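Case 3 is easy to reproduce outside the kernel: in C, break only leaves the innermost enclosing loop, with or without braces, so the tool's reading was wrong. A small standalone demonstration (assumed values, nothing from the driver):

#include <stdbool.h>
#include <stdio.h>

int main(void)
{
	bool needrandom = true;
	int tries = 0;

	while (needrandom) {	/* outer loop, as in the association-id retry */
		needrandom = false;
		tries++;
		for (int i = 0; i < 4; i++) {	/* stands in for list_for_each_entry() */
			if (tries == 1 && i == 2) {
				needrandom = true;
				break;	/* leaves only the for loop */
			}
		}
	}
	/* Prints 2: the break re-armed the outer while, as intended. */
	printf("outer loop ran %d times\n", tries);
	return 0;
}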
-- james Signed-off-by: James Smart Reported-by: kbuild test robot CC: kbuild test robot CC: Christoph Hellwig Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/target/fc.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 02d9751bb7ee..27fd3b5aa621 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -1132,11 +1132,12 @@ nvmet_fc_alloc_target_assoc(struct nvmet_fc_tgtport *tgtport, void *hosthandle) spin_lock_irqsave(&tgtport->lock, flags); needrandom = false; - list_for_each_entry(tmpassoc, &tgtport->assoc_list, a_list) + list_for_each_entry(tmpassoc, &tgtport->assoc_list, a_list) { if (ran == tmpassoc->association_id) { needrandom = true; break; } + } if (!needrandom) { assoc->association_id = ran; list_add_tail(&assoc->a_list, &tgtport->assoc_list); @@ -1837,7 +1838,7 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport, &iod->rqstbuf->rq_dis_assoc; struct fcnvme_ls_disconnect_assoc_acc *acc = &iod->rspbuf->rsp_dis_assoc; - struct nvmet_fc_tgt_assoc *assoc; + struct nvmet_fc_tgt_assoc *assoc = NULL; struct nvmet_fc_ls_iod *oldls = NULL; unsigned long flags; int ret = 0; @@ -1854,7 +1855,7 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport, ret = VERR_NO_ASSOC; } - if (ret) { + if (ret || !assoc) { dev_err(tgtport->dev, "Disconnect LS failed: %s\n", validation_errors[ret]); -- cgit v1.2.3 From b9a5c3d4c34d8bd9fd75f7f28d18a57cb68da237 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 4 Apr 2020 10:11:28 +0200 Subject: nvme: refine the Qemu Identify CNS quirk Add a helper to check if we can use Identify CNS values > 1, and refine the Qemu quirk to not apply to reported versions larger than 1.1, as the Qemu implementation had been fixed by then. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 3d0c7d90ad25..a9af83918afe 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1027,6 +1027,19 @@ void nvme_stop_keep_alive(struct nvme_ctrl *ctrl) } EXPORT_SYMBOL_GPL(nvme_stop_keep_alive); +/* + * In NVMe 1.0 the CNS field was just a binary controller or namespace + * flag, thus sending any new CNS opcodes has a big chance of not working. + * Qemu unfortunately had that bug after reporting a 1.1 version compliance + * (but not for any later version). + */ +static bool nvme_ctrl_limited_cns(struct nvme_ctrl *ctrl) +{ + if (ctrl->quirks & NVME_QUIRK_IDENTIFY_CNS) + return ctrl->vs < NVME_VS(1, 2, 0); + return ctrl->vs < NVME_VS(1, 1, 0); +} + static int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id) { struct nvme_command c = { }; @@ -3814,8 +3827,7 @@ static void nvme_scan_work(struct work_struct *work) mutex_lock(&ctrl->scan_lock); nn = le32_to_cpu(id->nn); - if (ctrl->vs >= NVME_VS(1, 1, 0) && - !(ctrl->quirks & NVME_QUIRK_IDENTIFY_CNS)) { + if (!nvme_ctrl_limited_cns(ctrl)) { if (!nvme_scan_ns_list(ctrl, nn)) goto out_free_id; } -- cgit v1.2.3 From 25dcaa9292afc0689749099a5ba56fdb264eda7a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 4 Apr 2020 10:16:03 +0200 Subject: nvme: clean up nvme_scan_work Move the check for the supported CNS value into nvme_scan_ns_list, and limit the life time of the identify controller allocation. 
Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index a9af83918afe..025a8a6d81c7 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3738,6 +3738,9 @@ static int nvme_scan_ns_list(struct nvme_ctrl *ctrl, unsigned nn) unsigned num_lists = DIV_ROUND_UP_ULL((u64)nn, 1024); int ret = 0; + if (nvme_ctrl_limited_cns(ctrl)) + return -EOPNOTSUPP; + ns_list = kzalloc(NVME_IDENTIFY_DATA_SIZE, GFP_KERNEL); if (!ns_list) return -ENOMEM; @@ -3824,17 +3827,14 @@ static void nvme_scan_work(struct work_struct *work) if (nvme_identify_ctrl(ctrl, &id)) return; + nn = le32_to_cpu(id->nn); + kfree(id); mutex_lock(&ctrl->scan_lock); - nn = le32_to_cpu(id->nn); - if (!nvme_ctrl_limited_cns(ctrl)) { - if (!nvme_scan_ns_list(ctrl, nn)) - goto out_free_id; - } - nvme_scan_ns_sequential(ctrl, nn); -out_free_id: + if (nvme_scan_ns_list(ctrl, nn) != 0) + nvme_scan_ns_sequential(ctrl, nn); mutex_unlock(&ctrl->scan_lock); - kfree(id); + down_write(&ctrl->namespaces_rwsem); list_sort(NULL, &ctrl->namespaces, ns_cmp); up_write(&ctrl->namespaces_rwsem); -- cgit v1.2.3 From 4450ba3bbb47d5fe852b730ab6773d2c18cfbfd6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 4 Apr 2020 10:30:32 +0200 Subject: nvme: factor out a nvme_ns_remove_by_nsid helper Factor out a piece of deeply indented and logically separate code from nvme_scan_ns_list into a new helper. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 025a8a6d81c7..42102b81d540 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3699,6 +3699,16 @@ static void nvme_ns_remove(struct nvme_ns *ns) nvme_put_ns(ns); } +static void nvme_ns_remove_by_nsid(struct nvme_ctrl *ctrl, u32 nsid) +{ + struct nvme_ns *ns = nvme_find_get_ns(ctrl, nsid); + + if (ns) { + nvme_ns_remove(ns); + nvme_put_ns(ns); + } +} + static void nvme_validate_ns(struct nvme_ctrl *ctrl, unsigned nsid) { struct nvme_ns *ns; @@ -3732,7 +3742,6 @@ static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl, static int nvme_scan_ns_list(struct nvme_ctrl *ctrl, unsigned nn) { - struct nvme_ns *ns; __le32 *ns_list; unsigned i, j, nsid, prev = 0; unsigned num_lists = DIV_ROUND_UP_ULL((u64)nn, 1024); @@ -3757,13 +3766,8 @@ static int nvme_scan_ns_list(struct nvme_ctrl *ctrl, unsigned nn) nvme_validate_ns(ctrl, nsid); - while (++prev < nsid) { - ns = nvme_find_get_ns(ctrl, prev); - if (ns) { - nvme_ns_remove(ns); - nvme_put_ns(ns); - } - } + while (++prev < nsid) + nvme_ns_remove_by_nsid(ctrl, prev); } nn -= j; } -- cgit v1.2.3 From 4005f28d25cbd3d4f85529a31e46a0b78e293082 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 4 Apr 2020 10:31:35 +0200 Subject: nvme: avoid an Identify Controller command for each namespace scan The namespace lists are 0-terminated, so we don't really need the NN value except for the legacy sequential scan.
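The property being relied on: an Identify Namespace List page is filled with NSIDs in increasing order and a zero entry terminates it, so no NN-derived bound is needed. A minimal model of the scan loop (hypothetical NSID data, not the driver code):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* One page worth of NSIDs as a controller might return them:
	 * increasing order, with 0 terminating the list early. */
	uint32_t ns_list[1024] = { 1, 2, 5, 9, 0 /* rest zero */ };

	for (int i = 0; i < 1024; i++) {
		uint32_t nsid = ns_list[i];

		if (!nsid)	/* end of the list? */
			break;
		printf("validate nsid %u\n", nsid);
	}
	return 0;
}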
Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 40 ++++++++++++++++++---------------------- 1 file changed, 18 insertions(+), 22 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 42102b81d540..15b63426a113 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3740,12 +3740,11 @@ static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl, } -static int nvme_scan_ns_list(struct nvme_ctrl *ctrl, unsigned nn) +static int nvme_scan_ns_list(struct nvme_ctrl *ctrl) { __le32 *ns_list; - unsigned i, j, nsid, prev = 0; - unsigned num_lists = DIV_ROUND_UP_ULL((u64)nn, 1024); - int ret = 0; + u32 prev = 0; + int ret = 0, i; if (nvme_ctrl_limited_cns(ctrl)) return -EOPNOTSUPP; @@ -3754,22 +3753,20 @@ static int nvme_scan_ns_list(struct nvme_ctrl *ctrl, unsigned nn) if (!ns_list) return -ENOMEM; - for (i = 0; i < num_lists; i++) { + for (;;) { ret = nvme_identify_ns_list(ctrl, prev, ns_list); if (ret) goto free; - for (j = 0; j < min(nn, 1024U); j++) { - nsid = le32_to_cpu(ns_list[j]); - if (!nsid) - goto out; + for (i = 0; i < 1024; i++) { + u32 nsid = le32_to_cpu(ns_list[i]); + if (!nsid) /* end of the list? */ + goto out; nvme_validate_ns(ctrl, nsid); - while (++prev < nsid) nvme_ns_remove_by_nsid(ctrl, prev); } - nn -= j; } out: nvme_remove_invalid_namespaces(ctrl, prev); @@ -3778,9 +3775,15 @@ static int nvme_scan_ns_list(struct nvme_ctrl *ctrl, unsigned nn) return ret; } -static void nvme_scan_ns_sequential(struct nvme_ctrl *ctrl, unsigned nn) +static void nvme_scan_ns_sequential(struct nvme_ctrl *ctrl) { - unsigned i; + struct nvme_id_ctrl *id; + u32 nn, i; + + if (nvme_identify_ctrl(ctrl, &id)) + return; + nn = le32_to_cpu(id->nn); + kfree(id); for (i = 1; i <= nn; i++) nvme_validate_ns(ctrl, i); @@ -3817,8 +3820,6 @@ static void nvme_scan_work(struct work_struct *work) { struct nvme_ctrl *ctrl = container_of(work, struct nvme_ctrl, scan_work); - struct nvme_id_ctrl *id; - unsigned nn; /* No tagset on a live ctrl means IO queues could not created */ if (ctrl->state != NVME_CTRL_LIVE || !ctrl->tagset) @@ -3829,14 +3830,9 @@ static void nvme_scan_work(struct work_struct *work) nvme_clear_changed_ns_log(ctrl); } - if (nvme_identify_ctrl(ctrl, &id)) - return; - nn = le32_to_cpu(id->nn); - kfree(id); - mutex_lock(&ctrl->scan_lock); - if (nvme_scan_ns_list(ctrl, nn) != 0) - nvme_scan_ns_sequential(ctrl, nn); + if (nvme_scan_ns_list(ctrl) != 0) + nvme_scan_ns_sequential(ctrl); mutex_unlock(&ctrl->scan_lock); down_write(&ctrl->namespaces_rwsem); -- cgit v1.2.3 From aec459b484b8356d6abe862428787d98ffb1cbde Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 4 Apr 2020 10:34:21 +0200 Subject: nvme: remove the magic 1024 constant in nvme_scan_ns_list Replace it with a value derived from the identify data and nsid sizes. 
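The 1024 falls straight out of the data sizes: a 4096-byte Identify payload divided by 4-byte NSID entries. A one-line check, assuming the usual 4096-byte NVME_IDENTIFY_DATA_SIZE:

#include <stdint.h>
#include <stdio.h>

#define NVME_IDENTIFY_DATA_SIZE 4096	/* Identify data structure size */

int main(void)
{
	const int nr_entries = NVME_IDENTIFY_DATA_SIZE / sizeof(uint32_t);

	printf("nr_entries = %d\n", nr_entries);	/* prints 1024 */
	return 0;
}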
Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 15b63426a113..d0f65322156c 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3742,6 +3742,7 @@ static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl, static int nvme_scan_ns_list(struct nvme_ctrl *ctrl) { + const int nr_entries = NVME_IDENTIFY_DATA_SIZE / sizeof(__le32); __le32 *ns_list; u32 prev = 0; int ret = 0, i; @@ -3758,7 +3759,7 @@ static int nvme_scan_ns_list(struct nvme_ctrl *ctrl) if (ret) goto free; - for (i = 0; i < 1024; i++) { + for (i = 0; i < nr_entries; i++) { u32 nsid = le32_to_cpu(ns_list[i]); if (!nsid) /* end of the list? */ -- cgit v1.2.3 From d567572906d986dedb78b37f111c44eba033f3ef Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 9 Apr 2020 09:08:59 -0700 Subject: nvme: unlink head after removing last namespace The driver had been unlinking the namespace head from the subsystem's list only after the last reference was released, and outside of the list's subsys->lock protection. There is no reason to track an empty head, so unlink the entry from the subsystem's list when the last namespace using that head is removed and with the mutex lock protecting the list update. The next namespace to attach reusing the previous NSID will allocate a new head rather than find the old head with mismatched identifiers. Signed-off-by: Keith Busch Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index d0f65322156c..15b9e60be204 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -433,7 +433,6 @@ static void nvme_free_ns_head(struct kref *ref) nvme_mpath_remove_disk(head); ida_simple_remove(&head->subsys->ns_ida, head->instance); - list_del_init(&head->entry); cleanup_srcu_struct(&head->srcu); nvme_put_subsystem(head->subsys); kfree(head); @@ -3414,7 +3413,6 @@ static int __nvme_check_ids(struct nvme_subsystem *subsys, list_for_each_entry(h, &subsys->nsheads, entry) { if (nvme_ns_ids_valid(&new->ids) && - !list_empty(&h->list) && nvme_ns_ids_equal(&new->ids, &h->ids)) return -EINVAL; } @@ -3660,6 +3658,8 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) out_unlink_ns: mutex_lock(&ctrl->subsys->lock); list_del_rcu(&ns->siblings); + if (list_empty(&ns->head->list)) + list_del_init(&ns->head->entry); mutex_unlock(&ctrl->subsys->lock); nvme_put_ns_head(ns->head); out_free_id: @@ -3679,7 +3679,10 @@ static void nvme_ns_remove(struct nvme_ns *ns) mutex_lock(&ns->ctrl->subsys->lock); list_del_rcu(&ns->siblings); + if (list_empty(&ns->head->list)) + list_del_init(&ns->head->entry); mutex_unlock(&ns->ctrl->subsys->lock); + synchronize_rcu(); /* guarantee not available in head->list */ nvme_mpath_clear_current_path(ns); synchronize_srcu(&ns->head->srcu); /* wait for concurrent submissions */ -- cgit v1.2.3 From ac262508daa88fb12c5dc53cf30bde163f9f26c9 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 9 Apr 2020 09:09:00 -0700 Subject: nvme: release namespace head reference on error If a namespace identification does not match the subsystem's head for that NSID, release the reference that was taken when the matching head was initially found. 
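The underlying rule is plain get/put symmetry: a lookup that returns a counted object must be paired with a put on every exit path, including this error path. Schematically (an editor's sketch of the pattern the fix below restores, using function names from the diffs, not the literal nvme code):

	head = nvme_find_ns_head(subsys, nsid);      /* takes a reference */
	if (head && !nvme_ns_ids_equal(&head->ids, &ids)) {
		ret = -EINVAL;
		nvme_put_ns_head(head);              /* the put this patch adds */
		goto out_unlock;
	}

Without the put, every mismatched-ID attach would leak one reference and the head could never be freed.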
Signed-off-by: Keith Busch Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 15b9e60be204..f2dc89dd2e1c 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3503,6 +3503,7 @@ static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid, "IDs don't match for shared namespace %d\n", nsid); ret = -EINVAL; + nvme_put_ns_head(head); goto out_unlock; } } -- cgit v1.2.3 From 9ad1927a3bc2735996ccc74e31b68a41ae9a3d33 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 9 Apr 2020 09:09:01 -0700 Subject: nvme: always search for namespace head Even if a namespace reports it is not capable of sharing, search the subsystem for a matching namespace head. If found, the driver should reject that namespace since it's coming from an invalid configuration. Signed-off-by: Keith Busch Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index f2dc89dd2e1c..6093c8baf809 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3489,8 +3489,7 @@ static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid, goto out; mutex_lock(&ctrl->subsys->lock); - if (is_shared) - head = nvme_find_ns_head(ctrl->subsys, nsid); + head = nvme_find_ns_head(ctrl->subsys, nsid); if (!head) { head = nvme_alloc_ns_head(ctrl, nsid, &ids); if (IS_ERR(head)) { @@ -3498,6 +3497,14 @@ static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid, goto out_unlock; } } else { + if (!is_shared) { + dev_err(ctrl->device, + "Duplicate unshared namespace %d\n", + nsid); + ret = -EINVAL; + nvme_put_ns_head(head); + goto out_unlock; + } if (!nvme_ns_ids_equal(&head->ids, &ids)) { dev_err(ctrl->device, "IDs don't match for shared namespace %d\n", -- cgit v1.2.3 From 0c284db7f23571f6428c44ca714f13a1fc5f70df Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 9 Apr 2020 09:09:02 -0700 Subject: nvme: check namespace head shared property Reject a new shared namespace if a duplicate unshared namespace exists. 
Signed-off-by: Keith Busch Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 3 ++- drivers/nvme/host/nvme.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 6093c8baf809..6437f2273785 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3496,8 +3496,9 @@ static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid, ret = PTR_ERR(head); goto out_unlock; } + head->shared = is_shared; } else { - if (!is_shared) { + if (!is_shared || !head->shared) { dev_err(ctrl->device, "Duplicate unshared namespace %d\n", nsid); diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 36f44b79bb3b..6222439a0776 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -352,6 +352,7 @@ struct nvme_ns_head { struct nvme_ns_ids ids; struct list_head entry; struct kref ref; + bool shared; int instance; #ifdef CONFIG_NVME_MULTIPATH struct gendisk *disk; -- cgit v1.2.3 From b2ce4d90690bd29ce5b554e203cd03682dd59697 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 9 Apr 2020 09:09:04 -0700 Subject: nvme-multipath: set bdi capabilities once The queues' backing device info capabilities don't change with each namespace revalidation. Set it only when each path's request_queue is initially added to a multipath queue. Signed-off-by: Keith Busch Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 7 ------- drivers/nvme/host/multipath.c | 8 ++++++++ 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 6437f2273785..0624393d95e2 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1909,13 +1909,6 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) if (ns->head->disk) { nvme_update_disk_info(ns->head->disk, ns, id); blk_queue_stack_limits(ns->head->disk->queue, ns->queue); - if (bdi_cap_stable_pages_required(ns->queue->backing_dev_info)) { - struct backing_dev_info *info = - ns->head->disk->queue->backing_dev_info; - - info->capabilities |= BDI_CAP_STABLE_WRITES; - } - revalidate_disk(ns->head->disk); } #endif diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 54603bd3e02d..9f2844935fdf 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -3,6 +3,7 @@ * Copyright (c) 2017-2018 Christoph Hellwig. */ +#include <linux/backing-dev.h> #include <linux/moduleparam.h> #include <trace/events/block.h> #include "nvme.h" @@ -666,6 +667,13 @@ void nvme_mpath_add_disk(struct nvme_ns *ns, struct nvme_id_ns *id) nvme_mpath_set_live(ns); mutex_unlock(&ns->head->lock); } + + if (bdi_cap_stable_pages_required(ns->queue->backing_dev_info)) { + struct backing_dev_info *info = + ns->head->disk->queue->backing_dev_info; + + info->capabilities |= BDI_CAP_STABLE_WRITES; + } } void nvme_mpath_remove_disk(struct nvme_ns_head *head) -- cgit v1.2.3 From b2b2de7c5a0127882848f9c5e9379e84d2e02041 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 9 Apr 2020 09:09:05 -0700 Subject: nvme: revalidate after verifying identifiers If the namespace identifiers have changed, skip updating the disk information, as that will register parameters from a mismatched namespace.
Signed-off-by: Keith Busch Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 0624393d95e2..b9e6e6de58cf 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1936,7 +1936,6 @@ static int nvme_revalidate_disk(struct gendisk *disk) goto free_id; } - __nvme_revalidate_disk(disk, id); ret = nvme_report_ns_ids(ctrl, ns->head->ns_id, id, &ids); if (ret) goto free_id; @@ -1945,8 +1944,10 @@ static int nvme_revalidate_disk(struct gendisk *disk) dev_err(ctrl->device, "identifiers changed for nsid %d\n", ns->head->ns_id); ret = -ENODEV; + goto free_id; } + __nvme_revalidate_disk(disk, id); free_id: kfree(id); out: -- cgit v1.2.3 From 38adf94e166e3cb4eb89683458ca578051e8218d Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 9 Apr 2020 09:09:06 -0700 Subject: nvme: consolidate chunk_sectors settings Move the quirked chunk_sectors setting to the same location as noiob so one place registers this setting. And since the noiob value is only used locally, remove the member from struct nvme_ns. Signed-off-by: Keith Busch Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 22 ++++++++++------------ drivers/nvme/host/nvme.h | 1 - 2 files changed, 10 insertions(+), 13 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index b9e6e6de58cf..2279557d7361 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1725,12 +1725,6 @@ static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type) } #endif /* CONFIG_BLK_DEV_INTEGRITY */ -static void nvme_set_chunk_size(struct nvme_ns *ns) -{ - u32 chunk_size = nvme_lba_to_sect(ns, ns->noiob); - blk_queue_chunk_sectors(ns->queue, rounddown_pow_of_two(chunk_size)); -} - static void nvme_config_discard(struct gendisk *disk, struct nvme_ns *ns) { struct nvme_ctrl *ctrl = ns->ctrl; @@ -1885,6 +1879,7 @@ static void nvme_update_disk_info(struct gendisk *disk, static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) { struct nvme_ns *ns = disk->private_data; + u32 iob; /* * If identify namespace failed, use default 512 byte block size so @@ -1893,7 +1888,13 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) ns->lba_shift = id->lbaf[id->flbas & NVME_NS_FLBAS_LBA_MASK].ds; if (ns->lba_shift == 0) ns->lba_shift = 9; - ns->noiob = le16_to_cpu(id->noiob); + + if ((ns->ctrl->quirks & NVME_QUIRK_STRIPE_SIZE) && + is_power_of_2(ns->ctrl->max_hw_sectors)) + iob = ns->ctrl->max_hw_sectors; + else + iob = nvme_lba_to_sect(ns, le16_to_cpu(id->noiob)); + ns->ms = le16_to_cpu(id->lbaf[id->flbas & NVME_NS_FLBAS_LBA_MASK].ms); ns->ext = ns->ms && (id->flbas & NVME_NS_FLBAS_META_EXT); /* the PI implementation requires metadata equal t10 pi tuple size */ @@ -1902,8 +1903,8 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) else ns->pi_type = 0; - if (ns->noiob) - nvme_set_chunk_size(ns); + if (iob) + blk_queue_chunk_sectors(ns->queue, rounddown_pow_of_two(iob)); nvme_update_disk_info(disk, ns, id); #ifdef CONFIG_NVME_MULTIPATH if (ns->head->disk) { @@ -2255,9 +2256,6 @@ static void nvme_set_queue_limits(struct nvme_ctrl *ctrl, blk_queue_max_hw_sectors(q, ctrl->max_hw_sectors); blk_queue_max_segments(q, min_t(u32, max_segments, USHRT_MAX)); } - if ((ctrl->quirks & 
NVME_QUIRK_STRIPE_SIZE) && - is_power_of_2(ctrl->max_hw_sectors)) - blk_queue_chunk_sectors(q, ctrl->max_hw_sectors); blk_queue_virt_boundary(q, ctrl->page_size - 1); if (ctrl->vwc & NVME_CTRL_VWC_PRESENT) vwc = true; diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 6222439a0776..f3ab17778349 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -389,7 +389,6 @@ struct nvme_ns { #define NVME_NS_REMOVING 0 #define NVME_NS_DEAD 1 #define NVME_NS_ANA_PENDING 2 - u16 noiob; struct nvme_fault_inject fault_inject; -- cgit v1.2.3 From bc1af009a8ed99aac86ee44a711c4a406ed74263 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 9 Apr 2020 09:09:07 -0700 Subject: nvme: revalidate namespace stream parameters The stream parameters are based on the currently formatted logical block size. Recheck these parameters on namespace revalidation so the registered constraints will be accurate. Signed-off-by: Keith Busch Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 54 ++++++++++++++++++++++++------------------------ 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 2279557d7361..86ca33b3873f 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1810,6 +1810,32 @@ static bool nvme_ns_ids_equal(struct nvme_ns_ids *a, struct nvme_ns_ids *b) memcmp(&a->eui64, &b->eui64, sizeof(a->eui64)) == 0; } +static int nvme_setup_streams_ns(struct nvme_ctrl *ctrl, struct nvme_ns *ns) +{ + struct streams_directive_params s; + int ret; + + if (!ctrl->nr_streams) + return 0; + + ret = nvme_get_stream_params(ctrl, &s, ns->head->ns_id); + if (ret) + return ret; + + ns->sws = le32_to_cpu(s.sws); + ns->sgs = le16_to_cpu(s.sgs); + + if (ns->sws) { + unsigned int bs = 1 << ns->lba_shift; + + blk_queue_io_min(ns->queue, bs * ns->sws); + if (ns->sgs) + blk_queue_io_opt(ns->queue, bs * ns->sws * ns->sgs); + } + + return 0; +} + static void nvme_update_disk_info(struct gendisk *disk, struct nvme_ns *ns, struct nvme_id_ns *id) { @@ -1824,6 +1850,7 @@ static void nvme_update_disk_info(struct gendisk *disk, blk_mq_freeze_queue(disk->queue); blk_integrity_unregister(disk); + nvme_setup_streams_ns(ns->ctrl, ns); if (id->nabo == 0) { /* * Bit 1 indicates whether NAWUPF is defined for this namespace @@ -3546,32 +3573,6 @@ static struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid) return ret; } -static int nvme_setup_streams_ns(struct nvme_ctrl *ctrl, struct nvme_ns *ns) -{ - struct streams_directive_params s; - int ret; - - if (!ctrl->nr_streams) - return 0; - - ret = nvme_get_stream_params(ctrl, &s, ns->head->ns_id); - if (ret) - return ret; - - ns->sws = le32_to_cpu(s.sws); - ns->sgs = le16_to_cpu(s.sgs); - - if (ns->sws) { - unsigned int bs = 1 << ns->lba_shift; - - blk_queue_io_min(ns->queue, bs * ns->sws); - if (ns->sgs) - blk_queue_io_opt(ns->queue, bs * ns->sws * ns->sgs); - } - - return 0; -} - static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) { struct nvme_ns *ns; @@ -3615,7 +3616,6 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) ret = nvme_init_ns_head(ns, nsid, id); if (ret) goto out_free_id; - nvme_setup_streams_ns(ctrl, ns); nvme_set_disk_name(disk_name, ns, ctrl, &flags); disk = alloc_disk_node(0, node); -- cgit v1.2.3 From 31fdad7be18992606078caed6ff71741fa76310a Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 9 Apr 2020 09:09:08 -0700 Subject: nvme: consolidate io
settings The stream parameters indicating optimal io settings were just getting overwritten later. Rearrange the settings so the streams parameters can be preserved if provided. Signed-off-by: Keith Busch Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 86ca33b3873f..0231f61f37d0 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1810,7 +1810,8 @@ static bool nvme_ns_ids_equal(struct nvme_ns_ids *a, struct nvme_ns_ids *b) memcmp(&a->eui64, &b->eui64, sizeof(a->eui64)) == 0; } -static int nvme_setup_streams_ns(struct nvme_ctrl *ctrl, struct nvme_ns *ns) +static int nvme_setup_streams_ns(struct nvme_ctrl *ctrl, struct nvme_ns *ns, + u32 *phys_bs, u32 *io_opt) { struct streams_directive_params s; int ret; @@ -1826,11 +1827,9 @@ static int nvme_setup_streams_ns(struct nvme_ctrl *ctrl, struct nvme_ns *ns) ns->sgs = le16_to_cpu(s.sgs); if (ns->sws) { - unsigned int bs = 1 << ns->lba_shift; - - blk_queue_io_min(ns->queue, bs * ns->sws); + *phys_bs = ns->sws * (1 << ns->lba_shift); if (ns->sgs) - blk_queue_io_opt(ns->queue, bs * ns->sws * ns->sgs); + *io_opt = *phys_bs * ns->sgs; } return 0; @@ -1850,7 +1849,8 @@ static void nvme_update_disk_info(struct gendisk *disk, blk_mq_freeze_queue(disk->queue); blk_integrity_unregister(disk); - nvme_setup_streams_ns(ns->ctrl, ns); + atomic_bs = phys_bs = io_opt = bs; + nvme_setup_streams_ns(ns->ctrl, ns, &phys_bs, &io_opt); if (id->nabo == 0) { /* * Bit 1 indicates whether NAWUPF is defined for this namespace @@ -1861,16 +1861,13 @@ static void nvme_update_disk_info(struct gendisk *disk, atomic_bs = (1 + le16_to_cpu(id->nawupf)) * bs; else atomic_bs = (1 + ns->ctrl->subsys->awupf) * bs; - } else { - atomic_bs = bs; } - phys_bs = bs; - io_opt = bs; + if (id->nsfeat & (1 << 4)) { /* NPWG = Namespace Preferred Write Granularity */ - phys_bs *= 1 + le16_to_cpu(id->npwg); + phys_bs = bs * (1 + le16_to_cpu(id->npwg)); /* NOWS = Namespace Optimal Write Size */ - io_opt *= 1 + le16_to_cpu(id->nows); + io_opt = bs * (1 + le16_to_cpu(id->nows)); } blk_queue_logical_block_size(disk->queue, bs); -- cgit v1.2.3 From e8cd1ff11d58a21242ea2b85450298f3681768a1 Mon Sep 17 00:00:00 2001 From: Anthony Iliopoulos Date: Sun, 19 Apr 2020 16:48:50 -0700 Subject: nvmet: add ns revalidation support Add support for detecting capacity changes on nvmet blockdev and file backed namespaces. This allows for emulating and testing online resizing of nvme devices and filesystems on top. 
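The same idea in miniature: re-query the backing store's size at identify time instead of trusting the value cached at enable time. A hedged userspace analogue, with plain stat(2) standing in for vfs_getattr():

#include <stdio.h>
#include <sys/stat.h>

/* re-read the backing file's size, as the revalidate helper would */
static int file_ns_revalidate(const char *path, long long *size)
{
	struct stat st;

	if (stat(path, &st) != 0)
		return -1;
	*size = (long long)st.st_size;
	return 0;
}

int main(int argc, char **argv)
{
	long long size;

	if (argc > 1 && file_ns_revalidate(argv[1], &size) == 0)
		printf("current backing size: %lld bytes\n", size);
	return 0;
}

Running it twice around a truncate of the backing file shows the size change that a resized namespace would report.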
Signed-off-by: Anthony Iliopoulos [chaitanya: Fix comments posted on V1] Signed-off-by: Chaitanya Kulkarni [hch: reuse code a bit more] Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/target/admin-cmd.c | 5 +++++ drivers/nvme/target/io-cmd-bdev.c | 5 +++++ drivers/nvme/target/io-cmd-file.c | 17 +++++++++++++---- drivers/nvme/target/nvmet.h | 2 ++ 4 files changed, 25 insertions(+), 4 deletions(-) diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index 9d6f75cfa77c..4c79aa804887 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -486,6 +486,11 @@ static void nvmet_execute_identify_ns(struct nvmet_req *req) if (!ns) goto done; + if (ns->bdev) + nvmet_bdev_ns_revalidate(ns); + else + nvmet_file_ns_revalidate(ns); + /* * nuse = ncap = nsze isn't always true, but we have no way to find * that out from the underlying device. diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c index ea0e596be15d..0427e040e3dd 100644 --- a/drivers/nvme/target/io-cmd-bdev.c +++ b/drivers/nvme/target/io-cmd-bdev.c @@ -75,6 +75,11 @@ void nvmet_bdev_ns_disable(struct nvmet_ns *ns) } } +void nvmet_bdev_ns_revalidate(struct nvmet_ns *ns) +{ + ns->size = i_size_read(ns->bdev->bd_inode); +} + static u16 blk_to_nvme_status(struct nvmet_req *req, blk_status_t blk_sts) { u16 status = NVME_SC_SUCCESS; diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c index cd5670b83118..f0bd08d86ac0 100644 --- a/drivers/nvme/target/io-cmd-file.c +++ b/drivers/nvme/target/io-cmd-file.c @@ -13,6 +13,18 @@ #define NVMET_MAX_MPOOL_BVEC 16 #define NVMET_MIN_MPOOL_OBJ 16 +int nvmet_file_ns_revalidate(struct nvmet_ns *ns) +{ + struct kstat stat; + int ret; + + ret = vfs_getattr(&ns->file->f_path, &stat, STATX_SIZE, + AT_STATX_FORCE_SYNC); + if (!ret) + ns->size = stat.size; + return ret; +} + void nvmet_file_ns_disable(struct nvmet_ns *ns) { if (ns->file) { @@ -30,7 +42,6 @@ void nvmet_file_ns_disable(struct nvmet_ns *ns) int nvmet_file_ns_enable(struct nvmet_ns *ns) { int flags = O_RDWR | O_LARGEFILE; - struct kstat stat; int ret; if (!ns->buffered_io) @@ -43,12 +54,10 @@ int nvmet_file_ns_enable(struct nvmet_ns *ns) return PTR_ERR(ns->file); } - ret = vfs_getattr(&ns->file->f_path, - &stat, STATX_SIZE, AT_STATX_FORCE_SYNC); + ret = nvmet_file_ns_revalidate(ns); if (ret) goto err; - ns->size = stat.size; /* * i_blkbits can be greater than the universally accepted upper bound, * so make sure we export a sane namespace lba_shift. 
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index 421dff3ea143..3d981eb6e100 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -498,6 +498,8 @@ void nvmet_file_ns_disable(struct nvmet_ns *ns); u16 nvmet_bdev_flush(struct nvmet_req *req); u16 nvmet_file_flush(struct nvmet_req *req); void nvmet_ns_changed(struct nvmet_subsys *subsys, u32 nsid); +void nvmet_bdev_ns_revalidate(struct nvmet_ns *ns); +int nvmet_file_ns_revalidate(struct nvmet_ns *ns); static inline u32 nvmet_rw_len(struct nvmet_req *req) { -- cgit v1.2.3 From 3add1d93d9919b6de94aa47900d4904adffbc976 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 30 Apr 2020 23:30:57 +0200 Subject: nvme-fc: avoid gcc-10 zero-length-bounds warning When CONFIG_ARCH_NO_SG_CHAIN is set, op->sgl[0] cannot be dereferenced, as gcc-10 now points out: drivers/nvme/host/fc.c: In function 'nvme_fc_init_request': drivers/nvme/host/fc.c:1774:29: warning: array subscript 0 is outside the bounds of an interior zero-length array 'struct scatterlist[0]' [-Wzero-length-bounds] 1774 | op->op.fcp_req.first_sgl = &op->sgl[0]; | ^~~~~~~~~~~ drivers/nvme/host/fc.c:98:21: note: while referencing 'sgl' 98 | struct scatterlist sgl[NVME_INLINE_SG_CNT]; | ^~~ I don't know if this is a legitimate warning or a false-positive. If this is just a false alarm, the warning is easily suppressed by interpreting the array as a pointer. Fixes: b1ae1a238900 ("nvme-fc: Avoid preallocating big SGL for data") Signed-off-by: Arnd Bergmann Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/fc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 1921d2195541..0b3ab3355e25 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2106,7 +2106,7 @@ nvme_fc_init_request(struct blk_mq_tag_set *set, struct request *rq, res = __nvme_fc_init_request(ctrl, queue, &op->op, rq, queue->rqcnt++); if (res) return res; - op->op.fcp_req.first_sgl = &op->sgl[0]; + op->op.fcp_req.first_sgl = op->sgl; op->op.fcp_req.private = &op->priv[0]; nvme_req(rq)->ctrl = &ctrl->ctrl; return res; -- cgit v1.2.3 From 6623c5b3dfa5513190d729a8516db7a5163ec7de Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 22 Apr 2020 09:59:08 +0200 Subject: nvme: clean up error handling in nvme_init_ns_head Use a common label for putting the nshead if needed and only convert nvme status codes for the one case where it actually is needed. 
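The shape of the resulting error handling is the classic kernel unwind pattern: take the reference first, then funnel every failure through one label that drops it. A self-contained sketch (names invented for illustration):

#include <errno.h>
#include <stdio.h>

struct head {
	int refs;
};

static void put_head(struct head *h)
{
	h->refs--;
}

/* one unwind label per held resource keeps every error path identical */
static int attach(struct head *h, int shared, int ids_match)
{
	int ret;

	h->refs++;			/* take the reference up front */

	ret = -EINVAL;
	if (!shared)
		goto out_put_head;	/* duplicate unshared namespace */
	if (!ids_match)
		goto out_put_head;	/* IDs don't match */
	return 0;

out_put_head:
	put_head(h);
	return ret;
}

int main(void)
{
	struct head h = { 0 };

	printf("ok path:      %d\n", attach(&h, 1, 1));
	printf("unshared dup: %d\n", attach(&h, 0, 1));
	printf("id mismatch:  %d\n", attach(&h, 1, 0));
	printf("refs held:    %d\n", h.refs);	/* 1: failures dropped theirs */
	return 0;
}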
Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 0231f61f37d0..35e279261c7d 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3501,8 +3501,11 @@ static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid, int ret = 0; ret = nvme_report_ns_ids(ctrl, nsid, id, &ids); - if (ret) - goto out; + if (ret) { + if (ret < 0) + return ret; + return blk_status_to_errno(nvme_error_status(ret)); + } mutex_lock(&ctrl->subsys->lock); head = nvme_find_ns_head(ctrl->subsys, nsid); @@ -3514,32 +3517,29 @@ static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid, } head->shared = is_shared; } else { + ret = -EINVAL; if (!is_shared || !head->shared) { dev_err(ctrl->device, - "Duplicate unshared namespace %d\n", - nsid); - ret = -EINVAL; - nvme_put_ns_head(head); - goto out_unlock; + "Duplicate unshared namespace %d\n", nsid); + goto out_put_ns_head; } if (!nvme_ns_ids_equal(&head->ids, &ids)) { dev_err(ctrl->device, "IDs don't match for shared namespace %d\n", nsid); - ret = -EINVAL; - nvme_put_ns_head(head); - goto out_unlock; + goto out_put_ns_head; } } list_add_tail(&ns->siblings, &head->list); ns->head = head; + mutex_unlock(&ctrl->subsys->lock); + return 0; +out_put_ns_head: + nvme_put_ns_head(head); out_unlock: mutex_unlock(&ctrl->subsys->lock); -out: - if (ret > 0) - ret = blk_status_to_errno(nvme_error_status(ret)); return ret; } -- cgit v1.2.3 From b04df85d9a05d7ee4b3f93c1a22312b41a949ec1 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 30 Apr 2020 05:31:23 +0900 Subject: nvme: flush scan work on passthrough commands If a passthrough command causes the namespace inventory or capabilities to change, flush the scan work that handles these changes so the driver synchronizes with the user command's effects before returning the result to user space. Signed-off-by: Keith Busch Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 35e279261c7d..4a124a28118f 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1404,8 +1404,10 @@ static void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects) } if (effects & NVME_CMD_EFFECTS_CCC) nvme_init_identify(ctrl); - if (effects & (NVME_CMD_EFFECTS_NIC | NVME_CMD_EFFECTS_NCC)) + if (effects & (NVME_CMD_EFFECTS_NIC | NVME_CMD_EFFECTS_NCC)) { nvme_queue_scan(ctrl); + flush_work(&ctrl->scan_work); + } } static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns, -- cgit v1.2.3 From 74943d45eef4db64b1e5c9f7ad1d018576e113c5 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 28 Apr 2020 07:21:56 -0700 Subject: nvme-pci: remove volatile cqes The completion queue entry is not volatile once the phase is confirmed. Remove the volatile keywords and check the phase using the appropriate READ_ONCE() accessor, allowing the compiler to optimize the remaining completion path. 
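The key point is that only the phase-carrying status word needs an ordered, tear-free load; once the phase matches, the rest of the entry is stable and may be read normally. A userspace approximation, using a C11 acquire load as a conservative stand-in for READ_ONCE() and the driver's ordering:

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

struct cqe {
	_Atomic uint16_t status;	/* bit 0 carries the phase tag */
	uint16_t command_id;		/* plain field: read after the check */
};

static int cqe_pending(struct cqe *cqe, unsigned int phase)
{
	/* one ordered load of the status word, then plain accesses */
	uint16_t status = atomic_load_explicit(&cqe->status,
					       memory_order_acquire);

	return (status & 1) == phase;
}

int main(void)
{
	struct cqe cqe = { .command_id = 7 };

	atomic_store(&cqe.status, 0x0001);
	if (cqe_pending(&cqe, 1))
		printf("entry valid, command_id=%u\n", cqe.command_id);
	printf("phase 0 pending: %d\n", cqe_pending(&cqe, 0));
	return 0;
}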
Signed-off-by: Keith Busch Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/pci.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index e13c370de830..e95c7465c7bd 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -166,7 +166,7 @@ struct nvme_queue { void *sq_cmds; /* only used for poll queues: */ spinlock_t cq_poll_lock ____cacheline_aligned_in_smp; - volatile struct nvme_completion *cqes; + struct nvme_completion *cqes; dma_addr_t sq_dma_addr; dma_addr_t cq_dma_addr; u32 __iomem *q_db; @@ -922,8 +922,9 @@ static void nvme_pci_complete_rq(struct request *req) /* We read the CQE phase first to check if the rest of the entry is valid */ static inline bool nvme_cqe_pending(struct nvme_queue *nvmeq) { - return (le16_to_cpu(nvmeq->cqes[nvmeq->cq_head].status) & 1) == - nvmeq->cq_phase; + struct nvme_completion *hcqe = &nvmeq->cqes[nvmeq->cq_head]; + + return (le16_to_cpu(READ_ONCE(hcqe->status)) & 1) == nvmeq->cq_phase; } static inline void nvme_ring_cq_doorbell(struct nvme_queue *nvmeq) @@ -944,7 +945,7 @@ static inline struct blk_mq_tags *nvme_queue_tagset(struct nvme_queue *nvmeq) static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx) { - volatile struct nvme_completion *cqe = &nvmeq->cqes[idx]; + struct nvme_completion *cqe = &nvmeq->cqes[idx]; struct request *req; if (unlikely(cqe->command_id >= nvmeq->q_depth)) { -- cgit v1.2.3 From 54b2fcee1db041a83b52b51752dade6090cf952f Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 27 Apr 2020 11:54:46 -0700 Subject: nvme-pci: remove last_sq_tail The nvme driver does not have enough tags to wrap the queue, and blk-mq will no longer call commit_rqs() when there are no new submissions to notify. Signed-off-by: Keith Busch Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/pci.c | 23 ++++------------------- 1 file changed, 4 insertions(+), 19 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index e95c7465c7bd..b945e9a89883 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -173,7 +173,6 @@ struct nvme_queue { u16 q_depth; u16 cq_vector; u16 sq_tail; - u16 last_sq_tail; u16 cq_head; u16 qid; u8 cq_phase; @@ -446,24 +445,11 @@ static int nvme_pci_map_queues(struct blk_mq_tag_set *set) return 0; } -/* - * Write sq tail if we are asked to, or if the next command would wrap. 
- */ -static inline void nvme_write_sq_db(struct nvme_queue *nvmeq, bool write_sq) +static inline void nvme_write_sq_db(struct nvme_queue *nvmeq) { - if (!write_sq) { - u16 next_tail = nvmeq->sq_tail + 1; - - if (next_tail == nvmeq->q_depth) - next_tail = 0; - if (next_tail != nvmeq->last_sq_tail) - return; - } - if (nvme_dbbuf_update_and_check_event(nvmeq->sq_tail, nvmeq->dbbuf_sq_db, nvmeq->dbbuf_sq_ei)) writel(nvmeq->sq_tail, nvmeq->q_db); - nvmeq->last_sq_tail = nvmeq->sq_tail; } /** @@ -480,7 +466,8 @@ static void nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd, cmd, sizeof(*cmd)); if (++nvmeq->sq_tail == nvmeq->q_depth) nvmeq->sq_tail = 0; - nvme_write_sq_db(nvmeq, write_sq); + if (write_sq) + nvme_write_sq_db(nvmeq); spin_unlock(&nvmeq->sq_lock); } @@ -489,8 +476,7 @@ static void nvme_commit_rqs(struct blk_mq_hw_ctx *hctx) struct nvme_queue *nvmeq = hctx->driver_data; spin_lock(&nvmeq->sq_lock); - if (nvmeq->sq_tail != nvmeq->last_sq_tail) - nvme_write_sq_db(nvmeq, true); + nvme_write_sq_db(nvmeq); spin_unlock(&nvmeq->sq_lock); } @@ -1494,7 +1480,6 @@ static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid) struct nvme_dev *dev = nvmeq->dev; nvmeq->sq_tail = 0; - nvmeq->last_sq_tail = 0; nvmeq->cq_head = 0; nvmeq->cq_phase = 1; nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride]; -- cgit v1.2.3 From 2a5bcfdd41d68559567cec3c124a75e093506cc1 Mon Sep 17 00:00:00 2001 From: Weiping Zhang Date: Sat, 2 May 2020 15:29:41 +0800 Subject: nvme-pci: align io queue count with allocated nvme_queue in nvme_probe Since commit 147b27e4bd08 ("nvme-pci: allocate device queues storage space at probe"), nvme_alloc_queue does not allocate the nvme queues itself anymore. If the write/poll_queues module parameters are changed at runtime to values larger than the number of allocated queues in nvme_probe, nvme_alloc_queue will access unallocated memory. Add a new nr_allocated_queues member to struct nvme_dev to record how many queues were allocated in nvme_probe to avoid using more than the allocated queues after a reset following a change to the write/poll_queues module parameters. Also add nr_write_queues and nr_poll_queues members to allow refreshing the number of write and poll queues based on a change to the module parameters when resetting the controller.
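In effect the module parameters are sampled twice: once at probe time to size the allocation, and again at reset time, with the result clamped so a runtime increase can never exceed the probe-time allocation. A small sketch of that clamp (field names mirror the patch; the numbers are made up):

#include <stdio.h>

/* snapshot of the module parameters, as struct nvme_dev now keeps it */
struct dev_cfg {
	unsigned int nr_allocated_queues;	/* fixed at probe time */
	unsigned int nr_write_queues;		/* re-sampled at each reset */
	unsigned int nr_poll_queues;
};

static unsigned int min_u(unsigned int a, unsigned int b)
{
	return a < b ? a : b;
}

/* never request more io queues than were allocated at probe time */
static unsigned int io_queues_for_reset(const struct dev_cfg *dev,
					unsigned int possible_cpus)
{
	unsigned int wanted = possible_cpus + dev->nr_write_queues +
			      dev->nr_poll_queues;

	return min_u(wanted, dev->nr_allocated_queues - 1);
}

int main(void)
{
	/* probe on a 4-cpu box with write=2 poll=2: 8 io queues + 1 admin */
	struct dev_cfg dev = {
		.nr_allocated_queues = 9,
		.nr_write_queues = 8,	/* parameters raised at runtime */
		.nr_poll_queues = 8,
	};

	printf("io queues used after reset: %u\n",
	       io_queues_for_reset(&dev, 4));	/* clamped to 8 */
	return 0;
}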
Fixes: 147b27e4bd08 ("nvme-pci: allocate device queues storage space at probe") Signed-off-by: Weiping Zhang Reviewed-by: Keith Busch Reviewed-by: Max Gurtovoy [hch: add nvme_max_io_queues, update the commit message] Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/pci.c | 57 ++++++++++++++++++++++++++++--------------------- 1 file changed, 33 insertions(+), 24 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index b945e9a89883..b0978ac554d5 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -128,6 +128,9 @@ struct nvme_dev { dma_addr_t host_mem_descs_dma; struct nvme_host_mem_buf_desc *host_mem_descs; void **host_mem_desc_bufs; + unsigned int nr_allocated_queues; + unsigned int nr_write_queues; + unsigned int nr_poll_queues; }; static int io_queue_depth_set(const char *val, const struct kernel_param *kp) @@ -208,25 +211,14 @@ struct nvme_iod { struct scatterlist *sg; }; -static unsigned int max_io_queues(void) +static inline unsigned int nvme_dbbuf_size(struct nvme_dev *dev) { - return num_possible_cpus() + write_queues + poll_queues; -} - -static unsigned int max_queue_count(void) -{ - /* IO queues + admin queue */ - return 1 + max_io_queues(); -} - -static inline unsigned int nvme_dbbuf_size(u32 stride) -{ - return (max_queue_count() * 8 * stride); + return dev->nr_allocated_queues * 8 * dev->db_stride; } static int nvme_dbbuf_dma_alloc(struct nvme_dev *dev) { - unsigned int mem_size = nvme_dbbuf_size(dev->db_stride); + unsigned int mem_size = nvme_dbbuf_size(dev); if (dev->dbbuf_dbs) return 0; @@ -251,7 +243,7 @@ static int nvme_dbbuf_dma_alloc(struct nvme_dev *dev) static void nvme_dbbuf_dma_free(struct nvme_dev *dev) { - unsigned int mem_size = nvme_dbbuf_size(dev->db_stride); + unsigned int mem_size = nvme_dbbuf_size(dev); if (dev->dbbuf_dbs) { dma_free_coherent(dev->dev, mem_size, @@ -1981,7 +1973,7 @@ static int nvme_setup_host_mem(struct nvme_dev *dev) static void nvme_calc_irq_sets(struct irq_affinity *affd, unsigned int nrirqs) { struct nvme_dev *dev = affd->priv; - unsigned int nr_read_queues; + unsigned int nr_read_queues, nr_write_queues = dev->nr_write_queues; /* * If there is no interupt available for queues, ensure that @@ -1997,12 +1989,12 @@ static void nvme_calc_irq_sets(struct irq_affinity *affd, unsigned int nrirqs) if (!nrirqs) { nrirqs = 1; nr_read_queues = 0; - } else if (nrirqs == 1 || !write_queues) { + } else if (nrirqs == 1 || !nr_write_queues) { nr_read_queues = 0; - } else if (write_queues >= nrirqs) { + } else if (nr_write_queues >= nrirqs) { nr_read_queues = 1; } else { - nr_read_queues = nrirqs - write_queues; + nr_read_queues = nrirqs - nr_write_queues; } dev->io_queues[HCTX_TYPE_DEFAULT] = nrirqs - nr_read_queues; @@ -2026,7 +2018,7 @@ static int nvme_setup_irqs(struct nvme_dev *dev, unsigned int nr_io_queues) * Poll queues don't need interrupts, but we need at least one IO * queue left over for non-polled IO. 
*/ - this_p_queues = poll_queues; + this_p_queues = dev->nr_poll_queues; if (this_p_queues >= nr_io_queues) { this_p_queues = nr_io_queues - 1; irq_queues = 1; @@ -2056,14 +2048,25 @@ static void nvme_disable_io_queues(struct nvme_dev *dev) __nvme_disable_io_queues(dev, nvme_admin_delete_cq); } +static unsigned int nvme_max_io_queues(struct nvme_dev *dev) +{ + return num_possible_cpus() + dev->nr_write_queues + dev->nr_poll_queues; +} + static int nvme_setup_io_queues(struct nvme_dev *dev) { struct nvme_queue *adminq = &dev->queues[0]; struct pci_dev *pdev = to_pci_dev(dev->dev); - int result, nr_io_queues; + unsigned int nr_io_queues; unsigned long size; + int result; - nr_io_queues = max_io_queues(); + /* + * Sample the module parameters once at reset time so that we have + * stable values to work with. + */ + dev->nr_write_queues = write_queues; + dev->nr_poll_queues = poll_queues; /* * If tags are shared with admin queue (Apple bug), then @@ -2071,6 +2074,9 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) */ if (dev->ctrl.quirks & NVME_QUIRK_SHARED_TAGS) nr_io_queues = 1; + else + nr_io_queues = min(nvme_max_io_queues(dev), + dev->nr_allocated_queues - 1); result = nvme_set_queue_count(&dev->ctrl, &nr_io_queues); if (result < 0) @@ -2745,8 +2751,11 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (!dev) return -ENOMEM; - dev->queues = kcalloc_node(max_queue_count(), sizeof(struct nvme_queue), - GFP_KERNEL, node); + dev->nr_write_queues = write_queues; + dev->nr_poll_queues = poll_queues; + dev->nr_allocated_queues = nvme_max_io_queues(dev) + 1; + dev->queues = kcalloc_node(dev->nr_allocated_queues, + sizeof(struct nvme_queue), GFP_KERNEL, node); if (!dev->queues) goto free; -- cgit v1.2.3 From 386e5e6e1aa90b479fcf0467935922df8524393d Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 30 Apr 2020 13:59:32 -0700 Subject: nvme-tcp: use bh_lock in data_ready data_ready may be invoked from send context or from softirq, so need bh locking for that. Fixes: 3f2304f8c6d6 ("nvme-tcp: add NVMe over TCP host driver") Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/tcp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index c15a92163c1f..4862fa962011 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -794,11 +794,11 @@ static void nvme_tcp_data_ready(struct sock *sk) { struct nvme_tcp_queue *queue; - read_lock(&sk->sk_callback_lock); + read_lock_bh(&sk->sk_callback_lock); queue = sk->sk_user_data; if (likely(queue && queue->rd_enabled)) queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work); - read_unlock(&sk->sk_callback_lock); + read_unlock_bh(&sk->sk_callback_lock); } static void nvme_tcp_write_space(struct sock *sk) -- cgit v1.2.3 From 72e5d757c62029664c0287d14519ec4451901b5e Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Fri, 1 May 2020 14:25:44 -0700 Subject: nvme-tcp: avoid scheduling io_work if we are already polling When the user runs polled I/O, we shouldn't have to trigger the workqueue to generate the receive work upon the .data_ready upcall. This prevents a redundant context switch when the application is already polling for completions. 
Proposed-by: Mark Wunderlich Signed-off-by: Mark Wunderlich Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/tcp.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 4862fa962011..b28f91d0f083 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -60,6 +60,7 @@ struct nvme_tcp_request { enum nvme_tcp_queue_flags { NVME_TCP_Q_ALLOCATED = 0, NVME_TCP_Q_LIVE = 1, + NVME_TCP_Q_POLLING = 2, }; enum nvme_tcp_recv_state { @@ -796,7 +797,8 @@ static void nvme_tcp_data_ready(struct sock *sk) read_lock_bh(&sk->sk_callback_lock); queue = sk->sk_user_data; - if (likely(queue && queue->rd_enabled)) + if (likely(queue && queue->rd_enabled) && + !test_bit(NVME_TCP_Q_POLLING, &queue->flags)) queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work); read_unlock_bh(&sk->sk_callback_lock); } @@ -2302,9 +2304,11 @@ static int nvme_tcp_poll(struct blk_mq_hw_ctx *hctx) if (!test_bit(NVME_TCP_Q_LIVE, &queue->flags)) return 0; + set_bit(NVME_TCP_Q_POLLING, &queue->flags); if (sk_can_busy_loop(sk) && skb_queue_empty_lockless(&sk->sk_receive_queue)) sk_busy_loop(sk, true); nvme_tcp_try_recv(queue); + clear_bit(NVME_TCP_Q_POLLING, &queue->flags); return queue->nr_cqe; } -- cgit v1.2.3 From db5ad6b7f8cdd6d78efef62a1557461d0cbaee54 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Fri, 1 May 2020 14:25:45 -0700 Subject: nvme-tcp: try to send request in queue_rq context Today, nvme-tcp automatically schedules a send request to a workqueue context, which is 1 more than we'd need in case the socket buffer is wide open. However, because we have async send activity (as a result of r2t, or write_space callbacks), we need to synchronize sends from possibly multiple contexts (ideally all running on the same cpu though). Thus, we only try to send directly from queue_rq in cases: 1. the send_list is empty 2. we can send it synchronously (i.e. not from the RX path) 3. we run on the same cpu as the queue->io_cpu to avoid contention on the send operation. 
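The contention rules compress to one decision: send inline only when it is safe to block, nothing is queued ahead of us, and the send mutex is free; otherwise fall back to the io_work context. A pthread sketch of that trylock-or-defer pattern (the same-cpu test from the patch is elided here):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t send_mutex = PTHREAD_MUTEX_INITIALIZER;
static int deferred;

static void do_send(void)
{
	printf("sent inline\n");
}

/* send inline only if allowed to block, first in line, and uncontended */
static void queue_request(bool sync, bool list_was_empty)
{
	if (sync && list_was_empty &&
	    pthread_mutex_trylock(&send_mutex) == 0) {
		do_send();
		pthread_mutex_unlock(&send_mutex);
	} else {
		deferred++;	/* stands in for queue_work_on() */
		printf("deferred to io_work (%d so far)\n", deferred);
	}
}

int main(void)
{
	queue_request(true, true);	/* queue_rq fast path: inline send */
	queue_request(false, true);	/* RX path (r2t): must defer */
	queue_request(true, false);	/* others queued: keep ordering */
	return 0;
}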
Proposed-by: Mark Wunderlich Signed-off-by: Mark Wunderlich Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/tcp.c | 43 ++++++++++++++++++++++++++++++++----------- 1 file changed, 32 insertions(+), 11 deletions(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index b28f91d0f083..c79e248b9f43 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -76,6 +76,7 @@ struct nvme_tcp_queue { int io_cpu; spinlock_t lock; + struct mutex send_mutex; struct list_head send_list; /* recv state */ @@ -132,6 +133,7 @@ static DEFINE_MUTEX(nvme_tcp_ctrl_mutex); static struct workqueue_struct *nvme_tcp_wq; static struct blk_mq_ops nvme_tcp_mq_ops; static struct blk_mq_ops nvme_tcp_admin_mq_ops; +static int nvme_tcp_try_send(struct nvme_tcp_queue *queue); static inline struct nvme_tcp_ctrl *to_tcp_ctrl(struct nvme_ctrl *ctrl) { @@ -258,15 +260,29 @@ static inline void nvme_tcp_advance_req(struct nvme_tcp_request *req, } } -static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req) +static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req, + bool sync) { struct nvme_tcp_queue *queue = req->queue; + bool empty; spin_lock(&queue->lock); + empty = list_empty(&queue->send_list) && !queue->request; list_add_tail(&req->entry, &queue->send_list); spin_unlock(&queue->lock); - queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work); + /* + * if we're the first on the send_list and we can try to send + * directly, otherwise queue io_work. Also, only do that if we + * are on the same cpu, so we don't introduce contention. + */ + if (queue->io_cpu == smp_processor_id() && + sync && empty && mutex_trylock(&queue->send_mutex)) { + nvme_tcp_try_send(queue); + mutex_unlock(&queue->send_mutex); + } else { + queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work); + } } static inline struct nvme_tcp_request * @@ -579,7 +595,7 @@ static int nvme_tcp_handle_r2t(struct nvme_tcp_queue *queue, req->state = NVME_TCP_SEND_H2C_PDU; req->offset = 0; - nvme_tcp_queue_request(req); + nvme_tcp_queue_request(req, false); return 0; } @@ -1065,11 +1081,14 @@ static void nvme_tcp_io_work(struct work_struct *w) bool pending = false; int result; - result = nvme_tcp_try_send(queue); - if (result > 0) - pending = true; - else if (unlikely(result < 0)) - break; + if (mutex_trylock(&queue->send_mutex)) { + result = nvme_tcp_try_send(queue); + mutex_unlock(&queue->send_mutex); + if (result > 0) + pending = true; + else if (unlikely(result < 0)) + break; + } result = nvme_tcp_try_recv(queue); if (result > 0) @@ -1321,6 +1340,7 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, queue->ctrl = ctrl; INIT_LIST_HEAD(&queue->send_list); spin_lock_init(&queue->lock); + mutex_init(&queue->send_mutex); INIT_WORK(&queue->io_work, nvme_tcp_io_work); queue->queue_size = queue_size; @@ -1545,6 +1565,7 @@ static struct blk_mq_tag_set *nvme_tcp_alloc_tagset(struct nvme_ctrl *nctrl, set->queue_depth = NVME_AQ_MQ_TAG_DEPTH; set->reserved_tags = 2; /* connect + keep-alive */ set->numa_node = NUMA_NO_NODE; + set->flags = BLK_MQ_F_BLOCKING; set->cmd_size = sizeof(struct nvme_tcp_request); set->driver_data = ctrl; set->nr_hw_queues = 1; @@ -1556,7 +1577,7 @@ static struct blk_mq_tag_set *nvme_tcp_alloc_tagset(struct nvme_ctrl *nctrl, set->queue_depth = nctrl->sqsize + 1; set->reserved_tags = 1; /* fabric connect */ set->numa_node = NUMA_NO_NODE; - set->flags = BLK_MQ_F_SHOULD_MERGE; + set->flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING; 
set->cmd_size = sizeof(struct nvme_tcp_request); set->driver_data = ctrl; set->nr_hw_queues = nctrl->queue_count - 1; @@ -2115,7 +2136,7 @@ static void nvme_tcp_submit_async_event(struct nvme_ctrl *arg) ctrl->async_req.curr_bio = NULL; ctrl->async_req.data_len = 0; - nvme_tcp_queue_request(&ctrl->async_req); + nvme_tcp_queue_request(&ctrl->async_req, true); } static enum blk_eh_timer_return @@ -2246,7 +2267,7 @@ static blk_status_t nvme_tcp_queue_rq(struct blk_mq_hw_ctx *hctx, blk_mq_start_request(rq); - nvme_tcp_queue_request(req); + nvme_tcp_queue_request(req, true); return BLK_STS_OK; } -- cgit v1.2.3 From 7890b9701b792a4d75b4adef4abe325383ccfca4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 29 Mar 2020 19:41:38 +0200 Subject: nvme-multipath: stop using ->queuedata nvme-multipath already uses the gendisk private data, so there is no need to also set up the request_queue queuedata, which was used in one place only. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Jens Axboe --- drivers/nvme/host/multipath.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 9f2844935fdf..5f5537d0a024 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -294,7 +294,7 @@ static bool nvme_available_path(struct nvme_ns_head *head) static blk_qc_t nvme_ns_head_make_request(struct request_queue *q, struct bio *bio) { - struct nvme_ns_head *head = q->queuedata; + struct nvme_ns_head *head = bio->bi_disk->private_data; struct device *dev = disk_to_dev(head->disk); struct nvme_ns *ns; blk_qc_t ret = BLK_QC_T_NONE; @@ -378,7 +378,6 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head) q = blk_alloc_queue(nvme_ns_head_make_request, ctrl->numa_node); if (!q) goto out; - q->queuedata = head; blk_queue_flag_set(QUEUE_FLAG_NONROT, q); /* set to a default value for 512 until disk is validated */ blk_queue_logical_block_size(q, 512); -- cgit v1.2.3 From 45e2f3c2d2f5f39b47745cba41e9e2b0c58d7d94 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Mon, 4 May 2020 01:56:43 -0700 Subject: nvmet: add generic type-name mapping This patch adds a new generic type-to-name mapping structure. It replaces nvmet_transport_name with the new generic mapping structure nvmet_transport.
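For reference, this is the whole trick: one { type, name } table plus a linear scan replaces a switch ladder per direction. A standalone sketch, using what appear to be the NVMe-oF trtype codes (the numeric values are an assumption; check include/linux/nvme.h):

#include <stdio.h>

struct type_name_map {
	unsigned char type;
	const char *name;
};

/* assumed trtype codes: rdma=1, fc=2, tcp=3, loop=254 */
static const struct type_name_map transports[] = {
	{ 1, "rdma" },
	{ 2, "fc" },
	{ 3, "tcp" },
	{ 254, "loop" },
};

/* type -> name; the reverse scan works the same way on .name */
static const char *type_to_name(unsigned char type)
{
	size_t i;

	for (i = 0; i < sizeof(transports) / sizeof(transports[0]); i++)
		if (transports[i].type == type)
			return transports[i].name;
	return "";	/* unknown: show an empty value, as the show op does */
}

int main(void)
{
	printf("3 -> %s\n", type_to_name(3));
	printf("9 -> '%s'\n", type_to_name(9));
	return 0;
}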
Signed-off-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/target/configfs.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index 58cabd7b6fc5..143f3d02f334 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -20,10 +20,12 @@ static const struct config_item_type nvmet_subsys_type; static LIST_HEAD(nvmet_ports_list); struct list_head *nvmet_ports = &nvmet_ports_list; -static const struct nvmet_transport_name { +struct nvmet_type_name_map { u8 type; const char *name; -} nvmet_transport_names[] = { +}; + +static struct nvmet_type_name_map nvmet_transport[] = { { NVMF_TRTYPE_RDMA, "rdma" }, { NVMF_TRTYPE_FC, "fc" }, { NVMF_TRTYPE_TCP, "tcp" }, @@ -254,10 +256,9 @@ static ssize_t nvmet_addr_trtype_show(struct config_item *item, struct nvmet_port *port = to_nvmet_port(item); int i; - for (i = 0; i < ARRAY_SIZE(nvmet_transport_names); i++) { - if (port->disc_addr.trtype != nvmet_transport_names[i].type) - continue; - return sprintf(page, "%s\n", nvmet_transport_names[i].name); + for (i = 0; i < ARRAY_SIZE(nvmet_transport); i++) { + if (port->disc_addr.trtype == nvmet_transport[i].type) + return sprintf(page, "%s\n", nvmet_transport[i].name); } return sprintf(page, "\n"); @@ -282,8 +283,8 @@ static ssize_t nvmet_addr_trtype_store(struct config_item *item, return -EACCES; } - for (i = 0; i < ARRAY_SIZE(nvmet_transport_names); i++) { - if (sysfs_streq(page, nvmet_transport_names[i].name)) + for (i = 0; i < ARRAY_SIZE(nvmet_transport); i++) { + if (sysfs_streq(page, nvmet_transport[i].name)) goto found; } @@ -291,7 +292,7 @@ static ssize_t nvmet_addr_trtype_store(struct config_item *item, return -EINVAL; found: memset(&port->disc_addr.tsas, 0, NVMF_TSAS_SIZE); - port->disc_addr.trtype = nvmet_transport_names[i].type; + port->disc_addr.trtype = nvmet_transport[i].type; if (port->disc_addr.trtype == NVMF_TRTYPE_RDMA) nvmet_port_init_tsas_rdma(port); return count; -- cgit v1.2.3 From 7e764179c86784594d54764556c4d3973645e6a5 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Mon, 4 May 2020 01:56:44 -0700 Subject: nvmet: use type-name map for address family Right now nvmet_addr_adrfam_[store|show]() uses switch and if else ladder for address family to string and reverse mapping which also repeats the strings in show and store function. With addition of generic nvmet_type_name_map structure we can now get rid of the switch and if else ladder and string duplication. Also, we add a newline in before found label in nvmet_addr_trtype_store() which keeps goto label code consistent with nvmet_allowed_hosts_drop_link(), nvmet_port_subsys_drop_link() and nvmet_ana_group_ana_state_store(). 
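The store direction is the mirror image: scan the table by name and jump to a found label, starting at index 1 so the PCIe entry stays display-only. A sketch with strcmp() standing in for sysfs_streq(), which additionally tolerates a trailing newline:

#include <errno.h>
#include <stdio.h>
#include <string.h>

struct type_name_map {
	unsigned char type;
	const char *name;
};

/* assumed adrfam codes: pcie=0, ipv4=1, ipv6=2, ib=3, fc=4 */
static const struct type_name_map addr_family[] = {
	{ 0, "pcie" },		/* index 0 is never matched on store */
	{ 1, "ipv4" },
	{ 2, "ipv6" },
	{ 3, "ib" },
	{ 4, "fc" },
};

/* name -> type; the scan starts at 1 so "pcie" cannot be set by the user */
static int adrfam_store(const char *page, unsigned char *adrfam)
{
	size_t i;

	for (i = 1; i < sizeof(addr_family) / sizeof(addr_family[0]); i++)
		if (strcmp(page, addr_family[i].name) == 0)
			goto found;

	fprintf(stderr, "Invalid value '%s' for adrfam\n", page);
	return -EINVAL;

found:
	*adrfam = addr_family[i].type;
	return 0;
}

int main(void)
{
	unsigned char adrfam = 0;
	int ret = adrfam_store("ipv6", &adrfam);

	printf("ipv6: ret=%d adrfam=%u\n", ret, adrfam);
	printf("pcie: ret=%d\n", adrfam_store("pcie", &adrfam));
	return 0;
}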
Signed-off-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/target/configfs.c | 51 ++++++++++++++++++++++-------------------- 1 file changed, 27 insertions(+), 24 deletions(-) diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index 143f3d02f334..8a5d99e25192 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -32,31 +32,36 @@ static struct nvmet_type_name_map nvmet_transport[] = { { NVMF_TRTYPE_LOOP, "loop" }, }; +static const struct nvmet_type_name_map nvmet_addr_family[] = { + { NVMF_ADDR_FAMILY_PCI, "pcie" }, + { NVMF_ADDR_FAMILY_IP4, "ipv4" }, + { NVMF_ADDR_FAMILY_IP6, "ipv6" }, + { NVMF_ADDR_FAMILY_IB, "ib" }, + { NVMF_ADDR_FAMILY_FC, "fc" }, +}; + /* * nvmet_port Generic ConfigFS definitions. * Used in any place in the ConfigFS tree that refers to an address. */ -static ssize_t nvmet_addr_adrfam_show(struct config_item *item, - char *page) +static ssize_t nvmet_addr_adrfam_show(struct config_item *item, char *page) { - switch (to_nvmet_port(item)->disc_addr.adrfam) { - case NVMF_ADDR_FAMILY_IP4: - return sprintf(page, "ipv4\n"); - case NVMF_ADDR_FAMILY_IP6: - return sprintf(page, "ipv6\n"); - case NVMF_ADDR_FAMILY_IB: - return sprintf(page, "ib\n"); - case NVMF_ADDR_FAMILY_FC: - return sprintf(page, "fc\n"); - default: - return sprintf(page, "\n"); + u8 adrfam = to_nvmet_port(item)->disc_addr.adrfam; + int i; + + for (i = 1; i < ARRAY_SIZE(nvmet_addr_family); i++) { + if (nvmet_addr_family[i].type == adrfam) + return sprintf(page, "%s\n", nvmet_addr_family[i].name); } + + return sprintf(page, "\n"); } static ssize_t nvmet_addr_adrfam_store(struct config_item *item, const char *page, size_t count) { struct nvmet_port *port = to_nvmet_port(item); + int i; if (port->enabled) { pr_err("Cannot modify address while enabled\n"); @@ -64,19 +69,16 @@ static ssize_t nvmet_addr_adrfam_store(struct config_item *item, return -EACCES; } - if (sysfs_streq(page, "ipv4")) { - port->disc_addr.adrfam = NVMF_ADDR_FAMILY_IP4; - } else if (sysfs_streq(page, "ipv6")) { - port->disc_addr.adrfam = NVMF_ADDR_FAMILY_IP6; - } else if (sysfs_streq(page, "ib")) { - port->disc_addr.adrfam = NVMF_ADDR_FAMILY_IB; - } else if (sysfs_streq(page, "fc")) { - port->disc_addr.adrfam = NVMF_ADDR_FAMILY_FC; - } else { - pr_err("Invalid value '%s' for adrfam\n", page); - return -EINVAL; + for (i = 1; i < ARRAY_SIZE(nvmet_addr_family); i++) { + if (sysfs_streq(page, nvmet_addr_family[i].name)) + goto found; } + pr_err("Invalid value '%s' for adrfam\n", page); + return -EINVAL; + +found: + port->disc_addr.adrfam = i; return count; } @@ -290,6 +292,7 @@ static ssize_t nvmet_addr_trtype_store(struct config_item *item, pr_err("Invalid value '%s' for trtype\n", page); return -EINVAL; + found: memset(&port->disc_addr.tsas, 0, NVMF_TSAS_SIZE); port->disc_addr.trtype = nvmet_transport[i].type; -- cgit v1.2.3 From 84b8d0d7aa159652dc191d58c4d353b6c9173c54 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Mon, 4 May 2020 01:56:45 -0700 Subject: nvmet: use type-name map for ana states Now that we have a generic type to name map for configfs, get rid of the nvmet_ana_state_names structure and replace it with newly added nvmet_type_name_map. 
Signed-off-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/target/configfs.c | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index 8a5d99e25192..78eb822a20d5 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -1153,10 +1153,7 @@ static const struct config_item_type nvmet_referrals_type = { .ct_group_ops = &nvmet_referral_group_ops, }; -static struct { - enum nvme_ana_state state; - const char *name; -} nvmet_ana_state_names[] = { +struct nvmet_type_name_map nvmet_ana_state[] = { { NVME_ANA_OPTIMIZED, "optimized" }, { NVME_ANA_NONOPTIMIZED, "non-optimized" }, { NVME_ANA_INACCESSIBLE, "inaccessible" }, @@ -1171,10 +1168,9 @@ static ssize_t nvmet_ana_group_ana_state_show(struct config_item *item, enum nvme_ana_state state = grp->port->ana_state[grp->grpid]; int i; - for (i = 0; i < ARRAY_SIZE(nvmet_ana_state_names); i++) { - if (state != nvmet_ana_state_names[i].state) - continue; - return sprintf(page, "%s\n", nvmet_ana_state_names[i].name); + for (i = 0; i < ARRAY_SIZE(nvmet_ana_state); i++) { + if (state == nvmet_ana_state[i].type) + return sprintf(page, "%s\n", nvmet_ana_state[i].name); } return sprintf(page, "\n"); @@ -1184,10 +1180,11 @@ static ssize_t nvmet_ana_group_ana_state_store(struct config_item *item, const char *page, size_t count) { struct nvmet_ana_group *grp = to_ana_group(item); + enum nvme_ana_state *ana_state = grp->port->ana_state; int i; - for (i = 0; i < ARRAY_SIZE(nvmet_ana_state_names); i++) { - if (sysfs_streq(page, nvmet_ana_state_names[i].name)) + for (i = 0; i < ARRAY_SIZE(nvmet_ana_state); i++) { + if (sysfs_streq(page, nvmet_ana_state[i].name)) goto found; } @@ -1196,10 +1193,9 @@ static ssize_t nvmet_ana_group_ana_state_store(struct config_item *item, found: down_write(&nvmet_ana_sem); - grp->port->ana_state[grp->grpid] = nvmet_ana_state_names[i].state; + ana_state[grp->grpid] = (enum nvme_ana_state) nvmet_ana_state[i].type; nvmet_ana_chgcnt++; up_write(&nvmet_ana_sem); - nvmet_port_send_ana_event(grp->port); return count; } -- cgit v1.2.3 From 87628e2851008d5b0e53859f6062988c6d66d7f5 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Mon, 4 May 2020 01:56:46 -0700 Subject: nvmet: use type-name map for address treq Currently nvmet_addr_treq_[store|show]() uses a switch and if-else ladder for mapping address transport requirements to strings and back. With the addition of the generic nvmet_type_name_map structure we can get rid of the switch and if-else ladder and the string duplication.
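One subtlety worth spelling out: treq is a bitfield and only the secure-channel bits are user-settable, so the store must mask and merge rather than assign. A small sketch (the 0x3 mask is assumed to match NVME_TREQ_SECURE_CHANNEL_MASK as used above; the 0x10 flag bit is a made-up placeholder for bits like disable-sqflow):

#include <stdio.h>

#define TREQ_SECURE_CHANNEL_MASK 0x3	/* assumed value of the kernel mask */

enum { TREQ_NOT_SPECIFIED = 0, TREQ_REQUIRED = 1, TREQ_NOT_REQUIRED = 2 };

/* keep the high flag bits, replace only the secure-channel value */
static unsigned char treq_store(unsigned char old, unsigned char wanted)
{
	unsigned char treq = old & ~TREQ_SECURE_CHANNEL_MASK;

	return treq | wanted;
}

int main(void)
{
	unsigned char treq = 0x10 | TREQ_NOT_SPECIFIED;	/* 0x10: fake flag bit */

	treq = treq_store(treq, TREQ_REQUIRED);
	printf("treq=0x%02x, secure-channel value=%u\n",
	       treq, treq & TREQ_SECURE_CHANNEL_MASK);	/* 0x11, 1 */
	return 0;
}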
Signed-off-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/target/configfs.c | 48 +++++++++++++++++++++++------------------- 1 file changed, 26 insertions(+), 22 deletions(-) diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index 78eb822a20d5..9c808f4185a0 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -147,20 +147,24 @@ static ssize_t nvmet_addr_traddr_store(struct config_item *item, CONFIGFS_ATTR(nvmet_, addr_traddr); -static ssize_t nvmet_addr_treq_show(struct config_item *item, - char *page) +static const struct nvmet_type_name_map nvmet_addr_treq[] = { + { NVMF_TREQ_NOT_SPECIFIED, "not specified" }, + { NVMF_TREQ_REQUIRED, "required" }, + { NVMF_TREQ_NOT_REQUIRED, "not required" }, +}; + +static ssize_t nvmet_addr_treq_show(struct config_item *item, char *page) { - switch (to_nvmet_port(item)->disc_addr.treq & - NVME_TREQ_SECURE_CHANNEL_MASK) { - case NVMF_TREQ_NOT_SPECIFIED: - return sprintf(page, "not specified\n"); - case NVMF_TREQ_REQUIRED: - return sprintf(page, "required\n"); - case NVMF_TREQ_NOT_REQUIRED: - return sprintf(page, "not required\n"); - default: - return sprintf(page, "\n"); + u8 treq = to_nvmet_port(item)->disc_addr.treq & + NVME_TREQ_SECURE_CHANNEL_MASK; + int i; + + for (i = 0; i < ARRAY_SIZE(nvmet_addr_treq); i++) { + if (treq == nvmet_addr_treq[i].type) + return sprintf(page, "%s\n", nvmet_addr_treq[i].name); } + + return sprintf(page, "\n"); } static ssize_t nvmet_addr_treq_store(struct config_item *item, @@ -168,6 +172,7 @@ static ssize_t nvmet_addr_treq_store(struct config_item *item, { struct nvmet_port *port = to_nvmet_port(item); u8 treq = port->disc_addr.treq & ~NVME_TREQ_SECURE_CHANNEL_MASK; + int i; if (port->enabled) { pr_err("Cannot modify address while enabled\n"); @@ -175,18 +180,17 @@ static ssize_t nvmet_addr_treq_store(struct config_item *item, return -EACCES; } - if (sysfs_streq(page, "not specified")) { - treq |= NVMF_TREQ_NOT_SPECIFIED; - } else if (sysfs_streq(page, "required")) { - treq |= NVMF_TREQ_REQUIRED; - } else if (sysfs_streq(page, "not required")) { - treq |= NVMF_TREQ_NOT_REQUIRED; - } else { - pr_err("Invalid value '%s' for treq\n", page); - return -EINVAL; + for (i = 0; i < ARRAY_SIZE(nvmet_addr_treq); i++) { + if (sysfs_streq(page, nvmet_addr_treq[i].name)) + goto found; } - port->disc_addr.treq = treq; + pr_err("Invalid value '%s' for treq\n", page); + return -EINVAL; + +found: + treq |= nvmet_addr_treq[i].type; + port->disc_addr.treq = treq; return count; } -- cgit v1.2.3 From 3ecb5faa07c7fd33a3ce1a8340841aa368df7d43 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Mon, 4 May 2020 01:56:47 -0700 Subject: nvmet: centralize port enable access for configfs The configfs attributes which are only supposed to be set while the port is disabled, such as addr[addrfam|portid|traddr|treq|trsvcid|inline_data_size|trtype], have repetitive checks and generic error message printing. This patch creates a centralized helper to check and print an error message that also accepts the caller as a parameter. This makes the error message easy to parse for the user, removes the duplicate code and makes it available for future such scenarios.
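The helper boils down to a guard that both tests and reports, with __func__ identifying which attribute the user tried to touch. A standalone sketch of the pattern (fprintf stands in for pr_err):

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

struct port {
	unsigned int id;
	bool enabled;
};

/* one helper replaces the duplicated pr_err() pair in every store op */
static bool port_is_enabled(const struct port *p, const char *caller)
{
	if (p->enabled)
		fprintf(stderr,
			"Disable port '%u' before changing attribute in %s\n",
			p->id, caller);
	return p->enabled;
}

static int store_traddr(struct port *p, const char *page)
{
	if (port_is_enabled(p, __func__))
		return -EACCES;
	printf("traddr set to %s\n", page);
	return 0;
}

int main(void)
{
	struct port p = { .id = 1, .enabled = true };

	store_traddr(&p, "10.0.0.1");	/* rejected, message names the caller */
	p.enabled = false;
	store_traddr(&p, "10.0.0.1");	/* accepted */
	return 0;
}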
Signed-off-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/target/configfs.c | 44 +++++++++++++++--------------------------- 1 file changed, 16 insertions(+), 28 deletions(-) diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index 9c808f4185a0..20779587eefe 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -40,6 +40,14 @@ static const struct nvmet_type_name_map nvmet_addr_family[] = { { NVMF_ADDR_FAMILY_FC, "fc" }, }; +static bool nvmet_is_port_enabled(struct nvmet_port *p, const char *caller) +{ + if (p->enabled) + pr_err("Disable port '%u' before changing attribute in %s\n", + le16_to_cpu(p->disc_addr.portid), caller); + return p->enabled; +} + /* * nvmet_port Generic ConfigFS definitions. * Used in any place in the ConfigFS tree that refers to an address. @@ -63,11 +71,8 @@ static ssize_t nvmet_addr_adrfam_store(struct config_item *item, struct nvmet_port *port = to_nvmet_port(item); int i; - if (port->enabled) { - pr_err("Cannot modify address while enabled\n"); - pr_err("Disable the address before modifying\n"); + if (nvmet_is_port_enabled(port, __func__)) return -EACCES; - } for (i = 1; i < ARRAY_SIZE(nvmet_addr_family); i++) { if (sysfs_streq(page, nvmet_addr_family[i].name)) @@ -104,11 +109,9 @@ static ssize_t nvmet_addr_portid_store(struct config_item *item, return -EINVAL; } - if (port->enabled) { - pr_err("Cannot modify address while enabled\n"); - pr_err("Disable the address before modifying\n"); + if (nvmet_is_port_enabled(port, __func__)) return -EACCES; - } + port->disc_addr.portid = cpu_to_le16(portid); return count; } @@ -134,11 +137,8 @@ static ssize_t nvmet_addr_traddr_store(struct config_item *item, return -EINVAL; } - if (port->enabled) { - pr_err("Cannot modify address while enabled\n"); - pr_err("Disable the address before modifying\n"); + if (nvmet_is_port_enabled(port, __func__)) return -EACCES; - } if (sscanf(page, "%s\n", port->disc_addr.traddr) != 1) return -EINVAL; @@ -174,11 +174,8 @@ static ssize_t nvmet_addr_treq_store(struct config_item *item, u8 treq = port->disc_addr.treq & ~NVME_TREQ_SECURE_CHANNEL_MASK; int i; - if (port->enabled) { - pr_err("Cannot modify address while enabled\n"); - pr_err("Disable the address before modifying\n"); + if (nvmet_is_port_enabled(port, __func__)) return -EACCES; - } for (i = 0; i < ARRAY_SIZE(nvmet_addr_treq); i++) { if (sysfs_streq(page, nvmet_addr_treq[i].name)) @@ -214,11 +211,8 @@ static ssize_t nvmet_addr_trsvcid_store(struct config_item *item, pr_err("Invalid value '%s' for trsvcid\n", page); return -EINVAL; } - if (port->enabled) { - pr_err("Cannot modify address while enabled\n"); - pr_err("Disable the address before modifying\n"); + if (nvmet_is_port_enabled(port, __func__)) return -EACCES; - } if (sscanf(page, "%s\n", port->disc_addr.trsvcid) != 1) return -EINVAL; @@ -241,11 +235,8 @@ static ssize_t nvmet_param_inline_data_size_store(struct config_item *item, struct nvmet_port *port = to_nvmet_port(item); int ret; - if (port->enabled) { - pr_err("Cannot modify inline_data_size while port enabled\n"); - pr_err("Disable the port before modifying\n"); + if (nvmet_is_port_enabled(port, __func__)) return -EACCES; - } ret = kstrtoint(page, 0, &port->inline_data_size); if (ret) { pr_err("Invalid value '%s' for inline_data_size\n", page); @@ -283,11 +274,8 @@ static ssize_t nvmet_addr_trtype_store(struct config_item *item, struct nvmet_port *port = to_nvmet_port(item); int i; - if (port->enabled) { - 
pr_err("Cannot modify address while enabled\n"); - pr_err("Disable the address before modifying\n"); + if (nvmet_is_port_enabled(port, __func__)) return -EACCES; - } for (i = 0; i < ARRAY_SIZE(nvmet_transport); i++) { if (sysfs_streq(page, nvmet_transport[i].name)) -- cgit v1.2.3 From d02abd198633a4c40411b9a5994111452720470d Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Mon, 4 May 2020 01:56:48 -0700 Subject: nvmet: align addrfam list to spec With reference to the NVMeOF Specification (page 44, Figure 38) discovery log page entry provides address family field. We do set the transport type field but the adrfam field is not set when using loop transport and also it doesn't have support in the nvme-cli. So when reading discovery log page with a loop transport it leads to confusing output. As per the spec for adrfam value 254 is reserved for Intra Host Transport i.e. loopback), we add a required macro in the protocol header file, set default port disc addr entry's adrfam to NVMF_ADDR_FAMILY_MAX, and update nvmet_addr_family configfs array for show/store attribute. Without this patch, setting adrfam to (ipv4/ipv6/ib/fc/loop/" ") we get following output for nvme discover command from nvme-cli which is confusing. trtype: loop adrfam: ipv4 trtype: loop adrfam: ipv6 trtype: loop adrfam: infiniband trtype: loop adrfam: fibre-channel trtype: loop # ${CFGFS_HOME}/nvmet/ports/1/addr_adrfam = loop adrfam: pci # <----- pci for loop trtype: loop # ${CFGFS_HOME}/nvmet/ports/1/addr_adrfam = " " adrfam: pci # <----- pci for unrecognized This patch fixes above output :- trtype: loop adrfam: ipv4 trtype: loop adrfam: ipv6 trtype: loop adrfam: infiniband trtype: loop adrfam: fibre-channel trtype: loop # ${CFGFS_HOME}/nvmet/ports/1/addr_adrfam = loop adrfam: loop # <----- loop for loop trtype: loop # ${CFGFS_HOME}/config/nvmet/ports/adrfam = " " adrfam: unrecognized # <----- unrecognized when invalid value Signed-off-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/target/configfs.c | 14 ++++++++------ include/linux/nvme.h | 2 ++ 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index 20779587eefe..ae8fb4489a10 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -33,11 +33,12 @@ static struct nvmet_type_name_map nvmet_transport[] = { }; static const struct nvmet_type_name_map nvmet_addr_family[] = { - { NVMF_ADDR_FAMILY_PCI, "pcie" }, - { NVMF_ADDR_FAMILY_IP4, "ipv4" }, - { NVMF_ADDR_FAMILY_IP6, "ipv6" }, - { NVMF_ADDR_FAMILY_IB, "ib" }, - { NVMF_ADDR_FAMILY_FC, "fc" }, + { NVMF_ADDR_FAMILY_PCI, "pcie" }, + { NVMF_ADDR_FAMILY_IP4, "ipv4" }, + { NVMF_ADDR_FAMILY_IP6, "ipv6" }, + { NVMF_ADDR_FAMILY_IB, "ib" }, + { NVMF_ADDR_FAMILY_FC, "fc" }, + { NVMF_ADDR_FAMILY_LOOP, "loop" }, }; static bool nvmet_is_port_enabled(struct nvmet_port *p, const char *caller) @@ -83,7 +84,7 @@ static ssize_t nvmet_addr_adrfam_store(struct config_item *item, return -EINVAL; found: - port->disc_addr.adrfam = i; + port->disc_addr.adrfam = nvmet_addr_family[i].type; return count; } @@ -1338,6 +1339,7 @@ static struct config_group *nvmet_ports_make(struct config_group *group, port->inline_data_size = -1; /* < 0 == let the transport choose */ port->disc_addr.portid = cpu_to_le16(portid); + port->disc_addr.adrfam = NVMF_ADDR_FAMILY_MAX; port->disc_addr.treq = NVMF_TREQ_DISABLE_SQFLOW; config_group_init_type_name(&port->group, name, &nvmet_port_type); diff --git 
a/include/linux/nvme.h b/include/linux/nvme.h index 3d5189f46cb1..2d978d0cbde6 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -38,6 +38,8 @@ enum { NVMF_ADDR_FAMILY_IP6 = 2, /* IP6 */ NVMF_ADDR_FAMILY_IB = 3, /* InfiniBand */ NVMF_ADDR_FAMILY_FC = 4, /* Fibre Channel */ + NVMF_ADDR_FAMILY_LOOP = 254, /* Reserved for host usage */ + NVMF_ADDR_FAMILY_MAX, }; /* Transport Type codes for Discovery Log Page entry TRTYPE field */ -- cgit v1.2.3 From 92decf118f1da4c866515f80387f9cf4d48611d6 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 3 Apr 2020 10:53:46 -0700 Subject: nvme: define constants for identification values Improve code readability by defining the specification's constants that the driver is using when decoding identification payloads. Signed-off-by: Keith Busch Reviewed-by: Bart van Assche Reviewed-by: Chaitanya Kulkarni Acked-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 10 +++++----- drivers/nvme/host/multipath.c | 5 +++-- include/linux/nvme.h | 6 ++++++ 3 files changed, 14 insertions(+), 7 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 4a124a28118f..805d289e6cd9 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1859,13 +1859,13 @@ static void nvme_update_disk_info(struct gendisk *disk, * and whether it should be used instead of AWUPF. If NAWUPF == * 0 then AWUPF must be used instead. */ - if (id->nsfeat & (1 << 1) && id->nawupf) + if (id->nsfeat & NVME_NS_FEAT_ATOMICS && id->nawupf) atomic_bs = (1 + le16_to_cpu(id->nawupf)) * bs; else atomic_bs = (1 + ns->ctrl->subsys->awupf) * bs; } - if (id->nsfeat & (1 << 4)) { + if (id->nsfeat & NVME_NS_FEAT_IO_OPT) { /* NPWG = Namespace Preferred Write Granularity */ phys_bs = bs * (1 + le16_to_cpu(id->npwg)); /* NOWS = Namespace Optimal Write Size */ @@ -1894,7 +1894,7 @@ static void nvme_update_disk_info(struct gendisk *disk, nvme_config_discard(disk, ns); nvme_config_write_zeroes(disk, ns); - if (id->nsattr & (1 << 0)) + if (id->nsattr & NVME_NS_ATTR_RO) set_disk_ro(disk, true); else set_disk_ro(disk, false); @@ -2685,7 +2685,7 @@ static bool nvme_validate_cntlid(struct nvme_subsystem *subsys, return false; } - if ((id->cmic & (1 << 1)) || + if ((id->cmic & NVME_CTRL_CMIC_MULTI_CTRL) || (ctrl->opts && ctrl->opts->discovery_nqn)) continue; @@ -3497,7 +3497,7 @@ static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid, struct nvme_id_ns *id) { struct nvme_ctrl *ctrl = ns->ctrl; - bool is_shared = id->nmic & (1 << 0); + bool is_shared = id->nmic & NVME_NS_NMIC_SHARED; struct nvme_ns_head *head = NULL; struct nvme_ns_ids ids; int ret = 0; diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 5f5537d0a024..da78e499947a 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -372,7 +372,7 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head) * We also do this for private namespaces as the namespace sharing data could * change after a rescan. 
*/ - if (!(ctrl->subsys->cmic & (1 << 1)) || !multipath) + if (!(ctrl->subsys->cmic & NVME_CTRL_CMIC_MULTI_CTRL) || !multipath) return 0; q = blk_alloc_queue(nvme_ns_head_make_request, ctrl->numa_node); @@ -694,7 +694,8 @@ int nvme_mpath_init(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) int error; /* check if multipath is enabled and we have the capability */ - if (!multipath || !ctrl->subsys || !(ctrl->subsys->cmic & (1 << 3))) + if (!multipath || !ctrl->subsys || + !(ctrl->subsys->cmic & NVME_CTRL_CMIC_ANA)) return 0; ctrl->anacap = id->anacap; diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 2d978d0cbde6..b235a48eac8c 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -301,6 +301,8 @@ struct nvme_id_ctrl { }; enum { + NVME_CTRL_CMIC_MULTI_CTRL = 1 << 1, + NVME_CTRL_CMIC_ANA = 1 << 3, NVME_CTRL_ONCS_COMPARE = 1 << 0, NVME_CTRL_ONCS_WRITE_UNCORRECTABLE = 1 << 1, NVME_CTRL_ONCS_DSM = 1 << 2, @@ -396,8 +398,12 @@ enum { enum { NVME_NS_FEAT_THIN = 1 << 0, + NVME_NS_FEAT_ATOMICS = 1 << 1, + NVME_NS_FEAT_IO_OPT = 1 << 4, + NVME_NS_ATTR_RO = 1 << 0, NVME_NS_FLBAS_LBA_MASK = 0xf, NVME_NS_FLBAS_META_EXT = 0x10, + NVME_NS_NMIC_SHARED = 1 << 0, NVME_LBAF_RP_BEST = 0, NVME_LBAF_RP_BETTER = 1, NVME_LBAF_RP_GOOD = 2, -- cgit v1.2.3 From e72e8bf1c9847a12de74f2fd3ea1f5511866526b Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Tue, 31 Mar 2020 11:40:32 +0200 Subject: floppy: split the base port from the register in I/O accesses Currently we have architecture-specific fd_inb() and fd_outb() functions or macros, taking just a port which is in fact made of a base address and a register. The base address is FDC-specific and derived from the local or global "fdc" variable through the FD_IOPORT macro used in the base address calculation. This change splits this by explicitly passing the FDC's base address and the register separately to fd_outb() and fd_inb(). It affects the following archs: - x86, alpha, mips, powerpc, parisc, arm, m68k: simple remap of port -> base+reg - sparc32: use of reg only, since the base address was already masked out and the FDC controller is known from a static struct. - sparc64: like x86 for PCI, like sparc32 for 82077 Some archs use inline functions and others macros. This was not unified in order to minimize the number of changes to review. For the same reason checkpatch still spews a few warnings about things that were already there before. The parisc still uses hard-coded register values and could be cleaned up by taking the register definitions. The sparc per-controller inb/outb functions could further be refined to explicitly take an FDC register instead of a port in argument but it was not needed yet and may be cleaned later. Link: https://lore.kernel.org/r/20200331094054.24441-2-w@1wt.eu Cc: Ivan Kokshaysky Cc: Richard Henderson Cc: Matt Turner Cc: Ian Molton Cc: Russell King Cc: Geert Uytterhoeven Cc: Thomas Bogendoerfer Cc: Helge Deller Cc: Benjamin Herrenschmidt Cc: "David S. 
Miller" Cc: x86@kernel.org Signed-off-by: Willy Tarreau Signed-off-by: Denis Efremov --- arch/alpha/include/asm/floppy.h | 4 ++-- arch/arm/include/asm/floppy.h | 8 ++++---- arch/m68k/include/asm/floppy.h | 12 ++++++------ arch/mips/include/asm/mach-generic/floppy.h | 8 ++++---- arch/mips/include/asm/mach-jazz/floppy.h | 8 ++++---- arch/parisc/include/asm/floppy.h | 12 ++++++------ arch/powerpc/include/asm/floppy.h | 4 ++-- arch/sparc/include/asm/floppy_32.h | 4 ++-- arch/sparc/include/asm/floppy_64.h | 4 ++-- arch/x86/include/asm/floppy.h | 4 ++-- drivers/block/floppy.c | 4 ++-- 11 files changed, 36 insertions(+), 36 deletions(-) diff --git a/arch/alpha/include/asm/floppy.h b/arch/alpha/include/asm/floppy.h index 942924756cf2..8dfdb3aa1d96 100644 --- a/arch/alpha/include/asm/floppy.h +++ b/arch/alpha/include/asm/floppy.h @@ -11,8 +11,8 @@ #define __ASM_ALPHA_FLOPPY_H -#define fd_inb(port) inb_p(port) -#define fd_outb(value,port) outb_p(value,port) +#define fd_inb(base, reg) inb_p((base) + (reg)) +#define fd_outb(value, base, reg) outb_p(value, (base) + (reg)) #define fd_enable_dma() enable_dma(FLOPPY_DMA) #define fd_disable_dma() disable_dma(FLOPPY_DMA) diff --git a/arch/arm/include/asm/floppy.h b/arch/arm/include/asm/floppy.h index 79fa327238e8..e1cb04ed5008 100644 --- a/arch/arm/include/asm/floppy.h +++ b/arch/arm/include/asm/floppy.h @@ -9,20 +9,20 @@ #ifndef __ASM_ARM_FLOPPY_H #define __ASM_ARM_FLOPPY_H -#define fd_outb(val,port) \ +#define fd_outb(val, base, reg) \ do { \ int new_val = (val); \ - if (((port) & 7) == FD_DOR) { \ + if ((reg) == FD_DOR) { \ if (new_val & 0xf0) \ new_val = (new_val & 0x0c) | \ floppy_selects[new_val & 3]; \ else \ new_val &= 0x0c; \ } \ - outb(new_val, (port)); \ + outb(new_val, (base) + (reg)); \ } while(0) -#define fd_inb(port) inb((port)) +#define fd_inb(base, reg) inb((base) + (reg)) #define fd_request_irq() request_irq(IRQ_FLOPPYDISK,floppy_interrupt,\ 0,"floppy",NULL) #define fd_free_irq() free_irq(IRQ_FLOPPYDISK,NULL) diff --git a/arch/m68k/include/asm/floppy.h b/arch/m68k/include/asm/floppy.h index c3b9ad6732fc..2a6ce29b92aa 100644 --- a/arch/m68k/include/asm/floppy.h +++ b/arch/m68k/include/asm/floppy.h @@ -63,21 +63,21 @@ static __inline__ void release_dma_lock(unsigned long flags) } -static __inline__ unsigned char fd_inb(int port) +static __inline__ unsigned char fd_inb(int base, int reg) { if(MACH_IS_Q40) - return inb_p(port); + return inb_p(base + reg); else if(MACH_IS_SUN3X) - return sun3x_82072_fd_inb(port); + return sun3x_82072_fd_inb(base + reg); return 0; } -static __inline__ void fd_outb(unsigned char value, int port) +static __inline__ void fd_outb(unsigned char value, int base, int reg) { if(MACH_IS_Q40) - outb_p(value, port); + outb_p(value, base + reg); else if(MACH_IS_SUN3X) - sun3x_82072_fd_outb(value, port); + sun3x_82072_fd_outb(value, base + reg); } diff --git a/arch/mips/include/asm/mach-generic/floppy.h b/arch/mips/include/asm/mach-generic/floppy.h index 9ec2f6a5200b..e3f446d54827 100644 --- a/arch/mips/include/asm/mach-generic/floppy.h +++ b/arch/mips/include/asm/mach-generic/floppy.h @@ -26,14 +26,14 @@ /* * How to access the FDC's registers. 
*/ -static inline unsigned char fd_inb(unsigned int port) +static inline unsigned char fd_inb(unsigned int base, unsigned int reg) { - return inb_p(port); + return inb_p(base + reg); } -static inline void fd_outb(unsigned char value, unsigned int port) +static inline void fd_outb(unsigned char value, unsigned int base, unsigned int reg) { - outb_p(value, port); + outb_p(value, base + reg); } /* diff --git a/arch/mips/include/asm/mach-jazz/floppy.h b/arch/mips/include/asm/mach-jazz/floppy.h index 4b86c88a03b7..095000c290e5 100644 --- a/arch/mips/include/asm/mach-jazz/floppy.h +++ b/arch/mips/include/asm/mach-jazz/floppy.h @@ -17,19 +17,19 @@ #include #include -static inline unsigned char fd_inb(unsigned int port) +static inline unsigned char fd_inb(unsigned int base, unsigned int reg) { unsigned char c; - c = *(volatile unsigned char *) port; + c = *(volatile unsigned char *) (base + reg); udelay(1); return c; } -static inline void fd_outb(unsigned char value, unsigned int port) +static inline void fd_outb(unsigned char value, unsigned int base, unsigned int reg) { - *(volatile unsigned char *) port = value; + *(volatile unsigned char *) (base + reg) = value; } /* diff --git a/arch/parisc/include/asm/floppy.h b/arch/parisc/include/asm/floppy.h index 09b6f4c1687e..1aebc23b7744 100644 --- a/arch/parisc/include/asm/floppy.h +++ b/arch/parisc/include/asm/floppy.h @@ -29,8 +29,8 @@ #define CSW fd_routine[can_use_virtual_dma & 1] -#define fd_inb(port) readb(port) -#define fd_outb(value, port) writeb(value, port) +#define fd_inb(base, reg) readb((base) + (reg)) +#define fd_outb(value, base, reg) writeb(value, (base) + (reg)) #define fd_request_dma() CSW._request_dma(FLOPPY_DMA,"floppy") #define fd_free_dma() CSW._free_dma(FLOPPY_DMA) @@ -75,19 +75,19 @@ static void floppy_hardint(int irq, void *dev_id, struct pt_regs * regs) register char *lptr = virtual_dma_addr; for (lcount = virtual_dma_count; lcount; lcount--) { - st = fd_inb(virtual_dma_port+4) & 0xa0 ; + st = fd_inb(virtual_dma_port, 4) & 0xa0; if (st != 0xa0) break; if (virtual_dma_mode) { - fd_outb(*lptr, virtual_dma_port+5); + fd_outb(*lptr, virtual_dma_port, 5); } else { - *lptr = fd_inb(virtual_dma_port+5); + *lptr = fd_inb(virtual_dma_port, 5); } lptr++; } virtual_dma_count = lcount; virtual_dma_addr = lptr; - st = fd_inb(virtual_dma_port+4); + st = fd_inb(virtual_dma_port, 4); } #ifdef TRACE_FLPY_INT diff --git a/arch/powerpc/include/asm/floppy.h b/arch/powerpc/include/asm/floppy.h index 167c44b58848..ed467eb0c4c8 100644 --- a/arch/powerpc/include/asm/floppy.h +++ b/arch/powerpc/include/asm/floppy.h @@ -13,8 +13,8 @@ #include -#define fd_inb(port) inb_p(port) -#define fd_outb(value,port) outb_p(value,port) +#define fd_inb(base, reg) inb_p((base) + (reg)) +#define fd_outb(value, base, reg) outb_p(value, (base) + (reg)) #define fd_enable_dma() enable_dma(FLOPPY_DMA) #define fd_disable_dma() fd_ops->_disable_dma(FLOPPY_DMA) diff --git a/arch/sparc/include/asm/floppy_32.h b/arch/sparc/include/asm/floppy_32.h index b519acf4383d..4d08df4f4deb 100644 --- a/arch/sparc/include/asm/floppy_32.h +++ b/arch/sparc/include/asm/floppy_32.h @@ -59,8 +59,8 @@ struct sun_floppy_ops { static struct sun_floppy_ops sun_fdops; -#define fd_inb(port) sun_fdops.fd_inb(port) -#define fd_outb(value,port) sun_fdops.fd_outb(value,port) +#define fd_inb(base, reg) sun_fdops.fd_inb(reg) +#define fd_outb(value, base, reg) sun_fdops.fd_outb(value, reg) #define fd_enable_dma() sun_fd_enable_dma() #define fd_disable_dma() sun_fd_disable_dma() #define fd_request_dma() (0) 
/* nothing... */ diff --git a/arch/sparc/include/asm/floppy_64.h b/arch/sparc/include/asm/floppy_64.h index 3729fc35ba83..c0cf157e5b15 100644 --- a/arch/sparc/include/asm/floppy_64.h +++ b/arch/sparc/include/asm/floppy_64.h @@ -62,8 +62,8 @@ struct sun_floppy_ops { static struct sun_floppy_ops sun_fdops; -#define fd_inb(port) sun_fdops.fd_inb(port) -#define fd_outb(value,port) sun_fdops.fd_outb(value,port) +#define fd_inb(base, reg) sun_fdops.fd_inb((base) + (reg)) +#define fd_outb(value, base, reg) sun_fdops.fd_outb(value, (base) + (reg)) #define fd_enable_dma() sun_fdops.fd_enable_dma() #define fd_disable_dma() sun_fdops.fd_disable_dma() #define fd_request_dma() (0) /* nothing... */ diff --git a/arch/x86/include/asm/floppy.h b/arch/x86/include/asm/floppy.h index 7ec59edde154..20088cb08f5e 100644 --- a/arch/x86/include/asm/floppy.h +++ b/arch/x86/include/asm/floppy.h @@ -31,8 +31,8 @@ #define CSW fd_routine[can_use_virtual_dma & 1] -#define fd_inb(port) inb_p(port) -#define fd_outb(value, port) outb_p(value, port) +#define fd_inb(base, reg) inb_p((base) + (reg)) +#define fd_outb(value, base, reg) outb_p(value, (base) + (reg)) #define fd_request_dma() CSW._request_dma(FLOPPY_DMA, "floppy") #define fd_free_dma() CSW._free_dma(FLOPPY_DMA) diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index c3daa64cb52c..1cda39098b07 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -595,12 +595,12 @@ static unsigned char in_sector_offset; /* offset within physical sector, static inline unsigned char fdc_inb(int fdc, int reg) { - return fd_inb(fdc_state[fdc].address + reg); + return fd_inb(fdc_state[fdc].address, reg); } static inline void fdc_outb(unsigned char value, int fdc, int reg) { - fd_outb(value, fdc_state[fdc].address + reg); + fd_outb(value, fdc_state[fdc].address, reg); } static inline bool drive_no_geom(int drive) -- cgit v1.2.3 From 7d33850abdb9048c6aa421440a64905eb4ad07a2 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Tue, 31 Mar 2020 11:40:33 +0200 Subject: floppy: add references to 82077's extra registers This controller provides extra status registers SRA and SRB as well as a tape drive register (TDR) and a data rate select register (DSR), which are referenced in the sparc port, so let's have their symbolic definitions centralized. Link: https://lore.kernel.org/r/20200331094054.24441-3-w@1wt.eu Signed-off-by: Willy Tarreau Signed-off-by: Denis Efremov --- include/uapi/linux/fdreg.h | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/include/uapi/linux/fdreg.h b/include/uapi/linux/fdreg.h index 1318881954e1..10d33632939d 100644 --- a/include/uapi/linux/fdreg.h +++ b/include/uapi/linux/fdreg.h @@ -7,13 +7,23 @@ * Handbook", Sanches and Canton. */ -/* Fd controller regs. S&C, about page 340 */ -#define FD_STATUS 4 -#define FD_DATA 5 +/* 82077's auxiliary status registers A & B (R) */ +#define FD_SRA 0 +#define FD_SRB 1 /* Digital Output Register */ #define FD_DOR 2 +/* 82077's tape drive register (R/W) */ +#define FD_TDR 3 + +/* 82077's data rate select register (W) */ +#define FD_DSR 4 + +/* Fd controller regs. 
S&C, about page 340 */ +#define FD_STATUS 4 +#define FD_DATA 5 + /* Digital Input Register (read) */ #define FD_DIR 7 -- cgit v1.2.3 From 76373fc666a1c62d422f5e6fc9e4927877934045 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Tue, 31 Mar 2020 11:40:34 +0200 Subject: floppy: use symbolic register names in the m68k port Now we can use FD_STATUS and FD_DATA instead of 4 or 5, let's do this, and also use STATUS_DMA and STATUS_READY for the status bits. Link: https://lore.kernel.org/r/20200331094054.24441-4-w@1wt.eu Cc: Geert Uytterhoeven Signed-off-by: Willy Tarreau Signed-off-by: Denis Efremov --- arch/m68k/include/asm/floppy.h | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/arch/m68k/include/asm/floppy.h b/arch/m68k/include/asm/floppy.h index 2a6ce29b92aa..a4d0fea47c6b 100644 --- a/arch/m68k/include/asm/floppy.h +++ b/arch/m68k/include/asm/floppy.h @@ -211,26 +211,27 @@ asmlinkage irqreturn_t floppy_hardint(int irq, void *dev_id) st=1; for(lcount=virtual_dma_count, lptr=virtual_dma_addr; lcount; lcount--, lptr++) { - st=inb(virtual_dma_port+4) & 0xa0 ; - if(st != 0xa0) + st = inb(virtual_dma_port + FD_STATUS); + st &= STATUS_DMA | STATUS_READY; + if (st != (STATUS_DMA | STATUS_READY)) break; if(virtual_dma_mode) - outb_p(*lptr, virtual_dma_port+5); + outb_p(*lptr, virtual_dma_port + FD_DATA); else - *lptr = inb_p(virtual_dma_port+5); + *lptr = inb_p(virtual_dma_port + FD_DATA); } virtual_dma_count = lcount; virtual_dma_addr = lptr; - st = inb(virtual_dma_port+4); + st = inb(virtual_dma_port + FD_STATUS); } #ifdef TRACE_FLPY_INT calls++; #endif - if(st == 0x20) + if (st == STATUS_DMA) return IRQ_HANDLED; - if(!(st & 0x20)) { + if (!(st & STATUS_DMA)) { virtual_dma_residue += virtual_dma_count; virtual_dma_count=0; #ifdef TRACE_FLPY_INT -- cgit v1.2.3 From 40b7d1b69093b3592ba8b877369d371d65ab7059 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Tue, 31 Mar 2020 11:40:35 +0200 Subject: floppy: use symbolic register names in the parisc port Now we can use FD_STATUS and FD_DATA instead of 4 or 5, let's do this, and also use STATUS_DMA and STATUS_READY for the status bits. 
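To make the change easy to review, here is the shape of the conversion, excerpted from the hunk below; the register names come from <uapi/linux/fdreg.h> and the STATUS_* bits from the floppy driver headers:

    /* before: magic register offset and a magic mask */
    st = fd_inb(virtual_dma_port, 4) & 0xa0;
    if (st != 0xa0)
            break;

    /* after: same behavior, self-describing names */
    st = fd_inb(virtual_dma_port, FD_STATUS);
    st &= STATUS_DMA | STATUS_READY;
    if (st != (STATUS_DMA | STATUS_READY))
            break;
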
Link: https://lore.kernel.org/r/20200331094054.24441-5-w@1wt.eu Cc: Helge Deller Signed-off-by: Willy Tarreau Signed-off-by: Denis Efremov --- arch/parisc/include/asm/floppy.h | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/arch/parisc/include/asm/floppy.h b/arch/parisc/include/asm/floppy.h index 1aebc23b7744..762cfe7778c0 100644 --- a/arch/parisc/include/asm/floppy.h +++ b/arch/parisc/include/asm/floppy.h @@ -75,27 +75,28 @@ static void floppy_hardint(int irq, void *dev_id, struct pt_regs * regs) register char *lptr = virtual_dma_addr; for (lcount = virtual_dma_count; lcount; lcount--) { - st = fd_inb(virtual_dma_port, 4) & 0xa0; - if (st != 0xa0) + st = fd_inb(virtual_dma_port, FD_STATUS); + st &= STATUS_DMA | STATUS_READY; + if (st != (STATUS_DMA | STATUS_READY)) break; if (virtual_dma_mode) { - fd_outb(*lptr, virtual_dma_port, 5); + fd_outb(*lptr, virtual_dma_port, FD_DATA); } else { - *lptr = fd_inb(virtual_dma_port, 5); + *lptr = fd_inb(virtual_dma_port, FD_DATA); } lptr++; } virtual_dma_count = lcount; virtual_dma_addr = lptr; - st = fd_inb(virtual_dma_port, 4); + st = fd_inb(virtual_dma_port, FD_STATUS); } #ifdef TRACE_FLPY_INT calls++; #endif - if (st == 0x20) + if (st == STATUS_DMA) return; - if (!(st & 0x20)) { + if (!(st & STATUS_DMA)) { virtual_dma_residue += virtual_dma_count; virtual_dma_count = 0; #ifdef TRACE_FLPY_INT -- cgit v1.2.3 From 7fd346318847aa967593b25153ccbeb6cfe1daf1 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Tue, 31 Mar 2020 11:40:36 +0200 Subject: floppy: use symbolic register names in the powerpc port Now we can use FD_STATUS and FD_DATA instead of 4 or 5, let's do this, and also use STATUS_DMA and STATUS_READY for the status bits. Link: https://lore.kernel.org/r/20200331094054.24441-6-w@1wt.eu Cc: Benjamin Herrenschmidt Signed-off-by: Willy Tarreau Signed-off-by: Denis Efremov --- arch/powerpc/include/asm/floppy.h | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/include/asm/floppy.h b/arch/powerpc/include/asm/floppy.h index ed467eb0c4c8..7af9a68fd949 100644 --- a/arch/powerpc/include/asm/floppy.h +++ b/arch/powerpc/include/asm/floppy.h @@ -61,21 +61,22 @@ static irqreturn_t floppy_hardint(int irq, void *dev_id) st = 1; for (lcount=virtual_dma_count, lptr=virtual_dma_addr; lcount; lcount--, lptr++) { - st=inb(virtual_dma_port+4) & 0xa0 ; - if (st != 0xa0) + st = inb(virtual_dma_port + FD_STATUS); + st &= STATUS_DMA | STATUS_READY; + if (st != (STATUS_DMA | STATUS_READY)) break; if (virtual_dma_mode) - outb_p(*lptr, virtual_dma_port+5); + outb_p(*lptr, virtual_dma_port + FD_DATA); else - *lptr = inb_p(virtual_dma_port+5); + *lptr = inb_p(virtual_dma_port + FD_DATA); } virtual_dma_count = lcount; virtual_dma_addr = lptr; - st = inb(virtual_dma_port+4); + st = inb(virtual_dma_port + FD_STATUS); - if (st == 0x20) + if (st == STATUS_DMA) return IRQ_HANDLED; - if (!(st & 0x20)) { + if (!(st & STATUS_DMA)) { virtual_dma_residue += virtual_dma_count; virtual_dma_count=0; doing_vdma = 0; -- cgit v1.2.3 From 6d362018c66a0ad13117357422e39a8821a812ad Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Tue, 31 Mar 2020 11:40:37 +0200 Subject: floppy: use symbolic register names in the sparc32 port The sparc port used to be forced to rely on numeric register indexes with their equivalent in comments. Now that they don't depend on the IO port we can use their symbolic names. Link: https://lore.kernel.org/r/20200331094054.24441-7-w@1wt.eu Cc: "David S. 
Miller" Signed-off-by: Willy Tarreau Signed-off-by: Denis Efremov --- arch/sparc/include/asm/floppy_32.h | 46 +++++++++++++++++++------------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/arch/sparc/include/asm/floppy_32.h b/arch/sparc/include/asm/floppy_32.h index 4d08df4f4deb..946dbcbf3a83 100644 --- a/arch/sparc/include/asm/floppy_32.h +++ b/arch/sparc/include/asm/floppy_32.h @@ -114,15 +114,15 @@ static unsigned char sun_read_dir(void) static unsigned char sun_82072_fd_inb(int port) { udelay(5); - switch(port & 7) { + switch (port) { default: printk("floppy: Asked to read unknown port %d\n", port); panic("floppy: Port bolixed."); - case 4: /* FD_STATUS */ + case FD_STATUS: return sun_fdc->status_82072 & ~STATUS_DMA; - case 5: /* FD_DATA */ + case FD_DATA: return sun_fdc->data_82072; - case 7: /* FD_DIR */ + case FD_DIR: return sun_read_dir(); } panic("sun_82072_fd_inb: How did I get here?"); @@ -131,20 +131,20 @@ static unsigned char sun_82072_fd_inb(int port) static void sun_82072_fd_outb(unsigned char value, int port) { udelay(5); - switch(port & 7) { + switch (port) { default: printk("floppy: Asked to write to unknown port %d\n", port); panic("floppy: Port bolixed."); - case 2: /* FD_DOR */ + case FD_DOR: sun_set_dor(value, 0); break; - case 5: /* FD_DATA */ + case FD_DATA: sun_fdc->data_82072 = value; break; - case 7: /* FD_DCR */ + case FD_DCR: sun_fdc->dcr_82072 = value; break; - case 4: /* FD_STATUS */ + case FD_DSR: sun_fdc->status_82072 = value; break; } @@ -154,23 +154,23 @@ static void sun_82072_fd_outb(unsigned char value, int port) static unsigned char sun_82077_fd_inb(int port) { udelay(5); - switch(port & 7) { + switch (port) { default: printk("floppy: Asked to read unknown port %d\n", port); panic("floppy: Port bolixed."); - case 0: /* FD_STATUS_0 */ + case FD_SRA: return sun_fdc->status1_82077; - case 1: /* FD_STATUS_1 */ + case FD_SRB: return sun_fdc->status2_82077; - case 2: /* FD_DOR */ + case FD_DOR: return sun_fdc->dor_82077; - case 3: /* FD_TDR */ + case FD_TDR: return sun_fdc->tapectl_82077; - case 4: /* FD_STATUS */ + case FD_STATUS: return sun_fdc->status_82077 & ~STATUS_DMA; - case 5: /* FD_DATA */ + case FD_DATA: return sun_fdc->data_82077; - case 7: /* FD_DIR */ + case FD_DIR: return sun_read_dir(); } panic("sun_82077_fd_inb: How did I get here?"); @@ -179,23 +179,23 @@ static unsigned char sun_82077_fd_inb(int port) static void sun_82077_fd_outb(unsigned char value, int port) { udelay(5); - switch(port & 7) { + switch (port) { default: printk("floppy: Asked to write to unknown port %d\n", port); panic("floppy: Port bolixed."); - case 2: /* FD_DOR */ + case FD_DOR: sun_set_dor(value, 1); break; - case 5: /* FD_DATA */ + case FD_DATA: sun_fdc->data_82077 = value; break; - case 7: /* FD_DCR */ + case FD_DCR: sun_fdc->dcr_82077 = value; break; - case 4: /* FD_STATUS */ + case FD_DSR: sun_fdc->status_82077 = value; break; - case 3: /* FD_TDR */ + case FD_TDR: sun_fdc->tapectl_82077 = value; break; } -- cgit v1.2.3 From 6cb7e69671843c6f10fd25fc93eecadd4d52ac37 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Tue, 31 Mar 2020 11:40:38 +0200 Subject: floppy: use symbolic register names in the sparc64 port Now by splitting the base address from the register index we can use the symbolic register names instead of the hard-coded numeric values. Link: https://lore.kernel.org/r/20200331094054.24441-8-w@1wt.eu Cc: "David S. 
Miller" [willy: fix printk warnings s/%lx/%x/g in sun_82077_fd_{inb,outb}()] Signed-off-by: Willy Tarreau Signed-off-by: Denis Efremov --- arch/sparc/include/asm/floppy_64.h | 59 +++++++++++++++++++++----------------- 1 file changed, 32 insertions(+), 27 deletions(-) diff --git a/arch/sparc/include/asm/floppy_64.h b/arch/sparc/include/asm/floppy_64.h index c0cf157e5b15..070c8c1f5c8f 100644 --- a/arch/sparc/include/asm/floppy_64.h +++ b/arch/sparc/include/asm/floppy_64.h @@ -47,8 +47,9 @@ unsigned long fdc_status; static struct platform_device *floppy_op = NULL; struct sun_floppy_ops { - unsigned char (*fd_inb) (unsigned long port); - void (*fd_outb) (unsigned char value, unsigned long port); + unsigned char (*fd_inb) (unsigned long port, unsigned int reg); + void (*fd_outb) (unsigned char value, unsigned long base, + unsigned int reg); void (*fd_enable_dma) (void); void (*fd_disable_dma) (void); void (*fd_set_dma_mode) (int); @@ -62,8 +63,8 @@ struct sun_floppy_ops { static struct sun_floppy_ops sun_fdops; -#define fd_inb(base, reg) sun_fdops.fd_inb((base) + (reg)) -#define fd_outb(value, base, reg) sun_fdops.fd_outb(value, (base) + (reg)) +#define fd_inb(base, reg) sun_fdops.fd_inb(base, reg) +#define fd_outb(value, base, reg) sun_fdops.fd_outb(value, base, reg) #define fd_enable_dma() sun_fdops.fd_enable_dma() #define fd_disable_dma() sun_fdops.fd_disable_dma() #define fd_request_dma() (0) /* nothing... */ @@ -97,42 +98,43 @@ static int sun_floppy_types[2] = { 0, 0 }; /* No 64k boundary crossing problems on the Sparc. */ #define CROSS_64KB(a,s) (0) -static unsigned char sun_82077_fd_inb(unsigned long port) +static unsigned char sun_82077_fd_inb(unsigned long base, unsigned int reg) { udelay(5); - switch(port & 7) { + switch (reg) { default: - printk("floppy: Asked to read unknown port %lx\n", port); + printk("floppy: Asked to read unknown port %x\n", reg); panic("floppy: Port bolixed."); - case 4: /* FD_STATUS */ + case FD_STATUS: return sbus_readb(&sun_fdc->status_82077) & ~STATUS_DMA; - case 5: /* FD_DATA */ + case FD_DATA: return sbus_readb(&sun_fdc->data_82077); - case 7: /* FD_DIR */ + case FD_DIR: /* XXX: Is DCL on 0x80 in sun4m? */ return sbus_readb(&sun_fdc->dir_82077); } panic("sun_82072_fd_inb: How did I get here?"); } -static void sun_82077_fd_outb(unsigned char value, unsigned long port) +static void sun_82077_fd_outb(unsigned char value, unsigned long base, + unsigned int reg) { udelay(5); - switch(port & 7) { + switch (reg) { default: - printk("floppy: Asked to write to unknown port %lx\n", port); + printk("floppy: Asked to write to unknown port %x\n", reg); panic("floppy: Port bolixed."); - case 2: /* FD_DOR */ + case FD_DOR: /* Happily, the 82077 has a real DOR register. 
*/ sbus_writeb(value, &sun_fdc->dor_82077); break; - case 5: /* FD_DATA */ + case FD_DATA: sbus_writeb(value, &sun_fdc->data_82077); break; - case 7: /* FD_DCR */ + case FD_DCR: sbus_writeb(value, &sun_fdc->dcr_82077); break; - case 4: /* FD_STATUS */ + case FD_DSR: sbus_writeb(value, &sun_fdc->status_82077); break; } @@ -298,19 +300,21 @@ static struct sun_pci_dma_op sun_pci_dma_pending = { -1U, 0, 0, NULL}; irqreturn_t floppy_interrupt(int irq, void *dev_id); -static unsigned char sun_pci_fd_inb(unsigned long port) +static unsigned char sun_pci_fd_inb(unsigned long base, unsigned int reg) { udelay(5); - return inb(port); + return inb(base + reg); } -static void sun_pci_fd_outb(unsigned char val, unsigned long port) +static void sun_pci_fd_outb(unsigned char val, unsigned long base, + unsigned int reg) { udelay(5); - outb(val, port); + outb(val, base + reg); } -static void sun_pci_fd_broken_outb(unsigned char val, unsigned long port) +static void sun_pci_fd_broken_outb(unsigned char val, unsigned long base, + unsigned int reg) { udelay(5); /* @@ -320,16 +324,17 @@ static void sun_pci_fd_broken_outb(unsigned char val, unsigned long port) * this does not hurt correct hardware like the AXmp. * (Eddie, Sep 12 1998). */ - if (port == ((unsigned long)sun_fdc) + 2) { + if (reg == FD_DOR) { if (((val & 0x03) == sun_pci_broken_drive) && (val & 0x20)) { val |= 0x10; } } - outb(val, port); + outb(val, base + reg); } #ifdef PCI_FDC_SWAP_DRIVES -static void sun_pci_fd_lde_broken_outb(unsigned char val, unsigned long port) +static void sun_pci_fd_lde_broken_outb(unsigned char val, unsigned long base, + unsigned int reg) { udelay(5); /* @@ -339,13 +344,13 @@ static void sun_pci_fd_lde_broken_outb(unsigned char val, unsigned long port) * this does not hurt correct hardware like the AXmp. * (Eddie, Sep 12 1998). */ - if (port == ((unsigned long)sun_fdc) + 2) { + if (reg == FD_DOR) { if (((val & 0x03) == sun_pci_broken_drive) && (val & 0x10)) { val &= ~(0x03); val |= 0x21; } } - outb(val, port); + outb(val, base + reg); } #endif /* PCI_FDC_SWAP_DRIVES */ -- cgit v1.2.3 From 38ede90831c7f3e931b58c2b2790d02d5d061592 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Tue, 31 Mar 2020 11:40:39 +0200 Subject: floppy: use symbolic register names in the x86 port Now we can use FD_STATUS and FD_DATA instead of 4 or 5, let's do this, and also use STATUS_DMA and STATUS_READY for the status bits. 
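For reviewers wondering where 0xa0 and 0x20 went: the old magic numbers are exactly the OR of the two main status register bits the polling loop tests. A short sketch of the equivalence (bit values as defined in the kernel's floppy headers, and corroborated by the masks being replaced):

    #define STATUS_DMA      0x20    /* execution phase in non-DMA mode */
    #define STATUS_READY    0x80    /* data register ready (RQM) */

    /* hence: 0xa0 == (STATUS_DMA | STATUS_READY)
     * and:   0x20 ==  STATUS_DMA */
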
Link: https://lore.kernel.org/r/20200331094054.24441-9-w@1wt.eu Cc: x86@kernel.org Signed-off-by: Willy Tarreau Signed-off-by: Denis Efremov --- arch/x86/include/asm/floppy.h | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/arch/x86/include/asm/floppy.h b/arch/x86/include/asm/floppy.h index 20088cb08f5e..d43717b423cb 100644 --- a/arch/x86/include/asm/floppy.h +++ b/arch/x86/include/asm/floppy.h @@ -77,25 +77,26 @@ static irqreturn_t floppy_hardint(int irq, void *dev_id) st = 1; for (lcount = virtual_dma_count, lptr = virtual_dma_addr; lcount; lcount--, lptr++) { - st = inb(virtual_dma_port + 4) & 0xa0; - if (st != 0xa0) + st = inb(virtual_dma_port + FD_STATUS); + st &= STATUS_DMA | STATUS_READY; + if (st != (STATUS_DMA | STATUS_READY)) break; if (virtual_dma_mode) - outb_p(*lptr, virtual_dma_port + 5); + outb_p(*lptr, virtual_dma_port + FD_DATA); else - *lptr = inb_p(virtual_dma_port + 5); + *lptr = inb_p(virtual_dma_port + FD_DATA); } virtual_dma_count = lcount; virtual_dma_addr = lptr; - st = inb(virtual_dma_port + 4); + st = inb(virtual_dma_port + FD_STATUS); } #ifdef TRACE_FLPY_INT calls++; #endif - if (st == 0x20) + if (st == STATUS_DMA) return IRQ_HANDLED; - if (!(st & 0x20)) { + if (!(st & STATUS_DMA)) { virtual_dma_residue += virtual_dma_count; virtual_dma_count = 0; #ifdef TRACE_FLPY_INT -- cgit v1.2.3 From c1f710b5fe8c18d0c2be4514bf509e1a4203ce08 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Tue, 31 Mar 2020 11:40:40 +0200 Subject: floppy: cleanup: make twaddle() not rely on current_{fdc,drive} anymore Now the fdc and drive are passed in argument so that the function does not use current_fdc nor current_drive anymore. Link: https://lore.kernel.org/r/20200331094054.24441-10-w@1wt.eu Signed-off-by: Willy Tarreau Signed-off-by: Denis Efremov --- drivers/block/floppy.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 1cda39098b07..b1729daa2e2e 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -827,14 +827,14 @@ static int set_dor(int fdc, char mask, char data) return olddor; } -static void twaddle(void) +static void twaddle(int fdc, int drive) { - if (drive_params[current_drive].select_delay) + if (drive_params[drive].select_delay) return; - fdc_outb(fdc_state[current_fdc].dor & ~(0x10 << UNIT(current_drive)), - current_fdc, FD_DOR); - fdc_outb(fdc_state[current_fdc].dor, current_fdc, FD_DOR); - drive_state[current_drive].select_date = jiffies; + fdc_outb(fdc_state[fdc].dor & ~(0x10 << UNIT(drive)), + fdc, FD_DOR); + fdc_outb(fdc_state[fdc].dor, fdc, FD_DOR); + drive_state[drive].select_date = jiffies; } /* @@ -1934,7 +1934,7 @@ static void floppy_ready(void) "calling disk change from floppy_ready\n"); if (!(raw_cmd->flags & FD_RAW_NO_MOTOR) && disk_change(current_drive) && !drive_params[current_drive].select_delay) - twaddle(); /* this clears the dcl on certain + twaddle(current_fdc, current_drive); /* this clears the dcl on certain * drive/controller combinations */ #ifdef fd_chose_dma_mode @@ -2904,7 +2904,7 @@ do_request: } if (test_bit(FD_NEED_TWADDLE_BIT, &drive_state[current_drive].flags)) - twaddle(); + twaddle(current_fdc, current_drive); schedule_bh(floppy_start); debugt(__func__, "queue fd request"); return; @@ -3610,7 +3610,7 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int case FDTWADDLE: if (lock_fdc(drive)) return -EINTR; - twaddle(); + twaddle(current_fdc, current_drive); 
process_fd_request(); return 0; default: -- cgit v1.2.3 From f3e0dc1d8b71fa0bdd3a8e24bb129978567fefbb Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Tue, 31 Mar 2020 11:40:41 +0200 Subject: floppy: cleanup: make reset_fdc_info() not rely on current_fdc anymore Now the fdc is passed in argument so that the function does not use current_fdc anymore. Link: https://lore.kernel.org/r/20200331094054.24441-11-w@1wt.eu Signed-off-by: Willy Tarreau Signed-off-by: Denis Efremov --- drivers/block/floppy.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index b1729daa2e2e..6c98f8d169a9 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -838,19 +838,19 @@ static void twaddle(int fdc, int drive) } /* - * Reset all driver information about the current fdc. + * Reset all driver information about the specified fdc. * This is needed after a reset, and after a raw command. */ -static void reset_fdc_info(int mode) +static void reset_fdc_info(int fdc, int mode) { int drive; - fdc_state[current_fdc].spec1 = fdc_state[current_fdc].spec2 = -1; - fdc_state[current_fdc].need_configure = 1; - fdc_state[current_fdc].perp_mode = 1; - fdc_state[current_fdc].rawcmd = 0; + fdc_state[fdc].spec1 = fdc_state[fdc].spec2 = -1; + fdc_state[fdc].need_configure = 1; + fdc_state[fdc].perp_mode = 1; + fdc_state[fdc].rawcmd = 0; for (drive = 0; drive < N_DRIVE; drive++) - if (FDC(drive) == current_fdc && + if (FDC(drive) == fdc && (mode || drive_state[drive].track != NEED_1_RECAL)) drive_state[drive].track = NEED_2_RECAL; } @@ -874,7 +874,7 @@ static void set_fdc(int drive) set_dor(1 - current_fdc, ~8, 0); #endif if (fdc_state[current_fdc].rawcmd == 2) - reset_fdc_info(1); + reset_fdc_info(current_fdc, 1); if (fdc_inb(current_fdc, FD_STATUS) != STATUS_READY) fdc_state[current_fdc].reset = 1; } @@ -1800,7 +1800,7 @@ static void reset_fdc(void) do_floppy = reset_interrupt; fdc_state[current_fdc].reset = 0; - reset_fdc_info(0); + reset_fdc_info(current_fdc, 0); /* Pseudo-DMA may intercept 'reset finished' interrupt. */ /* Irrelevant for systems with true DMA (i386). */ @@ -4890,7 +4890,7 @@ static int floppy_grab_irq_and_dma(void) } for (current_fdc = 0; current_fdc < N_FDC; current_fdc++) { if (fdc_state[current_fdc].address != -1) { - reset_fdc_info(1); + reset_fdc_info(current_fdc, 1); fdc_outb(fdc_state[current_fdc].dor, current_fdc, FD_DOR); } } -- cgit v1.2.3 From 6d494ed03766ead1b180463380511fed9ac779d9 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Tue, 31 Mar 2020 11:40:42 +0200 Subject: floppy: cleanup: make show_floppy() not rely on current_fdc anymore Now the fdc is passed in argument so that the function does not use current_fdc anymore. 
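The mechanical pattern is the same as in the rest of this series and is sketched below: the global is read once at each remaining call site instead of deep inside the helper, so the function body becomes usable for any controller:

    static void show_floppy(int fdc);       /* was: show_floppy(void) */

    /* call sites that still mean "the currently selected controller"
     * now say so explicitly: */
    show_floppy(current_fdc);

    /* and inside the body, fdc_inb(current_fdc, FD_STATUS) becomes
     * fdc_inb(fdc, FD_STATUS), and so on */
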
Link: https://lore.kernel.org/r/20200331094054.24441-12-w@1wt.eu Signed-off-by: Willy Tarreau Signed-off-by: Denis Efremov --- drivers/block/floppy.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 6c98f8d169a9..dd739594fce7 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -1104,7 +1104,7 @@ static void setup_DMA(void) #endif } -static void show_floppy(void); +static void show_floppy(int fdc); /* waits until the fdc becomes ready */ static int wait_til_ready(void) @@ -1121,7 +1121,7 @@ static int wait_til_ready(void) } if (initialized) { DPRINT("Getstatus times out (%x) on fdc %d\n", status, current_fdc); - show_floppy(); + show_floppy(current_fdc); } fdc_state[current_fdc].reset = 1; return -1; @@ -1147,7 +1147,7 @@ static int output_byte(char byte) if (initialized) { DPRINT("Unable to send byte %x to FDC. Fdc=%x Status=%x\n", byte, current_fdc, status); - show_floppy(); + show_floppy(current_fdc); } return -1; } @@ -1176,7 +1176,7 @@ static int result(void) if (initialized) { DPRINT("get result error. Fdc=%d Last status=%x Read bytes=%d\n", current_fdc, status, i); - show_floppy(); + show_floppy(current_fdc); } fdc_state[current_fdc].reset = 1; return -1; @@ -1819,7 +1819,7 @@ static void reset_fdc(void) } } -static void show_floppy(void) +static void show_floppy(int fdc) { int i; @@ -1842,7 +1842,7 @@ static void show_floppy(void) print_hex_dump(KERN_INFO, "", DUMP_PREFIX_NONE, 16, 1, reply_buffer, resultsize, true); - pr_info("status=%x\n", fdc_inb(current_fdc, FD_STATUS)); + pr_info("status=%x\n", fdc_inb(fdc, FD_STATUS)); pr_info("fdc_busy=%lu\n", fdc_busy); if (do_floppy) pr_info("do_floppy=%ps\n", do_floppy); @@ -1868,7 +1868,7 @@ static void floppy_shutdown(struct work_struct *arg) unsigned long flags; if (initialized) - show_floppy(); + show_floppy(current_fdc); cancel_activity(); flags = claim_dma_lock(); -- cgit v1.2.3 From 5ea00bfc52f428cab828623e2d7084118c25d54b Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Tue, 31 Mar 2020 11:40:43 +0200 Subject: floppy: cleanup: make wait_til_ready() not rely on current_fdc anymore Now the fdc is passed in argument so that the function does not use current_fdc anymore. 
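Callers now name the controller they are polling; a minimal sketch of the resulting calling convention (mirroring the output_byte() hunk below, with the fdc made explicit the way the following patches do):

    int status = wait_til_ready(fdc);   /* poll MSR for STATUS_READY */

    if (status < 0)                     /* timed out or reset pending;
                                         * the controller is marked
                                         * for reset */
            return -1;
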
Link: https://lore.kernel.org/r/20200331094054.24441-13-w@1wt.eu Signed-off-by: Willy Tarreau Signed-off-by: Denis Efremov --- drivers/block/floppy.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index dd739594fce7..5dfddd4726fb 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -1107,30 +1107,30 @@ static void setup_DMA(void) static void show_floppy(int fdc); /* waits until the fdc becomes ready */ -static int wait_til_ready(void) +static int wait_til_ready(int fdc) { int status; int counter; - if (fdc_state[current_fdc].reset) + if (fdc_state[fdc].reset) return -1; for (counter = 0; counter < 10000; counter++) { - status = fdc_inb(current_fdc, FD_STATUS); + status = fdc_inb(fdc, FD_STATUS); if (status & STATUS_READY) return status; } if (initialized) { - DPRINT("Getstatus times out (%x) on fdc %d\n", status, current_fdc); - show_floppy(current_fdc); + DPRINT("Getstatus times out (%x) on fdc %d\n", status, fdc); + show_floppy(fdc); } - fdc_state[current_fdc].reset = 1; + fdc_state[fdc].reset = 1; return -1; } /* sends a command byte to the fdc */ static int output_byte(char byte) { - int status = wait_til_ready(); + int status = wait_til_ready(current_fdc); if (status < 0) return -1; @@ -1159,7 +1159,7 @@ static int result(void) int status = 0; for (i = 0; i < MAX_REPLIES; i++) { - status = wait_til_ready(); + status = wait_til_ready(current_fdc); if (status < 0) break; status &= STATUS_DIR | STATUS_READY | STATUS_BUSY | STATUS_DMA; @@ -1186,7 +1186,7 @@ static int result(void) /* does the fdc need more output? */ static int need_more_output(void) { - int status = wait_til_ready(); + int status = wait_til_ready(current_fdc); if (status < 0) return -1; -- cgit v1.2.3 From f8a8e0f7a8941bfe105af7a1150c9f6a73ca253d Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Tue, 31 Mar 2020 11:40:44 +0200 Subject: floppy: cleanup: make output_byte() not rely on current_fdc anymore Now the fdc is passed in argument so that the function does not use current_fdc anymore. Link: https://lore.kernel.org/r/20200331094054.24441-14-w@1wt.eu Signed-off-by: Willy Tarreau Signed-off-by: Denis Efremov --- drivers/block/floppy.c | 64 +++++++++++++++++++++++++------------------------- 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 5dfddd4726fb..81fd06eaea7d 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -1128,26 +1128,26 @@ static int wait_til_ready(int fdc) } /* sends a command byte to the fdc */ -static int output_byte(char byte) +static int output_byte(int fdc, char byte) { - int status = wait_til_ready(current_fdc); + int status = wait_til_ready(fdc); if (status < 0) return -1; if (is_ready_state(status)) { - fdc_outb(byte, current_fdc, FD_DATA); + fdc_outb(byte, fdc, FD_DATA); output_log[output_log_pos].data = byte; output_log[output_log_pos].status = status; output_log[output_log_pos].jiffies = jiffies; output_log_pos = (output_log_pos + 1) % OLOGSIZE; return 0; } - fdc_state[current_fdc].reset = 1; + fdc_state[fdc].reset = 1; if (initialized) { DPRINT("Unable to send byte %x to FDC. 
Fdc=%x Status=%x\n", - byte, current_fdc, status); - show_floppy(current_fdc); + byte, fdc, status); + show_floppy(fdc); } return -1; } @@ -1229,8 +1229,8 @@ static void perpendicular_mode(void) if (fdc_state[current_fdc].perp_mode == perp_mode) return; if (fdc_state[current_fdc].version >= FDC_82077_ORIG) { - output_byte(FD_PERPENDICULAR); - output_byte(perp_mode); + output_byte(current_fdc, FD_PERPENDICULAR); + output_byte(current_fdc, perp_mode); fdc_state[current_fdc].perp_mode = perp_mode; } else if (perp_mode) { DPRINT("perpendicular mode not supported by this FDC.\n"); @@ -1243,12 +1243,12 @@ static int no_fifo; static int fdc_configure(void) { /* Turn on FIFO */ - output_byte(FD_CONFIGURE); + output_byte(current_fdc, FD_CONFIGURE); if (need_more_output() != MORE_OUTPUT) return 0; - output_byte(0); - output_byte(0x10 | (no_fifo & 0x20) | (fifo_depth & 0xf)); - output_byte(0); /* pre-compensation from track + output_byte(current_fdc, 0); + output_byte(current_fdc, 0x10 | (no_fifo & 0x20) | (fifo_depth & 0xf)); + output_byte(current_fdc, 0); /* pre-compensation from track 0 upwards */ return 1; } @@ -1301,10 +1301,10 @@ static void fdc_specify(void) if (fdc_state[current_fdc].version >= FDC_82078) { /* chose the default rate table, not the one * where 1 = 2 Mbps */ - output_byte(FD_DRIVESPEC); + output_byte(current_fdc, FD_DRIVESPEC); if (need_more_output() == MORE_OUTPUT) { - output_byte(UNIT(current_drive)); - output_byte(0xc0); + output_byte(current_fdc, UNIT(current_drive)); + output_byte(current_fdc, 0xc0); } } break; @@ -1349,9 +1349,9 @@ static void fdc_specify(void) if (fdc_state[current_fdc].spec1 != spec1 || fdc_state[current_fdc].spec2 != spec2) { /* Go ahead and set spec1 and spec2 */ - output_byte(FD_SPECIFY); - output_byte(fdc_state[current_fdc].spec1 = spec1); - output_byte(fdc_state[current_fdc].spec2 = spec2); + output_byte(current_fdc, FD_SPECIFY); + output_byte(current_fdc, fdc_state[current_fdc].spec1 = spec1); + output_byte(current_fdc, fdc_state[current_fdc].spec2 = spec2); } } /* fdc_specify */ @@ -1513,7 +1513,7 @@ static void setup_rw_floppy(void) r = 0; for (i = 0; i < raw_cmd->cmd_count; i++) - r |= output_byte(raw_cmd->cmd[i]); + r |= output_byte(current_fdc, raw_cmd->cmd[i]); debugt(__func__, "rw_command"); @@ -1566,8 +1566,8 @@ static void check_wp(void) { if (test_bit(FD_VERIFY_BIT, &drive_state[current_drive].flags)) { /* check write protection */ - output_byte(FD_GETSTATUS); - output_byte(UNIT(current_drive)); + output_byte(current_fdc, FD_GETSTATUS); + output_byte(current_fdc, UNIT(current_drive)); if (result() != 1) { fdc_state[current_fdc].reset = 1; return; @@ -1639,9 +1639,9 @@ static void seek_floppy(void) } do_floppy = seek_interrupt; - output_byte(FD_SEEK); - output_byte(UNIT(current_drive)); - if (output_byte(track) < 0) { + output_byte(current_fdc, FD_SEEK); + output_byte(current_fdc, UNIT(current_drive)); + if (output_byte(current_fdc, track) < 0) { reset_fdc(); return; } @@ -1748,7 +1748,7 @@ irqreturn_t floppy_interrupt(int irq, void *dev_id) if (inr == 0) { int max_sensei = 4; do { - output_byte(FD_SENSEI); + output_byte(current_fdc, FD_SENSEI); inr = result(); if (do_print) print_result("sensei", inr); @@ -1771,8 +1771,8 @@ static void recalibrate_floppy(void) { debugt(__func__, ""); do_floppy = recal_interrupt; - output_byte(FD_RECALIBRATE); - if (output_byte(UNIT(current_drive)) < 0) + output_byte(current_fdc, FD_RECALIBRATE); + if (output_byte(current_fdc, UNIT(current_drive)) < 0) reset_fdc(); } @@ -4302,7 +4302,7 @@ static char 
__init get_fdc_version(void) { int r; - output_byte(FD_DUMPREGS); /* 82072 and better know DUMPREGS */ + output_byte(current_fdc, FD_DUMPREGS); /* 82072 and better know DUMPREGS */ if (fdc_state[current_fdc].reset) return FDC_NONE; r = result(); @@ -4323,15 +4323,15 @@ static char __init get_fdc_version(void) return FDC_82072; /* 82072 doesn't know CONFIGURE */ } - output_byte(FD_PERPENDICULAR); + output_byte(current_fdc, FD_PERPENDICULAR); if (need_more_output() == MORE_OUTPUT) { - output_byte(0); + output_byte(current_fdc, 0); } else { pr_info("FDC %d is an 82072A\n", current_fdc); return FDC_82072A; /* 82072A as found on Sparcs. */ } - output_byte(FD_UNLOCK); + output_byte(current_fdc, FD_UNLOCK); r = result(); if ((r == 1) && (reply_buffer[0] == 0x80)) { pr_info("FDC %d is a pre-1991 82077\n", current_fdc); @@ -4343,7 +4343,7 @@ static char __init get_fdc_version(void) current_fdc, r); return FDC_UNKNOWN; } - output_byte(FD_PARTID); + output_byte(current_fdc, FD_PARTID); r = result(); if (r != 1) { pr_info("FDC %d init: PARTID: unexpected return of %d bytes.\n", -- cgit v1.2.3 From 96dad77a6506ceb31eb520f97fbb5c82054f0a73 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Tue, 31 Mar 2020 11:40:45 +0200 Subject: floppy: cleanup: make result() not rely on current_fdc anymore Now the fdc is passed in argument so that the function does not use current_fdc anymore. It's worth noting that there's still a single reply_buffer[] which will store the result for the current fdc. It may or may not make sense to implement one buffer per fdc in the future. Link: https://lore.kernel.org/r/20200331094054.24441-15-w@1wt.eu Signed-off-by: Willy Tarreau Signed-off-by: Denis Efremov --- drivers/block/floppy.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 81fd06eaea7d..4aaf84217b53 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -1153,13 +1153,13 @@ static int output_byte(int fdc, char byte) } /* gets the response from the fdc */ -static int result(void) +static int result(int fdc) { int i; int status = 0; for (i = 0; i < MAX_REPLIES; i++) { - status = wait_til_ready(current_fdc); + status = wait_til_ready(fdc); if (status < 0) break; status &= STATUS_DIR | STATUS_READY | STATUS_BUSY | STATUS_DMA; @@ -1169,16 +1169,16 @@ static int result(void) return i; } if (status == (STATUS_DIR | STATUS_READY | STATUS_BUSY)) - reply_buffer[i] = fdc_inb(current_fdc, FD_DATA); + reply_buffer[i] = fdc_inb(fdc, FD_DATA); else break; } if (initialized) { DPRINT("get result error. Fdc=%d Last status=%x Read bytes=%d\n", - current_fdc, status, i); - show_floppy(current_fdc); + fdc, status, i); + show_floppy(fdc); } - fdc_state[current_fdc].reset = 1; + fdc_state[fdc].reset = 1; return -1; } @@ -1194,7 +1194,7 @@ static int need_more_output(void) if (is_ready_state(status)) return MORE_OUTPUT; - return result(); + return result(current_fdc); } /* Set perpendicular mode as required, based on data rate, if supported. 
@@ -1524,7 +1524,7 @@ static void setup_rw_floppy(void) } if (!(flags & FD_RAW_INTR)) { - inr = result(); + inr = result(current_fdc); cont->interrupt(); } else if (flags & FD_RAW_NEED_DISK) fd_watchdog(); @@ -1568,7 +1568,7 @@ static void check_wp(void) /* check write protection */ output_byte(current_fdc, FD_GETSTATUS); output_byte(current_fdc, UNIT(current_drive)); - if (result() != 1) { + if (result(current_fdc) != 1) { fdc_state[current_fdc].reset = 1; return; } @@ -1742,14 +1742,14 @@ irqreturn_t floppy_interrupt(int irq, void *dev_id) do_print = !handler && print_unex && initialized; - inr = result(); + inr = result(current_fdc); if (do_print) print_result("unexpected interrupt", inr); if (inr == 0) { int max_sensei = 4; do { output_byte(current_fdc, FD_SENSEI); - inr = result(); + inr = result(current_fdc); if (do_print) print_result("sensei", inr); max_sensei--; @@ -1782,7 +1782,7 @@ static void recalibrate_floppy(void) static void reset_interrupt(void) { debugt(__func__, ""); - result(); /* get the status ready for set_fdc */ + result(current_fdc); /* get the status ready for set_fdc */ if (fdc_state[current_fdc].reset) { pr_info("reset set in interrupt, calling %ps\n", cont->error); cont->error(); /* a reset just after a reset. BAD! */ @@ -4305,7 +4305,7 @@ static char __init get_fdc_version(void) output_byte(current_fdc, FD_DUMPREGS); /* 82072 and better know DUMPREGS */ if (fdc_state[current_fdc].reset) return FDC_NONE; - r = result(); + r = result(current_fdc); if (r <= 0x00) return FDC_NONE; /* No FDC present ??? */ if ((r == 1) && (reply_buffer[0] == 0x80)) { @@ -4332,7 +4332,7 @@ static char __init get_fdc_version(void) } output_byte(current_fdc, FD_UNLOCK); - r = result(); + r = result(current_fdc); if ((r == 1) && (reply_buffer[0] == 0x80)) { pr_info("FDC %d is a pre-1991 82077\n", current_fdc); return FDC_82077_ORIG; /* Pre-1991 82077, doesn't know @@ -4344,7 +4344,7 @@ static char __init get_fdc_version(void) return FDC_UNKNOWN; } output_byte(current_fdc, FD_PARTID); - r = result(); + r = result(current_fdc); if (r != 1) { pr_info("FDC %d init: PARTID: unexpected return of %d bytes.\n", current_fdc, r); -- cgit v1.2.3 From 3ab12a18209991fa430ea702d5d7d619bbb9ce67 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Tue, 31 Mar 2020 11:40:46 +0200 Subject: floppy: cleanup: make need_more_output() not rely on current_fdc anymore Now the fdc is passed in argument so that the function does not use current_fdc anymore. Link: https://lore.kernel.org/r/20200331094054.24441-16-w@1wt.eu Signed-off-by: Willy Tarreau Signed-off-by: Denis Efremov --- drivers/block/floppy.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 4aaf84217b53..aa2d840bf06b 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -1184,9 +1184,9 @@ static int result(int fdc) #define MORE_OUTPUT -2 /* does the fdc need more output? */ -static int need_more_output(void) +static int need_more_output(int fdc) { - int status = wait_til_ready(current_fdc); + int status = wait_til_ready(fdc); if (status < 0) return -1; @@ -1194,7 +1194,7 @@ static int need_more_output(void) if (is_ready_state(status)) return MORE_OUTPUT; - return result(current_fdc); + return result(fdc); } /* Set perpendicular mode as required, based on data rate, if supported. 
@@ -1244,7 +1244,7 @@ static int fdc_configure(void) { /* Turn on FIFO */ output_byte(current_fdc, FD_CONFIGURE); - if (need_more_output() != MORE_OUTPUT) + if (need_more_output(current_fdc) != MORE_OUTPUT) return 0; output_byte(current_fdc, 0); output_byte(current_fdc, 0x10 | (no_fifo & 0x20) | (fifo_depth & 0xf)); @@ -1302,7 +1302,7 @@ static void fdc_specify(void) /* chose the default rate table, not the one * where 1 = 2 Mbps */ output_byte(current_fdc, FD_DRIVESPEC); - if (need_more_output() == MORE_OUTPUT) { + if (need_more_output(current_fdc) == MORE_OUTPUT) { output_byte(current_fdc, UNIT(current_drive)); output_byte(current_fdc, 0xc0); } @@ -4324,7 +4324,7 @@ static char __init get_fdc_version(void) } output_byte(current_fdc, FD_PERPENDICULAR); - if (need_more_output() == MORE_OUTPUT) { + if (need_more_output(current_fdc) == MORE_OUTPUT) { output_byte(current_fdc, 0); } else { pr_info("FDC %d is an 82072A\n", current_fdc); -- cgit v1.2.3 From 197c7ffdb8165854e9e2f11a699d2fcca5adbd5a Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Tue, 31 Mar 2020 11:40:47 +0200 Subject: floppy: cleanup: make perpendicular_mode() not rely on current_fdc anymore Now the fdc is passed in argument so that the function does not use current_fdc anymore. It's worth noting that there's still a single raw_cmd pointer specific to the current fdc. It may make sense to have one per fdc in the future. In addition, cont->done() still relies on the current drive and current raw_cmd. Link: https://lore.kernel.org/r/20200331094054.24441-17-w@1wt.eu Signed-off-by: Willy Tarreau Signed-off-by: Denis Efremov --- drivers/block/floppy.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index aa2d840bf06b..fcccbb4c143e 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -1200,7 +1200,7 @@ static int need_more_output(int fdc) /* Set perpendicular mode as required, based on data rate, if supported. * 82077 Now tested. 1Mbps data rate only possible with 82077-1. */ -static void perpendicular_mode(void) +static void perpendicular_mode(int fdc) { unsigned char perp_mode; @@ -1215,7 +1215,7 @@ static void perpendicular_mode(void) default: DPRINT("Invalid data rate for perpendicular mode!\n"); cont->done(0); - fdc_state[current_fdc].reset = 1; + fdc_state[fdc].reset = 1; /* * convenient way to return to * redo without too much hassle @@ -1226,12 +1226,12 @@ static void perpendicular_mode(void) } else perp_mode = 0; - if (fdc_state[current_fdc].perp_mode == perp_mode) + if (fdc_state[fdc].perp_mode == perp_mode) return; - if (fdc_state[current_fdc].version >= FDC_82077_ORIG) { - output_byte(current_fdc, FD_PERPENDICULAR); - output_byte(current_fdc, perp_mode); - fdc_state[current_fdc].perp_mode = perp_mode; + if (fdc_state[fdc].version >= FDC_82077_ORIG) { + output_byte(fdc, FD_PERPENDICULAR); + output_byte(fdc, perp_mode); + fdc_state[fdc].perp_mode = perp_mode; } else if (perp_mode) { DPRINT("perpendicular mode not supported by this FDC.\n"); } @@ -1946,7 +1946,7 @@ static void floppy_ready(void) #endif if (raw_cmd->flags & (FD_RAW_NEED_SEEK | FD_RAW_NEED_DISK)) { - perpendicular_mode(); + perpendicular_mode(current_fdc); fdc_specify(); /* must be done here because of hut, hlt ... 
*/ seek_floppy(); } else { -- cgit v1.2.3 From d5da6fa2b892fff23ffd1cb8a04bf618b6072807 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Tue, 31 Mar 2020 11:40:48 +0200 Subject: floppy: cleanup: make fdc_configure() not rely on current_fdc anymore Now the fdc is passed in argument so that the function does not use current_fdc anymore. Link: https://lore.kernel.org/r/20200331094054.24441-18-w@1wt.eu Signed-off-by: Willy Tarreau Signed-off-by: Denis Efremov --- drivers/block/floppy.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index fcccbb4c143e..c1338c4bb941 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -1240,16 +1240,15 @@ static void perpendicular_mode(int fdc) static int fifo_depth = 0xa; static int no_fifo; -static int fdc_configure(void) +static int fdc_configure(int fdc) { /* Turn on FIFO */ - output_byte(current_fdc, FD_CONFIGURE); - if (need_more_output(current_fdc) != MORE_OUTPUT) + output_byte(fdc, FD_CONFIGURE); + if (need_more_output(fdc) != MORE_OUTPUT) return 0; - output_byte(current_fdc, 0); - output_byte(current_fdc, 0x10 | (no_fifo & 0x20) | (fifo_depth & 0xf)); - output_byte(current_fdc, 0); /* pre-compensation from track - 0 upwards */ + output_byte(fdc, 0); + output_byte(fdc, 0x10 | (no_fifo & 0x20) | (fifo_depth & 0xf)); + output_byte(fdc, 0); /* pre-compensation from track 0 upwards */ return 1; } @@ -1288,7 +1287,7 @@ static void fdc_specify(void) if (fdc_state[current_fdc].need_configure && fdc_state[current_fdc].version >= FDC_82072A) { - fdc_configure(); + fdc_configure(current_fdc); fdc_state[current_fdc].need_configure = 0; } @@ -4318,7 +4317,7 @@ static char __init get_fdc_version(void) return FDC_UNKNOWN; } - if (!fdc_configure()) { + if (!fdc_configure(current_fdc)) { pr_info("FDC %d is an 82072\n", current_fdc); return FDC_82072; /* 82072 doesn't know CONFIGURE */ } -- cgit v1.2.3 From 3631a674a2ed7233905c0a7f37f09eeb83aa4d67 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Tue, 31 Mar 2020 11:40:49 +0200 Subject: floppy: cleanup: make fdc_specify() not rely on current_{fdc,drive} anymore Now the fdc and drive are passed in argument so that the function does not use current_fdc nor current_drive anymore. Link: https://lore.kernel.org/r/20200331094054.24441-19-w@1wt.eu Signed-off-by: Willy Tarreau Signed-off-by: Denis Efremov --- drivers/block/floppy.c | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index c1338c4bb941..b929b60afe9b 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -1273,7 +1273,7 @@ static int fdc_configure(int fdc) * * These values are rounded up to the next highest available delay time. 
*/ -static void fdc_specify(void) +static void fdc_specify(int fdc, int drive) { unsigned char spec1; unsigned char spec2; @@ -1285,10 +1285,10 @@ static void fdc_specify(void) int hlt_max_code = 0x7f; int hut_max_code = 0xf; - if (fdc_state[current_fdc].need_configure && - fdc_state[current_fdc].version >= FDC_82072A) { - fdc_configure(current_fdc); - fdc_state[current_fdc].need_configure = 0; + if (fdc_state[fdc].need_configure && + fdc_state[fdc].version >= FDC_82072A) { + fdc_configure(fdc); + fdc_state[fdc].need_configure = 0; } switch (raw_cmd->rate & 0x03) { @@ -1297,13 +1297,13 @@ static void fdc_specify(void) break; case 1: dtr = 300; - if (fdc_state[current_fdc].version >= FDC_82078) { + if (fdc_state[fdc].version >= FDC_82078) { /* chose the default rate table, not the one * where 1 = 2 Mbps */ - output_byte(current_fdc, FD_DRIVESPEC); - if (need_more_output(current_fdc) == MORE_OUTPUT) { - output_byte(current_fdc, UNIT(current_drive)); - output_byte(current_fdc, 0xc0); + output_byte(fdc, FD_DRIVESPEC); + if (need_more_output(fdc) == MORE_OUTPUT) { + output_byte(fdc, UNIT(drive)); + output_byte(fdc, 0xc0); } } break; @@ -1312,14 +1312,14 @@ static void fdc_specify(void) break; } - if (fdc_state[current_fdc].version >= FDC_82072) { + if (fdc_state[fdc].version >= FDC_82072) { scale_dtr = dtr; hlt_max_code = 0x00; /* 0==256msec*dtr0/dtr (not linear!) */ hut_max_code = 0x0; /* 0==256msec*dtr0/dtr (not linear!) */ } /* Convert step rate from microseconds to milliseconds and 4 bits */ - srt = 16 - DIV_ROUND_UP(drive_params[current_drive].srt * scale_dtr / 1000, + srt = 16 - DIV_ROUND_UP(drive_params[drive].srt * scale_dtr / 1000, NOMINAL_DTR); if (slow_floppy) srt = srt / 4; @@ -1327,14 +1327,14 @@ static void fdc_specify(void) SUPBOUND(srt, 0xf); INFBOUND(srt, 0); - hlt = DIV_ROUND_UP(drive_params[current_drive].hlt * scale_dtr / 2, + hlt = DIV_ROUND_UP(drive_params[drive].hlt * scale_dtr / 2, NOMINAL_DTR); if (hlt < 0x01) hlt = 0x01; else if (hlt > 0x7f) hlt = hlt_max_code; - hut = DIV_ROUND_UP(drive_params[current_drive].hut * scale_dtr / 16, + hut = DIV_ROUND_UP(drive_params[drive].hut * scale_dtr / 16, NOMINAL_DTR); if (hut < 0x1) hut = 0x1; @@ -1345,12 +1345,12 @@ static void fdc_specify(void) spec2 = (hlt << 1) | (use_virtual_dma & 1); /* If these parameters did not change, just return with success */ - if (fdc_state[current_fdc].spec1 != spec1 || - fdc_state[current_fdc].spec2 != spec2) { + if (fdc_state[fdc].spec1 != spec1 || + fdc_state[fdc].spec2 != spec2) { /* Go ahead and set spec1 and spec2 */ - output_byte(current_fdc, FD_SPECIFY); - output_byte(current_fdc, fdc_state[current_fdc].spec1 = spec1); - output_byte(current_fdc, fdc_state[current_fdc].spec2 = spec2); + output_byte(fdc, FD_SPECIFY); + output_byte(fdc, fdc_state[fdc].spec1 = spec1); + output_byte(fdc, fdc_state[fdc].spec2 = spec2); } } /* fdc_specify */ @@ -1946,12 +1946,12 @@ static void floppy_ready(void) if (raw_cmd->flags & (FD_RAW_NEED_SEEK | FD_RAW_NEED_DISK)) { perpendicular_mode(current_fdc); - fdc_specify(); /* must be done here because of hut, hlt ... */ + fdc_specify(current_fdc, current_drive); /* must be done here because of hut, hlt ... 
*/ seek_floppy(); } else { if ((raw_cmd->flags & FD_RAW_READ) || (raw_cmd->flags & FD_RAW_WRITE)) - fdc_specify(); + fdc_specify(current_fdc, current_drive); setup_rw_floppy(); } } -- cgit v1.2.3 From c7af70b0fb2535ee3f7165627fc0e73b1934dbfc Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Tue, 31 Mar 2020 11:40:50 +0200 Subject: floppy: cleanup: make check_wp() not rely on current_{fdc,drive} anymore Now the fdc and drive are passed in argument so that the function does not use current_fdc nor current_drive anymore. Link: https://lore.kernel.org/r/20200331094054.24441-20-w@1wt.eu Signed-off-by: Willy Tarreau Signed-off-by: Denis Efremov --- drivers/block/floppy.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index b929b60afe9b..b9a3a04c2636 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -1561,29 +1561,29 @@ static void seek_interrupt(void) floppy_ready(); } -static void check_wp(void) +static void check_wp(int fdc, int drive) { - if (test_bit(FD_VERIFY_BIT, &drive_state[current_drive].flags)) { + if (test_bit(FD_VERIFY_BIT, &drive_state[drive].flags)) { /* check write protection */ - output_byte(current_fdc, FD_GETSTATUS); - output_byte(current_fdc, UNIT(current_drive)); - if (result(current_fdc) != 1) { - fdc_state[current_fdc].reset = 1; + output_byte(fdc, FD_GETSTATUS); + output_byte(fdc, UNIT(drive)); + if (result(fdc) != 1) { + fdc_state[fdc].reset = 1; return; } - clear_bit(FD_VERIFY_BIT, &drive_state[current_drive].flags); + clear_bit(FD_VERIFY_BIT, &drive_state[drive].flags); clear_bit(FD_NEED_TWADDLE_BIT, - &drive_state[current_drive].flags); - debug_dcl(drive_params[current_drive].flags, + &drive_state[drive].flags); + debug_dcl(drive_params[drive].flags, "checking whether disk is write protected\n"); - debug_dcl(drive_params[current_drive].flags, "wp=%x\n", + debug_dcl(drive_params[drive].flags, "wp=%x\n", reply_buffer[ST3] & 0x40); if (!(reply_buffer[ST3] & 0x40)) set_bit(FD_DISK_WRITABLE_BIT, - &drive_state[current_drive].flags); + &drive_state[drive].flags); else clear_bit(FD_DISK_WRITABLE_BIT, - &drive_state[current_drive].flags); + &drive_state[drive].flags); } } @@ -1627,7 +1627,7 @@ static void seek_floppy(void) track = 1; } } else { - check_wp(); + check_wp(current_fdc, current_drive); if (raw_cmd->track != drive_state[current_drive].track && (raw_cmd->flags & FD_RAW_NEED_SEEK)) track = raw_cmd->track; -- cgit v1.2.3 From 43d81bb6470c431e17f093b3f7adf70fd33ef15a Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Tue, 31 Mar 2020 11:40:51 +0200 Subject: floppy: cleanup: make next_valid_format() not rely on current_drive anymore Now the drive is passed in argument so that the function does not use current_drive anymore. 
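The contract is unchanged, only the drive selection is explicit now; a sketch of the caller's view, mirroring the request path below:

    if (next_valid_format(drive)) {
            /* autodetect list exhausted: probed_format was reset to 0
             * and there is nothing left to try on this drive */
            DPRINT("no autodetectable formats\n");
            request_done(0);
    }
    /* on success, drive_state[drive].probed_format points at the next
     * usable entry in drive_params[drive].autodetect[] */
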
Link: https://lore.kernel.org/r/20200331094054.24441-21-w@1wt.eu Signed-off-by: Willy Tarreau Signed-off-by: Denis Efremov --- drivers/block/floppy.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index b9a3a04c2636..f53810ba486d 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -2058,18 +2058,18 @@ static void success_and_wakeup(void) * ========================== */ -static int next_valid_format(void) +static int next_valid_format(int drive) { int probed_format; - probed_format = drive_state[current_drive].probed_format; + probed_format = drive_state[drive].probed_format; while (1) { - if (probed_format >= 8 || !drive_params[current_drive].autodetect[probed_format]) { - drive_state[current_drive].probed_format = 0; + if (probed_format >= 8 || !drive_params[drive].autodetect[probed_format]) { + drive_state[drive].probed_format = 0; return 1; } - if (floppy_type[drive_params[current_drive].autodetect[probed_format]].sect) { - drive_state[current_drive].probed_format = probed_format; + if (floppy_type[drive_params[drive].autodetect[probed_format]].sect) { + drive_state[drive].probed_format = probed_format; return 0; } probed_format++; @@ -2082,7 +2082,7 @@ static void bad_flp_intr(void) if (probing) { drive_state[current_drive].probed_format++; - if (!next_valid_format()) + if (!next_valid_format(current_drive)) return; } err_count = ++(*errors); @@ -2884,7 +2884,7 @@ do_request: if (!_floppy) { /* Autodetection */ if (!probing) { drive_state[current_drive].probed_format = 0; - if (next_valid_format()) { + if (next_valid_format(current_drive)) { DPRINT("no autodetectable formats\n"); _floppy = NULL; request_done(0); -- cgit v1.2.3 From e5a9c95f9bdb8ca52ce1ee47bd04f07de0e119ae Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Tue, 31 Mar 2020 11:40:52 +0200 Subject: floppy: cleanup: make get_fdc_version() not rely on current_fdc anymore Now the fdc is passed in argument so that the function does not use current_fdc anymore. Link: https://lore.kernel.org/r/20200331094054.24441-22-w@1wt.eu Signed-off-by: Willy Tarreau Signed-off-by: Denis Efremov --- drivers/block/floppy.c | 52 +++++++++++++++++++++++++------------------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index f53810ba486d..8850baa3372a 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -4297,79 +4297,79 @@ static const struct block_device_operations floppy_fops = { /* Determine the floppy disk controller type */ /* This routine was written by David C. Niemi */ -static char __init get_fdc_version(void) +static char __init get_fdc_version(int fdc) { int r; - output_byte(current_fdc, FD_DUMPREGS); /* 82072 and better know DUMPREGS */ - if (fdc_state[current_fdc].reset) + output_byte(fdc, FD_DUMPREGS); /* 82072 and better know DUMPREGS */ + if (fdc_state[fdc].reset) return FDC_NONE; - r = result(current_fdc); + r = result(fdc); if (r <= 0x00) return FDC_NONE; /* No FDC present ??? 
*/ if ((r == 1) && (reply_buffer[0] == 0x80)) { - pr_info("FDC %d is an 8272A\n", current_fdc); + pr_info("FDC %d is an 8272A\n", fdc); return FDC_8272A; /* 8272a/765 don't know DUMPREGS */ } if (r != 10) { pr_info("FDC %d init: DUMPREGS: unexpected return of %d bytes.\n", - current_fdc, r); + fdc, r); return FDC_UNKNOWN; } - if (!fdc_configure(current_fdc)) { - pr_info("FDC %d is an 82072\n", current_fdc); + if (!fdc_configure(fdc)) { + pr_info("FDC %d is an 82072\n", fdc); return FDC_82072; /* 82072 doesn't know CONFIGURE */ } - output_byte(current_fdc, FD_PERPENDICULAR); - if (need_more_output(current_fdc) == MORE_OUTPUT) { - output_byte(current_fdc, 0); + output_byte(fdc, FD_PERPENDICULAR); + if (need_more_output(fdc) == MORE_OUTPUT) { + output_byte(fdc, 0); } else { - pr_info("FDC %d is an 82072A\n", current_fdc); + pr_info("FDC %d is an 82072A\n", fdc); return FDC_82072A; /* 82072A as found on Sparcs. */ } - output_byte(current_fdc, FD_UNLOCK); - r = result(current_fdc); + output_byte(fdc, FD_UNLOCK); + r = result(fdc); if ((r == 1) && (reply_buffer[0] == 0x80)) { - pr_info("FDC %d is a pre-1991 82077\n", current_fdc); + pr_info("FDC %d is a pre-1991 82077\n", fdc); return FDC_82077_ORIG; /* Pre-1991 82077, doesn't know * LOCK/UNLOCK */ } if ((r != 1) || (reply_buffer[0] != 0x00)) { pr_info("FDC %d init: UNLOCK: unexpected return of %d bytes.\n", - current_fdc, r); + fdc, r); return FDC_UNKNOWN; } - output_byte(current_fdc, FD_PARTID); - r = result(current_fdc); + output_byte(fdc, FD_PARTID); + r = result(fdc); if (r != 1) { pr_info("FDC %d init: PARTID: unexpected return of %d bytes.\n", - current_fdc, r); + fdc, r); return FDC_UNKNOWN; } if (reply_buffer[0] == 0x80) { - pr_info("FDC %d is a post-1991 82077\n", current_fdc); + pr_info("FDC %d is a post-1991 82077\n", fdc); return FDC_82077; /* Revised 82077AA passes all the tests */ } switch (reply_buffer[0] >> 5) { case 0x0: /* Either a 82078-1 or a 82078SL running at 5Volt */ - pr_info("FDC %d is an 82078.\n", current_fdc); + pr_info("FDC %d is an 82078.\n", fdc); return FDC_82078; case 0x1: - pr_info("FDC %d is a 44pin 82078\n", current_fdc); + pr_info("FDC %d is a 44pin 82078\n", fdc); return FDC_82078; case 0x2: - pr_info("FDC %d is a S82078B\n", current_fdc); + pr_info("FDC %d is a S82078B\n", fdc); return FDC_S82078B; case 0x3: - pr_info("FDC %d is a National Semiconductor PC87306\n", current_fdc); + pr_info("FDC %d is a National Semiconductor PC87306\n", fdc); return FDC_87306; default: pr_info("FDC %d init: 82078 variant with unknown PARTID=%d.\n", - current_fdc, reply_buffer[0] >> 5); + fdc, reply_buffer[0] >> 5); return FDC_82078_UNKN; } } /* get_fdc_version */ @@ -4711,7 +4711,7 @@ static int __init do_floppy_init(void) continue; } /* Try to determine the floppy controller type */ - fdc_state[current_fdc].version = get_fdc_version(); + fdc_state[current_fdc].version = get_fdc_version(current_fdc); if (fdc_state[current_fdc].version == FDC_NONE) { /* free ioports reserved by floppy_grab_irq_and_dma() */ floppy_release_regions(current_fdc); -- cgit v1.2.3 From 82a630105847d7b7657901643810542212082af6 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Tue, 31 Mar 2020 11:40:53 +0200 Subject: floppy: cleanup: do not iterate on current_fdc in DMA grab/release functions Both floppy_grab_irq_and_dma() and floppy_release_irq_and_dma() used to iterate on the global variable while setting up or freeing resources. 
Now that they exclusively rely on functions which take the fdc as an argument, so let's not touch the global one anymore. Link: https://lore.kernel.org/r/20200331094054.24441-23-w@1wt.eu Signed-off-by: Willy Tarreau Signed-off-by: Denis Efremov --- drivers/block/floppy.c | 39 ++++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 8850baa3372a..77bb9a5fcd33 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -4854,6 +4854,8 @@ static void floppy_release_regions(int fdc) static int floppy_grab_irq_and_dma(void) { + int fdc; + if (atomic_inc_return(&usage_count) > 1) return 0; @@ -4881,24 +4883,24 @@ static int floppy_grab_irq_and_dma(void) } } - for (current_fdc = 0; current_fdc < N_FDC; current_fdc++) { - if (fdc_state[current_fdc].address != -1) { - if (floppy_request_regions(current_fdc)) + for (fdc = 0; fdc < N_FDC; fdc++) { + if (fdc_state[fdc].address != -1) { + if (floppy_request_regions(fdc)) goto cleanup; } } - for (current_fdc = 0; current_fdc < N_FDC; current_fdc++) { - if (fdc_state[current_fdc].address != -1) { - reset_fdc_info(current_fdc, 1); - fdc_outb(fdc_state[current_fdc].dor, current_fdc, FD_DOR); + for (fdc = 0; fdc < N_FDC; fdc++) { + if (fdc_state[fdc].address != -1) { + reset_fdc_info(fdc, 1); + fdc_outb(fdc_state[fdc].dor, fdc, FD_DOR); } } - current_fdc = 0; + set_dor(0, ~0, 8); /* avoid immediate interrupt */ - for (current_fdc = 0; current_fdc < N_FDC; current_fdc++) - if (fdc_state[current_fdc].address != -1) - fdc_outb(fdc_state[current_fdc].dor, current_fdc, FD_DOR); + for (fdc = 0; fdc < N_FDC; fdc++) + if (fdc_state[fdc].address != -1) + fdc_outb(fdc_state[fdc].dor, fdc, FD_DOR); /* * The driver will try and free resources and relies on us * to know if they were allocated or not. @@ -4909,15 +4911,16 @@ static int floppy_grab_irq_and_dma(void) cleanup: fd_free_irq(); fd_free_dma(); - while (--current_fdc >= 0) - floppy_release_regions(current_fdc); + while (--fdc >= 0) + floppy_release_regions(fdc); + current_fdc = 0; atomic_dec(&usage_count); return -1; } static void floppy_release_irq_and_dma(void) { - int old_fdc; + int fdc; #ifndef __sparc__ int drive; #endif @@ -4958,11 +4961,9 @@ static void floppy_release_irq_and_dma(void) pr_info("auxiliary floppy timer still active\n"); if (work_pending(&floppy_work)) pr_info("work still pending\n"); - old_fdc = current_fdc; - for (current_fdc = 0; current_fdc < N_FDC; current_fdc++) - if (fdc_state[current_fdc].address != -1) - floppy_release_regions(current_fdc); - current_fdc = old_fdc; + for (fdc = 0; fdc < N_FDC; fdc++) + if (fdc_state[fdc].address != -1) + floppy_release_regions(fdc); } #ifdef MODULE -- cgit v1.2.3 From 12aebfac27ab69b5ed333c94fda45ef31ba2fc2a Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Tue, 31 Mar 2020 11:40:54 +0200 Subject: floppy: cleanup: add a few comments about expectations in certain functions The locking in the driver is far from being obvious, with unlocking automatically happening at end of operations scheduled by interrupt, especially for the error paths where one does not necessarily expect that such an interrupt may be triggered. Let's add a few comments about what to expect at certain places to avoid misdetecting bugs which are not. 
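The locking convention these comments document can be condensed to: whoever schedules an interrupt-driven operation hands the driver lock off to the completion path. A sketch of a typical caller following this pattern (condensed from the user_reset_fdc() hunk in the diff below, error handling elided):

    if (lock_fdc(drive))                    /* take the driver lock          */
            return -EINTR;
    /* ... */
    cont = &reset_cont;                     /* its ->done() runs on completion */
    ret = wait_til_done(reset_fdc, interruptible);
    if (ret == -EINTR)
            return ret;                     /* driver was already unlocked   */
    /* on normal completion the cont's handlers released the lock, too */

The non-obvious part, which the new comments spell out, is that even error paths complete through an interrupt, so the unlock happens asynchronously rather than in the caller's frame.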
Link: https://lore.kernel.org/r/20200331094054.24441-24-w@1wt.eu Signed-off-by: Willy Tarreau Signed-off-by: Denis Efremov --- drivers/block/floppy.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 77bb9a5fcd33..07218f8b17f9 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -1791,7 +1791,9 @@ static void reset_interrupt(void) /* * reset is done by pulling bit 2 of DOR low for a while (old FDCs), - * or by setting the self clearing bit 7 of STATUS (newer FDCs) + * or by setting the self clearing bit 7 of STATUS (newer FDCs). + * This WILL trigger an interrupt, causing the handlers in the current + * cont's ->redo() to be called via reset_interrupt(). */ static void reset_fdc(void) { @@ -2003,6 +2005,9 @@ static const struct cont_t intr_cont = { .done = (done_f)empty }; +/* schedules handler, waiting for completion. May be interrupted, will then + * return -EINTR, in which case the driver will automatically be unlocked. + */ static int wait_til_done(void (*handler)(void), bool interruptible) { int ret; @@ -2842,6 +2847,9 @@ static int set_next_request(void) return current_req != NULL; } +/* Starts or continues processing request. Will automatically unlock the + * driver at end of request. + */ static void redo_fd_request(void) { int drive; @@ -2916,6 +2924,7 @@ static const struct cont_t rw_cont = { .done = request_done }; +/* schedule the request and automatically unlock the driver on completion */ static void process_fd_request(void) { cont = &rw_cont; @@ -3005,6 +3014,9 @@ static int user_reset_fdc(int drive, int arg, bool interruptible) if (arg == FD_RESET_ALWAYS) fdc_state[current_fdc].reset = 1; if (fdc_state[current_fdc].reset) { + /* note: reset_fdc will take care of unlocking the driver + * on completion. + */ cont = &reset_cont; ret = wait_til_done(reset_fdc, interruptible); if (ret == -EINTR) -- cgit v1.2.3 From 05f5e319a1eb017442cd0eec87ad52a62d8c3224 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Fri, 10 Apr 2020 11:30:23 +0200 Subject: floppy: cleanup: do not iterate on current_fdc in do_floppy_init() There's no need to iterate on current_fdc in do_floppy_init() anymore, in the first case it's only used as an array index to access fdc_state[], so let's get rid of this confusing assignment. The second case is a bit trickier because user_reset_fdc() needs to already know current_fdc when called with drive==-1 due to this call chain: user_reset_fdc() lock_fdc() set_fdc() drive<0 ==> new_fdc = current_fdc Note that current_drive is not used in this code part and may even not match a unit belonging to current_fdc. Instead of passing -1 we can simply pass the first drive of the FDC being initialized, which is even cleaner as it will allow the function chain above to consistently assign both variables. 
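REVDRIVE() is the inverse of the FDC()/UNIT() decomposition used all over the driver: with four units per controller, a drive number encodes its controller in the upper bits. Schematically (functionally equivalent to the floppy.c macros for valid drive numbers, shown here only for orientation):

    #define FDC(drive)           ((drive) >> 2)           /* controller owning the drive */
    #define UNIT(drive)          ((drive) & 0x03)         /* unit within that controller */
    #define REVDRIVE(fdc, unit)  (((fdc) << 2) | (unit))  /* compose a drive number back */

So user_reset_fdc(REVDRIVE(i, 0), ...) addresses the first drive of FDC i, and set_fdc() can derive and assign both current_fdc and current_drive consistently from that single argument.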
Link: https://lore.kernel.org/r/20200410093023.14499-1-w@1wt.eu Signed-off-by: Willy Tarreau Signed-off-by: Denis Efremov --- drivers/block/floppy.c | 38 ++++++++++++++++++-------------------- 1 file changed, 18 insertions(+), 20 deletions(-) diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 07218f8b17f9..8da7921659f1 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -4657,16 +4657,15 @@ static int __init do_floppy_init(void) config_types(); for (i = 0; i < N_FDC; i++) { - current_fdc = i; - memset(&fdc_state[current_fdc], 0, sizeof(*fdc_state)); - fdc_state[current_fdc].dtr = -1; - fdc_state[current_fdc].dor = 0x4; + memset(&fdc_state[i], 0, sizeof(*fdc_state)); + fdc_state[i].dtr = -1; + fdc_state[i].dor = 0x4; #if defined(__sparc__) || defined(__mc68000__) /*sparcs/sun3x don't have a DOR reset which we can fall back on to */ #ifdef __mc68000__ if (MACH_IS_SUN3X) #endif - fdc_state[current_fdc].version = FDC_82072A; + fdc_state[i].version = FDC_82072A; #endif } @@ -4708,30 +4707,29 @@ static int __init do_floppy_init(void) msleep(10); for (i = 0; i < N_FDC; i++) { - current_fdc = i; - fdc_state[current_fdc].driver_version = FD_DRIVER_VERSION; + fdc_state[i].driver_version = FD_DRIVER_VERSION; for (unit = 0; unit < 4; unit++) - fdc_state[current_fdc].track[unit] = 0; - if (fdc_state[current_fdc].address == -1) + fdc_state[i].track[unit] = 0; + if (fdc_state[i].address == -1) continue; - fdc_state[current_fdc].rawcmd = 2; - if (user_reset_fdc(-1, FD_RESET_ALWAYS, false)) { + fdc_state[i].rawcmd = 2; + if (user_reset_fdc(REVDRIVE(i, 0), FD_RESET_ALWAYS, false)) { /* free ioports reserved by floppy_grab_irq_and_dma() */ - floppy_release_regions(current_fdc); - fdc_state[current_fdc].address = -1; - fdc_state[current_fdc].version = FDC_NONE; + floppy_release_regions(i); + fdc_state[i].address = -1; + fdc_state[i].version = FDC_NONE; continue; } /* Try to determine the floppy controller type */ - fdc_state[current_fdc].version = get_fdc_version(current_fdc); - if (fdc_state[current_fdc].version == FDC_NONE) { + fdc_state[i].version = get_fdc_version(i); + if (fdc_state[i].version == FDC_NONE) { /* free ioports reserved by floppy_grab_irq_and_dma() */ - floppy_release_regions(current_fdc); - fdc_state[current_fdc].address = -1; + floppy_release_regions(i); + fdc_state[i].address = -1; continue; } if (can_use_virtual_dma == 2 && - fdc_state[current_fdc].version < FDC_82072A) + fdc_state[i].version < FDC_82072A) can_use_virtual_dma = 0; have_no_fdc = 0; @@ -4739,7 +4737,7 @@ static int __init do_floppy_init(void) * properly, so force a reset for the standard FDC clones, * to avoid interrupt garbage. */ - user_reset_fdc(-1, FD_RESET_ALWAYS, false); + user_reset_fdc(REVDRIVE(i, 0), FD_RESET_ALWAYS, false); } current_fdc = 0; cancel_delayed_work(&fd_timeout); -- cgit v1.2.3 From 6111a4f9bb189e76cda6a306074c9746ddeef04b Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Fri, 10 Apr 2020 12:19:02 +0200 Subject: floppy: make sure to reset all FDCs upon resume() In floppy_resume() we don't properly reinitialize all FDCs, instead we reinitialize the current FDC once per available FDC because value -1 is passed to user_reset_fdc(). Let's simply save the current drive and properly reinitialize each FDC. 
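With the fix applied, the resume path reads as follows (resulting code, assembled from the diff below):

    static int floppy_resume(struct device *dev)
    {
            int fdc;
            int saved_drive;

            saved_drive = current_drive;
            for (fdc = 0; fdc < N_FDC; fdc++)
                    if (fdc_state[fdc].address != -1)
                            user_reset_fdc(REVDRIVE(fdc, 0), FD_RESET_ALWAYS, false);
            set_fdc(saved_drive);
            return 0;
    }

Each present controller is now reset exactly once, and the drive selection seen by the rest of the driver is restored afterwards.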
Link: https://lore.kernel.org/r/20200410101904.14652-1-w@1wt.eu Signed-off-by: Willy Tarreau Signed-off-by: Denis Efremov --- drivers/block/floppy.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 8da7921659f1..b102f55dfa5d 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -4545,11 +4545,13 @@ static void floppy_device_release(struct device *dev) static int floppy_resume(struct device *dev) { int fdc; + int saved_drive; + saved_drive = current_drive; for (fdc = 0; fdc < N_FDC; fdc++) if (fdc_state[fdc].address != -1) - user_reset_fdc(-1, FD_RESET_ALWAYS, false); - + user_reset_fdc(REVDRIVE(fdc, 0), FD_RESET_ALWAYS, false); + set_fdc(saved_drive); return 0; } -- cgit v1.2.3 From 99ba6ccc7f8f362ae52ddddda2252e753329c7ec Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Fri, 10 Apr 2020 12:19:03 +0200 Subject: floppy: cleanup: get rid of current_reqD in favor of current_drive This macro equals -1 and is used as an alternative for current_drive when calling reschedule_timeout(), which in turn needs to remap it. This only adds obfuscation; let's simply use current_drive. Link: https://lore.kernel.org/r/20200410101904.14652-2-w@1wt.eu Signed-off-by: Willy Tarreau Signed-off-by: Denis Efremov --- drivers/block/floppy.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index b102f55dfa5d..20646d4c5437 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -668,16 +668,12 @@ static struct output_log { static int output_log_pos; -#define current_reqD -1 #define MAXTIMEOUT -2 static void __reschedule_timeout(int drive, const char *message) { unsigned long delay; - if (drive == current_reqD) - drive = current_drive; - if (drive < 0 || drive >= N_DRIVE) { delay = 20UL * HZ; drive = 0; @@ -1960,7 +1956,7 @@ static void floppy_ready(void) static void floppy_start(void) { - reschedule_timeout(current_reqD, "floppy start"); + reschedule_timeout(current_drive, "floppy start"); scandrives(); debug_dcl(drive_params[current_drive].flags, @@ -2874,7 +2870,7 @@ do_request: } drive = (long)current_req->rq_disk->private_data; set_fdc(drive); - reschedule_timeout(current_reqD, "redo fd request"); + reschedule_timeout(current_drive, "redo fd request"); set_floppy(drive); raw_cmd = &default_raw_cmd; -- cgit v1.2.3 From ca1b409a3b8a190c13bb30ed3ad91585d434d8e2 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Fri, 10 Apr 2020 12:19:04 +0200 Subject: floppy: cleanup: make set_fdc() always set current_drive and current_fdc When called with a negative drive value, set_fdc() would stick to the current fdc (which was assumed to reflect the current_drive's FDC). We do not need this anymore as the last call place with a negative value was just addressed. Let's make this function always set both current_fdc and current_drive so that there's no more ambiguity. A few comments stating this were added to a few non-obvious places.
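After this change the selection chain has a single unambiguous direction; condensed from the functions touched in the diff below:

    user_reset_fdc(drive, ...)
        -> lock_fdc(drive)
            -> set_fdc(drive)
                /* drive must be in [0, N_DRIVE); otherwise log and bail out */
                current_drive = drive;
                current_fdc   = FDC(drive);

Negative or out-of-range drive values are now rejected with a message instead of silently reusing whatever current_fdc happened to hold.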
Link: https://lore.kernel.org/r/20200410101904.14652-3-w@1wt.eu Signed-off-by: Willy Tarreau Signed-off-by: Denis Efremov --- drivers/block/floppy.c | 43 +++++++++++++++++++++++++++++-------------- 1 file changed, 29 insertions(+), 14 deletions(-) diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 20646d4c5437..2817170dd403 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -851,31 +851,42 @@ static void reset_fdc_info(int fdc, int mode) drive_state[drive].track = NEED_2_RECAL; } -/* selects the fdc and drive, and enables the fdc's input/dma. */ +/* + * selects the fdc and drive, and enables the fdc's input/dma. + * Both current_drive and current_fdc are changed to match the new drive. + */ static void set_fdc(int drive) { - unsigned int new_fdc = current_fdc; + unsigned int fdc; - if (drive >= 0 && drive < N_DRIVE) { - new_fdc = FDC(drive); - current_drive = drive; + if (drive < 0 || drive >= N_DRIVE) { + pr_info("bad drive value %d\n", drive); + return; } - if (new_fdc >= N_FDC) { + + fdc = FDC(drive); + if (fdc >= N_FDC) { pr_info("bad fdc value\n"); return; } - current_fdc = new_fdc; - set_dor(current_fdc, ~0, 8); + + set_dor(fdc, ~0, 8); #if N_FDC > 1 - set_dor(1 - current_fdc, ~8, 0); + set_dor(1 - fdc, ~8, 0); #endif - if (fdc_state[current_fdc].rawcmd == 2) - reset_fdc_info(current_fdc, 1); - if (fdc_inb(current_fdc, FD_STATUS) != STATUS_READY) - fdc_state[current_fdc].reset = 1; + if (fdc_state[fdc].rawcmd == 2) + reset_fdc_info(fdc, 1); + if (fdc_inb(fdc, FD_STATUS) != STATUS_READY) + fdc_state[fdc].reset = 1; + + current_drive = drive; + current_fdc = fdc; } -/* locks the driver */ +/* + * locks the driver. + * Both current_drive and current_fdc are changed to match the new drive. + */ static int lock_fdc(int drive) { if (WARN(atomic_read(&usage_count) == 0, @@ -3000,6 +3011,10 @@ static const struct cont_t reset_cont = { .done = generic_done }; +/* + * Resets the FDC connected to drive <drive>. + * Both current_drive and current_fdc are changed to match the new drive. + */ static int user_reset_fdc(int drive, int arg, bool interruptible) { int ret; -- cgit v1.2.3 From 29ac67633c893dec0024fb7597860fde52fdc819 Mon Sep 17 00:00:00 2001 From: Denis Efremov Date: Fri, 1 May 2020 16:44:13 +0300 Subject: floppy: use print_hex_dump() in setup_DMA() Remove pr_cont() and use print_hex_dump() in setup_DMA() to print the contents of the cmd buffer.
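For reference, print_hex_dump() (declared in <linux/printk.h>) emits a whole buffer in a single call, which is what lets the hand-rolled pr_info()/pr_cont() loop go away:

    void print_hex_dump(const char *level, const char *prefix_str,
                        int prefix_type, int rowsize, int groupsize,
                        const void *buf, size_t len, bool ascii);

    /* the replacement call, as used in the diff below */
    print_hex_dump(KERN_INFO, "zero dma transfer size: ",
                   DUMP_PREFIX_NONE, 16, 1,
                   raw_cmd->cmd, raw_cmd->cmd_count, false);

Besides being shorter, a single call avoids the line-interleaving problems that pr_cont() can suffer when several printk sources race.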
Link: https://lore.kernel.org/r/20200501134416.72248-2-efremov@linux.com Suggested-by: Joe Perches Reviewed-by: Christoph Hellwig Signed-off-by: Denis Efremov --- drivers/block/floppy.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 2817170dd403..3ab6e804b5ec 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -1069,12 +1069,9 @@ static void setup_DMA(void) unsigned long f; if (raw_cmd->length == 0) { - int i; - - pr_info("zero dma transfer size:"); - for (i = 0; i < raw_cmd->cmd_count; i++) - pr_cont("%x,", raw_cmd->cmd[i]); - pr_cont("\n"); + print_hex_dump(KERN_INFO, "zero dma transfer size: ", + DUMP_PREFIX_NONE, 16, 1, + raw_cmd->cmd, raw_cmd->cmd_count, false); cont->done(0); fdc_state[current_fdc].reset = 1; return; -- cgit v1.2.3 From 9c4c5a24c85585fb8904bd2872501cd8181b3854 Mon Sep 17 00:00:00 2001 From: Denis Efremov Date: Fri, 1 May 2020 16:44:14 +0300 Subject: floppy: add FD_AUTODETECT_SIZE define for struct floppy_drive_params Use FD_AUTODETECT_SIZE for autodetect buffer size in struct floppy_drive_params instead of a magic number. Link: https://lore.kernel.org/r/20200501134416.72248-3-efremov@linux.com Reviewed-by: Christoph Hellwig Signed-off-by: Denis Efremov --- drivers/block/floppy.c | 9 +++++---- include/uapi/linux/fd.h | 5 ++++- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 3ab6e804b5ec..b82b3d38b834 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -2073,7 +2073,8 @@ static int next_valid_format(int drive) probed_format = drive_state[drive].probed_format; while (1) { - if (probed_format >= 8 || !drive_params[drive].autodetect[probed_format]) { + if (probed_format >= FD_AUTODETECT_SIZE || + !drive_params[drive].autodetect[probed_format]) { drive_state[drive].probed_format = 0; return 1; } @@ -3442,13 +3443,13 @@ static int fd_getgeo(struct block_device *bdev, struct hd_geometry *geo) return 0; } -static bool valid_floppy_drive_params(const short autodetect[8], +static bool valid_floppy_drive_params(const short autodetect[FD_AUTODETECT_SIZE], int native_format) { size_t floppy_type_size = ARRAY_SIZE(floppy_type); size_t i = 0; - for (i = 0; i < 8; ++i) { + for (i = 0; i < FD_AUTODETECT_SIZE; ++i) { if (autodetect[i] < 0 || autodetect[i] >= floppy_type_size) return false; @@ -3673,7 +3674,7 @@ struct compat_floppy_drive_params { struct floppy_max_errors max_errors; char flags; char read_track; - short autodetect[8]; + short autodetect[FD_AUTODETECT_SIZE]; compat_int_t checkfreq; compat_int_t native_format; }; diff --git a/include/uapi/linux/fd.h b/include/uapi/linux/fd.h index 90fb94712c41..3f6b7be4c096 100644 --- a/include/uapi/linux/fd.h +++ b/include/uapi/linux/fd.h @@ -172,7 +172,10 @@ struct floppy_drive_params { * used in succession to try to read the disk. If the FDC cannot lock onto * the disk, the next format is tried. This uses the variable 'probing'. 
*/ - short autodetect[8]; /* autodetected formats */ + +#define FD_AUTODETECT_SIZE 8 + + short autodetect[FD_AUTODETECT_SIZE]; /* autodetected formats */ int checkfreq; /* how often should the drive be checked for disk * changes */ -- cgit v1.2.3 From bd10a5f3e21b1cb8e2133c1f08b3e8207cee12dd Mon Sep 17 00:00:00 2001 From: Denis Efremov Date: Fri, 1 May 2020 16:44:15 +0300 Subject: floppy: add defines for sizes of cmd & reply buffers of floppy_raw_cmd Use FD_RAW_CMD_SIZE, FD_RAW_REPLY_SIZE defines instead of magic numbers for cmd & reply buffers of struct floppy_raw_cmd. Remove local to floppy.c MAX_REPLIES define, as it is now FD_RAW_REPLY_SIZE. FD_RAW_CMD_FULLSIZE added as we allow command to also fill reply_count and reply fields. Link: https://lore.kernel.org/r/20200501134416.72248-4-efremov@linux.com Reviewed-by: Christoph Hellwig Signed-off-by: Denis Efremov --- drivers/block/floppy.c | 19 +++++-------------- include/uapi/linux/fd.h | 14 ++++++++++++-- 2 files changed, 17 insertions(+), 16 deletions(-) diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index b82b3d38b834..9e098d53b046 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -337,8 +337,7 @@ static bool initialized; /* * globals used by 'result()' */ -#define MAX_REPLIES 16 -static unsigned char reply_buffer[MAX_REPLIES]; +static unsigned char reply_buffer[FD_RAW_REPLY_SIZE]; static int inr; /* size of reply buffer, when called from interrupt */ #define ST0 0 #define ST1 1 @@ -1162,7 +1161,7 @@ static int result(int fdc) int i; int status = 0; - for (i = 0; i < MAX_REPLIES; i++) { + for (i = 0; i < FD_RAW_REPLY_SIZE; i++) { status = wait_til_ready(fdc); if (status < 0) break; @@ -3079,7 +3078,7 @@ static void raw_cmd_done(int flag) raw_cmd->flags |= FD_RAW_HARDFAILURE; } else { raw_cmd->reply_count = inr; - if (raw_cmd->reply_count > MAX_REPLIES) + if (raw_cmd->reply_count > FD_RAW_REPLY_SIZE) raw_cmd->reply_count = 0; for (i = 0; i < raw_cmd->reply_count; i++) raw_cmd->reply[i] = reply_buffer[i]; @@ -3190,18 +3189,10 @@ loop: if (ret) return -EFAULT; param += sizeof(struct floppy_raw_cmd); - if (ptr->cmd_count > 33) - /* the command may now also take up the space - * initially intended for the reply & the - * reply count. Needed for long 82078 commands - * such as RESTORE, which takes ... 17 command - * bytes. Murphy's law #137: When you reserve - * 16 bytes for a structure, you'll one day - * discover that you really need 17... - */ + if (ptr->cmd_count > FD_RAW_CMD_FULLSIZE) return -EINVAL; - for (i = 0; i < 16; i++) + for (i = 0; i < FD_RAW_REPLY_SIZE; i++) ptr->reply[i] = 0; ptr->resultcode = 0; diff --git a/include/uapi/linux/fd.h b/include/uapi/linux/fd.h index 3f6b7be4c096..2e9c2c1c18e6 100644 --- a/include/uapi/linux/fd.h +++ b/include/uapi/linux/fd.h @@ -360,10 +360,20 @@ struct floppy_raw_cmd { int buffer_length; /* length of allocated buffer */ unsigned char rate; + +#define FD_RAW_CMD_SIZE 16 +#define FD_RAW_REPLY_SIZE 16 +#define FD_RAW_CMD_FULLSIZE (FD_RAW_CMD_SIZE + 1 + FD_RAW_REPLY_SIZE) + + /* The command may take up the space initially intended for the reply + * and the reply count. Needed for long 82078 commands such as RESTORE, + * which takes 17 command bytes. 
+ */ + unsigned char cmd_count; - unsigned char cmd[16]; + unsigned char cmd[FD_RAW_CMD_SIZE]; unsigned char reply_count; - unsigned char reply[16]; + unsigned char reply[FD_RAW_REPLY_SIZE]; int track; int resultcode; -- cgit v1.2.3 From 0836275df4db20daf040fff5d9a1da89c4c08a85 Mon Sep 17 00:00:00 2001 From: Denis Efremov Date: Fri, 1 May 2020 16:44:16 +0300 Subject: floppy: suppress UBSAN warning in setup_rw_floppy() UBSAN: array-index-out-of-bounds in drivers/block/floppy.c:1521:45 index 16 is out of range for type 'unsigned char [16]' Call Trace: ... setup_rw_floppy+0x5c3/0x7f0 floppy_ready+0x2be/0x13b0 process_one_work+0x2c1/0x5d0 worker_thread+0x56/0x5e0 kthread+0x122/0x170 ret_from_fork+0x35/0x40 From include/uapi/linux/fd.h: struct floppy_raw_cmd { ... unsigned char cmd_count; unsigned char cmd[16]; unsigned char reply_count; unsigned char reply[16]; ... } This out-of-bounds access is intentional. The command in struct floppy_raw_cmd may take up the space initially intended for the reply and the reply count. It is needed for long 82078 commands such as RESTORE, which takes 17 command bytes. The initial cmd size is not enough, and since struct floppy_raw_cmd is a part of uapi we check that cmd_count is in [0:16+1+16] in raw_cmd_copyin(). The patch adds a union of the original cmd, reply_count and reply fields with a fullcmd field of equivalent size. The cmd accesses are switched to fullcmd where appropriate to suppress the UBSAN warning. Link: https://lore.kernel.org/r/20200501134416.72248-5-efremov@linux.com Reviewed-by: Christoph Hellwig Signed-off-by: Denis Efremov --- drivers/block/floppy.c | 4 ++-- include/uapi/linux/fd.h | 11 ++++++++--- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 9e098d53b046..064c1acb9f00 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -1070,7 +1070,7 @@ static void setup_DMA(void) if (raw_cmd->length == 0) { print_hex_dump(KERN_INFO, "zero dma transfer size: ", DUMP_PREFIX_NONE, 16, 1, - raw_cmd->cmd, raw_cmd->cmd_count, false); + raw_cmd->fullcmd, raw_cmd->cmd_count, false); cont->done(0); fdc_state[current_fdc].reset = 1; return; @@ -1515,7 +1515,7 @@ static void setup_rw_floppy(void) r = 0; for (i = 0; i < raw_cmd->cmd_count; i++) - r |= output_byte(current_fdc, raw_cmd->cmd[i]); + r |= output_byte(current_fdc, raw_cmd->fullcmd[i]); debugt(__func__, "rw_command"); diff --git a/include/uapi/linux/fd.h b/include/uapi/linux/fd.h index 2e9c2c1c18e6..8b80c63b971c 100644 --- a/include/uapi/linux/fd.h +++ b/include/uapi/linux/fd.h @@ -371,9 +371,14 @@ struct floppy_raw_cmd { */ unsigned char cmd_count; - unsigned char cmd[FD_RAW_CMD_SIZE]; - unsigned char reply_count; - unsigned char reply[FD_RAW_REPLY_SIZE]; + union { + struct { + unsigned char cmd[FD_RAW_CMD_SIZE]; + unsigned char reply_count; + unsigned char reply[FD_RAW_REPLY_SIZE]; + }; + unsigned char fullcmd[FD_RAW_CMD_FULLSIZE]; + }; int track; int resultcode; -- cgit v1.2.3 From 21e0958ec9684e76e32f822c5e611a7d7ea0a5ba Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Sat, 4 Apr 2020 23:57:07 +0200 Subject: md: add checkings before flush md_misc_wq Coly reported a possible circular locking dependency with LOCKDEP enabled, quote the below info from the detailed report [1].
[ 1607.673903] Chain exists of: [ 1607.673903] kn->count#256 --> (wq_completion)md_misc --> (work_completion)(&rdev->del_work) [ 1607.673903] [ 1607.827946] Possible unsafe locking scenario: [ 1607.827946] [ 1607.898780] CPU0 CPU1 [ 1607.952980] ---- ---- [ 1608.007173] lock((work_completion)(&rdev->del_work)); [ 1608.069690] lock((wq_completion)md_misc); [ 1608.149887] lock((work_completion)(&rdev->del_work)); [ 1608.242563] lock(kn->count#256); [ 1608.283238] [ 1608.283238] *** DEADLOCK *** [ 1608.283238] [ 1608.354078] 2 locks held by kworker/5:0/843: [ 1608.405152] #0: ffff8889eecc9948 ((wq_completion)md_misc){+.+.}, at: process_one_work+0x42b/0xb30 [ 1608.512399] #1: ffff888a1d3b7e10 ((work_completion)(&rdev->del_work)){+.+.}, at: process_one_work+0x42b/0xb30 [ 1608.632130] Since works (rdev->del_work and mddev->del_work) are queued in md_misc_wq, then lockdep_map lock is held if either of them are running, then both of them try to hold kernfs lock by call kobject_del. Then if new_dev_store or array_state_store are triggered by write to the related sysfs node, so the write operation gets kernfs lock, but need the lockdep_map because all of them would trigger flush_workqueue(md_misc_wq) finally, then the same lockdep_map lock is needed. To suppress the lockdep warnning, we should flush the workqueue in case the related work is pending. And several works are attached to md_misc_wq, so we need to check which work should be checked: 1. for __md_stop_writes, the purpose of call flush workqueue is ensure sync thread is started if it was starting, so check mddev->del_work is pending or not since md_start_sync is attached to mddev->del_work. 2. __md_stop flushes md_misc_wq to ensure event_work is done, check the event_work is enough. Assume raid_{ctr,dtr} -> md_stop -> __md_stop doesn't need the kernfs lock. 3. both new_dev_store (holds kernfs lock) and ADD_NEW_DISK ioctl (holds the bdev->bd_mutex) call flush_workqueue to ensure md_delayed_delete has completed, this case will be handled in next patch. 4. md_open flushes workqueue to ensure the previous md is disappeared, but it holds bdev->bd_mutex then try to flush workqueue, so it is better to check mddev->del_work as well to avoid potential lock issue, this will be done in another patch. 
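The resulting idiom at each call site is to flush only when the work item that matters might still run, so the workqueue's lockdep map is not touched on the common path; a sketch of the shape used below:

    /* e.g. __md_stop_writes(): only wait if md_start_sync was scheduled */
    if (work_pending(&mddev->del_work))
            flush_workqueue(md_misc_wq);

Since flush_workqueue() is what records the (wq_completion)md_misc dependency, skipping it when the work is not pending breaks the reported cycle, which the patch argues is sufficient for these sites.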
[1]: https://marc.info/?l=linux-raid&m=158518958031584&w=2 Cc: Coly Li Reported-by: Coly Li Signed-off-by: Guoqing Jiang Signed-off-by: Song Liu --- drivers/md/md.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 271e8a587354..76c32ca6eee8 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -6147,7 +6147,8 @@ static void md_clean(struct mddev *mddev) static void __md_stop_writes(struct mddev *mddev) { set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); - flush_workqueue(md_misc_wq); + if (work_pending(&mddev->del_work)) + flush_workqueue(md_misc_wq); if (mddev->sync_thread) { set_bit(MD_RECOVERY_INTR, &mddev->recovery); md_reap_sync_thread(mddev); @@ -6200,7 +6201,8 @@ static void __md_stop(struct mddev *mddev) md_bitmap_destroy(mddev); mddev_detach(mddev); /* Ensure ->event_work is done */ - flush_workqueue(md_misc_wq); + if (mddev->event_work.func) + flush_workqueue(md_misc_wq); spin_lock(&mddev->lock); mddev->pers = NULL; spin_unlock(&mddev->lock); -- cgit v1.2.3 From cc1ffe61c026e2318024bfa8722e8f689fd2a7f4 Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Sat, 4 Apr 2020 23:57:08 +0200 Subject: md: add new workqueue for delete rdev Since the purpose of call flush_workqueue in new_dev_store is to ensure md_delayed_delete() has completed, so we should check rdev->del_work is pending or not. To suppress lockdep warning, we have to check mddev->del_work while md_delayed_delete is attached to rdev->del_work, so it is not aligned to the purpose of flush workquee. So a new workqueue is needed to avoid the awkward situation, and introduce a new func flush_rdev_wq to flush the new workqueue after check if there was pending work. Also like new_dev_store, ADD_NEW_DISK ioctl has the same purpose to flush workqueue while it holds bdev->bd_mutex, so make the same change applies to the ioctl to avoid similar lock issue. And md_delayed_delete actually wants to delete rdev, so rename the function to rdev_delayed_delete. Signed-off-by: Guoqing Jiang Signed-off-by: Song Liu --- drivers/md/md.c | 37 +++++++++++++++++++++++++++++-------- 1 file changed, 29 insertions(+), 8 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 76c32ca6eee8..a970fdd0ce8c 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -89,6 +89,7 @@ static struct module *md_cluster_mod; static DECLARE_WAIT_QUEUE_HEAD(resync_wait); static struct workqueue_struct *md_wq; static struct workqueue_struct *md_misc_wq; +static struct workqueue_struct *md_rdev_misc_wq; static int remove_and_add_spares(struct mddev *mddev, struct md_rdev *this); @@ -2454,7 +2455,7 @@ static int bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev) return err; } -static void md_delayed_delete(struct work_struct *ws) +static void rdev_delayed_delete(struct work_struct *ws) { struct md_rdev *rdev = container_of(ws, struct md_rdev, del_work); kobject_del(&rdev->kobj); @@ -2479,9 +2480,9 @@ static void unbind_rdev_from_array(struct md_rdev *rdev) * to delay it due to rcu usage. 
*/ synchronize_rcu(); - INIT_WORK(&rdev->del_work, md_delayed_delete); + INIT_WORK(&rdev->del_work, rdev_delayed_delete); kobject_get(&rdev->kobj); - queue_work(md_misc_wq, &rdev->del_work); + queue_work(md_rdev_misc_wq, &rdev->del_work); } /* @@ -4514,6 +4515,20 @@ null_show(struct mddev *mddev, char *page) return -EINVAL; } +/* need to ensure rdev_delayed_delete() has completed */ +static void flush_rdev_wq(struct mddev *mddev) +{ + struct md_rdev *rdev; + + rcu_read_lock(); + rdev_for_each_rcu(rdev, mddev) + if (work_pending(&rdev->del_work)) { + flush_workqueue(md_rdev_misc_wq); + break; + } + rcu_read_unlock(); +} + static ssize_t new_dev_store(struct mddev *mddev, const char *buf, size_t len) { @@ -4541,8 +4556,7 @@ new_dev_store(struct mddev *mddev, const char *buf, size_t len) minor != MINOR(dev)) return -EOVERFLOW; - flush_workqueue(md_misc_wq); - + flush_rdev_wq(mddev); err = mddev_lock(mddev); if (err) return err; @@ -4780,7 +4794,8 @@ action_store(struct mddev *mddev, const char *page, size_t len) clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) && mddev_lock(mddev) == 0) { - flush_workqueue(md_misc_wq); + if (work_pending(&mddev->del_work)) + flush_workqueue(md_misc_wq); if (mddev->sync_thread) { set_bit(MD_RECOVERY_INTR, &mddev->recovery); md_reap_sync_thread(mddev); @@ -7498,8 +7513,7 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode, } if (cmd == ADD_NEW_DISK) - /* need to ensure md_delayed_delete() has completed */ - flush_workqueue(md_misc_wq); + flush_rdev_wq(mddev); if (cmd == HOT_REMOVE_DISK) /* need to ensure recovery thread has run */ @@ -9471,6 +9485,10 @@ static int __init md_init(void) if (!md_misc_wq) goto err_misc_wq; + md_rdev_misc_wq = alloc_workqueue("md_rdev_misc", 0, 0); + if (!md_misc_wq) + goto err_rdev_misc_wq; + if ((ret = register_blkdev(MD_MAJOR, "md")) < 0) goto err_md; @@ -9492,6 +9510,8 @@ static int __init md_init(void) err_mdp: unregister_blkdev(MD_MAJOR, "md"); err_md: + destroy_workqueue(md_rdev_misc_wq); +err_rdev_misc_wq: destroy_workqueue(md_misc_wq); err_misc_wq: destroy_workqueue(md_wq); @@ -9778,6 +9798,7 @@ static __exit void md_exit(void) * destroy_workqueue() below will wait for that to complete. */ } + destroy_workqueue(md_rdev_misc_wq); destroy_workqueue(md_misc_wq); destroy_workqueue(md_wq); } -- cgit v1.2.3 From f6766ff6afff70e2aaf39e1511e16d471de7c3ae Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Sat, 4 Apr 2020 23:57:09 +0200 Subject: md: don't flush workqueue unconditionally in md_open We need to check mddev->del_work before flush workqueu since the purpose of flush is to ensure the previous md is disappeared. Otherwise the similar deadlock appeared if LOCKDEP is enabled, it is due to md_open holds the bdev->bd_mutex before flush workqueue. kernel: [ 154.522645] ====================================================== kernel: [ 154.522647] WARNING: possible circular locking dependency detected kernel: [ 154.522650] 5.6.0-rc7-lp151.27-default #25 Tainted: G O kernel: [ 154.522651] ------------------------------------------------------ kernel: [ 154.522653] mdadm/2482 is trying to acquire lock: kernel: [ 154.522655] ffff888078529128 ((wq_completion)md_misc){+.+.}, at: flush_workqueue+0x84/0x4b0 kernel: [ 154.522673] kernel: [ 154.522673] but task is already holding lock: kernel: [ 154.522675] ffff88804efa9338 (&bdev->bd_mutex){+.+.}, at: __blkdev_get+0x79/0x590 kernel: [ 154.522691] kernel: [ 154.522691] which lock already depends on the new lock. 
kernel: [ 154.522691] kernel: [ 154.522694] kernel: [ 154.522694] the existing dependency chain (in reverse order) is: kernel: [ 154.522696] kernel: [ 154.522696] -> #4 (&bdev->bd_mutex){+.+.}: kernel: [ 154.522704] __mutex_lock+0x87/0x950 kernel: [ 154.522706] __blkdev_get+0x79/0x590 kernel: [ 154.522708] blkdev_get+0x65/0x140 kernel: [ 154.522709] blkdev_get_by_dev+0x2f/0x40 kernel: [ 154.522716] lock_rdev+0x3d/0x90 [md_mod] kernel: [ 154.522719] md_import_device+0xd6/0x1b0 [md_mod] kernel: [ 154.522723] new_dev_store+0x15e/0x210 [md_mod] kernel: [ 154.522728] md_attr_store+0x7a/0xc0 [md_mod] kernel: [ 154.522732] kernfs_fop_write+0x117/0x1b0 kernel: [ 154.522735] vfs_write+0xad/0x1a0 kernel: [ 154.522737] ksys_write+0xa4/0xe0 kernel: [ 154.522745] do_syscall_64+0x64/0x2b0 kernel: [ 154.522748] entry_SYSCALL_64_after_hwframe+0x49/0xbe kernel: [ 154.522749] kernel: [ 154.522749] -> #3 (&mddev->reconfig_mutex){+.+.}: kernel: [ 154.522752] __mutex_lock+0x87/0x950 kernel: [ 154.522756] new_dev_store+0xc9/0x210 [md_mod] kernel: [ 154.522759] md_attr_store+0x7a/0xc0 [md_mod] kernel: [ 154.522761] kernfs_fop_write+0x117/0x1b0 kernel: [ 154.522763] vfs_write+0xad/0x1a0 kernel: [ 154.522765] ksys_write+0xa4/0xe0 kernel: [ 154.522767] do_syscall_64+0x64/0x2b0 kernel: [ 154.522769] entry_SYSCALL_64_after_hwframe+0x49/0xbe kernel: [ 154.522770] kernel: [ 154.522770] -> #2 (kn->count#253){++++}: kernel: [ 154.522775] __kernfs_remove+0x253/0x2c0 kernel: [ 154.522778] kernfs_remove+0x1f/0x30 kernel: [ 154.522780] kobject_del+0x28/0x60 kernel: [ 154.522783] mddev_delayed_delete+0x24/0x30 [md_mod] kernel: [ 154.522786] process_one_work+0x2a7/0x5f0 kernel: [ 154.522788] worker_thread+0x2d/0x3d0 kernel: [ 154.522793] kthread+0x117/0x130 kernel: [ 154.522795] ret_from_fork+0x3a/0x50 kernel: [ 154.522796] kernel: [ 154.522796] -> #1 ((work_completion)(&mddev->del_work)){+.+.}: kernel: [ 154.522800] process_one_work+0x27e/0x5f0 kernel: [ 154.522802] worker_thread+0x2d/0x3d0 kernel: [ 154.522804] kthread+0x117/0x130 kernel: [ 154.522806] ret_from_fork+0x3a/0x50 kernel: [ 154.522807] kernel: [ 154.522807] -> #0 ((wq_completion)md_misc){+.+.}: kernel: [ 154.522813] __lock_acquire+0x1392/0x1690 kernel: [ 154.522816] lock_acquire+0xb4/0x1a0 kernel: [ 154.522818] flush_workqueue+0xab/0x4b0 kernel: [ 154.522821] md_open+0xb6/0xc0 [md_mod] kernel: [ 154.522823] __blkdev_get+0xea/0x590 kernel: [ 154.522825] blkdev_get+0x65/0x140 kernel: [ 154.522828] do_dentry_open+0x1d1/0x380 kernel: [ 154.522831] path_openat+0x567/0xcc0 kernel: [ 154.522834] do_filp_open+0x9b/0x110 kernel: [ 154.522836] do_sys_openat2+0x201/0x2a0 kernel: [ 154.522838] do_sys_open+0x57/0x80 kernel: [ 154.522840] do_syscall_64+0x64/0x2b0 kernel: [ 154.522842] entry_SYSCALL_64_after_hwframe+0x49/0xbe kernel: [ 154.522844] kernel: [ 154.522844] other info that might help us debug this: kernel: [ 154.522844] kernel: [ 154.522846] Chain exists of: kernel: [ 154.522846] (wq_completion)md_misc --> &mddev->reconfig_mutex --> &bdev->bd_mutex kernel: [ 154.522846] kernel: [ 154.522850] Possible unsafe locking scenario: kernel: [ 154.522850] kernel: [ 154.522852] CPU0 CPU1 kernel: [ 154.522853] ---- ---- kernel: [ 154.522854] lock(&bdev->bd_mutex); kernel: [ 154.522856] lock(&mddev->reconfig_mutex); kernel: [ 154.522858] lock(&bdev->bd_mutex); kernel: [ 154.522860] lock((wq_completion)md_misc); kernel: [ 154.522861] kernel: [ 154.522861] *** DEADLOCK *** kernel: [ 154.522861] kernel: [ 154.522864] 1 lock held by mdadm/2482: kernel: [ 154.522865] #0: 
ffff88804efa9338 (&bdev->bd_mutex){+.+.}, at: __blkdev_get+0x79/0x590 kernel: [ 154.522868] kernel: [ 154.522868] stack backtrace: kernel: [ 154.522873] CPU: 1 PID: 2482 Comm: mdadm Tainted: G O 5.6.0-rc7-lp151.27-default #25 kernel: [ 154.522875] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014 kernel: [ 154.522878] Call Trace: kernel: [ 154.522881] dump_stack+0x8f/0xcb kernel: [ 154.522884] check_noncircular+0x194/0x1b0 kernel: [ 154.522888] ? __lock_acquire+0x1392/0x1690 kernel: [ 154.522890] __lock_acquire+0x1392/0x1690 kernel: [ 154.522893] lock_acquire+0xb4/0x1a0 kernel: [ 154.522895] ? flush_workqueue+0x84/0x4b0 kernel: [ 154.522898] flush_workqueue+0xab/0x4b0 kernel: [ 154.522900] ? flush_workqueue+0x84/0x4b0 kernel: [ 154.522905] ? md_open+0xb6/0xc0 [md_mod] kernel: [ 154.522908] md_open+0xb6/0xc0 [md_mod] kernel: [ 154.522910] __blkdev_get+0xea/0x590 kernel: [ 154.522912] ? bd_acquire+0xc0/0xc0 kernel: [ 154.522914] blkdev_get+0x65/0x140 kernel: [ 154.522916] ? bd_acquire+0xc0/0xc0 kernel: [ 154.522918] do_dentry_open+0x1d1/0x380 kernel: [ 154.522921] path_openat+0x567/0xcc0 kernel: [ 154.522923] ? __lock_acquire+0x380/0x1690 kernel: [ 154.522926] do_filp_open+0x9b/0x110 kernel: [ 154.522929] ? __alloc_fd+0xe5/0x1f0 kernel: [ 154.522935] ? kmem_cache_alloc+0x28c/0x630 kernel: [ 154.522939] ? do_sys_openat2+0x201/0x2a0 kernel: [ 154.522941] do_sys_openat2+0x201/0x2a0 kernel: [ 154.522944] do_sys_open+0x57/0x80 kernel: [ 154.522946] do_syscall_64+0x64/0x2b0 kernel: [ 154.522948] entry_SYSCALL_64_after_hwframe+0x49/0xbe kernel: [ 154.522951] RIP: 0033:0x7f98d279d9ae And md_alloc also flushed the same workqueue, but the thing is different here. Because all the paths call md_alloc don't hold bdev->bd_mutex, and the flush is necessary to avoid race condition, so leave it as it is. Signed-off-by: Guoqing Jiang Signed-off-by: Song Liu --- drivers/md/md.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index a970fdd0ce8c..f5dfa503fb6f 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -7768,7 +7768,8 @@ static int md_open(struct block_device *bdev, fmode_t mode) */ mddev_put(mddev); /* Wait until bdev->bd_disk is definitely gone */ - flush_workqueue(md_misc_wq); + if (work_pending(&mddev->del_work)) + flush_workqueue(md_misc_wq); /* Then retry the open from the top */ return -ERESTARTSYS; } -- cgit v1.2.3 From 78b990cf2822d1150a419c13be48e74aefa83f27 Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Sat, 4 Apr 2020 23:57:10 +0200 Subject: md: flush md_rdev_misc_wq for HOT_ADD_DISK case Since rdev->kobj is removed asynchronously, it is possible that the rdev->kobj still exists when try to add the rdev again after rdev is removed. But this path md_ioctl (HOT_ADD_DISK) -> hot_add_disk -> bind_rdev_to_array missed it. 
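For reference, the helper introduced two patches earlier, which this ioctl path now also invokes for HOT_ADD_DISK (reproduced from that patch):

    /* need to ensure rdev_delayed_delete() has completed */
    static void flush_rdev_wq(struct mddev *mddev)
    {
            struct md_rdev *rdev;

            rcu_read_lock();
            rdev_for_each_rcu(rdev, mddev)
                    if (work_pending(&rdev->del_work)) {
                            flush_workqueue(md_rdev_misc_wq);
                            break;
                    }
            rcu_read_unlock();
    }

Waiting here guarantees the old rdev->kobj has been torn down before bind_rdev_to_array() attempts to register it again.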
Signed-off-by: Guoqing Jiang Signed-off-by: Song Liu --- drivers/md/md.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index f5dfa503fb6f..95b72e35b355 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -7512,7 +7512,7 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode, } - if (cmd == ADD_NEW_DISK) + if (cmd == ADD_NEW_DISK || cmd == HOT_ADD_DISK) flush_rdev_wq(mddev); if (cmd == HOT_REMOVE_DISK) -- cgit v1.2.3 From 3f79cc22348fe7d285bd140209394c9e381e082c Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Sat, 4 Apr 2020 23:57:11 +0200 Subject: md: remove the extra line for ->hot_add_disk It is not necessary to break these calls across two lines since they don't exceed 80 characters, and it is not intuitive to distinguish ->hot_add_disk() from hot_add_disk() either. Signed-off-by: Guoqing Jiang Signed-off-by: Song Liu --- drivers/md/md.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 95b72e35b355..8ff690ac6247 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -3192,8 +3192,7 @@ slot_store(struct md_rdev *rdev, const char *buf, size_t len) rdev->saved_raid_disk = -1; clear_bit(In_sync, &rdev->flags); clear_bit(Bitmap_sync, &rdev->flags); - err = rdev->mddev->pers-> - hot_add_disk(rdev->mddev, rdev); + err = rdev->mddev->pers->hot_add_disk(rdev->mddev, rdev); if (err) { rdev->raid_disk = -1; return err; @@ -9057,8 +9056,7 @@ static int remove_and_add_spares(struct mddev *mddev, rdev->recovery_offset = 0; } - if (mddev->pers-> - hot_add_disk(mddev, rdev) == 0) { + if (mddev->pers->hot_add_disk(mddev, rdev) == 0) { if (sysfs_link_rdev(mddev, rdev)) /* failure here is OK */; if (!test_bit(Journal, &rdev->flags)) -- cgit v1.2.3 From 78f57ef9d50a75326da73d352d7c27828495229a Mon Sep 17 00:00:00 2001 From: Coly Li Date: Thu, 9 Apr 2020 22:17:20 +0800 Subject: md: use memalloc scope APIs in mddev_suspend()/mddev_resume() In raid5.c:resize_chunks(), scribble_alloc() is called with the GFP_NOIO flag, which is then passed into kvmalloc_array() inside scribble_alloc(). The problem is that kvmalloc_array() eventually calls kvmalloc_node(), which does not accept non GFP_KERNEL compatible flags like GFP_NOIO, so kmalloc_node() ends up being called to allocate physically contiguous pages. When system memory is under heavy pressure and the requested size is large, there is a high probability that allocating contiguous pages will fail. But simply using the GFP_KERNEL flag to call kvmalloc_array() is also problematic. In the code path where scribble_alloc() is called, the raid array is suspended; if kvmalloc_node() triggers memory reclaim I/Os and such I/Os go back to the suspended raid array, a deadlock will happen. What is desired here is to allocate non-physically (a.k.a. virtually) contiguous pages and avoid memory reclaim I/Os. Michal Hocko suggested using the memalloc scope APIs to restrict memory reclaim I/O in the allocating context, specifically calling memalloc_noio_save() when suspending the raid array and memalloc_noio_restore() when resuming it. This patch adds the memalloc scope APIs in mddev_suspend() and mddev_resume(), to restrict memory reclaim I/Os while the raid array is suspended. The benefit of adding the memalloc scope API at the unified entry points mddev_suspend()/mddev_resume() is that, no matter which md raid array type (personality) is used, we are sure the deadlock caused by recursive memory reclaim I/O won't happen in the suspending context.
Please notice that the memalloc scope APIs only take effect on the raid array suspending context, if the memory allocation is from another new created kthread after raid array suspended, the recursive memory reclaim I/Os won't be restricted. The mddev_suspend()/mddev_resume() entries are used for the critical section where the raid metadata is modifying, creating a kthread to allocate memory inside the critical section is queer and very probably being buggy. Fixes: b330e6a49dc3 ("md: convert to kvmalloc") Suggested-by: Michal Hocko Signed-off-by: Coly Li Signed-off-by: Song Liu --- drivers/md/md.c | 4 ++++ drivers/md/md.h | 1 + 2 files changed, 5 insertions(+) diff --git a/drivers/md/md.c b/drivers/md/md.c index 8ff690ac6247..1b3316c79b7b 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -528,11 +528,15 @@ void mddev_suspend(struct mddev *mddev) wait_event(mddev->sb_wait, !test_bit(MD_UPDATING_SB, &mddev->flags)); del_timer_sync(&mddev->safemode_timer); + /* restrict memory reclaim I/O during raid array is suspend */ + mddev->noio_flag = memalloc_noio_save(); } EXPORT_SYMBOL_GPL(mddev_suspend); void mddev_resume(struct mddev *mddev) { + /* entred the memalloc scope from mddev_suspend() */ + memalloc_noio_restore(mddev->noio_flag); lockdep_assert_held(&mddev->reconfig_mutex); if (--mddev->suspended) return; diff --git a/drivers/md/md.h b/drivers/md/md.h index acd681939112..612814d07d35 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -497,6 +497,7 @@ struct mddev { void (*sync_super)(struct mddev *mddev, struct md_rdev *rdev); struct md_cluster_info *cluster_info; unsigned int good_device_nr; /* good device num within cluster raid */ + unsigned int noio_flag; /* for memalloc scope API */ bool has_superblocks:1; bool fail_last_dev:1; -- cgit v1.2.3 From ba54d4d4d2844c234f1b4692bd8c9e0f833c8a54 Mon Sep 17 00:00:00 2001 From: Coly Li Date: Thu, 9 Apr 2020 22:17:21 +0800 Subject: raid5: remove gfp flags from scribble_alloc() Using GFP_NOIO flag to call scribble_alloc() from resize_chunk() does not have the expected behavior. kvmalloc_array() inside scribble_alloc() which receives the GFP_NOIO flag will eventually call kmalloc_node() to allocate physically continuous pages. Now we have memalloc scope APIs in mddev_suspend()/mddev_resume() to prevent memory reclaim I/Os during raid array suspend context, calling to kvmalloc_array() with GFP_KERNEL flag may avoid deadlock of recursive I/O as expected. This patch removes the useless gfp flags from parameters list of scribble_alloc(), and call kvmalloc_array() with GFP_KERNEL flag. The incorrect GFP_NOIO flag does not exist anymore. Fixes: b330e6a49dc3 ("md: convert to kvmalloc") Suggested-by: Michal Hocko Signed-off-by: Coly Li Signed-off-by: Song Liu --- drivers/md/raid5.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index ba00e9877f02..190dd70db514 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -2228,14 +2228,19 @@ static int grow_stripes(struct r5conf *conf, int num) * of the P and Q blocks. */ static int scribble_alloc(struct raid5_percpu *percpu, - int num, int cnt, gfp_t flags) + int num, int cnt) { size_t obj_size = sizeof(struct page *) * (num+2) + sizeof(addr_conv_t) * (num+2); void *scribble; - scribble = kvmalloc_array(cnt, obj_size, flags); + /* + * If here is in raid array suspend context, it is in memalloc noio + * context as well, there is no potential recursive memory reclaim + * I/Os with the GFP_KERNEL flag. 
+ */ + scribble = kvmalloc_array(cnt, obj_size, GFP_KERNEL); if (!scribble) return -ENOMEM; @@ -2267,8 +2272,7 @@ static int resize_chunks(struct r5conf *conf, int new_disks, int new_sectors) percpu = per_cpu_ptr(conf->percpu, cpu); err = scribble_alloc(percpu, new_disks, - new_sectors / STRIPE_SECTORS, - GFP_NOIO); + new_sectors / STRIPE_SECTORS); if (err) break; } @@ -6759,8 +6763,7 @@ static int alloc_scratch_buffer(struct r5conf *conf, struct raid5_percpu *percpu conf->previous_raid_disks), max(conf->chunk_sectors, conf->prev_chunk_sectors) - / STRIPE_SECTORS, - GFP_KERNEL)) { + / STRIPE_SECTORS)) { free_scratch_buffer(conf, percpu); return -ENOMEM; } -- cgit v1.2.3 From 7f8a30e5d253793d660f30b7475d7bb41efd40b8 Mon Sep 17 00:00:00 2001 From: Coly Li Date: Thu, 9 Apr 2020 22:17:22 +0800 Subject: raid5: update code comment of scribble_alloc() The code comments of scribble_alloc() have been outdated for a while. This patch updates the function header comments for the new parameter list. Suggested-by: Song Liu Signed-off-by: Coly Li Signed-off-by: Song Liu --- drivers/md/raid5.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 190dd70db514..ab8067f9ce8c 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -2215,10 +2215,13 @@ static int grow_stripes(struct r5conf *conf, int num) } /** - * scribble_len - return the required size of the scribble region + * scribble_alloc - allocate percpu scribble buffer for required size + * of the scribble region + * @percpu - from for_each_present_cpu() of the caller @num - total number of disks in the array + * @cnt - scribble objs count for required size of the scribble region * - * The size must be enough to contain: + * The scribble buffer size must be enough to contain: * 1/ a struct page pointer for each device in the array +2 * 2/ room to convert each entry in (1) to its corresponding dma * (dma_map_page()) or page (page_address()) address. -- cgit v1.2.3 From 3024ba2d6c5573b4797e62006d98f17f47f7d103 Mon Sep 17 00:00:00 2001 From: Coly Li Date: Thu, 9 Apr 2020 22:17:23 +0800 Subject: md: remove redundant memalloc scope API usage In mddev_create_serial_pool(), memalloc scope APIs memalloc_noio_save() and memalloc_noio_restore() are used when allocating memory by calling mempool_create_kmalloc_pool(). After adding the memalloc scope APIs in raid array suspend context, it is unnecessary to explicitly call them around mempool_create_kmalloc_pool() any longer. This patch removes the redundant memalloc scope APIs in mddev_create_serial_pool().
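The pattern shared by these three memalloc patches, in generic form (sketch; the scope APIs live in <linux/sched/mm.h>):

    unsigned int noio_flag;
    void *buf;

    noio_flag = memalloc_noio_save();   /* this task's allocations now behave as GFP_NOIO */
    buf = kvmalloc_array(cnt, size, GFP_KERNEL);  /* may fall back to vmalloc; reclaim I/O
                                                   * is still implicitly masked            */
    /* ... use buf ... */
    memalloc_noio_restore(noio_flag);   /* leave the scope, restoring the saved flags */

Because the scope travels with the task, every allocation between save and restore is covered, which is why marking the suspend/resume entry points once removes the need for per-call-site GFP gymnastics.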
Signed-off-by: Coly Li Cc: Guoqing Jiang Signed-off-by: Song Liu --- drivers/md/md.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 1b3316c79b7b..7b2ac5f23260 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -228,13 +228,13 @@ void mddev_create_serial_pool(struct mddev *mddev, struct md_rdev *rdev, goto abort; if (mddev->serial_info_pool == NULL) { - unsigned int noio_flag; - - noio_flag = memalloc_noio_save(); + /* + * already in memalloc noio context by + * mddev_suspend() + */ mddev->serial_info_pool = mempool_create_kmalloc_pool(NR_SERIAL_INFOS, sizeof(struct serial_info)); - memalloc_noio_restore(noio_flag); if (!mddev->serial_info_pool) { rdevs_uninit_serial(mddev); pr_err("can't alloc memory pool for serialization\n"); -- cgit v1.2.3 From c91114c2b89d5bea132d34b40568053f8bfa963d Mon Sep 17 00:00:00 2001 From: David Jeffery Date: Mon, 27 Jan 2020 10:26:19 -0500 Subject: md/raid1: release pending accounting for an I/O only after write-behind is also finished When using RAID1 and write-behind, md can deadlock when errors occur. With write-behind, r1bio structs can be accounted by raid1 as queued but not counted as pending. The pending count is dropped when the original bio is returned complete but write-behind for the r1bio may still be active. This breaks the accounting used in some conditions to know when the raid1 md device has reached an idle state. It can result in calls to freeze_array deadlocking. freeze_array will never complete from a negative "unqueued" value being calculated due to a queued count larger than the pending count. To properly account for write-behind, move the call to allow_barrier from call_bio_endio to raid_end_bio_io. When using write-behind, md can call call_bio_endio before all write-behind I/O is complete. Using raid_end_bio_io for the point to call allow_barrier will release the pending count at a point where all I/O for an r1bio, even write-behind, is done. Signed-off-by: David Jeffery Signed-off-by: Song Liu --- drivers/md/raid1.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index cd810e195086..dcd27f3da84e 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -296,22 +296,17 @@ static void reschedule_retry(struct r1bio *r1_bio) static void call_bio_endio(struct r1bio *r1_bio) { struct bio *bio = r1_bio->master_bio; - struct r1conf *conf = r1_bio->mddev->private; if (!test_bit(R1BIO_Uptodate, &r1_bio->state)) bio->bi_status = BLK_STS_IOERR; bio_endio(bio); - /* - * Wake up any possible resync thread that waits for the device - * to go idle. - */ - allow_barrier(conf, r1_bio->sector); } static void raid_end_bio_io(struct r1bio *r1_bio) { struct bio *bio = r1_bio->master_bio; + struct r1conf *conf = r1_bio->mddev->private; /* if nobody has done the final endio yet, do it now */ if (!test_and_set_bit(R1BIO_Returned, &r1_bio->state)) { @@ -322,6 +317,12 @@ static void raid_end_bio_io(struct r1bio *r1_bio) call_bio_endio(r1_bio); } + /* + * Wake up any possible resync thread that waits for the device + * to go idle. All I/Os, even write-behind writes, are done. + */ + allow_barrier(conf, r1_bio->sector); + free_r1bio(r1_bio); } -- cgit v1.2.3 From e4fc5a74293fbe8a1b8598b8a3c53592a5d70398 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 8 May 2020 18:15:14 +0200 Subject: md: stop using ->queuedata Pointer to mddev is already available in private_data. 
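The equivalence this relies on (sketch): md_alloc() stores the mddev in the gendisk when the disk is created, so any bio routed to the disk can recover it without the extra queue back-pointer:

    /* at creation time, in md_alloc():  disk->private_data = mddev;  */

    /* in the make_request path, instead of q->queuedata: */
    struct mddev *mddev = bio->bi_disk->private_data;

One pointer fewer to keep consistent between the queue and the disk.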
Signed-off-by: Christoph Hellwig Signed-off-by: Song Liu --- drivers/md/md.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 7b2ac5f23260..79e36cb4e8b0 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -467,7 +467,7 @@ static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio) { const int rw = bio_data_dir(bio); const int sgrp = op_stat_group(bio_op(bio)); - struct mddev *mddev = q->queuedata; + struct mddev *mddev = bio->bi_disk->private_data; unsigned int sectors; if (unlikely(test_bit(MD_BROKEN, &mddev->flags)) && (rw == WRITE)) { @@ -5644,7 +5644,6 @@ static int md_alloc(dev_t dev, char *name) mddev->queue = blk_alloc_queue(md_make_request, NUMA_NO_NODE); if (!mddev->queue) goto abort; - mddev->queue->queuedata = mddev; blk_set_stacking_limits(&mddev->queue->limits); -- cgit v1.2.3 From 3f99980c8f70bc56584450c0a69973eef7b65913 Mon Sep 17 00:00:00 2001 From: Xiongfeng Wang Date: Mon, 11 May 2020 16:23:25 +0800 Subject: md: add a newline when printing parameter 'start_ro' by sysfs Add a missing newline when printing module parameter 'start_ro' by sysfs. Signed-off-by: Xiongfeng Wang Signed-off-by: Song Liu --- drivers/md/md.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 79e36cb4e8b0..f567f536b529 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -9810,7 +9810,7 @@ module_exit(md_exit) static int get_ro(char *buffer, const struct kernel_param *kp) { - return sprintf(buffer, "%d", start_readonly); + return sprintf(buffer, "%d\n", start_readonly); } static int set_ro(const char *val, const struct kernel_param *kp) { -- cgit v1.2.3 From 358369f03ac94637c9fd9d8f94a2dfde86b9f25f Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 7 May 2020 14:22:10 -0500 Subject: md/raid1: Replace zero-length array with flexible-array The current codebase makes use of the zero-length array language extension to the C90 standard, but the preferred mechanism to declare variable-length types such as these ones is a flexible array member[1][2], introduced in C99: struct foo { int stuff; struct boo array[]; }; By making use of the mechanism above, we will get a compiler warning in case the flexible array does not occur last in the structure, which will help us prevent some kind of undefined behavior bugs from being inadvertently introduced[3] to the codebase from now on. Also, notice that, dynamic memory allocations won't be affected by this change: "Flexible array members have incomplete type, and so the sizeof operator may not be applied. As a quirk of the original implementation of zero-length arrays, sizeof evaluates to zero."[1] sizeof(flexible-array-member) triggers a warning because flexible array members have incomplete type[1]. There are some instances of code in which the sizeof operator is being incorrectly/erroneously applied to zero-length arrays and the result is zero. Such instances may be hiding some bugs. So, this work (flexible-array member conversions) will also help to get completely rid of those sorts of issues. This issue was found with the help of Coccinelle. [1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html [2] https://github.com/KSPP/linux/issues/21 [3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour") Signed-off-by: Gustavo A. R. 
Silva Signed-off-by: Song Liu --- drivers/md/md-linear.h | 2 +- drivers/md/raid1.h | 2 +- drivers/md/raid10.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/md/md-linear.h b/drivers/md/md-linear.h index 8381d651d4ed..24e97db50ebb 100644 --- a/drivers/md/md-linear.h +++ b/drivers/md/md-linear.h @@ -12,6 +12,6 @@ struct linear_conf struct rcu_head rcu; sector_t array_sectors; int raid_disks; /* a copy of mddev->raid_disks */ - struct dev_info disks[0]; + struct dev_info disks[]; }; #endif diff --git a/drivers/md/raid1.h b/drivers/md/raid1.h index e7ccad898736..b7eb09e8c025 100644 --- a/drivers/md/raid1.h +++ b/drivers/md/raid1.h @@ -180,7 +180,7 @@ struct r1bio { * if the IO is in WRITE direction, then multiple bios are used. * We choose the number when they are allocated. */ - struct bio *bios[0]; + struct bio *bios[]; /* DO NOT PUT ANY NEW FIELDS HERE - bios array is contiguously alloced*/ }; diff --git a/drivers/md/raid10.h b/drivers/md/raid10.h index d3eaaf3eb1bc..79cd2b7d3128 100644 --- a/drivers/md/raid10.h +++ b/drivers/md/raid10.h @@ -153,7 +153,7 @@ struct r10bio { }; sector_t addr; int devnum; - } devs[0]; + } devs[]; }; /* bits for r10bio.state */ -- cgit v1.2.3 From c65165651d595fd77c38a9a25c14ade14444bc13 Mon Sep 17 00:00:00 2001 From: Xu Wang Date: Thu, 7 May 2020 15:12:11 +0800 Subject: block/swim3: use set_current_state macro Use the set_current_state() macro instead of directly assigning current->state = TASK_INTERRUPTIBLE. Signed-off-by: Xu Wang Reviewed-by: Chaitanya Kulkarni Signed-off-by: Jens Axboe --- drivers/block/swim.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/block/swim.c b/drivers/block/swim.c index 4c297f69171d..dd34504382e5 100644 --- a/drivers/block/swim.c +++ b/drivers/block/swim.c @@ -327,7 +327,7 @@ static inline void swim_motor(struct swim __iomem *base, swim_select(base, RELAX); if (swim_readbit(base, MOTOR_ON)) break; - current->state = TASK_INTERRUPTIBLE; + set_current_state(TASK_INTERRUPTIBLE); schedule_timeout(1); } } else if (action == OFF) { @@ -346,7 +346,7 @@ static inline void swim_eject(struct swim __iomem *base) swim_select(base, RELAX); if (!swim_readbit(base, DISK_IN)) break; - current->state = TASK_INTERRUPTIBLE; + set_current_state(TASK_INTERRUPTIBLE); schedule_timeout(1); } swim_select(base, RELAX); @@ -370,7 +370,7 @@ static inline int swim_step(struct swim __iomem *base) for (wait = 0; wait < HZ; wait++) { - current->state = TASK_INTERRUPTIBLE; + set_current_state(TASK_INTERRUPTIBLE); schedule_timeout(1); swim_select(base, RELAX); -- cgit v1.2.3 From 7c5014b0987a30e4989c90633c198aced454c0ec Mon Sep 17 00:00:00 2001 From: Martijn Coenen Date: Wed, 13 May 2020 15:38:35 +0200 Subject: loop: Call loop_config_discard() only after new config is applied loop_set_status() calls loop_config_discard() to configure discard for the loop device; however, the discard configuration depends on whether the loop device uses encryption, and when we call it the encryption configuration has not been updated yet. Move the call down so we apply the correct discard configuration based on the new configuration.
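To see why the ordering matters, here is a condensed, non-authoritative sketch of the dependency, loosely modeled on the loop driver of this era (field and helper names may differ slightly from the real code): the discard decision reads the encryption state, so it must run after lo->lo_encrypt_key_size has been updated from the new loop_info64, not before.

	static void demo_config_discard(struct loop_device *lo)
	{
		struct file *file = lo->lo_backing_file;
		struct request_queue *q = lo->lo_queue;

		/* Encrypted devices (and files without fallocate) get no discard */
		if (!file->f_op->fallocate || lo->lo_encrypt_key_size) {
			q->limits.discard_granularity = 0;
			blk_queue_max_discard_sectors(q, 0);
			blk_queue_flag_clear(QUEUE_FLAG_DISCARD, q);
			return;
		}

		q->limits.discard_granularity = file->f_mapping->host->i_sb->s_blocksize;
		blk_queue_max_discard_sectors(q, UINT_MAX >> 9);
		blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
	}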
Signed-off-by: Martijn Coenen Reviewed-by: Christoph Hellwig Reviewed-by: Bob Liu Reviewed-by: Bart Van Assche Signed-off-by: Jens Axboe --- drivers/block/loop.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index da693e6a834e..f1754262fc94 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1334,8 +1334,6 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) } } - loop_config_discard(lo); - memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE); memcpy(lo->lo_crypt_name, info->lo_crypt_name, LO_NAME_SIZE); lo->lo_file_name[LO_NAME_SIZE-1] = 0; @@ -1359,6 +1357,8 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) lo->lo_key_owner = uid; } + loop_config_discard(lo); + /* update dio if lo_offset or transfer is changed */ __loop_update_dio(lo, lo->use_dio); -- cgit v1.2.3 From 083a6a50783ef54256eec3499e6575237e0e3d53 Mon Sep 17 00:00:00 2001 From: Martijn Coenen Date: Wed, 13 May 2020 15:38:36 +0200 Subject: loop: Remove sector_t truncation checks sector_t is now always u64, so we don't need to check for truncation. Signed-off-by: Martijn Coenen Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/block/loop.c | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index f1754262fc94..00de7fec0ed5 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -228,24 +228,20 @@ static void __loop_update_dio(struct loop_device *lo, bool dio) blk_mq_unfreeze_queue(lo->lo_queue); } -static int +static void figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit) { loff_t size = get_size(offset, sizelimit, lo->lo_backing_file); - sector_t x = (sector_t)size; struct block_device *bdev = lo->lo_device; - if (unlikely((loff_t)x != size)) - return -EFBIG; if (lo->lo_offset != offset) lo->lo_offset = offset; if (lo->lo_sizelimit != sizelimit) lo->lo_sizelimit = sizelimit; - set_capacity(lo->lo_disk, x); + set_capacity(lo->lo_disk, size); bd_set_size(bdev, (loff_t)get_capacity(bdev->bd_disk) << 9); /* let user-space know about the new size */ kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE); - return 0; } static inline int @@ -1003,10 +999,8 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, !file->f_op->write_iter) lo_flags |= LO_FLAGS_READ_ONLY; - error = -EFBIG; size = get_loop_size(lo, file); - if ((loff_t)(sector_t)size != size) - goto out_unlock; + error = loop_prepare_queue(lo); if (error) goto out_unlock; @@ -1328,10 +1322,7 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) lo->lo_device->bd_inode->i_mapping->nrpages); goto out_unfreeze; } - if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit)) { - err = -EFBIG; - goto out_unfreeze; - } + figure_loop_size(lo, info->lo_offset, info->lo_sizelimit); } memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE); @@ -1534,7 +1525,9 @@ static int loop_set_capacity(struct loop_device *lo) if (unlikely(lo->lo_state != Lo_bound)) return -ENXIO; - return figure_loop_size(lo, lo->lo_offset, lo->lo_sizelimit); + figure_loop_size(lo, lo->lo_offset, lo->lo_sizelimit); + + return 0; } static int loop_set_dio(struct loop_device *lo, unsigned long arg) -- cgit v1.2.3 From 5795b6f5607f7e4db62ddea144727780cb351a9b Mon Sep 17 00:00:00 2001 From: Martijn Coenen Date: Wed, 13 May 2020 15:38:37 +0200 Subject: loop: Factor out setting loop device size This code is used 
repeatedly. Signed-off-by: Martijn Coenen Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/block/loop.c | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 00de7fec0ed5..e69ff3c19eff 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -228,20 +228,35 @@ static void __loop_update_dio(struct loop_device *lo, bool dio) blk_mq_unfreeze_queue(lo->lo_queue); } +/** + * loop_set_size() - sets device size and notifies userspace + * @lo: struct loop_device to set the size for + * @size: new size of the loop device + * + * Callers must validate that the size passed into this function fits into + * a sector_t, eg using loop_validate_size() + */ +static void loop_set_size(struct loop_device *lo, loff_t size) +{ + struct block_device *bdev = lo->lo_device; + + set_capacity(lo->lo_disk, size); + bd_set_size(bdev, size << SECTOR_SHIFT); + /* let user-space know about the new size */ + kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE); +} + static void figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit) { loff_t size = get_size(offset, sizelimit, lo->lo_backing_file); - struct block_device *bdev = lo->lo_device; if (lo->lo_offset != offset) lo->lo_offset = offset; if (lo->lo_sizelimit != sizelimit) lo->lo_sizelimit = sizelimit; - set_capacity(lo->lo_disk, size); - bd_set_size(bdev, (loff_t)get_capacity(bdev->bd_disk) << 9); - /* let user-space know about the new size */ - kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE); + + loop_set_size(lo, size); } static inline int @@ -1034,11 +1049,8 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, loop_update_rotational(lo); loop_update_dio(lo); - set_capacity(lo->lo_disk, size); - bd_set_size(bdev, size << 9); loop_sysfs_init(lo); - /* let user-space know about the new size */ - kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE); + loop_set_size(lo, size); set_blocksize(bdev, S_ISBLK(inode->i_mode) ? block_size(inode->i_bdev) : PAGE_SIZE); -- cgit v1.2.3 From 716ad0986cbd1d3b2ab3f6d23144a94638dac20b Mon Sep 17 00:00:00 2001 From: Martijn Coenen Date: Wed, 13 May 2020 15:38:38 +0200 Subject: loop: Switch to set_capacity_revalidate_and_notify() This was recently added to block/genhd.c, and takes care of both updating the capacity and notifying userspace of the new size. Signed-off-by: Martijn Coenen Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/block/loop.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index e69ff3c19eff..d9a756f8abd5 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -240,10 +240,9 @@ static void loop_set_size(struct loop_device *lo, loff_t size) { struct block_device *bdev = lo->lo_device; - set_capacity(lo->lo_disk, size); bd_set_size(bdev, size << SECTOR_SHIFT); - /* let user-space know about the new size */ - kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE); + + set_capacity_revalidate_and_notify(lo->lo_disk, size, false); } static void -- cgit v1.2.3 From b0bd158dd630bd47640e0e418c062cda1e0da5ad Mon Sep 17 00:00:00 2001 From: Martijn Coenen Date: Wed, 13 May 2020 15:38:39 +0200 Subject: loop: Refactor loop_set_status() size calculation figure_loop_size() calculates the loop size based on the passed in parameters, but at the same time it updates the offset and sizelimit parameters in the loop device configuration. 
That is a somewhat unexpected side effect of a function with this name, and it is only needed by one of the two callers of this function - loop_set_status(). Move the lo_offset and lo_sizelimit assignment back into loop_set_status(), and use the newly factored out functions to validate and apply the newly calculated size. This allows us to get rid of figure_loop_size() in a follow-up commit. Signed-off-by: Martijn Coenen Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/block/loop.c | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index d9a756f8abd5..c134b3439483 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -250,11 +250,6 @@ figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit) { loff_t size = get_size(offset, sizelimit, lo->lo_backing_file); - if (lo->lo_offset != offset) - lo->lo_offset = offset; - if (lo->lo_sizelimit != sizelimit) - lo->lo_sizelimit = sizelimit; - loop_set_size(lo, size); } @@ -1272,6 +1267,7 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) kuid_t uid = current_uid(); struct block_device *bdev; bool partscan = false; + bool size_changed = false; err = mutex_lock_killable(&loop_ctl_mutex); if (err) @@ -1293,6 +1289,7 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) if (lo->lo_offset != info->lo_offset || lo->lo_sizelimit != info->lo_sizelimit) { + size_changed = true; sync_blockdev(lo->lo_device); kill_bdev(lo->lo_device); } @@ -1300,6 +1297,15 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) /* I/O need to be drained during transfer transition */ blk_mq_freeze_queue(lo->lo_queue); + if (size_changed && lo->lo_device->bd_inode->i_mapping->nrpages) { + /* If any pages were dirtied after kill_bdev(), try again */ + err = -EAGAIN; + pr_warn("%s: loop%d (%s) has still dirty pages (nrpages=%lu)\n", + __func__, lo->lo_number, lo->lo_file_name, + lo->lo_device->bd_inode->i_mapping->nrpages); + goto out_unfreeze; + } + err = loop_release_xfer(lo); if (err) goto out_unfreeze; @@ -1323,19 +1329,8 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) if (err) goto out_unfreeze; - if (lo->lo_offset != info->lo_offset || - lo->lo_sizelimit != info->lo_sizelimit) { - /* kill_bdev should have truncated all the pages */ - if (lo->lo_device->bd_inode->i_mapping->nrpages) { - err = -EAGAIN; - pr_warn("%s: loop%d (%s) has still dirty pages (nrpages=%lu)\n", - __func__, lo->lo_number, lo->lo_file_name, - lo->lo_device->bd_inode->i_mapping->nrpages); - goto out_unfreeze; - } - figure_loop_size(lo, info->lo_offset, info->lo_sizelimit); - } - + lo->lo_offset = info->lo_offset; + lo->lo_sizelimit = info->lo_sizelimit; memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE); memcpy(lo->lo_crypt_name, info->lo_crypt_name, LO_NAME_SIZE); lo->lo_file_name[LO_NAME_SIZE-1] = 0; @@ -1359,6 +1354,12 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) lo->lo_key_owner = uid; } + if (size_changed) { + loff_t new_size = get_size(lo->lo_offset, lo->lo_sizelimit, + lo->lo_backing_file); + loop_set_size(lo, new_size); + } + loop_config_discard(lo); /* update dio if lo_offset or transfer is changed */ -- cgit v1.2.3 From 0a6ed1b5ff6757f11ad2d57906ceb40488a5ee52 Mon Sep 17 00:00:00 2001 From: Martijn Coenen Date: Wed, 13 May 2020 15:38:40 +0200 Subject: loop: Remove figure_loop_size() This function is now only used by
loop_set_capacity(). Just open code the remaining code in the caller instead. Signed-off-by: Martijn Coenen Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/block/loop.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index c134b3439483..e281a9f03d96 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -245,14 +245,6 @@ static void loop_set_size(struct loop_device *lo, loff_t size) set_capacity_revalidate_and_notify(lo->lo_disk, size, false); } -static void -figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit) -{ - loff_t size = get_size(offset, sizelimit, lo->lo_backing_file); - - loop_set_size(lo, size); -} - static inline int lo_do_transfer(struct loop_device *lo, int cmd, struct page *rpage, unsigned roffs, @@ -1534,10 +1526,13 @@ loop_get_status64(struct loop_device *lo, struct loop_info64 __user *arg) { static int loop_set_capacity(struct loop_device *lo) { + loff_t size; + if (unlikely(lo->lo_state != Lo_bound)) return -ENXIO; - figure_loop_size(lo, lo->lo_offset, lo->lo_sizelimit); + size = get_loop_size(lo, lo->lo_backing_file); + loop_set_size(lo, size); return 0; } -- cgit v1.2.3 From 0c3796c244598122a5d59d56f30d19390096817f Mon Sep 17 00:00:00 2001 From: Martijn Coenen Date: Wed, 13 May 2020 15:38:41 +0200 Subject: loop: Factor out configuring loop from status Factor out this code into a separate function, so it can be reused by other code more easily. Signed-off-by: Martijn Coenen Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/block/loop.c | 117 +++++++++++++++++++++++++++++---------------------- 1 file changed, 67 insertions(+), 50 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index e281a9f03d96..6a4c0ba225ca 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1251,75 +1251,43 @@ static int loop_clr_fd(struct loop_device *lo) return __loop_clr_fd(lo, false); } +/** + * loop_set_status_from_info - configure device from loop_info + * @lo: struct loop_device to configure + * @info: struct loop_info64 to configure the device with + * + * Configures the loop device parameters according to the passed + * in loop_info64 configuration. 
+ */ static int -loop_set_status(struct loop_device *lo, const struct loop_info64 *info) +loop_set_status_from_info(struct loop_device *lo, + const struct loop_info64 *info) { int err; struct loop_func_table *xfer; kuid_t uid = current_uid(); - struct block_device *bdev; - bool partscan = false; - bool size_changed = false; - - err = mutex_lock_killable(&loop_ctl_mutex); - if (err) - return err; - if (lo->lo_encrypt_key_size && - !uid_eq(lo->lo_key_owner, uid) && - !capable(CAP_SYS_ADMIN)) { - err = -EPERM; - goto out_unlock; - } - if (lo->lo_state != Lo_bound) { - err = -ENXIO; - goto out_unlock; - } - if ((unsigned int) info->lo_encrypt_key_size > LO_KEY_SIZE) { - err = -EINVAL; - goto out_unlock; - } - - if (lo->lo_offset != info->lo_offset || - lo->lo_sizelimit != info->lo_sizelimit) { - size_changed = true; - sync_blockdev(lo->lo_device); - kill_bdev(lo->lo_device); - } - /* I/O need to be drained during transfer transition */ - blk_mq_freeze_queue(lo->lo_queue); - - if (size_changed && lo->lo_device->bd_inode->i_mapping->nrpages) { - /* If any pages were dirtied after kill_bdev(), try again */ - err = -EAGAIN; - pr_warn("%s: loop%d (%s) has still dirty pages (nrpages=%lu)\n", - __func__, lo->lo_number, lo->lo_file_name, - lo->lo_device->bd_inode->i_mapping->nrpages); - goto out_unfreeze; - } + if ((unsigned int) info->lo_encrypt_key_size > LO_KEY_SIZE) + return -EINVAL; err = loop_release_xfer(lo); if (err) - goto out_unfreeze; + return err; if (info->lo_encrypt_type) { unsigned int type = info->lo_encrypt_type; - if (type >= MAX_LO_CRYPT) { - err = -EINVAL; - goto out_unfreeze; - } + if (type >= MAX_LO_CRYPT) + return -EINVAL; xfer = xfer_funcs[type]; - if (xfer == NULL) { - err = -EINVAL; - goto out_unfreeze; - } + if (xfer == NULL) + return -EINVAL; } else xfer = NULL; err = loop_init_xfer(lo, xfer, info); if (err) - goto out_unfreeze; + return err; lo->lo_offset = info->lo_offset; lo->lo_sizelimit = info->lo_sizelimit; @@ -1346,6 +1314,55 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) lo->lo_key_owner = uid; } + return 0; +} + +static int +loop_set_status(struct loop_device *lo, const struct loop_info64 *info) +{ + int err; + struct block_device *bdev; + kuid_t uid = current_uid(); + bool partscan = false; + bool size_changed = false; + + err = mutex_lock_killable(&loop_ctl_mutex); + if (err) + return err; + if (lo->lo_encrypt_key_size && + !uid_eq(lo->lo_key_owner, uid) && + !capable(CAP_SYS_ADMIN)) { + err = -EPERM; + goto out_unlock; + } + if (lo->lo_state != Lo_bound) { + err = -ENXIO; + goto out_unlock; + } + + if (lo->lo_offset != info->lo_offset || + lo->lo_sizelimit != info->lo_sizelimit) { + size_changed = true; + sync_blockdev(lo->lo_device); + kill_bdev(lo->lo_device); + } + + /* I/O need to be drained during transfer transition */ + blk_mq_freeze_queue(lo->lo_queue); + + if (size_changed && lo->lo_device->bd_inode->i_mapping->nrpages) { + /* If any pages were dirtied after kill_bdev(), try again */ + err = -EAGAIN; + pr_warn("%s: loop%d (%s) has still dirty pages (nrpages=%lu)\n", + __func__, lo->lo_number, lo->lo_file_name, + lo->lo_device->bd_inode->i_mapping->nrpages); + goto out_unfreeze; + } + + err = loop_set_status_from_info(lo, info); + if (err) + goto out_unfreeze; + if (size_changed) { loff_t new_size = get_size(lo->lo_offset, lo->lo_sizelimit, lo->lo_backing_file); -- cgit v1.2.3 From 62ab466ca881fe200c21aa74b65f8bd83ec482dc Mon Sep 17 00:00:00 2001 From: Martijn Coenen Date: Wed, 13 May 2020 15:38:42 +0200 Subject: loop: Move 
loop_set_status_from_info() and friends up So we can use it without forward declaration. This is a separate commit to make it easier to verify that this is just a move, without functional modifications. Signed-off-by: Martijn Coenen Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/block/loop.c | 206 +++++++++++++++++++++++++-------------------------- 1 file changed, 103 insertions(+), 103 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 6a4c0ba225ca..4dc11d954169 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -949,6 +949,109 @@ static void loop_update_rotational(struct loop_device *lo) blk_queue_flag_clear(QUEUE_FLAG_NONROT, q); } +static int +loop_release_xfer(struct loop_device *lo) +{ + int err = 0; + struct loop_func_table *xfer = lo->lo_encryption; + + if (xfer) { + if (xfer->release) + err = xfer->release(lo); + lo->transfer = NULL; + lo->lo_encryption = NULL; + module_put(xfer->owner); + } + return err; +} + +static int +loop_init_xfer(struct loop_device *lo, struct loop_func_table *xfer, + const struct loop_info64 *i) +{ + int err = 0; + + if (xfer) { + struct module *owner = xfer->owner; + + if (!try_module_get(owner)) + return -EINVAL; + if (xfer->init) + err = xfer->init(lo, i); + if (err) + module_put(owner); + else + lo->lo_encryption = xfer; + } + return err; +} + +/** + * loop_set_status_from_info - configure device from loop_info + * @lo: struct loop_device to configure + * @info: struct loop_info64 to configure the device with + * + * Configures the loop device parameters according to the passed + * in loop_info64 configuration. + */ +static int +loop_set_status_from_info(struct loop_device *lo, + const struct loop_info64 *info) +{ + int err; + struct loop_func_table *xfer; + kuid_t uid = current_uid(); + + if ((unsigned int) info->lo_encrypt_key_size > LO_KEY_SIZE) + return -EINVAL; + + err = loop_release_xfer(lo); + if (err) + return err; + + if (info->lo_encrypt_type) { + unsigned int type = info->lo_encrypt_type; + + if (type >= MAX_LO_CRYPT) + return -EINVAL; + xfer = xfer_funcs[type]; + if (xfer == NULL) + return -EINVAL; + } else + xfer = NULL; + + err = loop_init_xfer(lo, xfer, info); + if (err) + return err; + + lo->lo_offset = info->lo_offset; + lo->lo_sizelimit = info->lo_sizelimit; + memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE); + memcpy(lo->lo_crypt_name, info->lo_crypt_name, LO_NAME_SIZE); + lo->lo_file_name[LO_NAME_SIZE-1] = 0; + lo->lo_crypt_name[LO_NAME_SIZE-1] = 0; + + if (!xfer) + xfer = &none_funcs; + lo->transfer = xfer->transfer; + lo->ioctl = xfer->ioctl; + + if ((lo->lo_flags & LO_FLAGS_AUTOCLEAR) != + (info->lo_flags & LO_FLAGS_AUTOCLEAR)) + lo->lo_flags ^= LO_FLAGS_AUTOCLEAR; + + lo->lo_encrypt_key_size = info->lo_encrypt_key_size; + lo->lo_init[0] = info->lo_init[0]; + lo->lo_init[1] = info->lo_init[1]; + if (info->lo_encrypt_key_size) { + memcpy(lo->lo_encrypt_key, info->lo_encrypt_key, + info->lo_encrypt_key_size); + lo->lo_key_owner = uid; + } + + return 0; +} + static int loop_set_fd(struct loop_device *lo, fmode_t mode, struct block_device *bdev, unsigned int arg) { @@ -1070,43 +1173,6 @@ out: return error; } -static int -loop_release_xfer(struct loop_device *lo) -{ - int err = 0; - struct loop_func_table *xfer = lo->lo_encryption; - - if (xfer) { - if (xfer->release) - err = xfer->release(lo); - lo->transfer = NULL; - lo->lo_encryption = NULL; - module_put(xfer->owner); - } - return err; -} - -static int -loop_init_xfer(struct loop_device *lo, struct 
loop_func_table *xfer, - const struct loop_info64 *i) -{ - int err = 0; - - if (xfer) { - struct module *owner = xfer->owner; - - if (!try_module_get(owner)) - return -EINVAL; - if (xfer->init) - err = xfer->init(lo, i); - if (err) - module_put(owner); - else - lo->lo_encryption = xfer; - } - return err; -} - static int __loop_clr_fd(struct loop_device *lo, bool release) { struct file *filp = NULL; @@ -1251,72 +1317,6 @@ static int loop_clr_fd(struct loop_device *lo) return __loop_clr_fd(lo, false); } -/** - * loop_set_status_from_info - configure device from loop_info - * @lo: struct loop_device to configure - * @info: struct loop_info64 to configure the device with - * - * Configures the loop device parameters according to the passed - * in loop_info64 configuration. - */ -static int -loop_set_status_from_info(struct loop_device *lo, - const struct loop_info64 *info) -{ - int err; - struct loop_func_table *xfer; - kuid_t uid = current_uid(); - - if ((unsigned int) info->lo_encrypt_key_size > LO_KEY_SIZE) - return -EINVAL; - - err = loop_release_xfer(lo); - if (err) - return err; - - if (info->lo_encrypt_type) { - unsigned int type = info->lo_encrypt_type; - - if (type >= MAX_LO_CRYPT) - return -EINVAL; - xfer = xfer_funcs[type]; - if (xfer == NULL) - return -EINVAL; - } else - xfer = NULL; - - err = loop_init_xfer(lo, xfer, info); - if (err) - return err; - - lo->lo_offset = info->lo_offset; - lo->lo_sizelimit = info->lo_sizelimit; - memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE); - memcpy(lo->lo_crypt_name, info->lo_crypt_name, LO_NAME_SIZE); - lo->lo_file_name[LO_NAME_SIZE-1] = 0; - lo->lo_crypt_name[LO_NAME_SIZE-1] = 0; - - if (!xfer) - xfer = &none_funcs; - lo->transfer = xfer->transfer; - lo->ioctl = xfer->ioctl; - - if ((lo->lo_flags & LO_FLAGS_AUTOCLEAR) != - (info->lo_flags & LO_FLAGS_AUTOCLEAR)) - lo->lo_flags ^= LO_FLAGS_AUTOCLEAR; - - lo->lo_encrypt_key_size = info->lo_encrypt_key_size; - lo->lo_init[0] = info->lo_init[0]; - lo->lo_init[1] = info->lo_init[1]; - if (info->lo_encrypt_key_size) { - memcpy(lo->lo_encrypt_key, info->lo_encrypt_key, - info->lo_encrypt_key_size); - lo->lo_key_owner = uid; - } - - return 0; -} - static int loop_set_status(struct loop_device *lo, const struct loop_info64 *info) { -- cgit v1.2.3 From 571fae6e290d64a3e8132c455e7786c99c467ed1 Mon Sep 17 00:00:00 2001 From: Martijn Coenen Date: Wed, 13 May 2020 15:38:43 +0200 Subject: loop: Rework lo_ioctl() __user argument casting In preparation for a new ioctl that needs to copy_from_user(); makes the code easier to read as well. 
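The casting rework boils down to one small pattern; a hedged sketch with a hypothetical command, struct, and apply helper (only the __user cast and copy_from_user() usage mirror the patch):

	static int demo_ioctl(struct block_device *bdev, fmode_t mode,
			      unsigned int cmd, unsigned long arg)
	{
		void __user *argp = (void __user *)arg;	/* cast once, up front */
		struct demo_config cfg;			/* hypothetical struct */

		switch (cmd) {
		case DEMO_CONFIGURE:			/* hypothetical ioctl */
			if (copy_from_user(&cfg, argp, sizeof(cfg)))
				return -EFAULT;
			return demo_apply(bdev, &cfg);
		default:
			return -EINVAL;
		}
	}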
Signed-off-by: Martijn Coenen Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/block/loop.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 4dc11d954169..31f10da4945e 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1634,6 +1634,7 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg) { struct loop_device *lo = bdev->bd_disk->private_data; + void __user *argp = (void __user *) arg; int err; switch (cmd) { @@ -1646,21 +1647,19 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode, case LOOP_SET_STATUS: err = -EPERM; if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN)) { - err = loop_set_status_old(lo, - (struct loop_info __user *)arg); + err = loop_set_status_old(lo, argp); } break; case LOOP_GET_STATUS: - return loop_get_status_old(lo, (struct loop_info __user *) arg); + return loop_get_status_old(lo, argp); case LOOP_SET_STATUS64: err = -EPERM; if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN)) { - err = loop_set_status64(lo, - (struct loop_info64 __user *) arg); + err = loop_set_status64(lo, argp); } break; case LOOP_GET_STATUS64: - return loop_get_status64(lo, (struct loop_info64 __user *) arg); + return loop_get_status64(lo, argp); case LOOP_SET_CAPACITY: case LOOP_SET_DIRECT_IO: case LOOP_SET_BLOCK_SIZE: -- cgit v1.2.3 From faf1d25440d6ad06d509dada4b6fe62fea844370 Mon Sep 17 00:00:00 2001 From: Martijn Coenen Date: Wed, 13 May 2020 15:38:44 +0200 Subject: loop: Clean up LOOP_SET_STATUS lo_flags handling LOOP_SET_STATUS(64) will actually allow some lo_flags to be modified; in particular, LO_FLAGS_AUTOCLEAR can be set and cleared, whereas LO_FLAGS_PARTSCAN can be set to request a partition scan. Make this explicit by updating the UAPI to include the flags that can be set/cleared using this ioctl. The implementation can then blindly take over the passed in flags, and use the previous flags for those flags that can't be set / cleared using LOOP_SET_STATUS. Signed-off-by: Martijn Coenen Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/block/loop.c | 19 +++++++++++++------ include/uapi/linux/loop.h | 10 ++++++++-- 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 31f10da4945e..13518ba191f5 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1036,9 +1036,7 @@ loop_set_status_from_info(struct loop_device *lo, lo->transfer = xfer->transfer; lo->ioctl = xfer->ioctl; - if ((lo->lo_flags & LO_FLAGS_AUTOCLEAR) != - (info->lo_flags & LO_FLAGS_AUTOCLEAR)) - lo->lo_flags ^= LO_FLAGS_AUTOCLEAR; + lo->lo_flags = info->lo_flags; lo->lo_encrypt_key_size = info->lo_encrypt_key_size; lo->lo_init[0] = info->lo_init[0]; @@ -1323,6 +1321,7 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) int err; struct block_device *bdev; kuid_t uid = current_uid(); + int prev_lo_flags; bool partscan = false; bool size_changed = false; @@ -1359,10 +1358,19 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) goto out_unfreeze; } + prev_lo_flags = lo->lo_flags; + err = loop_set_status_from_info(lo, info); if (err) goto out_unfreeze; + /* Mask out flags that can't be set using LOOP_SET_STATUS. 
*/ + lo->lo_flags &= ~LOOP_SET_STATUS_SETTABLE_FLAGS; + /* For those flags, use the previous values instead */ + lo->lo_flags |= prev_lo_flags & ~LOOP_SET_STATUS_SETTABLE_FLAGS; + /* For flags that can't be cleared, use previous values too */ + lo->lo_flags |= prev_lo_flags & ~LOOP_SET_STATUS_CLEARABLE_FLAGS; + if (size_changed) { loff_t new_size = get_size(lo->lo_offset, lo->lo_sizelimit, lo->lo_backing_file); @@ -1377,9 +1385,8 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) out_unfreeze: blk_mq_unfreeze_queue(lo->lo_queue); - if (!err && (info->lo_flags & LO_FLAGS_PARTSCAN) && - !(lo->lo_flags & LO_FLAGS_PARTSCAN)) { - lo->lo_flags |= LO_FLAGS_PARTSCAN; + if (!err && (lo->lo_flags & LO_FLAGS_PARTSCAN) && + !(prev_lo_flags & LO_FLAGS_PARTSCAN)) { lo->lo_disk->flags &= ~GENHD_FL_NO_PART_SCAN; bdev = lo->lo_device; partscan = true; diff --git a/include/uapi/linux/loop.h b/include/uapi/linux/loop.h index 080a8df134ef..6b32fee80ce0 100644 --- a/include/uapi/linux/loop.h +++ b/include/uapi/linux/loop.h @@ -25,6 +25,12 @@ enum { LO_FLAGS_DIRECT_IO = 16, }; +/* LO_FLAGS that can be set using LOOP_SET_STATUS(64) */ +#define LOOP_SET_STATUS_SETTABLE_FLAGS (LO_FLAGS_AUTOCLEAR | LO_FLAGS_PARTSCAN) + +/* LO_FLAGS that can be cleared using LOOP_SET_STATUS(64) */ +#define LOOP_SET_STATUS_CLEARABLE_FLAGS (LO_FLAGS_AUTOCLEAR) + #include /* for __kernel_old_dev_t */ #include /* for __u64 */ @@ -37,7 +43,7 @@ struct loop_info { int lo_offset; int lo_encrypt_type; int lo_encrypt_key_size; /* ioctl w/o */ - int lo_flags; /* ioctl r/o */ + int lo_flags; char lo_name[LO_NAME_SIZE]; unsigned char lo_encrypt_key[LO_KEY_SIZE]; /* ioctl w/o */ unsigned long lo_init[2]; @@ -53,7 +59,7 @@ struct loop_info64 { __u32 lo_number; /* ioctl r/o */ __u32 lo_encrypt_type; __u32 lo_encrypt_key_size; /* ioctl w/o */ - __u32 lo_flags; /* ioctl r/o */ + __u32 lo_flags; __u8 lo_file_name[LO_NAME_SIZE]; __u8 lo_crypt_name[LO_NAME_SIZE]; __u8 lo_encrypt_key[LO_KEY_SIZE]; /* ioctl w/o */ -- cgit v1.2.3 From 3448914e8cc550ba792d4ccc74471d1ca4293aae Mon Sep 17 00:00:00 2001 From: Martijn Coenen Date: Wed, 13 May 2020 15:38:45 +0200 Subject: loop: Add LOOP_CONFIGURE ioctl This allows userspace to completely setup a loop device with a single ioctl, removing the in-between state where the device can be partially configured - eg the loop device has a backing file associated with it, but is reading from the wrong offset. Besides removing the intermediate state, another big benefit of this ioctl is that LOOP_SET_STATUS can be slow; the main reason for this slowness is that LOOP_SET_STATUS(64) calls blk_mq_freeze_queue() to freeze the associated queue; this requires waiting for RCU synchronization, which I've measured can take about 15-20ms on this device on average. 
In addition to doing what LOOP_SET_STATUS can do, LOOP_CONFIGURE can also be used to: - Set the correct block size immediately by setting loop_config.block_size (avoids LOOP_SET_BLOCK_SIZE) - Explicitly request direct I/O mode by setting LO_FLAGS_DIRECT_IO in loop_config.info.lo_flags (avoids LOOP_SET_DIRECT_IO) - Explicitly request read-only mode by setting LO_FLAGS_READ_ONLY in loop_config.info.lo_flags Here's setting up ~70 regular loop devices with an offset on an x86 Android device, using LOOP_SET_FD and LOOP_SET_STATUS: vsoc_x86:/system/apex # time for i in `seq 30 100`; do losetup -r -o 4096 /dev/block/loop$i com.android.adbd.apex; done 0m03.40s real 0m00.02s user 0m00.03s system Here's configuring ~70 devices in the same way, but using a modified losetup that uses the new LOOP_CONFIGURE ioctl: vsoc_x86:/system/apex # time for i in `seq 30 100`; do losetup -r -o 4096 /dev/block/loop$i com.android.adbd.apex; done 0m01.94s real 0m00.01s user 0m00.01s system Signed-off-by: Martijn Coenen Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/block/loop.c | 104 +++++++++++++++++++++++++++++++++------------- include/uapi/linux/loop.h | 21 ++++++++++ 2 files changed, 97 insertions(+), 28 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 13518ba191f5..a565c5aafa52 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -228,6 +228,19 @@ static void __loop_update_dio(struct loop_device *lo, bool dio) blk_mq_unfreeze_queue(lo->lo_queue); } +/** + * loop_validate_block_size() - validates the passed in block size + * @bsize: size to validate + */ +static int +loop_validate_block_size(unsigned short bsize) +{ + if (bsize < 512 || bsize > PAGE_SIZE || !is_power_of_2(bsize)) + return -EINVAL; + + return 0; +} + /** * loop_set_size() - sets device size and notifies userspace * @lo: struct loop_device to set the size for @@ -1050,23 +1063,24 @@ loop_set_status_from_info(struct loop_device *lo, return 0; } -static int loop_set_fd(struct loop_device *lo, fmode_t mode, - struct block_device *bdev, unsigned int arg) +static int loop_configure(struct loop_device *lo, fmode_t mode, + struct block_device *bdev, + const struct loop_config *config) { struct file *file; struct inode *inode; struct address_space *mapping; struct block_device *claimed_bdev = NULL; - int lo_flags = 0; int error; loff_t size; bool partscan; + unsigned short bsize; /* This is safe, since we have a reference from open(). */ __module_get(THIS_MODULE); error = -EBADF; - file = fget(arg); + file = fget(config->fd); if (!file) goto out; @@ -1075,7 +1089,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, * here to avoid changing device under exclusive owner. 
*/ if (!(mode & FMODE_EXCL)) { - claimed_bdev = bd_start_claiming(bdev, loop_set_fd); + claimed_bdev = bd_start_claiming(bdev, loop_configure); if (IS_ERR(claimed_bdev)) { error = PTR_ERR(claimed_bdev); goto out_putf; @@ -1097,11 +1111,26 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, mapping = file->f_mapping; inode = mapping->host; + size = get_loop_size(lo, file); + + if ((config->info.lo_flags & ~LOOP_CONFIGURE_SETTABLE_FLAGS) != 0) { + error = -EINVAL; + goto out_unlock; + } + + if (config->block_size) { + error = loop_validate_block_size(config->block_size); + if (error) + goto out_unlock; + } + + error = loop_set_status_from_info(lo, &config->info); + if (error) + goto out_unlock; + if (!(file->f_mode & FMODE_WRITE) || !(mode & FMODE_WRITE) || !file->f_op->write_iter) - lo_flags |= LO_FLAGS_READ_ONLY; - - size = get_loop_size(lo, file); + lo->lo_flags |= LO_FLAGS_READ_ONLY; error = loop_prepare_queue(lo); if (error) @@ -1109,30 +1138,28 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, error = 0; - set_device_ro(bdev, (lo_flags & LO_FLAGS_READ_ONLY) != 0); + set_device_ro(bdev, (lo->lo_flags & LO_FLAGS_READ_ONLY) != 0); - lo->use_dio = false; + lo->use_dio = lo->lo_flags & LO_FLAGS_DIRECT_IO; lo->lo_device = bdev; - lo->lo_flags = lo_flags; lo->lo_backing_file = file; - lo->transfer = NULL; - lo->ioctl = NULL; - lo->lo_sizelimit = 0; lo->old_gfp_mask = mapping_gfp_mask(mapping); mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS)); - if (!(lo_flags & LO_FLAGS_READ_ONLY) && file->f_op->fsync) + if (!(lo->lo_flags & LO_FLAGS_READ_ONLY) && file->f_op->fsync) blk_queue_write_cache(lo->lo_queue, true, false); - if (io_is_direct(lo->lo_backing_file) && inode->i_sb->s_bdev) { + if (config->block_size) + bsize = config->block_size; + else if (io_is_direct(lo->lo_backing_file) && inode->i_sb->s_bdev) /* In case of direct I/O, match underlying block size */ - unsigned short bsize = bdev_logical_block_size( - inode->i_sb->s_bdev); + bsize = bdev_logical_block_size(inode->i_sb->s_bdev); + else + bsize = 512; - blk_queue_logical_block_size(lo->lo_queue, bsize); - blk_queue_physical_block_size(lo->lo_queue, bsize); - blk_queue_io_min(lo->lo_queue, bsize); - } + blk_queue_logical_block_size(lo->lo_queue, bsize); + blk_queue_physical_block_size(lo->lo_queue, bsize); + blk_queue_io_min(lo->lo_queue, bsize); loop_update_rotational(lo); loop_update_dio(lo); @@ -1155,14 +1182,14 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, if (partscan) loop_reread_partitions(lo, bdev); if (claimed_bdev) - bd_abort_claiming(bdev, claimed_bdev, loop_set_fd); + bd_abort_claiming(bdev, claimed_bdev, loop_configure); return 0; out_unlock: mutex_unlock(&loop_ctl_mutex); out_bdev: if (claimed_bdev) - bd_abort_claiming(bdev, claimed_bdev, loop_set_fd); + bd_abort_claiming(bdev, claimed_bdev, loop_configure); out_putf: fput(file); out: @@ -1582,8 +1609,9 @@ static int loop_set_block_size(struct loop_device *lo, unsigned long arg) if (lo->lo_state != Lo_bound) return -ENXIO; - if (arg < 512 || arg > PAGE_SIZE || !is_power_of_2(arg)) - return -EINVAL; + err = loop_validate_block_size(arg); + if (err) + return err; if (lo->lo_queue->limits.logical_block_size == arg) return 0; @@ -1645,8 +1673,27 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode, int err; switch (cmd) { - case LOOP_SET_FD: - return loop_set_fd(lo, mode, bdev, arg); + case LOOP_SET_FD: { + /* + * Legacy case - pass in a zeroed out struct loop_config with + * only the file descriptor 
set , which corresponds with the + * default parameters we'd have used otherwise. + */ + struct loop_config config; + + memset(&config, 0, sizeof(config)); + config.fd = arg; + + return loop_configure(lo, mode, bdev, &config); + } + case LOOP_CONFIGURE: { + struct loop_config config; + + if (copy_from_user(&config, argp, sizeof(config))) + return -EFAULT; + + return loop_configure(lo, mode, bdev, &config); + } case LOOP_CHANGE_FD: return loop_change_fd(lo, bdev, arg); case LOOP_CLR_FD: @@ -1818,6 +1865,7 @@ static int lo_compat_ioctl(struct block_device *bdev, fmode_t mode, case LOOP_CLR_FD: case LOOP_GET_STATUS64: case LOOP_SET_STATUS64: + case LOOP_CONFIGURE: arg = (unsigned long) compat_ptr(arg); /* fall through */ case LOOP_SET_FD: diff --git a/include/uapi/linux/loop.h b/include/uapi/linux/loop.h index 6b32fee80ce0..24a1c45bd1ae 100644 --- a/include/uapi/linux/loop.h +++ b/include/uapi/linux/loop.h @@ -31,6 +31,10 @@ enum { /* LO_FLAGS that can be cleared using LOOP_SET_STATUS(64) */ #define LOOP_SET_STATUS_CLEARABLE_FLAGS (LO_FLAGS_AUTOCLEAR) +/* LO_FLAGS that can be set using LOOP_CONFIGURE */ +#define LOOP_CONFIGURE_SETTABLE_FLAGS (LO_FLAGS_READ_ONLY | LO_FLAGS_AUTOCLEAR \ + | LO_FLAGS_PARTSCAN | LO_FLAGS_DIRECT_IO) + #include /* for __kernel_old_dev_t */ #include /* for __u64 */ @@ -66,6 +70,22 @@ struct loop_info64 { __u64 lo_init[2]; }; +/** + * struct loop_config - Complete configuration for a loop device. + * @fd: fd of the file to be used as a backing file for the loop device. + * @block_size: block size to use; ignored if 0. + * @info: struct loop_info64 to configure the loop device with. + * + * This structure is used with the LOOP_CONFIGURE ioctl, and can be used to + * atomically setup and configure all loop device parameters at once. + */ +struct loop_config { + __u32 fd; + __u32 block_size; + struct loop_info64 info; + __u64 __reserved[8]; +}; + /* * Loop filter types */ @@ -96,6 +116,7 @@ struct loop_info64 { #define LOOP_SET_CAPACITY 0x4C07 #define LOOP_SET_DIRECT_IO 0x4C08 #define LOOP_SET_BLOCK_SIZE 0x4C09 +#define LOOP_CONFIGURE 0x4C0A /* /dev/loop-control interface */ #define LOOP_CTL_ADD 0x4C80 -- cgit v1.2.3 From 9353848c6589ffe6373d03f3a58feaeda1009641 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 19 May 2020 16:22:58 +0200 Subject: dasd: refactor dasd_ioctl_information Prepare for in-kernel callers of this functionality. Signed-off-by: Christoph Hellwig [sth@de.ibm.com: remove leftover kfree] Signed-off-by: Stefan Haberland Reviewed-by: Peter Oberparleiter Reviewed-by: Jan Hoeppner Signed-off-by: Jens Axboe --- drivers/s390/block/dasd_ioctl.c | 42 ++++++++++++++++++++++------------------- 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/drivers/s390/block/dasd_ioctl.c b/drivers/s390/block/dasd_ioctl.c index 9a5f3add325f..9b7782395c37 100644 --- a/drivers/s390/block/dasd_ioctl.c +++ b/drivers/s390/block/dasd_ioctl.c @@ -457,10 +457,9 @@ static int dasd_ioctl_read_profile(struct dasd_block *block, void __user *argp) /* * Return dasd information. Used for BIODASDINFO and BIODASDINFO2. 
*/ -static int dasd_ioctl_information(struct dasd_block *block, - unsigned int cmd, void __user *argp) +static int __dasd_ioctl_information(struct dasd_block *block, + struct dasd_information2_t *dasd_info) { - struct dasd_information2_t *dasd_info; struct subchannel_id sch_id; struct ccw_dev_id dev_id; struct dasd_device *base; @@ -473,15 +472,9 @@ static int dasd_ioctl_information(struct dasd_block *block, if (!base->discipline || !base->discipline->fill_info) return -EINVAL; - dasd_info = kzalloc(sizeof(struct dasd_information2_t), GFP_KERNEL); - if (dasd_info == NULL) - return -ENOMEM; - rc = base->discipline->fill_info(base, dasd_info); - if (rc) { - kfree(dasd_info); + if (rc) return rc; - } cdev = base->cdev; ccw_device_get_id(cdev, &dev_id); @@ -520,15 +513,24 @@ static int dasd_ioctl_information(struct dasd_block *block, list_for_each(l, &base->ccw_queue) dasd_info->chanq_len++; spin_unlock_irqrestore(&block->queue_lock, flags); + return 0; +} - rc = 0; - if (copy_to_user(argp, dasd_info, - ((cmd == (unsigned int) BIODASDINFO2) ? - sizeof(struct dasd_information2_t) : - sizeof(struct dasd_information_t)))) - rc = -EFAULT; +static int dasd_ioctl_information(struct dasd_block *block, void __user *argp, + size_t copy_size) +{ + struct dasd_information2_t *dasd_info; + int error; + + dasd_info = kzalloc(sizeof(*dasd_info), GFP_KERNEL); + if (!dasd_info) + return -ENOMEM; + + error = __dasd_ioctl_information(block, dasd_info); + if (!error && copy_to_user(argp, dasd_info, copy_size)) + error = -EFAULT; kfree(dasd_info); - return rc; + return error; } /* @@ -622,10 +624,12 @@ int dasd_ioctl(struct block_device *bdev, fmode_t mode, rc = dasd_ioctl_check_format(bdev, argp); break; case BIODASDINFO: - rc = dasd_ioctl_information(block, cmd, argp); + rc = dasd_ioctl_information(block, argp, + sizeof(struct dasd_information_t)); break; case BIODASDINFO2: - rc = dasd_ioctl_information(block, cmd, argp); + rc = dasd_ioctl_information(block, argp, + sizeof(struct dasd_information2_t)); break; case BIODASDPRRD: rc = dasd_ioctl_read_profile(block, argp); -- cgit v1.2.3 From 26d7e28e38206b1b3207af1409eee2269ab36f82 Mon Sep 17 00:00:00 2001 From: Stefan Haberland Date: Tue, 19 May 2020 16:22:59 +0200 Subject: s390/dasd: remove ioctl_by_bdev calls The IBM partition parser requires device type specific information only available to the DASD driver to correctly register partitions. The current approach of using ioctl_by_bdev with a fake user space pointer is discouraged. Fix this by replacing IOCTL calls with direct in-kernel function calls. 
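The replacement relies on the symbol_get()/symbol_put() pattern, so the partition parser avoids a hard module dependency on the DASD driver. A minimal sketch of that pattern (the wrapper itself is hypothetical; the mechanism is what the patch below uses):

	#include <linux/module.h>
	#include <linux/dasd_mod.h>

	static int demo_query_dasd(struct gendisk *disk, dasd_information2_t *info)
	{
		int (*fn)(struct gendisk *disk, dasd_information2_t *info);
		int rc;

		fn = symbol_get(dasd_biodasdinfo);  /* NULL if dasd_mod not loaded */
		if (!fn)
			return -ENODEV;

		rc = fn(disk, info);
		symbol_put(dasd_biodasdinfo);       /* drop the module reference */
		return rc;
	}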
Suggested-by: Christoph Hellwig Signed-off-by: Stefan Haberland Reviewed-by: Jan Hoeppner Reviewed-by: Peter Oberparleiter Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- MAINTAINERS | 1 + block/partitions/ibm.c | 24 ++++++++++++++++++------ drivers/s390/block/dasd_ioctl.c | 34 ++++++++++++++++++++++++++++++++++ include/linux/dasd_mod.h | 9 +++++++++ 4 files changed, 62 insertions(+), 6 deletions(-) create mode 100644 include/linux/dasd_mod.h diff --git a/MAINTAINERS b/MAINTAINERS index b816a453b10e..927fd9845d2f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -14628,6 +14628,7 @@ S: Supported W: http://www.ibm.com/developerworks/linux/linux390/ F: block/partitions/ibm.c F: drivers/s390/block/dasd* +F: include/linux/dasd_mod.h S390 IOMMU (PCI) M: Gerald Schaefer diff --git a/block/partitions/ibm.c b/block/partitions/ibm.c index 073faa6a69b8..d6e18df9c53c 100644 --- a/block/partitions/ibm.c +++ b/block/partitions/ibm.c @@ -13,10 +13,11 @@ #include #include #include +#include +#include #include "check.h" - union label_t { struct vtoc_volume_label_cdl vol; struct vtoc_volume_label_ldl lnx; @@ -288,7 +289,9 @@ static int find_cms1_partitions(struct parsed_partitions *state, */ int ibm_partition(struct parsed_partitions *state) { + int (*fn)(struct gendisk *disk, dasd_information2_t *info); struct block_device *bdev = state->bdev; + struct gendisk *disk = bdev->bd_disk; int blocksize, res; loff_t i_size, offset, size; dasd_information2_t *info; @@ -299,24 +302,31 @@ int ibm_partition(struct parsed_partitions *state) union label_t *label; res = 0; + if (!disk->fops->getgeo) + goto out_exit; + fn = symbol_get(dasd_biodasdinfo); + if (!fn) + goto out_exit; blocksize = bdev_logical_block_size(bdev); if (blocksize <= 0) - goto out_exit; + goto out_symbol; i_size = i_size_read(bdev->bd_inode); if (i_size == 0) - goto out_exit; + goto out_symbol; info = kmalloc(sizeof(dasd_information2_t), GFP_KERNEL); if (info == NULL) - goto out_exit; + goto out_symbol; geo = kmalloc(sizeof(struct hd_geometry), GFP_KERNEL); if (geo == NULL) goto out_nogeo; label = kmalloc(sizeof(union label_t), GFP_KERNEL); if (label == NULL) goto out_nolab; - if (ioctl_by_bdev(bdev, HDIO_GETGEO, (unsigned long)geo) != 0) + /* set start if not filled by getgeo function e.g. virtblk */ + geo->start = get_start_sect(bdev); + if (disk->fops->getgeo(bdev, geo)) goto out_freeall; - if (ioctl_by_bdev(bdev, BIODASDINFO2, (unsigned long)info) != 0) { + if (fn(disk, info)) { kfree(info); info = NULL; } @@ -359,6 +369,8 @@ out_nolab: kfree(geo); out_nogeo: kfree(info); +out_symbol: + symbol_put(dasd_biodasdinfo); out_exit: return res; } diff --git a/drivers/s390/block/dasd_ioctl.c b/drivers/s390/block/dasd_ioctl.c index 9b7782395c37..777734d1b4e5 100644 --- a/drivers/s390/block/dasd_ioctl.c +++ b/drivers/s390/block/dasd_ioctl.c @@ -22,6 +22,7 @@ #include #include #include +#include /* This is ugly... */ #define PRINTK_HEADER "dasd_ioctl:" @@ -664,3 +665,36 @@ int dasd_ioctl(struct block_device *bdev, fmode_t mode, dasd_put_device(base); return rc; } + + +/** + * dasd_biodasdinfo() - fill out the dasd information structure + * @disk [in]: pointer to gendisk structure that references a DASD + * @info [out]: pointer to the dasd_information2_t structure + * + * Provide access to DASD specific information. + * The gendisk structure is checked if it belongs to the DASD driver by + * comparing the gendisk->fops pointer. + * If it does not belong to the DASD driver -EINVAL is returned. 
+ * Otherwise the provided dasd_information2_t structure is filled out. + * + * Returns: + * %0 on success and a negative error value on failure. + */ +int dasd_biodasdinfo(struct gendisk *disk, struct dasd_information2_t *info) +{ + struct dasd_device *base; + int error; + + if (disk->fops != &dasd_device_operations) + return -EINVAL; + + base = dasd_device_from_gendisk(disk); + if (!base) + return -ENODEV; + error = __dasd_ioctl_information(base->block, info); + dasd_put_device(base); + return error; +} +/* export that symbol_get in partition detection is possible */ +EXPORT_SYMBOL_GPL(dasd_biodasdinfo); diff --git a/include/linux/dasd_mod.h b/include/linux/dasd_mod.h new file mode 100644 index 000000000000..d39abad2ff6e --- /dev/null +++ b/include/linux/dasd_mod.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef DASD_MOD_H +#define DASD_MOD_H + +#include + +extern int dasd_biodasdinfo(struct gendisk *disk, dasd_information2_t *info); + +#endif -- cgit v1.2.3 From 3783daeb1d24696ff00125050353cfce4f5b6239 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 19 May 2020 16:33:21 +0200 Subject: block: remove ioctl_by_bdev No callers left. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- fs/block_dev.c | 12 ------------ include/linux/fs.h | 1 - 2 files changed, 13 deletions(-) diff --git a/fs/block_dev.c b/fs/block_dev.c index 7cbb7b79935e..3003831c771d 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -2166,18 +2166,6 @@ const struct file_operations def_blk_fops = { .fallocate = blkdev_fallocate, }; -int ioctl_by_bdev(struct block_device *bdev, unsigned cmd, unsigned long arg) -{ - int res; - mm_segment_t old_fs = get_fs(); - set_fs(KERNEL_DS); - res = blkdev_ioctl(bdev, 0, cmd, arg); - set_fs(old_fs); - return res; -} - -EXPORT_SYMBOL(ioctl_by_bdev); - /** * lookup_bdev - lookup a struct block_device by name * @pathname: special file representing the block device diff --git a/include/linux/fs.h b/include/linux/fs.h index 1a95e5158811..861ca61d728b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2636,7 +2636,6 @@ extern int sync_filesystem(struct super_block *); extern const struct file_operations def_blk_fops; extern const struct file_operations def_chr_fops; #ifdef CONFIG_BLOCK -extern int ioctl_by_bdev(struct block_device *, unsigned, unsigned long); extern int blkdev_ioctl(struct block_device *, fmode_t, unsigned, unsigned long); extern long compat_blkdev_ioctl(struct file *, unsigned, unsigned long); extern int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder); -- cgit v1.2.3 From d29b92f57ecee125a86587919a22152a702a6411 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sun, 24 May 2020 17:10:43 +0100 Subject: loop: remove redundant assignment to variable error The variable error is being assigned a value that is never read so the assignment is redundant and can be removed. 
Addresses-Coverity: ("Unused value") Signed-off-by: Colin Ian King Signed-off-by: Jens Axboe --- drivers/block/loop.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index a565c5aafa52..8462ada86e91 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1136,8 +1136,6 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, if (error) goto out_unlock; - error = 0; - set_device_ro(bdev, (lo->lo_flags & LO_FLAGS_READ_ONLY) != 0); lo->use_dio = lo->lo_flags & LO_FLAGS_DIRECT_IO; -- cgit v1.2.3 From 263c61581a38d0a5ad1f5f4a9143b27d68caeffd Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Tue, 26 May 2020 11:49:18 +0200 Subject: block/floppy: fix contended case in floppy_queue_rq() Since the switch of the floppy driver to blk-mq, the contended (fdc_busy) case in floppy_queue_rq() is not handled correctly. In case we reach floppy_queue_rq() with fdc_busy set (i.e. with the floppy locked due to another request still being in flight), we put the request on the list of requests and return BLK_STS_OK to the block core, without actually scheduling delayed work / doing further processing of the request. This means that processing of this request is postponed until another request comes in and passes uncontended, which in some cases might never happen, and we keep waiting indefinitely. The simple testcase is for i in `seq 1 2000`; do echo -en $i '\r'; blkid --info /dev/fd0 2> /dev/null; done run in qemu. That reliably causes blkid to eventually hang indefinitely in __floppy_read_block_0() waiting for completion, as the BIO callback never happens and no further IO is ever submitted on the (non-existent) floppy device. This was observed reliably on a qemu-emulated device. Fix that by not queuing the request in the contended case and returning BLK_STS_RESOURCE instead, so that the block core handles the request rescheduling and lets it pass properly, non-contended, later. Fixes: a9f38e1dec107a ("floppy: convert to blk-mq") Cc: stable@vger.kernel.org Tested-by: Libor Pechacek Signed-off-by: Jiri Kosina Signed-off-by: Jens Axboe --- drivers/block/floppy.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 064c1acb9f00..3e9db22db2a8 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -2950,17 +2950,17 @@ static blk_status_t floppy_queue_rq(struct blk_mq_hw_ctx *hctx, (unsigned long long) current_req->cmd_flags)) return BLK_STS_IOERR; - spin_lock_irq(&floppy_lock); - list_add_tail(&bd->rq->queuelist, &floppy_reqs); - spin_unlock_irq(&floppy_lock); - if (test_and_set_bit(0, &fdc_busy)) { /* fdc busy, this new request will be treated when the current one is done */ is_alive(__func__, "old request running"); - return BLK_STS_OK; + return BLK_STS_RESOURCE; } + spin_lock_irq(&floppy_lock); + list_add_tail(&bd->rq->queuelist, &floppy_reqs); + spin_unlock_irq(&floppy_lock); + command_status = FD_COMMAND_NONE; __reschedule_timeout(MAXTIMEOUT, "fd_request"); set_fdc(0); -- cgit v1.2.3 From 09bb8986c99cd3395e96635df9e712f5da45bc07 Mon Sep 17 00:00:00 2001 From: Chen Zhou Date: Fri, 8 May 2020 19:59:06 +0800 Subject: nvmet: replace kstrndup() with kmemdup_nul() It is more efficient to use kmemdup_nul() if the size is known exactly. The kernel documentation notes: "Note: Use kmemdup_nul() instead if the size is known exactly."
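The difference is small but concrete: kstrndup() has to scan the source with strnlen() to determine how much to copy, while kmemdup_nul() trusts the caller-supplied length, copies exactly that much, and NUL-terminates the result - one pass instead of two. A minimal sketch (the wrapper name is made up):

	#include <linux/slab.h>
	#include <linux/string.h>

	static char *demo_dup_attr(const char *page, size_t len)
	{
		/* page need not be NUL-terminated; len is already known exactly */
		return kmemdup_nul(page, len, GFP_KERNEL);
	}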
Signed-off-by: Chen Zhou Signed-off-by: Christoph Hellwig --- drivers/nvme/target/configfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index ae8fb4489a10..19bd5e1c681c 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -324,7 +324,7 @@ static ssize_t nvmet_ns_device_path_store(struct config_item *item, kfree(ns->device_path); ret = -ENOMEM; - ns->device_path = kstrndup(page, len, GFP_KERNEL); + ns->device_path = kmemdup_nul(page, len, GFP_KERNEL); if (!ns->device_path) goto out_unlock; @@ -960,7 +960,7 @@ static ssize_t nvmet_subsys_attr_model_store(struct config_item *item, return -EINVAL; } - new_model_number = kstrndup(page, len, GFP_KERNEL); + new_model_number = kmemdup_nul(page, len, GFP_KERNEL); if (!new_model_number) return -ENOMEM; -- cgit v1.2.3 From 7425596945d7f8bf79f42b2a9c8463d16e24fc34 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 12 May 2020 18:19:13 +0200 Subject: nvmet: mark nvmet_ana_state static Signed-off-by: Christoph Hellwig Reviewed-by: Max Gurtovoy Reviewed-by: Chaitanya Kulkarni --- drivers/nvme/target/configfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index 19bd5e1c681c..24eb4cf53b4f 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -1146,7 +1146,7 @@ static const struct config_item_type nvmet_referrals_type = { .ct_group_ops = &nvmet_referral_group_ops, }; -struct nvmet_type_name_map nvmet_ana_state[] = { +static struct nvmet_type_name_map nvmet_ana_state[] = { { NVME_ANA_OPTIMIZED, "optimized" }, { NVME_ANA_NONOPTIMIZED, "non-optimized" }, { NVME_ANA_INACCESSIBLE, "inaccessible" }, -- cgit v1.2.3 From 5bb052d7aad1f4063631f2aa05452c700139f0f3 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 4 May 2020 22:20:01 -0700 Subject: nvme-tcp: set MSG_SENDPAGE_NOTLAST with MSG_MORE when we have more to send We can signal the stack that this is not the last page coming and the stack can build a larger tso segment, so go ahead and use it. Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/tcp.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index c79e248b9f43..7c7c1886642f 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -885,7 +885,7 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req) if (last && !queue->data_digest) flags |= MSG_EOR; else - flags |= MSG_MORE; + flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST; /* can't zcopy slab pages */ if (unlikely(PageSlab(page))) { @@ -924,11 +924,16 @@ static int nvme_tcp_try_send_cmd_pdu(struct nvme_tcp_request *req) struct nvme_tcp_queue *queue = req->queue; struct nvme_tcp_cmd_pdu *pdu = req->pdu; bool inline_data = nvme_tcp_has_inline_data(req); - int flags = MSG_DONTWAIT | (inline_data ? 
MSG_MORE : MSG_EOR); u8 hdgst = nvme_tcp_hdgst_len(queue); int len = sizeof(*pdu) + hdgst - req->offset; + int flags = MSG_DONTWAIT; int ret; + if (inline_data) + flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST; + else + flags |= MSG_EOR; + if (queue->hdr_digest && !req->offset) nvme_tcp_hdgst(queue->snd_hash, pdu, sizeof(*pdu)); @@ -967,7 +972,7 @@ static int nvme_tcp_try_send_data_pdu(struct nvme_tcp_request *req) ret = kernel_sendpage(queue->sock, virt_to_page(pdu), offset_in_page(pdu) + req->offset, len, - MSG_DONTWAIT | MSG_MORE); + MSG_DONTWAIT | MSG_MORE | MSG_SENDPAGE_NOTLAST); if (unlikely(ret <= 0)) return ret; -- cgit v1.2.3 From 4eea804364628c30facd99c30499b2b01c6272c6 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 4 May 2020 22:20:02 -0700 Subject: nvmet-tcp: set MSG_SENDPAGE_NOTLAST with MSG_MORE when we have more to send We can signal the stack that this is not the last page coming and the stack can build a larger tso segment, so go ahead and use it. Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/target/tcp.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index f0da04e960f4..c08aec62115e 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -510,7 +510,7 @@ static int nvmet_try_send_data_pdu(struct nvmet_tcp_cmd *cmd) ret = kernel_sendpage(cmd->queue->sock, virt_to_page(cmd->data_pdu), offset_in_page(cmd->data_pdu) + cmd->offset, - left, MSG_DONTWAIT | MSG_MORE); + left, MSG_DONTWAIT | MSG_MORE | MSG_SENDPAGE_NOTLAST); if (ret <= 0) return ret; @@ -538,7 +538,7 @@ static int nvmet_try_send_data(struct nvmet_tcp_cmd *cmd, bool last_in_batch) if ((!last_in_batch && cmd->queue->send_list_len) || cmd->wbytes_done + left < cmd->req.transfer_len || queue->data_digest || !queue->nvme_sq.sqhd_disabled) - flags |= MSG_MORE; + flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST; ret = kernel_sendpage(cmd->queue->sock, page, cmd->offset, left, flags); @@ -585,7 +585,7 @@ static int nvmet_try_send_response(struct nvmet_tcp_cmd *cmd, int ret; if (!last_in_batch && cmd->queue->send_list_len) - flags |= MSG_MORE; + flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST; else flags |= MSG_EOR; @@ -614,7 +614,7 @@ static int nvmet_try_send_r2t(struct nvmet_tcp_cmd *cmd, bool last_in_batch) int ret; if (!last_in_batch && cmd->queue->send_list_len) - flags |= MSG_MORE; + flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST; else flags |= MSG_EOR; -- cgit v1.2.3 From f381ab1f26aa33f14412cb9b7cb7f947b88a55b2 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Tue, 12 May 2020 18:01:43 -0700 Subject: nvmet-tcp: set MSG_EOR if we send last payload in the batch when trying to send the pdu data digest, we should set this flag. 
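For background on the flags involved: MSG_MORE hints that more data follows so the stack may coalesce it into fewer segments, MSG_EOR marks the record boundary, and MSG_SENDPAGE_NOTLAST is the kernel-internal sendpage counterpart of MSG_MORE with no userspace equivalent. A minimal userspace sketch of the same flag discipline over plain send() (hypothetical helper; the driver itself uses kernel_sendmsg()/kernel_sendpage()):

    #include <sys/types.h>
    #include <sys/socket.h>

    /* Send a header and payload as one logical record: the header is
     * flagged MSG_MORE so the stack may merge it with the payload that
     * follows, and the final piece is flagged MSG_EOR so later sends
     * are not merged into this record. */
    static ssize_t send_record(int fd, const void *hdr, size_t hlen,
    			   const void *data, size_t dlen)
    {
    	ssize_t n = send(fd, hdr, hlen, MSG_DONTWAIT | MSG_MORE);

    	if (n < 0)
    		return n;
    	return send(fd, data, dlen, MSG_DONTWAIT | MSG_EOR);
    }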
Reported-by: Christoph Hellwig Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/target/tcp.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index c08aec62115e..f8dd69f3b657 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -644,6 +644,8 @@ static int nvmet_try_send_ddgst(struct nvmet_tcp_cmd *cmd, bool last_in_batch) if (!last_in_batch && cmd->queue->send_list_len) msg.msg_flags |= MSG_MORE; + else + msg.msg_flags |= MSG_EOR; ret = kernel_sendmsg(queue->sock, &msg, &iov, 1, iov.iov_len); if (unlikely(ret <= 0)) -- cgit v1.2.3 From 0236d3437909ff888e5c79228e2d5a851651c4c6 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 18 May 2020 10:47:48 -0700 Subject: nvmet-tcp: move send/recv error handling in the send/recv methods instead of call-sites Have the send/recv routines handle their own errors and just bail out of the poll loop on failure. This simplifies the code and will help as we enhance the poll loop logic, since the call-site error handling was somewhat in the way. Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/target/tcp.c | 43 ++++++++++++++++++++++++------------------- 1 file changed, 24 insertions(+), 19 deletions(-) diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index f8dd69f3b657..6f557db0320d 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -325,6 +325,14 @@ static void nvmet_tcp_fatal_error(struct nvmet_tcp_queue *queue) kernel_sock_shutdown(queue->sock, SHUT_RDWR); } +static void nvmet_tcp_socket_error(struct nvmet_tcp_queue *queue, int status) +{ + if (status == -EPIPE || status == -ECONNRESET) + kernel_sock_shutdown(queue->sock, SHUT_RDWR); + else + nvmet_tcp_fatal_error(queue); +} + static int nvmet_tcp_map_data(struct nvmet_tcp_cmd *cmd) { struct nvme_sgl_desc *sgl = &cmd->req.cmd->common.dptr.sgl; @@ -718,11 +726,15 @@ static int nvmet_tcp_try_send(struct nvmet_tcp_queue *queue, for (i = 0; i < budget; i++) { ret = nvmet_tcp_try_send_one(queue, i == budget - 1); - if (ret <= 0) + if (unlikely(ret < 0)) { + nvmet_tcp_socket_error(queue, ret); + goto done; + } else if (ret == 0) { break; + } (*sends)++; } - +done: return ret; } @@ -1159,11 +1171,15 @@ static int nvmet_tcp_try_recv(struct nvmet_tcp_queue *queue, for (i = 0; i < budget; i++) { ret = nvmet_tcp_try_recv_one(queue); - if (ret <= 0) + if (unlikely(ret < 0)) { + nvmet_tcp_socket_error(queue, ret); + goto done; + } else if (ret == 0) { break; + } (*recvs)++; } - +done: return ret; } @@ -1188,27 +1204,16 @@ static void nvmet_tcp_io_work(struct work_struct *w) pending = false; ret = nvmet_tcp_try_recv(queue, NVMET_TCP_RECV_BUDGET, &ops); - if (ret > 0) { + if (ret > 0) pending = true; - } else if (ret < 0) { - if (ret == -EPIPE || ret == -ECONNRESET) - kernel_sock_shutdown(queue->sock, SHUT_RDWR); - else - nvmet_tcp_fatal_error(queue); + else if (ret < 0) return; - } ret = nvmet_tcp_try_send(queue, NVMET_TCP_SEND_BUDGET, &ops); - if (ret > 0) { - /* transmitted message/data */ + if (ret > 0) pending = true; - } else if (ret < 0) { - if (ret == -EPIPE || ret == -ECONNRESET) - kernel_sock_shutdown(queue->sock, SHUT_RDWR); - else - nvmet_tcp_fatal_error(queue); + else if (ret < 0) return; - } } while (pending && ops < NVMET_TCP_IO_WORK_BUDGET); -- cgit v1.2.3 From 9c9e76d5792b121f10c3b8ddbb639617e49197f7 Mon Sep 17 00:00:00 2001 From: Weiping Zhang Date: Sat, 9 May 2020 14:22:08 +0800 Subject: nvme-pci: make sure write/poll_queues are less than or equal to the cpu count Check module parameter
write/poll_queues before using it to catch too large values. Reproducer: modprobe -r nvme modprobe nvme write_queues=`nproc` echo $((`nproc`+1)) > /sys/module/nvme/parameters/write_queues echo 1 > /sys/block/nvme0n1/device/reset_controller [ 657.069000] ------------[ cut here ]------------ [ 657.069022] WARNING: CPU: 10 PID: 1163 at kernel/irq/affinity.c:390 irq_create_affinity_masks+0x47c/0x4a0 [ 657.069056] dm_region_hash dm_log dm_mod [ 657.069059] CPU: 10 PID: 1163 Comm: kworker/u193:9 Kdump: loaded Tainted: G W 5.6.0+ #8 [ 657.069060] Hardware name: Inspur SA5212M5/YZMB-00882-104, BIOS 4.0.9 08/27/2019 [ 657.069064] Workqueue: nvme-reset-wq nvme_reset_work [nvme] [ 657.069066] RIP: 0010:irq_create_affinity_masks+0x47c/0x4a0 [ 657.069067] Code: fe ff ff 48 c7 c0 b0 89 14 95 48 89 46 20 e9 e9 fb ff ff 31 c0 e9 90 fc ff ff 0f 0b 48 c7 44 24 08 00 00 00 00 e9 e9 fc ff ff <0f> 0b e9 87 fe ff ff 48 8b 7c 24 28 e8 33 a0 80 00 e9 b6 fc ff ff [ 657.069068] RSP: 0018:ffffb505ce1ffc78 EFLAGS: 00010202 [ 657.069069] RAX: 0000000000000060 RBX: ffff9b97921fe5c0 RCX: 0000000000000000 [ 657.069069] RDX: ffff9b67bad80000 RSI: 00000000ffffffa0 RDI: 0000000000000000 [ 657.069070] RBP: 0000000000000000 R08: 0000000000000000 R09: ffff9b97921fe718 [ 657.069070] R10: ffff9b97921fe710 R11: 0000000000000001 R12: 0000000000000064 [ 657.069070] R13: 0000000000000060 R14: 0000000000000000 R15: 0000000000000001 [ 657.069071] FS: 0000000000000000(0000) GS:ffff9b67c0880000(0000) knlGS:0000000000000000 [ 657.069072] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 657.069072] CR2: 0000559eac6fc238 CR3: 000000057860a002 CR4: 00000000007606e0 [ 657.069073] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 657.069073] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 657.069073] PKRU: 55555554 [ 657.069074] Call Trace: [ 657.069080] __pci_enable_msix_range+0x233/0x5a0 [ 657.069085] ? kernfs_put+0xec/0x190 [ 657.069086] pci_alloc_irq_vectors_affinity+0xbb/0x130 [ 657.069089] nvme_reset_work+0x6e6/0xeab [nvme] [ 657.069093] ? __switch_to_asm+0x34/0x70 [ 657.069094] ? __switch_to_asm+0x40/0x70 [ 657.069095] ? nvme_irq_check+0x30/0x30 [nvme] [ 657.069098] process_one_work+0x1a7/0x370 [ 657.069101] worker_thread+0x1c9/0x380 [ 657.069102] ? max_active_store+0x80/0x80 [ 657.069103] kthread+0x112/0x130 [ 657.069104] ? 
__kthread_parkme+0x70/0x70 [ 657.069105] ret_from_fork+0x35/0x40 [ 657.069106] ---[ end trace f4f06b7d24513d06 ]--- [ 657.077110] nvme nvme0: 95/1/0 default/read/poll queues Signed-off-by: Weiping Zhang Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index b0978ac554d5..6cc4630ddd6d 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -68,14 +68,30 @@ static int io_queue_depth = 1024; module_param_cb(io_queue_depth, &io_queue_depth_ops, &io_queue_depth, 0644); MODULE_PARM_DESC(io_queue_depth, "set io queue depth, should >= 2"); +static int io_queue_count_set(const char *val, const struct kernel_param *kp) +{ + unsigned int n; + int ret; + + ret = kstrtouint(val, 10, &n); + if (ret != 0 || n > num_possible_cpus()) + return -EINVAL; + return param_set_uint(val, kp); +} + +static const struct kernel_param_ops io_queue_count_ops = { + .set = io_queue_count_set, + .get = param_get_uint, +}; + static unsigned int write_queues; -module_param(write_queues, uint, 0644); +module_param_cb(write_queues, &io_queue_count_ops, &write_queues, 0644); MODULE_PARM_DESC(write_queues, "Number of queues to use for writes. If not set, reads and writes " "will share a queue set."); static unsigned int poll_queues; -module_param(poll_queues, uint, 0644); +module_param_cb(poll_queues, &io_queue_count_ops, &poll_queues, 0644); MODULE_PARM_DESC(poll_queues, "Number of queues to use for polled IO."); struct nvme_dev; @@ -3118,8 +3134,6 @@ static int __init nvme_init(void) BUILD_BUG_ON(sizeof(struct nvme_delete_queue) != 64); BUILD_BUG_ON(IRQ_AFFINITY_MAX_SETS < 2); - write_queues = min(write_queues, num_possible_cpus()); - poll_queues = min(poll_queues, num_possible_cpus()); return pci_register_driver(&nvme_driver); } -- cgit v1.2.3 From 614fc1c0d980423a131bfb5c93d8d53e5272f587 Mon Sep 17 00:00:00 2001 From: Martin George Date: Tue, 12 May 2020 22:17:04 +0530 Subject: nvme-fc: print proper nvme-fc devloss_tmo value The nvme-fc devloss_tmo is computed as the min of either the ctrl_loss_tmo (max_retries * reconnect_delay) or the remote port's devloss_tmo. But what gets printed as the nvme-fc devloss_tmo in nvme_fc_reconnect_or_delete() is always the remote port's devloss_tmo value. So correct this by printing the min value instead. Signed-off-by: Martin George Reviewed-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 0b3ab3355e25..8aabc056c754 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -3246,7 +3246,9 @@ nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status) dev_warn(ctrl->ctrl.device, "NVME-FC{%d}: dev_loss_tmo (%d) expired " "while waiting for remoteport connectivity.\n", - ctrl->cnum, portptr->dev_loss_tmo); + ctrl->cnum, min_t(int, portptr->dev_loss_tmo, + (ctrl->ctrl.opts->max_reconnects * + ctrl->ctrl.opts->reconnect_delay))); WARN_ON(nvme_delete_ctrl(&ctrl->ctrl)); } } -- cgit v1.2.3 From 84e4c204b6a0e81f56bd7d1254123390ef0498c8 Mon Sep 17 00:00:00 2001 From: Wu Bo Date: Wed, 13 May 2020 16:18:13 +0800 Subject: nvme: disable streams when get stream params failed Disable streams again if getting the stream params fails. 
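The fix below funnels every post-enable failure through one unwind label, the kernel's usual centralized-error-path idiom; a self-contained sketch of the shape (all names hypothetical):

    #include <stdio.h>

    static int enable_feature(void)       { return 0; }	/* hypothetical stubs */
    static int query_feature_params(void) { return -1; }	/* simulate failure */
    static void disable_feature(void)     { puts("disabled again"); }

    /* Every failure after the enable jumps to one label that undoes the
     * enable exactly once, so no path can leave it half-configured. */
    static int configure_feature(void)
    {
    	int ret = enable_feature();

    	if (ret)
    		return ret;
    	ret = query_feature_params();
    	if (ret)
    		goto out_disable;
    	return 0;

    out_disable:
    	disable_feature();
    	return ret;
    }

    int main(void)
    {
    	return configure_feature() ? 1 : 0;
    }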
Signed-off-by: Wu Bo Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 805d289e6cd9..43a6336d6264 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -552,19 +552,22 @@ static int nvme_configure_directives(struct nvme_ctrl *ctrl) ret = nvme_get_stream_params(ctrl, &s, NVME_NSID_ALL); if (ret) - return ret; + goto out_disable_stream; ctrl->nssa = le16_to_cpu(s.nssa); if (ctrl->nssa < BLK_MAX_WRITE_HINTS - 1) { dev_info(ctrl->device, "too few streams (%u) available\n", ctrl->nssa); - nvme_disable_streams(ctrl); - return 0; + goto out_disable_stream; } ctrl->nr_streams = min_t(unsigned, ctrl->nssa, BLK_MAX_WRITE_HINTS - 1); dev_info(ctrl->device, "Using %u streams\n", ctrl->nr_streams); return 0; + +out_disable_stream: + nvme_disable_streams(ctrl); + return ret; } /* -- cgit v1.2.3 From 68ab60ca2d6bd8e6b1ecc2857a174c1c2b9451e9 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 14 May 2020 14:56:26 +0900 Subject: nvme: fix io_opt limit setting Currently, a namespace io_opt queue limit is set by default to the physical sector size of the namespace and to the write optimal size (NOWS) when the namespace reports optimal IO sizes. This causes problems with block limits stacking in blk_stack_limits() when a namespace block device is combined with an HDD, which generally does not report any optimal transfer size (io_opt limit is 0). The code: /* Optimal I/O a multiple of the physical block size? */ if (t->io_opt & (t->physical_block_size - 1)) { t->io_opt = 0; t->misaligned = 1; ret = -1; } in blk_stack_limits() results in an error return for this function when the combined devices have different but compatible physical sector sizes (e.g. 512B sector SSD with 4KB sector disks). Fix this by not setting the optimal IO size queue limit if the namespace does not report an optimal write size value. Signed-off-by: Damien Le Moal Reviewed-by: Bart van Assche Reviewed-by: Hannes Reinecke Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 43a6336d6264..4c194dcabd12 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1845,7 +1845,7 @@ static void nvme_update_disk_info(struct gendisk *disk, { sector_t capacity = nvme_lba_to_sect(ns, le64_to_cpu(id->nsze)); unsigned short bs = 1 << ns->lba_shift; - u32 atomic_bs, phys_bs, io_opt; + u32 atomic_bs, phys_bs, io_opt = 0; if (ns->lba_shift > PAGE_SHIFT) { /* unsupported block size, set capacity to 0 later */ @@ -1854,7 +1854,7 @@ static void nvme_update_disk_info(struct gendisk *disk, blk_mq_freeze_queue(disk->queue); blk_integrity_unregister(disk); - atomic_bs = phys_bs = io_opt = bs; + atomic_bs = phys_bs = bs; nvme_setup_streams_ns(ns->ctrl, ns, &phys_bs, &io_opt); if (id->nabo == 0) { /* -- cgit v1.2.3 From f1e71d75f04792721d411170168e68019ccb7de3 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R.
Silva" Date: Thu, 7 May 2020 14:04:52 -0500 Subject: nvme: replace zero-length array with flexible-array The current codebase makes use of the zero-length array language extension to the C90 standard, but the preferred mechanism to declare variable-length types such as these ones is a flexible array member[1][2], introduced in C99: struct foo { int stuff; struct boo array[]; }; By making use of the mechanism above, we will get a compiler warning in case the flexible array does not occur last in the structure, which will help us prevent some kind of undefined behavior bugs from being inadvertently introduced[3] to the codebase from now on. Also, notice that, dynamic memory allocations won't be affected by this change: "Flexible array members have incomplete type, and so the sizeof operator may not be applied. As a quirk of the original implementation of zero-length arrays, sizeof evaluates to zero."[1] sizeof(flexible-array-member) triggers a warning because flexible array members have incomplete type[1]. There are some instances of code in which the sizeof operator is being incorrectly/erroneously applied to zero-length arrays and the result is zero. Such instances may be hiding some bugs. So, this work (flexible-array member conversions) will also help to get completely rid of those sorts of issues. This issue was found with the help of Coccinelle. [1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html [2] https://github.com/KSPP/linux/issues/21 [3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour") Signed-off-by: Gustavo A. R. Silva Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 2 +- drivers/nvme/host/lightnvm.c | 2 +- include/linux/nvme.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 8aabc056c754..cb0007592c12 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -108,7 +108,7 @@ struct nvme_fc_fcp_op { struct nvme_fcp_op_w_sgl { struct nvme_fc_fcp_op op; struct scatterlist sgl[NVME_INLINE_SG_CNT]; - uint8_t priv[0]; + uint8_t priv[]; }; struct nvme_fc_lport { diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c index ec46693f6b64..3002bf972c6b 100644 --- a/drivers/nvme/host/lightnvm.c +++ b/drivers/nvme/host/lightnvm.c @@ -171,7 +171,7 @@ struct nvme_nvm_bb_tbl { __le32 tdresv; __le32 thresv; __le32 rsvd2[8]; - __u8 blk[0]; + __u8 blk[]; }; struct nvme_nvm_id20_addrf { diff --git a/include/linux/nvme.h b/include/linux/nvme.h index b235a48eac8c..e2993e6a9d7c 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -1185,7 +1185,7 @@ struct nvmf_disc_rsp_page_hdr { __le64 numrec; __le16 recfmt; __u8 resv14[1006]; - struct nvmf_disc_rsp_page_entry entries[0]; + struct nvmf_disc_rsp_page_entry entries[]; }; enum { -- cgit v1.2.3 From ec0862ac5aa03961fd2027f861689beccd91c5d1 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 15 May 2020 15:06:59 +0300 Subject: nvme: delete an unnecessary declaration The nvme_put_ctrl() is implemented earlier as an inline function so this declaration isn't required. 
Signed-off-by: Dan Carpenter Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/host/nvme.h | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index f3ab17778349..86f152e777bc 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -497,7 +497,6 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev, void nvme_uninit_ctrl(struct nvme_ctrl *ctrl); void nvme_start_ctrl(struct nvme_ctrl *ctrl); void nvme_stop_ctrl(struct nvme_ctrl *ctrl); -void nvme_put_ctrl(struct nvme_ctrl *ctrl); int nvme_init_identify(struct nvme_ctrl *ctrl); void nvme_remove_namespaces(struct nvme_ctrl *ctrl); -- cgit v1.2.3 From 696ece751366e7a02a81fa0228125fe25a47969d Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Tue, 19 May 2020 01:06:30 -0700 Subject: nvmet: add async event tracing support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This adds a new tracepoint for the target to trace async events. This is helpful in debugging and in comparing host and target side async events, especially when the host is connected to different targets on different machines, and now that we rely on userspace components to generate AENs. Signed-off-by: Chaitanya Kulkarni Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/target/core.c | 1 + drivers/nvme/target/trace.h | 28 ++++++++++++++++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index b685f99d56a1..7ce3d3b0f5d6 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -151,6 +151,7 @@ static void nvmet_async_events_process(struct nvmet_ctrl *ctrl, u16 status) kfree(aen); mutex_unlock(&ctrl->lock); + trace_nvmet_async_event(ctrl, req->cqe->result.u32); nvmet_req_complete(req, status); } } diff --git a/drivers/nvme/target/trace.h b/drivers/nvme/target/trace.h index e645caa882dd..0458046d6501 100644 --- a/drivers/nvme/target/trace.h +++ b/drivers/nvme/target/trace.h @@ -130,6 +130,34 @@ TRACE_EVENT(nvmet_req_complete, ); +#define aer_name(aer) { aer, #aer } + +TRACE_EVENT(nvmet_async_event, + TP_PROTO(struct nvmet_ctrl *ctrl, __le32 result), + TP_ARGS(ctrl, result), + TP_STRUCT__entry( + __field(int, ctrl_id) + __field(u32, result) + ), + TP_fast_assign( + __entry->ctrl_id = ctrl->cntlid; + __entry->result = (le32_to_cpu(result) & 0xff00) >> 8; + ), + TP_printk("nvmet%d: NVME_AEN=%#08x [%s]", + __entry->ctrl_id, __entry->result, + __print_symbolic(__entry->result, + aer_name(NVME_AER_NOTICE_NS_CHANGED), + aer_name(NVME_AER_NOTICE_ANA), + aer_name(NVME_AER_NOTICE_FW_ACT_STARTING), + aer_name(NVME_AER_NOTICE_DISC_CHANGED), + aer_name(NVME_AER_ERROR), + aer_name(NVME_AER_SMART), + aer_name(NVME_AER_CSS), + aer_name(NVME_AER_VS)) + ) +); +#undef aer_name + #endif /* _TRACE_NVMET_H */ #undef TRACE_INCLUDE_PATH -- cgit v1.2.3 From 463c5fabb8dfca4941c4c50365bb7749ac5c9916 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Tue, 19 May 2020 01:06:27 -0700 Subject: nvmet: add helper to revalidate bdev and file ns MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds a wrapper helper to detect a size change in the bdev- & file-backed namespaces when revalidating the ns. This helper is needed in order to minimize code repetition in the next patch for configfs.c and the existing admin-cmd.c.
Signed-off-by: Chaitanya Kulkarni Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/target/admin-cmd.c | 5 +---- drivers/nvme/target/core.c | 8 ++++++++ drivers/nvme/target/nvmet.h | 1 + 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index 4c79aa804887..f544a14e8b5c 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -486,10 +486,7 @@ static void nvmet_execute_identify_ns(struct nvmet_req *req) if (!ns) goto done; - if (ns->bdev) - nvmet_bdev_ns_revalidate(ns); - else - nvmet_file_ns_revalidate(ns); + nvmet_ns_revalidate(ns); /* * nuse = ncap = nsze isn't always true, but we have no way to find diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 7ce3d3b0f5d6..ee5865568588 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -515,6 +515,14 @@ static void nvmet_p2pmem_ns_add_p2p(struct nvmet_ctrl *ctrl, ns->nsid); } +void nvmet_ns_revalidate(struct nvmet_ns *ns) +{ + if (ns->bdev) + nvmet_bdev_ns_revalidate(ns); + else + nvmet_file_ns_revalidate(ns); +} + int nvmet_ns_enable(struct nvmet_ns *ns) { struct nvmet_subsys *subsys = ns->subsys; diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index 3d981eb6e100..93e0c2aa3e71 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -500,6 +500,7 @@ u16 nvmet_file_flush(struct nvmet_req *req); void nvmet_ns_changed(struct nvmet_subsys *subsys, u32 nsid); void nvmet_bdev_ns_revalidate(struct nvmet_ns *ns); int nvmet_file_ns_revalidate(struct nvmet_ns *ns); +void nvmet_ns_revalidate(struct nvmet_ns *ns); static inline u32 nvmet_rw_len(struct nvmet_req *req) { -- cgit v1.2.3 From de124f427347b0a1eb9708b1007251fc4c8a222f Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Tue, 19 May 2020 01:06:28 -0700 Subject: nvmet: generate AEN for ns revalidate size change The newly added function nvmet_ns_revalidate() updates the ns size in the target's in-core identify namespace data structure when the host issues an id-ns command. This can lead to the host seeing inconsistencies between the namespace size reported in the id-ns command result and the size of the corresponding block device, until the host rescans the namespaces explicitly. To avoid this scenario, generate an AEN in nvmet_ns_revalidate() if the old size is not the same as the new one. This allows automatic AEN generation when the host issues an id-ns command, and also allows the target to install userspace rules that trigger nvmet_ns_revalidate() (using the configfs interface added by the next patch), resulting in appropriate AEN generation when an underlying namespace size change is detected.
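To make the intended userspace flow concrete, a small sketch that triggers the revalidation by writing the configfs attribute added in the next patch (the subsystem name "testnqn" and namespace id 1 are hypothetical; a real rule would run this from an inotify/uevent handler rather than by hand):

    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    /* Ask nvmet to revalidate a namespace's size and emit an AEN if it
     * changed, via the revalidate_size configfs attribute. */
    int main(void)
    {
    	const char *attr = "/sys/kernel/config/nvmet/subsystems/"
    			   "testnqn/namespaces/1/revalidate_size";
    	int fd = open(attr, O_WRONLY);

    	if (fd < 0) {
    		perror("open");
    		return 1;
    	}
    	if (write(fd, "1", 1) != 1)
    		perror("write");
    	close(fd);
    	return 0;
    }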
Signed-off-by: Chaitanya Kulkarni Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/target/core.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index ee5865568588..fb78b165c123 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -517,10 +517,15 @@ static void nvmet_p2pmem_ns_add_p2p(struct nvmet_ctrl *ctrl, void nvmet_ns_revalidate(struct nvmet_ns *ns) { + loff_t oldsize = ns->size; + if (ns->bdev) nvmet_bdev_ns_revalidate(ns); else nvmet_file_ns_revalidate(ns); + + if (oldsize != ns->size) + nvmet_ns_changed(ns->subsys, ns->nsid); } int nvmet_ns_enable(struct nvmet_ns *ns) -- cgit v1.2.3 From 1f357548ec79a34e069716716b71194ce8b53e10 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Tue, 19 May 2020 01:06:29 -0700 Subject: nvmet: revalidate-ns & generate AEN from configfs Add a new attribute "revalidate_size" for the namespace, which allows the user to revalidate the namespace and generate an AEN if needed. This attribute is needed so that userspace rules can be installed, e.g. with a systemd service driven by inotify/fsnotify/uevent. The registered callback for such a service will end up writing to this attribute to generate an AEN if needed. Signed-off-by: Chaitanya Kulkarni Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/target/configfs.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index 24eb4cf53b4f..17c529260e12 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -540,6 +540,31 @@ static ssize_t nvmet_ns_buffered_io_store(struct config_item *item, CONFIGFS_ATTR(nvmet_ns_, buffered_io); +static ssize_t nvmet_ns_revalidate_size_store(struct config_item *item, + const char *page, size_t count) +{ + struct nvmet_ns *ns = to_nvmet_ns(item); + bool val; + + if (strtobool(page, &val)) + return -EINVAL; + + if (!val) + return -EINVAL; + + mutex_lock(&ns->subsys->lock); + if (!ns->enabled) { + pr_err("enable ns before revalidate.\n"); + mutex_unlock(&ns->subsys->lock); + return -EINVAL; + } + nvmet_ns_revalidate(ns); + mutex_unlock(&ns->subsys->lock); + return count; +} + +CONFIGFS_ATTR_WO(nvmet_ns_, revalidate_size); + static struct configfs_attribute *nvmet_ns_attrs[] = { &nvmet_ns_attr_device_path, &nvmet_ns_attr_device_nguid, @@ -547,6 +572,7 @@ static struct configfs_attribute *nvmet_ns_attrs[] = { &nvmet_ns_attr_ana_grpid, &nvmet_ns_attr_enable, &nvmet_ns_attr_buffered_io, + &nvmet_ns_attr_revalidate_size, #ifdef CONFIG_PCI_P2PDMA &nvmet_ns_attr_p2pmem, #endif -- cgit v1.2.3 From c295ee4742fda49de598a304ef9a95cf8da6b1f5 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Tue, 19 May 2020 17:05:48 +0300 Subject: block: always define struct blk_integrity in genhd.h This will reduce the number of ifdefs inside the source code of various drivers and also reduce the number of stub functions that were created for the !CONFIG_BLK_DEV_INTEGRITY case. Suggested-by: Christoph Hellwig Signed-off-by: Max Gurtovoy Reviewed-by: Israel Rukshin Reviewed-by: Martin K.
Petersen Signed-off-by: Christoph Hellwig --- include/linux/genhd.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/include/linux/genhd.h b/include/linux/genhd.h index f9c226f9546a..2590bed6e6b3 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -169,8 +169,6 @@ struct disk_part_tbl { struct disk_events; struct badblocks; -#if defined(CONFIG_BLK_DEV_INTEGRITY) - struct blk_integrity { const struct blk_integrity_profile *profile; unsigned char flags; @@ -179,8 +177,6 @@ struct blk_integrity { unsigned char tag_size; }; -#endif /* CONFIG_BLK_DEV_INTEGRITY */ - struct gendisk { /* major, first_minor and minors are input parameters only, * don't use directly. Use disk_devt() and disk_max_parts(). -- cgit v1.2.3 From ffc89b1d3ca45669e8d2226f5fd4dde756f7ad17 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Tue, 19 May 2020 17:05:49 +0300 Subject: nvme: introduce namespace features flag Replace the specific ext boolean (that implies on extended LBA format) with a feature in the new namespace features flag. This is a preparation for adding more namespace features (such as metadata specific features). Signed-off-by: Max Gurtovoy Reviewed-by: Israel Rukshin Reviewed-by: Martin K. Petersen Reviewed-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 8 +++++--- drivers/nvme/host/lightnvm.c | 5 ++++- drivers/nvme/host/nvme.h | 6 +++++- 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 4c194dcabd12..fecf484e7b08 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1305,7 +1305,7 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) meta_len = (io.nblocks + 1) * ns->ms; metadata = nvme_to_user_ptr(io.metadata); - if (ns->ext) { + if (ns->features & NVME_NS_EXT_LBAS) { length += meta_len; meta_len = 0; } else if (meta_len) { @@ -1885,7 +1885,7 @@ static void nvme_update_disk_info(struct gendisk *disk, blk_queue_io_min(disk->queue, phys_bs); blk_queue_io_opt(disk->queue, io_opt); - if (ns->ms && !ns->ext && + if (ns->ms && !(ns->features & NVME_NS_EXT_LBAS) && (ns->ctrl->ops->flags & NVME_F_METADATA_SUPPORTED)) nvme_init_integrity(disk, ns->ms, ns->pi_type); if ((ns->ms && !nvme_ns_has_pi(ns) && !blk_get_integrity(disk)) || @@ -1924,8 +1924,10 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) else iob = nvme_lba_to_sect(ns, le16_to_cpu(id->noiob)); + ns->features = 0; ns->ms = le16_to_cpu(id->lbaf[id->flbas & NVME_NS_FLBAS_LBA_MASK].ms); - ns->ext = ns->ms && (id->flbas & NVME_NS_FLBAS_META_EXT); + if (ns->ms && (id->flbas & NVME_NS_FLBAS_META_EXT)) + ns->features |= NVME_NS_EXT_LBAS; /* the PI implementation requires metadata equal t10 pi tuple size */ if (ns->ms == sizeof(struct t10_pi_tuple)) ns->pi_type = id->dps & NVME_NS_DPS_PI_MASK; diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c index 3002bf972c6b..69608755d415 100644 --- a/drivers/nvme/host/lightnvm.c +++ b/drivers/nvme/host/lightnvm.c @@ -961,7 +961,10 @@ int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node) geo = &dev->geo; geo->csecs = 1 << ns->lba_shift; geo->sos = ns->ms; - geo->ext = ns->ext; + if (ns->features & NVME_NS_EXT_LBAS) + geo->ext = true; + else + geo->ext = false; geo->mdts = ns->ctrl->max_hw_sectors; dev->q = q; diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 86f152e777bc..58ae6ebdc63a 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -364,6 
+364,10 @@ struct nvme_ns_head { #endif }; +enum nvme_ns_features { + NVME_NS_EXT_LBAS = 1 << 0, /* support extended LBA format */ +}; + struct nvme_ns { struct list_head list; @@ -383,8 +387,8 @@ struct nvme_ns { u16 ms; u16 sgs; u32 sws; - bool ext; u8 pi_type; + unsigned long features; unsigned long flags; #define NVME_NS_REMOVING 0 #define NVME_NS_DEAD 1 -- cgit v1.2.3 From b29f84857a0f1cb4355363d0307d2b83897e8955 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Tue, 19 May 2020 17:05:50 +0300 Subject: nvme: introduce NVME_NS_METADATA_SUPPORTED flag This is a preparation for adding support for metadata in fabric controllers. New flag will imply that NVMe namespace supports getting metadata that was originally generated by host's block layer. Signed-off-by: Max Gurtovoy Reviewed-by: Israel Rukshin Reviewed-by: Martin K. Petersen Reviewed-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 41 ++++++++++++++++++++++++++++++++++------- drivers/nvme/host/nvme.h | 1 + 2 files changed, 35 insertions(+), 7 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index fecf484e7b08..aa168dd58f31 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1885,13 +1885,27 @@ static void nvme_update_disk_info(struct gendisk *disk, blk_queue_io_min(disk->queue, phys_bs); blk_queue_io_opt(disk->queue, io_opt); - if (ns->ms && !(ns->features & NVME_NS_EXT_LBAS) && - (ns->ctrl->ops->flags & NVME_F_METADATA_SUPPORTED)) - nvme_init_integrity(disk, ns->ms, ns->pi_type); - if ((ns->ms && !nvme_ns_has_pi(ns) && !blk_get_integrity(disk)) || - ns->lba_shift > PAGE_SHIFT) + /* + * The block layer can't support LBA sizes larger than the page size + * yet, so catch this early and don't allow block I/O. + */ + if (ns->lba_shift > PAGE_SHIFT) capacity = 0; + /* + * Register a metadata profile for PI, or the plain non-integrity NVMe + * metadata masquerading as Type 0 if supported, otherwise reject block + * I/O to namespaces with metadata except when the namespace supports + * PI, as it can strip/insert in that case. + */ + if (ns->ms) { + if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) && + (ns->features & NVME_NS_METADATA_SUPPORTED)) + nvme_init_integrity(disk, ns->ms, ns->pi_type); + else if (!nvme_ns_has_pi(ns)) + capacity = 0; + } + set_capacity_revalidate_and_notify(disk, capacity, false); nvme_config_discard(disk, ns); @@ -1926,14 +1940,27 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) ns->features = 0; ns->ms = le16_to_cpu(id->lbaf[id->flbas & NVME_NS_FLBAS_LBA_MASK].ms); - if (ns->ms && (id->flbas & NVME_NS_FLBAS_META_EXT)) - ns->features |= NVME_NS_EXT_LBAS; /* the PI implementation requires metadata equal t10 pi tuple size */ if (ns->ms == sizeof(struct t10_pi_tuple)) ns->pi_type = id->dps & NVME_NS_DPS_PI_MASK; else ns->pi_type = 0; + if (ns->ms) { + if (id->flbas & NVME_NS_FLBAS_META_EXT) + ns->features |= NVME_NS_EXT_LBAS; + + /* + * For PCI, Extended logical block will be generated by the + * controller. Non-extended format can be generated by the + * block layer. 
+ */ + if (ns->ctrl->ops->flags & NVME_F_METADATA_SUPPORTED) { + if (!(ns->features & NVME_NS_EXT_LBAS)) + ns->features |= NVME_NS_METADATA_SUPPORTED; + } + } + if (iob) blk_queue_chunk_sectors(ns->queue, rounddown_pow_of_two(iob)); nvme_update_disk_info(disk, ns, id); diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 58ae6ebdc63a..9ed6a3dac537 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -366,6 +366,7 @@ struct nvme_ns_head { enum nvme_ns_features { NVME_NS_EXT_LBAS = 1 << 0, /* support extended LBA format */ + NVME_NS_METADATA_SUPPORTED = 1 << 1, /* support getting generated md */ }; struct nvme_ns { -- cgit v1.2.3 From 4d2ce68835649afebbc5e8816b79426fb04c639f Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 19 May 2020 17:05:51 +0300 Subject: nvme: make nvme_ns_has_pi accessible to transports Move the nvme_ns_has_pi() inline from core.c to the nvme.h header. This allows use by the transports. Signed-off-by: James Smart [maxg: added a comment for nvme_ns_has_pi()] Signed-off-by: Max Gurtovoy Reviewed-by: Israel Rukshin Reviewed-by: Martin K. Petersen Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 6 ------ drivers/nvme/host/nvme.h | 7 +++++++ 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index aa168dd58f31..beea013f7b73 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -19,7 +19,6 @@ #include #include #include -#include #include #include @@ -204,11 +203,6 @@ static void nvme_delete_ctrl_sync(struct nvme_ctrl *ctrl) nvme_put_ctrl(ctrl); } -static inline bool nvme_ns_has_pi(struct nvme_ns *ns) -{ - return ns->pi_type && ns->ms == sizeof(struct t10_pi_tuple); -} - static blk_status_t nvme_error_status(u16 status) { switch (status & 0x7ff) { diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 9ed6a3dac537..c83ff69338d8 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -16,6 +16,7 @@ #include #include #include +#include #include @@ -399,6 +400,12 @@ struct nvme_ns { }; +/* NVMe ns supports metadata actions by the controller (generate/strip) */ +static inline bool nvme_ns_has_pi(struct nvme_ns *ns) +{ + return ns->pi_type && ns->ms == sizeof(struct t10_pi_tuple); +} + struct nvme_ctrl_ops { const char *name; struct module *module; -- cgit v1.2.3 From 95093350394a394e7c4e778176194b14b76ec5d8 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Tue, 19 May 2020 17:05:52 +0300 Subject: nvme: introduce max_integrity_segments ctrl attribute This patch doesn't change any logic, and is needed as a preparation for adding PI support for fabrics drivers that will use an extended LBA format for metadata and will support more than 1 integrity segment. Signed-off-by: Max Gurtovoy Signed-off-by: Israel Rukshin Reviewed-by: Sagi Grimberg Reviewed-by: Martin K. 
Petersen Reviewed-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 11 +++++++---- drivers/nvme/host/nvme.h | 1 + drivers/nvme/host/pci.c | 6 ++++++ 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index beea013f7b73..404edceb84cb 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1693,7 +1693,8 @@ static int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo) } #ifdef CONFIG_BLK_DEV_INTEGRITY -static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type) +static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type, + u32 max_integrity_segments) { struct blk_integrity integrity; @@ -1716,10 +1717,11 @@ static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type) } integrity.tuple_size = ms; blk_integrity_register(disk, &integrity); - blk_queue_max_integrity_segments(disk->queue, 1); + blk_queue_max_integrity_segments(disk->queue, max_integrity_segments); } #else -static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type) +static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type, + u32 max_integrity_segments) { } #endif /* CONFIG_BLK_DEV_INTEGRITY */ @@ -1895,7 +1897,8 @@ static void nvme_update_disk_info(struct gendisk *disk, if (ns->ms) { if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) && (ns->features & NVME_NS_METADATA_SUPPORTED)) - nvme_init_integrity(disk, ns->ms, ns->pi_type); + nvme_init_integrity(disk, ns->ms, ns->pi_type, + ns->ctrl->max_integrity_segments); else if (!nvme_ns_has_pi(ns)) capacity = 0; } diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index c83ff69338d8..5b5ed128fd9e 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -229,6 +229,7 @@ struct nvme_ctrl { u32 page_size; u32 max_hw_sectors; u32 max_segments; + u32 max_integrity_segments; u16 crdt[3]; u16 oncs; u16 oacs; diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 6cc4630ddd6d..b307c06a783d 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2565,6 +2565,12 @@ static void nvme_reset_work(struct work_struct *work) goto out; } + /* + * We do not support an SGL for metadata (yet), so we are limited to a + * single integrity segment for the separate metadata pointer. + */ + dev->ctrl.max_integrity_segments = 1; + result = nvme_init_identify(&dev->ctrl); if (result) goto out; -- cgit v1.2.3 From 33cfdc2aa6969829f42640f758357e4b015e9f7d Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Tue, 19 May 2020 17:05:53 +0300 Subject: nvme: enforce extended LBA format for fabrics metadata An extended LBA is a larger LBA that is created when metadata associated with the LBA is transferred contiguously with the LBA data (AKA interleaved). The metadata may be either transferred as part of the LBA (creating an extended LBA) or it may be transferred as a separate contiguous buffer of data. According to the NVMeoF spec, a fabrics ctrl supports only an Extended LBA format. Fail revalidation in case we have a spec violation. Also add a flag that marks capable transports and controllers, as part of the preparation for allowing end-to-end protection information on fabric controllers.
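A small self-contained sketch of what the extended (interleaved) layout means for transfer sizing, assuming for illustration a 512-byte data block with an 8-byte metadata tuple:

    #include <stdio.h>

    /* With an extended LBA format each block carries its metadata inline,
     * so the per-block wire/buffer size is data + metadata. With a
     * separate-buffer format the data stream stays lba_data bytes per
     * block and the metadata travels in its own buffer. */
    int main(void)
    {
    	unsigned int lba_data = 512;	/* bytes of user data per LBA */
    	unsigned int lba_meta = 8;	/* e.g. one T10 PI tuple */
    	unsigned int nlb = 8;		/* blocks in the I/O */

    	printf("extended LBA transfer: %u bytes\n",
    	       (lba_data + lba_meta) * nlb);	/* interleaved */
    	printf("separate buffers: %u + %u bytes\n",
    	       lba_data * nlb, lba_meta * nlb);
    	return 0;
    }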
Suggested-by: Christoph Hellwig Signed-off-by: Max Gurtovoy Signed-off-by: Israel Rukshin Reviewed-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 41 +++++++++++++++++++++++++++-------------- 1 file changed, 27 insertions(+), 14 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 404edceb84cb..a3a4dbc59af1 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1916,9 +1916,10 @@ static void nvme_update_disk_info(struct gendisk *disk, blk_mq_unfreeze_queue(disk->queue); } -static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) +static int __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) { struct nvme_ns *ns = disk->private_data; + struct nvme_ctrl *ctrl = ns->ctrl; u32 iob; /* @@ -1929,9 +1930,9 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) if (ns->lba_shift == 0) ns->lba_shift = 9; - if ((ns->ctrl->quirks & NVME_QUIRK_STRIPE_SIZE) && - is_power_of_2(ns->ctrl->max_hw_sectors)) - iob = ns->ctrl->max_hw_sectors; + if ((ctrl->quirks & NVME_QUIRK_STRIPE_SIZE) && + is_power_of_2(ctrl->max_hw_sectors)) + iob = ctrl->max_hw_sectors; else iob = nvme_lba_to_sect(ns, le16_to_cpu(id->noiob)); @@ -1944,16 +1945,24 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) ns->pi_type = 0; if (ns->ms) { - if (id->flbas & NVME_NS_FLBAS_META_EXT) - ns->features |= NVME_NS_EXT_LBAS; - /* - * For PCI, Extended logical block will be generated by the - * controller. Non-extended format can be generated by the - * block layer. + * For PCIe only the separate metadata pointer is supported, + * as the block layer supplies metadata in a separate bio_vec + * chain. For Fabrics, only metadata as part of extended data + * LBA is supported on the wire per the Fabrics specification, + * but the HBA/HCA will do the remapping from the separate + * metadata buffers for us. 
*/ - if (ns->ctrl->ops->flags & NVME_F_METADATA_SUPPORTED) { - if (!(ns->features & NVME_NS_EXT_LBAS)) + if (id->flbas & NVME_NS_FLBAS_META_EXT) { + ns->features |= NVME_NS_EXT_LBAS; + if ((ctrl->ops->flags & NVME_F_FABRICS) && + (ctrl->ops->flags & NVME_F_METADATA_SUPPORTED) && + ctrl->max_integrity_segments) + ns->features |= NVME_NS_METADATA_SUPPORTED; + } else { + if (WARN_ON_ONCE(ctrl->ops->flags & NVME_F_FABRICS)) + return -EINVAL; + if (ctrl->ops->flags & NVME_F_METADATA_SUPPORTED) ns->features |= NVME_NS_METADATA_SUPPORTED; } } @@ -1968,6 +1977,7 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) revalidate_disk(ns->head->disk); } #endif + return 0; } static int nvme_revalidate_disk(struct gendisk *disk) @@ -2003,7 +2013,7 @@ static int nvme_revalidate_disk(struct gendisk *disk) goto free_id; } - __nvme_revalidate_disk(disk, id); + ret = __nvme_revalidate_disk(disk, id); free_id: kfree(id); out: @@ -3657,7 +3667,8 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) memcpy(disk->disk_name, disk_name, DISK_NAME_LEN); ns->disk = disk; - __nvme_revalidate_disk(disk, id); + if (__nvme_revalidate_disk(disk, id)) + goto out_free_disk; if ((ctrl->quirks & NVME_QUIRK_LIGHTNVM) && id->vs[0] == 0x1) { ret = nvme_nvm_register(ns, disk_name, node); @@ -3684,6 +3695,8 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) /* prevent double queue cleanup */ ns->disk->queue = NULL; put_disk(ns->disk); + out_free_disk: + del_gendisk(ns->disk); out_unlink_ns: mutex_lock(&ctrl->subsys->lock); list_del_rcu(&ns->siblings); -- cgit v1.2.3 From ba7ca2ae029607c7eb2c18e37e8bc0d2252d3d12 Mon Sep 17 00:00:00 2001 From: Israel Rukshin Date: Tue, 19 May 2020 17:05:54 +0300 Subject: nvme: introduce NVME_INLINE_METADATA_SG_CNT SGL size of metadata is usually small. Thus, 1 inline sg should cover most cases. The macro will be used for pre-allocate a single SGL entry for metadata. The preallocation of small inline SGLs depends on SG_CHAIN capability so if the ARCH doesn't support SG_CHAIN, use the runtime allocation for the SGL. This patch is a preparation for adding metadata (T10-PI) over fabric support. Signed-off-by: Israel Rukshin Signed-off-by: Max Gurtovoy Reviewed-by: Martin K. Petersen Reviewed-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/host/nvme.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 5b5ed128fd9e..fa5c75501049 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -31,8 +31,10 @@ extern unsigned int admin_timeout; #ifdef CONFIG_ARCH_NO_SG_CHAIN #define NVME_INLINE_SG_CNT 0 +#define NVME_INLINE_METADATA_SG_CNT 0 #else #define NVME_INLINE_SG_CNT 2 +#define NVME_INLINE_METADATA_SG_CNT 1 #endif extern struct workqueue_struct *nvme_wq; -- cgit v1.2.3 From 324d9e7814dd9c76bb3aebf2529b02149c340d48 Mon Sep 17 00:00:00 2001 From: Israel Rukshin Date: Tue, 19 May 2020 17:05:55 +0300 Subject: nvme-rdma: introduce nvme_rdma_sgl structure Remove first_sgl pointer from struct nvme_rdma_request and use pointer arithmetic instead. The inline scatterlist, if exists, will be located right after the nvme_rdma_request. This patch is needed as a preparation for adding PI support. 
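The "located right after" placement works because blk-mq allocates the driver PDU with a driver-chosen cmd_size tail; a self-contained sketch of the pointer arithmetic (hypothetical structs, mirroring the (req + 1) expression in the diff below):

    #include <stdlib.h>

    struct sgl_sketch { int nents; };
    struct req_pdu { int num_sge; /* ... */ };

    /* The inline scatterlist area lives immediately behind the PDU, so
     * it is found by arithmetic instead of a stored pointer: (req + 1)
     * points exactly one whole struct req_pdu past req. */
    int main(void)
    {
    	struct req_pdu *req =
    		calloc(1, sizeof(*req) + sizeof(struct sgl_sketch));
    	struct sgl_sketch *inline_sgl;

    	if (!req)
    		return 1;
    	inline_sgl = (struct sgl_sketch *)(req + 1);
    	inline_sgl->nents = 0;	/* use the tail area */
    	free(req);
    	return 0;
    }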
Signed-off-by: Israel Rukshin Reviewed-by: Max Gurtovoy Signed-off-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 41 ++++++++++++++++++++++++----------------- 1 file changed, 24 insertions(+), 17 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index cac8a930396a..40868749547a 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -48,6 +48,11 @@ struct nvme_rdma_qe { u64 dma; }; +struct nvme_rdma_sgl { + int nents; + struct sg_table sg_table; +}; + struct nvme_rdma_queue; struct nvme_rdma_request { struct nvme_request req; @@ -58,12 +63,10 @@ struct nvme_rdma_request { refcount_t ref; struct ib_sge sge[1 + NVME_RDMA_MAX_INLINE_SEGMENTS]; u32 num_sge; - int nents; struct ib_reg_wr reg_wr; struct ib_cqe reg_cqe; struct nvme_rdma_queue *queue; - struct sg_table sg_table; - struct scatterlist first_sgl[]; + struct nvme_rdma_sgl data_sgl; }; enum nvme_rdma_queue_flags { @@ -1158,8 +1161,9 @@ static void nvme_rdma_unmap_data(struct nvme_rdma_queue *queue, req->mr = NULL; } - ib_dma_unmap_sg(ibdev, req->sg_table.sgl, req->nents, rq_dma_dir(rq)); - sg_free_table_chained(&req->sg_table, NVME_INLINE_SG_CNT); + ib_dma_unmap_sg(ibdev, req->data_sgl.sg_table.sgl, req->data_sgl.nents, + rq_dma_dir(rq)); + sg_free_table_chained(&req->data_sgl.sg_table, NVME_INLINE_SG_CNT); } static int nvme_rdma_set_sg_null(struct nvme_command *c) @@ -1178,7 +1182,7 @@ static int nvme_rdma_map_sg_inline(struct nvme_rdma_queue *queue, int count) { struct nvme_sgl_desc *sg = &c->common.dptr.sgl; - struct scatterlist *sgl = req->sg_table.sgl; + struct scatterlist *sgl = req->data_sgl.sg_table.sgl; struct ib_sge *sge = &req->sge[1]; u32 len = 0; int i; @@ -1203,8 +1207,8 @@ static int nvme_rdma_map_sg_single(struct nvme_rdma_queue *queue, { struct nvme_keyed_sgl_desc *sg = &c->common.dptr.ksgl; - sg->addr = cpu_to_le64(sg_dma_address(req->sg_table.sgl)); - put_unaligned_le24(sg_dma_len(req->sg_table.sgl), sg->length); + sg->addr = cpu_to_le64(sg_dma_address(req->data_sgl.sg_table.sgl)); + put_unaligned_le24(sg_dma_len(req->data_sgl.sg_table.sgl), sg->length); put_unaligned_le32(queue->device->pd->unsafe_global_rkey, sg->key); sg->type = NVME_KEY_SGL_FMT_DATA_DESC << 4; return 0; @@ -1225,7 +1229,8 @@ static int nvme_rdma_map_sg_fr(struct nvme_rdma_queue *queue, * Align the MR to a 4K page size to match the ctrl page size and * the block virtual boundary. 
*/ - nr = ib_map_mr_sg(req->mr, req->sg_table.sgl, count, NULL, SZ_4K); + nr = ib_map_mr_sg(req->mr, req->data_sgl.sg_table.sgl, count, NULL, + SZ_4K); if (unlikely(nr < count)) { ib_mr_pool_put(queue->qp, &queue->qp->rdma_mrs, req->mr); req->mr = NULL; @@ -1272,17 +1277,18 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue, if (!blk_rq_nr_phys_segments(rq)) return nvme_rdma_set_sg_null(c); - req->sg_table.sgl = req->first_sgl; - ret = sg_alloc_table_chained(&req->sg_table, - blk_rq_nr_phys_segments(rq), req->sg_table.sgl, + req->data_sgl.sg_table.sgl = (struct scatterlist *)(req + 1); + ret = sg_alloc_table_chained(&req->data_sgl.sg_table, + blk_rq_nr_phys_segments(rq), req->data_sgl.sg_table.sgl, NVME_INLINE_SG_CNT); if (ret) return -ENOMEM; - req->nents = blk_rq_map_sg(rq->q, rq, req->sg_table.sgl); + req->data_sgl.nents = blk_rq_map_sg(rq->q, rq, + req->data_sgl.sg_table.sgl); - count = ib_dma_map_sg(ibdev, req->sg_table.sgl, req->nents, - rq_dma_dir(rq)); + count = ib_dma_map_sg(ibdev, req->data_sgl.sg_table.sgl, + req->data_sgl.nents, rq_dma_dir(rq)); if (unlikely(count <= 0)) { ret = -EIO; goto out_free_table; @@ -1311,9 +1317,10 @@ out: return 0; out_unmap_sg: - ib_dma_unmap_sg(ibdev, req->sg_table.sgl, req->nents, rq_dma_dir(rq)); + ib_dma_unmap_sg(ibdev, req->data_sgl.sg_table.sgl, req->data_sgl.nents, + rq_dma_dir(rq)); out_free_table: - sg_free_table_chained(&req->sg_table, NVME_INLINE_SG_CNT); + sg_free_table_chained(&req->data_sgl.sg_table, NVME_INLINE_SG_CNT); return ret; } -- cgit v1.2.3 From 5ec5d3bddc6b912b7de9e3eb6c1f2397faeca2bc Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Tue, 19 May 2020 17:05:56 +0300 Subject: nvme-rdma: add metadata/T10-PI support For capable HCAs (e.g. ConnectX-5/ConnectX-6) this will allow end-to-end protection information passthrough and validation for NVMe over RDMA transport. Metadata offload support was implemented over the new RDMA signature verbs API and it is enabled for capable controllers. 
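For reference, the protection information that these signature MRs generate and verify is the 8-byte tuple carried with each protected block; a sketch of its layout (field names chosen to mirror the kernel's struct t10_pi_tuple; big-endian on the wire):

    #include <stdint.h>

    /* T10-DIF tuple appended to every protected block: a CRC16 guard over
     * the block's data, an application tag that is opaque to the
     * protection layer, and a reference tag that for Type 1/2 tracks the
     * low 32 bits of the LBA. */
    struct pi_tuple_sketch {
    	uint16_t guard_tag;	/* CRC of the data block */
    	uint16_t app_tag;	/* application defined */
    	uint32_t ref_tag;	/* ties the tuple to its LBA */
    };

This is also why the earlier nvme_ns_has_pi() helper checks ns->ms == sizeof(struct t10_pi_tuple), i.e. exactly 8 bytes of metadata per block.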
Signed-off-by: Max Gurtovoy Signed-off-by: Israel Rukshin Signed-off-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 280 +++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 270 insertions(+), 10 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 40868749547a..f8f856dc0c67 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -34,6 +34,11 @@ #define NVME_RDMA_MAX_INLINE_SEGMENTS 4 +#define NVME_RDMA_DATA_SGL_SIZE \ + (sizeof(struct scatterlist) * NVME_INLINE_SG_CNT) +#define NVME_RDMA_METADATA_SGL_SIZE \ + (sizeof(struct scatterlist) * NVME_INLINE_METADATA_SG_CNT) + struct nvme_rdma_device { struct ib_device *dev; struct ib_pd *pd; @@ -67,6 +72,8 @@ struct nvme_rdma_request { struct ib_cqe reg_cqe; struct nvme_rdma_queue *queue; struct nvme_rdma_sgl data_sgl; + struct nvme_rdma_sgl *metadata_sgl; + bool use_sig_mr; }; enum nvme_rdma_queue_flags { @@ -88,6 +95,7 @@ struct nvme_rdma_queue { struct rdma_cm_id *cm_id; int cm_error; struct completion cm_done; + bool pi_support; }; struct nvme_rdma_ctrl { @@ -264,6 +272,8 @@ static int nvme_rdma_create_qp(struct nvme_rdma_queue *queue, const int factor) init_attr.qp_type = IB_QPT_RC; init_attr.send_cq = queue->ib_cq; init_attr.recv_cq = queue->ib_cq; + if (queue->pi_support) + init_attr.create_flags |= IB_QP_CREATE_INTEGRITY_EN; ret = rdma_create_qp(queue->cm_id, dev->pd, &init_attr); @@ -293,6 +303,12 @@ static int nvme_rdma_init_request(struct blk_mq_tag_set *set, if (!req->sqe.data) return -ENOMEM; + /* metadata nvme_rdma_sgl struct is located after command's data SGL */ + if (queue->pi_support) + req->metadata_sgl = (void *)nvme_req(rq) + + sizeof(struct nvme_rdma_request) + + NVME_RDMA_DATA_SGL_SIZE; + req->queue = queue; return 0; @@ -403,6 +419,8 @@ static void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue) dev = queue->device; ibdev = dev->dev; + if (queue->pi_support) + ib_mr_pool_destroy(queue->qp, &queue->qp->sig_mrs); ib_mr_pool_destroy(queue->qp, &queue->qp->rdma_mrs); /* @@ -419,10 +437,16 @@ static void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue) nvme_rdma_dev_put(dev); } -static int nvme_rdma_get_max_fr_pages(struct ib_device *ibdev) +static int nvme_rdma_get_max_fr_pages(struct ib_device *ibdev, bool pi_support) { - return min_t(u32, NVME_RDMA_MAX_SEGMENTS, - ibdev->attrs.max_fast_reg_page_list_len - 1); + u32 max_page_list_len; + + if (pi_support) + max_page_list_len = ibdev->attrs.max_pi_fast_reg_page_list_len; + else + max_page_list_len = ibdev->attrs.max_fast_reg_page_list_len; + + return min_t(u32, NVME_RDMA_MAX_SEGMENTS, max_page_list_len - 1); } static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue) @@ -479,7 +503,7 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue) * misaligned we'll end up using two entries for a single data page, * so one additional entry is required. 
*/ - pages_per_mr = nvme_rdma_get_max_fr_pages(ibdev) + 1; + pages_per_mr = nvme_rdma_get_max_fr_pages(ibdev, queue->pi_support) + 1; ret = ib_mr_pool_init(queue->qp, &queue->qp->rdma_mrs, queue->queue_size, IB_MR_TYPE_MEM_REG, @@ -491,10 +515,24 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue) goto out_destroy_ring; } + if (queue->pi_support) { + ret = ib_mr_pool_init(queue->qp, &queue->qp->sig_mrs, + queue->queue_size, IB_MR_TYPE_INTEGRITY, + pages_per_mr, pages_per_mr); + if (ret) { + dev_err(queue->ctrl->ctrl.device, + "failed to initialize PI MR pool sized %d for QID %d\n", + queue->queue_size, idx); + goto out_destroy_mr_pool; + } + } + set_bit(NVME_RDMA_Q_TR_READY, &queue->flags); return 0; +out_destroy_mr_pool: + ib_mr_pool_destroy(queue->qp, &queue->qp->rdma_mrs); out_destroy_ring: nvme_rdma_free_ring(ibdev, queue->rsp_ring, queue->queue_size, sizeof(struct nvme_completion), DMA_FROM_DEVICE); @@ -516,6 +554,10 @@ static int nvme_rdma_alloc_queue(struct nvme_rdma_ctrl *ctrl, queue = &ctrl->queues[idx]; queue->ctrl = ctrl; + if (idx && ctrl->ctrl.max_integrity_segments) + queue->pi_support = true; + else + queue->pi_support = false; init_completion(&queue->cm_done); if (idx > 0) @@ -726,7 +768,7 @@ static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl, set->reserved_tags = 2; /* connect + keep-alive */ set->numa_node = nctrl->numa_node; set->cmd_size = sizeof(struct nvme_rdma_request) + - NVME_INLINE_SG_CNT * sizeof(struct scatterlist); + NVME_RDMA_DATA_SGL_SIZE; set->driver_data = ctrl; set->nr_hw_queues = 1; set->timeout = ADMIN_TIMEOUT; @@ -740,7 +782,10 @@ static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl, set->numa_node = nctrl->numa_node; set->flags = BLK_MQ_F_SHOULD_MERGE; set->cmd_size = sizeof(struct nvme_rdma_request) + - NVME_INLINE_SG_CNT * sizeof(struct scatterlist); + NVME_RDMA_DATA_SGL_SIZE; + if (nctrl->max_integrity_segments) + set->cmd_size += sizeof(struct nvme_rdma_sgl) + + NVME_RDMA_METADATA_SGL_SIZE; set->driver_data = ctrl; set->nr_hw_queues = nctrl->queue_count - 1; set->timeout = NVME_IO_TIMEOUT; @@ -773,6 +818,7 @@ static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl, static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl, bool new) { + bool pi_capable = false; int error; error = nvme_rdma_alloc_queue(ctrl, 0, NVME_AQ_DEPTH); @@ -782,7 +828,13 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl, ctrl->device = ctrl->queues[0].device; ctrl->ctrl.numa_node = dev_to_node(ctrl->device->dev->dma_device); - ctrl->max_fr_pages = nvme_rdma_get_max_fr_pages(ctrl->device->dev); + /* T10-PI support */ + if (ctrl->device->dev->attrs.device_cap_flags & + IB_DEVICE_INTEGRITY_HANDOVER) + pi_capable = true; + + ctrl->max_fr_pages = nvme_rdma_get_max_fr_pages(ctrl->device->dev, + pi_capable); /* * Bind the async event SQE DMA mapping to the admin queue lifetime. 
@@ -824,6 +876,10 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl, ctrl->ctrl.max_segments = ctrl->max_fr_pages; ctrl->ctrl.max_hw_sectors = ctrl->max_fr_pages << (ilog2(SZ_4K) - 9); + if (pi_capable) + ctrl->ctrl.max_integrity_segments = ctrl->max_fr_pages; + else + ctrl->ctrl.max_integrity_segments = 0; blk_mq_unquiesce_queue(ctrl->ctrl.admin_q); @@ -1152,12 +1208,23 @@ static void nvme_rdma_unmap_data(struct nvme_rdma_queue *queue, struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq); struct nvme_rdma_device *dev = queue->device; struct ib_device *ibdev = dev->dev; + struct list_head *pool = &queue->qp->rdma_mrs; if (!blk_rq_nr_phys_segments(rq)) return; + if (blk_integrity_rq(rq)) { + ib_dma_unmap_sg(ibdev, req->metadata_sgl->sg_table.sgl, + req->metadata_sgl->nents, rq_dma_dir(rq)); + sg_free_table_chained(&req->metadata_sgl->sg_table, + NVME_INLINE_METADATA_SG_CNT); + } + + if (req->use_sig_mr) + pool = &queue->qp->sig_mrs; + if (req->mr) { - ib_mr_pool_put(queue->qp, &queue->qp->rdma_mrs, req->mr); + ib_mr_pool_put(queue->qp, pool, req->mr); req->mr = NULL; } @@ -1261,12 +1328,125 @@ static int nvme_rdma_map_sg_fr(struct nvme_rdma_queue *queue, return 0; } +static void nvme_rdma_set_sig_domain(struct blk_integrity *bi, + struct nvme_command *cmd, struct ib_sig_domain *domain, + u16 control, u8 pi_type) +{ + domain->sig_type = IB_SIG_TYPE_T10_DIF; + domain->sig.dif.bg_type = IB_T10DIF_CRC; + domain->sig.dif.pi_interval = 1 << bi->interval_exp; + domain->sig.dif.ref_tag = le32_to_cpu(cmd->rw.reftag); + if (control & NVME_RW_PRINFO_PRCHK_REF) + domain->sig.dif.ref_remap = true; + + domain->sig.dif.app_tag = le16_to_cpu(cmd->rw.apptag); + domain->sig.dif.apptag_check_mask = le16_to_cpu(cmd->rw.appmask); + domain->sig.dif.app_escape = true; + if (pi_type == NVME_NS_DPS_PI_TYPE3) + domain->sig.dif.ref_escape = true; +} + +static void nvme_rdma_set_sig_attrs(struct blk_integrity *bi, + struct nvme_command *cmd, struct ib_sig_attrs *sig_attrs, + u8 pi_type) +{ + u16 control = le16_to_cpu(cmd->rw.control); + + memset(sig_attrs, 0, sizeof(*sig_attrs)); + if (control & NVME_RW_PRINFO_PRACT) { + /* for WRITE_INSERT/READ_STRIP no memory domain */ + sig_attrs->mem.sig_type = IB_SIG_TYPE_NONE; + nvme_rdma_set_sig_domain(bi, cmd, &sig_attrs->wire, control, + pi_type); + /* Clear the PRACT bit since HCA will generate/verify the PI */ + control &= ~NVME_RW_PRINFO_PRACT; + cmd->rw.control = cpu_to_le16(control); + } else { + /* for WRITE_PASS/READ_PASS both wire/memory domains exist */ + nvme_rdma_set_sig_domain(bi, cmd, &sig_attrs->wire, control, + pi_type); + nvme_rdma_set_sig_domain(bi, cmd, &sig_attrs->mem, control, + pi_type); + } +} + +static void nvme_rdma_set_prot_checks(struct nvme_command *cmd, u8 *mask) +{ + *mask = 0; + if (le16_to_cpu(cmd->rw.control) & NVME_RW_PRINFO_PRCHK_REF) + *mask |= IB_SIG_CHECK_REFTAG; + if (le16_to_cpu(cmd->rw.control) & NVME_RW_PRINFO_PRCHK_GUARD) + *mask |= IB_SIG_CHECK_GUARD; +} + +static void nvme_rdma_sig_done(struct ib_cq *cq, struct ib_wc *wc) +{ + if (unlikely(wc->status != IB_WC_SUCCESS)) + nvme_rdma_wr_error(cq, wc, "SIG"); +} + +static int nvme_rdma_map_sg_pi(struct nvme_rdma_queue *queue, + struct nvme_rdma_request *req, struct nvme_command *c, + int count, int pi_count) +{ + struct nvme_rdma_sgl *sgl = &req->data_sgl; + struct ib_reg_wr *wr = &req->reg_wr; + struct request *rq = blk_mq_rq_from_pdu(req); + struct nvme_ns *ns = rq->q->queuedata; + struct bio *bio = rq->bio; + struct nvme_keyed_sgl_desc *sg = 
&c->common.dptr.ksgl; + int nr; + + req->mr = ib_mr_pool_get(queue->qp, &queue->qp->sig_mrs); + if (WARN_ON_ONCE(!req->mr)) + return -EAGAIN; + + nr = ib_map_mr_sg_pi(req->mr, sgl->sg_table.sgl, count, NULL, + req->metadata_sgl->sg_table.sgl, pi_count, NULL, + SZ_4K); + if (unlikely(nr)) + goto mr_put; + + nvme_rdma_set_sig_attrs(blk_get_integrity(bio->bi_disk), c, + req->mr->sig_attrs, ns->pi_type); + nvme_rdma_set_prot_checks(c, &req->mr->sig_attrs->check_mask); + + ib_update_fast_reg_key(req->mr, ib_inc_rkey(req->mr->rkey)); + + req->reg_cqe.done = nvme_rdma_sig_done; + memset(wr, 0, sizeof(*wr)); + wr->wr.opcode = IB_WR_REG_MR_INTEGRITY; + wr->wr.wr_cqe = &req->reg_cqe; + wr->wr.num_sge = 0; + wr->wr.send_flags = 0; + wr->mr = req->mr; + wr->key = req->mr->rkey; + wr->access = IB_ACCESS_LOCAL_WRITE | + IB_ACCESS_REMOTE_READ | + IB_ACCESS_REMOTE_WRITE; + + sg->addr = cpu_to_le64(req->mr->iova); + put_unaligned_le24(req->mr->length, sg->length); + put_unaligned_le32(req->mr->rkey, sg->key); + sg->type = NVME_KEY_SGL_FMT_DATA_DESC << 4; + + return 0; + +mr_put: + ib_mr_pool_put(queue->qp, &queue->qp->sig_mrs, req->mr); + req->mr = NULL; + if (nr < 0) + return nr; + return -EINVAL; +} + static int nvme_rdma_map_data(struct nvme_rdma_queue *queue, struct request *rq, struct nvme_command *c) { struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq); struct nvme_rdma_device *dev = queue->device; struct ib_device *ibdev = dev->dev; + int pi_count = 0; int count, ret; req->num_sge = 1; @@ -1294,6 +1474,35 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue, goto out_free_table; } + if (blk_integrity_rq(rq)) { + req->metadata_sgl->sg_table.sgl = + (struct scatterlist *)(req->metadata_sgl + 1); + ret = sg_alloc_table_chained(&req->metadata_sgl->sg_table, + blk_rq_count_integrity_sg(rq->q, rq->bio), + req->metadata_sgl->sg_table.sgl, + NVME_INLINE_METADATA_SG_CNT); + if (unlikely(ret)) { + ret = -ENOMEM; + goto out_unmap_sg; + } + + req->metadata_sgl->nents = blk_rq_map_integrity_sg(rq->q, + rq->bio, req->metadata_sgl->sg_table.sgl); + pi_count = ib_dma_map_sg(ibdev, + req->metadata_sgl->sg_table.sgl, + req->metadata_sgl->nents, + rq_dma_dir(rq)); + if (unlikely(pi_count <= 0)) { + ret = -EIO; + goto out_free_pi_table; + } + } + + if (req->use_sig_mr) { + ret = nvme_rdma_map_sg_pi(queue, req, c, count, pi_count); + goto out; + } + if (count <= dev->num_inline_segments) { if (rq_data_dir(rq) == WRITE && nvme_rdma_queue_idx(queue) && queue->ctrl->use_inline_data && @@ -1312,10 +1521,18 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue, ret = nvme_rdma_map_sg_fr(queue, req, c, count); out: if (unlikely(ret)) - goto out_unmap_sg; + goto out_unmap_pi_sg; return 0; +out_unmap_pi_sg: + if (blk_integrity_rq(rq)) + ib_dma_unmap_sg(ibdev, req->metadata_sgl->sg_table.sgl, + req->metadata_sgl->nents, rq_dma_dir(rq)); +out_free_pi_table: + if (blk_integrity_rq(rq)) + sg_free_table_chained(&req->metadata_sgl->sg_table, + NVME_INLINE_METADATA_SG_CNT); out_unmap_sg: ib_dma_unmap_sg(ibdev, req->data_sgl.sg_table.sgl, req->data_sgl.nents, rq_dma_dir(rq)); @@ -1768,6 +1985,15 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx, blk_mq_start_request(rq); + if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) && + queue->pi_support && + (c->common.opcode == nvme_cmd_write || + c->common.opcode == nvme_cmd_read) && + nvme_ns_has_pi(ns)) + req->use_sig_mr = true; + else + req->use_sig_mr = false; + err = nvme_rdma_map_data(queue, rq, c); if (unlikely(err < 0)) { 
dev_err(queue->ctrl->ctrl.device, @@ -1808,12 +2034,46 @@ static int nvme_rdma_poll(struct blk_mq_hw_ctx *hctx) return ib_process_cq_direct(queue->ib_cq, -1); } +static void nvme_rdma_check_pi_status(struct nvme_rdma_request *req) +{ + struct request *rq = blk_mq_rq_from_pdu(req); + struct ib_mr_status mr_status; + int ret; + + ret = ib_check_mr_status(req->mr, IB_MR_CHECK_SIG_STATUS, &mr_status); + if (ret) { + pr_err("ib_check_mr_status failed, ret %d\n", ret); + nvme_req(rq)->status = NVME_SC_INVALID_PI; + return; + } + + if (mr_status.fail_status & IB_MR_CHECK_SIG_STATUS) { + switch (mr_status.sig_err.err_type) { + case IB_SIG_BAD_GUARD: + nvme_req(rq)->status = NVME_SC_GUARD_CHECK; + break; + case IB_SIG_BAD_REFTAG: + nvme_req(rq)->status = NVME_SC_REFTAG_CHECK; + break; + case IB_SIG_BAD_APPTAG: + nvme_req(rq)->status = NVME_SC_APPTAG_CHECK; + break; + } + pr_err("PI error found type %d expected 0x%x vs actual 0x%x\n", + mr_status.sig_err.err_type, mr_status.sig_err.expected, + mr_status.sig_err.actual); + } +} + static void nvme_rdma_complete_rq(struct request *rq) { struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq); struct nvme_rdma_queue *queue = req->queue; struct ib_device *ibdev = queue->device->dev; + if (req->use_sig_mr) + nvme_rdma_check_pi_status(req); + nvme_rdma_unmap_data(queue, rq); ib_dma_unmap_single(ibdev, req->sqe.dma, sizeof(struct nvme_command), DMA_TO_DEVICE); @@ -1933,7 +2193,7 @@ out_fail: static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = { .name = "rdma", .module = THIS_MODULE, - .flags = NVME_F_FABRICS, + .flags = NVME_F_FABRICS | NVME_F_METADATA_SUPPORTED, .reg_read32 = nvmf_reg_read32, .reg_read64 = nvmf_reg_read64, .reg_write32 = nvmf_reg_write32, -- cgit v1.2.3 From d2d1c454a4a44010cac627fd63945ff5e7dd3b4c Mon Sep 17 00:00:00 2001 From: Israel Rukshin Date: Tue, 19 May 2020 17:05:57 +0300 Subject: nvmet: add metadata characteristics for a namespace Fill those namespace fields from the block device format for adding metadata (T10-PI) over fabric support with block devices. Signed-off-by: Israel Rukshin Signed-off-by: Max Gurtovoy Reviewed-by: Martin K. 
Petersen Signed-off-by: Christoph Hellwig --- drivers/nvme/target/Kconfig | 1 + drivers/nvme/target/io-cmd-bdev.c | 22 ++++++++++++++++++++++ drivers/nvme/target/nvmet.h | 3 +++ 3 files changed, 26 insertions(+) diff --git a/drivers/nvme/target/Kconfig b/drivers/nvme/target/Kconfig index d7f48c0fb311..4474952d64c6 100644 --- a/drivers/nvme/target/Kconfig +++ b/drivers/nvme/target/Kconfig @@ -4,6 +4,7 @@ config NVME_TARGET tristate "NVMe Target support" depends on BLOCK depends on CONFIGFS_FS + select BLK_DEV_INTEGRITY_T10 if BLK_DEV_INTEGRITY select SGL_ALLOC help This enabled target side support for the NVMe protocol, that is diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c index 0427e040e3dd..e518bb9bf718 100644 --- a/drivers/nvme/target/io-cmd-bdev.c +++ b/drivers/nvme/target/io-cmd-bdev.c @@ -47,6 +47,22 @@ void nvmet_bdev_set_limits(struct block_device *bdev, struct nvme_id_ns *id) id->nows = to0based(ql->io_opt / ql->logical_block_size); } +static void nvmet_bdev_ns_enable_integrity(struct nvmet_ns *ns) +{ + struct blk_integrity *bi = bdev_get_integrity(ns->bdev); + + if (bi) { + ns->metadata_size = bi->tuple_size; + if (bi->profile == &t10_pi_type1_crc) + ns->pi_type = NVME_NS_DPS_PI_TYPE1; + else if (bi->profile == &t10_pi_type3_crc) + ns->pi_type = NVME_NS_DPS_PI_TYPE3; + else + /* Unsupported metadata type */ + ns->metadata_size = 0; + } +} + int nvmet_bdev_ns_enable(struct nvmet_ns *ns) { int ret; @@ -64,6 +80,12 @@ int nvmet_bdev_ns_enable(struct nvmet_ns *ns) } ns->size = i_size_read(ns->bdev->bd_inode); ns->blksize_shift = blksize_bits(bdev_logical_block_size(ns->bdev)); + + ns->pi_type = 0; + ns->metadata_size = 0; + if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY_T10)) + nvmet_bdev_ns_enable_integrity(ns); + return 0; } diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index 93e0c2aa3e71..daef06a3aff4 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -19,6 +19,7 @@ #include #include #include +#include #define NVMET_ASYNC_EVENTS 4 #define NVMET_ERROR_LOG_SLOTS 128 @@ -77,6 +78,8 @@ struct nvmet_ns { int use_p2pmem; struct pci_dev *p2p_dev; + int pi_type; + int metadata_size; }; static inline struct nvmet_ns *to_nvmet_ns(struct config_item *item) -- cgit v1.2.3 From 26af180c1bd9cdd6f9b96d8df58b51d5900a2978 Mon Sep 17 00:00:00 2001 From: Israel Rukshin Date: Tue, 19 May 2020 17:05:58 +0300 Subject: nvmet: rename nvmet_rw_len to nvmet_rw_data_len The function doesn't add the metadata length (only data length is calculated). This is preparation for adding metadata (T10-PI) support. Signed-off-by: Israel Rukshin Reviewed-by: Max Gurtovoy Reviewed-by: Sagi Grimberg Reviewed-by: Martin K. 
Petersen Reviewed-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/target/io-cmd-bdev.c | 2 +- drivers/nvme/target/io-cmd-file.c | 2 +- drivers/nvme/target/nvmet.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c index e518bb9bf718..628e81a74444 100644 --- a/drivers/nvme/target/io-cmd-bdev.c +++ b/drivers/nvme/target/io-cmd-bdev.c @@ -178,7 +178,7 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req) sector_t sector; int op, i; - if (!nvmet_check_data_len(req, nvmet_rw_len(req))) + if (!nvmet_check_data_len(req, nvmet_rw_data_len(req))) return; if (!req->sg_cnt) { diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c index f0bd08d86ac0..b31717c5d75f 100644 --- a/drivers/nvme/target/io-cmd-file.c +++ b/drivers/nvme/target/io-cmd-file.c @@ -241,7 +241,7 @@ static void nvmet_file_execute_rw(struct nvmet_req *req) { ssize_t nr_bvec = req->sg_cnt; - if (!nvmet_check_data_len(req, nvmet_rw_len(req))) + if (!nvmet_check_data_len(req, nvmet_rw_data_len(req))) return; if (!req->sg_cnt || !nr_bvec) { diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index daef06a3aff4..8bed8a61345e 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -505,7 +505,7 @@ void nvmet_bdev_ns_revalidate(struct nvmet_ns *ns); int nvmet_file_ns_revalidate(struct nvmet_ns *ns); void nvmet_ns_revalidate(struct nvmet_ns *ns); -static inline u32 nvmet_rw_len(struct nvmet_req *req) +static inline u32 nvmet_rw_data_len(struct nvmet_req *req) { return ((u32)le16_to_cpu(req->cmd->rw.length) + 1) << req->ns->blksize_shift; -- cgit v1.2.3 From 136cc1ffcf0a3309c59d844cb1a4ddad964ea3d8 Mon Sep 17 00:00:00 2001 From: Israel Rukshin Date: Tue, 19 May 2020 17:05:59 +0300 Subject: nvmet: rename nvmet_check_data_len to nvmet_check_transfer_len The function doesn't check only the data length, because the transfer length includes also the metadata length in some cases. This is preparation for adding metadata (T10-PI) support. Signed-off-by: Israel Rukshin Signed-off-by: Max Gurtovoy Reviewed-by: Sagi Grimberg Reviewed-by: Martin K. 
Petersen Reviewed-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/target/admin-cmd.c | 14 +++++++------- drivers/nvme/target/core.c | 6 +++--- drivers/nvme/target/discovery.c | 8 ++++---- drivers/nvme/target/fabrics-cmd.c | 8 ++++---- drivers/nvme/target/io-cmd-bdev.c | 6 +++--- drivers/nvme/target/io-cmd-file.c | 6 +++--- drivers/nvme/target/nvmet.h | 2 +- 7 files changed, 25 insertions(+), 25 deletions(-) diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index f544a14e8b5c..7b6b0395eaca 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -295,7 +295,7 @@ out: static void nvmet_execute_get_log_page(struct nvmet_req *req) { - if (!nvmet_check_data_len(req, nvmet_get_log_page_len(req->cmd))) + if (!nvmet_check_transfer_len(req, nvmet_get_log_page_len(req->cmd))) return; switch (req->cmd->get_log_page.lid) { @@ -627,7 +627,7 @@ out: static void nvmet_execute_identify(struct nvmet_req *req) { - if (!nvmet_check_data_len(req, NVME_IDENTIFY_DATA_SIZE)) + if (!nvmet_check_transfer_len(req, NVME_IDENTIFY_DATA_SIZE)) return; switch (req->cmd->identify.cns) { @@ -656,7 +656,7 @@ static void nvmet_execute_identify(struct nvmet_req *req) */ static void nvmet_execute_abort(struct nvmet_req *req) { - if (!nvmet_check_data_len(req, 0)) + if (!nvmet_check_transfer_len(req, 0)) return; nvmet_set_result(req, 1); nvmet_req_complete(req, 0); @@ -745,7 +745,7 @@ static void nvmet_execute_set_features(struct nvmet_req *req) u16 nsqr; u16 ncqr; - if (!nvmet_check_data_len(req, 0)) + if (!nvmet_check_transfer_len(req, 0)) return; switch (cdw10 & 0xff) { @@ -817,7 +817,7 @@ static void nvmet_execute_get_features(struct nvmet_req *req) u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10); u16 status = 0; - if (!nvmet_check_data_len(req, nvmet_feat_data_len(req, cdw10))) + if (!nvmet_check_transfer_len(req, nvmet_feat_data_len(req, cdw10))) return; switch (cdw10 & 0xff) { @@ -884,7 +884,7 @@ void nvmet_execute_async_event(struct nvmet_req *req) { struct nvmet_ctrl *ctrl = req->sq->ctrl; - if (!nvmet_check_data_len(req, 0)) + if (!nvmet_check_transfer_len(req, 0)) return; mutex_lock(&ctrl->lock); @@ -903,7 +903,7 @@ void nvmet_execute_keep_alive(struct nvmet_req *req) { struct nvmet_ctrl *ctrl = req->sq->ctrl; - if (!nvmet_check_data_len(req, 0)) + if (!nvmet_check_transfer_len(req, 0)) return; pr_debug("ctrl %d update keep-alive timer for %d secs\n", diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index fb78b165c123..a4ed70e4afe3 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -950,9 +950,9 @@ void nvmet_req_uninit(struct nvmet_req *req) } EXPORT_SYMBOL_GPL(nvmet_req_uninit); -bool nvmet_check_data_len(struct nvmet_req *req, size_t data_len) +bool nvmet_check_transfer_len(struct nvmet_req *req, size_t len) { - if (unlikely(data_len != req->transfer_len)) { + if (unlikely(len != req->transfer_len)) { req->error_loc = offsetof(struct nvme_common_command, dptr); nvmet_req_complete(req, NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR); return false; @@ -960,7 +960,7 @@ bool nvmet_check_data_len(struct nvmet_req *req, size_t data_len) return true; } -EXPORT_SYMBOL_GPL(nvmet_check_data_len); +EXPORT_SYMBOL_GPL(nvmet_check_transfer_len); bool nvmet_check_data_len_lte(struct nvmet_req *req, size_t data_len) { diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c index 0c2274b21e15..40cf0b6e6c9d 100644 --- a/drivers/nvme/target/discovery.c +++ b/drivers/nvme/target/discovery.c @@ 
-171,7 +171,7 @@ static void nvmet_execute_disc_get_log_page(struct nvmet_req *req) u16 status = 0; void *buffer; - if (!nvmet_check_data_len(req, data_len)) + if (!nvmet_check_transfer_len(req, data_len)) return; if (req->cmd->get_log_page.lid != NVME_LOG_DISC) { @@ -244,7 +244,7 @@ static void nvmet_execute_disc_identify(struct nvmet_req *req) const char model[] = "Linux"; u16 status = 0; - if (!nvmet_check_data_len(req, NVME_IDENTIFY_DATA_SIZE)) + if (!nvmet_check_transfer_len(req, NVME_IDENTIFY_DATA_SIZE)) return; if (req->cmd->identify.cns != NVME_ID_CNS_CTRL) { @@ -298,7 +298,7 @@ static void nvmet_execute_disc_set_features(struct nvmet_req *req) u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10); u16 stat; - if (!nvmet_check_data_len(req, 0)) + if (!nvmet_check_transfer_len(req, 0)) return; switch (cdw10 & 0xff) { @@ -324,7 +324,7 @@ static void nvmet_execute_disc_get_features(struct nvmet_req *req) u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10); u16 stat = 0; - if (!nvmet_check_data_len(req, 0)) + if (!nvmet_check_transfer_len(req, 0)) return; switch (cdw10 & 0xff) { diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c index feef15c38ec9..52a6f70c9d80 100644 --- a/drivers/nvme/target/fabrics-cmd.c +++ b/drivers/nvme/target/fabrics-cmd.c @@ -12,7 +12,7 @@ static void nvmet_execute_prop_set(struct nvmet_req *req) u64 val = le64_to_cpu(req->cmd->prop_set.value); u16 status = 0; - if (!nvmet_check_data_len(req, 0)) + if (!nvmet_check_transfer_len(req, 0)) return; if (req->cmd->prop_set.attrib & 1) { @@ -41,7 +41,7 @@ static void nvmet_execute_prop_get(struct nvmet_req *req) u16 status = 0; u64 val = 0; - if (!nvmet_check_data_len(req, 0)) + if (!nvmet_check_transfer_len(req, 0)) return; if (req->cmd->prop_get.attrib & 1) { @@ -156,7 +156,7 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req) struct nvmet_ctrl *ctrl = NULL; u16 status = 0; - if (!nvmet_check_data_len(req, sizeof(struct nvmf_connect_data))) + if (!nvmet_check_transfer_len(req, sizeof(struct nvmf_connect_data))) return; d = kmalloc(sizeof(*d), GFP_KERNEL); @@ -223,7 +223,7 @@ static void nvmet_execute_io_connect(struct nvmet_req *req) u16 qid = le16_to_cpu(c->qid); u16 status = 0; - if (!nvmet_check_data_len(req, sizeof(struct nvmf_connect_data))) + if (!nvmet_check_transfer_len(req, sizeof(struct nvmf_connect_data))) return; d = kmalloc(sizeof(*d), GFP_KERNEL); diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c index 628e81a74444..e065c2430bfa 100644 --- a/drivers/nvme/target/io-cmd-bdev.c +++ b/drivers/nvme/target/io-cmd-bdev.c @@ -178,7 +178,7 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req) sector_t sector; int op, i; - if (!nvmet_check_data_len(req, nvmet_rw_data_len(req))) + if (!nvmet_check_transfer_len(req, nvmet_rw_data_len(req))) return; if (!req->sg_cnt) { @@ -239,7 +239,7 @@ static void nvmet_bdev_execute_flush(struct nvmet_req *req) { struct bio *bio = &req->b.inline_bio; - if (!nvmet_check_data_len(req, 0)) + if (!nvmet_check_transfer_len(req, 0)) return; bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec)); @@ -331,7 +331,7 @@ static void nvmet_bdev_execute_write_zeroes(struct nvmet_req *req) sector_t nr_sector; int ret; - if (!nvmet_check_data_len(req, 0)) + if (!nvmet_check_transfer_len(req, 0)) return; sector = le64_to_cpu(write_zeroes->slba) << diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c index b31717c5d75f..0abbefd9925e 100644 --- a/drivers/nvme/target/io-cmd-file.c +++ 
b/drivers/nvme/target/io-cmd-file.c @@ -241,7 +241,7 @@ static void nvmet_file_execute_rw(struct nvmet_req *req) { ssize_t nr_bvec = req->sg_cnt; - if (!nvmet_check_data_len(req, nvmet_rw_data_len(req))) + if (!nvmet_check_transfer_len(req, nvmet_rw_data_len(req))) return; if (!req->sg_cnt || !nr_bvec) { @@ -285,7 +285,7 @@ static void nvmet_file_flush_work(struct work_struct *w) static void nvmet_file_execute_flush(struct nvmet_req *req) { - if (!nvmet_check_data_len(req, 0)) + if (!nvmet_check_transfer_len(req, 0)) return; INIT_WORK(&req->f.work, nvmet_file_flush_work); schedule_work(&req->f.work); @@ -375,7 +375,7 @@ static void nvmet_file_write_zeroes_work(struct work_struct *w) static void nvmet_file_execute_write_zeroes(struct nvmet_req *req) { - if (!nvmet_check_data_len(req, 0)) + if (!nvmet_check_transfer_len(req, 0)) return; INIT_WORK(&req->f.work, nvmet_file_write_zeroes_work); schedule_work(&req->f.work); diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index 8bed8a61345e..29ab0b96bd32 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -387,7 +387,7 @@ u16 nvmet_parse_fabrics_cmd(struct nvmet_req *req); bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq, struct nvmet_sq *sq, const struct nvmet_fabrics_ops *ops); void nvmet_req_uninit(struct nvmet_req *req); -bool nvmet_check_data_len(struct nvmet_req *req, size_t data_len); +bool nvmet_check_transfer_len(struct nvmet_req *req, size_t len); bool nvmet_check_data_len_lte(struct nvmet_req *req, size_t data_len); void nvmet_req_complete(struct nvmet_req *req, u16 status); int nvmet_req_alloc_sgl(struct nvmet_req *req); -- cgit v1.2.3 From 39481fbd14ee272edd419d73a98bc637e2a3fd35 Mon Sep 17 00:00:00 2001 From: Israel Rukshin Date: Tue, 19 May 2020 17:06:00 +0300 Subject: nvme: add Metadata Capabilities enumerations The enumerations will be used to expose the namespace metadata format by the target. Suggested-by: Christoph Hellwig Signed-off-by: Israel Rukshin Signed-off-by: Max Gurtovoy Reviewed-by: James Smart Reviewed-by: Martin K. Petersen Signed-off-by: Christoph Hellwig --- include/linux/nvme.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/include/linux/nvme.h b/include/linux/nvme.h index e2993e6a9d7c..5ce51ab4c50e 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -420,6 +420,12 @@ enum { NVME_NS_DPS_PI_TYPE3 = 3, }; +/* Identify Namespace Metadata Capabilities (MC): */ +enum { + NVME_MC_EXTENDED_LBA = (1 << 0), + NVME_MC_METADATA_PTR = (1 << 1), +}; + struct nvme_ns_id_desc { __u8 nidt; __u8 nidl; -- cgit v1.2.3 From ea52ac1c6605fbd25347fabf46233e260dd92eb2 Mon Sep 17 00:00:00 2001 From: Israel Rukshin Date: Tue, 19 May 2020 17:06:01 +0300 Subject: nvmet: add metadata/T10-PI support Expose the namespace metadata format when PI is enabled. The user needs to enable the capability per subsystem and per port. The other metadata properties are taken from the namespace/bdev. 
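Both knobs must be set before a controller advertises metadata support: the connect handler simply ANDs them. A minimal sketch of that derivation, using nvmet_ctrl_pi_capable() as a hypothetical helper name for the expression this patch open-codes in nvmet_execute_admin_connect():

/*
 * Hypothetical helper; the patch itself open-codes this expression in
 * nvmet_execute_admin_connect(). A controller gets PI support only if
 * both the port and the subsystem enabled it via configfs.
 */
static bool nvmet_ctrl_pi_capable(struct nvmet_ctrl *ctrl)
{
	return ctrl->port->pi_enable && ctrl->subsys->pi_support;
}
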
Usage example: echo 1 > /config/nvmet/subsystems/${NAME}/attr_pi_enable echo 1 > /config/nvmet/ports/${PORT_NUM}/param_pi_enable Signed-off-by: Israel Rukshin Signed-off-by: Max Gurtovoy Reviewed-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/target/admin-cmd.c | 26 +++++++++++++++--- drivers/nvme/target/configfs.c | 58 +++++++++++++++++++++++++++++++++++++++ drivers/nvme/target/core.c | 21 +++++++++++--- drivers/nvme/target/fabrics-cmd.c | 7 +++-- drivers/nvme/target/nvmet.h | 19 +++++++++++++ 5 files changed, 121 insertions(+), 10 deletions(-) diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index 7b6b0395eaca..1db8c0498668 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -341,6 +341,7 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req) { struct nvmet_ctrl *ctrl = req->sq->ctrl; struct nvme_id_ctrl *id; + u32 cmd_capsule_size; u16 status = 0; id = kzalloc(sizeof(*id), GFP_KERNEL); @@ -433,9 +434,15 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req) strlcpy(id->subnqn, ctrl->subsys->subsysnqn, sizeof(id->subnqn)); - /* Max command capsule size is sqe + single page of in-capsule data */ - id->ioccsz = cpu_to_le32((sizeof(struct nvme_command) + - req->port->inline_data_size) / 16); + /* + * Max command capsule size is sqe + in-capsule data size. + * Disable in-capsule data for Metadata capable controllers. + */ + cmd_capsule_size = sizeof(struct nvme_command); + if (!ctrl->pi_support) + cmd_capsule_size += req->port->inline_data_size; + id->ioccsz = cpu_to_le32(cmd_capsule_size / 16); + /* Max response capsule size is cqe */ id->iorcsz = cpu_to_le32(sizeof(struct nvme_completion) / 16); @@ -465,6 +472,7 @@ out: static void nvmet_execute_identify_ns(struct nvmet_req *req) { + struct nvmet_ctrl *ctrl = req->sq->ctrl; struct nvmet_ns *ns; struct nvme_id_ns *id; u16 status = 0; @@ -482,7 +490,7 @@ static void nvmet_execute_identify_ns(struct nvmet_req *req) } /* return an all zeroed buffer if we can't find an active namespace */ - ns = nvmet_find_namespace(req->sq->ctrl, req->cmd->identify.nsid); + ns = nvmet_find_namespace(ctrl, req->cmd->identify.nsid); if (!ns) goto done; @@ -523,6 +531,16 @@ static void nvmet_execute_identify_ns(struct nvmet_req *req) id->lbaf[0].ds = ns->blksize_shift; + if (ctrl->pi_support && nvmet_ns_has_pi(ns)) { + id->dpc = NVME_NS_DPC_PI_FIRST | NVME_NS_DPC_PI_LAST | + NVME_NS_DPC_PI_TYPE1 | NVME_NS_DPC_PI_TYPE2 | + NVME_NS_DPC_PI_TYPE3; + id->mc = NVME_MC_EXTENDED_LBA; + id->dps = ns->pi_type; + id->flbas = NVME_NS_FLBAS_META_EXT; + id->lbaf[0].ms = cpu_to_le16(ns->metadata_size); + } + if (ns->readonly) id->nsattr |= (1 << 0); nvmet_put_namespace(ns); diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index 17c529260e12..419e0d4ce79b 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -248,6 +248,36 @@ static ssize_t nvmet_param_inline_data_size_store(struct config_item *item, CONFIGFS_ATTR(nvmet_, param_inline_data_size); +#ifdef CONFIG_BLK_DEV_INTEGRITY +static ssize_t nvmet_param_pi_enable_show(struct config_item *item, + char *page) +{ + struct nvmet_port *port = to_nvmet_port(item); + + return snprintf(page, PAGE_SIZE, "%d\n", port->pi_enable); +} + +static ssize_t nvmet_param_pi_enable_store(struct config_item *item, + const char *page, size_t count) +{ + struct nvmet_port *port = to_nvmet_port(item); + bool val; + + if (strtobool(page, &val)) + return -EINVAL; + + if (port->enabled) { + 
pr_err("Disable port before setting pi_enable value.\n"); + return -EACCES; + } + + port->pi_enable = val; + return count; +} + +CONFIGFS_ATTR(nvmet_, param_pi_enable); +#endif + static ssize_t nvmet_addr_trtype_show(struct config_item *item, char *page) { @@ -1010,6 +1040,28 @@ static ssize_t nvmet_subsys_attr_model_store(struct config_item *item, } CONFIGFS_ATTR(nvmet_subsys_, attr_model); +#ifdef CONFIG_BLK_DEV_INTEGRITY +static ssize_t nvmet_subsys_attr_pi_enable_show(struct config_item *item, + char *page) +{ + return snprintf(page, PAGE_SIZE, "%d\n", to_subsys(item)->pi_support); +} + +static ssize_t nvmet_subsys_attr_pi_enable_store(struct config_item *item, + const char *page, size_t count) +{ + struct nvmet_subsys *subsys = to_subsys(item); + bool pi_enable; + + if (strtobool(page, &pi_enable)) + return -EINVAL; + + subsys->pi_support = pi_enable; + return count; +} +CONFIGFS_ATTR(nvmet_subsys_, attr_pi_enable); +#endif + static struct configfs_attribute *nvmet_subsys_attrs[] = { &nvmet_subsys_attr_attr_allow_any_host, &nvmet_subsys_attr_attr_version, @@ -1017,6 +1069,9 @@ static struct configfs_attribute *nvmet_subsys_attrs[] = { &nvmet_subsys_attr_attr_cntlid_min, &nvmet_subsys_attr_attr_cntlid_max, &nvmet_subsys_attr_attr_model, +#ifdef CONFIG_BLK_DEV_INTEGRITY + &nvmet_subsys_attr_attr_pi_enable, +#endif NULL, }; @@ -1316,6 +1371,9 @@ static struct configfs_attribute *nvmet_port_attrs[] = { &nvmet_attr_addr_trsvcid, &nvmet_attr_addr_trtype, &nvmet_attr_param_inline_data_size, +#ifdef CONFIG_BLK_DEV_INTEGRITY + &nvmet_attr_param_pi_enable, +#endif NULL, }; diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index a4ed70e4afe3..f9172d38aff5 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -323,12 +323,21 @@ int nvmet_enable_port(struct nvmet_port *port) if (!try_module_get(ops->owner)) return -EINVAL; - ret = ops->add_port(port); - if (ret) { - module_put(ops->owner); - return ret; + /* + * If the user requested PI support and the transport isn't pi capable, + * don't enable the port. + */ + if (port->pi_enable && !ops->metadata_support) { + pr_err("T10-PI is not supported by transport type %d\n", + port->disc_addr.trtype); + ret = -EINVAL; + goto out_put; } + ret = ops->add_port(port); + if (ret) + goto out_put; + /* If the transport didn't set inline_data_size, then disable it. */ if (port->inline_data_size < 0) port->inline_data_size = 0; @@ -336,6 +345,10 @@ int nvmet_enable_port(struct nvmet_port *port) port->enabled = true; port->tr_ops = ops; return 0; + +out_put: + module_put(ops->owner); + return ret; } void nvmet_disable_port(struct nvmet_port *port) diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c index 52a6f70c9d80..42bd12b8bf00 100644 --- a/drivers/nvme/target/fabrics-cmd.c +++ b/drivers/nvme/target/fabrics-cmd.c @@ -197,6 +197,8 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req) goto out; } + ctrl->pi_support = ctrl->port->pi_enable && ctrl->subsys->pi_support; + uuid_copy(&ctrl->hostid, &d->hostid); status = nvmet_install_queue(ctrl, req); @@ -205,8 +207,9 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req) goto out; } - pr_info("creating controller %d for subsystem %s for NQN %s.\n", - ctrl->cntlid, ctrl->subsys->subsysnqn, ctrl->hostnqn); + pr_info("creating controller %d for subsystem %s for NQN %s%s.\n", + ctrl->cntlid, ctrl->subsys->subsysnqn, ctrl->hostnqn, + ctrl->pi_support ? 
" T10-PI is enabled" : ""); req->cqe->result.u16 = cpu_to_le16(ctrl->cntlid); out: diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index 29ab0b96bd32..46b5d2b4ca0a 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -145,6 +145,7 @@ struct nvmet_port { bool enabled; int inline_data_size; const struct nvmet_fabrics_ops *tr_ops; + bool pi_enable; }; static inline struct nvmet_port *to_nvmet_port(struct config_item *item) @@ -204,6 +205,7 @@ struct nvmet_ctrl { spinlock_t error_lock; u64 err_counter; struct nvme_error_slot slots[NVMET_ERROR_LOG_SLOTS]; + bool pi_support; }; struct nvmet_subsys_model { @@ -233,6 +235,7 @@ struct nvmet_subsys { u64 ver; u64 serial; char *subsysnqn; + bool pi_support; struct config_group group; @@ -284,6 +287,7 @@ struct nvmet_fabrics_ops { unsigned int type; unsigned int msdbd; bool has_keyed_sgls : 1; + bool metadata_support : 1; void (*queue_response)(struct nvmet_req *req); int (*add_port)(struct nvmet_port *port); void (*remove_port)(struct nvmet_port *port); @@ -511,6 +515,14 @@ static inline u32 nvmet_rw_data_len(struct nvmet_req *req) req->ns->blksize_shift; } +static inline u32 nvmet_rw_metadata_len(struct nvmet_req *req) +{ + if (!IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY)) + return 0; + return ((u32)le16_to_cpu(req->cmd->rw.length) + 1) * + req->ns->metadata_size; +} + static inline u32 nvmet_dsm_len(struct nvmet_req *req) { return (le32_to_cpu(req->cmd->dsm.nr) + 1) * @@ -525,4 +537,11 @@ static inline __le16 to0based(u32 a) return cpu_to_le16(max(1U, min(1U << 16, a)) - 1); } +static inline bool nvmet_ns_has_pi(struct nvmet_ns *ns) +{ + if (!IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY)) + return false; + return ns->pi_type && ns->metadata_size == sizeof(struct t10_pi_tuple); +} + #endif /* _NVMET_H */ -- cgit v1.2.3 From c6e3f13398123a008cd2ee28f93510b113a32791 Mon Sep 17 00:00:00 2001 From: Israel Rukshin Date: Tue, 19 May 2020 17:06:02 +0300 Subject: nvmet: add metadata support for block devices Allocate the metadata SGL buffers and set metadata fields for the request. Then create a block IO request for the metadata from the protection SG list. 
Signed-off-by: Israel Rukshin Signed-off-by: Max Gurtovoy Signed-off-by: Christoph Hellwig --- drivers/nvme/target/core.c | 95 ++++++++++++++++++++++++++++----------- drivers/nvme/target/io-cmd-bdev.c | 87 ++++++++++++++++++++++++++++++++++- drivers/nvme/target/nvmet.h | 7 ++- drivers/nvme/target/rdma.c | 4 +- 4 files changed, 161 insertions(+), 32 deletions(-) diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index f9172d38aff5..edf54d9957b7 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -900,8 +900,11 @@ bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq, req->sq = sq; req->ops = ops; req->sg = NULL; + req->metadata_sg = NULL; req->sg_cnt = 0; + req->metadata_sg_cnt = 0; req->transfer_len = 0; + req->metadata_len = 0; req->cqe->status = 0; req->cqe->sq_head = 0; req->ns = NULL; @@ -986,50 +989,90 @@ bool nvmet_check_data_len_lte(struct nvmet_req *req, size_t data_len) return true; } -int nvmet_req_alloc_sgl(struct nvmet_req *req) +static unsigned int nvmet_data_transfer_len(struct nvmet_req *req) { - struct pci_dev *p2p_dev = NULL; + return req->transfer_len - req->metadata_len; +} - if (IS_ENABLED(CONFIG_PCI_P2PDMA)) { - if (req->sq->ctrl && req->ns) - p2p_dev = radix_tree_lookup(&req->sq->ctrl->p2p_ns_map, - req->ns->nsid); +static int nvmet_req_alloc_p2pmem_sgls(struct nvmet_req *req) +{ + req->sg = pci_p2pmem_alloc_sgl(req->p2p_dev, &req->sg_cnt, + nvmet_data_transfer_len(req)); + if (!req->sg) + goto out_err; - req->p2p_dev = NULL; - if (req->sq->qid && p2p_dev) { - req->sg = pci_p2pmem_alloc_sgl(p2p_dev, &req->sg_cnt, - req->transfer_len); - if (req->sg) { - req->p2p_dev = p2p_dev; - return 0; - } - } + if (req->metadata_len) { + req->metadata_sg = pci_p2pmem_alloc_sgl(req->p2p_dev, + &req->metadata_sg_cnt, req->metadata_len); + if (!req->metadata_sg) + goto out_free_sg; + } + return 0; +out_free_sg: + pci_p2pmem_free_sgl(req->p2p_dev, req->sg); +out_err: + return -ENOMEM; +} - /* - * If no P2P memory was available we fallback to using - * regular memory - */ +static bool nvmet_req_find_p2p_dev(struct nvmet_req *req) +{ + if (!IS_ENABLED(CONFIG_PCI_P2PDMA)) + return false; + + if (req->sq->ctrl && req->sq->qid && req->ns) { + req->p2p_dev = radix_tree_lookup(&req->sq->ctrl->p2p_ns_map, + req->ns->nsid); + if (req->p2p_dev) + return true; } - req->sg = sgl_alloc(req->transfer_len, GFP_KERNEL, &req->sg_cnt); + req->p2p_dev = NULL; + return false; +} + +int nvmet_req_alloc_sgls(struct nvmet_req *req) +{ + if (nvmet_req_find_p2p_dev(req) && !nvmet_req_alloc_p2pmem_sgls(req)) + return 0; + + req->sg = sgl_alloc(nvmet_data_transfer_len(req), GFP_KERNEL, + &req->sg_cnt); if (unlikely(!req->sg)) - return -ENOMEM; + goto out; + + if (req->metadata_len) { + req->metadata_sg = sgl_alloc(req->metadata_len, GFP_KERNEL, + &req->metadata_sg_cnt); + if (unlikely(!req->metadata_sg)) + goto out_free; + } return 0; +out_free: + sgl_free(req->sg); +out: + return -ENOMEM; } -EXPORT_SYMBOL_GPL(nvmet_req_alloc_sgl); +EXPORT_SYMBOL_GPL(nvmet_req_alloc_sgls); -void nvmet_req_free_sgl(struct nvmet_req *req) +void nvmet_req_free_sgls(struct nvmet_req *req) { - if (req->p2p_dev) + if (req->p2p_dev) { pci_p2pmem_free_sgl(req->p2p_dev, req->sg); - else + if (req->metadata_sg) + pci_p2pmem_free_sgl(req->p2p_dev, req->metadata_sg); + } else { sgl_free(req->sg); + if (req->metadata_sg) + sgl_free(req->metadata_sg); + } req->sg = NULL; + req->metadata_sg = NULL; req->sg_cnt = 0; + req->metadata_sg_cnt = 0; } -EXPORT_SYMBOL_GPL(nvmet_req_free_sgl); 
+EXPORT_SYMBOL_GPL(nvmet_req_free_sgls); static inline bool nvmet_cc_en(u32 cc) { diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c index e065c2430bfa..07055f7ac398 100644 --- a/drivers/nvme/target/io-cmd-bdev.c +++ b/drivers/nvme/target/io-cmd-bdev.c @@ -169,6 +169,61 @@ static void nvmet_bio_done(struct bio *bio) bio_put(bio); } +#ifdef CONFIG_BLK_DEV_INTEGRITY +static int nvmet_bdev_alloc_bip(struct nvmet_req *req, struct bio *bio, + struct sg_mapping_iter *miter) +{ + struct blk_integrity *bi; + struct bio_integrity_payload *bip; + struct block_device *bdev = req->ns->bdev; + int rc; + size_t resid, len; + + bi = bdev_get_integrity(bdev); + if (unlikely(!bi)) { + pr_err("Unable to locate bio_integrity\n"); + return -ENODEV; + } + + bip = bio_integrity_alloc(bio, GFP_NOIO, + min_t(unsigned int, req->metadata_sg_cnt, BIO_MAX_PAGES)); + if (IS_ERR(bip)) { + pr_err("Unable to allocate bio_integrity_payload\n"); + return PTR_ERR(bip); + } + + bip->bip_iter.bi_size = bio_integrity_bytes(bi, bio_sectors(bio)); + /* virtual start sector must be in integrity interval units */ + bip_set_seed(bip, bio->bi_iter.bi_sector >> + (bi->interval_exp - SECTOR_SHIFT)); + + resid = bip->bip_iter.bi_size; + while (resid > 0 && sg_miter_next(miter)) { + len = min_t(size_t, miter->length, resid); + rc = bio_integrity_add_page(bio, miter->page, len, + offset_in_page(miter->addr)); + if (unlikely(rc != len)) { + pr_err("bio_integrity_add_page() failed; %d\n", rc); + sg_miter_stop(miter); + return -ENOMEM; + } + + resid -= len; + if (len < miter->length) + miter->consumed -= miter->length - len; + } + sg_miter_stop(miter); + + return 0; +} +#else +static int nvmet_bdev_alloc_bip(struct nvmet_req *req, struct bio *bio, + struct sg_mapping_iter *miter) +{ + return -EINVAL; +} +#endif /* CONFIG_BLK_DEV_INTEGRITY */ + static void nvmet_bdev_execute_rw(struct nvmet_req *req) { int sg_cnt = req->sg_cnt; @@ -176,9 +231,12 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req) struct scatterlist *sg; struct blk_plug plug; sector_t sector; - int op, i; + int op, i, rc; + struct sg_mapping_iter prot_miter; + unsigned int iter_flags; + unsigned int total_len = nvmet_rw_data_len(req) + req->metadata_len; - if (!nvmet_check_transfer_len(req, nvmet_rw_data_len(req))) + if (!nvmet_check_transfer_len(req, total_len)) return; if (!req->sg_cnt) { @@ -190,8 +248,10 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req) op = REQ_OP_WRITE | REQ_SYNC | REQ_IDLE; if (req->cmd->rw.control & cpu_to_le16(NVME_RW_FUA)) op |= REQ_FUA; + iter_flags = SG_MITER_TO_SG; } else { op = REQ_OP_READ; + iter_flags = SG_MITER_FROM_SG; } if (is_pci_p2pdma_page(sg_page(req->sg))) @@ -213,11 +273,24 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req) bio->bi_opf = op; blk_start_plug(&plug); + if (req->metadata_len) + sg_miter_start(&prot_miter, req->metadata_sg, + req->metadata_sg_cnt, iter_flags); + for_each_sg(req->sg, sg, req->sg_cnt, i) { while (bio_add_page(bio, sg_page(sg), sg->length, sg->offset) != sg->length) { struct bio *prev = bio; + if (req->metadata_len) { + rc = nvmet_bdev_alloc_bip(req, bio, + &prot_miter); + if (unlikely(rc)) { + bio_io_error(bio); + return; + } + } + bio = bio_alloc(GFP_KERNEL, min(sg_cnt, BIO_MAX_PAGES)); bio_set_dev(bio, req->ns->bdev); bio->bi_iter.bi_sector = sector; @@ -231,6 +304,14 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req) sg_cnt--; } + if (req->metadata_len) { + rc = nvmet_bdev_alloc_bip(req, bio, &prot_miter); + if (unlikely(rc)) { + 
bio_io_error(bio); + return; + } + } + submit_bio(bio); blk_finish_plug(&plug); } @@ -358,6 +439,8 @@ u16 nvmet_bdev_parse_io_cmd(struct nvmet_req *req) case nvme_cmd_read: case nvme_cmd_write: req->execute = nvmet_bdev_execute_rw; + if (req->sq->ctrl->pi_support && nvmet_ns_has_pi(req->ns)) + req->metadata_len = nvmet_rw_metadata_len(req); return 0; case nvme_cmd_flush: req->execute = nvmet_bdev_execute_flush; diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index 46b5d2b4ca0a..809691291e73 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -309,6 +309,7 @@ struct nvmet_req { struct nvmet_cq *cq; struct nvmet_ns *ns; struct scatterlist *sg; + struct scatterlist *metadata_sg; struct bio_vec inline_bvec[NVMET_MAX_INLINE_BIOVEC]; union { struct { @@ -322,8 +323,10 @@ struct nvmet_req { } f; }; int sg_cnt; + int metadata_sg_cnt; /* data length as parsed from the SGL descriptor: */ size_t transfer_len; + size_t metadata_len; struct nvmet_port *port; @@ -394,8 +397,8 @@ void nvmet_req_uninit(struct nvmet_req *req); bool nvmet_check_transfer_len(struct nvmet_req *req, size_t len); bool nvmet_check_data_len_lte(struct nvmet_req *req, size_t data_len); void nvmet_req_complete(struct nvmet_req *req, u16 status); -int nvmet_req_alloc_sgl(struct nvmet_req *req); -void nvmet_req_free_sgl(struct nvmet_req *req); +int nvmet_req_alloc_sgls(struct nvmet_req *req); +void nvmet_req_free_sgls(struct nvmet_req *req); void nvmet_execute_keep_alive(struct nvmet_req *req); diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index 7a90b10359bb..178dbffa8c41 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -546,7 +546,7 @@ static void nvmet_rdma_release_rsp(struct nvmet_rdma_rsp *rsp) } if (rsp->req.sg != rsp->cmd->inline_sg) - nvmet_req_free_sgl(&rsp->req); + nvmet_req_free_sgls(&rsp->req); if (unlikely(!list_empty_careful(&queue->rsp_wr_wait_list))) nvmet_rdma_process_wr_wait_list(queue); @@ -708,7 +708,7 @@ static u16 nvmet_rdma_map_sgl_keyed(struct nvmet_rdma_rsp *rsp, if (!rsp->req.transfer_len) return 0; - ret = nvmet_req_alloc_sgl(&rsp->req); + ret = nvmet_req_alloc_sgls(&rsp->req); if (unlikely(ret < 0)) goto error_out; -- cgit v1.2.3 From b09160c3996c11d62a08f9534c755103a10a89b4 Mon Sep 17 00:00:00 2001 From: Israel Rukshin Date: Tue, 19 May 2020 17:06:03 +0300 Subject: nvmet-rdma: add metadata/T10-PI support For capable HCAs (e.g. ConnectX-5/ConnectX-6) this will allow end-to-end protection information passthrough and validation for NVMe over RDMA transport. Metadata support was implemented over the new RDMA signature verbs API. 
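The structural change on the target I/O path is deliberately small: when a request carries metadata, the R/W context is built through the signature variant so a single registration covers both the data and the protection-information scatterlists. Condensed from nvmet_rdma_rw_ctx_init() in the diff below:

	if (req->metadata_len)
		/* one signature-enabled MR maps both SGLs together */
		ret = rdma_rw_ctx_signature_init(&rsp->rw, cm_id->qp,
				cm_id->port_num, req->sg, req->sg_cnt,
				req->metadata_sg, req->metadata_sg_cnt,
				sig_attrs, addr, key, nvmet_data_dir(req));
	else
		ret = rdma_rw_ctx_init(&rsp->rw, cm_id->qp, cm_id->port_num,
				req->sg, req->sg_cnt, 0, addr, key,
				nvmet_data_dir(req));

Writes also gain a dedicated completion handler, nvmet_rdma_write_data_done(), because the signature status can only be checked once the RDMA WRITE has completed and must be folded into the NVMe status before the response capsule is posted.
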
Signed-off-by: Israel Rukshin Signed-off-by: Max Gurtovoy Signed-off-by: Christoph Hellwig --- drivers/nvme/target/rdma.c | 234 +++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 215 insertions(+), 19 deletions(-) diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index 178dbffa8c41..d5141780592e 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -33,6 +33,7 @@ /* Assume mpsmin == device_page_size == 4KB */ #define NVMET_RDMA_MAX_MDTS 8 +#define NVMET_RDMA_MAX_METADATA_MDTS 5 struct nvmet_rdma_srq; @@ -60,6 +61,7 @@ struct nvmet_rdma_rsp { struct nvmet_rdma_queue *queue; struct ib_cqe read_cqe; + struct ib_cqe write_cqe; struct rdma_rw_ctx rw; struct nvmet_req req; @@ -161,6 +163,7 @@ static bool nvmet_rdma_execute_command(struct nvmet_rdma_rsp *rsp); static void nvmet_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc); static void nvmet_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc); static void nvmet_rdma_read_data_done(struct ib_cq *cq, struct ib_wc *wc); +static void nvmet_rdma_write_data_done(struct ib_cq *cq, struct ib_wc *wc); static void nvmet_rdma_qp_event(struct ib_event *event, void *priv); static void nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue); static void nvmet_rdma_free_rsp(struct nvmet_rdma_device *ndev, @@ -423,6 +426,9 @@ static int nvmet_rdma_alloc_rsp(struct nvmet_rdma_device *ndev, /* Data In / RDMA READ */ r->read_cqe.done = nvmet_rdma_read_data_done; + /* Data Out / RDMA WRITE */ + r->write_cqe.done = nvmet_rdma_write_data_done; + return 0; out_free_rsp: @@ -532,6 +538,129 @@ static void nvmet_rdma_process_wr_wait_list(struct nvmet_rdma_queue *queue) spin_unlock(&queue->rsp_wr_wait_lock); } +static u16 nvmet_rdma_check_pi_status(struct ib_mr *sig_mr) +{ + struct ib_mr_status mr_status; + int ret; + u16 status = 0; + + ret = ib_check_mr_status(sig_mr, IB_MR_CHECK_SIG_STATUS, &mr_status); + if (ret) { + pr_err("ib_check_mr_status failed, ret %d\n", ret); + return NVME_SC_INVALID_PI; + } + + if (mr_status.fail_status & IB_MR_CHECK_SIG_STATUS) { + switch (mr_status.sig_err.err_type) { + case IB_SIG_BAD_GUARD: + status = NVME_SC_GUARD_CHECK; + break; + case IB_SIG_BAD_REFTAG: + status = NVME_SC_REFTAG_CHECK; + break; + case IB_SIG_BAD_APPTAG: + status = NVME_SC_APPTAG_CHECK; + break; + } + pr_err("PI error found type %d expected 0x%x vs actual 0x%x\n", + mr_status.sig_err.err_type, + mr_status.sig_err.expected, + mr_status.sig_err.actual); + } + + return status; +} + +static void nvmet_rdma_set_sig_domain(struct blk_integrity *bi, + struct nvme_command *cmd, struct ib_sig_domain *domain, + u16 control, u8 pi_type) +{ + domain->sig_type = IB_SIG_TYPE_T10_DIF; + domain->sig.dif.bg_type = IB_T10DIF_CRC; + domain->sig.dif.pi_interval = 1 << bi->interval_exp; + domain->sig.dif.ref_tag = le32_to_cpu(cmd->rw.reftag); + if (control & NVME_RW_PRINFO_PRCHK_REF) + domain->sig.dif.ref_remap = true; + + domain->sig.dif.app_tag = le16_to_cpu(cmd->rw.apptag); + domain->sig.dif.apptag_check_mask = le16_to_cpu(cmd->rw.appmask); + domain->sig.dif.app_escape = true; + if (pi_type == NVME_NS_DPS_PI_TYPE3) + domain->sig.dif.ref_escape = true; +} + +static void nvmet_rdma_set_sig_attrs(struct nvmet_req *req, + struct ib_sig_attrs *sig_attrs) +{ + struct nvme_command *cmd = req->cmd; + u16 control = le16_to_cpu(cmd->rw.control); + u8 pi_type = req->ns->pi_type; + struct blk_integrity *bi; + + bi = bdev_get_integrity(req->ns->bdev); + + memset(sig_attrs, 0, sizeof(*sig_attrs)); + + if (control & NVME_RW_PRINFO_PRACT) { + 
/* for WRITE_INSERT/READ_STRIP no wire domain */ + sig_attrs->wire.sig_type = IB_SIG_TYPE_NONE; + nvmet_rdma_set_sig_domain(bi, cmd, &sig_attrs->mem, control, + pi_type); + /* Clear the PRACT bit since HCA will generate/verify the PI */ + control &= ~NVME_RW_PRINFO_PRACT; + cmd->rw.control = cpu_to_le16(control); + /* PI is added by the HW */ + req->transfer_len += req->metadata_len; + } else { + /* for WRITE_PASS/READ_PASS both wire/memory domains exist */ + nvmet_rdma_set_sig_domain(bi, cmd, &sig_attrs->wire, control, + pi_type); + nvmet_rdma_set_sig_domain(bi, cmd, &sig_attrs->mem, control, + pi_type); + } + + if (control & NVME_RW_PRINFO_PRCHK_REF) + sig_attrs->check_mask |= IB_SIG_CHECK_REFTAG; + if (control & NVME_RW_PRINFO_PRCHK_GUARD) + sig_attrs->check_mask |= IB_SIG_CHECK_GUARD; + if (control & NVME_RW_PRINFO_PRCHK_APP) + sig_attrs->check_mask |= IB_SIG_CHECK_APPTAG; +} + +static int nvmet_rdma_rw_ctx_init(struct nvmet_rdma_rsp *rsp, u64 addr, u32 key, + struct ib_sig_attrs *sig_attrs) +{ + struct rdma_cm_id *cm_id = rsp->queue->cm_id; + struct nvmet_req *req = &rsp->req; + int ret; + + if (req->metadata_len) + ret = rdma_rw_ctx_signature_init(&rsp->rw, cm_id->qp, + cm_id->port_num, req->sg, req->sg_cnt, + req->metadata_sg, req->metadata_sg_cnt, sig_attrs, + addr, key, nvmet_data_dir(req)); + else + ret = rdma_rw_ctx_init(&rsp->rw, cm_id->qp, cm_id->port_num, + req->sg, req->sg_cnt, 0, addr, key, + nvmet_data_dir(req)); + + return ret; +} + +static void nvmet_rdma_rw_ctx_destroy(struct nvmet_rdma_rsp *rsp) +{ + struct rdma_cm_id *cm_id = rsp->queue->cm_id; + struct nvmet_req *req = &rsp->req; + + if (req->metadata_len) + rdma_rw_ctx_destroy_signature(&rsp->rw, cm_id->qp, + cm_id->port_num, req->sg, req->sg_cnt, + req->metadata_sg, req->metadata_sg_cnt, + nvmet_data_dir(req)); + else + rdma_rw_ctx_destroy(&rsp->rw, cm_id->qp, cm_id->port_num, + req->sg, req->sg_cnt, nvmet_data_dir(req)); +} static void nvmet_rdma_release_rsp(struct nvmet_rdma_rsp *rsp) { @@ -539,11 +668,8 @@ static void nvmet_rdma_release_rsp(struct nvmet_rdma_rsp *rsp) atomic_add(1 + rsp->n_rdma, &queue->sq_wr_avail); - if (rsp->n_rdma) { - rdma_rw_ctx_destroy(&rsp->rw, queue->qp, - queue->cm_id->port_num, rsp->req.sg, - rsp->req.sg_cnt, nvmet_data_dir(&rsp->req)); - } + if (rsp->n_rdma) + nvmet_rdma_rw_ctx_destroy(rsp); if (rsp->req.sg != rsp->cmd->inline_sg) nvmet_req_free_sgls(&rsp->req); @@ -598,11 +724,16 @@ static void nvmet_rdma_queue_response(struct nvmet_req *req) rsp->send_wr.opcode = IB_WR_SEND; } - if (nvmet_rdma_need_data_out(rsp)) - first_wr = rdma_rw_ctx_wrs(&rsp->rw, cm_id->qp, - cm_id->port_num, NULL, &rsp->send_wr); - else + if (nvmet_rdma_need_data_out(rsp)) { + if (rsp->req.metadata_len) + first_wr = rdma_rw_ctx_wrs(&rsp->rw, cm_id->qp, + cm_id->port_num, &rsp->write_cqe, NULL); + else + first_wr = rdma_rw_ctx_wrs(&rsp->rw, cm_id->qp, + cm_id->port_num, NULL, &rsp->send_wr); + } else { first_wr = &rsp->send_wr; + } nvmet_rdma_post_recv(rsp->queue->dev, rsp->cmd); @@ -621,15 +752,14 @@ static void nvmet_rdma_read_data_done(struct ib_cq *cq, struct ib_wc *wc) struct nvmet_rdma_rsp *rsp = container_of(wc->wr_cqe, struct nvmet_rdma_rsp, read_cqe); struct nvmet_rdma_queue *queue = cq->cq_context; + u16 status = 0; WARN_ON(rsp->n_rdma <= 0); atomic_add(rsp->n_rdma, &queue->sq_wr_avail); - rdma_rw_ctx_destroy(&rsp->rw, queue->qp, - queue->cm_id->port_num, rsp->req.sg, - rsp->req.sg_cnt, nvmet_data_dir(&rsp->req)); rsp->n_rdma = 0; if (unlikely(wc->status != IB_WC_SUCCESS)) { + 
nvmet_rdma_rw_ctx_destroy(rsp); nvmet_req_uninit(&rsp->req); nvmet_rdma_release_rsp(rsp); if (wc->status != IB_WC_WR_FLUSH_ERR) { @@ -640,7 +770,58 @@ static void nvmet_rdma_read_data_done(struct ib_cq *cq, struct ib_wc *wc) return; } - rsp->req.execute(&rsp->req); + if (rsp->req.metadata_len) + status = nvmet_rdma_check_pi_status(rsp->rw.reg->mr); + nvmet_rdma_rw_ctx_destroy(rsp); + + if (unlikely(status)) + nvmet_req_complete(&rsp->req, status); + else + rsp->req.execute(&rsp->req); +} + +static void nvmet_rdma_write_data_done(struct ib_cq *cq, struct ib_wc *wc) +{ + struct nvmet_rdma_rsp *rsp = + container_of(wc->wr_cqe, struct nvmet_rdma_rsp, write_cqe); + struct nvmet_rdma_queue *queue = cq->cq_context; + struct rdma_cm_id *cm_id = rsp->queue->cm_id; + u16 status; + + if (!IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY)) + return; + + WARN_ON(rsp->n_rdma <= 0); + atomic_add(rsp->n_rdma, &queue->sq_wr_avail); + rsp->n_rdma = 0; + + if (unlikely(wc->status != IB_WC_SUCCESS)) { + nvmet_rdma_rw_ctx_destroy(rsp); + nvmet_req_uninit(&rsp->req); + nvmet_rdma_release_rsp(rsp); + if (wc->status != IB_WC_WR_FLUSH_ERR) { + pr_info("RDMA WRITE for CQE 0x%p failed with status %s (%d).\n", + wc->wr_cqe, ib_wc_status_msg(wc->status), + wc->status); + nvmet_rdma_error_comp(queue); + } + return; + } + + /* + * Upon RDMA completion check the signature status + * - if succeeded send good NVMe response + * - if failed send bad NVMe response with appropriate error + */ + status = nvmet_rdma_check_pi_status(rsp->rw.reg->mr); + if (unlikely(status)) + rsp->req.cqe->status = cpu_to_le16(status << 1); + nvmet_rdma_rw_ctx_destroy(rsp); + + if (unlikely(ib_post_send(cm_id->qp, &rsp->send_wr, NULL))) { + pr_err("sending cmd response failed\n"); + nvmet_rdma_release_rsp(rsp); + } } static void nvmet_rdma_use_inline_sg(struct nvmet_rdma_rsp *rsp, u32 len, @@ -697,9 +878,9 @@ static u16 nvmet_rdma_map_sgl_inline(struct nvmet_rdma_rsp *rsp) static u16 nvmet_rdma_map_sgl_keyed(struct nvmet_rdma_rsp *rsp, struct nvme_keyed_sgl_desc *sgl, bool invalidate) { - struct rdma_cm_id *cm_id = rsp->queue->cm_id; u64 addr = le64_to_cpu(sgl->addr); u32 key = get_unaligned_le32(sgl->key); + struct ib_sig_attrs sig_attrs; int ret; rsp->req.transfer_len = get_unaligned_le24(sgl->length); @@ -708,13 +889,14 @@ static u16 nvmet_rdma_map_sgl_keyed(struct nvmet_rdma_rsp *rsp, if (!rsp->req.transfer_len) return 0; + if (rsp->req.metadata_len) + nvmet_rdma_set_sig_attrs(&rsp->req, &sig_attrs); + ret = nvmet_req_alloc_sgls(&rsp->req); if (unlikely(ret < 0)) goto error_out; - ret = rdma_rw_ctx_init(&rsp->rw, cm_id->qp, cm_id->port_num, - rsp->req.sg, rsp->req.sg_cnt, 0, addr, key, - nvmet_data_dir(&rsp->req)); + ret = nvmet_rdma_rw_ctx_init(rsp, addr, key, &sig_attrs); if (unlikely(ret < 0)) goto error_out; rsp->n_rdma += ret; @@ -1108,6 +1290,9 @@ static int nvmet_rdma_create_queue_ib(struct nvmet_rdma_queue *queue) qp_attr.cap.max_recv_sge = 1 + ndev->inline_page_count; } + if (queue->port->pi_enable && queue->host_qid) + qp_attr.create_flags |= IB_QP_CREATE_INTEGRITY_EN; + ret = rdma_create_qp(queue->cm_id, ndev->pd, &qp_attr); if (ret) { pr_err("failed to create_qp ret= %d\n", ret); @@ -1226,6 +1411,7 @@ nvmet_rdma_alloc_queue(struct nvmet_rdma_device *ndev, struct rdma_cm_id *cm_id, struct rdma_cm_event *event) { + struct nvmet_rdma_port *port = cm_id->context; struct nvmet_rdma_queue *queue; int ret; @@ -1252,6 +1438,7 @@ nvmet_rdma_alloc_queue(struct nvmet_rdma_device *ndev, INIT_WORK(&queue->release_work, nvmet_rdma_release_queue_work); 
queue->dev = ndev; queue->cm_id = cm_id; + queue->port = port->nport; spin_lock_init(&queue->state_lock); queue->state = NVMET_RDMA_Q_CONNECTING; @@ -1369,7 +1556,6 @@ static int nvmet_rdma_cm_accept(struct rdma_cm_id *cm_id, static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id, struct rdma_cm_event *event) { - struct nvmet_rdma_port *port = cm_id->context; struct nvmet_rdma_device *ndev; struct nvmet_rdma_queue *queue; int ret = -EINVAL; @@ -1385,7 +1571,6 @@ static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id, ret = -ENOMEM; goto put_device; } - queue->port = port->nport; if (queue->host_qid == 0) { /* Let inflight controller teardown complete */ @@ -1657,6 +1842,14 @@ static int nvmet_rdma_enable_port(struct nvmet_rdma_port *port) goto out_destroy_id; } + if (port->nport->pi_enable && + !(cm_id->device->attrs.device_cap_flags & + IB_DEVICE_INTEGRITY_HANDOVER)) { + pr_err("T10-PI is not supported for %pISpcs\n", addr); + ret = -EINVAL; + goto out_destroy_id; + } + port->cm_id = cm_id; return 0; @@ -1766,6 +1959,8 @@ static void nvmet_rdma_disc_port_addr(struct nvmet_req *req, static u8 nvmet_rdma_get_mdts(const struct nvmet_ctrl *ctrl) { + if (ctrl->pi_support) + return NVMET_RDMA_MAX_METADATA_MDTS; return NVMET_RDMA_MAX_MDTS; } @@ -1774,6 +1969,7 @@ static const struct nvmet_fabrics_ops nvmet_rdma_ops = { .type = NVMF_TRTYPE_RDMA, .msdbd = 1, .has_keyed_sgls = 1, + .metadata_support = 1, .add_port = nvmet_rdma_add_port, .remove_port = nvmet_rdma_remove_port, .queue_response = nvmet_rdma_queue_response, -- cgit v1.2.3 From 64f5e9cdd711b030b05062c17b2ecfbce890cf4c Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 20 May 2020 12:48:12 -0700 Subject: nvmet: fix memory leak when removing namespaces and controllers concurrently When removing a namespace, we add an NS_CHANGE async event, however if the controller admin queue is removed after the event was added but not yet processed, we won't free the aens, resulting in the below memory leak [1]. Fix that by moving nvmet_async_event_free to the final controller release after it is detached from subsys->ctrls ensuring no async events are added, and modify it to simply remove all pending aens. -- $ cat /sys/kernel/debug/kmemleak unreferenced object 0xffff888c1af2c000 (size 32): comm "nvmetcli", pid 5164, jiffies 4295220864 (age 6829.924s) hex dump (first 32 bytes): 28 01 82 3b 8b 88 ff ff 28 01 82 3b 8b 88 ff ff (..;....(..;.... 02 00 04 65 76 65 6e 74 5f 66 69 6c 65 00 00 00 ...event_file... 
backtrace: [<00000000217ae580>] nvmet_add_async_event+0x57/0x290 [nvmet] [<0000000012aa2ea9>] nvmet_ns_changed+0x206/0x300 [nvmet] [<00000000bb3fd52e>] nvmet_ns_disable+0x367/0x4f0 [nvmet] [<00000000e91ca9ec>] nvmet_ns_free+0x15/0x180 [nvmet] [<00000000a15deb52>] config_item_release+0xf1/0x1c0 [<000000007e148432>] configfs_rmdir+0x555/0x7c0 [<00000000f4506ea6>] vfs_rmdir+0x142/0x3c0 [<0000000000acaaf0>] do_rmdir+0x2b2/0x340 [<0000000034d1aa52>] do_syscall_64+0xa5/0x4d0 [<00000000211f13bc>] entry_SYSCALL_64_after_hwframe+0x6a/0xdf Fixes: a07b4970f464 ("nvmet: add a generic NVMe target") Reported-by: David Milburn Signed-off-by: Sagi Grimberg Tested-by: David Milburn Signed-off-by: Christoph Hellwig --- drivers/nvme/target/core.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index edf54d9957b7..48f5123d875b 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -158,14 +158,12 @@ static void nvmet_async_events_process(struct nvmet_ctrl *ctrl, u16 status) static void nvmet_async_events_free(struct nvmet_ctrl *ctrl) { - struct nvmet_req *req; + struct nvmet_async_event *aen, *tmp; mutex_lock(&ctrl->lock); - while (ctrl->nr_async_event_cmds) { - req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds]; - mutex_unlock(&ctrl->lock); - nvmet_req_complete(req, NVME_SC_INTERNAL | NVME_SC_DNR); - mutex_lock(&ctrl->lock); + list_for_each_entry_safe(aen, tmp, &ctrl->async_events, entry) { + list_del(&aen->entry); + kfree(aen); } mutex_unlock(&ctrl->lock); } @@ -791,10 +789,8 @@ void nvmet_sq_destroy(struct nvmet_sq *sq) * If this is the admin queue, complete all AERs so that our * queue doesn't have outstanding requests on it. */ - if (ctrl && ctrl->sqs && ctrl->sqs[0] == sq) { + if (ctrl && ctrl->sqs && ctrl->sqs[0] == sq) nvmet_async_events_process(ctrl, status); - nvmet_async_events_free(ctrl); - } percpu_ref_kill_and_confirm(&sq->ref, nvmet_confirm_sq); wait_for_completion(&sq->confirm_done); wait_for_completion(&sq->free_done); @@ -1427,6 +1423,7 @@ static void nvmet_ctrl_free(struct kref *ref) ida_simple_remove(&cntlid_ida, ctrl->cntlid); + nvmet_async_events_free(ctrl); kfree(ctrl->sqs); kfree(ctrl->cqs); kfree(ctrl->changed_ns_list); -- cgit v1.2.3 From 1cdf9f7670a7d74e27177d5c390c2f8b3b9ba338 Mon Sep 17 00:00:00 2001 From: David Milburn Date: Mon, 18 May 2020 13:59:55 -0500 Subject: nvmet: cleanups the loop in nvmet_async_events_process Based-on-a-patch-by: Christoph Hellwig Tested-by: Yi Zhang Signed-off-by: David Milburn Signed-off-by: Christoph Hellwig --- drivers/nvme/target/core.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 48f5123d875b..6392bcd30bd7 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -134,15 +134,10 @@ static void nvmet_async_events_process(struct nvmet_ctrl *ctrl, u16 status) struct nvmet_async_event *aen; struct nvmet_req *req; - while (1) { - mutex_lock(&ctrl->lock); - aen = list_first_entry_or_null(&ctrl->async_events, - struct nvmet_async_event, entry); - if (!aen || !ctrl->nr_async_event_cmds) { - mutex_unlock(&ctrl->lock); - break; - } - + mutex_lock(&ctrl->lock); + while (ctrl->nr_async_event_cmds && !list_empty(&ctrl->async_events)) { + aen = list_first_entry(&ctrl->async_events, + struct nvmet_async_event, entry); req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds]; if (status == 0) nvmet_set_result(req, 
nvmet_async_event_result(aen)); @@ -153,7 +148,9 @@ static void nvmet_async_events_process(struct nvmet_ctrl *ctrl, u16 status) mutex_unlock(&ctrl->lock); trace_nvmet_async_event(ctrl, req->cqe->result.u32); nvmet_req_complete(req, status); + mutex_lock(&ctrl->lock); } + mutex_unlock(&ctrl->lock); } static void nvmet_async_events_free(struct nvmet_ctrl *ctrl) -- cgit v1.2.3 From 3b2a1ebceba3e03b17ef0970bb7757a3a64cdc8b Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 20 May 2020 19:22:53 -0700 Subject: nvme: set dma alignment to qword The default dma alignment mask is 511, which is much larger than any nvme controller requires. NVMe controllers accept qword aligned DMA addresses, so set the request_queue constraints to that (an alignment mask of 7, i.e. 8-byte boundaries). This can help avoid bounce buffers on user passthrough commands. Signed-off-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index a3a4dbc59af1..569671e264b5 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2322,6 +2322,7 @@ static void nvme_set_queue_limits(struct nvme_ctrl *ctrl, blk_queue_max_segments(q, min_t(u32, max_segments, USHRT_MAX)); } blk_queue_virt_boundary(q, ctrl->page_size - 1); + blk_queue_dma_alignment(q, 7); if (ctrl->vwc & NVME_CTRL_VWC_PRESENT) vwc = true; blk_queue_write_cache(q, vwc, vwc); -- cgit v1.2.3 From fcdd14b86f6b891b5e894bf1dbeaf02cc79bdbce Mon Sep 17 00:00:00 2001 From: James Smart Date: Wed, 20 May 2020 11:59:27 -0700 Subject: lpfc: Fix pointer checks and comments in LS receive refactoring MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Additional testing encountered null pointers that weren't fully qualified in lpfc_nvmet_xmt_ls_abort_cmp() and lpfc_nvmet_unsol_issue_abort(). The same error was detected and reported by a static checker: drivers/scsi/lpfc/lpfc_sli.c:2905 lpfc_nvme_unsol_ls_handler() error: we previously assumed 'phba->targetport' could be null (see line 2837) Fix by adding phba->nvmet_support and phba->targetport validity checks to lpfc_nvmet_xmt_ls_abort_cmp() and lpfc_nvmet_unsol_issue_abort().
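For illustration only, here is a minimal user-space sketch of the guard pattern the fix applies; the struct layouts below are simplified stand-ins invented for this sketch, not the real lpfc definitions:

#include <stddef.h>
#include <stdio.h>

/* simplified stand-ins for the lpfc structures involved */
struct tgtport_stats { int xmt_ls_abort_cmpl; };
struct targetport { struct tgtport_stats *private; };
struct hba {
        int nvmet_support;             /* nonzero only in NVMe target mode */
        struct targetport *targetport; /* NULL when running as initiator */
};

static void ls_abort_cmpl(struct hba *phba)
{
        struct tgtport_stats *tgtp = NULL;

        /* qualify both conditions before dereferencing targetport->private */
        if (phba->nvmet_support && phba->targetport)
                tgtp = phba->targetport->private;

        if (tgtp)
                tgtp->xmt_ls_abort_cmpl++; /* count the stat only when valid */
}

int main(void)
{
        /* initiator-mode HBA: previously a NULL-pointer dereference path */
        struct hba initiator = { .nvmet_support = 0, .targetport = NULL };

        ls_abort_cmpl(&initiator);
        puts("no crash on an initiator-mode hba");
        return 0;
}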
Fixes: 3a8070c567aaa ("lpfc: Refactor NVME LS receive handling") Reported-by: Dan Carpenter Signed-off-by: Paul Ely Signed-off-by: James Smart Reviewed-by: Dan Carpenter Signed-off-by: Christoph Hellwig --- drivers/scsi/lpfc/lpfc_nvmet.c | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c index 1c6bbbba70b5..bccf9da302ee 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.c +++ b/drivers/scsi/lpfc/lpfc_nvmet.c @@ -3207,8 +3207,10 @@ lpfc_nvmet_xmt_ls_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, ctxp = cmdwqe->context2; result = wcqe->parameter; - tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; - atomic_inc(&tgtp->xmt_ls_abort_cmpl); + if (phba->nvmet_support) { + tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; + atomic_inc(&tgtp->xmt_ls_abort_cmpl); + } lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS, "6083 Abort cmpl: ctx x%px WCQE:%08x %08x %08x %08x\n", @@ -3244,7 +3246,7 @@ lpfc_nvmet_unsol_issue_abort(struct lpfc_hba *phba, struct lpfc_async_xchg_ctx *ctxp, uint32_t sid, uint16_t xri) { - struct lpfc_nvmet_tgtport *tgtp; + struct lpfc_nvmet_tgtport *tgtp = NULL; struct lpfc_iocbq *abts_wqeq; union lpfc_wqe128 *wqe_abts; struct lpfc_nodelist *ndlp; @@ -3253,13 +3255,15 @@ lpfc_nvmet_unsol_issue_abort(struct lpfc_hba *phba, "6067 ABTS: sid %x xri x%x/x%x\n", sid, xri, ctxp->wqeq->sli4_xritag); - tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; + if (phba->nvmet_support && phba->targetport) + tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; ndlp = lpfc_findnode_did(phba->pport, sid); if (!ndlp || !NLP_CHK_NODE_ACT(ndlp) || ((ndlp->nlp_state != NLP_STE_UNMAPPED_NODE) && (ndlp->nlp_state != NLP_STE_MAPPED_NODE))) { - atomic_inc(&tgtp->xmt_abort_rsp_error); + if (tgtp) + atomic_inc(&tgtp->xmt_abort_rsp_error); lpfc_printf_log(phba, KERN_ERR, LOG_NVME_ABTS, "6134 Drop ABTS - wrong NDLP state x%x.\n", (ndlp) ?
ndlp->nlp_state : NLP_STE_MAX_STATE); @@ -3538,7 +3542,7 @@ lpfc_nvme_unsol_ls_issue_abort(struct lpfc_hba *phba, struct lpfc_async_xchg_ctx *ctxp, uint32_t sid, uint16_t xri) { - struct lpfc_nvmet_tgtport *tgtp; + struct lpfc_nvmet_tgtport *tgtp = NULL; struct lpfc_iocbq *abts_wqeq; unsigned long flags; int rc; @@ -3555,7 +3559,9 @@ lpfc_nvme_unsol_ls_issue_abort(struct lpfc_hba *phba, ctxp->state = LPFC_NVME_STE_LS_ABORT; } - tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; + if (phba->nvmet_support && phba->targetport) + tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; + if (!ctxp->wqeq) { /* Issue ABTS for this WQE based on iotag */ ctxp->wqeq = lpfc_sli_get_iocbq(phba); @@ -3582,11 +3588,13 @@ lpfc_nvme_unsol_ls_issue_abort(struct lpfc_hba *phba, rc = lpfc_sli4_issue_wqe(phba, ctxp->hdwq, abts_wqeq); spin_unlock_irqrestore(&phba->hbalock, flags); if (rc == WQE_SUCCESS) { - atomic_inc(&tgtp->xmt_abort_unsol); + if (tgtp) + atomic_inc(&tgtp->xmt_abort_unsol); return 0; } out: - atomic_inc(&tgtp->xmt_abort_rsp_error); + if (tgtp) + atomic_inc(&tgtp->xmt_abort_rsp_error); abts_wqeq->context2 = NULL; abts_wqeq->context3 = NULL; lpfc_sli_release_iocbq(phba, abts_wqeq); -- cgit v1.2.3 From 4e57e0b9f343fd14497ab04b2bc08c1784830b9d Mon Sep 17 00:00:00 2001 From: James Smart Date: Wed, 20 May 2020 11:59:28 -0700 Subject: lpfc: fix axchg pointer reference after free and double frees The axchg structure is allocated early in lpfc_nvme_unsol_ls_handler() to represent the newly received exchange. Upon error, the out_fail path in the routine unconditionally frees the pointer, yet subsequently passes the pointer to the abort routine. Additionally, the abort routine, lpfc_nvme_unsol_ls_issue_abort(), also has a failure path that will attempt to delete the pointer on error. Fix these errors by: - Removing the unconditional free so that the pointer stays valid if passed to the abort routine. - Revising the abort routine to not free the pointer. Instead, return a success/failure status. Note: on success, the later completion of the abort frees the structure. - Back in the unsol_ls_handler() error path, freeing the pointer if the abort routine was skipped (thus no possible reference) or returned an error. Fixes: 3a8070c567aa ("lpfc: Refactor NVME LS receive handling") Reported-by: Dan Carpenter Signed-off-by: Dick Kennedy Signed-off-by: James Smart Reviewed-by: Dan Carpenter Signed-off-by: Christoph Hellwig --- drivers/scsi/lpfc/lpfc_nvmet.c | 3 +-- drivers/scsi/lpfc/lpfc_sli.c | 10 ++++++---- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c index bccf9da302ee..32eb5e873e9b 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.c +++ b/drivers/scsi/lpfc/lpfc_nvmet.c @@ -3598,10 +3598,9 @@ out: abts_wqeq->context2 = NULL; abts_wqeq->context3 = NULL; lpfc_sli_release_iocbq(phba, abts_wqeq); - kfree(ctxp); lpfc_printf_log(phba, KERN_ERR, LOG_NVME_ABTS, "6056 Failed to Issue ABTS.
Status x%x\n", rc); - return 0; + return 1; } /** diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 1aaf40081e21..9e21c4f3b009 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -2813,7 +2813,7 @@ lpfc_nvme_unsol_ls_handler(struct lpfc_hba *phba, struct lpfc_iocbq *piocb) struct lpfc_async_xchg_ctx *axchg = NULL; char *failwhy = NULL; uint32_t oxid, sid, did, fctl, size; - int ret; + int ret = 1; d_buf = piocb->context2; @@ -2897,14 +2897,16 @@ lpfc_nvme_unsol_ls_handler(struct lpfc_hba *phba, struct lpfc_iocbq *piocb) (phba->nvmet_support) ? "T" : "I", ret); out_fail: - kfree(axchg); /* recycle receive buffer */ lpfc_in_buf_free(phba, &nvmebuf->dbuf); /* If start of new exchange, abort it */ - if (fctl & FC_FC_FIRST_SEQ && !(fctl & FC_FC_EX_CTX)) - lpfc_nvme_unsol_ls_issue_abort(phba, axchg, sid, oxid); + if (axchg && (fctl & FC_FC_FIRST_SEQ && !(fctl & FC_FC_EX_CTX))) + ret = lpfc_nvme_unsol_ls_issue_abort(phba, axchg, sid, oxid); + + if (ret) + kfree(axchg); } /** -- cgit v1.2.3 From 6b6e89636f51581895922780c3c4fd51bb9e1483 Mon Sep 17 00:00:00 2001 From: James Smart Date: Wed, 20 May 2020 11:59:29 -0700 Subject: lpfc: Fix return value in __lpfc_nvme_ls_abort A static checker reported the following issue: drivers/scsi/lpfc/lpfc_nvmet.c:1366 lpfc_nvmet_ls_abort() warn: 'ret' can be either negative or positive The comment indicates that a non-zero return value signals an error in the form of -Exxx, but the code is returning "1". Fix the code to return -EINVAL, compliant with the comment. Fixes: e96a22b0b7c2 ("lpfc: Refactor Send LS Abort support") Reported-by: Dan Carpenter Signed-off-by: Dick Kennedy Signed-off-by: James Smart Reviewed-by: Dan Carpenter Signed-off-by: Christoph Hellwig --- drivers/scsi/lpfc/lpfc_nvme.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c index 21bbccf0dc31..b46ba70f78da 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.c +++ b/drivers/scsi/lpfc/lpfc_nvme.c @@ -895,7 +895,7 @@ __lpfc_nvme_ls_abort(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_DISC | LOG_NVME_ABTS, "6213 NVMEx LS REQ Abort: Unable to locate req x%p\n", pnvme_lsreq); - return 1; + return -EINVAL; } static int -- cgit v1.2.3 From 3b5b7b1f70e79fb060d485412b3c748f0a83111b Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 27 May 2020 12:01:51 +0800 Subject: bcache: remove redundant variables i and n Variables i and n are being assigned but are never used. They are redundant and can be removed. Signed-off-by: Colin Ian King Signed-off-by: Coly Li Addresses-Coverity: ("Unused value") Signed-off-by: Jens Axboe --- drivers/md/bcache/btree.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 72856e5f23a3..114d0d73d909 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -1907,10 +1907,8 @@ static int bch_btree_check_thread(void *arg) struct btree_iter iter; struct bkey *k, *p; int cur_idx, prev_idx, skip_nr; - int i, n; k = p = NULL; - i = n = 0; cur_idx = prev_idx = 0; ret = 0; -- cgit v1.2.3 From 46f5aa8806e34f2e48de852cc7db2c74c3a5cd8d Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 27 May 2020 12:01:52 +0800 Subject: bcache: Convert pr_ uses to a more typical style Remove the trailing newline from the define of pr_fmt and add newlines to the uses.
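To see why the newline must move, here is a minimal sketch with plain printf standing in for the pr_* macros; the names pr_fmt_old and pr_fmt_new are invented for this illustration (bcache's real pr_fmt also prepends the function name):

#include <stdio.h>

/* old style: a newline baked into every expansion of pr_fmt */
#define pr_fmt_old(fmt) "bcache: " fmt "\n"
/* new style: each call site supplies its own newline */
#define pr_fmt_new(fmt) "bcache: " fmt

int main(void)
{
        /* old: every print ends the line, so continuations are impossible */
        printf(pr_fmt_old("key %d"), 1);
        printf("detail\n");            /* lands on a new line */

        /* new: a message can be built up piecewise, then terminated once */
        printf(pr_fmt_new("key %d"), 2);
        printf(" detail\n");           /* continues the same line */
        return 0;
}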
Miscellanea: o Convert bch_bkey_dump from multiple uses of pr_err to pr_cont, as the earlier conversion was done inappropriately, causing multiple lines to be emitted where only a single output line was desired o Use vsprintf extension %pV in bch_cache_set_error to avoid multiple line output where only a single line output was desired o Coalesce formats Fixes: 6ae63e3501c4 ("bcache: replace printk() by pr_*() routines") Signed-off-by: Joe Perches Signed-off-by: Coly Li Signed-off-by: Jens Axboe --- drivers/md/bcache/bcache.h | 2 +- drivers/md/bcache/bset.c | 6 +-- drivers/md/bcache/btree.c | 14 ++--- drivers/md/bcache/extents.c | 12 ++--- drivers/md/bcache/io.c | 8 +-- drivers/md/bcache/journal.c | 34 ++++++------ drivers/md/bcache/request.c | 6 +-- drivers/md/bcache/super.c | 123 +++++++++++++++++++++--------------------- drivers/md/bcache/sysfs.c | 8 +-- drivers/md/bcache/writeback.c | 6 +-- 10 files changed, 110 insertions(+), 109 deletions(-) diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index 74a9849ea164..221e0191b687 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -176,7 +176,7 @@ * - updates to non leaf nodes just happen synchronously (see btree_split()). */ -#define pr_fmt(fmt) "bcache: %s() " fmt "\n", __func__ +#define pr_fmt(fmt) "bcache: %s() " fmt, __func__ #include #include diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c index 4385303836d8..4995fcaefe29 100644 --- a/drivers/md/bcache/bset.c +++ b/drivers/md/bcache/bset.c @@ -6,7 +6,7 @@ * Copyright 2012 Google, Inc. */ -#define pr_fmt(fmt) "bcache: %s() " fmt "\n", __func__ +#define pr_fmt(fmt) "bcache: %s() " fmt, __func__ #include "util.h" #include "bset.h" @@ -31,7 +31,7 @@ void bch_dump_bset(struct btree_keys *b, struct bset *i, unsigned int set) if (b->ops->key_dump) b->ops->key_dump(b, k); else - pr_err("%llu:%llu\n", KEY_INODE(k), KEY_OFFSET(k)); + pr_cont("%llu:%llu\n", KEY_INODE(k), KEY_OFFSET(k)); if (next < bset_bkey_last(i) && bkey_cmp(k, b->ops->is_extents ? @@ -1225,7 +1225,7 @@ static void btree_mergesort(struct btree_keys *b, struct bset *out, out->keys = last ? (uint64_t *) bkey_next(last) - out->d : 0; - pr_debug("sorted %i keys", out->keys); + pr_debug("sorted %i keys\n", out->keys); } static void __btree_sort(struct btree_keys *b, struct btree_iter *iter, diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 114d0d73d909..39de94edd73a 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -619,7 +619,7 @@ retry: * and BTREE_NODE_journal_flush bit cleared by btree_flush_write().
*/ if (btree_node_journal_flush(b)) { - pr_debug("bnode %p is flushing by journal, retry", b); + pr_debug("bnode %p is flushing by journal, retry\n", b); mutex_unlock(&b->write_lock); udelay(1); goto retry; @@ -802,7 +802,7 @@ int bch_btree_cache_alloc(struct cache_set *c) c->shrink.batch = c->btree_pages * 2; if (register_shrinker(&c->shrink)) - pr_warn("bcache: %s: could not register shrinker", + pr_warn("bcache: %s: could not register shrinker\n", __func__); return 0; @@ -1054,7 +1054,7 @@ retry: */ if (btree_node_journal_flush(b)) { mutex_unlock(&b->write_lock); - pr_debug("bnode %p journal_flush set, retry", b); + pr_debug("bnode %p journal_flush set, retry\n", b); udelay(1); goto retry; } @@ -1798,7 +1798,7 @@ static void bch_btree_gc(struct cache_set *c) schedule_timeout_interruptible(msecs_to_jiffies (GC_SLEEP_MS)); else if (ret) - pr_warn("gc failed!"); + pr_warn("gc failed!\n"); } while (ret && !test_bit(CACHE_SET_IO_DISABLE, &c->flags)); bch_btree_gc_finish(c); @@ -2043,7 +2043,7 @@ int bch_btree_check(struct cache_set *c) &check_state->infos[i], name); if (IS_ERR(check_state->infos[i].thread)) { - pr_err("fails to run thread bch_btrchk[%d]", i); + pr_err("fails to run thread bch_btrchk[%d]\n", i); for (--i; i >= 0; i--) kthread_stop(check_state->infos[i].thread); ret = -ENOMEM; @@ -2454,7 +2454,7 @@ int bch_btree_insert(struct cache_set *c, struct keylist *keys, if (ret) { struct bkey *k; - pr_err("error %i", ret); + pr_err("error %i\n", ret); while ((k = bch_keylist_pop(keys))) bkey_put(c, k); @@ -2742,7 +2742,7 @@ struct keybuf_key *bch_keybuf_next_rescan(struct cache_set *c, break; if (bkey_cmp(&buf->last_scanned, end) >= 0) { - pr_debug("scan finished"); + pr_debug("scan finished\n"); break; } diff --git a/drivers/md/bcache/extents.c b/drivers/md/bcache/extents.c index 886710043025..9162af5bb6ec 100644 --- a/drivers/md/bcache/extents.c +++ b/drivers/md/bcache/extents.c @@ -130,18 +130,18 @@ static void bch_bkey_dump(struct btree_keys *keys, const struct bkey *k) char buf[80]; bch_extent_to_text(buf, sizeof(buf), k); - pr_err(" %s", buf); + pr_cont(" %s", buf); for (j = 0; j < KEY_PTRS(k); j++) { size_t n = PTR_BUCKET_NR(b->c, k, j); - pr_err(" bucket %zu", n); + pr_cont(" bucket %zu", n); if (n >= b->c->sb.first_bucket && n < b->c->sb.nbuckets) - pr_err(" prio %i", - PTR_BUCKET(b->c, k, j)->prio); + pr_cont(" prio %i", + PTR_BUCKET(b->c, k, j)->prio); } - pr_err(" %s\n", bch_ptr_status(b->c, k)); + pr_cont(" %s\n", bch_ptr_status(b->c, k)); } /* Btree ptrs */ @@ -553,7 +553,7 @@ static bool bch_extent_bad(struct btree_keys *bk, const struct bkey *k) if (stale && KEY_DIRTY(k)) { bch_extent_to_text(buf, sizeof(buf), k); - pr_info("stale dirty pointer, stale %u, key: %s", + pr_info("stale dirty pointer, stale %u, key: %s\n", stale, buf); } diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c index 4d93f07f63e5..b25ee33b0d0b 100644 --- a/drivers/md/bcache/io.c +++ b/drivers/md/bcache/io.c @@ -65,14 +65,14 @@ void bch_count_backing_io_errors(struct cached_dev *dc, struct bio *bio) * we shouldn't count failed REQ_RAHEAD bio to dc->io_errors. 
*/ if (bio->bi_opf & REQ_RAHEAD) { - pr_warn_ratelimited("%s: Read-ahead I/O failed on backing device, ignore", + pr_warn_ratelimited("%s: Read-ahead I/O failed on backing device, ignore\n", dc->backing_dev_name); return; } errors = atomic_add_return(1, &dc->io_errors); if (errors < dc->error_limit) - pr_err("%s: IO error on backing device, unrecoverable", + pr_err("%s: IO error on backing device, unrecoverable\n", dc->backing_dev_name); else bch_cached_dev_error(dc); @@ -123,12 +123,12 @@ void bch_count_io_errors(struct cache *ca, errors >>= IO_ERROR_SHIFT; if (errors < ca->set->error_limit) - pr_err("%s: IO error on %s%s", + pr_err("%s: IO error on %s%s\n", ca->cache_dev_name, m, is_read ? ", recovering." : "."); else bch_cache_set_error(ca->set, - "%s: too many IO errors %s", + "%s: too many IO errors %s\n", ca->cache_dev_name, m); } } diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c index 0e3ff9745ac7..90aac4e2333f 100644 --- a/drivers/md/bcache/journal.c +++ b/drivers/md/bcache/journal.c @@ -47,7 +47,7 @@ static int journal_read_bucket(struct cache *ca, struct list_head *list, closure_init_stack(&cl); - pr_debug("reading %u", bucket_index); + pr_debug("reading %u\n", bucket_index); while (offset < ca->sb.bucket_size) { reread: left = ca->sb.bucket_size - offset; @@ -78,13 +78,13 @@ reread: left = ca->sb.bucket_size - offset; size_t blocks, bytes = set_bytes(j); if (j->magic != jset_magic(&ca->sb)) { - pr_debug("%u: bad magic", bucket_index); + pr_debug("%u: bad magic\n", bucket_index); return ret; } if (bytes > left << 9 || bytes > PAGE_SIZE << JSET_BITS) { - pr_info("%u: too big, %zu bytes, offset %u", + pr_info("%u: too big, %zu bytes, offset %u\n", bucket_index, bytes, offset); return ret; } @@ -93,7 +93,7 @@ reread: left = ca->sb.bucket_size - offset; goto reread; if (j->csum != csum_set(j)) { - pr_info("%u: bad csum, %zu bytes, offset %u", + pr_info("%u: bad csum, %zu bytes, offset %u\n", bucket_index, bytes, offset); return ret; } @@ -190,7 +190,7 @@ int bch_journal_read(struct cache_set *c, struct list_head *list) uint64_t seq; bitmap_zero(bitmap, SB_JOURNAL_BUCKETS); - pr_debug("%u journal buckets", ca->sb.njournal_buckets); + pr_debug("%u journal buckets\n", ca->sb.njournal_buckets); /* * Read journal buckets ordered by golden ratio hash to quickly @@ -215,7 +215,7 @@ int bch_journal_read(struct cache_set *c, struct list_head *list) * If that fails, check all the buckets we haven't checked * already */ - pr_debug("falling back to linear search"); + pr_debug("falling back to linear search\n"); for (l = find_first_zero_bit(bitmap, ca->sb.njournal_buckets); l < ca->sb.njournal_buckets; @@ -233,7 +233,7 @@ bsearch: /* Binary search */ m = l; r = find_next_bit(bitmap, ca->sb.njournal_buckets, l + 1); - pr_debug("starting binary search, l %u r %u", l, r); + pr_debug("starting binary search, l %u r %u\n", l, r); while (l + 1 < r) { seq = list_entry(list->prev, struct journal_replay, @@ -253,7 +253,7 @@ bsearch: * Read buckets in reverse order until we stop finding more * journal entries */ - pr_debug("finishing up: m %u njournal_buckets %u", + pr_debug("finishing up: m %u njournal_buckets %u\n", m, ca->sb.njournal_buckets); l = m; @@ -370,10 +370,10 @@ int bch_journal_replay(struct cache_set *s, struct list_head *list) if (n != i->j.seq) { if (n == start && is_discard_enabled(s)) - pr_info("bcache: journal entries %llu-%llu may be discarded! (replaying %llu-%llu)", + pr_info("journal entries %llu-%llu may be discarded! 
(replaying %llu-%llu)\n", n, i->j.seq - 1, start, end); else { - pr_err("bcache: journal entries %llu-%llu missing! (replaying %llu-%llu)", + pr_err("journal entries %llu-%llu missing! (replaying %llu-%llu)\n", n, i->j.seq - 1, start, end); ret = -EIO; goto err; @@ -403,7 +403,7 @@ int bch_journal_replay(struct cache_set *s, struct list_head *list) entries++; } - pr_info("journal replay done, %i keys in %i entries, seq %llu", + pr_info("journal replay done, %i keys in %i entries, seq %llu\n", keys, entries, end); err: while (!list_empty(list)) { @@ -481,7 +481,7 @@ static void btree_flush_write(struct cache_set *c) break; if (btree_node_journal_flush(b)) - pr_err("BUG: flush_write bit should not be set here!"); + pr_err("BUG: flush_write bit should not be set here!\n"); mutex_lock(&b->write_lock); @@ -534,13 +534,13 @@ static void btree_flush_write(struct cache_set *c) for (i = 0; i < nr; i++) { b = btree_nodes[i]; if (!b) { - pr_err("BUG: btree_nodes[%d] is NULL", i); + pr_err("BUG: btree_nodes[%d] is NULL\n", i); continue; } /* safe to check without holding b->write_lock */ if (!btree_node_journal_flush(b)) { - pr_err("BUG: bnode %p: journal_flush bit cleaned", b); + pr_err("BUG: bnode %p: journal_flush bit cleaned\n", b); continue; } @@ -548,14 +548,14 @@ static void btree_flush_write(struct cache_set *c) if (!btree_current_write(b)->journal) { clear_bit(BTREE_NODE_journal_flush, &b->flags); mutex_unlock(&b->write_lock); - pr_debug("bnode %p: written by others", b); + pr_debug("bnode %p: written by others\n", b); continue; } if (!btree_node_dirty(b)) { clear_bit(BTREE_NODE_journal_flush, &b->flags); mutex_unlock(&b->write_lock); - pr_debug("bnode %p: dirty bit cleaned by others", b); + pr_debug("bnode %p: dirty bit cleaned by others\n", b); continue; } @@ -716,7 +716,7 @@ void bch_journal_next(struct journal *j) j->cur->data->keys = 0; if (fifo_full(&j->pin)) - pr_debug("journal_pin full (%zu)", fifo_used(&j->pin)); + pr_debug("journal_pin full (%zu)\n", fifo_used(&j->pin)); } static void journal_write_endio(struct bio *bio) diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 77d1a2697517..72672178970b 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -110,7 +110,7 @@ static void bch_data_invalidate(struct closure *cl) struct data_insert_op *op = container_of(cl, struct data_insert_op, cl); struct bio *bio = op->bio; - pr_debug("invalidating %i sectors from %llu", + pr_debug("invalidating %i sectors from %llu\n", bio_sectors(bio), (uint64_t) bio->bi_iter.bi_sector); while (bio_sectors(bio)) { @@ -396,7 +396,7 @@ static bool check_should_bypass(struct cached_dev *dc, struct bio *bio) if (bio->bi_iter.bi_sector & (c->sb.block_size - 1) || bio_sectors(bio) & (c->sb.block_size - 1)) { - pr_debug("skipping unaligned io"); + pr_debug("skipping unaligned io\n"); goto skip; } @@ -650,7 +650,7 @@ static void backing_request_endio(struct bio *bio) */ if (unlikely(s->iop.writeback && bio->bi_opf & REQ_PREFLUSH)) { - pr_err("Can't flush %s: returned bi_status %i", + pr_err("Can't flush %s: returned bi_status %i\n", dc->backing_dev_name, bio->bi_status); } else { /* set to orig_bio->bi_status in bio_complete() */ diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index d98354fa28e3..467149f3bcc5 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -89,7 +89,7 @@ static const char *read_super(struct cache_sb *sb, struct block_device *bdev, for (i = 0; i < SB_JOURNAL_BUCKETS; i++) sb->d[i] = le64_to_cpu(s->d[i]); 
- pr_debug("read sb version %llu, flags %llu, seq %llu, journal size %u", + pr_debug("read sb version %llu, flags %llu, seq %llu, journal size %u\n", sb->version, sb->flags, sb->seq, sb->keys); err = "Not a bcache superblock (bad offset)"; @@ -234,7 +234,7 @@ static void __write_super(struct cache_sb *sb, struct cache_sb_disk *out, out->csum = csum_set(out); - pr_debug("ver %llu, flags %llu, seq %llu", + pr_debug("ver %llu, flags %llu, seq %llu\n", sb->version, sb->flags, sb->seq); submit_bio(bio); @@ -365,11 +365,11 @@ static void uuid_io(struct cache_set *c, int op, unsigned long op_flags, } bch_extent_to_text(buf, sizeof(buf), k); - pr_debug("%s UUIDs at %s", op == REQ_OP_WRITE ? "wrote" : "read", buf); + pr_debug("%s UUIDs at %s\n", op == REQ_OP_WRITE ? "wrote" : "read", buf); for (u = c->uuids; u < c->uuids + c->nr_uuids; u++) if (!bch_is_zero(u->uuid, 16)) - pr_debug("Slot %zi: %pU: %s: 1st: %u last: %u inv: %u", + pr_debug("Slot %zi: %pU: %s: 1st: %u last: %u inv: %u\n", u - c->uuids, u->uuid, u->label, u->first_reg, u->last_reg, u->invalidated); @@ -534,7 +534,7 @@ int bch_prio_write(struct cache *ca, bool wait) struct bucket *b; struct closure cl; - pr_debug("free_prio=%zu, free_none=%zu, free_inc=%zu", + pr_debug("free_prio=%zu, free_none=%zu, free_inc=%zu\n", fifo_used(&ca->free[RESERVE_PRIO]), fifo_used(&ca->free[RESERVE_NONE]), fifo_used(&ca->free_inc)); @@ -629,12 +629,12 @@ static int prio_read(struct cache *ca, uint64_t bucket) if (p->csum != bch_crc64(&p->magic, bucket_bytes(ca) - 8)) { - pr_warn("bad csum reading priorities"); + pr_warn("bad csum reading priorities\n"); goto out; } if (p->magic != pset_magic(&ca->sb)) { - pr_warn("bad magic reading priorities"); + pr_warn("bad magic reading priorities\n"); goto out; } @@ -728,11 +728,11 @@ static void bcache_device_link(struct bcache_device *d, struct cache_set *c, ret = sysfs_create_link(&d->kobj, &c->kobj, "cache"); if (ret < 0) - pr_err("Couldn't create device -> cache set symlink"); + pr_err("Couldn't create device -> cache set symlink\n"); ret = sysfs_create_link(&c->kobj, &d->kobj, d->name); if (ret < 0) - pr_err("Couldn't create cache set -> device symlink"); + pr_err("Couldn't create cache set -> device symlink\n"); clear_bit(BCACHE_DEV_UNLINK_DONE, &d->flags); } @@ -789,9 +789,9 @@ static void bcache_device_free(struct bcache_device *d) lockdep_assert_held(&bch_register_lock); if (disk) - pr_info("%s stopped", disk->disk_name); + pr_info("%s stopped\n", disk->disk_name); else - pr_err("bcache device (NULL gendisk) stopped"); + pr_err("bcache device (NULL gendisk) stopped\n"); if (d->c) bcache_device_detach(d); @@ -830,7 +830,7 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size, d->nr_stripes = DIV_ROUND_UP_ULL(sectors, d->stripe_size); if (!d->nr_stripes || d->nr_stripes > max_stripes) { - pr_err("nr_stripes too large or invalid: %u (start sector beyond end of disk?)", + pr_err("nr_stripes too large or invalid: %u (start sector beyond end of disk?)\n", (unsigned int)d->nr_stripes); return -ENOMEM; } @@ -928,11 +928,11 @@ static int cached_dev_status_update(void *arg) dc->offline_seconds = 0; if (dc->offline_seconds >= BACKING_DEV_OFFLINE_TIMEOUT) { - pr_err("%s: device offline for %d seconds", + pr_err("%s: device offline for %d seconds\n", dc->backing_dev_name, BACKING_DEV_OFFLINE_TIMEOUT); - pr_err("%s: disable I/O request due to backing " - "device offline", dc->disk.name); + pr_err("%s: disable I/O request due to backing device offline\n", + dc->disk.name); dc->io_disable = 
true; /* let others know earlier that io_disable is true */ smp_mb(); @@ -959,7 +959,7 @@ int bch_cached_dev_run(struct cached_dev *dc) }; if (dc->io_disable) { - pr_err("I/O disabled on cached dev %s", + pr_err("I/O disabled on cached dev %s\n", dc->backing_dev_name); kfree(env[1]); kfree(env[2]); @@ -971,7 +971,7 @@ int bch_cached_dev_run(struct cached_dev *dc) kfree(env[1]); kfree(env[2]); kfree(buf); - pr_info("cached dev %s is running already", + pr_info("cached dev %s is running already\n", dc->backing_dev_name); return -EBUSY; } @@ -1001,16 +1001,14 @@ int bch_cached_dev_run(struct cached_dev *dc) if (sysfs_create_link(&d->kobj, &disk_to_dev(d->disk)->kobj, "dev") || sysfs_create_link(&disk_to_dev(d->disk)->kobj, &d->kobj, "bcache")) { - pr_err("Couldn't create bcache dev <-> disk sysfs symlinks"); + pr_err("Couldn't create bcache dev <-> disk sysfs symlinks\n"); return -ENOMEM; } dc->status_update_thread = kthread_run(cached_dev_status_update, dc, "bcache_status_update"); if (IS_ERR(dc->status_update_thread)) { - pr_warn("failed to create bcache_status_update kthread, " - "continue to run without monitoring backing " - "device status"); + pr_warn("failed to create bcache_status_update kthread, continue to run without monitoring backing device status\n"); } return 0; @@ -1036,7 +1034,7 @@ static void cancel_writeback_rate_update_dwork(struct cached_dev *dc) } while (time_out > 0); if (time_out == 0) - pr_warn("give up waiting for dc->writeback_write_update to quit"); + pr_warn("give up waiting for dc->writeback_write_update to quit\n"); cancel_delayed_work_sync(&dc->writeback_rate_update); } @@ -1077,7 +1075,7 @@ static void cached_dev_detach_finish(struct work_struct *w) mutex_unlock(&bch_register_lock); - pr_info("Caching disabled for %s", dc->backing_dev_name); + pr_info("Caching disabled for %s\n", dc->backing_dev_name); /* Drop ref we took in cached_dev_detach() */ closure_put(&dc->disk.cl); @@ -1117,20 +1115,20 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c, return -ENOENT; if (dc->disk.c) { - pr_err("Can't attach %s: already attached", + pr_err("Can't attach %s: already attached\n", dc->backing_dev_name); return -EINVAL; } if (test_bit(CACHE_SET_STOPPING, &c->flags)) { - pr_err("Can't attach %s: shutting down", + pr_err("Can't attach %s: shutting down\n", dc->backing_dev_name); return -EINVAL; } if (dc->sb.block_size < c->sb.block_size) { /* Will die */ - pr_err("Couldn't attach %s: block size less than set's block size", + pr_err("Couldn't attach %s: block size less than set's block size\n", dc->backing_dev_name); return -EINVAL; } @@ -1138,7 +1136,7 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c, /* Check whether already attached */ list_for_each_entry_safe(exist_dc, t, &c->cached_devs, list) { if (!memcmp(dc->sb.uuid, exist_dc->sb.uuid, 16)) { - pr_err("Tried to attach %s but duplicate UUID already attached", + pr_err("Tried to attach %s but duplicate UUID already attached\n", dc->backing_dev_name); return -EINVAL; @@ -1157,14 +1155,14 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c, if (!u) { if (BDEV_STATE(&dc->sb) == BDEV_STATE_DIRTY) { - pr_err("Couldn't find uuid for %s in set", + pr_err("Couldn't find uuid for %s in set\n", dc->backing_dev_name); return -ENOENT; } u = uuid_find_empty(c); if (!u) { - pr_err("Not caching %s, no room for UUID", + pr_err("Not caching %s, no room for UUID\n", dc->backing_dev_name); return -EINVAL; } @@ -1210,7 +1208,7 @@ int bch_cached_dev_attach(struct cached_dev 
*dc, struct cache_set *c, down_write(&dc->writeback_lock); if (bch_cached_dev_writeback_start(dc)) { up_write(&dc->writeback_lock); - pr_err("Couldn't start writeback facilities for %s", + pr_err("Couldn't start writeback facilities for %s\n", dc->disk.disk->disk_name); return -ENOMEM; } @@ -1233,7 +1231,7 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c, */ kthread_stop(dc->writeback_thread); cancel_writeback_rate_update_dwork(dc); - pr_err("Couldn't run cached device %s", + pr_err("Couldn't run cached device %s\n", dc->backing_dev_name); return ret; } @@ -1244,7 +1242,7 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c, /* Allow the writeback thread to proceed */ up_write(&dc->writeback_lock); - pr_info("Caching %s as %s on set %pU", + pr_info("Caching %s as %s on set %pU\n", dc->backing_dev_name, dc->disk.disk->disk_name, dc->disk.c->sb.set_uuid); @@ -1384,7 +1382,7 @@ static int register_bdev(struct cache_sb *sb, struct cache_sb_disk *sb_disk, if (bch_cache_accounting_add_kobjs(&dc->accounting, &dc->disk.kobj)) goto err; - pr_info("registered backing device %s", dc->backing_dev_name); + pr_info("registered backing device %s\n", dc->backing_dev_name); list_add(&dc->list, &uncached_devices); /* attach to a matched cache set if it exists */ @@ -1401,7 +1399,7 @@ static int register_bdev(struct cache_sb *sb, struct cache_sb_disk *sb_disk, return 0; err: - pr_notice("error %s: %s", dc->backing_dev_name, err); + pr_notice("error %s: %s\n", dc->backing_dev_name, err); bcache_device_stop(&dc->disk); return ret; } @@ -1497,7 +1495,7 @@ int bch_flash_dev_create(struct cache_set *c, uint64_t size) u = uuid_find_empty(c); if (!u) { - pr_err("Can't create volume, no room for UUID"); + pr_err("Can't create volume, no room for UUID\n"); return -EINVAL; } @@ -1523,7 +1521,7 @@ bool bch_cached_dev_error(struct cached_dev *dc) smp_mb(); pr_err("stop %s: too many IO errors on backing device %s\n", - dc->disk.disk->disk_name, dc->backing_dev_name); + dc->disk.disk->disk_name, dc->backing_dev_name); bcache_device_stop(&dc->disk); return true; @@ -1534,6 +1532,7 @@ bool bch_cached_dev_error(struct cached_dev *dc) __printf(2, 3) bool bch_cache_set_error(struct cache_set *c, const char *fmt, ...) { + struct va_format vaf; va_list args; if (c->on_error != ON_ERROR_PANIC && @@ -1541,20 +1540,22 @@ bool bch_cache_set_error(struct cache_set *c, const char *fmt, ...) 
return false; if (test_and_set_bit(CACHE_SET_IO_DISABLE, &c->flags)) - pr_info("CACHE_SET_IO_DISABLE already set"); + pr_info("CACHE_SET_IO_DISABLE already set\n"); /* * XXX: we can be called from atomic context * acquire_console_sem(); */ - pr_err("bcache: error on %pU: ", c->sb.set_uuid); - va_start(args, fmt); - vprintk(fmt, args); - va_end(args); - pr_err(", disabling caching\n"); + vaf.fmt = fmt; + vaf.va = &args; + + pr_err("error on %pU: %pV, disabling caching\n", + c->sb.set_uuid, &vaf); + + va_end(args); if (c->on_error == ON_ERROR_PANIC) panic("panic forced after error\n"); @@ -1606,7 +1607,7 @@ static void cache_set_free(struct closure *cl) list_del(&c->list); mutex_unlock(&bch_register_lock); - pr_info("Cache set %pU unregistered", c->sb.set_uuid); + pr_info("Cache set %pU unregistered\n", c->sb.set_uuid); wake_up(&unregister_wait); closure_debug_destroy(&c->cl); @@ -1677,7 +1678,7 @@ static void conditional_stop_bcache_device(struct cache_set *c, struct cached_dev *dc) { if (dc->stop_when_cache_set_failed == BCH_CACHED_DEV_STOP_ALWAYS) { - pr_warn("stop_when_cache_set_failed of %s is \"always\", stop it for failed cache set %pU.", + pr_warn("stop_when_cache_set_failed of %s is \"always\", stop it for failed cache set %pU.\n", d->disk->disk_name, c->sb.set_uuid); bcache_device_stop(d); } else if (atomic_read(&dc->has_dirty)) { @@ -1685,7 +1686,7 @@ static void conditional_stop_bcache_device(struct cache_set *c, * dc->stop_when_cache_set_failed == BCH_CACHED_STOP_AUTO * and dc->has_dirty == 1 */ - pr_warn("stop_when_cache_set_failed of %s is \"auto\" and cache is dirty, stop it to avoid potential data corruption.", + pr_warn("stop_when_cache_set_failed of %s is \"auto\" and cache is dirty, stop it to avoid potential data corruption.\n", d->disk->disk_name); /* * There might be a small time gap that cache set is @@ -1707,7 +1708,7 @@ static void conditional_stop_bcache_device(struct cache_set *c, * dc->stop_when_cache_set_failed == BCH_CACHED_STOP_AUTO * and dc->has_dirty == 0 */ - pr_warn("stop_when_cache_set_failed of %s is \"auto\" and cache is clean, keep it alive.", + pr_warn("stop_when_cache_set_failed of %s is \"auto\" and cache is clean, keep it alive.\n", d->disk->disk_name); } } @@ -1874,7 +1875,7 @@ static int run_cache_set(struct cache_set *c) if (bch_journal_read(c, &journal)) goto err; - pr_debug("btree_journal_read() done"); + pr_debug("btree_journal_read() done\n"); err = "no journal entries found"; if (list_empty(&journal)) @@ -1920,7 +1921,7 @@ static int run_cache_set(struct cache_set *c) bch_journal_mark(c, &journal); bch_initial_gc_finish(c); - pr_debug("btree_check() done"); + pr_debug("btree_check() done\n"); /* * bcache_journal_next() can't happen sooner, or @@ -1951,7 +1952,7 @@ static int run_cache_set(struct cache_set *c) if (bch_journal_replay(c, &journal)) goto err; } else { - pr_notice("invalidating existing data"); + pr_notice("invalidating existing data\n"); for_each_cache(ca, c, i) { unsigned int j; @@ -2085,7 +2086,7 @@ found: memcpy(c->sb.set_uuid, ca->sb.set_uuid, 16); c->sb.flags = ca->sb.flags; c->sb.seq = ca->sb.seq; - pr_debug("set version = %llu", c->sb.version); + pr_debug("set version = %llu\n", c->sb.version); } kobject_get(&ca->kobj); @@ -2247,7 +2248,7 @@ err_btree_alloc: err_free: module_put(THIS_MODULE); if (err) - pr_notice("error %s: %s", ca->cache_dev_name, err); + pr_notice("error %s: %s\n", ca->cache_dev_name, err); return ret; } @@ -2301,14 +2302,14 @@ static int register_cache(struct cache_sb *sb, struct cache_sb_disk 
*sb_disk, goto out; } - pr_info("registered cache device %s", ca->cache_dev_name); + pr_info("registered cache device %s\n", ca->cache_dev_name); out: kobject_put(&ca->kobj); err: if (err) - pr_notice("error %s: %s", ca->cache_dev_name, err); + pr_notice("error %s: %s\n", ca->cache_dev_name, err); return ret; } @@ -2461,7 +2462,7 @@ out_free_path: out_module_put: module_put(THIS_MODULE); out: - pr_info("error %s: %s", path?path:"", err); + pr_info("error %s: %s\n", path?path:"", err); return ret; } @@ -2506,7 +2507,7 @@ static ssize_t bch_pending_bdevs_cleanup(struct kobject *k, mutex_unlock(&bch_register_lock); list_for_each_entry_safe(pdev, tpdev, &pending_devs, list) { - pr_info("delete pdev %p", pdev); + pr_info("delete pdev %p\n", pdev); list_del(&pdev->list); bcache_device_stop(&pdev->dc->disk); kfree(pdev); @@ -2549,7 +2550,7 @@ static int bcache_reboot(struct notifier_block *n, unsigned long code, void *x) mutex_unlock(&bch_register_lock); - pr_info("Stopping all devices:"); + pr_info("Stopping all devices:\n"); /* * The reason bch_register_lock is not held to call @@ -2599,9 +2600,9 @@ static int bcache_reboot(struct notifier_block *n, unsigned long code, void *x) finish_wait(&unregister_wait, &wait); if (stopped) - pr_info("All devices stopped"); + pr_info("All devices stopped\n"); else - pr_notice("Timeout waiting for devices to be closed"); + pr_notice("Timeout waiting for devices to be closed\n"); out: mutex_unlock(&bch_register_lock); } @@ -2637,7 +2638,7 @@ static void check_module_parameters(void) if (bch_cutoff_writeback_sync == 0) bch_cutoff_writeback_sync = CUTOFF_WRITEBACK_SYNC; else if (bch_cutoff_writeback_sync > CUTOFF_WRITEBACK_SYNC_MAX) { - pr_warn("set bch_cutoff_writeback_sync (%u) to max value %u", + pr_warn("set bch_cutoff_writeback_sync (%u) to max value %u\n", bch_cutoff_writeback_sync, CUTOFF_WRITEBACK_SYNC_MAX); bch_cutoff_writeback_sync = CUTOFF_WRITEBACK_SYNC_MAX; } @@ -2645,13 +2646,13 @@ static void check_module_parameters(void) if (bch_cutoff_writeback == 0) bch_cutoff_writeback = CUTOFF_WRITEBACK; else if (bch_cutoff_writeback > CUTOFF_WRITEBACK_MAX) { - pr_warn("set bch_cutoff_writeback (%u) to max value %u", + pr_warn("set bch_cutoff_writeback (%u) to max value %u\n", bch_cutoff_writeback, CUTOFF_WRITEBACK_MAX); bch_cutoff_writeback = CUTOFF_WRITEBACK_MAX; } if (bch_cutoff_writeback > bch_cutoff_writeback_sync) { - pr_warn("set bch_cutoff_writeback (%u) to %u", + pr_warn("set bch_cutoff_writeback (%u) to %u\n", bch_cutoff_writeback, bch_cutoff_writeback_sync); bch_cutoff_writeback = bch_cutoff_writeback_sync; } diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c index 323276994aab..0dadec5a78f6 100644 --- a/drivers/md/bcache/sysfs.c +++ b/drivers/md/bcache/sysfs.c @@ -421,7 +421,7 @@ STORE(__cached_dev) return size; } if (v == -ENOENT) - pr_err("Can't attach %s: cache set not found", buf); + pr_err("Can't attach %s: cache set not found\n", buf); return v; } @@ -455,7 +455,7 @@ STORE(bch_cached_dev) */ if (dc->writeback_running) { dc->writeback_running = false; - pr_err("%s: failed to run non-existent writeback thread", + pr_err("%s: failed to run non-existent writeback thread\n", dc->disk.disk->disk_name); } } else @@ -872,11 +872,11 @@ STORE(__bch_cache_set) if (v) { if (test_and_set_bit(CACHE_SET_IO_DISABLE, &c->flags)) - pr_warn("CACHE_SET_IO_DISABLE already set"); + pr_warn("CACHE_SET_IO_DISABLE already set\n"); } else { if (!test_and_clear_bit(CACHE_SET_IO_DISABLE, &c->flags)) - pr_warn("CACHE_SET_IO_DISABLE already cleared"); + 
pr_warn("CACHE_SET_IO_DISABLE already cleared\n"); } } diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index 3f7641fb28d5..1cf1e5016cb9 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -809,7 +809,7 @@ static int bch_root_node_dirty_init(struct cache_set *c, schedule_timeout_interruptible( msecs_to_jiffies(INIT_KEYS_SLEEP_MS)); else if (ret < 0) { - pr_warn("sectors dirty init failed, ret=%d!", ret); + pr_warn("sectors dirty init failed, ret=%d!\n", ret); break; } } while (ret == -EAGAIN); @@ -917,7 +917,7 @@ void bch_sectors_dirty_init(struct bcache_device *d) state = kzalloc(sizeof(struct bch_dirty_init_state), GFP_KERNEL); if (!state) { - pr_warn("sectors dirty init failed: cannot allocate memory"); + pr_warn("sectors dirty init failed: cannot allocate memory\n"); return; } @@ -945,7 +945,7 @@ void bch_sectors_dirty_init(struct bcache_device *d) &state->infos[i], name); if (IS_ERR(state->infos[i].thread)) { - pr_err("fails to run thread bch_dirty_init[%d]", i); + pr_err("fails to run thread bch_dirty_init[%d]\n", i); for (--i; i >= 0; i--) kthread_stop(state->infos[i].thread); goto out; -- cgit v1.2.3 From 86da9f736740eba602389908574dfbb0f517baa5 Mon Sep 17 00:00:00 2001 From: Coly Li Date: Wed, 27 May 2020 12:01:53 +0800 Subject: bcache: fix refcount underflow in bcache_device_free() The problematic code piece in bcache_device_free() is, 785 static void bcache_device_free(struct bcache_device *d) 786 { 787 struct gendisk *disk = d->disk; [snipped] 799 if (disk) { 800 if (disk->flags & GENHD_FL_UP) 801 del_gendisk(disk); 802 803 if (disk->queue) 804 blk_cleanup_queue(disk->queue); 805 806 ida_simple_remove(&bcache_device_idx, 807 first_minor_to_idx(disk->first_minor)); 808 put_disk(disk); 809 } [snipped] 816 } At line 808, put_disk(disk) may encounter kobject refcount of 'disk' being underflow. Here is how to reproduce the issue, - Attche the backing device to a cache device and do random write to make the cache being dirty. - Stop the bcache device while the cache device has dirty data of the backing device. - Only register the backing device back, NOT register cache device. - The bcache device node /dev/bcache0 won't show up, because backing device waits for the cache device shows up for the missing dirty data. - Now echo 1 into /sys/fs/bcache/pendings_cleanup, to stop the pending backing device. - After the pending backing device stopped, use 'dmesg' to check kernel message, a use-after-free warning from KASA reported the refcount of kobject linked to the 'disk' is underflow. The dropping refcount at line 808 in the above code piece is added by add_disk(d->disk) in bch_cached_dev_run(). But in the above condition the cache device is not registered, bch_cached_dev_run() has no chance to be called and the refcount is not added. The put_disk() for a non- added refcount of gendisk kobject triggers a underflow warning. This patch checks whether GENHD_FL_UP is set in disk->flags, if it is not set then the bcache device was not added, don't call put_disk() and the the underflow issue can be avoided. 
Signed-off-by: Coly Li Signed-off-by: Jens Axboe --- drivers/md/bcache/super.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 467149f3bcc5..a10a3c78f4ff 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -797,7 +797,9 @@ static void bcache_device_free(struct bcache_device *d) bcache_device_detach(d); if (disk) { - if (disk->flags & GENHD_FL_UP) + bool disk_added = (disk->flags & GENHD_FL_UP) != 0; + + if (disk_added) del_gendisk(disk); if (disk->queue) @@ -805,7 +807,8 @@ static void bcache_device_free(struct bcache_device *d) ida_simple_remove(&bcache_device_idx, first_minor_to_idx(disk->first_minor)); - put_disk(disk); + if (disk_added) + put_disk(disk); } bioset_exit(&d->bio_split); -- cgit v1.2.3 From 9e23ccf8f0a22e5b86a9e0d8ecbb49fe2fa73ae9 Mon Sep 17 00:00:00 2001 From: Coly Li Date: Wed, 27 May 2020 12:01:54 +0800 Subject: bcache: asynchronous device registration When there is a lot of data cached on the cache device, the bcache internal btree can take a very long time to validate during the backing device and cache device registration. In my test, it may take 55+ minutes to check all the internal btree nodes. The problem is that the registration is invoked by udev rules and udevd has a 180-second timeout by default. If the btree node checking time is longer than the udevd timeout, the registering process will be killed by udevd with SIGKILL. If the registering process has a pending signal, creating a kthread for bcache will fail and the device registration will fail. The result is that, for a bcache device with a lot of data cached on the cache device, the bcache device node like /dev/bcache won't always be created, due to the very long btree checking time. A solution to avoid the udevd 180-second timeout is to register devices in an asynchronous way: after writing the cache or backing device path into /sys/fs/bcache/register_async, the kernel creates a kworker and moves all the btree node checking (for a cache device) or dirty data counting (for a cached device) into the kworker context. The kworker is then scheduled on system_wq and the registration code just returns to the user-space udev rule task. This way the udev task for the bcache rule completes in seconds; no matter how much time is spent in the kworker context, it won't be killed by udevd on a timeout. After all the checking and counting are done asynchronously in the kworker, the bcache device will eventually be created successfully. This patch makes the above change and adds a registration sysfs file /sys/fs/bcache/register_async. Writing the registering device path into this sysfs file will do the asynchronous registration. The register_async interface is for a very rare condition and won't be needed by common users. In the future I plan to make asynchronous registration the default behavior, which depends on feedback for this patch.
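As a usage illustration, a small user-space program that drives the new interface; the device path is a hypothetical placeholder and error handling is minimal:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
        const char *dev = "/dev/sdb"; /* hypothetical cache or backing device */
        int fd = open("/sys/fs/bcache/register_async", O_WRONLY);

        if (fd < 0) {
                perror("open"); /* sysfs file absent: kernel lacks the interface */
                return 1;
        }
        /* the write returns immediately; btree checking or dirty data
         * counting continues in a kworker scheduled on system_wq */
        if (write(fd, dev, strlen(dev)) < 0)
                perror("write");
        close(fd);
        return 0;
}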
Signed-off-by: Coly Li Signed-off-by: Jens Axboe --- drivers/md/bcache/super.c | 100 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 100 insertions(+) diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index a10a3c78f4ff..b971d8e916d5 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -2327,6 +2327,7 @@ static ssize_t bch_pending_bdevs_cleanup(struct kobject *k, kobj_attribute_write(register, register_bcache); kobj_attribute_write(register_quiet, register_bcache); +kobj_attribute_write(register_async, register_bcache); kobj_attribute_write(pendings_cleanup, bch_pending_bdevs_cleanup); static bool bch_is_open_backing(struct block_device *bdev) @@ -2362,6 +2363,83 @@ static bool bch_is_open(struct block_device *bdev) return bch_is_open_cache(bdev) || bch_is_open_backing(bdev); } +struct async_reg_args { + struct work_struct reg_work; + char *path; + struct cache_sb *sb; + struct cache_sb_disk *sb_disk; + struct block_device *bdev; +}; + +static void register_bdev_worker(struct work_struct *work) +{ + int fail = false; + struct async_reg_args *args = + container_of(work, struct async_reg_args, reg_work); + struct cached_dev *dc; + + dc = kzalloc(sizeof(*dc), GFP_KERNEL); + if (!dc) { + fail = true; + put_page(virt_to_page(args->sb_disk)); + blkdev_put(args->bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); + goto out; + } + + mutex_lock(&bch_register_lock); + if (register_bdev(args->sb, args->sb_disk, args->bdev, dc) < 0) + fail = true; + mutex_unlock(&bch_register_lock); + +out: + if (fail) + pr_info("error %s: fail to register backing device\n", + args->path); + kfree(args->sb); + kfree(args->path); + kfree(args); + module_put(THIS_MODULE); +} + +static void register_cache_worker(struct work_struct *work) +{ + int fail = false; + struct async_reg_args *args = + container_of(work, struct async_reg_args, reg_work); + struct cache *ca; + + ca = kzalloc(sizeof(*ca), GFP_KERNEL); + if (!ca) { + fail = true; + put_page(virt_to_page(args->sb_disk)); + blkdev_put(args->bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); + goto out; + } + + /* blkdev_put() will be called in bch_cache_release() */ + if (register_cache(args->sb, args->sb_disk, args->bdev, ca) != 0) + fail = true; + +out: + if (fail) + pr_info("error %s: fail to register cache device\n", + args->path); + kfree(args->sb); + kfree(args->path); + kfree(args); + module_put(THIS_MODULE); +} + +static void register_device_aync(struct async_reg_args *args) +{ + if (SB_IS_BDEV(args->sb)) + INIT_WORK(&args->reg_work, register_bdev_worker); + else + INIT_WORK(&args->reg_work, register_cache_worker); + + queue_work(system_wq, &args->reg_work); +} + static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, const char *buffer, size_t size) { @@ -2424,6 +2502,26 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, goto out_blkdev_put; err = "failed to register device"; + if (attr == &ksysfs_register_async) { + /* register in asynchronous way */ + struct async_reg_args *args = + kzalloc(sizeof(struct async_reg_args), GFP_KERNEL); + + if (!args) { + ret = -ENOMEM; + err = "cannot allocate memory"; + goto out_put_sb_page; + } + + args->path = path; + args->sb = sb; + args->sb_disk = sb_disk; + args->bdev = bdev; + register_device_aync(args); + /* No wait and returns to user space */ + goto async_done; + } + if (SB_IS_BDEV(sb)) { struct cached_dev *dc = kzalloc(sizeof(*dc), GFP_KERNEL); @@ -2451,6 +2549,7 @@ done: kfree(sb); kfree(path); 
module_put(THIS_MODULE); +async_done: return size; out_put_sb_page: @@ -2666,6 +2765,7 @@ static int __init bcache_init(void) static const struct attribute *files[] = { &ksysfs_register.attr, &ksysfs_register_quiet.attr, + &ksysfs_register_async.attr, &ksysfs_pendings_cleanup.attr, NULL }; -- cgit v1.2.3 From 0c8d3fceade2ab1bbac68bca013e62bfdb851d19 Mon Sep 17 00:00:00 2001 From: Coly Li Date: Wed, 27 May 2020 12:01:55 +0800 Subject: bcache: configure the asynchronous registration to be experimental In order to keep the experimental async registration interface from being treated as a new kernel ABI by common users, this patch places it behind an experimental kernel config option, BCACHE_ASYNC_REGISTRAION. This interface is for extremely large cached-data situations, to make sure the bcache device can always be created without the udev timeout issue. For normal users the async or sync registration makes no difference. In the future, when we decide to use asynchronous registration as the default behavior, this experimental interface may be removed. Signed-off-by: Coly Li Signed-off-by: Jens Axboe --- drivers/md/bcache/Kconfig | 9 +++++++++ drivers/md/bcache/super.c | 2 ++ 2 files changed, 11 insertions(+) diff --git a/drivers/md/bcache/Kconfig b/drivers/md/bcache/Kconfig index 6dfa653d30db..bf7dd96db9b3 100644 --- a/drivers/md/bcache/Kconfig +++ b/drivers/md/bcache/Kconfig @@ -26,3 +26,12 @@ config BCACHE_CLOSURES_DEBUG Keeps all active closures in a linked list and provides a debugfs interface to list them, which makes it possible to see asynchronous operations that get stuck. + +config BCACHE_ASYNC_REGISTRAION + bool "Asynchronous device registration (EXPERIMENTAL)" + depends on BCACHE + help + Add a sysfs file /sys/fs/bcache/register_async. Writing registering + device path into this file will returns immediately and the real + registration work is handled in kernel work queue in asynchronous + way. diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index b971d8e916d5..f9975c22bf7e 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -2765,7 +2765,9 @@ static int __init bcache_init(void) static const struct attribute *files[] = { &ksysfs_register.attr, &ksysfs_register_quiet.attr, +#ifdef CONFIG_BCACHE_ASYNC_REGISTRAION &ksysfs_register_async.attr, +#endif &ksysfs_pendings_cleanup.attr, NULL }; -- cgit v1.2.3