From 20cf4e026730104892fa1268de0371a631cee294 Mon Sep 17 00:00:00 2001
From: Chuck Lever
Date: Mon, 29 Jul 2019 13:22:09 -0400
Subject: [PATCH] rdma: Enable ib_alloc_cq to spread work over a device's
 comp_vectors
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Send and Receive completion is handled on a single CPU selected at
the time each Completion Queue is allocated. Typically this is when
an initiator instantiates an RDMA transport, or when a target
accepts an RDMA connection.

Some ULPs cannot open a connection per CPU to spread completion
workload across available CPUs and MSI vectors. For such ULPs,
provide an API that allows the RDMA core to select a completion
vector based on the device's complement of available comp_vecs.

ULPs that invoke ib_alloc_cq() with only comp_vector 0 are converted
to use the new API so that their completion workloads interfere less
with each other.

Suggested-by: Håkon Bugge
Signed-off-by: Chuck Lever
Reviewed-by: Leon Romanovsky
Cc:
Cc:
Link: https://lore.kernel.org/r/20190729171923.13428.52555.stgit@manet.1015granger.net
Signed-off-by: Doug Ledford
---
 drivers/infiniband/core/cq.c             | 28 ++++++++++++++++++++++++
 drivers/infiniband/ulp/srpt/ib_srpt.c    |  4 ++--
 fs/cifs/smbdirect.c                      | 10 +++++----
 include/rdma/ib_verbs.h                  | 19 ++++++++++++++++
 net/9p/trans_rdma.c                      |  6 ++---
 net/sunrpc/xprtrdma/svc_rdma_transport.c |  8 +++----
 net/sunrpc/xprtrdma/verbs.c              | 13 +++++------
 7 files changed, 68 insertions(+), 20 deletions(-)

diff --git a/drivers/infiniband/core/cq.c b/drivers/infiniband/core/cq.c
index 7c599878ccf7..bbfded6d5d3d 100644
--- a/drivers/infiniband/core/cq.c
+++ b/drivers/infiniband/core/cq.c
@@ -252,6 +252,34 @@ struct ib_cq *__ib_alloc_cq_user(struct ib_device *dev, void *private,
 }
 EXPORT_SYMBOL(__ib_alloc_cq_user);
 
+/**
+ * __ib_alloc_cq_any - allocate a completion queue
+ * @dev: device to allocate the CQ for
+ * @private: driver private data, accessible from cq->cq_context
+ * @nr_cqe: number of CQEs to allocate
+ * @poll_ctx: context to poll the CQ from
+ * @caller: module owner name
+ *
+ * Attempt to spread ULP Completion Queues over each device's interrupt
+ * vectors. A simple best-effort mechanism is used.
+ */
+struct ib_cq *__ib_alloc_cq_any(struct ib_device *dev, void *private,
+				int nr_cqe, enum ib_poll_context poll_ctx,
+				const char *caller)
+{
+	static atomic_t counter;
+	int comp_vector = 0;
+
+	if (dev->num_comp_vectors > 1)
+		comp_vector =
+			atomic_inc_return(&counter) %
+			min_t(int, dev->num_comp_vectors, num_online_cpus());
+
+	return __ib_alloc_cq_user(dev, private, nr_cqe, comp_vector, poll_ctx,
+				  caller, NULL);
+}
+EXPORT_SYMBOL(__ib_alloc_cq_any);
+
 /**
  * ib_free_cq_user - free a completion queue
  * @cq: completion queue to free.
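The comp_vector choice above is a global round robin, clamped so that a
device advertising more vectors than the system has online CPUs never
lands on a vector no CPU is servicing. As a worked example with assumed
counts (8 vectors, 4 online CPUs), the divisor is min(8, 4) = 4, so
successive allocations get vectors 1, 2, 3, 0, 1, and so on. A minimal
userspace sketch of that arithmetic, with a plain counter standing in
for the kernel's atomic_inc_return():

#include <stdio.h>

/* Userspace model of __ib_alloc_cq_any()'s vector choice. The counts
 * are assumed values for illustration, not taken from the patch.
 */
static int pick_comp_vector(int num_comp_vectors, int num_online_cpus)
{
	static int counter;
	int limit = num_comp_vectors < num_online_cpus ?
		    num_comp_vectors : num_online_cpus;

	if (num_comp_vectors <= 1)
		return 0;
	return ++counter % limit;
}

int main(void)
{
	for (int i = 0; i < 8; i++)
		printf("CQ %d -> comp_vector %d\n", i,
		       pick_comp_vector(8, 4));
	return 0;
}

Because the counter is static and shared by every caller on every
device, the spread is deliberately best-effort: it distributes
allocations globally rather than balancing each device individually.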
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
index 1a039f16d315..e25c70a56be6 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -1767,8 +1767,8 @@ static int srpt_create_ch_ib(struct srpt_rdma_ch *ch)
 		goto out;
 
 retry:
-	ch->cq = ib_alloc_cq(sdev->device, ch, ch->rq_size + sq_size,
-			0 /* XXX: spread CQs */, IB_POLL_WORKQUEUE);
+	ch->cq = ib_alloc_cq_any(sdev->device, ch, ch->rq_size + sq_size,
+				 IB_POLL_WORKQUEUE);
 	if (IS_ERR(ch->cq)) {
 		ret = PTR_ERR(ch->cq);
 		pr_err("failed to create CQ cqe= %d ret= %d\n",
diff --git a/fs/cifs/smbdirect.c b/fs/cifs/smbdirect.c
index cd07e5301d42..3c91fa97c9a8 100644
--- a/fs/cifs/smbdirect.c
+++ b/fs/cifs/smbdirect.c
@@ -1654,15 +1654,17 @@ static struct smbd_connection *_smbd_get_connection(
 	info->send_cq = NULL;
 	info->recv_cq = NULL;
 
-	info->send_cq = ib_alloc_cq(info->id->device, info,
-			info->send_credit_target, 0, IB_POLL_SOFTIRQ);
+	info->send_cq =
+		ib_alloc_cq_any(info->id->device, info,
+				info->send_credit_target, IB_POLL_SOFTIRQ);
 	if (IS_ERR(info->send_cq)) {
 		info->send_cq = NULL;
 		goto alloc_cq_failed;
 	}
 
-	info->recv_cq = ib_alloc_cq(info->id->device, info,
-			info->receive_credit_max, 0, IB_POLL_SOFTIRQ);
+	info->recv_cq =
+		ib_alloc_cq_any(info->id->device, info,
+				info->receive_credit_max, IB_POLL_SOFTIRQ);
 	if (IS_ERR(info->recv_cq)) {
 		info->recv_cq = NULL;
 		goto alloc_cq_failed;
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index c5f8a9f17063..2a1523ccd7ab 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -3711,6 +3711,25 @@ static inline struct ib_cq *ib_alloc_cq(struct ib_device *dev, void *private,
 				NULL);
 }
 
+struct ib_cq *__ib_alloc_cq_any(struct ib_device *dev, void *private,
+				int nr_cqe, enum ib_poll_context poll_ctx,
+				const char *caller);
+
+/**
+ * ib_alloc_cq_any: Allocate kernel CQ
+ * @dev: The IB device
+ * @private: Private data attached to the CQE
+ * @nr_cqe: Number of CQEs in the CQ
+ * @poll_ctx: Context used for polling the CQ
+ */
+static inline struct ib_cq *ib_alloc_cq_any(struct ib_device *dev,
+					    void *private, int nr_cqe,
+					    enum ib_poll_context poll_ctx)
+{
+	return __ib_alloc_cq_any(dev, private, nr_cqe, poll_ctx,
+				 KBUILD_MODNAME);
+}
+
 /**
  * ib_free_cq_user - Free kernel/user CQ
  * @cq: The CQ to free
diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c
index bac8dad5dd69..b21c3c209815 100644
--- a/net/9p/trans_rdma.c
+++ b/net/9p/trans_rdma.c
@@ -685,9 +685,9 @@ rdma_create_trans(struct p9_client *client, const char *addr, char *args)
 		goto error;
 
 	/* Create the Completion Queue */
-	rdma->cq = ib_alloc_cq(rdma->cm_id->device, client,
-			       opts.sq_depth + opts.rq_depth + 1,
-			       0, IB_POLL_SOFTIRQ);
+	rdma->cq = ib_alloc_cq_any(rdma->cm_id->device, client,
+				   opts.sq_depth + opts.rq_depth + 1,
+				   IB_POLL_SOFTIRQ);
 	if (IS_ERR(rdma->cq))
 		goto error;
 
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 3fe665152d95..4d3db6ee7f09 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -454,14 +454,14 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
 		dprintk("svcrdma: error creating PD for connect request\n");
 		goto errout;
 	}
-	newxprt->sc_sq_cq = ib_alloc_cq(dev, newxprt, newxprt->sc_sq_depth,
-					0, IB_POLL_WORKQUEUE);
+	newxprt->sc_sq_cq = ib_alloc_cq_any(dev, newxprt, newxprt->sc_sq_depth,
+					    IB_POLL_WORKQUEUE);
 	if (IS_ERR(newxprt->sc_sq_cq)) {
 		dprintk("svcrdma: error creating SQ CQ for connect request\n");
 		goto errout;
 	}
-	newxprt->sc_rq_cq = ib_alloc_cq(dev, newxprt, rq_depth,
-				       0, IB_POLL_WORKQUEUE);
+	newxprt->sc_rq_cq =
+		ib_alloc_cq_any(dev, newxprt, rq_depth, IB_POLL_WORKQUEUE);
 	if (IS_ERR(newxprt->sc_rq_cq)) {
 		dprintk("svcrdma: error creating RQ CQ for connect request\n");
 		goto errout;
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 805b1f35e1ca..b10aa16557f0 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -521,18 +521,17 @@ int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt)
 	init_waitqueue_head(&ep->rep_connect_wait);
 	ep->rep_receive_count = 0;
 
-	sendcq = ib_alloc_cq(ia->ri_id->device, NULL,
-			     ep->rep_attr.cap.max_send_wr + 1,
-			     ia->ri_id->device->num_comp_vectors > 1 ? 1 : 0,
-			     IB_POLL_WORKQUEUE);
+	sendcq = ib_alloc_cq_any(ia->ri_id->device, NULL,
+				 ep->rep_attr.cap.max_send_wr + 1,
+				 IB_POLL_WORKQUEUE);
 	if (IS_ERR(sendcq)) {
 		rc = PTR_ERR(sendcq);
 		goto out1;
 	}
 
-	recvcq = ib_alloc_cq(ia->ri_id->device, NULL,
-			     ep->rep_attr.cap.max_recv_wr + 1,
-			     0, IB_POLL_WORKQUEUE);
+	recvcq = ib_alloc_cq_any(ia->ri_id->device, NULL,
+				 ep->rep_attr.cap.max_recv_wr + 1,
+				 IB_POLL_WORKQUEUE);
 	if (IS_ERR(recvcq)) {
 		rc = PTR_ERR(recvcq);
 		goto out2;
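Note that the verbs.c hunk above also retires rpcrdma's private
heuristic (num_comp_vectors > 1 ? 1 : 0) for its send CQ; comp_vector
policy now belongs to the core. A ULP not covered by this patch would
convert the same way. A minimal sketch, assuming a hypothetical
struct my_ep container and unwinding errors with the existing
ib_free_cq() helper:

#include <rdma/ib_verbs.h>

/* Hypothetical endpoint container; not from this patch. */
struct my_ep {
	struct ib_cq *send_cq;
	struct ib_cq *recv_cq;
};

/* Allocate a send and a receive CQ without naming a comp_vector;
 * the core spreads the two across the device's interrupt vectors.
 */
static int my_ep_create_cqs(struct my_ep *ep, struct ib_device *dev,
			    int send_depth, int recv_depth)
{
	ep->send_cq = ib_alloc_cq_any(dev, NULL, send_depth + 1,
				      IB_POLL_WORKQUEUE);
	if (IS_ERR(ep->send_cq))
		return PTR_ERR(ep->send_cq);

	ep->recv_cq = ib_alloc_cq_any(dev, NULL, recv_depth + 1,
				      IB_POLL_WORKQUEUE);
	if (IS_ERR(ep->recv_cq)) {
		ib_free_cq(ep->send_cq);
		return PTR_ERR(ep->recv_cq);
	}
	return 0;
}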