net/smc: preallocated memory for rdma work requests

The work requests for rdma writes are built in local variables within
function smc_tx_rdma_write(). This violates the rule that the work
request storage has to stay till the work request is confirmed by
a completion queue response.
This patch introduces preallocated memory for these work requests.
The storage is allocated, once a link (and thus a queue pair) is
established.

Signed-off-by: Ursula Braun <ubraun@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Ursula Braun 2019-02-04 13:44:44 +01:00 committed by David S. Miller
parent 53bc8d2af0
commit ad6f317f72
7 changed files with 92 additions and 33 deletions

View File

@ -21,13 +21,6 @@
/********************************** send *************************************/
struct smc_cdc_tx_pend {
struct smc_connection *conn; /* socket connection */
union smc_host_cursor cursor; /* tx sndbuf cursor sent */
union smc_host_cursor p_cursor; /* rx RMBE cursor produced */
u16 ctrl_seq; /* conn. tx sequence # */
};
/* handler for send/transmission completion of a CDC msg */
static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd,
struct smc_link *link,
@ -61,12 +54,14 @@ static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd,
int smc_cdc_get_free_slot(struct smc_connection *conn,
struct smc_wr_buf **wr_buf,
struct smc_rdma_wr **wr_rdma_buf,
struct smc_cdc_tx_pend **pend)
{
struct smc_link *link = &conn->lgr->lnk[SMC_SINGLE_LINK];
int rc;
rc = smc_wr_tx_get_free_slot(link, smc_cdc_tx_handler, wr_buf,
wr_rdma_buf,
(struct smc_wr_tx_pend_priv **)pend);
if (!conn->alert_token_local)
/* abnormal termination */
@ -121,7 +116,7 @@ static int smcr_cdc_get_slot_and_msg_send(struct smc_connection *conn)
struct smc_wr_buf *wr_buf;
int rc;
rc = smc_cdc_get_free_slot(conn, &wr_buf, &pend);
rc = smc_cdc_get_free_slot(conn, &wr_buf, NULL, &pend);
if (rc)
return rc;

View File

@ -270,10 +270,16 @@ static inline void smc_cdc_msg_to_host(struct smc_host_cdc_msg *local,
smcr_cdc_msg_to_host(local, peer, conn);
}
struct smc_cdc_tx_pend;
struct smc_cdc_tx_pend {
struct smc_connection *conn; /* socket connection */
union smc_host_cursor cursor; /* tx sndbuf cursor sent */
union smc_host_cursor p_cursor; /* rx RMBE cursor produced */
u16 ctrl_seq; /* conn. tx sequence # */
};
int smc_cdc_get_free_slot(struct smc_connection *conn,
struct smc_wr_buf **wr_buf,
struct smc_rdma_wr **wr_rdma_buf,
struct smc_cdc_tx_pend **pend);
void smc_cdc_tx_dismiss_slots(struct smc_connection *conn);
int smc_cdc_msg_send(struct smc_connection *conn, struct smc_wr_buf *wr_buf,

View File

@ -52,6 +52,24 @@ enum smc_wr_reg_state {
FAILED /* ib_wr_reg_mr response: failure */
};
struct smc_rdma_sge { /* sges for RDMA writes */
struct ib_sge wr_tx_rdma_sge[SMC_IB_MAX_SEND_SGE];
};
#define SMC_MAX_RDMA_WRITES 2 /* max. # of RDMA writes per
* message send
*/
struct smc_rdma_sges { /* sges per message send */
struct smc_rdma_sge tx_rdma_sge[SMC_MAX_RDMA_WRITES];
};
struct smc_rdma_wr { /* work requests per message
* send
*/
struct ib_rdma_wr wr_tx_rdma[SMC_MAX_RDMA_WRITES];
};
struct smc_link {
struct smc_ib_device *smcibdev; /* ib-device */
u8 ibport; /* port - values 1 | 2 */
@ -64,6 +82,8 @@ struct smc_link {
struct smc_wr_buf *wr_tx_bufs; /* WR send payload buffers */
struct ib_send_wr *wr_tx_ibs; /* WR send meta data */
struct ib_sge *wr_tx_sges; /* WR send gather meta data */
struct smc_rdma_sges *wr_tx_rdma_sges;/*RDMA WRITE gather meta data*/
struct smc_rdma_wr *wr_tx_rdmas; /* WR RDMA WRITE */
struct smc_wr_tx_pend *wr_tx_pends; /* WR send waiting for CQE */
/* above four vectors have wr_tx_cnt elements and use the same index */
dma_addr_t wr_tx_dma_addr; /* DMA address of wr_tx_bufs */

View File

@ -166,7 +166,8 @@ static int smc_llc_add_pending_send(struct smc_link *link,
{
int rc;
rc = smc_wr_tx_get_free_slot(link, smc_llc_tx_handler, wr_buf, pend);
rc = smc_wr_tx_get_free_slot(link, smc_llc_tx_handler, wr_buf, NULL,
pend);
if (rc < 0)
return rc;
BUILD_BUG_ON_MSG(

View File

@ -266,27 +266,23 @@ int smcd_tx_ism_write(struct smc_connection *conn, void *data, size_t len,
/* sndbuf consumer: actual data transfer of one target chunk with RDMA write */
static int smc_tx_rdma_write(struct smc_connection *conn, int peer_rmbe_offset,
int num_sges, struct ib_sge sges[])
int num_sges, struct ib_rdma_wr *rdma_wr)
{
struct smc_link_group *lgr = conn->lgr;
struct ib_rdma_wr rdma_wr;
struct smc_link *link;
int rc;
memset(&rdma_wr, 0, sizeof(rdma_wr));
link = &lgr->lnk[SMC_SINGLE_LINK];
rdma_wr.wr.wr_id = smc_wr_tx_get_next_wr_id(link);
rdma_wr.wr.sg_list = sges;
rdma_wr.wr.num_sge = num_sges;
rdma_wr.wr.opcode = IB_WR_RDMA_WRITE;
rdma_wr.remote_addr =
rdma_wr->wr.wr_id = smc_wr_tx_get_next_wr_id(link);
rdma_wr->wr.num_sge = num_sges;
rdma_wr->remote_addr =
lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].dma_addr +
/* RMBE within RMB */
conn->tx_off +
/* offset within RMBE */
peer_rmbe_offset;
rdma_wr.rkey = lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].rkey;
rc = ib_post_send(link->roce_qp, &rdma_wr.wr, NULL);
rdma_wr->rkey = lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].rkey;
rc = ib_post_send(link->roce_qp, &rdma_wr->wr, NULL);
if (rc) {
conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
smc_lgr_terminate(lgr);
@ -313,24 +309,25 @@ static inline void smc_tx_advance_cursors(struct smc_connection *conn,
/* SMC-R helper for smc_tx_rdma_writes() */
static int smcr_tx_rdma_writes(struct smc_connection *conn, size_t len,
size_t src_off, size_t src_len,
size_t dst_off, size_t dst_len)
size_t dst_off, size_t dst_len,
struct smc_rdma_wr *wr_rdma_buf)
{
dma_addr_t dma_addr =
sg_dma_address(conn->sndbuf_desc->sgt[SMC_SINGLE_LINK].sgl);
struct smc_link *link = &conn->lgr->lnk[SMC_SINGLE_LINK];
int src_len_sum = src_len, dst_len_sum = dst_len;
struct ib_sge sges[SMC_IB_MAX_SEND_SGE];
int sent_count = src_off;
int srcchunk, dstchunk;
int num_sges;
int rc;
for (dstchunk = 0; dstchunk < 2; dstchunk++) {
struct ib_sge *sge =
wr_rdma_buf->wr_tx_rdma[dstchunk].wr.sg_list;
num_sges = 0;
for (srcchunk = 0; srcchunk < 2; srcchunk++) {
sges[srcchunk].addr = dma_addr + src_off;
sges[srcchunk].length = src_len;
sges[srcchunk].lkey = link->roce_pd->local_dma_lkey;
sge[srcchunk].addr = dma_addr + src_off;
sge[srcchunk].length = src_len;
num_sges++;
src_off += src_len;
@ -343,7 +340,8 @@ static int smcr_tx_rdma_writes(struct smc_connection *conn, size_t len,
src_len = dst_len - src_len; /* remainder */
src_len_sum += src_len;
}
rc = smc_tx_rdma_write(conn, dst_off, num_sges, sges);
rc = smc_tx_rdma_write(conn, dst_off, num_sges,
&wr_rdma_buf->wr_tx_rdma[dstchunk]);
if (rc)
return rc;
if (dst_len_sum == len)
@ -402,7 +400,8 @@ static int smcd_tx_rdma_writes(struct smc_connection *conn, size_t len,
/* sndbuf consumer: prepare all necessary (src&dst) chunks of data transmit;
* usable snd_wnd as max transmit
*/
static int smc_tx_rdma_writes(struct smc_connection *conn)
static int smc_tx_rdma_writes(struct smc_connection *conn,
struct smc_rdma_wr *wr_rdma_buf)
{
size_t len, src_len, dst_off, dst_len; /* current chunk values */
union smc_host_cursor sent, prep, prod, cons;
@ -463,7 +462,7 @@ static int smc_tx_rdma_writes(struct smc_connection *conn)
dst_off, dst_len);
else
rc = smcr_tx_rdma_writes(conn, len, sent.count, src_len,
dst_off, dst_len);
dst_off, dst_len, wr_rdma_buf);
if (rc)
return rc;
@ -484,11 +483,12 @@ static int smc_tx_rdma_writes(struct smc_connection *conn)
static int smcr_tx_sndbuf_nonempty(struct smc_connection *conn)
{
struct smc_cdc_producer_flags *pflags;
struct smc_rdma_wr *wr_rdma_buf;
struct smc_cdc_tx_pend *pend;
struct smc_wr_buf *wr_buf;
int rc;
rc = smc_cdc_get_free_slot(conn, &wr_buf, &pend);
rc = smc_cdc_get_free_slot(conn, &wr_buf, &wr_rdma_buf, &pend);
if (rc < 0) {
if (rc == -EBUSY) {
struct smc_sock *smc =
@ -506,7 +506,7 @@ static int smcr_tx_sndbuf_nonempty(struct smc_connection *conn)
spin_lock_bh(&conn->send_lock);
if (!conn->local_tx_ctrl.prod_flags.urg_data_present) {
rc = smc_tx_rdma_writes(conn);
rc = smc_tx_rdma_writes(conn, wr_rdma_buf);
if (rc) {
smc_wr_tx_put_slot(&conn->lgr->lnk[SMC_SINGLE_LINK],
(struct smc_wr_tx_pend_priv *)pend);
@ -533,7 +533,7 @@ static int smcd_tx_sndbuf_nonempty(struct smc_connection *conn)
spin_lock_bh(&conn->send_lock);
if (!pflags->urg_data_present)
rc = smc_tx_rdma_writes(conn);
rc = smc_tx_rdma_writes(conn, NULL);
if (!rc)
rc = smcd_cdc_msg_send(conn);

View File

@ -160,6 +160,7 @@ static inline int smc_wr_tx_get_free_slot_index(struct smc_link *link, u32 *idx)
* @link: Pointer to smc_link used to later send the message.
* @handler: Send completion handler function pointer.
* @wr_buf: Out value returns pointer to message buffer.
* @wr_rdma_buf: Out value returns pointer to rdma work request.
* @wr_pend_priv: Out value returns pointer serving as handler context.
*
* Return: 0 on success, or -errno on error.
@ -167,6 +168,7 @@ static inline int smc_wr_tx_get_free_slot_index(struct smc_link *link, u32 *idx)
int smc_wr_tx_get_free_slot(struct smc_link *link,
smc_wr_tx_handler handler,
struct smc_wr_buf **wr_buf,
struct smc_rdma_wr **wr_rdma_buf,
struct smc_wr_tx_pend_priv **wr_pend_priv)
{
struct smc_wr_tx_pend *wr_pend;
@ -204,6 +206,8 @@ int smc_wr_tx_get_free_slot(struct smc_link *link,
wr_ib = &link->wr_tx_ibs[idx];
wr_ib->wr_id = wr_id;
*wr_buf = &link->wr_tx_bufs[idx];
if (wr_rdma_buf)
*wr_rdma_buf = &link->wr_tx_rdmas[idx];
*wr_pend_priv = &wr_pend->priv;
return 0;
}
@ -465,12 +469,26 @@ static void smc_wr_init_sge(struct smc_link *lnk)
lnk->wr_tx_dma_addr + i * SMC_WR_BUF_SIZE;
lnk->wr_tx_sges[i].length = SMC_WR_TX_SIZE;
lnk->wr_tx_sges[i].lkey = lnk->roce_pd->local_dma_lkey;
lnk->wr_tx_rdma_sges[i].tx_rdma_sge[0].wr_tx_rdma_sge[0].lkey =
lnk->roce_pd->local_dma_lkey;
lnk->wr_tx_rdma_sges[i].tx_rdma_sge[0].wr_tx_rdma_sge[1].lkey =
lnk->roce_pd->local_dma_lkey;
lnk->wr_tx_rdma_sges[i].tx_rdma_sge[1].wr_tx_rdma_sge[0].lkey =
lnk->roce_pd->local_dma_lkey;
lnk->wr_tx_rdma_sges[i].tx_rdma_sge[1].wr_tx_rdma_sge[1].lkey =
lnk->roce_pd->local_dma_lkey;
lnk->wr_tx_ibs[i].next = NULL;
lnk->wr_tx_ibs[i].sg_list = &lnk->wr_tx_sges[i];
lnk->wr_tx_ibs[i].num_sge = 1;
lnk->wr_tx_ibs[i].opcode = IB_WR_SEND;
lnk->wr_tx_ibs[i].send_flags =
IB_SEND_SIGNALED | IB_SEND_SOLICITED;
lnk->wr_tx_rdmas[i].wr_tx_rdma[0].wr.opcode = IB_WR_RDMA_WRITE;
lnk->wr_tx_rdmas[i].wr_tx_rdma[1].wr.opcode = IB_WR_RDMA_WRITE;
lnk->wr_tx_rdmas[i].wr_tx_rdma[0].wr.sg_list =
lnk->wr_tx_rdma_sges[i].tx_rdma_sge[0].wr_tx_rdma_sge;
lnk->wr_tx_rdmas[i].wr_tx_rdma[1].wr.sg_list =
lnk->wr_tx_rdma_sges[i].tx_rdma_sge[1].wr_tx_rdma_sge;
}
for (i = 0; i < lnk->wr_rx_cnt; i++) {
lnk->wr_rx_sges[i].addr =
@ -521,8 +539,12 @@ void smc_wr_free_link_mem(struct smc_link *lnk)
lnk->wr_tx_mask = NULL;
kfree(lnk->wr_tx_sges);
lnk->wr_tx_sges = NULL;
kfree(lnk->wr_tx_rdma_sges);
lnk->wr_tx_rdma_sges = NULL;
kfree(lnk->wr_rx_sges);
lnk->wr_rx_sges = NULL;
kfree(lnk->wr_tx_rdmas);
lnk->wr_tx_rdmas = NULL;
kfree(lnk->wr_rx_ibs);
lnk->wr_rx_ibs = NULL;
kfree(lnk->wr_tx_ibs);
@ -552,10 +574,20 @@ int smc_wr_alloc_link_mem(struct smc_link *link)
GFP_KERNEL);
if (!link->wr_rx_ibs)
goto no_mem_wr_tx_ibs;
link->wr_tx_rdmas = kcalloc(SMC_WR_BUF_CNT,
sizeof(link->wr_tx_rdmas[0]),
GFP_KERNEL);
if (!link->wr_tx_rdmas)
goto no_mem_wr_rx_ibs;
link->wr_tx_rdma_sges = kcalloc(SMC_WR_BUF_CNT,
sizeof(link->wr_tx_rdma_sges[0]),
GFP_KERNEL);
if (!link->wr_tx_rdma_sges)
goto no_mem_wr_tx_rdmas;
link->wr_tx_sges = kcalloc(SMC_WR_BUF_CNT, sizeof(link->wr_tx_sges[0]),
GFP_KERNEL);
if (!link->wr_tx_sges)
goto no_mem_wr_rx_ibs;
goto no_mem_wr_tx_rdma_sges;
link->wr_rx_sges = kcalloc(SMC_WR_BUF_CNT * 3,
sizeof(link->wr_rx_sges[0]),
GFP_KERNEL);
@ -579,6 +611,10 @@ no_mem_wr_rx_sges:
kfree(link->wr_rx_sges);
no_mem_wr_tx_sges:
kfree(link->wr_tx_sges);
no_mem_wr_tx_rdma_sges:
kfree(link->wr_tx_rdma_sges);
no_mem_wr_tx_rdmas:
kfree(link->wr_tx_rdmas);
no_mem_wr_rx_ibs:
kfree(link->wr_rx_ibs);
no_mem_wr_tx_ibs:

View File

@ -85,6 +85,7 @@ void smc_wr_add_dev(struct smc_ib_device *smcibdev);
int smc_wr_tx_get_free_slot(struct smc_link *link, smc_wr_tx_handler handler,
struct smc_wr_buf **wr_buf,
struct smc_rdma_wr **wrs,
struct smc_wr_tx_pend_priv **wr_pend_priv);
int smc_wr_tx_put_slot(struct smc_link *link,
struct smc_wr_tx_pend_priv *wr_pend_priv);