Merge branch 'net-tls-add-a-TX-lock'

Jakub Kicinski says:

====================
net/tls: add a TX lock

Some time ago Pooja and Mallesham started reporting crashes with
an async accelerator. After trying to poke the existing logic into
shape I came to the conclusion that it can't be trusted, and to
preserve our sanity we should just add a lock around the TX side.

First patch removes the sk_write_pending checks from the write
space callbacks. Those don't seem to have a logical justification.

Patch 2 adds the TX lock and patch 3 associated test (which should
hang with current net).

Mallesham reports that even with these fixes applied the async
accelerator workload still occasionally hangs waiting for socket
memory. I suspect that's strictly related to the way async crypto
is integrated in TLS, so I think we should get these into net or
net-next and move from there.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2019-11-06 17:33:32 -08:00
commit 9990a79d8f
5 changed files with 134 additions and 21 deletions

View File

@ -40,6 +40,7 @@
#include <linux/socket.h>
#include <linux/tcp.h>
#include <linux/skmsg.h>
#include <linux/mutex.h>
#include <linux/netdevice.h>
#include <linux/rcupdate.h>
@ -269,6 +270,10 @@ struct tls_context {
bool in_tcp_sendpages;
bool pending_open_record_frags;
struct mutex tx_lock; /* protects partially_sent_* fields and
* per-type TX fields
*/
unsigned long flags;
/* cache cold stuff */

View File

@ -523,8 +523,10 @@ static int tls_push_data(struct sock *sk,
int tls_device_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
{
unsigned char record_type = TLS_RECORD_TYPE_DATA;
struct tls_context *tls_ctx = tls_get_ctx(sk);
int rc;
mutex_lock(&tls_ctx->tx_lock);
lock_sock(sk);
if (unlikely(msg->msg_controllen)) {
@ -538,12 +540,14 @@ int tls_device_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
out:
release_sock(sk);
mutex_unlock(&tls_ctx->tx_lock);
return rc;
}
int tls_device_sendpage(struct sock *sk, struct page *page,
int offset, size_t size, int flags)
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
struct iov_iter msg_iter;
char *kaddr = kmap(page);
struct kvec iov;
@ -552,6 +556,7 @@ int tls_device_sendpage(struct sock *sk, struct page *page,
if (flags & MSG_SENDPAGE_NOTLAST)
flags |= MSG_MORE;
mutex_lock(&tls_ctx->tx_lock);
lock_sock(sk);
if (flags & MSG_OOB) {
@ -568,6 +573,7 @@ int tls_device_sendpage(struct sock *sk, struct page *page,
out:
release_sock(sk);
mutex_unlock(&tls_ctx->tx_lock);
return rc;
}
@ -623,9 +629,11 @@ static int tls_device_push_pending_record(struct sock *sk, int flags)
void tls_device_write_space(struct sock *sk, struct tls_context *ctx)
{
if (!sk->sk_write_pending && tls_is_partially_sent_record(ctx)) {
if (tls_is_partially_sent_record(ctx)) {
gfp_t sk_allocation = sk->sk_allocation;
WARN_ON_ONCE(sk->sk_write_pending);
sk->sk_allocation = GFP_ATOMIC;
tls_push_partial_record(sk, ctx,
MSG_DONTWAIT | MSG_NOSIGNAL |

View File

@ -267,6 +267,7 @@ void tls_ctx_free(struct sock *sk, struct tls_context *ctx)
memzero_explicit(&ctx->crypto_send, sizeof(ctx->crypto_send));
memzero_explicit(&ctx->crypto_recv, sizeof(ctx->crypto_recv));
mutex_destroy(&ctx->tx_lock);
if (sk)
kfree_rcu(ctx, rcu);
@ -612,6 +613,7 @@ static struct tls_context *create_ctx(struct sock *sk)
if (!ctx)
return NULL;
mutex_init(&ctx->tx_lock);
rcu_assign_pointer(icsk->icsk_ulp_data, ctx);
ctx->sk_proto = sk->sk_prot;
return ctx;

View File

@ -897,15 +897,9 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
if (msg->msg_flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL))
return -ENOTSUPP;
mutex_lock(&tls_ctx->tx_lock);
lock_sock(sk);
/* Wait till there is any pending write on socket */
if (unlikely(sk->sk_write_pending)) {
ret = wait_on_pending_writer(sk, &timeo);
if (unlikely(ret))
goto send_end;
}
if (unlikely(msg->msg_controllen)) {
ret = tls_proccess_cmsg(sk, msg, &record_type);
if (ret) {
@ -1091,6 +1085,7 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
ret = sk_stream_error(sk, msg->msg_flags, ret);
release_sock(sk);
mutex_unlock(&tls_ctx->tx_lock);
return copied ? copied : ret;
}
@ -1114,13 +1109,6 @@ static int tls_sw_do_sendpage(struct sock *sk, struct page *page,
eor = !(flags & (MSG_MORE | MSG_SENDPAGE_NOTLAST));
sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
/* Wait till there is any pending write on socket */
if (unlikely(sk->sk_write_pending)) {
ret = wait_on_pending_writer(sk, &timeo);
if (unlikely(ret))
goto sendpage_end;
}
/* Call the sk_stream functions to manage the sndbuf mem. */
while (size > 0) {
size_t copy, required_size;
@ -1219,15 +1207,18 @@ static int tls_sw_do_sendpage(struct sock *sk, struct page *page,
int tls_sw_sendpage(struct sock *sk, struct page *page,
int offset, size_t size, int flags)
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
int ret;
if (flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL |
MSG_SENDPAGE_NOTLAST | MSG_SENDPAGE_NOPOLICY))
return -ENOTSUPP;
mutex_lock(&tls_ctx->tx_lock);
lock_sock(sk);
ret = tls_sw_do_sendpage(sk, page, offset, size, flags);
release_sock(sk);
mutex_unlock(&tls_ctx->tx_lock);
return ret;
}
@ -2170,9 +2161,11 @@ static void tx_work_handler(struct work_struct *work)
if (!test_and_clear_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask))
return;
mutex_lock(&tls_ctx->tx_lock);
lock_sock(sk);
tls_tx_records(sk, -1);
release_sock(sk);
mutex_unlock(&tls_ctx->tx_lock);
}
void tls_sw_write_space(struct sock *sk, struct tls_context *ctx)
@ -2180,12 +2173,9 @@ void tls_sw_write_space(struct sock *sk, struct tls_context *ctx)
struct tls_sw_context_tx *tx_ctx = tls_sw_ctx_tx(ctx);
/* Schedule the transmission if tx list is ready */
if (is_tx_ready(tx_ctx) && !sk->sk_write_pending) {
/* Schedule the transmission */
if (!test_and_set_bit(BIT_TX_SCHEDULED,
&tx_ctx->tx_bitmask))
schedule_delayed_work(&tx_ctx->tx_work.work, 0);
}
if (is_tx_ready(tx_ctx) &&
!test_and_set_bit(BIT_TX_SCHEDULED, &tx_ctx->tx_bitmask))
schedule_delayed_work(&tx_ctx->tx_work.work, 0);
}
void tls_sw_strparser_arm(struct sock *sk, struct tls_context *tls_ctx)

View File

@ -898,6 +898,114 @@ TEST_F(tls, nonblocking)
}
}
static void
test_mutliproc(struct __test_metadata *_metadata, struct _test_data_tls *self,
bool sendpg, unsigned int n_readers, unsigned int n_writers)
{
const unsigned int n_children = n_readers + n_writers;
const size_t data = 6 * 1000 * 1000;
const size_t file_sz = data / 100;
size_t read_bias, write_bias;
int i, fd, child_id;
char buf[file_sz];
pid_t pid;
/* Only allow multiples for simplicity */
ASSERT_EQ(!(n_readers % n_writers) || !(n_writers % n_readers), true);
read_bias = n_writers / n_readers ?: 1;
write_bias = n_readers / n_writers ?: 1;
/* prep a file to send */
fd = open("/tmp/", O_TMPFILE | O_RDWR, 0600);
ASSERT_GE(fd, 0);
memset(buf, 0xac, file_sz);
ASSERT_EQ(write(fd, buf, file_sz), file_sz);
/* spawn children */
for (child_id = 0; child_id < n_children; child_id++) {
pid = fork();
ASSERT_NE(pid, -1);
if (!pid)
break;
}
/* parent waits for all children */
if (pid) {
for (i = 0; i < n_children; i++) {
int status;
wait(&status);
EXPECT_EQ(status, 0);
}
return;
}
/* Split threads for reading and writing */
if (child_id < n_readers) {
size_t left = data * read_bias;
char rb[8001];
while (left) {
int res;
res = recv(self->cfd, rb,
left > sizeof(rb) ? sizeof(rb) : left, 0);
EXPECT_GE(res, 0);
left -= res;
}
} else {
size_t left = data * write_bias;
while (left) {
int res;
ASSERT_EQ(lseek(fd, 0, SEEK_SET), 0);
if (sendpg)
res = sendfile(self->fd, fd, NULL,
left > file_sz ? file_sz : left);
else
res = send(self->fd, buf,
left > file_sz ? file_sz : left, 0);
EXPECT_GE(res, 0);
left -= res;
}
}
}
TEST_F(tls, mutliproc_even)
{
test_mutliproc(_metadata, self, false, 6, 6);
}
TEST_F(tls, mutliproc_readers)
{
test_mutliproc(_metadata, self, false, 4, 12);
}
TEST_F(tls, mutliproc_writers)
{
test_mutliproc(_metadata, self, false, 10, 2);
}
TEST_F(tls, mutliproc_sendpage_even)
{
test_mutliproc(_metadata, self, true, 6, 6);
}
TEST_F(tls, mutliproc_sendpage_readers)
{
test_mutliproc(_metadata, self, true, 4, 12);
}
TEST_F(tls, mutliproc_sendpage_writers)
{
test_mutliproc(_metadata, self, true, 10, 2);
}
TEST_F(tls, control_msg)
{
if (self->notls)