[PATCH net] tcp: deny tcp_disconnect() when threads are waiting

8 views
Skip to first unread message

Eric Dumazet

unread,
May 26, 2023, 12:35:02 PM5/26/23
to David S . Miller, Jakub Kicinski, Paolo Abeni, net...@vger.kernel.org, eric.d...@gmail.com, Eric Dumazet, syzbot
Historically connect(AF_UNSPEC) has been abused by syzkaller
and other fuzzers to trigger various bugs.

A recent one triggers a divide-by-zero [1], and Paolo Abeni
was able to diagnose the issue.

tcp_recvmsg_locked() has tests about sk_state being not TCP_LISTEN
and TCP REPAIR mode being not used.

Then later if socket lock is released in sk_wait_data(),
another thread can call connect(AF_UNSPEC), then make this
socket a TCP listener.

When recvmsg() is resumed, it can eventually call tcp_cleanup_rbuf()
and attempt a divide by 0 in tcp_rcv_space_adjust() [1]

This patch adds a new socket field, counting number of threads
blocked in sk_wait_event() and inet_wait_for_connect().

If this counter is not zero, tcp_disconnect() returns an error.

This patch adds code in blocking socket system calls, thus should
not hurt performance of non blocking ones.

Note that we probably could revert commit 499350a5a6e7 ("tcp:
initialize rcv_mss to TCP_MIN_MSS instead of 0") to restore
original tcpi_rcv_mss meaning (was 0 if no payload was ever
received on a socket)

[1]
divide error: 0000 [#1] PREEMPT SMP KASAN
CPU: 0 PID: 13832 Comm: syz-executor.5 Not tainted 6.3.0-rc4-syzkaller-00224-g00c7b5f4ddc5 #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 03/02/2023
RIP: 0010:tcp_rcv_space_adjust+0x36e/0x9d0 net/ipv4/tcp_input.c:740
Code: 00 00 00 00 fc ff df 4c 89 64 24 48 8b 44 24 04 44 89 f9 41 81 c7 80 03 00 00 c1 e1 04 44 29 f0 48 63 c9 48 01 e9 48 0f af c1 <49> f7 f6 48 8d 04 41 48 89 44 24 40 48 8b 44 24 30 48 c1 e8 03 48
RSP: 0018:ffffc900033af660 EFLAGS: 00010206
RAX: 4a66b76cbade2c48 RBX: ffff888076640cc0 RCX: 00000000c334e4ac
RDX: 0000000000000000 RSI: dffffc0000000000 RDI: 0000000000000001
RBP: 00000000c324e86c R08: 0000000000000001 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000000 R12: ffff8880766417f8
R13: ffff888028fbb980 R14: 0000000000000000 R15: 0000000000010344
FS: 00007f5bffbfe700(0000) GS:ffff8880b9800000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000001b32f25000 CR3: 000000007ced0000 CR4: 00000000003506f0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
<TASK>
tcp_recvmsg_locked+0x100e/0x22e0 net/ipv4/tcp.c:2616
tcp_recvmsg+0x117/0x620 net/ipv4/tcp.c:2681
inet6_recvmsg+0x114/0x640 net/ipv6/af_inet6.c:670
sock_recvmsg_nosec net/socket.c:1017 [inline]
sock_recvmsg+0xe2/0x160 net/socket.c:1038
____sys_recvmsg+0x210/0x5a0 net/socket.c:2720
___sys_recvmsg+0xf2/0x180 net/socket.c:2762
do_recvmmsg+0x25e/0x6e0 net/socket.c:2856
__sys_recvmmsg net/socket.c:2935 [inline]
__do_sys_recvmmsg net/socket.c:2958 [inline]
__se_sys_recvmmsg net/socket.c:2951 [inline]
__x64_sys_recvmmsg+0x20f/0x260 net/socket.c:2951
do_syscall_x64 arch/x86/entry/common.c:50 [inline]
do_syscall_64+0x39/0xb0 arch/x86/entry/common.c:80
entry_SYSCALL_64_after_hwframe+0x63/0xcd
RIP: 0033:0x7f5c0108c0f9
Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 f1 19 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48
RSP: 002b:00007f5bffbfe168 EFLAGS: 00000246 ORIG_RAX: 000000000000012b
RAX: ffffffffffffffda RBX: 00007f5c011ac050 RCX: 00007f5c0108c0f9
RDX: 0000000000000001 RSI: 0000000020000bc0 RDI: 0000000000000003
RBP: 00007f5c010e7b39 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000122 R11: 0000000000000246 R12: 0000000000000000
R13: 00007f5c012cfb1f R14: 00007f5bffbfe300 R15: 0000000000022000
</TASK>

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Reported-by: syzbot <syzk...@googlegroups.com>
Reported-by: Paolo Abeni <pab...@redhat.com>
Diagnosed-by: Paolo Abeni <pab...@redhat.com>
Signed-off-by: Eric Dumazet <edum...@google.com>
---
include/net/sock.h | 4 ++++
net/ipv4/af_inet.c | 2 ++
net/ipv4/inet_connection_sock.c | 1 +
net/ipv4/tcp.c | 6 ++++++
4 files changed, 13 insertions(+)

diff --git a/include/net/sock.h b/include/net/sock.h
index 656ea89f60ff90d600d16f40302000db64057c64..b418425d7230c8cee81df34fcc66d771ea5085e9 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -336,6 +336,7 @@ struct sk_filter;
* @sk_cgrp_data: cgroup data for this cgroup
* @sk_memcg: this socket's memory cgroup association
* @sk_write_pending: a write to stream socket waits to start
+ * @sk_wait_pending: number of threads blocked on this socket
* @sk_state_change: callback to indicate change in the state of the sock
* @sk_data_ready: callback to indicate there is data to be processed
* @sk_write_space: callback to indicate there is bf sending space available
@@ -428,6 +429,7 @@ struct sock {
unsigned int sk_napi_id;
#endif
int sk_rcvbuf;
+ int sk_wait_pending;

struct sk_filter __rcu *sk_filter;
union {
@@ -1174,6 +1176,7 @@ static inline void sock_rps_reset_rxhash(struct sock *sk)

#define sk_wait_event(__sk, __timeo, __condition, __wait) \
({ int __rc; \
+ __sk->sk_wait_pending++; \
release_sock(__sk); \
__rc = __condition; \
if (!__rc) { \
@@ -1183,6 +1186,7 @@ static inline void sock_rps_reset_rxhash(struct sock *sk)
} \
sched_annotate_sleep(); \
lock_sock(__sk); \
+ __sk->sk_wait_pending--; \
__rc = __condition; \
__rc; \
})
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index c4aab3aacbd89e35520583ab5e17b4e15e53b87e..4a76ebf793b857f5e64d440870d3cca737d1dac2 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -586,6 +586,7 @@ static long inet_wait_for_connect(struct sock *sk, long timeo, int writebias)

add_wait_queue(sk_sleep(sk), &wait);
sk->sk_write_pending += writebias;
+ sk->sk_wait_pending++;

/* Basic assumption: if someone sets sk->sk_err, he _must_
* change state of the socket from TCP_SYN_*.
@@ -601,6 +602,7 @@ static long inet_wait_for_connect(struct sock *sk, long timeo, int writebias)
}
remove_wait_queue(sk_sleep(sk), &wait);
sk->sk_write_pending -= writebias;
+ sk->sk_wait_pending--;
return timeo;
}

diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 65ad4251f6fd849c7aeb0d981dc337111fb246b0..1386787eaf1a53e2d1364f3782a68dd2206bde9d 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -1142,6 +1142,7 @@ struct sock *inet_csk_clone_lock(const struct sock *sk,
if (newsk) {
struct inet_connection_sock *newicsk = inet_csk(newsk);

+ newsk->sk_wait_pending = 0;
inet_sk_set_state(newsk, TCP_SYN_RECV);
newicsk->icsk_bind_hash = NULL;
newicsk->icsk_bind2_hash = NULL;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index a60f6f4e7cd91f5d06a78b1ae2bf61218bc52ca5..635607ac37e4585a3f730bf93e80e6673733cd55 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3081,6 +3081,12 @@ int tcp_disconnect(struct sock *sk, int flags)
int old_state = sk->sk_state;
u32 seq;

+ /* Deny disconnect if other threads are blocked in sk_wait_event()
+ * or inet_wait_for_connect().
+ */
+ if (sk->sk_wait_pending)
+ return -EBUSY;
+
if (old_state != TCP_CLOSE)
tcp_set_state(sk, TCP_CLOSE);

--
2.41.0.rc0.172.g3f132b7071-goog

Paolo Abeni

unread,
May 29, 2023, 5:56:03 AM5/29/23
to Eric Dumazet, David S . Miller, Jakub Kicinski, net...@vger.kernel.org, eric.d...@gmail.com, syzbot
Thanks Eric!

FWIW I gave this a spin in my testbed and solves the issue for me.

Tested-by: Paolo Abeni <pab...@redhat.com>

patchwork-b...@kernel.org

unread,
May 30, 2023, 1:20:22 AM5/30/23
to Eric Dumazet, da...@davemloft.net, ku...@kernel.org, pab...@redhat.com, net...@vger.kernel.org, eric.d...@gmail.com, syzk...@googlegroups.com
Hello:

This patch was applied to netdev/net.git (main)
by Jakub Kicinski <ku...@kernel.org>:

On Fri, 26 May 2023 16:34:58 +0000 you wrote:
> Historically connect(AF_UNSPEC) has been abused by syzkaller
> and other fuzzers to trigger various bugs.
>
> A recent one triggers a divide-by-zero [1], and Paolo Abeni
> was able to diagnose the issue.
>
> tcp_recvmsg_locked() has tests about sk_state being not TCP_LISTEN
> and TCP REPAIR mode being not used.
>
> [...]

Here is the summary with links:
- [net] tcp: deny tcp_disconnect() when threads are waiting
https://git.kernel.org/netdev/net/c/4faeee0cf8a5

You are awesome, thank you!
--
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html


Greg Kroah-Hartman

unread,
Jun 7, 2023, 4:22:29 PM6/7/23
to sta...@vger.kernel.org, Greg Kroah-Hartman, pat...@lists.linux.dev, syzbot, Paolo Abeni, Eric Dumazet, Jakub Kicinski, Sasha Levin
From: Eric Dumazet <edum...@google.com>

[ Upstream commit 4faeee0cf8a5d88d63cdbc3bab124fb0e6aed08c ]

Historically connect(AF_UNSPEC) has been abused by syzkaller
and other fuzzers to trigger various bugs.

A recent one triggers a divide-by-zero [1], and Paolo Abeni
was able to diagnose the issue.

tcp_recvmsg_locked() has tests about sk_state being not TCP_LISTEN
and TCP REPAIR mode being not used.

Tested-by: Paolo Abeni <pab...@redhat.com>
Link: https://lore.kernel.org/r/20230526163458.2...@google.com
Signed-off-by: Jakub Kicinski <ku...@kernel.org>
Signed-off-by: Sasha Levin <sas...@kernel.org>
---
include/net/sock.h | 4 ++++
net/ipv4/af_inet.c | 2 ++
net/ipv4/inet_connection_sock.c | 1 +
net/ipv4/tcp.c | 6 ++++++
4 files changed, 13 insertions(+)

diff --git a/include/net/sock.h b/include/net/sock.h
index 9cd0354221507..45e46a1c4afc6 100644
index 70fd769f1174b..daeec363b0976 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -586,6 +586,7 @@ static long inet_wait_for_connect(struct sock *sk, long timeo, int writebias)

add_wait_queue(sk_sleep(sk), &wait);
sk->sk_write_pending += writebias;
+ sk->sk_wait_pending++;

/* Basic assumption: if someone sets sk->sk_err, he _must_
* change state of the socket from TCP_SYN_*.
@@ -601,6 +602,7 @@ static long inet_wait_for_connect(struct sock *sk, long timeo, int writebias)
}
remove_wait_queue(sk_sleep(sk), &wait);
sk->sk_write_pending -= writebias;
+ sk->sk_wait_pending--;
return timeo;
}

diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 65ad4251f6fd8..1386787eaf1a5 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -1142,6 +1142,7 @@ struct sock *inet_csk_clone_lock(const struct sock *sk,
if (newsk) {
struct inet_connection_sock *newicsk = inet_csk(newsk);

+ newsk->sk_wait_pending = 0;
inet_sk_set_state(newsk, TCP_SYN_RECV);
newicsk->icsk_bind_hash = NULL;
newicsk->icsk_bind2_hash = NULL;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index ed63ee8f0d7e3..f4b243cc7f4a5 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3080,6 +3080,12 @@ int tcp_disconnect(struct sock *sk, int flags)
int old_state = sk->sk_state;
u32 seq;

+ /* Deny disconnect if other threads are blocked in sk_wait_event()
+ * or inet_wait_for_connect().
+ */
+ if (sk->sk_wait_pending)
+ return -EBUSY;
+
if (old_state != TCP_CLOSE)
tcp_set_state(sk, TCP_CLOSE);

--
2.39.2



Greg Kroah-Hartman

unread,
Jun 7, 2023, 4:33:44 PM6/7/23
to sta...@vger.kernel.org, Greg Kroah-Hartman, pat...@lists.linux.dev, syzbot, Paolo Abeni, Eric Dumazet, Jakub Kicinski, Sasha Levin
index cfbd241935a30..c140c6f86e4b1 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -312,6 +312,7 @@ struct sock_common {
* @sk_cgrp_data: cgroup data for this cgroup
* @sk_memcg: this socket's memory cgroup association
* @sk_write_pending: a write to stream socket waits to start
+ * @sk_wait_pending: number of threads blocked on this socket
* @sk_state_change: callback to indicate change in the state of the sock
* @sk_data_ready: callback to indicate there is data to be processed
* @sk_write_space: callback to indicate there is bf sending space available
@@ -392,6 +393,7 @@ struct sock {
unsigned int sk_napi_id;
#endif
int sk_rcvbuf;
+ int sk_wait_pending;

struct sk_filter __rcu *sk_filter;
union {
@@ -1010,6 +1012,7 @@ static inline void sock_rps_reset_rxhash(struct sock *sk)

#define sk_wait_event(__sk, __timeo, __condition, __wait) \
({ int __rc; \
+ __sk->sk_wait_pending++; \
release_sock(__sk); \
__rc = __condition; \
if (!__rc) { \
@@ -1019,6 +1022,7 @@ static inline void sock_rps_reset_rxhash(struct sock *sk)
} \
sched_annotate_sleep(); \
lock_sock(__sk); \
+ __sk->sk_wait_pending--; \
__rc = __condition; \
__rc; \
})
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index fb142ea730060..7c902a1efbbf6 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -578,6 +578,7 @@ static long inet_wait_for_connect(struct sock *sk, long timeo, int writebias)

add_wait_queue(sk_sleep(sk), &wait);
sk->sk_write_pending += writebias;
+ sk->sk_wait_pending++;

/* Basic assumption: if someone sets sk->sk_err, he _must_
* change state of the socket from TCP_SYN_*.
@@ -593,6 +594,7 @@ static long inet_wait_for_connect(struct sock *sk, long timeo, int writebias)
}
remove_wait_queue(sk_sleep(sk), &wait);
sk->sk_write_pending -= writebias;
+ sk->sk_wait_pending--;
return timeo;
}

diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 0f9085220ecf9..7392a744c677e 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -826,6 +826,7 @@ struct sock *inet_csk_clone_lock(const struct sock *sk,
if (newsk) {
struct inet_connection_sock *newicsk = inet_csk(newsk);

+ newsk->sk_wait_pending = 0;
inet_sk_set_state(newsk, TCP_SYN_RECV);
newicsk->icsk_bind_hash = NULL;

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index b51e0a1e15b67..22218ad71409f 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2568,6 +2568,12 @@ int tcp_disconnect(struct sock *sk, int flags)

Greg Kroah-Hartman

unread,
Jun 7, 2023, 4:38:19 PM6/7/23
to sta...@vger.kernel.org, Greg Kroah-Hartman, pat...@lists.linux.dev, syzbot, Paolo Abeni, Eric Dumazet, Jakub Kicinski, Sasha Levin
index beb1b747fb09d..f11b98bd0244c 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -333,6 +333,7 @@ struct sk_filter;
* @sk_cgrp_data: cgroup data for this cgroup
* @sk_memcg: this socket's memory cgroup association
* @sk_write_pending: a write to stream socket waits to start
+ * @sk_wait_pending: number of threads blocked on this socket
* @sk_state_change: callback to indicate change in the state of the sock
* @sk_data_ready: callback to indicate there is data to be processed
* @sk_write_space: callback to indicate there is bf sending space available
@@ -425,6 +426,7 @@ struct sock {
unsigned int sk_napi_id;
#endif
int sk_rcvbuf;
+ int sk_wait_pending;

struct sk_filter __rcu *sk_filter;
union {
@@ -1170,6 +1172,7 @@ static inline void sock_rps_reset_rxhash(struct sock *sk)

#define sk_wait_event(__sk, __timeo, __condition, __wait) \
({ int __rc; \
+ __sk->sk_wait_pending++; \
release_sock(__sk); \
__rc = __condition; \
if (!__rc) { \
@@ -1179,6 +1182,7 @@ static inline void sock_rps_reset_rxhash(struct sock *sk)
} \
sched_annotate_sleep(); \
lock_sock(__sk); \
+ __sk->sk_wait_pending--; \
__rc = __condition; \
__rc; \
})
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 5fd0ff5734e36..ebb737ac9e894 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -589,6 +589,7 @@ static long inet_wait_for_connect(struct sock *sk, long timeo, int writebias)

add_wait_queue(sk_sleep(sk), &wait);
sk->sk_write_pending += writebias;
+ sk->sk_wait_pending++;

/* Basic assumption: if someone sets sk->sk_err, he _must_
* change state of the socket from TCP_SYN_*.
@@ -604,6 +605,7 @@ static long inet_wait_for_connect(struct sock *sk, long timeo, int writebias)
}
remove_wait_queue(sk_sleep(sk), &wait);
sk->sk_write_pending -= writebias;
+ sk->sk_wait_pending--;
return timeo;
}

diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 916075e00d066..8e35ea66d930a 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -1143,6 +1143,7 @@ struct sock *inet_csk_clone_lock(const struct sock *sk,
if (newsk) {
struct inet_connection_sock *newicsk = inet_csk(newsk);

+ newsk->sk_wait_pending = 0;
inet_sk_set_state(newsk, TCP_SYN_RECV);
newicsk->icsk_bind_hash = NULL;
newicsk->icsk_bind2_hash = NULL;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 021a8bf6a1898..c77b57d4a832a 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3079,6 +3079,12 @@ int tcp_disconnect(struct sock *sk, int flags)

Greg Kroah-Hartman

unread,
Jun 7, 2023, 4:47:41 PM6/7/23
to sta...@vger.kernel.org, Greg Kroah-Hartman, pat...@lists.linux.dev, syzbot, Paolo Abeni, Eric Dumazet, Jakub Kicinski, Sasha Levin
index 651dc0a7bbd58..3da0601b573ed 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -326,6 +326,7 @@ struct bpf_local_storage;
* @sk_cgrp_data: cgroup data for this cgroup
* @sk_memcg: this socket's memory cgroup association
* @sk_write_pending: a write to stream socket waits to start
+ * @sk_wait_pending: number of threads blocked on this socket
* @sk_state_change: callback to indicate change in the state of the sock
* @sk_data_ready: callback to indicate there is data to be processed
* @sk_write_space: callback to indicate there is bf sending space available
@@ -410,6 +411,7 @@ struct sock {
unsigned int sk_napi_id;
#endif
int sk_rcvbuf;
+ int sk_wait_pending;

struct sk_filter __rcu *sk_filter;
union {
@@ -1095,6 +1097,7 @@ static inline void sock_rps_reset_rxhash(struct sock *sk)

#define sk_wait_event(__sk, __timeo, __condition, __wait) \
({ int __rc; \
+ __sk->sk_wait_pending++; \
release_sock(__sk); \
__rc = __condition; \
if (!__rc) { \
@@ -1104,6 +1107,7 @@ static inline void sock_rps_reset_rxhash(struct sock *sk)
} \
sched_annotate_sleep(); \
lock_sock(__sk); \
+ __sk->sk_wait_pending--; \
__rc = __condition; \
__rc; \
})
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 800c2c7607e1a..acb4887351daf 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -584,6 +584,7 @@ static long inet_wait_for_connect(struct sock *sk, long timeo, int writebias)

add_wait_queue(sk_sleep(sk), &wait);
sk->sk_write_pending += writebias;
+ sk->sk_wait_pending++;

/* Basic assumption: if someone sets sk->sk_err, he _must_
* change state of the socket from TCP_SYN_*.
@@ -599,6 +600,7 @@ static long inet_wait_for_connect(struct sock *sk, long timeo, int writebias)
}
remove_wait_queue(sk_sleep(sk), &wait);
sk->sk_write_pending -= writebias;
+ sk->sk_wait_pending--;
return timeo;
}

diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index e05dd87848f78..406305aaec904 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -839,6 +839,7 @@ struct sock *inet_csk_clone_lock(const struct sock *sk,
if (newsk) {
struct inet_connection_sock *newicsk = inet_csk(newsk);

+ newsk->sk_wait_pending = 0;
inet_sk_set_state(newsk, TCP_SYN_RECV);
newicsk->icsk_bind_hash = NULL;

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index eecce63ba25e3..edb743bcbc391 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2748,6 +2748,12 @@ int tcp_disconnect(struct sock *sk, int flags)

Greg Kroah-Hartman

unread,
Jun 7, 2023, 4:54:05 PM6/7/23
to sta...@vger.kernel.org, Greg Kroah-Hartman, pat...@lists.linux.dev, syzbot, Paolo Abeni, Eric Dumazet, Jakub Kicinski, Sasha Levin
index fa19c6ba24441..06fdb8f207b69 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -309,6 +309,7 @@ struct bpf_sk_storage;
* @sk_cgrp_data: cgroup data for this cgroup
* @sk_memcg: this socket's memory cgroup association
* @sk_write_pending: a write to stream socket waits to start
+ * @sk_wait_pending: number of threads blocked on this socket
* @sk_state_change: callback to indicate change in the state of the sock
* @sk_data_ready: callback to indicate there is data to be processed
* @sk_write_space: callback to indicate there is bf sending space available
@@ -390,6 +391,7 @@ struct sock {
unsigned int sk_napi_id;
#endif
int sk_rcvbuf;
+ int sk_wait_pending;

struct sk_filter __rcu *sk_filter;
union {
@@ -1019,6 +1021,7 @@ static inline void sock_rps_reset_rxhash(struct sock *sk)

#define sk_wait_event(__sk, __timeo, __condition, __wait) \
({ int __rc; \
+ __sk->sk_wait_pending++; \
release_sock(__sk); \
__rc = __condition; \
if (!__rc) { \
@@ -1028,6 +1031,7 @@ static inline void sock_rps_reset_rxhash(struct sock *sk)
} \
sched_annotate_sleep(); \
lock_sock(__sk); \
+ __sk->sk_wait_pending--; \
__rc = __condition; \
__rc; \
})
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 3c6412cb4b486..a3f77ac173b53 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -578,6 +578,7 @@ static long inet_wait_for_connect(struct sock *sk, long timeo, int writebias)

add_wait_queue(sk_sleep(sk), &wait);
sk->sk_write_pending += writebias;
+ sk->sk_wait_pending++;

/* Basic assumption: if someone sets sk->sk_err, he _must_
* change state of the socket from TCP_SYN_*.
@@ -593,6 +594,7 @@ static long inet_wait_for_connect(struct sock *sk, long timeo, int writebias)
}
remove_wait_queue(sk_sleep(sk), &wait);
sk->sk_write_pending -= writebias;
+ sk->sk_wait_pending--;
return timeo;
}

diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 04593893e0c63..374a0c3f39cc1 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -824,6 +824,7 @@ struct sock *inet_csk_clone_lock(const struct sock *sk,
if (newsk) {
struct inet_connection_sock *newicsk = inet_csk(newsk);

+ newsk->sk_wait_pending = 0;
inet_sk_set_state(newsk, TCP_SYN_RECV);
newicsk->icsk_bind_hash = NULL;

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index a74965a6a54f4..e427eabc7f278 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2581,6 +2581,12 @@ int tcp_disconnect(struct sock *sk, int flags)

Greg Kroah-Hartman

unread,
Jun 7, 2023, 4:57:35 PM6/7/23
to sta...@vger.kernel.org, Greg Kroah-Hartman, pat...@lists.linux.dev, syzbot, Paolo Abeni, Eric Dumazet, Jakub Kicinski, Sasha Levin
index 0309d2311487d..104d80d850e41 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -334,6 +334,7 @@ struct bpf_local_storage;
* @sk_cgrp_data: cgroup data for this cgroup
* @sk_memcg: this socket's memory cgroup association
* @sk_write_pending: a write to stream socket waits to start
+ * @sk_wait_pending: number of threads blocked on this socket
* @sk_state_change: callback to indicate change in the state of the sock
* @sk_data_ready: callback to indicate there is data to be processed
* @sk_write_space: callback to indicate there is bf sending space available
@@ -418,6 +419,7 @@ struct sock {
unsigned int sk_napi_id;
#endif
int sk_rcvbuf;
+ int sk_wait_pending;

struct sk_filter __rcu *sk_filter;
union {
@@ -1115,6 +1117,7 @@ static inline void sock_rps_reset_rxhash(struct sock *sk)

#define sk_wait_event(__sk, __timeo, __condition, __wait) \
({ int __rc; \
+ __sk->sk_wait_pending++; \
release_sock(__sk); \
__rc = __condition; \
if (!__rc) { \
@@ -1124,6 +1127,7 @@ static inline void sock_rps_reset_rxhash(struct sock *sk)
} \
sched_annotate_sleep(); \
lock_sock(__sk); \
+ __sk->sk_wait_pending--; \
__rc = __condition; \
__rc; \
})
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 7fa1b0a45176f..e46b11507edc2 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -587,6 +587,7 @@ static long inet_wait_for_connect(struct sock *sk, long timeo, int writebias)

add_wait_queue(sk_sleep(sk), &wait);
sk->sk_write_pending += writebias;
+ sk->sk_wait_pending++;

/* Basic assumption: if someone sets sk->sk_err, he _must_
* change state of the socket from TCP_SYN_*.
@@ -602,6 +603,7 @@ static long inet_wait_for_connect(struct sock *sk, long timeo, int writebias)
}
remove_wait_queue(sk_sleep(sk), &wait);
sk->sk_write_pending -= writebias;
+ sk->sk_wait_pending--;
return timeo;
}

diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 29ec42c1f5d09..4fb0506430774 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -963,6 +963,7 @@ struct sock *inet_csk_clone_lock(const struct sock *sk,
if (newsk) {
struct inet_connection_sock *newicsk = inet_csk(newsk);

+ newsk->sk_wait_pending = 0;
inet_sk_set_state(newsk, TCP_SYN_RECV);
newicsk->icsk_bind_hash = NULL;

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index b91ddd2a2f96d..682503227effe 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2972,6 +2972,12 @@ int tcp_disconnect(struct sock *sk, int flags)
Reply all
Reply to author
Forward
0 new messages