use-after-free in ip6_setup_cork

1,657 views
Skip to first unread message

Dmitry Vyukov

unread,
Nov 28, 2015, 6:00:47 AM11/28/15
to David S. Miller, Alexey Kuznetsov, James Morris, Hideaki YOSHIFUJI, Patrick McHardy, netdev, LKML, Eric Dumazet, William Dauchy, Rainer Weikusat, syzkaller, Kostya Serebryany, Alexander Potapenko, Sasha Levin
Hello,

The following program triggers use-after-free in ip6_setup_cork:

// autogenerated by syzkaller (http://github.com/google/syzkaller)
#include <syscall.h>
#include <string.h>
#include <stdint.h>
#include <pthread.h>

int r1, r3, r4;

void *thr0(void *arg)
{
*(uint64_t*)0x20000d90 = 0x20000fd3;
*(uint64_t*)0x20000d98 = 0x2d;
*(uint64_t*)0x20000da0 = 0x20000fa4;
*(uint64_t*)0x20000da8 = 0x5c;
*(uint64_t*)0x20000db0 = 0x20000fac;
*(uint64_t*)0x20000db8 = 0x71;
*(uint64_t*)0x20000dc0 = 0x20000fb6;
*(uint64_t*)0x20000dc8 = 0xec;
*(uint64_t*)0x20000dd0 = 0x20000fae;
*(uint64_t*)0x20000dd8 = 0x70;
syscall(SYS_vmsplice, r4, 0x20000d90ul, 0x5ul, 0x2ul, 0, 0);
return 0;
}

void *thr1(void *arg)
{
memcpy((void*)0x200025e5, "\xbb\xef\x44\xd6\x33\x93", 6);
syscall(SYS_setsockopt, r1, 0x29ul, 0x6ul, 0x200025e5ul, 0x6ul, 0);
return 0;
}

void *thr2(void *arg)
{
syscall(SYS_splice, r3, 0x0ul, r1, 0x0ul, 0xaful, 0x3ul);
return 0;
}

int main()
{
syscall(SYS_mmap, 0x20000000ul, 0x10000ul, 0x3ul, 0x32ul,
0xfffffffffffffffful, 0x0ul);
r1 = syscall(SYS_socket, 0xaul, 0x80002ul, 0x0ul, 0, 0, 0);
syscall(SYS_pipe2, 0x20001000ul, 0x800ul, 0, 0, 0, 0);
r3 = *(uint32_t*)0x20001000;
r4 = *(uint32_t*)0x20001004;
memcpy((void*)0x20003000,
"\x0a\x00\x33\xe2\x61\x44\xfe\x90\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x5b\x11\x6d\x22",
28);
long r6 = syscall(SYS_connect, r1, 0x20003000ul, 0x1cul, 0, 0, 0);
pthread_t th[4];
pthread_create(&th[0], 0, thr0, 0);
pthread_create(&th[1], 0, thr1, 0);
pthread_create(&th[2], 0, thr2, 0);
pthread_create(&th[3], 0, thr1, 0);
pthread_join(th[0], 0);
pthread_join(th[1], 0);
pthread_join(th[2], 0);
pthread_join(th[3], 0);
return 0;
}


==================================================================
BUG: KASAN: use-after-free in ip6_setup_cork+0xeb8/0x11a0 at addr
ffff88006d36da08
Read of size 4 by task executor/23958
=============================================================================
BUG kmalloc-64 (Not tainted): kasan: bad access detected
-----------------------------------------------------------------------------

Disabling lock debugging due to kernel taint
INFO: Allocated in sock_kmalloc+0x7f/0xc0 age=9 cpu=3 pid=23946
[< none >] ___slab_alloc+0x41c/0x460 mm/slub.c:2438
[< none >] __slab_alloc+0x1b/0x30 mm/slub.c:2467
[< inline >] slab_alloc_node mm/slub.c:2530
[< inline >] slab_alloc mm/slub.c:2572
[< none >] __kmalloc+0x156/0x1b0 mm/slub.c:3532
[< inline >] kmalloc include/linux/slab.h:463
[< none >] sock_kmalloc+0x7f/0xc0 net/core/sock.c:1774
[< none >] do_ipv6_setsockopt.isra.8+0x779/0x2a60
net/ipv6/ipv6_sockglue.c:483
[< none >] ipv6_setsockopt+0x9b/0x140 net/ipv6/ipv6_sockglue.c:885
[< none >] udpv6_setsockopt+0x45/0x80 net/ipv6/udp.c:1425
[< none >] sock_common_setsockopt+0x95/0xd0 net/core/sock.c:2645
[< inline >] SYSC_setsockopt net/socket.c:1757
[< none >] SyS_setsockopt+0x158/0x240 net/socket.c:1736
[< none >] entry_SYSCALL_64_fastpath+0x12/0x71
arch/x86/entry/entry_64.S:185

INFO: Freed in sock_kfree_s+0x29/0x70 age=11 cpu=2 pid=23957
[< none >] __slab_free+0x1fb/0x300 mm/slub.c:2648
[< inline >] slab_free mm/slub.c:2803
[< none >] kfree+0x13b/0x160 mm/slub.c:3632
[< inline >] __sock_kfree_s net/core/sock.c:1795
[< none >] sock_kfree_s+0x29/0x70 net/core/sock.c:1801
[< none >] do_ipv6_setsockopt.isra.8+0x815/0x2a60
net/ipv6/ipv6_sockglue.c:506
[< none >] ipv6_setsockopt+0x9b/0x140 net/ipv6/ipv6_sockglue.c:885
[< none >] udpv6_setsockopt+0x45/0x80 net/ipv6/udp.c:1425
[< none >] sock_common_setsockopt+0x95/0xd0 net/core/sock.c:2645
[< inline >] SYSC_setsockopt net/socket.c:1757
[< none >] SyS_setsockopt+0x158/0x240 net/socket.c:1736
[< none >] entry_SYSCALL_64_fastpath+0x12/0x71
arch/x86/entry/entry_64.S:185

INFO: Slab 0xffffea0001b4db00 objects=20 used=8 fp=0xffff88006d36da08
flags=0x500000000004080
INFO: Object 0xffff88006d36da08 @offset=6664 fp=0xffff88006d36d3e8
CPU: 3 PID: 23958 Comm: executor Tainted: G B 4.4.0-rc2+ #1
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
00000000ffffffff ffff88003c32f348 ffffffff81a2bdb0 ffff88003e807840
ffff88006d36da08 ffff88006d36c000 ffff88003c32f378 ffffffff814541d4
ffff88003e807840 ffffea0001b4db00 ffff88006d36da08 ffff88006d126000

Call Trace:
[<ffffffff8145b9ae>] __asan_report_load4_noabort+0x3e/0x40
mm/kasan/report.c:279
[<ffffffff829de1e8>] ip6_setup_cork+0xeb8/0x11a0 net/ipv6/ip6_output.c:1200
[<ffffffff829e9e2a>] ip6_make_skb+0x19a/0x3b0 net/ipv6/ip6_output.c:1769
[<ffffffff82a3c3e6>] udpv6_sendmsg+0x11c6/0x2120 net/ipv6/udp.c:1314
[<ffffffff829038cc>] inet_sendmsg+0x23c/0x340 net/ipv4/af_inet.c:733
[< inline >] sock_sendmsg_nosec net/socket.c:610
[<ffffffff826adc2a>] sock_sendmsg+0xca/0x110 net/socket.c:620
[<ffffffff826ae057>] kernel_sendmsg+0x47/0x60 net/socket.c:628
[<ffffffff826b564a>] sock_no_sendpage+0xfa/0x130 net/core/sock.c:2270
[<ffffffff826ac6d0>] kernel_sendpage+0x90/0xe0 net/socket.c:3278
[<ffffffff826ac7c5>] sock_sendpage+0xa5/0xd0 net/socket.c:765
[<ffffffff814feca4>] pipe_to_sendpage+0x264/0x320 fs/splice.c:720
[< inline >] splice_from_pipe_feed fs/splice.c:772
[<ffffffff81501435>] __splice_from_pipe+0x235/0x6d0 fs/splice.c:897
[<ffffffff81504677>] splice_from_pipe+0xf7/0x140 fs/splice.c:932
[<ffffffff81504700>] generic_splice_sendpage+0x40/0x50 fs/splice.c:1105
[< inline >] do_splice_from fs/splice.c:1124
[< inline >] do_splice fs/splice.c:1400
[< inline >] SYSC_splice fs/splice.c:1703
[<ffffffff815052f8>] SyS_splice+0x7c8/0x15c0 fs/splice.c:1686
[<ffffffff82ddc0ae>] entry_SYSCALL_64_fastpath+0x12/0x71
arch/x86/entry/entry_64.S:185
==================================================================


On commit 78c4a49a69e910a162b05e4e8727b9bdbf948f13 (Nov 25).

Eric Dumazet

unread,
Nov 28, 2015, 12:11:25 PM11/28/15
to Dmitry Vyukov, Vlad Yasevich, David S. Miller, Alexey Kuznetsov, James Morris, Hideaki YOSHIFUJI, Patrick McHardy, netdev, LKML, Eric Dumazet, William Dauchy, Rainer Weikusat, syzkaller, Kostya Serebryany, Alexander Potapenko, Sasha Levin
> --

Thanks for the report.

Bug probably added in :

commit 03485f2adcde0c2d4e9228b659be78e872486bbb
Author: Vlad Yasevich <vyas...@gmail.com>
Date: Sat Jan 31 10:40:17 2015 -0500

udpv6: Add lockless sendmsg() support

This commit adds the same functionaliy to IPv6 that
commit 903ab86d195cca295379699299c5fc10beba31c7
Author: Herbert Xu <her...@gondor.apana.org.au>
Date: Tue Mar 1 02:36:48 2011 +0000

udp: Add lockless transmit path

added to IPv4.

UDP transmit path can now run without a socket lock,
thus allowing multiple threads to send to a single socket
more efficiently.
This is only used when corking/MSG_MORE is not used.

Signed-off-by: Vladislav Yasevich <vyas...@redhat.com>
Signed-off-by: David S. Miller <da...@davemloft.net>



Eric Dumazet

unread,
Nov 28, 2015, 12:23:59 PM11/28/15
to Dmitry Vyukov, Vlad Yasevich, David S. Miller, Alexey Kuznetsov, James Morris, Hideaki YOSHIFUJI, Patrick McHardy, netdev, LKML, Eric Dumazet, William Dauchy, Rainer Weikusat, syzkaller, Kostya Serebryany, Alexander Potapenko, Sasha Levin
Oh well, bug is older than that.

inet6_sk(sk)->opt handling is completely racy, and has some xchg() calls
to hide how ugly it is.




Eric Dumazet

unread,
Nov 29, 2015, 10:29:05 PM11/29/15
to Dmitry Vyukov, Vlad Yasevich, David S. Miller, Alexey Kuznetsov, James Morris, Hideaki YOSHIFUJI, Patrick McHardy, netdev, LKML, Eric Dumazet, William Dauchy, Rainer Weikusat, syzkaller, Kostya Serebryany, Alexander Potapenko, Sasha Levin
I have a patch that looks ready for submission.

While testing it, I also saw a problem in ipv6_update_options(),
calling sk_dst_reset() which directly conflicts with ip6_dst_store()

(ip6_dst_store() uses spin_lock(&sk->sk_dst_lock), which basically
protects councurrent ip6_dst_store() ... :( )



Eric Dumazet

unread,
Nov 29, 2015, 10:38:00 PM11/29/15
to Dmitry Vyukov, Vlad Yasevich, David S. Miller, Alexey Kuznetsov, James Morris, Hideaki YOSHIFUJI, Patrick McHardy, netdev, LKML, Eric Dumazet, William Dauchy, Rainer Weikusat, syzkaller, Kostya Serebryany, Alexander Potapenko, Sasha Levin
From: Eric Dumazet <edum...@google.com>

This patch addresses multiple problems :

UDP/RAW sendmsg() need to get a stable struct ipv6_txoptions
while socket is not locked : Other threads can change np->opt
concurrently. Dmitry posted a syzkaller
(http://github.com/google/syzkaller) program desmonstrating
use-after-free.

Starting with TCP/DCCP lockless listeners, tcp_v6_syn_recv_sock()
and dccp_v6_request_recv_sock() also need to use RCU protection
to dereference np->opt once (before calling ipv6_dup_options())

This patch adds full RCU protection to np->opt

Reported-by: Dmitry Vyukov <dvy...@google.com>
Signed-off-by: Eric Dumazet <edum...@google.com>
---
include/linux/ipv6.h | 2 -
include/net/ipv6.h | 21 +++++++++++++++++-
net/dccp/ipv6.c | 33 ++++++++++++++++++-----------
net/ipv6/af_inet6.c | 13 +++++++----
net/ipv6/datagram.c | 4 ++-
net/ipv6/exthdrs.c | 3 +-
net/ipv6/inet6_connection_sock.c | 11 +++++++--
net/ipv6/ipv6_sockglue.c | 33 +++++++++++++++++++----------
net/ipv6/raw.c | 8 +++++--
net/ipv6/syncookies.c | 2 -
net/ipv6/tcp_ipv6.c | 28 ++++++++++++++----------
net/ipv6/udp.c | 8 +++++--
net/l2tp/l2tp_ip6.c | 8 +++++--
13 files changed, 122 insertions(+), 52 deletions(-)

diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 0ef2a97ccdb5..402753bccafa 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -227,7 +227,7 @@ struct ipv6_pinfo {
struct ipv6_ac_socklist *ipv6_ac_list;
struct ipv6_fl_socklist __rcu *ipv6_fl_list;

- struct ipv6_txoptions *opt;
+ struct ipv6_txoptions __rcu *opt;
struct sk_buff *pktoptions;
struct sk_buff *rxpmtu;
struct inet6_cork cork;
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index e1a10b0ac0b0..d1da7d7ecb93 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -205,6 +205,7 @@ extern rwlock_t ip6_ra_lock;
*/

struct ipv6_txoptions {
+ atomic_t refcnt;
/* Length of this structure */
int tot_len;

@@ -217,7 +218,7 @@ struct ipv6_txoptions {
struct ipv6_opt_hdr *dst0opt;
struct ipv6_rt_hdr *srcrt; /* Routing Header */
struct ipv6_opt_hdr *dst1opt;
-
+ struct rcu_head rcu;
/* Option buffer, as read by IPV6_PKTOPTIONS, starts here. */
};

@@ -252,6 +253,24 @@ struct ipv6_fl_socklist {
struct rcu_head rcu;
};

+static inline struct ipv6_txoptions *txopt_get(const struct ipv6_pinfo *np)
+{
+ struct ipv6_txoptions *opt;
+
+ rcu_read_lock();
+ opt = rcu_dereference(np->opt);
+ if (opt && !atomic_inc_not_zero(&opt->refcnt))
+ opt = NULL;
+ rcu_read_unlock();
+ return opt;
+}
+
+static inline void txopt_put(struct ipv6_txoptions *opt)
+{
+ if (opt && atomic_dec_and_test(&opt->refcnt))
+ kfree_rcu(opt, rcu);
+}
+
struct ip6_flowlabel *fl6_sock_lookup(struct sock *sk, __be32 label);
struct ipv6_txoptions *fl6_merge_options(struct ipv6_txoptions *opt_space,
struct ip6_flowlabel *fl,
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index db5fc2440a23..e7e0b9bc2a43 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -202,7 +202,9 @@ static int dccp_v6_send_response(const struct sock *sk, struct request_sock *req
security_req_classify_flow(req, flowi6_to_flowi(&fl6));


- final_p = fl6_update_dst(&fl6, np->opt, &final);
+ rcu_read_lock();
+ final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt), &final);
+ rcu_read_unlock();

dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
if (IS_ERR(dst)) {
@@ -219,7 +221,10 @@ static int dccp_v6_send_response(const struct sock *sk, struct request_sock *req
&ireq->ir_v6_loc_addr,
&ireq->ir_v6_rmt_addr);
fl6.daddr = ireq->ir_v6_rmt_addr;
- err = ip6_xmit(sk, skb, &fl6, np->opt, np->tclass);
+ rcu_read_lock();
+ err = ip6_xmit(sk, skb, &fl6, rcu_dereference(np->opt),
+ np->tclass);
+ rcu_read_unlock();
err = net_xmit_eval(err);
}

@@ -387,6 +392,7 @@ static struct sock *dccp_v6_request_recv_sock(const struct sock *sk,
struct inet_request_sock *ireq = inet_rsk(req);
struct ipv6_pinfo *newnp;
const struct ipv6_pinfo *np = inet6_sk(sk);
+ struct ipv6_txoptions *opt;
struct inet_sock *newinet;
struct dccp6_sock *newdp6;
struct sock *newsk;
@@ -488,13 +494,15 @@ static struct sock *dccp_v6_request_recv_sock(const struct sock *sk,
* Yes, keeping reference count would be much more clever, but we make
* one more one thing there: reattach optmem to newsk.
*/
- if (np->opt != NULL)
- newnp->opt = ipv6_dup_options(newsk, np->opt);
-
+ opt = rcu_dereference(np->opt);
+ if (opt) {
+ opt = ipv6_dup_options(newsk, opt);
+ RCU_INIT_POINTER(newnp->opt, opt);
+ }
inet_csk(newsk)->icsk_ext_hdr_len = 0;
- if (newnp->opt != NULL)
- inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen +
- newnp->opt->opt_flen);
+ if (opt)
+ inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
+ opt->opt_flen;

dccp_sync_mss(newsk, dst_mtu(dst));

@@ -757,6 +765,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
struct ipv6_pinfo *np = inet6_sk(sk);
struct dccp_sock *dp = dccp_sk(sk);
struct in6_addr *saddr = NULL, *final_p, final;
+ struct ipv6_txoptions *opt;
struct flowi6 fl6;
struct dst_entry *dst;
int addr_type;
@@ -856,7 +865,8 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
fl6.fl6_sport = inet->inet_sport;
security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));

- final_p = fl6_update_dst(&fl6, np->opt, &final);
+ opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk));
+ final_p = fl6_update_dst(&fl6, opt, &final);

dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
if (IS_ERR(dst)) {
@@ -876,9 +886,8 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
__ip6_dst_store(sk, dst, NULL, NULL);

icsk->icsk_ext_hdr_len = 0;
- if (np->opt != NULL)
- icsk->icsk_ext_hdr_len = (np->opt->opt_flen +
- np->opt->opt_nflen);
+ if (opt)
+ icsk->icsk_ext_hdr_len = opt->opt_flen + opt->opt_nflen;

inet->inet_dport = usin->sin6_port;

diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 44bb66bde0e2..38d66ddfb937 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -428,9 +428,11 @@ void inet6_destroy_sock(struct sock *sk)

/* Free tx options */

- opt = xchg(&np->opt, NULL);
- if (opt)
- sock_kfree_s(sk, opt, opt->tot_len);
+ opt = xchg((__force struct ipv6_txoptions **)&np->opt, NULL);
+ if (opt) {
+ atomic_sub(opt->tot_len, &sk->sk_omem_alloc);
+ txopt_put(opt);
+ }
}
EXPORT_SYMBOL_GPL(inet6_destroy_sock);

@@ -659,7 +661,10 @@ int inet6_sk_rebuild_header(struct sock *sk)
fl6.fl6_sport = inet->inet_sport;
security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));

- final_p = fl6_update_dst(&fl6, np->opt, &final);
+ rcu_read_lock();
+ final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt),
+ &final);
+ rcu_read_unlock();

dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
if (IS_ERR(dst)) {
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index d70b0238f468..517c55b01ba8 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -167,8 +167,10 @@ ipv4_connected:

security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));

- opt = flowlabel ? flowlabel->opt : np->opt;
+ rcu_read_lock();
+ opt = flowlabel ? flowlabel->opt : rcu_dereference(np->opt);
final_p = fl6_update_dst(&fl6, opt, &final);
+ rcu_read_unlock();

dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
err = 0;
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index ce203b0402be..ea7c4d64a00a 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -727,6 +727,7 @@ ipv6_dup_options(struct sock *sk, struct ipv6_txoptions *opt)
*((char **)&opt2->dst1opt) += dif;
if (opt2->srcrt)
*((char **)&opt2->srcrt) += dif;
+ atomic_set(&opt2->refcnt, 1);
}
return opt2;
}
@@ -790,7 +791,7 @@ ipv6_renew_options(struct sock *sk, struct ipv6_txoptions *opt,
return ERR_PTR(-ENOBUFS);

memset(opt2, 0, tot_len);
-
+ atomic_set(&opt2->refcnt, 1);
opt2->tot_len = tot_len;
p = (char *)(opt2 + 1);

diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index 5d1c7cee2cb2..3ff5208772bb 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -78,7 +78,9 @@ struct dst_entry *inet6_csk_route_req(const struct sock *sk,
memset(fl6, 0, sizeof(*fl6));
fl6->flowi6_proto = proto;
fl6->daddr = ireq->ir_v6_rmt_addr;
- final_p = fl6_update_dst(fl6, np->opt, &final);
+ rcu_read_lock();
+ final_p = fl6_update_dst(fl6, rcu_dereference(np->opt), &final);
+ rcu_read_unlock();
fl6->saddr = ireq->ir_v6_loc_addr;
fl6->flowi6_oif = ireq->ir_iif;
fl6->flowi6_mark = ireq->ir_mark;
@@ -142,7 +144,9 @@ static struct dst_entry *inet6_csk_route_socket(struct sock *sk,
fl6->fl6_dport = inet->inet_dport;
security_sk_classify_flow(sk, flowi6_to_flowi(fl6));

- final_p = fl6_update_dst(fl6, np->opt, &final);
+ rcu_read_lock();
+ final_p = fl6_update_dst(fl6, rcu_dereference(np->opt), &final);
+ rcu_read_unlock();

dst = __inet6_csk_dst_check(sk, np->dst_cookie);
if (!dst) {
@@ -175,7 +179,8 @@ int inet6_csk_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl_unused
/* Restore final destination back after routing done */
fl6.daddr = sk->sk_v6_daddr;

- res = ip6_xmit(sk, skb, &fl6, np->opt, np->tclass);
+ res = ip6_xmit(sk, skb, &fl6, rcu_dereference(np->opt),
+ np->tclass);
rcu_read_unlock();
return res;
}
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 63e6956917c9..4449ad1f8114 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -111,7 +111,8 @@ struct ipv6_txoptions *ipv6_update_options(struct sock *sk,
icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie);
}
}
- opt = xchg(&inet6_sk(sk)->opt, opt);
+ opt = xchg((__force struct ipv6_txoptions **)&inet6_sk(sk)->opt,
+ opt);
sk_dst_reset(sk);

return opt;
@@ -231,9 +232,12 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
sk->sk_socket->ops = &inet_dgram_ops;
sk->sk_family = PF_INET;
}
- opt = xchg(&np->opt, NULL);
- if (opt)
- sock_kfree_s(sk, opt, opt->tot_len);
+ opt = xchg((__force struct ipv6_txoptions **)&np->opt,
+ NULL);
+ if (opt) {
+ atomic_sub(opt->tot_len, &sk->sk_omem_alloc);
+ txopt_put(opt);
+ }
pktopt = xchg(&np->pktoptions, NULL);
kfree_skb(pktopt);

@@ -403,7 +407,8 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
if (optname != IPV6_RTHDR && !ns_capable(net->user_ns, CAP_NET_RAW))
break;

- opt = ipv6_renew_options(sk, np->opt, optname,
+ opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk));
+ opt = ipv6_renew_options(sk, opt, optname,
(struct ipv6_opt_hdr __user *)optval,
optlen);
if (IS_ERR(opt)) {
@@ -432,8 +437,10 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
retv = 0;
opt = ipv6_update_options(sk, opt);
sticky_done:
- if (opt)
- sock_kfree_s(sk, opt, opt->tot_len);
+ if (opt) {
+ atomic_sub(opt->tot_len, &sk->sk_omem_alloc);
+ txopt_put(opt);
+ }
break;
}

@@ -486,6 +493,7 @@ sticky_done:
break;

memset(opt, 0, sizeof(*opt));
+ atomic_set(&opt->refcnt, 1);
opt->tot_len = sizeof(*opt) + optlen;
retv = -EFAULT;
if (copy_from_user(opt+1, optval, optlen))
@@ -502,8 +510,10 @@ update:
retv = 0;
opt = ipv6_update_options(sk, opt);
done:
- if (opt)
- sock_kfree_s(sk, opt, opt->tot_len);
+ if (opt) {
+ atomic_sub(opt->tot_len, &sk->sk_omem_alloc);
+ txopt_put(opt);
+ }
break;
}
case IPV6_UNICAST_HOPS:
@@ -1110,10 +1120,11 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
case IPV6_RTHDR:
case IPV6_DSTOPTS:
{
+ struct ipv6_txoptions *opt;

lock_sock(sk);
- len = ipv6_getsockopt_sticky(sk, np->opt,
- optname, optval, len);
+ opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk));
+ len = ipv6_getsockopt_sticky(sk, opt, optname, optval, len);
release_sock(sk);
/* check if ipv6_getsockopt_sticky() returns err code */
if (len < 0)
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index dc65ec198f7c..99140986e887 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -733,6 +733,7 @@ static int raw6_getfrag(void *from, char *to, int offset, int len, int odd,

static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
{
+ struct ipv6_txoptions *opt_to_free = NULL;
struct ipv6_txoptions opt_space;
DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name);
struct in6_addr *daddr, *final_p, final;
@@ -839,8 +840,10 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
if (!(opt->opt_nflen|opt->opt_flen))
opt = NULL;
}
- if (!opt)
- opt = np->opt;
+ if (!opt) {
+ opt = txopt_get(np);
+ opt_to_free = opt;
+ }
if (flowlabel)
opt = fl6_merge_options(&opt_space, flowlabel, opt);
opt = ipv6_fixup_options(&opt_space, opt);
@@ -906,6 +909,7 @@ done:
dst_release(dst);
out:
fl6_sock_release(flowlabel);
+ txopt_put(opt_to_free);
return err < 0 ? err : len;
do_confirm:
dst_confirm(dst);
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index bb8f2fa1c7fb..eaf7ac496d50 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -222,7 +222,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
memset(&fl6, 0, sizeof(fl6));
fl6.flowi6_proto = IPPROTO_TCP;
fl6.daddr = ireq->ir_v6_rmt_addr;
- final_p = fl6_update_dst(&fl6, np->opt, &final);
+ final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt), &final);
fl6.saddr = ireq->ir_v6_loc_addr;
fl6.flowi6_oif = sk->sk_bound_dev_if;
fl6.flowi6_mark = ireq->ir_mark;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index c5429a636f1a..6a50bb4a0dae 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -120,6 +120,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
struct ipv6_pinfo *np = inet6_sk(sk);
struct tcp_sock *tp = tcp_sk(sk);
struct in6_addr *saddr = NULL, *final_p, final;
+ struct ipv6_txoptions *opt;
struct flowi6 fl6;
struct dst_entry *dst;
int addr_type;
@@ -235,7 +236,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
fl6.fl6_dport = usin->sin6_port;
fl6.fl6_sport = inet->inet_sport;

- final_p = fl6_update_dst(&fl6, np->opt, &final);
+ opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk));
+ final_p = fl6_update_dst(&fl6, opt, &final);

security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));

@@ -263,9 +265,9 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
tcp_fetch_timewait_stamp(sk, dst);

icsk->icsk_ext_hdr_len = 0;
- if (np->opt)
- icsk->icsk_ext_hdr_len = (np->opt->opt_flen +
- np->opt->opt_nflen);
+ if (opt)
+ icsk->icsk_ext_hdr_len = opt->opt_flen +
+ opt->opt_nflen;

tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);

@@ -461,7 +463,8 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
if (np->repflow && ireq->pktopts)
fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));

- err = ip6_xmit(sk, skb, fl6, np->opt, np->tclass);
+ err = ip6_xmit(sk, skb, fl6, rcu_dereference(np->opt),
+ np->tclass);
err = net_xmit_eval(err);
}

@@ -972,6 +975,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
struct inet_request_sock *ireq;
struct ipv6_pinfo *newnp;
const struct ipv6_pinfo *np = inet6_sk(sk);
+ struct ipv6_txoptions *opt;
struct tcp6_sock *newtcp6sk;
struct inet_sock *newinet;
struct tcp_sock *newtp;
@@ -1098,13 +1102,15 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
but we make one more one thing there: reattach optmem
to newsk.
*/
- if (np->opt)
- newnp->opt = ipv6_dup_options(newsk, np->opt);
-
+ opt = rcu_dereference(np->opt);
+ if (opt) {
+ opt = ipv6_dup_options(newsk, opt);
+ RCU_INIT_POINTER(newnp->opt, opt);
+ }
inet_csk(newsk)->icsk_ext_hdr_len = 0;
- if (newnp->opt)
- inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen +
- newnp->opt->opt_flen);
+ if (opt)
+ inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
+ opt->opt_flen;

tcp_ca_openreq_child(newsk, dst);

diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 01bcb49619ee..9da3287a3923 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1110,6 +1110,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name);
struct in6_addr *daddr, *final_p, final;
struct ipv6_txoptions *opt = NULL;
+ struct ipv6_txoptions *opt_to_free = NULL;
struct ip6_flowlabel *flowlabel = NULL;
struct flowi6 fl6;
struct dst_entry *dst;
@@ -1263,8 +1264,10 @@ do_udp_sendmsg:
opt = NULL;
connected = 0;
}
- if (!opt)
- opt = np->opt;
+ if (!opt) {
+ opt = txopt_get(np);
+ opt_to_free = opt;
+ }
if (flowlabel)
opt = fl6_merge_options(&opt_space, flowlabel, opt);
opt = ipv6_fixup_options(&opt_space, opt);
@@ -1373,6 +1376,7 @@ release_dst:
out:
dst_release(dst);
fl6_sock_release(flowlabel);
+ txopt_put(opt_to_free);
if (!err)
return len;
/*
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index aca38d8aed8e..a2c8747d2936 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -486,6 +486,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
DECLARE_SOCKADDR(struct sockaddr_l2tpip6 *, lsa, msg->msg_name);
struct in6_addr *daddr, *final_p, final;
struct ipv6_pinfo *np = inet6_sk(sk);
+ struct ipv6_txoptions *opt_to_free = NULL;
struct ipv6_txoptions *opt = NULL;
struct ip6_flowlabel *flowlabel = NULL;
struct dst_entry *dst = NULL;
@@ -575,8 +576,10 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
opt = NULL;
}

- if (opt == NULL)
- opt = np->opt;
+ if (!opt) {
+ opt = txopt_get(np);
+ opt_to_free = opt;
+ }
if (flowlabel)
opt = fl6_merge_options(&opt_space, flowlabel, opt);
opt = ipv6_fixup_options(&opt_space, opt);
@@ -631,6 +634,7 @@ done:
dst_release(dst);
out:
fl6_sock_release(flowlabel);
+ txopt_put(opt_to_free);

return err < 0 ? err : len;



Hannes Frederic Sowa

unread,
Dec 1, 2015, 6:11:29 AM12/1/15
to Eric Dumazet, Dmitry Vyukov, Vlad Yasevich, David S. Miller, Alexey Kuznetsov, James Morris, Hideaki YOSHIFUJI, Patrick McHardy, netdev, LKML, Eric Dumazet, William Dauchy, Rainer Weikusat, syzkaller, Kostya Serebryany, Alexander Potapenko, Sasha Levin
Hi Eric,

On Mon, Nov 30, 2015, at 04:37, Eric Dumazet wrote:
> - opt = xchg(&np->opt, NULL);
> - if (opt)
> - sock_kfree_s(sk, opt, opt->tot_len);
> + opt = xchg((__force struct ipv6_txoptions
> **)&np->opt,
> + NULL);
> + if (opt) {
> + atomic_sub(opt->tot_len,
> &sk->sk_omem_alloc);
> + txopt_put(opt);
> + }
> pktopt = xchg(&np->pktoptions, NULL);
> kfree_skb(pktopt);

Is here something special going on (because of the xchg). I don't see
why you cannot simply use a RCU_INIT_POINTER?

Thanks,
Hannes

Eric Dumazet

unread,
Dec 1, 2015, 8:05:03 AM12/1/15
to Hannes Frederic Sowa, Dmitry Vyukov, Vlad Yasevich, David S. Miller, Alexey Kuznetsov, James Morris, Hideaki YOSHIFUJI, Patrick McHardy, netdev, LKML, Eric Dumazet, William Dauchy, Rainer Weikusat, syzkaller, Kostya Serebryany, Alexander Potapenko, Sasha Levin
Yes, I mentioned this earlier, and will be addressed in net-next tree
later.

(Same for np->pktoptions)

The xchg() here does not bring additional protection as we are the last
user of np.

Thanks.




Hannes Frederic Sowa

unread,
Dec 1, 2015, 8:13:19 AM12/1/15
to Eric Dumazet, Dmitry Vyukov, Vlad Yasevich, David S. Miller, Alexey Kuznetsov, James Morris, Hideaki YOSHIFUJI, Patrick McHardy, netdev, LKML, Eric Dumazet, William Dauchy, Rainer Weikusat, syzkaller, Kostya Serebryany, Alexander Potapenko, Sasha Levin
Ah, sorry, maybe I missed it. Just reviewed the patches after a longer
weekend here.

The rest looked fine to me:

Acked-by: Hannes Frederic Sowa <han...@stressinduktion.org>

Thanks!

David Miller

unread,
Dec 2, 2015, 11:38:25 PM12/2/15
to eric.d...@gmail.com, dvy...@google.com, vyas...@gmail.com, kuz...@ms2.inr.ac.ru, jmo...@namei.org, yosh...@linux-ipv6.org, ka...@trash.net, net...@vger.kernel.org, linux-...@vger.kernel.org, edum...@google.com, wda...@gmail.com, rwei...@mobileactivedefense.com, syzk...@googlegroups.com, k...@google.com, gli...@google.com, sasha...@oracle.com
From: Eric Dumazet <eric.d...@gmail.com>
Date: Sun, 29 Nov 2015 19:37:57 -0800

> From: Eric Dumazet <edum...@google.com>
>
> This patch addresses multiple problems :
>
> UDP/RAW sendmsg() need to get a stable struct ipv6_txoptions
> while socket is not locked : Other threads can change np->opt
> concurrently. Dmitry posted a syzkaller
> (http://github.com/google/syzkaller) program desmonstrating
> use-after-free.
>
> Starting with TCP/DCCP lockless listeners, tcp_v6_syn_recv_sock()
> and dccp_v6_request_recv_sock() also need to use RCU protection
> to dereference np->opt once (before calling ipv6_dup_options())
>
> This patch adds full RCU protection to np->opt
>
> Reported-by: Dmitry Vyukov <dvy...@google.com>
> Signed-off-by: Eric Dumazet <edum...@google.com>

Applied and queued up for -stable.

Eric Dumazet

unread,
Dec 3, 2015, 12:35:51 AM12/3/15
to David Miller, dvy...@google.com, vyas...@gmail.com, kuz...@ms2.inr.ac.ru, jmo...@namei.org, yosh...@linux-ipv6.org, ka...@trash.net, net...@vger.kernel.org, linux-...@vger.kernel.org, edum...@google.com, wda...@gmail.com, rwei...@mobileactivedefense.com, syzk...@googlegroups.com, k...@google.com, gli...@google.com, sasha...@oracle.com
Thanks David.

I will send a followup patch, as I missed the sctp part, now triggering
following sparse warnings.

CHECK net/sctp/ipv6.c
net/sctp/ipv6.c:223:41: warning: incorrect type in argument 4 (different address spaces)
net/sctp/ipv6.c:223:41: expected struct ipv6_txoptions *opt
net/sctp/ipv6.c:223:41: got struct ipv6_txoptions [noderef] <asn:4>*opt
net/sctp/ipv6.c:265:41: warning: incorrect type in argument 2 (different address spaces)
net/sctp/ipv6.c:265:41: expected struct ipv6_txoptions const *opt
net/sctp/ipv6.c:265:41: got struct ipv6_txoptions [noderef] <asn:4>*opt
net/sctp/ipv6.c:324:49: warning: incorrect type in argument 2 (different address spaces)
net/sctp/ipv6.c:324:49: expected struct ipv6_txoptions const *opt
net/sctp/ipv6.c:324:49: got struct ipv6_txoptions [noderef] <asn:4>*opt



Reply all
Reply to author
Forward
0 new messages