CVE-2017-8890调试笔记

poc运行

在github找了一个poc
在target里

1
2
git clone https://github.com/beraphin/CVE-2017-8890.git
g++ -pthread -o poc poc.cpp

target里
mark
然后回到host发现已经断下了(panic)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
gef➤  c
Continuing.
[ 41.176158] sd 2:0:0:0: [sda] Assuming drive cache: write through
[ 121.392169] BUG: unable to handle kernel NULL pointer dereference at 0000000000000006
[ 121.417112] IP: ip_mc_leave_src+0x25/0x90
[ 121.433182] PGD 1e957067
[ 121.433184] PUD 1bc25067
[ 121.446277] PMD 0
[ 121.462184]
[ 121.490174] Oops: 0000 [#1] SMP


──────────────────────────────────────────────────[ code:i386:x86-64:intel ]────
0xffffffff8182f49d <ip_mc_leave_src+29> test rbx, rbx
0xffffffff8182f4a0 <ip_mc_leave_src+32> je 0xffffffff8182f4ef <ip_mc_leave_src+111>
0xffffffff8182f4a2 <ip_mc_leave_src+34> mov edx, DWORD PTR [rsi+0x14]
0xffffffff8182f4a5 <ip_mc_leave_src+37> mov ecx, DWORD PTR [rbx+0x4]
0xffffffff8182f4a8 <ip_mc_leave_src+40> lea rsi, [rsi+0x8]
0xffffffff8182f4ac <ip_mc_leave_src+44> lea r8, [rbx+0x18]
0xffffffff8182f4b0 <ip_mc_leave_src+48> xor r9d, r9d
0xffffffff8182f4b3 <ip_mc_leave_src+51> call 0xffffffff8182f110 <ip_mc_del_src>
0xffffffff8182f4b8 <ip_mc_leave_src+56> mov QWORD PTR [r12+0x18], 0x0
[!] Command 'context' failed to execute properly, reason: Sign not allowed in string format specifier
gef➤ bt
#0 0xffffffff8182f4a5 in ip_mc_leave_src (sk=0xffff88003a10f800, iml=0xffff88003a6cac40, in_dev=0x0 <irq_stack_union>) at /build/linux-hwe-edge-gyUj63/linux-hwe-edge-4.10.0/net/ipv4/igmp.c:2155
#1 0xffffffff81832f18 in ip_mc_drop_socket (sk=0xffff88003a10f800) at /build/linux-hwe-edge-gyUj63/linux-hwe-edge-4.10.0/net/ipv4/igmp.c:2607
#2 0xffffffff8182c2c0 in inet_release (sock=0xffff8800383c3200) at /build/linux-hwe-edge-gyUj63/linux-hwe-edge-4.10.0/net/ipv4/af_inet.c:411
#3 0xffffffff8178b7bf in sock_release (sock=0x0 <irq_stack_union>) at /build/linux-hwe-edge-gyUj63/linux-hwe-edge-4.10.0/net/socket.c:599
#4 0xffffffff8178b832 in sock_close (inode=<optimized out>, filp=<optimized out>) at /build/linux-hwe-edge-gyUj63/linux-hwe-edge-4.10.0/net/socket.c:1063
#5 0xffffffff81246937 in __fput (file=0xffff8800341ea200) at /build/linux-hwe-edge-gyUj63/linux-hwe-edge-4.10.0/fs/file_table.c:209
#6 0xffffffff81246ade in ____fput (work=<optimized out>) at /build/linux-hwe-edge-gyUj63/linux-hwe-edge-4.10.0/fs/file_table.c:245
#7 0xffffffff810a706e in task_work_run () at /build/linux-hwe-edge-gyUj63/linux-hwe-edge-4.10.0/kernel/task_work.c:116
#8 0xffffffff810032ba in tracehook_notify_resume (regs=<optimized out>) at /build/linux-hwe-edge-gyUj63/linux-hwe-edge-4.10.0/include/linux/tracehook.h:191
#9 exit_to_usermode_loop (regs=0xffffc90003a0bf58, cached_flags=0x2) at /build/linux-hwe-edge-gyUj63/linux-hwe-edge-4.10.0/arch/x86/entry/common.c:160
#10 0xffffffff81003b29 in prepare_exit_to_usermode (regs=<optimized out>) at /build/linux-hwe-edge-gyUj63/linux-hwe-edge-4.10.0/arch/x86/entry/common.c:190
#11 syscall_return_slowpath (regs=0xffffc90003a0bf58) at /build/linux-hwe-edge-gyUj63/linux-hwe-edge-4.10.0/arch/x86/entry/common.c:259
#12 0xffffffff818ce948 in entry_SYSCALL_64 () at /build/linux-hwe-edge-gyUj63/linux-hwe-edge-4.10.0/arch/x86/entry/entry_64.S:239
#13 0x0000000000000000 in ?? ()
gef➤ l
2150 if (!psf) {
2151 /* any-source empty exclude case */
2152 return ip_mc_del_src(in_dev, &iml->multi.imr_multiaddr.s_addr,
2153 iml->sfmode, 0, NULL, 0);
2154 }
2155 err = ip_mc_del_src(in_dev, &iml->multi.imr_multiaddr.s_addr,
2156 iml->sfmode, psf->sl_count, psf->sl_addr, 0);
2157 RCU_INIT_POINTER(iml->sflist, NULL);
2158 /* decrease mem now to avoid the memleak warning */
2159 atomic_sub(IP_SFLSIZE(psf->sl_max), &sk->sk_omem_alloc);
gef➤ info registers
rax 0x0 0x0
rbx 0x2 0x2
...
gef➤ x /50i ip_mc_leave_src
0xffffffff8182f480 <ip_mc_leave_src>: nop DWORD PTR [rax+rax*1+0x0]
0xffffffff8182f485 <ip_mc_leave_src+5>: push rbp
0xffffffff8182f486 <ip_mc_leave_src+6>: mov rbp,rsp
0xffffffff8182f489 <ip_mc_leave_src+9>: push r14
0xffffffff8182f48b <ip_mc_leave_src+11>: push r13
0xffffffff8182f48d <ip_mc_leave_src+13>: push r12
0xffffffff8182f48f <ip_mc_leave_src+15>: push rbx
0xffffffff8182f490 <ip_mc_leave_src+16>: mov r14,rdi
0xffffffff8182f493 <ip_mc_leave_src+19>: mov rbx,QWORD PTR [rsi+0x18]
0xffffffff8182f497 <ip_mc_leave_src+23>: mov r12,rsi
0xffffffff8182f49a <ip_mc_leave_src+26>: mov rdi,rdx
0xffffffff8182f49d <ip_mc_leave_src+29>: test rbx,rbx
0xffffffff8182f4a0 <ip_mc_leave_src+32>: je 0xffffffff8182f4ef <ip_mc_leave_src+111>
0xffffffff8182f4a2 <ip_mc_leave_src+34>: mov edx,DWORD PTR [rsi+0x14]
=> 0xffffffff8182f4a5 <ip_mc_leave_src+37>: mov ecx,DWORD PTR [rbx+0x4]--->crash
0xffffffff8182f4a8 <ip_mc_leave_src+40>: lea rsi,[rsi+0x8]
0xffffffff8182f4ac <ip_mc_leave_src+44>: lea r8,[rbx+0x18]
0xffffffff8182f4b0 <ip_mc_leave_src+48>: xor r9d,r9d
0xffffffff8182f4b3 <ip_mc_leave_src+51>: call 0xffffffff8182f110 <ip_mc_del_src>
0xffffffff8182f4b8 <ip_mc_leave_src+56>: mov QWORD PTR [r12+0x18],0x0
0xffffffff8182f4c1 <ip_mc_leave_src+65>: mov r13d,eax
0xffffffff8182f4c4 <ip_mc_leave_src+68>: mov eax,DWORD PTR [rbx]
0xffffffff8182f4c6 <ip_mc_leave_src+70>: lea eax,[rax*4+0x18]
0xffffffff8182f4cd <ip_mc_leave_src+77>: sub DWORD PTR ds:[r14+0x138],eax
0xffffffff8182f4d5 <ip_mc_leave_src+85>: lea rdi,[rbx+0x8]
0xffffffff8182f4d9 <ip_mc_leave_src+89>: mov esi,0x8
0xffffffff8182f4de <ip_mc_leave_src+94>: call 0xffffffff810f4a40 <kfree_call_rcu>
0xffffffff8182f4e3 <ip_mc_leave_src+99>: pop rbx
0xffffffff8182f4e4 <ip_mc_leave_src+100>: mov eax,r13d
0xffffffff8182f4e7 <ip_mc_leave_src+103>: pop r12
0xffffffff8182f4e9 <ip_mc_leave_src+105>: pop r13
0xffffffff8182f4eb <ip_mc_leave_src+107>: pop r14
0xffffffff8182f4ed <ip_mc_leave_src+109>: pop rbp
0xffffffff8182f4ee <ip_mc_leave_src+110>: ret
0xffffffff8182f4ef <ip_mc_leave_src+111>: mov edx,DWORD PTR [rsi+0x14]
0xffffffff8182f4f2 <ip_mc_leave_src+114>: lea rsi,[rsi+0x8]
0xffffffff8182f4f6 <ip_mc_leave_src+118>: xor r9d,r9d
0xffffffff8182f4f9 <ip_mc_leave_src+121>: xor r8d,r8d
0xffffffff8182f4fc <ip_mc_leave_src+124>: xor ecx,ecx
0xffffffff8182f4fe <ip_mc_leave_src+126>: call 0xffffffff8182f110 <ip_mc_del_src>
0xffffffff8182f503 <ip_mc_leave_src+131>: pop rbx
0xffffffff8182f504 <ip_mc_leave_src+132>: pop r12
0xffffffff8182f506 <ip_mc_leave_src+134>: pop r13
0xffffffff8182f508 <ip_mc_leave_src+136>: pop r14
0xffffffff8182f50a <ip_mc_leave_src+138>: pop rbp
0xffffffff8182f50b <ip_mc_leave_src+139>: ret

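通过打印堆栈、查看源码并分析汇编,可以确定kernel panic的直接原因:崩溃指令 mov ecx, DWORD PTR [rbx+0x4] 读取的是 psf->sl_count,而此时 rbx(即 psf = iml->sflist)的值是 0x2,并不是合法指针,所以访问 0x2+0x4=0x6 时触发了 NULL pointer dereference at 0000000000000006。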

关键代码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
if (!psf) {
2151 /* any-source empty exclude case */
2152 return ip_mc_del_src(in_dev, &iml->multi.imr_multiaddr.s_addr,
2153 iml->sfmode, 0, NULL, 0);
2154 }
2155 err = ip_mc_del_src(in_dev, &iml->multi.imr_multiaddr.s_addr,
2156 iml->sfmode, psf->sl_count, psf->sl_addr, 0);
2157 RCU_INIT_POINTER(iml->sflist, NULL);
2158 /* decrease mem now to avoid the memleak warning */
2159 atomic_sub(IP_SFLSIZE(psf->sl_max), &sk->sk_omem_alloc);
...
...
...
=> 0xffffffff8182f4a5 <ip_mc_leave_src+37>: mov ecx,DWORD PTR [rbx+0x4]--->crash
...
...
gef➤ info registers
rbx 0x2

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
static int ip_mc_leave_src(struct sock *sk, struct ip_mc_socklist *iml,
struct in_device *in_dev)
{
// rbx = [rsi+0x18]--->iml->sflist--->psf
struct ip_sf_socklist *psf = rtnl_dereference(iml->sflist);
int err;

if (!psf) { // --->test rbx, rbx; je err
return ip_mc_del_src(in_dev, &iml->multi.imr_multiaddr.s_addr,
iml->sfmode, 0, NULL, 0);
}
// ecx = [rbx+0x4]--->psf->sl_count
err = ip_mc_del_src(in_dev, &iml->multi.imr_multiaddr.s_addr,iml->sfmode, psf->sl_count, psf->sl_addr, 0);
...
...
}
1
2
3
4
5
6
7
8
9
10
11
12
struct ip_sf_socklist {
unsigned int sl_max;
unsigned int sl_count;
struct rcu_head rcu;//0x10字节大小的结构体
__be32 sl_addr[0];
};
...
struct callback_head {
struct callback_head *next;
void (*func)(struct callback_head *head);
} __attribute__((aligned(sizeof(void *))));
#define rcu_head callback_head

漏洞成因

patch

看资料和patch
https://bugzilla.redhat.com/show_bug.cgi?id=CVE-2017-8890

1
2
The inet_csk_clone_lock function in net/ipv4/inet_connection_sock.c in the Linux kernel allows attackers to cause a denial of service (double free) or possibly have unspecified other impact by leveraging use of the accept system call. 
An unprivileged local user could use this flaw to induce kernel memory corruption on the system, leading to a crash. Due to the nature of the flaw, privilege escalation cannot be fully ruled out, although we believe it is unlikely.

linux内核的net/ipv4/inet_connection_sock.c文件中的inet_csk_clone_lock函数允许攻击者利用accept系统调用触发double free,造成拒绝服务或者其他未明确的影响。
一个没有特权的本地用户能够利用这个缺陷造成系统内核内存损坏,导致系统崩溃。由于该缺陷的性质,不能完全排除提权的可能,不过官方认为可能性不大。

patch
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=657831ffc38e30092a2d5f03d385d710eb88b09a
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=8b485ce69876c65db12ed390e7f9c0d2a64eff2c

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
dccp/tcp: do not inherit mc_list from parent
syzkaller found a way to trigger double frees from ip_mc_drop_socket()

It turns out that leave a copy of parent mc_list at accept() time,
which is very bad.

Very similar to commit 8b485ce69876 ("tcp: do not inherit
fastopen_req from parent")

Initial report from Pray3r, completed by Andrey one.
Thanks a lot to them !
...

diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 5e313c1..1054d33 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -794,6 +794,8 @@ struct sock *inet_csk_clone_lock(const struct sock *sk,
/* listeners have SOCK_RCU_FREE, not the children */
sock_reset_flag(newsk, SOCK_RCU_FREE);

+ inet_sk(newsk)->mc_list = NULL;
+
newsk->sk_mark = inet_rsk(req)->ir_mark;
atomic64_set(&newsk->sk_cookie,
atomic64_read(&inet_rsk(req)->ir_cookie));

patch前的源码
https://elixir.bootlin.com/linux/v4.11.3/source/net/ipv4/inet_connection_sock.c
dccp/tcp: do not inherit mc_list from parent

根据patch推测:child对象在accept时直接继承了parent对象的字段值,也就拿到了同一个mc_list的地址;于是parent和child中先关闭的一方会free一次mc_list,另一方关闭时又对同一地址再次free,形成double free。

使用understand阅读源码

使用understand
我导入的源码是kernel4.10




然后等它建立好索引后,搜索想看的函数,右键选择 Called By 就可以自动绘制该函数的被调用关系图。

double free

mc_list对象创建



mc_list结构的定义如下,大小为0x30

1
2
3
4
5
6
7
8
9
10
11
12
struct ip_mc_socklist {
struct ip_mc_socklist __rcu *next_rcu;
struct ip_mreqn multi;
unsigned int sfmode; /* MCAST_{INCLUDE,EXCLUDE} */
struct ip_sf_socklist __rcu *sflist;
struct rcu_head rcu;
};
struct callback_head {
struct callback_head *next;
void (*func)(struct callback_head *head);
} __attribute__((aligned(sizeof(void *))));
#define rcu_head callback_head

1
2
gef➤  b ip_mc_join_group
gef➤ b sock_kmalloc

然后单步
mark
mark

mc_list对象释放

1
sock_close -> sock_release() ->  inet_release() ->  ip_mc_drop_socket()


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
/*
* A socket is closing.
*/

void ip_mc_drop_socket(struct sock *sk)
{
struct inet_sock *inet = inet_sk(sk);
struct ip_mc_socklist *iml;
struct net *net = sock_net(sk);

if (!inet->mc_list)
return;

rtnl_lock();
while ((iml = rtnl_dereference(inet->mc_list)) != NULL) {
struct in_device *in_dev;

inet->mc_list = iml->next_rcu;
in_dev = inetdev_by_index(net, iml->multi.imr_ifindex);
(void) ip_mc_leave_src(sk, iml, in_dev);
if (in_dev)
ip_mc_dec_group(in_dev, iml->multi.imr_multiaddr.s_addr);
/* decrease mem now to avoid the memleak warning */
atomic_sub(sizeof(*iml), &sk->sk_omem_alloc);
kfree_rcu(iml, rcu);
}
rtnl_unlock();
}

释放操作由 ip_mc_drop_socket 完成:它依次取出 mc_list 链表上的每个对象,最后对其调用 kfree_rcu。kfree_rcu 并不会立即释放对象,而是通过 call_rcu 把要删除的对象保存起来,并标记或者开始一个宽限期(grace period);等到宽限期结束,会触发一个RCU软中断,这时才真正进行释放;如果有回调函数func,则进行回调函数处理流程。整个函数调用逻辑为:kfree_rcu -> … -> call_rcu -> … -> invoke_rcu_core -> RCU_SOFTIRQ -> rcu_process_callbacks -> … __rcu_reclaim


1
2
b  /build/linux-hwe-edge-gyUj63/linux-hwe-edge-4.10.0/net/ipv4/igmp.c:2612 
b /build/linux-hwe-edge-gyUj63/linux-hwe-edge-4.10.0/kernel/rcu/rcu.h:113

mark
mark

accept


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
/**
* inet_csk_clone_lock - clone an inet socket, and lock its clone
* @sk: the socket to clone
* @req: request_sock
* @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
*
* Caller must unlock socket even in error path (bh_unlock_sock(newsk))
*/
struct sock *inet_csk_clone_lock(const struct sock *sk,
const struct request_sock *req,
const gfp_t priority)
{
struct sock *newsk = sk_clone_lock(sk, priority);//<-----
...
...
}
return newsk;
}

struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
{
struct sock *newsk;
bool is_charged = true;

newsk = sk_prot_alloc(sk->sk_prot, priority, sk->sk_family);
if (newsk != NULL) {
struct sk_filter *filter;

sock_copy(newsk, sk);//<-----
...
...
return newsk;
}

/*
* Copy all fields from osk to nsk but nsk->sk_refcnt must not change yet,
* even temporarly, because of RCU lookups. sk_node should also be left as is.
* We must not copy fields between sk_dontcopy_begin and sk_dontcopy_end
*/
static void sock_copy(struct sock *nsk, const struct sock *osk)
{
#ifdef CONFIG_SECURITY_NETWORK
void *sptr = nsk->sk_security;
#endif
memcpy(nsk, osk, offsetof(struct sock, sk_dontcopy_begin));

memcpy(&nsk->sk_dontcopy_end, &osk->sk_dontcopy_end,
osk->sk_prot->obj_size - offsetof(struct sock, sk_dontcopy_end));

#ifdef CONFIG_SECURITY_NETWORK
nsk->sk_security = sptr;
security_sk_clone(osk, nsk);
#endif
}

/** struct inet_sock - representation of INET sockets
*
* @sk - ancestor class
* @pinet6 - pointer to IPv6 control block
* @inet_daddr - Foreign IPv4 addr
* @inet_rcv_saddr - Bound local IPv4 addr
* @inet_dport - Destination port
* @inet_num - Local port
* @inet_saddr - Sending source
* @uc_ttl - Unicast TTL
* @inet_sport - Source port
* @inet_id - ID counter for DF pkts
* @tos - TOS
* @mc_ttl - Multicasting TTL
* @is_icsk - is this an inet_connection_sock?
* @uc_index - Unicast outgoing device index
* @mc_index - Multicast device index
* @mc_list - Group array
* @cork - info to build ip hdr on each ip frag while socket is corked
*/
struct inet_sock {
/* sk and pinet6 has to be the first two members of inet_sock */
struct sock sk;
#if IS_ENABLED(CONFIG_IPV6)
struct ipv6_pinfo *pinet6;
#endif
/* Socket demultiplex comparisons on incoming packets. */
#define inet_daddr sk.__sk_common.skc_daddr
#define inet_rcv_saddr sk.__sk_common.skc_rcv_saddr
#define inet_dport sk.__sk_common.skc_dport
#define inet_num sk.__sk_common.skc_num

__be32 inet_saddr;
__s16 uc_ttl;
__u16 cmsg_flags;
__be16 inet_sport;
__u16 inet_id;

struct ip_options_rcu __rcu *inet_opt;
int rx_dst_ifindex;
__u8 tos;
__u8 min_ttl;
__u8 mc_ttl;
__u8 pmtudisc;
__u8 recverr:1,
is_icsk:1,
freebind:1,
hdrincl:1,
mc_loop:1,
transparent:1,
mc_all:1,
nodefrag:1;
__u8 bind_address_no_port:1;
__u8 rcv_tos;
__u8 convert_csum;
int uc_index;
int mc_index;
__be32 mc_addr;
struct ip_mc_socklist __rcu *mc_list;
struct inet_cork_full cork;
};

在accept的时候,内核会创建一个新的socket,parent的几乎所有field(sk_dontcopy_begin 与 sk_dontcopy_end 之间的字段除外,例如 sk_refcnt)都会被 sock_copy 拷贝给新生成的socket对象,包括mc_list指针的值,于是就有了多个指针指向同一块内存,从而在后面造成double free。

poc分析

1
2
3
4
5
6
7
8
sockfd = socket(AF_INET, xx, IPPROTO_TCP);
setsockopt(sockfd, SOL_IP, MCAST_JOIN_GROUP, xxxx, xxxx);
bind(sockfd, xxxx, xxxx);
listen(sockfd, xxxx);
newsockfd = accept(sockfd, xxxx, xxxx);
close(newsockfd);// first free (kfree_rcu)
sleep(5);// wait rcu free(real first free)
close(sockfd);// double free
  • int socket(int af, int type, int protocol);
    1) af 为地址族(Address Family),也就是 IP 地址类型,常用的有 AF_INET 和 AF_INET6。AF 是“Address Family”的简写,INET是“Internet”的简写。AF_INET 表示 IPv4 地址,例如 127.0.0.1;AF_INET6 表示 IPv6 地址,例如 1030::C9B4:FF12:48AA:1A2B。
    你也可以使用PF前缀,PF是“Protocol Family”的简写,它和AF是一样的。例如,PF_INET 等价于 AF_INET,PF_INET6 等价于 AF_INET6。
    2) type 为数据传输方式,常用的有 SOCK_STREAM 和 SOCK_DGRAM,
    3) protocol 表示传输协议,常用的有 IPPROTO_TCP 和 IPPROTO_UDP,分别表示 TCP 传输协议和 UDP 传输协议。

  • int setsockopt(int sock, int level, int optname, const void *optval, socklen_t optlen);
    参数:
    sock:将要被设置或者获取选项的套接字。
    level:选项所在的协议层。
    optname:需要访问的选项名。
    optval:对于getsockopt(),指向返回选项值的缓冲。 对于setsockopt(),指向包含新选项值的缓冲。
    optlen:对于getsockopt(),作为入口参数时,选项值的最大长度。 作为出口参数时,选项值的实际长度。 对于setsockopt(),新选项值的长度。

  • int bind(int sock, struct sockaddr *addr, socklen_t addrlen); //Linux
    sock 为 socket 文件描述符,addr 为 sockaddr 结构体变量的指针,addrlen 为 addr 变量的大小,可由 sizeof() 计算得出。
    http://c.biancheng.net/cpp/html/3033.html
  • int listen(int sock, int backlog); //Linux
    sock 为需要进入监听状态的套接字,backlog 为请求队列的最大长度。
    所谓被动监听,是指当没有客户端请求时,套接字处于“睡眠”状态,只有当接收到客户端请求时,套接字才会被“唤醒”来响应请求。
    注意:listen() 只是让套接字处于监听状态,并没有接收请求。接收请求需要使用 accept() 函数。
  • int accept(int sock, struct sockaddr *addr, socklen_t *addrlen); //Linux
    它的参数与 bind() 和 connect() 类似:sock 为服务器端套接字,addr 指向用来保存客户端地址的 sockaddr_in 结构体变量,addrlen 指向保存 addr 长度的变量(调用前用 sizeof() 求得并赋值,返回时被写入实际长度)。
    accept() 返回一个新的套接字来和客户端通信,addr 保存了客户端的IP地址和端口号,而 sock 是服务器端的套接字,大家注意区分。后面和客户端通信时,要使用这个新生成的套接字,而不是原来服务器端的套接字。

    漏洞利用

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    + mc_list 在内核中以链表形式存在,通过第一个成员next_rcu指向下一个mc_list

    + 当要释放一个mc_list的时候,rcu_head会被链到一个释放链表中

    + 我们可以在用户态伪造一个fake_mc_list,让被堆喷的对象的next_rcu指向用户态的fake_mc_list

    那么最终的利用思路如下:

    + 用户态创建结构 fake_mc_list

    + mmap(fake_mc_list)

    + 创建漏洞结构体

    + 第一次释放

    + 堆喷占位,覆盖被释放对象的next_rcu

    + 第二次释放,使 fake_mc_list 上链

    + fake_mc_list.rcu.func 为 JOP 地址

漏洞模型

commit

事实上类似的模式早就被commit过patch
https://github.com/torvalds/linux/commit/8b485ce69876c65db12ed390e7f9c0d2a64eff2c
所以多看commit很重要,可以学习挖洞的模式。
可以看一下说明,提到了double free,触发地点也很接近。

tcp: do not inherit fastopen_req from parent

1
2
3
4
5
6
7
8
9
10
tcp: do not inherit fastopen_req from parent

Under fuzzer stress, it is possible that a child gets a non NULL
fastopen_req pointer from its parent at accept() time, when/if parent
morphs from listener to active session.

We need to make sure this can not happen, by clearing the field after
socket cloning.

BUG: Double free or freeing an invalid pointer

patch

1
2
3
4
5
6
7
		newicsk->icsk_ack.last_seg_size = skb->len - newtp->tcp_header_len;
newtp->rx_opt.mss_clamp = req->mss;
tcp_ecn_openreq_child(newtp, req);
+ newtp->fastopen_req = NULL;
newtp->fastopen_rsk = NULL;
newtp->syn_data_acked = 0;
newtp->rack.mstamp.v64 = 0;

CVE-2017-9075

do not inherit ipv6_{mc|ac|fl}_list from parent

1
2
3
sctp: do not inherit ipv6_{mc|ac|fl}_list from parent
SCTP needs fixes similar to 83eaddab4378 ("ipv6/dccp: do not inherit
ipv6_mc_list from parent"), otherwise bad things can happen.

patch

1
2
3
4
5
6
7
8
9
10
11
+++ b/net/sctp/ipv6.c
@@ -677,6 +677,9 @@ static struct sock *sctp_v6_create_accept_sk(struct sock *sk,
newnp = inet6_sk(newsk);

memcpy(newnp, np, sizeof(struct ipv6_pinfo));
+ newnp->ipv6_mc_list = NULL;
+ newnp->ipv6_ac_list = NULL;
+ newnp->ipv6_fl_list = NULL;

rcu_read_lock();
opt = rcu_dereference(np->opt);

CVE-2017-9076/CVE-2017-9077

ipv6/dccp: do not inherit ipv6_mc_list from parent

1
2
Like commit 657831ffc38e ("dccp/tcp: do not inherit mc_list from parent")
we should clear ipv6_mc_list etc. for IPv6 sockets too.

patch

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index d9b6a4e..b6bbb71 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -426,6 +426,9 @@ static struct sock *dccp_v6_request_recv_sock(const struct sock *sk,
newsk->sk_backlog_rcv = dccp_v4_do_rcv;
newnp->pktoptions = NULL;
newnp->opt = NULL;
+ newnp->ipv6_mc_list = NULL;
+ newnp->ipv6_ac_list = NULL;
+ newnp->ipv6_fl_list = NULL;
newnp->mcast_oif = inet6_iif(skb);
newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;

@@ -490,6 +493,9 @@ static struct sock *dccp_v6_request_recv_sock(const struct sock *sk,
/* Clone RX bits */
newnp->rxopt.all = np->rxopt.all;

+ newnp->ipv6_mc_list = NULL;
+ newnp->ipv6_ac_list = NULL;
+ newnp->ipv6_fl_list = NULL;
newnp->pktoptions = NULL;
newnp->opt = NULL;
newnp->mcast_oif = inet6_iif(skb);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index aeb9497..df5a9ff 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1062,6 +1062,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

+ newnp->ipv6_mc_list = NULL;
newnp->ipv6_ac_list = NULL;
newnp->ipv6_fl_list = NULL;
newnp->pktoptions = NULL;
@@ -1131,6 +1132,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
First: no IPv4 options.
*/
newinet->inet_opt = NULL;
+ newnp->ipv6_mc_list = NULL;
newnp->ipv6_ac_list = NULL;
newnp->ipv6_fl_list = NULL;

参考资料

https://2freeman.github.io/2018/01/06/CVE-2017-8890-internals.html
https://bbs.pediy.com/thread-226057.htm
http://www.freebuf.com/articles/terminal/160041.html