diff options
author: Roman Gushchin <guro@fb.com> 2018-02-02 15:26:57 +0000
committer: Greg Kroah-Hartman <gregkh@linuxfoundation.org> 2018-02-12 07:07:20 +0100
commit: b22b76fbd802e8cdcd70b9e95f2755b8ee36705d (patch)
parent: 81259f3592f8bc36f146034663a27184966c9e62 (diff)
Revert "defer call to mem_cgroup_sk_alloc()"
[ Upstream commit edbe69ef2c90fc86998a74b08319a01c508bd497 ]

This patch effectively reverts commit 9f1c2674b328 ("net: memcontrol: defer call to mem_cgroup_sk_alloc()").

Moving mem_cgroup_sk_alloc() to inet_csk_accept() completely breaks memcg socket memory accounting, as packets received before the memcg pointer is initialized are not accounted and cause a refcount underflow on socket release.

Actually, the use-after-free problem was fixed by commit c0576e397508 ("net: call cgroup_sk_alloc() earlier in sk_clone_lock()") for the cgroup pointer.

So, let's revert it and call mem_cgroup_sk_alloc() just before cgroup_sk_alloc(). This is safe, as we hold a reference to the socket we're cloning, and it holds a reference to the memcg.

Also, let's drop the BUG_ON(mem_cgroup_is_root()) check from mem_cgroup_sk_alloc(). I see no reason why bumping the root memcg counter should be grounds for a panic, and there are no realistic ways to hit it.

Signed-off-by: Roman Gushchin <guro@fb.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
3 files changed, 15 insertions, 5 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index ac2ffd5e02b9..0a78ce57872d 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5828,6 +5828,20 @@ void mem_cgroup_sk_alloc(struct sock *sk)
if (!mem_cgroup_sockets_enabled)
+ /*
+ * Socket cloning can throw us here with sk_memcg already
+ * filled. It won't however, necessarily happen from
+ * process context. So the test for root memcg given
+ * the current task's memcg won't help us in this case.
+ *
+ * Respecting the original socket's memcg is a better
+ * decision in this case.
+ */
+ if (sk->sk_memcg) {
+ css_get(&sk->sk_memcg->css);
+ return;
+ }
memcg = mem_cgroup_from_task(current);
if (memcg == root_mem_cgroup)
diff --git a/net/core/sock.c b/net/core/sock.c
index c0b5b2f17412..7571dabfc4cf 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1675,16 +1675,13 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
newsk->sk_dst_pending_confirm = 0;
newsk->sk_wmem_queued = 0;
newsk->sk_forward_alloc = 0;
- /* sk->sk_memcg will be populated at accept() time */
- newsk->sk_memcg = NULL;
atomic_set(&newsk->sk_drops, 0);
newsk->sk_send_head = NULL;
newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
atomic_set(&newsk->sk_zckey, 0);
sock_reset_flag(newsk, SOCK_DONE);
+ mem_cgroup_sk_alloc(newsk);
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 4ca46dc08e63..3668c4182655 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -475,7 +475,6 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern)
- mem_cgroup_sk_alloc(newsk);
if (req)