aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorIlya Maximets <i.maximets@samsung.com>2019-06-28 11:04:07 +0300
committerDaniel Borkmann <daniel@iogearbox.net>2019-07-03 15:10:55 +0200
commit455302d1c9ae9318660aaeb9748a01ff414c9741 (patch)
treea9b25a22c509f0c3c5b1a937de24008d0637fc26
parent162c820ed8965bf94d2685f97388aea5aee9e258 (diff)
downloadlinux-stericsson-455302d1c9ae9318660aaeb9748a01ff414c9741.tar.gz
xdp: fix hang while unregistering device bound to xdp socket
Device that bound to XDP socket will not have zero refcount until the userspace application will not close it. This leads to hang inside 'netdev_wait_allrefs()' if device unregistering requested: # ip link del p1 < hang on recvmsg on netlink socket > # ps -x | grep ip 5126 pts/0 D+ 0:00 ip link del p1 # journalctl -b Jun 05 07:19:16 kernel: unregister_netdevice: waiting for p1 to become free. Usage count = 1 Jun 05 07:19:27 kernel: unregister_netdevice: waiting for p1 to become free. Usage count = 1 ... Fix that by implementing NETDEV_UNREGISTER event notification handler to properly clean up all the resources and unref device. This should also allow socket killing via ss(8) utility. Fixes: 965a99098443 ("xsk: add support for bind for Rx") Signed-off-by: Ilya Maximets <i.maximets@samsung.com> Acked-by: Jonathan Lemon <jonathan.lemon@gmail.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
-rw-r--r--include/net/xdp_sock.h5
-rw-r--r--net/xdp/xdp_umem.c10
-rw-r--r--net/xdp/xdp_umem.h1
-rw-r--r--net/xdp/xsk.c87
4 files changed, 87 insertions, 16 deletions
diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index d074b6d60f8a..7da155164947 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -61,6 +61,11 @@ struct xdp_sock {
struct xsk_queue *tx ____cacheline_aligned_in_smp;
struct list_head list;
bool zc;
+ enum {
+ XSK_READY = 0,
+ XSK_BOUND,
+ XSK_UNBOUND,
+ } state;
/* Protects multiple processes in the control path */
struct mutex mutex;
/* Mutual exclusion of NAPI TX thread and sendmsg error paths
diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c
index 267b82a4cbcf..20c91f02d3d8 100644
--- a/net/xdp/xdp_umem.c
+++ b/net/xdp/xdp_umem.c
@@ -140,11 +140,13 @@ out_rtnl_unlock:
return err;
}
-static void xdp_umem_clear_dev(struct xdp_umem *umem)
+void xdp_umem_clear_dev(struct xdp_umem *umem)
{
struct netdev_bpf bpf;
int err;
+ ASSERT_RTNL();
+
if (!umem->dev)
return;
@@ -153,17 +155,13 @@ static void xdp_umem_clear_dev(struct xdp_umem *umem)
bpf.xsk.umem = NULL;
bpf.xsk.queue_id = umem->queue_id;
- rtnl_lock();
err = umem->dev->netdev_ops->ndo_bpf(umem->dev, &bpf);
- rtnl_unlock();
if (err)
WARN(1, "failed to disable umem!\n");
}
- rtnl_lock();
xdp_clear_umem_at_qid(umem->dev, umem->queue_id);
- rtnl_unlock();
dev_put(umem->dev);
umem->dev = NULL;
@@ -195,7 +193,9 @@ static void xdp_umem_unaccount_pages(struct xdp_umem *umem)
static void xdp_umem_release(struct xdp_umem *umem)
{
+ rtnl_lock();
xdp_umem_clear_dev(umem);
+ rtnl_unlock();
ida_simple_remove(&umem_ida, umem->id);
diff --git a/net/xdp/xdp_umem.h b/net/xdp/xdp_umem.h
index 27603227601b..a63a9fb251f5 100644
--- a/net/xdp/xdp_umem.h
+++ b/net/xdp/xdp_umem.h
@@ -10,6 +10,7 @@
int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev,
u16 queue_id, u16 flags);
+void xdp_umem_clear_dev(struct xdp_umem *umem);
bool xdp_umem_validate_queues(struct xdp_umem *umem);
void xdp_get_umem(struct xdp_umem *umem);
void xdp_put_umem(struct xdp_umem *umem);
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index a14e8864e4fa..f53a6ef7c155 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -335,6 +335,22 @@ static int xsk_init_queue(u32 entries, struct xsk_queue **queue,
return 0;
}
+static void xsk_unbind_dev(struct xdp_sock *xs)
+{
+ struct net_device *dev = xs->dev;
+
+ if (!dev || xs->state != XSK_BOUND)
+ return;
+
+ xs->state = XSK_UNBOUND;
+
+ /* Wait for driver to stop using the xdp socket. */
+ xdp_del_sk_umem(xs->umem, xs);
+ xs->dev = NULL;
+ synchronize_net();
+ dev_put(dev);
+}
+
static int xsk_release(struct socket *sock)
{
struct sock *sk = sock->sk;
@@ -354,15 +370,7 @@ static int xsk_release(struct socket *sock)
sock_prot_inuse_add(net, sk->sk_prot, -1);
local_bh_enable();
- if (xs->dev) {
- struct net_device *dev = xs->dev;
-
- /* Wait for driver to stop using the xdp socket. */
- xdp_del_sk_umem(xs->umem, xs);
- xs->dev = NULL;
- synchronize_net();
- dev_put(dev);
- }
+ xsk_unbind_dev(xs);
xskq_destroy(xs->rx);
xskq_destroy(xs->tx);
@@ -412,7 +420,7 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
return -EINVAL;
mutex_lock(&xs->mutex);
- if (xs->dev) {
+ if (xs->state != XSK_READY) {
err = -EBUSY;
goto out_release;
}
@@ -492,6 +500,8 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
out_unlock:
if (err)
dev_put(dev);
+ else
+ xs->state = XSK_BOUND;
out_release:
mutex_unlock(&xs->mutex);
return err;
@@ -520,6 +530,10 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname,
return -EFAULT;
mutex_lock(&xs->mutex);
+ if (xs->state != XSK_READY) {
+ mutex_unlock(&xs->mutex);
+ return -EBUSY;
+ }
q = (optname == XDP_TX_RING) ? &xs->tx : &xs->rx;
err = xsk_init_queue(entries, q, false);
mutex_unlock(&xs->mutex);
@@ -534,7 +548,7 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname,
return -EFAULT;
mutex_lock(&xs->mutex);
- if (xs->umem) {
+ if (xs->state != XSK_READY || xs->umem) {
mutex_unlock(&xs->mutex);
return -EBUSY;
}
@@ -561,6 +575,10 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname,
return -EFAULT;
mutex_lock(&xs->mutex);
+ if (xs->state != XSK_READY) {
+ mutex_unlock(&xs->mutex);
+ return -EBUSY;
+ }
if (!xs->umem) {
mutex_unlock(&xs->mutex);
return -EINVAL;
@@ -662,6 +680,9 @@ static int xsk_mmap(struct file *file, struct socket *sock,
unsigned long pfn;
struct page *qpg;
+ if (xs->state != XSK_READY)
+ return -EBUSY;
+
if (offset == XDP_PGOFF_RX_RING) {
q = READ_ONCE(xs->rx);
} else if (offset == XDP_PGOFF_TX_RING) {
@@ -693,6 +714,38 @@ static int xsk_mmap(struct file *file, struct socket *sock,
size, vma->vm_page_prot);
}
+static int xsk_notifier(struct notifier_block *this,
+ unsigned long msg, void *ptr)
+{
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct net *net = dev_net(dev);
+ struct sock *sk;
+
+ switch (msg) {
+ case NETDEV_UNREGISTER:
+ mutex_lock(&net->xdp.lock);
+ sk_for_each(sk, &net->xdp.list) {
+ struct xdp_sock *xs = xdp_sk(sk);
+
+ mutex_lock(&xs->mutex);
+ if (xs->dev == dev) {
+ sk->sk_err = ENETDOWN;
+ if (!sock_flag(sk, SOCK_DEAD))
+ sk->sk_error_report(sk);
+
+ xsk_unbind_dev(xs);
+
+ /* Clear device references in umem. */
+ xdp_umem_clear_dev(xs->umem);
+ }
+ mutex_unlock(&xs->mutex);
+ }
+ mutex_unlock(&net->xdp.lock);
+ break;
+ }
+ return NOTIFY_DONE;
+}
+
static struct proto xsk_proto = {
.name = "XDP",
.owner = THIS_MODULE,
@@ -764,6 +817,7 @@ static int xsk_create(struct net *net, struct socket *sock, int protocol,
sock_set_flag(sk, SOCK_RCU_FREE);
xs = xdp_sk(sk);
+ xs->state = XSK_READY;
mutex_init(&xs->mutex);
spin_lock_init(&xs->tx_completion_lock);
@@ -784,6 +838,10 @@ static const struct net_proto_family xsk_family_ops = {
.owner = THIS_MODULE,
};
+static struct notifier_block xsk_netdev_notifier = {
+ .notifier_call = xsk_notifier,
+};
+
static int __net_init xsk_net_init(struct net *net)
{
mutex_init(&net->xdp.lock);
@@ -816,8 +874,15 @@ static int __init xsk_init(void)
err = register_pernet_subsys(&xsk_net_ops);
if (err)
goto out_sk;
+
+ err = register_netdevice_notifier(&xsk_netdev_notifier);
+ if (err)
+ goto out_pernet;
+
return 0;
+out_pernet:
+ unregister_pernet_subsys(&xsk_net_ops);
out_sk:
sock_unregister(PF_XDP);
out_proto: