aboutsummaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/core/filter.c110
-rw-r--r--net/xdp/xsk.c10
-rw-r--r--net/xdp/xsk_queue.h9
3 files changed, 109 insertions, 20 deletions
diff --git a/net/core/filter.c b/net/core/filter.c
index 0ca6907d7efe..5fa66a33927f 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1762,6 +1762,37 @@ static const struct bpf_func_proto bpf_skb_pull_data_proto = {
.arg2_type = ARG_ANYTHING,
};
+static inline int sk_skb_try_make_writable(struct sk_buff *skb,
+ unsigned int write_len)
+{
+ int err = __bpf_try_make_writable(skb, write_len);
+
+ bpf_compute_data_end_sk_skb(skb);
+ return err;
+}
+
+BPF_CALL_2(sk_skb_pull_data, struct sk_buff *, skb, u32, len)
+{
+ /* Idea is the following: should the needed direct read/write
+ * test fail during runtime, we can pull in more data and redo
+ * again, since implicitly, we invalidate previous checks here.
+ *
+ * Or, since we know how much we need to make read/writeable,
+ * this can be done once at the program beginning for direct
+ * access case. By this we overcome limitations of only current
+ * headroom being accessible.
+ */
+ return sk_skb_try_make_writable(skb, len ? : skb_headlen(skb));
+}
+
+static const struct bpf_func_proto sk_skb_pull_data_proto = {
+ .func = sk_skb_pull_data,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+};
+
BPF_CALL_5(bpf_l3_csum_replace, struct sk_buff *, skb, u32, offset,
u64, from, u64, to, u64, flags)
{
@@ -2779,7 +2810,8 @@ static int bpf_skb_net_shrink(struct sk_buff *skb, u32 len_diff)
static u32 __bpf_skb_max_len(const struct sk_buff *skb)
{
- return skb->dev->mtu + skb->dev->hard_header_len;
+ return skb->dev ? skb->dev->mtu + skb->dev->hard_header_len :
+ SKB_MAX_ALLOC;
}
static int bpf_skb_adjust_net(struct sk_buff *skb, s32 len_diff)
@@ -2863,8 +2895,8 @@ static int bpf_skb_trim_rcsum(struct sk_buff *skb, unsigned int new_len)
return __skb_trim_rcsum(skb, new_len);
}
-BPF_CALL_3(bpf_skb_change_tail, struct sk_buff *, skb, u32, new_len,
- u64, flags)
+static inline int __bpf_skb_change_tail(struct sk_buff *skb, u32 new_len,
+ u64 flags)
{
u32 max_len = __bpf_skb_max_len(skb);
u32 min_len = __bpf_skb_min_len(skb);
@@ -2900,6 +2932,13 @@ BPF_CALL_3(bpf_skb_change_tail, struct sk_buff *, skb, u32, new_len,
if (!ret && skb_is_gso(skb))
skb_gso_reset(skb);
}
+ return ret;
+}
+
+BPF_CALL_3(bpf_skb_change_tail, struct sk_buff *, skb, u32, new_len,
+ u64, flags)
+{
+ int ret = __bpf_skb_change_tail(skb, new_len, flags);
bpf_compute_data_pointers(skb);
return ret;
@@ -2914,9 +2953,27 @@ static const struct bpf_func_proto bpf_skb_change_tail_proto = {
.arg3_type = ARG_ANYTHING,
};
-BPF_CALL_3(bpf_skb_change_head, struct sk_buff *, skb, u32, head_room,
+BPF_CALL_3(sk_skb_change_tail, struct sk_buff *, skb, u32, new_len,
u64, flags)
{
+ int ret = __bpf_skb_change_tail(skb, new_len, flags);
+
+ bpf_compute_data_end_sk_skb(skb);
+ return ret;
+}
+
+static const struct bpf_func_proto sk_skb_change_tail_proto = {
+ .func = sk_skb_change_tail,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_ANYTHING,
+};
+
+static inline int __bpf_skb_change_head(struct sk_buff *skb, u32 head_room,
+ u64 flags)
+{
u32 max_len = __bpf_skb_max_len(skb);
u32 new_len = skb->len + head_room;
int ret;
@@ -2941,8 +2998,16 @@ BPF_CALL_3(bpf_skb_change_head, struct sk_buff *, skb, u32, head_room,
skb_reset_mac_header(skb);
}
+ return ret;
+}
+
+BPF_CALL_3(bpf_skb_change_head, struct sk_buff *, skb, u32, head_room,
+ u64, flags)
+{
+ int ret = __bpf_skb_change_head(skb, head_room, flags);
+
bpf_compute_data_pointers(skb);
- return 0;
+ return ret;
}
static const struct bpf_func_proto bpf_skb_change_head_proto = {
@@ -2954,6 +3019,23 @@ static const struct bpf_func_proto bpf_skb_change_head_proto = {
.arg3_type = ARG_ANYTHING,
};
+BPF_CALL_3(sk_skb_change_head, struct sk_buff *, skb, u32, head_room,
+ u64, flags)
+{
+ int ret = __bpf_skb_change_head(skb, head_room, flags);
+
+ bpf_compute_data_end_sk_skb(skb);
+ return ret;
+}
+
+static const struct bpf_func_proto sk_skb_change_head_proto = {
+ .func = sk_skb_change_head,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_ANYTHING,
+};
static unsigned long xdp_get_metalen(const struct xdp_buff *xdp)
{
return xdp_data_meta_unsupported(xdp) ? 0 :
@@ -3046,12 +3128,16 @@ static int __bpf_tx_xdp(struct net_device *dev,
u32 index)
{
struct xdp_frame *xdpf;
- int sent;
+ int err, sent;
if (!dev->netdev_ops->ndo_xdp_xmit) {
return -EOPNOTSUPP;
}
+ err = xdp_ok_fwd_dev(dev, xdp->data_end - xdp->data);
+ if (unlikely(err))
+ return err;
+
xdpf = convert_to_xdp_frame(xdp);
if (unlikely(!xdpf))
return -EOVERFLOW;
@@ -3285,7 +3371,8 @@ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
goto err;
}
- if (unlikely((err = __xdp_generic_ok_fwd_dev(skb, fwd))))
+ err = xdp_ok_fwd_dev(fwd, skb->len);
+ if (unlikely(err))
goto err;
skb->dev = fwd;
@@ -4617,9 +4704,12 @@ bool bpf_helper_changes_pkt_data(void *func)
func == bpf_skb_store_bytes ||
func == bpf_skb_change_proto ||
func == bpf_skb_change_head ||
+ func == sk_skb_change_head ||
func == bpf_skb_change_tail ||
+ func == sk_skb_change_tail ||
func == bpf_skb_adjust_room ||
func == bpf_skb_pull_data ||
+ func == sk_skb_pull_data ||
func == bpf_clone_redirect ||
func == bpf_l3_csum_replace ||
func == bpf_l4_csum_replace ||
@@ -4871,11 +4961,11 @@ sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_skb_load_bytes:
return &bpf_skb_load_bytes_proto;
case BPF_FUNC_skb_pull_data:
- return &bpf_skb_pull_data_proto;
+ return &sk_skb_pull_data_proto;
case BPF_FUNC_skb_change_tail:
- return &bpf_skb_change_tail_proto;
+ return &sk_skb_change_tail_proto;
case BPF_FUNC_skb_change_head:
- return &bpf_skb_change_head_proto;
+ return &sk_skb_change_head_proto;
case BPF_FUNC_get_socket_cookie:
return &bpf_get_socket_cookie_proto;
case BPF_FUNC_get_socket_uid:
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 59fb7d3c36a3..7d220cbd09b6 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -199,8 +199,11 @@ static void xsk_destruct_skb(struct sk_buff *skb)
{
u64 addr = (u64)(long)skb_shinfo(skb)->destructor_arg;
struct xdp_sock *xs = xdp_sk(skb->sk);
+ unsigned long flags;
+ spin_lock_irqsave(&xs->tx_completion_lock, flags);
WARN_ON_ONCE(xskq_produce_addr(xs->umem->cq, addr));
+ spin_unlock_irqrestore(&xs->tx_completion_lock, flags);
sock_wfree(skb);
}
@@ -268,15 +271,15 @@ static int xsk_generic_xmit(struct sock *sk, struct msghdr *m,
skb->destructor = xsk_destruct_skb;
err = dev_direct_xmit(skb, xs->queue_id);
+ xskq_discard_desc(xs->tx);
/* Ignore NET_XMIT_CN as packet might have been sent */
if (err == NET_XMIT_DROP || err == NETDEV_TX_BUSY) {
- err = -EAGAIN;
- /* SKB consumed by dev_direct_xmit() */
+ /* SKB completed but not sent */
+ err = -EBUSY;
goto out;
}
sent_frame = true;
- xskq_discard_desc(xs->tx);
}
out:
@@ -755,6 +758,7 @@ static int xsk_create(struct net *net, struct socket *sock, int protocol,
xs = xdp_sk(sk);
mutex_init(&xs->mutex);
+ spin_lock_init(&xs->tx_completion_lock);
local_bh_disable();
sock_prot_inuse_add(net, &xsk_proto, 1);
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index ef6a6f0ec949..52ecaf770642 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -62,14 +62,9 @@ static inline u32 xskq_nb_avail(struct xsk_queue *q, u32 dcnt)
return (entries > dcnt) ? dcnt : entries;
}
-static inline u32 xskq_nb_free_lazy(struct xsk_queue *q, u32 producer)
-{
- return q->nentries - (producer - q->cons_tail);
-}
-
static inline u32 xskq_nb_free(struct xsk_queue *q, u32 producer, u32 dcnt)
{
- u32 free_entries = xskq_nb_free_lazy(q, producer);
+ u32 free_entries = q->nentries - (producer - q->cons_tail);
if (free_entries >= dcnt)
return free_entries;
@@ -129,7 +124,7 @@ static inline int xskq_produce_addr(struct xsk_queue *q, u64 addr)
{
struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring;
- if (xskq_nb_free(q, q->prod_tail, LAZY_UPDATE_THRESHOLD) == 0)
+ if (xskq_nb_free(q, q->prod_tail, 1) == 0)
return -ENOSPC;
ring->desc[q->prod_tail++ & q->ring_mask] = addr;