aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDaniel Borkmann <daniel@iogearbox.net>2019-06-19 02:22:53 +0200
committerDaniel Borkmann <daniel@iogearbox.net>2019-06-19 02:22:53 +0200
commit94079b64255fe40b9b53fd2e4081f68b9b14f54a (patch)
tree2901e802c5161bb29dc5882fa14043f46de96cd0
parenta324aae32fa9bfdd03e89078e20ebcbd7737fda5 (diff)
parentb5dc0163d8fd78e64a7e21f309cf932fda34353e (diff)
downloadlinux-stericsson-94079b64255fe40b9b53fd2e4081f68b9b14f54a.tar.gz
Merge branch 'bpf-bounded-loops'
Alexei Starovoitov says: ==================== v2->v3: fixed issues in backtracking pointed out by Andrii. The next step is to add a lot more tests for backtracking. v1->v2: addressed Andrii's feedback. this patch set introduces verifier support for bounded loops and adds several other improvements. Ideally they would be introduced one at a time, but to support bounded loop the verifier needs to 'step back' in the patch 1. That patch introduces tracking of spill/fill of constants through the stack. Though it's a useful feature it hurts cilium tests. Patch 3 introduces another feature by extending is_branch_taken logic to 'if rX op rY' conditions. This feature is also necessary to support bounded loops. Then patch 4 adds support for the loops while adding key heuristics with jmp_processed. Introduction of parentage chain of verifier states in patch 4 allows patch 9 to add backtracking of precise scalar registers which finally resolves degradation from patch 1. The end result is much faster verifier for existing programs and new support for loops. See patch 8 for many kinds of loops that are now validated. Patch 9 is the most tricky one and could be rewritten with a different algorithm in the future. ==================== Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
-rw-r--r--include/linux/bpf_verifier.h69
-rw-r--r--kernel/bpf/verifier.c767
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c67
-rw-r--r--tools/testing/selftests/bpf/progs/loop1.c28
-rw-r--r--tools/testing/selftests/bpf/progs/loop2.c28
-rw-r--r--tools/testing/selftests/bpf/progs/loop3.c22
-rw-r--r--tools/testing/selftests/bpf/progs/pyperf.h6
-rw-r--r--tools/testing/selftests/bpf/progs/pyperf600.c9
-rw-r--r--tools/testing/selftests/bpf/progs/pyperf600_nounroll.c8
-rw-r--r--tools/testing/selftests/bpf/progs/strobemeta.c10
-rw-r--r--tools/testing/selftests/bpf/progs/strobemeta.h528
-rw-r--r--tools/testing/selftests/bpf/progs/strobemeta_nounroll1.c9
-rw-r--r--tools/testing/selftests/bpf/progs/strobemeta_nounroll2.c9
-rw-r--r--tools/testing/selftests/bpf/progs/test_seg6_loop.c261
-rw-r--r--tools/testing/selftests/bpf/progs/test_sysctl_loop1.c71
-rw-r--r--tools/testing/selftests/bpf/progs/test_sysctl_loop2.c72
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_loop.c231
-rw-r--r--tools/testing/selftests/bpf/test_verifier.c11
-rw-r--r--tools/testing/selftests/bpf/verifier/calls.c22
-rw-r--r--tools/testing/selftests/bpf/verifier/cfg.c11
-rw-r--r--tools/testing/selftests/bpf/verifier/direct_packet_access.c3
-rw-r--r--tools/testing/selftests/bpf/verifier/helper_access_var_len.c28
-rw-r--r--tools/testing/selftests/bpf/verifier/loops1.c161
23 files changed, 2317 insertions, 114 deletions
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 704ed7971472..19393b0964a8 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -139,6 +139,8 @@ struct bpf_reg_state {
*/
s32 subreg_def;
enum bpf_reg_liveness live;
+ /* if (!precise && SCALAR_VALUE) min/max/tnum don't affect safety */
+ bool precise;
};
enum bpf_stack_slot_type {
@@ -190,14 +192,77 @@ struct bpf_func_state {
struct bpf_stack_state *stack;
};
+struct bpf_idx_pair {
+ u32 prev_idx;
+ u32 idx;
+};
+
#define MAX_CALL_FRAMES 8
struct bpf_verifier_state {
/* call stack tracking */
struct bpf_func_state *frame[MAX_CALL_FRAMES];
+ struct bpf_verifier_state *parent;
+ /*
+ * 'branches' field is the number of branches left to explore:
+ * 0 - all possible paths from this state reached bpf_exit or
+ * were safely pruned
+ * 1 - at least one path is being explored.
+ * This state hasn't reached bpf_exit
+ * 2 - at least two paths are being explored.
+ * This state is an immediate parent of two children.
+ * One is fallthrough branch with branches==1 and another
+ * state is pushed into stack (to be explored later) also with
+ * branches==1. The parent of this state has branches==1.
+ * The verifier state tree connected via 'parent' pointer looks like:
+ * 1
+ * 1
+ * 2 -> 1 (first 'if' pushed into stack)
+ * 1
+ * 2 -> 1 (second 'if' pushed into stack)
+ * 1
+ * 1
+ * 1 bpf_exit.
+ *
+ * Once do_check() reaches bpf_exit, it calls update_branch_counts()
+ * and the verifier state tree will look:
+ * 1
+ * 1
+ * 2 -> 1 (first 'if' pushed into stack)
+ * 1
+ * 1 -> 1 (second 'if' pushed into stack)
+ * 0
+ * 0
+ * 0 bpf_exit.
+ * After pop_stack() the do_check() will resume at second 'if'.
+ *
+ * If is_state_visited() sees a state with branches > 0 it means
+ * there is a loop. If such state is exactly equal to the current state
+ * it's an infinite loop. Note states_equal() checks for states
+ * equvalency, so two states being 'states_equal' does not mean
+ * infinite loop. The exact comparison is provided by
+ * states_maybe_looping() function. It's a stronger pre-check and
+ * much faster than states_equal().
+ *
+ * This algorithm may not find all possible infinite loops or
+ * loop iteration count may be too high.
+ * In such cases BPF_COMPLEXITY_LIMIT_INSNS limit kicks in.
+ */
+ u32 branches;
u32 insn_idx;
u32 curframe;
u32 active_spin_lock;
bool speculative;
+
+ /* first and last insn idx of this verifier state */
+ u32 first_insn_idx;
+ u32 last_insn_idx;
+ /* jmp history recorded from first to last.
+ * backtracking is using it to go from last to first.
+ * For most states jmp_history_cnt is [0-3].
+ * For loops can go up to ~40.
+ */
+ struct bpf_idx_pair *jmp_history;
+ u32 jmp_history_cnt;
};
#define bpf_get_spilled_reg(slot, frame) \
@@ -312,7 +377,9 @@ struct bpf_verifier_env {
} cfg;
u32 subprog_cnt;
/* number of instructions analyzed by the verifier */
- u32 insn_processed;
+ u32 prev_insn_processed, insn_processed;
+ /* number of jmps, calls, exits analyzed so far */
+ u32 prev_jmps_processed, jmps_processed;
/* total verification time */
u64 verification_time;
/* maximum number of verifier states kept in 'branching' instructions */
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 8d1786357a09..709ce4cef8ba 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -455,12 +455,12 @@ static void print_verifier_state(struct bpf_verifier_env *env,
verbose(env, " R%d", i);
print_liveness(env, reg->live);
verbose(env, "=%s", reg_type_str[t]);
+ if (t == SCALAR_VALUE && reg->precise)
+ verbose(env, "P");
if ((t == SCALAR_VALUE || t == PTR_TO_STACK) &&
tnum_is_const(reg->var_off)) {
/* reg->off should be 0 for SCALAR_VALUE */
verbose(env, "%lld", reg->var_off.value + reg->off);
- if (t == PTR_TO_STACK)
- verbose(env, ",call_%d", func(env, reg)->callsite);
} else {
verbose(env, "(id=%d", reg->id);
if (reg_type_may_be_refcounted_or_null(t))
@@ -522,11 +522,17 @@ static void print_verifier_state(struct bpf_verifier_env *env,
continue;
verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE);
print_liveness(env, state->stack[i].spilled_ptr.live);
- if (state->stack[i].slot_type[0] == STACK_SPILL)
- verbose(env, "=%s",
- reg_type_str[state->stack[i].spilled_ptr.type]);
- else
+ if (state->stack[i].slot_type[0] == STACK_SPILL) {
+ reg = &state->stack[i].spilled_ptr;
+ t = reg->type;
+ verbose(env, "=%s", reg_type_str[t]);
+ if (t == SCALAR_VALUE && reg->precise)
+ verbose(env, "P");
+ if (t == SCALAR_VALUE && tnum_is_const(reg->var_off))
+ verbose(env, "%lld", reg->var_off.value + reg->off);
+ } else {
verbose(env, "=%s", types_buf);
+ }
}
if (state->acquired_refs && state->refs[0].id) {
verbose(env, " refs=%d", state->refs[0].id);
@@ -675,6 +681,13 @@ static void free_func_state(struct bpf_func_state *state)
kfree(state);
}
+static void clear_jmp_history(struct bpf_verifier_state *state)
+{
+ kfree(state->jmp_history);
+ state->jmp_history = NULL;
+ state->jmp_history_cnt = 0;
+}
+
static void free_verifier_state(struct bpf_verifier_state *state,
bool free_self)
{
@@ -684,6 +697,7 @@ static void free_verifier_state(struct bpf_verifier_state *state,
free_func_state(state->frame[i]);
state->frame[i] = NULL;
}
+ clear_jmp_history(state);
if (free_self)
kfree(state);
}
@@ -711,8 +725,18 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state,
const struct bpf_verifier_state *src)
{
struct bpf_func_state *dst;
+ u32 jmp_sz = sizeof(struct bpf_idx_pair) * src->jmp_history_cnt;
int i, err;
+ if (dst_state->jmp_history_cnt < src->jmp_history_cnt) {
+ kfree(dst_state->jmp_history);
+ dst_state->jmp_history = kmalloc(jmp_sz, GFP_USER);
+ if (!dst_state->jmp_history)
+ return -ENOMEM;
+ }
+ memcpy(dst_state->jmp_history, src->jmp_history, jmp_sz);
+ dst_state->jmp_history_cnt = src->jmp_history_cnt;
+
/* if dst has more stack frames then src frame, free them */
for (i = src->curframe + 1; i <= dst_state->curframe; i++) {
free_func_state(dst_state->frame[i]);
@@ -721,6 +745,10 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state,
dst_state->speculative = src->speculative;
dst_state->curframe = src->curframe;
dst_state->active_spin_lock = src->active_spin_lock;
+ dst_state->branches = src->branches;
+ dst_state->parent = src->parent;
+ dst_state->first_insn_idx = src->first_insn_idx;
+ dst_state->last_insn_idx = src->last_insn_idx;
for (i = 0; i <= src->curframe; i++) {
dst = dst_state->frame[i];
if (!dst) {
@@ -736,6 +764,23 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state,
return 0;
}
+static void update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
+{
+ while (st) {
+ u32 br = --st->branches;
+
+ /* WARN_ON(br > 1) technically makes sense here,
+ * but see comment in push_stack(), hence:
+ */
+ WARN_ONCE((int)br < 0,
+ "BUG update_branch_counts:branches_to_explore=%d\n",
+ br);
+ if (br)
+ break;
+ st = st->parent;
+ }
+}
+
static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx,
int *insn_idx)
{
@@ -789,6 +834,18 @@ static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
env->stack_size);
goto err;
}
+ if (elem->st.parent) {
+ ++elem->st.parent->branches;
+ /* WARN_ON(branches > 2) technically makes sense here,
+ * but
+ * 1. speculative states will bump 'branches' for non-branch
+ * instructions
+ * 2. is_state_visited() heuristics may decide not to create
+ * a new state for a sequence of branches and all such current
+ * and cloned states will be pointing to a single parent state
+ * which might have large 'branches' count.
+ */
+ }
return &elem->st;
err:
free_verifier_state(env->cur_state, true);
@@ -936,6 +993,9 @@ static void __mark_reg_unbounded(struct bpf_reg_state *reg)
reg->smax_value = S64_MAX;
reg->umin_value = 0;
reg->umax_value = U64_MAX;
+
+ /* constant backtracking is enabled for root only for now */
+ reg->precise = capable(CAP_SYS_ADMIN) ? false : true;
}
/* Mark a register as having a completely unknown (scalar) value. */
@@ -1347,6 +1407,389 @@ static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
return 0;
}
+/* for any branch, call, exit record the history of jmps in the given state */
+static int push_jmp_history(struct bpf_verifier_env *env,
+ struct bpf_verifier_state *cur)
+{
+ u32 cnt = cur->jmp_history_cnt;
+ struct bpf_idx_pair *p;
+
+ cnt++;
+ p = krealloc(cur->jmp_history, cnt * sizeof(*p), GFP_USER);
+ if (!p)
+ return -ENOMEM;
+ p[cnt - 1].idx = env->insn_idx;
+ p[cnt - 1].prev_idx = env->prev_insn_idx;
+ cur->jmp_history = p;
+ cur->jmp_history_cnt = cnt;
+ return 0;
+}
+
+/* Backtrack one insn at a time. If idx is not at the top of recorded
+ * history then previous instruction came from straight line execution.
+ */
+static int get_prev_insn_idx(struct bpf_verifier_state *st, int i,
+ u32 *history)
+{
+ u32 cnt = *history;
+
+ if (cnt && st->jmp_history[cnt - 1].idx == i) {
+ i = st->jmp_history[cnt - 1].prev_idx;
+ (*history)--;
+ } else {
+ i--;
+ }
+ return i;
+}
+
+/* For given verifier state backtrack_insn() is called from the last insn to
+ * the first insn. Its purpose is to compute a bitmask of registers and
+ * stack slots that needs precision in the parent verifier state.
+ */
+static int backtrack_insn(struct bpf_verifier_env *env, int idx,
+ u32 *reg_mask, u64 *stack_mask)
+{
+ const struct bpf_insn_cbs cbs = {
+ .cb_print = verbose,
+ .private_data = env,
+ };
+ struct bpf_insn *insn = env->prog->insnsi + idx;
+ u8 class = BPF_CLASS(insn->code);
+ u8 opcode = BPF_OP(insn->code);
+ u8 mode = BPF_MODE(insn->code);
+ u32 dreg = 1u << insn->dst_reg;
+ u32 sreg = 1u << insn->src_reg;
+ u32 spi;
+
+ if (insn->code == 0)
+ return 0;
+ if (env->log.level & BPF_LOG_LEVEL) {
+ verbose(env, "regs=%x stack=%llx before ", *reg_mask, *stack_mask);
+ verbose(env, "%d: ", idx);
+ print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
+ }
+
+ if (class == BPF_ALU || class == BPF_ALU64) {
+ if (!(*reg_mask & dreg))
+ return 0;
+ if (opcode == BPF_MOV) {
+ if (BPF_SRC(insn->code) == BPF_X) {
+ /* dreg = sreg
+ * dreg needs precision after this insn
+ * sreg needs precision before this insn
+ */
+ *reg_mask &= ~dreg;
+ *reg_mask |= sreg;
+ } else {
+ /* dreg = K
+ * dreg needs precision after this insn.
+ * Corresponding register is already marked
+ * as precise=true in this verifier state.
+ * No further markings in parent are necessary
+ */
+ *reg_mask &= ~dreg;
+ }
+ } else {
+ if (BPF_SRC(insn->code) == BPF_X) {
+ /* dreg += sreg
+ * both dreg and sreg need precision
+ * before this insn
+ */
+ *reg_mask |= sreg;
+ } /* else dreg += K
+ * dreg still needs precision before this insn
+ */
+ }
+ } else if (class == BPF_LDX) {
+ if (!(*reg_mask & dreg))
+ return 0;
+ *reg_mask &= ~dreg;
+
+ /* scalars can only be spilled into stack w/o losing precision.
+ * Load from any other memory can be zero extended.
+ * The desire to keep that precision is already indicated
+ * by 'precise' mark in corresponding register of this state.
+ * No further tracking necessary.
+ */
+ if (insn->src_reg != BPF_REG_FP)
+ return 0;
+ if (BPF_SIZE(insn->code) != BPF_DW)
+ return 0;
+
+ /* dreg = *(u64 *)[fp - off] was a fill from the stack.
+ * that [fp - off] slot contains scalar that needs to be
+ * tracked with precision
+ */
+ spi = (-insn->off - 1) / BPF_REG_SIZE;
+ if (spi >= 64) {
+ verbose(env, "BUG spi %d\n", spi);
+ WARN_ONCE(1, "verifier backtracking bug");
+ return -EFAULT;
+ }
+ *stack_mask |= 1ull << spi;
+ } else if (class == BPF_STX) {
+ if (*reg_mask & dreg)
+ /* stx shouldn't be using _scalar_ dst_reg
+ * to access memory. It means backtracking
+ * encountered a case of pointer subtraction.
+ */
+ return -ENOTSUPP;
+ /* scalars can only be spilled into stack */
+ if (insn->dst_reg != BPF_REG_FP)
+ return 0;
+ if (BPF_SIZE(insn->code) != BPF_DW)
+ return 0;
+ spi = (-insn->off - 1) / BPF_REG_SIZE;
+ if (spi >= 64) {
+ verbose(env, "BUG spi %d\n", spi);
+ WARN_ONCE(1, "verifier backtracking bug");
+ return -EFAULT;
+ }
+ if (!(*stack_mask & (1ull << spi)))
+ return 0;
+ *stack_mask &= ~(1ull << spi);
+ *reg_mask |= sreg;
+ } else if (class == BPF_JMP || class == BPF_JMP32) {
+ if (opcode == BPF_CALL) {
+ if (insn->src_reg == BPF_PSEUDO_CALL)
+ return -ENOTSUPP;
+ /* regular helper call sets R0 */
+ *reg_mask &= ~1;
+ if (*reg_mask & 0x3f) {
+ /* if backtracing was looking for registers R1-R5
+ * they should have been found already.
+ */
+ verbose(env, "BUG regs %x\n", *reg_mask);
+ WARN_ONCE(1, "verifier backtracking bug");
+ return -EFAULT;
+ }
+ } else if (opcode == BPF_EXIT) {
+ return -ENOTSUPP;
+ }
+ } else if (class == BPF_LD) {
+ if (!(*reg_mask & dreg))
+ return 0;
+ *reg_mask &= ~dreg;
+ /* It's ld_imm64 or ld_abs or ld_ind.
+ * For ld_imm64 no further tracking of precision
+ * into parent is necessary
+ */
+ if (mode == BPF_IND || mode == BPF_ABS)
+ /* to be analyzed */
+ return -ENOTSUPP;
+ } else if (class == BPF_ST) {
+ if (*reg_mask & dreg)
+ /* likely pointer subtraction */
+ return -ENOTSUPP;
+ }
+ return 0;
+}
+
+/* the scalar precision tracking algorithm:
+ * . at the start all registers have precise=false.
+ * . scalar ranges are tracked as normal through alu and jmp insns.
+ * . once precise value of the scalar register is used in:
+ * . ptr + scalar alu
+ * . if (scalar cond K|scalar)
+ * . helper_call(.., scalar, ...) where ARG_CONST is expected
+ * backtrack through the verifier states and mark all registers and
+ * stack slots with spilled constants that these scalar regisers
+ * should be precise.
+ * . during state pruning two registers (or spilled stack slots)
+ * are equivalent if both are not precise.
+ *
+ * Note the verifier cannot simply walk register parentage chain,
+ * since many different registers and stack slots could have been
+ * used to compute single precise scalar.
+ *
+ * The approach of starting with precise=true for all registers and then
+ * backtrack to mark a register as not precise when the verifier detects
+ * that program doesn't care about specific value (e.g., when helper
+ * takes register as ARG_ANYTHING parameter) is not safe.
+ *
+ * It's ok to walk single parentage chain of the verifier states.
+ * It's possible that this backtracking will go all the way till 1st insn.
+ * All other branches will be explored for needing precision later.
+ *
+ * The backtracking needs to deal with cases like:
+ * R8=map_value(id=0,off=0,ks=4,vs=1952,imm=0) R9_w=map_value(id=0,off=40,ks=4,vs=1952,imm=0)
+ * r9 -= r8
+ * r5 = r9
+ * if r5 > 0x79f goto pc+7
+ * R5_w=inv(id=0,umax_value=1951,var_off=(0x0; 0x7ff))
+ * r5 += 1
+ * ...
+ * call bpf_perf_event_output#25
+ * where .arg5_type = ARG_CONST_SIZE_OR_ZERO
+ *
+ * and this case:
+ * r6 = 1
+ * call foo // uses callee's r6 inside to compute r0
+ * r0 += r6
+ * if r0 == 0 goto
+ *
+ * to track above reg_mask/stack_mask needs to be independent for each frame.
+ *
+ * Also if parent's curframe > frame where backtracking started,
+ * the verifier need to mark registers in both frames, otherwise callees
+ * may incorrectly prune callers. This is similar to
+ * commit 7640ead93924 ("bpf: verifier: make sure callees don't prune with caller differences")
+ *
+ * For now backtracking falls back into conservative marking.
+ */
+static void mark_all_scalars_precise(struct bpf_verifier_env *env,
+ struct bpf_verifier_state *st)
+{
+ struct bpf_func_state *func;
+ struct bpf_reg_state *reg;
+ int i, j;
+
+ /* big hammer: mark all scalars precise in this path.
+ * pop_stack may still get !precise scalars.
+ */
+ for (; st; st = st->parent)
+ for (i = 0; i <= st->curframe; i++) {
+ func = st->frame[i];
+ for (j = 0; j < BPF_REG_FP; j++) {
+ reg = &func->regs[j];
+ if (reg->type != SCALAR_VALUE)
+ continue;
+ reg->precise = true;
+ }
+ for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) {
+ if (func->stack[j].slot_type[0] != STACK_SPILL)
+ continue;
+ reg = &func->stack[j].spilled_ptr;
+ if (reg->type != SCALAR_VALUE)
+ continue;
+ reg->precise = true;
+ }
+ }
+}
+
+static int mark_chain_precision(struct bpf_verifier_env *env, int regno)
+{
+ struct bpf_verifier_state *st = env->cur_state;
+ int first_idx = st->first_insn_idx;
+ int last_idx = env->insn_idx;
+ struct bpf_func_state *func;
+ struct bpf_reg_state *reg;
+ u32 reg_mask = 1u << regno;
+ u64 stack_mask = 0;
+ bool skip_first = true;
+ int i, err;
+
+ if (!env->allow_ptr_leaks)
+ /* backtracking is root only for now */
+ return 0;
+
+ func = st->frame[st->curframe];
+ reg = &func->regs[regno];
+ if (reg->type != SCALAR_VALUE) {
+ WARN_ONCE(1, "backtracing misuse");
+ return -EFAULT;
+ }
+ if (reg->precise)
+ return 0;
+ func->regs[regno].precise = true;
+
+ for (;;) {
+ DECLARE_BITMAP(mask, 64);
+ bool new_marks = false;
+ u32 history = st->jmp_history_cnt;
+
+ if (env->log.level & BPF_LOG_LEVEL)
+ verbose(env, "last_idx %d first_idx %d\n", last_idx, first_idx);
+ for (i = last_idx;;) {
+ if (skip_first) {
+ err = 0;
+ skip_first = false;
+ } else {
+ err = backtrack_insn(env, i, &reg_mask, &stack_mask);
+ }
+ if (err == -ENOTSUPP) {
+ mark_all_scalars_precise(env, st);
+ return 0;
+ } else if (err) {
+ return err;
+ }
+ if (!reg_mask && !stack_mask)
+ /* Found assignment(s) into tracked register in this state.
+ * Since this state is already marked, just return.
+ * Nothing to be tracked further in the parent state.
+ */
+ return 0;
+ if (i == first_idx)
+ break;
+ i = get_prev_insn_idx(st, i, &history);
+ if (i >= env->prog->len) {
+ /* This can happen if backtracking reached insn 0
+ * and there are still reg_mask or stack_mask
+ * to backtrack.
+ * It means the backtracking missed the spot where
+ * particular register was initialized with a constant.
+ */
+ verbose(env, "BUG backtracking idx %d\n", i);
+ WARN_ONCE(1, "verifier backtracking bug");
+ return -EFAULT;
+ }
+ }
+ st = st->parent;
+ if (!st)
+ break;
+
+ func = st->frame[st->curframe];
+ bitmap_from_u64(mask, reg_mask);
+ for_each_set_bit(i, mask, 32) {
+ reg = &func->regs[i];
+ if (reg->type != SCALAR_VALUE)
+ continue;
+ if (!reg->precise)
+ new_marks = true;
+ reg->precise = true;
+ }
+
+ bitmap_from_u64(mask, stack_mask);
+ for_each_set_bit(i, mask, 64) {
+ if (i >= func->allocated_stack / BPF_REG_SIZE) {
+ /* This can happen if backtracking
+ * is propagating stack precision where
+ * caller has larger stack frame
+ * than callee, but backtrack_insn() should
+ * have returned -ENOTSUPP.
+ */
+ verbose(env, "BUG spi %d stack_size %d\n",
+ i, func->allocated_stack);
+ WARN_ONCE(1, "verifier backtracking bug");
+ return -EFAULT;
+ }
+
+ if (func->stack[i].slot_type[0] != STACK_SPILL)
+ continue;
+ reg = &func->stack[i].spilled_ptr;
+ if (reg->type != SCALAR_VALUE)
+ continue;
+ if (!reg->precise)
+ new_marks = true;
+ reg->precise = true;
+ }
+ if (env->log.level & BPF_LOG_LEVEL) {
+ print_verifier_state(env, func);
+ verbose(env, "parent %s regs=%x stack=%llx marks\n",
+ new_marks ? "didn't have" : "already had",
+ reg_mask, stack_mask);
+ }
+
+ if (!new_marks)
+ break;
+
+ last_idx = st->last_insn_idx;
+ first_idx = st->first_insn_idx;
+ }
+ return 0;
+}
+
+
static bool is_spillable_regtype(enum bpf_reg_type type)
{
switch (type) {
@@ -1378,6 +1821,23 @@ static bool register_is_null(struct bpf_reg_state *reg)
return reg->type == SCALAR_VALUE && tnum_equals_const(reg->var_off, 0);
}
+static bool register_is_const(struct bpf_reg_state *reg)
+{
+ return reg->type == SCALAR_VALUE && tnum_is_const(reg->var_off);
+}
+
+static void save_register_state(struct bpf_func_state *state,
+ int spi, struct bpf_reg_state *reg)
+{
+ int i;
+
+ state->stack[spi].spilled_ptr = *reg;
+ state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
+
+ for (i = 0; i < BPF_REG_SIZE; i++)
+ state->stack[spi].slot_type[i] = STACK_SPILL;
+}
+
/* check_stack_read/write functions track spill/fill of registers,
* stack boundary and alignment are checked in check_mem_access()
*/
@@ -1387,7 +1847,8 @@ static int check_stack_write(struct bpf_verifier_env *env,
{
struct bpf_func_state *cur; /* state of the current function */
int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err;
- enum bpf_reg_type type;
+ u32 dst_reg = env->prog->insnsi[insn_idx].dst_reg;
+ struct bpf_reg_state *reg = NULL;
err = realloc_func_state(state, round_up(slot + 1, BPF_REG_SIZE),
state->acquired_refs, true);
@@ -1404,27 +1865,48 @@ static int check_stack_write(struct bpf_verifier_env *env,
}
cur = env->cur_state->frame[env->cur_state->curframe];
- if (value_regno >= 0 &&
- is_spillable_regtype((type = cur->regs[value_regno].type))) {
-
+ if (value_regno >= 0)
+ reg = &cur->regs[value_regno];
+
+ if (reg && size == BPF_REG_SIZE && register_is_const(reg) &&
+ !register_is_null(reg) && env->allow_ptr_leaks) {
+ if (dst_reg != BPF_REG_FP) {
+ /* The backtracking logic can only recognize explicit
+ * stack slot address like [fp - 8]. Other spill of
+ * scalar via different register has to be conervative.
+ * Backtrack from here and mark all registers as precise
+ * that contributed into 'reg' being a constant.
+ */
+ err = mark_chain_precision(env, value_regno);
+ if (err)
+ return err;
+ }
+ save_register_state(state, spi, reg);
+ } else if (reg && is_spillable_regtype(reg->type)) {
/* register containing pointer is being spilled into stack */
if (size != BPF_REG_SIZE) {
+ verbose_linfo(env, insn_idx, "; ");
verbose(env, "invalid size of register spill\n");
return -EACCES;
}
- if (state != cur && type == PTR_TO_STACK) {
+ if (state != cur && reg->type == PTR_TO_STACK) {
verbose(env, "cannot spill pointers to stack into stack frame of the caller\n");
return -EINVAL;
}
- /* save register state */
- state->stack[spi].spilled_ptr = cur->regs[value_regno];
- state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
+ if (!env->allow_ptr_leaks) {
+ bool sanitize = false;
- for (i = 0; i < BPF_REG_SIZE; i++) {
- if (state->stack[spi].slot_type[i] == STACK_MISC &&
- !env->allow_ptr_leaks) {
+ if (state->stack[spi].slot_type[0] == STACK_SPILL &&
+ register_is_const(&state->stack[spi].spilled_ptr))
+ sanitize = true;
+ for (i = 0; i < BPF_REG_SIZE; i++)
+ if (state->stack[spi].slot_type[i] == STACK_MISC) {
+ sanitize = true;
+ break;
+ }
+ if (sanitize) {
int *poff = &env->insn_aux_data[insn_idx].sanitize_stack_off;
int soff = (-spi - 1) * BPF_REG_SIZE;
@@ -1447,8 +1929,8 @@ static int check_stack_write(struct bpf_verifier_env *env,
}
*poff = soff;
}
- state->stack[spi].slot_type[i] = STACK_SPILL;
}
+ save_register_state(state, spi, reg);
} else {
u8 type = STACK_MISC;
@@ -1471,9 +1953,13 @@ static int check_stack_write(struct bpf_verifier_env *env,
state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
/* when we zero initialize stack slots mark them as such */
- if (value_regno >= 0 &&
- register_is_null(&cur->regs[value_regno]))
+ if (reg && register_is_null(reg)) {
+ /* backtracking doesn't work for STACK_ZERO yet. */
+ err = mark_chain_precision(env, value_regno);
+ if (err)
+ return err;
type = STACK_ZERO;
+ }
/* Mark slots affected by this stack write. */
for (i = 0; i < size; i++)
@@ -1490,6 +1976,7 @@ static int check_stack_read(struct bpf_verifier_env *env,
struct bpf_verifier_state *vstate = env->cur_state;
struct bpf_func_state *state = vstate->frame[vstate->curframe];
int i, slot = -off - 1, spi = slot / BPF_REG_SIZE;
+ struct bpf_reg_state *reg;
u8 *stype;
if (reg_state->allocated_stack <= slot) {
@@ -1498,11 +1985,21 @@ static int check_stack_read(struct bpf_verifier_env *env,
return -EACCES;
}
stype = reg_state->stack[spi].slot_type;
+ reg = &reg_state->stack[spi].spilled_ptr;
if (stype[0] == STACK_SPILL) {
if (size != BPF_REG_SIZE) {
- verbose(env, "invalid size of register spill\n");
- return -EACCES;
+ if (reg->type != SCALAR_VALUE) {
+ verbose_linfo(env, env->insn_idx, "; ");
+ verbose(env, "invalid size of register fill\n");
+ return -EACCES;
+ }
+ if (value_regno >= 0) {
+ mark_reg_unknown(env, state->regs, value_regno);
+ state->regs[value_regno].live |= REG_LIVE_WRITTEN;
+ }
+ mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
+ return 0;
}
for (i = 1; i < BPF_REG_SIZE; i++) {
if (stype[(slot - i) % BPF_REG_SIZE] != STACK_SPILL) {
@@ -1513,17 +2010,14 @@ static int check_stack_read(struct bpf_verifier_env *env,
if (value_regno >= 0) {
/* restore register state from stack */
- state->regs[value_regno] = reg_state->stack[spi].spilled_ptr;
+ state->regs[value_regno] = *reg;
/* mark reg as written since spilled pointer state likely
* has its liveness marks cleared by is_state_visited()
* which resets stack/reg liveness for state transitions
*/
state->regs[value_regno].live |= REG_LIVE_WRITTEN;
}
- mark_reg_read(env, &reg_state->stack[spi].spilled_ptr,
- reg_state->stack[spi].spilled_ptr.parent,
- REG_LIVE_READ64);
- return 0;
+ mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
} else {
int zeros = 0;
@@ -1538,23 +2032,32 @@ static int check_stack_read(struct bpf_verifier_env *env,
off, i, size);
return -EACCES;
}
- mark_reg_read(env, &reg_state->stack[spi].spilled_ptr,
- reg_state->stack[spi].spilled_ptr.parent,
- REG_LIVE_READ64);
+ mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
if (value_regno >= 0) {
if (zeros == size) {
/* any size read into register is zero extended,
* so the whole register == const_zero
*/
__mark_reg_const_zero(&state->regs[value_regno]);
+ /* backtracking doesn't support STACK_ZERO yet,
+ * so mark it precise here, so that later
+ * backtracking can stop here.
+ * Backtracking may not need this if this register
+ * doesn't participate in pointer adjustment.
+ * Forward propagation of precise flag is not
+ * necessary either. This mark is only to stop
+ * backtracking. Any register that contributed
+ * to const 0 was marked precise before spill.
+ */
+ state->regs[value_regno].precise = true;
} else {
/* have read misc data from the stack */
mark_reg_unknown(env, state->regs, value_regno);
}
state->regs[value_regno].live |= REG_LIVE_WRITTEN;
}
- return 0;
}
+ return 0;
}
static int check_stack_access(struct bpf_verifier_env *env,
@@ -2415,7 +2918,7 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
{
struct bpf_reg_state *reg = reg_state(env, regno);
struct bpf_func_state *state = func(env, reg);
- int err, min_off, max_off, i, slot, spi;
+ int err, min_off, max_off, i, j, slot, spi;
if (reg->type != PTR_TO_STACK) {
/* Allow zero-byte read from NULL, regardless of pointer type */
@@ -2503,6 +3006,14 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
*stype = STACK_MISC;
goto mark;
}
+ if (state->stack[spi].slot_type[0] == STACK_SPILL &&
+ state->stack[spi].spilled_ptr.type == SCALAR_VALUE) {
+ __mark_reg_unknown(&state->stack[spi].spilled_ptr);
+ for (j = 0; j < BPF_REG_SIZE; j++)
+ state->stack[spi].slot_type[j] = STACK_MISC;
+ goto mark;
+ }
+
err:
if (tnum_is_const(reg->var_off)) {
verbose(env, "invalid indirect read from stack off %d+%d size %d\n",
@@ -2854,6 +3365,8 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
err = check_helper_mem_access(env, regno - 1,
reg->umax_value,
zero_size_allowed, meta);
+ if (!err)
+ err = mark_chain_precision(env, regno);
} else if (arg_type_is_int_ptr(arg_type)) {
int size = int_ptr_type_to_size(arg_type);
@@ -4290,6 +4803,7 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
struct bpf_reg_state *regs = state->regs, *dst_reg, *src_reg;
struct bpf_reg_state *ptr_reg = NULL, off_reg = {0};
u8 opcode = BPF_OP(insn->code);
+ int err;
dst_reg = &regs[insn->dst_reg];
src_reg = NULL;
@@ -4316,11 +4830,17 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
* This is legal, but we have to reverse our
* src/dest handling in computing the range
*/
+ err = mark_chain_precision(env, insn->dst_reg);
+ if (err)
+ return err;
return adjust_ptr_min_max_vals(env, insn,
src_reg, dst_reg);
}
} else if (ptr_reg) {
/* pointer += scalar */
+ err = mark_chain_precision(env, insn->src_reg);
+ if (err)
+ return err;
return adjust_ptr_min_max_vals(env, insn,
dst_reg, src_reg);
}
@@ -5226,9 +5746,10 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
struct bpf_verifier_state *this_branch = env->cur_state;
struct bpf_verifier_state *other_branch;
struct bpf_reg_state *regs = this_branch->frame[this_branch->curframe]->regs;
- struct bpf_reg_state *dst_reg, *other_branch_regs;
+ struct bpf_reg_state *dst_reg, *other_branch_regs, *src_reg = NULL;
u8 opcode = BPF_OP(insn->code);
bool is_jmp32;
+ int pred = -1;
int err;
/* Only conditional jumps are expected to reach here. */
@@ -5253,6 +5774,7 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
insn->src_reg);
return -EACCES;
}
+ src_reg = &regs[insn->src_reg];
} else {
if (insn->src_reg != BPF_REG_0) {
verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
@@ -5268,20 +5790,29 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
dst_reg = &regs[insn->dst_reg];
is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
- if (BPF_SRC(insn->code) == BPF_K) {
- int pred = is_branch_taken(dst_reg, insn->imm, opcode,
- is_jmp32);
-
- if (pred == 1) {
- /* only follow the goto, ignore fall-through */
- *insn_idx += insn->off;
- return 0;
- } else if (pred == 0) {
- /* only follow fall-through branch, since
- * that's where the program will go
- */
- return 0;
- }
+ if (BPF_SRC(insn->code) == BPF_K)
+ pred = is_branch_taken(dst_reg, insn->imm,
+ opcode, is_jmp32);
+ else if (src_reg->type == SCALAR_VALUE &&
+ tnum_is_const(src_reg->var_off))
+ pred = is_branch_taken(dst_reg, src_reg->var_off.value,
+ opcode, is_jmp32);
+ if (pred >= 0) {
+ err = mark_chain_precision(env, insn->dst_reg);
+ if (BPF_SRC(insn->code) == BPF_X && !err)
+ err = mark_chain_precision(env, insn->src_reg);
+ if (err)
+ return err;
+ }
+ if (pred == 1) {
+ /* only follow the goto, ignore fall-through */
+ *insn_idx += insn->off;
+ return 0;
+ } else if (pred == 0) {
+ /* only follow fall-through branch, since
+ * that's where the program will go
+ */
+ return 0;
}
other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx,
@@ -5638,7 +6169,8 @@ static void init_explored_state(struct bpf_verifier_env *env, int idx)
* w - next instruction
* e - edge
*/
-static int push_insn(int t, int w, int e, struct bpf_verifier_env *env)
+static int push_insn(int t, int w, int e, struct bpf_verifier_env *env,
+ bool loop_ok)
{
int *insn_stack = env->cfg.insn_stack;
int *insn_state = env->cfg.insn_state;
@@ -5668,6 +6200,8 @@ static int push_insn(int t, int w, int e, struct bpf_verifier_env *env)
insn_stack[env->cfg.cur_stack++] = w;
return 1;
} else if ((insn_state[w] & 0xF0) == DISCOVERED) {
+ if (loop_ok && env->allow_ptr_leaks)
+ return 0;
verbose_linfo(env, t, "%d: ", t);
verbose_linfo(env, w, "%d: ", w);
verbose(env, "back-edge from insn %d to %d\n", t, w);
@@ -5719,7 +6253,7 @@ peek_stack:
if (opcode == BPF_EXIT) {
goto mark_explored;
} else if (opcode == BPF_CALL) {
- ret = push_insn(t, t + 1, FALLTHROUGH, env);
+ ret = push_insn(t, t + 1, FALLTHROUGH, env, false);
if (ret == 1)
goto peek_stack;
else if (ret < 0)
@@ -5728,7 +6262,8 @@ peek_stack:
init_explored_state(env, t + 1);
if (insns[t].src_reg == BPF_PSEUDO_CALL) {
init_explored_state(env, t);
- ret = push_insn(t, t + insns[t].imm + 1, BRANCH, env);
+ ret = push_insn(t, t + insns[t].imm + 1, BRANCH,
+ env, false);
if (ret == 1)
goto peek_stack;
else if (ret < 0)
@@ -5741,11 +6276,16 @@ peek_stack:
}
/* unconditional jump with single edge */
ret = push_insn(t, t + insns[t].off + 1,
- FALLTHROUGH, env);
+ FALLTHROUGH, env, true);
if (ret == 1)
goto peek_stack;
else if (ret < 0)
goto err_free;
+ /* unconditional jmp is not a good pruning point,
+ * but it's marked, since backtracking needs
+ * to record jmp history in is_state_visited().
+ */
+ init_explored_state(env, t + insns[t].off + 1);
/* tell verifier to check for equivalent states
* after every call and jump
*/
@@ -5754,13 +6294,13 @@ peek_stack:
} else {
/* conditional jump with two edges */
init_explored_state(env, t);
- ret = push_insn(t, t + 1, FALLTHROUGH, env);
+ ret = push_insn(t, t + 1, FALLTHROUGH, env, true);
if (ret == 1)
goto peek_stack;
else if (ret < 0)
goto err_free;
- ret = push_insn(t, t + insns[t].off + 1, BRANCH, env);
+ ret = push_insn(t, t + insns[t].off + 1, BRANCH, env, true);
if (ret == 1)
goto peek_stack;
else if (ret < 0)
@@ -5770,7 +6310,7 @@ peek_stack:
/* all other non-branch instructions with single
* fall-through edge
*/
- ret = push_insn(t, t + 1, FALLTHROUGH, env);
+ ret = push_insn(t, t + 1, FALLTHROUGH, env, false);
if (ret == 1)
goto peek_stack;
else if (ret < 0)
@@ -6203,6 +6743,8 @@ static void clean_live_states(struct bpf_verifier_env *env, int insn,
sl = *explored_state(env, insn);
while (sl) {
+ if (sl->state.branches)
+ goto next;
if (sl->state.insn_idx != insn ||
sl->state.curframe != cur->curframe)
goto next;
@@ -6244,6 +6786,8 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
switch (rold->type) {
case SCALAR_VALUE:
if (rcur->type == SCALAR_VALUE) {
+ if (!rold->precise && !rcur->precise)
+ return true;
/* new val must satisfy old val knowledge */
return range_within(rold, rcur) &&
tnum_in(rold->var_off, rcur->var_off);
@@ -6567,19 +7111,52 @@ static int propagate_liveness(struct bpf_verifier_env *env,
return 0;
}
+static bool states_maybe_looping(struct bpf_verifier_state *old,
+ struct bpf_verifier_state *cur)
+{
+ struct bpf_func_state *fold, *fcur;
+ int i, fr = cur->curframe;
+
+ if (old->curframe != fr)
+ return false;
+
+ fold = old->frame[fr];
+ fcur = cur->frame[fr];
+ for (i = 0; i < MAX_BPF_REG; i++)
+ if (memcmp(&fold->regs[i], &fcur->regs[i],
+ offsetof(struct bpf_reg_state, parent)))
+ return false;
+ return true;
+}
+
+
static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
{
struct bpf_verifier_state_list *new_sl;
struct bpf_verifier_state_list *sl, **pprev;
struct bpf_verifier_state *cur = env->cur_state, *new;
int i, j, err, states_cnt = 0;
+ bool add_new_state = false;
+ cur->last_insn_idx = env->prev_insn_idx;
if (!env->insn_aux_data[insn_idx].prune_point)
/* this 'insn_idx' instruction wasn't marked, so we will not
* be doing state search here
*/
return 0;
+ /* bpf progs typically have pruning point every 4 instructions
+ * http://vger.kernel.org/bpfconf2019.html#session-1
+ * Do not add new state for future pruning if the verifier hasn't seen
+ * at least 2 jumps and at least 8 instructions.
+ * This heuristics helps decrease 'total_states' and 'peak_states' metric.
+ * In tests that amounts to up to 50% reduction into total verifier
+ * memory consumption and 20% verifier time speedup.
+ */
+ if (env->jmps_processed - env->prev_jmps_processed >= 2 &&
+ env->insn_processed - env->prev_insn_processed >= 8)
+ add_new_state = true;
+
pprev = explored_state(env, insn_idx);
sl = *pprev;
@@ -6589,6 +7166,30 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
states_cnt++;
if (sl->state.insn_idx != insn_idx)
goto next;
+ if (sl->state.branches) {
+ if (states_maybe_looping(&sl->state, cur) &&
+ states_equal(env, &sl->state, cur)) {
+ verbose_linfo(env, insn_idx, "; ");
+ verbose(env, "infinite loop detected at insn %d\n", insn_idx);
+ return -EINVAL;
+ }
+ /* if the verifier is processing a loop, avoid adding new state
+ * too often, since different loop iterations have distinct
+ * states and may not help future pruning.
+ * This threshold shouldn't be too low to make sure that
+ * a loop with large bound will be rejected quickly.
+ * The most abusive loop will be:
+ * r1 += 1
+ * if r1 < 1000000 goto pc-2
+ * 1M insn_procssed limit / 100 == 10k peak states.
+ * This threshold shouldn't be too high either, since states
+ * at the end of the loop are likely to be useful in pruning.
+ */
+ if (env->jmps_processed - env->prev_jmps_processed < 20 &&
+ env->insn_processed - env->prev_insn_processed < 100)
+ add_new_state = false;
+ goto miss;
+ }
if (states_equal(env, &sl->state, cur)) {
sl->hit_cnt++;
/* reached equivalent register/stack state,
@@ -6606,7 +7207,15 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
return err;
return 1;
}
- sl->miss_cnt++;
+miss:
+ /* when new state is not going to be added do not increase miss count.
+ * Otherwise several loop iterations will remove the state
+ * recorded earlier. The goal of these heuristics is to have
+ * states from some iterations of the loop (some in the beginning
+ * and some at the end) to help pruning.
+ */
+ if (add_new_state)
+ sl->miss_cnt++;
/* heuristic to determine whether this state is beneficial
* to keep checking from state equivalence point of view.
* Higher numbers increase max_states_per_insn and verification time,
@@ -6618,6 +7227,11 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
*/
*pprev = sl->next;
if (sl->state.frame[0]->regs[0].live & REG_LIVE_DONE) {
+ u32 br = sl->state.branches;
+
+ WARN_ONCE(br,
+ "BUG live_done but branches_to_explore %d\n",
+ br);
free_verifier_state(&sl->state, false);
kfree(sl);
env->peak_states--;
@@ -6641,20 +7255,27 @@ next:
env->max_states_per_insn = states_cnt;
if (!env->allow_ptr_leaks && states_cnt > BPF_COMPLEXITY_LIMIT_STATES)
- return 0;
+ return push_jmp_history(env, cur);
+
+ if (!add_new_state)
+ return push_jmp_history(env, cur);
- /* there were no equivalent states, remember current one.
- * technically the current state is not proven to be safe yet,
+ /* There were no equivalent states, remember the current one.
+ * Technically the current state is not proven to be safe yet,
* but it will either reach outer most bpf_exit (which means it's safe)
- * or it will be rejected. Since there are no loops, we won't be
+ * or it will be rejected. When there are no loops the verifier won't be
* seeing this tuple (frame[0].callsite, frame[1].callsite, .. insn_idx)
- * again on the way to bpf_exit
+ * again on the way to bpf_exit.
+ * When looping the sl->state.branches will be > 0 and this state
+ * will not be considered for equivalence until branches == 0.
*/
new_sl = kzalloc(sizeof(struct bpf_verifier_state_list), GFP_KERNEL);
if (!new_sl)
return -ENOMEM;
env->total_states++;
env->peak_states++;
+ env->prev_jmps_processed = env->jmps_processed;
+ env->prev_insn_processed = env->insn_processed;
/* add new state to the head of linked list */
new = &new_sl->state;
@@ -6665,6 +7286,12 @@ next:
return err;
}
new->insn_idx = insn_idx;
+ WARN_ONCE(new->branches != 1,
+ "BUG is_state_visited:branches_to_explore=%d insn %d\n", new->branches, insn_idx);
+
+ cur->parent = new;
+ cur->first_insn_idx = insn_idx;
+ clear_jmp_history(cur);
new_sl->next = *explored_state(env, insn_idx);
*explored_state(env, insn_idx) = new_sl;
/* connect new state to parentage chain. Current frame needs all
@@ -6674,17 +7301,18 @@ next:
* the state of the call instruction (with WRITTEN set), and r0 comes
* from callee with its full parentage chain, anyway.
*/
- for (j = 0; j <= cur->curframe; j++)
- for (i = j < cur->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++)
- cur->frame[j]->regs[i].parent = &new->frame[j]->regs[i];
/* clear write marks in current state: the writes we did are not writes
* our child did, so they don't screen off its reads from us.
* (There are no read marks in current state, because reads always mark
* their parent and current state never has children yet. Only
* explored_states can get read marks.)
*/
- for (i = 0; i < BPF_REG_FP; i++)
- cur->frame[cur->curframe]->regs[i].live = REG_LIVE_NONE;
+ for (j = 0; j <= cur->curframe; j++) {
+ for (i = j < cur->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++)
+ cur->frame[j]->regs[i].parent = &new->frame[j]->regs[i];
+ for (i = 0; i < BPF_REG_FP; i++)
+ cur->frame[j]->regs[i].live = REG_LIVE_NONE;
+ }
/* all stack frames are accessible from callee, clear them all */
for (j = 0; j <= cur->curframe; j++) {
@@ -6743,6 +7371,7 @@ static int do_check(struct bpf_verifier_env *env)
struct bpf_reg_state *regs;
int insn_cnt = env->prog->len;
bool do_print_state = false;
+ int prev_insn_idx = -1;
env->prev_linfo = NULL;
@@ -6751,6 +7380,7 @@ static int do_check(struct bpf_verifier_env *env)
return -ENOMEM;
state->curframe = 0;
state->speculative = false;
+ state->branches = 1;
state->frame[0] = kzalloc(sizeof(struct bpf_func_state), GFP_KERNEL);
if (!state->frame[0]) {
kfree(state);
@@ -6767,6 +7397,7 @@ static int do_check(struct bpf_verifier_env *env)
u8 class;
int err;
+ env->prev_insn_idx = prev_insn_idx;
if (env->insn_idx >= insn_cnt) {
verbose(env, "invalid insn idx %d insn_cnt %d\n",
env->insn_idx, insn_cnt);
@@ -6839,6 +7470,7 @@ static int do_check(struct bpf_verifier_env *env)
regs = cur_regs(env);
env->insn_aux_data[env->insn_idx].seen = true;
+ prev_insn_idx = env->insn_idx;
if (class == BPF_ALU || class == BPF_ALU64) {
err = check_alu_op(env, insn);
@@ -6957,6 +7589,7 @@ static int do_check(struct bpf_verifier_env *env)
} else if (class == BPF_JMP || class == BPF_JMP32) {
u8 opcode = BPF_OP(insn->code);
+ env->jmps_processed++;
if (opcode == BPF_CALL) {
if (BPF_SRC(insn->code) != BPF_K ||
insn->off != 0 ||
@@ -7011,7 +7644,6 @@ static int do_check(struct bpf_verifier_env *env)
if (state->curframe) {
/* exit from nested function */
- env->prev_insn_idx = env->insn_idx;
err = prepare_func_exit(env, &env->insn_idx);
if (err)
return err;
@@ -7042,7 +7674,8 @@ static int do_check(struct bpf_verifier_env *env)
if (err)
return err;
process_bpf_exit:
- err = pop_stack(env, &env->prev_insn_idx,
+ update_branch_counts(env, env->cur_state);
+ err = pop_stack(env, &prev_insn_idx,
&env->insn_idx);
if (err < 0) {
if (err != -ENOENT)
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
index c0091137074b..e1b55261526f 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
@@ -5,7 +5,7 @@ static int libbpf_debug_print(enum libbpf_print_level level,
const char *format, va_list args)
{
if (level != LIBBPF_DEBUG)
- return 0;
+ return vfprintf(stderr, format, args);
if (!strstr(format, "verifier log"))
return 0;
@@ -32,24 +32,69 @@ static int check_load(const char *file, enum bpf_prog_type type)
void test_bpf_verif_scale(void)
{
- const char *scale[] = {
- "./test_verif_scale1.o", "./test_verif_scale2.o", "./test_verif_scale3.o"
+ const char *sched_cls[] = {
+ "./test_verif_scale1.o", "./test_verif_scale2.o", "./test_verif_scale3.o",
};
- const char *pyperf[] = {
- "./pyperf50.o", "./pyperf100.o", "./pyperf180.o"
+ const char *raw_tp[] = {
+ /* full unroll by llvm */
+ "./pyperf50.o", "./pyperf100.o", "./pyperf180.o",
+
+ /* partial unroll. llvm will unroll loop ~150 times.
+ * C loop count -> 600.
+ * Asm loop count -> 4.
+ * 16k insns in loop body.
+ * Total of 5 such loops. Total program size ~82k insns.
+ */
+ "./pyperf600.o",
+
+ /* no unroll at all.
+ * C loop count -> 600.
+ * ASM loop count -> 600.
+ * ~110 insns in loop body.
+ * Total of 5 such loops. Total program size ~1500 insns.
+ */
+ "./pyperf600_nounroll.o",
+
+ "./loop1.o", "./loop2.o",
+
+ /* partial unroll. 19k insn in a loop.
+ * Total program size 20.8k insn.
+ * ~350k processed_insns
+ */
+ "./strobemeta.o",
+
+ /* no unroll, tiny loops */
+ "./strobemeta_nounroll1.o",
+ "./strobemeta_nounroll2.o",
+ };
+ const char *cg_sysctl[] = {
+ "./test_sysctl_loop1.o", "./test_sysctl_loop2.o",
};
int err, i;
if (verifier_stats)
libbpf_set_print(libbpf_debug_print);
- for (i = 0; i < ARRAY_SIZE(scale); i++) {
- err = check_load(scale[i], BPF_PROG_TYPE_SCHED_CLS);
- printf("test_scale:%s:%s\n", scale[i], err ? "FAIL" : "OK");
+ err = check_load("./loop3.o", BPF_PROG_TYPE_RAW_TRACEPOINT);
+ printf("test_scale:loop3:%s\n", err ? (error_cnt--, "OK") : "FAIL");
+
+ for (i = 0; i < ARRAY_SIZE(sched_cls); i++) {
+ err = check_load(sched_cls[i], BPF_PROG_TYPE_SCHED_CLS);
+ printf("test_scale:%s:%s\n", sched_cls[i], err ? "FAIL" : "OK");
}
- for (i = 0; i < ARRAY_SIZE(pyperf); i++) {
- err = check_load(pyperf[i], BPF_PROG_TYPE_RAW_TRACEPOINT);
- printf("test_scale:%s:%s\n", pyperf[i], err ? "FAIL" : "OK");
+ for (i = 0; i < ARRAY_SIZE(raw_tp); i++) {
+ err = check_load(raw_tp[i], BPF_PROG_TYPE_RAW_TRACEPOINT);
+ printf("test_scale:%s:%s\n", raw_tp[i], err ? "FAIL" : "OK");
}
+
+ for (i = 0; i < ARRAY_SIZE(cg_sysctl); i++) {
+ err = check_load(cg_sysctl[i], BPF_PROG_TYPE_CGROUP_SYSCTL);
+ printf("test_scale:%s:%s\n", cg_sysctl[i], err ? "FAIL" : "OK");
+ }
+ err = check_load("./test_xdp_loop.o", BPF_PROG_TYPE_XDP);
+ printf("test_scale:test_xdp_loop:%s\n", err ? "FAIL" : "OK");
+
+ err = check_load("./test_seg6_loop.o", BPF_PROG_TYPE_LWT_SEG6LOCAL);
+ printf("test_scale:test_seg6_loop:%s\n", err ? "FAIL" : "OK");
}
diff --git a/tools/testing/selftests/bpf/progs/loop1.c b/tools/testing/selftests/bpf/progs/loop1.c
new file mode 100644
index 000000000000..dea395af9ea9
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/loop1.c
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+#include <linux/sched.h>
+#include <linux/ptrace.h>
+#include <stdint.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+char _license[] SEC("license") = "GPL";
+
+SEC("raw_tracepoint/kfree_skb")
+int nested_loops(volatile struct pt_regs* ctx)
+{
+ int i, j, sum = 0, m;
+
+ for (j = 0; j < 300; j++)
+ for (i = 0; i < j; i++) {
+ if (j & 1)
+ m = ctx->rax;
+ else
+ m = j;
+ sum += i * m;
+ }
+
+ return sum;
+}
diff --git a/tools/testing/selftests/bpf/progs/loop2.c b/tools/testing/selftests/bpf/progs/loop2.c
new file mode 100644
index 000000000000..0637bd8e8bcf
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/loop2.c
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+#include <linux/sched.h>
+#include <linux/ptrace.h>
+#include <stdint.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+char _license[] SEC("license") = "GPL";
+
+SEC("raw_tracepoint/consume_skb")
+int while_true(volatile struct pt_regs* ctx)
+{
+ int i = 0;
+
+ while (true) {
+ if (ctx->rax & 1)
+ i += 3;
+ else
+ i += 7;
+ if (i > 40)
+ break;
+ }
+
+ return i;
+}
diff --git a/tools/testing/selftests/bpf/progs/loop3.c b/tools/testing/selftests/bpf/progs/loop3.c
new file mode 100644
index 000000000000..30a0f6cba080
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/loop3.c
@@ -0,0 +1,22 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+#include <linux/sched.h>
+#include <linux/ptrace.h>
+#include <stdint.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+char _license[] SEC("license") = "GPL";
+
+SEC("raw_tracepoint/consume_skb")
+int while_true(volatile struct pt_regs* ctx)
+{
+ __u64 i = 0, sum = 0;
+ do {
+ i++;
+ sum += ctx->rax;
+ } while (i < 0x100000000ULL);
+ return sum;
+}
diff --git a/tools/testing/selftests/bpf/progs/pyperf.h b/tools/testing/selftests/bpf/progs/pyperf.h
index 0cc5e4ee90bd..6b0781391be5 100644
--- a/tools/testing/selftests/bpf/progs/pyperf.h
+++ b/tools/testing/selftests/bpf/progs/pyperf.h
@@ -220,7 +220,11 @@ static inline __attribute__((__always_inline__)) int __on_event(struct pt_regs *
int32_t* symbol_counter = bpf_map_lookup_elem(&symbolmap, &sym);
if (symbol_counter == NULL)
return 0;
-#pragma unroll
+#ifdef NO_UNROLL
+#pragma clang loop unroll(disable)
+#else
+#pragma clang loop unroll(full)
+#endif
/* Unwind python stack */
for (int i = 0; i < STACK_MAX_LEN; ++i) {
if (frame_ptr && get_frame_data(frame_ptr, pidData, &frame, &sym)) {
diff --git a/tools/testing/selftests/bpf/progs/pyperf600.c b/tools/testing/selftests/bpf/progs/pyperf600.c
new file mode 100644
index 000000000000..cb49b89e37cd
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/pyperf600.c
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+#define STACK_MAX_LEN 600
+/* clang will not unroll the loop 600 times.
+ * Instead it will unroll it to the amount it deemed
+ * appropriate, but the loop will still execute 600 times.
+ * Total program size is around 90k insns
+ */
+#include "pyperf.h"
diff --git a/tools/testing/selftests/bpf/progs/pyperf600_nounroll.c b/tools/testing/selftests/bpf/progs/pyperf600_nounroll.c
new file mode 100644
index 000000000000..6beff7502f4d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/pyperf600_nounroll.c
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+#define STACK_MAX_LEN 600
+#define NO_UNROLL
+/* clang will not unroll at all.
+ * Total program size is around 2k insns
+ */
+#include "pyperf.h"
diff --git a/tools/testing/selftests/bpf/progs/strobemeta.c b/tools/testing/selftests/bpf/progs/strobemeta.c
new file mode 100644
index 000000000000..d3df3d86f092
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/strobemeta.c
@@ -0,0 +1,10 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+// Copyright (c) 2019 Facebook
+
+#define STROBE_MAX_INTS 2
+#define STROBE_MAX_STRS 25
+#define STROBE_MAX_MAPS 100
+#define STROBE_MAX_MAP_ENTRIES 20
+/* full unroll by llvm #undef NO_UNROLL */
+#include "strobemeta.h"
+
diff --git a/tools/testing/selftests/bpf/progs/strobemeta.h b/tools/testing/selftests/bpf/progs/strobemeta.h
new file mode 100644
index 000000000000..1ff73f60a3e4
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/strobemeta.h
@@ -0,0 +1,528 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+
+#include <stdint.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <linux/bpf.h>
+#include <linux/ptrace.h>
+#include <linux/sched.h>
+#include <linux/types.h>
+#include "bpf_helpers.h"
+
+typedef uint32_t pid_t;
+struct task_struct {};
+
+#define TASK_COMM_LEN 16
+#define PERF_MAX_STACK_DEPTH 127
+
+#define STROBE_TYPE_INVALID 0
+#define STROBE_TYPE_INT 1
+#define STROBE_TYPE_STR 2
+#define STROBE_TYPE_MAP 3
+
+#define STACK_TABLE_EPOCH_SHIFT 20
+#define STROBE_MAX_STR_LEN 1
+#define STROBE_MAX_CFGS 32
+#define STROBE_MAX_PAYLOAD \
+ (STROBE_MAX_STRS * STROBE_MAX_STR_LEN + \
+ STROBE_MAX_MAPS * (1 + STROBE_MAX_MAP_ENTRIES * 2) * STROBE_MAX_STR_LEN)
+
+struct strobe_value_header {
+ /*
+ * meaning depends on type:
+ * 1. int: 0, if value not set, 1 otherwise
+ * 2. str: 1 always, whether value is set or not is determined by ptr
+ * 3. map: 1 always, pointer points to additional struct with number
+ * of entries (up to STROBE_MAX_MAP_ENTRIES)
+ */
+ uint16_t len;
+ /*
+ * _reserved might be used for some future fields/flags, but we always
+ * want to keep strobe_value_header to be 8 bytes, so BPF can read 16
+ * bytes in one go and get both header and value
+ */
+ uint8_t _reserved[6];
+};
+
+/*
+ * strobe_value_generic is used from BPF probe only, but needs to be a union
+ * of strobe_value_int/strobe_value_str/strobe_value_map
+ */
+struct strobe_value_generic {
+ struct strobe_value_header header;
+ union {
+ int64_t val;
+ void *ptr;
+ };
+};
+
+struct strobe_value_int {
+ struct strobe_value_header header;
+ int64_t value;
+};
+
+struct strobe_value_str {
+ struct strobe_value_header header;
+ const char* value;
+};
+
+struct strobe_value_map {
+ struct strobe_value_header header;
+ const struct strobe_map_raw* value;
+};
+
+struct strobe_map_entry {
+ const char* key;
+ const char* val;
+};
+
+/*
+ * Map of C-string key/value pairs with fixed maximum capacity. Each map has
+ * corresponding int64 ID, which application can use (or ignore) in whatever
+ * way appropriate. Map is "write-only", there is no way to get data out of
+ * map. Map is intended to be used to provide metadata for profilers and is
+ * not to be used for internal in-app communication. All methods are
+ * thread-safe.
+ */
+struct strobe_map_raw {
+ /*
+ * general purpose unique ID that's up to application to decide
+ * whether and how to use; for request metadata use case id is unique
+ * request ID that's used to match metadata with stack traces on
+ * Strobelight backend side
+ */
+ int64_t id;
+ /* number of used entries in map */
+ int64_t cnt;
+ /*
+ * having volatile doesn't change anything on BPF side, but clang
+ * emits warnings for passing `volatile const char *` into
+ * bpf_probe_read_str that expects just `const char *`
+ */
+ const char* tag;
+ /*
+ * key/value entries, each consisting of 2 pointers to key and value
+ * C strings
+ */
+ struct strobe_map_entry entries[STROBE_MAX_MAP_ENTRIES];
+};
+
+/* Following values define supported values of TLS mode */
+#define TLS_NOT_SET -1
+#define TLS_LOCAL_EXEC 0
+#define TLS_IMM_EXEC 1
+#define TLS_GENERAL_DYN 2
+
+/*
+ * structure that universally represents TLS location (both for static
+ * executables and shared libraries)
+ */
+struct strobe_value_loc {
+ /*
+ * tls_mode defines what TLS mode was used for particular metavariable:
+ * - -1 (TLS_NOT_SET) - no metavariable;
+ * - 0 (TLS_LOCAL_EXEC) - Local Executable mode;
+ * - 1 (TLS_IMM_EXEC) - Immediate Executable mode;
+ * - 2 (TLS_GENERAL_DYN) - General Dynamic mode;
+ * Local Dynamic mode is not yet supported, because never seen in
+ * practice. Mode defines how offset field is interpreted. See
+ * calc_location() in below for details.
+ */
+ int64_t tls_mode;
+ /*
+ * TLS_LOCAL_EXEC: offset from thread pointer (fs:0 for x86-64,
+ * tpidr_el0 for aarch64).
+ * TLS_IMM_EXEC: absolute address of GOT entry containing offset
+ * from thread pointer;
+ * TLS_GENERAL_DYN: absolute addres of double GOT entry
+ * containing tls_index_t struct;
+ */
+ int64_t offset;
+};
+
+struct strobemeta_cfg {
+ int64_t req_meta_idx;
+ struct strobe_value_loc int_locs[STROBE_MAX_INTS];
+ struct strobe_value_loc str_locs[STROBE_MAX_STRS];
+ struct strobe_value_loc map_locs[STROBE_MAX_MAPS];
+};
+
+struct strobe_map_descr {
+ uint64_t id;
+ int16_t tag_len;
+ /*
+ * cnt <0 - map value isn't set;
+ * 0 - map has id set, but no key/value entries
+ */
+ int16_t cnt;
+ /*
+ * both key_lens[i] and val_lens[i] should be >0 for present key/value
+ * entry
+ */
+ uint16_t key_lens[STROBE_MAX_MAP_ENTRIES];
+ uint16_t val_lens[STROBE_MAX_MAP_ENTRIES];
+};
+
+struct strobemeta_payload {
+ /* req_id has valid request ID, if req_meta_valid == 1 */
+ int64_t req_id;
+ uint8_t req_meta_valid;
+ /*
+ * mask has Nth bit set to 1, if Nth metavar was present and
+ * successfully read
+ */
+ uint64_t int_vals_set_mask;
+ int64_t int_vals[STROBE_MAX_INTS];
+ /* len is >0 for present values */
+ uint16_t str_lens[STROBE_MAX_STRS];
+ /* if map_descrs[i].cnt == -1, metavar is not present/set */
+ struct strobe_map_descr map_descrs[STROBE_MAX_MAPS];
+ /*
+ * payload has compactly packed values of str and map variables in the
+ * form: strval1\0strval2\0map1key1\0map1val1\0map2key1\0map2val1\0
+ * (and so on); str_lens[i], key_lens[i] and val_lens[i] determines
+ * value length
+ */
+ char payload[STROBE_MAX_PAYLOAD];
+};
+
+struct strobelight_bpf_sample {
+ uint64_t ktime;
+ char comm[TASK_COMM_LEN];
+ pid_t pid;
+ int user_stack_id;
+ int kernel_stack_id;
+ int has_meta;
+ struct strobemeta_payload metadata;
+ /*
+ * makes it possible to pass (<real payload size> + 1) as data size to
+ * perf_submit() to avoid perf_submit's paranoia about passing zero as
+ * size, as it deduces that <real payload size> might be
+ * **theoretically** zero
+ */
+ char dummy_safeguard;
+};
+
+struct bpf_map_def SEC("maps") samples = {
+ .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .max_entries = 32,
+};
+
+struct bpf_map_def SEC("maps") stacks_0 = {
+ .type = BPF_MAP_TYPE_STACK_TRACE,
+ .key_size = sizeof(uint32_t),
+ .value_size = sizeof(uint64_t) * PERF_MAX_STACK_DEPTH,
+ .max_entries = 16,
+};
+
+struct bpf_map_def SEC("maps") stacks_1 = {
+ .type = BPF_MAP_TYPE_STACK_TRACE,
+ .key_size = sizeof(uint32_t),
+ .value_size = sizeof(uint64_t) * PERF_MAX_STACK_DEPTH,
+ .max_entries = 16,
+};
+
+struct bpf_map_def SEC("maps") sample_heap = {
+ .type = BPF_MAP_TYPE_PERCPU_ARRAY,
+ .key_size = sizeof(uint32_t),
+ .value_size = sizeof(struct strobelight_bpf_sample),
+ .max_entries = 1,
+};
+
+struct bpf_map_def SEC("maps") strobemeta_cfgs = {
+ .type = BPF_MAP_TYPE_PERCPU_ARRAY,
+ .key_size = sizeof(pid_t),
+ .value_size = sizeof(struct strobemeta_cfg),
+ .max_entries = STROBE_MAX_CFGS,
+};
+
+/* Type for the dtv. */
+/* https://github.com/lattera/glibc/blob/master/nptl/sysdeps/x86_64/tls.h#L34 */
+typedef union dtv {
+ size_t counter;
+ struct {
+ void* val;
+ bool is_static;
+ } pointer;
+} dtv_t;
+
+/* Partial definition for tcbhead_t */
+/* https://github.com/bminor/glibc/blob/master/sysdeps/x86_64/nptl/tls.h#L42 */
+struct tcbhead {
+ void* tcb;
+ dtv_t* dtv;
+};
+
+/*
+ * TLS module/offset information for shared library case.
+ * For x86-64, this is mapped onto two entries in GOT.
+ * For aarch64, this is pointed to by second GOT entry.
+ */
+struct tls_index {
+ uint64_t module;
+ uint64_t offset;
+};
+
+static inline __attribute__((always_inline))
+void *calc_location(struct strobe_value_loc *loc, void *tls_base)
+{
+ /*
+ * tls_mode value is:
+ * - -1 (TLS_NOT_SET), if no metavar is present;
+ * - 0 (TLS_LOCAL_EXEC), if metavar uses Local Executable mode of TLS
+ * (offset from fs:0 for x86-64 or tpidr_el0 for aarch64);
+ * - 1 (TLS_IMM_EXEC), if metavar uses Immediate Executable mode of TLS;
+ * - 2 (TLS_GENERAL_DYN), if metavar uses General Dynamic mode of TLS;
+ * This schema allows to use something like:
+ * (tls_mode + 1) * (tls_base + offset)
+ * to get NULL for "no metavar" location, or correct pointer for local
+ * executable mode without doing extra ifs.
+ */
+ if (loc->tls_mode <= TLS_LOCAL_EXEC) {
+ /* static executable is simple, we just have offset from
+ * tls_base */
+ void *addr = tls_base + loc->offset;
+ /* multiply by (tls_mode + 1) to get NULL, if we have no
+ * metavar in this slot */
+ return (void *)((loc->tls_mode + 1) * (int64_t)addr);
+ }
+ /*
+ * Other modes are more complicated, we need to jump through few hoops.
+ *
+ * For immediate executable mode (currently supported only for aarch64):
+ * - loc->offset is pointing to a GOT entry containing fixed offset
+ * relative to tls_base;
+ *
+ * For general dynamic mode:
+ * - loc->offset is pointing to a beginning of double GOT entries;
+ * - (for aarch64 only) second entry points to tls_index_t struct;
+ * - (for x86-64 only) two GOT entries are already tls_index_t;
+ * - tls_index_t->module is used to find start of TLS section in
+ * which variable resides;
+ * - tls_index_t->offset provides offset within that TLS section,
+ * pointing to value of variable.
+ */
+ struct tls_index tls_index;
+ dtv_t *dtv;
+ void *tls_ptr;
+
+ bpf_probe_read(&tls_index, sizeof(struct tls_index),
+ (void *)loc->offset);
+ /* valid module index is always positive */
+ if (tls_index.module > 0) {
+ /* dtv = ((struct tcbhead *)tls_base)->dtv[tls_index.module] */
+ bpf_probe_read(&dtv, sizeof(dtv),
+ &((struct tcbhead *)tls_base)->dtv);
+ dtv += tls_index.module;
+ } else {
+ dtv = NULL;
+ }
+ bpf_probe_read(&tls_ptr, sizeof(void *), dtv);
+ /* if pointer has (void *)-1 value, then TLS wasn't initialized yet */
+ return tls_ptr && tls_ptr != (void *)-1
+ ? tls_ptr + tls_index.offset
+ : NULL;
+}
+
+static inline __attribute__((always_inline))
+void read_int_var(struct strobemeta_cfg *cfg, size_t idx, void *tls_base,
+ struct strobe_value_generic *value,
+ struct strobemeta_payload *data)
+{
+ void *location = calc_location(&cfg->int_locs[idx], tls_base);
+ if (!location)
+ return;
+
+ bpf_probe_read(value, sizeof(struct strobe_value_generic), location);
+ data->int_vals[idx] = value->val;
+ if (value->header.len)
+ data->int_vals_set_mask |= (1 << idx);
+}
+
+static inline __attribute__((always_inline))
+uint64_t read_str_var(struct strobemeta_cfg* cfg, size_t idx, void *tls_base,
+ struct strobe_value_generic *value,
+ struct strobemeta_payload *data, void *payload)
+{
+ void *location;
+ uint32_t len;
+
+ data->str_lens[idx] = 0;
+ location = calc_location(&cfg->str_locs[idx], tls_base);
+ if (!location)
+ return 0;
+
+ bpf_probe_read(value, sizeof(struct strobe_value_generic), location);
+ len = bpf_probe_read_str(payload, STROBE_MAX_STR_LEN, value->ptr);
+ /*
+ * if bpf_probe_read_str returns error (<0), due to casting to
+ * unsinged int, it will become big number, so next check is
+ * sufficient to check for errors AND prove to BPF verifier, that
+ * bpf_probe_read_str won't return anything bigger than
+ * STROBE_MAX_STR_LEN
+ */
+ if (len > STROBE_MAX_STR_LEN)
+ return 0;
+
+ data->str_lens[idx] = len;
+ return len;
+}
+
+static inline __attribute__((always_inline))
+void *read_map_var(struct strobemeta_cfg *cfg, size_t idx, void *tls_base,
+ struct strobe_value_generic *value,
+ struct strobemeta_payload* data, void *payload)
+{
+ struct strobe_map_descr* descr = &data->map_descrs[idx];
+ struct strobe_map_raw map;
+ void *location;
+ uint32_t len;
+ int i;
+
+ descr->tag_len = 0; /* presume no tag is set */
+ descr->cnt = -1; /* presume no value is set */
+
+ location = calc_location(&cfg->map_locs[idx], tls_base);
+ if (!location)
+ return payload;
+
+ bpf_probe_read(value, sizeof(struct strobe_value_generic), location);
+ if (bpf_probe_read(&map, sizeof(struct strobe_map_raw), value->ptr))
+ return payload;
+
+ descr->id = map.id;
+ descr->cnt = map.cnt;
+ if (cfg->req_meta_idx == idx) {
+ data->req_id = map.id;
+ data->req_meta_valid = 1;
+ }
+
+ len = bpf_probe_read_str(payload, STROBE_MAX_STR_LEN, map.tag);
+ if (len <= STROBE_MAX_STR_LEN) {
+ descr->tag_len = len;
+ payload += len;
+ }
+
+#ifdef NO_UNROLL
+#pragma clang loop unroll(disable)
+#else
+#pragma unroll
+#endif
+ for (int i = 0; i < STROBE_MAX_MAP_ENTRIES && i < map.cnt; ++i) {
+ descr->key_lens[i] = 0;
+ len = bpf_probe_read_str(payload, STROBE_MAX_STR_LEN,
+ map.entries[i].key);
+ if (len <= STROBE_MAX_STR_LEN) {
+ descr->key_lens[i] = len;
+ payload += len;
+ }
+ descr->val_lens[i] = 0;
+ len = bpf_probe_read_str(payload, STROBE_MAX_STR_LEN,
+ map.entries[i].val);
+ if (len <= STROBE_MAX_STR_LEN) {
+ descr->val_lens[i] = len;
+ payload += len;
+ }
+ }
+
+ return payload;
+}
+
+/*
+ * read_strobe_meta returns NULL, if no metadata was read; otherwise returns
+ * pointer to *right after* payload ends
+ */
+static inline __attribute__((always_inline))
+void *read_strobe_meta(struct task_struct* task,
+ struct strobemeta_payload* data) {
+ pid_t pid = bpf_get_current_pid_tgid() >> 32;
+ struct strobe_value_generic value = {0};
+ struct strobemeta_cfg *cfg;
+ void *tls_base, *payload;
+
+ cfg = bpf_map_lookup_elem(&strobemeta_cfgs, &pid);
+ if (!cfg)
+ return NULL;
+
+ data->int_vals_set_mask = 0;
+ data->req_meta_valid = 0;
+ payload = data->payload;
+ /*
+ * we don't have struct task_struct definition, it should be:
+ * tls_base = (void *)task->thread.fsbase;
+ */
+ tls_base = (void *)task;
+
+#ifdef NO_UNROLL
+#pragma clang loop unroll(disable)
+#else
+#pragma unroll
+#endif
+ for (int i = 0; i < STROBE_MAX_INTS; ++i) {
+ read_int_var(cfg, i, tls_base, &value, data);
+ }
+#ifdef NO_UNROLL
+#pragma clang loop unroll(disable)
+#else
+#pragma unroll
+#endif
+ for (int i = 0; i < STROBE_MAX_STRS; ++i) {
+ payload += read_str_var(cfg, i, tls_base, &value, data, payload);
+ }
+#ifdef NO_UNROLL
+#pragma clang loop unroll(disable)
+#else
+#pragma unroll
+#endif
+ for (int i = 0; i < STROBE_MAX_MAPS; ++i) {
+ payload = read_map_var(cfg, i, tls_base, &value, data, payload);
+ }
+ /*
+ * return pointer right after end of payload, so it's possible to
+ * calculate exact amount of useful data that needs to be sent
+ */
+ return payload;
+}
+
+SEC("raw_tracepoint/kfree_skb")
+int on_event(struct pt_regs *ctx) {
+ pid_t pid = bpf_get_current_pid_tgid() >> 32;
+ struct strobelight_bpf_sample* sample;
+ struct task_struct *task;
+ uint32_t zero = 0;
+ uint64_t ktime_ns;
+ void *sample_end;
+
+ sample = bpf_map_lookup_elem(&sample_heap, &zero);
+ if (!sample)
+ return 0; /* this will never happen */
+
+ sample->pid = pid;
+ bpf_get_current_comm(&sample->comm, TASK_COMM_LEN);
+ ktime_ns = bpf_ktime_get_ns();
+ sample->ktime = ktime_ns;
+
+ task = (struct task_struct *)bpf_get_current_task();
+ sample_end = read_strobe_meta(task, &sample->metadata);
+ sample->has_meta = sample_end != NULL;
+ sample_end = sample_end ? : &sample->metadata;
+
+ if ((ktime_ns >> STACK_TABLE_EPOCH_SHIFT) & 1) {
+ sample->kernel_stack_id = bpf_get_stackid(ctx, &stacks_1, 0);
+ sample->user_stack_id = bpf_get_stackid(ctx, &stacks_1, BPF_F_USER_STACK);
+ } else {
+ sample->kernel_stack_id = bpf_get_stackid(ctx, &stacks_0, 0);
+ sample->user_stack_id = bpf_get_stackid(ctx, &stacks_0, BPF_F_USER_STACK);
+ }
+
+ uint64_t sample_size = sample_end - (void *)sample;
+ /* should always be true */
+ if (sample_size < sizeof(struct strobelight_bpf_sample))
+ bpf_perf_event_output(ctx, &samples, 0, sample, 1 + sample_size);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/strobemeta_nounroll1.c b/tools/testing/selftests/bpf/progs/strobemeta_nounroll1.c
new file mode 100644
index 000000000000..f0a1669e11d6
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/strobemeta_nounroll1.c
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+// Copyright (c) 2019 Facebook
+
+#define STROBE_MAX_INTS 2
+#define STROBE_MAX_STRS 25
+#define STROBE_MAX_MAPS 13
+#define STROBE_MAX_MAP_ENTRIES 20
+#define NO_UNROLL
+#include "strobemeta.h"
diff --git a/tools/testing/selftests/bpf/progs/strobemeta_nounroll2.c b/tools/testing/selftests/bpf/progs/strobemeta_nounroll2.c
new file mode 100644
index 000000000000..4291a7d642e7
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/strobemeta_nounroll2.c
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+// Copyright (c) 2019 Facebook
+
+#define STROBE_MAX_INTS 2
+#define STROBE_MAX_STRS 25
+#define STROBE_MAX_MAPS 30
+#define STROBE_MAX_MAP_ENTRIES 20
+#define NO_UNROLL
+#include "strobemeta.h"
diff --git a/tools/testing/selftests/bpf/progs/test_seg6_loop.c b/tools/testing/selftests/bpf/progs/test_seg6_loop.c
new file mode 100644
index 000000000000..463964d79f73
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_seg6_loop.c
@@ -0,0 +1,261 @@
+#include <stddef.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <linux/seg6_local.h>
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+/* Packet parsing state machine helpers. */
+#define cursor_advance(_cursor, _len) \
+ ({ void *_tmp = _cursor; _cursor += _len; _tmp; })
+
+#define SR6_FLAG_ALERT (1 << 4)
+
+#define htonll(x) ((bpf_htonl(1)) == 1 ? (x) : ((uint64_t)bpf_htonl((x) & \
+ 0xFFFFFFFF) << 32) | bpf_htonl((x) >> 32))
+#define ntohll(x) ((bpf_ntohl(1)) == 1 ? (x) : ((uint64_t)bpf_ntohl((x) & \
+ 0xFFFFFFFF) << 32) | bpf_ntohl((x) >> 32))
+#define BPF_PACKET_HEADER __attribute__((packed))
+
+struct ip6_t {
+ unsigned int ver:4;
+ unsigned int priority:8;
+ unsigned int flow_label:20;
+ unsigned short payload_len;
+ unsigned char next_header;
+ unsigned char hop_limit;
+ unsigned long long src_hi;
+ unsigned long long src_lo;
+ unsigned long long dst_hi;
+ unsigned long long dst_lo;
+} BPF_PACKET_HEADER;
+
+struct ip6_addr_t {
+ unsigned long long hi;
+ unsigned long long lo;
+} BPF_PACKET_HEADER;
+
+struct ip6_srh_t {
+ unsigned char nexthdr;
+ unsigned char hdrlen;
+ unsigned char type;
+ unsigned char segments_left;
+ unsigned char first_segment;
+ unsigned char flags;
+ unsigned short tag;
+
+ struct ip6_addr_t segments[0];
+} BPF_PACKET_HEADER;
+
+struct sr6_tlv_t {
+ unsigned char type;
+ unsigned char len;
+ unsigned char value[0];
+} BPF_PACKET_HEADER;
+
+static __attribute__((always_inline)) struct ip6_srh_t *get_srh(struct __sk_buff *skb)
+{
+ void *cursor, *data_end;
+ struct ip6_srh_t *srh;
+ struct ip6_t *ip;
+ uint8_t *ipver;
+
+ data_end = (void *)(long)skb->data_end;
+ cursor = (void *)(long)skb->data;
+ ipver = (uint8_t *)cursor;
+
+ if ((void *)ipver + sizeof(*ipver) > data_end)
+ return NULL;
+
+ if ((*ipver >> 4) != 6)
+ return NULL;
+
+ ip = cursor_advance(cursor, sizeof(*ip));
+ if ((void *)ip + sizeof(*ip) > data_end)
+ return NULL;
+
+ if (ip->next_header != 43)
+ return NULL;
+
+ srh = cursor_advance(cursor, sizeof(*srh));
+ if ((void *)srh + sizeof(*srh) > data_end)
+ return NULL;
+
+ if (srh->type != 4)
+ return NULL;
+
+ return srh;
+}
+
+static __attribute__((always_inline))
+int update_tlv_pad(struct __sk_buff *skb, uint32_t new_pad,
+ uint32_t old_pad, uint32_t pad_off)
+{
+ int err;
+
+ if (new_pad != old_pad) {
+ err = bpf_lwt_seg6_adjust_srh(skb, pad_off,
+ (int) new_pad - (int) old_pad);
+ if (err)
+ return err;
+ }
+
+ if (new_pad > 0) {
+ char pad_tlv_buf[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0};
+ struct sr6_tlv_t *pad_tlv = (struct sr6_tlv_t *) pad_tlv_buf;
+
+ pad_tlv->type = SR6_TLV_PADDING;
+ pad_tlv->len = new_pad - 2;
+
+ err = bpf_lwt_seg6_store_bytes(skb, pad_off,
+ (void *)pad_tlv_buf, new_pad);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static __attribute__((always_inline))
+int is_valid_tlv_boundary(struct __sk_buff *skb, struct ip6_srh_t *srh,
+ uint32_t *tlv_off, uint32_t *pad_size,
+ uint32_t *pad_off)
+{
+ uint32_t srh_off, cur_off;
+ int offset_valid = 0;
+ int err;
+
+ srh_off = (char *)srh - (char *)(long)skb->data;
+ // cur_off = end of segments, start of possible TLVs
+ cur_off = srh_off + sizeof(*srh) +
+ sizeof(struct ip6_addr_t) * (srh->first_segment + 1);
+
+ *pad_off = 0;
+
+ // we can only go as far as ~10 TLVs due to the BPF max stack size
+ #pragma clang loop unroll(disable)
+ for (int i = 0; i < 100; i++) {
+ struct sr6_tlv_t tlv;
+
+ if (cur_off == *tlv_off)
+ offset_valid = 1;
+
+ if (cur_off >= srh_off + ((srh->hdrlen + 1) << 3))
+ break;
+
+ err = bpf_skb_load_bytes(skb, cur_off, &tlv, sizeof(tlv));
+ if (err)
+ return err;
+
+ if (tlv.type == SR6_TLV_PADDING) {
+ *pad_size = tlv.len + sizeof(tlv);
+ *pad_off = cur_off;
+
+ if (*tlv_off == srh_off) {
+ *tlv_off = cur_off;
+ offset_valid = 1;
+ }
+ break;
+
+ } else if (tlv.type == SR6_TLV_HMAC) {
+ break;
+ }
+
+ cur_off += sizeof(tlv) + tlv.len;
+ } // we reached the padding or HMAC TLVs, or the end of the SRH
+
+ if (*pad_off == 0)
+ *pad_off = cur_off;
+
+ if (*tlv_off == -1)
+ *tlv_off = cur_off;
+ else if (!offset_valid)
+ return -EINVAL;
+
+ return 0;
+}
+
+static __attribute__((always_inline))
+int add_tlv(struct __sk_buff *skb, struct ip6_srh_t *srh, uint32_t tlv_off,
+ struct sr6_tlv_t *itlv, uint8_t tlv_size)
+{
+ uint32_t srh_off = (char *)srh - (char *)(long)skb->data;
+ uint8_t len_remaining, new_pad;
+ uint32_t pad_off = 0;
+ uint32_t pad_size = 0;
+ uint32_t partial_srh_len;
+ int err;
+
+ if (tlv_off != -1)
+ tlv_off += srh_off;
+
+ if (itlv->type == SR6_TLV_PADDING || itlv->type == SR6_TLV_HMAC)
+ return -EINVAL;
+
+ err = is_valid_tlv_boundary(skb, srh, &tlv_off, &pad_size, &pad_off);
+ if (err)
+ return err;
+
+ err = bpf_lwt_seg6_adjust_srh(skb, tlv_off, sizeof(*itlv) + itlv->len);
+ if (err)
+ return err;
+
+ err = bpf_lwt_seg6_store_bytes(skb, tlv_off, (void *)itlv, tlv_size);
+ if (err)
+ return err;
+
+ // the following can't be moved inside update_tlv_pad because the
+ // bpf verifier has some issues with it
+ pad_off += sizeof(*itlv) + itlv->len;
+ partial_srh_len = pad_off - srh_off;
+ len_remaining = partial_srh_len % 8;
+ new_pad = 8 - len_remaining;
+
+ if (new_pad == 1) // cannot pad for 1 byte only
+ new_pad = 9;
+ else if (new_pad == 8)
+ new_pad = 0;
+
+ return update_tlv_pad(skb, new_pad, pad_size, pad_off);
+}
+
+// Add an Egress TLV fc00::4, add the flag A,
+// and apply End.X action to fc42::1
+SEC("lwt_seg6local")
+int __add_egr_x(struct __sk_buff *skb)
+{
+ unsigned long long hi = 0xfc42000000000000;
+ unsigned long long lo = 0x1;
+ struct ip6_srh_t *srh = get_srh(skb);
+ uint8_t new_flags = SR6_FLAG_ALERT;
+ struct ip6_addr_t addr;
+ int err, offset;
+
+ if (srh == NULL)
+ return BPF_DROP;
+
+ uint8_t tlv[20] = {2, 18, 0, 0, 0xfd, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
+ 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x4};
+
+ err = add_tlv(skb, srh, (srh->hdrlen+1) << 3,
+ (struct sr6_tlv_t *)&tlv, 20);
+ if (err)
+ return BPF_DROP;
+
+ offset = sizeof(struct ip6_t) + offsetof(struct ip6_srh_t, flags);
+ err = bpf_lwt_seg6_store_bytes(skb, offset,
+ (void *)&new_flags, sizeof(new_flags));
+ if (err)
+ return BPF_DROP;
+
+ addr.lo = htonll(lo);
+ addr.hi = htonll(hi);
+ err = bpf_lwt_seg6_action(skb, SEG6_LOCAL_ACTION_END_X,
+ (void *)&addr, sizeof(addr));
+ if (err)
+ return BPF_DROP;
+ return BPF_REDIRECT;
+}
+char __license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c b/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c
new file mode 100644
index 000000000000..608a06871572
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+
+#include <stdint.h>
+#include <string.h>
+
+#include <linux/stddef.h>
+#include <linux/bpf.h>
+
+#include "bpf_helpers.h"
+
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+#endif
+
+/* tcp_mem sysctl has only 3 ints, but this test is doing TCP_MEM_LOOPS */
+#define TCP_MEM_LOOPS 28 /* because 30 doesn't fit into 512 bytes of stack */
+#define MAX_ULONG_STR_LEN 7
+#define MAX_VALUE_STR_LEN (TCP_MEM_LOOPS * MAX_ULONG_STR_LEN)
+
+static __always_inline int is_tcp_mem(struct bpf_sysctl *ctx)
+{
+ volatile char tcp_mem_name[] = "net/ipv4/tcp_mem/very_very_very_very_long_pointless_string";
+ unsigned char i;
+ char name[64];
+ int ret;
+
+ memset(name, 0, sizeof(name));
+ ret = bpf_sysctl_get_name(ctx, name, sizeof(name), 0);
+ if (ret < 0 || ret != sizeof(tcp_mem_name) - 1)
+ return 0;
+
+#pragma clang loop unroll(disable)
+ for (i = 0; i < sizeof(tcp_mem_name); ++i)
+ if (name[i] != tcp_mem_name[i])
+ return 0;
+
+ return 1;
+}
+
+SEC("cgroup/sysctl")
+int sysctl_tcp_mem(struct bpf_sysctl *ctx)
+{
+ unsigned long tcp_mem[TCP_MEM_LOOPS] = {};
+ char value[MAX_VALUE_STR_LEN];
+ unsigned char i, off = 0;
+ int ret;
+
+ if (ctx->write)
+ return 0;
+
+ if (!is_tcp_mem(ctx))
+ return 0;
+
+ ret = bpf_sysctl_get_current_value(ctx, value, MAX_VALUE_STR_LEN);
+ if (ret < 0 || ret >= MAX_VALUE_STR_LEN)
+ return 0;
+
+#pragma clang loop unroll(disable)
+ for (i = 0; i < ARRAY_SIZE(tcp_mem); ++i) {
+ ret = bpf_strtoul(value + off, MAX_ULONG_STR_LEN, 0,
+ tcp_mem + i);
+ if (ret <= 0 || ret > MAX_ULONG_STR_LEN)
+ return 0;
+ off += ret & MAX_ULONG_STR_LEN;
+ }
+
+ return tcp_mem[0] < tcp_mem[1] && tcp_mem[1] < tcp_mem[2];
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c b/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c
new file mode 100644
index 000000000000..cb201cbe11e7
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+
+#include <stdint.h>
+#include <string.h>
+
+#include <linux/stddef.h>
+#include <linux/bpf.h>
+
+#include "bpf_helpers.h"
+
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+#endif
+
+/* tcp_mem sysctl has only 3 ints, but this test is doing TCP_MEM_LOOPS */
+#define TCP_MEM_LOOPS 20 /* because 30 doesn't fit into 512 bytes of stack */
+#define MAX_ULONG_STR_LEN 7
+#define MAX_VALUE_STR_LEN (TCP_MEM_LOOPS * MAX_ULONG_STR_LEN)
+
+static __attribute__((noinline)) int is_tcp_mem(struct bpf_sysctl *ctx)
+{
+ volatile char tcp_mem_name[] = "net/ipv4/tcp_mem/very_very_very_very_long_pointless_string_to_stress_byte_loop";
+ unsigned char i;
+ char name[64];
+ int ret;
+
+ memset(name, 0, sizeof(name));
+ ret = bpf_sysctl_get_name(ctx, name, sizeof(name), 0);
+ if (ret < 0 || ret != sizeof(tcp_mem_name) - 1)
+ return 0;
+
+#pragma clang loop unroll(disable)
+ for (i = 0; i < sizeof(tcp_mem_name); ++i)
+ if (name[i] != tcp_mem_name[i])
+ return 0;
+
+ return 1;
+}
+
+
+SEC("cgroup/sysctl")
+int sysctl_tcp_mem(struct bpf_sysctl *ctx)
+{
+ unsigned long tcp_mem[TCP_MEM_LOOPS] = {};
+ char value[MAX_VALUE_STR_LEN];
+ unsigned char i, off = 0;
+ int ret;
+
+ if (ctx->write)
+ return 0;
+
+ if (!is_tcp_mem(ctx))
+ return 0;
+
+ ret = bpf_sysctl_get_current_value(ctx, value, MAX_VALUE_STR_LEN);
+ if (ret < 0 || ret >= MAX_VALUE_STR_LEN)
+ return 0;
+
+#pragma clang loop unroll(disable)
+ for (i = 0; i < ARRAY_SIZE(tcp_mem); ++i) {
+ ret = bpf_strtoul(value + off, MAX_ULONG_STR_LEN, 0,
+ tcp_mem + i);
+ if (ret <= 0 || ret > MAX_ULONG_STR_LEN)
+ return 0;
+ off += ret & MAX_ULONG_STR_LEN;
+ }
+
+ return tcp_mem[0] < tcp_mem[1] && tcp_mem[1] < tcp_mem[2];
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_loop.c b/tools/testing/selftests/bpf/progs/test_xdp_loop.c
new file mode 100644
index 000000000000..7fa4677df22e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_xdp_loop.c
@@ -0,0 +1,231 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+#include <stddef.h>
+#include <string.h>
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/in.h>
+#include <linux/udp.h>
+#include <linux/tcp.h>
+#include <linux/pkt_cls.h>
+#include <sys/socket.h>
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+#include "test_iptunnel_common.h"
+
+int _version SEC("version") = 1;
+
+struct bpf_map_def SEC("maps") rxcnt = {
+ .type = BPF_MAP_TYPE_PERCPU_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(__u64),
+ .max_entries = 256,
+};
+
+struct bpf_map_def SEC("maps") vip2tnl = {
+ .type = BPF_MAP_TYPE_HASH,
+ .key_size = sizeof(struct vip),
+ .value_size = sizeof(struct iptnl_info),
+ .max_entries = MAX_IPTNL_ENTRIES,
+};
+
+static __always_inline void count_tx(__u32 protocol)
+{
+ __u64 *rxcnt_count;
+
+ rxcnt_count = bpf_map_lookup_elem(&rxcnt, &protocol);
+ if (rxcnt_count)
+ *rxcnt_count += 1;
+}
+
+static __always_inline int get_dport(void *trans_data, void *data_end,
+ __u8 protocol)
+{
+ struct tcphdr *th;
+ struct udphdr *uh;
+
+ switch (protocol) {
+ case IPPROTO_TCP:
+ th = (struct tcphdr *)trans_data;
+ if (th + 1 > data_end)
+ return -1;
+ return th->dest;
+ case IPPROTO_UDP:
+ uh = (struct udphdr *)trans_data;
+ if (uh + 1 > data_end)
+ return -1;
+ return uh->dest;
+ default:
+ return 0;
+ }
+}
+
+static __always_inline void set_ethhdr(struct ethhdr *new_eth,
+ const struct ethhdr *old_eth,
+ const struct iptnl_info *tnl,
+ __be16 h_proto)
+{
+ memcpy(new_eth->h_source, old_eth->h_dest, sizeof(new_eth->h_source));
+ memcpy(new_eth->h_dest, tnl->dmac, sizeof(new_eth->h_dest));
+ new_eth->h_proto = h_proto;
+}
+
+static __always_inline int handle_ipv4(struct xdp_md *xdp)
+{
+ void *data_end = (void *)(long)xdp->data_end;
+ void *data = (void *)(long)xdp->data;
+ struct iptnl_info *tnl;
+ struct ethhdr *new_eth;
+ struct ethhdr *old_eth;
+ struct iphdr *iph = data + sizeof(struct ethhdr);
+ __u16 *next_iph;
+ __u16 payload_len;
+ struct vip vip = {};
+ int dport;
+ __u32 csum = 0;
+ int i;
+
+ if (iph + 1 > data_end)
+ return XDP_DROP;
+
+ dport = get_dport(iph + 1, data_end, iph->protocol);
+ if (dport == -1)
+ return XDP_DROP;
+
+ vip.protocol = iph->protocol;
+ vip.family = AF_INET;
+ vip.daddr.v4 = iph->daddr;
+ vip.dport = dport;
+ payload_len = bpf_ntohs(iph->tot_len);
+
+ tnl = bpf_map_lookup_elem(&vip2tnl, &vip);
+ /* It only does v4-in-v4 */
+ if (!tnl || tnl->family != AF_INET)
+ return XDP_PASS;
+
+ if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct iphdr)))
+ return XDP_DROP;
+
+ data = (void *)(long)xdp->data;
+ data_end = (void *)(long)xdp->data_end;
+
+ new_eth = data;
+ iph = data + sizeof(*new_eth);
+ old_eth = data + sizeof(*iph);
+
+ if (new_eth + 1 > data_end ||
+ old_eth + 1 > data_end ||
+ iph + 1 > data_end)
+ return XDP_DROP;
+
+ set_ethhdr(new_eth, old_eth, tnl, bpf_htons(ETH_P_IP));
+
+ iph->version = 4;
+ iph->ihl = sizeof(*iph) >> 2;
+ iph->frag_off = 0;
+ iph->protocol = IPPROTO_IPIP;
+ iph->check = 0;
+ iph->tos = 0;
+ iph->tot_len = bpf_htons(payload_len + sizeof(*iph));
+ iph->daddr = tnl->daddr.v4;
+ iph->saddr = tnl->saddr.v4;
+ iph->ttl = 8;
+
+ next_iph = (__u16 *)iph;
+#pragma clang loop unroll(disable)
+ for (i = 0; i < sizeof(*iph) >> 1; i++)
+ csum += *next_iph++;
+
+ iph->check = ~((csum & 0xffff) + (csum >> 16));
+
+ count_tx(vip.protocol);
+
+ return XDP_TX;
+}
+
+static __always_inline int handle_ipv6(struct xdp_md *xdp)
+{
+ void *data_end = (void *)(long)xdp->data_end;
+ void *data = (void *)(long)xdp->data;
+ struct iptnl_info *tnl;
+ struct ethhdr *new_eth;
+ struct ethhdr *old_eth;
+ struct ipv6hdr *ip6h = data + sizeof(struct ethhdr);
+ __u16 payload_len;
+ struct vip vip = {};
+ int dport;
+
+ if (ip6h + 1 > data_end)
+ return XDP_DROP;
+
+ dport = get_dport(ip6h + 1, data_end, ip6h->nexthdr);
+ if (dport == -1)
+ return XDP_DROP;
+
+ vip.protocol = ip6h->nexthdr;
+ vip.family = AF_INET6;
+ memcpy(vip.daddr.v6, ip6h->daddr.s6_addr32, sizeof(vip.daddr));
+ vip.dport = dport;
+ payload_len = ip6h->payload_len;
+
+ tnl = bpf_map_lookup_elem(&vip2tnl, &vip);
+ /* It only does v6-in-v6 */
+ if (!tnl || tnl->family != AF_INET6)
+ return XDP_PASS;
+
+ if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct ipv6hdr)))
+ return XDP_DROP;
+
+ data = (void *)(long)xdp->data;
+ data_end = (void *)(long)xdp->data_end;
+
+ new_eth = data;
+ ip6h = data + sizeof(*new_eth);
+ old_eth = data + sizeof(*ip6h);
+
+ if (new_eth + 1 > data_end || old_eth + 1 > data_end ||
+ ip6h + 1 > data_end)
+ return XDP_DROP;
+
+ set_ethhdr(new_eth, old_eth, tnl, bpf_htons(ETH_P_IPV6));
+
+ ip6h->version = 6;
+ ip6h->priority = 0;
+ memset(ip6h->flow_lbl, 0, sizeof(ip6h->flow_lbl));
+ ip6h->payload_len = bpf_htons(bpf_ntohs(payload_len) + sizeof(*ip6h));
+ ip6h->nexthdr = IPPROTO_IPV6;
+ ip6h->hop_limit = 8;
+ memcpy(ip6h->saddr.s6_addr32, tnl->saddr.v6, sizeof(tnl->saddr.v6));
+ memcpy(ip6h->daddr.s6_addr32, tnl->daddr.v6, sizeof(tnl->daddr.v6));
+
+ count_tx(vip.protocol);
+
+ return XDP_TX;
+}
+
+SEC("xdp_tx_iptunnel")
+int _xdp_tx_iptunnel(struct xdp_md *xdp)
+{
+ void *data_end = (void *)(long)xdp->data_end;
+ void *data = (void *)(long)xdp->data;
+ struct ethhdr *eth = data;
+ __u16 h_proto;
+
+ if (eth + 1 > data_end)
+ return XDP_DROP;
+
+ h_proto = eth->h_proto;
+
+ if (h_proto == bpf_htons(ETH_P_IP))
+ return handle_ipv4(xdp);
+ else if (h_proto == bpf_htons(ETH_P_IPV6))
+
+ return handle_ipv6(xdp);
+ else
+ return XDP_DROP;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index cd0248c54e25..93e1d87a343a 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -237,10 +237,10 @@ static void bpf_fill_scale1(struct bpf_test *self)
insn[i++] = BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6,
-8 * (k % 64 + 1));
}
- /* every jump adds 1 step to insn_processed, so to stay exactly
- * within 1m limit add MAX_TEST_INSNS - MAX_JMP_SEQ - 1 MOVs and 1 EXIT
+ /* is_state_visited() doesn't allocate state for pruning for every jump.
+ * Hence multiply jmps by 4 to accommodate that heuristic
*/
- while (i < MAX_TEST_INSNS - MAX_JMP_SEQ - 1)
+ while (i < MAX_TEST_INSNS - MAX_JMP_SEQ * 4)
insn[i++] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 42);
insn[i] = BPF_EXIT_INSN();
self->prog_len = i + 1;
@@ -269,10 +269,7 @@ static void bpf_fill_scale2(struct bpf_test *self)
insn[i++] = BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6,
-8 * (k % (64 - 4 * FUNC_NEST) + 1));
}
- /* every jump adds 1 step to insn_processed, so to stay exactly
- * within 1m limit add MAX_TEST_INSNS - MAX_JMP_SEQ - 1 MOVs and 1 EXIT
- */
- while (i < MAX_TEST_INSNS - MAX_JMP_SEQ - 1)
+ while (i < MAX_TEST_INSNS - MAX_JMP_SEQ * 4)
insn[i++] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 42);
insn[i] = BPF_EXIT_INSN();
self->prog_len = i + 1;
diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c
index 9093a8f64dc6..2d752c4f8d9d 100644
--- a/tools/testing/selftests/bpf/verifier/calls.c
+++ b/tools/testing/selftests/bpf/verifier/calls.c
@@ -215,9 +215,11 @@
BPF_MOV64_IMM(BPF_REG_0, 3),
BPF_JMP_IMM(BPF_JA, 0, 0, -6),
},
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
- .errstr = "back-edge from insn",
- .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
+ .errstr_unpriv = "back-edge from insn",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ .retval = 1,
},
{
"calls: conditional call 4",
@@ -250,22 +252,24 @@
BPF_MOV64_IMM(BPF_REG_0, 3),
BPF_EXIT_INSN(),
},
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
- .errstr = "back-edge from insn",
- .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 1,
},
{
"calls: conditional call 6",
.insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, -2),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, -3),
BPF_EXIT_INSN(),
BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
offsetof(struct __sk_buff, mark)),
BPF_EXIT_INSN(),
},
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
- .errstr = "back-edge from insn",
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .errstr = "infinite loop detected",
.result = REJECT,
},
{
diff --git a/tools/testing/selftests/bpf/verifier/cfg.c b/tools/testing/selftests/bpf/verifier/cfg.c
index 349c0862fb4c..4eb76ed739ce 100644
--- a/tools/testing/selftests/bpf/verifier/cfg.c
+++ b/tools/testing/selftests/bpf/verifier/cfg.c
@@ -41,7 +41,8 @@
BPF_JMP_IMM(BPF_JA, 0, 0, -1),
BPF_EXIT_INSN(),
},
- .errstr = "back-edge",
+ .errstr = "unreachable insn 1",
+ .errstr_unpriv = "back-edge",
.result = REJECT,
},
{
@@ -53,18 +54,20 @@
BPF_JMP_IMM(BPF_JA, 0, 0, -4),
BPF_EXIT_INSN(),
},
- .errstr = "back-edge",
+ .errstr = "unreachable insn 4",
+ .errstr_unpriv = "back-edge",
.result = REJECT,
},
{
"conditional loop",
.insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
BPF_MOV64_REG(BPF_REG_3, BPF_REG_0),
BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, -3),
BPF_EXIT_INSN(),
},
- .errstr = "back-edge",
+ .errstr = "infinite loop detected",
+ .errstr_unpriv = "back-edge",
.result = REJECT,
},
diff --git a/tools/testing/selftests/bpf/verifier/direct_packet_access.c b/tools/testing/selftests/bpf/verifier/direct_packet_access.c
index d5c596fdc4b9..2c5fbe7bcd27 100644
--- a/tools/testing/selftests/bpf/verifier/direct_packet_access.c
+++ b/tools/testing/selftests/bpf/verifier/direct_packet_access.c
@@ -511,7 +511,8 @@
offsetof(struct __sk_buff, data)),
BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_IMM(BPF_REG_0, 0xffffffff),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, mark)),
BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xffff),
diff --git a/tools/testing/selftests/bpf/verifier/helper_access_var_len.c b/tools/testing/selftests/bpf/verifier/helper_access_var_len.c
index 1f39d845c64f..67ab12410050 100644
--- a/tools/testing/selftests/bpf/verifier/helper_access_var_len.c
+++ b/tools/testing/selftests/bpf/verifier/helper_access_var_len.c
@@ -29,9 +29,9 @@
{
"helper access to variable memory: stack, bitwise AND, zero included",
.insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
- BPF_MOV64_IMM(BPF_REG_2, 16),
BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 64),
@@ -46,9 +46,9 @@
{
"helper access to variable memory: stack, bitwise AND + JMP, wrong max",
.insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
- BPF_MOV64_IMM(BPF_REG_2, 16),
BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 65),
@@ -122,9 +122,9 @@
{
"helper access to variable memory: stack, JMP, bounds + offset",
.insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
- BPF_MOV64_IMM(BPF_REG_2, 16),
BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 64, 5),
@@ -143,9 +143,9 @@
{
"helper access to variable memory: stack, JMP, wrong max",
.insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
- BPF_MOV64_IMM(BPF_REG_2, 16),
BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 65, 4),
@@ -163,9 +163,9 @@
{
"helper access to variable memory: stack, JMP, no max check",
.insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
- BPF_MOV64_IMM(BPF_REG_2, 16),
BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
BPF_MOV64_IMM(BPF_REG_4, 0),
@@ -183,9 +183,9 @@
{
"helper access to variable memory: stack, JMP, no min check",
.insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
- BPF_MOV64_IMM(BPF_REG_2, 16),
BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 64, 3),
@@ -201,9 +201,9 @@
{
"helper access to variable memory: stack, JMP (signed), no min check",
.insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
- BPF_MOV64_IMM(BPF_REG_2, 16),
BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
BPF_JMP_IMM(BPF_JSGT, BPF_REG_2, 64, 3),
@@ -244,6 +244,7 @@
{
"helper access to variable memory: map, JMP, wrong max",
.insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 8),
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
@@ -251,7 +252,7 @@
BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 10),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_2, sizeof(struct test_val)),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_6),
BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128),
BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128),
BPF_JMP_IMM(BPF_JSGT, BPF_REG_2, sizeof(struct test_val) + 1, 4),
@@ -262,7 +263,7 @@
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .fixup_map_hash_48b = { 3 },
+ .fixup_map_hash_48b = { 4 },
.errstr = "invalid access to map value, value_size=48 off=0 size=49",
.result = REJECT,
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -296,6 +297,7 @@
{
"helper access to variable memory: map adjusted, JMP, wrong max",
.insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 8),
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
@@ -304,7 +306,7 @@
BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 20),
- BPF_MOV64_IMM(BPF_REG_2, sizeof(struct test_val)),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_6),
BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128),
BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128),
BPF_JMP_IMM(BPF_JSGT, BPF_REG_2, sizeof(struct test_val) - 19, 4),
@@ -315,7 +317,7 @@
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .fixup_map_hash_48b = { 3 },
+ .fixup_map_hash_48b = { 4 },
.errstr = "R1 min value is outside of the array range",
.result = REJECT,
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -337,8 +339,8 @@
{
"helper access to variable memory: size > 0 not allowed on NULL (ARG_PTR_TO_MEM_OR_NULL)",
.insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
BPF_MOV64_IMM(BPF_REG_1, 0),
- BPF_MOV64_IMM(BPF_REG_2, 1),
BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128),
BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128),
BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 64),
@@ -562,6 +564,7 @@
{
"helper access to variable memory: 8 bytes leak",
.insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
BPF_MOV64_IMM(BPF_REG_0, 0),
@@ -572,7 +575,6 @@
BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -24),
BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
- BPF_MOV64_IMM(BPF_REG_2, 1),
BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128),
BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128),
BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 63),
diff --git a/tools/testing/selftests/bpf/verifier/loops1.c b/tools/testing/selftests/bpf/verifier/loops1.c
new file mode 100644
index 000000000000..5e980a5ab69d
--- /dev/null
+++ b/tools/testing/selftests/bpf/verifier/loops1.c
@@ -0,0 +1,161 @@
+{
+ "bounded loop, count to 4",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),
+ BPF_JMP_IMM(BPF_JLT, BPF_REG_0, 4, -2),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .retval = 4,
+},
+{
+ "bounded loop, count to 20",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 3),
+ BPF_JMP_IMM(BPF_JLT, BPF_REG_0, 20, -2),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+},
+{
+ "bounded loop, count from positive unknown to 4",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
+ BPF_JMP_IMM(BPF_JSLT, BPF_REG_0, 0, 2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),
+ BPF_JMP_IMM(BPF_JLT, BPF_REG_0, 4, -2),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .retval = 4,
+},
+{
+ "bounded loop, count from totally unknown to 4",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),
+ BPF_JMP_IMM(BPF_JLT, BPF_REG_0, 4, -2),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+},
+{
+ "bounded loop, count to 4 with equality",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 4, -2),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+},
+{
+ "bounded loop, start in the middle",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_JMP_A(1),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),
+ BPF_JMP_IMM(BPF_JLT, BPF_REG_0, 4, -2),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "back-edge",
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .retval = 4,
+},
+{
+ "bounded loop containing a forward jump",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),
+ BPF_JMP_REG(BPF_JEQ, BPF_REG_0, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JLT, BPF_REG_0, 4, -3),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .retval = 4,
+},
+{
+ "bounded loop that jumps out rather than in",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_6, 0),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
+ BPF_JMP_IMM(BPF_JGT, BPF_REG_6, 10000, 2),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
+ BPF_JMP_A(-4),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+},
+{
+ "infinite loop after a conditional jump",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 5),
+ BPF_JMP_IMM(BPF_JLT, BPF_REG_0, 4, 2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),
+ BPF_JMP_A(-2),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "program is too large",
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+},
+{
+ "bounded recursion",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+ BPF_JMP_IMM(BPF_JLT, BPF_REG_1, 4, 1),
+ BPF_EXIT_INSN(),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -5),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "back-edge",
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+},
+{
+ "infinite loop in two jumps",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_JMP_A(0),
+ BPF_JMP_IMM(BPF_JLT, BPF_REG_0, 4, -2),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "loop detected",
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+},
+{
+ "infinite loop: three-jump trick",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
+ BPF_JMP_IMM(BPF_JLT, BPF_REG_0, 2, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
+ BPF_JMP_IMM(BPF_JLT, BPF_REG_0, 2, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
+ BPF_JMP_IMM(BPF_JLT, BPF_REG_0, 2, -11),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "loop detected",
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+},