Re: len = bpf_probe_read_str(); bpf_perf_event_output(... len) == FAIL

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 





On 1/22/18 7:06 AM, Arnaldo Carvalho de Melo wrote:
Em Wed, Nov 22, 2017 at 10:42:22AM -0800, Gianluca Borello escreveu:
On Tue, Nov 21, 2017 at 2:31 PM, Alexei Starovoitov
<alexei.starovoitov@xxxxxxxxx> wrote:

yeah sorry about this hack. Gianluca reported this issue as well.
Yonghong fixed it for bpf_probe_read only. We will extend
the fix to bpf_probe_read_str() and bpf_perf_event_output() asap.
The above workaround gets too much into llvm and verifier details
we should strive to make bpf program writing as easy as possible.


Hi Arnaldo

With the help of Alexei, Daniel and Yonghong I just submitted a new
series ("bpf: fix semantics issues with helpers receiving NULL
arguments") that includes a fix in bpf_perf_event_output. This should
simplify the way you write your bpf programs, so you shouldn't be
required to write those convoluted branches anymore (there are a few
usage examples in the commit log).

In my case it made writing the code much easier, after applying it I
haven't been surprised by the compiler output in a while, and I hope
your experience will be improved as well.

Trying to work with this again, and I still need to trick clang into not
doing some optimizations that end up getting the resulting eBPF object
rejected by the kernel verifier:

[root@jouet bpf]# uname -a
Linux jouet 4.15.0-rc8+ #1 SMP Wed Jan 17 11:01:34 -03 2018 x86_64 x86_64 x86_64 GNU/Linux
[root@jouet bpf]# grep -i bpf /lib/modules/`uname -r`/build/.config
CONFIG_CGROUP_BPF=y
CONFIG_BPF=y
CONFIG_BPF_SYSCALL=y
CONFIG_BPF_JIT_ALWAYS_ON=y
# CONFIG_NETFILTER_XT_MATCH_BPF is not set
# CONFIG_NET_CLS_BPF is not set
# CONFIG_NET_ACT_BPF is not set
CONFIG_BPF_JIT=y
CONFIG_BPF_STREAM_PARSER=y
CONFIG_LWTUNNEL_BPF=y
CONFIG_HAVE_EBPF_JIT=y
CONFIG_BPF_EVENTS=y
# CONFIG_TEST_BPF is not set
[root@jouet bpf]# cat sys_enter_open.c
#include "bpf.h"

SEC("syscalls:sys_enter_open")
int func(void *ctx)
{
	struct {
		char *ptr;
		char path[256];
	} filename = {
	/*
	 * /sys/kernel/debug/tracing/events/syscalls/sys_enter_open/format:
	 *
	 * ...
	 * field:const char * filename;	offset:16;	size:8;	signed:0;
	 * ...
	 * ctx + 16 selects 'filename'
	 */
		.ptr = *((char **)(ctx + 16)),
	};
	int len = bpf_probe_read_str(filename.path, sizeof(filename.path), filename.ptr);
	if (len > 0 && len < 256)
		perf_event_output(ctx, &__bpf_stdout__, BPF_F_CURRENT_CPU, &filename, len + sizeof(filename.ptr));
	return 0;
}
[root@jouet bpf]#
[root@jouet bpf]# perf trace -v -e open,sys_enter_open.c
bpf: builtin compilation failed: -95, try external compiler
Kernel build dir is set to /lib/modules/4.15.0-rc8+/build
set env: KBUILD_DIR=/lib/modules/4.15.0-rc8+/build
unset env: KBUILD_OPTS
include option is set to  -nostdinc -isystem /usr/lib/gcc/x86_64-redhat-linux/7/include -I/home/acme/git/linux/arch/x86/include -I./arch/x86/include/generated  -I/home/acme/git/linux/include -I./include -I/home/acme/git/linux/arch/x86/include/uapi -I./arch/x86/include/generated/uapi -I/home/acme/git/linux/include/uapi -I./include/generated/uapi -include /home/acme/git/linux/include/linux/kconfig.h
set env: NR_CPUS=4
set env: LINUX_VERSION_CODE=0x40f00
set env: CLANG_EXEC=/usr/local/bin/clang
unset env: CLANG_OPTIONS
set env: KERNEL_INC_OPTIONS= -nostdinc -isystem /usr/lib/gcc/x86_64-redhat-linux/7/include -I/home/acme/git/linux/arch/x86/include -I./arch/x86/include/generated  -I/home/acme/git/linux/include -I./include -I/home/acme/git/linux/arch/x86/include/uapi -I./arch/x86/include/generated/uapi -I/home/acme/git/linux/include/uapi -I./include/generated/uapi -include /home/acme/git/linux/include/linux/kconfig.h
set env: WORKING_DIR=/lib/modules/4.15.0-rc8+/build
set env: CLANG_SOURCE=/home/acme/bpf/sys_enter_open.c
llvm compiling command template: $CLANG_EXEC -D__KERNEL__ -D__NR_CPUS__=$NR_CPUS -DLINUX_VERSION_CODE=$LINUX_VERSION_CODE $CLANG_OPTIONS $KERNEL_INC_OPTIONS -Wno-unused-value -Wno-pointer-sign -working-directory $WORKING_DIR -c "$CLANG_SOURCE" -target bpf -O2 -o -
libbpf: loading object 'sys_enter_open.c' from buffer
libbpf: section .strtab, size 101, link 0, flags 0, type=3
libbpf: section .text, size 0, link 0, flags 6, type=1
libbpf: section syscalls:sys_enter_open, size 472, link 0, flags 6, type=1
libbpf: found program syscalls:sys_enter_open
libbpf: section .relsyscalls:sys_enter_open, size 16, link 8, flags 0, type=9
libbpf: section maps, size 16, link 0, flags 3, type=1
libbpf: section license, size 4, link 0, flags 3, type=1
libbpf: license of sys_enter_open.c is GPL
libbpf: section version, size 4, link 0, flags 3, type=1
libbpf: kernel version of sys_enter_open.c is 40f00
libbpf: section .symtab, size 144, link 1, flags 0, type=2
libbpf: maps in sys_enter_open.c: 1 maps in 16 bytes
libbpf: map 0 is "__bpf_stdout__"
libbpf: collecting relocating info for: 'syscalls:sys_enter_open'
libbpf: relocation: insn_idx=51
libbpf: relocation: find map 0 (__bpf_stdout__) for insn 51
LLVM: dumping sys_enter_open.o
bpf: config program 'syscalls:sys_enter_open'
libbpf: create map __bpf_stdout__: fd=3
libbpf: load bpf program failed: Permission denied
libbpf: -- BEGIN DUMP LOG ---
libbpf:
0: (bf) r6 = r1
1: (b7) r1 = 0
2: (7b) *(u64 *)(r10 -8) = r1
3: (7b) *(u64 *)(r10 -16) = r1
4: (7b) *(u64 *)(r10 -24) = r1
5: (7b) *(u64 *)(r10 -32) = r1
6: (7b) *(u64 *)(r10 -40) = r1
7: (7b) *(u64 *)(r10 -48) = r1
8: (7b) *(u64 *)(r10 -56) = r1
9: (7b) *(u64 *)(r10 -64) = r1
10: (7b) *(u64 *)(r10 -72) = r1
11: (7b) *(u64 *)(r10 -80) = r1
12: (7b) *(u64 *)(r10 -88) = r1
13: (7b) *(u64 *)(r10 -96) = r1
14: (7b) *(u64 *)(r10 -104) = r1
15: (7b) *(u64 *)(r10 -112) = r1
16: (7b) *(u64 *)(r10 -120) = r1
17: (7b) *(u64 *)(r10 -128) = r1
18: (7b) *(u64 *)(r10 -136) = r1
19: (7b) *(u64 *)(r10 -144) = r1
20: (7b) *(u64 *)(r10 -152) = r1
21: (7b) *(u64 *)(r10 -160) = r1
22: (7b) *(u64 *)(r10 -168) = r1
23: (7b) *(u64 *)(r10 -176) = r1
24: (7b) *(u64 *)(r10 -184) = r1
25: (7b) *(u64 *)(r10 -192) = r1
26: (7b) *(u64 *)(r10 -200) = r1
27: (7b) *(u64 *)(r10 -208) = r1
28: (7b) *(u64 *)(r10 -216) = r1
29: (7b) *(u64 *)(r10 -224) = r1
30: (7b) *(u64 *)(r10 -232) = r1
31: (7b) *(u64 *)(r10 -240) = r1
32: (7b) *(u64 *)(r10 -248) = r1
33: (7b) *(u64 *)(r10 -256) = r1
34: (79) r3 = *(u64 *)(r6 +16)
35: (7b) *(u64 *)(r10 -264) = r3
36: (bf) r1 = r10
37: (07) r1 += -256
38: (b7) r2 = 256
39: (85) call bpf_probe_read_str#45
40: (bf) r1 = r0
41: (07) r1 += -1
42: (67) r1 <<= 32
43: (77) r1 >>= 32
44: (25) if r1 > 0xfe goto pc+12
  R0=inv(id=0) R1=inv(id=0,umax_value=254,var_off=(0x0; 0xff)) R6=ctx(id=0,off=0,imm=0) R10=fp0
45: (67) r0 <<= 32
46: (c7) r0 s>>= 32
47: (07) r0 += 8
48: (bf) r4 = r10
49: (07) r4 += -264
50: (bf) r1 = r6
51: (18) r2 = 0xffff957fc90e1000
53: (18) r3 = 0xffffffff
55: (bf) r5 = r0
56: (85) call bpf_perf_event_output#25
R5 min value is negative, either use unsigned or 'var &= const'

Yes, I can reproduce the issue as well. The reason is again due to
compiler optimization,
   > 39: (85) call bpf_probe_read_str#45
   > 40: (bf) r1 = r0
   > 41: (07) r1 += -1
   > 42: (67) r1 <<= 32
   > 43: (77) r1 >>= 32
   > 44: (25) if r1 > 0xfe goto pc+12
> R0=inv(id=0) R1=inv(id=0,umax_value=254,var_off=(0x0; 0xff)) R6=ctx(id=0,off=0,imm=0) R10=fp0
   > 45: (67) r0 <<= 32
   > 46: (c7) r0 s>>= 32
   > 47: (07) r0 += 8
   > 48: (bf) r4 = r10
   > 49: (07) r4 += -264
   > 50: (bf) r1 = r6
   > 51: (18) r2 = 0xffff957fc90e1000
   > 53: (18) r3 = 0xffffffff
   > 55: (bf) r5 = r0
   > 56: (85) call bpf_perf_event_output#25
   > R5 min value is negative, either use unsigned or 'var &= const'

The compiler did "40: (bf) r1 = r0" and then uses "r1" for branch comparison, the original "r0" is left with complete unknown integer value and later used to calculate the buffer size "55: (bf) r5 = r0"
where "r5" could be negative value and the verifier rightfully
complains.

There is no easy way to fix this in verifier unless verifier starts to track correlations between registers which is a big task. So your below workaround is okay. The below workaround should also work:

int len = bpf_probe_read_str(filename.path, sizeof(filename.path), filename.ptr);
        if (len > 0 && len < 256)
bpf_perf_event_output(ctx, &my_map, BPF_F_CURRENT_CPU, &filename, (len & 0xff) + sizeof(filename.ptr));
        return 0;

Thanks,


libbpf: -- END LOG --
libbpf: failed to load program 'syscalls:sys_enter_open'
libbpf: failed to load object 'sys_enter_open.c'
bpf: load objects failed
event syntax error: 'sys_enter_open.c'
                      \___ Kernel verifier blocks program loading

(add -v to see detail)
Run 'perf list' for a list of valid events

  Usage: perf trace [<options>] [<command>]
     or: perf trace [<options>] -- <command> [<options>]
     or: perf trace record [<options>] [<command>]
     or: perf trace record [<options>] -- <command> [<options>]

     -e, --event <event>   event/syscall selector. use 'perf list' to list available events
[root@jouet bpf]#

If I use the version we came up before, it works:

[root@jouet bpf]# trace -e open,sys_enter_open.c
LLVM: dumping sys_enter_open.o
   2171.820 (         ): __bpf_stdout__:@......./proc/self/task/14683/comm..)
   2171.845 ( 0.339 ms): qemu-system-x8/6721 open(filename: /proc/self/task/14683/comm, flags: RDWR                ) = 91
^C[root@jouet bpf]
[root@jouet bpf]# trace -e open,sys_enter_open.o
   2485.416 (         ): __bpf_stdout__:Vm..u.../proc/loadavg.......)
   2485.434 ( 0.210 ms): lighttpd/25120 open(filename: /proc/loadavg, mode: ISGID|IXOTH                       ) = 8
^C[root@jouet bpf]#

[root@jouet bpf]cat sys_enter_open.c
#include "bpf.h"

SEC("syscalls:sys_enter_open")
int func(void *ctx)
{
	struct {
		char *ptr;
		char path[256];
	} filename = {
	/*
	 * /sys/kernel/debug/tracing/events/syscalls/sys_enter_open/format:
	 *
	 * ...
	 * field:const char * filename;	offset:16;	size:8;	signed:0;
	 * ...
	 * ctx + 16 selects 'filename'
	 */
		.ptr = *((char **)(ctx + 16)),
	};
	int len = bpf_probe_read_str(filename.path, sizeof(filename.path), filename.ptr);
	if (len > 0) {
	       	if (len == 1)
			perf_event_output(ctx, &__bpf_stdout__, BPF_F_CURRENT_CPU, &filename, len + sizeof(filename.ptr));
		else if (len < 256)
			perf_event_output(ctx, &__bpf_stdout__, BPF_F_CURRENT_CPU, &filename, len + sizeof(filename.ptr));
	}
	return 0;
}
[root@jouet bpf]#

[acme@jouet perf]$ llc --version | head -16
LLVM (https://urldefense.proofpoint.com/v2/url?u=http-3A__llvm.org_&d=DwIBAg&c=5VD0RTtNlTh3ycd41b3MUw&r=DA8e1B5r073vIqRrFz7MRA&m=kbmGig2gAUdNIT0zFKDollApvFW0R95_U5Ntz4dvRbM&s=zQgAZ9vJnfkoNy-c53Y3H_2nMVwMzV-LyMA2f6bSFrw&e=):
   LLVM version 6.0.0svn
   DEBUG build with assertions.
   Default target: x86_64-unknown-linux-gnu
   Host CPU: broadwell

   Registered Targets:
     aarch64    - AArch64 (little endian)
     aarch64_be - AArch64 (big endian)
     amdgcn     - AMD GCN GPUs
     arm        - ARM
     arm64      - ARM64 (little endian)
     armeb      - ARM (big endian)
     bpf        - BPF (host endian)
     bpfeb      - BPF (big endian)
     bpfel      - BPF (little endian)
[acme@jouet perf]$





[Index of Archives]     [Linux Kernel Discussion]     [TCP Instrumentation]     [Ethernet Bridging]     [Linux Wireless Networking]     [Linux WPAN Networking]     [Linux Host AP]     [Linux WPAN Networking]     [Linux Bluetooth Networking]     [Linux ATH6KL Networking]     [Linux Networking Users]     [XDP Newbies]     [Linux Coverity]     [VLAN]     [Git]     [IETF Annouce]     [Linux Assembly]     [Security]     [Bugtraq]     [Yosemite Information]     [MIPS Linux]     [ARM Linux Kernel]     [ARM Linux]     [Linux Virtualization]     [Linux IDE]     [Linux RAID]     [Linux SCSI]

  Powered by Linux