xref: /OK3568_Linux_fs/kernel/tools/testing/selftests/bpf/progs/strobemeta.h (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun // SPDX-License-Identifier: GPL-2.0
2*4882a593Smuzhiyun // Copyright (c) 2019 Facebook
3*4882a593Smuzhiyun 
4*4882a593Smuzhiyun #include <stdint.h>
5*4882a593Smuzhiyun #include <stddef.h>
6*4882a593Smuzhiyun #include <stdbool.h>
7*4882a593Smuzhiyun #include <linux/bpf.h>
8*4882a593Smuzhiyun #include <linux/ptrace.h>
9*4882a593Smuzhiyun #include <linux/sched.h>
10*4882a593Smuzhiyun #include <linux/types.h>
11*4882a593Smuzhiyun #include <bpf/bpf_helpers.h>
12*4882a593Smuzhiyun 
/* Local 32-bit pid type; used as the key type of the strobemeta_cfgs map. */
typedef uint32_t pid_t;

/*
 * Empty placeholder: this file only passes task_struct pointers around and
 * never accesses a field through them.
 */
struct task_struct {};
15*4882a593Smuzhiyun 
/* Size in bytes of the comm[] buffer in struct strobelight_bpf_sample. */
#define TASK_COMM_LEN 16
/* Number of 64-bit slots per value in the stacks_0/stacks_1 maps. */
#define PERF_MAX_STACK_DEPTH 127

/* Metavariable type tags. */
#define STROBE_TYPE_INVALID 0
#define STROBE_TYPE_INT 1
#define STROBE_TYPE_STR 2
#define STROBE_TYPE_MAP 3

/* Timestamp bit that on_event() tests to pick stacks_0 or stacks_1. */
#define STACK_TABLE_EPOCH_SHIFT 20
/* Byte limit handed to every bpf_probe_read_user_str() call in this file. */
#define STROBE_MAX_STR_LEN 1
/* Number of entries in the strobemeta_cfgs map. */
#define STROBE_MAX_CFGS 32
/*
 * Capacity of strobemeta_payload.payload: one string slot per string
 * variable, plus, per map variable, one slot for the tag and two slots
 * (key and value) per map entry.
 *
 * STROBE_MAX_STRS, STROBE_MAX_MAPS and STROBE_MAX_MAP_ENTRIES are not defined
 * in this file; presumably the including program defines them first.
 */
#define STROBE_MAX_PAYLOAD						\
	(STROBE_MAX_STRS * STROBE_MAX_STR_LEN +				\
	STROBE_MAX_MAPS * (1 + STROBE_MAX_MAP_ENTRIES * 2) * STROBE_MAX_STR_LEN)
30*4882a593Smuzhiyun 
/* Fixed 8-byte header that precedes every metavariable value. */
struct strobe_value_header {
	/*
	 * Interpretation depends on the variable type:
	 * - int: 0 when the value is not set, 1 otherwise;
	 * - str: always 1; whether a value is set is decided by the pointer;
	 * - map: always 1; the pointer refers to a separate struct holding
	 *   the number of entries (at most STROBE_MAX_MAP_ENTRIES).
	 */
	uint16_t len;
	/*
	 * Padding kept for possible future fields/flags. The header must stay
	 * 8 bytes so that a single 16-byte read from BPF fetches both the
	 * header and the value that follows it.
	 */
	uint8_t _reserved[6];
};
47*4882a593Smuzhiyun 
48*4882a593Smuzhiyun /*
49*4882a593Smuzhiyun  * strobe_value_generic is used from BPF probe only, but needs to be a union
50*4882a593Smuzhiyun  * of strobe_value_int/strobe_value_str/strobe_value_map
51*4882a593Smuzhiyun  */
52*4882a593Smuzhiyun struct strobe_value_generic {
53*4882a593Smuzhiyun 	struct strobe_value_header header;
54*4882a593Smuzhiyun 	union {
55*4882a593Smuzhiyun 		int64_t val;
56*4882a593Smuzhiyun 		void *ptr;
57*4882a593Smuzhiyun 	};
58*4882a593Smuzhiyun };
59*4882a593Smuzhiyun 
60*4882a593Smuzhiyun struct strobe_value_int {
61*4882a593Smuzhiyun 	struct strobe_value_header header;
62*4882a593Smuzhiyun 	int64_t value;
63*4882a593Smuzhiyun };
64*4882a593Smuzhiyun 
65*4882a593Smuzhiyun struct strobe_value_str {
66*4882a593Smuzhiyun 	struct strobe_value_header header;
67*4882a593Smuzhiyun 	const char* value;
68*4882a593Smuzhiyun };
69*4882a593Smuzhiyun 
70*4882a593Smuzhiyun struct strobe_value_map {
71*4882a593Smuzhiyun 	struct strobe_value_header header;
72*4882a593Smuzhiyun 	const struct strobe_map_raw* value;
73*4882a593Smuzhiyun };
74*4882a593Smuzhiyun 
/* One key/value pair of a strobe map; both members point to C strings. */
struct strobe_map_entry {
	const char *key;
	const char *val;
};
79*4882a593Smuzhiyun 
80*4882a593Smuzhiyun /*
81*4882a593Smuzhiyun  * Map of C-string key/value pairs with fixed maximum capacity. Each map has
82*4882a593Smuzhiyun  * corresponding int64 ID, which application can use (or ignore) in whatever
83*4882a593Smuzhiyun  * way appropriate. Map is "write-only", there is no way to get data out of
84*4882a593Smuzhiyun  * map. Map is intended to be used to provide metadata for profilers and is
85*4882a593Smuzhiyun  * not to be used for internal in-app communication. All methods are
86*4882a593Smuzhiyun  * thread-safe.
87*4882a593Smuzhiyun  */
88*4882a593Smuzhiyun struct strobe_map_raw {
89*4882a593Smuzhiyun 	/*
90*4882a593Smuzhiyun 	 * general purpose unique ID that's up to application to decide
91*4882a593Smuzhiyun 	 * whether and how to use; for request metadata use case id is unique
92*4882a593Smuzhiyun 	 * request ID that's used to match metadata with stack traces on
93*4882a593Smuzhiyun 	 * Strobelight backend side
94*4882a593Smuzhiyun 	 */
95*4882a593Smuzhiyun 	int64_t id;
96*4882a593Smuzhiyun 	/* number of used entries in map */
97*4882a593Smuzhiyun 	int64_t cnt;
98*4882a593Smuzhiyun 	/*
99*4882a593Smuzhiyun 	 * having volatile doesn't change anything on BPF side, but clang
100*4882a593Smuzhiyun 	 * emits warnings for passing `volatile const char *` into
101*4882a593Smuzhiyun 	 * bpf_probe_read_user_str that expects just `const char *`
102*4882a593Smuzhiyun 	 */
103*4882a593Smuzhiyun 	const char* tag;
104*4882a593Smuzhiyun 	/*
105*4882a593Smuzhiyun 	 * key/value entries, each consisting of 2 pointers to key and value
106*4882a593Smuzhiyun 	 * C strings
107*4882a593Smuzhiyun 	 */
108*4882a593Smuzhiyun 	struct strobe_map_entry entries[STROBE_MAX_MAP_ENTRIES];
109*4882a593Smuzhiyun };
110*4882a593Smuzhiyun 
/* Supported values of strobe_value_loc.tls_mode. */
#define TLS_NOT_SET -1		/* no metavariable in this slot */
#define TLS_LOCAL_EXEC 0	/* Local Executable mode */
#define TLS_IMM_EXEC 1		/* Immediate Executable mode */
#define TLS_GENERAL_DYN 2	/* General Dynamic mode */
116*4882a593Smuzhiyun 
/*
 * Universal description of where a TLS variable lives, covering both static
 * executables and shared libraries.
 */
struct strobe_value_loc {
	/*
	 * TLS mode used for this metavariable:
	 * - -1 (TLS_NOT_SET) - no metavariable;
	 * - 0 (TLS_LOCAL_EXEC) - Local Executable mode;
	 * - 1 (TLS_IMM_EXEC) - Immediate Executable mode;
	 * - 2 (TLS_GENERAL_DYN) - General Dynamic mode;
	 * Local Dynamic mode is not supported yet, because it was never seen
	 * in practice. The mode decides how the offset field is interpreted;
	 * see calc_location() below for details.
	 */
	int64_t tls_mode;
	/*
	 * TLS_LOCAL_EXEC: offset from the thread pointer (fs:0 for x86-64,
	 * tpidr_el0 for aarch64);
	 * TLS_IMM_EXEC: absolute address of the GOT entry containing the
	 * offset from the thread pointer;
	 * TLS_GENERAL_DYN: absolute address of the double GOT entry
	 * containing the tls_index_t struct.
	 */
	int64_t offset;
};
143*4882a593Smuzhiyun 
144*4882a593Smuzhiyun struct strobemeta_cfg {
145*4882a593Smuzhiyun 	int64_t req_meta_idx;
146*4882a593Smuzhiyun 	struct strobe_value_loc int_locs[STROBE_MAX_INTS];
147*4882a593Smuzhiyun 	struct strobe_value_loc str_locs[STROBE_MAX_STRS];
148*4882a593Smuzhiyun 	struct strobe_value_loc map_locs[STROBE_MAX_MAPS];
149*4882a593Smuzhiyun };
150*4882a593Smuzhiyun 
151*4882a593Smuzhiyun struct strobe_map_descr {
152*4882a593Smuzhiyun 	uint64_t id;
153*4882a593Smuzhiyun 	int16_t tag_len;
154*4882a593Smuzhiyun 	/*
155*4882a593Smuzhiyun 	 * cnt <0 - map value isn't set;
156*4882a593Smuzhiyun 	 * 0 - map has id set, but no key/value entries
157*4882a593Smuzhiyun 	 */
158*4882a593Smuzhiyun 	int16_t cnt;
159*4882a593Smuzhiyun 	/*
160*4882a593Smuzhiyun 	 * both key_lens[i] and val_lens[i] should be >0 for present key/value
161*4882a593Smuzhiyun 	 * entry
162*4882a593Smuzhiyun 	 */
163*4882a593Smuzhiyun 	uint16_t key_lens[STROBE_MAX_MAP_ENTRIES];
164*4882a593Smuzhiyun 	uint16_t val_lens[STROBE_MAX_MAP_ENTRIES];
165*4882a593Smuzhiyun };
166*4882a593Smuzhiyun 
167*4882a593Smuzhiyun struct strobemeta_payload {
168*4882a593Smuzhiyun 	/* req_id has valid request ID, if req_meta_valid == 1 */
169*4882a593Smuzhiyun 	int64_t req_id;
170*4882a593Smuzhiyun 	uint8_t req_meta_valid;
171*4882a593Smuzhiyun 	/*
172*4882a593Smuzhiyun 	 * mask has Nth bit set to 1, if Nth metavar was present and
173*4882a593Smuzhiyun 	 * successfully read
174*4882a593Smuzhiyun 	 */
175*4882a593Smuzhiyun 	uint64_t int_vals_set_mask;
176*4882a593Smuzhiyun 	int64_t int_vals[STROBE_MAX_INTS];
177*4882a593Smuzhiyun 	/* len is >0 for present values */
178*4882a593Smuzhiyun 	uint16_t str_lens[STROBE_MAX_STRS];
179*4882a593Smuzhiyun 	/* if map_descrs[i].cnt == -1, metavar is not present/set */
180*4882a593Smuzhiyun 	struct strobe_map_descr map_descrs[STROBE_MAX_MAPS];
181*4882a593Smuzhiyun 	/*
182*4882a593Smuzhiyun 	 * payload has compactly packed values of str and map variables in the
183*4882a593Smuzhiyun 	 * form: strval1\0strval2\0map1key1\0map1val1\0map2key1\0map2val1\0
184*4882a593Smuzhiyun 	 * (and so on); str_lens[i], key_lens[i] and val_lens[i] determines
185*4882a593Smuzhiyun 	 * value length
186*4882a593Smuzhiyun 	 */
187*4882a593Smuzhiyun 	char payload[STROBE_MAX_PAYLOAD];
188*4882a593Smuzhiyun };
189*4882a593Smuzhiyun 
190*4882a593Smuzhiyun struct strobelight_bpf_sample {
191*4882a593Smuzhiyun 	uint64_t ktime;
192*4882a593Smuzhiyun 	char comm[TASK_COMM_LEN];
193*4882a593Smuzhiyun 	pid_t pid;
194*4882a593Smuzhiyun 	int user_stack_id;
195*4882a593Smuzhiyun 	int kernel_stack_id;
196*4882a593Smuzhiyun 	int has_meta;
197*4882a593Smuzhiyun 	struct strobemeta_payload metadata;
198*4882a593Smuzhiyun 	/*
199*4882a593Smuzhiyun 	 * makes it possible to pass (<real payload size> + 1) as data size to
200*4882a593Smuzhiyun 	 * perf_submit() to avoid perf_submit's paranoia about passing zero as
201*4882a593Smuzhiyun 	 * size, as it deduces that <real payload size> might be
202*4882a593Smuzhiyun 	 * **theoretically** zero
203*4882a593Smuzhiyun 	 */
204*4882a593Smuzhiyun 	char dummy_safeguard;
205*4882a593Smuzhiyun };
206*4882a593Smuzhiyun 
207*4882a593Smuzhiyun struct {
208*4882a593Smuzhiyun 	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
209*4882a593Smuzhiyun 	__uint(max_entries, 32);
210*4882a593Smuzhiyun 	__uint(key_size, sizeof(int));
211*4882a593Smuzhiyun 	__uint(value_size, sizeof(int));
212*4882a593Smuzhiyun } samples SEC(".maps");
213*4882a593Smuzhiyun 
214*4882a593Smuzhiyun struct {
215*4882a593Smuzhiyun 	__uint(type, BPF_MAP_TYPE_STACK_TRACE);
216*4882a593Smuzhiyun 	__uint(max_entries, 16);
217*4882a593Smuzhiyun 	__uint(key_size, sizeof(uint32_t));
218*4882a593Smuzhiyun 	__uint(value_size, sizeof(uint64_t) * PERF_MAX_STACK_DEPTH);
219*4882a593Smuzhiyun } stacks_0 SEC(".maps");
220*4882a593Smuzhiyun 
221*4882a593Smuzhiyun struct {
222*4882a593Smuzhiyun 	__uint(type, BPF_MAP_TYPE_STACK_TRACE);
223*4882a593Smuzhiyun 	__uint(max_entries, 16);
224*4882a593Smuzhiyun 	__uint(key_size, sizeof(uint32_t));
225*4882a593Smuzhiyun 	__uint(value_size, sizeof(uint64_t) * PERF_MAX_STACK_DEPTH);
226*4882a593Smuzhiyun } stacks_1 SEC(".maps");
227*4882a593Smuzhiyun 
228*4882a593Smuzhiyun struct {
229*4882a593Smuzhiyun 	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
230*4882a593Smuzhiyun 	__uint(max_entries, 1);
231*4882a593Smuzhiyun 	__type(key, uint32_t);
232*4882a593Smuzhiyun 	__type(value, struct strobelight_bpf_sample);
233*4882a593Smuzhiyun } sample_heap SEC(".maps");
234*4882a593Smuzhiyun 
235*4882a593Smuzhiyun struct {
236*4882a593Smuzhiyun 	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
237*4882a593Smuzhiyun 	__uint(max_entries, STROBE_MAX_CFGS);
238*4882a593Smuzhiyun 	__type(key, pid_t);
239*4882a593Smuzhiyun 	__type(value, struct strobemeta_cfg);
240*4882a593Smuzhiyun } strobemeta_cfgs SEC(".maps");
241*4882a593Smuzhiyun 
/*
 * Element type of the dtv array. calc_location() indexes the array by module
 * number and reads the first pointer-sized word of the selected element.
 */
/* https://github.com/lattera/glibc/blob/master/nptl/sysdeps/x86_64/tls.h#L34 */
typedef union dtv {
	size_t counter;
	struct {
		void *val;
		bool is_static;
	} pointer;
} dtv_t;
251*4882a593Smuzhiyun 
252*4882a593Smuzhiyun /* Partial definition for tcbhead_t */
253*4882a593Smuzhiyun /* https://github.com/bminor/glibc/blob/master/sysdeps/x86_64/nptl/tls.h#L42 */
254*4882a593Smuzhiyun struct tcbhead {
255*4882a593Smuzhiyun 	void* tcb;
256*4882a593Smuzhiyun 	dtv_t* dtv;
257*4882a593Smuzhiyun };
258*4882a593Smuzhiyun 
/*
 * TLS module/offset pair used in the shared-library case.
 * On x86-64 it is mapped onto two GOT entries.
 * On aarch64 the second GOT entry points to it.
 */
struct tls_index {
	uint64_t module;	/* index into the dtv array */
	uint64_t offset;	/* offset of the variable within that module's TLS block */
};
268*4882a593Smuzhiyun 
269*4882a593Smuzhiyun #ifdef SUBPROGS
270*4882a593Smuzhiyun __noinline
271*4882a593Smuzhiyun #else
272*4882a593Smuzhiyun __always_inline
273*4882a593Smuzhiyun #endif
calc_location(struct strobe_value_loc * loc,void * tls_base)274*4882a593Smuzhiyun static void *calc_location(struct strobe_value_loc *loc, void *tls_base)
275*4882a593Smuzhiyun {
276*4882a593Smuzhiyun 	/*
277*4882a593Smuzhiyun 	 * tls_mode value is:
278*4882a593Smuzhiyun 	 * - -1 (TLS_NOT_SET), if no metavar is present;
279*4882a593Smuzhiyun 	 * - 0 (TLS_LOCAL_EXEC), if metavar uses Local Executable mode of TLS
280*4882a593Smuzhiyun 	 * (offset from fs:0 for x86-64 or tpidr_el0 for aarch64);
281*4882a593Smuzhiyun 	 * - 1 (TLS_IMM_EXEC), if metavar uses Immediate Executable mode of TLS;
282*4882a593Smuzhiyun 	 * - 2 (TLS_GENERAL_DYN), if metavar uses General Dynamic mode of TLS;
283*4882a593Smuzhiyun 	 * This schema allows to use something like:
284*4882a593Smuzhiyun 	 * (tls_mode + 1) * (tls_base + offset)
285*4882a593Smuzhiyun 	 * to get NULL for "no metavar" location, or correct pointer for local
286*4882a593Smuzhiyun 	 * executable mode without doing extra ifs.
287*4882a593Smuzhiyun 	 */
288*4882a593Smuzhiyun 	if (loc->tls_mode <= TLS_LOCAL_EXEC) {
289*4882a593Smuzhiyun 		/* static executable is simple, we just have offset from
290*4882a593Smuzhiyun 		 * tls_base */
291*4882a593Smuzhiyun 		void *addr = tls_base + loc->offset;
292*4882a593Smuzhiyun 		/* multiply by (tls_mode + 1) to get NULL, if we have no
293*4882a593Smuzhiyun 		 * metavar in this slot */
294*4882a593Smuzhiyun 		return (void *)((loc->tls_mode + 1) * (int64_t)addr);
295*4882a593Smuzhiyun 	}
296*4882a593Smuzhiyun 	/*
297*4882a593Smuzhiyun 	 * Other modes are more complicated, we need to jump through few hoops.
298*4882a593Smuzhiyun 	 *
299*4882a593Smuzhiyun 	 * For immediate executable mode (currently supported only for aarch64):
300*4882a593Smuzhiyun 	 *  - loc->offset is pointing to a GOT entry containing fixed offset
301*4882a593Smuzhiyun 	 *  relative to tls_base;
302*4882a593Smuzhiyun 	 *
303*4882a593Smuzhiyun 	 * For general dynamic mode:
304*4882a593Smuzhiyun 	 *  - loc->offset is pointing to a beginning of double GOT entries;
305*4882a593Smuzhiyun 	 *  - (for aarch64 only) second entry points to tls_index_t struct;
306*4882a593Smuzhiyun 	 *  - (for x86-64 only) two GOT entries are already tls_index_t;
307*4882a593Smuzhiyun 	 *  - tls_index_t->module is used to find start of TLS section in
308*4882a593Smuzhiyun 	 *  which variable resides;
309*4882a593Smuzhiyun 	 *  - tls_index_t->offset provides offset within that TLS section,
310*4882a593Smuzhiyun 	 *  pointing to value of variable.
311*4882a593Smuzhiyun 	 */
312*4882a593Smuzhiyun 	struct tls_index tls_index;
313*4882a593Smuzhiyun 	dtv_t *dtv;
314*4882a593Smuzhiyun 	void *tls_ptr;
315*4882a593Smuzhiyun 
316*4882a593Smuzhiyun 	bpf_probe_read_user(&tls_index, sizeof(struct tls_index),
317*4882a593Smuzhiyun 			    (void *)loc->offset);
318*4882a593Smuzhiyun 	/* valid module index is always positive */
319*4882a593Smuzhiyun 	if (tls_index.module > 0) {
320*4882a593Smuzhiyun 		/* dtv = ((struct tcbhead *)tls_base)->dtv[tls_index.module] */
321*4882a593Smuzhiyun 		bpf_probe_read_user(&dtv, sizeof(dtv),
322*4882a593Smuzhiyun 				    &((struct tcbhead *)tls_base)->dtv);
323*4882a593Smuzhiyun 		dtv += tls_index.module;
324*4882a593Smuzhiyun 	} else {
325*4882a593Smuzhiyun 		dtv = NULL;
326*4882a593Smuzhiyun 	}
327*4882a593Smuzhiyun 	bpf_probe_read_user(&tls_ptr, sizeof(void *), dtv);
328*4882a593Smuzhiyun 	/* if pointer has (void *)-1 value, then TLS wasn't initialized yet */
329*4882a593Smuzhiyun 	return tls_ptr && tls_ptr != (void *)-1
330*4882a593Smuzhiyun 		? tls_ptr + tls_index.offset
331*4882a593Smuzhiyun 		: NULL;
332*4882a593Smuzhiyun }
333*4882a593Smuzhiyun 
334*4882a593Smuzhiyun #ifdef SUBPROGS
335*4882a593Smuzhiyun __noinline
336*4882a593Smuzhiyun #else
337*4882a593Smuzhiyun __always_inline
338*4882a593Smuzhiyun #endif
read_int_var(struct strobemeta_cfg * cfg,size_t idx,void * tls_base,struct strobe_value_generic * value,struct strobemeta_payload * data)339*4882a593Smuzhiyun static void read_int_var(struct strobemeta_cfg *cfg,
340*4882a593Smuzhiyun 			 size_t idx, void *tls_base,
341*4882a593Smuzhiyun 			 struct strobe_value_generic *value,
342*4882a593Smuzhiyun 			 struct strobemeta_payload *data)
343*4882a593Smuzhiyun {
344*4882a593Smuzhiyun 	void *location = calc_location(&cfg->int_locs[idx], tls_base);
345*4882a593Smuzhiyun 	if (!location)
346*4882a593Smuzhiyun 		return;
347*4882a593Smuzhiyun 
348*4882a593Smuzhiyun 	bpf_probe_read_user(value, sizeof(struct strobe_value_generic), location);
349*4882a593Smuzhiyun 	data->int_vals[idx] = value->val;
350*4882a593Smuzhiyun 	if (value->header.len)
351*4882a593Smuzhiyun 		data->int_vals_set_mask |= (1 << idx);
352*4882a593Smuzhiyun }
353*4882a593Smuzhiyun 
read_str_var(struct strobemeta_cfg * cfg,size_t idx,void * tls_base,struct strobe_value_generic * value,struct strobemeta_payload * data,void * payload)354*4882a593Smuzhiyun static __always_inline uint64_t read_str_var(struct strobemeta_cfg *cfg,
355*4882a593Smuzhiyun 					     size_t idx, void *tls_base,
356*4882a593Smuzhiyun 					     struct strobe_value_generic *value,
357*4882a593Smuzhiyun 					     struct strobemeta_payload *data,
358*4882a593Smuzhiyun 					     void *payload)
359*4882a593Smuzhiyun {
360*4882a593Smuzhiyun 	void *location;
361*4882a593Smuzhiyun 	uint64_t len;
362*4882a593Smuzhiyun 
363*4882a593Smuzhiyun 	data->str_lens[idx] = 0;
364*4882a593Smuzhiyun 	location = calc_location(&cfg->str_locs[idx], tls_base);
365*4882a593Smuzhiyun 	if (!location)
366*4882a593Smuzhiyun 		return 0;
367*4882a593Smuzhiyun 
368*4882a593Smuzhiyun 	bpf_probe_read_user(value, sizeof(struct strobe_value_generic), location);
369*4882a593Smuzhiyun 	len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN, value->ptr);
370*4882a593Smuzhiyun 	/*
371*4882a593Smuzhiyun 	 * if bpf_probe_read_user_str returns error (<0), due to casting to
372*4882a593Smuzhiyun 	 * unsinged int, it will become big number, so next check is
373*4882a593Smuzhiyun 	 * sufficient to check for errors AND prove to BPF verifier, that
374*4882a593Smuzhiyun 	 * bpf_probe_read_user_str won't return anything bigger than
375*4882a593Smuzhiyun 	 * STROBE_MAX_STR_LEN
376*4882a593Smuzhiyun 	 */
377*4882a593Smuzhiyun 	if (len > STROBE_MAX_STR_LEN)
378*4882a593Smuzhiyun 		return 0;
379*4882a593Smuzhiyun 
380*4882a593Smuzhiyun 	data->str_lens[idx] = len;
381*4882a593Smuzhiyun 	return len;
382*4882a593Smuzhiyun }
383*4882a593Smuzhiyun 
read_map_var(struct strobemeta_cfg * cfg,size_t idx,void * tls_base,struct strobe_value_generic * value,struct strobemeta_payload * data,void * payload)384*4882a593Smuzhiyun static __always_inline void *read_map_var(struct strobemeta_cfg *cfg,
385*4882a593Smuzhiyun 					  size_t idx, void *tls_base,
386*4882a593Smuzhiyun 					  struct strobe_value_generic *value,
387*4882a593Smuzhiyun 					  struct strobemeta_payload *data,
388*4882a593Smuzhiyun 					  void *payload)
389*4882a593Smuzhiyun {
390*4882a593Smuzhiyun 	struct strobe_map_descr* descr = &data->map_descrs[idx];
391*4882a593Smuzhiyun 	struct strobe_map_raw map;
392*4882a593Smuzhiyun 	void *location;
393*4882a593Smuzhiyun 	uint64_t len;
394*4882a593Smuzhiyun 	int i;
395*4882a593Smuzhiyun 
396*4882a593Smuzhiyun 	descr->tag_len = 0; /* presume no tag is set */
397*4882a593Smuzhiyun 	descr->cnt = -1; /* presume no value is set */
398*4882a593Smuzhiyun 
399*4882a593Smuzhiyun 	location = calc_location(&cfg->map_locs[idx], tls_base);
400*4882a593Smuzhiyun 	if (!location)
401*4882a593Smuzhiyun 		return payload;
402*4882a593Smuzhiyun 
403*4882a593Smuzhiyun 	bpf_probe_read_user(value, sizeof(struct strobe_value_generic), location);
404*4882a593Smuzhiyun 	if (bpf_probe_read_user(&map, sizeof(struct strobe_map_raw), value->ptr))
405*4882a593Smuzhiyun 		return payload;
406*4882a593Smuzhiyun 
407*4882a593Smuzhiyun 	descr->id = map.id;
408*4882a593Smuzhiyun 	descr->cnt = map.cnt;
409*4882a593Smuzhiyun 	if (cfg->req_meta_idx == idx) {
410*4882a593Smuzhiyun 		data->req_id = map.id;
411*4882a593Smuzhiyun 		data->req_meta_valid = 1;
412*4882a593Smuzhiyun 	}
413*4882a593Smuzhiyun 
414*4882a593Smuzhiyun 	len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN, map.tag);
415*4882a593Smuzhiyun 	if (len <= STROBE_MAX_STR_LEN) {
416*4882a593Smuzhiyun 		descr->tag_len = len;
417*4882a593Smuzhiyun 		payload += len;
418*4882a593Smuzhiyun 	}
419*4882a593Smuzhiyun 
420*4882a593Smuzhiyun #ifdef NO_UNROLL
421*4882a593Smuzhiyun #pragma clang loop unroll(disable)
422*4882a593Smuzhiyun #else
423*4882a593Smuzhiyun #pragma unroll
424*4882a593Smuzhiyun #endif
425*4882a593Smuzhiyun 	for (int i = 0; i < STROBE_MAX_MAP_ENTRIES; ++i) {
426*4882a593Smuzhiyun 		if (i >= map.cnt)
427*4882a593Smuzhiyun 			break;
428*4882a593Smuzhiyun 
429*4882a593Smuzhiyun 		descr->key_lens[i] = 0;
430*4882a593Smuzhiyun 		len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN,
431*4882a593Smuzhiyun 					      map.entries[i].key);
432*4882a593Smuzhiyun 		if (len <= STROBE_MAX_STR_LEN) {
433*4882a593Smuzhiyun 			descr->key_lens[i] = len;
434*4882a593Smuzhiyun 			payload += len;
435*4882a593Smuzhiyun 		}
436*4882a593Smuzhiyun 		descr->val_lens[i] = 0;
437*4882a593Smuzhiyun 		len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN,
438*4882a593Smuzhiyun 					      map.entries[i].val);
439*4882a593Smuzhiyun 		if (len <= STROBE_MAX_STR_LEN) {
440*4882a593Smuzhiyun 			descr->val_lens[i] = len;
441*4882a593Smuzhiyun 			payload += len;
442*4882a593Smuzhiyun 		}
443*4882a593Smuzhiyun 	}
444*4882a593Smuzhiyun 
445*4882a593Smuzhiyun 	return payload;
446*4882a593Smuzhiyun }
447*4882a593Smuzhiyun 
448*4882a593Smuzhiyun /*
449*4882a593Smuzhiyun  * read_strobe_meta returns NULL, if no metadata was read; otherwise returns
450*4882a593Smuzhiyun  * pointer to *right after* payload ends
451*4882a593Smuzhiyun  */
452*4882a593Smuzhiyun #ifdef SUBPROGS
453*4882a593Smuzhiyun __noinline
454*4882a593Smuzhiyun #else
455*4882a593Smuzhiyun __always_inline
456*4882a593Smuzhiyun #endif
read_strobe_meta(struct task_struct * task,struct strobemeta_payload * data)457*4882a593Smuzhiyun static void *read_strobe_meta(struct task_struct *task,
458*4882a593Smuzhiyun 			      struct strobemeta_payload *data)
459*4882a593Smuzhiyun {
460*4882a593Smuzhiyun 	pid_t pid = bpf_get_current_pid_tgid() >> 32;
461*4882a593Smuzhiyun 	struct strobe_value_generic value = {0};
462*4882a593Smuzhiyun 	struct strobemeta_cfg *cfg;
463*4882a593Smuzhiyun 	void *tls_base, *payload;
464*4882a593Smuzhiyun 
465*4882a593Smuzhiyun 	cfg = bpf_map_lookup_elem(&strobemeta_cfgs, &pid);
466*4882a593Smuzhiyun 	if (!cfg)
467*4882a593Smuzhiyun 		return NULL;
468*4882a593Smuzhiyun 
469*4882a593Smuzhiyun 	data->int_vals_set_mask = 0;
470*4882a593Smuzhiyun 	data->req_meta_valid = 0;
471*4882a593Smuzhiyun 	payload = data->payload;
472*4882a593Smuzhiyun 	/*
473*4882a593Smuzhiyun 	 * we don't have struct task_struct definition, it should be:
474*4882a593Smuzhiyun 	 * tls_base = (void *)task->thread.fsbase;
475*4882a593Smuzhiyun 	 */
476*4882a593Smuzhiyun 	tls_base = (void *)task;
477*4882a593Smuzhiyun 
478*4882a593Smuzhiyun #ifdef NO_UNROLL
479*4882a593Smuzhiyun #pragma clang loop unroll(disable)
480*4882a593Smuzhiyun #else
481*4882a593Smuzhiyun #pragma unroll
482*4882a593Smuzhiyun #endif
483*4882a593Smuzhiyun 	for (int i = 0; i < STROBE_MAX_INTS; ++i) {
484*4882a593Smuzhiyun 		read_int_var(cfg, i, tls_base, &value, data);
485*4882a593Smuzhiyun 	}
486*4882a593Smuzhiyun #ifdef NO_UNROLL
487*4882a593Smuzhiyun #pragma clang loop unroll(disable)
488*4882a593Smuzhiyun #else
489*4882a593Smuzhiyun #pragma unroll
490*4882a593Smuzhiyun #endif
491*4882a593Smuzhiyun 	for (int i = 0; i < STROBE_MAX_STRS; ++i) {
492*4882a593Smuzhiyun 		payload += read_str_var(cfg, i, tls_base, &value, data, payload);
493*4882a593Smuzhiyun 	}
494*4882a593Smuzhiyun #ifdef NO_UNROLL
495*4882a593Smuzhiyun #pragma clang loop unroll(disable)
496*4882a593Smuzhiyun #else
497*4882a593Smuzhiyun #pragma unroll
498*4882a593Smuzhiyun #endif
499*4882a593Smuzhiyun 	for (int i = 0; i < STROBE_MAX_MAPS; ++i) {
500*4882a593Smuzhiyun 		payload = read_map_var(cfg, i, tls_base, &value, data, payload);
501*4882a593Smuzhiyun 	}
502*4882a593Smuzhiyun 	/*
503*4882a593Smuzhiyun 	 * return pointer right after end of payload, so it's possible to
504*4882a593Smuzhiyun 	 * calculate exact amount of useful data that needs to be sent
505*4882a593Smuzhiyun 	 */
506*4882a593Smuzhiyun 	return payload;
507*4882a593Smuzhiyun }
508*4882a593Smuzhiyun 
509*4882a593Smuzhiyun SEC("raw_tracepoint/kfree_skb")
on_event(struct pt_regs * ctx)510*4882a593Smuzhiyun int on_event(struct pt_regs *ctx) {
511*4882a593Smuzhiyun 	pid_t pid =  bpf_get_current_pid_tgid() >> 32;
512*4882a593Smuzhiyun 	struct strobelight_bpf_sample* sample;
513*4882a593Smuzhiyun 	struct task_struct *task;
514*4882a593Smuzhiyun 	uint32_t zero = 0;
515*4882a593Smuzhiyun 	uint64_t ktime_ns;
516*4882a593Smuzhiyun 	void *sample_end;
517*4882a593Smuzhiyun 
518*4882a593Smuzhiyun 	sample = bpf_map_lookup_elem(&sample_heap, &zero);
519*4882a593Smuzhiyun 	if (!sample)
520*4882a593Smuzhiyun 		return 0; /* this will never happen */
521*4882a593Smuzhiyun 
522*4882a593Smuzhiyun 	sample->pid = pid;
523*4882a593Smuzhiyun 	bpf_get_current_comm(&sample->comm, TASK_COMM_LEN);
524*4882a593Smuzhiyun 	ktime_ns = bpf_ktime_get_ns();
525*4882a593Smuzhiyun 	sample->ktime = ktime_ns;
526*4882a593Smuzhiyun 
527*4882a593Smuzhiyun 	task = (struct task_struct *)bpf_get_current_task();
528*4882a593Smuzhiyun 	sample_end = read_strobe_meta(task, &sample->metadata);
529*4882a593Smuzhiyun 	sample->has_meta = sample_end != NULL;
530*4882a593Smuzhiyun 	sample_end = sample_end ? : &sample->metadata;
531*4882a593Smuzhiyun 
532*4882a593Smuzhiyun 	if ((ktime_ns >> STACK_TABLE_EPOCH_SHIFT) & 1) {
533*4882a593Smuzhiyun 		sample->kernel_stack_id = bpf_get_stackid(ctx, &stacks_1, 0);
534*4882a593Smuzhiyun 		sample->user_stack_id = bpf_get_stackid(ctx, &stacks_1, BPF_F_USER_STACK);
535*4882a593Smuzhiyun 	} else {
536*4882a593Smuzhiyun 		sample->kernel_stack_id = bpf_get_stackid(ctx, &stacks_0, 0);
537*4882a593Smuzhiyun 		sample->user_stack_id = bpf_get_stackid(ctx, &stacks_0, BPF_F_USER_STACK);
538*4882a593Smuzhiyun 	}
539*4882a593Smuzhiyun 
540*4882a593Smuzhiyun 	uint64_t sample_size = sample_end - (void *)sample;
541*4882a593Smuzhiyun 	/* should always be true */
542*4882a593Smuzhiyun 	if (sample_size < sizeof(struct strobelight_bpf_sample))
543*4882a593Smuzhiyun 		bpf_perf_event_output(ctx, &samples, 0, sample, 1 + sample_size);
544*4882a593Smuzhiyun 	return 0;
545*4882a593Smuzhiyun }
546*4882a593Smuzhiyun 
547*4882a593Smuzhiyun char _license[] SEC("license") = "GPL";
548