/* Copyright (c) 2016 Facebook
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 */
7*4882a593Smuzhiyun #include <uapi/linux/bpf.h>
8*4882a593Smuzhiyun #include <uapi/linux/ptrace.h>
9*4882a593Smuzhiyun #include <uapi/linux/perf_event.h>
10*4882a593Smuzhiyun #include <linux/version.h>
11*4882a593Smuzhiyun #include <linux/sched.h>
12*4882a593Smuzhiyun #include <bpf/bpf_helpers.h>
13*4882a593Smuzhiyun #include <bpf/bpf_tracing.h>
14*4882a593Smuzhiyun
/*
 * Copy the lvalue P out of kernel memory with bpf_probe_read_kernel() and
 * evaluate to that copy. The helper's return value is not checked.
 *
 * P is expanded inside the scope of the temporary, so the temporary has a
 * deliberately unusual name: a plain name such as "val" would capture any
 * argument expression that mentions a caller variable of the same name.
 */
#define _(P) \
	({ \
		typeof(P) _probe_val; \
		bpf_probe_read_kernel(&_probe_val, sizeof(_probe_val), &(P)); \
		_probe_val; \
	})
21*4882a593Smuzhiyun
/* Shortest interval, in microseconds, that oncpu() passes on to update_counts(). */
#define MINBLOCK_US	1
23*4882a593Smuzhiyun
24*4882a593Smuzhiyun struct key_t {
25*4882a593Smuzhiyun char waker[TASK_COMM_LEN];
26*4882a593Smuzhiyun char target[TASK_COMM_LEN];
27*4882a593Smuzhiyun u32 wret;
28*4882a593Smuzhiyun u32 tret;
29*4882a593Smuzhiyun };
30*4882a593Smuzhiyun
31*4882a593Smuzhiyun struct {
32*4882a593Smuzhiyun __uint(type, BPF_MAP_TYPE_HASH);
33*4882a593Smuzhiyun __type(key, struct key_t);
34*4882a593Smuzhiyun __type(value, u64);
35*4882a593Smuzhiyun __uint(max_entries, 10000);
36*4882a593Smuzhiyun } counts SEC(".maps");
37*4882a593Smuzhiyun
38*4882a593Smuzhiyun struct {
39*4882a593Smuzhiyun __uint(type, BPF_MAP_TYPE_HASH);
40*4882a593Smuzhiyun __type(key, u32);
41*4882a593Smuzhiyun __type(value, u64);
42*4882a593Smuzhiyun __uint(max_entries, 10000);
43*4882a593Smuzhiyun } start SEC(".maps");
44*4882a593Smuzhiyun
45*4882a593Smuzhiyun struct wokeby_t {
46*4882a593Smuzhiyun char name[TASK_COMM_LEN];
47*4882a593Smuzhiyun u32 ret;
48*4882a593Smuzhiyun };
49*4882a593Smuzhiyun
50*4882a593Smuzhiyun struct {
51*4882a593Smuzhiyun __uint(type, BPF_MAP_TYPE_HASH);
52*4882a593Smuzhiyun __type(key, u32);
53*4882a593Smuzhiyun __type(value, struct wokeby_t);
54*4882a593Smuzhiyun __uint(max_entries, 10000);
55*4882a593Smuzhiyun } wokeby SEC(".maps");
56*4882a593Smuzhiyun
57*4882a593Smuzhiyun struct {
58*4882a593Smuzhiyun __uint(type, BPF_MAP_TYPE_STACK_TRACE);
59*4882a593Smuzhiyun __uint(key_size, sizeof(u32));
60*4882a593Smuzhiyun __uint(value_size, PERF_MAX_STACK_DEPTH * sizeof(u64));
61*4882a593Smuzhiyun __uint(max_entries, 10000);
62*4882a593Smuzhiyun } stackmap SEC(".maps");
63*4882a593Smuzhiyun
/* Flags for every bpf_get_stackid() call in this file; BPF_F_USER_STACK is not set. */
#define STACKID_FLAGS	(BPF_F_FAST_STACK_CMP)
65*4882a593Smuzhiyun
66*4882a593Smuzhiyun SEC("kprobe/try_to_wake_up")
waker(struct pt_regs * ctx)67*4882a593Smuzhiyun int waker(struct pt_regs *ctx)
68*4882a593Smuzhiyun {
69*4882a593Smuzhiyun struct task_struct *p = (void *) PT_REGS_PARM1(ctx);
70*4882a593Smuzhiyun struct wokeby_t woke;
71*4882a593Smuzhiyun u32 pid;
72*4882a593Smuzhiyun
73*4882a593Smuzhiyun pid = _(p->pid);
74*4882a593Smuzhiyun
75*4882a593Smuzhiyun bpf_get_current_comm(&woke.name, sizeof(woke.name));
76*4882a593Smuzhiyun woke.ret = bpf_get_stackid(ctx, &stackmap, STACKID_FLAGS);
77*4882a593Smuzhiyun
78*4882a593Smuzhiyun bpf_map_update_elem(&wokeby, &pid, &woke, BPF_ANY);
79*4882a593Smuzhiyun return 0;
80*4882a593Smuzhiyun }
81*4882a593Smuzhiyun
/*
 * Add @delta to the "counts" entry for the current task and whoever woke it.
 *
 * @ctx:   probe context, handed to bpf_get_stackid()
 * @pid:   key used to look up (and then delete) the "wokeby" record
 * @delta: amount to add to the counter
 *
 * The key is built from the current comm and stack id plus, when a "wokeby"
 * record exists for @pid, the waker's comm and stack id; otherwise the waker
 * fields stay zero. Always returns 0, including when no counter entry could
 * be found or created.
 */
static inline int update_counts(void *ctx, u32 pid, u64 delta)
{
	struct wokeby_t *woke;
	u64 zero = 0, *val;
	struct key_t key;

	__builtin_memset(&key.waker, 0, sizeof(key.waker));
	bpf_get_current_comm(&key.target, sizeof(key.target));
	key.tret = bpf_get_stackid(ctx, &stackmap, STACKID_FLAGS);
	key.wret = 0;

	woke = bpf_map_lookup_elem(&wokeby, &pid);
	if (woke) {
		key.wret = woke->ret;
		__builtin_memcpy(&key.waker, woke->name, sizeof(key.waker));
		/* consume the record so it is attributed only once */
		bpf_map_delete_elem(&wokeby, &pid);
	}

	val = bpf_map_lookup_elem(&counts, &key);
	if (!val) {
		/* BPF_NOEXIST: do not clobber an entry inserted in the meantime */
		bpf_map_update_elem(&counts, &key, &zero, BPF_NOEXIST);
		val = bpf_map_lookup_elem(&counts, &key);
		if (!val)
			return 0;
	}

	/*
	 * "counts" is a plain (not per-CPU) hash map, so the same value can be
	 * updated from several CPUs at once; a non-atomic read-modify-write
	 * could lose updates.
	 */
	__sync_fetch_and_add(val, delta);
	return 0;
}
110*4882a593Smuzhiyun
111*4882a593Smuzhiyun #if 1
112*4882a593Smuzhiyun /* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */
113*4882a593Smuzhiyun struct sched_switch_args {
114*4882a593Smuzhiyun unsigned long long pad;
115*4882a593Smuzhiyun char prev_comm[16];
116*4882a593Smuzhiyun int prev_pid;
117*4882a593Smuzhiyun int prev_prio;
118*4882a593Smuzhiyun long long prev_state;
119*4882a593Smuzhiyun char next_comm[16];
120*4882a593Smuzhiyun int next_pid;
121*4882a593Smuzhiyun int next_prio;
122*4882a593Smuzhiyun };
123*4882a593Smuzhiyun SEC("tracepoint/sched/sched_switch")
oncpu(struct sched_switch_args * ctx)124*4882a593Smuzhiyun int oncpu(struct sched_switch_args *ctx)
125*4882a593Smuzhiyun {
126*4882a593Smuzhiyun /* record previous thread sleep time */
127*4882a593Smuzhiyun u32 pid = ctx->prev_pid;
128*4882a593Smuzhiyun #else
129*4882a593Smuzhiyun SEC("kprobe/finish_task_switch")
130*4882a593Smuzhiyun int oncpu(struct pt_regs *ctx)
131*4882a593Smuzhiyun {
132*4882a593Smuzhiyun struct task_struct *p = (void *) PT_REGS_PARM1(ctx);
133*4882a593Smuzhiyun /* record previous thread sleep time */
134*4882a593Smuzhiyun u32 pid = _(p->pid);
135*4882a593Smuzhiyun #endif
136*4882a593Smuzhiyun u64 delta, ts, *tsp;
137*4882a593Smuzhiyun
138*4882a593Smuzhiyun ts = bpf_ktime_get_ns();
139*4882a593Smuzhiyun bpf_map_update_elem(&start, &pid, &ts, BPF_ANY);
140*4882a593Smuzhiyun
141*4882a593Smuzhiyun /* calculate current thread's delta time */
142*4882a593Smuzhiyun pid = bpf_get_current_pid_tgid();
143*4882a593Smuzhiyun tsp = bpf_map_lookup_elem(&start, &pid);
144*4882a593Smuzhiyun if (!tsp)
145*4882a593Smuzhiyun /* missed start or filtered */
146*4882a593Smuzhiyun return 0;
147*4882a593Smuzhiyun
148*4882a593Smuzhiyun delta = bpf_ktime_get_ns() - *tsp;
149*4882a593Smuzhiyun bpf_map_delete_elem(&start, &pid);
150*4882a593Smuzhiyun delta = delta / 1000;
151*4882a593Smuzhiyun if (delta < MINBLOCK_US)
152*4882a593Smuzhiyun return 0;
153*4882a593Smuzhiyun
154*4882a593Smuzhiyun return update_counts(ctx, pid, delta);
155*4882a593Smuzhiyun }
156*4882a593Smuzhiyun char _license[] SEC("license") = "GPL";
157*4882a593Smuzhiyun u32 _version SEC("version") = LINUX_VERSION_CODE;
158