/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __LINUX_PREEMPT_H
#define __LINUX_PREEMPT_H

/*
 * include/linux/preempt.h - macros for accessing and manipulating
 * preempt_count (used for kernel preemption, interrupt count, etc.)
 */

#include <linux/linkage.h>
#include <linux/list.h>

/*
 * We put the hardirq and softirq counter into the preemption
 * counter. The bitmask has the following meaning:
 *
 * - bits 0-7 are the preemption count (max preemption depth: 256)
 * - bits 8-15 are the softirq count (max # of softirqs: 256)
 *
 * The hardirq count could in theory be the same as the number of
 * interrupts in the system, but we run all interrupt handlers with
 * interrupts disabled, so we cannot have nesting interrupts. Though
 * there are a few palaeontologic drivers which reenable interrupts in
 * the handler, so we need more than one bit here.
 *
 *         PREEMPT_MASK:	0x000000ff
 *         SOFTIRQ_MASK:	0x0000ff00
 *         HARDIRQ_MASK:	0x000f0000
 *             NMI_MASK:	0x00f00000
 * PREEMPT_NEED_RESCHED:	0x80000000
 */
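/*
 * Worked example (hypothetical value, for illustration only): a
 * preempt_count() of 0x00010201 decodes as one hardirq level
 * (HARDIRQ_MASK), a softirq count of 2 * SOFTIRQ_OFFSET -- i.e. BH
 * disabled once, see SOFTIRQ_DISABLE_OFFSET below -- and a preemption
 * depth of 1 from a single preempt_disable().
 */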
#define PREEMPT_BITS 8
#define SOFTIRQ_BITS 8
#define HARDIRQ_BITS 4
#define NMI_BITS 4

#define PREEMPT_SHIFT 0
#define SOFTIRQ_SHIFT (PREEMPT_SHIFT + PREEMPT_BITS)
#define HARDIRQ_SHIFT (SOFTIRQ_SHIFT + SOFTIRQ_BITS)
#define NMI_SHIFT (HARDIRQ_SHIFT + HARDIRQ_BITS)

#define __IRQ_MASK(x) ((1UL << (x))-1)

#define PREEMPT_MASK (__IRQ_MASK(PREEMPT_BITS) << PREEMPT_SHIFT)
#define SOFTIRQ_MASK (__IRQ_MASK(SOFTIRQ_BITS) << SOFTIRQ_SHIFT)
#define HARDIRQ_MASK (__IRQ_MASK(HARDIRQ_BITS) << HARDIRQ_SHIFT)
#define NMI_MASK (__IRQ_MASK(NMI_BITS) << NMI_SHIFT)

#define PREEMPT_OFFSET (1UL << PREEMPT_SHIFT)
#define SOFTIRQ_OFFSET (1UL << SOFTIRQ_SHIFT)
#define HARDIRQ_OFFSET (1UL << HARDIRQ_SHIFT)
#define NMI_OFFSET (1UL << NMI_SHIFT)

#define SOFTIRQ_DISABLE_OFFSET (2 * SOFTIRQ_OFFSET)

#define PREEMPT_DISABLED (PREEMPT_DISABLE_OFFSET + PREEMPT_ENABLED)

/*
 * Disable preemption until the scheduler is running -- use an unconditional
 * value so that it also works on !PREEMPT_COUNT kernels.
 *
 * Reset by start_kernel()->sched_init()->init_idle()->init_idle_preempt_count().
 */
#define INIT_PREEMPT_COUNT PREEMPT_OFFSET

/*
 * Initial preempt_count value; reflects the preempt_count schedule invariant
 * which states that during context switches:
 *
 *    preempt_count() == 2*PREEMPT_DISABLE_OFFSET
 *
 * Note: PREEMPT_DISABLE_OFFSET is 0 for !PREEMPT_COUNT kernels.
 * Note: See finish_task_switch().
 */
#define FORK_PREEMPT_COUNT (2*PREEMPT_DISABLE_OFFSET + PREEMPT_ENABLED)
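
/*
 * Concretely (derived from the definitions in this file): with
 * CONFIG_PREEMPT_COUNT, PREEMPT_DISABLE_OFFSET is PREEMPT_OFFSET (1),
 * so FORK_PREEMPT_COUNT is PREEMPT_ENABLED + 2; without it, the offset
 * is 0 and only PREEMPT_ENABLED remains.
 */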

/* preempt_count() and related functions, depends on PREEMPT_NEED_RESCHED */
#include <asm/preempt.h>

#define hardirq_count() (preempt_count() & HARDIRQ_MASK)
#define softirq_count() (preempt_count() & SOFTIRQ_MASK)
#define irq_count() (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK \
				 | NMI_MASK))

/*
 * Are we doing bottom half or hardware interrupt processing?
 *
 * in_irq()             - We're in (hard) IRQ context
 * in_softirq()         - We have BH disabled, or are processing softirqs
 * in_interrupt()       - We're in NMI, IRQ, SoftIRQ context or have BH disabled
 * in_serving_softirq() - We're in softirq context
 * in_nmi()             - We're in NMI context
 * in_task()            - We're in task context
 *
 * Note: due to the BH-disabled confusion, in_softirq() and in_interrupt()
 * really should not be used in new code.
 */
#define in_irq() (hardirq_count())
#define in_softirq() (softirq_count())
#define in_interrupt() (irq_count())
#define in_serving_softirq() (softirq_count() & SOFTIRQ_OFFSET)
#define in_nmi() (preempt_count() & NMI_MASK)
#define in_task() (!(preempt_count() & \
		(NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET)))
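
/*
 * Illustrative sketch (not part of this header): code that can run in
 * either task or atomic context sometimes keys its allocation flags off
 * in_task(). The buffer and length names below are made up for the
 * example.
 *
 *	gfp_t gfp = in_task() ? GFP_KERNEL : GFP_ATOMIC;
 *
 *	buf = kmalloc(len, gfp);
 */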

/*
 * The preempt_count offset after preempt_disable();
 */
#if defined(CONFIG_PREEMPT_COUNT)
# define PREEMPT_DISABLE_OFFSET PREEMPT_OFFSET
#else
# define PREEMPT_DISABLE_OFFSET 0
#endif

/*
 * The preempt_count offset after spin_lock()
 */
#define PREEMPT_LOCK_OFFSET PREEMPT_DISABLE_OFFSET

/*
 * The preempt_count offset needed for things like:
 *
 *  spin_lock_bh()
 *
 * Which need to disable both preemption (CONFIG_PREEMPT_COUNT) and
 * softirqs, such that unlock sequences of:
 *
 *  spin_unlock();
 *  local_bh_enable();
 *
 * Work as expected.
 */
#define SOFTIRQ_LOCK_OFFSET (SOFTIRQ_DISABLE_OFFSET + PREEMPT_LOCK_OFFSET)
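
/*
 * For example, with CONFIG_PREEMPT_COUNT this works out to
 * SOFTIRQ_DISABLE_OFFSET + PREEMPT_LOCK_OFFSET = 0x200 + 1 = 0x201:
 * one BH-disable level plus one preemption level, which is exactly what
 * the spin_unlock()/local_bh_enable() pair above removes again.
 */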

/*
 * Are we running in atomic context? WARNING: this macro cannot
 * always detect atomic context; in particular, it cannot know about
 * held spinlocks in non-preemptible kernels. Thus it should not be
 * used in the general case to determine whether sleeping is possible.
 * Do not use in_atomic() in driver code.
 */
#define in_atomic() (preempt_count() != 0)

/*
 * Check whether we were atomic before we did preempt_disable():
 * (used by the scheduler)
 */
#define in_atomic_preempt_off() (preempt_count() != PREEMPT_DISABLE_OFFSET)

#if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_TRACE_PREEMPT_TOGGLE)
extern void preempt_count_add(int val);
extern void preempt_count_sub(int val);
#define preempt_count_dec_and_test() \
	({ preempt_count_sub(1); should_resched(0); })
#else
#define preempt_count_add(val) __preempt_count_add(val)
#define preempt_count_sub(val) __preempt_count_sub(val)
#define preempt_count_dec_and_test() __preempt_count_dec_and_test()
#endif

#define __preempt_count_inc() __preempt_count_add(1)
#define __preempt_count_dec() __preempt_count_sub(1)

#define preempt_count_inc() preempt_count_add(1)
#define preempt_count_dec() preempt_count_sub(1)

#ifdef CONFIG_PREEMPT_COUNT

#define preempt_disable() \
do { \
	preempt_count_inc(); \
	barrier(); \
} while (0)

#define sched_preempt_enable_no_resched() \
do { \
	barrier(); \
	preempt_count_dec(); \
} while (0)

#define preempt_enable_no_resched() sched_preempt_enable_no_resched()

#define preemptible() (preempt_count() == 0 && !irqs_disabled())

#ifdef CONFIG_PREEMPTION
#define preempt_enable() \
do { \
	barrier(); \
	if (unlikely(preempt_count_dec_and_test())) \
		__preempt_schedule(); \
} while (0)

#define preempt_enable_notrace() \
do { \
	barrier(); \
	if (unlikely(__preempt_count_dec_and_test())) \
		__preempt_schedule_notrace(); \
} while (0)

#define preempt_check_resched() \
do { \
	if (should_resched(0)) \
		__preempt_schedule(); \
} while (0)

#else /* !CONFIG_PREEMPTION */
#define preempt_enable() \
do { \
	barrier(); \
	preempt_count_dec(); \
} while (0)

#define preempt_enable_notrace() \
do { \
	barrier(); \
	__preempt_count_dec(); \
} while (0)

#define preempt_check_resched() do { } while (0)
#endif /* CONFIG_PREEMPTION */

#define preempt_disable_notrace() \
do { \
	__preempt_count_inc(); \
	barrier(); \
} while (0)

#define preempt_enable_no_resched_notrace() \
do { \
	barrier(); \
	__preempt_count_dec(); \
} while (0)

#else /* !CONFIG_PREEMPT_COUNT */

/*
 * Even if we don't have any preemption, we need preempt disable/enable
 * to be barriers, so that things like get_user/put_user, which can cause
 * faults and scheduling, do not migrate into our preempt-protected
 * region.
 */
#define preempt_disable() barrier()
#define sched_preempt_enable_no_resched() barrier()
#define preempt_enable_no_resched() barrier()
#define preempt_enable() barrier()
#define preempt_check_resched() do { } while (0)

#define preempt_disable_notrace() barrier()
#define preempt_enable_no_resched_notrace() barrier()
#define preempt_enable_notrace() barrier()
#define preemptible() 0

#endif /* CONFIG_PREEMPT_COUNT */
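
/*
 * Typical usage sketch (illustrative only; the per-CPU counter name is
 * made up for the example): preempt_disable() keeps the task on its
 * current CPU for the duration of the critical section, so per-CPU data
 * can be touched without further locking.
 *
 *	preempt_disable();
 *	__this_cpu_inc(example_stat_counter);
 *	preempt_enable();
 */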

#ifdef MODULE
/*
 * Modules have no business playing preemption tricks.
 */
#undef sched_preempt_enable_no_resched
#undef preempt_enable_no_resched
#undef preempt_enable_no_resched_notrace
#undef preempt_check_resched
#endif

#define preempt_set_need_resched() \
do { \
	set_preempt_need_resched(); \
} while (0)
#define preempt_fold_need_resched() \
do { \
	if (tif_need_resched()) \
		set_preempt_need_resched(); \
} while (0)

#ifdef CONFIG_PREEMPT_NOTIFIERS

struct preempt_notifier;

/**
 * preempt_ops - notifiers called when a task is preempted and rescheduled
 * @sched_in: we're about to be rescheduled:
 *    notifier: struct preempt_notifier for the task being scheduled
 *    cpu:  cpu we're scheduled on
 * @sched_out: we've just been preempted
 *    notifier: struct preempt_notifier for the task being preempted
 *    next: the task that's kicking us out
 *
 * Please note that sched_in and sched_out are called under different
 * contexts. sched_out is called with rq lock held and irq disabled
 * while sched_in is called without rq lock and irq enabled. This
 * difference is intentional and depended upon by its users.
 */
struct preempt_ops {
	void (*sched_in)(struct preempt_notifier *notifier, int cpu);
	void (*sched_out)(struct preempt_notifier *notifier,
			  struct task_struct *next);
};

/**
 * preempt_notifier - key for installing preemption notifiers
 * @link: internal use
 * @ops: defines the notifier functions to be called
 *
 * Usually used in conjunction with container_of().
 */
struct preempt_notifier {
	struct hlist_node link;
	struct preempt_ops *ops;
};

void preempt_notifier_inc(void);
void preempt_notifier_dec(void);
void preempt_notifier_register(struct preempt_notifier *notifier);
void preempt_notifier_unregister(struct preempt_notifier *notifier);

static inline void preempt_notifier_init(struct preempt_notifier *notifier,
					 struct preempt_ops *ops)
{
	INIT_HLIST_NODE(&notifier->link);
	notifier->ops = ops;
}
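
/*
 * Usage sketch (illustrative only; "struct my_vcpu", my_sched_in(),
 * my_sched_out() and my_preempt_ops are made-up names): embed a
 * struct preempt_notifier in a containing object and recover that
 * object with container_of() in the callbacks, as noted in the
 * kerneldoc above.
 *
 *	struct my_vcpu {
 *		struct preempt_notifier pn;
 *		...
 *	};
 *
 *	static void my_sched_in(struct preempt_notifier *pn, int cpu)
 *	{
 *		struct my_vcpu *vcpu = container_of(pn, struct my_vcpu, pn);
 *		... reload per-CPU state for vcpu ...
 *	}
 *
 *	static struct preempt_ops my_preempt_ops = {
 *		.sched_in  = my_sched_in,
 *		.sched_out = my_sched_out,
 *	};
 *
 *	preempt_notifier_init(&vcpu->pn, &my_preempt_ops);
 *	preempt_notifier_register(&vcpu->pn);
 */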

#endif

/**
 * migrate_disable - Prevent migration of the current task
 *
 * Maps to preempt_disable() which also disables preemption. Use
 * migrate_disable() to annotate that the intent is to prevent migration,
 * but not necessarily preemption.
 *
 * Can be invoked nested like preempt_disable() and needs the corresponding
 * number of migrate_enable() invocations.
 */
static __always_inline void migrate_disable(void)
{
	preempt_disable();
}

/**
 * migrate_enable - Allow migration of the current task
 *
 * Counterpart to migrate_disable().
 *
 * As migrate_disable() can be invoked nested, only the outermost invocation
 * reenables migration.
 *
 * Currently mapped to preempt_enable().
 */
static __always_inline void migrate_enable(void)
{
	preempt_enable();
}
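
/*
 * Usage sketch (illustrative only; the per-CPU variable name is made up
 * for the example): migrate_disable() documents that only CPU migration
 * must be prevented, even though this implementation also disables
 * preemption.
 *
 *	migrate_disable();
 *	p = this_cpu_ptr(&example_percpu_state);
 *	... p stays valid for this CPU until migrate_enable() ...
 *	migrate_enable();
 */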

#endif /* __LINUX_PREEMPT_H */