/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2013 ARM Ltd.
 */
#ifndef __ASM_PERCPU_H
#define __ASM_PERCPU_H

#include <linux/preempt.h>

#include <asm/alternative.h>
#include <asm/cmpxchg.h>
#include <asm/stack_pointer.h>
#include <asm/sysreg.h>

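/*
 * Install this CPU's per-cpu offset. It lives in TPIDR_EL1, or in
 * TPIDR_EL2 when the kernel itself runs at EL2 (VHE).
 */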
static inline void set_my_cpu_offset(unsigned long off)
{
	asm volatile(ALTERNATIVE("msr tpidr_el1, %0",
				 "msr tpidr_el2, %0",
				 ARM64_HAS_VIRT_HOST_EXTN)
			:: "r" (off) : "memory");
}

static inline unsigned long __hyp_my_cpu_offset(void)
{
	/*
	 * Non-VHE hyp code runs with preemption disabled. No need to hazard
	 * the register access against barrier() as in __kern_my_cpu_offset.
	 */
	return read_sysreg(tpidr_el2);
}

static inline unsigned long __kern_my_cpu_offset(void)
{
	unsigned long off;

	/*
	 * We want to allow caching the value, so avoid using volatile and
	 * instead use a fake stack read to hazard against barrier().
	 */
	asm(ALTERNATIVE("mrs %0, tpidr_el1",
			"mrs %0, tpidr_el2",
			ARM64_HAS_VIRT_HOST_EXTN)
		: "=r" (off) :
		"Q" (*(const unsigned long *)current_stack_pointer));

	return off;
}

#ifdef __KVM_NVHE_HYPERVISOR__
#define __my_cpu_offset __hyp_my_cpu_offset()
#else
#define __my_cpu_offset __kern_my_cpu_offset()
#endif

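/*
 * Single-copy-atomic (but not RMW-atomic) per-cpu reads and writes of
 * the given size, implemented with READ_ONCE()/WRITE_ONCE().
 */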
#define PERCPU_RW_OPS(sz)						\
static inline unsigned long __percpu_read_##sz(void *ptr)		\
{									\
	return READ_ONCE(*(u##sz *)ptr);				\
}									\
									\
static inline void __percpu_write_##sz(void *ptr, unsigned long val)	\
{									\
	WRITE_ONCE(*(u##sz *)ptr, (u##sz)val);				\
}

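/*
 * Emit a non-returning per-cpu RMW operation. The LL/SC loop is patched
 * at runtime to a single LSE store-op plus NOP padding on CPUs with the
 * atomics extension; "w" picks the 32-bit register form for sub-64-bit
 * sizes and "sfx" the byte/halfword access suffix.
 */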
#define __PERCPU_OP_CASE(w, sfx, name, sz, op_llsc, op_lse)		\
static inline void							\
__percpu_##name##_case_##sz(void *ptr, unsigned long val)		\
{									\
	unsigned int loop;						\
	u##sz tmp;							\
									\
	asm volatile (ARM64_LSE_ATOMIC_INSN(				\
	/* LL/SC */							\
	"1:	ldxr" #sfx "\t%" #w "[tmp], %[ptr]\n"			\
		#op_llsc "\t%" #w "[tmp], %" #w "[tmp], %" #w "[val]\n"	\
	"	stxr" #sfx "\t%w[loop], %" #w "[tmp], %[ptr]\n"		\
	"	cbnz	%w[loop], 1b",					\
	/* LSE atomics */						\
		#op_lse "\t%" #w "[val], %[ptr]\n"			\
		__nops(3))						\
	: [loop] "=&r" (loop), [tmp] "=&r" (tmp),			\
	  [ptr] "+Q"(*(u##sz *)ptr)					\
	: [val] "r" ((u##sz)(val)));					\
}

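/*
 * As above, but return the new value of *ptr. The LSE ld-form atomic
 * returns the old value, so the operation is applied once more to match
 * the result the LL/SC path produces naturally.
 */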
#define __PERCPU_RET_OP_CASE(w, sfx, name, sz, op_llsc, op_lse)	\
static inline u##sz							\
__percpu_##name##_return_case_##sz(void *ptr, unsigned long val)	\
{									\
	unsigned int loop;						\
	u##sz ret;							\
									\
	asm volatile (ARM64_LSE_ATOMIC_INSN(				\
	/* LL/SC */							\
	"1:	ldxr" #sfx "\t%" #w "[ret], %[ptr]\n"			\
		#op_llsc "\t%" #w "[ret], %" #w "[ret], %" #w "[val]\n"	\
	"	stxr" #sfx "\t%w[loop], %" #w "[ret], %[ptr]\n"		\
	"	cbnz	%w[loop], 1b",					\
	/* LSE atomics */						\
		#op_lse "\t%" #w "[val], %" #w "[ret], %[ptr]\n"	\
		#op_llsc "\t%" #w "[ret], %" #w "[ret], %" #w "[val]\n"	\
		__nops(2))						\
	: [loop] "=&r" (loop), [ret] "=&r" (ret),			\
	  [ptr] "+Q"(*(u##sz *)ptr)					\
	: [val] "r" ((u##sz)(val)));					\
									\
	return ret;							\
}

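/*
 * Instantiate each op for all four access sizes: 32-bit register forms
 * with "b"/"h" suffixes for the byte and halfword cases, the bare
 * 64-bit form for the last.
 */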
#define PERCPU_OP(name, op_llsc, op_lse)				\
	__PERCPU_OP_CASE(w, b, name,  8, op_llsc, op_lse)		\
	__PERCPU_OP_CASE(w, h, name, 16, op_llsc, op_lse)		\
	__PERCPU_OP_CASE(w,  , name, 32, op_llsc, op_lse)		\
	__PERCPU_OP_CASE( ,  , name, 64, op_llsc, op_lse)

#define PERCPU_RET_OP(name, op_llsc, op_lse)				\
	__PERCPU_RET_OP_CASE(w, b, name,  8, op_llsc, op_lse)		\
	__PERCPU_RET_OP_CASE(w, h, name, 16, op_llsc, op_lse)		\
	__PERCPU_RET_OP_CASE(w,  , name, 32, op_llsc, op_lse)		\
	__PERCPU_RET_OP_CASE( ,  , name, 64, op_llsc, op_lse)

PERCPU_RW_OPS(8)
PERCPU_RW_OPS(16)
PERCPU_RW_OPS(32)
PERCPU_RW_OPS(64)
PERCPU_OP(add, add, stadd)
PERCPU_OP(andnot, bic, stclr)
PERCPU_OP(or, orr, stset)
PERCPU_RET_OP(add, add, ldadd)

#undef PERCPU_RW_OPS
#undef __PERCPU_OP_CASE
#undef __PERCPU_RET_OP_CASE
#undef PERCPU_OP
#undef PERCPU_RET_OP

/*
 * It would be nice to avoid the conditional call into the scheduler when
 * re-enabling preemption for preemptible kernels, but doing that in a way
 * which builds inside a module would mean messing directly with the preempt
 * count. If you do this, peterz and tglx will hunt you down.
 */
#define this_cpu_cmpxchg_double_8(ptr1, ptr2, o1, o2, n1, n2)		\
({									\
	int __ret;							\
	preempt_disable_notrace();					\
	__ret = cmpxchg_double_local(	raw_cpu_ptr(&(ptr1)),		\
					raw_cpu_ptr(&(ptr2)),		\
					o1, o2, n1, n2);		\
	preempt_enable_notrace();					\
	__ret;								\
})

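/*
 * Run "op" against this CPU's instance of "pcp" with preemption
 * disabled, so that the raw_cpu_ptr() target cannot change under our
 * feet. The _notrace variants keep these helpers usable from the
 * tracing code itself.
 */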
#define _pcp_protect(op, pcp, ...)					\
({									\
	preempt_disable_notrace();					\
	op(raw_cpu_ptr(&(pcp)), __VA_ARGS__);				\
	preempt_enable_notrace();					\
})

#define _pcp_protect_return(op, pcp, args...)				\
({									\
	typeof(pcp) __retval;						\
	preempt_disable_notrace();					\
	__retval = (typeof(pcp))op(raw_cpu_ptr(&(pcp)), ##args);	\
	preempt_enable_notrace();					\
	__retval;							\
})

#define this_cpu_read_1(pcp)		\
	_pcp_protect_return(__percpu_read_8, pcp)
#define this_cpu_read_2(pcp)		\
	_pcp_protect_return(__percpu_read_16, pcp)
#define this_cpu_read_4(pcp)		\
	_pcp_protect_return(__percpu_read_32, pcp)
#define this_cpu_read_8(pcp)		\
	_pcp_protect_return(__percpu_read_64, pcp)

#define this_cpu_write_1(pcp, val)	\
	_pcp_protect(__percpu_write_8, pcp, (unsigned long)val)
#define this_cpu_write_2(pcp, val)	\
	_pcp_protect(__percpu_write_16, pcp, (unsigned long)val)
#define this_cpu_write_4(pcp, val)	\
	_pcp_protect(__percpu_write_32, pcp, (unsigned long)val)
#define this_cpu_write_8(pcp, val)	\
	_pcp_protect(__percpu_write_64, pcp, (unsigned long)val)

#define this_cpu_add_1(pcp, val)	\
	_pcp_protect(__percpu_add_case_8, pcp, val)
#define this_cpu_add_2(pcp, val)	\
	_pcp_protect(__percpu_add_case_16, pcp, val)
#define this_cpu_add_4(pcp, val)	\
	_pcp_protect(__percpu_add_case_32, pcp, val)
#define this_cpu_add_8(pcp, val)	\
	_pcp_protect(__percpu_add_case_64, pcp, val)

#define this_cpu_add_return_1(pcp, val)	\
	_pcp_protect_return(__percpu_add_return_case_8, pcp, val)
#define this_cpu_add_return_2(pcp, val)	\
	_pcp_protect_return(__percpu_add_return_case_16, pcp, val)
#define this_cpu_add_return_4(pcp, val)	\
	_pcp_protect_return(__percpu_add_return_case_32, pcp, val)
#define this_cpu_add_return_8(pcp, val)	\
	_pcp_protect_return(__percpu_add_return_case_64, pcp, val)

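/*
 * No atomic AND op is generated (LSE has no store-and instruction);
 * this_cpu_and() is built on the andnot (BIC/STCLR) op by complementing
 * the mask.
 */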
#define this_cpu_and_1(pcp, val)	\
	_pcp_protect(__percpu_andnot_case_8, pcp, ~val)
#define this_cpu_and_2(pcp, val)	\
	_pcp_protect(__percpu_andnot_case_16, pcp, ~val)
#define this_cpu_and_4(pcp, val)	\
	_pcp_protect(__percpu_andnot_case_32, pcp, ~val)
#define this_cpu_and_8(pcp, val)	\
	_pcp_protect(__percpu_andnot_case_64, pcp, ~val)

#define this_cpu_or_1(pcp, val)		\
	_pcp_protect(__percpu_or_case_8, pcp, val)
#define this_cpu_or_2(pcp, val)		\
	_pcp_protect(__percpu_or_case_16, pcp, val)
#define this_cpu_or_4(pcp, val)		\
	_pcp_protect(__percpu_or_case_32, pcp, val)
#define this_cpu_or_8(pcp, val)		\
	_pcp_protect(__percpu_or_case_64, pcp, val)

#define this_cpu_xchg_1(pcp, val)	\
	_pcp_protect_return(xchg_relaxed, pcp, val)
#define this_cpu_xchg_2(pcp, val)	\
	_pcp_protect_return(xchg_relaxed, pcp, val)
#define this_cpu_xchg_4(pcp, val)	\
	_pcp_protect_return(xchg_relaxed, pcp, val)
#define this_cpu_xchg_8(pcp, val)	\
	_pcp_protect_return(xchg_relaxed, pcp, val)

#define this_cpu_cmpxchg_1(pcp, o, n)	\
	_pcp_protect_return(cmpxchg_relaxed, pcp, o, n)
#define this_cpu_cmpxchg_2(pcp, o, n)	\
	_pcp_protect_return(cmpxchg_relaxed, pcp, o, n)
#define this_cpu_cmpxchg_4(pcp, o, n)	\
	_pcp_protect_return(cmpxchg_relaxed, pcp, o, n)
#define this_cpu_cmpxchg_8(pcp, o, n)	\
	_pcp_protect_return(cmpxchg_relaxed, pcp, o, n)

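/*
 * The kernel's __per_cpu_offset[] array is not available to non-VHE hyp
 * code, so per_cpu_offset() is redirected to the hypervisor's own
 * __hyp_per_cpu_offset().
 */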
#ifdef __KVM_NVHE_HYPERVISOR__
extern unsigned long __hyp_per_cpu_offset(unsigned int cpu);
#define __per_cpu_offset
#define per_cpu_offset(cpu)	__hyp_per_cpu_offset((cpu))
#endif

#include <asm-generic/percpu.h>

/* Redefine macros for nVHE hyp under DEBUG_PREEMPT to avoid its dependencies. */
#if defined(__KVM_NVHE_HYPERVISOR__) && defined(CONFIG_DEBUG_PREEMPT)
#undef	this_cpu_ptr
#define	this_cpu_ptr		raw_cpu_ptr
#undef	__this_cpu_read
#define	__this_cpu_read		raw_cpu_read
#undef	__this_cpu_write
#define	__this_cpu_write	raw_cpu_write
#endif

#endif /* __ASM_PERCPU_H */