/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __ASM_SPINLOCK_H
#define __ASM_SPINLOCK_H

#if __LINUX_ARM_ARCH__ < 6
#error SMP not supported on pre-ARMv6 CPUs
#endif

#include <linux/prefetch.h>
#include <asm/barrier.h>
#include <asm/processor.h>

/*
 * sev and wfe are ARMv6K extensions.  Uniprocessor ARMv6 may not have the K
 * extensions, so when running on UP, we have to patch these instructions away.
 */
#ifdef CONFIG_THUMB2_KERNEL
/*
 * For Thumb-2, special care is needed to ensure that the conditional WFE
 * instruction really does assemble to exactly 4 bytes (as required by
 * the SMP_ON_UP fixup code).  By itself "wfene" might cause the
 * assembler to insert an extra (16-bit) IT instruction, depending on the
 * presence or absence of neighbouring conditional instructions.
 *
 * To avoid this unpredictability, an appropriate IT is inserted explicitly:
 * the assembler won't change IT instructions which are explicitly present
 * in the input.
 */
#define WFE(cond)	__ALT_SMP_ASM(		\
	"it " cond "\n\t"			\
	"wfe" cond ".n",			\
						\
	"nop.w"					\
)
#else
#define WFE(cond)	__ALT_SMP_ASM("wfe" cond, "nop")
#endif

#define SEV		__ALT_SMP_ASM(WASM(sev), WASM(nop))

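/*
 * dsb_sev() is used on the unlock paths: the DSB completes the unlocking
 * store before the SEV is issued, so a CPU woken from WFE re-reads an
 * up-to-date lock value.
 */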
static inline void dsb_sev(void)
{
	dsb(ishst);
	__asm__(SEV);
}

/*
 * ARMv6 ticket-based spin-locking.
 *
 * A memory barrier is required after we get a lock, and before we
 * release it, because V6 CPUs are assumed to have weakly ordered
 * memory.
 */

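/*
 * Ticket layout assumed below (defined in <asm/spinlock_types.h>): slock
 * is a 32-bit word overlaying two halfwords, tickets.owner and
 * tickets.next, with TICKET_SHIFT selecting the "next" half.  Adding
 * (1 << TICKET_SHIFT) to slock therefore draws the next ticket without
 * disturbing the owner field.
 */
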
static inline void arch_spin_lock(arch_spinlock_t *lock)
{
	unsigned long tmp;
	u32 newval;
	arch_spinlock_t lockval;

	prefetchw(&lock->slock);
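	/*
	 * Take a ticket: ldrex/strex atomically add (1 << TICKET_SHIFT),
	 * bumping tickets.next and keeping the pre-increment value in
	 * lockval.  Then spin, sleeping in wfe(), until tickets.owner
	 * catches up with the ticket we drew.
	 */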
	__asm__ __volatile__(
"1:	ldrex	%0, [%3]\n"
"	add	%1, %0, %4\n"
"	strex	%2, %1, [%3]\n"
"	teq	%2, #0\n"
"	bne	1b"
	: "=&r" (lockval), "=&r" (newval), "=&r" (tmp)
	: "r" (&lock->slock), "I" (1 << TICKET_SHIFT)
	: "cc");

	while (lockval.tickets.next != lockval.tickets.owner) {
		wfe();
		lockval.tickets.owner = READ_ONCE(lock->tickets.owner);
	}

	smp_mb();
}

static inline int arch_spin_trylock(arch_spinlock_t *lock)
{
	unsigned long contended, res;
	u32 slock;

	prefetchw(&lock->slock);
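	/*
	 * Single attempt: "subs %1, %0, %0, ror #16" compares the two
	 * halfwords of slock (rotating by 16 swaps owner and next), so
	 * contended is zero only when the lock is free; in that case the
	 * addeq/strexeq pair takes the next ticket.  The loop retries only
	 * when the exclusive store is disturbed, not when the lock is held.
	 */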
	do {
		__asm__ __volatile__(
		"	ldrex	%0, [%3]\n"
		"	mov	%2, #0\n"
		"	subs	%1, %0, %0, ror #16\n"
		"	addeq	%0, %0, %4\n"
		"	strexeq	%2, %0, [%3]"
		: "=&r" (slock), "=&r" (contended), "=&r" (res)
		: "r" (&lock->slock), "I" (1 << TICKET_SHIFT)
		: "cc");
	} while (res);

	if (!contended) {
		smp_mb();
		return 1;
	} else {
		return 0;
	}
}

static inline void arch_spin_unlock(arch_spinlock_t *lock)
{
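	/*
	 * The lock holder is the only writer of tickets.owner, so a plain
	 * increment suffices; dsb_sev() then wakes any CPU waiting in wfe().
	 */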
	smp_mb();
	lock->tickets.owner++;
	dsb_sev();
}

static inline int arch_spin_value_unlocked(arch_spinlock_t lock)
{
	return lock.tickets.owner == lock.tickets.next;
}

static inline int arch_spin_is_locked(arch_spinlock_t *lock)
{
	return !arch_spin_value_unlocked(READ_ONCE(*lock));
}

static inline int arch_spin_is_contended(arch_spinlock_t *lock)
{
	struct __raw_tickets tickets = READ_ONCE(lock->tickets);
	return (tickets.next - tickets.owner) > 1;
}
#define arch_spin_is_contended	arch_spin_is_contended

/*
 * RWLOCKS
 *
 * Write locks are easy - we just set bit 31.  When unlocking, we can
 * just write zero since the lock is exclusively held.
 */

static inline void arch_write_lock(arch_rwlock_t *rw)
{
	unsigned long tmp;

	prefetchw(&rw->lock);
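	/*
	 * Wait in WFE while the lock word is non-zero, then claim the lock
	 * by exclusively storing 0x80000000 (bit 31); loop again if the lock
	 * was taken in the meantime or the exclusive store failed.
	 */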
	__asm__ __volatile__(
"1:	ldrex	%0, [%1]\n"
"	teq	%0, #0\n"
	WFE("ne")
"	strexeq	%0, %2, [%1]\n"
"	teq	%0, #0\n"
"	bne	1b"
	: "=&r" (tmp)
	: "r" (&rw->lock), "r" (0x80000000)
	: "cc");

	smp_mb();
}

static inline int arch_write_trylock(arch_rwlock_t *rw)
{
	unsigned long contended, res;

	prefetchw(&rw->lock);
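	/*
	 * One attempt only: claim bit 31 with strexeq if the lock word is
	 * currently zero.  The loop repeats solely when the exclusive store
	 * is disturbed (res != 0), never to wait for the lock.
	 */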
	do {
		__asm__ __volatile__(
		"	ldrex	%0, [%2]\n"
		"	mov	%1, #0\n"
		"	teq	%0, #0\n"
		"	strexeq	%1, %3, [%2]"
		: "=&r" (contended), "=&r" (res)
		: "r" (&rw->lock), "r" (0x80000000)
		: "cc");
	} while (res);

	if (!contended) {
		smp_mb();
		return 1;
	} else {
		return 0;
	}
}

static inline void arch_write_unlock(arch_rwlock_t *rw)
{
	smp_mb();

	__asm__ __volatile__(
	"str	%1, [%0]\n"
	:
	: "r" (&rw->lock), "r" (0)
	: "cc");

	dsb_sev();
}

/*
 * Read locks are a bit more hairy:
 *  - Exclusively load the lock value.
 *  - Increment it.
 *  - Store new lock value if positive, and we still own this location.
 *    If the value is negative, we've already failed.
 *  - If we failed to store the value, we want a negative result.
 *  - If we failed, try again.
 * Unlocking is similarly hairy.  We may have multiple read locks
 * currently active.  However, we know we won't have any write
 * locks.
 */
static inline void arch_read_lock(arch_rwlock_t *rw)
{
	unsigned long tmp, tmp2;

	prefetchw(&rw->lock);
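	/*
	 * Bump the reader count: "adds" leaves a negative result while a
	 * writer holds bit 31, in which case strexpl is skipped and we sleep
	 * in WFE.  On the positive path, rsbspl turns a failed strex status
	 * into a negative value, so bmi retries in either case.
	 */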
	__asm__ __volatile__(
"	.syntax unified\n"
"1:	ldrex	%0, [%2]\n"
"	adds	%0, %0, #1\n"
"	strexpl	%1, %0, [%2]\n"
	WFE("mi")
"	rsbspl	%0, %1, #0\n"
"	bmi	1b"
	: "=&r" (tmp), "=&r" (tmp2)
	: "r" (&rw->lock)
	: "cc");

	smp_mb();
}

static inline void arch_read_unlock(arch_rwlock_t *rw)
{
	unsigned long tmp, tmp2;

	smp_mb();

	prefetchw(&rw->lock);
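	/*
	 * Atomically decrement the reader count; the last reader out
	 * (tmp == 0) issues dsb_sev() to wake a writer waiting in WFE.
	 */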
	__asm__ __volatile__(
"1:	ldrex	%0, [%2]\n"
"	sub	%0, %0, #1\n"
"	strex	%1, %0, [%2]\n"
"	teq	%1, #0\n"
"	bne	1b"
	: "=&r" (tmp), "=&r" (tmp2)
	: "r" (&rw->lock)
	: "cc");

	if (tmp == 0)
		dsb_sev();
}

static inline int arch_read_trylock(arch_rwlock_t *rw)
{
	unsigned long contended, res;

	prefetchw(&rw->lock);
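	/*
	 * Single attempt at taking a reader slot: the incremented value
	 * stays below 0x80000000 only while no writer holds bit 31, which
	 * is what the check below relies on.  Retry only when the exclusive
	 * store was disturbed.
	 */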
	do {
		__asm__ __volatile__(
		"	ldrex	%0, [%2]\n"
		"	mov	%1, #0\n"
		"	adds	%0, %0, #1\n"
		"	strexpl	%1, %0, [%2]"
		: "=&r" (contended), "=&r" (res)
		: "r" (&rw->lock)
		: "cc");
	} while (res);

	/* If the lock is negative, then it is already held for write. */
	if (contended < 0x80000000) {
		smp_mb();
		return 1;
	} else {
		return 0;
	}
}

#endif /* __ASM_SPINLOCK_H */