/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __ASM_SPINLOCK_H
#define __ASM_SPINLOCK_H

#if __LINUX_ARM_ARCH__ < 6
#error SMP not supported on pre-ARMv6 CPUs
#endif

#include <linux/prefetch.h>
#include <asm/barrier.h>
#include <asm/processor.h>

/*
 * sev and wfe are ARMv6K extensions.  Uniprocessor ARMv6 may not have the K
 * extensions, so when running on UP, we have to patch these instructions away.
 */
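/*
 * Rough sketch of how that patching works (the real definition of
 * __ALT_SMP_ASM() lives in <asm/processor.h>; names may vary between kernel
 * versions): the SMP variant of the instruction is emitted inline, and the
 * UP replacement is recorded in a fixup table (the .alt.smp.init section)
 * which the early boot code walks when it finds itself on a uniprocessor
 * system, rewriting each recorded site to the UP variant.
 */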
#ifdef CONFIG_THUMB2_KERNEL
/*
 * For Thumb-2, special care is needed to ensure that the conditional WFE
 * instruction really does assemble to exactly 4 bytes (as required by
 * the SMP_ON_UP fixup code).  By itself "wfene" might cause the
 * assembler to insert an extra (16-bit) IT instruction, depending on the
 * presence or absence of neighbouring conditional instructions.
 *
 * To avoid this unpredictability, an appropriate IT is inserted explicitly:
 * the assembler won't change IT instructions which are explicitly present
 * in the input.
 */
#define WFE(cond)	__ALT_SMP_ASM(		\
	"it " cond "\n\t"			\
	"wfe" cond ".n",			\
						\
	"nop.w"					\
)
#else
#define WFE(cond)	__ALT_SMP_ASM("wfe" cond, "nop")
#endif

#define SEV		__ALT_SMP_ASM(WASM(sev), WASM(nop))

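/*
 * dsb_sev() is used by the unlock paths below: the dsb(ishst) ensures the
 * store releasing the lock is visible to other observers before the SEV
 * wakes any CPU parked in WFE in one of the lock slow paths.
 */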
static inline void dsb_sev(void)
{
	dsb(ishst);
	__asm__(SEV);
}

/*
 * ARMv6 ticket-based spin-locking.
 *
 * A memory barrier is required after we get a lock, and before we
 * release it, because V6 CPUs are assumed to have weakly ordered
 * memory.
 */

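/*
 * A rough sketch of the layout this code relies on (the real definition is
 * in <asm/spinlock_types.h> and the exact field order depends on
 * endianness):
 *
 *	arch_spinlock_t.slock is a single 32-bit word;
 *	tickets.next  sits in the half selected by TICKET_SHIFT (16),
 *	tickets.owner sits in the other half.
 *
 * arch_spin_lock() atomically takes a ticket by adding 1 << TICKET_SHIFT
 * (i.e. bumping "next"), then waits in WFE until "owner" catches up with
 * the ticket it drew.
 */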
static inline void arch_spin_lock(arch_spinlock_t *lock)
{
	unsigned long tmp;
	u32 newval;
	arch_spinlock_t lockval;

	prefetchw(&lock->slock);
	__asm__ __volatile__(
"1:	ldrex	%0, [%3]\n"
"	add	%1, %0, %4\n"
"	strex	%2, %1, [%3]\n"
"	teq	%2, #0\n"
"	bne	1b"
	: "=&r" (lockval), "=&r" (newval), "=&r" (tmp)
	: "r" (&lock->slock), "I" (1 << TICKET_SHIFT)
	: "cc");

	while (lockval.tickets.next != lockval.tickets.owner) {
		wfe();
		lockval.tickets.owner = READ_ONCE(lock->tickets.owner);
	}

	smp_mb();
}

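/*
 * Single attempt at taking the lock.  "subs %1, %0, %0, ror #16" rotates
 * the ticket word by half its width and subtracts, which yields zero only
 * when next == owner, i.e. the lock is free; only then does strexeq bump
 * "next".  The do/while loop retries solely when the strex itself fails
 * (the exclusive monitor was cleared), never when the lock is merely
 * contended.
 */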
static inline int arch_spin_trylock(arch_spinlock_t *lock)
{
	unsigned long contended, res;
	u32 slock;

	prefetchw(&lock->slock);
	do {
		__asm__ __volatile__(
		"	ldrex	%0, [%3]\n"
		"	mov	%2, #0\n"
		"	subs	%1, %0, %0, ror #16\n"
		"	addeq	%0, %0, %4\n"
		"	strexeq	%2, %0, [%3]"
		: "=&r" (slock), "=&r" (contended), "=&r" (res)
		: "r" (&lock->slock), "I" (1 << TICKET_SHIFT)
		: "cc");
	} while (res);

	if (!contended) {
		smp_mb();
		return 1;
	} else {
		return 0;
	}
}

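/*
 * Unlock only needs to advance "owner": the smp_mb() orders the critical
 * section before the update, and a plain non-atomic increment is safe
 * because only the lock holder ever writes this half of the word.
 * dsb_sev() then wakes up any CPU waiting in WFE for its ticket.
 */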
static inline void arch_spin_unlock(arch_spinlock_t *lock)
{
	smp_mb();
	lock->tickets.owner++;
	dsb_sev();
}

static inline int arch_spin_value_unlocked(arch_spinlock_t lock)
{
	return lock.tickets.owner == lock.tickets.next;
}

static inline int arch_spin_is_locked(arch_spinlock_t *lock)
{
	return !arch_spin_value_unlocked(READ_ONCE(*lock));
}

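/*
 * next - owner is the number of CPUs currently holding or queueing for the
 * lock, so a difference greater than 1 means at least one waiter is queued
 * behind the present holder.
 */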
static inline int arch_spin_is_contended(arch_spinlock_t *lock)
{
	struct __raw_tickets tickets = READ_ONCE(lock->tickets);
	return (tickets.next - tickets.owner) > 1;
}
#define arch_spin_is_contended	arch_spin_is_contended

/*
 * RWLOCKS
 *
 * Write locks are easy - we just set bit 31.  When unlocking, we can
 * just write zero since the lock is exclusively held.
 */

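/*
 * The whole rwlock word thus encodes the state: 0 means unlocked,
 * 0x80000000 means write-locked, and any other value is a count of active
 * readers.  A writer can only take the lock when the word is exactly zero,
 * which is what the "teq %0, #0" before strexeq in arch_write_lock() and
 * arch_write_trylock() checks.
 */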
static inline void arch_write_lock(arch_rwlock_t *rw)
{
	unsigned long tmp;

	prefetchw(&rw->lock);
	__asm__ __volatile__(
"1:	ldrex	%0, [%1]\n"
"	teq	%0, #0\n"
	WFE("ne")
"	strexeq	%0, %2, [%1]\n"
"	teq	%0, #0\n"
"	bne	1b"
	: "=&r" (tmp)
	: "r" (&rw->lock), "r" (0x80000000)
	: "cc");

	smp_mb();
}

static inline int arch_write_trylock(arch_rwlock_t *rw)
{
	unsigned long contended, res;

	prefetchw(&rw->lock);
	do {
		__asm__ __volatile__(
		"	ldrex	%0, [%2]\n"
		"	mov	%1, #0\n"
		"	teq	%0, #0\n"
		"	strexeq	%1, %3, [%2]"
		: "=&r" (contended), "=&r" (res)
		: "r" (&rw->lock), "r" (0x80000000)
		: "cc");
	} while (res);

	if (!contended) {
		smp_mb();
		return 1;
	} else {
		return 0;
	}
}

static inline void arch_write_unlock(arch_rwlock_t *rw)
{
	smp_mb();

	__asm__ __volatile__(
	"str	%1, [%0]\n"
	:
	: "r" (&rw->lock), "r" (0)
	: "cc");

	dsb_sev();
}

/*
 * Read locks are a bit more hairy:
 *  - Exclusively load the lock value.
 *  - Increment it.
 *  - Store new lock value if positive, and we still own this location.
 *    If the value is negative, we've already failed.
 *  - If we failed to store the value, we want a negative result.
 *  - If we failed, try again.
 * Unlocking is similarly hairy.  We may have multiple read locks
 * currently active.  However, we know we won't have any write
 * locks.
 */
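/*
 * In terms of the condition flags below: "adds" leaves the result negative
 * (mi) when a writer holds bit 31, so strexpl only attempts the store on a
 * non-negative count and WFE("mi") parks the CPU instead.  "rsbspl" then
 * negates the strex status, so a failed store also shows up as a negative
 * result and bmi loops back in either case.
 */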
static inline void arch_read_lock(arch_rwlock_t *rw)
{
	unsigned long tmp, tmp2;

	prefetchw(&rw->lock);
	__asm__ __volatile__(
"	.syntax unified\n"
"1:	ldrex	%0, [%2]\n"
"	adds	%0, %0, #1\n"
"	strexpl	%1, %0, [%2]\n"
	WFE("mi")
"	rsbspl	%0, %1, #0\n"
"	bmi	1b"
	: "=&r" (tmp), "=&r" (tmp2)
	: "r" (&rw->lock)
	: "cc");

	smp_mb();
}

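/*
 * Read unlock decrements the reader count; only the last reader to leave
 * (tmp == 0 after the decrement) issues dsb_sev(), since that is the only
 * transition a writer parked in WFE can act on.
 */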
static inline void arch_read_unlock(arch_rwlock_t *rw)
{
	unsigned long tmp, tmp2;

	smp_mb();

	prefetchw(&rw->lock);
	__asm__ __volatile__(
"1:	ldrex	%0, [%2]\n"
"	sub	%0, %0, #1\n"
"	strex	%1, %0, [%2]\n"
"	teq	%1, #0\n"
"	bne	1b"
	: "=&r" (tmp), "=&r" (tmp2)
	: "r" (&rw->lock)
	: "cc");

	if (tmp == 0)
		dsb_sev();
}

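/*
 * As with the read lock, strexpl only attempts the store while the
 * incremented count stays non-negative, so a write-held lock is never
 * modified; "contended" ends up holding that incremented value, and the
 * 0x80000000 test below checks whether bit 31 (the write bit) was set.
 * The loop retries only on strex failure.
 */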
static inline int arch_read_trylock(arch_rwlock_t *rw)
{
	unsigned long contended, res;

	prefetchw(&rw->lock);
	do {
		__asm__ __volatile__(
		"	ldrex	%0, [%2]\n"
		"	mov	%1, #0\n"
		"	adds	%0, %0, #1\n"
		"	strexpl	%1, %0, [%2]"
		: "=&r" (contended), "=&r" (res)
		: "r" (&rw->lock)
		: "cc");
	} while (res);

	/* If the lock is negative, then it is already held for write. */
	if (contended < 0x80000000) {
		smp_mb();
		return 1;
	} else {
		return 0;
	}
}

#endif /* __ASM_SPINLOCK_H */