/* SPDX-License-Identifier: GPL-2.0-only */
#ifndef __MIPS_ASM_SYNC_H__
#define __MIPS_ASM_SYNC_H__

#include <linux/stringify.h>

/*
 * sync types are defined by the MIPS64 Instruction Set documentation in Volume
 * II-A of the MIPS Architecture Reference Manual, which can be found here:
 *
 *   https://www.mips.com/?do-download=the-mips64-instruction-set-v6-06
 *
 * Two types of barrier are provided:
 *
 *   1) Completion barriers, which ensure that a memory operation has actually
 *      completed & often involve stalling the CPU pipeline to do so.
 *
 *   2) Ordering barriers, which only ensure that affected memory operations
 *      won't be reordered in the CPU pipeline in a manner that violates the
 *      restrictions imposed by the barrier.
 *
 * Ordering barriers can be more efficient than completion barriers, since:
 *
 *   a) Ordering barriers only require memory access instructions which precede
 *      them in program order (older instructions) to reach a point in the
 *      load/store datapath beyond which reordering is not possible before
 *      allowing memory access instructions which follow them (younger
 *      instructions) to be performed. That is, older instructions don't
 *      actually need to complete - they just need to get far enough that all
 *      other coherent CPUs will observe their completion before they observe
 *      the effects of younger instructions.
 *
 *   b) Multiple variants of ordering barrier are provided which allow the
 *      effects to be restricted to different combinations of older or younger
 *      loads or stores. By way of example, if we only care that stores older
 *      than a barrier are observed prior to stores that are younger than a
 *      barrier, & don't care about the ordering of loads, then the 'wmb'
 *      ordering barrier can be used. Limiting the barrier's effects to stores
 *      allows loads to continue unaffected & potentially allows the CPU to
 *      make progress faster than if younger loads had to wait for older stores
 *      to complete.
 */

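/*
 * An illustrative sketch of point (b) above (not a definition from this
 * header): C code wanting only store-store ordering could emit the 'wmb'
 * type via the __SYNC() macro defined below, leaving loads unaffected:
 *
 *	asm volatile(__SYNC(wmb, always) ::: "memory");
 */
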
/*
 * No sync instruction at all; used to allow code to nullify the effect of the
 * __SYNC() macro without needing lots of #ifdefery.
 */
#define __SYNC_none	-1

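/*
 * Worked example: __SYNC(none, always) expands via ___SYNC() & ____SYNC()
 * below to an .if whose condition ((-1) != -1) && (1 << 0) is false, so no
 * sync instruction is emitted at all.
 */
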
/*
 * A full completion barrier; all memory accesses appearing prior to this sync
 * instruction in program order must complete before any memory accesses
 * appearing after this sync instruction in program order.
 */
#define __SYNC_full	0x00

/*
 * For now we use a full completion barrier to implement all sync types, until
 * we're satisfied that lightweight ordering barriers defined by MIPSr6 are
 * sufficient to uphold our desired memory model.
 */
#define __SYNC_aq	__SYNC_full
#define __SYNC_rl	__SYNC_full
#define __SYNC_mb	__SYNC_full

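/*
 * A hedged sketch of what adopting those lightweight barriers might look
 * like, assuming the stype encodings from the ISA manual referenced above
 * (0x11 acquire, 0x12 release, 0x10 ordering mb):
 *
 *	#define __SYNC_aq	0x11
 *	#define __SYNC_rl	0x12
 *	#define __SYNC_mb	0x10
 */
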
/*
 * ...except on Cavium Octeon CPUs, which have been using the 'wmb' ordering
 * barrier since 2010 & omit 'rmb' barriers because the CPUs don't perform
 * speculative reads.
 */
#ifdef CONFIG_CPU_CAVIUM_OCTEON
# define __SYNC_rmb	__SYNC_none
# define __SYNC_wmb	0x04
#else
# define __SYNC_rmb	__SYNC_full
# define __SYNC_wmb	__SYNC_full
#endif

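/*
 * Concretely, on Octeon __SYNC(rmb, always) therefore emits no sync
 * instruction at all, whilst __SYNC(wmb, always) emits 'sync 0x4' - and,
 * per __SYNC_rpt() below, emits it twice.
 */
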
/*
 * A GINV sync is a little different; it doesn't relate directly to loads or
 * stores, but instead causes synchronization of an icache or TLB global
 * invalidation operation triggered by the ginvi or ginvt instructions
 * respectively. In cases where we need to know that a ginvi or ginvt operation
 * has been performed by all coherent CPUs, we must issue a sync instruction of
 * this type. Once this instruction graduates, all coherent CPUs will have
 * observed the invalidation.
 */
#define __SYNC_ginv	0x14

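/*
 * An illustrative usage sketch: C code which must wait for a ginvi/ginvt
 * broadcast to be observed by all coherent CPUs might emit:
 *
 *	asm volatile(__SYNC(ginv, always));
 */
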
/* Trivial; indicate that we always need this sync instruction. */
#define __SYNC_always	(1 << 0)

/*
 * Indicate that we need this sync instruction only on systems with weakly
 * ordered memory access. In general this is most MIPS systems, but there are
 * exceptions which provide strongly ordered memory.
 */
#ifdef CONFIG_WEAK_ORDERING
# define __SYNC_weak_ordering	(1 << 1)
#else
# define __SYNC_weak_ordering	0
#endif

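/*
 * For example, an SMP memory barrier only needs to emit a sync instruction
 * on weakly ordered systems, so a sketch of one (illustrative, not the
 * kernel's actual barrier definition) is:
 *
 *	asm volatile(__SYNC(mb, weak_ordering) ::: "memory");
 */
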
/*
 * Indicate that we need this sync instruction only on systems where LL/SC
 * don't implicitly provide a memory barrier. In general this is most MIPS
 * systems.
 */
#ifdef CONFIG_WEAK_REORDERING_BEYOND_LLSC
# define __SYNC_weak_llsc	(1 << 2)
#else
# define __SYNC_weak_llsc	0
#endif

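/*
 * For example, a sketch of providing acquire semantics after an LL/SC loop
 * completes, emitting a sync only where LL/SC don't already act as a
 * barrier:
 *
 *	asm volatile(__SYNC(mb, weak_llsc) ::: "memory");
 */
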
/*
 * Some Loongson 3 CPUs have a bug wherein execution of a memory access (load,
 * store or prefetch) in between an LL & SC can cause the SC instruction to
 * erroneously succeed, breaking atomicity. Whilst it's unusual to write code
 * containing such sequences, this bug bites harder than we might otherwise
 * expect due to reordering & speculation:
 *
 * 1) A memory access appearing prior to the LL in program order may actually
 *    be executed after the LL - this is the reordering case.
 *
 *    In order to avoid this we need to place a memory barrier (ie. a SYNC
 *    instruction) prior to every LL instruction, in between it and any earlier
 *    memory access instructions.
 *
 *    This reordering case is fixed by 3A R2 CPUs, ie. 3A2000 models and later.
 *
 * 2) If a conditional branch exists between an LL & SC with a target outside
 *    of the LL-SC loop, for example an exit upon value mismatch in cmpxchg()
 *    or similar, then misprediction of the branch may allow speculative
 *    execution of memory accesses from outside of the LL-SC loop.
 *
 *    In order to avoid this we need a memory barrier (ie. a SYNC instruction)
 *    at each affected branch target.
 *
 *    This case affects all current Loongson 3 CPUs.
 *
 * Both cases amount to an error in the cache coherence protocol: the
 * Invalidate from a competing LL-SC sequence goes 'missing', so the SC
 * erroneously observes that its core still holds the line in the Exclusive
 * state & allows the SC to proceed.
 *
 * The error therefore only occurs on SMP systems. An illustrative sketch of
 * both workarounds applied to an LL/SC loop follows the definitions below.
 */
#ifdef CONFIG_CPU_LOONGSON3_WORKAROUNDS
# define __SYNC_loongson3_war	(1 << 31)
#else
# define __SYNC_loongson3_war	0
#endif

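/*
 * The promised sketch (labels, registers & surrounding code are
 * illustrative only; the real users are the atomic & cmpxchg()
 * implementations):
 *
 *	__SYNC(full, loongson3_war)	# case 1: before the LL
 * 1:	ll	t0, 0(a0)
 *	bne	t0, a1, 2f		# branch target outside the loop
 *	move	t1, a2
 *	sc	t1, 0(a0)
 *	beqz	t1, 1b
 * 2:	__SYNC(full, loongson3_war)	# case 2: at the branch target
 */
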
/*
 * Some Cavium Octeon CPUs suffer from a bug that causes a single wmb ordering
 * barrier to be ineffective, requiring the use of 2 in sequence to provide an
 * effective barrier as noted by commit 6b07d38aaa52 ("MIPS: Octeon: Use
 * optimized memory barrier primitives."). Here we specify that the affected
 * sync instructions should be emitted twice.
 * Note that this expression is evaluated by the assembler (not the compiler),
 * and that the assembler evaluates '==' as 0 or -1, not 0 or 1.
 */
#ifdef CONFIG_CPU_CAVIUM_OCTEON
# define __SYNC_rpt(type)	(1 - (type == __SYNC_wmb))
#else
# define __SYNC_rpt(type)	1
#endif

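/*
 * Worked through: on Octeon, (type == __SYNC_wmb) evaluates to -1 for the
 * wmb type, so __SYNC_rpt() yields 1 - (-1) = 2 repetitions; for every
 * other type it yields 1 - 0 = 1, the constant used on all other CPUs.
 */
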
/*
 * The main event. Here we actually emit a sync instruction of a given type, if
 * reason is non-zero.
 *
 * In future we have the option of emitting entries in a fixups-style table
 * here that would allow us to opportunistically remove some sync instructions
 * when we detect at runtime that we're running on a CPU that doesn't need
 * them.
 */
#ifdef CONFIG_CPU_HAS_SYNC
# define ____SYNC(_type, _reason, _else)			\
	.if	(( _type ) != -1) && ( _reason );		\
	.set	push;						\
	.set	MIPS_ISA_LEVEL_RAW;				\
	.rept	__SYNC_rpt(_type);				\
	sync	_type;						\
	.endr;							\
	.set	pop;						\
	.else;							\
	_else;							\
	.endif
#else
# define ____SYNC(_type, _reason, _else)
#endif

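/*
 * A sketch of the resulting assembly: __SYNC(full, always) on a typical
 * (non-Octeon) CONFIG_CPU_HAS_SYNC kernel resolves, once the .if & .rept
 * are evaluated, to:
 *
 *	.set	push
 *	.set	MIPS_ISA_LEVEL_RAW
 *	sync	0x00
 *	.set	pop
 */
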
/*
 * Preprocessor magic to expand macros used as arguments before we insert them
 * into assembly code.
 */
#ifdef __ASSEMBLY__
# define ___SYNC(type, reason, else)				\
	____SYNC(type, reason, else)
#else
# define ___SYNC(type, reason, else)				\
	__stringify(____SYNC(type, reason, else))
#endif

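/*
 * Tracing one expansion in the C case: __SYNC(full, always) pastes its
 * arguments to form ___SYNC(__SYNC_full, __SYNC_always, ); the extra level
 * of expansion resolves those to 0x00 & (1 << 0) before __stringify()
 * turns the ____SYNC() body into a string suitable for use inside asm().
 */
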
#define __SYNC(type, reason)					\
	___SYNC(__SYNC_##type, __SYNC_##reason, )
#define __SYNC_ELSE(type, reason, else)				\
	___SYNC(__SYNC_##type, __SYNC_##reason, else)

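/*
 * Usage sketch (callers & else-clauses here are illustrative): from C these
 * expand to strings for inline assembly, whilst in .S files they expand
 * directly to the directives above:
 *
 *	asm volatile(__SYNC(full, always) ::: "memory");
 *	asm volatile(__SYNC_ELSE(full, weak_ordering, nop) ::: "memory");
 *
 * The second form emits a sync on weakly ordered systems & a nop otherwise.
 */
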
#endif /* __MIPS_ASM_SYNC_H__ */