/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_CMPXCHG_32_H
#define _ASM_X86_CMPXCHG_32_H

/*
 * Note: if you use set64_bit(), __cmpxchg64(), or their variants,
 * you need to test for the feature in boot_cpu_data.
 */
/*
 * CMPXCHG8B only writes to the target if we had the previous
 * value in registers, otherwise it acts as a read and gives us the
 * "new previous" value.  That is why there is a loop.  Preloading
 * EDX:EAX is a performance optimization: in the common case it means
 * we need only one locked operation.
 *
 * A SIMD/3DNOW!/MMX/FPU 64-bit store here would require at the very
 * least an FPU save and/or %cr0.ts manipulation.
 *
 * cmpxchg8b must be used with the lock prefix here to allow the
 * instruction to be executed atomically.  We need the reader side to
 * see a coherent 64-bit value.
 */
static inline void set_64bit(volatile u64 *ptr, u64 value)
{
	u32 low  = value;
	u32 high = value >> 32;
	u64 prev = *ptr;

	asm volatile("\n1:\t"
		     LOCK_PREFIX "cmpxchg8b %0\n\t"
		     "jnz 1b"
		     : "=m" (*ptr), "+A" (prev)
		     : "b" (low), "c" (high)
		     : "memory");
}
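/*
 * Usage sketch (not part of this header): set_64bit() provides a single
 * tear-free 64-bit store on 32-bit x86, e.g. for publishing a value that
 * concurrent readers may observe.  The shared_ns variable and
 * publish_timestamp() helper below are hypothetical.
 *
 *	static volatile u64 shared_ns;
 *
 *	static void publish_timestamp(u64 ns)
 *	{
 *		set_64bit(&shared_ns, ns);	// atomic 64-bit store, no tearing
 *	}
 */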

#ifdef CONFIG_X86_CMPXCHG64
#define arch_cmpxchg64(ptr, o, n)					\
	((__typeof__(*(ptr)))__cmpxchg64((ptr), (unsigned long long)(o), \
					 (unsigned long long)(n)))
#define arch_cmpxchg64_local(ptr, o, n)					\
	((__typeof__(*(ptr)))__cmpxchg64_local((ptr), (unsigned long long)(o), \
					       (unsigned long long)(n)))
#endif

static inline u64 __cmpxchg64(volatile u64 *ptr, u64 old, u64 new)
{
	u64 prev;
	asm volatile(LOCK_PREFIX "cmpxchg8b %1"
		     : "=A" (prev),
		       "+m" (*ptr)
		     : "b" ((u32)new),
		       "c" ((u32)(new >> 32)),
		       "0" (old)
		     : "memory");
	return prev;
}

static inline u64 __cmpxchg64_local(volatile u64 *ptr, u64 old, u64 new)
{
	u64 prev;
	asm volatile("cmpxchg8b %1"
		     : "=A" (prev),
		       "+m" (*ptr)
		     : "b" ((u32)new),
		       "c" ((u32)(new >> 32)),
		       "0" (old)
		     : "memory");
	return prev;
}
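/*
 * Usage sketch (an assumption, not from this file): the standard CAS
 * retry loop built on arch_cmpxchg64(), here implementing a lock-free
 * 64-bit add.  "counter" and counter_add() are hypothetical.  A torn
 * READ_ONCE() on 32-bit x86 is harmless here: it merely forces one
 * extra retry, since cmpxchg8b itself compares the full 64 bits.
 *
 *	static u64 counter;
 *
 *	static u64 counter_add(u64 delta)
 *	{
 *		u64 old, new;
 *
 *		do {
 *			old = READ_ONCE(counter);
 *			new = old + delta;
 *		} while (arch_cmpxchg64(&counter, old, new) != old);
 *
 *		return new;
 *	}
 */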

#ifndef CONFIG_X86_CMPXCHG64
/*
 * Building a kernel capable of running on 80386 and 80486: it may be
 * necessary to emulate cmpxchg8b on CPUs that lack it.
 */

#define arch_cmpxchg64(ptr, o, n)				\
({								\
	__typeof__(*(ptr)) __ret;				\
	__typeof__(*(ptr)) __old = (o);				\
	__typeof__(*(ptr)) __new = (n);				\
	alternative_io(LOCK_PREFIX_HERE				\
			"call cmpxchg8b_emu",			\
			"lock; cmpxchg8b (%%esi)",		\
			X86_FEATURE_CX8,			\
			"=A" (__ret),				\
			"S" ((ptr)), "0" (__old),		\
			"b" ((unsigned int)__new),		\
			"c" ((unsigned int)(__new>>32))		\
			: "memory");				\
	__ret; })

#define arch_cmpxchg64_local(ptr, o, n)				\
({								\
	__typeof__(*(ptr)) __ret;				\
	__typeof__(*(ptr)) __old = (o);				\
	__typeof__(*(ptr)) __new = (n);				\
	alternative_io("call cmpxchg8b_emu",			\
			"cmpxchg8b (%%esi)",			\
			X86_FEATURE_CX8,			\
			"=A" (__ret),				\
			"S" ((ptr)), "0" (__old),		\
			"b" ((unsigned int)__new),		\
			"c" ((unsigned int)(__new>>32))		\
			: "memory");				\
	__ret; })

#endif

#define system_has_cmpxchg_double()	boot_cpu_has(X86_FEATURE_CX8)
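/*
 * Per the note at the top of this file, callers that may run on CPUs
 * without CMPXCHG8B should test the feature first.  A minimal sketch,
 * assuming <asm/cpufeature.h> is included; fallback_cmpxchg64() is a
 * hypothetical helper, not defined here:
 *
 *	if (boot_cpu_has(X86_FEATURE_CX8))
 *		prev = __cmpxchg64(ptr, old, new);
 *	else
 *		prev = fallback_cmpxchg64(ptr, old, new); // hypothetical
 */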

#endif /* _ASM_X86_CMPXCHG_32_H */