/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __TOOLS_LINUX_SPARC64_BARRIER_H
#define __TOOLS_LINUX_SPARC64_BARRIER_H

/* Copied from the kernel sources to tools/:
 *
 * These are here in an effort to more fully work around Spitfire Errata
 * #51. Essentially, if a memory barrier occurs soon after a mispredicted
 * branch, the chip can stop executing instructions until a trap occurs.
 * Therefore, if interrupts are disabled, the chip can hang forever.
 *
 * It used to be believed that the memory barrier had to be right in the
 * delay slot, but a case has been traced recently wherein the memory barrier
 * was one instruction after the branch delay slot and the chip still hung.
 * The offending sequence was the following in sym_wakeup_done() of the
 * sym53c8xx_2 driver:
 *
 *	call	sym_ccb_from_dsa, 0
 *	 movge	%icc, 0, %l0
 *	brz,pn	%o0, .LL1303
 *	 mov	%o0, %l2
 *	membar	#LoadLoad
 *
 * The branch has to be mispredicted for the bug to occur. Therefore, we put
 * the memory barrier explicitly into a "branch always, predicted taken"
 * delay slot to avoid the problem case.
 */
#define membar_safe(type) \
do {	__asm__ __volatile__("ba,pt	%%xcc, 1f\n\t" \
			     " membar	" type "\n" \
			     "1:\n" \
			     : : : "memory"); \
} while (0)

/* The kernel always executes in TSO memory model these days,
 * and furthermore most sparc64 chips implement more stringent
 * memory ordering than required by the specifications.
 */
#define mb()	membar_safe("#StoreLoad")
#define rmb()	__asm__ __volatile__("":::"memory")
#define wmb()	__asm__ __volatile__("":::"memory")

#define smp_store_release(p, v)			\
do {						\
	barrier();				\
	WRITE_ONCE(*p, v);			\
} while (0)

#define smp_load_acquire(p)			\
({						\
	typeof(*p) ___p1 = READ_ONCE(*p);	\
	barrier();				\
	___p1;					\
})

#endif /* !(__TOOLS_LINUX_SPARC64_BARRIER_H) */
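
/* Usage sketch (illustrative only, not part of the kernel header): the
 * acquire/release pair above is typically used for a one-way data handoff
 * between a producer and a consumer, e.g. a ring-buffer head/tail update.
 * The names rb, buf, mask, head, tail and record below are hypothetical.
 *
 *	// producer: write the record, then publish it by advancing head;
 *	// smp_store_release() orders the data store before the head store
 *	rb->buf[head & rb->mask] = record;
 *	smp_store_release(&rb->head, head + 1);
 *
 *	// consumer: read head with acquire semantics, then any record
 *	// published before that head value is safe to read
 *	unsigned long head = smp_load_acquire(&rb->head);
 *	if (tail != head)
 *		record = rb->buf[tail & rb->mask];
 */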