/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __PARISC_LDCW_H
#define __PARISC_LDCW_H

#ifndef CONFIG_PA20
/* Because kmalloc only guarantees 8-byte alignment for kmalloc'd data,
   and GCC only guarantees 8-byte alignment for stack locals, we can't
   be assured of 16-byte alignment for atomic lock data even if we
   specify "__attribute ((aligned(16)))" in the type declaration.  So,
   we use a struct containing an array of four ints for the atomic lock
   type and dynamically select the 16-byte aligned int from the array
   for the semaphore. */

#define __PA_LDCW_ALIGNMENT	16
#define __PA_LDCW_ALIGN_ORDER	4
#define __ldcw_align(a) ({					\
	unsigned long __ret = (unsigned long) &(a)->lock[0];	\
	__ret = (__ret + __PA_LDCW_ALIGNMENT - 1)		\
		& ~(__PA_LDCW_ALIGNMENT - 1);			\
	(volatile unsigned int *) __ret;			\
})
#define __LDCW	"ldcw"

#else /*CONFIG_PA20*/
/* From: "Jim Hull" <jim.hull of hp.com>
   I've attached a summary of the change, but basically, for PA 2.0, as
   long as the ",CO" (coherent operation) completer is specified, then the
   16-byte alignment requirement for ldcw and ldcd is relaxed, and instead
   they only require "natural" alignment (4-byte for ldcw, 8-byte for
   ldcd). */

#define __PA_LDCW_ALIGNMENT	4
#define __PA_LDCW_ALIGN_ORDER	2
#define __ldcw_align(a) (&(a)->slock)
#define __LDCW	"ldcw,co"

#endif /*!CONFIG_PA20*/

/* LDCW, the only atomic read-write operation PA-RISC has. *sigh*.
   We don't explicitly expose that "*a" may be written as reload
   fails to find a register in class R1_REGS when "a" needs to be
   reloaded when generating 64-bit PIC code.  Instead, we clobber
   memory to indicate to the compiler that the assembly code reads
   or writes to items other than those listed in the input and output
   operands.  This may pessimize the code somewhat but __ldcw is
   usually used within code blocks surrounded by memory barriers. */
#define __ldcw(a) ({						\
	unsigned __ret;						\
	__asm__ __volatile__(__LDCW " 0(%1),%0"		\
		: "=r" (__ret) : "r" (a) : "memory");		\
	__ret;							\
})

#ifdef CONFIG_SMP
# define __lock_aligned __section(".data..lock_aligned")
#endif

#endif /* __PARISC_LDCW_H */
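
/*
 * Usage sketch: how __ldcw_align() and __ldcw() are typically combined
 * to take a lock, loosely modeled on the PA-RISC arch_spin_lock()
 * pattern.  arch_spinlock_t (with its lock[]/slock member) and
 * cpu_relax() are assumed to be provided by other headers, and the
 * function name below is purely illustrative.  ldcw returns the
 * previous lock word and stores zero, so reading back zero means the
 * lock was already held by someone else.
 */
static inline void __ldcw_example_lock(arch_spinlock_t *x)
{
	/* Select the suitably aligned word inside the lock structure. */
	volatile unsigned int *a = __ldcw_align(x);

	/* Spin until ldcw observes a non-zero (free) lock word. */
	while (__ldcw(a) == 0)
		while (*a == 0)
			cpu_relax();
}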