/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __PARISC_LDCW_H
#define __PARISC_LDCW_H

#ifndef CONFIG_PA20
/* Because kmalloc only guarantees 8-byte alignment for kmalloc'd data,
   and GCC only guarantees 8-byte alignment for stack locals, we can't
   be assured of 16-byte alignment for atomic lock data even if we
   specify "__attribute ((aligned(16)))" in the type declaration.  So,
   we use a struct containing an array of four ints for the atomic lock
   type and dynamically select the 16-byte aligned int from the array
   for the semaphore. */

#define __PA_LDCW_ALIGNMENT	16
#define __PA_LDCW_ALIGN_ORDER	4
#define __ldcw_align(a) ({					\
	unsigned long __ret = (unsigned long) &(a)->lock[0];	\
	__ret = (__ret + __PA_LDCW_ALIGNMENT - 1)		\
		& ~(__PA_LDCW_ALIGNMENT - 1);			\
	(volatile unsigned int *) __ret;			\
})
#define __LDCW	"ldcw"

#else /*CONFIG_PA20*/
/* From: "Jim Hull" <jim.hull of hp.com>
   I've attached a summary of the change, but basically, for PA 2.0, as
   long as the ",CO" (coherent operation) completer is specified, then the
   16-byte alignment requirement for ldcw and ldcd is relaxed, and instead
   they only require "natural" alignment (4-byte for ldcw, 8-byte for
   ldcd). */

#define __PA_LDCW_ALIGNMENT	4
#define __PA_LDCW_ALIGN_ORDER	2
#define __ldcw_align(a) (&(a)->slock)
#define __LDCW	"ldcw,co"

#endif /*!CONFIG_PA20*/

/* LDCW, the only atomic read-write operation PA-RISC has. *sigh*.
   We don't explicitly expose that "*a" may be written as reload
   fails to find a register in class R1_REGS when "a" needs to be
   reloaded when generating 64-bit PIC code.  Instead, we clobber
   memory to indicate to the compiler that the assembly code reads
   or writes to items other than those listed in the input and output
   operands.  This may pessimize the code somewhat but __ldcw is
   usually used within code blocks surrounded by memory barriers. */
#define __ldcw(a) ({						\
	unsigned __ret;						\
	__asm__ __volatile__(__LDCW " 0(%1),%0"		\
		: "=r" (__ret) : "r" (a) : "memory");		\
	__ret;							\
})

#ifdef CONFIG_SMP
# define __lock_aligned __section(".data..lock_aligned")
#endif

#endif /* __PARISC_LDCW_H */
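
/*
 * Usage sketch: how __ldcw_align() and __ldcw() are typically combined
 * to take a lock, loosely modeled on the PA-RISC arch_spin_lock()
 * pattern.  arch_spinlock_t (with its lock[]/slock member) and
 * cpu_relax() are assumed to be provided by other headers, and the
 * function name below is purely illustrative.  ldcw returns the
 * previous lock word and stores zero, so reading back zero means the
 * lock was already held by someone else.
 */
static inline void __ldcw_example_lock(arch_spinlock_t *x)
{
	/* Select the suitably aligned word inside the lock structure. */
	volatile unsigned int *a = __ldcw_align(x);

	/* Spin until ldcw observes a non-zero (free) lock word. */
	while (__ldcw(a) == 0)
		while (*a == 0)
			cpu_relax();
}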