/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Based on arch/arm/include/asm/barrier.h
 *
 * Copyright (C) 2012 ARM Ltd.
 * Copyright (C) 2013 Regents of the University of California
 * Copyright (C) 2017 SiFive
 */

#ifndef _ASM_RISCV_BARRIER_H
#define _ASM_RISCV_BARRIER_H

#ifndef __ASSEMBLY__

#define nop()		__asm__ __volatile__ ("nop")

#define RISCV_FENCE(p, s) \
	__asm__ __volatile__ ("fence " #p "," #s : : : "memory")

/* These barriers need to enforce ordering on both devices or memory. */
#define mb()		RISCV_FENCE(iorw,iorw)
#define rmb()		RISCV_FENCE(ir,ir)
#define wmb()		RISCV_FENCE(ow,ow)

/* These barriers do not need to enforce ordering on devices, just memory. */
#define __smp_mb()	RISCV_FENCE(rw,rw)
#define __smp_rmb()	RISCV_FENCE(r,r)
#define __smp_wmb()	RISCV_FENCE(w,w)

#define __smp_store_release(p, v)					\
do {									\
	compiletime_assert_atomic_type(*p);				\
	RISCV_FENCE(rw,w);						\
	WRITE_ONCE(*p, v);						\
} while (0)

#define __smp_load_acquire(p)						\
({									\
	typeof(*p) ___p1 = READ_ONCE(*p);				\
	compiletime_assert_atomic_type(*p);				\
	RISCV_FENCE(r,rw);						\
	___p1;								\
})

/*
 * This is a very specific barrier: it's currently only used in two places in
 * the kernel, both in the scheduler.  See include/linux/spinlock.h for the two
 * orderings it guarantees, but the "critical section is RCsc" guarantee
 * mandates a barrier on RISC-V.  The sequence looks like:
 *
 *    lr.aq lock
 *    sc    lock <= LOCKED
 *    smp_mb__after_spinlock()
 *    // critical section
 *    lr    lock
 *    sc.rl lock <= UNLOCKED
 *
 * The AQ/RL pair provides a RCpc critical section, but there's not really any
 * way we can take advantage of that here because the ordering is only enforced
 * on that one lock.  Thus, we're just doing a full fence.
 *
 * Since we allow writeX to be called from preemptive regions we need at least
 * an "o" in the predecessor set to ensure device writes are visible before the
 * task is marked as available for scheduling on a new hart.  While I don't see
 * any concrete reason we need a full IO fence, it seems safer to just upgrade
 * this in order to avoid any IO crossing a scheduling boundary.  In both
 * instances the scheduler pairs this with an mb(), so nothing is necessary on
 * the new hart.
 */
#define smp_mb__after_spinlock()	RISCV_FENCE(iorw,iorw)

#include <asm-generic/barrier.h>

#endif /* __ASSEMBLY__ */

#endif /* _ASM_RISCV_BARRIER_H */
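
/*
 * Illustrative sketch, not part of the original header: a hypothetical
 * message-passing pair showing how the release/acquire macros above map onto
 * the RISC-V fences.  The names example_data, example_ready, producer() and
 * consumer() are invented for this example; generic kernel code would
 * normally use the smp_store_release()/smp_load_acquire() wrappers pulled in
 * via asm-generic/barrier.h rather than the __smp_* forms directly.
 *
 *	static int example_data;
 *	static int example_ready;
 *
 *	void producer(void)
 *	{
 *		example_data = 42;			// plain store
 *		__smp_store_release(&example_ready, 1);	// fence rw,w; then store
 *	}
 *
 *	int consumer(void)
 *	{
 *		if (__smp_load_acquire(&example_ready))	// load; then fence r,rw
 *			return example_data;		// ordered after the flag
 *							// load, so it sees 42
 *		return -1;
 *	}
 */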