xref: /OK3568_Linux_fs/kernel/arch/riscv/lib/delay.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun // SPDX-License-Identifier: GPL-2.0-only
2*4882a593Smuzhiyun /*
3*4882a593Smuzhiyun  * Copyright (C) 2012 Regents of the University of California
4*4882a593Smuzhiyun  */
5*4882a593Smuzhiyun 
6*4882a593Smuzhiyun #include <linux/delay.h>
7*4882a593Smuzhiyun #include <linux/param.h>
8*4882a593Smuzhiyun #include <linux/timex.h>
9*4882a593Smuzhiyun #include <linux/export.h>
10*4882a593Smuzhiyun 
11*4882a593Smuzhiyun /*
12*4882a593Smuzhiyun  * This is copied from arch/arm/include/asm/delay.h
13*4882a593Smuzhiyun  *
14*4882a593Smuzhiyun  * Loop (or tick) based delay:
15*4882a593Smuzhiyun  *
16*4882a593Smuzhiyun  * loops = loops_per_jiffy * jiffies_per_sec * delay_us / us_per_sec
17*4882a593Smuzhiyun  *
18*4882a593Smuzhiyun  * where:
19*4882a593Smuzhiyun  *
20*4882a593Smuzhiyun  * jiffies_per_sec = HZ
21*4882a593Smuzhiyun  * us_per_sec = 1000000
22*4882a593Smuzhiyun  *
23*4882a593Smuzhiyun  * Therefore the constant part is HZ / 1000000 which is a small
24*4882a593Smuzhiyun  * fractional number. To make this usable with integer math, we
25*4882a593Smuzhiyun  * scale up this constant by 2^31, perform the actual multiplication,
26*4882a593Smuzhiyun  * and scale the result back down by 2^31 with a simple shift:
27*4882a593Smuzhiyun  *
28*4882a593Smuzhiyun  * loops = (loops_per_jiffy * delay_us * UDELAY_MULT) >> 31
29*4882a593Smuzhiyun  *
30*4882a593Smuzhiyun  * where:
31*4882a593Smuzhiyun  *
32*4882a593Smuzhiyun  * UDELAY_MULT = 2^31 * HZ / 1000000
33*4882a593Smuzhiyun  *             = (2^31 / 1000000) * HZ
34*4882a593Smuzhiyun  *             = 2147.483648 * HZ
35*4882a593Smuzhiyun  *             = 2147 * HZ + 483648 * HZ / 1000000
36*4882a593Smuzhiyun  *
37*4882a593Smuzhiyun  * 31 is the biggest scale shift value that won't overflow 32 bits for
38*4882a593Smuzhiyun  * delay_us * UDELAY_MULT assuming HZ <= 1000 and delay_us <= 2000.
39*4882a593Smuzhiyun  */
40*4882a593Smuzhiyun #define MAX_UDELAY_US	2000
41*4882a593Smuzhiyun #define MAX_UDELAY_HZ	1000
42*4882a593Smuzhiyun #define UDELAY_MULT	(2147UL * HZ + 483648UL * HZ / 1000000UL)
43*4882a593Smuzhiyun #define UDELAY_SHIFT	31
44*4882a593Smuzhiyun 
45*4882a593Smuzhiyun #if HZ > MAX_UDELAY_HZ
46*4882a593Smuzhiyun #error "HZ > MAX_UDELAY_HZ"
47*4882a593Smuzhiyun #endif
48*4882a593Smuzhiyun 
49*4882a593Smuzhiyun /*
50*4882a593Smuzhiyun  * RISC-V supports both UDELAY and NDELAY.  This is largely the same as above,
51*4882a593Smuzhiyun  * but with different constants.  I added 10 bits to the shift to get this, but
52*4882a593Smuzhiyun  * the result is that I need a 64-bit multiply, which is slow on 32-bit
53*4882a593Smuzhiyun  * platforms.
54*4882a593Smuzhiyun  *
55*4882a593Smuzhiyun  * NDELAY_MULT = 2^41 * HZ / 1000000000
56*4882a593Smuzhiyun  *             = (2^41 / 1000000000) * HZ
57*4882a593Smuzhiyun  *             = 2199.02325555 * HZ
58*4882a593Smuzhiyun  *             = 2199 * HZ + 23255550 * HZ / 1000000000
59*4882a593Smuzhiyun  *
60*4882a593Smuzhiyun  * The maximum here is to avoid 64-bit overflow, but it isn't checked as it
61*4882a593Smuzhiyun  * won't happen.
62*4882a593Smuzhiyun  */
63*4882a593Smuzhiyun #define MAX_NDELAY_NS   (1ULL << 42)
64*4882a593Smuzhiyun #define MAX_NDELAY_HZ	MAX_UDELAY_HZ
65*4882a593Smuzhiyun #define NDELAY_MULT	((unsigned long long)(2199ULL * HZ + 23255550ULL * HZ / 1000000000ULL))
66*4882a593Smuzhiyun #define NDELAY_SHIFT	41
67*4882a593Smuzhiyun 
68*4882a593Smuzhiyun #if HZ > MAX_NDELAY_HZ
69*4882a593Smuzhiyun #error "HZ > MAX_NDELAY_HZ"
70*4882a593Smuzhiyun #endif
71*4882a593Smuzhiyun 
__delay(unsigned long cycles)72*4882a593Smuzhiyun void __delay(unsigned long cycles)
73*4882a593Smuzhiyun {
74*4882a593Smuzhiyun 	u64 t0 = get_cycles();
75*4882a593Smuzhiyun 
76*4882a593Smuzhiyun 	while ((unsigned long)(get_cycles() - t0) < cycles)
77*4882a593Smuzhiyun 		cpu_relax();
78*4882a593Smuzhiyun }
79*4882a593Smuzhiyun EXPORT_SYMBOL(__delay);
80*4882a593Smuzhiyun 
udelay(unsigned long usecs)81*4882a593Smuzhiyun void udelay(unsigned long usecs)
82*4882a593Smuzhiyun {
83*4882a593Smuzhiyun 	u64 ucycles = (u64)usecs * lpj_fine * UDELAY_MULT;
84*4882a593Smuzhiyun 	u64 n;
85*4882a593Smuzhiyun 
86*4882a593Smuzhiyun 	if (unlikely(usecs > MAX_UDELAY_US)) {
87*4882a593Smuzhiyun 		n = (u64)usecs * riscv_timebase;
88*4882a593Smuzhiyun 		do_div(n, 1000000);
89*4882a593Smuzhiyun 
90*4882a593Smuzhiyun 		__delay(n);
91*4882a593Smuzhiyun 		return;
92*4882a593Smuzhiyun 	}
93*4882a593Smuzhiyun 
94*4882a593Smuzhiyun 	__delay(ucycles >> UDELAY_SHIFT);
95*4882a593Smuzhiyun }
96*4882a593Smuzhiyun EXPORT_SYMBOL(udelay);
97*4882a593Smuzhiyun 
ndelay(unsigned long nsecs)98*4882a593Smuzhiyun void ndelay(unsigned long nsecs)
99*4882a593Smuzhiyun {
100*4882a593Smuzhiyun 	/*
101*4882a593Smuzhiyun 	 * This doesn't bother checking for overflow, as it won't happen (it's
102*4882a593Smuzhiyun 	 * an hour) of delay.
103*4882a593Smuzhiyun 	 */
104*4882a593Smuzhiyun 	unsigned long long ncycles = nsecs * lpj_fine * NDELAY_MULT;
105*4882a593Smuzhiyun 	__delay(ncycles >> NDELAY_SHIFT);
106*4882a593Smuzhiyun }
107*4882a593Smuzhiyun EXPORT_SYMBOL(ndelay);
108