// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2012 Regents of the University of California
 */

#include <linux/delay.h>
#include <linux/param.h>
#include <linux/timex.h>
#include <linux/export.h>

/*
 * This is copied from arch/arm/include/asm/delay.h
 *
 * Loop (or tick) based delay:
 *
 *   loops = loops_per_jiffy * jiffies_per_sec * delay_us / us_per_sec
 *
 * where:
 *
 *   jiffies_per_sec = HZ
 *   us_per_sec = 1000000
 *
 * Therefore the constant part is HZ / 1000000 which is a small
 * fractional number. To make this usable with integer math, we
 * scale up this constant by 2^31, perform the actual multiplication,
 * and scale the result back down by 2^31 with a simple shift:
 *
 *   loops = (loops_per_jiffy * delay_us * UDELAY_MULT) >> 31
 *
 * where:
 *
 *   UDELAY_MULT = 2^31 * HZ / 1000000
 *               = (2^31 / 1000000) * HZ
 *               = 2147.483648 * HZ
 *               = 2147 * HZ + 483648 * HZ / 1000000
 *
 * 31 is the biggest scale shift value that won't overflow 32 bits for
 * delay_us * UDELAY_MULT assuming HZ <= 1000 and delay_us <= 2000.
 */
#define MAX_UDELAY_US	2000
#define MAX_UDELAY_HZ	1000
#define UDELAY_MULT	(2147UL * HZ + 483648UL * HZ / 1000000UL)
#define UDELAY_SHIFT	31

#if HZ > MAX_UDELAY_HZ
#error "HZ > MAX_UDELAY_HZ"
#endif

/*
 * RISC-V supports both UDELAY and NDELAY. This is largely the same as above,
 * but with different constants. The shift gains 10 bits of precision, at the
 * cost of a 64-bit multiply, which is slow on 32-bit platforms.
 *
 *   NDELAY_MULT = 2^41 * HZ / 1000000000
 *               = (2^41 / 1000000000) * HZ
 *               = 2199.02325555 * HZ
 *               = 2199 * HZ + 23255550 * HZ / 1000000000
 *
 * The maximum exists to avoid 64-bit overflow, but it isn't checked because
 * such an overflow won't happen in practice (see the comment in ndelay()).
 */
#define MAX_NDELAY_NS	(1ULL << 42)
#define MAX_NDELAY_HZ	MAX_UDELAY_HZ
#define NDELAY_MULT	((unsigned long long)(2199ULL * HZ + 23255550ULL * HZ / 1000000000ULL))
#define NDELAY_SHIFT	41

#if HZ > MAX_NDELAY_HZ
#error "HZ > MAX_NDELAY_HZ"
#endif
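/*
 * Sanity check of the constants above (illustrative arithmetic, assuming
 * HZ = 1000, the maximum supported here):
 *
 *   UDELAY_MULT = 2147 * 1000 + 483648 * 1000 / 1000000
 *               = 2147000 + 483 = 2147483
 *
 * versus the exact 2^31 * 1000 / 1000000 = 2147483.648, and the worst-case
 * 32-bit product delay_us * UDELAY_MULT = 2000 * 2147483 = 4294966000 stays
 * just below 2^32 = 4294967296, as claimed. Likewise:
 *
 *   NDELAY_MULT = 2199 * 1000 + 23255550 * 1000 / 1000000000
 *               = 2199000 + 23 = 2199023
 *
 * versus the exact 2^41 * 1000 / 1000000000 = 2199023.255552, and
 * MAX_NDELAY_NS = 2^42 ns is roughly 4398 seconds, i.e. over an hour.
 */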
void __delay(unsigned long cycles)
{
	u64 t0 = get_cycles();

	/* The unsigned subtraction keeps the comparison correct across
	 * cycle-counter wraparound. */
	while ((unsigned long)(get_cycles() - t0) < cycles)
		cpu_relax();
}
EXPORT_SYMBOL(__delay);

void udelay(unsigned long usecs)
{
	u64 ucycles = (u64)usecs * lpj_fine * UDELAY_MULT;
	u64 n;

	if (unlikely(usecs > MAX_UDELAY_US)) {
		/* Slow path for long delays: convert via the timebase. */
		n = (u64)usecs * riscv_timebase;
		do_div(n, 1000000);

		__delay(n);
		return;
	}

	__delay(ucycles >> UDELAY_SHIFT);
}
EXPORT_SYMBOL(udelay);

void ndelay(unsigned long nsecs)
{
	/*
	 * This doesn't bother checking for overflow, as it won't happen:
	 * that would take over an hour of delay.
	 */
	unsigned long long ncycles = nsecs * lpj_fine * NDELAY_MULT;

	__delay(ncycles >> NDELAY_SHIFT);
}
EXPORT_SYMBOL(ndelay);
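/*
 * Worked example of the udelay() fast path (illustrative values, not from
 * the original source): assume a 10 MHz timebase with HZ = 100, and that
 * lpj_fine has been set to riscv_timebase / HZ = 100000. Then udelay(100)
 * computes
 *
 *   ucycles = 100 * 100000 * UDELAY_MULT
 *           = 100 * 100000 * 214748      (UDELAY_MULT for HZ = 100)
 *           = 2147480000000
 *
 *   ucycles >> 31 = 999
 *
 * so __delay() spins for 999 timer cycles, which is ~99.9 us at 10 MHz;
 * the truncation costs about one cycle relative to the exact value.
 */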