/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_HASH_H
#define _ASM_HASH_H

/*
 * If CONFIG_M68000=y (original mc68000/010), this file is #included
 * to work around the lack of a MULU.L instruction.
 */

#define HAVE_ARCH__HASH_32 1
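/*
 * Usage note (a sketch, not something defined in this file): with
 * HAVE_ARCH__HASH_32 set, <linux/hash.h> uses this arch __hash_32()
 * in place of its generic multiply, and the generic hash_32() wrapper
 * then folds the result down to 'bits' bits by keeping the well-mixed
 * high bits, along the lines of:
 *
 *	static inline u32 hash_32(u32 val, unsigned int bits)
 *	{
 *		return __hash_32(val) >> (32 - bits);
 *	}
 */
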
/*
 * While it would be legal to substitute a different hash operation
 * entirely, let's keep it simple and just use an optimized multiply
 * by GOLDEN_RATIO_32 = 0x61C88647.
 *
 * The best way to do that appears to be to multiply by 0x8647 with
 * shifts and adds, and use mulu.w to multiply the high half by 0x61C8.
 *
 * Because the 68000 has multi-cycle shifts, this addition chain is
 * chosen to minimise the shift distances.
 *
 * Despite every attempt to spoon-feed it simple operations, GCC
 * 6.1.1 doggedly insists on doing annoying things like converting
 * "lsl.l #2,<reg>" (12 cycles) to two adds (8+8 cycles).
 *
 * It also likes to notice two shifts in a row, like "a = x << 2" and
 * "a <<= 7", and convert that to "a = x << 9".  But shifts longer
 * than 8 bits are extra-slow on m68k, so that's a lose.
 *
 * Since the 68000 is a very simple in-order processor with no
 * instruction scheduling effects on execution time, we can safely
 * take it out of GCC's hands and write one big asm() block.
 *
 * Without calling overhead, this operation is 30 bytes (14 instructions
 * plus one immediate constant) and 166 cycles.
 *
 * (Because %2 is fetched twice, it can't be postincrement, and thus it
 * can't be a fully general "g" or "m".  Register is preferred, but
 * offsettable memory or immediate will work.)
 */
static inline u32 __attribute_const__ __hash_32(u32 x)
{
	u32 a, b;

	asm(   "move.l %2,%0"	/* a = x * 0x0001 */
	"\n	lsl.l #2,%0"	/* a = x * 0x0004 */
	"\n	move.l %0,%1"	/* b = x * 0x0004 */
	"\n	lsl.l #7,%0"	/* a = x * 0x0200 */
	"\n	add.l %2,%0"	/* a = x * 0x0201 */
	"\n	add.l %0,%1"	/* b = x * 0x0205 */
	"\n	add.l %0,%0"	/* a = x * 0x0402 */
	"\n	add.l %0,%1"	/* b = x * 0x0607 */
	"\n	lsl.l #5,%0"	/* a = x * 0x8040 */
	: "=&d,d" (a), "=&r,r" (b)
	: "r,roi?" (x));	/* a+b = x*0x8647 */

	return ((u16)(x*0x61c8) << 16) + a + b;
}
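
/*
 * For reference, a plain-C model of the addition chain above, mirroring
 * the asm one instruction per statement so the running multipliers can
 * be checked against the comments.  This is a sketch for bookkeeping
 * only; the name __hash_32_model is hypothetical and nothing in the
 * kernel uses it.
 */
static inline u32 __attribute_const__ __hash_32_model(u32 x)
{
	u32 a, b;

	a = x;				/* a = x * 0x0001 */
	a <<= 2;			/* a = x * 0x0004 */
	b = a;				/* b = x * 0x0004 */
	a <<= 7;			/* a = x * 0x0200 */
	a += x;				/* a = x * 0x0201 */
	b += a;				/* b = x * 0x0205 */
	a += a;				/* a = x * 0x0402 */
	b += a;				/* b = x * 0x0607 */
	a <<= 5;			/* a = x * 0x8040 */
					/* a + b = x * 0x8647 */
	return ((u16)(x * 0x61c8) << 16) + a + b; /* x * 0x61C88647 */
}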

#endif /* _ASM_HASH_H */