/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Accelerated CRC32(C) using AArch64 CRC instructions
 *
 * Copyright (C) 2016 - 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
 */

#include <linux/linkage.h>
#include <asm/alternative.h>
#include <asm/assembler.h>

	.arch		armv8-a+crc

	/*
	 * Fold the buffer at x1 (length in x2) into the CRC value passed in
	 * w0 and return the result in w0.  \c selects the CRC32C instruction
	 * forms when set to "c", plain CRC32 otherwise.
	 */
	.macro		__crc32, c
	cmp		x2, #16
	b.lt		8f		// less than 16 bytes

	and		x7, x2, #0x1f	// x7 := len % 32
	and		x2, x2, #~0x1f	// x2 := whole 32-byte blocks
	cbz		x7, 32f		// multiple of 32 bytes

	/* consume the 1..31 byte head so the loop only sees whole blocks */
	and		x8, x7, #0xf
	ldp		x3, x4, [x1]
	add		x8, x8, x1
	add		x1, x1, x7
	ldp		x5, x6, [x8]
CPU_BE(	rev		x3, x3		)
CPU_BE(	rev		x4, x4		)
CPU_BE(	rev		x5, x5		)
CPU_BE(	rev		x6, x6		)

	tst		x7, #8
	crc32\c\()x	w8, w0, x3
	csel		x3, x3, x4, eq
	csel		w0, w0, w8, eq
	tst		x7, #4
	lsr		x4, x3, #32
	crc32\c\()w	w8, w0, w3
	csel		x3, x3, x4, eq
	csel		w0, w0, w8, eq
	tst		x7, #2
	lsr		w4, w3, #16
	crc32\c\()h	w8, w0, w3
	csel		w3, w3, w4, eq
	csel		w0, w0, w8, eq
	tst		x7, #1
	crc32\c\()b	w8, w0, w3
	csel		w0, w0, w8, eq
	tst		x7, #16
	crc32\c\()x	w8, w0, x5
	crc32\c\()x	w8, w8, x6
	csel		w0, w0, w8, eq
	cbz		x2, 0f

	/* main loop: 32 bytes per iteration */
32:	ldp		x3, x4, [x1], #32
	sub		x2, x2, #32
	ldp		x5, x6, [x1, #-16]
CPU_BE(	rev		x3, x3		)
CPU_BE(	rev		x4, x4		)
CPU_BE(	rev		x5, x5		)
CPU_BE(	rev		x6, x6		)
	crc32\c\()x	w0, w0, x3
	crc32\c\()x	w0, w0, x4
	crc32\c\()x	w0, w0, x5
	crc32\c\()x	w0, w0, x6
	cbnz		x2, 32b
0:	ret

	/* fewer than 16 bytes: 8/4/2/1 byte steps keyed off the length bits */
8:	tbz		x2, #3, 4f
	ldr		x3, [x1], #8
CPU_BE(	rev		x3, x3		)
	crc32\c\()x	w0, w0, x3
4:	tbz		x2, #2, 2f
	ldr		w3, [x1], #4
CPU_BE(	rev		w3, w3		)
	crc32\c\()w	w0, w0, w3
2:	tbz		x2, #1, 1f
	ldrh		w3, [x1], #2
CPU_BE(	rev16		w3, w3		)
	crc32\c\()h	w0, w0, w3
1:	tbz		x2, #0, 0f
	ldrb		w3, [x1]
	crc32\c\()b	w0, w0, w3
0:	ret
	.endm

	.align		5
SYM_FUNC_START(crc32_le)
alternative_if_not ARM64_HAS_CRC32
	b		crc32_le_base	// fall back to the generic implementation
alternative_else_nop_endif
	__crc32
SYM_FUNC_END(crc32_le)

	.align		5
SYM_FUNC_START(__crc32c_le)
alternative_if_not ARM64_HAS_CRC32
	b		__crc32c_le_base
alternative_else_nop_endif
	__crc32		c
SYM_FUNC_END(__crc32c_le)
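
/*
 * Usage sketch (illustrative, assuming the usual <linux/crc32.h> prototypes
 * shown below): C callers reach the routines above directly, and per the
 * AAPCS64 the seed arrives in w0 with the buffer pointer and length in
 * x1/x2, matching the register use in the __crc32 macro.
 *
 *	u32 crc32_le(u32 crc, unsigned char const *p, size_t len);
 *	u32 __crc32c_le(u32 crc, unsigned char const *p, size_t len);
 *
 *	u32 crc = crc32_le(~0U, buf, buflen);	// seed with all-ones
 *	crc = ~crc;				// typical final inversion by the caller
 */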