/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (c) 2010-2011, The Linux Foundation. All rights reserved.
 */

/*
 * Hexagon assembly template for a memory-copy routine.
 *
 * Emits one global function, FUNCNAME, which copies "bytes" bytes from
 * "src" to "dst" and returns with r0 = 0.
 *
 * FUNCNAME, src, dst, bytes, loopcount, w_dbuf, d_dbuf and src_dst_sav are
 * not defined in this file.
 * NOTE(review): presumably they are preprocessor macros (the function name
 * and register aliases) provided by the file that includes this template;
 * confirm against the includer.  From the way they are used below, d_dbuf
 * and src_dst_sav have to name 64-bit register pairs and the others 32-bit
 * registers.
 *
 * Registers written here besides those aliases: r3, r4, p0, p1, p3 and the
 * loop0 hardware-loop state.
 *
 * Strategy, decided from the low bits of (dst | src) and (dst ^ src):
 *   bytes == 0                      return immediately
 *   dst, src both 8-byte aligned    8-byte loop, if bytes > 15
 *   dst, src equally misaligned     .Lalign copies 1, 2 and/or 4 bytes so
 *     modulo 8                      both become 8-byte aligned, then
 *                                   restarts at FUNCNAME (if bytes > 15)
 *   dst, src both 4-byte aligned    4-byte loop, if bytes > 7
 *   dst, src both 2-byte aligned    2-byte loop, if bytes > 3
 *   anything else                   1-byte loop
 * Whenever a size threshold is not met, and for whatever is left over after
 * a wide loop, control goes through .Lsmall into the 1-byte loop.
 *
 * Every copy loop is set up with sp1loop0: p3 is false during the first
 * iteration and true afterwards, so the predicated store is skipped the
 * first time round.  Within a packet the store reads the buffer value that
 * the previous iteration loaded, so stores trail loads by one iteration and
 * the packet after each loop (the "epilog") writes the final element.
 */

/* Numerology:
 * WXYZ
 * W: width in bytes
 * X: Load=0, Store=1
 * Y: Location 0=preamble,8=loop,9=epilog
 * Z: Location=0,handler=9
 *
 * The numeric labels below tag the packets that contain a load and/or a
 * store, following the scheme above.  No label ending in 9 (handler) is
 * defined in this file.
 * NOTE(review): presumably the includer defines those handlers and ties
 * them to these labels; confirm there.
 */
	.text
	.global FUNCNAME
	.type FUNCNAME, @function
	.p2align 5
FUNCNAME:
	/* Zero-length request returns at once; also compute alignment probes. */
	{
		p0 = cmp.gtu(bytes,#0)
		if (!p0.new) jump:nt .Ldone
		r3 = or(dst,src)	/* low bits clear only if both are aligned */
		r4 = xor(dst,src)	/* low bits clear if equally (mis)aligned */
	}
	/*
	 * p1 = (bytes > 15) gates the 8-byte paths and is still live when
	 * .Lalign is reached.  src_dst_sav records the current src (high word)
	 * and dst (low word); nothing in this file reads it back.
	 */
	{
		p1 = cmp.gtu(bytes,#15)
		p0 = bitsclr(r3,#7)	/* both pointers 8-byte aligned? */
		if (!p0.new) jump:nt .Loop_not_aligned_8
		src_dst_sav = combine(src,dst)
	}

	/* 8-byte aligned: use the doubleword loop only when bytes > 15. */
	{
		loopcount = lsr(bytes,#3)	/* number of whole doublewords */
		if (!p1) jump .Lsmall
	}
	p3=sp1loop0(.Loop8,loopcount)
.Loop8:
8080:
8180:
	{
		if (p3) memd(dst++#8) = d_dbuf	/* store previous iteration's load */
		d_dbuf = memd(src++#8)
	}:endloop0
8190:
	/* Epilog: store the last doubleword, then handle the remaining bytes. */
	{
		memd(dst++#8) = d_dbuf
		bytes -= asl(loopcount,#3)	/* bytes left = bytes - 8 * loopcount */
		jump .Lsmall
	}

.Loop_not_aligned_8:
	/* Same offset within an 8-byte unit: align both pointers in .Lalign. */
	{
		p0 = bitsclr(r4,#7)
		if (p0.new) jump:nt .Lalign
	}
	{
		p0 = bitsclr(r3,#3)	/* both pointers 4-byte aligned? */
		if (!p0.new) jump:nt .Loop_not_aligned_4
		p1 = cmp.gtu(bytes,#7)
	}

	/* 4-byte aligned: use the word loop only when bytes > 7. */
	{
		if (!p1) jump .Lsmall
		loopcount = lsr(bytes,#2)	/* number of whole words */
	}
	p3=sp1loop0(.Loop4,loopcount)
.Loop4:
4080:
4180:
	{
		if (p3) memw(dst++#4) = w_dbuf	/* store previous iteration's load */
		w_dbuf = memw(src++#4)
	}:endloop0
4190:
	/* Epilog: store the last word, then handle the remaining bytes. */
	{
		memw(dst++#4) = w_dbuf
		bytes -= asl(loopcount,#2)	/* bytes left = bytes - 4 * loopcount */
		jump .Lsmall
	}

.Loop_not_aligned_4:
	{
		p0 = bitsclr(r3,#1)	/* both pointers 2-byte aligned? */
		if (!p0.new) jump:nt .Loop_not_aligned
		p1 = cmp.gtu(bytes,#3)
	}

	/* 2-byte aligned: use the halfword loop only when bytes > 3. */
	{
		if (!p1) jump .Lsmall
		loopcount = lsr(bytes,#1)	/* number of whole halfwords */
	}
	p3=sp1loop0(.Loop2,loopcount)
.Loop2:
2080:
2180:
	{
		if (p3) memh(dst++#2) = w_dbuf	/* store previous iteration's load */
		w_dbuf = memuh(src++#2)
	}:endloop0
2190:
	/* Epilog: store the last halfword, then handle the remaining bytes. */
	{
		memh(dst++#2) = w_dbuf
		bytes -= asl(loopcount,#1)	/* bytes left = bytes - 2 * loopcount */
		jump .Lsmall
	}

.Loop_not_aligned: /* Works for as small as one byte */
	/* Every path into this label has already checked bytes > 0. */
	p3=sp1loop0(.Loop1,bytes)
.Loop1:
1080:
1180:
	{
		if (p3) memb(dst++#1) = w_dbuf	/* store previous iteration's load */
		w_dbuf = memub(src++#1)
	}:endloop0
	/* Done */
1190:
	/* Epilog: store the final byte and return 0 in the same packet. */
	{
		memb(dst) = w_dbuf
		jumpr r31
		r0 = #0
	}

.Lsmall:
	/* Remainder handling: byte loop if anything is left, else fall through. */
	{
		p0 = cmp.gtu(bytes,#0)
		if (p0.new) jump:nt .Loop_not_aligned
	}
.Ldone:
	{
		r0 = #0
		jumpr r31
	}
	.falign
.Lalign:
	/*
	 * Reached when dst and src have identical low three address bits that
	 * are not all zero.  Each step tests one bit of src and, if it is set,
	 * copies that many bytes (1, then 2, then 4) so that both pointers end
	 * up 8-byte aligned.  In each load packet the load is predicated on the
	 * p0 produced by tstbit in that same packet (p0.new).  p1 still holds
	 * (bytes > 15) from the entry sequence and is not rewritten here, so
	 * short copies leave for the byte loop via .Lsmall.
	 */
1000:
	{
		if (p0.new) w_dbuf = memub(src)
		p0 = tstbit(src,#0)
		if (!p1) jump .Lsmall
	}
1100:
	{
		if (p0) memb(dst++#1) = w_dbuf
		if (p0) bytes = add(bytes,#-1)
		if (p0) src = add(src,#1)
	}
2000:
	{
		if (p0.new) w_dbuf = memuh(src)
		p0 = tstbit(src,#1)
		if (!p1) jump .Lsmall
	}
2100:
	{
		if (p0) memh(dst++#2) = w_dbuf
		if (p0) bytes = add(bytes,#-2)
		if (p0) src = add(src,#2)
	}
4000:
	{
		if (p0.new) w_dbuf = memw(src)
		p0 = tstbit(src,#2)
		if (!p1) jump .Lsmall
	}
4100:
	/* After the last fix-up, restart from the top with aligned pointers. */
	{
		if (p0) memw(dst++#4) = w_dbuf
		if (p0) bytes = add(bytes,#-4)
		if (p0) src = add(src,#4)
		jump FUNCNAME
	}
	.size FUNCNAME,.-FUNCNAME