/* SPDX-License-Identifier: GPL-2.0 */
/*
 *
 * Optimized version of the standard copy_page() function
 *
 * Inputs:
 *	in0:	address of target page
 *	in1:	address of source page
 * Output:
 *	no return value
 *
 * Copyright (C) 1999, 2001 Hewlett-Packard Co
 *	Stephane Eranian <eranian@hpl.hp.com>
 *	David Mosberger <davidm@hpl.hp.com>
 *
 * 4/06/01 davidm	Tuned to make it perform well both for cached and uncached copies.
 */
#include <asm/asmmacro.h>
#include <asm/page.h>
#include <asm/export.h>

/*
 * The copy loop is software-pipelined with PIPE_DEPTH stages: a word is
 * loaded while p[0] is set and stored PIPE_DEPTH-1 iterations later, when
 * the same stage predicate has rotated into EPI.
 */
#define PIPE_DEPTH	3
#define EPI		p[PIPE_DEPTH-1]

/*
 * Static register roles:
 *	lcount			loop count loaded into ar.lc
 *	saved_pr/lc/pfs		caller's pr, ar.lc and ar.pfs, restored on exit
 *	src1, src2		source pointers (start at +0 and +8, step 16)
 *	tgt1, tgt2		target pointers (start at +0 and +8, step 16)
 *	srcf, tgtf		prefetch pointers, started 512 bytes ahead
 *	tgt_last		target page address + PAGE_SIZE (prefetch limit)
 */
#define lcount		r16
#define saved_pr	r17
#define saved_lc	r18
#define saved_pfs	r19
#define src1		r20
#define src2		r21
#define tgt1		r22
#define tgt2		r23
#define srcf		r24
#define tgtf		r25
#define tgt_last	r26

/* 8 rotating names of PIPE_DEPTH registers each, rounded up to a multiple of 8 */
#define Nrot		((8*PIPE_DEPTH+7)&~7)

GLOBAL_ENTRY(copy_page)
	.prologue
	.save ar.pfs, saved_pfs
	alloc saved_pfs=ar.pfs,3,Nrot-3,0,Nrot	// Nrot-register frame, all of it rotating

	.rotr t1[PIPE_DEPTH], t2[PIPE_DEPTH], t3[PIPE_DEPTH], t4[PIPE_DEPTH], \
	      t5[PIPE_DEPTH], t6[PIPE_DEPTH], t7[PIPE_DEPTH], t8[PIPE_DEPTH]
	.rotp p[PIPE_DEPTH]

	.save ar.lc, saved_lc
	mov saved_lc=ar.lc
	mov ar.ec=PIPE_DEPTH		// epilogue count: drains the remaining pipeline stages

	mov lcount=PAGE_SIZE/64-1	// each loop iteration moves 64 bytes (8 x ld8/st8)
	.save pr, saved_pr
	mov saved_pr=pr
	mov pr.rot=1<<16		// p16 (= p[0]) on, all other rotating predicates off

	.body

	// The rotating region starts at r32 and therefore overlaps in0/in1, so
	// the page addresses are copied into static registers before the loop
	// issues its first load into t1[0].
	mov src1=in1
	adds src2=8,in1
	mov tgt_last = PAGE_SIZE
	;;
	adds tgt2=8,in0
	add srcf=512,in1
	mov ar.lc=lcount
	mov tgt1=in0
	add tgtf=512,in0
	add tgt_last = tgt_last, in0	// tgt_last = first byte past the target page
	;;
	// Main loop. Loads (stage p[0]) and stores (stage EPI) of different
	// iterations overlap; src1/tgt1 handle the even 8-byte words and
	// src2/tgt2 the odd ones.
1:
(p[0])	ld8 t1[0]=[src1],16
(EPI)	st8 [tgt1]=t1[PIPE_DEPTH-1],16
(p[0])	ld8 t2[0]=[src2],16
(EPI)	st8 [tgt2]=t2[PIPE_DEPTH-1],16
	cmp.ltu p6,p0 = tgtf, tgt_last	// p6: prefetch address still below tgt_last
	;;
(p[0])	ld8 t3[0]=[src1],16
(EPI)	st8 [tgt1]=t3[PIPE_DEPTH-1],16
(p[0])	ld8 t4[0]=[src2],16
(EPI)	st8 [tgt2]=t4[PIPE_DEPTH-1],16
	;;
(p[0])	ld8 t5[0]=[src1],16
(EPI)	st8 [tgt1]=t5[PIPE_DEPTH-1],16
(p[0])	ld8 t6[0]=[src2],16
(EPI)	st8 [tgt2]=t6[PIPE_DEPTH-1],16
	;;
(p[0])	ld8 t7[0]=[src1],16
(EPI)	st8 [tgt1]=t7[PIPE_DEPTH-1],16
(p[0])	ld8 t8[0]=[src2],16
(EPI)	st8 [tgt2]=t8[PIPE_DEPTH-1],16

(p6)	lfetch [srcf], 64		// prefetch source and target 64 bytes further on
(p6)	lfetch [tgtf], 64
	br.ctop.sptk.few 1b		// rotate registers/predicates; loop on ar.lc, then ar.ec
	;;
	mov pr=saved_pr,0xffffffffffff0000	// restore predicates (mask selects p16-p63 only)
	mov ar.pfs=saved_pfs
	mov ar.lc=saved_lc
	br.ret.sptk.many rp
END(copy_page)
EXPORT_SYMBOL(copy_page)