xref: /OK3568_Linux_fs/kernel/arch/ia64/lib/copy_page.S (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun/* SPDX-License-Identifier: GPL-2.0 */
2*4882a593Smuzhiyun/*
3*4882a593Smuzhiyun *
4*4882a593Smuzhiyun * Optimized version of the standard copy_page() function
5*4882a593Smuzhiyun *
6*4882a593Smuzhiyun * Inputs:
7*4882a593Smuzhiyun *	in0:	address of target page
8*4882a593Smuzhiyun *	in1:	address of source page
9*4882a593Smuzhiyun * Output:
10*4882a593Smuzhiyun *	no return value
11*4882a593Smuzhiyun *
12*4882a593Smuzhiyun * Copyright (C) 1999, 2001 Hewlett-Packard Co
13*4882a593Smuzhiyun *	Stephane Eranian <eranian@hpl.hp.com>
14*4882a593Smuzhiyun *	David Mosberger <davidm@hpl.hp.com>
15*4882a593Smuzhiyun *
16*4882a593Smuzhiyun * 4/06/01 davidm	Tuned to make it perform well both for cached and uncached copies.
17*4882a593Smuzhiyun */
18*4882a593Smuzhiyun#include <asm/asmmacro.h>
19*4882a593Smuzhiyun#include <asm/page.h>
20*4882a593Smuzhiyun#include <asm/export.h>
21*4882a593Smuzhiyun
/*
 * Pipeline parameters and register aliases used by copy_page below.
 */
22*4882a593Smuzhiyun#define PIPE_DEPTH	3	/* number of pipeline stages: sizes the .rotr/.rotp arrays and is loaded into ar.ec */
23*4882a593Smuzhiyun#define EPI		p[PIPE_DEPTH-1]	/* predicate of the last stage; guards every st8 in the loop */
24*4882a593Smuzhiyun
25*4882a593Smuzhiyun#define lcount		r16	/* loop count (PAGE_SIZE/64-1) before it is moved into ar.lc */
26*4882a593Smuzhiyun#define saved_pr	r17	/* copy of pr taken on entry */
27*4882a593Smuzhiyun#define saved_lc	r18	/* copy of ar.lc taken on entry */
28*4882a593Smuzhiyun#define saved_pfs	r19	/* ar.pfs value returned by alloc */
29*4882a593Smuzhiyun#define src1		r20	/* source pointer, starts at in1, stepped by 16 */
30*4882a593Smuzhiyun#define src2		r21	/* source pointer, starts at in1+8, stepped by 16 */
31*4882a593Smuzhiyun#define tgt1		r22	/* target pointer, starts at in0, stepped by 16 */
32*4882a593Smuzhiyun#define tgt2		r23	/* target pointer, starts at in0+8, stepped by 16 */
33*4882a593Smuzhiyun#define srcf		r24	/* source prefetch pointer, starts at in1+512, stepped by 64 */
34*4882a593Smuzhiyun#define tgtf		r25	/* target prefetch pointer, starts at in0+512, stepped by 64 */
35*4882a593Smuzhiyun#define tgt_last	r26	/* in0 + PAGE_SIZE: bound that tgtf is compared against */
36*4882a593Smuzhiyun
/*
 * Size of the register frame and of its rotating region as passed to alloc:
 * 8 rotating arrays (t1..t8) of PIPE_DEPTH registers each, rounded up to a
 * multiple of 8.
 */
37*4882a593Smuzhiyun#define Nrot		((8*PIPE_DEPTH+7)&~7)
38*4882a593Smuzhiyun
/*
 * copy_page: copy PAGE_SIZE bytes from the page at in1 to the page at in0.
 *
 * The copy is a counted loop closed by br.ctop with PIPE_DEPTH stages.
 * Each pass issues eight 8-byte loads (first stage, predicate p[0]) and
 * eight 8-byte stores (last stage, predicate EPI), i.e. 64 bytes per pass,
 * which is why ar.lc is set to PAGE_SIZE/64-1.  A store reads
 * t<n>[PIPE_DEPTH-1], the rotated name of the value the matching load wrote
 * to t<n>[0] PIPE_DEPTH-1 passes earlier; ar.ec = PIPE_DEPTH supplies the
 * extra passes that drain those last stores.
 *
 * Two pointers per page (src1/src2 and tgt1/tgt2) start 8 bytes apart and
 * each advance by 16, so together they cover consecutive 8-byte words.
 * srcf/tgtf start 512 bytes into each page and feed lfetch once per pass
 * for as long as tgtf is below the end of the target page.
 *
 * ar.pfs, ar.lc and pr are saved on entry (and described to the unwinder
 * with .save) and restored before returning.  No value is returned.
 *
 * NOTE(review): alloc declares 3 input registers although only in0 and in1
 * are read here.
 */
39*4882a593SmuzhiyunGLOBAL_ENTRY(copy_page)
40*4882a593Smuzhiyun	.prologue
41*4882a593Smuzhiyun	.save ar.pfs, saved_pfs
42*4882a593Smuzhiyun	alloc saved_pfs=ar.pfs,3,Nrot-3,0,Nrot	// 3 inputs, Nrot-3 locals, 0 outputs; all Nrot registers rotate
43*4882a593Smuzhiyun
	// Rotating register arrays: one array per 8-byte word of a 64-byte
	// chunk, PIPE_DEPTH entries each; p[] holds the stage predicates.
44*4882a593Smuzhiyun	.rotr t1[PIPE_DEPTH], t2[PIPE_DEPTH], t3[PIPE_DEPTH], t4[PIPE_DEPTH], \
45*4882a593Smuzhiyun	      t5[PIPE_DEPTH], t6[PIPE_DEPTH], t7[PIPE_DEPTH], t8[PIPE_DEPTH]
46*4882a593Smuzhiyun	.rotp p[PIPE_DEPTH]
47*4882a593Smuzhiyun
48*4882a593Smuzhiyun	.save ar.lc, saved_lc
49*4882a593Smuzhiyun	mov saved_lc=ar.lc	// ar.lc is overwritten below; keep the caller's value
50*4882a593Smuzhiyun	mov ar.ec=PIPE_DEPTH	// epilogue count: one per pipeline stage
51*4882a593Smuzhiyun
52*4882a593Smuzhiyun	mov lcount=PAGE_SIZE/64-1	// each loop pass copies 64 bytes
53*4882a593Smuzhiyun	.save pr, saved_pr
54*4882a593Smuzhiyun	mov saved_pr=pr
55*4882a593Smuzhiyun	mov pr.rot=1<<16	// only bit 16 set: first stage predicate p[0] on, other rotating predicates off
56*4882a593Smuzhiyun
57*4882a593Smuzhiyun	.body
58*4882a593Smuzhiyun
59*4882a593Smuzhiyun	mov src1=in1	// src1 handles source words at offsets 0, 16, 32, ...
60*4882a593Smuzhiyun	adds src2=8,in1	// src2 handles source words at offsets 8, 24, 40, ...
61*4882a593Smuzhiyun	mov tgt_last = PAGE_SIZE	// turned into in0 + PAGE_SIZE in the next group
62*4882a593Smuzhiyun	;;
63*4882a593Smuzhiyun	adds tgt2=8,in0	// tgt2 handles target words at offsets 8, 24, 40, ...
64*4882a593Smuzhiyun	add srcf=512,in1	// source prefetch starts 512 bytes into the page
65*4882a593Smuzhiyun	mov ar.lc=lcount
66*4882a593Smuzhiyun	mov tgt1=in0	// tgt1 handles target words at offsets 0, 16, 32, ...
67*4882a593Smuzhiyun	add tgtf=512,in0	// target prefetch starts 512 bytes into the page
68*4882a593Smuzhiyun	add tgt_last = tgt_last, in0	// tgt_last = first address past the target page
69*4882a593Smuzhiyun	;;
70*4882a593Smuzhiyun1:
	// Words 0 and 1 of the chunk: loads run in the first stage, the
	// matching stores in the last stage; every pointer post-increments by 16.
71*4882a593Smuzhiyun(p[0])	ld8 t1[0]=[src1],16
72*4882a593Smuzhiyun(EPI)	st8 [tgt1]=t1[PIPE_DEPTH-1],16
73*4882a593Smuzhiyun(p[0])	ld8 t2[0]=[src2],16
74*4882a593Smuzhiyun(EPI)	st8 [tgt2]=t2[PIPE_DEPTH-1],16
75*4882a593Smuzhiyun	cmp.ltu p6,p0 = tgtf, tgt_last	// p6 = (tgtf < tgt_last), unsigned: prefetch address still inside the target page
76*4882a593Smuzhiyun	;;
	// Words 2 and 3 of the chunk.
77*4882a593Smuzhiyun(p[0])	ld8 t3[0]=[src1],16
78*4882a593Smuzhiyun(EPI)	st8 [tgt1]=t3[PIPE_DEPTH-1],16
79*4882a593Smuzhiyun(p[0])	ld8 t4[0]=[src2],16
80*4882a593Smuzhiyun(EPI)	st8 [tgt2]=t4[PIPE_DEPTH-1],16
81*4882a593Smuzhiyun	;;
	// Words 4 and 5 of the chunk.
82*4882a593Smuzhiyun(p[0])	ld8 t5[0]=[src1],16
83*4882a593Smuzhiyun(EPI)	st8 [tgt1]=t5[PIPE_DEPTH-1],16
84*4882a593Smuzhiyun(p[0])	ld8 t6[0]=[src2],16
85*4882a593Smuzhiyun(EPI)	st8 [tgt2]=t6[PIPE_DEPTH-1],16
86*4882a593Smuzhiyun	;;
	// Words 6 and 7 of the chunk.
87*4882a593Smuzhiyun(p[0])	ld8 t7[0]=[src1],16
88*4882a593Smuzhiyun(EPI)	st8 [tgt1]=t7[PIPE_DEPTH-1],16
89*4882a593Smuzhiyun(p[0])	ld8 t8[0]=[src2],16
90*4882a593Smuzhiyun(EPI)	st8 [tgt2]=t8[PIPE_DEPTH-1],16
91*4882a593Smuzhiyun
	// Prefetch one line from each page and advance both prefetch pointers
	// by 64; both stop advancing once p6 is false.
92*4882a593Smuzhiyun(p6)	lfetch [srcf], 64
93*4882a593Smuzhiyun(p6)	lfetch [tgtf], 64
94*4882a593Smuzhiyun	br.ctop.sptk.few 1b	// counted-loop branch: rotates the t<n>[]/p[] names and loops while ar.lc/ar.ec allow
95*4882a593Smuzhiyun	;;
96*4882a593Smuzhiyun	mov pr=saved_pr,0xffffffffffff0000	// restore the predicates selected by the mask (bits 16 and up)
97*4882a593Smuzhiyun	mov ar.pfs=saved_pfs
98*4882a593Smuzhiyun	mov ar.lc=saved_lc
99*4882a593Smuzhiyun	br.ret.sptk.many rp
100*4882a593SmuzhiyunEND(copy_page)
/* Export copy_page via the EXPORT_SYMBOL macro from <asm/export.h>. */
101*4882a593SmuzhiyunEXPORT_SYMBOL(copy_page)
102