/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Cache flushing routines.
 *
 * Copyright (C) 1999-2001, 2005 Hewlett-Packard Co
 *	David Mosberger-Tang <davidm@hpl.hp.com>
 *
 * 05/28/05 Zoltan Menyhart	Dynamic stride size
 */

#include <asm/asmmacro.h>
#include <asm/export.h>


	/*
	 * flush_icache_range(start,end)
	 *
	 * Make i-cache(s) coherent with d-caches.
	 *
	 * Must deal with range from start to end-1 but nothing else (need to
	 * be careful not to touch addresses that may be unmapped).
	 *
	 * Note: "in0" and "in1" are preserved for debugging purposes.
	 */
	.section .kprobes.text,"ax"
GLOBAL_ENTRY(flush_icache_range)

	.prologue
	alloc	r2=ar.pfs,2,0,0,0		// 2 inputs: in0=start, in1=end
	movl	r3=ia64_i_cache_stride_shift	// per-CPU i-cache stride, set at boot
	mov	r21=1
	;;
	ld8	r20=[r3]		// r20: stride shift
	sub	r22=in1,r0,1		// last byte address (end is exclusive)
	;;
	shr.u	r23=in0,r20		// start / (stride size)
	shr.u	r22=r22,r20		// (last byte address) / (stride size)
	shl	r21=r21,r20		// r21: stride size of the i-cache(s)
	;;
	sub	r8=r22,r23		// number of strides - 1 (loop trip count)
	shl	r24=r23,r20		// r24: addresses for "fc.i" =
					//	"start" rounded down to stride
					//	boundary
	.save	ar.lc,r3
	mov	r3=ar.lc		// save ar.lc
	;;

	.body
	mov	ar.lc=r8		// counted loop: executes r8+1 iterations
	;;
	/*
	 * 32 byte aligned loop, even number of (actually 2) bundles
	 */
.Loop:	fc.i	r24			// issuable on M0 only
	add	r24=r21,r24		// we flush "stride size" bytes per iteration
	nop.i	0
	br.cloop.sptk.few .Loop
	;;
	sync.i				// wait for flushes to become visible
	;;
	srlz.i				// serialize the instruction stream
	;;
	mov	ar.lc=r3		// restore ar.lc
	br.ret.sptk.many rp
END(flush_icache_range)
EXPORT_SYMBOL_GPL(flush_icache_range)

	/*
	 * clflush_cache_range(start,size)
	 *
	 * Flush cache lines from start to start+size-1.
	 *
	 * Must deal with range from start to start+size-1 but nothing else
	 * (need to be careful not to touch addresses that may be
	 * unmapped).
	 *
	 * Note: "in0" and "in1" are preserved for debugging purposes.
	 */
	.section .kprobes.text,"ax"
GLOBAL_ENTRY(clflush_cache_range)

	.prologue
	alloc	r2=ar.pfs,2,0,0,0		// 2 inputs: in0=start, in1=size
	movl	r3=ia64_cache_stride_shift	// per-CPU cache stride, set at boot
	mov	r21=1
	add	r22=in1,in0		// start + size
	;;
	ld8	r20=[r3]		// r20: stride shift
	sub	r22=r22,r0,1		// last byte address = start + size - 1
	;;
	shr.u	r23=in0,r20		// start / (stride size)
	shr.u	r22=r22,r20		// (last byte address) / (stride size)
	shl	r21=r21,r20		// r21: stride size of the cache(s)
					// (ia64_cache_stride_shift, not the
					// i-cache stride used above)
	;;
	sub	r8=r22,r23		// number of strides - 1 (loop trip count)
	shl	r24=r23,r20		// r24: addresses for "fc" =
					//	"start" rounded down to stride
					//	boundary
	.save	ar.lc,r3
	mov	r3=ar.lc		// save ar.lc
	;;

	.body
	mov	ar.lc=r8		// counted loop: executes r8+1 iterations
	;;
	/*
	 * 32 byte aligned loop, even number of (actually 2) bundles
	 */
.Loop_fc:
	fc	r24			// issuable on M0 only
	add	r24=r21,r24		// we flush "stride size" bytes per iteration
	nop.i	0
	br.cloop.sptk.few .Loop_fc
	;;
	sync.i				// wait for flushes to become visible
	;;
	srlz.i				// serialize the instruction stream
	;;
	mov	ar.lc=r3		// restore ar.lc
	br.ret.sptk.many rp
END(clflush_cache_range)