============================
Kernel-provided User Helpers
============================

These are segments of kernel provided user code reachable from user space
at a fixed address in kernel memory.  This is used to provide user space
with some operations which require kernel help because of unimplemented
native feature and/or instructions in many ARM CPUs. The idea is for this
code to be executed directly in user mode for best efficiency but which is
too intimate with the kernel counter part to be left to user libraries.
In fact this code might even differ from one CPU to another depending on
the available instruction set, or whether it is an SMP system. In other
words, the kernel reserves the right to change this code as needed without
warning. Only the entry points and their results as documented here are
guaranteed to be stable.

This is different from (but doesn't preclude) a full blown VDSO
implementation, however a VDSO would prevent some assembly tricks with
constants that allows for efficient branching to those code segments. And
since those code segments only use a few cycles before returning to user
code, the overhead of a VDSO indirect far call would add a measurable
overhead to such minimalistic operations.

User space is expected to bypass those helpers and implement those things
inline (either in the code emitted directly by the compiler, or part of
the implementation of a library call) when optimizing for a recent enough
processor that has the necessary native support, but only if resulting
binaries are already to be incompatible with earlier ARM processors due to
usage of similar native instructions for other things. In other words
don't make binaries unable to run on earlier processors just for the sake
of not using these kernel helpers if your compiled code is not going to
use new instructions for other purpose.

New helpers may be added over time, so an older kernel may be missing some
helpers present in a newer kernel.  For this reason, programs must check
the value of __kuser_helper_version (see below) before assuming that it is
safe to call any particular helper.  This check should ideally be
performed only once at process startup time, and execution aborted early
if the required helpers are not provided by the kernel version that
process is running on.

kuser_helper_version
--------------------

Location: 0xffff0ffc

Reference declaration::

  extern int32_t __kuser_helper_version;

Definition:

  This field contains the number of helpers being implemented by the
  running kernel.
  User space may read this to determine the availability
  of a particular helper.

Usage example::

  #define __kuser_helper_version (*(int32_t *)0xffff0ffc)

  void check_kuser_version(void)
  {
          if (__kuser_helper_version < 2) {
                  fprintf(stderr, "can't do atomic operations, kernel too old\n");
                  abort();
          }
  }

Notes:

  User space may assume that the value of this field never changes
  during the lifetime of any single process.  This means that this
  field can be read once during the initialisation of a library or
  startup phase of a program.

kuser_get_tls
-------------

Location: 0xffff0fe0

Reference prototype::

  void * __kuser_get_tls(void);

Input:

  lr = return address

Output:

  r0 = TLS value

Clobbered registers:

  none

Definition:

  Get the TLS value as previously set via the __ARM_NR_set_tls syscall.

Usage example::

  typedef void * (__kuser_get_tls_t)(void);
  #define __kuser_get_tls (*(__kuser_get_tls_t *)0xffff0fe0)

  void foo()
  {
          void *tls = __kuser_get_tls();
          printf("TLS = %p\n", tls);
  }

Notes:

  - Valid only if __kuser_helper_version >= 1 (from kernel version 2.6.12).

kuser_cmpxchg
-------------

Location: 0xffff0fc0

Reference prototype::

  int __kuser_cmpxchg(int32_t oldval, int32_t newval, volatile int32_t *ptr);

Input:

  r0 = oldval
  r1 = newval
  r2 = ptr
  lr = return address

Output:

  r0 = success code (zero or non-zero)
  C flag = set if r0 == 0, clear if r0 != 0

Clobbered registers:

  r3, ip, flags

Definition:

  Atomically store newval in `*ptr` only if `*ptr` is equal to oldval.
  Return zero if `*ptr` was changed or non-zero if no exchange happened.
  The C flag is also set if `*ptr` was changed to allow for assembly
  optimization in the calling code.

Usage example::

  typedef int (__kuser_cmpxchg_t)(int oldval, int newval, volatile int *ptr);
  #define __kuser_cmpxchg (*(__kuser_cmpxchg_t *)0xffff0fc0)

  int atomic_add(volatile int *ptr, int val)
  {
          int old, new;

          do {
                  old = *ptr;
                  new = old + val;
          } while(__kuser_cmpxchg(old, new, ptr));

          return new;
  }

Notes:

  - This routine already includes memory barriers as needed.

  - Valid only if __kuser_helper_version >= 2 (from kernel version 2.6.12).

kuser_memory_barrier
--------------------

Location: 0xffff0fa0

Reference prototype::

  void __kuser_memory_barrier(void);

Input:

  lr = return address

Output:

  none

Clobbered registers:

  none

Definition:

  Apply any needed memory barrier to preserve consistency with data modified
  manually and __kuser_cmpxchg usage.

Usage example::

  typedef void (__kuser_dmb_t)(void);
  #define __kuser_dmb (*(__kuser_dmb_t *)0xffff0fa0)

Notes:

  - Valid only if __kuser_helper_version >= 3 (from kernel version 2.6.15).

kuser_cmpxchg64
---------------

Location: 0xffff0f60

Reference prototype::

  int __kuser_cmpxchg64(const int64_t *oldval,
                        const int64_t *newval,
                        volatile int64_t *ptr);

Input:

  r0 = pointer to oldval
  r1 = pointer to newval
  r2 = pointer to target value
  lr = return address

Output:

  r0 = success code (zero or non-zero)
  C flag = set if r0 == 0, clear if r0 != 0

Clobbered registers:

  r3, lr, flags

Definition:

  Atomically store the 64-bit value pointed by `*newval` in `*ptr` only if `*ptr`
  is equal to the 64-bit value pointed by `*oldval`.  Return zero if `*ptr` was
  changed or non-zero if no exchange happened.

  The C flag is also set if `*ptr` was changed to allow for assembly
  optimization in the calling code.

Usage example::

  typedef int (__kuser_cmpxchg64_t)(const int64_t *oldval,
                                    const int64_t *newval,
                                    volatile int64_t *ptr);
  #define __kuser_cmpxchg64 (*(__kuser_cmpxchg64_t *)0xffff0f60)

  int64_t atomic_add64(volatile int64_t *ptr, int64_t val)
  {
          int64_t old, new;

          do {
                  old = *ptr;
                  new = old + val;
          } while(__kuser_cmpxchg64(&old, &new, ptr));

          return new;
  }

Notes:

  - This routine already includes memory barriers as needed.

  - Due to the length of this sequence, this spans 2 conventional kuser
    "slots", therefore 0xffff0f80 is not used as a valid entry point.

  - Valid only if __kuser_helper_version >= 5 (from kernel version 3.1).