xref: /OK3568_Linux_fs/kernel/Documentation/arm/kernel_user_helpers.rst (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun============================
2*4882a593SmuzhiyunKernel-provided User Helpers
3*4882a593Smuzhiyun============================
4*4882a593Smuzhiyun
5*4882a593SmuzhiyunThese are segments of kernel-provided user code reachable from user space
6*4882a593Smuzhiyunat a fixed address in kernel memory.  This is used to provide user space
7*4882a593Smuzhiyunwith some operations which require kernel help because of unimplemented
8*4882a593Smuzhiyunnative features and/or instructions in many ARM CPUs. The idea is for this
9*4882a593Smuzhiyuncode to be executed directly in user mode for best efficiency but which is
10*4882a593Smuzhiyuntoo intimate with the kernel counterpart to be left to user libraries.
11*4882a593SmuzhiyunIn fact this code might even differ from one CPU to another depending on
12*4882a593Smuzhiyunthe available instruction set, or whether it is an SMP system. In other
13*4882a593Smuzhiyunwords, the kernel reserves the right to change this code as needed without
14*4882a593Smuzhiyunwarning. Only the entry points and their results as documented here are
15*4882a593Smuzhiyunguaranteed to be stable.
16*4882a593Smuzhiyun
17*4882a593SmuzhiyunThis is different from (but doesn't preclude) a full blown VDSO
18*4882a593Smuzhiyunimplementation, however a VDSO would prevent some assembly tricks with
19*4882a593Smuzhiyunconstants that allow for efficient branching to those code segments. And
20*4882a593Smuzhiyunsince those code segments only use a few cycles before returning to user
21*4882a593Smuzhiyuncode, the overhead of a VDSO indirect far call would add a measurable
22*4882a593Smuzhiyunoverhead to such minimalistic operations.
23*4882a593Smuzhiyun
24*4882a593SmuzhiyunUser space is expected to bypass those helpers and implement those things
25*4882a593Smuzhiyuninline (either in the code emitted directly by the compiler, or part of
26*4882a593Smuzhiyunthe implementation of a library call) when optimizing for a recent enough
27*4882a593Smuzhiyunprocessor that has the necessary native support, but only if resulting
28*4882a593Smuzhiyunbinaries are already going to be incompatible with earlier ARM processors due to
29*4882a593Smuzhiyunusage of similar native instructions for other things.  In other words
30*4882a593Smuzhiyundon't make binaries unable to run on earlier processors just for the sake
31*4882a593Smuzhiyunof not using these kernel helpers if your compiled code is not going to
32*4882a593Smuzhiyunuse new instructions for other purposes.
33*4882a593Smuzhiyun
34*4882a593SmuzhiyunNew helpers may be added over time, so an older kernel may be missing some
35*4882a593Smuzhiyunhelpers present in a newer kernel.  For this reason, programs must check
36*4882a593Smuzhiyunthe value of __kuser_helper_version (see below) before assuming that it is
37*4882a593Smuzhiyunsafe to call any particular helper.  This check should ideally be
38*4882a593Smuzhiyunperformed only once at process startup time, and execution aborted early
39*4882a593Smuzhiyunif the required helpers are not provided by the kernel version that the
40*4882a593Smuzhiyunprocess is running on.
41*4882a593Smuzhiyun
42*4882a593Smuzhiyunkuser_helper_version
43*4882a593Smuzhiyun--------------------
44*4882a593Smuzhiyun
45*4882a593SmuzhiyunLocation:	0xffff0ffc
46*4882a593Smuzhiyun
47*4882a593SmuzhiyunReference declaration::
48*4882a593Smuzhiyun
49*4882a593Smuzhiyun  extern int32_t __kuser_helper_version;
50*4882a593Smuzhiyun
51*4882a593SmuzhiyunDefinition:
52*4882a593Smuzhiyun
53*4882a593Smuzhiyun  This field contains the number of helpers being implemented by the
54*4882a593Smuzhiyun  running kernel.  User space may read this to determine the availability
55*4882a593Smuzhiyun  of a particular helper.
56*4882a593Smuzhiyun
57*4882a593SmuzhiyunUsage example::
58*4882a593Smuzhiyun
59*4882a593Smuzhiyun  #define __kuser_helper_version (*(int32_t *)0xffff0ffc)
60*4882a593Smuzhiyun
61*4882a593Smuzhiyun  void check_kuser_version(void)
62*4882a593Smuzhiyun  {
63*4882a593Smuzhiyun	if (__kuser_helper_version < 2) {
64*4882a593Smuzhiyun		fprintf(stderr, "can't do atomic operations, kernel too old\n");
65*4882a593Smuzhiyun		abort();
66*4882a593Smuzhiyun	}
67*4882a593Smuzhiyun  }
68*4882a593Smuzhiyun
69*4882a593SmuzhiyunNotes:
70*4882a593Smuzhiyun
71*4882a593Smuzhiyun  User space may assume that the value of this field never changes
72*4882a593Smuzhiyun  during the lifetime of any single process.  This means that this
73*4882a593Smuzhiyun  field can be read once during the initialisation of a library or
74*4882a593Smuzhiyun  startup phase of a program.
75*4882a593Smuzhiyun
76*4882a593Smuzhiyunkuser_get_tls
77*4882a593Smuzhiyun-------------
78*4882a593Smuzhiyun
79*4882a593SmuzhiyunLocation:	0xffff0fe0
80*4882a593Smuzhiyun
81*4882a593SmuzhiyunReference prototype::
82*4882a593Smuzhiyun
83*4882a593Smuzhiyun  void * __kuser_get_tls(void);
84*4882a593Smuzhiyun
85*4882a593SmuzhiyunInput:
86*4882a593Smuzhiyun
87*4882a593Smuzhiyun  lr = return address
88*4882a593Smuzhiyun
89*4882a593SmuzhiyunOutput:
90*4882a593Smuzhiyun
91*4882a593Smuzhiyun  r0 = TLS value
92*4882a593Smuzhiyun
93*4882a593SmuzhiyunClobbered registers:
94*4882a593Smuzhiyun
95*4882a593Smuzhiyun  none
96*4882a593Smuzhiyun
97*4882a593SmuzhiyunDefinition:
98*4882a593Smuzhiyun
99*4882a593Smuzhiyun  Get the TLS value as previously set via the __ARM_NR_set_tls syscall.
100*4882a593Smuzhiyun
101*4882a593SmuzhiyunUsage example::
102*4882a593Smuzhiyun
103*4882a593Smuzhiyun  typedef void * (__kuser_get_tls_t)(void);
104*4882a593Smuzhiyun  #define __kuser_get_tls (*(__kuser_get_tls_t *)0xffff0fe0)
105*4882a593Smuzhiyun
106*4882a593Smuzhiyun  void foo()
107*4882a593Smuzhiyun  {
108*4882a593Smuzhiyun	void *tls = __kuser_get_tls();
109*4882a593Smuzhiyun	printf("TLS = %p\n", tls);
110*4882a593Smuzhiyun  }
111*4882a593Smuzhiyun
112*4882a593SmuzhiyunNotes:
113*4882a593Smuzhiyun
114*4882a593Smuzhiyun  - Valid only if __kuser_helper_version >= 1 (from kernel version 2.6.12).
115*4882a593Smuzhiyun
116*4882a593Smuzhiyunkuser_cmpxchg
117*4882a593Smuzhiyun-------------
118*4882a593Smuzhiyun
119*4882a593SmuzhiyunLocation:	0xffff0fc0
120*4882a593Smuzhiyun
121*4882a593SmuzhiyunReference prototype::
122*4882a593Smuzhiyun
123*4882a593Smuzhiyun  int __kuser_cmpxchg(int32_t oldval, int32_t newval, volatile int32_t *ptr);
124*4882a593Smuzhiyun
125*4882a593SmuzhiyunInput:
126*4882a593Smuzhiyun
127*4882a593Smuzhiyun  r0 = oldval
128*4882a593Smuzhiyun  r1 = newval
129*4882a593Smuzhiyun  r2 = ptr
130*4882a593Smuzhiyun  lr = return address
131*4882a593Smuzhiyun
132*4882a593SmuzhiyunOutput:
133*4882a593Smuzhiyun
134*4882a593Smuzhiyun  r0 = success code (zero or non-zero)
135*4882a593Smuzhiyun  C flag = set if r0 == 0, clear if r0 != 0
136*4882a593Smuzhiyun
137*4882a593SmuzhiyunClobbered registers:
138*4882a593Smuzhiyun
139*4882a593Smuzhiyun  r3, ip, flags
140*4882a593Smuzhiyun
141*4882a593SmuzhiyunDefinition:
142*4882a593Smuzhiyun
143*4882a593Smuzhiyun  Atomically store newval in `*ptr` only if `*ptr` is equal to oldval.
144*4882a593Smuzhiyun  Return zero if `*ptr` was changed or non-zero if no exchange happened.
145*4882a593Smuzhiyun  The C flag is also set if `*ptr` was changed to allow for assembly
146*4882a593Smuzhiyun  optimization in the calling code.
147*4882a593Smuzhiyun
148*4882a593SmuzhiyunUsage example::
149*4882a593Smuzhiyun
150*4882a593Smuzhiyun  typedef int (__kuser_cmpxchg_t)(int oldval, int newval, volatile int *ptr);
151*4882a593Smuzhiyun  #define __kuser_cmpxchg (*(__kuser_cmpxchg_t *)0xffff0fc0)
152*4882a593Smuzhiyun
153*4882a593Smuzhiyun  int atomic_add(volatile int *ptr, int val)
154*4882a593Smuzhiyun  {
155*4882a593Smuzhiyun	int old, new;
156*4882a593Smuzhiyun
157*4882a593Smuzhiyun	do {
158*4882a593Smuzhiyun		old = *ptr;
159*4882a593Smuzhiyun		new = old + val;
160*4882a593Smuzhiyun	} while(__kuser_cmpxchg(old, new, ptr));
161*4882a593Smuzhiyun
162*4882a593Smuzhiyun	return new;
163*4882a593Smuzhiyun  }
164*4882a593Smuzhiyun
165*4882a593SmuzhiyunNotes:
166*4882a593Smuzhiyun
167*4882a593Smuzhiyun  - This routine already includes memory barriers as needed.
168*4882a593Smuzhiyun
169*4882a593Smuzhiyun  - Valid only if __kuser_helper_version >= 2 (from kernel version 2.6.12).
170*4882a593Smuzhiyun
171*4882a593Smuzhiyunkuser_memory_barrier
172*4882a593Smuzhiyun--------------------
173*4882a593Smuzhiyun
174*4882a593SmuzhiyunLocation:	0xffff0fa0
175*4882a593Smuzhiyun
176*4882a593SmuzhiyunReference prototype::
177*4882a593Smuzhiyun
178*4882a593Smuzhiyun  void __kuser_memory_barrier(void);
179*4882a593Smuzhiyun
180*4882a593SmuzhiyunInput:
181*4882a593Smuzhiyun
182*4882a593Smuzhiyun  lr = return address
183*4882a593Smuzhiyun
184*4882a593SmuzhiyunOutput:
185*4882a593Smuzhiyun
186*4882a593Smuzhiyun  none
187*4882a593Smuzhiyun
188*4882a593SmuzhiyunClobbered registers:
189*4882a593Smuzhiyun
190*4882a593Smuzhiyun  none
191*4882a593Smuzhiyun
192*4882a593SmuzhiyunDefinition:
193*4882a593Smuzhiyun
194*4882a593Smuzhiyun  Apply any needed memory barrier to preserve consistency with data modified
195*4882a593Smuzhiyun  manually and __kuser_cmpxchg usage.
196*4882a593Smuzhiyun
197*4882a593SmuzhiyunUsage example::
198*4882a593Smuzhiyun
199*4882a593Smuzhiyun  typedef void (__kuser_dmb_t)(void);
200*4882a593Smuzhiyun  #define __kuser_dmb (*(__kuser_dmb_t *)0xffff0fa0)
201*4882a593Smuzhiyun
202*4882a593SmuzhiyunNotes:
203*4882a593Smuzhiyun
204*4882a593Smuzhiyun  - Valid only if __kuser_helper_version >= 3 (from kernel version 2.6.15).
205*4882a593Smuzhiyun
206*4882a593Smuzhiyunkuser_cmpxchg64
207*4882a593Smuzhiyun---------------
208*4882a593Smuzhiyun
209*4882a593SmuzhiyunLocation:	0xffff0f60
210*4882a593Smuzhiyun
211*4882a593SmuzhiyunReference prototype::
212*4882a593Smuzhiyun
213*4882a593Smuzhiyun  int __kuser_cmpxchg64(const int64_t *oldval,
214*4882a593Smuzhiyun                        const int64_t *newval,
215*4882a593Smuzhiyun                        volatile int64_t *ptr);
216*4882a593Smuzhiyun
217*4882a593SmuzhiyunInput:
218*4882a593Smuzhiyun
219*4882a593Smuzhiyun  r0 = pointer to oldval
220*4882a593Smuzhiyun  r1 = pointer to newval
221*4882a593Smuzhiyun  r2 = pointer to target value
222*4882a593Smuzhiyun  lr = return address
223*4882a593Smuzhiyun
224*4882a593SmuzhiyunOutput:
225*4882a593Smuzhiyun
226*4882a593Smuzhiyun  r0 = success code (zero or non-zero)
227*4882a593Smuzhiyun  C flag = set if r0 == 0, clear if r0 != 0
228*4882a593Smuzhiyun
229*4882a593SmuzhiyunClobbered registers:
230*4882a593Smuzhiyun
231*4882a593Smuzhiyun  r3, lr, flags
232*4882a593Smuzhiyun
233*4882a593SmuzhiyunDefinition:
234*4882a593Smuzhiyun
235*4882a593Smuzhiyun  Atomically store the 64-bit value pointed to by `newval` in `*ptr` only if
236*4882a593Smuzhiyun  `*ptr` is equal to the 64-bit value pointed to by `oldval`.  Return zero if
237*4882a593Smuzhiyun  `*ptr` was changed or non-zero if no exchange happened.
238*4882a593Smuzhiyun
239*4882a593Smuzhiyun  The C flag is also set if `*ptr` was changed to allow for assembly
240*4882a593Smuzhiyun  optimization in the calling code.
241*4882a593Smuzhiyun
242*4882a593SmuzhiyunUsage example::
243*4882a593Smuzhiyun
244*4882a593Smuzhiyun  typedef int (__kuser_cmpxchg64_t)(const int64_t *oldval,
245*4882a593Smuzhiyun                                    const int64_t *newval,
246*4882a593Smuzhiyun                                    volatile int64_t *ptr);
247*4882a593Smuzhiyun  #define __kuser_cmpxchg64 (*(__kuser_cmpxchg64_t *)0xffff0f60)
248*4882a593Smuzhiyun
249*4882a593Smuzhiyun  int64_t atomic_add64(volatile int64_t *ptr, int64_t val)
250*4882a593Smuzhiyun  {
251*4882a593Smuzhiyun	int64_t old, new;
252*4882a593Smuzhiyun
253*4882a593Smuzhiyun	do {
254*4882a593Smuzhiyun		old = *ptr;
255*4882a593Smuzhiyun		new = old + val;
256*4882a593Smuzhiyun	} while(__kuser_cmpxchg64(&old, &new, ptr));
257*4882a593Smuzhiyun
258*4882a593Smuzhiyun	return new;
259*4882a593Smuzhiyun  }
260*4882a593Smuzhiyun
261*4882a593SmuzhiyunNotes:
262*4882a593Smuzhiyun
263*4882a593Smuzhiyun  - This routine already includes memory barriers as needed.
264*4882a593Smuzhiyun
265*4882a593Smuzhiyun  - Due to the length of this sequence, this spans 2 conventional kuser
266*4882a593Smuzhiyun    "slots", therefore 0xffff0f80 is not used as a valid entry point.
267*4882a593Smuzhiyun
268*4882a593Smuzhiyun  - Valid only if __kuser_helper_version >= 5 (from kernel version 3.1).
269