/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __ASM_SH_UNALIGNED_SH4A_H
#define __ASM_SH_UNALIGNED_SH4A_H

/*
 * SH-4A has support for unaligned 32-bit loads, and 32-bit loads only.
 * Support for 64-bit accesses is done through shifting and masking
 * relative to the endianness. Unaligned stores are not supported by the
 * instruction encoding, so these continue to use the packed
 * struct.
 *
 * The same note as with the movli.l/movco.l pair applies here: as long
 * as the load is guaranteed to be inlined, nothing else will hook into
 * r0 and we get the return value for free.
 *
 * NOTE: Due to the fact we require r0 encoding, care should be taken to
 * avoid mixing these heavily with other r0 consumers, such as the atomic
 * ops. Failure to adhere to this can result in the compiler running out
 * of spill registers and blowing up when building at low optimization
 * levels. See http://gcc.gnu.org/bugzilla/show_bug.cgi?id=34777.
 */
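
/*
 * Illustrative sketch (hypothetical caller, not part of this header):
 * reading a little-endian 32-bit field from an arbitrarily aligned
 * buffer goes through sh4a_get_unaligned_cpu32() below and, when
 * inlined on a little-endian kernel, should reduce to a single movua.l
 * (big-endian additionally byte-swaps):
 *
 *	u32 read_le32_field(const void *buf, unsigned int offset)
 *	{
 *		return get_unaligned_le32((const u8 *)buf + offset);
 *	}
 */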
#include <linux/unaligned/packed_struct.h>
#include <linux/types.h>
#include <asm/byteorder.h>

static inline u16 sh4a_get_unaligned_cpu16(const u8 *p)
{
#ifdef __LITTLE_ENDIAN
	return p[0] | p[1] << 8;
#else
	return p[0] << 8 | p[1];
#endif
}

static __always_inline u32 sh4a_get_unaligned_cpu32(const u8 *p)
{
	unsigned long unaligned;

	__asm__ __volatile__ (
		"movua.l @%1, %0\n\t"
		 : "=z" (unaligned)
		 : "r" (p)
	);

	return unaligned;
}

/*
 * Even though movua.l supports auto-increment on the read side, it can
 * only store to r0 due to instruction encoding constraints, so just let
 * the compiler sort it out on its own.
 */
static inline u64 sh4a_get_unaligned_cpu64(const u8 *p)
{
#ifdef __LITTLE_ENDIAN
	return (u64)sh4a_get_unaligned_cpu32(p + 4) << 32 |
		    sh4a_get_unaligned_cpu32(p);
#else
	return (u64)sh4a_get_unaligned_cpu32(p) << 32 |
		    sh4a_get_unaligned_cpu32(p + 4);
#endif
}

static inline u16 get_unaligned_le16(const void *p)
{
	return le16_to_cpu(sh4a_get_unaligned_cpu16(p));
}

static inline u32 get_unaligned_le32(const void *p)
{
	return le32_to_cpu(sh4a_get_unaligned_cpu32(p));
}

static inline u64 get_unaligned_le64(const void *p)
{
	return le64_to_cpu(sh4a_get_unaligned_cpu64(p));
}

static inline u16 get_unaligned_be16(const void *p)
{
	return be16_to_cpu(sh4a_get_unaligned_cpu16(p));
}

static inline u32 get_unaligned_be32(const void *p)
{
	return be32_to_cpu(sh4a_get_unaligned_cpu32(p));
}

static inline u64 get_unaligned_be64(const void *p)
{
	return be64_to_cpu(sh4a_get_unaligned_cpu64(p));
}

static inline void nonnative_put_le16(u16 val, u8 *p)
{
	*p++ = val;
	*p++ = val >> 8;
}

static inline void nonnative_put_le32(u32 val, u8 *p)
{
	nonnative_put_le16(val, p);
	nonnative_put_le16(val >> 16, p + 2);
}

static inline void nonnative_put_le64(u64 val, u8 *p)
{
	nonnative_put_le32(val, p);
	nonnative_put_le32(val >> 32, p + 4);
}

static inline void nonnative_put_be16(u16 val, u8 *p)
{
	*p++ = val >> 8;
	*p++ = val;
}

static inline void nonnative_put_be32(u32 val, u8 *p)
{
	nonnative_put_be16(val >> 16, p);
	nonnative_put_be16(val, p + 2);
}

static inline void nonnative_put_be64(u64 val, u8 *p)
{
	nonnative_put_be32(val >> 32, p);
	nonnative_put_be32(val, p + 4);
}

static inline void put_unaligned_le16(u16 val, void *p)
{
#ifdef __LITTLE_ENDIAN
	__put_unaligned_cpu16(val, p);
#else
	nonnative_put_le16(val, p);
#endif
}

static inline void put_unaligned_le32(u32 val, void *p)
{
#ifdef __LITTLE_ENDIAN
	__put_unaligned_cpu32(val, p);
#else
	nonnative_put_le32(val, p);
#endif
}

static inline void put_unaligned_le64(u64 val, void *p)
{
#ifdef __LITTLE_ENDIAN
	__put_unaligned_cpu64(val, p);
#else
	nonnative_put_le64(val, p);
#endif
}

static inline void put_unaligned_be16(u16 val, void *p)
{
#ifdef __BIG_ENDIAN
	__put_unaligned_cpu16(val, p);
#else
	nonnative_put_be16(val, p);
#endif
}

static inline void put_unaligned_be32(u32 val, void *p)
{
#ifdef __BIG_ENDIAN
	__put_unaligned_cpu32(val, p);
#else
	nonnative_put_be32(val, p);
#endif
}

static inline void put_unaligned_be64(u64 val, void *p)
{
#ifdef __BIG_ENDIAN
	__put_unaligned_cpu64(val, p);
#else
	nonnative_put_be64(val, p);
#endif
}

/*
 * While it's a bit non-obvious, even though the generic le/be wrappers
 * use the __get/put_xxx prefixing, they actually wrap into the
 * non-prefixed get/put_xxx variants provided above.
 */
#include <linux/unaligned/generic.h>

#ifdef __LITTLE_ENDIAN
# define get_unaligned	__get_unaligned_le
# define put_unaligned	__put_unaligned_le
#else
# define get_unaligned	__get_unaligned_be
# define put_unaligned	__put_unaligned_be
#endif
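
/*
 * Illustrative usage (hypothetical caller, not part of this header):
 * the endianness-neutral get_unaligned()/put_unaligned() helpers
 * selected above dispatch on the size of the pointed-to type, e.g. for
 * a possibly misaligned native-endian u32 in a byte buffer:
 *
 *	u32 bump_counter(u8 *buf)
 *	{
 *		u32 val = get_unaligned((u32 *)buf);
 *
 *		put_unaligned(val + 1, (u32 *)buf);
 *		return val;
 *	}
 */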

#endif /* __ASM_SH_UNALIGNED_SH4A_H */