Lines Matching +full:push +full:- +full:pull

2 From: Jeffy Chen <jeffy.chen@rock-chips.com>
4 Subject: [PATCH] libc: string: arm: Support using glibc-neon version of APIs
8 Signed-off-by: Jeffy Chen <jeffy.chen@rock-chips.com>
9 ---
11 libc/string/Makefile.in | 10 +-
13 libc/string/arm/glibc-neon/README | 4 +
14 libc/string/arm/glibc-neon/arm-features.h | 59 ++
15 libc/string/arm/glibc-neon/bcopy.c | 24 +
16 libc/string/arm/glibc-neon/bzero.c | 28 +
17 libc/string/arm/glibc-neon/glibc_wrap.h | 29 +
18 libc/string/arm/glibc-neon/memcmp.S | 1 +
19 libc/string/arm/glibc-neon/memcpy.S | 728 ++++++++++++++++++
20 libc/string/arm/glibc-neon/memmove.S | 332 ++++++++
21 libc/string/arm/glibc-neon/memset.S | 68 ++
22 libc/string/arm/glibc-neon/strcmp.S | 504 ++++++++++++
23 libc/string/arm/glibc-neon/strlen.S | 76 ++
24 libc/string/arm/glibc-neon/sysdep.h | 339 ++++++++
25 .../arm/glibc-neon/sysdeps/generic/dwarf2.h | 590 ++++++++++++++
26 .../arm/glibc-neon/sysdeps/generic/sysdep.h | 97 +++
27 17 files changed, 2901 insertions(+), 2 deletions(-)
29 create mode 100644 libc/string/arm/glibc-neon/README
30 create mode 100644 libc/string/arm/glibc-neon/arm-features.h
31 create mode 100644 libc/string/arm/glibc-neon/bcopy.c
32 create mode 100644 libc/string/arm/glibc-neon/bzero.c
33 create mode 100644 libc/string/arm/glibc-neon/glibc_wrap.h
34 create mode 120000 libc/string/arm/glibc-neon/memcmp.S
35 create mode 100644 libc/string/arm/glibc-neon/memcpy.S
36 create mode 100644 libc/string/arm/glibc-neon/memmove.S
37 create mode 100644 libc/string/arm/glibc-neon/memset.S
38 create mode 100644 libc/string/arm/glibc-neon/strcmp.S
39 create mode 100644 libc/string/arm/glibc-neon/strlen.S
40 create mode 100644 libc/string/arm/glibc-neon/sysdep.h
41 create mode 100644 libc/string/arm/glibc-neon/sysdeps/generic/dwarf2.h
42 create mode 100644 libc/string/arm/glibc-neon/sysdeps/generic/sysdep.h
44 diff --git a/Makerules b/Makerules
46 --- a/Makerules
48 @@ -255,6 +255,7 @@ CFLAGS_gen.dep = -MT $@ -MD -MP -MF $(dir $@).$(notdir $@).dep
50 cmd_compile.c = $(CC) -c $< -o $@ \
51 $(filter-out $(CFLAGS-OMIT-$(notdir $<)), \
52 + $(CFLAGS-extra-$(subst $(top_srcdir),,$(<D))) \
54 $(CFLAGS-for-library-members) \
55 $(CFLAGS-$(suffix $@)) \
56 diff --git a/libc/string/Makefile.in b/libc/string/Makefile.in
58 --- a/libc/string/Makefile.in
60 @@ -5,6 +5,12 @@
73 @@ -25,8 +31,8 @@ STRING_SUBARCH_OBJS := $(STRING_SUBARCH_SOBJ) $(STRING_SUBARCH_COBJ)
77 -STRING_ARCH_DIR := $(top_srcdir)libc/string/$(TARGET_ARCH)
78 -STRING_ARCH_OUT := $(top_builddir)libc/string/$(TARGET_ARCH)
84 diff --git a/libc/string/arm/Makefile.in b/libc/string/arm/Makefile.in
87 --- /dev/null
89 @@ -0,0 +1,13 @@
92 +# Copyright (C) 2022 Jeffy Chen <jeffy.chen@rock-chips.com>
97 +SUBDIR := libc/string/arm/glibc-neon
102 +CFLAGS-extra-$(SUBDIR) := -include glibc_wrap.h -I$(top_srcdir)$(SUBDIR)
103 diff --git a/libc/string/arm/glibc-neon/README b/libc/string/arm/glibc-neon/README
106 --- /dev/null
107 +++ b/libc/string/arm/glibc-neon/README
108 @@ -0,0 +1,4 @@
110 +1/ Ported from glibc-2.34.9.
113 diff --git a/libc/string/arm/glibc-neon/arm-features.h b/libc/string/arm/glibc-neon/arm-features.h
116 --- /dev/null
117 +++ b/libc/string/arm/glibc-neon/arm-features.h
118 @@ -0,0 +1,59 @@
120 + Copyright (C) 2012-2021 Free Software Foundation, Inc.
140 +/* An OS-specific arm-features.h file should define ARM_HAVE_VFP to
152 +/* An OS-specific arm-features.h file may define ARM_ASSUME_NO_IWMMXT
157 +/* A more-specific arm-features.h file may define ARM_ALWAYS_BX to indicate
163 + A more-specific arm-features.h file may define this to set a more
167 + always have a fixed size of four bytes), or for Thumb-mode code that is
174 +/* An OS-specific arm-features.h file may define ARM_NO_INDEX_REGISTER to
175 + indicate that the two-register addressing modes must never be used. */
177 +#endif /* arm-features.h */
178 diff --git a/libc/string/arm/glibc-neon/bcopy.c b/libc/string/arm/glibc-neon/bcopy.c
181 --- /dev/null
182 +++ b/libc/string/arm/glibc-neon/bcopy.c
183 @@ -0,0 +1,24 @@
184 +/* Copyright (C) 1991-2021 Free Software Foundation, Inc.
208 diff --git a/libc/string/arm/glibc-neon/bzero.c b/libc/string/arm/glibc-neon/bzero.c
211 --- /dev/null
212 +++ b/libc/string/arm/glibc-neon/bzero.c
213 @@ -0,0 +1,28 @@
214 +/* Copyright (C) 1991-2021 Free Software Foundation, Inc.
242 diff --git a/libc/string/arm/glibc-neon/glibc_wrap.h b/libc/string/arm/glibc-neon/glibc_wrap.h
245 --- /dev/null
246 +++ b/libc/string/arm/glibc-neon/glibc_wrap.h
247 @@ -0,0 +1,29 @@
249 + Copyright (C) 2022 Jeffy Chen <jeffy.chen@rock-chips.com>
277 diff --git a/libc/string/arm/glibc-neon/memcmp.S b/libc/string/arm/glibc-neon/memcmp.S
280 --- /dev/null
281 +++ b/libc/string/arm/glibc-neon/memcmp.S
282 @@ -0,0 +1 @@
285 diff --git a/libc/string/arm/glibc-neon/memcpy.S b/libc/string/arm/glibc-neon/memcpy.S
288 --- /dev/null
289 +++ b/libc/string/arm/glibc-neon/memcpy.S
290 @@ -0,0 +1,728 @@
291 +/* NEON/VFP/ARM version of memcpy optimized for Cortex-A15.
292 + Copyright (C) 2013-2021 Free Software Foundation, Inc.
309 + This memcpy routine is optimised for Cortex-A15 cores and takes advantage
314 + ARMv6 (ARMv7-a if using Neon)
325 +#include <arm-features.h>
334 + .arch armv7-a
376 + double-words (8 bytes). TMP1 is at most 56.
387 + rsb tmp1, tmp1, #((7 * 8) - PC_OFS + INSN_SIZE)
400 + rsb tmp1, tmp1, #((15 * 4) - PC_OFS/2 + INSN_SIZE/2)
424 + /* TMP1 gets (max_bytes - bytes_to_copy), where max_bytes is
431 + + ((1f - PC_OFS - 0f) \
432 + >> (ARM_BX_ALIGN_LOG2 - \log2_bytes_per_step)))
436 +0: add tmp1, pc, tmp1, lsl #(ARM_BX_ALIGN_LOG2 - \log2_bytes_per_step)
501 +#define A_l r2 /* Call-clobbered. */
502 +#define A_h r3 /* Call-clobbered. */
512 +/* Number of lines ahead to pre-fetch data. If you change this the code
528 + vldr \vreg, [src, #\base + prefetch_lines * 64 - 32]
570 + tracks that bug; it was not fixed as of binutils-2.23.2. */
596 + ldr tmp1, [src, #-(\i * 4)]
597 + str tmp1, [dst, #-(\i * 4)]
611 + str tmp2, [sp, #-FRAME_SIZE]!
621 + /* Magic dust alert! Force VFP on Cortex-A9. Experiments show
627 + /* SRC and DST have the same mutual 64-bit alignment, but we may
628 + still need to pre-copy some bytes to get to natural alignment.
629 + We bring SRC and DST into full 64-bit alignment. */
680 + vldr d0, [src, #-(\i * 8)]
681 + vstr d0, [dst, #-(\i * 8)]
709 + cfi_adjust_cfa_offset (-FRAME_SIZE)
720 + /* Copy up to 7 d-words of data. Similar to Ltail63unaligned, but
721 + we know that the src and dest are 64-bit aligned so we can use
730 + ldrd A_l, A_h, [src, #-(\i * 8)]
731 + strd A_l, A_h, [dst, #-(\i * 8)]
747 + cfi_adjust_cfa_offset (-FRAME_SIZE)
816 + /* Pre-bias src and dst. */
860 + /* Save the remaining bytes and restore the callee-saved regs. */
879 + cfi_adjust_cfa_offset (-FRAME_SIZE)
892 + /* Bring DST to 64-bit alignment. */
923 + neon_load_multi d0-d3, src
924 + neon_load_multi d4-d7, src
929 + neon_store_multi d0-d3, dst
930 + neon_load_multi d0-d3, src
931 + neon_store_multi d4-d7, dst
932 + neon_load_multi d4-d7, src
936 + neon_store_multi d0-d3, dst
937 + neon_store_multi d4-d7, dst
964 + pld [src, #(5 * 64) - (32 - 4)]
993 + /* Save the remaining bytes and restore the callee-saved regs. */
1012 + cfi_adjust_cfa_offset (-FRAME_SIZE)
1019 diff --git a/libc/string/arm/glibc-neon/memmove.S b/libc/string/arm/glibc-neon/memmove.S
1022 --- /dev/null
1023 +++ b/libc/string/arm/glibc-neon/memmove.S
1024 @@ -0,0 +1,332 @@
1025 +/* Copyright (C) 2006-2021 Free Software Foundation, Inc.
1047 +#include <arm-features.h>
1073 +#define PULL lsr
1074 +#define PUSH lsl
1076 +#define PULL lsl
1077 +#define PUSH lsr
1102 + push {r0, r4, lr}
1114 + PLD( pld [r1, #-4] )
1120 + push {r5 - r8}
1134 + CALGN( add pc, r4, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2))
1136 + CALGN( add r4, r4, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2))
1140 + PLD( pld [r1, #-4] )
1142 + PLD( pld [r1, #-32] )
1144 + PLD( pld [r1, #-64] )
1145 + PLD( pld [r1, #-96] )
1147 +3: PLD( pld [r1, #-128] )
1157 + addne pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
1161 + push {r10}
1164 +0: add r10, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
1169 + add r10, #(6f - (0b + PC_OFS))
1175 + ldr r3, [r1, #-4]!
1177 + ldr r4, [r1, #-4]!
1179 + ldr r5, [r1, #-4]!
1181 + ldr r6, [r1, #-4]!
1183 + ldr r7, [r1, #-4]!
1185 + ldr r8, [r1, #-4]!
1187 + ldr lr, [r1, #-4]!
1190 + add pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
1193 +0: add r10, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
1198 + add r10, #(66f - (0b + PC_OFS))
1204 + str r3, [r0, #-4]!
1206 + str r4, [r0, #-4]!
1208 + str r5, [r0, #-4]!
1210 + str r6, [r0, #-4]!
1212 + str r7, [r0, #-4]!
1214 + str r8, [r0, #-4]!
1216 + str lr, [r0, #-4]!
1220 + cfi_adjust_cfa_offset (-4)
1226 +7: pop {r5 - r8}
1227 + cfi_adjust_cfa_offset (-16)
1234 + ldrbne r3, [r1, #-1]!
1235 + ldrbcs r4, [r1, #-1]!
1236 + ldrbcs ip, [r1, #-1]
1237 + strbne r3, [r0, #-1]!
1238 + strbcs r4, [r0, #-1]!
1239 + strbcs ip, [r0, #-1]
1244 + cfi_adjust_cfa_offset (-12)
1255 + ldrbgt r3, [r1, #-1]!
1256 + ldrbge r4, [r1, #-1]!
1257 + ldrb lr, [r1, #-1]!
1258 + strbgt r3, [r0, #-1]!
1259 + strbge r4, [r0, #-1]!
1261 + strb lr, [r0, #-1]!
1273 + .macro backward_copy_shift push pull
1284 +11: push {r5 - r8, r10}
1292 + PLD( pld [r1, #-4] )
1294 + PLD( pld [r1, #-32] )
1296 + PLD( pld [r1, #-64] )
1297 + PLD( pld [r1, #-96] )
1299 +12: PLD( pld [r1, #-128] )
1301 + mov lr, r3, PUSH #\push
1304 + orr lr, lr, ip, PULL #\pull
1305 + mov ip, ip, PUSH #\push
1306 + orr ip, ip, r10, PULL #\pull
1307 + mov r10, r10, PUSH #\push
1308 + orr r10, r10, r8, PULL #\pull
1309 + mov r8, r8, PUSH #\push
1310 + orr r8, r8, r7, PULL #\pull
1311 + mov r7, r7, PUSH #\push
1312 + orr r7, r7, r6, PULL #\pull
1313 + mov r6, r6, PUSH #\push
1314 + orr r6, r6, r5, PULL #\pull
1315 + mov r5, r5, PUSH #\push
1316 + orr r5, r5, r4, PULL #\pull
1317 + mov r4, r4, PUSH #\push
1318 + orr r4, r4, r3, PULL #\pull
1319 + stmdb r0!, {r4 - r8, r10, ip, lr}
1322 + pop {r5 - r8, r10}
1323 + cfi_adjust_cfa_offset (-20)
1333 +15: mov lr, r3, PUSH #\push
1334 + ldr r3, [r1, #-4]!
1336 + orr lr, lr, r3, PULL #\pull
1337 + str lr, [r0, #-4]!
1342 +16: add r1, r1, #(\pull / 8)
1348 + backward_copy_shift push=8 pull=24
1350 +17: backward_copy_shift push=16 pull=16
1352 +18: backward_copy_shift push=24 pull=8
1357 diff --git a/libc/string/arm/glibc-neon/memset.S b/libc/string/arm/glibc-neon/memset.S
1360 --- /dev/null
1361 +++ b/libc/string/arm/glibc-neon/memset.S
1362 @@ -0,0 +1,68 @@
1363 +/* Copyright (C) 1998-2021 Free Software Foundation, Inc.
1431 diff --git a/libc/string/arm/glibc-neon/strcmp.S b/libc/string/arm/glibc-neon/strcmp.S
1434 --- /dev/null
1435 +++ b/libc/string/arm/glibc-neon/strcmp.S
1436 @@ -0,0 +1,504 @@
1437 +/* strcmp implementation for ARMv7-A, optimized for Cortex-A15.
1438 + Copyright (C) 2012-2021 Free Software Foundation, Inc.
1455 +#include <arm-features.h>
1464 + STRCMP_PRECHECK: Run a quick pre-check of the first byte in the
1465 + string. If comparing completely random strings the pre-check will
1508 +/* Additional internal variables for 64-bit aligned data. */
1516 +/* Additional internal variables for 32-bit aligned data. */
1532 + /* Macro to compute and return the result value for word-aligned
1558 + /* To use the big-endian trick we'd have to reverse all three words.
1601 + strd r4, r5, [sp, #-16]!
1603 + cfi_offset (r4, -16)
1604 + cfi_offset (r5, -12)
1607 + cfi_offset (r6, -8)
1608 + cfi_offset (r7, -4)
1624 + lsl tmp2, tmp2, #3 /* Bytes -> bits. */
1655 + ldrd data1a, data1b, [src1, #-8]
1656 + ldrd data2a, data2b, [src2, #-8]
1684 + /* Unrolled by a factor of 2, to reduce the number of post-increment
1694 + ldr data1, [src1, #-4]
1695 + ldr data2, [src2, #-4]
1709 + lsl tmp1, tmp1, #3 /* Bytes -> bits. */
1752 + /* If we've done the pre-check, then we don't need to check the
1818 + fast-path the exit. */
1913 + /* Now everything looks big-endian... */
1941 diff --git a/libc/string/arm/glibc-neon/strlen.S b/libc/string/arm/glibc-neon/strlen.S
1944 --- /dev/null
1945 +++ b/libc/string/arm/glibc-neon/strlen.S
1946 @@ -0,0 +1,76 @@
1947 +/* Copyright (C) 1998-2021 Free Software Foundation, Inc.
1969 + * entry: r0 -> string
1980 + rsb r0, r3, $0 @ get - that number into counter.
1984 + orr r2, r2, $0xff000000 @ set this byte to non-zero
1990 + orr r2, r2, $0x000000ff @ set this byte to non-zero
2010 + tstne r2, $0x0000ff00 @ (if first three all non-zero, 4th
2017 + tstne r2, $0x00ff0000 @ (if first three all non-zero, 4th
2023 diff --git a/libc/string/arm/glibc-neon/sysdep.h b/libc/string/arm/glibc-neon/sysdep.h
2026 --- /dev/null
2027 +++ b/libc/string/arm/glibc-neon/sysdep.h
2028 @@ -0,0 +1,339 @@
2030 + Copyright (C) 1997-2021 Free Software Foundation, Inc.
2053 +# include <arm-features.h>
2099 +#define ASM_SIZE_DIRECTIVE(name) .size name,.-name
2142 + push {lr}; \
2146 + cfi_adjust_cfa_offset (-4); \
2158 +/* Tag_ABI_align8_preserved: This code preserves 8-byte
2161 +/* Tag_ABI_align8_needed: This code may require 8-byte alignment from
2177 + version eschews the two-register addressing mode, while the
2186 +/* We're never using the two-register addressing mode, so this
2191 +/* The two-register addressing mode is OK, except on Thumb with pc. */
2200 +/* Load or store to/from a pc-relative EXPR into/from R, using T. */
2205 +98: .word EXPR - 99f - PC_OFS; \
2211 + movw T, #:lower16:EXPR - 99f - PC_OFS; \
2212 + movt T, #:upper16:EXPR - 99f - PC_OFS; \
2218 +98: .word EXPR - 99f - PC_OFS; \
2231 + movw R, #:lower16:_GLOBAL_OFFSET_TABLE_ - 97f - PC_OFS; \
2232 + movw T, #:lower16:99f - 98f - PC_OFS; \
2233 + movt R, #:upper16:_GLOBAL_OFFSET_TABLE_ - 97f - PC_OFS; \
2234 + movt T, #:upper16:99f - 98f - PC_OFS; \
2250 +99: .word _GLOBAL_OFFSET_TABLE_ - 98b - PC_OFS; \
2271 + or NEGOFF_OFF2 to use A-B for thumb and A for arm. */
2276 +# define NEGOFF_OFF2(R, OFFA, OFFB) [R, $((OFFA) - (OFFB))]
2293 +/* At this generic level we have no tricks to pull. Call the ABI routine. */
2295 + push { r1, r2, r3, lr }; \
2368 diff --git a/libc/string/arm/glibc-neon/sysdeps/generic/dwarf2.h b/libc/string/arm/glibc-neon/sysde…
2371 --- /dev/null
2372 +++ b/libc/string/arm/glibc-neon/sysdeps/generic/dwarf2.h
2373 @@ -0,0 +1,590 @@
2376 + Copyright (C) 1992-2021 Free Software Foundation, Inc.
2591 +#define DW_AT_lo_user 0x2000 /* implementation-defined range start */
2592 +#define DW_AT_hi_user 0x3ff0 /* implementation-defined range end */
2745 +#define DW_OP_lo_user 0x80 /* implementation-defined range start */
2746 +#define DW_OP_hi_user 0xff /* implementation-defined range end */
2924 +#define DW_LANG_lo_user 0x8000 /* implementation-defined range start */
2925 +#define DW_LANG_hi_user 0xffff /* implementation-defined range end */
2964 diff --git a/libc/string/arm/glibc-neon/sysdeps/generic/sysdep.h b/libc/string/arm/glibc-neon/sysde…
2967 --- /dev/null
2968 +++ b/libc/string/arm/glibc-neon/sysdeps/generic/sysdep.h
2969 @@ -0,0 +1,97 @@
2971 + Copyright (C) 1991-2021 Free Software Foundation, Inc.
3067 --