1*4882a593SmuzhiyunFrom 0bea89508c6be2b1a76650108d759d389e99bbf3 Mon Sep 17 00:00:00 2001 2*4882a593SmuzhiyunFrom: Jeffy Chen <jeffy.chen@rock-chips.com> 3*4882a593SmuzhiyunDate: Wed, 13 Apr 2022 16:32:02 +0800 4*4882a593SmuzhiyunSubject: [PATCH] libc: string: arm: Support using glibc-neon version of APIs 5*4882a593Smuzhiyun 6*4882a593SmuzhiyunSet USE_GLIBC_NEON to enable. 7*4882a593Smuzhiyun 8*4882a593SmuzhiyunSigned-off-by: Jeffy Chen <jeffy.chen@rock-chips.com> 9*4882a593Smuzhiyun--- 10*4882a593Smuzhiyun Makerules | 1 + 11*4882a593Smuzhiyun libc/string/Makefile.in | 10 +- 12*4882a593Smuzhiyun libc/string/arm/Makefile.in | 13 + 13*4882a593Smuzhiyun libc/string/arm/glibc-neon/README | 4 + 14*4882a593Smuzhiyun libc/string/arm/glibc-neon/arm-features.h | 59 ++ 15*4882a593Smuzhiyun libc/string/arm/glibc-neon/bcopy.c | 24 + 16*4882a593Smuzhiyun libc/string/arm/glibc-neon/bzero.c | 28 + 17*4882a593Smuzhiyun libc/string/arm/glibc-neon/glibc_wrap.h | 29 + 18*4882a593Smuzhiyun libc/string/arm/glibc-neon/memcmp.S | 1 + 19*4882a593Smuzhiyun libc/string/arm/glibc-neon/memcpy.S | 728 ++++++++++++++++++ 20*4882a593Smuzhiyun libc/string/arm/glibc-neon/memmove.S | 332 ++++++++ 21*4882a593Smuzhiyun libc/string/arm/glibc-neon/memset.S | 68 ++ 22*4882a593Smuzhiyun libc/string/arm/glibc-neon/strcmp.S | 504 ++++++++++++ 23*4882a593Smuzhiyun libc/string/arm/glibc-neon/strlen.S | 76 ++ 24*4882a593Smuzhiyun libc/string/arm/glibc-neon/sysdep.h | 339 ++++++++ 25*4882a593Smuzhiyun .../arm/glibc-neon/sysdeps/generic/dwarf2.h | 590 ++++++++++++++ 26*4882a593Smuzhiyun .../arm/glibc-neon/sysdeps/generic/sysdep.h | 97 +++ 27*4882a593Smuzhiyun 17 files changed, 2901 insertions(+), 2 deletions(-) 28*4882a593Smuzhiyun create mode 100644 libc/string/arm/Makefile.in 29*4882a593Smuzhiyun create mode 100644 libc/string/arm/glibc-neon/README 30*4882a593Smuzhiyun create mode 100644 libc/string/arm/glibc-neon/arm-features.h 31*4882a593Smuzhiyun create mode 100644 libc/string/arm/glibc-neon/bcopy.c 
32*4882a593Smuzhiyun create mode 100644 libc/string/arm/glibc-neon/bzero.c 33*4882a593Smuzhiyun create mode 100644 libc/string/arm/glibc-neon/glibc_wrap.h 34*4882a593Smuzhiyun create mode 120000 libc/string/arm/glibc-neon/memcmp.S 35*4882a593Smuzhiyun create mode 100644 libc/string/arm/glibc-neon/memcpy.S 36*4882a593Smuzhiyun create mode 100644 libc/string/arm/glibc-neon/memmove.S 37*4882a593Smuzhiyun create mode 100644 libc/string/arm/glibc-neon/memset.S 38*4882a593Smuzhiyun create mode 100644 libc/string/arm/glibc-neon/strcmp.S 39*4882a593Smuzhiyun create mode 100644 libc/string/arm/glibc-neon/strlen.S 40*4882a593Smuzhiyun create mode 100644 libc/string/arm/glibc-neon/sysdep.h 41*4882a593Smuzhiyun create mode 100644 libc/string/arm/glibc-neon/sysdeps/generic/dwarf2.h 42*4882a593Smuzhiyun create mode 100644 libc/string/arm/glibc-neon/sysdeps/generic/sysdep.h 43*4882a593Smuzhiyun 44*4882a593Smuzhiyundiff --git a/Makerules b/Makerules 45*4882a593Smuzhiyunindex fd40e6c..2013ba9 100644 46*4882a593Smuzhiyun--- a/Makerules 47*4882a593Smuzhiyun+++ b/Makerules 48*4882a593Smuzhiyun@@ -255,6 +255,7 @@ CFLAGS_gen.dep = -MT $@ -MD -MP -MF $(dir $@).$(notdir $@).dep 49*4882a593Smuzhiyun 50*4882a593Smuzhiyun cmd_compile.c = $(CC) -c $< -o $@ \ 51*4882a593Smuzhiyun $(filter-out $(CFLAGS-OMIT-$(notdir $<)), \ 52*4882a593Smuzhiyun+ $(CFLAGS-extra-$(subst $(top_srcdir),,$(<D))) \ 53*4882a593Smuzhiyun $(CFLAGS) \ 54*4882a593Smuzhiyun $(CFLAGS-for-library-members) \ 55*4882a593Smuzhiyun $(CFLAGS-$(suffix $@)) \ 56*4882a593Smuzhiyundiff --git a/libc/string/Makefile.in b/libc/string/Makefile.in 57*4882a593Smuzhiyunindex e7f2ccd..975b395 100644 58*4882a593Smuzhiyun--- a/libc/string/Makefile.in 59*4882a593Smuzhiyun+++ b/libc/string/Makefile.in 60*4882a593Smuzhiyun@@ -5,6 +5,12 @@ 61*4882a593Smuzhiyun # Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball. 
62*4882a593Smuzhiyun # 63*4882a593Smuzhiyun 64*4882a593Smuzhiyun+ifeq ($(TARGET_ARCH),arm) 65*4882a593Smuzhiyun+ifneq ($(USE_GLIBC_NEON),) 66*4882a593Smuzhiyun+include $(top_srcdir)libc/string/$(TARGET_ARCH)/Makefile.in 67*4882a593Smuzhiyun+endif 68*4882a593Smuzhiyun+endif 69*4882a593Smuzhiyun+ 70*4882a593Smuzhiyun subdirs += libc/string/$(TARGET_ARCH) libc/string/generic 71*4882a593Smuzhiyun 72*4882a593Smuzhiyun # 73*4882a593Smuzhiyun@@ -25,8 +31,8 @@ STRING_SUBARCH_OBJS := $(STRING_SUBARCH_SOBJ) $(STRING_SUBARCH_COBJ) 74*4882a593Smuzhiyun endif 75*4882a593Smuzhiyun 76*4882a593Smuzhiyun # Collect the arch specific implementation (asm, c files) 77*4882a593Smuzhiyun-STRING_ARCH_DIR := $(top_srcdir)libc/string/$(TARGET_ARCH) 78*4882a593Smuzhiyun-STRING_ARCH_OUT := $(top_builddir)libc/string/$(TARGET_ARCH) 79*4882a593Smuzhiyun+STRING_ARCH_DIR ?= $(top_srcdir)libc/string/$(TARGET_ARCH) 80*4882a593Smuzhiyun+STRING_ARCH_OUT ?= $(top_builddir)libc/string/$(TARGET_ARCH) 81*4882a593Smuzhiyun 82*4882a593Smuzhiyun STRING_ARCH_SRC := $(wildcard $(STRING_ARCH_DIR)/*.c) 83*4882a593Smuzhiyun STRING_ARCH_OBJ := $(patsubst $(STRING_ARCH_DIR)/%.c,$(STRING_ARCH_OUT)/%.o,$(STRING_ARCH_SRC)) 84*4882a593Smuzhiyundiff --git a/libc/string/arm/Makefile.in b/libc/string/arm/Makefile.in 85*4882a593Smuzhiyunnew file mode 100644 86*4882a593Smuzhiyunindex 0000000..4cd7e24 87*4882a593Smuzhiyun--- /dev/null 88*4882a593Smuzhiyun+++ b/libc/string/arm/Makefile.in 89*4882a593Smuzhiyun@@ -0,0 +1,13 @@ 90*4882a593Smuzhiyun+# Makefile for uClibc 91*4882a593Smuzhiyun+# 92*4882a593Smuzhiyun+# Copyright (C) 2022 Jeffy Chen <jeffy.chen@rock-chips.com> 93*4882a593Smuzhiyun+# 94*4882a593Smuzhiyun+# Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball. 
95*4882a593Smuzhiyun+# 96*4882a593Smuzhiyun+ 97*4882a593Smuzhiyun+SUBDIR := libc/string/arm/glibc-neon 98*4882a593Smuzhiyun+ 99*4882a593Smuzhiyun+STRING_ARCH_DIR := $(top_srcdir)$(SUBDIR) 100*4882a593Smuzhiyun+STRING_ARCH_OUT := $(top_builddir)$(SUBDIR) 101*4882a593Smuzhiyun+ 102*4882a593Smuzhiyun+CFLAGS-extra-$(SUBDIR) := -include glibc_wrap.h -I$(top_srcdir)$(SUBDIR) 103*4882a593Smuzhiyundiff --git a/libc/string/arm/glibc-neon/README b/libc/string/arm/glibc-neon/README 104*4882a593Smuzhiyunnew file mode 100644 105*4882a593Smuzhiyunindex 0000000..ad72f05 106*4882a593Smuzhiyun--- /dev/null 107*4882a593Smuzhiyun+++ b/libc/string/arm/glibc-neon/README 108*4882a593Smuzhiyun@@ -0,0 +1,4 @@ 109*4882a593Smuzhiyun+NOTE: 110*4882a593Smuzhiyun+1/ Ported from glibc-2.34.9. 111*4882a593Smuzhiyun+2/ glibc has no ARM-specific memcmp, so memcmp.S is a symlink to uClibc's ../memcmp.S. 112*4882a593Smuzhiyun+3/ uClibc uses strcmp as strcoll when locale support is disabled. 113*4882a593Smuzhiyundiff --git a/libc/string/arm/glibc-neon/arm-features.h b/libc/string/arm/glibc-neon/arm-features.h 114*4882a593Smuzhiyunnew file mode 100644 115*4882a593Smuzhiyunindex 0000000..4a86e00 116*4882a593Smuzhiyun--- /dev/null 117*4882a593Smuzhiyun+++ b/libc/string/arm/glibc-neon/arm-features.h 118*4882a593Smuzhiyun@@ -0,0 +1,59 @@ 119*4882a593Smuzhiyun+/* Macros to test for CPU features on ARM. Generic ARM version. 120*4882a593Smuzhiyun+ Copyright (C) 2012-2021 Free Software Foundation, Inc. 121*4882a593Smuzhiyun+ This file is part of the GNU C Library. 122*4882a593Smuzhiyun+ 123*4882a593Smuzhiyun+ The GNU C Library is free software; you can redistribute it and/or 124*4882a593Smuzhiyun+ modify it under the terms of the GNU Lesser General Public 125*4882a593Smuzhiyun+ License as published by the Free Software Foundation; either 126*4882a593Smuzhiyun+ version 2.1 of the License, or (at your option) any later version.
127*4882a593Smuzhiyun+ 128*4882a593Smuzhiyun+ The GNU C Library is distributed in the hope that it will be useful, 129*4882a593Smuzhiyun+ but WITHOUT ANY WARRANTY; without even the implied warranty of 130*4882a593Smuzhiyun+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 131*4882a593Smuzhiyun+ Lesser General Public License for more details. 132*4882a593Smuzhiyun+ 133*4882a593Smuzhiyun+ You should have received a copy of the GNU Lesser General Public 134*4882a593Smuzhiyun+ License along with the GNU C Library. If not, see 135*4882a593Smuzhiyun+ <https://www.gnu.org/licenses/>. */ 136*4882a593Smuzhiyun+ 137*4882a593Smuzhiyun+#ifndef _ARM_ARM_FEATURES_H 138*4882a593Smuzhiyun+#define _ARM_ARM_FEATURES_H 1 139*4882a593Smuzhiyun+ 140*4882a593Smuzhiyun+/* An OS-specific arm-features.h file should define ARM_HAVE_VFP to 141*4882a593Smuzhiyun+ an appropriate expression for testing at runtime whether the VFP 142*4882a593Smuzhiyun+ hardware is present. We'll then redefine it to a constant if we 143*4882a593Smuzhiyun+ know at compile time that we can assume VFP. */ 144*4882a593Smuzhiyun+ 145*4882a593Smuzhiyun+#ifndef __SOFTFP__ 146*4882a593Smuzhiyun+/* The compiler is generating VFP instructions, so we're already 147*4882a593Smuzhiyun+ assuming the hardware exists. */ 148*4882a593Smuzhiyun+# undef ARM_HAVE_VFP 149*4882a593Smuzhiyun+# define ARM_HAVE_VFP 1 150*4882a593Smuzhiyun+#endif 151*4882a593Smuzhiyun+ 152*4882a593Smuzhiyun+/* An OS-specific arm-features.h file may define ARM_ASSUME_NO_IWMMXT 153*4882a593Smuzhiyun+ to indicate at compile time that iWMMXt hardware is never present 154*4882a593Smuzhiyun+ at runtime (or that we never care about its state) and so need not 155*4882a593Smuzhiyun+ be checked for. 
*/ 156*4882a593Smuzhiyun+ 157*4882a593Smuzhiyun+/* A more-specific arm-features.h file may define ARM_ALWAYS_BX to indicate 158*4882a593Smuzhiyun+ that instructions using pc as a destination register must never be used, 159*4882a593Smuzhiyun+ so a "bx" (or "blx") instruction is always required. */ 160*4882a593Smuzhiyun+ 161*4882a593Smuzhiyun+/* The log2 of the minimum alignment required for an address that 162*4882a593Smuzhiyun+ is the target of a computed branch (i.e. a "bx" instruction). 163*4882a593Smuzhiyun+ A more-specific arm-features.h file may define this to set a more 164*4882a593Smuzhiyun+ stringent requirement. 165*4882a593Smuzhiyun+ 166*4882a593Smuzhiyun+ Using this only makes sense for code in ARM mode (where instructions 167*4882a593Smuzhiyun+ always have a fixed size of four bytes), or for Thumb-mode code that is 168*4882a593Smuzhiyun+ specifically aligning all the related branch targets to match (since 169*4882a593Smuzhiyun+ Thumb instructions might be either two or four bytes). */ 170*4882a593Smuzhiyun+#ifndef ARM_BX_ALIGN_LOG2 171*4882a593Smuzhiyun+# define ARM_BX_ALIGN_LOG2 2 172*4882a593Smuzhiyun+#endif 173*4882a593Smuzhiyun+ 174*4882a593Smuzhiyun+/* An OS-specific arm-features.h file may define ARM_NO_INDEX_REGISTER to 175*4882a593Smuzhiyun+ indicate that the two-register addressing modes must never be used. */ 176*4882a593Smuzhiyun+ 177*4882a593Smuzhiyun+#endif /* arm-features.h */ 178*4882a593Smuzhiyundiff --git a/libc/string/arm/glibc-neon/bcopy.c b/libc/string/arm/glibc-neon/bcopy.c 179*4882a593Smuzhiyunnew file mode 100644 180*4882a593Smuzhiyunindex 0000000..302bbbd 181*4882a593Smuzhiyun--- /dev/null 182*4882a593Smuzhiyun+++ b/libc/string/arm/glibc-neon/bcopy.c 183*4882a593Smuzhiyun@@ -0,0 +1,24 @@ 184*4882a593Smuzhiyun+/* Copyright (C) 1991-2021 Free Software Foundation, Inc. 185*4882a593Smuzhiyun+ This file is part of the GNU C Library. 
186*4882a593Smuzhiyun+ 187*4882a593Smuzhiyun+ The GNU C Library is free software; you can redistribute it and/or 188*4882a593Smuzhiyun+ modify it under the terms of the GNU Lesser General Public 189*4882a593Smuzhiyun+ License as published by the Free Software Foundation; either 190*4882a593Smuzhiyun+ version 2.1 of the License, or (at your option) any later version. 191*4882a593Smuzhiyun+ 192*4882a593Smuzhiyun+ The GNU C Library is distributed in the hope that it will be useful, 193*4882a593Smuzhiyun+ but WITHOUT ANY WARRANTY; without even the implied warranty of 194*4882a593Smuzhiyun+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 195*4882a593Smuzhiyun+ Lesser General Public License for more details. 196*4882a593Smuzhiyun+ 197*4882a593Smuzhiyun+ You should have received a copy of the GNU Lesser General Public 198*4882a593Smuzhiyun+ License along with the GNU C Library; if not, see 199*4882a593Smuzhiyun+ <https://www.gnu.org/licenses/>. */ 200*4882a593Smuzhiyun+ 201*4882a593Smuzhiyun+#include <string.h> 202*4882a593Smuzhiyun+ 203*4882a593Smuzhiyun+void 204*4882a593Smuzhiyun+bcopy (const void *src, void *dest, size_t len) 205*4882a593Smuzhiyun+{ 206*4882a593Smuzhiyun+ memmove (dest, src, len); 207*4882a593Smuzhiyun+} 208*4882a593Smuzhiyundiff --git a/libc/string/arm/glibc-neon/bzero.c b/libc/string/arm/glibc-neon/bzero.c 209*4882a593Smuzhiyunnew file mode 100644 210*4882a593Smuzhiyunindex 0000000..4391dad 211*4882a593Smuzhiyun--- /dev/null 212*4882a593Smuzhiyun+++ b/libc/string/arm/glibc-neon/bzero.c 213*4882a593Smuzhiyun@@ -0,0 +1,28 @@ 214*4882a593Smuzhiyun+/* Copyright (C) 1991-2021 Free Software Foundation, Inc. 215*4882a593Smuzhiyun+ This file is part of the GNU C Library. 
216*4882a593Smuzhiyun+ 217*4882a593Smuzhiyun+ The GNU C Library is free software; you can redistribute it and/or 218*4882a593Smuzhiyun+ modify it under the terms of the GNU Lesser General Public 219*4882a593Smuzhiyun+ License as published by the Free Software Foundation; either 220*4882a593Smuzhiyun+ version 2.1 of the License, or (at your option) any later version. 221*4882a593Smuzhiyun+ 222*4882a593Smuzhiyun+ The GNU C Library is distributed in the hope that it will be useful, 223*4882a593Smuzhiyun+ but WITHOUT ANY WARRANTY; without even the implied warranty of 224*4882a593Smuzhiyun+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 225*4882a593Smuzhiyun+ Lesser General Public License for more details. 226*4882a593Smuzhiyun+ 227*4882a593Smuzhiyun+ You should have received a copy of the GNU Lesser General Public 228*4882a593Smuzhiyun+ License along with the GNU C Library; if not, see 229*4882a593Smuzhiyun+ <https://www.gnu.org/licenses/>. */ 230*4882a593Smuzhiyun+ 231*4882a593Smuzhiyun+#include <string.h> 232*4882a593Smuzhiyun+ 233*4882a593Smuzhiyun+#undef __bzero 234*4882a593Smuzhiyun+ 235*4882a593Smuzhiyun+/* Set N bytes of S to 0. */ 236*4882a593Smuzhiyun+void 237*4882a593Smuzhiyun+__bzero (void *s, size_t len) 238*4882a593Smuzhiyun+{ 239*4882a593Smuzhiyun+ memset (s, '\0', len); 240*4882a593Smuzhiyun+} 241*4882a593Smuzhiyun+weak_alias (__bzero, bzero) 242*4882a593Smuzhiyundiff --git a/libc/string/arm/glibc-neon/glibc_wrap.h b/libc/string/arm/glibc-neon/glibc_wrap.h 243*4882a593Smuzhiyunnew file mode 100644 244*4882a593Smuzhiyunindex 0000000..f00d50d 245*4882a593Smuzhiyun--- /dev/null 246*4882a593Smuzhiyun+++ b/libc/string/arm/glibc-neon/glibc_wrap.h 247*4882a593Smuzhiyun@@ -0,0 +1,29 @@ 248*4882a593Smuzhiyun+/* Macros to adapt glibc version of string APIs. 249*4882a593Smuzhiyun+ Copyright (C) 2022 Jeffy Chen <jeffy.chen@rock-chips.com> 250*4882a593Smuzhiyun+ This file is part of the GNU C Library. 
251*4882a593Smuzhiyun+ 252*4882a593Smuzhiyun+ The GNU C Library is free software; you can redistribute it and/or 253*4882a593Smuzhiyun+ modify it under the terms of the GNU Lesser General Public 254*4882a593Smuzhiyun+ License as published by the Free Software Foundation; either 255*4882a593Smuzhiyun+ version 2.1 of the License, or (at your option) any later version. 256*4882a593Smuzhiyun+ 257*4882a593Smuzhiyun+ The GNU C Library is distributed in the hope that it will be useful, 258*4882a593Smuzhiyun+ but WITHOUT ANY WARRANTY; without even the implied warranty of 259*4882a593Smuzhiyun+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 260*4882a593Smuzhiyun+ Lesser General Public License for more details. 261*4882a593Smuzhiyun+ 262*4882a593Smuzhiyun+ You should have received a copy of the GNU Lesser General Public 263*4882a593Smuzhiyun+ License along with the GNU C Library. If not, see 264*4882a593Smuzhiyun+ <https://www.gnu.org/licenses/>. */ 265*4882a593Smuzhiyun+ 266*4882a593Smuzhiyun+#ifndef GLIBC_WRAP_H 267*4882a593Smuzhiyun+#define GLIBC_WRAP_H 268*4882a593Smuzhiyun+ 269*4882a593Smuzhiyun+#define IS_IN(lib) 1 270*4882a593Smuzhiyun+ 271*4882a593Smuzhiyun+#define libc_hidden_builtin_def(fn) libc_hidden_def(fn) 272*4882a593Smuzhiyun+ 273*4882a593Smuzhiyun+#define C_SYMBOL_NAME(name) name 274*4882a593Smuzhiyun+#define MEMCPY_NEON 275*4882a593Smuzhiyun+ 276*4882a593Smuzhiyun+#endif // GLIBC_WRAP_H 277*4882a593Smuzhiyundiff --git a/libc/string/arm/glibc-neon/memcmp.S b/libc/string/arm/glibc-neon/memcmp.S 278*4882a593Smuzhiyunnew file mode 120000 279*4882a593Smuzhiyunindex 0000000..7c28a09 280*4882a593Smuzhiyun--- /dev/null 281*4882a593Smuzhiyun+++ b/libc/string/arm/glibc-neon/memcmp.S 282*4882a593Smuzhiyun@@ -0,0 +1 @@ 283*4882a593Smuzhiyun+../memcmp.S 284*4882a593Smuzhiyun\ No newline at end of file 285*4882a593Smuzhiyundiff --git a/libc/string/arm/glibc-neon/memcpy.S b/libc/string/arm/glibc-neon/memcpy.S 286*4882a593Smuzhiyunnew file mode 100644 
287*4882a593Smuzhiyunindex 0000000..ee562e8 288*4882a593Smuzhiyun--- /dev/null 289*4882a593Smuzhiyun+++ b/libc/string/arm/glibc-neon/memcpy.S 290*4882a593Smuzhiyun@@ -0,0 +1,728 @@ 291*4882a593Smuzhiyun+/* NEON/VFP/ARM version of memcpy optimized for Cortex-A15. 292*4882a593Smuzhiyun+ Copyright (C) 2013-2021 Free Software Foundation, Inc. 293*4882a593Smuzhiyun+ This file is part of the GNU C Library. 294*4882a593Smuzhiyun+ 295*4882a593Smuzhiyun+ The GNU C Library is free software; you can redistribute it and/or 296*4882a593Smuzhiyun+ modify it under the terms of the GNU Lesser General Public 297*4882a593Smuzhiyun+ License as published by the Free Software Foundation; either 298*4882a593Smuzhiyun+ version 2.1 of the License, or (at your option) any later version. 299*4882a593Smuzhiyun+ 300*4882a593Smuzhiyun+ The GNU C Library is distributed in the hope that it will be useful, 301*4882a593Smuzhiyun+ but WITHOUT ANY WARRANTY; without even the implied warranty of 302*4882a593Smuzhiyun+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 303*4882a593Smuzhiyun+ Lesser General Public License for more details. 304*4882a593Smuzhiyun+ 305*4882a593Smuzhiyun+ You should have received a copy of the GNU Lesser General Public 306*4882a593Smuzhiyun+ License along with the GNU C Library; if not, see 307*4882a593Smuzhiyun+ <https://www.gnu.org/licenses/>. 308*4882a593Smuzhiyun+ 309*4882a593Smuzhiyun+ This memcpy routine is optimised for Cortex-A15 cores and takes advantage 310*4882a593Smuzhiyun+ of VFP or NEON when built with the appropriate flags. 311*4882a593Smuzhiyun+ 312*4882a593Smuzhiyun+ Assumptions: 313*4882a593Smuzhiyun+ 314*4882a593Smuzhiyun+ ARMv6 (ARMv7-a if using Neon) 315*4882a593Smuzhiyun+ ARM state 316*4882a593Smuzhiyun+ Unaligned accesses 317*4882a593Smuzhiyun+ 318*4882a593Smuzhiyun+ */ 319*4882a593Smuzhiyun+ 320*4882a593Smuzhiyun+/* Thumb cannot encode negative immediate offsets in memory operations. 
*/ 321*4882a593Smuzhiyun+#ifndef NO_THUMB 322*4882a593Smuzhiyun+#define NO_THUMB 323*4882a593Smuzhiyun+#endif 324*4882a593Smuzhiyun+#include <sysdep.h> 325*4882a593Smuzhiyun+#include <arm-features.h> 326*4882a593Smuzhiyun+ 327*4882a593Smuzhiyun+ .syntax unified 328*4882a593Smuzhiyun+ /* This implementation requires ARM state. */ 329*4882a593Smuzhiyun+ .arm 330*4882a593Smuzhiyun+ 331*4882a593Smuzhiyun+#ifdef MEMCPY_NEON 332*4882a593Smuzhiyun+ 333*4882a593Smuzhiyun+ .fpu neon 334*4882a593Smuzhiyun+ .arch armv7-a 335*4882a593Smuzhiyun+# define FRAME_SIZE 4 336*4882a593Smuzhiyun+# define USE_VFP 337*4882a593Smuzhiyun+# define USE_NEON 338*4882a593Smuzhiyun+ 339*4882a593Smuzhiyun+#elif defined (MEMCPY_VFP) 340*4882a593Smuzhiyun+ 341*4882a593Smuzhiyun+ .arch armv6 342*4882a593Smuzhiyun+ .fpu vfpv2 343*4882a593Smuzhiyun+# define FRAME_SIZE 32 344*4882a593Smuzhiyun+# define USE_VFP 345*4882a593Smuzhiyun+ 346*4882a593Smuzhiyun+#else 347*4882a593Smuzhiyun+ .arch armv6 348*4882a593Smuzhiyun+# define FRAME_SIZE 32 349*4882a593Smuzhiyun+ 350*4882a593Smuzhiyun+#endif 351*4882a593Smuzhiyun+ 352*4882a593Smuzhiyun+#define ALIGN(addr, align) addr:align 353*4882a593Smuzhiyun+ 354*4882a593Smuzhiyun+#define INSN_SIZE 4 355*4882a593Smuzhiyun+ 356*4882a593Smuzhiyun+/* Call parameters. */ 357*4882a593Smuzhiyun+#define dstin r0 358*4882a593Smuzhiyun+#define src r1 359*4882a593Smuzhiyun+#define count r2 360*4882a593Smuzhiyun+ 361*4882a593Smuzhiyun+/* Locals. */ 362*4882a593Smuzhiyun+#define tmp1 r3 363*4882a593Smuzhiyun+#define dst ip 364*4882a593Smuzhiyun+#define tmp2 r8 365*4882a593Smuzhiyun+ 366*4882a593Smuzhiyun+/* These two macros both work by repeated invocation of the macro 367*4882a593Smuzhiyun+ dispatch_step (not defined here). That macro performs one "step", 368*4882a593Smuzhiyun+ doing one load instruction and one store instruction to copy one 369*4882a593Smuzhiyun+ "unit". 
On entry, TMP1 contains the number of bytes to be copied, 370*4882a593Smuzhiyun+ a multiple of the unit size. The macro clobbers TMP1 in the 371*4882a593Smuzhiyun+ process of doing a computed jump to the tail containing the 372*4882a593Smuzhiyun+ appropriate number of steps. 373*4882a593Smuzhiyun+ 374*4882a593Smuzhiyun+ In dispatch_7_dword, dispatch_step is invoked seven times, with an 375*4882a593Smuzhiyun+ argument that is 7 for the first and 1 for the last. Units are 376*4882a593Smuzhiyun+ double-words (8 bytes). TMP1 is at most 56. 377*4882a593Smuzhiyun+ 378*4882a593Smuzhiyun+ In dispatch_15_word, dispatch_step is invoked fifteen times, 379*4882a593Smuzhiyun+ with an argument that is 15 for the first and 1 for the last. 380*4882a593Smuzhiyun+ Units are words (4 bytes). TMP1 is at most 60. */ 381*4882a593Smuzhiyun+ 382*4882a593Smuzhiyun+#ifndef ARM_ALWAYS_BX 383*4882a593Smuzhiyun+# if ARM_BX_ALIGN_LOG2 != 2 384*4882a593Smuzhiyun+# error case not handled 385*4882a593Smuzhiyun+# endif 386*4882a593Smuzhiyun+ .macro dispatch_7_dword 387*4882a593Smuzhiyun+ rsb tmp1, tmp1, #((7 * 8) - PC_OFS + INSN_SIZE) 388*4882a593Smuzhiyun+ add pc, pc, tmp1 389*4882a593Smuzhiyun+ dispatch_step 7 390*4882a593Smuzhiyun+ dispatch_step 6 391*4882a593Smuzhiyun+ dispatch_step 5 392*4882a593Smuzhiyun+ dispatch_step 4 393*4882a593Smuzhiyun+ dispatch_step 3 394*4882a593Smuzhiyun+ dispatch_step 2 395*4882a593Smuzhiyun+ dispatch_step 1 396*4882a593Smuzhiyun+ .purgem dispatch_step 397*4882a593Smuzhiyun+ .endm 398*4882a593Smuzhiyun+ 399*4882a593Smuzhiyun+ .macro dispatch_15_word 400*4882a593Smuzhiyun+ rsb tmp1, tmp1, #((15 * 4) - PC_OFS/2 + INSN_SIZE/2) 401*4882a593Smuzhiyun+ add pc, pc, tmp1, lsl #1 402*4882a593Smuzhiyun+ dispatch_step 15 403*4882a593Smuzhiyun+ dispatch_step 14 404*4882a593Smuzhiyun+ dispatch_step 13 405*4882a593Smuzhiyun+ dispatch_step 12 406*4882a593Smuzhiyun+ dispatch_step 11 407*4882a593Smuzhiyun+ dispatch_step 10 408*4882a593Smuzhiyun+ dispatch_step 9 
409*4882a593Smuzhiyun+ dispatch_step 8 410*4882a593Smuzhiyun+ dispatch_step 7 411*4882a593Smuzhiyun+ dispatch_step 6 412*4882a593Smuzhiyun+ dispatch_step 5 413*4882a593Smuzhiyun+ dispatch_step 4 414*4882a593Smuzhiyun+ dispatch_step 3 415*4882a593Smuzhiyun+ dispatch_step 2 416*4882a593Smuzhiyun+ dispatch_step 1 417*4882a593Smuzhiyun+ .purgem dispatch_step 418*4882a593Smuzhiyun+ .endm 419*4882a593Smuzhiyun+#else 420*4882a593Smuzhiyun+# if ARM_BX_ALIGN_LOG2 < 3 421*4882a593Smuzhiyun+# error case not handled 422*4882a593Smuzhiyun+# endif 423*4882a593Smuzhiyun+ .macro dispatch_helper steps, log2_bytes_per_step 424*4882a593Smuzhiyun+ /* TMP1 gets (max_bytes - bytes_to_copy), where max_bytes is 425*4882a593Smuzhiyun+ (STEPS << LOG2_BYTES_PER_STEP). 426*4882a593Smuzhiyun+ So this is (steps_to_skip << LOG2_BYTES_PER_STEP). 427*4882a593Smuzhiyun+ Then it needs further adjustment to compensate for the 428*4882a593Smuzhiyun+ distance between the PC value taken below (0f + PC_OFS) 429*4882a593Smuzhiyun+ and the first step's instructions (1f). */ 430*4882a593Smuzhiyun+ rsb tmp1, tmp1, #((\steps << \log2_bytes_per_step) \ 431*4882a593Smuzhiyun+ + ((1f - PC_OFS - 0f) \ 432*4882a593Smuzhiyun+ >> (ARM_BX_ALIGN_LOG2 - \log2_bytes_per_step))) 433*4882a593Smuzhiyun+ /* Shifting down LOG2_BYTES_PER_STEP gives us the number of 434*4882a593Smuzhiyun+ steps to skip, then shifting up ARM_BX_ALIGN_LOG2 gives us 435*4882a593Smuzhiyun+ the (byte) distance to add to the PC. 
*/ 436*4882a593Smuzhiyun+0: add tmp1, pc, tmp1, lsl #(ARM_BX_ALIGN_LOG2 - \log2_bytes_per_step) 437*4882a593Smuzhiyun+ bx tmp1 438*4882a593Smuzhiyun+ .p2align ARM_BX_ALIGN_LOG2 439*4882a593Smuzhiyun+1: 440*4882a593Smuzhiyun+ .endm 441*4882a593Smuzhiyun+ 442*4882a593Smuzhiyun+ .macro dispatch_7_dword 443*4882a593Smuzhiyun+ dispatch_helper 7, 3 444*4882a593Smuzhiyun+ .p2align ARM_BX_ALIGN_LOG2 445*4882a593Smuzhiyun+ dispatch_step 7 446*4882a593Smuzhiyun+ .p2align ARM_BX_ALIGN_LOG2 447*4882a593Smuzhiyun+ dispatch_step 6 448*4882a593Smuzhiyun+ .p2align ARM_BX_ALIGN_LOG2 449*4882a593Smuzhiyun+ dispatch_step 5 450*4882a593Smuzhiyun+ .p2align ARM_BX_ALIGN_LOG2 451*4882a593Smuzhiyun+ dispatch_step 4 452*4882a593Smuzhiyun+ .p2align ARM_BX_ALIGN_LOG2 453*4882a593Smuzhiyun+ dispatch_step 3 454*4882a593Smuzhiyun+ .p2align ARM_BX_ALIGN_LOG2 455*4882a593Smuzhiyun+ dispatch_step 2 456*4882a593Smuzhiyun+ .p2align ARM_BX_ALIGN_LOG2 457*4882a593Smuzhiyun+ dispatch_step 1 458*4882a593Smuzhiyun+ .p2align ARM_BX_ALIGN_LOG2 459*4882a593Smuzhiyun+ .purgem dispatch_step 460*4882a593Smuzhiyun+ .endm 461*4882a593Smuzhiyun+ 462*4882a593Smuzhiyun+ .macro dispatch_15_word 463*4882a593Smuzhiyun+ dispatch_helper 15, 2 464*4882a593Smuzhiyun+ dispatch_step 15 465*4882a593Smuzhiyun+ .p2align ARM_BX_ALIGN_LOG2 466*4882a593Smuzhiyun+ dispatch_step 14 467*4882a593Smuzhiyun+ .p2align ARM_BX_ALIGN_LOG2 468*4882a593Smuzhiyun+ dispatch_step 13 469*4882a593Smuzhiyun+ .p2align ARM_BX_ALIGN_LOG2 470*4882a593Smuzhiyun+ dispatch_step 12 471*4882a593Smuzhiyun+ .p2align ARM_BX_ALIGN_LOG2 472*4882a593Smuzhiyun+ dispatch_step 11 473*4882a593Smuzhiyun+ .p2align ARM_BX_ALIGN_LOG2 474*4882a593Smuzhiyun+ dispatch_step 10 475*4882a593Smuzhiyun+ .p2align ARM_BX_ALIGN_LOG2 476*4882a593Smuzhiyun+ dispatch_step 9 477*4882a593Smuzhiyun+ .p2align ARM_BX_ALIGN_LOG2 478*4882a593Smuzhiyun+ dispatch_step 8 479*4882a593Smuzhiyun+ .p2align ARM_BX_ALIGN_LOG2 480*4882a593Smuzhiyun+ dispatch_step 7 481*4882a593Smuzhiyun+ .p2align 
ARM_BX_ALIGN_LOG2 482*4882a593Smuzhiyun+ dispatch_step 6 483*4882a593Smuzhiyun+ .p2align ARM_BX_ALIGN_LOG2 484*4882a593Smuzhiyun+ dispatch_step 5 485*4882a593Smuzhiyun+ .p2align ARM_BX_ALIGN_LOG2 486*4882a593Smuzhiyun+ dispatch_step 4 487*4882a593Smuzhiyun+ .p2align ARM_BX_ALIGN_LOG2 488*4882a593Smuzhiyun+ dispatch_step 3 489*4882a593Smuzhiyun+ .p2align ARM_BX_ALIGN_LOG2 490*4882a593Smuzhiyun+ dispatch_step 2 491*4882a593Smuzhiyun+ .p2align ARM_BX_ALIGN_LOG2 492*4882a593Smuzhiyun+ dispatch_step 1 493*4882a593Smuzhiyun+ .p2align ARM_BX_ALIGN_LOG2 494*4882a593Smuzhiyun+ .purgem dispatch_step 495*4882a593Smuzhiyun+ .endm 496*4882a593Smuzhiyun+ 497*4882a593Smuzhiyun+#endif 498*4882a593Smuzhiyun+ 499*4882a593Smuzhiyun+#ifndef USE_NEON 500*4882a593Smuzhiyun+/* For bulk copies using GP registers. */ 501*4882a593Smuzhiyun+#define A_l r2 /* Call-clobbered. */ 502*4882a593Smuzhiyun+#define A_h r3 /* Call-clobbered. */ 503*4882a593Smuzhiyun+#define B_l r4 504*4882a593Smuzhiyun+#define B_h r5 505*4882a593Smuzhiyun+#define C_l r6 506*4882a593Smuzhiyun+#define C_h r7 507*4882a593Smuzhiyun+/* Don't use the pair r8,r9 because in some EABI variants r9 is reserved. */ 508*4882a593Smuzhiyun+#define D_l r10 509*4882a593Smuzhiyun+#define D_h r11 510*4882a593Smuzhiyun+#endif 511*4882a593Smuzhiyun+ 512*4882a593Smuzhiyun+/* Number of lines ahead to pre-fetch data. If you change this the code 513*4882a593Smuzhiyun+ below will need adjustment to compensate. 
*/ 514*4882a593Smuzhiyun+ 515*4882a593Smuzhiyun+#define prefetch_lines 5 516*4882a593Smuzhiyun+ 517*4882a593Smuzhiyun+#ifdef USE_VFP 518*4882a593Smuzhiyun+ .macro cpy_line_vfp vreg, base 519*4882a593Smuzhiyun+ vstr \vreg, [dst, #\base] 520*4882a593Smuzhiyun+ vldr \vreg, [src, #\base] 521*4882a593Smuzhiyun+ vstr d0, [dst, #\base + 8] 522*4882a593Smuzhiyun+ vldr d0, [src, #\base + 8] 523*4882a593Smuzhiyun+ vstr d1, [dst, #\base + 16] 524*4882a593Smuzhiyun+ vldr d1, [src, #\base + 16] 525*4882a593Smuzhiyun+ vstr d2, [dst, #\base + 24] 526*4882a593Smuzhiyun+ vldr d2, [src, #\base + 24] 527*4882a593Smuzhiyun+ vstr \vreg, [dst, #\base + 32] 528*4882a593Smuzhiyun+ vldr \vreg, [src, #\base + prefetch_lines * 64 - 32] 529*4882a593Smuzhiyun+ vstr d0, [dst, #\base + 40] 530*4882a593Smuzhiyun+ vldr d0, [src, #\base + 40] 531*4882a593Smuzhiyun+ vstr d1, [dst, #\base + 48] 532*4882a593Smuzhiyun+ vldr d1, [src, #\base + 48] 533*4882a593Smuzhiyun+ vstr d2, [dst, #\base + 56] 534*4882a593Smuzhiyun+ vldr d2, [src, #\base + 56] 535*4882a593Smuzhiyun+ .endm 536*4882a593Smuzhiyun+ 537*4882a593Smuzhiyun+ .macro cpy_tail_vfp vreg, base 538*4882a593Smuzhiyun+ vstr \vreg, [dst, #\base] 539*4882a593Smuzhiyun+ vldr \vreg, [src, #\base] 540*4882a593Smuzhiyun+ vstr d0, [dst, #\base + 8] 541*4882a593Smuzhiyun+ vldr d0, [src, #\base + 8] 542*4882a593Smuzhiyun+ vstr d1, [dst, #\base + 16] 543*4882a593Smuzhiyun+ vldr d1, [src, #\base + 16] 544*4882a593Smuzhiyun+ vstr d2, [dst, #\base + 24] 545*4882a593Smuzhiyun+ vldr d2, [src, #\base + 24] 546*4882a593Smuzhiyun+ vstr \vreg, [dst, #\base + 32] 547*4882a593Smuzhiyun+ vstr d0, [dst, #\base + 40] 548*4882a593Smuzhiyun+ vldr d0, [src, #\base + 40] 549*4882a593Smuzhiyun+ vstr d1, [dst, #\base + 48] 550*4882a593Smuzhiyun+ vldr d1, [src, #\base + 48] 551*4882a593Smuzhiyun+ vstr d2, [dst, #\base + 56] 552*4882a593Smuzhiyun+ vldr d2, [src, #\base + 56] 553*4882a593Smuzhiyun+ .endm 554*4882a593Smuzhiyun+#endif 555*4882a593Smuzhiyun+ 556*4882a593Smuzhiyun+ 
.p2align 6 557*4882a593Smuzhiyun+ENTRY(memcpy) /* Entry: copies of fewer than 64 bytes fall through to .Ltail63unaligned; larger ones branch to .Lcpy_not_short. */ 558*4882a593Smuzhiyun+ 559*4882a593Smuzhiyun+ mov dst, dstin /* Preserve dstin, we need to return it. */ 560*4882a593Smuzhiyun+ cmp count, #64 561*4882a593Smuzhiyun+ bhs .Lcpy_not_short 562*4882a593Smuzhiyun+ /* Deal with small copies quickly by dropping straight into the 563*4882a593Smuzhiyun+ exit block. */ 564*4882a593Smuzhiyun+ 565*4882a593Smuzhiyun+.Ltail63unaligned: 566*4882a593Smuzhiyun+#ifdef USE_NEON 567*4882a593Smuzhiyun+ /* These need an extra layer of macro just to work around a 568*4882a593Smuzhiyun+ bug in the assembler's parser when an operand starts with 569*4882a593Smuzhiyun+ a {...}. https://sourceware.org/bugzilla/show_bug.cgi?id=15647 570*4882a593Smuzhiyun+ tracks that bug; it was not fixed as of binutils-2.23.2. */ 571*4882a593Smuzhiyun+ .macro neon_load_d0 reg 572*4882a593Smuzhiyun+ vld1.8 {d0}, [\reg]! 573*4882a593Smuzhiyun+ .endm 574*4882a593Smuzhiyun+ .macro neon_store_d0 reg 575*4882a593Smuzhiyun+ vst1.8 {d0}, [\reg]! 576*4882a593Smuzhiyun+ .endm 577*4882a593Smuzhiyun+ 578*4882a593Smuzhiyun+ and tmp1, count, #0x38 579*4882a593Smuzhiyun+ .macro dispatch_step i 580*4882a593Smuzhiyun+ neon_load_d0 src 581*4882a593Smuzhiyun+ neon_store_d0 dst 582*4882a593Smuzhiyun+ .endm 583*4882a593Smuzhiyun+ dispatch_7_dword 584*4882a593Smuzhiyun+ 585*4882a593Smuzhiyun+ tst count, #4 586*4882a593Smuzhiyun+ ldrne tmp1, [src], #4 587*4882a593Smuzhiyun+ strne tmp1, [dst], #4 588*4882a593Smuzhiyun+#else 589*4882a593Smuzhiyun+ /* Copy up to 15 full words of data. May not be aligned. */ 590*4882a593Smuzhiyun+ /* Cannot use VFP for unaligned data. */ 591*4882a593Smuzhiyun+ and tmp1, count, #0x3c 592*4882a593Smuzhiyun+ add dst, dst, tmp1 593*4882a593Smuzhiyun+ add src, src, tmp1 594*4882a593Smuzhiyun+ /* Jump directly into the sequence below at the correct offset.
*/ 595*4882a593Smuzhiyun+ .macro dispatch_step i 596*4882a593Smuzhiyun+ ldr tmp1, [src, #-(\i * 4)] 597*4882a593Smuzhiyun+ str tmp1, [dst, #-(\i * 4)] 598*4882a593Smuzhiyun+ .endm 599*4882a593Smuzhiyun+ dispatch_15_word 600*4882a593Smuzhiyun+#endif 601*4882a593Smuzhiyun+ 602*4882a593Smuzhiyun+ lsls count, count, #31 /* C = bit 1 of count (a halfword is left), Z clear = bit 0 set (a byte is left). */ 603*4882a593Smuzhiyun+ ldrhcs tmp1, [src], #2 604*4882a593Smuzhiyun+ ldrbne src, [src] /* Src is dead, use as a scratch. */ 605*4882a593Smuzhiyun+ strhcs tmp1, [dst], #2 606*4882a593Smuzhiyun+ strbne src, [dst] 607*4882a593Smuzhiyun+ bx lr 608*4882a593Smuzhiyun+ 609*4882a593Smuzhiyun+.Lcpy_not_short: 610*4882a593Smuzhiyun+ /* At least 64 bytes to copy, but don't know the alignment yet. */ 611*4882a593Smuzhiyun+ str tmp2, [sp, #-FRAME_SIZE]! /* Saves tmp2 at the new stack top while lowering sp by FRAME_SIZE. */ 612*4882a593Smuzhiyun+ cfi_adjust_cfa_offset (FRAME_SIZE) 613*4882a593Smuzhiyun+ cfi_rel_offset (tmp2, 0) 614*4882a593Smuzhiyun+ cfi_remember_state 615*4882a593Smuzhiyun+ and tmp2, src, #7 616*4882a593Smuzhiyun+ and tmp1, dst, #7 617*4882a593Smuzhiyun+ cmp tmp1, tmp2 618*4882a593Smuzhiyun+ bne .Lcpy_notaligned 619*4882a593Smuzhiyun+ 620*4882a593Smuzhiyun+#ifdef USE_VFP 621*4882a593Smuzhiyun+ /* Magic dust alert! Force VFP on Cortex-A9. Experiments show 622*4882a593Smuzhiyun+ that the FP pipeline is much better at streaming loads and 623*4882a593Smuzhiyun+ stores. This is outside the critical loop. */ 624*4882a593Smuzhiyun+ vmov.f32 s0, s0 625*4882a593Smuzhiyun+#endif 626*4882a593Smuzhiyun+ 627*4882a593Smuzhiyun+ /* SRC and DST have the same mutual 64-bit alignment, but we may 628*4882a593Smuzhiyun+ still need to pre-copy some bytes to get to natural alignment. 629*4882a593Smuzhiyun+ We bring SRC and DST into full 64-bit alignment.
*/ 630*4882a593Smuzhiyun+ lsls tmp2, dst, #29 631*4882a593Smuzhiyun+ beq 1f 632*4882a593Smuzhiyun+ rsbs tmp2, tmp2, #0 /* The top three bits of tmp2 now hold 8 - (dst & 7), the byte count needed to reach 8-byte alignment. */ 633*4882a593Smuzhiyun+ sub count, count, tmp2, lsr #29 634*4882a593Smuzhiyun+ ldrmi tmp1, [src], #4 635*4882a593Smuzhiyun+ strmi tmp1, [dst], #4 636*4882a593Smuzhiyun+ lsls tmp2, tmp2, #2 637*4882a593Smuzhiyun+ ldrhcs tmp1, [src], #2 638*4882a593Smuzhiyun+ ldrbne tmp2, [src], #1 639*4882a593Smuzhiyun+ strhcs tmp1, [dst], #2 640*4882a593Smuzhiyun+ strbne tmp2, [dst], #1 641*4882a593Smuzhiyun+ 642*4882a593Smuzhiyun+1: 643*4882a593Smuzhiyun+ subs tmp2, count, #64 /* Use tmp2 for count. */ 644*4882a593Smuzhiyun+ blo .Ltail63aligned 645*4882a593Smuzhiyun+ 646*4882a593Smuzhiyun+ cmp tmp2, #512 647*4882a593Smuzhiyun+ bhs .Lcpy_body_long 648*4882a593Smuzhiyun+ 649*4882a593Smuzhiyun+.Lcpy_body_medium: /* Count in tmp2. */ 650*4882a593Smuzhiyun+#ifdef USE_VFP 651*4882a593Smuzhiyun+1: 652*4882a593Smuzhiyun+ vldr d0, [src, #0] 653*4882a593Smuzhiyun+ subs tmp2, tmp2, #64 654*4882a593Smuzhiyun+ vldr d1, [src, #8] 655*4882a593Smuzhiyun+ vstr d0, [dst, #0] 656*4882a593Smuzhiyun+ vldr d0, [src, #16] 657*4882a593Smuzhiyun+ vstr d1, [dst, #8] 658*4882a593Smuzhiyun+ vldr d1, [src, #24] 659*4882a593Smuzhiyun+ vstr d0, [dst, #16] 660*4882a593Smuzhiyun+ vldr d0, [src, #32] 661*4882a593Smuzhiyun+ vstr d1, [dst, #24] 662*4882a593Smuzhiyun+ vldr d1, [src, #40] 663*4882a593Smuzhiyun+ vstr d0, [dst, #32] 664*4882a593Smuzhiyun+ vldr d0, [src, #48] 665*4882a593Smuzhiyun+ vstr d1, [dst, #40] 666*4882a593Smuzhiyun+ vldr d1, [src, #56] 667*4882a593Smuzhiyun+ vstr d0, [dst, #48] 668*4882a593Smuzhiyun+ add src, src, #64 669*4882a593Smuzhiyun+ vstr d1, [dst, #56] 670*4882a593Smuzhiyun+ add dst, dst, #64 671*4882a593Smuzhiyun+ bhs 1b 672*4882a593Smuzhiyun+ tst tmp2, #0x3f 673*4882a593Smuzhiyun+ beq .Ldone 674*4882a593Smuzhiyun+ 675*4882a593Smuzhiyun+.Ltail63aligned: /* Count in tmp2.
*/ 676*4882a593Smuzhiyun+ and tmp1, tmp2, #0x38 677*4882a593Smuzhiyun+ add dst, dst, tmp1 678*4882a593Smuzhiyun+ add src, src, tmp1 679*4882a593Smuzhiyun+ .macro dispatch_step i 680*4882a593Smuzhiyun+ vldr d0, [src, #-(\i * 8)] 681*4882a593Smuzhiyun+ vstr d0, [dst, #-(\i * 8)] 682*4882a593Smuzhiyun+ .endm 683*4882a593Smuzhiyun+ dispatch_7_dword 684*4882a593Smuzhiyun+#else 685*4882a593Smuzhiyun+ sub src, src, #8 686*4882a593Smuzhiyun+ sub dst, dst, #8 687*4882a593Smuzhiyun+1: 688*4882a593Smuzhiyun+ ldrd A_l, A_h, [src, #8] 689*4882a593Smuzhiyun+ strd A_l, A_h, [dst, #8] 690*4882a593Smuzhiyun+ ldrd A_l, A_h, [src, #16] 691*4882a593Smuzhiyun+ strd A_l, A_h, [dst, #16] 692*4882a593Smuzhiyun+ ldrd A_l, A_h, [src, #24] 693*4882a593Smuzhiyun+ strd A_l, A_h, [dst, #24] 694*4882a593Smuzhiyun+ ldrd A_l, A_h, [src, #32] 695*4882a593Smuzhiyun+ strd A_l, A_h, [dst, #32] 696*4882a593Smuzhiyun+ ldrd A_l, A_h, [src, #40] 697*4882a593Smuzhiyun+ strd A_l, A_h, [dst, #40] 698*4882a593Smuzhiyun+ ldrd A_l, A_h, [src, #48] 699*4882a593Smuzhiyun+ strd A_l, A_h, [dst, #48] 700*4882a593Smuzhiyun+ ldrd A_l, A_h, [src, #56] 701*4882a593Smuzhiyun+ strd A_l, A_h, [dst, #56] 702*4882a593Smuzhiyun+ ldrd A_l, A_h, [src, #64]! 703*4882a593Smuzhiyun+ strd A_l, A_h, [dst, #64]! 704*4882a593Smuzhiyun+ subs tmp2, tmp2, #64 705*4882a593Smuzhiyun+ bhs 1b 706*4882a593Smuzhiyun+ tst tmp2, #0x3f 707*4882a593Smuzhiyun+ bne 1f 708*4882a593Smuzhiyun+ ldr tmp2,[sp], #FRAME_SIZE 709*4882a593Smuzhiyun+ cfi_adjust_cfa_offset (-FRAME_SIZE) 710*4882a593Smuzhiyun+ cfi_restore (tmp2) 711*4882a593Smuzhiyun+ bx lr 712*4882a593Smuzhiyun+ 713*4882a593Smuzhiyun+ cfi_restore_state 714*4882a593Smuzhiyun+ cfi_remember_state 715*4882a593Smuzhiyun+1: 716*4882a593Smuzhiyun+ add src, src, #8 717*4882a593Smuzhiyun+ add dst, dst, #8 718*4882a593Smuzhiyun+ 719*4882a593Smuzhiyun+.Ltail63aligned: /* Count in tmp2. */ 720*4882a593Smuzhiyun+ /* Copy up to 7 d-words of data.
Similar to Ltail63unaligned, but 721*4882a593Smuzhiyun+ we know that the src and dest are 64-bit aligned so we can use 722*4882a593Smuzhiyun+ LDRD/STRD to improve efficiency. */ 723*4882a593Smuzhiyun+ /* TMP2 is now negative, but we don't care about that. The bottom 724*4882a593Smuzhiyun+ six bits still tell us how many bytes are left to copy. */ 725*4882a593Smuzhiyun+ 726*4882a593Smuzhiyun+ and tmp1, tmp2, #0x38 727*4882a593Smuzhiyun+ add dst, dst, tmp1 728*4882a593Smuzhiyun+ add src, src, tmp1 729*4882a593Smuzhiyun+ .macro dispatch_step i 730*4882a593Smuzhiyun+ ldrd A_l, A_h, [src, #-(\i * 8)] 731*4882a593Smuzhiyun+ strd A_l, A_h, [dst, #-(\i * 8)] 732*4882a593Smuzhiyun+ .endm 733*4882a593Smuzhiyun+ dispatch_7_dword 734*4882a593Smuzhiyun+#endif 735*4882a593Smuzhiyun+ 736*4882a593Smuzhiyun+ tst tmp2, #4 737*4882a593Smuzhiyun+ ldrne tmp1, [src], #4 738*4882a593Smuzhiyun+ strne tmp1, [dst], #4 739*4882a593Smuzhiyun+ lsls tmp2, tmp2, #31 /* Count (tmp2) now dead. */ 740*4882a593Smuzhiyun+ ldrhcs tmp1, [src], #2 741*4882a593Smuzhiyun+ ldrbne tmp2, [src] 742*4882a593Smuzhiyun+ strhcs tmp1, [dst], #2 743*4882a593Smuzhiyun+ strbne tmp2, [dst] 744*4882a593Smuzhiyun+ 745*4882a593Smuzhiyun+.Ldone: 746*4882a593Smuzhiyun+ ldr tmp2, [sp], #FRAME_SIZE 747*4882a593Smuzhiyun+ cfi_adjust_cfa_offset (-FRAME_SIZE) 748*4882a593Smuzhiyun+ cfi_restore (tmp2) 749*4882a593Smuzhiyun+ bx lr 750*4882a593Smuzhiyun+ 751*4882a593Smuzhiyun+ cfi_restore_state 752*4882a593Smuzhiyun+ cfi_remember_state 753*4882a593Smuzhiyun+ 754*4882a593Smuzhiyun+.Lcpy_body_long: /* Count in tmp2. */ 755*4882a593Smuzhiyun+ 756*4882a593Smuzhiyun+ /* Long copy. We know that there's at least (prefetch_lines * 64) 757*4882a593Smuzhiyun+ bytes to go. */ 758*4882a593Smuzhiyun+#ifdef USE_VFP 759*4882a593Smuzhiyun+ /* Don't use PLD. Instead, read some data in advance of the current 760*4882a593Smuzhiyun+ copy position into a register.
This should act like a PLD 761*4882a593Smuzhiyun+ operation but we won't have to repeat the transfer. */ 762*4882a593Smuzhiyun+ 763*4882a593Smuzhiyun+ vldr d3, [src, #0] 764*4882a593Smuzhiyun+ vldr d4, [src, #64] 765*4882a593Smuzhiyun+ vldr d5, [src, #128] 766*4882a593Smuzhiyun+ vldr d6, [src, #192] 767*4882a593Smuzhiyun+ vldr d7, [src, #256] 768*4882a593Smuzhiyun+ 769*4882a593Smuzhiyun+ vldr d0, [src, #8] 770*4882a593Smuzhiyun+ vldr d1, [src, #16] 771*4882a593Smuzhiyun+ vldr d2, [src, #24] 772*4882a593Smuzhiyun+ add src, src, #32 773*4882a593Smuzhiyun+ 774*4882a593Smuzhiyun+ subs tmp2, tmp2, #prefetch_lines * 64 * 2 775*4882a593Smuzhiyun+ blo 2f 776*4882a593Smuzhiyun+1: 777*4882a593Smuzhiyun+ cpy_line_vfp d3, 0 778*4882a593Smuzhiyun+ cpy_line_vfp d4, 64 779*4882a593Smuzhiyun+ cpy_line_vfp d5, 128 780*4882a593Smuzhiyun+ add dst, dst, #3 * 64 781*4882a593Smuzhiyun+ add src, src, #3 * 64 782*4882a593Smuzhiyun+ cpy_line_vfp d6, 0 783*4882a593Smuzhiyun+ cpy_line_vfp d7, 64 784*4882a593Smuzhiyun+ add dst, dst, #2 * 64 785*4882a593Smuzhiyun+ add src, src, #2 * 64 786*4882a593Smuzhiyun+ subs tmp2, tmp2, #prefetch_lines * 64 787*4882a593Smuzhiyun+ bhs 1b 788*4882a593Smuzhiyun+ 789*4882a593Smuzhiyun+2: 790*4882a593Smuzhiyun+ cpy_tail_vfp d3, 0 791*4882a593Smuzhiyun+ cpy_tail_vfp d4, 64 792*4882a593Smuzhiyun+ cpy_tail_vfp d5, 128 793*4882a593Smuzhiyun+ add src, src, #3 * 64 794*4882a593Smuzhiyun+ add dst, dst, #3 * 64 795*4882a593Smuzhiyun+ cpy_tail_vfp d6, 0 796*4882a593Smuzhiyun+ vstr d7, [dst, #64] 797*4882a593Smuzhiyun+ vldr d7, [src, #64] 798*4882a593Smuzhiyun+ vstr d0, [dst, #64 + 8] 799*4882a593Smuzhiyun+ vldr d0, [src, #64 + 8] 800*4882a593Smuzhiyun+ vstr d1, [dst, #64 + 16] 801*4882a593Smuzhiyun+ vldr d1, [src, #64 + 16] 802*4882a593Smuzhiyun+ vstr d2, [dst, #64 + 24] 803*4882a593Smuzhiyun+ vldr d2, [src, #64 + 24] 804*4882a593Smuzhiyun+ vstr d7, [dst, #64 + 32] 805*4882a593Smuzhiyun+ add src, src, #96 806*4882a593Smuzhiyun+ vstr d0, [dst, #64 + 40]
807*4882a593Smuzhiyun+ vstr d1, [dst, #64 + 48] 808*4882a593Smuzhiyun+ vstr d2, [dst, #64 + 56] 809*4882a593Smuzhiyun+ add dst, dst, #128 810*4882a593Smuzhiyun+ add tmp2, tmp2, #prefetch_lines * 64 811*4882a593Smuzhiyun+ b .Lcpy_body_medium 812*4882a593Smuzhiyun+#else 813*4882a593Smuzhiyun+ /* Long copy. Use an SMS style loop to maximize the I/O 814*4882a593Smuzhiyun+ bandwidth of the core. We don't have enough spare registers 815*4882a593Smuzhiyun+ to synthesise prefetching, so use PLD operations. */ 816*4882a593Smuzhiyun+ /* Pre-bias src and dst. */ 817*4882a593Smuzhiyun+ sub src, src, #8 818*4882a593Smuzhiyun+ sub dst, dst, #8 819*4882a593Smuzhiyun+ pld [src, #8] 820*4882a593Smuzhiyun+ pld [src, #72] 821*4882a593Smuzhiyun+ subs tmp2, tmp2, #64 822*4882a593Smuzhiyun+ pld [src, #136] 823*4882a593Smuzhiyun+ ldrd A_l, A_h, [src, #8] 824*4882a593Smuzhiyun+ strd B_l, B_h, [sp, #8] 825*4882a593Smuzhiyun+ cfi_rel_offset (B_l, 8) 826*4882a593Smuzhiyun+ cfi_rel_offset (B_h, 12) 827*4882a593Smuzhiyun+ ldrd B_l, B_h, [src, #16] 828*4882a593Smuzhiyun+ strd C_l, C_h, [sp, #16] 829*4882a593Smuzhiyun+ cfi_rel_offset (C_l, 16) 830*4882a593Smuzhiyun+ cfi_rel_offset (C_h, 20) 831*4882a593Smuzhiyun+ ldrd C_l, C_h, [src, #24] 832*4882a593Smuzhiyun+ strd D_l, D_h, [sp, #24] 833*4882a593Smuzhiyun+ cfi_rel_offset (D_l, 24) 834*4882a593Smuzhiyun+ cfi_rel_offset (D_h, 28) 835*4882a593Smuzhiyun+ pld [src, #200] 836*4882a593Smuzhiyun+ ldrd D_l, D_h, [src, #32]! 837*4882a593Smuzhiyun+ b 1f 838*4882a593Smuzhiyun+ .p2align 6 839*4882a593Smuzhiyun+2: 840*4882a593Smuzhiyun+ pld [src, #232] 841*4882a593Smuzhiyun+ strd A_l, A_h, [dst, #40] 842*4882a593Smuzhiyun+ ldrd A_l, A_h, [src, #40] 843*4882a593Smuzhiyun+ strd B_l, B_h, [dst, #48] 844*4882a593Smuzhiyun+ ldrd B_l, B_h, [src, #48] 845*4882a593Smuzhiyun+ strd C_l, C_h, [dst, #56] 846*4882a593Smuzhiyun+ ldrd C_l, C_h, [src, #56] 847*4882a593Smuzhiyun+ strd D_l, D_h, [dst, #64]! 848*4882a593Smuzhiyun+ ldrd D_l, D_h, [src, #64]!
849*4882a593Smuzhiyun+ subs tmp2, tmp2, #64 850*4882a593Smuzhiyun+1: 851*4882a593Smuzhiyun+ strd A_l, A_h, [dst, #8] 852*4882a593Smuzhiyun+ ldrd A_l, A_h, [src, #8] 853*4882a593Smuzhiyun+ strd B_l, B_h, [dst, #16] 854*4882a593Smuzhiyun+ ldrd B_l, B_h, [src, #16] 855*4882a593Smuzhiyun+ strd C_l, C_h, [dst, #24] 856*4882a593Smuzhiyun+ ldrd C_l, C_h, [src, #24] 857*4882a593Smuzhiyun+ strd D_l, D_h, [dst, #32] 858*4882a593Smuzhiyun+ ldrd D_l, D_h, [src, #32] 859*4882a593Smuzhiyun+ bcs 2b 860*4882a593Smuzhiyun+ /* Save the remaining bytes and restore the callee-saved regs. */ 861*4882a593Smuzhiyun+ strd A_l, A_h, [dst, #40] 862*4882a593Smuzhiyun+ add src, src, #40 863*4882a593Smuzhiyun+ strd B_l, B_h, [dst, #48] 864*4882a593Smuzhiyun+ ldrd B_l, B_h, [sp, #8] 865*4882a593Smuzhiyun+ cfi_restore (B_l) 866*4882a593Smuzhiyun+ cfi_restore (B_h) 867*4882a593Smuzhiyun+ strd C_l, C_h, [dst, #56] 868*4882a593Smuzhiyun+ ldrd C_l, C_h, [sp, #16] 869*4882a593Smuzhiyun+ cfi_restore (C_l) 870*4882a593Smuzhiyun+ cfi_restore (C_h) 871*4882a593Smuzhiyun+ strd D_l, D_h, [dst, #64] 872*4882a593Smuzhiyun+ ldrd D_l, D_h, [sp, #24] 873*4882a593Smuzhiyun+ cfi_restore (D_l) 874*4882a593Smuzhiyun+ cfi_restore (D_h) 875*4882a593Smuzhiyun+ add dst, dst, #72 876*4882a593Smuzhiyun+ tst tmp2, #0x3f 877*4882a593Smuzhiyun+ bne .Ltail63aligned 878*4882a593Smuzhiyun+ ldr tmp2, [sp], #FRAME_SIZE 879*4882a593Smuzhiyun+ cfi_adjust_cfa_offset (-FRAME_SIZE) 880*4882a593Smuzhiyun+ cfi_restore (tmp2) 881*4882a593Smuzhiyun+ bx lr 882*4882a593Smuzhiyun+#endif 883*4882a593Smuzhiyun+ 884*4882a593Smuzhiyun+ cfi_restore_state 885*4882a593Smuzhiyun+ cfi_remember_state 886*4882a593Smuzhiyun+ 887*4882a593Smuzhiyun+.Lcpy_notaligned: 888*4882a593Smuzhiyun+ pld [src, #0] 889*4882a593Smuzhiyun+ pld [src, #64] 890*4882a593Smuzhiyun+ /* There's at least 64 bytes to copy, but there is no mutual 891*4882a593Smuzhiyun+ alignment. */ 892*4882a593Smuzhiyun+ /* Bring DST to 64-bit alignment.
*/ 893*4882a593Smuzhiyun+ lsls tmp2, dst, #29 894*4882a593Smuzhiyun+ pld [src, #(2 * 64)] 895*4882a593Smuzhiyun+ beq 1f 896*4882a593Smuzhiyun+ rsbs tmp2, tmp2, #0 897*4882a593Smuzhiyun+ sub count, count, tmp2, lsr #29 898*4882a593Smuzhiyun+ ldrmi tmp1, [src], #4 899*4882a593Smuzhiyun+ strmi tmp1, [dst], #4 900*4882a593Smuzhiyun+ lsls tmp2, tmp2, #2 901*4882a593Smuzhiyun+ ldrbne tmp1, [src], #1 902*4882a593Smuzhiyun+ ldrhcs tmp2, [src], #2 903*4882a593Smuzhiyun+ strbne tmp1, [dst], #1 904*4882a593Smuzhiyun+ strhcs tmp2, [dst], #2 905*4882a593Smuzhiyun+1: 906*4882a593Smuzhiyun+ pld [src, #(3 * 64)] 907*4882a593Smuzhiyun+ subs count, count, #64 908*4882a593Smuzhiyun+ ldrlo tmp2, [sp], #FRAME_SIZE /* Fewer than 64 bytes remain: restore tmp2 and release the frame here, because .Ltail63unaligned returns without touching sp. */ 909*4882a593Smuzhiyun+ blo .Ltail63unaligned 910*4882a593Smuzhiyun+ pld [src, #(4 * 64)] 911*4882a593Smuzhiyun+ 912*4882a593Smuzhiyun+#ifdef USE_NEON 913*4882a593Smuzhiyun+ /* These need an extra layer of macro just to work around a 914*4882a593Smuzhiyun+ bug in the assembler's parser when an operand starts with 915*4882a593Smuzhiyun+ a {...}. */ 916*4882a593Smuzhiyun+ .macro neon_load_multi reglist, basereg 917*4882a593Smuzhiyun+ vld1.8 {\reglist}, [\basereg]! 918*4882a593Smuzhiyun+ .endm 919*4882a593Smuzhiyun+ .macro neon_store_multi reglist, basereg 920*4882a593Smuzhiyun+ vst1.8 {\reglist}, [ALIGN (\basereg, 64)]!
921*4882a593Smuzhiyun+ .endm 922*4882a593Smuzhiyun+ 923*4882a593Smuzhiyun+ neon_load_multi d0-d3, src 924*4882a593Smuzhiyun+ neon_load_multi d4-d7, src 925*4882a593Smuzhiyun+ subs count, count, #64 926*4882a593Smuzhiyun+ blo 2f 927*4882a593Smuzhiyun+1: 928*4882a593Smuzhiyun+ pld [src, #(4 * 64)] 929*4882a593Smuzhiyun+ neon_store_multi d0-d3, dst 930*4882a593Smuzhiyun+ neon_load_multi d0-d3, src 931*4882a593Smuzhiyun+ neon_store_multi d4-d7, dst 932*4882a593Smuzhiyun+ neon_load_multi d4-d7, src 933*4882a593Smuzhiyun+ subs count, count, #64 934*4882a593Smuzhiyun+ bhs 1b 935*4882a593Smuzhiyun+2: 936*4882a593Smuzhiyun+ neon_store_multi d0-d3, dst 937*4882a593Smuzhiyun+ neon_store_multi d4-d7, dst 938*4882a593Smuzhiyun+ ands count, count, #0x3f 939*4882a593Smuzhiyun+#else 940*4882a593Smuzhiyun+ /* Use an SMS style loop to maximize the I/O bandwidth. */ 941*4882a593Smuzhiyun+ sub src, src, #4 942*4882a593Smuzhiyun+ sub dst, dst, #8 943*4882a593Smuzhiyun+ subs tmp2, count, #64 /* Use tmp2 for count. */ 944*4882a593Smuzhiyun+ ldr A_l, [src, #4] 945*4882a593Smuzhiyun+ ldr A_h, [src, #8] 946*4882a593Smuzhiyun+ strd B_l, B_h, [sp, #8] 947*4882a593Smuzhiyun+ cfi_rel_offset (B_l, 8) 948*4882a593Smuzhiyun+ cfi_rel_offset (B_h, 12) 949*4882a593Smuzhiyun+ ldr B_l, [src, #12] 950*4882a593Smuzhiyun+ ldr B_h, [src, #16] 951*4882a593Smuzhiyun+ strd C_l, C_h, [sp, #16] 952*4882a593Smuzhiyun+ cfi_rel_offset (C_l, 16) 953*4882a593Smuzhiyun+ cfi_rel_offset (C_h, 20) 954*4882a593Smuzhiyun+ ldr C_l, [src, #20] 955*4882a593Smuzhiyun+ ldr C_h, [src, #24] 956*4882a593Smuzhiyun+ strd D_l, D_h, [sp, #24] 957*4882a593Smuzhiyun+ cfi_rel_offset (D_l, 24) 958*4882a593Smuzhiyun+ cfi_rel_offset (D_h, 28) 959*4882a593Smuzhiyun+ ldr D_l, [src, #28] 960*4882a593Smuzhiyun+ ldr D_h, [src, #32]!
961*4882a593Smuzhiyun+ b 1f 962*4882a593Smuzhiyun+ .p2align 6 963*4882a593Smuzhiyun+2: 964*4882a593Smuzhiyun+ pld [src, #(5 * 64) - (32 - 4)] 965*4882a593Smuzhiyun+ strd A_l, A_h, [dst, #40] 966*4882a593Smuzhiyun+ ldr A_l, [src, #36] 967*4882a593Smuzhiyun+ ldr A_h, [src, #40] 968*4882a593Smuzhiyun+ strd B_l, B_h, [dst, #48] 969*4882a593Smuzhiyun+ ldr B_l, [src, #44] 970*4882a593Smuzhiyun+ ldr B_h, [src, #48] 971*4882a593Smuzhiyun+ strd C_l, C_h, [dst, #56] 972*4882a593Smuzhiyun+ ldr C_l, [src, #52] 973*4882a593Smuzhiyun+ ldr C_h, [src, #56] 974*4882a593Smuzhiyun+ strd D_l, D_h, [dst, #64]! 975*4882a593Smuzhiyun+ ldr D_l, [src, #60] 976*4882a593Smuzhiyun+ ldr D_h, [src, #64]! 977*4882a593Smuzhiyun+ subs tmp2, tmp2, #64 978*4882a593Smuzhiyun+1: 979*4882a593Smuzhiyun+ strd A_l, A_h, [dst, #8] 980*4882a593Smuzhiyun+ ldr A_l, [src, #4] 981*4882a593Smuzhiyun+ ldr A_h, [src, #8] 982*4882a593Smuzhiyun+ strd B_l, B_h, [dst, #16] 983*4882a593Smuzhiyun+ ldr B_l, [src, #12] 984*4882a593Smuzhiyun+ ldr B_h, [src, #16] 985*4882a593Smuzhiyun+ strd C_l, C_h, [dst, #24] 986*4882a593Smuzhiyun+ ldr C_l, [src, #20] 987*4882a593Smuzhiyun+ ldr C_h, [src, #24] 988*4882a593Smuzhiyun+ strd D_l, D_h, [dst, #32] 989*4882a593Smuzhiyun+ ldr D_l, [src, #28] 990*4882a593Smuzhiyun+ ldr D_h, [src, #32] 991*4882a593Smuzhiyun+ bcs 2b 992*4882a593Smuzhiyun+ 993*4882a593Smuzhiyun+ /* Save the remaining bytes and restore the callee-saved regs.
*/ 994*4882a593Smuzhiyun+ strd A_l, A_h, [dst, #40] 995*4882a593Smuzhiyun+ add src, src, #36 996*4882a593Smuzhiyun+ strd B_l, B_h, [dst, #48] 997*4882a593Smuzhiyun+ ldrd B_l, B_h, [sp, #8] 998*4882a593Smuzhiyun+ cfi_restore (B_l) 999*4882a593Smuzhiyun+ cfi_restore (B_h) 1000*4882a593Smuzhiyun+ strd C_l, C_h, [dst, #56] 1001*4882a593Smuzhiyun+ ldrd C_l, C_h, [sp, #16] 1002*4882a593Smuzhiyun+ cfi_restore (C_l) 1003*4882a593Smuzhiyun+ cfi_restore (C_h) 1004*4882a593Smuzhiyun+ strd D_l, D_h, [dst, #64] 1005*4882a593Smuzhiyun+ ldrd D_l, D_h, [sp, #24] 1006*4882a593Smuzhiyun+ cfi_restore (D_l) 1007*4882a593Smuzhiyun+ cfi_restore (D_h) 1008*4882a593Smuzhiyun+ add dst, dst, #72 1009*4882a593Smuzhiyun+ ands count, tmp2, #0x3f 1010*4882a593Smuzhiyun+#endif 1011*4882a593Smuzhiyun+ ldr tmp2, [sp], #FRAME_SIZE 1012*4882a593Smuzhiyun+ cfi_adjust_cfa_offset (-FRAME_SIZE) 1013*4882a593Smuzhiyun+ cfi_restore (tmp2) 1014*4882a593Smuzhiyun+ bne .Ltail63unaligned 1015*4882a593Smuzhiyun+ bx lr 1016*4882a593Smuzhiyun+ 1017*4882a593Smuzhiyun+END(memcpy) 1018*4882a593Smuzhiyun+libc_hidden_builtin_def (memcpy) 1019*4882a593Smuzhiyundiff --git a/libc/string/arm/glibc-neon/memmove.S b/libc/string/arm/glibc-neon/memmove.S 1020*4882a593Smuzhiyunnew file mode 100644 1021*4882a593Smuzhiyunindex 0000000..b01164a 1022*4882a593Smuzhiyun--- /dev/null 1023*4882a593Smuzhiyun+++ b/libc/string/arm/glibc-neon/memmove.S 1024*4882a593Smuzhiyun@@ -0,0 +1,332 @@ 1025*4882a593Smuzhiyun+/* Copyright (C) 2006-2021 Free Software Foundation, Inc. 1026*4882a593Smuzhiyun+ This file is part of the GNU C Library. 1027*4882a593Smuzhiyun+ 1028*4882a593Smuzhiyun+ Contributed by MontaVista Software, Inc.
(written by Nicolas Pitre) 1029*4882a593Smuzhiyun+ 1030*4882a593Smuzhiyun+ The GNU C Library is free software; you can redistribute it and/or 1031*4882a593Smuzhiyun+ modify it under the terms of the GNU Lesser General Public 1032*4882a593Smuzhiyun+ License as published by the Free Software Foundation; either 1033*4882a593Smuzhiyun+ version 2.1 of the License, or (at your option) any later version. 1034*4882a593Smuzhiyun+ 1035*4882a593Smuzhiyun+ The GNU C Library is distributed in the hope that it will be useful, 1036*4882a593Smuzhiyun+ but WITHOUT ANY WARRANTY; without even the implied warranty of 1037*4882a593Smuzhiyun+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 1038*4882a593Smuzhiyun+ Lesser General Public License for more details. 1039*4882a593Smuzhiyun+ 1040*4882a593Smuzhiyun+ You should have received a copy of the GNU Lesser General Public 1041*4882a593Smuzhiyun+ License along with the GNU C Library. If not, see 1042*4882a593Smuzhiyun+ <https://www.gnu.org/licenses/>. */ 1043*4882a593Smuzhiyun+ 1044*4882a593Smuzhiyun+/* Thumb requires excessive IT insns here. */ 1045*4882a593Smuzhiyun+#define NO_THUMB 1046*4882a593Smuzhiyun+#include <sysdep.h> 1047*4882a593Smuzhiyun+#include <arm-features.h> 1048*4882a593Smuzhiyun+ 1049*4882a593Smuzhiyun+/* 1050*4882a593Smuzhiyun+ * Data preload for architectures that support it (ARM V5TE and above) 1051*4882a593Smuzhiyun+ */ 1052*4882a593Smuzhiyun+#if (!defined (__ARM_ARCH_2__) && !defined (__ARM_ARCH_3__) \ 1053*4882a593Smuzhiyun+ && !defined (__ARM_ARCH_3M__) && !defined (__ARM_ARCH_4__) \ 1054*4882a593Smuzhiyun+ && !defined (__ARM_ARCH_4T__) && !defined (__ARM_ARCH_5__) \ 1055*4882a593Smuzhiyun+ && !defined (__ARM_ARCH_5T__)) 1056*4882a593Smuzhiyun+#define PLD(code...) code 1057*4882a593Smuzhiyun+#else 1058*4882a593Smuzhiyun+#define PLD(code...) 
1059*4882a593Smuzhiyun+#endif 1060*4882a593Smuzhiyun+ 1061*4882a593Smuzhiyun+/* 1062*4882a593Smuzhiyun+ * This can be used to enable code to cacheline align the source pointer. 1063*4882a593Smuzhiyun+ * Experiments on tested architectures (StrongARM and XScale) didn't show 1064*4882a593Smuzhiyun+ * this a worthwhile thing to do. That might be different in the future. 1065*4882a593Smuzhiyun+ */ 1066*4882a593Smuzhiyun+//#define CALGN(code...) code 1067*4882a593Smuzhiyun+#define CALGN(code...) 1068*4882a593Smuzhiyun+ 1069*4882a593Smuzhiyun+/* 1070*4882a593Smuzhiyun+ * Endian independent macros for shifting bytes within registers. 1071*4882a593Smuzhiyun+ */ 1072*4882a593Smuzhiyun+#ifndef __ARMEB__ 1073*4882a593Smuzhiyun+#define PULL lsr 1074*4882a593Smuzhiyun+#define PUSH lsl 1075*4882a593Smuzhiyun+#else 1076*4882a593Smuzhiyun+#define PULL lsl 1077*4882a593Smuzhiyun+#define PUSH lsr 1078*4882a593Smuzhiyun+#endif 1079*4882a593Smuzhiyun+ 1080*4882a593Smuzhiyun+ .text 1081*4882a593Smuzhiyun+ .syntax unified 1082*4882a593Smuzhiyun+ 1083*4882a593Smuzhiyun+/* 1084*4882a593Smuzhiyun+ * Prototype: void *memmove(void *dest, const void *src, size_t n); 1085*4882a593Smuzhiyun+ * 1086*4882a593Smuzhiyun+ * Note: 1087*4882a593Smuzhiyun+ * 1088*4882a593Smuzhiyun+ * If the memory regions don't overlap, we simply branch to memcpy which is 1089*4882a593Smuzhiyun+ * normally a bit faster. Otherwise the copy is done going downwards. 
1090*4882a593Smuzhiyun+ */ 1091*4882a593Smuzhiyun+ 1092*4882a593Smuzhiyun+ENTRY(memmove) 1093*4882a593Smuzhiyun+ 1094*4882a593Smuzhiyun+ subs ip, r0, r1 1095*4882a593Smuzhiyun+ cmphi r2, ip /* ip = dest - src; only when dest > src is n compared with that distance, so LS below means dest <= src or n <= dest - src. */ 1096*4882a593Smuzhiyun+#if !IS_IN (libc) 1097*4882a593Smuzhiyun+ bls memcpy 1098*4882a593Smuzhiyun+#else 1099*4882a593Smuzhiyun+ bls HIDDEN_JUMPTARGET(memcpy) 1100*4882a593Smuzhiyun+#endif 1101*4882a593Smuzhiyun+ 1102*4882a593Smuzhiyun+ push {r0, r4, lr} /* r0 is saved so the exit path can pop the original dest back into r0. */ 1103*4882a593Smuzhiyun+ cfi_adjust_cfa_offset (12) 1104*4882a593Smuzhiyun+ cfi_rel_offset (r4, 4) 1105*4882a593Smuzhiyun+ cfi_rel_offset (lr, 8) 1106*4882a593Smuzhiyun+ 1107*4882a593Smuzhiyun+ cfi_remember_state 1108*4882a593Smuzhiyun+ 1109*4882a593Smuzhiyun+ add r1, r1, r2 1110*4882a593Smuzhiyun+ add r0, r0, r2 /* r1 and r0 now point one past the end of each buffer; every access below pre-decrements. */ 1111*4882a593Smuzhiyun+ subs r2, r2, #4 1112*4882a593Smuzhiyun+ blo 8f 1113*4882a593Smuzhiyun+ ands ip, r0, #3 1114*4882a593Smuzhiyun+ PLD( pld [r1, #-4] ) 1115*4882a593Smuzhiyun+ bne 9f 1116*4882a593Smuzhiyun+ ands ip, r1, #3 1117*4882a593Smuzhiyun+ bne 10f 1118*4882a593Smuzhiyun+ 1119*4882a593Smuzhiyun+1: subs r2, r2, #(28) 1120*4882a593Smuzhiyun+ push {r5 - r8} 1121*4882a593Smuzhiyun+ cfi_adjust_cfa_offset (16) 1122*4882a593Smuzhiyun+ cfi_rel_offset (r5, 0) 1123*4882a593Smuzhiyun+ cfi_rel_offset (r6, 4) 1124*4882a593Smuzhiyun+ cfi_rel_offset (r7, 8) 1125*4882a593Smuzhiyun+ cfi_rel_offset (r8, 12) 1126*4882a593Smuzhiyun+ blo 5f 1127*4882a593Smuzhiyun+ 1128*4882a593Smuzhiyun+ CALGN( ands ip, r1, #31 ) 1129*4882a593Smuzhiyun+ CALGN( sbcsne r4, ip, r2 ) @ C is always set here 1130*4882a593Smuzhiyun+ CALGN( bcs 2f ) 1131*4882a593Smuzhiyun+ CALGN( adr r4, 6f ) 1132*4882a593Smuzhiyun+ CALGN( subs r2, r2, ip ) @ C is set here 1133*4882a593Smuzhiyun+#ifndef ARM_ALWAYS_BX 1134*4882a593Smuzhiyun+ CALGN( add pc, r4, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)) 1135*4882a593Smuzhiyun+#else 1136*4882a593Smuzhiyun+ CALGN( add r4, r4, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)) 1137*4882a593Smuzhiyun+ CALGN( bx r4 ) 1138*4882a593Smuzhiyun+#endif
1139*4882a593Smuzhiyun+ 1140*4882a593Smuzhiyun+ PLD( pld [r1, #-4] ) 1141*4882a593Smuzhiyun+2: PLD( cmp r2, #96 ) 1142*4882a593Smuzhiyun+ PLD( pld [r1, #-32] ) 1143*4882a593Smuzhiyun+ PLD( blo 4f ) 1144*4882a593Smuzhiyun+ PLD( pld [r1, #-64] ) 1145*4882a593Smuzhiyun+ PLD( pld [r1, #-96] ) 1146*4882a593Smuzhiyun+ 1147*4882a593Smuzhiyun+3: PLD( pld [r1, #-128] ) 1148*4882a593Smuzhiyun+4: ldmdb r1!, {r3, r4, r5, r6, r7, r8, ip, lr} 1149*4882a593Smuzhiyun+ subs r2, r2, #32 1150*4882a593Smuzhiyun+ stmdb r0!, {r3, r4, r5, r6, r7, r8, ip, lr} 1151*4882a593Smuzhiyun+ bhs 3b 1152*4882a593Smuzhiyun+ 1153*4882a593Smuzhiyun+5: ands ip, r2, #28 1154*4882a593Smuzhiyun+ rsb ip, ip, #32 1155*4882a593Smuzhiyun+#ifndef ARM_ALWAYS_BX 1156*4882a593Smuzhiyun+ /* C is always clear here. */ 1157*4882a593Smuzhiyun+ addne pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2) 1158*4882a593Smuzhiyun+ b 7f 1159*4882a593Smuzhiyun+#else 1160*4882a593Smuzhiyun+ beq 7f 1161*4882a593Smuzhiyun+ push {r10} 1162*4882a593Smuzhiyun+ cfi_adjust_cfa_offset (4) 1163*4882a593Smuzhiyun+ cfi_rel_offset (r10, 0) 1164*4882a593Smuzhiyun+0: add r10, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2) 1165*4882a593Smuzhiyun+ /* If alignment is not perfect, then there will be some 1166*4882a593Smuzhiyun+ padding (nop) instructions between this BX and label 6. 1167*4882a593Smuzhiyun+ The computation above assumed that two instructions 1168*4882a593Smuzhiyun+ later is exactly the right spot. */ 1169*4882a593Smuzhiyun+ add r10, #(6f - (0b + PC_OFS)) 1170*4882a593Smuzhiyun+ bx r10 1171*4882a593Smuzhiyun+#endif 1172*4882a593Smuzhiyun+ .p2align ARM_BX_ALIGN_LOG2 1173*4882a593Smuzhiyun+6: nop 1174*4882a593Smuzhiyun+ .p2align ARM_BX_ALIGN_LOG2 1175*4882a593Smuzhiyun+ ldr r3, [r1, #-4]! 1176*4882a593Smuzhiyun+ .p2align ARM_BX_ALIGN_LOG2 1177*4882a593Smuzhiyun+ ldr r4, [r1, #-4]! 1178*4882a593Smuzhiyun+ .p2align ARM_BX_ALIGN_LOG2 1179*4882a593Smuzhiyun+ ldr r5, [r1, #-4]!
1180*4882a593Smuzhiyun+ .p2align ARM_BX_ALIGN_LOG2 1181*4882a593Smuzhiyun+ ldr r6, [r1, #-4]! 1182*4882a593Smuzhiyun+ .p2align ARM_BX_ALIGN_LOG2 1183*4882a593Smuzhiyun+ ldr r7, [r1, #-4]! 1184*4882a593Smuzhiyun+ .p2align ARM_BX_ALIGN_LOG2 1185*4882a593Smuzhiyun+ ldr r8, [r1, #-4]! 1186*4882a593Smuzhiyun+ .p2align ARM_BX_ALIGN_LOG2 1187*4882a593Smuzhiyun+ ldr lr, [r1, #-4]! 1188*4882a593Smuzhiyun+ 1189*4882a593Smuzhiyun+#ifndef ARM_ALWAYS_BX 1190*4882a593Smuzhiyun+ add pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2) 1191*4882a593Smuzhiyun+ nop 1192*4882a593Smuzhiyun+#else 1193*4882a593Smuzhiyun+0: add r10, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2) 1194*4882a593Smuzhiyun+ /* If alignment is not perfect, then there will be some 1195*4882a593Smuzhiyun+ padding (nop) instructions between this BX and label 66. 1196*4882a593Smuzhiyun+ The computation above assumed that two instructions 1197*4882a593Smuzhiyun+ later is exactly the right spot. */ 1198*4882a593Smuzhiyun+ add r10, #(66f - (0b + PC_OFS)) 1199*4882a593Smuzhiyun+ bx r10 1200*4882a593Smuzhiyun+#endif 1201*4882a593Smuzhiyun+ .p2align ARM_BX_ALIGN_LOG2 1202*4882a593Smuzhiyun+66: nop 1203*4882a593Smuzhiyun+ .p2align ARM_BX_ALIGN_LOG2 1204*4882a593Smuzhiyun+ str r3, [r0, #-4]! 1205*4882a593Smuzhiyun+ .p2align ARM_BX_ALIGN_LOG2 1206*4882a593Smuzhiyun+ str r4, [r0, #-4]! 1207*4882a593Smuzhiyun+ .p2align ARM_BX_ALIGN_LOG2 1208*4882a593Smuzhiyun+ str r5, [r0, #-4]! 1209*4882a593Smuzhiyun+ .p2align ARM_BX_ALIGN_LOG2 1210*4882a593Smuzhiyun+ str r6, [r0, #-4]! 1211*4882a593Smuzhiyun+ .p2align ARM_BX_ALIGN_LOG2 1212*4882a593Smuzhiyun+ str r7, [r0, #-4]! 1213*4882a593Smuzhiyun+ .p2align ARM_BX_ALIGN_LOG2 1214*4882a593Smuzhiyun+ str r8, [r0, #-4]! 1215*4882a593Smuzhiyun+ .p2align ARM_BX_ALIGN_LOG2 1216*4882a593Smuzhiyun+ str lr, [r0, #-4]!
1217*4882a593Smuzhiyun+ 1218*4882a593Smuzhiyun+#ifdef ARM_ALWAYS_BX 1219*4882a593Smuzhiyun+ pop {r10} 1220*4882a593Smuzhiyun+ cfi_adjust_cfa_offset (-4) 1221*4882a593Smuzhiyun+ cfi_restore (r10) 1222*4882a593Smuzhiyun+#endif 1223*4882a593Smuzhiyun+ 1224*4882a593Smuzhiyun+ CALGN( bcs 2b ) 1225*4882a593Smuzhiyun+ 1226*4882a593Smuzhiyun+7: pop {r5 - r8} 1227*4882a593Smuzhiyun+ cfi_adjust_cfa_offset (-16) 1228*4882a593Smuzhiyun+ cfi_restore (r5) 1229*4882a593Smuzhiyun+ cfi_restore (r6) 1230*4882a593Smuzhiyun+ cfi_restore (r7) 1231*4882a593Smuzhiyun+ cfi_restore (r8) 1232*4882a593Smuzhiyun+ 1233*4882a593Smuzhiyun+8: movs r2, r2, lsl #31 /* Z clear = bit 0 of r2 set (one byte left); C = bit 1 set (two more bytes left). */ 1234*4882a593Smuzhiyun+ ldrbne r3, [r1, #-1]! 1235*4882a593Smuzhiyun+ ldrbcs r4, [r1, #-1]! 1236*4882a593Smuzhiyun+ ldrbcs ip, [r1, #-1] 1237*4882a593Smuzhiyun+ strbne r3, [r0, #-1]! 1238*4882a593Smuzhiyun+ strbcs r4, [r0, #-1]! 1239*4882a593Smuzhiyun+ strbcs ip, [r0, #-1] 1240*4882a593Smuzhiyun+ 1241*4882a593Smuzhiyun+#if ((defined (__ARM_ARCH_4T__) && defined (__THUMB_INTERWORK__)) \ 1242*4882a593Smuzhiyun+ || defined (ARM_ALWAYS_BX)) 1243*4882a593Smuzhiyun+ pop {r0, r4, lr} 1244*4882a593Smuzhiyun+ cfi_adjust_cfa_offset (-12) 1245*4882a593Smuzhiyun+ cfi_restore (r4) 1246*4882a593Smuzhiyun+ cfi_restore (lr) 1247*4882a593Smuzhiyun+ bx lr 1248*4882a593Smuzhiyun+#else 1249*4882a593Smuzhiyun+ pop {r0, r4, pc} 1250*4882a593Smuzhiyun+#endif 1251*4882a593Smuzhiyun+ 1252*4882a593Smuzhiyun+ cfi_restore_state 1253*4882a593Smuzhiyun+ 1254*4882a593Smuzhiyun+9: cmp ip, #2 1255*4882a593Smuzhiyun+ ldrbgt r3, [r1, #-1]! 1256*4882a593Smuzhiyun+ ldrbge r4, [r1, #-1]! 1257*4882a593Smuzhiyun+ ldrb lr, [r1, #-1]! 1258*4882a593Smuzhiyun+ strbgt r3, [r0, #-1]! 1259*4882a593Smuzhiyun+ strbge r4, [r0, #-1]! 1260*4882a593Smuzhiyun+ subs r2, r2, ip 1261*4882a593Smuzhiyun+ strb lr, [r0, #-1]!
1262*4882a593Smuzhiyun+ blo 8b 1263*4882a593Smuzhiyun+ ands ip, r1, #3 1264*4882a593Smuzhiyun+ beq 1b 1265*4882a593Smuzhiyun+ 1266*4882a593Smuzhiyun+10: bic r1, r1, #3 /* Source is not word-aligned (ip = r1 & 3, non-zero): round r1 down to a word boundary; ip selects the shift variant below. */ 1267*4882a593Smuzhiyun+ cmp ip, #2 1268*4882a593Smuzhiyun+ ldr r3, [r1, #0] 1269*4882a593Smuzhiyun+ beq 17f 1270*4882a593Smuzhiyun+ blt 18f 1271*4882a593Smuzhiyun+ 1272*4882a593Smuzhiyun+ 1273*4882a593Smuzhiyun+ .macro backward_copy_shift push pull 1274*4882a593Smuzhiyun+ 1275*4882a593Smuzhiyun+ subs r2, r2, #28 1276*4882a593Smuzhiyun+ blo 14f 1277*4882a593Smuzhiyun+ 1278*4882a593Smuzhiyun+ CALGN( ands ip, r1, #31 ) 1279*4882a593Smuzhiyun+ CALGN( rsb ip, ip, #32 ) 1280*4882a593Smuzhiyun+ CALGN( sbcsne r4, ip, r2 ) @ C is always set here 1281*4882a593Smuzhiyun+ CALGN( subcc r2, r2, ip ) 1282*4882a593Smuzhiyun+ CALGN( bcc 15f ) 1283*4882a593Smuzhiyun+ 1284*4882a593Smuzhiyun+11: push {r5 - r8, r10} 1285*4882a593Smuzhiyun+ cfi_adjust_cfa_offset (20) 1286*4882a593Smuzhiyun+ cfi_rel_offset (r5, 0) 1287*4882a593Smuzhiyun+ cfi_rel_offset (r6, 4) 1288*4882a593Smuzhiyun+ cfi_rel_offset (r7, 8) 1289*4882a593Smuzhiyun+ cfi_rel_offset (r8, 12) 1290*4882a593Smuzhiyun+ cfi_rel_offset (r10, 16) 1291*4882a593Smuzhiyun+ 1292*4882a593Smuzhiyun+ PLD( pld [r1, #-4] ) 1293*4882a593Smuzhiyun+ PLD( cmp r2, #96 ) 1294*4882a593Smuzhiyun+ PLD( pld [r1, #-32] ) 1295*4882a593Smuzhiyun+ PLD( blo 13f ) 1296*4882a593Smuzhiyun+ PLD( pld [r1, #-64] ) 1297*4882a593Smuzhiyun+ PLD( pld [r1, #-96] ) 1298*4882a593Smuzhiyun+ 1299*4882a593Smuzhiyun+12: PLD( pld [r1, #-128] ) 1300*4882a593Smuzhiyun+13: ldmdb r1!, {r7, r8, r10, ip} 1301*4882a593Smuzhiyun+ mov lr, r3, PUSH #\push 1302*4882a593Smuzhiyun+ subs r2, r2, #32 1303*4882a593Smuzhiyun+ ldmdb r1!, {r3, r4, r5, r6} 1304*4882a593Smuzhiyun+ orr lr, lr, ip, PULL #\pull 1305*4882a593Smuzhiyun+ mov ip, ip, PUSH #\push 1306*4882a593Smuzhiyun+ orr ip, ip, r10, PULL #\pull 1307*4882a593Smuzhiyun+ mov r10, r10, PUSH #\push 1308*4882a593Smuzhiyun+ orr r10, r10, r8, PULL #\pull
1309*4882a593Smuzhiyun+ mov r8, r8, PUSH #\push 1310*4882a593Smuzhiyun+ orr r8, r8, r7, PULL #\pull 1311*4882a593Smuzhiyun+ mov r7, r7, PUSH #\push 1312*4882a593Smuzhiyun+ orr r7, r7, r6, PULL #\pull 1313*4882a593Smuzhiyun+ mov r6, r6, PUSH #\push 1314*4882a593Smuzhiyun+ orr r6, r6, r5, PULL #\pull 1315*4882a593Smuzhiyun+ mov r5, r5, PUSH #\push 1316*4882a593Smuzhiyun+ orr r5, r5, r4, PULL #\pull 1317*4882a593Smuzhiyun+ mov r4, r4, PUSH #\push 1318*4882a593Smuzhiyun+ orr r4, r4, r3, PULL #\pull 1319*4882a593Smuzhiyun+ stmdb r0!, {r4 - r8, r10, ip, lr} 1320*4882a593Smuzhiyun+ bhs 12b 1321*4882a593Smuzhiyun+ 1322*4882a593Smuzhiyun+ pop {r5 - r8, r10} 1323*4882a593Smuzhiyun+ cfi_adjust_cfa_offset (-20) 1324*4882a593Smuzhiyun+ cfi_restore (r5) 1325*4882a593Smuzhiyun+ cfi_restore (r6) 1326*4882a593Smuzhiyun+ cfi_restore (r7) 1327*4882a593Smuzhiyun+ cfi_restore (r8) 1328*4882a593Smuzhiyun+ cfi_restore (r10) 1329*4882a593Smuzhiyun+ 1330*4882a593Smuzhiyun+14: ands ip, r2, #28 1331*4882a593Smuzhiyun+ beq 16f 1332*4882a593Smuzhiyun+ 1333*4882a593Smuzhiyun+15: mov lr, r3, PUSH #\push 1334*4882a593Smuzhiyun+ ldr r3, [r1, #-4]! 1335*4882a593Smuzhiyun+ subs ip, ip, #4 1336*4882a593Smuzhiyun+ orr lr, lr, r3, PULL #\pull 1337*4882a593Smuzhiyun+ str lr, [r0, #-4]!
1338*4882a593Smuzhiyun+ bgt 15b 1339*4882a593Smuzhiyun+ CALGN( cmp r2, #0 ) 1340*4882a593Smuzhiyun+ CALGN( bge 11b ) 1341*4882a593Smuzhiyun+ 1342*4882a593Smuzhiyun+16: add r1, r1, #(\pull / 8) /* Adds back the low address bits that the bic at label 10 cleared, so the byte tail at label 8 reads from the true source position. */ 1343*4882a593Smuzhiyun+ b 8b 1344*4882a593Smuzhiyun+ 1345*4882a593Smuzhiyun+ .endm 1346*4882a593Smuzhiyun+ 1347*4882a593Smuzhiyun+ 1348*4882a593Smuzhiyun+ backward_copy_shift push=8 pull=24 1349*4882a593Smuzhiyun+ 1350*4882a593Smuzhiyun+17: backward_copy_shift push=16 pull=16 1351*4882a593Smuzhiyun+ 1352*4882a593Smuzhiyun+18: backward_copy_shift push=24 pull=8 1353*4882a593Smuzhiyun+ 1354*4882a593Smuzhiyun+ 1355*4882a593Smuzhiyun+END(memmove) 1356*4882a593Smuzhiyun+libc_hidden_builtin_def (memmove) 1357*4882a593Smuzhiyundiff --git a/libc/string/arm/glibc-neon/memset.S b/libc/string/arm/glibc-neon/memset.S 1358*4882a593Smuzhiyunnew file mode 100644 1359*4882a593Smuzhiyunindex 0000000..dc89ca7 1360*4882a593Smuzhiyun--- /dev/null 1361*4882a593Smuzhiyun+++ b/libc/string/arm/glibc-neon/memset.S 1362*4882a593Smuzhiyun@@ -0,0 +1,68 @@ 1363*4882a593Smuzhiyun+/* Copyright (C) 1998-2021 Free Software Foundation, Inc. 1364*4882a593Smuzhiyun+ This file is part of the GNU C Library. 1365*4882a593Smuzhiyun+ 1366*4882a593Smuzhiyun+ The GNU C Library is free software; you can redistribute it and/or 1367*4882a593Smuzhiyun+ modify it under the terms of the GNU Lesser General Public 1368*4882a593Smuzhiyun+ License as published by the Free Software Foundation; either 1369*4882a593Smuzhiyun+ version 2.1 of the License, or (at your option) any later version. 1370*4882a593Smuzhiyun+ 1371*4882a593Smuzhiyun+ The GNU C Library is distributed in the hope that it will be useful, 1372*4882a593Smuzhiyun+ but WITHOUT ANY WARRANTY; without even the implied warranty of 1373*4882a593Smuzhiyun+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 1374*4882a593Smuzhiyun+ Lesser General Public License for more details.
1375*4882a593Smuzhiyun+ 1376*4882a593Smuzhiyun+ You should have received a copy of the GNU Lesser General Public 1377*4882a593Smuzhiyun+ License along with the GNU C Library. If not, see 1378*4882a593Smuzhiyun+ <https://www.gnu.org/licenses/>. */ 1379*4882a593Smuzhiyun+ 1380*4882a593Smuzhiyun+/* Thumb requires excessive IT insns here. */ 1381*4882a593Smuzhiyun+#define NO_THUMB 1382*4882a593Smuzhiyun+#include <sysdep.h> 1383*4882a593Smuzhiyun+ 1384*4882a593Smuzhiyun+ .text 1385*4882a593Smuzhiyun+ .syntax unified 1386*4882a593Smuzhiyun+ 1387*4882a593Smuzhiyun+/* void *memset (dstpp, c, len) */ 1388*4882a593Smuzhiyun+ 1389*4882a593Smuzhiyun+ENTRY(memset) 1390*4882a593Smuzhiyun+ mov r3, r0 1391*4882a593Smuzhiyun+ cmp r2, #8 1392*4882a593Smuzhiyun+ bcc 2f @ less than 8 bytes to move 1393*4882a593Smuzhiyun+ 1394*4882a593Smuzhiyun+1: 1395*4882a593Smuzhiyun+ tst r3, #3 @ aligned yet? 1396*4882a593Smuzhiyun+ strbne r1, [r3], #1 1397*4882a593Smuzhiyun+ subne r2, r2, #1 1398*4882a593Smuzhiyun+ bne 1b 1399*4882a593Smuzhiyun+ 1400*4882a593Smuzhiyun+ and r1, r1, #255 @ clear any sign bits 1401*4882a593Smuzhiyun+ orr r1, r1, r1, lsl $8 1402*4882a593Smuzhiyun+ orr r1, r1, r1, lsl $16 1403*4882a593Smuzhiyun+ mov ip, r1 1404*4882a593Smuzhiyun+ 1405*4882a593Smuzhiyun+1: 1406*4882a593Smuzhiyun+ subs r2, r2, #8 1407*4882a593Smuzhiyun+ stmiacs r3!, {r1, ip} @ store up to 32 bytes per loop iteration 1408*4882a593Smuzhiyun+ subscs r2, r2, #8 1409*4882a593Smuzhiyun+ stmiacs r3!, {r1, ip} 1410*4882a593Smuzhiyun+ subscs r2, r2, #8 1411*4882a593Smuzhiyun+ stmiacs r3!, {r1, ip} 1412*4882a593Smuzhiyun+ subscs r2, r2, #8 1413*4882a593Smuzhiyun+ stmiacs r3!, {r1, ip} 1414*4882a593Smuzhiyun+ bcs 1b 1415*4882a593Smuzhiyun+ 1416*4882a593Smuzhiyun+ and r2, r2, #7 1417*4882a593Smuzhiyun+2: 1418*4882a593Smuzhiyun+ subs r2, r2, #1 @ store up to 4 bytes per loop iteration 1419*4882a593Smuzhiyun+ strbcs r1, [r3], #1 1420*4882a593Smuzhiyun+ subscs r2, r2, #1 1421*4882a593Smuzhiyun+ strbcs r1, [r3], #1 
1422*4882a593Smuzhiyun+ subscs r2, r2, #1 1423*4882a593Smuzhiyun+ strbcs r1, [r3], #1 1424*4882a593Smuzhiyun+ subscs r2, r2, #1 1425*4882a593Smuzhiyun+ strbcs r1, [r3], #1 1426*4882a593Smuzhiyun+ bcs 2b 1427*4882a593Smuzhiyun+ 1428*4882a593Smuzhiyun+ DO_RET(lr) 1429*4882a593Smuzhiyun+END(memset) 1430*4882a593Smuzhiyun+libc_hidden_builtin_def (memset) 1431*4882a593Smuzhiyundiff --git a/libc/string/arm/glibc-neon/strcmp.S b/libc/string/arm/glibc-neon/strcmp.S 1432*4882a593Smuzhiyunnew file mode 100644 1433*4882a593Smuzhiyunindex 0000000..98a3c6c 1434*4882a593Smuzhiyun--- /dev/null 1435*4882a593Smuzhiyun+++ b/libc/string/arm/glibc-neon/strcmp.S 1436*4882a593Smuzhiyun@@ -0,0 +1,504 @@ 1437*4882a593Smuzhiyun+/* strcmp implementation for ARMv7-A, optimized for Cortex-A15. 1438*4882a593Smuzhiyun+ Copyright (C) 2012-2021 Free Software Foundation, Inc. 1439*4882a593Smuzhiyun+ This file is part of the GNU C Library. 1440*4882a593Smuzhiyun+ 1441*4882a593Smuzhiyun+ The GNU C Library is free software; you can redistribute it and/or 1442*4882a593Smuzhiyun+ modify it under the terms of the GNU Lesser General Public 1443*4882a593Smuzhiyun+ License as published by the Free Software Foundation; either 1444*4882a593Smuzhiyun+ version 2.1 of the License, or (at your option) any later version. 1445*4882a593Smuzhiyun+ 1446*4882a593Smuzhiyun+ The GNU C Library is distributed in the hope that it will be useful, 1447*4882a593Smuzhiyun+ but WITHOUT ANY WARRANTY; without even the implied warranty of 1448*4882a593Smuzhiyun+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 1449*4882a593Smuzhiyun+ Lesser General Public License for more details. 1450*4882a593Smuzhiyun+ 1451*4882a593Smuzhiyun+ You should have received a copy of the GNU Lesser General Public 1452*4882a593Smuzhiyun+ License along with the GNU C Library. If not, see 1453*4882a593Smuzhiyun+ <https://www.gnu.org/licenses/>. 
*/ 1454*4882a593Smuzhiyun+ 1455*4882a593Smuzhiyun+#include <arm-features.h> 1456*4882a593Smuzhiyun+#include <sysdep.h> 1457*4882a593Smuzhiyun+ 1458*4882a593Smuzhiyun+/* Implementation of strcmp for ARMv7 when DSP instructions are 1459*4882a593Smuzhiyun+ available. Use ldrd to support wider loads, provided the data 1460*4882a593Smuzhiyun+ is sufficiently aligned. Use saturating arithmetic to optimize 1461*4882a593Smuzhiyun+ the compares. */ 1462*4882a593Smuzhiyun+ 1463*4882a593Smuzhiyun+/* Build Options: 1464*4882a593Smuzhiyun+ STRCMP_PRECHECK: Run a quick pre-check of the first byte in the 1465*4882a593Smuzhiyun+ string. If comparing completely random strings the pre-check will 1466*4882a593Smuzhiyun+ save time, since there is a very high probability of a mismatch in 1467*4882a593Smuzhiyun+ the first character: we save significant overhead if this is the 1468*4882a593Smuzhiyun+ common case. However, if strings are likely to be identical (e.g. 1469*4882a593Smuzhiyun+ because we're verifying a hit in a hash table), then this check 1470*4882a593Smuzhiyun+ is largely redundant. 
*/ 1471*4882a593Smuzhiyun+ 1472*4882a593Smuzhiyun+#define STRCMP_PRECHECK 1 1473*4882a593Smuzhiyun+ 1474*4882a593Smuzhiyun+ .syntax unified 1475*4882a593Smuzhiyun+ 1476*4882a593Smuzhiyun+#ifdef __ARM_BIG_ENDIAN 1477*4882a593Smuzhiyun+# define S2LO lsl 1478*4882a593Smuzhiyun+# define S2LOEQ lsleq 1479*4882a593Smuzhiyun+# define S2HI lsr 1480*4882a593Smuzhiyun+# define MSB 0x000000ff 1481*4882a593Smuzhiyun+# define LSB 0xff000000 1482*4882a593Smuzhiyun+# define BYTE0_OFFSET 24 1483*4882a593Smuzhiyun+# define BYTE1_OFFSET 16 1484*4882a593Smuzhiyun+# define BYTE2_OFFSET 8 1485*4882a593Smuzhiyun+# define BYTE3_OFFSET 0 1486*4882a593Smuzhiyun+#else /* not __ARM_BIG_ENDIAN */ 1487*4882a593Smuzhiyun+# define S2LO lsr 1488*4882a593Smuzhiyun+# define S2LOEQ lsreq 1489*4882a593Smuzhiyun+# define S2HI lsl 1490*4882a593Smuzhiyun+# define BYTE0_OFFSET 0 1491*4882a593Smuzhiyun+# define BYTE1_OFFSET 8 1492*4882a593Smuzhiyun+# define BYTE2_OFFSET 16 1493*4882a593Smuzhiyun+# define BYTE3_OFFSET 24 1494*4882a593Smuzhiyun+# define MSB 0xff000000 1495*4882a593Smuzhiyun+# define LSB 0x000000ff 1496*4882a593Smuzhiyun+#endif /* not __ARM_BIG_ENDIAN */ 1497*4882a593Smuzhiyun+ 1498*4882a593Smuzhiyun+/* Parameters and result. */ 1499*4882a593Smuzhiyun+#define src1 r0 1500*4882a593Smuzhiyun+#define src2 r1 1501*4882a593Smuzhiyun+#define result r0 /* Overlaps src1. */ 1502*4882a593Smuzhiyun+ 1503*4882a593Smuzhiyun+/* Internal variables. */ 1504*4882a593Smuzhiyun+#define tmp1 r4 1505*4882a593Smuzhiyun+#define tmp2 r5 1506*4882a593Smuzhiyun+#define const_m1 r12 1507*4882a593Smuzhiyun+ 1508*4882a593Smuzhiyun+/* Additional internal variables for 64-bit aligned data. 
*/ 1509*4882a593Smuzhiyun+#define data1a r2 1510*4882a593Smuzhiyun+#define data1b r3 1511*4882a593Smuzhiyun+#define data2a r6 1512*4882a593Smuzhiyun+#define data2b r7 1513*4882a593Smuzhiyun+#define syndrome_a tmp1 1514*4882a593Smuzhiyun+#define syndrome_b tmp2 1515*4882a593Smuzhiyun+ 1516*4882a593Smuzhiyun+/* Additional internal variables for 32-bit aligned data. */ 1517*4882a593Smuzhiyun+#define data1 r2 1518*4882a593Smuzhiyun+#define data2 r3 1519*4882a593Smuzhiyun+#define syndrome tmp2 1520*4882a593Smuzhiyun+ 1521*4882a593Smuzhiyun+ 1522*4882a593Smuzhiyun+ .thumb 1523*4882a593Smuzhiyun+ 1524*4882a593Smuzhiyun+/* In Thumb code we can't use MVN with a register shift, but we do have ORN. */ 1525*4882a593Smuzhiyun+.macro prepare_mask mask_reg, nbits_reg 1526*4882a593Smuzhiyun+ S2HI \mask_reg, const_m1, \nbits_reg 1527*4882a593Smuzhiyun+.endm 1528*4882a593Smuzhiyun+.macro apply_mask data_reg, mask_reg 1529*4882a593Smuzhiyun+ orn \data_reg, \data_reg, \mask_reg 1530*4882a593Smuzhiyun+.endm 1531*4882a593Smuzhiyun+ 1532*4882a593Smuzhiyun+ /* Macro to compute and return the result value for word-aligned 1533*4882a593Smuzhiyun+ cases. */ 1534*4882a593Smuzhiyun+ .macro strcmp_epilogue_aligned synd d1 d2 restore_r6 1535*4882a593Smuzhiyun+#ifdef __ARM_BIG_ENDIAN 1536*4882a593Smuzhiyun+ /* If data1 contains a zero byte, then syndrome will contain a 1 in 1537*4882a593Smuzhiyun+ bit 7 of that byte. Otherwise, the highest set bit in the 1538*4882a593Smuzhiyun+ syndrome will highlight the first different bit. It is therefore 1539*4882a593Smuzhiyun+ sufficient to extract the eight bits starting with the syndrome 1540*4882a593Smuzhiyun+ bit. 
*/ 1541*4882a593Smuzhiyun+ clz tmp1, \synd 1542*4882a593Smuzhiyun+ lsl r1, \d2, tmp1 1543*4882a593Smuzhiyun+ .if \restore_r6 1544*4882a593Smuzhiyun+ ldrd r6, r7, [sp, #8] 1545*4882a593Smuzhiyun+ .endif 1546*4882a593Smuzhiyun+ lsl \d1, \d1, tmp1 1547*4882a593Smuzhiyun+ lsr result, \d1, #24 1548*4882a593Smuzhiyun+ ldrd r4, r5, [sp], #16 1549*4882a593Smuzhiyun+ cfi_remember_state 1550*4882a593Smuzhiyun+ cfi_def_cfa_offset (0) 1551*4882a593Smuzhiyun+ cfi_restore (r4) 1552*4882a593Smuzhiyun+ cfi_restore (r5) 1553*4882a593Smuzhiyun+ cfi_restore (r6) 1554*4882a593Smuzhiyun+ cfi_restore (r7) 1555*4882a593Smuzhiyun+ sub result, result, r1, lsr #24 1556*4882a593Smuzhiyun+ bx lr 1557*4882a593Smuzhiyun+#else 1558*4882a593Smuzhiyun+ /* To use the big-endian trick we'd have to reverse all three words. 1559*4882a593Smuzhiyun+ that's slower than this approach. */ 1560*4882a593Smuzhiyun+ rev \synd, \synd 1561*4882a593Smuzhiyun+ clz tmp1, \synd 1562*4882a593Smuzhiyun+ bic tmp1, tmp1, #7 1563*4882a593Smuzhiyun+ lsr r1, \d2, tmp1 1564*4882a593Smuzhiyun+ .if \restore_r6 1565*4882a593Smuzhiyun+ ldrd r6, r7, [sp, #8] 1566*4882a593Smuzhiyun+ .endif 1567*4882a593Smuzhiyun+ lsr \d1, \d1, tmp1 1568*4882a593Smuzhiyun+ and result, \d1, #255 1569*4882a593Smuzhiyun+ and r1, r1, #255 1570*4882a593Smuzhiyun+ ldrd r4, r5, [sp], #16 1571*4882a593Smuzhiyun+ cfi_remember_state 1572*4882a593Smuzhiyun+ cfi_def_cfa_offset (0) 1573*4882a593Smuzhiyun+ cfi_restore (r4) 1574*4882a593Smuzhiyun+ cfi_restore (r5) 1575*4882a593Smuzhiyun+ cfi_restore (r6) 1576*4882a593Smuzhiyun+ cfi_restore (r7) 1577*4882a593Smuzhiyun+ sub result, result, r1 1578*4882a593Smuzhiyun+ 1579*4882a593Smuzhiyun+ bx lr 1580*4882a593Smuzhiyun+#endif 1581*4882a593Smuzhiyun+ .endm 1582*4882a593Smuzhiyun+ 1583*4882a593Smuzhiyun+ .text 1584*4882a593Smuzhiyun+ .p2align 5 1585*4882a593Smuzhiyun+.Lstrcmp_start_addr: 1586*4882a593Smuzhiyun+#if STRCMP_PRECHECK == 1 1587*4882a593Smuzhiyun+.Lfastpath_exit: 1588*4882a593Smuzhiyun+ sub r0, r2, r3 
1589*4882a593Smuzhiyun+ bx lr 1590*4882a593Smuzhiyun+ nop 1591*4882a593Smuzhiyun+#endif 1592*4882a593Smuzhiyun+ENTRY (strcmp) 1593*4882a593Smuzhiyun+#if STRCMP_PRECHECK == 1 1594*4882a593Smuzhiyun+ ldrb r2, [src1] 1595*4882a593Smuzhiyun+ ldrb r3, [src2] 1596*4882a593Smuzhiyun+ cmp r2, #1 1597*4882a593Smuzhiyun+ it cs 1598*4882a593Smuzhiyun+ cmpcs r2, r3 1599*4882a593Smuzhiyun+ bne .Lfastpath_exit 1600*4882a593Smuzhiyun+#endif 1601*4882a593Smuzhiyun+ strd r4, r5, [sp, #-16]! 1602*4882a593Smuzhiyun+ cfi_def_cfa_offset (16) 1603*4882a593Smuzhiyun+ cfi_offset (r4, -16) 1604*4882a593Smuzhiyun+ cfi_offset (r5, -12) 1605*4882a593Smuzhiyun+ orr tmp1, src1, src2 1606*4882a593Smuzhiyun+ strd r6, r7, [sp, #8] 1607*4882a593Smuzhiyun+ cfi_offset (r6, -8) 1608*4882a593Smuzhiyun+ cfi_offset (r7, -4) 1609*4882a593Smuzhiyun+ mvn const_m1, #0 1610*4882a593Smuzhiyun+ lsl r2, tmp1, #29 1611*4882a593Smuzhiyun+ cbz r2, .Lloop_aligned8 1612*4882a593Smuzhiyun+ 1613*4882a593Smuzhiyun+.Lnot_aligned: 1614*4882a593Smuzhiyun+ eor tmp1, src1, src2 1615*4882a593Smuzhiyun+ tst tmp1, #7 1616*4882a593Smuzhiyun+ bne .Lmisaligned8 1617*4882a593Smuzhiyun+ 1618*4882a593Smuzhiyun+ /* Deal with mutual misalignment by aligning downwards and then 1619*4882a593Smuzhiyun+ masking off the unwanted loaded data to prevent a difference. */ 1620*4882a593Smuzhiyun+ and tmp1, src1, #7 1621*4882a593Smuzhiyun+ bic src1, src1, #7 1622*4882a593Smuzhiyun+ and tmp2, tmp1, #3 1623*4882a593Smuzhiyun+ bic src2, src2, #7 1624*4882a593Smuzhiyun+ lsl tmp2, tmp2, #3 /* Bytes -> bits. 
*/ 1625*4882a593Smuzhiyun+ ldrd data1a, data1b, [src1], #16 1626*4882a593Smuzhiyun+ tst tmp1, #4 1627*4882a593Smuzhiyun+ ldrd data2a, data2b, [src2], #16 1628*4882a593Smuzhiyun+ prepare_mask tmp1, tmp2 1629*4882a593Smuzhiyun+ apply_mask data1a, tmp1 1630*4882a593Smuzhiyun+ apply_mask data2a, tmp1 1631*4882a593Smuzhiyun+ beq .Lstart_realigned8 1632*4882a593Smuzhiyun+ apply_mask data1b, tmp1 1633*4882a593Smuzhiyun+ mov data1a, const_m1 1634*4882a593Smuzhiyun+ apply_mask data2b, tmp1 1635*4882a593Smuzhiyun+ mov data2a, const_m1 1636*4882a593Smuzhiyun+ b .Lstart_realigned8 1637*4882a593Smuzhiyun+ 1638*4882a593Smuzhiyun+ /* Unwind the inner loop by a factor of 2, giving 16 bytes per 1639*4882a593Smuzhiyun+ pass. */ 1640*4882a593Smuzhiyun+ .p2align 5,,12 /* Don't start in the tail bytes of a cache line. */ 1641*4882a593Smuzhiyun+ .p2align 2 /* Always word aligned. */ 1642*4882a593Smuzhiyun+.Lloop_aligned8: 1643*4882a593Smuzhiyun+ ldrd data1a, data1b, [src1], #16 1644*4882a593Smuzhiyun+ ldrd data2a, data2b, [src2], #16 1645*4882a593Smuzhiyun+.Lstart_realigned8: 1646*4882a593Smuzhiyun+ uadd8 syndrome_b, data1a, const_m1 /* Only want GE bits, */ 1647*4882a593Smuzhiyun+ eor syndrome_a, data1a, data2a 1648*4882a593Smuzhiyun+ sel syndrome_a, syndrome_a, const_m1 1649*4882a593Smuzhiyun+ cbnz syndrome_a, .Ldiff_in_a 1650*4882a593Smuzhiyun+ uadd8 syndrome_b, data1b, const_m1 /* Only want GE bits. */ 1651*4882a593Smuzhiyun+ eor syndrome_b, data1b, data2b 1652*4882a593Smuzhiyun+ sel syndrome_b, syndrome_b, const_m1 1653*4882a593Smuzhiyun+ cbnz syndrome_b, .Ldiff_in_b 1654*4882a593Smuzhiyun+ 1655*4882a593Smuzhiyun+ ldrd data1a, data1b, [src1, #-8] 1656*4882a593Smuzhiyun+ ldrd data2a, data2b, [src2, #-8] 1657*4882a593Smuzhiyun+ uadd8 syndrome_b, data1a, const_m1 /* Only want GE bits, */ 1658*4882a593Smuzhiyun+ eor syndrome_a, data1a, data2a 1659*4882a593Smuzhiyun+ sel syndrome_a, syndrome_a, const_m1 1660*4882a593Smuzhiyun+ uadd8 syndrome_b, data1b, const_m1 /* Only want GE bits. 
*/ 1661*4882a593Smuzhiyun+ eor syndrome_b, data1b, data2b 1662*4882a593Smuzhiyun+ sel syndrome_b, syndrome_b, const_m1 1663*4882a593Smuzhiyun+ /* Can't use CBZ for backwards branch. */ 1664*4882a593Smuzhiyun+ orrs syndrome_b, syndrome_b, syndrome_a /* Only need if s_a == 0 */ 1665*4882a593Smuzhiyun+ beq .Lloop_aligned8 1666*4882a593Smuzhiyun+ 1667*4882a593Smuzhiyun+.Ldiff_found: 1668*4882a593Smuzhiyun+ cbnz syndrome_a, .Ldiff_in_a 1669*4882a593Smuzhiyun+ 1670*4882a593Smuzhiyun+.Ldiff_in_b: 1671*4882a593Smuzhiyun+ strcmp_epilogue_aligned syndrome_b, data1b, data2b 1 1672*4882a593Smuzhiyun+ 1673*4882a593Smuzhiyun+.Ldiff_in_a: 1674*4882a593Smuzhiyun+ cfi_restore_state 1675*4882a593Smuzhiyun+ strcmp_epilogue_aligned syndrome_a, data1a, data2a 1 1676*4882a593Smuzhiyun+ 1677*4882a593Smuzhiyun+ cfi_restore_state 1678*4882a593Smuzhiyun+.Lmisaligned8: 1679*4882a593Smuzhiyun+ tst tmp1, #3 1680*4882a593Smuzhiyun+ bne .Lmisaligned4 1681*4882a593Smuzhiyun+ ands tmp1, src1, #3 1682*4882a593Smuzhiyun+ bne .Lmutual_align4 1683*4882a593Smuzhiyun+ 1684*4882a593Smuzhiyun+ /* Unrolled by a factor of 2, to reduce the number of post-increment 1685*4882a593Smuzhiyun+ operations. */ 1686*4882a593Smuzhiyun+.Lloop_aligned4: 1687*4882a593Smuzhiyun+ ldr data1, [src1], #8 1688*4882a593Smuzhiyun+ ldr data2, [src2], #8 1689*4882a593Smuzhiyun+.Lstart_realigned4: 1690*4882a593Smuzhiyun+ uadd8 syndrome, data1, const_m1 /* Only need GE bits. 
*/ 1691*4882a593Smuzhiyun+ eor syndrome, data1, data2 1692*4882a593Smuzhiyun+ sel syndrome, syndrome, const_m1 1693*4882a593Smuzhiyun+ cbnz syndrome, .Laligned4_done 1694*4882a593Smuzhiyun+ ldr data1, [src1, #-4] 1695*4882a593Smuzhiyun+ ldr data2, [src2, #-4] 1696*4882a593Smuzhiyun+ uadd8 syndrome, data1, const_m1 1697*4882a593Smuzhiyun+ eor syndrome, data1, data2 1698*4882a593Smuzhiyun+ sel syndrome, syndrome, const_m1 1699*4882a593Smuzhiyun+ cmp syndrome, #0 1700*4882a593Smuzhiyun+ beq .Lloop_aligned4 1701*4882a593Smuzhiyun+ 1702*4882a593Smuzhiyun+.Laligned4_done: 1703*4882a593Smuzhiyun+ strcmp_epilogue_aligned syndrome, data1, data2, 0 1704*4882a593Smuzhiyun+ 1705*4882a593Smuzhiyun+.Lmutual_align4: 1706*4882a593Smuzhiyun+ cfi_restore_state 1707*4882a593Smuzhiyun+ /* Deal with mutual misalignment by aligning downwards and then 1708*4882a593Smuzhiyun+ masking off the unwanted loaded data to prevent a difference. */ 1709*4882a593Smuzhiyun+ lsl tmp1, tmp1, #3 /* Bytes -> bits. */ 1710*4882a593Smuzhiyun+ bic src1, src1, #3 1711*4882a593Smuzhiyun+ ldr data1, [src1], #8 1712*4882a593Smuzhiyun+ bic src2, src2, #3 1713*4882a593Smuzhiyun+ ldr data2, [src2], #8 1714*4882a593Smuzhiyun+ 1715*4882a593Smuzhiyun+ prepare_mask tmp1, tmp1 1716*4882a593Smuzhiyun+ apply_mask data1, tmp1 1717*4882a593Smuzhiyun+ apply_mask data2, tmp1 1718*4882a593Smuzhiyun+ b .Lstart_realigned4 1719*4882a593Smuzhiyun+ 1720*4882a593Smuzhiyun+.Lmisaligned4: 1721*4882a593Smuzhiyun+ ands tmp1, src1, #3 1722*4882a593Smuzhiyun+ beq .Lsrc1_aligned 1723*4882a593Smuzhiyun+ sub src2, src2, tmp1 1724*4882a593Smuzhiyun+ bic src1, src1, #3 1725*4882a593Smuzhiyun+ lsls tmp1, tmp1, #31 1726*4882a593Smuzhiyun+ ldr data1, [src1], #4 1727*4882a593Smuzhiyun+ beq .Laligned_m2 1728*4882a593Smuzhiyun+ bcs .Laligned_m1 1729*4882a593Smuzhiyun+ 1730*4882a593Smuzhiyun+#if STRCMP_PRECHECK == 0 1731*4882a593Smuzhiyun+ ldrb data2, [src2, #1] 1732*4882a593Smuzhiyun+ uxtb tmp1, data1, ror #BYTE1_OFFSET 1733*4882a593Smuzhiyun+ 
subs tmp1, tmp1, data2 1734*4882a593Smuzhiyun+ bne .Lmisaligned_exit 1735*4882a593Smuzhiyun+ cbz data2, .Lmisaligned_exit 1736*4882a593Smuzhiyun+ 1737*4882a593Smuzhiyun+.Laligned_m2: 1738*4882a593Smuzhiyun+ ldrb data2, [src2, #2] 1739*4882a593Smuzhiyun+ uxtb tmp1, data1, ror #BYTE2_OFFSET 1740*4882a593Smuzhiyun+ subs tmp1, tmp1, data2 1741*4882a593Smuzhiyun+ bne .Lmisaligned_exit 1742*4882a593Smuzhiyun+ cbz data2, .Lmisaligned_exit 1743*4882a593Smuzhiyun+ 1744*4882a593Smuzhiyun+.Laligned_m1: 1745*4882a593Smuzhiyun+ ldrb data2, [src2, #3] 1746*4882a593Smuzhiyun+ uxtb tmp1, data1, ror #BYTE3_OFFSET 1747*4882a593Smuzhiyun+ subs tmp1, tmp1, data2 1748*4882a593Smuzhiyun+ bne .Lmisaligned_exit 1749*4882a593Smuzhiyun+ add src2, src2, #4 1750*4882a593Smuzhiyun+ cbnz data2, .Lsrc1_aligned 1751*4882a593Smuzhiyun+#else /* STRCMP_PRECHECK */ 1752*4882a593Smuzhiyun+ /* If we've done the pre-check, then we don't need to check the 1753*4882a593Smuzhiyun+ first byte again here. */ 1754*4882a593Smuzhiyun+ ldrb data2, [src2, #2] 1755*4882a593Smuzhiyun+ uxtb tmp1, data1, ror #BYTE2_OFFSET 1756*4882a593Smuzhiyun+ subs tmp1, tmp1, data2 1757*4882a593Smuzhiyun+ bne .Lmisaligned_exit 1758*4882a593Smuzhiyun+ cbz data2, .Lmisaligned_exit 1759*4882a593Smuzhiyun+ 1760*4882a593Smuzhiyun+.Laligned_m2: 1761*4882a593Smuzhiyun+ ldrb data2, [src2, #3] 1762*4882a593Smuzhiyun+ uxtb tmp1, data1, ror #BYTE3_OFFSET 1763*4882a593Smuzhiyun+ subs tmp1, tmp1, data2 1764*4882a593Smuzhiyun+ bne .Lmisaligned_exit 1765*4882a593Smuzhiyun+ cbnz data2, .Laligned_m1 1766*4882a593Smuzhiyun+#endif 1767*4882a593Smuzhiyun+ 1768*4882a593Smuzhiyun+.Lmisaligned_exit: 1769*4882a593Smuzhiyun+ mov result, tmp1 1770*4882a593Smuzhiyun+ ldr r4, [sp], #16 1771*4882a593Smuzhiyun+ cfi_remember_state 1772*4882a593Smuzhiyun+ cfi_def_cfa_offset (0) 1773*4882a593Smuzhiyun+ cfi_restore (r4) 1774*4882a593Smuzhiyun+ cfi_restore (r5) 1775*4882a593Smuzhiyun+ cfi_restore (r6) 1776*4882a593Smuzhiyun+ cfi_restore (r7) 1777*4882a593Smuzhiyun+ 
bx lr 1778*4882a593Smuzhiyun+ 1779*4882a593Smuzhiyun+#if STRCMP_PRECHECK == 1 1780*4882a593Smuzhiyun+.Laligned_m1: 1781*4882a593Smuzhiyun+ add src2, src2, #4 1782*4882a593Smuzhiyun+#endif 1783*4882a593Smuzhiyun+.Lsrc1_aligned: 1784*4882a593Smuzhiyun+ cfi_restore_state 1785*4882a593Smuzhiyun+ /* src1 is word aligned, but src2 has no common alignment 1786*4882a593Smuzhiyun+ with it. */ 1787*4882a593Smuzhiyun+ ldr data1, [src1], #4 1788*4882a593Smuzhiyun+ lsls tmp1, src2, #31 /* C=src2[1], Z=src2[0]. */ 1789*4882a593Smuzhiyun+ 1790*4882a593Smuzhiyun+ bic src2, src2, #3 1791*4882a593Smuzhiyun+ ldr data2, [src2], #4 1792*4882a593Smuzhiyun+ bhi .Loverlap1 /* C=1, Z=0 => src2[1:0] = 0b11. */ 1793*4882a593Smuzhiyun+ bcs .Loverlap2 /* C=1, Z=1 => src2[1:0] = 0b10. */ 1794*4882a593Smuzhiyun+ 1795*4882a593Smuzhiyun+ /* (overlap3) C=0, Z=0 => src2[1:0] = 0b01. */ 1796*4882a593Smuzhiyun+.Loverlap3: 1797*4882a593Smuzhiyun+ bic tmp1, data1, #MSB 1798*4882a593Smuzhiyun+ uadd8 syndrome, data1, const_m1 1799*4882a593Smuzhiyun+ eors syndrome, tmp1, data2, S2LO #8 1800*4882a593Smuzhiyun+ sel syndrome, syndrome, const_m1 1801*4882a593Smuzhiyun+ bne 4f 1802*4882a593Smuzhiyun+ cbnz syndrome, 5f 1803*4882a593Smuzhiyun+ ldr data2, [src2], #4 1804*4882a593Smuzhiyun+ eor tmp1, tmp1, data1 1805*4882a593Smuzhiyun+ cmp tmp1, data2, S2HI #24 1806*4882a593Smuzhiyun+ bne 6f 1807*4882a593Smuzhiyun+ ldr data1, [src1], #4 1808*4882a593Smuzhiyun+ b .Loverlap3 1809*4882a593Smuzhiyun+4: 1810*4882a593Smuzhiyun+ S2LO data2, data2, #8 1811*4882a593Smuzhiyun+ b .Lstrcmp_tail 1812*4882a593Smuzhiyun+ 1813*4882a593Smuzhiyun+5: 1814*4882a593Smuzhiyun+ bics syndrome, syndrome, #MSB 1815*4882a593Smuzhiyun+ bne .Lstrcmp_done_equal 1816*4882a593Smuzhiyun+ 1817*4882a593Smuzhiyun+ /* We can only get here if the MSB of data1 contains 0, so 1818*4882a593Smuzhiyun+ fast-path the exit. 
*/ 1819*4882a593Smuzhiyun+ ldrb result, [src2] 1820*4882a593Smuzhiyun+ ldrd r4, r5, [sp], #16 1821*4882a593Smuzhiyun+ cfi_remember_state 1822*4882a593Smuzhiyun+ cfi_def_cfa_offset (0) 1823*4882a593Smuzhiyun+ cfi_restore (r4) 1824*4882a593Smuzhiyun+ cfi_restore (r5) 1825*4882a593Smuzhiyun+ /* R6/7 Not used in this sequence. */ 1826*4882a593Smuzhiyun+ cfi_restore (r6) 1827*4882a593Smuzhiyun+ cfi_restore (r7) 1828*4882a593Smuzhiyun+ neg result, result 1829*4882a593Smuzhiyun+ bx lr 1830*4882a593Smuzhiyun+ 1831*4882a593Smuzhiyun+6: 1832*4882a593Smuzhiyun+ cfi_restore_state 1833*4882a593Smuzhiyun+ S2LO data1, data1, #24 1834*4882a593Smuzhiyun+ and data2, data2, #LSB 1835*4882a593Smuzhiyun+ b .Lstrcmp_tail 1836*4882a593Smuzhiyun+ 1837*4882a593Smuzhiyun+ .p2align 5,,12 /* Ensure at least 3 instructions in cache line. */ 1838*4882a593Smuzhiyun+.Loverlap2: 1839*4882a593Smuzhiyun+ and tmp1, data1, const_m1, S2LO #16 1840*4882a593Smuzhiyun+ uadd8 syndrome, data1, const_m1 1841*4882a593Smuzhiyun+ eors syndrome, tmp1, data2, S2LO #16 1842*4882a593Smuzhiyun+ sel syndrome, syndrome, const_m1 1843*4882a593Smuzhiyun+ bne 4f 1844*4882a593Smuzhiyun+ cbnz syndrome, 5f 1845*4882a593Smuzhiyun+ ldr data2, [src2], #4 1846*4882a593Smuzhiyun+ eor tmp1, tmp1, data1 1847*4882a593Smuzhiyun+ cmp tmp1, data2, S2HI #16 1848*4882a593Smuzhiyun+ bne 6f 1849*4882a593Smuzhiyun+ ldr data1, [src1], #4 1850*4882a593Smuzhiyun+ b .Loverlap2 1851*4882a593Smuzhiyun+4: 1852*4882a593Smuzhiyun+ S2LO data2, data2, #16 1853*4882a593Smuzhiyun+ b .Lstrcmp_tail 1854*4882a593Smuzhiyun+5: 1855*4882a593Smuzhiyun+ ands syndrome, syndrome, const_m1, S2LO #16 1856*4882a593Smuzhiyun+ bne .Lstrcmp_done_equal 1857*4882a593Smuzhiyun+ 1858*4882a593Smuzhiyun+ ldrh data2, [src2] 1859*4882a593Smuzhiyun+ S2LO data1, data1, #16 1860*4882a593Smuzhiyun+#ifdef __ARM_BIG_ENDIAN 1861*4882a593Smuzhiyun+ lsl data2, data2, #16 1862*4882a593Smuzhiyun+#endif 1863*4882a593Smuzhiyun+ b .Lstrcmp_tail 1864*4882a593Smuzhiyun+ 
1865*4882a593Smuzhiyun+6: 1866*4882a593Smuzhiyun+ S2LO data1, data1, #16 1867*4882a593Smuzhiyun+ and data2, data2, const_m1, S2LO #16 1868*4882a593Smuzhiyun+ b .Lstrcmp_tail 1869*4882a593Smuzhiyun+ 1870*4882a593Smuzhiyun+ .p2align 5,,12 /* Ensure at least 3 instructions in cache line. */ 1871*4882a593Smuzhiyun+.Loverlap1: 1872*4882a593Smuzhiyun+ and tmp1, data1, #LSB 1873*4882a593Smuzhiyun+ uadd8 syndrome, data1, const_m1 1874*4882a593Smuzhiyun+ eors syndrome, tmp1, data2, S2LO #24 1875*4882a593Smuzhiyun+ sel syndrome, syndrome, const_m1 1876*4882a593Smuzhiyun+ bne 4f 1877*4882a593Smuzhiyun+ cbnz syndrome, 5f 1878*4882a593Smuzhiyun+ ldr data2, [src2], #4 1879*4882a593Smuzhiyun+ eor tmp1, tmp1, data1 1880*4882a593Smuzhiyun+ cmp tmp1, data2, S2HI #8 1881*4882a593Smuzhiyun+ bne 6f 1882*4882a593Smuzhiyun+ ldr data1, [src1], #4 1883*4882a593Smuzhiyun+ b .Loverlap1 1884*4882a593Smuzhiyun+4: 1885*4882a593Smuzhiyun+ S2LO data2, data2, #24 1886*4882a593Smuzhiyun+ b .Lstrcmp_tail 1887*4882a593Smuzhiyun+5: 1888*4882a593Smuzhiyun+ tst syndrome, #LSB 1889*4882a593Smuzhiyun+ bne .Lstrcmp_done_equal 1890*4882a593Smuzhiyun+ ldr data2, [src2] 1891*4882a593Smuzhiyun+6: 1892*4882a593Smuzhiyun+ S2LO data1, data1, #8 1893*4882a593Smuzhiyun+ bic data2, data2, #MSB 1894*4882a593Smuzhiyun+ b .Lstrcmp_tail 1895*4882a593Smuzhiyun+ 1896*4882a593Smuzhiyun+.Lstrcmp_done_equal: 1897*4882a593Smuzhiyun+ mov result, #0 1898*4882a593Smuzhiyun+ ldrd r4, r5, [sp], #16 1899*4882a593Smuzhiyun+ cfi_remember_state 1900*4882a593Smuzhiyun+ cfi_def_cfa_offset (0) 1901*4882a593Smuzhiyun+ cfi_restore (r4) 1902*4882a593Smuzhiyun+ cfi_restore (r5) 1903*4882a593Smuzhiyun+ /* R6/7 not used in this sequence. 
*/ 1904*4882a593Smuzhiyun+ cfi_restore (r6) 1905*4882a593Smuzhiyun+ cfi_restore (r7) 1906*4882a593Smuzhiyun+ bx lr 1907*4882a593Smuzhiyun+ 1908*4882a593Smuzhiyun+.Lstrcmp_tail: 1909*4882a593Smuzhiyun+ cfi_restore_state 1910*4882a593Smuzhiyun+#ifndef __ARM_BIG_ENDIAN 1911*4882a593Smuzhiyun+ rev data1, data1 1912*4882a593Smuzhiyun+ rev data2, data2 1913*4882a593Smuzhiyun+ /* Now everything looks big-endian... */ 1914*4882a593Smuzhiyun+#endif 1915*4882a593Smuzhiyun+ uadd8 tmp1, data1, const_m1 1916*4882a593Smuzhiyun+ eor tmp1, data1, data2 1917*4882a593Smuzhiyun+ sel syndrome, tmp1, const_m1 1918*4882a593Smuzhiyun+ clz tmp1, syndrome 1919*4882a593Smuzhiyun+ lsl data1, data1, tmp1 1920*4882a593Smuzhiyun+ lsl data2, data2, tmp1 1921*4882a593Smuzhiyun+ lsr result, data1, #24 1922*4882a593Smuzhiyun+ ldrd r4, r5, [sp], #16 1923*4882a593Smuzhiyun+ cfi_def_cfa_offset (0) 1924*4882a593Smuzhiyun+ cfi_restore (r4) 1925*4882a593Smuzhiyun+ cfi_restore (r5) 1926*4882a593Smuzhiyun+ /* R6/7 not used in this sequence. 
*/ 1927*4882a593Smuzhiyun+ cfi_restore (r6) 1928*4882a593Smuzhiyun+ cfi_restore (r7) 1929*4882a593Smuzhiyun+ sub result, result, data2, lsr #24 1930*4882a593Smuzhiyun+ bx lr 1931*4882a593Smuzhiyun+END (strcmp) 1932*4882a593Smuzhiyun+libc_hidden_builtin_def (strcmp) 1933*4882a593Smuzhiyun+ 1934*4882a593Smuzhiyun+# From ../strcmp.S 1935*4882a593Smuzhiyun+#include <features.h> 1936*4882a593Smuzhiyun+ 1937*4882a593Smuzhiyun+#ifndef __UCLIBC_HAS_LOCALE__ 1938*4882a593Smuzhiyun+strong_alias(strcmp,strcoll) 1939*4882a593Smuzhiyun+libc_hidden_def(strcoll) 1940*4882a593Smuzhiyun+#endif 1941*4882a593Smuzhiyundiff --git a/libc/string/arm/glibc-neon/strlen.S b/libc/string/arm/glibc-neon/strlen.S 1942*4882a593Smuzhiyunnew file mode 100644 1943*4882a593Smuzhiyunindex 0000000..dbb6344 1944*4882a593Smuzhiyun--- /dev/null 1945*4882a593Smuzhiyun+++ b/libc/string/arm/glibc-neon/strlen.S 1946*4882a593Smuzhiyun@@ -0,0 +1,76 @@ 1947*4882a593Smuzhiyun+/* Copyright (C) 1998-2021 Free Software Foundation, Inc. 1948*4882a593Smuzhiyun+ This file is part of the GNU C Library. 1949*4882a593Smuzhiyun+ 1950*4882a593Smuzhiyun+ The GNU C Library is free software; you can redistribute it and/or 1951*4882a593Smuzhiyun+ modify it under the terms of the GNU Lesser General Public 1952*4882a593Smuzhiyun+ License as published by the Free Software Foundation; either 1953*4882a593Smuzhiyun+ version 2.1 of the License, or (at your option) any later version. 1954*4882a593Smuzhiyun+ 1955*4882a593Smuzhiyun+ The GNU C Library is distributed in the hope that it will be useful, 1956*4882a593Smuzhiyun+ but WITHOUT ANY WARRANTY; without even the implied warranty of 1957*4882a593Smuzhiyun+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 1958*4882a593Smuzhiyun+ Lesser General Public License for more details. 1959*4882a593Smuzhiyun+ 1960*4882a593Smuzhiyun+ You should have received a copy of the GNU Lesser General Public 1961*4882a593Smuzhiyun+ License along with the GNU C Library. 
If not, see 1962*4882a593Smuzhiyun+ <https://www.gnu.org/licenses/>. */ 1963*4882a593Smuzhiyun+ 1964*4882a593Smuzhiyun+/* Thumb requires excessive IT insns here. */ 1965*4882a593Smuzhiyun+#define NO_THUMB 1966*4882a593Smuzhiyun+#include <sysdep.h> 1967*4882a593Smuzhiyun+ 1968*4882a593Smuzhiyun+/* size_t strlen(const char *S) 1969*4882a593Smuzhiyun+ * entry: r0 -> string 1970*4882a593Smuzhiyun+ * exit: r0 = len 1971*4882a593Smuzhiyun+ */ 1972*4882a593Smuzhiyun+ 1973*4882a593Smuzhiyun+ .syntax unified 1974*4882a593Smuzhiyun+ .text 1975*4882a593Smuzhiyun+ 1976*4882a593Smuzhiyun+ENTRY(strlen) 1977*4882a593Smuzhiyun+ bic r1, r0, $3 @ addr of word containing first byte 1978*4882a593Smuzhiyun+ ldr r2, [r1], $4 @ get the first word 1979*4882a593Smuzhiyun+ ands r3, r0, $3 @ how many bytes are duff? 1980*4882a593Smuzhiyun+ rsb r0, r3, $0 @ get - that number into counter. 1981*4882a593Smuzhiyun+ beq Laligned @ skip into main check routine if no 1982*4882a593Smuzhiyun+ @ more 1983*4882a593Smuzhiyun+#ifdef __ARMEB__ 1984*4882a593Smuzhiyun+ orr r2, r2, $0xff000000 @ set this byte to non-zero 1985*4882a593Smuzhiyun+ subs r3, r3, $1 @ any more to do? 1986*4882a593Smuzhiyun+ orrgt r2, r2, $0x00ff0000 @ if so, set this byte 1987*4882a593Smuzhiyun+ subs r3, r3, $1 @ more? 1988*4882a593Smuzhiyun+ orrgt r2, r2, $0x0000ff00 @ then set. 1989*4882a593Smuzhiyun+#else 1990*4882a593Smuzhiyun+ orr r2, r2, $0x000000ff @ set this byte to non-zero 1991*4882a593Smuzhiyun+ subs r3, r3, $1 @ any more to do? 1992*4882a593Smuzhiyun+ orrgt r2, r2, $0x0000ff00 @ if so, set this byte 1993*4882a593Smuzhiyun+ subs r3, r3, $1 @ more? 1994*4882a593Smuzhiyun+ orrgt r2, r2, $0x00ff0000 @ then set. 1995*4882a593Smuzhiyun+#endif 1996*4882a593Smuzhiyun+Laligned: @ here, we have a word in r2. Does it 1997*4882a593Smuzhiyun+ tst r2, $0x000000ff @ contain any zeroes? 
1998*4882a593Smuzhiyun+ tstne r2, $0x0000ff00 @ 1999*4882a593Smuzhiyun+ tstne r2, $0x00ff0000 @ 2000*4882a593Smuzhiyun+ tstne r2, $0xff000000 @ 2001*4882a593Smuzhiyun+ addne r0, r0, $4 @ if not, the string is 4 bytes longer 2002*4882a593Smuzhiyun+ ldrne r2, [r1], $4 @ and we continue to the next word 2003*4882a593Smuzhiyun+ bne Laligned @ 2004*4882a593Smuzhiyun+Llastword: @ drop through to here once we find a 2005*4882a593Smuzhiyun+#ifdef __ARMEB__ 2006*4882a593Smuzhiyun+ tst r2, $0xff000000 @ word that has a zero byte in it 2007*4882a593Smuzhiyun+ addne r0, r0, $1 @ 2008*4882a593Smuzhiyun+ tstne r2, $0x00ff0000 @ and add up to 3 bytes on to it 2009*4882a593Smuzhiyun+ addne r0, r0, $1 @ 2010*4882a593Smuzhiyun+ tstne r2, $0x0000ff00 @ (if first three all non-zero, 4th 2011*4882a593Smuzhiyun+ addne r0, r0, $1 @ must be zero) 2012*4882a593Smuzhiyun+#else 2013*4882a593Smuzhiyun+ tst r2, $0x000000ff @ word that has a zero byte in it 2014*4882a593Smuzhiyun+ addne r0, r0, $1 @ 2015*4882a593Smuzhiyun+ tstne r2, $0x0000ff00 @ and add up to 3 bytes on to it 2016*4882a593Smuzhiyun+ addne r0, r0, $1 @ 2017*4882a593Smuzhiyun+ tstne r2, $0x00ff0000 @ (if first three all non-zero, 4th 2018*4882a593Smuzhiyun+ addne r0, r0, $1 @ must be zero) 2019*4882a593Smuzhiyun+#endif 2020*4882a593Smuzhiyun+ DO_RET(lr) 2021*4882a593Smuzhiyun+END(strlen) 2022*4882a593Smuzhiyun+libc_hidden_builtin_def (strlen) 2023*4882a593Smuzhiyundiff --git a/libc/string/arm/glibc-neon/sysdep.h b/libc/string/arm/glibc-neon/sysdep.h 2024*4882a593Smuzhiyunnew file mode 100644 2025*4882a593Smuzhiyunindex 0000000..cceb4a9 2026*4882a593Smuzhiyun--- /dev/null 2027*4882a593Smuzhiyun+++ b/libc/string/arm/glibc-neon/sysdep.h 2028*4882a593Smuzhiyun@@ -0,0 +1,339 @@ 2029*4882a593Smuzhiyun+/* Assembler macros for ARM. 2030*4882a593Smuzhiyun+ Copyright (C) 1997-2021 Free Software Foundation, Inc. 2031*4882a593Smuzhiyun+ This file is part of the GNU C Library. 
2032*4882a593Smuzhiyun+ 2033*4882a593Smuzhiyun+ The GNU C Library is free software; you can redistribute it and/or 2034*4882a593Smuzhiyun+ modify it under the terms of the GNU Lesser General Public 2035*4882a593Smuzhiyun+ License as published by the Free Software Foundation; either 2036*4882a593Smuzhiyun+ version 2.1 of the License, or (at your option) any later version. 2037*4882a593Smuzhiyun+ 2038*4882a593Smuzhiyun+ The GNU C Library is distributed in the hope that it will be useful, 2039*4882a593Smuzhiyun+ but WITHOUT ANY WARRANTY; without even the implied warranty of 2040*4882a593Smuzhiyun+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 2041*4882a593Smuzhiyun+ Lesser General Public License for more details. 2042*4882a593Smuzhiyun+ 2043*4882a593Smuzhiyun+ You should have received a copy of the GNU Lesser General Public 2044*4882a593Smuzhiyun+ License along with the GNU C Library. If not, see 2045*4882a593Smuzhiyun+ <https://www.gnu.org/licenses/>. */ 2046*4882a593Smuzhiyun+ 2047*4882a593Smuzhiyun+#include <sysdeps/generic/sysdep.h> 2048*4882a593Smuzhiyun+#include <features.h> 2049*4882a593Smuzhiyun+ 2050*4882a593Smuzhiyun+#ifndef __ASSEMBLER__ 2051*4882a593Smuzhiyun+# include <stdint.h> 2052*4882a593Smuzhiyun+#else 2053*4882a593Smuzhiyun+# include <arm-features.h> 2054*4882a593Smuzhiyun+#endif 2055*4882a593Smuzhiyun+ 2056*4882a593Smuzhiyun+/* The __ARM_ARCH define is provided by gcc 4.8. Construct it otherwise. 
*/ 2057*4882a593Smuzhiyun+#ifndef __ARM_ARCH 2058*4882a593Smuzhiyun+# ifdef __ARM_ARCH_2__ 2059*4882a593Smuzhiyun+# define __ARM_ARCH 2 2060*4882a593Smuzhiyun+# elif defined (__ARM_ARCH_3__) || defined (__ARM_ARCH_3M__) 2061*4882a593Smuzhiyun+# define __ARM_ARCH 3 2062*4882a593Smuzhiyun+# elif defined (__ARM_ARCH_4__) || defined (__ARM_ARCH_4T__) 2063*4882a593Smuzhiyun+# define __ARM_ARCH 4 2064*4882a593Smuzhiyun+# elif defined (__ARM_ARCH_5__) || defined (__ARM_ARCH_5E__) \ 2065*4882a593Smuzhiyun+ || defined(__ARM_ARCH_5T__) || defined(__ARM_ARCH_5TE__) \ 2066*4882a593Smuzhiyun+ || defined(__ARM_ARCH_5TEJ__) 2067*4882a593Smuzhiyun+# define __ARM_ARCH 5 2068*4882a593Smuzhiyun+# elif defined (__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \ 2069*4882a593Smuzhiyun+ || defined (__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) \ 2070*4882a593Smuzhiyun+ || defined (__ARM_ARCH_6K__) || defined(__ARM_ARCH_6T2__) 2071*4882a593Smuzhiyun+# define __ARM_ARCH 6 2072*4882a593Smuzhiyun+# elif defined (__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \ 2073*4882a593Smuzhiyun+ || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \ 2074*4882a593Smuzhiyun+ || defined(__ARM_ARCH_7EM__) 2075*4882a593Smuzhiyun+# define __ARM_ARCH 7 2076*4882a593Smuzhiyun+# else 2077*4882a593Smuzhiyun+# error unknown arm architecture 2078*4882a593Smuzhiyun+# endif 2079*4882a593Smuzhiyun+#endif 2080*4882a593Smuzhiyun+ 2081*4882a593Smuzhiyun+#if __ARM_ARCH > 4 || defined (__ARM_ARCH_4T__) 2082*4882a593Smuzhiyun+# define ARCH_HAS_BX 2083*4882a593Smuzhiyun+#endif 2084*4882a593Smuzhiyun+#if __ARM_ARCH > 4 2085*4882a593Smuzhiyun+# define ARCH_HAS_BLX 2086*4882a593Smuzhiyun+#endif 2087*4882a593Smuzhiyun+#if __ARM_ARCH > 6 || defined (__ARM_ARCH_6K__) || defined (__ARM_ARCH_6ZK__) 2088*4882a593Smuzhiyun+# define ARCH_HAS_HARD_TP 2089*4882a593Smuzhiyun+#endif 2090*4882a593Smuzhiyun+#if __ARM_ARCH > 6 || defined (__ARM_ARCH_6T2__) 2091*4882a593Smuzhiyun+# define ARCH_HAS_T2 2092*4882a593Smuzhiyun+#endif 
2093*4882a593Smuzhiyun+ 2094*4882a593Smuzhiyun+#ifdef __ASSEMBLER__ 2095*4882a593Smuzhiyun+ 2096*4882a593Smuzhiyun+/* Syntactic details of assembler. */ 2097*4882a593Smuzhiyun+ 2098*4882a593Smuzhiyun+#define ALIGNARG(log2) log2 2099*4882a593Smuzhiyun+#define ASM_SIZE_DIRECTIVE(name) .size name,.-name 2100*4882a593Smuzhiyun+ 2101*4882a593Smuzhiyun+#define PLTJMP(_x) _x##(PLT) 2102*4882a593Smuzhiyun+ 2103*4882a593Smuzhiyun+#ifdef ARCH_HAS_BX 2104*4882a593Smuzhiyun+# define BX(R) bx R 2105*4882a593Smuzhiyun+# define BXC(C, R) bx##C R 2106*4882a593Smuzhiyun+# ifdef ARCH_HAS_BLX 2107*4882a593Smuzhiyun+# define BLX(R) blx R 2108*4882a593Smuzhiyun+# else 2109*4882a593Smuzhiyun+# define BLX(R) mov lr, pc; bx R 2110*4882a593Smuzhiyun+# endif 2111*4882a593Smuzhiyun+#else 2112*4882a593Smuzhiyun+# define BX(R) mov pc, R 2113*4882a593Smuzhiyun+# define BXC(C, R) mov##C pc, R 2114*4882a593Smuzhiyun+# define BLX(R) mov lr, pc; mov pc, R 2115*4882a593Smuzhiyun+#endif 2116*4882a593Smuzhiyun+ 2117*4882a593Smuzhiyun+#define DO_RET(R) BX(R) 2118*4882a593Smuzhiyun+#define RETINSTR(C, R) BXC(C, R) 2119*4882a593Smuzhiyun+ 2120*4882a593Smuzhiyun+/* Define an entry point visible from C. */ 2121*4882a593Smuzhiyun+#define ENTRY(name) \ 2122*4882a593Smuzhiyun+ .globl C_SYMBOL_NAME(name); \ 2123*4882a593Smuzhiyun+ .type C_SYMBOL_NAME(name),%function; \ 2124*4882a593Smuzhiyun+ .align ALIGNARG(4); \ 2125*4882a593Smuzhiyun+ C_LABEL(name) \ 2126*4882a593Smuzhiyun+ CFI_SECTIONS; \ 2127*4882a593Smuzhiyun+ cfi_startproc; \ 2128*4882a593Smuzhiyun+ CALL_MCOUNT 2129*4882a593Smuzhiyun+ 2130*4882a593Smuzhiyun+#define CFI_SECTIONS \ 2131*4882a593Smuzhiyun+ .cfi_sections .debug_frame 2132*4882a593Smuzhiyun+ 2133*4882a593Smuzhiyun+#undef END 2134*4882a593Smuzhiyun+#define END(name) \ 2135*4882a593Smuzhiyun+ cfi_endproc; \ 2136*4882a593Smuzhiyun+ ASM_SIZE_DIRECTIVE(name) 2137*4882a593Smuzhiyun+ 2138*4882a593Smuzhiyun+/* If compiled for profiling, call `mcount' at the start of each function. 
*/ 2139*4882a593Smuzhiyun+#ifdef PROF 2140*4882a593Smuzhiyun+/* Call __gnu_mcount_nc (GCC >= 4.4). */ 2141*4882a593Smuzhiyun+#define CALL_MCOUNT \ 2142*4882a593Smuzhiyun+ push {lr}; \ 2143*4882a593Smuzhiyun+ cfi_adjust_cfa_offset (4); \ 2144*4882a593Smuzhiyun+ cfi_rel_offset (lr, 0); \ 2145*4882a593Smuzhiyun+ bl PLTJMP(mcount); \ 2146*4882a593Smuzhiyun+ cfi_adjust_cfa_offset (-4); \ 2147*4882a593Smuzhiyun+ cfi_restore (lr) 2148*4882a593Smuzhiyun+#else 2149*4882a593Smuzhiyun+#define CALL_MCOUNT /* Do nothing. */ 2150*4882a593Smuzhiyun+#endif 2151*4882a593Smuzhiyun+ 2152*4882a593Smuzhiyun+/* Since C identifiers are not normally prefixed with an underscore 2153*4882a593Smuzhiyun+ on this system, the asm identifier `syscall_error' intrudes on the 2154*4882a593Smuzhiyun+ C name space. Make sure we use an innocuous name. */ 2155*4882a593Smuzhiyun+#define syscall_error __syscall_error 2156*4882a593Smuzhiyun+#define mcount __gnu_mcount_nc 2157*4882a593Smuzhiyun+ 2158*4882a593Smuzhiyun+/* Tag_ABI_align8_preserved: This code preserves 8-byte 2159*4882a593Smuzhiyun+ alignment in any callee. */ 2160*4882a593Smuzhiyun+ .eabi_attribute 25, 1 2161*4882a593Smuzhiyun+/* Tag_ABI_align8_needed: This code may require 8-byte alignment from 2162*4882a593Smuzhiyun+ the caller. */ 2163*4882a593Smuzhiyun+ .eabi_attribute 24, 1 2164*4882a593Smuzhiyun+ 2165*4882a593Smuzhiyun+/* The thumb2 encoding is reasonably complete. Unless suppressed, use it. */ 2166*4882a593Smuzhiyun+ .syntax unified 2167*4882a593Smuzhiyun+# if defined(__thumb2__) && !defined(NO_THUMB) 2168*4882a593Smuzhiyun+ .thumb 2169*4882a593Smuzhiyun+#else 2170*4882a593Smuzhiyun+# undef __thumb__ 2171*4882a593Smuzhiyun+# undef __thumb2__ 2172*4882a593Smuzhiyun+ .arm 2173*4882a593Smuzhiyun+# endif 2174*4882a593Smuzhiyun+ 2175*4882a593Smuzhiyun+/* Load or store to/from address X + Y into/from R, (maybe) using T. 2176*4882a593Smuzhiyun+ X or Y can use T freely; T can be R if OP is a load. 
The first 2177*4882a593Smuzhiyun+ version eschews the two-register addressing mode, while the 2178*4882a593Smuzhiyun+ second version uses it. */ 2179*4882a593Smuzhiyun+# define LDST_INDEXED_NOINDEX(OP, R, T, X, Y) \ 2180*4882a593Smuzhiyun+ add T, X, Y; \ 2181*4882a593Smuzhiyun+ OP R, [T] 2182*4882a593Smuzhiyun+# define LDST_INDEXED_INDEX(OP, R, X, Y) \ 2183*4882a593Smuzhiyun+ OP R, [X, Y] 2184*4882a593Smuzhiyun+ 2185*4882a593Smuzhiyun+# ifdef ARM_NO_INDEX_REGISTER 2186*4882a593Smuzhiyun+/* We're never using the two-register addressing mode, so this 2187*4882a593Smuzhiyun+ always uses an intermediate add. */ 2188*4882a593Smuzhiyun+# define LDST_INDEXED(OP, R, T, X, Y) LDST_INDEXED_NOINDEX (OP, R, T, X, Y) 2189*4882a593Smuzhiyun+# define LDST_PC_INDEXED(OP, R, T, X) LDST_INDEXED_NOINDEX (OP, R, T, pc, X) 2190*4882a593Smuzhiyun+# else 2191*4882a593Smuzhiyun+/* The two-register addressing mode is OK, except on Thumb with pc. */ 2192*4882a593Smuzhiyun+# define LDST_INDEXED(OP, R, T, X, Y) LDST_INDEXED_INDEX (OP, R, X, Y) 2193*4882a593Smuzhiyun+# ifdef __thumb2__ 2194*4882a593Smuzhiyun+# define LDST_PC_INDEXED(OP, R, T, X) LDST_INDEXED_NOINDEX (OP, R, T, pc, X) 2195*4882a593Smuzhiyun+# else 2196*4882a593Smuzhiyun+# define LDST_PC_INDEXED(OP, R, T, X) LDST_INDEXED_INDEX (OP, R, pc, X) 2197*4882a593Smuzhiyun+# endif 2198*4882a593Smuzhiyun+# endif 2199*4882a593Smuzhiyun+ 2200*4882a593Smuzhiyun+/* Load or store to/from a pc-relative EXPR into/from R, using T. 
*/ 2201*4882a593Smuzhiyun+# ifdef __thumb2__ 2202*4882a593Smuzhiyun+# define LDST_PCREL(OP, R, T, EXPR) \ 2203*4882a593Smuzhiyun+ ldr T, 98f; \ 2204*4882a593Smuzhiyun+ .subsection 2; \ 2205*4882a593Smuzhiyun+98: .word EXPR - 99f - PC_OFS; \ 2206*4882a593Smuzhiyun+ .previous; \ 2207*4882a593Smuzhiyun+99: add T, T, pc; \ 2208*4882a593Smuzhiyun+ OP R, [T] 2209*4882a593Smuzhiyun+# elif defined (ARCH_HAS_T2) && ARM_PCREL_MOVW_OK 2210*4882a593Smuzhiyun+# define LDST_PCREL(OP, R, T, EXPR) \ 2211*4882a593Smuzhiyun+ movw T, #:lower16:EXPR - 99f - PC_OFS; \ 2212*4882a593Smuzhiyun+ movt T, #:upper16:EXPR - 99f - PC_OFS; \ 2213*4882a593Smuzhiyun+99: LDST_PC_INDEXED (OP, R, T, T) 2214*4882a593Smuzhiyun+# else 2215*4882a593Smuzhiyun+# define LDST_PCREL(OP, R, T, EXPR) \ 2216*4882a593Smuzhiyun+ ldr T, 98f; \ 2217*4882a593Smuzhiyun+ .subsection 2; \ 2218*4882a593Smuzhiyun+98: .word EXPR - 99f - PC_OFS; \ 2219*4882a593Smuzhiyun+ .previous; \ 2220*4882a593Smuzhiyun+99: OP R, [pc, T] 2221*4882a593Smuzhiyun+# endif 2222*4882a593Smuzhiyun+ 2223*4882a593Smuzhiyun+/* Load from a global SYMBOL + CONSTANT into R, using T. 
*/ 2224*4882a593Smuzhiyun+# if defined (ARCH_HAS_T2) && !defined (PIC) 2225*4882a593Smuzhiyun+# define LDR_GLOBAL(R, T, SYMBOL, CONSTANT) \ 2226*4882a593Smuzhiyun+ movw T, #:lower16:SYMBOL; \ 2227*4882a593Smuzhiyun+ movt T, #:upper16:SYMBOL; \ 2228*4882a593Smuzhiyun+ ldr R, [T, $CONSTANT] 2229*4882a593Smuzhiyun+# elif defined (ARCH_HAS_T2) && defined (PIC) && ARM_PCREL_MOVW_OK 2230*4882a593Smuzhiyun+# define LDR_GLOBAL(R, T, SYMBOL, CONSTANT) \ 2231*4882a593Smuzhiyun+ movw R, #:lower16:_GLOBAL_OFFSET_TABLE_ - 97f - PC_OFS; \ 2232*4882a593Smuzhiyun+ movw T, #:lower16:99f - 98f - PC_OFS; \ 2233*4882a593Smuzhiyun+ movt R, #:upper16:_GLOBAL_OFFSET_TABLE_ - 97f - PC_OFS; \ 2234*4882a593Smuzhiyun+ movt T, #:upper16:99f - 98f - PC_OFS; \ 2235*4882a593Smuzhiyun+ .pushsection .rodata.cst4, "aM", %progbits, 4; \ 2236*4882a593Smuzhiyun+ .balign 4; \ 2237*4882a593Smuzhiyun+99: .word SYMBOL##(GOT); \ 2238*4882a593Smuzhiyun+ .popsection; \ 2239*4882a593Smuzhiyun+97: add R, R, pc; \ 2240*4882a593Smuzhiyun+98: LDST_PC_INDEXED (ldr, T, T, T); \ 2241*4882a593Smuzhiyun+ LDST_INDEXED (ldr, R, T, R, T); \ 2242*4882a593Smuzhiyun+ ldr R, [R, $CONSTANT] 2243*4882a593Smuzhiyun+# else 2244*4882a593Smuzhiyun+# define LDR_GLOBAL(R, T, SYMBOL, CONSTANT) \ 2245*4882a593Smuzhiyun+ ldr T, 99f; \ 2246*4882a593Smuzhiyun+ ldr R, 100f; \ 2247*4882a593Smuzhiyun+98: add T, T, pc; \ 2248*4882a593Smuzhiyun+ ldr T, [T, R]; \ 2249*4882a593Smuzhiyun+ .subsection 2; \ 2250*4882a593Smuzhiyun+99: .word _GLOBAL_OFFSET_TABLE_ - 98b - PC_OFS; \ 2251*4882a593Smuzhiyun+100: .word SYMBOL##(GOT); \ 2252*4882a593Smuzhiyun+ .previous; \ 2253*4882a593Smuzhiyun+ ldr R, [T, $CONSTANT] 2254*4882a593Smuzhiyun+# endif 2255*4882a593Smuzhiyun+ 2256*4882a593Smuzhiyun+/* This is the same as LDR_GLOBAL, but for a SYMBOL that is known to 2257*4882a593Smuzhiyun+ be in the same linked object (as for one with hidden visibility). 2258*4882a593Smuzhiyun+ We can avoid the GOT indirection in the PIC case. 
For the pure 2259*4882a593Smuzhiyun+ static case, LDR_GLOBAL is already optimal. */ 2260*4882a593Smuzhiyun+# ifdef PIC 2261*4882a593Smuzhiyun+# define LDR_HIDDEN(R, T, SYMBOL, CONSTANT) \ 2262*4882a593Smuzhiyun+ LDST_PCREL (ldr, R, T, SYMBOL + CONSTANT) 2263*4882a593Smuzhiyun+# else 2264*4882a593Smuzhiyun+# define LDR_HIDDEN(R, T, SYMBOL, CONSTANT) \ 2265*4882a593Smuzhiyun+ LDR_GLOBAL (R, T, SYMBOL, CONSTANT) 2266*4882a593Smuzhiyun+# endif 2267*4882a593Smuzhiyun+ 2268*4882a593Smuzhiyun+/* Cope with negative memory offsets, which thumb can't encode. 2269*4882a593Smuzhiyun+ Use NEGOFF_ADJ_BASE to (conditionally) alter the base register, 2270*4882a593Smuzhiyun+ and then NEGOFF_OFF1 to use 0 for thumb and the offset for arm, 2271*4882a593Smuzhiyun+ or NEGOFF_OFF2 to use A-B for thumb and A for arm. */ 2272*4882a593Smuzhiyun+# ifdef __thumb2__ 2273*4882a593Smuzhiyun+# define NEGOFF_ADJ_BASE(R, OFF) add R, R, $OFF 2274*4882a593Smuzhiyun+# define NEGOFF_ADJ_BASE2(D, S, OFF) add D, S, $OFF 2275*4882a593Smuzhiyun+# define NEGOFF_OFF1(R, OFF) [R] 2276*4882a593Smuzhiyun+# define NEGOFF_OFF2(R, OFFA, OFFB) [R, $((OFFA) - (OFFB))] 2277*4882a593Smuzhiyun+# else 2278*4882a593Smuzhiyun+# define NEGOFF_ADJ_BASE(R, OFF) 2279*4882a593Smuzhiyun+# define NEGOFF_ADJ_BASE2(D, S, OFF) mov D, S 2280*4882a593Smuzhiyun+# define NEGOFF_OFF1(R, OFF) [R, $OFF] 2281*4882a593Smuzhiyun+# define NEGOFF_OFF2(R, OFFA, OFFB) [R, $OFFA] 2282*4882a593Smuzhiyun+# endif 2283*4882a593Smuzhiyun+ 2284*4882a593Smuzhiyun+/* Helper to get the TLS base pointer. The interface is that TMP is a 2285*4882a593Smuzhiyun+ register that may be used to hold the LR, if necessary. TMP may be 2286*4882a593Smuzhiyun+ LR itself to indicate that LR need not be saved. The base pointer 2287*4882a593Smuzhiyun+ is returned in R0. Only R0 and TMP are modified. */ 2288*4882a593Smuzhiyun+ 2289*4882a593Smuzhiyun+# ifdef ARCH_HAS_HARD_TP 2290*4882a593Smuzhiyun+/* If the cpu has cp15 available, use it. 
*/ 2291*4882a593Smuzhiyun+# define GET_TLS(TMP) mrc p15, 0, r0, c13, c0, 3 2292*4882a593Smuzhiyun+# else 2293*4882a593Smuzhiyun+/* At this generic level we have no tricks to pull. Call the ABI routine. */ 2294*4882a593Smuzhiyun+# define GET_TLS(TMP) \ 2295*4882a593Smuzhiyun+ push { r1, r2, r3, lr }; \ 2296*4882a593Smuzhiyun+ cfi_remember_state; \ 2297*4882a593Smuzhiyun+ cfi_adjust_cfa_offset (16); \ 2298*4882a593Smuzhiyun+ cfi_rel_offset (r1, 0); \ 2299*4882a593Smuzhiyun+ cfi_rel_offset (r2, 4); \ 2300*4882a593Smuzhiyun+ cfi_rel_offset (r3, 8); \ 2301*4882a593Smuzhiyun+ cfi_rel_offset (lr, 12); \ 2302*4882a593Smuzhiyun+ bl __aeabi_read_tp; \ 2303*4882a593Smuzhiyun+ pop { r1, r2, r3, lr }; \ 2304*4882a593Smuzhiyun+ cfi_restore_state 2305*4882a593Smuzhiyun+# endif /* ARCH_HAS_HARD_TP */ 2306*4882a593Smuzhiyun+ 2307*4882a593Smuzhiyun+/* These are the directives used for EABI unwind info. 2308*4882a593Smuzhiyun+ Wrap them in macros so another configuration's sysdep.h 2309*4882a593Smuzhiyun+ file can define them away if it doesn't use EABI unwind info. */ 2310*4882a593Smuzhiyun+# define eabi_fnstart .fnstart 2311*4882a593Smuzhiyun+# define eabi_fnend .fnend 2312*4882a593Smuzhiyun+# define eabi_save(...) .save __VA_ARGS__ 2313*4882a593Smuzhiyun+# define eabi_cantunwind .cantunwind 2314*4882a593Smuzhiyun+# define eabi_pad(n) .pad n 2315*4882a593Smuzhiyun+ 2316*4882a593Smuzhiyun+#endif /* __ASSEMBLER__ */ 2317*4882a593Smuzhiyun+ 2318*4882a593Smuzhiyun+/* This number is the offset from the pc at the current location. */ 2319*4882a593Smuzhiyun+#ifdef __thumb__ 2320*4882a593Smuzhiyun+# define PC_OFS 4 2321*4882a593Smuzhiyun+#else 2322*4882a593Smuzhiyun+# define PC_OFS 8 2323*4882a593Smuzhiyun+#endif 2324*4882a593Smuzhiyun+ 2325*4882a593Smuzhiyun+/* Pointer mangling support. 
*/ 2326*4882a593Smuzhiyun+#if (IS_IN (rtld) \ 2327*4882a593Smuzhiyun+ || (!defined SHARED && (IS_IN (libc) || IS_IN (libpthread)))) 2328*4882a593Smuzhiyun+# ifdef __ASSEMBLER__ 2329*4882a593Smuzhiyun+# define PTR_MANGLE_LOAD(guard, tmp) \ 2330*4882a593Smuzhiyun+ LDR_HIDDEN (guard, tmp, C_SYMBOL_NAME(__pointer_chk_guard_local), 0) 2331*4882a593Smuzhiyun+# define PTR_MANGLE(dst, src, guard, tmp) \ 2332*4882a593Smuzhiyun+ PTR_MANGLE_LOAD(guard, tmp); \ 2333*4882a593Smuzhiyun+ PTR_MANGLE2(dst, src, guard) 2334*4882a593Smuzhiyun+/* Use PTR_MANGLE2 for efficiency if guard is already loaded. */ 2335*4882a593Smuzhiyun+# define PTR_MANGLE2(dst, src, guard) \ 2336*4882a593Smuzhiyun+ eor dst, src, guard 2337*4882a593Smuzhiyun+# define PTR_DEMANGLE(dst, src, guard, tmp) \ 2338*4882a593Smuzhiyun+ PTR_MANGLE (dst, src, guard, tmp) 2339*4882a593Smuzhiyun+# define PTR_DEMANGLE2(dst, src, guard) \ 2340*4882a593Smuzhiyun+ PTR_MANGLE2 (dst, src, guard) 2341*4882a593Smuzhiyun+# else 2342*4882a593Smuzhiyun+extern uintptr_t __pointer_chk_guard_local attribute_relro attribute_hidden; 2343*4882a593Smuzhiyun+# define PTR_MANGLE(var) \ 2344*4882a593Smuzhiyun+ (var) = (__typeof (var)) ((uintptr_t) (var) ^ __pointer_chk_guard_local) 2345*4882a593Smuzhiyun+# define PTR_DEMANGLE(var) PTR_MANGLE (var) 2346*4882a593Smuzhiyun+# endif 2347*4882a593Smuzhiyun+#else 2348*4882a593Smuzhiyun+# ifdef __ASSEMBLER__ 2349*4882a593Smuzhiyun+# define PTR_MANGLE_LOAD(guard, tmp) \ 2350*4882a593Smuzhiyun+ LDR_GLOBAL (guard, tmp, C_SYMBOL_NAME(__pointer_chk_guard), 0); 2351*4882a593Smuzhiyun+# define PTR_MANGLE(dst, src, guard, tmp) \ 2352*4882a593Smuzhiyun+ PTR_MANGLE_LOAD(guard, tmp); \ 2353*4882a593Smuzhiyun+ PTR_MANGLE2(dst, src, guard) 2354*4882a593Smuzhiyun+/* Use PTR_MANGLE2 for efficiency if guard is already loaded. 
*/ 2355*4882a593Smuzhiyun+# define PTR_MANGLE2(dst, src, guard) \ 2356*4882a593Smuzhiyun+ eor dst, src, guard 2357*4882a593Smuzhiyun+# define PTR_DEMANGLE(dst, src, guard, tmp) \ 2358*4882a593Smuzhiyun+ PTR_MANGLE (dst, src, guard, tmp) 2359*4882a593Smuzhiyun+# define PTR_DEMANGLE2(dst, src, guard) \ 2360*4882a593Smuzhiyun+ PTR_MANGLE2 (dst, src, guard) 2361*4882a593Smuzhiyun+# else 2362*4882a593Smuzhiyun+extern uintptr_t __pointer_chk_guard attribute_relro; 2363*4882a593Smuzhiyun+# define PTR_MANGLE(var) \ 2364*4882a593Smuzhiyun+ (var) = (__typeof (var)) ((uintptr_t) (var) ^ __pointer_chk_guard) 2365*4882a593Smuzhiyun+# define PTR_DEMANGLE(var) PTR_MANGLE (var) 2366*4882a593Smuzhiyun+# endif 2367*4882a593Smuzhiyun+#endif 2368*4882a593Smuzhiyundiff --git a/libc/string/arm/glibc-neon/sysdeps/generic/dwarf2.h b/libc/string/arm/glibc-neon/sysdeps/generic/dwarf2.h 2369*4882a593Smuzhiyunnew file mode 100644 2370*4882a593Smuzhiyunindex 0000000..0d08da0 2371*4882a593Smuzhiyun--- /dev/null 2372*4882a593Smuzhiyun+++ b/libc/string/arm/glibc-neon/sysdeps/generic/dwarf2.h 2373*4882a593Smuzhiyun@@ -0,0 +1,590 @@ 2374*4882a593Smuzhiyun+/* Declarations and definitions of codes relating to the DWARF2 symbolic 2375*4882a593Smuzhiyun+ debugging information format. 2376*4882a593Smuzhiyun+ Copyright (C) 1992-2021 Free Software Foundation, Inc. 2377*4882a593Smuzhiyun+ Contributed by Gary Funck (gary@intrepid.com). Derived from the 2378*4882a593Smuzhiyun+ DWARF 1 implementation written by Ron Guilmette (rfg@monkeys.com). 2379*4882a593Smuzhiyun+ 2380*4882a593Smuzhiyun+ This file is part of the GNU C Library. 2381*4882a593Smuzhiyun+ 2382*4882a593Smuzhiyun+ The GNU C Library is free software; you can redistribute it and/or 2383*4882a593Smuzhiyun+ modify it under the terms of the GNU Lesser General Public 2384*4882a593Smuzhiyun+ License as published by the Free Software Foundation; either 2385*4882a593Smuzhiyun+ version 2.1 of the License, or (at your option) any later version. 
2386*4882a593Smuzhiyun+ 2387*4882a593Smuzhiyun+ The GNU C Library is distributed in the hope that it will be useful, 2388*4882a593Smuzhiyun+ but WITHOUT ANY WARRANTY; without even the implied warranty of 2389*4882a593Smuzhiyun+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 2390*4882a593Smuzhiyun+ Lesser General Public License for more details. 2391*4882a593Smuzhiyun+ 2392*4882a593Smuzhiyun+ You should have received a copy of the GNU Lesser General Public 2393*4882a593Smuzhiyun+ License along with the GNU C Library; if not, see 2394*4882a593Smuzhiyun+ <https://www.gnu.org/licenses/>. */ 2395*4882a593Smuzhiyun+ 2396*4882a593Smuzhiyun+#ifndef _DWARF2_H 2397*4882a593Smuzhiyun+#define _DWARF2_H 1 2398*4882a593Smuzhiyun+ 2399*4882a593Smuzhiyun+/* This file is derived from the DWARF specification (a public document) 2400*4882a593Smuzhiyun+ Revision 2.0.0 (July 27, 1993) developed by the UNIX International 2401*4882a593Smuzhiyun+ Programming Languages Special Interest Group (UI/PLSIG) and distributed 2402*4882a593Smuzhiyun+ by UNIX International. Copies of this specification are available from 2403*4882a593Smuzhiyun+ UNIX International, 20 Waterview Boulevard, Parsippany, NJ, 07054. */ 2404*4882a593Smuzhiyun+ 2405*4882a593Smuzhiyun+/* This file is shared between GCC and GDB, and should not contain 2406*4882a593Smuzhiyun+ prototypes. */ 2407*4882a593Smuzhiyun+ 2408*4882a593Smuzhiyun+#ifndef __ASSEMBLER__ 2409*4882a593Smuzhiyun+/* Tag names and codes. 
*/ 2410*4882a593Smuzhiyun+ 2411*4882a593Smuzhiyun+enum dwarf_tag 2412*4882a593Smuzhiyun+ { 2413*4882a593Smuzhiyun+ DW_TAG_padding = 0x00, 2414*4882a593Smuzhiyun+ DW_TAG_array_type = 0x01, 2415*4882a593Smuzhiyun+ DW_TAG_class_type = 0x02, 2416*4882a593Smuzhiyun+ DW_TAG_entry_point = 0x03, 2417*4882a593Smuzhiyun+ DW_TAG_enumeration_type = 0x04, 2418*4882a593Smuzhiyun+ DW_TAG_formal_parameter = 0x05, 2419*4882a593Smuzhiyun+ DW_TAG_imported_declaration = 0x08, 2420*4882a593Smuzhiyun+ DW_TAG_label = 0x0a, 2421*4882a593Smuzhiyun+ DW_TAG_lexical_block = 0x0b, 2422*4882a593Smuzhiyun+ DW_TAG_member = 0x0d, 2423*4882a593Smuzhiyun+ DW_TAG_pointer_type = 0x0f, 2424*4882a593Smuzhiyun+ DW_TAG_reference_type = 0x10, 2425*4882a593Smuzhiyun+ DW_TAG_compile_unit = 0x11, 2426*4882a593Smuzhiyun+ DW_TAG_string_type = 0x12, 2427*4882a593Smuzhiyun+ DW_TAG_structure_type = 0x13, 2428*4882a593Smuzhiyun+ DW_TAG_subroutine_type = 0x15, 2429*4882a593Smuzhiyun+ DW_TAG_typedef = 0x16, 2430*4882a593Smuzhiyun+ DW_TAG_union_type = 0x17, 2431*4882a593Smuzhiyun+ DW_TAG_unspecified_parameters = 0x18, 2432*4882a593Smuzhiyun+ DW_TAG_variant = 0x19, 2433*4882a593Smuzhiyun+ DW_TAG_common_block = 0x1a, 2434*4882a593Smuzhiyun+ DW_TAG_common_inclusion = 0x1b, 2435*4882a593Smuzhiyun+ DW_TAG_inheritance = 0x1c, 2436*4882a593Smuzhiyun+ DW_TAG_inlined_subroutine = 0x1d, 2437*4882a593Smuzhiyun+ DW_TAG_module = 0x1e, 2438*4882a593Smuzhiyun+ DW_TAG_ptr_to_member_type = 0x1f, 2439*4882a593Smuzhiyun+ DW_TAG_set_type = 0x20, 2440*4882a593Smuzhiyun+ DW_TAG_subrange_type = 0x21, 2441*4882a593Smuzhiyun+ DW_TAG_with_stmt = 0x22, 2442*4882a593Smuzhiyun+ DW_TAG_access_declaration = 0x23, 2443*4882a593Smuzhiyun+ DW_TAG_base_type = 0x24, 2444*4882a593Smuzhiyun+ DW_TAG_catch_block = 0x25, 2445*4882a593Smuzhiyun+ DW_TAG_const_type = 0x26, 2446*4882a593Smuzhiyun+ DW_TAG_constant = 0x27, 2447*4882a593Smuzhiyun+ DW_TAG_enumerator = 0x28, 2448*4882a593Smuzhiyun+ DW_TAG_file_type = 0x29, 2449*4882a593Smuzhiyun+ DW_TAG_friend = 
0x2a, 2450*4882a593Smuzhiyun+ DW_TAG_namelist = 0x2b, 2451*4882a593Smuzhiyun+ DW_TAG_namelist_item = 0x2c, 2452*4882a593Smuzhiyun+ DW_TAG_packed_type = 0x2d, 2453*4882a593Smuzhiyun+ DW_TAG_subprogram = 0x2e, 2454*4882a593Smuzhiyun+ DW_TAG_template_type_param = 0x2f, 2455*4882a593Smuzhiyun+ DW_TAG_template_value_param = 0x30, 2456*4882a593Smuzhiyun+ DW_TAG_thrown_type = 0x31, 2457*4882a593Smuzhiyun+ DW_TAG_try_block = 0x32, 2458*4882a593Smuzhiyun+ DW_TAG_variant_part = 0x33, 2459*4882a593Smuzhiyun+ DW_TAG_variable = 0x34, 2460*4882a593Smuzhiyun+ DW_TAG_volatile_type = 0x35, 2461*4882a593Smuzhiyun+ /* SGI/MIPS Extensions */ 2462*4882a593Smuzhiyun+ DW_TAG_MIPS_loop = 0x4081, 2463*4882a593Smuzhiyun+ /* GNU extensions */ 2464*4882a593Smuzhiyun+ DW_TAG_format_label = 0x4101, /* for FORTRAN 77 and Fortran 90 */ 2465*4882a593Smuzhiyun+ DW_TAG_function_template = 0x4102, /* for C++ */ 2466*4882a593Smuzhiyun+ DW_TAG_class_template = 0x4103, /* for C++ */ 2467*4882a593Smuzhiyun+ DW_TAG_GNU_BINCL = 0x4104, 2468*4882a593Smuzhiyun+ DW_TAG_GNU_EINCL = 0x4105 2469*4882a593Smuzhiyun+ }; 2470*4882a593Smuzhiyun+ 2471*4882a593Smuzhiyun+#define DW_TAG_lo_user 0x4080 2472*4882a593Smuzhiyun+#define DW_TAG_hi_user 0xffff 2473*4882a593Smuzhiyun+ 2474*4882a593Smuzhiyun+/* flag that tells whether entry has a child or not */ 2475*4882a593Smuzhiyun+#define DW_children_no 0 2476*4882a593Smuzhiyun+#define DW_children_yes 1 2477*4882a593Smuzhiyun+ 2478*4882a593Smuzhiyun+/* Form names and codes. 
*/ 2479*4882a593Smuzhiyun+enum dwarf_form 2480*4882a593Smuzhiyun+ { 2481*4882a593Smuzhiyun+ DW_FORM_addr = 0x01, 2482*4882a593Smuzhiyun+ DW_FORM_block2 = 0x03, 2483*4882a593Smuzhiyun+ DW_FORM_block4 = 0x04, 2484*4882a593Smuzhiyun+ DW_FORM_data2 = 0x05, 2485*4882a593Smuzhiyun+ DW_FORM_data4 = 0x06, 2486*4882a593Smuzhiyun+ DW_FORM_data8 = 0x07, 2487*4882a593Smuzhiyun+ DW_FORM_string = 0x08, 2488*4882a593Smuzhiyun+ DW_FORM_block = 0x09, 2489*4882a593Smuzhiyun+ DW_FORM_block1 = 0x0a, 2490*4882a593Smuzhiyun+ DW_FORM_data1 = 0x0b, 2491*4882a593Smuzhiyun+ DW_FORM_flag = 0x0c, 2492*4882a593Smuzhiyun+ DW_FORM_sdata = 0x0d, 2493*4882a593Smuzhiyun+ DW_FORM_strp = 0x0e, 2494*4882a593Smuzhiyun+ DW_FORM_udata = 0x0f, 2495*4882a593Smuzhiyun+ DW_FORM_ref_addr = 0x10, 2496*4882a593Smuzhiyun+ DW_FORM_ref1 = 0x11, 2497*4882a593Smuzhiyun+ DW_FORM_ref2 = 0x12, 2498*4882a593Smuzhiyun+ DW_FORM_ref4 = 0x13, 2499*4882a593Smuzhiyun+ DW_FORM_ref8 = 0x14, 2500*4882a593Smuzhiyun+ DW_FORM_ref_udata = 0x15, 2501*4882a593Smuzhiyun+ DW_FORM_indirect = 0x16 2502*4882a593Smuzhiyun+ }; 2503*4882a593Smuzhiyun+ 2504*4882a593Smuzhiyun+/* Attribute names and codes. 
*/ 2505*4882a593Smuzhiyun+ 2506*4882a593Smuzhiyun+enum dwarf_attribute 2507*4882a593Smuzhiyun+ { 2508*4882a593Smuzhiyun+ DW_AT_sibling = 0x01, 2509*4882a593Smuzhiyun+ DW_AT_location = 0x02, 2510*4882a593Smuzhiyun+ DW_AT_name = 0x03, 2511*4882a593Smuzhiyun+ DW_AT_ordering = 0x09, 2512*4882a593Smuzhiyun+ DW_AT_subscr_data = 0x0a, 2513*4882a593Smuzhiyun+ DW_AT_byte_size = 0x0b, 2514*4882a593Smuzhiyun+ DW_AT_bit_offset = 0x0c, 2515*4882a593Smuzhiyun+ DW_AT_bit_size = 0x0d, 2516*4882a593Smuzhiyun+ DW_AT_element_list = 0x0f, 2517*4882a593Smuzhiyun+ DW_AT_stmt_list = 0x10, 2518*4882a593Smuzhiyun+ DW_AT_low_pc = 0x11, 2519*4882a593Smuzhiyun+ DW_AT_high_pc = 0x12, 2520*4882a593Smuzhiyun+ DW_AT_language = 0x13, 2521*4882a593Smuzhiyun+ DW_AT_member = 0x14, 2522*4882a593Smuzhiyun+ DW_AT_discr = 0x15, 2523*4882a593Smuzhiyun+ DW_AT_discr_value = 0x16, 2524*4882a593Smuzhiyun+ DW_AT_visibility = 0x17, 2525*4882a593Smuzhiyun+ DW_AT_import = 0x18, 2526*4882a593Smuzhiyun+ DW_AT_string_length = 0x19, 2527*4882a593Smuzhiyun+ DW_AT_common_reference = 0x1a, 2528*4882a593Smuzhiyun+ DW_AT_comp_dir = 0x1b, 2529*4882a593Smuzhiyun+ DW_AT_const_value = 0x1c, 2530*4882a593Smuzhiyun+ DW_AT_containing_type = 0x1d, 2531*4882a593Smuzhiyun+ DW_AT_default_value = 0x1e, 2532*4882a593Smuzhiyun+ DW_AT_inline = 0x20, 2533*4882a593Smuzhiyun+ DW_AT_is_optional = 0x21, 2534*4882a593Smuzhiyun+ DW_AT_lower_bound = 0x22, 2535*4882a593Smuzhiyun+ DW_AT_producer = 0x25, 2536*4882a593Smuzhiyun+ DW_AT_prototyped = 0x27, 2537*4882a593Smuzhiyun+ DW_AT_return_addr = 0x2a, 2538*4882a593Smuzhiyun+ DW_AT_start_scope = 0x2c, 2539*4882a593Smuzhiyun+ DW_AT_stride_size = 0x2e, 2540*4882a593Smuzhiyun+ DW_AT_upper_bound = 0x2f, 2541*4882a593Smuzhiyun+ DW_AT_abstract_origin = 0x31, 2542*4882a593Smuzhiyun+ DW_AT_accessibility = 0x32, 2543*4882a593Smuzhiyun+ DW_AT_address_class = 0x33, 2544*4882a593Smuzhiyun+ DW_AT_artificial = 0x34, 2545*4882a593Smuzhiyun+ DW_AT_base_types = 0x35, 2546*4882a593Smuzhiyun+ DW_AT_calling_convention 
= 0x36, 2547*4882a593Smuzhiyun+ DW_AT_count = 0x37, 2548*4882a593Smuzhiyun+ DW_AT_data_member_location = 0x38, 2549*4882a593Smuzhiyun+ DW_AT_decl_column = 0x39, 2550*4882a593Smuzhiyun+ DW_AT_decl_file = 0x3a, 2551*4882a593Smuzhiyun+ DW_AT_decl_line = 0x3b, 2552*4882a593Smuzhiyun+ DW_AT_declaration = 0x3c, 2553*4882a593Smuzhiyun+ DW_AT_discr_list = 0x3d, 2554*4882a593Smuzhiyun+ DW_AT_encoding = 0x3e, 2555*4882a593Smuzhiyun+ DW_AT_external = 0x3f, 2556*4882a593Smuzhiyun+ DW_AT_frame_base = 0x40, 2557*4882a593Smuzhiyun+ DW_AT_friend = 0x41, 2558*4882a593Smuzhiyun+ DW_AT_identifier_case = 0x42, 2559*4882a593Smuzhiyun+ DW_AT_macro_info = 0x43, 2560*4882a593Smuzhiyun+ DW_AT_namelist_items = 0x44, 2561*4882a593Smuzhiyun+ DW_AT_priority = 0x45, 2562*4882a593Smuzhiyun+ DW_AT_segment = 0x46, 2563*4882a593Smuzhiyun+ DW_AT_specification = 0x47, 2564*4882a593Smuzhiyun+ DW_AT_static_link = 0x48, 2565*4882a593Smuzhiyun+ DW_AT_type = 0x49, 2566*4882a593Smuzhiyun+ DW_AT_use_location = 0x4a, 2567*4882a593Smuzhiyun+ DW_AT_variable_parameter = 0x4b, 2568*4882a593Smuzhiyun+ DW_AT_virtuality = 0x4c, 2569*4882a593Smuzhiyun+ DW_AT_vtable_elem_location = 0x4d, 2570*4882a593Smuzhiyun+ /* SGI/MIPS Extensions */ 2571*4882a593Smuzhiyun+ DW_AT_MIPS_fde = 0x2001, 2572*4882a593Smuzhiyun+ DW_AT_MIPS_loop_begin = 0x2002, 2573*4882a593Smuzhiyun+ DW_AT_MIPS_tail_loop_begin = 0x2003, 2574*4882a593Smuzhiyun+ DW_AT_MIPS_epilog_begin = 0x2004, 2575*4882a593Smuzhiyun+ DW_AT_MIPS_loop_unroll_factor = 0x2005, 2576*4882a593Smuzhiyun+ DW_AT_MIPS_software_pipeline_depth = 0x2006, 2577*4882a593Smuzhiyun+ DW_AT_MIPS_linkage_name = 0x2007, 2578*4882a593Smuzhiyun+ DW_AT_MIPS_stride = 0x2008, 2579*4882a593Smuzhiyun+ DW_AT_MIPS_abstract_name = 0x2009, 2580*4882a593Smuzhiyun+ DW_AT_MIPS_clone_origin = 0x200a, 2581*4882a593Smuzhiyun+ DW_AT_MIPS_has_inlines = 0x200b, 2582*4882a593Smuzhiyun+ /* GNU extensions. 
*/ 2583*4882a593Smuzhiyun+ DW_AT_sf_names = 0x2101, 2584*4882a593Smuzhiyun+ DW_AT_src_info = 0x2102, 2585*4882a593Smuzhiyun+ DW_AT_mac_info = 0x2103, 2586*4882a593Smuzhiyun+ DW_AT_src_coords = 0x2104, 2587*4882a593Smuzhiyun+ DW_AT_body_begin = 0x2105, 2588*4882a593Smuzhiyun+ DW_AT_body_end = 0x2106 2589*4882a593Smuzhiyun+ }; 2590*4882a593Smuzhiyun+ 2591*4882a593Smuzhiyun+#define DW_AT_lo_user 0x2000 /* implementation-defined range start */ 2592*4882a593Smuzhiyun+#define DW_AT_hi_user 0x3ff0 /* implementation-defined range end */ 2593*4882a593Smuzhiyun+ 2594*4882a593Smuzhiyun+/* Location atom names and codes. */ 2595*4882a593Smuzhiyun+ 2596*4882a593Smuzhiyun+enum dwarf_location_atom 2597*4882a593Smuzhiyun+ { 2598*4882a593Smuzhiyun+ DW_OP_addr = 0x03, 2599*4882a593Smuzhiyun+ DW_OP_deref = 0x06, 2600*4882a593Smuzhiyun+ DW_OP_const1u = 0x08, 2601*4882a593Smuzhiyun+ DW_OP_const1s = 0x09, 2602*4882a593Smuzhiyun+ DW_OP_const2u = 0x0a, 2603*4882a593Smuzhiyun+ DW_OP_const2s = 0x0b, 2604*4882a593Smuzhiyun+ DW_OP_const4u = 0x0c, 2605*4882a593Smuzhiyun+ DW_OP_const4s = 0x0d, 2606*4882a593Smuzhiyun+ DW_OP_const8u = 0x0e, 2607*4882a593Smuzhiyun+ DW_OP_const8s = 0x0f, 2608*4882a593Smuzhiyun+ DW_OP_constu = 0x10, 2609*4882a593Smuzhiyun+ DW_OP_consts = 0x11, 2610*4882a593Smuzhiyun+ DW_OP_dup = 0x12, 2611*4882a593Smuzhiyun+ DW_OP_drop = 0x13, 2612*4882a593Smuzhiyun+ DW_OP_over = 0x14, 2613*4882a593Smuzhiyun+ DW_OP_pick = 0x15, 2614*4882a593Smuzhiyun+ DW_OP_swap = 0x16, 2615*4882a593Smuzhiyun+ DW_OP_rot = 0x17, 2616*4882a593Smuzhiyun+ DW_OP_xderef = 0x18, 2617*4882a593Smuzhiyun+ DW_OP_abs = 0x19, 2618*4882a593Smuzhiyun+ DW_OP_and = 0x1a, 2619*4882a593Smuzhiyun+ DW_OP_div = 0x1b, 2620*4882a593Smuzhiyun+ DW_OP_minus = 0x1c, 2621*4882a593Smuzhiyun+ DW_OP_mod = 0x1d, 2622*4882a593Smuzhiyun+ DW_OP_mul = 0x1e, 2623*4882a593Smuzhiyun+ DW_OP_neg = 0x1f, 2624*4882a593Smuzhiyun+ DW_OP_not = 0x20, 2625*4882a593Smuzhiyun+ DW_OP_or = 0x21, 2626*4882a593Smuzhiyun+ DW_OP_plus = 0x22, 
2627*4882a593Smuzhiyun+ DW_OP_plus_uconst = 0x23, 2628*4882a593Smuzhiyun+ DW_OP_shl = 0x24, 2629*4882a593Smuzhiyun+ DW_OP_shr = 0x25, 2630*4882a593Smuzhiyun+ DW_OP_shra = 0x26, 2631*4882a593Smuzhiyun+ DW_OP_xor = 0x27, 2632*4882a593Smuzhiyun+ DW_OP_bra = 0x28, 2633*4882a593Smuzhiyun+ DW_OP_eq = 0x29, 2634*4882a593Smuzhiyun+ DW_OP_ge = 0x2a, 2635*4882a593Smuzhiyun+ DW_OP_gt = 0x2b, 2636*4882a593Smuzhiyun+ DW_OP_le = 0x2c, 2637*4882a593Smuzhiyun+ DW_OP_lt = 0x2d, 2638*4882a593Smuzhiyun+ DW_OP_ne = 0x2e, 2639*4882a593Smuzhiyun+ DW_OP_skip = 0x2f, 2640*4882a593Smuzhiyun+ DW_OP_lit0 = 0x30, 2641*4882a593Smuzhiyun+ DW_OP_lit1 = 0x31, 2642*4882a593Smuzhiyun+ DW_OP_lit2 = 0x32, 2643*4882a593Smuzhiyun+ DW_OP_lit3 = 0x33, 2644*4882a593Smuzhiyun+ DW_OP_lit4 = 0x34, 2645*4882a593Smuzhiyun+ DW_OP_lit5 = 0x35, 2646*4882a593Smuzhiyun+ DW_OP_lit6 = 0x36, 2647*4882a593Smuzhiyun+ DW_OP_lit7 = 0x37, 2648*4882a593Smuzhiyun+ DW_OP_lit8 = 0x38, 2649*4882a593Smuzhiyun+ DW_OP_lit9 = 0x39, 2650*4882a593Smuzhiyun+ DW_OP_lit10 = 0x3a, 2651*4882a593Smuzhiyun+ DW_OP_lit11 = 0x3b, 2652*4882a593Smuzhiyun+ DW_OP_lit12 = 0x3c, 2653*4882a593Smuzhiyun+ DW_OP_lit13 = 0x3d, 2654*4882a593Smuzhiyun+ DW_OP_lit14 = 0x3e, 2655*4882a593Smuzhiyun+ DW_OP_lit15 = 0x3f, 2656*4882a593Smuzhiyun+ DW_OP_lit16 = 0x40, 2657*4882a593Smuzhiyun+ DW_OP_lit17 = 0x41, 2658*4882a593Smuzhiyun+ DW_OP_lit18 = 0x42, 2659*4882a593Smuzhiyun+ DW_OP_lit19 = 0x43, 2660*4882a593Smuzhiyun+ DW_OP_lit20 = 0x44, 2661*4882a593Smuzhiyun+ DW_OP_lit21 = 0x45, 2662*4882a593Smuzhiyun+ DW_OP_lit22 = 0x46, 2663*4882a593Smuzhiyun+ DW_OP_lit23 = 0x47, 2664*4882a593Smuzhiyun+ DW_OP_lit24 = 0x48, 2665*4882a593Smuzhiyun+ DW_OP_lit25 = 0x49, 2666*4882a593Smuzhiyun+ DW_OP_lit26 = 0x4a, 2667*4882a593Smuzhiyun+ DW_OP_lit27 = 0x4b, 2668*4882a593Smuzhiyun+ DW_OP_lit28 = 0x4c, 2669*4882a593Smuzhiyun+ DW_OP_lit29 = 0x4d, 2670*4882a593Smuzhiyun+ DW_OP_lit30 = 0x4e, 2671*4882a593Smuzhiyun+ DW_OP_lit31 = 0x4f, 2672*4882a593Smuzhiyun+ DW_OP_reg0 = 0x50, 
2673*4882a593Smuzhiyun+ DW_OP_reg1 = 0x51, 2674*4882a593Smuzhiyun+ DW_OP_reg2 = 0x52, 2675*4882a593Smuzhiyun+ DW_OP_reg3 = 0x53, 2676*4882a593Smuzhiyun+ DW_OP_reg4 = 0x54, 2677*4882a593Smuzhiyun+ DW_OP_reg5 = 0x55, 2678*4882a593Smuzhiyun+ DW_OP_reg6 = 0x56, 2679*4882a593Smuzhiyun+ DW_OP_reg7 = 0x57, 2680*4882a593Smuzhiyun+ DW_OP_reg8 = 0x58, 2681*4882a593Smuzhiyun+ DW_OP_reg9 = 0x59, 2682*4882a593Smuzhiyun+ DW_OP_reg10 = 0x5a, 2683*4882a593Smuzhiyun+ DW_OP_reg11 = 0x5b, 2684*4882a593Smuzhiyun+ DW_OP_reg12 = 0x5c, 2685*4882a593Smuzhiyun+ DW_OP_reg13 = 0x5d, 2686*4882a593Smuzhiyun+ DW_OP_reg14 = 0x5e, 2687*4882a593Smuzhiyun+ DW_OP_reg15 = 0x5f, 2688*4882a593Smuzhiyun+ DW_OP_reg16 = 0x60, 2689*4882a593Smuzhiyun+ DW_OP_reg17 = 0x61, 2690*4882a593Smuzhiyun+ DW_OP_reg18 = 0x62, 2691*4882a593Smuzhiyun+ DW_OP_reg19 = 0x63, 2692*4882a593Smuzhiyun+ DW_OP_reg20 = 0x64, 2693*4882a593Smuzhiyun+ DW_OP_reg21 = 0x65, 2694*4882a593Smuzhiyun+ DW_OP_reg22 = 0x66, 2695*4882a593Smuzhiyun+ DW_OP_reg23 = 0x67, 2696*4882a593Smuzhiyun+ DW_OP_reg24 = 0x68, 2697*4882a593Smuzhiyun+ DW_OP_reg25 = 0x69, 2698*4882a593Smuzhiyun+ DW_OP_reg26 = 0x6a, 2699*4882a593Smuzhiyun+ DW_OP_reg27 = 0x6b, 2700*4882a593Smuzhiyun+ DW_OP_reg28 = 0x6c, 2701*4882a593Smuzhiyun+ DW_OP_reg29 = 0x6d, 2702*4882a593Smuzhiyun+ DW_OP_reg30 = 0x6e, 2703*4882a593Smuzhiyun+ DW_OP_reg31 = 0x6f, 2704*4882a593Smuzhiyun+ DW_OP_breg0 = 0x70, 2705*4882a593Smuzhiyun+ DW_OP_breg1 = 0x71, 2706*4882a593Smuzhiyun+ DW_OP_breg2 = 0x72, 2707*4882a593Smuzhiyun+ DW_OP_breg3 = 0x73, 2708*4882a593Smuzhiyun+ DW_OP_breg4 = 0x74, 2709*4882a593Smuzhiyun+ DW_OP_breg5 = 0x75, 2710*4882a593Smuzhiyun+ DW_OP_breg6 = 0x76, 2711*4882a593Smuzhiyun+ DW_OP_breg7 = 0x77, 2712*4882a593Smuzhiyun+ DW_OP_breg8 = 0x78, 2713*4882a593Smuzhiyun+ DW_OP_breg9 = 0x79, 2714*4882a593Smuzhiyun+ DW_OP_breg10 = 0x7a, 2715*4882a593Smuzhiyun+ DW_OP_breg11 = 0x7b, 2716*4882a593Smuzhiyun+ DW_OP_breg12 = 0x7c, 2717*4882a593Smuzhiyun+ DW_OP_breg13 = 0x7d, 2718*4882a593Smuzhiyun+ 
DW_OP_breg14 = 0x7e, 2719*4882a593Smuzhiyun+ DW_OP_breg15 = 0x7f, 2720*4882a593Smuzhiyun+ DW_OP_breg16 = 0x80, 2721*4882a593Smuzhiyun+ DW_OP_breg17 = 0x81, 2722*4882a593Smuzhiyun+ DW_OP_breg18 = 0x82, 2723*4882a593Smuzhiyun+ DW_OP_breg19 = 0x83, 2724*4882a593Smuzhiyun+ DW_OP_breg20 = 0x84, 2725*4882a593Smuzhiyun+ DW_OP_breg21 = 0x85, 2726*4882a593Smuzhiyun+ DW_OP_breg22 = 0x86, 2727*4882a593Smuzhiyun+ DW_OP_breg23 = 0x87, 2728*4882a593Smuzhiyun+ DW_OP_breg24 = 0x88, 2729*4882a593Smuzhiyun+ DW_OP_breg25 = 0x89, 2730*4882a593Smuzhiyun+ DW_OP_breg26 = 0x8a, 2731*4882a593Smuzhiyun+ DW_OP_breg27 = 0x8b, 2732*4882a593Smuzhiyun+ DW_OP_breg28 = 0x8c, 2733*4882a593Smuzhiyun+ DW_OP_breg29 = 0x8d, 2734*4882a593Smuzhiyun+ DW_OP_breg30 = 0x8e, 2735*4882a593Smuzhiyun+ DW_OP_breg31 = 0x8f, 2736*4882a593Smuzhiyun+ DW_OP_regx = 0x90, 2737*4882a593Smuzhiyun+ DW_OP_fbreg = 0x91, 2738*4882a593Smuzhiyun+ DW_OP_bregx = 0x92, 2739*4882a593Smuzhiyun+ DW_OP_piece = 0x93, 2740*4882a593Smuzhiyun+ DW_OP_deref_size = 0x94, 2741*4882a593Smuzhiyun+ DW_OP_xderef_size = 0x95, 2742*4882a593Smuzhiyun+ DW_OP_nop = 0x96 2743*4882a593Smuzhiyun+ }; 2744*4882a593Smuzhiyun+ 2745*4882a593Smuzhiyun+#define DW_OP_lo_user 0x80 /* implementation-defined range start */ 2746*4882a593Smuzhiyun+#define DW_OP_hi_user 0xff /* implementation-defined range end */ 2747*4882a593Smuzhiyun+ 2748*4882a593Smuzhiyun+/* Type encodings. 
*/ 2749*4882a593Smuzhiyun+ 2750*4882a593Smuzhiyun+enum dwarf_type 2751*4882a593Smuzhiyun+ { 2752*4882a593Smuzhiyun+ DW_ATE_void = 0x0, 2753*4882a593Smuzhiyun+ DW_ATE_address = 0x1, 2754*4882a593Smuzhiyun+ DW_ATE_boolean = 0x2, 2755*4882a593Smuzhiyun+ DW_ATE_complex_float = 0x3, 2756*4882a593Smuzhiyun+ DW_ATE_float = 0x4, 2757*4882a593Smuzhiyun+ DW_ATE_signed = 0x5, 2758*4882a593Smuzhiyun+ DW_ATE_signed_char = 0x6, 2759*4882a593Smuzhiyun+ DW_ATE_unsigned = 0x7, 2760*4882a593Smuzhiyun+ DW_ATE_unsigned_char = 0x8 2761*4882a593Smuzhiyun+ }; 2762*4882a593Smuzhiyun+ 2763*4882a593Smuzhiyun+#define DW_ATE_lo_user 0x80 2764*4882a593Smuzhiyun+#define DW_ATE_hi_user 0xff 2765*4882a593Smuzhiyun+ 2766*4882a593Smuzhiyun+/* Array ordering names and codes. */ 2767*4882a593Smuzhiyun+enum dwarf_array_dim_ordering 2768*4882a593Smuzhiyun+ { 2769*4882a593Smuzhiyun+ DW_ORD_row_major = 0, 2770*4882a593Smuzhiyun+ DW_ORD_col_major = 1 2771*4882a593Smuzhiyun+ }; 2772*4882a593Smuzhiyun+ 2773*4882a593Smuzhiyun+/* access attribute */ 2774*4882a593Smuzhiyun+enum dwarf_access_attribute 2775*4882a593Smuzhiyun+ { 2776*4882a593Smuzhiyun+ DW_ACCESS_public = 1, 2777*4882a593Smuzhiyun+ DW_ACCESS_protected = 2, 2778*4882a593Smuzhiyun+ DW_ACCESS_private = 3 2779*4882a593Smuzhiyun+ }; 2780*4882a593Smuzhiyun+ 2781*4882a593Smuzhiyun+/* visibility */ 2782*4882a593Smuzhiyun+enum dwarf_visibility_attribute 2783*4882a593Smuzhiyun+ { 2784*4882a593Smuzhiyun+ DW_VIS_local = 1, 2785*4882a593Smuzhiyun+ DW_VIS_exported = 2, 2786*4882a593Smuzhiyun+ DW_VIS_qualified = 3 2787*4882a593Smuzhiyun+ }; 2788*4882a593Smuzhiyun+ 2789*4882a593Smuzhiyun+/* virtuality */ 2790*4882a593Smuzhiyun+enum dwarf_virtuality_attribute 2791*4882a593Smuzhiyun+ { 2792*4882a593Smuzhiyun+ DW_VIRTUALITY_none = 0, 2793*4882a593Smuzhiyun+ DW_VIRTUALITY_virtual = 1, 2794*4882a593Smuzhiyun+ DW_VIRTUALITY_pure_virtual = 2 2795*4882a593Smuzhiyun+ }; 2796*4882a593Smuzhiyun+ 2797*4882a593Smuzhiyun+/* case sensitivity */ 2798*4882a593Smuzhiyun+enum 
dwarf_id_case 2799*4882a593Smuzhiyun+ { 2800*4882a593Smuzhiyun+ DW_ID_case_sensitive = 0, 2801*4882a593Smuzhiyun+ DW_ID_up_case = 1, 2802*4882a593Smuzhiyun+ DW_ID_down_case = 2, 2803*4882a593Smuzhiyun+ DW_ID_case_insensitive = 3 2804*4882a593Smuzhiyun+ }; 2805*4882a593Smuzhiyun+ 2806*4882a593Smuzhiyun+/* calling convention */ 2807*4882a593Smuzhiyun+enum dwarf_calling_convention 2808*4882a593Smuzhiyun+ { 2809*4882a593Smuzhiyun+ DW_CC_normal = 0x1, 2810*4882a593Smuzhiyun+ DW_CC_program = 0x2, 2811*4882a593Smuzhiyun+ DW_CC_nocall = 0x3 2812*4882a593Smuzhiyun+ }; 2813*4882a593Smuzhiyun+ 2814*4882a593Smuzhiyun+#define DW_CC_lo_user 0x40 2815*4882a593Smuzhiyun+#define DW_CC_hi_user 0xff 2816*4882a593Smuzhiyun+ 2817*4882a593Smuzhiyun+/* inline attribute */ 2818*4882a593Smuzhiyun+enum dwarf_inline_attribute 2819*4882a593Smuzhiyun+ { 2820*4882a593Smuzhiyun+ DW_INL_not_inlined = 0, 2821*4882a593Smuzhiyun+ DW_INL_inlined = 1, 2822*4882a593Smuzhiyun+ DW_INL_declared_not_inlined = 2, 2823*4882a593Smuzhiyun+ DW_INL_declared_inlined = 3 2824*4882a593Smuzhiyun+ }; 2825*4882a593Smuzhiyun+ 2826*4882a593Smuzhiyun+/* discriminant lists */ 2827*4882a593Smuzhiyun+enum dwarf_discrim_list 2828*4882a593Smuzhiyun+ { 2829*4882a593Smuzhiyun+ DW_DSC_label = 0, 2830*4882a593Smuzhiyun+ DW_DSC_range = 1 2831*4882a593Smuzhiyun+ }; 2832*4882a593Smuzhiyun+ 2833*4882a593Smuzhiyun+/* line number opcodes */ 2834*4882a593Smuzhiyun+enum dwarf_line_number_ops 2835*4882a593Smuzhiyun+ { 2836*4882a593Smuzhiyun+ DW_LNS_extended_op = 0, 2837*4882a593Smuzhiyun+ DW_LNS_copy = 1, 2838*4882a593Smuzhiyun+ DW_LNS_advance_pc = 2, 2839*4882a593Smuzhiyun+ DW_LNS_advance_line = 3, 2840*4882a593Smuzhiyun+ DW_LNS_set_file = 4, 2841*4882a593Smuzhiyun+ DW_LNS_set_column = 5, 2842*4882a593Smuzhiyun+ DW_LNS_negate_stmt = 6, 2843*4882a593Smuzhiyun+ DW_LNS_set_basic_block = 7, 2844*4882a593Smuzhiyun+ DW_LNS_const_add_pc = 8, 2845*4882a593Smuzhiyun+ DW_LNS_fixed_advance_pc = 9 2846*4882a593Smuzhiyun+ }; 2847*4882a593Smuzhiyun+ 
2848*4882a593Smuzhiyun+/* line number extended opcodes */ 2849*4882a593Smuzhiyun+enum dwarf_line_number_x_ops 2850*4882a593Smuzhiyun+ { 2851*4882a593Smuzhiyun+ DW_LNE_end_sequence = 1, 2852*4882a593Smuzhiyun+ DW_LNE_set_address = 2, 2853*4882a593Smuzhiyun+ DW_LNE_define_file = 3 2854*4882a593Smuzhiyun+ }; 2855*4882a593Smuzhiyun+ 2856*4882a593Smuzhiyun+/* call frame information */ 2857*4882a593Smuzhiyun+enum dwarf_call_frame_info 2858*4882a593Smuzhiyun+ { 2859*4882a593Smuzhiyun+ DW_CFA_advance_loc = 0x40, 2860*4882a593Smuzhiyun+ DW_CFA_offset = 0x80, 2861*4882a593Smuzhiyun+ DW_CFA_restore = 0xc0, 2862*4882a593Smuzhiyun+ DW_CFA_nop = 0x00, 2863*4882a593Smuzhiyun+ DW_CFA_set_loc = 0x01, 2864*4882a593Smuzhiyun+ DW_CFA_advance_loc1 = 0x02, 2865*4882a593Smuzhiyun+ DW_CFA_advance_loc2 = 0x03, 2866*4882a593Smuzhiyun+ DW_CFA_advance_loc4 = 0x04, 2867*4882a593Smuzhiyun+ DW_CFA_offset_extended = 0x05, 2868*4882a593Smuzhiyun+ DW_CFA_restore_extended = 0x06, 2869*4882a593Smuzhiyun+ DW_CFA_undefined = 0x07, 2870*4882a593Smuzhiyun+ DW_CFA_same_value = 0x08, 2871*4882a593Smuzhiyun+ DW_CFA_register = 0x09, 2872*4882a593Smuzhiyun+ DW_CFA_remember_state = 0x0a, 2873*4882a593Smuzhiyun+ DW_CFA_restore_state = 0x0b, 2874*4882a593Smuzhiyun+ DW_CFA_def_cfa = 0x0c, 2875*4882a593Smuzhiyun+ DW_CFA_def_cfa_register = 0x0d, 2876*4882a593Smuzhiyun+ DW_CFA_def_cfa_offset = 0x0e, 2877*4882a593Smuzhiyun+ DW_CFA_def_cfa_expression = 0x0f, 2878*4882a593Smuzhiyun+ DW_CFA_expression = 0x10, 2879*4882a593Smuzhiyun+ /* Dwarf 2.1 */ 2880*4882a593Smuzhiyun+ DW_CFA_offset_extended_sf = 0x11, 2881*4882a593Smuzhiyun+ DW_CFA_def_cfa_sf = 0x12, 2882*4882a593Smuzhiyun+ DW_CFA_def_cfa_offset_sf = 0x13, 2883*4882a593Smuzhiyun+ 2884*4882a593Smuzhiyun+ /* SGI/MIPS specific */ 2885*4882a593Smuzhiyun+ DW_CFA_MIPS_advance_loc8 = 0x1d, 2886*4882a593Smuzhiyun+ 2887*4882a593Smuzhiyun+ /* GNU extensions */ 2888*4882a593Smuzhiyun+ DW_CFA_GNU_window_save = 0x2d, 2889*4882a593Smuzhiyun+ DW_CFA_GNU_args_size = 0x2e, 
2890*4882a593Smuzhiyun+ DW_CFA_GNU_negative_offset_extended = 0x2f 2891*4882a593Smuzhiyun+ }; 2892*4882a593Smuzhiyun+ 2893*4882a593Smuzhiyun+#define DW_CIE_ID 0xffffffff 2894*4882a593Smuzhiyun+#define DW_CIE_VERSION 1 2895*4882a593Smuzhiyun+ 2896*4882a593Smuzhiyun+#define DW_CFA_extended 0 2897*4882a593Smuzhiyun+#define DW_CFA_low_user 0x1c 2898*4882a593Smuzhiyun+#define DW_CFA_high_user 0x3f 2899*4882a593Smuzhiyun+ 2900*4882a593Smuzhiyun+#define DW_CHILDREN_no 0x00 2901*4882a593Smuzhiyun+#define DW_CHILDREN_yes 0x01 2902*4882a593Smuzhiyun+ 2903*4882a593Smuzhiyun+#define DW_ADDR_none 0 2904*4882a593Smuzhiyun+ 2905*4882a593Smuzhiyun+/* Source language names and codes. */ 2906*4882a593Smuzhiyun+ 2907*4882a593Smuzhiyun+enum dwarf_source_language 2908*4882a593Smuzhiyun+ { 2909*4882a593Smuzhiyun+ DW_LANG_C89 = 0x0001, 2910*4882a593Smuzhiyun+ DW_LANG_C = 0x0002, 2911*4882a593Smuzhiyun+ DW_LANG_Ada83 = 0x0003, 2912*4882a593Smuzhiyun+ DW_LANG_C_plus_plus = 0x0004, 2913*4882a593Smuzhiyun+ DW_LANG_Cobol74 = 0x0005, 2914*4882a593Smuzhiyun+ DW_LANG_Cobol85 = 0x0006, 2915*4882a593Smuzhiyun+ DW_LANG_Fortran77 = 0x0007, 2916*4882a593Smuzhiyun+ DW_LANG_Fortran90 = 0x0008, 2917*4882a593Smuzhiyun+ DW_LANG_Pascal83 = 0x0009, 2918*4882a593Smuzhiyun+ DW_LANG_Modula2 = 0x000a, 2919*4882a593Smuzhiyun+ DW_LANG_Java = 0x000b, 2920*4882a593Smuzhiyun+ DW_LANG_Mips_Assembler = 0x8001 2921*4882a593Smuzhiyun+ }; 2922*4882a593Smuzhiyun+ 2923*4882a593Smuzhiyun+ 2924*4882a593Smuzhiyun+#define DW_LANG_lo_user 0x8000 /* implementation-defined range start */ 2925*4882a593Smuzhiyun+#define DW_LANG_hi_user 0xffff /* implementation-defined range end */ 2926*4882a593Smuzhiyun+ 2927*4882a593Smuzhiyun+/* Names and codes for macro information.
*/ 2928*4882a593Smuzhiyun+ 2929*4882a593Smuzhiyun+enum dwarf_macinfo_record_type 2930*4882a593Smuzhiyun+ { 2931*4882a593Smuzhiyun+ DW_MACINFO_define = 1, 2932*4882a593Smuzhiyun+ DW_MACINFO_undef = 2, 2933*4882a593Smuzhiyun+ DW_MACINFO_start_file = 3, 2934*4882a593Smuzhiyun+ DW_MACINFO_end_file = 4, 2935*4882a593Smuzhiyun+ DW_MACINFO_vendor_ext = 255 2936*4882a593Smuzhiyun+ }; 2937*4882a593Smuzhiyun+ 2938*4882a593Smuzhiyun+#endif /* !ASSEMBLER */ 2939*4882a593Smuzhiyun+ 2940*4882a593Smuzhiyun+/* @@@ For use with GNU frame unwind information. */ 2941*4882a593Smuzhiyun+ 2942*4882a593Smuzhiyun+#define DW_EH_PE_absptr 0x00 2943*4882a593Smuzhiyun+#define DW_EH_PE_omit 0xff 2944*4882a593Smuzhiyun+ 2945*4882a593Smuzhiyun+#define DW_EH_PE_uleb128 0x01 2946*4882a593Smuzhiyun+#define DW_EH_PE_udata2 0x02 2947*4882a593Smuzhiyun+#define DW_EH_PE_udata4 0x03 2948*4882a593Smuzhiyun+#define DW_EH_PE_udata8 0x04 2949*4882a593Smuzhiyun+#define DW_EH_PE_sleb128 0x09 2950*4882a593Smuzhiyun+#define DW_EH_PE_sdata2 0x0A 2951*4882a593Smuzhiyun+#define DW_EH_PE_sdata4 0x0B 2952*4882a593Smuzhiyun+#define DW_EH_PE_sdata8 0x0C 2953*4882a593Smuzhiyun+#define DW_EH_PE_signed 0x08 2954*4882a593Smuzhiyun+ 2955*4882a593Smuzhiyun+#define DW_EH_PE_pcrel 0x10 2956*4882a593Smuzhiyun+#define DW_EH_PE_textrel 0x20 2957*4882a593Smuzhiyun+#define DW_EH_PE_datarel 0x30 2958*4882a593Smuzhiyun+#define DW_EH_PE_funcrel 0x40 2959*4882a593Smuzhiyun+#define DW_EH_PE_aligned 0x50 2960*4882a593Smuzhiyun+ 2961*4882a593Smuzhiyun+#define DW_EH_PE_indirect 0x80 2962*4882a593Smuzhiyun+ 2963*4882a593Smuzhiyun+#endif /* dwarf2.h */ 2964*4882a593Smuzhiyundiff --git a/libc/string/arm/glibc-neon/sysdeps/generic/sysdep.h b/libc/string/arm/glibc-neon/sysdeps/generic/sysdep.h 2965*4882a593Smuzhiyunnew file mode 100644 2966*4882a593Smuzhiyunindex 0000000..39cac91 2967*4882a593Smuzhiyun--- /dev/null 2968*4882a593Smuzhiyun+++ b/libc/string/arm/glibc-neon/sysdeps/generic/sysdep.h 2969*4882a593Smuzhiyun@@ -0,0 +1,97 @@ 
2970*4882a593Smuzhiyun+/* Generic asm macros used on many machines. 2971*4882a593Smuzhiyun+ Copyright (C) 1991-2021 Free Software Foundation, Inc. 2972*4882a593Smuzhiyun+ This file is part of the GNU C Library. 2973*4882a593Smuzhiyun+ 2974*4882a593Smuzhiyun+ The GNU C Library is free software; you can redistribute it and/or 2975*4882a593Smuzhiyun+ modify it under the terms of the GNU Lesser General Public 2976*4882a593Smuzhiyun+ License as published by the Free Software Foundation; either 2977*4882a593Smuzhiyun+ version 2.1 of the License, or (at your option) any later version. 2978*4882a593Smuzhiyun+ 2979*4882a593Smuzhiyun+ The GNU C Library is distributed in the hope that it will be useful, 2980*4882a593Smuzhiyun+ but WITHOUT ANY WARRANTY; without even the implied warranty of 2981*4882a593Smuzhiyun+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 2982*4882a593Smuzhiyun+ Lesser General Public License for more details. 2983*4882a593Smuzhiyun+ 2984*4882a593Smuzhiyun+ You should have received a copy of the GNU Lesser General Public 2985*4882a593Smuzhiyun+ License along with the GNU C Library; if not, see 2986*4882a593Smuzhiyun+ <https://www.gnu.org/licenses/>. */ 2987*4882a593Smuzhiyun+ 2988*4882a593Smuzhiyun+#ifndef C_LABEL 2989*4882a593Smuzhiyun+ 2990*4882a593Smuzhiyun+/* Define a macro we can use to construct the asm name for a C symbol. */ 2991*4882a593Smuzhiyun+# define C_LABEL(name) name##: 2992*4882a593Smuzhiyun+ 2993*4882a593Smuzhiyun+#endif 2994*4882a593Smuzhiyun+ 2995*4882a593Smuzhiyun+#ifdef __ASSEMBLER__ 2996*4882a593Smuzhiyun+/* Mark the end of function named SYM. This is used on some platforms 2997*4882a593Smuzhiyun+ to generate correct debugging information. 
*/ 2998*4882a593Smuzhiyun+# ifndef END 2999*4882a593Smuzhiyun+# define END(sym) 3000*4882a593Smuzhiyun+# endif 3001*4882a593Smuzhiyun+ 3002*4882a593Smuzhiyun+# ifndef JUMPTARGET 3003*4882a593Smuzhiyun+# define JUMPTARGET(sym) sym 3004*4882a593Smuzhiyun+# endif 3005*4882a593Smuzhiyun+#endif 3006*4882a593Smuzhiyun+ 3007*4882a593Smuzhiyun+/* Macros to generate eh_frame unwind information. */ 3008*4882a593Smuzhiyun+#ifdef __ASSEMBLER__ 3009*4882a593Smuzhiyun+# define cfi_startproc .cfi_startproc 3010*4882a593Smuzhiyun+# define cfi_endproc .cfi_endproc 3011*4882a593Smuzhiyun+# define cfi_def_cfa(reg, off) .cfi_def_cfa reg, off 3012*4882a593Smuzhiyun+# define cfi_def_cfa_register(reg) .cfi_def_cfa_register reg 3013*4882a593Smuzhiyun+# define cfi_def_cfa_offset(off) .cfi_def_cfa_offset off 3014*4882a593Smuzhiyun+# define cfi_adjust_cfa_offset(off) .cfi_adjust_cfa_offset off 3015*4882a593Smuzhiyun+# define cfi_offset(reg, off) .cfi_offset reg, off 3016*4882a593Smuzhiyun+# define cfi_rel_offset(reg, off) .cfi_rel_offset reg, off 3017*4882a593Smuzhiyun+# define cfi_register(r1, r2) .cfi_register r1, r2 3018*4882a593Smuzhiyun+# define cfi_return_column(reg) .cfi_return_column reg 3019*4882a593Smuzhiyun+# define cfi_restore(reg) .cfi_restore reg 3020*4882a593Smuzhiyun+# define cfi_same_value(reg) .cfi_same_value reg 3021*4882a593Smuzhiyun+# define cfi_undefined(reg) .cfi_undefined reg 3022*4882a593Smuzhiyun+# define cfi_remember_state .cfi_remember_state 3023*4882a593Smuzhiyun+# define cfi_restore_state .cfi_restore_state 3024*4882a593Smuzhiyun+# define cfi_window_save .cfi_window_save 3025*4882a593Smuzhiyun+# define cfi_personality(enc, exp) .cfi_personality enc, exp 3026*4882a593Smuzhiyun+# define cfi_lsda(enc, exp) .cfi_lsda enc, exp 3027*4882a593Smuzhiyun+ 3028*4882a593Smuzhiyun+#else /* !
ASSEMBLER */ 3029*4882a593Smuzhiyun+ 3030*4882a593Smuzhiyun+# define CFI_STRINGIFY(Name) CFI_STRINGIFY2 (Name) 3031*4882a593Smuzhiyun+# define CFI_STRINGIFY2(Name) #Name 3032*4882a593Smuzhiyun+# define CFI_STARTPROC ".cfi_startproc" 3033*4882a593Smuzhiyun+# define CFI_ENDPROC ".cfi_endproc" 3034*4882a593Smuzhiyun+# define CFI_DEF_CFA(reg, off) \ 3035*4882a593Smuzhiyun+ ".cfi_def_cfa " CFI_STRINGIFY(reg) "," CFI_STRINGIFY(off) 3036*4882a593Smuzhiyun+# define CFI_DEF_CFA_REGISTER(reg) \ 3037*4882a593Smuzhiyun+ ".cfi_def_cfa_register " CFI_STRINGIFY(reg) 3038*4882a593Smuzhiyun+# define CFI_DEF_CFA_OFFSET(off) \ 3039*4882a593Smuzhiyun+ ".cfi_def_cfa_offset " CFI_STRINGIFY(off) 3040*4882a593Smuzhiyun+# define CFI_ADJUST_CFA_OFFSET(off) \ 3041*4882a593Smuzhiyun+ ".cfi_adjust_cfa_offset " CFI_STRINGIFY(off) 3042*4882a593Smuzhiyun+# define CFI_OFFSET(reg, off) \ 3043*4882a593Smuzhiyun+ ".cfi_offset " CFI_STRINGIFY(reg) "," CFI_STRINGIFY(off) 3044*4882a593Smuzhiyun+# define CFI_REL_OFFSET(reg, off) \ 3045*4882a593Smuzhiyun+ ".cfi_rel_offset " CFI_STRINGIFY(reg) "," CFI_STRINGIFY(off) 3046*4882a593Smuzhiyun+# define CFI_REGISTER(r1, r2) \ 3047*4882a593Smuzhiyun+ ".cfi_register " CFI_STRINGIFY(r1) "," CFI_STRINGIFY(r2) 3048*4882a593Smuzhiyun+# define CFI_RETURN_COLUMN(reg) \ 3049*4882a593Smuzhiyun+ ".cfi_return_column " CFI_STRINGIFY(reg) 3050*4882a593Smuzhiyun+# define CFI_RESTORE(reg) \ 3051*4882a593Smuzhiyun+ ".cfi_restore " CFI_STRINGIFY(reg) 3052*4882a593Smuzhiyun+# define CFI_UNDEFINED(reg) \ 3053*4882a593Smuzhiyun+ ".cfi_undefined " CFI_STRINGIFY(reg) 3054*4882a593Smuzhiyun+# define CFI_REMEMBER_STATE \ 3055*4882a593Smuzhiyun+ ".cfi_remember_state" 3056*4882a593Smuzhiyun+# define CFI_RESTORE_STATE \ 3057*4882a593Smuzhiyun+ ".cfi_restore_state" 3058*4882a593Smuzhiyun+# define CFI_WINDOW_SAVE \ 3059*4882a593Smuzhiyun+ ".cfi_window_save" 3060*4882a593Smuzhiyun+# define CFI_PERSONALITY(enc, exp) \ 3061*4882a593Smuzhiyun+ ".cfi_personality " CFI_STRINGIFY(enc) "," 
CFI_STRINGIFY(exp) 3062*4882a593Smuzhiyun+# define CFI_LSDA(enc, exp) \ 3063*4882a593Smuzhiyun+ ".cfi_lsda " CFI_STRINGIFY(enc) "," CFI_STRINGIFY(exp) 3064*4882a593Smuzhiyun+#endif 3065*4882a593Smuzhiyun+ 3066*4882a593Smuzhiyun+#include "dwarf2.h" 3067*4882a593Smuzhiyun-- 3068*4882a593Smuzhiyun2.20.1 3069*4882a593Smuzhiyun 3070