From 0bea89508c6be2b1a76650108d759d389e99bbf3 Mon Sep 17 00:00:00 2001
From: Jeffy Chen <jeffy.chen@rock-chips.com>
Date: Wed, 13 Apr 2022 16:32:02 +0800
Subject: [PATCH] libc: string: arm: Support using glibc-neon version of APIs

Set USE_GLIBC_NEON to enable.

Signed-off-by: Jeffy Chen <jeffy.chen@rock-chips.com>
---
 Makerules                                   |   1 +
 libc/string/Makefile.in                     |  10 +-
 libc/string/arm/Makefile.in                 |  13 +
 libc/string/arm/glibc-neon/README           |   4 +
 libc/string/arm/glibc-neon/arm-features.h   |  59 ++
 libc/string/arm/glibc-neon/bcopy.c          |  24 +
 libc/string/arm/glibc-neon/bzero.c          |  28 +
 libc/string/arm/glibc-neon/glibc_wrap.h     |  29 +
 libc/string/arm/glibc-neon/memcmp.S         |   1 +
 libc/string/arm/glibc-neon/memcpy.S         | 728 ++++++++++++++++++
 libc/string/arm/glibc-neon/memmove.S        | 332 ++++
 libc/string/arm/glibc-neon/memset.S         |  68 ++
 libc/string/arm/glibc-neon/strcmp.S         | 504 ++++++++++++
 libc/string/arm/glibc-neon/strlen.S         |  76 ++
 libc/string/arm/glibc-neon/sysdep.h         | 339 ++++
 .../arm/glibc-neon/sysdeps/generic/dwarf2.h | 590 ++++++++++++++
 .../arm/glibc-neon/sysdeps/generic/sysdep.h |  97 +++
 17 files changed, 2901 insertions(+), 2 deletions(-)
 create mode 100644 libc/string/arm/Makefile.in
 create mode 100644 libc/string/arm/glibc-neon/README
 create mode 100644 libc/string/arm/glibc-neon/arm-features.h
 create mode 100644 libc/string/arm/glibc-neon/bcopy.c
 create mode 100644 libc/string/arm/glibc-neon/bzero.c
 create mode 100644 libc/string/arm/glibc-neon/glibc_wrap.h
 create mode 120000 libc/string/arm/glibc-neon/memcmp.S
 create mode 100644 libc/string/arm/glibc-neon/memcpy.S
 create mode 100644 libc/string/arm/glibc-neon/memmove.S
 create mode 100644 libc/string/arm/glibc-neon/memset.S
 create mode 100644 libc/string/arm/glibc-neon/strcmp.S
 create mode 100644 libc/string/arm/glibc-neon/strlen.S
 create mode 100644 libc/string/arm/glibc-neon/sysdep.h
 create mode 100644 libc/string/arm/glibc-neon/sysdeps/generic/dwarf2.h
 create mode 100644 libc/string/arm/glibc-neon/sysdeps/generic/sysdep.h

diff --git a/Makerules b/Makerules
index fd40e6c..2013ba9 100644
--- a/Makerules
+++ b/Makerules
@@ -255,6 +255,7 @@ CFLAGS_gen.dep = -MT $@ -MD -MP -MF $(dir $@).$(notdir $@).dep
 
 cmd_compile.c = $(CC) -c $< -o $@ \
 	$(filter-out $(CFLAGS-OMIT-$(notdir $<)), \
+	$(CFLAGS-extra-$(subst $(top_srcdir),,$(<D))) \
 	$(CFLAGS) \
 	$(CFLAGS-for-library-members) \
 	$(CFLAGS-$(suffix $@)) \
diff --git a/libc/string/Makefile.in b/libc/string/Makefile.in
index e7f2ccd..975b395 100644
--- a/libc/string/Makefile.in
+++ b/libc/string/Makefile.in
@@ -5,6 +5,12 @@
 # Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball.
 #
 
+ifeq ($(TARGET_ARCH),arm)
+ifneq ($(USE_GLIBC_NEON),)
+include $(top_srcdir)libc/string/$(TARGET_ARCH)/Makefile.in
+endif
+endif
+
 subdirs += libc/string/$(TARGET_ARCH) libc/string/generic
 
 #
@@ -25,8 +31,8 @@ STRING_SUBARCH_OBJS := $(STRING_SUBARCH_SOBJ) $(STRING_SUBARCH_COBJ)
 endif
 
 # Collect the arch specific implementation (asm, c files)
-STRING_ARCH_DIR := $(top_srcdir)libc/string/$(TARGET_ARCH)
-STRING_ARCH_OUT := $(top_builddir)libc/string/$(TARGET_ARCH)
+STRING_ARCH_DIR ?= $(top_srcdir)libc/string/$(TARGET_ARCH)
+STRING_ARCH_OUT ?= $(top_builddir)libc/string/$(TARGET_ARCH)
 
 STRING_ARCH_SRC := $(wildcard $(STRING_ARCH_DIR)/*.c)
 STRING_ARCH_OBJ := $(patsubst $(STRING_ARCH_DIR)/%.c,$(STRING_ARCH_OUT)/%.o,$(STRING_ARCH_SRC))
diff --git a/libc/string/arm/Makefile.in b/libc/string/arm/Makefile.in
new file mode 100644
index 0000000..4cd7e24
--- /dev/null
+++ b/libc/string/arm/Makefile.in
@@ -0,0 +1,13 @@
+# Makefile for uClibc
+#
+# Copyright (C) 2022 Jeffy Chen <jeffy.chen@rock-chips.com>
+#
+# Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball.
+#
+
+SUBDIR := libc/string/arm/glibc-neon
+
+STRING_ARCH_DIR := $(top_srcdir)$(SUBDIR)
+STRING_ARCH_OUT := $(top_builddir)$(SUBDIR)
+
+CFLAGS-extra-$(SUBDIR) := -include glibc_wrap.h -I$(top_srcdir)$(SUBDIR)
diff --git a/libc/string/arm/glibc-neon/README b/libc/string/arm/glibc-neon/README
new file mode 100644
index 0000000..ad72f05
--- /dev/null
+++ b/libc/string/arm/glibc-neon/README
@@ -0,0 +1,4 @@
+NOTE:
+1/ Ported from glibc-2.34.9.
+2/ glibc doesn't have an arm arch version of memcmp.
+3/ uClibc is using strcmp as strcoll when locale disabled.
diff --git a/libc/string/arm/glibc-neon/arm-features.h b/libc/string/arm/glibc-neon/arm-features.h
new file mode 100644
index 0000000..4a86e00
--- /dev/null
+++ b/libc/string/arm/glibc-neon/arm-features.h
@@ -0,0 +1,59 @@
+/* Macros to test for CPU features on ARM.  Generic ARM version.
+   Copyright (C) 2012-2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library.  If not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#ifndef _ARM_ARM_FEATURES_H
+#define _ARM_ARM_FEATURES_H 1
+
+/* An OS-specific arm-features.h file should define ARM_HAVE_VFP to
+   an appropriate expression for testing at runtime whether the VFP
+   hardware is present.
We'll then redefine it to a constant if we 143+ know at compile time that we can assume VFP. */ 144+ 145+#ifndef __SOFTFP__ 146+/* The compiler is generating VFP instructions, so we're already 147+ assuming the hardware exists. */ 148+# undef ARM_HAVE_VFP 149+# define ARM_HAVE_VFP 1 150+#endif 151+ 152+/* An OS-specific arm-features.h file may define ARM_ASSUME_NO_IWMMXT 153+ to indicate at compile time that iWMMXt hardware is never present 154+ at runtime (or that we never care about its state) and so need not 155+ be checked for. */ 156+ 157+/* A more-specific arm-features.h file may define ARM_ALWAYS_BX to indicate 158+ that instructions using pc as a destination register must never be used, 159+ so a "bx" (or "blx") instruction is always required. */ 160+ 161+/* The log2 of the minimum alignment required for an address that 162+ is the target of a computed branch (i.e. a "bx" instruction). 163+ A more-specific arm-features.h file may define this to set a more 164+ stringent requirement. 165+ 166+ Using this only makes sense for code in ARM mode (where instructions 167+ always have a fixed size of four bytes), or for Thumb-mode code that is 168+ specifically aligning all the related branch targets to match (since 169+ Thumb instructions might be either two or four bytes). */ 170+#ifndef ARM_BX_ALIGN_LOG2 171+# define ARM_BX_ALIGN_LOG2 2 172+#endif 173+ 174+/* An OS-specific arm-features.h file may define ARM_NO_INDEX_REGISTER to 175+ indicate that the two-register addressing modes must never be used. */ 176+ 177+#endif /* arm-features.h */ 178diff --git a/libc/string/arm/glibc-neon/bcopy.c b/libc/string/arm/glibc-neon/bcopy.c 179new file mode 100644 180index 0000000..302bbbd 181--- /dev/null 182+++ b/libc/string/arm/glibc-neon/bcopy.c 183@@ -0,0 +1,24 @@ 184+/* Copyright (C) 1991-2021 Free Software Foundation, Inc. 185+ This file is part of the GNU C Library. 
186+ 187+ The GNU C Library is free software; you can redistribute it and/or 188+ modify it under the terms of the GNU Lesser General Public 189+ License as published by the Free Software Foundation; either 190+ version 2.1 of the License, or (at your option) any later version. 191+ 192+ The GNU C Library is distributed in the hope that it will be useful, 193+ but WITHOUT ANY WARRANTY; without even the implied warranty of 194+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 195+ Lesser General Public License for more details. 196+ 197+ You should have received a copy of the GNU Lesser General Public 198+ License along with the GNU C Library; if not, see 199+ <https://www.gnu.org/licenses/>. */ 200+ 201+#include <string.h> 202+ 203+void 204+bcopy (const void *src, void *dest, size_t len) 205+{ 206+ memmove (dest, src, len); 207+} 208diff --git a/libc/string/arm/glibc-neon/bzero.c b/libc/string/arm/glibc-neon/bzero.c 209new file mode 100644 210index 0000000..4391dad 211--- /dev/null 212+++ b/libc/string/arm/glibc-neon/bzero.c 213@@ -0,0 +1,28 @@ 214+/* Copyright (C) 1991-2021 Free Software Foundation, Inc. 215+ This file is part of the GNU C Library. 216+ 217+ The GNU C Library is free software; you can redistribute it and/or 218+ modify it under the terms of the GNU Lesser General Public 219+ License as published by the Free Software Foundation; either 220+ version 2.1 of the License, or (at your option) any later version. 221+ 222+ The GNU C Library is distributed in the hope that it will be useful, 223+ but WITHOUT ANY WARRANTY; without even the implied warranty of 224+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 225+ Lesser General Public License for more details. 226+ 227+ You should have received a copy of the GNU Lesser General Public 228+ License along with the GNU C Library; if not, see 229+ <https://www.gnu.org/licenses/>. */ 230+ 231+#include <string.h> 232+ 233+#undef __bzero 234+ 235+/* Set N bytes of S to 0. 
*/ 236+void 237+__bzero (void *s, size_t len) 238+{ 239+ memset (s, '\0', len); 240+} 241+weak_alias (__bzero, bzero) 242diff --git a/libc/string/arm/glibc-neon/glibc_wrap.h b/libc/string/arm/glibc-neon/glibc_wrap.h 243new file mode 100644 244index 0000000..f00d50d 245--- /dev/null 246+++ b/libc/string/arm/glibc-neon/glibc_wrap.h 247@@ -0,0 +1,29 @@ 248+/* Macros to adapt glibc version of string APIs. 249+ Copyright (C) 2022 Jeffy Chen <jeffy.chen@rock-chips.com> 250+ This file is part of the GNU C Library. 251+ 252+ The GNU C Library is free software; you can redistribute it and/or 253+ modify it under the terms of the GNU Lesser General Public 254+ License as published by the Free Software Foundation; either 255+ version 2.1 of the License, or (at your option) any later version. 256+ 257+ The GNU C Library is distributed in the hope that it will be useful, 258+ but WITHOUT ANY WARRANTY; without even the implied warranty of 259+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 260+ Lesser General Public License for more details. 261+ 262+ You should have received a copy of the GNU Lesser General Public 263+ License along with the GNU C Library. If not, see 264+ <https://www.gnu.org/licenses/>. 
*/ 265+ 266+#ifndef GLIBC_WRAP_H 267+#define GLIBC_WRAP_H 268+ 269+#define IS_IN(lib) 1 270+ 271+#define libc_hidden_builtin_def(fn) libc_hidden_def(fn) 272+ 273+#define C_SYMBOL_NAME(name) name 274+#define MEMCPY_NEON 275+ 276+#endif // GLIBC_WRAP_H 277diff --git a/libc/string/arm/glibc-neon/memcmp.S b/libc/string/arm/glibc-neon/memcmp.S 278new file mode 120000 279index 0000000..7c28a09 280--- /dev/null 281+++ b/libc/string/arm/glibc-neon/memcmp.S 282@@ -0,0 +1 @@ 283+../memcmp.S 284\ No newline at end of file 285diff --git a/libc/string/arm/glibc-neon/memcpy.S b/libc/string/arm/glibc-neon/memcpy.S 286new file mode 100644 287index 0000000..ee562e8 288--- /dev/null 289+++ b/libc/string/arm/glibc-neon/memcpy.S 290@@ -0,0 +1,728 @@ 291+/* NEON/VFP/ARM version of memcpy optimized for Cortex-A15. 292+ Copyright (C) 2013-2021 Free Software Foundation, Inc. 293+ This file is part of the GNU C Library. 294+ 295+ The GNU C Library is free software; you can redistribute it and/or 296+ modify it under the terms of the GNU Lesser General Public 297+ License as published by the Free Software Foundation; either 298+ version 2.1 of the License, or (at your option) any later version. 299+ 300+ The GNU C Library is distributed in the hope that it will be useful, 301+ but WITHOUT ANY WARRANTY; without even the implied warranty of 302+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 303+ Lesser General Public License for more details. 304+ 305+ You should have received a copy of the GNU Lesser General Public 306+ License along with the GNU C Library; if not, see 307+ <https://www.gnu.org/licenses/>. 308+ 309+ This memcpy routine is optimised for Cortex-A15 cores and takes advantage 310+ of VFP or NEON when built with the appropriate flags. 311+ 312+ Assumptions: 313+ 314+ ARMv6 (ARMv7-a if using Neon) 315+ ARM state 316+ Unaligned accesses 317+ 318+ */ 319+ 320+/* Thumb cannot encode negative immediate offsets in memory operations. 
*/ 321+#ifndef NO_THUMB 322+#define NO_THUMB 323+#endif 324+#include <sysdep.h> 325+#include <arm-features.h> 326+ 327+ .syntax unified 328+ /* This implementation requires ARM state. */ 329+ .arm 330+ 331+#ifdef MEMCPY_NEON 332+ 333+ .fpu neon 334+ .arch armv7-a 335+# define FRAME_SIZE 4 336+# define USE_VFP 337+# define USE_NEON 338+ 339+#elif defined (MEMCPY_VFP) 340+ 341+ .arch armv6 342+ .fpu vfpv2 343+# define FRAME_SIZE 32 344+# define USE_VFP 345+ 346+#else 347+ .arch armv6 348+# define FRAME_SIZE 32 349+ 350+#endif 351+ 352+#define ALIGN(addr, align) addr:align 353+ 354+#define INSN_SIZE 4 355+ 356+/* Call parameters. */ 357+#define dstin r0 358+#define src r1 359+#define count r2 360+ 361+/* Locals. */ 362+#define tmp1 r3 363+#define dst ip 364+#define tmp2 r8 365+ 366+/* These two macros both work by repeated invocation of the macro 367+ dispatch_step (not defined here). That macro performs one "step", 368+ doing one load instruction and one store instruction to copy one 369+ "unit". On entry, TMP1 contains the number of bytes to be copied, 370+ a multiple of the unit size. The macro clobbers TMP1 in the 371+ process of doing a computed jump to the tail containing the 372+ appropriate number of steps. 373+ 374+ In dispatch_7_dword, dispatch_step is invoked seven times, with an 375+ argument that is 7 for the first and 1 for the last. Units are 376+ double-words (8 bytes). TMP1 is at most 56. 377+ 378+ In dispatch_15_word, dispatch_step is invoked fifteen times, 379+ with an argument that is 15 for the first and 1 for the last. 380+ Units are words (4 bytes). TMP1 is at most 60. 
*/ 381+ 382+#ifndef ARM_ALWAYS_BX 383+# if ARM_BX_ALIGN_LOG2 != 2 384+# error case not handled 385+# endif 386+ .macro dispatch_7_dword 387+ rsb tmp1, tmp1, #((7 * 8) - PC_OFS + INSN_SIZE) 388+ add pc, pc, tmp1 389+ dispatch_step 7 390+ dispatch_step 6 391+ dispatch_step 5 392+ dispatch_step 4 393+ dispatch_step 3 394+ dispatch_step 2 395+ dispatch_step 1 396+ .purgem dispatch_step 397+ .endm 398+ 399+ .macro dispatch_15_word 400+ rsb tmp1, tmp1, #((15 * 4) - PC_OFS/2 + INSN_SIZE/2) 401+ add pc, pc, tmp1, lsl #1 402+ dispatch_step 15 403+ dispatch_step 14 404+ dispatch_step 13 405+ dispatch_step 12 406+ dispatch_step 11 407+ dispatch_step 10 408+ dispatch_step 9 409+ dispatch_step 8 410+ dispatch_step 7 411+ dispatch_step 6 412+ dispatch_step 5 413+ dispatch_step 4 414+ dispatch_step 3 415+ dispatch_step 2 416+ dispatch_step 1 417+ .purgem dispatch_step 418+ .endm 419+#else 420+# if ARM_BX_ALIGN_LOG2 < 3 421+# error case not handled 422+# endif 423+ .macro dispatch_helper steps, log2_bytes_per_step 424+ /* TMP1 gets (max_bytes - bytes_to_copy), where max_bytes is 425+ (STEPS << LOG2_BYTES_PER_STEP). 426+ So this is (steps_to_skip << LOG2_BYTES_PER_STEP). 427+ Then it needs further adjustment to compensate for the 428+ distance between the PC value taken below (0f + PC_OFS) 429+ and the first step's instructions (1f). */ 430+ rsb tmp1, tmp1, #((\steps << \log2_bytes_per_step) \ 431+ + ((1f - PC_OFS - 0f) \ 432+ >> (ARM_BX_ALIGN_LOG2 - \log2_bytes_per_step))) 433+ /* Shifting down LOG2_BYTES_PER_STEP gives us the number of 434+ steps to skip, then shifting up ARM_BX_ALIGN_LOG2 gives us 435+ the (byte) distance to add to the PC. 
*/ 436+0: add tmp1, pc, tmp1, lsl #(ARM_BX_ALIGN_LOG2 - \log2_bytes_per_step) 437+ bx tmp1 438+ .p2align ARM_BX_ALIGN_LOG2 439+1: 440+ .endm 441+ 442+ .macro dispatch_7_dword 443+ dispatch_helper 7, 3 444+ .p2align ARM_BX_ALIGN_LOG2 445+ dispatch_step 7 446+ .p2align ARM_BX_ALIGN_LOG2 447+ dispatch_step 6 448+ .p2align ARM_BX_ALIGN_LOG2 449+ dispatch_step 5 450+ .p2align ARM_BX_ALIGN_LOG2 451+ dispatch_step 4 452+ .p2align ARM_BX_ALIGN_LOG2 453+ dispatch_step 3 454+ .p2align ARM_BX_ALIGN_LOG2 455+ dispatch_step 2 456+ .p2align ARM_BX_ALIGN_LOG2 457+ dispatch_step 1 458+ .p2align ARM_BX_ALIGN_LOG2 459+ .purgem dispatch_step 460+ .endm 461+ 462+ .macro dispatch_15_word 463+ dispatch_helper 15, 2 464+ dispatch_step 15 465+ .p2align ARM_BX_ALIGN_LOG2 466+ dispatch_step 14 467+ .p2align ARM_BX_ALIGN_LOG2 468+ dispatch_step 13 469+ .p2align ARM_BX_ALIGN_LOG2 470+ dispatch_step 12 471+ .p2align ARM_BX_ALIGN_LOG2 472+ dispatch_step 11 473+ .p2align ARM_BX_ALIGN_LOG2 474+ dispatch_step 10 475+ .p2align ARM_BX_ALIGN_LOG2 476+ dispatch_step 9 477+ .p2align ARM_BX_ALIGN_LOG2 478+ dispatch_step 8 479+ .p2align ARM_BX_ALIGN_LOG2 480+ dispatch_step 7 481+ .p2align ARM_BX_ALIGN_LOG2 482+ dispatch_step 6 483+ .p2align ARM_BX_ALIGN_LOG2 484+ dispatch_step 5 485+ .p2align ARM_BX_ALIGN_LOG2 486+ dispatch_step 4 487+ .p2align ARM_BX_ALIGN_LOG2 488+ dispatch_step 3 489+ .p2align ARM_BX_ALIGN_LOG2 490+ dispatch_step 2 491+ .p2align ARM_BX_ALIGN_LOG2 492+ dispatch_step 1 493+ .p2align ARM_BX_ALIGN_LOG2 494+ .purgem dispatch_step 495+ .endm 496+ 497+#endif 498+ 499+#ifndef USE_NEON 500+/* For bulk copies using GP registers. */ 501+#define A_l r2 /* Call-clobbered. */ 502+#define A_h r3 /* Call-clobbered. */ 503+#define B_l r4 504+#define B_h r5 505+#define C_l r6 506+#define C_h r7 507+/* Don't use the pair r8,r9 because in some EABI variants r9 is reserved. */ 508+#define D_l r10 509+#define D_h r11 510+#endif 511+ 512+/* Number of lines ahead to pre-fetch data. 
If you change this the code 513+ below will need adjustment to compensate. */ 514+ 515+#define prefetch_lines 5 516+ 517+#ifdef USE_VFP 518+ .macro cpy_line_vfp vreg, base 519+ vstr \vreg, [dst, #\base] 520+ vldr \vreg, [src, #\base] 521+ vstr d0, [dst, #\base + 8] 522+ vldr d0, [src, #\base + 8] 523+ vstr d1, [dst, #\base + 16] 524+ vldr d1, [src, #\base + 16] 525+ vstr d2, [dst, #\base + 24] 526+ vldr d2, [src, #\base + 24] 527+ vstr \vreg, [dst, #\base + 32] 528+ vldr \vreg, [src, #\base + prefetch_lines * 64 - 32] 529+ vstr d0, [dst, #\base + 40] 530+ vldr d0, [src, #\base + 40] 531+ vstr d1, [dst, #\base + 48] 532+ vldr d1, [src, #\base + 48] 533+ vstr d2, [dst, #\base + 56] 534+ vldr d2, [src, #\base + 56] 535+ .endm 536+ 537+ .macro cpy_tail_vfp vreg, base 538+ vstr \vreg, [dst, #\base] 539+ vldr \vreg, [src, #\base] 540+ vstr d0, [dst, #\base + 8] 541+ vldr d0, [src, #\base + 8] 542+ vstr d1, [dst, #\base + 16] 543+ vldr d1, [src, #\base + 16] 544+ vstr d2, [dst, #\base + 24] 545+ vldr d2, [src, #\base + 24] 546+ vstr \vreg, [dst, #\base + 32] 547+ vstr d0, [dst, #\base + 40] 548+ vldr d0, [src, #\base + 40] 549+ vstr d1, [dst, #\base + 48] 550+ vldr d1, [src, #\base + 48] 551+ vstr d2, [dst, #\base + 56] 552+ vldr d2, [src, #\base + 56] 553+ .endm 554+#endif 555+ 556+ .p2align 6 557+ENTRY(memcpy) 558+ 559+ mov dst, dstin /* Preserve dstin, we need to return it. */ 560+ cmp count, #64 561+ bhs .Lcpy_not_short 562+ /* Deal with small copies quickly by dropping straight into the 563+ exit block. */ 564+ 565+.Ltail63unaligned: 566+#ifdef USE_NEON 567+ /* These need an extra layer of macro just to work around a 568+ bug in the assembler's parser when an operand starts with 569+ a {...}. https://sourceware.org/bugzilla/show_bug.cgi?id=15647 570+ tracks that bug; it was not fixed as of binutils-2.23.2. */ 571+ .macro neon_load_d0 reg 572+ vld1.8 {d0}, [\reg]! 573+ .endm 574+ .macro neon_store_d0 reg 575+ vst1.8 {d0}, [\reg]! 
576+ .endm 577+ 578+ and tmp1, count, #0x38 579+ .macro dispatch_step i 580+ neon_load_d0 src 581+ neon_store_d0 dst 582+ .endm 583+ dispatch_7_dword 584+ 585+ tst count, #4 586+ ldrne tmp1, [src], #4 587+ strne tmp1, [dst], #4 588+#else 589+ /* Copy up to 15 full words of data. May not be aligned. */ 590+ /* Cannot use VFP for unaligned data. */ 591+ and tmp1, count, #0x3c 592+ add dst, dst, tmp1 593+ add src, src, tmp1 594+ /* Jump directly into the sequence below at the correct offset. */ 595+ .macro dispatch_step i 596+ ldr tmp1, [src, #-(\i * 4)] 597+ str tmp1, [dst, #-(\i * 4)] 598+ .endm 599+ dispatch_15_word 600+#endif 601+ 602+ lsls count, count, #31 603+ ldrhcs tmp1, [src], #2 604+ ldrbne src, [src] /* Src is dead, use as a scratch. */ 605+ strhcs tmp1, [dst], #2 606+ strbne src, [dst] 607+ bx lr 608+ 609+.Lcpy_not_short: 610+ /* At least 64 bytes to copy, but don't know the alignment yet. */ 611+ str tmp2, [sp, #-FRAME_SIZE]! 612+ cfi_adjust_cfa_offset (FRAME_SIZE) 613+ cfi_rel_offset (tmp2, 0) 614+ cfi_remember_state 615+ and tmp2, src, #7 616+ and tmp1, dst, #7 617+ cmp tmp1, tmp2 618+ bne .Lcpy_notaligned 619+ 620+#ifdef USE_VFP 621+ /* Magic dust alert! Force VFP on Cortex-A9. Experiments show 622+ that the FP pipeline is much better at streaming loads and 623+ stores. This is outside the critical loop. */ 624+ vmov.f32 s0, s0 625+#endif 626+ 627+ /* SRC and DST have the same mutual 64-bit alignment, but we may 628+ still need to pre-copy some bytes to get to natural alignment. 629+ We bring SRC and DST into full 64-bit alignment. */ 630+ lsls tmp2, dst, #29 631+ beq 1f 632+ rsbs tmp2, tmp2, #0 633+ sub count, count, tmp2, lsr #29 634+ ldrmi tmp1, [src], #4 635+ strmi tmp1, [dst], #4 636+ lsls tmp2, tmp2, #2 637+ ldrhcs tmp1, [src], #2 638+ ldrbne tmp2, [src], #1 639+ strhcs tmp1, [dst], #2 640+ strbne tmp2, [dst], #1 641+ 642+1: 643+ subs tmp2, count, #64 /* Use tmp2 for count. 
*/ 644+ blo .Ltail63aligned 645+ 646+ cmp tmp2, #512 647+ bhs .Lcpy_body_long 648+ 649+.Lcpy_body_medium: /* Count in tmp2. */ 650+#ifdef USE_VFP 651+1: 652+ vldr d0, [src, #0] 653+ subs tmp2, tmp2, #64 654+ vldr d1, [src, #8] 655+ vstr d0, [dst, #0] 656+ vldr d0, [src, #16] 657+ vstr d1, [dst, #8] 658+ vldr d1, [src, #24] 659+ vstr d0, [dst, #16] 660+ vldr d0, [src, #32] 661+ vstr d1, [dst, #24] 662+ vldr d1, [src, #40] 663+ vstr d0, [dst, #32] 664+ vldr d0, [src, #48] 665+ vstr d1, [dst, #40] 666+ vldr d1, [src, #56] 667+ vstr d0, [dst, #48] 668+ add src, src, #64 669+ vstr d1, [dst, #56] 670+ add dst, dst, #64 671+ bhs 1b 672+ tst tmp2, #0x3f 673+ beq .Ldone 674+ 675+.Ltail63aligned: /* Count in tmp2. */ 676+ and tmp1, tmp2, #0x38 677+ add dst, dst, tmp1 678+ add src, src, tmp1 679+ .macro dispatch_step i 680+ vldr d0, [src, #-(\i * 8)] 681+ vstr d0, [dst, #-(\i * 8)] 682+ .endm 683+ dispatch_7_dword 684+#else 685+ sub src, src, #8 686+ sub dst, dst, #8 687+1: 688+ ldrd A_l, A_h, [src, #8] 689+ strd A_l, A_h, [dst, #8] 690+ ldrd A_l, A_h, [src, #16] 691+ strd A_l, A_h, [dst, #16] 692+ ldrd A_l, A_h, [src, #24] 693+ strd A_l, A_h, [dst, #24] 694+ ldrd A_l, A_h, [src, #32] 695+ strd A_l, A_h, [dst, #32] 696+ ldrd A_l, A_h, [src, #40] 697+ strd A_l, A_h, [dst, #40] 698+ ldrd A_l, A_h, [src, #48] 699+ strd A_l, A_h, [dst, #48] 700+ ldrd A_l, A_h, [src, #56] 701+ strd A_l, A_h, [dst, #56] 702+ ldrd A_l, A_h, [src, #64]! 703+ strd A_l, A_h, [dst, #64]! 704+ subs tmp2, tmp2, #64 705+ bhs 1b 706+ tst tmp2, #0x3f 707+ bne 1f 708+ ldr tmp2,[sp], #FRAME_SIZE 709+ cfi_adjust_cfa_offset (-FRAME_SIZE) 710+ cfi_restore (tmp2) 711+ bx lr 712+ 713+ cfi_restore_state 714+ cfi_remember_state 715+1: 716+ add src, src, #8 717+ add dst, dst, #8 718+ 719+.Ltail63aligned: /* Count in tmp2. */ 720+ /* Copy up to 7 d-words of data. Similar to Ltail63unaligned, but 721+ we know that the src and dest are 64-bit aligned so we can use 722+ LDRD/STRD to improve efficiency. 
*/ 723+ /* TMP2 is now negative, but we don't care about that. The bottom 724+ six bits still tell us how many bytes are left to copy. */ 725+ 726+ and tmp1, tmp2, #0x38 727+ add dst, dst, tmp1 728+ add src, src, tmp1 729+ .macro dispatch_step i 730+ ldrd A_l, A_h, [src, #-(\i * 8)] 731+ strd A_l, A_h, [dst, #-(\i * 8)] 732+ .endm 733+ dispatch_7_dword 734+#endif 735+ 736+ tst tmp2, #4 737+ ldrne tmp1, [src], #4 738+ strne tmp1, [dst], #4 739+ lsls tmp2, tmp2, #31 /* Count (tmp2) now dead. */ 740+ ldrhcs tmp1, [src], #2 741+ ldrbne tmp2, [src] 742+ strhcs tmp1, [dst], #2 743+ strbne tmp2, [dst] 744+ 745+.Ldone: 746+ ldr tmp2, [sp], #FRAME_SIZE 747+ cfi_adjust_cfa_offset (-FRAME_SIZE) 748+ cfi_restore (tmp2) 749+ bx lr 750+ 751+ cfi_restore_state 752+ cfi_remember_state 753+ 754+.Lcpy_body_long: /* Count in tmp2. */ 755+ 756+ /* Long copy. We know that there's at least (prefetch_lines * 64) 757+ bytes to go. */ 758+#ifdef USE_VFP 759+ /* Don't use PLD. Instead, read some data in advance of the current 760+ copy position into a register. This should act like a PLD 761+ operation but we won't have to repeat the transfer. 
*/ 762+ 763+ vldr d3, [src, #0] 764+ vldr d4, [src, #64] 765+ vldr d5, [src, #128] 766+ vldr d6, [src, #192] 767+ vldr d7, [src, #256] 768+ 769+ vldr d0, [src, #8] 770+ vldr d1, [src, #16] 771+ vldr d2, [src, #24] 772+ add src, src, #32 773+ 774+ subs tmp2, tmp2, #prefetch_lines * 64 * 2 775+ blo 2f 776+1: 777+ cpy_line_vfp d3, 0 778+ cpy_line_vfp d4, 64 779+ cpy_line_vfp d5, 128 780+ add dst, dst, #3 * 64 781+ add src, src, #3 * 64 782+ cpy_line_vfp d6, 0 783+ cpy_line_vfp d7, 64 784+ add dst, dst, #2 * 64 785+ add src, src, #2 * 64 786+ subs tmp2, tmp2, #prefetch_lines * 64 787+ bhs 1b 788+ 789+2: 790+ cpy_tail_vfp d3, 0 791+ cpy_tail_vfp d4, 64 792+ cpy_tail_vfp d5, 128 793+ add src, src, #3 * 64 794+ add dst, dst, #3 * 64 795+ cpy_tail_vfp d6, 0 796+ vstr d7, [dst, #64] 797+ vldr d7, [src, #64] 798+ vstr d0, [dst, #64 + 8] 799+ vldr d0, [src, #64 + 8] 800+ vstr d1, [dst, #64 + 16] 801+ vldr d1, [src, #64 + 16] 802+ vstr d2, [dst, #64 + 24] 803+ vldr d2, [src, #64 + 24] 804+ vstr d7, [dst, #64 + 32] 805+ add src, src, #96 806+ vstr d0, [dst, #64 + 40] 807+ vstr d1, [dst, #64 + 48] 808+ vstr d2, [dst, #64 + 56] 809+ add dst, dst, #128 810+ add tmp2, tmp2, #prefetch_lines * 64 811+ b .Lcpy_body_medium 812+#else 813+ /* Long copy. Use an SMS style loop to maximize the I/O 814+ bandwidth of the core. We don't have enough spare registers 815+ to synthesise prefetching, so use PLD operations. */ 816+ /* Pre-bias src and dst. */ 817+ sub src, src, #8 818+ sub dst, dst, #8 819+ pld [src, #8] 820+ pld [src, #72] 821+ subs tmp2, tmp2, #64 822+ pld [src, #136] 823+ ldrd A_l, A_h, [src, #8] 824+ strd B_l, B_h, [sp, #8] 825+ cfi_rel_offset (B_l, 8) 826+ cfi_rel_offset (B_h, 12) 827+ ldrd B_l, B_h, [src, #16] 828+ strd C_l, C_h, [sp, #16] 829+ cfi_rel_offset (C_l, 16) 830+ cfi_rel_offset (C_h, 20) 831+ ldrd C_l, C_h, [src, #24] 832+ strd D_l, D_h, [sp, #24] 833+ cfi_rel_offset (D_l, 24) 834+ cfi_rel_offset (D_h, 28) 835+ pld [src, #200] 836+ ldrd D_l, D_h, [src, #32]! 
837+ b 1f 838+ .p2align 6 839+2: 840+ pld [src, #232] 841+ strd A_l, A_h, [dst, #40] 842+ ldrd A_l, A_h, [src, #40] 843+ strd B_l, B_h, [dst, #48] 844+ ldrd B_l, B_h, [src, #48] 845+ strd C_l, C_h, [dst, #56] 846+ ldrd C_l, C_h, [src, #56] 847+ strd D_l, D_h, [dst, #64]! 848+ ldrd D_l, D_h, [src, #64]! 849+ subs tmp2, tmp2, #64 850+1: 851+ strd A_l, A_h, [dst, #8] 852+ ldrd A_l, A_h, [src, #8] 853+ strd B_l, B_h, [dst, #16] 854+ ldrd B_l, B_h, [src, #16] 855+ strd C_l, C_h, [dst, #24] 856+ ldrd C_l, C_h, [src, #24] 857+ strd D_l, D_h, [dst, #32] 858+ ldrd D_l, D_h, [src, #32] 859+ bcs 2b 860+ /* Save the remaining bytes and restore the callee-saved regs. */ 861+ strd A_l, A_h, [dst, #40] 862+ add src, src, #40 863+ strd B_l, B_h, [dst, #48] 864+ ldrd B_l, B_h, [sp, #8] 865+ cfi_restore (B_l) 866+ cfi_restore (B_h) 867+ strd C_l, C_h, [dst, #56] 868+ ldrd C_l, C_h, [sp, #16] 869+ cfi_restore (C_l) 870+ cfi_restore (C_h) 871+ strd D_l, D_h, [dst, #64] 872+ ldrd D_l, D_h, [sp, #24] 873+ cfi_restore (D_l) 874+ cfi_restore (D_h) 875+ add dst, dst, #72 876+ tst tmp2, #0x3f 877+ bne .Ltail63aligned 878+ ldr tmp2, [sp], #FRAME_SIZE 879+ cfi_adjust_cfa_offset (-FRAME_SIZE) 880+ cfi_restore (tmp2) 881+ bx lr 882+#endif 883+ 884+ cfi_restore_state 885+ cfi_remember_state 886+ 887+.Lcpy_notaligned: 888+ pld [src, #0] 889+ pld [src, #64] 890+ /* There's at least 64 bytes to copy, but there is no mutual 891+ alignment. */ 892+ /* Bring DST to 64-bit alignment. 
*/ 893+ lsls tmp2, dst, #29 894+ pld [src, #(2 * 64)] 895+ beq 1f 896+ rsbs tmp2, tmp2, #0 897+ sub count, count, tmp2, lsr #29 898+ ldrmi tmp1, [src], #4 899+ strmi tmp1, [dst], #4 900+ lsls tmp2, tmp2, #2 901+ ldrbne tmp1, [src], #1 902+ ldrhcs tmp2, [src], #2 903+ strbne tmp1, [dst], #1 904+ strhcs tmp2, [dst], #2 905+1: 906+ pld [src, #(3 * 64)] 907+ subs count, count, #64 908+ ldrlo tmp2, [sp], #FRAME_SIZE 909+ blo .Ltail63unaligned 910+ pld [src, #(4 * 64)] 911+ 912+#ifdef USE_NEON 913+ /* These need an extra layer of macro just to work around a 914+ bug in the assembler's parser when an operand starts with 915+ a {...}. */ 916+ .macro neon_load_multi reglist, basereg 917+ vld1.8 {\reglist}, [\basereg]! 918+ .endm 919+ .macro neon_store_multi reglist, basereg 920+ vst1.8 {\reglist}, [ALIGN (\basereg, 64)]! 921+ .endm 922+ 923+ neon_load_multi d0-d3, src 924+ neon_load_multi d4-d7, src 925+ subs count, count, #64 926+ blo 2f 927+1: 928+ pld [src, #(4 * 64)] 929+ neon_store_multi d0-d3, dst 930+ neon_load_multi d0-d3, src 931+ neon_store_multi d4-d7, dst 932+ neon_load_multi d4-d7, src 933+ subs count, count, #64 934+ bhs 1b 935+2: 936+ neon_store_multi d0-d3, dst 937+ neon_store_multi d4-d7, dst 938+ ands count, count, #0x3f 939+#else 940+ /* Use an SMS style loop to maximize the I/O bandwidth. */ 941+ sub src, src, #4 942+ sub dst, dst, #8 943+ subs tmp2, count, #64 /* Use tmp2 for count. */ 944+ ldr A_l, [src, #4] 945+ ldr A_h, [src, #8] 946+ strd B_l, B_h, [sp, #8] 947+ cfi_rel_offset (B_l, 8) 948+ cfi_rel_offset (B_h, 12) 949+ ldr B_l, [src, #12] 950+ ldr B_h, [src, #16] 951+ strd C_l, C_h, [sp, #16] 952+ cfi_rel_offset (C_l, 16) 953+ cfi_rel_offset (C_h, 20) 954+ ldr C_l, [src, #20] 955+ ldr C_h, [src, #24] 956+ strd D_l, D_h, [sp, #24] 957+ cfi_rel_offset (D_l, 24) 958+ cfi_rel_offset (D_h, 28) 959+ ldr D_l, [src, #28] 960+ ldr D_h, [src, #32]! 
961+ b 1f 962+ .p2align 6 963+2: 964+ pld [src, #(5 * 64) - (32 - 4)] 965+ strd A_l, A_h, [dst, #40] 966+ ldr A_l, [src, #36] 967+ ldr A_h, [src, #40] 968+ strd B_l, B_h, [dst, #48] 969+ ldr B_l, [src, #44] 970+ ldr B_h, [src, #48] 971+ strd C_l, C_h, [dst, #56] 972+ ldr C_l, [src, #52] 973+ ldr C_h, [src, #56] 974+ strd D_l, D_h, [dst, #64]! 975+ ldr D_l, [src, #60] 976+ ldr D_h, [src, #64]! 977+ subs tmp2, tmp2, #64 978+1: 979+ strd A_l, A_h, [dst, #8] 980+ ldr A_l, [src, #4] 981+ ldr A_h, [src, #8] 982+ strd B_l, B_h, [dst, #16] 983+ ldr B_l, [src, #12] 984+ ldr B_h, [src, #16] 985+ strd C_l, C_h, [dst, #24] 986+ ldr C_l, [src, #20] 987+ ldr C_h, [src, #24] 988+ strd D_l, D_h, [dst, #32] 989+ ldr D_l, [src, #28] 990+ ldr D_h, [src, #32] 991+ bcs 2b 992+ 993+ /* Save the remaining bytes and restore the callee-saved regs. */ 994+ strd A_l, A_h, [dst, #40] 995+ add src, src, #36 996+ strd B_l, B_h, [dst, #48] 997+ ldrd B_l, B_h, [sp, #8] 998+ cfi_restore (B_l) 999+ cfi_restore (B_h) 1000+ strd C_l, C_h, [dst, #56] 1001+ ldrd C_l, C_h, [sp, #16] 1002+ cfi_restore (C_l) 1003+ cfi_restore (C_h) 1004+ strd D_l, D_h, [dst, #64] 1005+ ldrd D_l, D_h, [sp, #24] 1006+ cfi_restore (D_l) 1007+ cfi_restore (D_h) 1008+ add dst, dst, #72 1009+ ands count, tmp2, #0x3f 1010+#endif 1011+ ldr tmp2, [sp], #FRAME_SIZE 1012+ cfi_adjust_cfa_offset (-FRAME_SIZE) 1013+ cfi_restore (tmp2) 1014+ bne .Ltail63unaligned 1015+ bx lr 1016+ 1017+END(memcpy) 1018+libc_hidden_builtin_def (memcpy) 1019diff --git a/libc/string/arm/glibc-neon/memmove.S b/libc/string/arm/glibc-neon/memmove.S 1020new file mode 100644 1021index 0000000..b01164a 1022--- /dev/null 1023+++ b/libc/string/arm/glibc-neon/memmove.S 1024@@ -0,0 +1,332 @@ 1025+/* Copyright (C) 2006-2021 Free Software Foundation, Inc. 1026+ This file is part of the GNU C Library. 1027+ 1028+ Contributed by MontaVista Software, Inc. 
(written by Nicolas Pitre) 1029+ 1030+ The GNU C Library is free software; you can redistribute it and/or 1031+ modify it under the terms of the GNU Lesser General Public 1032+ License as published by the Free Software Foundation; either 1033+ version 2.1 of the License, or (at your option) any later version. 1034+ 1035+ The GNU C Library is distributed in the hope that it will be useful, 1036+ but WITHOUT ANY WARRANTY; without even the implied warranty of 1037+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 1038+ Lesser General Public License for more details. 1039+ 1040+ You should have received a copy of the GNU Lesser General Public 1041+ License along with the GNU C Library. If not, see 1042+ <https://www.gnu.org/licenses/>. */ 1043+ 1044+/* Thumb requires excessive IT insns here. */ 1045+#define NO_THUMB 1046+#include <sysdep.h> 1047+#include <arm-features.h> 1048+ 1049+/* 1050+ * Data preload for architectures that support it (ARM V5TE and above) 1051+ */ 1052+#if (!defined (__ARM_ARCH_2__) && !defined (__ARM_ARCH_3__) \ 1053+ && !defined (__ARM_ARCH_3M__) && !defined (__ARM_ARCH_4__) \ 1054+ && !defined (__ARM_ARCH_4T__) && !defined (__ARM_ARCH_5__) \ 1055+ && !defined (__ARM_ARCH_5T__)) 1056+#define PLD(code...) code 1057+#else 1058+#define PLD(code...) 1059+#endif 1060+ 1061+/* 1062+ * This can be used to enable code to cacheline align the source pointer. 1063+ * Experiments on tested architectures (StrongARM and XScale) didn't show 1064+ * this a worthwhile thing to do. That might be different in the future. 1065+ */ 1066+//#define CALGN(code...) code 1067+#define CALGN(code...) 1068+ 1069+/* 1070+ * Endian independent macros for shifting bytes within registers. 
1071+ */ 1072+#ifndef __ARMEB__ 1073+#define PULL lsr 1074+#define PUSH lsl 1075+#else 1076+#define PULL lsl 1077+#define PUSH lsr 1078+#endif 1079+ 1080+ .text 1081+ .syntax unified 1082+ 1083+/* 1084+ * Prototype: void *memmove(void *dest, const void *src, size_t n); 1085+ * 1086+ * Note: 1087+ * 1088+ * If the memory regions don't overlap, we simply branch to memcpy which is 1089+ * normally a bit faster. Otherwise the copy is done going downwards. 1090+ */ 1091+ 1092+ENTRY(memmove) 1093+ 1094+ subs ip, r0, r1 1095+ cmphi r2, ip 1096+#if !IS_IN (libc) 1097+ bls memcpy 1098+#else 1099+ bls HIDDEN_JUMPTARGET(memcpy) 1100+#endif 1101+ 1102+ push {r0, r4, lr} 1103+ cfi_adjust_cfa_offset (12) 1104+ cfi_rel_offset (r4, 4) 1105+ cfi_rel_offset (lr, 8) 1106+ 1107+ cfi_remember_state 1108+ 1109+ add r1, r1, r2 1110+ add r0, r0, r2 1111+ subs r2, r2, #4 1112+ blo 8f 1113+ ands ip, r0, #3 1114+ PLD( pld [r1, #-4] ) 1115+ bne 9f 1116+ ands ip, r1, #3 1117+ bne 10f 1118+ 1119+1: subs r2, r2, #(28) 1120+ push {r5 - r8} 1121+ cfi_adjust_cfa_offset (16) 1122+ cfi_rel_offset (r5, 0) 1123+ cfi_rel_offset (r6, 4) 1124+ cfi_rel_offset (r7, 8) 1125+ cfi_rel_offset (r8, 12) 1126+ blo 5f 1127+ 1128+ CALGN( ands ip, r1, #31 ) 1129+ CALGN( sbcsne r4, ip, r2 ) @ C is always set here 1130+ CALGN( bcs 2f ) 1131+ CALGN( adr r4, 6f ) 1132+ CALGN( subs r2, r2, ip ) @ C is set here 1133+#ifndef ARM_ALWAYS_BX 1134+ CALGN( add pc, r4, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)) 1135+#else 1136+ CALGN( add r4, r4, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)) 1137+ CALGN( bx r4 ) 1138+#endif 1139+ 1140+ PLD( pld [r1, #-4] ) 1141+2: PLD( cmp r2, #96 ) 1142+ PLD( pld [r1, #-32] ) 1143+ PLD( blo 4f ) 1144+ PLD( pld [r1, #-64] ) 1145+ PLD( pld [r1, #-96] ) 1146+ 1147+3: PLD( pld [r1, #-128] ) 1148+4: ldmdb r1!, {r3, r4, r5, r6, r7, r8, ip, lr} 1149+ subs r2, r2, #32 1150+ stmdb r0!, {r3, r4, r5, r6, r7, r8, ip, lr} 1151+ bhs 3b 1152+ 1153+5: ands ip, r2, #28 1154+ rsb ip, ip, #32 1155+#ifndef ARM_ALWAYS_BX 1156+ /* C is 
always clear here. */ 1157+ addne pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2) 1158+ b 7f 1159+#else 1160+ beq 7f 1161+ push {r10} 1162+ cfi_adjust_cfa_offset (4) 1163+ cfi_rel_offset (r10, 0) 1164+0: add r10, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2) 1165+ /* If alignment is not perfect, then there will be some 1166+ padding (nop) instructions between this BX and label 6. 1167+ The computation above assumed that two instructions 1168+ later is exactly the right spot. */ 1169+ add r10, #(6f - (0b + PC_OFS)) 1170+ bx r10 1171+#endif 1172+ .p2align ARM_BX_ALIGN_LOG2 1173+6: nop 1174+ .p2align ARM_BX_ALIGN_LOG2 1175+ ldr r3, [r1, #-4]! 1176+ .p2align ARM_BX_ALIGN_LOG2 1177+ ldr r4, [r1, #-4]! 1178+ .p2align ARM_BX_ALIGN_LOG2 1179+ ldr r5, [r1, #-4]! 1180+ .p2align ARM_BX_ALIGN_LOG2 1181+ ldr r6, [r1, #-4]! 1182+ .p2align ARM_BX_ALIGN_LOG2 1183+ ldr r7, [r1, #-4]! 1184+ .p2align ARM_BX_ALIGN_LOG2 1185+ ldr r8, [r1, #-4]! 1186+ .p2align ARM_BX_ALIGN_LOG2 1187+ ldr lr, [r1, #-4]! 1188+ 1189+#ifndef ARM_ALWAYS_BX 1190+ add pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2) 1191+ nop 1192+#else 1193+0: add r10, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2) 1194+ /* If alignment is not perfect, then there will be some 1195+ padding (nop) instructions between this BX and label 66. 1196+ The computation above assumed that two instructions 1197+ later is exactly the right spot. */ 1198+ add r10, #(66f - (0b + PC_OFS)) 1199+ bx r10 1200+#endif 1201+ .p2align ARM_BX_ALIGN_LOG2 1202+66: nop 1203+ .p2align ARM_BX_ALIGN_LOG2 1204+ str r3, [r0, #-4]! 1205+ .p2align ARM_BX_ALIGN_LOG2 1206+ str r4, [r0, #-4]! 1207+ .p2align ARM_BX_ALIGN_LOG2 1208+ str r5, [r0, #-4]! 1209+ .p2align ARM_BX_ALIGN_LOG2 1210+ str r6, [r0, #-4]! 1211+ .p2align ARM_BX_ALIGN_LOG2 1212+ str r7, [r0, #-4]! 1213+ .p2align ARM_BX_ALIGN_LOG2 1214+ str r8, [r0, #-4]! 1215+ .p2align ARM_BX_ALIGN_LOG2 1216+ str lr, [r0, #-4]! 
1217+ 1218+#ifdef ARM_ALWAYS_BX 1219+ pop {r10} 1220+ cfi_adjust_cfa_offset (-4) 1221+ cfi_restore (r10) 1222+#endif 1223+ 1224+ CALGN( bcs 2b ) 1225+ 1226+7: pop {r5 - r8} 1227+ cfi_adjust_cfa_offset (-16) 1228+ cfi_restore (r5) 1229+ cfi_restore (r6) 1230+ cfi_restore (r7) 1231+ cfi_restore (r8) 1232+ 1233+8: movs r2, r2, lsl #31 1234+ ldrbne r3, [r1, #-1]! 1235+ ldrbcs r4, [r1, #-1]! 1236+ ldrbcs ip, [r1, #-1] 1237+ strbne r3, [r0, #-1]! 1238+ strbcs r4, [r0, #-1]! 1239+ strbcs ip, [r0, #-1] 1240+ 1241+#if ((defined (__ARM_ARCH_4T__) && defined (__THUMB_INTERWORK__)) \ 1242+ || defined (ARM_ALWAYS_BX)) 1243+ pop {r0, r4, lr} 1244+ cfi_adjust_cfa_offset (-12) 1245+ cfi_restore (r4) 1246+ cfi_restore (lr) 1247+ bx lr 1248+#else 1249+ pop {r0, r4, pc} 1250+#endif 1251+ 1252+ cfi_restore_state 1253+ 1254+9: cmp ip, #2 1255+ ldrbgt r3, [r1, #-1]! 1256+ ldrbge r4, [r1, #-1]! 1257+ ldrb lr, [r1, #-1]! 1258+ strbgt r3, [r0, #-1]! 1259+ strbge r4, [r0, #-1]! 1260+ subs r2, r2, ip 1261+ strb lr, [r0, #-1]! 
1262+ blo 8b 1263+ ands ip, r1, #3 1264+ beq 1b 1265+ 1266+10: bic r1, r1, #3 1267+ cmp ip, #2 1268+ ldr r3, [r1, #0] 1269+ beq 17f 1270+ blt 18f 1271+ 1272+ 1273+ .macro backward_copy_shift push pull 1274+ 1275+ subs r2, r2, #28 1276+ blo 14f 1277+ 1278+ CALGN( ands ip, r1, #31 ) 1279+ CALGN( rsb ip, ip, #32 ) 1280+ CALGN( sbcsne r4, ip, r2 ) @ C is always set here 1281+ CALGN( subcc r2, r2, ip ) 1282+ CALGN( bcc 15f ) 1283+ 1284+11: push {r5 - r8, r10} 1285+ cfi_adjust_cfa_offset (20) 1286+ cfi_rel_offset (r5, 0) 1287+ cfi_rel_offset (r6, 4) 1288+ cfi_rel_offset (r7, 8) 1289+ cfi_rel_offset (r8, 12) 1290+ cfi_rel_offset (r10, 16) 1291+ 1292+ PLD( pld [r1, #-4] ) 1293+ PLD( cmp r2, #96 ) 1294+ PLD( pld [r1, #-32] ) 1295+ PLD( blo 13f ) 1296+ PLD( pld [r1, #-64] ) 1297+ PLD( pld [r1, #-96] ) 1298+ 1299+12: PLD( pld [r1, #-128] ) 1300+13: ldmdb r1!, {r7, r8, r10, ip} 1301+ mov lr, r3, PUSH #\push 1302+ subs r2, r2, #32 1303+ ldmdb r1!, {r3, r4, r5, r6} 1304+ orr lr, lr, ip, PULL #\pull 1305+ mov ip, ip, PUSH #\push 1306+ orr ip, ip, r10, PULL #\pull 1307+ mov r10, r10, PUSH #\push 1308+ orr r10, r10, r8, PULL #\pull 1309+ mov r8, r8, PUSH #\push 1310+ orr r8, r8, r7, PULL #\pull 1311+ mov r7, r7, PUSH #\push 1312+ orr r7, r7, r6, PULL #\pull 1313+ mov r6, r6, PUSH #\push 1314+ orr r6, r6, r5, PULL #\pull 1315+ mov r5, r5, PUSH #\push 1316+ orr r5, r5, r4, PULL #\pull 1317+ mov r4, r4, PUSH #\push 1318+ orr r4, r4, r3, PULL #\pull 1319+ stmdb r0!, {r4 - r8, r10, ip, lr} 1320+ bhs 12b 1321+ 1322+ pop {r5 - r8, r10} 1323+ cfi_adjust_cfa_offset (-20) 1324+ cfi_restore (r5) 1325+ cfi_restore (r6) 1326+ cfi_restore (r7) 1327+ cfi_restore (r8) 1328+ cfi_restore (r10) 1329+ 1330+14: ands ip, r2, #28 1331+ beq 16f 1332+ 1333+15: mov lr, r3, PUSH #\push 1334+ ldr r3, [r1, #-4]! 1335+ subs ip, ip, #4 1336+ orr lr, lr, r3, PULL #\pull 1337+ str lr, [r0, #-4]! 
1338+ bgt 15b 1339+ CALGN( cmp r2, #0 ) 1340+ CALGN( bge 11b ) 1341+ 1342+16: add r1, r1, #(\pull / 8) 1343+ b 8b 1344+ 1345+ .endm 1346+ 1347+ 1348+ backward_copy_shift push=8 pull=24 1349+ 1350+17: backward_copy_shift push=16 pull=16 1351+ 1352+18: backward_copy_shift push=24 pull=8 1353+ 1354+ 1355+END(memmove) 1356+libc_hidden_builtin_def (memmove) 1357diff --git a/libc/string/arm/glibc-neon/memset.S b/libc/string/arm/glibc-neon/memset.S 1358new file mode 100644 1359index 0000000..dc89ca7 1360--- /dev/null 1361+++ b/libc/string/arm/glibc-neon/memset.S 1362@@ -0,0 +1,68 @@ 1363+/* Copyright (C) 1998-2021 Free Software Foundation, Inc. 1364+ This file is part of the GNU C Library. 1365+ 1366+ The GNU C Library is free software; you can redistribute it and/or 1367+ modify it under the terms of the GNU Lesser General Public 1368+ License as published by the Free Software Foundation; either 1369+ version 2.1 of the License, or (at your option) any later version. 1370+ 1371+ The GNU C Library is distributed in the hope that it will be useful, 1372+ but WITHOUT ANY WARRANTY; without even the implied warranty of 1373+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 1374+ Lesser General Public License for more details. 1375+ 1376+ You should have received a copy of the GNU Lesser General Public 1377+ License along with the GNU C Library. If not, see 1378+ <https://www.gnu.org/licenses/>. */ 1379+ 1380+/* Thumb requires excessive IT insns here. */ 1381+#define NO_THUMB 1382+#include <sysdep.h> 1383+ 1384+ .text 1385+ .syntax unified 1386+ 1387+/* void *memset (dstpp, c, len) */ 1388+ 1389+ENTRY(memset) 1390+ mov r3, r0 1391+ cmp r2, #8 1392+ bcc 2f @ less than 8 bytes to move 1393+ 1394+1: 1395+ tst r3, #3 @ aligned yet? 
1396+ strbne r1, [r3], #1 1397+ subne r2, r2, #1 1398+ bne 1b 1399+ 1400+ and r1, r1, #255 @ clear any sign bits 1401+ orr r1, r1, r1, lsl $8 1402+ orr r1, r1, r1, lsl $16 1403+ mov ip, r1 1404+ 1405+1: 1406+ subs r2, r2, #8 1407+ stmiacs r3!, {r1, ip} @ store up to 32 bytes per loop iteration 1408+ subscs r2, r2, #8 1409+ stmiacs r3!, {r1, ip} 1410+ subscs r2, r2, #8 1411+ stmiacs r3!, {r1, ip} 1412+ subscs r2, r2, #8 1413+ stmiacs r3!, {r1, ip} 1414+ bcs 1b 1415+ 1416+ and r2, r2, #7 1417+2: 1418+ subs r2, r2, #1 @ store up to 4 bytes per loop iteration 1419+ strbcs r1, [r3], #1 1420+ subscs r2, r2, #1 1421+ strbcs r1, [r3], #1 1422+ subscs r2, r2, #1 1423+ strbcs r1, [r3], #1 1424+ subscs r2, r2, #1 1425+ strbcs r1, [r3], #1 1426+ bcs 2b 1427+ 1428+ DO_RET(lr) 1429+END(memset) 1430+libc_hidden_builtin_def (memset) 1431diff --git a/libc/string/arm/glibc-neon/strcmp.S b/libc/string/arm/glibc-neon/strcmp.S 1432new file mode 100644 1433index 0000000..98a3c6c 1434--- /dev/null 1435+++ b/libc/string/arm/glibc-neon/strcmp.S 1436@@ -0,0 +1,504 @@ 1437+/* strcmp implementation for ARMv7-A, optimized for Cortex-A15. 1438+ Copyright (C) 2012-2021 Free Software Foundation, Inc. 1439+ This file is part of the GNU C Library. 1440+ 1441+ The GNU C Library is free software; you can redistribute it and/or 1442+ modify it under the terms of the GNU Lesser General Public 1443+ License as published by the Free Software Foundation; either 1444+ version 2.1 of the License, or (at your option) any later version. 1445+ 1446+ The GNU C Library is distributed in the hope that it will be useful, 1447+ but WITHOUT ANY WARRANTY; without even the implied warranty of 1448+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 1449+ Lesser General Public License for more details. 1450+ 1451+ You should have received a copy of the GNU Lesser General Public 1452+ License along with the GNU C Library. If not, see 1453+ <https://www.gnu.org/licenses/>. 
*/ 1454+ 1455+#include <arm-features.h> 1456+#include <sysdep.h> 1457+ 1458+/* Implementation of strcmp for ARMv7 when DSP instructions are 1459+ available. Use ldrd to support wider loads, provided the data 1460+ is sufficiently aligned. Use saturating arithmetic to optimize 1461+ the compares. */ 1462+ 1463+/* Build Options: 1464+ STRCMP_PRECHECK: Run a quick pre-check of the first byte in the 1465+ string. If comparing completely random strings the pre-check will 1466+ save time, since there is a very high probability of a mismatch in 1467+ the first character: we save significant overhead if this is the 1468+ common case. However, if strings are likely to be identical (e.g. 1469+ because we're verifying a hit in a hash table), then this check 1470+ is largely redundant. */ 1471+ 1472+#define STRCMP_PRECHECK 1 1473+ 1474+ .syntax unified 1475+ 1476+#ifdef __ARM_BIG_ENDIAN 1477+# define S2LO lsl 1478+# define S2LOEQ lsleq 1479+# define S2HI lsr 1480+# define MSB 0x000000ff 1481+# define LSB 0xff000000 1482+# define BYTE0_OFFSET 24 1483+# define BYTE1_OFFSET 16 1484+# define BYTE2_OFFSET 8 1485+# define BYTE3_OFFSET 0 1486+#else /* not __ARM_BIG_ENDIAN */ 1487+# define S2LO lsr 1488+# define S2LOEQ lsreq 1489+# define S2HI lsl 1490+# define BYTE0_OFFSET 0 1491+# define BYTE1_OFFSET 8 1492+# define BYTE2_OFFSET 16 1493+# define BYTE3_OFFSET 24 1494+# define MSB 0xff000000 1495+# define LSB 0x000000ff 1496+#endif /* not __ARM_BIG_ENDIAN */ 1497+ 1498+/* Parameters and result. */ 1499+#define src1 r0 1500+#define src2 r1 1501+#define result r0 /* Overlaps src1. */ 1502+ 1503+/* Internal variables. */ 1504+#define tmp1 r4 1505+#define tmp2 r5 1506+#define const_m1 r12 1507+ 1508+/* Additional internal variables for 64-bit aligned data. */ 1509+#define data1a r2 1510+#define data1b r3 1511+#define data2a r6 1512+#define data2b r7 1513+#define syndrome_a tmp1 1514+#define syndrome_b tmp2 1515+ 1516+/* Additional internal variables for 32-bit aligned data. 
*/ 1517+#define data1 r2 1518+#define data2 r3 1519+#define syndrome tmp2 1520+ 1521+ 1522+ .thumb 1523+ 1524+/* In Thumb code we can't use MVN with a register shift, but we do have ORN. */ 1525+.macro prepare_mask mask_reg, nbits_reg 1526+ S2HI \mask_reg, const_m1, \nbits_reg 1527+.endm 1528+.macro apply_mask data_reg, mask_reg 1529+ orn \data_reg, \data_reg, \mask_reg 1530+.endm 1531+ 1532+ /* Macro to compute and return the result value for word-aligned 1533+ cases. */ 1534+ .macro strcmp_epilogue_aligned synd d1 d2 restore_r6 1535+#ifdef __ARM_BIG_ENDIAN 1536+ /* If data1 contains a zero byte, then syndrome will contain a 1 in 1537+ bit 7 of that byte. Otherwise, the highest set bit in the 1538+ syndrome will highlight the first different bit. It is therefore 1539+ sufficient to extract the eight bits starting with the syndrome 1540+ bit. */ 1541+ clz tmp1, \synd 1542+ lsl r1, \d2, tmp1 1543+ .if \restore_r6 1544+ ldrd r6, r7, [sp, #8] 1545+ .endif 1546+ lsl \d1, \d1, tmp1 1547+ lsr result, \d1, #24 1548+ ldrd r4, r5, [sp], #16 1549+ cfi_remember_state 1550+ cfi_def_cfa_offset (0) 1551+ cfi_restore (r4) 1552+ cfi_restore (r5) 1553+ cfi_restore (r6) 1554+ cfi_restore (r7) 1555+ sub result, result, r1, lsr #24 1556+ bx lr 1557+#else 1558+ /* To use the big-endian trick we'd have to reverse all three words. 1559+ that's slower than this approach. 
*/ 1560+ rev \synd, \synd 1561+ clz tmp1, \synd 1562+ bic tmp1, tmp1, #7 1563+ lsr r1, \d2, tmp1 1564+ .if \restore_r6 1565+ ldrd r6, r7, [sp, #8] 1566+ .endif 1567+ lsr \d1, \d1, tmp1 1568+ and result, \d1, #255 1569+ and r1, r1, #255 1570+ ldrd r4, r5, [sp], #16 1571+ cfi_remember_state 1572+ cfi_def_cfa_offset (0) 1573+ cfi_restore (r4) 1574+ cfi_restore (r5) 1575+ cfi_restore (r6) 1576+ cfi_restore (r7) 1577+ sub result, result, r1 1578+ 1579+ bx lr 1580+#endif 1581+ .endm 1582+ 1583+ .text 1584+ .p2align 5 1585+.Lstrcmp_start_addr: 1586+#if STRCMP_PRECHECK == 1 1587+.Lfastpath_exit: 1588+ sub r0, r2, r3 1589+ bx lr 1590+ nop 1591+#endif 1592+ENTRY (strcmp) 1593+#if STRCMP_PRECHECK == 1 1594+ ldrb r2, [src1] 1595+ ldrb r3, [src2] 1596+ cmp r2, #1 1597+ it cs 1598+ cmpcs r2, r3 1599+ bne .Lfastpath_exit 1600+#endif 1601+ strd r4, r5, [sp, #-16]! 1602+ cfi_def_cfa_offset (16) 1603+ cfi_offset (r4, -16) 1604+ cfi_offset (r5, -12) 1605+ orr tmp1, src1, src2 1606+ strd r6, r7, [sp, #8] 1607+ cfi_offset (r6, -8) 1608+ cfi_offset (r7, -4) 1609+ mvn const_m1, #0 1610+ lsl r2, tmp1, #29 1611+ cbz r2, .Lloop_aligned8 1612+ 1613+.Lnot_aligned: 1614+ eor tmp1, src1, src2 1615+ tst tmp1, #7 1616+ bne .Lmisaligned8 1617+ 1618+ /* Deal with mutual misalignment by aligning downwards and then 1619+ masking off the unwanted loaded data to prevent a difference. */ 1620+ and tmp1, src1, #7 1621+ bic src1, src1, #7 1622+ and tmp2, tmp1, #3 1623+ bic src2, src2, #7 1624+ lsl tmp2, tmp2, #3 /* Bytes -> bits. */ 1625+ ldrd data1a, data1b, [src1], #16 1626+ tst tmp1, #4 1627+ ldrd data2a, data2b, [src2], #16 1628+ prepare_mask tmp1, tmp2 1629+ apply_mask data1a, tmp1 1630+ apply_mask data2a, tmp1 1631+ beq .Lstart_realigned8 1632+ apply_mask data1b, tmp1 1633+ mov data1a, const_m1 1634+ apply_mask data2b, tmp1 1635+ mov data2a, const_m1 1636+ b .Lstart_realigned8 1637+ 1638+ /* Unwind the inner loop by a factor of 2, giving 16 bytes per 1639+ pass. 
*/ 1640+ .p2align 5,,12 /* Don't start in the tail bytes of a cache line. */ 1641+ .p2align 2 /* Always word aligned. */ 1642+.Lloop_aligned8: 1643+ ldrd data1a, data1b, [src1], #16 1644+ ldrd data2a, data2b, [src2], #16 1645+.Lstart_realigned8: 1646+ uadd8 syndrome_b, data1a, const_m1 /* Only want GE bits, */ 1647+ eor syndrome_a, data1a, data2a 1648+ sel syndrome_a, syndrome_a, const_m1 1649+ cbnz syndrome_a, .Ldiff_in_a 1650+ uadd8 syndrome_b, data1b, const_m1 /* Only want GE bits. */ 1651+ eor syndrome_b, data1b, data2b 1652+ sel syndrome_b, syndrome_b, const_m1 1653+ cbnz syndrome_b, .Ldiff_in_b 1654+ 1655+ ldrd data1a, data1b, [src1, #-8] 1656+ ldrd data2a, data2b, [src2, #-8] 1657+ uadd8 syndrome_b, data1a, const_m1 /* Only want GE bits, */ 1658+ eor syndrome_a, data1a, data2a 1659+ sel syndrome_a, syndrome_a, const_m1 1660+ uadd8 syndrome_b, data1b, const_m1 /* Only want GE bits. */ 1661+ eor syndrome_b, data1b, data2b 1662+ sel syndrome_b, syndrome_b, const_m1 1663+ /* Can't use CBZ for backwards branch. */ 1664+ orrs syndrome_b, syndrome_b, syndrome_a /* Only need if s_a == 0 */ 1665+ beq .Lloop_aligned8 1666+ 1667+.Ldiff_found: 1668+ cbnz syndrome_a, .Ldiff_in_a 1669+ 1670+.Ldiff_in_b: 1671+ strcmp_epilogue_aligned syndrome_b, data1b, data2b 1 1672+ 1673+.Ldiff_in_a: 1674+ cfi_restore_state 1675+ strcmp_epilogue_aligned syndrome_a, data1a, data2a 1 1676+ 1677+ cfi_restore_state 1678+.Lmisaligned8: 1679+ tst tmp1, #3 1680+ bne .Lmisaligned4 1681+ ands tmp1, src1, #3 1682+ bne .Lmutual_align4 1683+ 1684+ /* Unrolled by a factor of 2, to reduce the number of post-increment 1685+ operations. */ 1686+.Lloop_aligned4: 1687+ ldr data1, [src1], #8 1688+ ldr data2, [src2], #8 1689+.Lstart_realigned4: 1690+ uadd8 syndrome, data1, const_m1 /* Only need GE bits. 
*/ 1691+ eor syndrome, data1, data2 1692+ sel syndrome, syndrome, const_m1 1693+ cbnz syndrome, .Laligned4_done 1694+ ldr data1, [src1, #-4] 1695+ ldr data2, [src2, #-4] 1696+ uadd8 syndrome, data1, const_m1 1697+ eor syndrome, data1, data2 1698+ sel syndrome, syndrome, const_m1 1699+ cmp syndrome, #0 1700+ beq .Lloop_aligned4 1701+ 1702+.Laligned4_done: 1703+ strcmp_epilogue_aligned syndrome, data1, data2, 0 1704+ 1705+.Lmutual_align4: 1706+ cfi_restore_state 1707+ /* Deal with mutual misalignment by aligning downwards and then 1708+ masking off the unwanted loaded data to prevent a difference. */ 1709+ lsl tmp1, tmp1, #3 /* Bytes -> bits. */ 1710+ bic src1, src1, #3 1711+ ldr data1, [src1], #8 1712+ bic src2, src2, #3 1713+ ldr data2, [src2], #8 1714+ 1715+ prepare_mask tmp1, tmp1 1716+ apply_mask data1, tmp1 1717+ apply_mask data2, tmp1 1718+ b .Lstart_realigned4 1719+ 1720+.Lmisaligned4: 1721+ ands tmp1, src1, #3 1722+ beq .Lsrc1_aligned 1723+ sub src2, src2, tmp1 1724+ bic src1, src1, #3 1725+ lsls tmp1, tmp1, #31 1726+ ldr data1, [src1], #4 1727+ beq .Laligned_m2 1728+ bcs .Laligned_m1 1729+ 1730+#if STRCMP_PRECHECK == 0 1731+ ldrb data2, [src2, #1] 1732+ uxtb tmp1, data1, ror #BYTE1_OFFSET 1733+ subs tmp1, tmp1, data2 1734+ bne .Lmisaligned_exit 1735+ cbz data2, .Lmisaligned_exit 1736+ 1737+.Laligned_m2: 1738+ ldrb data2, [src2, #2] 1739+ uxtb tmp1, data1, ror #BYTE2_OFFSET 1740+ subs tmp1, tmp1, data2 1741+ bne .Lmisaligned_exit 1742+ cbz data2, .Lmisaligned_exit 1743+ 1744+.Laligned_m1: 1745+ ldrb data2, [src2, #3] 1746+ uxtb tmp1, data1, ror #BYTE3_OFFSET 1747+ subs tmp1, tmp1, data2 1748+ bne .Lmisaligned_exit 1749+ add src2, src2, #4 1750+ cbnz data2, .Lsrc1_aligned 1751+#else /* STRCMP_PRECHECK */ 1752+ /* If we've done the pre-check, then we don't need to check the 1753+ first byte again here. 
*/ 1754+ ldrb data2, [src2, #2] 1755+ uxtb tmp1, data1, ror #BYTE2_OFFSET 1756+ subs tmp1, tmp1, data2 1757+ bne .Lmisaligned_exit 1758+ cbz data2, .Lmisaligned_exit 1759+ 1760+.Laligned_m2: 1761+ ldrb data2, [src2, #3] 1762+ uxtb tmp1, data1, ror #BYTE3_OFFSET 1763+ subs tmp1, tmp1, data2 1764+ bne .Lmisaligned_exit 1765+ cbnz data2, .Laligned_m1 1766+#endif 1767+ 1768+.Lmisaligned_exit: 1769+ mov result, tmp1 1770+ ldr r4, [sp], #16 1771+ cfi_remember_state 1772+ cfi_def_cfa_offset (0) 1773+ cfi_restore (r4) 1774+ cfi_restore (r5) 1775+ cfi_restore (r6) 1776+ cfi_restore (r7) 1777+ bx lr 1778+ 1779+#if STRCMP_PRECHECK == 1 1780+.Laligned_m1: 1781+ add src2, src2, #4 1782+#endif 1783+.Lsrc1_aligned: 1784+ cfi_restore_state 1785+ /* src1 is word aligned, but src2 has no common alignment 1786+ with it. */ 1787+ ldr data1, [src1], #4 1788+ lsls tmp1, src2, #31 /* C=src2[1], Z=src2[0]. */ 1789+ 1790+ bic src2, src2, #3 1791+ ldr data2, [src2], #4 1792+ bhi .Loverlap1 /* C=1, Z=0 => src2[1:0] = 0b11. */ 1793+ bcs .Loverlap2 /* C=1, Z=1 => src2[1:0] = 0b10. */ 1794+ 1795+ /* (overlap3) C=0, Z=0 => src2[1:0] = 0b01. */ 1796+.Loverlap3: 1797+ bic tmp1, data1, #MSB 1798+ uadd8 syndrome, data1, const_m1 1799+ eors syndrome, tmp1, data2, S2LO #8 1800+ sel syndrome, syndrome, const_m1 1801+ bne 4f 1802+ cbnz syndrome, 5f 1803+ ldr data2, [src2], #4 1804+ eor tmp1, tmp1, data1 1805+ cmp tmp1, data2, S2HI #24 1806+ bne 6f 1807+ ldr data1, [src1], #4 1808+ b .Loverlap3 1809+4: 1810+ S2LO data2, data2, #8 1811+ b .Lstrcmp_tail 1812+ 1813+5: 1814+ bics syndrome, syndrome, #MSB 1815+ bne .Lstrcmp_done_equal 1816+ 1817+ /* We can only get here if the MSB of data1 contains 0, so 1818+ fast-path the exit. */ 1819+ ldrb result, [src2] 1820+ ldrd r4, r5, [sp], #16 1821+ cfi_remember_state 1822+ cfi_def_cfa_offset (0) 1823+ cfi_restore (r4) 1824+ cfi_restore (r5) 1825+ /* R6/7 Not used in this sequence. 
*/ 1826+ cfi_restore (r6) 1827+ cfi_restore (r7) 1828+ neg result, result 1829+ bx lr 1830+ 1831+6: 1832+ cfi_restore_state 1833+ S2LO data1, data1, #24 1834+ and data2, data2, #LSB 1835+ b .Lstrcmp_tail 1836+ 1837+ .p2align 5,,12 /* Ensure at least 3 instructions in cache line. */ 1838+.Loverlap2: 1839+ and tmp1, data1, const_m1, S2LO #16 1840+ uadd8 syndrome, data1, const_m1 1841+ eors syndrome, tmp1, data2, S2LO #16 1842+ sel syndrome, syndrome, const_m1 1843+ bne 4f 1844+ cbnz syndrome, 5f 1845+ ldr data2, [src2], #4 1846+ eor tmp1, tmp1, data1 1847+ cmp tmp1, data2, S2HI #16 1848+ bne 6f 1849+ ldr data1, [src1], #4 1850+ b .Loverlap2 1851+4: 1852+ S2LO data2, data2, #16 1853+ b .Lstrcmp_tail 1854+5: 1855+ ands syndrome, syndrome, const_m1, S2LO #16 1856+ bne .Lstrcmp_done_equal 1857+ 1858+ ldrh data2, [src2] 1859+ S2LO data1, data1, #16 1860+#ifdef __ARM_BIG_ENDIAN 1861+ lsl data2, data2, #16 1862+#endif 1863+ b .Lstrcmp_tail 1864+ 1865+6: 1866+ S2LO data1, data1, #16 1867+ and data2, data2, const_m1, S2LO #16 1868+ b .Lstrcmp_tail 1869+ 1870+ .p2align 5,,12 /* Ensure at least 3 instructions in cache line. */ 1871+.Loverlap1: 1872+ and tmp1, data1, #LSB 1873+ uadd8 syndrome, data1, const_m1 1874+ eors syndrome, tmp1, data2, S2LO #24 1875+ sel syndrome, syndrome, const_m1 1876+ bne 4f 1877+ cbnz syndrome, 5f 1878+ ldr data2, [src2], #4 1879+ eor tmp1, tmp1, data1 1880+ cmp tmp1, data2, S2HI #8 1881+ bne 6f 1882+ ldr data1, [src1], #4 1883+ b .Loverlap1 1884+4: 1885+ S2LO data2, data2, #24 1886+ b .Lstrcmp_tail 1887+5: 1888+ tst syndrome, #LSB 1889+ bne .Lstrcmp_done_equal 1890+ ldr data2, [src2] 1891+6: 1892+ S2LO data1, data1, #8 1893+ bic data2, data2, #MSB 1894+ b .Lstrcmp_tail 1895+ 1896+.Lstrcmp_done_equal: 1897+ mov result, #0 1898+ ldrd r4, r5, [sp], #16 1899+ cfi_remember_state 1900+ cfi_def_cfa_offset (0) 1901+ cfi_restore (r4) 1902+ cfi_restore (r5) 1903+ /* R6/7 not used in this sequence. 
*/ 1904+ cfi_restore (r6) 1905+ cfi_restore (r7) 1906+ bx lr 1907+ 1908+.Lstrcmp_tail: 1909+ cfi_restore_state 1910+#ifndef __ARM_BIG_ENDIAN 1911+ rev data1, data1 1912+ rev data2, data2 1913+ /* Now everything looks big-endian... */ 1914+#endif 1915+ uadd8 tmp1, data1, const_m1 1916+ eor tmp1, data1, data2 1917+ sel syndrome, tmp1, const_m1 1918+ clz tmp1, syndrome 1919+ lsl data1, data1, tmp1 1920+ lsl data2, data2, tmp1 1921+ lsr result, data1, #24 1922+ ldrd r4, r5, [sp], #16 1923+ cfi_def_cfa_offset (0) 1924+ cfi_restore (r4) 1925+ cfi_restore (r5) 1926+ /* R6/7 not used in this sequence. */ 1927+ cfi_restore (r6) 1928+ cfi_restore (r7) 1929+ sub result, result, data2, lsr #24 1930+ bx lr 1931+END (strcmp) 1932+libc_hidden_builtin_def (strcmp) 1933+ 1934+# From ../strcmp.S 1935+#include <features.h> 1936+ 1937+#ifndef __UCLIBC_HAS_LOCALE__ 1938+strong_alias(strcmp,strcoll) 1939+libc_hidden_def(strcoll) 1940+#endif 1941diff --git a/libc/string/arm/glibc-neon/strlen.S b/libc/string/arm/glibc-neon/strlen.S 1942new file mode 100644 1943index 0000000..dbb6344 1944--- /dev/null 1945+++ b/libc/string/arm/glibc-neon/strlen.S 1946@@ -0,0 +1,76 @@ 1947+/* Copyright (C) 1998-2021 Free Software Foundation, Inc. 1948+ This file is part of the GNU C Library. 1949+ 1950+ The GNU C Library is free software; you can redistribute it and/or 1951+ modify it under the terms of the GNU Lesser General Public 1952+ License as published by the Free Software Foundation; either 1953+ version 2.1 of the License, or (at your option) any later version. 1954+ 1955+ The GNU C Library is distributed in the hope that it will be useful, 1956+ but WITHOUT ANY WARRANTY; without even the implied warranty of 1957+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 1958+ Lesser General Public License for more details. 1959+ 1960+ You should have received a copy of the GNU Lesser General Public 1961+ License along with the GNU C Library. 
If not, see 1962+ <https://www.gnu.org/licenses/>. */ 1963+ 1964+/* Thumb requires excessive IT insns here. */ 1965+#define NO_THUMB 1966+#include <sysdep.h> 1967+ 1968+/* size_t strlen(const char *S) 1969+ * entry: r0 -> string 1970+ * exit: r0 = len 1971+ */ 1972+ 1973+ .syntax unified 1974+ .text 1975+ 1976+ENTRY(strlen) 1977+ bic r1, r0, $3 @ addr of word containing first byte 1978+ ldr r2, [r1], $4 @ get the first word 1979+ ands r3, r0, $3 @ how many bytes are duff? 1980+ rsb r0, r3, $0 @ get - that number into counter. 1981+ beq Laligned @ skip into main check routine if no 1982+ @ more 1983+#ifdef __ARMEB__ 1984+ orr r2, r2, $0xff000000 @ set this byte to non-zero 1985+ subs r3, r3, $1 @ any more to do? 1986+ orrgt r2, r2, $0x00ff0000 @ if so, set this byte 1987+ subs r3, r3, $1 @ more? 1988+ orrgt r2, r2, $0x0000ff00 @ then set. 1989+#else 1990+ orr r2, r2, $0x000000ff @ set this byte to non-zero 1991+ subs r3, r3, $1 @ any more to do? 1992+ orrgt r2, r2, $0x0000ff00 @ if so, set this byte 1993+ subs r3, r3, $1 @ more? 1994+ orrgt r2, r2, $0x00ff0000 @ then set. 1995+#endif 1996+Laligned: @ here, we have a word in r2. Does it 1997+ tst r2, $0x000000ff @ contain any zeroes? 
1998+ tstne r2, $0x0000ff00 @ 1999+ tstne r2, $0x00ff0000 @ 2000+ tstne r2, $0xff000000 @ 2001+ addne r0, r0, $4 @ if not, the string is 4 bytes longer 2002+ ldrne r2, [r1], $4 @ and we continue to the next word 2003+ bne Laligned @ 2004+Llastword: @ drop through to here once we find a 2005+#ifdef __ARMEB__ 2006+ tst r2, $0xff000000 @ word that has a zero byte in it 2007+ addne r0, r0, $1 @ 2008+ tstne r2, $0x00ff0000 @ and add up to 3 bytes on to it 2009+ addne r0, r0, $1 @ 2010+ tstne r2, $0x0000ff00 @ (if first three all non-zero, 4th 2011+ addne r0, r0, $1 @ must be zero) 2012+#else 2013+ tst r2, $0x000000ff @ word that has a zero byte in it 2014+ addne r0, r0, $1 @ 2015+ tstne r2, $0x0000ff00 @ and add up to 3 bytes on to it 2016+ addne r0, r0, $1 @ 2017+ tstne r2, $0x00ff0000 @ (if first three all non-zero, 4th 2018+ addne r0, r0, $1 @ must be zero) 2019+#endif 2020+ DO_RET(lr) 2021+END(strlen) 2022+libc_hidden_builtin_def (strlen) 2023diff --git a/libc/string/arm/glibc-neon/sysdep.h b/libc/string/arm/glibc-neon/sysdep.h 2024new file mode 100644 2025index 0000000..cceb4a9 2026--- /dev/null 2027+++ b/libc/string/arm/glibc-neon/sysdep.h 2028@@ -0,0 +1,339 @@ 2029+/* Assembler macros for ARM. 2030+ Copyright (C) 1997-2021 Free Software Foundation, Inc. 2031+ This file is part of the GNU C Library. 2032+ 2033+ The GNU C Library is free software; you can redistribute it and/or 2034+ modify it under the terms of the GNU Lesser General Public 2035+ License as published by the Free Software Foundation; either 2036+ version 2.1 of the License, or (at your option) any later version. 2037+ 2038+ The GNU C Library is distributed in the hope that it will be useful, 2039+ but WITHOUT ANY WARRANTY; without even the implied warranty of 2040+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 2041+ Lesser General Public License for more details. 
2042+ 2043+ You should have received a copy of the GNU Lesser General Public 2044+ License along with the GNU C Library. If not, see 2045+ <https://www.gnu.org/licenses/>. */ 2046+ 2047+#include <sysdeps/generic/sysdep.h> 2048+#include <features.h> 2049+ 2050+#ifndef __ASSEMBLER__ 2051+# include <stdint.h> 2052+#else 2053+# include <arm-features.h> 2054+#endif 2055+ 2056+/* The __ARM_ARCH define is provided by gcc 4.8. Construct it otherwise. */ 2057+#ifndef __ARM_ARCH 2058+# ifdef __ARM_ARCH_2__ 2059+# define __ARM_ARCH 2 2060+# elif defined (__ARM_ARCH_3__) || defined (__ARM_ARCH_3M__) 2061+# define __ARM_ARCH 3 2062+# elif defined (__ARM_ARCH_4__) || defined (__ARM_ARCH_4T__) 2063+# define __ARM_ARCH 4 2064+# elif defined (__ARM_ARCH_5__) || defined (__ARM_ARCH_5E__) \ 2065+ || defined(__ARM_ARCH_5T__) || defined(__ARM_ARCH_5TE__) \ 2066+ || defined(__ARM_ARCH_5TEJ__) 2067+# define __ARM_ARCH 5 2068+# elif defined (__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \ 2069+ || defined (__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) \ 2070+ || defined (__ARM_ARCH_6K__) || defined(__ARM_ARCH_6T2__) 2071+# define __ARM_ARCH 6 2072+# elif defined (__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \ 2073+ || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \ 2074+ || defined(__ARM_ARCH_7EM__) 2075+# define __ARM_ARCH 7 2076+# else 2077+# error unknown arm architecture 2078+# endif 2079+#endif 2080+ 2081+#if __ARM_ARCH > 4 || defined (__ARM_ARCH_4T__) 2082+# define ARCH_HAS_BX 2083+#endif 2084+#if __ARM_ARCH > 4 2085+# define ARCH_HAS_BLX 2086+#endif 2087+#if __ARM_ARCH > 6 || defined (__ARM_ARCH_6K__) || defined (__ARM_ARCH_6ZK__) 2088+# define ARCH_HAS_HARD_TP 2089+#endif 2090+#if __ARM_ARCH > 6 || defined (__ARM_ARCH_6T2__) 2091+# define ARCH_HAS_T2 2092+#endif 2093+ 2094+#ifdef __ASSEMBLER__ 2095+ 2096+/* Syntactic details of assembler. 
*/ 2097+ 2098+#define ALIGNARG(log2) log2 2099+#define ASM_SIZE_DIRECTIVE(name) .size name,.-name 2100+ 2101+#define PLTJMP(_x) _x##(PLT) 2102+ 2103+#ifdef ARCH_HAS_BX 2104+# define BX(R) bx R 2105+# define BXC(C, R) bx##C R 2106+# ifdef ARCH_HAS_BLX 2107+# define BLX(R) blx R 2108+# else 2109+# define BLX(R) mov lr, pc; bx R 2110+# endif 2111+#else 2112+# define BX(R) mov pc, R 2113+# define BXC(C, R) mov##C pc, R 2114+# define BLX(R) mov lr, pc; mov pc, R 2115+#endif 2116+ 2117+#define DO_RET(R) BX(R) 2118+#define RETINSTR(C, R) BXC(C, R) 2119+ 2120+/* Define an entry point visible from C. */ 2121+#define ENTRY(name) \ 2122+ .globl C_SYMBOL_NAME(name); \ 2123+ .type C_SYMBOL_NAME(name),%function; \ 2124+ .align ALIGNARG(4); \ 2125+ C_LABEL(name) \ 2126+ CFI_SECTIONS; \ 2127+ cfi_startproc; \ 2128+ CALL_MCOUNT 2129+ 2130+#define CFI_SECTIONS \ 2131+ .cfi_sections .debug_frame 2132+ 2133+#undef END 2134+#define END(name) \ 2135+ cfi_endproc; \ 2136+ ASM_SIZE_DIRECTIVE(name) 2137+ 2138+/* If compiled for profiling, call `mcount' at the start of each function. */ 2139+#ifdef PROF 2140+/* Call __gnu_mcount_nc (GCC >= 4.4). */ 2141+#define CALL_MCOUNT \ 2142+ push {lr}; \ 2143+ cfi_adjust_cfa_offset (4); \ 2144+ cfi_rel_offset (lr, 0); \ 2145+ bl PLTJMP(mcount); \ 2146+ cfi_adjust_cfa_offset (-4); \ 2147+ cfi_restore (lr) 2148+#else 2149+#define CALL_MCOUNT /* Do nothing. */ 2150+#endif 2151+ 2152+/* Since C identifiers are not normally prefixed with an underscore 2153+ on this system, the asm identifier `syscall_error' intrudes on the 2154+ C name space. Make sure we use an innocuous name. */ 2155+#define syscall_error __syscall_error 2156+#define mcount __gnu_mcount_nc 2157+ 2158+/* Tag_ABI_align8_preserved: This code preserves 8-byte 2159+ alignment in any callee. */ 2160+ .eabi_attribute 25, 1 2161+/* Tag_ABI_align8_needed: This code may require 8-byte alignment from 2162+ the caller. 
*/ 2163+ .eabi_attribute 24, 1 2164+ 2165+/* The thumb2 encoding is reasonably complete. Unless suppressed, use it. */ 2166+ .syntax unified 2167+# if defined(__thumb2__) && !defined(NO_THUMB) 2168+ .thumb 2169+#else 2170+# undef __thumb__ 2171+# undef __thumb2__ 2172+ .arm 2173+# endif 2174+ 2175+/* Load or store to/from address X + Y into/from R, (maybe) using T. 2176+ X or Y can use T freely; T can be R if OP is a load. The first 2177+ version eschews the two-register addressing mode, while the 2178+ second version uses it. */ 2179+# define LDST_INDEXED_NOINDEX(OP, R, T, X, Y) \ 2180+ add T, X, Y; \ 2181+ OP R, [T] 2182+# define LDST_INDEXED_INDEX(OP, R, X, Y) \ 2183+ OP R, [X, Y] 2184+ 2185+# ifdef ARM_NO_INDEX_REGISTER 2186+/* We're never using the two-register addressing mode, so this 2187+ always uses an intermediate add. */ 2188+# define LDST_INDEXED(OP, R, T, X, Y) LDST_INDEXED_NOINDEX (OP, R, T, X, Y) 2189+# define LDST_PC_INDEXED(OP, R, T, X) LDST_INDEXED_NOINDEX (OP, R, T, pc, X) 2190+# else 2191+/* The two-register addressing mode is OK, except on Thumb with pc. */ 2192+# define LDST_INDEXED(OP, R, T, X, Y) LDST_INDEXED_INDEX (OP, R, X, Y) 2193+# ifdef __thumb2__ 2194+# define LDST_PC_INDEXED(OP, R, T, X) LDST_INDEXED_NOINDEX (OP, R, T, pc, X) 2195+# else 2196+# define LDST_PC_INDEXED(OP, R, T, X) LDST_INDEXED_INDEX (OP, R, pc, X) 2197+# endif 2198+# endif 2199+ 2200+/* Load or store to/from a pc-relative EXPR into/from R, using T. 
*/ 2201+# ifdef __thumb2__ 2202+# define LDST_PCREL(OP, R, T, EXPR) \ 2203+ ldr T, 98f; \ 2204+ .subsection 2; \ 2205+98: .word EXPR - 99f - PC_OFS; \ 2206+ .previous; \ 2207+99: add T, T, pc; \ 2208+ OP R, [T] 2209+# elif defined (ARCH_HAS_T2) && ARM_PCREL_MOVW_OK 2210+# define LDST_PCREL(OP, R, T, EXPR) \ 2211+ movw T, #:lower16:EXPR - 99f - PC_OFS; \ 2212+ movt T, #:upper16:EXPR - 99f - PC_OFS; \ 2213+99: LDST_PC_INDEXED (OP, R, T, T) 2214+# else 2215+# define LDST_PCREL(OP, R, T, EXPR) \ 2216+ ldr T, 98f; \ 2217+ .subsection 2; \ 2218+98: .word EXPR - 99f - PC_OFS; \ 2219+ .previous; \ 2220+99: OP R, [pc, T] 2221+# endif 2222+ 2223+/* Load from a global SYMBOL + CONSTANT into R, using T. */ 2224+# if defined (ARCH_HAS_T2) && !defined (PIC) 2225+# define LDR_GLOBAL(R, T, SYMBOL, CONSTANT) \ 2226+ movw T, #:lower16:SYMBOL; \ 2227+ movt T, #:upper16:SYMBOL; \ 2228+ ldr R, [T, $CONSTANT] 2229+# elif defined (ARCH_HAS_T2) && defined (PIC) && ARM_PCREL_MOVW_OK 2230+# define LDR_GLOBAL(R, T, SYMBOL, CONSTANT) \ 2231+ movw R, #:lower16:_GLOBAL_OFFSET_TABLE_ - 97f - PC_OFS; \ 2232+ movw T, #:lower16:99f - 98f - PC_OFS; \ 2233+ movt R, #:upper16:_GLOBAL_OFFSET_TABLE_ - 97f - PC_OFS; \ 2234+ movt T, #:upper16:99f - 98f - PC_OFS; \ 2235+ .pushsection .rodata.cst4, "aM", %progbits, 4; \ 2236+ .balign 4; \ 2237+99: .word SYMBOL##(GOT); \ 2238+ .popsection; \ 2239+97: add R, R, pc; \ 2240+98: LDST_PC_INDEXED (ldr, T, T, T); \ 2241+ LDST_INDEXED (ldr, R, T, R, T); \ 2242+ ldr R, [R, $CONSTANT] 2243+# else 2244+# define LDR_GLOBAL(R, T, SYMBOL, CONSTANT) \ 2245+ ldr T, 99f; \ 2246+ ldr R, 100f; \ 2247+98: add T, T, pc; \ 2248+ ldr T, [T, R]; \ 2249+ .subsection 2; \ 2250+99: .word _GLOBAL_OFFSET_TABLE_ - 98b - PC_OFS; \ 2251+100: .word SYMBOL##(GOT); \ 2252+ .previous; \ 2253+ ldr R, [T, $CONSTANT] 2254+# endif 2255+ 2256+/* This is the same as LDR_GLOBAL, but for a SYMBOL that is known to 2257+ be in the same linked object (as for one with hidden visibility). 
2258+ We can avoid the GOT indirection in the PIC case. For the pure 2259+ static case, LDR_GLOBAL is already optimal. */ 2260+# ifdef PIC 2261+# define LDR_HIDDEN(R, T, SYMBOL, CONSTANT) \ 2262+ LDST_PCREL (ldr, R, T, SYMBOL + CONSTANT) 2263+# else 2264+# define LDR_HIDDEN(R, T, SYMBOL, CONSTANT) \ 2265+ LDR_GLOBAL (R, T, SYMBOL, CONSTANT) 2266+# endif 2267+ 2268+/* Cope with negative memory offsets, which thumb can't encode. 2269+ Use NEGOFF_ADJ_BASE to (conditionally) alter the base register, 2270+ and then NEGOFF_OFF1 to use 0 for thumb and the offset for arm, 2271+ or NEGOFF_OFF2 to use A-B for thumb and A for arm. */ 2272+# ifdef __thumb2__ 2273+# define NEGOFF_ADJ_BASE(R, OFF) add R, R, $OFF 2274+# define NEGOFF_ADJ_BASE2(D, S, OFF) add D, S, $OFF 2275+# define NEGOFF_OFF1(R, OFF) [R] 2276+# define NEGOFF_OFF2(R, OFFA, OFFB) [R, $((OFFA) - (OFFB))] 2277+# else 2278+# define NEGOFF_ADJ_BASE(R, OFF) 2279+# define NEGOFF_ADJ_BASE2(D, S, OFF) mov D, S 2280+# define NEGOFF_OFF1(R, OFF) [R, $OFF] 2281+# define NEGOFF_OFF2(R, OFFA, OFFB) [R, $OFFA] 2282+# endif 2283+ 2284+/* Helper to get the TLS base pointer. The interface is that TMP is a 2285+ register that may be used to hold the LR, if necessary. TMP may be 2286+ LR itself to indicate that LR need not be saved. The base pointer 2287+ is returned in R0. Only R0 and TMP are modified. */ 2288+ 2289+# ifdef ARCH_HAS_HARD_TP 2290+/* If the cpu has cp15 available, use it. */ 2291+# define GET_TLS(TMP) mrc p15, 0, r0, c13, c0, 3 2292+# else 2293+/* At this generic level we have no tricks to pull. Call the ABI routine. 
*/ 2294+# define GET_TLS(TMP) \ 2295+ push { r1, r2, r3, lr }; \ 2296+ cfi_remember_state; \ 2297+ cfi_adjust_cfa_offset (16); \ 2298+ cfi_rel_offset (r1, 0); \ 2299+ cfi_rel_offset (r2, 4); \ 2300+ cfi_rel_offset (r3, 8); \ 2301+ cfi_rel_offset (lr, 12); \ 2302+ bl __aeabi_read_tp; \ 2303+ pop { r1, r2, r3, lr }; \ 2304+ cfi_restore_state 2305+# endif /* ARCH_HAS_HARD_TP */ 2306+ 2307+/* These are the directives used for EABI unwind info. 2308+ Wrap them in macros so another configuration's sysdep.h 2309+ file can define them away if it doesn't use EABI unwind info. */ 2310+# define eabi_fnstart .fnstart 2311+# define eabi_fnend .fnend 2312+# define eabi_save(...) .save __VA_ARGS__ 2313+# define eabi_cantunwind .cantunwind 2314+# define eabi_pad(n) .pad n 2315+ 2316+#endif /* __ASSEMBLER__ */ 2317+ 2318+/* This number is the offset from the pc at the current location. */ 2319+#ifdef __thumb__ 2320+# define PC_OFS 4 2321+#else 2322+# define PC_OFS 8 2323+#endif 2324+ 2325+/* Pointer mangling support. */ 2326+#if (IS_IN (rtld) \ 2327+ || (!defined SHARED && (IS_IN (libc) || IS_IN (libpthread)))) 2328+# ifdef __ASSEMBLER__ 2329+# define PTR_MANGLE_LOAD(guard, tmp) \ 2330+ LDR_HIDDEN (guard, tmp, C_SYMBOL_NAME(__pointer_chk_guard_local), 0) 2331+# define PTR_MANGLE(dst, src, guard, tmp) \ 2332+ PTR_MANGLE_LOAD(guard, tmp); \ 2333+ PTR_MANGLE2(dst, src, guard) 2334+/* Use PTR_MANGLE2 for efficiency if guard is already loaded. 
*/ 2335+# define PTR_MANGLE2(dst, src, guard) \ 2336+ eor dst, src, guard 2337+# define PTR_DEMANGLE(dst, src, guard, tmp) \ 2338+ PTR_MANGLE (dst, src, guard, tmp) 2339+# define PTR_DEMANGLE2(dst, src, guard) \ 2340+ PTR_MANGLE2 (dst, src, guard) 2341+# else 2342+extern uintptr_t __pointer_chk_guard_local attribute_relro attribute_hidden; 2343+# define PTR_MANGLE(var) \ 2344+ (var) = (__typeof (var)) ((uintptr_t) (var) ^ __pointer_chk_guard_local) 2345+# define PTR_DEMANGLE(var) PTR_MANGLE (var) 2346+# endif 2347+#else 2348+# ifdef __ASSEMBLER__ 2349+# define PTR_MANGLE_LOAD(guard, tmp) \ 2350+ LDR_GLOBAL (guard, tmp, C_SYMBOL_NAME(__pointer_chk_guard), 0); 2351+# define PTR_MANGLE(dst, src, guard, tmp) \ 2352+ PTR_MANGLE_LOAD(guard, tmp); \ 2353+ PTR_MANGLE2(dst, src, guard) 2354+/* Use PTR_MANGLE2 for efficiency if guard is already loaded. */ 2355+# define PTR_MANGLE2(dst, src, guard) \ 2356+ eor dst, src, guard 2357+# define PTR_DEMANGLE(dst, src, guard, tmp) \ 2358+ PTR_MANGLE (dst, src, guard, tmp) 2359+# define PTR_DEMANGLE2(dst, src, guard) \ 2360+ PTR_MANGLE2 (dst, src, guard) 2361+# else 2362+extern uintptr_t __pointer_chk_guard attribute_relro; 2363+# define PTR_MANGLE(var) \ 2364+ (var) = (__typeof (var)) ((uintptr_t) (var) ^ __pointer_chk_guard) 2365+# define PTR_DEMANGLE(var) PTR_MANGLE (var) 2366+# endif 2367+#endif 2368diff --git a/libc/string/arm/glibc-neon/sysdeps/generic/dwarf2.h b/libc/string/arm/glibc-neon/sysdeps/generic/dwarf2.h 2369new file mode 100644 2370index 0000000..0d08da0 2371--- /dev/null 2372+++ b/libc/string/arm/glibc-neon/sysdeps/generic/dwarf2.h 2373@@ -0,0 +1,590 @@ 2374+/* Declarations and definitions of codes relating to the DWARF2 symbolic 2375+ debugging information format. 2376+ Copyright (C) 1992-2021 Free Software Foundation, Inc. 2377+ Contributed by Gary Funck (gary@intrepid.com). Derived from the 2378+ DWARF 1 implementation written by Ron Guilmette (rfg@monkeys.com). 2379+ 2380+ This file is part of the GNU C Library. 
2381+ 2382+ The GNU C Library is free software; you can redistribute it and/or 2383+ modify it under the terms of the GNU Lesser General Public 2384+ License as published by the Free Software Foundation; either 2385+ version 2.1 of the License, or (at your option) any later version. 2386+ 2387+ The GNU C Library is distributed in the hope that it will be useful, 2388+ but WITHOUT ANY WARRANTY; without even the implied warranty of 2389+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 2390+ Lesser General Public License for more details. 2391+ 2392+ You should have received a copy of the GNU Lesser General Public 2393+ License along with the GNU C Library; if not, see 2394+ <https://www.gnu.org/licenses/>. */ 2395+ 2396+#ifndef _DWARF2_H 2397+#define _DWARF2_H 1 2398+ 2399+/* This file is derived from the DWARF specification (a public document) 2400+ Revision 2.0.0 (July 27, 1993) developed by the UNIX International 2401+ Programming Languages Special Interest Group (UI/PLSIG) and distributed 2402+ by UNIX International. Copies of this specification are available from 2403+ UNIX International, 20 Waterview Boulevard, Parsippany, NJ, 07054. */ 2404+ 2405+/* This file is shared between GCC and GDB, and should not contain 2406+ prototypes. */ 2407+ 2408+#ifndef __ASSEMBLER__ 2409+/* Tag names and codes. 
*/ 2410+ 2411+enum dwarf_tag 2412+ { 2413+ DW_TAG_padding = 0x00, 2414+ DW_TAG_array_type = 0x01, 2415+ DW_TAG_class_type = 0x02, 2416+ DW_TAG_entry_point = 0x03, 2417+ DW_TAG_enumeration_type = 0x04, 2418+ DW_TAG_formal_parameter = 0x05, 2419+ DW_TAG_imported_declaration = 0x08, 2420+ DW_TAG_label = 0x0a, 2421+ DW_TAG_lexical_block = 0x0b, 2422+ DW_TAG_member = 0x0d, 2423+ DW_TAG_pointer_type = 0x0f, 2424+ DW_TAG_reference_type = 0x10, 2425+ DW_TAG_compile_unit = 0x11, 2426+ DW_TAG_string_type = 0x12, 2427+ DW_TAG_structure_type = 0x13, 2428+ DW_TAG_subroutine_type = 0x15, 2429+ DW_TAG_typedef = 0x16, 2430+ DW_TAG_union_type = 0x17, 2431+ DW_TAG_unspecified_parameters = 0x18, 2432+ DW_TAG_variant = 0x19, 2433+ DW_TAG_common_block = 0x1a, 2434+ DW_TAG_common_inclusion = 0x1b, 2435+ DW_TAG_inheritance = 0x1c, 2436+ DW_TAG_inlined_subroutine = 0x1d, 2437+ DW_TAG_module = 0x1e, 2438+ DW_TAG_ptr_to_member_type = 0x1f, 2439+ DW_TAG_set_type = 0x20, 2440+ DW_TAG_subrange_type = 0x21, 2441+ DW_TAG_with_stmt = 0x22, 2442+ DW_TAG_access_declaration = 0x23, 2443+ DW_TAG_base_type = 0x24, 2444+ DW_TAG_catch_block = 0x25, 2445+ DW_TAG_const_type = 0x26, 2446+ DW_TAG_constant = 0x27, 2447+ DW_TAG_enumerator = 0x28, 2448+ DW_TAG_file_type = 0x29, 2449+ DW_TAG_friend = 0x2a, 2450+ DW_TAG_namelist = 0x2b, 2451+ DW_TAG_namelist_item = 0x2c, 2452+ DW_TAG_packed_type = 0x2d, 2453+ DW_TAG_subprogram = 0x2e, 2454+ DW_TAG_template_type_param = 0x2f, 2455+ DW_TAG_template_value_param = 0x30, 2456+ DW_TAG_thrown_type = 0x31, 2457+ DW_TAG_try_block = 0x32, 2458+ DW_TAG_variant_part = 0x33, 2459+ DW_TAG_variable = 0x34, 2460+ DW_TAG_volatile_type = 0x35, 2461+ /* SGI/MIPS Extensions */ 2462+ DW_TAG_MIPS_loop = 0x4081, 2463+ /* GNU extensions */ 2464+ DW_TAG_format_label = 0x4101, /* for FORTRAN 77 and Fortran 90 */ 2465+ DW_TAG_function_template = 0x4102, /* for C++ */ 2466+ DW_TAG_class_template = 0x4103, /* for C++ */ 2467+ DW_TAG_GNU_BINCL = 0x4104, 2468+ DW_TAG_GNU_EINCL = 0x4105 2469+ 
}; 2470+ 2471+#define DW_TAG_lo_user 0x4080 2472+#define DW_TAG_hi_user 0xffff 2473+ 2474+/* flag that tells whether entry has a child or not */ 2475+#define DW_children_no 0 2476+#define DW_children_yes 1 2477+ 2478+/* Form names and codes. */ 2479+enum dwarf_form 2480+ { 2481+ DW_FORM_addr = 0x01, 2482+ DW_FORM_block2 = 0x03, 2483+ DW_FORM_block4 = 0x04, 2484+ DW_FORM_data2 = 0x05, 2485+ DW_FORM_data4 = 0x06, 2486+ DW_FORM_data8 = 0x07, 2487+ DW_FORM_string = 0x08, 2488+ DW_FORM_block = 0x09, 2489+ DW_FORM_block1 = 0x0a, 2490+ DW_FORM_data1 = 0x0b, 2491+ DW_FORM_flag = 0x0c, 2492+ DW_FORM_sdata = 0x0d, 2493+ DW_FORM_strp = 0x0e, 2494+ DW_FORM_udata = 0x0f, 2495+ DW_FORM_ref_addr = 0x10, 2496+ DW_FORM_ref1 = 0x11, 2497+ DW_FORM_ref2 = 0x12, 2498+ DW_FORM_ref4 = 0x13, 2499+ DW_FORM_ref8 = 0x14, 2500+ DW_FORM_ref_udata = 0x15, 2501+ DW_FORM_indirect = 0x16 2502+ }; 2503+ 2504+/* Attribute names and codes. */ 2505+ 2506+enum dwarf_attribute 2507+ { 2508+ DW_AT_sibling = 0x01, 2509+ DW_AT_location = 0x02, 2510+ DW_AT_name = 0x03, 2511+ DW_AT_ordering = 0x09, 2512+ DW_AT_subscr_data = 0x0a, 2513+ DW_AT_byte_size = 0x0b, 2514+ DW_AT_bit_offset = 0x0c, 2515+ DW_AT_bit_size = 0x0d, 2516+ DW_AT_element_list = 0x0f, 2517+ DW_AT_stmt_list = 0x10, 2518+ DW_AT_low_pc = 0x11, 2519+ DW_AT_high_pc = 0x12, 2520+ DW_AT_language = 0x13, 2521+ DW_AT_member = 0x14, 2522+ DW_AT_discr = 0x15, 2523+ DW_AT_discr_value = 0x16, 2524+ DW_AT_visibility = 0x17, 2525+ DW_AT_import = 0x18, 2526+ DW_AT_string_length = 0x19, 2527+ DW_AT_common_reference = 0x1a, 2528+ DW_AT_comp_dir = 0x1b, 2529+ DW_AT_const_value = 0x1c, 2530+ DW_AT_containing_type = 0x1d, 2531+ DW_AT_default_value = 0x1e, 2532+ DW_AT_inline = 0x20, 2533+ DW_AT_is_optional = 0x21, 2534+ DW_AT_lower_bound = 0x22, 2535+ DW_AT_producer = 0x25, 2536+ DW_AT_prototyped = 0x27, 2537+ DW_AT_return_addr = 0x2a, 2538+ DW_AT_start_scope = 0x2c, 2539+ DW_AT_stride_size = 0x2e, 2540+ DW_AT_upper_bound = 0x2f, 2541+ DW_AT_abstract_origin = 
0x31, 2542+ DW_AT_accessibility = 0x32, 2543+ DW_AT_address_class = 0x33, 2544+ DW_AT_artificial = 0x34, 2545+ DW_AT_base_types = 0x35, 2546+ DW_AT_calling_convention = 0x36, 2547+ DW_AT_count = 0x37, 2548+ DW_AT_data_member_location = 0x38, 2549+ DW_AT_decl_column = 0x39, 2550+ DW_AT_decl_file = 0x3a, 2551+ DW_AT_decl_line = 0x3b, 2552+ DW_AT_declaration = 0x3c, 2553+ DW_AT_discr_list = 0x3d, 2554+ DW_AT_encoding = 0x3e, 2555+ DW_AT_external = 0x3f, 2556+ DW_AT_frame_base = 0x40, 2557+ DW_AT_friend = 0x41, 2558+ DW_AT_identifier_case = 0x42, 2559+ DW_AT_macro_info = 0x43, 2560+ DW_AT_namelist_items = 0x44, 2561+ DW_AT_priority = 0x45, 2562+ DW_AT_segment = 0x46, 2563+ DW_AT_specification = 0x47, 2564+ DW_AT_static_link = 0x48, 2565+ DW_AT_type = 0x49, 2566+ DW_AT_use_location = 0x4a, 2567+ DW_AT_variable_parameter = 0x4b, 2568+ DW_AT_virtuality = 0x4c, 2569+ DW_AT_vtable_elem_location = 0x4d, 2570+ /* SGI/MIPS Extensions */ 2571+ DW_AT_MIPS_fde = 0x2001, 2572+ DW_AT_MIPS_loop_begin = 0x2002, 2573+ DW_AT_MIPS_tail_loop_begin = 0x2003, 2574+ DW_AT_MIPS_epilog_begin = 0x2004, 2575+ DW_AT_MIPS_loop_unroll_factor = 0x2005, 2576+ DW_AT_MIPS_software_pipeline_depth = 0x2006, 2577+ DW_AT_MIPS_linkage_name = 0x2007, 2578+ DW_AT_MIPS_stride = 0x2008, 2579+ DW_AT_MIPS_abstract_name = 0x2009, 2580+ DW_AT_MIPS_clone_origin = 0x200a, 2581+ DW_AT_MIPS_has_inlines = 0x200b, 2582+ /* GNU extensions. */ 2583+ DW_AT_sf_names = 0x2101, 2584+ DW_AT_src_info = 0x2102, 2585+ DW_AT_mac_info = 0x2103, 2586+ DW_AT_src_coords = 0x2104, 2587+ DW_AT_body_begin = 0x2105, 2588+ DW_AT_body_end = 0x2106 2589+ }; 2590+ 2591+#define DW_AT_lo_user 0x2000 /* implementation-defined range start */ 2592+#define DW_AT_hi_user 0x3ff0 /* implementation-defined range end */ 2593+ 2594+/* Location atom names and codes. 
*/ 2595+ 2596+enum dwarf_location_atom 2597+ { 2598+ DW_OP_addr = 0x03, 2599+ DW_OP_deref = 0x06, 2600+ DW_OP_const1u = 0x08, 2601+ DW_OP_const1s = 0x09, 2602+ DW_OP_const2u = 0x0a, 2603+ DW_OP_const2s = 0x0b, 2604+ DW_OP_const4u = 0x0c, 2605+ DW_OP_const4s = 0x0d, 2606+ DW_OP_const8u = 0x0e, 2607+ DW_OP_const8s = 0x0f, 2608+ DW_OP_constu = 0x10, 2609+ DW_OP_consts = 0x11, 2610+ DW_OP_dup = 0x12, 2611+ DW_OP_drop = 0x13, 2612+ DW_OP_over = 0x14, 2613+ DW_OP_pick = 0x15, 2614+ DW_OP_swap = 0x16, 2615+ DW_OP_rot = 0x17, 2616+ DW_OP_xderef = 0x18, 2617+ DW_OP_abs = 0x19, 2618+ DW_OP_and = 0x1a, 2619+ DW_OP_div = 0x1b, 2620+ DW_OP_minus = 0x1c, 2621+ DW_OP_mod = 0x1d, 2622+ DW_OP_mul = 0x1e, 2623+ DW_OP_neg = 0x1f, 2624+ DW_OP_not = 0x20, 2625+ DW_OP_or = 0x21, 2626+ DW_OP_plus = 0x22, 2627+ DW_OP_plus_uconst = 0x23, 2628+ DW_OP_shl = 0x24, 2629+ DW_OP_shr = 0x25, 2630+ DW_OP_shra = 0x26, 2631+ DW_OP_xor = 0x27, 2632+ DW_OP_bra = 0x28, 2633+ DW_OP_eq = 0x29, 2634+ DW_OP_ge = 0x2a, 2635+ DW_OP_gt = 0x2b, 2636+ DW_OP_le = 0x2c, 2637+ DW_OP_lt = 0x2d, 2638+ DW_OP_ne = 0x2e, 2639+ DW_OP_skip = 0x2f, 2640+ DW_OP_lit0 = 0x30, 2641+ DW_OP_lit1 = 0x31, 2642+ DW_OP_lit2 = 0x32, 2643+ DW_OP_lit3 = 0x33, 2644+ DW_OP_lit4 = 0x34, 2645+ DW_OP_lit5 = 0x35, 2646+ DW_OP_lit6 = 0x36, 2647+ DW_OP_lit7 = 0x37, 2648+ DW_OP_lit8 = 0x38, 2649+ DW_OP_lit9 = 0x39, 2650+ DW_OP_lit10 = 0x3a, 2651+ DW_OP_lit11 = 0x3b, 2652+ DW_OP_lit12 = 0x3c, 2653+ DW_OP_lit13 = 0x3d, 2654+ DW_OP_lit14 = 0x3e, 2655+ DW_OP_lit15 = 0x3f, 2656+ DW_OP_lit16 = 0x40, 2657+ DW_OP_lit17 = 0x41, 2658+ DW_OP_lit18 = 0x42, 2659+ DW_OP_lit19 = 0x43, 2660+ DW_OP_lit20 = 0x44, 2661+ DW_OP_lit21 = 0x45, 2662+ DW_OP_lit22 = 0x46, 2663+ DW_OP_lit23 = 0x47, 2664+ DW_OP_lit24 = 0x48, 2665+ DW_OP_lit25 = 0x49, 2666+ DW_OP_lit26 = 0x4a, 2667+ DW_OP_lit27 = 0x4b, 2668+ DW_OP_lit28 = 0x4c, 2669+ DW_OP_lit29 = 0x4d, 2670+ DW_OP_lit30 = 0x4e, 2671+ DW_OP_lit31 = 0x4f, 2672+ DW_OP_reg0 = 0x50, 2673+ DW_OP_reg1 = 0x51, 2674+ DW_OP_reg2 = 
0x52, 2675+ DW_OP_reg3 = 0x53, 2676+ DW_OP_reg4 = 0x54, 2677+ DW_OP_reg5 = 0x55, 2678+ DW_OP_reg6 = 0x56, 2679+ DW_OP_reg7 = 0x57, 2680+ DW_OP_reg8 = 0x58, 2681+ DW_OP_reg9 = 0x59, 2682+ DW_OP_reg10 = 0x5a, 2683+ DW_OP_reg11 = 0x5b, 2684+ DW_OP_reg12 = 0x5c, 2685+ DW_OP_reg13 = 0x5d, 2686+ DW_OP_reg14 = 0x5e, 2687+ DW_OP_reg15 = 0x5f, 2688+ DW_OP_reg16 = 0x60, 2689+ DW_OP_reg17 = 0x61, 2690+ DW_OP_reg18 = 0x62, 2691+ DW_OP_reg19 = 0x63, 2692+ DW_OP_reg20 = 0x64, 2693+ DW_OP_reg21 = 0x65, 2694+ DW_OP_reg22 = 0x66, 2695+ DW_OP_reg23 = 0x67, 2696+ DW_OP_reg24 = 0x68, 2697+ DW_OP_reg25 = 0x69, 2698+ DW_OP_reg26 = 0x6a, 2699+ DW_OP_reg27 = 0x6b, 2700+ DW_OP_reg28 = 0x6c, 2701+ DW_OP_reg29 = 0x6d, 2702+ DW_OP_reg30 = 0x6e, 2703+ DW_OP_reg31 = 0x6f, 2704+ DW_OP_breg0 = 0x70, 2705+ DW_OP_breg1 = 0x71, 2706+ DW_OP_breg2 = 0x72, 2707+ DW_OP_breg3 = 0x73, 2708+ DW_OP_breg4 = 0x74, 2709+ DW_OP_breg5 = 0x75, 2710+ DW_OP_breg6 = 0x76, 2711+ DW_OP_breg7 = 0x77, 2712+ DW_OP_breg8 = 0x78, 2713+ DW_OP_breg9 = 0x79, 2714+ DW_OP_breg10 = 0x7a, 2715+ DW_OP_breg11 = 0x7b, 2716+ DW_OP_breg12 = 0x7c, 2717+ DW_OP_breg13 = 0x7d, 2718+ DW_OP_breg14 = 0x7e, 2719+ DW_OP_breg15 = 0x7f, 2720+ DW_OP_breg16 = 0x80, 2721+ DW_OP_breg17 = 0x81, 2722+ DW_OP_breg18 = 0x82, 2723+ DW_OP_breg19 = 0x83, 2724+ DW_OP_breg20 = 0x84, 2725+ DW_OP_breg21 = 0x85, 2726+ DW_OP_breg22 = 0x86, 2727+ DW_OP_breg23 = 0x87, 2728+ DW_OP_breg24 = 0x88, 2729+ DW_OP_breg25 = 0x89, 2730+ DW_OP_breg26 = 0x8a, 2731+ DW_OP_breg27 = 0x8b, 2732+ DW_OP_breg28 = 0x8c, 2733+ DW_OP_breg29 = 0x8d, 2734+ DW_OP_breg30 = 0x8e, 2735+ DW_OP_breg31 = 0x8f, 2736+ DW_OP_regx = 0x90, 2737+ DW_OP_fbreg = 0x91, 2738+ DW_OP_bregx = 0x92, 2739+ DW_OP_piece = 0x93, 2740+ DW_OP_deref_size = 0x94, 2741+ DW_OP_xderef_size = 0x95, 2742+ DW_OP_nop = 0x96 2743+ }; 2744+ 2745+#define DW_OP_lo_user 0x80 /* implementation-defined range start */ 2746+#define DW_OP_hi_user 0xff /* implementation-defined range end */ 2747+ 2748+/* Type encodings. 
*/ 2749+ 2750+enum dwarf_type 2751+ { 2752+ DW_ATE_void = 0x0, 2753+ DW_ATE_address = 0x1, 2754+ DW_ATE_boolean = 0x2, 2755+ DW_ATE_complex_float = 0x3, 2756+ DW_ATE_float = 0x4, 2757+ DW_ATE_signed = 0x5, 2758+ DW_ATE_signed_char = 0x6, 2759+ DW_ATE_unsigned = 0x7, 2760+ DW_ATE_unsigned_char = 0x8 2761+ }; 2762+ 2763+#define DW_ATE_lo_user 0x80 2764+#define DW_ATE_hi_user 0xff 2765+ 2766+/* Array ordering names and codes. */ 2767+enum dwarf_array_dim_ordering 2768+ { 2769+ DW_ORD_row_major = 0, 2770+ DW_ORD_col_major = 1 2771+ }; 2772+ 2773+/* access attribute */ 2774+enum dwarf_access_attribute 2775+ { 2776+ DW_ACCESS_public = 1, 2777+ DW_ACCESS_protected = 2, 2778+ DW_ACCESS_private = 3 2779+ }; 2780+ 2781+/* visibility */ 2782+enum dwarf_visibility_attribute 2783+ { 2784+ DW_VIS_local = 1, 2785+ DW_VIS_exported = 2, 2786+ DW_VIS_qualified = 3 2787+ }; 2788+ 2789+/* virtuality */ 2790+enum dwarf_virtuality_attribute 2791+ { 2792+ DW_VIRTUALITY_none = 0, 2793+ DW_VIRTUALITY_virtual = 1, 2794+ DW_VIRTUALITY_pure_virtual = 2 2795+ }; 2796+ 2797+/* case sensitivity */ 2798+enum dwarf_id_case 2799+ { 2800+ DW_ID_case_sensitive = 0, 2801+ DW_ID_up_case = 1, 2802+ DW_ID_down_case = 2, 2803+ DW_ID_case_insensitive = 3 2804+ }; 2805+ 2806+/* calling convention */ 2807+enum dwarf_calling_convention 2808+ { 2809+ DW_CC_normal = 0x1, 2810+ DW_CC_program = 0x2, 2811+ DW_CC_nocall = 0x3 2812+ }; 2813+ 2814+#define DW_CC_lo_user 0x40 2815+#define DW_CC_hi_user 0xff 2816+ 2817+/* inline attribute */ 2818+enum dwarf_inline_attribute 2819+ { 2820+ DW_INL_not_inlined = 0, 2821+ DW_INL_inlined = 1, 2822+ DW_INL_declared_not_inlined = 2, 2823+ DW_INL_declared_inlined = 3 2824+ }; 2825+ 2826+/* discriminant lists */ 2827+enum dwarf_discrim_list 2828+ { 2829+ DW_DSC_label = 0, 2830+ DW_DSC_range = 1 2831+ }; 2832+ 2833+/* line number opcodes */ 2834+enum dwarf_line_number_ops 2835+ { 2836+ DW_LNS_extended_op = 0, 2837+ DW_LNS_copy = 1, 2838+ DW_LNS_advance_pc = 2, 2839+ 
DW_LNS_advance_line = 3, 2840+ DW_LNS_set_file = 4, 2841+ DW_LNS_set_column = 5, 2842+ DW_LNS_negate_stmt = 6, 2843+ DW_LNS_set_basic_block = 7, 2844+ DW_LNS_const_add_pc = 8, 2845+ DW_LNS_fixed_advance_pc = 9 2846+ }; 2847+ 2848+/* line number extended opcodes */ 2849+enum dwarf_line_number_x_ops 2850+ { 2851+ DW_LNE_end_sequence = 1, 2852+ DW_LNE_set_address = 2, 2853+ DW_LNE_define_file = 3 2854+ }; 2855+ 2856+/* call frame information */ 2857+enum dwarf_call_frame_info 2858+ { 2859+ DW_CFA_advance_loc = 0x40, 2860+ DW_CFA_offset = 0x80, 2861+ DW_CFA_restore = 0xc0, 2862+ DW_CFA_nop = 0x00, 2863+ DW_CFA_set_loc = 0x01, 2864+ DW_CFA_advance_loc1 = 0x02, 2865+ DW_CFA_advance_loc2 = 0x03, 2866+ DW_CFA_advance_loc4 = 0x04, 2867+ DW_CFA_offset_extended = 0x05, 2868+ DW_CFA_restore_extended = 0x06, 2869+ DW_CFA_undefined = 0x07, 2870+ DW_CFA_same_value = 0x08, 2871+ DW_CFA_register = 0x09, 2872+ DW_CFA_remember_state = 0x0a, 2873+ DW_CFA_restore_state = 0x0b, 2874+ DW_CFA_def_cfa = 0x0c, 2875+ DW_CFA_def_cfa_register = 0x0d, 2876+ DW_CFA_def_cfa_offset = 0x0e, 2877+ DW_CFA_def_cfa_expression = 0x0f, 2878+ DW_CFA_expression = 0x10, 2879+ /* Dwarf 2.1 */ 2880+ DW_CFA_offset_extended_sf = 0x11, 2881+ DW_CFA_def_cfa_sf = 0x12, 2882+ DW_CFA_def_cfa_offset_sf = 0x13, 2883+ 2884+ /* SGI/MIPS specific */ 2885+ DW_CFA_MIPS_advance_loc8 = 0x1d, 2886+ 2887+ /* GNU extensions */ 2888+ DW_CFA_GNU_window_save = 0x2d, 2889+ DW_CFA_GNU_args_size = 0x2e, 2890+ DW_CFA_GNU_negative_offset_extended = 0x2f 2891+ }; 2892+ 2893+#define DW_CIE_ID 0xffffffff 2894+#define DW_CIE_VERSION 1 2895+ 2896+#define DW_CFA_extended 0 2897+#define DW_CFA_low_user 0x1c 2898+#define DW_CFA_high_user 0x3f 2899+ 2900+#define DW_CHILDREN_no 0x00 2901+#define DW_CHILDREN_yes 0x01 2902+ 2903+#define DW_ADDR_none 0 2904+ 2905+/* Source language names and codes. 
*/ 2906+ 2907+enum dwarf_source_language 2908+ { 2909+ DW_LANG_C89 = 0x0001, 2910+ DW_LANG_C = 0x0002, 2911+ DW_LANG_Ada83 = 0x0003, 2912+ DW_LANG_C_plus_plus = 0x0004, 2913+ DW_LANG_Cobol74 = 0x0005, 2914+ DW_LANG_Cobol85 = 0x0006, 2915+ DW_LANG_Fortran77 = 0x0007, 2916+ DW_LANG_Fortran90 = 0x0008, 2917+ DW_LANG_Pascal83 = 0x0009, 2918+ DW_LANG_Modula2 = 0x000a, 2919+ DW_LANG_Java = 0x000b, 2920+ DW_LANG_Mips_Assembler = 0x8001 2921+ }; 2922+ 2923+ 2924+#define DW_LANG_lo_user 0x8000 /* implementation-defined range start */ 2925+#define DW_LANG_hi_user 0xffff /* implementation-defined range start */ 2926+ 2927+/* Names and codes for macro information. */ 2928+ 2929+enum dwarf_macinfo_record_type 2930+ { 2931+ DW_MACINFO_define = 1, 2932+ DW_MACINFO_undef = 2, 2933+ DW_MACINFO_start_file = 3, 2934+ DW_MACINFO_end_file = 4, 2935+ DW_MACINFO_vendor_ext = 255 2936+ }; 2937+ 2938+#endif /* !ASSEMBLER */ 2939+ 2940+/* @@@ For use with GNU frame unwind information. */ 2941+ 2942+#define DW_EH_PE_absptr 0x00 2943+#define DW_EH_PE_omit 0xff 2944+ 2945+#define DW_EH_PE_uleb128 0x01 2946+#define DW_EH_PE_udata2 0x02 2947+#define DW_EH_PE_udata4 0x03 2948+#define DW_EH_PE_udata8 0x04 2949+#define DW_EH_PE_sleb128 0x09 2950+#define DW_EH_PE_sdata2 0x0A 2951+#define DW_EH_PE_sdata4 0x0B 2952+#define DW_EH_PE_sdata8 0x0C 2953+#define DW_EH_PE_signed 0x08 2954+ 2955+#define DW_EH_PE_pcrel 0x10 2956+#define DW_EH_PE_textrel 0x20 2957+#define DW_EH_PE_datarel 0x30 2958+#define DW_EH_PE_funcrel 0x40 2959+#define DW_EH_PE_aligned 0x50 2960+ 2961+#define DW_EH_PE_indirect 0x80 2962+ 2963+#endif /* dwarf2.h */ 2964diff --git a/libc/string/arm/glibc-neon/sysdeps/generic/sysdep.h b/libc/string/arm/glibc-neon/sysdeps/generic/sysdep.h 2965new file mode 100644 2966index 0000000..39cac91 2967--- /dev/null 2968+++ b/libc/string/arm/glibc-neon/sysdeps/generic/sysdep.h 2969@@ -0,0 +1,97 @@ 2970+/* Generic asm macros used on many machines. 
2971+ Copyright (C) 1991-2021 Free Software Foundation, Inc. 2972+ This file is part of the GNU C Library. 2973+ 2974+ The GNU C Library is free software; you can redistribute it and/or 2975+ modify it under the terms of the GNU Lesser General Public 2976+ License as published by the Free Software Foundation; either 2977+ version 2.1 of the License, or (at your option) any later version. 2978+ 2979+ The GNU C Library is distributed in the hope that it will be useful, 2980+ but WITHOUT ANY WARRANTY; without even the implied warranty of 2981+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 2982+ Lesser General Public License for more details. 2983+ 2984+ You should have received a copy of the GNU Lesser General Public 2985+ License along with the GNU C Library; if not, see 2986+ <https://www.gnu.org/licenses/>. */ 2987+ 2988+#ifndef C_LABEL 2989+ 2990+/* Define a macro we can use to construct the asm name for a C symbol. */ 2991+# define C_LABEL(name) name##: 2992+ 2993+#endif 2994+ 2995+#ifdef __ASSEMBLER__ 2996+/* Mark the end of function named SYM. This is used on some platforms 2997+ to generate correct debugging information. */ 2998+# ifndef END 2999+# define END(sym) 3000+# endif 3001+ 3002+# ifndef JUMPTARGET 3003+# define JUMPTARGET(sym) sym 3004+# endif 3005+#endif 3006+ 3007+/* Makros to generate eh_frame unwind information. 
*/ 3008+#ifdef __ASSEMBLER__ 3009+# define cfi_startproc .cfi_startproc 3010+# define cfi_endproc .cfi_endproc 3011+# define cfi_def_cfa(reg, off) .cfi_def_cfa reg, off 3012+# define cfi_def_cfa_register(reg) .cfi_def_cfa_register reg 3013+# define cfi_def_cfa_offset(off) .cfi_def_cfa_offset off 3014+# define cfi_adjust_cfa_offset(off) .cfi_adjust_cfa_offset off 3015+# define cfi_offset(reg, off) .cfi_offset reg, off 3016+# define cfi_rel_offset(reg, off) .cfi_rel_offset reg, off 3017+# define cfi_register(r1, r2) .cfi_register r1, r2 3018+# define cfi_return_column(reg) .cfi_return_column reg 3019+# define cfi_restore(reg) .cfi_restore reg 3020+# define cfi_same_value(reg) .cfi_same_value reg 3021+# define cfi_undefined(reg) .cfi_undefined reg 3022+# define cfi_remember_state .cfi_remember_state 3023+# define cfi_restore_state .cfi_restore_state 3024+# define cfi_window_save .cfi_window_save 3025+# define cfi_personality(enc, exp) .cfi_personality enc, exp 3026+# define cfi_lsda(enc, exp) .cfi_lsda enc, exp 3027+ 3028+#else /* ! 
ASSEMBLER */ 3029+ 3030+# define CFI_STRINGIFY(Name) CFI_STRINGIFY2 (Name) 3031+# define CFI_STRINGIFY2(Name) #Name 3032+# define CFI_STARTPROC ".cfi_startproc" 3033+# define CFI_ENDPROC ".cfi_endproc" 3034+# define CFI_DEF_CFA(reg, off) \ 3035+ ".cfi_def_cfa " CFI_STRINGIFY(reg) "," CFI_STRINGIFY(off) 3036+# define CFI_DEF_CFA_REGISTER(reg) \ 3037+ ".cfi_def_cfa_register " CFI_STRINGIFY(reg) 3038+# define CFI_DEF_CFA_OFFSET(off) \ 3039+ ".cfi_def_cfa_offset " CFI_STRINGIFY(off) 3040+# define CFI_ADJUST_CFA_OFFSET(off) \ 3041+ ".cfi_adjust_cfa_offset " CFI_STRINGIFY(off) 3042+# define CFI_OFFSET(reg, off) \ 3043+ ".cfi_offset " CFI_STRINGIFY(reg) "," CFI_STRINGIFY(off) 3044+# define CFI_REL_OFFSET(reg, off) \ 3045+ ".cfi_rel_offset " CFI_STRINGIFY(reg) "," CFI_STRINGIFY(off) 3046+# define CFI_REGISTER(r1, r2) \ 3047+ ".cfi_register " CFI_STRINGIFY(r1) "," CFI_STRINGIFY(r2) 3048+# define CFI_RETURN_COLUMN(reg) \ 3049+ ".cfi_return_column " CFI_STRINGIFY(reg) 3050+# define CFI_RESTORE(reg) \ 3051+ ".cfi_restore " CFI_STRINGIFY(reg) 3052+# define CFI_UNDEFINED(reg) \ 3053+ ".cfi_undefined " CFI_STRINGIFY(reg) 3054+# define CFI_REMEMBER_STATE \ 3055+ ".cfi_remember_state" 3056+# define CFI_RESTORE_STATE \ 3057+ ".cfi_restore_state" 3058+# define CFI_WINDOW_SAVE \ 3059+ ".cfi_window_save" 3060+# define CFI_PERSONALITY(enc, exp) \ 3061+ ".cfi_personality " CFI_STRINGIFY(enc) "," CFI_STRINGIFY(exp) 3062+# define CFI_LSDA(enc, exp) \ 3063+ ".cfi_lsda " CFI_STRINGIFY(enc) "," CFI_STRINGIFY(exp) 3064+#endif 3065+ 3066+#include "dwarf2.h" 3067-- 30682.20.1 3069 3070