1dbaef6efSGabe Black /* 2dbaef6efSGabe Black * Copyright (C) 1991,1992,1993,1997,1998,2003, 2005 Free Software Foundation, Inc. 3dbaef6efSGabe Black * This file is part of the GNU C Library. 4dbaef6efSGabe Black * Copyright (c) 2011 The Chromium OS Authors. 5dbaef6efSGabe Black * 61a459660SWolfgang Denk * SPDX-License-Identifier: GPL-2.0+ 7dbaef6efSGabe Black */ 8dbaef6efSGabe Black 9dbaef6efSGabe Black /* From glibc-2.14, sysdeps/i386/memset.c */ 10dbaef6efSGabe Black 11dbaef6efSGabe Black #include <linux/types.h> 12*afc366f0SMasahiro Yamada #include <linux/compiler.h> 13*afc366f0SMasahiro Yamada #include <asm/string.h> 14dbaef6efSGabe Black 15dbaef6efSGabe Black typedef uint32_t op_t; 16dbaef6efSGabe Black 17dbaef6efSGabe Black void *memset(void *dstpp, int c, size_t len) 18dbaef6efSGabe Black { 19dbaef6efSGabe Black int d0; 20dbaef6efSGabe Black unsigned long int dstp = (unsigned long int) dstpp; 21dbaef6efSGabe Black 22dbaef6efSGabe Black /* This explicit register allocation improves code very much indeed. */ 23dbaef6efSGabe Black register op_t x asm("ax"); 24dbaef6efSGabe Black 25dbaef6efSGabe Black x = (unsigned char) c; 26dbaef6efSGabe Black 27dbaef6efSGabe Black /* Clear the direction flag, so filling will move forward. */ 28dbaef6efSGabe Black asm volatile("cld"); 29dbaef6efSGabe Black 30dbaef6efSGabe Black /* This threshold value is optimal. */ 31dbaef6efSGabe Black if (len >= 12) { 32dbaef6efSGabe Black /* Fill X with four copies of the char we want to fill with. */ 33dbaef6efSGabe Black x |= (x << 8); 34dbaef6efSGabe Black x |= (x << 16); 35dbaef6efSGabe Black 36dbaef6efSGabe Black /* Adjust LEN for the bytes handled in the first loop. */ 37dbaef6efSGabe Black len -= (-dstp) % sizeof(op_t); 38dbaef6efSGabe Black 39dbaef6efSGabe Black /* 40dbaef6efSGabe Black * There are at least some bytes to set. No need to test for 41dbaef6efSGabe Black * LEN == 0 in this alignment loop. 42dbaef6efSGabe Black */ 43dbaef6efSGabe Black 44dbaef6efSGabe Black /* Fill bytes until DSTP is aligned on a longword boundary. */ 45dbaef6efSGabe Black asm volatile( 46dbaef6efSGabe Black "rep\n" 47dbaef6efSGabe Black "stosb" /* %0, %2, %3 */ : 48dbaef6efSGabe Black "=D" (dstp), "=c" (d0) : 49dbaef6efSGabe Black "0" (dstp), "1" ((-dstp) % sizeof(op_t)), "a" (x) : 50dbaef6efSGabe Black "memory"); 51dbaef6efSGabe Black 52dbaef6efSGabe Black /* Fill longwords. */ 53dbaef6efSGabe Black asm volatile( 54dbaef6efSGabe Black "rep\n" 55dbaef6efSGabe Black "stosl" /* %0, %2, %3 */ : 56dbaef6efSGabe Black "=D" (dstp), "=c" (d0) : 57dbaef6efSGabe Black "0" (dstp), "1" (len / sizeof(op_t)), "a" (x) : 58dbaef6efSGabe Black "memory"); 59dbaef6efSGabe Black len %= sizeof(op_t); 60dbaef6efSGabe Black } 61dbaef6efSGabe Black 62dbaef6efSGabe Black /* Write the last few bytes. */ 63dbaef6efSGabe Black asm volatile( 64dbaef6efSGabe Black "rep\n" 65dbaef6efSGabe Black "stosb" /* %0, %2, %3 */ : 66dbaef6efSGabe Black "=D" (dstp), "=c" (d0) : 67dbaef6efSGabe Black "0" (dstp), "1" (len), "a" (x) : 68dbaef6efSGabe Black "memory"); 69dbaef6efSGabe Black 70dbaef6efSGabe Black return dstpp; 71dbaef6efSGabe Black } 72b2c2a038SGraeme Russ 73b2c2a038SGraeme Russ #define OP_T_THRES 8 74b2c2a038SGraeme Russ #define OPSIZ (sizeof(op_t)) 75b2c2a038SGraeme Russ 76b2c2a038SGraeme Russ #define BYTE_COPY_FWD(dst_bp, src_bp, nbytes) \ 77b2c2a038SGraeme Russ do { \ 78b2c2a038SGraeme Russ int __d0; \ 79b2c2a038SGraeme Russ asm volatile( \ 80b2c2a038SGraeme Russ /* Clear the direction flag, so copying goes forward. */ \ 81b2c2a038SGraeme Russ "cld\n" \ 82b2c2a038SGraeme Russ /* Copy bytes. */ \ 83b2c2a038SGraeme Russ "rep\n" \ 84b2c2a038SGraeme Russ "movsb" : \ 85b2c2a038SGraeme Russ "=D" (dst_bp), "=S" (src_bp), "=c" (__d0) : \ 86b2c2a038SGraeme Russ "0" (dst_bp), "1" (src_bp), "2" (nbytes) : \ 87b2c2a038SGraeme Russ "memory"); \ 88b2c2a038SGraeme Russ } while (0) 89b2c2a038SGraeme Russ 90b2c2a038SGraeme Russ #define WORD_COPY_FWD(dst_bp, src_bp, nbytes_left, nbytes) \ 91b2c2a038SGraeme Russ do { \ 92b2c2a038SGraeme Russ int __d0; \ 93b2c2a038SGraeme Russ asm volatile( \ 94b2c2a038SGraeme Russ /* Clear the direction flag, so copying goes forward. */ \ 95b2c2a038SGraeme Russ "cld\n" \ 96b2c2a038SGraeme Russ /* Copy longwords. */ \ 97b2c2a038SGraeme Russ "rep\n" \ 98b2c2a038SGraeme Russ "movsl" : \ 99b2c2a038SGraeme Russ "=D" (dst_bp), "=S" (src_bp), "=c" (__d0) : \ 100b2c2a038SGraeme Russ "0" (dst_bp), "1" (src_bp), "2" ((nbytes) / 4) : \ 101b2c2a038SGraeme Russ "memory"); \ 102b2c2a038SGraeme Russ (nbytes_left) = (nbytes) % 4; \ 103b2c2a038SGraeme Russ } while (0) 104b2c2a038SGraeme Russ 105b2c2a038SGraeme Russ void *memcpy(void *dstpp, const void *srcpp, size_t len) 106b2c2a038SGraeme Russ { 107b2c2a038SGraeme Russ unsigned long int dstp = (long int)dstpp; 108b2c2a038SGraeme Russ unsigned long int srcp = (long int)srcpp; 109b2c2a038SGraeme Russ 110b2c2a038SGraeme Russ /* Copy from the beginning to the end. */ 111b2c2a038SGraeme Russ 112b2c2a038SGraeme Russ /* If there not too few bytes to copy, use word copy. */ 113b2c2a038SGraeme Russ if (len >= OP_T_THRES) { 114b2c2a038SGraeme Russ /* Copy just a few bytes to make DSTP aligned. */ 115b2c2a038SGraeme Russ len -= (-dstp) % OPSIZ; 116b2c2a038SGraeme Russ BYTE_COPY_FWD(dstp, srcp, (-dstp) % OPSIZ); 117b2c2a038SGraeme Russ 118b2c2a038SGraeme Russ /* Copy from SRCP to DSTP taking advantage of the known 119b2c2a038SGraeme Russ * alignment of DSTP. Number of bytes remaining is put 120b2c2a038SGraeme Russ * in the third argument, i.e. in LEN. This number may 121b2c2a038SGraeme Russ * vary from machine to machine. 122b2c2a038SGraeme Russ */ 123b2c2a038SGraeme Russ WORD_COPY_FWD(dstp, srcp, len, len); 124b2c2a038SGraeme Russ 125b2c2a038SGraeme Russ /* Fall out and copy the tail. */ 126b2c2a038SGraeme Russ } 127b2c2a038SGraeme Russ 128b2c2a038SGraeme Russ /* There are just a few bytes to copy. Use byte memory operations. */ 129b2c2a038SGraeme Russ BYTE_COPY_FWD(dstp, srcp, len); 130b2c2a038SGraeme Russ 131b2c2a038SGraeme Russ return dstpp; 132b2c2a038SGraeme Russ } 133