1dbaef6efSGabe Black /* 2dbaef6efSGabe Black * Copyright (C) 1991,1992,1993,1997,1998,2003, 2005 Free Software Foundation, Inc. 3dbaef6efSGabe Black * This file is part of the GNU C Library. 4dbaef6efSGabe Black * Copyright (c) 2011 The Chromium OS Authors. 5dbaef6efSGabe Black * 6dbaef6efSGabe Black * See file CREDITS for list of people who contributed to this 7dbaef6efSGabe Black * project. 8dbaef6efSGabe Black * 9dbaef6efSGabe Black * This program is free software; you can redistribute it and/or 10dbaef6efSGabe Black * modify it under the terms of the GNU General Public License as 11dbaef6efSGabe Black * published by the Free Software Foundation; either version 2 of 12dbaef6efSGabe Black * the License, or (at your option) any later version. 13dbaef6efSGabe Black * 14dbaef6efSGabe Black * This program is distributed in the hope that it will be useful, 15dbaef6efSGabe Black * but WITHOUT ANY WARRANTY; without even the implied warranty of 16dbaef6efSGabe Black * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17dbaef6efSGabe Black * GNU General Public License for more details. 18dbaef6efSGabe Black * 19dbaef6efSGabe Black * You should have received a copy of the GNU General Public License 20dbaef6efSGabe Black * along with this program; if not, write to the Free Software 21dbaef6efSGabe Black * Foundation, Inc., 59 Temple Place, Suite 330, Boston, 22dbaef6efSGabe Black * MA 02111-1307 USA 23dbaef6efSGabe Black */ 24dbaef6efSGabe Black 25dbaef6efSGabe Black /* From glibc-2.14, sysdeps/i386/memset.c */ 26dbaef6efSGabe Black 27dbaef6efSGabe Black #include <compiler.h> 28dbaef6efSGabe Black #include <asm/string.h> 29dbaef6efSGabe Black #include <linux/types.h> 30dbaef6efSGabe Black 31dbaef6efSGabe Black typedef uint32_t op_t; 32dbaef6efSGabe Black 33dbaef6efSGabe Black void *memset(void *dstpp, int c, size_t len) 34dbaef6efSGabe Black { 35dbaef6efSGabe Black int d0; 36dbaef6efSGabe Black unsigned long int dstp = (unsigned long int) dstpp; 37dbaef6efSGabe Black 38dbaef6efSGabe Black /* This explicit register allocation improves code very much indeed. */ 39dbaef6efSGabe Black register op_t x asm("ax"); 40dbaef6efSGabe Black 41dbaef6efSGabe Black x = (unsigned char) c; 42dbaef6efSGabe Black 43dbaef6efSGabe Black /* Clear the direction flag, so filling will move forward. */ 44dbaef6efSGabe Black asm volatile("cld"); 45dbaef6efSGabe Black 46dbaef6efSGabe Black /* This threshold value is optimal. */ 47dbaef6efSGabe Black if (len >= 12) { 48dbaef6efSGabe Black /* Fill X with four copies of the char we want to fill with. */ 49dbaef6efSGabe Black x |= (x << 8); 50dbaef6efSGabe Black x |= (x << 16); 51dbaef6efSGabe Black 52dbaef6efSGabe Black /* Adjust LEN for the bytes handled in the first loop. */ 53dbaef6efSGabe Black len -= (-dstp) % sizeof(op_t); 54dbaef6efSGabe Black 55dbaef6efSGabe Black /* 56dbaef6efSGabe Black * There are at least some bytes to set. No need to test for 57dbaef6efSGabe Black * LEN == 0 in this alignment loop. 58dbaef6efSGabe Black */ 59dbaef6efSGabe Black 60dbaef6efSGabe Black /* Fill bytes until DSTP is aligned on a longword boundary. */ 61dbaef6efSGabe Black asm volatile( 62dbaef6efSGabe Black "rep\n" 63dbaef6efSGabe Black "stosb" /* %0, %2, %3 */ : 64dbaef6efSGabe Black "=D" (dstp), "=c" (d0) : 65dbaef6efSGabe Black "0" (dstp), "1" ((-dstp) % sizeof(op_t)), "a" (x) : 66dbaef6efSGabe Black "memory"); 67dbaef6efSGabe Black 68dbaef6efSGabe Black /* Fill longwords. */ 69dbaef6efSGabe Black asm volatile( 70dbaef6efSGabe Black "rep\n" 71dbaef6efSGabe Black "stosl" /* %0, %2, %3 */ : 72dbaef6efSGabe Black "=D" (dstp), "=c" (d0) : 73dbaef6efSGabe Black "0" (dstp), "1" (len / sizeof(op_t)), "a" (x) : 74dbaef6efSGabe Black "memory"); 75dbaef6efSGabe Black len %= sizeof(op_t); 76dbaef6efSGabe Black } 77dbaef6efSGabe Black 78dbaef6efSGabe Black /* Write the last few bytes. */ 79dbaef6efSGabe Black asm volatile( 80dbaef6efSGabe Black "rep\n" 81dbaef6efSGabe Black "stosb" /* %0, %2, %3 */ : 82dbaef6efSGabe Black "=D" (dstp), "=c" (d0) : 83dbaef6efSGabe Black "0" (dstp), "1" (len), "a" (x) : 84dbaef6efSGabe Black "memory"); 85dbaef6efSGabe Black 86dbaef6efSGabe Black return dstpp; 87dbaef6efSGabe Black } 88*b2c2a038SGraeme Russ 89*b2c2a038SGraeme Russ #define OP_T_THRES 8 90*b2c2a038SGraeme Russ #define OPSIZ (sizeof(op_t)) 91*b2c2a038SGraeme Russ 92*b2c2a038SGraeme Russ #define BYTE_COPY_FWD(dst_bp, src_bp, nbytes) \ 93*b2c2a038SGraeme Russ do { \ 94*b2c2a038SGraeme Russ int __d0; \ 95*b2c2a038SGraeme Russ asm volatile( \ 96*b2c2a038SGraeme Russ /* Clear the direction flag, so copying goes forward. */ \ 97*b2c2a038SGraeme Russ "cld\n" \ 98*b2c2a038SGraeme Russ /* Copy bytes. */ \ 99*b2c2a038SGraeme Russ "rep\n" \ 100*b2c2a038SGraeme Russ "movsb" : \ 101*b2c2a038SGraeme Russ "=D" (dst_bp), "=S" (src_bp), "=c" (__d0) : \ 102*b2c2a038SGraeme Russ "0" (dst_bp), "1" (src_bp), "2" (nbytes) : \ 103*b2c2a038SGraeme Russ "memory"); \ 104*b2c2a038SGraeme Russ } while (0) 105*b2c2a038SGraeme Russ 106*b2c2a038SGraeme Russ #define WORD_COPY_FWD(dst_bp, src_bp, nbytes_left, nbytes) \ 107*b2c2a038SGraeme Russ do { \ 108*b2c2a038SGraeme Russ int __d0; \ 109*b2c2a038SGraeme Russ asm volatile( \ 110*b2c2a038SGraeme Russ /* Clear the direction flag, so copying goes forward. */ \ 111*b2c2a038SGraeme Russ "cld\n" \ 112*b2c2a038SGraeme Russ /* Copy longwords. */ \ 113*b2c2a038SGraeme Russ "rep\n" \ 114*b2c2a038SGraeme Russ "movsl" : \ 115*b2c2a038SGraeme Russ "=D" (dst_bp), "=S" (src_bp), "=c" (__d0) : \ 116*b2c2a038SGraeme Russ "0" (dst_bp), "1" (src_bp), "2" ((nbytes) / 4) : \ 117*b2c2a038SGraeme Russ "memory"); \ 118*b2c2a038SGraeme Russ (nbytes_left) = (nbytes) % 4; \ 119*b2c2a038SGraeme Russ } while (0) 120*b2c2a038SGraeme Russ 121*b2c2a038SGraeme Russ void *memcpy(void *dstpp, const void *srcpp, size_t len) 122*b2c2a038SGraeme Russ { 123*b2c2a038SGraeme Russ unsigned long int dstp = (long int)dstpp; 124*b2c2a038SGraeme Russ unsigned long int srcp = (long int)srcpp; 125*b2c2a038SGraeme Russ 126*b2c2a038SGraeme Russ /* Copy from the beginning to the end. */ 127*b2c2a038SGraeme Russ 128*b2c2a038SGraeme Russ /* If there not too few bytes to copy, use word copy. */ 129*b2c2a038SGraeme Russ if (len >= OP_T_THRES) { 130*b2c2a038SGraeme Russ /* Copy just a few bytes to make DSTP aligned. */ 131*b2c2a038SGraeme Russ len -= (-dstp) % OPSIZ; 132*b2c2a038SGraeme Russ BYTE_COPY_FWD(dstp, srcp, (-dstp) % OPSIZ); 133*b2c2a038SGraeme Russ 134*b2c2a038SGraeme Russ /* Copy from SRCP to DSTP taking advantage of the known 135*b2c2a038SGraeme Russ * alignment of DSTP. Number of bytes remaining is put 136*b2c2a038SGraeme Russ * in the third argument, i.e. in LEN. This number may 137*b2c2a038SGraeme Russ * vary from machine to machine. 138*b2c2a038SGraeme Russ */ 139*b2c2a038SGraeme Russ WORD_COPY_FWD(dstp, srcp, len, len); 140*b2c2a038SGraeme Russ 141*b2c2a038SGraeme Russ /* Fall out and copy the tail. */ 142*b2c2a038SGraeme Russ } 143*b2c2a038SGraeme Russ 144*b2c2a038SGraeme Russ /* There are just a few bytes to copy. Use byte memory operations. */ 145*b2c2a038SGraeme Russ BYTE_COPY_FWD(dstp, srcp, len); 146*b2c2a038SGraeme Russ 147*b2c2a038SGraeme Russ return dstpp; 148*b2c2a038SGraeme Russ } 149