From b0a64ddebb517a1678c44d9baf24d8bbe39d02cd Mon Sep 17 00:00:00 2001
From: Khem Raj <raj.khem@gmail.com>
Date: Tue, 29 Jan 2019 13:15:07 -0800
Subject: [PATCH] asm: Delete .func/.endfunc directives

These are useful only with stabs debug format, which is not used on
linux systems, gas ignores them silently, but clang assembler does not
and rightly so.

Signed-off-by: Khem Raj <raj.khem@gmail.com>
---
 aarch64-asm.S | 14 +-------------
 arm-neon.S    | 24 ------------------------
 mips-32.S     |  5 ++---
 x86-sse2.S    | 21 ++++++++++-----------
 4 files changed, 13 insertions(+), 51 deletions(-)

diff --git a/aarch64-asm.S b/aarch64-asm.S
index 842b9e2..165c8ac 100644
--- a/aarch64-asm.S
+++ b/aarch64-asm.S
@@ -31,8 +31,7 @@
 
 .macro asm_function function_name
 .global \function_name
- .type \function_name,%function
-.func \function_name
+ .type \function_name,%function
 \function_name:
 DST .req x0
 SRC .req x1
@@ -54,7 +53,6 @@ asm_function aligned_block_copy_ldpstp_x_aarch64
 subs SIZE, SIZE, #64
 bgt 0b
 ret
-.endfunc
 
 asm_function aligned_block_copy_ldpstp_q_aarch64
 0:
@@ -67,7 +65,6 @@ asm_function aligned_block_copy_ldpstp_q_aarch64
 subs SIZE, SIZE, #64
 bgt 0b
 ret
-.endfunc
 
 asm_function aligned_block_copy_ldpstp_q_pf32_l2strm_aarch64
 0:
@@ -82,7 +79,6 @@ asm_function aligned_block_copy_ldpstp_q_pf32_l2strm_aarch64
 subs SIZE, SIZE, #64
 bgt 0b
 ret
-.endfunc
 
 asm_function aligned_block_copy_ldpstp_q_pf64_l2strm_aarch64
 0:
@@ -96,7 +92,6 @@ asm_function aligned_block_copy_ldpstp_q_pf64_l2strm_aarch64
 subs SIZE, SIZE, #64
 bgt 0b
 ret
-.endfunc
 
 asm_function aligned_block_copy_ldpstp_q_pf32_l1keep_aarch64
 0:
@@ -111,7 +106,6 @@ asm_function aligned_block_copy_ldpstp_q_pf32_l1keep_aarch64
 subs SIZE, SIZE, #64
 bgt 0b
 ret
-.endfunc
 
 asm_function aligned_block_copy_ldpstp_q_pf64_l1keep_aarch64
 0:
@@ -125,7 +119,6 @@ asm_function aligned_block_copy_ldpstp_q_pf64_l1keep_aarch64
 subs SIZE, SIZE, #64
 bgt 0b
 ret
-.endfunc
 
 asm_function aligned_block_fill_stp_x_aarch64
 0:
@@ -137,7 +130,6 @@ asm_function aligned_block_fill_stp_x_aarch64
 subs SIZE, SIZE, #64
 bgt 0b
 ret
-.endfunc
 
 asm_function aligned_block_fill_stp_q_aarch64
 0:
@@ -147,7 +139,6 @@ asm_function aligned_block_fill_stp_q_aarch64
 subs SIZE, SIZE, #64
 bgt 0b
 ret
-.endfunc
 
 asm_function aligned_block_fill_stnp_x_aarch64
 0:
@@ -159,7 +150,6 @@ asm_function aligned_block_fill_stnp_x_aarch64
 subs SIZE, SIZE, #64
 bgt 0b
 ret
-.endfunc
 
 asm_function aligned_block_fill_stnp_q_aarch64
 0:
@@ -169,7 +159,6 @@ asm_function aligned_block_fill_stnp_q_aarch64
 subs SIZE, SIZE, #64
 bgt 0b
 ret
-.endfunc
 
 asm_function aligned_block_copy_ld1st1_aarch64
 0:
@@ -180,6 +169,5 @@ asm_function aligned_block_copy_ld1st1_aarch64
 subs SIZE, SIZE, #64
 bgt 0b
 ret
-.endfunc
 
 #endif
diff --git a/arm-neon.S b/arm-neon.S
index 4db78ce..9631d82 100644
--- a/arm-neon.S
+++ b/arm-neon.S
@@ -32,7 +32,6 @@
 
 .macro asm_function function_name
 .global \function_name
-.func \function_name
 \function_name:
 DST .req r0
 SRC .req r1
@@ -66,7 +65,6 @@ asm_function aligned_block_read_neon
 vpadd.u32 d31, d31, d31
 vmov.u32 r0, d31[0]
 bx lr
-.endfunc
 
 /* Actually this calculates a sum of 32-bit values */
 asm_function aligned_block_read_pf32_neon
@@ -97,7 +95,6 @@ asm_function aligned_block_read_pf32_neon
 vpadd.u32 d31, d31, d31
 vmov.u32 r0, d31[0]
 bx lr
-.endfunc
 
 /* Actually this calculates a sum of 32-bit values */
 asm_function aligned_block_read_pf64_neon
@@ -127,7 +124,6 @@ asm_function aligned_block_read_pf64_neon
 vpadd.u32 d31, d31, d31
 vmov.u32 r0, d31[0]
 bx lr
-.endfunc
 
 /* Actually this calculates a sum of 32-bit values */
 asm_function aligned_block_read2_neon
@@ -156,7 +152,6 @@ asm_function aligned_block_read2_neon
 vpadd.u32 d31, d31, d31
 vmov.u32 r0, d31[0]
 bx lr
-.endfunc
 
 /* Actually this calculates a sum of 32-bit values */
 asm_function aligned_block_read2_pf32_neon
@@ -187,7 +182,6 @@ asm_function aligned_block_read2_pf32_neon
 vpadd.u32 d31, d31, d31
 vmov.u32 r0, d31[0]
 bx lr
-.endfunc
 
 /* Actually this calculates a sum of 32-bit values */
 asm_function aligned_block_read2_pf64_neon
@@ -217,7 +211,6 @@ asm_function aligned_block_read2_pf64_neon
 vpadd.u32 d31, d31, d31
 vmov.u32 r0, d31[0]
 bx lr
-.endfunc
 
 asm_function aligned_block_copy_neon
 0:
@@ -226,7 +219,6 @@ asm_function aligned_block_copy_neon
 subs SIZE, SIZE, #32
 bgt 0b
 bx lr
-.endfunc
 
 asm_function aligned_block_copy_unrolled_neon
 vpush {d8-d15}
@@ -244,7 +236,6 @@ asm_function aligned_block_copy_unrolled_neon
 bgt 0b
 vpop {d8-d15}
 bx lr
-.endfunc
 
 asm_function aligned_block_copy_pf32_neon
 0:
@@ -254,7 +245,6 @@ asm_function aligned_block_copy_pf32_neon
 subs SIZE, SIZE, #32
 bgt 0b
 bx lr
-.endfunc
 
 asm_function aligned_block_copy_unrolled_pf32_neon
 vpush {d8-d15}
@@ -280,7 +270,6 @@ asm_function aligned_block_copy_unrolled_pf32_neon
 bgt 0b
 vpop {d8-d15}
 bx lr
-.endfunc
 
 asm_function aligned_block_copy_pf64_neon
 0:
@@ -292,7 +281,6 @@ asm_function aligned_block_copy_pf64_neon
 subs SIZE, SIZE, #64
 bgt 0b
 bx lr
-.endfunc
 
 asm_function aligned_block_copy_unrolled_pf64_neon
 vpush {d8-d15}
@@ -314,7 +302,6 @@ asm_function aligned_block_copy_unrolled_pf64_neon
 bgt 0b
 vpop {d8-d15}
 bx lr
-.endfunc
 
 asm_function aligned_block_copy_backwards_neon
 add SRC, SRC, SIZE
@@ -328,7 +315,6 @@ asm_function aligned_block_copy_backwards_neon
 subs SIZE, SIZE, #32
 bgt 0b
 bx lr
-.endfunc
 
 asm_function aligned_block_copy_backwards_pf32_neon
 add SRC, SRC, SIZE
@@ -343,7 +329,6 @@ asm_function aligned_block_copy_backwards_pf32_neon
 subs SIZE, SIZE, #32
 bgt 0b
 bx lr
-.endfunc
 
 asm_function aligned_block_copy_backwards_pf64_neon
 add SRC, SRC, SIZE
@@ -360,7 +345,6 @@ asm_function aligned_block_copy_backwards_pf64_neon
 subs SIZE, SIZE, #64
 bgt 0b
 bx lr
-.endfunc
 
 asm_function aligned_block_fill_neon
 vld1.8 {d0, d1, d2, d3}, [SRC]!
@@ -370,7 +354,6 @@ asm_function aligned_block_fill_neon
 subs SIZE, SIZE, #64
 bgt 0b
 bx lr
-.endfunc
 
 asm_function aligned_block_fill_backwards_neon
 add SRC, SRC, SIZE
@@ -383,7 +366,6 @@ asm_function aligned_block_fill_backwards_neon
 subs SIZE, SIZE, #32
 bgt 0b
 bx lr
-.endfunc
 
 /* some code for older ARM processors */
 
@@ -398,7 +380,6 @@ asm_function aligned_block_fill_stm4_armv4
 subs SIZE, SIZE, #64
 bgt 0b
 pop {r4-r12, pc}
-.endfunc
 
 asm_function aligned_block_fill_stm8_armv4
 push {r4-r12, lr}
@@ -409,7 +390,6 @@ asm_function aligned_block_fill_stm8_armv4
 subs SIZE, SIZE, #64
 bgt 0b
 pop {r4-r12, pc}
-.endfunc
 
 asm_function aligned_block_fill_strd_armv5te
 push {r4-r12, lr}
@@ -426,7 +406,6 @@ asm_function aligned_block_fill_strd_armv5te
 subs SIZE, SIZE, #64
 bgt 0b
 pop {r4-r12, pc}
-.endfunc
 
 asm_function aligned_block_copy_incr_armv5te
 push {r4-r12, lr}
@@ -442,7 +421,6 @@ asm_function aligned_block_copy_incr_armv5te
 stmia DST!, {r8-r11}
 bgt 0b
 pop {r4-r12, pc}
-.endfunc
 
 asm_function aligned_block_copy_wrap_armv5te
 push {r4-r12, lr}
@@ -458,7 +436,6 @@ asm_function aligned_block_copy_wrap_armv5te
 stmia DST!, {r8-r11}
 bgt 0b
 pop {r4-r12, pc}
-.endfunc
 
 asm_function aligned_block_copy_vfp
 push {r4-r12, lr}
@@ -470,6 +447,5 @@ asm_function aligned_block_copy_vfp
 bgt 0b
 vpop {d8-d15}
 pop {r4-r12, pc}
-.endfunc
 
 #endif
diff --git a/mips-32.S b/mips-32.S
index 17b2b7f..4f7ddae 100644
--- a/mips-32.S
+++ b/mips-32.S
@@ -32,7 +32,6 @@
 .macro asm_function function_name
 .global \function_name
 .type \function_name, @function
- .func \function_name
 \function_name:
 .endm
 
@@ -93,7 +92,7 @@ asm_function aligned_block_fill_pf32_mips32
 2:
 jr $ra
 nop
-.endfunc
+
 
 /*
 * void aligned_block_copy_pf32_mips32(int64_t *dst, int64_t *src, int size)
@@ -178,6 +177,6 @@ asm_function aligned_block_copy_pf32_mips32
 lw $s7, 28($sp)
 jr $ra
 addi $sp, $sp, 32
-.endfunc
+
 
 #endif
diff --git a/x86-sse2.S b/x86-sse2.S
index d8840e4..409031b 100644
--- a/x86-sse2.S
+++ b/x86-sse2.S
@@ -30,7 +30,6 @@
 
 .macro asm_function_helper function_name
 .global \function_name
-.func \function_name
 \function_name:
 #ifdef __amd64__
 #ifdef _WIN64
@@ -90,7 +89,7 @@ asm_function aligned_block_copy_movsb
 pop3 edi esi ecx
 #endif
 ret
-.endfunc
+
 
 asm_function aligned_block_copy_movsd
 0:
@@ -110,7 +109,7 @@ asm_function aligned_block_copy_movsd
 pop3 edi esi ecx
 #endif
 ret
-.endfunc
+
 
 asm_function aligned_block_copy_sse2
 0:
@@ -127,7 +126,7 @@ asm_function aligned_block_copy_sse2
 sub SIZE, 64
 jg 0b
 ret
-.endfunc
+
 
 asm_function aligned_block_copy_nt_sse2
 0:
@@ -144,7 +143,7 @@ asm_function aligned_block_copy_nt_sse2
 sub SIZE, 64
 jg 0b
 ret
-.endfunc
+
 
 asm_function aligned_block_copy_pf32_sse2
 0:
@@ -163,7 +162,7 @@ asm_function aligned_block_copy_pf32_sse2
 sub SIZE, 64
 jg 0b
 ret
-.endfunc
+
 
 asm_function aligned_block_copy_nt_pf32_sse2
 0:
@@ -182,7 +181,7 @@ asm_function aligned_block_copy_nt_pf32_sse2
 sub SIZE, 64
 jg 0b
 ret
-.endfunc
+
 
 asm_function aligned_block_copy_pf64_sse2
 0:
@@ -200,7 +199,7 @@ asm_function aligned_block_copy_pf64_sse2
 sub SIZE, 64
 jg 0b
 ret
-.endfunc
+
 
 asm_function aligned_block_copy_nt_pf64_sse2
 0:
@@ -218,7 +217,7 @@ asm_function aligned_block_copy_nt_pf64_sse2
 sub SIZE, 64
 jg 0b
 ret
-.endfunc
+
 
 asm_function aligned_block_fill_sse2
 movdqa xmm0, [SRC + 0]
@@ -231,7 +230,7 @@ asm_function aligned_block_fill_sse2
 sub SIZE, 64
 jg 0b
 ret
-.endfunc
+
 
 asm_function aligned_block_fill_nt_sse2
 movdqa xmm0, [SRC + 0]
@@ -244,7 +243,7 @@ asm_function aligned_block_fill_nt_sse2
 sub SIZE, 64
 jg 0b
 ret
-.endfunc
+
 
 /*****************************************************************************/
 
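For reference, the sketch below illustrates the directive change the patch applies throughout: the stabs-only .func/.endfunc pair is dropped and only the ELF .type annotation is kept. It is an illustrative snippet, not code from the patched files; "demo_copy" is a hypothetical AArch64 function name, and the #if 0 guard assumes the file is run through the C preprocessor (as the .S files above are, given their #ifdef usage).

/* Illustrative sketch only -- not part of the patch above. */

#if 0
/* Old style: .func/.endfunc emit stabs debug records. When stabs is not
 * in use gas silently ignores them, but clang's integrated assembler
 * rejects the directives, which is the failure this patch avoids. */
    .global demo_copy
    .type   demo_copy, %function
.func demo_copy
demo_copy:
    ret
.endfunc
#endif

/* New style: .type alone marks the symbol as a function in the ELF
 * symbol table and is accepted by both gas and clang. */
    .global demo_copy
    .type   demo_copy, %function
demo_copy:
    ret

Assembling the old-style block (with the #if 0 guard removed) via clang -c should reproduce the rejection described in the commit message, while gcc -c, which hands the file to gas, accepts it.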