1*4882a593SmuzhiyunFrom 372bdf0348fb86d671e73baab19daa34bd0cf73d Mon Sep 17 00:00:00 2001 2*4882a593SmuzhiyunFrom: Fabrice Fontaine <fontaine.fabrice@gmail.com> 3*4882a593SmuzhiyunDate: Tue, 9 Nov 2021 19:01:20 +0100 4*4882a593SmuzhiyunSubject: [PATCH] Revert "workaround a miscompilation issue in clang 12 5*4882a593Smuzhiyun (XCode 13)" 6*4882a593Smuzhiyun 7*4882a593SmuzhiyunThis reverts commit 219329f8e777af54d785ae7259f8be32a714b751. 8*4882a593Smuzhiyun 9*4882a593SmuzhiyunSigned-off-by: Fabrice Fontaine <fontaine.fabrice@gmail.com> 10*4882a593Smuzhiyun[Upstream status: https://github.com/randombit/botan/issues/2845] 11*4882a593Smuzhiyun--- 12*4882a593Smuzhiyun src/lib/hash/sha3/sha3.cpp | 46 ++++++----------------- 13*4882a593Smuzhiyun src/lib/hash/sha3/sha3_bmi2/sha3_bmi2.cpp | 46 ++++++----------------- 14*4882a593Smuzhiyun 2 files changed, 22 insertions(+), 70 deletions(-) 15*4882a593Smuzhiyun 16*4882a593Smuzhiyundiff --git a/src/lib/hash/sha3/sha3.cpp b/src/lib/hash/sha3/sha3.cpp 17*4882a593Smuzhiyunindex 289e451ff..690c2b264 100644 18*4882a593Smuzhiyun--- a/src/lib/hash/sha3/sha3.cpp 19*4882a593Smuzhiyun+++ b/src/lib/hash/sha3/sha3.cpp 20*4882a593Smuzhiyun@@ -11,47 +11,23 @@ 21*4882a593Smuzhiyun #include <botan/exceptn.h> 22*4882a593Smuzhiyun #include <botan/cpuid.h> 23*4882a593Smuzhiyun 24*4882a593Smuzhiyun-#include <tuple> 25*4882a593Smuzhiyun- 26*4882a593Smuzhiyun namespace Botan { 27*4882a593Smuzhiyun 28*4882a593Smuzhiyun namespace { 29*4882a593Smuzhiyun 30*4882a593Smuzhiyun-// This is a workaround for a suspected bug in clang 12 (and XCode 13) 31*4882a593Smuzhiyun-// that caused a miscompile of the SHA3 implementation for optimization 32*4882a593Smuzhiyun-// level -O2 and higher. 33*4882a593Smuzhiyun-// 34*4882a593Smuzhiyun-// For details, see: https://github.com/randombit/botan/issues/2802 35*4882a593Smuzhiyun-#if defined(__clang__) && \ 36*4882a593Smuzhiyun- (( defined(__apple_build_version__) && __clang_major__ == 13) || \ 37*4882a593Smuzhiyun- (!defined(__apple_build_version__) && __clang_major__ == 12)) 38*4882a593Smuzhiyun-#define BOTAN_WORKAROUND_MAYBE_INLINE __attribute__((noinline)) 39*4882a593Smuzhiyun-#else 40*4882a593Smuzhiyun-#define BOTAN_WORKAROUND_MAYBE_INLINE inline 41*4882a593Smuzhiyun-#endif 42*4882a593Smuzhiyun- 43*4882a593Smuzhiyun-BOTAN_WORKAROUND_MAYBE_INLINE std::tuple<uint64_t, uint64_t, uint64_t, uint64_t, uint64_t> 44*4882a593Smuzhiyun- xor_CNs(const uint64_t A[25]) 45*4882a593Smuzhiyun- { 46*4882a593Smuzhiyun- return { 47*4882a593Smuzhiyun- A[0] ^ A[5] ^ A[10] ^ A[15] ^ A[20], 48*4882a593Smuzhiyun- A[1] ^ A[6] ^ A[11] ^ A[16] ^ A[21], 49*4882a593Smuzhiyun- A[2] ^ A[7] ^ A[12] ^ A[17] ^ A[22], 50*4882a593Smuzhiyun- A[3] ^ A[8] ^ A[13] ^ A[18] ^ A[23], 51*4882a593Smuzhiyun- A[4] ^ A[9] ^ A[14] ^ A[19] ^ A[24]}; 52*4882a593Smuzhiyun- } 53*4882a593Smuzhiyun- 54*4882a593Smuzhiyun-#undef BOTAN_WORKAROUND_MAYBE_INLINE 55*4882a593Smuzhiyun- 56*4882a593Smuzhiyun inline void SHA3_round(uint64_t T[25], const uint64_t A[25], uint64_t RC) 57*4882a593Smuzhiyun { 58*4882a593Smuzhiyun- const auto Cs = xor_CNs(A); 59*4882a593Smuzhiyun- 60*4882a593Smuzhiyun- const uint64_t D0 = rotl<1>(std::get<0>(Cs)) ^ std::get<3>(Cs); 61*4882a593Smuzhiyun- const uint64_t D1 = rotl<1>(std::get<1>(Cs)) ^ std::get<4>(Cs); 62*4882a593Smuzhiyun- const uint64_t D2 = rotl<1>(std::get<2>(Cs)) ^ std::get<0>(Cs); 63*4882a593Smuzhiyun- const uint64_t D3 = rotl<1>(std::get<3>(Cs)) ^ std::get<1>(Cs); 64*4882a593Smuzhiyun- const uint64_t D4 = rotl<1>(std::get<4>(Cs)) ^ std::get<2>(Cs); 65*4882a593Smuzhiyun+ const uint64_t C0 = A[0] ^ A[5] ^ A[10] ^ A[15] ^ A[20]; 66*4882a593Smuzhiyun+ const uint64_t C1 = A[1] ^ A[6] ^ A[11] ^ A[16] ^ A[21]; 67*4882a593Smuzhiyun+ const uint64_t C2 = A[2] ^ A[7] ^ A[12] ^ A[17] ^ A[22]; 68*4882a593Smuzhiyun+ const uint64_t C3 = A[3] ^ A[8] ^ A[13] ^ A[18] ^ A[23]; 69*4882a593Smuzhiyun+ const uint64_t C4 = A[4] ^ A[9] ^ A[14] ^ A[19] ^ A[24]; 70*4882a593Smuzhiyun+ 71*4882a593Smuzhiyun+ const uint64_t D0 = rotl<1>(C0) ^ C3; 72*4882a593Smuzhiyun+ const uint64_t D1 = rotl<1>(C1) ^ C4; 73*4882a593Smuzhiyun+ const uint64_t D2 = rotl<1>(C2) ^ C0; 74*4882a593Smuzhiyun+ const uint64_t D3 = rotl<1>(C3) ^ C1; 75*4882a593Smuzhiyun+ const uint64_t D4 = rotl<1>(C4) ^ C2; 76*4882a593Smuzhiyun 77*4882a593Smuzhiyun const uint64_t B00 = A[ 0] ^ D1; 78*4882a593Smuzhiyun const uint64_t B01 = rotl<44>(A[ 6] ^ D2); 79*4882a593Smuzhiyundiff --git a/src/lib/hash/sha3/sha3_bmi2/sha3_bmi2.cpp b/src/lib/hash/sha3/sha3_bmi2/sha3_bmi2.cpp 80*4882a593Smuzhiyunindex c7f1914a3..a9650ad9d 100644 81*4882a593Smuzhiyun--- a/src/lib/hash/sha3/sha3_bmi2/sha3_bmi2.cpp 82*4882a593Smuzhiyun+++ b/src/lib/hash/sha3/sha3_bmi2/sha3_bmi2.cpp 83*4882a593Smuzhiyun@@ -8,47 +8,23 @@ 84*4882a593Smuzhiyun #include <botan/sha3.h> 85*4882a593Smuzhiyun #include <botan/rotate.h> 86*4882a593Smuzhiyun 87*4882a593Smuzhiyun-#include <tuple> 88*4882a593Smuzhiyun- 89*4882a593Smuzhiyun namespace Botan { 90*4882a593Smuzhiyun 91*4882a593Smuzhiyun namespace { 92*4882a593Smuzhiyun 93*4882a593Smuzhiyun-// This is a workaround for a suspected bug in clang 12 (and XCode 13) 94*4882a593Smuzhiyun-// that caused a miscompile of the SHA3 implementation for optimization 95*4882a593Smuzhiyun-// level -O2 and higher. 96*4882a593Smuzhiyun-// 97*4882a593Smuzhiyun-// For details, see: https://github.com/randombit/botan/issues/2802 98*4882a593Smuzhiyun-#if defined(__clang__) && \ 99*4882a593Smuzhiyun- (( defined(__apple_build_version__) && __clang_major__ == 13) || \ 100*4882a593Smuzhiyun- (!defined(__apple_build_version__) && __clang_major__ == 12)) 101*4882a593Smuzhiyun-#define BOTAN_WORKAROUND_MAYBE_INLINE __attribute__((noinline)) 102*4882a593Smuzhiyun-#else 103*4882a593Smuzhiyun-#define BOTAN_WORKAROUND_MAYBE_INLINE inline 104*4882a593Smuzhiyun-#endif 105*4882a593Smuzhiyun- 106*4882a593Smuzhiyun-BOTAN_WORKAROUND_MAYBE_INLINE std::tuple<uint64_t, uint64_t, uint64_t, uint64_t, uint64_t> 107*4882a593Smuzhiyun- xor_CNs(const uint64_t A[25]) 108*4882a593Smuzhiyun- { 109*4882a593Smuzhiyun- return { 110*4882a593Smuzhiyun- A[0] ^ A[5] ^ A[10] ^ A[15] ^ A[20], 111*4882a593Smuzhiyun- A[1] ^ A[6] ^ A[11] ^ A[16] ^ A[21], 112*4882a593Smuzhiyun- A[2] ^ A[7] ^ A[12] ^ A[17] ^ A[22], 113*4882a593Smuzhiyun- A[3] ^ A[8] ^ A[13] ^ A[18] ^ A[23], 114*4882a593Smuzhiyun- A[4] ^ A[9] ^ A[14] ^ A[19] ^ A[24]}; 115*4882a593Smuzhiyun- } 116*4882a593Smuzhiyun- 117*4882a593Smuzhiyun-#undef BOTAN_WORKAROUND_MAYBE_INLINE 118*4882a593Smuzhiyun- 119*4882a593Smuzhiyun inline void SHA3_BMI2_round(uint64_t T[25], const uint64_t A[25], uint64_t RC) 120*4882a593Smuzhiyun { 121*4882a593Smuzhiyun- const auto Cs = xor_CNs(A); 122*4882a593Smuzhiyun- 123*4882a593Smuzhiyun- const uint64_t D0 = rotl<1>(std::get<0>(Cs)) ^ std::get<3>(Cs); 124*4882a593Smuzhiyun- const uint64_t D1 = rotl<1>(std::get<1>(Cs)) ^ std::get<4>(Cs); 125*4882a593Smuzhiyun- const uint64_t D2 = rotl<1>(std::get<2>(Cs)) ^ std::get<0>(Cs); 126*4882a593Smuzhiyun- const uint64_t D3 = rotl<1>(std::get<3>(Cs)) ^ std::get<1>(Cs); 127*4882a593Smuzhiyun- const uint64_t D4 = rotl<1>(std::get<4>(Cs)) ^ std::get<2>(Cs); 128*4882a593Smuzhiyun+ const uint64_t C0 = A[0] ^ A[5] ^ A[10] ^ A[15] ^ A[20]; 129*4882a593Smuzhiyun+ const uint64_t C1 = A[1] ^ A[6] ^ A[11] ^ A[16] ^ A[21]; 130*4882a593Smuzhiyun+ const uint64_t C2 = A[2] ^ A[7] ^ A[12] ^ A[17] ^ A[22]; 131*4882a593Smuzhiyun+ const uint64_t C3 = A[3] ^ A[8] ^ A[13] ^ A[18] ^ A[23]; 132*4882a593Smuzhiyun+ const uint64_t C4 = A[4] ^ A[9] ^ A[14] ^ A[19] ^ A[24]; 133*4882a593Smuzhiyun+ 134*4882a593Smuzhiyun+ const uint64_t D0 = rotl<1>(C0) ^ C3; 135*4882a593Smuzhiyun+ const uint64_t D1 = rotl<1>(C1) ^ C4; 136*4882a593Smuzhiyun+ const uint64_t D2 = rotl<1>(C2) ^ C0; 137*4882a593Smuzhiyun+ const uint64_t D3 = rotl<1>(C3) ^ C1; 138*4882a593Smuzhiyun+ const uint64_t D4 = rotl<1>(C4) ^ C2; 139*4882a593Smuzhiyun 140*4882a593Smuzhiyun const uint64_t B00 = A[ 0] ^ D1; 141*4882a593Smuzhiyun const uint64_t B01 = rotl<44>(A[ 6] ^ D2); 142*4882a593Smuzhiyun-- 143*4882a593Smuzhiyun2.33.0 144*4882a593Smuzhiyun 145