1*4882a593SmuzhiyunFrom 372bdf0348fb86d671e73baab19daa34bd0cf73d Mon Sep 17 00:00:00 2001
2*4882a593SmuzhiyunFrom: Fabrice Fontaine <fontaine.fabrice@gmail.com>
3*4882a593SmuzhiyunDate: Tue, 9 Nov 2021 19:01:20 +0100
4*4882a593SmuzhiyunSubject: [PATCH] Revert "workaround a miscompilation issue in clang 12
5*4882a593Smuzhiyun (XCode 13)"
6*4882a593Smuzhiyun
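7*4882a593SmuzhiyunThis reverts commit 219329f8e777af54d785ae7259f8be32a714b751, which added a std::tuple-returning xor_CNs() helper to sha3.cpp and sha3_bmi2.cpp as a workaround for a suspected clang 12 (XCode 13) miscompilation; see the upstream issue referenced below for the motivation of this revert.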
8*4882a593Smuzhiyun
9*4882a593SmuzhiyunSigned-off-by: Fabrice Fontaine <fontaine.fabrice@gmail.com>
10*4882a593Smuzhiyun[Upstream status: https://github.com/randombit/botan/issues/2845]
11*4882a593Smuzhiyun---
12*4882a593Smuzhiyun src/lib/hash/sha3/sha3.cpp                | 46 ++++++-----------------
13*4882a593Smuzhiyun src/lib/hash/sha3/sha3_bmi2/sha3_bmi2.cpp | 46 ++++++-----------------
14*4882a593Smuzhiyun 2 files changed, 22 insertions(+), 70 deletions(-)
15*4882a593Smuzhiyun
16*4882a593Smuzhiyundiff --git a/src/lib/hash/sha3/sha3.cpp b/src/lib/hash/sha3/sha3.cpp
17*4882a593Smuzhiyunindex 289e451ff..690c2b264 100644
18*4882a593Smuzhiyun--- a/src/lib/hash/sha3/sha3.cpp
19*4882a593Smuzhiyun+++ b/src/lib/hash/sha3/sha3.cpp
20*4882a593Smuzhiyun@@ -11,47 +11,23 @@
21*4882a593Smuzhiyun #include <botan/exceptn.h>
22*4882a593Smuzhiyun #include <botan/cpuid.h>
23*4882a593Smuzhiyun
24*4882a593Smuzhiyun-#include <tuple>
25*4882a593Smuzhiyun-
26*4882a593Smuzhiyun namespace Botan {
27*4882a593Smuzhiyun
28*4882a593Smuzhiyun namespace {
29*4882a593Smuzhiyun
30*4882a593Smuzhiyun-// This is a workaround for a suspected bug in clang 12 (and XCode 13)
31*4882a593Smuzhiyun-// that caused a miscompile of the SHA3 implementation for optimization
32*4882a593Smuzhiyun-// level -O2 and higher.
33*4882a593Smuzhiyun-//
34*4882a593Smuzhiyun-// For details, see: https://github.com/randombit/botan/issues/2802
35*4882a593Smuzhiyun-#if    defined(__clang__) && \
36*4882a593Smuzhiyun-    (( defined(__apple_build_version__) && __clang_major__ == 13) || \
37*4882a593Smuzhiyun-     (!defined(__apple_build_version__) && __clang_major__ == 12))
38*4882a593Smuzhiyun-#define BOTAN_WORKAROUND_MAYBE_INLINE __attribute__((noinline))
39*4882a593Smuzhiyun-#else
40*4882a593Smuzhiyun-#define BOTAN_WORKAROUND_MAYBE_INLINE inline
41*4882a593Smuzhiyun-#endif
42*4882a593Smuzhiyun-
43*4882a593Smuzhiyun-BOTAN_WORKAROUND_MAYBE_INLINE std::tuple<uint64_t, uint64_t, uint64_t, uint64_t, uint64_t>
44*4882a593Smuzhiyun-   xor_CNs(const uint64_t A[25])
45*4882a593Smuzhiyun-   {
46*4882a593Smuzhiyun-   return {
47*4882a593Smuzhiyun-      A[0] ^ A[5] ^ A[10] ^ A[15] ^ A[20],
48*4882a593Smuzhiyun-      A[1] ^ A[6] ^ A[11] ^ A[16] ^ A[21],
49*4882a593Smuzhiyun-      A[2] ^ A[7] ^ A[12] ^ A[17] ^ A[22],
50*4882a593Smuzhiyun-      A[3] ^ A[8] ^ A[13] ^ A[18] ^ A[23],
51*4882a593Smuzhiyun-      A[4] ^ A[9] ^ A[14] ^ A[19] ^ A[24]};
52*4882a593Smuzhiyun-   }
53*4882a593Smuzhiyun-
54*4882a593Smuzhiyun-#undef BOTAN_WORKAROUND_MAYBE_INLINE
55*4882a593Smuzhiyun-
56*4882a593Smuzhiyun inline void SHA3_round(uint64_t T[25], const uint64_t A[25], uint64_t RC)
57*4882a593Smuzhiyun    {
58*4882a593Smuzhiyun-   const auto Cs = xor_CNs(A);
59*4882a593Smuzhiyun-
60*4882a593Smuzhiyun-   const uint64_t D0 = rotl<1>(std::get<0>(Cs)) ^ std::get<3>(Cs);
61*4882a593Smuzhiyun-   const uint64_t D1 = rotl<1>(std::get<1>(Cs)) ^ std::get<4>(Cs);
62*4882a593Smuzhiyun-   const uint64_t D2 = rotl<1>(std::get<2>(Cs)) ^ std::get<0>(Cs);
63*4882a593Smuzhiyun-   const uint64_t D3 = rotl<1>(std::get<3>(Cs)) ^ std::get<1>(Cs);
64*4882a593Smuzhiyun-   const uint64_t D4 = rotl<1>(std::get<4>(Cs)) ^ std::get<2>(Cs);
65*4882a593Smuzhiyun+   const uint64_t C0 = A[0] ^ A[5] ^ A[10] ^ A[15] ^ A[20];
66*4882a593Smuzhiyun+   const uint64_t C1 = A[1] ^ A[6] ^ A[11] ^ A[16] ^ A[21];
67*4882a593Smuzhiyun+   const uint64_t C2 = A[2] ^ A[7] ^ A[12] ^ A[17] ^ A[22];
68*4882a593Smuzhiyun+   const uint64_t C3 = A[3] ^ A[8] ^ A[13] ^ A[18] ^ A[23];
69*4882a593Smuzhiyun+   const uint64_t C4 = A[4] ^ A[9] ^ A[14] ^ A[19] ^ A[24];
70*4882a593Smuzhiyun+
71*4882a593Smuzhiyun+   const uint64_t D0 = rotl<1>(C0) ^ C3;
72*4882a593Smuzhiyun+   const uint64_t D1 = rotl<1>(C1) ^ C4;
73*4882a593Smuzhiyun+   const uint64_t D2 = rotl<1>(C2) ^ C0;
74*4882a593Smuzhiyun+   const uint64_t D3 = rotl<1>(C3) ^ C1;
75*4882a593Smuzhiyun+   const uint64_t D4 = rotl<1>(C4) ^ C2;
76*4882a593Smuzhiyun
77*4882a593Smuzhiyun    const uint64_t B00 =          A[ 0] ^ D1;
78*4882a593Smuzhiyun    const uint64_t B01 = rotl<44>(A[ 6] ^ D2);
79*4882a593Smuzhiyundiff --git a/src/lib/hash/sha3/sha3_bmi2/sha3_bmi2.cpp b/src/lib/hash/sha3/sha3_bmi2/sha3_bmi2.cpp
80*4882a593Smuzhiyunindex c7f1914a3..a9650ad9d 100644
81*4882a593Smuzhiyun--- a/src/lib/hash/sha3/sha3_bmi2/sha3_bmi2.cpp
82*4882a593Smuzhiyun+++ b/src/lib/hash/sha3/sha3_bmi2/sha3_bmi2.cpp
83*4882a593Smuzhiyun@@ -8,47 +8,23 @@
84*4882a593Smuzhiyun #include <botan/sha3.h>
85*4882a593Smuzhiyun #include <botan/rotate.h>
86*4882a593Smuzhiyun
87*4882a593Smuzhiyun-#include <tuple>
88*4882a593Smuzhiyun-
89*4882a593Smuzhiyun namespace Botan {
90*4882a593Smuzhiyun
91*4882a593Smuzhiyun namespace {
92*4882a593Smuzhiyun
93*4882a593Smuzhiyun-// This is a workaround for a suspected bug in clang 12 (and XCode 13)
94*4882a593Smuzhiyun-// that caused a miscompile of the SHA3 implementation for optimization
95*4882a593Smuzhiyun-// level -O2 and higher.
96*4882a593Smuzhiyun-//
97*4882a593Smuzhiyun-// For details, see: https://github.com/randombit/botan/issues/2802
98*4882a593Smuzhiyun-#if    defined(__clang__) && \
99*4882a593Smuzhiyun-    (( defined(__apple_build_version__) && __clang_major__ == 13) || \
100*4882a593Smuzhiyun-     (!defined(__apple_build_version__) && __clang_major__ == 12))
101*4882a593Smuzhiyun-#define BOTAN_WORKAROUND_MAYBE_INLINE __attribute__((noinline))
102*4882a593Smuzhiyun-#else
103*4882a593Smuzhiyun-#define BOTAN_WORKAROUND_MAYBE_INLINE inline
104*4882a593Smuzhiyun-#endif
105*4882a593Smuzhiyun-
106*4882a593Smuzhiyun-BOTAN_WORKAROUND_MAYBE_INLINE std::tuple<uint64_t, uint64_t, uint64_t, uint64_t, uint64_t>
107*4882a593Smuzhiyun-   xor_CNs(const uint64_t A[25])
108*4882a593Smuzhiyun-   {
109*4882a593Smuzhiyun-   return {
110*4882a593Smuzhiyun-      A[0] ^ A[5] ^ A[10] ^ A[15] ^ A[20],
111*4882a593Smuzhiyun-      A[1] ^ A[6] ^ A[11] ^ A[16] ^ A[21],
112*4882a593Smuzhiyun-      A[2] ^ A[7] ^ A[12] ^ A[17] ^ A[22],
113*4882a593Smuzhiyun-      A[3] ^ A[8] ^ A[13] ^ A[18] ^ A[23],
114*4882a593Smuzhiyun-      A[4] ^ A[9] ^ A[14] ^ A[19] ^ A[24]};
115*4882a593Smuzhiyun-   }
116*4882a593Smuzhiyun-
117*4882a593Smuzhiyun-#undef BOTAN_WORKAROUND_MAYBE_INLINE
118*4882a593Smuzhiyun-
119*4882a593Smuzhiyun inline void SHA3_BMI2_round(uint64_t T[25], const uint64_t A[25], uint64_t RC)
120*4882a593Smuzhiyun    {
121*4882a593Smuzhiyun-   const auto Cs = xor_CNs(A);
122*4882a593Smuzhiyun-
123*4882a593Smuzhiyun-   const uint64_t D0 = rotl<1>(std::get<0>(Cs)) ^ std::get<3>(Cs);
124*4882a593Smuzhiyun-   const uint64_t D1 = rotl<1>(std::get<1>(Cs)) ^ std::get<4>(Cs);
125*4882a593Smuzhiyun-   const uint64_t D2 = rotl<1>(std::get<2>(Cs)) ^ std::get<0>(Cs);
126*4882a593Smuzhiyun-   const uint64_t D3 = rotl<1>(std::get<3>(Cs)) ^ std::get<1>(Cs);
127*4882a593Smuzhiyun-   const uint64_t D4 = rotl<1>(std::get<4>(Cs)) ^ std::get<2>(Cs);
128*4882a593Smuzhiyun+   const uint64_t C0 = A[0] ^ A[5] ^ A[10] ^ A[15] ^ A[20];
129*4882a593Smuzhiyun+   const uint64_t C1 = A[1] ^ A[6] ^ A[11] ^ A[16] ^ A[21];
130*4882a593Smuzhiyun+   const uint64_t C2 = A[2] ^ A[7] ^ A[12] ^ A[17] ^ A[22];
131*4882a593Smuzhiyun+   const uint64_t C3 = A[3] ^ A[8] ^ A[13] ^ A[18] ^ A[23];
132*4882a593Smuzhiyun+   const uint64_t C4 = A[4] ^ A[9] ^ A[14] ^ A[19] ^ A[24];
133*4882a593Smuzhiyun+
134*4882a593Smuzhiyun+   const uint64_t D0 = rotl<1>(C0) ^ C3;
135*4882a593Smuzhiyun+   const uint64_t D1 = rotl<1>(C1) ^ C4;
136*4882a593Smuzhiyun+   const uint64_t D2 = rotl<1>(C2) ^ C0;
137*4882a593Smuzhiyun+   const uint64_t D3 = rotl<1>(C3) ^ C1;
138*4882a593Smuzhiyun+   const uint64_t D4 = rotl<1>(C4) ^ C2;
139*4882a593Smuzhiyun
140*4882a593Smuzhiyun    const uint64_t B00 =          A[ 0] ^ D1;
141*4882a593Smuzhiyun    const uint64_t B01 = rotl<44>(A[ 6] ^ D2);
142*4882a593Smuzhiyun--
143*4882a593Smuzhiyun2.33.0
144*4882a593Smuzhiyun
145