1*4882a593SmuzhiyunFrom a0ae2ba37ca479c6edddec8634b25686be965e0d Mon Sep 17 00:00:00 2001
2*4882a593SmuzhiyunFrom: Peter Korsgaard <peter@korsgaard.com>
3*4882a593SmuzhiyunDate: Mon, 27 Aug 2018 22:50:57 +0200
4*4882a593SmuzhiyunSubject: [PATCH] bn_mul.h: fix x86 PIC inline ASM compilation with GCC < 5
5*4882a593SmuzhiyunMIME-Version: 1.0
6*4882a593SmuzhiyunContent-Type: text/plain; charset=UTF-8
7*4882a593SmuzhiyunContent-Transfer-Encoding: 8bit
8*4882a593Smuzhiyun
9*4882a593SmuzhiyunFixes #1910
10*4882a593Smuzhiyun
11*4882a593SmuzhiyunWith ebx added to the MULADDC_STOP clobber list to fix #1550, the inline
12*4882a593Smuzhiyunassembly fails to build with GCC < 5 in PIC mode with the following error:
13*4882a593Smuzhiyun
14*4882a593Smuzhiyuninclude/mbedtls/bn_mul.h:46:13: error: PIC register clobbered by ‘ebx’ in ‘asm’
15*4882a593Smuzhiyun
16*4882a593SmuzhiyunThis is because older GCC versions treated the x86 ebx register (which is
17*4882a593Smuzhiyunused for the GOT) as a fixed reserved register when building as PIC.
18*4882a593Smuzhiyun
19*4882a593SmuzhiyunThis is fixed by an improved register allocator in GCC 5+.  From the release
20*4882a593Smuzhiyunnotes:
21*4882a593Smuzhiyun
22*4882a593SmuzhiyunRegister allocation improvements: Reuse of the PIC hard register, instead of
23*4882a593Smuzhiyunusing a fixed register, was implemented on x86/x86-64 targets.  This
24*4882a593Smuzhiyunimproves generated PIC code performance as more hard registers can be used.
25*4882a593Smuzhiyun
26*4882a593Smuzhiyunhttps://www.gnu.org/software/gcc/gcc-5/changes.html
27*4882a593Smuzhiyun
28*4882a593SmuzhiyunAs a workaround, detect this situation and disable the inline assembly,
29*4882a593Smuzhiyunsimilar to the MULADDC_CANNOT_USE_R7 logic.
30*4882a593Smuzhiyun
31*4882a593SmuzhiyunSigned-off-by: Peter Korsgaard <peter@korsgaard.com>
32*4882a593SmuzhiyunUpstream: https://github.com/ARMmbed/mbedtls/pull/1986
33*4882a593Smuzhiyun---
34*4882a593Smuzhiyun include/mbedtls/bn_mul.h | 18 +++++++++++++++++-
35*4882a593Smuzhiyun 1 file changed, 17 insertions(+), 1 deletion(-)
36*4882a593Smuzhiyun
37*4882a593Smuzhiyundiff --git a/include/mbedtls/bn_mul.h b/include/mbedtls/bn_mul.h
38*4882a593Smuzhiyunindex b587317d9..74a2d29be 100644
39*4882a593Smuzhiyun--- a/include/mbedtls/bn_mul.h
40*4882a593Smuzhiyun+++ b/include/mbedtls/bn_mul.h
41*4882a593Smuzhiyun@@ -50,13 +50,29 @@
42*4882a593Smuzhiyun #if defined(__GNUC__) && \
43*4882a593Smuzhiyun     ( !defined(__ARMCC_VERSION) || __ARMCC_VERSION >= 6000000 )
44*4882a593Smuzhiyun
45*4882a593Smuzhiyun+/*
46*4882a593Smuzhiyun+ * GCC < 5.0 treated the x86 ebx register (which is used for the GOT) as a
47*4882a593Smuzhiyun+ * fixed reserved register when building as PIC, leading to errors
48*4882a593Smuzhiyun+ * like: bn_mul.h:46:13: error: PIC register clobbered by ‘ebx’ in ‘asm’
49*4882a593Smuzhiyun+ *
50*4882a593Smuzhiyun+ * This is fixed by an improved register allocator in GCC 5+. From the
51*4882a593Smuzhiyun+ * release notes:
52*4882a593Smuzhiyun+ * Register allocation improvements: Reuse of the PIC hard register,
53*4882a593Smuzhiyun+ * instead of using a fixed register, was implemented on x86/x86-64
54*4882a593Smuzhiyun+ * targets. This improves generated PIC code performance as more hard
55*4882a593Smuzhiyun+ * registers can be used.
56*4882a593Smuzhiyun+ */
57*4882a593Smuzhiyun+#if defined(__GNUC__) && __GNUC__ < 5 && defined(__PIC__)
58*4882a593Smuzhiyun+#define MULADDC_CANNOT_USE_EBX
59*4882a593Smuzhiyun+#endif
60*4882a593Smuzhiyun+
61*4882a593Smuzhiyun /*
62*4882a593Smuzhiyun  * Disable use of the i386 assembly code below if option -O0, to disable all
63*4882a593Smuzhiyun  * compiler optimisations, is passed, detected with __OPTIMIZE__
64*4882a593Smuzhiyun  * This is done as the number of registers used in the assembly code doesn't
65*4882a593Smuzhiyun  * work with the -O0 option.
66*4882a593Smuzhiyun  */
67*4882a593Smuzhiyun-#if defined(__i386__) && defined(__OPTIMIZE__)
68*4882a593Smuzhiyun+#if defined(__i386__) && defined(__OPTIMIZE__) && !defined(MULADDC_CANNOT_USE_EBX)
69*4882a593Smuzhiyun
70*4882a593Smuzhiyun #define MULADDC_INIT                        \
71*4882a593Smuzhiyun     asm(                                    \
72*4882a593Smuzhiyun--
73*4882a593Smuzhiyun2.11.0
74*4882a593Smuzhiyun
75