xref: /optee_os/core/lib/qcbor/src/ieee754.h (revision 2e6f5bf11f69414c8c2cedf3c2bd7eab8aa5beb3)
1 /* ==========================================================================
2  * ieee754.h -- Conversion between half, double & single-precision floats
3  *
4  * Copyright (c) 2018-2024, Laurence Lundblade. All rights reserved.
5  *
6  * SPDX-License-Identifier: BSD-3-Clause
7  *
8  * See BSD-3-Clause license in README.md
9  *
10  * Created on 7/23/18
11  * ========================================================================== */
12 
13 #ifndef QCBOR_DISABLE_PREFERRED_FLOAT
14 
15 #ifndef ieee754_h
16 #define ieee754_h
17 
18 #include <stdint.h>
19 
20 
21 /** @file ieee754.h
22  *
23  * This implements floating-point conversion between half, single and
24  * double precision floating-point numbers, in particular conversion to
25  * a smaller representation (e.g., double to single) that does not lose
26  * precision for CBOR preferred serialization.
27  *
28  * This implementation works entirely with shifts and masks and does
29  * not require any floating-point HW or library.
30  *
31  * This conforms to IEEE 754-2008, but note that it doesn't specify
32  * conversions, just the encodings.
33  *
34  * This is complete, supporting +/- infinity, +/- zero, subnormals and
35  * NaN payloads. NaN payloads are converted to smaller by dropping the
36  * right most bits if they are zero and shifting to the right. If the
37  * rightmost bits are not zero the conversion is not performed. When
38  * converting from smaller to larger, the payload is shifted left and
39  * zero-padded. This is what is specified by CBOR preferred
40  * serialization and what modern HW conversion instructions do. CBOR
41  * CDE handling for NaN is not clearly specified, but upcoming
42  * documents may clarify this.
43  *
44  * There is no special handling of signaling and quiet NaNs. It probably
45  * isn't necessary to transmit these special NaNs as their purpose is
46  * more for propagating errors up through some calculation. In many
47  * cases the handling of the NaN payload will work for signaling and
48  * quiet NaNs.
49  *
50  * A previous version of this was usable as a general library for
51  * conversion. This version is reduced to what is needed for CBOR.
52  */
53 
54 
55 /**
56  * @brief Convert half-precision float to double-precision float.
57  *
58  * @param[in] uHalfPrecision   Half-precision number to convert.
59  *
60  * @returns Double-precision value.
61  *
62  * This is a lossless conversion because every half-precision value
63  * can be represented as a double. There is no error condition.
64  *
65  * There is no half-precision type in C, so it is represented here as
66  * a @c uint16_t. The bits of @c uHalfPrecision are as described for
67  * half-precision by IEEE 754.
68  */
69 double
70 IEEE754_HalfToDouble(uint16_t uHalfPrecision);
71 
72 
73 /** Holds a floating-point value that could be half, single or
74  * double-precision; @c uSize says which.  The value is in a
75  * @c uint64_t that may be copied to a float or double.  Simply
76  * casting @c uValue will usually work but may generate compiler or
77  * static analyzer warnings. Using UsefulBufUtil_CopyUint64ToDouble()
78  * or UsefulBufUtil_CopyUint32ToFloat() will not (and will not
79  * generate any extra code).
80  */
81 typedef struct {
82    enum {IEEE754_UNION_IS_HALF   = 2,
83          IEEE754_UNION_IS_SINGLE = 4,
84          IEEE754_UNION_IS_DOUBLE = 8,
85    } uSize; /* Size in bytes of the float held in uValue */
86    uint64_t uValue; /* Holds the value; its precision is given by uSize */
87 } IEEE754_union;
88 
89 
90 /**
91  * @brief Convert a double to either single or half-precision.
92  *
93  * @param[in] d                    The value to convert.
94  * @param[in] bAllowHalfPrecision  If true, convert to either half or
95  *                                 single precision.
96  *
97  * @returns The unconverted value, or it converted to single or half-precision.
98  *
99  * This always succeeds. If the value cannot be converted without
100  * loss of precision, it is returned unconverted.
101  *
102  * This handles all subnormals and NaN payloads.
103  */
104 IEEE754_union
105 IEEE754_DoubleToSmaller(double d, int bAllowHalfPrecision);
106 
107 
108 /**
109  * @brief Convert a single-precision float to half-precision.
110  *
111  * @param[in] f  The value to convert.
112  *
113  * @returns Either the unconverted value or it converted to half-precision.
114  *
115  * This always succeeds. If the value cannot be converted without
116  * loss of precision, it is returned unconverted.
117  *
118  * This handles all subnormals and NaN payloads.
119  */
120 IEEE754_union
121 IEEE754_SingleToHalf(float f);
122 
123 
124 #endif /* ieee754_h */
125 
126 #endif /* QCBOR_DISABLE_PREFERRED_FLOAT */
127