xref: /optee_os/core/lib/qcbor/src/ieee754.h (revision b586599be35c4311337a5d8db5f4b5e5c81a754d)
1 // SPDX-License-Identifier: BSD-3-Clause
2 /* ==========================================================================
3  * ieee754.h -- Conversion between half, double & single-precision floats
4  *
5  * Copyright (c) 2018-2024, Laurence Lundblade. All rights reserved.
6  *
7  * SPDX-License-Identifier: BSD-3-Clause
8  *
9  * See BSD-3-Clause license in README.md
10  *
11  * Created on 7/23/18
12  * ========================================================================== */
13 
14 #ifndef QCBOR_DISABLE_PREFERRED_FLOAT
15 
16 #ifndef ieee754_h
17 #define ieee754_h
18 
19 #include <stdint.h>
20 
21 
22 /** @file ieee754.h
23  *
24  * This implements floating-point conversion between half, single and
25  * double precision floating-point numbers, in particular conversion to
26  * a smaller representation (e.g., double to single) that does not lose
27  * precision for CBOR preferred serialization.
28  *
29  * This implementation works entirely with shifts and masks and does
30  * not require any floating-point HW or library.
31  *
32  * This conforms to IEEE 754-2008, but note that it doesn't specify
33  * conversions, just the encodings.
34  *
35  * This is complete, supporting +/- infinity, +/- zero, subnormals and
36  * NaN payloads. NaN payloads are converted to smaller by dropping the
37  * right most bits if they are zero and shifting to the right. If the
38  * rightmost bits are not zero the conversion is not performed. When
39  * converting from smaller to larger, the payload is shifted left and
40  * zero-padded. This is what is specified by CBOR preferred
41  * serialization and what modern HW conversion instructions do. CBOR
42  * CDE handling for NaN is not clearly specified, but upcoming
43  * documents may clarify this.
44  *
45  * There is no special handling of signaling and quiet NaNs. It probably
46  * isn't necessary to transmit these special NaNs as their purpose is
47  * more for propagating errors up through some calculation. In many
48  * cases the handling of the NaN payload will work for signaling and
49  * quiet NaNs.
50  *
51  * A previous version of this was usable as a general library for
52  * conversion. This version is reduced to what is needed for CBOR.
53  */
54 
55 
56 /**
57  * @brief Convert half-precision float to double-precision float.
58  *
59  * @param[in] uHalfPrecision   Half-precision number to convert.
60  *
61  * @returns Double-precision value.
62  *
63  * This is a lossless conversion because every half-precision value
64  * can be represented as a double. There is no error condition.
65  *
66  * There is no half-precision type in C, so it is represented here as
67  * a @c uint16_t. The bits of @c uHalfPrecision are as described for
68  * half-precision by IEEE 754.
69  */
70 double
71 IEEE754_HalfToDouble(uint16_t uHalfPrecision);
72 
73 
74 /** Holds a floating-point value that could be half, single or
75  * double-precision; @c uSize records which.  The value is in a
76  * @c uint64_t that may be copied to a float or double.  Simply casting
77  * uValue will usually work but may generate compiler or static analyzer
78  * warnings. Using UsefulBufUtil_CopyUint64ToDouble() or
79  * UsefulBufUtil_CopyUint32ToFloat() will not (and will not generate
80  * any extra code).
81  */
82 typedef struct {
83    enum {IEEE754_UNION_IS_HALF   = 2, /* uValue holds a half-precision value */
84          IEEE754_UNION_IS_SINGLE = 4, /* uValue holds a single-precision value */
85          IEEE754_UNION_IS_DOUBLE = 8, /* uValue holds a double-precision value */
86    } uSize; /* Size of uValue; 2, 4 and 8 match the byte widths of the formats */
87    uint64_t uValue; /* The value itself, in the precision given by uSize */
88 } IEEE754_union;
89 
90 
91 /**
92  * @brief Convert a double to either single or half-precision.
93  *
94  * @param[in] d                    The value to convert.
95  * @param[in] bAllowHalfPrecision  If true, convert to either half or
96  *                                 single precision.
97  *
98  * @returns The value unconverted, or converted to single or half-precision.
99  *
100  * This always succeeds. If the value cannot be converted without
101  * loss of precision, it is returned unconverted.
102  *
103  * This handles all subnormals and NaN payloads.
104  */
105 IEEE754_union
106 IEEE754_DoubleToSmaller(double d, int bAllowHalfPrecision);
107 
108 
109 /**
110  * @brief Convert a single-precision float to half-precision.
111  *
112  * @param[in] f  The value to convert.
113  *
114  * @returns The value unconverted, or converted to half-precision.
115  *
116  * This always succeeds. If the value cannot be converted without
117  * loss of precision, it is returned unconverted.
118  *
119  * This handles all subnormals and NaN payloads.
120  */
121 IEEE754_union
122 IEEE754_SingleToHalf(float f);
123 
124 
125 #endif /* ieee754_h */
126 
127 #endif /* QCBOR_DISABLE_PREFERRED_FLOAT */
128