// SPDX-License-Identifier: BSD-3-Clause
/* ==========================================================================
 * ieee754.h -- Conversion between half, double & single-precision floats
 *
 * Copyright (c) 2018-2024, Laurence Lundblade. All rights reserved.
 *
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * See BSD-3-Clause license in README.md
 *
 * Created on 7/23/18
 * ========================================================================== */

#ifndef QCBOR_DISABLE_PREFERRED_FLOAT

#ifndef ieee754_h
#define ieee754_h

#include <stdint.h>


/** @file ieee754.h
 *
 * This implements floating-point conversion between half, single and
 * double precision floating-point numbers, in particular conversion to
 * a smaller representation (e.g., double to single) that does not lose
 * precision for CBOR preferred serialization.
 *
 * This implementation works entirely with shifts and masks and does
 * not require any floating-point HW or library.
 *
 * This conforms to IEEE 754-2008, but note that it doesn't specify
 * conversions, just the encodings.
 *
 * This is complete, supporting +/- infinity, +/- zero, subnormals and
 * NaN payloads. NaN payloads are converted to smaller by dropping the
 * rightmost bits if they are zero and shifting to the right. If the
 * rightmost bits are not zero the conversion is not performed. When
 * converting from smaller to larger, the payload is shifted left and
 * zero-padded. This is what is specified by CBOR preferred
 * serialization and what modern HW conversion instructions do. CBOR
 * CDE handling for NaN is not clearly specified, but upcoming
 * documents may clarify this.
 *
 * There is no special handling of signaling and quiet NaNs. It
 * probably isn't necessary to transmit these special NaNs as their
 * purpose is more for propagating errors up through some
 * calculation. In many cases the handling of the NaN payload will
 * work for signaling and quiet NaNs.
 *
 * A previous version of this was usable as a general library for
 * conversion. This version is reduced to what is needed for CBOR.
 */


/**
 * @brief Convert half-precision float to double-precision float.
 *
 * @param[in] uHalfPrecision   Half-precision number to convert.
 *
 * @returns Double-precision value.
 *
 * This is a lossless conversion because every half-precision value
 * can be represented as a double. There is no error condition.
 *
 * There is no half-precision type in C, so it is represented here as
 * a @c uint16_t. The bits of @c uHalfPrecision are as described for
 * half-precision by IEEE 754.
 */
double
IEEE754_HalfToDouble(uint16_t uHalfPrecision);


/** Holds a floating-point value that could be half, single or
 * double-precision. The value is in a @c uint64_t that may be copied
 * to a float or double. Simply casting uValue will usually work but
 * may generate compiler or static analyzer warnings. Using
 * UsefulBufUtil_CopyUint64ToDouble() or
 * UsefulBufUtil_CopyUint32ToFloat() will not (and will not generate
 * any extra code).
 */
typedef struct {
   /* Tells which precision uValue holds. Each enumerator's numeric
    * value equals the width in bytes of that encoding (half = 2,
    * single = 4, double = 8). */
   enum {IEEE754_UNION_IS_HALF   = 2,
         IEEE754_UNION_IS_SINGLE = 4,
         IEEE754_UNION_IS_DOUBLE = 8,
   } uSize; /* Size of uValue */
   /* Bits of the floating-point value; interpret according to uSize. */
   uint64_t uValue;
} IEEE754_union;


/**
 * @brief Convert a double to either single or half-precision.
 *
 * @param[in] d                    The value to convert.
 * @param[in] bAllowHalfPrecision  If true, convert to either half or
 *                                 single precision.
 *
 * @returns Unconverted value, or value converted to single or half-precision.
 *
 * This always succeeds. If the value cannot be converted without the
 * loss of precision, it is not converted.
 *
 * This handles all subnormals and NaN payloads.
 */
IEEE754_union
IEEE754_DoubleToSmaller(double d, int bAllowHalfPrecision);


/**
 * @brief Convert a single-precision float to half-precision.
 *
 * @param[in] f  The value to convert.
 *
 * @returns Either unconverted value or value converted to half-precision.
 *
 * This always succeeds. If the value cannot be converted without the
 * loss of precision, it is not converted.
 *
 * This handles all subnormals and NaN payloads.
 */
IEEE754_union
IEEE754_SingleToHalf(float f);


#endif /* ieee754_h */

#endif /* QCBOR_DISABLE_PREFERRED_FLOAT */