/* ==========================================================================
 * ieee754.h -- Conversion between half, double & single-precision floats
 *
 * Copyright (c) 2018-2024, Laurence Lundblade. All rights reserved.
 *
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * See BSD-3-Clause license in README.md
 *
 * Created on 7/23/18
 * ========================================================================== */

#ifndef QCBOR_DISABLE_PREFERRED_FLOAT

#ifndef ieee754_h
#define ieee754_h

#include <stdint.h>


/** @file ieee754.h
 *
 * This implements floating-point conversion between half, single and
 * double-precision floating-point numbers, in particular conversion to
 * a smaller representation (e.g., double to single) that does not lose
 * precision for CBOR preferred serialization.
 *
 * This implementation works entirely with shifts and masks and does
 * not require any floating-point HW or library.
 *
 * This conforms to IEEE 754-2008, but note that it doesn't specify
 * conversions, just the encodings.
 *
 * This is complete, supporting +/- infinity, +/- zero, subnormals and
 * NaN payloads. NaN payloads are converted to smaller by dropping the
 * rightmost bits if they are zero and shifting to the right. If the
 * rightmost bits are not zero the conversion is not performed. When
 * converting from smaller to larger, the payload is shifted left and
 * zero-padded. This is what is specified by CBOR preferred
 * serialization and what modern HW conversion instructions do. CBOR
 * CDE handling for NaN is not clearly specified, but upcoming
 * documents may clarify this.
 *
 * There is no special handling of signaling and quiet NaNs. It
 * probably isn't necessary to transmit these special NaNs as their
 * purpose is more for propagating errors up through some calculation.
 * In many cases the handling of the NaN payload will work for
 * signaling and quiet NaNs.
 *
 * A previous version of this was usable as a general library for
 * conversion. This version is reduced to what is needed for CBOR.
 */


/**
 * @brief Convert half-precision float to double-precision float.
 *
 * @param[in] uHalfPrecision   Half-precision number to convert.
 *
 * @returns Double-precision value.
 *
 * This is a lossless conversion because every half-precision value
 * can be represented as a double. There is no error condition.
 *
 * There is no half-precision type in C, so it is represented here as
 * a @c uint16_t. The bits of @c uHalfPrecision are as described for
 * half-precision by IEEE 754.
 */
double
IEEE754_HalfToDouble(uint16_t uHalfPrecision);


/** Holds a floating-point value that could be half, single or
 * double-precision. The value is in a @c uint64_t that may be copied
 * to a float or double. Simply casting uValue will usually work but
 * may generate compiler or static analyzer warnings. Using
 * UsefulBufUtil_CopyUint64ToDouble() or
 * UsefulBufUtil_CopyUint32ToFloat() will not (and will not generate
 * any extra code).
 */
typedef struct {
   enum {IEEE754_UNION_IS_HALF   = 2,
         IEEE754_UNION_IS_SINGLE = 4,
         IEEE754_UNION_IS_DOUBLE = 8,
   } uSize; /* Size of uValue */
   uint64_t uValue;
} IEEE754_union;


/**
 * @brief Convert a double to either single or half-precision.
 *
 * @param[in] d                    The value to convert.
 * @param[in] bAllowHalfPrecision  If true, convert to either half or
 *                                 single precision.
 *
 * @returns Unconverted value, or value converted to single or half-precision.
 *
 * This always succeeds. If the value cannot be converted without the
 * loss of precision, it is not converted.
 *
 * This handles all subnormals and NaN payloads.
 */
IEEE754_union
IEEE754_DoubleToSmaller(double d, int bAllowHalfPrecision);


/**
 * @brief Convert a single-precision float to half-precision.
 *
 * @param[in] f  The value to convert.
 *
 * @returns Either unconverted value or value converted to half-precision.
 *
 * This always succeeds. If the value cannot be converted without the
 * loss of precision, it is not converted.
 *
 * This handles all subnormals and NaN payloads.
 */
IEEE754_union
IEEE754_SingleToHalf(float f);


#endif /* ieee754_h */

#endif /* QCBOR_DISABLE_PREFERRED_FLOAT */