// SPDX-License-Identifier: BSD-3-Clause
/* ==========================================================================
 * ieee754.h -- Conversion between half, double & single-precision floats
 *
 * Copyright (c) 2018-2024, Laurence Lundblade. All rights reserved.
 *
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * See BSD-3-Clause license in README.md
 *
 * Created on 7/23/18
 * ========================================================================== */

#ifndef QCBOR_DISABLE_PREFERRED_FLOAT

#ifndef ieee754_h
#define ieee754_h

#include <stdint.h>


/** @file ieee754.h
 *
 * This implements floating-point conversion between half, single and
 * double precision floating-point numbers, in particular conversion to
 * a smaller representation (e.g., double to single) that does not lose
 * precision for CBOR preferred serialization.
 *
 * This implementation works entirely with shifts and masks and does
 * not require any floating-point HW or library.
 *
 * This conforms to IEEE 754-2008, but note that it doesn't specify
 * conversions, just the encodings.
 *
 * This is complete, supporting +/- infinity, +/- zero, subnormals and
 * NaN payloads. NaN payloads are converted to smaller by dropping the
 * rightmost bits if they are zero and shifting to the right. If the
 * rightmost bits are not zero the conversion is not performed. When
 * converting from smaller to larger, the payload is shifted left and
 * zero-padded. This is what is specified by CBOR preferred
 * serialization and what modern HW conversion instructions do. CBOR
 * CDE handling for NaN is not clearly specified, but upcoming
 * documents may clarify this.
 *
 * There is no special handling of signaling and quiet NaNs. It probably
 * isn't necessary to transmit these special NaNs as their purpose is
 * more for propagating errors up through some calculation. In many
 * cases the handling of the NaN payload will work for signaling and
 * quiet NaNs.
 *
 * A previous version of this was usable as a general library for
 * conversion. This version is reduced to what is needed for CBOR.
 */


/**
 * @brief Convert half-precision float to double-precision float.
 *
 * @param[in] uHalfPrecision   Half-precision number to convert.
 *
 * @returns Double-precision value.
 *
 * This is a lossless conversion because every half-precision value
 * can be represented as a double. There is no error condition.
 *
 * There is no half-precision type in C, so it is represented here as
 * a @c uint16_t. The bits of @c uHalfPrecision are as described for
 * half-precision by IEEE 754.
 */
double
IEEE754_HalfToDouble(uint16_t uHalfPrecision);


/** Holds a floating-point value that could be half, single or
 * double-precision. The value is in a @c uint64_t that may be copied
 * to a float or double. Simply casting uValue will usually work but
 * may generate compiler or static analyzer warnings. Using
 * UsefulBufUtil_CopyUint64ToDouble() or
 * UsefulBufUtil_CopyUint32ToFloat() will not (and will not generate
 * any extra code).
 *
 * @c uSize tells which precision @c uValue holds; each enumerator's
 * numeric value is the width of that precision in bytes (2, 4 or 8).
 */
typedef struct {
   enum {IEEE754_UNION_IS_HALF   = 2,
         IEEE754_UNION_IS_SINGLE = 4,
         IEEE754_UNION_IS_DOUBLE = 8,
   } uSize; /* Size of uValue */
   uint64_t uValue;
} IEEE754_union;


/**
 * @brief Convert a double to either single or half-precision.
 *
 * @param[in] d                    The value to convert.
 * @param[in] bAllowHalfPrecision  If true, convert to either half or
 *                                 single precision.
 *
 * @returns Unconverted value, or value converted to single or half-precision.
 *
 * This always succeeds. If the value cannot be converted without the
 * loss of precision, it is not converted.
 *
 * This handles all subnormals and NaN payloads.
 */
IEEE754_union
IEEE754_DoubleToSmaller(double d, int bAllowHalfPrecision);


/**
 * @brief Convert a single-precision float to half-precision.
 *
 * @param[in] f  The value to convert.
 *
 * @returns Either unconverted value or value converted to half-precision.
 *
 * This always succeeds. If the value cannot be converted without the
 * loss of precision, it is not converted.
 *
 * This handles all subnormals and NaN payloads.
 */
IEEE754_union
IEEE754_SingleToHalf(float f);


#endif /* ieee754_h */

#endif /* QCBOR_DISABLE_PREFERRED_FLOAT */