1*b586599bSYuichi Sugiyama // SPDX-License-Identifier: BSD-3-Clause
22e6f5bf1SYuichi Sugiyama /* ==========================================================================
32e6f5bf1SYuichi Sugiyama * ieee754.c -- floating-point conversion between half, double & single-precision
42e6f5bf1SYuichi Sugiyama *
52e6f5bf1SYuichi Sugiyama * Copyright (c) 2018-2024, Laurence Lundblade. All rights reserved.
62e6f5bf1SYuichi Sugiyama * Copyright (c) 2021, Arm Limited. All rights reserved.
72e6f5bf1SYuichi Sugiyama *
82e6f5bf1SYuichi Sugiyama * SPDX-License-Identifier: BSD-3-Clause
92e6f5bf1SYuichi Sugiyama *
102e6f5bf1SYuichi Sugiyama * See BSD-3-Clause license in README.md
112e6f5bf1SYuichi Sugiyama *
122e6f5bf1SYuichi Sugiyama * Created on 7/23/18
132e6f5bf1SYuichi Sugiyama * ========================================================================== */
142e6f5bf1SYuichi Sugiyama
152e6f5bf1SYuichi Sugiyama /*
162e6f5bf1SYuichi Sugiyama * Include before QCBOR_DISABLE_PREFERRED_FLOAT is checked as
172e6f5bf1SYuichi Sugiyama * QCBOR_DISABLE_PREFERRED_FLOAT might be defined in qcbor/qcbor_common.h
182e6f5bf1SYuichi Sugiyama */
192e6f5bf1SYuichi Sugiyama #include "qcbor/qcbor_common.h"
202e6f5bf1SYuichi Sugiyama
212e6f5bf1SYuichi Sugiyama #ifndef QCBOR_DISABLE_PREFERRED_FLOAT
222e6f5bf1SYuichi Sugiyama
232e6f5bf1SYuichi Sugiyama #include "ieee754.h"
242e6f5bf1SYuichi Sugiyama #include <string.h> /* For memcpy() */
252e6f5bf1SYuichi Sugiyama
262e6f5bf1SYuichi Sugiyama
272e6f5bf1SYuichi Sugiyama /*
282e6f5bf1SYuichi Sugiyama * This code has long lines and is easier to read because of
292e6f5bf1SYuichi Sugiyama * them. Some coding guidelines prefer 80 column lines (can they not
302e6f5bf1SYuichi Sugiyama * afford big displays?).
312e6f5bf1SYuichi Sugiyama *
322e6f5bf1SYuichi Sugiyama * This code works solely using shifts and masks and thus has no
332e6f5bf1SYuichi Sugiyama * dependency on any math libraries. It can even work if the CPU
342e6f5bf1SYuichi Sugiyama * doesn't have any floating-point support, though that isn't the most
352e6f5bf1SYuichi Sugiyama * useful thing to do.
362e6f5bf1SYuichi Sugiyama *
372e6f5bf1SYuichi Sugiyama * The memcpy() dependency is only for CopyFloatToUint32() and friends
382e6f5bf1SYuichi Sugiyama * which only is needed to avoid type punning when converting the
392e6f5bf1SYuichi Sugiyama * actual float bits to an unsigned value so the bit shifts and masks
402e6f5bf1SYuichi Sugiyama * can work.
412e6f5bf1SYuichi Sugiyama *
422e6f5bf1SYuichi Sugiyama * The references used to write this code:
432e6f5bf1SYuichi Sugiyama *
442e6f5bf1SYuichi Sugiyama * IEEE 754-2008, particularly section 3.6 and 6.2.1
452e6f5bf1SYuichi Sugiyama *
462e6f5bf1SYuichi Sugiyama * https://en.wikipedia.org/wiki/IEEE_754 and subordinate pages
472e6f5bf1SYuichi Sugiyama *
482e6f5bf1SYuichi Sugiyama * https://stackoverflow.com/questions/19800415/why-does-ieee-754-reserve-so-many-nan-values
492e6f5bf1SYuichi Sugiyama *
502e6f5bf1SYuichi Sugiyama * https://stackoverflow.com/questions/46073295/implicit-type-promotion-rules
512e6f5bf1SYuichi Sugiyama *
522e6f5bf1SYuichi Sugiyama * https://stackoverflow.com/questions/589575/what-does-the-c-standard-state-the-size-of-int-long-type-to-be
532e6f5bf1SYuichi Sugiyama *
542e6f5bf1SYuichi Sugiyama * IEEE754_FloatToDouble(uint32_t uFloat) was created but is not
552e6f5bf1SYuichi Sugiyama * needed. It can be retrieved from github history if needed.
562e6f5bf1SYuichi Sugiyama */
572e6f5bf1SYuichi Sugiyama
582e6f5bf1SYuichi Sugiyama
592e6f5bf1SYuichi Sugiyama
602e6f5bf1SYuichi Sugiyama
612e6f5bf1SYuichi Sugiyama /* ----- Half Precsion ----------- */
622e6f5bf1SYuichi Sugiyama #define HALF_NUM_SIGNIFICAND_BITS (10)
632e6f5bf1SYuichi Sugiyama #define HALF_NUM_EXPONENT_BITS (5)
642e6f5bf1SYuichi Sugiyama #define HALF_NUM_SIGN_BITS (1)
652e6f5bf1SYuichi Sugiyama
662e6f5bf1SYuichi Sugiyama #define HALF_SIGNIFICAND_SHIFT (0)
672e6f5bf1SYuichi Sugiyama #define HALF_EXPONENT_SHIFT (HALF_NUM_SIGNIFICAND_BITS)
682e6f5bf1SYuichi Sugiyama #define HALF_SIGN_SHIFT (HALF_NUM_SIGNIFICAND_BITS + HALF_NUM_EXPONENT_BITS)
692e6f5bf1SYuichi Sugiyama
702e6f5bf1SYuichi Sugiyama #define HALF_SIGNIFICAND_MASK (0x3ffU) // The lower 10 bits
712e6f5bf1SYuichi Sugiyama #define HALF_EXPONENT_MASK (0x1fU << HALF_EXPONENT_SHIFT) // 0x7c00 5 bits of exponent
722e6f5bf1SYuichi Sugiyama #define HALF_SIGN_MASK (0x01U << HALF_SIGN_SHIFT) // 0x8000 1 bit of sign
732e6f5bf1SYuichi Sugiyama #define HALF_QUIET_NAN_BIT (0x01U << (HALF_NUM_SIGNIFICAND_BITS-1)) // 0x0200
742e6f5bf1SYuichi Sugiyama
752e6f5bf1SYuichi Sugiyama /* Biased Biased Unbiased Use
762e6f5bf1SYuichi Sugiyama * 0x00 0 -15 0 and subnormal
772e6f5bf1SYuichi Sugiyama * 0x01 1 -14 Smallest normal exponent
782e6f5bf1SYuichi Sugiyama * 0x1e 30 15 Largest normal exponent
792e6f5bf1SYuichi Sugiyama * 0x1F 31 16 NaN and Infinity */
802e6f5bf1SYuichi Sugiyama #define HALF_EXPONENT_BIAS (15)
812e6f5bf1SYuichi Sugiyama #define HALF_EXPONENT_MAX (HALF_EXPONENT_BIAS) // 15 Unbiased
822e6f5bf1SYuichi Sugiyama #define HALF_EXPONENT_MIN (-HALF_EXPONENT_BIAS+1) // -14 Unbiased
832e6f5bf1SYuichi Sugiyama #define HALF_EXPONENT_ZERO (-HALF_EXPONENT_BIAS) // -15 Unbiased
842e6f5bf1SYuichi Sugiyama #define HALF_EXPONENT_INF_OR_NAN (HALF_EXPONENT_BIAS+1) // 16 Unbiased
852e6f5bf1SYuichi Sugiyama
862e6f5bf1SYuichi Sugiyama
872e6f5bf1SYuichi Sugiyama /* ------ Single-Precision -------- */
882e6f5bf1SYuichi Sugiyama #define SINGLE_NUM_SIGNIFICAND_BITS (23)
892e6f5bf1SYuichi Sugiyama #define SINGLE_NUM_EXPONENT_BITS (8)
902e6f5bf1SYuichi Sugiyama #define SINGLE_NUM_SIGN_BITS (1)
912e6f5bf1SYuichi Sugiyama
922e6f5bf1SYuichi Sugiyama #define SINGLE_SIGNIFICAND_SHIFT (0)
932e6f5bf1SYuichi Sugiyama #define SINGLE_EXPONENT_SHIFT (SINGLE_NUM_SIGNIFICAND_BITS)
942e6f5bf1SYuichi Sugiyama #define SINGLE_SIGN_SHIFT (SINGLE_NUM_SIGNIFICAND_BITS + SINGLE_NUM_EXPONENT_BITS)
952e6f5bf1SYuichi Sugiyama
962e6f5bf1SYuichi Sugiyama #define SINGLE_SIGNIFICAND_MASK (0x7fffffU) // The lower 23 bits
972e6f5bf1SYuichi Sugiyama #define SINGLE_EXPONENT_MASK (0xffU << SINGLE_EXPONENT_SHIFT) // 8 bits of exponent
982e6f5bf1SYuichi Sugiyama #define SINGLE_SIGN_MASK (0x01U << SINGLE_SIGN_SHIFT) // 1 bit of sign
992e6f5bf1SYuichi Sugiyama #define SINGLE_QUIET_NAN_BIT (0x01U << (SINGLE_NUM_SIGNIFICAND_BITS-1))
1002e6f5bf1SYuichi Sugiyama
1012e6f5bf1SYuichi Sugiyama /* Biased Biased Unbiased Use
1022e6f5bf1SYuichi Sugiyama * 0x0000 0 -127 0 and subnormal
1032e6f5bf1SYuichi Sugiyama * 0x0001 1 -126 Smallest normal exponent
1042e6f5bf1SYuichi Sugiyama * 0x7f 127 0 1
1052e6f5bf1SYuichi Sugiyama * 0xfe 254 127 Largest normal exponent
1062e6f5bf1SYuichi Sugiyama * 0xff 255 128 NaN and Infinity */
1072e6f5bf1SYuichi Sugiyama #define SINGLE_EXPONENT_BIAS (127)
1082e6f5bf1SYuichi Sugiyama #define SINGLE_EXPONENT_MAX (SINGLE_EXPONENT_BIAS)
1092e6f5bf1SYuichi Sugiyama #define SINGLE_EXPONENT_MIN (-SINGLE_EXPONENT_BIAS+1)
1102e6f5bf1SYuichi Sugiyama #define SINGLE_EXPONENT_ZERO (-SINGLE_EXPONENT_BIAS)
1112e6f5bf1SYuichi Sugiyama #define SINGLE_EXPONENT_INF_OR_NAN (SINGLE_EXPONENT_BIAS+1)
1122e6f5bf1SYuichi Sugiyama
1132e6f5bf1SYuichi Sugiyama
1142e6f5bf1SYuichi Sugiyama /* --------- Double-Precision ---------- */
1152e6f5bf1SYuichi Sugiyama #define DOUBLE_NUM_SIGNIFICAND_BITS (52)
1162e6f5bf1SYuichi Sugiyama #define DOUBLE_NUM_EXPONENT_BITS (11)
1172e6f5bf1SYuichi Sugiyama #define DOUBLE_NUM_SIGN_BITS (1)
1182e6f5bf1SYuichi Sugiyama
1192e6f5bf1SYuichi Sugiyama #define DOUBLE_SIGNIFICAND_SHIFT (0)
1202e6f5bf1SYuichi Sugiyama #define DOUBLE_EXPONENT_SHIFT (DOUBLE_NUM_SIGNIFICAND_BITS)
1212e6f5bf1SYuichi Sugiyama #define DOUBLE_SIGN_SHIFT (DOUBLE_NUM_SIGNIFICAND_BITS + DOUBLE_NUM_EXPONENT_BITS)
1222e6f5bf1SYuichi Sugiyama
1232e6f5bf1SYuichi Sugiyama #define DOUBLE_SIGNIFICAND_MASK (0xfffffffffffffULL) // The lower 52 bits
1242e6f5bf1SYuichi Sugiyama #define DOUBLE_EXPONENT_MASK (0x7ffULL << DOUBLE_EXPONENT_SHIFT) // 11 bits of exponent
1252e6f5bf1SYuichi Sugiyama #define DOUBLE_SIGN_MASK (0x01ULL << DOUBLE_SIGN_SHIFT) // 1 bit of sign
1262e6f5bf1SYuichi Sugiyama #define DOUBLE_QUIET_NAN_BIT (0x01ULL << (DOUBLE_NUM_SIGNIFICAND_BITS-1))
1272e6f5bf1SYuichi Sugiyama
1282e6f5bf1SYuichi Sugiyama
1292e6f5bf1SYuichi Sugiyama /* Biased Biased Unbiased Use
1302e6f5bf1SYuichi Sugiyama * 0x00000000 0 -1023 0 and subnormal
1312e6f5bf1SYuichi Sugiyama * 0x00000001 1 -1022 Smallest normal exponent
1322e6f5bf1SYuichi Sugiyama * 0x000007fe 2046 1023 Largest normal exponent
1332e6f5bf1SYuichi Sugiyama * 0x000007ff 2047 1024 NaN and Infinity */
1342e6f5bf1SYuichi Sugiyama #define DOUBLE_EXPONENT_BIAS (1023)
1352e6f5bf1SYuichi Sugiyama #define DOUBLE_EXPONENT_MAX (DOUBLE_EXPONENT_BIAS)
1362e6f5bf1SYuichi Sugiyama #define DOUBLE_EXPONENT_MIN (-DOUBLE_EXPONENT_BIAS+1)
1372e6f5bf1SYuichi Sugiyama #define DOUBLE_EXPONENT_ZERO (-DOUBLE_EXPONENT_BIAS)
1382e6f5bf1SYuichi Sugiyama #define DOUBLE_EXPONENT_INF_OR_NAN (DOUBLE_EXPONENT_BIAS+1)
1392e6f5bf1SYuichi Sugiyama
1402e6f5bf1SYuichi Sugiyama
1412e6f5bf1SYuichi Sugiyama
1422e6f5bf1SYuichi Sugiyama
1432e6f5bf1SYuichi Sugiyama /*
1442e6f5bf1SYuichi Sugiyama * Convenient functions to avoid type punning, compiler warnings and
1452e6f5bf1SYuichi Sugiyama * such. The optimizer reduces them to a simple assignment. This is a
1462e6f5bf1SYuichi Sugiyama * crusty corner of C. It shouldn't be this hard.
1472e6f5bf1SYuichi Sugiyama *
1482e6f5bf1SYuichi Sugiyama * These are also in UsefulBuf.h under a different name. They are copied
1492e6f5bf1SYuichi Sugiyama * here to avoid a dependency on UsefulBuf.h. There is no object code
1502e6f5bf1SYuichi Sugiyama * size impact because these always optimze down to a simple assignment.
1512e6f5bf1SYuichi Sugiyama */
1522e6f5bf1SYuichi Sugiyama static inline uint32_t
CopyFloatToUint32(float f)1532e6f5bf1SYuichi Sugiyama CopyFloatToUint32(float f)
1542e6f5bf1SYuichi Sugiyama {
1552e6f5bf1SYuichi Sugiyama uint32_t u32;
1562e6f5bf1SYuichi Sugiyama memcpy(&u32, &f, sizeof(uint32_t));
1572e6f5bf1SYuichi Sugiyama return u32;
1582e6f5bf1SYuichi Sugiyama }
1592e6f5bf1SYuichi Sugiyama
1602e6f5bf1SYuichi Sugiyama static inline uint64_t
CopyDoubleToUint64(double d)1612e6f5bf1SYuichi Sugiyama CopyDoubleToUint64(double d)
1622e6f5bf1SYuichi Sugiyama {
1632e6f5bf1SYuichi Sugiyama uint64_t u64;
1642e6f5bf1SYuichi Sugiyama memcpy(&u64, &d, sizeof(uint64_t));
1652e6f5bf1SYuichi Sugiyama return u64;
1662e6f5bf1SYuichi Sugiyama }
1672e6f5bf1SYuichi Sugiyama
1682e6f5bf1SYuichi Sugiyama static inline double
CopyUint64ToDouble(uint64_t u64)1692e6f5bf1SYuichi Sugiyama CopyUint64ToDouble(uint64_t u64)
1702e6f5bf1SYuichi Sugiyama {
1712e6f5bf1SYuichi Sugiyama double d;
1722e6f5bf1SYuichi Sugiyama memcpy(&d, &u64, sizeof(uint64_t));
1732e6f5bf1SYuichi Sugiyama return d;
1742e6f5bf1SYuichi Sugiyama }
1752e6f5bf1SYuichi Sugiyama
1762e6f5bf1SYuichi Sugiyama static inline float
CopyUint32ToSingle(uint32_t u32)1772e6f5bf1SYuichi Sugiyama CopyUint32ToSingle(uint32_t u32)
1782e6f5bf1SYuichi Sugiyama {
1792e6f5bf1SYuichi Sugiyama float f;
1802e6f5bf1SYuichi Sugiyama memcpy(&f, &u32, sizeof(uint32_t));
1812e6f5bf1SYuichi Sugiyama return f;
1822e6f5bf1SYuichi Sugiyama }
1832e6f5bf1SYuichi Sugiyama
1842e6f5bf1SYuichi Sugiyama
1852e6f5bf1SYuichi Sugiyama
1862e6f5bf1SYuichi Sugiyama
1872e6f5bf1SYuichi Sugiyama /**
1882e6f5bf1SYuichi Sugiyama * @brief Assemble sign, significand and exponent into single precision float.
1892e6f5bf1SYuichi Sugiyama *
1902e6f5bf1SYuichi Sugiyama * @param[in] uDoubleSign 0 if positive, 1 if negative
1912e6f5bf1SYuichi Sugiyama * @pararm[in] uDoubleSignificand Bits of the significand
1922e6f5bf1SYuichi Sugiyama * @param[in] nDoubleUnBiasedExponent Exponent
1932e6f5bf1SYuichi Sugiyama *
1942e6f5bf1SYuichi Sugiyama * This returns the bits for a single-precision float, a binary64
1952e6f5bf1SYuichi Sugiyama * as specified in IEEE754.
1962e6f5bf1SYuichi Sugiyama */
1972e6f5bf1SYuichi Sugiyama static double
IEEE754_AssembleDouble(uint64_t uDoubleSign,uint64_t uDoubleSignificand,int64_t nDoubleUnBiasedExponent)1982e6f5bf1SYuichi Sugiyama IEEE754_AssembleDouble(uint64_t uDoubleSign,
1992e6f5bf1SYuichi Sugiyama uint64_t uDoubleSignificand,
2002e6f5bf1SYuichi Sugiyama int64_t nDoubleUnBiasedExponent)
2012e6f5bf1SYuichi Sugiyama {
2022e6f5bf1SYuichi Sugiyama uint64_t uDoubleBiasedExponent;
2032e6f5bf1SYuichi Sugiyama
2042e6f5bf1SYuichi Sugiyama uDoubleBiasedExponent = (uint64_t)(nDoubleUnBiasedExponent + DOUBLE_EXPONENT_BIAS);
2052e6f5bf1SYuichi Sugiyama
2062e6f5bf1SYuichi Sugiyama return CopyUint64ToDouble(uDoubleSignificand |
2072e6f5bf1SYuichi Sugiyama (uDoubleBiasedExponent << DOUBLE_EXPONENT_SHIFT) |
2082e6f5bf1SYuichi Sugiyama (uDoubleSign << DOUBLE_SIGN_SHIFT));
2092e6f5bf1SYuichi Sugiyama }
2102e6f5bf1SYuichi Sugiyama
2112e6f5bf1SYuichi Sugiyama
2122e6f5bf1SYuichi Sugiyama double
IEEE754_HalfToDouble(uint16_t uHalfPrecision)2132e6f5bf1SYuichi Sugiyama IEEE754_HalfToDouble(uint16_t uHalfPrecision)
2142e6f5bf1SYuichi Sugiyama {
2152e6f5bf1SYuichi Sugiyama uint64_t uDoubleSignificand;
2162e6f5bf1SYuichi Sugiyama int64_t nDoubleUnBiasedExponent;
2172e6f5bf1SYuichi Sugiyama double dResult;
2182e6f5bf1SYuichi Sugiyama
2192e6f5bf1SYuichi Sugiyama /* Pull out the three parts of the half-precision float. Do all
2202e6f5bf1SYuichi Sugiyama * the work in 64 bits because that is what the end result is. It
2212e6f5bf1SYuichi Sugiyama * may give smaller code size and will keep static analyzers
2222e6f5bf1SYuichi Sugiyama * happier.
2232e6f5bf1SYuichi Sugiyama */
2242e6f5bf1SYuichi Sugiyama const uint64_t uHalfSignificand = uHalfPrecision & HALF_SIGNIFICAND_MASK;
2252e6f5bf1SYuichi Sugiyama const uint64_t uHalfBiasedExponent = (uHalfPrecision & HALF_EXPONENT_MASK) >> HALF_EXPONENT_SHIFT;
2262e6f5bf1SYuichi Sugiyama const int64_t nHalfUnBiasedExponent = (int64_t)uHalfBiasedExponent - HALF_EXPONENT_BIAS;
2272e6f5bf1SYuichi Sugiyama const uint64_t uHalfSign = (uHalfPrecision & HALF_SIGN_MASK) >> HALF_SIGN_SHIFT;
2282e6f5bf1SYuichi Sugiyama
2292e6f5bf1SYuichi Sugiyama if(nHalfUnBiasedExponent == HALF_EXPONENT_ZERO) {
2302e6f5bf1SYuichi Sugiyama /* 0 or subnormal */
2312e6f5bf1SYuichi Sugiyama if(uHalfSignificand) {
2322e6f5bf1SYuichi Sugiyama /* --- SUBNORMAL --- */
2332e6f5bf1SYuichi Sugiyama /* A half-precision subnormal can always be converted to a
2342e6f5bf1SYuichi Sugiyama * normal double-precision float because the ranges line up.
2352e6f5bf1SYuichi Sugiyama * The exponent of a subnormal starts out at the min exponent
2362e6f5bf1SYuichi Sugiyama * for a normal. As the sub normal significand bits are
2372e6f5bf1SYuichi Sugiyama * shifted, left to normalize, the exponent is
2382e6f5bf1SYuichi Sugiyama * decremented. Shifting continues until fully normalized.
2392e6f5bf1SYuichi Sugiyama */
2402e6f5bf1SYuichi Sugiyama nDoubleUnBiasedExponent = HALF_EXPONENT_MIN;
2412e6f5bf1SYuichi Sugiyama uDoubleSignificand = uHalfSignificand;
2422e6f5bf1SYuichi Sugiyama do {
2432e6f5bf1SYuichi Sugiyama uDoubleSignificand <<= 1;
2442e6f5bf1SYuichi Sugiyama nDoubleUnBiasedExponent--;
2452e6f5bf1SYuichi Sugiyama } while ((uDoubleSignificand & (1ULL << HALF_NUM_SIGNIFICAND_BITS)) == 0);
2462e6f5bf1SYuichi Sugiyama /* A normal has an implied 1 in the most significant
2472e6f5bf1SYuichi Sugiyama * position that a subnormal doesn't. */
2482e6f5bf1SYuichi Sugiyama uDoubleSignificand -= 1ULL << HALF_NUM_SIGNIFICAND_BITS;
2492e6f5bf1SYuichi Sugiyama /* Must shift into place for a double significand */
2502e6f5bf1SYuichi Sugiyama uDoubleSignificand <<= DOUBLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS;
2512e6f5bf1SYuichi Sugiyama
2522e6f5bf1SYuichi Sugiyama dResult = IEEE754_AssembleDouble(uHalfSign,
2532e6f5bf1SYuichi Sugiyama uDoubleSignificand,
2542e6f5bf1SYuichi Sugiyama nDoubleUnBiasedExponent);
2552e6f5bf1SYuichi Sugiyama } else {
2562e6f5bf1SYuichi Sugiyama /* --- ZERO --- */
2572e6f5bf1SYuichi Sugiyama dResult = IEEE754_AssembleDouble(uHalfSign,
2582e6f5bf1SYuichi Sugiyama 0,
2592e6f5bf1SYuichi Sugiyama DOUBLE_EXPONENT_ZERO);
2602e6f5bf1SYuichi Sugiyama }
2612e6f5bf1SYuichi Sugiyama } else if(nHalfUnBiasedExponent == HALF_EXPONENT_INF_OR_NAN) {
2622e6f5bf1SYuichi Sugiyama /* NaN or Inifinity */
2632e6f5bf1SYuichi Sugiyama if(uHalfSignificand) {
2642e6f5bf1SYuichi Sugiyama /* --- NaN --- */
2652e6f5bf1SYuichi Sugiyama /* Half-precision payloads always fit into double precision
2662e6f5bf1SYuichi Sugiyama * payloads. They are shifted left the same as a normal
2672e6f5bf1SYuichi Sugiyama * number significand.
2682e6f5bf1SYuichi Sugiyama */
2692e6f5bf1SYuichi Sugiyama uDoubleSignificand = uHalfSignificand << (DOUBLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
2702e6f5bf1SYuichi Sugiyama dResult = IEEE754_AssembleDouble(uHalfSign,
2712e6f5bf1SYuichi Sugiyama uDoubleSignificand,
2722e6f5bf1SYuichi Sugiyama DOUBLE_EXPONENT_INF_OR_NAN);
2732e6f5bf1SYuichi Sugiyama } else {
2742e6f5bf1SYuichi Sugiyama /* --- INFINITY --- */
2752e6f5bf1SYuichi Sugiyama dResult = IEEE754_AssembleDouble(uHalfSign,
2762e6f5bf1SYuichi Sugiyama 0,
2772e6f5bf1SYuichi Sugiyama DOUBLE_EXPONENT_INF_OR_NAN);
2782e6f5bf1SYuichi Sugiyama }
2792e6f5bf1SYuichi Sugiyama } else {
2802e6f5bf1SYuichi Sugiyama /* --- NORMAL NUMBER --- */
2812e6f5bf1SYuichi Sugiyama uDoubleSignificand = uHalfSignificand << (DOUBLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
2822e6f5bf1SYuichi Sugiyama dResult = IEEE754_AssembleDouble(uHalfSign,
2832e6f5bf1SYuichi Sugiyama uDoubleSignificand,
2842e6f5bf1SYuichi Sugiyama nHalfUnBiasedExponent);
2852e6f5bf1SYuichi Sugiyama }
2862e6f5bf1SYuichi Sugiyama
2872e6f5bf1SYuichi Sugiyama return dResult;
2882e6f5bf1SYuichi Sugiyama }
2892e6f5bf1SYuichi Sugiyama
2902e6f5bf1SYuichi Sugiyama
2912e6f5bf1SYuichi Sugiyama /**
2922e6f5bf1SYuichi Sugiyama * @brief Assemble sign, significand and exponent into single precision float.
2932e6f5bf1SYuichi Sugiyama *
2942e6f5bf1SYuichi Sugiyama * @param[in] uHalfSign 0 if positive, 1 if negative
2952e6f5bf1SYuichi Sugiyama * @pararm[in] uHalfSignificand Bits of the significand
2962e6f5bf1SYuichi Sugiyama * @param[in] nHalfUnBiasedExponent Exponent
2972e6f5bf1SYuichi Sugiyama *
2982e6f5bf1SYuichi Sugiyama * This returns the bits for a single-precision float, a binary32 as
2992e6f5bf1SYuichi Sugiyama * specified in IEEE754. It is returned as a uint64_t rather than a
3002e6f5bf1SYuichi Sugiyama * uint32_t or a float for convenience of usage.
3012e6f5bf1SYuichi Sugiyama */
3022e6f5bf1SYuichi Sugiyama static uint32_t
IEEE754_AssembleHalf(uint32_t uHalfSign,uint32_t uHalfSignificand,int32_t nHalfUnBiasedExponent)3032e6f5bf1SYuichi Sugiyama IEEE754_AssembleHalf(uint32_t uHalfSign,
3042e6f5bf1SYuichi Sugiyama uint32_t uHalfSignificand,
3052e6f5bf1SYuichi Sugiyama int32_t nHalfUnBiasedExponent)
3062e6f5bf1SYuichi Sugiyama {
3072e6f5bf1SYuichi Sugiyama uint32_t uHalfUnbiasedExponent;
3082e6f5bf1SYuichi Sugiyama
3092e6f5bf1SYuichi Sugiyama uHalfUnbiasedExponent = (uint32_t)(nHalfUnBiasedExponent + HALF_EXPONENT_BIAS);
3102e6f5bf1SYuichi Sugiyama
3112e6f5bf1SYuichi Sugiyama return uHalfSignificand |
3122e6f5bf1SYuichi Sugiyama (uHalfUnbiasedExponent << HALF_EXPONENT_SHIFT) |
3132e6f5bf1SYuichi Sugiyama (uHalfSign << HALF_SIGN_SHIFT);
3142e6f5bf1SYuichi Sugiyama }
3152e6f5bf1SYuichi Sugiyama
3162e6f5bf1SYuichi Sugiyama
3172e6f5bf1SYuichi Sugiyama /* Public function; see ieee754.h */
3182e6f5bf1SYuichi Sugiyama IEEE754_union
IEEE754_SingleToHalf(float f)3192e6f5bf1SYuichi Sugiyama IEEE754_SingleToHalf(float f)
3202e6f5bf1SYuichi Sugiyama {
3212e6f5bf1SYuichi Sugiyama IEEE754_union result;
3222e6f5bf1SYuichi Sugiyama uint32_t uDroppedBits;
3232e6f5bf1SYuichi Sugiyama int32_t nExponentDifference;
3242e6f5bf1SYuichi Sugiyama int32_t nShiftAmount;
3252e6f5bf1SYuichi Sugiyama uint32_t uHalfSignificand;
3262e6f5bf1SYuichi Sugiyama
3272e6f5bf1SYuichi Sugiyama /* Pull the three parts out of the double-precision float Most work
3282e6f5bf1SYuichi Sugiyama * is done with uint32_t which helps avoid integer promotions and
3292e6f5bf1SYuichi Sugiyama * static analyzer complaints.
3302e6f5bf1SYuichi Sugiyama */
3312e6f5bf1SYuichi Sugiyama const uint32_t uSingle = CopyFloatToUint32(f);
3322e6f5bf1SYuichi Sugiyama const uint32_t uSingleBiasedExponent = (uSingle & SINGLE_EXPONENT_MASK) >> SINGLE_EXPONENT_SHIFT;
3332e6f5bf1SYuichi Sugiyama const int32_t nSingleUnbiasedExponent = (int32_t)uSingleBiasedExponent - SINGLE_EXPONENT_BIAS;
3342e6f5bf1SYuichi Sugiyama const uint32_t uSingleSignificand = uSingle & SINGLE_SIGNIFICAND_MASK;
3352e6f5bf1SYuichi Sugiyama const uint32_t uSingleSign = (uSingle & SINGLE_SIGN_MASK) >> SINGLE_SIGN_SHIFT;
3362e6f5bf1SYuichi Sugiyama
3372e6f5bf1SYuichi Sugiyama if(nSingleUnbiasedExponent == SINGLE_EXPONENT_ZERO) {
3382e6f5bf1SYuichi Sugiyama if(uSingleSignificand == 0) {
3392e6f5bf1SYuichi Sugiyama /* --- IS ZERO --- */
3402e6f5bf1SYuichi Sugiyama result.uSize = IEEE754_UNION_IS_HALF;
3412e6f5bf1SYuichi Sugiyama result.uValue = IEEE754_AssembleHalf(uSingleSign,
3422e6f5bf1SYuichi Sugiyama 0,
3432e6f5bf1SYuichi Sugiyama HALF_EXPONENT_ZERO);
3442e6f5bf1SYuichi Sugiyama } else {
3452e6f5bf1SYuichi Sugiyama /* --- IS SINGLE SUBNORMAL --- */
3462e6f5bf1SYuichi Sugiyama /* The largest single subnormal is slightly less than the
3472e6f5bf1SYuichi Sugiyama * largest single normal which is 2^-149 or
3482e6f5bf1SYuichi Sugiyama * 2.2040517676619426e-38. The smallest half subnormal is
3492e6f5bf1SYuichi Sugiyama * 2^-14 or 5.9604644775390625E-8. There is no overlap so
3502e6f5bf1SYuichi Sugiyama * single subnormals can't be converted to halfs of any sort.
3512e6f5bf1SYuichi Sugiyama */
3522e6f5bf1SYuichi Sugiyama result.uSize = IEEE754_UNION_IS_SINGLE;
3532e6f5bf1SYuichi Sugiyama result.uValue = uSingle;
3542e6f5bf1SYuichi Sugiyama }
3552e6f5bf1SYuichi Sugiyama } else if(nSingleUnbiasedExponent == SINGLE_EXPONENT_INF_OR_NAN) {
3562e6f5bf1SYuichi Sugiyama if(uSingleSignificand == 0) {
3572e6f5bf1SYuichi Sugiyama /* ---- IS INFINITY ---- */
3582e6f5bf1SYuichi Sugiyama result.uSize = IEEE754_UNION_IS_HALF;
3592e6f5bf1SYuichi Sugiyama result.uValue = IEEE754_AssembleHalf(uSingleSign, 0, HALF_EXPONENT_INF_OR_NAN);
3602e6f5bf1SYuichi Sugiyama } else {
3612e6f5bf1SYuichi Sugiyama /* The NaN can only be converted if no payload bits are lost
3622e6f5bf1SYuichi Sugiyama * per RFC 8949 section 4.1 that defines Preferred
3632e6f5bf1SYuichi Sugiyama * Serializaton. Note that Deterministically Encode CBOR in
3642e6f5bf1SYuichi Sugiyama * section 4.2 allows for some variation of this rule, but at
3652e6f5bf1SYuichi Sugiyama * the moment this implementation is of Preferred
3662e6f5bf1SYuichi Sugiyama * Serialization, not CDE. As of December 2023, we are also
3672e6f5bf1SYuichi Sugiyama * expecting an update to CDE. This code may need to be
3682e6f5bf1SYuichi Sugiyama * updated for CDE.
3692e6f5bf1SYuichi Sugiyama */
3702e6f5bf1SYuichi Sugiyama uDroppedBits = uSingleSignificand & (SINGLE_SIGNIFICAND_MASK >> HALF_NUM_SIGNIFICAND_BITS);
3712e6f5bf1SYuichi Sugiyama if(uDroppedBits == 0) {
3722e6f5bf1SYuichi Sugiyama /* --- IS CONVERTABLE NAN --- */
3732e6f5bf1SYuichi Sugiyama uHalfSignificand = uSingleSignificand >> (SINGLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
3742e6f5bf1SYuichi Sugiyama result.uSize = IEEE754_UNION_IS_HALF;
3752e6f5bf1SYuichi Sugiyama result.uValue = IEEE754_AssembleHalf(uSingleSign,
3762e6f5bf1SYuichi Sugiyama uHalfSignificand,
3772e6f5bf1SYuichi Sugiyama HALF_EXPONENT_INF_OR_NAN);
3782e6f5bf1SYuichi Sugiyama
3792e6f5bf1SYuichi Sugiyama } else {
3802e6f5bf1SYuichi Sugiyama /* --- IS UNCONVERTABLE NAN --- */
3812e6f5bf1SYuichi Sugiyama result.uSize = IEEE754_UNION_IS_SINGLE;
3822e6f5bf1SYuichi Sugiyama result.uValue = uSingle;
3832e6f5bf1SYuichi Sugiyama }
3842e6f5bf1SYuichi Sugiyama }
3852e6f5bf1SYuichi Sugiyama } else {
3862e6f5bf1SYuichi Sugiyama /* ---- REGULAR NUMBER ---- */
3872e6f5bf1SYuichi Sugiyama /* A regular single can be converted to a regular half if the
3882e6f5bf1SYuichi Sugiyama * single's exponent is in the smaller range of a half and if no
3892e6f5bf1SYuichi Sugiyama * precision is lost in the significand.
3902e6f5bf1SYuichi Sugiyama */
3912e6f5bf1SYuichi Sugiyama if(nSingleUnbiasedExponent >= HALF_EXPONENT_MIN &&
3922e6f5bf1SYuichi Sugiyama nSingleUnbiasedExponent <= HALF_EXPONENT_MAX &&
3932e6f5bf1SYuichi Sugiyama (uSingleSignificand & (SINGLE_SIGNIFICAND_MASK >> HALF_NUM_SIGNIFICAND_BITS)) == 0) {
3942e6f5bf1SYuichi Sugiyama uHalfSignificand = uSingleSignificand >> (SINGLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
3952e6f5bf1SYuichi Sugiyama
3962e6f5bf1SYuichi Sugiyama /* --- CONVERT TO HALF NORMAL --- */
3972e6f5bf1SYuichi Sugiyama result.uSize = IEEE754_UNION_IS_HALF;
3982e6f5bf1SYuichi Sugiyama result.uValue = IEEE754_AssembleHalf(uSingleSign,
3992e6f5bf1SYuichi Sugiyama uHalfSignificand,
4002e6f5bf1SYuichi Sugiyama nSingleUnbiasedExponent);
4012e6f5bf1SYuichi Sugiyama } else {
4022e6f5bf1SYuichi Sugiyama /* Unable to convert to a half normal. See if it can be
4032e6f5bf1SYuichi Sugiyama * converted to a half subnormal. To do that, the exponent
4042e6f5bf1SYuichi Sugiyama * must be in range and no precision can be lost in the
4052e6f5bf1SYuichi Sugiyama * signficand.
4062e6f5bf1SYuichi Sugiyama *
4072e6f5bf1SYuichi Sugiyama * This is more complicated because the number is not
4082e6f5bf1SYuichi Sugiyama * normalized. The signficand must be shifted proprotionally
4092e6f5bf1SYuichi Sugiyama * to the exponent and 1 must be added in. See
4102e6f5bf1SYuichi Sugiyama * https://en.wikipedia.org/wiki/Single-precision_floating-point_format#Exponent_encoding
4112e6f5bf1SYuichi Sugiyama *
4122e6f5bf1SYuichi Sugiyama * Exponents -14 to -24 map to a shift of 0 to 10 of the
4132e6f5bf1SYuichi Sugiyama * significand. The largest value of a half subnormal has an
4142e6f5bf1SYuichi Sugiyama * exponent of -14. Subnormals are not normalized like
4152e6f5bf1SYuichi Sugiyama * normals meaning they lose precision as the numbers get
4162e6f5bf1SYuichi Sugiyama * smaller. Normals don't lose precision because the exponent
4172e6f5bf1SYuichi Sugiyama * allows all the bits of the significand to be significant.
4182e6f5bf1SYuichi Sugiyama */
4192e6f5bf1SYuichi Sugiyama /* The exponent of the largest possible half-precision
4202e6f5bf1SYuichi Sugiyama * subnormal is HALF_EXPONENT_MIN (-14). Exponents larger
4212e6f5bf1SYuichi Sugiyama * than this are normal and handled above. We're going to
4222e6f5bf1SYuichi Sugiyama * shift the significand right by at least this amount.
4232e6f5bf1SYuichi Sugiyama */
4242e6f5bf1SYuichi Sugiyama nExponentDifference = -(nSingleUnbiasedExponent - HALF_EXPONENT_MIN);
4252e6f5bf1SYuichi Sugiyama
4262e6f5bf1SYuichi Sugiyama /* In addition to the shift based on the exponent's value,
4272e6f5bf1SYuichi Sugiyama * the single significand has to be shifted right to fit into
4282e6f5bf1SYuichi Sugiyama * a half-precision significand */
4292e6f5bf1SYuichi Sugiyama nShiftAmount = nExponentDifference + (SINGLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
4302e6f5bf1SYuichi Sugiyama
4312e6f5bf1SYuichi Sugiyama /* Must add 1 in to the possible significand because there is
4322e6f5bf1SYuichi Sugiyama * an implied 1 for normal values and not for subnormal
4332e6f5bf1SYuichi Sugiyama * values. See equations here:
4342e6f5bf1SYuichi Sugiyama * https://en.wikipedia.org/wiki/Single-precision_floating-point_format#Exponent_encoding
4352e6f5bf1SYuichi Sugiyama */
4362e6f5bf1SYuichi Sugiyama uHalfSignificand = (uSingleSignificand + (1 << SINGLE_NUM_SIGNIFICAND_BITS)) >> nShiftAmount;
4372e6f5bf1SYuichi Sugiyama
4382e6f5bf1SYuichi Sugiyama /* If only zero bits get shifted out, this can be converted
4392e6f5bf1SYuichi Sugiyama * to subnormal */
4402e6f5bf1SYuichi Sugiyama if(nSingleUnbiasedExponent < HALF_EXPONENT_MIN &&
4412e6f5bf1SYuichi Sugiyama nSingleUnbiasedExponent >= HALF_EXPONENT_MIN - HALF_NUM_SIGNIFICAND_BITS &&
4422e6f5bf1SYuichi Sugiyama uHalfSignificand << nShiftAmount == uSingleSignificand + (1 << SINGLE_NUM_SIGNIFICAND_BITS)) {
4432e6f5bf1SYuichi Sugiyama /* --- CONVERTABLE TO HALF SUBNORMAL --- */
4442e6f5bf1SYuichi Sugiyama result.uSize = IEEE754_UNION_IS_HALF;
4452e6f5bf1SYuichi Sugiyama result.uValue = IEEE754_AssembleHalf(uSingleSign,
4462e6f5bf1SYuichi Sugiyama uHalfSignificand,
4472e6f5bf1SYuichi Sugiyama HALF_EXPONENT_ZERO);
4482e6f5bf1SYuichi Sugiyama } else {
4492e6f5bf1SYuichi Sugiyama /* --- DO NOT CONVERT --- */
4502e6f5bf1SYuichi Sugiyama result.uSize = IEEE754_UNION_IS_SINGLE;
4512e6f5bf1SYuichi Sugiyama result.uValue = uSingle;
4522e6f5bf1SYuichi Sugiyama }
4532e6f5bf1SYuichi Sugiyama }
4542e6f5bf1SYuichi Sugiyama }
4552e6f5bf1SYuichi Sugiyama
4562e6f5bf1SYuichi Sugiyama return result;
4572e6f5bf1SYuichi Sugiyama }
4582e6f5bf1SYuichi Sugiyama
4592e6f5bf1SYuichi Sugiyama
4602e6f5bf1SYuichi Sugiyama /**
4612e6f5bf1SYuichi Sugiyama * @brief Assemble sign, significand and exponent into single precision float.
4622e6f5bf1SYuichi Sugiyama *
4632e6f5bf1SYuichi Sugiyama * @param[in] uSingleSign 0 if positive, 1 if negative
4642e6f5bf1SYuichi Sugiyama * @pararm[in] uSingleSignificand Bits of the significand
4652e6f5bf1SYuichi Sugiyama * @param[in] nSingleUnBiasedExponent Exponent
4662e6f5bf1SYuichi Sugiyama *
4672e6f5bf1SYuichi Sugiyama * This returns the bits for a single-precision float, a binary32 as
4682e6f5bf1SYuichi Sugiyama * specified in IEEE754. It is returned as a uint64_t rather than a
4692e6f5bf1SYuichi Sugiyama * uint32_t or a float for convenience of usage.
4702e6f5bf1SYuichi Sugiyama */
4712e6f5bf1SYuichi Sugiyama static uint64_t
IEEE754_AssembleSingle(uint64_t uSingleSign,uint64_t uSingleSignificand,int64_t nSingleUnBiasedExponent)4722e6f5bf1SYuichi Sugiyama IEEE754_AssembleSingle(uint64_t uSingleSign,
4732e6f5bf1SYuichi Sugiyama uint64_t uSingleSignificand,
4742e6f5bf1SYuichi Sugiyama int64_t nSingleUnBiasedExponent)
4752e6f5bf1SYuichi Sugiyama {
4762e6f5bf1SYuichi Sugiyama uint64_t uSingleBiasedExponent;
4772e6f5bf1SYuichi Sugiyama
4782e6f5bf1SYuichi Sugiyama uSingleBiasedExponent = (uint64_t)(nSingleUnBiasedExponent + SINGLE_EXPONENT_BIAS);
4792e6f5bf1SYuichi Sugiyama
4802e6f5bf1SYuichi Sugiyama return uSingleSignificand |
4812e6f5bf1SYuichi Sugiyama (uSingleBiasedExponent << SINGLE_EXPONENT_SHIFT) |
4822e6f5bf1SYuichi Sugiyama (uSingleSign << SINGLE_SIGN_SHIFT);
4832e6f5bf1SYuichi Sugiyama }
4842e6f5bf1SYuichi Sugiyama
4852e6f5bf1SYuichi Sugiyama
4862e6f5bf1SYuichi Sugiyama /**
4872e6f5bf1SYuichi Sugiyama * @brief Convert a double-precision float to single-precision.
4882e6f5bf1SYuichi Sugiyama *
4892e6f5bf1SYuichi Sugiyama * @param[in] d The value to convert.
4902e6f5bf1SYuichi Sugiyama *
4912e6f5bf1SYuichi Sugiyama * @returns Either unconverted value or value converted to single-precision.
4922e6f5bf1SYuichi Sugiyama *
4932e6f5bf1SYuichi Sugiyama * This always succeeds. If the value cannot be converted without the
4942e6f5bf1SYuichi Sugiyama * loss of precision, it is not converted.
4952e6f5bf1SYuichi Sugiyama *
4962e6f5bf1SYuichi Sugiyama * This handles all subnormals and NaN payloads.
4972e6f5bf1SYuichi Sugiyama */
4982e6f5bf1SYuichi Sugiyama static IEEE754_union
IEEE754_DoubleToSingle(double d)4992e6f5bf1SYuichi Sugiyama IEEE754_DoubleToSingle(double d)
5002e6f5bf1SYuichi Sugiyama {
5012e6f5bf1SYuichi Sugiyama IEEE754_union Result;
5022e6f5bf1SYuichi Sugiyama int64_t nExponentDifference;
5032e6f5bf1SYuichi Sugiyama int64_t nShiftAmount;
5042e6f5bf1SYuichi Sugiyama uint64_t uSingleSignificand;
5052e6f5bf1SYuichi Sugiyama uint64_t uDroppedBits;
5062e6f5bf1SYuichi Sugiyama
5072e6f5bf1SYuichi Sugiyama
5082e6f5bf1SYuichi Sugiyama /* Pull the three parts out of the double-precision float. Most
5092e6f5bf1SYuichi Sugiyama * work is done with uint64_t which helps avoid integer promotions
5102e6f5bf1SYuichi Sugiyama * and static analyzer complaints.
5112e6f5bf1SYuichi Sugiyama */
5122e6f5bf1SYuichi Sugiyama const uint64_t uDouble = CopyDoubleToUint64(d);
5132e6f5bf1SYuichi Sugiyama const uint64_t uDoubleBiasedExponent = (uDouble & DOUBLE_EXPONENT_MASK) >> DOUBLE_EXPONENT_SHIFT;
5142e6f5bf1SYuichi Sugiyama const int64_t nDoubleUnbiasedExponent = (int64_t)uDoubleBiasedExponent - DOUBLE_EXPONENT_BIAS;
5152e6f5bf1SYuichi Sugiyama const uint64_t uDoubleSign = (uDouble & DOUBLE_SIGN_MASK) >> DOUBLE_SIGN_SHIFT;
5162e6f5bf1SYuichi Sugiyama const uint64_t uDoubleSignificand = uDouble & DOUBLE_SIGNIFICAND_MASK;
5172e6f5bf1SYuichi Sugiyama
5182e6f5bf1SYuichi Sugiyama
5192e6f5bf1SYuichi Sugiyama if(nDoubleUnbiasedExponent == DOUBLE_EXPONENT_ZERO) {
5202e6f5bf1SYuichi Sugiyama if(uDoubleSignificand == 0) {
5212e6f5bf1SYuichi Sugiyama /* --- IS ZERO --- */
5222e6f5bf1SYuichi Sugiyama Result.uSize = IEEE754_UNION_IS_SINGLE;
5232e6f5bf1SYuichi Sugiyama Result.uValue = IEEE754_AssembleSingle(uDoubleSign,
5242e6f5bf1SYuichi Sugiyama 0,
5252e6f5bf1SYuichi Sugiyama SINGLE_EXPONENT_ZERO);
5262e6f5bf1SYuichi Sugiyama } else {
5272e6f5bf1SYuichi Sugiyama /* --- IS DOUBLE SUBNORMAL --- */
5282e6f5bf1SYuichi Sugiyama /* The largest double subnormal is slightly less than the
5292e6f5bf1SYuichi Sugiyama * largest double normal which is 2^-1022 or
5302e6f5bf1SYuichi Sugiyama * 2.2250738585072014e-308. The smallest single subnormal
5312e6f5bf1SYuichi Sugiyama * is 2^-149 or 1.401298464324817e-45. There is no
5322e6f5bf1SYuichi Sugiyama * overlap so double subnormals can't be converted to
5332e6f5bf1SYuichi Sugiyama * singles of any sort.
5342e6f5bf1SYuichi Sugiyama */
5352e6f5bf1SYuichi Sugiyama Result.uSize = IEEE754_UNION_IS_DOUBLE;
5362e6f5bf1SYuichi Sugiyama Result.uValue = uDouble;
5372e6f5bf1SYuichi Sugiyama }
5382e6f5bf1SYuichi Sugiyama } else if(nDoubleUnbiasedExponent == DOUBLE_EXPONENT_INF_OR_NAN) {
5392e6f5bf1SYuichi Sugiyama if(uDoubleSignificand == 0) {
5402e6f5bf1SYuichi Sugiyama /* ---- IS INFINITY ---- */
5412e6f5bf1SYuichi Sugiyama Result.uSize = IEEE754_UNION_IS_SINGLE;
5422e6f5bf1SYuichi Sugiyama Result.uValue = IEEE754_AssembleSingle(uDoubleSign,
5432e6f5bf1SYuichi Sugiyama 0,
5442e6f5bf1SYuichi Sugiyama SINGLE_EXPONENT_INF_OR_NAN);
5452e6f5bf1SYuichi Sugiyama } else {
5462e6f5bf1SYuichi Sugiyama /* The NaN can only be converted if no payload bits are
5472e6f5bf1SYuichi Sugiyama * lost per RFC 8949 section 4.1 that defines Preferred
5482e6f5bf1SYuichi Sugiyama * Serializaton. Note that Deterministically Encode CBOR
5492e6f5bf1SYuichi Sugiyama * in section 4.2 allows for some variation of this rule,
5502e6f5bf1SYuichi Sugiyama * but at the moment this implementation is of Preferred
5512e6f5bf1SYuichi Sugiyama * Serialization, not CDE. As of December 2023, we are
5522e6f5bf1SYuichi Sugiyama * also expecting an update to CDE. This code may need to
5532e6f5bf1SYuichi Sugiyama * be updated for CDE.
5542e6f5bf1SYuichi Sugiyama */
5552e6f5bf1SYuichi Sugiyama uDroppedBits = uDoubleSignificand & (DOUBLE_SIGNIFICAND_MASK >> SINGLE_NUM_SIGNIFICAND_BITS);
5562e6f5bf1SYuichi Sugiyama if(uDroppedBits == 0) {
5572e6f5bf1SYuichi Sugiyama /* --- IS CONVERTABLE NAN --- */
5582e6f5bf1SYuichi Sugiyama uSingleSignificand = uDoubleSignificand >> (DOUBLE_NUM_SIGNIFICAND_BITS - SINGLE_NUM_SIGNIFICAND_BITS);
5592e6f5bf1SYuichi Sugiyama Result.uSize = IEEE754_UNION_IS_SINGLE;
5602e6f5bf1SYuichi Sugiyama Result.uValue = IEEE754_AssembleSingle(uDoubleSign,
5612e6f5bf1SYuichi Sugiyama uSingleSignificand,
5622e6f5bf1SYuichi Sugiyama SINGLE_EXPONENT_INF_OR_NAN);
5632e6f5bf1SYuichi Sugiyama } else {
5642e6f5bf1SYuichi Sugiyama /* --- IS UNCONVERTABLE NAN --- */
5652e6f5bf1SYuichi Sugiyama Result.uSize = IEEE754_UNION_IS_DOUBLE;
5662e6f5bf1SYuichi Sugiyama Result.uValue = uDouble;
5672e6f5bf1SYuichi Sugiyama }
5682e6f5bf1SYuichi Sugiyama }
5692e6f5bf1SYuichi Sugiyama } else {
5702e6f5bf1SYuichi Sugiyama /* ---- REGULAR NUMBER ---- */
5712e6f5bf1SYuichi Sugiyama /* A regular double can be converted to a regular single if
5722e6f5bf1SYuichi Sugiyama * the double's exponent is in the smaller range of a single
5732e6f5bf1SYuichi Sugiyama * and if no precision is lost in the significand.
5742e6f5bf1SYuichi Sugiyama */
5752e6f5bf1SYuichi Sugiyama uDroppedBits = uDoubleSignificand & (DOUBLE_SIGNIFICAND_MASK >> SINGLE_NUM_SIGNIFICAND_BITS);
5762e6f5bf1SYuichi Sugiyama if(nDoubleUnbiasedExponent >= SINGLE_EXPONENT_MIN &&
5772e6f5bf1SYuichi Sugiyama nDoubleUnbiasedExponent <= SINGLE_EXPONENT_MAX &&
5782e6f5bf1SYuichi Sugiyama uDroppedBits == 0) {
5792e6f5bf1SYuichi Sugiyama /* --- IS CONVERTABLE TO SINGLE --- */
5802e6f5bf1SYuichi Sugiyama uSingleSignificand = uDoubleSignificand >> (DOUBLE_NUM_SIGNIFICAND_BITS - SINGLE_NUM_SIGNIFICAND_BITS);
5812e6f5bf1SYuichi Sugiyama Result.uSize = IEEE754_UNION_IS_SINGLE;
5822e6f5bf1SYuichi Sugiyama Result.uValue = IEEE754_AssembleSingle(uDoubleSign,
5832e6f5bf1SYuichi Sugiyama uSingleSignificand,
5842e6f5bf1SYuichi Sugiyama nDoubleUnbiasedExponent);
5852e6f5bf1SYuichi Sugiyama } else {
5862e6f5bf1SYuichi Sugiyama /* Unable to convert to a single normal. See if it can be
5872e6f5bf1SYuichi Sugiyama * converted to a single subnormal. To do that, the
5882e6f5bf1SYuichi Sugiyama * exponent must be in range and no precision can be lost
5892e6f5bf1SYuichi Sugiyama * in the signficand.
5902e6f5bf1SYuichi Sugiyama *
5912e6f5bf1SYuichi Sugiyama * This is more complicated because the number is not
5922e6f5bf1SYuichi Sugiyama * normalized. The signficand must be shifted
5932e6f5bf1SYuichi Sugiyama * proprotionally to the exponent and 1 must be added
5942e6f5bf1SYuichi Sugiyama * in. See
5952e6f5bf1SYuichi Sugiyama * https://en.wikipedia.org/wiki/Single-precision_floating-point_format#Exponent_encoding
5962e6f5bf1SYuichi Sugiyama */
5972e6f5bf1SYuichi Sugiyama nExponentDifference = -(nDoubleUnbiasedExponent - SINGLE_EXPONENT_MIN);
5982e6f5bf1SYuichi Sugiyama nShiftAmount = nExponentDifference + (DOUBLE_NUM_SIGNIFICAND_BITS - SINGLE_NUM_SIGNIFICAND_BITS);
5992e6f5bf1SYuichi Sugiyama uSingleSignificand = (uDoubleSignificand + (1ULL << DOUBLE_NUM_SIGNIFICAND_BITS)) >> nShiftAmount;
6002e6f5bf1SYuichi Sugiyama
6012e6f5bf1SYuichi Sugiyama if(nDoubleUnbiasedExponent < SINGLE_EXPONENT_MIN &&
6022e6f5bf1SYuichi Sugiyama nDoubleUnbiasedExponent >= SINGLE_EXPONENT_MIN - SINGLE_NUM_SIGNIFICAND_BITS &&
6032e6f5bf1SYuichi Sugiyama uSingleSignificand << nShiftAmount == uDoubleSignificand + (1ULL << DOUBLE_NUM_SIGNIFICAND_BITS)) {
6042e6f5bf1SYuichi Sugiyama /* --- IS CONVERTABLE TO SINGLE SUBNORMAL --- */
6052e6f5bf1SYuichi Sugiyama Result.uSize = IEEE754_UNION_IS_SINGLE;
6062e6f5bf1SYuichi Sugiyama Result.uValue = IEEE754_AssembleSingle(uDoubleSign,
6072e6f5bf1SYuichi Sugiyama uSingleSignificand,
6082e6f5bf1SYuichi Sugiyama SINGLE_EXPONENT_ZERO);
6092e6f5bf1SYuichi Sugiyama } else {
6102e6f5bf1SYuichi Sugiyama /* --- CAN NOT BE CONVERTED --- */
6112e6f5bf1SYuichi Sugiyama Result.uSize = IEEE754_UNION_IS_DOUBLE;
6122e6f5bf1SYuichi Sugiyama Result.uValue = uDouble;
6132e6f5bf1SYuichi Sugiyama }
6142e6f5bf1SYuichi Sugiyama }
6152e6f5bf1SYuichi Sugiyama }
6162e6f5bf1SYuichi Sugiyama
6172e6f5bf1SYuichi Sugiyama return Result;
6182e6f5bf1SYuichi Sugiyama }
6192e6f5bf1SYuichi Sugiyama
6202e6f5bf1SYuichi Sugiyama
6212e6f5bf1SYuichi Sugiyama /* Public function; see ieee754.h */
6222e6f5bf1SYuichi Sugiyama IEEE754_union
IEEE754_DoubleToSmaller(double d,int bAllowHalfPrecision)6232e6f5bf1SYuichi Sugiyama IEEE754_DoubleToSmaller(double d, int bAllowHalfPrecision)
6242e6f5bf1SYuichi Sugiyama {
6252e6f5bf1SYuichi Sugiyama IEEE754_union result;
6262e6f5bf1SYuichi Sugiyama
6272e6f5bf1SYuichi Sugiyama result = IEEE754_DoubleToSingle(d);
6282e6f5bf1SYuichi Sugiyama
6292e6f5bf1SYuichi Sugiyama if(result.uSize == IEEE754_UNION_IS_SINGLE && bAllowHalfPrecision) {
6302e6f5bf1SYuichi Sugiyama /* Cast to uint32_t is OK, because value was just successfully
6312e6f5bf1SYuichi Sugiyama * converted to single. */
6322e6f5bf1SYuichi Sugiyama float uSingle = CopyUint32ToSingle((uint32_t)result.uValue);
6332e6f5bf1SYuichi Sugiyama result = IEEE754_SingleToHalf(uSingle);
6342e6f5bf1SYuichi Sugiyama }
6352e6f5bf1SYuichi Sugiyama
6362e6f5bf1SYuichi Sugiyama return result;
6372e6f5bf1SYuichi Sugiyama }
6382e6f5bf1SYuichi Sugiyama
6392e6f5bf1SYuichi Sugiyama
6402e6f5bf1SYuichi Sugiyama #else /* QCBOR_DISABLE_PREFERRED_FLOAT */
6412e6f5bf1SYuichi Sugiyama
6422e6f5bf1SYuichi Sugiyama int ieee754_dummy_place_holder;
6432e6f5bf1SYuichi Sugiyama
6442e6f5bf1SYuichi Sugiyama #endif /* QCBOR_DISABLE_PREFERRED_FLOAT */
645