xref: /optee_os/core/lib/qcbor/src/ieee754.c (revision b586599be35c4311337a5d8db5f4b5e5c81a754d)
1*b586599bSYuichi Sugiyama // SPDX-License-Identifier: BSD-3-Clause
22e6f5bf1SYuichi Sugiyama /* ==========================================================================
32e6f5bf1SYuichi Sugiyama  * ieee754.c -- floating-point conversion between half, double & single-precision
42e6f5bf1SYuichi Sugiyama  *
52e6f5bf1SYuichi Sugiyama  * Copyright (c) 2018-2024, Laurence Lundblade. All rights reserved.
62e6f5bf1SYuichi Sugiyama  * Copyright (c) 2021, Arm Limited. All rights reserved.
72e6f5bf1SYuichi Sugiyama  *
82e6f5bf1SYuichi Sugiyama  * SPDX-License-Identifier: BSD-3-Clause
92e6f5bf1SYuichi Sugiyama  *
102e6f5bf1SYuichi Sugiyama  * See BSD-3-Clause license in README.md
112e6f5bf1SYuichi Sugiyama  *
122e6f5bf1SYuichi Sugiyama  * Created on 7/23/18
132e6f5bf1SYuichi Sugiyama  * ========================================================================== */
142e6f5bf1SYuichi Sugiyama 
152e6f5bf1SYuichi Sugiyama /*
162e6f5bf1SYuichi Sugiyama  * Include before QCBOR_DISABLE_PREFERRED_FLOAT is checked as
172e6f5bf1SYuichi Sugiyama  * QCBOR_DISABLE_PREFERRED_FLOAT might be defined in qcbor/qcbor_common.h
182e6f5bf1SYuichi Sugiyama  */
192e6f5bf1SYuichi Sugiyama #include "qcbor/qcbor_common.h"
202e6f5bf1SYuichi Sugiyama 
212e6f5bf1SYuichi Sugiyama #ifndef QCBOR_DISABLE_PREFERRED_FLOAT
222e6f5bf1SYuichi Sugiyama 
232e6f5bf1SYuichi Sugiyama #include "ieee754.h"
242e6f5bf1SYuichi Sugiyama #include <string.h> /* For memcpy() */
252e6f5bf1SYuichi Sugiyama 
262e6f5bf1SYuichi Sugiyama 
272e6f5bf1SYuichi Sugiyama /*
282e6f5bf1SYuichi Sugiyama  * This code has long lines and is easier to read because of
292e6f5bf1SYuichi Sugiyama  * them. Some coding guidelines prefer 80 column lines (can they not
302e6f5bf1SYuichi Sugiyama  * afford big displays?).
312e6f5bf1SYuichi Sugiyama  *
322e6f5bf1SYuichi Sugiyama  * This code works solely using shifts and masks and thus has no
332e6f5bf1SYuichi Sugiyama  * dependency on any math libraries. It can even work if the CPU
342e6f5bf1SYuichi Sugiyama  * doesn't have any floating-point support, though that isn't the most
352e6f5bf1SYuichi Sugiyama  * useful thing to do.
362e6f5bf1SYuichi Sugiyama  *
372e6f5bf1SYuichi Sugiyama  * The memcpy() dependency is only for CopyFloatToUint32() and friends
382e6f5bf1SYuichi Sugiyama  * which only is needed to avoid type punning when converting the
392e6f5bf1SYuichi Sugiyama  * actual float bits to an unsigned value so the bit shifts and masks
402e6f5bf1SYuichi Sugiyama  * can work.
412e6f5bf1SYuichi Sugiyama  *
422e6f5bf1SYuichi Sugiyama  * The references used to write this code:
432e6f5bf1SYuichi Sugiyama  *
442e6f5bf1SYuichi Sugiyama  *  IEEE 754-2008, particularly section 3.6 and 6.2.1
452e6f5bf1SYuichi Sugiyama  *
462e6f5bf1SYuichi Sugiyama  *  https://en.wikipedia.org/wiki/IEEE_754 and subordinate pages
472e6f5bf1SYuichi Sugiyama  *
482e6f5bf1SYuichi Sugiyama  *  https://stackoverflow.com/questions/19800415/why-does-ieee-754-reserve-so-many-nan-values
492e6f5bf1SYuichi Sugiyama  *
502e6f5bf1SYuichi Sugiyama  *  https://stackoverflow.com/questions/46073295/implicit-type-promotion-rules
512e6f5bf1SYuichi Sugiyama  *
522e6f5bf1SYuichi Sugiyama  *  https://stackoverflow.com/questions/589575/what-does-the-c-standard-state-the-size-of-int-long-type-to-be
532e6f5bf1SYuichi Sugiyama  *
542e6f5bf1SYuichi Sugiyama  * IEEE754_FloatToDouble(uint32_t uFloat) was created but is not
552e6f5bf1SYuichi Sugiyama  * needed. It can be retrieved from github history if needed.
562e6f5bf1SYuichi Sugiyama  */
572e6f5bf1SYuichi Sugiyama 
582e6f5bf1SYuichi Sugiyama 
592e6f5bf1SYuichi Sugiyama 
602e6f5bf1SYuichi Sugiyama 
612e6f5bf1SYuichi Sugiyama /* ----- Half Precsion ----------- */
622e6f5bf1SYuichi Sugiyama #define HALF_NUM_SIGNIFICAND_BITS (10)
632e6f5bf1SYuichi Sugiyama #define HALF_NUM_EXPONENT_BITS    (5)
642e6f5bf1SYuichi Sugiyama #define HALF_NUM_SIGN_BITS        (1)
652e6f5bf1SYuichi Sugiyama 
662e6f5bf1SYuichi Sugiyama #define HALF_SIGNIFICAND_SHIFT    (0)
672e6f5bf1SYuichi Sugiyama #define HALF_EXPONENT_SHIFT       (HALF_NUM_SIGNIFICAND_BITS)
682e6f5bf1SYuichi Sugiyama #define HALF_SIGN_SHIFT           (HALF_NUM_SIGNIFICAND_BITS + HALF_NUM_EXPONENT_BITS)
692e6f5bf1SYuichi Sugiyama 
702e6f5bf1SYuichi Sugiyama #define HALF_SIGNIFICAND_MASK     (0x3ffU) // The lower 10 bits
712e6f5bf1SYuichi Sugiyama #define HALF_EXPONENT_MASK        (0x1fU << HALF_EXPONENT_SHIFT) // 0x7c00 5 bits of exponent
722e6f5bf1SYuichi Sugiyama #define HALF_SIGN_MASK            (0x01U << HALF_SIGN_SHIFT) // 0x8000 1 bit of sign
732e6f5bf1SYuichi Sugiyama #define HALF_QUIET_NAN_BIT        (0x01U << (HALF_NUM_SIGNIFICAND_BITS-1)) // 0x0200
742e6f5bf1SYuichi Sugiyama 
752e6f5bf1SYuichi Sugiyama /* Biased    Biased    Unbiased   Use
762e6f5bf1SYuichi Sugiyama  *  0x00       0        -15       0 and subnormal
772e6f5bf1SYuichi Sugiyama  *  0x01       1        -14       Smallest normal exponent
782e6f5bf1SYuichi Sugiyama  *  0x1e      30         15       Largest normal exponent
792e6f5bf1SYuichi Sugiyama  *  0x1F      31         16       NaN and Infinity  */
802e6f5bf1SYuichi Sugiyama #define HALF_EXPONENT_BIAS        (15)
812e6f5bf1SYuichi Sugiyama #define HALF_EXPONENT_MAX         (HALF_EXPONENT_BIAS)    //  15 Unbiased
822e6f5bf1SYuichi Sugiyama #define HALF_EXPONENT_MIN         (-HALF_EXPONENT_BIAS+1) // -14 Unbiased
832e6f5bf1SYuichi Sugiyama #define HALF_EXPONENT_ZERO        (-HALF_EXPONENT_BIAS)   // -15 Unbiased
842e6f5bf1SYuichi Sugiyama #define HALF_EXPONENT_INF_OR_NAN  (HALF_EXPONENT_BIAS+1)  //  16 Unbiased
852e6f5bf1SYuichi Sugiyama 
862e6f5bf1SYuichi Sugiyama 
872e6f5bf1SYuichi Sugiyama /* ------ Single-Precision -------- */
882e6f5bf1SYuichi Sugiyama #define SINGLE_NUM_SIGNIFICAND_BITS (23)
892e6f5bf1SYuichi Sugiyama #define SINGLE_NUM_EXPONENT_BITS    (8)
902e6f5bf1SYuichi Sugiyama #define SINGLE_NUM_SIGN_BITS        (1)
912e6f5bf1SYuichi Sugiyama 
922e6f5bf1SYuichi Sugiyama #define SINGLE_SIGNIFICAND_SHIFT    (0)
932e6f5bf1SYuichi Sugiyama #define SINGLE_EXPONENT_SHIFT       (SINGLE_NUM_SIGNIFICAND_BITS)
942e6f5bf1SYuichi Sugiyama #define SINGLE_SIGN_SHIFT           (SINGLE_NUM_SIGNIFICAND_BITS + SINGLE_NUM_EXPONENT_BITS)
952e6f5bf1SYuichi Sugiyama 
962e6f5bf1SYuichi Sugiyama #define SINGLE_SIGNIFICAND_MASK     (0x7fffffU) // The lower 23 bits
972e6f5bf1SYuichi Sugiyama #define SINGLE_EXPONENT_MASK        (0xffU << SINGLE_EXPONENT_SHIFT) // 8 bits of exponent
982e6f5bf1SYuichi Sugiyama #define SINGLE_SIGN_MASK            (0x01U << SINGLE_SIGN_SHIFT) // 1 bit of sign
992e6f5bf1SYuichi Sugiyama #define SINGLE_QUIET_NAN_BIT        (0x01U << (SINGLE_NUM_SIGNIFICAND_BITS-1))
1002e6f5bf1SYuichi Sugiyama 
1012e6f5bf1SYuichi Sugiyama /* Biased  Biased   Unbiased  Use
1022e6f5bf1SYuichi Sugiyama  *  0x0000     0     -127      0 and subnormal
1032e6f5bf1SYuichi Sugiyama  *  0x0001     1     -126      Smallest normal exponent
1042e6f5bf1SYuichi Sugiyama  *  0x7f     127        0      1
1052e6f5bf1SYuichi Sugiyama  *  0xfe     254      127      Largest normal exponent
1062e6f5bf1SYuichi Sugiyama  *  0xff     255      128      NaN and Infinity  */
1072e6f5bf1SYuichi Sugiyama #define SINGLE_EXPONENT_BIAS        (127)
1082e6f5bf1SYuichi Sugiyama #define SINGLE_EXPONENT_MAX         (SINGLE_EXPONENT_BIAS)
1092e6f5bf1SYuichi Sugiyama #define SINGLE_EXPONENT_MIN         (-SINGLE_EXPONENT_BIAS+1)
1102e6f5bf1SYuichi Sugiyama #define SINGLE_EXPONENT_ZERO        (-SINGLE_EXPONENT_BIAS)
1112e6f5bf1SYuichi Sugiyama #define SINGLE_EXPONENT_INF_OR_NAN  (SINGLE_EXPONENT_BIAS+1)
1122e6f5bf1SYuichi Sugiyama 
1132e6f5bf1SYuichi Sugiyama 
1142e6f5bf1SYuichi Sugiyama /* --------- Double-Precision ---------- */
1152e6f5bf1SYuichi Sugiyama #define DOUBLE_NUM_SIGNIFICAND_BITS (52)
1162e6f5bf1SYuichi Sugiyama #define DOUBLE_NUM_EXPONENT_BITS    (11)
1172e6f5bf1SYuichi Sugiyama #define DOUBLE_NUM_SIGN_BITS        (1)
1182e6f5bf1SYuichi Sugiyama 
1192e6f5bf1SYuichi Sugiyama #define DOUBLE_SIGNIFICAND_SHIFT    (0)
1202e6f5bf1SYuichi Sugiyama #define DOUBLE_EXPONENT_SHIFT       (DOUBLE_NUM_SIGNIFICAND_BITS)
1212e6f5bf1SYuichi Sugiyama #define DOUBLE_SIGN_SHIFT           (DOUBLE_NUM_SIGNIFICAND_BITS + DOUBLE_NUM_EXPONENT_BITS)
1222e6f5bf1SYuichi Sugiyama 
1232e6f5bf1SYuichi Sugiyama #define DOUBLE_SIGNIFICAND_MASK     (0xfffffffffffffULL) // The lower 52 bits
1242e6f5bf1SYuichi Sugiyama #define DOUBLE_EXPONENT_MASK        (0x7ffULL << DOUBLE_EXPONENT_SHIFT) // 11 bits of exponent
1252e6f5bf1SYuichi Sugiyama #define DOUBLE_SIGN_MASK            (0x01ULL << DOUBLE_SIGN_SHIFT) // 1 bit of sign
1262e6f5bf1SYuichi Sugiyama #define DOUBLE_QUIET_NAN_BIT        (0x01ULL << (DOUBLE_NUM_SIGNIFICAND_BITS-1))
1272e6f5bf1SYuichi Sugiyama 
1282e6f5bf1SYuichi Sugiyama 
1292e6f5bf1SYuichi Sugiyama /* Biased      Biased   Unbiased  Use
1302e6f5bf1SYuichi Sugiyama  * 0x00000000     0     -1023     0 and subnormal
1312e6f5bf1SYuichi Sugiyama  * 0x00000001     1     -1022     Smallest normal exponent
1322e6f5bf1SYuichi Sugiyama  * 0x000007fe  2046      1023     Largest normal exponent
1332e6f5bf1SYuichi Sugiyama  * 0x000007ff  2047      1024     NaN and Infinity  */
1342e6f5bf1SYuichi Sugiyama #define DOUBLE_EXPONENT_BIAS        (1023)
1352e6f5bf1SYuichi Sugiyama #define DOUBLE_EXPONENT_MAX         (DOUBLE_EXPONENT_BIAS)
1362e6f5bf1SYuichi Sugiyama #define DOUBLE_EXPONENT_MIN         (-DOUBLE_EXPONENT_BIAS+1)
1372e6f5bf1SYuichi Sugiyama #define DOUBLE_EXPONENT_ZERO        (-DOUBLE_EXPONENT_BIAS)
1382e6f5bf1SYuichi Sugiyama #define DOUBLE_EXPONENT_INF_OR_NAN  (DOUBLE_EXPONENT_BIAS+1)
1392e6f5bf1SYuichi Sugiyama 
1402e6f5bf1SYuichi Sugiyama 
1412e6f5bf1SYuichi Sugiyama 
1422e6f5bf1SYuichi Sugiyama 
1432e6f5bf1SYuichi Sugiyama /*
1442e6f5bf1SYuichi Sugiyama  * Convenient functions to avoid type punning, compiler warnings and
1452e6f5bf1SYuichi Sugiyama  * such. The optimizer reduces them to a simple assignment. This is a
1462e6f5bf1SYuichi Sugiyama  * crusty corner of C. It shouldn't be this hard.
1472e6f5bf1SYuichi Sugiyama  *
1482e6f5bf1SYuichi Sugiyama  * These are also in UsefulBuf.h under a different name. They are copied
1492e6f5bf1SYuichi Sugiyama  * here to avoid a dependency on UsefulBuf.h. There is no object code
1502e6f5bf1SYuichi Sugiyama  * size impact because these always optimze down to a simple assignment.
1512e6f5bf1SYuichi Sugiyama  */
1522e6f5bf1SYuichi Sugiyama static inline uint32_t
CopyFloatToUint32(float f)1532e6f5bf1SYuichi Sugiyama CopyFloatToUint32(float f)
1542e6f5bf1SYuichi Sugiyama {
1552e6f5bf1SYuichi Sugiyama    uint32_t u32;
1562e6f5bf1SYuichi Sugiyama    memcpy(&u32, &f, sizeof(uint32_t));
1572e6f5bf1SYuichi Sugiyama    return u32;
1582e6f5bf1SYuichi Sugiyama }
1592e6f5bf1SYuichi Sugiyama 
1602e6f5bf1SYuichi Sugiyama static inline uint64_t
CopyDoubleToUint64(double d)1612e6f5bf1SYuichi Sugiyama CopyDoubleToUint64(double d)
1622e6f5bf1SYuichi Sugiyama {
1632e6f5bf1SYuichi Sugiyama    uint64_t u64;
1642e6f5bf1SYuichi Sugiyama    memcpy(&u64, &d, sizeof(uint64_t));
1652e6f5bf1SYuichi Sugiyama    return u64;
1662e6f5bf1SYuichi Sugiyama }
1672e6f5bf1SYuichi Sugiyama 
1682e6f5bf1SYuichi Sugiyama static inline double
CopyUint64ToDouble(uint64_t u64)1692e6f5bf1SYuichi Sugiyama CopyUint64ToDouble(uint64_t u64)
1702e6f5bf1SYuichi Sugiyama {
1712e6f5bf1SYuichi Sugiyama    double d;
1722e6f5bf1SYuichi Sugiyama    memcpy(&d, &u64, sizeof(uint64_t));
1732e6f5bf1SYuichi Sugiyama    return d;
1742e6f5bf1SYuichi Sugiyama }
1752e6f5bf1SYuichi Sugiyama 
1762e6f5bf1SYuichi Sugiyama static inline float
CopyUint32ToSingle(uint32_t u32)1772e6f5bf1SYuichi Sugiyama CopyUint32ToSingle(uint32_t u32)
1782e6f5bf1SYuichi Sugiyama {
1792e6f5bf1SYuichi Sugiyama    float f;
1802e6f5bf1SYuichi Sugiyama    memcpy(&f, &u32, sizeof(uint32_t));
1812e6f5bf1SYuichi Sugiyama    return f;
1822e6f5bf1SYuichi Sugiyama }
1832e6f5bf1SYuichi Sugiyama 
1842e6f5bf1SYuichi Sugiyama 
1852e6f5bf1SYuichi Sugiyama 
1862e6f5bf1SYuichi Sugiyama 
1872e6f5bf1SYuichi Sugiyama /**
1882e6f5bf1SYuichi Sugiyama  * @brief Assemble sign, significand and exponent into single precision float.
1892e6f5bf1SYuichi Sugiyama  *
1902e6f5bf1SYuichi Sugiyama  * @param[in] uDoubleSign              0 if positive, 1 if negative
1912e6f5bf1SYuichi Sugiyama  * @pararm[in] uDoubleSignificand      Bits of the significand
1922e6f5bf1SYuichi Sugiyama  * @param[in] nDoubleUnBiasedExponent  Exponent
1932e6f5bf1SYuichi Sugiyama  *
1942e6f5bf1SYuichi Sugiyama  * This returns the bits for a single-precision float, a binary64
1952e6f5bf1SYuichi Sugiyama  * as specified in IEEE754.
1962e6f5bf1SYuichi Sugiyama  */
1972e6f5bf1SYuichi Sugiyama static double
IEEE754_AssembleDouble(uint64_t uDoubleSign,uint64_t uDoubleSignificand,int64_t nDoubleUnBiasedExponent)1982e6f5bf1SYuichi Sugiyama IEEE754_AssembleDouble(uint64_t uDoubleSign,
1992e6f5bf1SYuichi Sugiyama                        uint64_t uDoubleSignificand,
2002e6f5bf1SYuichi Sugiyama                        int64_t  nDoubleUnBiasedExponent)
2012e6f5bf1SYuichi Sugiyama {
2022e6f5bf1SYuichi Sugiyama    uint64_t uDoubleBiasedExponent;
2032e6f5bf1SYuichi Sugiyama 
2042e6f5bf1SYuichi Sugiyama    uDoubleBiasedExponent = (uint64_t)(nDoubleUnBiasedExponent + DOUBLE_EXPONENT_BIAS);
2052e6f5bf1SYuichi Sugiyama 
2062e6f5bf1SYuichi Sugiyama    return CopyUint64ToDouble(uDoubleSignificand |
2072e6f5bf1SYuichi Sugiyama                              (uDoubleBiasedExponent << DOUBLE_EXPONENT_SHIFT) |
2082e6f5bf1SYuichi Sugiyama                              (uDoubleSign << DOUBLE_SIGN_SHIFT));
2092e6f5bf1SYuichi Sugiyama }
2102e6f5bf1SYuichi Sugiyama 
2112e6f5bf1SYuichi Sugiyama 
2122e6f5bf1SYuichi Sugiyama double
IEEE754_HalfToDouble(uint16_t uHalfPrecision)2132e6f5bf1SYuichi Sugiyama IEEE754_HalfToDouble(uint16_t uHalfPrecision)
2142e6f5bf1SYuichi Sugiyama {
2152e6f5bf1SYuichi Sugiyama    uint64_t uDoubleSignificand;
2162e6f5bf1SYuichi Sugiyama    int64_t  nDoubleUnBiasedExponent;
2172e6f5bf1SYuichi Sugiyama    double   dResult;
2182e6f5bf1SYuichi Sugiyama 
2192e6f5bf1SYuichi Sugiyama    /* Pull out the three parts of the half-precision float.  Do all
2202e6f5bf1SYuichi Sugiyama     * the work in 64 bits because that is what the end result is.  It
2212e6f5bf1SYuichi Sugiyama     * may give smaller code size and will keep static analyzers
2222e6f5bf1SYuichi Sugiyama     * happier.
2232e6f5bf1SYuichi Sugiyama     */
2242e6f5bf1SYuichi Sugiyama    const uint64_t uHalfSignificand      = uHalfPrecision & HALF_SIGNIFICAND_MASK;
2252e6f5bf1SYuichi Sugiyama    const uint64_t uHalfBiasedExponent   = (uHalfPrecision & HALF_EXPONENT_MASK) >> HALF_EXPONENT_SHIFT;
2262e6f5bf1SYuichi Sugiyama    const int64_t  nHalfUnBiasedExponent = (int64_t)uHalfBiasedExponent - HALF_EXPONENT_BIAS;
2272e6f5bf1SYuichi Sugiyama    const uint64_t uHalfSign             = (uHalfPrecision & HALF_SIGN_MASK) >> HALF_SIGN_SHIFT;
2282e6f5bf1SYuichi Sugiyama 
2292e6f5bf1SYuichi Sugiyama    if(nHalfUnBiasedExponent == HALF_EXPONENT_ZERO) {
2302e6f5bf1SYuichi Sugiyama       /* 0 or subnormal */
2312e6f5bf1SYuichi Sugiyama       if(uHalfSignificand) {
2322e6f5bf1SYuichi Sugiyama          /* --- SUBNORMAL --- */
2332e6f5bf1SYuichi Sugiyama          /* A half-precision subnormal can always be converted to a
2342e6f5bf1SYuichi Sugiyama           * normal double-precision float because the ranges line up.
2352e6f5bf1SYuichi Sugiyama           * The exponent of a subnormal starts out at the min exponent
2362e6f5bf1SYuichi Sugiyama           * for a normal. As the sub normal significand bits are
2372e6f5bf1SYuichi Sugiyama           * shifted, left to normalize, the exponent is
2382e6f5bf1SYuichi Sugiyama           * decremented. Shifting continues until fully normalized.
2392e6f5bf1SYuichi Sugiyama           */
2402e6f5bf1SYuichi Sugiyama           nDoubleUnBiasedExponent = HALF_EXPONENT_MIN;
2412e6f5bf1SYuichi Sugiyama           uDoubleSignificand      = uHalfSignificand;
2422e6f5bf1SYuichi Sugiyama           do {
2432e6f5bf1SYuichi Sugiyama              uDoubleSignificand <<= 1;
2442e6f5bf1SYuichi Sugiyama              nDoubleUnBiasedExponent--;
2452e6f5bf1SYuichi Sugiyama           } while ((uDoubleSignificand & (1ULL << HALF_NUM_SIGNIFICAND_BITS)) == 0);
2462e6f5bf1SYuichi Sugiyama           /* A normal has an implied 1 in the most significant
2472e6f5bf1SYuichi Sugiyama            * position that a subnormal doesn't. */
2482e6f5bf1SYuichi Sugiyama           uDoubleSignificand -= 1ULL << HALF_NUM_SIGNIFICAND_BITS;
2492e6f5bf1SYuichi Sugiyama           /* Must shift into place for a double significand */
2502e6f5bf1SYuichi Sugiyama           uDoubleSignificand <<= DOUBLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS;
2512e6f5bf1SYuichi Sugiyama 
2522e6f5bf1SYuichi Sugiyama           dResult = IEEE754_AssembleDouble(uHalfSign,
2532e6f5bf1SYuichi Sugiyama                                            uDoubleSignificand,
2542e6f5bf1SYuichi Sugiyama                                            nDoubleUnBiasedExponent);
2552e6f5bf1SYuichi Sugiyama       } else {
2562e6f5bf1SYuichi Sugiyama          /* --- ZERO --- */
2572e6f5bf1SYuichi Sugiyama          dResult = IEEE754_AssembleDouble(uHalfSign,
2582e6f5bf1SYuichi Sugiyama                                           0,
2592e6f5bf1SYuichi Sugiyama                                           DOUBLE_EXPONENT_ZERO);
2602e6f5bf1SYuichi Sugiyama       }
2612e6f5bf1SYuichi Sugiyama    } else if(nHalfUnBiasedExponent == HALF_EXPONENT_INF_OR_NAN) {
2622e6f5bf1SYuichi Sugiyama       /* NaN or Inifinity */
2632e6f5bf1SYuichi Sugiyama       if(uHalfSignificand) {
2642e6f5bf1SYuichi Sugiyama          /* --- NaN --- */
2652e6f5bf1SYuichi Sugiyama          /* Half-precision payloads always fit into double precision
2662e6f5bf1SYuichi Sugiyama           * payloads. They are shifted left the same as a normal
2672e6f5bf1SYuichi Sugiyama           * number significand.
2682e6f5bf1SYuichi Sugiyama           */
2692e6f5bf1SYuichi Sugiyama          uDoubleSignificand = uHalfSignificand << (DOUBLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
2702e6f5bf1SYuichi Sugiyama          dResult = IEEE754_AssembleDouble(uHalfSign,
2712e6f5bf1SYuichi Sugiyama                                           uDoubleSignificand,
2722e6f5bf1SYuichi Sugiyama                                           DOUBLE_EXPONENT_INF_OR_NAN);
2732e6f5bf1SYuichi Sugiyama       } else {
2742e6f5bf1SYuichi Sugiyama          /* --- INFINITY --- */
2752e6f5bf1SYuichi Sugiyama          dResult = IEEE754_AssembleDouble(uHalfSign,
2762e6f5bf1SYuichi Sugiyama                                           0,
2772e6f5bf1SYuichi Sugiyama                                           DOUBLE_EXPONENT_INF_OR_NAN);
2782e6f5bf1SYuichi Sugiyama       }
2792e6f5bf1SYuichi Sugiyama    } else {
2802e6f5bf1SYuichi Sugiyama       /* --- NORMAL NUMBER --- */
2812e6f5bf1SYuichi Sugiyama       uDoubleSignificand = uHalfSignificand << (DOUBLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
2822e6f5bf1SYuichi Sugiyama       dResult = IEEE754_AssembleDouble(uHalfSign,
2832e6f5bf1SYuichi Sugiyama                                        uDoubleSignificand,
2842e6f5bf1SYuichi Sugiyama                                        nHalfUnBiasedExponent);
2852e6f5bf1SYuichi Sugiyama    }
2862e6f5bf1SYuichi Sugiyama 
2872e6f5bf1SYuichi Sugiyama    return dResult;
2882e6f5bf1SYuichi Sugiyama }
2892e6f5bf1SYuichi Sugiyama 
2902e6f5bf1SYuichi Sugiyama 
2912e6f5bf1SYuichi Sugiyama /**
2922e6f5bf1SYuichi Sugiyama  * @brief Assemble sign, significand and exponent into single precision float.
2932e6f5bf1SYuichi Sugiyama  *
2942e6f5bf1SYuichi Sugiyama  * @param[in] uHalfSign              0 if positive, 1 if negative
2952e6f5bf1SYuichi Sugiyama  * @pararm[in] uHalfSignificand      Bits of the significand
2962e6f5bf1SYuichi Sugiyama  * @param[in] nHalfUnBiasedExponent  Exponent
2972e6f5bf1SYuichi Sugiyama  *
2982e6f5bf1SYuichi Sugiyama  * This returns the bits for a single-precision float, a binary32 as
2992e6f5bf1SYuichi Sugiyama  * specified in IEEE754. It is returned as a uint64_t rather than a
3002e6f5bf1SYuichi Sugiyama  * uint32_t or a float for convenience of usage.
3012e6f5bf1SYuichi Sugiyama  */
3022e6f5bf1SYuichi Sugiyama static uint32_t
IEEE754_AssembleHalf(uint32_t uHalfSign,uint32_t uHalfSignificand,int32_t nHalfUnBiasedExponent)3032e6f5bf1SYuichi Sugiyama IEEE754_AssembleHalf(uint32_t uHalfSign,
3042e6f5bf1SYuichi Sugiyama                      uint32_t uHalfSignificand,
3052e6f5bf1SYuichi Sugiyama                      int32_t nHalfUnBiasedExponent)
3062e6f5bf1SYuichi Sugiyama {
3072e6f5bf1SYuichi Sugiyama    uint32_t uHalfUnbiasedExponent;
3082e6f5bf1SYuichi Sugiyama 
3092e6f5bf1SYuichi Sugiyama    uHalfUnbiasedExponent = (uint32_t)(nHalfUnBiasedExponent + HALF_EXPONENT_BIAS);
3102e6f5bf1SYuichi Sugiyama 
3112e6f5bf1SYuichi Sugiyama    return uHalfSignificand |
3122e6f5bf1SYuichi Sugiyama           (uHalfUnbiasedExponent << HALF_EXPONENT_SHIFT) |
3132e6f5bf1SYuichi Sugiyama           (uHalfSign << HALF_SIGN_SHIFT);
3142e6f5bf1SYuichi Sugiyama }
3152e6f5bf1SYuichi Sugiyama 
3162e6f5bf1SYuichi Sugiyama 
3172e6f5bf1SYuichi Sugiyama /*  Public function; see ieee754.h */
3182e6f5bf1SYuichi Sugiyama IEEE754_union
IEEE754_SingleToHalf(float f)3192e6f5bf1SYuichi Sugiyama IEEE754_SingleToHalf(float f)
3202e6f5bf1SYuichi Sugiyama {
3212e6f5bf1SYuichi Sugiyama    IEEE754_union result;
3222e6f5bf1SYuichi Sugiyama    uint32_t      uDroppedBits;
3232e6f5bf1SYuichi Sugiyama    int32_t       nExponentDifference;
3242e6f5bf1SYuichi Sugiyama    int32_t       nShiftAmount;
3252e6f5bf1SYuichi Sugiyama    uint32_t      uHalfSignificand;
3262e6f5bf1SYuichi Sugiyama 
3272e6f5bf1SYuichi Sugiyama    /* Pull the three parts out of the double-precision float Most work
3282e6f5bf1SYuichi Sugiyama     * is done with uint32_t which helps avoid integer promotions and
3292e6f5bf1SYuichi Sugiyama     * static analyzer complaints.
3302e6f5bf1SYuichi Sugiyama     */
3312e6f5bf1SYuichi Sugiyama    const uint32_t uSingle                 = CopyFloatToUint32(f);
3322e6f5bf1SYuichi Sugiyama    const uint32_t uSingleBiasedExponent   = (uSingle & SINGLE_EXPONENT_MASK) >> SINGLE_EXPONENT_SHIFT;
3332e6f5bf1SYuichi Sugiyama    const int32_t  nSingleUnbiasedExponent = (int32_t)uSingleBiasedExponent - SINGLE_EXPONENT_BIAS;
3342e6f5bf1SYuichi Sugiyama    const uint32_t uSingleSignificand      = uSingle & SINGLE_SIGNIFICAND_MASK;
3352e6f5bf1SYuichi Sugiyama    const uint32_t uSingleSign             = (uSingle & SINGLE_SIGN_MASK) >> SINGLE_SIGN_SHIFT;
3362e6f5bf1SYuichi Sugiyama 
3372e6f5bf1SYuichi Sugiyama    if(nSingleUnbiasedExponent == SINGLE_EXPONENT_ZERO) {
3382e6f5bf1SYuichi Sugiyama       if(uSingleSignificand == 0) {
3392e6f5bf1SYuichi Sugiyama          /* --- IS ZERO --- */
3402e6f5bf1SYuichi Sugiyama          result.uSize  = IEEE754_UNION_IS_HALF;
3412e6f5bf1SYuichi Sugiyama          result.uValue = IEEE754_AssembleHalf(uSingleSign,
3422e6f5bf1SYuichi Sugiyama                                               0,
3432e6f5bf1SYuichi Sugiyama                                               HALF_EXPONENT_ZERO);
3442e6f5bf1SYuichi Sugiyama       } else {
3452e6f5bf1SYuichi Sugiyama          /* --- IS SINGLE SUBNORMAL --- */
3462e6f5bf1SYuichi Sugiyama          /* The largest single subnormal is slightly less than the
3472e6f5bf1SYuichi Sugiyama           * largest single normal which is 2^-149 or
3482e6f5bf1SYuichi Sugiyama           * 2.2040517676619426e-38.  The smallest half subnormal is
3492e6f5bf1SYuichi Sugiyama           * 2^-14 or 5.9604644775390625E-8.  There is no overlap so
3502e6f5bf1SYuichi Sugiyama           * single subnormals can't be converted to halfs of any sort.
3512e6f5bf1SYuichi Sugiyama           */
3522e6f5bf1SYuichi Sugiyama          result.uSize   = IEEE754_UNION_IS_SINGLE;
3532e6f5bf1SYuichi Sugiyama          result.uValue  = uSingle;
3542e6f5bf1SYuichi Sugiyama       }
3552e6f5bf1SYuichi Sugiyama    } else if(nSingleUnbiasedExponent == SINGLE_EXPONENT_INF_OR_NAN) {
3562e6f5bf1SYuichi Sugiyama       if(uSingleSignificand == 0) {
3572e6f5bf1SYuichi Sugiyama          /* ---- IS INFINITY ---- */
3582e6f5bf1SYuichi Sugiyama          result.uSize  = IEEE754_UNION_IS_HALF;
3592e6f5bf1SYuichi Sugiyama          result.uValue = IEEE754_AssembleHalf(uSingleSign, 0, HALF_EXPONENT_INF_OR_NAN);
3602e6f5bf1SYuichi Sugiyama       } else {
3612e6f5bf1SYuichi Sugiyama          /* The NaN can only be converted if no payload bits are lost
3622e6f5bf1SYuichi Sugiyama           * per RFC 8949 section 4.1 that defines Preferred
3632e6f5bf1SYuichi Sugiyama           * Serializaton. Note that Deterministically Encode CBOR in
3642e6f5bf1SYuichi Sugiyama           * section 4.2 allows for some variation of this rule, but at
3652e6f5bf1SYuichi Sugiyama           * the moment this implementation is of Preferred
3662e6f5bf1SYuichi Sugiyama           * Serialization, not CDE. As of December 2023, we are also
3672e6f5bf1SYuichi Sugiyama           * expecting an update to CDE. This code may need to be
3682e6f5bf1SYuichi Sugiyama           * updated for CDE.
3692e6f5bf1SYuichi Sugiyama           */
3702e6f5bf1SYuichi Sugiyama          uDroppedBits = uSingleSignificand & (SINGLE_SIGNIFICAND_MASK >> HALF_NUM_SIGNIFICAND_BITS);
3712e6f5bf1SYuichi Sugiyama          if(uDroppedBits == 0) {
3722e6f5bf1SYuichi Sugiyama             /* --- IS CONVERTABLE NAN --- */
3732e6f5bf1SYuichi Sugiyama             uHalfSignificand = uSingleSignificand >> (SINGLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
3742e6f5bf1SYuichi Sugiyama             result.uSize  = IEEE754_UNION_IS_HALF;
3752e6f5bf1SYuichi Sugiyama             result.uValue = IEEE754_AssembleHalf(uSingleSign,
3762e6f5bf1SYuichi Sugiyama                                                  uHalfSignificand,
3772e6f5bf1SYuichi Sugiyama                                                  HALF_EXPONENT_INF_OR_NAN);
3782e6f5bf1SYuichi Sugiyama 
3792e6f5bf1SYuichi Sugiyama          } else {
3802e6f5bf1SYuichi Sugiyama             /* --- IS UNCONVERTABLE NAN --- */
3812e6f5bf1SYuichi Sugiyama             result.uSize   = IEEE754_UNION_IS_SINGLE;
3822e6f5bf1SYuichi Sugiyama             result.uValue  = uSingle;
3832e6f5bf1SYuichi Sugiyama          }
3842e6f5bf1SYuichi Sugiyama       }
3852e6f5bf1SYuichi Sugiyama    } else {
3862e6f5bf1SYuichi Sugiyama       /* ---- REGULAR NUMBER ---- */
3872e6f5bf1SYuichi Sugiyama       /* A regular single can be converted to a regular half if the
3882e6f5bf1SYuichi Sugiyama        * single's exponent is in the smaller range of a half and if no
3892e6f5bf1SYuichi Sugiyama        * precision is lost in the significand.
3902e6f5bf1SYuichi Sugiyama        */
3912e6f5bf1SYuichi Sugiyama       if(nSingleUnbiasedExponent >= HALF_EXPONENT_MIN &&
3922e6f5bf1SYuichi Sugiyama          nSingleUnbiasedExponent <= HALF_EXPONENT_MAX &&
3932e6f5bf1SYuichi Sugiyama         (uSingleSignificand & (SINGLE_SIGNIFICAND_MASK >> HALF_NUM_SIGNIFICAND_BITS)) == 0) {
3942e6f5bf1SYuichi Sugiyama          uHalfSignificand = uSingleSignificand >> (SINGLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
3952e6f5bf1SYuichi Sugiyama 
3962e6f5bf1SYuichi Sugiyama          /* --- CONVERT TO HALF NORMAL --- */
3972e6f5bf1SYuichi Sugiyama          result.uSize  = IEEE754_UNION_IS_HALF;
3982e6f5bf1SYuichi Sugiyama          result.uValue = IEEE754_AssembleHalf(uSingleSign,
3992e6f5bf1SYuichi Sugiyama                                               uHalfSignificand,
4002e6f5bf1SYuichi Sugiyama                                               nSingleUnbiasedExponent);
4012e6f5bf1SYuichi Sugiyama       } else {
4022e6f5bf1SYuichi Sugiyama          /* Unable to convert to a half normal. See if it can be
4032e6f5bf1SYuichi Sugiyama           * converted to a half subnormal. To do that, the exponent
4042e6f5bf1SYuichi Sugiyama           * must be in range and no precision can be lost in the
4052e6f5bf1SYuichi Sugiyama           * signficand.
4062e6f5bf1SYuichi Sugiyama           *
4072e6f5bf1SYuichi Sugiyama           * This is more complicated because the number is not
4082e6f5bf1SYuichi Sugiyama           * normalized.  The signficand must be shifted proprotionally
4092e6f5bf1SYuichi Sugiyama           * to the exponent and 1 must be added in.  See
4102e6f5bf1SYuichi Sugiyama           * https://en.wikipedia.org/wiki/Single-precision_floating-point_format#Exponent_encoding
4112e6f5bf1SYuichi Sugiyama           *
4122e6f5bf1SYuichi Sugiyama           * Exponents -14 to -24 map to a shift of 0 to 10 of the
4132e6f5bf1SYuichi Sugiyama           * significand.  The largest value of a half subnormal has an
4142e6f5bf1SYuichi Sugiyama           * exponent of -14. Subnormals are not normalized like
4152e6f5bf1SYuichi Sugiyama           * normals meaning they lose precision as the numbers get
4162e6f5bf1SYuichi Sugiyama           * smaller. Normals don't lose precision because the exponent
4172e6f5bf1SYuichi Sugiyama           * allows all the bits of the significand to be significant.
4182e6f5bf1SYuichi Sugiyama           */
4192e6f5bf1SYuichi Sugiyama          /* The exponent of the largest possible half-precision
4202e6f5bf1SYuichi Sugiyama           * subnormal is HALF_EXPONENT_MIN (-14).  Exponents larger
4212e6f5bf1SYuichi Sugiyama           * than this are normal and handled above. We're going to
4222e6f5bf1SYuichi Sugiyama           * shift the significand right by at least this amount.
4232e6f5bf1SYuichi Sugiyama           */
4242e6f5bf1SYuichi Sugiyama          nExponentDifference = -(nSingleUnbiasedExponent - HALF_EXPONENT_MIN);
4252e6f5bf1SYuichi Sugiyama 
4262e6f5bf1SYuichi Sugiyama          /* In addition to the shift based on the exponent's value,
4272e6f5bf1SYuichi Sugiyama           * the single significand has to be shifted right to fit into
4282e6f5bf1SYuichi Sugiyama           * a half-precision significand */
4292e6f5bf1SYuichi Sugiyama          nShiftAmount = nExponentDifference + (SINGLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
4302e6f5bf1SYuichi Sugiyama 
4312e6f5bf1SYuichi Sugiyama          /* Must add 1 in to the possible significand because there is
4322e6f5bf1SYuichi Sugiyama           * an implied 1 for normal values and not for subnormal
4332e6f5bf1SYuichi Sugiyama           * values. See equations here:
4342e6f5bf1SYuichi Sugiyama           * https://en.wikipedia.org/wiki/Single-precision_floating-point_format#Exponent_encoding
4352e6f5bf1SYuichi Sugiyama           */
4362e6f5bf1SYuichi Sugiyama          uHalfSignificand = (uSingleSignificand + (1 << SINGLE_NUM_SIGNIFICAND_BITS)) >> nShiftAmount;
4372e6f5bf1SYuichi Sugiyama 
4382e6f5bf1SYuichi Sugiyama          /* If only zero bits get shifted out, this can be converted
4392e6f5bf1SYuichi Sugiyama           * to subnormal */
4402e6f5bf1SYuichi Sugiyama          if(nSingleUnbiasedExponent < HALF_EXPONENT_MIN &&
4412e6f5bf1SYuichi Sugiyama             nSingleUnbiasedExponent >= HALF_EXPONENT_MIN - HALF_NUM_SIGNIFICAND_BITS &&
4422e6f5bf1SYuichi Sugiyama             uHalfSignificand << nShiftAmount == uSingleSignificand + (1 << SINGLE_NUM_SIGNIFICAND_BITS)) {
4432e6f5bf1SYuichi Sugiyama             /* --- CONVERTABLE TO HALF SUBNORMAL --- */
4442e6f5bf1SYuichi Sugiyama             result.uSize  = IEEE754_UNION_IS_HALF;
4452e6f5bf1SYuichi Sugiyama             result.uValue = IEEE754_AssembleHalf(uSingleSign,
4462e6f5bf1SYuichi Sugiyama                                                  uHalfSignificand,
4472e6f5bf1SYuichi Sugiyama                                                  HALF_EXPONENT_ZERO);
4482e6f5bf1SYuichi Sugiyama          } else {
4492e6f5bf1SYuichi Sugiyama             /* --- DO NOT CONVERT --- */
4502e6f5bf1SYuichi Sugiyama             result.uSize   = IEEE754_UNION_IS_SINGLE;
4512e6f5bf1SYuichi Sugiyama             result.uValue  = uSingle;
4522e6f5bf1SYuichi Sugiyama          }
4532e6f5bf1SYuichi Sugiyama       }
4542e6f5bf1SYuichi Sugiyama    }
4552e6f5bf1SYuichi Sugiyama 
4562e6f5bf1SYuichi Sugiyama    return result;
4572e6f5bf1SYuichi Sugiyama }
4582e6f5bf1SYuichi Sugiyama 
4592e6f5bf1SYuichi Sugiyama 
4602e6f5bf1SYuichi Sugiyama /**
4612e6f5bf1SYuichi Sugiyama  * @brief Assemble sign, significand and exponent into single precision float.
4622e6f5bf1SYuichi Sugiyama  *
4632e6f5bf1SYuichi Sugiyama  * @param[in] uSingleSign              0 if positive, 1 if negative
4642e6f5bf1SYuichi Sugiyama  * @pararm[in] uSingleSignificand      Bits of the significand
4652e6f5bf1SYuichi Sugiyama  * @param[in] nSingleUnBiasedExponent  Exponent
4662e6f5bf1SYuichi Sugiyama  *
4672e6f5bf1SYuichi Sugiyama  * This returns the bits for a single-precision float, a binary32 as
4682e6f5bf1SYuichi Sugiyama  * specified in IEEE754. It is returned as a uint64_t rather than a
4692e6f5bf1SYuichi Sugiyama  * uint32_t or a float for convenience of usage.
4702e6f5bf1SYuichi Sugiyama  */
4712e6f5bf1SYuichi Sugiyama static uint64_t
IEEE754_AssembleSingle(uint64_t uSingleSign,uint64_t uSingleSignificand,int64_t nSingleUnBiasedExponent)4722e6f5bf1SYuichi Sugiyama IEEE754_AssembleSingle(uint64_t uSingleSign,
4732e6f5bf1SYuichi Sugiyama                        uint64_t uSingleSignificand,
4742e6f5bf1SYuichi Sugiyama                        int64_t  nSingleUnBiasedExponent)
4752e6f5bf1SYuichi Sugiyama {
4762e6f5bf1SYuichi Sugiyama    uint64_t uSingleBiasedExponent;
4772e6f5bf1SYuichi Sugiyama 
4782e6f5bf1SYuichi Sugiyama    uSingleBiasedExponent = (uint64_t)(nSingleUnBiasedExponent + SINGLE_EXPONENT_BIAS);
4792e6f5bf1SYuichi Sugiyama 
4802e6f5bf1SYuichi Sugiyama    return uSingleSignificand |
4812e6f5bf1SYuichi Sugiyama           (uSingleBiasedExponent << SINGLE_EXPONENT_SHIFT) |
4822e6f5bf1SYuichi Sugiyama           (uSingleSign << SINGLE_SIGN_SHIFT);
4832e6f5bf1SYuichi Sugiyama }
4842e6f5bf1SYuichi Sugiyama 
4852e6f5bf1SYuichi Sugiyama 
4862e6f5bf1SYuichi Sugiyama /**
4872e6f5bf1SYuichi Sugiyama  * @brief Convert a double-precision float to single-precision.
4882e6f5bf1SYuichi Sugiyama  *
4892e6f5bf1SYuichi Sugiyama  * @param[in] d  The value to convert.
4902e6f5bf1SYuichi Sugiyama  *
4912e6f5bf1SYuichi Sugiyama  * @returns Either unconverted value or value converted to single-precision.
4922e6f5bf1SYuichi Sugiyama  *
4932e6f5bf1SYuichi Sugiyama  * This always succeeds. If the value cannot be converted without the
4942e6f5bf1SYuichi Sugiyama  * loss of precision, it is not converted.
4952e6f5bf1SYuichi Sugiyama  *
4962e6f5bf1SYuichi Sugiyama  * This handles all subnormals and NaN payloads.
4972e6f5bf1SYuichi Sugiyama  */
4982e6f5bf1SYuichi Sugiyama static IEEE754_union
IEEE754_DoubleToSingle(double d)4992e6f5bf1SYuichi Sugiyama IEEE754_DoubleToSingle(double d)
5002e6f5bf1SYuichi Sugiyama {
5012e6f5bf1SYuichi Sugiyama    IEEE754_union Result;
5022e6f5bf1SYuichi Sugiyama    int64_t       nExponentDifference;
5032e6f5bf1SYuichi Sugiyama    int64_t       nShiftAmount;
5042e6f5bf1SYuichi Sugiyama    uint64_t      uSingleSignificand;
5052e6f5bf1SYuichi Sugiyama    uint64_t      uDroppedBits;
5062e6f5bf1SYuichi Sugiyama 
5072e6f5bf1SYuichi Sugiyama 
5082e6f5bf1SYuichi Sugiyama    /* Pull the three parts out of the double-precision float. Most
5092e6f5bf1SYuichi Sugiyama     * work is done with uint64_t which helps avoid integer promotions
5102e6f5bf1SYuichi Sugiyama     * and static analyzer complaints.
5112e6f5bf1SYuichi Sugiyama     */
5122e6f5bf1SYuichi Sugiyama    const uint64_t uDouble                 = CopyDoubleToUint64(d);
5132e6f5bf1SYuichi Sugiyama    const uint64_t uDoubleBiasedExponent   = (uDouble & DOUBLE_EXPONENT_MASK) >> DOUBLE_EXPONENT_SHIFT;
5142e6f5bf1SYuichi Sugiyama    const int64_t  nDoubleUnbiasedExponent = (int64_t)uDoubleBiasedExponent - DOUBLE_EXPONENT_BIAS;
5152e6f5bf1SYuichi Sugiyama    const uint64_t uDoubleSign             = (uDouble & DOUBLE_SIGN_MASK) >> DOUBLE_SIGN_SHIFT;
5162e6f5bf1SYuichi Sugiyama    const uint64_t uDoubleSignificand      = uDouble & DOUBLE_SIGNIFICAND_MASK;
5172e6f5bf1SYuichi Sugiyama 
5182e6f5bf1SYuichi Sugiyama 
5192e6f5bf1SYuichi Sugiyama     if(nDoubleUnbiasedExponent == DOUBLE_EXPONENT_ZERO) {
5202e6f5bf1SYuichi Sugiyama         if(uDoubleSignificand == 0) {
5212e6f5bf1SYuichi Sugiyama             /* --- IS ZERO --- */
5222e6f5bf1SYuichi Sugiyama             Result.uSize  = IEEE754_UNION_IS_SINGLE;
5232e6f5bf1SYuichi Sugiyama             Result.uValue = IEEE754_AssembleSingle(uDoubleSign,
5242e6f5bf1SYuichi Sugiyama                                                    0,
5252e6f5bf1SYuichi Sugiyama                                                    SINGLE_EXPONENT_ZERO);
5262e6f5bf1SYuichi Sugiyama         } else {
5272e6f5bf1SYuichi Sugiyama             /* --- IS DOUBLE SUBNORMAL --- */
5282e6f5bf1SYuichi Sugiyama             /* The largest double subnormal is slightly less than the
5292e6f5bf1SYuichi Sugiyama              * largest double normal which is 2^-1022 or
5302e6f5bf1SYuichi Sugiyama              * 2.2250738585072014e-308.  The smallest single subnormal
5312e6f5bf1SYuichi Sugiyama              * is 2^-149 or 1.401298464324817e-45.  There is no
5322e6f5bf1SYuichi Sugiyama              * overlap so double subnormals can't be converted to
5332e6f5bf1SYuichi Sugiyama              * singles of any sort.
5342e6f5bf1SYuichi Sugiyama              */
5352e6f5bf1SYuichi Sugiyama             Result.uSize   = IEEE754_UNION_IS_DOUBLE;
5362e6f5bf1SYuichi Sugiyama             Result.uValue  = uDouble;
5372e6f5bf1SYuichi Sugiyama          }
5382e6f5bf1SYuichi Sugiyama     } else if(nDoubleUnbiasedExponent == DOUBLE_EXPONENT_INF_OR_NAN) {
5392e6f5bf1SYuichi Sugiyama          if(uDoubleSignificand == 0) {
5402e6f5bf1SYuichi Sugiyama              /* ---- IS INFINITY ---- */
5412e6f5bf1SYuichi Sugiyama              Result.uSize  = IEEE754_UNION_IS_SINGLE;
5422e6f5bf1SYuichi Sugiyama              Result.uValue = IEEE754_AssembleSingle(uDoubleSign,
5432e6f5bf1SYuichi Sugiyama                                                     0,
5442e6f5bf1SYuichi Sugiyama                                                     SINGLE_EXPONENT_INF_OR_NAN);
5452e6f5bf1SYuichi Sugiyama          } else {
5462e6f5bf1SYuichi Sugiyama              /* The NaN can only be converted if no payload bits are
5472e6f5bf1SYuichi Sugiyama               * lost per RFC 8949 section 4.1 that defines Preferred
5482e6f5bf1SYuichi Sugiyama               * Serializaton. Note that Deterministically Encode CBOR
5492e6f5bf1SYuichi Sugiyama               * in section 4.2 allows for some variation of this rule,
5502e6f5bf1SYuichi Sugiyama               * but at the moment this implementation is of Preferred
5512e6f5bf1SYuichi Sugiyama               * Serialization, not CDE. As of December 2023, we are
5522e6f5bf1SYuichi Sugiyama               * also expecting an update to CDE. This code may need to
5532e6f5bf1SYuichi Sugiyama               * be updated for CDE.
5542e6f5bf1SYuichi Sugiyama               */
5552e6f5bf1SYuichi Sugiyama              uDroppedBits = uDoubleSignificand & (DOUBLE_SIGNIFICAND_MASK >> SINGLE_NUM_SIGNIFICAND_BITS);
5562e6f5bf1SYuichi Sugiyama              if(uDroppedBits == 0) {
5572e6f5bf1SYuichi Sugiyama                 /* --- IS CONVERTABLE NAN --- */
5582e6f5bf1SYuichi Sugiyama                 uSingleSignificand = uDoubleSignificand >> (DOUBLE_NUM_SIGNIFICAND_BITS - SINGLE_NUM_SIGNIFICAND_BITS);
5592e6f5bf1SYuichi Sugiyama                 Result.uSize  = IEEE754_UNION_IS_SINGLE;
5602e6f5bf1SYuichi Sugiyama                 Result.uValue = IEEE754_AssembleSingle(uDoubleSign,
5612e6f5bf1SYuichi Sugiyama                                                        uSingleSignificand,
5622e6f5bf1SYuichi Sugiyama                                                        SINGLE_EXPONENT_INF_OR_NAN);
5632e6f5bf1SYuichi Sugiyama             } else {
5642e6f5bf1SYuichi Sugiyama                /* --- IS UNCONVERTABLE NAN --- */
5652e6f5bf1SYuichi Sugiyama                Result.uSize   = IEEE754_UNION_IS_DOUBLE;
5662e6f5bf1SYuichi Sugiyama                Result.uValue  = uDouble;
5672e6f5bf1SYuichi Sugiyama             }
5682e6f5bf1SYuichi Sugiyama          }
5692e6f5bf1SYuichi Sugiyama     } else {
5702e6f5bf1SYuichi Sugiyama         /* ---- REGULAR NUMBER ---- */
5712e6f5bf1SYuichi Sugiyama         /* A regular double can be converted to a regular single if
5722e6f5bf1SYuichi Sugiyama          * the double's exponent is in the smaller range of a single
5732e6f5bf1SYuichi Sugiyama          * and if no precision is lost in the significand.
5742e6f5bf1SYuichi Sugiyama          */
5752e6f5bf1SYuichi Sugiyama         uDroppedBits = uDoubleSignificand & (DOUBLE_SIGNIFICAND_MASK >> SINGLE_NUM_SIGNIFICAND_BITS);
5762e6f5bf1SYuichi Sugiyama         if(nDoubleUnbiasedExponent >= SINGLE_EXPONENT_MIN &&
5772e6f5bf1SYuichi Sugiyama            nDoubleUnbiasedExponent <= SINGLE_EXPONENT_MAX &&
5782e6f5bf1SYuichi Sugiyama            uDroppedBits == 0) {
5792e6f5bf1SYuichi Sugiyama             /* --- IS CONVERTABLE TO SINGLE --- */
5802e6f5bf1SYuichi Sugiyama             uSingleSignificand = uDoubleSignificand >> (DOUBLE_NUM_SIGNIFICAND_BITS - SINGLE_NUM_SIGNIFICAND_BITS);
5812e6f5bf1SYuichi Sugiyama             Result.uSize  = IEEE754_UNION_IS_SINGLE;
5822e6f5bf1SYuichi Sugiyama             Result.uValue = IEEE754_AssembleSingle(uDoubleSign,
5832e6f5bf1SYuichi Sugiyama                                                    uSingleSignificand,
5842e6f5bf1SYuichi Sugiyama                                                    nDoubleUnbiasedExponent);
5852e6f5bf1SYuichi Sugiyama         } else {
5862e6f5bf1SYuichi Sugiyama             /* Unable to convert to a single normal. See if it can be
5872e6f5bf1SYuichi Sugiyama              * converted to a single subnormal. To do that, the
5882e6f5bf1SYuichi Sugiyama              * exponent must be in range and no precision can be lost
5892e6f5bf1SYuichi Sugiyama              * in the signficand.
5902e6f5bf1SYuichi Sugiyama              *
5912e6f5bf1SYuichi Sugiyama              * This is more complicated because the number is not
5922e6f5bf1SYuichi Sugiyama              * normalized.  The signficand must be shifted
5932e6f5bf1SYuichi Sugiyama              * proprotionally to the exponent and 1 must be added
5942e6f5bf1SYuichi Sugiyama              * in. See
5952e6f5bf1SYuichi Sugiyama              * https://en.wikipedia.org/wiki/Single-precision_floating-point_format#Exponent_encoding
5962e6f5bf1SYuichi Sugiyama              */
5972e6f5bf1SYuichi Sugiyama             nExponentDifference = -(nDoubleUnbiasedExponent - SINGLE_EXPONENT_MIN);
5982e6f5bf1SYuichi Sugiyama             nShiftAmount        = nExponentDifference + (DOUBLE_NUM_SIGNIFICAND_BITS - SINGLE_NUM_SIGNIFICAND_BITS);
5992e6f5bf1SYuichi Sugiyama             uSingleSignificand  = (uDoubleSignificand + (1ULL << DOUBLE_NUM_SIGNIFICAND_BITS)) >> nShiftAmount;
6002e6f5bf1SYuichi Sugiyama 
6012e6f5bf1SYuichi Sugiyama             if(nDoubleUnbiasedExponent < SINGLE_EXPONENT_MIN &&
6022e6f5bf1SYuichi Sugiyama                nDoubleUnbiasedExponent >= SINGLE_EXPONENT_MIN - SINGLE_NUM_SIGNIFICAND_BITS &&
6032e6f5bf1SYuichi Sugiyama                uSingleSignificand << nShiftAmount == uDoubleSignificand + (1ULL << DOUBLE_NUM_SIGNIFICAND_BITS)) {
6042e6f5bf1SYuichi Sugiyama                /* --- IS CONVERTABLE TO SINGLE SUBNORMAL --- */
6052e6f5bf1SYuichi Sugiyama                Result.uSize  = IEEE754_UNION_IS_SINGLE;
6062e6f5bf1SYuichi Sugiyama                Result.uValue = IEEE754_AssembleSingle(uDoubleSign,
6072e6f5bf1SYuichi Sugiyama                                                       uSingleSignificand,
6082e6f5bf1SYuichi Sugiyama                                                       SINGLE_EXPONENT_ZERO);
6092e6f5bf1SYuichi Sugiyama             } else {
6102e6f5bf1SYuichi Sugiyama                /* --- CAN NOT BE CONVERTED --- */
6112e6f5bf1SYuichi Sugiyama                Result.uSize   = IEEE754_UNION_IS_DOUBLE;
6122e6f5bf1SYuichi Sugiyama                Result.uValue  = uDouble;
6132e6f5bf1SYuichi Sugiyama             }
6142e6f5bf1SYuichi Sugiyama         }
6152e6f5bf1SYuichi Sugiyama     }
6162e6f5bf1SYuichi Sugiyama 
6172e6f5bf1SYuichi Sugiyama     return Result;
6182e6f5bf1SYuichi Sugiyama }
6192e6f5bf1SYuichi Sugiyama 
6202e6f5bf1SYuichi Sugiyama 
6212e6f5bf1SYuichi Sugiyama /* Public function; see ieee754.h */
6222e6f5bf1SYuichi Sugiyama IEEE754_union
IEEE754_DoubleToSmaller(double d,int bAllowHalfPrecision)6232e6f5bf1SYuichi Sugiyama IEEE754_DoubleToSmaller(double d, int bAllowHalfPrecision)
6242e6f5bf1SYuichi Sugiyama {
6252e6f5bf1SYuichi Sugiyama    IEEE754_union result;
6262e6f5bf1SYuichi Sugiyama 
6272e6f5bf1SYuichi Sugiyama    result = IEEE754_DoubleToSingle(d);
6282e6f5bf1SYuichi Sugiyama 
6292e6f5bf1SYuichi Sugiyama    if(result.uSize == IEEE754_UNION_IS_SINGLE && bAllowHalfPrecision) {
6302e6f5bf1SYuichi Sugiyama       /* Cast to uint32_t is OK, because value was just successfully
6312e6f5bf1SYuichi Sugiyama        * converted to single. */
6322e6f5bf1SYuichi Sugiyama       float uSingle = CopyUint32ToSingle((uint32_t)result.uValue);
6332e6f5bf1SYuichi Sugiyama       result = IEEE754_SingleToHalf(uSingle);
6342e6f5bf1SYuichi Sugiyama    }
6352e6f5bf1SYuichi Sugiyama 
6362e6f5bf1SYuichi Sugiyama    return result;
6372e6f5bf1SYuichi Sugiyama }
6382e6f5bf1SYuichi Sugiyama 
6392e6f5bf1SYuichi Sugiyama 
6402e6f5bf1SYuichi Sugiyama #else /* QCBOR_DISABLE_PREFERRED_FLOAT */
6412e6f5bf1SYuichi Sugiyama 
6422e6f5bf1SYuichi Sugiyama int ieee754_dummy_place_holder;
6432e6f5bf1SYuichi Sugiyama 
6442e6f5bf1SYuichi Sugiyama #endif /* QCBOR_DISABLE_PREFERRED_FLOAT */
645