xref: /optee_os/core/lib/qcbor/src/ieee754.h (revision b586599be35c4311337a5d8db5f4b5e5c81a754d)
1*b586599bSYuichi Sugiyama // SPDX-License-Identifier: BSD-3-Clause
22e6f5bf1SYuichi Sugiyama /* ==========================================================================
32e6f5bf1SYuichi Sugiyama  * ieee754.h -- Conversion between half, double & single-precision floats
42e6f5bf1SYuichi Sugiyama  *
52e6f5bf1SYuichi Sugiyama  * Copyright (c) 2018-2024, Laurence Lundblade. All rights reserved.
62e6f5bf1SYuichi Sugiyama  *
72e6f5bf1SYuichi Sugiyama  * SPDX-License-Identifier: BSD-3-Clause
82e6f5bf1SYuichi Sugiyama  *
92e6f5bf1SYuichi Sugiyama  * See BSD-3-Clause license in README.md
102e6f5bf1SYuichi Sugiyama  *
112e6f5bf1SYuichi Sugiyama  * Created on 7/23/18
122e6f5bf1SYuichi Sugiyama  * ========================================================================== */
132e6f5bf1SYuichi Sugiyama 
142e6f5bf1SYuichi Sugiyama #ifndef QCBOR_DISABLE_PREFERRED_FLOAT
152e6f5bf1SYuichi Sugiyama 
162e6f5bf1SYuichi Sugiyama #ifndef ieee754_h
172e6f5bf1SYuichi Sugiyama #define ieee754_h
182e6f5bf1SYuichi Sugiyama 
192e6f5bf1SYuichi Sugiyama #include <stdint.h>
202e6f5bf1SYuichi Sugiyama 
212e6f5bf1SYuichi Sugiyama 
/** @file ieee754.h
 *
 * This implements floating-point conversion between half, single and
 * double precision floating-point numbers, in particular conversion to
 * a smaller representation (e.g., double to single) that does not lose
 * precision for CBOR preferred serialization.
 *
 * This implementation works entirely with shifts and masks and does
 * not require any floating-point HW or library.
 *
 * This conforms to IEEE 754-2008, but note that it doesn't specify
 * conversions, just the encodings.
 *
 * This is complete, supporting +/- infinity, +/- zero, subnormals and
 * NaN payloads. NaN payloads are converted to a smaller size by
 * dropping the rightmost bits if they are zero and shifting to the
 * right. If the rightmost bits are not zero the conversion is not
 * performed. When converting from smaller to larger, the payload is
 * shifted left and zero-padded. This is what is specified by CBOR
 * preferred serialization and what modern HW conversion instructions
 * do. CBOR CDE handling for NaN is not clearly specified, but upcoming
 * documents may clarify this.
 *
 * There is no special handling of signaling and quiet NaNs. It
 * probably isn't necessary to transmit these special NaNs as their
 * purpose is more for propagating errors up through some calculation.
 * In many cases the handling of the NaN payload will work for
 * signaling and quiet NaNs.
 *
 * A previous version of this was usable as a general library for
 * conversion. This version is reduced to what is needed for CBOR.
 */
542e6f5bf1SYuichi Sugiyama 
552e6f5bf1SYuichi Sugiyama 
/**
 * @brief Convert half-precision float to double-precision float.
 *
 * @param[in] uHalfPrecision   Half-precision number to convert.
 *
 * @returns Double-precision value.
 *
 * This is a lossless conversion because every half-precision value
 * can be represented as a double. There is no error condition.
 *
 * There is no half-precision type in C, so it is represented here as
 * a @c uint16_t. The bits of @c uHalfPrecision are as described for
 * half-precision by IEEE 754.
 */
double
IEEE754_HalfToDouble(uint16_t uHalfPrecision);
722e6f5bf1SYuichi Sugiyama 
732e6f5bf1SYuichi Sugiyama 
/**
 * Holds a floating-point value that could be half, single or
 * double-precision.  The value is in a @c uint64_t that may be copied
 * to a float or double.  Simply casting uValue will usually work but
 * may generate compiler or static analyzer warnings. Using
 * UsefulBufUtil_CopyUint64ToDouble() or
 * UsefulBufUtil_CopyUint32ToFloat() will not (and will not generate
 * any extra code).
 */
typedef struct {
   /* Size of the value held in uValue. Each constant equals the width
    * in bytes of the format it names (2, 4 or 8). */
   enum {
      IEEE754_UNION_IS_HALF   = 2,
      IEEE754_UNION_IS_SINGLE = 4,
      IEEE754_UNION_IS_DOUBLE = 8,
   } uSize;
   /* The half, single or double-precision value, as selected by uSize. */
   uint64_t uValue;
} IEEE754_union;
892e6f5bf1SYuichi Sugiyama 
902e6f5bf1SYuichi Sugiyama 
912e6f5bf1SYuichi Sugiyama /**
922e6f5bf1SYuichi Sugiyama  * @brief Convert a double to either single or half-precision.
932e6f5bf1SYuichi Sugiyama  *
942e6f5bf1SYuichi Sugiyama  * @param[in] d                    The value to convert.
952e6f5bf1SYuichi Sugiyama  * @param[in] bAllowHalfPrecision  If true, convert to either half or
962e6f5bf1SYuichi Sugiyama  *                                 single precision.
972e6f5bf1SYuichi Sugiyama  *
982e6f5bf1SYuichi Sugiyama  * @returns Unconverted value, or value converted to single or half-precision.
992e6f5bf1SYuichi Sugiyama  *
1002e6f5bf1SYuichi Sugiyama  * This always succeeds. If the value cannot be converted without the
1012e6f5bf1SYuichi Sugiyama  * loss of precision, it is not converted.
1022e6f5bf1SYuichi Sugiyama  *
1032e6f5bf1SYuichi Sugiyama  * This handles all subnormals and NaN payloads.
1042e6f5bf1SYuichi Sugiyama  */
1052e6f5bf1SYuichi Sugiyama IEEE754_union
1062e6f5bf1SYuichi Sugiyama IEEE754_DoubleToSmaller(double d, int bAllowHalfPrecision);
1072e6f5bf1SYuichi Sugiyama 
1082e6f5bf1SYuichi Sugiyama 
1092e6f5bf1SYuichi Sugiyama /**
1102e6f5bf1SYuichi Sugiyama  * @brief Convert a single-precision float to half-precision.
1112e6f5bf1SYuichi Sugiyama  *
1122e6f5bf1SYuichi Sugiyama  * @param[in] f  The value to convert.
1132e6f5bf1SYuichi Sugiyama  *
1142e6f5bf1SYuichi Sugiyama  * @returns Either unconverted value or value converted to half-precision.
1152e6f5bf1SYuichi Sugiyama  *
1162e6f5bf1SYuichi Sugiyama  * This always succeeds. If the value cannot be converted without the
1172e6f5bf1SYuichi Sugiyama  * loss of precision, it is not converted.
1182e6f5bf1SYuichi Sugiyama  *
1192e6f5bf1SYuichi Sugiyama  * This handles all subnormals and NaN payloads.
1202e6f5bf1SYuichi Sugiyama  */
1212e6f5bf1SYuichi Sugiyama IEEE754_union
1222e6f5bf1SYuichi Sugiyama IEEE754_SingleToHalf(float f);
1232e6f5bf1SYuichi Sugiyama 
1242e6f5bf1SYuichi Sugiyama 
1252e6f5bf1SYuichi Sugiyama #endif /* ieee754_h */
1262e6f5bf1SYuichi Sugiyama 
1272e6f5bf1SYuichi Sugiyama #endif /* QCBOR_DISABLE_PREFERRED_FLOAT */
128