Portable serialisation of IEEE754 floating-point values

2020-02-10 22:06发布

问题:

I've recently been working on a system that needs to store and load large quantities of data, including single-precision floating-point values. I decided to standardise on network byte order for integers, and also decided to store floating point values in big-endian format, i.e.:

  |-- Byte 0 --| |-- Byte 1 -|  Byte 2   Byte 3
  #      ####### #     ####### ######## ########
Sign     Exponent          Mantissa
 1b    8b, MSB first    23b, MSB first

Ideally, I want to provide functions like htonl() and ntohl(), since I have already been using these for swabbing integers, and I also want to implement this in a way that has as much platform-independence as possible (while assuming that the float type corresponds to IEEE754 32-bit floating point values). Is there some way, possibly using ieee754.h, to do this?

I have one answer that seems to work, and I will post it below, but it seems pretty slow and inefficient and I would appreciate any suggestions about how to make it faster and/or more reliable.

回答1:

Much simpler, and relying on the same assumption as your version -- namely that float and integer types share the same byte order, which is almost universally true (realistically, you'll never encounter a system where it isn't):

#include <arpa/inet.h>  /* htonl */
#include <stdint.h>     /* uint32_t */
#include <string.h>     /* memcpy */

/*
 * Reorder the bytes of a float from host order to network (big-endian)
 * order, in the same way htonl() does for a 32-bit integer.
 *
 * Assumes float is 32 bits wide and that floats and integers share the
 * same byte order on the host.  The returned float is only a container
 * for the reordered bytes: on a little-endian host its numeric value is
 * meaningless and it should be written out as raw bytes.
 */
float htonf(float val) {
    uint32_t rep;

    /* memcpy instead of a pointer cast keeps the type punning well defined. */
    memcpy(&rep, &val, sizeof rep);
    rep = htonl(rep);
    memcpy(&val, &rep, sizeof rep);
    return val;
}

Any reasonably good compiler will optimize away the two memcpy calls; they are there so that the type punning does not violate the strict aliasing rule (reading the float through a cast uint32_t pointer would be undefined behaviour). The result is as efficient as htonl plus the overhead of a single function call.



回答2:

As mentioned in the question above, I have a solution to my problem, but I'm not particularly attached to it, and I welcome other answers, so I'm posting it here rather than in the question. In particular, it seems likely to be slow, and I'm not sure whether it breaks strict aliasing, among other potential problems.

#include <stdint.h>   /* uint8_t */
#include <ieee754.h>  /* union ieee754_float */

/* Pack VAL into big-endian IEEE754 single-precision layout:

     byte 0: sign bit, then the upper 7 exponent bits
     byte 1: lowest exponent bit, then the upper 7 mantissa bits
     byte 2: mantissa bits 15..8
     byte 3: mantissa bits 7..0

   The fields are read through union ieee754_float, so the result does
   not depend on the host's byte order.  The returned float is only a
   container for those four bytes and is meant to be written out raw.  */
float
htonf (float val)
{
  union ieee754_float u;
  float v;
  /* Byte-wise access to v; every one of its four bytes is stored below
     before v is returned.  */
  uint8_t *un = (uint8_t *) &v;

  u.f = val;
  un[0] = (uint8_t) ((u.ieee.negative << 7) | (u.ieee.exponent >> 1));
  un[1] = (uint8_t) (((u.ieee.exponent & 0x01) << 7)
                     | ((u.ieee.mantissa >> 16) & 0x7f));
  un[2] = (uint8_t) ((u.ieee.mantissa >> 8) & 0xff);
  un[3] = (uint8_t) (u.ieee.mantissa & 0xff);
  return v;
}

/* Rebuild a host float from VAL, whose four bytes hold a big-endian
   IEEE754 single-precision value (sign, 8 exponent bits, 23 mantissa
   bits, most significant first).  The fields are stored through
   union ieee754_float, so the host's byte order does not matter.  */
float
ntohf (float val)
{
  const uint8_t *wire = (const uint8_t *) &val;
  union ieee754_float u;

  /* Decode each field into a plain integer first.  */
  unsigned int sign_bit = (wire[0] >> 7) & 0x01u;
  unsigned int exponent = ((wire[0] & 0x7fu) << 1) | ((wire[1] >> 7) & 0x01u);
  unsigned int mantissa = ((wire[1] & 0x7fu) << 16)
                          | ((unsigned int) wire[2] << 8)
                          | wire[3];

  u.ieee.negative = sign_bit;
  u.ieee.exponent = exponent;
  u.ieee.mantissa = mantissa;

  return u.f;
}


回答3:

Here's a portable IEEE 754 write routine. It will write a double in IEEE 754 format, regardless of the floating point representation on the host machine.

/*
* Split x into the high and low 32-bit words of its IEEE 754
*  double-precision encoding, using only arithmetic on x so the host's
*  own floating-point representation does not matter.
*  x - number to encode
*  hi - receives sign, 11-bit biased exponent and top 20 significand bits
*  lo - receives the low 32 significand bits
*/
static void ieee754_words(double x, unsigned long *hi, unsigned long *lo)
{
    /* exponent field all ones, significand zero: the infinity pattern */
    const unsigned long inf_hi = 0x7FFUL << 20;
    unsigned long sign, hibits;
    double fnorm, significand;
    int shift = 0;

    *lo = 0;

    /* zero (can't handle signed zero) */
    if (x == 0)
    {
        *hi = 0;
        return;
    }
    /* infinity */
    if (x > DBL_MAX)
    {
        *hi = inf_hi;
        return;
    }
    /* -infinity */
    if (x < -DBL_MAX)
    {
        *hi = inf_hi | (1UL << 31);
        return;
    }
    /* NaN - a compiler running in a "fast math" mode may optimise this
    * test away. The payload is an arbitrary non-zero significand. */
    if (x != x)
    {
        *hi = inf_hi;
        *lo = 1234;
        return;
    }

    /* get the sign */
    if (x < 0)
    {
        sign = 1;
        fnorm = -x;
    }
    else
    {
        sign = 0;
        fnorm = x;
    }

    /* normalise into [1, 2) and track the power of two */
    while (fnorm >= 2.0)
    {
        fnorm /= 2.0;
        shift++;
    }
    while (fnorm < 1.0)
    {
        fnorm *= 2.0;
        shift--;
    }

    if (shift < -1022)
    {
        /* denormalized: scale back to the minimum exponent and keep the
        * leading bit in the fraction; the exponent field becomes 0 */
        while (shift < -1022)
        {
            fnorm /= 2.0;
            shift++;
        }
        shift = -1023;
    }
    else if (shift > 1023)
    {
        /* out of range. Set to infinity, keeping the sign */
        *hi = (sign << 31) | inf_hi;
        return;
    }
    else
    {
        fnorm -= 1.0; /* take the implicit leading bit off the mantissa */
    }

    /* scale the fraction to a 52-bit integer, still held in a double */
    significand = fnorm * 4503599627370496.0; /* 2^52 */

    /* split into the top 20 bits and the low 32 bits */
    hibits = (unsigned long)(significand / 4294967296.0);
    *hi = (sign << 31) | ((unsigned long)(shift + 1023) << 20) | hibits;
    *lo = (unsigned long)(significand - (double)hibits * 4294967296.0);
}

/*
* write a double to a stream in ieee754 format regardless of host
*  encoding.
*  x - number to write
*  fp - the stream
*  bigendian - set to write big bytes first, else write little bytes
*              first
*  Returns: 0, or EOF if the stream's error indicator is set after
*           writing
*  Notes: different NaN types and negative zero not preserved.
*         if the number is too big to represent it will become infinity
*         if it is too small to represent it will become zero.
*/
static int fwriteieee754(double x, FILE *fp, int bigendian)
{
    unsigned long hilong, lowlong;
    unsigned char bytes[8]; /* the encoding, most significant byte first */
    int i;

    ieee754_words(x, &hilong, &lowlong);

    for (i = 0; i < 4; i++)
    {
        bytes[i] = (unsigned char)((hilong >> (24 - 8 * i)) & 0xFF);
        bytes[4 + i] = (unsigned char)((lowlong >> (24 - 8 * i)) & 0xFF);
    }

    /* little-endian output is the same eight bytes in reverse order */
    for (i = 0; i < 8; i++)
    {
        fputc(bytes[bigendian ? i : 7 - i], fp);
    }

    return ferror(fp) ? EOF : 0;
}