How can a floating-point number be converted into a sequence of bytes so that it can be persisted in a file? The algorithm must be fast and highly portable, and it must also support the opposite operation, deserialization. Ideally it should need only a very small number of extra bits per value (persistent storage overhead).

Here we go.

Portable IEEE 754 serialisation / deserialisation that should work regardless of the machine's internal floating point representation.


#include <math.h>
#include <stdio.h>

/* Quiet NaN for the host, falling back to sqrt(-1.0) before C99. */
static double ieee754_host_nan(void)
{
#ifdef NAN
    return NAN;
#else
    return sqrt(-1.0);
#endif
}

/* Infinity with the given sign (+1 or -1) for the host. */
static double ieee754_host_inf(int sign)
{
#ifdef INFINITY
    return sign == 1 ? INFINITY : -INFINITY;
#else
    return sign * HUGE_VAL;
#endif
}

/*
 * Read a double from a stream in IEEE 754 binary64 format, regardless of
 * the host's own floating point encoding.
 *   fp        - the stream; exactly 8 bytes are consumed on success
 *   bigendian - non-zero if the most significant byte comes first in the
 *               stream, zero if the least significant byte comes first
 * Returns: the decoded value. If fewer than 8 bytes are available a NaN is
 *          returned; call feof()/ferror() on fp to tell a short read apart
 *          from a NaN that was really stored.
 * Notes: every NaN payload decodes to the host's default NaN.
 */
double freadieee754(FILE *fp, int bigendian)
{
    const int expbits = 11;
    const int significandbits = 52;
    unsigned char buff[8];
    double fnorm = 0.0;
    double bitval;
    int sign;
    int exponent;
    int shift;
    int maski, mask;
    int i;

    /* read the data, refusing to decode a truncated value */
    for (i = 0; i < 8; i++) {
        int c = fgetc(fp);

        if (c == EOF)
            return ieee754_host_nan();
        buff[i] = (unsigned char)c;
    }

    /* decoding below works on big-endian order, so reverse if needed */
    if (!bigendian) {
        for (i = 0; i < 4; i++) {
            unsigned char temp = buff[i];

            buff[i] = buff[8 - i - 1];
            buff[8 - i - 1] = temp;
        }
    }

    sign = (buff[0] & 0x80) ? -1 : 1;
    /* exponent in raw (biased) form: 7 bits of byte 0, top 4 bits of byte 1 */
    exponent = ((buff[0] & 0x7F) << 4) | ((buff[1] & 0xF0) >> 4);

    /*
     * Read in the mantissa. The top bit is worth 0.5 and each successive
     * bit half of the previous one. It starts at bit 3 of byte 1 and runs
     * through bit 0 of byte 7.
     */
    bitval = 0.5;
    maski = 1;
    mask = 0x08;
    for (i = 0; i < significandbits; i++) {
        if (buff[maski] & mask)
            fnorm += bitval;

        bitval /= 2.0;
        mask >>= 1;
        if (mask == 0) {
            /* byte exhausted: continue with the top bit of the next one */
            mask = 0x80;
            maski++;
        }
    }

    /* zero has an all-zero exponent and mantissa; keep its sign */
    if (exponent == 0 && fnorm == 0.0)
        return sign < 0 ? -0.0 : 0.0;

    shift = exponent - ((1 << (expbits - 1)) - 1); /* exponent = shift + bias */

    /* an all-ones exponent means NaN (non-zero mantissa) or infinity */
    if (shift == 1024)
        return fnorm != 0.0 ? ieee754_host_nan() : ieee754_host_inf(sign);

    /* normal numbers carry an implicit leading 1 */
    if (shift > -1023)
        return ldexp(fnorm + 1.0, shift) * sign;

    /* denormalised numbers: no implicit 1, exponent fixed at -1022 */
    return ldexp(fnorm, -1022) * sign;
}

#include <float.h>
#include <math.h>
#include <stdio.h>

/*
 * Write a double to a stream in IEEE 754 binary64 format, regardless of
 * the host's own floating point encoding.
 *   x         - number to write
 *   fp        - the stream; 8 bytes are written
 *   bigendian - non-zero to write the most significant byte first, zero to
 *               write the least significant byte first
 * Returns: 0 on success or EOF on error
 * Notes: every NaN is written as the same quiet NaN (payloads and the NaN
 *        sign are not preserved). The sign of zero is preserved.
 *        If the number is too big to represent it becomes infinity;
 *        if it is too small to represent it becomes zero.
 */
int fwriteieee754(double x, FILE *fp, int bigendian)
{
    const int expbits = 11;
    const int significandbits = 52;
    const int hishift = 31 - expbits;                      /* exponent position in hilong */
    const int bias = (1 << (expbits - 1)) - 1;             /* 1023 */
    const unsigned long expall = (1UL << expbits) - 1;     /* all-ones exponent, 2047 */
    const unsigned long sign = signbit(x) ? 1UL : 0UL;
    unsigned long hilong;   /* sign, exponent and top 20 mantissa bits */
    unsigned long lowlong;  /* low 32 mantissa bits */
    unsigned char bytes[8];
    int i;

    if (isnan(x)) {
        /* canonical quiet NaN: all-ones exponent plus the top mantissa bit */
        hilong = (expall << hishift) | (1UL << (hishift - 1));
        lowlong = 0;
    } else if (x == 0) {
        hilong = sign << 31;
        lowlong = 0;
    } else if (x > DBL_MAX || x < -DBL_MAX) {
        /* +/- infinity */
        hilong = (sign << 31) | (expall << hishift);
        lowlong = 0;
    } else {
        int e;
        /* frexp() yields [0.5, 1); double it to get the 1.f form in [1, 2) */
        double mant = 2.0 * frexp(fabs(x), &e);
        int shift = e - 1;

        if (shift > 1023) {
            /* out of range for binary64 (wider host format): infinity */
            hilong = (sign << 31) | (expall << hishift);
            lowlong = 0;
        } else {
            unsigned long biased;
            unsigned long hibits;
            double significand; /* integer form of the mantissa, held in a double */

            if (shift < -1022) {
                /*
                 * Denormalised: stored as 0.f * 2^-1022 with a zero exponent
                 * field, so scale the mantissa down instead of dropping the
                 * leading 1. Values below 2^-1074 truncate to zero.
                 */
                significand = ldexp(mant, shift + 1022 + significandbits);
                biased = 0;
            } else {
                /* take the implicit leading bit off the mantissa */
                significand = ldexp(mant - 1.0, significandbits);
                biased = (unsigned long)(shift + bias);
            }

            /* split the 52-bit integer into its top 20 and low 32 bits */
            hibits = (unsigned long)(significand / 4294967296.0);
            lowlong = (unsigned long)(significand - hibits * 4294967296.0);
            hilong = (sign << 31) | (biased << hishift) | hibits;
        }
    }

    /* lay the value out most significant byte first */
    bytes[0] = (unsigned char)((hilong >> 24) & 0xFF);
    bytes[1] = (unsigned char)((hilong >> 16) & 0xFF);
    bytes[2] = (unsigned char)((hilong >> 8) & 0xFF);
    bytes[3] = (unsigned char)(hilong & 0xFF);
    bytes[4] = (unsigned char)((lowlong >> 24) & 0xFF);
    bytes[5] = (unsigned char)((lowlong >> 16) & 0xFF);
    bytes[6] = (unsigned char)((lowlong >> 8) & 0xFF);
    bytes[7] = (unsigned char)(lowlong & 0xFF);

    /* write the bytes out to the stream in the requested order */
    for (i = 0; i < 8; i++) {
        if (fputc(bytes[bigendian ? i : 7 - i], fp) == EOF)
            return EOF;
    }
    return ferror(fp) ? EOF : 0;
}
This version has an overhead of only one byte per floating-point value, used to indicate the endianness. However, I think it is still not very portable.

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>

/* One-byte tags written in front of every value to record its byte order. */
#define LITEND      'L'
#define BIGEND      'B'

/*
 * Project-local type names. Note that the widths of short and int are
 * implementation-defined, so INT16/INT32 are only nominal sizes.
 */
typedef short               INT16;
typedef int                 INT32;
typedef double              vec1_t;

/* Thin wrapper around a stdio stream; the same layout serves writers and readers. */
typedef struct {
    FILE            *fp;
} WFILE, RFILE;

/* Write byte c to / read one byte from the stream wrapped by p. */
#define w_byte(c, p)    putc((c), (p)->fp)
#define r_byte(p)       getc((p)->fp)

/*
 * Write one vec1_t to the stream wrapped by p: a one-byte byte-order tag
 * (LITEND or BIGEND) followed by the sizeof(vec1_t) raw bytes of the value
 * in host memory order.
 *
 * The tag is derived from the host's integer byte order.
 * NOTE(review): this assumes the host stores floating point values in the
 * same byte order as integers - confirm for exotic targets.
 */
static void w_vec1(vec1_t v1_Val, WFILE *p)
{
    const unsigned short probe = 1;
    const unsigned char *pc_Val = (const unsigned char *)&v1_Val;
    size_t i;

    /* the first byte of probe is 1 only when the low byte is stored first */
    w_byte(*(const unsigned char *)&probe == 1 ? LITEND : BIGEND, p);
    for (i = 0; i < sizeof v1_Val; i++)
        w_byte(pc_Val[i], p);
}

/*
 * Read one vec1_t from the stream wrapped by p. The value is expected as a
 * one-byte byte-order tag (LITEND or BIGEND) followed by sizeof(vec1_t)
 * raw bytes; the bytes are reversed when the tag differs from the host's
 * own byte order.
 *
 * Returns the decoded value, or 0 if the tag is missing or unknown or the
 * stream ends before all bytes were read (check feof()/ferror() on p->fp).
 * NOTE(review): the host order is probed with an integer; this assumes
 * floating point values use the same byte order - confirm for exotic targets.
 */
static vec1_t r_vec1(RFILE *p)
{
    const unsigned short probe = 1;
    const int host_tag = (*(const unsigned char *)&probe == 1) ? LITEND : BIGEND;
    unsigned char raw[sizeof(vec1_t)];
    vec1_t v1_Val = 0;
    int c_Type;
    size_t i;

    c_Type = r_byte(p);
    if (c_Type != LITEND && c_Type != BIGEND)
        return v1_Val;      /* EOF or a tag this reader does not know */

    for (i = 0; i < sizeof raw; i++) {
        int c = r_byte(p);

        if (c == EOF)
            return v1_Val;  /* truncated value */
        /* store back-to-front when the stream order is not the host order */
        raw[c_Type == host_tag ? i : sizeof raw - 1 - i] = (unsigned char)c;
    }

    memcpy(&v1_Val, raw, sizeof v1_Val);
    return v1_Val;
}

int main(void)
    WFILE   x_FileW,
            *px_FileW = &x_FileW;
    RFILE   x_FileR,
            *px_FileR = &x_FileR;

    vec1_t  v1_Val;
    INT32   l_Val;
    char    *pc_Val = (char *)&v1_Val;
    INT32   i;

    px_FileW->fp = fopen("test.bin", "w");
    v1_Val = 1234567890.0987654321;
    printf("v1_Val before write = %.20f \n", v1_Val);
    w_vec1(v1_Val, px_FileW);

    px_FileR->fp = fopen("test.bin", "r");
    v1_Val = r_vec1(px_FileR);
    printf("v1_Val after read = %.20f \n", v1_Val);
    return 0;
This might give you a good start - it packs a floating point value into an int and long long pair, which you can then serialise in the usual way.

#define FRAC_MAX 9223372036854775807LL /* 2**63 - 1 */

/*
 * A double split into integer parts that can be serialised separately.
 *   exp  - binary exponent as produced by frexp()
 *   frac - signed, scaled fraction; 0 is reserved for the value zero
 */
struct dbl_packed
{
    int exp;
    long long frac;
};

void pack(double x, struct dbl_packed *r)
    double xf = fabs(frexp(x, &r->exp)) - 0.5;

    if (xf < 0.0)
        r->frac = 0;

    r->frac = 1 + (long long)(xf * 2.0 * (FRAC_MAX - 1));

    if (x < 0.0)
        r->frac = -r->frac;

double unpack(const struct dbl_packed *p)
    double xf, x;

    if (p->frac == 0)
        return 0.0;

    xf = ((double)(llabs(p->frac) - 1) / (FRAC_MAX - 1)) / 2.0;

    x = ldexp(xf + 0.5, p->exp);

    if (p->frac < 0)
        x = -x;

    return x;
What do you mean, "portable"?

For portability, remember to keep the numbers within the limits defined in the Standard: use a single number outside these limits, and there goes all portability down the drain.

double planck_time = 5.39124E-44; /* second */

5.2.4.2.2 Characteristics of floating types <float.h>

10   The values given in the following list shall be replaced by constant
     expressions with implementation-defined values [...]
11   The values given in the following list shall be replaced by constant
     expressions with implementation-defined values [...]
12   The values given in the following list shall be replaced by constant
     expressions with implementation-defined (positive) values [...]

Note the implementation-defined in all these clauses.

What level of portability do you require? If the file is to be read on a computer with the same OS that it was generated on, then using a binary file and just saving and restoring the bit pattern should work. Otherwise, as boytheo said, ASCII is your friend.

sprintf, fprintf? You don't get any more portable than that.

