This question is very related to the question here(How do I convert a vec4 rgba value to a float?).
There are already some articles and questions related to this one, but most of them do not identify which type of floating-point value they handle.
As far as I can tell, the kinds of floating-point values that need a packing/unpacking formula are listed below.
- unsigned normalized float
- signed normalized float
- signed ranged float (the floating value I can find range limitation)
- unsigned ranged float
- unsigned float
- signed float
However, there are actually just 2 cases. The other packing/unpacking variants can be handled by these 2 methods.
- unsigned ranged float (I can pack/unpack by easy bitshifting)
- signed float
I want to pack and unpack signed floating values into vec3 or vec2 also.
In my case, the floating-point value is not guaranteed to be normalized, so I cannot use the simple bit-shifting approach.
If you know the range of values you want to store, say -5 to +5, then the easiest way is to convert that range to a value from 0 to 1, expand that to the number of bits you have, and then break it into parts.
// Packs a float from the range [min, max] into two byte-sized parts.
//   v        value to pack; expected to lie within [min, max]
//   min, max bounds of the range that is mapped onto the 16-bit scale
// Returns vec2(low, high) in byte units (0..255, NOT normalized to 0..1):
// `high` is a whole number and `low` keeps the remainder, so that
// low + high * 256.0 reproduces the scaled value.
vec2 packFloatInto8BitVec2(float v, float min, float max) {
    float zeroToOne = (v - min) / (max - min);
    // 256 * 255 makes v == max land exactly on (low = 0, high = 255).
    float zeroTo16Bit = zeroToOne * 256.0 * 255.0;
    // Split the scaled value (not the 0..1 value) into its two parts; the
    // high part is floored so it does not duplicate what `low` already holds.
    return vec2(mod(zeroTo16Bit, 256.0), floor(zeroTo16Bit / 256.0));
}
To put it back you do the opposite. Assemble the parts, divide to get back to a zeroToOne value, then expand by the range.
// Rebuilds a float in [min, max] from two byte-sized parts.
//   v        vec2(low, high) in byte units
//   min, max bounds of the range the value was packed from
float unpack8BitVec2IntoFloat(vec2 v, float min, float max) {
    // Reassemble the parts, then scale back down by 256 * 255.
    float normalized = (v.x + v.y * 256.0) / 256.0 / 255.0;
    float span = max - min;
    return normalized * span + min;
}
For vec3 just expand it
// Packs a float from the range [min, max] into three byte-sized parts.
//   v        value to pack; expected to lie within [min, max]
//   min, max bounds of the range that is mapped onto the 24-bit scale
// Returns vec3(low, mid, high) in byte units (0..255, NOT normalized to 0..1):
// `mid` and `high` are whole numbers and `low` keeps the remainder, so that
// low + mid * 256.0 + high * 256.0 * 256.0 reproduces the scaled value.
vec3 packFloatInto8BitVec3(float v, float min, float max) {
    float zeroToOne = (v - min) / (max - min);
    // 256 * 256 * 255 makes v == max land exactly on (0, 0, 255).
    float zeroTo24Bit = zeroToOne * 256.0 * 256.0 * 255.0;
    // Split the scaled value (not the 0..1 value) into its three parts; the
    // upper parts are floored so they do not duplicate lower-order content.
    return vec3(
        mod(zeroTo24Bit, 256.0),
        mod(floor(zeroTo24Bit / 256.0), 256.0),
        floor(zeroTo24Bit / 256.0 / 256.0));
}
// Rebuilds a float in [min, max] from three byte-sized parts.
//   v        vec3(low, mid, high) in byte units
//   min, max bounds of the range the value was packed from
float unpack8BitVec3IntoFloat(vec3 v, float min, float max) {
    float zeroTo24Bit = v.x + v.y * 256.0 + v.z * 256.0 * 256.0;
    // The divisor must equal the packer's scale (256 * 256 * 255); dividing
    // by 256 * 256 * 256 would make `max` decode to slightly less than max.
    float zeroToOne = zeroTo24Bit / 256.0 / 256.0 / 255.0;
    return zeroToOne * (max - min) + min;
}
I wrote a small example a few days ago on Shadertoy:
https://www.shadertoy.com/view/XdK3Dh
It stores a float as RGB, or loads a float from a pixel. There is also a test that the functions are exact inverses (a lot of other functions I have seen have bugs in some ranges because of bad precision).
The entire example assumes you want to save values in a buffer and read them back in the next draw. With only 256 levels per color channel, three channels limit you to 16777216 different values. Most of the time I don't need a larger range. I also shifted it to store signed floats in the interval <-8388608;8388608>.
// Decodes an RGB colour (channels in 0..1) into a signed float.
// r is the most significant byte and b the least significant; the result is
// offset by -8388608 so that the 24-bit value becomes signed.
float color2float(in vec3 c) {
    // Flooring after the scale to 0..255 keeps the value from being shifted
    // in some intervals.
    vec3 bytes = floor(c * 255.);
    return bytes.r*256.*256. + bytes.g*256. + bytes.b - 8388608.;
}
// Encodes a signed float as an RGB colour (channels in 0..1), with r as the
// most significant byte and b as the least significant.
// Values outside the representable range are stored as the min/max colour:
// below -8388608 -> vec3(0.), at or above +8388608 -> vec3(1.).
vec3 float2color(in float val) {
    val += 8388608.; // this makes values signed
    if(val < 0.) {
        return vec3(0.);
    }
    // Three bytes hold at most 16777215. The comparison has to be >=, because
    // exactly 16777216 would otherwise pass through the mod() chain below and
    // wrap around to vec3(0.), i.e. decode as the minimum instead of the maximum.
    if(val >= 16777216.) {
        return vec3(1.);
    }
    vec3 c = vec3(0.);
    // Peel off one byte at a time, least significant first.
    c.b = mod(val, 256.);
    val = floor(val/256.);
    c.g = mod(val, 256.);
    val = floor(val/256.);
    c.r = mod(val, 256.);
    return c/255.;
}
One more thing: values that overflow will be clamped to the min/max value.
In order to pack a floating-point value in a `vec2`, `vec3` or `vec4`, either the range of the source values has to be restricted and well specified, or the exponent has to be stored somehow too.
In general, if the significant digits of a floating-point number are to be packed into bytes, consecutive 8-bit packages have to be extracted from the significant digits and each stored in a byte.
Encode a floating point number in a restricted and predefined range
A value range [`minVal`, `maxVal`] must be defined that includes all values to be encoded, and this value range must be mapped to the range [0.0, 1.0].
Encoding of a floating point number in the range [`minVal`, `maxVal`] to `vec2`, `vec3` and `vec4`:
// Encodes a float from [minVal, maxVal] into two channels.
//   value          number to encode; clamped to the range
//   minVal, maxVal bounds of the range that is mapped to [0.0, 1.0]
// Returns the two packed channels, most significant first.
vec2 EncodeRangeV2( in float value, in float minVal, in float maxVal )
{
    // Normalize to [0, 1].
    float unit = clamp( (value-minVal) / (maxVal-minVal), 0.0, 1.0 );
    // Scale by (2^16 - 1) / 2^16 -- presumably so that an input of maxVal
    // stays below 1.0 and is not wrapped to 0 by fract().
    unit *= (256.0*256.0 - 1.0) / (256.0*256.0);
    vec3 fractions = fract( unit * vec3(1.0, 256.0, 256.0*256.0) );
    // Subtracting the next-finer fraction / 256 removes the part that the
    // following channel carries; 1/512 is then added as half of a 1/256 step.
    vec2 coarse = fractions.xy;
    vec2 finer = fractions.yz / 256.0;
    return coarse - finer + 1.0/512.0;
}
// Encodes a float from [minVal, maxVal] into three channels.
//   value          number to encode; clamped to the range
//   minVal, maxVal bounds of the range that is mapped to [0.0, 1.0]
// Returns the three packed channels, most significant first.
vec3 EncodeRangeV3( in float value, in float minVal, in float maxVal )
{
    // Normalize to [0, 1].
    float unit = clamp( (value-minVal) / (maxVal-minVal), 0.0, 1.0 );
    // Scale by (2^24 - 1) / 2^24 -- presumably so that an input of maxVal
    // stays below 1.0 and is not wrapped to 0 by fract().
    unit *= (256.0*256.0*256.0 - 1.0) / (256.0*256.0*256.0);
    vec4 fractions = fract( unit * vec4(1.0, 256.0, 256.0*256.0, 256.0*256.0*256.0) );
    // Subtracting the next-finer fraction / 256 removes the part that the
    // following channel carries; 1/512 is then added as half of a 1/256 step.
    vec3 coarse = fractions.xyz;
    vec3 finer = fractions.yzw / 256.0;
    return coarse - finer + 1.0/512.0;
}
// Encodes a float from [minVal, maxVal] into four channels.
//   value          number to encode; clamped to the range
//   minVal, maxVal bounds of the range that is mapped to [0.0, 1.0]
// Returns the four packed channels, most significant first.
vec4 EncodeRangeV4( in float value, in float minVal, in float maxVal )
{
    // Normalize to [0, 1].
    float unit = clamp( (value-minVal) / (maxVal-minVal), 0.0, 1.0 );
    // Same (2^24 - 1) / 2^24 scale as the three-channel variant.
    unit *= (256.0*256.0*256.0 - 1.0) / (256.0*256.0*256.0);
    vec4 fractions = fract( unit * vec4(1.0, 256.0, 256.0*256.0, 256.0*256.0*256.0) );
    // The first three channels drop the part carried by the next channel;
    // the last channel keeps its fraction unchanged.
    vec3 leading = fractions.xyz - fractions.yzw / 256.0;
    return vec4( leading, fractions.w ) + 1.0/512.0;
}
Decoding of a `vec2`, `vec3` and `vec4` to a floating point number in the range [`minVal`, `maxVal`]:
// Decodes two packed channels back to a float in [minVal, maxVal].
//   pack           the two channels, most significant first
//   minVal, maxVal bounds of the range the value is mapped to
float DecodeRangeV2( in vec2 pack, in float minVal, in float maxVal )
{
    // Each channel weighs 1/256 of the previous one.
    vec2 weights = 1.0 / vec2(1.0, 256.0);
    float unit = dot( pack, weights );
    // Scale by 2^16 / (2^16 - 1).
    unit *= (256.0*256.0) / (256.0*256.0 - 1.0);
    return mix( minVal, maxVal, unit );
}
// Decodes three packed channels back to a float in [minVal, maxVal].
//   pack           the three channels, most significant first
//   minVal, maxVal bounds of the range the value is mapped to
float DecodeRangeV3( in vec3 pack, in float minVal, in float maxVal )
{
    // Each channel weighs 1/256 of the previous one.
    vec3 weights = 1.0 / vec3(1.0, 256.0, 256.0*256.0);
    float unit = dot( pack, weights );
    // Scale by 2^24 / (2^24 - 1).
    unit *= (256.0*256.0*256.0) / (256.0*256.0*256.0 - 1.0);
    return mix( minVal, maxVal, unit );
}
// Decodes four packed channels back to a float in [minVal, maxVal].
//   pack           the four channels, most significant first
//   minVal, maxVal bounds of the range the value is mapped to
float DecodeRangeV4( in vec4 pack, in float minVal, in float maxVal )
{
    // Each channel weighs 1/256 of the previous one.
    vec4 weights = 1.0 / vec4(1.0, 256.0, 256.0*256.0, 256.0*256.0*256.0);
    float unit = dot( pack, weights );
    // Scale by 2^24 / (2^24 - 1).
    unit *= (256.0*256.0*256.0) / (256.0*256.0*256.0 - 1.0);
    return mix( minVal, maxVal, unit );
}
Note: since a standard 32-bit [IEEE 754][2] number has only 24 significant bits, it is completely sufficient to encode the number in 3 bytes.
Encode the significant digits and the exponent of a floating point number
Encoding of the significant digits of a floating point number and its exponent to `vec2`, `vec3` and `vec4`:
// Encodes a float as one channel of significant digits plus one channel
// holding the exponent.
// Returns vec2(significant digits, exponent).
// NOTE(review): value == 0.0 reaches log2(0.0) -- confirm how that input
// should be handled.
vec2 EncodeExpV2( in float value )
{
    int exponent = int( log2( abs( value ) ) + 1.0 );
    // Divide out the power of two, then map the signed result through
    // (x + 1) / 2 with a 255/256 scale.
    float mantissa = value / exp2( float( exponent ) );
    mantissa = (mantissa + 1.0) * 255.0 / (2.0*256.0);
    vec2 fractions = fract( mantissa * vec2(1.0, 256.0) );
    float digitChannel = fractions.x - fractions.y / 256.0 + 1.0/512.0;
    // The exponent is stored with an offset of 127.5, divided by 256.
    float exponentChannel = (float(exponent) + 127.5) / 256.0;
    return vec2( digitChannel, exponentChannel );
}
// Encodes a float as two channels of significant digits plus one channel
// holding the exponent.
// Returns vec3(significant digits (most significant first), exponent).
// NOTE(review): value == 0.0 reaches log2(0.0) -- confirm how that input
// should be handled.
vec3 EncodeExpV3( in float value )
{
    int exponent = int( log2( abs( value ) ) + 1.0 );
    // Divide out the power of two, then map the signed result through
    // (x + 1) / 2 with a (2^16 - 1) / 2^16 scale.
    float mantissa = value / exp2( float( exponent ) );
    mantissa = (mantissa + 1.0) * (256.0*256.0 - 1.0) / (2.0*256.0*256.0);
    vec3 fractions = fract( mantissa * vec3(1.0, 256.0, 256.0*256.0) );
    vec2 digitChannels = fractions.xy - fractions.yz / 256.0 + 1.0/512.0;
    // The exponent is stored with an offset of 127.5, divided by 256.
    float exponentChannel = (float(exponent) + 127.5) / 256.0;
    return vec3( digitChannels, exponentChannel );
}
// Encodes a float as three channels of significant digits plus one channel
// holding the exponent.
// Returns vec4(significant digits (most significant first), exponent).
// NOTE(review): value == 0.0 reaches log2(0.0) -- confirm how that input
// should be handled.
vec4 EncodeExpV4( in float value )
{
    int exponent = int( log2( abs( value ) ) + 1.0 );
    // Divide out the power of two, then map the signed result through
    // (x + 1) / 2 with a (2^24 - 1) / 2^24 scale.
    float mantissa = value / exp2( float( exponent ) );
    mantissa = (mantissa + 1.0) * (256.0*256.0*256.0 - 1.0) / (2.0*256.0*256.0*256.0);
    vec4 fractions = fract( mantissa * vec4(1.0, 256.0, 256.0*256.0, 256.0*256.0*256.0) );
    vec3 digitChannels = fractions.xyz - fractions.yzw / 256.0 + 1.0/512.0;
    // The exponent is stored with an offset of 127.5, divided by 256.
    float exponentChannel = (float(exponent) + 127.5) / 256.0;
    return vec4( digitChannels, exponentChannel );
}
Decoding of a `vec2`, `vec3` and `vec4` to the significant digits of a floating point number and its exponent:
// Decodes a vec2 of (significant digits, exponent) back to a float.
//   pack.x  significant digits, mapped through (x + 1) * 255 / 512
//   pack.y  exponent, stored as (exponent + 127.5) / 256
float DecodeExpV2( in vec2 pack )
{
    // A vec2 has no .z component; the exponent is the second component.
    // floor() is required because int() drops the fraction toward zero, which
    // would turn a stored exponent of -1 (-> -0.5 here) into 0.
    int exponent = int( floor( pack.y * 256.0 - 127.0 ) );
    // Undo the (x + 1) * 255 / (2 * 256) mapping of the significant digits.
    float value = pack.x * (2.0*256.0) / 255.0 - 1.0;
    return value * exp2( float(exponent) );
}
// Decodes a vec3 of (significant digits x2, exponent) back to a float.
//   pack.xy significant digits, most significant channel first
//   pack.z  exponent, stored as (exponent + 127.5) / 256
float DecodeExpV3( in vec3 pack )
{
    // floor() is required because int() drops the fraction toward zero, which
    // would turn a stored exponent of -1 (-> -0.5 here) into 0.
    int exponent = int( floor( pack.z * 256.0 - 127.0 ) );
    // Recombine the digit channels; each weighs 1/256 of the previous one.
    float value = dot( pack.xy, 1.0 / vec2(1.0, 256.0) );
    // Undo the (x + 1) * (2^16 - 1) / (2 * 2^16) mapping.
    value = value * (2.0*256.0*256.0) / (256.0*256.0 - 1.0) - 1.0;
    return value * exp2( float(exponent) );
}
// Decodes a vec4 of (significant digits x3, exponent) back to a float.
//   pack.xyz significant digits, most significant channel first
//   pack.w   exponent, stored as (exponent + 127.5) / 256
float DecodeExpV4( in vec4 pack )
{
    // floor() is required because int() drops the fraction toward zero, which
    // would turn a stored exponent of -1 (-> -0.5 here) into 0.
    int exponent = int( floor( pack.w * 256.0 - 127.0 ) );
    // Recombine the digit channels; each weighs 1/256 of the previous one.
    float value = dot( pack.xyz, 1.0 / vec3(1.0, 256.0, 256.0*256.0) );
    // Undo the (x + 1) * (2^24 - 1) / (2 * 2^24) mapping.
    value = value * (2.0*256.0*256.0*256.0) / (256.0*256.0*256.0 - 1.0) - 1.0;
    return value * exp2( float(exponent) );
}
See also the answer to the following question:
- How do you pack one 32bit int Into 4, 8bit ints in glsl / webgl?
I tested gman's solution and found that the scale factor was incorrect, and it produced roundoff errors, and there needs to be an additional division by 255.0 if you want to store the result in a RGB texture. So this is my revised solution:
// Largest value that fits into 24 bits (2^24 - 1).
#define SCALE_FACTOR (256.0 * 256.0 * 256.0 - 1.0)
// Packs a float from the range [min, max] into three channels, each a whole
// byte value divided by 255.0.
//   v        value to pack
//   min, max bounds of the range that is mapped onto 0..SCALE_FACTOR
// Returns vec3(low byte, middle byte, high byte) / 255.0.
vec3 packFloatInto8BitVec3(float v, float min, float max) {
    float normalized = (v - min) / (max - min);
    float scaled = normalized * SCALE_FACTOR;
    // Split into three parts, least significant first.
    vec3 parts = vec3(
        mod(scaled, 256.0),
        mod(scaled / 256.0, 256.0),
        scaled / 256.0 / 256.0
    );
    // Drop the fractional remainders, then divide by 255.
    return floor(parts) / 255.0;
}
// Rebuilds a float in [min, max] from three channels.
//   v        vec3(low byte, middle byte, high byte), each divided by 255.0
//   min, max bounds of the range the value was packed from
float unpack8BitVec3IntoFloat(vec3 v, float min, float max) {
    // Per-channel weights: byte position, divided by SCALE_FACTOR, times the
    // 255 that undoes the division applied when packing.
    vec3 weights = vec3(1.0, 256.0, 256.0 * 256.0) / SCALE_FACTOR * 255.0;
    float normalized = dot(v, weights);
    float span = max - min;
    return normalized * span + min;
}
Example:
- If you pack 0.25 using min=0 and max=1, you will get (1.0, 1.0, 0.247059)
- If you unpack that vector, you will get 0.249999970197678