#include <stdio.h> // for printf(), putchar()
#include <stdlib.h> // for atof(), exit()
// Display a byte in binary
void printBinary(const unsigned char val);
/*
 * Print the bit pattern of the number given on the command line, first
 * converted to float and then as a double, one line of '0'/'1' each.
 *
 * The argument is parsed with strtod() so that text that is not a number
 * is rejected instead of being silently displayed as 0.
 *
 * Returns 0 on success, 1 when the argument is missing, extra or invalid.
 */
int main(int argc, char* argv[])
{
    char* end; // set by strtod() to the first character it did not consume
    double d;
    float f;
    unsigned char* cp;
    int i;

    if(argc != 2)
    {
        printf("Must provide a number\n");
        return 1; // non-zero return value from main() signals an error
    }
    d = strtod(argv[1], &end);
    if(end == argv[1] || *end != '\0') // nothing parsed, or trailing junk
    {
        printf("Not a valid number: %s\n", argv[1]);
        return 1;
    }
    f = (float)d; // same value rounded to single precision

    // Bytes are printed from the highest address down to the lowest; on a
    // little-endian machine that is most significant byte first.
    cp = (unsigned char*)(&f);
    for(i = sizeof(float); i > 0; i--)
    {
        printBinary(cp[i-1]); // cp[3], cp[2], cp[1], cp[0]
    }
    putchar('\n');

    cp = (unsigned char*)(&d);
    for(i = sizeof(double); i > 0; i--)
    {
        printBinary(cp[i-1]); // cp[7], cp[6], ..., cp[1], cp[0]
    }
    putchar('\n');
    return 0; // normal return value for main(), signals no errors
}
// Write the eight bits of val to stdout as '0'/'1' characters,
// most significant bit first.
void printBinary(const unsigned char val)
{
    unsigned mask;
    for(mask = 0x80; mask != 0; mask >>= 1) // 10000000, 01000000, ..., 00000001
    {
        putchar((val & mask) ? '1' : '0');
    }
}
/*
gcc floatbinary.c -o floatbinary
./floatbinary
Must provide a number
./floatbinary 0 1
Must provide a number // exactly one number
./floatbinary 0
00000000000000000000000000000000 // single-precision floating-point
0000000000000000000000000000000000000000000000000000000000000000 // double
./floatbinary 1
00111111100000000000000000000000
0011111111110000000000000000000000000000000000000000000000000000
./floatbinary 2
01000000000000000000000000000000
0100000000000000000000000000000000000000000000000000000000000000
./floatbinary 3
01000000010000000000000000000000
0100000000001000000000000000000000000000000000000000000000000000
./floatbinary 4
01000000100000000000000000000000
0100000000010000000000000000000000000000000000000000000000000000
./floatbinary 0.1
00111101110011001100110011001101
0011111110111001100110011001100110011001100110011001100110011010
./floatbinary 0.2
00111110010011001100110011001101
0011111111001001100110011001100110011001100110011001100110011010
./floatbinary -0.1
10111101110011001100110011001101
1011111110111001100110011001100110011001100110011001100110011010
./floatbinary -.2
10111110010011001100110011001101
1011111111001001100110011001100110011001100110011001100110011010
./floatbinary 15
01000001011100000000000000000000
0100000000101110000000000000000000000000000000000000000000000000
./floatbinary 15.1
01000001011100011001100110011010
0100000000101110001100110011001100110011001100110011001100110011
./floatbinary 15.2
01000001011100110011001100110011
0100000000101110011001100110011001100110011001100110011001100110
./floatbinary 15.3
01000001011101001100110011001101
0100000000101110100110011001100110011001100110011001100110011010
*/
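00000000000000000000000000000000 = 0 exactly. The formula for normal numbers would give ((-1)^0)*2^(0-127)*(1+0) = 1*2^(-127)*1 ~ 0, but an all-zero exponent field is a special case: there is no implicit leading 1, so an all-zero mantissa encodes the value 0 itself.
(Here ^ means "raised to the power of" and ~ means "is approximately equal to".)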
00111111100000000000000000000000 = ((-1)^0)*2^(2^0+2^1+2^2+2^3+2^4+2^5+2^6-127)*(1+0) = 1*2^0*1 = 1.
01000000000000000000000000000000 = ((-1)^0)*2^(2^7-127)*(1+0) = 1*2^1*1 = 2.
01000000010000000000000000000000 = ((-1)^0)*2^(2^7-127)*(1+2^(-1)) = 1*2^1*1.5 = 3.
01000000100000000000000000000000 = ((-1)^0)*2^(2^0+2^7-127)*(1+0) = 1*2^2*1 = 4.
00111101110011001100110011001101 = ((-1)^0)*2^(2^0+2^1+2^3+2^4+2^5+2^6-127)*(1+2^(-1)+2^(-4)+2^(-5)+...) ~ 1*2^(-4)*1.6 = 0.1.
01000001011100000000000000000000 = ((-1)^0)*2^(2^1+2^7-127)*(1+2^(-1)+2^(-2)+2^(-3)) = 1*2^3*1.875 = 15.
etc.
*****************************************************************************************
*****************************************************************************************
*****************************************************************************************
#include "PrintBinary.hpp" // for printBinary()
#include <cstdlib> // for atof(), exit()
#include <iostream>
using std::cout;
using std::endl;
// Print the bit pattern of the number given on the command line, first
// converted to float and then as a double, one line of '0'/'1' each.
//
// The argument is parsed with std::strtod() so that text that is not a
// number is rejected instead of being silently displayed as 0.
//
// Returns 0 on success, 1 when the argument is missing, extra or invalid.
int main(int argc, char* argv[])
{
    if(argc != 2)
    {
        cout << "Must provide a number" << endl;
        return 1; // non-zero return value from main() signals an error
    }

    char* end = nullptr; // first character std::strtod() did not consume
    const double d = std::strtod(argv[1], &end);
    if(end == argv[1] || *end != '\0') // nothing parsed, or trailing junk
    {
        cout << "Not a valid number: " << argv[1] << endl;
        return 1;
    }
    const float f = static_cast<float>(d); // rounded to single precision

    // Bytes are printed from the highest address down to the lowest; on a
    // little-endian machine that is most significant byte first.
    const unsigned char* cp = reinterpret_cast<const unsigned char*>(&f);
    for(int i = sizeof(float); i > 0; i--)
    {
        printBinary(cp[i-1]); // cp[3], cp[2], cp[1], cp[0]
    }
    cout << endl;

    cp = reinterpret_cast<const unsigned char*>(&d);
    for(int i = sizeof(double); i > 0; i--)
    {
        printBinary(cp[i-1]); // cp[7], cp[6], ..., cp[1], cp[0]
    }
    cout << endl;
    return 0; // normal return value for main(), signals no errors
}
/*
g++ -c PrintBinary.cpp FloatBinary.cpp // create object files
g++ -c *.cpp
g++ PrintBinary.o FloatBinary.o -o FloatBinary // link object files,
g++ *.o -o FloatBinary // create executable
rm *.o // clean (delete object files)
./FloatBinary
Must provide a number
./FloatBinary 0 1
Must provide a number // exactly one number
./FloatBinary 0
00000000000000000000000000000000 // single-precision floating-point
0000000000000000000000000000000000000000000000000000000000000000 // double
./FloatBinary 1
00111111100000000000000000000000
0011111111110000000000000000000000000000000000000000000000000000
./FloatBinary 2
01000000000000000000000000000000
0100000000000000000000000000000000000000000000000000000000000000
./FloatBinary 3
01000000010000000000000000000000
0100000000001000000000000000000000000000000000000000000000000000
./FloatBinary 4
01000000100000000000000000000000
0100000000010000000000000000000000000000000000000000000000000000
./FloatBinary 0.1
00111101110011001100110011001101
0011111110111001100110011001100110011001100110011001100110011010
./FloatBinary 0.2
00111110010011001100110011001101
0011111111001001100110011001100110011001100110011001100110011010
./FloatBinary -0.1
10111101110011001100110011001101
1011111110111001100110011001100110011001100110011001100110011010
./FloatBinary -.2
10111110010011001100110011001101
1011111111001001100110011001100110011001100110011001100110011010
./FloatBinary 15
01000001011100000000000000000000
0100000000101110000000000000000000000000000000000000000000000000
./FloatBinary 15.1
01000001011100011001100110011010
0100000000101110001100110011001100110011001100110011001100110011
./FloatBinary 15.2
01000001011100110011001100110011
0100000000101110011001100110011001100110011001100110011001100110
./FloatBinary 15.3
01000001011101001100110011001101
0100000000101110100110011001100110011001100110011001100110011010
*/
*****************************************************************************************
*****************************************************************************************
*****************************************************************************************
*****************************************************************************************
*****************************************************************************************
#include <stdio.h> // for printf()
#include <float.h>
#include <math.h> // for pow() - double, powl() - long double
void printBinary(const unsigned char val); // Display a byte in binary
void printData(unsigned char*, int); // Display a data type in binary
void printData2(unsigned char*, int); // Display data type in binary (2 parts)
/*
 * Print the limits that <float.h> publishes for float, double and
 * long double, then show the bit patterns of the smallest normal value,
 * the smallest subnormal value and the largest finite value of each type,
 * obtained in three ways:
 *   1. from the <float.h> macro,
 *   2. computed with pow() / powl(),
 *   3. assembled directly from an integer bit pattern.
 *
 * For (3) the integer bits are reinterpreted through a union, which C
 * permits, rather than by dereferencing a cast pointer, which breaks the
 * aliasing rules. The 64-bit patterns are held in unsigned long long
 * because unsigned long is only 32 bits wide on some platforms.
 *
 * The exponents and shift counts below match the formats on the author's
 * machine (see the "On my computer" note further down); other platforms
 * may store long double differently.
 */
int main()
{
    int i;

    /* Limits published by <float.h> */
    printf("FLT_DIG: %d, DBL_DIG: %d, LDBL_DIG: %d\n",
           FLT_DIG, DBL_DIG, LDBL_DIG);
    printf("FLT_DECIMAL_DIG: %d, DBL_DECIMAL_DIG: %d, LDBL_DECIMAL_DIG: %d\n",
           FLT_DECIMAL_DIG, DBL_DECIMAL_DIG, LDBL_DECIMAL_DIG);
    printf("DECIMAL_DIG: %d\n", DECIMAL_DIG);
    printf("FLT_MIN_10_EXP: %d, DBL_MIN_10_EXP: %d, LDBL_MIN_10_EXP: %d\n",
           FLT_MIN_10_EXP, DBL_MIN_10_EXP, LDBL_MIN_10_EXP);
    printf("FLT_MAX_10_EXP: %d, DBL_MAX_10_EXP: %d, LDBL_MAX_10_EXP: %d\n",
           FLT_MAX_10_EXP, DBL_MAX_10_EXP, LDBL_MAX_10_EXP);
    printf("FLT_MIN_EXP: %d, DBL_MIN_EXP: %d, LDBL_MIN_EXP: %d\n",
           FLT_MIN_EXP, DBL_MIN_EXP, LDBL_MIN_EXP);
    printf("FLT_MAX_EXP: %d, DBL_MAX_EXP: %d, LDBL_MAX_EXP: %d\n",
           FLT_MAX_EXP, DBL_MAX_EXP, LDBL_MAX_EXP);
    printf("FLT_EPSILON: %g, DBL_EPSILON: %g, LDBL_EPSILON: %Lg\n",
           FLT_EPSILON, DBL_EPSILON, LDBL_EPSILON);
    printf("FLT_TRUE_MIN: %g, DBL_TRUE_MIN: %g, LDBL_TRUE_MIN: %Lg\n",
           FLT_TRUE_MIN, DBL_TRUE_MIN, LDBL_TRUE_MIN);
    printf("FLT_RADIX (Radix of exponent representation): %d\n", FLT_RADIX);

    /* ---------------- float: values from the macros ---------------- */
    printf("float: %d mantissa digits", FLT_MANT_DIG);
    printf("\t[%g, %g]\n", FLT_MIN, FLT_MAX);
    printf("FLT_MIN: %g\t\t", FLT_MIN);
    float f = FLT_MIN;
    printData((unsigned char*)(&f), sizeof(float));
    printf("FLT_TRUE_MIN: %g\t", FLT_TRUE_MIN);
    f = FLT_TRUE_MIN;
    printData((unsigned char*)(&f), sizeof(float));
    printf("FLT_MAX: %g\t\t", FLT_MAX);
    f = FLT_MAX;
    printData((unsigned char*)(&f), sizeof(float));

    /* float: the same values computed with pow() */
    float fmin, ftruemin, fmax;
    fmin = pow(2, -126);
    float fmantissa = 1.0;
    for (i = 1; i <= 23; i++) // 1 + 2^(-1) + ... + 2^(-23)
        {fmantissa += pow(2, -i);}
    fmax = fmantissa * pow(2, 127);
    printf("float range (computed):\t\t[%g, %g]\n", fmin, fmax);
    printf("fmin (computed): %g\t", fmin);
    printData((unsigned char*)(&fmin), sizeof(float));
    ftruemin = pow(2, -126) * pow(2, -23); // pow(2, -149); // 2^(-150) ~ 0
    printf("ftruemin (comp): %g\t", ftruemin);
    printData((unsigned char*)(&ftruemin), sizeof(float));
    printf("fmax (computed): %g\t", fmax);
    printData((unsigned char*)(&fmax), sizeof(float));

    /* float: the same values assembled from bits.
       Assumes sizeof(float) = sizeof(unsigned) = sizeof(int). */
    union { unsigned bits; float value; } fpun;
    unsigned u = 1; // 00000000000000000000000000000001
    u <<= 23; // u = pow(2, 23); // 00000000100000000000000000000000
    fpun.bits = u;
    fmin = fpun.value;
    printf("fmin shifted: %g\t", fmin);
    printData((unsigned char*)(&fmin), sizeof(float));
    unsigned v = 1; // 00000000000000000000000000000001
    fpun.bits = v;
    ftruemin = fpun.value;
    printf("ftruemin shift: %g\t", ftruemin);
    printData((unsigned char*)(&ftruemin), sizeof(float));
    v = ~0u; // 11111111111111111111111111111111
    v >>= 1; // 01111111111111111111111111111111
    v &= ~u; // 01111111011111111111111111111111
    fpun.bits = v;
    fmax = fpun.value;
    printf("fmax shifted: %g\t", fmax);
    printData((unsigned char*)(&fmax), sizeof(float));
    putchar('\n');

    /* ---------------- double: values from the macros ---------------- */
    printf("double: %d mantissa digits", DBL_MANT_DIG);
    printf("\t[%g, %g]\n", DBL_MIN, DBL_MAX);
    printf("DBL_MIN: %g\n", DBL_MIN);
    double d = DBL_MIN;
    printData((unsigned char*)(&d), sizeof(double));
    printf("DBL_TRUE_MIN: %g\n", DBL_TRUE_MIN);
    d = DBL_TRUE_MIN;
    printData((unsigned char*)(&d), sizeof(double));
    printf("DBL_MAX: %g\n", DBL_MAX);
    d = DBL_MAX;
    printData((unsigned char*)(&d), sizeof(double));

    /* double: the same values computed with pow() */
    double dmin, dtruemin, dmax;
    dmin = pow(2, -1022);
    double dmantissa = 1.0;
    for (i = 1; i <= 52; i++) // 1 + 2^(-1) + ... + 2^(-52)
        {dmantissa += pow(2, -i);}
    dmax = dmantissa * pow(2, 1023);
    printf("double range (computed):\t[%g, %g]\n", dmin, dmax);
    printf("dmin (computed): %g\n", dmin);
    printData((unsigned char*)(&dmin), sizeof(double));
    dtruemin = pow(2, -1022) * pow(2, -52); // pow(2, -1074); // 2^(-1075) ~ 0
    printf("dtruemin (comp): %g\n", dtruemin);
    printData((unsigned char*)(&dtruemin), sizeof(double));
    printf("dmax (computed): %g\n", dmax);
    printData((unsigned char*)(&dmax), sizeof(double));

    /* double: the same values assembled from bits.
       Assumes sizeof(double) = sizeof(unsigned long long). */
    union { unsigned long long bits; double value; } dpun;
    unsigned long long lu = 1; // 0...01
    lu <<= 52; // lu = pow(2, 52); // 0000000000010...0
    dpun.bits = lu;
    dmin = dpun.value;
    printf("dmin shifted: %g\n", dmin);
    printData((unsigned char*)(&dmin), sizeof(double));
    unsigned long long lv = 1; // 0...01
    dpun.bits = lv;
    dtruemin = dpun.value;
    printf("dtruemin shift: %g\n", dtruemin);
    printData((unsigned char*)(&dtruemin), sizeof(double));
    lv = ~0ULL; // 1...1
    lv >>= 1; // 011...11
    lv &= ~lu; // 01111111111011...11
    dpun.bits = lv;
    dmax = dpun.value;
    printf("dmax shifted: %g\n", dmax);
    printData((unsigned char*)(&dmax), sizeof(double));
    putchar('\n');

    /*
    On my computer,
    sizeof(double) = sizeof(long) = sizeof(long long) = 8
    sizeof(long double) = 2 * sizeof(double) = 16
    `long double' stored on 128 bits =
    48 zeros (or garbage) + 80 bits (extended precision)
    */
    /* ------------- long double: values from the macros ------------- */
    printf("long double: %d mantissa digits", LDBL_MANT_DIG);
    printf("\t[%Lg, %Lg]\n", LDBL_MIN, LDBL_MAX);
    printf("LDBL_MIN: %Lg\n", LDBL_MIN);
    long double ld = LDBL_MIN;
    printData2((unsigned char*)(&ld), sizeof(long double));
    printf("LDBL_TRUE_MIN: %Lg\n", LDBL_TRUE_MIN);
    ld = LDBL_TRUE_MIN;
    printData2((unsigned char*)(&ld), sizeof(long double));
    printf("LDBL_MAX: %Lg\n", LDBL_MAX);
    ld = LDBL_MAX;
    printData2((unsigned char*)(&ld), sizeof(long double));

    /* long double: the same values computed with powl() */
    long double ldmin, ldtruemin, ldmax;
    ldmin = powl(2, -16382);
    long double ldmantissa = 1.0;
    for (i = 1; i <= 63; i++) // 1 + 2^(-1) + ... + 2^(-63)
        {ldmantissa += powl(2, -i);}
    ldmax = ldmantissa * powl(2, 16383);
    printf("long double range (computed):\t[%Lg, %Lg]\n", ldmin, ldmax);
    printf("ldmin (computed): %Lg\n", ldmin);
    printData2((unsigned char*)(&ldmin), sizeof(long double));
    ldtruemin = powl(2, -16382) * powl(2, -63); // powl(2, -16445); // 2^(-16446) ~ 0
    printf("ldtruemin (comp): %Lg\n", ldtruemin);
    printData2((unsigned char*)(&ldtruemin), sizeof(long double));
    printf("ldmax (computed): %Lg\n", ldmax);
    printData2((unsigned char*)(&ldmax), sizeof(long double));

    /* long double: the same values assembled from two 64-bit words.
       words[0] overlays the low 8 bytes of the value, words[1] the rest.
       Assumes sizeof(long double) <= 2 * sizeof(unsigned long long). */
    union { unsigned long long words[2]; long double value; } ldpun;
    unsigned long long uarr[2] = {1, 1}; // 0...01, 0...01
    uarr[0] <<= 63; // uarr[0] = powl(2, 63); // 10...0
    ldpun.words[0] = uarr[0];
    ldpun.words[1] = uarr[1];
    ldmin = ldpun.value;
    printf("ldmin shifted: %Lg\n", ldmin);
    printData2((unsigned char*)(&ldmin), sizeof(long double));
    unsigned long long varr[2] = {1, 0}; // 0...01, 0...0
    ldpun.words[0] = varr[0];
    ldpun.words[1] = varr[1];
    ldtruemin = ldpun.value; // 0...01
    printf("ldtruemin shift: %Lg\n", ldtruemin); // label names the long double value
    printData2((unsigned char*)(&ldtruemin), sizeof(long double));
    varr[0] = varr[1] = ~0ULL; // 1...1
    varr[1] >>= 49; // 0...0111111111111111 (49 zeros)
    varr[1] &= ~uarr[1]; // 0..0111111111111110 (50 zeros)
    /*
    // Alternative for varr[1]:
    varr[1] >>= 50; // 0...011111111111111 (50 zeros)
    varr[1] <<= 1; // 0..0111111111111110 (50 zeros)
    */
    ldpun.words[0] = varr[0];
    ldpun.words[1] = varr[1];
    ldmax = ldpun.value;
    printf("ldmax shifted: %Lg\n", ldmax);
    printData2((unsigned char*)(&ldmax), sizeof(long double));
    return 0;
}
// Display a byte in binary: eight '0'/'1' characters written to stdout,
// most significant bit first.
void printBinary(const unsigned char val)
{
    int shift = 8;
    while(shift-- > 0) // shift = 7, 6, ..., 0 inside the loop
    {
        putchar('0' + ((val >> shift) & 1)); // '0' + 1 is '1'
    }
}
// Display a data type in binary: the size bytes starting at cp are printed
// from the last one down to the first, followed by a newline.
void printData(unsigned char* cp, int size)
{
    // sizeof(float) = 4: cp[3], cp[2], cp[1], cp[0]
    while(size > 0)
    {
        --size;
        printBinary(cp[size]);
    }
    putchar('\n');
}
// Display a data type in binary on two lines (used for long double):
// the upper half of the bytes first, then the lower half, each half
// printed from its last byte down to its first.
void printData2(unsigned char* cp, int size)
{
    // sizeof(long double) = 16, 2 parts of 8 bytes
    const int half = size / 2; // we assume size is even
    int idx = size;

    while(idx > half) // second half: cp[15], ..., cp[8]
    {
        printBinary(cp[--idx]);
    }
    putchar('\n');

    while(idx > 0) // first half: cp[7], ..., cp[0]
    {
        printBinary(cp[--idx]);
    }
    putchar('\n');
}
/*
gcc -E floatlimits.c // preprocess to show the contents of header files
// On disk: /usr/lib/gcc/x86_64-linux-gnu/9/include/float.h
gcc -E floatlimits.c > headers.txt // save to file
// Compile and run:
gcc floatlimits.c -o floatlimits -lm // link math library
./floatlimits
FLT_DIG: 6, DBL_DIG: 15, LDBL_DIG: 18
FLT_DECIMAL_DIG: 9, DBL_DECIMAL_DIG: 17, LDBL_DECIMAL_DIG: 21
DECIMAL_DIG: 21
FLT_MIN_10_EXP: -37, DBL_MIN_10_EXP: -307, LDBL_MIN_10_EXP: -4931
FLT_MAX_10_EXP: 38, DBL_MAX_10_EXP: 308, LDBL_MAX_10_EXP: 4932
FLT_MIN_EXP: -125, DBL_MIN_EXP: -1021, LDBL_MIN_EXP: -16381
FLT_MAX_EXP: 128, DBL_MAX_EXP: 1024, LDBL_MAX_EXP: 16384
FLT_EPSILON: 1.19209e-07, DBL_EPSILON: 2.22045e-16, LDBL_EPSILON: 1.0842e-19
FLT_TRUE_MIN: 1.4013e-45, DBL_TRUE_MIN: 4.94066e-324, LDBL_TRUE_MIN: 3.6452e-4951
FLT_RADIX (Radix of exponent representation): 2 // base 2 (binary)
float: 24 mantissa digits [1.17549e-38, 3.40282e+38]
FLT_MIN: 1.17549e-38 00000000100000000000000000000000
FLT_TRUE_MIN: 1.4013e-45 00000000000000000000000000000001
FLT_MAX: 3.40282e+38 01111111011111111111111111111111
float range (computed): [1.17549e-38, 3.40282e+38]
fmin (computed): 1.17549e-38 00000000100000000000000000000000
ftruemin (comp): 1.4013e-45 00000000000000000000000000000001
fmax (computed): 3.40282e+38 01111111011111111111111111111111
fmin shifted: 1.17549e-38 00000000100000000000000000000000
ftruemin shift: 1.4013e-45 00000000000000000000000000000001
fmax shifted: 3.40282e+38 01111111011111111111111111111111
double: 53 mantissa digits [2.22507e-308, 1.79769e+308]
DBL_MIN: 2.22507e-308
0000000000010000000000000000000000000000000000000000000000000000
DBL_TRUE_MIN: 4.94066e-324
0000000000000000000000000000000000000000000000000000000000000001
DBL_MAX: 1.79769e+308
0111111111101111111111111111111111111111111111111111111111111111
double range (computed): [2.22507e-308, 1.79769e+308]
dmin (computed): 2.22507e-308
0000000000010000000000000000000000000000000000000000000000000000
dtruemin (comp): 4.94066e-324
0000000000000000000000000000000000000000000000000000000000000001
dmax (computed): 1.79769e+308
0111111111101111111111111111111111111111111111111111111111111111
dmin shifted: 2.22507e-308
0000000000010000000000000000000000000000000000000000000000000000
dtruemin shift: 4.94066e-324
0000000000000000000000000000000000000000000000000000000000000001
dmax shifted: 1.79769e+308
0111111111101111111111111111111111111111111111111111111111111111
long double: 64 mantissa digits [3.3621e-4932, 1.18973e+4932]
LDBL_MIN: 3.3621e-4932
0000000000000000000000000000000000000000000000000000000000000001
1000000000000000000000000000000000000000000000000000000000000000
LDBL_TRUE_MIN: 3.6452e-4951
0000000000000000000000000000000000000000000000000000000000000000
0000000000000000000000000000000000000000000000000000000000000001
LDBL_MAX: 1.18973e+4932
0000000000000000000000000000000000000000000000000111111111111110
1111111111111111111111111111111111111111111111111111111111111111
long double range (computed): [3.3621e-4932, 1.18973e+4932]
ldmin (computed): 3.3621e-4932
0000000000000000000000000000000000000000000000000000000000000001
1000000000000000000000000000000000000000000000000000000000000000
ldtruemin (comp): 3.6452e-4951
0000000000000000000000000000000000000000111100000000000000000000
0000000000000000000000000000000000000000000000000000000000000001
ldmax (computed): 1.18973e+4932
0000000000000000011111111111110001100111100011100111111111111110
1111111111111111111111111111111111111111111111111111111111111111
ldmin shifted: 3.3621e-4932
0000000000000000000000000000000000000000000000000000000000000001
1000000000000000000000000000000000000000000000000000000000000000
dtruemin shift: 3.6452e-4951
0000000000000000000000000000000000000000111100000000000000000000
0000000000000000000000000000000000000000000000000000000000000001
ldmax shifted: 1.18973e+4932
0000000000000000011111111111110000011011011001000111111111111110
1111111111111111111111111111111111111111111111111111111111111111
*/
*****************************************************************************************
*****************************************************************************************
*****************************************************************************************
#include <cfloat>
#include <cmath> // for pow() - double, powl() - long double
#include <cstring> // for memcpy()
#include <iostream>
using std::cout;
using std::endl;
void printBinary(const unsigned char val); // Display a byte in binary
void printData(unsigned char*, int); // Display a data type in binary
void printData2(unsigned char*, int); // Display data type in binary (2 parts)
// Print the limits that <cfloat> publishes for float, double and
// long double, then show the bit patterns of the smallest normal value,
// the smallest subnormal value and the largest finite value of each type,
// obtained in three ways:
//   1. from the <cfloat> macro,
//   2. computed with pow() / powl(),
//   3. assembled directly from an integer bit pattern.
//
// For (3) the integer bits are copied into the floating-point object with
// std::memcpy() rather than read through a cast pointer, which breaks the
// aliasing rules. The 64-bit patterns are held in unsigned long long
// because unsigned long is only 32 bits wide on some platforms.
//
// The exponents and shift counts below match the formats on the author's
// machine (see the "On my computer" note further down); other platforms
// may store long double differently.
int main()
{
    // Limits published by <cfloat>
    cout << "FLT_DIG: " << FLT_DIG << ", DBL_DIG: " << DBL_DIG
         << ", LDBL_DIG: " << LDBL_DIG << endl;
    cout << "FLT_DECIMAL_DIG: " << FLT_DECIMAL_DIG << ", DBL_DECIMAL_DIG: " <<
        DBL_DECIMAL_DIG << ", LDBL_DECIMAL_DIG: " << LDBL_DECIMAL_DIG << endl;
    cout << "DECIMAL_DIG: " << DECIMAL_DIG << endl;
    cout << "FLT_MIN_10_EXP: " << FLT_MIN_10_EXP << ", DBL_MIN_10_EXP: " <<
        DBL_MIN_10_EXP << ", LDBL_MIN_10_EXP: " << LDBL_MIN_10_EXP << endl;
    cout << "FLT_MAX_10_EXP: " << FLT_MAX_10_EXP << ", DBL_MAX_10_EXP: " <<
        DBL_MAX_10_EXP << ", LDBL_MAX_10_EXP: " << LDBL_MAX_10_EXP << endl;
    cout << "FLT_MIN_EXP: " << FLT_MIN_EXP << ", DBL_MIN_EXP: " <<
        DBL_MIN_EXP << ", LDBL_MIN_EXP: " << LDBL_MIN_EXP << endl;
    cout << "FLT_MAX_EXP: " << FLT_MAX_EXP << ", DBL_MAX_EXP: " <<
        DBL_MAX_EXP << ", LDBL_MAX_EXP: " << LDBL_MAX_EXP << endl;
    cout << "FLT_EPSILON: " << FLT_EPSILON << ", DBL_EPSILON: " <<
        DBL_EPSILON << ", LDBL_EPSILON: " << LDBL_EPSILON << endl;
    cout << "FLT_TRUE_MIN: " << FLT_TRUE_MIN << ", DBL_TRUE_MIN: " <<
        DBL_TRUE_MIN << ", LDBL_TRUE_MIN: " << LDBL_TRUE_MIN << endl;
    cout << "FLT_RADIX (Radix of exponent representation): "
         << FLT_RADIX << endl;

    // ---------------- float: values from the macros ----------------
    cout << "float: " << FLT_MANT_DIG << " mantissa digits";
    cout << "\t[" << FLT_MIN << ", " << FLT_MAX << "]" << endl;
    cout << "FLT_MIN: " << FLT_MIN << "\t\t";
    float f = FLT_MIN;
    printData(reinterpret_cast<unsigned char*>(&f), sizeof(float));
    cout << "FLT_TRUE_MIN: " << FLT_TRUE_MIN << "\t";
    f = FLT_TRUE_MIN;
    printData(reinterpret_cast<unsigned char*>(&f), sizeof(float));
    cout << "FLT_MAX: " << FLT_MAX << "\t\t";
    f = FLT_MAX;
    printData(reinterpret_cast<unsigned char*>(&f), sizeof(float));

    // float: the same values computed with pow()
    float fmin, ftruemin, fmax;
    fmin = pow(2, -126);
    float fmantissa = 1.0;
    for (int i = 1; i <= 23; i++) // 1 + 2^(-1) + ... + 2^(-23)
        {fmantissa += pow(2, -i);}
    fmax = fmantissa * pow(2, 127);
    cout << "float range (computed):\t\t[" << fmin << ", " << fmax << "]\n";
    cout << "fmin (computed): " << fmin << "\t";
    printData(reinterpret_cast<unsigned char*>(&fmin), sizeof(float));
    ftruemin = pow(2, -126) * pow(2, -23); // pow(2, -149); // 2^(-150) ~ 0
    cout << "ftruemin (comp): " << ftruemin << "\t";
    printData(reinterpret_cast<unsigned char*>(&ftruemin), sizeof(float));
    cout << "fmax (computed): " << fmax << "\t";
    printData(reinterpret_cast<unsigned char*>(&fmax), sizeof(float));

    // float: the same values assembled from bits
    static_assert(sizeof(float) == sizeof(unsigned),
                  "float and unsigned must have the same size");
    unsigned u = 1; // 00000000000000000000000000000001
    u <<= 23; // u = pow(2, 23); // 00000000100000000000000000000000
    std::memcpy(&fmin, &u, sizeof(fmin));
    cout << "fmin shifted: " << fmin << "\t";
    printData(reinterpret_cast<unsigned char*>(&fmin), sizeof(float));
    unsigned v = 1; // 00000000000000000000000000000001
    std::memcpy(&ftruemin, &v, sizeof(ftruemin));
    cout << "ftruemin shift: " << ftruemin << "\t";
    printData(reinterpret_cast<unsigned char*>(&ftruemin), sizeof(float));
    v = ~0u; // 11111111111111111111111111111111
    v >>= 1; // 01111111111111111111111111111111
    v &= ~u; // 01111111011111111111111111111111
    std::memcpy(&fmax, &v, sizeof(fmax));
    cout << "fmax shifted: " << fmax << "\t";
    printData(reinterpret_cast<unsigned char*>(&fmax), sizeof(float));
    cout << endl;

    // ---------------- double: values from the macros ----------------
    cout << "double: " << DBL_MANT_DIG << " mantissa digits";
    cout << "\t[" << DBL_MIN << ", " << DBL_MAX << "]" << endl;
    cout << "DBL_MIN: " << DBL_MIN << endl;
    double d = DBL_MIN;
    printData(reinterpret_cast<unsigned char*>(&d), sizeof(double));
    cout << "DBL_TRUE_MIN: " << DBL_TRUE_MIN << endl;
    d = DBL_TRUE_MIN;
    printData(reinterpret_cast<unsigned char*>(&d), sizeof(double));
    cout << "DBL_MAX: " << DBL_MAX << endl;
    d = DBL_MAX;
    printData(reinterpret_cast<unsigned char*>(&d), sizeof(double));

    // double: the same values computed with pow()
    double dmin, dtruemin, dmax;
    dmin = pow(2, -1022);
    double dmantissa = 1.0;
    for (int i = 1; i <= 52; i++) // 1 + 2^(-1) + ... + 2^(-52)
        {dmantissa += pow(2, -i);}
    dmax = dmantissa * pow(2, 1023);
    cout << "double range (computed):\t[" << dmin << ", " << dmax << "]\n";
    cout << "dmin (computed): " << dmin << endl;
    printData(reinterpret_cast<unsigned char*>(&dmin), sizeof(double));
    dtruemin = pow(2, -1022) * pow(2, -52); // pow(2, -1074); // 2^(-1075) ~ 0
    cout << "dtruemin (comp): " << dtruemin << endl;
    printData(reinterpret_cast<unsigned char*>(&dtruemin), sizeof(double));
    cout << "dmax (computed): " << dmax << endl;
    printData(reinterpret_cast<unsigned char*>(&dmax), sizeof(double));

    // double: the same values assembled from bits
    static_assert(sizeof(double) == sizeof(unsigned long long),
                  "double and unsigned long long must have the same size");
    unsigned long long lu = 1; // 0...01
    lu <<= 52; // lu = pow(2, 52); // 0000000000010...0
    std::memcpy(&dmin, &lu, sizeof(dmin));
    cout << "dmin shifted: " << dmin << endl;
    printData(reinterpret_cast<unsigned char*>(&dmin), sizeof(double));
    unsigned long long lv = 1; // 0...01
    std::memcpy(&dtruemin, &lv, sizeof(dtruemin));
    cout << "dtruemin shift: " << dtruemin << endl;
    printData(reinterpret_cast<unsigned char*>(&dtruemin), sizeof(double));
    lv = ~0ULL; // 1...1
    lv >>= 1; // 011...11
    lv &= ~lu; // 01111111111011...11
    std::memcpy(&dmax, &lv, sizeof(dmax));
    cout << "dmax shifted: " << dmax << endl;
    printData(reinterpret_cast<unsigned char*>(&dmax), sizeof(double));
    cout << endl;

    /*
    On my computer,
    sizeof(double) = sizeof(long) = sizeof(long long) = 8
    sizeof(long double) = 2 * sizeof(double) = 16
    `long double' stored on 128 bits =
    48 zeros (or garbage) + 80 bits (extended precision)
    */
    // ------------- long double: values from the macros -------------
    cout << "long double: " << LDBL_MANT_DIG << " mantissa digits";
    cout << "\t[" << LDBL_MIN << ", " << LDBL_MAX << "]" << endl;
    cout << "LDBL_MIN: " << LDBL_MIN << endl;
    long double ld = LDBL_MIN;
    printData2(reinterpret_cast<unsigned char*>(&ld), sizeof(long double));
    cout << "LDBL_TRUE_MIN: " << LDBL_TRUE_MIN << endl;
    ld = LDBL_TRUE_MIN;
    printData2(reinterpret_cast<unsigned char*>(&ld), sizeof(long double));
    cout << "LDBL_MAX: " << LDBL_MAX << endl;
    ld = LDBL_MAX;
    printData2(reinterpret_cast<unsigned char*>(&ld), sizeof(long double));

    // long double: the same values computed with powl()
    long double ldmin, ldtruemin, ldmax;
    ldmin = powl(2, -16382);
    long double ldmantissa = 1.0;
    for (int i = 1; i <= 63; i++) // 1 + 2^(-1) + ... + 2^(-63)
        {ldmantissa += powl(2, -i);}
    ldmax = ldmantissa * powl(2, 16383);
    cout << "long double range (computed):\t[" << ldmin << ", " << ldmax << "]\n";
    cout << "ldmin (computed): " << ldmin << endl;
    printData2(reinterpret_cast<unsigned char*>(&ldmin), sizeof(long double));
    ldtruemin = powl(2, -16382) * powl(2, -63); // powl(2, -16445); // 2^(-16446) ~ 0
    cout << "ldtruemin (comp): " << ldtruemin << endl;
    printData2(reinterpret_cast<unsigned char*>(&ldtruemin), sizeof(long double));
    cout << "ldmax (computed): " << ldmax << endl;
    printData2(reinterpret_cast<unsigned char*>(&ldmax), sizeof(long double));

    // long double: the same values assembled from two 64-bit words.
    // Element [0] supplies the low 8 bytes of the value, element [1] the rest.
    unsigned long long uarr[2] = {1, 1}; // 0...01, 0...01
    static_assert(sizeof(long double) <= sizeof(uarr),
                  "long double must fit in two unsigned long long words");
    uarr[0] <<= 63; // uarr[0] = powl(2, 63); // 10...0
    std::memcpy(&ldmin, uarr, sizeof(ldmin));
    cout << "ldmin shifted: " << ldmin << endl;
    printData2(reinterpret_cast<unsigned char*>(&ldmin), sizeof(long double));
    unsigned long long varr[2] = {1, 0}; // 0...01, 0...0
    std::memcpy(&ldtruemin, varr, sizeof(ldtruemin)); // 0...01
    cout << "ldtruemin shift: " << ldtruemin << endl; // label names the long double value
    printData2(reinterpret_cast<unsigned char*>(&ldtruemin), sizeof(long double));
    varr[0] = varr[1] = ~0ULL; // 1...1
    varr[1] >>= 49; // 0...0111111111111111 (49 zeros)
    varr[1] &= ~uarr[1]; // 0..0111111111111110 (50 zeros)
    /*
    // Alternative for varr[1]:
    varr[1] >>= 50; // 0...011111111111111 (50 zeros)
    varr[1] <<= 1; // 0..0111111111111110 (50 zeros)
    */
    std::memcpy(&ldmax, varr, sizeof(ldmax));
    cout << "ldmax shifted: " << ldmax << endl;
    printData2(reinterpret_cast<unsigned char*>(&ldmax), sizeof(long double));
    return 0;
}
// Display a byte in binary: eight '0'/'1' characters written to cout,
// most significant bit first.
void printBinary(const unsigned char val)
{
    for(unsigned mask = 0x80; mask != 0; mask >>= 1) // 10000000 ... 00000001
    {
        cout << ((val & mask) ? '1' : '0');
    }
}
// Display a data type in binary: the size bytes starting at cp are printed
// from the last one down to the first, followed by a line break.
void printData(unsigned char* cp, int size)
{
    // sizeof(float) = 4: cp[3], cp[2], cp[1], cp[0]
    while(size > 0)
    {
        --size;
        printBinary(cp[size]);
    }
    cout << endl;
}
// Display a data type in binary on two lines (used for long double):
// the upper half of the bytes first, then the lower half, each half
// printed from its last byte down to its first.
void printData2(unsigned char* cp, int size)
{
    // sizeof(long double) = 16, 2 parts of 8 bytes
    const int half = size / 2; // we assume size is even
    int idx = size;

    while(idx > half) // second half: cp[15], ..., cp[8]
    {
        printBinary(cp[--idx]);
    }
    cout << endl;

    while(idx > 0) // first half: cp[7], ..., cp[0]
    {
        printBinary(cp[--idx]);
    }
    cout << endl;
}
/*
g++ -std=c++17 -E FloatLimits.cpp // preprocess to show contents of headers
// c++17 for FLT_DECIMAL_DIG, DBL_DECIMAL_DIG, LDBL_DECIMAL_DIG,
// FLT_TRUE_MIN, DBL_TRUE_MIN, LDBL_TRUE_MIN
// On disk: /usr/lib/gcc/x86_64-linux-gnu/9/include/float.h
// /usr/include/c++/9/cfloat
g++ -std=c++17 -E FloatLimits.cpp > Headers.txt // save to file
// Compile and run:
g++ -std=c++17 FloatLimits.cpp -o FloatLimits
./FloatLimits
FLT_DIG: 6, DBL_DIG: 15, LDBL_DIG: 18
FLT_DECIMAL_DIG: 9, DBL_DECIMAL_DIG: 17, LDBL_DECIMAL_DIG: 21
DECIMAL_DIG: 21
FLT_MIN_10_EXP: -37, DBL_MIN_10_EXP: -307, LDBL_MIN_10_EXP: -4931
FLT_MAX_10_EXP: 38, DBL_MAX_10_EXP: 308, LDBL_MAX_10_EXP: 4932
FLT_MIN_EXP: -125, DBL_MIN_EXP: -1021, LDBL_MIN_EXP: -16381
FLT_MAX_EXP: 128, DBL_MAX_EXP: 1024, LDBL_MAX_EXP: 16384
FLT_EPSILON: 1.19209e-07, DBL_EPSILON: 2.22045e-16, LDBL_EPSILON: 1.0842e-19
FLT_TRUE_MIN: 1.4013e-45, DBL_TRUE_MIN: 4.94066e-324, LDBL_TRUE_MIN: 3.6452e-4951
FLT_RADIX (Radix of exponent representation): 2 // base 2 (binary)
float: 24 mantissa digits [1.17549e-38, 3.40282e+38]
FLT_MIN: 1.17549e-38 00000000100000000000000000000000
FLT_TRUE_MIN: 1.4013e-45 00000000000000000000000000000001
FLT_MAX: 3.40282e+38 01111111011111111111111111111111
float range (computed): [1.17549e-38, 3.40282e+38]
fmin (computed): 1.17549e-38 00000000100000000000000000000000
ftruemin (comp): 1.4013e-45 00000000000000000000000000000001
fmax (computed): 3.40282e+38 01111111011111111111111111111111
fmin shifted: 1.17549e-38 00000000100000000000000000000000
ftruemin shift: 1.4013e-45 00000000000000000000000000000001
fmax shifted: 3.40282e+38 01111111011111111111111111111111
double: 53 mantissa digits [2.22507e-308, 1.79769e+308]
DBL_MIN: 2.22507e-308
0000000000010000000000000000000000000000000000000000000000000000
DBL_TRUE_MIN: 4.94066e-324
0000000000000000000000000000000000000000000000000000000000000001
DBL_MAX: 1.79769e+308
0111111111101111111111111111111111111111111111111111111111111111
double range (computed): [2.22507e-308, 1.79769e+308]
dmin (computed): 2.22507e-308
0000000000010000000000000000000000000000000000000000000000000000
dtruemin (comp): 4.94066e-324
0000000000000000000000000000000000000000000000000000000000000001
dmax (computed): 1.79769e+308
0111111111101111111111111111111111111111111111111111111111111111
dmin shifted: 2.22507e-308
0000000000010000000000000000000000000000000000000000000000000000
dtruemin shift: 4.94066e-324
0000000000000000000000000000000000000000000000000000000000000001
dmax shifted: 1.79769e+308
0111111111101111111111111111111111111111111111111111111111111111
long double: 64 mantissa digits [3.3621e-4932, 1.18973e+4932]
LDBL_MIN: 3.3621e-4932
0000000000000000000000000000000000000000000000000000000000000001
1000000000000000000000000000000000000000000000000000000000000000
LDBL_TRUE_MIN: 3.6452e-4951
0000000000000000000000000000000000000000000000000000000000000000
0000000000000000000000000000000000000000000000000000000000000001
LDBL_MAX: 1.18973e+4932
0000000000000000000000000000000000000000000000000111111111111110
1111111111111111111111111111111111111111111111111111111111111111
long double range (computed): [3.3621e-4932, 1.18973e+4932]
ldmin (computed): 3.3621e-4932
0000000000000000010101011001100000010111000011110000000000000001
1000000000000000000000000000000000000000000000000000000000000000
ldtruemin (comp): 3.6452e-4951
0000000000000000000000000000000100000000000000000000000000000000
0000000000000000000000000000000000000000000000000000000000000001
ldmax (computed): 1.18973e+4932
0000000000000000010101011001100000010111000011110111111111111110
1111111111111111111111111111111111111111111111111111111111111111
ldmin shifted: 3.3621e-4932
0000000000000000010101011001100000010111000011110000000000000001
1000000000000000000000000000000000000000000000000000000000000000
dtruemin shift: 3.6452e-4951
0000000000000000000000000000000100000000000000000000000000000000
0000000000000000000000000000000000000000000000000000000000000001
ldmax shifted: 1.18973e+4932
0000000000000000010101011001100000010111000011110111111111111110
1111111111111111111111111111111111111111111111111111111111111111
*/
Note the difference in syntax for casting in C and C++:
(long double*)uarr
reinterpret_cast<long double*>(uarr)
reinterpret_cast<long double*>uarr // compile error
*****************************************************************************************
*****************************************************************************************
*****************************************************************************************
*****************************************************************************************
*****************************************************************************************
Exercise 3-25. Define a float variable. Take its address, cast that address to an unsigned char*, and assign it to an unsigned char pointer. Using this pointer and [], index into the float variable and use the printBinary() function defined in this chapter (Chapter_3) to print out a map of the float (go from 0 to sizeof(float) ). Change the value of the float and see if you can figure out what’s going on (the float contains encoded data).
// float binary reversed
#include <stdio.h> // for printf(), putchar()
#include <stdlib.h> // for atof(), exit()
void printBinary(const unsigned char val); // Display a byte in binary
void printBinRev(const unsigned char val); // reversed
void printData(unsigned char*, int); // Display a data type in binary
void printDataRev(unsigned char*, int); // reversed
/* Entry point. Expects exactly one command-line argument, converts it with
 * atof(), and prints the byte-level bit pattern of that value stored first as
 * a float and then as a double. Each value is printed twice: once from the
 * highest-addressed byte down, once from the lowest-addressed byte up with
 * the bits of every byte reversed. */
int main(int argc, char* argv[])
{
    double asDouble;
    float asFloat;

    if (argc != 2)
    { // anything other than exactly one argument is rejected
        printf("Must provide a number\n");
        exit(1); // non-zero status reports the error to the caller
    }

    asDouble = atof(argv[1]);
    asFloat = asDouble; // implicit narrowing to single precision

    /* View each object as raw bytes and dump them in both directions. */
    printData((unsigned char*)&asFloat, sizeof asFloat);
    printDataRev((unsigned char*)&asFloat, sizeof asFloat);
    printData((unsigned char*)&asDouble, sizeof asDouble);
    printDataRev((unsigned char*)&asDouble, sizeof asDouble);

    return 0; // success
}
/* Write the eight low bits of `val` to stdout as '1'/'0' characters,
 * most significant bit first. No newline is written. */
void printBinary(const unsigned char val)
{
    unsigned mask;
    for (mask = 0x80u; mask != 0; mask >>= 1) // 0x80 selects bit 7
    {
        putchar((val & mask) ? '1' : '0');
    }
}
/* Write the eight low bits of `val` to stdout as '1'/'0' characters in
 * reversed order: least significant bit first. No newline is written. */
void printBinRev(const unsigned char val)
{
    int pos = 0;
    while (pos < 8)
    {
        // '0' + 1 is '1' because the digit characters are contiguous
        putchar('0' + ((val >> pos) & 1));
        pos++;
    }
}
/* Print `size` bytes starting at `cp` in binary, walking from the last byte
 * (cp[size-1]) down to cp[0], followed by a newline.
 * For a 4-byte float the order is cp[3], cp[2], cp[1], cp[0]. */
void printData(unsigned char* cp, int size)
{
    int idx = size;
    while (idx > 0)
    {
        printBinary(cp[--idx]);
    }
    putchar('\n');
}
/* Print `size` bytes starting at `cp` in reversed binary form: bytes are
 * visited from cp[0] up to cp[size-1] and each byte is written least
 * significant bit first. A newline terminates the output. */
void printDataRev(unsigned char* cp, int size)
{
    unsigned char* cur = cp;
    int left;
    for (left = size; left > 0; left--, cur++)
    {
        printBinRev(*cur);
    }
    putchar('\n');
}
/*
gcc floatbinrev.c -o floatbinrev
./floatbinrev
Must provide a number
./floatbinrev 0 1
Must provide a number // exactly one number
./floatbinrev 0
00000000000000000000000000000000 // single-precision floating-point
00000000000000000000000000000000 // reversed
0000000000000000000000000000000000000000000000000000000000000000 // double
0000000000000000000000000000000000000000000000000000000000000000 // reversed
./floatbinrev 1
00111111100000000000000000000000
00000000000000000000000111111100
0011111111110000000000000000000000000000000000000000000000000000
0000000000000000000000000000000000000000000000000000111111111100
./floatbinrev 2
01000000000000000000000000000000
00000000000000000000000000000010
0100000000000000000000000000000000000000000000000000000000000000
0000000000000000000000000000000000000000000000000000000000000010
./floatbinrev 3
01000000010000000000000000000000
00000000000000000000001000000010
0100000000001000000000000000000000000000000000000000000000000000
0000000000000000000000000000000000000000000000000001000000000010
./floatbinrev 4
01000000100000000000000000000000
00000000000000000000000100000010
0100000000010000000000000000000000000000000000000000000000000000
0000000000000000000000000000000000000000000000000000100000000010
./floatbinrev 0.1
00111101110011001100110011001101
10110011001100110011001110111100
0011111110111001100110011001100110011001100110011001100110011010
0101100110011001100110011001100110011001100110011001110111111100
./floatbinrev 0.2
00111110010011001100110011001101
10110011001100110011001001111100
0011111111001001100110011001100110011001100110011001100110011010
0101100110011001100110011001100110011001100110011001001111111100
./floatbinrev -0.1
10111101110011001100110011001101
10110011001100110011001110111101
1011111110111001100110011001100110011001100110011001100110011010
0101100110011001100110011001100110011001100110011001110111111101
./floatbinrev -.2
10111110010011001100110011001101
10110011001100110011001001111101
1011111111001001100110011001100110011001100110011001100110011010
0101100110011001100110011001100110011001100110011001001111111101
./floatbinrev 15
01000001011100000000000000000000
00000000000000000000111010000010
0100000000101110000000000000000000000000000000000000000000000000
0000000000000000000000000000000000000000000000000111010000000010
./floatbinrev 15.1
01000001011100011001100110011010
01011001100110011000111010000010
0100000000101110001100110011001100110011001100110011001100110011
1100110011001100110011001100110011001100110011000111010000000010
./floatbinrev 15.2
01000001011100110011001100110011
11001100110011001100111010000010
0100000000101110011001100110011001100110011001100110011001100110
0110011001100110011001100110011001100110011001100111010000000010
./floatbinrev 15.3
01000001011101001100110011001101
10110011001100110010111010000010
0100000000101110100110011001100110011001100110011001100110011010
0101100110011001100110011001100110011001100110010111010000000010
*/
*****************************************************************************************
*****************************************************************************************
*****************************************************************************************
// float binary reversed
#include <cstdlib> // for atof(), exit()
#include <iostream>
using std::cout;
using std::endl;
void printBinary(const unsigned char val); // Display a byte in binary
void printBinRev(const unsigned char val); // reversed
void printData(unsigned char*, int); // Display a data type in binary
void printDataRev(unsigned char*, int); // reversed
// Entry point. Expects exactly one command-line argument, converts it with
// atof(), and prints the byte-level bit pattern of the value stored as a float
// and as a double. Each is printed from the highest-addressed byte down, then
// from the lowest-addressed byte up with the bits of every byte reversed.
int main(int argc, char* argv[])
{
    if (argc != 2)
    { // anything other than exactly one argument is rejected
        cout << "Must provide a number" << endl;
        exit(1); // non-zero status reports the error to the caller
    }

    double wide = atof(argv[1]);
    float narrow = wide; // implicit narrowing to single precision

    // Byte views of the two objects.
    unsigned char* const floatBytes = reinterpret_cast<unsigned char*>(&narrow);
    unsigned char* const doubleBytes = reinterpret_cast<unsigned char*>(&wide);

    printData(floatBytes, sizeof narrow);
    printDataRev(floatBytes, sizeof narrow);
    printData(doubleBytes, sizeof wide);
    printDataRev(doubleBytes, sizeof wide);

    return 0; // success
}
// Write the eight low bits of `val` to cout as "1"/"0",
// most significant bit first. No newline is written.
void printBinary(const unsigned char val)
{
    int pos = 8;
    while (pos > 0)
    {
        --pos; // visits bit 7 down to bit 0
        cout << (((val >> pos) & 1) ? "1" : "0");
    }
}
// Write the eight low bits of `val` to cout as "1"/"0" in reversed order:
// least significant bit first. No newline is written.
void printBinRev(const unsigned char val)
{
    for (unsigned mask = 1u; mask <= 0x80u; mask <<= 1)
    {
        if ((val & mask) != 0)
        {
            cout << "1";
        }
        else
        {
            cout << "0";
        }
    }
}
// Print `size` bytes starting at `cp` in binary, walking from cp[size-1]
// down to cp[0], then end the line (and flush) with endl.
// For a 4-byte float the order is cp[3], cp[2], cp[1], cp[0].
void printData(unsigned char* cp, int size)
{
    int idx = size;
    while (idx > 0)
    {
        printBinary(cp[--idx]);
    }
    cout << endl;
}
// Print `size` bytes starting at `cp` in reversed binary form: bytes are
// visited from cp[0] up to cp[size-1], each written least significant bit
// first, then the line is ended (and flushed) with endl.
void printDataRev(unsigned char* cp, int size)
{
    unsigned char* cur = cp;
    for (int left = size; left > 0; --left, ++cur)
    {
        printBinRev(*cur);
    }
    cout << endl;
}
/*
g++ FloatBinRev.cpp -o FloatBinRev
./FloatBinRev
Must provide a number
./FloatBinRev 0 1
Must provide a number // exactly one number
./FloatBinRev 0
00000000000000000000000000000000 // single-precision floating-point
00000000000000000000000000000000 // reversed
0000000000000000000000000000000000000000000000000000000000000000 // double
0000000000000000000000000000000000000000000000000000000000000000 // reversed
./FloatBinRev 1
00111111100000000000000000000000
00000000000000000000000111111100
0011111111110000000000000000000000000000000000000000000000000000
0000000000000000000000000000000000000000000000000000111111111100
./FloatBinRev 2
01000000000000000000000000000000
00000000000000000000000000000010
0100000000000000000000000000000000000000000000000000000000000000
0000000000000000000000000000000000000000000000000000000000000010
./FloatBinRev 3
01000000010000000000000000000000
00000000000000000000001000000010
0100000000001000000000000000000000000000000000000000000000000000
0000000000000000000000000000000000000000000000000001000000000010
./FloatBinRev 4
01000000100000000000000000000000
00000000000000000000000100000010
0100000000010000000000000000000000000000000000000000000000000000
0000000000000000000000000000000000000000000000000000100000000010
./FloatBinRev 0.1
00111101110011001100110011001101
10110011001100110011001110111100
0011111110111001100110011001100110011001100110011001100110011010
0101100110011001100110011001100110011001100110011001110111111100
./FloatBinRev 0.2
00111110010011001100110011001101
10110011001100110011001001111100
0011111111001001100110011001100110011001100110011001100110011010
0101100110011001100110011001100110011001100110011001001111111100
./FloatBinRev -0.1
10111101110011001100110011001101
10110011001100110011001110111101
1011111110111001100110011001100110011001100110011001100110011010
0101100110011001100110011001100110011001100110011001110111111101
./FloatBinRev -.2
10111110010011001100110011001101
10110011001100110011001001111101
1011111111001001100110011001100110011001100110011001100110011010
0101100110011001100110011001100110011001100110011001001111111101
./FloatBinRev 15
01000001011100000000000000000000
00000000000000000000111010000010
0100000000101110000000000000000000000000000000000000000000000000
0000000000000000000000000000000000000000000000000111010000000010
./FloatBinRev 15.1
01000001011100011001100110011010
01011001100110011000111010000010
0100000000101110001100110011001100110011001100110011001100110011
1100110011001100110011001100110011001100110011000111010000000010
./FloatBinRev 15.2
01000001011100110011001100110011
11001100110011001100111010000010
0100000000101110011001100110011001100110011001100110011001100110
0110011001100110011001100110011001100110011001100111010000000010
./FloatBinRev 15.3
01000001011101001100110011001101
10110011001100110010111010000010
0100000000101110100110011001100110011001100110011001100110011010
0101100110011001100110011001100110011001100110010111010000000010
*/
*****************************************************************************************
*****************************************************************************************
*****************************************************************************************
*****************************************************************************************
*****************************************************************************************
Exercise 3-26. Define an array of int. Take the starting address of that array and use static_cast to convert it into a void*. Write a function that takes a void*, a number (indicating a number of bytes), and a value (indicating the value to which each byte should be set) as arguments. The function should set each byte in the specified range to the specified value. Try out the function on your array of int.
#include <stdio.h> // for printf(), putchar()
#include <math.h> // for pow()
#define SIZE 10
// note: sizeof(char) is 1 by definition, so a char* addresses single bytes
void set(char* array, int bytes, int value); // initialize or set array
void print(void* array, int size, int type); // print array
// type: sizeof(data type)
// Demonstrates byte-wise initialization of char, short and int arrays.
// Each array is printed while still uninitialized, then again after every
// one of its bytes has been set (through a char* alias) to 0, 1 and 0xff.
// The statement order matters: the "garbage" lines depend on what happens
// to be on the stack when each array is first printed.
int main()
{
char charArray[SIZE];
// automatically convert char* (charArray) to void* in function call:
print(charArray, SIZE, sizeof(char)); // print uninitialized array (indeterminate contents)
set(charArray, SIZE, 65); // initialize array (65 is ASCII for `A')
print(charArray, SIZE, sizeof(char)); // print initialized array
short shArray[SIZE];
// automatically convert short* (shArray) to void* in function call:
print(shArray, SIZE, sizeof(short)); // print uninitialized array (indeterminate contents)
char* cp = (char*)shArray; // byte view of the array, starts at &shArray[0]
/*
// Alternative conversion (two steps, via void*):
void* p = (void*)shArray; // &shArray[0]
char* cp = (char*)p;
*/
set(cp, SIZE*sizeof(short), 0); // initialize array to 0 (byte count = elements * element size)
print(shArray, SIZE, sizeof(short)); // print zeroed array
set(cp, SIZE*sizeof(short), 1); // set each byte to 00000001
int i = 1 + pow(2, 8); // value a 2-byte short holds when both of its bytes are 1
printf("1 + pow(2, 8) = %d\n", i);
print(shArray, SIZE, sizeof(short)); // print array; compare with the value printed just above
set(cp, SIZE*sizeof(short), 0xff); // set array to 1...1 (every bit set)
print(shArray, SIZE, sizeof(short)); // print array
int intArray[SIZE/2];
// automatically convert int* (intArray) to void* in function call:
print(intArray, SIZE/2, sizeof(int)); // print uninitialized array (indeterminate contents)
cp = (char*)intArray; // byte view of the array, starts at &intArray[0]
set(cp, (SIZE/2)*sizeof(int), 0); // initialize array to 0
print(intArray, SIZE/2, sizeof(int)); // print zeroed array
set(cp, (SIZE/2)*sizeof(int), 1); // set each byte to 00000001
i = 1 + pow(2, 8) + pow(2, 16) + pow(2, 24); // value a 4-byte int holds when all of its bytes are 1
printf("1 + pow(2, 8) + pow(2, 16) + pow(2, 24) = %d\n", i);
print(intArray, SIZE/2, sizeof(int)); // print array; compare with the value printed just above
set(cp, (SIZE/2)*sizeof(int), 0xff); // set array to 1...1 (every bit set)
print(intArray, SIZE/2, sizeof(int)); // print array
return 0;
}
/* Fill the first `bytes` bytes of the contiguous memory at `array` with
 * `value` (converted to char). Does nothing when `bytes` is zero or negative. */
void set(char* array, int bytes, int value)
{
    char* cursor = array;
    int left = bytes;
    while (left > 0)
    {
        *cursor++ = value;
        left--;
    }
}
/* Print `size` elements of `array`, each followed by ", ", then a newline.
 * `type` is the sizeof() of the element type and selects how the untyped
 * pointer is interpreted: char (printed with %c), short or int (printed with
 * %d). Any other value prints "Not implemented". */
void print(void* array, int size, int type)
{
    int idx = 0;
    if (type == (int)sizeof(char))
    {
        const char* chars = (const char*)array;
        while (idx < size)
        {
            printf("%c, ", chars[idx]);
            idx++;
        }
    }
    else if (type == (int)sizeof(short))
    {
        const short* shorts = (const short*)array;
        while (idx < size)
        {
            printf("%d, ", shorts[idx]);
            idx++;
        }
    }
    else if (type == (int)sizeof(int))
    {
        const int* ints = (const int*)array;
        while (idx < size)
        {
            printf("%d, ", ints[idx]);
            idx++;
        }
    }
    else
    {
        printf("Not implemented");
    }
    putchar('\n');
}
/*
gcc arrays.c -o arrays -lm // link math library
./arrays
, , @, Q, �, p, 6, , , b, // garbage
A, A, A, A, A, A, A, A, A, A,
640, 0, 0, 0, -1, 0, 1, 0, 20496, -26493, // garbage
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1 + pow(2, 8) = 257
257, 257, 257, 257, 257, 257, 257, 257, 257, 257,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
2017608424, 32552, -298109536, 22041, 0, // garbage
0, 0, 0, 0, 0,
1 + pow(2, 8) + pow(2, 16) + pow(2, 24) = 16843009
16843009, 16843009, 16843009, 16843009, 16843009,
-1, -1, -1, -1, -1,
*/
shArray[0] = 0000000100000001 = 1+2^8 = 257,
intArray[0] = 00000001000000010000000100000001 = 1+2^8+2^16+2^24 = 16843009.
*****************************************************************************************
*****************************************************************************************
*****************************************************************************************
#include <iostream>
#include <cmath> // for pow()
using std::cout;
using std::endl;
#define SIZE 10
// note: sizeof(char) is 1 by definition, so a char* addresses single bytes
void set(char* array, int bytes, int value); // initialize array
template <typename T> // generics
void print(T* array, int size); // print array
// Demonstrates byte-wise initialization of char, short and int arrays.
// Each array is printed while still uninitialized, then again after every
// one of its bytes has been set (through a char* alias) to 0, 1 and 0xFF.
// The statement order matters: the "garbage" lines depend on what happens
// to be on the stack when each array is first printed.
int main()
{
char charArray[SIZE];
print<char>(charArray, SIZE); // print uninitialized array (indeterminate contents)
set(charArray, SIZE, 65); // initialize array (65 is ASCII for `A')
print<char>(charArray, SIZE); // print initialized array
short shArray[SIZE];
print<short>(shArray, SIZE); // print uninitialized array (indeterminate contents)
void* p = static_cast<void*>(shArray); // &shArray[0]; static_cast cannot go from short* to char* directly
char* cp = static_cast<char*>(p); // second step: void* to char* gives a byte view of the array
set(cp, SIZE*sizeof(short), 0); // initialize array to 0 (byte count = elements * element size)
print<short>(shArray, SIZE); // print zeroed array
set(cp, SIZE*sizeof(short), 1); // set each byte to 00000001
int i = 1 + pow(2, 8); // value a 2-byte short holds when both of its bytes are 1
cout << "1 + pow(2, 8) = " << i << endl;
print<short>(shArray, SIZE); // print array; compare with the value printed just above
set(cp, SIZE*sizeof(short), 0XFF); // set array to 1...1 (every bit set)
print<short>(shArray, SIZE); // print array
int intArray[SIZE/2];
print<int>(intArray, SIZE/2); // print uninitialized array (indeterminate contents)
p = static_cast<void*>(intArray); // &intArray[0]
cp = static_cast<char*>(p); // byte view of the int array
set(cp, (SIZE/2)*sizeof(int), 0); // initialize array to 0
print<int>(intArray, SIZE/2); // print zeroed array
set(cp, (SIZE/2)*sizeof(int), 1); // set each byte to 00000001
i = 1 + pow(2, 8) + pow(2, 16) + pow(2, 24); // value a 4-byte int holds when all of its bytes are 1
cout << "1 + pow(2, 8) + pow(2, 16) + pow(2, 24) = " << i << endl;
print<int>(intArray, SIZE/2); // print array; compare with the value printed just above
set(cp, (SIZE/2)*sizeof(int), 0XFF); // set array to 1...1 (every bit set)
print<int>(intArray, SIZE/2); // print array
return 0;
}
// Fill the first `bytes` bytes of the contiguous memory at `array` with
// `value` (converted to char). Does nothing when `bytes` is zero or negative.
void set(char* array, int bytes, int value)
{
    char* cursor = array;
    for (int left = bytes; left > 0; --left)
    {
        *cursor = value;
        ++cursor;
    }
}
// Stream `size` elements of `array` to cout, each followed by ", ",
// then end the line (and flush) with endl. T is the element type.
template <typename T>
void print(T* array, int size)
{
    T* item = array;
    for (int left = size; left > 0; --left, ++item)
    {
        cout << *item << ", ";
    }
    cout << endl;
}
/*
g++ Arrays.cpp -o Arrays
./Arrays
, , @, Q, �, p, 6, , , b, // garbage
A, A, A, A, A, A, A, A, A, A,
640, 0, 0, 0, -1, 0, 1, 0, 20496, -26493, // garbage
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1 + pow(2, 8) = 257
257, 257, 257, 257, 257, 257, 257, 257, 257, 257,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
2017608424, 32552, -298109536, 22041, 0, // garbage
0, 0, 0, 0, 0,
1 + pow(2, 8) + pow(2, 16) + pow(2, 24) = 16843009
16843009, 16843009, 16843009, 16843009, 16843009,
-1, -1, -1, -1, -1,
*/
Using static_cast, we need a two-step conversion:
void* p = static_cast<void*>(shArray); // &shArray[0]
char* cp = static_cast<char*>(p);
compared to the C-style conversion used in arrays.c:
char* cp = (char*)shArray; // &shArray[0]
Alternatively, we can use a reinterpret_cast:
char* cp = reinterpret_cast<char*>(shArray); // &shArray[0]
*****************************************************************************************
*****************************************************************************************
*****************************************************************************************
*****************************************************************************************
*****************************************************************************************
Exercise 3-27. Create a const array of double and a volatile array of double. Index through each array and use const_cast to cast each element to non-const and non-volatile, respectively, and assign a value to each element.
#include <stdio.h> // for printf(), putchar()
#define SIZE 10
void print(double* array, int size);
void set(double* array, int size);
// Exercise driver: creates const and volatile arrays of double, casts the
// qualifiers away and assigns through the resulting plain double*.
// NOTE: writing to an object that was defined const, or accessing an object
// defined volatile through a non-volatile lvalue, is undefined behavior in C.
// It is done here only because the exercise asks for it; the sample output
// below shows what one particular compiler happened to produce.
int main()
{
int i;
const double carr[SIZE]; // uninitialized
volatile double varr[SIZE]; // arrays
print((double*)carr, SIZE); // cast avoids const warning; prints indeterminate values
print((double*)varr, SIZE); // cast avoids volatile warning; prints indeterminate values
double* dp = (double*)carr; // qualifier-free alias of the const array
for (i = 0; i < SIZE; i++) {dp[i] = i;} // element i becomes i (see NOTE above)
dp = (double*)varr; // qualifier-free alias of the volatile array
for (i = 0; i < SIZE; i++) {dp[i] = i*i;} // element i becomes i squared
print((double*)carr, SIZE);
print((double*)varr, SIZE);
const double carray[SIZE] = {0,1}; // initialized
volatile double varray[SIZE] = {0,1}; // arrays (remaining elements are zero)
print((double*)carray, SIZE);
print((double*)varray, SIZE);
set((double*)carray, SIZE); // overwrite through the cast pointer: element i becomes i
set((double*)varray, SIZE);
print((double*)carray, SIZE);
print((double*)varray, SIZE);
return 0;
}
/* Print `size` doubles from `array` using the %g format, each followed by
 * ", ", and finish with a newline. */
void print(double* array, int size)
{
    const double* item = array;
    int left;
    for (left = size; left > 0; left--)
    {
        printf("%g, ", *item);
        item++;
    }
    putchar('\n');
}
/* Store each element's own index in it: array[0] = 0, array[1] = 1, ...
 * for the first `size` elements. Does nothing when `size` is not positive. */
void set(double* array, int size)
{
    int idx = 0;
    while (idx < size)
    {
        array[idx] = idx;
        idx++;
    }
}
/*
gcc const.c -o const
./const
6.79039e-313, 0, 0, 0, 4.94066e-324, 2.15251e-314, 0, 0, 0, 0, // garbage
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // garbage
0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
0, 1, 4, 9, 16, 25, 36, 49, 64, 81,
0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
*/
*****************************************************************************************
*****************************************************************************************
*****************************************************************************************
#include <iostream>
using std::cout;
using std::endl;
#define SIZE 10
void print(double* array, int size);
void set(double* array, int size);
// Exercise driver: creates const and volatile arrays of double, removes the
// qualifiers with const_cast and assigns through the resulting double*.
// NOTE: modifying an object that was defined const, or accessing an object
// defined volatile through a non-volatile glvalue, is undefined behavior in
// C++. It is done here only because the exercise asks for it; the sample
// output below shows what one particular compiler happened to produce.
int main()
{
// const double carr[SIZE]; // compile error: uninitialized const array
const double carr[SIZE] = {}; // empty initializer (all zero)
volatile double varr[SIZE]; // uninitialized volatile array
print(const_cast<double*>(carr), SIZE); // cast avoids const error
print(const_cast<double*>(varr), SIZE/2); // cast avoids volatile error; only the first SIZE/2 (indeterminate) values are printed
// double* dp = const_cast<double*>carr; // compile error: missing parentheses
double* dp = const_cast<double*>(carr); // OK, cast op uses func call syntax:
// cast_operator<data_type pointer or reference>(operand)
for (int i = 0; i < SIZE; i++) {dp[i] = i;} // element i becomes i (see NOTE above)
dp = const_cast<double*>(varr); // const_cast also removes volatile
for (int i = 0; i < SIZE; i++) {dp[i] = i*i;} // element i becomes i squared
print(const_cast<double*>(carr), SIZE);
print(const_cast<double*>(varr), SIZE);
const double carray[SIZE] = {0,1}; // initialized
volatile double varray[SIZE] = {0,1}; // arrays (remaining elements are zero)
print(const_cast<double*>(carray), SIZE);
print(const_cast<double*>(varray), SIZE);
set(const_cast<double*>(carray), SIZE); // overwrite through the cast pointer: element i becomes i
set(const_cast<double*>(varray), SIZE);
print(const_cast<double*>(carray), SIZE);
print(const_cast<double*>(varray), SIZE);
return 0;
}
// Stream `size` doubles from `array` to cout, each followed by ", ",
// then end the line (and flush) with endl.
void print(double* array, int size)
{
    double* item = array;
    int left = size;
    while (left > 0)
    {
        cout << *item << ", ";
        ++item;
        --left;
    }
    cout << endl;
}
// Store each element's own index in it: array[0] = 0, array[1] = 1, ...
// for the first `size` elements. Does nothing when `size` is not positive.
void set(double* array, int size)
{
    int idx = 0;
    while (idx < size)
    {
        array[idx] = idx;
        ++idx;
    }
}
/*
g++ Const.cpp -o Const
./Const
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
6.91459e-310, 6.95299e-310, 6.91459e-310, 2.96439e-323, 3.39519e-313, // garbage
0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
0, 1, 4, 9, 16, 25, 36, 49, 64, 81,
0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
*/
*****************************************************************************************
*****************************************************************************************
*****************************************************************************************
*****************************************************************************************
*****************************************************************************************
Exercise 3-28. Create a function that takes a pointer to an array of double and a value indicating the size of that array. The function should print each element in the array. Now create an array of double and initialize each element to zero, then use your function to print the array. Next use reinterpret_cast to cast the starting address of your array to an unsigned char*, and set each byte of the array to 1 (hint: you’ll need to use sizeof to calculate the number of bytes in a double). Now use your array-printing function to print the results. Why do you think each element was not set to the value 1.0?
#include <stdio.h> // for printf(), putchar()
#define FLOAT 0 // used
#define DOUBLE 1 // for
#define LONGDOUBLE 2 // printing
#define SIZE 5 // array size
// note: sizeof(char) is 1 by definition, so a char* addresses single bytes
void set(char* array, int bytes, int value); // initialize or set array
void print(void* array, int size, int type); // print array
// type: 0 for float, 1 for double, 2 for long double
void printBinary(const unsigned char val); // Display a byte in binary
void printData(unsigned char*, int); // Display a data type in binary
void printData2(unsigned char*, int); // Display data type in binary (2 parts)
// Exercise driver: for float, double and long double arrays in turn, print
// the array and the bit pattern of its first element(s) while uninitialized,
// then again after every byte has been set (through a char* alias) to 0, to 1
// and to 0xff. Shows that setting every byte to 1 does not give the value 1.0.
// The statement order matters: the "garbage" lines depend on what happens to
// be on the stack when each array is first printed.
int main()
{
float fArray[SIZE];
// automatically convert float* (fArray) to void* in function call:
print(fArray, SIZE, FLOAT); // print uninitialized array (indeterminate contents)
printData((unsigned char*)(fArray), sizeof(float)); // bits of fArray[0] only
char* cp = (char*)fArray; // byte view of the array, starts at &fArray[0]
/*
// Alternative conversion (two steps, via void*):
void* p = (void*)fArray; // &fArray[0]
char* cp = (char*)p;
*/
set(cp, SIZE*sizeof(float), 0); // initialize array to 0 (byte count = elements * element size)
print(fArray, SIZE, FLOAT); // print zeroed array
printData((unsigned char*)(fArray), sizeof(float)); // bits of fArray[0]
set(cp, SIZE*sizeof(float), 1); // set each byte to 00000001
print(fArray, SIZE, FLOAT); // print array (elements are not 1.0)
printData((unsigned char*)(fArray), sizeof(float)); // bits of fArray[0]
set(cp, SIZE*sizeof(float), 0xff); // set array to 1...1 (every bit set)
print(fArray, SIZE, FLOAT); // print array
printData((unsigned char*)(fArray), sizeof(float)); // bits of fArray[0]
putchar('\n'); // blank line between the float and double sections
double dArray[SIZE];
// automatically convert double* (dArray) to void* in function call:
print(dArray, SIZE, DOUBLE); // print uninitialized array (indeterminate contents)
printData((unsigned char*)(dArray), sizeof(double)); // bits of dArray[0] only
cp = (char*)dArray; // byte view of the array, starts at &dArray[0]
set(cp, SIZE*sizeof(double), 0); // initialize array to 0
print(dArray, SIZE, DOUBLE); // print zeroed array
printData((unsigned char*)(dArray), sizeof(double)); // bits of dArray[0]
set(cp, SIZE*sizeof(double), 1); // set each byte to 00000001
print(dArray, SIZE, DOUBLE); // print array (elements are not 1.0)
printData((unsigned char*)(dArray), sizeof(double)); // bits of dArray[0]
set(cp, SIZE*sizeof(double), 0xff); // set array to 1...1 (every bit set)
print(dArray, SIZE, DOUBLE); // print array
printData((unsigned char*)(dArray), sizeof(double)); // bits of dArray[0]
putchar('\n'); // blank line between the double and long double sections
long double ldArray[SIZE];
// automatically convert long double* (ldArray) to void* in function call:
print(ldArray, SIZE, LONGDOUBLE); // print uninitialized array (indeterminate contents)
printData2((unsigned char*)(ldArray), sizeof(long double)); // bits of ldArray[0], printed as two lines
printData2((unsigned char*)(ldArray+1), sizeof(long double)); // bits of ldArray[1]
printData2((unsigned char*)(&ldArray[2]), sizeof(long double)); // bits of ldArray[2]
cp = (char*)ldArray; // byte view of the array, starts at &ldArray[0]
set(cp, SIZE*sizeof(long double), 0); // initialize array to 0
print(ldArray, SIZE, LONGDOUBLE); // print zeroed array
printData2((unsigned char*)(ldArray), sizeof(long double)); // bits of ldArray[0]
set(cp, SIZE*sizeof(long double), 1); // set each byte to 00000001
print(ldArray, SIZE, LONGDOUBLE); // print array (elements are not 1.0)
printData2((unsigned char*)(ldArray), sizeof(long double)); // bits of ldArray[0]
set(cp, SIZE*sizeof(long double), 0xff); // set array to 1...1 (every bit set)
print(ldArray, SIZE, LONGDOUBLE); // print array
printData2((unsigned char*)(ldArray), sizeof(long double)); // bits of ldArray[0]
return 0;
}
/* Fill the first `bytes` bytes of the contiguous memory at `array` with
 * `value` (converted to char). Does nothing when `bytes` is zero or negative. */
void set(char* array, int bytes, int value)
{
    int offset = 0;
    while (offset < bytes)
    {
        *(array + offset) = value;
        offset++;
    }
}
/* Print `size` elements of `array`, each followed by ", ", then a newline.
 * `type` selects how the untyped pointer is interpreted: FLOAT and DOUBLE
 * elements are printed with %g, LONGDOUBLE elements with %Lg. Any other
 * value prints "Not implemented". */
void print(void* array, int size, int type)
{
    int idx = 0;
    switch (type)
    {
        case FLOAT:
        {
            const float* floats = (const float*)array;
            while (idx < size)
            {
                printf("%g, ", floats[idx]); // float is promoted to double
                idx++;
            }
            break;
        }
        case DOUBLE:
        {
            const double* doubles = (const double*)array;
            while (idx < size)
            {
                printf("%g, ", doubles[idx]);
                idx++;
            }
            break;
        }
        case LONGDOUBLE:
        {
            const long double* longDoubles = (const long double*)array;
            while (idx < size)
            {
                printf("%Lg, ", longDoubles[idx]);
                idx++;
            }
            break;
        }
        default:
            printf("Not implemented");
            break;
    }
    putchar('\n');
}
/* Write the eight low bits of `val` to stdout as '1'/'0' characters,
 * most significant bit first. No newline is written. */
void printBinary(const unsigned char val)
{
    int shift = 8;
    while (shift > 0)
    {
        shift--; // visits bit 7 down to bit 0
        putchar(((val >> shift) & 1) ? '1' : '0');
    }
}
/* Print `size` bytes starting at `cp` in binary, walking from the last byte
 * (cp[size-1]) down to cp[0], followed by a newline.
 * For a 4-byte float the order is cp[3], cp[2], cp[1], cp[0]. */
void printData(unsigned char* cp, int size)
{
    int idx = size;
    while (idx > 0)
    {
        idx--;
        printBinary(cp[idx]);
    }
    putchar('\n');
}
/* Print `size` bytes starting at `cp` in binary on two lines (used for
 * long double). The first line holds the upper part, cp[size-1] down to
 * cp[size/2]; the second line holds cp[size/2 - 1] down to cp[0].
 * With a 16-byte long double that is cp[15..8] followed by cp[7..0].
 * `size` is expected to be even. */
void printData2(unsigned char* cp, int size)
{
    const int boundary = size / 2;
    int idx = size;

    while (idx > boundary) // upper part
    {
        printBinary(cp[--idx]);
    }
    putchar('\n');

    while (idx > 0) // lower part
    {
        printBinary(cp[--idx]);
    }
    putchar('\n');
}
/*
gcc double.c -o double
./double
0, 0, 0, 0, 0, // garbage
00000000000000000000000000000000
0, 0, 0, 0, 0,
00000000000000000000000000000000
2.36943e-38, 2.36943e-38, 2.36943e-38, 2.36943e-38, 2.36943e-38,
00000001000000010000000100000001
-nan, -nan, -nan, -nan, -nan, // exp bits all 1, mantissa not 0
11111111111111111111111111111111 // sign bit is 1, negative
0, 0, 0, 0, 0, // garbage
0000000000000000000000000000000000000000000000000000000000000000
0, 0, 0, 0, 0,
0000000000000000000000000000000000000000000000000000000000000000
7.7486e-304, 7.7486e-304, 7.7486e-304, 7.7486e-304, 7.7486e-304,
0000000100000001000000010000000100000001000000010000000100000001
-nan, -nan, -nan, -nan, -nan, // sign bit 1, exp bits 1, mantissa not 0
1111111111111111111111111111111111111111111111111111111111111111
-nan, nan, -nan, -nan, -nan, // garbage (Unnormal, invalid operands)
0000000000000000000000000000000000000000111100001011011011111111 // part 2
0000000000000000010101011011111100010010011000110111000001000000 // part 1
0000000000000000011111111111111100001010100101010101011101110111 // last bits
0000000000000000000000000000000000000000000000000000000011000010 // first
0000000000000000010101011011111100010010011000111000100000001101 // 127-64
0000000000000000011111111111111100001010100101010101011101110110 // 63-0
0, 0, 0, 0, 0, (sign bit 79 is 0, positive)
0000000000000000000000000000000000000000000000000000000000000000 // 78-64 (0)
0000000000000000000000000000000000000000000000000000000000000000 // 63-0 (0)
nan, nan, nan, nan, nan, // Unnormal, invalid operands (bit 79 is 0, positive)
0000000100000001000000010000000100000001000000010000000100000001 // 78-64 (0/1)
0000000100000001000000010000000100000001000000010000000100000001 // 63 (0)
-nan, -nan, -nan, -nan, -nan, // Quiet Not a Number (bit 79 is 1, negative)
1111111111111111111111111111111111111111111111111111111111111111 // 78-64 (1)
1111111111111111111111111111111111111111111111111111111111111111 // 63-0 (1)
*/
00000001000000010000000100000001 = ((-1)^0)*2^(2^1-127)*(1+2^(-7)+2^(-15)+2^(-23)) = 1*2^(-125)*1.00784313679 ~ 2.36943e-38
(here ^ means "raised to power" and ~ means "approximates to").
etc.
*****************************************************************************************
*****************************************************************************************
*****************************************************************************************
#include <iostream>
using std::cout;
using std::endl;
#define SIZE 5 // array size
// note: sizeof(char) is 1 by definition, so a char* addresses single bytes
void set(char* array, int bytes, int value); // initialize array
template <typename T> // generics
void print(T* array, int size); // print array
void printBinary(const unsigned char val); // Display a byte in binary
void printData(unsigned char*, int); // Display a data type in binary
void printData2(unsigned char*, int); // Display data type in binary (2 parts)
// Exercise driver: for float, double and long double arrays in turn, print
// the array and the bit patterns of its first three elements while
// uninitialized, then the array and its first element again after every byte
// has been set (through a char* alias) to 0, to 1 and to 0xff. Shows that
// setting every byte to 1 does not give the value 1.0.
// The statement order matters: the "garbage" lines depend on what happens to
// be on the stack when each array is first printed.
int main()
{
float fArray[SIZE];
// print<float> takes the float* directly (the array decays to a pointer):
print<float>(fArray, SIZE); // print uninitialized array (indeterminate contents)
printData(reinterpret_cast<unsigned char*>(fArray), sizeof(float)); // bits of fArray[0]
printData(reinterpret_cast<unsigned char*>(fArray+1), sizeof(float)); // bits of fArray[1]
printData(reinterpret_cast<unsigned char*>(&fArray[2]), sizeof(float)); // bits of fArray[2]
char* cp = reinterpret_cast<char*>(fArray); // byte view of the array, starts at &fArray[0]
/*
// Alternative conversion (two steps, via void*; note the parentheses):
void* p = static_cast<void*>(fArray); // &fArray[0]
char* cp = static_cast<char*>(p);
*/
set(cp, SIZE*sizeof(float), 0); // initialize array to 0 (byte count = elements * element size)
print<float>(fArray, SIZE); // print zeroed array
printData(reinterpret_cast<unsigned char*>(fArray), sizeof(float)); // bits of fArray[0]
set(cp, SIZE*sizeof(float), 1); // set each byte to 00000001
print<float>(fArray, SIZE); // print array (elements are not 1.0)
printData(reinterpret_cast<unsigned char*>(fArray), sizeof(float)); // bits of fArray[0]
set(cp, SIZE*sizeof(float), 0xff); // set array to 1...1 (every bit set)
print<float>(fArray, SIZE); // print array
printData(reinterpret_cast<unsigned char*>(fArray), sizeof(float)); // bits of fArray[0]
cout << endl; // blank line between the float and double sections
double dArray[SIZE];
// print<double> takes the double* directly (the array decays to a pointer):
print<double>(dArray, SIZE); // print uninitialized array (indeterminate contents)
printData(reinterpret_cast<unsigned char*>(dArray), sizeof(double)); // bits of dArray[0]
printData(reinterpret_cast<unsigned char*>(dArray+1), sizeof(double)); // bits of dArray[1]
printData(reinterpret_cast<unsigned char*>(&dArray[2]), sizeof(double)); // bits of dArray[2]
cp = reinterpret_cast<char*>(dArray); // byte view of the array, starts at &dArray[0]
set(cp, SIZE*sizeof(double), 0); // initialize array to 0
print<double>(dArray, SIZE); // print zeroed array
printData(reinterpret_cast<unsigned char*>(dArray), sizeof(double)); // bits of dArray[0]
set(cp, SIZE*sizeof(double), 1); // set each byte to 00000001
print<double>(dArray, SIZE); // print array (elements are not 1.0)
printData(reinterpret_cast<unsigned char*>(dArray), sizeof(double)); // bits of dArray[0]
set(cp, SIZE*sizeof(double), 0xff); // set array to 1...1 (every bit set)
print<double>(dArray, SIZE); // print array
printData(reinterpret_cast<unsigned char*>(dArray), sizeof(double)); // bits of dArray[0]
cout << endl; // blank line between the double and long double sections
long double ldArray[SIZE];
// print<long double> takes the long double* directly (the array decays to a pointer):
print<long double>(ldArray, SIZE); // print uninitialized array (indeterminate contents)
printData2(reinterpret_cast<unsigned char*>(ldArray), sizeof(long double)); // bits of ldArray[0], printed as two lines
printData2(reinterpret_cast<unsigned char*>(ldArray+1), sizeof(long double)); // bits of ldArray[1]
printData2(reinterpret_cast<unsigned char*>(&ldArray[2]), sizeof(long double)); // bits of ldArray[2]
cp = reinterpret_cast<char*>(ldArray); // byte view of the array, starts at &ldArray[0]
set(cp, SIZE*sizeof(long double), 0); // initialize array to 0
print<long double>(ldArray, SIZE); // print zeroed array
printData2(reinterpret_cast<unsigned char*>(ldArray), sizeof(long double)); // bits of ldArray[0]
set(cp, SIZE*sizeof(long double), 1); // set each byte to 00000001
print<long double>(ldArray, SIZE); // print array (elements are not 1.0)
printData2(reinterpret_cast<unsigned char*>(ldArray), sizeof(long double)); // bits of ldArray[0]
set(cp, SIZE*sizeof(long double), 0xff); // set array to 1...1 (every bit set)
print<long double>(ldArray, SIZE); // print array
printData2(reinterpret_cast<unsigned char*>(ldArray), sizeof(long double)); // bits of ldArray[0]
return 0;
}
void set(char* array, int bytes, int value) // fill a raw byte buffer
{ // each of the first `bytes' bytes of `array' receives `value' (narrowed to char)
const char fill = static_cast<char>(value); // same narrowing the plain assignment performs
char* out = array; // next byte to overwrite
int left = bytes; // bytes still to write; nothing is written if bytes <= 0
while (left > 0)
{
*out++ = fill;
--left;
}
}
template <typename T>
void print(T* array, int size) // write the array elements on one line
{ // every one of the `size' elements is followed by ", "; endl closes the line
int idx = 0; // position of the next element to write
while (idx < size)
{
cout << array[idx] << ", ";
++idx;
}
cout << endl;
}
void printBinary(const unsigned char val) // Write the 8 bits of a byte
{ // a one-bit mask moves from the most significant bit (0x80) down to bit 0
for(unsigned int mask = 0x80u; mask != 0; mask >>= 1)
{cout << ((val & mask) ? "1" : "0");} // "1" for a set bit, "0" otherwise
}
void printData(unsigned char* cp, int size) // Write the bytes of an object as bits
{ // bytes are written from index size-1 down to 0, e.g. cp[3], cp[2], cp[1], cp[0]
int idx = size; // one past the next byte to write
while(idx > 0)
{
--idx;
printBinary(cp[idx]);
}
cout << endl; // all bytes share one output line
}
// Display a data type in binary, split over two output lines:
void printData2(unsigned char* cp, int size) // for long double
{ // e.g. sizeof(long double) = 16 gives 2 lines of 8 bytes each
const int half = size / 2; // we assume size is even
int idx = size; // one past the next byte to write
while(idx > half) // upper half first:
{
--idx;
printBinary(cp[idx]); // cp[15], ..., cp[8]
}
cout << endl;
while(idx > 0) // then the lower half:
{
--idx;
printBinary(cp[idx]); // cp[7], ..., cp[0]
}
cout << endl;
}
/*
g++ Double.cpp -o Double
./Double
-2.68742e+32, 4.57552e-41, -2.45652e+32, 4.57552e-41, -2.30727e+32, // garbage
11110101010101000000000000000000
00000000000000000111111110001100
11110101010000011100100100000000
0, 0, 0, 0, 0,
00000000000000000000000000000000
2.36943e-38, 2.36943e-38, 2.36943e-38, 2.36943e-38, 2.36943e-38,
00000001000000010000000100000001
-nan, -nan, -nan, -nan, -nan, // exp bits all 1, mantissa not 0
11111111111111111111111111111111 // sign bit is 1, negative
4.68413e-310, 4.68413e-310, 6.95306e-310, 4.94066e-324, 6.95306e-310, // garbage
0000000000000000010101100011101000110001000011010111000101010001
0000000000000000010101100011101000110001000011010111000000001000
0000000000000000011111111111111010010101010100100110010111001000
0, 0, 0, 0, 0,
0000000000000000000000000000000000000000000000000000000000000000
7.7486e-304, 7.7486e-304, 7.7486e-304, 7.7486e-304, 7.7486e-304,
0000000100000001000000010000000100000001000000010000000100000001
-nan, -nan, -nan, -nan, -nan, // sign bit 1, exp bits 1, mantissa not 0
1111111111111111111111111111111111111111111111111111111111111111
-nan, nan, nan, nan, nan, // garbage (Unnormal, invalid operands)
0000000000000000000000000000000100000000000000001111111111111111 // part 2
0000000000000000000000000000000000000000000000000000001010000000 // part 1
0000000000000000010101100011101000110001000011010100011111101000 // last bits
0000000000000000011111111111111010010101010100100110010010010000 // first
0000000000000000010101100011101000110001000011010100100110101101 // 127-64
0000000000000000000000000000000000000000000000000000000000000010 // 63-0
0, 0, 0, 0, 0, (sign bit 79 is 0, positive)
0000000000000000000000000000000000000000000000000000000000000000 // 78-64 (0)
0000000000000000000000000000000000000000000000000000000000000000 // 63-0 (0)
nan, nan, nan, nan, nan, // Unnormal, invalid operands (bit 79 is 0, positive)
0000000100000001000000010000000100000001000000010000000100000001 // 78-64 (0/1)
0000000100000001000000010000000100000001000000010000000100000001 // 63 (0)
-nan, -nan, -nan, -nan, -nan, // Quiet Not a Number (bit 79 is 1, negative)
1111111111111111111111111111111111111111111111111111111111111111 // 78-64 (1)
1111111111111111111111111111111111111111111111111111111111111111 // 63-0 (1)
*/
11110101010101000000000000000000 = ((-1)^1)*2^(2^1+2^3+2^5+2^6+2^7-127)*(1+2^(-1)+2^(-3)+2^(-5)) = (-1)*2^107*1.65625 ~ -2.68742e+32.
etc.
*****************************************************************************************
*****************************************************************************************
*****************************************************************************************
*****************************************************************************************
*****************************************************************************************
Exercise 3-29. (Challenging) Modify FloatingAsBinary.cpp (see ch3-FloatBinary) so that it prints out each part of the double as a separate group of bits. You’ll have to replace the calls to printBinary() with your own specialized code (which you can derive from printBinary() in order to do this), and you’ll also have to look up and understand the floating-point format along with the byte ordering for your compiler (this is the challenging part).
#include <stdio.h> // for printf(), putchar()
#include <stdlib.h> // for atof(), exit()
void printFloatBinary(float); // Display a float in binary
void printDoubleBinary(double); // Display a double in binary
void printLongDoubleBinary(long double); // Display a long double in binary
int main(int argc, char* argv[])
{ // Show the number given as argv[1] in binary: as float, double, long double
if(argc != 2)
{ // exactly one command-line argument (the number) must be given
printf("Must provide a number\n");
exit(1); // out of main(), end program; return value 1 signals an error
}
double d = atof(argv[1]); // atof() gives 0.0 when no number can be read
float f = d; // implicit cast (restrictive)
printFloatBinary(f);
printDoubleBinary(d);
long double ld = d; // implicit cast (extended)
printLongDoubleBinary(ld);
return 0; // normal return value for main(), signals no errors
}
void printFloatBinary(float f) // Display a float as: sign, exponent, mantissa
{ // we assume `float' has 32 bits: 1 for sign, 8 for exp, 23 for mantissa,
// and that the last byte of `f' (index 3) is the most significant one
const unsigned char* bytes = (const unsigned char*)(&f); // byte view of `f'
const int top = (int)(sizeof(float) * 8) - 1; // number of the sign bit (31)
int bit; // bit number inside `f': bit 0 is the lowest bit of bytes[0]
for(bit = top; bit >= 0; bit--) // most significant bit first
{
putchar(((bytes[bit / 8] >> (bit % 8)) & 1) ? '1' : '0');
if(bit == top || bit == top - 8) // sign (31) or last exponent bit (23) done
{putchar(' ');} // keep sign, exponent and mantissa apart
}
putchar('\n');
}
void printDoubleBinary(double d) // Display a double as: sign, exponent, mantissa
{ // we assume `double' has 64 bits: 1 for sign, 11 for exp, 52 for mantissa,
// and that the last byte of `d' (index 7) is the most significant one
const unsigned char* bytes = (const unsigned char*)(&d); // byte view of `d'
const int top = (int)(sizeof(double) * 8) - 1; // number of the sign bit (63)
int bit; // bit number inside `d': bit 0 is the lowest bit of bytes[0]
for(bit = top; bit >= 0; bit--) // most significant bit first
{
putchar(((bytes[bit / 8] >> (bit % 8)) & 1) ? '1' : '0');
if(bit == top || bit == top - 11) // sign (63) or last exponent bit (52) done
{putchar(' ');} // keep sign, exponent and mantissa apart
}
putchar('\n');
}
void printLongDoubleBinary(long double ld) // Display a long double in binary
{ // extended precision, 80 bits: 1 sign, 15 exp, 1 int part, 63 fraction;
// we assume sizeof(long double) = 16 (128 = 48 + 80 bits) and that the
// byte at the highest index is the most significant one
const unsigned char* bytes = (const unsigned char*)(&ld); // byte view of `ld'
int byteNo = (int)sizeof(long double) - 1; // start at the last byte (15)
int bit; // bit counter
int top; // number of the sign bit (79)
while(byteNo >= 10) // bytes 15, ..., 10 are garbage (48 bits)
{
for(bit = 7; bit >= 0; bit--) // each byte from its highest bit down
{putchar(((bytes[byteNo] >> bit) & 1) ? '1' : '0');}
byteNo--;
}
putchar(' '); // separate garbage (48 bits) from the rest (80 bits)
top = byteNo * 8 + 7; // highest bit of byte 9 (the sign)
for(bit = top; bit >= 0; bit--) // the remaining 80 bits, highest first
{
putchar(((bytes[bit / 8] >> (bit % 8)) & 1) ? '1' : '0');
if(bit == top) // sign (bit 79) written
{putchar(' ');}
else if(bit == top - 15) // last exponent bit (64) written
{putchar('\n');} // the mantissa (last 64 bits) gets its own line
else if(bit == top - 16 || bit == top - 17) // int part (63), first fraction bit (62)
{putchar(' ');}
}
putchar('\n');
}
/*
gcc floatbinary2.c -o floatbinary2
./floatbinary2
Must provide a number
./floatbinary2 0 1
Must provide a number // exactly one number
./floatbinary2 0
0 00000000 00000000000000000000000 // single-precision floating-point
0 00000000000 0000000000000000000000000000000000000000000000000000 // double
000000000000000000000000000000000000000000000000 0 000000000000000 // long
0 0 00000000000000000000000000000000000000000000000000000000000000 // double
./floatbinary2 1
0 01111111 00000000000000000000000
0 01111111111 0000000000000000000000000000000000000000000000000000
000000000000000000000000000000000000000000000000 0 011111111111111
1 0 00000000000000000000000000000000000000000000000000000000000000
./floatbinary2 2
0 10000000 00000000000000000000000
0 10000000000 0000000000000000000000000000000000000000000000000000
000000000000000000000000000000000000000000000000 0 100000000000000
1 0 00000000000000000000000000000000000000000000000000000000000000
./floatbinary2 3
0 10000000 10000000000000000000000
0 10000000000 1000000000000000000000000000000000000000000000000000
000000000000000000000000000000000000000000000000 0 100000000000000
1 1 00000000000000000000000000000000000000000000000000000000000000
./floatbinary2 4
0 10000001 00000000000000000000000
0 10000000001 0000000000000000000000000000000000000000000000000000
000000000000000000000000000000000000000000000000 0 100000000000001
1 0 00000000000000000000000000000000000000000000000000000000000000
./floatbinary2 0.1
0 01111011 10011001100110011001101
0 01111111011 1001100110011001100110011001100110011001100110011010
000000000000000000000000000000000000000000000000 0 011111111111011
1 1 00110011001100110011001100110011001100110011001101000000000000
./floatbinary2 0.2
0 01111100 10011001100110011001101
0 01111111100 1001100110011001100110011001100110011001100110011010
000000000000000000000000000000000000000000000000 0 011111111111100
1 1 00110011001100110011001100110011001100110011001101000000000000
./floatbinary2 -0.1
1 01111011 10011001100110011001101
1 01111111011 1001100110011001100110011001100110011001100110011010
000000000000000000000000000000000000000000000000 1 011111111111011
1 1 00110011001100110011001100110011001100110011001101000000000000
./floatbinary2 -.2
1 01111100 10011001100110011001101
1 01111111100 1001100110011001100110011001100110011001100110011010
000000000000000000000000000000000000000000000000 1 011111111111100
1 1 00110011001100110011001100110011001100110011001101000000000000
./floatbinary2 15
0 10000010 11100000000000000000000
0 10000000010 1110000000000000000000000000000000000000000000000000
000000000000000000000000000000000000000000000000 0 100000000000010
1 1 11000000000000000000000000000000000000000000000000000000000000
./floatbinary2 15.1
0 10000010 11100011001100110011010
0 10000000010 1110001100110011001100110011001100110011001100110011
000000000000000000000000000000000000000000000000 0 100000000000010
1 1 11000110011001100110011001100110011001100110011001100000000000
./floatbinary2 15.2
0 10000010 11100110011001100110011
0 10000000010 1110011001100110011001100110011001100110011001100110
000000000000000000000000000000000000000000000000 0 100000000000010
1 1 11001100110011001100110011001100110011001100110011000000000000
./floatbinary2 15.3
0 10000010 11101001100110011001101
0 10000000010 1110100110011001100110011001100110011001100110011010
000000000000000000000000000000000000000000000000 0 100000000000010
1 1 11010011001100110011001100110011001100110011001101000000000000
*/
*****************************************************************************************
*****************************************************************************************
*****************************************************************************************
#include <iostream>
#include <cstdlib> // for atof(), exit()
using std::cout;
using std::endl;
void printFloatBinary(float); // Display a float in binary
void printDoubleBinary(double); // Display a double in binary
void printLongDoubleBinary(long double); // Display a long double in binary
int main(int argc, char* argv[])
{ // Show the number given as argv[1] in binary: as float, double, long double
const bool oneArgument = (argc == 2); // program name plus exactly one number
if(!oneArgument)
{
cout << "Must provide a number" << endl;
exit(1); // leave the program at once; return value 1 signals an error
}
const double d = atof(argv[1]); // text of the argument as a double
const float f = d; // implicit cast (restrictive)
const long double ld = d; // implicit cast (extended)
printFloatBinary(f);
printDoubleBinary(d);
printLongDoubleBinary(ld);
return 0; // normal return value for main(), signals no errors
}
void printFloatBinary(float f) // Display a float as: sign, exponent, mantissa
{ // we assume `float' has 32 bits: 1 for sign, 8 for exp, 23 for mantissa,
// and that the last byte of `f' (index 3) is the most significant one
const unsigned char* bytes = reinterpret_cast<const unsigned char*>(&f); // byte view of `f'
const int top = static_cast<int>(sizeof(float) * 8) - 1; // number of the sign bit (31)
for(int bit = top; bit >= 0; bit--) // most significant bit first
{ // bit 0 is the lowest bit of bytes[0]
cout << (((bytes[bit / 8] >> (bit % 8)) & 1) ? '1' : '0');
if(bit == top || bit == top - 8) // sign (31) or last exponent bit (23) done
{cout << ' ';} // keep sign, exponent and mantissa apart
}
cout << endl;
}
void printDoubleBinary(double d) // Display a double as: sign, exponent, mantissa
{ // we assume `double' has 64 bits: 1 for sign, 11 for exp, 52 for mantissa,
// and that the last byte of `d' (index 7) is the most significant one
const unsigned char* bytes = reinterpret_cast<const unsigned char*>(&d); // byte view of `d'
const int top = static_cast<int>(sizeof(double) * 8) - 1; // number of the sign bit (63)
for(int bit = top; bit >= 0; bit--) // most significant bit first
{ // bit 0 is the lowest bit of bytes[0]
cout << (((bytes[bit / 8] >> (bit % 8)) & 1) ? '1' : '0');
if(bit == top || bit == top - 11) // sign (63) or last exponent bit (52) done
{cout << ' ';} // keep sign, exponent and mantissa apart
}
cout << endl;
}
void printLongDoubleBinary(long double ld) // Display a long double in binary
{ // extended precision, 80 bits: 1 sign, 15 exp, 1 int part, 63 fraction;
// we assume sizeof(long double) = 16 (128 = 48 + 80 bits) and that the
// byte at the highest index is the most significant one
const unsigned char* bytes = reinterpret_cast<const unsigned char*>(&ld); // byte view of `ld'
int byteNo = static_cast<int>(sizeof(long double)) - 1; // start at the last byte (15)
while(byteNo >= 10) // bytes 15, ..., 10 are garbage (48 bits)
{
for(int pos = 7; pos >= 0; pos--) // each byte from its highest bit down
{cout << (((bytes[byteNo] >> pos) & 1) ? '1' : '0');}
byteNo--;
}
cout << ' '; // separate garbage (48 bits) from the rest (80 bits)
const int top = byteNo * 8 + 7; // highest bit of byte 9 (the sign, bit 79)
for(int bit = top; bit >= 0; bit--) // the remaining 80 bits, highest first
{
cout << (((bytes[bit / 8] >> (bit % 8)) & 1) ? '1' : '0');
if(bit == top) // sign (bit 79) written
{cout << ' ';}
else if(bit == top - 15) // last exponent bit (64) written
{cout << endl;} // the mantissa (last 64 bits) gets its own line
else if(bit == top - 16 || bit == top - 17) // int part (63), first fraction bit (62)
{cout << ' ';}
}
cout << endl;
}
/*
g++ FloatBinary2.cpp -o FloatBinary2
./FloatBinary2
Must provide a number
./FloatBinary2 0 1
Must provide a number // exactly one number
./FloatBinary2 0
0 00000000 00000000000000000000000 // single-precision floating-point
0 00000000000 0000000000000000000000000000000000000000000000000000 // double
000000000000000000000000000000000000000000000000 0 000000000000000 // long
0 0 00000000000000000000000000000000000000000000000000000000000000 // double
./FloatBinary2 1
0 01111111 00000000000000000000000
0 01111111111 0000000000000000000000000000000000000000000000000000
000000000000000000000000000000000000000000000000 0 011111111111111
1 0 00000000000000000000000000000000000000000000000000000000000000
./FloatBinary2 2
0 10000000 00000000000000000000000
0 10000000000 0000000000000000000000000000000000000000000000000000
000000000000000000000000000000000000000000000000 0 100000000000000
1 0 00000000000000000000000000000000000000000000000000000000000000
./FloatBinary2 3
0 10000000 10000000000000000000000
0 10000000000 1000000000000000000000000000000000000000000000000000
000000000000000000000000000000000000000000000000 0 100000000000000
1 1 00000000000000000000000000000000000000000000000000000000000000
./FloatBinary2 4
0 10000001 00000000000000000000000
0 10000000001 0000000000000000000000000000000000000000000000000000
000000000000000000000000000000000000000000000000 0 100000000000001
1 0 00000000000000000000000000000000000000000000000000000000000000
./FloatBinary2 0.1
0 01111011 10011001100110011001101
0 01111111011 1001100110011001100110011001100110011001100110011010
000000000000000000000000000000000000000000000000 0 011111111111011
1 1 00110011001100110011001100110011001100110011001101000000000000
./FloatBinary2 0.2
0 01111100 10011001100110011001101
0 01111111100 1001100110011001100110011001100110011001100110011010
000000000000000000000000000000000000000000000000 0 011111111111100
1 1 00110011001100110011001100110011001100110011001101000000000000
./FloatBinary2 -0.1
1 01111011 10011001100110011001101
1 01111111011 1001100110011001100110011001100110011001100110011010
000000000000000000000000000000000000000000000000 1 011111111111011
1 1 00110011001100110011001100110011001100110011001101000000000000
./FloatBinary2 -.2
1 01111100 10011001100110011001101
1 01111111100 1001100110011001100110011001100110011001100110011010
000000000000000000000000000000000000000000000000 1 011111111111100
1 1 00110011001100110011001100110011001100110011001101000000000000
./FloatBinary2 15
0 10000010 11100000000000000000000
0 10000000010 1110000000000000000000000000000000000000000000000000
000000000000000000000000000000000000000000000000 0 100000000000010
1 1 11000000000000000000000000000000000000000000000000000000000000
./FloatBinary2 15.1
0 10000010 11100011001100110011010
0 10000000010 1110001100110011001100110011001100110011001100110011
000000000000000000000000000000000000000000000000 0 100000000000010
1 1 11000110011001100110011001100110011001100110011001100000000000
./FloatBinary2 15.2
0 10000010 11100110011001100110011
0 10000000010 1110011001100110011001100110011001100110011001100110
000000000000000000000000000000000000000000000000 0 100000000000010
1 1 11001100110011001100110011001100110011001100110011000000000000
./FloatBinary2 15.3
0 10000010 11101001100110011001101
0 10000000010 1110100110011001100110011001100110011001100110011010
000000000000000000000000000000000000000000000000 0 100000000000010
1 1 11010011001100110011001100110011001100110011001101000000000000
*/