ch3-Floats

Chapter_3 ch3-Arguments ch3-Pointers

ch3-FloatBinary TCP1, p. 173, 203-204 (Print bits of float and double in C/C++)

Contents: floatbinary.c FloatBinary.cpp

floatbinary.c download

#include <stdio.h> // for printf(), putchar()

#include <stdlib.h> // for atof(), exit()

// Display a byte in binary

void printBinary(const unsigned char val);

// Print the bits of the number given as the only command-line argument,
// first stored as a float and then as a double, one line each.
// The bytes of each object are printed from the highest address down to the
// lowest; on a little-endian machine this puts the sign bit first.
// Exits with status 1 unless exactly one argument is supplied.
int main(int argc, char* argv[])
{
    double asDouble;
    float asFloat;
    const unsigned char* bytes;
    size_t remaining; // number of bytes still to print

    if(argc != 2)
    {
        printf("Must provide a number\n");
        exit(1); // out of main(), end program; return value 1 signals an error
    }

    asDouble = atof(argv[1]);
    asFloat = asDouble; // narrowed copy of the same number

    // float: cp[3], cp[2], cp[1], cp[0]
    bytes = (const unsigned char*)&asFloat;
    remaining = sizeof asFloat;
    while(remaining > 0)
    {
        --remaining;
        printBinary(bytes[remaining]);
    }
    putchar('\n');

    // double: cp[7], cp[6], ..., cp[1], cp[0]
    bytes = (const unsigned char*)&asDouble;
    remaining = sizeof asDouble;
    while(remaining > 0)
    {
        --remaining;
        printBinary(bytes[remaining]);
    }
    putchar('\n');

    return 0; // normal return value for main(), signals no errors
}

// Display a byte in binary.
// Writes exactly eight characters ('0' or '1') to standard output, from the
// most significant bit (bit 7) to the least significant bit (bit 0).
// No newline is written; the caller decides where the line ends.
void printBinary(const unsigned char val)
{
    int i;

    for(i = 7; i >= 0; i--)
    { // print bits from first (most significant) to last (least significant)
        if(val & (1 << i)) // set (1) bit
        {
            putchar('1');
        }
        else // 0 bit
        {
            putchar('0');
        }
    }
} // end of printBinary() - this closing brace was missing

/*

gcc floatbinary.c -o floatbinary

./floatbinary

Must provide a number

./floatbinary 0 1

Must provide a number // exactly one number

./floatbinary 0

00000000000000000000000000000000 // single-precision floating-point

0000000000000000000000000000000000000000000000000000000000000000 // double

./floatbinary 1

00111111100000000000000000000000

0011111111110000000000000000000000000000000000000000000000000000

./floatbinary 2

01000000000000000000000000000000

0100000000000000000000000000000000000000000000000000000000000000

./floatbinary 3

01000000010000000000000000000000

0100000000001000000000000000000000000000000000000000000000000000

./floatbinary 4

01000000100000000000000000000000

0100000000010000000000000000000000000000000000000000000000000000

./floatbinary 0.1

00111101110011001100110011001101

0011111110111001100110011001100110011001100110011001100110011010

./floatbinary 0.2

00111110010011001100110011001101

0011111111001001100110011001100110011001100110011001100110011010

./floatbinary -0.1

10111101110011001100110011001101

1011111110111001100110011001100110011001100110011001100110011010

./floatbinary -.2

10111110010011001100110011001101

1011111111001001100110011001100110011001100110011001100110011010

./floatbinary 15

01000001011100000000000000000000

0100000000101110000000000000000000000000000000000000000000000000

./floatbinary 15.1

01000001011100011001100110011010

0100000000101110001100110011001100110011001100110011001100110011

./floatbinary 15.2

01000001011100110011001100110011

0100000000101110011001100110011001100110011001100110011001100110

./floatbinary 15.3

01000001011101001100110011001101

0100000000101110100110011001100110011001100110011001100110011010

*/

Notes: See Single-precision and Double-precision floating-point formats on Wikipedia.

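00000000000000000000000000000000 = exactly 0. An all-zero exponent field is a special case (zero and subnormal numbers) with no implicit leading 1, so the formula for normal numbers used in the lines below, ((-1)^sign)*2^(exponent-127)*(1+fraction), does not apply here; applied naively it would give ((-1)^0)*2^(0-127)*(1+0) = 1*2^(-127)*1 ~ 0
(here ^ means "raised to power" and ~ means "approximates to").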

00111111100000000000000000000000 = ((-1)^0)*2^(2^0+2^1+2^2+2^3+2^4+2^5+2^6-127)*(1+0) = 1*2^0*1 = 1.

01000000000000000000000000000000 = ((-1)^0)*2^(2^7-127)*(1+0) = 1*2^1*1 = 2.

01000000010000000000000000000000 = ((-1)^0)*2^(2^7-127)*(1+2^(-1)) = 1*2^1*1.5 = 3.

01000000100000000000000000000000 = ((-1)^0)*2^(2^0+2^7-127)*(1+0) = 1*2^2*1 = 4.

00111101110011001100110011001101 = ((-1)^0)*2^(2^0+2^1+2^3+2^4+2^5+2^6-127)*(1+2^(-1)+2^(-4)+2^(-5)+...) ~ 1*2^(-4)*1.6 = 0.1.

01000001011100000000000000000000 = ((-1)^0)*2^(2^1+2^7-127)*(1+2^(-1)+2^(-2)+2^(-3)) = 1*2^3*1.875 = 15.

etc.

*****************************************************************************************

Note: See ch3-Bitwise in Section ch3-Operators for PrintBinary.hpp and PrintBinary.cpp. Add the two files in the same folder as FloatBinary.cpp:

FloatBinary.cpp download

#include "PrintBinary.hpp" // for printBinary()

#include <cstdlib> // for atof(), exit()

#include <iostream>

using std::cout;

using std::endl;

// Print the bits of the number given as the only command-line argument,
// first stored as a float and then as a double, one line each.
// Exits with status 1 unless exactly one argument is supplied.
int main(int argc, char* argv[])
{
    if(argc != 2)
    {
        cout << "Must provide a number" << endl;
        exit(1); // out of main(), end program; return value 1 signals an error
    }

    double asDouble = atof(argv[1]);
    float asFloat = asDouble; // narrowed copy of the same number

    // Print `count` bytes starting at `bytes`, highest address first
    // (cp[count-1], ..., cp[1], cp[0]), then end the line.
    const auto dumpBytes = [](unsigned char* bytes, std::size_t count)
    {
        while(count > 0)
        {
            --count;
            printBinary(bytes[count]);
        }
        cout << endl;
    };

    dumpBytes(reinterpret_cast<unsigned char*>(&asFloat), sizeof(float));
    dumpBytes(reinterpret_cast<unsigned char*>(&asDouble), sizeof(double));

    return 0; // normal return value for main(), signals no errors
}

/*

g++ -c PrintBinary.cpp FloatBinary.cpp // create object files

g++ -c *.cpp

g++ PrintBinary.o FloatBinary.o -o FloatBinary // link object files,

g++ *.o -o FloatBinary // create executable

rm *.o // clean (delete object files)

./FloatBinary

Must provide a number

./FloatBinary 0 1

Must provide a number // exactly one number

./FloatBinary 0

00000000000000000000000000000000 // single-precision floating-point

0000000000000000000000000000000000000000000000000000000000000000 // double

./FloatBinary 1

00111111100000000000000000000000

0011111111110000000000000000000000000000000000000000000000000000

./FloatBinary 2

01000000000000000000000000000000

0100000000000000000000000000000000000000000000000000000000000000

./FloatBinary 3

01000000010000000000000000000000

0100000000001000000000000000000000000000000000000000000000000000

./FloatBinary 4

01000000100000000000000000000000

0100000000010000000000000000000000000000000000000000000000000000

./FloatBinary 0.1

00111101110011001100110011001101

0011111110111001100110011001100110011001100110011001100110011010

./FloatBinary 0.2

00111110010011001100110011001101

0011111111001001100110011001100110011001100110011001100110011010

./FloatBinary -0.1

10111101110011001100110011001101

1011111110111001100110011001100110011001100110011001100110011010

./FloatBinary -.2

10111110010011001100110011001101

1011111111001001100110011001100110011001100110011001100110011010

./FloatBinary 15

01000001011100000000000000000000

0100000000101110000000000000000000000000000000000000000000000000

./FloatBinary 15.1

01000001011100011001100110011010

0100000000101110001100110011001100110011001100110011001100110011

./FloatBinary 15.2

01000001011100110011001100110011

0100000000101110011001100110011001100110011001100110011001100110

./FloatBinary 15.3

01000001011101001100110011001101

0100000000101110100110011001100110011001100110011001100110011010

*/

*****************************************************************************************

ch3-FloatLimits (float, double, long double limits in C/C++)

Contents: floatlimits.c FloatLimits.cpp

floatlimits.c download

#include <stdio.h> // for printf()

#include <float.h>

#include <math.h> // for pow() - double, powl() - long double

void printBinary(const unsigned char val); // Display a byte in binary

void printData(unsigned char*, int); // Display a data type in binary

void printData2(unsigned char*, int); // Display data type in binary (2 parts)

// Print the floating-point limits from <float.h> and then, for each of
// float, double and long double, the value and the bit pattern of
//   - the smallest normal number   (xxx_MIN),
//   - the smallest subnormal number (xxx_TRUE_MIN),
//   - the largest finite number    (xxx_MAX),
// each obtained three ways: from the <float.h> macro, computed with
// pow()/powl(), and assembled directly from bits ("shifted").
//
// The "shifted" values are read back through a union instead of through a
// cast pointer such as *((float*)&u): reading an unsigned object through a
// float lvalue breaks the aliasing rules, and an array of integers is not
// guaranteed to be aligned for long double. unsigned long long (at least
// 64 bits everywhere) replaces long unsigned, which is only 32 bits wide on
// some platforms, where shifting it by 52 or 63 would be undefined.
//
// The hand-built patterns assume IEEE 754 float and double, a little-endian
// machine and the x86 80-bit extended format for long double - the platform
// the recorded output was produced on.
// Always returns 0.
int main()
{
    int i;

    // Overlays: write the integer member, read the floating-point member.
    union { unsigned bits; float value; } fpun;
    union { unsigned long long bits; double value; } dpun;
    union { unsigned long long words[2]; long double value; } ldpun;

    // ------------------------- <float.h> limits -------------------------

    printf("FLT_DIG: %d, DBL_DIG: %d, LDBL_DIG: %d\n",
           FLT_DIG, DBL_DIG, LDBL_DIG);
    printf("FLT_DECIMAL_DIG: %d, DBL_DECIMAL_DIG: %d, LDBL_DECIMAL_DIG: %d\n",
           FLT_DECIMAL_DIG, DBL_DECIMAL_DIG, LDBL_DECIMAL_DIG);
    printf("DECIMAL_DIG: %d\n", DECIMAL_DIG);
    printf("FLT_MIN_10_EXP: %d, DBL_MIN_10_EXP: %d, LDBL_MIN_10_EXP: %d\n",
           FLT_MIN_10_EXP, DBL_MIN_10_EXP, LDBL_MIN_10_EXP);
    printf("FLT_MAX_10_EXP: %d, DBL_MAX_10_EXP: %d, LDBL_MAX_10_EXP: %d\n",
           FLT_MAX_10_EXP, DBL_MAX_10_EXP, LDBL_MAX_10_EXP);
    printf("FLT_MIN_EXP: %d, DBL_MIN_EXP: %d, LDBL_MIN_EXP: %d\n",
           FLT_MIN_EXP, DBL_MIN_EXP, LDBL_MIN_EXP);
    printf("FLT_MAX_EXP: %d, DBL_MAX_EXP: %d, LDBL_MAX_EXP: %d\n",
           FLT_MAX_EXP, DBL_MAX_EXP, LDBL_MAX_EXP);
    printf("FLT_EPSILON: %g, DBL_EPSILON: %g, LDBL_EPSILON: %Lg\n",
           FLT_EPSILON, DBL_EPSILON, LDBL_EPSILON);
    printf("FLT_TRUE_MIN: %g, DBL_TRUE_MIN: %g, LDBL_TRUE_MIN: %Lg\n",
           FLT_TRUE_MIN, DBL_TRUE_MIN, LDBL_TRUE_MIN);
    printf("FLT_RADIX (Radix of exponent representation): %d\n", FLT_RADIX);

    // ------------------------------ float -------------------------------

    printf("float: %d mantissa digits", FLT_MANT_DIG);
    printf("\t[%g, %g]\n", FLT_MIN, FLT_MAX);

    // 1) values taken from <float.h>
    printf("FLT_MIN: %g\t\t", FLT_MIN);
    float f = FLT_MIN;
    printData((unsigned char*)(&f), sizeof(float));
    printf("FLT_TRUE_MIN: %g\t", FLT_TRUE_MIN);
    f = FLT_TRUE_MIN;
    printData((unsigned char*)(&f), sizeof(float));
    printf("FLT_MAX: %g\t\t", FLT_MAX);
    f = FLT_MAX;
    printData((unsigned char*)(&f), sizeof(float));

    // 2) values computed with pow()
    float fmin, ftruemin, fmax;
    fmin = pow(2, -126);
    float fmantissa = 1.0;
    for (i = 1; i <= 23; i++) // 1 + 2^(-1) + ... + 2^(-23): all fraction bits set
    {
        fmantissa += pow(2, -i);
    }
    fmax = fmantissa * pow(2, 127);
    printf("float range (computed):\t\t[%g, %g]\n", fmin, fmax);
    printf("fmin (computed): %g\t", fmin);
    printData((unsigned char*)(&fmin), sizeof(float));
    ftruemin = pow(2, -126) * pow(2, -23); // pow(2, -149); // 2^(-150) ~ 0
    printf("ftruemin (comp): %g\t", ftruemin);
    printData((unsigned char*)(&ftruemin), sizeof(float));
    printf("fmax (computed): %g\t", fmax);
    printData((unsigned char*)(&fmax), sizeof(float));

    // 3) values assembled from bits
    unsigned u = 1; // 00000000000000000000000000000001
    u <<= 23; // u = pow(2, 23); // 00000000100000000000000000000000
    fpun.bits = u;
    fmin = fpun.value;
    printf("fmin shifted: %g\t", fmin);
    printData((unsigned char*)(&fmin), sizeof(float));

    unsigned v = 1; // 00000000000000000000000000000001
    fpun.bits = v;
    ftruemin = fpun.value;
    printf("ftruemin shift: %g\t", ftruemin);
    printData((unsigned char*)(&ftruemin), sizeof(float));

    v = ~0u; // 11111111111111111111111111111111
    v >>= 1; // 01111111111111111111111111111111
    v &= ~u; // 01111111011111111111111111111111
    fpun.bits = v;
    fmax = fpun.value;
    printf("fmax shifted: %g\t", fmax);
    printData((unsigned char*)(&fmax), sizeof(float));
    putchar('\n');

    // ------------------------------ double ------------------------------

    printf("double: %d mantissa digits", DBL_MANT_DIG);
    printf("\t[%g, %g]\n", DBL_MIN, DBL_MAX);

    // 1) values taken from <float.h>
    printf("DBL_MIN: %g\n", DBL_MIN);
    double d = DBL_MIN;
    printData((unsigned char*)(&d), sizeof(double));
    printf("DBL_TRUE_MIN: %g\n", DBL_TRUE_MIN);
    d = DBL_TRUE_MIN;
    printData((unsigned char*)(&d), sizeof(double));
    printf("DBL_MAX: %g\n", DBL_MAX);
    d = DBL_MAX;
    printData((unsigned char*)(&d), sizeof(double));

    // 2) values computed with pow()
    double dmin, dtruemin, dmax;
    dmin = pow(2, -1022);
    double dmantissa = 1.0;
    for (i = 1; i <= 52; i++) // all 52 fraction bits set
    {
        dmantissa += pow(2, -i);
    }
    dmax = dmantissa * pow(2, 1023);
    printf("double range (computed):\t[%g, %g]\n", dmin, dmax);
    printf("dmin (computed): %g\n", dmin);
    printData((unsigned char*)(&dmin), sizeof(double));
    dtruemin = pow(2, -1022) * pow(2, -52); // pow(2, -1074); // 2^(-1075) ~ 0
    printf("dtruemin (comp): %g\n", dtruemin);
    printData((unsigned char*)(&dtruemin), sizeof(double));
    printf("dmax (computed): %g\n", dmax);
    printData((unsigned char*)(&dmax), sizeof(double));

    // 3) values assembled from bits
    unsigned long long lu = 1; // 0...01
    lu <<= 52; // lu = pow(2, 52); // 0000000000010...0
    dpun.bits = lu;
    dmin = dpun.value;
    printf("dmin shifted: %g\n", dmin);
    printData((unsigned char*)(&dmin), sizeof(double));

    unsigned long long lv = 1; // 0...01
    dpun.bits = lv;
    dtruemin = dpun.value;
    printf("dtruemin shift: %g\n", dtruemin);
    printData((unsigned char*)(&dtruemin), sizeof(double));

    lv = ~0ULL; // 1...1
    lv >>= 1; // 011...11
    lv &= ~lu; // 01111111111011...11
    dpun.bits = lv;
    dmax = dpun.value;
    printf("dmax shifted: %g\n", dmax);
    printData((unsigned char*)(&dmax), sizeof(double));
    putchar('\n');

    // ---------------------------- long double ---------------------------

    /*
    On my computer,
    sizeof(double) = sizeof(long) = sizeof(long long) = 8
    sizeof(long double) = 2 * sizeof(double) = 16
    `long double' stored on 128 bits =
    48 zeros (or garbage) + 80 bits (extended precision)
    */

    printf("long double: %d mantissa digits", LDBL_MANT_DIG);
    printf("\t[%Lg, %Lg]\n", LDBL_MIN, LDBL_MAX);

    // 1) values taken from <float.h>
    printf("LDBL_MIN: %Lg\n", LDBL_MIN);
    long double ld = LDBL_MIN;
    printData2((unsigned char*)(&ld), sizeof(long double));
    printf("LDBL_TRUE_MIN: %Lg\n", LDBL_TRUE_MIN);
    ld = LDBL_TRUE_MIN;
    printData2((unsigned char*)(&ld), sizeof(long double));
    printf("LDBL_MAX: %Lg\n", LDBL_MAX);
    ld = LDBL_MAX;
    printData2((unsigned char*)(&ld), sizeof(long double));

    // 2) values computed with powl()
    long double ldmin, ldtruemin, ldmax;
    ldmin = powl(2, -16382);
    long double ldmantissa = 1.0;
    for (i = 1; i <= 63; i++) // all 63 fraction bits set
    {
        ldmantissa += powl(2, -i);
    }
    ldmax = ldmantissa * powl(2, 16383);
    printf("long double range (computed):\t[%Lg, %Lg]\n", ldmin, ldmax);
    printf("ldmin (computed): %Lg\n", ldmin);
    printData2((unsigned char*)(&ldmin), sizeof(long double));
    ldtruemin = powl(2, -16382) * powl(2, -63); // powl(2, -16445); // 2^(-16446) ~ 0
    printf("ldtruemin (comp): %Lg\n", ldtruemin);
    printData2((unsigned char*)(&ldtruemin), sizeof(long double));
    printf("ldmax (computed): %Lg\n", ldmax);
    printData2((unsigned char*)(&ldmax), sizeof(long double));

    // 3) values assembled from bits: word 0 is the 64-bit mantissa,
    //    the low 16 bits of word 1 are the sign and the exponent
    unsigned long long uarr[2] = {1, 1}; // 0...01, 0...01
    uarr[0] <<= 63; // uarr[0] = powl(2, 63); // 10...0
    ldpun.words[0] = uarr[0];
    ldpun.words[1] = uarr[1];
    ldmin = ldpun.value;
    printf("ldmin shifted: %Lg\n", ldmin);
    printData2((unsigned char*)(&ldmin), sizeof(long double));

    unsigned long long varr[2] = {1, 0}; // 0...01, 0...0
    ldpun.words[0] = varr[0];
    ldpun.words[1] = varr[1];
    ldtruemin = ldpun.value; // 0...01
    printf("ldtruemin shift: %Lg\n", ldtruemin); // label used to read "dtruemin"
    printData2((unsigned char*)(&ldtruemin), sizeof(long double));

    varr[0] = varr[1] = ~0ULL; // 1...1
    varr[1] >>= 49; // 0...0111111111111111 (49 zeros)
    varr[1] &= ~uarr[1]; // 0..0111111111111110 (50 zeros)
    /*
    // Alternative for varr[1]:
    varr[1] >>= 50; // 0...011111111111111 (50 zeros)
    varr[1] <<= 1; // 0..0111111111111110 (50 zeros)
    */
    ldpun.words[0] = varr[0];
    ldpun.words[1] = varr[1];
    ldmax = ldpun.value;
    printf("ldmax shifted: %Lg\n", ldmax);
    printData2((unsigned char*)(&ldmax), sizeof(long double));

    return 0;
}

// Display a byte in binary.
// Writes exactly eight characters ('0' or '1') to standard output, from the
// most significant bit (bit 7) to the least significant bit (bit 0).
// No newline is written.
void printBinary(const unsigned char val)
{
    int i;

    for(i = 7; i >= 0; i--)
    { // print bits from first (most significant) to last (least significant)
        if(val & (1 << i)) // set (1) bit
        {
            putchar('1');
        }
        else // 0 bit
        {
            putchar('0');
        }
    }
} // end of printBinary() - this closing brace was missing

// Display a data type in binary, followed by a newline.
// cp points to the first byte of the object and size is its size in bytes.
// Bytes are printed from the last one down to the first,
// e.g. for sizeof(float) = 4: cp[3], cp[2], cp[1], cp[0].
// Nothing but the newline is printed when size <= 0.
void printData(unsigned char* cp, int size)
{
    int pos = size; // one past the index of the next byte to print

    while(pos > 0)
    {
        --pos;
        printBinary(cp[pos]);
    }
    putchar('\n');
}

// Display a data type in binary on two lines (used for long double).
// cp points to the first byte of the object and size is its size in bytes
// (assumed even). The upper half of the bytes is printed on the first line
// and the lower half on the second, each from the highest index down;
// for sizeof(long double) = 16 that is cp[15]..cp[8], then cp[7]..cp[0].
void printData2(unsigned char* cp, int size)
{
    const int split = size / 2; // index where the second line starts
    int pos = size;             // one past the index of the next byte to print

    while(pos > split) // second half: cp[15], ..., cp[8]
    {
        --pos;
        printBinary(cp[pos]);
    }
    putchar('\n');

    while(pos > 0) // first half: cp[7], ..., cp[0]
    {
        --pos;
        printBinary(cp[pos]);
    }
    putchar('\n');
}

/*

gcc -E floatlimits.c // preprocess to show the contents of header files

// On disk: /usr/lib/gcc/x86_64-linux-gnu/9/include/float.h

gcc -E floatlimits.c > headers.txt // save to file

// Compile and run:

gcc floatlimits.c -o floatlimits -lm // link math library

./floatlimits

FLT_DIG: 6, DBL_DIG: 15, LDBL_DIG: 18

FLT_DECIMAL_DIG: 9, DBL_DECIMAL_DIG: 17, LDBL_DECIMAL_DIG: 21

DECIMAL_DIG: 21

FLT_MIN_10_EXP: -37, DBL_MIN_10_EXP: -307, LDBL_MIN_10_EXP: -4931

FLT_MAX_10_EXP: 38, DBL_MAX_10_EXP: 308, LDBL_MAX_10_EXP: 4932

FLT_MIN_EXP: -125, DBL_MIN_EXP: -1021, LDBL_MIN_EXP: -16381

FLT_MAX_EXP: 128, DBL_MAX_EXP: 1024, LDBL_MAX_EXP: 16384

FLT_EPSILON: 1.19209e-07, DBL_EPSILON: 2.22045e-16, LDBL_EPSILON: 1.0842e-19

FLT_TRUE_MIN: 1.4013e-45, DBL_TRUE_MIN: 4.94066e-324, LDBL_TRUE_MIN: 3.6452e-4951

FLT_RADIX (Radix of exponent representation): 2 // base 2 (binary)

float: 24 mantissa digits [1.17549e-38, 3.40282e+38]

FLT_MIN: 1.17549e-38 00000000100000000000000000000000

FLT_TRUE_MIN: 1.4013e-45 00000000000000000000000000000001

FLT_MAX: 3.40282e+38 01111111011111111111111111111111

float range (computed): [1.17549e-38, 3.40282e+38]

fmin (computed): 1.17549e-38 00000000100000000000000000000000

ftruemin (comp): 1.4013e-45 00000000000000000000000000000001

fmax (computed): 3.40282e+38 01111111011111111111111111111111

fmin shifted: 1.17549e-38 00000000100000000000000000000000

ftruemin shift: 1.4013e-45 00000000000000000000000000000001

fmax shifted: 3.40282e+38 01111111011111111111111111111111

double: 53 mantissa digits [2.22507e-308, 1.79769e+308]

DBL_MIN: 2.22507e-308

0000000000010000000000000000000000000000000000000000000000000000

DBL_TRUE_MIN: 4.94066e-324

0000000000000000000000000000000000000000000000000000000000000001

DBL_MAX: 1.79769e+308

0111111111101111111111111111111111111111111111111111111111111111

double range (computed): [2.22507e-308, 1.79769e+308]

dmin (computed): 2.22507e-308

0000000000010000000000000000000000000000000000000000000000000000

dtruemin (comp): 4.94066e-324

0000000000000000000000000000000000000000000000000000000000000001

dmax (computed): 1.79769e+308

0111111111101111111111111111111111111111111111111111111111111111

dmin shifted: 2.22507e-308

0000000000010000000000000000000000000000000000000000000000000000

dtruemin shift: 4.94066e-324

0000000000000000000000000000000000000000000000000000000000000001

dmax shifted: 1.79769e+308

0111111111101111111111111111111111111111111111111111111111111111

long double: 64 mantissa digits [3.3621e-4932, 1.18973e+4932]

LDBL_MIN: 3.3621e-4932

0000000000000000000000000000000000000000000000000000000000000001

1000000000000000000000000000000000000000000000000000000000000000

LDBL_TRUE_MIN: 3.6452e-4951

0000000000000000000000000000000000000000000000000000000000000000

0000000000000000000000000000000000000000000000000000000000000001

LDBL_MAX: 1.18973e+4932

0000000000000000000000000000000000000000000000000111111111111110

1111111111111111111111111111111111111111111111111111111111111111

long double range (computed): [3.3621e-4932, 1.18973e+4932]

ldmin (computed): 3.3621e-4932

0000000000000000000000000000000000000000000000000000000000000001

1000000000000000000000000000000000000000000000000000000000000000

ldtruemin (comp): 3.6452e-4951

0000000000000000000000000000000000000000111100000000000000000000

0000000000000000000000000000000000000000000000000000000000000001

ldmax (computed): 1.18973e+4932

0000000000000000011111111111110001100111100011100111111111111110

1111111111111111111111111111111111111111111111111111111111111111

ldmin shifted: 3.3621e-4932

0000000000000000000000000000000000000000000000000000000000000001

1000000000000000000000000000000000000000000000000000000000000000

dtruemin shift: 3.6452e-4951

0000000000000000000000000000000000000000111100000000000000000000

0000000000000000000000000000000000000000000000000000000000000001

ldmax shifted: 1.18973e+4932

0000000000000000011111111111110000011011011001000111111111111110

1111111111111111111111111111111111111111111111111111111111111111

*/

Notes: See Exercise_2-1 on the site Kernighan_and_Ritchie, Chapter_2, Section ch2-Types_and_Sizes, as well as Exercise_2-1 on clc-wiki-kr (solutions to exercises from "The C Programming Language", 2nd edition, by Kernighan and Ritchie).

Note how the preprocessor replaces the constants in the generated file (headers.txt, last part).

*****************************************************************************************

FloatLimits.cpp download

#include <cfloat>
#include <cmath> // for pow() - double, powl() - long double
#include <cstring> // for memcpy()
#include <iostream>

using std::cout;

using std::endl;

void printBinary(const unsigned char val); // Display a byte in binary

void printData(unsigned char*, int); // Display a data type in binary

void printData2(unsigned char*, int); // Display data type in binary (2 parts)

// Build a floating-point value of type To from the raw bytes of `bits`.
// std::memcpy is used instead of dereferencing a cast pointer such as
// *((float*)&u): reading an integer object through a floating-point lvalue
// breaks the aliasing rules, and an integer array is not guaranteed to be
// aligned for long double. At most sizeof(To) bytes are copied, and never
// more than `bits` holds; any bytes not copied stay zero.
template <typename To, typename From>
static To fromBits(const From& bits)
{
    To value{};
    std::memcpy(&value, &bits,
                sizeof(To) < sizeof(From) ? sizeof(To) : sizeof(From));
    return value;
}

// Print the floating-point limits from <cfloat> and then, for each of
// float, double and long double, the value and the bit pattern of
//   - the smallest normal number   (xxx_MIN),
//   - the smallest subnormal number (xxx_TRUE_MIN),
//   - the largest finite number    (xxx_MAX),
// each obtained three ways: from the <cfloat> macro, computed with
// pow()/powl(), and assembled directly from bits ("shifted").
//
// unsigned long long (at least 64 bits everywhere) replaces long unsigned,
// which is only 32 bits wide on some platforms, where shifting it by 52 or
// 63 would be undefined.
//
// The hand-built patterns assume IEEE 754 float and double, a little-endian
// machine and the x86 80-bit extended format for long double - the platform
// the recorded output was produced on.
// Always returns 0.
int main()
{
    // ------------------------- <cfloat> limits --------------------------

    cout << "FLT_DIG: " << FLT_DIG << ", DBL_DIG: " << DBL_DIG
         << ", LDBL_DIG: " << LDBL_DIG << endl;
    cout << "FLT_DECIMAL_DIG: " << FLT_DECIMAL_DIG << ", DBL_DECIMAL_DIG: "
         << DBL_DECIMAL_DIG << ", LDBL_DECIMAL_DIG: " << LDBL_DECIMAL_DIG << endl;
    cout << "DECIMAL_DIG: " << DECIMAL_DIG << endl;
    cout << "FLT_MIN_10_EXP: " << FLT_MIN_10_EXP << ", DBL_MIN_10_EXP: "
         << DBL_MIN_10_EXP << ", LDBL_MIN_10_EXP: " << LDBL_MIN_10_EXP << endl;
    cout << "FLT_MAX_10_EXP: " << FLT_MAX_10_EXP << ", DBL_MAX_10_EXP: "
         << DBL_MAX_10_EXP << ", LDBL_MAX_10_EXP: " << LDBL_MAX_10_EXP << endl;
    cout << "FLT_MIN_EXP: " << FLT_MIN_EXP << ", DBL_MIN_EXP: "
         << DBL_MIN_EXP << ", LDBL_MIN_EXP: " << LDBL_MIN_EXP << endl;
    cout << "FLT_MAX_EXP: " << FLT_MAX_EXP << ", DBL_MAX_EXP: "
         << DBL_MAX_EXP << ", LDBL_MAX_EXP: " << LDBL_MAX_EXP << endl;
    cout << "FLT_EPSILON: " << FLT_EPSILON << ", DBL_EPSILON: "
         << DBL_EPSILON << ", LDBL_EPSILON: " << LDBL_EPSILON << endl;
    cout << "FLT_TRUE_MIN: " << FLT_TRUE_MIN << ", DBL_TRUE_MIN: "
         << DBL_TRUE_MIN << ", LDBL_TRUE_MIN: " << LDBL_TRUE_MIN << endl;
    cout << "FLT_RADIX (Radix of exponent representation): "
         << FLT_RADIX << endl;

    // ------------------------------ float -------------------------------

    cout << "float: " << FLT_MANT_DIG << " mantissa digits";
    cout << "\t[" << FLT_MIN << ", " << FLT_MAX << "]" << endl;

    // 1) values taken from <cfloat>
    cout << "FLT_MIN: " << FLT_MIN << "\t\t";
    float f = FLT_MIN;
    printData(reinterpret_cast<unsigned char*>(&f), sizeof(float));
    cout << "FLT_TRUE_MIN: " << FLT_TRUE_MIN << "\t";
    f = FLT_TRUE_MIN;
    printData(reinterpret_cast<unsigned char*>(&f), sizeof(float));
    cout << "FLT_MAX: " << FLT_MAX << "\t\t";
    f = FLT_MAX;
    printData(reinterpret_cast<unsigned char*>(&f), sizeof(float));

    // 2) values computed with pow()
    float fmin, ftruemin, fmax;
    fmin = pow(2, -126);
    float fmantissa = 1.0;
    for (int i = 1; i <= 23; i++) // 1 + 2^(-1) + ... + 2^(-23): all fraction bits set
    {
        fmantissa += pow(2, -i);
    }
    fmax = fmantissa * pow(2, 127);
    cout << "float range (computed):\t\t[" << fmin << ", " << fmax << "]\n";
    cout << "fmin (computed): " << fmin << "\t";
    printData(reinterpret_cast<unsigned char*>(&fmin), sizeof(float));
    ftruemin = pow(2, -126) * pow(2, -23); // pow(2, -149); // 2^(-150) ~ 0
    cout << "ftruemin (comp): " << ftruemin << "\t";
    printData(reinterpret_cast<unsigned char*>(&ftruemin), sizeof(float));
    cout << "fmax (computed): " << fmax << "\t";
    printData(reinterpret_cast<unsigned char*>(&fmax), sizeof(float));

    // 3) values assembled from bits
    unsigned u = 1; // 00000000000000000000000000000001
    u <<= 23; // u = pow(2, 23); // 00000000100000000000000000000000
    fmin = fromBits<float>(u);
    cout << "fmin shifted: " << fmin << "\t";
    printData(reinterpret_cast<unsigned char*>(&fmin), sizeof(float));

    unsigned v = 1; // 00000000000000000000000000000001
    ftruemin = fromBits<float>(v);
    cout << "ftruemin shift: " << ftruemin << "\t";
    printData(reinterpret_cast<unsigned char*>(&ftruemin), sizeof(float));

    v = ~0u; // 11111111111111111111111111111111
    v >>= 1; // 01111111111111111111111111111111
    v &= ~u; // 01111111011111111111111111111111
    fmax = fromBits<float>(v);
    cout << "fmax shifted: " << fmax << "\t";
    printData(reinterpret_cast<unsigned char*>(&fmax), sizeof(float));
    cout << endl;

    // ------------------------------ double ------------------------------

    cout << "double: " << DBL_MANT_DIG << " mantissa digits";
    cout << "\t[" << DBL_MIN << ", " << DBL_MAX << "]" << endl;

    // 1) values taken from <cfloat>
    cout << "DBL_MIN: " << DBL_MIN << endl;
    double d = DBL_MIN;
    printData(reinterpret_cast<unsigned char*>(&d), sizeof(double));
    cout << "DBL_TRUE_MIN: " << DBL_TRUE_MIN << endl;
    d = DBL_TRUE_MIN;
    printData(reinterpret_cast<unsigned char*>(&d), sizeof(double));
    cout << "DBL_MAX: " << DBL_MAX << endl;
    d = DBL_MAX;
    printData(reinterpret_cast<unsigned char*>(&d), sizeof(double));

    // 2) values computed with pow()
    double dmin, dtruemin, dmax;
    dmin = pow(2, -1022);
    double dmantissa = 1.0;
    for (int i = 1; i <= 52; i++) // all 52 fraction bits set
    {
        dmantissa += pow(2, -i);
    }
    dmax = dmantissa * pow(2, 1023);
    cout << "double range (computed):\t[" << dmin << ", " << dmax << "]\n";
    cout << "dmin (computed): " << dmin << endl;
    printData(reinterpret_cast<unsigned char*>(&dmin), sizeof(double));
    dtruemin = pow(2, -1022) * pow(2, -52); // pow(2, -1074); // 2^(-1075) ~ 0
    cout << "dtruemin (comp): " << dtruemin << endl;
    printData(reinterpret_cast<unsigned char*>(&dtruemin), sizeof(double));
    cout << "dmax (computed): " << dmax << endl;
    printData(reinterpret_cast<unsigned char*>(&dmax), sizeof(double));

    // 3) values assembled from bits
    unsigned long long lu = 1; // 0...01
    lu <<= 52; // lu = pow(2, 52); // 0000000000010...0
    dmin = fromBits<double>(lu);
    cout << "dmin shifted: " << dmin << endl;
    printData(reinterpret_cast<unsigned char*>(&dmin), sizeof(double));

    unsigned long long lv = 1; // 0...01
    dtruemin = fromBits<double>(lv);
    cout << "dtruemin shift: " << dtruemin << endl;
    printData(reinterpret_cast<unsigned char*>(&dtruemin), sizeof(double));

    lv = ~0ULL; // 1...1
    lv >>= 1; // 011...11
    lv &= ~lu; // 01111111111011...11
    dmax = fromBits<double>(lv);
    cout << "dmax shifted: " << dmax << endl;
    printData(reinterpret_cast<unsigned char*>(&dmax), sizeof(double));
    cout << endl;

    // ---------------------------- long double ---------------------------

    /*
    On my computer,
    sizeof(double) = sizeof(long) = sizeof(long long) = 8
    sizeof(long double) = 2 * sizeof(double) = 16
    `long double' stored on 128 bits =
    48 zeros (or garbage) + 80 bits (extended precision)
    */

    cout << "long double: " << LDBL_MANT_DIG << " mantissa digits";
    cout << "\t[" << LDBL_MIN << ", " << LDBL_MAX << "]" << endl;

    // 1) values taken from <cfloat>
    cout << "LDBL_MIN: " << LDBL_MIN << endl;
    long double ld = LDBL_MIN;
    printData2(reinterpret_cast<unsigned char*>(&ld), sizeof(long double));
    cout << "LDBL_TRUE_MIN: " << LDBL_TRUE_MIN << endl;
    ld = LDBL_TRUE_MIN;
    printData2(reinterpret_cast<unsigned char*>(&ld), sizeof(long double));
    cout << "LDBL_MAX: " << LDBL_MAX << endl;
    ld = LDBL_MAX;
    printData2(reinterpret_cast<unsigned char*>(&ld), sizeof(long double));

    // 2) values computed with powl()
    long double ldmin, ldtruemin, ldmax;
    ldmin = powl(2, -16382);
    long double ldmantissa = 1.0;
    for (int i = 1; i <= 63; i++) // all 63 fraction bits set
    {
        ldmantissa += powl(2, -i);
    }
    ldmax = ldmantissa * powl(2, 16383);
    cout << "long double range (computed):\t[" << ldmin << ", " << ldmax << "]\n";
    cout << "ldmin (computed): " << ldmin << endl;
    printData2(reinterpret_cast<unsigned char*>(&ldmin), sizeof(long double));
    ldtruemin = powl(2, -16382) * powl(2, -63); // powl(2, -16445); // 2^(-16446) ~ 0
    cout << "ldtruemin (comp): " << ldtruemin << endl;
    printData2(reinterpret_cast<unsigned char*>(&ldtruemin), sizeof(long double));
    cout << "ldmax (computed): " << ldmax << endl;
    printData2(reinterpret_cast<unsigned char*>(&ldmax), sizeof(long double));

    // 3) values assembled from bits: word 0 is the 64-bit mantissa,
    //    the low 16 bits of word 1 are the sign and the exponent
    unsigned long long uarr[2] = {1, 1}; // 0...01, 0...01
    uarr[0] <<= 63; // uarr[0] = powl(2, 63); // 10...0
    ldmin = fromBits<long double>(uarr);
    cout << "ldmin shifted: " << ldmin << endl;
    printData2(reinterpret_cast<unsigned char*>(&ldmin), sizeof(long double));

    unsigned long long varr[2] = {1, 0}; // 0...01, 0...0
    ldtruemin = fromBits<long double>(varr); // 0...01
    cout << "ldtruemin shift: " << ldtruemin << endl; // label used to read "dtruemin"
    printData2(reinterpret_cast<unsigned char*>(&ldtruemin), sizeof(long double));

    varr[0] = varr[1] = ~0ULL; // 1...1
    varr[1] >>= 49; // 0...0111111111111111 (49 zeros)
    varr[1] &= ~uarr[1]; // 0..0111111111111110 (50 zeros)
    /*
    // Alternative for varr[1]:
    varr[1] >>= 50; // 0...011111111111111 (50 zeros)
    varr[1] <<= 1; // 0..0111111111111110 (50 zeros)
    */
    ldmax = fromBits<long double>(varr);
    cout << "ldmax shifted: " << ldmax << endl;
    printData2(reinterpret_cast<unsigned char*>(&ldmax), sizeof(long double));

    return 0;
}

// Display a byte in binary.
// Writes exactly eight characters ('0' or '1') to cout, from the most
// significant bit (bit 7) to the least significant bit (bit 0).
// No newline is written.
void printBinary(const unsigned char val)
{
    for(int i = 7; i >= 0; i--)
    { // print bits from first (most significant) to last (least significant)
        if(val & (1 << i)) // set (1) bit
        {
            cout << '1';
        }
        else // 0 bit
        {
            cout << '0';
        }
    }
} // end of printBinary() - this closing brace was missing

// Display a data type in binary, followed by endl.
// cp points to the first byte of the object and size is its size in bytes.
// Bytes are printed from the last one down to the first,
// e.g. for sizeof(float) = 4: cp[3], cp[2], cp[1], cp[0].
// Only the line end is written when size <= 0.
void printData(unsigned char* cp, int size)
{
    int pos = size; // one past the index of the next byte to print

    while(pos > 0)
    {
        --pos;
        printBinary(cp[pos]);
    }
    cout << endl;
}

// Display a data type in binary on two lines (used for long double).
// cp points to the first byte of the object and size is its size in bytes
// (assumed even). The upper half of the bytes is printed on the first line
// and the lower half on the second, each from the highest index down;
// for sizeof(long double) = 16 that is cp[15]..cp[8], then cp[7]..cp[0].
void printData2(unsigned char* cp, int size)
{
    const int split = size / 2; // index where the second line starts
    int pos = size;             // one past the index of the next byte to print

    while(pos > split) // second half: cp[15], ..., cp[8]
    {
        --pos;
        printBinary(cp[pos]);
    }
    cout << endl;

    while(pos > 0) // first half: cp[7], ..., cp[0]
    {
        --pos;
        printBinary(cp[pos]);
    }
    cout << endl;
}

/*

g++ -std=c++17 -E FloatLimits.cpp // preprocess to show contents of headers

// c++17 for FLT_DECIMAL_DIG, DBL_DECIMAL_DIG, LDBL_DECIMAL_DIG,

// FLT_TRUE_MIN, DBL_TRUE_MIN, LDBL_TRUE_MIN

// On disk: /usr/lib/gcc/x86_64-linux-gnu/9/include/float.h

// /usr/include/c++/9/cfloat

g++ -std=c++17 -E FloatLimits.cpp > Headers.txt // save to file

// Compile and run:

g++ -std=c++17 FloatLimits.cpp -o FloatLimits

./FloatLimits

FLT_DIG: 6, DBL_DIG: 15, LDBL_DIG: 18

FLT_DECIMAL_DIG: 9, DBL_DECIMAL_DIG: 17, LDBL_DECIMAL_DIG: 21

DECIMAL_DIG: 21

FLT_MIN_10_EXP: -37, DBL_MIN_10_EXP: -307, LDBL_MIN_10_EXP: -4931

FLT_MAX_10_EXP: 38, DBL_MAX_10_EXP: 308, LDBL_MAX_10_EXP: 4932

FLT_MIN_EXP: -125, DBL_MIN_EXP: -1021, LDBL_MIN_EXP: -16381

FLT_MAX_EXP: 128, DBL_MAX_EXP: 1024, LDBL_MAX_EXP: 16384

FLT_EPSILON: 1.19209e-07, DBL_EPSILON: 2.22045e-16, LDBL_EPSILON: 1.0842e-19

FLT_TRUE_MIN: 1.4013e-45, DBL_TRUE_MIN: 4.94066e-324, LDBL_TRUE_MIN: 3.6452e-4951

FLT_RADIX (Radix of exponent representation): 2 // base 2 (binary)

float: 24 mantissa digits [1.17549e-38, 3.40282e+38]

FLT_MIN: 1.17549e-38 00000000100000000000000000000000

FLT_TRUE_MIN: 1.4013e-45 00000000000000000000000000000001

FLT_MAX: 3.40282e+38 01111111011111111111111111111111

float range (computed): [1.17549e-38, 3.40282e+38]

fmin (computed): 1.17549e-38 00000000100000000000000000000000

ftruemin (comp): 1.4013e-45 00000000000000000000000000000001

fmax (computed): 3.40282e+38 01111111011111111111111111111111

fmin shifted: 1.17549e-38 00000000100000000000000000000000

ftruemin shift: 1.4013e-45 00000000000000000000000000000001

fmax shifted: 3.40282e+38 01111111011111111111111111111111

double: 53 mantissa digits [2.22507e-308, 1.79769e+308]

DBL_MIN: 2.22507e-308

0000000000010000000000000000000000000000000000000000000000000000

DBL_TRUE_MIN: 4.94066e-324

0000000000000000000000000000000000000000000000000000000000000001

DBL_MAX: 1.79769e+308

0111111111101111111111111111111111111111111111111111111111111111

double range (computed): [2.22507e-308, 1.79769e+308]

dmin (computed): 2.22507e-308

0000000000010000000000000000000000000000000000000000000000000000

dtruemin (comp): 4.94066e-324

0000000000000000000000000000000000000000000000000000000000000001

dmax (computed): 1.79769e+308

0111111111101111111111111111111111111111111111111111111111111111

dmin shifted: 2.22507e-308

0000000000010000000000000000000000000000000000000000000000000000

dtruemin shift: 4.94066e-324

0000000000000000000000000000000000000000000000000000000000000001

dmax shifted: 1.79769e+308

0111111111101111111111111111111111111111111111111111111111111111

long double: 64 mantissa digits [3.3621e-4932, 1.18973e+4932]

LDBL_MIN: 3.3621e-4932

0000000000000000000000000000000000000000000000000000000000000001

1000000000000000000000000000000000000000000000000000000000000000

LDBL_TRUE_MIN: 3.6452e-4951

0000000000000000000000000000000000000000000000000000000000000000

0000000000000000000000000000000000000000000000000000000000000001

LDBL_MAX: 1.18973e+4932

0000000000000000000000000000000000000000000000000111111111111110

1111111111111111111111111111111111111111111111111111111111111111

long double range (computed): [3.3621e-4932, 1.18973e+4932]

ldmin (computed): 3.3621e-4932

0000000000000000010101011001100000010111000011110000000000000001

1000000000000000000000000000000000000000000000000000000000000000

ldtruemin (comp): 3.6452e-4951

0000000000000000000000000000000100000000000000000000000000000000

0000000000000000000000000000000000000000000000000000000000000001

ldmax (computed): 1.18973e+4932

0000000000000000010101011001100000010111000011110111111111111110

1111111111111111111111111111111111111111111111111111111111111111

ldmin shifted: 3.3621e-4932

0000000000000000010101011001100000010111000011110000000000000001

1000000000000000000000000000000000000000000000000000000000000000

dtruemin shift: 3.6452e-4951

0000000000000000000000000000000100000000000000000000000000000000

0000000000000000000000000000000000000000000000000000000000000001

ldmax shifted: 1.18973e+4932

0000000000000000010101011001100000010111000011110111111111111110

1111111111111111111111111111111111111111111111111111111111111111

*/

Notes: We have to preprocess and compile with the option for C++ 2017 (-std=c++17) for the constants FLT_DECIMAL_DIG, DBL_DECIMAL_DIG, LDBL_DECIMAL_DIG, FLT_TRUE_MIN, DBL_TRUE_MIN, LDBL_TRUE_MIN.
Note how the preprocessor replaces these constants (and others) in the generated file (Headers.txt, last part).

Note the difference in syntax for casting in C and C++:
(long double*)uarr
reinterpret_cast<long double*>(uarr)
reinterpret_cast<long double*>uarr // compile error

*****************************************************************************************

Exercise 3-25 TCP1, p. 229 (Print bits of float and double in C/C++)

Exercise 3-25. Define a float variable. Take its address, cast that address to an unsigned char*, and assign it to an unsigned char pointer. Using this pointer and [], index into the float variable and use the printBinary() function defined in this chapter (Chapter_3) to print out a map of the float (go from 0 to sizeof(float) ). Change the value of the float and see if you can figure out what’s going on (the float contains encoded data).

Contents: floatbinrev.c FloatBinRev.cpp

Note: See also ch3-FloatBinary.

floatbinrev.c download

// float binary reversed

#include <stdio.h> // for printf(), putchar()

#include <stdlib.h> // for atof(), exit()

void printBinary(const unsigned char val); // Display a byte in binary

void printBinRev(const unsigned char val); // reversed

void printData(unsigned char*, int); // Display a data type in binary

void printDataRev(unsigned char*, int); // reversed

// Entry point: show the bit pattern of one number, stored both as a float
// and as a double, in normal and in reversed bit order.
// argv[1] must hold the number; any other argument count is rejected.
int main(int argc, char* argv[])
{
    if(2 != argc) // program name plus exactly one number
    {
        printf("Must provide a number\n");
        exit(1); // terminate immediately; status 1 reports the error
    }

    double wide = atof(argv[1]); // the argument parsed as a double
    float narrow = wide;         // the same value converted to float

    // byte-wise views of the two objects
    unsigned char* floatBytes = (unsigned char*)&narrow;
    unsigned char* doubleBytes = (unsigned char*)&wide;

    printData(floatBytes, sizeof narrow);    // bytes 3, 2, 1, 0
    printDataRev(floatBytes, sizeof narrow); // bytes 0, 1, 2, 3
    printData(doubleBytes, sizeof wide);     // bytes 7, 6, ..., 1, 0
    printDataRev(doubleBytes, sizeof wide);  // bytes 0, 1, ..., 6, 7

    return 0; // success
}

// Display a byte in binary, most significant bit first.
// val: the byte to show.
// Writes exactly eight characters ('0' or '1') to stdout with putchar();
// no separator or newline is added.
void printBinary(const unsigned char val)
{
    int bit; // single-bit mask
    for(bit = 128; bit > 0; bit >>= 1) // 2^7, 2^6, ..., 2^1, 2^0
    {
        if(val & bit) // set (1) bit
        {putchar('1');}
        else {putchar('0');} // 0 bit
    }
}

// Display a byte in binary with the bit order reversed
// (least significant bit first).
// val: the byte to show.
// Writes exactly eight characters ('0' or '1') to stdout with putchar();
// no separator or newline is added.
void printBinRev(const unsigned char val)
{
    int bit; // single-bit mask
    for(bit = 1; bit <= 128; bit <<= 1) // 2^0, 2^1, ..., 2^6, 2^7
    {
        if(val & bit) // set (1) bit
        {putchar('1');}
        else {putchar('0');} // 0 bit
    }
}

// Display an object in binary, one line per object.
// cp: address of the object's first byte; size: number of bytes to show.
// Bytes are emitted from the highest index down to index 0
// (for a 4-byte float: cp[3], cp[2], cp[1], cp[0]), then a newline.
void printData(unsigned char* cp, int size)
{
    while(size > 0)
    {
        --size; // step down to the next byte index
        printBinary(cp[size]);
    }
    putchar('\n');
}

// Display an object in binary with everything reversed, one line per object.
// cp: address of the object's first byte; size: number of bytes to show.
// Bytes are emitted from index 0 upwards (for a 4-byte float:
// cp[0], cp[1], cp[2], cp[3]), each with its bits reversed, then a newline.
void printDataRev(unsigned char* cp, int size)
{
    int remaining = size;
    const unsigned char* current = cp;
    while(remaining > 0)
    {
        printBinRev(*current);
        ++current;
        --remaining;
    }
    putchar('\n');
}

/*

gcc floatbinrev.c -o floatbinrev

./floatbinrev

Must provide a number

./floatbinrev 0 1

Must provide a number // exactly one number

./floatbinrev 0

00000000000000000000000000000000 // single-precision floating-point

00000000000000000000000000000000 // reversed

0000000000000000000000000000000000000000000000000000000000000000 // double

0000000000000000000000000000000000000000000000000000000000000000 // reversed

./floatbinrev 1

00111111100000000000000000000000

00000000000000000000000111111100

0011111111110000000000000000000000000000000000000000000000000000

0000000000000000000000000000000000000000000000000000111111111100

./floatbinrev 2

01000000000000000000000000000000

00000000000000000000000000000010

0100000000000000000000000000000000000000000000000000000000000000

0000000000000000000000000000000000000000000000000000000000000010

./floatbinrev 3

01000000010000000000000000000000

00000000000000000000001000000010

0100000000001000000000000000000000000000000000000000000000000000

0000000000000000000000000000000000000000000000000001000000000010

./floatbinrev 4

01000000100000000000000000000000

00000000000000000000000100000010

0100000000010000000000000000000000000000000000000000000000000000

0000000000000000000000000000000000000000000000000000100000000010

./floatbinrev 0.1

00111101110011001100110011001101

10110011001100110011001110111100

0011111110111001100110011001100110011001100110011001100110011010

0101100110011001100110011001100110011001100110011001110111111100

./floatbinrev 0.2

00111110010011001100110011001101

10110011001100110011001001111100

0011111111001001100110011001100110011001100110011001100110011010

0101100110011001100110011001100110011001100110011001001111111100

./floatbinrev -0.1

10111101110011001100110011001101

10110011001100110011001110111101

1011111110111001100110011001100110011001100110011001100110011010

0101100110011001100110011001100110011001100110011001110111111101

./floatbinrev -.2

10111110010011001100110011001101

10110011001100110011001001111101

1011111111001001100110011001100110011001100110011001100110011010

0101100110011001100110011001100110011001100110011001001111111101

./floatbinrev 15

01000001011100000000000000000000

00000000000000000000111010000010

0100000000101110000000000000000000000000000000000000000000000000

0000000000000000000000000000000000000000000000000111010000000010

./floatbinrev 15.1

01000001011100011001100110011010

01011001100110011000111010000010

0100000000101110001100110011001100110011001100110011001100110011

1100110011001100110011001100110011001100110011000111010000000010

./floatbinrev 15.2

01000001011100110011001100110011

11001100110011001100111010000010

0100000000101110011001100110011001100110011001100110011001100110

0110011001100110011001100110011001100110011001100111010000000010

./floatbinrev 15.3

01000001011101001100110011001101

10110011001100110010111010000010

0100000000101110100110011001100110011001100110011001100110011010

0101100110011001100110011001100110011001100110010111010000000010

*/

*****************************************************************************************

FloatBinRev.cpp download

// float binary reversed

#include <cstdlib> // for atof(), exit()

#include <iostream>

using std::cout;

using std::endl;

void printBinary(const unsigned char val); // Display a byte in binary

void printBinRev(const unsigned char val); // reversed

void printData(unsigned char*, int); // Display a data type in binary

void printDataRev(unsigned char*, int); // reversed

// Entry point: show the bit pattern of one number, stored both as a float
// and as a double, in normal and in reversed bit order.
// argv[1] must hold the number; any other argument count is rejected.
int main(int argc, char* argv[])
{
    if(2 != argc) // program name plus exactly one number
    {
        cout << "Must provide a number" << endl;
        exit(1); // terminate immediately; status 1 reports the error
    }

    double wide = atof(argv[1]); // the argument parsed as a double
    float narrow = wide;         // the same value converted to float

    // byte-wise views of the two objects
    unsigned char* floatBytes = reinterpret_cast<unsigned char*>(&narrow);
    unsigned char* doubleBytes = reinterpret_cast<unsigned char*>(&wide);

    printData(floatBytes, sizeof narrow);    // bytes 3, 2, 1, 0
    printDataRev(floatBytes, sizeof narrow); // bytes 0, 1, 2, 3
    printData(doubleBytes, sizeof wide);     // bytes 7, 6, ..., 1, 0
    printDataRev(doubleBytes, sizeof wide);  // bytes 0, 1, ..., 6, 7

    return 0; // success
}

// Display a byte in binary, most significant bit first.
// val: the byte to show.
// Sends exactly eight characters ('0' or '1') to cout;
// no separator or newline is added.
void printBinary(const unsigned char val)
{
    for(int bit = 128; bit > 0; bit >>= 1) // 2^7, 2^6, ..., 2^1, 2^0
    {
        if(val & bit) // set (1) bit
        {cout << "1";}
        else {cout << "0";} // 0 bit
    }
}

// Display a byte in binary with the bit order reversed
// (least significant bit first).
// val: the byte to show.
// Sends exactly eight characters ('0' or '1') to cout;
// no separator or newline is added.
void printBinRev(const unsigned char val)
{
    for(int bit = 1; bit <= 128; bit <<= 1) // 2^0, 2^1, ..., 2^6, 2^7
    {
        if(val & bit) // set (1) bit
        {cout << "1";}
        else {cout << "0";} // 0 bit
    }
}

// Display an object in binary, one line per object.
// cp: address of the object's first byte; size: number of bytes to show.
// Bytes are emitted from the highest index down to index 0
// (for a 4-byte float: cp[3], cp[2], cp[1], cp[0]), then the line is ended.
void printData(unsigned char* cp, int size)
{
    while(size > 0)
    {
        --size; // step down to the next byte index
        printBinary(cp[size]);
    }
    cout << endl;
}

// Display an object in binary with everything reversed, one line per object.
// cp: address of the object's first byte; size: number of bytes to show.
// Bytes are emitted from index 0 upwards (for a 4-byte float:
// cp[0], cp[1], cp[2], cp[3]), each with its bits reversed,
// then the line is ended.
void printDataRev(unsigned char* cp, int size)
{
    int remaining = size;
    const unsigned char* current = cp;
    while(remaining > 0)
    {
        printBinRev(*current);
        ++current;
        --remaining;
    }
    cout << endl;
}

/*

g++ FloatBinRev.cpp -o FloatBinRev

./FloatBinRev

Must provide a number

./FloatBinRev 0 1

Must provide a number // exactly one number

./FloatBinRev 0

00000000000000000000000000000000 // single-precision floating-point

00000000000000000000000000000000 // reversed

0000000000000000000000000000000000000000000000000000000000000000 // double

0000000000000000000000000000000000000000000000000000000000000000 // reversed

./FloatBinRev 1

00111111100000000000000000000000

00000000000000000000000111111100

0011111111110000000000000000000000000000000000000000000000000000

0000000000000000000000000000000000000000000000000000111111111100

./FloatBinRev 2

01000000000000000000000000000000

00000000000000000000000000000010

0100000000000000000000000000000000000000000000000000000000000000

0000000000000000000000000000000000000000000000000000000000000010

./FloatBinRev 3

01000000010000000000000000000000

00000000000000000000001000000010

0100000000001000000000000000000000000000000000000000000000000000

0000000000000000000000000000000000000000000000000001000000000010

./FloatBinRev 4

01000000100000000000000000000000

00000000000000000000000100000010

0100000000010000000000000000000000000000000000000000000000000000

0000000000000000000000000000000000000000000000000000100000000010

./FloatBinRev 0.1

00111101110011001100110011001101

10110011001100110011001110111100

0011111110111001100110011001100110011001100110011001100110011010

0101100110011001100110011001100110011001100110011001110111111100

./FloatBinRev 0.2

00111110010011001100110011001101

10110011001100110011001001111100

0011111111001001100110011001100110011001100110011001100110011010

0101100110011001100110011001100110011001100110011001001111111100

./FloatBinRev -0.1

10111101110011001100110011001101

10110011001100110011001110111101

1011111110111001100110011001100110011001100110011001100110011010

0101100110011001100110011001100110011001100110011001110111111101

./FloatBinRev -.2

10111110010011001100110011001101

10110011001100110011001001111101

1011111111001001100110011001100110011001100110011001100110011010

0101100110011001100110011001100110011001100110011001001111111101

./FloatBinRev 15

01000001011100000000000000000000

00000000000000000000111010000010

0100000000101110000000000000000000000000000000000000000000000000

0000000000000000000000000000000000000000000000000111010000000010

./FloatBinRev 15.1

01000001011100011001100110011010

01011001100110011000111010000010

0100000000101110001100110011001100110011001100110011001100110011

1100110011001100110011001100110011001100110011000111010000000010

./FloatBinRev 15.2

01000001011100110011001100110011

11001100110011001100111010000010

0100000000101110011001100110011001100110011001100110011001100110

0110011001100110011001100110011001100110011001100111010000000010

./FloatBinRev 15.3

01000001011101001100110011001101

10110011001100110010111010000010

0100000000101110100110011001100110011001100110011001100110011010

0101100110011001100110011001100110011001100110010111010000000010

*/

*****************************************************************************************

Exercise 3-26 TCP1, p. 229 (Set bytes in arrays to a specified value)

Exercise 3-26. Define an array of int. Take the starting address of that array and use static_cast to convert it into a void*. Write a function that takes a void*, a number (indicating a number of bytes), and a value (indicating the value to which each byte should be set) as arguments. The function should set each byte in the specified range to the specified value. Try out the function on your array of int.

Contents: arrays.c Arrays.cpp

arrays.c download

#include <stdio.h> // for printf(), putchar()

#include <math.h> // for pow()

#define SIZE 10

// we assume sizeof(char) = 1 byte

void set(char* array, int bytes, int value); // initialize or set array

void print(void* array, int size, int type); // print array

// type: sizeof(data type)

// Demonstration driver: for a char, a short and an int array in turn,
// print the array as declared (uninitialized), then overwrite all of its
// bytes with set() using the values 65 (char array only), 0, 1 and 0xff,
// printing the elements after each pass.

int main()

{

char charArray[SIZE];

// automatically convert char* (charArray) to void* in function call:

print(charArray, SIZE, sizeof(char)); // print uninitialized array (indeterminate contents)

set(charArray, SIZE, 65); // initialize array (65 is ASCII for 'A')

print(charArray, SIZE, sizeof(char)); // print initialized array

short shArray[SIZE];

// automatically convert short* (shArray) to void* in function call:

print(shArray, SIZE, sizeof(short)); // print uninitialized array (indeterminate contents)

char* cp = (char*)shArray; // &shArray[0], viewed as bytes for set()

/*

// Alternative conversion:

void* p = (void*)shArray; // &shArray[0]

char* cp = (char*)p;

*/

set(cp, SIZE*sizeof(short), 0); // initialize array to 0 (byte count = elements * element size)

print(shArray, SIZE, sizeof(short)); // print initialized array

set(cp, SIZE*sizeof(short), 1); // set each byte to 00000001

// value of a two-byte short whose bytes are both 00000001:

int i = 1 + pow(2, 8);

printf("1 + pow(2, 8) = %d\n", i);

print(shArray, SIZE, sizeof(short)); // print array (compare with the value above)

set(cp, SIZE*sizeof(short), 0xff); // set array to 1...1 (every bit set)

print(shArray, SIZE, sizeof(short)); // print array

int intArray[SIZE/2];

// automatically convert int* (intArray) to void* in function call:

print(intArray, SIZE/2, sizeof(int)); // print uninitialized array (indeterminate contents)

cp = (char*)intArray; // &intArray[0], viewed as bytes for set()

set(cp, (SIZE/2)*sizeof(int), 0); // initialize array to 0

print(intArray, SIZE/2, sizeof(int)); // print initialized array

set(cp, (SIZE/2)*sizeof(int), 1); // set each byte to 00000001

// value of a four-byte int whose bytes are all 00000001:

i = 1 + pow(2, 8) + pow(2, 16) + pow(2, 24);

printf("1 + pow(2, 8) + pow(2, 16) + pow(2, 24) = %d\n", i);

print(intArray, SIZE/2, sizeof(int)); // print array (compare with the value above)

set(cp, (SIZE/2)*sizeof(int), 0xff); // set array to 1...1 (every bit set)

print(intArray, SIZE/2, sizeof(int)); // print array

return 0;

}

// Initialize or overwrite a contiguous region of memory byte by byte.
// array: address of the first byte to write
// bytes: how many bytes to write (nothing is written when bytes <= 0)
// value: stored in every byte after conversion to char
void set(char* array, int bytes, int value)
{
    int remaining = bytes;
    char* cursor = array;
    while(remaining > 0)
    {
        *cursor = value;
        ++cursor;
        --remaining;
    }
}

// Print the elements of an array on one line, each followed by ", ".
// array: address of the first element
// size:  number of elements to print
// type:  sizeof the element type; selects char ("%c"), short or int ("%d").
//        Any other value prints "Not implemented" instead of elements.
// The line is always terminated with a newline.
void print(void* array, int size, int type)
{
    int k;

    if(type == (int)sizeof(char))
    {
        const char* chars = (const char*)array;
        for(k = 0; k < size; ++k)
        {printf("%c, ", chars[k]);}
    }
    else if(type == (int)sizeof(short))
    {
        const short* shorts = (const short*)array;
        for(k = 0; k < size; ++k)
        {printf("%d, ", shorts[k]);}
    }
    else if(type == (int)sizeof(int))
    {
        const int* ints = (const int*)array;
        for(k = 0; k < size; ++k)
        {printf("%d, ", ints[k]);}
    }
    else
    {
        printf("Not implemented");
    }

    putchar('\n');
}

/*

gcc arrays.c -o arrays -lm // link math library

./arrays

, , @, Q, �, p, 6, , , b, // garbage

A, A, A, A, A, A, A, A, A, A,

640, 0, 0, 0, -1, 0, 1, 0, 20496, -26493, // garbage

0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

1 + pow(2, 8) = 257

257, 257, 257, 257, 257, 257, 257, 257, 257, 257,

-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,

2017608424, 32552, -298109536, 22041, 0, // garbage

0, 0, 0, 0, 0,

1 + pow(2, 8) + pow(2, 16) + pow(2, 24) = 16843009

16843009, 16843009, 16843009, 16843009, 16843009,

-1, -1, -1, -1, -1,

*/

Note: After setting each byte to 1 (00000001),

shArray[0] = 0000000100000001 = 1+2^8 = 257,

intArray[0] = 00000001000000010000000100000001 = 1+2^8+2^16+2^24 = 16843009.

*****************************************************************************************

Arrays.cpp download

#include <iostream>

#include <cmath> // for pow()

using std::cout;

using std::endl;

#define SIZE 10

// we assume sizeof(char) = 1 byte

void set(char* array, int bytes, int value); // initialize array

template <typename T> // generics

void print(T* array, int size); // print array

// Demonstration driver: for a char, a short and an int array in turn,
// print the array as declared (uninitialized), then overwrite all of its
// bytes with set() using the values 65 (char array only), 0, 1 and 0xFF,
// printing the elements after each pass.

int main()

{

char charArray[SIZE];

print<char>(charArray, SIZE); // print uninitialized array (indeterminate contents)

set(charArray, SIZE, 65); // initialize array (65 is ASCII for 'A')

print<char>(charArray, SIZE); // print initialized array

short shArray[SIZE];

print<short>(shArray, SIZE); // print uninitialized array (indeterminate contents)

// two static_casts (short* -> void* -> char*) give set() a byte-wise view:

void* p = static_cast<void*>(shArray); // &shArray[0]

char* cp = static_cast<char*>(p);

set(cp, SIZE*sizeof(short), 0); // initialize array to 0 (byte count = elements * element size)

print<short>(shArray, SIZE); // print initialized array

set(cp, SIZE*sizeof(short), 1); // set each byte to 00000001

// value of a two-byte short whose bytes are both 00000001:

int i = 1 + pow(2, 8);

cout << "1 + pow(2, 8) = " << i << endl;

print<short>(shArray, SIZE); // print array (compare with the value above)

set(cp, SIZE*sizeof(short), 0XFF); // set array to 1...1 (every bit set)

print<short>(shArray, SIZE); // print array

int intArray[SIZE/2];

print<int>(intArray, SIZE/2); // print uninitialized array (indeterminate contents)

p = static_cast<void*>(intArray); // &intArray[0]

cp = static_cast<char*>(p);

set(cp, (SIZE/2)*sizeof(int), 0); // initialize array to 0

print<int>(intArray, SIZE/2); // print initialized array

set(cp, (SIZE/2)*sizeof(int), 1); // set each byte to 00000001

// value of a four-byte int whose bytes are all 00000001:

i = 1 + pow(2, 8) + pow(2, 16) + pow(2, 24);

cout << "1 + pow(2, 8) + pow(2, 16) + pow(2, 24) = " << i << endl;

print<int>(intArray, SIZE/2); // print array (compare with the value above)

set(cp, (SIZE/2)*sizeof(int), 0XFF); // set array to 1...1 (every bit set)

print<int>(intArray, SIZE/2); // print array

return 0;

}

// Initialize or overwrite a contiguous region of memory byte by byte.
// array: address of the first byte to write
// bytes: how many bytes to write (nothing is written when bytes <= 0)
// value: stored in every byte after conversion to char
void set(char* array, int bytes, int value)
{
    for (int left = bytes; left > 0; --left)
    {
        *array = value;
        ++array; // advance to the next byte
    }
}

// Print the elements of an array on one line.
// T:     element type; it must be streamable to cout with operator<<
// array: address of the first element
// size:  number of elements to print
// Every element is followed by ", "; the line is then ended with endl.
template <typename T>
void print(T* array, int size)
{
    int index = 0;
    while (index < size)
    {
        cout << array[index] << ", ";
        ++index;
    }
    cout << endl;
}

/*

g++ Arrays.cpp -o Arrays

./Arrays

, , @, Q, �, p, 6, , , b, // garbage

A, A, A, A, A, A, A, A, A, A,

640, 0, 0, 0, -1, 0, 1, 0, 20496, -26493, // garbage

0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

1 + pow(2, 8) = 257

257, 257, 257, 257, 257, 257, 257, 257, 257, 257,

-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,

2017608424, 32552, -298109536, 22041, 0, // garbage

0, 0, 0, 0, 0,

1 + pow(2, 8) + pow(2, 16) + pow(2, 24) = 16843009

16843009, 16843009, 16843009, 16843009, 16843009,

-1, -1, -1, -1, -1,

*/

Notes: See Generics_in_C++ on GeeksForGeeks.

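With static_cast the conversion takes two steps (short* to void*, then void* to char*), because static_cast cannot convert directly between unrelated pointer types: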

void* p = static_cast<void*>(shArray); // &shArray[0]
char* cp = static_cast<char*>(p);

compared to the C style conversion used in arrays.c:

char* cp = (char*)shArray; // &shArray[0]

Alternatively, we can use a reinterpret_cast:

char* cp = reinterpret_cast<char*>(shArray); // &shArray[0]

*****************************************************************************************

Exercise 3-27 TCP1, p. 229 (const and volatile arrays)

Exercise 3-27. Create a const array of double and a volatile array of double. Index through each array and use const_cast to cast each element to non-const and non-volatile, respectively, and assign a value to each element.

Contents: const.c Const.cpp

const.c download

#include <stdio.h> // for printf(), putchar()

#define SIZE 10

void print(double* array, int size);

void set(double* array, int size);

// Demonstration driver for const and volatile arrays of double:
// the qualifiers are cast away with (double*) so that the arrays can be
// passed to print()/set() and assigned element by element.
// NOTE: the C standard leaves both writing to an object defined const and
// accessing an object defined volatile through an unqualified pointer
// undefined; this program does so deliberately, as the exercise requests.

int main()

{

int i;

const double carr[SIZE]; // uninitialized

volatile double varr[SIZE]; // arrays

print((double*)carr, SIZE); // cast avoids const warning; contents are indeterminate

print((double*)varr, SIZE); // cast avoids volatile warning; contents are indeterminate

double* dp = (double*)carr; // unqualified view of the const array

for (i = 0; i < SIZE; i++) {dp[i] = i;} // carr[i] = i

dp = (double*)varr; // unqualified view of the volatile array

for (i = 0; i < SIZE; i++) {dp[i] = i*i;} // varr[i] = i squared

print((double*)carr, SIZE);

print((double*)varr, SIZE);

const double carray[SIZE] = {0,1}; // initialized

volatile double varray[SIZE] = {0,1}; // arrays (elements without an initializer are zero)

print((double*)carray, SIZE);

print((double*)varray, SIZE);

set((double*)carray, SIZE); // overwrite through the cast pointer: element i becomes i

set((double*)varray, SIZE);

print((double*)carray, SIZE);

print((double*)varray, SIZE);

return 0;

}

// Print an array of double on one line.
// array: address of the first element
// size:  number of elements to print
// Each element is written with "%g" followed by ", "; a newline ends the line.
void print(double* array, int size)
{
    int remaining = size;
    const double* current = array;
    while (remaining > 0)
    {
        printf("%g, ", *current);
        ++current;
        --remaining;
    }
    putchar('\n');
}

// Fill an array of double so that every element holds its own index.
// array: address of the first element
// size:  number of elements to assign (nothing is written when size <= 0)
void set(double* array, int size)
{
    int index = 0;
    while (index < size)
    {
        array[index] = index; // 0, 1, 2, ...
        ++index;
    }
}

/*

gcc const.c -o const

./const

6.79039e-313, 0, 0, 0, 4.94066e-324, 2.15251e-314, 0, 0, 0, 0, // garbage

0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // garbage

0, 1, 2, 3, 4, 5, 6, 7, 8, 9,

0, 1, 4, 9, 16, 25, 36, 49, 64, 81,

0, 1, 0, 0, 0, 0, 0, 0, 0, 0,

0, 1, 2, 3, 4, 5, 6, 7, 8, 9,

*/

*****************************************************************************************

Const.cpp download

#include <iostream>

using std::cout;

using std::endl;

#define SIZE 10

void print(double* array, int size);

void set(double* array, int size);

// Demonstration driver for const and volatile arrays of double:
// const_cast removes the qualifiers so that the arrays can be passed to
// print()/set() and assigned element by element.
// NOTE: the C++ standard leaves both writing to an object defined const and
// accessing an object defined volatile through a non-volatile pointer
// undefined; this program does so deliberately, as the exercise requests.

int main()

{

// const double carr[SIZE]; // compile error: uninitialized const array

const double carr[SIZE] = {}; // empty initializer (all zero)

volatile double varr[SIZE]; // uninitialized volatile array

print(const_cast<double*>(carr), SIZE); // cast avoids const error

print(const_cast<double*>(varr), SIZE/2); // cast avoids volatile error; only the first SIZE/2 (indeterminate) elements are printed

// double* dp = const_cast<double*>carr; // compile error: missing parentheses

double* dp = const_cast<double*>(carr); // OK, cast op uses func call syntax:

// cast_operator<data_type pointer or reference>(operand)

for (int i = 0; i < SIZE; i++) {dp[i] = i;} // carr[i] = i

dp = const_cast<double*>(varr); // unqualified view of the volatile array

for (int i = 0; i < SIZE; i++) {dp[i] = i*i;} // varr[i] = i squared

print(const_cast<double*>(carr), SIZE);

print(const_cast<double*>(varr), SIZE);

const double carray[SIZE] = {0,1}; // initialized

volatile double varray[SIZE] = {0,1}; // arrays (elements without an initializer are zero)

print(const_cast<double*>(carray), SIZE);

print(const_cast<double*>(varray), SIZE);

set(const_cast<double*>(carray), SIZE); // overwrite through the cast pointer: element i becomes i

set(const_cast<double*>(varray), SIZE);

print(const_cast<double*>(carray), SIZE);

print(const_cast<double*>(varray), SIZE);

return 0;

}

// Print an array of double on one line.
// array: address of the first element
// size:  number of elements to print
// Each element is streamed to cout followed by ", "; endl ends the line.
void print(double* array, int size)
{
    int index = 0;
    while (index < size)
    {
        cout << array[index] << ", ";
        ++index;
    }
    cout << endl;
}

// Fill an array of double so that every element holds its own index.
// array: address of the first element
// size:  number of elements to assign (nothing is written when size <= 0)
void set(double* array, int size)
{
    int index = 0;
    while (index < size)
    {
        array[index] = index; // 0, 1, 2, ...
        ++index;
    }
}

/*

g++ Const.cpp -o Const

./Const

0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

6.91459e-310, 6.95299e-310, 6.91459e-310, 2.96439e-323, 3.39519e-313, // garbage

0, 1, 2, 3, 4, 5, 6, 7, 8, 9,

0, 1, 4, 9, 16, 25, 36, 49, 64, 81,

0, 1, 0, 0, 0, 0, 0, 0, 0, 0,

0, 1, 2, 3, 4, 5, 6, 7, 8, 9,

*/

Note: See also ch3-ConstCast in Section ch3-Operators.

*****************************************************************************************

Exercise 3-28 TCP1, p. 230 (Print, set bytes in double arrays)

Exercise 3-28. Create a function that takes a pointer to an array of double and a value indicating the size of that array. The function should print each element in the array. Now create an array of double and initialize each element to zero, then use your function to print the array. Next use reinterpret_cast to cast the starting address of your array to an unsigned char*, and set each byte of the array to 1 (hint: you’ll need to use sizeof to calculate the number of bytes in a double). Now use your array-printing function to print the results. Why do you think each element was not set to the value 1.0?

Contents: double.c Double.cpp

double.c download

#include <stdio.h> // for printf(), putchar()

#define FLOAT 0 // used

#define DOUBLE 1 // for

#define LONGDOUBLE 2 // printing

#define SIZE 5 // array size

// we assume sizeof(char) = 1 byte

void set(char* array, int bytes, int value); // initialize or set array

void print(void* array, int size, int type); // print array

// type: 0 for float, 1 for double, 2 for long double

void printBinary(const unsigned char val); // Display a byte in binary

void printData(unsigned char*, int); // Display a data type in binary

void printData2(unsigned char*, int); // Display data type in binary (2 parts)

// Demonstration driver: for a float, a double and a long double array in
// turn, print the array as declared (uninitialized), then overwrite all of
// its bytes with set() using the values 0, 1 and 0xff. After each step the
// elements are printed as numbers and the first element is shown in binary.

int main()

{

float fArray[SIZE];

// automatically convert float* (fArray) to void* in function call:

print(fArray, SIZE, FLOAT); // print uninitialized array (indeterminate contents)

printData((unsigned char*)(fArray), sizeof(float)); // bits of fArray[0] (&fArray[0])

char* cp = (char*)fArray; // &fArray[0], viewed as bytes for set()

/*

// Alternative conversion:

void* p = (void*)fArray; // &fArray[0]

char* cp = (char*)p;

*/

set(cp, SIZE*sizeof(float), 0); // initialize array to 0 (byte count = elements * element size)

print(fArray, SIZE, FLOAT); // print initialized array

printData((unsigned char*)(fArray), sizeof(float)); // bits of fArray[0]

set(cp, SIZE*sizeof(float), 1); // set each byte to 00000001

print(fArray, SIZE, FLOAT); // print array

printData((unsigned char*)(fArray), sizeof(float)); // bits of fArray[0]

set(cp, SIZE*sizeof(float), 0xff); // set array to 1...1 (every bit set)

print(fArray, SIZE, FLOAT); // print array

printData((unsigned char*)(fArray), sizeof(float)); // bits of fArray[0]

putchar('\n'); // blank line between the float and double sections

double dArray[SIZE];

// automatically convert double* (dArray) to void* in function call:

print(dArray, SIZE, DOUBLE); // print uninitialized array (indeterminate contents)

printData((unsigned char*)(dArray), sizeof(double)); // bits of dArray[0] (&dArray[0])

cp = (char*)dArray; // &dArray[0]

set(cp, SIZE*sizeof(double), 0); // initialize array to 0

print(dArray, SIZE, DOUBLE); // print initialized array

printData((unsigned char*)(dArray), sizeof(double)); // bits of dArray[0]

set(cp, SIZE*sizeof(double), 1); // set each byte to 00000001

print(dArray, SIZE, DOUBLE); // print array

printData((unsigned char*)(dArray), sizeof(double)); // bits of dArray[0]

set(cp, SIZE*sizeof(double), 0xff); // set array to 1...1 (every bit set)

print(dArray, SIZE, DOUBLE); // print array

printData((unsigned char*)(dArray), sizeof(double)); // bits of dArray[0]

putchar('\n'); // blank line between the double and long double sections

long double ldArray[SIZE];

// automatically convert long double* (ldArray) to void* in function call:

print(ldArray, SIZE, LONGDOUBLE); // print uninitialized array (indeterminate contents)

// bits of the first three uninitialized elements, two output lines each:

printData2((unsigned char*)(ldArray), sizeof(long double)); // &ldArray[0]

printData2((unsigned char*)(ldArray+1), sizeof(long double)); // &ldArray[1]

printData2((unsigned char*)(&ldArray[2]), sizeof(long double));

cp = (char*)ldArray; // &ldArray[0]

set(cp, SIZE*sizeof(long double), 0); // initialize array to 0

print(ldArray, SIZE, LONGDOUBLE); // print initialized array

printData2((unsigned char*)(ldArray), sizeof(long double)); // bits of ldArray[0]

set(cp, SIZE*sizeof(long double), 1); // set each byte to 00000001

print(ldArray, SIZE, LONGDOUBLE); // print array

printData2((unsigned char*)(ldArray), sizeof(long double)); // bits of ldArray[0]

set(cp, SIZE*sizeof(long double), 0xff); // set array to 1...1 (every bit set)

print(ldArray, SIZE, LONGDOUBLE); // print array

printData2((unsigned char*)(ldArray), sizeof(long double)); // bits of ldArray[0]

return 0;

}

// Initialize or overwrite a contiguous region of memory byte by byte.
// array: address of the first byte to write
// bytes: how many bytes to write (nothing is written when bytes <= 0)
// value: stored in every byte after conversion to char
void set(char* array, int bytes, int value)
{
    int offset = 0;
    while(offset < bytes)
    {
        array[offset] = value;
        ++offset;
    }
}

// Print the elements of a floating-point array on one line, each followed
// by ", ".
// array: address of the first element
// size:  number of elements to print
// type:  FLOAT (0), DOUBLE (1) or LONGDOUBLE (2) selects the element type;
//        any other value prints "Not implemented" instead of elements.
// The line is always terminated with a newline.
void print(void* array, int size, int type)
{
    int k;

    if(type == FLOAT)
    {
        const float* floats = (const float*)array;
        for(k = 0; k < size; ++k)
        {printf("%g, ", floats[k]);}
    }
    else if(type == DOUBLE)
    {
        const double* doubles = (const double*)array;
        for(k = 0; k < size; ++k)
        {printf("%g, ", doubles[k]);}
    }
    else if(type == LONGDOUBLE)
    {
        const long double* longDoubles = (const long double*)array;
        for(k = 0; k < size; ++k)
        {printf("%Lg, ", longDoubles[k]);}
    }
    else
    {
        printf("Not implemented");
    }

    putchar('\n');
}

// Display a byte in binary, most significant bit first.
// val: the byte to show.
// Writes exactly eight characters ('0' or '1') to stdout with putchar();
// no separator or newline is added.
void printBinary(const unsigned char val)
{
    int i; // bit position, 7 (most significant) down to 0
    for(i = 7; i >= 0; i--)
    {
        if(val & (1 << i)) // set (1) bit
        {putchar('1');}
        else {putchar('0');} // 0 bit
    }
}

// Display an object in binary, one line per object.
// cp: address of the object's first byte; size: number of bytes to show.
// Bytes are emitted from the highest index down to index 0
// (for a 4-byte float: cp[3], cp[2], cp[1], cp[0]), then a newline.
void printData(unsigned char* cp, int size)
{
    while(size > 0)
    {
        --size; // step down to the next byte index
        printBinary(cp[size]);
    }
    putchar('\n');
}

// Display an object in binary on two lines (meant for long double).
// cp:   address of the object's first byte
// size: number of bytes to show; assumed to be even
// First line: the upper half, from byte size-1 down to byte size/2
// (cp[15], ..., cp[8] for a 16-byte long double).
// Second line: the lower half, from byte size/2-1 down to byte 0
// (cp[7], ..., cp[0]).
void printData2(unsigned char* cp, int size)
{
    const int half = size / 2;
    int index = size; // one past the next byte to print

    while(index > half) // upper half
    {
        --index;
        printBinary(cp[index]);
    }
    putchar('\n');

    while(index > 0) // lower half
    {
        --index;
        printBinary(cp[index]);
    }
    putchar('\n');
}

/*

gcc double.c -o double

./double

0, 0, 0, 0, 0, // garbage

00000000000000000000000000000000

0, 0, 0, 0, 0,

00000000000000000000000000000000

2.36943e-38, 2.36943e-38, 2.36943e-38, 2.36943e-38, 2.36943e-38,

00000001000000010000000100000001

-nan, -nan, -nan, -nan, -nan, // exp bits all 1, mantissa not 0

11111111111111111111111111111111 // sign bit is 1, negative

0, 0, 0, 0, 0, // garbage

0000000000000000000000000000000000000000000000000000000000000000

0, 0, 0, 0, 0,

0000000000000000000000000000000000000000000000000000000000000000

7.7486e-304, 7.7486e-304, 7.7486e-304, 7.7486e-304, 7.7486e-304,

0000000100000001000000010000000100000001000000010000000100000001

-nan, -nan, -nan, -nan, -nan, // sign bit 1, exp bits 1, mantissa not 0

1111111111111111111111111111111111111111111111111111111111111111

-nan, nan, -nan, -nan, -nan, // garbage (Unnormal, invalid operands)

0000000000000000000000000000000000000000111100001011011011111111 // part 2

0000000000000000010101011011111100010010011000110111000001000000 // part 1

0000000000000000011111111111111100001010100101010101011101110111 // last bits

0000000000000000000000000000000000000000000000000000000011000010 // first

0000000000000000010101011011111100010010011000111000100000001101 // 127-64

0000000000000000011111111111111100001010100101010101011101110110 // 63-0

0, 0, 0, 0, 0, (sign bit 79 is 0, positive)

0000000000000000000000000000000000000000000000000000000000000000 // 78-64 (0)

0000000000000000000000000000000000000000000000000000000000000000 // 63-0 (0)

nan, nan, nan, nan, nan, // Unnormal, invalid operands (bit 79 is 0, positive)

0000000100000001000000010000000100000001000000010000000100000001 // 78-64 (0/1)

0000000100000001000000010000000100000001000000010000000100000001 // 63 (0)

-nan, -nan, -nan, -nan, -nan, // Quiet Not a Number (bit 79 is 1, negative)

1111111111111111111111111111111111111111111111111111111111111111 // 78-64 (1)

1111111111111111111111111111111111111111111111111111111111111111 // 63-0 (1)

*/

Notes: See Single-precision, Double-precision, and Extended_precision (for long_double) floating-point formats, as well as NaN (not a number) on Wikipedia. See also Exercise_3-26.

00000001000000010000000100000001 = ((-1)^0)*2^(2^1-127)*(1+2^(-7)+2^(-15)+2^(-23)) = 1*2^(-125)*1.00784313679 ~ 2.36943e-38
(here ^ means "raised to power" and ~ means "approximates to").

etc.

*****************************************************************************************

Double.cpp download

#include <iostream>

using std::cout;

using std::endl;

#define SIZE 5 // array size

// we assume sizeof(char) = 1 byte

void set(char* array, int bytes, int value); // initialize array

template <typename T> // generics

void print(T* array, int size); // print array

void printBinary(const unsigned char val); // Display a byte in binary

void printData(unsigned char*, int); // Display a data type in binary

void printData2(unsigned char*, int); // Display data type in binary (2 parts)

int main()

{

float fArray[SIZE];

// automatically convert float* (fArray) to void* in function call:

print<float>(fArray, SIZE); // print uninitialized array

printData(reinterpret_cast<unsigned char*>(fArray), sizeof(float));

printData(reinterpret_cast<unsigned char*>(fArray+1), sizeof(float));

printData(reinterpret_cast<unsigned char*>(&fArray[2]), sizeof(float));

char* cp = reinterpret_cast<char*>(fArray); // &fArray[0]

/*

// Alternative conversion:

void* p = static_cast<void*>fArray; // &fArray[0]

char* cp = static_cast<char*>p;

*/

set(cp, SIZE*sizeof(float), 0); // initialize array to 0

print<float>(fArray, SIZE); // print initialized array

printData(reinterpret_cast<unsigned char*>(fArray), sizeof(float));

set(cp, SIZE*sizeof(float), 1); // set each byte to 00000001

print<float>(fArray, SIZE); // print initialized array

printData(reinterpret_cast<unsigned char*>(fArray), sizeof(float));

set(cp, SIZE*sizeof(float), 0xff); // set array to 1...1

print<float>(fArray, SIZE); // print array

printData(reinterpret_cast<unsigned char*>(fArray), sizeof(float));

cout << endl;

double dArray[SIZE];

// automatically convert double* (dArray) to void* in function call:

print<double>(dArray, SIZE); // print uninitialized array

printData(reinterpret_cast<unsigned char*>(dArray), sizeof(double));

printData(reinterpret_cast<unsigned char*>(dArray+1), sizeof(double));

printData(reinterpret_cast<unsigned char*>(&dArray[2]), sizeof(double));

cp = reinterpret_cast<char*>(dArray); // &dArray[0]

set(cp, SIZE*sizeof(double), 0); // initialize array to 0

print<double>(dArray, SIZE); // print initialized array

printData(reinterpret_cast<unsigned char*>(dArray), sizeof(double));

set(cp, SIZE*sizeof(double), 1); // set each byte to 00000001

print<double>(dArray, SIZE); // print initialized array

printData(reinterpret_cast<unsigned char*>(dArray), sizeof(double));

set(cp, SIZE*sizeof(double), 0xff); // set array to 1...1

print<double>(dArray, SIZE); // print array

printData(reinterpret_cast<unsigned char*>(dArray), sizeof(double));

cout << endl;

long double ldArray[SIZE];

// ldArray decays to long double* (&ldArray[0]) in the function call:

print<long double>(ldArray, SIZE); // print uninitialized array

printData2(reinterpret_cast<unsigned char*>(ldArray), sizeof(long double));

printData2(reinterpret_cast<unsigned char*>(ldArray+1), sizeof(long double));

printData2(reinterpret_cast<unsigned char*>(&ldArray[2]), sizeof(long double));

cp = reinterpret_cast<char*>(ldArray); // &ldArray[0]

set(cp, SIZE*sizeof(long double), 0); // initialize array to 0

print<long double>(ldArray, SIZE); // print initialized array

printData2(reinterpret_cast<unsigned char*>(ldArray), sizeof(long double));

set(cp, SIZE*sizeof(long double), 1); // set each byte to 00000001

print<long double>(ldArray, SIZE); // print initialized array

printData2(reinterpret_cast<unsigned char*>(ldArray), sizeof(long double));

set(cp, SIZE*sizeof(long double), 0xff); // set array to 1...1

print<long double>(ldArray, SIZE); // print array

printData2(reinterpret_cast<unsigned char*>(ldArray), sizeof(long double));

return 0;

}

// Fill a contiguous region of memory with one repeated byte.
//   array: start of the region to overwrite
//   bytes: number of bytes to write; nothing is written when bytes <= 0
//   value: byte pattern to store; it is converted to char, so callers pass
//          small values such as 0, 1 or 0xff
void set(char* array, int bytes, int value)
{
    const char fill = static_cast<char>(value); // same conversion the plain assignment performs
    char* cursor = array;
    for (int left = bytes; left > 0; --left)
    {
        *cursor++ = fill;
    }
}

// Print the first `size` elements of `array` on one line.
// Each element is followed by ", " (including the last one), and the line
// is finished with endl. With size <= 0 only the line break is written.
template <typename T>
void print(T* array, int size)
{
    T* item = array;
    for (int left = size; left > 0; --left)
    {
        std::cout << *item << ", ";
        ++item;
    }
    std::cout << std::endl;
}

// Display a byte in binary: writes exactly eight characters ('0' or '1')
// to cout, most significant bit first, with no trailing separator or newline.
//   val: the byte whose bits are printed
void printBinary(const unsigned char val)
{
    for (int i = 7; i >= 0; i--)
    { // print bits from first (most significant) to last (least significant)
        if (val & (1 << i)) // set (1) bit
        {
            std::cout << "1";
        }
        else // 0 bit
        {
            std::cout << "0";
        }
    }
} // closes the function body (this brace was missing in the original listing)

// Display a data type in binary.
// Prints the `size` bytes starting at `cp` from the highest address down to
// cp[0], each via printBinary(), then ends the line.
// Example, sizeof(float) = 4: cp[3], cp[2], cp[1], cp[0].
// NOTE(review): this puts the most significant byte first only on a
// little-endian host, which the sample output in this file appears to assume.
void printData(unsigned char* cp, int size)
{
    if (size > 0)
    {
        const unsigned char* next = cp + size; // one past the last byte
        while (next != cp)
        {
            --next;
            printBinary(*next);
        }
    }
    std::cout << std::endl;
}

// Display a data type in binary, split over two output lines.
// Used in this file for long double: the call sites pass sizeof(long double),
// which the original author notes is 16 on their platform (2 parts of 8 bytes).
//   cp:   first byte of the object
//   size: number of bytes; assumed even (with an odd size the first line
//         gets the extra byte)
// Line 1 shows the upper half, cp[size-1] down to cp[size/2];
// line 2 shows the lower half, cp[size/2 - 1] down to cp[0].
void printData2(unsigned char* cp, int size)
{
    const int half = size / 2;

    // second half of the bytes: e.g. cp[15], ..., cp[8]
    for (int upper = size; upper > half; --upper)
    {
        printBinary(cp[upper - 1]);
    }
    std::cout << std::endl;

    // first half of the bytes: e.g. cp[7], ..., cp[0]
    for (int lower = half; lower > 0; --lower)
    {
        printBinary(cp[lower - 1]);
    }
    std::cout << std::endl;
}

/*

g++ Double.cpp -o Double

./Double

-2.68742e+32, 4.57552e-41, -2.45652e+32, 4.57552e-41, -2.30727e+32, // garbage

11110101010101000000000000000000

00000000000000000111111110001100

11110101010000011100100100000000

0, 0, 0, 0, 0,

00000000000000000000000000000000

2.36943e-38, 2.36943e-38, 2.36943e-38, 2.36943e-38, 2.36943e-38,

00000001000000010000000100000001

-nan, -nan, -nan, -nan, -nan, // exp bits all 1, mantissa not 0

11111111111111111111111111111111 // sign bit is 1, negative

4.68413e-310, 4.68413e-310, 6.95306e-310, 4.94066e-324, 6.95306e-310, // garbage

0000000000000000010101100011101000110001000011010111000101010001

0000000000000000010101100011101000110001000011010111000000001000

0000000000000000011111111111111010010101010100100110010111001000

0, 0, 0, 0, 0,

0000000000000000000000000000000000000000000000000000000000000000

7.7486e-304, 7.7486e-304, 7.7486e-304, 7.7486e-304, 7.7486e-304,

0000000100000001000000010000000100000001000000010000000100000001

-nan, -nan, -nan, -nan, -nan, // sign bit 1, exp bits 1, mantissa not 0

1111111111111111111111111111111111111111111111111111111111111111

-nan, nan, nan, nan, nan, // garbage (Unnormal, invalid operands)

0000000000000000000000000000000100000000000000001111111111111111 // part 2

0000000000000000000000000000000000000000000000000000001010000000 // part 1

0000000000000000010101100011101000110001000011010100011111101000 // last bits

0000000000000000011111111111111010010101010100100110010010010000 // first

0000000000000000010101100011101000110001000011010100100110101101 // 127-64

0000000000000000000000000000000000000000000000000000000000000010 // 63-0