I have a binary file which has the following format :
# vtk DataFile Version 4.0
vtk output
BINARY
DATASET POLYDATA
POINTS 10000 double
?�T�����?����h�?�T�����?���� <-- 10000 double values (in binary format) follow separated by space and new line after every 9 values.
I want to read this file byte by byte so that I can store these double values in my array. I have the following code which loads this file into a char *buffer array. Now I want to know how to proceed further?
#include<iostream>
#include<fstream>
#include<sstream>
#include<stdlib.h>
#include<string>
using namespace std;
// Loads the whole of "Data_binary.vtk" into memory and reports whether every
// byte could be read. Returns 0 on success and 1 when the file cannot be
// opened or its size cannot be determined.
int main () {
    std::ifstream is ("Data_binary.vtk", std::ifstream::binary);
    if (!is) {
        std::cerr << "error: cannot open Data_binary.vtk" << std::endl;
        return 1;
    }

    // get length of file:
    is.seekg (0, is.end);
    const std::streamoff end_pos = is.tellg();
    if (end_pos < 0) {
        // tellg() reports failure as -1; converting that to an unsigned
        // length would request an enormous allocation.
        std::cerr << "error: cannot determine file size" << std::endl;
        return 1;
    }
    const std::size_t length = static_cast<std::size_t>(end_pos);
    is.seekg (0, is.beg);

    // std::string owns its storage, so the bytes are released automatically
    // (the previous new[] buffer was never deleted). The string also keeps a
    // terminating '\0' after the last byte.
    std::string buffer(length, '\0');

    std::cout << "Reading " << length << " characters... ";
    // read data as a block:
    if (length > 0)
        is.read (&buffer[0], static_cast<std::streamsize>(length));

    if (is)
        std::cout << "all characters read successfully." << std::endl;
    else
        std::cout << "error: only " << is.gcount() << " could be read";
    is.close();
    return 0;
}
In ASCII format, an example file would look like the following :
# vtk DataFile Version 4.0
vtk output
ASCII
DATASET POLYDATA
POINTS 18 double
.1 .2 .3 1.4 11.55 1 0 8e-03 5.6
1.02 2.2 3.3 .1 .5 0.001 4e-07 4.2 1.55
For binary file, the double values are present in binary. I want to get double values from binary format.
Use this function.
/*
 * Decode one IEEE 754 binary64 value from a byte stream without relying
 * on the host's own floating point layout.
 *   fp        - stream to read the 8 encoded bytes from
 *   bigendian - non-zero when the most significant byte comes first,
 *               zero when the least significant byte comes first
 * Returns the decoded value. An encoding with zero exponent and zero
 * mantissa decodes to 0.0 whatever its sign bit, and NaN encodings are
 * returned as sqrt(-1.0).
 */
double freadieee754(FILE *fp, int bigendian)
{
    enum { NBYTES = 8, EXPBITS = 11, FRACBITS = 52 };
    unsigned char raw[NBYTES];
    int lo, hi, bit;
    int sign, exponent, shift;
    double fraction = 0.0; /* mantissa as a value in [0, 1) */
    double weight = 0.5;   /* value of the mantissa bit being examined */

    /* pull the encoded bytes off the stream */
    for (lo = 0; lo < NBYTES; lo++)
        raw[lo] = fgetc(fp);

    /* bring little-endian input into most-significant-byte-first order */
    if (!bigendian)
    {
        for (lo = 0, hi = NBYTES - 1; lo < hi; lo++, hi--)
        {
            unsigned char held = raw[lo];
            raw[lo] = raw[hi];
            raw[hi] = held;
        }
    }

    sign = (raw[0] & 0x80) ? -1 : 1;
    /* biased exponent: low 7 bits of byte 0 followed by high 4 bits of byte 1 */
    exponent = ((raw[0] & 0x7F) << 4) | ((raw[1] & 0xF0) >> 4);

    /* the mantissa starts at bit 3 of byte 1; each bit is worth half the previous one */
    for (bit = 0; bit < FRACBITS; bit++)
    {
        int pos = bit + 4; /* bit offset counted from the top of byte 1 */
        if (raw[1 + pos / 8] & (0x80 >> (pos % 8)))
            fraction += weight;
        weight /= 2.0;
    }

    /* zero is handled up front */
    if (exponent == 0 && fraction == 0)
        return 0.0;

    shift = exponent - ((1 << (EXPBITS - 1)) - 1); /* remove the bias */

    if (shift == 1024)
    {
        /* all-ones exponent: NaN when the mantissa is non-zero, else infinity */
        if (fraction != 0)
            return sqrt(-1.0);
#ifdef INFINITY
        return sign == 1 ? INFINITY : -INFINITY;
#endif
        return (sign * 1.0) / 0.0;
    }

    if (shift <= -1023)
    {
        /* denormalised: no implicit leading 1, so normalise by hand */
        if (fraction == 0.0)
            return 0.0;
        shift = -1022;
        while (fraction < 1.0)
        {
            fraction *= 2;
            shift--;
        }
        return ldexp(fraction, shift) * sign;
    }

    /* normal number: restore the implicit leading 1 */
    return ldexp(fraction + 1.0, shift) * sign;
}
it's a lot, but it's just a snippet to cut and paste, and you never need to worry about binary floating point formats again. It simply reads an IEEE 754 double, regardless of host floating point format.
There's a twin which writes
Instead of reading into a char * buffer, read into a double * buffer. Casting to/from char * is allowed just for this purpose.
// Read n raw doubles straight into the vector's contiguous storage.
vector<double> buffer(n);
// data() is valid even when n == 0, whereas &buffer[0] would index an empty vector.
is.read(reinterpret_cast<char *>(buffer.data()), n * sizeof(double));
You'll need to read the non-binary data first so that the file pointer is located at the start of the binary data. This is defined as coming immediately after the newline character of the last field in the header.
The spec doesn't appear to mandate little-endian or big-endian format, it expects you to know based on the source of the file. If you're lucky the format will match the machine you're using to read the file and no conversion will be necessary. Otherwise you'll need to do a byte swap:
// Reverses the order of the 8 bytes that make up the double at *p, in place,
// converting it between little-endian and big-endian layout.
void ByteSwap(double * p)
{
    char * bytes = reinterpret_cast<char *>(p);
    // Walk inwards from both ends, exchanging the outermost pair each time.
    for (int lo = 0, hi = 7; lo < hi; ++lo, --hi)
        std::swap(bytes[lo], bytes[hi]);
}
Related
I am trying a wave to base 64 converter program.
I am trying this following code snippet:
// Holds one input group of up to 3 bytes; EncodeBlock turns each group into 4 characters.
vector<char> in(3);
std::string out = "abcd"; //four letter garbage value as initializer
// The input is opened with ios::binary and read through its stream buffer below.
ifstream file_ptr(filename.c_str(), ios::in | ios::binary);
unsigned int threebytes = 0;
//Apply the Base 64 encoding algorithm
do {
// sgetn returns the number of bytes actually obtained, which can be less than 3 at end of input.
threebytes = (unsigned int) file_ptr.rdbuf()->sgetn(&in[0], 3);
if (threebytes > 0) {
EncodeBlock(in, out, (int)threebytes); //Apply conversion algorithm to convert 3 bytes into 4
outbuff = outbuff + out; //Append the 4 bytes got from above step to the output
}
// A short (or empty) read ends the loop; a full group of in.size() bytes keeps it going.
} while (threebytes == in.size());
file_ptr.close();
In encode block where the Base64 encoding algorithm is written
// Encodes one group of up to three input bytes as four Base64 characters.
//   in  - holds the input bytes; only the first `len` entries are used
//   out - receives the four output characters in out[0..3] (must already
//         have at least four characters)
//   len - number of valid bytes in `in` (1 to 3); missing bytes are
//         represented by '=' padding
void EncodeBlock(const std::vector<char>& in, std::string& out, int len) {
    using namespace std;
    cb64 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

    // Convert through unsigned char first: a plain char may be signed, and a
    // negative value would be sign-extended when promoted to int, producing
    // indices outside 0..63. Bytes beyond `len` are treated as zero so stale
    // data from a previous group cannot leak into the final characters.
    const unsigned int b0 = static_cast<unsigned char>(in[0]);
    const unsigned int b1 = (len > 1) ? static_cast<unsigned char>(in[1]) : 0u;
    const unsigned int b2 = (len > 2) ? static_cast<unsigned char>(in[2]) : 0u;

    // 24 input bits are split into four 6-bit indices.
    out[0] = cb64[b0 >> 2];
    out[1] = cb64[((b0 & 0x03u) << 4) | (b1 >> 4)];
    out[2] = (len > 1) ?
        cb64[((b1 & 0x0Fu) << 2) | (b2 >> 6)] :
        '=';
    out[3] = (len > 2) ?
        cb64[b2 & 0x3Fu] :
        '=';
}
The cb64 is a 64 length long string but the index generated by bit manipulation sometimes fall out of range (0 to 63).
Why!!!
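The resolution was to handle the bit manipulation correctly.
Each 8-bit char is promoted to a 32-bit int before the shift is applied, so the result can carry up to 24 extra high bits (copies of the sign bit when the char is negative); these must be set to 0 before the value is used as an index.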
So,
// Convert each char to unsigned char before shifting so no sign-extended bits reach the index.
out[0] = cb64[(static_cast<unsigned char>(in[0]) >> 2) & 0x3f];
out[1] = cb64[(((static_cast<unsigned char>(in[0]) & 0x03) << 4) | (static_cast<unsigned char>(in[1]) >> 4)) & 0x3f]; // .. and so on handles the masking
How can I get numerator and denominator from a fractional number? for example, from "1.375" i want to get "1375/1000" or "11/8" as a result. How can i do it with c++??
I have tried to do it by separating the numbers before the point and after the point but it doesn't give any idea how to get my desired output.
You didn't really specify whether you need to convert a floating point or a string to ratio, so I'm going to assume the former one.
Instead of trying string or arithmetic-based approaches, you can directly use properties of IEEE-754 encoding.
Floats (called binary32 by the standard) are encoded in memory like this:
S EEEEEEEE MMMMMMMMMMMMMMMMMMMMMMM
^ ^
bit 31 bit 0
where S is sign bit, Es are exponent bits (8 of them) Ms are mantissa bits (23 bits).
The number can be decoded like this:
value = (-1)^S * significand * 2 ^ exponent
where:
significand = 1.MMMMMMMMMMMMMMMMMMMMMMM (as binary)
exponent = EEEEEEEE (as binary) - 127
(note: this is for so called "normal numbers", there are also zeroes, subnormals, infinities and NaNs - see Wikipedia page I linked)
This can be used here. We can rewrite the equation above like this:
(-1)^S * significand * 2 ^ exponent = (-1)^S * (significand * 2^23) * 2 ^ (exponent - 23)
The point is that significand * 2^23 is an integer (1.MMMMMMMMMMMMMMMMMMMMMMM in binary with the point moved 23 places to the right by the multiplication by 2^23). 2 ^ (exponent - 23) is an integer too when exponent - 23 >= 0; otherwise it is the reciprocal of the integer 2 ^ (-(exponent - 23)).
In other words: we can write the number as:
(significand * 2^23) / 2^(-(exponent - 23)) (when exponent - 23 < 0)
or
[(significand * 2^23) * 2^(exponent - 23)] / 1 (when exponent - 23 >= 0)
So we have both numerator and denominator - directly from binary representation of the number.
All of the above could be implemented like this in C++:
// A rational number numerator/denominator obtained from the IEEE 754
// binary32 encoding of a float.
//
// Special encodings:
//   +0 / -0      -> 0/1
//   +inf / -inf  -> 1/0 / -1/0
//   NaN          -> max int64_t / 0
// Finite values too large for int64_t saturate to +/- max int64_t over 1.
// Values that would need a denominator above 2^63 keep the denominator at
// 2^63 and drop the low bits of the numerator (truncating towards zero).
struct Ratio
{
    int64_t numerator; // numerator includes sign
    uint64_t denominator;

    // Converts back to float by dividing numerator by denominator.
    float toFloat() const
    {
        return static_cast<float>(numerator) / denominator;
    }

    // Builds the ratio directly from the sign, exponent and mantissa bits of v.
    static Ratio fromFloat(float v)
    {
        static_assert(sizeof(float) == sizeof(uint32_t), "float must be 32 bits wide");

        // First, obtain bitwise representation of the value. The bytes are
        // copied through unsigned char pointers because reading a float
        // through a uint32_t pointer breaks the aliasing rules.
        uint32_t bitwiseRepr = 0;
        {
            const unsigned char* src = reinterpret_cast<const unsigned char*>(&v);
            unsigned char* dst = reinterpret_cast<unsigned char*>(&bitwiseRepr);
            for (unsigned i = 0; i < sizeof(bitwiseRepr); ++i)
            {
                dst[i] = src[i];
            }
        }

        // Extract sign, exponent and mantissa bits (as stored in memory) for convenience:
        const uint32_t signBit = bitwiseRepr >> 31u;
        const uint32_t expBits = (bitwiseRepr >> 23u) & 0xffu; // 8 bits set
        const uint32_t mntsBits = bitwiseRepr & 0x7fffffu; // 23 bits set

        // Handle some special cases:
        if(expBits == 0 && mntsBits == 0)
        {
            // special case: +0 and -0
            return {0, 1};
        }
        else if(expBits == 255u && mntsBits == 0)
        {
            // special case: +inf, -inf
            // Let's agree that infinity is always represented as 1/0 in Ratio
            return {signBit ? -1 : 1, 0};
        }
        else if(expBits == 255u)
        {
            // special case: nan
            // Let's agree, that if we get NaN, we returns max int64_t by 0
            return {std::numeric_limits<int64_t>::max(), 0};
        }

        // Subnormals (exponent field 0) have no implicit leading 1 and use
        // the same scale as exponent field 1.
        const bool subnormal = (expBits == 0);
        uint64_t significand = subnormal ? mntsBits : ((1u << 23u) | mntsBits);
        const int64_t signFactor = signBit ? -1 : 1;
        // value == significand * 2^exp
        const int32_t exp = (subnormal ? 1 : static_cast<int32_t>(expBits)) - 127 - 23;
        const uint64_t maxMagnitude =
            static_cast<uint64_t>(std::numeric_limits<int64_t>::max());

        if(exp < 0)
        {
            uint32_t shift = static_cast<uint32_t>(-exp);
            if(shift > 63u)
            {
                // 2^shift does not fit in the denominator: cap it at 2^63 and
                // scale the numerator down by the same factor.
                const uint32_t excess = shift - 63u;
                significand = (excess < 64u) ? (significand >> excess) : 0u;
                shift = 63u;
            }
            if(significand == 0)
            {
                return {0, 1};
            }
            return {signFactor * static_cast<int64_t>(significand), uint64_t{1} << shift};
        }

        // exp >= 0: the value is an integer; saturate when it exceeds int64_t.
        if(exp > 62 || significand > (maxMagnitude >> exp))
        {
            return {signFactor * std::numeric_limits<int64_t>::max(), 1};
        }
        return {signFactor * static_cast<int64_t>(significand << exp), 1};
    }
};
(hopefully comments and description above are understandable - let me know, if there's something to improve)
I've omitted checks for out of range values for simplicity.
We can use it like this:
// Convert a sample value to a Ratio and print the original, the ratio and the round trip.
float fv = 1.375f;
Ratio rv = Ratio::fromFloat(fv);
// NOTE(review): streaming `rv` needs an operator<< for Ratio, which is not part of the struct shown.
std::cout << "fv = " << fv << ", rv = " << rv << ", rv.toFloat() = " << rv.toFloat() << "\n";
And the output is:
fv = 1.375, rv = 11534336/8388608, rv.toFloat() = 1.375
As you can see, exactly the same values on both ends.
The problem is that numerators and denominators are big. This is because the code always multiplies significand by 2^23, even if smaller value would be enough to make it integer (this is equivalent to writing 0.2 as 2000000/10000000 instead of 2/10 - it's the same thing, only written differently).
This can be solved by changing the code to multiply significand (and divide exponent) by minimum number, like this (ellipsis stands for parts which are the same as above):
// Returns how many consecutive zero bits sit at the least significant end of v.
// Example: 1001000 (binary) gives 3. An input of 0 gives 32.
uint32_t countTrailingZeroes(uint32_t v)
{
    uint32_t zeros = 0;
    for (; zeros < 32; ++zeros, v >>= 1u)
    {
        // Stop at the first set bit.
        if ((v & 1u) != 0)
        {
            break;
        }
    }
    return zeros;
}
struct Ratio
{
    ...
    // Same as the full version, but strips the significand's trailing zero
    // bits first so the resulting fraction is in lowest terms.
    static Ratio fromFloat(float v)
    {
        ...
        uint32_t significand = (1u << 23u) | mntsBits;
        const uint32_t nTrailingZeroes = countTrailingZeroes(significand);
        significand >>= nTrailingZeroes;
        const int64_t signFactor = signBit ? -1 : 1;
        // value == significand * 2^exp; computed in signed arithmetic so a
        // negative exponent does not rely on unsigned wrap-around.
        const int32_t exp = static_cast<int32_t>(expBits) - 127 - 23
                          + static_cast<int32_t>(nTrailingZeroes);
        // All shifts below are done in 64 bits: the original 32-bit shifts
        // overflowed for exponents outside a narrow range.
        uint64_t magnitude = significand;
        const uint64_t maxMagnitude =
            static_cast<uint64_t>(std::numeric_limits<int64_t>::max());
        if(exp < 0)
        {
            uint32_t shift = static_cast<uint32_t>(-exp);
            if(shift > 63u)
            {
                // Denominator is capped at 2^63; drop the numerator bits that no longer fit.
                const uint32_t excess = shift - 63u;
                magnitude = (excess < 64u) ? (magnitude >> excess) : 0u;
                shift = 63u;
            }
            if(magnitude == 0)
            {
                return {0, 1};
            }
            return {signFactor * static_cast<int64_t>(magnitude), uint64_t{1} << shift};
        }
        else
        {
            // Saturate instead of overflowing when the integer exceeds int64_t.
            if(exp > 62 || magnitude > (maxMagnitude >> exp))
            {
                return {signFactor * std::numeric_limits<int64_t>::max(), 1};
            }
            return {signFactor * static_cast<int64_t>(magnitude << exp), 1};
        }
    }
};
And now, for the following code:
// Same demonstration as before, now using the trailing-zero-reducing fromFloat.
float fv = 1.375f;
Ratio rv = Ratio::fromFloat(fv);
// NOTE(review): streaming `rv` needs an operator<< for Ratio, which is not part of the struct shown.
std::cout << "fv = " << fv << ", rv = " << rv << ", rv.toFloat() = " << rv.toFloat() << "\n";
We get:
fv = 1.375, rv = 11/8, rv.toFloat() = 1.375
In C++ you can use the Boost rational class. But you need to give numerator and denominator.
For this you need to find out no of digits in the input string after the decimal point. You can do this by string manipulation functions. Read the input character by character and find no of characters after the .
char inputstr[30];
int noint=0, nodec=0;
char intstr[30], dec[30];
int decimalfound = 0;
int denominator = 1;
int numerator;
scanf("%s",inputstr);
len = strlen(inputstr);
for (int i=0; i<len; i++)
{
if (decimalfound ==0)
{
if (inputstr[i] == '.')
{
decimalfound = 1;
}
else
{
intstr[noint++] = inputstr[i];
}
}
else
{
dec[nodec++] = inputstr[i];
denominator *=10;
}
}
dec[nodec] = '\0';
intstr[noint] = '\0';
numerator = atoi(dec) + (atoi(intstr) * 1000);
// You can now use the numerator and denominator as the fraction,
// either in the Rational class or you can find gcd and divide by
// gcd.
What about this simple code:
// Searches for num/den within `margin` of n by raising the denominator when
// the fraction is too large and the numerator when it is too small.
// NOTE(review): assumes n is positive; for n <= 0 the search may never get
// within the margin before the counters overflow.
double n = 1.375;
int num = 1, den = 1;
// Divide in double precision: float division limits accuracy to about 7
// significant digits, which can keep the loop from ever reaching the margin.
double frac = static_cast<double>(num) / den;
double margin = 0.000001;
// The distance is computed directly because an unqualified abs() can resolve
// to the int overload, which truncates the difference to 0.
while ((frac > n ? frac - n : n - frac) > margin){
    if (frac > n){
        den++;
    }
    else{
        num++;
    }
    frac = static_cast<double>(num) / den;
}
I haven't really tested it much; it's only an idea.
I hope I'll be forgiven for posting an answer which uses "only the C language". I know you tagged the question with C++ - but I couldn't turn down the bait, sorry. This is still valid C++ at least (although it does, admittedly, use mainly C string-processing techniques).
// Converts a decimal string of the form "<integer>.<fraction>" into a
// fraction and reduces it with num_simple_fraction().
//   input - text to parse; only the first 127 characters are considered
//   num   - receives the numerator (carries the sign)
//   den   - receives the denominator, a power of ten before reduction
// Returns 0 on success, 1 when the integer or fractional part is missing,
// 2 when either part contains an invalid character, and 3 when the value
// does not fit in a long.
int num_string_float_to_rat(char *input, long *num, long *den) {
    char *tok = NULL, *end = NULL;
    char buf[128] = {'\0'};
    // Largest long, derived from the unsigned width so no extra header is needed.
    const long long_max = static_cast<long>(~0UL >> 1);

    // strtok() modifies its argument, so work on a private copy.
    strncpy(buf, input, sizeof(buf) - 1);

    tok = strtok(buf, ".");
    if (!tok) return 1;

    // Remember the sign separately: strtol() returns 0 for "-0", which would
    // otherwise lose the sign of values such as "-0.5".
    const char *first = tok;
    while (*first == ' ' || *first == '\t') ++first;
    const bool negative = (*first == '-');

    long whole = strtol(tok, &end, 10);
    if (*end != '\0') return 2;
    if (whole < -long_max) return 3; // cannot be negated safely
    if (whole < 0) whole = -whole;

    tok = strtok(NULL, ".");
    if (!tok) return 1;

    // Accumulate the fractional digits and the matching power of ten with
    // integer arithmetic; pow() is inexact and the old int cast truncated it.
    long frac = 0;
    long scale = 1;
    for (const char *p = tok; *p != '\0'; ++p) {
        if (*p < '0' || *p > '9') return 2;
        if (scale > long_max / 10) return 3;
        scale *= 10;
        frac = frac * 10 + (*p - '0');
    }

    if (whole > (long_max - frac) / scale) return 3;
    const long magnitude = whole * scale + frac;

    *den = scale;
    // Apply the sign to the whole magnitude so "-1.5" becomes -15/10, not -5/10.
    *num = negative ? -magnitude : magnitude;
    num_simple_fraction(num, den);
    return 0;
}
Sample usage:
// NOTE(review): num and den are not declared in this excerpt; the %ld format and the
// function's long* parameters suggest `long num, den;` precedes the call.
int rc = num_string_float_to_rat("0015.0235", &num, &den);
// Check return code -> should be 0!
printf("%ld/%ld\n", num, den);
Output:
30047/2000
Full example at http://codepad.org/CFQQEZkc .
Notes:
strtok() is used to parse the input in to tokens (no need to reinvent the wheel in that regard). strtok() modifies its input - so a temporary buffer is used for safety
it checks for invalid characters - and will return a non-zero return code if found
strtol() has been used instead of atoi() - as it can detect non-numeric characters in the input
scanf() has not been used to slurp the input - due to rounding issues with floating point numbers
the base for strtol() has been explicitly set to 10 to avoid problems with leading zeros (otherwise a leading zero will cause the number to be interpreted as octal)
it uses a num_simple_fraction() helper (not shown) - which in turn uses a gcd() helper (also not shown) - to convert the result to a simple fraction
log10() of the denominator is determined by calculating the length of the token after the decimal point
I'd do this in three steps.
1) find the decimal point, so that you know how large the denominator has to be.
2) get the numerator. That's just the original text with the decimal point removed.
3) get the denominator. If there was no decimal point, the denominator is 1. Otherwise, the denominator is 10^n, where n is the number of digits to the right of the (now-removed) decimal point.
// A fraction whose numerator and denominator are kept as decimal digit strings.
struct fraction {
    std::string num, den;
};

// Converts decimal text such as "1.375" into a fraction.
//   num is the input with the decimal point removed ("1375");
//   den is "1" followed by one '0' per digit after the point ("1000").
// Input without a decimal point yields a denominator of "1".
fraction parse(std::string input) {
    fraction result;
    result.den = "1";

    // 1: locate the decimal point
    const std::size_t dec_point = input.find('.');
    if (dec_point != std::string::npos) {
        // 3: one power of ten per digit to the right of the point. The
        // denominator is built as text so long fractions cannot overflow.
        const std::size_t digits_after = input.length() - dec_point - 1;
        result.den.append(digits_after, '0');
        // 2: remove the point itself, at the position where it was found
        input.erase(dec_point, 1);
    }

    result.num = input;
    return result;
}
I'm trying to figure out a way to send a sequence of float values over the network. I've seen various answers for this, and this is my current attempt:
#include <iostream>
#include <cstring>
// Overlays a float with a char array of the same size so the bytes that make
// up the value can be addressed individually.
union floatBytes
{
float value;
char bytes[sizeof (float)];
};
// Round-trips a float through a std::string that carries its raw bytes.
int main()
{
    floatBytes data1;
    data1.value = 3.1f;

    // The bytes of a float are not a C string: any of them may be zero. Give
    // the length explicitly so the string keeps all sizeof(float) bytes
    // instead of stopping at the first '\0'.
    std::string string(data1.bytes, sizeof data1.bytes);

    floatBytes data2;
    // memcpy copies exactly sizeof(float) bytes; strncpy would stop copying
    // at the first zero byte and pad the remainder with zeros.
    std::memcpy(data2.bytes, string.data(), sizeof data2.bytes);
    std::cout << data2.value << std::endl; // <-- prints "3.1"
    return 0;
}
Which works nicely (though I suspect I might run into problems when sending this string to other systems, please comment).
However, if the float value is a round number (like 3.0 instead of 3.1) then this doesn't work.
// Same round trip with the value 3.
// NOTE(review): on a little-endian IEEE 754 host the first byte of 3.0f is presumably 0x00,
// which would explain the "0" result below - confirm on the target platform.
data1.value = 3;
// Constructing from a char pointer stops at the first zero byte, so fewer than
// sizeof(float) bytes may end up in the string.
std::string string(data1.bytes);
floatBytes data2;
// strncpy stops copying at the source's terminating '\0' and fills the rest with zeros.
strncpy(data2.bytes, string.c_str(), sizeof (float));
std::cout << data2.value << std::endl; // <-- prints "0"
So what is the preferred way of storing the bytes of a float value, send it, and parse it "back" to a float value?
Never use str* functions this way. These are intended to deal with c-string and the byte representation of a float is certainly not a valid c-string. What you need is to send/receive your data in a common representation. There exist a lot of them, but basically two: a textual representation or a byte coding.
Textual representation: this mostly consists of converting your float value to a string with a stringstream, then extracting the string and sending it over the connection.
Byte representation) that is much more problematic because if the two machines are not using the same byte-ordering, float encoding, etc then you can't send the raw byte as-is. But there exists (at least) one standard known as XDR (RFC 4506) that specify a standard to encode bytes of a float/double value natively encoded with IEEE 754.
You can reconstitute a float portably with rather involved code, which I maintain on my IEEE 754 GitHub site. If you break the float into bytes using one of those functions and reconstitute it using the other, you will obtain the same value in the receiver as you sent, regardless of float encoding, up to the precision of the format.
https://github.com/MalcolmMcLean/ieee754
/*
 * Decode one IEEE 754 binary32 value from a byte stream without relying on
 * the host's own floating point layout.
 *   fp        - stream to read the 4 encoded bytes from
 *   bigendian - non-zero when the most significant byte comes first,
 *               zero when the least significant byte comes first
 * Returns the decoded value. An encoding with zero exponent and zero
 * mantissa decodes to 0.0f whatever its sign bit, and NaN encodings are
 * returned as sqrt(-1.0).
 */
float freadieee754f(FILE *fp, int bigendian)
{
    const int fracbits = 23;
    const int expwidth = 8;
    unsigned long word = 0;
    int n;
    int sign, exponent, shift;
    double fraction = 0.0; /* mantissa as a value in [0, 1) */
    double weight = 0.5;   /* value of the mantissa bit being examined */
    double answer;

    /* accumulate the bytes in stream order, first byte most significant */
    for (n = 0; n < 4; n++)
        word = (word << 8) | fgetc(fp);

    /* little-endian input: mirror the four low bytes */
    if (!bigendian)
    {
        word = ((word & 0xFF) << 24)
             | (((word >> 8) & 0xFF) << 16)
             | (((word >> 16) & 0xFF) << 8)
             | ((word >> 24) & 0xFF);
    }

    sign = (word & 0x80000000) ? -1 : 1;
    exponent = (word & 0x7F800000) >> 23;

    /* mantissa bits 22..0, each worth half of the one before */
    for (n = fracbits - 1; n >= 0; n--)
    {
        if ((word >> n) & 1UL)
            fraction += weight;
        weight /= 2;
    }

    if (exponent == 0 && fraction == 0.0)
        return 0.0f;

    shift = exponent - ((1 << (expwidth - 1)) - 1); /* remove the bias */

    if (shift == 128)
    {
        /* all-ones exponent: NaN when the mantissa is non-zero, else infinity */
        if (fraction != 0.0)
            return (float) sqrt(-1.0);
#ifdef INFINITY
        return sign == 1 ? INFINITY : -INFINITY;
#endif
        return (sign * 1.0f)/0.0f;
    }

    if (shift <= -127)
    {
        /* denormalised: no implicit leading 1, so normalise by hand */
        if (fraction == 0.0)
            return 0.0f;
        shift = -126;
        while (fraction < 1.0)
        {
            fraction *= 2;
            shift--;
        }
        answer = ldexp(fraction, shift);
        return (float) answer * sign;
    }

    /* normal number: restore the implicit leading 1 */
    answer = ldexp(fraction + 1.0, shift);
    return (float) answer * sign;
}
/*
 * Write a float to a stream as 4 bytes of IEEE 754 binary32, whatever the
 * host's own floating point layout.
 *   x         - value to write
 *   fp        - destination stream
 *   bigendian - non-zero to emit the most significant byte first,
 *               zero to emit the least significant byte first
 * Returns the result of ferror(fp), i.e. 0 when no stream error is pending.
 * Both +0 and -0 are written as +0; NaN is written with an arbitrary
 * non-zero mantissa.
 */
int fwriteieee754f(float x, FILE *fp, int bigendian)
{
    const int expbits = 8;
    const int significandbits = 23;
    const long bias = (1L << (expbits - 1)) - 1; /* 127 */
    /* exponent field all ones: the pattern shared by infinities and NaNs */
    const unsigned long expmax = (unsigned long)(128 + bias) << (31 - expbits);
    unsigned long buff;

    if (x == 0)
    {
        /* zero (can't handle signed zero) */
        buff = 0;
    }
    else if (x > FLT_MAX)
    {
        /* infinity */
        buff = expmax;
    }
    else if (x < -FLT_MAX)
    {
        /* -infinity; 1UL keeps the shift into bit 31 out of signed int */
        buff = expmax | (1UL << 31);
    }
    else if (x != x)
    {
        /* NaN - dodgy because many compilers optimise out this test, but
         * there is no portable isnan() */
        buff = expmax | 1234;
    }
    else
    {
        unsigned long sign;
        double fnorm;
        int shift = 0;
        int overflow = 0;

        /* get the sign */
        if (x < 0) { sign = 1; fnorm = -x; }
        else { sign = 0; fnorm = x; }

        /* get the normalized form of f and track the exponent */
        while (fnorm >= 2.0) { fnorm /= 2.0; shift++; }
        while (fnorm < 1.0) { fnorm *= 2.0; shift--; }

        if (shift < -126)
        {
            /* denormalized: scale the mantissa down to exponent -126 and
             * store an exponent field of zero. The biased exponent is
             * shift + 127, so shift must become -127 here; the previous
             * -1023 belongs to the double format and corrupted the output. */
            while (shift < -126) { fnorm /= 2.0; shift++; }
            shift = -127;
        }
        else if (shift > 127)
        {
            /* out of range. Set to infinity */
            overflow = 1;
        }
        else
        {
            fnorm = fnorm - 1.0; /* take the significant bit off mantissa */
        }

        if (overflow)
        {
            buff = expmax | (sign << 31);
        }
        else
        {
            /* calculate the integer form of the significand */
            const double significand = fnorm * ((1LL << significandbits) + 0.5f);
            /* get the biased exponent */
            const unsigned long exp = (unsigned long)(shift + bias);
            const unsigned long hibits = (unsigned long)(long)(significand);
            buff = (sign << 31) | (exp << (31 - expbits)) | hibits;
        }
    }

    /* write the bytes out to the stream */
    if (bigendian)
    {
        fputc((buff >> 24) & 0xFF, fp);
        fputc((buff >> 16) & 0xFF, fp);
        fputc((buff >> 8) & 0xFF, fp);
        fputc(buff & 0xFF, fp);
    }
    else
    {
        fputc(buff & 0xFF, fp);
        fputc((buff >> 8) & 0xFF, fp);
        fputc((buff >> 16) & 0xFF, fp);
        fputc((buff >> 24) & 0xFF, fp);
    }
    return ferror(fp);
}
Let me first clear the issue with your code.
You are using strncpy which stops the copy the moment it sees '\0'. Which simply means that it is not copying all your data.
And thus the 0 is expected.
Using memcpy instead of strncpy should do the trick.
I just tried this C++ code
// Copies the bytes of a float into a char buffer and back again with memcpy,
// printing the value before and after the round trip.
int main(){
    const size_t nbytes = sizeof(float);
    float original = 3.34;
    printf("before = %f\n", original);

    char wire[10];
    memcpy(wire, (char*) &original, nbytes);
    wire[nbytes] = '\0'; // For sending over network

    // Start from a different value so a successful copy is visible.
    float restored = 1.99;
    memcpy((char*) &restored, wire, nbytes);
    printf("after = %f\n", restored);
    return 0;
}
I get the correct output as expected.
Now coming to the correctness. I am not sure if this classifies as Undefined Behaviour. It could also be called a case of type punning, in which case it would be implementation defined (and I assume any sane compiler would not muck this).
This is all okay as long as I am doing it for the same program.
Now for your problem of sending it over the network. I don't think this would be the correct way of doing it. As @Jean-Baptiste Yunès mentioned, the two systems could be using different representations for float, or even different byte orderings.
In that case you need to use a library to convert it to some standard representation like IEEE 754.
The main problem is that C++ do not enforce IEEE754, so the representation of your float may work between 2 computers and fail with another.
The problem have to be divided into two:
How to encode and decode a float to shared format
How to serialize the value to a char array for transmission.
How to encode/decode a float to a common format
C++ does not impose a specific bit-format, this mean a computer might transfer a float and the value on the other machine would be different.
Example of 1.0f
Machine1: sign + 8bit Exponent + 23bit mantissa:
0-01111111-00000000000000000000000
Machine2: sign + 7bit exponent + 24bit mantissa:
0-0111111-000000000000000000000000
Sending from machine 1 to machine 2 without shared format, would result in machine 2 receiving: 0-0111111-100000000000000000000000 = 1.5
This is a complex topic and may be difficult to solve completely cross-platform. C++ includes some convenience properties helping somehow with this:
// is_iec559 is the compile-time flag the implementation provides for whether float follows IEC 559 (IEEE 754).
bool isIeee754 = std::numeric_limits<float>::is_iec559;
The main problem is that the compiler may not know about the exact CPU architecture on which its output will run. So this is half reliable. Fortunately, the bit format is in most of the case correct. Additionally, if the format is not known, it may be very difficult to normalize it.
We might design some code to detect the float format, or we might decide to skip those cases as "unsupported platforms".
In the case of the IEEE754 32bit, we may easily extract Mantissa, Sign and Exponent with bitwise operations:
float input;
uint8_t exponent = (input>>23)&0xFF;
uint32_t mantissa = (input&0x7FFFFF);
bool sign = (input>>31);
A standard format for transmission could well be the 32 bit IEEE754, so it would work in most of the times without even encoding:
// Reports whether the argument's type is flagged as IEC 559 (IEEE 754) by the
// implementation and occupies exactly 4 bytes.
bool isStandard32BitIeee754( float f)
{
    // TODO: improve for "special" targets.
    const bool flaggedIec559 = std::numeric_limits<decltype(f)>::is_iec559;
    const bool fourBytesWide = (sizeof(f) == 4);
    return flaggedIec559 && fourBytesWide;
}
Finally, and especially for those non-standard platforms, it is required to keep special values for NaN and infinite.
Serialization of a float for transmission
The second issue is much simpler, it is just required to transform the standardized binary to a char array, however, not all characters may be acceptable on network, especially if it is used in HTTP protocol or equivalent.
For this example, I will convert the stream to hexadecimal encoding (an alternative could be Base64, etc..).
Note: I know there are some functions which may help; I deliberately use plain C++ to show the steps at as low a level as possible.
// Splits a byte into two upper-case hexadecimal digit characters.
//   out1 - receives the digit for the high nibble
//   out2 - receives the digit for the low nibble
//   in   - byte to convert
void toHex( uint8_t &out1, uint8_t &out2, uint8_t in)
{
    static const char digits[] = "0123456789ABCDEF";
    const uint8_t high = in >> 4;
    const uint8_t low = in & 0xF;
    out1 = static_cast<uint8_t>(digits[high]);
    out2 = static_cast<uint8_t>(digits[low]);
}
// Writes the 4 bytes of a float into `out` as 8 hexadecimal characters, two
// per byte, in the order the bytes are stored in memory.
//   in  - value to encode
//   out - resized to 8 characters and overwritten with the hex text
void standardFloatToHex (float in, std::string &out)
{
    // Overlay used to reach the individual bytes of the float.
    union Aux
    {
        uint8_t c[4];
        float f;
    };

    Aux converter;
    converter.f = in;
    out.resize(8);

    int pos = 0;
    for (int byteIndex = 0; byteIndex < 4; ++byteIndex)
    {
        // Could use std::stringstream as an alternative.
        uint8_t highDigit, lowDigit;
        toHex(highDigit, lowDigit, converter.c[byteIndex]);
        out[pos++] = highDigit;
        out[pos++] = lowDigit;
    }
}
Finally, the equivalent decoding is required in the opposite side.
Conclusion
The standardization of the float value into a shared bit format has been explained. Some implementation-dependent conversions may be required.
The serialization for most common network protocols is shown.
Is there a way to read .pfm files in OpenCV?
Thank you very much for any suggestions!
PFM is an uncommon image format and I don't know why the Middlebury dataset chose to use that, probably because it uses floating point values.
Anyway I was able to read the images with OpenCV:
import numpy as np
import cv2
# Load the PFM file, passing IMREAD_UNCHANGED so the image is requested as stored.
groundtruth = cv2.imread('disp0.pfm', cv2.IMREAD_UNCHANGED)
Note the IMREAD_UNCHANGED flag. Somehow it is able to read all the correct values even if OpenCV does not support it.
But wait a minute: inf values are commonly used to set INVALID pixel disparity, so to properly display the image you should do:
# Zero out the +inf entries (used for invalid disparities) before display
invalid = np.isposinf(groundtruth)
groundtruth[invalid] = 0
# Rescale to the 0..255 range, then store as 8-bit
scaled = cv2.normalize(groundtruth, None, 0, 255, cv2.NORM_MINMAX)
groundtruth = scaled.astype(np.uint8)
# Open a window with the result and wait for a key press before closing it
cv2.imshow("groundtruth", groundtruth)
cv2.waitKey(0)
cv2.destroyAllWindows()
Based on the description of the ".pfm" file format (see http://netpbm.sourceforge.net/doc/pfm.html), I wrote the following read/write functions, which depend only on the standard C/C++ library. They have worked well for reading and writing pfm files such as the ground truth disparity ".pfm" files from Middlebury Computer Vision (see http://vision.middlebury.edu/stereo/submit3/).
#ifndef _PGM_H_
#define _PGM_H_
#include <fstream>
#include <iostream>
#include <algorithm>
#include <string>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <bitset> /*std::bitset<32>*/
#include <cstdio>
enum PFM_endianness { BIG, LITTLE, ERROR};

/**
 * Reader/writer for grayscale ("Pf") Portable Float Map images.
 *
 * A PFM file is a three-line text header followed by a raw raster:
 *   1) identifier line: "PF" (color) or "Pf" (grayscale);
 *   2) dimensions line: width and height in pixels;
 *   3) scale factor / endianness line: a nonzero decimal number. A negative
 *      value means the raster is little endian, otherwise it is big endian;
 *      the absolute value is the scale factor of the samples.
 * The raster holds the pixels with no delimiters, each row left to right and
 * the rows ordered bottom to top.
 *
 * The object keeps the width, height and scale/endianness value of the last
 * image read (or set through the setters); write_pfm() uses the stored width
 * and height. Unrecoverable read errors print a message and terminate the
 * program with EXIT_FAILURE.
 */
class PFM {
public:
    PFM();

    /**
     * Tells whether the bytes of every raster sample must be reversed to
     * convert between the file's byte order and the host's.
     * Terminates the program when the scale/endianness value is still 0.
     */
    inline bool is_little_big_endianness_swap(){
        if (this->endianess == 0.f) {
            std::cerr << "this-> endianness is not assigned yet!\n";
            exit(EXIT_FAILURE);
        }
        // Detect the host byte order from the first byte in memory of a known
        // 32-bit pattern. The comparisons are written with == directly:
        // "x ^ 0xef == 0" parses as "x ^ (0xef == 0)" and never detected BIG.
        const uint32_t probe = 0xdeadbeef;
        const unsigned char first = *reinterpret_cast<const unsigned char *>(&probe);
        const PFM_endianness endianType_ =
            (first == 0xef) ? LITTLE : (first == 0xde) ? BIG : ERROR;
        // ".pfm" format file specifies that:
        // positive scale means big endianess;
        // negative scale means little endianess.
        return ((BIG == endianType_) && (this->endianess < 0.f))
            || ((LITTLE == endianType_) && (this->endianess > 0.f));
    }

    /**
     * Reads a grayscale PFM file.
     *
     * @param filename path of the file to read
     * @return the samples in top-to-bottom row order and host byte order, in
     *         a buffer of width*height elements allocated with malloc(); the
     *         caller releases it with free(). Width, height and the
     *         scale/endianness value are stored in the object.
     */
    template<typename T>
    T * read_pfm(const std::string & filename) {
        FILE * pFile = fopen(filename.c_str(), "rb");
        if (pFile == NULL) {
            // No fclose here: there is no open stream to close.
            std::cout << "Cannot open file " << filename
                << ", or it does not exist!\n";
            exit(EXIT_FAILURE);
        }

        // "%99s" keeps every header token inside its 100-byte buffer.
        char magic[100];
        // strcmp() returns 0 if they are equal.
        if (fscanf(pFile, "%99s", magic) != 1 || strcmp(magic, "Pf") != 0) {
            std::cout << "Invalid magic number!"
                << " No Pf (meaning grayscale pfm) is missing!!\n";
            fclose(pFile);
            exit(EXIT_FAILURE);
        }

        char widthText[100], heightText[100], scaleText[100];
        if (fscanf(pFile, "%99s %99s %99s", widthText, heightText, scaleText) != 3) {
            std::cout << "Invalid PFM header in file " << filename << "!\n";
            fclose(pFile);
            exit(EXIT_FAILURE);
        }
        this->width = atoi(widthText);
        this->height = atoi(heightText);
        this->endianess = atof(scaleText);
        if (this->width <= 0 || this->height <= 0) {
            std::cout << "Invalid PFM dimensions in file " << filename << "!\n";
            fclose(pFile);
            exit(EXIT_FAILURE);
        }

        const size_t rowLen = static_cast<size_t>(this->width);
        const size_t rows = static_cast<size_t>(this->height);
        const size_t length_ = rowLen * rows;
        const size_t rasterBytes = length_ * sizeof(T);

        // The raster is the last rasterBytes bytes of the file; locate it
        // from the end and make sure the file is large enough to hold it.
        fseek(pFile, 0, SEEK_END);
        const long lSize = ftell(pFile);
        if (lSize < 0 || static_cast<unsigned long>(lSize) < rasterBytes) {
            std::cout << "PFM raster is truncated in file " << filename << "!\n";
            fclose(pFile);
            exit(EXIT_FAILURE);
        }
        fseek(pFile, lSize - static_cast<long>(rasterBytes), SEEK_SET);

        T* tbimg = (T *)malloc(rasterBytes);// top-to-bottom.
        if (tbimg == NULL) {
            std::cout << "Out of memory while reading " << filename << "!\n";
            fclose(pFile);
            exit(EXIT_FAILURE);
        }

        // PFM stores rows bottom to top: file row i is image row
        // height-1-i, so each row is read straight into its final place.
        for (size_t i = 0; i < rows; i++) {
            T* dstRow = tbimg + (rows - i - 1) * rowLen;
            if (fread(dstRow, sizeof(T), rowLen, pFile) != rowLen) {
                std::cout << "Cannot read PFM raster from file " << filename << "!\n";
                free(tbimg);
                fclose(pFile);
                exit(EXIT_FAILURE);
            }
        }
        fclose(pFile);

        if (this->is_little_big_endianness_swap()){
            std::cout << "little-big endianness transformation is needed.\n";
            // Reverse the bytes of every sample in place.
            unsigned char * bytes = reinterpret_cast<unsigned char *>(tbimg);
            for (size_t i = 0; i < length_; ++i)
                std::reverse(bytes + i * sizeof(T), bytes + (i + 1) * sizeof(T));
        }
        return tbimg;
    }

    /**
     * Writes a grayscale PFM file using the stored width and height.
     *
     * @param filename   path of the file to create
     * @param imgbuffer  width*height samples in top-to-bottom row order and
     *                   host byte order
     * @param endianess_ value for the scale factor / endianness header line
     *                   (negative: little endian raster, positive: big
     *                   endian); also stored in the object
     *
     * Each sample is written as sizeof(T) consecutive bytes; the PFM format
     * defines samples as 4-byte IEEE 32 bit floating point numbers.
     */
    template<typename T>
    void write_pfm(const std::string & filename, const T* imgbuffer,
        const float & endianess_) {
        this->endianess = endianess_;

        std::ofstream ofs(filename.c_str(), std::ofstream::binary);
        if (!ofs) {
            std::cerr << "Cannot open file " << filename << " for writing!\n";
            return;
        }

        // Header: identifier line, dimensions line, scale/endianness line.
        ofs << "Pf\n"
            << this->width << " " << this->height << "\n"
            << endianess_ << "\n";

        if (this->width <= 0 || this->height <= 0) {
            // Nothing to write for an empty image.
            ofs.close();
            return;
        }

        const size_t rowLen = static_cast<size_t>(this->width);
        const size_t rows = static_cast<size_t>(this->height);
        const size_t length_ = rowLen * rows;
        const size_t rasterBytes = length_ * sizeof(T);

        T* tbimg = (T *)malloc(rasterBytes);
        if (tbimg == NULL) {
            std::cerr << "Out of memory while writing " << filename << "!\n";
            ofs.close();
            return;
        }

        // PFM SPEC image stored bottom -> top reversing image
        for (size_t i = 0; i < rows; i++) {
            memcpy(&tbimg[(rows - i - 1) * rowLen],
                &imgbuffer[i * rowLen],
                rowLen * sizeof(T));
        }

        if (this->is_little_big_endianness_swap()) {
            std::cout << "little-big endianness transformation is needed.\n";
            // Reverse the bytes of every sample in place.
            unsigned char * bytes = reinterpret_cast<unsigned char *>(tbimg);
            for (size_t i = 0; i < length_; ++i)
                std::reverse(bytes + i * sizeof(T), bytes + (i + 1) * sizeof(T));
        }

        ofs.write(reinterpret_cast<const char *>(tbimg),
            static_cast<std::streamsize>(rasterBytes));
        ofs.close();
        free(tbimg);
    }

    // Accessors for the stored header values.
    inline float getEndianess(){return endianess;}
    inline int getHeight(void){return height;}
    inline int getWidth(void){return width;}
    inline void setHeight(const int & h){height = h;}
    inline void setWidth(const int & w){width = w;}

private:
    int height;
    int width;
    float endianess; // scale factor / endianness value from the header; 0 means "not set"
};
#endif /* PGM_H_ */
Forgive me for leaving lots of useless comments in the code.
A simple example shows the write/read:
// Round-trip example: reads a PFM image from disk and writes it back out
// under a different name.
// Returns 0 on success and 1 when the input image could not be loaded.
int main(){
    PFM pfm_rw;
    string temp = "img/Motorcycle/disp0GT.pfm";
    float * p_disp_gt = pfm_rw.read_pfm<float>(temp);
    // write_pfm copies straight out of this buffer, so a null pointer must
    // never reach it.
    if (p_disp_gt == 0)
        return 1;
    //int imgH = pfm_rw.getHeight();
    //int imgW = pfm_rw.getWidth();
    //float scale = pfm_rw.getEndianess();
    string temp2 = "result/Motorcycle/disp0GT_n1.pfm";
    pfm_rw.write_pfm<float>(temp2, p_disp_gt, -1.0f);
    // NOTE(review): p_disp_gt is presumably owned by the caller; release it
    // with whatever matches read_pfm's allocation (free vs delete[]) once
    // that is confirmed.
    // A zero exit status signals success to the calling environment; the
    // previous "return 1" reported failure even when everything worked.
    return 0;
}
As far as I know, OpenCV doesn't support reading PFM files directly.
You can refer to the code snippet here for a simple PFM reader, which will enable you to read PFM files into COLOR *data with COLOR defined as follows:
/* One colour pixel: three float samples stored in r, g, b order. */
typedef struct {
    float r, g, b;
} COLOR;
Given the representation of decimal I have (you can find it here, for instance), I tried to convert a double this way:
explicit Decimal(double n)
{
DoubleAsQWord doubleAsQWord;
doubleAsQWord.doubleValue = n;
uint64 val = doubleAsQWord.qWord;
const uint64 topBitMask = (int64)(0x1 << 31) << 32;
//grab the 63th bit
bool isNegative = (val & topBitMask) != 0;
//bias is 1023=2^(k-1)-1, where k is 11 for double
uint32 exponent = (((uint64)(val >> 31) >> 21) & 0x7FF) - 1023;
//exclude both sign and exponent (<<12, >>12) and normalize mantissa
uint64 mantissa = ((uint64)(0x1 << 31) << 21) | (val << 12) >> 12;
// normalized mantissa is 53 bits long,
// the exponent does not care about normalizing bit
uint8 scale = exponent + 11;
if (scale > 11)
scale = 11;
else if (scale < 0)
scale = 0;
lo_ = ((isNegative ? -1 : 1) * n) * std::pow(10., scale);
signScale_ = (isNegative ? 0x1 : 0x0) | (scale << 1);
// will always be 0 since we cannot reach
// a 128 bits precision with a 64 bits double
hi_ = 0;
}
The DoubleAsQWord type is used to "cast" from double to its uint64 representation:
// Overlays a double with a 64-bit unsigned integer so the raw bit pattern
// of the floating-point value can be inspected.
// NOTE(review): writing doubleValue and then reading qWord relies on the
// compiler supporting union-based type punning; standard C++ does not
// guarantee it. Copying the bytes with memcpy is the portable alternative.
union DoubleAsQWord
{
// The floating-point view of the storage.
double doubleValue;
// The same storage viewed as a 64-bit integer.
uint64 qWord;
};
My Decimal type has these fields:
// Magnitude of the value multiplied by 10^scale, as stored by the
// constructor taking a double.
uint64 lo_;
// The constructor taking a double always stores 0 here; presumably the
// high-order part of the magnitude - TODO confirm.
uint32 hi_;
// Bit 0 is the sign (1 = negative); the scale is stored shifted left by one.
int32 signScale_;
All this stuff is encapsulated in my Decimal class. You can notice I extract the mantissa even if I'm not using it. I'm still thinking of a way to guess the scale accurately.
This is purely practical, and seems to work in the case of a stress test:
// Stress test: round-trips 10000 pseudo-random doubles through Decimal and
// checks that each one comes back within EPSILON of the value that went in.
BOOST_AUTO_TEST_CASE( convertion_random_stress )
{
    const double EPSILON = 0.000001f;
    srand(time(0));

    int round = 0;
    while (round < 10000)
    {
        // Random sign, numerator in [1000, 1999], denominator in [2, 43].
        const int sign = ((rand() % 10) % 2 == 0) ? -1 : 1;
        const double numerator = (double)(rand() % 1000 + 1000.);
        const double denominator = (double)(rand() % 42 + 2.);
        double d1 = sign * numerator / denominator;

        // Convert to Decimal and straight back again.
        Decimal converted(d1);
        double d2 = converted.toDouble();

        double absError = fabs(d1 - d2);
        BOOST_CHECK_MESSAGE(
            absError <= EPSILON,
            "absError=" << absError << " with " << d1 << " - " << d2
        );

        ++round;
    }
}
Anyway, how would you convert from double to this decimal representation?
I think you guys will be interested in an implementation of a C++ wrapper to the Intel Decimal Floating-Point Math Library:
C++ Decimal Wrapper Class
Intel DFP
What about using VarR8FromDec Function ?
EDIT: This function is declared on Windows system only. However an equivalent C implementation is available with WINE, here: http://source.winehq.org/source/dlls/oleaut32/vartype.c
Perhaps you are looking for System::Convert::ToDecimal()
http://msdn.microsoft.com/en-us/library/a69w9ca0%28v=vs.80%29.aspx
Alternatively you could try recasting the Double as a Decimal.
An example from the MSDN.
http://msdn.microsoft.com/en-us/library/aa326763%28v=vs.71%29.aspx
// Convert the double argument to a Decimal and print both values on one
// line; exceptions thrown by the conversion are caught and reported in
// place of the converted value.
// Written in Managed C++ syntax (Object*, __box).
void DecimalFromDouble( double argument )
{
// Holds either the boxed Decimal or the replacement produced in the catch.
Object* decValue;
// Convert the double argument to a Decimal value.
try
{
// Box the converted value so it can be referenced through an Object*.
decValue = __box( (Decimal)argument );
}
catch( Exception* ex )
{
// The conversion threw: print whatever GetExceptionType returns for the
// exception instead of a Decimal (helper defined outside this snippet).
decValue = GetExceptionType( ex );
}
// formatter is declared outside this snippet; presumably a format string
// with placeholders for the two values - TODO confirm.
Console::WriteLine( formatter, __box( argument ), decValue );
}
If you do not have access to the .Net routines then this is tricky. I have done this myself for my hex editor (so that users can display and edit C# Decimal values using the Properties dialog) - see http://www.hexedit.com for more information. Also the source for HexEdit is freely available - see my article at http://www.codeproject.com/KB/cpp/HexEdit.aspx.
Actually my routines convert between Decimal and strings but you can of course use sprintf to convert the double to a string first. (Also when you talk about double I think you explicitly mean IEEE 64-bit floating point format, though this is what most compilers/systems use nowadays.)
Note that there are a few gotchas if you want to handle precisely all valid Decimal values and return an error for any value that cannot be converted, since the format is not well documented. (The Decimal format is awful really, e.g. the same number can have many representations.)
Here is my code that converts a string to a Decimal. Note that it uses the GNU Multiple Precision Arithmetic Library (functions that start with mpz_). The String2Decimal function obviously returns false if it fails for some reason, such as the value being too big. The parameter 'presult' must point to a buffer of at least 16 bytes, to store the result.
bool String2Decimal(const char *ss, void *presult)
{
bool retval = false;
// View the decimal (result) as four 32 bit integers
unsigned __int32 *dd = (unsigned __int32 *)presult;
mpz_t mant, max_mant;
mpz_inits(mant, max_mant, NULL);
int exp = 0; // Exponent
bool dpseen = false; // decimal point seen yet?
bool neg = false; // minus sign seen?
// Scan the characters of the value
const char *pp;
for (pp = ss; *pp != '\0'; ++pp)
{
if (*pp == '-')
{
if (pp != ss)
goto exit_func; // minus sign not at start
neg = true;
}
else if (isdigit(*pp))
{
mpz_mul_si(mant, mant, 10);
mpz_add_ui(mant, mant, unsigned(*pp - '0'));
if (dpseen) ++exp; // Keep track of digits after decimal pt
}
else if (*pp == '.')
{
if (dpseen)
goto exit_func; // more than one decimal point
dpseen = true;
}
else if (*pp == 'e' || *pp == 'E')
{
char *end;
exp -= strtol(pp+1, &end, 10);
pp = end;
break;
}
else
goto exit_func; // unexpected character
}
if (*pp != '\0')
goto exit_func; // extra characters after end
if (exp < -28 || exp > 28)
goto exit_func; // exponent outside valid range
// Adjust mantissa for -ve exponent
if (exp < 0)
{
mpz_t tmp;
mpz_init_set_si(tmp, 10);
mpz_pow_ui(tmp, tmp, -exp);
mpz_mul(mant, mant, tmp);
mpz_clear(tmp);
exp = 0;
}
// Get max_mant = size of largest mantissa (2^96 - 1)
//mpz_set_str(max_mant, "79228162514264337593543950335", 10); // 2^96 - 1
static unsigned __int32 ffs[3] = { 0xFFFFffffUL, 0xFFFFffffUL, 0xFFFFffffUL };
mpz_import(max_mant, 3, -1, sizeof(ffs[0]), 0, 0, ffs);
// Check for mantissa too big.
if (mpz_cmp(mant, max_mant) > 0)
goto exit_func; // value too big
else if (mpz_sgn(mant) == 0)
exp = 0; // if mantissa is zero make everything zero
// Set integer part
dd[2] = mpz_getlimbn(mant, 2);
dd[1] = mpz_getlimbn(mant, 1);
dd[0] = mpz_getlimbn(mant, 0);
// Set exponent and sign
dd[3] = exp << 16;
if (neg && mpz_sgn(mant) > 0)
dd[3] |= 0x80000000;
retval = true; // indicate success
exit_func:
mpz_clears(mant, max_mant, NULL);
return retval;
}
How about this:
1) sprintf number into s
2) find decimal point (strchr), store in idx
3) atoi = obtain integer part easily, use union to separate high/lo
4) use strlen - idx to obtain number of digits after point
sprintf may be slow, but you'll get the solution in under 2 minutes of typing...