I am trying to unpack MP3 frames using bit-fields.
An MP3 frame header starts with the 12-bit syncword 0xFFF, followed by 20 bits of header data. The structure of the header is represented as follows:
struct Mp3FrameRaw {
    unsigned short fff : 12;            // Should always be 0xFFF = 4095
    unsigned short mpeg_standard : 1;
    unsigned short layer : 2;
    unsigned short error_protection : 1;
    unsigned short bitrate : 4;
    unsigned short frequency : 2;
    unsigned short pad_bit : 1;
    unsigned short : 1;                 // private bit, unused
    unsigned short mode : 2;
    unsigned short mode_extension : 2;
    unsigned short copyrighted : 1;
    unsigned short original : 1;
    unsigned short emphasis : 2;
};
In total, the header is 32 bits long.
My program first finds the syncword:
size_t find_sync_word(std::vector<unsigned char> &input) {
    bool previous_was_ff = false;
    for (size_t offset = 0; offset < input.size(); ++offset) {
        if (previous_was_ff && (input[offset] & 0xF0) == 0xF0) // parentheses needed: == binds tighter than &
            return offset - 1;
        previous_was_ff = (input[offset] == 0xFF);
    }
    return size_t(-1); // no syncword found; wraps to SIZE_MAX, checked by the caller
}
And then tries to unpack the first header:
int parse(std::vector<unsigned char> &input) {
    size_t offset = find_sync_word(input);
    if (offset == size_t(-1)) { // size_t is unsigned, so a check of 'offset < 0' could never fire
        std::cerr << "Not a valid Mp3 file" << std::endl;
        return -1;
    }
    Mp3FrameRaw *frame_ptr = reinterpret_cast<Mp3FrameRaw *>(input.data() + offset);
    std::cout << frame_ptr->fff << " (Should always be 4095)" << std::endl;
    std::cout << frame_ptr->layer << " (Should be 1)" << std::endl;
    std::cout << frame_ptr->bitrate << " (Should be 1-14)" << std::endl;
    return 0;
}
The main.cpp reads:
#include <fstream>
#include <iostream>
#include <iterator>
#include <vector>

int main() {
    std::ifstream mp3_file("/path/to/file.mp3", std::ios::binary);
    std::vector<unsigned char> file_contents((std::istreambuf_iterator<char>(mp3_file)),
                                             std::istreambuf_iterator<char>());
    return parse(file_contents);
}
The result reads:
3071 (Should always be 4095)
3 (Should be 1)
0 (Should be 1-14)
By contrast, if I unpack the fields manually bit by bit, everything works as expected, e.g.:
{
    size_t offset;
    Mp3FrameRaw frame;
    ...
    frame.fff = input[offset++];
    frame.fff = (frame.fff << 4) | (input[offset] >> 4);
    frame.mpeg_standard = (input[offset] >> 3) & 1;
    frame.layer = (input[offset] >> 1) & 0x3;
    frame.error_protection = input[offset++] & 0x1;
    frame.bitrate = input[offset] >> 4;
    ...
}
I assume that the bit-fields are not laid out the way I would intuitively expect. What am I doing wrong?
I am using gcc on Ubuntu 18.04.
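For reference, here is a minimal probe (hypothetical, not part of the original program) that makes the layout visible, assuming GCC on little-endian x86-64, where bit-fields are allocated starting from the least significant bit of each unsigned short unit and the unit's bytes are stored little-endian:

#include <cstdio>
#include <cstring>

struct Probe {
    unsigned short fff : 12;
    unsigned short rest : 4;
};

int main() {
    Probe p{};
    p.fff = 0xFFF;
    unsigned char bytes[sizeof p];
    std::memcpy(bytes, &p, sizeof p);
    // Prints "ff 0f": the 12-bit field occupies the LOW 12 bits of the
    // 16-bit unit, not the first 12 bits of the byte stream, so it cannot
    // line up with the big-endian MP3 sync word.
    std::printf("%02x %02x\n", bytes[0], bytes[1]);
}

With a typical header starting 0xFF 0xFB, the first 16-bit unit reads as 0xFBFF, whose low 12 bits are 0xBFF = 3071, matching the output above.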
Related
I have an issue with the code below, which encodes a vector of long into a string by storing the differences between consecutive values.
The encode/decode round trip works fine as long as the values are at or below 2^30.
For any value above that, the logic fails. Note that sizeof(long) is 8 bytes.
static std::string encode(const std::vector<long>& path) {
    long lastValue = 0L;
    std::stringstream result;
    for (long value : path) {
        long delta = value - lastValue;
        lastValue = value;
        long var = 0;
        // Shift the delta value left by 1 bit and encode each 5-bit chunk into a character
        for (var = delta < 0 ? ~(delta << 1) : delta << 1; var >= 32L; var >>= 5) {
            result << (char)((32L | var & 31L) + 63L); // char is written to the result stringstream
        }
        // Encode the last 5-bit chunk into a character
        result << (char)(var + 63L); // char is written to the result stringstream
    }
    std::cout << std::endl;
    return result.str();
}
static std::unique_ptr<std::vector<long>> decode(const std::string& encoded) {
    auto decoded = std::make_unique<std::vector<long>>();
    long last_val = 0;
    int index = 0;
    while (index < encoded.length()) {
        int shift = 0;
        long current = 1;
        int c;
        do {
            c = encoded[index++] - 63 - 1;
            current += c << shift;
            shift += 5;
        } while (c >= 31);
        long v = ((current & 1) == 0 ? current >> 1 : ~(current >> 1));
        last_val += v;
        decoded->push_back(last_val);
    }
    return std::move(decoded);
}
Can someone please provide insight into what might be going wrong?
Inside the decode function, c has to be declared as long, not int: c << shift is evaluated at int width, and once shift reaches 30 or more the shifted value overflows (undefined behavior), which is exactly why values above 2^30 fail.
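A minimal sketch of the fixed inner loop (only the declaration changes; the rest of decode stays as posted):

long c; // was: int c; now the shift below is done at 64-bit width
do {
    c = encoded[index++] - 63 - 1;
    current += c << shift; // no longer overflows once shift >= 30
    shift += 5;
} while (c >= 31);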
Here is an implementation of TEA that attempts to encrypt a file containing a text message:
main.cpp
#include <iostream>
#include <iomanip>
#include <string>
#include <fstream>
#include "TEA.h"

int main()
try
{
    // std::cout << "sizeof(long) = " << sizeof(long) << '\n';
    std::string src("in.txt");
    std::string dest("out.txt");
    std::string key("bs");
    send_msg(src, dest, key);
}
catch (std::exception& e)
{
    std::cerr << e.what();
    exit(1);
}
TEA.h
#ifndef TEA_h
#define TEA_h

/*
    src  - eight (2 words or 2*4 bytes) characters to be enciphered.
    dest - enciphered output.
    key  - array of 4 words.
    Assumes sizeof(long) == 4 bytes.
*/
void encipher(const unsigned long* const v,
              unsigned long* const w,
              const unsigned long* const k)
{
    unsigned long y = v[0];
    unsigned long z = v[1];
    unsigned long sum = 0;
    unsigned long delta = 0x9E3779B9;
    unsigned long n = 32;

    while (n-- > 0)
    {
        y += (z<<4 ^ z>>5) + z^sum + k[sum&3];
        sum += delta;
        z += (z<<4 ^ z>>5) + y^sum + k[sum>>11&3];
    }

    w[0] = y;
    w[1] = z;
}
//---------------------------------------------------------------------------
/*
    Sends the clear text from: src_file as
    encrypted text to: dest_file, using TEA
    with key: the last argument.
*/
void send_msg(std::string& src_file,
              std::string& dest_file,
              std::string key)
{
    const int nchar = 2 * sizeof(long);  // size of I/O chunk: 8 bytes = 64 bits
    const int kchar = 2 * nchar;         // size of key: 16 bytes = 128 bits

    // pad key with '0's to match the en-/de-cipher argument input size
    while (key.size() < kchar)
    {
        key += '0';
    }

    // prepare files
    std::ifstream ifs(src_file.c_str());
    std::ofstream ofs(dest_file.c_str());
    if (!ifs || !ofs)
    {
        throw std::runtime_error("File can't open!\n");
    }

    // key: extract raw string data interpreted as pointer to const unsigned long
    const unsigned long* k = reinterpret_cast<const unsigned long*>(key.data());

    // C-compatible way to read & write from/to file 64 bits (two unsigned longs) at a time
    unsigned long outptr[2];
    char inbuf[nchar];
    unsigned long* inptr = reinterpret_cast<unsigned long*>(inbuf);
    int count = 0;

    while (ifs.get(inbuf[count]))
    {
        ofs << std::hex;          // write output in hex
        if (++count == nchar)     // 8 characters in the input buffer: ready to encode
        {
            encipher(inptr, outptr, k);
            // pad with leading 0's
            ofs << std::setw(8) << std::setfill('0') << outptr[0] << ' '
                << std::setw(8) << std::setfill('0') << outptr[1] << ' ';
            count = 0;
        }
    }
    if (count)                    // pad at the end
    {
        while (count != nchar)
        {
            inbuf[count++] = '0';
        }
        encipher(inptr, outptr, k);
        ofs << outptr[0] << ' ' << outptr[1] << ' ';
    }
}
#endif
Input file, in.txt:
The Tiny
Expected contents of the output file:
5b8fb57c 806fbcce
Actual contents of the output file, out.txt:
f3a810ff 3874d755
What am I doing wrong?
The + operator has higher precedence than ^, so (z<<4 ^ z>>5) + z^sum + k[sum&3] is parsed as
(((z<<4) ^ (z>>5)) + z) ^ (sum + k[sum&3]).
Similarly for the other expression.
You should add parentheses to make the intended grouping of your expressions explicit.
The problem was indeed in those expressions (as pointed out by #1201ProgramAlarm); however, it is not caused by the implicit operator precedence (nor by arity) being wrong.
y += (z<<4 ^ z>>5) + z^sum + k[sum&3];
sum += delta;
z += (z<<4 ^ z>>5) + y^sum + k[sum>>11&3]; // <------ the problem is here
In the second update, the left and right bit-shift operations have to be applied to the variable y, i.e.:
z += (y<<4 ^ y>>5) + y^sum + k[sum>>11&3];
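For reference, a sketch of the corrected round with the grouping written out explicitly (the parentheses only spell out what the precedence already produces; this is the XTEA round function):

while (n-- > 0)
{
    y += ((z<<4 ^ z>>5) + z) ^ (sum + k[sum & 3]);
    sum += delta;
    z += ((y<<4 ^ y>>5) + y) ^ (sum + k[(sum >> 11) & 3]); // y, not z
}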
I've been going back trying to find a segmentation fault in my program. Very often when the program crashes, it is at this point:
unsigned long data = octets[g];
So I have tracked this buffer as being created in the main loop with a fixed, defined size. However, since it's defined in an if statement in main, does it need to be allocated with "new"? Basically, after receiving from a TCP socket, the char buffer is copied to an unsigned char buffer to check for certain binary data types. So it is only called into existence if data arrives.
INT8U byteArray[BUFFERSIZE];
This buffer is then passed for message-ID and CRC checking. Is not doing a "new"-style allocation the issue, because it is in the main loop? I thought it would go out of scope at the end of the "if new data is received" statement.
long calc_crc24q(byte* octets, int start, int last) // start is first byte, last is MSbyte of CRC
{
    long crc = CRC24SEED;
    for (int g = start; g < last; g++)  // should xor from preamble to the end of data
    {
        unsigned long data = octets[g]; // fault occurs here often
        crc = crc ^ data << 16;
        for (int i = 0; i < 8; i++)
        {
            crc <<= 1;
            if (crc & 0x1000000)
                crc = crc ^ CRC24POLY;
        }
    }
    return crc & 0x00ffffff; // returns an int value with high byte 00, then data in the lower 3 bytes
}
//---------------------------------------------
Here is the message-ID check:
unsigned int id_message(INT8U* buffer, unsigned int posStart, unsigned int numberbytes, unsigned int& messageLength)
{
    unsigned int messID = 0;
    unsigned int posEnd;
    unsigned int noBytes = 0;

    if (buffer[posStart] == Preamble)
    {
        unsigned int dataLength = (((0x0000 | buffer[posStart+1]) << 8) | buffer[posStart+2]); // 0x byte1 byte2
        messID = ((0x0000 | (buffer[posStart+3] << 4)) | ((buffer[posStart+4] >> 4) & 0x0F));  // byte1 shifted 4 bits, plus the upper 4 bits of byte2
        noBytes = dataLength + 6;
        //numberbytes = noBytes;
        posEnd = posStart + noBytes - 1;

        if (calc_crc24q(buffer, posStart, posEnd-2) != (((0x00000000 | buffer[posEnd-2]) << 16) | ((0x00000000 | buffer[posEnd-1]) << 8) | (0x00000000 | buffer[posEnd])))
        {
            cout << "CRC error" << endl;
            return 0;
        }
        // return message type extracted from data segment
        messageLength = posStart + noBytes;
        return messID;
    }
    return 255; // unknown type
}
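A local fixed-size array does not need "new" to outlive the if block it is declared in; it is valid until the end of that block. The crash pattern described is more consistent with an out-of-range index: posEnd is derived from dataLength, which comes straight off the wire, so a corrupted or truncated message can push it past the end of the buffer. A minimal sketch of the idea, with hypothetical names and sizes (not the original program):

#include <iostream>

int main()
{
    const int BUFFERSIZE = 64;
    bool new_data_received = true;                // stands in for "if new data is received"
    if (new_data_received)
    {
        unsigned char byteArray[BUFFERSIZE] = {}; // lives until the end of this block; no new needed
        unsigned int posEnd = 200;                // hypothetical corrupted length field
        if (posEnd >= BUFFERSIZE)                 // bounds check before indexing prevents the bad read
            std::cout << "message longer than buffer, dropping\n";
    } // byteArray goes out of scope here, which is fine as long as no pointer to it is kept
}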
Hey everyone, this may turn out to be a simple, stupid question, but it has been giving me headaches for a while now. I'm reading data from a Named Binary Tag (NBT) file, and the code works except when I try to read big-endian numbers. The code that gets an integer looks like this:
long NBTTypes::getInteger(istream &in, int num_bytes, bool isBigEndian)
{
    long result = 0;
    char buff[8];
    // get bytes
    readData(in, buff, num_bytes, isBigEndian);
    // convert to integer
    cout << "Converting bytes to integer..." << endl;
    result = buff[0];
    cout << "Result starts at " << result << endl;
    for (int i = 1; i < num_bytes; ++i)
    {
        result = (result << 8) | buff[i];
        cout << "Result is now " << result << endl;
    }
    cout << "Done." << endl;
    return result;
}
And the readData function:
void NBTTypes::readData(istream &in, char *buffer, unsigned long num_bytes, bool BE)
{
    char hold;
    // get data
    in.read(buffer, num_bytes);
    if (BE)
    {
        // convert to little-endian
        cout << "Converting to a little-endian number..." << endl;
        for (unsigned long i = 0; i < num_bytes / 2; ++i)
        {
            hold = buffer[i];
            buffer[i] = buffer[num_bytes - i - 1];
            buffer[num_bytes - i - 1] = hold;
        }
        cout << "Done." << endl;
    }
}
This code originally worked (gave correct positive values), but now, for whatever reason, the values I get are either over- or underflowing. What am I missing?
Your byte-order swapping is fine; however, building the integer from the sequence of bytes is not.
First of all, you get the endianness wrong: the first byte you read in becomes the most significant byte, while it should be the other way around.
Then, when OR-ing in the characters from the array, be aware that they are promoted to an int, which, for a signed char, sets a lot of additional bits unless you mask them out.
Finally, when long is wider than num_bytes, you need to sign-extend the bits.
This code works:

union {
    long s;          // signed result
    unsigned long u; // use unsigned for safe bit-shifting
} result;

int i = num_bytes - 1;
if (buff[i] & 0x80)
    result.s = -1;   // sign-extend
else
    result.s = 0;
for (; i >= 0; --i)
    result.u = (result.u << 8) | (0xff & buff[i]);
return result.s;
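Put together, a sketch of getInteger with this replacement applied (assuming readData behaves as posted above and the same using-declarations as the original; the debug prints are dropped):

long NBTTypes::getInteger(istream &in, int num_bytes, bool isBigEndian)
{
    char buff[8];
    readData(in, buff, num_bytes, isBigEndian); // leaves buff in little-endian order

    union {
        long s;          // signed result
        unsigned long u; // unsigned view for well-defined shifting
    } result;

    int i = num_bytes - 1;
    result.s = (buff[i] & 0x80) ? -1 : 0; // pre-fill with the sign
    for (; i >= 0; --i)
        result.u = (result.u << 8) | (0xff & buff[i]); // mask out the promoted sign bits
    return result.s;
}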
I have a couple of integers, for example (in binary representation):
00001000, 01111111, 10000000, 00000001
and I need to put them in sequence into an array of bytes (chars), without the leading zeros, like so:
10001111 11110000 0001 (the last byte only partially filled)
I understand that it must be done with bit shifting (<<, >>) and bitwise OR (|), but I can't find the correct algorithm. Can you suggest the best approach?
The integers I need to put there are unsigned long long ints, so the length of one can be anywhere from 1 bit to 8 bytes (64 bits).
You could use a std::bitset:
#include <bitset>
#include <iostream>

int main() {
    unsigned i = 242122534;
    std::bitset<sizeof(i) * 8> bits;
    bits = i;
    std::cout << bits.to_string() << "\n";
}
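To drop the leading zeros from the bitset's string, something along these lines would follow (a sketch; it assumes i is non-zero, since find would return npos otherwise and erase would then empty the string):

std::string s = bits.to_string();
s.erase(0, s.find('1')); // strip the leading zeros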
There are doubtless other ways of doing it, but I would probably go with the simplest:
std::vector<unsigned char> integers; // Has your list of bytes
integers.push_back(0x02);
integers.push_back(0xFF);
integers.push_back(0x00);
integers.push_back(0x10);
integers.push_back(0x01);

std::string str; // Will have your resulting string
for (unsigned int i = 0; i < integers.size(); i++)
    for (int j = 0; j < 8; j++)
        str += ((integers[i] << j) & 0x80 ? "1" : "0");
std::cout << str << "\n";

size_t begin = str.find("1");
if (begin > 0) str.erase(0, begin);
std::cout << str << "\n";
I wrote this up before you mentioned that you were using unsigned long long ints, but that doesn't actually change very much of this. The mask needs to change, and so does the j loop bound, but otherwise the above should work; see the sketch below.
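A sketch of that 64-bit variant (hypothetical values; the mask now tests the top bit of an unsigned long long and j runs over all 64 bits):

std::vector<unsigned long long> integers = {0x02, 0xFF, 0x00, 0x10, 0x01};
std::string str;
for (std::size_t i = 0; i < integers.size(); i++)
    for (int j = 0; j < 64; j++)
        str += ((integers[i] << j) & 0x8000000000000000ULL) ? "1" : "0";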
Convert them to strings, then erase all leading zeros:
#include <iostream>
#include <sstream>
#include <string>
#include <cstdint>
std::string to_bin(uint64_t v)
{
    std::stringstream ss;
    for (size_t x = 0; x < 64; ++x)
    {
        if (v & 0x8000000000000000)
            ss << "1";
        else
            ss << "0";
        v <<= 1;
    }
    return ss.str();
}

void trim_leading_zeros(std::string& in) // erases the zeros at the front of the string
{
    size_t non_zero = in.find_first_not_of("0");
    if (std::string::npos != non_zero)
        in.erase(in.begin(), in.begin() + non_zero);
    else
    {
        // no 1 in the data set; what to do?
        in = "<no data>";
    }
}

int main()
{
    uint64_t v1 = 437148234;
    uint64_t v2 = 1;
    uint64_t v3 = 0;

    std::string v1s = to_bin(v1);
    std::string v2s = to_bin(v2);
    std::string v3s = to_bin(v3);

    trim_leading_zeros(v1s);
    trim_leading_zeros(v2s);
    trim_leading_zeros(v3s);

    std::cout << v1s << "\n"
              << v2s << "\n"
              << v3s << "\n";
    return 0;
}
A simple approach would be to keep a "current byte" (acc in the following), the number of bits already used in it (bitcount), and a vector of fully processed bytes (output):
#include <vector>

int acc = 0;
int bitcount = 0;
std::vector<unsigned char> output;

void writeBits(int size, unsigned long long x)
{
    while (size > 0)
    {
        // sz = how many bits we're about to copy
        int sz = size;

        // max available space in acc
        if (sz > 8 - bitcount) sz = 8 - bitcount;

        // get the bits
        acc |= ((x >> (size - sz)) << (8 - bitcount - sz));

        // zero them off in x (1ULL, not 1: the shift must be done at 64-bit width)
        x &= (1ULL << (size - sz)) - 1;

        // acc got bigger and x got smaller
        bitcount += sz;
        size -= sz;

        if (bitcount == 8)
        {
            // got a full byte!
            output.push_back(acc);
            acc = bitcount = 0;
        }
    }
}

void writeNumber(unsigned long long x)
{
    // How big is it?
    int size = 0;
    while (size < 64 && x >= (1ULL << size))
        size++;
    writeBits(size, x);
}
Note that at the end of processing you should check whether any bits are still left in the accumulator (bitcount > 0) and, if so, flush them with an output.push_back(acc); (as in the usage sketch below).
Note also that if speed is an issue, using a bigger accumulator is probably a good idea (although the output will then depend on machine endianness), and that finding how many bits a number uses can be done much faster than with a linear search in C++; x86, for example, has a dedicated machine-language instruction, BSR, for exactly this.
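A hypothetical usage with the four example values from the question (the binary literals assume C++14):

int main()
{
    writeNumber(0b00001000); // packed as 1000
    writeNumber(0b01111111); // packed as 1111111
    writeNumber(0b10000000); // packed as 10000000
    writeNumber(0b00000001); // packed as 1
    if (bitcount > 0)        // flush the final, partially filled byte
        output.push_back(acc);
    // output now holds 0x8F, 0xF0, 0x10: 10001111 11110000 0001 plus four padding zeros
}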