I've spent a couple of hours trying to make sense of the results of a quick mockup I made for an application I am working on. Here is what I am trying to do. I have an incoming message which, among other things, has a 512-bit field that can take multiple forms depending on its type and needs to be translated into usable data. The first type consists of 4 x 32-bit words originating from the little-endian LSB:
|31 0|
|-------------word1--------------|
|-------------word2--------------|
|-------------word3--------------|
|-------------word4--------------|
|-------------unused-------------|
...
|-------------unused-------------|
512
The second is significantly larger and has 4 x 128-bit words, but still occupies the same overall space:
|31 0|
|-------------word1--------------|
|----------word1 cont.-----------|
|----------word1 cont.-----------|
|----------word1 cont.-----------|
|-------------word2--------------|
|----------word2 cont.-----------|
|----------word2 cont.-----------|
|----------word2 cont.-----------|
|-------------word3--------------|
|----------word3 cont.-----------|
|----------word3 cont.-----------|
|----------word3 cont.-----------|
|-------------word4--------------|
|----------word4 cont.-----------|
|----------word4 cont.-----------|
|----------word4 cont.-----------|
I am attempting to make a class that exposes this as 4 accessible void* fields, regardless of whether they are type 1 (and convertible to an unsigned integer) or 128 bits of opaque data. Here is the code.
The first header is just common structures. I attempted to minimize bit shifting by using unions.
// Common.hpp
#ifndef COMMON_HPP
#define COMMON_HPP
#include <cstdint>
#include <cstddef>
enum WaveFormType
{
TYPE1,
TYPE2
};
enum CommandWordEnum
{
COMMAND1,
COMMAND2,
COMMAND3,
COMMAND4
};
// 384 + 32 + 32 + 32 + 32 = 512 bits
struct Type1CommandWord
{
uint32_t unused[12];
uint32_t word[4];
};
// 128 + 128 + 128 + 128 = 512 bits
struct Type2CommandWord
{
uint32_t word1[4];
uint32_t word2[4];
uint32_t word3[4];
uint32_t word4[4];
};
union CommandWordData
{
uint8_t buffer[64];
Type1CommandWord type1CommandWord;
Type2CommandWord type2CommandWord;
};
#endif /* COMMON_HPP */
The second class is a mockup of the incoming message. In reality it is a very large message, but at the moment, these fields are the ones I am concerned about.
// Message.hpp
#ifndef MESSAGE_HPP
#define MESSAGE_HPP
#include <cstdint>
#include "Common.hpp"
//Test Message
struct Message
{
uint8_t data[64];
WaveFormType waveFormType;
};
#endif /* MESSAGE_HPP */
The next class is the actual receiver class
// CommandReceiver.hpp
#ifndef COMMAND_RECEIVER_HPP
#define COMMAND_RECEIVER_HPP
#include "Common.hpp"
#include "Message.hpp"
#include <cstdlib>
class CommandReceiver
{
public:
CommandReceiver(Message &_message);
virtual ~CommandReceiver() = default;
WaveFormType getWaveFormType() const;
size_t getCommandWordSize() const;
void *getCommandWord(CommandWordEnum cmdWordEnum) const;
private:
CommandWordData commandWordData;
WaveFormType waveFormType;
};
#endif /* COMMAND_RECEIVER_HPP */
and the definition
// CommandReceiver.cpp
#include "CommandReceiver.hpp"
#include <string.h>
CommandReceiver::CommandReceiver(Message &_message)
: waveFormType(_message.waveFormType)
{
memset(
this->commandWordData.buffer,
0,
sizeof(commandWordData.buffer));
memcpy(
this->commandWordData.buffer,
&_message.data,
sizeof(this->commandWordData.buffer));
}
WaveFormType CommandReceiver::getWaveFormType() const
{
return waveFormType;
}
size_t CommandReceiver::getCommandWordSize() const
{
return((waveFormType == TYPE2)
? sizeof(this->commandWordData.type2CommandWord.word1)
: sizeof(this->commandWordData.type1CommandWord.word[1]));
}
void *CommandReceiver::getCommandWord(CommandWordEnum cmdWordEnum) const
{
void * cmdPtr = (void*)malloc(this->getCommandWordSize());
switch(cmdWordEnum)
{
case COMMAND1:
if(this->waveFormType == TYPE2)
{
memcpy(
cmdPtr,
&this->commandWordData.type2CommandWord.word1,
this->getCommandWordSize());
}
else
{
memcpy(
cmdPtr,
&this->commandWordData.type1CommandWord.word[0],
this->getCommandWordSize());
}
break;
case COMMAND2:
if(this->waveFormType == TYPE2)
{
memcpy(
cmdPtr,
&this->commandWordData.type2CommandWord.word2,
this->getCommandWordSize());
}
else
{
memcpy(
cmdPtr,
&this->commandWordData.type1CommandWord.word[1],
this->getCommandWordSize());
}
break;
case COMMAND3:
if(this->waveFormType == TYPE2)
{
memcpy(
cmdPtr,
&this->commandWordData.type2CommandWord.word3,
this->getCommandWordSize());
}
else
{
memcpy(
cmdPtr,
&this->commandWordData.type1CommandWord.word[2],
this->getCommandWordSize());
}
break;
case COMMAND4:
if(this->waveFormType == TYPE2)
{
memcpy(
cmdPtr,
&this->commandWordData.type2CommandWord.word4,
this->getCommandWordSize());
}
else
{
memcpy(
cmdPtr,
&this->commandWordData.type1CommandWord.word[3],
this->getCommandWordSize());
}
break;
default:
// memcpy(
// cmdPtr,
// nullptr,
// this->getCommandWordSize());
break;
}
return cmdPtr;
}
Finally, the main file, in which I made a few simple tests. What I am confused about is why I cannot get the addresses to align. See below.
#include "CommandReceiver.hpp"
#include <iostream>
#include <cstring>
#include <cassert>
//test
int main()
{
// Confirm substructures are the correct size
assert(sizeof(Type1CommandWord) == 512 / 8);
assert(sizeof(Type2CommandWord) == 512 / 8);
// Set up tests
Message msg;
memset(&msg.data, 0, sizeof(msg.data));
msg.data[60] = 0xDE;
msg.data[61] = 0xAD;
msg.data[62] = 0xC0;
msg.data[63] = 0xDE;
msg.waveFormType = TYPE1;
// Call the constructor
CommandReceiver cmdRec(msg);
// Confirm values copied in constructor match
assert(cmdRec.commandWordData.buffer[60] == 0xDE);
assert(cmdRec.commandWordData.buffer[61] == 0xAD);
assert(cmdRec.commandWordData.buffer[62] == 0xC0);
assert(cmdRec.commandWordData.buffer[63] == 0xDE);
std::cout << "0x"<< &cmdRec.commandWordData.buffer + 60 << "\n";
std::cout << "0x"<< &cmdRec.commandWordData.buffer + 61 << "\n";
std::cout << "0x"<< &cmdRec.commandWordData.type1CommandWord + (32 * 4) << "\n";
The bottom line is it is not working and the problem is fairly early on. I just cannot figure out why. Here is the output:
0x0x7ffc3139f2e8
0x0x7ffc3139f328
0x0x7ffc313a03e8
The first two addresses should be adjacent characters of an unsigned char array, so I would expect them to be 1 byte apart, not 64 bytes.
The third address should be equal to the first, but instead it is 4352 (0x1100) bytes off. I'm obviously missing something big.
What am I missing? I'd also be interested in suggestions about better ways of doing this. However, the endpoints have to remain the same.
With pointer arithmetic, the type of the pointer does count:
int* p = /*..*/;
char* c = reinterpret_cast<char*>(p);
int n = /*..*/;
assert(reinterpret_cast<char*>(p + n) == c + n * sizeof(int));
// ^^^^^^^^^^^^^
So in your case
&cmdRec.commandWordData.type1CommandWord + (32 * 4)
"is"
reinterpret_cast<Type1CommandWord*>(
reinterpret_cast<const char*>(
&cmdRec.commandWordData.type1CommandWord
) + (32 * 4) * sizeof(Type1CommandWord)
)
and
&cmdRec.commandWordData.buffer + 60 is not &cmdRec.commandWordData.buffer[60] but
reinterpret_cast<uint8_t(*)[64]>(reinterpret_cast<char*>(&cmdRec.commandWordData.buffer) + 60 * sizeof(uint8_t[64]))
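So, if the intent was to look at individual bytes, index into the array (or step through a char pointer) instead of offsetting the array/struct pointers. A minimal sketch, assuming commandWordData is reachable from the test as in the question's main (printByteAddresses is a hypothetical helper):
#include <iostream>

void printByteAddresses(const CommandReceiver &cmdRec)
{
    // buffer[60] and buffer[61] really are adjacent bytes.
    std::cout << static_cast<const void *>(&cmdRec.commandWordData.buffer[60]) << "\n";
    std::cout << static_cast<const void *>(&cmdRec.commandWordData.buffer[61]) << "\n";

    // To land on byte offset 60 from the start of the struct, step through a char pointer.
    const char *base = reinterpret_cast<const char *>(&cmdRec.commandWordData.type1CommandWord);
    std::cout << static_cast<const void *>(base + 60) << "\n"; // equal to the first address
}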
I've been attempting to solve the longest common subsequence (LCS) problem using multiprocessing and multithreading, and I have implemented a multiprocess version of the code using the usual dynamic programming approach: generate a score matrix in which each element depends on the one to its left, the one directly above, and the one to its north-west.
In my multiprocess approach, the wavefront is propagated along the anti-diagonals of the score matrix, and to make life easy I have performed a shear transform on said score matrix so that each anti-diagonal is now horizontal (this is for improved memory access).
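For reference, the recurrence each cell of the score matrix computes (before the shear) is the standard LCS one. Here is a minimal single-threaded sketch, with lcs_length being a hypothetical helper rather than part of the program below:
#include <algorithm>
#include <string>
#include <vector>

// Plain O(X*Y) LCS length: score[i][j] depends on score[i-1][j-1] (characters match)
// and on score[i-1][j] / score[i][j-1] (no match) -- the dependency the wavefront follows.
static size_t lcs_length(const std::string &a, const std::string &b)
{
    std::vector<std::vector<size_t>> score(a.size() + 1, std::vector<size_t>(b.size() + 1, 0));
    for (size_t i = 1; i <= a.size(); ++i)
        for (size_t j = 1; j <= b.size(); ++j)
            score[i][j] = (a[i - 1] == b[j - 1]) ? score[i - 1][j - 1] + 1
                                                 : std::max(score[i - 1][j], score[i][j - 1]);
    return score[a.size()][b.size()];
}
Cells with the same i + j lie on one anti-diagonal and only depend on the two previous anti-diagonals, which is what makes the wavefront scheme possible.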
Following is my code (admittedly rather long, which allows for some set-up):
#include <algorithm>
#include <atomic>
#include <cstring>
#include <fcntl.h>
#include <fstream>
#include <iostream>
#include <string>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/wait.h>
#include <unistd.h>
#include <x86intrin.h>
/* Number of logical cores on the system: indirectly determines the number of processes run */
#define LOGICAL_CORES (int) sysconf(_SC_NPROCESSORS_CONF)
/* Maximum amount of work for each worker process */
#define MAX_WORK_SIZE 256
/* Number of anti-diagonals to process */
#define NUM_ANTIDIAGS (X + Y + 1)
/* Length of each anti-diagonal of the score matrix */
#define ANTIDIAG_SIZE std::max(X, Y)
/* Length of each anti-diagonal in memory: a multiple of MAX_WORK_SIZE */
#define ANTIDIAG_REAL_SIZE ((ANTIDIAG_SIZE + MAX_WORK_SIZE - 1) & -MAX_WORK_SIZE)
/* Total number of worker processes */
#define NUM_WORKERS (ANTIDIAG_REAL_SIZE / MAX_WORK_SIZE)
// The sizes of the input strings
u_int32_t X, Y;
u_int32_t *back; /* The back anti-diagonal, read from */
u_int32_t *middle; /* The middle antidiagonal, read from */
u_int32_t *front; /* The front antidiagonal, written to */
struct sync_container {
pthread_barrier_t barrier; /* A barrier, to ensure all threads are synchronised */
pthread_barrierattr_t barrierattr; /* Barrier attributes */
std::vector<pid_t> pids; /* A list of process IDs of the worker processes */
};
u_int32_t *data;
sync_container *sync_data;
std::string seq_1;
std::string seq_2;
void read_files(std::ifstream f1, std::ifstream f2)
{
if (f1.fail() || f2.fail())
{
std::cout << "Error reading files; exiting." << std::endl;
exit(EXIT_FAILURE);
}
f1 >> X >> seq_1;
f1.close();
f2 >> Y >> seq_2;
f2.close();
}
void shm_setup()
{
data = reinterpret_cast<u_int32_t *>(mmap(nullptr, 4 * ANTIDIAG_REAL_SIZE * sizeof(uint32_t),
PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANON, 0, 0));
back = reinterpret_cast<u_int32_t *>(data);
middle = back + ANTIDIAG_REAL_SIZE;
front = middle + ANTIDIAG_REAL_SIZE;
memset(back, 0, ANTIDIAG_REAL_SIZE * sizeof(u_int32_t));
memset(middle, 0, ANTIDIAG_REAL_SIZE * sizeof(u_int32_t));
memset(front, 0, ANTIDIAG_REAL_SIZE * sizeof(u_int32_t));
sync_data = static_cast<sync_container *>(mmap(nullptr, sizeof(sync_container),
PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANON, 0, 0));
}
void cleanup()
{
munmap(data, 3 * ANTIDIAG_REAL_SIZE * sizeof(u_int32_t));
munmap(sync_data, sizeof(sync_container));
exit(0);
}
int main(int argc, char **argv)
{
if (argc != 3)
{
std::cout << "Usage: [executable] [file 1] [file 2]" << std::endl;
return 1;
}
read_files(std::ifstream(argv[1], std::ifstream::in),
std::ifstream(argv[2], std::ifstream::in));
// Initialise shared memory and arrays
shm_setup();
// Initialise barrier
pthread_barrierattr_init(&sync_data->barrierattr);
pthread_barrierattr_setpshared(&sync_data->barrierattr, PTHREAD_PROCESS_SHARED);
pthread_barrier_init(&sync_data->barrier, &sync_data->barrierattr, NUM_WORKERS + 1);
int pid = 0;
int worker_id = 0;
for (; worker_id < NUM_WORKERS; ++worker_id)
{
pid = fork();
if (pid) sync_data->pids[worker_id] = pid;
else
break;
}
pthread_barrier_wait(&sync_data->barrier);
for (int antidiag_idx = 2; antidiag_idx < NUM_ANTIDIAGS; ++antidiag_idx)
{
pthread_barrier_wait(&sync_data->barrier);
if (!pid) // worker processes go here
{
for (int element = MAX_WORK_SIZE * worker_id; element < (antidiag_idx * worker_id) + MAX_WORK_SIZE; ++element)
{
if (!element || element >= ANTIDIAG_SIZE) continue;
char vert = seq_1[antidiag_idx - 1 - element];
char horz = seq_2[element - 1];
front[element] = horz == vert ? back[element - 1] + 1
: std::max(middle[element - 1], middle[element]);
}
}
if (pid) // parent process moves pointers
{
back = middle;
middle = front;
front = back;
}
pthread_barrier_wait(&sync_data->barrier);
}
if (!pid) exit(0);
std::cout << middle[ANTIDIAG_SIZE] << std::endl;
cleanup();
}
Now, this code does not work. This is strange, because with a small input size (specifically, < 256), this code only spawns one worker process and one parent process to manage it, and it still fails.
However, when the fork(), various pthread_barrier_wait() calls, and if (pid) control flow paths are removed in the for loop, the code executes perfectly and returns the correct expected length of the LCS between two strings specified in the input files. In other words, it degenerates into effectively a single-threaded, single-process version of the dynamic programming solution, but with the shear transform thing.
There is clearly an issue with my synchronisation, and I can't figure out where it is. I've tried several permutations of adding more pthread_barrier_wait()s, but this hasn't led anywhere.
Where is the synch issue, and how may I fix it?
I have a debugging tool which, in order to register its acquired data, uses a data structure called DiskPool (code follows). At start, this data structure mmaps a certain amount of memory (backed by a file on disk). Clients can allocate memory via a simple bump-pointer mechanism (implemented using std::atomic<size_t>).
As the volume of acquired data is massive, I have decided to keep a window over a time period instead of registering and keeping all the data. To fulfil such a purpose I have to change the disk pool into a circular buffer, but this must not impose considerable overhead, as the overhead affects the measurement.
Does anybody have an idea how to do this (for example, using the atomic facilities of the standard library)?
#include <sys/mman.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/stat.h>
#include <atomic>
#include <memory>
#include <signal.h>
#include <chrono>
#include <thread>
#include <cstdio>   // printf, perror
#include <cstdlib>  // exit
#include <iostream> // std::cout
#define handle_error(msg) \
do { perror(msg); exit(EXIT_FAILURE); } while (0)
class DiskPool {
char* addr_; // Initialized by mmap()
size_t len_; // Given by the user as many as memory pages as needed
std::atomic<size_t> top_; // Offset from address_
int fd_;
public:
DiskPool(size_t l, const char* file) : len_(l), top_(0),fd_(-1)
{
struct stat st;
fd_= open(file, O_CREAT|O_RDWR, S_IREAD | S_IWRITE);
if (fd_ == -1)
handle_error("open");
if (ftruncate(fd_, len_* sysconf(_SC_PAGE_SIZE)) != 0)
handle_error("ftruncate() error");
else {
fstat(fd_, &st);
printf("the file has %ld bytes\n", (long) st.st_size);
}
addr_ = static_cast<char*>( mmap(NULL, (len_* sysconf(_SC_PAGE_SIZE)),
PROT_READ | PROT_WRITE, MAP_SHARED|MAP_NORESERVE, fd_,0));
if (addr_ == MAP_FAILED)
handle_error("mmap failed.");
}
~DiskPool()
{
close(fd_);
if( munmap(addr_, len_)< 0) {
handle_error("Could not unmap file");
exit(1);}
std::cout << "Successfully unmapped the file. " << std::endl;
}
void* allocate(size_t s)
{
size_t t = std::atomic_fetch_add(&top_, s);
return addr_+t;
}
void flush() {madvise(addr_, len_, MADV_DONTNEED);}
};
As an example, I created sample code that uses this disk pool to record data at the creation and destruction of an object (AutomaticLifetimeCollector).
static const std::string RECORD_FILE = "Data.txt";
static const size_t DISK_POOL_NUMBER_OF_PAGES = 10000;
static std::shared_ptr<DiskPool> diskPool =
std::shared_ptr <DiskPool> (new DiskPool(DISK_POOL_NUMBER_OF_PAGES,RECORD_FILE.c_str()));
struct TaskRecord
{
uint64_t tid; // Thread id
uint64_t tag; // User-given identifier (“f1”)
uint64_t start_time; // nanoseconds
uint64_t stop_time;
uint64_t cpu_time;
TaskRecord(int depth, size_t tag, uint64_t start_time) :
tid(pthread_self()), tag(tag),
start_time(start_time), stop_time(0), cpu_time(0) {}
};
class AutomaticLifetimeCollector
{
TaskRecord* record_;
public:
AutomaticLifetimeCollector(size_t tag) :
record_(new(diskPool->allocate(sizeof(TaskRecord)))
TaskRecord(2, tag, (uint64_t)1000000004L))
{
}
~AutomaticLifetimeCollector() {
record_->stop_time = (uint64_t)1000000000L;
record_->cpu_time = (uint64_t)1000000002L;
}
};
inline void DelayMilSec(unsigned int pduration)
{
std::this_thread::sleep_until(std::chrono::system_clock::now() +
std::chrono::milliseconds(pduration));
}
std::atomic<bool> LoopsRunFlag {true};
void sigIntHappened(int signal)
{
std::cout<< "Application was terminated.";
LoopsRunFlag.store(false, std::memory_order_release);
}
int main()
{
signal(SIGINT, sigIntHappened);
unsigned int i = 0;
while(LoopsRunFlag)
{
AutomaticLifetimeCollector alc(i++);
DelayMilSec(2);
}
diskPool->flush();
return(0);
}
So, accounting only for the handing out of variable-sized slices from the circular buffer, I believe a compare-and-swap (CAS) loop should work.
The basic idea is to read a value (which is atomic), do some computation with it, then write the new value back only if it has not changed since it was read. If it did change (because of another thread or process), the computation must be redone with the new value.
Since you have variable-sized objects, simply slicing the buffer into n array elements and wrapping with (i + 1) % n won't work: with (i + item_len) % capacity an allocation could be split between the end and the start of the buffer, and while that can be made correct, it is probably not what you wanted. So that means a condition (wrap to 0 if the item doesn't fit), but the CPU should predict it pretty well.
#include <iostream>
#include <atomic>
std::atomic<size_t> next_index{0};
const size_t len = 100; // small for demo purpose
size_t alloc(size_t required_size)
{
if (required_size > len) std::terminate(); // do something, would cause a buffer overflow
size_t i, ret_index, new_index;
i = next_index.load();
do
{
auto space = len - i;
ret_index = required_size <= space ? i : 0; // Wrap if needed
new_index = ret_index + required_size;
} while (!next_index.compare_exchange_weak(i, new_index)); // retry if another thread changed next_index since it was read
return ret_index;
}
int main()
{
std::cout << alloc(4) << std::endl; // 0 - 3
std::cout << alloc(8) << std::endl; // 4 - 11
std::cout << alloc(32) << std::endl; // 12 - 43
std::cout << alloc(32) << std::endl; // 44 - 75
std::cout << alloc(32) << std::endl; // 0 - 31 (76 - 107 would overflow)
std::cout << alloc(32) << std::endl; // 32 - 63
std::cout << alloc(32) << std::endl; // 64 - 95
std::cout << alloc(32) << std::endl; // 0 - 31 (96 - 127 would overflow)
}
Which should be fairly simple to plug in to your class:
void* allocate(size_t s)
{
if (s > len_ * sysconf(_SC_PAGE_SIZE)) std::terminate(); // do something, would cause a buffer overflow
size_t i, ret_index, new_index;
i = top_.load();
do
{
auto space = len_ * sysconf(_SC_PAGE_SIZE) - i;
ret_index = s <= space ? i : 0; // Wrap if needed
new_index = ret_index + s;
} while (!top_.compare_exchange_weak(i, new_index)); // retry if another thread changed top_ since it was read
return addr_+ ret_index;
}
len_ * sysconf(_SC_PAGE_SIZE) appears in a few places, so the size in bytes might be the more useful value to store in len_ itself.
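For example, a minimal sketch of the constructor with the byte length computed once and stored in len_ (same members and error handling as your class; allocate(), flush() and the destructor's munmap(addr_, len_) then all work with the right length):
DiskPool(size_t pages, const char* file) : len_(pages * sysconf(_SC_PAGE_SIZE)), top_(0), fd_(-1)
{
    struct stat st;
    fd_ = open(file, O_CREAT | O_RDWR, S_IREAD | S_IWRITE);
    if (fd_ == -1)
        handle_error("open");
    if (ftruncate(fd_, len_) != 0)      // len_ is now the size in bytes
        handle_error("ftruncate() error");
    else {
        fstat(fd_, &st);
        printf("the file has %ld bytes\n", (long) st.st_size);
    }
    addr_ = static_cast<char*>(mmap(NULL, len_, PROT_READ | PROT_WRITE,
                                    MAP_SHARED | MAP_NORESERVE, fd_, 0));
    if (addr_ == MAP_FAILED)
        handle_error("mmap failed.");
}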
I am relatively new to C++, so please forgive my lack of knowledge. I need help regarding TFTP packets. Below is the code I am using to generate a WRQ (write request) packet and a DATA packet, which will be sent to a designated server.
bool createWRQ(char * filename) {
/* structure is the same as RRQ */
clear();
addWord(TFTP_OPCODE_WRITE);
addString(filename);
addByte(0);
addString(TFTP_DEFAULT_TRANSFER_MODE);
addByte(0);
return true;
}
bool createData(int block, char * mData, int data_size) {
/* 2 bytes 2 bytes n bytes
----------------------------------------
DATA | 03 | Block # | Data |
---------------------------------------- */
clear(); // to clean the memory location
addWord(TFTP_OPCODE_DATA);
addWord(block);
addMemory(mData, data_size);
return true;
}
I will include the declarations and required functions.
#include "stdafx.h"
#include "WebComm.h"
#include "WebCommDlg.h"
#include <stdio.h>
#include <stdlib.h>
#include "visa.h"
#include <cstring>
#include <iostream>
#include <stdio.h>
#include <stdlib.h>
#include <windows.h>
#include <winsock.h>
#include <string.h>
#include <string>
#include <fstream>
#include <cstdio>
#include <cerrno>
int mCurPacketSize = 512;
char mData[1024]; // must be able to hold cTFTPPacket_MAX_SIZE bytes
#define VIBUF_LEN 255
#define TFTP_OPCODE_READ 1
#define TFTP_OPCODE_WRITE 2
#define TFTP_OPCODE_DATA 3
#define TFTP_OPCODE_ACK 4
#define TFTP_OPCODE_ERROR 5
#define cTFTPPacket_MAX_SIZE 1024
#define cTFTPPacket_DATA_SIZE 512
#define TFTP_DEFAULT_TRANSFER_MODE "octet" //"netascii", "octet", or "mail"
typedef unsigned char BYTE;
typedef unsigned short WORD;
bool addByte(BYTE b) {
if(mCurPacketSize >= cTFTPPacket_MAX_SIZE)
return false;
mData[mCurPacketSize] = (unsigned char)b;
mCurPacketSize++;
return true;
}
bool addWord(WORD w) {
w = htons(w); // now in network (big-endian) byte order
if(!addByte(*((BYTE*)&w))) // high byte first
return false;
return addByte(*(((BYTE*)&w)+1)); // then low byte
}
bool addString(char * str) {
int n = strlen(str);
for(int i=0; i<n; i++)
if(!addByte(str[i]))
return false;
return true;
}
bool addMemory(char * buffer, int len) {
bool oStatus = false;
if(mCurPacketSize + len >= cTFTPPacket_MAX_SIZE) {
AfxMessageBox("Packet max size exceeded");
return false;
} else {
memcpy(mData + mCurPacketSize, buffer, len);
mCurPacketSize += len;
return true;
}
}
void clear() {
mCurPacketSize = 0;
memset(mData, mCurPacketSize, cTFTPPacket_MAX_SIZE);
}
I am aware these functions have been declared mostly as type bool; however, I need to send a WRQ packet to the server and wait for an ACK response before sending a DATA packet.
Something along the lines of:
while(/* something */)
if(!sendto(socket, WRQ, 512, NULL, (sockaddr*)&Addr, sizeof(struct sockaddr_in))) {
if(!recvfrom(socket, ACK, /* ... */))
sendto(socket, DATA_Packet, 512, NULL, (sockaddr*)&Addr, sizeof(struct sockaddr_in));
}
My question is: how can I modify the createWRQ() and createData() functions so that I can return them as packets to use for transmission, since bool only returns true or false as 1 or 0.
I need to be able to send them using the winsock send and receive functions. Apologies for the silly question. If anyone could point me in the right direction I would greatly appreciate it.
Your whole approach has a few issues.
When you create your packets relying on functions like
bool addByte(BYTE b)
they use the global variables
mCurPacketSize, mData
and that's not good. You could instead use something along these lines:
int addByte(char* Pkt, int PktIdx, BYTE b)
{
if (PktIdx >= cTFTPPacket_MAX_SIZE)
{
return 0;
}
Pkt[PktIdx] = (unsigned char)b;
PktIdx++;
return PktIdx;
}
Then you know that Pkt is always the head of your packet and PktIdx is both the place for the next byte (or string) and the current size of the packet.
When you create packets that have a fixed-length structure (or a fixed-length header followed by a variable-length payload area), it is a good idea to represent the fixed-length area with a "packed" C/C++ structure (pay attention to memory alignment) and then populate that structure, for example:
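For example, the fixed part of a DATA packet (opcode and block number) could look like this; TftpDataHeader and buildData are hypothetical names, and the returned length is what you would pass to sendto():
#pragma pack(push, 1)   // no padding between the two 16-bit fields (MSVC and GCC)
struct TftpDataHeader {
    WORD opcode;        // TFTP_OPCODE_DATA, in network byte order
    WORD block;         // block number, in network byte order
};
#pragma pack(pop)

// Build a DATA packet into pkt and return its total size in bytes.
int buildData(char *pkt, int block, const char *payload, int len)
{
    TftpDataHeader hdr;
    hdr.opcode = htons(TFTP_OPCODE_DATA);
    hdr.block = htons((WORD)block);
    memcpy(pkt, &hdr, sizeof(hdr));
    memcpy(pkt + sizeof(hdr), payload, len);
    return (int)sizeof(hdr) + len;
}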
I think I have tunnel vision here, so I need your help.
I am trying to parse an MPEG transport-stream file and I'm stuck on the header. On the wiki you will see that 32-bit big-endian masks are provided in order to extract the data from the 4-byte header. My code takes endianness into account (I think) and reverses the bytes if it detects that it is running on a little-endian machine. Then I cast the char* to an int and apply the masks; all the values look fine, but the PID is messed up and I don't get why...
header definition
namespace ts {
#define SYNC_BYTE_MASK 0xff000000
#define TEI_MASK 0x800000
#define PAYLOAD_START_MASK 0x400000
#define PRIORITY_MASK 0x200000
#define PID_MASK 0x1fff00
#define SCRAMBLING_CTL_MASK 0xc0
#define ADAPTATION_FIELD_MASK 0x20
#define HAS_PAYLOAD_MASK 0x10
#define COUNTER_MASK 0xf
#define HEADER_BYTES 4
#define HEADER_BITS (8 * HEADER_BYTES)
class Header {
public:
std::bitset<HEADER_BITS> *full;
unsigned char _syncByte;
bool _tei;
bool _payloadStart;
bool _priority;
int16_t _pid;
std::bitset<2> *_scramblingCtl;
bool _adaptationField;
bool _hasPayload;
int _counter;
Header(const char *, size_t);
~Header();
const std::string toString();
bool isValid();
};
}
Header values assignment
ts::Header::Header(const char *header, size_t n) {
uint32_t bytes = reverseLE(header, n);
// just for display
char t[4];
memcpy(t, header, 4);
std::cout << "Original: " << std::bitset<32>(*((uint32_t *)t)) << std::endl;
this->full = new std::bitset<HEADER_BITS>(bytes);
uint32_t tmp = bytes & SYNC_BYTE_MASK;
this->_syncByte = ((char *)&tmp)[n - 1];
this->_tei = bytes & TEI_MASK;
this->_payloadStart = bytes & PAYLOAD_START_MASK;
this->_priority = bytes & PRIORITY_MASK;
this->_pid = bytes & PID_MASK; // THIS ONE IS MESSED UP !!
this->_scramblingCtl = new std::bitset<2>(bytes & SCRAMBLING_CTL_MASK);
this->_adaptationField = bytes & ADAPTATION_FIELD_MASK;
this->_hasPayload = bytes & HAS_PAYLOAD_MASK;
this->_counter = bytes & COUNTER_MASK;
}
Functions to reverse the byte order
#include "utils.h"
int is_big_endian(void)
{
union {
uint32_t i;
char c[4];
} e = { 0x01000000 };
return e.c[0];
}
void swap(char *s, int a, int b) {
char tmp;
tmp = s[a];
s[a] = s[b];
s[b] = tmp;
}
// Converts string to int taking endianess into account
uint32_t reverseLE(const char *bits, size_t n) {
uint32_t ret = 0;
char *cp = (char *)malloc(n * sizeof(char));
memcpy(cp, bits, n);
if ( ! is_big_endian() ) {
for (int i = 0; i < n / 2; i++)
swap(cp, i, n - 1 - i);
}
ret = *((uint32_t *)cp);
free(cp);
return ret;
}
Here's an example of a header that should have a PID of 33:
Original: 00010010001000010000000001000111
Binary: 01000111000000000010000100010010
Sync byte: G
TEI: 0
Payload start: 0
Priority: 0
PID: 8448 0010000100000000
Scrambling Ctl: 00
Adaptation field: 0
Has Payload: 1
Counter: 2
Somehow it gets reversed again, and I don't get why...
OK, so the problem was that the 13 bits of the PID are located in str[1] and str[2], which means that after casting *((int *)str) and applying the mask there are still 8 trailing zero bits coming from the last byte, str[3].
Solution:
this->_pid = bytes & PID_MASK;
this->_pid >>= 8;
Thanks to #Wimmel.
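More generally, every multi-bit field whose mask does not reach bit 0 needs the same treatment: mask first, then shift right by the number of trailing zero bits in the mask (the scrambling-control bits under 0xc0, for instance, would need a shift of 6). A small sketch with a hypothetical extractField helper:
static uint32_t extractField(uint32_t header, uint32_t mask, unsigned shift)
{
    return (header & mask) >> shift;    // mask out the field, then drop the trailing zeros
}

// e.g. inside the constructor:
//   this->_pid = extractField(bytes, PID_MASK, 8);                      // 13-bit PID
//   uint32_t scrambling = extractField(bytes, SCRAMBLING_CTL_MASK, 6);  // 2-bit field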
I get this error when trying to compile my program:
Field '__jmpbuf' could not be resolved
I have looked for a solution for hours and can't seem to find out where the culprit is.
The Thread.h file contains the header of the class. It has the private member:
sigjmp_buf _env;
And the implementation is inside Thread.cpp:
#include "Thread.h"
#include <setjmp.h>
#include "translateAdd.h"
#include <stdio.h>
#include <signal.h>
#include <unistd.h>
#include <sys/time.h>
#define COUNTER_INIT -1
int Thread::_idCounter = COUNTER_INIT;
Thread::Thread(void (*threadsFunc)(void))
: threadsFunction(threadsFunc), _stack(new char[STACK_SIZE]), _quantums(1)
{
address_t sp, pc;
sp = (address_t)_stack + STACK_SIZE - sizeof(address_t);
pc = (address_t)threadsFunc;
// set environment for later return
sigsetjmp(_env, 1);
(_env->__jmpbuf)[JB_SP] = translate_address(sp);
(_env->__jmpbuf)[JB_PC] = translate_address(pc);
sigemptyset(&_env->__saved_mask);
_id = ++_idCounter;
_state = READY;
}
EDIT: Using Eclipse as the IDE under Ubuntu 32-bit.
EDIT: Another complete example that doesn't compile on my machine:
#include <stdio.h>
#include <setjmp.h>
#include <signal.h>
#include <unistd.h>
#include <sys/time.h>
#define SECOND 1000000
#define STACK_SIZE 4096
char stack1[STACK_SIZE];
char stack2[STACK_SIZE];
sigjmp_buf env[2];
#ifdef __x86_64__
/* code for 64 bit Intel arch */
typedef unsigned long address_t;
#define JB_SP 6
#define JB_PC 7
/* A translation is required when using an address of a variable.
Use this as a black box in your code. */
address_t translate_address(address_t addr)
{
address_t ret;
asm volatile("xor %%fs:0x30,%0\n"
"rol $0x11,%0\n"
: "=g" (ret)
: "0" (addr));
return ret;
}
#else
/* code for 32 bit Intel arch */
typedef unsigned int address_t;
#define JB_SP 4
#define JB_PC 5
/* A translation is required when using an address of a variable.
Use this as a black box in your code. */
address_t translate_address(address_t addr)
{
address_t ret;
asm volatile("xor %%gs:0x18,%0\n"
"rol $0x9,%0\n"
: "=g" (ret)
: "0" (addr));
return ret;
}
#endif
void switchThreads(void)
{
static int currentThread = 0;
int ret_val = sigsetjmp(env[currentThread],1);
printf("SWITCH: ret_val=%d\n", ret_val);
if (ret_val == 1) {
return;
}
currentThread = 1 - currentThread;
siglongjmp(env[currentThread],1);
}
void f(void)
{
int i = 0;
while(1){
++i;
printf("in f (%d)\n",i);
if (i % 3 == 0) {
printf("f: switching\n");
switchThreads();
}
usleep(SECOND);
}
}
void g(void)
{
int i = 0;
while(1){
++i;
printf("in g (%d)\n",i);
if (i % 5 == 0) {
printf("g: switching\n");
switchThreads();
}
usleep(SECOND);
}
}
void setup(void)
{
address_t sp, pc;
sp = (address_t)stack1 + STACK_SIZE - sizeof(address_t);
pc = (address_t)f;
sigsetjmp(env[0], 1);
(env[0]->__jmpbuf)[JB_SP] = translate_address(sp);
(env[0]->__jmpbuf)[JB_PC] = translate_address(pc);
sigemptyset(&env[0]->__saved_mask);
sp = (address_t)stack2 + STACK_SIZE - sizeof(address_t);
pc = (address_t)g;
sigsetjmp(env[1], 1);
(env[1]->__jmpbuf)[JB_SP] = translate_address(sp);
(env[1]->__jmpbuf)[JB_PC] = translate_address(pc);
sigemptyset(&env[1]->__saved_mask);
}
int main(void)
{
setup();
siglongjmp(env[0], 1);
return 0;
}
If you really need to use the internal fields (which will only be valid for your compiler on your system) you need to check the types:
typedef struct __jmp_buf_tag sigjmp_buf[1];
That means that sigjmp_buf is not a pointer, but an array with a single structure in it. So you use it like a normal array of structures:
sigjmp_buf _env;
_env[0].__jmpbuf[x] = y;
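Applied to the constructor in Thread.cpp, the (glibc-specific) assignments would then read, for example:
sigsetjmp(_env, 1);
_env[0].__jmpbuf[JB_SP] = translate_address(sp);
_env[0].__jmpbuf[JB_PC] = translate_address(pc);
sigemptyset(&_env[0].__saved_mask);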
I really recommend against using the internal fields of this structure. Linux has other functions that simplify cooperative threading (which is what you seem to be implementing), such as the ucontext family (getcontext, makecontext, swapcontext).