C Simple RingBuffer - Multithreading - Finding Critical Sections [closed] - c++

Closed. This question needs debugging details. It is not currently accepting answers.
Edit the question to include desired behavior, a specific problem or error, and the shortest code necessary to reproduce the problem. This will help others answer the question.
Closed 7 years ago.
Improve this question
I wrote a simple C ring buffer that I'm now testing with multiple threads, and I'm having a hard time getting the code to fail so that I can identify the critical sections.
Note: The code is in C, but I'm testing it in C++ files because it's easier to create threads, mutexes, etc.
Header File:
#ifndef _C_TEST_H_
#define _C_TEST_H_
#include <stdio.h>
#include <mutex>
///////////////////////////////////////////////////////////////////////////////
// Defines and macros
///////////////////////////////////////////////////////////////////////////////
#ifndef __cplusplus
typedef enum { false, true } bool;
#endif
#define RING_BUFFER_SIZE 2000
///////////////////////////////////////////////////////////////////////////////
// Structures, Enumerations, Typedefs
///////////////////////////////////////////////////////////////////////////////
// One slot of the ring buffer. The slots of a RingBuffer are chained into a
// circular doubly-linked list by RB_InitRingBuffer.
typedef struct Node
{
// Value stored in this slot (reset to 0 by init and after a read).
int val;
// Slot that follows this one in the ring.
struct Node *next;
// Slot that precedes this one in the ring.
struct Node *previous;
} Node_T;
// Result codes returned by the RB_* functions.
typedef enum RB_ERC
{
// Operation completed.
RB_ERC_NO_ERROR,
// A required pointer argument was null (RB_WriteArray also returns this for a zero length).
RB_ERC_NULL_PTR,
// A read asked for more elements than the buffer currently holds.
RB_ERC_UNDERFLOW,
// A write did not fit and the overflow policy is RB_IGNORE_AND_RETURN_ERROR.
RB_ERC_OVERFLOW
} RB_ERC_T;
// Policy applied when writing to a full buffer.
typedef enum RB_HANDLE_OVERFLOW
{
// Store the new value anyway and advance the read pointer past the oldest one.
RB_DECIMATE,
// Leave the buffer untouched and report RB_ERC_OVERFLOW.
RB_IGNORE_AND_RETURN_ERROR
} RB_HANDLE_OVERFLOW_T;
// Which end of the queue a read takes its element from.
typedef enum RB_READ_MODE
{
// Oldest element first (taken at the Read pointer).
RB_FIFO,
// Newest element first (taken just behind the Write pointer).
RB_LIFO
} RB_READ_MODE_T;
// Fixed-capacity ring buffer of ints; set up with RB_InitRingBuffer before use.
typedef struct RingBuffer
{
// Number of elements currently stored (0..RING_BUFFER_SIZE).
int curSize;
// What a write does when the buffer is full.
RB_HANDLE_OVERFLOW_T handleOverflow;
// Slot the next write will fill; always one slot ahead of the newest element.
struct Node *Write;
// Slot holding the oldest element (next FIFO read).
struct Node *Read;
// Backing storage for the slots.
Node_T buffer[RING_BUFFER_SIZE];
} RING_BUFFER_T;
///////////////////////////////////////////////////////////////////////////////
// Prototypes
///////////////////////////////////////////////////////////////////////////////
#ifdef __cplusplus
extern "C" {
#endif
RB_ERC_T RB_InitRingBuffer(RING_BUFFER_T *rb_, RB_HANDLE_OVERFLOW_T ifOverflow_);
//Return true if the queue has no elements; false if there are elements on the queue
bool RB_IsEmpty(RING_BUFFER_T *rb_);
//Return true if the queue is full; false if there are seats available
bool RB_IsFull(RING_BUFFER_T *rb_);
//Write N elements (length of the array) to the queue
//Note: array values will be read from element 0 to array length
RB_ERC_T RB_WriteArray(RING_BUFFER_T *rb_, int values_[], int length_);
//Write 1 element
RB_ERC_T RB_Write(RING_BUFFER_T *rb_, int val_);
//Dequeue and read N elements (length of the array) into an array
RB_ERC_T RB_ReadArray(RING_BUFFER_T *rb_, int values_[], int length_, RB_READ_MODE_T readMode_);
//Dequeue and read 1 element
RB_ERC_T RB_Read(RING_BUFFER_T *rb_, int *readVal_, RB_READ_MODE_T readMode_);
#ifdef __cplusplus
}
#endif
#endif //_C_TEST_H_
Source:
#include "CTest.h"
static std::mutex m;
// Reset a ring buffer to the empty state and link its slots into a circular
// doubly-linked list.
//   rb_             buffer to initialise
//   handleOverflow_ policy used by later writes when the buffer is full
// Returns RB_ERC_NULL_PTR when rb_ is null, RB_ERC_NO_ERROR otherwise.
// No locking is performed here.
RB_ERC_T RB_InitRingBuffer(RING_BUFFER_T *rb_, RB_HANDLE_OVERFLOW_T handleOverflow_)
{
    int i;

    if (rb_ == 0)
    {
        return RB_ERC_NULL_PTR;
    }

    // Read and write positions start on the same slot: the queue is empty.
    rb_->curSize = 0;
    rb_->Read = &rb_->buffer[0];
    rb_->Write = &rb_->buffer[0];
    rb_->handleOverflow = handleOverflow_;

    // Modular indexing wraps the first/last slots onto each other and stays
    // inside the array for every RING_BUFFER_SIZE >= 1 (the previous
    // special-cased version indexed buffer[1] even for a one-slot buffer).
    for (i = 0; i < RING_BUFFER_SIZE; i++)
    {
        rb_->buffer[i].val = 0;
        rb_->buffer[i].next = &rb_->buffer[(i + 1) % RING_BUFFER_SIZE];
        rb_->buffer[i].previous = &rb_->buffer[(i + RING_BUFFER_SIZE - 1) % RING_BUFFER_SIZE];
    }

    return RB_ERC_NO_ERROR;
}
// True when the buffer holds no elements.
// rb_ is assumed to be valid; it is not checked.
bool RB_IsEmpty(RING_BUFFER_T *rb_)
{
    return (rb_->curSize == 0) ? true : false;
}
// True when every slot of the buffer is occupied.
// rb_ is assumed to be valid; it is not checked.
bool RB_IsFull(RING_BUFFER_T *rb_)
{
    return (rb_->curSize == RING_BUFFER_SIZE) ? true : false;
}
// Enqueue values_[0 .. length_-1] in order.
// Returns RB_ERC_NULL_PTR when rb_ or values_ is null or length_ is 0.
// With RB_DECIMATE every value is written (old data may be overwritten).
// With any other policy the write is all-or-nothing: if the values do not fit
// in the free slots nothing is written and RB_ERC_OVERFLOW is returned.
RB_ERC_T RB_WriteArray(RING_BUFFER_T *rb_, int values_[], int length_)
{
    int idx;

    if (rb_ == 0 || values_ == 0 || length_ == 0)
    {
        return RB_ERC_NULL_PTR;
    }

    if (rb_->handleOverflow != RB_DECIMATE)
    {
        // Refuse up front when there is not enough room for the whole array.
        const int freeSlots = RING_BUFFER_SIZE - rb_->curSize;
        if (length_ > freeSlots)
        {
            return RB_ERC_OVERFLOW;
        }
    }

    for (idx = 0; idx < length_; idx++)
    {
        RB_Write(rb_, values_[idx]);
    }
    return RB_ERC_NO_ERROR;
}
// Enqueue a single value.
// Returns RB_ERC_NULL_PTR for a null buffer, RB_ERC_OVERFLOW when the buffer
// is full and the policy is not RB_DECIMATE, RB_ERC_NO_ERROR otherwise.
RB_ERC_T RB_Write(RING_BUFFER_T *rb_, int val_)
{
    if (rb_ == 0)
    {
        return RB_ERC_NULL_PTR;
    }

    if (RB_IsFull(rb_))
    {
        if (rb_->handleOverflow != RB_DECIMATE)
        {
            return RB_ERC_OVERFLOW;
        }
        // Decimate: overwrite the oldest element and move both pointers on,
        // so the element count stays at capacity.
        rb_->Write->val = val_;
        rb_->Write = rb_->Write->next;
        rb_->Read = rb_->Read->next;
        return RB_ERC_NO_ERROR;
    }

    // Room available: store, then advance so Write stays one slot ahead.
    rb_->Write->val = val_;
    rb_->Write = rb_->Write->next;
    rb_->curSize++;
    return RB_ERC_NO_ERROR;
}
// Dequeue length_ elements into values_[0 .. length_-1] using readMode_.
// Returns RB_ERC_NULL_PTR when rb_ or values_ is null, RB_ERC_UNDERFLOW when
// fewer than length_ elements are stored (nothing is read in that case),
// RB_ERC_NO_ERROR otherwise.
RB_ERC_T RB_ReadArray(RING_BUFFER_T *rb_, int values_[], int length_, RB_READ_MODE_T readMode_)
{
    int i;

    // rb_ is dereferenced below, so it must be validated like values_.
    if (rb_ == 0 || values_ == 0)
    {
        return RB_ERC_NULL_PTR;
    }

    // Verify that the amount of data to be read is actually available.
    if (length_ > rb_->curSize)
    {
        return RB_ERC_UNDERFLOW;
    }

    for (i = 0; i < length_; i++)
    {
        // Availability was checked above, so the per-element result is not inspected.
        (void) RB_Read(rb_, &values_[i], readMode_);
    }
    return RB_ERC_NO_ERROR;
}
// Dequeue one element into *readVal_.
// RB_LIFO takes the newest element (the slot just behind Write); any other
// mode takes the oldest element (the slot at Read). The slot read is reset to 0.
// Returns RB_ERC_NULL_PTR for a null argument, RB_ERC_UNDERFLOW when the
// buffer is empty, RB_ERC_NO_ERROR otherwise.
RB_ERC_T RB_Read(RING_BUFFER_T *rb_, int *readVal_, RB_READ_MODE_T readMode_)
{
    if (rb_ == 0 || readVal_ == 0)
    {
        return RB_ERC_NULL_PTR;
    }
    if (RB_IsEmpty(rb_))
    {
        return RB_ERC_UNDERFLOW;
    }

    if (readMode_ == RB_LIFO)
    {
        // Step the write pointer back onto the newest element and consume it.
        rb_->Write = rb_->Write->previous;
        *readVal_ = rb_->Write->val;
        rb_->Write->val = 0;
    }
    else
    {
        // Consume the oldest element and step the read pointer forward.
        *readVal_ = rb_->Read->val;
        rb_->Read->val = 0;
        rb_->Read = rb_->Read->next;
    }

    rb_->curSize--;
    return RB_ERC_NO_ERROR;
}
Main CPP using for tests:
#include "CTest.h"
#include <iostream>
#include "windows.h"
#include <thread>
using namespace std;
static RING_BUFFER_T test1;
const int dataSize = 300;
const int dataSizeout = 1000;
int sharedValue = 0;
static std::mutex m;
void function1()
{
int data[dataSize];
RB_ERC_T erc = RB_ERC_NO_ERROR;
for (int i = 0; i < dataSizeout; i++)
{
erc = RB_Write(&test1, i);
if (erc != RB_ERC_NO_ERROR)
{
printf("Count down errrror %d\n", erc);
}
}
//RB_WriteArray(&test1, data, dataSize);
}
void function2()
{
RB_ERC_T erc = RB_ERC_NO_ERROR;
for (int i = 0; i > -dataSizeout; i--)
{
erc = RB_Write(&test1, i);
if (erc != RB_ERC_NO_ERROR)
{
printf("Count down errrror %d\n", erc);
}
}
}
// Test driver: two threads write concurrently into the shared buffer, then
// the contents are drained newest-first and printed.
int main()
{
    RB_InitRingBuffer(&test1, RB_DECIMATE);

    thread p1(function1);
    thread p2(function2);
    p1.join();
    p2.join();

    // Drain one element at a time. Stop as soon as a read fails: if the
    // unsynchronised writers lost updates the buffer holds fewer than
    // 2 * dataSizeout elements, and retrying a failed read would never succeed.
    const int expected = 2 * dataSizeout;
    int out;
    int cnt = 0;
    while (cnt < expected && RB_Read(&test1, &out, RB_LIFO) == RB_ERC_NO_ERROR)
    {
        printf("out[%d] = %d\n", cnt, out);
        cnt += 1;
    }
    if (cnt < expected)
    {
        printf("Only %d of %d written values could be read back\n", cnt, expected);
    }

    system("Pause");
    return 0;
}
I'm thinking that everything in the main RING_BUFFER_T instance would be shared variables, so everywhere they are used, which is pretty much everywhere, they would have to be enclosed in mutexes.
// The ring buffer instance shared by the writer threads in the test.
typedef struct RingBuffer
{
// Number of elements currently stored.
int curSize;
// Overflow policy chosen at initialisation.
RB_HANDLE_OVERFLOW_T handleOverflow;
// Slot the next write will fill.
struct Node *Write;
// Slot holding the oldest element.
struct Node *Read;
// Backing storage for the slots.
Node_T buffer[RING_BUFFER_SIZE];
} RING_BUFFER_T;
I suppose NODE_T would be as well, but only for initialization. Am I wrong or shouldn't the elements being stuffed in the ring buffer be placed out of order, since there is no mutex being used right now?

For a state-of-the-art C implementation of a lock-free ring buffer, look in the Linux kernel source code. That should give you some idea of how the experts do it, and it is battle-proven code. See linux/kfifo.h and corresponding C file(s).
design description of Linux ring buffer, dunno how up-to-date it is
For ideas of how to do it in C++, you can look at
Linux Journal article about C++ lock-free queue
or maybe look at boost::lockfree::queue. Using C++ of course enables you to use generic types (templates) and e.g. replace function pointers with compile-time bound calls, thus enabling even better performance than C. And you can avoid those pesky void* pointers.

Thou Shalt Not expose the functions RB_IsEmpty and RB_IsFull as the return values may be invalid immediately. If you only call them from within read/write there is no need to do protection within that functions.
Typically you must protect your struct within the externally exposed read and write functions from the first access to the last access. There is no need to protect parameter checking.
You shall not double lock. Do not call RB_Read from RB_ReadArray. Provide an internal read function used by both. Same for the write functions.

Related

Arrays with unknown size on Arduino

I'm doing an Arduino project and I need to pass arrays with different sizes as parameter to my function.
The problem is that std::vector is not an option.
How can I do that?
The fallback is to pass a pointer to the first element in the array and the size:
void foo(int* arr, size_t size);
The reason std::vector is not available on some platforms is that dynamic allocation is a bad idea there. However, once you are dynamically allocating arrays:
int* x = new int[42];
foo(x, 42); // pass the pointer to the first element together with the element count
delete[] x;
then you could as well use std::vector.
If std::vector is not available to you, then either search for an alternative (maybe this?) or write your own. The pointer + size approach is fragile and not recommended unless absolutely necessary. The power of std::vector is from the abstract concept to encapsulate the array, its size and capacity. Nobody can prevent you to apply that concept even if you cannot use std::vector.
In case you are talking about statically sized arrays, then thats not quite the use case for std::vector. You do not need dynamic allocation, and you can pass arrays by reference. I won't repeat here what you can find in this answer (std::array) or here (c-arrays).
Something like this should work
template<size_t N>
void DaFunction(std::array<int, N>& daArray)
you can do it without having to deal with memory allocation or pointers just by creating a string variable and a limited size array and then you start shifting
#include <Arduino.h>
// Keeps a fixed five-slot window ("view port") over a list of ';'-separated
// entries read from Serial, so no dynamically sized array is needed.
class ArrayShifter
{
private:
// Last text read from Serial (the "reservoir" the window is filled from)
String _text;
// Fixed-size window of 5 entries (size it for the amount of data you expect)
String _viewPortArray[5];
// Number of entries counted by the last shiftArray call
int _size = 0;
// Reads new text and fills the window for the given entry position
bool shiftArray(int position);
public:
ArrayShifter(/* args */);
// Reads the pending text from Serial and returns it
String getSerialText();
// Returns the entry at the given position
String getArrayData(int index);
// Returns the current entry count
int getSize();
// Clears the window
void clearArray();
// Removes one entry from the window
void removeArrayItem(int index);
};
// Nothing to set up: the members use their in-class / default initialisers.
ArrayShifter::ArrayShifter() {}
// Reads whatever text Serial.readString() delivers, remembers it in _text
// and returns a copy of it.
String ArrayShifter::getSerialText()
{
    const String received = Serial.readString();
    _text = received;
    return _text;
}
// Reads a fresh line from Serial and loads the five-entry window that
// contains entry number `position` into _viewPortArray.
//
// The text is expected to hold entries terminated by ';', for example
//   name:value;age:value;gender:value;
// Entries before the window are skipped; _size ends up as the number of
// entries skipped plus the number loaded. Window slots for which no entry is
// left are cleared so stale data from an earlier call is never returned.
//
// Returns false when no text was received, true otherwise.
bool ArrayShifter::shiftArray(int position)
{
    const int viewPortSize = 5;

    String text = getSerialText();
    _size = 0;
    if (text.length() == 0)
    {
        return false;
    }

    // First entry of the window that contains `position`
    // (0 for positions 0-4, 5 for positions 5-9, ...).
    const int windowStart = (position < viewPortSize) ? 0 : position - position % viewPortSize;

    // Drop the entries in front of the window.
    for (int i = 0; i < windowStart; i++)
    {
        const int separator = text.indexOf(";");
        if (separator > 0)
        {
            _size++;
            text = text.substring(separator + 1);
        }
    }

    // Load the window. The separator is looked up in the current remainder of
    // the text before every use, so an index from an earlier, longer text is
    // never applied to it.
    for (int i = 0; i < viewPortSize; i++)
    {
        const int separator = text.indexOf(";");
        if (separator > 0)
        {
            _size++;
            _viewPortArray[i] = text.substring(0, separator);
            text = text.substring(separator + 1);
        }
        else
        {
            _viewPortArray[i] = "";
        }
    }
    return true;
}

// Returns entry number `index` of the text currently arriving on Serial.
// An empty String is returned for a negative index or when no text was
// received.
String ArrayShifter::getArrayData(int index)
{
    const int viewPortSize = 5;

    if (index < 0 || !shiftArray(index))
    {
        return String();
    }
    // shiftArray loaded the window starting at a multiple of five, so the
    // slot inside the window is the remainder.
    return _viewPortArray[index % viewPortSize];
}
// Returns the entry count recorded in _size.
int ArrayShifter::getSize()
{
    const int entries = _size;
    return entries;
}
// Empties every slot of the window and resets the entry count.
void ArrayShifter::clearArray()
{
    // `_viewPortArray->remove(i)` would call String::remove on slot 0 only
    // (trimming its characters); each slot has to be cleared by index.
    for (int i = 0; i < 5; i++)
    {
        _viewPortArray[i] = "";
    }
    _size = 0;
}
// Clears the window slot that holds entry number `index` and lowers the
// entry count. Negative indices are ignored.
void ArrayShifter::removeArrayItem(int index)
{
    if (index < 0)
    {
        return;
    }
    // `_viewPortArray->remove(index)` would trim characters of slot 0 instead
    // of removing an entry; address the slot inside the five-entry window.
    _viewPortArray[index % 5] = "";
    if (_size > 0)
    {
        _size--;
    }
}
main class :
#include <Arduino.h>
#include <ArrayShifter.h>
ArrayShifter array;
// Runs once at start-up: opens the serial port at 9600 baud and waits until
// it reports ready.
void setup() {
  Serial.begin(9600);
  while (!Serial) {
    // busy-wait for the port
  }
}
// Demo: whenever serial data is pending, print entry 7, print the entry
// count, remove entry 7 and print entry 7 again.
void loop() {
  if (Serial.available() <= 0) {
    return;
  }
  Serial.println(array.getArrayData(7));
  const int sizeOption2 = array.getSize();
  Serial.println(sizeOption2);
  array.removeArrayItem(7);
  Serial.println(array.getArrayData(7));
}
please check my github repository
https://github.com/Riadam/ViewPort-Array-Shifter-for-Arduino-Uno.git

C++ linked list has junk nodes appearing in it [closed]

Closed. This question needs debugging details. It is not currently accepting answers.
Edit the question to include desired behavior, a specific problem or error, and the shortest code necessary to reproduce the problem. This will help others answer the question.
Closed 4 years ago.
Improve this question
When implementing a call stack trace for tracking allocations in my overridden new function, I am using ::malloc to create an untracked call stack object that is then put into a linked list. When my harness finishes new-ing off all of the test cases, the list is sound. However, when I go to report the list (print to console) there are now values that should not be there, and they cause it to crash. Below is the simplified version (I apologize that even simplified it is still a lot of code); I am hoping someone can make sense of this:
Macros
// Unit conversion helpers. Every argument and every expansion is
// parenthesised so the macros can be used inside larger expressions.
#define convertToKiB(size) ((size) * 1024UL)
#define convertToMiB(size) ((size) * (1024UL * 1024UL))
#define convertToGiB(size) ((size) * (1024UL * 1024UL * 1024UL))
// Scales a byte count for display: values up to 2 KiB stay in bytes, values
// below 2 MiB are shown in KiB, below 2 GiB in MiB, everything else in GiB.
// The ranges are contiguous, so exact 2 MiB / 2 GiB values are scaled too.
#define convertToReadableBytes(size) \
    (((uint32_t)(size) <= convertToKiB(2)) ? (float)(size) \
     : ((uint32_t)(size) < convertToMiB(2)) ? (float)(size) / (float)convertToKiB(1) \
     : ((uint32_t)(size) < convertToGiB(2)) ? (float)(size) / (float)convertToMiB(1) \
     : (float)(size) / (float)convertToGiB(1))
// Unit label matching the scaling chosen by convertToReadableBytes.
#define convertToReadableBytesString(size) \
    (((uint32_t)(size) <= convertToKiB(2)) ? "B" \
     : ((uint32_t)(size) < convertToMiB(2)) ? "KiB" \
     : ((uint32_t)(size) < convertToGiB(2)) ? "MiB" \
     : "GiB")
Globals
// Size limits used by the call-stack code.
const uint8_t MAX_FRAMES_PER_CALLSTACK = 128;
const uint16_t MAX_SYMBOL_NAME_LENGTH = 128;
// Used as the capacity of the symbol-name buffer behind g_symbol.
const uint32_t MAX_FILENAME_LENGTH = 1024;
// Number of frames requested from CaptureStackBackTrace.
const uint16_t MAX_DEPTH = 128;
// Signatures of the dbghelp.dll entry points that are resolved at run time.
typedef BOOL(__stdcall *sym_initialize_t)(IN HANDLE hProcess, IN PSTR UserSearchPath, IN BOOL fInvadeProcess);
typedef BOOL(__stdcall *sym_cleanup_t)(IN HANDLE hProcess);
typedef BOOL(__stdcall *sym_from_addr_t)(IN HANDLE hProcess, IN DWORD64 Address, OUT PDWORD64 Displacement, OUT PSYMBOL_INFO Symbol);
typedef BOOL(__stdcall *sym_get_line_t)(IN HANDLE hProcess, IN DWORD64 dwAddr, OUT PDWORD pdwDisplacement, OUT PIMAGEHLP_LINE64 Symbol);
// Module handle, process handle and symbol scratch buffer set by CallstackSystemInit.
static HMODULE g_debug_help;
static HANDLE g_process;
static SYMBOL_INFO* g_symbol;
// Resolved dbghelp.dll function pointers.
static sym_initialize_t g_sym_initialize;
static sym_cleanup_t g_sym_cleanup;
static sym_from_addr_t g_sym_from_addr;
static sym_get_line_t g_sym_get_line_from_addr_64;
static int g_callstack_count = 0;
// Head of the linked list of live tracked allocations.
static callstack_list* g_callstack_root = nullptr;
CallStack Object
// Human-readable description of one stack frame, filled by CallstackGetLines.
struct callstack_line_t
{
// Source file of the frame, or "N/A" when no line information is available.
char file_name[128];
// Symbol name of the frame.
char function_name[256];
// Source line number (0 when unknown).
uint32_t line;
// Displacement reported by the line lookup (0 when unknown).
uint32_t offset;
};
// Captured stack trace: raw return addresses plus the hash reported by the capture call.
class CallStack
{
public:
CallStack();
// Back-trace hash; equal hashes are treated as the same call site when reporting.
uint32_t m_hash;
// Number of valid entries in m_frames.
uint8_t m_frame_count;
// Captured frame addresses.
void* m_frames[MAX_FRAMES_PER_CALLSTACK];
};
// Starts with a zero hash and no frames; m_frames is left uninitialised.
CallStack::CallStack()
    : m_hash(0), m_frame_count(0)
{
}
bool CallstackSystemInit()
{
// Load the dll, similar to OpenGL function fecthing.
// This is where these functions will come from.
g_debug_help = LoadLibraryA("dbghelp.dll");
if (g_debug_help == nullptr) {
return false;
}
// Get pointers to the functions we want from the loded library.
g_sym_initialize = (sym_initialize_t)GetProcAddress(g_debug_help, "SymInitialize");
g_sym_cleanup = (sym_cleanup_t)GetProcAddress(g_debug_help, "SymCleanup");
g_sym_from_addr = (sym_from_addr_t)GetProcAddress(g_debug_help, "SymFromAddr");
g_sym_get_line_from_addr_64 = (sym_get_line_t)GetProcAddress(g_debug_help, "SymGetLineFromAddr64");
// Initialize the system using the current process [see MSDN for details]
g_process = ::GetCurrentProcess();
g_sym_initialize(g_process, NULL, TRUE);
// Preallocate some memory for loading symbol information.
g_symbol = (SYMBOL_INFO *) ::malloc(sizeof(SYMBOL_INFO) + (MAX_FILENAME_LENGTH * sizeof(char)));
g_symbol->MaxNameLen = MAX_FILENAME_LENGTH;
g_symbol->SizeOfStruct = sizeof(SYMBOL_INFO);
return true;
}
// Releases what CallstackSystemInit acquired: the symbol buffer, the symbol
// handler and the dbghelp.dll module.
void CallstackSystemDeinit()
{
    // Symbol scratch buffer.
    ::free(g_symbol);
    g_symbol = nullptr;

    // Symbol handler for this process.
    g_sym_cleanup(g_process);

    // The library itself.
    FreeLibrary(g_debug_help);
    g_debug_help = NULL;
}
// Releases a call stack obtained from CreateCallstack.
// Must not be static: it is called when the callstack is freed.
void DestroyCallstack(CallStack *ptr)
{
    // The object was carved out of ::malloc memory, so it goes back via ::free.
    ::free(ptr);
}
// Captures the current call stack into a CallStack object that lives in
// untracked (::malloc) memory.
//   skip_frames  number of frames to leave out in addition to this function's own frame
// Returns the new object, or nullptr when the memory could not be obtained.
// Release it with DestroyCallstack.
CallStack* CreateCallstack(uint8_t skip_frames)
{
    // Capture the frames and the back-trace hash with the Windows call.
    void *stack[MAX_DEPTH];
    DWORD hash = 0;
    uint32_t frames = CaptureStackBackTrace(1 + skip_frames, MAX_DEPTH, stack, &hash);

    // Untracked allocation; constructing into a null pointer would be
    // undefined behaviour, so bail out first.
    void *raw = ::malloc(sizeof(CallStack));
    if (raw == nullptr) {
        return nullptr;
    }
    CallStack *cs = new (raw) CallStack();

    // Copy no more frames than the object can hold.
    unsigned int frame_count = min(MAX_FRAMES_PER_CALLSTACK, frames);
    cs->m_frame_count = (uint8_t)frame_count;
    ::memcpy(cs->m_frames, stack, sizeof(void*) * frame_count);
    cs->m_hash = hash;
    return cs;
}
//------------------------------------------------------------------------
// Fills lines with human readable data for the given callstack
// Fills from top to bottom (top being most recently called, with each next one being the calling function of the previous)
//
// Additional features you can add;
// [ ] If a file exists in yoru src directory, clip the filename
// [ ] Be able to specify a list of function names which will cause this trace to stop.
// Resolves the frames of `cs` into readable lines.
//   line_buffer  receives the resolved lines, most recent call first
//   max_lines    capacity of line_buffer
//   cs           call stack to resolve
// Frames whose symbol cannot be resolved are skipped. Names that do not fit
// the fixed-size fields are truncated. Returns the number of lines written.
uint16_t CallstackGetLines(callstack_line_t *line_buffer, const uint16_t max_lines, CallStack *cs)
{
    if (line_buffer == nullptr || cs == nullptr) {
        return 0;
    }

    IMAGEHLP_LINE64 line_info;
    DWORD line_offset = 0; // Displacement from the beginning of the line
    line_info.SizeOfStruct = sizeof(IMAGEHLP_LINE64);

    unsigned int count = min(max_lines, cs->m_frame_count);
    unsigned int idx = 0;
    for (unsigned int i = 0; i < count; ++i) {
        callstack_line_t *line = &(line_buffer[idx]);
        DWORD64 ptr = (DWORD64)(cs->m_frames[i]);
        if (FALSE == g_sym_from_addr(g_process, ptr, 0, g_symbol)) {
            continue;
        }

        // The symbol buffer can hold longer names than function_name; plain
        // strcpy_s would invoke the invalid-parameter handler in that case,
        // so copy with truncation instead.
        strncpy_s(line->function_name, sizeof(line->function_name), g_symbol->Name, _TRUNCATE);

        BOOL bRet = g_sym_get_line_from_addr_64(
            g_process,     // Process handle the symbol handler was initialised with
            ptr,           // Address
            &line_offset,  // Displacement will be stored here by the function
            &line_info);   // File name / line information will be stored here
        if (bRet) {
            line->line = line_info.LineNumber;
            strncpy_s(line->file_name, sizeof(line->file_name), line_info.FileName, _TRUNCATE);
            line->offset = line_offset;
        }
        else {
            // no information
            line->line = 0;
            line->offset = 0;
            strcpy_s(line->file_name, sizeof(line->file_name), "N/A");
        }
        ++idx;
    }
    return (uint16_t)idx;
}
Operators
// Treat as Linked List Node
// Node of the singly-linked list of live allocations (one node per allocation).
struct callstack_list
{
// Call stack captured when the allocation was made.
CallStack* current_stack = nullptr;
// Requested size in bytes. NOTE(review): 16 bits wide, so sizes above 65535 do not fit.
uint16_t total_allocation = 0;
// Next live allocation, or nullptr at the end of the list.
callstack_list* next = nullptr;
};
// Header that operator new places directly in front of every block it returns.
struct allocation_meta
{
// Requested size in bytes. NOTE(review): 16 bits wide, so sizes above 65535 do not fit.
uint16_t size;
// List node that links this allocation into g_callstack_root.
callstack_list callstack_node;
};
void* operator new(const size_t size)
{
uint16_t alloc_size = (uint16_t)size + (uint16_t)sizeof(allocation_meta);
allocation_meta *ptr = (allocation_meta*)::malloc((size_t)alloc_size);
ptr->size = (uint16_t)size;
ptr->callstack_node.current_stack = CreateCallstack(0);
ptr->callstack_node.total_allocation = (uint16_t)size;
ptr->callstack_node.next = nullptr;
bool run = true;
callstack_list* currentNode = nullptr;
while (g_callstack_root != nullptr && run)
{
if (currentNode == nullptr)
{
currentNode = g_callstack_root;
}
if (currentNode->next != nullptr)
{
currentNode = currentNode->next;
}
else
{
currentNode->next = &ptr->callstack_node;
run = false;
}
}
if (g_callstack_root == nullptr)
{
g_callstack_root = &ptr->callstack_node;
}
return ptr + 1;
}
void operator delete(void* ptr)
{
if (nullptr == ptr)
return;
allocation_meta *data = (allocation_meta*)ptr;
data--;
if (data->callstack_node.current_stack != nullptr)
DestroyCallstack(data->callstack_node.current_stack);
bool run = true;
callstack_list* currentNode = nullptr;
while (g_callstack_root != nullptr && run && &data->callstack_node != NULL)
{
if (currentNode == nullptr && g_callstack_root != &data->callstack_node)
{
currentNode = g_callstack_root;
}
else
{
g_callstack_root = nullptr;
run = false;
continue;
}
if (currentNode->next != nullptr && currentNode->next != &data->callstack_node)
{
currentNode = currentNode->next;
}
else
{
currentNode->next = nullptr;
run = false;
}
}
::free(data);
}
Test Harness
void ReportVerboseCallStacks(const char* start_time_str = "", const char* end_time_str = "")
{
callstack_list* currentNode = g_callstack_root;
unsigned int totalSimiliarAllocs = 0;
uint32_t totalSimiliarSize = 0;
while (currentNode != nullptr)
{
callstack_list* nextNode = currentNode->next;
uint32_t& currentHash = currentNode->current_stack->m_hash;
uint32_t nextHash;
if (nextNode == nullptr)
nextHash = currentHash + 1;
else
nextHash = nextNode->current_stack->m_hash;
if (nextHash == currentHash)
{
totalSimiliarSize += currentNode->total_allocation;
totalSimiliarAllocs++;
}
if (nextHash != currentHash)
{
//Print total allocs for type and total size
float reportedBytes = convertToReadableBytes(totalSimiliarSize);
std::string size = convertToReadableBytesString(totalSimiliarSize);
char collection_buffer[128];
sprintf_s(collection_buffer, 128, "\nGroup contained %s allocation(s), Total: %0.3f %s\n", std::to_string(totalSimiliarAllocs).c_str(), reportedBytes, size.c_str());
printf(collection_buffer);
//Reset total allocs and size
totalSimiliarAllocs = 0;
totalSimiliarSize = 0;
}
// Printing a call stack, happens when making report
char line_buffer[512];
callstack_line_t lines[128];
unsigned int line_count = CallstackGetLines(lines, 128, currentNode->current_stack);
for (unsigned int i = 0; i < line_count; ++i)
{
// this specific format will make it double click-able in an output window
// taking you to the offending line.
//Print Line For Call Stack
sprintf_s(line_buffer, 512, " %s(%u): %s\n", lines[i].file_name, lines[i].line, lines[i].function_name);
printf(line_buffer);
}
currentNode = currentNode->next;
}
}
// Fills arr[0 .. size-1] with freshly allocated int64_t objects whose value
// equals their position. The caller owns the allocations.
void Pop64List(int64_t* arr[], int size)
{
    int position = 0;
    while (position < size)
    {
        int64_t* slot = new int64_t;
        *slot = (int64_t)position;
        arr[position] = slot;
        ++position;
    }
}
// Fills arr[0 .. size-1] with freshly allocated int8_t objects whose value is
// their position converted to int8_t. The caller owns the allocations.
void Pop8List(int8_t* arr[], int size)
{
    int position = 0;
    while (position < size)
    {
        int8_t* slot = new int8_t;
        *slot = (int8_t)position;
        arr[position] = slot;
        ++position;
    }
}
// Test harness: initialises the call-stack system, makes 8000 int64_t and
// 10000 int8_t allocations, prints the report and shuts the system down.
// Returns 1 when the call-stack system cannot be initialised.
int main()
{
    const bool ready = CallstackSystemInit();
    if (!ready)
    {
        return 1;
    }

    const int SIZE_64 = 8000;
    const int SIZE_8 = 10000;
    int64_t* arr_64[SIZE_64];
    int8_t* arr_8[SIZE_8];

    Pop64List(arr_64, SIZE_64);
    Pop8List(arr_8, SIZE_8);

    ReportVerboseCallStacks();
    CallstackSystemDeinit();
    return 0;
}
I finally figured out the answer. In my reporting function I was using std::string to create some of the reporting objects. std::string calls ::new internally to create a small allocation, and then hammers additional memory as the string's internal array reallocates memory. Switching to C-strings solved my problem.

Seg. fault resizing array C++

I have a priority queue array that is filled with "Jobs" (name + priority). I've been able to get everything queue related working aside from re sizing if it is full. Here is the bits that I think are causing a segmentation fault that I haven't been able to figure out.
EDIT:
Here is a bit more code that will compile, I left in the rest of the functions in case those might help in any way. Right now the initial capacity is set to 5, when you try to add a job to the full list it will double the capacity of the array and allow you to add a couple more jobs before a SEG. fault.
pq.h
#ifndef PQ_H
#define PQ_H
#include "interface.h"
#include <string>
using namespace std;
// A queue entry: a task name together with its priority.
class Job {
public:
// Returns the job's priority.
int getPriority();
// Returns the job's name.
string getTaskName();
// Sets the job's priority.
void setPriority(int val);
// Sets the job's name.
void setTaskName(string tname);
// Creates a job with priority 0 and a placeholder name.
Job();
private:
int priority;
string taskName;
};
class PriorityQueue {
public:
PriorityQueue();
~PriorityQueue();
int size();
bool isEmpty();
void clear();
void enqueue(string value, int priority);
string dequeue();
string peek();
int peekPriority();
PriorityQueue(const PriorityQueue & src);
PriorityQueue & operator=(const PriorityQueue & src);
private:
static const int INITIAL_CAPACITY = 5;
Job *array;
int count;
int capacity;
void expandCapacity() {
Job *oldArray = array;
capacity *= 2;
array = new Job[capacity];
for (int i = 0; i < count; i++) {
array[i] = oldArray[i];
}
delete[] oldArray;
}
};
#endif
pq.cpp
#include <iostream>
#include <cstring>
using namespace std;
//#include "job.h"
#include "pq.h"
// A default job has priority 0 and a placeholder name.
Job::Job()
    : priority(0), taskName("There are no items in the list.")
{
}

// Priority of the job.
int Job::getPriority()
{
    return priority;
}

// Name of the job.
string Job::getTaskName()
{
    return taskName;
}

// Assigns the priority of the job.
void Job::setPriority(int val)
{
    priority = val;
}

// Assigns the name of the job.
void Job::setTaskName(string tname)
{
    taskName = tname;
}
// Creates an empty queue with INITIAL_CAPACITY array elements; `capacity` is
// set to INITIAL_CAPACITY - 1.
PriorityQueue::PriorityQueue()
    : array(new Job[INITIAL_CAPACITY]),
      count(0),
      capacity(INITIAL_CAPACITY - 1)
{
}
// Releases the job storage.
PriorityQueue::~PriorityQueue()
{
    delete [] array;
}

// Number of jobs currently queued.
int PriorityQueue::size()
{
    return count;
}

// True when no job is queued.
bool PriorityQueue::isEmpty()
{
    return count == 0;
}

// Forgets all queued jobs by resetting the count; the storage is kept.
void PriorityQueue::clear()
{
    count = 0;
}
// Adds a job with the given name and priority, growing the storage when it
// is full, and restores the heap order (smallest priority value at the root).
void PriorityQueue::enqueue(string value, int priority) {
    // Grow first when the last usable slot is already taken.
    if (count == capacity) {
        expandCapacity();
        cout << "\tList was full and has been expanded\n";
    }

    // The heap is 1-based: the new job goes into slot ++count.
    ++count;
    array[count].setPriority(priority);
    array[count].setTaskName(value);

    // Up-heap: move parents down until the new job's place is found.
    // The walk stops explicitly at the root (slot 1). Relying on slot 0 as a
    // sentinel never terminates for priorities <= 0 and overwrites slot 0.
    Job v = array[count];
    int tempcount = count;
    while (tempcount > 1 && array[tempcount / 2].getPriority() >= v.getPriority()) {
        array[tempcount] = array[tempcount / 2];
        tempcount = tempcount / 2;
    }
    array[tempcount] = v;
}
// Removes the job at the root of the heap and returns its name.
// Returns the text "The queue is empty" when there is nothing to remove.
string PriorityQueue::dequeue() {
    if (isEmpty()) {
        return string("The queue is empty");
    }

    Job removed = array[1];

    // Move the last job to the root and sift it down to its place.
    array[1] = array[count--];
    Job moving = array[1];
    int hole = 1;
    while (hole <= count / 2) {
        cout << "dequeuewhile"; // test
        int child = hole + hole;
        // Pick the child with the smaller priority value.
        if (child < count && array[child].getPriority() > array[child + 1].getPriority()) {
            ++child;
            cout << "dequeueloop if1"; // test
        }
        if (moving.getPriority() <= array[child].getPriority()) {
            cout << "dequeueloop if2"; // test
            break;
        }
        array[hole] = array[child];
        hole = child;
    }
    array[hole] = moving;

    return removed.getTaskName();
}
// Name of the job at the root of the heap, without removing it.
// For an empty queue the name stored in slot 0 is returned instead.
string PriorityQueue::peek() {
    const int slot = (count == 0) ? 0 : 1;
    return array[slot].getTaskName();
}
// Priority of the job at the root of the heap, without removing it.
// For an empty queue a notice is printed and the priority stored in slot 0
// is returned.
int PriorityQueue::peekPriority() {
    if (count != 0) {
        return array[1].getPriority();
    }
    cout << "\tThere are no items in the list.\n";
    return array[0].getPriority();
}
I think that when you do ++count, the next use of count will be out of bounds for the array.
array[++count].setPriority(priority);
// SEGMENTATION FAULT HERE
array[count].setTaskName(value);
If the capacity of the array is 5, and count was 4, then you just incremented count to 5, and tried to access element 5, which is out-of-bounds.
array = new Job[capacity];
for (int i = 0; i < count; i++) {
array[i] = oldArray[i];
}
Lets assume capacity is 10, so you've got an array of 10 elements, ranging from elements 0 to 9.
count tells us how many elements are being used.
If count happens to be 9, then when you increment count by one, it is now 10. Then, when execution reaches the line you marked as producing the segmentation fault, you're trying to access element 10, in our example. There is no element 10 in an array of length 10, so you're out of bounds.
array[++count].setPriority(priority); // array[10], but last element is 9!
// SEGMENTATION FAULT HERE
array[count].setTaskName(value); // array[10], but last element is 9!
And, of course, everything after that part causes the same issue, as you keep using array[count].
Your original code did exactly as the previous answer given by #antiHUMAN.
The problem you're having is mixing or erroneously using 0-based and 1-based concepts.
Your first mistake is to make capacity a 0-based number. The capacity should denote the maximum number of items in an array, thus you should not be subtracting 1 from it. If the array can hold 5 items, then capacity should be 5, not 4.
PriorityQueue::PriorityQueue() // constructor
{
count = 0;
capacity = INITIAL_CAPACITY; // this remains 1-based.
array = new Job[INITIAL_CAPACITY];
}
or using the initializer-list:
PriorityQueue::PriorityQueue() : count(0),
capacity(INITIAL_CAPACITY),
array(new Job[INITIAL_CAPACITY]) {}
The 0-based number in your situation should be count, not capacity. Given that, since count is 0-based, and capacity is 1-based, your test in enqueue needs to be changed:
if(count + 1 == capacity){
expandCapacity();
cout << "\tList was full and has been expanded\n";
}
Note that 1 is added to count to account for the fact that count is 0-based and capacity is 1 based.

Why does random extra code improve performance?

Struct Node {
Node *N[SIZE];
int value;
};
// Prefix tree (sketch): findNode follows one child link per symbol produced
// by the key.
struct Trie {
    Node *root;

    // Walks from the root along the symbols returned by key->next().
    // Returns the node reached when next() yields a negative value (end of
    // key), or 0 as soon as a child link on the path is missing.
    Node* findNode(Key *key) {
        Node *C = root;  // root is already a Node*; taking its address would give Node**
        int u;           // int rather than char: the negative end marker must survive
                         // on platforms where plain char is unsigned
        while (1) {
            u = key->next();
            if (u < 0) return C;
            // if (C->N[0] == C->N[0]); // this line will speed up execution significantly
            C = C->N[u];
            if (C == 0) return 0;
        }
    }
    void addNode(Key *key, int value){...};
};
In this implementation of Prefix Tree (aka Trie) I found out that 90% of findNode() execution time is taken by a single operation C=C->N[u];
In my attempt to speed up this code, I randomly added the line that is commented out in the snippet above, and the code became 30% faster! Why is that?
UPDATE
Here is complete program.
#include "stdio.h"
#include "sys/time.h"
// Returns the current wall-clock time in milliseconds. Only the low 16 bits
// of the seconds field are kept before scaling, so the result is meant for
// computing short differences rather than as an absolute timestamp.
long time1000() {
    timeval now;
    gettimeofday(&now, 0);
    now.tv_sec &= 0xffff;
    const long wholeSecondsMs = now.tv_sec * 1000;
    const long fractionMs = now.tv_usec / 1000;
    return wholeSecondsMs + fractionMs;
}
// Reads a byte buffer four bits at a time: for every byte the low nibble is
// produced first, then the high nibble.
struct BitScanner {
    void *p;         // start of the buffer being scanned
    int count, pos;  // count: buffer length in bytes; pos: index of the next nibble

    BitScanner (void *p, int count) : p(p), count(count), pos(0) {}

    // Returns the next nibble (0..15), or -1 once all `count` bytes have been
    // consumed. The position is not advanced after the end is reached.
    int next() {
        const int byteIndex = pos >> 1;
        if (byteIndex >= count) return -1;
        const unsigned char byte = static_cast<unsigned char*>(p)[byteIndex];
        const bool wantHighNibble = (pos & 1) != 0;
        ++pos;
        return wantHighNibble ? (byte >> 4) : (byte & 0xf);
    }
};
// Trie node with 16 child links (one per nibble value) and a 64-bit payload.
// A freshly constructed node has no children and value == -1.
struct Node {
    Node *N[16];
    __int64_t value;

    Node() : value(-1) {
        for (int slot = 0; slot < 16; ++slot) {
            N[slot] = 0;
        }
    }
};
// 16-way trie keyed by the nibbles that BitScanner produces from a byte
// buffer. Child nodes are allocated with new; this struct never frees them.
struct Trie16 {
    Node root;

    // Stores `value` under the key made of `count` bytes at `key`, creating
    // any missing nodes along the path.
    // Returns true if the terminal node previously held -1 (treated as "no
    // value"), false if an existing value was overwritten.
    bool add(void *key, int count, __int64_t value) {
        Node *C = &root;
        BitScanner B(key, count);
        while (true) {
            int u = B.next();
            if (u < 0) {
                if (C->value == -1) {
                    C->value = value;
                    return true; // value added
                }
                C->value = value;
                return false; // value replaced
            }
            Node *Q = C->N[u];
            if (Q) {
                C = Q;
            } else {
                C = C->N[u] = new Node;
            }
        }
    }

    // Follows the key's nibbles from the root.
    // Returns the node reached at the end of the key, or 0 as soon as a
    // child link on the path is missing.
    Node* findNode(void *key, int count) {
        Node *C = &root;
        BitScanner B(key, count);
        while (true) {
            // int, not char: next() signals end-of-key with -1, and where
            // plain char is unsigned that would become 255, the end test
            // would never fire and N[255] would be read out of bounds.
            int u = B.next();
            if (u < 0) return C;
            // if (C->N[0] == C->N[1]);
            C = C->N[u];
            if (C == 0) return 0;
        }
    }
};
// Benchmark driver: inserts STEPS keys into a Trie16, looks each of them up
// again, and prints the elapsed milliseconds for both phases plus the number
// of failed lookups.
int main() {
    // time1000() returns long; keep it as long so nothing is truncated and
    // the printf conversions below match the argument type.
    long T = time1000();
    Trie16 trie;
    __int64_t STEPS = 100000, STEP = 500000000, key;
    key = 0;
    for (int i = 0; i < STEPS; i++) {
        key += STEP;
        trie.add(&key, 8, key+222);
    }
    printf("insert time:%li\n", time1000() - T); T = time1000();
    int err = 0;
    key = 0;
    for (int i = 0; i < STEPS; i++) {
        key += STEP;
        Node *N = trie.findNode(&key, 8);
        // A lookup fails if the node is missing or holds a different value.
        if (N==0 || N->value != key+222) err++;
    }
    printf("find time:%li\n", time1000() - T); T = time1000();
    printf("errors:%i\n", err);
}
This is largely a guess, but from what I have read about the CPU data prefetcher, it only prefetches if it sees multiple accesses to the same memory location and those accesses match a prefetch trigger, for example a pattern that looks like scanning. In your case, if there is only a single access to C->N, the prefetcher would not be interested; however, if there are multiple accesses and it can predict that the later access is further into the same region of memory, that can make it prefetch more than one cache line.
If the above was happening then C->N[u] would not have to wait for memory to arrive from RAM therefore would be faster.
It looks like what you are doing is preventing processor stalls by delaying the execution of code until the data is available locally.
Doing it this way is very error-prone and unlikely to continue working consistently. The better way is to get the compiler to do this. By default, most compilers generate code for a generic processor family, but if you look at the available flags you can usually find flags for specifying your specific processor so that it can generate more specific code (like prefetches and stall code).
See: GCC: how is march different from mtune? the second answer goes into some detail: https://stackoverflow.com/a/23267520/14065
Each write operation is costlier than a read.
Here, if you look at
C = C->N[u]; it means the CPU is executing a write in each iteration for the variable C.
But when you perform if (C->N[0] == C->N[1]) dummy++; the write on dummy is executed only if C->N[0] == C->N[1]. So you have saved many CPU write instructions by using the if condition.

How do I code a simple integer circular buffer in C/C++?

I see a lot of templates and complicated data structures for implementing a circular buffer.
How do I code a simple integer circular buffer for 5 numbers?
I'm thinking C is the most straightforward?
Thanks.
Have an array, buffer, of 5 integers. Have an index ind to the next element. When you add, do
buffer[ind] = value;
ind = (ind + 1) % 5;
Take an array, arr, an index idx, and a counter, num.
To insert foo, say arr[idx++] = foo; idx %= buffer_len; num++;.
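To read out an item into foo, say foo = arr[(idx - num + buffer_len) % buffer_len]; num--; (adding buffer_len keeps the index non-negative, because in C the % of a negative int is negative).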
Add boundary checks.
If the size and data type of your buffer are fixed, a simple array is all you need:
int buffer[5];
Add to that a couple pointers:
int* start = &buffer[0];
int* end = &buffer[4]+1;
int* input = start;
int* output = start;
#define FIFO_SIZE 3

int rI = 0;               /* index of the next slot to read */
int wI = 0;               /* index of the next slot to write */
static int fifoCount = 0; /* number of values currently stored */

/*
 * Returns how many more values can be added before the FIFO is full.
 *
 * Fullness is tracked with an explicit element count. Deriving it from the
 * two indices alone cannot tell "full" from "empty" when they are equal.
 */
int checkAvail(void)
{
    return FIFO_SIZE - fifoCount;
}

/*
 * Appends val to the FIFO stored in a, which must have room for FIFO_SIZE
 * ints. Prints "FIFO full" and stores nothing when there is no free slot.
 * Always returns 0.
 */
int addFIFO(int *a, int val)
{
    if (checkAvail() > 0) {
        a[wI] = val;
        /* Wrap at FIFO_SIZE so the index never leaves 0..FIFO_SIZE-1. */
        wI = (wI + 1) % FIFO_SIZE;
        fifoCount++;
    } else {
        printf("FIFO full");
    }
    return 0;
}

/*
 * Removes the oldest value from the FIFO stored in a and returns it.
 * Prints "FIFO empty" and returns 0 when nothing is stored, so a returned 0
 * cannot be told apart from a stored 0.
 */
int remFIFO(int *a)
{
    int val = 0;

    if (fifoCount > 0) {
        val = a[rI];
        rI = (rI + 1) % FIFO_SIZE;
        fifoCount--;
    } else {
        printf("FIFO empty");
    }
    return val;
}
// Demo driver: issues four adds (values 1..4) followed by five removes
// against a FIFO_SIZE-element array.
int main(array<System::String ^> ^args)
{
    int FIFO_ARRAY[FIFO_SIZE]={};

    for (int value = 1; value <= 4; ++value) {
        addFIFO(FIFO_ARRAY, value);
    }
    for (int attempt = 0; attempt < 5; ++attempt) {
        remFIFO(FIFO_ARRAY);
    }
}