How to implement custom loss function correctly in caffe? - c++

I am beginer at caffe and I am implementing a custom loss function of caffe. But the error is occurred at runtest.
My loss function is similar with Euclidean loss. The original Euclidean loss equation is below.
enter image description here
I would like to implement 2D distance loss. So I made the equation like below.
enter image description here
Then, the runtest result is error at backward function. I thought the way to write back propagation is something wrong. However, I am not sure what is wrong. I simply modify Euclidean loss to adjust my loss function and I wrote gradient of my loss function at back propagation. Do you know why the error is occured?
imgdist_loss_layer.cpp
#include <vector>
#include "caffe/layers/imgdist_loss_layer.hpp"
#include "caffe/util/math_functions.hpp"
namespace caffe {
template <typename Dtype>
void ImgdistLossLayer<Dtype>::Reshape(
const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
LossLayer<Dtype>::Reshape(bottom, top);
CHECK_EQ(bottom[0]->count(1), bottom[1]->count(1))
<< "Inputs must have the same dimension.";
diff_.ReshapeLike(*bottom[0]);
}
// forward propagation
// calculate loss
template <typename Dtype>
void ImgdistLossLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {
int count = bottom[0]->count() / 2;
Dtype loss = 0;
for (int i = 0; i < count; ++i) {
Dtype x_sub = bottom[0]->cpu_data()[2 * i] - bottom[1]->cpu_data()[2 * i];
Dtype y_sub = bottom[0]->cpu_data()[2 * i + 1] - bottom[1]->cpu_data()[2 * i + 1];
loss += x_sub*x_sub + y_sub*y_sub;
}
loss = loss / bottom[0]->num();
top[0]->mutable_cpu_data()[0] = loss;
}
// back propagation
// calculate gradient
template <typename Dtype>
void ImgdistLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
for (int i = 0; i < 2; ++i) {
if (propagate_down[i]) {
const Dtype* bottom_data_0 = bottom[0]->cpu_data();
const Dtype* bottom_data_1 = bottom[1]->cpu_data();
Dtype* bottom_diff = bottom[i]->mutable_cpu_diff();
const int count = bottom[0]->count() / 2;
for (int j = 0; j < count; ++j) {
const Dtype x_sub = bottom_data_0[2 * j] - bottom_data_1[2 * j];
const Dtype y_sub = bottom_data_0[2 * j + 1] - bottom_data_1[2 * j + 1];
const Dtype sign = (i == 0) ? 1 : -1;
const Dtype alpha_0 = (sign * Dtype(2) * x_sub + y_sub * y_sub) / bottom[i]->num();
const Dtype alpha_1 = (x_sub * x_sub + sign * Dtype(2) * y_sub) / bottom[i]->num();
bottom_diff[2 * j] = top[0]->cpu_diff()[0] * alpha_0;
bottom_diff[2 * j + 1] = top[0]->cpu_diff()[0] * alpha_1;
} // j
}
} // i
}
#ifdef CPU_ONLY
STUB_GPU(ImgDistLossLayer);
#endif
INSTANTIATE_CLASS(ImgdistLossLayer);
REGISTER_LAYER_CLASS(ImgdistLoss);
} // namespace caffe
imgdist_loss_layer.cu
#include <vector>
#include "caffe/layers/imgdist_loss_layer.hpp"
#include "caffe/util/math_functions.hpp"
namespace caffe {
// forward propagation loop
template <typename Dtype>
__global__ void imgdistLossForwardGPU(const int nthreads,
const Dtype* input_data, const Dtype* target, Dtype* loss) {
CUDA_KERNEL_LOOP(i, nthreads) {
loss[i] = (input_data[2 * i] - target[2 * i]) * (input_data[2 * i] - target[2 * i])
+ (input_data[2 * i + 1] - target[2 * i + 1]) * (input_data[2 * i + 1] - target[2 * i + 1]);
}
}
// forward propagation
template <typename Dtype>
void ImgdistLossLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {
const int count = bottom[0]->count() / 2;
const Dtype* input_data = bottom[0]->gpu_data();
const Dtype* target = bottom[1]->gpu_data();
Dtype* loss_data = bottom[0]->mutable_gpu_diff();
imgdistLossForwardGPU<Dtype><<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>(
count, input_data, target, loss_data);
CUDA_POST_KERNEL_CHECK;
Dtype loss;
caffe_gpu_asum(count, loss_data, &loss);
loss = loss / bottom[0]->num();
top[0]->mutable_cpu_data()[0] = loss;
}
// back propagation loop
template <typename Dtype>
__global__ void imgdistLossBackwardGPU(const int nthreads,
const Dtype* input_data, const Dtype* target, Dtype* diff,
const Dtype sign, const Dtype toploss, const Dtype bottom_num) {
CUDA_KERNEL_LOOP(i, nthreads) {
const Dtype x_sub = input_data[2 * i] - target[2 * i];
const Dtype y_sub = input_data[2 * i + 1] - target[2 * i + 1];
const Dtype alpha_0 = (sign * Dtype(2) * x_sub + y_sub * y_sub) / bottom_num;
const Dtype alpha_1 = (x_sub * x_sub + sign * Dtype(2) * y_sub) / bottom_num;
diff[2 * i] = toploss * alpha_0;
diff[2 * i + 1] = toploss * alpha_1;
}
}
// back propagation
template <typename Dtype>
void ImgdistLossLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
for (int i = 0; i < 2; ++i) {
if (propagate_down[i]) {
const Dtype sign = (i == 0) ? 1 : -1;
const int count = bottom[0]->count() / 2;
const Dtype* input_data = bottom[0]->gpu_data();
const Dtype* target = bottom[1]->gpu_data();
const Dtype toploss = top[0]->cpu_diff()[0];
const Dtype bottom_num = bottom[i]->num();
Dtype* bottom_diff = bottom[i]->mutable_gpu_diff();
imgdistLossBackwardGPU<Dtype><<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>(
count, input_data, target, bottom_diff, sign, toploss, bottom_num);
CUDA_POST_KERNEL_CHECK;
}
}
}
INSTANTIATE_LAYER_GPU_FUNCS(ImgdistLossLayer);
} // namespace caffe
imgdist_loss_layer.hpp (only change class name)
#ifndef CAFFE_IMGDIST_LOSS_LAYER_HPP_
#define CAFFE_IMGDIST_LOSS_LAYER_HPP_
#include <vector>
#include "caffe/blob.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"
#include "caffe/layers/loss_layer.hpp"
namespace caffe {
template <typename Dtype>
class ImgdistLossLayer : public LossLayer<Dtype> {
public:
explicit ImgdistLossLayer(const LayerParameter& param)
: LossLayer<Dtype>(param), diff_() {}
virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual inline const char* type() const { return "ImgdistLoss"; }
virtual inline bool AllowForceBackward(const int bottom_index) const {
return true;
}
protected:
/// #copydoc EuclideanLossLayer
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
Blob<Dtype> diff_;
};
} // namespace caffe
#endif // CAFFE_EUCLIDEAN_LOSS_LAYER_HPP_
test_imgdist_loss_layer.cpp
#include <cmath>
#include <vector>
#include "gtest/gtest.h"
#include "caffe/blob.hpp"
#include "caffe/common.hpp"
#include "caffe/filler.hpp"
#include "caffe/layers/imgdist_loss_layer.hpp"
#include "caffe/test/test_caffe_main.hpp"
#include "caffe/test/test_gradient_check_util.hpp"
namespace caffe {
template<typename TypeParam>
class ImgdistLossLayerTest : public MultiDeviceTest<TypeParam> {
typedef typename TypeParam::Dtype Dtype;
protected:
ImgdistLossLayerTest()
: blob_bottom_data_(new Blob<Dtype>(10, 5, 1, 1)),
blob_bottom_label_(new Blob<Dtype>(10, 5, 1, 1)),
blob_top_loss_(new Blob<Dtype>()) {
// fill the values
FillerParameter filler_param;
GaussianFiller<Dtype> filler(filler_param);
filler.Fill(this->blob_bottom_data_);
blob_bottom_vec_.push_back(blob_bottom_data_);
filler.Fill(this->blob_bottom_label_);
blob_bottom_vec_.push_back(blob_bottom_label_);
blob_top_vec_.push_back(blob_top_loss_);
}
virtual ~ImgdistLossLayerTest() {
delete blob_bottom_data_;
delete blob_bottom_label_;
delete blob_top_loss_;
}
void TestForward() {
// Get the loss without a specified objective weight -- should be
// equivalent to explicitly specifying a weight of 1.
LayerParameter layer_param;
ImgdistLossLayer<Dtype> layer_weight_1(layer_param);
layer_weight_1.SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
const Dtype loss_weight_1 =
layer_weight_1.Forward(this->blob_bottom_vec_, this->blob_top_vec_);
// Get the loss again with a different objective weight; check that it is
// scaled appropriately.
const Dtype kLossWeight = 3.7;
layer_param.add_loss_weight(kLossWeight);
ImgdistLossLayer<Dtype> layer_weight_2(layer_param);
layer_weight_2.SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
const Dtype loss_weight_2 =
layer_weight_2.Forward(this->blob_bottom_vec_, this->blob_top_vec_);
const Dtype kErrorMargin = 1e-5;
EXPECT_NEAR(loss_weight_1 * kLossWeight, loss_weight_2, kErrorMargin);
// Make sure the loss is non-trivial.
const Dtype kNonTrivialAbsThresh = 1e-1;
EXPECT_GE(fabs(loss_weight_1), kNonTrivialAbsThresh);
}
Blob<Dtype>* const blob_bottom_data_;
Blob<Dtype>* const blob_bottom_label_;
Blob<Dtype>* const blob_top_loss_;
vector<Blob<Dtype>*> blob_bottom_vec_;
vector<Blob<Dtype>*> blob_top_vec_;
};
TYPED_TEST_CASE(ImgdistLossLayerTest, TestDtypesAndDevices);
TYPED_TEST(ImgdistLossLayerTest, TestForward) {
this->TestForward();
}
TYPED_TEST(ImgdistLossLayerTest, TestGradient) {
typedef typename TypeParam::Dtype Dtype;
LayerParameter layer_param;
const Dtype kLossWeight = 3.7;
layer_param.add_loss_weight(kLossWeight);
ImgdistLossLayer<Dtype> layer(layer_param);
layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
GradientChecker<Dtype> checker(1e-2, 1e-2, 1701);
checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_,
this->blob_top_vec_);
}
}
The error log is below.
C:\Projects\caffe\include\caffe/test/test_gradient_check_util.hpp(175): error: The difference between computed_gradient and estimated_gradient is 1.5981258813447825, which exceeds threshold_ * scale, where
computed_gradient evaluates to 2.755687472811343,
estimated_gradient evaluates to 1.1575615914665605, and
threshold_ * scale evaluates to 0.027556874728113429.
debug: (top_id, top_data_id, blob_id, feat_id)=0,0,1,49; feat = 1.5097962694948988; objective+ = 20.508002455868997; objective- = 20.484851224039666
[ FAILED ] ImgdistLossLayerTest/3.TestGradient, where TypeParam = struct caffe::GPUDevice<double> (204 ms)
[----------] 2 tests from ImgdistLossLayerTest/3 (222 ms total)
[----------] Global test environment tear-down
[==========] 8 tests from 4 test cases ran. (878 ms total)
[ PASSED ] 4 tests.
[ FAILED ] 4 tests, listed below:
[ FAILED ] ImgdistLossLayerTest/0.TestGradient, where TypeParam = struct caffe::CPUDevice<float>
[ FAILED ] ImgdistLossLayerTest/1.TestGradient, where TypeParam = struct caffe::CPUDevice<double>
[ FAILED ] ImgdistLossLayerTest/2.TestGradient, where TypeParam = struct caffe::GPUDevice<float>
[ FAILED ] ImgdistLossLayerTest/3.TestGradient, where TypeParam = struct caffe::GPUDevice<double>
4 FAILED TESTS

Related

Qt: what is the most efficient way to vizualize a large 2D array?

I'm porting a project which uses Curses to visualize a large (5000x5000, for example) 2D array of chars. The problem is, it must be a high-performance project where the array is constantly updated, but in its current state, the output to stdout is a bottleneck no matter how I optimize the back-end. The project would benefit from using a faster and object-oriented approach Qt can provide. What I've tried:
#include <QtWidgets>
int main(int argc, char *argv[])
{
QApplication a(argc, argv);
QGraphicsScene scene;
QGraphicsView view(&scene);
double totalY = 0;
for (size_t i = 0; i < 5000; ++i) {
double totalX = 0;
for (size_t j = 0; j < 5000; ++j) {
// Add an arbitrary char
QGraphicsSimpleTextItem *simpleText = scene.addSimpleText(QChar('.'));
simpleText->setPos(totalX, totalY);
// Add cell's width and height respectively
totalX += 10;
}
totalY += 10;
}
view.show();
return a.exec();
}
But it turned out that creating 5000x5000 graphic items is waaay slower. My next idea is to create some sort of viewport which will eliminate the need to use graphic items. It could be some canvas (QImage), which I will clear and draw on every time the array is updated. But what do you recommend?
2nd Attempt
After the 1st attempt wasn't really satisfying, I followed the hint of V.K.:
I mean that drawing a character is very expensive operation. So I suggest that you draw each possible character (I assume there are not many of them) to some small image in memory. And than do bitblt (if you do not understand some word, then google for it), i.e. copy blocks of bytes to the final QImage. It will be much faster than painting texts.
testQLargeCharTable2.cc:
#include <cassert>
#include <algorithm>
#include <random>
#include <vector>
#include <QtWidgets>
template <typename Value>
class MatrixT {
private:
size_t _nCols;
std::vector<Value> _values;
public:
MatrixT(size_t nRows, size_t nCols, Value value = Value()):
_nCols((assert(nCols > 0), nCols)), _values(nRows * nCols, value)
{ }
size_t rows() const { return _values.size() / _nCols; }
size_t cols() const { return _nCols; }
Value* operator[](size_t i)
{
assert(i < rows());
return &_values[i * _nCols];
}
const Value* operator[](size_t i) const
{
assert(i < rows());
return &_values[i * _nCols];
}
};
using CharTable = MatrixT<char>;
class CharTableView: public QAbstractScrollArea {
private:
const CharTable* pTbl = nullptr;
using CharCache = std::map<char, QPixmap>;
int wCell;
int hCell;
CharCache cacheChars;
public:
CharTableView(QWidget* pQParent = nullptr);
virtual ~CharTableView() = default;
CharTableView(const CharTableView&) = delete;
CharTableView& operator=(const CharTableView&) = delete;
void set(CharTable* pTbl)
{
this->pTbl = pTbl;
updateScrollBars();
update();
}
protected:
virtual void resizeEvent(QResizeEvent* pQEvent) override;
virtual void paintEvent(QPaintEvent* pQEvent) override;
private:
void updateScrollBars();
const QPixmap& getCharPixmap(char c);
};
void CharTableView::resizeEvent(QResizeEvent* pQEvent)
{
updateScrollBars();
}
void CharTableView::paintEvent(QPaintEvent* pQEvent)
{
if (!pTbl) return;
const int xView = horizontalScrollBar()
? horizontalScrollBar()->value() : 0;
const int yView = verticalScrollBar()
? verticalScrollBar()->value() : 0;
const int wView = viewport()->width();
const int hView = viewport()->height();
const int iRow0 = yView / hCell;
const int iCol0 = xView / wCell;
const int iRowN = std::min((int)pTbl->rows(), (yView + hView) / hCell + 1);
const int iColN = std::min((int)pTbl->cols(), (xView + wView) / wCell + 1);
QPainter qPainter(viewport());
for (int iRow = iRow0; iRow < iRowN; ++iRow) {
const char*const row = (*pTbl)[iRow];
const int yCell = iRow * hCell - yView;
for (int iCol = iCol0; iCol < iColN; ++iCol) {
const int xCell = iCol * wCell - xView;
const QPixmap& qPixmap = getCharPixmap(row[iCol]);
qPainter.drawPixmap(
QRect(xCell, yCell, wCell, hCell),
qPixmap);
}
}
}
CharTableView::CharTableView(QWidget* pQWidget):
QAbstractScrollArea(pQWidget)
{
QFontMetrics qFontMetrics(viewport()->font());
wCell = 2 * qFontMetrics.averageCharWidth();
hCell = qFontMetrics.height();
}
void CharTableView::updateScrollBars()
{
const int w = (int)(pTbl ? pTbl->cols() : 0) * wCell;
const int h = (int)(pTbl ? pTbl->rows() : 0) * hCell;
const QSize sizeView = viewport()->size();
QScrollBar*const pQScrBarH = horizontalScrollBar();
pQScrBarH->setRange(0, w > sizeView.width() ? w - sizeView.width() : 0);
pQScrBarH->setPageStep(sizeView.width());
QScrollBar*const pQScrBarV = verticalScrollBar();
pQScrBarV->setRange(0, h > sizeView.height() ? h - sizeView.height() : 0);
pQScrBarV->setPageStep(sizeView.height());
}
const QPixmap& CharTableView::getCharPixmap(char c)
{
const CharCache::iterator iter = cacheChars.find(c);
if (iter != cacheChars.end()) return iter->second;
QPixmap& qPixmap = cacheChars[c] = QPixmap(wCell, hCell);
qPixmap.fill(QColor(0, 0, 0, 0));
{ QPainter qPainter(&qPixmap);
qPainter.drawText(
QRect(0, 0, wCell, hCell),
Qt::AlignCenter,
QString(QChar(c)));
}
return qPixmap;
}
int main(int argc, char** argv)
{
qDebug() << "Qt Version:" << QT_VERSION_STR;
const size_t n = 10;
QApplication app(argc, argv);
// setup data
const char chars[]
= "0123456789()[]{}/&%$!'+#?="
"abcdefghijklmnopqrstuvwxyz"
"ABCDEFGHIJKLMNOPQRSTUVWXYZ";
CharTable tbl(5000, 5000);
std::random_device rd;
std::mt19937 rng(rd()); // seed the generator
std::uniform_int_distribution<size_t> distr(0, std::size(chars) - 1);
for (size_t i = 0; i < tbl.rows(); ++i) {
char*const row = tbl[i];
for (size_t j = 0; j < tbl.cols(); ++j) {
row[j] = chars[distr(rng)];
}
}
// setup GUI
CharTableView qCharTableView;
qCharTableView.setWindowTitle("Large Character Table View - 2nd Attempt");
qCharTableView.resize(1024, 768);
qCharTableView.set(&tbl);
qCharTableView.show();
// runtime loop
return app.exec();
}
Output:
I did the same test with a full screen window (2560×1280). The performance still was comparable. (The snapped GIF animations were too large to be uploaded here.)
In opposition to the hint of V.K., I used QPixmap. QImage can be modified with a QPainter as well. There is also a QPainter::drawImage() available.
1st Attempt
My first attempt was to print characters in the paintEvent() of a class derived from QAbstractScrollArea. Thereby, I carefully skipped all rows and columns which are outside of the view area. At the first glance, the performance seemed not that bad but with full screen window size, the approach showed weaknesses. While dragging the scrollbars, the output was significantly lacking behind.
testQLargeCharTable1.cc:
#include <cassert>
#include <algorithm>
#include <random>
#include <vector>
#include <QtWidgets>
template <typename Value>
class MatrixT {
private:
size_t _nCols;
std::vector<Value> _values;
public:
MatrixT(size_t nRows, size_t nCols, Value value = Value()):
_nCols((assert(nCols > 0), nCols)), _values(nRows * nCols, value)
{ }
size_t rows() const { return _values.size() / _nCols; }
size_t cols() const { return _nCols; }
Value* operator[](size_t i)
{
assert(i < rows());
return &_values[i * _nCols];
}
const Value* operator[](size_t i) const
{
assert(i < rows());
return &_values[i * _nCols];
}
};
using CharTable = MatrixT<char>;
class CharTableView: public QAbstractScrollArea {
private:
const CharTable* pTbl = nullptr;
int wCell;
int hCell;
public:
CharTableView(QWidget* pQParent = nullptr);
virtual ~CharTableView() = default;
CharTableView(const CharTableView&) = delete;
CharTableView& operator=(const CharTableView&) = delete;
void set(CharTable* pTbl)
{
this->pTbl = pTbl;
updateScrollBars();
update();
}
protected:
virtual void resizeEvent(QResizeEvent* pQEvent) override;
virtual void paintEvent(QPaintEvent* pQEvent) override;
private:
void updateScrollBars();
};
void CharTableView::resizeEvent(QResizeEvent* pQEvent)
{
updateScrollBars();
}
void CharTableView::paintEvent(QPaintEvent* pQEvent)
{
if (!pTbl) return;
const int xView = horizontalScrollBar()
? horizontalScrollBar()->value() : 0;
const int yView = verticalScrollBar()
? verticalScrollBar()->value() : 0;
const int wView = viewport()->width();
const int hView = viewport()->height();
const int iRow0 = yView / hCell;
const int iCol0 = xView / wCell;
const int iRowN = std::min((int)pTbl->rows(), (yView + hView) / hCell + 1);
const int iColN = std::min((int)pTbl->cols(), (xView + wView) / wCell + 1);
QPainter qPainter(viewport());
for (int iRow = iRow0; iRow < iRowN; ++iRow) {
const char*const row = (*pTbl)[iRow];
const int yCell = iRow * hCell - yView;
const int yC = yCell + hCell / 2;
for (int iCol = iCol0; iCol < iColN; ++iCol) {
const int xCell = iCol * wCell - xView;
const int xC = xCell + wCell / 2;
qPainter.drawText(
QRect(xCell, yCell, wCell, hCell),
Qt::AlignCenter,
QString(QChar(row[iCol])));
}
}
}
CharTableView::CharTableView(QWidget* pQWidget):
QAbstractScrollArea(pQWidget)
{
QFontMetrics qFontMetrics(viewport()->font());
wCell = 2 * qFontMetrics.averageCharWidth();
hCell = qFontMetrics.height();
}
void CharTableView::updateScrollBars()
{
const int w = (int)pTbl->cols() * wCell;
const int h = (int)pTbl->rows() * hCell;
const QSize sizeView = viewport()->size();
QScrollBar*const pQScrBarH = horizontalScrollBar();
pQScrBarH->setRange(0, w - sizeView.width());
pQScrBarH->setPageStep(sizeView.width());
QScrollBar*const pQScrBarV = verticalScrollBar();
pQScrBarV->setRange(0, h - sizeView.height());
pQScrBarV->setPageStep(sizeView.height());
}
int main(int argc, char** argv)
{
qDebug() << "Qt Version:" << QT_VERSION_STR;
const size_t n = 10;
QApplication app(argc, argv);
// setup data
const char chars[]
= "0123456789()[]{}/&%$!'+#?="
"abcdefghijklmnopqrstuvwxyz"
"ABCDEFGHIJKLMNOPQRSTUVWXYZ";
CharTable tbl(5000, 5000);
std::random_device rd;
std::mt19937 rng(rd()); // seed the generator
std::uniform_int_distribution<size_t> distr(0, std::size(chars) - 1);
for (size_t i = 0; i < tbl.rows(); ++i) {
char*const row = tbl[i];
for (size_t j = 0; j < tbl.cols(); ++j) {
row[j] = chars[distr(rng)];
}
}
// setup GUI
CharTableView qCharTableView;
qCharTableView.setWindowTitle("Large Character Table View - 1st Attempt");
qCharTableView.resize(640, 480);
qCharTableView.set(&tbl);
qCharTableView.show();
// runtime loop
return app.exec();
}
Output:

The reason behind huge performance difference between two custom key implementations for std::unordered_set and how to fix it?

I have some simple class with several fields like this:
#ifndef XVector_h
#define XVector_h
struct XVFields
{
unsigned char JointPromosCountSinceBeginning;
unsigned char PromoWeeksCountSinceCurrPromoBeginning;
unsigned char NoPromoWeeksCountSinceLastJointPromo;
bool IsPromo;
};
class XVector
{
public:
XVFields XVFs;
unsigned char *DiscountUsagesCounts;
XVector();
~XVector();
};
#endif XVector_h
It's realization:
#include <sstream>
#include "XVector.h"
#include "DynProgTask.h"
XVector::XVector()
{
memset(&this->XVFs, 0, sizeof(XVFields));
this->DiscountUsagesCounts = (unsigned char*)malloc(DynProgTask::PresetUsCount/* * sizeof(unsigned char)*/);
memset(this->DiscountUsagesCounts, 0, DynProgTask::PresetUsCount/* * sizeof(unsigned char)*/);
this->UniqueValForHuman_private = NULL;
}
XVector::~XVector()
{
free(this->DiscountUsagesCounts);
}
I Have another class named TaskCase that has XVector as one of it's field. Single TaskCase - single XVector.
I have custom hashers and comparators for both of classes:
#ifndef XVectorHasher_h
#define XVectorHasher_h
#include <Windows.h>
#include "XVector.h"
struct XVectorHasher
{
size_t operator()(const XVector *k) const;
};
struct XVectorComparator
{
bool operator()(const XVector *xv1, const XVector *xv2) const;
};
#endif XVectorHasher_h
cpp:
#include "XVectorHasher.h"
#include "DynProgTask.h"
size_t XVectorHasher::operator()(const XVector *k) const
{
size_t result = 0;
const size_t prime = 31;
int unibytes_count = sizeof(XVFields) + (DynProgTask::PresetUsCount/* * sizeof(unsigned char)*/);
unsigned char *unibytes = (unsigned char*)malloc(unibytes_count);
memcpy(unibytes, &k->XVFs, sizeof(XVFields));
memcpy(&unibytes[sizeof(XVFields)], k->DiscountUsagesCounts, DynProgTask::PresetUsCount/* * sizeof(unsigned char)*/);
for (size_t i = 0; i < unibytes_count; i++)
result = unibytes[i] + (result * prime);
free(unibytes);
return result;
}
bool XVectorComparator::operator()(const XVector *xv1, const XVector *xv2) const
{
//this operator compares instances bytes to determine their equality
if (memcmp(&xv1->XVFs, &xv2->XVFs, sizeof(XVFields)) != 0)
return false;
if (memcmp(xv1->DiscountUsagesCounts, xv2->DiscountUsagesCounts, DynProgTask::PresetUsCount/* * sizeof(unsigned char)*/) != 0)
return false;
return true;
}
Another h:
#ifndef TaskCaseHasher_h
#define TaskCaseHasher_h
#include <Windows.h>
#include "TaskCase.h"
struct TaskCaseHasher
{
size_t operator()(const TaskCase *k) const;
};
struct TaskCaseComparator
{
bool operator()(const TaskCase *xv1, const TaskCase *xv2) const;
};
#endif TaskCaseHasher_h
Another cpp:
#include "XVector.h"
#include "TaskCaseHasher.h"
#include "DynProgTask.h"
size_t TaskCaseHasher::operator()(const TaskCase *tc) const
{
size_t result = 0;
const size_t prime = 31;
XVector *k = const_cast<TaskCase*>(tc)->GET_CurrX();
int unibytes_count = sizeof(XVFields) + (DynProgTask::PresetUsCount/* * sizeof(unsigned char)*/);
unsigned char *unibytes = (unsigned char*)malloc(unibytes_count);
memcpy(unibytes, &k->XVFs, sizeof(XVFields));
memcpy(&unibytes[sizeof(XVFields)], k->DiscountUsagesCounts, DynProgTask::PresetUsCount/* * sizeof(unsigned char)*/);
for (size_t i = 0; i < unibytes_count; i++)
result = unibytes[i] + (result * prime);
free(unibytes);
return result;
}
bool TaskCaseComparator::operator()(const TaskCase *tc1, const TaskCase *tc2) const
{
//this operator compares instances bytes to determine their equality
XVector *xv1 = const_cast<TaskCase*>(tc1)->GET_CurrX();
XVector *xv2 = const_cast<TaskCase*>(tc2)->GET_CurrX();
if (memcmp(&xv1->XVFs, &xv2->XVFs, sizeof(XVFields)) != 0)
return false;
if (memcmp(xv1->DiscountUsagesCounts, xv2->DiscountUsagesCounts, DynProgTask::PresetUsCount/* * sizeof(unsigned char)*/) != 0)
return false;
return true;
}
I have self made single directional list of TaskCase pointers. I iterate over them to find unique TaskCases (the ones which has different XVectors).
I have two different functions for that:
SmartArray<TaskCase*> *TaskStep::GET_UniqueCasesByCurrX() //This one doing it's job in 191 second
{
SmartArray<TaskCase*> *unitcs = new SmartArray<TaskCase*>(this->Cases->Length);
std::unordered_set<XVector*, XVectorHasher, XVectorComparator> unique_xs;
int curr_unique_case_i = 0;
for (TaskCase *itr = this->Cases->Head; itr; itr = itr->NEXT_CASE_PTR)
if (unique_xs.find(itr->GET_CurrX()) == unique_xs.end())
{
unique_xs.insert(itr->GET_CurrX());
unitcs->Data[curr_unique_case_i] = itr;
curr_unique_case_i++;
}
unitcs->Resize(curr_unique_case_i);
return unitcs;
}
std::unordered_set<TaskCase*, TaskCaseHasher, TaskCaseComparator> *TaskStep::GET_UniqueCasesSetByCurrX() //This one doing it's job in 363 seconds
{
std::unordered_set<TaskCase*, TaskCaseHasher, TaskCaseComparator> *unique_tcs = new std::unordered_set<TaskCase*, TaskCaseHasher, TaskCaseComparator>();
for (TaskCase *itr = this->Cases->Head; itr; itr = itr->NEXT_CASE_PTR)
if (unique_tcs->find(itr) == unique_tcs->end())
unique_tcs->insert(itr);
return unique_tcs;
}
std::unordered_set based on XVectors (191 sec) is way faster than std::unordered_set based on TaskCases (363 sec).
But I don't understand why. Anyway it falls to calling GET_CurrX() which returns XVector. In first case I can it by myself and in second case it called automatically inside set. What is possible to do to speedup TaskCase set?

Compilation error in cuda kernel calling/ passing parameters

In the actual code, my intention is to get the output array by comparing the input array to the scalar. Or simply output = input > scalar.
Simple sample host-side code as shown below is working as expected.
float *h_data1 = (float *)malloc(W1*H1 * sizeof(float));
bool *h_result = (bool *)malloc(H1*W2 * sizeof(bool));
float *d_data1; gpuErrchk(cudaMalloc(&d_data1, W1*H1 * sizeof(float)));
bool *d_result; gpuErrchk(cudaMalloc(&d_result, H1*W2 * sizeof(bool)));
for (int i = 0; i < W1*H1; i++) h_data1[i] = (float)i;
gpuErrchk(cudaMemcpy(d_data1, h_data1, W1*H1 * sizeof(float), cudaMemcpyHostToDevice));
float scalar = 2;
compGraterRetOut<float, bool><< <outw, outh >> > (d_data1, d_result, scalar);
gpuErrchk(cudaPeekAtLastError());
gpuErrchk(cudaDeviceSynchronize());
The device side code is
template<typename TType, typename TTypeOut>
__global__ void compGraterRetOut(TType *dataIn, TTypeOut *dataOut, const TType scalar)
{
int i = blockIdx.x * blockDim.x + threadIdx.x;
dataOut[i] = (dataIn[i] > scalar);
}
Coming to the actual code, I have an image class as shown below (Only some part of the class is shown).
template<typename TType, ImageType TImageType>
class Image
{
public:
Image(uint32_t width, uint32_t height, uint32_t depth = 1);
private:
TType* m_data;
uint32_t m_width;
uint32_t m_height;
uint32_t m_depth;
uint32_t m_bufferSize;
};
template<typename TType, ImageType TImageType>
Image<TType, TImageType>::Image(uint32_t width, uint32_t height, uint32_t depth) :m_width(width), \
m_height(height), m_depth(depth)
{
if (width == 0 || height == 0)
return;
cudaError_t cudaStatus;
//m_data = new TType[m_width * m_height * m_depth];
gpuErrchk(cudaStatus = cudaMalloc(&m_data, sizeof(TType) * m_width * m_height * m_depth));
if (cudaStatus == cudaSuccess)
{
m_bufferSize = m_width * m_height * m_depth;
}
else
{
std::cout << "Error malloc function failed [" << cudaStatus << "]" << std::endl;
}
};
To achieve the objective out = in > scalar, operator> is overloaded as shown below. This threw a compilation error as
"member "Image::m_data [with TType=float_t,
TImageType=ImageType::WHD]""
the code looks as shown below.
inline Image<uint32_t, TImageType> Image<TType, TImageType>::operator>(TType scalar) const
{
Image<uint32_t, TImageType> ret(m_width, m_height, m_depth);
compGraterRetOut<TType, uint32_t> << <m_width * 4, (m_height * m_depth/4) >> > (m_data, ret.m_data, scalar);
gpuErrchk(cudaGetLastError());
gpuErrchk(cudaDeviceSynchronize());
return std::move(ret);
}
To fix the compilation error I changed the function operator>. Here, cuda memory is allocated inside the function instead of inside of class's contructor.
template<class TType, ImageType TImageType>
inline Image<uint32_t, TImageType> Image<TType, TImageType>::operator>(TType scalar) const
{
cudaError_t cudaStatus;
uint32_t *dataout;
gpuErrchk(cudaMalloc(&dataout, m_width*m_height*m_depth * sizeof(uint32_t)));
Image<uint32_t, TImageType> ret(dataout, m_width, m_height, m_depth);
compGraterRetOut<TType, uint32_t> << <m_width * 4, (m_height * m_depth/4) >> > (m_data, dataout, scalar);
gpuErrchk(cudaGetLastError());
gpuErrchk(cudaDeviceSynchronize());
return std::move(ret);
}
Finally, my question is why last code compiled without an error, but not previous to that?
The problem has nothing to do with Cuda. It is the problem with templates and OOPS. When template class access a member in its own type it would not violate OOPS paradigm. Accessing a private member of the same class with different template arguments violate the OOPS paradigm. That is the answer.

Marco Monster's Car Physics Demo adaptation in C++, car behaves chaotically

I am trying to adopt the Marco Monster's Physics Demo (document: http://www.asawicki.info/Mirror/Car%20Physics%20for%20Games/Car%20Physics%20for%20Games.html and C reference code: https://github.com/spacejack/carphysics2d/blob/master/marco/Cardemo.c) in C++.
I ran into the problem that the car spins around itself and moves along the axis in an unpredictable manner (as it reacts to the input but does so unpredictably). I have spend the last 4 days trying to find the problem and got nothing. Please help as I am getting desperate with that. I have separated functionality of the car into separate classes (for better maintenance) and deduced that the problem occurs within the Wheel class and in Car class. Here is the code:
Wheel.h
class Wheel
{
public:
Wheel(const bool &isABSOn, const float &frontAxleToCG, const float &rearAxleToCG, const float &tireGripValue, const float &lockedTireGripCoef,
const float &lateralStiffnessFront, const float &lateralStiffnessRear, const float &brakeForceCoef, const float &ebrakeForceCoef,
const float &brakeTorque);
void SetValues(bool &isEbrakeOn, float &drivetrainTorque, float &steeringAngle, float &brakingInput,
float &frontAxleLoad, float &rearAxleLoad, float &surfaceCoefficient, float &angularVelocity, Vector2f &localVelocity);
void Update();
Vector2f GetSumForce();
float GetLateralTorque();
private:
bool m_IsEBrakeOn;
const bool m_IsABSOn;
float m_YawSpeed, m_VehicleAngularVelocity, m_VehicleRotationAngle, m_VehicleSideSlip, m_VehicleSlipAngleFrontAxle, m_VehicleSlipAngleRearAxle,
m_VehicleSteeringAngleRadInput,
m_SurfaceTypeGripCoefficient, m_DrivetrainTorqueNm, m_BrakingForceInputPercentage, m_FrontAxleLoad, m_RearAxleLoad;
const float m_CGtoFrontAxle, m_CGtoRearAxle, m_BaseTireGripValue, m_LockedTireGripCoefficent, m_LateralStiffnessFront,
m_LateralStiffnessRear, m_BreakForceCoefficent, m_EBrakeForceCoefficent, m_BrakeTorqueLimit, m_StableSpeedBoundary;
Vector2f m_LocalVehicleVelocity, m_VehicleLateralForceFront, m_VehicleLateralForceRear, m_VehicleLongtitudonalForceRear;
float FrontTireGripValue();
float RearTireGripValue();
float CombinedBrakingForceValueRearAxle();
};
Wheel.cpp
Wheel::Wheel(const bool &isABSOn, const float &frontAxleToCG, const float &rearAxleToCG, const float &tireGripValue, const float &lockedTireGripCoef,
const float &lateralStiffnessFront, const float &lateralStiffnessRear, const float &brakeForceCoef, const float &ebrakeForceCoef,
const float &brakeTorque)
: m_IsABSOn{ isABSOn }
, m_CGtoFrontAxle{ frontAxleToCG }
, m_CGtoRearAxle{ rearAxleToCG }
, m_BaseTireGripValue{ tireGripValue }
, m_LockedTireGripCoefficent{ lockedTireGripCoef }
, m_LateralStiffnessFront { lateralStiffnessFront }
, m_LateralStiffnessRear{ lateralStiffnessRear }
, m_BreakForceCoefficent{ brakeForceCoef }
, m_EBrakeForceCoefficent{ ebrakeForceCoef }
, m_BrakeTorqueLimit{ brakeTorque }
, m_StableSpeedBoundary{ 40.f } {}
void Wheel::Update()
{
if ((-0.01f < m_LocalVehicleVelocity.x) || (m_LocalVehicleVelocity.x < 0.01f))
{
m_YawSpeed = 0.f;
}
else
{
m_YawSpeed = ((m_CGtoFrontAxle + m_CGtoRearAxle) / 2.f) * m_VehicleAngularVelocity;
}
if ((-0.01f < m_LocalVehicleVelocity.x) || (m_LocalVehicleVelocity.x < 0.01f))
{
m_VehicleRotationAngle = 0.f;
}
else
{
m_VehicleRotationAngle = std::atan2(m_YawSpeed, m_LocalVehicleVelocity.x);
}
if ((-0.01f < m_LocalVehicleVelocity.x) || (m_LocalVehicleVelocity.x < 0.01f))
{
m_VehicleSideSlip = 0.f;
}
else
{
m_VehicleSideSlip = std::atan2(m_LocalVehicleVelocity.y, m_LocalVehicleVelocity.x);
}
m_VehicleSlipAngleFrontAxle = m_VehicleSideSlip + m_VehicleRotationAngle - m_VehicleSteeringAngleRadInput;
m_VehicleSlipAngleRearAxle = m_VehicleSideSlip - m_VehicleRotationAngle;
m_VehicleLateralForceFront.x = 0.f;
m_VehicleLateralForceFront.y = m_LateralStiffnessFront * m_VehicleSlipAngleFrontAxle;
m_VehicleLateralForceFront.y = std::fminf(FrontTireGripValue(), m_VehicleLateralForceFront.y);
m_VehicleLateralForceFront.y = std::fmaxf(-FrontTireGripValue(), m_VehicleLateralForceFront.y);
m_VehicleLateralForceFront.y *= m_FrontAxleLoad;
m_VehicleLateralForceRear.x = 0.f;
m_VehicleLateralForceRear.y = m_LateralStiffnessRear * m_VehicleSlipAngleRearAxle;
m_VehicleLateralForceRear.y = std::fminf(RearTireGripValue(), m_VehicleLateralForceRear.y);
m_VehicleLateralForceRear.y = std::fmaxf(-RearTireGripValue(), m_VehicleLateralForceRear.y);
m_VehicleLateralForceRear.y *= m_RearAxleLoad;
m_VehicleLongtitudonalForceRear.x = m_SurfaceTypeGripCoefficient * (m_DrivetrainTorqueNm - (CombinedBrakingForceValueRearAxle() * utils::Sign(m_LocalVehicleVelocity.x)));
m_VehicleLongtitudonalForceRear.y = 0.f;
}
Vector2f Wheel::GetSumForce()
{
if (m_LocalVehicleVelocity.Length() < 1.0f && m_DrivetrainTorqueNm < 0.5f)
{
m_LocalVehicleVelocity.x = m_LocalVehicleVelocity.y = 0.f;
m_VehicleLateralForceFront.x = m_VehicleLateralForceFront.y = m_VehicleLateralForceRear.x = m_VehicleLateralForceRear.y = 0.f;
}
return Vector2f
{
m_VehicleLongtitudonalForceRear.x + std::sinf(m_VehicleSteeringAngleRadInput) * m_VehicleLateralForceFront.x + m_VehicleLateralForceRear.x,
m_VehicleLongtitudonalForceRear.y + std::cosf(m_VehicleSteeringAngleRadInput) * m_VehicleLateralForceFront.y + m_VehicleLateralForceRear.y
};
}
float Wheel::GetLateralTorque()
{
return m_CGtoFrontAxle * m_VehicleLateralForceFront.y - m_CGtoRearAxle * m_VehicleLateralForceRear.y;
}
void Wheel::SetValues(bool &isEbrakeOn, float &drivetrainTorque, float &steeringAngle, float &brakingInput,
float &frontAxleLoad, float &rearAxleLoad, float &surfaceCoefficient, float &angularVelocity, Vector2f &localVelocity)
{
m_IsEBrakeOn = isEbrakeOn;
m_DrivetrainTorqueNm = drivetrainTorque;
m_VehicleSteeringAngleRadInput = steeringAngle;
m_BrakingForceInputPercentage = brakingInput;
m_FrontAxleLoad = frontAxleLoad;
m_RearAxleLoad = rearAxleLoad;
m_SurfaceTypeGripCoefficient = surfaceCoefficient;
m_LocalVehicleVelocity = localVelocity;
m_VehicleAngularVelocity = angularVelocity;
}
float Wheel::CombinedBrakingForceValueRearAxle()
{
return (m_BrakeTorqueLimit * m_BrakingForceInputPercentage);
}
float Wheel::FrontTireGripValue()
{
return m_BaseTireGripValue * m_SurfaceTypeGripCoefficient;
}
float Wheel::RearTireGripValue()
{
if ((CombinedBrakingForceValueRearAxle() > m_DrivetrainTorqueNm) && (!m_IsABSOn) && (m_LocalVehicleVelocity.Length() > m_StableSpeedBoundary))
{
return m_BaseTireGripValue * m_LockedTireGripCoefficent * m_SurfaceTypeGripCoefficient;
}
else
{
return m_BaseTireGripValue * m_SurfaceTypeGripCoefficient;
}
}
Car.h
class Car
{
public:
Car(VehicleCfg *pVehicleSpecs);
InputControl *m_pThisSteeringAndPedals;
void Draw() const;
void Update(float &elapsedSec);
private:
bool m_NOSStatus, m_IsEBrakeOn;
int m_GearShifterInput;
float m_VehicleThrottleInpute, m_VehicleSteeringAngleRadInput, m_VehicleBrakeInput,
m_DrivetrainTorqueOutput, m_FrontAxleLoad, m_RearAxleLoad,
m_ElapsedSec, m_VehicleHeadingDirectionAngleRad, m_CSHeading, m_SNHeading,
m_VehicleRotationAngle, m_YawSpeed, m_VehicleAngularVelocity, m_VehicleSideSlip,
m_VehicleSlipAngleFrontAxle, m_VehicleSlipAngleRearAxle,
m_SurfaceCoefficent, m_AngularTorque, m_AngularAcceleration, m_VehicleHealthStatus;
const float m_FrontToCG, m_RearToCG, m_CarMass, m_Inertia, m_RollingResistance, m_DragCoefficient;
Point2f m_WorldVehicleCoordinate;
Vector2f m_LocalVehicleVelocity, m_WorldVehicleVelocity, m_VehicleLocalAcceleration, m_VehicleWorldAcceleration,
m_WheelForces, m_ResistanceForces, m_TotalForce;
Suspension *m_pThisSuspension;
Drivetrain *m_pThisDrivetrain;
Wheel *m_pThisWheel;
ModularRenderer *m_pThisVehicleDrawn;
};
Car.cpp
void Car::Update(float &elapsedSec)
{
m_ElapsedSec = elapsedSec;
m_GearShifterInput = m_pThisSteeringAndPedals->GetCurrentGearValue();
m_VehicleThrottleInpute = m_pThisSteeringAndPedals->GetCurrentThrottleValue(m_ElapsedSec, m_VehicleThrottleInpute);
m_VehicleSteeringAngleRadInput = m_pThisSteeringAndPedals->GetCurrentSteeringValue(m_ElapsedSec);
m_VehicleBrakeInput = m_pThisSteeringAndPedals->GetCurrrentBrakeValue(m_ElapsedSec);
m_NOSStatus = m_pThisSteeringAndPedals->GetIsNOSOnValue();
m_IsEBrakeOn = m_pThisSteeringAndPedals->GetIsEBrakeOnValue();
m_CSHeading = std::cosf(m_VehicleHeadingDirectionAngleRad);
m_SNHeading = std::sinf(m_VehicleHeadingDirectionAngleRad);
m_LocalVehicleVelocity.x = m_CSHeading * m_WorldVehicleVelocity.y + m_SNHeading * m_WorldVehicleVelocity.x;
m_LocalVehicleVelocity.y = -m_SNHeading * m_WorldVehicleVelocity.y + m_CSHeading * m_WorldVehicleVelocity.x;
m_pThisDrivetrain->SetValues(m_NOSStatus, m_GearShifterInput, m_VehicleThrottleInpute, m_LocalVehicleVelocity.Length());
m_DrivetrainTorqueOutput = m_pThisDrivetrain->GetDrivetrainOutput(m_ElapsedSec);
m_pThisSuspension->SetValues(m_VehicleLocalAcceleration, m_LocalVehicleVelocity.Length());
m_FrontAxleLoad = m_pThisSuspension->GetFrontAxleWeight();
m_RearAxleLoad = m_pThisSuspension->GetRearAxleWeight();
m_pThisWheel->SetValues(m_IsEBrakeOn, m_DrivetrainTorqueOutput, m_VehicleSteeringAngleRadInput, m_VehicleBrakeInput, m_FrontAxleLoad,
m_RearAxleLoad, m_SurfaceCoefficent, m_VehicleAngularVelocity, m_LocalVehicleVelocity);
m_pThisWheel->Update();
m_WheelForces = m_pThisWheel->GetSumForce();
m_AngularTorque = m_pThisWheel->GetLateralTorque();
m_ResistanceForces.x = -((m_RollingResistance * m_LocalVehicleVelocity.x) + (m_DragCoefficient * m_LocalVehicleVelocity.x * std::abs(m_LocalVehicleVelocity.x)));
m_ResistanceForces.y = -((m_RollingResistance * m_LocalVehicleVelocity.y) + (m_DragCoefficient * m_LocalVehicleVelocity.y * std::abs(m_LocalVehicleVelocity.y)));
m_TotalForce.x = m_WheelForces.x + m_ResistanceForces.x;
m_TotalForce.y = m_WheelForces.y + m_ResistanceForces.y;
m_VehicleLocalAcceleration.x = m_TotalForce.x / m_CarMass;
m_VehicleLocalAcceleration.y = m_TotalForce.y / m_CarMass;
if (m_WorldVehicleVelocity.Length() < 1.0f && m_VehicleThrottleInpute < 0.5f)
{
m_LocalVehicleVelocity.x = m_LocalVehicleVelocity.y = 0.f;
m_VehicleAngularVelocity = m_AngularTorque = m_AngularAcceleration = 0.f;
}
m_AngularAcceleration = m_AngularTorque / m_Inertia;
m_VehicleWorldAcceleration.x = m_CSHeading * m_VehicleLocalAcceleration.y + m_SNHeading * m_VehicleLocalAcceleration.x;
m_VehicleWorldAcceleration.y = -(m_SNHeading) * m_VehicleLocalAcceleration.y + m_CSHeading * m_VehicleLocalAcceleration.x;
m_WorldVehicleVelocity.x += m_ElapsedSec * m_VehicleWorldAcceleration.x;
m_WorldVehicleVelocity.y += m_ElapsedSec * m_VehicleWorldAcceleration.y;
m_WorldVehicleCoordinate.x += m_ElapsedSec * m_WorldVehicleVelocity.x;
m_WorldVehicleCoordinate.y += m_ElapsedSec * m_WorldVehicleVelocity.y;
std::cout << "m_WorldVehicleCoordinate: " << m_WorldVehicleCoordinate.x << ", " << m_WorldVehicleCoordinate.y << "\n";
m_VehicleAngularVelocity += m_ElapsedSec * m_AngularAcceleration;
m_VehicleHeadingDirectionAngleRad += m_ElapsedSec * m_VehicleAngularVelocity;
m_pThisVehicleDrawn->SetVariables(int(0), int(0), int(0), int(0), m_VehicleHeadingDirectionAngleRad, m_VehicleSteeringAngleRadInput, m_WorldVehicleCoordinate);
}
void Car::Draw() const
{
m_pThisVehicleDrawn->DrawTheVehicle();
}
I think that the error occurs due to some sort of singularity that occurs in the calculations but I fail to see where that occurs.
Since the car spins around, I looked at your use of angular velocity. The m_VehicleAngularVelocity value is not initialized in either class, so it has an indeterminate value. The only time it has a value set is in your check for the car being stopped.
The unpredictable motion is likely a similar problem.
You should initialize all your class members in a constructor to avoid those problems.
Why does Wheel::SetValues take all its parameters by reference? Since it is just copying them to internal variables, and they are basic types, just pass them in by value.

C++ std::vector iterators error

First of all I'm sorry for my bad english, hope you guys will understand me :) Im writing WinAPI game and my classes behave very strange: all operations with vector
crash my program so Windows says that my .exe stopped working. But when I debug these lines
I get exceptions.
This is how my class header looks like:
#ifndef FIGURE_H_INCLUDED
#define FIGURE_H_INCLUDED
#include <vector>
#include <Windows.h>
#include "Other.h"
using namespace std;
enum Figure_Type { I, J, L, O, S, T, Z };
class Figure
{
public:
/* CONSTRUCTORS */
Figure();
Figure(Figure_Type);
/* MOVEMENT */
bool Move(vector<Cell>&, Direction&);
void Drop(vector<Cell>&);
bool Rotate(vector<Cell>&);
/* OTHER */
void Draw(HDC&);
private:
/* METHODS */
void Generate();
void GenerateMasks();
void GenerateFigure();
Figure GetFigureCopy() const;
/* DATA */
Shift shift;
char mask[4][4];
vector<Cell> vCells;
Figure_Type type;
int rotation;
};
#endif
My constructors are using Generate() method, which code is:
void Figure::GenerateFigure()
{
vCells.clear();
int defPosX = 4,
defPosY = 20;
Cell cell;
for(int y = 0; y < 4; y++)
{
for(int x = 0; x < 4; x++)
{
if(mask[y][x] == '0')
{
cell.x = defPosX + x + shift.dx;
cell.y = defPosY - y + shift.dy;
vCells.push_back(cell);
}
}
}
}
And I'm getting exceptions on vCells.clear() method and (if I comment first line) vCells.push_back(cell) line. Actually every operation with vector / vector iterators crash my program even incrementing iterator, those are just the first so my code isn't running any longer after them.
Exception text:
"Unhandled exception at 0x5A4ACCD2 (msvcp110d.dll) in Tetris_completely_new.exe: 0xC000041D: An unhandled exception was encountered during a user callback."
And these exceptions are thrown on 217's line of "xutility". I commented it:
....
// MEMBER FUNCTIONS FOR _Container_base12
inline void _Container_base12::_Orphan_all()
{ // orphan all iterators
#if _ITERATOR_DEBUG_LEVEL == 2
if (_Myproxy != 0)
{ // proxy allocated, drain it
_Lockit _Lock(_LOCK_DEBUG);
for (_Iterator_base12 **_Pnext = &_Myproxy->_Myfirstiter;
*_Pnext != 0; *_Pnext = (*_Pnext)->_Mynextiter)
**(*_Pnext)->_Myproxy = 0;** // <------------ THIS LINE
_Myproxy->_Myfirstiter = 0;
}
#endif /* _ITERATOR_DEBUG_LEVEL == 2 */
}
....
Here is how my Cell struct looks like:
struct Cell
{
Cell() : x(1), y(1) { }
Cell(int _x, int _y): x(_x), y(_y) { }
void Draw(HDC&) const;
bool operator ==(const Cell& a) const { return (x == a.x && y == a.y); }
bool operator !=(const Cell& a) const { return !(*this == a); }
int x;
int y;
};
And Figure constructor:
Figure::Figure()
{
srand(time(NULL));
vCells.clear();
type = Figure_Type(rand() % 7);
rotation = 0;
shift.dx = 0;
shift.dy = 0;
Generate();
}
You're likely invoking undefined behaviour.
Without any more information, I'd say you're calling instance methods through stale object references/pointers (a reference taken at the time of callback registration is no longer valid?).
Also, as currently written in the question, you're generating a figure based on unitialized bytes in mask, so you'd likely want to initialize these too.
Here's a take on oa slightly modernized/cleaned up version. Note
the use of initializer lists
uniform initialization
reordered member initialization
not using using namespace in headers
moved srand into main instead of the constructor
See it Live on Coliru
#ifndef FIGURE_H_INCLUDED
#define FIGURE_H_INCLUDED
#include <vector>
#ifdef _WIN32
# include <Windows.h>
# include "Other.h"
#else
# include <cstdint>
# include <cstdlib>
# include <ctime>
using HDC = uint32_t;
#endif
struct Cell
{
Cell(int _x=1, int _y=1): x(_x), y(_y) { }
void Draw(HDC&) const;
bool operator ==(const Cell& a) const { return (x == a.x && y == a.y); }
bool operator !=(const Cell& a) const { return !(*this == a); }
int x;
int y;
};
struct Shift
{
Shift(int dx=0, int dy=0) : dx(dx), dy(dy) {}
int dx, dy;
};
enum class Direction
{
up, down, left, right
};
enum Figure_Type { I, J, L, O, S, T, Z };
class Figure
{
public:
/* CONSTRUCTORS */
Figure();
Figure(Figure_Type);
/* MOVEMENT */
bool Move(std::vector<Cell>&, Direction&);
void Drop(std::vector<Cell>&);
bool Rotate(std::vector<Cell>&);
/* OTHER */
void Draw(HDC&);
private:
/* METHODS */
void Generate();
void GenerateMasks();
void GenerateFigure();
Figure GetFigureCopy() const;
/* DATA */
char mask[4][4];
std::vector<Cell> vCells;
Figure_Type type;
int rotation;
Shift shift;
};
#endif
/*
* And I'm getting exceptions on vCells.clear() method and (if I comment first
* line) vCells.push_back(cell) line. Actually every operation with vector /
* vector iterators crash my program even incrementing iterator, those are just
* the first so my code isn't running any longer after them.
*
* Exception text:
* **"Unhandled exception at 0x5A4ACCD2 (msvcp110d.dll) in
* Tetris_completely_new.exe: 0xC000041D: An unhandled exception was
* encountered during a user callback."**
*
* And these exceptions are thrown on 217's line of "xutility". I commented it:
*
* ....
* // MEMBER FUNCTIONS FOR _Container_base12
* inline void _Container_base12::_Orphan_all()
* { // orphan all iterators
* #if _ITERATOR_DEBUG_LEVEL == 2
* if (_Myproxy != 0)
* { // proxy allocated, drain it
* _Lockit _Lock(_LOCK_DEBUG);
*
* for (_Iterator_base12 **_Pnext = &_Myproxy->_Myfirstiter;
* *_Pnext != 0; *_Pnext = (*_Pnext)->_Mynextiter)
* **(*_Pnext)->_Myproxy = 0;** // <------------ THIS LINE
* _Myproxy->_Myfirstiter = 0;
* }
* #endif // _ITERATOR_DEBUG_LEVEL == 2
* }
* ....
*
* Here is how my **Cell struct** looks like:
*/
//And **Figure constructor**:
Figure::Figure()
: mask {{0}},
vCells(),
type((Figure_Type) (rand() % 7)),
rotation(0),
shift({0,0})
{
Generate();
}
//My constructors are using Generate() method, which code is:
void Figure::Generate()
{
GenerateFigure();
}
void Figure::GenerateFigure()
{
vCells.clear();
for(int y = 0; y < 4; y++) {
for(int x = 0; x < 4; x++) {
if(mask[y][x] == '0')
vCells.push_back({4 + x + shift.dx, 20 - y + shift.dy});
}
}
}
int main()
{
srand(time(0));
Figure fig1;
Figure fig2;
}