I'm writing a simulation of the Ising model in 2D. The model behaves as predicted except for one thing: the critical temperature is roughly 3.5 while it should be near 2/ln(2 + sqrt (2)).
The project is a C++ program that generates the data, and a shell script that exercises the program. The full code can be found here. Also here's lattice.cpp
#include <iostream>
#include "include/lattice.h"
using namespace std;
/*
Copy assignment operator, too long to include in the header.
*/
lattice &lattice::operator=(const lattice &other) {
size_ = other.size_;
spins_ = other.spins_;
J_ = other.J_;
H_ = other.H_;
delete spins_;
return *this;
}
void lattice::print() {
unsigned int area = size_ * size_;
for (unsigned int i = 0; i < area; i++) {
cout << to_symbol(spins_->at(i));
if (i % size_ == size_ - 1)
cout << endl;
}
cout << endl;
}
/*
Computes the energy associated with a spin at the given point.
It is explicitly float as that would allow the compiler to make use of multiple
registers instead of keeping track of unneeded precision. (typically J, H ~ 1).
*/
float lattice::compute_point_energy(int row, int col) {
int accumulator = get(row + 1, col) + get(row - 1, col) + get(row, col - 1) +
get(row, col + 1);
return -get(row, col) * (accumulator * J_ + H_);
}
/*
Computes total magnetisation in O(n^2). Thread safe
*/
int lattice::total_magnetisation() {
int sum = 0;
#pragma omp parallel for reduction(+ : sum)
for (unsigned int i = 0; i < size_ * size_; i++) {
sum += spins_->at(i);
}
return sum;
}
int inline to_periodic(int row, int col, int size) {
if (row < 0 || row >= size)
row = abs(size - abs(row));
if (col < 0 || col >= size)
col = abs(size - abs(col));
return row * size + col;
}
with lattice.h
#ifndef lattice_h
#define lattice_h
#include <cmath>
#include <vector>
/* Converts spin up/down to easily printable symbols. */
char inline to_symbol(int in) { return in == -1 ? '-' : '+'; }
/* Converts given pair of indices to those with periodic boundary conditions. */
int inline to_periodic(int row, int col, int size) {
if (row < 0 || row >= size)
row = abs(size - abs(row));
if (col < 0 || col >= size)
col = abs(size - abs(col));
return row * size + col;
}
class lattice {
private:
unsigned int size_;
// vector<bool> would be more space efficient, but it would not allow
// multithreading
std::vector<short> *spins_;
float J_;
float H_;
public:
lattice() noexcept : size_(0), spins_(NULL), J_(1.0), H_(0.0) {}
lattice(int new_size, double new_J, double new_H) noexcept
: size_(new_size), spins_(new std::vector<short>(size_ * size_, 1)),
J_(new_J), H_(new_H) {}
lattice(const lattice &other) noexcept
: lattice(other.size_, other.J_, other.H_) {
#pragma omp parallel for
for (unsigned int i = 0; i < size_ * size_; i++)
spins_->at(i) = other.spins_->at(i);
}
lattice &operator=(const lattice &);
~lattice() { delete spins_; }
void print();
short get(int row, int col) {
return spins_->at(to_periodic(row, col, size_));
}
unsigned int get_size() { return size_; }
void flip(int row, int col) { spins_->at(to_periodic(row, col, size_)) *= -1; }
int total_magnetisation();
float compute_point_energy(int row, int col);
};
#endif
and simulation.cpp
#include <iostream>
#include <math.h>
#include "include/simulation.h"
using namespace std;
/*
Advances the simulation a given number of steps, and updates/prints the statistics
into the given file pointer.
Defaults to stdout.
The number of time_steps is explcitly unsigned, so that linters/IDEs remind
the end user of the file that extra care needs to be taken, as well as to allow
advancing the simulation a larger number of times.
*/
void simulation::advance(unsigned int time_steps, FILE *output) {
unsigned int area = spin_lattice_.get_size() * spin_lattice_.get_size();
for (unsigned int i = 0; i < time_steps; i++) {
// If we don't update mean_energy_ every time, we might get incorrect
// thermodynamic behaviour.
total_energy_ = compute_energy(spin_lattice_);
double temperature_delta = total_energy_/area - mean_energy_;
if (abs(temperature_delta) < 1/area){
cerr<<temperature_delta<<"! Reached equilibrium "<<endl;
}
temperature_ += temperature_delta;
mean_energy_ = total_energy_ / area;
if (time_ % print_interval_ == 0) {
total_magnetisation_ = spin_lattice_.total_magnetisation();
mean_magnetisation_ = total_magnetisation_ / area;
print_status(output);
}
advance();
}
}
/*
Advances the simulation a single step.
DOES NOT KEEP TRACK OF STATISTICS. Hence private.
*/
void simulation::advance() {
#pragma omp parallel for collapse(2)
for (unsigned int row = 0; row < spin_lattice_.get_size(); row++) {
for (unsigned int col = 0; col < spin_lattice_.get_size(); col++) {
double dE = compute_dE(row, col);
double p = r_.random_uniform();
float rnd = rand() / (RAND_MAX + 1.);
if (exp(-dE / temperature_) > rnd) {
spin_lattice_.flip(row, col);
}
}
}
time_++;
}
/*
Computes change in energy due to flipping one single spin.
The function returns a single-precision floating-point number, as data cannot under
most circumstances make use of greater precision than that (save J is set to a
non-machine-representable value).
The code modifies the spin lattice, as an alternative (copying the neighborhood
of a given point), would make the code run slower by a factor of 2.25
*/
float simulation::compute_dE(int row, int col) {
float e_0 = spin_lattice_.compute_point_energy(row, col);
return -4*e_0;
}
/*
Computes the total energy associated with spins in the spin_lattice_.
I originally used this function to test the code that tracked energy as the lattice
itself was modified, but that code turned out to be only marginally faster, and
not thread-safe. This is due to a race condition: when one thread uses a neighborhood
of a point, while another thread was computing the energy of one such point in
the neighborhood of (row, col).
*/
double simulation::compute_energy(lattice &other) {
double energy_sum = 0;
unsigned int max = other.get_size();
#pragma omp parallel for reduction(+ : energy_sum)
for (unsigned int i = 0; i < max; i++) {
for (unsigned int j = 0; j < max; j++) {
energy_sum += other.compute_point_energy(i, j);
}
}
return energy_sum;
}
void simulation::set_to_chequerboard(int step){
if (time_ !=0){
return;
}else{
for (unsigned int i=0; i< spin_lattice_.get_size(); ++i){
for (unsigned int j=0; j<spin_lattice_.get_size(); ++j){
if ((i/step)%2-(j/step)%2==0){
spin_lattice_.flip(i, j);
}
}
}
}
}
with simulation.h
#ifndef simulation_h
#define simulation_h
#include "lattice.h"
#include "rng.h"
#include <gsl/gsl_rng.h>
/*
The logic of the entire simulation of the Ising model of magnetism.
This simulation will run and print statistics at a given time interval.
A simulation can be advanced a single time step, or many at a time,
*/
class simulation {
private:
unsigned int time_ = 0; // Current time of the simulation.
rng r_ = rng();
lattice spin_lattice_;
double temperature_;
double mean_magnetisation_ = 1;
double mean_energy_;
double total_magnetisation_;
double total_energy_;
unsigned int print_interval_ = 1;
void advance();
public:
void set_print_interval(unsigned int new_print_interval) { print_interval_ = new_print_interval; }
simulation(int new_size, double new_temp, double new_J, double new_H)
: time_(0), spin_lattice_(lattice(new_size, new_J, new_H)), temperature_(new_temp),
mean_energy_(new_J * (-4)), total_magnetisation_(new_size * new_size),
total_energy_(compute_energy(spin_lattice_)) {}
void print_status(FILE *f) {
f = f==NULL? stdout : f;
fprintf(f, "%4d\t%e \t%e\t%e\n", time_, mean_magnetisation_,
mean_energy_, temperature_);
}
void advance(unsigned int time_steps, FILE *output);
double compute_energy(lattice &other);
float compute_dE(int row, int col);
void set_to_chequerboard(int step);
void print_lattice(){
spin_lattice_.print();
};
// void load_custom(const lattice& custom);
};
#endif
The output right now looks something like this:
while it should be a step down near 2.26
I have found a few issues in your code:
The compute_dE method returns the wrong energy, as the factor of 2 shouldn't be there. The Hamiltonian of the Ising system is
While you are effectively using
The compute_energy method returns the wrong energy. The method should iterate over each spin pair only once. Something like this should do the trick:
for (unsigned int i = 0; i < max; i++) {
for (unsigned int j = i + 1; j < max; j++) {
energy_sum += other.compute_point_energy(i, j);
}
}
You use a temperature that is updated on the fly instead of using the target temperature. I do not really understand the purpose of that.
Related
I have a problem, probably, with memory leaking in C++ threads. I receive a runtime error with code 11. I am writing an optimization algorithm, which aims to optimize parameters of 2D reactors. It generates instances of reforming function, which creates Reformer objects. The reformers have 2 different parameters, which can differ locally in a single reformer and are passed to the reforming function from the main function. To specify, each reformer is divided into a specified number of zones (same dimensions and locations in each reformer), and each zone can have different parameters. Therefore, size of each of 2 vectors is equal to [NUMBER OF REFORMERS] * [NUMBER OF ZONES]. Then, the reforming function creates Segment objects, which number is equal to the number of zones.
I assume that the issue here is that threads try to access the same vector simultaneously and I would really appreciate a solution for that matter.
Remarks:
If I change the main.cpp to substitute the threads with a usual loop, no error is returned.
If I comment out the setProp method in the set_segments functions, no error is returned (with threads).
Threads are highly recommended here, due to long computation time of a single Reformer, and I have an access to a multi-core computing units.
To clarify, I will explain everything with a minimal reproducible example:
input.h
#include <iostream>
#include <fstream>
#include <vector>
#include <thread>
int reactor_no = 2; // number of reformers
int zones_X = 5; // number of zones in a single reformer, X direction
int zones_Y = 2; // number of zones in a single reformer, Y direction
double dim_X = 0.5; // reactor's length
double dim_Y = 0.2; // reactor's height
double wall_t = 0.1; // thickness of the reactor wall
size_t zones = zones_X * zones_Y;
Reformer.h:
#include "input.h"
class Reformer {
public:
Reformer() {}
Reformer(const double& L, const double& Y, const double& wall_t,
const int& zones_X = 1, const int& zones_Y = 1) {
length_ = L;
height_ = Y;
zonesX_ = zones_X;
zonesY_ = zones_Y;
wall_thickness_ = wall_t;
dx_ = length_ / static_cast<double> (zonesX_);
dr_ = height_ / static_cast<double> (zonesY_);
}
private:
double wall_thickness_; // wall thickness (m)
double length_; // recactor length (m)
double height_; // reactor height (m) (excluding wall thickness)
int zonesX_; // number of segments in the X direction
int zonesY_; // number of segments in the Y direction
double dx_; // segment width (m)
double dr_; // segment height (m)
}
Segment.h:
#include "input.h"
class Segment{
public:
Segment() : Segment(0, 0) {}
Segment(int i, int j) {
i_ = i;
j_ = j;
}
void setXR(const double& dx, const double& dr, const int& SL, const int& SR) {
x0_ = i_ * dx;
x1_ = x0_ + dx;
r0_ = j_ * dr;
r1_ = r0_ + dr;
if (i_ == SL - 1) {
x1_ = length;
}
if (j_ == SR - 1) {
r1_ = radius;
}
}
void setWall() {
x0_ = 0;
x1_ = length;
r0_ = radius;
r1_ = radius + wall_t;
}
void setProp(const double& por, const double& por_s, const bool& cat) {
porosity_ = por;
catalyst_ = cat;
}
private:
size_t i_; //segment column no.
size_t j_; //segment row no.
double x0_; //beginning of segment - x coordinate (m)
double x1_; //ending of segment - x coordinate (m)
double r0_; //beginning of segment - r coordinate (m)
double r1_; //ending of segment - r coordinate (m)
int catalyst_; //1 - catalytic, 0 - non-catalytic
double porosity_; //porosity (-)
};
main.cpp:
#include "input.h"
int main() {
int zones = zones_X * zones_Y;
size_t pop_size = reactor_no * zones;
std::vector<int> cat;
cat.reserve(pop_size);
std::vector<double> porosity;
porosity.reserve(pop_size); // the values in the vectors are not important, therefore I will just fill them with 1s
for (int i = 0; i < pop_size; i++) {
cat[i] = 1;
porosity[i] = 1.0;
}
std::vector<std::thread> Ref;
Ref.reserve(reactor_no);
for (k = 0; k < reactor_no; k++) {
Ref.emplace_back(reforming, k, cat, porosity);
}
for (auto &X : Ref) { X.join(); }
}
reforming.cpp:
#include "input.h"
void reforming(const int m, const std::vector<int>& cat_check, const std::vector<double>& por) {
Reformer reactor(length, radius, wall_t, zonesX, zonesY);
std::vector<Segment> seg; // vector holding segment objects
seg.reserve(zones);
set_segments(seg, reactor, zones, m, por, por_s, cat_check);
}
set_segments function:
#include "input.h"
void set_segments(std::vector<Segment> &seg, Reformer &reac, const int m,
const std::vector<double> &por, const std::vector<int> &check) {
int i, j, k, n;
double dx = dim_X / static_cast<double> (zones_X);
double dy = dim_Y / static_cast<double> (zones_Y);
std::vector<Segment*> ptr_seg;
ptr_seg.reserve(zones);
k = 0;
for (i = 0; i < zones_X; i++) {
for (j = 0; j < zones_Y; j++) {
n = m * zones + (i * zones_Y + j);
seg.emplace_back(Segment(i, j));
seg[k].setProp(por[n], check[n]);
seg[k].setXR(dx, dy, zones_X, zones_Y);
k++;
}
}
}
Adding std::ref() to the reforming function call parameters solved the problem.
for (k = 0; k < spec_max; k++) {
Ref.emplace_back(reforming, k, std::ref(cat), std::ref(porosity));
}
I'm trying to implement a neural network into c++, but all I have to show for it are lots of unknown errors. I've already searched and found other post such as (C++ class has no member named), however this has been no help to me. Can please help me figure out how to resolve all the errors I've been getting.
Here's the code
#include <iostream>
#include <vector>
#include <cstdlib>
#include <assert.h>
#include <math.h>
using namespace std;
struct Connection
{
double weight;
double deltaWeight;
};
class Neuron {};
typedef vector<Neuron> Layer;
// ************************* class Neuron *************************
class Neuron
{
public:
Neuron(unsigned numOutputs, unsigned myIndex);
void setOutputVal(double val)
{
m_outputVal = val;
};
double getOutputVal(void) const
{
return m_outputVal;
};
void feedForward(const Layer &prevLayer);
void calcOutputGradients(double targetVal);
void calcHiddenGradients(const Layer &nextLayer);
void updateInputWeights(Layer &prevLayer);
private:
static double eta; // [0.0..1.0] overall net training rate
static double alpha; // [0.0..n] multiplier of last weight change (momentum)
static double transferFunction(double x);
static double transferFunctionDerivative(double x);
static double randomWeight(void)
{
return rand() / double(RAND_MAX);
};
double sumDOW(const Layer &nextLayer) const;
double m_outputVal;
vector<Connection> m_outputWeights;
unsigned m_myIndex;
double m_gradient;
};
double Neuron::eta = 0.15; // overall net learning rate, [0.0..1.0]
double Neuron::alpha = 0.5; // momentum, multiplier of last deltaWeight [0.0..n]
void Neuron::updateInputWeights(Layer &prevLayer)
{
// The weight are updated in the Connection container
// in the neurons in the preceding layer
for (unsigned n = 0; n < prevLayer.size(); ++n)
{
Neuron &neuron = prevLayer[n];
double oldDeltaWeight = neuron.m_outputWeights[m_myIndex].deltaWeight;
double newDeltaWeight =
eta
* neuron_getOutputVal()
* m_gradient
+ alpha
* oldDeltaWeight;
neuron.m_outputWeights[m_myIndex].deltaWeight = newDeltaWeight;
neuron.m_outputWeights[m_myIndex].weight += newDeltaWeight;
}
}
double Neuron::sumDOW(const Layer &nextLayer) const
{
double sum = 0.0;
// Sum our contributions of the errors at the nodes we feed
for (unsigned n = 0; nextLayer.size() - 1; ++n)
{
sum += m_outputWeights[n].weight * nextLayer[n].m_gradient;
}
return sum;
}
void Neuron::calcHiddenGradients(const Layer &nextLayer)
{
double dow = sumDOW(nextLayer);
m_gradient = dow * Neuron::transferFunctionDerivative(m_outputVal);
}
void Neuron::calcOutputGradients(double targetVal)
{
double delta = targetVal - m_outputVal;
m_gradient = delta * Neuron::transferFunctionDerivative(m_outputVal);
}
double Neuron::transferFunction(double x)
{
// tanh - output range [-1.0..1.0]
return tanh(x);
}
double Neuron::transferFunctionDerivative(double x)
{
// tanh derivative
return 1.0 - x * x;
}
void Neuron::feedForward(const Layer &prevLayer)
{
double sum = 0.0;
// Sum the previous layer's outputs (which are our inputs)
// Include the bias node from the previous layer
for (unsigned n = 0; n < prevLayer.size(); ++n)
{
sum += prevLayer[n].getOutputVal() *
prevLayer[n].m_outputWeights[m_myIndex].weight;
}
m_outputVal = Neuron::transferFunction(sum);
}
Neuron::Neuron(unsigned numOutputs, unsigned myIndex)
{
for (unsigned c = 0; c < numOutputs; ++c)
{
m_outputWeights.push_back(Connection());
m_outputWeights.back().weight = randomWeight();
}
m_myIndex = myIndex;
}
// ************************* class Net *************************
class Net
{
public:
Net(const vector<unsigned> &topology);
void feedForward(const vector<double> &inputVals);
void backProp(const vector<double> &targetVals);
void getResults(vector<double> &resultVals) const;
private:
vector<Layer> m_layers; // m_layers{layerNum][neuronNum]
double m_error;
double m_recentAverageError;
double m_recentAverageSmoothingFactor;
};
void Net::getResults(vector<double> &resultVals) const
{
resultVals.clear();
for (unsigned n = 0; n < m_layers.back().size() - 1; ++n)
{
resultVals.push_back(m_layers.back()[n].getOutputVals());
}
}
void Net::backProp(const vector<double> &targetVals)
{
// Calculate overall net error (RMS of output errors)
Layer &outputLayer = m_layers.back();
m_error = 0.0;
for (unsigned n = 0; n < outputLayer.size() - 1; ++n)
{
double delta = targetVals[n] - outputLayer[n].getOutputVal();
m_error += delta * delta;
}
m_error /= outputLayer.size() - 1; // get average error squared
m_error = sqrt(m_error); // RMS
// Implement a recent average measurement:
m_recentAverageError =
(m_recentAverageError * m_recentAverageSmoothingFactor + m_error)
/ (m_recentAverageSmoothingFactor + 1.0);
// Calculate output layer gradients
for (unsigned n = 0; n < outputLayer.size() - 1; ++n)
{
outputLayer[n].calcOutputGradients(targetVals[n]);
}
// Calculate gradients on hidden layers
for (unsigned layerNum = m_layers.size() - 2; layerNum > 0; --layerNum)
{
Layer &hiddenLayer = m_layers[layerNum];
Layer &nextLayer = m_layers[layerNum + 1];
for (unsigned n = 0; n < hiddenLayer.size(); ++n)
{
hiddenLayer[n].calcHiddenGradients(nextLayer);
}
}
// For all layers from output to first hidden layer.
// update connection weights
for (unsigned layerNum = m_layers.size() - 1; layerNum > 0; --layerNum)
{
Layer &layer = m_layers[layerNum];
Layer &prevLayer = m_layers[layerNum - 1];
for (unsigned n = 0; n < layer.size() - 1; ++n)
{
layer[n].updateInputWeights(prevLayer);
}
}
}
void Net::feedForward(const vector<double> &inputVals)
{
assert(inputVals.size() == m_layers[0].size() - 1);
// Assign (latch) the values into the input neurons
for (unsigned i = 0; i < inputVals.size(); ++i)
{
m_layers[0][i].setOutputVal(inputVals[i]);
}
// Forward propagate
for (unsigned layerNum = 1; layerNum = m_layers.size(); ++layerNum)
{
Layer &prevLayer = m_layers[layerNum - 1];
for (unsigned n = 0; n < m_layers[layerNum].size() - 1; ++n)
{
m_layers[layerNum][n].feedForward(prevLayer);
}
}
}
Net::Net(const vector<unsigned> &topology)
{
unsigned numLayers = topology.size();
for (unsigned layerNum = 0; layerNum < numLayers; ++layerNum)
{
m_layers.push_back(Layer());
unsigned numOutputs = layerNum == topology.size() - 1 ? 0 : topology[layerNum + 1];
// We have made a new layer, now fill it with neurons, and
// add a bias neuron to the layer:
for (unsigned neuronNum = 0; neuronNum <= topology[layerNum]; ++neuronNum)
{
m_layers.back().push_back(Neuron(numOutputs, neuronNum));
cout << "Made a Neuron!" << endl;
}
}
}
int main()
{
// e.g.. { 3, 2, 1 }
// THIS IS FOR THE NUMBER OF NEURONS THAT YOU WANT!!
vector<unsigned> topology;
topology.push_back(3);
topology.push_back(2);
topology.push_back(1);
Net myNet(topology);
vector<double> inputVals;
myNet.feedForward(inputVals);
vector<double> targetVals;
myNet.backProp(targetVals);
vector<double> resultVals;
myNet.getResults(resultVals);
system("pause");
}
I've been getting errors such as:
Error: class "Neuron" has no member "feedForward"
Error: class "Neuron" has no member "setOutputVal"
'neuron_OutputVal': identifier not found
class Neuron {};
Here your file defined a class called Neuron. It's a class with no members, and no methods. A completely empty class.
A few lines later:
class Neuron
{
public:
// ...
Why, here's another class called Neuron. However, in C++, all classes must have unique names. So, your C++ compiler will completely reject this class declaration, and refuse to process it. Or, take some other, unspecified action.
There are three issues in your code.
First, as others mentioned fix your forward declaration.
class Neuron;
note this doesnt have {} as in your code. You dont need to move the typedef down, since your Neuron class uses the typedef 'Layer'.
Second, on line 70,
neuron.getOutputVal
instead of neuron_getOutputVal.
Third on line 167 just drop the s from getOutputVal (s).
class Neuron {}; is not a valid forward declaration. You can't use a forward declaration anyway, because declaration of Layer requires full knowledge of Neuron.
You'll have to remove the forward declaration class Neuron {}; entirely and move your declaration of typedef vector<Neuron> Layer; down. Put it right above your declaration of class Net.
The scatter method takes the original image and scatter its pixels.
The program works well when I use several lines of code instead of the
method "randomSelect". The program seems to go into an infinite loop
and the image does not change when I use the method "randomSelect".
void scatter(GBufferedImage &img, Grid<int> original, int row, int col) {
int degree;
while (true) {
degree = getInteger("Enter degree of scatter [1-100]: ");
if (degree >=1 && degree <= 100) break;
}
Grid<int> newImg(row, col);
for (int i = 0; i < row; i++) {
for (int j = 0; j < col; j++) {
/* int newRow = -1;
int newCol = -1;
while (!original.inBounds(newRow, newCol)) {
newRow = randomInteger(max(i - degree, 0), min(i + degree,original.numRows()));
newCol = randomInteger(max(j - degree, 0), min(j + degree,original.numRows()));
}
newImg[i][j] = original[newRow][newCol]; */ // work properly
newImg[i][j] = randomSelect(original, i , j, degree); // do not work
}
}
img.fromGrid(newImg);
}
int randomSelect(Grid<int> original, int i, int j, int degree) { // do not work
int newRow = -1;
int newCol = -1;
while (!original.inBounds(newRow, newCol)) {
newRow = randomInteger(max(i - degree, 0), min(i + degree,original.numRows()));
newCol = randomInteger(max(j - degree, 0), min(j + degree,original.numRows()));
}
return original[newRow][newCol];
}
You should pass original as a reference:
int randomSelect(Grid<int>& original, int i, int j, int degree) { // will work
I have created a program which creates a Mandelbrot set. Now I'm trying to make it multithreaded.
// mandelbrot.cpp
// compile with: g++ -std=c++11 mandelbrot.cpp -o mandelbrot
// view output with: eog mandelbrot.ppm
#include <fstream>
#include <complex> // if you make use of complex number facilities in C++
#include <iostream>
#include <cstdlib>
#include <thread>
#include <mutex>
#include <vector>
using namespace std;
template <class T> struct RGB { T r, g, b; };
template <class T>
class Matrix {
public:
Matrix(const size_t rows, const size_t cols) : _rows(rows), _cols(cols) {
_matrix = new T*[rows];
for (size_t i = 0; i < rows; ++i) {
_matrix[i] = new T[cols];
}
}
Matrix(const Matrix &m) : _rows(m._rows), _cols(m._cols) {
_matrix = new T*[m._rows];
for (size_t i = 0; i < m._rows; ++i) {
_matrix[i] = new T[m._cols];
for (size_t j = 0; j < m._cols; ++j) {
_matrix[i][j] = m._matrix[i][j];
}
}
}
~Matrix() {
for (size_t i = 0; i < _rows; ++i) {
delete [] _matrix[i];
}
delete [] _matrix;
}
T *operator[] (const size_t nIndex)
{
return _matrix[nIndex];
}
size_t width() const { return _cols; }
size_t height() const { return _rows; }
protected:
size_t _rows, _cols;
T **_matrix;
};
// Portable PixMap image
class PPMImage : public Matrix<RGB<unsigned char> >
{
public:
unsigned int size;
PPMImage(const size_t height, const size_t width) : Matrix(height, width) { }
void save(const std::string &filename)
{
std::ofstream out(filename, std::ios_base::binary);
out <<"P6" << std::endl << _cols << " " << _rows << std::endl << 255 << std::endl;
for (size_t y=0; y<_rows; y++)
for (size_t x=0; x<_cols; x++)
out << _matrix[y][x].r << _matrix[y][x].g << _matrix[y][x].b;
}
};
/*Draw mandelbrot according to the provided parameters*/
void draw_Mandelbrot(PPMImage & image, const unsigned width, const unsigned height, double cxmin, double cxmax, double cymin, double cymax,unsigned int max_iterations)
{
for (std::size_t ix = 0; ix < width; ++ix)
for (std::size_t iy = 0; iy < height; ++iy)
{
std::complex<double> c(cxmin + ix / (width - 1.0)*(cxmax - cxmin), cymin + iy / (height - 1.0)*(cymax - cymin));
std::complex<double> z = 0;
unsigned int iterations;
for (iterations = 0; iterations < max_iterations && std::abs(z) < 2.0; ++iterations)
z = z*z + c;
image[iy][ix].r = image[iy][ix].g = image[iy][ix].b = iterations;
}
}
int main()
{
const unsigned width = 1600;
const unsigned height = 1600;
PPMImage image(height, width);
int parts = 8;
std::vector<int>bnd (parts, image.size);
std::thread *tt = new std::thread[parts - 1];
time_t start, end;
time(&start);
//Lauch parts-1 threads
for (int i = 0; i < parts - 1; ++i) {
tt[i] = std::thread(draw_Mandelbrot,ref(image), width, height, -2.0, 0.5, -1.0, 1.0, 10);
}
//Use the main thread to do part of the work !!!
for (int i = parts - 1; i < parts; ++i) {
draw_Mandelbrot(ref(image), width, height, -2.0, 0.5, -1.0, 1.0, 10);
}
//Join parts-1 threads
for (int i = 0; i < parts - 1; ++i)
tt[i].join();
time(&end);
std::cout << difftime(end, start) << " seconds" << std::endl;
image.save("mandelbrot.ppm");
delete[] tt;
return 0;
}
Now every thread draws the complete fractal (look in main()). How can I let the threads draw different parts of the fractal?
You're making this (quite a lot) harder than it needs to be. This is the sort of task to which OpenMP is almost perfectly suited. For this task it gives almost perfect scaling with a bare minimum of effort.
I modified your draw_mandelbrot by inserting a pragma before the outer for loop:
#pragma omp parallel for
for (int ix = 0; ix < width; ++ix)
for (int iy = 0; iy < height; ++iy)
Then I simplified your main down to:
int main() {
const unsigned width = 1600;
const unsigned height = 1600;
PPMImage image(height, width);
clock_t start = clock();
draw_Mandelbrot(image, width, height, -2.0, 0.5, -1.0, 1.0, 10);
clock_t stop = clock();
std::cout << (double(stop - start) / CLOCKS_PER_SEC) << " seconds\n";
image.save("mandelbrot.ppm");
return 0;
}
On my (fairly slow) machine, your original code ran in 4.73 seconds. My modified code ran in 1.38 seconds. That's an improvement of 3.4x out of code that's nearly indistinguishable from a trivial single-threaded version.
Just for what it's worth, I did a bit more rewriting to get this:
// mandelbrot.cpp
// compile with: g++ -std=c++11 mandelbrot.cpp -o mandelbrot
// view output with: eog mandelbrot.ppm
#include <fstream>
#include <complex> // if you make use of complex number facilities in C++
#include <iostream>
#include <cstdlib>
#include <thread>
#include <mutex>
#include <vector>
using namespace std;
template <class T> struct RGB { T r, g, b; };
template <class T>
struct Matrix
{
std::vector<T> data;
size_t rows;
size_t cols;
class proxy {
Matrix &m;
size_t index_1;
public:
proxy(Matrix &m, size_t index_1) : m(m), index_1(index_1) { }
T &operator[](size_t index) { return m.data[index * m.rows + index_1]; }
};
class const_proxy {
Matrix const &m;
size_t index_1;
public:
const_proxy(Matrix const &m, size_t index_1) : m(m), index_1(index_1) { }
T const &operator[](size_t index) const { return m.data[index * m.rows + index_1]; }
};
public:
Matrix(size_t rows, size_t cols) : data(rows * cols), rows(rows), cols(cols) { }
proxy operator[](size_t index) { return proxy(*this, index); }
const_proxy operator[](size_t index) const { return const_proxy(*this, index); }
};
template <class T>
std::ostream &operator<<(std::ostream &out, Matrix<T> const &m) {
out << "P6" << std::endl << m.cols << " " << m.rows << std::endl << 255 << std::endl;
for (size_t y = 0; y < m.rows; y++)
for (size_t x = 0; x < m.cols; x++) {
T pixel = m[y][x];
out << pixel.r << pixel.g << pixel.b;
}
return out;
}
/*Draw Mandelbrot according to the provided parameters*/
template <class T>
void draw_Mandelbrot(T & image, const unsigned width, const unsigned height, double cxmin, double cxmax, double cymin, double cymax, unsigned int max_iterations) {
#pragma omp parallel for
for (int ix = 0; ix < width; ++ix)
for (int iy = 0; iy < height; ++iy)
{
std::complex<double> c(cxmin + ix / (width - 1.0)*(cxmax - cxmin), cymin + iy / (height - 1.0)*(cymax - cymin));
std::complex<double> z = 0;
unsigned int iterations;
for (iterations = 0; iterations < max_iterations && std::abs(z) < 2.0; ++iterations)
z = z*z + c;
image[iy][ix].r = image[iy][ix].g = image[iy][ix].b = iterations;
}
}
int main() {
const unsigned width = 1600;
const unsigned height = 1600;
Matrix<RGB<unsigned char>> image(height, width);
clock_t start = clock();
draw_Mandelbrot(image, width, height, -2.0, 0.5, -1.0, 1.0, 255);
clock_t stop = clock();
std::cout << (double(stop - start) / CLOCKS_PER_SEC) << " seconds\n";
std::ofstream out("mandelbrot.ppm", std::ios::binary);
out << image;
return 0;
}
On my machine, this code runs in about 0.5 to 0.6 seconds.
As to why I made these changes: mostly to make it faster, cleaner, and simpler. Your Matrix class allocated a separate block of memory for each row (or perhaps column--didn't pay very close of attention). This allocates one contiguous block of the entire matrix instead. This eliminates a level of indirection to get to the data, and increases locality of reference, thus improving cache usage. It also reduces the total amount of data used.
Changing from using time to using clock to do the timing was to measure CPU time instead of wall time (and typically improve precision substantially as well).
Getting rid of the PPMImage class was done simply because (IMO) having a PPImage class that derives from a Matrix class just doesn't make much (if any) sense. I suppose it works (for a sufficiently loose definition of "work") but it doesn't strike me as good design. If you insist on doing it at all, it should at least be private derivation, because you're just using the Matrix as a way of implementing your PPMImage class, not (at least I certainly hope not) trying to make assertions about properties of PPM images.
If, for whatever, reason, you decide to handle the threading manually, the obvious way of dividing the work up between threads would still be by looking at the loops inside of draw_mandelbrot. The obvious one would be to leave your outer loop alone, but send the computation for each iteration off to a thread pool:
for (int ix = 0; ix < width; ++ix)
compute_thread(ix);
where the body of compute_thread is basically this chunk of code:
for (int iy = 0; iy < height; ++iy)
{
std::complex<double> c(cxmin + ix / (width - 1.0)*(cxmax - cxmin), cymin + iy / (height - 1.0)*(cymax - cymin));
std::complex<double> z = 0;
unsigned int iterations;
for (iterations = 0; iterations < max_iterations && std::abs(z) < 2.0; ++iterations)
z = z*z + c;
image[iy][ix].r = image[iy][ix].g = image[iy][ix].b = iterations;
}
There would obviously be a little work involved in passing the correct data to the compute thread (each thread should be pass a reference to a slice of the resulting picture), but that would be an obvious and fairly clean place to divide things up. In particular it divides the job up into enough tasks that you semi-automatically get pretty good load balancing (i.e., you can keep all the cores busy) but large enough that you don't waste massive amounts of time on communication and synchronization between the threads.
As to the result, with the number of iterations set to 255, I get the following (scaled to 25%):
...which is pretty much as I'd expect.
One of the big issues with this approach is that different regions take different amounts of time to calculate.
A more general approach is.
Start 1 source thread.
Start N worker threads.
Start 1 sink thread.
Create 2 thread safe queues (call them the source queue and the sink queue).
Divide the image into M (many more than N) pieces.
The source thread pushes pieces into the source queue
The workers pull piecse from the source queue, convert the pieces into result fragments, and pushes those fragments into the sink queue.
The sink thread takes fragments from the sink queue and combines them into the final image.
By dividing up the work this way, all the worker threads will be busy all the time.
You can divide the fractal into pieces by divide the start and end of the fractal with the screen dimension:
$this->stepsRe = (double)((($this->startRe * -1) + ($this->endeRe)) / ($this->size_x-1));
$this->stepsIm = (double)((($this->startIm * -1) + ($this->endeIm)) / ($this->size_y-1));
I'm writing a code in C++ for a 2D Ising model. Here's what the code should do:
Generate random NxN lattice, with each site either +1 or -1 value.
Select a site at random
If site when flipped (+1 to -1 or -1 to +1) is a state of lower energy, flip state ie. if dE < 0, flip state. If flipped state is of higher energy, flip with acceptance rate w = e^{-b(dE)}. Where dE is the change in energy if state is flipped.
4.Do this for all NxN sites, without repetition. This is considered one sweep.
Do like 100 sweeps.
I'm having trouble with steps 1, 2 and 3, would appreciate any help! For step 1, I managed to create and display a lattice, but I can't seem to extract the value of a site at location (x, y). Steps 2 and 3, how do I use a boolean expression of some sort to flip according to acceptance probability?
#include <cstdlib>
#include <ctime>
using namespace std;
#include <iostream>
int main() //random generation of spin configuration
{
int L; //Total number of spins L = NxN
int N = 30 //A square lattice of length 30
double B=1; //magnetic field
double M; //Total Magnetization = Sum Si
double E; //Total Energy
int T = 1.0;
int nsweeps = 100; //number of sweeps
int de; //change in energy when flipped
double Boltzmann; //Boltzmann factor
int x,y; //randomly chosen lattice site
int i,j,a,c; //counters
int ROWS = 5;
int COLS = 5;
int matrix[ROWS][COLS];
srand ( static_cast<unsigned> ( time ( 0 ) ) );
for ( int i = 0; i < ROWS; i++ )
{
for ( int j = 0; j < COLS; j++ )
{
matrix[i][j] = rand () % 2 *2-1;
}
}
// showing the matrix on the screen
for(int i=0;i<ROWS;i++) // loop 3 times for three lines
{
for(int j=0;j<COLS;j++) // loop for the three elements on the line
{
cout<<matrix[i][j]; // display the current element out of the array
}
cout<<endl; // when the inner loop is done, go to a new line
}
return 0; // return 0 to the OS.
//boundary conditions and range
if(x<0) x += N;
if(x>=L) x -= N;
if(y<0) y += N;
if(y>=L) y -= N;
//counting total energy of configuration
{ int neighbour = 0; // nearest neighbour count
for(int i=0; i<L; i++)
for(int j=0; j<L; j++)
{ if(spin(i,j)==spin(i+1, j)) // count from each spin to the right and above
neighbour++;
else
neighbour--;
if(spin(i, j)==spin(i, j+1))
neighbour++;
else
neighbour--;
}
E = -J*neighbour - B*M;
//flipping spin
int x = int(srand48()*L); //retrieves spin from randomly choosen site
int y = int(srand48()*L);
int delta_M = -2*spin(x, y); //calculate change in Magnetization M
int delta_neighbour = spin(spinx-1, y) + spin(x+1, y)+ spin(x, y-1) + spin(x, y+1);
int delta_neighbour = -2*spin(x,y)* int delta_neighbour;
double delta_E = -J*delta_neighbour -B*delta_M;
//flip or not
if (delta_E<=0)
{ (x, y) *= -1; // flip spin and update values
M += delta_M;
E += delta_E;
}
}
To follow up on my comment:
There are too many issues with your code for a single answer. Try to
build your program step by step. Use functions which perform one
thing, and this they do well. Test each function individually and if
necessary try to find out why it does not work. Then post specific
questions again.
To get you started:
Store your lattice as a std::vector<int> lattice(N*N)
Access element (x,y) with data[x+N*y].
Example:
#include <vector>
struct IsingModel
{
unsigned size_;
std::vector<int> lattice_;
// access element (x,y)
int& at(int x, int y) {
return lattice_[x + y*size_];
}
int at(int x, int y) const {
return lattice_[x + y*size_];
}
// generate size x size lattice
IsingModel(unsigned size)
: size_(size), lattice_(size*size, +1) {
}
static int BoolToSpin(bool v) {
return v ? +1 : -1;
}
// initialize spin randomly
void initializeRandom() {
for(int y=0; y<size_; y++) {
for(int x=0; x<size_; x++) {
at(x,y) = BoolToSpin(rand()%2);
}
}
}
static int Energy(int a, int b) {
return (a == b) ? +1 : -1;
}
// compute total energy
unsigned computeTotalEnergy() const {
unsigned energy = 0;
for(int y=1; y<size_-1; y++) {
for(int x=1; x<size_-1; x++) {
energy += Energy(at(x,y), at(x+1,y));
energy += Energy(at(x,y), at(x,y+1));
}
}
return energy ;
}
};
#include <iostream>
#include <cstdlib>
#include <ctime>
int main() {
srand(static_cast<unsigned>(time(0))); // intialize random number generator
IsingModel im(10);
im.initializeRandom();
unsigned energy = im.computeTotalEnergy();
std::cout << energy << std::endl; // print energy
}