I am building a spatial octree. In order to determine in which branch/octant a certain point (x,y,z) should be placed, I use this function:
if (x>x_centre) {
xsign = 1;
}
else {
xsign = 0;
}
if (y>y_centre) {
ysign = 1;
}
else {
ysign = 0;
}
if (z>z_centre) {
zsign = 1;
}
else {
zsign = 0;
}
return xsign + 2*ysign + 4*zsign;
It returns a number between 0 and 7, unique for every octant. It turns out this snippet is called a great many times, and it gets quite time-consuming when building large trees.
Is there any easy way to speed this process up?
This already gives a 30 percent speed-up:
// Branch-free octant classification: each comparison yields 0 or 1 directly,
// so no if/else is needed.
xsign = x>x_centre;
ysign = y>y_centre;
// z has to be compared against the z centre (the earlier text compared it
// against y_centre, which put points into the wrong octant).
zsign = z>z_centre;
// x contributes 1, y contributes 2, z contributes 4 -> unique index in 0..7.
return xsign + 2*ysign + 4*zsign;
Any other tips?
Related
I am creating a game with a 3D grid for flying entities, So I have a lot of points and connections in the air where there aren't any obstructions. I didn't want to decrease the resolution of my grid so I thought I could just skip over chunks (or empties as I call them) of the Astar map while they're not containing any obstructions, and I modified Godot's Astar algorithm to do this.
Unfortunately this ended up being slower than looping through points one at a time due to the way I implemented this modification, which needs to loop through all the edge points of an empty.
2D representation of how one edge point of an empty connects to all other edge points:
This ends up looping through a larger number of points than letting the A* algorithm work its way through the empty.
So I'm sorta stumped on how to make this more efficient while still preserving the most optimal path.
I could potentially narrow down what faces of the empty should be scanned over by first comparing the center points of all 8 faces of the empty (as my grid consists of hexagonal prisms). Or maybe I should somehow use the face center points of the empty's faces exclusively instead of all edge points.
I mainly want to know if anyone has worked on an issue like this before, and if so what would be the recommended solution?
Here is the astar loop for reference:
// Runs one A* search from begin_point towards end_point.
//
// begin_point     - start of the search; its g_score is set to 0.
// end_point       - goal; the search stops as soon as it is at the front of the open list.
// relevant_layers - bitmask tested against each point's / empty's parallel_support_layers;
//                   0 disables the layer filter.
//
// Returns true when end_point was reached, false when end_point is disabled or
// filtered out by relevant_layers, or when the open list runs dry first.
// The route itself is recorded through the prev_point links written below.
bool AStar::_solve(Point *begin_point, Point *end_point, int relevant_layers) {
// Stamp for this search; open_pass / closed_pass are compared against it below,
// so per-point open/closed state from earlier searches is ignored.
pass++;
//make sure parallel layers are supported
// or if *relevant_layers is 0 then use all points
bool supported = relevant_layers == 0 || (relevant_layers & end_point->parallel_support_layers) > 0;
if (!end_point->enabled || !supported) {
return false;
}
bool found_route = false;
// open_list is manipulated only through sorter.push_heap / sorter.pop_heap, i.e. it is
// kept in heap order. Presumably SortPoints orders by f_score so that open_list[0] is the
// most promising point - TODO confirm against the SortPoints definition.
Vector<Point *> open_list;
SortArray<Point *, SortPoints> sorter;
begin_point->g_score = 0;
begin_point->f_score = _estimate_cost(begin_point->id, end_point->id);
open_list.push_back(begin_point);
while (!open_list.empty()) {
Point *p = open_list[0]; // The currently processed point
if (p == end_point) {
found_route = true;
break;
}
// pop_heap moves the front element to the back of the array; the remove() below then drops it.
sorter.pop_heap(0, open_list.size(), open_list.ptrw()); // Remove the current point from the open list
open_list.remove(open_list.size() - 1);
p->closed_pass = pass; // Mark the point as closed
//if the point is part of an empty, look through all of the edge points of said empty (as to skip over any points within the empty).
// connections collects every candidate successor of p, keyed by point id, so each
// point is relaxed at most once per expansion.
OAHashMap<int, Point*> connections;
// Empties of p that passed the enabled / layer / "goal not inside" test below.
PoolVector<Empty*> enabled_empties;
int size = p->empties.size();
PoolVector<Empty*>::Read r = p->empties.read();
for (int i = 0; i < size; i++) {
Empty* e = r[i];
supported = relevant_layers == 0 || (relevant_layers & e->parallel_support_layers) > 0;
//if the empty is enabled and the end point is not within the empty
if (e->enabled && supported && !end_point->empties.has(e)) {
enabled_empties.append(e);
//can travel to any edge point
for (OAHashMap<int, Point*>::Iterator it = e->edge_points.iter(); it.valid; it = e->edge_points.next_iter(it)) {
int id = *it.key;
Point* ep = *(it.value);
// Reached by jumping across the empty, not through a direct neighbour link
// (may be overwritten with true by the neighbour loop below).
ep->is_neighbour = false;
//don't connect to the same point
// The duplicate lookup is skipped for the first empty in p->empties (i == 0).
if (id != p->id && (i == 0 || !connections.has(id))) {
connections.set(id, ep);
}
}
}
}
//add neighbours to connections
for (OAHashMap<int, Point*>::Iterator it = p->neighbours.iter(); it.valid; it = p->neighbours.next_iter(it)) {
int id = *it.key;
Point* np = *(it.value);// The neighbour point
np->is_neighbour = true;
//don't need to check for duplicate point connections if no empties
if (size == 0 || !connections.has(id)) {
//don't add points within enabled empties since they're meant to be skipped over
if (np->empties.size() > 0 && !np->on_empty_edge) {
// np belongs to at least one empty and is not on its edge: keep it only if
// none of its empties is one of the enabled empties collected above.
bool in_enabled_empty = false;
PoolVector<Empty*>::Read r1 = np->empties.read();
for (int i = 0; i < np->empties.size(); i++) {
if (enabled_empties.has(r1[i])) {
in_enabled_empty = true;
break;
}
}
if (!in_enabled_empty) {
connections.set(id, np);
}
}
else {
connections.set(id, np);
}
}
}
// Relax every collected candidate.
for (OAHashMap<int, Point *>::Iterator it = connections.iter(); it.valid; it = connections.next_iter(it)) {
Point *e = *(it.value); // The neighbour point
//make sure parallel layers are supported
// or if *relevant_layers is 0 then use all points
supported = relevant_layers == 0 || (relevant_layers & e->parallel_support_layers) > 0;
if (!e->enabled || e->closed_pass == pass || !supported) {
continue;
}
real_t tentative_g_score = p->g_score + _compute_cost(p->id, e->id) * e->weight_scale;
bool new_point = false;
if (e->open_pass != pass) { // The point wasn't inside the open list.
e->open_pass = pass;
open_list.push_back(e);
new_point = true;
} else if (tentative_g_score >= e->g_score) { // The new path is worse than the previous.
continue;
}
e->prev_point = p;
// Remember whether this step used a direct neighbour link or a jump across an empty.
e->prev_point_connected = e->is_neighbour;
e->g_score = tentative_g_score;
e->f_score = e->g_score + _estimate_cost(e->id, end_point->id);
if (new_point) { // The position of the new points is already known.
sorter.push_heap(0, open_list.size() - 1, 0, e, open_list.ptrw());
} else {
// The point is already in the open list: look up its current position and re-sift it.
// NOTE(review): open_list.find(e) is presumably a linear scan - confirm if this shows up in profiles.
sorter.push_heap(0, open_list.find(e), 0, e, open_list.ptrw());
}
}
}
return found_route;
}
Note: I'm still not exactly sure what the sorter does.
the entire code can be seen here in a_star.cpp and a_star.h
Edit:
if anyone wants to reference or use this, I've modified the Astar code to add user-defined octants and to use a user-defined straight line function (they are user-defined so they can work with any type of grid) to be used between octants when possible to further decrease runtime, and it works very well in terms of speed. Though the pathing is not optimal, especially when adding a lot of obstacles/restricting the available positions.
I need to loop a big array, 2 Million elements, many many times.
The structure of my code looks like:
// Skeleton of the hot loop (pseudo-code: the wrap-around test is written in words).
// loop_tag is a running cursor into arr: it advances by 45 per (ig1, ig2) pair,
// is not reset between pairs, and wraps to 0 only when it reaches the end of arr.
loop_tag= 0;
// the ig1, ig2 loop will run many times,
// and N & M are about 30000
for(ig1=0; ig1<N; ig1++)
{
for(ig2=0; ig2<M; ig2++)
{
for(k=0;k<45;k++)
{
// Sequential read of the next array element.
element_val = arr[loop_tag];
loop_tag ++;
// there're a few lines to calculate something
}
if(loop_tag == the end of arr){loop_tag=0;}
}
}
I will run the code more than 100 000 times, each time takes me about 200~1000 sec. Actually, I have used MPI to save time. But it still needs about 10 hours with 300 CPUs being used.
I find that most of the time is spent on "element_val = arr[loop_tag];". If I just assign a constant to element_val, like "element_val = 0.01", each run takes only about 30% of the original time.
How can I accelerate this part? Thanks!
Here are some outputs in the log file:
expo pair: 0-0-0(20193) <-> 1-0-1(22275)
Finish in 1017.22 sec. 4.49795e+08 pairs. Expo pairs got now: 1
Now 0 buffers, 1 block in current buffer
expo pair: 23-5-2(18259) <-> 201-18-1(9704)
Finish in 70.86 sec. 3.17283e+07 pairs. Expo pairs got now: 2
Now 0 buffers, 2 block in current buffer
expo pair: 23-5-2(18259) <-> 559-47-1(15243)
Finish in 608.50 sec. 2.78322e+08 pairs. Expo pairs got now: 3
Now 0 buffers, 3 block in current buffer
Here is the code inside the ig1 and ig2 loops. The code reads data from two exposure files each time, then loops over their lines. I put all the data (arrays) in the structure expo_info.
// Body executed for one (ig1, ig2) galaxy pair. The enclosing loops and the
// *_z1 quantities of the first galaxy are set up outside this fragment.
//
// Steps: skip same-exposure pairs, compute the separation, find its theta bin,
// rotate the second galaxy's shear estimators, then update the 2D histograms
// once per shear guess (gg_1 / gg_2 entry).

// if two galaxies come from the same CFHTLenS exposure, break
if(expo_info->obs_expo_label_1[ig1] == expo_info->obs_expo_label_2[ig2]){break;}

// Offset of galaxy ig2's row inside the flattened exposure table.
n = ig2*expo_info->expo_data_col;

ra_z2 = expo_info->expo_data[expo_label_1][n+expo_info->ra_idx];
dec_z2 = expo_info->expo_data[expo_label_1][n+expo_info->dec_idx];
cos_dec_z2 = expo_info->expo_data[expo_label_1][n+expo_info->cos_dec_idx];

// the separation angle (arc minute)
delta_ra = (ra_z2 - ra_z1)*cos_dec_z1;
delta_dec = dec_z2 - dec_z1;
delta_radius = sqrt(delta_ra*delta_ra + delta_dec*delta_dec);

// Find the bin with theta_bin[ir] < delta_radius <= theta_bin[ir+1]; -1 = no bin.
theta_tag = -1;
for(ir=0; ir<expo_info->theta_bin_num; ir++)
{
    if(delta_radius > expo_info->theta_bin[ir] and delta_radius <= expo_info->theta_bin[ir+1]){theta_tag=ir;break;}
}

if(theta_tag > -1)
{
    pairs+= 1;

    // shear estimators rotation (position angle defined as East of North)
    sin_theta = delta_ra/delta_radius;
    cos_theta = delta_dec/delta_radius;

    // Double- and quadruple-angle values via the standard identities.
    sin_2theta = 2*sin_theta*cos_theta;
    cos_2theta = cos_theta*cos_theta - sin_theta*sin_theta;

    sin_4theta = 2*sin_2theta*cos_2theta;
    cos_4theta = cos_2theta*cos_2theta - sin_2theta*sin_2theta;

    mg1_z2 = expo_info->expo_data[expo_label_1][n+expo_info->mg1_idx]*cos_2theta - expo_info->expo_data[expo_label_1][n+expo_info->mg2_idx]*sin_2theta;
    mg2_z2 = expo_info->expo_data[expo_label_1][n+expo_info->mg1_idx]*sin_2theta + expo_info->expo_data[expo_label_1][n+expo_info->mg2_idx]*cos_2theta;

    // Fixed: the second index was garbled as "expo_infomv_idx"; it is the
    // mv_idx member, matching the mu_idx access in the same expression.
    mnu1_z2 = expo_info->expo_data[expo_label_1][n+expo_info->mu_idx]*cos_4theta - expo_info->expo_data[expo_label_1][n+expo_info->mv_idx]*sin_4theta;
    // mnu2_z2 temporarily holds the rotated term so it can be both added to
    // and subtracted from the mn column.
    mnu2_z2 = mnu1_z2;
    mnu1_z2 = expo_info->expo_data[expo_label_1][n+expo_info->mn_idx] + mnu2_z2;
    mnu2_z2 = expo_info->expo_data[expo_label_1][n+expo_info->mn_idx] - mnu2_z2;

    // there're zbin_num *zbin_num blocks, iz1 is row, iz2 is the col, each block
    // has a length of mg_bin_num*mg_bin_num*chi_guess_num*theta_bin_num.
    iz2 = expo_info->expo_zbin_label[expo_label_1][ig2];

    ////////////////////// the key part of PDF_SYM //////////////////////////////
    // Fixed: "expo_info-iz_chi_block_len" (pointer minus integer) does not
    // compile inside this product; read as the member access below.
    // NOTE(review): confirm iz_chi_block_len is a member of expo_info and not a
    // local like ir_chi_block_len.
    ic_len = theta_tag*ir_chi_block_len + (iz1 + iz2)*expo_info->iz_chi_block_len;

    // First shear guess: full histogram search.
    gg_1 = expo_info->gg_1[loop_label];
    gg_2 = expo_info->gg_2[loop_label];

    temp_tt[2] = mg1_z1 - gg_1*mnu1_z1;
    temp_tt[3] = mg1_z2 - gg_2*mnu1_z2;
    hist_2d_new(temp_tt[2], temp_tt[3], expo_info->mg_bin,
                mg_bin_num,mg_bin_num1, mg_bin_num2, mg_bin_num3, ix_tt, iy_tt);
    expo_info->expo_num_count_chit[ic_len + iy_tt*mg_bin_num+ix_tt] += 1;

    temp_xx[2] = mg2_z1 - gg_1*mnu2_z1;
    temp_xx[3] = mg2_z2 - gg_2*mnu2_z2;
    hist_2d_new(temp_xx[2], temp_xx[3], expo_info->mg_bin,
                mg_bin_num,mg_bin_num1, mg_bin_num2, mg_bin_num3, ix_xx, iy_xx);
    expo_info->expo_num_count_chix[ic_len + iy_xx*mg_bin_num+ix_xx] += 1;

    loop_label += 1;

    // Remaining shear guesses: each call is given the previous values
    // (temp_*[0..1]) and the previous bin indices (bin_para_*).
    for(ic=1; ic<chi_guess_num; ic++)
    {
        ic_len += chi_block_len;

        // these two lines, gg_1 & gg_2, take a lot of time.
        // expo_info->gg_1 & gg_2 are two big arrays, 2 Million elements
        // if I just use something like gg_1 = 0.001; gg_2 = 0.001,
        // it runs very fast
        gg_1 = expo_info->gg_1[loop_label];
        gg_2 = expo_info->gg_2[loop_label];

        bin_para_tt[0] = ix_tt;
        bin_para_tt[1] = iy_tt;
        temp_tt[0] = temp_tt[2];
        temp_tt[1] = temp_tt[3];
        temp_tt[2] = mg1_z1 - gg_1*mnu1_z1;
        temp_tt[3] = mg1_z2 - gg_2*mnu1_z2;
        hist_2d_new(expo_info->mg_bin, mg_bin_num, temp_tt, bin_para_tt, ix_tt, iy_tt);
        expo_info->expo_num_count_chit[ic_len + iy_tt*mg_bin_num+ix_tt] += 1;

        bin_para_xx[0] = ix_xx;
        bin_para_xx[1] = iy_xx;
        temp_xx[0] = temp_xx[2];
        temp_xx[1] = temp_xx[3];
        temp_xx[2] = mg2_z1 - gg_1*mnu2_z1;
        temp_xx[3] = mg2_z2 - gg_2*mnu2_z2;
        hist_2d_new(expo_info->mg_bin, mg_bin_num, temp_xx, bin_para_xx, ix_xx, iy_xx);
        expo_info->expo_num_count_chix[ic_len + iy_xx*mg_bin_num+ix_xx] += 1;

        loop_label += 1;
    }

    // Wrap the cursor once the gg arrays are exhausted.
    if(loop_label >= gg_len){loop_label = 0;}
    ////////////////////// the key part of PDF_SYM -end //////////////////////////////
}
I find most of the time is spent on
gg_1 = expo_info->gg_1[loop_label];
gg_2 = expo_info->gg_2[loop_label];
The function is supposed to increment x by 1 every time it launches, thus forcing the code to choose a different option on the next launch.
// Mouse-over handler for the element with id "sample".
// x is declared and set to 0 inside the function, so every call starts from 0
// again: the even branch is always taken and the colour is always "purple".
function changeBackground() {
    var x = 0;
    var target = document.getElementById("sample");
    target.style.backgroundColor = (x % 2 === 0) ? "purple" : "white";
    x++;
}

// Global x (separate from the local x above) holds the element the handler is attached to.
var x = document.getElementById("sample");
x.onmouseover = changeBackground;
This is just grabbing a heading and launching changeBackground every time the cursor is placed on it. The background color stays on purple.
That is because you set x to 0 at the beginning of the function every time it runs. You need to declare the counter outside of the function:
// Number of times the handler has run. It lives outside the function so the
// value survives between mouse-over events.
var x = 0;

// Mouse-over handler: alternates the background of the "sample" element
// between purple (even count) and white (odd count).
function changeBackground() {
    var target = document.getElementById("sample");
    target.style.backgroundColor = (x % 2 === 0) ? "purple" : "white";
    x++;
}

// The element must not be stored in a variable named x: a second `var x`
// refers to the same global as the counter and would overwrite it with the
// element, making `x % 2` NaN so the colour would never toggle.
var sampleElement = document.getElementById("sample");
sampleElement.onmouseover = changeBackground;
I am trying to implement a simple Gif-Reader in c++.
I am currently stuck on decompressing the image data.
If an image includes a Clear Code my decompression algorithm fails.
After the Clear Code I rebuild the CodeTable reset the CodeSize to MinimumLzwCodeSize + 1.
Then I read the next code and add it to the indexstream. The problem is that after clearing, the next codes include values greater than the size of the current codetable.
For example the sample file from wikipedia: rotating-earth.gif has a code value of 262 but the GlobalColorTable is only 256. How do I handle this?
I implemented the lzw decompression according to gif spec..
here is the main code part of decompressing:
// LZW decode loop for GIF image data: reads variable-width codes with GetCode,
// appends the decoded colour indices to codeStream and grows codeTable by one
// entry per processed code.
// The first code is emitted directly and becomes the initial prevCode.
int prevCode = GetCode(ptr, offset, codeSize);
codeStream.push_back(prevCode);
while (true)
{
auto code = GetCode(ptr, offset, codeSize);
//
//Clear code
//
if (code == IndexClearCode)
{
//reset codesize
codeSize = blockA.LZWMinimumCodeSize + 1;
// Largest code value representable with the current width.
// NOTE(review): pow() works in floating point; an integer shift, (1 << codeSize) - 1, would avoid the conversion.
currentNodeValue = pow(2, codeSize) - 1;
//reset codeTable
// Cut the table back to the colour entries plus two more slots
// (presumably the clear and end-of-information codes - confirm).
codeTable.resize(colorTable.size() + 2);
//read next code
prevCode = GetCode(ptr, offset, codeSize);
codeStream.push_back(prevCode);
continue;
}
else if (code == IndexEndOfInformationCode)
break;
//exists in dictionary
if (codeTable.size() > code)
{
// Guard against a prevCode that is not a valid table index.
// NOTE(review): prevCode is an int compared against size() - presumably a signed/unsigned comparison.
if (prevCode >= codeTable.size())
{
prevCode = code;
continue;
}
// Emit the entry for code, then add (entry of prevCode + first index of entry of code) to the table.
for (auto c : codeTable[code])
codeStream.push_back(c);
newEntry = codeTable[prevCode];
newEntry.push_back(codeTable[code][0]);
codeTable.push_back(newEntry);
prevCode = code;
// Widen the codes once the newest table index reaches the largest value of the current width.
// NOTE(review): codeSize grows without an upper bound here. GIF LZW codes are limited to
// 12 bits, so when the table is full the width has to stay at 12 until a clear code arrives;
// otherwise later codes are read with the wrong width.
if (codeTable.size() - 1 == currentNodeValue)
{
codeSize++;
currentNodeValue = pow(2, codeSize) - 1;
}
}
else
{
if (prevCode >= codeTable.size())
{
prevCode = code;
continue;
}
// Code not in the table yet: the new entry is (entry of prevCode + its own first index);
// it is emitted and appended to the table.
newEntry = codeTable[prevCode];
newEntry.push_back(codeTable[prevCode][0]);
for (auto c : newEntry)
codeStream.push_back(c);
codeTable.push_back(newEntry);
// The entry just appended is the last one in the table.
prevCode = codeTable.size() - 1;
// NOTE(review): same unbounded code-size growth as in the branch above.
if (codeTable.size() - 1 == currentNodeValue)
{
codeSize++;
currentNodeValue = pow(2, codeSize) - 1;
}
}
}
Found the solution.
It is called a deferred clear code. So when I check whether the codeSize needs to be incremented, I also need to check whether the codeSize is already at the maximum (12), as it is possible to get codes that are of the maximum code size. See spec-gif89a.txt.
if (codeTable.size() - 1 == currentNodeValue && codeSize < 12)
{
codeSize++;
currentNodeValue = (1 << codeSize) - 1;
}
Is there a way to update the number of joysticks plugged in at run-time other than constantly calling remove_joystick() then install_joystick? This proves to be extremely slow (goes from 60 FPS to around 5).
Allegro 4.2 answers only please...
// Refreshes the cached button and stick state for joystick _joyNumber.
// For every button, stick and axis the current snapshot is first copied into
// the "previous" snapshot, then the current snapshot is reloaded from
// Allegro's global joy[] array.
// Throws InputNotAvailableException("Joystick") when GetNumJoysticks()
// reports zero devices or when poll_joystick() fails.
void Joystick::Update() {
    // GetNumJoysticks() reinstalls the joystick driver on every call; a result
    // of 0 means no joystick input is available.
    if(GetNumJoysticks() == 0) {
        throw InputNotAvailableException("Joystick");
    }

    // No joystick objects left: release the driver and skip the update.
    if(_numjoysticks == 0) {
        remove_joystick();
        return;
    }

    // Ask Allegro to refresh joy[] before reading from it.
    if(poll_joystick() < 0) {
        throw InputNotAvailableException("Joystick");
    }

    const auto& device = joy[_joyNumber];

    // Buttons: shift current -> previous, then reload current.
    for(int b = 0; b < _numButtons; ++b) {
        auto& before = _prevButtons[b];
        auto& now = _curButtons[b];

        before.b = now.b;
        before.name = now.name;

        now.b = device.button[b].b;
        now.name = device.button[b].name;
    }

    // Sticks: same shift-then-reload, per axis first, then the stick-level fields.
    for(int s = 0; s < _numSticks; ++s) {
        auto& before = _prevSticks[s];
        auto& now = _curSticks[s];

        for(int a = 0; a < device.stick[s].num_axis; ++a) {
            before.axis[a].name = now.axis[a].name;
            before.axis[a].pos = now.axis[a].pos;
            before.axis[a].d1 = now.axis[a].d1;
            before.axis[a].d2 = now.axis[a].d2;

            now.axis[a].name = device.stick[s].axis[a].name;
            now.axis[a].pos = device.stick[s].axis[a].pos;
            now.axis[a].d1 = device.stick[s].axis[a].d1;
            now.axis[a].d2 = device.stick[s].axis[a].d2;
        }

        before.flags = now.flags;
        before.name = now.name;

        now.flags = device.stick[s].flags;
        now.name = device.stick[s].name;
    }
}
// Reinstalls the DirectX joystick driver and reports how many joysticks it found.
// Returns 0 when install_joystick() reports failure (non-zero result),
// otherwise Allegro's num_joysticks.
int Joystick::GetNumJoysticks() {
    // Drop the current driver so the install below performs a fresh detection.
    remove_joystick();

    const int installFailed = install_joystick(JOY_TYPE_DIRECTX);
    return installFailed ? 0 : num_joysticks;
}
The 4.x series does not. The 5.x series does.
You'll have to either listen for native OS events using custom platform specific code (assuming such things exist) and only call the Allegro deinit/init functions when a change is detected, or require the user to initiate joystick refresh manually.
Under Linux, you could inotify_add_watch() /dev/input to check for changes. Looking at the 4.4 Allegro code, looks like you'd want to call the Win32 functions joyGetNumDevs() and joyGetPos(). Something like:
// Sketch: count the joystick slots that currently answer joyGetPos() and
// compare the count with the one remembered from the previous check.
int WIN_MAX_JOYSTICKS = joyGetNumDevs(); // this should never change
JOYINFO ji;
int pluggedin_count = 0;
// A slot is counted as plugged in when joyGetPos() succeeds for it.
for (int i = 0; i < WIN_MAX_JOYSTICKS; ++i)
if (joyGetPos(i, &ji) == JOYERR_NOERROR) ++pluggedin_count;
// Placeholder: the statement that reinitialises Allegro (and stores the new
// count in last_pluggedin_count) still has to be supplied here.
if (pluggedin_count != last_pluggedin_count) /* reinit Allegro */
You'd have to do that every N seconds.
Those joy* functions are Windows functions, so read MSDN docs to learn how to use them.