Binary tree benchmark results - c++

I stumbled upon a website making benchmarks.
In this case Golang vs C++, binary trees.
The C++ solution does A LOT better than golang using allocation of a memory pool.
I can get behind that but wondered how an implementation without that would fare. So I modified it to look more like the Golang-Code and removed concurrency for both.
In this example and on my machine the golang code runs in around 24 seconds.
The C++ code takes an average of 126 seconds. I did not expect this result at all. I expected C++ to still be faster or maybe be a bit slower but not by a factor of 5.
Did I make some huge mistake? Or do you know the reason for this? Code for both programs is below:
Built with:
mingw32-g++.exe -Wall -fexceptions -O2 -c D:\TMP\Test\main.cpp -o
obj\Release\main.o
mingw32-g++.exe -o bin\Release\Test.exe obj\Release\main.o -s
#include <iostream>
using namespace std;
// A node of a full binary tree used for the allocation benchmark.
class Node {
public:
// Builds a full binary tree of depth d rooted at this node.
Node(uint64_t d);
// Recursively deletes both subtrees.
~Node();
// Returns the number of nodes in the subtree rooted here (this node included).
int Check();
private:
// Owning raw child pointers; both are null at a leaf, both set otherwise.
// NOTE(review): copy constructor/assignment are neither defined nor deleted,
// so copying a Node would double-delete the children (rule of three) —
// confirm no copies occur, or delete the copy operations.
Node* l;
Node* r;
};
// Recursively builds a full binary tree: a node with d > 0 owns two children
// of depth d - 1; a node with d == 0 is a leaf with both children empty.
Node::Node(uint64_t d){
if (d > 0){
l = new Node(d - 1);
r = new Node(d - 1);
} else {
// Use nullptr (C++11, already required by uint64_t/this thread's builds)
// instead of the integer literal 0 for pointer initialization.
l = nullptr;
r = nullptr;
}
}
// Recursively frees both subtrees. The constructor guarantees l and r are
// either both set or both null, so testing l alone also covers r.
Node::~Node(){
if(l){
delete l;
delete r;
}
}
// Counts the nodes in the subtree rooted at this node, itself included.
int Node::Check(){
// Leaf: contributes exactly itself.
if (!l) {
return 1;
}
// Internal node: itself plus both subtree counts.
return 1 + l->Check() + r->Check();
}
int main()
{
// Benchmark: for each depth d in {4, 6, ..., 20}, repeatedly build a full
// binary tree of depth d, count its nodes, and tear it down. Shallower
// trees get exponentially more iterations so every depth does similar work.
uint64_t min_depth = 4;
uint64_t max_depth = 21;
for (uint64_t d = min_depth; d <= max_depth; d += 2) {
// Iteration count halves for every 2 extra levels of depth.
uint64_t iterations = 1 << (max_depth - d + min_depth);
uint64_t c = 0;
// NOTE(review): the loop starts at 1, so only iterations - 1 trees are
// built — this matches the Go version below, keeping the printed checks equal.
for (uint64_t i = 1; i < iterations; i++) {
Node* a = new Node(d);
c += a->Check();
delete a; // I tried commenting this line but it made no big impact
}
cout << iterations << " trees of depth " << d << " check: " << c << "\n";
}
return 0;
}
Golang:
go version go1.7.1 windows/amd64
package main
import(
"fmt"
)
// Node is one vertex of a full binary tree; nil children mark a leaf.
type Node struct {
l *Node
r *Node
}
// check counts the nodes in the subtree rooted at n, n itself included.
func (n *Node) check() int {
// Leaf: only the node itself.
if n.l == nil {
return 1
}
return n.l.check() + 1 + n.r.check()
}
// make builds a full binary tree of depth d and returns its root.
// NOTE(review): this shadows Go's builtin make() inside the package —
// consider renaming (e.g. makeTree) to avoid confusion; left as-is here
// because the benchmark comparison references it by this name.
func make(d uint) *Node {
root := new(Node)
if d > 0 {
root.l = make(d-1)
root.r = make(d-1)
}
return root
}
func main(){
// Mirror of the C++ benchmark above: exponentially more iterations for
// shallower trees so each depth performs comparable total work.
min_depth := uint(4)
max_depth := uint(21)
for d := min_depth; d <= max_depth; d += 2 {
iterations := 1 << (max_depth - d + min_depth)
c := 0
// Starts at 1, so iterations-1 trees are built (same as the C++ code).
for i := 1; i < iterations; i++ {
a := make(d)
c += a.check()
}
fmt.Println(iterations, " trees of depth ", d, " check: ", c)
}
}

It's something with the computer you ran it on, because I'm getting the expected result where C++ is twice as fast as Go.
C++
time cmake-build-debug/main
2097152 trees of depth 4 check: 65011681
524288 trees of depth 6 check: 66584449
131072 trees of depth 8 check: 66977281
32768 trees of depth 10 check: 67074049
8192 trees of depth 12 check: 67092481
2048 trees of depth 14 check: 67074049
512 trees of depth 16 check: 66977281
128 trees of depth 18 check: 66584449
32 trees of depth 20 check: 65011681
cmake-build-debug/main 21.09s user 0.02s system 99% cpu 21.113 total
GO
jonny#skyhawk  ~/Projects/benchmark  time ./main  ✔  2604  02:34:29
2097152 trees of depth 4 check: 65011681
524288 trees of depth 6 check: 66584449
131072 trees of depth 8 check: 66977281
32768 trees of depth 10 check: 67074049
8192 trees of depth 12 check: 67092481
2048 trees of depth 14 check: 67074049
512 trees of depth 16 check: 66977281
128 trees of depth 18 check: 66584449
32 trees of depth 20 check: 65011681
./main 48.72s user 0.52s system 197% cpu 24.905 total
I built the C++ main.cpp with CLion's most basic / default settings (this CMakeLists.txt, which will build a main.cpp):
cmake_minimum_required(VERSION 3.3)
project(test_build)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
set(BUILD_1 main)
set(SOURCE_FILES_1 main.cpp)
add_executable(${BUILD_1} ${SOURCE_FILES_1})

Related

tensorflow c++ batch inference

I have a problem with making inference on a batchsize greater than 1 using the c++ tensorflow api. The network input planes are 8x8x13 and the output is a single float. When I try to infer on multiple samples as follows, the result is correct only for the first sample. I used keras2tensorflow tool for converting the graph to .pb format.
node {
name: "main_input"
op: "Placeholder"
attr {
key: "dtype"
value {
type: DT_FLOAT
}
}
attr {
key: "shape"
value {
shape {
dim {
size: -1
}
dim {
size: 8
}
dim {
size: 8
}
dim {
size: 12
}
}
}
}
}
Edit: Output node is a scalar. Could the culprit be the keras2tensorflow code that I used to convert keras hdf5 file to pb ? Maybe the output should have been a -1x1 to accept any number of samples just like the input planes). I got the converter code from the following link: keras_to_tensorflow
node {
name: "value_0"
op: "Identity"
input: "strided_slice"
attr {
key: "T"
value {
type: DT_FLOAT
}
}
}
The input plane dimension is correctly set to -1 x 8 x 8 x 13.
// Runs one batched inference over the loaded TensorFlow graph and prints
// one float result per sample.
void test() {
//input planes
const int nmoves = pstack->count; //This is the number of samples
// Batch of nmoves samples, shape {N, 8, 8, CHANNELS}.
// NOTE(review): the prose says the network input is 8x8x13 while the graph
// dump above shows a final dim of 12 — confirm CHANNELS matches the .pb graph.
TensorShape input_shape({nmoves, 8, 8, CHANNELS});
Tensor inputs(DT_FLOAT, input_shape);
//.... Initialize input planes
//output
std::vector<Tensor> outputs;
//run session
TF_CHECK_OK( session->Run(
{{input_layer, inputs}}, {output_layer}, {}, &outputs)
);
//get results
// Assumes outputs[0] holds one float per sample. If the exported output node
// was built from a single sample (a scalar), only index 0 is meaningful and
// the reads below run past the end of the tensor's buffer — which matches
// the garbage values reported for I14..I19.
auto outd = outputs[0].flat<float>().data(); //is this correct way to access the data for multiple samples ?
for(int i = 0;i < nmoves; i++) {
float p = outd[i]; //The value of p is wrong for all but the first one
std::cout << "I" << i << " == " << p << std::endl;
}
}
Example output (p) for each sample where the result is supposed to be between 0 and 1 is shown below. Only I0 is correct while I16 and I18 have very large values. I think the problem is that after running the session the dimension of outputs is still 1, should have been 20. Is it possible at all to do inference on multiple samples using the c++ api ?
I0 == 0.434162
I1 == 0
I2 == 0
I3 == 0.0640963
I4 == 0.0718748
I5 == 0.325485
I6 == 0
I7 == 0
I8 == 0
I9 == 0
I10 == 0.141193
I11 == 0.398055
I12 == 0.237758
I13 == 0.530693
I14 == 2.44527e-42
I15 == 0
I16 == -5.62959e+14
I17 == 4.56697e-41
I18 == -5.62959e+14
I19 == 4.56697e-41
The problem turned out to be due to a bug in the keras_to_tensorflow tool I used for conversion. I reported the issue here. The bug is still present in keras_to_tensorflow.
On line 68:
pred[i] = tf.identity(net_model.output[i], name=pred_node_names[i])
The "output" should have been "outputs"
pred[i] = tf.identity(net_model.outputs[i], name=pred_node_names[i])

Recursive Breadth First Search works on first execution but not following executions

The solution presented at CodeReview works fine on CentOS 7.1. I have tried to port it to Windows 7, Visual Studio 2012. With minor edits to allow for the parts of C++11 that VS 2012 doesn't support the code compiles, and the first execution of the loop works correctly. The rest of the execution of test cases fail, growing progressively more incorrect with each execution.
The code for this problem can be found on github.
finished computation at 0 /* problem here not included in question */
elapsed time: 0.202012 Seconds
The point of origin for all path searches was A3
The destination point for all path searches was H4
The number of squares on each edge of the board is 8
The slicing methodology used to further limit searches was no repeat visits to any rows or columns.
There are 5 Resulting Paths
There were 323 attempted paths
The average path length is 4.8
The median path length is 4
The longest path is 6 moves
The shortest path is 4 moves
I believe the problem is in one of the files listed below. I've been debugging this for 2 days, I can use some help.
CRKnightMoves_Cpp2.cpp
/*
* KnightMoves.cpp
*
* Author: pacmaninbw
*/
#include "stdafx.h"
#include <iostream>
#include <stdexcept>
#include <chrono>
#include <ctime>
#include <algorithm>
#include <functional>
#include <vector>
#include "KnightMoves.h"
#include "KnightMovesImplementation.h"
#include "KMBoardDimensionConstants.h"
// Returns the arithmetic mean of the recorded test times, or 0.0 for an
// empty vector (guards the division by zero explicitly).
// Takes the vector by const reference: the original pass-by-value copied
// the whole container on every call, and the separate element counter
// duplicated what size() already provides.
double Average(const std::vector<double>& TestTimes)
{
    if (TestTimes.empty()) {
        return 0.0;
    }
    double SumOfTestTimes = 0.0;
    for (const double TestTime : TestTimes) {
        SumOfTestTimes += TestTime;
    }
    return SumOfTestTimes / static_cast<double>(TestTimes.size());
}
// Prints summary statistics (mean, median, longest, shortest) for the
// collected execution times. Takes its argument by value on purpose: the
// median selection below reorders the container and the caller's copy must
// stay intact.
void OutputOverAllStatistics(std::vector<double> TestTimes)
{
    if (TestTimes.empty()) {
        std::cout << "No test times to run statistics on!" << std::endl;
        return;
    }
    std::cout << std::endl << "Overall Results" << std::endl;
    std::cout << "The average execution time is " << Average(TestTimes) << " seconds" << std::endl;
    // nth_element only places the middle element in its sorted position —
    // exactly what the median needs, in O(n).
    std::nth_element(TestTimes.begin(), TestTimes.begin() + TestTimes.size()/2, TestTimes.end());
    std::cout << "The median execution time is " << TestTimes[TestTimes.size()/2] << " seconds" << std::endl;
    // One minmax_element pass replaces the original pair of nth_element
    // calls with reversed comparators; the printed values are identical.
    const auto Extremes = std::minmax_element(TestTimes.begin(), TestTimes.end());
    std::cout << "The longest execution time is " << *Extremes.second << " seconds" << std::endl;
    std::cout << "The shortest execution time is " << *Extremes.first << " seconds" << std::endl;
}
// Runs one path search for the given test case, prints timing information
// and the results, and returns the elapsed wall-clock time in seconds.
double ExecutionLoop(KMBaseData UserInputData)
{
    // Stack allocation replaces the original new/delete pair: the finder is
    // now released even if CalculatePaths() throws (RAII).
    KnightMovesImplementation KnightPathFinder(UserInputData);
    std::chrono::time_point<std::chrono::system_clock> start, end;
    start = std::chrono::system_clock::now();
    KMOutputData OutputData = KnightPathFinder.CalculatePaths();
    end = std::chrono::system_clock::now();
    std::chrono::duration<double> elapsed_seconds = end-start;
    std::time_t end_time = std::chrono::system_clock::to_time_t(end);
    double ElapsedTimeForOutPut = elapsed_seconds.count();
    // BUG FIX: the original streamed the return value of ctime_s — an
    // errno_t, which printed as "0" ("finished computation at 0" in the
    // posted output) — instead of the formatted time. Fill the buffer first,
    // then print the buffer.
    char ctimebuffer[1024];
    ctime_s(ctimebuffer, sizeof(ctimebuffer), &end_time);
    std::cout << "finished computation at " << ctimebuffer << "\n"
        << "elapsed time: " << ElapsedTimeForOutPut << " Seconds\n" << "\n" << "\n";
    // Don't include output of results in elapsed time calculation
    OutputData.DontShowPathData();
    // OutputData.ShowPathData();
    OutputData.ShowResults();
    return ElapsedTimeForOutPut;
}
// Presents a numbered menu of test cases plus two aggregate options and
// reads the user's selection. When the "all except the expensive cases"
// option is chosen, TestData is filtered in place to drop the cases with
// the maximum board size or previous-location slicing.
// Returns the raw number the user typed (may be out of range; caller checks).
int LetUserEnterTestCaseNumber(std::vector<KMBaseData> &TestData)
{
    int i = 1;
    int Choice = -1;
    std::cout << "Select the number of the test case you want to run.\n";
    std::cout << "Test Case #" << "\tStart Name" << "\tTarget Name" << "\tBoard Size" << "\tSlicing Method" << "\n";
    for (auto TestCase: TestData) {
        std::cout << i << "\t" << TestCase.m_StartName << "\t" << TestCase.m_TargetName << "\t" << TestCase.m_DimensionOneSide << "\t";
        switch (TestCase.m_LimitationsOnMoves)
        {
        default :
            throw std::runtime_error("LetUserEnterTestCaseNumber : Unknown type of Path Limitation.");
        case DenyByPreviousLocation :
            std::cout << "Can't return to previous location";
            break;
        case DenyByPreviousRowOrColumn:
            std::cout << "Can't return to previous row or column";
            break;
        }
        std::cout << "\n";
        i++;
    }
    // Derive the aggregate menu number from the list size instead of the
    // original hard-coded 15, so adding a test case cannot desynchronize
    // the menu text from the comparison below.
    const int AllButSlowChoice = i;
    std::cout << i << "\tAll of the above except for 13 and 14\n";
    std::cout << ++i <<"\tAll of the above (Go get lunch)\n";
    std::cin >> Choice;
    if (Choice == AllButSlowChoice)
    {
        // Keep only the affordable cases (cases 13 and 14 in the default data
        // are the maximum-size board and the previous-location slicing run).
        std::vector<KMBaseData> TempTests;
        for (auto TestCase: TestData)
        {
            if ((TestCase.m_DimensionOneSide != MaximumBoardDimension) && (TestCase.m_LimitationsOnMoves != DenyByPreviousLocation))
            {
                TempTests.push_back(TestCase);
            }
        }
        TestData = TempTests;
    }
    return Choice;
}
// Populates TestData with the built-in benchmark cases.
// Fields per entry: start row, start column, start name, target row,
// target column, target name, board dimension, slicing method.
void InitTestData(std::vector<KMBaseData> &TestData)
{
KMBaseData TestCases[] = {
{1,3,"A3",8,4,"H4", DefaultBoardDimensionOnOneSide, DenyByPreviousRowOrColumn},
{1,1,"A1",8,8,"H8", DefaultBoardDimensionOnOneSide, DenyByPreviousRowOrColumn},
{1,8,"A8",8,1,"H1", DefaultBoardDimensionOnOneSide, DenyByPreviousRowOrColumn},
{2,3,"B3",8,4,"H4", DefaultBoardDimensionOnOneSide, DenyByPreviousRowOrColumn},
{2,3,"B3",8,8,"H8", DefaultBoardDimensionOnOneSide, DenyByPreviousRowOrColumn},
{3,1,"C1",8,4,"H4", DefaultBoardDimensionOnOneSide, DenyByPreviousRowOrColumn},
// NOTE(review): coordinates {3,1} are labeled "A3" here but "C1" on the
// line above — one of the two labels is presumably a typo; confirm which
// square was intended.
{3,1,"A3",8,8,"H8", DefaultBoardDimensionOnOneSide, DenyByPreviousRowOrColumn},
{1,3,"A3",2,5,"B5", DefaultBoardDimensionOnOneSide, DenyByPreviousRowOrColumn}, // Minimum should be one move
{8,4,"H4",1,3,"A3", DefaultBoardDimensionOnOneSide, DenyByPreviousRowOrColumn},
{4,4,"D4",1,8,"A8", DefaultBoardDimensionOnOneSide, DenyByPreviousRowOrColumn},
{4,4,"D4",5,6,"E6", DefaultBoardDimensionOnOneSide, DenyByPreviousRowOrColumn},
{1,3,"A3",2,5,"B5", 12, DenyByPreviousRowOrColumn}, // Minimum should be one move
{1,3,"A3",2,5,"B5", DefaultBoardDimensionOnOneSide, DenyByPreviousLocation}, // Minimum should be one move
{1,3,"A3",2,5,"B5", MaximumBoardDimension, DenyByPreviousRowOrColumn} // Minimum should be one move
};
// Copies the fixed array into the caller's vector. (The signed/unsigned
// comparison of int i against sizeof() will warn under -Wall.)
for (int i = 0; i < sizeof(TestCases)/sizeof(KMBaseData); i++) {
TestData.push_back(TestCases[i]);
}
}
int _tmain(int argc, _TCHAR* argv[])
{
int status = 0;
std::vector<KMBaseData> TestData;
std::vector<double> TestTimes;
try {
InitTestData(TestData);
int Choice = LetUserEnterTestCaseNumber(TestData);
if (Choice < 0)
{
return status;
}
if (Choice < 15)
{
ExecutionLoop(TestData[Choice-1]);
}
else
{
for (auto TestDataIter: TestData) {
TestTimes.push_back(ExecutionLoop(TestDataIter));
}
}
OutputOverAllStatistics(TestTimes);
return status;
}
catch(std::runtime_error &e) {
std::cerr << "A fatal error occurred in KnightMoves: ";
std::cerr << e.what() << std::endl;
status = 1;
}
catch(std::runtime_error *e) {
std::cerr << "A fatal error occurred in KnightMoves: ";
std::cerr << e->what() << std::endl;
status = 1;
}
catch(...) {
std::cerr << "An unknown fatal error occurred in KnightMoves." << std::endl;
status = 1;
}
return status;
}
KnightMovesImplementation.h
#pragma once
/*
* KnightMovesImplementation.h
*
* Created on: Mar 18, 2016
* Modified on: June 20, 2016
* Author: pacmaninbw
*
* This class provides the search for all the paths a Knight on a chess
* board can take from the point of origin to the destination. It
* implements a modified Knights Tour. The classic knights tour problem
* is to visit every location on the chess board without returning to a
* previous location. That is a single path for the knight. This
* implementation returns all possible paths from point a to point b.
* The actual implementation is documented in the CPP file because it
can be changed. This header file provides the public interface which
* should not be changed. The public interface may be moved to a
* super class in the future.
*/
#ifndef KNIGHTMOVESIMPLEMENTATION_H_
#define KNIGHTMOVESIMPLEMENTATION_H_
#include "KMPath.h"
#include "KMOutputData.h"
#include "KMMoveFilters.h"
// Drives the recursive search for all knight paths from the point of
// origin to the destination (see the CPP file for the algorithm).
class KnightMovesImplementation {
private:
KMBoardLocation m_PointOfOrigin;
KMBoardLocation m_Destination;
unsigned int m_SingleSideBoardDimension;
KnightMovesMethodLimitations m_PathLimitations;
// Owning raw pointers: allocated in the constructor, freed in the destructor.
// NOTE(review): copy operations are neither defined nor deleted, so copying
// an instance would double-delete these (rule of three) — confirm the class
// is never copied, or switch to unique_ptr / delete the copy operations.
KMOutputData *m_Results;
KMMoveFilters *m_MoveFilters;
KMPath *m_Path;
protected:
bool CalculatePath(KMMove CurrentMove); // Recursive function
void InitPointOfOrigin(KMBaseData UserData);
void InitDestination(KMBaseData UserData);
public:
KnightMovesImplementation(KMBaseData UserData);
virtual ~KnightMovesImplementation(void);
KMOutputData CalculatePaths();
};
#endif /* KNIGHTMOVESIMPLEMENTATION_H_ */
KnightsImplementation.cpp
/*
* KnightMovesImplementation.cpp
*
* Created on: Mar 18, 2016
* Modified on: June 21, 2016
* Commented on: June 24, 2016
* Author: pacmaninbw
*
* This class implements the search for all possible paths for a Knight
* on a chess board from one particular square on the board to another
* particular square on the board.
*
* The current implementation is a Recursive Breadth First Search. Conceptually
* the algorithm implements a B+ tree with a maximum of 8 possible branches
* at each level. The root of the tree is the point of origin. A particular
* path terminates in a leaf. A leaf is the result of either reaching the
* destination, or reaching a point where there are no more branches to
* traverse.
*
* The m_Path variable is used as a stack within the search.
*
* The public interface CalculatePaths establishes the root and creates
* the first level of branching. The protected interface CalculatePath
* performs the recursive depth first search, however, the
* m_MoveFilters.GetPossibleMoves() function it calls performs a breadth
* first search of the current level.
*
*/
#include "stdafx.h"
#include "KnightMoves.h"
#include "KnightMovesImplementation.h"
#include "KMBoardDimensionConstants.h"
// Releases the helpers allocated in the constructor.
// NOTE(review): the class holds these as raw owning pointers without
// deleted copy operations — a copy would double-delete them; consider
// std::unique_ptr members instead.
KnightMovesImplementation::~KnightMovesImplementation(void)
{
delete m_MoveFilters;
delete m_Results;
delete m_Path;
}
// Builds the search context: copies the board/slicing parameters, derives
// the origin and destination locations, and allocates the path, move-filter
// and result helpers used during the search.
KnightMovesImplementation::KnightMovesImplementation(KMBaseData UserInputData)
: m_SingleSideBoardDimension(UserInputData.m_DimensionOneSide),
m_PathLimitations(UserInputData.m_LimitationsOnMoves)
{
InitPointOfOrigin(UserInputData);
InitDestination(UserInputData);
m_Path = new KMPath;
m_MoveFilters = new KMMoveFilters(static_cast<unsigned int>(UserInputData.m_DimensionOneSide), UserInputData.m_LimitationsOnMoves);
m_Results = new KMOutputData(m_PointOfOrigin, m_Destination, m_SingleSideBoardDimension, m_PathLimitations);
}
// Copies the start-square fields from the user's test data into
// m_PointOfOrigin and stamps it with the board dimension.
void KnightMovesImplementation::InitPointOfOrigin(KMBaseData Input)
{
    m_PointOfOrigin.SetRow(Input.m_StartRow);
    m_PointOfOrigin.SetColumn(Input.m_StartColumn);
    m_PointOfOrigin.SetName(Input.m_StartName);
    m_PointOfOrigin.SetBoardDimension(m_SingleSideBoardDimension);
}
// Copies the target-square fields from the user's test data into
// m_Destination and stamps it with the board dimension.
void KnightMovesImplementation::InitDestination(KMBaseData Input)
{
    m_Destination.SetRow(Input.m_TargetRow);
    m_Destination.SetColumn(Input.m_TargetColumn);
    m_Destination.SetName(Input.m_TargetName);
    m_Destination.SetBoardDimension(m_SingleSideBoardDimension);
}
// Seeds the search: generates every legal first move from the point of
// origin and starts the recursive search on each. Returns a copy of the
// accumulated results.
KMOutputData KnightMovesImplementation::CalculatePaths()
{
    KMRandomAccessMoveCollection PossibleFirstMoves = m_MoveFilters->GetPossibleMoves(m_PointOfOrigin);
    if (PossibleFirstMoves.empty())
    {
        std::cerr << "No Possible Moves in KnightMovesImplementation::CalculatePaths" << std::endl;
    }
    else
    {
        // The by-value loop variable is already a private copy, so anchor it
        // at the origin directly — the original made a second, redundant
        // copy of every move inside the loop.
        for (auto CurrentMove : PossibleFirstMoves)
        {
            CurrentMove.SetOriginCalculateDestination(m_PointOfOrigin);
            if (CurrentMove.IsValid()) {
                CalculatePath(CurrentMove);
            }
        }
    }
    return *m_Results;
}
// Recursive depth-first extension of the current path by one move.
// The move is pushed onto m_Path and the visited filter on entry, and both
// are popped symmetrically before returning, so the shared state is
// restored for the caller's next sibling move (backtracking).
// Returns true when this move reached the destination.
bool KnightMovesImplementation::CalculatePath(KMMove CurrentMove)
{
bool CompletedSearch = false;
KMBoardLocation CurrentLocation = CurrentMove.GetNextLocation();
m_Path->AddMoveToPath(CurrentMove);
m_MoveFilters->PushVisited(CurrentLocation);
if (CurrentLocation == m_Destination)
{
// Reached the target: record the completed path and count the attempt.
m_Results->AddPath(*m_Path);
CompletedSearch = true;
m_Results->IncrementAttemptedPaths();
}
else
{
if (CurrentMove.IsValid())
{
// Breadth step: collect the legal continuations from this square,
// then recurse depth-first into each.
KMRandomAccessMoveCollection PossibleMoves = m_MoveFilters->GetPossibleMoves(CurrentLocation);
if (!PossibleMoves.empty())
{
for (auto NextMove : PossibleMoves)
{
CalculatePath(NextMove);
}
}
else
{
// No more moves to test, record the attempted path
m_Results->IncrementAttemptedPaths();
}
}
else
{
// There is a logic error if we get here.
std::cerr << "In KnightMovesImplementation::CalculatePath CurrentMove Not Valid" << std::endl;
}
}
// Undo this move's effect on the shared path/visited state (backtrack).
m_Path->RemoveLastMove();
m_MoveFilters->PopVisited();
return CompletedSearch;
}
KMMoveFilters.h
#pragma once
/*
* KMMoveFilters.h
*
* Created on: Jun 20, 2016
* Author: pacmaninbw
*
* This class provides all the possible Knight moves for a specified location
* on the chess board. In the center of the chess board there are 8 possible
* moves. In the middle of the edge on the chess board there are 4 possible
* moves. In a corner of the chess board there are 2 possible moves. The
* location on the board provides the first filter.
* Slicing is used to allow the program to complete in a reasonable finite
* amount of time. The slicing method can be varied, the default slicing
* method is the knight can't return to any row or column it has previously
* visited. The slicing is the second filter.
*/
#ifndef KMMOVEFILTERS_H_
#define KMMOVEFILTERS_H_
#include <vector>
#include "KnightMoves.h"
#include "KMMove.h"
// Generates the knight moves available from a given board location and
// filters them against the board edges and the configured slicing rule.
class KMMoveFilters {
private:
    // Visited-state stacks, pushed/popped in lock step by PushVisited /
    // PopVisited as the search advances and backtracks.
    std::vector<KMBoardLocation> m_VisitedLocations;
    std::vector<unsigned int> m_VisitedRows;
    std::vector<unsigned int> m_VisitedColumns;
    unsigned int m_SingleSideBoardDimension;
    KnightMovesMethodLimitations m_PathLimitations;
    // NOTE(review): shared across ALL instances, and the constructors in the
    // .cpp append the 8 templates to it — repeated construction duplicates
    // the entries (see the accepted fix later in this post).
    static KMRandomAccessMoveCollection AllPossibleMoves;
    // The 8 possible moves the knight can make.
    static KMMove Left1Up2;
    static KMMove Left2Up1;
    static KMMove Left2Down1;
    static KMMove Left1Down2;
    static KMMove Right1Up2;
    static KMMove Right2Up1;
    static KMMove Right2Down1;
    static KMMove Right1Down2;
protected:
    bool IsNotPreviouslyVisited(KMMove Move) const { return IsNotPreviouslyVisited(Move.GetNextLocation()); }
    bool IsNotPreviouslyVisited(KMBoardLocation Destination) const;
public:
    KMMoveFilters(void);
    KMMoveFilters(unsigned int BoardDimension, KnightMovesMethodLimitations SlicingMethod);
    void ResetFilters(unsigned int BoardDimension, KnightMovesMethodLimitations SlicingMethod) {m_SingleSideBoardDimension = BoardDimension; m_PathLimitations = SlicingMethod; }
    virtual ~KMMoveFilters(void);
    void PushVisited(KMBoardLocation Location);
    void PopVisited();
    KMRandomAccessMoveCollection GetPossibleMoves(const KMBoardLocation CurrentLocation) const;
};
// BUG FIX: the posted listing opened the include guard but never closed it;
// without this #endif the header fails to preprocess.
#endif /* KMMOVEFILTERS_H_ */
KMMoveFilters.cpp
/*
* KMMoveFilters.cpp
*
* Created on: Jun 20, 2016
* Author: pacmaninbw
*/
#include "stdafx.h"
#include <stdexcept>
#include <algorithm>
#include "KMBoardDimensionConstants.h"
#include "KMMoveFilters.h"
// Nothing to release: all members are standard containers or values that
// clean up after themselves.
KMMoveFilters::~KMMoveFilters(void)
{
}
// Default filter: default board size, row/column slicing.
// BUG FIX: AllPossibleMoves is a shared static; the original pushed the 8
// move templates unconditionally, so every construction appended 8 more
// duplicates and the search degraded with each run (the symptom this post
// describes). Populate the collection only once.
KMMoveFilters::KMMoveFilters(void)
: m_SingleSideBoardDimension(DefaultBoardDimensionOnOneSide),
m_PathLimitations(DenyByPreviousRowOrColumn)
{
    if (AllPossibleMoves.empty())
    {
        AllPossibleMoves.push_back(Left1Up2);
        AllPossibleMoves.push_back(Left2Up1);
        AllPossibleMoves.push_back(Left2Down1);
        AllPossibleMoves.push_back(Left1Down2);
        AllPossibleMoves.push_back(Right1Up2);
        AllPossibleMoves.push_back(Right2Up1);
        AllPossibleMoves.push_back(Right2Down1);
        AllPossibleMoves.push_back(Right1Down2);
    }
}
// Filter for an explicit board size and slicing method.
// BUG FIX: same defect as the default constructor — unconditional pushes
// into the shared static AllPossibleMoves duplicated the 8 move templates
// on every construction. Populate only when the collection is still empty.
KMMoveFilters::KMMoveFilters(unsigned int BoardDimension, KnightMovesMethodLimitations SlicingMethod)
: m_SingleSideBoardDimension(BoardDimension), m_PathLimitations(SlicingMethod)
{
    if (AllPossibleMoves.empty())
    {
        AllPossibleMoves.push_back(Left1Up2);
        AllPossibleMoves.push_back(Left2Up1);
        AllPossibleMoves.push_back(Left2Down1);
        AllPossibleMoves.push_back(Left1Down2);
        AllPossibleMoves.push_back(Right1Up2);
        AllPossibleMoves.push_back(Right2Up1);
        AllPossibleMoves.push_back(Right2Down1);
        AllPossibleMoves.push_back(Right1Down2);
    }
}
// Signed row/column deltas used to compose the 8 knight move templates.
const int Left1 = -1;
const int Left2 = -2;
const int Down1 = -1;
const int Down2 = -2;
const int Right1 = 1;
const int Right2 = 2;
const int Up1 = 1;
const int Up2 = 2;
// The 8 static move templates. They carry the default board dimension;
// GetPossibleMoves resets the dimension per filter instance before use.
KMMove KMMoveFilters::Left1Up2(Left1, Up2, DefaultBoardDimensionOnOneSide);
KMMove KMMoveFilters::Left2Up1(Left2, Up1, DefaultBoardDimensionOnOneSide);
KMMove KMMoveFilters::Left2Down1(Left2, Down1, DefaultBoardDimensionOnOneSide);
KMMove KMMoveFilters::Left1Down2(Left1, Down2, DefaultBoardDimensionOnOneSide);
KMMove KMMoveFilters::Right1Up2(Right1, Up2, DefaultBoardDimensionOnOneSide);
KMMove KMMoveFilters::Right2Up1(Right2, Up1, DefaultBoardDimensionOnOneSide);
KMMove KMMoveFilters::Right2Down1(Right2, Down1, DefaultBoardDimensionOnOneSide);
KMMove KMMoveFilters::Right1Down2(Right1, Down2, DefaultBoardDimensionOnOneSide);
// Definition of the shared static collection populated by the constructors.
KMRandomAccessMoveCollection KMMoveFilters::AllPossibleMoves;
// Returns every knight move from CurrentLocation that stays on the board
// and does not violate the active slicing filter.
// BUG FIX: the original allocated each candidate with `new` and never
// deleted it, leaking one KMMove per candidate on every call (acknowledged
// later in this post). A by-value loop copy does the same job with no leak.
KMRandomAccessMoveCollection KMMoveFilters::GetPossibleMoves(const KMBoardLocation CurrentLocation) const
{
    KMRandomAccessMoveCollection PossibleMoves;
    for (auto PossibleMove : AllPossibleMoves) {
        PossibleMove.SetBoardDimension(m_SingleSideBoardDimension);
        PossibleMove.SetOriginCalculateDestination(CurrentLocation);
        if ((PossibleMove.IsValid()) && (IsNotPreviouslyVisited(PossibleMove))) {
            PossibleMoves.push_back(PossibleMove);
        }
    }
    return PossibleMoves;
}
// Returns true when PossibleDestination does not violate the active slicing
// rule: the exact location must always be new, and under
// DenyByPreviousRowOrColumn its row and column must both be unvisited.
bool KMMoveFilters::IsNotPreviouslyVisited(KMBoardLocation PossibleDestination) const
{
    bool NotPreviouslyVisited = true;
    if (!m_VisitedLocations.empty()) { // This is always a test, we can't move backwards
        if (std::find(m_VisitedLocations.begin(), m_VisitedLocations.end(), PossibleDestination)
            != m_VisitedLocations.end()) {
            NotPreviouslyVisited = false;
        }
    }
    switch (m_PathLimitations) {
    default :
        // BUG FIX: the original message named a different function
        // (KMPath::CheckMoveAgainstPreviousLocations), which would misdirect
        // anyone debugging this throw.
        throw std::runtime_error("KMMoveFilters::IsNotPreviouslyVisited : Unknown type of Path Limitation.");
    case DenyByPreviousLocation :
        // Always tested above.
        break;
    case DenyByPreviousRowOrColumn:
        if ((!m_VisitedRows.empty()) && (!m_VisitedColumns.empty())) {
            unsigned int PossibleRow = PossibleDestination.GetRow();
            if (std::find(m_VisitedRows.begin(), m_VisitedRows.end(), PossibleRow) != m_VisitedRows.end()) {
                NotPreviouslyVisited = false;
                break;
            }
            unsigned int PossibleColumn = PossibleDestination.GetColumn();
            if (std::find(m_VisitedColumns.begin(), m_VisitedColumns.end(), PossibleColumn) != m_VisitedColumns.end()) {
                NotPreviouslyVisited = false;
            }
        }
        break;
    }
    return NotPreviouslyVisited;
}
// Records Location (and its row/column) as visited; each call is paired
// with a later PopVisited when the search backtracks.
void KMMoveFilters::PushVisited(KMBoardLocation Location)
{
    const unsigned int VisitedRow = Location.GetRow();
    const unsigned int VisitedColumn = Location.GetColumn();
    m_VisitedRows.push_back(VisitedRow);
    m_VisitedColumns.push_back(VisitedColumn);
    m_VisitedLocations.push_back(Location);
}
// Reverts the most recent PushVisited (backtracking step). The three
// containers are pushed and popped in lock step, so one pop from each
// removes exactly the state added by the matching push.
void KMMoveFilters::PopVisited()
{
    m_VisitedLocations.pop_back();
    m_VisitedRows.pop_back();
    m_VisitedColumns.pop_back();
}
The problem was the static declaration of AllPossibleMoves; the memory leak in GetPossibleMoves may have been an additional contributor to the problem. In the CentOS C++11 version AllPossibleMoves was declared as static const and was not initialized in the constructor — it was initialized outside the class, as each of its member moves are. This did not compile in Visual Studio 2012 C++. AllPossibleMoves was declared as static const for execution time reasons in the original version.
I am disappointed in the results, since this is much slower than the CentOS version using C++11 compiled with g++. The computer I'm running this on is 2 years newer than the CentOS computer and has 8GB of memory with an i7 processor.
First I present the working code, then I present the output of the program.
The final code that solves the problem is:
KMMoveFilters.h
#pragma once
/*
* KMMoveFilters.h
*
* Created on: Jun 20, 2016
* Author: pacmaninbw
*
* This class provides all the possible Knight moves for a specified location
* on the chess board. In the center of the chess board there are 8 possible
* moves. In the middle of the edge on the chess board there are 4 possible
* moves. In a corner of the chess board there are 2 possible moves. The
* location on the board provides the first filter.
* Slicing is used to allow the program to complete in a reasonable finite
* amount of time. The slicing method can be varied, the default slicing
* method is the knight can't return to any row or column it has previously
* visited. The slicing is the second filter.
*/
#ifndef KMMOVEFILTERS_H_
#define KMMOVEFILTERS_H_
#include <vector>
#include "KnightMoves.h"
#include "KMMove.h"
// Corrected version of KMMoveFilters: AllPossibleMoves is now a per-instance
// member (no longer static), so repeated construction can no longer append
// duplicate move templates to shared state — the fix this answer describes.
class KMMoveFilters {
private:
std::vector<KMBoardLocation> m_VisitedLocations;
std::vector<unsigned int> m_VisitedRows;
std::vector<unsigned int> m_VisitedColumns;
unsigned int m_SingleSideBoardDimension;
KnightMovesMethodLimitations m_PathLimitations;
// Per-instance copy of the 8 move templates (previously a shared static).
KMRandomAccessMoveCollection AllPossibleMoves;
// The 8 possible moves the knight can make.
static KMMove Left1Up2;
static KMMove Left2Up1;
static KMMove Left2Down1;
static KMMove Left1Down2;
static KMMove Right1Up2;
static KMMove Right2Up1;
static KMMove Right2Down1;
static KMMove Right1Down2;
protected:
bool IsNotPreviouslyVisited(KMMove Move) const { return IsNotPreviouslyVisited(Move.GetNextLocation()); }
bool IsNotPreviouslyVisited(KMBoardLocation Destination) const;
public:
KMMoveFilters(void);
KMMoveFilters(unsigned int BoardDimension, KnightMovesMethodLimitations SlicingMethod);
void ResetFilters(unsigned int BoardDimension, KnightMovesMethodLimitations SlicingMethod) {m_SingleSideBoardDimension = BoardDimension; m_PathLimitations = SlicingMethod; }
virtual ~KMMoveFilters(void);
void PushVisited(KMBoardLocation Location);
void PopVisited();
KMRandomAccessMoveCollection GetPossibleMoves(const KMBoardLocation CurrentLocation) const;
};
#endif /* KMMOVEFILTERS_H_ */
Only the changes in KMMoveFilters.cpp
// Build the list of legal knight moves out of CurrentLocation.
// Each candidate is bound to the current board/origin, then kept only if it
// stays on the board and passes the slicing filter.
//
// Fix: the previous version first copied the whole AllPossibleMoves
// collection into a local ("SafeAllPossibleMoves") and then iterated that
// copy by value. The range-for already copies each element, so the
// collection-level copy was pure overhead on every call and is removed.
KMRandomAccessMoveCollection KMMoveFilters::GetPossibleMoves(const KMBoardLocation CurrentLocation) const
{
KMRandomAccessMoveCollection PossibleMoves;
for (auto PossibleMove : AllPossibleMoves) {
// Anchor the geometric move to the current board and origin square.
PossibleMove.SetBoardDimension(m_SingleSideBoardDimension);
PossibleMove.SetOriginCalculateDestination(CurrentLocation);
if (PossibleMove.IsValid() && IsNotPreviouslyVisited(PossibleMove)) {
PossibleMoves.push_back(PossibleMove);
}
}
return PossibleMoves;
}
The resulting output
Select the number of the test case you want to run.
Test Case # Start Name Target Name Board Size Slicing Method
1 A3 H4 8 Can't return to previous row or column
2 A1 H8 8 Can't return to previous row or column
3 A8 H1 8 Can't return to previous row or column
4 B3 H4 8 Can't return to previous row or column
5 B3 H8 8 Can't return to previous row or column
6 C1 H4 8 Can't return to previous row or column
7 A3 H8 8 Can't return to previous row or column
8 A3 B5 8 Can't return to previous row or column
9 H4 A3 8 Can't return to previous row or column
10 D4 A8 8 Can't return to previous row or column
11 D4 E6 8 Can't return to previous row or column
12 A3 B5 12 Can't return to previous row or column
13 A3 B5 8 Can't return to previous location
14 A3 B5 26 Can't return to previous row or column
15 All of the above except for 13 and 14
16 All of the above (Go get lunch)
finished computation at 0
elapsed time: 0.209012 Seconds
The point of origin for all path searches was A3
The destination point for all path searches was H4
The number of squares on each edge of the board is 8
The slicing methodology used to further limit searches was no repeat visits to any rows or columns.
There are 5 Resulting Paths
There were 323 attempted paths
The average path length is 4.8
The median path length is 4
The longest path is 6 moves
The shortest path is 4 moves
finished computation at 0
elapsed time: 0.0930054 Seconds
The point of origin for all path searches was A1
The destination point for all path searches was H8
The number of squares on each edge of the board is 8
The slicing methodology used to further limit searches was no repeat visits to any rows or columns.
There are 22 Resulting Paths
There were 160 attempted paths
The average path length is 6.36364
The median path length is 6
The longest path is 8 moves
The shortest path is 6 moves
finished computation at 0
elapsed time: 0.0950054 Seconds
The point of origin for all path searches was A8
The destination point for all path searches was H1
The number of squares on each edge of the board is 8
The slicing methodology used to further limit searches was no repeat visits to any rows or columns.
There are 22 Resulting Paths
There were 160 attempted paths
The average path length is 6.36364
The median path length is 6
The longest path is 8 moves
The shortest path is 6 moves
finished computation at 0
elapsed time: 0.248014 Seconds
The point of origin for all path searches was B3
The destination point for all path searches was H4
The number of squares on each edge of the board is 8
The slicing methodology used to further limit searches was no repeat visits to any rows or columns.
There are 8 Resulting Paths
There were 446 attempted paths
The average path length is 5
The median path length is 5
The longest path is 7 moves
The shortest path is 3 moves
finished computation at 0
elapsed time: 0.251014 Seconds
The point of origin for all path searches was B3
The destination point for all path searches was H8
The number of squares on each edge of the board is 8
The slicing methodology used to further limit searches was no repeat visits to any rows or columns.
There are 39 Resulting Paths
There were 447 attempted paths
The average path length is 6.23077
The median path length is 7
The longest path is 7 moves
The shortest path is 5 moves
finished computation at 0
elapsed time: 0.17801 Seconds
The point of origin for all path searches was C1
The destination point for all path searches was H4
The number of squares on each edge of the board is 8
The slicing methodology used to further limit searches was no repeat visits to any rows or columns.
There are 7 Resulting Paths
There were 324 attempted paths
The average path length is 4.85714
The median path length is 4
The longest path is 6 moves
The shortest path is 4 moves
finished computation at 0
elapsed time: 0.18201 Seconds
The point of origin for all path searches was A3
The destination point for all path searches was H8
The number of squares on each edge of the board is 8
The slicing methodology used to further limit searches was no repeat visits to any rows or columns.
There are 36 Resulting Paths
There were 324 attempted paths
The average path length is 6
The median path length is 6
The longest path is 8 moves
The shortest path is 4 moves
finished computation at 0
elapsed time: 0.131008 Seconds
The point of origin for all path searches was A3
The destination point for all path searches was B5
The number of squares on each edge of the board is 8
The slicing methodology used to further limit searches was no repeat visits to any rows or columns.
There are 6 Resulting Paths
There were 243 attempted paths
The average path length is 3
The median path length is 3
The longest path is 5 moves
The shortest path is 1 moves
finished computation at 0
elapsed time: 0.17301 Seconds
The point of origin for all path searches was H4
The destination point for all path searches was A3
The number of squares on each edge of the board is 8
The slicing methodology used to further limit searches was no repeat visits to any rows or columns.
There are 12 Resulting Paths
There were 318 attempted paths
The average path length is 5.66667
The median path length is 6
The longest path is 8 moves
The shortest path is 4 moves
finished computation at 0
elapsed time: 0.332019 Seconds
The point of origin for all path searches was D4
The destination point for all path searches was A8
The number of squares on each edge of the board is 8
The slicing methodology used to further limit searches was no repeat visits to any rows or columns.
There are 24 Resulting Paths
There were 602 attempted paths
The average path length is 5.25
The median path length is 5
The longest path is 7 moves
The shortest path is 3 moves
finished computation at 0
elapsed time: 0.266015 Seconds
The point of origin for all path searches was D4
The destination point for all path searches was E6
The number of squares on each edge of the board is 8
The slicing methodology used to further limit searches was no repeat visits to any rows or columns.
There are 21 Resulting Paths
There were 487 attempted paths
The average path length is 4.14286
The median path length is 5
The longest path is 7 moves
The shortest path is 1 moves
finished computation at 0
elapsed time: 1.86411 Seconds
The point of origin for all path searches was A3
The destination point for all path searches was B5
The number of squares on each edge of the board is 12
The slicing methodology used to further limit searches was no repeat visits to any rows or columns.
There are 6 Resulting Paths
There were 3440 attempted paths
The average path length is 3
The median path length is 3
The longest path is 5 moves
The shortest path is 1 moves
Overall Results
The average execution time is 0.335186 seconds
The median execution time is 0.209012 seconds
The longest execution time is 1.86411 seconds
The shortest execution time is 0.0930054 seconds
Overall Results with optimized version.
Overall Results
The average execution time is 0.00266682 seconds
The median execution time is 0.0020001 seconds
The longest execution time is 0.0140008 seconds
The shortest execution time is 0.001 seconds
CentOS version Overall Results
The average execution time is 0.00195405 seconds
The median execution time is 0.00103346 seconds
The longest execution time is 0.00130368 seconds
The shortest execution time is 0.000716237 seconds

GIF LZW decompression

I am trying to implement a simple Gif-Reader in c++.
I am currently stuck on decompressing the image data.
If an image includes a Clear Code my decompression algorithm fails.
After the Clear Code I rebuild the CodeTable and reset the CodeSize to MinimumLzwCodeSize + 1.
Then I read the next code and add it to the indexstream. The problem is that after clearing, the next codes include values greater than the size of the current codetable.
For example the sample file from wikipedia: rotating-earth.gif has a code value of 262 but the GlobalColorTable is only 256. How do I handle this?
I implemented the lzw decompression according to gif spec..
here is the main code part of decompressing:
// GIF LZW main decode loop. codeTable maps LZW codes to strings of colour
// indices; codeStream receives the decoded index stream.
int prevCode = GetCode(ptr, offset, codeSize);
codeStream.push_back(prevCode);
while (true)
{
auto code = GetCode(ptr, offset, codeSize);
//
//Clear code
//
if (code == IndexClearCode)
{
//reset codesize
codeSize = blockA.LZWMinimumCodeSize + 1;
currentNodeValue = pow(2, codeSize) - 1;
//reset codeTable
// Shrink the table back to the root codes: one entry per palette colour
// plus the clear and end-of-information codes.
codeTable.resize(colorTable.size() + 2);
//read next code
prevCode = GetCode(ptr, offset, codeSize);
codeStream.push_back(prevCode);
continue;
}
else if (code == IndexEndOfInformationCode)
break;
//exists in dictionary
if (codeTable.size() > code)
{
if (prevCode >= codeTable.size())
{
prevCode = code;
continue;
}
// Emit the known string, then add prev-string + first(current-string)
// as the next table entry.
for (auto c : codeTable[code])
codeStream.push_back(c);
newEntry = codeTable[prevCode];
newEntry.push_back(codeTable[code][0]);
codeTable.push_back(newEntry);
prevCode = code;
// Grow the code width once the table fills the current width.
// NOTE(review): per the asker's later fix this also needs a
// "codeSize < 12" guard to handle deferred clear codes.
if (codeTable.size() - 1 == currentNodeValue)
{
codeSize++;
currentNodeValue = pow(2, codeSize) - 1;
}
}
else
{
if (prevCode >= codeTable.size())
{
prevCode = code;
continue;
}
// Code not yet in the table: the classic LZW "KwKwK" case; the new
// string is prev-string + its own first index.
newEntry = codeTable[prevCode];
newEntry.push_back(codeTable[prevCode][0]);
for (auto c : newEntry)
codeStream.push_back(c);
codeTable.push_back(newEntry);
prevCode = codeTable.size() - 1;
if (codeTable.size() - 1 == currentNodeValue)
{
codeSize++;
currentNodeValue = pow(2, codeSize) - 1;
}
}
}
Found the solution.
It is called a deferred clear code. So when I check whether the codeSize needs to be incremented, I also need to check whether the codeSize is already at the maximum (12), as it is possible to get codes that are of the maximum code size. See spec-gif89a.txt.
// Deferred clear code handling: a GIF encoder may keep emitting codes at the
// maximum width without ever sending a clear code, so the code size must
// never grow past the 12-bit limit even though the table keeps growing.
if (codeTable.size() - 1 == currentNodeValue && codeSize < 12)
{
codeSize++;
currentNodeValue = (1 << codeSize) - 1;
}

optimize octree octant_determination function in c++

I am building a spatial octree. In order to determine in which branch/octant a certain point (x,y,z) should be placed, I use this function:
// Octant classification (question code): one comparison per axis produces a
// 0/1 flag, and the three flags are packed into an octant index 0-7.
if (x>x_centre) {
xsign = 1;
}
else {
xsign = 0;
}
if (y>y_centre) {
ysign = 1;
}
else {
ysign = 0;
}
if (z>z_centre) {
zsign = 1;
}
else {
zsign = 0;
}
// Octant index: bit 0 = x side, bit 1 = y side, bit 2 = z side.
return xsign + 2*ysign + 4*zsign;
It returns a number between 0 and 7, unique for every octant. It turns out this snippet is called a great many times, and it gets quite time consuming when building large trees.
Is there any easy way to speed this process up?
This already gives a 30 percent speed up:
// Branchless octant index: each comparison yields 0/1 and contributes one
// bit of the result (bit 0 = x side, bit 1 = y side, bit 2 = z side).
xsign = x>x_centre;
ysign = y>y_centre;
// BUG FIX: z must be compared against z_centre; the original compared
// against y_centre, misclassifying points into the wrong octant.
zsign = z>z_centre;
return xsign + 2*ysign + 4*zsign;
Any other tips?

How to decode huffman code quickly?

I have implemented a simple compressor using pure Huffman coding under Windows. But I do not know much about how to decode the compressed file quickly; my bad algorithm is:
Enumerate all the Huffman codes in the code table, then compare them with the bits in the compressed file. This gives a horrible result: decompressing a 3 MB file would need 6 hours.
Could you provide a much more efficient algorithm? Should I use a hash table or something?
Update:
I have implemented the decoder with a state table, based on my friend Lin's advice. I think this method is better than traversal of the Huffman tree: a 3 MB file decompresses within 6 seconds.
thanks.
One way to optimise the binary-tree approach is to use a lookup table. You arrange the table so that you can look up a particular encoded bit-pattern directly, allowing for the maximum possible bit-width of any code.
Since most codes don't use the full maximum width, they are included at multiple locations in the table - one location for each combination of the unused bits. The table indicates how many bits to discard from the input as well as the decoded output.
If the longest code is too long, so the table is impractical, a compromise is to use a tree of smaller fixed-width-subscript lookups. For example, you can use a 256-item table to handle a byte. If the input code is more than 8 bits, the table entry indicates that decoding is incomplete and directs you to a table that handles the next up-to 8 bits. Larger tables trade memory for speed - 256 items is probably too small.
I believe this general approach is called "prefix tables", and is what BobMcGees quoted code is doing. A likely difference is that some compression algorithms require the prefix table to be updated during decompression - this is not needed for simple Huffman. IIRC, I first saw it in a book about bitmapped graphics file formats which included GIF, some time before the patent panic.
It should be easy to precalculate either a full lookup table, a hashtable equivalent, or a tree-of-small-tables from a binary tree model. The binary tree is still the key representation (mental model) of how the code works - this lookup table is just an optimised way to implement it.
Why not take a look at how the GZIP source does it, specifically the Huffman decompression code in specifically unpack.c? It's doing exactly what you are, except it's doing it much, much faster.
From what I can tell, it's using a lookup array and shift/mask operations operating on whole words to run faster. Pretty dense code though.
EDIT: here is the complete source
/* unpack.c -- decompress files in pack format.
* Copyright (C) 1992-1993 Jean-loup Gailly
* This is free software; you can redistribute it and/or modify it under the
* terms of the GNU General Public License, see the file COPYING.
*/
#ifdef RCSID
static char rcsid[] = "$Id: unpack.c,v 1.4 1993/06/11 19:25:36 jloup Exp $";
#endif
#include "tailor.h"
#include "gzip.h"
#include "crypt.h"
#define MIN(a,b) ((a) <= (b) ? (a) : (b))
/* The arguments must not have side effects. */
#define MAX_BITLEN 25
/* Maximum length of Huffman codes. (Minor modifications to the code
* would be needed to support 32 bits codes, but pack never generates
* more than 24 bits anyway.)
*/
#define LITERALS 256
/* Number of literals, excluding the End of Block (EOB) code */
#define MAX_PEEK 12
/* Maximum number of 'peek' bits used to optimize traversal of the
* Huffman tree.
*/
local ulg orig_len; /* original uncompressed length */
local int max_len; /* maximum bit length of Huffman codes */
local uch literal[LITERALS];
/* The literal bytes present in the Huffman tree. The EOB code is not
* represented.
*/
local int lit_base[MAX_BITLEN+1];
/* All literals of a given bit length are contiguous in literal[] and
* have contiguous codes. literal[code+lit_base[len]] is the literal
* for a code of len bits.
*/
local int leaves [MAX_BITLEN+1]; /* Number of leaves for each bit length */
local int parents[MAX_BITLEN+1]; /* Number of parents for each bit length */
local int peek_bits; /* Number of peek bits currently used */
/* local uch prefix_len[1 << MAX_PEEK]; */
#define prefix_len outbuf
/* For each bit pattern b of peek_bits bits, prefix_len[b] is the length
* of the Huffman code starting with a prefix of b (upper bits), or 0
* if all codes of prefix b have more than peek_bits bits. It is not
* necessary to have a huge table (large MAX_PEEK) because most of the
* codes encountered in the input stream are short codes (by construction).
* So for most codes a single lookup will be necessary.
*/
#if (1<<MAX_PEEK) > OUTBUFSIZ
error cannot overlay prefix_len and outbuf
#endif
local ulg bitbuf;
/* Bits are added on the low part of bitbuf and read from the high part. */
local int valid; /* number of valid bits in bitbuf */
/* all bits above the last valid bit are always zero */
/* Set code to the next 'bits' input bits without skipping them. code
* must be the name of a simple variable and bits must not have side effects.
* IN assertions: bits <= 25 (so that we still have room for an extra byte
* when valid is only 24), and mask = (1<<bits)-1.
*/
#define look_bits(code,bits,mask) \
{ \
while (valid < (bits)) bitbuf = (bitbuf<<8) | (ulg)get_byte(), valid += 8; \
code = (bitbuf >> (valid-(bits))) & (mask); \
}
/* Skip the given number of bits (after having peeked at them): */
#define skip_bits(bits) (valid -= (bits))
#define clear_bitbuf() (valid = 0, bitbuf = 0)
/* Local functions */
local void read_tree OF((void));
local void build_tree OF((void));
/* ===========================================================================
* Read the Huffman tree.
*/
/* Parses the pack-format header from the input: the 4-byte original length
* (MSB first), the maximum code bit length, the leaf count per bit length,
* and finally the literal bytes grouped by code length into literal[] with
* per-length offsets recorded in lit_base[].
*/
local void read_tree()
{
int len; /* bit length */
int base; /* base offset for a sequence of leaves */
int n;
/* Read the original input size, MSB first */
orig_len = 0;
for (n = 1; n <= 4; n++) orig_len = (orig_len << 8) | (ulg)get_byte();
max_len = (int)get_byte(); /* maximum bit length of Huffman codes */
if (max_len > MAX_BITLEN) {
error("invalid compressed data -- Huffman code > 32 bits");
}
/* Get the number of leaves at each bit length */
n = 0;
for (len = 1; len <= max_len; len++) {
leaves[len] = (int)get_byte();
n += leaves[len];
}
if (n > LITERALS) {
error("too many leaves in Huffman tree");
}
Trace((stderr, "orig_len %ld, max_len %d, leaves %d\n",
orig_len, max_len, n));
/* There are at least 2 and at most 256 leaves of length max_len.
* (Pack arbitrarily rejects empty files and files consisting of
* a single byte even repeated.) To fit the last leaf count in a
* byte, it is offset by 2. However, the last literal is the EOB
* code, and is not transmitted explicitly in the tree, so we must
* adjust here by one only.
*/
leaves[max_len]++;
/* Now read the leaves themselves */
base = 0;
for (len = 1; len <= max_len; len++) {
/* Remember where the literals of this length start in literal[] : */
lit_base[len] = base;
/* And read the literals: */
for (n = leaves[len]; n > 0; n--) {
literal[base++] = (uch)get_byte();
}
}
leaves[max_len]++; /* Now include the EOB code in the Huffman tree */
}
/* ===========================================================================
* Build the Huffman tree and the prefix table.
*/
/* Walks the tree bottom-up (longest codes first) to compute parents[] and
* the lit_base[] bias for each level, then fills prefix_len[] so that codes
* no longer than peek_bits can later be decoded with a single table peek
* (see unpack).
*/
local void build_tree()
{
int nodes = 0; /* number of nodes (parents+leaves) at current bit length */
int len; /* current bit length */
uch *prefixp; /* pointer in prefix_len */
for (len = max_len; len >= 1; len--) {
/* The number of parent nodes at this level is half the total
* number of nodes at parent level:
*/
nodes >>= 1;
parents[len] = nodes;
/* Update lit_base by the appropriate bias to skip the parent nodes
* (which are not represented in the literal array):
*/
lit_base[len] -= nodes;
/* Restore nodes to be parents+leaves: */
nodes += leaves[len];
}
/* Construct the prefix table, from shortest leaves to longest ones.
* The shortest code is all ones, so we start at the end of the table.
*/
peek_bits = MIN(max_len, MAX_PEEK);
prefixp = &prefix_len[1<<peek_bits];
for (len = 1; len <= peek_bits; len++) {
int prefixes = leaves[len] << (peek_bits-len); /* may be 0 */
while (prefixes--) *--prefixp = (uch)len;
}
/* The length of all other codes is unknown: */
while (prefixp > prefix_len) *--prefixp = 0;
}
/* ===========================================================================
* Unpack in to out. This routine does not support the old pack format
* with magic header \037\037.
*
* IN assertions: the buffer inbuf contains already the beginning of
* the compressed data, from offsets inptr to insize-1 included.
* The magic header has already been checked. The output buffer is cleared.
*/
/* Drives the whole decompression: reads and builds the Huffman tree, then
* decodes one code at a time using the prefix_len[] fast path, falling back
* to a level-by-level descent for codes longer than peek_bits.
*/
int unpack(in, out)
int in, out; /* input and output file descriptors */
{
int len; /* Bit length of current code */
unsigned eob; /* End Of Block code */
register unsigned peek; /* lookahead bits */
unsigned peek_mask; /* Mask for peek_bits bits */
ifd = in;
ofd = out;
read_tree(); /* Read the Huffman tree */
build_tree(); /* Build the prefix table */
clear_bitbuf(); /* Initialize bit input */
peek_mask = (1<<peek_bits)-1;
/* The eob code is the largest code among all leaves of maximal length: */
eob = leaves[max_len]-1;
Trace((stderr, "eob %d %x\n", max_len, eob));
/* Decode the input data: */
for (;;) {
/* Since eob is the longest code and not shorter than max_len,
* we can peek at max_len bits without having the risk of reading
* beyond the end of file.
*/
look_bits(peek, peek_bits, peek_mask);
len = prefix_len[peek];
if (len > 0) {
peek >>= peek_bits - len; /* discard the extra bits */
} else {
/* Code of more than peek_bits bits, we must traverse the tree */
ulg mask = peek_mask;
len = peek_bits;
do {
len++, mask = (mask<<1)+1;
look_bits(peek, len, mask);
} while (peek < (unsigned)parents[len]);
/* loop as long as peek is a parent node */
}
/* At this point, peek is the next complete code, of len bits */
if (peek == eob && len == max_len) break; /* end of file? */
put_ubyte(literal[peek+lit_base[len]]);
Tracev((stderr,"%02d %04x %c\n", len, peek,
literal[peek+lit_base[len]]));
skip_bits(len);
} /* for (;;) */
flush_window();
Trace((stderr, "bytes_out %ld\n", bytes_out));
if (orig_len != (ulg)bytes_out) {
error("invalid compressed data--length error");
}
return OK;
}
The typical way to decompress a Huffman code is using a binary tree. You insert your codes in the tree, so that each bit in a code represents a branch either to the left (0) or right (1), with decoded bytes (or whatever values you have) in the leaves.
Decoding is then just a case of reading bits from the coded content, walking the tree for each bit. When you reach a leaf, emit that decoded value, and keep reading until the input is exhausted.
Update: this page describes the technique, and has fancy graphics.
You can perform a kind of batch lookup on the usual Huffmann tree lookup:
Choosing a bit depth (call it depth n); this is a trade-off between speed, memory, and time investment to construct tables;
Build a lookup table for all 2^n bit strings of length n. Each entry may encode several complete tokens; there will commonly also be some bits left over that are only a prefix of Huffman codes: for each of these, make a link to a further lookup table for that code;
Build the further lookup tables. The total number of tables is at most one less than the number of entries coded in the Huffmann tree.
Choosing a depth that is a multiple of four, e.g., depth 8, is a good fit for bit shifting operations.
Postscript This differs from the idea in potatoswatter's comment on unwind's answer and from Steve314's answer in using multiple tables: this means that all of the n-bit lookup is put to use, so should be faster but makes table construction and lookup significantly trickier, and will consume much more space for a given depth.
Why not use the decompress algorithm in the same source module? It appears to be a decent algorithm.
The other answers are right, but here is some code in Rust I wrote recently to make the ideas concrete. This is the key routine:
// Hot-path symbol decode (excerpt of BitDecoder::decode, shown standalone):
// one primary table lookup handles codes up to peekbits wide; longer codes
// are resolved through a chained secondary table.
fn decode( &self, input: &mut InpBitStream ) -> usize
{
let mut sym = self.lookup[ input.peek( self.peekbits ) ];
if sym >= self.ncode
{
// An entry >= ncode is a redirect encoding the secondary table's base.
sym = self.lookup[ sym - self.ncode + ( input.peek( self.maxbits ) >> self.peekbits ) ];
}
input.advance( self.nbits[ sym ] as usize );
sym
}
The tricky bit is setting up the lookup table, see BitDecoder::setup_code in this complete RFC 1951 decoder in Rust:
// RFC 1951 inflate ( de-compress ).
/// Decompress a DEFLATE stream: after a 16-bit header read, each block's
/// 1-bit "last" flag and 2-bit type are read and dispatched until the last
/// block has been processed. Returns the decompressed bytes.
pub fn inflate( data: &[u8] ) -> Vec<u8>
{
let mut inp = InpBitStream::new( &data );
let mut out = Vec::new();
let _chk = inp.get_bits( 16 ); // Checksum
loop
{
let last = inp.get_bit();
let btype = inp.get_bits( 2 );
match btype
{
2 => { do_dyn( &mut inp, &mut out ); } // dynamic Huffman block
1 => { do_fixed( &mut inp, &mut out ); } // fixed Huffman block
0 => { do_copy( &mut inp, &mut out ); } // stored ( uncompressed ) block
_ => { }
}
if last != 0 { break; }
}
out
}
/// Decompress one dynamic-Huffman block (RFC 1951, section 3.2.7): read the
/// code-length-encoded literal/length and distance tables, then decode
/// symbols until the end-of-block code (256).
fn do_dyn( inp: &mut InpBitStream, out: &mut Vec<u8> )
{
let n_lit_code = 257 + inp.get_bits( 5 );
let n_dist_code = 1 + inp.get_bits( 5 );
let n_len_code = 4 + inp.get_bits( 4 );
let mut len = LenDecoder::new( inp, n_len_code );
let mut lit = BitDecoder::new( n_lit_code );
len.get_lengths( inp, &mut lit.nbits );
lit.init();
let mut dist = BitDecoder::new( n_dist_code );
len.get_lengths( inp, &mut dist.nbits );
dist.init();
loop
{
let x = lit.decode( inp );
match x
{
0..=255 => { out.push( x as u8 ); } // literal byte
256 => { break; } // end of block
_ => // length/distance pair: copy `length` bytes from `distance` back
{
let mc = x - 257;
let length = MATCH_OFF[ mc ] + inp.get_bits( MATCH_EXTRA[ mc ] as usize );
let dc = dist.decode( inp );
let distance = DIST_OFF[ dc ] + inp.get_bits( DIST_EXTRA[ dc ] as usize );
copy( out, distance, length );
}
}
}
} // end do_dyn
/// LZ77 back-reference copy: append `length` bytes starting `distance`
/// bytes before the current end of `out`. The copy MUST be byte-by-byte
/// because source and destination overlap when distance < length (the
/// source region grows as bytes are pushed).
fn copy( out: &mut Vec<u8>, distance: usize, mut length: usize )
{
let mut i = out.len() - distance;
while length > 0
{
out.push( out[ i ] );
i += 1;
length -= 1;
}
}
/// Decode length-limited Huffman codes.
struct BitDecoder
{
ncode: usize, // number of symbols in the alphabet
nbits: Vec<u8>, // code length per symbol ( 0 = symbol unused )
maxbits: usize, // longest code length present
peekbits: usize, // width of the primary lookup table ( capped at 8 )
lookup: Vec<usize> // primary table followed by chained secondary tables
}
impl BitDecoder
{
/// Create a decoder for `ncode` symbols. Callers fill `nbits` (one code
/// length per symbol) and then call `init` before decoding.
fn new( ncode: usize ) -> BitDecoder
{
BitDecoder
{
ncode,
nbits: vec![0; ncode],
maxbits: 0,
peekbits: 0,
lookup: Vec::new()
}
}
/// The key routine, will be called many times.
fn decode( &self, input: &mut InpBitStream ) -> usize
{
let mut sym = self.lookup[ input.peek( self.peekbits ) ];
if sym >= self.ncode
{
// An entry >= ncode is a redirect into a secondary table ( see setup_code ).
sym = self.lookup[ sym - self.ncode + ( input.peek( self.maxbits ) >> self.peekbits ) ];
}
input.advance( self.nbits[ sym ] as usize );
sym
}
/// Assign canonical Huffman codes from the `nbits` lengths and build the
/// lookup table(s).
fn init( &mut self )
{
let ncode = self.ncode;
let mut max_bits : usize = 0;
for bp in &self.nbits
{
let bits = *bp as usize;
if bits > max_bits { max_bits = bits; }
}
self.maxbits = max_bits;
self.peekbits = if max_bits > 8 { 8 } else { max_bits };
self.lookup.resize( 1 << self.peekbits, 0 );
// Code below is from rfc1951 page 7
let mut bl_count : Vec<usize> = vec![ 0; max_bits + 1 ]; // the number of codes of length N, N >= 1.
for i in 0..ncode { bl_count[ self.nbits[i] as usize ] += 1; }
let mut next_code : Vec<usize> = vec![ 0; max_bits + 1 ];
let mut code = 0;
bl_count[0] = 0;
for i in 0..max_bits
{
code = ( code + bl_count[i] ) << 1;
next_code[ i + 1 ] = code;
}
for i in 0..ncode
{
let len = self.nbits[ i ] as usize;
if len != 0
{
self.setup_code( i, len, next_code[ len ] );
next_code[ len ] += 1;
}
}
}
// Decoding is done using self.lookup ( see decode ). To keep the lookup table small,
// codes longer than 8 bits are looked up in two peeks.
fn setup_code( &mut self, sym: usize, len: usize, mut code: usize )
{
if len <= self.peekbits
{
// Short code: replicate `sym` into every primary slot whose low
// `len` bits match the code.
let diff = self.peekbits - len;
for i in code << diff .. (code << diff) + (1 << diff)
{
// bits are reversed to match InpBitStream::peek
let r = reverse( i, self.peekbits );
self.lookup[ r ] = sym;
}
} else {
// Secondary lookup required.
let peekbits2 = self.maxbits - self.peekbits;
// Split code into peekbits portion ( key ) and remainder ( code).
let diff1 = len - self.peekbits;
let key = code >> diff1;
code &= ( 1 << diff1 ) - 1;
// Get the secondary lookup.
let kr = reverse( key, self.peekbits );
let mut base = self.lookup[ kr ];
if base == 0 // Secondary lookup not yet allocated for this key.
{
// Append a new secondary table and store the redirect ( offset
// by ncode so decode can tell redirects from symbols ).
base = self.lookup.len();
self.lookup.resize( base + ( 1 << peekbits2 ), 0 );
self.lookup[ kr ] = self.ncode + base;
} else {
base -= self.ncode;
}
// Set the secondary lookup values.
let diff = self.maxbits - len;
for i in code << diff .. (code << diff) + (1<<diff)
{
let r = reverse( i, peekbits2 );
self.lookup[ base + r ] = sym;
}
}
}
} // end impl BitDecoder
struct InpBitStream<'a>
{
data: &'a [u8], // input byte slice being decoded
pos: usize, // index of the next byte to load into buf
buf: usize, // bit buffer; the low bits are the next to be read
got: usize, // Number of bits in buffer.
}
impl <'a> InpBitStream<'a>
{
fn new( data: &'a [u8] ) -> InpBitStream
{
// NOTE(review): buf starts at 1, which ORs a set bit into bit 0 of the
// first byte peeked — confirm against the upstream repository ( 0 looks
// intended ).
InpBitStream { data, pos: 0, buf: 1, got: 0 }
}
/// Return the low `n` bits without consuming them. Bytes are loaded
/// LSB-first; reads past the end of `data` supply zero bits.
fn peek( &mut self, n: usize ) -> usize
{
while self.got < n
{
if self.pos < self.data.len()
{
self.buf |= ( self.data[ self.pos ] as usize ) << self.got;
}
self.pos += 1;
self.got += 8;
}
self.buf & ( ( 1 << n ) - 1 )
}
/// Consume `n` bits previously peeked.
fn advance( &mut self, n:usize )
{
self.buf >>= n;
self.got -= n;
}
/// Read a single bit.
fn get_bit( &mut self ) -> usize
{
if self.got == 0 { self.peek( 1 ); }
let result = self.buf & 1;
self.advance( 1 );
result
}
/// Read `n` bits ( peek then advance ).
fn get_bits( &mut self, n: usize ) -> usize
{
let result = self.peek( n );
self.advance( n );
result
}
/// Read `n` bits one at a time, MSB-first ( fixed Huffman codes are
/// defined most-significant-bit first, unlike get_bits ).
fn get_huff( &mut self, mut n: usize ) -> usize
{
let mut result = 0;
while n > 0
{
result = ( result << 1 ) + self.get_bit();
n -= 1;
}
result
}
/// Drop any buffered bits ( used to byte-align for stored blocks ).
fn clear_bits( &mut self )
{
self.got = 0;
}
} // end impl InpBitStream
/// Decode code lengths.
struct LenDecoder
{
plenc: u8, // previous length code ( which can be repeated )
rep: usize, // repeat
bd: BitDecoder, // Huffman decoder for the 19-symbol code-length alphabet
}
/// Decodes an array of lengths. There are special codes for repeats, and repeats of zeros.
impl LenDecoder
{
/// Read the `n_len_code` 3-bit lengths ( transmitted in CLEN_ALPHABET
/// order ) and build the Huffman decoder for the code-length alphabet.
fn new( inp: &mut InpBitStream, n_len_code: usize ) -> LenDecoder
{
let mut result = LenDecoder { plenc: 0, rep:0, bd: BitDecoder::new( 19 ) };
// Read the array of 3-bit code lengths from input.
for i in 0..n_len_code
{
result.bd.nbits[ CLEN_ALPHABET[i] as usize ] = inp.get_bits(3) as u8;
}
result.bd.init();
result
}
// Per RFC1951 page 13, get array of code lengths.
fn get_lengths( &mut self, inp: &mut InpBitStream, result: &mut Vec<u8> )
{
let n = result.len();
let mut i = 0;
// A repeat run may carry over from the previous call ( the literal and
// distance length arrays share one transmitted sequence ).
while self.rep > 0 { result[i] = self.plenc; i += 1; self.rep -= 1; }
while i < n
{
let lenc = self.bd.decode( inp ) as u8;
if lenc < 16
{
result[i] = lenc;
i += 1;
self.plenc = lenc;
} else {
// 16 = repeat previous length; 17/18 = short/long runs of zeros.
if lenc == 16 { self.rep = 3 + inp.get_bits(2); }
else if lenc == 17 { self.rep = 3 + inp.get_bits(3); self.plenc=0; }
else if lenc == 18 { self.rep = 11 + inp.get_bits(7); self.plenc=0; }
while i < n && self.rep > 0 { result[i] = self.plenc; i += 1; self.rep -= 1; }
}
}
} // end get_lengths
} // end impl LenDecoder
/// Reverse the low `bits` bits of `x`: bit 0 swaps with bit `bits-1`,
/// bit 1 with bit `bits-2`, and so on. Bits at or above `bits` are ignored.
pub fn reverse( x: usize, bits: usize ) -> usize
{
    let mut src = x;
    let mut out: usize = 0;
    for _ in 0..bits
    {
        // Shift the accumulated result left and append the next low bit.
        out = ( out << 1 ) | ( src & 1 );
        src >>= 1;
    }
    out
}
/// Copy a stored ( uncompressed ) block: byte-align the input, read the
/// 16-bit length ( the second 16-bit field is its one's-complement check,
/// read and discarded here ), then copy the bytes straight from the input.
fn do_copy( inp: &mut InpBitStream, out: &mut Vec<u8> )
{
inp.clear_bits(); // Discard any bits in the input buffer
let mut n = inp.get_bits( 16 );
let _n1 = inp.get_bits( 16 ); // one's complement of n, unused
while n > 0 { out.push( inp.data[ inp.pos ] ); n -= 1; inp.pos += 1; }
}
/// Decompress one fixed-Huffman block: the code table is hard-wired by the
/// spec, so symbols are decoded arithmetically from 7/8/9-bit MSB-first
/// codes instead of through a BitDecoder.
fn do_fixed( inp: &mut InpBitStream, out: &mut Vec<u8> ) // RFC1951 page 12.
{
loop
{
// 0 to 23 ( 7 bits ) => 256 - 279; 48 - 191 ( 8 bits ) => 0 - 143;
// 192 - 199 ( 8 bits ) => 280 - 287; 400..511 ( 9 bits ) => 144 - 255
let mut x = inp.get_huff( 7 );
if x <= 23
{
x += 256;
} else {
// Extend to 8 bits, then possibly 9, mapping per the table above.
x = ( x << 1 ) + inp.get_bit();
if x <= 191 { x -= 48; }
else if x <= 199 { x += 88; }
else { x = ( x << 1 ) + inp.get_bit() - 256; }
}
match x
{
0..=255 => { out.push( x as u8 ); } // literal byte
256 => { break; } // end of block
_ => // 257 <= x && x <= 285
{
x -= 257;
let length = MATCH_OFF[x] + inp.get_bits( MATCH_EXTRA[ x ] as usize );
let dcode = inp.get_huff( 5 );
let distance = DIST_OFF[dcode] + inp.get_bits( DIST_EXTRA[dcode] as usize );
copy( out, distance, length );
}
}
}
} // end do_fixed
// RFC 1951 constants.
// Transmission order of the code lengths for the code-length alphabet.
pub static CLEN_ALPHABET : [u8; 19] = [ 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 ];
// Base match lengths for symbols 257..285, indexed by x - 257 ( last entry
// is a guard value ).
pub static MATCH_OFF : [usize; 30] = [ 3,4,5,6, 7,8,9,10, 11,13,15,17, 19,23,27,31, 35,43,51,59,
67,83,99,115, 131,163,195,227, 258, 0xffff ];
// Extra bits to read after each match-length symbol.
pub static MATCH_EXTRA : [u8; 29] = [ 0,0,0,0, 0,0,0,0, 1,1,1,1, 2,2,2,2, 3,3,3,3, 4,4,4,4, 5,5,5,5, 0 ];
// Base distances for distance symbols 0..29.
pub static DIST_OFF : [usize; 30] = [ 1,2,3,4, 5,7,9,13, 17,25,33,49, 65,97,129,193, 257,385,513,769,
1025,1537,2049,3073, 4097,6145,8193,12289, 16385,24577 ];
// Extra bits to read after each distance symbol.
pub static DIST_EXTRA : [u8; 30] = [ 0,0,0,0, 1,1,2,2, 3,3,4,4, 5,5,6,6, 7,7,8,8, 9,9,10,10, 11,11,12,12, 13,13 ];
Github repository here