C++ inheritance and array - c++

I have a Java program with which I create a program using logic function blocks: AND gates, OR gates, that sort of thing. This program can generate functional source code for an Arduino board. This essentially allows people to program an Arduino with only logic gates.
The core essentials work but I am only halfway there and I run into a slight problem.
I have this struct and array
// One record per function block. Every block carries every field regardless of
// its type: three 1-bit inputs, a 1-bit output, a 5-bit pin number, a 4-bit
// type selector and two 32-bit timing fields.
// NOTE(review): `interval` is const and has no initializer, so the elements of
// `block` below cannot be default-initialized as written -- confirm how the
// array is meant to be filled in.
typedef struct blox
{
uint8_t IN1 : 1 ; // generic for most blocks
uint8_t IN2 : 1 ; // generic for most blocks
uint8_t IN3 : 1 ; // generic for most blocks
uint8_t Q : 1 ; // generic for most blocks
uint8_t pin : 5 ; // only inputs and output types need this
uint8_t type : 4 ; // 16 combinations
uint32_t oldTime ; // bad idea to use this amount of memory per block if only delays need it?
const uint32_t interval ; // perhaps couple a function pointers or obj pointer to it?
} FunctionBlock ;
// Storage for all blocks; nBlocks is not defined in this snippet.
FunctionBlock block [ nBlocks ] ;
In the main loop() I run all logic and I update the links. The links are hardcoded.
// Evaluates every function block once according to its type, then copies
// block outputs (Q) to block inputs (INx) along the hardcoded links.
void loop()
{
    /***************** UPDATE FUNCTION BLOCKS *****************/
    for( int i = 0 ; i < nBlocks ; i ++ )
    {
        switch( block[i].type )
        {
        case AND:
            block[i].Q = block[i].IN1 & block[i].IN2 & block[i].IN3 ; // unused inputs INx are initialized to '1'
            break ;

        case OR:
            block[i].Q = block[i].IN1 | block[i].IN2 | block[i].IN3 ;
            break ;

        case M:
            if( block[i].IN3 )      block[i].Q = 0 ; // R
            else if( block[i].IN1 ) block[i].Q = 1 ; // S
            break ;

        case NOT:
            block[i].Q = !block[i].IN2 ;
            break ;

        case INPUT_PIN:
            block[i].Q = digitalRead( block[i].pin ) ;
            break ;

        case OUTPUT_PIN:
            digitalWrite( block[i].pin, block[i].IN2 ) ;
            break ;

        case DEL:
            // Only the current block is handled here: the outer loop already
            // visits every block, so a nested loop would shadow `i` and apply
            // the delay logic to blocks of every type. IN2 is used as the
            // input, like the other single-input blocks (NOT, OUTPUT_PIN).
            if( block[i].Q != block[i].IN2 ) // input differs from output
            {
                if( millis() - block[i].oldTime >= block[i].interval ) // difference has lasted a full interval
                {
                    block[i].Q = block[i].IN2 ; // adopt the new state
                }
            }
            else
            {
                block[i].oldTime = millis() ; // input equals output: keep restarting the timer
            }
            break ;
        }
    }

    /***************** UPDATE LINKS *****************/
    block[3].IN2 = block[1].Q ; // hardcoded list of all links.
    block[3].IN1 = block[0].Q ;
    block[3].IN3 = block[2].Q ;
    block[4].IN2 = block[3].Q ;
}
The problem is that the structure has variables for everything. Now AND and OR gates have a 'pin' variable, and every block uses 8 bytes for timing even though only the delay gate needs it.
I also want to add analog (all that can be more than '0' or '1') components, like an analog input, servo motor, a map() block (to map one range into an other range), comparator contants etc.
Using the struct like this will consume way too much memory.
My best guess would be to use classes and inheritance. But I haven't used inheritance in C++ yet, and I do not know how I can put objects and derived objects in a single array.
// Proposed common base: three 1-bit inputs and a 1-bit output packed as
// bit-fields, plus a full-byte type tag.
class FunctionBlock // AND, OR, MEMORY or NOT gates
{
public:
uint8_t IN1 : 1 ;
uint8_t IN2 : 1 ;
uint8_t IN3 : 1 ;
uint8_t Q : 1 ;
uint8_t type ; // if I create derived classes for OR, AND, MEMORY and not gates, I may discard this variable
} ;
// Adds a pin number to the base block.
// NOTE(review): no access specifier is given, so `pin` is private here --
// confirm whether it was meant to be public like the base-class members.
class IO : public FunctionBlock // INPUT, OUTPUT
{
uint8_t pin ;
} ;
// Adds the two timing fields that only a delay block needs.
// NOTE(review): `delayTime` is const but the class declares no constructor to
// initialize it, and both members are private by default -- confirm intent.
class DELAY : public FunctionBlock
{
uint32_t prevTime ;
const int delayTime ;
} ;
// Block holding the operands of: result = map( var, in1, in2, out1, out2 ) ;
class MAP : public FunctionBlock
{
    int32_t var ;    // first argument of map()
    int32_t result ; // value returned by map()
    int32_t in1 ;    // second argument of map()
    int32_t in2 ;    // third argument of map()
    int32_t out1 ;   // fourth argument of map() (was declared as a second `out2`)
    int32_t out2 ;   // fifth argument of map()
} ;
// class analogIn, class constant, class comperator, class decadeCounter etc etc
Were this Java I would simply do:
// One list holds base-class references to objects of any derived block type.
ArrayList<FunctionBlock> blocks = new ArrayList<>() ;
...
blocks.add( new AND( arguments ) ) ;
How do I get these derived classes to work in c++ ?

I think it might be better to define the separate structures without inheritance, and then use a union in the FunctionBlock structure to keep the data.
Perhaps something like this:
// Per-type payload for input/output blocks.
struct IO
{
uint8_t pin;
};
// Per-type payload for delay blocks.
// NOTE(review): the const member has no initializer -- confirm how DELAY (and
// a union containing it) is meant to be constructed.
struct DELAY
{
uint32_t prevTime;
const int delayTime;
};
// ... the other structures...
// Common record: the shared members plus one anonymous union whose
// alternatives overlap in storage, so a block is as large as its largest
// alternative.
struct FunctionBlock
{
// The current members of the structure...
union
{
IO io;
DELAY delay;
// ... and the other structures...
};
};
Then you can use e.g. block[i].io.pin to get the IO member pin.
Now you can just use the FunctionBlock structure as any other structure, and create a plain normal array.

I chose to go for inheritance, as a union would not really solve the memory problem. Performance is not really important in this case; it does not matter whether this program runs 'efficiently' or not.
I declare objects using the subclasses followed by a pointer array.
#include "functionBlocks.h"
static Input b1 = Input(1) ;
static Input b2 = Input(2) ;
static Input b3 = Input(3) ;
//static Or b4 = Or() ; // if I swap this Or gate for a Delay gate
static Delay b4 = Delay(2000) ; // RAM consumes 4 more bytes as anticipated.
static Delay b5 = Delay( 3000 ) ;
static Output b6 = Output(13) ;
FunctionBlock *block[] =
{
&b1,
&b2,
&b3,
&b4,
&b5,
&b6,
} ;
const int nBlocks = 6 ;
void setup()
{
}
void loop()
{
block[3] -> IN1 = block[0] -> Q ;
block[3] -> IN2 = block[1] -> Q ;
block[3] -> IN3 = block[2] -> Q ;
block[4] -> IN2 = block[3] -> Q ;
block[5] -> IN2 = block[4] -> Q ;
/***************** UPDATE FUNCTION BLOCKS *****************/
for( int i = 0 ; i < nBlocks ; i ++ ) block[i] -> run() ;
} ;
I tested the memory consumption by swapping out an OR gate for a DELAY and that increases memory usage by 4 bytes. This is correct because the DELAY uses one whole uint32_t variable on top the base class.
So using subclasses like this does resolve the memory problem.
If I now add a single OR gate, RAM usage grows by only five bytes. One byte is used for the variables IN1, IN2, IN3 and Q, but I cannot account for the other 4 bytes. I am guessing it is a function pointer to the appropriate run() method.
I also moved all the classes to a seperate header file. It looks as follows:
// Common base of all function blocks: three 1-bit inputs and one 1-bit output
// packed into bit-fields, plus the polymorphic run() hook.
class FunctionBlock
{
public:
    uint8_t IN1 : 1 ;
    uint8_t IN2 : 1 ;
    uint8_t IN3 : 1 ;
    uint8_t Q   : 1 ;

    // Recomputes Q from the inputs. Pure virtual: the base class has no
    // behaviour of its own, and a virtual that is declared but never defined
    // leaves the base class without a vtable at link time.
    virtual void run() = 0 ;
} ;
// AND gate: Q is 1 only while IN1, IN2 and IN3 are all 1.
class And : public FunctionBlock
{
public:
    // All inputs start at 1, so an input that is never written does not
    // force the output low.
    And()
    {
        IN1 = 1 ;
        IN2 = 1 ;
        IN3 = 1 ;
    }

    void run()
    {
        const bool allHigh = IN1 && IN2 && IN3 ;
        Q = allHigh ? 1 : 0 ;
    }
} ;
// OR gate: Q is 1 while at least one of IN1, IN2, IN3 is 1.
class Or : public FunctionBlock
{
public:
    // All inputs start at 0, so an input that is never written does not
    // force the output high.
    Or()
    {
        IN1 = 0 ;
        IN2 = 0 ;
        IN3 = 0 ;
    }

    void run()
    {
        const bool anyHigh = IN1 || IN2 || IN3 ;
        Q = anyHigh ? 1 : 0 ;
    }
} ;
class Delay : public FunctionBlock
{
public:
Delay(int x) : delayTime( x ) // initialize the constant
{
}
void run()
{
if( Q != IN2 ) // if new state changes
{
if( millis() - prevTime >= delayTime ) // keep monitor if interval has expired
{
Q = IN2 ; // if so, adopt the new state
}
}
else
{
prevTime = millis() ; // if new state does not change, keep setting oldTime
}
}
private:
const uint32_t delayTime ;
uint32_t prevTime ;
} ;

Related

Access variable of subclass, using a pointer array of base class

I hope I formulated the title a bit correct.
I have this base class
// Common base of all function blocks: 1-bit inputs and outputs packed into
// bit-fields, plus the polymorphic run() hook.
class FunctionBlock
{
public:
    uint8_t IN1   : 1 ;
    uint8_t IN2   : 1 ;
    uint8_t IN3   : 1 ;
    uint8_t Q     : 1 ;
    uint8_t Q_NOT : 1 ;

    // Recomputes the outputs from the inputs. Pure virtual: the base class
    // has no behaviour of its own, and a virtual that is declared but never
    // defined leaves the base class without a vtable at link time.
    virtual void run() = 0 ;
} ;
And many subclasses like
// OR gate: Q is 1 while at least one of IN1, IN2, IN3 is 1.
class Or : public FunctionBlock // there is also And, JK, SR, NOT etc..
{
public:
    // All inputs start at 0, so an input that is never written does not
    // force the output high.
    Or()
    {
        IN1 = 0 ;
        IN2 = 0 ;
        IN3 = 0 ;
    }

    void run()
    {
        const bool anyHigh = IN1 || IN2 || IN3 ;
        Q = anyHigh ? 1 : 0 ;
    }
} ;
And the one with the problem:
// PWM output block: writes analogIN2 to the pin whenever its value changes.
class AnalogOutput : public FunctionBlock
{
public:
    // _pin: pin number passed to analogWrite().
    // analogIN2 and prevIn both start at 0, so nothing is written until a
    // non-zero level arrives, and the state is defined even when the object
    // is not zero-initialized static storage.
    AnalogOutput( uint8_t _pin ) : analogIN2( 0 ), pin( _pin ), prevIn( 0 )
    {
    }

    void run()
    {
        if( analogIN2 != prevIn )
        {
            prevIn = analogIN2 ; // if incoming change, update PWM level
            analogWrite( pin, analogIN2 ) ;
            // Serial.println( analogIN2 ) ; // DEBUG just testing if it... actually works
        }
    }

    uint8_t analogIN2 ; // I need to be 8 bits instead of 1 bit

private:
    uint8_t pin ;    // pin number given to analogWrite()
    uint8_t prevIn ; // last value written, used to detect changes
} ;
The main program process the logic of all function blocks and it updates the links. The links used to exist of a connection from a Q to an IN1, IN2 or IN3. Using bit fields I can stuff these in one single byte.
Now I have added analog components, so a single bit for Q and IN does not suffice. To solve this problem, I added the public variable analogIN2 as seen in the AnalogOutput class. Similarly I use a public variable int analogQ for an analog input.
For the links I make a distinction between digital and analog links.
Obviously I could change
uint8_t IN1 : 1 ;
uint8_t IN2 : 1 ;
uint8_t IN3 : 1 ;
uint8_t Q : 1 ;
Into
uint16_t IN1 ; // instead of 1 byte, I would need 8 bytes
uint16_t IN2 ;
uint16_t IN3 ;
uint16_t Q ;
But this would increase memory consumption, so I keep this as a last resort
#include "functionBlocks.h"
static AnalogInput b1 = AnalogInput(1) ;
static AnalogOutput b2 = AnalogOutput(5) ;
static And b3 = And() ;
static Or b4 = Or() ;
FunctionBlock *block[] = {
&b1 ,
&b2 ,
&b3 ,
&b4 ,
} ;
const int nBlocks = 4 ;
void setup()
{
}
// Runs every block, then copies outputs to inputs along the fixed links.
// `block` holds FunctionBlock pointers, so only members declared in
// FunctionBlock are reachable through it.
void loop()
{
/***************** UPDATE FUNCTION BLOCKS *****************/
for( int i = 0 ; i < nBlocks ; i ++ ) block[i] -> run() ;
/***************** UPDATE LINKS *****************/
block[1] -> analogIN2 = block[0] -> analogQ ; // <-- analog link does not compile
block[3] -> IN2 = block[2] -> Q ; // digital links work fine
block[2] -> IN3 = block[3] -> Q ;
} ;
I marked the line which does not compile.
C:\Users\me\Documents\hobbyProjects\functionBloX\arduinoProgram\arduinoProgram.ino:26:17: error: 'class FunctionBlock' has no member named 'analogIN2'
block[1] -> analogIN2 = block[0] -> analogQ ;
The reason is obvious: the base class does not know about analogIN2 and analogQ. Therefore the array 'block[]' does not have access to these members.
My questions:
Can I let the FunctionBlock *block[] objects give access to analogQ without increasing memory usage drastically? And if so, how can I do that?
The one thing I could think of was to add a second base class, but I do not know every last in and out of C++, hence this question.
You could even go entirely without inheritance and polymorphism:
#include <tuple>
static auto blocks = std::make_tuple(AnalogInput(1), AnalogOutput(5), And(), Or());
void loop()
{
    // Call run() on every element of the tuple, in order.
    std::apply([](auto&... each) { (each.run(), ...); }, blocks);

    // Assuming that the digital and analog blocks now have members
    // of the same name
    auto& [analogIn, analogOut, andGate, orGate] = blocks;
    analogOut.IN2 = analogIn.Q;
    orGate.IN2 = andGate.Q;
    andGate.IN3 = orGate.Q;
}
Admittedly, the interface of tuples is not as nice as that of arrays or other containers, but they are an option.
If all blocks have the same basic data, you could parametrise that using a template:
#include <concepts>
// Input/output storage parametrised on the integral type T of the fields and
// on N, the width in bits of each bit-field. Every field defaults to 0.
// NOTE(review): default member initializers on bit-fields, like <concepts>,
// need C++20 -- confirm the toolchain supports it.
template<std::integral T = uint8_t, unsigned N = 1>
struct BlockData
{
T IN1 : N = 0;
T IN2 : N = 0;
T IN3 : N = 0;
T Q : N = 0;
T Q_NOT : N = 0;
};
You can also use your current approach, but then the data would need to be encapsulated behind virtual get/set functions:
// Pure interface: no data members; every block type stores its own data and
// exposes it through virtual accessors.
struct FunctionBlock {
virtual ~FunctionBlock() = default;
// Recomputes the block's output(s).
virtual void run() = 0;
// Set channel 1 to the given value
virtual void in1(uint8_t) = 0;
// etc.
};

Arduino servo object does not work within my own library

I have written a library for somebody else to slowly sweep servos from one position to another. It did not work as I intended and I had to remove the servo objects from the library. Instead I let the new version calculate the servo positions and return those values. Yet I would really like to know why it is not working.
The header file with the private Servo objects
#include <Arduino.h>
#include <Servo.h>
// Moves one servo a step at a time between a minimum and a maximum position,
// optionally switching a relay when the position crosses the middle.
class ServoSweep {
public:
    ServoSweep( byte _servoPin, byte _min, byte _max, byte _speed ) ;                 // constructor 1
    ServoSweep( byte _servoPin, byte _min, byte _max, byte _speed, byte _relayPin ) ; // constructor 2
    void sweep( );                    // call repeatedly; advances the position when due
    void setState( uint8_t _state );  // non-zero: move towards max, zero: towards min

private:
    // Every member has a default so that neither constructor leaves one
    // indeterminate; in particular, constructor 1 never sets relayPresent.
    Servo servo ;
    unsigned long timeToRun = 0 ;  // millis() value after which the next step is due
    byte pos = 0 ;                 // current position
    byte state = 0 ;               // requested direction, see setState()
    byte prevPos = 0 ;             // last position written to the servo
    byte servoPin = 0 ;
    byte servoSpeed = 0 ;          // milliseconds between steps
    byte servoMin = 0 ;
    byte servoMax = 0 ;
    byte middlePosition = 0 ;      // midpoint between servoMin and servoMax
    byte relayPresent = 0 ;        // 1 when a relay pin was supplied
    byte relayPin = 0 ;
} ;
And the source file:
#include "ServoSweep.h"
// Constructor 1 (no relay): stores pin, speed and range, starts at the
// midpoint of the range, then writes that position and attaches the servo.
// NOTE(review): servo.write()/servo.attach() run inside the constructor; for
// an object defined at global scope that is before setup() is called --
// confirm the hardware is ready at that point.
ServoSweep::ServoSweep( byte _servoPin, byte _min, byte _max, byte _speed ) { // constructor 1
servoPin = _servoPin ;
servoSpeed = _speed ;
servoMin = _min ;
servoMax = _max ;
middlePosition = ( (long)servoMax - (long)servoMin ) / (long)2 + (long)servoMin ; // start with middle position
pos = middlePosition ;
servo.write( pos ) ;
servo.attach( servoPin ) ;
}
// Constructor 2 (with relay): same as constructor 1, and additionally records
// the relay pin, marks the relay as present and makes the pin an output.
// NOTE(review): servo.write()/servo.attach()/pinMode() run inside the
// constructor; for an object defined at global scope that is before setup()
// is called -- confirm the hardware is ready at that point.
ServoSweep::ServoSweep( byte _servoPin, byte _min, byte _max, byte _speed, byte _relayPin ) { // constructor 2
servoPin = _servoPin ;
servoSpeed = _speed ;
servoMin = _min ;
servoMax = _max ;
middlePosition = ( (long)servoMax - (long)servoMin ) / (long)2 + (long)servoMin ; // midpoint of the range
pos = middlePosition ;
servo.write( pos ) ;
servo.attach( servoPin ) ;
relayPresent = 1;
relayPin = _relayPin ;
pinMode( relayPin, OUTPUT ) ;
}
// Advances the servo by at most one step per servoSpeed milliseconds towards
// servoMax (state != 0) or servoMin (state == 0). Writes the servo, and the
// relay if present, only when the position actually changed.
void ServoSweep::sweep () {
    // The signed difference stays correct when millis() wraps around, whereas
    // a plain `millis() > timeToRun` misfires once timeToRun has overflowed.
    if( (long)( millis() - timeToRun ) <= 0 ) return ;
    timeToRun = millis() + servoSpeed ;

    if( state ) {
        if( pos < servoMax ) pos ++ ;
    }
    else {
        if( pos > servoMin ) pos -- ;
    }

    if( prevPos == pos ) return ; // already at the end stop, nothing to write
    prevPos = pos ;

    if( relayPresent == 1 ) {
        // Relay is LOW below the middle position and HIGH from the middle upwards.
        if( pos < middlePosition ) digitalWrite( relayPin, LOW ) ;
        else                       digitalWrite( relayPin, HIGH ) ;
    }
    servo.write( pos ) ;
}
// Stores the requested direction; sweep() moves towards servoMax while it is
// non-zero and towards servoMin while it is zero.
void ServoSweep::setState( uint8_t _state ) {
state = _state ;
}
The servo signal was complete jitter caused by the arduino. The example sketch I used:
#include "ServoSweep.h"
const int inputButton = 12 ;
const int servoPin1 = 2 ;
const int servoPin2 = 3 ;
unsigned long prev ;
byte state ;
// pin min max speed (bigger speed = slower movement ;
ServoSweep servo1(servoPin1, 10, 30, 50) ;
ServoSweep servo2(servoPin2, 10, 30, 50) ;
void setup() {
pinMode( inputButton, INPUT_PULLUP ) ;
}
// Steps both servos, then drives them in opposite directions from one button.
void loop() {
    servo1.sweep();
    servo2.sweep();

    // Sample the button once so both servos always get consistent, opposite
    // states; two separate reads could straddle a change of the input.
    const bool pinHigh = digitalRead( inputButton ) ;
    servo1.setState( pinHigh ? 1 : 0 ) ;
    servo2.setState( pinHigh ? 0 : 1 ) ;
}
Even if I comment out all code inside the loop, the jitter is there. The jitter starts as soon as I construct the ServoSweep objects.
What did I do wrong with the servo objects? I assume this has to be possible.
The problem is most likely in your constructor. These lines:
servo.write( pos ) ;
servo.attach( servoPin ) ;
in the constructor are trying to work with hardware that may not be ready yet. You are calling the constructor at global scope, so these things may be happening before init() runs and sets up the hardware. So when init() does run, it is probably overwriting values that the servo library had written to timer 1.
This is a common issue and a common newbie trap. Constructors should initialize variables and set up values and things, but they are not for handling hardware. For that you need a begin() or init() method that you can call from setup. Think about how the servo library has the attach function that you have to call from setup. If it were possible to do that in the constructor, they would have had the constructor take the pin number and do it. Think about the begin method that you have to call for Serial to work. That's the same story, there's hardware to setup and you need to be able to control when that happens.
So make one more method:
// Hardware initialisation, meant to be called from setup(): writes the
// current position and then attaches the servo to its pin.
void ServoSweep::begin() {
servo.write( pos ) ;
servo.attach( servoPin ) ;
}
And call that from setup for each object and remove those lines from the constructor.

Why does debounce code not work with 2 or more objects?

I have written a debounce class to debounce inputs.
The idea was that a state of a certain input may be ON, OFF, FALLING or RISING.
the object.debounceInputs() is to be called with a fixed interval
With the function object.readInput() the correct state of the object can be read. A FALLING or RISING state only lasts for 1 interval time (usually set at 20ms) and these states can only be read once.
Of course I tested the software and it worked without flaw; now that I have started using the software in other projects, a peculiar bug has come to light.
The software works perfectly fine... if you have just one input object. If you debounce more than 1 object, the inputs are affecting each other which should not be possible as every object uses private variables.
The source code:
#include "debounceClass.h"
// Remembers the pin and configures it as an input with the internal pull-up.
Debounce::Debounce(unsigned char _pin) {
    pin = _pin;
    pinMode(_pin, INPUT_PULLUP); // take note I use a pull-up resistor by default
}
unsigned char Debounce::readInput() {
byte retValue = state;
if(state == RISING) state = ON; // take note I use a pull-up resistor
if(state == FALLING) state = OFF; // rising or falling may be returned only once
return retValue;
}
// Samples the pin and updates `state`; meant to be called at a fixed interval.
// Two equal consecutive samples are accepted as the debounced level: a change
// of that level yields RISING/FALLING, otherwise ON/OFF.
//
// WARNING: oldSample and statePrev are function-local statics, so a single
// copy of each is shared by every Debounce object. With more than one object
// the objects overwrite each other's history. The proper fix is to make both
// per-object data members, which requires a change to the class definition.
void Debounce::debounceInputs() {
    static bool oldSample = false, statePrev = false;

    const bool newSample = digitalRead(pin);

    if (newSample == oldSample) {        // same level seen on two consecutive samples
        if (newSample != statePrev) {    // debounced level changed: report the edge
            statePrev = newSample;
            state = newSample ? RISING : FALLING;
        }
        else {                           // no change: report the steady level
            state = newSample ? ON : OFF;
        }
    }
    oldSample = newSample;
    // The former `return 255;` is gone: a void function cannot return a value.
}
The corresponding header file:
#include <Arduino.h>
#ifndef button_h
#define button_h
//#define
#define ON 9 // random numbers, RISING and FALLING are already defined in Arduino.h
#define OFF 10
// Debounces one digital input and reports it as ON, OFF, RISING or FALLING.
class Debounce {
public:
// _pin: the pin to read.
Debounce(unsigned char _pin);
// Returns the current state; RISING/FALLING are returned only once.
unsigned char readInput();
// Takes one sample and updates the state; call at a fixed interval.
void debounceInputs();
private:
unsigned char state; // last computed state (ON, OFF, RISING or FALLING)
unsigned char pin; // pin number given to the constructor
};
#endif
I have had this bug with 2 separate projects, so the fault definitely lies in my Debounce class.
An example program to illustrate the program:
#include "debounceClass.h"
const int pin3 = 3 ;
const int pin4 = 4 ;
Debounce obj1( pin3 ) ;
Debounce obj2( pin4 ) ;
byte previousState1, previousState2;
unsigned long prevTime = 0, prevTime1 = 0, prevTime2 = 0;
void setup()
{
Serial.begin( 115200 ) ;
// CONSTRUCTOR OF OBJECTS SETS THE PINMODE TO INPUT_PULLUP
pinMode( pin3, OUTPUT ) ;
pinMode( pin4, OUTPUT ) ;
}
const int interval = 20, interval1 = 1000, interval2 = 2000;
// Test driver: every `interval` ms both objects are debounced and any change
// of their reported state is printed; independently, pin3 and pin4 are
// toggled every interval1 / interval2 ms to generate the input edges.
void loop() {
unsigned long currTime = millis() ;
if( currTime > prevTime + interval ) {
prevTime = currTime ;
obj1.debounceInputs(); // comment one of these 2 out, and the other debounces perfectly
obj2.debounceInputs();
// Expands to a switch case that prints the name of the constant x.
#define printState(x) case x: Serial.print(#x); break
byte state = obj1.readInput() ;
if( state != previousState1 ) { // print only when the reported state changed
previousState1 = state ;
Serial.print("state of obj1 = ") ;
switch ( state ) {
printState( ON ) ;
printState( OFF ) ;
printState( RISING ) ;
printState( FALLING ) ;
}
Serial.println();
}
state = obj2.readInput() ;
if( state != previousState2 ) { // print only when the reported state changed
previousState2 = state ;
Serial.print("state of obj2 = ") ;
switch ( state ) {
printState( ON ) ;
printState( OFF ) ;
printState( RISING ) ;
printState( FALLING ) ;
}
Serial.println();
}
}
// Toggle pin3 by writing back the inverse of its current level.
if( currTime > prevTime1 + interval1 ) {
prevTime1 = currTime ;
digitalWrite( pin3, !digitalRead( pin3 ) );
}
// Toggle pin4 the same way, on its own period.
if( currTime > prevTime2 + interval2 ) {
prevTime2 = currTime ;
digitalWrite( pin4, !digitalRead( pin4 ) );
}
}
This program automatically toggles both pins so you do not need physical inputs. If you comment out one of the indicated lines, you'll see that the other pin is debounced just fine. But when both pins are debounced the result is catastrophic. There is a weird link between the 2 objects which I cannot explain. I have reached a point at which I start doubting the compiler, so that was the moment I realized that I need to ask others.
Why is this happening and what did I do wrong here?
I found the problem.
I cannot use a static variable within a class method. A static local variable is shared by all objects of the class, which caused the problem.
I moved the static variables to the private variable section of the class, so every object has its own copy.

MFC CListbox GetItemData failed

I used DrawItem() to redraw my CListBox. For some reasons, I want to use a custom compare to sort my list items with my own rules, so I use the LBS_SORT style without LBS_HASSTRINGS. After using SetItemData() in OnInitDialog(), I read the data back in DrawItem(), but it didn't work. The code is as below:
init code :
void OnInitDialog(...)
{
.........
m_List.SetListHeight (40);
for (int i = 0 ; i < 20 ; i ++) {
m_List.AddString ((const char *) i);
m_List.SetItemData (i,(100 + i));
}
....
}
compare code :
// Orders two list-box items by their item data interpreted as int.
// Returns -1 if item 1 sorts before item 2, 0 if they sort the same and
// 1 if item 1 sorts after item 2.
int CompareItem(LPCOMPAREITEMSTRUCT lpCompareItemStruct)
{
    ASSERT(lpCompareItemStruct->CtlType == ODT_LISTBOX);
    const int a = (int) lpCompareItemStruct->itemData1;
    const int b = (int) lpCompareItemStruct->itemData2;
    // Compare instead of subtracting: a - b can overflow, and the result
    // is expected to be exactly -1, 0 or 1.
    if (a < b) return -1;
    if (a > b) return 1;
    return 0;
}
redraw code :
DrawItem (lpDIS)
{
..................
CString str;
int i = (int) GetItemData (lpDIS->itemID); // the i is not what I expect.
str.Format ("%d", (int) i);
dc.DrawText (str,CRect (&lpDIS->rcItem), DT_CENTER | DT_VCENTER | DT_SINGLELINE);
...................
}
when I use
***index = m_List.addstring ((const char *) i) ;
m_List.setitemdata (index,(100 + i));***
it works ,but if I use a struct to addstring ,the index is not right ,code is like this below :
struct test {
int a,b,c,d;
};
init_code :
test *ptest = new test[20]; /* just a test ,we don't delete memory till application ends */
for (int i = 0 ; i < 20 ; i ++) {
ptest [i].a = i;
int index = m_List.AddString ((const char *) (ptest + i));
m_List.SetItemDataPtr (index,(void *) (100 + i));
}
compare code :
// Orders two list-box items by the `a` member of the `test` objects their
// item data is taken to point to.
// NOTE(review): this is only valid while every item's data really is a
// `test *`; confirm nothing stores a non-pointer value as item data.
int ListEx::CompareItem(LPCOMPAREITEMSTRUCT lpCompareItemStruct)
{
    // return -1 = item 1 sorts before item 2
    // return 0 = item 1 and item 2 sort the same
    // return 1 = item 1 sorts after item 2
    // ASSERT(lpCompareItemStruct->CtlType == ODT_LISTBOX);
    const test *pa = (const test *) lpCompareItemStruct->itemData1; // crashed here
    const test *pb = (const test *) lpCompareItemStruct->itemData2;
    // ASSERT (pa);
    // ASSERT (pb);
    // Compare instead of subtracting: the difference can overflow and is not
    // limited to the -1 / 0 / 1 values listed above.
    if (pa->a < pb->a) return -1;
    if (pa->a > pb->a) return 1;
    return 0;
}
draw_item code :
CString str;
test *ptest = (test *) (lpDIS->itemData);
str.Format ("%d", (int) ptest->a);
dc.DrawText (str,CRect (&lpDIS->rcItem), DT_CENTER | DT_VCENTER | DT_SINGLELINE);
Can AddString only be used with strings?
If the item is struct data, how can I attach that struct data to the list box item?
In OnInitDialog you should do this:
for (int i = 0 ; i < 20 ; i ++) {
int index = m_List.AddString ((const char *) i);
m_List.SetItemData (index, 100 + i);
}
AddString returns the index where the item has been inserted (which can be anywhere if the list is sorted). m_List.SetItemData (index ,(100 + i)) sets the data for the item you just have inserted.
we can set structure data to list box.
// The structure's string member is what gets added to the list box.
struct _empInfo {
CString strName;
..............
} empInfo = {L"XYZ",...};
m_list.AddString(empInfo.strName);
I tried above code in my app..

How to decode huffman code quickly?

I have implemented a simple compressor using pure Huffman coding under Windows. But I do not know much about how to decode the compressed file quickly. My bad algorithm is:
Enumerate all the Huffman codes in the code table, then compare them with the bits in the compressed file. The result is horrible: decompressing a 3MB file takes 6 hours.
Could you provide a much more efficient algorithm?Should I use Hash or something?
Update:
I have implemented the decoder with a state table, based on my friend Lin's advice. I think this method should be better than traversing the Huffman tree: 3MB within 6s.
thanks.
One way to optimise the binary-tree approach is to use a lookup table. You arrange the table so that you can look up a particular encoded bit-pattern directly, allowing for the maximum possible bit-width of any code.
Since most codes don't use the full maximum width, they are included at multiple locations in the table - one location for each combination of the unused bits. The table indicates how many bits to discard from the input as well as the decoded output.
If the longest code is too long, so the table is impractical, a compromise is to use a tree of smaller fixed-width-subscript lookups. For example, you can use a 256-item table to handle a byte. If the input code is more than 8 bits, the table entry indicates that decoding is incomplete and directs you to a table that handles the next up-to 8 bits. Larger tables trade memory for speed - 256 items is probably too small.
I believe this general approach is called "prefix tables", and is what BobMcGees quoted code is doing. A likely difference is that some compression algorithms require the prefix table to be updated during decompression - this is not needed for simple Huffman. IIRC, I first saw it in a book about bitmapped graphics file formats which included GIF, some time before the patent panic.
It should be easy to precalculate either a full lookup table, a hashtable equivalent, or a tree-of-small-tables from a binary tree model. The binary tree is still the key representation (mental model) of how the code works - this lookup table is just an optimised way to implement it.
Why not take a look at how the GZIP source does it, specifically the Huffman decompression code in unpack.c? It's doing exactly what you are, except it's doing it much, much faster.
From what I can tell, it's using a lookup array and shift/mask operations operating on whole words to run faster. Pretty dense code though.
EDIT: here is the complete source
/* unpack.c -- decompress files in pack format.
* Copyright (C) 1992-1993 Jean-loup Gailly
* This is free software; you can redistribute it and/or modify it under the
* terms of the GNU General Public License, see the file COPYING.
*/
#ifdef RCSID
static char rcsid[] = "$Id: unpack.c,v 1.4 1993/06/11 19:25:36 jloup Exp $";
#endif
#include "tailor.h"
#include "gzip.h"
#include "crypt.h"
#define MIN(a,b) ((a) <= (b) ? (a) : (b))
/* The arguments must not have side effects. */
#define MAX_BITLEN 25
/* Maximum length of Huffman codes. (Minor modifications to the code
* would be needed to support 32 bits codes, but pack never generates
* more than 24 bits anyway.)
*/
#define LITERALS 256
/* Number of literals, excluding the End of Block (EOB) code */
#define MAX_PEEK 12
/* Maximum number of 'peek' bits used to optimize traversal of the
* Huffman tree.
*/
local ulg orig_len; /* original uncompressed length */
local int max_len; /* maximum bit length of Huffman codes */
local uch literal[LITERALS];
/* The literal bytes present in the Huffman tree. The EOB code is not
* represented.
*/
local int lit_base[MAX_BITLEN+1];
/* All literals of a given bit length are contiguous in literal[] and
* have contiguous codes. literal[code+lit_base[len]] is the literal
* for a code of len bits.
*/
local int leaves [MAX_BITLEN+1]; /* Number of leaves for each bit length */
local int parents[MAX_BITLEN+1]; /* Number of parents for each bit length */
local int peek_bits; /* Number of peek bits currently used */
/* local uch prefix_len[1 << MAX_PEEK]; */
#define prefix_len outbuf
/* For each bit pattern b of peek_bits bits, prefix_len[b] is the length
* of the Huffman code starting with a prefix of b (upper bits), or 0
* if all codes of prefix b have more than peek_bits bits. It is not
* necessary to have a huge table (large MAX_PEEK) because most of the
* codes encountered in the input stream are short codes (by construction).
* So for most codes a single lookup will be necessary.
*/
#if (1<<MAX_PEEK) > OUTBUFSIZ
error cannot overlay prefix_len and outbuf
#endif
local ulg bitbuf;
/* Bits are added on the low part of bitbuf and read from the high part. */
local int valid; /* number of valid bits in bitbuf */
/* all bits above the last valid bit are always zero */
/* Set code to the next 'bits' input bits without skipping them. code
* must be the name of a simple variable and bits must not have side effects.
* IN assertions: bits <= 25 (so that we still have room for an extra byte
* when valid is only 24), and mask = (1<<bits)-1.
*/
#define look_bits(code,bits,mask) \
{ \
while (valid < (bits)) bitbuf = (bitbuf<<8) | (ulg)get_byte(), valid += 8; \
code = (bitbuf >> (valid-(bits))) & (mask); \
}
/* Skip the given number of bits (after having peeked at them): */
#define skip_bits(bits) (valid -= (bits))
#define clear_bitbuf() (valid = 0, bitbuf = 0)
/* Local functions */
local void read_tree OF((void));
local void build_tree OF((void));
/* ===========================================================================
* Read the Huffman tree.
*/
/* Reads the header of the packed stream: the 4-byte original length, the
 * maximum code length, the number of leaves per bit length, and the literal
 * bytes themselves. Fills orig_len, max_len, leaves[], lit_base[] and
 * literal[]. All values come from the input and are validated before they
 * are used as array bounds.
 */
local void read_tree()
{
    int len;  /* bit length */
    int base; /* base offset for a sequence of leaves */
    int n;

    /* Read the original input size, MSB first */
    orig_len = 0;
    for (n = 1; n <= 4; n++) orig_len = (orig_len << 8) | (ulg)get_byte();

    max_len = (int)get_byte(); /* maximum bit length of Huffman codes */
    if (max_len > MAX_BITLEN) {
        error("invalid compressed data -- Huffman code > 32 bits");
    }
    /* A zero length would make the code below use leaves[0] and leave the
     * tree without any code.
     */
    if (max_len < 1) {
        error("invalid compressed data -- no Huffman codes");
    }

    /* Get the number of leaves at each bit length */
    n = 0;
    for (len = 1; len <= max_len; len++) {
        leaves[len] = (int)get_byte();
        n += leaves[len];
    }
    /* leaves[max_len] is incremented once before the literals are read, so
     * n+1 bytes are stored in literal[]: n must stay below LITERALS or the
     * last byte is written past the end of the array.
     */
    if (n >= LITERALS) {
        error("too many leaves in Huffman tree");
    }
    Trace((stderr, "orig_len %ld, max_len %d, leaves %d\n",
           orig_len, max_len, n));

    /* There are at least 2 and at most 256 leaves of length max_len.
     * (Pack arbitrarily rejects empty files and files consisting of
     * a single byte even repeated.) To fit the last leaf count in a
     * byte, it is offset by 2. However, the last literal is the EOB
     * code, and is not transmitted explicitly in the tree, so we must
     * adjust here by one only.
     */
    leaves[max_len]++;

    /* Now read the leaves themselves */
    base = 0;
    for (len = 1; len <= max_len; len++) {
        /* Remember where the literals of this length start in literal[] : */
        lit_base[len] = base;
        /* And read the literals: */
        for (n = leaves[len]; n > 0; n--) {
            literal[base++] = (uch)get_byte();
        }
    }
    leaves[max_len]++; /* Now include the EOB code in the Huffman tree */
}
/* ===========================================================================
* Build the Huffman tree and the prefix table.
*/
/* Derives parents[] and the biased lit_base[] from leaves[], walking from the
 * longest code length to the shortest, then fills prefix_len[] so that a
 * lookup on peek_bits bits gives the length of the code starting with those
 * bits (0 when the code is longer than peek_bits).
 */
local void build_tree()
{
int nodes = 0; /* number of nodes (parents+leaves) at current bit length */
int len; /* current bit length */
uch *prefixp; /* pointer in prefix_len */
for (len = max_len; len >= 1; len--) {
/* The number of parent nodes at this level is half the total
* number of nodes at parent level:
*/
nodes >>= 1;
parents[len] = nodes;
/* Update lit_base by the appropriate bias to skip the parent nodes
* (which are not represented in the literal array):
*/
lit_base[len] -= nodes;
/* Restore nodes to be parents+leaves: */
nodes += leaves[len];
}
/* Construct the prefix table, from shortest leaves to longest ones.
* The shortest code is all ones, so we start at the end of the table.
*/
peek_bits = MIN(max_len, MAX_PEEK);
prefixp = &prefix_len[1<<peek_bits];
for (len = 1; len <= peek_bits; len++) {
/* Each code of len bits covers one table entry per combination of the
* remaining peek_bits-len bits.
*/
int prefixes = leaves[len] << (peek_bits-len); /* may be 0 */
while (prefixes--) *--prefixp = (uch)len;
}
/* The length of all other codes is unknown: */
while (prefixp > prefix_len) *--prefixp = 0;
}
/* ===========================================================================
* Unpack in to out. This routine does not support the old pack format
* with magic header \037\037.
*
* IN assertions: the buffer inbuf contains already the beginning of
* the compressed data, from offsets inptr to insize-1 included.
* The magic header has already been checked. The output buffer is cleared.
*/
/* Decodes the whole stream: reads the tree, builds the prefix table, then
 * repeatedly peeks peek_bits bits, finds the code length (by table lookup, or
 * by extending the code one bit at a time for longer codes), emits the
 * literal and skips the consumed bits, until the EOB code is met. Returns OK;
 * a mismatch between orig_len and bytes_out is reported through error().
 */
int unpack(in, out)
int in, out; /* input and output file descriptors */
{
int len; /* Bit length of current code */
unsigned eob; /* End Of Block code */
register unsigned peek; /* lookahead bits */
unsigned peek_mask; /* Mask for peek_bits bits */
ifd = in;
ofd = out;
read_tree(); /* Read the Huffman tree */
build_tree(); /* Build the prefix table */
clear_bitbuf(); /* Initialize bit input */
peek_mask = (1<<peek_bits)-1;
/* The eob code is the largest code among all leaves of maximal length: */
eob = leaves[max_len]-1;
Trace((stderr, "eob %d %x\n", max_len, eob));
/* Decode the input data: */
for (;;) {
/* Since eob is the longest code and not shorter than max_len,
* we can peek at max_len bits without having the risk of reading
* beyond the end of file.
*/
look_bits(peek, peek_bits, peek_mask);
len = prefix_len[peek];
if (len > 0) {
peek >>= peek_bits - len; /* discard the extra bits */
} else {
/* Code of more than peek_bits bits, we must traverse the tree */
ulg mask = peek_mask;
len = peek_bits;
do {
/* Lengthen the candidate code by one bit and widen the mask to match. */
len++, mask = (mask<<1)+1;
look_bits(peek, len, mask);
} while (peek < (unsigned)parents[len]);
/* loop as long as peek is a parent node */
}
/* At this point, peek is the next complete code, of len bits */
if (peek == eob && len == max_len) break; /* end of file? */
put_ubyte(literal[peek+lit_base[len]]);
Tracev((stderr,"%02d %04x %c\n", len, peek,
literal[peek+lit_base[len]]));
skip_bits(len);
} /* for (;;) */
flush_window();
Trace((stderr, "bytes_out %ld\n", bytes_out));
/* The decoded size must match the length stored in the header. */
if (orig_len != (ulg)bytes_out) {
error("invalid compressed data--length error");
}
return OK;
}
The typical way to decompress a Huffman code is using a binary tree. You insert your codes in the tree, so that each bit in a code represents a branch either to the left (0) or right (1), with decoded bytes (or whatever values you have) in the leaves.
Decoding is then just a case of reading bits from the coded content, walking the tree for each bit. When you reach a leaf, emit that decoded value, and keep reading until the input is exhausted.
Update: this page describes the technique, and has fancy graphics.
You can perform a kind of batch lookup on top of the usual Huffman tree lookup:
Choosing a bit depth (call it depth n); this is a trade-off between speed, memory, and time investment to construct tables;
Build a lookup table for all 2^n bit strings of length n. Each entry may encode several complete tokens; there will commonly also be some bits left over that are only a prefix of Huffman codes: for each of these, make a link to a further lookup table for that code;
Build the further lookup tables. The total number of tables is at most one less than the number of entries coded in the Huffman tree.
Choosing a depth that is a multiple of four, e.g., depth 8, is a good fit for bit shifting operations.
Postscript: This differs from the idea in potatoswatter's comment on unwind's answer and from Steve314's answer in using multiple tables: this means that all of the n-bit lookup is put to use, so it should be faster, but it makes table construction and lookup significantly trickier and will consume much more space for a given depth.
Why not use the decompress algorithm in the same source module? It appears to be a decent algorithm.
The other answers are right, but here is some code in Rust I wrote recently to make the ideas concrete. This is the key routine:
// Decode one symbol from `input` and return it.
fn decode( &self, input: &mut InpBitStream ) -> usize
{
// First probe: index the table with the next `peekbits` bits ( not yet consumed ).
let mut sym = self.lookup[ input.peek( self.peekbits ) ];
// A value >= ncode is not a symbol: it is ncode plus the offset of a secondary table.
if sym >= self.ncode
{
// Second probe: the bits beyond the first `peekbits` select the entry in that table.
sym = self.lookup[ sym - self.ncode + ( input.peek( self.maxbits ) >> self.peekbits ) ];
}
// Consume only as many bits as the decoded symbol's code is long.
input.advance( self.nbits[ sym ] as usize );
sym
}
The tricky bit is setting up the lookup table, see BitDecoder::setup_code in this complete RFC 1951 decoder in Rust:
// RFC 1951 inflate ( de-compress ).
// Skips the first 16 bits of `data`, then decodes blocks until one is flagged as last.
// Returns the decompressed bytes.
pub fn inflate( data: &[u8] ) -> Vec<u8>
{
  let mut stream = InpBitStream::new( data );
  let mut output = Vec::new();

  // Read and ignored. NOTE(review): the original labels these 16 bits "Checksum";
  // they look like the 2-byte zlib ( RFC 1950 ) stream header -- confirm.
  let _header = stream.get_bits( 16 );

  loop
  {
    // Block header: 1-bit "last block" flag, then a 2-bit block type.
    let is_last = stream.get_bit() != 0;
    match stream.get_bits( 2 )
    {
      0 => do_copy( &mut stream, &mut output ),
      1 => do_fixed( &mut stream, &mut output ),
      2 => do_dyn( &mut stream, &mut output ),
      _ => {} // any other type: nothing is decoded for this block
    }
    if is_last { break; }
  }
  output
}
// Decode one block whose Huffman codes are transmitted in the block itself.
// Literals are appended to `out`; matches are expanded with `copy`.
fn do_dyn( inp: &mut InpBitStream, out: &mut Vec<u8> )
{
  // Table sizes, in the order they appear in the stream.
  let lit_count = 257 + inp.get_bits( 5 );
  let dist_count = 1 + inp.get_bits( 5 );
  let clen_count = 4 + inp.get_bits( 4 );

  // One length decoder serves both tables, so a pending repeat carries over
  // from the literal lengths into the distance lengths.
  let mut lengths = LenDecoder::new( inp, clen_count );

  let mut lit = BitDecoder::new( lit_count );
  lengths.get_lengths( inp, &mut lit.nbits );
  lit.init();

  let mut dist = BitDecoder::new( dist_count );
  lengths.get_lengths( inp, &mut dist.nbits );
  dist.init();

  loop
  {
    let sym = lit.decode( inp );
    if sym == 256 { break; } // end of block
    if sym < 256
    {
      out.push( sym as u8 ); // literal byte
      continue;
    }
    // Match: length code, extra length bits, distance code, extra distance bits.
    let mc = sym - 257;
    let length = MATCH_OFF[ mc ] + inp.get_bits( MATCH_EXTRA[ mc ] as usize );
    let dc = dist.decode( inp );
    let distance = DIST_OFF[ dc ] + inp.get_bits( DIST_EXTRA[ dc ] as usize );
    copy( out, distance, length );
  }
} // end do_dyn
// Append `length` bytes to `out`, reading them from `distance` bytes before the
// current end. Bytes are copied one at a time, so the source range may run into
// bytes appended by this same call ( distance < length repeats the pattern ).
fn copy( out: &mut Vec<u8>, distance: usize, length: usize )
{
  let start = out.len() - distance;
  for offset in 0..length
  {
    let byte = out[ start + offset ];
    out.push( byte );
  }
}
/// Decode length-limited Huffman codes.
struct BitDecoder
{
  ncode: usize,       // number of symbols
  nbits: Vec<u8>,     // code length of each symbol; 0 means the symbol has no code
  maxbits: usize,     // longest code length ( set by init )
  peekbits: usize,    // index width of the primary table, at most 8 ( set by init )
  lookup: Vec<usize>  // primary table, followed by any secondary tables
}

impl BitDecoder
{
  /// Create a decoder for `ncode` symbols with all code lengths zero.
  /// The caller fills `nbits` and then calls `init`.
  fn new( ncode: usize ) -> BitDecoder
  {
    BitDecoder { ncode, nbits: vec![0; ncode], maxbits: 0, peekbits: 0, lookup: Vec::new() }
  }

  /// The key routine, will be called many times.
  /// Decodes one symbol from `input`, consuming exactly the bits of its code.
  fn decode( &self, input: &mut InpBitStream ) -> usize
  {
    let first = self.lookup[ input.peek( self.peekbits ) ];
    let sym = if first < self.ncode
    {
      first
    } else {
      // Not a symbol: `first - ncode` is the offset of a secondary table,
      // indexed by the bits that follow the first `peekbits` bits.
      let rest = input.peek( self.maxbits ) >> self.peekbits;
      self.lookup[ first - self.ncode + rest ]
    };
    input.advance( self.nbits[ sym ] as usize );
    sym
  }

  /// Build the lookup table from the code lengths in `nbits`.
  fn init( &mut self )
  {
    let ncode = self.ncode;
    let max_bits = self.nbits.iter().map( |&b| b as usize ).max().unwrap_or( 0 );
    self.maxbits = max_bits;
    self.peekbits = max_bits.min( 8 );
    self.lookup.resize( 1 << self.peekbits, 0 );

    // Canonical code assignment, following rfc1951 page 7.
    // length_count[ n ] is the number of codes of length n, n >= 1.
    let mut length_count = vec![ 0usize; max_bits + 1 ];
    for &bits in &self.nbits[ ..ncode ] { length_count[ bits as usize ] += 1; }
    length_count[ 0 ] = 0; // length 0 means "unused", not a real code

    // first_code[ n ] is the next code value to hand out for length n.
    let mut first_code = vec![ 0usize; max_bits + 1 ];
    let mut code = 0;
    for bits in 1..=max_bits
    {
      code = ( code + length_count[ bits - 1 ] ) << 1;
      first_code[ bits ] = code;
    }

    for sym in 0..ncode
    {
      let len = self.nbits[ sym ] as usize;
      if len == 0 { continue; }
      let assigned = first_code[ len ];
      first_code[ len ] += 1;
      self.setup_code( sym, len, assigned );
    }
  }

  // Decoding is done using self.lookup ( see decode ). To keep the lookup table small,
  // codes longer than 8 bits are looked up in two peeks.
  // Registers the `len`-bit code `code` for symbol `sym`.
  fn setup_code( &mut self, sym: usize, len: usize, mut code: usize )
  {
    if len > self.peekbits
    {
      // Secondary lookup required.
      let tail_bits = self.maxbits - self.peekbits;

      // Split code into its leading peekbits ( the primary slot ) and the remainder.
      let extra = len - self.peekbits;
      let slot = reverse( code >> extra, self.peekbits );
      code &= ( 1 << extra ) - 1;

      // Find the secondary table for this slot, allocating it on first use.
      let entry = self.lookup[ slot ];
      let base = if entry != 0
      {
        entry - self.ncode
      } else {
        let start = self.lookup.len();
        self.lookup.resize( start + ( 1 << tail_bits ), 0 );
        self.lookup[ slot ] = self.ncode + start;
        start
      };

      // Fill every secondary entry whose leading bits equal the remainder.
      let pad = self.maxbits - len;
      let first = code << pad;
      for i in first .. first + ( 1 << pad )
      {
        let r = reverse( i, tail_bits );
        self.lookup[ base + r ] = sym;
      }
    } else {
      // Short code: fill every primary entry whose leading bits equal the code.
      let pad = self.peekbits - len;
      let first = code << pad;
      for i in first .. first + ( 1 << pad )
      {
        // bits are reversed to match InpBitStream::peek
        let r = reverse( i, self.peekbits );
        self.lookup[ r ] = sym;
      }
    }
  }
} // end impl BitDecoder
/// Bit-level reader over a byte slice. Bits are delivered least-significant first.
struct InpBitStream<'a>
{
  data: &'a [u8], // bytes being read
  pos: usize,     // index of the next byte of `data` to load into `buf`
  buf: usize,     // loaded but unconsumed bits; the next bit is bit 0
  got: usize, // Number of bits in buffer.
}

impl <'a> InpBitStream<'a>
{
  /// Start reading at the first bit of `data`.
  fn new( data: &'a [u8] ) -> InpBitStream<'a>
  {
    // The buffer must start out empty: any bit set here would be OR-ed into
    // the first byte loaded by `peek`.
    InpBitStream { data, pos: 0, buf: 0, got: 0 }
  }

  /// Return the next `n` bits without consuming them.
  /// Bytes past the end of `data` are treated as zero; `pos` still advances.
  fn peek( &mut self, n: usize ) -> usize
  {
    while self.got < n
    {
      if self.pos < self.data.len()
      {
        self.buf |= ( self.data[ self.pos ] as usize ) << self.got;
      }
      self.pos += 1;
      self.got += 8;
    }
    self.buf & ( ( 1 << n ) - 1 )
  }

  /// Consume `n` bits that are already in the buffer ( see `peek` ).
  fn advance( &mut self, n:usize )
  {
    self.buf >>= n;
    self.got -= n;
  }

  /// Read and consume a single bit.
  fn get_bit( &mut self ) -> usize
  {
    let result = self.peek( 1 );
    self.advance( 1 );
    result
  }

  /// Read and consume `n` bits; the first bit read is the least significant.
  fn get_bits( &mut self, n: usize ) -> usize
  {
    let result = self.peek( n );
    self.advance( n );
    result
  }

  /// Read and consume `n` bits one at a time; the first bit read is the most significant.
  fn get_huff( &mut self, mut n: usize ) -> usize
  {
    let mut result = 0;
    while n > 0
    {
      result = ( result << 1 ) + self.get_bit();
      n -= 1;
    }
    result
  }

  /// Skip to the next byte boundary and leave the buffer empty, so that
  /// `data[ pos ]` is the next unread byte.
  fn clear_bits( &mut self )
  {
    // `peek` may have loaded whole bytes ahead of the read position; hand them
    // back rather than dropping them.
    self.pos -= self.got / 8;
    // Drop the leftover bits of the partially consumed byte. They must be
    // cleared, not just uncounted, or the next load would be OR-ed onto them.
    self.buf = 0;
    self.got = 0;
  }
} // end impl InpBitStream
/// Decode code lengths.
struct LenDecoder
{
  plenc: u8,      // previous length code ( which can be repeated )
  rep: usize,     // repeats still owed, possibly carried into the next get_lengths call
  bd: BitDecoder, // decoder for the 19-symbol code-length alphabet
}

/// Decodes an array of lengths. There are special codes for repeats, and repeats of zeros.
impl LenDecoder
{
  /// Read `n_len_code` 3-bit code lengths from `inp` and build the decoder
  /// for the code-length alphabet from them.
  fn new( inp: &mut InpBitStream, n_len_code: usize ) -> LenDecoder
  {
    let mut bd = BitDecoder::new( 19 );
    // Lengths arrive in CLEN_ALPHABET order, not in symbol order.
    for i in 0..n_len_code
    {
      let bits = inp.get_bits( 3 ) as u8;
      bd.nbits[ CLEN_ALPHABET[ i ] as usize ] = bits;
    }
    bd.init();
    LenDecoder { plenc: 0, rep: 0, bd }
  }

  // Per RFC 1951 page 13, get array of code lengths.
  // Fills every element of `result`; a repeat that does not fit stays pending in `rep`.
  fn get_lengths( &mut self, inp: &mut InpBitStream, result: &mut Vec<u8> )
  {
    let n = result.len();
    let mut i = 0;

    // Emit any repeat left pending by the previous call.
    while self.rep > 0
    {
      result[ i ] = self.plenc;
      i += 1;
      self.rep -= 1;
    }

    while i < n
    {
      let lenc = self.bd.decode( inp ) as u8;
      if lenc < 16
      {
        // Literal code length.
        result[ i ] = lenc;
        i += 1;
        self.plenc = lenc;
        continue;
      }
      match lenc
      {
        16 => { self.rep = 3 + inp.get_bits( 2 ); }                  // repeat previous length
        17 => { self.rep = 3 + inp.get_bits( 3 ); self.plenc = 0; }  // short run of zeros
        18 => { self.rep = 11 + inp.get_bits( 7 ); self.plenc = 0; } // long run of zeros
        _ => {}
      }
      while i < n && self.rep > 0
      {
        result[ i ] = self.plenc;
        i += 1;
        self.rep -= 1;
      }
    }
  } // end get_lengths
} // end impl LenDecoder
/// Reverse a string of bits.
/// Returns the low `bits` bits of `x` in the opposite order; higher bits of `x` are dropped.
pub fn reverse( mut x: usize, bits: usize ) -> usize
{
  let mut flipped: usize = 0;
  for _ in 0..bits
  {
    // Move the lowest remaining bit of x onto the low end of the result.
    flipped = ( flipped << 1 ) | ( x & 1 );
    x >>= 1;
  }
  flipped
}
// Decode one uncompressed block: a 16-bit byte count, a second 16-bit field
// that is read but not checked, then that many raw bytes copied to `out`.
fn do_copy( inp: &mut InpBitStream, out: &mut Vec<u8> )
{
  inp.clear_bits(); // Discard any bits in the input buffer
  let count = inp.get_bits( 16 );
  let _unchecked = inp.get_bits( 16 );
  // The payload is taken directly from the byte slice, bypassing the bit buffer.
  for _ in 0..count
  {
    out.push( inp.data[ inp.pos ] );
    inp.pos += 1;
  }
}
// Decode one block that uses the fixed codes of RFC1951 page 12.
// Literals are appended to `out`; matches are expanded with `copy`.
fn do_fixed( inp: &mut InpBitStream, out: &mut Vec<u8> )
{
  loop
  {
    // Code ranges and the symbols they map to:
    //   0 - 23    ( 7 bits ) => 256 - 279
    //   48 - 191  ( 8 bits ) => 0 - 143
    //   192 - 199 ( 8 bits ) => 280 - 287
    //   400 - 511 ( 9 bits ) => 144 - 255
    let prefix = inp.get_huff( 7 );
    let sym = if prefix <= 23
    {
      prefix + 256
    } else {
      // Not a 7-bit code: extend to 8 bits, and to 9 if still unresolved.
      let code8 = ( prefix << 1 ) + inp.get_bit();
      if code8 <= 191 { code8 - 48 }
      else if code8 <= 199 { code8 + 88 }
      else { ( code8 << 1 ) + inp.get_bit() - 256 }
    };

    if sym == 256 { break; } // end of block
    if sym < 256
    {
      out.push( sym as u8 ); // literal byte
      continue;
    }

    // Match: distance codes in a fixed block are plain 5-bit values.
    let mc = sym - 257;
    let length = MATCH_OFF[ mc ] + inp.get_bits( MATCH_EXTRA[ mc ] as usize );
    let dcode = inp.get_huff( 5 );
    let distance = DIST_OFF[ dcode ] + inp.get_bits( DIST_EXTRA[ dcode ] as usize );
    copy( out, distance, length );
  }
} // end do_fixed
// RFC 1951 constants.
// Order in which the code-length alphabet's own 3-bit lengths are read
// ( entry i is the symbol that the i-th length read belongs to ).
pub static CLEN_ALPHABET : [u8; 19] = [ 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 ];
// Base match length per length code, indexed by ( symbol - 257 ).
// NOTE(review): this table has 30 entries but MATCH_EXTRA has 29; the final 0xffff
// looks like a filler entry -- confirm.
pub static MATCH_OFF : [usize; 30] = [ 3,4,5,6, 7,8,9,10, 11,13,15,17, 19,23,27,31, 35,43,51,59,
67,83,99,115, 131,163,195,227, 258, 0xffff ];
// Number of extra bits read and added to MATCH_OFF, indexed the same way.
pub static MATCH_EXTRA : [u8; 29] = [ 0,0,0,0, 0,0,0,0, 1,1,1,1, 2,2,2,2, 3,3,3,3, 4,4,4,4, 5,5,5,5, 0 ];
// Base match distance per distance code.
pub static DIST_OFF : [usize; 30] = [ 1,2,3,4, 5,7,9,13, 17,25,33,49, 65,97,129,193, 257,385,513,769,
1025,1537,2049,3073, 4097,6145,8193,12289, 16385,24577 ];
// Number of extra bits read and added to DIST_OFF, indexed by distance code.
pub static DIST_EXTRA : [u8; 30] = [ 0,0,0,0, 1,1,2,2, 3,3,4,4, 5,5,6,6, 7,7,8,8, 9,9,10,10, 11,11,12,12, 13,13 ];
GitHub repository here