C++ issue with LZW decoding - c++

I'm writing a LZW algorithm and I'm having trouble with decoding the encoded file. The encoding function works fine,the issues is as far as I know in the building of the dictionary in the decoding function.
Edit: The size of the decoding dictionary is for 1 bigger than the encoding dictionary. Also the decoding dictionary doesn't seem to contain any string value that has more than 2 characters. For testing I used a .txt file of Alice in wonderland and the first few words are ok but after that everything goes wrong
My encoding function:
void encode(char* path, int vel_s){
unordered_map<string,int>dict; //dictionary
int bit_size = ceil(log2(vel_s + 1)); //size of codes - x bits
//init dictionary
for(int i = 0; i < 256; i++){
string k = "";
k.push_back((unsigned char)i);
dict[k] = i;
}
int count = 256, maxVel = pow(2, bit_size); //counter and max value in bits
BinReader reader(path);//text to encodede
BinWriter writer("out.bin");//output file
string P = "";
P.push_back(reader.readByte());//read first char
while(!reader.f.eof()){
char C = reader.readByte(); //read next char
if(dict.find(P + C) != dict.end()){ //if in dictionary
P+=C;
}else{ //if not in dictionary
if(count >= maxVel){
count = 0;
}
int out = dict[P]; //int code of P
string binString = toBinary(out, bit_size); //binary value of out
for(int i = 0; i<binString.size();i++){ //write bits
if(binString[i]=='1'){
writer.writeBit(true);
}else{
writer.writeBit(false);
}
}
dict[P + C] = count; //write P+C to dictionary
count++;
P = C; //overwrite P
}
}
//write last output
int out = dict[P];
string binString = toBinary(out, bit_size);
for(int i = 0; i<binString.size();i++){
if(binString[i]=='1'){
writer.writeBit(true);
}else{
writer.writeBit(false);
}
}
}
And this is the decoding function:
void decompress(char* path, int vel_s){//file path + dictionary max size
unordered_map<int,string>dict; //dictionary
int bit_size = ceil(log2(vel_s + 1));//max number of bits in code
for(int i = 0; i < 256; i++){ //init dictionary
dict[i] = (unsigned char) i;
}
int count = 256; //counter
vector<bool>B;
BinWriter writer("out_d.txt"); //output file
BinReader reader(path); //input file
int newI, oldI; //new and old value
for(int i = 0; i < bit_size; i++){ //read code from encoded file of size bit_size
B.push_back(reader.readBit());
}
oldI = convertToInt(B); //convert to integer
string S = dict[oldI]; //String is dictionary value of old value
writer.writeByte(S[0]); //write first letter
string C = ""; //init C
C =S[0]; //C is first character of S
int maxVel = pow(2, bit_size); //max value of encoded code
while(!reader.f.eof()){
B.clear();//clear vector B for reading bits
for(int i = 0; i < bit_size; i++){
B.push_back(reader.readBit());//read next code
}
newI = convertToInt(B); //convert bits to int
if(dict.find(newI) == dict.end()){ //if new value not in dictionary
S = dict[oldI]; //S is value of old code
S += C; //concat C to S
}else{
S = dict[newI]; //S is value of new code
}
for(int k = 0; k < S.size(); k++){ //write S to file
writer.writeByte(S[k]);
}
C = "";
C += S[0]; //C is first character of S
dict[count] = dict[oldI] + C; //new input in dictionary
count++;
oldI = newI; //old value is new value
}
And the toBinary() and convertToInt() functions that I use for help
string toBinary(int x, int l){
string temp;
while(x!=0){
temp =(x%2==0 ? "0":"1")+temp;
x/=2;
}
while(temp.length() != l){
temp = "0"+temp;
}
return temp;
}
int convertToInt(vector<bool>B){
int power= B.size()-1,res= 0;
for(int i = 0; i < B.size(); i++){
if(B[i]){
res+=pow(2, power);
}
power--;
}
return res;
}

Related

Scanning a cipher text document, guessing the key, and calculating distribution

My project is breaking a Vignere cipher. However, I've gotten to this last part and now I'm stuck. I don't know if I'm misunderstanding the math or the logic of the program I'm writing, but basically I should be getting different calcDistribution() values for each letter at each position of the key. As it stands, this program returns only 16 unique values grouped together by key position. Ultimately the program should be able to pick the highest value calcDistribution() letter at that key position.
Here is the portion of my code I'm sure is the source of the problem:
//First, we need to pick a character to XOR against the cipherText. reuse char ci, string byte
char kg;
string keyBreakGuess = "";
string ranOuttaNames1 = "";
string ranOuttaNames2 = "";
float charDistribution[128-65][MAXKEY];
for(int d = 0; d < keyGuess; d++){ //This loop will search each spot up to the length of the key
for(ci = 65; ci < 123; ci++){ //This loop will look at every possible key character
cipherText.seekg(0, ios::beg);
for(int e = 0; e < 500 && !cipherText.eof(); e++){ //This loop will handle the sampling
cipherText.seekg((LENGTH * keyGuess * e) + (LENGTH * d), ios::beg); //f(x) = 8*(c)*(x) + 8*(b)
if(cipherText.eof())
break;
for(int f = 0; f < LENGTH && !cipherText.eof(); f++){
cipherText >> kg;
byte += kg;
}
ranOuttaNames2 = ci;
ranOuttaNames1 = string2Binary(ranOuttaNames2);
ranOuttaNames2 = "";
for(int u = 0; u < LENGTH; u++){
if(byte[u] == ranOuttaNames1[u])
ranOuttaNames2 += '0';
else
ranOuttaNames2 += '1';
}
kg = binary2Char(ranOuttaNames2);
if((int)kg >= 32 && (int)kg < 128)
ascii[(int)kg]++;
byte = "";
ranOuttaNames1 = "";
}
//This is where we will calcDist for each char and pick the max character for that piece of the key
charDistribution[(int)ci-65][d] = calcDistribution(ascii, ASCIIBYTELENGTH);
cout << ci << " " << (d+1) << ": " << charDistribution[(int)ci-65][d] << endl;
for(int r = 0; r < ASCIIBYTELENGTH; r++){
ascii[r] = 0;
}
}
}
maxD = 0;
for(int s = 0; s < keyGuess; s++){
for(int q = 65; q < 128; q++){
if(maxD < charDistribution[q-65][s]){
maxD = charDistribution[q-65][s];
kg = q;
}
}
keyBreakGuess += kg;
}
cout << "My guess for the full key: " << keyBreakGuess << endl; //This should have the full key
Here is the calcDistribution function:
//This function will calculate the distribution of characters of an array
double calcDistribution(int arr[], int size)
{
double p = 0.000;
double sum = 0;
//calculate sum of all indexes
for(int t = 0; t < size; t++){
sum += arr[t];
}
//divide each point point in the index by the sum of all indexes
for(int c = 0; c < size; c++){
try{
p += pow((arr[c] / sum), 2);
} catch (int e) {
cerr << "Sum is 0, error occurred.";
exit(1);
}
}
return p;
}
Any pointers in the right direction would be much appreciated.
I solved this problem by adding a case to break if a character wasn't printable or a number or letter. This kept values from repeating because it wouldn't even calculate a value with special characters. While this probably wouldn't hold true for every possible document you would want to encrypt with a Vignere cipher, it did work for the document I was given for this project.

Bitwise integer concationation

For some background, I'm trying to write a system to pass packets of integers for the purpose of building a maze using a boolean toggle to decide whether two nodes should have a wall between them, currently my maze handles 480 walls, therefore I don't want to send a packet with a single item, but rather split it into an array of integers (length 8) thus giving me 480/8 objects to send.
const int wallRows = mazeSize / 8;
int temp = NULL;
int temp2 = NULL;
int current = NULL;
int concatCount = 0;
int* walls = new int[wallRows];
int wallIndex = 0;
for (int i = 0; i < mazeSize; i++) {
current = temp2;
//ensure my ints have only 8 bytes
if (concatCount >= 7) {
//allocate a full int to the array
walls[wallIndex] = temp;
//clear the int
temp = NULL;
//move to the next array pos
wallIndex++;
//restart the int count
concatCount = 0;
}
if (maze->allEdges[i]._iswall) {
//append a 1 to the int
temp = 0b1;
}
else {
//append a 0 to the int
temp = 0b0;
}
//increment the int count
current = (temp2 << 1) | temp;
concatCount++;
}
This is what I have currently built, my idea was to start with an int, pass it the int based on the return of the bool "_isWall" and bit shift the result onto the end of the int.
When the int reaches capacity, iterate to the next int in the array and begin again until the maze's walls have populated the array.
Edit: lack of clarity on what I was asking.
My bitwise operation does not appear to actually allocate multiple bits to the same integer, where am I going wrong?
Use val | (1UL << temp2), and not temp2 << 1 to set the bits. Later you can use bitwise & operator to see if the bit is set. You must initialize the whole byte to zero and set the bit only if the value is true. Here is an example:
int main(void)
{
//assign random values for testing
int wallinfo[480];
for(int i = 0; i < 480; i++)
wallinfo[i] = !!(rand() % 2);
//copy to the values to compress
unsigned char compress[60] = { 0 };
for(int i = 0; i < 60; i++)
for(int j = 0; j < 8; j++)
if(wallinfo[i * 8 + j])
compress[i] |= 1UL << j;
//decompress to get back wallinfo
int decompress[480];
for(int i = 0; i < 60; i++)
for(int j = 0; j < 8; j++)
decompress[i * 8 + j] = !!(compress[i] & (1UL << j));
//wallinfo should match decompress
if(memcmp(wallinfo, decompress, 480) == 0)
printf("success\n");
else
printf("failed\n");
return 0;
}

copying strings , c++

i'm trying to perform a program that prints string correctly from it's middle
the problem to me is that strange behavior to me that at certain line it agrees to print string only char by char and refuses to return it to be printed in the main function
here is my code :
string unscramble(string line)
{
string temp;
int i,j;
int len=line.size();
i=len/2-1;
j=0;
do{
if(i<0)
i=len-1;
temp[j]=line[i];
cout << temp[j];
j++;
i--;
}while(i!=(len/2)-1);
return temp;
}
Your issue is in the assignment to temp. You'll also hit an issue if the string length is even. Do it this way instead:
string unscramble(string line)
{
string temp;
int i, j;
int len = line.size();
int factor = len / 2 - 1;
if(len % 2 == 1)
factor = len / 2;
i = factor;
j = 0;
do
{
if(i < 0)
i = len - 1;
temp += line[i];
j++;
i--;
}
while(i != factor);
return temp;
}
Example input: RUT OWT SNEH HCNERF EERHTEGDIRTRAP A DNA SEVODELT
Output: THREE FRENCH HENS TWO TURTLEDOVES AND A PARTRIDGE

Aho-Corasick Algo in c/c++ for hex

my problem - i am trying to use aho-corasick algo that i found for c++ while searching net, it currently only search for char based string, i want it to modify it to search for hex based string of varied character. any help to improve the code is greatly appreciated. if i just modify my string text it goes into an ifinite loop.
int buildMatchingMachine(const vector<string> &words, char lowestChar = 'a', char highestChar = 'z')
{
memset(out, 0, sizeof out);
memset(f, -1, sizeof f);
memset(g, -1, sizeof g);
int states = 1; // Initially, we just have the 0 state
for (int i = 0; i < words.size(); ++i)
{
const string &keyword = words[i];
int currentState = 0;
for (int j = 0; j < keyword.size(); ++j)
{
int c = keyword[j] - lowestChar;
if (g[currentState][c] == -1)
{ // Allocate a new node
g[currentState][c] = states++;
}
currentState = g[currentState][c];
}
out[currentState] |= (1 << i); // There's a match of keywords[i] at node currentState.
}
// State 0 should have an outgoing edge for all characters.
for (int c = 0; c < MAXC; ++c)
{
if (g[0][c] == -1)
{
g[0][c] = 0;
}
}
// Now, let's build the failure function
queue<int> q;
for (int c = 0; c <= highestChar - lowestChar; ++c)
{ // Iterate over every possible input
// All nodes s of depth 1 have f[s] = 0
if (g[0][c] != -1 && g[0][c] != 0)
{
f[g[0][c]] = 0;
q.push(g[0][c]);
}
}
while (q.size())
{
int state = q.front();
q.pop();
for (int c = 0; c <= highestChar - lowestChar; ++c)
{
if (g[state][c] != -1)
{
int failure = f[state];
while (g[failure][c] == -1)
{
failure = f[failure];
}
failure = g[failure][c];
f[g[state][c]] = failure;
out[g[state][c]] |= out[failure]; // Merge out values
q.push(g[state][c]);
}
}
}
return states;
}
int openFile::findNextState(int currentState, char nextInput, char lowestChar = 'a')
{
int answer = currentState;
int c = nextInput - lowestChar;
while (g[answer][c] == -1)
answer = f[answer];
return g[answer][c];
}
I have found a working solution, you just need to redefine the lowest char and highest char of hex based symbols to its corresponding ascii value not int value also change the MAXS and MAXC to suitable number, now the code works for hex based values.

C++ string compress

I wrote a program to compress a string using the counts of repeated characters. if the compressed string is longer than the original string, then we still return the original string. Below is my program:
void stringCompress(char* src) {
char* original;
original = src;
char* rst;
rst = src;
int histogram[256];
for (int i = 0; i < 256; i++) {
histogram[i] = 0;
}
int length = 0;
while (*src != NULL) {
length++;
src++;
}
src = original;
int j = 0;
for (int i = 0; i < length; i++) {
histogram[(int) src[i]]++;
if (histogram[(int) src[i]] == 1) {
rst[j] = src[i];
j++;
}
}
rst[j] = '\0';
char* final;
rst = original;
int index = 0;
char buffer[33];
for (int i = 0; i < j; i++) {
final[index] = rst[i];
stringstream number;
number<<histogram[(int)rst[i]];
-------> //cout<<number.str()<<endl;
char* temp = new char[number.str().length()+1];
strcpy(temp, number.str().c_str());
index++;
cout<<temp<<endl;
for(int k =0 ;k<number.str().length();k++)
{
final[index]=temp[k];
index++;
}
}
final[index] = '\0';
src = original;
if (index <= length) {
for (int i = 0; i < index; i++)
cout<<final[i];
} else {
cout << src << endl;
}
}
But strange thing is that if I leave the cout sentence cout<<number.str()<<endl; there (the arrow points to the sentence), then the output is right. For example, aaaabcdaa outputs a6b1c1d1 and aabcd outputs aabcd. However if I comment out cout<<number.str()<<endl;, then nothing is generated. Any help is appreciated.
The variable final is uninitialized in your code. When I initialize it with a memory buffer, then your program prints the desired output whether the line you pointed to is commented out or not.
Perhaps you meant to use buffer (which is unused) as memory for final, such as:
final = buffer;