Understanding ICU ubidi. Direction is always UBIDI_LTR - c++

I have written a piece of sample code, based on the ICU reference, that reads a line from a file, gets its base direction and shows the result of the Unicode Bidi algorithm on it.
In my input file I have written فارسی, which is a sequence of right-to-left characters.
But this line std::cout << ubidi_getBaseDirection(us.getBuffer(), us.length()) << std::endl; prints 0, which is UBIDI_LTR.
And no matter what combination of RTL and LTR characters I put in the input file, the result always has a single run with direction UBIDI_LTR.
Is there something wrong with my code?
#include "unicode/utypes.h"
#include "unicode/uchar.h"
#include "unicode/localpointer.h"
#include "unicode/ubidi.h"
#include <unicode/unistr.h>
#include<string>
#include<iostream>
#include <fstream>
#include "unicode/ustream.h"
using namespace icu;
using icu::UnicodeString;
int main(int argc, char* argv[])
{
std::string input;
std::string output;
std::ifstream MyReadFile("in.txt");
getline(MyReadFile, input);
UnicodeString us(input.c_str());
UBiDi* bidi = ubidi_open();
UErrorCode errorCode = U_ZERO_ERROR;
ubidi_setPara(bidi, us.getBuffer(), us.length(), UBIDI_RTL, nullptr, &errorCode);
std::cout << (ubidi_getBaseDirection(us.getBuffer(), us.length()) == UBIDI_LTR) << std::endl;
std::ofstream MyFile;
MyFile.open("out.txt");
if (U_SUCCESS(errorCode))
{
UnicodeString Ustring(ubidi_getText(bidi));
std::string Ustr;
Ustring.toUTF8String(Ustr);
int32_t count = ubidi_countRuns(bidi, &errorCode);
int32_t logicalStart, length;
if (count > 0)
MyFile << "VisualRun \t" << "direction" << "\t" << "s" << '\t' << "l" << '\t' << "output" << std::endl;
for (int32_t i = 0; i < count; i++) {
UBiDiDirection dir = ubidi_getVisualRun(bidi, i, &logicalStart, &length);
std::string dirstr = "UBIDI_LTR";
if (dir == UBIDI_RTL)
dirstr = "UBIDI_RTL";
UnicodeString temp = Ustring.tempSubString(logicalStart, length);
char* dest = (char*)malloc(temp.length());
temp.extract(logicalStart, length, dest, strlen(dest));
output = std::string(dest);
MyFile << "VisualRun \t" << dirstr << "\t" << logicalStart << '\t' << length << '\t' << output << std::endl;
}
}
else
{
std::cout << "Failed" << std::endl;
}
MyFile.close();
return 0;
}

Related

fstream stops to read at substitute control character

I'm writing a simple encryption program in C++ to encrypt a text-based file.
It's using a simple XOR cipher algorithm, but this produces ASCII control characters in the output file. When I try to read from the newly encrypted file with std::ifstream, it stumbles upon character #26, it stops and becomes unable to read the rest of the file.
Example if I try to encrypt this text:
This is just a simple sample
text with two rows and one sentence.
It turns it to this
/[[[[[
[[[ [[[U
When I try to read that file in my program, it can't read past the character at position 15, so I get a half encrypted file.
How can I fix this?
Here's the code:
#include <iostream>
#include <Windows.h>
#include <string>
#include <fstream>
// Encrypts `filename` one line at a time by XOR-ing every character with
// `encryptionKey`. The result is written next to the source file, to a file
// whose name is the original name prefixed with "Encrypted" (the path is
// split at the last backslash).
// NOTE(review): both streams are opened without std::ios::binary even though
// the XOR output can contain arbitrary byte values, including control
// characters and line breaks.
void Encrypt(char encryptionKey, std::string filename)
{
std::ifstream sourceFile(filename);
std::ofstream outputFile(filename.substr(0, filename.find_last_of("\\")) + "\\Encrypted" + filename.substr(filename.find_last_of("\\") + 1), std::ofstream::out | std::ofstream::trunc);
std::string sourceLine;
std::string outputLine;
// Incremented once per loop iteration; never read in this function.
long numLines = 0;
if (sourceFile.is_open())
{
std::cout << "Opening file: " + filename + " for encryption" << std::endl;
// good() is tested before getline(), so the body also runs for the read
// attempt that fails and an extra (empty) line is written at the end.
while (sourceFile.good()) // This iterates over the whole file, once for each line
{
sourceLine = ""; //Clearing the line for each new line
outputLine = ""; //Clearing the line for each new line
std::getline(sourceFile, sourceLine);
for (int i = 0; i < sourceLine.length(); i++) // Looping through all characters in each line
{
// XOR is its own inverse, so applying the same key again restores the byte.
char focusByte = sourceLine[i] ^ encryptionKey;
// Debug trace of every produced byte.
std::cout << " focusByte: " << focusByte << std::endl;
outputLine.push_back(focusByte);
//std::cout << sourceLine << std::flush;
}
numLines++;
// The line break itself is written unencrypted.
outputFile << outputLine << std::endl;
}
}
sourceFile.close();
outputFile.close();
}
// Decrypts `filename` one line at a time by XOR-ing every character with
// `encryptionKey`. The result is written next to the source file, to a file
// whose name is the original name prefixed with "Decrypted" (the path is
// split at the last backslash).
// NOTE(review): the key is an `unsigned int` here but a `char` in Encrypt();
// the XOR result is narrowed back to `char` when stored in focusByte.
// NOTE(review): the encrypted input is opened without std::ios::binary.
void Decrypt(unsigned int encryptionKey, std::string filename)
{
std::ifstream sourceFile(filename);
std::ofstream outputFile(filename.substr(0, filename.find_last_of("\\")) + "\\Decrypted" + filename.substr(filename.find_last_of("\\") + 1), std::ofstream::out | std::ofstream::trunc);
std::string sourceLine;
std::string outputLine;
// Incremented once per loop iteration; never read in this function.
long numLines = 0;
if (sourceFile.is_open())
{
std::cout << "Opening file: " + filename + " for decryption" << std::endl;
while (sourceFile.good()) // This iterates over the whole file, once for each line
{
// Cannot fire: the loop condition has just established good(), which
// implies fail() is false at this point.
if (sourceFile.fail() == true)
std::cout << "eof" << std::endl;
sourceLine = ""; //Clearing the line for each new line
outputLine = ""; //Clearing the line for each new line
std::getline(sourceFile, sourceLine);
for (int i = 0; i < sourceLine.length(); i++) // Looping through all characters in each line
{
char focusByte = sourceLine[i] ^ encryptionKey;
// Debug trace of every produced byte.
std::cout << " focusByte: " << focusByte << std::endl;
outputLine.push_back(focusByte);
}
numLines++;
outputFile << outputLine << std::endl;
}
}
sourceFile.close();
outputFile.close();
}
// Entry point. Expects the target file as the first command-line argument,
// asks whether to encrypt ('e') or decrypt ('d') it with the fixed key 123,
// and pauses before exiting. Always returns EXIT_SUCCESS.
int main(int argument_count,
         char * argument_list[])
{
    system("color a"); // Windows `color` shell command
    std::string filename;
    if (argument_count < 2)
    {
        std::cout << "You didn't supply a filename" << std::endl;
    }
    else
    {
        filename = argument_list[1];
        std::cout << "Target file: " << filename << std::endl;
        std::cout << "Press e to encrypt the selected file, Press d to decrypt the file > " << std::flush;
        char choice = '\0';
        // Stop asking once standard input is exhausted or broken; without
        // this check a failed read would leave `choice` unset and the loop
        // would spin forever.
        while (std::cin >> choice)
        {
            if (choice == 'e')
            {
                Encrypt(123, filename);
                break;
            }
            else if (choice == 'd')
            {
                Decrypt(123, filename);
                break;
            }
            else
            {
                std::cout << "please choose option e or d for encryption respectivly decryption" << std::endl;
            }
        }
    }
    std::cout << "\nPaused, press Enter to continue > " << std::flush;
    system("Pause");
    return EXIT_SUCCESS;
}
In Decrypt(), after the first call to std::getline(), sourceFile.good() is false and sourceFile.fail() is true, which is why you stop reading subsequent lines from the encrypted file.
The reason is because the encrypted file has an encoded 0x1A byte in it, and depending on your platform and STL implementation, that character likely gets interpreted as an EOF condition, thus enabling the std::ifstream's eofbit state, terminating further reading.
In my compiler's STL implementation on Windows, when std::ifstream reads from a file, it ultimately calls a function named _Fgetc():
// Excerpt quoted from a standard-library implementation: the `char`
// specialization of the helper that pulls one character from a C stream.
// Stores the character in _Byte and returns true, or returns false when
// fgetc() reports EOF.
template<> inline bool _Fgetc(char& _Byte, _Filet *_File)
{ // get a char element from a C stream
int _Meta;
if ((_Meta = fgetc(_File)) == EOF) // <-- here
return (false);
else
{ // got one, convert to char
_Byte = (char)_Meta;
return (true);
}
}
When it tries to read an 0x1A character, fgetc() returns EOF, and when _Fgetc() returns false, std::getline() sets the eofbit on the std::ifstream and exits.
Check your compiler's STL for similar behavior.
This behavior is because you are opening the encrypted file in text mode. You need to open the encrypted file in binary mode instead:
std::ifstream sourceFile(..., std::ifstream::binary);
Also, you should enable binary mode on the encrypted file in Encrypt() as well:
std::ofstream outputFile(..., std::ofstream::binary | std::ofstream::trunc);
Try something more like this instead:
#include <Windows.h>
#include <iostream>
#include <string>
#include <fstream>
#include <cstdlib>
// XOR-encrypts the text file `filename` with `encryptionKey`, line by line.
// The output goes to a file in the same directory whose name is the original
// name prefixed with "Encrypted"; it is opened in binary mode and each
// encrypted line is followed by an unencrypted line break.
// Does nothing beyond creating the output file when the source cannot be opened.
void Encrypt(char encryptionKey, const std::string &filename)
{
    // Split at the last backslash; with no backslash the directory part is empty.
    const std::string::size_type slash = filename.find_last_of("\\");
    const std::string directory = filename.substr(0, slash + 1);
    const std::string baseName = filename.substr(slash + 1);
    const std::string target = directory + "Encrypted" + baseName;

    std::ifstream input(filename.c_str());
    std::ofstream output(target.c_str(), std::ofstream::binary | std::ofstream::trunc);
    if (!input.is_open())
        return;

    std::cout << "Opened file: " + filename + " for encryption" << std::endl;
    std::string text;
    while (std::getline(input, text))
    {
        for (std::string::iterator it = text.begin(); it != text.end(); ++it)
        {
            *it = *it ^ encryptionKey;
            // Debug trace of every produced byte.
            std::cout << " focusByte: " << *it << std::endl;
        }
        output << text << std::endl;
    }
}
// XOR-decrypts `filename` with `encryptionKey`, line by line. The input is
// opened in binary mode; the output goes to a text file in the same directory
// whose name is the original name prefixed with "Decrypted".
// Prints "eof" once the input has been consumed.
void Decrypt(char encryptionKey, const std::string &filename)
{
    // Split at the last backslash; with no backslash the directory part is empty.
    const std::string::size_type slash = filename.find_last_of("\\");
    const std::string directory = filename.substr(0, slash + 1);
    const std::string baseName = filename.substr(slash + 1);
    const std::string target = directory + "Decrypted" + baseName;

    std::ifstream input(filename.c_str(), std::ifstream::binary);
    std::ofstream output(target.c_str(), std::ofstream::trunc);
    if (!input.is_open())
        return;

    std::cout << "Opened file: " + filename + " for decryption" << std::endl;
    std::string text;
    while (std::getline(input, text))
    {
        for (std::string::iterator it = text.begin(); it != text.end(); ++it)
        {
            *it = *it ^ encryptionKey;
            // Debug trace of every produced byte.
            std::cout << " focusByte: " << *it << std::endl;
        }
        output << text << std::endl;
    }
    std::cout << "eof" << std::endl;
}
// Entry point. Takes the target file from the first command-line argument or,
// if none was given, prompts for it. Then asks whether to encrypt ('e') or
// decrypt ('d') the file with the fixed key 123 and pauses before exiting.
// Returns EXIT_FAILURE when no filename or no choice could be obtained,
// EXIT_SUCCESS otherwise.
int main(int argument_count, char* argument_list[])
{
    std::system("color a"); // Windows `color` shell command
    std::string filename;
    if (argument_count < 2)
    {
        std::cout << "Enter a file to process: " << std::flush;
        std::getline(std::cin, filename);
    }
    else
    {
        filename = argument_list[1];
    }
    if (filename.empty())
    {
        std::cout << "You didn't supply a filename" << std::endl;
        return EXIT_FAILURE;
    }
    std::cout << "Target file: " << filename << std::endl;
    std::cout << "Press e to encrypt the file" << std::endl;
    std::cout << "Press d to decrypt the file" << std::endl;
    char choice = '\0';
    while (true)
    {
        std::cout << "> " << std::flush;
        // A failed read (EOF or stream error) would leave `choice` unset and
        // repeat forever, so give up instead of looping.
        if (!(std::cin >> choice))
        {
            std::cerr << "No choice could be read from standard input" << std::endl;
            return EXIT_FAILURE;
        }
        if (choice == 'e')
        {
            Encrypt(123, filename);
            break;
        }
        else if (choice == 'd')
        {
            Decrypt(123, filename);
            break;
        }
        else
        {
            std::cout << "please choose option e or d for encryption or decryption, respectively" << std::endl;
        }
    }
    std::cout << std::endl << "Paused, press Enter to continue" << std::flush;
    std::system("pause");
    return EXIT_SUCCESS;
}
That being said, keep in mind that when using XOR, some of the encrypted characters might end up being \r (0x0D) or \n (0x0A), which will interfere with std::getline() when decrypting the file later on, producing a decrypted output that does not match the original text input.
Since you should be treating the encrypted file as binary, you should not be reading/writing the file as text at all. Choose a different format for your encrypted output that does not rely on line-break semantics in text vs binary mode.
For example:
#include <Windows.h>
#include <iostream>
#include <string>
#include <fstream>
#include <cstdlib>
// XOR-encrypts the text file `filename` with `encryptionKey`, line by line.
// Each line is written to the output as a length-prefixed record: the raw
// bytes of a std::string::size_type holding the line length, followed by the
// encrypted characters. No line breaks are stored.
// The output goes to a file in the same directory whose name is the original
// name prefixed with "Encrypted".
void Encrypt(char encryptionKey, const std::string &filename)
{
    // Split at the last backslash; with no backslash the directory part is empty.
    const std::string::size_type slash = filename.find_last_of("\\");
    const std::string directory = filename.substr(0, slash + 1);
    const std::string baseName = filename.substr(slash + 1);
    const std::string target = directory + "Encrypted" + baseName;

    std::ifstream input(filename.c_str());
    std::ofstream output(target.c_str(), std::ofstream::binary | std::ofstream::trunc);
    if (!input.is_open())
        return;

    std::cout << "Opened file: " + filename + " for encryption" << std::endl;
    std::string record;
    while (std::getline(input, record))
    {
        const std::string::size_type recordLen = record.length();
        for (std::string::iterator it = record.begin(); it != record.end(); ++it)
        {
            *it = *it ^ encryptionKey;
            // Debug trace of every produced byte.
            std::cout << " focusByte: " << *it << std::endl;
        }
        // Length first, then the payload.
        output.write(reinterpret_cast<const char*>(&recordLen), sizeof(recordLen));
        output.write(record.c_str(), recordLen);
    }
}
// XOR-decrypts `filename` with `encryptionKey`. The input is read in binary
// mode as a sequence of length-prefixed records: the raw bytes of a
// std::string::size_type, followed by that many encrypted characters.
// Every decoded record is written as one line to a text file in the same
// directory whose name is the original name prefixed with "Decrypted".
// Stops at the first incomplete record and then prints "eof".
void Decrypt(char encryptionKey, const std::string &filename)
{
    // Split at the last backslash; with no backslash the directory part is empty.
    const std::string::size_type slash = filename.find_last_of("\\");
    const std::string directory = filename.substr(0, slash + 1);
    const std::string baseName = filename.substr(slash + 1);
    const std::string target = directory + "Decrypted" + baseName;

    std::ifstream input(filename.c_str(), std::ifstream::binary);
    std::ofstream output(target.c_str(), std::ofstream::trunc);
    if (!input.is_open())
        return;

    std::cout << "Opened file: " + filename + " for decryption" << std::endl;
    std::string record;
    std::string::size_type recordLen;
    for (;;)
    {
        // Record header: the payload length.
        if (!input.read(reinterpret_cast<char*>(&recordLen), sizeof(recordLen)))
            break;
        record.resize(recordLen);
        // Record payload: the encrypted characters.
        if (!input.read(&record[0], recordLen))
            break;
        for (std::string::size_type pos = 0; pos != recordLen; ++pos)
        {
            record[pos] = record[pos] ^ encryptionKey;
            // Debug trace of every produced byte.
            std::cout << " focusByte: " << record[pos] << std::endl;
        }
        output << record << std::endl;
    }
    std::cout << "eof" << std::endl;
}
// Entry point. Takes the target file from the first command-line argument or,
// if none was given, prompts for it. Then asks whether to encrypt ('e') or
// decrypt ('d') the file with the fixed key 123 and pauses before exiting.
// Returns EXIT_FAILURE when no filename or no choice could be obtained,
// EXIT_SUCCESS otherwise.
int main(int argument_count, char* argument_list[])
{
    std::system("color a"); // Windows `color` shell command
    std::string filename;
    if (argument_count < 2)
    {
        std::cout << "Enter a file to process: " << std::flush;
        std::getline(std::cin, filename);
    }
    else
    {
        filename = argument_list[1];
    }
    if (filename.empty())
    {
        std::cout << "You didn't supply a filename" << std::endl;
        return EXIT_FAILURE;
    }
    std::cout << "Target file: " << filename << std::endl;
    std::cout << "Press e to encrypt the file" << std::endl;
    std::cout << "Press d to decrypt the file" << std::endl;
    char choice = '\0';
    while (true)
    {
        std::cout << "> " << std::flush;
        // A failed read (EOF or stream error) would leave `choice` unset and
        // repeat forever, so give up instead of looping.
        if (!(std::cin >> choice))
        {
            std::cerr << "No choice could be read from standard input" << std::endl;
            return EXIT_FAILURE;
        }
        if (choice == 'e')
        {
            Encrypt(123, filename);
            break;
        }
        else if (choice == 'd')
        {
            Decrypt(123, filename);
            break;
        }
        else
        {
            std::cout << "please choose option e or d for encryption or decryption, respectively" << std::endl;
        }
    }
    std::cout << std::endl << "Paused, press Enter to continue" << std::flush;
    std::system("pause");
    return EXIT_SUCCESS;
}
ASCII value 26 is EOF on some operating systems.
You should probably treat your encrypted file as a byte stream rather than a text file for reading and writing. That means either using read() and write() functions of the IOStream or at the very least opening the files in binary mode.
If you're just enciphering your text instead of encrypting, maybe choose a different cipher (eg. ROT13) that is closed on the set of printable ASCII or UTF-8 characters.
I compiled your code in Linux (minus all the Windows stuff)...
I get this when encrypting your sentence with your code:
/[[[[[
[[[ [[[U
It also decrypts back to the original sentence. Without the goofy characters, it is the same as your output so your actual issue seems related to the encoding of the file and the program you are using to view the results. Stephan is correct in saying you should be reading/writing bytes instead of text. This can cause all sorts of issues with the characters you create. For example, line feeds and carriage returns since you are using getline().
Edit: Strange. After editing this answer, all the odd characters disappeared. Here is a screenshot:

creating a c++ program that displays hexadecimal-formatted data from a bmp file

I'm trying to create a program that displays the contents of a bmp file in hexadecimal. So far I get the output, but I need it to be organized in a certain way.
Each row needs to start with the address (offset) within the bmp file in the left column, followed by 16 bytes of data in hex, in the order they appear in the file, with an extra space between each group of 8 bytes. So far I have got the hexadecimal to show up; I just need help with organizing it.
What I have:
What I'm trying to make it look like:
Here is my code:
#include <iostream> // cout
#include <fstream> // ifstream
#include <iomanip> // setfill, setw
#include <stdlib.h>
using namespace std; // Use this to avoid repeated "std::cout", etc.
// Loads the whole bitmap file into memory, prints it once as a C string and
// then prints the first (up to) 100 bytes as "<index> <byte>" pairs in hex.
// The file path is hard-coded; the command-line arguments are not used.
// Returns 0, or 1 when the file size cannot be determined.
int main(int argc, char *argv[]) // argv[1] is the first command-line argument
{
    (void)argc;
    (void)argv;
    const char *path = "C:\\Users\\Test\\Documents\\SmallTest.bmp";
    // Open the provided file for reading of binary data
    ifstream is(path, ifstream::binary);
    if (is) // if file was opened correctly . . .
    {
        is.seekg(0, is.end); // Move to the end of the file
        int length = static_cast<int>(is.tellg()); // Find the current position, which is file length
        if (length < 0) // tellg() reports failure as -1
        {
            cout << "error: could not determine the size of " << path << "\n";
            return 1;
        }
        is.seekg(0, is.beg); // Move to the beginning of the file
        // One extra, zero-initialized byte so the buffer is always
        // NUL-terminated when it is printed as a string below.
        char * buffer = new char[length + 1](); // Explicit allocation of memory.
        cout << "Reading " << length << " characters... ";
        is.read(buffer, length); // read data as a block or group (not individually)
        if (is)
            cout << "all characters read successfully.\n";
        else
            cout << "error: only " << is.gcount() << " could be read.\n";
        is.close();
        // Now buffer contains the entire file. The buffer can be printed as if it
        // is a _string_, but by definition that kind of print will stop at the first
        // occurrence of a zero character, which is the string-ending mark.
        cout << "buffer is:\n" << buffer << "\n"; // Print buffer
        // Never index past the data that was actually loaded.
        for (int i = 0; i < 100 && i < length; i++)
        {
            cout << setfill('0') << setw(4) << hex << i << " ";
            cout << setfill('0') << setw(2) << hex << (0xff & (int)buffer[i]) << " ";
        }
        delete[] buffer; // Explicit freeing or de-allocation of memory.
    }
    else // There was some error opening file. Show message.
    {
        // Report the path that was actually opened; argv[1] may not exist.
        cout << "\n\n\tUnable to open file " << path << "\n";
    }
    return 0;
}
You could do it something like this:
#include <iostream>
#include <iomanip>
#include <fstream>
#include <vector>
#include <cctype>
// Writes one hex-dump row for the (up to) 16 bytes of `v` starting at `offset`:
// the offset as 8 hex digits, the bytes as two-digit hex values with an extra
// space after the 8th, and finally the same bytes as text, with '.' standing
// in for non-printable values.
// When fewer than 16 bytes remain, the missing hex columns are padded with
// blanks so the text column stays aligned, and nothing is read past the end
// of `v`. For a full row the output is unchanged.
// The stream's formatting state is restored before the text column is written.
// Returns `out`.
std::ostream& fullLine(std::ostream& out, const std::vector<uint8_t>& v, size_t offset)
{
    const size_t bytesPerLine = 16;
    // Number of bytes that really exist for this row.
    size_t available = 0;
    if (offset < v.size())
    {
        available = v.size() - offset;
        if (available > bytesPerLine)
            available = bytesPerLine;
    }

    //save stream state so we can restore it after all the hex/setw/setfill nonsense.
    std::ios oldState(0);
    oldState.copyfmt(out);
    out << std::hex << std::setfill('0') << std::setw(8) << offset << " ";
    for (size_t i = 0; i < bytesPerLine; ++i)
    {
        if (i == 8) out << " ";
        if (i < available)
            out << std::hex << std::setfill('0') << std::setw(2) << static_cast<uint32_t>(v[i + offset]) << " ";
        else
            out << "   "; // same width as "xx "
    }
    out << " ";
    //restore stream state to print normal text
    out.copyfmt(oldState);
    for (size_t i = 0; i < available; ++i)
    {
        out << (std::isprint(v[i + offset]) ? static_cast<char>(v[i + offset]) : '.');
    }
    out << "\n";
    return out;
}
// Loads "test.txt" into memory and prints every complete 16-byte row with
// fullLine(). A trailing partial row is not printed.
// Returns 0 on success (also when the file cannot be opened), -1 when the
// file cannot be sized or read, or when writing to std::cout fails.
int main()
{
    std::vector<uint8_t> data;
    std::ifstream f("test.txt", std::ios::binary);
    if (f)
    {
        f.seekg(0, f.end);
        // tellg() reports failure as -1; converting that straight to size_t
        // would request an enormous allocation.
        const std::streamoff fileSize = f.tellg();
        if (fileSize < 0)
        {
            std::cerr << "Error determining file size!\n";
            return -1;
        }
        data.resize(static_cast<size_t>(fileSize));
        f.seekg(0, f.beg);
        if (!data.empty() &&
            !f.read(reinterpret_cast<char*>(data.data()), static_cast<std::streamsize>(data.size())))
        {
            std::cerr << "Error during input!\n";
            return -1;
        }
        const size_t numFullLines = data.size() / 16;
        for (size_t i = 0; i < numFullLines; ++i)
        {
            if (!fullLine(std::cout, data, i * 16))
            {
                std::cerr << "Error during output!\n";
                return -1;
            }
        }
    }
    return 0;
}
There's probably a fancy way to do it, but I usually go for brute force when I'm looking for particular output using iostreams.
How to handle the partial last line is up to you. :)
Use the % operator to break the line after every 16th count:
cout << hex;
for(int i = 0; i < 100; i++)
{
if(i && (i % 16) == 0)
cout << "\n";
cout << setfill('0') << setw(2) << (buffer[i] & 0xFF) << " ";
}
I need it to be organized a certain way.
In another answer, I submitted this form of dumpByteHex()... perhaps it can help you achieve what you want. (see also https://stackoverflow.com/a/46083427/2785528)
// C++ support function
// Formats `len` bytes starting at `startAddr` as a hex dump and returns it as
// a string. Rows hold 16 bytes; each row ends with an echo of the same bytes
// as text, with '.' in place of non-printable values.
//   startAddr - first byte to dump
//   len       - number of bytes; 0 is reported on std::cerr and then asserted
//   label     - text emitted once, before the dump
//   indent    - when non-zero, the width of a blank field emitted before each
//               row's index
// The row index (4 hex digits) is only emitted when len > 7.
// NOTE(review): the width of the padding written for an incomplete last row
// depends on the exact whitespace literals below - verify them against the
// expected output format before changing anything here.
std::string dumpByteHex (char* startAddr, // reinterpret_cast explicitly
size_t len, // allows to char* from T*
std::string label = "",
int indent = 0)
{
std::stringstream ss;
if(len == 0) {
std::cerr << "\n dumpByteHex() err: data length is 0? " << std::endl << std::dec;
assert(len != 0);
}
// Output description
ss << label << std::flush;
unsigned char* kar = reinterpret_cast<unsigned char*>(startAddr); // signed to unsigned
std::string echo; // holds input chars until eoln
size_t indx;
// Used as a flag: set once a mid-row separator has been written, never reset.
size_t wSpaceAdded = false;
for (indx = 0; indx < len; indx++)
{
// Start of a 16-byte row.
if((indx % 16) == 0)
{
if(indx != 0) // echo is empty the first time through for loop
{
// Close the previous row with its text echo.
ss << " " << echo << std::endl;
echo.erase();
}
// fields are typically < 8 bytes, so skip when small
if(len > 7) {
if (indent) { ss << std::setw(indent) << " "; }
ss << std::setfill('0') << std::setw(4) << std::hex
<< indx << " " << std::flush;
} // normally show index
}
// hex code
ss << " " << std::setfill('0') << std::setw(2) << std::hex
<< static_cast<int>(kar[indx]) << std::flush;
if((indx % 16) == 7) { ss << " "; wSpaceAdded = true; } // white space for readability
// defer the echo-of-input, capture to echo
if (std::isprint(kar[indx])) { echo += kar[indx]; }
else { echo += '.'; }
}
// finish last line when < 17 characters
if (((indx % 16) != 0) && wSpaceAdded) { ss << " "; indx++; } // when white space added
while ((indx % 16) != 0) { ss << " "; indx++; } // finish line
// the last echo
ss << " " << echo << '\n';
return ss.str();
} // std::string dumpByteHex()
Output format:
0000 11 22 33 44 55 66 00 00 00 00 77 88 99 aa ."3DUf....w...

Binary char array into stringstream and pop from the buffer

I have a 20-byte binary char array. I want to divide it into 3 parts: 4 bytes, 8 bytes and 8 bytes. I implemented it as shown below. It works, but it seems I might be able to use a buffer stream instead, and I want to know how to do that.
Now
// Renders bytes [first, last) of `bytes` as text: 8 characters ('0'/'1') per
// byte, most significant bit first.
static string toBits(const string &bytes, std::size_t first, std::size_t last)
{
    string bits;
    for (std::size_t i = first; i < last; ++i) {
        bits += bitset<8>(bytes.c_str()[i]).to_string();
    }
    return bits;
}

// Splits the 20-byte _data into a 4-byte, an 8-byte and another 8-byte part
// and prints each part as a string of bits.
int main()
{
    // _data is 20byte binary char array. 0000000000000000000000000000000000000000000001111001110001111111001110000010110000001011101101000000000000000000000000000000000000000000000000000000000000000001
    // strA (4 byte)
    string strA = toBits(_data, 0, 4);
    cout << strA << endl; // 00000000000000000000000000000000
    // strB (8 byte)
    string strB = toBits(_data, 4, 12);
    cout << strB << endl; // 0000000000000111100111000111111100111000001011000000101110110100
    // strC (8 byte)
    string strC = toBits(_data, 12, 20);
    cout << strC << endl; // 0000000000000000000000000000000000000000000000000000000000000001
    return 0;
}
Expectation
I want to implement like this.
// Pseudo-code sketch of the interface the asker would like to have; it is not
// compilable C++ (std::stringstream has no pop() member, and strA/strB/strC
// are not declared). pop(n) is meant to remove and return the next n bytes.
void main()
{
stringstream ss = _data;
strA = ss.pop(4);
strB = ss.pop(8);
strC = ss.pop(8);
}
Update 1
Thank you, everyone. I'm trying the answers you gave me one by one. I'm a newbie in C++, so it takes time to understand them. The following is Anders K's approach.
// Overlay describing the 20-byte layout: a 4-byte part followed by two 8-byte parts.
struct S { char four[4]; char eight1[8]; char eight2[8]; };
// _data is a std::string, so &_data would be the address of the string object
// itself rather than of its bytes; overlay the character data instead.
const S *p = reinterpret_cast<const S*>(_data.data());
// The members are raw bytes without a terminating NUL, so write them with an
// explicit length instead of streaming them as a C string.
cout.write(p->four, sizeof(p->four)) << endl;
Update 2
It works using string::substr. Thanks Zakir.
// Converts every byte of _data to its 8-character bit string, then cuts the
// resulting 160-character string into the 4-byte, 8-byte and 8-byte parts
// with std::string::substr.
int main()
{
    // I don't know how to change to string value in smart way..
    string str;
    for (std::size_t i = 0; i < _data.size(); ++i) {
        str += bitset<8>(_data.c_str()[i]).to_string();
    }
    cout << str << endl; // 0000000000000000000000000000000000000000000001111001110001111111001110000010110000001011101101000000000000000000000000000000000000000000000000000000000000000001
    const std::string &d = str; // Your binary stream goes here
    const std::size_t lenA = (4*8); // First 4 Bytes
    const std::size_t lenB = (8*8); // Second 8 Bytes
    const std::size_t lenC = (8*8); // Last 8 Bytes
    // substr(pos, count): each part starts exactly where the previous one
    // ended and keeps its full length, so no bit is skipped or dropped.
    std::string strA = d.substr(0, lenA);
    std::string strB = d.substr(lenA, lenB);
    std::string strC = d.substr(lenA + lenB, lenC);
    cout << strA << endl; // 00000000000000000000000000000000
    cout << strB << endl; // 0000000000000111100111000111111100111000001011000000101110110100
    cout << strC << endl; // 0000000000000000000000000000000000000000000000000000000000000001
    return 0;
}
Update 3
I got an error when I tried Scheff's approach. This is my own mistake and I think I can solve it. I also think I should reconsider the type of _data.
// Attempt to drive pop(const char *&, size_t) from _data; this is the code
// that produced the compiler error quoted after it and is kept as posted.
// NOTE(review): `int main` lacks its parameter list.
int main
{
// NOTE(review): _data is a std::string, which cannot be converted to a single
// `const char`; presumably `const char *iter = _data.c_str();` was intended -
// confirm.
const char data = _data;
const char *iter = data;
string strA = pop(iter, 4);
string strB = pop(iter, 8);
string strC = pop(iter, 8);
cout << "strA: '" << strA << "'" << endl;
cout << "strB: '" << strB << "'" << endl;
cout << "strC: '" << strC << "'" << endl;
}
Make Error Message
error: no viable conversion from 'string' (aka 'basic_string<char, char_traits<char>, allocator<char> >') to
'const char'
const char data = _data;
It is not possible to make a new method for std::stringstream. (At least, I would not recommend this.)
Instead, I would suggest to make it a function. The usage would be similar.
#include <bitset>
#include <iostream>
#include <sstream>
#include <string>
using namespace std;
// Consumes `n` bytes from `in` and returns them as text, 8 characters
// ('0'/'1') per byte, most significant bit first.
// The stream state is not checked: a read that fails still contributes
// 8 characters to the result.
string pop(istream &in, size_t n)
{
    string bits;
    for (size_t remaining = n; remaining > 0; --remaining) {
        const int raw = in.get();
        bits += bitset<8>(static_cast<unsigned char>(raw)).to_string();
    }
    return bits;
}
// Demo: wraps 20 sample bytes in an input string stream and pops a 4-byte,
// an 8-byte and another 8-byte part from it, printing each as bits.
int main()
{
    // The explicit length keeps the trailing \x00 byte in the string.
    const string data(
        "\x11\x22\x33\x44\x55\x66\x77\x88\x99\xaa"
        "\xbb\xcc\xdd\xee\xff\xde\xad\xbe\xef\x00", 20);
    istringstream in(data);

    const string strA = pop(in, 4);
    cout << "strA: '" << strA << "'" << endl;
    const string strB = pop(in, 8);
    const string strC = pop(in, 8);
    cout << "strB: '" << strB << "'" << endl;
    cout << "strC: '" << strC << "'" << endl;
    return 0;
}
Output:
strA: '00010001001000100011001101000100'
strB: '0101010101100110011101111000100010011001101010101011101111001100'
strC: '1101110111101110111111111101111010101101101111101110111100000000'
Note:
Using a std::istream makes it applicable to any stream derived from std::istream.
There is no error handling in pop(). Thus, the returned result of pop() might be wrong if the passed stream isn't good() afterwards.
Btw. I agree with the comments that a std::stream might be "over-engineered". Thus, here the "light-weight" version:
#include <bitset>
#include <iostream>
#include <string>
using namespace std;
// Consumes `n` bytes starting at `iter` and returns them as text,
// 8 characters ('0'/'1') per byte, most significant bit first.
// `iter` is advanced past the consumed bytes. The caller must guarantee that
// at least `n` bytes are readable; no bounds are checked here.
string pop(const char *&iter, size_t n)
{
    string bits;
    const char *const stop = iter + n;
    while (iter != stop) {
        // Go through unsigned char so a negative char is not sign-extended.
        bits += bitset<8>(static_cast<unsigned char>(*iter)).to_string();
        ++iter;
    }
    return bits;
}
// Demo: walks a pointer over 20 sample bytes and pops a 4-byte, an 8-byte
// and another 8-byte part, printing each as bits.
int main()
{
    const char data[] =
        "\x11\x22\x33\x44\x55\x66\x77\x88\x99\xaa"
        "\xbb\xcc\xdd\xee\xff\xde\xad\xbe\xef\x00";
    // Read cursor; every pop() call moves it forward.
    const char *iter = data;

    const string strA = pop(iter, 4);
    cout << "strA: '" << strA << "'" << endl;
    const string strB = pop(iter, 8);
    const string strC = pop(iter, 8);
    cout << "strB: '" << strB << "'" << endl;
    cout << "strC: '" << strC << "'" << endl;
    return 0;
}
The output is identical to the one above.
Note:
Using char[] and char* is much more prone to out-of-bounds access. Thus, it has to be done carefully.
I'm not quite sure whether the (unsigned char) cast is necessary. As I have often seen "funny" effects concerning char, int and sign extension, I guess it cannot hurt. (I feel better with it.)
I can propose you a very simple alternative using string::substr
#include <iostream>
#include <string>
using namespace std;
// Splits a 160-character bit string into its 4-byte (32 bits), 8-byte
// (64 bits) and 8-byte (64 bits) parts with std::string::substr.
int main ()
{
    const string _data =
        "00010001001000100011001101000100"
        "0101010101100110011101111000100010011001101010101011101111001100"
        "1101110111101110111111111101111010101101101111101110111100000000";
    const size_t lenA = (4*8); //First 4 Bytes
    const size_t lenB = (8*8); //Second 8 Bytes
    const size_t lenC = (8*8); //Last 8 Bytes
    // substr(pos, count): `count` is a length, not an end index, so each part
    // is taken with its full length and starts where the previous one ended.
    const string strA = _data.substr(0, lenA);
    const string strB = _data.substr(lenA, lenB);
    const string strC = _data.substr(lenA + lenB, lenC);
    std::cout << "strA: " << strA << endl;
    std::cout << "strB: " << strB << endl;
    std::cout << "strC: " << strC << endl;
    return 0;
}
This is neat and simple but gets your job done!
Demo here
Output:-
strA: 0001000100100010001100110100010
strB: 010101010110011001110111100010001001100110101010101110111100110
strC: 100110011010101010111011110011001101110111101110111111111101111010101101101111101110111100000000

ifstream::read not working?

I am trying to read from a .csv file. There are two functions below, one for writing and one for reading.
The file contains a simple table:
date,first,second
1 a one
2 b two
3 c three
4 c four
For some reason, the statement while(file_stream.read(&c,1)); does not read anything. It stops at the first character and I'm dumbfounded as to why. Any clues?
#include <iostream>
#include <sstream>
#include <fstream>
#include <cstdio>
#include <cstring>
#include <vector>
#include <string>
#include <cstdlib>
using namespace std;
std::string filename;
std::string line_string;
ifstream file_stream;
stringstream ss;
vector< vector<string> > vec;
char c;
// Reads "test.csv" into the global stringstream `ss` one character at a time,
// then splits that text into lines and each line into comma-separated fields,
// appending one vector of fields per line to the global `vec`.
void read_file()
{
filename = "test.csv";
cout << filename << endl;
// NOTE(review): ios::out is passed to an ifstream here.
file_stream.open(filename.c_str(),ios::out|ios::binary);
if(file_stream.fail())
{
cout << "File didn't open" << endl;
return;
}
if(file_stream.is_open())
cout << "file opened" << endl;
// The ';' directly after the condition makes this a loop with an empty body:
// it keeps reading until read() fails. The braced block that follows is an
// ordinary block that then runs exactly once.
while(file_stream.read(&c,1)); // this isn't working
{
cout <<"char c is: " << c;
ss << noskipws << c;
}
file_stream.close();
cout << "string is: " << ss.str() << endl;
//get each line
// Incremented once per line; never read in this function.
int counter = 0;
vector<string> invec;
while(getline(ss,line_string,'\n'))
{
string header_string;
stringstream header_stream;
header_stream << line_string;
// Split the current line at every ','.
while(getline(header_stream, header_string,','))
{
invec.push_back(header_string);
}
// NOTE(review): header_string is pushed one more time after the fields are
// exhausted - looks unintended; confirm.
invec.push_back(header_string);
vec.push_back(invec);
invec.clear();
counter++;
}
}
// Prints the parsed table held in the global `vec`: one output line per row,
// each field followed by a space.
void test_output()
{
    for (size_t i = 0; i < vec.size(); i++)
    {
        // Bound the inner loop by the row being printed; rows may have
        // different lengths, so the size of row 0 is not a valid limit.
        for (size_t in = 0; in < vec[i].size(); in++)
            cout << vec[i][in] << " ";
        cout << endl;
    }
}
// Entry point: parse the CSV file, then print what was parsed.
int main()
{
    read_file();
    test_output();
    return 0;
}
Look very very carefully at the line that is not working:
// Quoted from the question. The ';' right after the condition is the loop's
// entire (empty) body; the braced block below is not part of the loop.
while(file_stream.read(&c,1)); // this isn't working
{
cout <<"char c is: " << c;
ss << noskipws << c;
}
The ; character at the end of the while statement does NOT belong! You are running a no-body loop that does not terminate until read() fails, and THEN your code enters the bracketed block to output the last character that was successfully read (if any).
You need to remove that erroneous ; character:
// Corrected loop: the braced block is now the loop body, so it runs once for
// every character that read() delivers.
while(file_stream.read(&c,1)) // this works
{
cout <<"char c is: " << c;
ss << noskipws << c;
}
Now, the real question is - why are you reading the input file character-by-character into a std::stringstream in the first place? You can use std::getline() with the input std::ifstream directly:
#include <iostream>
#include <sstream>
#include <fstream>
#include <vector>
#include <string>
std::vector< std::vector<std::string> > vec;
// Parses "test.csv" into the global `vec`, one vector of fields per line.
// The first line (the header) is split at commas; every following line is
// split at whitespace, matching the sample file.
// Prints a message and returns early when the file cannot be opened.
void read_file()
{
    const std::string filename = "test.csv";
    std::cout << filename << std::endl;
    // `ios` lives in namespace std and this snippet has no using-directive.
    std::ifstream file_stream(filename.c_str(), std::ios::binary);
    if (!file_stream)
    {
        std::cout << "File didn't open" << std::endl;
        return;
    }
    std::cout << "file opened" << std::endl;
    //get each line
    std::vector<std::string> invec;
    std::string line;
    std::string field;
    if (std::getline(file_stream, line))
    {
        // Header: comma-separated.
        std::istringstream iss(line);
        while (std::getline(iss, field, ','))
            invec.push_back(field);
        vec.push_back(invec);
        invec.clear();
        // Data rows: whitespace-separated.
        while (std::getline(file_stream, line))
        {
            // The previous parse left the stream in a failed/EOF state;
            // str() does not reset it, so clear the flags before reuse or
            // every extraction below would fail immediately.
            iss.clear();
            iss.str(line);
            while (iss >> field)
                invec.push_back(field);
            vec.push_back(invec);
            invec.clear();
        }
    }
}
// Prints the parsed table held in the global `vec`: the first row with a ','
// after every field, all remaining rows with a space after every field.
// Prints nothing when the table is empty.
void test_output()
{
    if (vec.empty())
        return;

    typedef std::vector<std::string> Row;

    // Header row.
    const Row& header = vec.front();
    for (Row::const_iterator cell = header.begin(); cell != header.end(); ++cell)
        std::cout << *cell << ",";
    std::cout << std::endl;

    // Data rows.
    std::vector<Row>::const_iterator row = vec.begin();
    for (++row; row != vec.end(); ++row)
    {
        for (Row::const_iterator cell = row->begin(); cell != row->end(); ++cell)
            std::cout << *cell << " ";
        std::cout << std::endl;
    }
}
// Entry point: parse the CSV file, then print what was parsed.
int main()
{
    read_file();
    test_output();
    return 0;
}

how can I find the sequence number (index) of word in such a paragraph c++?

I'm working on a project that needs to find the number of words in a paragraph and the index of each word. I have written code that counts the number of words in a string, but I'm stuck on finding the indices of the words.
For example, for the input: Hi John How are you I miss you
I need to print the indices like this: 0 1 2 3 4 5 6 7
Here is the code:
here is the code:
// Reads one line of text from standard input and prints how many words it
// contains. Words are assumed to be separated by single spaces, so the count
// is the number of spaces plus one, or 0 for an empty line.
int _tmain(int argc, _TCHAR* argv[])
{
    std::string input_text;
    std::cout<< "Enter a text: ";
    std::getline(std::cin,input_text);
    // An empty line contains no words at all.
    int number_of_words = input_text.empty() ? 0 : 1;
    for (std::string::size_type i = 0; i < input_text.length(); i++)
    {
        if (input_text[i] == ' ')
            number_of_words++;
    }
    std::cout << "Number of words: " << number_of_words << std::endl;
    system ("PAUSE");
    return 0;
}
Hopefully this helps. Edited to include use of count_words function.
#include <iostream>
#include <sstream>
void count_words(std::string);
// Entry point: reads one line of text, reports its words via count_words()
// and pauses before exiting.
int main()
{
    std::cout << "Enter a text: ";

    std::string input_text;
    std::getline(std::cin, input_text);

    count_words(input_text);

    system("PAUSE");
    return 0; //MUST RETURN AN INTEGER VALUE FROM 'INT MAIN'
}
// Prints the number of words in `inputString` and the character position at
// which each word starts.
// The word count is the number of spaces plus one (0 for an empty string).
// A word start is an alphabetic character that is either the first character
// of the string or directly preceded by a space.
// Output format: "Number of words: N" on one line, then "Indexes: " followed
// by the start positions, each followed by a space.
void count_words(std::string inputString){
    std::stringstream indexes;
    int number_of_words=0; //If there are no words, it would be false, make it 0.
    if(!inputString.empty()){// test to make sure it isn't empty.
        number_of_words++;
        for(std::string::size_type i = 0; i < inputString.length(); i++){
            if(inputString[i] == ' '){
                number_of_words++;
            }
            // Position 0 has no predecessor; treat it as a word boundary
            // instead of reading inputString[i-1] out of range. This also
            // reports the first word, which was previously skipped.
            const bool at_boundary = (i == 0) || (inputString[i-1] == ' ');
            if(at_boundary && isalpha(static_cast<unsigned char>(inputString[i]))){
                indexes << i << " ";
            }
        }
    }
    const std::string output_text = indexes.str(); //convert stringstream to string
    std::cout << "Number of words: " << number_of_words << std::endl;
    std::cout << "Indexes: " << output_text << std::endl;
}
I'm not sure I understand the question. Do you only need to print the "indices", like this? (Using your own code)
#include <iostream>
#include <vector>
#include <string>
// Splits `str` at every occurrence of `delimiter` and appends the non-empty
// pieces to `tokens`, in order. Pieces between adjacent delimiters, and at
// the very start or end of `str`, are empty and therefore skipped.
// `tokens` is appended to, not cleared.
void stringTokenizer(const std::string& str, const std::string& delimiter, std::vector<std::string>& tokens) {
    const std::string::size_type step = delimiter.size();
    std::string::size_type begin = 0;
    for (std::string::size_type hit = str.find(delimiter, begin);
         hit != std::string::npos;
         hit = str.find(delimiter, begin)) {
        if (hit != begin) {
            tokens.push_back(str.substr(begin, hit - begin));
        }
        // Continue right after the delimiter that was just found.
        begin = hit + step;
    }
    // Whatever follows the last delimiter.
    if (begin < str.size()) {
        tokens.push_back(str.substr(begin));
    }
}
// Entry point: reads one line of text, splits it at spaces and prints every
// word together with its zero-based position in the line.
int main()
{
    std::cout << "Enter a text: ";

    std::string input_text;
    std::getline(std::cin, input_text);

    std::vector<std::string> split;
    stringTokenizer(input_text, " ", split);

    for (std::vector<std::string>::size_type position = 0; position < split.size(); ++position) {
        std::cout << split[position] << " " << static_cast<int>(position) << std::endl;
    }
}