How to convert a String from UTF8 to Latin1 in C/C++? - c++

The question I have is quite simple, but I couldn't find a solution so far:
How can I convert a UTF8 encoded string to a latin1 encoded string in C++ without using any extra libs like libiconv?
Every example I could find so far is for the opposite direction, latin1 to UTF8.

typedef unsigned value_type;

// Returns how many bytes the UTF-8 sequence starting at *p occupies (1..6),
// judged only from the byte at *p. No validation is done on that byte.
template <typename Iterator>
size_t get_length (Iterator p)
{
    const unsigned char lead = static_cast<unsigned char> (*p);
    if (lead < 0x80)
        return 1;

    // Test bits 0x20, 0x10, 0x08, 0x04 in turn; the first clear bit decides
    // the length (2, 3, 4 or 5). If all four are set the length is 6.
    size_t length = 2;
    for (unsigned char bit = 0x20; bit >= 0x04; bit >>= 1, ++length)
    {
        if ((lead & bit) == 0)
            return length;
    }
    return 6;
}
// Decodes the UTF-8 sequence that starts at p and returns its code point.
// p is pre-incremented once per continuation byte, so when Iterator is a
// reference type the caller's iterator ends up on the last byte of the
// sequence. No validation of the continuation bytes is performed.
template <typename Iterator>
value_type get_value (Iterator p)
{
    const size_t total = get_length (p);
    if (total == 1)
        return *p;

    // The lead byte keeps its low (7 - total) bits as the top of the code point.
    size_t remaining = total - 1;
    const unsigned char payload =
        static_cast<unsigned char> (*p & (0xff >> (total + 1)));
    value_type code_point = payload << (remaining * 6);

    // Each following byte contributes six more bits, highest group first.
    while (remaining != 0)
    {
        --remaining;
        ++p;
        const unsigned char next = static_cast<unsigned char> (*p);
        code_point |= (next - 0x80) << (remaining * 6);
    }
    return code_point;
}
This function will return the unicode code point at p. You can now convert a string using
// Decode s_utf8 one code point at a time and append the matching Latin-1 byte
// to s_latin1. get_value is instantiated with an iterator *reference* so that
// it advances p over the continuation bytes it consumes; the loop's own ++p
// then moves on to the next lead byte.
for (std::string::iterator p = s_utf8.begin(); p != s_utf8.end(); ++p)
{
    value_type value = get_value<std::string::iterator&>(p);
    // Anything above U+00FF has no Latin-1 representation.
    if (value > 0xff)
        throw "AAAAAH!";
    // std::string has no append(char) overload; push_back adds a single char.
    s_latin1.push_back(static_cast<char>(value));
}
No guarantees, the code is quite old :)

Here is a version of filmor's answer that I wrote for my purposes. A bit more readable, probably a bit slower. I didn't need the template stuff since I was always dealing with char *, and in my case I wanted to replace non-Latin1 characters with _. Just in case it helps someone:
// Returns the length in bytes (1..6) of a UTF-8 sequence, judged only from
// its first byte. The byte is not validated.
int GetUtf8CharacterLength( unsigned char utf8Char )
{
    if ( utf8Char < 0x80 )
    {
        return 1;
    }

    // Probe bits 0x20, 0x10, 0x08, 0x04; the first one found clear gives a
    // length of 2, 3, 4 or 5 respectively.
    int length = 2;
    for ( unsigned int marker = 0x20 ; marker > 0x02 ; marker >>= 1 )
    {
        if ( !( utf8Char & marker ) )
        {
            return length;
        }
        length++;
    }
    return 6;
}
// Decodes the UTF-8 sequence starting at s[*readIndex] and returns it as a
// Latin-1 byte, advancing *readIndex past the bytes that were consumed.
//
// Returns 0 when the code point does not fit in Latin-1 (> 0xff) or when the
// sequence is malformed. A byte that is not a continuation byte (10xxxxxx)
// is never consumed as part of a sequence: decoding stops there and
// *readIndex is left pointing at it. In particular a sequence cut short by
// the terminating NUL can no longer step over the terminator.
char Utf8ToLatin1Character( char *s, int *readIndex )
{
    int len = GetUtf8CharacterLength( static_cast<unsigned char>( s[ *readIndex ] ) );
    if ( len == 1 )
    {
        char c = s[ *readIndex ];
        (*readIndex)++;
        return c;
    }

    // Payload bits of the lead byte form the top of the code point.
    unsigned int v = ( static_cast<unsigned char>( s[ *readIndex ] ) & ( 0xffu >> ( len + 1 ) ) ) << ( ( len - 1 ) * 6 );
    (*readIndex)++;

    for ( len-- ; len > 0 ; len-- )
    {
        const unsigned char next = static_cast<unsigned char>( s[ *readIndex ] );
        if ( ( next & 0xC0 ) != 0x80 )
        {
            // Truncated or malformed sequence (this includes hitting the NUL).
            return 0;
        }
        v |= static_cast<unsigned int>( next & 0x3F ) << ( ( len - 1 ) * 6 );
        (*readIndex)++;
    }
    return ( v > 0xff ) ? 0 : static_cast<char>( v );
}
// Converts the NUL-terminated UTF-8 string s to Latin-1 in place and returns s.
// Every character for which Utf8ToLatin1Character reports 0 is written as '_'.
char *Utf8ToLatin1String( char *s )
{
    int src = 0;   // next byte to decode
    int dst = 0;   // next byte to write

    while ( s[ src ] != 0 )
    {
        const char latin1 = Utf8ToLatin1Character( s, &src );
        s[ dst++ ] = ( latin1 != 0 ) ? latin1 : '_';
    }
    s[ dst ] = 0;
    return s;
}
Test code:
char s2[ 256 ] = "lif\xc3\xa9 is b\xc3\xa9tt\xc3\xa9r with acc\xc3\xa9nts";
Utf8ToLatin1String( s2 );

latin1 (aka ISO-8859-1) covers exactly the first 256 code points of Unicode (U+0000 to U+00FF). Thus, once a UTF-8 sequence has been decoded, a code point that fits in 8 bits maps directly to the latin1 byte with the same value. If the code point needs more than 8 bits, then there is no correspondent within latin1 and you should map it to some "unknown character" (e.g., \0 or ?).

Related

How to convert a literal string of hex to actual hex values in C++? [duplicate]

What is the best way to convert a variable length hex string e.g. "01A1" to a byte array containing that data.
i.e converting this:
std::string = "01A1";
into this
char* hexArray;
int hexLength;
or this
std::vector<char> hexArray;
so that when I write this to a file and hexdump -C it I get the binary data containing 01A1.
This implementation uses the built-in strtol function to handle the actual conversion from text to bytes, but will work for any even-length hex string.
// Converts hex text to raw bytes, consuming two characters per byte.
// Each pair is handed to strtol with base 16, so whatever strtol accepts for
// that pair (including a lone trailing digit) becomes one output byte.
std::vector<char> HexToBytes(const std::string& hex) {
    std::vector<char> bytes;
    unsigned int pos = 0;
    while (pos < hex.length()) {
        const std::string pair = hex.substr(pos, 2);
        const long parsed = strtol(pair.c_str(), NULL, 16);
        bytes.push_back((char) parsed);
        pos += 2;
    }
    return bytes;
}
This ought to work:
// Returns the numeric value (0..15) of one hexadecimal digit character.
// Throws std::invalid_argument for any character outside 0-9, A-F, a-f.
int char2int(char input)
{
    const bool isDigit = (input >= '0' && input <= '9');
    const bool isUpper = (input >= 'A' && input <= 'F');
    const bool isLower = (input >= 'a' && input <= 'f');

    if(!(isDigit || isUpper || isLower))
        throw std::invalid_argument("Invalid input string");

    if(isDigit)
        return input - '0';

    const char base = isUpper ? 'A' : 'a';
    return input - base + 10;
}
// Writes one byte to target for every complete pair of hex digits in the
// zero-terminated string src. The caller must supply a sanitized string and
// a target buffer large enough for the result; neither is checked here, and
// a trailing unpaired digit is ignored.
void hex2bin(const char* src, char* target)
{
    for(; src[0] != '\0' && src[1] != '\0'; src += 2, ++target)
    {
        *target = char2int(src[0])*16 + char2int(src[1]);
    }
}
Depending on your specific platform there's probably also a standard implementation though.
So for fun, I was curious if I could do this kind of conversion at compile-time. It doesn't have a lot of error checking and was done in VS2015, which doesn't support C++14 constexpr functions yet (thus how HexCharToInt looks). It takes a c-string array, converts pairs of characters into a single byte and expands those bytes into a uniform initialization list used to initialize the T type provided as a template parameter. T could be replaced with something like std::array to automatically return an array.
#include <cstdint>
#include <initializer_list>
#include <stdexcept>
#include <utility>
/* Maps one hexadecimal digit character to its value (0-15) and throws
   std::exception for anything else. Kept as a single return expression so it
   stays usable where only C++11-style constexpr functions are supported. */
constexpr std::uint8_t HexCharToInt(char Input)
{
    return
        (('0' <= Input) && (Input <= '9'))
            ? (Input - '0')
        : (('A' <= Input) && (Input <= 'F'))
            ? (Input - 'A' + 10)
        : (('a' <= Input) && (Input <= 'f'))
            ? (Input - 'a' + 10)
        : throw std::exception{};
}
/* Combines two hex digit characters into one byte: High supplies the upper
   nibble, Low the lower nibble. */
constexpr std::uint8_t HexChar(char High, char Low)
{
    return (HexCharToInt(High) * 16) + HexCharToInt(Low);
}
/* Adapter: expands Index... into one HexChar call per output byte. Byte k is
   built from Input[2k] (high nibble) and Input[2k + 1] (low nibble), and the
   resulting list brace-initializes T. */
template <typename T, std::size_t Length, std::size_t ... Index>
constexpr T HexString(const char (&Input)[Length], const std::index_sequence<Index...>&)
{
return T{HexChar(Input[(Index * 2)], Input[((Index * 2) + 1)])...};
}
/* Entry function: converts the character array Input into a T holding
   Length / 2 bytes by forwarding to the index_sequence overload.
   Length is the full array extent, which for a string literal includes the
   terminating NUL; with an even number of digits the NUL is therefore never
   read. NOTE(review): with an odd number of digits the last pair is the final
   digit plus the NUL, which HexCharToInt rejects. */
template <typename T, std::size_t Length>
constexpr T HexString(const char (&Input)[Length])
{
return HexString<T>(Input, std::make_index_sequence<(Length / 2)>{});
}
constexpr auto Y = KS::Utility::HexString<std::array<std::uint8_t, 3>>("ABCDEF");
You can use boost:
#include <boost/algorithm/hex.hpp>
char bytes[60] = {0};
std::string hash = boost::algorithm::unhex(std::string("313233343536373839"));
std::copy(hash.begin(), hash.end(), bytes);
You said "variable length." Just how variable do you mean?
For hex strings that fit into an unsigned long I have always liked the C function strtoul. To make it convert hex pass 16 as the radix value.
Code might look like:
#include <cstdlib>
std::string str = "01a1";
unsigned long val = strtoul(str.c_str(), 0, 16);
If you want to use OpenSSL to do it, there is a nifty trick I found:
BIGNUM *input = BN_new();
int input_length = BN_hex2bn(&input, argv[2]);
input_length = (input_length + 1) / 2; // BN_hex2bn() returns number of hex digits
unsigned char *input_buffer = (unsigned char*)malloc(input_length);
retval = BN_bn2bin(input, input_buffer);
Just be sure to strip off any leading '0x' to the string.
This can be done with a stringstream, you just need to store the value in an intermediate numeric type such as an int:
std::string test = "01A1"; // assuming this is an even length string
// NOTE(review): an array sized by a run-time value is a compiler extension,
// not standard C++ - consider std::vector<char> instead.
char bytes[test.length()/2];
// NOTE(review): stringstream is unqualified; presumably a using-declaration
// for std is in effect where this snippet is pasted - confirm.
stringstream converter;
for(int i = 0; i < test.length(); i+=2)
{
// Push exactly two hex digits into the stream, then read them back as one int.
converter << std::hex << test.substr(i,2);
int byte;
converter >> byte;
bytes[i/2] = byte & 0xFF;
// Empty the buffer and clear the stream state so the next pair starts fresh.
converter.str(std::string());
converter.clear();
}
Somebody mentioned using sscanf to do this, but didn't say how. This is how. It's useful because it also works in ancient versions of C and C++ and even most versions of embedded C or C++ for microcontrollers.
When converted to bytes, the hex-string in this example resolves to the ASCII text "Hello there!" which is then printed.
#include <stdio.h>
// Converts a fixed hex string to bytes with sscanf, two digits at a time,
// and prints both the hex text and the decoded text.
int main ()
{
    char hexdata[] = "48656c6c6f20746865726521";
    char bytedata[20]{};

    // sizeof includes the terminating NUL; integer division drops it again.
    const int pairCount = sizeof(hexdata) / 2;
    const char *in = hexdata;
    char *out = bytedata;
    for(int done = 0; done < pairCount; ++done, in += 2, ++out) {
        sscanf(in, "%02hhX", out);
    }

    printf ("%s -> %s\n", hexdata, bytedata);
    return 0;
}
I would use a standard function like sscanf to read the string into an unsigned integer, and then you already have the bytes you need in memory. If you were on a big endian machine you could just write out (memcpy) the memory of the integer from the first non-zero byte. However you can't safely assume this in general, so you can use some bit masking and shifting to get the bytes out.
const char* src = "01A1";
char hexArray[256] = {0};
int hexLength = 0;
// read in the string
// NOTE: %x fills a single unsigned int, so only as many hex digits as fit in
// that type can be converted this way.
unsigned int hex = 0;
sscanf(src, "%x", &hex);
// write it out
// Walk from the most significant byte down: the mask starts at 0xff000000
// (assumes a 32-bit unsigned int - confirm for the target) and moves right
// eight bits per step, with bitPos tracking the matching shift.
for (unsigned int mask = 0xff000000, bitPos=24; mask; mask>>=8, bitPos-=8) {
unsigned int currByte = hex & mask;
// Leading zero bytes are skipped; once a byte has been stored every
// following byte is stored too, zero or not.
if (currByte || hexLength) {
hexArray[hexLength++] = currByte>>bitPos;
}
}
C++11 variant (with gcc 4.7 - little endian format):
#include <string>
#include <vector>
// Decodes a string of hexadecimal digits into bytes, most significant digit
// first. An odd number of digits is accepted: the first digit then forms a
// byte on its own ("a56ac" -> {0x0a, 0x56, 0xac}).
// Returns an empty vector if source contains a non-hex character.
//
// The string is parsed in chunks of up to 16 digits with std::stoull and the
// bytes are taken out of each value with shifts, so the result does not
// depend on the byte order of the host.
std::vector<uint8_t> decodeHex(const std::string & source)
{
    if ( std::string::npos != source.find_first_not_of("0123456789ABCDEFabcdef") )
    {
        // you can throw exception here
        return {};
    }

    const auto size = source.size();
    auto offset = (size % 16);

    std::vector<uint8_t> binary{};
    binary.reserve((size + 1) / 2);

    // Appends the low `count` bytes of `value`, highest byte first.
    const auto appendBytes = [&binary](uint64_t value, std::string::size_type count)
    {
        while ( count-- )
        {
            binary.emplace_back(static_cast<uint8_t>(value >> (8 * count)));
        }
    };

    // Leading partial chunk: the digits that do not fill a whole 16-digit group.
    if ( offset )
    {
        appendBytes(std::stoull(source.substr(0, offset), nullptr, 16), (offset + 1) / 2);
    }

    // Remaining full chunks of 16 digits, i.e. 8 bytes each.
    for ( ; offset < size; offset += 16 )
    {
        appendBytes(std::stoull(source.substr(offset, 16), nullptr, 16), 8);
    }

    return binary;
}
Crypto++ variant (with gcc 4.7):
#include <string>
#include <vector>
#include <crypto++/filters.h>
#include <crypto++/hex.h>
// Decodes a hex string with Crypto++: source is fed through a HexDecoder
// whose output is collected by a StringSink into hexCode, and the collected
// bytes are then copied into the returned vector.
// NOTE(review): the filters created with raw `new` are presumably owned and
// freed by the pipeline object they are attached to - confirm against the
// Crypto++ documentation.
std::vector<unsigned char> decodeHex(const std::string & source)
{
std::string hexCode;
CryptoPP::StringSource(
source, true,
new CryptoPP::HexDecoder(new CryptoPP::StringSink(hexCode)));
return std::vector<unsigned char>(hexCode.begin(), hexCode.end());
}
Note that the first variant is about two times faster than the second one and at the same time works with both odd and even numbers of nibbles (the result of "a56ac" is {0x0a, 0x56, 0xac}). Crypto++ discards the last nibble if there is an odd number of nibbles (the result of "a56ac" is {0xa5, 0x6a}) and silently skips invalid hex characters (the result of "a5sac" is {0xa5, 0xac}).
#include <iostream>
#include <sstream>
#include <vector>
// Demonstrates hex-to-bytes conversion through stream extraction: delimiters
// are inserted between digit pairs so that each pair can be read as one
// hexadecimal integer.
int main() {
std::string s("313233");
char delim = ',';
int len = s.size();
// Insert a delimiter after every two digits ("313233" becomes "31,32,33").
// len is bumped on each insertion so the bound follows the growing string.
for(int i = 2; i < len; i += 3, ++len) s.insert(i, 1, delim);
std::istringstream is(s);
std::ostringstream os;
// From here on integers are extracted as hexadecimal.
is >> std::hex;
int n;
while (is >> n) {
char c = (char)n;
// Build the string from pointer + length so a zero byte is kept too.
os << std::string(&c, 1);
// Step over the delimiter that follows the pair, if there is one.
if(is.peek() == delim) is.ignore();
}
// std::string form
std::string byte_string = os.str();
std::cout << byte_string << std::endl;
printf("%s\n", byte_string.c_str());
// std::vector form
std::vector<char> byte_vector(byte_string.begin(), byte_string.end());
byte_vector.push_back('\0'); // needed for a c-string
printf("%s\n", byte_vector.data());
}
The output is
123
123
123
'1' == 0x31, etc.
If your goal is speed, I have an AVX2 SIMD implementation of an encoder and decoder here: https://github.com/zbjornson/fast-hex. These benchmark ~12x faster than the fastest scalar implementations.
#include <iostream>
using byte = unsigned char;
// Returns the value (0..15) of a hexadecimal digit character,
// or -1 when c is not a hex digit.
static int charToInt(char c) {
    int value = -1;
    if (c >= '0' && c <= '9') {
        value = c - '0';
    } else if (c >= 'A' && c <= 'F') {
        value = c - 'A' + 10;
    } else if (c >= 'a' && c <= 'f') {
        value = c - 'a' + 10;
    }
    return value;
}
// Decodes the '\0'-terminated hex string str into bytes, which has room for
// nBytes entries. Returns the number of bytes written, which never exceeds
// nBytes (decoding simply stops once the array is full). Returns -1 if a
// digit pair is incomplete or contains a character that is not a hex digit.
int hexStringToBytes(const char* str, byte* bytes, int nBytes) {
    int written {0};
    const char* cursor {str};
    while (cursor[0] != '\0' && written < nBytes) {
        // A lone trailing digit cannot form a byte.
        if (cursor[1] == '\0') {
            return -1;
        }
        const int high {charToInt(cursor[0])};
        const int low {charToInt(cursor[1])};
        if (high == -1 || low == -1) {
            return -1;
        }
        bytes[written] = (high << 4) | low;
        cursor += 2;
        written += 1;
    }
    return written;
}
// Decodes the hex string given as the first command line argument and prints
// the byte count followed by the bytes as two-digit hex values.
// Exits with status 1 when no argument is given or decoding fails.
int main(int argc, char* argv[]) {
    if (argc < 2) {
        return 1;
    }
    byte bytes[0x100];
    const int count {hexStringToBytes(argv[1], bytes, 0x100)};
    if (count < 0) {
        return 1;
    }
    std::cout << "number of bytes: " << count << "\n" << std::hex;
    for (int idx {0}; idx < count; ++idx) {
        const int value {bytes[idx] & 0xff};
        // Pad single-digit values with a leading zero.
        std::cout << (value < 0x10 ? "0" : "") << value;
    }
    std::cout << "\n";
    return 0;
}
i've modified TheoretiCAL's code
uint8_t buf[32] = {};
std::string hex = "0123";
// Left-pad with a zero so the digits pair up into whole bytes.
while (hex.length() % 2)
    hex = "0" + hex;
// Extracting into a uint8_t with operator>> reads one *character*, not a hex
// number, so streaming would store the ASCII codes of the digits. Parse each
// pair of digits explicitly instead, bounded by both the buffer and the
// input. std::stoul throws std::invalid_argument if a pair is not valid hex.
for (size_t i = 0; i < sizeof(buf) && 2 * i < hex.length(); i++)
    buf[i] = static_cast<uint8_t>(std::stoul(hex.substr(2 * i, 2), nullptr, 16));
How I do this at compiletime
#pragma once
#include <memory>
#include <iostream>
#include <string>
#include <array>
#define DELIMITING_WILDCARD ' '
// #sean :)
// Converts one hex digit character to its value. Characters that are neither
// '0'-'9' nor 'A'-'F' are all treated as lower-case digits (ch - 'a' + 10);
// there is no validation.
constexpr int _char_to_int( char ch )
{
    const bool is_digit = ( ch >= '0' && ch <= '9' );
    const bool is_upper = ( ch >= 'A' && ch <= 'F' );

    if( is_digit )
        return ch - '0';

    return ch - ( is_upper ? 'A' : 'a' ) + 10;
}
// Returns one more than the number of elements of str that equal wildcard,
// i.e. the number of pieces str falls into when split at that character.
template <char wildcard, typename T, size_t N = sizeof( T )>
constexpr size_t _count_wildcard( T &&str )
{
    size_t pieces = 1u;
    for( const auto &element : str )
    {
        pieces += ( element == wildcard ) ? 1u : 0u;
    }
    return pieces;
}
// construct a base16 hex and emplace it at make_count
// change 16 to 256 if u want the result to be when:
// sig[0] == 0xA && sig[1] == 0xB = 0xA0B
// or leave as is for the scenario to return 0xAB
#define CONCATE_HEX_FACTOR 16
#define CONCATE_HEX(a, b) ( CONCATE_HEX_FACTOR * ( a ) + ( b ) )
// Parses a delimiter-separated hex signature such as "55 8B ? EC" into a
// std::array<int, delimiter_count>. Each token yields one element: the value
// of its first two hex digits, or -1 when the token starts with skip_wildcard.
//   skip_wildcard   - character that marks a "don't care" token
//   delimiter_count - number of elements to produce (must be > 0)
//   sig             - the signature text; N defaults to sizeof( T )
// NOTE(review): the delimiter scan below compares against the
// DELIMITING_WILDCARD macro rather than a template parameter.
template
< char skip_wildcard,
// How many occurrences of a delimiting wildcard do we find in sig
size_t delimiter_count,
typename T, size_t N = sizeof( T )>
constexpr auto _make_array( T &&sig )
{
static_assert( delimiter_count > 0, "this is a logical error, delimiter count can't be of size 0" );
static_assert( N > 1, "sig length must be bigger than 1" );
// Resulting byte array, for delimiter_count skips we should have delimiter_count integers
std::array<int, delimiter_count> ret{};
// List of skips that point to the position of the delimiter wildcard in skip
// (slots the scan below does not fill keep their initial value 0)
std::array<size_t, delimiter_count> skips{};
// Current skip
size_t skip_count = 0u;
// Character count, traversed for skip
size_t skip_traversed_character_count = 0u;
// First pass: record the index of every delimiter in sig.
for( size_t i = 0u; i < N; ++i )
{
if( sig[i] == DELIMITING_WILDCARD )
{
skips[skip_count] = skip_traversed_character_count;
++skip_count;
}
++skip_traversed_character_count;
}
// Finally traversed character count
size_t traversed_character_count = 0u;
// Make count (we will supposedly have at least an instance in our return array)
size_t make_count = 1u;
// Traverse signature
// Second pass: the first token is read at i == 0; every later token is read
// when i lands on the delimiter that precedes it.
for( size_t i = 0u; i < N; ++i )
{
// Read before
if( i == 0u )
{
// We don't care about this, and we don't want to use 0
if( sig[0u] == skip_wildcard )
{
ret[0u] = -1;
continue;
}
ret[0u] = CONCATE_HEX( _char_to_int( sig[0u] ), _char_to_int( sig[1u] ) );
continue;
}
// Make result by skip data
// A recorded position of 0 can never match here because i == 0 was
// handled above, so unfilled slots of skips are harmless.
for( const auto &skip : skips )
{
if( ( skip == i ) && skip < N - 1u )
{
// We don't care about this, and we don't want to use 0
if( sig[i + 1u] == skip_wildcard )
{
ret[make_count] = -1;
++make_count;
continue;
}
// The token after the delimiter is read as two hex digits.
ret[make_count] = CONCATE_HEX( _char_to_int( sig[i + 1u] ), _char_to_int( sig[i + 2u] ) );
++make_count;
}
}
}
return ret;
}
#define SKIP_WILDCARD '?'
#define BUILD_ARRAY(a) _make_array<SKIP_WILDCARD, _count_wildcard<DELIMITING_WILDCARD>( a )>( a )
#define BUILD_ARRAY_MV(a) _make_array<SKIP_WILDCARD, _count_wildcard<DELIMITING_WILDCARD>( std::move( a ) )>( std::move( a ) )
// -----
// usage
// -----
// Returns n plus the first element of a signature parsed at compile time.
template <int n>
constexpr int combine_two()
{
    constexpr auto parsed = BUILD_ARRAY( "55 8B EC 83 E4 F8 8B 4D 08 BA ? ? ? ? E8 ? ? ? ? 85 C0 75 12 ?" );
    return n + parsed[0];
}
// Builds a signature array at compile time, prints its elements in hex, and
// checks combine_two<3>() with static_assert before printing its value.
int main()
{
    constexpr auto pattern = BUILD_ARRAY( "?? AA BB CC DD ? ? ? 02 31 32" );
    for( size_t idx = 0u; idx < pattern.size(); ++idx )
    {
        printf( "%x ", pattern[idx] );
    }

    combine_two<3>();
    constexpr auto combined = combine_two<3>();
    static_assert( combine_two<3>() == 88 );
    static_assert( combine_two<3>() == combined );
    printf( "\n%d", combined );
}
Method can be used for runtime too, but for that you'd probably prefer something else, faster.
It may be useful to someone. The logic of translating a set of bytes into a string and back. Solves the zero character problem.
#include <sstream>
#include <iomanip>
// Formats the first len bytes of data as lower-case hex, two digits per byte.
// If len is larger than data.size(), only the bytes that exist are formatted.
std::string BytesToHex(const std::vector<char>& data, size_t len)
{
    // Never read past the end of the vector.
    const size_t count = (len < data.size()) ? len : data.size();

    std::stringstream ss;
    ss << std::hex << std::setfill('0');
    for(size_t index(0); index < count; ++index)
    {
        // Go through unsigned char first: where char is signed, a byte such
        // as 0xA1 would otherwise be sign-extended and print as "ffa1".
        ss << std::setw(2)
           << static_cast<unsigned short>(static_cast<unsigned char>(data[index]));
    }
    return ss.str();
}
// Converts hex text to bytes, two digits per byte. A zero byte ("00") is
// kept like any other value. A single trailing digit becomes a byte of its
// own, and a pair that does not start with a hex digit yields a 0 byte.
std::vector<char> HexToBytes(const std::string& data)
{
    std::vector<char> resBytes;
    resBytes.reserve((data.size() + 1) / 2);

    for(size_t pos = 0; pos < data.size(); pos += 2)
    {
        // Copy up to two digits into a NUL-terminated buffer so that sscanf
        // is always given a proper C string.
        char hexNum[3] = {0, 0, 0};
        hexNum[0] = data[pos];
        if(pos + 1 < data.size())
        {
            hexNum[1] = data[pos + 1];
        }

        // Stays 0 if sscanf cannot convert anything.
        unsigned short num = 0;
        sscanf(hexNum, "%2hX", &num);
        resBytes.push_back(static_cast<char>(num));
    }
    return resBytes;
}
If you can make your data to look like this e.g array of "0x01", "0xA1"
Then you can iterate your array and use sscanf to create the array of values
unsigned int result;
sscanf(data, "%x", &result);
The difficulty in an hex to char conversion is that the hex digits work pairwise, f.ex: 3132 or A0FF. So an even number of hex digits is assumed. However it could be perfectly valid to have an odd number of digits, like: 332 and AFF, which should be understood as 0332 and 0AFF.
I propose an improvement to Niels Keurentjes hex2bin() function.
First we count the number of valid hex digits. As we have to count, let's control also the buffer size:
void hex2bin(const char* src, char* target, size_t size_target)
{
int countdgts=0; // count hex digits
for (const char *p=src; *p && isxdigit(*p); p++)
countdgts++;
if ((countdgts+1)/2+1>size_target)
throw exception("Risk of buffer overflow");
By the way, to use isxdigit() you'll have to #include <cctype>.
Once we know how many digits, we can determine if the first one is the higher digit (only pairs) or not (first digit not a pair).
bool ishi = !(countdgts%2);
Then we can loop digit by digit, combining each pair using bin shift << and bin or, and
toggling the 'high' indicator at each iteration:
for (*target=0; *src; ishi = !ishi) {
char tmp = char2int(*src++); // hex digit on 4 lower bits
if (ishi)
*target = (tmp << 4); // high: shift by 4
else *target++ |= tmp; // low: complete previous
}
*target=0; // null terminated target (if desired)
}
I found this question, but the accepted answer didn't look like a C++ way of solving the task to me (this doesn't mean it's a bad answer or anything, just explaining motivation behind adding this one). I recollected this nice answer and decided to implement something similar. Here is complete code of what I ended up with (it also works for std::wstring):
#include <cctype>
#include <cstdlib>
#include <algorithm>
#include <iostream>
#include <iterator>
#include <ostream>
#include <stdexcept>
#include <string>
#include <vector>
// Output iterator adaptor that accepts hex digit characters and forwards one
// decoded byte to the wrapped output iterator for every two digits received.
// Assigning a character that is not a hex digit throws std::runtime_error.
//
// The iterator traits are declared as member typedefs instead of inheriting
// from std::iterator, which is deprecated since C++17; the types are the same
// ones the base class used to supply.
template <typename OutputIt>
class hex_ostream_iterator
{
    OutputIt out;
    int digitCount;   // digits collected for the current byte (0 or 1)
    int number;       // value accumulated from those digits

public:
    typedef std::output_iterator_tag iterator_category;
    typedef void value_type;
    typedef void difference_type;
    typedef void pointer;
    typedef void reference;

    // Deliberately not explicit: from_hex_string relies on the implicit
    // conversion from OutputIt for its return value.
    hex_ostream_iterator(OutputIt out) : out(out), digitCount(0), number(0)
    {
    }

    // Consumes one hex digit; every second digit emits a byte.
    hex_ostream_iterator<OutputIt> &
    operator=(char c)
    {
        number = (number << 4) | char2int(c);
        digitCount++;

        if (digitCount == 2) {
            digitCount = 0;
            *out++ = number;
            number = 0;
        }
        return *this;
    }

    // Dereference and increment are no-ops; all the work happens on assignment.
    hex_ostream_iterator<OutputIt> &
    operator*()
    {
        return *this;
    }

    hex_ostream_iterator<OutputIt> &
    operator++()
    {
        return *this;
    }

    hex_ostream_iterator<OutputIt> &
    operator++(int)
    {
        return *this;
    }

private:
    // Returns the value (0..15) of hex digit c, accepting either case.
    int
    char2int(char c)
    {
        static const std::string HEX_CHARS = "0123456789abcdef";

        // std::tolower requires a value representable as unsigned char;
        // passing a negative char directly is undefined behaviour.
        const char lowerC =
            static_cast<char>(std::tolower(static_cast<unsigned char>(c)));
        const std::string::size_type pos = HEX_CHARS.find(lowerC);
        if (pos == std::string::npos) {
            throw std::runtime_error(std::string("Not a hex digit: ") + c);
        }
        return static_cast<int>(pos);
    }
};
// Convenience factory: wraps out in a hex_ostream_iterator so the template
// argument is deduced from the call.
template <typename OutputIt>
hex_ostream_iterator<OutputIt>
hex_iterator(OutputIt out)
{
    hex_ostream_iterator<OutputIt> wrapped(out);
    return wrapped;
}
// Copies the hex digits in [first, last) to out. When the number of digits
// is odd, a '0' is written first so that the digits still form whole pairs.
// The iterator returned by std::copy is converted to the declared return type.
template <typename InputIt, typename OutputIt>
hex_ostream_iterator<OutputIt>
from_hex_string(InputIt first, InputIt last, OutputIt out)
{
    const bool oddDigitCount = (std::distance(first, last) % 2 == 1);
    if (oddDigitCount) {
        *out = '0';
        ++out;
    }

    return std::copy(first, last, out);
}
// Decodes the hex string given as the only argument and writes the resulting
// raw bytes to standard output. Prints a usage line and fails otherwise.
int
main(int argc, char *argv[])
{
    if (argc == 2) {
        const std::string input = argv[1];
        std::vector<unsigned char> bytes;
        from_hex_string(input.begin(), input.end(),
                        hex_iterator(std::back_inserter(bytes)));

        for (std::vector<unsigned char>::const_iterator b = bytes.begin();
             b != bytes.end(); ++b) {
            std::cout << *b;
        }
        return EXIT_SUCCESS;
    }

    std::cout << "Usage: " << argv[0] << " hexstring" << std::endl;
    return EXIT_FAILURE;
}
And the output of ./hex2bytes 61a062a063 | hexdump -C:
00000000 61 a0 62 a0 63 |a.b.c|
00000005
And of ./hex2bytes 6a062a063 | hexdump -C (note odd number of characters):
00000000 06 a0 62 a0 63 |..b.c|
00000005
In: "303132", Out: "012". Input string can be odd or even length.
// Returns the value (0..15) of one hexadecimal digit character.
// Throws std::runtime_error for any character outside 0-9, A-F, a-f.
char char2int(char input)
{
    const bool isDigit = (input >= '0' && input <= '9');
    const bool isUpper = (input >= 'A' && input <= 'F');
    const bool isLower = (input >= 'a' && input <= 'f');

    if (!(isDigit || isUpper || isLower))
        throw std::runtime_error("Incorrect symbol in hex string");

    if (isDigit)
        return input - '0';

    const char base = isUpper ? 'A' : 'a';
    return input - base + 10;
}
// Converts hex text to the bytes it encodes ("303132" -> "012").
// For an odd number of digits the first digit forms a byte on its own.
// An empty input yields an empty string. char2int throws on a character
// that is not a hex digit.
string hex2str(string &hex)
{
    string out;
    out.reserve(hex.size() / 2 + hex.size() % 2);

    string::size_type pos = 0;
    if (hex.size() % 2 != 0) {
        out.push_back(char(char2int(hex[pos++])));
    }

    // Read both digits into named values first: advancing and dereferencing
    // the same iterator inside one expression has no guaranteed order.
    for (; pos + 1 < hex.size(); pos += 2) {
        const char high = char2int(hex[pos]);
        const char low = char2int(hex[pos + 1]);
        out.push_back(char(high << 4 | low));
    }
    return out;
}
Very similar to some of the other answers here, this is what I went with:
typedef uint8_t BYTE;
// Converts ArrayLength hex digit characters at HexString into ArrayLength / 2
// bytes. The result is allocated with malloc and must be released with free()
// by the caller. Returns a null pointer if the allocation fails.
// Only complete pairs are converted: with an odd ArrayLength the last digit
// is ignored, so neither buffer is accessed beyond its end.
BYTE* ByteUtils::HexStringToBytes(BYTE* HexString, int ArrayLength)
{
    const int byteCount = (ArrayLength > 0) ? ArrayLength / 2 : 0;

    BYTE* returnBytes = static_cast<BYTE*>(malloc(byteCount));
    if (!returnBytes)
    {
        return returnBytes;
    }

    for (int j = 0; j < byteCount; j++)
    {
        const int valueHigh = ByteUtils::HexAsciiToDec(static_cast<int>(HexString[2 * j]));
        const int valueLow = ByteUtils::HexAsciiToDec(static_cast<int>(HexString[2 * j + 1]));
        returnBytes[j] = static_cast<BYTE>(valueHigh * 16 + valueLow);
    }
    return returnBytes;
}
// Returns the value (0..15) of the hex digit whose character code is value,
// or 0 if value is not a hex digit.
// The digit range is written with character literals; the previous numeric
// bound (value < 59) also accepted ':' (58) and mapped it to 10.
int ByteUtils::HexAsciiToDec(int value)
{
    if (value >= '0' && value <= '9')
    {
        return value - '0';
    }
    if (value >= 'a' && value <= 'f')
    {
        return value - 'a' + 10;
    }
    if (value >= 'A' && value <= 'F')
    {
        return value - 'A' + 10;
    }
    return 0;
}
// Appends to buffer the bytes encoded by the hex text in data.
// The text is visited in steps of offset characters (2 by default). At each
// position that holds a hex digit, that character and the one after it are
// parsed with strtoul (base 16) and the result is appended as one byte;
// positions that do not hold a hex digit are skipped.
// Returns false when data is empty or offset is 0, true otherwise.
//
// The two characters are copied into a small NUL-terminated buffer, so no
// misaligned 16-bit load is needed, nothing past the end of data is read,
// and the result does not depend on the byte order of the host.
static bool Hexadec2xdigit(const std::string& data, std::string& buffer, std::size_t offset = sizeof(uint16_t))
{
    if (data.empty() || offset == 0)
    {
        return false;
    }

    for (std::size_t i = 0; i < data.size(); i += offset)
    {
        if (!std::isxdigit(static_cast<unsigned char>(data[i])))
        {
            continue;
        }

        const char pair[3] = { data[i], (i + 1 < data.size()) ? data[i + 1] : '\0', '\0' };
        buffer += static_cast<char>(std::strtoul(pair, nullptr, 16));
    }
    return true;
}
This code doesn't have much of a copy process

Building a char array with hex bytes from string values [duplicate]

What is the best way to convert a variable length hex string e.g. "01A1" to a byte array containing that data.
i.e converting this:
std::string = "01A1";
into this
char* hexArray;
int hexLength;
or this
std::vector<char> hexArray;
so that when I write this to a file and hexdump -C it I get the binary data containing 01A1.
This implementation uses the built-in strtol function to handle the actual conversion from text to bytes, but will work for any even-length hex string.
// Converts hex text to raw bytes, consuming two characters per byte.
// Each pair is handed to strtol with base 16, so whatever strtol accepts for
// that pair (including a lone trailing digit) becomes one output byte.
std::vector<char> HexToBytes(const std::string& hex) {
    std::vector<char> bytes;
    unsigned int pos = 0;
    while (pos < hex.length()) {
        const std::string pair = hex.substr(pos, 2);
        const long parsed = strtol(pair.c_str(), NULL, 16);
        bytes.push_back((char) parsed);
        pos += 2;
    }
    return bytes;
}
This ought to work:
// Returns the numeric value (0..15) of one hexadecimal digit character.
// Throws std::invalid_argument for any character outside 0-9, A-F, a-f.
int char2int(char input)
{
    const bool isDigit = (input >= '0' && input <= '9');
    const bool isUpper = (input >= 'A' && input <= 'F');
    const bool isLower = (input >= 'a' && input <= 'f');

    if(!(isDigit || isUpper || isLower))
        throw std::invalid_argument("Invalid input string");

    if(isDigit)
        return input - '0';

    const char base = isUpper ? 'A' : 'a';
    return input - base + 10;
}
// Writes one byte to target for every complete pair of hex digits in the
// zero-terminated string src. The caller must supply a sanitized string and
// a target buffer large enough for the result; neither is checked here, and
// a trailing unpaired digit is ignored.
void hex2bin(const char* src, char* target)
{
    for(; src[0] != '\0' && src[1] != '\0'; src += 2, ++target)
    {
        *target = char2int(src[0])*16 + char2int(src[1]);
    }
}
Depending on your specific platform there's probably also a standard implementation though.
So for fun, I was curious if I could do this kind of conversion at compile-time. It doesn't have a lot of error checking and was done in VS2015, which doesn't support C++14 constexpr functions yet (thus how HexCharToInt looks). It takes a c-string array, converts pairs of characters into a single byte and expands those bytes into a uniform initialization list used to initialize the T type provided as a template parameter. T could be replaced with something like std::array to automatically return an array.
#include <cstdint>
#include <initializer_list>
#include <stdexcept>
#include <utility>
/* Maps one hexadecimal digit character to its value (0-15) and throws
   std::exception for anything else. Kept as a single return expression so it
   stays usable where only C++11-style constexpr functions are supported. */
constexpr std::uint8_t HexCharToInt(char Input)
{
    return
        (('0' <= Input) && (Input <= '9'))
            ? (Input - '0')
        : (('A' <= Input) && (Input <= 'F'))
            ? (Input - 'A' + 10)
        : (('a' <= Input) && (Input <= 'f'))
            ? (Input - 'a' + 10)
        : throw std::exception{};
}
/* Combines two hex digit characters into one byte: High supplies the upper
   nibble, Low the lower nibble. */
constexpr std::uint8_t HexChar(char High, char Low)
{
    return (HexCharToInt(High) * 16) + HexCharToInt(Low);
}
/* Adapter: expands Index... into one HexChar call per output byte. Byte k is
   built from Input[2k] (high nibble) and Input[2k + 1] (low nibble), and the
   resulting list brace-initializes T. */
template <typename T, std::size_t Length, std::size_t ... Index>
constexpr T HexString(const char (&Input)[Length], const std::index_sequence<Index...>&)
{
return T{HexChar(Input[(Index * 2)], Input[((Index * 2) + 1)])...};
}
/* Entry function: converts the character array Input into a T holding
   Length / 2 bytes by forwarding to the index_sequence overload.
   Length is the full array extent, which for a string literal includes the
   terminating NUL; with an even number of digits the NUL is therefore never
   read. NOTE(review): with an odd number of digits the last pair is the final
   digit plus the NUL, which HexCharToInt rejects. */
template <typename T, std::size_t Length>
constexpr T HexString(const char (&Input)[Length])
{
return HexString<T>(Input, std::make_index_sequence<(Length / 2)>{});
}
constexpr auto Y = KS::Utility::HexString<std::array<std::uint8_t, 3>>("ABCDEF");
You can use boost:
#include <boost/algorithm/hex.hpp>
char bytes[60] = {0};
std::string hash = boost::algorithm::unhex(std::string("313233343536373839"));
std::copy(hash.begin(), hash.end(), bytes);
You said "variable length." Just how variable do you mean?
For hex strings that fit into an unsigned long I have always liked the C function strtoul. To make it convert hex pass 16 as the radix value.
Code might look like:
#include <cstdlib>
std::string str = "01a1";
unsigned long val = strtoul(str.c_str(), 0, 16);
If you want to use OpenSSL to do it, there is a nifty trick I found:
BIGNUM *input = BN_new();
int input_length = BN_hex2bn(&input, argv[2]);
input_length = (input_length + 1) / 2; // BN_hex2bn() returns number of hex digits
unsigned char *input_buffer = (unsigned char*)malloc(input_length);
retval = BN_bn2bin(input, input_buffer);
Just be sure to strip off any leading '0x' to the string.
This can be done with a stringstream, you just need to store the value in an intermediate numeric type such as an int:
std::string test = "01A1"; // assuming this is an even length string
// NOTE(review): an array sized by a run-time value is a compiler extension,
// not standard C++ - consider std::vector<char> instead.
char bytes[test.length()/2];
// NOTE(review): stringstream is unqualified; presumably a using-declaration
// for std is in effect where this snippet is pasted - confirm.
stringstream converter;
for(int i = 0; i < test.length(); i+=2)
{
// Push exactly two hex digits into the stream, then read them back as one int.
converter << std::hex << test.substr(i,2);
int byte;
converter >> byte;
bytes[i/2] = byte & 0xFF;
// Empty the buffer and clear the stream state so the next pair starts fresh.
converter.str(std::string());
converter.clear();
}
Somebody mentioned using sscanf to do this, but didn't say how. This is how. It's useful because it also works in ancient versions of C and C++ and even most versions of embedded C or C++ for microcontrollers.
When converted to bytes, the hex-string in this example resolves to the ASCII text "Hello there!" which is then printed.
#include <stdio.h>
// Converts a fixed hex string to bytes with sscanf, two digits at a time,
// and prints both the hex text and the decoded text.
int main ()
{
    char hexdata[] = "48656c6c6f20746865726521";
    char bytedata[20]{};

    // sizeof includes the terminating NUL; integer division drops it again.
    const int pairCount = sizeof(hexdata) / 2;
    const char *in = hexdata;
    char *out = bytedata;
    for(int done = 0; done < pairCount; ++done, in += 2, ++out) {
        sscanf(in, "%02hhX", out);
    }

    printf ("%s -> %s\n", hexdata, bytedata);
    return 0;
}
I would use a standard function like sscanf to read the string into an unsigned integer, and then you already have the bytes you need in memory. If you were on a big endian machine you could just write out (memcpy) the memory of the integer from the first non-zero byte. However you can't safely assume this in general, so you can use some bit masking and shifting to get the bytes out.
const char* src = "01A1";
char hexArray[256] = {0};
int hexLength = 0;
// read in the string
// NOTE: %x fills a single unsigned int, so only as many hex digits as fit in
// that type can be converted this way.
unsigned int hex = 0;
sscanf(src, "%x", &hex);
// write it out
// Walk from the most significant byte down: the mask starts at 0xff000000
// (assumes a 32-bit unsigned int - confirm for the target) and moves right
// eight bits per step, with bitPos tracking the matching shift.
for (unsigned int mask = 0xff000000, bitPos=24; mask; mask>>=8, bitPos-=8) {
unsigned int currByte = hex & mask;
// Leading zero bytes are skipped; once a byte has been stored every
// following byte is stored too, zero or not.
if (currByte || hexLength) {
hexArray[hexLength++] = currByte>>bitPos;
}
}
C++11 variant (with gcc 4.7 - little endian format):
#include <string>
#include <vector>
// Decodes a string of hexadecimal digits into bytes, most significant digit
// first. An odd number of digits is accepted: the first digit then forms a
// byte on its own ("a56ac" -> {0x0a, 0x56, 0xac}).
// Returns an empty vector if source contains a non-hex character.
//
// The string is parsed in chunks of up to 16 digits with std::stoull and the
// bytes are taken out of each value with shifts, so the result does not
// depend on the byte order of the host.
std::vector<uint8_t> decodeHex(const std::string & source)
{
    if ( std::string::npos != source.find_first_not_of("0123456789ABCDEFabcdef") )
    {
        // you can throw exception here
        return {};
    }

    const auto size = source.size();
    auto offset = (size % 16);

    std::vector<uint8_t> binary{};
    binary.reserve((size + 1) / 2);

    // Appends the low `count` bytes of `value`, highest byte first.
    const auto appendBytes = [&binary](uint64_t value, std::string::size_type count)
    {
        while ( count-- )
        {
            binary.emplace_back(static_cast<uint8_t>(value >> (8 * count)));
        }
    };

    // Leading partial chunk: the digits that do not fill a whole 16-digit group.
    if ( offset )
    {
        appendBytes(std::stoull(source.substr(0, offset), nullptr, 16), (offset + 1) / 2);
    }

    // Remaining full chunks of 16 digits, i.e. 8 bytes each.
    for ( ; offset < size; offset += 16 )
    {
        appendBytes(std::stoull(source.substr(offset, 16), nullptr, 16), 8);
    }

    return binary;
}
Crypto++ variant (with gcc 4.7):
#include <string>
#include <vector>
#include <crypto++/filters.h>
#include <crypto++/hex.h>
// Decodes a hex string with Crypto++: source is fed through a HexDecoder
// whose output is collected by a StringSink into hexCode, and the collected
// bytes are then copied into the returned vector.
// NOTE(review): the filters created with raw `new` are presumably owned and
// freed by the pipeline object they are attached to - confirm against the
// Crypto++ documentation.
std::vector<unsigned char> decodeHex(const std::string & source)
{
std::string hexCode;
CryptoPP::StringSource(
source, true,
new CryptoPP::HexDecoder(new CryptoPP::StringSink(hexCode)));
return std::vector<unsigned char>(hexCode.begin(), hexCode.end());
}
Note that the first variant is about two times faster than the second one and at the same time works with both odd and even numbers of nibbles (the result of "a56ac" is {0x0a, 0x56, 0xac}). Crypto++ discards the last nibble if there is an odd number of nibbles (the result of "a56ac" is {0xa5, 0x6a}) and silently skips invalid hex characters (the result of "a5sac" is {0xa5, 0xac}).
#include <iostream>
#include <sstream>
#include <vector>
// Demonstrates hex-to-bytes conversion through stream extraction: delimiters
// are inserted between digit pairs so that each pair can be read as one
// hexadecimal integer.
int main() {
std::string s("313233");
char delim = ',';
int len = s.size();
// Insert a delimiter after every two digits ("313233" becomes "31,32,33").
// len is bumped on each insertion so the bound follows the growing string.
for(int i = 2; i < len; i += 3, ++len) s.insert(i, 1, delim);
std::istringstream is(s);
std::ostringstream os;
// From here on integers are extracted as hexadecimal.
is >> std::hex;
int n;
while (is >> n) {
char c = (char)n;
// Build the string from pointer + length so a zero byte is kept too.
os << std::string(&c, 1);
// Step over the delimiter that follows the pair, if there is one.
if(is.peek() == delim) is.ignore();
}
// std::string form
std::string byte_string = os.str();
std::cout << byte_string << std::endl;
printf("%s\n", byte_string.c_str());
// std::vector form
std::vector<char> byte_vector(byte_string.begin(), byte_string.end());
byte_vector.push_back('\0'); // needed for a c-string
printf("%s\n", byte_vector.data());
}
The output is
123
123
123
'1' == 0x31, etc.
If your goal is speed, I have an AVX2 SIMD implementation of an encoder and decoder here: https://github.com/zbjornson/fast-hex. These benchmark ~12x faster than the fastest scalar implementations.
#include <iostream>
using byte = unsigned char;
// Returns the numeric value (0-15) of a single hexadecimal digit, accepting
// both upper- and lower-case letters, or -1 if `c` is not a hex digit.
static int charToInt(char c) {
    const bool isDigit = ('0' <= c) && (c <= '9');
    const bool isUpper = ('A' <= c) && (c <= 'F');
    const bool isLower = ('a' <= c) && (c <= 'f');
    if (isDigit) return c - '0';
    if (isUpper) return 10 + (c - 'A');
    if (isLower) return 10 + (c - 'a');
    return -1;
}
// Decodes a '\0'-terminated hex string into `bytes`, two digits per byte.
// At most nBytes bytes are written; decoding stops once that many have been
// produced. Returns the number of bytes decoded, or -1 if a character is not
// a hex digit or the string ends in the middle of a byte.
int hexStringToBytes(const char* str, byte* bytes, int nBytes) {
    int count = 0;
    for (const char* cursor = str; *cursor != '\0' && count < nBytes; cursor += 2) {
        // A lone trailing digit cannot form a byte.
        if (cursor[1] == '\0') return -1;
        const int high = charToInt(cursor[0]);
        const int low = charToInt(cursor[1]);
        if (high == -1 || low == -1) return -1;
        bytes[count++] = (high << 4) | low;
    }
    return count;
}
int main(int argc, char* argv[]) {
if (argc < 2) {
return 1;
}
byte bytes[0x100];
int ret {hexStringToBytes(argv[1], bytes, 0x100)};
if (ret < 0) {
return 1;
}
std::cout << "number of bytes: " << ret << "\n" << std::hex;
for (int i {0}; i < ret; ++i) {
if (bytes[i] < 0x10) {
std::cout << "0";
}
std::cout << (bytes[i] & 0xff);
}
std::cout << "\n";
return 0;
}
i've modified TheoretiCAL's code
uint8_t buf[32] = {};
std::string hex = "0123";
while (hex.length() % 2)
hex = "0" + hex;
std::stringstream stream;
stream << std::hex << hex;
for (size_t i= 0; i <sizeof(buf); i++)
stream >> buf[i];
How I do this at compiletime
#pragma once
#include <memory>
#include <iostream>
#include <string>
#include <array>
#define DELIMITING_WILDCARD ' '
// #sean :)
// Converts one hexadecimal digit character to its integer value.
// Characters outside '0'-'9' and 'A'-'F' are treated as lower-case digits
// without validation, so non-hex input yields a meaningless value.
constexpr int _char_to_int( char ch )
{
    const bool is_decimal = ( ch >= '0' ) && ( ch <= '9' );
    const bool is_upper = ( ch >= 'A' ) && ( ch <= 'F' );
    return is_decimal ? ch - '0'
         : is_upper   ? ch - 'A' + 10
                      : ch - 'a' + 10;
}
// Counts how many elements of `str` compare equal to `wildcard` and returns
// that count plus one (the number of fields the wildcard separates).
template <char wildcard, typename T, size_t N = sizeof( T )>
constexpr size_t _count_wildcard( T &&str )
{
    size_t fields = 1u;
    for( const auto &element : str )
    {
        fields += ( element == wildcard ) ? 1u : 0u;
    }
    return fields;
}
// construct a base16 hex and emplace it at make_count
// change 16 to 256 if u want the result to be when:
// sig[0] == 0xA && sig[1] == 0xB = 0xA0B
// or leave as is for the scenario to return 0xAB
#define CONCATE_HEX_FACTOR 16
#define CONCATE_HEX(a, b) ( CONCATE_HEX_FACTOR * ( a ) + ( b ) )
// Builds a std::array<int, delimiter_count> from a signature string `sig`
// made of two-digit hex tokens separated by DELIMITING_WILDCARD.
// - Entry 0 comes from sig[0] and sig[1]; every later entry comes from the
//   two characters that follow a delimiter.
// - A token that starts with `skip_wildcard` is stored as -1.
// - Relies on the DELIMITING_WILDCARD and CONCATE_HEX macros and on
//   _char_to_int, all defined outside this function.
// NOTE(review): the delimiter scan compares against the DELIMITING_WILDCARD
// macro, so delimiter_count is expected to be computed with the same
// character — confirm at the call sites.
template
< char skip_wildcard,
// How many occurances of a delimiting wildcard do we find in sig
size_t delimiter_count,
typename T, size_t N = sizeof( T )>
constexpr auto _make_array( T &&sig )
{
static_assert( delimiter_count > 0, "this is a logical error, delimiter count can't be of size 0" );
static_assert( N > 1, "sig length must be bigger than 1" );
// Resulting byte array, for delimiter_count skips we should have delimiter_count integers
std::array<int, delimiter_count> ret{};
// List of skips that point to the position of the delimiter wildcard in skip
std::array<size_t, delimiter_count> skips{};
// Current skip
size_t skip_count = 0u;
// Character count, traversed for skip
size_t skip_traversed_character_count = 0u;
// First pass: record the index of every delimiter found in sig.
for( size_t i = 0u; i < N; ++i )
{
if( sig[i] == DELIMITING_WILDCARD )
{
skips[skip_count] = skip_traversed_character_count;
++skip_count;
}
++skip_traversed_character_count;
}
// Finally traversed character count
// NOTE(review): this variable is never read or written again in this function.
size_t traversed_character_count = 0u;
// Make count (we will supposedly have at least an instance in our return array)
size_t make_count = 1u;
// Traverse signature
for( size_t i = 0u; i < N; ++i )
{
// Read before
// The first token has no delimiter in front of it, so it is handled separately.
if( i == 0u )
{
// We don't care about this, and we don't want to use 0
if( sig[0u] == skip_wildcard )
{
ret[0u] = -1;
continue;
}
ret[0u] = CONCATE_HEX( _char_to_int( sig[0u] ), _char_to_int( sig[1u] ) );
continue;
}
// Make result by skip data
// Second pass: when i is a recorded delimiter index, decode the token after it.
for( const auto &skip : skips )
{
if( ( skip == i ) && skip < N - 1u )
{
// We don't care about this, and we don't want to use 0
if( sig[i + 1u] == skip_wildcard )
{
ret[make_count] = -1;
++make_count;
continue;
}
// NOTE(review): reads sig[i + 2u]; only i < N - 1 is checked above.
ret[make_count] = CONCATE_HEX( _char_to_int( sig[i + 1u] ), _char_to_int( sig[i + 2u] ) );
++make_count;
}
}
}
return ret;
}
#define SKIP_WILDCARD '?'
#define BUILD_ARRAY(a) _make_array<SKIP_WILDCARD, _count_wildcard<DELIMITING_WILDCARD>( a )>( a )
#define BUILD_ARRAY_MV(a) _make_array<SKIP_WILDCARD, _count_wildcard<DELIMITING_WILDCARD>( std::move( a ) )>( std::move( a ) )
// -----
// usage
// -----
// Returns the template argument `n` plus the first entry of a signature
// array that is built at compile time.
template <int n>
constexpr int combine_two()
{
    constexpr auto signature = BUILD_ARRAY( "55 8B EC 83 E4 F8 8B 4D 08 BA ? ? ? ? E8 ? ? ? ? 85 C0 75 12 ?" );
    constexpr int first_entry = signature[0];
    return n + first_entry;
}
int main()
{
    // Build a signature array at compile time and print every entry in hex.
    constexpr auto signature = BUILD_ARRAY( "?? AA BB CC DD ? ? ? 02 31 32" );
    for( const auto &entry : signature )
        printf( "%x ", entry );

    // Exercise combine_two at run time and at compile time.
    combine_two<3>();
    constexpr auto combined = combine_two<3>();
    static_assert( combine_two<3>() == 88 );
    static_assert( combine_two<3>() == combined );
    printf( "\n%d", combined );
}
Method can be used for runtime too, but for that you'd probably prefer something else, faster.
It may be useful to someone. The logic of translating a set of bytes into a string and back. Solves the zero character problem.
#include <sstream>
#include <iomanip>
// Renders the first `len` bytes of `data` as lower-case hex, two digits per
// byte. If `len` is larger than data.size(), only the available bytes are
// rendered.
std::string BytesToHex(const std::vector<char>& data, size_t len)
{
    // Never read past the end of the vector, even if the caller's len is too large.
    const size_t count = (len < data.size()) ? len : data.size();
    std::stringstream ss;
    ss << std::hex << std::setfill('0');
    for (size_t index = 0; index < count; ++index)
    {
        // Convert via unsigned char first: where plain char is signed, a byte
        // >= 0x80 would otherwise sign-extend and print as "ffa1" instead of "a1".
        ss << std::setw(2)
           << static_cast<unsigned>(static_cast<unsigned char>(data[index]));
    }
    return ss.str();
}
// Decodes a hex string into bytes, two digits per byte. A lone trailing digit
// is decoded as its own byte. Decoding stops at the first group that does not
// start with a hex digit; the bytes decoded up to that point are returned.
std::vector<char> HexToBytes(const std::string& data)
{
    std::vector<char> resBytes;
    resBytes.reserve((data.size() + 1) / 2);
    for (size_t pos = 0; pos < data.size(); pos += 2)
    {
        // sscanf needs a NUL-terminated string, so keep a third, zeroed byte
        // after the (at most) two digits copied from the input.
        char hexNum[3] = {0, 0, 0};
        data.copy(hexNum, 2, pos);
        unsigned short num = 0;
        if (sscanf(hexNum, "%2hX", &num) != 1)
        {
            break; // not a hex digit: do not push an indeterminate value
        }
        resBytes.push_back(static_cast<char>(num));
    }
    return resBytes;
}
If you can arrange for your data to be an array of strings such as "0x01", "0xA1",
then you can iterate over the array and use sscanf to convert each element to its value:
unsigned int result;
sscanf(data, "%x", &result);
The difficulty in an hex to char conversion is that the hex digits work pairwise, f.ex: 3132 or A0FF. So an even number of hex digits is assumed. However it could be perfectly valid to have an odd number of digits, like: 332 and AFF, which should be understood as 0332 and 0AFF.
I propose an improvement to Niels Keurentjes hex2bin() function.
First we count the number of valid hex digits. As we have to count, let's control also the buffer size:
void hex2bin(const char* src, char* target, size_t size_target)
{
int countdgts=0; // count hex digits
for (const char *p=src; *p && isxdigit(*p); p++)
countdgts++;
if ((countdgts+1)/2+1>size_target)
throw exception("Risk of buffer overflow");
By the way, to use isxdigit() you'll have to #include <cctype>.
Once we know how many digits, we can determine if the first one is the higher digit (only pairs) or not (first digit not a pair).
bool ishi = !(countdgts%2);
Then we can loop digit by digit, combining each pair using bin shift << and bin or, and
toggling the 'high' indicator at each iteration:
for (*target=0; *src; ishi = !ishi) {
char tmp = char2int(*src++); // hex digit on 4 lower bits
if (ishi)
*target = (tmp << 4); // high: shift by 4
else *target++ |= tmp; // low: complete previous
}
*target=0; // null terminated target (if desired)
}
I found this question, but the accepted answer didn't look like a C++ way of solving the task to me (this doesn't mean it's a bad answer or anything, just explaining motivation behind adding this one). I recollected this nice answer and decided to implement something similar. Here is complete code of what I ended up with (it also works for std::wstring):
#include <cctype>
#include <cstdlib>
#include <algorithm>
#include <iostream>
#include <iterator>
#include <ostream>
#include <stdexcept>
#include <string>
#include <vector>
// Output iterator adaptor that consumes hexadecimal digit characters and
// writes one decoded value to the wrapped output iterator for every two
// digits received (the first digit is the high nibble).
// Assigning a character that is not a hex digit throws std::runtime_error.
template <typename OutputIt>
class hex_ostream_iterator
{
    OutputIt out;
    int digitCount; // digits collected so far for the current byte
    int number;     // value accumulated from those digits
public:
    // Iterator traits. These were previously inherited from std::iterator,
    // which is deprecated since C++17.
    typedef std::output_iterator_tag iterator_category;
    typedef void value_type;
    typedef void difference_type;
    typedef void pointer;
    typedef void reference;

    // Deliberately not explicit: from_hex_string returns the result of
    // std::copy through an implicit conversion to this type.
    hex_ostream_iterator(OutputIt out) : out(out), digitCount(0), number(0)
    {
    }

    // Feeds one hex digit; emits a byte after every second digit.
    hex_ostream_iterator<OutputIt> &
    operator=(char c)
    {
        number = (number << 4) | char2int(c);
        digitCount++;
        if (digitCount == 2) {
            digitCount = 0;
            *out++ = number;
            number = 0;
        }
        return *this;
    }

    // Dereference and increment are no-ops that return the iterator itself,
    // so `*it++ = c` ends up in operator=(char).
    hex_ostream_iterator<OutputIt> &
    operator*()
    {
        return *this;
    }

    hex_ostream_iterator<OutputIt> &
    operator++()
    {
        return *this;
    }

    hex_ostream_iterator<OutputIt> &
    operator++(int)
    {
        return *this;
    }

private:
    // Returns the value (0-15) of a hex digit, case-insensitively.
    // Throws std::runtime_error for any other character.
    static int
    char2int(char c)
    {
        static const std::string HEX_CHARS = "0123456789abcdef";
        // std::tolower has undefined behaviour for negative arguments other
        // than EOF, so go through unsigned char first.
        const char lowerC =
            static_cast<char>(std::tolower(static_cast<unsigned char>(c)));
        const std::string::size_type pos = HEX_CHARS.find(lowerC);
        if (pos == std::string::npos) {
            throw std::runtime_error(std::string("Not a hex digit: ") + c);
        }
        return static_cast<int>(pos);
    }
};
// Convenience factory: wraps `out` in a hex_ostream_iterator so the template
// argument is deduced from the call.
template <typename OutputIt>
hex_ostream_iterator<OutputIt>
hex_iterator(OutputIt out)
{
    typedef hex_ostream_iterator<OutputIt> adaptor_type;
    return adaptor_type(out);
}
// Copies the characters in [first, last) to `out`. When the range holds an
// odd number of characters, a '0' is written first so the digits pair up.
template <typename InputIt, typename OutputIt>
hex_ostream_iterator<OutputIt>
from_hex_string(InputIt first, InputIt last, OutputIt out)
{
    const bool oddDigitCount = (std::distance(first, last) % 2 == 1);
    if (oddDigitCount) {
        *out = '0';
        ++out;
    }
    return std::copy(first, last, out);
}
// Decodes the hex string given as the only command-line argument and writes
// the resulting raw bytes to standard output.
int
main(int argc, char *argv[])
{
    if (argc != 2) {
        std::cout << "Usage: " << argv[0] << " hexstring" << std::endl;
        return EXIT_FAILURE;
    }

    const std::string hexDigits(argv[1]);
    std::vector<unsigned char> decoded;
    from_hex_string(hexDigits.begin(), hexDigits.end(),
                    hex_iterator(std::back_inserter(decoded)));

    for (std::vector<unsigned char>::const_iterator it = decoded.begin();
         it != decoded.end(); ++it) {
        std::cout << *it;
    }
    return EXIT_SUCCESS;
}
And the output of ./hex2bytes 61a062a063 | hexdump -C:
00000000 61 a0 62 a0 63 |a.b.c|
00000005
And of ./hex2bytes 6a062a063 | hexdump -C (note odd number of characters):
00000000 06 a0 62 a0 63 |..b.c|
00000005
In: "303132", Out: "012". Input string can be odd or even length.
// Returns the value (0-15) of one hex digit, upper or lower case.
// Throws std::runtime_error for any other character.
char char2int(char input)
{
    if (input >= '0' && input <= '9')
        return input - '0';
    if (input >= 'A' && input <= 'F')
        return input - 'A' + 10;
    if (input >= 'a' && input <= 'f')
        return input - 'a' + 10;
    throw std::runtime_error("Incorrect symbol in hex string");
}

// Decodes a hex string into the bytes it describes ("303132" -> "012").
// For an odd number of digits, the first digit alone forms the first byte.
// An empty input yields an empty result. Throws std::runtime_error (from
// char2int) if a character is not a hex digit.
string hex2str(string &hex)
{
    string out;
    out.reserve(hex.size() / 2 + hex.size() % 2);
    string::const_iterator it = hex.begin();
    const string::const_iterator end = hex.end();
    if (hex.size() % 2 != 0) {
        out.push_back(char2int(*it++));
    }
    // From here on an even number of digits remains. The two digits are read
    // in separate statements because `char2int(*it++) << 4 | char2int(*it)`
    // does not guarantee which operand is evaluated first.
    while (it != end) {
        const char high = char2int(*it++);
        const char low = char2int(*it++);
        out.push_back(static_cast<char>((high << 4) | low));
    }
    return out;
}
Very similar to some of the other answers here, this is what I went with:
typedef uint8_t BYTE;
// Decodes ArrayLength ASCII hex digits from HexString into ArrayLength / 2
// bytes (high digit first). A trailing unpaired digit is ignored.
// Returns a malloc()-allocated buffer that the caller must free(), or a null
// pointer if the allocation fails.
BYTE* ByteUtils::HexStringToBytes(BYTE* HexString, int ArrayLength)
{
    const int byteCount = (ArrayLength > 0) ? ArrayLength / 2 : 0;
    BYTE* returnBytes = static_cast<BYTE*>(malloc(byteCount));
    if (returnBytes == nullptr)
    {
        return nullptr;
    }
    // Walk complete digit pairs only. The previous loop read
    // HexString[ArrayLength] and wrote one byte past the buffer when
    // ArrayLength was odd.
    for (int j = 0; j < byteCount; j++)
    {
        const int valueHigh = ByteUtils::HexAsciiToDec(HexString[2 * j]);
        const int valueLow = ByteUtils::HexAsciiToDec(HexString[2 * j + 1]);
        returnBytes[j] = static_cast<BYTE>(valueHigh * 16 + valueLow);
    }
    return returnBytes;
}
// Converts the ASCII code of one hex digit ('0'-'9', 'a'-'f', 'A'-'F') to its
// numeric value 0-15. Any other code yields 0.
int ByteUtils::HexAsciiToDec(int value)
{
    // ASCII codes: '0' = 48, '9' = 57, 'A' = 65, 'F' = 70, 'a' = 97, 'f' = 102.
    // The digit range previously ended at 58 (':'), which was decoded as 10.
    if (value >= 48 && value <= 57)
    {
        return value - 48;
    }
    if (value >= 97 && value <= 102)
    {
        return value - 97 + 10;
    }
    if (value >= 65 && value <= 70)
    {
        return value - 65 + 10;
    }
    return 0;
}
// Appends to `buffer` one character for each hex group found in `data`.
// Starting at index 0 and advancing by `offset` (default 2), every position
// whose character is a hex digit is parsed, together with the following
// character if there is one, as a base-16 number; the result is appended as
// a single char. Positions that do not start with a hex digit are skipped.
// Returns false when `data` is empty or `offset` is 0, true otherwise.
static bool Hexadec2xdigit(const std::string& data, std::string& buffer, std::size_t offset = sizeof(uint16_t))
{
    if (data.empty() || offset == 0)
    {
        return false; // offset 0 would never advance
    }
    for (std::size_t i = 0; i < data.size(); i += offset)
    {
        // isxdigit requires a value representable as unsigned char.
        if (!std::isxdigit(static_cast<unsigned char>(data[i])))
        {
            continue;
        }
        // Copy at most two characters into a NUL-terminated scratch buffer.
        // The earlier implementation reinterpreted the string as uint16_t,
        // which read past the terminator and relied on little-endian layout.
        const char pair[3] = { data[i], (i + 1 < data.size()) ? data[i + 1] : '\0', '\0' };
        buffer += static_cast<char>(std::strtoul(pair, NULL, 16));
    }
    return true;
}
This code doesn't have much of a copy process

Casting string to literal bytes [duplicate]

What is the best way to convert a variable length hex string e.g. "01A1" to a byte array containing that data.
i.e converting this:
std::string = "01A1";
into this
char* hexArray;
int hexLength;
or this
std::vector<char> hexArray;
so that when I write this to a file and hexdump -C it I get the binary data containing 01A1.
This implementation uses the built-in strtol function to handle the actual conversion from text to bytes, but will work for any even-length hex string.
// Converts a hex string to bytes, two digits per byte, using strtol for the
// digit conversion. Intended for even-length input.
std::vector<char> HexToBytes(const std::string& hex) {
    std::vector<char> bytes;
    unsigned int pos = 0;
    while (pos < hex.length()) {
        const std::string pair = hex.substr(pos, 2);
        bytes.push_back(static_cast<char>(strtol(pair.c_str(), NULL, 16)));
        pos += 2;
    }
    return bytes;
}
This ought to work:
// Returns the value (0-15) of a single hex digit, upper or lower case.
// Throws std::invalid_argument for any other character.
int char2int(char input)
{
    return ('0' <= input && input <= '9') ? input - '0'
         : ('A' <= input && input <= 'F') ? input - 'A' + 10
         : ('a' <= input && input <= 'f') ? input - 'a' + 10
         : throw std::invalid_argument("Invalid input string");
}
// This function assumes src to be a zero terminated sanitized string with
// an even number of [0-9a-f] characters, and target to be sufficiently large
void hex2bin(const char* src, char* target)
{
    // Consume two digits per output byte; stop at the terminator or when only
    // a single digit is left.
    for (; src[0] != '\0' && src[1] != '\0'; src += 2, ++target)
    {
        const int high = char2int(src[0]);
        const int low = char2int(src[1]);
        *target = high * 16 + low;
    }
}
Depending on your specific platform there's probably also a standard implementation though.
So for fun, I was curious if I could do this kind of conversion at compile-time. It doesn't have a lot of error checking and was done in VS2015, which doesn't support C++14 constexpr functions yet (thus how HexCharToInt looks). It takes a c-string array, converts pairs of characters into a single byte and expands those bytes into a uniform initialization list used to initialize the T type provided as a template parameter. T could be replaced with something like std::array to automatically return an array.
#include <cstdint>
#include <initializer_list>
#include <stdexcept>
#include <utility>
/* Quick and dirty conversion from a single character to its hex equivelent */
// Maps one hex digit (either case) to its value 0-15 in a single return
// expression; throws std::exception for any other character.
constexpr std::uint8_t HexCharToInt(char Input)
{
    return
        (('0' <= Input) && (Input <= '9'))
        ? (Input - '0')
        : (('A' <= Input) && (Input <= 'F'))
        ? (Input - 'A' + 10)
        : (('a' <= Input) && (Input <= 'f'))
        ? (Input - 'a' + 10)
        : throw std::exception{};
}
/* Position the characters into the appropriate nibble */
// Combines two hex digits into one byte: High fills the upper nibble, Low
// the lower nibble.
constexpr std::uint8_t HexChar(char High, char Low)
{
    return static_cast<std::uint8_t>((HexCharToInt(High) << 4) + HexCharToInt(Low));
}
/* Adapter that performs sets of 2 characters into a single byte and combine the results into a uniform initialization list used to initialize T */
// Expands the index pack into one HexChar call per byte: byte i is built from
// Input[2*i] and Input[2*i + 1]. The results brace-initialise a T.
template <typename T, std::size_t Length, std::size_t ... Index>
constexpr T HexString(const char (&Input)[Length], const std::index_sequence<Index...>&)
{
    return T{HexChar(Input[2 * Index], Input[2 * Index + 1])...};
}
/* Entry function */
// Entry point: converts a character array into a T initialised with
// Length / 2 bytes, one per pair of hex digits.
template <typename T, std::size_t Length>
constexpr T HexString(const char (&Input)[Length])
{
    using ByteIndices = std::make_index_sequence<(Length / 2)>;
    return HexString<T>(Input, ByteIndices{});
}
constexpr auto Y = KS::Utility::HexString<std::array<std::uint8_t, 3>>("ABCDEF");
You can use boost:
#include <boost/algorithm/hex.hpp>
char bytes[60] = {0};
std::string hash = boost::algorithm::unhex(std::string("313233343536373839"));
std::copy(hash.begin(), hash.end(), bytes);
You said "variable length." Just how variable do you mean?
For hex strings that fit into an unsigned long I have always liked the C function strtoul. To make it convert hex pass 16 as the radix value.
Code might look like:
#include <cstdlib>
std::string str = "01a1";
unsigned long val = strtoul(str.c_str(), 0, 16);
If you want to use OpenSSL to do it, there is a nifty trick I found:
BIGNUM *input = BN_new();
int input_length = BN_hex2bn(&input, argv[2]);
input_length = (input_length + 1) / 2; // BN_hex2bn() returns number of hex digits
unsigned char *input_buffer = (unsigned char*)malloc(input_length);
retval = BN_bn2bin(input, input_buffer);
Just be sure to strip off any leading '0x' to the string.
This can be done with a stringstream, you just need to store the value in an intermediate numeric type such as an int:
// Converts the hex text in `test` to bytes by pushing two digits at a time
// through a stringstream and extracting them as an int in hex mode.
std::string test = "01A1"; // assuming this is an even length string
// NOTE(review): an array whose size is a run-time value is a variable-length
// array, which is a compiler extension rather than standard C++.
char bytes[test.length()/2];
stringstream converter;
for(int i = 0; i < test.length(); i+=2)
{
converter << std::hex << test.substr(i,2);
int byte;
converter >> byte;
// Keep only the low 8 bits of the extracted value.
bytes[i/2] = byte & 0xFF;
// Reset the buffer and the stream state so the next pair starts clean.
converter.str(std::string());
converter.clear();
}
Somebody mentioned using sscanf to do this, but didn't say how. This is how. It's useful because it also works in ancient versions of C and C++ and even most versions of embedded C or C++ for microcontrollers.
When converted to bytes, the hex-string in this example resolves to the ASCII text "Hello there!" which is then printed.
#include <stdio.h>
int main ()
{
    char hexdata[] = "48656c6c6f20746865726521";
    // One output byte per two hex digits plus a terminating NUL, so that the
    // result can be printed as a C string. Sizing the buffer from hexdata
    // keeps it large enough if the input string is changed.
    unsigned char bytedata[sizeof(hexdata) / 2 + 1] = {0};
    for (size_t j = 0; j < sizeof(hexdata) / 2; j++) {
        // %hhX stores through an unsigned char pointer; the width of 2 limits
        // each conversion to one byte's worth of digits.
        if (sscanf(hexdata + j * 2, "%2hhX", bytedata + j) != 1) {
            return 1; // not a hex digit
        }
    }
    printf ("%s -> %s\n", hexdata, (const char *)bytedata);
    return 0;
}
I would use a standard function like sscanf to read the string into an unsigned integer, and then you already have the bytes you need in memory. If you were on a big endian machine you could just write out (memcpy) the memory of the integer from the first non-zero byte. However you can't safely assume this in general, so you can use some bit masking and shifting to get the bytes out.
// Parses `src` as one hexadecimal unsigned int and stores its bytes, most
// significant first, in hexArray; hexLength counts the bytes stored.
const char* src = "01A1";
char hexArray[256] = {0};
int hexLength = 0;
// read in the string
unsigned int hex = 0;
sscanf(src, "%x", &hex);
// write it out
// The mask starts at the top byte of a 32-bit value and moves down one byte
// per iteration; the loop ends when the mask has been shifted to zero.
for (unsigned int mask = 0xff000000, bitPos=24; mask; mask>>=8, bitPos-=8) {
unsigned int currByte = hex & mask;
// Leading zero bytes are skipped: nothing is stored until the first non-zero
// byte has been seen, so a value of 0 produces no output bytes.
if (currByte || hexLength) {
hexArray[hexLength++] = currByte>>bitPos;
}
}
C++11 variant (with gcc 4.7 - little endian format):
#include <string>
#include <vector>
// Decodes a hex string into bytes. An odd number of digits is accepted: the
// string is treated as if it had a leading zero ("a56ac" -> 0a 56 ac).
// Returns an empty vector if the input is empty or contains a character that
// is not a hex digit.
std::vector<uint8_t> decodeHex(const std::string & source)
{
    if ( std::string::npos != source.find_first_not_of("0123456789ABCDEFabcdef") )
    {
        // you can throw exception here
        return {};
    }
    const auto size = source.size();
    std::vector<uint8_t> binary{};
    binary.reserve((size + 1) / 2);
    // Appends the `count` low-order bytes of `value`, most significant first.
    // Shifting replaces the earlier union-based byte access, which is
    // undefined behaviour in C++ and only gave this order on little-endian
    // hosts.
    const auto appendBytes = [&binary](uint64_t value, std::string::size_type count)
    {
        while ( count-- )
        {
            binary.emplace_back(static_cast<uint8_t>(value >> (8 * count)));
        }
    };
    // The first chunk takes the digits that do not fill a whole 16-digit
    // group; every following chunk is exactly 16 digits (8 bytes).
    auto offset = size % 16;
    if ( offset )
    {
        appendBytes(std::stoull(source.substr(0, offset), nullptr, 16), (offset + 1) / 2);
    }
    for ( ; offset < size; offset += 16 )
    {
        appendBytes(std::stoull(source.substr(offset, 16), nullptr, 16), 8);
    }
    return binary;
}
Crypto++ variant (with gcc 4.7):
#include <string>
#include <vector>
#include <crypto++/filters.h>
#include <crypto++/hex.h>
// Decodes a hexadecimal string into raw bytes by running it through a
// Crypto++ HexDecoder pipeline and returns the decoded bytes.
std::vector<unsigned char> decodeHex(const std::string & source)
{
    std::string decoded;
    // The second argument (pumpAll) is true, so the whole input is pushed
    // through the attached decoder into `decoded` during construction.
    CryptoPP::StringSource pump(
        source, true,
        new CryptoPP::HexDecoder(new CryptoPP::StringSink(decoded)));
    return std::vector<unsigned char>(decoded.begin(), decoded.end());
}
Note that the first variant is about two times faster than the second one, and it also works with both odd and even numbers of nibbles (the result of "a56ac" is {0x0a, 0x56, 0xac}). Crypto++ discards the last nibble if there is an odd number of nibbles (the result of "a56ac" is {0xa5, 0x6a}) and silently skips invalid hex characters (the result of "a5sac" is {0xa5, 0xac}).
#include <iostream>
#include <sstream>
#include <vector>
int main() {
    // Demo: decode the hex text "313233" into the characters '1', '2', '3'.
    const char delim = ',';
    std::string hexText("313233");

    // Put a delimiter after every two hex digits ("31,32,33") so that each
    // byte can be extracted from the stream as a separate number.
    int length = hexText.size();
    int pos = 2;
    while (pos < length) {
        hexText.insert(pos, 1, delim);
        pos += 3;
        ++length;
    }

    std::istringstream in(hexText);
    in >> std::hex;
    std::ostringstream decoded;
    for (int value; in >> value; ) {
        decoded << static_cast<char>(value);
        if (in.peek() == delim) in.ignore();
    }

    // std::string form
    const std::string byte_string = decoded.str();
    std::cout << byte_string << std::endl;
    printf("%s\n", byte_string.c_str());

    // std::vector form
    std::vector<char> byte_vector(byte_string.begin(), byte_string.end());
    byte_vector.push_back('\0'); // needed for a c-string
    printf("%s\n", byte_vector.data());
}
The output is
123
123
123
'1' == 0x31, etc.
If your goal is speed, I have an AVX2 SIMD implementation of an encoder and decoder here: https://github.com/zbjornson/fast-hex. These benchmark ~12x faster than the fastest scalar implementations.
#include <iostream>
using byte = unsigned char;
// Returns the numeric value (0-15) of a single hexadecimal digit, accepting
// both upper- and lower-case letters, or -1 if `c` is not a hex digit.
static int charToInt(char c) {
    const bool isDigit = ('0' <= c) && (c <= '9');
    const bool isUpper = ('A' <= c) && (c <= 'F');
    const bool isLower = ('a' <= c) && (c <= 'f');
    if (isDigit) return c - '0';
    if (isUpper) return 10 + (c - 'A');
    if (isLower) return 10 + (c - 'a');
    return -1;
}
// Decodes a '\0'-terminated hex string into `bytes`, two digits per byte.
// At most nBytes bytes are written; decoding stops once that many have been
// produced. Returns the number of bytes decoded, or -1 if a character is not
// a hex digit or the string ends in the middle of a byte.
int hexStringToBytes(const char* str, byte* bytes, int nBytes) {
    int count = 0;
    for (const char* cursor = str; *cursor != '\0' && count < nBytes; cursor += 2) {
        // A lone trailing digit cannot form a byte.
        if (cursor[1] == '\0') return -1;
        const int high = charToInt(cursor[0]);
        const int low = charToInt(cursor[1]);
        if (high == -1 || low == -1) return -1;
        bytes[count++] = (high << 4) | low;
    }
    return count;
}
int main(int argc, char* argv[]) {
if (argc < 2) {
return 1;
}
byte bytes[0x100];
int ret {hexStringToBytes(argv[1], bytes, 0x100)};
if (ret < 0) {
return 1;
}
std::cout << "number of bytes: " << ret << "\n" << std::hex;
for (int i {0}; i < ret; ++i) {
if (bytes[i] < 0x10) {
std::cout << "0";
}
std::cout << (bytes[i] & 0xff);
}
std::cout << "\n";
return 0;
}
i've modified TheoretiCAL's code
uint8_t buf[32] = {};
std::string hex = "0123";
while (hex.length() % 2)
hex = "0" + hex;
std::stringstream stream;
stream << std::hex << hex;
for (size_t i= 0; i <sizeof(buf); i++)
stream >> buf[i];
How I do this at compiletime
#pragma once
#include <memory>
#include <iostream>
#include <string>
#include <array>
#define DELIMITING_WILDCARD ' '
// #sean :)
// Converts one hexadecimal digit character to its integer value.
// Characters outside '0'-'9' and 'A'-'F' are treated as lower-case digits
// without validation, so non-hex input yields a meaningless value.
constexpr int _char_to_int( char ch )
{
    const bool is_decimal = ( ch >= '0' ) && ( ch <= '9' );
    const bool is_upper = ( ch >= 'A' ) && ( ch <= 'F' );
    return is_decimal ? ch - '0'
         : is_upper   ? ch - 'A' + 10
                      : ch - 'a' + 10;
}
// Counts how many elements of `str` compare equal to `wildcard` and returns
// that count plus one (the number of fields the wildcard separates).
template <char wildcard, typename T, size_t N = sizeof( T )>
constexpr size_t _count_wildcard( T &&str )
{
    size_t fields = 1u;
    for( const auto &element : str )
    {
        fields += ( element == wildcard ) ? 1u : 0u;
    }
    return fields;
}
// construct a base16 hex and emplace it at make_count
// change 16 to 256 if u want the result to be when:
// sig[0] == 0xA && sig[1] == 0xB = 0xA0B
// or leave as is for the scenario to return 0xAB
#define CONCATE_HEX_FACTOR 16
#define CONCATE_HEX(a, b) ( CONCATE_HEX_FACTOR * ( a ) + ( b ) )
// Builds a std::array<int, delimiter_count> from a signature string `sig`
// made of two-digit hex tokens separated by DELIMITING_WILDCARD.
// - Entry 0 comes from sig[0] and sig[1]; every later entry comes from the
//   two characters that follow a delimiter.
// - A token that starts with `skip_wildcard` is stored as -1.
// - Relies on the DELIMITING_WILDCARD and CONCATE_HEX macros and on
//   _char_to_int, all defined outside this function.
// NOTE(review): the delimiter scan compares against the DELIMITING_WILDCARD
// macro, so delimiter_count is expected to be computed with the same
// character — confirm at the call sites.
template
< char skip_wildcard,
// How many occurances of a delimiting wildcard do we find in sig
size_t delimiter_count,
typename T, size_t N = sizeof( T )>
constexpr auto _make_array( T &&sig )
{
static_assert( delimiter_count > 0, "this is a logical error, delimiter count can't be of size 0" );
static_assert( N > 1, "sig length must be bigger than 1" );
// Resulting byte array, for delimiter_count skips we should have delimiter_count integers
std::array<int, delimiter_count> ret{};
// List of skips that point to the position of the delimiter wildcard in skip
std::array<size_t, delimiter_count> skips{};
// Current skip
size_t skip_count = 0u;
// Character count, traversed for skip
size_t skip_traversed_character_count = 0u;
// First pass: record the index of every delimiter found in sig.
for( size_t i = 0u; i < N; ++i )
{
if( sig[i] == DELIMITING_WILDCARD )
{
skips[skip_count] = skip_traversed_character_count;
++skip_count;
}
++skip_traversed_character_count;
}
// Finally traversed character count
// NOTE(review): this variable is never read or written again in this function.
size_t traversed_character_count = 0u;
// Make count (we will supposedly have at least an instance in our return array)
size_t make_count = 1u;
// Traverse signature
for( size_t i = 0u; i < N; ++i )
{
// Read before
// The first token has no delimiter in front of it, so it is handled separately.
if( i == 0u )
{
// We don't care about this, and we don't want to use 0
if( sig[0u] == skip_wildcard )
{
ret[0u] = -1;
continue;
}
ret[0u] = CONCATE_HEX( _char_to_int( sig[0u] ), _char_to_int( sig[1u] ) );
continue;
}
// Make result by skip data
// Second pass: when i is a recorded delimiter index, decode the token after it.
for( const auto &skip : skips )
{
if( ( skip == i ) && skip < N - 1u )
{
// We don't care about this, and we don't want to use 0
if( sig[i + 1u] == skip_wildcard )
{
ret[make_count] = -1;
++make_count;
continue;
}
// NOTE(review): reads sig[i + 2u]; only i < N - 1 is checked above.
ret[make_count] = CONCATE_HEX( _char_to_int( sig[i + 1u] ), _char_to_int( sig[i + 2u] ) );
++make_count;
}
}
}
return ret;
}
#define SKIP_WILDCARD '?'
#define BUILD_ARRAY(a) _make_array<SKIP_WILDCARD, _count_wildcard<DELIMITING_WILDCARD>( a )>( a )
#define BUILD_ARRAY_MV(a) _make_array<SKIP_WILDCARD, _count_wildcard<DELIMITING_WILDCARD>( std::move( a ) )>( std::move( a ) )
// -----
// usage
// -----
// Returns the template argument `n` plus the first entry of a signature
// array that is built at compile time.
template <int n>
constexpr int combine_two()
{
    constexpr auto signature = BUILD_ARRAY( "55 8B EC 83 E4 F8 8B 4D 08 BA ? ? ? ? E8 ? ? ? ? 85 C0 75 12 ?" );
    constexpr int first_entry = signature[0];
    return n + first_entry;
}
int main()
{
    // Build a signature array at compile time and print every entry in hex.
    constexpr auto signature = BUILD_ARRAY( "?? AA BB CC DD ? ? ? 02 31 32" );
    for( const auto &entry : signature )
        printf( "%x ", entry );

    // Exercise combine_two at run time and at compile time.
    combine_two<3>();
    constexpr auto combined = combine_two<3>();
    static_assert( combine_two<3>() == 88 );
    static_assert( combine_two<3>() == combined );
    printf( "\n%d", combined );
}
Method can be used for runtime too, but for that you'd probably prefer something else, faster.
It may be useful to someone. The logic of translating a set of bytes into a string and back. Solves the zero character problem.
#include <sstream>
#include <iomanip>
// Renders the first `len` bytes of `data` as lower-case hex, two digits per
// byte. If `len` is larger than data.size(), only the available bytes are
// rendered.
std::string BytesToHex(const std::vector<char>& data, size_t len)
{
    // Never read past the end of the vector, even if the caller's len is too large.
    const size_t count = (len < data.size()) ? len : data.size();
    std::stringstream ss;
    ss << std::hex << std::setfill('0');
    for (size_t index = 0; index < count; ++index)
    {
        // Convert via unsigned char first: where plain char is signed, a byte
        // >= 0x80 would otherwise sign-extend and print as "ffa1" instead of "a1".
        ss << std::setw(2)
           << static_cast<unsigned>(static_cast<unsigned char>(data[index]));
    }
    return ss.str();
}
// Decodes a hex string into bytes, two digits per byte. A lone trailing digit
// is decoded as its own byte. Decoding stops at the first group that does not
// start with a hex digit; the bytes decoded up to that point are returned.
std::vector<char> HexToBytes(const std::string& data)
{
    std::vector<char> resBytes;
    resBytes.reserve((data.size() + 1) / 2);
    for (size_t pos = 0; pos < data.size(); pos += 2)
    {
        // sscanf needs a NUL-terminated string, so keep a third, zeroed byte
        // after the (at most) two digits copied from the input.
        char hexNum[3] = {0, 0, 0};
        data.copy(hexNum, 2, pos);
        unsigned short num = 0;
        if (sscanf(hexNum, "%2hX", &num) != 1)
        {
            break; // not a hex digit: do not push an indeterminate value
        }
        resBytes.push_back(static_cast<char>(num));
    }
    return resBytes;
}
If you can arrange for your data to be an array of strings such as "0x01", "0xA1",
then you can iterate over the array and use sscanf to convert each element to its value:
unsigned int result;
sscanf(data, "%x", &result);
The difficulty in an hex to char conversion is that the hex digits work pairwise, f.ex: 3132 or A0FF. So an even number of hex digits is assumed. However it could be perfectly valid to have an odd number of digits, like: 332 and AFF, which should be understood as 0332 and 0AFF.
I propose an improvement to Niels Keurentjes hex2bin() function.
First we count the number of valid hex digits. As we have to count, let's control also the buffer size:
void hex2bin(const char* src, char* target, size_t size_target)
{
int countdgts=0; // count hex digits
for (const char *p=src; *p && isxdigit(*p); p++)
countdgts++;
if ((countdgts+1)/2+1>size_target)
throw exception("Risk of buffer overflow");
By the way, to use isxdigit() you'll have to #include <cctype>.
Once we know how many digits, we can determine if the first one is the higher digit (only pairs) or not (first digit not a pair).
bool ishi = !(countdgts%2);
Then we can loop digit by digit, combining each pair using bin shift << and bin or, and
toggling the 'high' indicator at each iteration:
for (*target=0; *src; ishi = !ishi) {
char tmp = char2int(*src++); // hex digit on 4 lower bits
if (ishi)
*target = (tmp << 4); // high: shift by 4
else *target++ |= tmp; // low: complete previous
}
*target=0; // null terminated target (if desired)
}
I found this question, but the accepted answer didn't look like a C++ way of solving the task to me (this doesn't mean it's a bad answer or anything, just explaining motivation behind adding this one). I recollected this nice answer and decided to implement something similar. Here is complete code of what I ended up with (it also works for std::wstring):
#include <cctype>
#include <cstdlib>
#include <algorithm>
#include <iostream>
#include <iterator>
#include <ostream>
#include <stdexcept>
#include <string>
#include <vector>
// Output iterator adaptor that consumes hexadecimal digit characters and
// writes one decoded value to the wrapped output iterator for every two
// digits received (the first digit is the high nibble).
// Assigning a character that is not a hex digit throws std::runtime_error.
template <typename OutputIt>
class hex_ostream_iterator
{
    OutputIt out;
    int digitCount; // digits collected so far for the current byte
    int number;     // value accumulated from those digits
public:
    // Iterator traits. These were previously inherited from std::iterator,
    // which is deprecated since C++17.
    typedef std::output_iterator_tag iterator_category;
    typedef void value_type;
    typedef void difference_type;
    typedef void pointer;
    typedef void reference;

    // Deliberately not explicit: from_hex_string returns the result of
    // std::copy through an implicit conversion to this type.
    hex_ostream_iterator(OutputIt out) : out(out), digitCount(0), number(0)
    {
    }

    // Feeds one hex digit; emits a byte after every second digit.
    hex_ostream_iterator<OutputIt> &
    operator=(char c)
    {
        number = (number << 4) | char2int(c);
        digitCount++;
        if (digitCount == 2) {
            digitCount = 0;
            *out++ = number;
            number = 0;
        }
        return *this;
    }

    // Dereference and increment are no-ops that return the iterator itself,
    // so `*it++ = c` ends up in operator=(char).
    hex_ostream_iterator<OutputIt> &
    operator*()
    {
        return *this;
    }

    hex_ostream_iterator<OutputIt> &
    operator++()
    {
        return *this;
    }

    hex_ostream_iterator<OutputIt> &
    operator++(int)
    {
        return *this;
    }

private:
    // Returns the value (0-15) of a hex digit, case-insensitively.
    // Throws std::runtime_error for any other character.
    static int
    char2int(char c)
    {
        static const std::string HEX_CHARS = "0123456789abcdef";
        // std::tolower has undefined behaviour for negative arguments other
        // than EOF, so go through unsigned char first.
        const char lowerC =
            static_cast<char>(std::tolower(static_cast<unsigned char>(c)));
        const std::string::size_type pos = HEX_CHARS.find(lowerC);
        if (pos == std::string::npos) {
            throw std::runtime_error(std::string("Not a hex digit: ") + c);
        }
        return static_cast<int>(pos);
    }
};
// Convenience factory: wraps `out` in a hex_ostream_iterator so the template
// argument is deduced from the call.
template <typename OutputIt>
hex_ostream_iterator<OutputIt>
hex_iterator(OutputIt out)
{
    typedef hex_ostream_iterator<OutputIt> adaptor_type;
    return adaptor_type(out);
}
// Copies the characters in [first, last) to `out`. When the range holds an
// odd number of characters, a '0' is written first so the digits pair up.
template <typename InputIt, typename OutputIt>
hex_ostream_iterator<OutputIt>
from_hex_string(InputIt first, InputIt last, OutputIt out)
{
    const bool oddDigitCount = (std::distance(first, last) % 2 == 1);
    if (oddDigitCount) {
        *out = '0';
        ++out;
    }
    return std::copy(first, last, out);
}
// Decodes the hex string given as the only command-line argument and writes
// the resulting raw bytes to standard output.
int
main(int argc, char *argv[])
{
    if (argc != 2) {
        std::cout << "Usage: " << argv[0] << " hexstring" << std::endl;
        return EXIT_FAILURE;
    }

    const std::string hexDigits(argv[1]);
    std::vector<unsigned char> decoded;
    from_hex_string(hexDigits.begin(), hexDigits.end(),
                    hex_iterator(std::back_inserter(decoded)));

    for (std::vector<unsigned char>::const_iterator it = decoded.begin();
         it != decoded.end(); ++it) {
        std::cout << *it;
    }
    return EXIT_SUCCESS;
}
And the output of ./hex2bytes 61a062a063 | hexdump -C:
00000000 61 a0 62 a0 63 |a.b.c|
00000005
And of ./hex2bytes 6a062a063 | hexdump -C (note odd number of characters):
00000000 06 a0 62 a0 63 |..b.c|
00000005
In: "303132", Out: "012". Input string can be odd or even length.
// Returns the value (0-15) of one hex digit, upper or lower case.
// Throws std::runtime_error for any other character.
char char2int(char input)
{
    if (input >= '0' && input <= '9')
        return input - '0';
    if (input >= 'A' && input <= 'F')
        return input - 'A' + 10;
    if (input >= 'a' && input <= 'f')
        return input - 'a' + 10;
    throw std::runtime_error("Incorrect symbol in hex string");
}

// Decodes a hex string into the bytes it describes ("303132" -> "012").
// For an odd number of digits, the first digit alone forms the first byte.
// An empty input yields an empty result. Throws std::runtime_error (from
// char2int) if a character is not a hex digit.
string hex2str(string &hex)
{
    string out;
    out.reserve(hex.size() / 2 + hex.size() % 2);
    string::const_iterator it = hex.begin();
    const string::const_iterator end = hex.end();
    if (hex.size() % 2 != 0) {
        out.push_back(char2int(*it++));
    }
    // From here on an even number of digits remains. The two digits are read
    // in separate statements because `char2int(*it++) << 4 | char2int(*it)`
    // does not guarantee which operand is evaluated first.
    while (it != end) {
        const char high = char2int(*it++);
        const char low = char2int(*it++);
        out.push_back(static_cast<char>((high << 4) | low));
    }
    return out;
}
Very similar to some of the other answers here, this is what I went with:
typedef uint8_t BYTE;
// Decodes ArrayLength ASCII hex digits from HexString into ArrayLength / 2
// bytes (high digit first). A trailing unpaired digit is ignored.
// Returns a malloc()-allocated buffer that the caller must free(), or a null
// pointer if the allocation fails.
BYTE* ByteUtils::HexStringToBytes(BYTE* HexString, int ArrayLength)
{
    const int byteCount = (ArrayLength > 0) ? ArrayLength / 2 : 0;
    BYTE* returnBytes = static_cast<BYTE*>(malloc(byteCount));
    if (returnBytes == nullptr)
    {
        return nullptr;
    }
    // Walk complete digit pairs only. The previous loop read
    // HexString[ArrayLength] and wrote one byte past the buffer when
    // ArrayLength was odd.
    for (int j = 0; j < byteCount; j++)
    {
        const int valueHigh = ByteUtils::HexAsciiToDec(HexString[2 * j]);
        const int valueLow = ByteUtils::HexAsciiToDec(HexString[2 * j + 1]);
        returnBytes[j] = static_cast<BYTE>(valueHigh * 16 + valueLow);
    }
    return returnBytes;
}
// Converts the ASCII code of one hex digit ('0'-'9', 'a'-'f', 'A'-'F') to its
// numeric value 0-15. Any other code yields 0.
int ByteUtils::HexAsciiToDec(int value)
{
    // ASCII codes: '0' = 48, '9' = 57, 'A' = 65, 'F' = 70, 'a' = 97, 'f' = 102.
    // The digit range previously ended at 58 (':'), which was decoded as 10.
    if (value >= 48 && value <= 57)
    {
        return value - 48;
    }
    if (value >= 97 && value <= 102)
    {
        return value - 97 + 10;
    }
    if (value >= 65 && value <= 70)
    {
        return value - 65 + 10;
    }
    return 0;
}
// Decodes hexadecimal text in `data` and appends the resulting bytes to
// `buffer`.
//
// The input is visited at positions 0, offset, 2*offset, ... . At each
// position whose character is a hex digit, up to two hex digits are read
// (the second only if it exists and is itself a hex digit) and the value is
// appended as one char. Positions that do not start with a hex digit are
// skipped. With the default offset of 2, "303132" appends "012"; an odd
// trailing digit is decoded on its own.
//
// Returns false when `data` is empty or `offset` is 0 (which could never
// advance), true otherwise.
//
// The digits are parsed directly from the string instead of reinterpreting
// the character data as a uint16_t, so the result no longer depends on byte
// order or alignment and nothing past the end of `data` is read.
static bool Hexadec2xdigit(const std::string& data, std::string& buffer, std::size_t offset = sizeof(uint16_t))
{
    if (data.empty() || offset == 0)
    {
        return false;
    }

    // Numeric value of a hex digit, or -1 if `c` is not one.
    const auto hex_value = [] (char c) noexcept -> int
    {
        if (c >= '0' && c <= '9') return c - '0';
        if (c >= 'a' && c <= 'f') return c - 'a' + 10;
        if (c >= 'A' && c <= 'F') return c - 'A' + 10;
        return -1;
    };

    const std::size_t size = data.size();
    std::size_t i = 0;
    while (i < size)
    {
        const int high = hex_value(data[i]);
        if (high >= 0)
        {
            int value = high;
            if (i + 1 < size)
            {
                const int low = hex_value(data[i + 1]);
                if (low >= 0)
                {
                    value = value * 16 + low;
                }
            }
            buffer += static_cast<char>(value);
        }

        // Stop before the index could wrap around for very large offsets.
        if (size - i <= offset)
        {
            break;
        }
        i += offset;
    }
    return true;
}
This code doesn't have much of a copy process

Feeding the TEA Cipher without flooding ram

How can I feed large files to the TEA cipher without exhausting the system's RAM?
I have tried to do this, but every attempt has failed after hours of tinkering. Could someone give me an example of how this can be done, or any useful pointers on how to approach it?
// Decrypts one 64-bit XTEA block (32 rounds).
//   v - input:  two words of ciphertext (v[0], v[1]); not modified
//   w - output: two words of plaintext (w[0], w[1])
//   k - the key as four words
// The arithmetic relies on unsigned wrap-around and assumes 32-bit
// unsigned int.
// Note: the obsolete 'register' storage class was dropped from the locals;
// it is not allowed in C++17.
void decodeXtea(unsigned int* v, unsigned int* w, unsigned int* k) {
    const unsigned int delta = 0x9E3779B9;
    unsigned int v0 = v[0];
    unsigned int v1 = v[1];
    unsigned int sum = 0xC6EF3720; // delta * 32 (mod 2^32): the sum after the 32 encryption rounds
    for (unsigned int i = 0; i < 32; i++) {
        v1 -= (((v0 << 4) ^ (v0 >> 5)) + v0) ^ (sum + k[(sum >> 11) & 3]);
        sum -= delta;
        v0 -= (((v1 << 4) ^ (v1 >> 5)) + v1) ^ (sum + k[sum & 3]);
    }
    w[0] = v0;
    w[1] = v1;
}
// Decrypts a string produced by TeaEncode.
//   str - ciphertext; interpreted as a sequence of 4-byte words
//   key - key text; at most the first 16 bytes are used, shorter keys are
//         zero-padded
//   out - receives the plaintext (always cleared first). The result is
//         4 * (number of whole words in str - 1) bytes long and still
//         contains any zero padding added by the encoder. Inputs with fewer
//         than two whole words produce an empty result.
//
// Words are processed from last to first: each pass decrypts the pair
// (current word, carried word), stores the first result word as plaintext
// and carries the second result word into the next pass.
//
// Words are moved with memcpy rather than by dereferencing casted char
// pointers, which avoids misaligned and aliasing-violating accesses, and the
// scratch buffer is a std::string so it cannot leak. Assumes 32-bit
// unsigned int.
void TeaDecode ( const std::string& str, const std::string& key, std::string* out )
{
    unsigned int v[2] = { 0, 0 };
    unsigned int w[2] = { 0, 0 };
    unsigned int k[4] = { 0, 0, 0, 0 };

    out->clear ();

    // Count the number of passes that we need
    const int numBlocks = static_cast<int> ( str.length() / 4 );
    const int numPasses = numBlocks - 1;
    if ( numPasses <= 0 )
        return;

    // Process the key: copy at most sizeof(k) bytes, the rest stays zero
    std::string::size_type keyLen = key.length ();
    if ( keyLen > sizeof(k) )
        keyLen = sizeof(k);
    memcpy ( k, key.data(), keyLen );

    // Temporary buffer for the result
    std::string buffer ( static_cast<std::string::size_type> ( numPasses ) * 4, '\0' );

    // Decode it!
    const char* p = str.data();
    memcpy ( &v[1], p + numPasses * 4, 4 );
    for ( int block = numPasses - 1; block >= 0; --block )
    {
        memcpy ( &v[0], p + block * 4, 4 );
        decodeXtea ( &v[0], &w[0], &k[0] );
        memcpy ( &buffer[block * 4], &w[0], 4 );
        v[1] = w[1];
    }
    out->swap ( buffer );
}
// Encrypts one 64-bit XTEA block (32 rounds).
//   v - input:  two words of plaintext (v[0], v[1]); not modified
//   w - output: two words of ciphertext (w[0], w[1])
//   k - the key as four words
// The arithmetic relies on unsigned wrap-around and assumes 32-bit
// unsigned int.
// Note: the obsolete 'register' storage class was dropped from the locals;
// it is not allowed in C++17.
void encodeXtea(unsigned int* v, unsigned int* w, unsigned int* k) {
    const unsigned int delta = 0x9E3779B9;
    unsigned int v0 = v[0];
    unsigned int v1 = v[1];
    unsigned int sum = 0;
    for (unsigned int i = 0; i < 32; i++) {
        v0 += (((v1 << 4) ^ (v1 >> 5)) + v1) ^ (sum + k[sum & 3]);
        sum += delta;
        v1 += (((v0 << 4) ^ (v0 >> 5)) + v0) ^ (sum + k[(sum >> 11) & 3]);
    }
    w[0] = v0;
    w[1] = v1;
}
// Encrypts a string with XTEA in a chained, word-at-a-time scheme.
//   str - plaintext; zero-padded up to a multiple of 4 bytes
//   key - key text; at most the first 16 bytes are used, shorter keys are
//         zero-padded
//   out - receives the ciphertext (always cleared first). For non-empty
//         input it is (padded length + 4) bytes long; for empty input it
//         stays empty.
//
// Each pass encrypts the pair (next plaintext word, carried word), emits the
// first result word and carries the second into the next pass; the final
// carried word is appended at the end.
//
// Words are moved with memcpy rather than by dereferencing casted char
// pointers, which avoids misaligned and aliasing-violating accesses, and the
// padded copy is a std::string so it cannot leak. Assumes 32-bit
// unsigned int.
void TeaEncode ( const std::string& str, const std::string& key, std::string* out )
{
    unsigned int v[2] = { 0, 0 };
    unsigned int w[2] = { 0, 0 };
    unsigned int k[4] = { 0, 0, 0, 0 };

    out->clear ();

    // Process the key: copy at most sizeof(k) bytes, the rest stays zero
    std::string::size_type keyLen = key.length ();
    if ( keyLen > sizeof(k) )
        keyLen = sizeof(k);
    memcpy ( k, key.data(), keyLen );

    if ( str.empty () )
        return;

    // Copy the input string to a buffer whose size is a multiple of 4
    std::string padded ( str );
    padded.resize ( ( str.length () + 3 ) / 4 * 4, '\0' );

    // Encode it!
    out->reserve ( padded.size () + 4 );
    v[1] = 0;
    for ( std::string::size_type i = 0; i < padded.size (); i += 4 )
    {
        memcpy ( &v[0], padded.data () + i, 4 );
        encodeXtea ( &v[0], &w[0], &k[0] );
        out->append ( reinterpret_cast<const char*> ( &w[0] ), 4 );
        v[1] = w[1];
    }
    out->append ( reinterpret_cast<const char*> ( &v[1] ), 4 );
}
This fixed it.
// Streams the file `fname` through encode()/decode() in small fixed-size
// chunks and writes the result to `outFileName`, so the whole file is never
// held in memory.
//   fname       - input file, read in binary mode
//   outFileName - output file, written in binary mode
//   key         - key passed through to encode()/decode()
//   mode        - "E" encodes each chunk, "D" decodes each chunk; any other
//                 value copies the chunks unchanged
//
// Chunk sizes: 32 bytes when encoding, 36 bytes otherwise.
// NOTE(review): presumably encode() turns a 32-byte chunk into 36 bytes, as
// TeaEncode above does, which is why decoding reads 36 - confirm.
//
// If the output file already has content, prints "Overwrite error" and
// exits. NOTE(review): getSize() is assumed to return the current size of
// the named file - confirm.
void readSystem(string fname,string outFileName,string key,string mode)
{
    // Anti overwrite. This must run BEFORE the output stream is opened:
    // opening an ofstream without ios::app truncates an existing file, which
    // would both destroy its content and make this check pointless.
    if(getSize(outFileName) > 0)
    {
        cout << "Overwrite error" << endl;
        exit(0);
    }

    const size_t buffer_size = (mode == "E") ? 32 : 36;
    string buffer(buffer_size,'\0');
    string data;

    // The input
    std::ifstream fin(fname,ios::binary);
    // The output
    ofstream outFile(outFileName,ios::binary);

    while (fin)
    {
        // Try to read the next chunk of data
        fin.read(&buffer.front(), buffer_size);
        // Get the number of bytes actually read
        const size_t count = fin.gcount();
        // If nothing has been read, stop without emitting anything
        if (!count)
            break;

        // Only the bytes read in this pass are processed; on a short final
        // read the rest of the buffer still holds the previous chunk.
        data.assign(buffer, 0, count);
        if(mode == "E")
        {
            data = encode(data,key);
        }
        else if(mode == "D")
        {
            data = decode(data,key);
        }
        outFile.write(data.c_str(),data.length());
    }
    outFile.close();
    fin.close();
}

Upgrading NC30 M16 C Compiler: va_arg issue

With version 5 of the NC30 M16C compiler, I had the following macros, written by a previous programmer. We use these macros in functions such as "printf()" and "sprintf()".
/* Hand-rolled variadic-argument support: the argument cursor is a plain byte pointer. */
typedef unsigned char * va_list;
/* Points args just past the last named parameter `first`.
   NOTE(review): the (unsigned short) cast presumably relies on 16-bit data pointers - confirm for the target. */
#define va_start( args, first ) args = (va_list) ( (unsigned short) &first + sizeof( first ) )
/* Reads the next argument as `type` and advances args past it.
   NOTE(review): ++ is applied to the result of a cast here, which is not an lvalue in standard C. */
#define va_arg( args, type ) *( (type *) args )++
/* Expands to nothing. */
#define va_end( args )
When I compile this code with version 6 of the NC30 M16C compiler, it gives me the error "Invalid lvalue".
Here is the part of the entire error message:
clib.c(253) : C2700 (E) invalid lvalue
===> unum = va_arg( args, unsigned int );
clib.c(293) : C2700 (E) invalid lvalue
===> fch = va_arg( args, far char * );
clib.c(299) : C2700 (E) invalid lvalue
===> nch = va_arg( args, char * );
clib.c(305) : C2700 (E) invalid lvalue
===> unum = va_arg( args, unsigned int );
clib.c(323) : C2700 (E) invalid lvalue
===> ulong = va_arg( args, unsigned long );
clib.c(341) : C2700 (E) invalid lvalue
===> llong = va_arg( args, unsigned long long );
clib.c(359) : C2700 (E) invalid lvalue
===> ulong = va_arg( args, unsigned long );
clib.c(377) : C2700 (E) invalid lvalue
===> unum = va_arg( args, unsigned short );
clib.c(382) : C2700 (E) invalid lvalue
===> ft = va_arg( args, float );
clib.c(519) : C2694 (E) unknown variable source
===> *source++ = zeropad ? '0' : ' ';
clib.c(527) : C2694 (E) unknown variable source
===> *source++ = '%';
clib.c(532) : C2700 (E) invalid lvalue
===> snum = va_arg( args, signed int );
clib.c(550) : C2694 (E) unknown variable source
===> *source++ = *tempptr;
clib.c(556) : C2700 (E) invalid lvalue
===> fch = va_arg( args, far char * );
clib.c(558) : C2694 (E) unknown variable source
===> *source++ = *fch++;
clib.c(564) : C2700 (E) invalid lvalue
===> nch = va_arg( args, char * );
clib.c(566) : C2694 (E) unknown variable source
===> *source++ = *nch++;
clib.c(572) : C2700 (E) invalid lvalue
===> unum = va_arg( args, unsigned int );
Here is one of the functions in which we use these macros. It does the same job as "printf" but is named zwPrintf():
/*
 * Minimal printf-style formatter that emits every character through zwPutc().
 *
 * Format syntax: '%', an optional leading '0' (zero padding), an optional
 * decimal field width, then one conversion character:
 *   %%  literal '%'
 *   %d  signed decimal; the argument is pulled as unsigned int and values
 *       >= 0x8000 are treated as negative
 *   %s  far char * string          %S  near char * string
 *   %x  unsigned int, hexadecimal  %h  unsigned long, hexadecimal
 *   %i  unsigned long, decimal     %L  unsigned long long, decimal
 *   %c  single character
 *   %f  float: the argument is consumed, but the formatting code is compiled
 *       out (#if 0), so nothing is printed
 * Any other conversion character is skipped without output (the switch has
 * no default case).
 *
 * Field widths larger than FIELD_BUFF_SIZE are handled by emitting the
 * excess padding directly and clamping the width to FIELD_BUFF_SIZE.
 *
 * Returns -1 when the format string ends directly after a '%'.
 * NOTE(review): the normal exit path at the bottom also returns -1, so the
 * caller cannot tell success from failure - confirm this is intended.
 * NOTE(review): args is declared as zwVAList; confirm that type matches what
 * the va_start/va_arg macros in use expect.
 */
int zwPrintf( far char * format, ... ) {
zwVAList args;
unsigned int unum;
unsigned char temp[ FIELD_BUFF_SIZE + 1 ]; /* for formatting numbers (max length for long decimal) */
unsigned char * tempptr;
int zeropad, minfield, counter;
far char * fch;
char * nch;
unsigned long ulong;
int negative;
float ft, mantissa;
unsigned long long llong;
//unsigned char mychar;
//unsigned char *mychar_p;
//unsigned int *mytest_p;
va_start( args, format );
while( *format ) {
if( *format == '%' ) {
format++;
zeropad = 0;
minfield = 0;
negative = 0;
if( *format == '0' )
zeropad = 1; /* we want zero padding to field width */
while( *format >= '0' && *format <= '9' ) {
/* we are specifying field width */
minfield *= 10;
minfield += *format - '0';
format++;
}
if( minfield > FIELD_BUFF_SIZE ) { /* we want a field width greater than our field buffer, pad misc */
for( counter = 0; counter < minfield - FIELD_BUFF_SIZE; counter++ )
zwPutc( (unsigned char) ( zeropad ? '0' : ' ' ) );
minfield = FIELD_BUFF_SIZE;
}
switch( *format ) {
case '%': /* literal % */
zwPutc( '%' );
break;
case 'd': /* signed decimal output */
unum = va_arg( args, unsigned int ); /* pull unsigned, and do math ourselves (to avoid confusion) */
//mychar='a';
//mychar_p = &mychar;
//mytest_p = ((unsigned int*)mychar_p);
//mytest_p++;
//unum = *mytest_p;
//unum = (*((unsigned int*)args))++;
//unum = (*((unsigned int*)args))++;
/* convert to decimal (backward) */
if( unum >= 0x8000 ) { /* number is -'ve */
negative = 1;
unum = ~unum + 1; /* make number +'ve */
}
/* digits are written from the end of temp toward its start */
tempptr = &temp[ FIELD_BUFF_SIZE ];
counter = 0;
do {
if( unum )
*tempptr-- = ( unum % 10 ) + '0';
else {
/* out of digits: place the sign once (space padding only), then pad */
if( negative && ( zeropad == 0 ) ) {
*tempptr-- = '-';
negative = 0;
}
else
*tempptr-- = ( zeropad || counter == 0 ) ? '0' : ' ';
}
unum /= 10;
counter++;
} while( unum || counter < minfield );
/* output the string */
if( negative )
zwPutc( '-' );
for( tempptr++; tempptr <= &temp[ FIELD_BUFF_SIZE ]; tempptr++ )
zwPutc( *tempptr );
break;
case 's': /* far char * */
fch = va_arg( args, far char * );
while( *fch )
zwPutc( *fch++ );
break;
case 'S': /* near char * (extension) */
nch = va_arg( args, char * );
while( *nch )
zwPutc( *nch++ );
break;
case 'x': /* hexadecimal */
unum = va_arg( args, unsigned int );
/* convert to hexadecimal (backward) */
tempptr = &temp[ FIELD_BUFF_SIZE ];
counter = 0;
do {
if( unum )
*tempptr-- = zwHexToAsc( (unsigned char) unum & 0x0F );
else
*tempptr-- = ( zeropad || counter == 0 ) ? '0' : ' ';
unum >>= 4;
counter++;
} while( unum || counter < minfield );
/* output the string */
for( tempptr++; tempptr <= &temp[ FIELD_BUFF_SIZE ]; tempptr++ )
zwPutc( *tempptr );
break;
case 'i': /* unsigned long int decimal (extension) */
ulong = va_arg( args, unsigned long );
/* convert to decimal (backward) */
tempptr = &temp[ FIELD_BUFF_SIZE ];
counter = 0;
do {
if( ulong )
*tempptr-- = (unsigned char)( ulong % 10 ) + '0';
else
*tempptr-- = ( zeropad || counter == 0 ) ? '0' : ' ';
ulong /= 10;
counter++;
} while( ulong || counter < minfield );
/* output the string */
for( tempptr++; tempptr <= &temp[ FIELD_BUFF_SIZE ]; tempptr++ )
zwPutc( *tempptr );
break;
case 'L': /* unsigned long long decimal (extension) */
llong = va_arg( args, unsigned long long );
/* convert to decimal (backward) */
/* NOTE(review): temp is sized for a long decimal per its declaration comment; confirm FIELD_BUFF_SIZE also covers the longest unsigned long long */
tempptr = &temp[ FIELD_BUFF_SIZE ];
counter = 0;
do {
if( llong )
*tempptr-- = (unsigned char)( llong % 10 ) + '0';
else
*tempptr-- = ( zeropad || counter == 0 ) ? '0' : ' ';
llong /= 10;
counter++;
} while( llong || counter < minfield );
/* output the string */
for( tempptr++; tempptr <= &temp[ FIELD_BUFF_SIZE ]; tempptr++ )
zwPutc( *tempptr );
break;
case 'h': /* unsigned long int hexadecimal (extension) */
ulong = va_arg( args, unsigned long );
/* convert to hexadecimal (backward) */
tempptr = &temp[ FIELD_BUFF_SIZE ];
counter = 0;
do {
if( ulong )
*tempptr-- = zwHexToAsc( ( (unsigned char) ulong ) & 0x0F );
else
*tempptr-- = ( zeropad || counter == 0 ) ? '0' : ' ';
ulong >>= 4;
counter++;
} while( ulong || counter < minfield );
/* output the string */
for( tempptr++; tempptr <= &temp[ FIELD_BUFF_SIZE ]; tempptr++ )
zwPutc( *tempptr );
break;
case 'c': /* single character, pulled as unsigned short */
unum = va_arg( args, unsigned short );
zwPutc( (char) unum );
break;
case 'f': /* float: argument is consumed; the formatting below is disabled */
ft = va_arg( args, float );
#if 0
/* convert to decimal (backward) */
if( ft < 0 ) { /* number is -'ve */
negative = 1;
ft = -ft;
}
tempptr = &temp[ FIELD_BUFF_SIZE ];
counter = 0;
/* split float to integer and mantissa part */
ulong = ft / 1;
mantissa = ft - ( float )ulong;
/* get integer part */
do {
if( ulong ){
*tempptr-- = (unsigned char)( ulong % 10 ) + '0';
}
else {
*tempptr-- = ( zeropad || counter == 0 ) ? '0' : ' ';
}
ulong /= 10;
counter++;
} while( ulong || counter < minfield );
if ( negative ) {
temp[ 0 ] = '-';
zwMemcpy( &temp[ 1 ], &temp[ FIELD_BUFF_SIZE - counter ], counter ); //change to right position
counter++;
}
else
zwMemcpy( &temp[ 0 ], &temp[ FIELD_BUFF_SIZE - counter ], counter );
temp[ counter++ ] = '.';
/* get mantissa part */
tempptr = &temp[ counter ];
do {
unum = ( mantissa * 10 ) / 1;
if( unum ){
*tempptr++ = (unsigned char)( unum ) + '0';
}
else {
*tempptr++ = '0';
}
mantissa = ( float ) ( mantissa * 10.0 - ( float )unum ) * 10.0;
counter++;
} while( mantissa > 0 || counter < minfield );
for( unum = 0; unum < counter; unum++ )
zwPutc( temp[ unum ] );
/* convert to decimal (backward) */
if( ft < 0 ) { /* number is -'ve */
negative = 1;
ft = -ft;
}
tempptr = &temp[ FIELD_BUFF_SIZE ];
counter = 0;
do {
if( ft >= 1.0 ){
*tempptr-- = ( ft % 10.0 ) + '0';
// *tempptr-- = ( ft * 10 - ( ( ft * 100 ) / 10 ) ) + '0';
}
else {
if( negative && ( zeropad == 0 ) ) {
*tempptr-- = '-';
negative = 0;
}
else
*tempptr-- = ( zeropad || counter == 0 ) ? '0' : ' ';
}
ft /= 10;
counter++;
} while( ft >= 1.0 || counter < minfield );
/* output the string */
if( negative )
zwPutc( '-' );
for( tempptr++; tempptr <= &temp[ FIELD_BUFF_SIZE ]; tempptr++ )
zwPutc( *tempptr );
#endif
break;
case 0: /* end of string (malformed string anyway) */
va_end( args );
return -1; /* error */
break;
}
}
else
zwPutc( *format );
/* step past the character just handled (conversion character or literal) */
format++;
}
va_end( args );
/* NOTE(review): same value as the error return above - confirm this is intended */
return -1;
}
Please advise: what should I do to correct this issue?
Thanks in advance.
Could you try to replace:
#define va_arg( args, type ) *( (type *) args )++
with
#define va_arg( args, type ) *( (type *) args ), args += sizeof (type)
Explanation:
You get the compilation error because this expression:
((type *) args )++
is invalid in C: the result of a cast is not an lvalue, and the postfix ++ operator requires its operand to be an lvalue. A lot of compilers are lax about this constraint, but apparently yours is not (or the new version is stricter).
Also note that the proposed workaround should work in your program with simple assignment expressions like:
unum = va_arg( args, unsigned int );
because = has higher precedence than the comma operator.
EDIT:
Another (maybe even better) solution: you should be able to get around the fact that the result of a cast is not an lvalue by using this:
#define va_arg( args, type ) *( *(type **) &args )++