What is the best way to convert a variable length hex string e.g. "01A1" to a byte array containing that data.
i.e converting this:
std::string = "01A1";
into this
char* hexArray;
int hexLength;
or this
std::vector<char> hexArray;
so that when I write this to a file and hexdump -C it I get the binary data containing 01A1.
This implementation uses the built-in strtol function to handle the actual conversion from text to bytes, but will work for any even-length hex string.
std::vector<char> HexToBytes(const std::string& hex) {
std::vector<char> bytes;
for (unsigned int i = 0; i < hex.length(); i += 2) {
std::string byteString = hex.substr(i, 2);
char byte = (char) strtol(byteString.c_str(), NULL, 16);
bytes.push_back(byte);
}
return bytes;
}
This ought to work:
int char2int(char input)
{
if(input >= '0' && input <= '9')
return input - '0';
if(input >= 'A' && input <= 'F')
return input - 'A' + 10;
if(input >= 'a' && input <= 'f')
return input - 'a' + 10;
throw std::invalid_argument("Invalid input string");
}
// This function assumes src to be a zero terminated sanitized string with
// an even number of [0-9a-f] characters, and target to be sufficiently large
void hex2bin(const char* src, char* target)
{
while(*src && src[1])
{
*(target++) = char2int(*src)*16 + char2int(src[1]);
src += 2;
}
}
Depending on your specific platform there's probably also a standard implementation though.
So for fun, I was curious if I could do this kind of conversion at compile-time. It doesn't have a lot of error checking and was done in VS2015, which doesn't support C++14 constexpr functions yet (thus how HexCharToInt looks). It takes a c-string array, converts pairs of characters into a single byte and expands those bytes into a uniform initialization list used to initialize the T type provided as a template parameter. T could be replaced with something like std::array to automatically return an array.
#include <cstdint>
#include <initializer_list>
#include <stdexcept>
#include <utility>
/* Quick and dirty conversion from a single character to its hex equivelent */
constexpr std::uint8_t HexCharToInt(char Input)
{
return
((Input >= 'a') && (Input <= 'f'))
? (Input - 87)
: ((Input >= 'A') && (Input <= 'F'))
? (Input - 55)
: ((Input >= '0') && (Input <= '9'))
? (Input - 48)
: throw std::exception{};
}
/* Position the characters into the appropriate nibble */
constexpr std::uint8_t HexChar(char High, char Low)
{
return (HexCharToInt(High) << 4) | (HexCharToInt(Low));
}
/* Adapter that performs sets of 2 characters into a single byte and combine the results into a uniform initialization list used to initialize T */
template <typename T, std::size_t Length, std::size_t ... Index>
constexpr T HexString(const char (&Input)[Length], const std::index_sequence<Index...>&)
{
return T{HexChar(Input[(Index * 2)], Input[((Index * 2) + 1)])...};
}
/* Entry function */
template <typename T, std::size_t Length>
constexpr T HexString(const char (&Input)[Length])
{
return HexString<T>(Input, std::make_index_sequence<(Length / 2)>{});
}
constexpr auto Y = KS::Utility::HexString<std::array<std::uint8_t, 3>>("ABCDEF");
You can use boost:
#include <boost/algorithm/hex.hpp>
char bytes[60] = {0};
std::string hash = boost::algorithm::unhex(std::string("313233343536373839"));
std::copy(hash.begin(), hash.end(), bytes);
You said "variable length." Just how variable do you mean?
For hex strings that fit into an unsigned long I have always liked the C function strtoul. To make it convert hex pass 16 as the radix value.
Code might look like:
#include <cstdlib>
std::string str = "01a1";
unsigned long val = strtoul(str.c_str(), 0, 16);
If you want to use OpenSSL to do it, there is a nifty trick I found:
BIGNUM *input = BN_new();
int input_length = BN_hex2bn(&input, argv[2]);
input_length = (input_length + 1) / 2; // BN_hex2bn() returns number of hex digits
unsigned char *input_buffer = (unsigned char*)malloc(input_length);
retval = BN_bn2bin(input, input_buffer);
Just be sure to strip off any leading '0x' to the string.
This can be done with a stringstream, you just need to store the value in an intermediate numeric type such as an int:
std::string test = "01A1"; // assuming this is an even length string
char bytes[test.length()/2];
stringstream converter;
for(int i = 0; i < test.length(); i+=2)
{
converter << std::hex << test.substr(i,2);
int byte;
converter >> byte;
bytes[i/2] = byte & 0xFF;
converter.str(std::string());
converter.clear();
}
Somebody mentioned using sscanf to do this, but didn't say how. This is how. It's useful because it also works in ancient versions of C and C++ and even most versions of embedded C or C++ for microcontrollers.
When converted to bytes, the hex-string in this example resolves to the ASCII text "Hello there!" which is then printed.
#include <stdio.h>
int main ()
{
char hexdata[] = "48656c6c6f20746865726521";
char bytedata[20]{};
for(int j = 0; j < sizeof(hexdata) / 2; j++) {
sscanf(hexdata + j * 2, "%02hhX", bytedata + j);
}
printf ("%s -> %s\n", hexdata, bytedata);
return 0;
}
I would use a standard function like sscanf to read the string into an unsigned integer, and then you already have the bytes you need in memory. If you were on a big endian machine you could just write out (memcpy) the memory of the integer from the first non-zero byte. However you can't safely assume this in general, so you can use some bit masking and shifting to get the bytes out.
const char* src = "01A1";
char hexArray[256] = {0};
int hexLength = 0;
// read in the string
unsigned int hex = 0;
sscanf(src, "%x", &hex);
// write it out
for (unsigned int mask = 0xff000000, bitPos=24; mask; mask>>=8, bitPos-=8) {
unsigned int currByte = hex & mask;
if (currByte || hexLength) {
hexArray[hexLength++] = currByte>>bitPos;
}
}
C++11 variant (with gcc 4.7 - little endian format):
#include <string>
#include <vector>
std::vector<uint8_t> decodeHex(const std::string & source)
{
if ( std::string::npos != source.find_first_not_of("0123456789ABCDEFabcdef") )
{
// you can throw exception here
return {};
}
union
{
uint64_t binary;
char byte[8];
} value{};
auto size = source.size(), offset = (size % 16);
std::vector<uint8_t> binary{};
binary.reserve((size + 1) / 2);
if ( offset )
{
value.binary = std::stoull(source.substr(0, offset), nullptr, 16);
for ( auto index = (offset + 1) / 2; index--; )
{
binary.emplace_back(value.byte[index]);
}
}
for ( ; offset < size; offset += 16 )
{
value.binary = std::stoull(source.substr(offset, 16), nullptr, 16);
for ( auto index = 8; index--; )
{
binary.emplace_back(value.byte[index]);
}
}
return binary;
}
Crypto++ variant (with gcc 4.7):
#include <string>
#include <vector>
#include <crypto++/filters.h>
#include <crypto++/hex.h>
std::vector<unsigned char> decodeHex(const std::string & source)
{
std::string hexCode;
CryptoPP::StringSource(
source, true,
new CryptoPP::HexDecoder(new CryptoPP::StringSink(hexCode)));
return std::vector<unsigned char>(hexCode.begin(), hexCode.end());
}
Note that the first variant is about two times faster than the second one and at the same time works with odd and even number of nibbles (the result of "a56ac" is {0x0a, 0x56, 0xac}). Crypto++ discards the last one if there are odd number of nibbels (the result of "a56ac" is {0xa5, 0x6a}) and silently skips invalid hex characters (the result of "a5sac" is {0xa5, 0xac}).
#include <iostream>
#include <sstream>
#include <vector>
int main() {
std::string s("313233");
char delim = ',';
int len = s.size();
for(int i = 2; i < len; i += 3, ++len) s.insert(i, 1, delim);
std::istringstream is(s);
std::ostringstream os;
is >> std::hex;
int n;
while (is >> n) {
char c = (char)n;
os << std::string(&c, 1);
if(is.peek() == delim) is.ignore();
}
// std::string form
std::string byte_string = os.str();
std::cout << byte_string << std::endl;
printf("%s\n", byte_string.c_str());
// std::vector form
std::vector<char> byte_vector(byte_string.begin(), byte_string.end());
byte_vector.push_back('\0'); // needed for a c-string
printf("%s\n", byte_vector.data());
}
The output is
123
123
123
'1' == 0x31, etc.
If your goal is speed, I have an AVX2 SIMD implementation of an encoder and decoder here: https://github.com/zbjornson/fast-hex. These benchmark ~12x faster than the fastest scalar implementations.
#include <iostream>
using byte = unsigned char;
static int charToInt(char c) {
if (c >= '0' && c <= '9') {
return c - '0';
}
if (c >= 'A' && c <= 'F') {
return c - 'A' + 10;
}
if (c >= 'a' && c <= 'f') {
return c - 'a' + 10;
}
return -1;
}
// Decodes specified HEX string to bytes array. Specified nBytes is length of bytes
// array. Returns -1 if fails to decode any of bytes. Returns number of bytes decoded
// on success. Maximum number of bytes decoded will be equal to nBytes. It is assumed
// that specified string is '\0' terminated.
int hexStringToBytes(const char* str, byte* bytes, int nBytes) {
int nDecoded {0};
for (int i {0}; str[i] != '\0' && nDecoded < nBytes; i += 2, nDecoded += 1) {
if (str[i + 1] != '\0') {
int m {charToInt(str[i])};
int n {charToInt(str[i + 1])};
if (m != -1 && n != -1) {
bytes[nDecoded] = (m << 4) | n;
} else {
return -1;
}
} else {
return -1;
}
}
return nDecoded;
}
int main(int argc, char* argv[]) {
if (argc < 2) {
return 1;
}
byte bytes[0x100];
int ret {hexStringToBytes(argv[1], bytes, 0x100)};
if (ret < 0) {
return 1;
}
std::cout << "number of bytes: " << ret << "\n" << std::hex;
for (int i {0}; i < ret; ++i) {
if (bytes[i] < 0x10) {
std::cout << "0";
}
std::cout << (bytes[i] & 0xff);
}
std::cout << "\n";
return 0;
}
i've modified TheoretiCAL's code
uint8_t buf[32] = {};
std::string hex = "0123";
while (hex.length() % 2)
hex = "0" + hex;
std::stringstream stream;
stream << std::hex << hex;
for (size_t i= 0; i <sizeof(buf); i++)
stream >> buf[i];
How I do this at compiletime
#pragma once
#include <memory>
#include <iostream>
#include <string>
#include <array>
#define DELIMITING_WILDCARD ' '
// #sean :)
constexpr int _char_to_int( char ch )
{
if( ch >= '0' && ch <= '9' )
return ch - '0';
if( ch >= 'A' && ch <= 'F' )
return ch - 'A' + 10;
return ch - 'a' + 10;
};
template <char wildcard, typename T, size_t N = sizeof( T )>
constexpr size_t _count_wildcard( T &&str )
{
size_t count = 1u;
for( const auto &character : str )
{
if( character == wildcard )
{
++count;
}
}
return count;
}
// construct a base16 hex and emplace it at make_count
// change 16 to 256 if u want the result to be when:
// sig[0] == 0xA && sig[1] == 0xB = 0xA0B
// or leave as is for the scenario to return 0xAB
#define CONCATE_HEX_FACTOR 16
#define CONCATE_HEX(a, b) ( CONCATE_HEX_FACTOR * ( a ) + ( b ) )
template
< char skip_wildcard,
// How many occurances of a delimiting wildcard do we find in sig
size_t delimiter_count,
typename T, size_t N = sizeof( T )>
constexpr auto _make_array( T &&sig )
{
static_assert( delimiter_count > 0, "this is a logical error, delimiter count can't be of size 0" );
static_assert( N > 1, "sig length must be bigger than 1" );
// Resulting byte array, for delimiter_count skips we should have delimiter_count integers
std::array<int, delimiter_count> ret{};
// List of skips that point to the position of the delimiter wildcard in skip
std::array<size_t, delimiter_count> skips{};
// Current skip
size_t skip_count = 0u;
// Character count, traversed for skip
size_t skip_traversed_character_count = 0u;
for( size_t i = 0u; i < N; ++i )
{
if( sig[i] == DELIMITING_WILDCARD )
{
skips[skip_count] = skip_traversed_character_count;
++skip_count;
}
++skip_traversed_character_count;
}
// Finally traversed character count
size_t traversed_character_count = 0u;
// Make count (we will supposedly have at least an instance in our return array)
size_t make_count = 1u;
// Traverse signature
for( size_t i = 0u; i < N; ++i )
{
// Read before
if( i == 0u )
{
// We don't care about this, and we don't want to use 0
if( sig[0u] == skip_wildcard )
{
ret[0u] = -1;
continue;
}
ret[0u] = CONCATE_HEX( _char_to_int( sig[0u] ), _char_to_int( sig[1u] ) );
continue;
}
// Make result by skip data
for( const auto &skip : skips )
{
if( ( skip == i ) && skip < N - 1u )
{
// We don't care about this, and we don't want to use 0
if( sig[i + 1u] == skip_wildcard )
{
ret[make_count] = -1;
++make_count;
continue;
}
ret[make_count] = CONCATE_HEX( _char_to_int( sig[i + 1u] ), _char_to_int( sig[i + 2u] ) );
++make_count;
}
}
}
return ret;
}
#define SKIP_WILDCARD '?'
#define BUILD_ARRAY(a) _make_array<SKIP_WILDCARD, _count_wildcard<DELIMITING_WILDCARD>( a )>( a )
#define BUILD_ARRAY_MV(a) _make_array<SKIP_WILDCARD, _count_wildcard<DELIMITING_WILDCARD>( std::move( a ) )>( std::move( a ) )
// -----
// usage
// -----
template <int n>
constexpr int combine_two()
{
constexpr auto numbers = BUILD_ARRAY( "55 8B EC 83 E4 F8 8B 4D 08 BA ? ? ? ? E8 ? ? ? ? 85 C0 75 12 ?" );
constexpr int number = numbers[0];
constexpr int number_now = n + number;
return number_now;
}
int main()
{
constexpr auto shit = BUILD_ARRAY( "?? AA BB CC DD ? ? ? 02 31 32" );
for( const auto &hex : shit )
{
printf( "%x ", hex );
}
combine_two<3>();
constexpr auto saaahhah = combine_two<3>();
static_assert( combine_two<3>() == 88 );
static_assert( combine_two<3>() == saaahhah );
printf( "\n%d", saaahhah );
}
Method can be used for runtime too, but for that you'd probably prefer something else, faster.
It may be useful to someone. The logic of translating a set of bytes into a string and back. Solves the zero character problem.
#include <sstream>
#include <iomanip>
std::string BytesToHex(const std::vector<char>& data, size_t len)
{
std::stringstream ss;
ss << std::hex << std::setfill('0');
for(size_t index(0); index < len; ++index)
{
ss << std::setw(2) << static_cast<unsigned short>(data[index]);
}
return ss.str();
}
std::vector<char> HexToBytes(const std::string& data)
{
std::stringstream ss;
ss << data;
std::vector<char> resBytes;
size_t count = 0;
const auto len = data.size();
while(ss.good() && count < len)
{
unsigned short num;
char hexNum[2];
ss.read(hexNum, 2);
sscanf(hexNum, "%2hX", &num);
resBytes.push_back(static_cast<char>(num));
count += 2;
}
return resBytes;
}
If you can make your data to look like this e.g array of "0x01", "0xA1"
Then you can iterate your array and use sscanf to create the array of values
unsigned int result;
sscanf(data, "%x", &result);
The difficulty in an hex to char conversion is that the hex digits work pairwise, f.ex: 3132 or A0FF. So an even number of hex digits is assumed. However it could be perfectly valid to have an odd number of digits, like: 332 and AFF, which should be understood as 0332 and 0AFF.
I propose an improvement to Niels Keurentjes hex2bin() function.
First we count the number of valid hex digits. As we have to count, let's control also the buffer size:
void hex2bin(const char* src, char* target, size_t size_target)
{
int countdgts=0; // count hex digits
for (const char *p=src; *p && isxdigit(*p); p++)
countdgts++;
if ((countdgts+1)/2+1>size_target)
throw exception("Risk of buffer overflow");
By the way, to use isxdigit() you'll have to #include <cctype>.
Once we know how many digits, we can determine if the first one is the higher digit (only pairs) or not (first digit not a pair).
bool ishi = !(countdgts%2);
Then we can loop digit by digit, combining each pair using bin shift << and bin or, and
toggling the 'high' indicator at each iteration:
for (*target=0; *src; ishi = !ishi) {
char tmp = char2int(*src++); // hex digit on 4 lower bits
if (ishi)
*target = (tmp << 4); // high: shift by 4
else *target++ |= tmp; // low: complete previous
}
*target=0; // null terminated target (if desired)
}
I found this question, but the accepted answer didn't look like a C++ way of solving the task to me (this doesn't mean it's a bad answer or anything, just explaining motivation behind adding this one). I recollected this nice answer and decided to implement something similar. Here is complete code of what I ended up with (it also works for std::wstring):
#include <cctype>
#include <cstdlib>
#include <algorithm>
#include <iostream>
#include <iterator>
#include <ostream>
#include <stdexcept>
#include <string>
#include <vector>
template <typename OutputIt>
class hex_ostream_iterator :
public std::iterator<std::output_iterator_tag, void, void, void, void>
{
OutputIt out;
int digitCount;
int number;
public:
hex_ostream_iterator(OutputIt out) : out(out), digitCount(0), number(0)
{
}
hex_ostream_iterator<OutputIt> &
operator=(char c)
{
number = (number << 4) | char2int(c);
digitCount++;
if (digitCount == 2) {
digitCount = 0;
*out++ = number;
number = 0;
}
return *this;
}
hex_ostream_iterator<OutputIt> &
operator*()
{
return *this;
}
hex_ostream_iterator<OutputIt> &
operator++()
{
return *this;
}
hex_ostream_iterator<OutputIt> &
operator++(int)
{
return *this;
}
private:
int
char2int(char c)
{
static const std::string HEX_CHARS = "0123456789abcdef";
const char lowerC = std::tolower(c);
const std::string::size_type pos = HEX_CHARS.find_first_of(lowerC);
if (pos == std::string::npos) {
throw std::runtime_error(std::string("Not a hex digit: ") + c);
}
return pos;
}
};
template <typename OutputIt>
hex_ostream_iterator<OutputIt>
hex_iterator(OutputIt out)
{
return hex_ostream_iterator<OutputIt>(out);
}
template <typename InputIt, typename OutputIt>
hex_ostream_iterator<OutputIt>
from_hex_string(InputIt first, InputIt last, OutputIt out)
{
if (std::distance(first, last) % 2 == 1) {
*out = '0';
++out;
}
return std::copy(first, last, out);
}
int
main(int argc, char *argv[])
{
if (argc != 2) {
std::cout << "Usage: " << argv[0] << " hexstring" << std::endl;
return EXIT_FAILURE;
}
const std::string input = argv[1];
std::vector<unsigned char> bytes;
from_hex_string(input.begin(), input.end(),
hex_iterator(std::back_inserter(bytes)));
typedef std::ostream_iterator<unsigned char> osit;
std::copy(bytes.begin(), bytes.end(), osit(std::cout));
return EXIT_SUCCESS;
}
And the output of ./hex2bytes 61a062a063 | hexdump -C:
00000000 61 a0 62 a0 63 |a.b.c|
00000005
And of ./hex2bytes 6a062a063 | hexdump -C (note odd number of characters):
00000000 06 a0 62 a0 63 |..b.c|
00000005
In: "303132", Out: "012". Input string can be odd or even length.
char char2int(char input)
{
if (input >= '0' && input <= '9')
return input - '0';
if (input >= 'A' && input <= 'F')
return input - 'A' + 10;
if (input >= 'a' && input <= 'f')
return input - 'a' + 10;
throw std::runtime_error("Incorrect symbol in hex string");
};
string hex2str(string &hex)
{
string out;
out.resize(hex.size() / 2 + hex.size() % 2);
string::iterator it = hex.begin();
string::iterator out_it = out.begin();
if (hex.size() % 2 != 0) {
*out_it++ = char(char2int(*it++));
}
for (; it < hex.end() - 1; it++) {
*out_it++ = char2int(*it++) << 4 | char2int(*it);
};
return out;
}
Very similar to some of the other answers here, this is what I went with:
typedef uint8_t BYTE;
BYTE* ByteUtils::HexStringToBytes(BYTE* HexString, int ArrayLength)
{
BYTE* returnBytes;
returnBytes = (BYTE*) malloc(ArrayLength/2);
int j=0;
for(int i = 0; i < ArrayLength; i++)
{
if(i % 2 == 0)
{
int valueHigh = (int)(*(HexString+i));
int valueLow = (int)(*(HexString+i+1));
valueHigh = ByteUtils::HexAsciiToDec(valueHigh);
valueLow = ByteUtils::HexAsciiToDec(valueLow);
valueHigh *= 16;
int total = valueHigh + valueLow;
*(returnBytes+j++) = (BYTE)total;
}
}
return returnBytes;
}
int ByteUtils::HexAsciiToDec(int value)
{
if(value > 47 && value < 59)
{
value -= 48;
}
else if(value > 96 && value < 103)
{
value -= 97;
value += 10;
}
else if(value > 64 && value < 71)
{
value -= 65;
value += 10;
}
else
{
value = 0;
}
return value;
}
static bool Hexadec2xdigit(const std::string& data, std::string& buffer, std::size_t offset = sizeof(uint16_t))
{
if (data.empty())
{
return false;
}
try
{
constexpr auto s_function_lambda = [] (const char* string) noexcept { return *static_cast<const uint16_t*>(reinterpret_cast<const uint16_t*>(string)); };
{
for (std::size_t i = 0, tmp = s_function_lambda(data.c_str() + i); i < data.size(); i += offset, tmp = s_function_lambda(data.c_str() + i))
{
if (std::isxdigit(data[i]))
{
buffer += static_cast<char>(/*std::stoul*/std::strtoul(reinterpret_cast<const char*>(std::addressof(tmp)), NULL, 16));
}
}
}
return true;
}
catch (const std::invalid_argument& ex)
{
}
catch (const std::out_of_range& ex)
{
}
return false;
}
This code doesn't have much of a copy process
Related
What is the best way to convert a variable length hex string e.g. "01A1" to a byte array containing that data.
i.e converting this:
std::string = "01A1";
into this
char* hexArray;
int hexLength;
or this
std::vector<char> hexArray;
so that when I write this to a file and hexdump -C it I get the binary data containing 01A1.
This implementation uses the built-in strtol function to handle the actual conversion from text to bytes, but will work for any even-length hex string.
std::vector<char> HexToBytes(const std::string& hex) {
std::vector<char> bytes;
for (unsigned int i = 0; i < hex.length(); i += 2) {
std::string byteString = hex.substr(i, 2);
char byte = (char) strtol(byteString.c_str(), NULL, 16);
bytes.push_back(byte);
}
return bytes;
}
This ought to work:
int char2int(char input)
{
if(input >= '0' && input <= '9')
return input - '0';
if(input >= 'A' && input <= 'F')
return input - 'A' + 10;
if(input >= 'a' && input <= 'f')
return input - 'a' + 10;
throw std::invalid_argument("Invalid input string");
}
// This function assumes src to be a zero terminated sanitized string with
// an even number of [0-9a-f] characters, and target to be sufficiently large
void hex2bin(const char* src, char* target)
{
while(*src && src[1])
{
*(target++) = char2int(*src)*16 + char2int(src[1]);
src += 2;
}
}
Depending on your specific platform there's probably also a standard implementation though.
So for fun, I was curious if I could do this kind of conversion at compile-time. It doesn't have a lot of error checking and was done in VS2015, which doesn't support C++14 constexpr functions yet (thus how HexCharToInt looks). It takes a c-string array, converts pairs of characters into a single byte and expands those bytes into a uniform initialization list used to initialize the T type provided as a template parameter. T could be replaced with something like std::array to automatically return an array.
#include <cstdint>
#include <initializer_list>
#include <stdexcept>
#include <utility>
/* Quick and dirty conversion from a single character to its hex equivelent */
constexpr std::uint8_t HexCharToInt(char Input)
{
return
((Input >= 'a') && (Input <= 'f'))
? (Input - 87)
: ((Input >= 'A') && (Input <= 'F'))
? (Input - 55)
: ((Input >= '0') && (Input <= '9'))
? (Input - 48)
: throw std::exception{};
}
/* Position the characters into the appropriate nibble */
constexpr std::uint8_t HexChar(char High, char Low)
{
return (HexCharToInt(High) << 4) | (HexCharToInt(Low));
}
/* Adapter that performs sets of 2 characters into a single byte and combine the results into a uniform initialization list used to initialize T */
template <typename T, std::size_t Length, std::size_t ... Index>
constexpr T HexString(const char (&Input)[Length], const std::index_sequence<Index...>&)
{
return T{HexChar(Input[(Index * 2)], Input[((Index * 2) + 1)])...};
}
/* Entry function */
template <typename T, std::size_t Length>
constexpr T HexString(const char (&Input)[Length])
{
return HexString<T>(Input, std::make_index_sequence<(Length / 2)>{});
}
constexpr auto Y = KS::Utility::HexString<std::array<std::uint8_t, 3>>("ABCDEF");
You can use boost:
#include <boost/algorithm/hex.hpp>
char bytes[60] = {0};
std::string hash = boost::algorithm::unhex(std::string("313233343536373839"));
std::copy(hash.begin(), hash.end(), bytes);
You said "variable length." Just how variable do you mean?
For hex strings that fit into an unsigned long I have always liked the C function strtoul. To make it convert hex pass 16 as the radix value.
Code might look like:
#include <cstdlib>
std::string str = "01a1";
unsigned long val = strtoul(str.c_str(), 0, 16);
If you want to use OpenSSL to do it, there is a nifty trick I found:
BIGNUM *input = BN_new();
int input_length = BN_hex2bn(&input, argv[2]);
input_length = (input_length + 1) / 2; // BN_hex2bn() returns number of hex digits
unsigned char *input_buffer = (unsigned char*)malloc(input_length);
retval = BN_bn2bin(input, input_buffer);
Just be sure to strip off any leading '0x' to the string.
This can be done with a stringstream, you just need to store the value in an intermediate numeric type such as an int:
std::string test = "01A1"; // assuming this is an even length string
char bytes[test.length()/2];
stringstream converter;
for(int i = 0; i < test.length(); i+=2)
{
converter << std::hex << test.substr(i,2);
int byte;
converter >> byte;
bytes[i/2] = byte & 0xFF;
converter.str(std::string());
converter.clear();
}
Somebody mentioned using sscanf to do this, but didn't say how. This is how. It's useful because it also works in ancient versions of C and C++ and even most versions of embedded C or C++ for microcontrollers.
When converted to bytes, the hex-string in this example resolves to the ASCII text "Hello there!" which is then printed.
#include <stdio.h>
int main ()
{
char hexdata[] = "48656c6c6f20746865726521";
char bytedata[20]{};
for(int j = 0; j < sizeof(hexdata) / 2; j++) {
sscanf(hexdata + j * 2, "%02hhX", bytedata + j);
}
printf ("%s -> %s\n", hexdata, bytedata);
return 0;
}
I would use a standard function like sscanf to read the string into an unsigned integer, and then you already have the bytes you need in memory. If you were on a big endian machine you could just write out (memcpy) the memory of the integer from the first non-zero byte. However you can't safely assume this in general, so you can use some bit masking and shifting to get the bytes out.
const char* src = "01A1";
char hexArray[256] = {0};
int hexLength = 0;
// read in the string
unsigned int hex = 0;
sscanf(src, "%x", &hex);
// write it out
for (unsigned int mask = 0xff000000, bitPos=24; mask; mask>>=8, bitPos-=8) {
unsigned int currByte = hex & mask;
if (currByte || hexLength) {
hexArray[hexLength++] = currByte>>bitPos;
}
}
C++11 variant (with gcc 4.7 - little endian format):
#include <string>
#include <vector>
std::vector<uint8_t> decodeHex(const std::string & source)
{
if ( std::string::npos != source.find_first_not_of("0123456789ABCDEFabcdef") )
{
// you can throw exception here
return {};
}
union
{
uint64_t binary;
char byte[8];
} value{};
auto size = source.size(), offset = (size % 16);
std::vector<uint8_t> binary{};
binary.reserve((size + 1) / 2);
if ( offset )
{
value.binary = std::stoull(source.substr(0, offset), nullptr, 16);
for ( auto index = (offset + 1) / 2; index--; )
{
binary.emplace_back(value.byte[index]);
}
}
for ( ; offset < size; offset += 16 )
{
value.binary = std::stoull(source.substr(offset, 16), nullptr, 16);
for ( auto index = 8; index--; )
{
binary.emplace_back(value.byte[index]);
}
}
return binary;
}
Crypto++ variant (with gcc 4.7):
#include <string>
#include <vector>
#include <crypto++/filters.h>
#include <crypto++/hex.h>
std::vector<unsigned char> decodeHex(const std::string & source)
{
std::string hexCode;
CryptoPP::StringSource(
source, true,
new CryptoPP::HexDecoder(new CryptoPP::StringSink(hexCode)));
return std::vector<unsigned char>(hexCode.begin(), hexCode.end());
}
Note that the first variant is about two times faster than the second one and at the same time works with odd and even number of nibbles (the result of "a56ac" is {0x0a, 0x56, 0xac}). Crypto++ discards the last one if there are odd number of nibbels (the result of "a56ac" is {0xa5, 0x6a}) and silently skips invalid hex characters (the result of "a5sac" is {0xa5, 0xac}).
#include <iostream>
#include <sstream>
#include <vector>
int main() {
std::string s("313233");
char delim = ',';
int len = s.size();
for(int i = 2; i < len; i += 3, ++len) s.insert(i, 1, delim);
std::istringstream is(s);
std::ostringstream os;
is >> std::hex;
int n;
while (is >> n) {
char c = (char)n;
os << std::string(&c, 1);
if(is.peek() == delim) is.ignore();
}
// std::string form
std::string byte_string = os.str();
std::cout << byte_string << std::endl;
printf("%s\n", byte_string.c_str());
// std::vector form
std::vector<char> byte_vector(byte_string.begin(), byte_string.end());
byte_vector.push_back('\0'); // needed for a c-string
printf("%s\n", byte_vector.data());
}
The output is
123
123
123
'1' == 0x31, etc.
If your goal is speed, I have an AVX2 SIMD implementation of an encoder and decoder here: https://github.com/zbjornson/fast-hex. These benchmark ~12x faster than the fastest scalar implementations.
#include <iostream>
using byte = unsigned char;
static int charToInt(char c) {
if (c >= '0' && c <= '9') {
return c - '0';
}
if (c >= 'A' && c <= 'F') {
return c - 'A' + 10;
}
if (c >= 'a' && c <= 'f') {
return c - 'a' + 10;
}
return -1;
}
// Decodes specified HEX string to bytes array. Specified nBytes is length of bytes
// array. Returns -1 if fails to decode any of bytes. Returns number of bytes decoded
// on success. Maximum number of bytes decoded will be equal to nBytes. It is assumed
// that specified string is '\0' terminated.
int hexStringToBytes(const char* str, byte* bytes, int nBytes) {
int nDecoded {0};
for (int i {0}; str[i] != '\0' && nDecoded < nBytes; i += 2, nDecoded += 1) {
if (str[i + 1] != '\0') {
int m {charToInt(str[i])};
int n {charToInt(str[i + 1])};
if (m != -1 && n != -1) {
bytes[nDecoded] = (m << 4) | n;
} else {
return -1;
}
} else {
return -1;
}
}
return nDecoded;
}
int main(int argc, char* argv[]) {
if (argc < 2) {
return 1;
}
byte bytes[0x100];
int ret {hexStringToBytes(argv[1], bytes, 0x100)};
if (ret < 0) {
return 1;
}
std::cout << "number of bytes: " << ret << "\n" << std::hex;
for (int i {0}; i < ret; ++i) {
if (bytes[i] < 0x10) {
std::cout << "0";
}
std::cout << (bytes[i] & 0xff);
}
std::cout << "\n";
return 0;
}
i've modified TheoretiCAL's code
uint8_t buf[32] = {};
std::string hex = "0123";
while (hex.length() % 2)
hex = "0" + hex;
std::stringstream stream;
stream << std::hex << hex;
for (size_t i= 0; i <sizeof(buf); i++)
stream >> buf[i];
How I do this at compiletime
#pragma once
#include <memory>
#include <iostream>
#include <string>
#include <array>
#define DELIMITING_WILDCARD ' '
// #sean :)
constexpr int _char_to_int( char ch )
{
if( ch >= '0' && ch <= '9' )
return ch - '0';
if( ch >= 'A' && ch <= 'F' )
return ch - 'A' + 10;
return ch - 'a' + 10;
};
template <char wildcard, typename T, size_t N = sizeof( T )>
constexpr size_t _count_wildcard( T &&str )
{
size_t count = 1u;
for( const auto &character : str )
{
if( character == wildcard )
{
++count;
}
}
return count;
}
// construct a base16 hex and emplace it at make_count
// change 16 to 256 if u want the result to be when:
// sig[0] == 0xA && sig[1] == 0xB = 0xA0B
// or leave as is for the scenario to return 0xAB
#define CONCATE_HEX_FACTOR 16
#define CONCATE_HEX(a, b) ( CONCATE_HEX_FACTOR * ( a ) + ( b ) )
template
< char skip_wildcard,
// How many occurances of a delimiting wildcard do we find in sig
size_t delimiter_count,
typename T, size_t N = sizeof( T )>
constexpr auto _make_array( T &&sig )
{
static_assert( delimiter_count > 0, "this is a logical error, delimiter count can't be of size 0" );
static_assert( N > 1, "sig length must be bigger than 1" );
// Resulting byte array, for delimiter_count skips we should have delimiter_count integers
std::array<int, delimiter_count> ret{};
// List of skips that point to the position of the delimiter wildcard in skip
std::array<size_t, delimiter_count> skips{};
// Current skip
size_t skip_count = 0u;
// Character count, traversed for skip
size_t skip_traversed_character_count = 0u;
for( size_t i = 0u; i < N; ++i )
{
if( sig[i] == DELIMITING_WILDCARD )
{
skips[skip_count] = skip_traversed_character_count;
++skip_count;
}
++skip_traversed_character_count;
}
// Finally traversed character count
size_t traversed_character_count = 0u;
// Make count (we will supposedly have at least an instance in our return array)
size_t make_count = 1u;
// Traverse signature
for( size_t i = 0u; i < N; ++i )
{
// Read before
if( i == 0u )
{
// We don't care about this, and we don't want to use 0
if( sig[0u] == skip_wildcard )
{
ret[0u] = -1;
continue;
}
ret[0u] = CONCATE_HEX( _char_to_int( sig[0u] ), _char_to_int( sig[1u] ) );
continue;
}
// Make result by skip data
for( const auto &skip : skips )
{
if( ( skip == i ) && skip < N - 1u )
{
// We don't care about this, and we don't want to use 0
if( sig[i + 1u] == skip_wildcard )
{
ret[make_count] = -1;
++make_count;
continue;
}
ret[make_count] = CONCATE_HEX( _char_to_int( sig[i + 1u] ), _char_to_int( sig[i + 2u] ) );
++make_count;
}
}
}
return ret;
}
#define SKIP_WILDCARD '?'
#define BUILD_ARRAY(a) _make_array<SKIP_WILDCARD, _count_wildcard<DELIMITING_WILDCARD>( a )>( a )
#define BUILD_ARRAY_MV(a) _make_array<SKIP_WILDCARD, _count_wildcard<DELIMITING_WILDCARD>( std::move( a ) )>( std::move( a ) )
// -----
// usage
// -----
template <int n>
constexpr int combine_two()
{
constexpr auto numbers = BUILD_ARRAY( "55 8B EC 83 E4 F8 8B 4D 08 BA ? ? ? ? E8 ? ? ? ? 85 C0 75 12 ?" );
constexpr int number = numbers[0];
constexpr int number_now = n + number;
return number_now;
}
int main()
{
constexpr auto shit = BUILD_ARRAY( "?? AA BB CC DD ? ? ? 02 31 32" );
for( const auto &hex : shit )
{
printf( "%x ", hex );
}
combine_two<3>();
constexpr auto saaahhah = combine_two<3>();
static_assert( combine_two<3>() == 88 );
static_assert( combine_two<3>() == saaahhah );
printf( "\n%d", saaahhah );
}
Method can be used for runtime too, but for that you'd probably prefer something else, faster.
It may be useful to someone. The logic of translating a set of bytes into a string and back. Solves the zero character problem.
#include <sstream>
#include <iomanip>
std::string BytesToHex(const std::vector<char>& data, size_t len)
{
std::stringstream ss;
ss << std::hex << std::setfill('0');
for(size_t index(0); index < len; ++index)
{
ss << std::setw(2) << static_cast<unsigned short>(data[index]);
}
return ss.str();
}
std::vector<char> HexToBytes(const std::string& data)
{
std::stringstream ss;
ss << data;
std::vector<char> resBytes;
size_t count = 0;
const auto len = data.size();
while(ss.good() && count < len)
{
unsigned short num;
char hexNum[2];
ss.read(hexNum, 2);
sscanf(hexNum, "%2hX", &num);
resBytes.push_back(static_cast<char>(num));
count += 2;
}
return resBytes;
}
If you can make your data to look like this e.g array of "0x01", "0xA1"
Then you can iterate your array and use sscanf to create the array of values
unsigned int result;
sscanf(data, "%x", &result);
The difficulty in an hex to char conversion is that the hex digits work pairwise, f.ex: 3132 or A0FF. So an even number of hex digits is assumed. However it could be perfectly valid to have an odd number of digits, like: 332 and AFF, which should be understood as 0332 and 0AFF.
I propose an improvement to Niels Keurentjes hex2bin() function.
First we count the number of valid hex digits. As we have to count, let's control also the buffer size:
void hex2bin(const char* src, char* target, size_t size_target)
{
int countdgts=0; // count hex digits
for (const char *p=src; *p && isxdigit(*p); p++)
countdgts++;
if ((countdgts+1)/2+1>size_target)
throw exception("Risk of buffer overflow");
By the way, to use isxdigit() you'll have to #include <cctype>.
Once we know how many digits, we can determine if the first one is the higher digit (only pairs) or not (first digit not a pair).
bool ishi = !(countdgts%2);
Then we can loop digit by digit, combining each pair using bin shift << and bin or, and
toggling the 'high' indicator at each iteration:
for (*target=0; *src; ishi = !ishi) {
char tmp = char2int(*src++); // hex digit on 4 lower bits
if (ishi)
*target = (tmp << 4); // high: shift by 4
else *target++ |= tmp; // low: complete previous
}
*target=0; // null terminated target (if desired)
}
I found this question, but the accepted answer didn't look like a C++ way of solving the task to me (this doesn't mean it's a bad answer or anything, just explaining motivation behind adding this one). I recollected this nice answer and decided to implement something similar. Here is complete code of what I ended up with (it also works for std::wstring):
#include <cctype>
#include <cstdlib>
#include <algorithm>
#include <iostream>
#include <iterator>
#include <ostream>
#include <stdexcept>
#include <string>
#include <vector>
template <typename OutputIt>
class hex_ostream_iterator :
public std::iterator<std::output_iterator_tag, void, void, void, void>
{
OutputIt out;
int digitCount;
int number;
public:
hex_ostream_iterator(OutputIt out) : out(out), digitCount(0), number(0)
{
}
hex_ostream_iterator<OutputIt> &
operator=(char c)
{
number = (number << 4) | char2int(c);
digitCount++;
if (digitCount == 2) {
digitCount = 0;
*out++ = number;
number = 0;
}
return *this;
}
hex_ostream_iterator<OutputIt> &
operator*()
{
return *this;
}
hex_ostream_iterator<OutputIt> &
operator++()
{
return *this;
}
hex_ostream_iterator<OutputIt> &
operator++(int)
{
return *this;
}
private:
int
char2int(char c)
{
static const std::string HEX_CHARS = "0123456789abcdef";
const char lowerC = std::tolower(c);
const std::string::size_type pos = HEX_CHARS.find_first_of(lowerC);
if (pos == std::string::npos) {
throw std::runtime_error(std::string("Not a hex digit: ") + c);
}
return pos;
}
};
template <typename OutputIt>
hex_ostream_iterator<OutputIt>
hex_iterator(OutputIt out)
{
return hex_ostream_iterator<OutputIt>(out);
}
template <typename InputIt, typename OutputIt>
hex_ostream_iterator<OutputIt>
from_hex_string(InputIt first, InputIt last, OutputIt out)
{
if (std::distance(first, last) % 2 == 1) {
*out = '0';
++out;
}
return std::copy(first, last, out);
}
int
main(int argc, char *argv[])
{
if (argc != 2) {
std::cout << "Usage: " << argv[0] << " hexstring" << std::endl;
return EXIT_FAILURE;
}
const std::string input = argv[1];
std::vector<unsigned char> bytes;
from_hex_string(input.begin(), input.end(),
hex_iterator(std::back_inserter(bytes)));
typedef std::ostream_iterator<unsigned char> osit;
std::copy(bytes.begin(), bytes.end(), osit(std::cout));
return EXIT_SUCCESS;
}
And the output of ./hex2bytes 61a062a063 | hexdump -C:
00000000 61 a0 62 a0 63 |a.b.c|
00000005
And of ./hex2bytes 6a062a063 | hexdump -C (note odd number of characters):
00000000 06 a0 62 a0 63 |..b.c|
00000005
In: "303132", Out: "012". Input string can be odd or even length.
char char2int(char input)
{
if (input >= '0' && input <= '9')
return input - '0';
if (input >= 'A' && input <= 'F')
return input - 'A' + 10;
if (input >= 'a' && input <= 'f')
return input - 'a' + 10;
throw std::runtime_error("Incorrect symbol in hex string");
};
string hex2str(string &hex)
{
string out;
out.resize(hex.size() / 2 + hex.size() % 2);
string::iterator it = hex.begin();
string::iterator out_it = out.begin();
if (hex.size() % 2 != 0) {
*out_it++ = char(char2int(*it++));
}
for (; it < hex.end() - 1; it++) {
*out_it++ = char2int(*it++) << 4 | char2int(*it);
};
return out;
}
Very similar to some of the other answers here, this is what I went with:
typedef uint8_t BYTE;
BYTE* ByteUtils::HexStringToBytes(BYTE* HexString, int ArrayLength)
{
BYTE* returnBytes;
returnBytes = (BYTE*) malloc(ArrayLength/2);
int j=0;
for(int i = 0; i < ArrayLength; i++)
{
if(i % 2 == 0)
{
int valueHigh = (int)(*(HexString+i));
int valueLow = (int)(*(HexString+i+1));
valueHigh = ByteUtils::HexAsciiToDec(valueHigh);
valueLow = ByteUtils::HexAsciiToDec(valueLow);
valueHigh *= 16;
int total = valueHigh + valueLow;
*(returnBytes+j++) = (BYTE)total;
}
}
return returnBytes;
}
int ByteUtils::HexAsciiToDec(int value)
{
if(value > 47 && value < 59)
{
value -= 48;
}
else if(value > 96 && value < 103)
{
value -= 97;
value += 10;
}
else if(value > 64 && value < 71)
{
value -= 65;
value += 10;
}
else
{
value = 0;
}
return value;
}
static bool Hexadec2xdigit(const std::string& data, std::string& buffer, std::size_t offset = sizeof(uint16_t))
{
if (data.empty())
{
return false;
}
try
{
constexpr auto s_function_lambda = [] (const char* string) noexcept { return *static_cast<const uint16_t*>(reinterpret_cast<const uint16_t*>(string)); };
{
for (std::size_t i = 0, tmp = s_function_lambda(data.c_str() + i); i < data.size(); i += offset, tmp = s_function_lambda(data.c_str() + i))
{
if (std::isxdigit(data[i]))
{
buffer += static_cast<char>(/*std::stoul*/std::strtoul(reinterpret_cast<const char*>(std::addressof(tmp)), NULL, 16));
}
}
}
return true;
}
catch (const std::invalid_argument& ex)
{
}
catch (const std::out_of_range& ex)
{
}
return false;
}
This code doesn't have much of a copy process
What is the best way to convert a variable length hex string e.g. "01A1" to a byte array containing that data.
i.e converting this:
std::string = "01A1";
into this
char* hexArray;
int hexLength;
or this
std::vector<char> hexArray;
so that when I write this to a file and hexdump -C it I get the binary data containing 01A1.
This implementation uses the built-in strtol function to handle the actual conversion from text to bytes, but will work for any even-length hex string.
std::vector<char> HexToBytes(const std::string& hex) {
std::vector<char> bytes;
for (unsigned int i = 0; i < hex.length(); i += 2) {
std::string byteString = hex.substr(i, 2);
char byte = (char) strtol(byteString.c_str(), NULL, 16);
bytes.push_back(byte);
}
return bytes;
}
This ought to work:
int char2int(char input)
{
if(input >= '0' && input <= '9')
return input - '0';
if(input >= 'A' && input <= 'F')
return input - 'A' + 10;
if(input >= 'a' && input <= 'f')
return input - 'a' + 10;
throw std::invalid_argument("Invalid input string");
}
// This function assumes src to be a zero terminated sanitized string with
// an even number of [0-9a-f] characters, and target to be sufficiently large
void hex2bin(const char* src, char* target)
{
while(*src && src[1])
{
*(target++) = char2int(*src)*16 + char2int(src[1]);
src += 2;
}
}
Depending on your specific platform there's probably also a standard implementation though.
So for fun, I was curious if I could do this kind of conversion at compile-time. It doesn't have a lot of error checking and was done in VS2015, which doesn't support C++14 constexpr functions yet (thus how HexCharToInt looks). It takes a c-string array, converts pairs of characters into a single byte and expands those bytes into a uniform initialization list used to initialize the T type provided as a template parameter. T could be replaced with something like std::array to automatically return an array.
#include <cstdint>
#include <initializer_list>
#include <stdexcept>
#include <utility>
/* Quick and dirty conversion from a single character to its hex equivelent */
constexpr std::uint8_t HexCharToInt(char Input)
{
return
((Input >= 'a') && (Input <= 'f'))
? (Input - 87)
: ((Input >= 'A') && (Input <= 'F'))
? (Input - 55)
: ((Input >= '0') && (Input <= '9'))
? (Input - 48)
: throw std::exception{};
}
/* Position the characters into the appropriate nibble */
constexpr std::uint8_t HexChar(char High, char Low)
{
return (HexCharToInt(High) << 4) | (HexCharToInt(Low));
}
/* Adapter that performs sets of 2 characters into a single byte and combine the results into a uniform initialization list used to initialize T */
template <typename T, std::size_t Length, std::size_t ... Index>
constexpr T HexString(const char (&Input)[Length], const std::index_sequence<Index...>&)
{
return T{HexChar(Input[(Index * 2)], Input[((Index * 2) + 1)])...};
}
/* Entry function */
template <typename T, std::size_t Length>
constexpr T HexString(const char (&Input)[Length])
{
return HexString<T>(Input, std::make_index_sequence<(Length / 2)>{});
}
constexpr auto Y = KS::Utility::HexString<std::array<std::uint8_t, 3>>("ABCDEF");
You can use boost:
#include <boost/algorithm/hex.hpp>
char bytes[60] = {0};
std::string hash = boost::algorithm::unhex(std::string("313233343536373839"));
std::copy(hash.begin(), hash.end(), bytes);
You said "variable length." Just how variable do you mean?
For hex strings that fit into an unsigned long I have always liked the C function strtoul. To make it convert hex pass 16 as the radix value.
Code might look like:
#include <cstdlib>
std::string str = "01a1";
unsigned long val = strtoul(str.c_str(), 0, 16);
If you want to use OpenSSL to do it, there is a nifty trick I found:
BIGNUM *input = BN_new();
int input_length = BN_hex2bn(&input, argv[2]);
input_length = (input_length + 1) / 2; // BN_hex2bn() returns number of hex digits
unsigned char *input_buffer = (unsigned char*)malloc(input_length);
retval = BN_bn2bin(input, input_buffer);
Just be sure to strip off any leading '0x' to the string.
This can be done with a stringstream, you just need to store the value in an intermediate numeric type such as an int:
std::string test = "01A1"; // assuming this is an even length string
char bytes[test.length()/2];
stringstream converter;
for(int i = 0; i < test.length(); i+=2)
{
converter << std::hex << test.substr(i,2);
int byte;
converter >> byte;
bytes[i/2] = byte & 0xFF;
converter.str(std::string());
converter.clear();
}
Somebody mentioned using sscanf to do this, but didn't say how. This is how. It's useful because it also works in ancient versions of C and C++ and even most versions of embedded C or C++ for microcontrollers.
When converted to bytes, the hex-string in this example resolves to the ASCII text "Hello there!" which is then printed.
#include <stdio.h>
int main ()
{
char hexdata[] = "48656c6c6f20746865726521";
char bytedata[20]{};
for(int j = 0; j < sizeof(hexdata) / 2; j++) {
sscanf(hexdata + j * 2, "%02hhX", bytedata + j);
}
printf ("%s -> %s\n", hexdata, bytedata);
return 0;
}
I would use a standard function like sscanf to read the string into an unsigned integer, and then you already have the bytes you need in memory. If you were on a big endian machine you could just write out (memcpy) the memory of the integer from the first non-zero byte. However you can't safely assume this in general, so you can use some bit masking and shifting to get the bytes out.
const char* src = "01A1";
char hexArray[256] = {0};
int hexLength = 0;
// read in the string
unsigned int hex = 0;
sscanf(src, "%x", &hex);
// write it out
for (unsigned int mask = 0xff000000, bitPos=24; mask; mask>>=8, bitPos-=8) {
unsigned int currByte = hex & mask;
if (currByte || hexLength) {
hexArray[hexLength++] = currByte>>bitPos;
}
}
C++11 variant (with gcc 4.7 - little endian format):
#include <string>
#include <vector>
std::vector<uint8_t> decodeHex(const std::string & source)
{
if ( std::string::npos != source.find_first_not_of("0123456789ABCDEFabcdef") )
{
// you can throw exception here
return {};
}
union
{
uint64_t binary;
char byte[8];
} value{};
auto size = source.size(), offset = (size % 16);
std::vector<uint8_t> binary{};
binary.reserve((size + 1) / 2);
if ( offset )
{
value.binary = std::stoull(source.substr(0, offset), nullptr, 16);
for ( auto index = (offset + 1) / 2; index--; )
{
binary.emplace_back(value.byte[index]);
}
}
for ( ; offset < size; offset += 16 )
{
value.binary = std::stoull(source.substr(offset, 16), nullptr, 16);
for ( auto index = 8; index--; )
{
binary.emplace_back(value.byte[index]);
}
}
return binary;
}
Crypto++ variant (with gcc 4.7):
#include <string>
#include <vector>
#include <crypto++/filters.h>
#include <crypto++/hex.h>
std::vector<unsigned char> decodeHex(const std::string & source)
{
std::string hexCode;
CryptoPP::StringSource(
source, true,
new CryptoPP::HexDecoder(new CryptoPP::StringSink(hexCode)));
return std::vector<unsigned char>(hexCode.begin(), hexCode.end());
}
Note that the first variant is about two times faster than the second one and at the same time works with odd and even number of nibbles (the result of "a56ac" is {0x0a, 0x56, 0xac}). Crypto++ discards the last one if there are odd number of nibbels (the result of "a56ac" is {0xa5, 0x6a}) and silently skips invalid hex characters (the result of "a5sac" is {0xa5, 0xac}).
#include <iostream>
#include <sstream>
#include <vector>
int main() {
std::string s("313233");
char delim = ',';
int len = s.size();
for(int i = 2; i < len; i += 3, ++len) s.insert(i, 1, delim);
std::istringstream is(s);
std::ostringstream os;
is >> std::hex;
int n;
while (is >> n) {
char c = (char)n;
os << std::string(&c, 1);
if(is.peek() == delim) is.ignore();
}
// std::string form
std::string byte_string = os.str();
std::cout << byte_string << std::endl;
printf("%s\n", byte_string.c_str());
// std::vector form
std::vector<char> byte_vector(byte_string.begin(), byte_string.end());
byte_vector.push_back('\0'); // needed for a c-string
printf("%s\n", byte_vector.data());
}
The output is
123
123
123
'1' == 0x31, etc.
If your goal is speed, I have an AVX2 SIMD implementation of an encoder and decoder here: https://github.com/zbjornson/fast-hex. These benchmark ~12x faster than the fastest scalar implementations.
#include <iostream>
using byte = unsigned char;
static int charToInt(char c) {
if (c >= '0' && c <= '9') {
return c - '0';
}
if (c >= 'A' && c <= 'F') {
return c - 'A' + 10;
}
if (c >= 'a' && c <= 'f') {
return c - 'a' + 10;
}
return -1;
}
// Decodes specified HEX string to bytes array. Specified nBytes is length of bytes
// array. Returns -1 if fails to decode any of bytes. Returns number of bytes decoded
// on success. Maximum number of bytes decoded will be equal to nBytes. It is assumed
// that specified string is '\0' terminated.
int hexStringToBytes(const char* str, byte* bytes, int nBytes) {
int nDecoded {0};
for (int i {0}; str[i] != '\0' && nDecoded < nBytes; i += 2, nDecoded += 1) {
if (str[i + 1] != '\0') {
int m {charToInt(str[i])};
int n {charToInt(str[i + 1])};
if (m != -1 && n != -1) {
bytes[nDecoded] = (m << 4) | n;
} else {
return -1;
}
} else {
return -1;
}
}
return nDecoded;
}
int main(int argc, char* argv[]) {
if (argc < 2) {
return 1;
}
byte bytes[0x100];
int ret {hexStringToBytes(argv[1], bytes, 0x100)};
if (ret < 0) {
return 1;
}
std::cout << "number of bytes: " << ret << "\n" << std::hex;
for (int i {0}; i < ret; ++i) {
if (bytes[i] < 0x10) {
std::cout << "0";
}
std::cout << (bytes[i] & 0xff);
}
std::cout << "\n";
return 0;
}
i've modified TheoretiCAL's code
uint8_t buf[32] = {};
std::string hex = "0123";
while (hex.length() % 2)
hex = "0" + hex;
std::stringstream stream;
stream << std::hex << hex;
for (size_t i= 0; i <sizeof(buf); i++)
stream >> buf[i];
How I do this at compiletime
#pragma once
#include <memory>
#include <iostream>
#include <string>
#include <array>
#define DELIMITING_WILDCARD ' '
// #sean :)
constexpr int _char_to_int( char ch )
{
if( ch >= '0' && ch <= '9' )
return ch - '0';
if( ch >= 'A' && ch <= 'F' )
return ch - 'A' + 10;
return ch - 'a' + 10;
};
template <char wildcard, typename T, size_t N = sizeof( T )>
constexpr size_t _count_wildcard( T &&str )
{
size_t count = 1u;
for( const auto &character : str )
{
if( character == wildcard )
{
++count;
}
}
return count;
}
// construct a base16 hex and emplace it at make_count
// change 16 to 256 if u want the result to be when:
// sig[0] == 0xA && sig[1] == 0xB = 0xA0B
// or leave as is for the scenario to return 0xAB
#define CONCATE_HEX_FACTOR 16
#define CONCATE_HEX(a, b) ( CONCATE_HEX_FACTOR * ( a ) + ( b ) )
template
< char skip_wildcard,
// How many occurances of a delimiting wildcard do we find in sig
size_t delimiter_count,
typename T, size_t N = sizeof( T )>
constexpr auto _make_array( T &&sig )
{
static_assert( delimiter_count > 0, "this is a logical error, delimiter count can't be of size 0" );
static_assert( N > 1, "sig length must be bigger than 1" );
// Resulting byte array, for delimiter_count skips we should have delimiter_count integers
std::array<int, delimiter_count> ret{};
// List of skips that point to the position of the delimiter wildcard in skip
std::array<size_t, delimiter_count> skips{};
// Current skip
size_t skip_count = 0u;
// Character count, traversed for skip
size_t skip_traversed_character_count = 0u;
for( size_t i = 0u; i < N; ++i )
{
if( sig[i] == DELIMITING_WILDCARD )
{
skips[skip_count] = skip_traversed_character_count;
++skip_count;
}
++skip_traversed_character_count;
}
// Finally traversed character count
size_t traversed_character_count = 0u;
// Make count (we will supposedly have at least an instance in our return array)
size_t make_count = 1u;
// Traverse signature
for( size_t i = 0u; i < N; ++i )
{
// Read before
if( i == 0u )
{
// We don't care about this, and we don't want to use 0
if( sig[0u] == skip_wildcard )
{
ret[0u] = -1;
continue;
}
ret[0u] = CONCATE_HEX( _char_to_int( sig[0u] ), _char_to_int( sig[1u] ) );
continue;
}
// Make result by skip data
for( const auto &skip : skips )
{
if( ( skip == i ) && skip < N - 1u )
{
// We don't care about this, and we don't want to use 0
if( sig[i + 1u] == skip_wildcard )
{
ret[make_count] = -1;
++make_count;
continue;
}
ret[make_count] = CONCATE_HEX( _char_to_int( sig[i + 1u] ), _char_to_int( sig[i + 2u] ) );
++make_count;
}
}
}
return ret;
}
#define SKIP_WILDCARD '?'
#define BUILD_ARRAY(a) _make_array<SKIP_WILDCARD, _count_wildcard<DELIMITING_WILDCARD>( a )>( a )
#define BUILD_ARRAY_MV(a) _make_array<SKIP_WILDCARD, _count_wildcard<DELIMITING_WILDCARD>( std::move( a ) )>( std::move( a ) )
// -----
// usage
// -----
template <int n>
constexpr int combine_two()
{
constexpr auto numbers = BUILD_ARRAY( "55 8B EC 83 E4 F8 8B 4D 08 BA ? ? ? ? E8 ? ? ? ? 85 C0 75 12 ?" );
constexpr int number = numbers[0];
constexpr int number_now = n + number;
return number_now;
}
int main()
{
constexpr auto shit = BUILD_ARRAY( "?? AA BB CC DD ? ? ? 02 31 32" );
for( const auto &hex : shit )
{
printf( "%x ", hex );
}
combine_two<3>();
constexpr auto saaahhah = combine_two<3>();
static_assert( combine_two<3>() == 88 );
static_assert( combine_two<3>() == saaahhah );
printf( "\n%d", saaahhah );
}
Method can be used for runtime too, but for that you'd probably prefer something else, faster.
It may be useful to someone. The logic of translating a set of bytes into a string and back. Solves the zero character problem.
#include <sstream>
#include <iomanip>
std::string BytesToHex(const std::vector<char>& data, size_t len)
{
std::stringstream ss;
ss << std::hex << std::setfill('0');
for(size_t index(0); index < len; ++index)
{
ss << std::setw(2) << static_cast<unsigned short>(data[index]);
}
return ss.str();
}
std::vector<char> HexToBytes(const std::string& data)
{
std::stringstream ss;
ss << data;
std::vector<char> resBytes;
size_t count = 0;
const auto len = data.size();
while(ss.good() && count < len)
{
unsigned short num;
char hexNum[2];
ss.read(hexNum, 2);
sscanf(hexNum, "%2hX", &num);
resBytes.push_back(static_cast<char>(num));
count += 2;
}
return resBytes;
}
If you can make your data to look like this e.g array of "0x01", "0xA1"
Then you can iterate your array and use sscanf to create the array of values
unsigned int result;
sscanf(data, "%x", &result);
The difficulty in an hex to char conversion is that the hex digits work pairwise, f.ex: 3132 or A0FF. So an even number of hex digits is assumed. However it could be perfectly valid to have an odd number of digits, like: 332 and AFF, which should be understood as 0332 and 0AFF.
I propose an improvement to Niels Keurentjes hex2bin() function.
First we count the number of valid hex digits. As we have to count, let's control also the buffer size:
void hex2bin(const char* src, char* target, size_t size_target)
{
int countdgts=0; // count hex digits
for (const char *p=src; *p && isxdigit(*p); p++)
countdgts++;
if ((countdgts+1)/2+1>size_target)
throw exception("Risk of buffer overflow");
By the way, to use isxdigit() you'll have to #include <cctype>.
Once we know how many digits, we can determine if the first one is the higher digit (only pairs) or not (first digit not a pair).
bool ishi = !(countdgts%2);
Then we can loop digit by digit, combining each pair using bin shift << and bin or, and
toggling the 'high' indicator at each iteration:
for (*target=0; *src; ishi = !ishi) {
char tmp = char2int(*src++); // hex digit on 4 lower bits
if (ishi)
*target = (tmp << 4); // high: shift by 4
else *target++ |= tmp; // low: complete previous
}
*target=0; // null terminated target (if desired)
}
I found this question, but the accepted answer didn't look like a C++ way of solving the task to me (this doesn't mean it's a bad answer or anything, just explaining motivation behind adding this one). I recollected this nice answer and decided to implement something similar. Here is complete code of what I ended up with (it also works for std::wstring):
#include <cctype>
#include <cstdlib>
#include <algorithm>
#include <iostream>
#include <iterator>
#include <ostream>
#include <stdexcept>
#include <string>
#include <vector>
template <typename OutputIt>
class hex_ostream_iterator :
public std::iterator<std::output_iterator_tag, void, void, void, void>
{
OutputIt out;
int digitCount;
int number;
public:
hex_ostream_iterator(OutputIt out) : out(out), digitCount(0), number(0)
{
}
hex_ostream_iterator<OutputIt> &
operator=(char c)
{
number = (number << 4) | char2int(c);
digitCount++;
if (digitCount == 2) {
digitCount = 0;
*out++ = number;
number = 0;
}
return *this;
}
hex_ostream_iterator<OutputIt> &
operator*()
{
return *this;
}
hex_ostream_iterator<OutputIt> &
operator++()
{
return *this;
}
hex_ostream_iterator<OutputIt> &
operator++(int)
{
return *this;
}
private:
int
char2int(char c)
{
static const std::string HEX_CHARS = "0123456789abcdef";
const char lowerC = std::tolower(c);
const std::string::size_type pos = HEX_CHARS.find_first_of(lowerC);
if (pos == std::string::npos) {
throw std::runtime_error(std::string("Not a hex digit: ") + c);
}
return pos;
}
};
template <typename OutputIt>
hex_ostream_iterator<OutputIt>
hex_iterator(OutputIt out)
{
return hex_ostream_iterator<OutputIt>(out);
}
template <typename InputIt, typename OutputIt>
hex_ostream_iterator<OutputIt>
from_hex_string(InputIt first, InputIt last, OutputIt out)
{
if (std::distance(first, last) % 2 == 1) {
*out = '0';
++out;
}
return std::copy(first, last, out);
}
int
main(int argc, char *argv[])
{
if (argc != 2) {
std::cout << "Usage: " << argv[0] << " hexstring" << std::endl;
return EXIT_FAILURE;
}
const std::string input = argv[1];
std::vector<unsigned char> bytes;
from_hex_string(input.begin(), input.end(),
hex_iterator(std::back_inserter(bytes)));
typedef std::ostream_iterator<unsigned char> osit;
std::copy(bytes.begin(), bytes.end(), osit(std::cout));
return EXIT_SUCCESS;
}
And the output of ./hex2bytes 61a062a063 | hexdump -C:
00000000 61 a0 62 a0 63 |a.b.c|
00000005
And of ./hex2bytes 6a062a063 | hexdump -C (note odd number of characters):
00000000 06 a0 62 a0 63 |..b.c|
00000005
In: "303132", Out: "012". Input string can be odd or even length.
char char2int(char input)
{
if (input >= '0' && input <= '9')
return input - '0';
if (input >= 'A' && input <= 'F')
return input - 'A' + 10;
if (input >= 'a' && input <= 'f')
return input - 'a' + 10;
throw std::runtime_error("Incorrect symbol in hex string");
};
string hex2str(string &hex)
{
string out;
out.resize(hex.size() / 2 + hex.size() % 2);
string::iterator it = hex.begin();
string::iterator out_it = out.begin();
if (hex.size() % 2 != 0) {
*out_it++ = char(char2int(*it++));
}
for (; it < hex.end() - 1; it++) {
*out_it++ = char2int(*it++) << 4 | char2int(*it);
};
return out;
}
Very similar to some of the other answers here, this is what I went with:
typedef uint8_t BYTE;
BYTE* ByteUtils::HexStringToBytes(BYTE* HexString, int ArrayLength)
{
BYTE* returnBytes;
returnBytes = (BYTE*) malloc(ArrayLength/2);
int j=0;
for(int i = 0; i < ArrayLength; i++)
{
if(i % 2 == 0)
{
int valueHigh = (int)(*(HexString+i));
int valueLow = (int)(*(HexString+i+1));
valueHigh = ByteUtils::HexAsciiToDec(valueHigh);
valueLow = ByteUtils::HexAsciiToDec(valueLow);
valueHigh *= 16;
int total = valueHigh + valueLow;
*(returnBytes+j++) = (BYTE)total;
}
}
return returnBytes;
}
int ByteUtils::HexAsciiToDec(int value)
{
if(value > 47 && value < 59)
{
value -= 48;
}
else if(value > 96 && value < 103)
{
value -= 97;
value += 10;
}
else if(value > 64 && value < 71)
{
value -= 65;
value += 10;
}
else
{
value = 0;
}
return value;
}
static bool Hexadec2xdigit(const std::string& data, std::string& buffer, std::size_t offset = sizeof(uint16_t))
{
if (data.empty())
{
return false;
}
try
{
constexpr auto s_function_lambda = [] (const char* string) noexcept { return *static_cast<const uint16_t*>(reinterpret_cast<const uint16_t*>(string)); };
{
for (std::size_t i = 0, tmp = s_function_lambda(data.c_str() + i); i < data.size(); i += offset, tmp = s_function_lambda(data.c_str() + i))
{
if (std::isxdigit(data[i]))
{
buffer += static_cast<char>(/*std::stoul*/std::strtoul(reinterpret_cast<const char*>(std::addressof(tmp)), NULL, 16));
}
}
}
return true;
}
catch (const std::invalid_argument& ex)
{
}
catch (const std::out_of_range& ex)
{
}
return false;
}
This code doesn't have much of a copy process
I have below function that supports for conversion of LPCTSTR to BYTE , but the input str only support digits as of now.
void StrToByte2(LPCTSTR str, BYTE *dest)
{
UINT count = _ttoi(str);
BYTE buf[4] = { 0 };
char string[10] = { 0 };
sprintf_s(string, 10, "%04d", count);
for (int i = 0; i < 4; ++i)
{
if ((string[i] >= '0') && (string[i] <= '9'))
buf[i] = string[i] - '0';
}
dest[0] = (BYTE)(buf[0] << 4) | buf[1];
dest[1] = (BYTE)(buf[2] << 4) | buf[3];
}
If i call this function on "1234" ( any digits) , dest output some 12814,
struct st
{
byte btID[2];
int nID;
};
PTR ptr(new st);
StrToByte2(strCode, ptr->btID);
but when i call this function on any hexadecimal ex A123 , it outputs 0000 always.
Below function is used to convert back the dest code to str
CString Byte2ToStr(const byte* pbuf)
{
CString str;
str.Format(_T("%02X%02X"), pbuf[0], pbuf[1]);
return str;
}
How can i get A123 to converted to bytes and than back to str to display A123??
Please help!!
PTR ptr(new st);
This is a memory leak in C++, because new st allocates memory and there is no way to release it.
UINT count = _ttoi(str);
...
sprintf_s(string, 10, "%04d", count);
This is converting string to integer, then converts integer back to string. It doesn't seem to have a real purpose.
For example, "1234" is converted to 1234, and back to "1234". But "A123" is not a valid number so it is converted to 0, then converted to "0000". So this method fails. You can just work with the original string.
It seems this function tries to fit 2 integers in to 1 byte. This can be done as long as each value is less than 16 or 0xF (I don't know what purpose this might have) It can be fixed as follows:
void StrToByte2(const wchar_t* str, BYTE *dest)
{
int len = wcslen(str);
if(len != 4)
return; //handle error
char buf[4] = { 0 };
for(int i = 0; i < 4; ++i)
if(str[i] >= L'0' && str[i] <= L'9')
buf[i] = (BYTE)(str[i] - L'0');
dest[0] = (buf[0] << 4) + buf[1];
dest[1] = (buf[2] << 4) + buf[3];
}
CStringW Byte2_To_Str(BYTE *dest)
{
CStringW str;
str.AppendFormat(L"%X", 0xF & (dest[0] >> 4));
str.AppendFormat(L"%X", 0xF & (dest[0]));
str.AppendFormat(L"%X", 0xF & (dest[1] >> 4));
str.AppendFormat(L"%X", 0xF & (dest[1]));
return str;
}
int main()
{
BYTE dest[2] = { 0 };
StrToByte2(L"1234", dest);
OutputDebugStringW(Byte2_To_Str(dest));
OutputDebugStringW(L"\n");
return 0;
}
If the string is hexadecimal, you can use sscanf to convert each pair of character to bytes.
Basically, "1234" changes to 12 34
"A123" changes to A1 23
bool hexstring_to_bytes(const wchar_t* str, BYTE *dest, int dest_size = 2)
{
int len = wcslen(str);
if((len / 2) > dest_size)
{
//error
return false;
}
for(int i = 0; i < len / 2; i++)
{
int v;
if(swscanf_s(str + i * 2, L"%2x", &v) != 1)
break;
dest[i] = (unsigned char)v;
}
return true;
}
CStringW bytes_to_hexstring(const BYTE* bytes, int byte_size = 2)
{
CString str;
for(int i = 0; i < byte_size; i++)
str.AppendFormat(L"%02X ", bytes[i] & 0xFF);
return str;
}
int main()
{
CStringW str;
CStringW new_string;
BYTE dest[2] = { 0 };
str = L"1234";
hexstring_to_bytes(str, dest);
new_string = bytes_to_hexstring(dest);
OutputDebugString(new_string);
OutputDebugString(L"\n");
str = L"A123";
hexstring_to_bytes(str, dest);
new_string = bytes_to_hexstring(dest);
OutputDebugStringW(new_string);
OutputDebugStringW(L"\n");
return 0;
}
How can I use argv values with int128_t support? I know about atoi() and family of functions exposed by <cstdlib> but somehow I cannot find one for int128_t fixed width integer. This might be because of the fact that this type isn't backed by either c or c++ standard, but is there any way for me to make this code work?
#include <iostream>
int main(int argc, char **argv) {
__int128_t value = atoint128_t(argv[1]);
}
Almost all answers posted are good enough for me but I'm selecting the one that is a drop by solution for my current code, so do look at other ones too.
Here's a simple way of implementing this:
__int128_t atoint128_t(const char *s)
{
const char *p = s;
__int128_t val = 0;
if (*p == '-' || *p == '+') {
p++;
}
while (*p >= '0' && *p <= '9') {
val = (10 * val) + (*p - '0');
p++;
}
if (*s == '-') val = val * -1;
return val;
}
This code checks each character to see if it's a digit (with an optional leading + or -), and if so it multiplies the current result by 10 and adds the value associated with that digit. It then inverts the sign if need be.
Note that this implementation does not check for overflow, which is consistent with the behavior of atoi.
EDIT:
Revised implementation that covers int128_MIN case by either adding or subtracting the value of each digit based on the sign, and skipping leading whitespace.
int myatoi(const char *s)
{
const char *p = s;
int neg = 0, val = 0;
while ((*p == '\n') || (*p == '\t') || (*p == ' ') ||
(*p == '\f') || (*p == '\r') || (*p == '\v')) {
p++;
}
if ((*p == '-') || (*p == '+')) {
if (*p == '-') {
neg = 1;
}
p++;
}
while (*p >= '0' && *p <= '9') {
if (neg) {
val = (10 * val) - (*p - '0');
} else {
val = (10 * val) + (*p - '0');
}
p++;
}
return val;
}
Here is a C++ implementation:
#include <string>
#include <stdexcept>
__int128_t atoint128_t(std::string const & in)
{
__int128_t res = 0;
size_t i = 0;
bool sign = false;
if (in[i] == '-')
{
++i;
sign = true;
}
if (in[i] == '+')
{
++i;
}
for (; i < in.size(); ++i)
{
const char c = in[i];
if (not std::isdigit(c))
throw std::runtime_error(std::string("Non-numeric character: ") + c)
res *= 10;
res += c - '0';
}
if (sign)
{
res *= -1;
}
return res;
}
int main()
{
__int128_t a = atoint128_t("170141183460469231731687303715884105727");
}
If you want to test it then there is a stream operator here.
Performance
I ran a few performance test. I generate 100,000 random numbers uniformly distributed in the entire support of __int128_t. Then I converted each of them 2000 times. All of these (200,000,000) conversions where completed within ~12 seconds.
Using this code:
#include <iostream>
#include <string>
#include <random>
#include <vector>
#include <chrono>
int main()
{
std::mt19937 gen(0);
std::uniform_int_distribution<> num(0, 9);
std::uniform_int_distribution<> len(1, 38);
std::uniform_int_distribution<> sign(0, 1);
std::vector<std::string> str;
for (int i = 0; i < 100000; ++i)
{
std::string s;
int l = len(gen);
if (sign(gen))
s += '-';
for (int u = 0; u < l; ++u)
s += std::to_string(num(gen));
str.emplace_back(s);
}
namespace sc = std::chrono;
auto start = sc::duration_cast<sc::microseconds>(sc::high_resolution_clock::now().time_since_epoch()).count();
__int128_t b = 0;
for (int u = 0; u < 200; ++u)
{
for (int i = 0; i < str.size(); ++i)
{
__int128_t a = atoint128_t(str[i]);
b += a;
}
}
auto time = sc::duration_cast<sc::microseconds>(sc::high_resolution_clock::now().time_since_epoch()).count() - start;
std::cout << time / 1000000. << 's' << std::endl;
}
Adding here a "not-so-naive" implementation in pure C, it's still kind of simple:
#include <stdio.h>
#include <inttypes.h>
__int128 atoi128(const char *s)
{
while (*s == ' ' || *s == '\t' || *s == '\n' || *s == '+') ++s;
int sign = 1;
if (*s == '-')
{
++s;
sign = -1;
}
size_t digits = 0;
while (s[digits] >= '0' && s[digits] <= '9') ++digits;
char scratch[digits];
for (size_t i = 0; i < digits; ++i) scratch[i] = s[i] - '0';
size_t scanstart = 0;
__int128 result = 0;
__int128 mask = 1;
while (scanstart < digits)
{
if (scratch[digits-1] & 1) result |= mask;
mask <<= 1;
for (size_t i = digits-1; i > scanstart; --i)
{
scratch[i] >>= 1;
if (scratch[i-1] & 1) scratch[i] |= 8;
}
scratch[scanstart] >>= 1;
while (scanstart < digits && !scratch[scanstart]) ++scanstart;
for (size_t i = scanstart; i < digits; ++i)
{
if (scratch[i] > 7) scratch[i] -= 3;
}
}
return result * sign;
}
int main(int argc, char **argv)
{
if (argc > 1)
{
__int128 x = atoi128(argv[1]);
printf("%" PRIi64 "\n", (int64_t)x); // just for demo with smaller numbers
}
}
It reads the number bit by bit, using a shifted BCD scratch space, see Double dabble for the algorithm (it's reversed here). This is a lot more efficient than doing many multiplications by 10 in general. *)
This relies on VLAs, without them, you can replace
char scratch[digits];
with
char *scratch = malloc(digits);
if (!scratch) return 0;
and add a
free(scratch);
at the end of the function.
Of course, the code above has the same limitations as the original atoi() (e.g. it will produce "random" garbage on overflow and has no way to check for that) .. if you need strtol()-style guarantees and error checking, extend it yourself (not a big problem, just work to do).
*) Of course, implementing double dabble in C always suffers from the fact you can't use "hardware carries", so there are extra bit masking and testing operations necessary. On the other hand, "naively" multiplying by 10 can be very efficient, as long as the platform provides multiplication instructions with a width "close" to your target type. Therefore, on your typical x86_64 platform (which has instructions for multiplying 64bit integers), this code is probably a lot slower than the naive decimal method. But it scales much better to really huge integers (which you would implement e.g. using arrays of uintmax_t).
is there any way for me to make this code work?
"What about implementing your own atoint128_t ?" #Marian
It is not to hard to roll your own atoint128_t().
Points to consider.
There is 0 or 1 more representable negative value than positive values. Accumulating the value using negative numbers provides more range.
Overflow is not defined for atoi(). Perhaps provide a capped value and set errno? Detecting potential OF prevents UB.
__int128_t constants need careful code to form correctly.
How to handle unusual input? atoi() is fairly loose and made sense years ago for speed/size, yet less UB is usually desired these days. Candidate cases: "", " ", "-", "z", "+123", "999..many...999", "the min int128", "locale_specific_space" + " 123" or even non-string NULL.
Code to do atoi() and atoint128_t() need only vary on the type, range, and names. The algorithm is the same.
#if 1
#define int_t __int128_t
#define int_MAX (((__int128_t)0x7FFFFFFFFFFFFFFF << 64) + 0xFFFFFFFFFFFFFFFF)
#define int_MIN (-1 - int_MAX)
#define int_atoi atoint128_t
#else
#define int_t int
#define int_MAX INT_MAX
#define int_MIN INT_MIN
#define int_atoi int_atoi
#endif
Sample code: Tailor as needed. Relies on C99 or later negative/positive and % functionality.
int_t int_atoi(const char *s) {
if (s == NULL) { // could omit this test
errno = EINVAL;
return 0;
}
while (isspace((unsigned char ) *s)) { // skip same leading white space like atoi()
s++;
}
char sign = *s; // remember if the sign was `-` for later
if (sign == '-' || sign == '+') {
s++;
}
int_t sum = 0;
while (isdigit((unsigned char)*s)) {
int digit = *s - '0';
if ((sum > int_MIN/10) || (sum == int_MIN/10 && digit <= -(int_MIN%10))) {
sum = sum * 10 - digit; // accumulate on the - side
} else {
sum = int_MIN;
errno = ERANGE;
break; // overflow
}
s++;
}
if (sign != '-') {
if (sum < -int_MAX) {
sum = int_MAX;
errno = ERANGE;
} else {
sum = -sum; // Make positive
}
}
return sum;
}
As #Lundin commented about the lack of overflow detection, etc. Modeling the string-->int128 after strtol() is a better idea.
For simplicity, consider __128_t strto__128_base10(const char *s, char *endptr);
This answer all ready handles overflow and flags errno like strtol(). Just need a few changes:
bool digit_found = false;
while (isdigit((unsigned char)*s)) {
digit_found = true;
// delete the `break`
// On overflow, continue looping to get to the end of the digits.
// break;
// after the `while()` loop:
if (!digit_found) { // optional test
errno = EINVAL;
}
if (endptr) {
*endptr = digit_found ? s : original_s;
}
A full long int strtol(const char *nptr, char **endptr, int base); like functionality would also handle other bases with special code when base is 0 or 16. #chqrlie
The C Standard does not mandate support for 128-bit integers.
Yet they are commonly supported by modern compilers: both gcc and clang support the types __int128_t and __uint128_t, but surprisingly still keep intmax_t and uintmax_t limited to 64 bits.
Beyond the basic arithmetic operators, there is not much support for these large integers, especially in the C library: no scanf() or printf() conversion specifiers, etc.
Here is an implementation of strtoi128(), strtou128() and atoi128() that is consistent with the C Standard's atoi(), strtol() and strtoul() specifications.
#include <ctype.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Change these typedefs for your local flavor of 128-bit integer types */
typedef __int128_t i128;
typedef __uint128_t u128;
static int strdigit__(char c) {
/* This is ASCII / UTF-8 specific, would not work for EBCDIC */
return (c >= '0' && c <= '9') ? c - '0'
: (c >= 'a' && c <= 'z') ? c - 'a' + 10
: (c >= 'A' && c <= 'Z') ? c - 'A' + 10
: 255;
}
static u128 strtou128__(const char *p, char **endp, int base) {
u128 v = 0;
int digit;
if (base == 0) { /* handle octal and hexadecimal syntax */
base = 10;
if (*p == '0') {
base = 8;
if ((p[1] == 'x' || p[1] == 'X') && strdigit__(p[2]) < 16) {
p += 2;
base = 16;
}
}
}
if (base < 2 || base > 36) {
errno = EINVAL;
} else
if ((digit = strdigit__(*p)) < base) {
v = digit;
/* convert to unsigned 128 bit with overflow control */
while ((digit = strdigit__(*++p)) < base) {
u128 v0 = v;
v = v * base + digit;
if (v < v0) {
v = ~(u128)0;
errno = ERANGE;
}
}
if (endp) {
*endp = (char *)p;
}
}
return v;
}
u128 strtou128(const char *p, char **endp, int base) {
if (endp) {
*endp = (char *)p;
}
while (isspace((unsigned char)*p)) {
p++;
}
if (*p == '-') {
p++;
return -strtou128__(p, endp, base);
} else {
if (*p == '+')
p++;
return strtou128__(p, endp, base);
}
}
i128 strtoi128(const char *p, char **endp, int base) {
u128 v;
if (endp) {
*endp = (char *)p;
}
while (isspace((unsigned char)*p)) {
p++;
}
if (*p == '-') {
p++;
v = strtou128__(p, endp, base);
if (v >= (u128)1 << 127) {
if (v > (u128)1 << 127)
errno = ERANGE;
return -(i128)(((u128)1 << 127) - 1) - 1;
}
return -(i128)v;
} else {
if (*p == '+')
p++;
v = strtou128__(p, endp, base);
if (v >= (u128)1 << 127) {
errno = ERANGE;
return (i128)(((u128)1 << 127) - 1);
}
return (i128)v;
}
}
i128 atoi128(const char *p) {
return strtoi128(p, (char**)NULL, 10);
}
char *utoa128(char *dest, u128 v, int base) {
char buf[129];
char *p = buf + 128;
const char *digits = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
*p = '\0';
if (base >= 2 && base <= 36) {
while (v > (unsigned)base - 1) {
*--p = digits[v % base];
v /= base;
}
*--p = digits[v];
}
return strcpy(dest, p);
}
char *itoa128(char *buf, i128 v, int base) {
char *p = buf;
u128 uv = (u128)v;
if (v < 0) {
*p++ = '-';
uv = -uv;
}
if (base == 10)
utoa128(p, uv, 10);
else
if (base == 16)
utoa128(p, uv, 16);
else
utoa128(p, uv, base);
return buf;
}
static char *perrno(char *buf, int err) {
switch (err) {
case EINVAL:
return strcpy(buf, "EINVAL");
case ERANGE:
return strcpy(buf, "ERANGE");
default:
sprintf(buf, "%d", err);
return buf;
}
}
int main(int argc, char *argv[]) {
char buf[130];
char xbuf[130];
char ebuf[20];
char *p1, *p2;
i128 v, v1;
u128 v2;
int i;
for (i = 1; i < argc; i++) {
printf("%s:\n", argv[i]);
errno = 0;
v = atoi128(argv[i]);
perrno(ebuf, errno);
printf(" atoi128(): %s 0x%s errno=%s\n",
itoa128(buf, v, 10), utoa128(xbuf, v, 16), ebuf);
errno = 0;
v1 = strtoi128(argv[i], &p1, 0);
perrno(ebuf, errno);
printf(" strtoi128(): %s 0x%s endptr:\"%s\" errno=%s\n",
itoa128(buf, v1, 10), utoa128(xbuf, v1, 16), p1, ebuf);
errno = 0;
v2 = strtou128(argv[i], &p2, 0);
perrno(ebuf, errno);
printf(" strtou128(): %s 0x%s endptr:\"%s\" errno=%s\n",
utoa128(buf, v2, 10), utoa128(xbuf, v2, 16), p2, ebuf);
}
return 0;
}
I have a hexadecimal MAC address held in a std::string. What would be the best way to turn that MAC address into an integer-type held in a uint64_t?
I'm aware of stringstream, sprintf, atoi, etc. I've actually written little conversion functions with the first 2 of those, but they seem more sloppy than I would like.
So, can someone show me a good, clean way to convert
std::string mac = "00:00:12:24:36:4f";
into a uint64_t?
PS: I don't have boost/TR1 facilities available and can't install them where the code will actually be used (which is also why I haven't copy pasted one of my attempts, sorry about that!). So please keep solutions to straight-up C/C++ calls. If you have an interesting solution with a UNIX system call I'd be interested too!
uint64_t string_to_mac(std::string const& s) {
unsigned char a[6];
int last = -1;
int rc = sscanf(s.c_str(), "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx%n",
a + 0, a + 1, a + 2, a + 3, a + 4, a + 5,
&last);
if(rc != 6 || s.size() != last)
throw std::runtime_error("invalid mac address format " + s);
return
uint64_t(a[0]) << 40 |
uint64_t(a[1]) << 32 | (
// 32-bit instructions take fewer bytes on x86, so use them as much as possible.
uint32_t(a[2]) << 24 |
uint32_t(a[3]) << 16 |
uint32_t(a[4]) << 8 |
uint32_t(a[5])
);
}
My solution (requires c++11):
#include <string>
#include <cstdint>
#include <algorithm>
#include <stdlib.h>
uint64_t convert_mac(std::string mac) {
// Remove colons
mac.erase(std::remove(mac.begin(), mac.end(), ':'), mac.end());
// Convert to uint64_t
return strtoul(mac.c_str(), NULL, 16);
}
Use sscanf:
std::string mac = "00:00:12:24:36:4f";
unsigned u[6];
int c=sscanf(mac.c_str(),"%x:%x:%x:%x:%x:%x",u,u+1,u+2,u+3,u+4,u+5);
if (c!=6) raise_error("input format error");
uint64_t r=0;
for (int i=0;i<6;i++) r=(r<<8)+u[i];
// or: for (int i=0;i<6;i++) r=(r<<8)+u[5-i];
I can't think of any magic tricks. Here's a random attempt that may or may not be better than what you've done. It's simplish, but I bet there's far faster solutions.
uint64_t mac2int(std::string s) {
uint64_t r=0;
std::string::iterator i;
std::string::iterator end = s.end();
for(i = s.begin; i != end; ++i) {
char let = *i;
if (let >= '0' && let <= '9') {
r = r*0xf + (let-'0');
} else if (let >= 'a' && let <= 'f') {
r = r*0xf + (let-'a'+10);
} else if (let >= 'A' && let <= 'F') {
r = r*0xf + (let-'A'+10);
}
}
return r;
}
This will just shift hex digits through until the string runs out, not caring about delimiters or total length. But it converts the input string to the desired uint64_t format.
#include <string>
#include <stdint.h>
uint64_t cvt(std::string &v)
{
std::string::iterator i;
std::string digits = "0123456789abcdefABCDEF";
uint64_t result = 0;
size_t pos = 0;
i = v.begin();
while (i != v.end())
{
// search for character in hex digits set
pos = digits.find(*i);
// if found in valid hex digits
if (pos != std::string::npos)
{
// handle upper/lower case hex digit
if (pos > 0xf)
{
pos -= 6;
}
// shift a nibble in
result <<= 4;
result |= pos;
}
++i;
}
return result;
}
Another faster version without calling library functions:
inline unsigned read_hex_byte(char const** beg, char const* end) {
if(end - *beg < 2)
throw std::invalid_argument("");
unsigned hi = (*beg)[0], lo = (*beg)[1];
*beg += 2;
hi -= hi >= '0' && hi <= '9' ? '0' :
hi >= 'a' && hi <= 'f' ? 'a' - 10 :
hi >= 'A' && hi <= 'F' ? 'A' - 10 :
throw std::invalid_argument("");
lo -= lo >= '0' && lo <= '9' ? '0' :
lo >= 'a' && lo <= 'f' ? 'a' - 10 :
lo >= 'A' && lo <= 'F' ? 'A' - 10 :
throw std::invalid_argument("");
return hi << 4 | lo;
}
uint64_t string_to_mac2(std::string const& s) {
char const *beg = s.data(), *end = beg + s.size();
uint64_t r;
try {
r = read_hex_byte(&beg, end);
beg += beg != end && ':' == *beg;
r = r << 8 | read_hex_byte(&beg, end);
beg += beg != end && ':' == *beg;
r = r << 8 | read_hex_byte(&beg, end);
beg += beg != end && ':' == *beg;
r = r << 8 | read_hex_byte(&beg, end);
beg += beg != end && ':' == *beg;
r = r << 8 | read_hex_byte(&beg, end);
beg += beg != end && ':' == *beg;
r = r << 8 | read_hex_byte(&beg, end);
} catch(std::invalid_argument&) {
beg = end - 1;
}
if(beg != end)
throw std::runtime_error("invalid mac address format " + s);
return r;
}
My 2 cents:
uint64_t ParseMac(const std::string& str)
{
std::istringstream iss(str);
uint64_t nibble;
uint64_t result(0);
iss >> std::hex;
while(iss >> nibble) {
result = (result << 8) + nibble;
iss.get();
}
return result;
}
More C++11 way without input data validation:
uint64_t stomac( const std::string &mac )
{
static const std::regex r{ "([\\da-fA-F]{2})(:|$)" };
auto it = std::sregex_iterator( mac.begin(), mac.end(), r );
static const auto end = std::sregex_iterator();
return std::accumulate( it, end, 0, []( uint64_t i, const std::sregex_iterator::value_type &v ) {
return ( i << 8 ) + std::stol( v.str(1), nullptr, 16 );
} );
}
live example
You can also use the ASCII to struct ether_addr conversion routine ether_aton, or its thread-safe version ether_aton_r (GNU extension).
#include <netinet/ether.h>
#include <stdint.h>
#include <string>
#define ETHER_ADDR_ERR UINT64_C(~0)
uint64_t ether_atou64( const std::string& addr_str ) {
union {
uint64_t result;
struct ether_addr address;
};
result = 0;
struct ether_addr* ptr = ether_aton_r( addr_str.c_str(), &address );
if( !ptr ) {
return ETHER_ADDR_ERR;
}
return result;
}
Sorry I connot comment yet.
For the answer from #AB71E5, you need to change "strtoul" to "strtoull".
Ex : 01:02:03:04:05:06 = 48bits but "unsigned long" = 32bits.
The final result is :
#include <string>
#include <cstdint>
#include <algorithm>
#include <stdlib.h>
uint64_t convert_mac(std::string mac) {
// Remove colons
mac.erase(std::remove(mac.begin(), mac.end(), ':'), mac.end());
// Convert to uint64_t
return strtoull(mac.c_str(), NULL, 16);
}