What is the best way to convert a variable-length hex string, e.g. "01A1", to a byte array containing that data?
i.e. converting this:
std::string str = "01A1";
into this
char* hexArray;
int hexLength;
or this
std::vector<char> hexArray;
so that when I write this to a file and hexdump -C it I get the binary data containing 01A1.
This implementation uses the built-in strtol function to handle the actual conversion from text to bytes, but will work for any even-length hex string.
std::vector<char> HexToBytes(const std::string& hex) {
std::vector<char> bytes;
for (unsigned int i = 0; i < hex.length(); i += 2) {
std::string byteString = hex.substr(i, 2);
char byte = (char) strtol(byteString.c_str(), NULL, 16);
bytes.push_back(byte);
}
return bytes;
}
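For completeness, a usage sketch tying this back to the question (it assumes the HexToBytes() above; the file name is just an example). Writing the result to a file lets hexdump -C show 01 a1:
#include <fstream>
#include <string>
#include <vector>
int main()
{
const std::vector<char> bytes = HexToBytes("01A1");
std::ofstream out("out.bin", std::ios::binary);
out.write(bytes.data(), static_cast<std::streamsize>(bytes.size()));
return 0;
}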
This ought to work:
int char2int(char input)
{
if(input >= '0' && input <= '9')
return input - '0';
if(input >= 'A' && input <= 'F')
return input - 'A' + 10;
if(input >= 'a' && input <= 'f')
return input - 'a' + 10;
throw std::invalid_argument("Invalid input string");
}
// This function assumes src to be a zero terminated sanitized string with
// an even number of [0-9a-f] characters, and target to be sufficiently large
void hex2bin(const char* src, char* target)
{
while(*src && src[1])
{
*(target++) = char2int(*src)*16 + char2int(src[1]);
src += 2;
}
}
Depending on your specific platform there's probably also a standard implementation though.
So for fun, I was curious if I could do this kind of conversion at compile-time. It doesn't have a lot of error checking and was done in VS2015, which doesn't support C++14 constexpr functions yet (thus how HexCharToInt looks). It takes a c-string array, converts pairs of characters into a single byte and expands those bytes into a uniform initialization list used to initialize the T type provided as a template parameter. T could be replaced with something like std::array to automatically return an array.
#include <array>
#include <cstdint>
#include <initializer_list>
#include <stdexcept>
#include <utility>
/* Quick and dirty conversion from a single character to its hex equivalent */
constexpr std::uint8_t HexCharToInt(char Input)
{
return
((Input >= 'a') && (Input <= 'f'))
? (Input - 87)
: ((Input >= 'A') && (Input <= 'F'))
? (Input - 55)
: ((Input >= '0') && (Input <= '9'))
? (Input - 48)
: throw std::exception{};
}
/* Position the characters into the appropriate nibble */
constexpr std::uint8_t HexChar(char High, char Low)
{
return (HexCharToInt(High) << 4) | (HexCharToInt(Low));
}
/* Adapter that packs each pair of characters into a single byte and combines the results into a uniform initialization list used to initialize T */
template <typename T, std::size_t Length, std::size_t ... Index>
constexpr T HexString(const char (&Input)[Length], const std::index_sequence<Index...>&)
{
return T{HexChar(Input[(Index * 2)], Input[((Index * 2) + 1)])...};
}
/* Entry function */
template <typename T, std::size_t Length>
constexpr T HexString(const char (&Input)[Length])
{
return HexString<T>(Input, std::make_index_sequence<(Length / 2)>{});
}
constexpr auto Y = HexString<std::array<std::uint8_t, 3>>("ABCDEF");
You can use boost:
#include <boost/algorithm/hex.hpp>
char bytes[60] = {0};
std::string hash = boost::algorithm::unhex(std::string("313233343536373839"));
std::copy(hash.begin(), hash.end(), bytes);
You said "variable length." Just how variable do you mean?
For hex strings that fit into an unsigned long I have always liked the C function strtoul. To make it convert hex pass 16 as the radix value.
Code might look like:
#include <cstdlib>
std::string str = "01a1";
unsigned long val = strtoul(str.c_str(), 0, 16);
If you want to use OpenSSL to do it, there is a nifty trick I found:
BIGNUM *input = BN_new();
int input_length = BN_hex2bn(&input, argv[2]);
input_length = (input_length + 1) / 2; // BN_hex2bn() returns number of hex digits
unsigned char *input_buffer = (unsigned char*)malloc(input_length);
retval = BN_bn2bin(input, input_buffer);
Just be sure to strip off any leading '0x' to the string.
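For illustration, a small sketch of removing that prefix from a std::string before handing it to BN_hex2bn() (the helper name strip_0x is hypothetical):
#include <string>
// Hypothetical helper: drop a leading "0x"/"0X" if present.
std::string strip_0x(std::string hex)
{
if (hex.size() >= 2 && hex[0] == '0' && (hex[1] == 'x' || hex[1] == 'X'))
hex.erase(0, 2);
return hex;
}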
This can be done with a stringstream; you just need to store the value in an intermediate numeric type such as an int:
std::string test = "01A1"; // assuming this is an even length string
char bytes[test.length()/2]; // note: variable-length arrays are a compiler extension in C++
std::stringstream converter;
for(int i = 0; i < test.length(); i+=2)
{
converter << std::hex << test.substr(i,2);
int byte;
converter >> byte;
bytes[i/2] = byte & 0xFF;
converter.str(std::string());
converter.clear();
}
Somebody mentioned using sscanf to do this, but didn't say how. This is how. It's useful because it also works in ancient versions of C and C++ and even most versions of embedded C or C++ for microcontrollers.
When converted to bytes, the hex-string in this example resolves to the ASCII text "Hello there!" which is then printed.
#include <stdio.h>
int main ()
{
char hexdata[] = "48656c6c6f20746865726521";
char bytedata[20]{};
for(int j = 0; j < sizeof(hexdata) / 2; j++) {
sscanf(hexdata + j * 2, "%02hhX", bytedata + j);
}
printf ("%s -> %s\n", hexdata, bytedata);
return 0;
}
I would use a standard function like sscanf to read the string into an unsigned integer, and then you already have the bytes you need in memory. If you were on a big-endian machine you could just write out (memcpy) the memory of the integer from the first non-zero byte. However, you can't safely assume this in general, so you can use some bit masking and shifting to get the bytes out.
const char* src = "01A1";
char hexArray[256] = {0};
int hexLength = 0;
// read in the string
unsigned int hex = 0;
sscanf(src, "%x", &hex);
// write it out
for (unsigned int mask = 0xff000000, bitPos=24; mask; mask>>=8, bitPos-=8) {
unsigned int currByte = hex & mask;
if (currByte || hexLength) {
hexArray[hexLength++] = currByte>>bitPos;
}
}
C++11 variant (with gcc 4.7 - little endian format):
#include <string>
#include <vector>
std::vector<uint8_t> decodeHex(const std::string & source)
{
if ( std::string::npos != source.find_first_not_of("0123456789ABCDEFabcdef") )
{
// you can throw exception here
return {};
}
union
{
uint64_t binary;
char byte[8];
} value{};
auto size = source.size(), offset = (size % 16);
std::vector<uint8_t> binary{};
binary.reserve((size + 1) / 2);
if ( offset )
{
value.binary = std::stoull(source.substr(0, offset), nullptr, 16);
for ( auto index = (offset + 1) / 2; index--; )
{
binary.emplace_back(value.byte[index]);
}
}
for ( ; offset < size; offset += 16 )
{
value.binary = std::stoull(source.substr(offset, 16), nullptr, 16);
for ( auto index = 8; index--; )
{
binary.emplace_back(value.byte[index]);
}
}
return binary;
}
Crypto++ variant (with gcc 4.7):
#include <string>
#include <vector>
#include <crypto++/filters.h>
#include <crypto++/hex.h>
std::vector<unsigned char> decodeHex(const std::string & source)
{
std::string hexCode;
CryptoPP::StringSource(
source, true,
new CryptoPP::HexDecoder(new CryptoPP::StringSink(hexCode)));
return std::vector<unsigned char>(hexCode.begin(), hexCode.end());
}
Note that the first variant is about twice as fast as the second one, and it works with both odd and even numbers of nibbles (the result of "a56ac" is {0x0a, 0x56, 0xac}). Crypto++ discards the last nibble if there is an odd number of them (the result of "a56ac" is {0xa5, 0x6a}) and silently skips invalid hex characters (the result of "a5sac" is {0xa5, 0xac}).
#include <iostream>
#include <sstream>
#include <vector>
int main() {
std::string s("313233");
char delim = ',';
int len = s.size();
for(int i = 2; i < len; i += 3, ++len) s.insert(i, 1, delim);
std::istringstream is(s);
std::ostringstream os;
is >> std::hex;
int n;
while (is >> n) {
char c = (char)n;
os << std::string(&c, 1);
if(is.peek() == delim) is.ignore();
}
// std::string form
std::string byte_string = os.str();
std::cout << byte_string << std::endl;
printf("%s\n", byte_string.c_str());
// std::vector form
std::vector<char> byte_vector(byte_string.begin(), byte_string.end());
byte_vector.push_back('\0'); // needed for a c-string
printf("%s\n", byte_vector.data());
}
The output is
123
123
123
'1' == 0x31, etc.
If your goal is speed, I have an AVX2 SIMD implementation of an encoder and decoder here: https://github.com/zbjornson/fast-hex. These benchmark ~12x faster than the fastest scalar implementations.
#include <iostream>
using byte = unsigned char;
static int charToInt(char c) {
if (c >= '0' && c <= '9') {
return c - '0';
}
if (c >= 'A' && c <= 'F') {
return c - 'A' + 10;
}
if (c >= 'a' && c <= 'f') {
return c - 'a' + 10;
}
return -1;
}
// Decodes specified HEX string to bytes array. Specified nBytes is length of bytes
// array. Returns -1 if fails to decode any of bytes. Returns number of bytes decoded
// on success. Maximum number of bytes decoded will be equal to nBytes. It is assumed
// that specified string is '\0' terminated.
int hexStringToBytes(const char* str, byte* bytes, int nBytes) {
int nDecoded {0};
for (int i {0}; str[i] != '\0' && nDecoded < nBytes; i += 2, nDecoded += 1) {
if (str[i + 1] != '\0') {
int m {charToInt(str[i])};
int n {charToInt(str[i + 1])};
if (m != -1 && n != -1) {
bytes[nDecoded] = (m << 4) | n;
} else {
return -1;
}
} else {
return -1;
}
}
return nDecoded;
}
int main(int argc, char* argv[]) {
if (argc < 2) {
return 1;
}
byte bytes[0x100];
int ret {hexStringToBytes(argv[1], bytes, 0x100)};
if (ret < 0) {
return 1;
}
std::cout << "number of bytes: " << ret << "\n" << std::hex;
for (int i {0}; i < ret; ++i) {
if (bytes[i] < 0x10) {
std::cout << "0";
}
std::cout << (bytes[i] & 0xff);
}
std::cout << "\n";
return 0;
}
I've modified TheoretiCAL's code:
uint8_t buf[32] = {};
std::string hex = "0123";
while (hex.length() % 2)
hex = "0" + hex;
std::stringstream stream;
stream << std::hex << hex;
for (size_t i= 0; i <sizeof(buf); i++)
stream >> buf[i];
How I do this at compile time:
#pragma once
#include <memory>
#include <iostream>
#include <string>
#include <array>
#define DELIMITING_WILDCARD ' '
// #sean :)
constexpr int _char_to_int( char ch )
{
if( ch >= '0' && ch <= '9' )
return ch - '0';
if( ch >= 'A' && ch <= 'F' )
return ch - 'A' + 10;
return ch - 'a' + 10;
};
template <char wildcard, typename T, size_t N = sizeof( T )>
constexpr size_t _count_wildcard( T &&str )
{
size_t count = 1u;
for( const auto &character : str )
{
if( character == wildcard )
{
++count;
}
}
return count;
}
// construct a base-16 hex value and emplace it at make_count
// change 16 to 256 if you want sig[0] == 0xA && sig[1] == 0xB to produce 0xA0B,
// or leave as is for that scenario to return 0xAB
#define CONCATE_HEX_FACTOR 16
#define CONCATE_HEX(a, b) ( CONCATE_HEX_FACTOR * ( a ) + ( b ) )
template
< char skip_wildcard,
// How many occurrences of a delimiting wildcard do we find in sig
size_t delimiter_count,
typename T, size_t N = sizeof( T )>
constexpr auto _make_array( T &&sig )
{
static_assert( delimiter_count > 0, "this is a logical error, delimiter count can't be of size 0" );
static_assert( N > 1, "sig length must be bigger than 1" );
// Resulting byte array, for delimiter_count skips we should have delimiter_count integers
std::array<int, delimiter_count> ret{};
// List of skips that point to the position of the delimiter wildcard in skip
std::array<size_t, delimiter_count> skips{};
// Current skip
size_t skip_count = 0u;
// Character count, traversed for skip
size_t skip_traversed_character_count = 0u;
for( size_t i = 0u; i < N; ++i )
{
if( sig[i] == DELIMITING_WILDCARD )
{
skips[skip_count] = skip_traversed_character_count;
++skip_count;
}
++skip_traversed_character_count;
}
// Finally traversed character count
size_t traversed_character_count = 0u;
// Make count (we will supposedly have at least an instance in our return array)
size_t make_count = 1u;
// Traverse signature
for( size_t i = 0u; i < N; ++i )
{
// Read before
if( i == 0u )
{
// We don't care about this, and we don't want to use 0
if( sig[0u] == skip_wildcard )
{
ret[0u] = -1;
continue;
}
ret[0u] = CONCATE_HEX( _char_to_int( sig[0u] ), _char_to_int( sig[1u] ) );
continue;
}
// Make result by skip data
for( const auto &skip : skips )
{
if( ( skip == i ) && skip < N - 1u )
{
// We don't care about this, and we don't want to use 0
if( sig[i + 1u] == skip_wildcard )
{
ret[make_count] = -1;
++make_count;
continue;
}
ret[make_count] = CONCATE_HEX( _char_to_int( sig[i + 1u] ), _char_to_int( sig[i + 2u] ) );
++make_count;
}
}
}
return ret;
}
#define SKIP_WILDCARD '?'
#define BUILD_ARRAY(a) _make_array<SKIP_WILDCARD, _count_wildcard<DELIMITING_WILDCARD>( a )>( a )
#define BUILD_ARRAY_MV(a) _make_array<SKIP_WILDCARD, _count_wildcard<DELIMITING_WILDCARD>( std::move( a ) )>( std::move( a ) )
// -----
// usage
// -----
template <int n>
constexpr int combine_two()
{
constexpr auto numbers = BUILD_ARRAY( "55 8B EC 83 E4 F8 8B 4D 08 BA ? ? ? ? E8 ? ? ? ? 85 C0 75 12 ?" );
constexpr int number = numbers[0];
constexpr int number_now = n + number;
return number_now;
}
int main()
{
constexpr auto shit = BUILD_ARRAY( "?? AA BB CC DD ? ? ? 02 31 32" );
for( const auto &hex : shit )
{
printf( "%x ", hex );
}
combine_two<3>();
constexpr auto saaahhah = combine_two<3>();
static_assert( combine_two<3>() == 88 );
static_assert( combine_two<3>() == saaahhah );
printf( "\n%d", saaahhah );
}
The method can be used at runtime too, but for that you'd probably prefer something else that's faster.
This may be useful to someone: the logic for translating a set of bytes into a string and back. It handles the zero-character problem.
#include <sstream>
#include <iomanip>
std::string BytesToHex(const std::vector<char>& data, size_t len)
{
std::stringstream ss;
ss << std::hex << std::setfill('0');
for(size_t index(0); index < len; ++index)
{
ss << std::setw(2) << static_cast<unsigned int>(static_cast<unsigned char>(data[index])); // cast via unsigned char to avoid sign extension
}
return ss.str();
}
std::vector<char> HexToBytes(const std::string& data)
{
std::stringstream ss;
ss << data;
std::vector<char> resBytes;
size_t count = 0;
const auto len = data.size();
while(ss.good() && count < len)
{
unsigned short num;
char hexNum[3] = {0}; // one extra byte so the buffer is null-terminated for sscanf
ss.read(hexNum, 2);
sscanf(hexNum, "%2hX", &num);
resBytes.push_back(static_cast<char>(num));
count += 2;
}
return resBytes;
}
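A round-trip usage sketch (assuming the two functions above) showing that an embedded zero byte survives the conversion:
#include <cstdio>
#include <string>
#include <vector>
int main()
{
const std::vector<char> data{ '\x01', '\x00', '\xA1' };
const std::string hex = BytesToHex(data, data.size()); // "0100a1"
const std::vector<char> back = HexToBytes(hex);
std::printf("%s -> %zu bytes\n", hex.c_str(), back.size());
return 0;
}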
If you can make your data look like an array of strings such as "0x01", "0xA1", then you can iterate over the array and use sscanf to build the array of values:
unsigned int result;
sscanf(data, "%x", &result);
The difficulty in a hex-to-char conversion is that the hex digits work pairwise, e.g. 3132 or A0FF, so an even number of hex digits is usually assumed. However, it can be perfectly valid to have an odd number of digits, like 332 and AFF, which should be understood as 0332 and 0AFF.
I propose an improvement to Niels Keurentjes' hex2bin() function.
First we count the number of valid hex digits. Since we have to count anyway, let's also check the buffer size:
void hex2bin(const char* src, char* target, size_t size_target)
{
int countdgts=0; // count hex digits
for (const char *p=src; *p && isxdigit(*p); p++)
countdgts++;
if ((countdgts+1)/2+1>size_target)
throw exception("Risk of buffer overflow");
By the way, to use isxdigit() you'll have to #include <cctype>.
Once we know how many digits there are, we can determine whether the first one is a high nibble (complete pairs) or a standalone low digit (odd count).
bool ishi = !(countdgts%2);
Then we can loop digit by digit, combining each pair using a left shift << and a bitwise OR, toggling the 'high' indicator at each iteration:
for (*target=0; *src; ishi = !ishi) {
char tmp = char2int(*src++); // hex digit on 4 lower bits
if (ishi)
*target = (tmp << 4); // high: shift by 4
else *target++ |= tmp; // low: complete previous
}
*target=0; // null terminated target (if desired)
}
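A usage sketch, assuming the fragments above are assembled into one hex2bin() and char2int() is taken from the earlier answer:
#include <cstdio>
int main()
{
char buf[4];
hex2bin("AFF", buf, sizeof(buf)); // odd number of digits, read as 0AFF
std::printf("%02X %02X\n", (unsigned char)buf[0], (unsigned char)buf[1]); // prints: 0A FF
return 0;
}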
I found this question, but the accepted answer didn't look like a C++ way of solving the task to me (this doesn't mean it's a bad answer or anything, just explaining the motivation behind adding this one). I recalled this nice answer and decided to implement something similar. Here is the complete code of what I ended up with (it also works for std::wstring):
#include <cctype>
#include <cstdlib>
#include <algorithm>
#include <iostream>
#include <iterator>
#include <ostream>
#include <stdexcept>
#include <string>
#include <vector>
template <typename OutputIt>
class hex_ostream_iterator :
public std::iterator<std::output_iterator_tag, void, void, void, void>
{
OutputIt out;
int digitCount;
int number;
public:
hex_ostream_iterator(OutputIt out) : out(out), digitCount(0), number(0)
{
}
hex_ostream_iterator<OutputIt> &
operator=(char c)
{
number = (number << 4) | char2int(c);
digitCount++;
if (digitCount == 2) {
digitCount = 0;
*out++ = number;
number = 0;
}
return *this;
}
hex_ostream_iterator<OutputIt> &
operator*()
{
return *this;
}
hex_ostream_iterator<OutputIt> &
operator++()
{
return *this;
}
hex_ostream_iterator<OutputIt> &
operator++(int)
{
return *this;
}
private:
int
char2int(char c)
{
static const std::string HEX_CHARS = "0123456789abcdef";
const char lowerC = std::tolower(c);
const std::string::size_type pos = HEX_CHARS.find_first_of(lowerC);
if (pos == std::string::npos) {
throw std::runtime_error(std::string("Not a hex digit: ") + c);
}
return pos;
}
};
template <typename OutputIt>
hex_ostream_iterator<OutputIt>
hex_iterator(OutputIt out)
{
return hex_ostream_iterator<OutputIt>(out);
}
template <typename InputIt, typename OutputIt>
hex_ostream_iterator<OutputIt>
from_hex_string(InputIt first, InputIt last, OutputIt out)
{
if (std::distance(first, last) % 2 == 1) {
*out = '0';
++out;
}
return std::copy(first, last, out);
}
int
main(int argc, char *argv[])
{
if (argc != 2) {
std::cout << "Usage: " << argv[0] << " hexstring" << std::endl;
return EXIT_FAILURE;
}
const std::string input = argv[1];
std::vector<unsigned char> bytes;
from_hex_string(input.begin(), input.end(),
hex_iterator(std::back_inserter(bytes)));
typedef std::ostream_iterator<unsigned char> osit;
std::copy(bytes.begin(), bytes.end(), osit(std::cout));
return EXIT_SUCCESS;
}
And the output of ./hex2bytes 61a062a063 | hexdump -C:
00000000 61 a0 62 a0 63 |a.b.c|
00000005
And of ./hex2bytes 6a062a063 | hexdump -C (note odd number of characters):
00000000 06 a0 62 a0 63 |..b.c|
00000005
In: "303132", Out: "012". Input string can be odd or even length.
char char2int(char input)
{
if (input >= '0' && input <= '9')
return input - '0';
if (input >= 'A' && input <= 'F')
return input - 'A' + 10;
if (input >= 'a' && input <= 'f')
return input - 'a' + 10;
throw std::runtime_error("Incorrect symbol in hex string");
};
string hex2str(string &hex)
{
string out;
out.resize(hex.size() / 2 + hex.size() % 2);
string::iterator it = hex.begin();
string::iterator out_it = out.begin();
if (hex.size() % 2 != 0) {
*out_it++ = char(char2int(*it++));
}
for (; it < hex.end() - 1; it++) {
*out_it++ = char2int(*it++) << 4 | char2int(*it);
};
return out;
}
Very similar to some of the other answers here, this is what I went with:
typedef uint8_t BYTE;
BYTE* ByteUtils::HexStringToBytes(BYTE* HexString, int ArrayLength)
{
BYTE* returnBytes;
returnBytes = (BYTE*) malloc(ArrayLength/2);
int j=0;
for(int i = 0; i < ArrayLength; i++)
{
if(i % 2 == 0)
{
int valueHigh = (int)(*(HexString+i));
int valueLow = (int)(*(HexString+i+1));
valueHigh = ByteUtils::HexAsciiToDec(valueHigh);
valueLow = ByteUtils::HexAsciiToDec(valueLow);
valueHigh *= 16;
int total = valueHigh + valueLow;
*(returnBytes+j++) = (BYTE)total;
}
}
return returnBytes;
}
int ByteUtils::HexAsciiToDec(int value)
{
if(value > 47 && value < 58) // '0'..'9'
{
value -= 48;
}
else if(value > 96 && value < 103)
{
value -= 97;
value += 10;
}
else if(value > 64 && value < 71)
{
value -= 65;
value += 10;
}
else
{
value = 0;
}
return value;
}
static bool Hexadec2xdigit(const std::string& data, std::string& buffer, std::size_t offset = sizeof(uint16_t))
{
if (data.empty())
{
return false;
}
try
{
for (std::size_t i = 0; i + 1 < data.size(); i += offset)
{
if (std::isxdigit(static_cast<unsigned char>(data[i])))
{
// copy the two hex digits into a small null-terminated buffer for strtoul
const char pair[3] = { data[i], data[i + 1], '\0' };
buffer += static_cast<char>(std::strtoul(pair, nullptr, 16));
}
}
return true;
}
catch (const std::invalid_argument& ex)
{
}
catch (const std::out_of_range& ex)
{
}
return false;
}
This code avoids most of the copying.
How can I use argv values with __int128_t support? I know about atoi() and the family of functions exposed by <cstdlib>, but somehow I cannot find one for the __int128_t fixed-width integer. This might be because the type isn't backed by either the C or the C++ standard, but is there any way for me to make this code work?
#include <iostream>
int main(int argc, char **argv) {
__int128_t value = atoint128_t(argv[1]);
}
Almost all of the answers posted are good enough for me, but I'm selecting the one that is a drop-in solution for my current code, so do look at the other ones too.
Here's a simple way of implementing this:
__int128_t atoint128_t(const char *s)
{
const char *p = s;
__int128_t val = 0;
if (*p == '-' || *p == '+') {
p++;
}
while (*p >= '0' && *p <= '9') {
val = (10 * val) + (*p - '0');
p++;
}
if (*s == '-') val = val * -1;
return val;
}
This code checks each character to see if it's a digit (with an optional leading + or -), and if so it multiplies the current result by 10 and adds the value associated with that digit. It then inverts the sign if need be.
Note that this implementation does not check for overflow, which is consistent with the behavior of atoi.
EDIT:
A revised implementation that covers the INT128_MIN case by either adding or subtracting the value of each digit based on the sign, and that also skips leading whitespace:
__int128_t atoint128_t(const char *s)
{
const char *p = s;
int neg = 0;
__int128_t val = 0;
while ((*p == '\n') || (*p == '\t') || (*p == ' ') ||
(*p == '\f') || (*p == '\r') || (*p == '\v')) {
p++;
}
if ((*p == '-') || (*p == '+')) {
if (*p == '-') {
neg = 1;
}
p++;
}
while (*p >= '0' && *p <= '9') {
if (neg) {
val = (10 * val) - (*p - '0');
} else {
val = (10 * val) + (*p - '0');
}
p++;
}
return val;
}
Here is a C++ implementation:
#include <cctype>
#include <string>
#include <stdexcept>
__int128_t atoint128_t(std::string const & in)
{
__int128_t res = 0;
size_t i = 0;
bool sign = false;
if (in[i] == '-')
{
++i;
sign = true;
}
if (in[i] == '+')
{
++i;
}
for (; i < in.size(); ++i)
{
const char c = in[i];
if (not std::isdigit(static_cast<unsigned char>(c)))
throw std::runtime_error(std::string("Non-numeric character: ") + c);
res *= 10;
res += c - '0';
}
if (sign)
{
res *= -1;
}
return res;
}
int main()
{
__int128_t a = atoint128_t("170141183460469231731687303715884105727");
}
If you want to test it then there is a stream operator here.
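Since there is no standard printf conversion for __int128_t, here is a minimal printer sketch for testing (it is not the stream operator referenced above, and it assumes the atoint128_t() defined earlier):
#include <cstdio>
#include <string>
std::string int128_to_string(__int128_t v)
{
if (v == 0) return "0";
const bool neg = v < 0;
__uint128_t u = neg ? -static_cast<__uint128_t>(v) : static_cast<__uint128_t>(v);
std::string s;
while (u != 0) {
s.insert(s.begin(), static_cast<char>('0' + static_cast<int>(u % 10)));
u /= 10;
}
return neg ? "-" + s : s;
}
int main()
{
const __int128_t a = atoint128_t("170141183460469231731687303715884105727");
std::printf("%s\n", int128_to_string(a).c_str());
}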
Performance
I ran a few performance tests. I generated 100,000 random numbers uniformly distributed over the entire range of __int128_t. Then I converted each of them 2000 times. All of these (200,000,000) conversions were completed within ~12 seconds.
Using this code:
#include <iostream>
#include <string>
#include <random>
#include <vector>
#include <chrono>
int main()
{
std::mt19937 gen(0);
std::uniform_int_distribution<> num(0, 9);
std::uniform_int_distribution<> len(1, 38);
std::uniform_int_distribution<> sign(0, 1);
std::vector<std::string> str;
for (int i = 0; i < 100000; ++i)
{
std::string s;
int l = len(gen);
if (sign(gen))
s += '-';
for (int u = 0; u < l; ++u)
s += std::to_string(num(gen));
str.emplace_back(s);
}
namespace sc = std::chrono;
auto start = sc::duration_cast<sc::microseconds>(sc::high_resolution_clock::now().time_since_epoch()).count();
__int128_t b = 0;
for (int u = 0; u < 200; ++u)
{
for (int i = 0; i < str.size(); ++i)
{
__int128_t a = atoint128_t(str[i]);
b += a;
}
}
auto time = sc::duration_cast<sc::microseconds>(sc::high_resolution_clock::now().time_since_epoch()).count() - start;
std::cout << time / 1000000. << 's' << std::endl;
}
Here is a "not-so-naive" implementation in pure C; it's still fairly simple:
#include <stdio.h>
#include <inttypes.h>
__int128 atoi128(const char *s)
{
while (*s == ' ' || *s == '\t' || *s == '\n' || *s == '+') ++s;
int sign = 1;
if (*s == '-')
{
++s;
sign = -1;
}
size_t digits = 0;
while (s[digits] >= '0' && s[digits] <= '9') ++digits;
char scratch[digits];
for (size_t i = 0; i < digits; ++i) scratch[i] = s[i] - '0';
size_t scanstart = 0;
__int128 result = 0;
__int128 mask = 1;
while (scanstart < digits)
{
if (scratch[digits-1] & 1) result |= mask;
mask <<= 1;
for (size_t i = digits-1; i > scanstart; --i)
{
scratch[i] >>= 1;
if (scratch[i-1] & 1) scratch[i] |= 8;
}
scratch[scanstart] >>= 1;
while (scanstart < digits && !scratch[scanstart]) ++scanstart;
for (size_t i = scanstart; i < digits; ++i)
{
if (scratch[i] > 7) scratch[i] -= 3;
}
}
return result * sign;
}
int main(int argc, char **argv)
{
if (argc > 1)
{
__int128 x = atoi128(argv[1]);
printf("%" PRIi64 "\n", (int64_t)x); // just for demo with smaller numbers
}
}
It reads the number bit by bit, using a shifted BCD scratch space, see Double dabble for the algorithm (it's reversed here). This is a lot more efficient than doing many multiplications by 10 in general. *)
This relies on VLAs, without them, you can replace
char scratch[digits];
with
char *scratch = malloc(digits);
if (!scratch) return 0;
and add a
free(scratch);
at the end of the function.
Of course, the code above has the same limitations as the original atoi() (e.g. it will produce "random" garbage on overflow and has no way to check for that). If you need strtol()-style guarantees and error checking, extend it yourself (not a big problem, just work to do).
*) Of course, implementing double dabble in C always suffers from the fact you can't use "hardware carries", so there are extra bit masking and testing operations necessary. On the other hand, "naively" multiplying by 10 can be very efficient, as long as the platform provides multiplication instructions with a width "close" to your target type. Therefore, on your typical x86_64 platform (which has instructions for multiplying 64bit integers), this code is probably a lot slower than the naive decimal method. But it scales much better to really huge integers (which you would implement e.g. using arrays of uintmax_t).
is there any way for me to make this code work?
"What about implementing your own atoint128_t ?" #Marian
It is not too hard to roll your own atoint128_t().
Points to consider.
There is either 0 or 1 more representable negative value than positive values. Accumulating the value using negative numbers provides more range.
Overflow is not defined for atoi(). Perhaps provide a capped value and set errno? Detecting potential overflow prevents UB.
__int128_t constants need careful code to form correctly.
How to handle unusual input? atoi() is fairly loose and made sense years ago for speed/size, yet less UB is usually desired these days. Candidate cases: "", " ", "-", "z", "+123", "999..many...999", "the min int128", "locale_specific_space" + " 123" or even non-string NULL.
Code for atoi() and atoint128_t() need only vary in the type, range, and names; the algorithm is the same.
#if 1
#define int_t __int128_t
#define int_MAX (((__int128_t)0x7FFFFFFFFFFFFFFF << 64) + 0xFFFFFFFFFFFFFFFF)
#define int_MIN (-1 - int_MAX)
#define int_atoi atoint128_t
#else
#define int_t int
#define int_MAX INT_MAX
#define int_MIN INT_MIN
#define int_atoi int_atoi
#endif
Sample code: Tailor as needed. Relies on C99 or later negative/positive and % functionality.
int_t int_atoi(const char *s) {
if (s == NULL) { // could omit this test
errno = EINVAL;
return 0;
}
while (isspace((unsigned char ) *s)) { // skip same leading white space like atoi()
s++;
}
char sign = *s; // remember if the sign was `-` for later
if (sign == '-' || sign == '+') {
s++;
}
int_t sum = 0;
while (isdigit((unsigned char)*s)) {
int digit = *s - '0';
if ((sum > int_MIN/10) || (sum == int_MIN/10 && digit <= -(int_MIN%10))) {
sum = sum * 10 - digit; // accumulate on the - side
} else {
sum = int_MIN;
errno = ERANGE;
break; // overflow
}
s++;
}
if (sign != '-') {
if (sum < -int_MAX) {
sum = int_MAX;
errno = ERANGE;
} else {
sum = -sum; // Make positive
}
}
return sum;
}
As #Lundin commented about the lack of overflow detection, etc., modeling the string-to-int128 conversion after strtol() is a better idea.
For simplicity, consider __int128_t strto__128_base10(const char *s, char **endptr);
This answer already handles overflow and sets errno like strtol(). It just needs a few changes:
bool digit_found = false;
while (isdigit((unsigned char)*s)) {
digit_found = true;
// ... accumulate as before, but on overflow keep looping (delete the `break`)
// so the scan still reaches the end of the digits.
}
// after the `while()` loop:
if (!digit_found) { // optional test
errno = EINVAL;
}
if (endptr) {
*endptr = (char *)(digit_found ? s : original_s);
}
Full strtol()-like functionality, long int strtol(const char *nptr, char **endptr, int base);, would also handle other bases, with special code when base is 0 or 16. #chqrlie
The C Standard does not mandate support for 128-bit integers.
Yet they are commonly supported by modern compilers: both gcc and clang support the types __int128_t and __uint128_t, but surprisingly still keep intmax_t and uintmax_t limited to 64 bits.
Beyond the basic arithmetic operators, there is not much support for these large integers, especially in the C library: no scanf() or printf() conversion specifiers, etc.
Here is an implementation of strtoi128(), strtou128() and atoi128() that is consistent with the C Standard's atoi(), strtol() and strtoul() specifications.
#include <ctype.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Change these typedefs for your local flavor of 128-bit integer types */
typedef __int128_t i128;
typedef __uint128_t u128;
static int strdigit__(char c) {
/* This is ASCII / UTF-8 specific, would not work for EBCDIC */
return (c >= '0' && c <= '9') ? c - '0'
: (c >= 'a' && c <= 'z') ? c - 'a' + 10
: (c >= 'A' && c <= 'Z') ? c - 'A' + 10
: 255;
}
static u128 strtou128__(const char *p, char **endp, int base) {
u128 v = 0;
int digit;
if (base == 0) { /* handle octal and hexadecimal syntax */
base = 10;
if (*p == '0') {
base = 8;
if ((p[1] == 'x' || p[1] == 'X') && strdigit__(p[2]) < 16) {
p += 2;
base = 16;
}
}
}
if (base < 2 || base > 36) {
errno = EINVAL;
} else
if ((digit = strdigit__(*p)) < base) {
v = digit;
/* convert to unsigned 128 bit with overflow control */
while ((digit = strdigit__(*++p)) < base) {
u128 v0 = v;
v = v * base + digit;
if (v < v0) {
v = ~(u128)0;
errno = ERANGE;
}
}
if (endp) {
*endp = (char *)p;
}
}
return v;
}
u128 strtou128(const char *p, char **endp, int base) {
if (endp) {
*endp = (char *)p;
}
while (isspace((unsigned char)*p)) {
p++;
}
if (*p == '-') {
p++;
return -strtou128__(p, endp, base);
} else {
if (*p == '+')
p++;
return strtou128__(p, endp, base);
}
}
i128 strtoi128(const char *p, char **endp, int base) {
u128 v;
if (endp) {
*endp = (char *)p;
}
while (isspace((unsigned char)*p)) {
p++;
}
if (*p == '-') {
p++;
v = strtou128__(p, endp, base);
if (v >= (u128)1 << 127) {
if (v > (u128)1 << 127)
errno = ERANGE;
return -(i128)(((u128)1 << 127) - 1) - 1;
}
return -(i128)v;
} else {
if (*p == '+')
p++;
v = strtou128__(p, endp, base);
if (v >= (u128)1 << 127) {
errno = ERANGE;
return (i128)(((u128)1 << 127) - 1);
}
return (i128)v;
}
}
i128 atoi128(const char *p) {
return strtoi128(p, (char**)NULL, 10);
}
char *utoa128(char *dest, u128 v, int base) {
char buf[129];
char *p = buf + 128;
const char *digits = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
*p = '\0';
if (base >= 2 && base <= 36) {
while (v > (unsigned)base - 1) {
*--p = digits[v % base];
v /= base;
}
*--p = digits[v];
}
return strcpy(dest, p);
}
char *itoa128(char *buf, i128 v, int base) {
char *p = buf;
u128 uv = (u128)v;
if (v < 0) {
*p++ = '-';
uv = -uv;
}
if (base == 10)
utoa128(p, uv, 10);
else
if (base == 16)
utoa128(p, uv, 16);
else
utoa128(p, uv, base);
return buf;
}
static char *perrno(char *buf, int err) {
switch (err) {
case EINVAL:
return strcpy(buf, "EINVAL");
case ERANGE:
return strcpy(buf, "ERANGE");
default:
sprintf(buf, "%d", err);
return buf;
}
}
int main(int argc, char *argv[]) {
char buf[130];
char xbuf[130];
char ebuf[20];
char *p1, *p2;
i128 v, v1;
u128 v2;
int i;
for (i = 1; i < argc; i++) {
printf("%s:\n", argv[i]);
errno = 0;
v = atoi128(argv[i]);
perrno(ebuf, errno);
printf(" atoi128(): %s 0x%s errno=%s\n",
itoa128(buf, v, 10), utoa128(xbuf, v, 16), ebuf);
errno = 0;
v1 = strtoi128(argv[i], &p1, 0);
perrno(ebuf, errno);
printf(" strtoi128(): %s 0x%s endptr:\"%s\" errno=%s\n",
itoa128(buf, v1, 10), utoa128(xbuf, v1, 16), p1, ebuf);
errno = 0;
v2 = strtou128(argv[i], &p2, 0);
perrno(ebuf, errno);
printf(" strtou128(): %s 0x%s endptr:\"%s\" errno=%s\n",
utoa128(buf, v2, 10), utoa128(xbuf, v2, 16), p2, ebuf);
}
return 0;
}
I am writing an operating system in C and assembly, and in implementing the EXT2 file system I have encountered a problem. I need to convert FOUR bytes of hexadecimal to decimal in C. An example would be converting 00 00 01 (0x10000) to 65536. I need to convert to decimal because parsing the superblock requires all values to be in decimal. More specifically, the ext2 fs I'm working on is here:
#include "ext2.h"
#include <stdlib.h>
long hex2dec(unsigned const char *hex){
long ret = 0;
int i = 0;
while(hex[i] != 0){
//if(hex[i] >= 0x00 && hex[i] <= 0x09)
// ret+=(10 * i) * hex[i];
}
//kprintf("\n");
return ret;
}
char *strsep(char *buf,int offset,int num){
char *ret = malloc(1024);
int j = 0;
int i = offset;
int end = (offset + num);
int i1 = 0;
while(i1 < num){
///kstrcat(ret,&buf[i]);
ret[i1] = buf[i];
i++;
i1++;
}
return ret;
}
int get_partition(partnum){
if(partnum > 4)
return -1;
//int i = (12 * partnum);
int i = 0;
if(partnum == 1)
i = 190;
else if(partnum == 2)
i = 206;
else if(partnum == 3)
i = 222;
else
i = 190;
int ret = 0;
char *buf = malloc(1024);
ata_read_master(buf,1,0x00);
ret = buf[(i + 2)];
return ret;
}
int _intlen(int i){
int ret = 0;
while(i){
ret++;
i/=10;
}
return ret;
}
int _hex2int(char c){
if(c == '0')
return 0;
else if(c == '1')
return 1;
else if(c == '2')
return 2;
else if(c == '3')
return 3;
else if(c == '4')
return 4;
else if(c == '5')
return 5;
else if(c == '6')
return 6;
else if(c == '7')
return 7;
else if(c == '8')
return 8;
else if(c == '9')
return 9;
else if(c == 'A')
return 10;
else if(c == 'B')
return 11;
else if(c == 'C')
return 12;
else if(c == 'D')
return 13;
else if(c == 'E')
return 14;
else if(c == 'F')
return 15;
}
int hex2int(char c){
int i = c;
}
int comb(const char *str,int n){
int i = 0;
int ret = 0;
while(i < n){
//if(str[i] == 0x01)
// kprintf("(:");
/*int j = str[i];
int k = 0;
int m = 0;
if(j < 10)
j*=10;
else
while(j > 0){
k+=(10 ^ (_intlen(j) - m)) * j % 10;
m++;
j/=10;
}
//kprintf("%d",j);
//if(j == 1)
// kprintf("(:");*/
i++;
}
//ret = (char)ret;
ret = (char)str
int ret = 0;
int i = 0;
char *s = malloc(1024);
/*while(i < n){
//kstrcat(s,&((char*)buf[i]));
n++;
}*/
return ret;
//kprintf("\n");
//return ret;
}
struct ext2_superblock *parse_sblk(int partnum){
int i = get_partition(partnum);
if(i > 0)
kprintf("[EXT2_SUPERBLOCK]Found partition!\n");
else
i = 0;
struct ext2_superblock *ret;
struct ext2_superblock retnp;
char *buf = malloc(1024);
int i1 = 0;
//char *tmpbuf = malloc(4);
/*if(i != 0)
ata_read_master(buf,((i * 4)/256),0x00);
else{
kprintf("[WRN]: Looking for superblock at offset 1024\n");
ata_read_master(buf,4,0x00);
}*/
ata_read_master(buf,2,0x00);
const char *cmp = strsep(buf,0,4);
retnp.ninode = comb(strsep(buf,0,4),4);
retnp.nblock = comb(strsep(buf,4,4),4);
retnp.nsblock = comb(strsep(buf,8,4),4);
retnp.nunallocb = comb(strsep(buf,12,4),4);
retnp.nunalloci = comb(strsep(buf,16,4),4);
retnp.supernum = comb(strsep(buf,20,4),4);
retnp.leftshiftbs = comb(strsep(buf,24,4),4);
retnp.leftshiftfs = comb(strsep(buf,28,4),4);
retnp.numofblockpg= comb(strsep(buf,32,4),4);
// retnp.numofffpbg= comb(strsep(buf,36,4));
retnp.numoffpbg = comb(strsep(buf,36,4),4);
retnp.numofinpbg = comb(strsep(buf,40,4),4);
retnp.lastmount = comb(strsep(buf,44,4),4);
retnp.lastwrite = comb(strsep(buf,48,4),4);
retnp.fsckpass = comb(strsep(buf,52,2),2);
retnp.fsckallow = comb(strsep(buf,54,2),2);
retnp.sig = comb(strsep(buf,56,2),2);
retnp.state = comb(strsep(buf,58,2),2);
retnp.erroropp = comb(strsep(buf,60,2),2);
retnp.minorpor = comb(strsep(buf,52,2),2);
retnp.ptimefsck = comb(strsep(buf,64,4),4);
retnp.inter = comb(strsep(buf,68,4),4);
retnp.osid = comb(strsep(buf,72,4),4);
retnp.mpv = comb(strsep(buf,76,4),4);
retnp.uid = comb(strsep(buf,80,2),2);
retnp.gid = comb(strsep(buf,82,2),2);
*ret = retnp; /* copy into heap storage so the pointer stays valid after return */
return ret;
}
If there is any way of avoiding the conversion and still successfully implementing ext2, I would be glad to hear it. I would prefer it to be in C, but assembly is also okay.
If you have this:
const uint8_t bytes[] = { 0, 0, 1 };
and you want to consider that the bytes of a (24-bit) unsigned integer in little-endian order, you can convert to the actual integer using:
const uint32_t value = ((uint32_t) bytes[2] << 16) | (bytes[1] << 8) | bytes[0];
This will set value equal to 65536.
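If you need to pull several such fields out of the superblock buffer, a small helper along these lines keeps the shifting in one place (a minimal sketch; read_le32 and superblock_buffer are just illustrative names):
#include <stdint.h>

/* Assemble a 32-bit value from four little-endian bytes starting at p. */
static uint32_t read_le32(const uint8_t *p)
{
    return (uint32_t)p[0]
         | ((uint32_t)p[1] << 8)
         | ((uint32_t)p[2] << 16)
         | ((uint32_t)p[3] << 24);
}

/* Example: the inode count occupies the first four bytes of the superblock. */
/* uint32_t ninode = read_le32(superblock_buffer + 0); */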
You can use std::istringstream or sscanf instead of writing your own.
#include <iostream>
#include <sstream>
#include <string>

const char *hex_text = "0x100";
const std::string hex_str(hex_text);
std::istringstream text_stream(hex_str);
unsigned int value = 0;
text_stream >> std::hex >> value;
std::cout << "Decimal value of 0x100: " << value << "\n";
Or using sscanf:
sscanf(hex_text, "0x%X", &value);
std::cout << "Decimal value of 0x100: " << value << "\n";
A good idea is to search your C++ reference for existing functions or search the internet, before writing your own.
To roll your own:
#include <cctype>
#include <string>

unsigned int hex2dec(const std::string& hex_text)
{
unsigned int value = 0U;
const unsigned int length = hex_text.length();
for (unsigned int i = 0; i < length; ++i)
{
const char c = hex_text[i];
if ((c >= '0') && (c <= '9'))
{
value = value * 16 + (c - '0');
}
else
{
const char uc = (char) toupper(c);
if ((uc >= 'A') && (uc <= 'F'))
{
value = value * 16 + (uc - 'A') + 10;
}
}
}
return value;
}
To convert to use C-style character strings, change the parameter type and use strlen for the length.
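For reference, a minimal sketch of that C-string variant (same algorithm, plain C) could look like this:
#include <ctype.h>
#include <string.h>

unsigned int hex2dec(const char *hex_text)
{
    unsigned int value = 0U;
    const size_t length = strlen(hex_text);
    for (size_t i = 0; i < length; ++i)
    {
        /* Normalize to uppercase, then accumulate one nibble per character. */
        const char c = (char) toupper((unsigned char) hex_text[i]);
        if ((c >= '0') && (c <= '9'))
            value = value * 16 + (unsigned int)(c - '0');
        else if ((c >= 'A') && (c <= 'F'))
            value = value * 16 + (unsigned int)(c - 'A') + 10;
    }
    return value;
}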
I am developing a C/C++ program on Linux. Can you please tell me if there is any C/C++ library which decodes URLs?
I am looking for libraries which
convert
"http%3A%2F%2F"
to:
"http://"
or
"a+t+%26+t" to "a t & t"
Thank you.
I actually used Saul's function in an analysis program I was writing (analyzing millions of URL encoded strings), and while it works, at that scale it was slowing my program down horribly, so I decided to write a faster version. This one is thousands of times faster when compiled with GCC and the -O2 option. It can also use the same output buffer as the input (e.g. urldecode2(buf, buf) will work if the original string was in buf and is to be overwritten by its decoded counterpart).
Edit: It doesn't take the buffer size as an input because it assumes the buffer will be large enough. This is safe because the length of the output is always <= that of the input, so either use the same buffer for the output or create one that is at least the size of the input plus 1 for the null terminator, e.g.:
char *output = malloc(strlen(input)+1);
urldecode2(output, input);
printf("Decoded string: %s\n", output);
Edit 2: An anonymous user attempted to edit this answer to handle the '+' character's translation to ' ', which I think it should probably do. Again, this wasn't something I needed for my application, but I've added it below.
Here's the routine:
#include <stdlib.h>
#include <ctype.h>
void urldecode2(char *dst, const char *src)
{
char a, b;
while (*src) {
/* A '%' followed by two hex digits decodes to one byte */
if ((*src == '%') &&
((a = src[1]) && (b = src[2])) &&
(isxdigit(a) && isxdigit(b))) {
if (a >= 'a')
a -= 'a'-'A';
if (a >= 'A')
a -= ('A' - 10);
else
a -= '0';
if (b >= 'a')
b -= 'a'-'A';
if (b >= 'A')
b -= ('A' - 10);
else
b -= '0';
*dst++ = 16*a+b;
src+=3;
} else if (*src == '+') {
/* in query strings a '+' encodes a space */
*dst++ = ' ';
src++;
} else {
*dst++ = *src++;
}
}
*dst++ = '\0';
}
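And a quick usage sketch, decoding in place as described above (the sample string is only an example):
#include <stdio.h>

int main(void)
{
    char buf[] = "a+t+%26+t";
    urldecode2(buf, buf);               /* in-place: output never exceeds the input length */
    printf("Decoded string: %s\n", buf); /* prints "a t & t" */
    return 0;
}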
Here is a C decoder for a percent encoded string. It returns -1 if the encoding is invalid and 0 otherwise. The decoded string is stored in out. I'm quite sure this is the fastest code of the answers given so far.
int percent_decode(char* out, const char* in) {
static const signed char tbl[256] = {
-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,
0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1,
-1,10,11,12,13,14,15,-1, -1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,
-1,10,11,12,13,14,15,-1, -1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1
};
char c, *beg = out;
signed char v1, v2;
if(in != NULL) {
while((c=*in++) != '\0') {
if(c == '%') {
if((v1=tbl[(unsigned char)*in++])<0 ||
(v2=tbl[(unsigned char)*in++])<0) {
*beg = '\0';
return -1;
}
c = (v1<<4)|v2;
}
*out++ = c;
}
}
*out = '\0';
return 0;
}
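A short usage sketch (the input is only an example); since the decoded result is never longer than the input, strlen(in) + 1 bytes are enough for the output:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(void)
{
    const char *in = "http%3A%2F%2F";
    char *out = malloc(strlen(in) + 1);   /* decoded form is never longer than the input */
    if (out != NULL && percent_decode(out, in) == 0)
        printf("%s\n", out);              /* prints "http://" */
    free(out);
    return 0;
}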
uriparser library is small and lightweight.
This function I've just whipped up is very lightweight and should do what you want. Note that I haven't programmed it to strict URI standards (I used what I know off the top of my head). It's buffer-safe and doesn't overflow as far as I can see; adapt as you deem fit:
#include <assert.h>
#include <ctype.h>
#include <stdio.h>
#include <string.h>
void urldecode(char *pszDecodedOut, size_t nBufferSize, const char *pszEncodedIn)
{
memset(pszDecodedOut, 0, nBufferSize);
enum DecodeState_e
{
STATE_SEARCH = 0, ///< searching for a percent sign ('%') to convert
STATE_CONVERTING, ///< convert the two following characters from hex
};
DecodeState_e state = STATE_SEARCH;
for(unsigned int i = 0; i < strlen(pszEncodedIn); ++i)
{
switch(state)
{
case STATE_SEARCH:
{
if(pszEncodedIn[i] != '%')
{
strncat(pszDecodedOut, &pszEncodedIn[i], 1);
assert(strlen(pszDecodedOut) < nBufferSize);
break;
}
// We are now converting
state = STATE_CONVERTING;
}
break;
case STATE_CONVERTING:
{
// Conversion complete (i.e. don't convert again next iter)
state = STATE_SEARCH;
// Create a buffer to hold the hex. For example, if %20, this
// buffer would hold 20 (in ASCII)
char pszTempNumBuf[3] = {0};
strncpy(pszTempNumBuf, &pszEncodedIn[i], 2);
// Ensure both characters are hexadecimal
bool bBothDigits = true;
for(int j = 0; j < 2; ++j)
{
if(!isxdigit(pszTempNumBuf[j]))
bBothDigits = false;
}
if(!bBothDigits)
break;
// Convert two hexadecimal characters into one character
int nAsciiCharacter;
sscanf(pszTempNumBuf, "%x", &nAsciiCharacter);
// Ensure we aren't going to overflow
assert(strlen(pszDecodedOut) < nBufferSize);
// Concatenate this character onto the output (copies the first byte
// of the int, which assumes a little-endian machine)
strncat(pszDecodedOut, (char*)&nAsciiCharacter, 1);
// Skip the next character
i++;
}
break;
}
}
}
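A quick usage sketch (buffer size and input are only examples):
char szDecoded[128];
urldecode(szDecoded, sizeof(szDecoded), "a%20t%20%26%20t");
/* szDecoded now holds "a t & t"; note this routine does not map '+' to a space */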
The ever-excellent glib has some URI functions, including scheme-extraction, escaping and un-escaping.
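If I remember the GLib API correctly, the un-escaping part can be as short as this (treat it as a sketch, not gospel):
#include <glib.h>

/* g_uri_unescape_string() returns a newly allocated decoded copy
 * (or NULL on invalid input); release it with g_free(). */
char *decoded = g_uri_unescape_string("http%3A%2F%2F", NULL);
if (decoded != NULL) {
    g_print("%s\n", decoded);   /* prints "http://" */
    g_free(decoded);
}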
I'd suggest curl and libcurl. It's widely used and should do the trick for you. Just check their website.
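If I recall correctly, libcurl exposes this directly via curl_easy_unescape(); a rough sketch (error handling trimmed) would be:
#include <curl/curl.h>
#include <stdio.h>

int main(void)
{
    CURL *curl = curl_easy_init();
    int outlen = 0;
    /* Third argument 0 means "use strlen() on the input". */
    char *decoded = curl_easy_unescape(curl, "a%20t%20%26%20t", 0, &outlen);
    if (decoded != NULL) {
        printf("%s\n", decoded);   /* prints "a t & t" */
        curl_free(decoded);
    }
    curl_easy_cleanup(curl);
    return 0;
}
Note that curl_easy_unescape() does not map '+' to a space, so form-encoded input like "a+t+%26+t" needs that handled separately.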
Thanks to @ThomasH for his answer. I'd like to propose a better formatting here…
And… since the decoded URI component is never longer than the encoded one, it is always possible to implode it within the same array of characters (a.k.a. "string"). So, I'll propose two possibilities here:
#include <stdio.h>
#include <ctype.h>
#include <limits.h>
int decodeURIComponent (char *sSource, char *sDest) {
int nLength;
for (nLength = 0; *sSource; nLength++) {
if (*sSource == '%' && sSource[1] && sSource[2] && isxdigit(sSource[1]) && isxdigit(sSource[2])) {
sSource[1] -= sSource[1] <= '9' ? '0' : (sSource[1] <= 'F' ? 'A' : 'a')-10;
sSource[2] -= sSource[2] <= '9' ? '0' : (sSource[2] <= 'F' ? 'A' : 'a')-10;
sDest[nLength] = 16 * sSource[1] + sSource[2];
sSource += 3;
continue;
}
sDest[nLength] = *sSource++;
}
sDest[nLength] = '\0';
return nLength;
}
#define implodeURIComponent(url) decodeURIComponent(url, url)
And, finally…:
int main () {
char sMyUrl[] = "http%3a%2F%2ffoo+bar%2fabcd";
int nNewLength = implodeURIComponent(sMyUrl);
/* Let's print: "http://foo+bar/abcd\nLength: 19" */
printf("%s\nLength: %d\n", sMyUrl, nNewLength);
return 0;
}
Try urlcpp https://github.com/larroy/urlcpp
It's a C++ module that you can easily integrate into your project; it depends on boost::regex.
I came across this 8-year-old question while looking for the same thing. Based on previous answers, I also wrote my own version, which is independent of external libraries, easy to understand and probably fast (no benchmark). I tested the code with gcc; it should decode until the end of the string or until an invalid character (the latter not tested). Just remember to free the allocated space.
#include <stdlib.h>
#include <string.h>

const char ascii_hex_4bit[23] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0, 0, 10, 11, 12, 13, 14, 15};
static inline char to_upper(char c)
{
if ((c >= 'a') && (c <= 'z')) return c ^ 0x20;
return c;
}
char *url_decode(const char *str)
{
size_t i, j, len = strlen(str);
char c, d, url_hex;
char *decoded = malloc(len + 1);
if (decoded == NULL) return NULL;
if (len == 0) { decoded[0] = 0; return decoded; } /* avoid writing past a 1-byte buffer below */
i = 0;
j = 0;
do
{
c = str[i];
d = 0;
if (c == '%')
{
url_hex = to_upper(str[++i]);
if (((url_hex >= '0') && (url_hex <= '9')) || ((url_hex >= 'A') && (url_hex <= 'F')))
{
d = ascii_hex_4bit[url_hex - 48] << 4;
url_hex = to_upper(str[++i]);
if (((url_hex >= '0') && (url_hex <= '9')) || ((url_hex >= 'A') && (url_hex <= 'F')))
{
d |= ascii_hex_4bit[url_hex - 48];
}
else
{
d = 0;
}
}
}
else if (c == '+')
{
d = ' ';
}
else if ((c == '*') || (c == '-') || (c == '.') || ((c >= '0') && (c <= '9')) ||
((c >= 'A') && (c <= 'Z')) || (c == '_') || ((c >= 'a') && (c <= 'z')))
{
d = c;
}
decoded[j++] = d;
++i;
} while ((i < len) && (d != 0));
decoded[j] = 0;
return decoded;
}
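A minimal usage sketch (the input is only an example):
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
    char *decoded = url_decode("a+t+%26+t");
    if (decoded != NULL) {
        printf("%s\n", decoded);   /* prints "a t & t" */
        free(decoded);
    }
    return 0;
}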
#include <ctype.h>
#include <string.h>
#include "libavutil/mem.h" /* av_malloc() */

/**
* Locale-independent conversion of ASCII characters to lowercase.
*/
int av_tolower(int c)
{
if (c >= 'A' && c <= 'Z')
c ^= 0x20;
return c;
}
/**
* Decodes an URL from its percent-encoded form back into normal
* representation. This function returns the decoded URL in a string.
* The URL to be decoded does not necessarily have to be encoded but
* in that case the original string is duplicated.
*
* @param url a string to be decoded.
* @return new string with the URL decoded or NULL if decoding failed.
* Note that the returned string should be explicitly freed when not
* used anymore.
*/
char *urldecode(const char *url)
{
int s = 0, d = 0, url_len = 0;
char c;
char *dest = NULL;
if (!url)
return NULL;
url_len = strlen(url) + 1;
dest = av_malloc(url_len);
if (!dest)
return NULL;
while (s < url_len) {
c = url[s++];
if (c == '%' && s + 2 < url_len) {
char c2 = url[s++];
char c3 = url[s++];
if (isxdigit(c2) && isxdigit(c3)) {
c2 = av_tolower(c2);
c3 = av_tolower(c3);
if (c2 <= '9')
c2 = c2 - '0';
else
c2 = c2 - 'a' + 10;
if (c3 <= '9')
c3 = c3 - '0';
else
c3 = c3 - 'a' + 10;
dest[d++] = 16 * c2 + c3;
} else { /* %zz or something other invalid */
dest[d++] = c;
dest[d++] = c2;
dest[d++] = c3;
}
} else if (c == '+') {
dest[d++] = ' ';
} else {
dest[d++] = c;
}
}
return dest;
}
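A brief usage sketch, assuming the usual FFmpeg pairing of av_malloc()/av_free():
#include <stdio.h>
#include "libavutil/mem.h" /* av_free() */

int main(void)
{
    char *decoded = urldecode("http%3A%2F%2F");
    if (decoded != NULL) {
        printf("%s\n", decoded);   /* prints "http://" */
        av_free(decoded);
    }
    return 0;
}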
by www.elesos.com