Sql Like similar use in c++ - c++

I would like to ask a simple question: is there any way to use something like SQL's LIKE in C++?
For example, LIKE '%s'. I want something like this in C++ because, when a user searches for something in the program, it should simply match all the related data.

Regular expressions can be used to achieve what you want. There are two problems, though:
1.) They are really complicated to use compared to the simple SQL LIKE keyword.
2.) They are only a standard feature in C++11. If you write in "old" C++, then you'd have to use the Boost.Regex library.
However...
Looking at the very basic LIKE examples at w3schools.com, you could in fact implement many of them in simple C++, using normal std::string functions.
For instance, let's have a look at this:
SELECT * FROM Customers WHERE City LIKE '%s';
This just means that for every string, you look at the last character and check if it's 's':
std::string s;
// ...
// Equivalent of LIKE '%s': the string is non-empty and its last character is 's'.
// (The closing parenthesis of the condition was missing in the original snippet.)
if (!s.empty() && (s[s.size() - 1] == 's')) {
// ...
}
Or this:
SELECT * FROM Customers WHERE Country NOT LIKE '%land%';
In C++:
std::string s;
// ...
// find() yields std::string::npos when "land" does not occur anywhere in s,
// which mirrors NOT LIKE '%land%'.
if (s.find("land") == std::string::npos) {
// ...
}
So, in the end, it really depends on what exactly you'd want to do with LIKE.

You could try using boost's string algorithm library, which can be used with std::string. It also gives you the option of using C++11 regular expressions. There are lots of functions for doing useful things (see the docs), such as starts_with() (or istarts_with() for case insensitive version)

This is a version copied from SQLite.
Now you can use: patternCompare("A*A","AxA");
//---------------------LIKE------------------------------
/*
** Payload bits carried by the lead byte of a multi-byte UTF-8 sequence.
** The table is indexed by (lead byte - 0xc0), so it has 64 entries
** covering lead bytes 0xc0 through 0xff.
*/
static const unsigned char sqlite3Utf8Trans1[] = {
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x00, 0x00,
};
typedef unsigned int u32;
typedef unsigned char u8;

/*
** Decode one UTF-8 character from the zero-terminated string at *pz and
** advance *pz past every byte that was consumed.
**
** Returns the decoded code point.  Values that decode to less than 0x80
** from a multi-byte sequence (over-long encodings), UTF-16 surrogates
** (0xD800..0xDFFF) and 0xFFFE/0xFFFF are all reported as U+FFFD.
*/
u32 sqlite3Utf8Read(
    const unsigned char **pz /* In/out: cursor into the string being read */
){
    const unsigned char *cursor = *pz;
    unsigned int code = *cursor++;

    if (code >= 0xc0) {
        /* Lead byte: seed with its payload, then fold in continuation bytes. */
        code = sqlite3Utf8Trans1[code - 0xc0];
        for (; (*cursor & 0xc0) == 0x80; cursor++) {
            code = (code << 6) + (0x3f & *cursor);
        }

        int overlong = code < 0x80;
        int surrogate = (code & 0xFFFFF800) == 0xD800;
        int noncharacter = (code & 0xFFFFFFFE) == 0xFFFE;
        if (overlong || surrogate || noncharacter) {
            code = 0xFFFD;
        }
    }

    *pz = cursor;
    return code;
}
/*
** Assuming zIn points to the first byte of a UTF-8 character,
** advance zIn to point to the first byte of the next UTF-8 character.
**
** zIn must be a modifiable pointer lvalue; it is evaluated more than once.
** The body is wrapped in do { } while (0) so that the macro behaves as a
** single statement (e.g. directly inside an if/else), and the argument is
** parenthesized at every use.
*/
#define SQLITE_SKIP_UTF8(zIn) do { \
    if( (*((zIn)++))>=0xc0 ){ \
        while( (*(zIn) & 0xc0)==0x80 ){ (zIn)++; } \
    } \
} while( 0 )
/*
** Case-folding table: entry i is i itself, except that the ASCII
** upper-case letters 'A'..'Z' (65..90) map to 'a'..'z' (97..122).
*/
const unsigned char sqlite3UpperToLower[] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53,
54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 97, 98, 99, 100, 101, 102, 103,
104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121,
122, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107,
108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125,
126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161,
162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197,
198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215,
216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233,
234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
252, 253, 254, 255
};
/*
** Fold A to lower case in place when it is a 7-bit value; larger code
** points are left untouched.  A must be a modifiable lvalue and is
** evaluated more than once.  The do { } while (0) wrapper makes the macro
** a single statement, so it cannot capture a following "else".
*/
# define GlobUpperToLower(A) \
    do { if( !((A)&~0x7f) ){ (A) = sqlite3UpperToLower[(A)]; } } while( 0 )
/*
** Compare two UTF-8 strings for equality where the first string can
** potentially be a "glob" expression. Return true (1) if they
** are the same and false (0) if they are different.
**
** Parameters:
**
**      zPattern   Zero-terminated glob pattern.
**      zString    Zero-terminated string to test against the pattern.
**
** Globbing rules:
**
**      '*'       Matches any sequence of zero or more characters.
**
**      '?'       Matches exactly one character.
**
**     [...]      Matches one character from the enclosed list of
**                characters.
**
**     [^...]     Matches one character not in the enclosed list.
**
** With the [...] and [^...] matching, a ']' character can be included
** in the list by making it the first character after '[' or '^'. A
** range of characters can be specified using '-'. Example:
** "[a-z]" matches any single lower-case letter. To match a '-', make
** it the last character in the list.
**
** In this copy the wildcards are fixed ('*', '?', '['), noCase is 0 so
** literal characters are compared case-sensitively, and esc is 0. Because
** every pattern character examined inside the main loop is non-zero, the
** escape branches can never be taken.
**
** This routine is usually quick, but can be N**2 in the worst case.
**
** Hints: to match '*' or '?', put them in "[]". Like this:
**
**         abc[*]xyz        Matches "abc*xyz" only
*/
static int patternCompare(
const u8 *zPattern, /* The glob pattern */
const u8 *zString /* The string to compare against the glob */
)
{
u32 c, c2; /* Current pattern character / current string character */
int invert; /* True when a character set starts with '^' */
int seen; /* True when the string character was found in the set */
const u8 matchOne = '?';
const u8 matchAll = '*';
const u8 matchSet = '[';
const u8 noCase =0; /* 0: case-sensitive comparison of literal characters */
int prevEscape = 0; /* True if the previous character was 'escape' */
u32 esc = 0; /* Escape character; 0 never equals a pattern character read in the loop */
//u32 esc; /* The escape character */
while ((c = sqlite3Utf8Read(&zPattern)) != 0){
if (c == matchAll && !prevEscape){
/* Collapse a run of '*' and '?': each '?' must consume one string
** character, additional '*' add nothing. */
while ((c = sqlite3Utf8Read(&zPattern)) == matchAll
|| c == matchOne){
if (c == matchOne && sqlite3Utf8Read(&zString) == 0){
return 0;
}
}
if (c == 0){
/* Pattern ended with '*': whatever remains of the string matches. */
return 1;
}
else if (c == esc){
/* Not reachable while esc is 0, since c is non-zero here. */
c = sqlite3Utf8Read(&zPattern);
if (c == 0){
return 0;
}
}
else if (c == matchSet){
//assert(esc == 0); /* This is GLOB, not LIKE */
//assert(matchSet<0x80); /* '[' is a single-byte character */
/* '*' followed by a set: retry the remaining pattern, starting again at
** the '[' that was just consumed (hence &zPattern[-1]), at each
** successive character of the string. */
while (*zString && patternCompare(&zPattern[-1], zString) == 0){
SQLITE_SKIP_UTF8(zString);
}
return *zString != 0;
}
/* '*' followed by literal c: advance to each occurrence of c in the
** string and try to match the rest of the pattern right after it. */
while ((c2 = sqlite3Utf8Read(&zString)) != 0){
if (noCase){
GlobUpperToLower(c2);
GlobUpperToLower(c);
while (c2 != 0 && c2 != c){
c2 = sqlite3Utf8Read(&zString);
GlobUpperToLower(c2);
}
}
else{
while (c2 != 0 && c2 != c){
c2 = sqlite3Utf8Read(&zString);
}
}
if (c2 == 0) return 0;
if (patternCompare(zPattern, zString)) return 1;
}
return 0;
}
else if (c == matchOne && !prevEscape){
/* '?' consumes exactly one string character; fail at end of string. */
if (sqlite3Utf8Read(&zString) == 0){
return 0;
}
}
else if (c == matchSet){
u32 prior_c = 0; /* Previous set member, used as the low end of a range */
//assert(esc == 0); /* This only occurs for GLOB, not LIKE */
seen = 0;
invert = 0;
/* From here on c holds the STRING character tested against the set,
** while c2 walks the set members in the pattern. */
c = sqlite3Utf8Read(&zString);
if (c == 0) return 0;
c2 = sqlite3Utf8Read(&zPattern);
if (c2 == '^'){
invert = 1;
c2 = sqlite3Utf8Read(&zPattern);
}
if (c2 == ']'){
/* A ']' directly after '[' or '^' is a literal set member. */
if (c == ']') seen = 1;
c2 = sqlite3Utf8Read(&zPattern);
}
while (c2 && c2 != ']'){
/* '-' forms a range only when it has a preceding member and is not the
** last character before ']' or the end of the pattern. */
if (c2 == '-' && zPattern[0] != ']' && zPattern[0] != 0 && prior_c>0){
c2 = sqlite3Utf8Read(&zPattern);
if (c >= prior_c && c <= c2) seen = 1;
prior_c = 0;
}
else{
if (c == c2){
seen = 1;
}
prior_c = c2;
}
c2 = sqlite3Utf8Read(&zPattern);
}
/* c2 == 0 means the set was never closed by ']'; otherwise the match
** fails when membership (seen) and inversion agree. */
if (c2 == 0 || (seen ^ invert) == 0){
return 0;
}
}
else if (esc == c && !prevEscape){
/* Not reachable while esc is 0, since c is non-zero inside the loop. */
prevEscape = 1;
}
else{
/* Literal pattern character: it must equal the next string character. */
c2 = sqlite3Utf8Read(&zString);
if (noCase){
GlobUpperToLower(c);
GlobUpperToLower(c2);
}
if (c != c2){
return 0;
}
prevEscape = 0;
}
}
/* Pattern exhausted: it is a match only if the string is exhausted too. */
return *zString == 0;
}

http://www.cplusplus.com/reference/regex/regex_match/ has an example of what you want to do. See lines 9-10 of the code near the bottom.

Related

Using clBLAS on MACOSX

I just installed clBLAS on my mac (Monterey 12.4) using brew :
brew install clblas
But I can't run the simple example given by the library :
#include <sys/types.h>
#include <stdio.h>
/* Include the clBLAS header. It includes the appropriate OpenCL headers */
#include <clBLAS.h>
/* This example uses predefined matrices and their characteristics for
* simplicity purpose.
*/
/* Dimensions passed to clblasSgemm below: A holds M*K values, B holds K*N
* values, C holds M*N values. */
#define M 4
#define N 3
#define K 5
static const cl_float alpha = 10;
static const cl_float A[M*K] = {
11, 12, 13, 14, 15,
21, 22, 23, 24, 25,
31, 32, 33, 34, 35,
41, 42, 43, 44, 45,
};
static const size_t lda = K; /* i.e. lda = K */
static const cl_float B[K*N] = {
11, 12, 13,
21, 22, 23,
31, 32, 33,
41, 42, 43,
51, 52, 53,
};
static const size_t ldb = N; /* i.e. ldb = N */
static const cl_float beta = 20;
static cl_float C[M*N] = {
11, 12, 13,
21, 22, 23,
31, 32, 33,
41, 42, 43,
};
static const size_t ldc = N; /* i.e. ldc = N */
/* Host buffer that receives the contents of bufC after the gemm call. */
static cl_float result[M*N];
/*
* Runs one clblasSgemm call on the first GPU device of the first OpenCL
* platform and reads the output buffer back into result[].
*
* NOTE(review): every status code is stored in err and then overwritten by
* the next call without being inspected, and ret is never changed from 0,
* so the program reports success even if a step fails.
*/
int main( void )
{
cl_int err;
cl_platform_id platform = 0;
cl_device_id device = 0;
cl_context_properties props[3] = { CL_CONTEXT_PLATFORM, 0, 0 };
cl_context ctx = 0;
cl_command_queue queue = 0;
cl_mem bufA, bufB, bufC;
cl_event event = NULL;
int ret = 0;
/* Setup OpenCL environment. */
err = clGetPlatformIDs( 1, &platform, NULL );
err = clGetDeviceIDs( platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL );
/* Fill in the platform slot of the { CL_CONTEXT_PLATFORM, 0, 0 } list. */
props[1] = (cl_context_properties)platform;
ctx = clCreateContext( props, 1, &device, NULL, NULL, &err );
queue = clCreateCommandQueue( ctx, device, 0, &err );
/* Setup clBLAS */
err = clblasSetup( );
/* Prepare OpenCL memory objects and place matrices inside them. */
bufA = clCreateBuffer( ctx, CL_MEM_READ_ONLY, M * K * sizeof(*A),
NULL, &err );
bufB = clCreateBuffer( ctx, CL_MEM_READ_ONLY, K * N * sizeof(*B),
NULL, &err );
bufC = clCreateBuffer( ctx, CL_MEM_READ_WRITE, M * N * sizeof(*C),
NULL, &err );
/* CL_TRUE makes each write blocking. */
err = clEnqueueWriteBuffer( queue, bufA, CL_TRUE, 0,
M * K * sizeof( *A ), A, 0, NULL, NULL );
err = clEnqueueWriteBuffer( queue, bufB, CL_TRUE, 0,
K * N * sizeof( *B ), B, 0, NULL, NULL );
err = clEnqueueWriteBuffer( queue, bufC, CL_TRUE, 0,
M * N * sizeof( *C ), C, 0, NULL, NULL );
/* Call clBLAS extended function. Perform gemm for the lower right sub-matrices */
/* NOTE(review): all three buffer offsets passed below are 0 and the sizes
* are the full M, N, K, so the call as written does not select a
* sub-matrix - confirm the wording of the comment above. */
err = clblasSgemm( clblasRowMajor, clblasNoTrans, clblasNoTrans,
M, N, K,
alpha, bufA, 0, lda,
bufB, 0, ldb, beta,
bufC, 0, ldc,
1, &queue, 0, NULL, &event );
/* Wait for calculations to be finished. */
err = clWaitForEvents( 1, &event );
/* Fetch results of calculations from GPU memory. */
err = clEnqueueReadBuffer( queue, bufC, CL_TRUE, 0,
M * N * sizeof(*result),
result, 0, NULL, NULL );
/* Release OpenCL memory objects. */
clReleaseMemObject( bufC );
clReleaseMemObject( bufB );
clReleaseMemObject( bufA );
/* Finalize work with clBLAS */
clblasTeardown( );
/* Release OpenCL working objects. */
/* NOTE(review): event is not released anywhere in this function. */
clReleaseCommandQueue( queue );
clReleaseContext( ctx );
return ret;
}
I get the error :
Undefined symbols for architecture x86_64:
"_clblasSetup", referenced from:
_main in main.o
"_clblasSgemm", referenced from:
_main in main.o
"_clblasTeardown", referenced from:
_main in main.o
ld: symbol(s) not found for architecture x86_64
clang: error: linker command failed with exit code 1 (use -v to see invocation)
make: *** [Program] Error 1
I know this is a linker problem but I don't know how to solve it. I'm including OpenCL like I do for other project :
LDFLAGS=-framework OpenCL
I tried variations around :
LDFLAGS=-framework OpenCL -framework clblas
But nothing works. Sorry if the question is simple.
EDIT : I found with this question that cblas.h is in a
-framework Accelerate
But still no possibility to find clblas.h
I found the solution which was quite simple.
We only need to add the library in the makefile :
/usr/local/Cellar/clblas/2.12/lib/libclBLAS.dylib
And add the path to the header :
/usr/local/Cellar/clblas/2.12/include/
Paths may depend on your installation.

Encrypt and Decrypt some string in C++ And Delphi

I have the following code in Delphi XE2 to Encrypt and Decrypt a String.
The problem is that I get different encryption results. I need to read and write some strings in both languages, Delphi and C++, and I do not want to use a DLL written in Delphi to do it from C++.
{ Encrypts or decrypts Input with RC4, using a key derived from the SHA-1
  hash of password.

  Encrypt = True : Input is plain text; the result is the Base64 encoding of
                   the cipher text.
  Encrypt = False: Input is Base64 text; it is decoded, decrypted and the
                   plain text is returned.

  Raises the last OS error (RaiseLastOSError) when a CryptoAPI call fails.

  NOTE(review): the provider name MS_DEF_DH_SCHANNEL_PROV is combined with the
  provider type PROV_RSA_FULL here, while the C++ code elsewhere on this page
  passes a nil provider name - confirm that this pairing is intended. }
function CryptString(Const Input: string; password : AnsiString; Encrypt: Boolean) : string;
const
  BufferSize=1024*1024;
var
  StreamSource : TStringStream;
  StreamDest : TStringStream;
  CRYPTPROV : HCRYPTPROV;
  CRYPTHASH : HCRYPTHASH;
  CRYPTKEY : HCRYPTKEY;
  Buffer : LPBYTE;
  BytesIn : DWORD;
  Final : Boolean;
  Encoder : TIdEncoderMIME;
  Decoder : TIdDecoderMIME;
  DestStream : TStringStream;
begin
  // Fail early: without a provider handle none of the later calls can work.
  if not CryptAcquireContext(CRYPTPROV, nil, MS_DEF_DH_SCHANNEL_PROV, PROV_RSA_FULL, CRYPT_VERIFYCONTEXT) then
    RaiseLastOSError;
  try
    //create a valid key based in the password
    if not CryptCreateHash(CRYPTPROV, CALG_SHA1, 0, 0, CRYPTHASH) then RaiseLastOSError;
    try
      // '@' takes the address of the first password byte.
      if not CryptHashData(CRYPTHASH, @Password[1], Length(Password), 0) then RaiseLastOSError;
      if not CryptDeriveKey(CRYPTPROV, CALG_RC4, CRYPTHASH, 0, CRYPTKEY) then RaiseLastOSError;
    finally
      CryptDestroyHash(CRYPTHASH);
    end;
    try
      StreamSource := TStringStream.Create(Input);
      StreamDest := nil;
      try
        StreamSource.Position := 0;
        StreamDest := TStringStream.Create;
        GetMem(Buffer, BufferSize);
        try
          if not Encrypt then
          begin
            //decode the string using base64
            Decoder := TIdDecoderMIME.Create(nil);
            try
              DestStream := TStringStream.Create;
              try
                StreamDest.Position:=0;
                Decoder.DecodeBegin(DestStream);
                Decoder.Decode(StreamSource);
                Decoder.DecodeEnd;
                // Replace the Base64 text in StreamSource by the decoded bytes.
                StreamSource.Clear;
                DestStream.Position:=0;
                StreamSource.CopyFrom(DestStream,DestStream.Size);
                StreamSource.Position:=0;
              finally
                FreeAndNil( DestStream);
              end;
            finally
              FreeAndNil( Decoder);
            end;
          end;
          // Process the source in BufferSize chunks; Final marks the last chunk.
          repeat
            BytesIn := StreamSource.Read(Buffer^, BufferSize);
            Final := (StreamSource.Position >= StreamSource.Size);
            if Encrypt then begin
              if not CryptEncrypt(CRYPTKEY, 0, Final, 0, Buffer, BytesIn, BytesIn) then
                RaiseLastOSError;
            end
            else if not CryptDecrypt(CRYPTKEY, 0, Final, 0, Buffer, BytesIn) then
              RaiseLastOSError;
            StreamDest.Write(Buffer^, BytesIn);
          until Final;
          //encode the string using base64
          if Encrypt then
          begin
            Encoder := TIdEncoderMIME.Create(nil);
            try
              DestStream:=TStringStream.Create;
              try
                StreamDest.Position:=0;
                Encoder.Encode(StreamDest,DestStream);
                Result := DestStream.DataString;
              finally
                FreeAndNil( DestStream);
              end;
            finally
              FreeAndNil(Encoder);
            end;
          end
          else
            Result:= StreamDest.DataString;
        finally
          FreeMem(Buffer, BufferSize);
        end;
      finally
        FreeAndNil( StreamSource);
        FreeAndNil( StreamDest);
      end;
    finally
      // The derived session key was never released in the original code.
      CryptDestroyKey(CRYPTKEY);
    end;
  finally
    CryptReleaseContext(CRYPTPROV, 0);
  end;
end;
I found the following code in C++ to make the same thing, but the encryption results are different.
int main()
{
const char* passw = "teste";
const char* toencrypt = "sa";
HCRYPTPROV hProv;
HCRYPTHASH hHash;
HCRYPTKEY hKey;
DWORD todwSize = (DWORD)strlen(toencrypt);
PBYTE pBuffer;
CryptAcquireContext(&hProv, NULL, NULL , PROV_RSA_FULL, CRYPT_VERIFYCONTEXT);
CryptCreateHash(hProv, CALG_SHA1 , 0, 0, &hHash);
CryptHashData(hHash, (BYTE*)passw, strlen(passw), 0);
CryptDeriveKey(hProv, CALG_RC4, hHash, 0, &hKey);
CryptDestroyHash(hHash);
//--------------------------------------------------------------------
pBuffer = (BYTE *)malloc(todwSize);
strcpy((char*)pBuffer, toencrypt);
CryptEncrypt(hKey, 0, TRUE, 0, pBuffer, &todwSize, todwSize);
PBYTE pBreturn = pBuffer;
const char* message = (const char*)pBreturn;
printf("%s", message);
system("pause");
//--------------------------------------------------------------------
DWORD dwSize = (DWORD)strlen(message);
PBYTE depBuffer;
depBuffer = (BYTE *)malloc(1460);
strcpy((char*)depBuffer, message);
CryptDecrypt(hKey, 0, TRUE, 0, depBuffer, &dwSize);
CryptDestroyKey(hKey);
CryptDestroyHash(hHash);
CryptReleaseContext(hProv, 0);
if (GetLastError() != 0)
{
printf("%d", GetLastError());
}
PBYTE depBreturn = depBuffer;
printf("%s", (const char*)depBreturn);
printf("\n%d", strlen(message));
return 0;
}
The code below shows how to encrypt a string, decrypt a string, and encrypt/decrypt a string with Base64 conversion, as the Delphi code above does, in VC++.
Part of this code is based on CryptEncrypt does not encrypt whole text
#include "stdafx.h"
#include <atlenc.h>
#include <atlstr.h>
#include <locale.h>
/*
 * Maps a character code to its 6-bit Base64 value (0..63).  Every
 * character that is not part of the Base64 alphabet - including '=' and
 * the terminating NUL - maps to 64.
 */
static const unsigned char pr2six[256] =
{
/* ASCII table */
64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 62, 64, 64, 64, 63,
52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 64, 64, 64, 64, 64, 64,
64, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 64, 64, 64, 64, 64,
64, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 64, 64, 64, 64, 64,
64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64
};

/*
 * Returns the buffer size needed to decode bufcoded: three bytes for
 * every started group of four leading Base64 characters, plus one byte
 * for a terminating NUL.  Counting stops at the first character outside
 * the Base64 alphabet.
 */
int Base64decode_len(const char *bufcoded)
{
    const unsigned char *text = (const unsigned char *)bufcoded;
    int valid = 0;

    while (pr2six[text[valid]] <= 63) {
        valid++;
    }
    return ((valid + 3) / 4) * 3 + 1;
}
/*
 * Decodes the leading Base64 characters of bufcoded into bufplain and
 * appends a NUL byte after the decoded data.  Decoding stops at the first
 * character outside the Base64 alphabet (for example '=' or the string
 * terminator).
 *
 * Returns the number of decoded bytes, not counting the appended NUL.
 */
int Base64decode(char *bufplain, const char *bufcoded)
{
    const unsigned char *src = (const unsigned char *)bufcoded;
    unsigned char *dst = (unsigned char *)bufplain;
    int remaining = 0;
    int decoded;

    /* Count the leading characters that belong to the Base64 alphabet. */
    while (pr2six[src[remaining]] <= 63) {
        remaining++;
    }
    decoded = ((remaining + 3) / 4) * 3;

    /* Full groups of four characters, keeping the last (possibly partial)
     * group for the tail handling below. */
    for (; remaining > 4; remaining -= 4, src += 4) {
        *dst++ = (unsigned char)(pr2six[src[0]] << 2 | pr2six[src[1]] >> 4);
        *dst++ = (unsigned char)(pr2six[src[1]] << 4 | pr2six[src[2]] >> 2);
        *dst++ = (unsigned char)(pr2six[src[2]] << 6 | pr2six[src[3]]);
    }

    /* Note: (remaining == 1) would be an error, so just ignore that case */
    if (remaining > 1) {
        *dst++ = (unsigned char)(pr2six[src[0]] << 2 | pr2six[src[1]] >> 4);
    }
    if (remaining > 2) {
        *dst++ = (unsigned char)(pr2six[src[1]] << 4 | pr2six[src[2]] >> 2);
    }
    if (remaining > 3) {
        *dst++ = (unsigned char)(pr2six[src[2]] << 6 | pr2six[src[3]]);
    }
    *dst = '\0';

    /* Subtract the bytes a short final group did not produce. */
    decoded -= (4 - remaining) & 3;
    return decoded;
}
/* The 64 output characters of the Base64 alphabet, indexed by 6-bit value. */
static const char basis_64[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

/*
 * Returns the buffer size needed to Base64-encode len bytes, including
 * the terminating NUL.
 */
int Base64encode_len(int len)
{
    int groups = (len + 2) / 3;
    return groups * 4 + 1;
}

/*
 * Base64-encodes the first len bytes of string into encoded and appends
 * a terminating NUL.  Output is padded with '=' to a multiple of four
 * characters.
 *
 * Returns the number of bytes written, including the NUL.
 */
int Base64encode(char *encoded, const char *string, int len)
{
    const unsigned char *src = (const unsigned char *)string;
    char *out = encoded;
    int pos = 0;

    /* Complete 3-byte groups become four output characters each. */
    while (pos < len - 2) {
        unsigned int b0 = src[pos];
        unsigned int b1 = src[pos + 1];
        unsigned int b2 = src[pos + 2];

        *out++ = basis_64[b0 >> 2];
        *out++ = basis_64[((b0 & 0x3) << 4) | (b1 >> 4)];
        *out++ = basis_64[((b1 & 0xF) << 2) | (b2 >> 6)];
        *out++ = basis_64[b2 & 0x3F];
        pos += 3;
    }

    /* One or two trailing bytes: emit the partial group plus padding. */
    if (pos < len) {
        unsigned int b0 = src[pos];

        *out++ = basis_64[b0 >> 2];
        if (pos == len - 1) {
            *out++ = basis_64[(b0 & 0x3) << 4];
            *out++ = '=';
        } else {
            unsigned int b1 = src[pos + 1];

            *out++ = basis_64[((b0 & 0x3) << 4) | (b1 >> 4)];
            *out++ = basis_64[(b1 & 0xF) << 2];
        }
        *out++ = '=';
    }

    *out++ = '\0';
    return (int)(out - encoded);
}
bool encryptStr(const char *pSourceTxt,const char* pKey, int length, char * pEncryptTxt)
{
HCRYPTPROV hProv;
HCRYPTHASH hHash;
HCRYPTKEY hKey;
DWORD todwSize = (DWORD)strlen(pSourceTxt), needSize;
PBYTE pBuffer;
char *txtBuf = (char *)_alloca(todwSize + 1);
if (CryptAcquireContext(&hProv, NULL, NULL, PROV_RSA_FULL, CRYPT_VERIFYCONTEXT))
{
if (CryptCreateHash(hProv, CALG_SHA1, 0, 0, &hHash))
{
if (CryptHashData(hHash, (BYTE*)pKey, (DWORD)strlen(pKey), 0) &&
CryptDeriveKey(hProv, CALG_RC4, hHash, 0, &hKey))
{
if (CryptEncrypt(hKey, 0, TRUE, 0, NULL, &(needSize = todwSize), 0))
{
memcpy(pBuffer = (BYTE *)_alloca(needSize), pSourceTxt, todwSize);
if (CryptEncrypt(hKey, 0, TRUE, 0, pBuffer, &todwSize, needSize))
{
txtBuf[todwSize] = NULL;
memcpy(txtBuf, pBuffer, todwSize);
memcpy(pEncryptTxt, txtBuf, todwSize + 1);
}
}
CryptDestroyKey(hKey);
}
CryptDestroyHash(hHash);
}
CryptReleaseContext(hProv, 0);
}
return true;
}
bool encryptStrBase64(const char *pSourceTxt, const char* pKey, int length, char * pEncryptTxt)
{
HCRYPTPROV hProv;
HCRYPTHASH hHash;
HCRYPTKEY hKey;
DWORD todwSize = (DWORD)strlen(pSourceTxt), needSize;
PBYTE pBuffer;
char *txtBuf = (char *)_alloca(todwSize + 1);
if (CryptAcquireContext(&hProv, NULL, NULL, PROV_RSA_FULL, CRYPT_VERIFYCONTEXT))
{
if (CryptCreateHash(hProv, CALG_SHA1, 0, 0, &hHash))
{
if (CryptHashData(hHash, (BYTE*)pKey, (DWORD)strlen(pKey), 0) &&
CryptDeriveKey(hProv, CALG_RC4, hHash, 0, &hKey))
{
if (CryptEncrypt(hKey, 0, TRUE, 0, NULL, &(needSize = todwSize), 0))
{
memcpy(pBuffer = (BYTE *)_alloca(needSize), pSourceTxt, todwSize);
if (CryptEncrypt(hKey, 0, TRUE, 0, pBuffer, &todwSize, needSize))
{
txtBuf[todwSize] = NULL;
memcpy(txtBuf, pBuffer, todwSize);
char *txtEncode64 = (char *)_alloca(strlen(pEncryptTxt));
Base64encode(txtEncode64, txtBuf, todwSize);
memcpy(pEncryptTxt, txtEncode64, todwSize +100);
}
}
CryptDestroyKey(hKey);
}
CryptDestroyHash(hHash);
}
CryptReleaseContext(hProv, 0);
}
return true;
}
bool decryptStr(const char *pEncryptTxt, const char* pKey, int length, char * pDecryptTxt)
{
HCRYPTPROV hProv;
HCRYPTHASH hHash;
HCRYPTKEY hKey;
DWORD todwSize = (DWORD)strlen(pEncryptTxt);
PBYTE pBuffer = (BYTE *)_alloca(todwSize);
char *txtBuf = (char *)_alloca(todwSize + 1);
if (CryptAcquireContext(&hProv, NULL, NULL, PROV_RSA_FULL, CRYPT_VERIFYCONTEXT))
{
if (CryptCreateHash(hProv, CALG_SHA1, 0, 0, &hHash))
{
if (CryptHashData(hHash, (BYTE*)pKey, (DWORD)strlen(pKey), 0) &&
CryptDeriveKey(hProv, CALG_RC4, hHash, 0, &hKey))
{
memcpy(pBuffer, pEncryptTxt, todwSize);
if (CryptDecrypt(hKey, 0, TRUE, 0, pBuffer, &todwSize))
{
txtBuf[todwSize] = NULL;
memcpy(txtBuf, pBuffer, todwSize);
memcpy(pDecryptTxt, txtBuf, todwSize + 1);
}
CryptDestroyKey(hKey);
}
CryptDestroyHash(hHash);
}
CryptReleaseContext(hProv, 0);
}
return true;
}
bool decryptStrBase64(const char *pEncryptTxt, const char* pKey, int length, char * pDecryptTxt)
{
HCRYPTPROV hProv;
HCRYPTHASH hHash;
HCRYPTKEY hKey;
char *sourceTxt = (char *)_alloca(2048);
Base64decode(sourceTxt, pEncryptTxt);
DWORD todwSize = (DWORD)strlen(sourceTxt);
PBYTE pBuffer = (BYTE *)_alloca(todwSize);
char *txtBuf = (char *)_alloca(todwSize + 1);
if (CryptAcquireContext(&hProv, NULL, NULL, PROV_RSA_FULL, CRYPT_VERIFYCONTEXT))
{
if (CryptCreateHash(hProv, CALG_SHA1, 0, 0, &hHash))
{
if (CryptHashData(hHash, (BYTE*)pKey, (DWORD)strlen(pKey), 0) &&
CryptDeriveKey(hProv, CALG_RC4, hHash, 0, &hKey))
{
memcpy(pBuffer, sourceTxt, todwSize);
if (CryptDecrypt(hKey, 0, TRUE, 0, pBuffer, &todwSize))
{
txtBuf[todwSize] = NULL;
memcpy(txtBuf, pBuffer, todwSize);
memcpy(pDecryptTxt, txtBuf, todwSize + 1);
}
CryptDestroyKey(hKey);
}
CryptDestroyHash(hHash);
}
CryptReleaseContext(hProv, 0);
}
return true;
}
// Demonstrates the four helpers: raw encrypt/decrypt and the Base64 variants
// that mirror the Delphi routine.
//
// Every output buffer is zero-filled first, so that a helper which fails
// without writing its output leaves an empty string to print.
int main()
{
    const char *encryptWord = "test word";
    const char *encryptKey = "cryptkey";
    int encryptLen = (int)strlen(encryptWord);
    char * txtEncrypt = (char *)_alloca(encryptLen+1);
    memset(txtEncrypt, 0, encryptLen + 1);
    //simple Encrypt TXT
    encryptStr(encryptWord, encryptKey, encryptLen,txtEncrypt);
    // The cipher text is binary: "%s" stops at the first zero byte, if any.
    printf("Simple Encrypt %s \n", txtEncrypt);
    //Decrypt string
    char* decryptTxt = (char *)_alloca(encryptLen + 1);
    memset(decryptTxt, 0, encryptLen + 1);
    decryptStr(txtEncrypt, encryptKey, encryptLen, decryptTxt);
    printf("Txt decrypted %s \n", decryptTxt);
    //Encrypt and Convert to Base64 like delphi Routine
    char* base64TxtEncrypt = (char*)_alloca(encryptLen + 100);//need aloc more size due base64 routine
    memset(base64TxtEncrypt, 0, encryptLen + 100);
    encryptStrBase64(encryptWord, encryptKey, encryptLen, base64TxtEncrypt);
    printf("Base64 txt encrypted %s \n", base64TxtEncrypt);
    //decrypt and Convert to original String like delphi Routine
    char* base64TxtDecrypt = (char*)_alloca(encryptLen + 100);
    memset(base64TxtDecrypt, 0, encryptLen + 100);
    decryptStrBase64( base64TxtEncrypt, encryptKey, (int)strlen(base64TxtEncrypt) , base64TxtDecrypt);
    printf("Base64 txt Decrypted %s \n", base64TxtDecrypt);
    system("pause");
    return 0;
}
And finally the conversion of this code to c#, and mission end
using System;
using System.Collections.Generic;
using System.Linq;
using System.Runtime.InteropServices;
using System.Text;
namespace CryptCsharpNativo
{
    /// <summary>
    /// RC4 string encryption/decryption through the Windows CryptoAPI
    /// (advapi32.dll). The key is derived from the SHA-1 hash of a password
    /// and the cipher text is exchanged as Base64 text.
    /// </summary>
    class Program
    {
        public const uint PROV_RSA_FULL = 1;
        public const uint CRYPT_VERIFYCONTEXT = 0xF0000000;
        public const uint CRYPT_NEWKEYSET = 0x00000008;
        public enum ALG_ID
        {
            CALG_MD5 = 0x00008003,
            CALG_RC4 = 0x00006801,
            CALG_SHA1 = 0x00008004
        }
        [DllImport("advapi32.dll")]
        [return: MarshalAs(UnmanagedType.Bool)]
        public static extern bool CryptAcquireContext(out IntPtr phProv, string pszContainer, string pszProvider, uint dwProvType, uint dwFlags);
        [DllImport("advapi32.dll")]
        [return: MarshalAs(UnmanagedType.Bool)]
        public static extern bool CryptCreateHash(IntPtr hProv, ALG_ID Algid, IntPtr hKey, uint dwFlags, out IntPtr phHash);
        [DllImport("advapi32.dll")]
        [return: MarshalAs(UnmanagedType.Bool)]
        public static extern bool CryptHashData(IntPtr hHash, byte[] pbData, int dwDataLen, uint dwFlags);
        [DllImport("advapi32.dll")]
        [return: MarshalAs(UnmanagedType.Bool)]
        public static extern bool CryptDeriveKey(IntPtr hProv, ALG_ID Algid, IntPtr hBaseData, uint dwFlags, ref IntPtr phKey);
        [DllImport("advapi32.dll")]
        [return: MarshalAs(UnmanagedType.Bool)]
        public static extern bool CryptDestroyHash(IntPtr hHash);

        // The native CryptEncrypt takes a seventh argument, dwBufLen (the size
        // of pbData in bytes). It is declared here under a private name and
        // the public six-argument CryptEncrypt below supplies that argument.
        [DllImport("advapi32.dll", EntryPoint = "CryptEncrypt")]
        [return: MarshalAs(UnmanagedType.Bool)]
        private static extern bool CryptEncryptNative(IntPtr hKey, IntPtr hHash, [MarshalAs(UnmanagedType.Bool)]bool Final, uint dwFlags, byte[] pbData, ref int pdwDataLen, uint dwBufLen);

        /// <summary>
        /// Calls the native CryptEncrypt, passing the length of
        /// <paramref name="pbData"/> (0 when it is null) as the buffer size.
        /// </summary>
        public static bool CryptEncrypt(IntPtr hKey, IntPtr hHash, bool Final, uint dwFlags, byte[] pbData, ref int pdwDataLen)
        {
            uint bufLen = (pbData == null) ? 0u : (uint)pbData.Length;
            return CryptEncryptNative(hKey, hHash, Final, dwFlags, pbData, ref pdwDataLen, bufLen);
        }

        [DllImport("advapi32.dll")]
        [return: MarshalAs(UnmanagedType.Bool)]
        public static extern bool CryptDecrypt(IntPtr hKey, IntPtr hHash, [MarshalAs(UnmanagedType.Bool)]bool Final, uint dwFlags, byte[] pbData, ref int pdwDataLen);
        [DllImport("advapi32.dll")]
        [return: MarshalAs(UnmanagedType.Bool)]
        public static extern bool CryptDestroyKey(IntPtr hKey);
        [DllImport("advapi32.dll")]
        [return: MarshalAs(UnmanagedType.Bool)]
        public static extern bool CryptReleaseContext(IntPtr hProv, uint dwFlags);
        const uint NTE_BAD_KEYSET = 0x80090016;

        /// <summary>
        /// Decodes the Base64 text <paramref name="pSourceTxt"/> and decrypts
        /// it with an RC4 key derived from the SHA-1 hash of
        /// <paramref name="pKey"/>.
        /// </summary>
        /// <returns>The decrypted text, or "" when a CryptoAPI call fails.</returns>
        public static string decryptStr(string pSourceTxt, string pKey)
        {
            IntPtr hCryptProv;
            IntPtr hKey = IntPtr.Zero;
            IntPtr hHash;
            byte[] key = Encoding.ASCII.GetBytes(pKey);
            byte[] cipher = Convert.FromBase64String(pSourceTxt);
            // The data length is the number of decoded bytes, not the length
            // of the Base64 text.
            int datalen = cipher.Length;
            // Get a handle to the default provider.
            if (!CryptAcquireContext(out hCryptProv, null, null, PROV_RSA_FULL, CRYPT_VERIFYCONTEXT))
            {
                return "";
            }
            try
            {
                // Create a hash object.
                if (!CryptCreateHash(hCryptProv, ALG_ID.CALG_SHA1, IntPtr.Zero, 0, out hHash))
                    return "";
                try
                {
                    // Hash in the password data.
                    if (!CryptHashData(hHash, key, key.Length, 0))
                        return "";
                    // Derive a session key from the hash object.
                    if (!CryptDeriveKey(hCryptProv, ALG_ID.CALG_RC4, hHash, 0, ref hKey))
                        return "";
                }
                finally
                {
                    // Destroy the hash object.
                    CryptDestroyHash(hHash);
                }
                if (!CryptDecrypt(hKey, IntPtr.Zero, true, 0, cipher, ref datalen))
                    return "";
                return Encoding.ASCII.GetString(cipher, 0, datalen);
            }
            finally
            {
                // Release the session key and the provider on every path.
                if (hKey != IntPtr.Zero)
                {
                    CryptDestroyKey(hKey);
                }
                CryptReleaseContext(hCryptProv, 0);
            }
        } // end decryptStr

        /// <summary>
        /// Encrypts the ASCII bytes of <paramref name="pSourceTxt"/> with an
        /// RC4 key derived from the SHA-1 hash of <paramref name="pKey"/>.
        /// </summary>
        /// <returns>The cipher text as Base64, or "" when a CryptoAPI call fails.</returns>
        public static string encryptStr(string pSourceTxt, string pKey)
        {
            IntPtr hCryptProv;
            IntPtr hKey = IntPtr.Zero;
            IntPtr hHash;
            byte[] data = Encoding.ASCII.GetBytes(pSourceTxt);
            byte[] key = Encoding.ASCII.GetBytes(pKey);
            int datalen = data.Length;
            // Get a handle to the default provider.
            if (!CryptAcquireContext(out hCryptProv, null, null, PROV_RSA_FULL, CRYPT_VERIFYCONTEXT))
            {
                return "";
            }
            try
            {
                // Create a hash object.
                if (!CryptCreateHash(hCryptProv, ALG_ID.CALG_SHA1, IntPtr.Zero, 0, out hHash))
                    return "";
                try
                {
                    // Hash in the password data.
                    if (!CryptHashData(hHash, key, key.Length, 0))
                        return "";
                    // Derive a session key from the hash object.
                    if (!CryptDeriveKey(hCryptProv, ALG_ID.CALG_RC4, hHash, 0, ref hKey))
                        return "";
                }
                finally
                {
                    // Destroy the hash object.
                    CryptDestroyHash(hHash);
                }
                // Ask for the required buffer size first (null buffer), then
                // encrypt in place.
                int required = datalen;
                if (!CryptEncrypt(hKey, IntPtr.Zero, true, 0, null, ref required))
                    return "";
                if (required > data.Length)
                    Array.Resize(ref data, required);
                if (!CryptEncrypt(hKey, IntPtr.Zero, true, 0, data, ref datalen))
                    return "";
                return Convert.ToBase64String(data, 0, datalen);
            }
            finally
            {
                // Release the session key and the provider on every path.
                if (hKey != IntPtr.Zero)
                {
                    CryptDestroyKey(hKey);
                }
                CryptReleaseContext(hCryptProv, 0);
            }
        } // end encryptStr

        static void Main(string[] args)
        {
            string encryptTxt = "Teste Crypt";
            string key = "teste123";
            string encryptStr = Program.encryptStr(encryptTxt, key);
            string decryptStr = Program.decryptStr(encryptStr, key);
            Console.Write("Encrypt String with Base64 = {0} \n" , encryptStr);
            Console.Write("Decrypted String with base64 = {0}", decryptStr);
            Console.ReadLine();
        }
    }
}

JNI wrapper to openssl AES_ecb_encrypt not working

I'm doing a JNI wrapper to call AES_ecb_encrypt function from openssl.
The wrapper looks like this:
#include "aes.h"
#include <jni.h>
#include <stdlib.h>
#include <string.h>
/*
 * JNI implementation of
 *     native byte[] encrypt(byte[] data, byte[] userkey, int length, int mode);
 *
 * Encrypts (mode == AES_ENCRYPT) or decrypts (any other mode) the first
 * `length` bytes of `data` with AES-128 in ECB mode, one 16-byte block at a
 * time, using the first 16 bytes of `userkey` as the key.
 *
 * Java passes byte[] arguments as jbyteArray references, not as raw jbyte
 * pointers, so the bytes are copied out with GetByteArrayRegion before use.
 *
 * Returns a new byte[] of `length` bytes, or NULL when an argument is
 * missing, `length` is not a positive multiple of 16, `data` holds fewer than
 * `length` bytes, `userkey` holds fewer than 16 bytes, or an allocation fails.
 */
jbyteArray
Java_com_package_AESDecryptionFilterInputStream_encrypt( JNIEnv* env,
                                                         jobject this,
                                                         jbyteArray data,
                                                         jbyteArray userkey,
                                                         jint length,
                                                         jint mode)
{
    enum { BLOCK_BYTES = 16, KEY_BYTES = 16, KEY_BITS = 128 };
    unsigned char ukey[KEY_BYTES];
    unsigned char *indata = NULL;
    unsigned char *outdata = NULL;
    jbyteArray bArray = NULL;
    AES_KEY key;

    (void)this;
    if (data == NULL || userkey == NULL) {
        return NULL;
    }
    if (length <= 0 || length % BLOCK_BYTES != 0) {
        return NULL;
    }
    if ((*env)->GetArrayLength(env, data) < length ||
        (*env)->GetArrayLength(env, userkey) < KEY_BYTES) {
        return NULL;
    }

    indata = malloc((size_t)length);
    outdata = malloc((size_t)length);
    if (indata == NULL || outdata == NULL) {
        goto done;
    }

    /* Copy the Java array contents into native memory. */
    (*env)->GetByteArrayRegion(env, data, 0, length, (jbyte *)indata);
    (*env)->GetByteArrayRegion(env, userkey, 0, KEY_BYTES, (jbyte *)ukey);

    memset(&key, 0, sizeof(AES_KEY));
    if (mode == AES_ENCRYPT) {
        AES_set_encrypt_key(ukey, KEY_BITS, &key);
    } else {
        AES_set_decrypt_key(ukey, KEY_BITS, &key);
    }

    /* AES_ecb_encrypt handles a single 16-byte block per call. */
    for (jint off = 0; off < length; off += BLOCK_BYTES) {
        AES_ecb_encrypt(indata + off, outdata + off, &key, mode);
    }

    bArray = (*env)->NewByteArray(env, length);
    if (bArray != NULL) {
        (*env)->SetByteArrayRegion(env, bArray, 0, length, (const jbyte *)outdata);
    }

done:
    free(outdata);
    free(indata);
    return bArray;
}
but, when I call it from java code to encrypt and then decrypt a string, the results aren't correct.
I'm declaring the library like this:
static {
System.loadLibrary("aes_ecb");
}
public native byte[] encrypt(byte[] data, byte[] userkey, int length, int mode);
I'm calling it like this:
byte[] dec = "0123456789012345".getBytes();
byte[] enc = encrypt(dec, decryptionKey.getBytes(), dec.length, 1);
byte[] dec2 = encrypt(enc, decryptionKey.getBytes(), enc.length, 0);
The problem is that the plain text bytes are:
dec = {49, 50, 51, 52, 53, 54, 55, 56, 57, 48, 49, 50, 51, 52, 53}
when I call encrypt:
enc = {4, 106, -41, 38, -127, 71, 33, 77, -125, 105, -57, 82, -13, 93, 44, -125}
and then when I call decrypt I get:
dec2 = {-103, 26, 73, -2, 64, -21, 14, -38, -51, 13, -7, 40, -83, 42, 119, -3}
dec and dec2 should have the same value but they don't!
What am I doing wrong?
I believe it might be something with converting signed char to unsigned char... I'm not sure about that piece of code where I cast directly jbyte* to unsigned char*...
Thank you!
How did you get the function signature ?
"Java_com_package_AESDecryptionFilterInputStream_encrypt( JNIEnv* env,
jobject this,
jbyte* data,
jbyte* userkey,
jint length,
jint mode)"
In JNI byte array will be passed as jbyteArray.
Function should be something like
Java_com_package_AESDecryptionFilterInputStream_encrypt( JNIEnv* env,
jobject this,
jbyteArray data,
jbyteArray userkey,
jint length,
jint mode)
You should use javah to generate the signature
You can get the length of the array passed by (*env)->GetArrayLength(env,data)
Then you have to copy into a byte* by using (*env)->GetByteArrayRegion(env, data, 0, length, nativeBytePointer);
You will probably have to allocate memory for the nativeBytePointer

Most efficient way to escape XML/HTML in C++ string?

I can't believe this question hasn't been asked before. I have a string that needs to be inserted into an HTML file but it may contain special HTML characters. I want to replace these with the appropriate HTML representation.
The code below works but is pretty verbose and ugly. Performance is not critical for my application but I guess there are scalability problems here also. How can I improve this? I guess this is a job for STL algorithms or some esoteric Boost function, but the code below is the best I can come up with myself.
// Escapes the HTML-special characters ", &, < and > in *data, in place,
// by replacing each one with its named character entity.
//
// data: string to modify; must not be null.
void escape(std::string *data)
{
    std::string::size_type pos = 0;
    for (;;)
    {
        pos = data->find_first_of("\"&<>", pos);
        if (pos == std::string::npos) break;
        std::string replacement;
        switch ((*data)[pos])
        {
        case '\"': replacement = "&quot;"; break;
        case '&':  replacement = "&amp;";  break;
        case '<':  replacement = "&lt;";   break;
        case '>':  replacement = "&gt;";   break;
        default: ;
        }
        data->replace(pos, 1, replacement);
        // Resume after the inserted entity so its own '&' is not escaped again.
        pos += replacement.size();
    }
}
Instead of just replacing in the original string, you can do copying with on-the-fly replacement which avoids having to move characters in the string. This will have much better complexity and cache behavior, so I'd expect a huge improvement. Or you can use boost::spirit::xml encode or http://code.google.com/p/pugixml/.
// Replaces the XML/HTML-special characters & " ' < > in `data` with their
// named entities. The result is built in a separate buffer and swapped in,
// so no characters of the original string are shifted during replacement.
void encode(std::string& data) {
    std::string buffer;
    // Lower bound only: every escaped character makes the output longer.
    buffer.reserve(data.size());
    for (std::string::size_type pos = 0; pos != data.size(); ++pos) {
        switch (data[pos]) {
            case '&':  buffer.append("&amp;");  break;
            case '\"': buffer.append("&quot;"); break;
            case '\'': buffer.append("&apos;"); break;
            case '<':  buffer.append("&lt;");   break;
            case '>':  buffer.append("&gt;");   break;
            default:   buffer.append(&data[pos], 1); break;
        }
    }
    data.swap(buffer);
}
EDIT: A small improvement can be achieved by using a heuristic to determine the size of the buffer: replace the argument of the buffer.reserve call with data.size()*1.1 (10% extra) or something similar, depending on how many replacements are expected.
// Escapes & " ' < > in *data, in place, using Boost's replace_all.
void escape(std::string *data)
{
    using boost::algorithm::replace_all;
    // '&' must be handled first; otherwise the '&' introduced by the
    // other entities would itself be escaped.
    replace_all(*data, "&",  "&amp;");
    replace_all(*data, "\"", "&quot;");
    replace_all(*data, "\'", "&apos;");
    replace_all(*data, "<",  "&lt;");
    replace_all(*data, ">",  "&gt;");
}
Could win the prize for least verbose?
Here is a simple ~30-line C function that does the trick reasonably well. It assumes that the buffer behind temp_str has enough memory allocated to hold the additional characters produced by escaping.
/*
 * Escapes the five XML-special characters (& ' " > <) in temp_str, in place,
 * by replacing each with its named entity.
 *
 * temp_str: NUL-terminated string to escape. The caller must provide a buffer
 *           large enough for the escaped result (at most 6 bytes per input
 *           character, plus the terminator); no bounds are checked here.
 */
void toExpatEscape(char *temp_str)
{
    /* cEscapeChars[k] is replaced by pEscapedSeqTable[k]. */
    static const char cEscapeChars[] = "&'\"><";
    static const char * const pEscapedSeqTable[] =
    {
        "&amp;",
        "&apos;",
        "&quot;",
        "&gt;",
        "&lt;",
    };
    size_t str_len = strlen(temp_str);
    size_t i = 0;

    while (i < str_len)
    {
        /* temp_str[i] is never '\0' here (i < str_len), so strchr cannot
         * match the terminator of cEscapeChars. */
        const char *hit = strchr(cEscapeChars, temp_str[i]);
        if (hit == NULL)
        {
            i++;
            continue;
        }

        const char *seq = pEscapedSeqTable[hit - cEscapeChars];
        size_t seq_len = strlen(seq);

        /* Shift the tail after position i (including the terminating NUL,
         * hence str_len - i bytes) right to open room for the entity. */
        memmove(temp_str + i + seq_len, temp_str + i + 1, str_len - i);
        memcpy(temp_str + i, seq, seq_len);

        str_len += seq_len - 1;
        /* Skip the entity just written so its '&' is not escaped again. */
        i += seq_len;
    }
}
My tests showed this answer gave the best performance from offered (not surprising it has the most rate).
I've implemented the same algorithm for my project (I really want good performance and low memory usage); my tests showed that my implementation is ~2.6-3.25 times faster. I also don't like the previous best algorithm because of its memory usage: you pay for extra memory both when you apply the 1.1 multiplier 'heuristic' and when .append() leads to a resize.
So, leave my code here - maybe somebody find it useful.
HtmlPreprocess.h:
// Guard name avoids the leading underscore + capital letter form, which is
// reserved for the implementation.
#ifndef HTML_PREPROCESS_H
#define HTML_PREPROCESS_H
#include <string>

// Holder for HTML pre-processing helpers; the only operation is the static
// htmlspecialchars(), so no instance is needed to use it.
class HtmlPreprocess
{
public:
    HtmlPreprocess();
    ~HtmlPreprocess();

    // Writes into `out` a copy of `in` in which the characters
    // & ' " > < are replaced by their named HTML entities.
    static void htmlspecialchars(
        const std::string & in,
        std::string & out
    );
};
#endif // HTML_PREPROCESS_H
HtmlPreprocess.cpp:
#include "HtmlPreprocess.h"
// The constructor has nothing to initialise.
HtmlPreprocess::HtmlPreprocess() {}

// The destructor has nothing to release.
HtmlPreprocess::~HtmlPreprocess() {}
// Lookup table indexed by byte value (0-255, 16 entries per row): the number
// of output characters that byte occupies after escaping. Every entry is 1
// (copied unchanged) except:
//   34 '"'  -> 6      38 '&' -> 5      39 '\'' -> 6
//   60 '<'  -> 4      62 '>' -> 4
const unsigned char map_char_to_final_size[] =
{
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 6, 1, 1, 1, 5, 6, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 1, 4, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
};
const unsigned char map_char_to_index[] =
{
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 2, 0xFF, 0xFF, 0xFF, 0, 1, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 4, 0xFF, 3, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
};
// Writes into `out` a copy of `in` with & ' " > < replaced by
// &amp; &apos; &quot; &gt; &lt;.
//
// Two passes: the first sums the escaped length from map_char_to_final_size
// so `out` is sized exactly once; the second writes the characters.
//
// in:  text to escape (may contain any byte values).
// out: receives the escaped text; previous contents are discarded.
//      Passing the same object as `in` is supported.
void HtmlPreprocess::htmlspecialchars(
    const std::string & in,
    std::string & out
)
{
    // Resizing `out` would invalidate the input pointer if both arguments
    // are the same string, so work from a copy in that case.
    if (&in == &out)
    {
        const std::string copy(in);
        htmlspecialchars(copy, out);
        return;
    }

    const size_t in_size = in.size();
    const char * const lp_in_stored = in.data();

    // The tables have 256 entries indexed by byte value. Plain char may be
    // signed, so convert through unsigned char to avoid a negative index
    // for bytes >= 0x80.
    size_t final_size = 0;
    for (size_t i = 0; i < in_size; i++)
        final_size += map_char_to_final_size[static_cast<unsigned char>(lp_in_stored[i])];

    out.resize(final_size);
    if (final_size == 0)
        return;

    char * lp_out = &out[0];
    for (size_t i = 0; i < in_size; i++)
    {
        const char current_char = lp_in_stored[i];
        const unsigned char next_action =
            map_char_to_index[static_cast<unsigned char>(current_char)];
        switch (next_action) {
        case 0: // '&'
            *lp_out++ = '&';
            *lp_out++ = 'a';
            *lp_out++ = 'm';
            *lp_out++ = 'p';
            *lp_out++ = ';';
            break;
        case 1: // '\''
            *lp_out++ = '&';
            *lp_out++ = 'a';
            *lp_out++ = 'p';
            *lp_out++ = 'o';
            *lp_out++ = 's';
            *lp_out++ = ';';
            break;
        case 2: // '"'
            *lp_out++ = '&';
            *lp_out++ = 'q';
            *lp_out++ = 'u';
            *lp_out++ = 'o';
            *lp_out++ = 't';
            *lp_out++ = ';';
            break;
        case 3: // '>'
            *lp_out++ = '&';
            *lp_out++ = 'g';
            *lp_out++ = 't';
            *lp_out++ = ';';
            break;
        case 4: // '<'
            *lp_out++ = '&';
            *lp_out++ = 'l';
            *lp_out++ = 't';
            *lp_out++ = ';';
            break;
        default: // 0xFF: copied unchanged
            *lp_out++ = current_char;
        }
    }
}
If you're going for processing speed, then it seems to me that the best would be to have a second string that you build as you go, copying from the first string to the second string, and then appending the html escapes as you encounter them. Since I assume that the replace method involves first a memory move, followed by a copy into the replaced position, it's going to be very slow for large strings. If you have a second string to build using .append(), it will avoid the memory move.
As far as code "cleanness" goes, I think that's about as pretty as you're going to get. You could create an array of characters and their replacements, and then search the array, but that would probably be slower and not much cleaner anyway.
I'd honestly go with a more generic version using iterators, such that you can "stream" the encoding. Consider the following implementation:
#include <algorithm>
namespace xml {
    // Helper for null-terminated ASCII strings (no end of string iterator):
    // copies characters from 'begin' to 'out' until a '\0' is reached and
    // returns the advanced output iterator. The '\0' itself is not copied.
    template<typename InIter, typename OutIter>
    OutIter copy_asciiz ( InIter begin, OutIter out )
    {
        while ( *begin != '\0' ) {
            *out++ = *begin++;
        }
        return (out);
    }

    // XML escaping in its general form: copies [begin, end) to 'out',
    // replacing '&', '<' and '>' with "&amp;", "&lt;" and "&gt;".
    // Note that 'out' is expected to be an "infinite" sequence (for example
    // a back_inserter or a stream iterator). Returns the advanced 'out'.
    template<typename InIter, typename OutIter>
    OutIter escape ( InIter begin, InIter end, OutIter out )
    {
        static const char bad[] = "&<>";
        static const char* const rep[] = {"&amp;", "&lt;", "&gt;"};
        // Exclude the literal's terminating '\0' from the search range;
        // otherwise a '\0' in the input would select rep[3], out of bounds.
        static const std::size_t n = sizeof(bad)/sizeof(bad[0]) - 1;
        for ( ; (begin != end); ++begin )
        {
            // Find which replacement to use.
            const std::size_t i = static_cast<std::size_t>(
                std::distance(bad, std::find(bad, bad+n, *begin)));
            // No need for escaping.
            if ( i == n ) {
                *out++ = *begin;
            }
            // Escape the character.
            else {
                out = copy_asciiz(rep[i], out);
            }
        }
        return (out);
    }
}
Then, you can simplify the average case using a few overloads:
#include <iterator>
#include <string>
namespace xml {
// Get escaped version of "content".
std::string escape ( const std::string& content )
{
std::string result;
result.reserve(content.size());
escape(content.begin(), content.end(), std::back_inserter(result));
return (result);
}
// Escape data on the fly, using "constant" memory.
void escape ( std::istream& in, std::ostream& out )
{
escape(std::istreambuf_iterator<char>(in),
std::istreambuf_iterator<char>(),
std::ostreambuf_iterator<char>(out));
}
}
Finally, test the whole lot:
#include <iostream>
// Demo: escapes a small XML fragment and prints the result to stdout.
int main ( int, char ** )
{
    const std::string escaped = xml::escape("<foo>bar & qux</foo>");
    std::cout << escaped << std::endl;
}
You can use the boost::property_tree::xml_parser::encode_char_entities if you don't want to write it yourself.
For reference, here's the code in boost 1.64.0:
```
// Returns a copy of `s` in which the XML-special characters < > & " ' are
// replaced by their named entities. A string made up only of spaces gets
// its first space encoded as a numeric character reference instead.
template<class Str>
Str encode_char_entities(const Str &s)
{
    // Don't do anything for empty strings.
    if(s.empty()) return s;
    typedef typename Str::value_type Ch;
    Str r;
    // To properly round-trip spaces and not uglify the XML beyond
    // recognition, we have to encode them IF the text contains only spaces.
    Str sp(1, Ch(' '));
    if(s.find_first_not_of(sp) == Str::npos) {
        // The first will suffice.
        r = detail::widen<Str>("&#32;");
        r += Str(s.size() - 1, Ch(' '));
    } else {
        typename Str::const_iterator end = s.end();
        for (typename Str::const_iterator it = s.begin(); it != end; ++it)
        {
            switch (*it)
            {
                case Ch('<'): r += detail::widen<Str>("&lt;"); break;
                case Ch('>'): r += detail::widen<Str>("&gt;"); break;
                case Ch('&'): r += detail::widen<Str>("&amp;"); break;
                case Ch('"'): r += detail::widen<Str>("&quot;"); break;
                case Ch('\''): r += detail::widen<Str>("&apos;"); break;
                default: r += *it; break;
            }
        }
    }
    return r;
}
```
I profiled 3 solutions with Visual Studio 2017. Input were 10 000 000 strings of size 5-20 with a probability of 9,4% that a char needs to be escaped.
Solution from Giovanni Funchal
Solution from HostageBrain
Solution is mine
The result:
needs 1.675 seconds
needs 0.769 seconds
needs 0.368 seconds
In my solution, the final size is precalculated and the string data is copied only when needed, so heap memory allocations should be minimal.
// Lookup table indexed by byte value (0-255, 16 entries per row): the number
// of characters that byte occupies after XML escaping. Every entry is 1
// except '"' (34) -> 6, '&' (38) -> 5, '\'' (39) -> 6, '<' (60) -> 4 and
// '>' (62) -> 4. These must equal the lengths of the entities written by
// escapeXml(), otherwise the output buffer is sized incorrectly.
const unsigned char calcFinalSize[] =
{
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 6, 1, 1, 1, 5, 6, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 1, 4, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
};

// Escapes & ' " > < in `in`, in place, as &amp; &apos; &quot; &gt; &lt;.
// The escaped length is computed first; if nothing needs escaping the
// string is left untouched and no allocation takes place.
void escapeXml(std::string & in)
{
    const char* const dataIn = in.data();
    const size_t sizeIn = in.size();
    const char* const dataInEnd = dataIn + sizeIn;

    // Pass 1: exact size of the escaped text. The table is indexed by byte
    // value, so convert through unsigned char (plain char may be signed).
    size_t outSize = 0;
    for (const char* p = dataIn; p < dataInEnd; ++p)
    {
        outSize += calcFinalSize[static_cast<unsigned char>(*p)];
    }
    // Every entry is >= 1, so equal sizes mean no character is escaped.
    if (outSize == sizeIn)
    {
        return;
    }

    // Pass 2: write the escaped text into a buffer of exactly that size.
    std::string out;
    out.resize(outSize);
    char* dataOut = &out[0];
    for (const char* p = dataIn; p < dataInEnd; ++p)
    {
        switch (*p) {
        case '&':
            memcpy(dataOut, "&amp;", sizeof("&amp;") - 1);
            dataOut += sizeof("&amp;") - 1;
            break;
        case '\'':
            memcpy(dataOut, "&apos;", sizeof("&apos;") - 1);
            dataOut += sizeof("&apos;") - 1;
            break;
        case '\"':
            memcpy(dataOut, "&quot;", sizeof("&quot;") - 1);
            dataOut += sizeof("&quot;") - 1;
            break;
        case '>':
            memcpy(dataOut, "&gt;", sizeof("&gt;") - 1);
            dataOut += sizeof("&gt;") - 1;
            break;
        case '<':
            memcpy(dataOut, "&lt;", sizeof("&lt;") - 1);
            dataOut += sizeof("&lt;") - 1;
            break;
        default:
            *dataOut++ = *p;
        }
    }
    in.swap(out);
}
Edit: Replaced "&quote;" with "&quot;". The old solution was overwriting memory, because the look-up table contains a length of 6 for the quote character while "&quote;" is 7 characters long.
Or with just stl :
// Replaces every occurrence of `from` in `s` with `to`, in place, scanning
// left to right, and returns `s`. Text inserted by a replacement is never
// rescanned, so `to` may itself contain `from`. An empty `from` leaves `s`
// unchanged.
std::string& rep(std::string &s, std::string from, std::string to)
{
    // An empty pattern matches at every position and would never terminate.
    if (from.empty())
        return s;

    std::string::size_type pos = 0;
    while ((pos = s.find(from, pos)) != std::string::npos)
    {
        s.replace(pos, from.length(), to);
        // Continue after the inserted text, not inside it.
        pos += to.length();
    }
    return s;
}
Usage:
rep(s, "&", "&amp;");
rep(s, "\"", "&quot;");
or:
rep(s, "HTML","xxxx");

Convert files of any types to a file with c strings

Please suggest a small command-line utility (for Windows) to convert files from particular directory to a valid c file. Maybe it can be done just with batch commands?
The resulting file should look like this:
/* Each array holds the file's name as a NUL-terminated string, followed by
 * the first bytes of the file's contents. */
static const unsigned char some_file[] = {
    /* some_file.html */
    0x2f, 0x70, 0x72, 0x6f, 0x63, 0x65, 0x73, 0x73, 0x65, 0x73, 0x2e, 0x73, 0x68, 0x74, 0x6d, 0x6c, 0,
    0x25, 0x21, 0x3a, 0x20, 0x2f, 0x68, 0x65, 0x61, 0x64, 0x65
};
static const unsigned char some_other_file[] = {
    /* some_other_file.png*/
    0x2f, 0x34, 0x30, 0x34, 0x2e, 0x68, 0x74, 0x6d, 0x6c, 0,
    0x3c, 0x68, 0x74, 0x6d, 0x6c, 0x3e, 0xa, 0x20, 0x20, 0x3c
};
P.S. Please don't suggest Perl and Python ports. They are too heavy for this task.
P.P.S. Maybe someone knows a utility that is more customizable than bin2h, but less heavy and complex than awk? One that can parse several files and put them into one C file. Specifying custom variable names (using some kind of index file) would also be great, so that it can be added to the build process.
Use xxd -i file.
I use the one included with Vim. For example:
C:\Documents and Settings\user> xxd -i error.log | head -n2
unsigned char error_log[] = {
0x0c, 0x0d, 0x0a, 0x3d, 0x3d, 0x32, 0x30, 0x30, 0x39, 0x2f, 0x35, 0x2f,
See also Is it possible to view a binary in ones and zeros?
Bin2h will do this.
Bin2h - Win32 binary to C header file
converter
A Win32 command-line utility for
converting a binary file into a C
header file, representing the contents
of that file as a block of data.
I don't believe the input file has to be a binary file.
If you want a utility that can be freely used (commercial or whatever) here's a GPL bin2c by Adrian Prantl:
/*
* bin2c: A Program to convert binary data into C source code
* Copyright 2004 by Adrian Prantl <adrian#f4z.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
*/
#include <stdio.h>
#include <stdlib.h>
/* Name the program was invoked as; assigned from argv[0] in main(). */
char *self = NULL;

/* Prints the command-line synopsis to standard output. */
void usage() {
    printf("Usage:\n"
           "%s input.bin output.h name\n\n",
           self);
}
/*
 * Reports a fatal error on stderr and terminates the program with exit
 * status 1. The message is the program name followed by s1 and s2
 * concatenated (typically a description and the offending file name).
 */
void bail_out(const char* s1, const char* s2) {
    fprintf(stderr,
            "%s: FATAL ERROR:\n"
            "%s%s\n",
            self, s1, s2);
    exit(1);
}
/*
 * bin2c entry point.
 *
 * Usage: <program> input.bin output.h name
 *
 * Reads input.bin as binary data and writes output.h containing an include
 * guard (name_H) and `const unsigned char name[]` initialised with the
 * file's bytes, twelve per line.
 *
 * Returns 0 on success and EXIT_FAILURE on a usage error; I/O errors and an
 * empty input file are reported through bail_out(), which exits.
 */
int main(int argc, char** argv) {
    FILE *fi, *fo;
    int c;
    int column;

    self = argv[0];
    if (argc != 4) {
        usage();
        /* A wrong invocation is an error; report it to the caller. */
        return EXIT_FAILURE;
    }

    if ((fi = fopen(argv[1], "rb")) == NULL)
        bail_out("Cannot open input file ", argv[1]);

    /* Read the first byte before creating the output file: an empty input
     * cannot be represented (C has no empty array initialiser), and emitting
     * only the closing "};" would produce an invalid header. */
    c = fgetc(fi);
    if (c == EOF) {
        fclose(fi);
        bail_out("Input file is empty or unreadable ", argv[1]);
    }

    if ((fo = fopen(argv[2], "w")) == NULL) {
        fclose(fi);
        bail_out("Cannot open output file ", argv[2]);
    }

    fprintf(fo, "#ifndef %s_H\n", argv[3]);
    fprintf(fo, "#define %s_H\n\n", argv[3]);
    fprintf(fo, "const unsigned char %s[] = {\n", argv[3]);
    fprintf(fo, " 0x%02x", (unsigned char) c);

    /* column counts the bytes already written on the current line. */
    column = 1;
    while ((c = fgetc(fi)) != EOF) {
        if (column < 12) {
            fprintf(fo, ", 0x%02x", (unsigned char) c);
        } else {
            fprintf(fo, ",\n 0x%02x", (unsigned char) c);
            column = 0;
        }
        column++;
    }

    /* fgetc returns EOF for read errors too; do not report success then. */
    if (ferror(fi)) {
        fclose(fi);
        fclose(fo);
        bail_out("Error reading input file ", argv[1]);
    }
    fclose(fi);

    fprintf(fo, "\n};\n\n");
    fprintf(fo, "#endif\n");

    /* fclose flushes buffered output, so a write failure may surface here. */
    if (ferror(fo)) {
        fclose(fo);
        bail_out("Error writing output file ", argv[2]);
    }
    if (fclose(fo) != 0)
        bail_out("Error writing output file ", argv[2]);

    printf("converted %s\n", argv[1]);
    return 0;
}
It's a single 70 line or so C file - nothing to it to compile and run.
SRecord can do that, and more. Though it is hardly difficult to write your own in C.