I can't find the problem. Does anyone know how to solve it?
Code
#include <algorithm>
int main(int argc, char* argv[]) {
return 0;
}
Warning
extra tokens at end of #include directive [enabled by default]
Looking at the code quoted above using od -c gives this output:
0000000 # i n c l u d e < a l g o r i
0000020 t h m > 342 200 216 \n i n t m a i n
0000040 ( i n t a r g c , c h a r *
0000060 a r g v [ ] ) { \n r
0000100 e t u r n 0 ; \n } \n
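Note the bytes between the > and the \n: 342 200 216 (octal) is the UTF-8 encoding of U+200E LEFT-TO-RIGHT MARK, an invisible character that the preprocessor reports as extra tokens. You probably want to get rid of them (delete the end of that line and retype it).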
Related
Why is every character followed by a white space in the following?
C++ DLL
test.h:
#ifndef TEST_DLL_H
#define TEST_DLL_H
#define EXPORT __declspec(dllexport) __stdcall
#include <iostream>
#include <Windows.h>
// Public interface of the DLL: one struct and one exported C function.
namespace Test_DLL
{
// Argument block passed to simple().
struct Simple
{
// TCHAR is wchar_t only when UNICODE is defined, otherwise char. The Python
// wrapper declares this field as WCHAR * 1024, so the two layouts only agree
// when the DLL is built with UNICODE.
TCHAR a[1024];
};
// C linkage so the function is exported without C++ name mangling.
extern "C"
{
// EXPORT expands to __declspec(dllexport) __stdcall.
int EXPORT simple(Simple* a);
}
};
#endif
test.cpp:
#include "test.h"
// Writes the text buffer held in *a to std::wcout, followed by a newline and
// a flush (both supplied by std::endl). Always returns 0.
int EXPORT Test_DLL::simple(Simple* a)
{
    const TCHAR* text = a->a;
    std::wcout << text;
    std::wcout << std::endl;
    return 0;
}
Python
test.py:
import ctypes
from ctypes import wintypes
class MyStructure(ctypes.Structure):
    """ctypes mirror of the DLL's ``Simple`` struct: a single buffer of 1024 wide characters."""

    _fields_ = [
        ("a", wintypes.WCHAR * 1024),
    ]
# Build the argument first: a struct whose wide-character buffer holds the text.
a = "Hello, world!"
mystruct = MyStructure(a=a)

# Load the DLL through WinDLL and look up the exported function.
hDLL = ctypes.LibraryLoader(ctypes.WinDLL)
hDLL_Test = hDLL.LoadLibrary(r"...\test.dll")
simple = hDLL_Test.simple

# Pass the struct by reference.
ret = simple(ctypes.byref(mystruct))
The result:
H e l l o , w o r l d !
Is the problem on the C++ DLL side? Or am I missing something on the Python side?
At the beginning I thought that it's some minor problem in your code. When debugging I discovered that it isn't quite so. Starting from your example, I developed another one that illustrates some key points.
test.h:
#if !defined(TEST_DLL_H)
#define TEST_DLL_H
#if defined(_WIN32)
# if defined(TEST_EXPORTS)
# define TEST_API __declspec(dllexport)
# else
# define TEST_API __declspec(dllimport)
# endif
# define CALLING_CONVENTION __cdecl
#else
# define __TEXT(X) L##X
# define TEXT(X) __TEXT(X)
# define TEST_API
# define CALLING_CONVENTION
#endif
// Public interface of the test library: one struct plus five exported functions.
namespace TestDll {
// Argument block for simple(): a fixed buffer of 1024 wide characters.
typedef struct Simple_ {
wchar_t a[1024];
} Simple;
// C linkage, so the functions are exported without C++ name mangling
// (code.py looks them up by these plain names).
extern "C" {
// Prints pSimple->a with wprintf and returns that wprintf call's result.
TEST_API int CALLING_CONVENTION simple(Simple *pSimple);
// Prints the narrow string pStr with printf and returns that printf call's result.
TEST_API int CALLING_CONVENTION printStr(char *pStr);
// Prints the wide string pWstr, then a hex dump of its bytes; returns the
// result of the wprintf call that printed the text.
TEST_API int CALLING_CONVENTION wprintWstr(wchar_t *pWstr);
// Returns a malloc'ed copy of a fixed wide string.
TEST_API wchar_t* CALLING_CONVENTION wstr();
// Passes pWstr to free().
TEST_API void CALLING_CONVENTION clearWstr(wchar_t *pWstr);
}
};
#endif // TEST_DLL_H
test.cpp:
#define TEST_EXPORTS
#include "test.h"
#if defined(_WIN32)
# include <Windows.h>
#else
# include <wchar.h>
# define __FUNCTION__ "function"
#endif
#include <stdio.h>
//#include <iostream>
#define PRINT_MSG_0() printf("From C: - [%s] (%d) - [%s]\n", __FILE__, __LINE__, __FUNCTION__)
#define WPRINT_MSG_0() wprintf(L"From C: - [%s] (%d) - [%s]\n", TEXT(__FILE__), __LINE__, TEXT(__FUNCTION__))
#define DUMMY_TEXT_W L"Dummy text."
//using namespace std;
// Prints the wide string stored in pSimple->a, followed by a newline.
// Returns the result of the wprintf call that printed the text (the number of
// wide characters written, or a negative value on error).
int TestDll::simple(Simple *pSimple) {
    //std::wcout << pSimple->a << std::endl;
    WPRINT_MSG_0();
    // %ls always means "wide string" in wprintf. A bare %s means wide only on
    // MSVC; standard C treats it as a narrow char*.
    int ret = wprintf(L"%ls", pSimple->a);
    wprintf(L"\n");
    return ret;
}
// Prints the narrow string pStr followed by a newline.
// Returns what printf reported for the text itself (the newline is not counted).
int TestDll::printStr(char *pStr) {
    PRINT_MSG_0();
    const int written = printf("%s", pStr);
    putchar('\n');
    return written;
}
// Prints the wide string pWstr on one line, then a second line holding its
// length in characters and a hex dump of its bytes in memory order.
// Returns the result of the wprintf call that printed the text.
int TestDll::wprintWstr(wchar_t *pWstr) {
    WPRINT_MSG_0();
    // %ls always means "wide string" in wprintf. A bare %s means wide only on
    // MSVC; standard C treats it as a narrow char*.
    int ret = wprintf(L"%ls", pWstr);
    wprintf(L"\n");

    const size_t len = wcslen(pWstr);
    // Read the bytes as unsigned char: through plain (signed) char a byte
    // >= 0x80 would be sign-extended and printed as FFFFFFxx.
    const unsigned char *bytes = reinterpret_cast<const unsigned char*>(pWstr);
    wprintf(L"Hex (%u): ", static_cast<unsigned int>(len));
    for (size_t i = 0; i < len * sizeof(wchar_t); ++i)
        wprintf(L"%02X ", static_cast<unsigned int>(bytes[i]));
    wprintf(L"\n");
    return ret;
}
// Returns a heap-allocated copy of DUMMY_TEXT_W, or NULL if the allocation
// fails. The buffer comes from malloc(), so it must be released with free().
wchar_t *TestDll::wstr() {
    const size_t count = wcslen(DUMMY_TEXT_W) + 1;  // + 1 for the terminator
    wchar_t *ret = (wchar_t*)malloc(count * sizeof(wchar_t));
    if (ret == NULL)
        return NULL;  // do not hand a null destination to wcscpy
    wcscpy(ret, DUMMY_TEXT_W);
    return ret;
}
// Releases pWstr with free(). Presumably the counterpart of wstr(), which
// allocates its result with malloc().
void TestDll::clearWstr(wchar_t *pWstr) {
free(pWstr);
}
main.cpp:
#include "test.h"
#include <stdio.h>
#if defined(_WIN32)
# include <Windows.h>
#endif
// Calls three of the library's functions directly (without Python) and prints
// what each one returned.
int main() {
    // A writable array: binding a string literal to char* is ill-formed since
    // C++11, and printStr() takes a non-const char*.
    char text[] = "Hello, world!";
    TestDll::Simple s = { TEXT("Hello, world!") };
    // The unqualified call is found by argument-dependent lookup: the argument
    // type TestDll::Simple adds namespace TestDll to the lookup.
    int ret = simple(&s); // ??? Compiles even if namespace not specified here !!!
    printf("\"simple\" returned %d\n", ret);
    ret = TestDll::printStr(text);
    printf("\"printStr\" returned %d\n", ret);
    ret = TestDll::wprintWstr(s.a);
    printf("\"wprintWstr\" returned %d\n", ret);
    return 0;
}
code.py:
#!/usr/bin/env python3
import sys
import ctypes
DLL_NMAME = "./test.dll"
DUMMY_TEXT = "Hello, world!"
# Array type matching the C declaration ``wchar_t a[1024]``.
WCharArr1024 = ctypes.c_wchar * 1024


class SimpleStruct(ctypes.Structure):
    """ctypes mirror of the library's ``Simple`` struct (one wide-character buffer)."""

    _fields_ = [
        ("a", WCharArr1024),
    ]
def main():
    """Call every function exported by the test library and print each result.

    The buffer returned by ``wstr`` is allocated with ``malloc`` on the C side,
    so its raw address is kept and handed back to ``clearWstr`` once the text
    has been copied into a Python string.
    """
    test_dll = ctypes.CDLL(DLL_NMAME)

    simple_func = test_dll.simple
    simple_func.argtypes = [ctypes.POINTER(SimpleStruct)]
    simple_func.restype = ctypes.c_int
    struct_obj = SimpleStruct(a=DUMMY_TEXT)

    print_str_func = test_dll.printStr
    print_str_func.argtypes = [ctypes.c_char_p]
    print_str_func.restype = ctypes.c_int

    wprint_wstr_func = test_dll.wprintWstr
    wprint_wstr_func.argtypes = [ctypes.c_wchar_p]
    wprint_wstr_func.restype = ctypes.c_int

    wstr_func = test_dll.wstr
    wstr_func.argtypes = []
    # c_void_p rather than c_wchar_p: a c_wchar_p result is converted to a
    # Python str straight away and the original address is lost, so the
    # buffer could never be freed.
    wstr_func.restype = ctypes.c_void_p

    clear_wstr_func = test_dll.clearWstr
    clear_wstr_func.argtypes = [ctypes.c_void_p]
    clear_wstr_func.restype = None

    #print("From PY: [{:s}]".format(struct_obj.a))
    ret = simple_func(ctypes.byref(struct_obj))
    print("\"{:s}\" returned {:d}".format(simple_func.__name__, ret))

    ret = print_str_func(DUMMY_TEXT.encode())
    print("\"{:s}\" returned {:d}".format(print_str_func.__name__, ret))

    #ret = wprint_wstr_func(ctypes.cast(DUMMY_TEXT.encode(), ctypes.c_wchar_p))
    ret = wprint_wstr_func(DUMMY_TEXT)
    print("\"{:s}\" returned {:d}".format(wprint_wstr_func.__name__, ret))

    wstr_addr = wstr_func()
    if not wstr_addr:
        # A NULL pointer is returned to Python as None.
        raise MemoryError("wstr() returned NULL")
    try:
        s = ctypes.wstring_at(wstr_addr)  # copy the text before releasing it
    finally:
        clear_wstr_func(wstr_addr)
    print("\"{:s}\" returned \"{:s}\"".format(wstr_func.__name__, s))
if __name__ == "__main__":
#print("Python {:s} on {:s}\n".format(sys.version, sys.platform))
main()
Changes:
Removed the C++ layer (to exclude as many variables as possible) and only rely on C
Adapted the code to be *nix compliant (I've run it on Ubuntu, but I encountered other issues that I'm not going to discuss)
Added more functions (this was a debugging process), to gather as much intel as possible
Did some renames, refactorings and other non important changes
While investigating, I discovered a funny problem (the comment from main.cpp). Apparently the simple function compiles even if I don't prepend the namespace in which it's declared. This doesn't apply to the other functions. After some quick tries, I realized that it's because of the Simple argument (which is also part of the namespace). I didn't spend too much time on it and at first suspected Undefined Behavior, but it is actually well-defined: this is argument-dependent lookup (ADL). An unqualified function call also searches the namespaces of its argument types, so simple(&s) finds TestDll::simple through TestDll::Simple, whereas printStr(char*) and wprintWstr(wchar_t*) have no argument that belongs to TestDll
The narrow and wide functions are mixed, that's a NO - NO, and is only for debugging / demonstrating purposes
Output:
e:\Work\Dev\StackOverflow\q054269984>"c:\Install\x86\Microsoft\Visual Studio Community\2015\vc\vcvarsall.bat" x64
e:\Work\Dev\StackOverflow\q054269984>dir /b
code.py
main.cpp
test.cpp
test.h
e:\Work\Dev\StackOverflow\q054269984>cl /nologo /DDLL /DUNICODE /MD /EHsc test.cpp /link /NOLOGO /DLL /OUT:test.dll
test.cpp
Creating library test.lib and object test.exp
e:\Work\Dev\StackOverflow\q054269984>cl /nologo /DUNICODE /MD /EHsc main.cpp /link /NOLOGO /OUT:main.exe test.lib
main.cpp
e:\Work\Dev\StackOverflow\q054269984>dir /b
code.py
main.cpp
main.exe
main.obj
test.cpp
test.dll
test.exp
test.h
test.lib
test.obj
e:\Work\Dev\StackOverflow\q054269984>main.exe
From C: - [test.cpp] (23) - [TestDll::simple]
Hello, world!
"simple" returned 13
From C: - [test.cpp] (31) - [TestDll::printStr]
Hello, world!
"printStr" returned 13
From C: - [test.cpp] (39) - [TestDll::wprintWstr]
Hello, world!
Hex (13): 48 00 65 00 6C 00 6C 00 6F 00 2C 00 20 00 77 00 6F 00 72 00 6C 00 64 00 21 00
"wprintWstr" returned 13
e:\Work\Dev\StackOverflow\q054269984>"e:\Work\Dev\VEnvs\py_064_03.06.08_test0\Scripts\python.exe" code.py
Python 3.6.8 (tags/v3.6.8:3c6b436a57, Dec 24 2018, 00:16:47) [MSC v.1916 64 bit (AMD64)] on win32
F r o m C : - [ t e s t . c p p ] ( 2 3 ) - [ T e s t D l l : : s i m p l e ]
H e l l o , w o r l d !
"simple" returned 13
From C: - [test.cpp] (31) - [TestDll::printStr]
Hello, world!
"printStr" returned 13
F r o m C : - [ t e s t . c p p ] ( 3 9 ) - [ T e s t D l l : : w p r i n t W s t r ]
H e l l o , w o r l d !
H e x ( 1 3 ) : 4 8 0 0 6 5 0 0 6 C 0 0 6 C 0 0 6 F 0 0 2 C 0 0 2 0 0 0 7 7 0 0 6 F 0 0 7 2 0 0 6 C 0 0 6 4 0 0 2 1 0 0
"wprintWstr" returned 13
"wstr" returned "Dummy text."
It seems to be Python related
The strings themselves are not messed up (their lengths and wprintf return value are correct). It's more like stdout is the culprit
Then, I went further:
e:\Work\Dev\StackOverflow\q054269984>for /f %f in ('dir /b "e:\Work\Dev\VEnvs\py_064*"') do ("e:\Work\Dev\VEnvs\%f\Scripts\python.exe" code.py)
e:\Work\Dev\StackOverflow\q054269984>("e:\Work\Dev\VEnvs\py_064_02.07.15_test0\Scripts\python.exe" code.py )
Python 2.7.15 (v2.7.15:ca079a3ea3, Apr 30 2018, 16:30:26) [MSC v.1500 64 bit (AMD64)] on win32
From C: - [test.cpp] (23) - [TestDll::simple]
Hello, world!
"simple" returned 13
From C: - [test.cpp] (31) - [TestDll::printStr]
Hello, world!
"printStr" returned 13
From C: - [test.cpp] (39) - [TestDll::wprintWstr]
Hello, world!
Hex (13): 48 00 65 00 6C 00 6C 00 6F 00 2C 00 20 00 77 00 6F 00 72 00 6C 00 64 00 21 00
"wprintWstr" returned 13
"wstr" returned "Dummy text."
e:\Work\Dev\StackOverflow\q054269984>("e:\Work\Dev\VEnvs\py_064_03.04.04_test0\Scripts\python.exe" code.py )
Python 3.4.4 (v3.4.4:737efcadf5a6, Dec 20 2015, 20:20:57) [MSC v.1600 64 bit (AMD64)] on win32
From C: - [test.cpp] (23) - [TestDll::simple]
Hello, world!
"simple" returned 13
From C: - [test.cpp] (31) - [TestDll::printStr]
Hello, world!
"printStr" returned 13
From C: - [test.cpp] (39) - [TestDll::wprintWstr]
Hello, world!
Hex (13): 48 00 65 00 6C 00 6C 00 6F 00 2C 00 20 00 77 00 6F 00 72 00 6C 00 64 00 21 00
"wprintWstr" returned 13
"wstr" returned "Dummy text."
e:\Work\Dev\StackOverflow\q054269984>("e:\Work\Dev\VEnvs\py_064_03.05.04_test0\Scripts\python.exe" code.py )
Python 3.5.4 (v3.5.4:3f56838, Aug 8 2017, 02:17:05) [MSC v.1900 64 bit (AMD64)] on win32
F r o m C : - [ t e s t . c p p ] ( 2 3 ) - [ T e s t D l l : : s i m p l e ]
H e l l o , w o r l d !
"simple" returned 13
From C: - [test.cpp] (31) - [TestDll::printStr]
Hello, world!
"printStr" returned 13
F r o m C : - [ t e s t . c p p ] ( 3 9 ) - [ T e s t D l l : : w p r i n t W s t r ]
H e l l o , w o r l d !
H e x ( 1 3 ) : 4 8 0 0 6 5 0 0 6 C 0 0 6 C 0 0 6 F 0 0 2 C 0 0 2 0 0 0 7 7 0 0 6 F 0 0 7 2 0 0 6 C 0 0 6 4 0 0 2 1 0 0
"wprintWstr" returned 13
"wstr" returned "Dummy text."
e:\Work\Dev\StackOverflow\q054269984>("e:\Work\Dev\VEnvs\py_064_03.06.08_test0\Scripts\python.exe" code.py )
Python 3.6.8 (tags/v3.6.8:3c6b436a57, Dec 24 2018, 00:16:47) [MSC v.1916 64 bit (AMD64)] on win32
F r o m C : - [ t e s t . c p p ] ( 2 3 ) - [ T e s t D l l : : s i m p l e ]
H e l l o , w o r l d !
"simple" returned 13
From C: - [test.cpp] (31) - [TestDll::printStr]
Hello, world!
"printStr" returned 13
F r o m C : - [ t e s t . c p p ] ( 3 9 ) - [ T e s t D l l : : w p r i n t W s t r ]
H e l l o , w o r l d !
H e x ( 1 3 ) : 4 8 0 0 6 5 0 0 6 C 0 0 6 C 0 0 6 F 0 0 2 C 0 0 2 0 0 0 7 7 0 0 6 F 0 0 7 2 0 0 6 C 0 0 6 4 0 0 2 1 0 0
"wprintWstr" returned 13
"wstr" returned "Dummy text."
e:\Work\Dev\StackOverflow\q054269984>("e:\Work\Dev\VEnvs\py_064_03.07.02_test0\Scripts\python.exe" code.py )
Python 3.7.2 (tags/v3.7.2:9a3ffc0492, Dec 23 2018, 23:09:28) [MSC v.1916 64 bit (AMD64)] on win32
F r o m C : - [ t e s t . c p p ] ( 2 3 ) - [ T e s t D l l : : s i m p l e ]
H e l l o , w o r l d !
"simple" returned 13
From C: - [test.cpp] (31) - [TestDll::printStr]
Hello, world!
"printStr" returned 13
F r o m C : - [ t e s t . c p p ] ( 3 9 ) - [ T e s t D l l : : w p r i n t W s t r ]
H e l l o , w o r l d !
H e x ( 1 3 ) : 4 8 0 0 6 5 0 0 6 C 0 0 6 C 0 0 6 F 0 0 2 C 0 0 2 0 0 0 7 7 0 0 6 F 0 0 7 2 0 0 6 C 0 0 6 4 0 0 2 1 0 0
"wprintWstr" returned 13
"wstr" returned "Dummy text."
As seen, the behavior is reproducible starting with Python 3.5.
I thought it was because of [Python]: PEP 529 -- Change Windows filesystem encoding to UTF-8, but that's only available from version 3.6.
Then I started reading, (I even tried to do a diff between Python 3.4 and Python 3.5) but with not much success. Some articles that I went through:
[MSDN]: Windows with C++ - Using Printf with Modern C++
[MSDN]: VS2005, console, Unicode, wcout fails
[Python 3]: What’s New In Python 3.5
Then I noticed [SO]: Output unicode strings in Windows console app (@DuckMaestro's answer) and started to play with [MS.Docs]: _setmode.
Adding:
#include <io.h>
#include <fcntl.h>
// Switches the translation mode of stdout to `mode` and returns whatever
// _setmode() returns.
static int set_stdout_mode(int mode) {
    // Push out anything still buffered before the mode changes.
    fflush(stdout);
    return _setmode(_fileno(stdout), mode);
}
and calling it like int stdout_mode = set_stdout_mode(_O_TEXT); in test.cpp before outputting anything from C (and C++: std::wcout line uncommented), yielded:
e:\Work\Dev\StackOverflow\q054269984>"e:\Work\Dev\VEnvs\py_064_03.06.08_test0\Scripts\python.exe" code.py
Python 3.6.8 (tags/v3.6.8:3c6b436a57, Dec 24 2018, 00:16:47) [MSC v.1916 64 bit (AMD64)] on win32
Hello, world!
From C: - [test.cpp] (32) - [TestDll::simple]
Hello, world!
"simple" returned 13
From C: - [test.cpp] (40) - [TestDll::printStr]
Hello, world!
"printStr" returned 13
From C: - [test.cpp] (48) - [TestDll::wprintWstr]
Hello, world!
Hex (13): 48 00 65 00 6C 00 6C 00 6F 00 2C 00 20 00 77 00 6F 00 72 00 6C 00 64 00 21 00
"wprintWstr" returned 13
"wstr" returned "Dummy text."
Although it works, I do not know why. It could be Undefined Behavior
Printing _setmode's return value, revealed that Python 3.4 and also main.exe automatically set the mode to _O_TEXT (0x4000), while newer Python versions (those that don't work) set it to _O_BINARY (0x8000) - which apparently seems to be the cause (might be related: [Python]: Issue #16587 - Py_Initialize breaks wprintf on Windows)
Trying to set it to any of the wide related constants (_O_U16TEXT, _O_U8TEXT, _O_WTEXT) crashes the program when calling printf or std::cout (even if restoring the original mode when done with wide functions - before the narrow ones)
Trying to output real Unicode chars, won't work (most likely)
You could achieve the same goal on Python side: msvcrt.setmode(sys.stdout.fileno(), 0x4000)
Condition:
Contents can only contain characters from the following set:
a b c d e f g h i j k l m n o p q r s t u v w x y z
A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
0 1 2 3 4 5 6 7 8 9
/ - ? : ( ) . , ' +
• Contents may NOT begin with ‘/’
• Contents may NOT contain ‘//’
/**
 * Builds a validator that tests a control's value against `nameRe`.
 *
 * The returned validator yields `{ directDebit: { value } }` when the value
 * MATCHES `nameRe`, and `null` when it does not.
 *
 * NOTE(review): a non-null result normally marks a control as invalid, so a
 * pattern describing the *allowed* content would flag valid input here.
 * Confirm the intended polarity.
 */
export function directDebitValidator(nameRe: RegExp): ValidatorFn {
  return (control: AbstractControl): { [key: string]: any } | null => {
    if (!nameRe.test(control.value)) {
      return null;
    }
    return { 'directDebit': { value: control.value } };
  };
}
/**
 * Attribute directive (`[directDebit]`) that registers itself under
 * NG_VALIDATORS and delegates to `directDebitValidator`.
 */
@Directive({
  selector: '[directDebit]',
  providers: [{ provide: NG_VALIDATORS, useExisting: DirectDebitValidatorDirective, multi: true }]
})
export class DirectDebitValidatorDirective {
  /** Returns `null` for an empty value; otherwise the result of `directDebitValidator`. */
  validate(control: AbstractControl): { [key: string]: any } | null {
    // NOTE: the pattern is reproduced exactly as posted in the question. The
    // spaces inside the lookahead and the `/-?` range in the character
    // classes are the defects under discussion.
    return control.value ? directDebitValidator(new RegExp("^(? !.* [\/]{2})[a-zA-Z0-9-?:().,'+]+([a-zA-Z0-9\/-?:().,'+])*$"))(control)
      : null;
  }
}
There are a couple of issues:
There cannot be spaces in the lookahead definition
The [/-?] creates a range, the - must be escaped or placed at the start/end of the character class.
You may use a / unescaped in the constructor notation since no delimiters are being used there.
So, you may use
directDebitValidator(new RegExp("^(?!.*/{2})[a-zA-Z0-9?:().,'+-][a-zA-Z0-9/?:().,'+-]*$"))
Or, using a regex literal notation:
directDebitValidator(/^(?!.*\/{2})[a-zA-Z0-9?:().,'+-][a-zA-Z0-9\/?:().,'+-]*$/)
See the regex demo.
Hi, I had a query earlier and thought I had cracked it with the help of Richard, but it appears I haven't.
I have attached an image and what I am trying to achieve to make my query clearer.
* If E is correct then cell F will be set to match D manually
* If E is yes and F is set to 111 then G will populate with the contents of C
* If E is no and F is set to anything but 111 then it will return 0
* If E is correct then cell F will be set to match D manually
* If E is yes and F is set to 112 then H will populate with the contents of C
* If E is no and F is set to anything but 112 then it will return 0
* If E is correct then cell F will be set to match D manually
* If E is yes and F is set to 118 then I will populate with the contents of C
* If E is no and F is set to anything but 118 then it will return 0
* If E is correct then cell F will be set to match D manually
* If E is yes and F is set to 119 then J will populate with the contents of C
* If E is no and F is set to anything but 119 then it will return 0
It's not 100% clear, but sounds like this is what you're after:
F2 = =IF(E2="Yes",IF(OR(D2=111,D2=112,D2=118,D2=119)=TRUE,D2,""),"")
G2 = =IF(AND(E2="Yes",F2=111)=TRUE,C2,"")
H2 = =IF(AND(E2="Yes",F2=112)=TRUE,C2,"")
I2 = =IF(AND(E2="Yes",F2=118)=TRUE,C2,"")
J2 = =IF(AND(E2="Yes",F2=119)=TRUE,C2,"")
Then just fill down. I've put "" instead of 0, because it's a lot easier to see what's going on without zeros everywhere. You can change them back once you're happy with the outcome.
Incidentally, sometimes it's easier to read a formula when it is split up. Excel works fine if you have a formula spread over several lines, like the following for F2:
=
IF(
E2="Yes",
IF(
OR(
D2=111,D2=112,D2=118,D2=119
)=TRUE,
D2,
""
),
""
)
I'm developing a piece of code to filter a text as follows:
<DATA>
.SUBCKT SVI A B C D E F
+ G H I
+ J K L
.....
+ X Y Z
*.PININFO AA BB CC
*.PININFO DD EE FF
<DATA>
I need the output to be
A B C D E F
G H I
J K L
.....
X Y Z
I already made a regular expression to do so:
m/\.SUBCKT\s+SVI\s(.*)|\+(.*)/gm
The problem is that I have many similar sections like this input but I only need to detect + lines which are following .SUBCKT SVI header not any other header.
How could I match a group many times, like (\+\s+(.*))? I want to match this capture group each time it is repeated.
Any advice to get this expression.
Perhaps this is closer to what you need.
m/\.SUBCKT\s+SVI\s(.*)\n(\+\s+(.*)\n)*/gm
Does this do what you want? Note that it stops at the ..... because it doesn't begin with a + or .SUBCKT
It won't handle the case where a range of + lines is immediately followed by another .SUBCKT line; is that a problem?
use strict;
use warnings;
# Print the pin list of each .SUBCKT header and of the '+' continuation lines
# that follow it, with the leading keyword / '+' removed.
while ( <DATA> ) {
# Range ("flip-flop") operator in scalar context: it turns on when the
# substitution succeeds (the line started with ".SUBCKT", which is stripped as
# a side effect) and turns off on the first later line that does not start
# with '+'. The three-dot form does not test the right operand on the line
# that turned the range on. Lines outside the range are skipped.
next unless my $in_range = s/^\.SUBCKT\s+// ... /^[^+]/;
# The operator returns a sequence number and appends "E0" to the last one,
# so a value containing 'E' marks the terminating line, which is not printed.
next if $in_range =~ /E/;
# Drop the first token: the subcircuit name on the header line, the '+' on a
# continuation line.
s/^\S+\s+//;
print;
}
__DATA__
<DATA>
.SUBCKT SVI A B C D E F
+ G H I
+ J K L
.....
+ X Y Z
*.PININFO AA BB CC
*.PININFO DD EE FF
<DATA>
output
A B C D E F
G H I
J K L
Update
Here's a state machine version that deals with the special case described above
use strict;
use warnings;
# True while we are inside a .SUBCKT header and its run of '+' continuation lines.
my $in_subckt = 0;

while ( my $line = <DATA> ) {
    if ( $line =~ /^\.SUBCKT\s+\S+\s+(.+)/ ) {
        # Header: emit everything after the subcircuit name and open the run.
        $in_subckt = 1;
        print "$1\n";
    }
    elsif ( $line =~ /^\+\s+(.+)/ ) {
        # Continuation: emit it only when it belongs to a .SUBCKT run.
        print "$1\n" if $in_subckt;
    }
    else {
        # Any other line closes the run.
        $in_subckt = 0;
    }
}
__DATA__
<DATA>
.SUBCKT SVI A B C D E F
+ G H I
+ J K L
.SUBCKT SVI A B C D E F
+ M N O
+ P Q R
*.PININFO AA BB CC
*.PININFO DD EE FF
<DATA>
output
A B C D E F
G H I
J K L
A B C D E F
M N O
P Q R
I made use of @shawnt00's answer and modified the regular expression, and it did the job.
\.SUBCKT\s+SVI_TRX201TH\s(.*\n(\+\s+.*\n)*)
I am looking for a way to use Regex Replace functions on IBM iseries.
As far as I know, I can use C++ libraries (regex.h) (source)
With this, i can only match regex, but not replace.
(using regcomp() to compile and regexec() to match the regex)
Does anyone know a way to do it ?
It's true that the C/C++ POSIX regular expression library doesn't have a built in regexp replace function, but you can accomplish the same thing using positional information from regexec() and the RPGLE %replace() built in function. (I'm assuming you're going to use RPGLE but you could use another language.)
For example, if you wanted to mask all but the last four digits of a phone number you could do this:
/include qcpysrc,regex_h
d regex_phone_number...
d ds inz likeds(regex_t)
d dsrm ds inz likeds(regmatch_t) dim(20)
d data s 52a inz varying
d pattern s 256a inz varying
d rc s 10i 0 inz(0)
/FREE
// Mask the first two digit groups of every phone number found in 'data'.
*inlr = *on ;
data = 'My phone #''s are: (444) 555 - 6666 and 777.888.9999' ;
dsply data ;
// Three capture groups: 3 digits, 3 digits, 4 digits.
pattern = '\(?([0-9]{3})[ .)]*([0-9]{3})[ .-]*([0-9]{4})' ;
rc = regcomp(regex_phone_number :pattern :REG_EXTENDED) ;
if rc = 0 ;
// Loop until regexec() no longer finds a match. Masked groups contain no
// digits, so an already-processed number is not matched again.
dow '1' ;
rc = regexec(regex_phone_number :data
:regex_phone_number.re_nsub :%addr(dsrm) :0) ;
if rc <> 0 ;
leave ;
endif ;
// dsrm(1) is the whole match, dsrm(2) and dsrm(3) are groups 1 and 2.
// rm_so is a 0-based offset while %replace takes a 1-based position, hence
// the +1. Both groups are exactly three characters, so writing '***' keeps
// the offsets of the second group valid.
data = %replace('***': data :dsrm(2).rm_so+1
:dsrm(2).rm_eo - dsrm(2).rm_so) ;
data = %replace('***': data :dsrm(3).rm_so+1
:dsrm(3).rm_eo - dsrm(3).rm_so) ;
enddo ;
endif ;
dsply data ;
regfree(regex_phone_number) ;
/END-FREE
Here's what the copy book regex_h looks like:
** Header file for calling the "Regular Expression" functions
** provided by the ILE C Runtime Library from an RPG IV
** program. Scott Klement, 2001-05-04
** Converted to qualified DS 2003-11-29
** Modified by Jarrett Gilliam 2014-11-05
**
** This copy book is for using the C regular expression library, regex.h, in RPG.
** You can go to http://www.regular-expressions.info/ to learn more about
** regular expressions. This regex flavor is POSIX ERE. You can go to
** http://www-01.ibm.com/support/knowledgecenter/ssw_ibm_i_71/rtref/regexec.htm
** to learn more about how the C functions work.
d/if defined(REGEX_H)
d/eof
d/endif
d/define REGEX_H
**------------------------------------------------------------
* cflags for regcomp()
**------------------------------------------------------------
d REG_BASIC c CONST(0)
d REG_EXTENDED c CONST(1)
d REG_ICASE c CONST(2)
d REG_NEWLINE c CONST(4)
d REG_NOSUB c CONST(8)
**------------------------------------------------------------
* eflags for regexec()
**------------------------------------------------------------
d REG_NOTBOL c CONST(256)
d REG_NOTEOL c CONST(512)
**------------------------------------------------------------
* errors returned
**------------------------------------------------------------
* RE pattern not found
d REG_NOMATCH c CONST(1)
* Invalid Regular Expression
d REG_BADPAT c CONST(2)
* Invalid collating element
d REG_ECOLLATE c CONST(3)
* Invalid character class
d REG_ECTYPE c CONST(4)
* Last character is \
d REG_EESCAPE c CONST(5)
* Invalid number in \digit
d REG_ESUBREG c CONST(6)
* [ ] imbalance
d REG_EBRACK c CONST(7)
* \( \) or () imbalance
d REG_EPAREN c CONST(8)
* \{ \} or { } imbalance
d REG_EBRACE c CONST(9)
* Invalid \{ \} range exp
d REG_BADBR c CONST(10)
* Invalid range exp endpoint
d REG_ERANGE c CONST(11)
* Out of memory
d REG_ESPACE c CONST(12)
* ?*+ not preceded by valid RE
d REG_BADRPT c CONST(13)
* invalid multibyte character
d REG_ECHAR c CONST(14)
* (shift 6 caret or not) anchor and not BOL
d REG_EBOL c CONST(15)
* $ anchor and not EOL
d REG_EEOL c CONST(16)
* Unknown error in regcomp() call
d REG_ECOMP c CONST(17)
* Unknown error in regexec() call
d REG_EEXEC c CONST(18)
**------------------------------------------------------------
* Structure of a compiled regular expression:
**------------------------------------------------------------
d REG_SUBEXP_MAX c 20
** BASED(template): this data structure is only a layout template. Declare
** working copies with LIKEDS(regex_t).
** NOTE(review): presumably the field order mirrors the C regex_t declared in
** the system's regex.h -- confirm before changing any field.
d regex_t ds qualified align based(template)
d re_nsub 10i 0
d re_comp *
d re_cflags 10i 0
d re_erroff 10i 0
d re_len 10i 0
d re_ucoll 10i 0 dim(2)
d re_lsub * DIM(REG_SUBEXP_MAX)
d re_esub * DIM(REG_SUBEXP_MAX)
d re_map 256a
d re_shift 5i 0
d re_dbcs 5i 0
**------------------------------------------------------------
* structure used to report matches found by regexec()
**------------------------------------------------------------
** BASED(template): layout template only. Declare working copies (usually an
** array) with LIKEDS(regmatch_t) DIM(n).
** The example program above takes the length of a match as rm_eo - rm_so.
** NOTE(review): rm_ss / rm_es are not used by the example; presumably
** shift-state fields -- confirm against regex.h.
d regmatch_t ds qualified align based(template)
d rm_so 10i 0
d rm_ss 5i 0
d rm_eo 10i 0
d rm_es 5i 0
**------------------------------------------------------------
* regcomp() -- Compile a Regular Expression ("RE")
*
* int regcomp(regex_t *preg, const char *pattern,
* int cflags);
*
* where:
* preg (output) = the compiled regular expression.
* pattern (input) = the RE to be compiled.
* cflags (input) = the sum of the cflag constants
* (listed above) for this RE.
*
* Returns 0 = success, otherwise an error number.
**------------------------------------------------------------
** preg has neither VALUE nor CONST, so it is passed by reference.
** pattern is a pointer with OPTIONS(*STRING), so an RPG character value can
** be passed and is handed over as a null-terminated C string.
d regcomp pr 10i 0 extproc('regcomp')
d preg like(regex_t)
d pattern * value options(*string)
d cflags 10i 0 value
**------------------------------------------------------------
* regexec() -- Execute a compiled Regular Expression ("RE")
*
* int regexec(const regex_t *preg, const char *string,
* size_t nmatch, regmatch_t *pmatch, int eflags);
*
* where:
* preg (input) = the compiled regular expression
* (the output of regcomp())
* string (input) = string to run the RE upon
* nmatch (input) = the number of matches to return.
* pmatch (output) = array of regmatch_t DS's
* showing what matches were found.
* eflags (input) = the sum of the flags (constants
* provided above) modifying the RE
*
* Returns 0 = success, otherwise an error number.
**------------------------------------------------------------
** pmatch is a plain pointer: pass %ADDR() of a regmatch_t array, as the
** example program above does.
** nmatch is declared unsigned (10u 0) to match the C size_t parameter.
d regexec pr 10i 0 extproc('regexec')
d preg like(regex_t) const
d string * value options(*string)
d nmatch 10u 0 value
d pmatch * value
d eflags 10i 0 value
**------------------------------------------------------------
* regerror() -- return error information from regcomp/regexec
*
* size_t regerror(int errcode, const regex_t *preg,
* char *errbuf, size_t errbuf_size);
*
* where:
* errcode (input) = the error code to return info on
* (obtained as the return value from
* either regcomp() or regexec())
* preg (input) = the (compiled) RE to return the
* error for.
* errbuf (output) = buffer containing human-readable
* error message.
* errbuf_size (input) = size of errbuf (max length of msg
* that will be returned)
*
* returns: length of buffer needed to get entire error msg
**------------------------------------------------------------
** errbuf is a plain pointer to a caller-supplied buffer.
** NOTE(review): the C prototype declares errbuf_size as size_t, yet it is
** signed (10i 0) here, while regexec's size_t parameter uses 10u 0.
** Harmless for ordinary buffer sizes, but consider 10u 0 for consistency.
d regerror pr 10u 0 extproc('regerror')
d errcode 10i 0 value
d preg like(regex_t) const
d errbuf * value
d errbuf_size 10i 0 value
**------------------------------------------------------------
* regfree() -- free memory locked by Regular Expression
*
* void regfree(regex_t *preg);
*
* where:
* preg (input) = regular expression to free mem for.
*
* NOTE: regcomp() will always allocate extra memory
* to be pointed to by the various pointers in
* the regex_t structure. if you don't call this,
* that memory will never be returned to the system!
**------------------------------------------------------------
** No return value. preg is passed by reference (no VALUE or CONST keyword).
d regfree pr extproc('regfree')
d preg like(regex_t)
Here's the output:
DSPLY My phone #'s are: (444) 555 - 6666 and 777.888.9999
DSPLY My phone #'s are: (***) *** - 6666 and ***.***.9999
The code could be improved by extracting the replace logic and putting it in a procedure of its own, creating a custom regexp replace function based on the POSIX library, but it's not absolutely necessary.
The ILE C/C++ runtime library does not have a regex replace function available.
Java, however, has excellent support for regular expressions and integrates easily with RPGLE.
Introduction to Java and RPG
Using Regular Expressions in Java
I succeed in using Regex with Java.
I was inspired by this code from scott klement and that code from ibm.
The mix works well. I just added the replace function.
H
/include QSYSINC/QRPGLESRC,JNI
D* Constructor of java.lang.String taking a byte array.
D newString pr O CLASS(*JAVA:'java.lang.String')
D EXTPROC(*JAVA:'java.lang.String':
D *CONSTRUCTOR)
D bytearray 32767A VARYING CONST
D* java.lang.String.getBytes(): returns the string's bytes as RPG data.
D getBytes PR 65535A VARYING
D EXTPROC(*JAVA:
D 'java.lang.String':
D 'getBytes')
D* Static method java.util.regex.Pattern.compile(String).
D PatternCompile pr O CLASS(*JAVA:
D 'java.util.regex.Pattern')
D EXTPROC(*JAVA:
D 'java.util.regex.Pattern':
D 'compile') STATIC
D pattern O CLASS(*JAVA:'java.lang.String')
D* Instance method Pattern.matcher(CharSequence). When called, the Pattern
D* object is passed as the first argument.
D PatternMatcher pr O CLASS(*JAVA:
D 'java.util.regex.Matcher')
D EXTPROC(*JAVA:
D 'java.util.regex.Pattern':
D 'matcher')
D comparestr O CLASS(*JAVA
D :'java.lang.CharSequence')
D* Instance method Matcher.matches(): returns an indicator (1N).
D CheckMatches pr 1N EXTPROC(*JAVA
D :'java.util.regex.Matcher'
D :'matches')
D* Instance method Matcher.replaceAll(String): returns a new String.
D DoReplace pr O CLASS(*JAVA:'java.lang.String')
D EXTPROC(*JAVA
D :'java.util.regex.Matcher'
D :'replaceAll')
D replacement O CLASS(*JAVA
D :'java.lang.String')
D* Java object references used by the calculations.
D RegExPattern s O CLASS(*JAVA:
D 'java.util.regex.Pattern')
D RegExMatcher s O CLASS(*JAVA:
D 'java.util.regex.Matcher')
D jstrStmt s like(jstring)
D jPatStr s like(jstring)
D jRepStr s like(jstring)
D jRepStr2 s like(jstring)
D* RPG-side copy of the replaced text (first 30 bytes).
D result S 30A
/free
// Build the Java strings: the pattern, the text to test and the replacement.
jPatStr = newString('^(\+33|0)([1-9][0-9]{8})$');
jstrStmt = newString('+33123456789');
// '0$2' = a literal 0 followed by the text of capture group 2.
jRepStr = newString('0$2');
// Compile the pattern and bind a matcher to the input string.
RegExPattern = PatternCompile(jPatStr);
RegExMatcher = PatternMatcher(RegExPattern : jstrStmt);
if (CheckMatches(RegExMatcher) = *ON);
dsply ('it matches');
else;
dsply ('it doesn''t match');
endif;
// The replacement is performed whether or not the match test succeeded.
jRepStr2 = DoReplace(RegExMatcher : jRepStr);
// Copy the Java result back into an RPG field and show its first 30 bytes.
result = getBytes(jRepStr2);
dsply (%subst(result : 1 : 30));
*inlr = *on;
/end-free
It works, but with Java. I still work on the PASE Solution WarrenT suggested, but using PASE in an ILE program is such a pain...
The Young i Professionals Wiki has a page of Open Source Binaries. In the list is the PCRE Library (Perl Compatible Regular Expressions).
Let us know how this works out. I may try it myself ;-)
For excellent SQLRPGLE example and explanation refer to :
https://www.rpgpgm.com/2017/10/replacing-parts-of-strings-using-regexp.html
REGEXP_REPLACE
(
source-string
,
pattern-expression
,
replacement-string
,
start
,
occurrence
,
flags
)