Why is every character followed by a white space in the following?
C++ DLL
test.h:
#ifndef TEST_DLL_H
#define TEST_DLL_H
#define EXPORT __declspec(dllexport) __stdcall
#include <iostream>
#include <Windows.h>
// Public interface of the test DLL: one struct and one exported function.
namespace Test_DLL
{
// Fixed-size text buffer handed across the DLL boundary.
// NOTE(review): TCHAR's width presumably depends on the UNICODE build setting;
// the Python caller declares this field as WCHAR * 1024 — confirm they agree.
struct Simple
{
TCHAR a[1024];
};
// C linkage so the function can be looked up by its plain name ("simple").
extern "C"
{
// Prints the text stored in a->a; the definition in test.cpp returns 0.
int EXPORT simple(Simple* a);
}
};
#endif
test.cpp:
#include "test.h"
// Streams the text buffer held by `a` to std::wcout, then ends the line
// (std::endl also flushes the stream). Always returns 0.
int EXPORT Test_DLL::simple(Simple* a)
{
    const TCHAR* const text = a->a;
    std::wcout << text;
    std::wcout << std::endl;
    return 0;
}
Python
test.py:
import ctypes
from ctypes import wintypes
class MyStructure(ctypes.Structure):
    """ctypes mirror of the DLL's ``Simple`` struct: one buffer of 1024 wide chars."""

    # Field name and element count match ``TCHAR a[1024]`` in test.h.
    _fields_ = [("a", wintypes.WCHAR * 1024)]
# Text to hand to the DLL through the struct's wide-character buffer.
a = "Hello, world!"

# Load the DLL with the stdcall convention (the export is declared __stdcall).
hDLL = ctypes.LibraryLoader(ctypes.WinDLL)
hDLL_Test = hDLL.LoadLibrary(r"...\test.dll")

# Declare the prototype explicitly so ctypes validates the argument and
# converts the return value instead of relying on its defaults.
simple = hDLL_Test.simple
simple.argtypes = [ctypes.POINTER(MyStructure)]
simple.restype = ctypes.c_int

mystruct = MyStructure(a=a)
ret = simple(ctypes.byref(mystruct))
The result:
H e l l o , w o r l d !
Is the problem on the C++ DLL side? Or am I missing something on the Python side?
At the beginning I thought that it's some minor problem in your code. When debugging I discovered that it isn't quite so. Starting from your example, I developed another one that illustrates some key points.
test.h:
#if !defined(TEST_DLL_H)
#define TEST_DLL_H
#if defined(_WIN32)
# if defined(TEST_EXPORTS)
# define TEST_API __declspec(dllexport)
# else
# define TEST_API __declspec(dllimport)
# endif
# define CALLING_CONVENTION __cdecl
#else
# define __TEXT(X) L##X
# define TEXT(X) __TEXT(X)
# define TEST_API
# define CALLING_CONVENTION
#endif
// Public interface of the test library: a wide-character struct plus a few
// exported functions used to compare narrow and wide console output.
namespace TestDll {
// Fixed-size wide-character buffer passed by pointer to simple().
typedef struct Simple_ {
wchar_t a[1024];
} Simple;
// C linkage: the Python caller looks these up by their plain names.
extern "C" {
// Prints pSimple->a with wprintf and returns that wprintf call's result.
TEST_API int CALLING_CONVENTION simple(Simple *pSimple);
// Prints the narrow string pStr with printf and returns that printf call's result.
TEST_API int CALLING_CONVENTION printStr(char *pStr);
// Prints the wide string pWstr, then a hex dump of its bytes; returns the
// result of the wprintf call that printed the string.
TEST_API int CALLING_CONVENTION wprintWstr(wchar_t *pWstr);
// Returns a malloc-allocated copy of a fixed dummy text; release it with clearWstr().
TEST_API wchar_t* CALLING_CONVENTION wstr();
// Frees a buffer previously returned by wstr().
TEST_API void CALLING_CONVENTION clearWstr(wchar_t *pWstr);
}
};
#endif // TEST_DLL_H
test.cpp:
#define TEST_EXPORTS
#include "test.h"
#if defined(_WIN32)
# include <Windows.h>
#else
# include <wchar.h>
# define __FUNCTION__ "function"
#endif
#include <stdio.h>
#include <stdlib.h>
//#include <iostream>
// Debug banner (narrow): prints the source file, line and function of the call site.
#define PRINT_MSG_0() printf("From C: - [%s] (%d) - [%s]\n", __FILE__, __LINE__, __FUNCTION__)
// Debug banner (wide). %ls is used because it denotes a wide string in both the
// MSVC and the standard C wprintf; plain %s means "wide" only on MSVC.
#define WPRINT_MSG_0() wprintf(L"From C: - [%ls] (%d) - [%ls]\n", TEXT(__FILE__), __LINE__, TEXT(__FUNCTION__))
// Text returned (as a heap copy) by wstr().
#define DUMMY_TEXT_W L"Dummy text."
//using namespace std;
// Prints the wide string stored in pSimple->a, followed by a newline.
// Returns the result of the wprintf call that printed the string (the number
// of wide characters written, or a negative value on error).
int TestDll::simple(Simple *pSimple) {
    //std::wcout << pSimple->a << std::endl;
    WPRINT_MSG_0();
    // %ls (not %s): in wprintf, %s means a wide string only on MSVC; the
    // standard C meaning is a narrow string. %ls is wide on both.
    int ret = wprintf(L"%ls", pSimple->a);
    wprintf(L"\n");
    return ret;
}
// Prints the narrow string pStr, followed by a newline.
// Returns the result of the printf call that printed the string.
int TestDll::printStr(char *pStr) {
    PRINT_MSG_0();
    const int written = printf("%s", pStr);
    printf("\n");
    return written;
}
// Prints the wide string pWstr, then a hex dump of its raw bytes (terminator
// excluded) so the in-memory encoding can be inspected.
// Returns the result of the wprintf call that printed the string.
int TestDll::wprintWstr(wchar_t *pWstr) {
    WPRINT_MSG_0();
    // %ls is a wide string in both MSVC and standard C wprintf (%s is MSVC-only).
    int ret = wprintf(L"%ls", pWstr);
    wprintf(L"\n");
    const size_t len = wcslen(pWstr);
    // Read the buffer as unsigned bytes: through a plain (signed) char, any
    // byte >= 0x80 would be sign-extended and printed as FFFFFFxx.
    const unsigned char *bytes = reinterpret_cast<const unsigned char*>(pWstr);
    wprintf(L"Hex (%u): ", static_cast<unsigned>(len));
    const size_t byteCount = len * sizeof(wchar_t);
    for (size_t i = 0; i < byteCount; ++i)
        wprintf(L"%02X ", static_cast<unsigned>(bytes[i]));
    wprintf(L"\n");
    return ret;
}
// Returns a heap-allocated copy of DUMMY_TEXT_W, or NULL if the allocation
// fails. The buffer comes from malloc; the caller releases it with clearWstr().
wchar_t *TestDll::wstr() {
    const size_t count = wcslen(DUMMY_TEXT_W) + 1;  // + 1 for the terminator
    wchar_t *ret = static_cast<wchar_t*>(malloc(count * sizeof(wchar_t)));
    if (ret == NULL)
        return NULL;  // out of memory: do not copy into a null pointer
    wcscpy(ret, DUMMY_TEXT_W);
    return ret;
}
// Releases a buffer with free(); meant for pointers returned by wstr().
void TestDll::clearWstr(wchar_t *pWstr) {
    ::free(pWstr);
}
main.cpp:
#include "test.h"
#include <stdio.h>
#if defined(_WIN32)
# include <Windows.h>
#endif
// Native driver: calls each exported function once and prints its return
// value, as a baseline to compare against the Python (ctypes) caller.
int main() {
    // Mutable array: printStr takes a non-const char*, and binding a string
    // literal to char* is ill-formed since C++11.
    char text[] = "Hello, world!";
    TestDll::Simple s = { TEXT("Hello, world!") };
    int ret = simple(&s);  // ??? Compiles even if namespace not specified here !!!
    printf("\"simple\" returned %d\n", ret);
    ret = TestDll::printStr(text);
    printf("\"printStr\" returned %d\n", ret);
    ret = TestDll::wprintWstr(s.a);
    printf("\"wprintWstr\" returned %d\n", ret);
    return 0;
}
code.py:
#!/usr/bin/env python3
import sys
import ctypes
DLL_NMAME = "./test.dll"
DUMMY_TEXT = "Hello, world!"
# Array type matching ``wchar_t a[1024]`` in the C header.
WCharArr1024 = ctypes.c_wchar * 1024


class SimpleStruct(ctypes.Structure):
    """ctypes mirror of the C ``Simple`` struct: a single 1024-wide-char buffer."""

    _fields_ = [
        ("a", WCharArr1024),
    ]
def main():
    """Load the test DLL and call each exported function once.

    Prints every function's name and return value. The buffer returned by
    ``wstr`` is handed back to ``clearWstr`` so it is not leaked.
    """
    test_dll = ctypes.CDLL(DLL_NMAME)

    simple_func = test_dll.simple
    simple_func.argtypes = [ctypes.POINTER(SimpleStruct)]
    simple_func.restype = ctypes.c_int
    struct_obj = SimpleStruct(a=DUMMY_TEXT)

    print_str_func = test_dll.printStr
    print_str_func.argtypes = [ctypes.c_char_p]
    print_str_func.restype = ctypes.c_int

    wprint_wstr_func = test_dll.wprintWstr
    wprint_wstr_func.argtypes = [ctypes.c_wchar_p]
    wprint_wstr_func.restype = ctypes.c_int

    # c_void_p (not c_wchar_p) as restype: c_wchar_p would be converted to a
    # Python str right away, losing the address needed to free the buffer.
    wstr_func = test_dll.wstr
    wstr_func.argtypes = []
    wstr_func.restype = ctypes.c_void_p

    clear_wstr_func = test_dll.clearWstr
    clear_wstr_func.argtypes = [ctypes.c_void_p]
    clear_wstr_func.restype = None

    #print("From PY: [{:s}]".format(struct_obj.a))
    ret = simple_func(ctypes.byref(struct_obj))
    print("\"{:s}\" returned {:d}".format(simple_func.__name__, ret))

    ret = print_str_func(DUMMY_TEXT.encode())
    print("\"{:s}\" returned {:d}".format(print_str_func.__name__, ret))

    #ret = wprint_wstr_func(ctypes.cast(DUMMY_TEXT.encode(), ctypes.c_wchar_p))
    ret = wprint_wstr_func(DUMMY_TEXT)
    print("\"{:s}\" returned {:d}".format(wprint_wstr_func.__name__, ret))

    s_ptr = wstr_func()  # raw address, or None if the DLL returned NULL
    try:
        s = ctypes.wstring_at(s_ptr) if s_ptr else ""
        print("\"{:s}\" returned \"{:s}\"".format(wstr_func.__name__, s))
    finally:
        # Release the buffer with the DLL's own deallocator (free(NULL) is a no-op).
        clear_wstr_func(s_ptr)
# Run the demo only when executed as a script, not when imported.
if __name__ == "__main__":
    #print("Python {:s} on {:s}\n".format(sys.version, sys.platform))
    main()
Changes:
Removed the C++ layer (to exclude as many variables as possible) and only rely on C
Adapted the code to be *nix compliant (I've run it on Ubuntu, but I encountered other issues that I'm not going to discuss)
Added more functions (this was a debugging process), to gather as much intel as possible
Did some renames, refactorings and other non important changes
While investigating, I discovered a funny problem (the comment from main.cpp). Apparently the simple function compiles even if I don't prepend the namespace in which it's declared. This doesn't apply to the other functions. After some quick tries, I realized that it's because of the Simple argument, which is also part of the namespace. I didn't spend much time on it back then and suspected Undefined Behavior, but it is actually well defined: it's argument-dependent lookup (ADL), a standard C++ rule by which an unqualified function call also searches the namespaces of its argument types (here TestDll, because of TestDll::Simple). The other functions only take pointers to built-in types, so ADL doesn't find them
The narrow and wide functions are mixed, that's a NO - NO, and is only for debugging / demonstrating purposes
Output:
e:\Work\Dev\StackOverflow\q054269984>"c:\Install\x86\Microsoft\Visual Studio Community\2015\vc\vcvarsall.bat" x64
e:\Work\Dev\StackOverflow\q054269984>dir /b
code.py
main.cpp
test.cpp
test.h
e:\Work\Dev\StackOverflow\q054269984>cl /nologo /DDLL /DUNICODE /MD /EHsc test.cpp /link /NOLOGO /DLL /OUT:test.dll
test.cpp
Creating library test.lib and object test.exp
e:\Work\Dev\StackOverflow\q054269984>cl /nologo /DUNICODE /MD /EHsc main.cpp /link /NOLOGO /OUT:main.exe test.lib
main.cpp
e:\Work\Dev\StackOverflow\q054269984>dir /b
code.py
main.cpp
main.exe
main.obj
test.cpp
test.dll
test.exp
test.h
test.lib
test.obj
e:\Work\Dev\StackOverflow\q054269984>main.exe
From C: - [test.cpp] (23) - [TestDll::simple]
Hello, world!
"simple" returned 13
From C: - [test.cpp] (31) - [TestDll::printStr]
Hello, world!
"printStr" returned 13
From C: - [test.cpp] (39) - [TestDll::wprintWstr]
Hello, world!
Hex (13): 48 00 65 00 6C 00 6C 00 6F 00 2C 00 20 00 77 00 6F 00 72 00 6C 00 64 00 21 00
"wprintWstr" returned 13
e:\Work\Dev\StackOverflow\q054269984>"e:\Work\Dev\VEnvs\py_064_03.06.08_test0\Scripts\python.exe" code.py
Python 3.6.8 (tags/v3.6.8:3c6b436a57, Dec 24 2018, 00:16:47) [MSC v.1916 64 bit (AMD64)] on win32
F r o m C : - [ t e s t . c p p ] ( 2 3 ) - [ T e s t D l l : : s i m p l e ]
H e l l o , w o r l d !
"simple" returned 13
From C: - [test.cpp] (31) - [TestDll::printStr]
Hello, world!
"printStr" returned 13
F r o m C : - [ t e s t . c p p ] ( 3 9 ) - [ T e s t D l l : : w p r i n t W s t r ]
H e l l o , w o r l d !
H e x ( 1 3 ) : 4 8 0 0 6 5 0 0 6 C 0 0 6 C 0 0 6 F 0 0 2 C 0 0 2 0 0 0 7 7 0 0 6 F 0 0 7 2 0 0 6 C 0 0 6 4 0 0 2 1 0 0
"wprintWstr" returned 13
"wstr" returned "Dummy text."
It seems to be Python related
The strings themselves are not messed up (their lengths and wprintf return value are correct). It's more like stdout is the culprit
Then, I went further:
e:\Work\Dev\StackOverflow\q054269984>for /f %f in ('dir /b "e:\Work\Dev\VEnvs\py_064*"') do ("e:\Work\Dev\VEnvs\%f\Scripts\python.exe" code.py)
e:\Work\Dev\StackOverflow\q054269984>("e:\Work\Dev\VEnvs\py_064_02.07.15_test0\Scripts\python.exe" code.py )
Python 2.7.15 (v2.7.15:ca079a3ea3, Apr 30 2018, 16:30:26) [MSC v.1500 64 bit (AMD64)] on win32
From C: - [test.cpp] (23) - [TestDll::simple]
Hello, world!
"simple" returned 13
From C: - [test.cpp] (31) - [TestDll::printStr]
Hello, world!
"printStr" returned 13
From C: - [test.cpp] (39) - [TestDll::wprintWstr]
Hello, world!
Hex (13): 48 00 65 00 6C 00 6C 00 6F 00 2C 00 20 00 77 00 6F 00 72 00 6C 00 64 00 21 00
"wprintWstr" returned 13
"wstr" returned "Dummy text."
e:\Work\Dev\StackOverflow\q054269984>("e:\Work\Dev\VEnvs\py_064_03.04.04_test0\Scripts\python.exe" code.py )
Python 3.4.4 (v3.4.4:737efcadf5a6, Dec 20 2015, 20:20:57) [MSC v.1600 64 bit (AMD64)] on win32
From C: - [test.cpp] (23) - [TestDll::simple]
Hello, world!
"simple" returned 13
From C: - [test.cpp] (31) - [TestDll::printStr]
Hello, world!
"printStr" returned 13
From C: - [test.cpp] (39) - [TestDll::wprintWstr]
Hello, world!
Hex (13): 48 00 65 00 6C 00 6C 00 6F 00 2C 00 20 00 77 00 6F 00 72 00 6C 00 64 00 21 00
"wprintWstr" returned 13
"wstr" returned "Dummy text."
e:\Work\Dev\StackOverflow\q054269984>("e:\Work\Dev\VEnvs\py_064_03.05.04_test0\Scripts\python.exe" code.py )
Python 3.5.4 (v3.5.4:3f56838, Aug 8 2017, 02:17:05) [MSC v.1900 64 bit (AMD64)] on win32
F r o m C : - [ t e s t . c p p ] ( 2 3 ) - [ T e s t D l l : : s i m p l e ]
H e l l o , w o r l d !
"simple" returned 13
From C: - [test.cpp] (31) - [TestDll::printStr]
Hello, world!
"printStr" returned 13
F r o m C : - [ t e s t . c p p ] ( 3 9 ) - [ T e s t D l l : : w p r i n t W s t r ]
H e l l o , w o r l d !
H e x ( 1 3 ) : 4 8 0 0 6 5 0 0 6 C 0 0 6 C 0 0 6 F 0 0 2 C 0 0 2 0 0 0 7 7 0 0 6 F 0 0 7 2 0 0 6 C 0 0 6 4 0 0 2 1 0 0
"wprintWstr" returned 13
"wstr" returned "Dummy text."
e:\Work\Dev\StackOverflow\q054269984>("e:\Work\Dev\VEnvs\py_064_03.06.08_test0\Scripts\python.exe" code.py )
Python 3.6.8 (tags/v3.6.8:3c6b436a57, Dec 24 2018, 00:16:47) [MSC v.1916 64 bit (AMD64)] on win32
F r o m C : - [ t e s t . c p p ] ( 2 3 ) - [ T e s t D l l : : s i m p l e ]
H e l l o , w o r l d !
"simple" returned 13
From C: - [test.cpp] (31) - [TestDll::printStr]
Hello, world!
"printStr" returned 13
F r o m C : - [ t e s t . c p p ] ( 3 9 ) - [ T e s t D l l : : w p r i n t W s t r ]
H e l l o , w o r l d !
H e x ( 1 3 ) : 4 8 0 0 6 5 0 0 6 C 0 0 6 C 0 0 6 F 0 0 2 C 0 0 2 0 0 0 7 7 0 0 6 F 0 0 7 2 0 0 6 C 0 0 6 4 0 0 2 1 0 0
"wprintWstr" returned 13
"wstr" returned "Dummy text."
e:\Work\Dev\StackOverflow\q054269984>("e:\Work\Dev\VEnvs\py_064_03.07.02_test0\Scripts\python.exe" code.py )
Python 3.7.2 (tags/v3.7.2:9a3ffc0492, Dec 23 2018, 23:09:28) [MSC v.1916 64 bit (AMD64)] on win32
F r o m C : - [ t e s t . c p p ] ( 2 3 ) - [ T e s t D l l : : s i m p l e ]
H e l l o , w o r l d !
"simple" returned 13
From C: - [test.cpp] (31) - [TestDll::printStr]
Hello, world!
"printStr" returned 13
F r o m C : - [ t e s t . c p p ] ( 3 9 ) - [ T e s t D l l : : w p r i n t W s t r ]
H e l l o , w o r l d !
H e x ( 1 3 ) : 4 8 0 0 6 5 0 0 6 C 0 0 6 C 0 0 6 F 0 0 2 C 0 0 2 0 0 0 7 7 0 0 6 F 0 0 7 2 0 0 6 C 0 0 6 4 0 0 2 1 0 0
"wprintWstr" returned 13
"wstr" returned "Dummy text."
As seen, the behavior is reproducible starting with Python 3.5.
I thought it was because of [Python]: PEP 529 -- Change Windows filesystem encoding to UTF-8, but that's only available from version 3.6.
Then I started reading, (I even tried to do a diff between Python 3.4 and Python 3.5) but with not much success. Some articles that I went through:
[MSDN]: Windows with C++ - Using Printf with Modern C++
[MSDN]: VS2005, console, Unicode, wcout fails
[Python 3]: What’s New In Python 3.5
Then I noticed [SO]: Output unicode strings in Windows console app (#DuckMaestro's answer) and started to play with [MS.Docs]: _setmode.
Adding:
#include <io.h>
#include <fcntl.h>
// Flushes pending stdout output, then switches stdout's translation mode to
// `mode`. Returns _setmode's result unchanged.
// NOTE(review): per the MS docs that is the previous mode, or -1 on failure — confirm.
static int set_stdout_mode(int mode) {
    fflush(stdout);
    return _setmode(_fileno(stdout), mode);
}
and calling it like int stdout_mode = set_stdout_mode(_O_TEXT); in test.cpp before outputting anything from C (and C++: std::wcout line uncommented), yielded:
e:\Work\Dev\StackOverflow\q054269984>"e:\Work\Dev\VEnvs\py_064_03.06.08_test0\Scripts\python.exe" code.py
Python 3.6.8 (tags/v3.6.8:3c6b436a57, Dec 24 2018, 00:16:47) [MSC v.1916 64 bit (AMD64)] on win32
Hello, world!
From C: - [test.cpp] (32) - [TestDll::simple]
Hello, world!
"simple" returned 13
From C: - [test.cpp] (40) - [TestDll::printStr]
Hello, world!
"printStr" returned 13
From C: - [test.cpp] (48) - [TestDll::wprintWstr]
Hello, world!
Hex (13): 48 00 65 00 6C 00 6C 00 6F 00 2C 00 20 00 77 00 6F 00 72 00 6C 00 64 00 21 00
"wprintWstr" returned 13
"wstr" returned "Dummy text."
Although it works, I do not know why. It could be Undefined Behavior
Printing _setmode's return value, revealed that Python 3.4 and also main.exe automatically set the mode to _O_TEXT (0x4000), while newer Python versions (those that don't work) set it to _O_BINARY (0x8000) - which apparently seems to be the cause (might be related: [Python]: Issue #16587 - Py_Initialize breaks wprintf on Windows)
Trying to set it to any of the wide related constants (_O_U16TEXT, _O_U8TEXT, _O_WTEXT) crashes the program when calling printf or std::cout (even if restoring the original mode when done with wide functions - before the narrow ones)
Trying to output real Unicode chars, won't work (most likely)
You could achieve the same goal on Python side: msvcrt.setmode(sys.stdout.fileno(), 0x4000)
I'm looking for a function that returns a map[string]interface{} where interface{} can be a slice, a map[string]interface{} or a value.
My use case is to parse WKT geometry like the following and retrieves point values; Example for a donut polygon:
POLYGON ((0 0, 0 10, 10 10, 10 0, 0 0),(3 3, 3 7, 7 7, 7 3, 3 3))
The regex (for readability, I deliberately use \d, which only matches integers):
(POLYGON \(
(?P<polygons>\(
(?P<points>(?P<point>(\d \d), ){3,})
(?P<last_point>\d \d )\),)*
(?P<last_polygon>\(
(?P<points>(?P<point>(\d \d), ){3,})
(?P<last_point>\d \d)\))\)
)
I have a function (copied from SO) that retrieves some information, but it doesn't handle nested groups and lists of groups well:
func getRegexMatchParams(reg *regexp.Regexp, url string) (paramsMap map[string]string) {
match := reg.FindStringSubmatch(url)
paramsMap = make(map[string]string)
for i, name := range reg.SubexpNames() {
if i > 0 && i <= len(match) {
paramsMap[name] = match[i]
}
}
return match
}
It seems that the group point gets only 1 point.
example on playground
[EDIT] The result I want is something like this:
map[string]interface{}{
"polygons": map[string]interface{} {
"points": []interface{}{
{map[string]string{"point": "0 0"}},
{map[string]string{"point": "0 10"}},
{map[string]string{"point": "10 10"}},
{map[string]string{"point": "10 0"}},
},
"last_point": "0 0",
},
"last_polygon": map[string]interface{} {
"points": []interface{}{
{map[string]string{"point": "3 3"}},
{map[string]string{"point": "3 7"}},
{map[string]string{"point": "7 7"}},
{map[string]string{"point": "7 3"}},
},
"last_point": "3 3",
}
}
So I can use it further for different purposes like querying databases and validate that last_point = points[0] for each polygon.
Try to add some whitespace to the regex.
Also note that this engine won't retain all capture group values that are
within a quantified outer grouping like (a|b|c)+ where this group will only contain the last a or b or c it finds.
And, your regex can be reduced to this
(POLYGON\s*\((?P<polygons>\(\s*(?P<points>(?P<point>\s*(\d+\s+\d+)\s*,){3,})\s*(?P<last_point>\d+\s+\d+)\s*\)(?:\s*,\s*|\s*\)))+)
https://play.golang.org/p/rLaaEa_7GX
The original:
(POLYGON\s*\((?P<polygons>\(\s*(?P<points>(?P<point>\s*(\d+\s+\d+)\s*,){3,})\s*(?P<last_point>\d+\s+\d+)\s*\),)*(?P<last_polygon>\(\s*(?P<points>(?P<point>\s*(\d+\s+\d+)\s*,){3,})\s*(?P<last_point>\d+\s+\d+)\s*\))\s*\))
https://play.golang.org/p/rZgJYPDMzl
See below for what the groups contain.
( # (1 start)
POLYGON \s* \(
(?P<polygons> # (2 start)
\( \s*
(?P<points> # (3 start)
(?P<point> # (4 start)
\s*
( \d+ \s+ \d+ ) # (5)
\s*
,
){3,} # (4 end)
) # (3 end)
\s*
(?P<last_point> \d+ \s+ \d+ ) # (6)
\s* \),
)* # (2 end)
(?P<last_polygon> # (7 start)
\( \s*
(?P<points> # (8 start)
(?P<point> # (9 start)
\s*
( \d+ \s+ \d+ ) # (10)
\s*
,
){3,} # (9 end)
) # (8 end)
\s*
(?P<last_point> \d+ \s+ \d+ ) # (11)
\s* \)
) # (7 end)
\s* \)
) # (1 end)
Input
POLYGON ((0 0, 0 10, 10 10, 10 0, 0 0),(3 3, 3 7, 7 7, 7 3, 3 3))
Output
** Grp 0 - ( pos 0 , len 65 )
POLYGON ((0 0, 0 10, 10 10, 10 0, 0 0),(3 3, 3 7, 7 7, 7 3, 3 3))
** Grp 1 - ( pos 0 , len 65 )
POLYGON ((0 0, 0 10, 10 10, 10 0, 0 0),(3 3, 3 7, 7 7, 7 3, 3 3))
** Grp 2 [polygons] - ( pos 9 , len 30 )
(0 0, 0 10, 10 10, 10 0, 0 0),
** Grp 3 [points] - ( pos 10 , len 23 )
0 0, 0 10, 10 10, 10 0,
** Grp 4 [point] - ( pos 27 , len 6 )
10 0,
** Grp 5 - ( pos 28 , len 4 )
10 0
** Grp 6 [last_point] - ( pos 34 , len 3 )
0 0
** Grp 7 [last_polygon] - ( pos 39 , len 25 )
(3 3, 3 7, 7 7, 7 3, 3 3)
** Grp 8 [points] - ( pos 40 , len 19 )
3 3, 3 7, 7 7, 7 3,
** Grp 9 [point] - ( pos 54 , len 5 )
7 3,
** Grp 10 - ( pos 55 , len 3 )
7 3
** Grp 11 [last_point] - ( pos 60 , len 3 )
3 3
Possible Solution
It's not impossible. It just takes a few extra steps.
(As an aside, isn't there a library for WKT that can parse this for you ?)
Now, I don't know your language capabilities, so this is just a general approach.
1. Validate the form you're parsing.
This will validate and return all polygon sets as a single string in All_Polygons group.
Target POLYGON ((0 0, 0 10, 10 10, 10 0, 0 0),(3 3, 3 7, 7 7, 7 3, 3 3))
POLYGON\s*\((?P<All_Polygons>(?:\(\s*\d+\s+\d+(?:\s*,\s*\d+\s+\d+){2,}\s*\))(?:\s*,\(\s*\d+\s+\d+(?:\s*,\s*\d+\s+\d+){2,}\s*\))*)\s*\)
** Grp 1 [All_Polygons] - ( pos 9 , len 55 )
(0 0, 0 10, 10 10, 10 0, 0 0),(3 3, 3 7, 7 7, 7 3, 3 3)
2. If 1 was successful, set up a loop match using the output of All_Polygons string.
Target (0 0, 0 10, 10 10, 10 0, 0 0),(3 3, 3 7, 7 7, 7 3, 3 3)
(?:\(\s*(?P<Single_Poly_All_Pts>\d+\s+\d+(?:\s*,\s*\d+\s+\d+){2,})\s*\))
This step is equivalent of a find all type of match. It should match successive values of all the points of a single polygon, returned in Single_Poly_All_Pts group string.
This will give you these 2 separate matches, which can be put into a temp array having 2 value strings:
** Grp 1 [Single_Poly_All_Pts] - ( pos 1 , len 27 )
0 0, 0 10, 10 10, 10 0, 0 0
** Grp 1 [Single_Poly_All_Pts] - ( pos 31 , len 23 )
3 3, 3 7, 7 7, 7 3, 3 3
3. If 2 was successful, set up a loop match using the temp array output of step 2.
This will give you the individual points of each polygon.
(?P<Single_Point>\d+\s+\d+)
Again this is a loop match (or a find all type of match). For each array element
(Polygon), this will produce the individual points.
Target[element 1] 0 0, 0 10, 10 10, 10 0, 0 0
** Grp 1 [Single_Point] - ( pos 0 , len 3 )
0 0
** Grp 1 [Single_Point] - ( pos 5 , len 4 )
0 10
** Grp 1 [Single_Point] - ( pos 11 , len 5 )
10 10
** Grp 1 [Single_Point] - ( pos 18 , len 4 )
10 0
** Grp 1 [Single_Point] - ( pos 24 , len 3 )
0 0
And,
Target[element 2] 3 3, 3 7, 7 7, 7 3, 3 3
** Grp 1 [Single_Point] - ( pos 0 , len 3 )
3 3
** Grp 1 [Single_Point] - ( pos 5 , len 3 )
3 7
** Grp 1 [Single_Point] - ( pos 10 , len 3 )
7 7
** Grp 1 [Single_Point] - ( pos 15 , len 3 )
7 3
** Grp 1 [Single_Point] - ( pos 20 , len 3 )
3 3
I use a regexp to test a link :
lolspec:\/\/(spectator\.(na|euw1|eu|kr|oc1|br|la1|la2|ru|tr|pbe1)\.lol\.riotgames\.com:(80|8088)((([?&]region=(NA1|EUW1|EUN1|KR|OC1|BR1|LA1|LA2|RU|TR1|PBE1))|([?&]gameID=([0-9]+))|([?&]encKey=(.+)))){3})
to test this link :
lolspec://spectator.euw1.lol.riotgames.com:80?region=NA1&gameID=44584&encKey=fghgdsv1134+ianfcuia
but some groups are empty (#7, #8, #9)
what should I do ?
Probably overkill on the capture groups.
The regex you use there contains a container capture group 4 that is quantified
like this ( ... ){3}.
What that will do is overwrite the container capture buffer 3 times leaving
the last value captured within the capture group.
Moving on to the next level, there is a single inner group which the outer group encapsulates, like this (( ... )){3}, so that's not needed, and you get the same effect of overwriting.
Moving even deeper, are three capture groups all separated by alternations.
They follow the same rules, each one will get overwritten if they can match
again during each successive 1..3 quantified passes.
It's only that one group matches in the alternation cluster on each pass.
So, if you have identical adjacent data, it could be matched by the same
alternation cluster, leaving the other cluster groups empty.
So, this is not the approach if you want to match out-of-order parameters
in a string.
The way this is done is using lookahead assertions OR if you are using
an engine that can do conditionals.
The way to do it using conditionals is like this
(?:
.*?
(?:
( (?(1)(?!)) abc ) # (1)
| ( (?(2)(?!)) def ) # (2)
| ( (?(3)(?!)) ghi ) # (3)
)
){3}
It forces finding all of the capture group contents.
The way you are doing it is the same but without the conditionals,
and suffering the consequences as stated above.
Btw, Your regex above does not have any empty groups with that particular sample, But it has many problems.
lolspec:\/\/
( # (1 start)
spectator\.
( na | euw1 | eu | kr | oc1 | br | la1 | la2 | ru | tr | pbe1 ) # (2)
\.lol\.riotgames\.com:
( 80 | 8088 ) # (3)
( # (4 start)
( # (5 start)
( # (6 start)
[?&] region=
( NA1 | EUW1 | EUN1 | KR | OC1 | BR1 | LA1 | LA2 | RU | TR1 | PBE1 ) # (7)
) # (6 end)
| ( # (8 start)
[?&] gameID=
( [0-9]+ ) # (9)
) # (8 end)
| ( # (10 start)
[?&] encKey=
( .+ ) # (11)
) # (10 end)
) # (5 end)
){3} # (4 end)
) # (1 end)
Output
** Grp 0 - ( pos 0 , len 97 )
lolspec://spectator.euw1.lol.riotgames.com:80?region=NA1&gameID=44584&encKey=fghgdsv1134+ianfcuia
** Grp 1 - ( pos 10 , len 87 )
spectator.euw1.lol.riotgames.com:80?region=NA1&gameID=44584&encKey=fghgdsv1134+ianfcuia
** Grp 2 - ( pos 20 , len 4 )
euw1
** Grp 3 - ( pos 43 , len 2 )
80
** Grp 4 - ( pos 69 , len 28 )
&encKey=fghgdsv1134+ianfcuia
** Grp 5 - ( pos 69 , len 28 )
&encKey=fghgdsv1134+ianfcuia
** Grp 6 - ( pos 45 , len 11 )
?region=NA1
** Grp 7 - ( pos 53 , len 3 )
NA1
** Grp 8 - ( pos 56 , len 13 )
&gameID=44584
** Grp 9 - ( pos 64 , len 5 )
44584
** Grp 10 - ( pos 69 , len 28 )
&encKey=fghgdsv1134+ianfcuia
** Grp 11 - ( pos 77 , len 20 )
fghgdsv1134+ianfcuia