VBA or PostgreSQL: remove unneeded parentheses from a mathematical equation string - regex

I'm looking to remove mathematically unneeded parentheses from a mathematical equation string. I need to do this either, and preferably, in PostgreSQL 6 or VBA.
For example, I have the following string value in a PostgreSQL database:
PercentileRank((([bp47244]+([bp47229][ttm]))/(AvgAeTe([bp48918]))))
And I need it to look like this (edited/corrected):
PercentileRank(([bp47244]+[bp47229][ttm])/AvgAeTe([bp48918]))
I'd prefer a function or query in PostgreSQL, but a VBA solution could work.
Note PercentileRank() and AvgAeTe() are functions. This [bp47244] and [bp47229][ttm] each represent single numbers/variables, but they could be expressed in any way, like [abc123] and [xyz321][ttm]. I see a lot of examples out there, but I don't see one using PostgreSQL or VBA that works for me, so I thought it would be a good question.
Of course I am looking for a general solution that can be applied to any equation.
I'm working on this now, so if I find an answer before one is posted here, I'll share; however, I am not good at regex (not that the solution has to use regex).
Thanks!
UPDATE:
I'm working off this logic:
Let L be operator immediately left of the left parenthesis, or nil
Let R be operator immediately right of the right parenthesis, or nil
If L is nil and R is nil:
Redundant
Else:
Scan the unparenthesized operators between the parentheses
Let X be the lowest priority operator
If X has lower priority than L or R:
Not redundant
Else:
Redundant
from this link:
Remove redundant parentheses from an arithmetic expression
I'll code something up in VBA that follows this logic and post an answer.

This seems to work for my situation:
Function RemoveParens(s As String) As String
'remove unecessary parentheses
'exponents not implemented
'mathematical brackets are not implmented (it is assumed that only parentheses are used to create mathematical order)
'brakets are assumed to identify a variable or calculation on a variable
'[bp47229][ttm] -> one value/variable; [xyz123] -> one value/variable
'logic based on Antti Huima's answer:
'https://stackoverflow.com/questions/44203517/vba-or-postgresql-remove-unneeded-parentheses-from-a-mathematical-equation-stri
's = "PercentileRank((([bp47244]+([bp47229][ttm]))/(AvgAeTe([bp48918]))))"
's = "PercentileRank(2*(1+3)(5*4))"
If InStr(1, s, "^") > 0 Then
msgbox "Exponents are not implemented in RemoveParens"
End If
ReDim arS(1 To Len(s)) As String
Dim i As Integer
For i = 1 To Len(s)
arS(i) = Mid(s, i, 1)
Next i
Dim iCnt As Integer
iCnt = 0
Dim iLen As Integer
iLen = Len(s)
Dim sTmp As String
Dim bRemove As Boolean
bRemove = False
Dim sLfOpr As String
Dim sRtOpr As String
Dim iCntBtwn As Integer
Dim sLast As String
'loop through chars
Do
iCnt = iCnt + 1
sTmp = Mid(s, iCnt, 1)
If sTmp = "(" Then
if iCnt - 1 <= 0 then
sLfOpr = ""
else
sLfOpr = Mid(s, iCnt - 1, 1)
end if
'in case we have "5(...) or (...)(...)
If IsNumeric(sLfOpr) Or sLfOpr = ")" Then
sLfOpr = "*"
End If
'if it isn't an oper then clear it
If sLfOpr <> "+" _
And sLfOpr <> "-" _
And sLfOpr <> "/" _
And ((Not IsAlpha(sLfOpr) = True) Or (Not Mid(s, iCnt, 1) = "(")) _
And sLfOpr <> "*" _
Then
sLfOpr = ""
End If
'find the matching paren to the right of LfOpr
Dim iCntR As Integer
iCntR = iCnt
Dim iCntParen As Integer
iCntParen = 1
Dim sTmpR As String
sTmpR = ""
Do
iCntR = iCntR + 1
sTmpR = Mid(s, iCntR, 1)
If sTmpR = "(" Then
iCntParen = iCntParen + 1
ElseIf sTmpR = ")" Then
iCntParen = iCntParen - 1
End If
'we found the close paren that matches the open paren
If iCntParen = 0 Then
sRtOpr = Mid(s, iCntR + 1, 1)
'in case we have "(...)5 or (...)(...)
If IsNumeric(sRtOpr) Or sRtOpr = "(" Then
sRtOpr = "*"
End If
If sRtOpr <> "+" _
And sRtOpr <> "-" _
And sRtOpr <> "/" _
And ((Not IsAlpha(sRtOpr) = True) Or (Not Mid(s, iCntR, 1) = "(")) _
And sRtOpr <> "*" _
Then
sRtOpr = ""
End If
If sRtOpr = "" And sLfOpr = "" Then
arS(iCnt) = ""
arS(iCntR) = ""
'go to the next overall open paren
Exit Do
Else
' ------------ search btwn parens -------------------
Dim iCntParenOp As Integer
Dim iCntParenCl As Integer
iCntParenOp = 0
iCntParenCl = 0
Dim sTmpB As String
sTmpB = ""
Dim sLowOpr As String
sLowOpr = ""
Dim iCntRLw As Integer
iCntRLw = iCnt
Dim bInSub As Boolean
bInSub = False
Dim bNoOpr As Boolean
bNoOpr = True
'loop through chars between the two parens
For i = iCnt + 1 To iCntR
iCntRLw = iCntRLw + 1
sTmpR = Mid(s, iCntRLw, 1)
If sTmpR = "(" Then
iCntParenOp = iCntParenOp + 1
bInSub = True
ElseIf sTmpR = ")" Then
iCntParenCl = iCntParenCl + 1
If bInSub = True And iCntParenCl = iCntParenOp Then
bInSub = False
End If
End If
'we found the close paren that matches the open paren
'and we are not in a nested/sub paren
If bInSub = False Then
'in case we have "(...)5 or (...)(...)
If (IsNumeric(sTmpR) And Mid(s, iCntRLw + 1, 1) = "(") Or (sTmpR = "(" And Mid(s, iCntRLw + 1, 1) = "(") Then
sTmp = "*"
End If
'it is an operator
If sTmpR = "+" _
Or sTmpR = "-" _
Or sTmpR = "/" _
Or ((IsAlpha(sTmpR) = True) And (Mid(s, iCntRLw + 1, 1) = "(")) _
Or sTmpR = "*" _
Or bNoOpr = True _
Then
'see if sLowROpr operater has lower priority than sLfOpr and sRtOpr
If Not IsLowerPri(sTmpR, sRtOpr, sLfOpr) Then
arS(iCnt) = ""
arS(iCntR) = ""
Exit For
End If
bNoOpr = False
End If
End If
Next i
End If
Exit Do 'always stop loop if iCntParen = 0
End If
Loop While iCntR <> iLen
End If
Loop While iCnt <> iLen
Dim sOut As String
For i = LBound(arS) To UBound(arS)
sOut = sOut & arS(i)
Next i
'Debug.Print s
RemoveParens = sOut
End Function
Function IsLowerPri(sTestOpr As String, sRtOpr As String, sLfOpr As String) As Boolean
'exponents not implemented yet
Dim iTestOpr As Integer
Dim iRtOpr As Integer
Dim iLfOpr As Integer
iTestOpr = 1
If sTestOpr = "+" Or sTestOpr = "-" Then
iTestOpr = 1
ElseIf sTestOpr = "*" Or sTestOpr = "/" Then
iTestOpr = 2
ElseIf IsAlpha(sTestOpr) And sTestOpr <> "" Then
iTestOpr = 3
End If
If sRtOpr = "+" Or sRtOpr = "-" Then
iRtOpr = 1
ElseIf sRtOpr = "*" Or sRtOpr = "/" Then
iRtOpr = 2
ElseIf IsAlpha(sRtOpr) And sRtOpr <> "" Then
iRtOpr = 3
End If
If sLfOpr = "+" Or sLfOpr = "-" Then
iLfOpr = 1
ElseIf sLfOpr = "*" Or sLfOpr = "/" Then
iLfOpr = 2
ElseIf IsAlpha(sLfOpr) And sLfOpr <> "" Then
iLfOpr = 3
End If
If iTestOpr < iRtOpr Or iTestOpr < iLfOpr Then
IsLowerPri = True
Else
IsLowerPri = False
End If
End Function
It needs a lot of clean-up and probably some testing. I will give answer credit to whomever posts the best improvement or a different solution all together that is better.
UPDATE:
Forgot this function:
Public Function IsAlpha(strValue As String) As Boolean
IsAlpha = strValue Like WorksheetFunction.Rept("[a-zA-Z]", Len(strValue))
End Function

Related

Select text strings with multiple formatting tags within

Context:
VB.NET application using htmlagility pack to handle html document.
Issue:
In a html document, I'd like to prefixe all the strings starting with # and ending with space by an url whatever formatting tags are used within.
So #sth would became http://www.anything.tld/sth
For instance:
Before:
<p>#string1</p> blablabla
<p><strong>#stri</strong>ng2</p> bliblibli
After:
<p>#string1 blablabla</p>
<p><strong>#stri</strong>ng2 bliblibli</p>
I guess i can achieve this with html agility pack but how to select the entire text string without its formatting ?
Or should i use a simple regex replace routine?
Here's my solution. I'm sure it would make some experienced developpers bleed from every hole but it actually works.
The htmlcode is in strCorpusHtmlContent
Dim matchsHashtag As MatchCollection
Dim matchHashtag As Match
Dim captureHashtag As Capture
Dim strHashtagFormatted As String
Dim strRegexPatternHashtag As String = "#([\s]*)(\w*)"
matchsHashtag = Regex.Matches(strCorpusHtmlContent, strRegexPatternHashtag)
For Each matchHashtag In matchsHashtag
For Each captureHashtag In matchHashtag.Captures
Dim strHashtagToFormat As String
Dim strHashtagValueToFormat As String
' Test if the hashtag is followed by a tag
If Mid(strCorpusHtmlContent, captureHashtag.Index + captureHashtag.Length + 1, 1) = "<" Then
strHashtagValueToFormat = captureHashtag.Value
Dim intStartPosition As Integer = captureHashtag.Index + captureHashtag.Length + 1
Dim intSpaceCharPostion As Integer = intStartPosition
Dim nextChar As Char
Dim blnInATag As Boolean = True
Do Until (nextChar = " " Or nextChar = vbCr Or nextChar = vbLf Or nextChar = vbCrLf) And blnInATag = False
nextChar = CChar(Mid(strCorpusHtmlContent, intSpaceCharPostion + 1, 1))
If nextChar = "<" Then
blnInATag = True
ElseIf nextChar = ">" Then
blnInATag = False
End If
If blnInATag = False And nextChar <> ">" And nextChar <> " " Then
strHashtagValueToFormat &= nextChar
End If
intSpaceCharPostion += 1
Loop
strHashtagToFormat = Mid(strCorpusHtmlContent, captureHashtag.Index + 1, intSpaceCharPostion - captureHashtag.Length)
Else
strHashtagToFormat = captureHashtag.Value
End If
strHashtagFormatted = "" & strHashtagToFormat & ""
strCorpusHtmlContent = Regex.Replace(strCorpusHtmlContent, strHashtagToFormat, strHashtagFormatted)
Next
Next
Before:
<p>#has<strong>hta</strong><em>g_m</em>u<span style="text-decoration: underline;">ltifortmat</span> to convert</p>
After:
<p>#has<strong>hta</strong><em>g_m</em>u<span style="text-decoration: underline;">ltiformat</span> to convert</p>

How to split multiple UPPERCASE/delimiter/text using regex? (VBA)

I've got 2k+ records with string followyng rule (LOCATION I UPPERCASE - text) x several times, like this:
I- TRZON - Fragmenty błony śluzowej trzonu żołądka w stanie przewleklego
powierzchownego (++) aktywnego (++) zapalenia. W barwieniu Warthin-Starry
nie stwierdza się bakterii odpowiadających Helicobacter pylori. II-ANTRUM +
KĄT - Fragmenty błony śluzowej części odźwiernikowej żołądka w stanie
przewlekłego głębokiego zapalenia (+++). W barwieniu Warthin-Starry nie
stwierdza się bakterii odpowiadajacych Helicobacter pylori.
Which I'm trying to split as follows using regex:
Location - I- TRZON
Text Fragmenty błony śluzowej trzonu żołądka w stanie przewleklego powierzchownego (++) aktywnego (++) zapalenia. W barwieniu Warthin-Starry nie stwierdza się bakterii odpowiadających Helicobacter pylori.
Location II- ANTRUM + KĄT
Text Fragmenty błony śluzowej części odźwiernikowej żołądka w stanie przewlekłego głębokiego zapalenia (+++). W barwieniu Warthin-Starry nie stwierdza się bakterii odpowiadajacych Helicobacter pylori.
So far I managed to do this by creating something like this
([A-ZŻŹĆĄŚĘŁÓŃ\s,+\-0-9]*)[\s]?-+?(.*[^A-ZŻŹĆĄŚĘŁÓŃ\s,+\-0-9]) ([A-ZŻŹĆĄŚĘŁÓŃ\s,+\-0-9]+)*[\s]?-+?(.*)
But obviously it cannot manage those strings, where one or three pairs of location and text are possible. The main problems I encountered are hyphens used in text (see - Warthin-Starry).
If I try something more elegant, like
([A-ZŻŹĆŃĄŚŁĘÓ]+[\s-\+,]*?)-(.*)
It obviously matches only the word before the first hyphen into the first group, and everything else into next.
To sum up: how to translate into regex something like: match, splitting into two groups: 1) UPPERCASE text with any other signs (no lowercase), followed by 2) text, that is as long as you encounter another UPPERCASE text.
I must admit that I'm fairly new to regex, but I searched for a few days and nothing seems to work universally (and it's only the beginning of extracting data from this string...)
I`m not sure how you can do this with RegEx, i have a hard time myself to get my head around that syntax.
However, I would probably just use the DATA/Text To Columns, split with hyphen, and just concatenate back together the extra splits caused by hyphens in text.
If is not just an one off processing, you can always use VBA as well, something like:
Sub TextToColumns()
Dim ws As Worksheet: Set ws = ActiveSheet
Dim lRow As Long, sndHyphen As Long, R As Long
lRow = ws.Cells(1, 1).End(xlDown).Row
For R = 1 To lRow 'Iterate through all rows containing this data
sndHyphen = InStr(InStr(ws.Cells(R, 1), "-") + 1, ws.Cells(R, 1), "-") 'Get the hyphens positions
ws.Cells(R, 2) = Left(ws.Cells(R, 1), sndHyphen - 2) 'Get the data before the second hyphen
ws.Cells(R, 3) = Mid(ws.Cells(R, 1), sndHyphen + 2) 'Get the data after the second hyphen
Next R
End Sub
Thank you for your input. I finally managed to do this using two subs:
Sub locfinder()
Dim myregexp As RegExp
Set myregexp = New RegExp
Dim myMatches As Variant
Dim myMatch As Variant
Dim str As String
Dim i, j As Integer
Dim endrow As Integer
Sheets("dane").Activate
endrow = LastRow
Dim rozp1, rozp2 As String
For i = 1 To endrow
str = Sheets("Dane").Cells(i, 10).Value
myregexp.Global = True
myregexp.Pattern = "([A-ZŻŹĆĄŚĘŁÓŃ]+[\s,+\-0-9]*[A-ZŻŹĆĄŚĘŁÓŃ]*[\s,+\-0-9]*[A-ZŻŹĆĄŚĘŁÓŃ]*[\s,+\-0-9]*|Trzon|Antrum)\s?-"
If Not str = "" Then
Set myMatches = myregexp.Execute(str)
j = 1
For Each myMatch In myMatches
If myMatch.Value <> "" Then
Sheets("Dane").Cells(i, j + 10).Value = Trim(myMatch.SubMatches(0))
j = j + 1
End If
Next
End If
Next i
End Sub
Then extracted diagnoses using
Sub rozpfinder()
Dim myregexp As RegExp
Set myregexp = New RegExp
Dim myMatches As Variant
Dim myMatch As Variant
Dim str As String
Dim i, j As Integer
Dim endrow As Integer
Sheets("dane").Activate
endrow = LastRow
Dim rozp, loc As Collection
Dim splitted() As String
Dim rozpoznanie, lokalizacja
Dim wb As Workbook
Dim ws As Worksheet
Set wb = ThisWorkbook
Set ws = wb.Worksheets("Dane")
For i = 1 To endrow
str = ws.Cells(i, 10).Value
Set loc = New Collection
Set rozp = New Collection
For j = 1 To 2
If ws.Cells(i, 10 + j) <> "" Then
loc.Add ws.Cells(i, 10 + j).Value
End If
Next j
For Each lokalizacja In loc
If lokalizacja <> "I" Then
str = Replace(str, lokalizacja, "xxx")
Else
lokalizacja = "I-"
str = Replace(str, lokalizacja, "xxx-")
End If
Next lokalizacja
splitted = split(str, "xxx")
For j = 0 To UBound(splitted)
If splitted(j) <> "" Then
myregexp.Pattern = "-[^\w]"
myMatch = myregexp.Replace(splitted(j), "")
rozp.Add (Trim(myMatch))
End If
Next j
j = 1
For Each rozpoznanie In rozp
ws.Cells(i, 12 + j).Value = rozpoznanie
j = j + 1
Next rozpoznanie
Next i
End Sub
While it wasn't 100% accurate, the number of records I need to correct is about 1%, so I guess it works :)

Regular expression to substitute a pattern in VB script

I am trying to write a regular expression in VB script to substitute some patterns.
My string may contain zero or more following patterns -
&USERID
&USERID
&USERID.
&USERID.
&USERID(n)
&USERID(n)
&USERID(n).
&USERID(n).
&USERID(n1, n2)
&USERID(n1, n2)
&USERID(n1, n2).
&USERID(n1, n2).
Sample string -
C:\temp\&USERID_&USERID._&USERID(4)_&USERID(2,2)..txt
If USERID=ABCDEF, then once substituted the resultant string should look like -
C:\temp\ABCDEF_ABCDEF_ABCD_BC.txt
The number in the bracket denotes the number of characters to substitute. Range can be specified using comma separated numbers. In order to achieve this I wrote a regular expression as follows -
"((&USERID\(\d+,\d+\)\.)|(&USERID\(\d+,\d+\)\.)|(&USERID\(\d+,\d+\))|(&USERID\(\d+,\d+\)))|((&USERID\(\d\)\.)|(&USERID\(\d\)\.)|(&USERID\(\d\))|(&USERID\(\d\))|(&USERID\.)|(&USERID\.))"
Using VBScript.RegExp I match the pattern and obtain collection of the matches. Iterating over each match object, I substitute either the complete USERID or part of it based on subscript.
The regular expression works fine. BUT it is very slow compared to string manipulation function.
Can above pattern be optimized?
Update:
I accepted the answer which solves one of my problem. Based on the regular expression, I tried to solve another find and replace problem as follows -
I have following patterns
DATE
DATE(MMDDYYYY)
DATE(DDMMYYYY)
DATE(YYYYMMDD)
DATE(YYYY)
DATE(MM)
DATE(DD)
DATE(DDMONYYYY)
DATE(MON)
DATE(MONTH)
DATE(YYDDD)
DATE(YYYYDDD)
It may have a terminating "." at the end.
Function replaceDate(matchString, label, position, sourceString)
If label = "MMDDYYYY" or label = "" then
replaceDate = "<MMDDYYYY>"
ElseIf label = "DDMMYYYY" then
replaceDate = "<DDMMYYYY>"
ElseIf label = "YYYYMMDD" then
replaceDate = "<YYYYMMDD>"
ElseIf label = "YYYY" then
replaceDate = "<YYYY>"
ElseIf label = "MM" then
replaceDate = "<MM>"
ElseIf label = "DD" then
replaceDate = "<DD>"
ElseIf label = "DDMONYYYY" then
replaceDate = "<DDMONYYYY>"
ElseIf label = "MON" then
replaceDate = "<MON>"
ElseIf label = "MONTH" then
replaceDate = "<MONTH>"
ElseIf label = "YYDD" then
replaceDate = "<YYYYDDD>"
Else
replaceDate = ""
end if
End Function
With new RegExp
.Global = True
.IgnoreCase = False
.Pattern = "(?:&(?:amp;)?)?DATE(?:\((MMDDYYYY|DDMMYYYY|YYYYMMDD|YYYY|MM|DD|DDMONYYYY|MON|MONTH|YYDDD|YYYYDDD)?\))?\.?"
strTempValue = .Replace(strTempValue, GetRef("replaceDate"))
End with
Without more data it is not easy to test, but you can try if this performs better
Dim USERID
USERID = "ABCDEF"
Dim originalString
originalString = "C:\temp\&USERID_&USERID._&USERID(4)_&USERID(2,2)..txt"
Dim convertedString
Function replaceUSERID(matchString, n1, n2, position, sourceString)
n1 = CLng("0" & Trim(n1))
n2 = CLng("0" & Trim(Replace(n2, ",", "")))
If n1 < 1 Then
replaceUSERID = USERID
ElseIf n2 > 0 Then
replaceUSERID = Mid(USERID, n1, n2)
Else
replaceUSERID = Left(USERID, n1)
End If
End Function
With New RegExp
.Pattern = "(?:&(?:amp;)?)?USERID(?:\((\s*\d+\s*)(,\s*\d+\s*)?\))?\.?"
.Global = True
.IgnoreCase = False
convertedString = .Replace(originalString, GetRef("replaceUSERID"))
End With
WScript.Echo originalString
WScript.Echo convertedString
For a multiple "label" replacement
Option Explicit
Dim dicLabels
Set dicLabels = WScript.CreateObject("Scripting.Dictionary")
With dicLabels
.Add "USERID", "ABCDEF"
.Add "LUSER", "ABCDEF"
.Add "ID", "GHIJKL"
End With
Dim originalString
originalString = "C:\temp\&USERID_&USERID._&USERID(4)_&USERID(2,2)_ID(2,3)_&LUSER..txt"
Dim convertedString
Function replaceLabels(matchString, label, n1, n2, position, sourceString)
If Not dicLabels.Exists(label) Then
replaceLabels = matchString
Else
n1 = CLng("0" & Trim(n1))
n2 = CLng("0" & Trim(Replace(n2,",","")))
replaceLabels = dicLabels.Item(label)
If n1 > 0 Then
If n2 > 0 Then
replaceLabels = Mid(dicLabels.Item(label), n1, n2)
Else
replaceLabels = Left(dicLabels.Item(label), n1)
End If
End If
End If
End Function
With New RegExp
.Pattern = "(?:&(?:amp;)?)?("& Join(dicLabels.Keys, "|") &")(?:\((\s*\d+\s*)(,\s*\d+\s*)?\))?\.?"
.Global = True
.IgnoreCase = False
convertedString = .Replace(originalString, GetRef("replaceLabels"))
End With
WScript.Echo originalString
WScript.Echo convertedString

Regular Expression to Test Date VBA

I am looking for a code to test date Format, the date should be in one of these formats
year : 13xx - 20xx
month: xx,x
day: xx,x
the hole date would be on of the following
2012/1/1
2012/01/01
2012/1/01
2012/01/1
I tried the following
Option Explicit
Sub ttt()
MsgBox (testDate("2012/01/01"))
End Sub
Function testDate(strDateToBeTested As String) As Boolean
Dim regularExpression, match
Set regularExpression = CreateObject("vbscript.regexp")
testDate = False
'regularExpression.Pattern = "(14|13|19|20)[0-9]{2}[- /.]([0-9]{1,2})[- /.]([0-9]{1,2})"
'regularExpression.Pattern = "(\d\d\d\d)/(\d|\d\d)/(\d|/dd)"
regularExpression.Pattern = "([0-9]{4}[ /](0[1-9]|[12][0-9]|3[01])[ /](0[1-9]|1[012]))"
regularExpression.Global = True
regularExpression.MultiLine = True
If regularExpression.Test(strDateToBeTested) Then
' For Each match In regularExpression.Execute(strDateToBeTested)
If Len(strDateToBeTested) < 10 Then
testDate = True
' Exit For
End If
'End If
End If
Set regularExpression = Nothing
End Function
The more and more I thought about this (and some research), the more I figured that regex is not the best solution to this format problem. Combining a couple of other ideas (with the ReplaceAndSplit function attributed to the owner), this is what I came up with.
Option Explicit
Sub ttt()
Dim dateStr() As String
Dim i As Integer
dateStr = Split("2012/1/1,2012/01/01,2012/1/01,2012/01/1,1435/2/2," & _
"1435/02/02,1900/07/07,1435/02/02222222,2015/Jan/03", ",")
For i = 1 To UBound(dateStr)
Debug.Print "trying '" & dateStr(i) & "' ... " & testDate(dateStr(i))
Next i
End Sub
Function testDate(strDateToBeTested As String) As Boolean
Dim dateParts() As String
Dim y, m, d As Long
dateParts = ReplaceAndSplit(strDateToBeTested, "/.-")
testDate = False
If IsNumeric(dateParts(0)) Then
y = Int(dateParts(0))
Else
Exit Function
End If
If IsNumeric(dateParts(1)) Then
m = Int(dateParts(1))
Else
Exit Function
End If
If IsNumeric(dateParts(2)) Then
d = Int(dateParts(2))
Else
Exit Function
End If
If (y >= 1435) And (y < 2020) Then 'change or remove the upper limit as needed
If (m >= 1) And (m <= 12) Then
If (d >= 1) And (d <= 30) Then
testDate = True
End If
End If
End If
End Function
'=======================================================
'ReplaceAndSplit by alainbryden, optimized by aikimark
'Uses the native REPLACE() function to replace all delimiters with a common
'delimiter, and then splits them based on that.
'=======================================================
Function ReplaceAndSplit(ByRef Text As String, ByRef DelimChars As String) As String()
Dim DelimLen As Long, Delim As Long
Dim strTemp As String, Delim1 As String, Arr() As String, ThisDelim As String
strTemp = Text
Delim1 = Left$(DelimChars, 1)
DelimLen = Len(DelimChars)
For Delim = 2 To DelimLen
ThisDelim = Mid$(DelimChars, Delim, 1)
If InStr(strTemp, ThisDelim) <> 0 Then _
strTemp = Replace(strTemp, ThisDelim, Delim1)
Next
ReplaceAndSplit = Split(strTemp, Delim1)
End Function

General Purpose UDFs for using Regular Expressions in Excel

I need to parse and summarize and batches of several thousand text lines on a weekly basis. Excel wildcards weren't flexible enough, and I wanted to remove the extra step of either pasting into Notepad++ for processing or feeding to a script.
Here are the tools I came up with. They're still a bit slow -- perhaps 3000 lines per second on a company laptop -- but they are handy.
RXMatch -- return first match, option to return a subgroup.
=RXMatch("Apple","A(..)",1) -> "pp"
RXCount -- count number of matches
=RXCount("Apple","p") -> 2
RXPrint -- embed first match and/or subgroups into a template string
=RXPrint("Apple","(\S)\S+","\1 is for \0") -> "A is for Apple"
RXPrintAll -- embed each match into a template string, join the results
=RXPrintAll("Apple Banana","(\S)\S+","\1 is for \0") -> "A is for Apple, B is for Banana"
RXMatches -- return a vertical array of matches, option to return a subgroup
=RXMatches("Apple Banana","\S+") -> {"Apple";"Banana"}
RXMatch
Public Function RXMatch(Text As String, Pattern As String, Optional Group As Integer = 0, Optional IgnoreCase As Boolean = True) As String
Dim retval As String
' Takes a string and returns the matching text
' Text is the string to be searched
' Pattern is the regex pattern
' Group (optional) selects a parenthesized group (count the number of left parentheses preceding it to get the group number)
' IgnoreCase (optional) set to False for a case-sensitive search
Dim RE As Object
Dim Matches As Object
Set RE = CreateObject("vbscript.regexp")
RE.IgnoreCase = IgnoreCase
RE.Pattern = Pattern
Set Matches = RE.Execute(Text)
If (Matches.Count > 0) Then
If (Group > 0) Then
retval = Matches(0).submatches(Group - 1)
Else
retval = Matches(0)
End If
Else
retval = ""
End If
RXMatch = retval
End Function
RXCount
Public Function RXCount(Text As String, Pattern As String, Optional IgnoreCase As Boolean = True) As Integer
Dim retval As Integer
' Counts the number of matches
' Text is the string to be searched
' Pattern is the regex pattern
' IgnoreCase (optional) set to False for a case-sensitive search
Dim RE As Object
Dim Matches As Object
Set RE = CreateObject("vbscript.regexp")
RE.IgnoreCase = IgnoreCase
RE.Global = True
RE.Pattern = Pattern
Set Matches = RE.Execute(Text)
retval = Matches.Count
RXCount = retval
End Function
RXPrint
Public Function RXPrint(Text As String, Pattern As String, Optional Template As String = "\0", Optional IgnoreCase As Boolean = True) As String
Dim retval As String
' Takes a string and returns a new string formatted according to the given template, using the first match found
' Text is the string to be searched
' Pattern is the regex pattern
' Template (optional) is a string which should contain group identifiers (\0 - \9) to be substituted with groups in the match
' IgnoreCase (optional) set to False for a case-sensitive search
Dim REText, RETemplate As Object
Dim MatchesText, MatchesTemplate As Object
Set REText = CreateObject("vbscript.regexp")
REText.IgnoreCase = IgnoreCase
REText.Pattern = Pattern
Set MatchesText = REText.Execute(Text)
Set RETemplate = CreateObject("vbscript.regexp")
RETemplate.Global = True
RETemplate.Pattern = "(?:\\(.))|([^\\]+)"
Set MatchesTemplate = RETemplate.Execute(Template)
If (MatchesText.Count > 0) Then
ReDim retArray(0 To MatchesTemplate.Count - 1) As String
Dim escaped As String
Dim plaintext As String
For i = 0 To MatchesTemplate.Count - 1
escaped = MatchesTemplate(i).submatches(0)
plaintext = MatchesTemplate(i).submatches(1)
If (Len(escaped) > 0) Then
If (IsNumeric(escaped)) Then
Dim groupnum As Integer
groupnum = CInt(escaped)
If groupnum = 0 Then
retArray(i) = MatchesText(0)
ElseIf (groupnum > MatchesText(0).submatches.Count) Then
retArray(i) = "?"
Else
retArray(i) = MatchesText(0).submatches(groupnum - 1)
End If
Else
retArray(i) = escaped
End If
Else
retArray(i) = plaintext
End If
Next i
retval = Join(retArray, "")
Else
retval = ""
End If
RXPrint = retval
End Function
RXPrintAll
Public Function RXPrintAll(Text As String, Pattern As String, Optional Template As String = "\0", Optional Delimiter As String = ", ", Optional IgnoreCase As Boolean = True) As String
Dim retval As String
' Takes a string and returns a new string formatted according to the given template, repeated for each match
' Text is the string to be searched
' Pattern is the regex pattern
' Template (optional) is a string which should contain group identifiers (\0 - \9) to be substituted with groups in the match
' Delimiter (optional) specified how the results will be joined
' IgnoreCase (optional) set to False for a case-sensitive search
Dim REText, RETemplate As Object
Dim MatchesText, MatchesTemplate As Object
Set REText = CreateObject("vbscript.regexp")
REText.IgnoreCase = IgnoreCase
REText.Global = True
REText.Pattern = Pattern
Set MatchesText = REText.Execute(Text)
Set RETemplate = CreateObject("vbscript.regexp")
RETemplate.Global = True
RETemplate.Pattern = "(?:\\(.))|([^\\]+)"
Set MatchesTemplate = RETemplate.Execute(Template)
If (MatchesText.Count > 0) Then
ReDim retArrays(0 To MatchesText.Count - 1)
For j = 0 To MatchesText.Count - 1
ReDim retArray(0 To MatchesTemplate.Count - 1) As String
Dim escaped As String
Dim plaintext As String
For i = 0 To MatchesTemplate.Count - 1
escaped = MatchesTemplate(i).submatches(0)
plaintext = MatchesTemplate(i).submatches(1)
If (Len(escaped) > 0) Then
If (IsNumeric(escaped)) Then
Dim groupnum As Integer
groupnum = CInt(escaped)
If groupnum = 0 Then
retArray(i) = MatchesText(j)
ElseIf (groupnum > MatchesText(j).submatches.Count) Then
retArray(i) = "?"
Else
retArray(i) = MatchesText(j).submatches(groupnum - 1)
End If
Else
retArray(i) = escaped
End If
Else
retArray(i) = plaintext
End If
Next i
retArrays(j) = Join(retArray, "")
Next j
retval = Join(retArrays, Delimiter)
Else
retval = ""
End If
RXPrintAll = retval
End Function
RXMatches
Public Function RXMatches(Text As String, Pattern As String, Optional Group As Integer = 0, Optional IgnoreCase As Boolean = True) As Variant
Dim retval() As String
' Takes a string and returns all matches in a vertical array
' Text is the string to be searched
' Pattern is the regex pattern
' Group (optional) selects a parenthesized group (count the number of left parentheses preceding it to get the group number)
' IgnoreCase (optional) set to False for a case-sensitive search
Dim RE As Object
Dim Matches As Object
Set RE = CreateObject("vbscript.regexp")
RE.IgnoreCase = IgnoreCase
RE.Global = True
RE.Pattern = Pattern
Set Matches = RE.Execute(Text)
If (Matches.Count > 0) Then
ReDim retval(0 To Matches.Count - 1)
For i = 0 To Matches.Count - 1
If (Group > 0) Then
retval(i) = Matches(i).submatches(Group - 1)
Else
retval(i) = Matches(i)
End If
Next i
Else
ReDim retval(1)
retval(0) = ""
End If
RXMatches = Application.Transpose(retval)
End Function
When dealing with UDFs it's vital that you cache created objects.
For example:
Public Function RegexTest(ByVal vHaystack As Variant, ByVal sPattern As String, Optional ByVal sFlags As String = "") As Boolean
'If haystack is an error then return false
If IsError(vHaystack) Then Exit Function
'Stringify haystack
Dim sHaystack As String: sHaystack = vHaystack
'Cache regular expressions, especially important for formulae
Static lookup As Object
If lookup Is Nothing Then Set lookup = CreateObject("Scripting.Dictionary")
'If cached object doesn't exist, create it
Dim sKey As String: sKey = sPattern & "-" & sFlags
If Not lookup.exists(sKey) Then
'Create regex object
Set lookup(sKey) = CreateObject("VBScript.Regexp")
'Bind flags
For i = 1 To Len(sFlags)
Select Case Mid(sFlags, i, 1)
Case "i"
lookup(sKey).IgnoreCase = True
Case "g"
lookup(sKey).Global = True
End Select
Next
'Set pattern
lookup(sKey).Pattern = sPattern
End If
'Use test function of regex object
RegexTest = lookup(sKey).test(sHaystack)
End Function
Applying this to your own functions, you'll see this vastly increases the speed of execution on a large number of cells.