Excel Vba Regular Expression Not Detecting - regex

My pattern matches as expected on regex101, but when I run it from Excel VBA it fails to match.
I have been trying to detect and group the following text:
Test A1 (III’15) 270     10/12  ABC/DEF       PNR       AVC
Test Asd(II’05) 300     11/12  RtF/ZXC      PNR        NKL
Test 33 (I’01) PIL     11/12  KNP/ILO      IL 90.5    FX - NO
Test 4 (IIII’10) 270  11-12/12  JKI/IOP   PNR      RPTD - RPTD
My Pattern:
([\w ]+)\s+([\w()\’\’ ]+)\s+(\w+)\s+([\w/-]+)\s+([\w/+]+)\s+([\w.\s]+)\s+([\w -]+)
My Code:
Private Sub splitUpRegexPattern()
' Walks A1:A63 of the active sheet and tries to split each cell's text into
' seven fields with one regular expression. On a match, the seven columns to
' the right receive regEx.Replace(input, "$1") .. regEx.Replace(input, "$7");
' otherwise the first column to the right receives "(Not matched)".
' RegExp is early-bound here, so the project needs a reference supplying that
' type (presumably "Microsoft VBScript Regular Expressions 5.5" - confirm).
Dim regEx As New RegExp
Dim strPattern As String
Dim strInput As String
' strReplace is declared but only appears in the commented-out line below.
Dim strReplace As String
Dim Myrange As Range
Set Myrange = ActiveSheet.Range("A1:A63")
' C is not declared in this Sub, so it is an implicit Variant loop variable.
For Each C In Myrange
' Seven capture groups separated by \s+.
' NOTE(review): the second group's character class lists the typographic
' apostrophe twice; the cell text must contain that exact character to match.
strPattern = "([\w ]+)\s+([\w\(\)\’\’ ]+)\s+(\w+)\s+([\w\/\-]+)\s+([\w\/\+]+)\s+([\w\.\s]+)\s+([\w \-]+)"
' Always true here: strPattern was assigned a non-empty literal just above.
If strPattern <> "" Then
strInput = C.Value
'strReplace = "$1"
' The regex object is reconfigured identically on every iteration.
With regEx
' Global = False: only the first match in the input is considered.
.Global = False
.MultiLine = False
.IgnoreCase = True
.Pattern = strPattern
End With
If regEx.Test(strInput) Then
' Replace returns the whole input with the matched part swapped for group n,
' so any text outside the match is carried along into the output cell.
C.Offset(0, 1) = regEx.Replace(strInput, "$1")
C.Offset(0, 2) = regEx.Replace(strInput, "$2")
C.Offset(0, 3) = regEx.Replace(strInput, "$3")
C.Offset(0, 4) = regEx.Replace(strInput, "$4")
C.Offset(0, 5) = regEx.Replace(strInput, "$5")
C.Offset(0, 6) = regEx.Replace(strInput, "$6")
C.Offset(0, 7) = regEx.Replace(strInput, "$7")
Else
C.Offset(0, 1) = "(Not matched)"
End If
End If
Next
End Sub
I need to group them as
Group1: (Test A1) Group 2: ((III’15)) Group 3: (270) Group 4: (10/12) Group 5: (ABC/DEF) Group 6: (PNR) Group 7:(AVC)

This will work assuming your spaces are real spaces and not character 160 as they are when copied from here:
Private Sub splitUpRegexPattern()
    ' Splits the text of each cell in A1:A4 of the active sheet into seven
    ' fields and writes them to the seven columns to the right of the cell.
    ' Cells that do not match get "(Not matched)" in the next column.
    '
    ' Requires a reference that provides the early-bound RegExp and
    ' MatchCollection types (Microsoft VBScript Regular Expressions).
    Dim regEx As New RegExp
    Dim Myrange As Range
    Dim C As Range
    Dim matches As MatchCollection
    Dim strInput As String

    ' The pattern and flags never change, so configure the object once
    ' instead of on every row.
    With regEx
        .Global = False          ' one match per cell is all that is used
        .MultiLine = False
        .IgnoreCase = True
        .Pattern = "([\w ]+)\s*(\([\w\’]+\))\s+(\w+)\s+([\w/-]+)\s+([\w/+]+)\s+(\w+\s?[\w.]*)\s+([\w -]+)"
    End With

    Set Myrange = ActiveSheet.Range("A1:A4")
    For Each C In Myrange
        strInput = C.Value

        ' Execute alone is enough: an empty collection means "no match",
        ' so a separate Test call would only run the regex a second time.
        Set matches = regEx.Execute(strInput)
        If matches.Count > 0 Then
            With matches(0)
                C.Offset(0, 1).Value = .SubMatches(0)
                C.Offset(0, 2).Value = .SubMatches(1)
                C.Offset(0, 3).Value = .SubMatches(2)
                ' Leading apostrophe: keeps values such as 10/12 as text so
                ' Excel does not reinterpret them (e.g. as a date).
                C.Offset(0, 4).Value = "'" & .SubMatches(3)
                C.Offset(0, 5).Value = .SubMatches(4)
                C.Offset(0, 6).Value = .SubMatches(5)
                C.Offset(0, 7).Value = .SubMatches(6)
            End With
        Else
            C.Offset(0, 1).Value = "(Not matched)"
        End If
    Next C
End Sub

I think your pattern was off, and you also need to set Global = True.
This pattern will target each group of your strings. I haven't tested it with the Replace though as not sure what you were trying to achieve there.
RegExr Fiddle
Updated with new pattern after comments:
Private Sub splitUpRegexPattern()
    ' For each cell in A1:A63, finds every match of an alternation pattern
    ' (Global = True) and writes the matches, in order, to consecutive
    ' columns to the right of the cell. Cells with no match at all get
    ' "(Not matched)" in the next column.
    '
    ' Requires a reference that provides the early-bound RegExp types.
    Dim regEx As New RegExp
    Dim strPattern As String, strInput As String
    Dim j As Long
    Dim hits As MatchCollection
    Dim hit As Match
    Dim Myrange As Range
    Dim c As Range

    Set Myrange = Range("A1:A63")
    strPattern = "(^.*(?=\())|(?:\().*(?:\))|((\d{2}(\/|\-)){1,2}\d{2})|(([a-z]{3}|[a-z]{1,2}\s[a-z]{1,2})(\/|(?!\/))){2}|((\w{2,4}(\s\-\s|$)){1,2})|((\w{2}\s)\d{1,2}\.\d{1,2}|[a-z]{3})|((?!\)\s+)(([a-z]|[0-9]){3}))"

    With regEx
        .Global = True           ' every alternative hit becomes its own match
        .MultiLine = False
        .IgnoreCase = True
        .Pattern = strPattern
    End With

    For Each c In Myrange
        strInput = c.Value

        ' Run the regex once; an empty collection is the "no match" case.
        Set hits = regEx.Execute(strInput)
        If hits.Count > 0 Then
            j = 1
            For Each hit In hits
                ' Write the matched text explicitly rather than relying on
                ' the Match object's default property.
                c.Offset(0, j).Value2 = hit.Value
                j = j + 1
            Next hit
        Else
            c.Offset(0, 1).Value2 = "(Not matched)"
        End If
    Next c
End Sub

Related

Regex - how to test for 2 str patterns and make replacements based on which str pattern matches

I currently have two functioning separate subs in Excel VBA. Each sub searches for a different string pattern and then makes a replacement.
Sub 1 searches for a leading 0 in the target string, strips it out, and places the contents in a separate cell.
Sub 2 searches for terminal "99" in the target string, replacing the "99" with Xs, and places the contents in a separate cell.
The way I do this particular operation is to run Sub1 first. Results are placed in column AO. Then I run Sub2 against the results obtained from Sub1 and place those results in the next adjacent column.
I would like to combine the two subs and run just one time getting the desired results.
Here are examples of the target string in column W that I am applying the regex against:
098765-9876-77
333222-7777-G5
9876-078-99
9867x77A
Sub 1
Sub tom_briggs_test_leading_zero()
    'This sub searches for a leading zero in the target string and removes it.
    'Input is read from W2:W73352 of the active sheet; the result (or the
    'unchanged text when there is no leading zero) is written 18 columns to
    'the right of each source cell.
    Dim regEx As New RegExp
    Dim strInput As String
    Dim Myrange As Range
    Dim cell As Range

    'Configure the regex once: it is identical for all rows.
    With regEx
        .Global = True
        .MultiLine = True        '^ also anchors at the start of each line in a multi-line cell
        .IgnoreCase = False
        .Pattern = "^0(.*)"
    End With

    Set Myrange = ActiveSheet.Range("w2:w73352")
    For Each cell In Myrange
        strInput = cell.Value
        'Replace returns the input unchanged when nothing matches, so no
        'separate Test call / Else branch is needed.
        cell.Offset(0, 18).Value = regEx.Replace(strInput, "$1")
    Next cell
End Sub
Sub 2
Sub tom_briggs_test_trailing_99()
    'This sub searches for a terminal -99 in the target string and replaces
    'it with -XX. The result (or the unchanged text when there is no
    'terminal -99) is written one column to the right of each source cell.
    Dim regEx As New RegExp
    Dim strInput As String
    Dim Myrange As Range
    Dim cell As Range

    'Configure the regex once: it is identical for all rows.
    With regEx
        .Global = True
        .MultiLine = True        '$ also anchors at the end of each line in a multi-line cell
        .IgnoreCase = False
        .Pattern = "(.*)-99$"
    End With

    'AO is the column where results from Sub1 have been placed
    Set Myrange = ActiveSheet.Range("AO2:AO73352")
    For Each cell In Myrange
        strInput = cell.Value
        'Replace returns the input unchanged when nothing matches, so no
        'separate Test call / Else branch is needed.
        cell.Offset(0, 1).Value = regEx.Replace(strInput, "$1-XX")
    Next cell
End Sub
Thanks for your consideration.
How about this:
Sub tom_briggs_fix_head_and_tail()
    'This sub removes a leading zero in the target string and
    'replaces a trailing -99 in the target string with -XX.
    'Input is read from W2:W73352 of the active sheet; the combined result
    'is written 19 columns to the right of each source cell.
    Dim regExHead As New RegExp
    Dim regExTail As New RegExp
    Dim strInput As String
    Dim Myrange As Range
    Dim c As Range

    Set Myrange = ActiveSheet.Range("w2:w73352")

    With regExHead
        .Global = True
        .MultiLine = True
        .IgnoreCase = False
        .Pattern = "^0(.*)"
    End With
    With regExTail
        .Global = True
        .MultiLine = True
        .IgnoreCase = False
        .Pattern = "(.*)-99$"
    End With

    For Each c In Myrange
        strInput = c.Value
        'No Test/IIf guard: IIf evaluates both of its result arguments, so
        'the Replace ran regardless, and Replace already returns the input
        'unchanged when the pattern does not match.
        strInput = regExHead.Replace(strInput, "$1")
        strInput = regExTail.Replace(strInput, "$1-XX")
        c.Offset(0, 19).Value = strInput
    Next c
End Sub
Hope that helps
You don't need a regex for that. Just take a hint from the following code:
Sub test()
    ' Regex-free variant: strips one leading "0" and turns a trailing "-99"
    ' into "-XX". The adjusted text is written one column to the right, but
    ' only for cells where at least one of the two changes applied.
    Dim myRange As Range
    Dim cell As Range
    Dim strOriginal As String
    Dim strInput As String

    Set myRange = Sheet1.Range("A1:A2") 'Change this range as per your requirement
    For Each cell In myRange
        strOriginal = cell.Value
        strInput = strOriginal

        'Checking if the 1st character is 0 or not. Compare as text: the
        'previous CInt(Mid(...)) raised "Type mismatch" on empty cells and on
        'values that start with a non-digit.
        If Left$(strInput, 1) = "0" Then
            strInput = Mid$(strInput, 2)
        End If

        'Checking if -99 is present in the end or not
        If StrComp("-99", Right$(strInput, 3), vbTextCompare) = 0 Then
            strInput = Left$(strInput, Len(strInput) - 3) & "-XX"
        End If

        'If there was a leading 0 or a trailing 99, then only write the updated value in another cell
        If StrComp(strOriginal, strInput, vbTextCompare) <> 0 Then
            cell.Offset(0, 1).Value = strInput
        End If
    Next cell
End Sub

Regex to extract numbers from a String in VBA

How can I extract the numbers from col A and print in into col B.
I am using the below regex function, it print all the numbers with a space between them.
How can I get the initial set of numbers and skip the remaining ones.
Docetaxel Injection 160MG/16ML prints 160 16. I need to print only 160.
Private Sub splitUpRegexPattern()
' For every cell in A1:A10 of the active sheet, replaces each run of
' non-digit characters with a single space and writes the result one column
' to the right; cells containing no non-digit character get "(Not matched)".
' Because every digit run survives, all numbers in the text are output,
' separated by spaces.
Dim regEx As New RegExp
Dim strPattern As String
Dim strInput As String
Dim strReplace As String
Dim Myrange As Range
Set Myrange = ActiveSheet.Range("A1:A10")
' C is not declared in this Sub, so it is an implicit Variant loop variable.
For Each C In Myrange
' \D+ matches one or more consecutive NON-digit characters.
strPattern = "\D+"
' Always true here: strPattern was assigned a non-empty literal just above.
If strPattern <> "" Then
strInput = C.Value
' Assigned but not used: the Replace call below passes the literal " ".
strReplace = "$1"
With regEx
' Global = True: every non-digit run is replaced, not just the first.
.Global = True
.MultiLine = True
.IgnoreCase = False
.Pattern = strPattern
End With
If regEx.test(strInput) Then
C.Offset(0, 1) = regEx.Replace(strInput, " ")
Else
C.Offset(0, 1) = "(Not matched)"
End If
End If
Next
End Sub
This should work (pattern allows for decimals but not very robustly so):
Sub splitUpRegexPattern()
    ' Writes the first number found in each cell of A1:A10 (active sheet)
    ' to the column on its right, or "(Not matched)" when the cell contains
    ' no digits. A number is a run of digits with an optional decimal part
    ' (e.g. 160 or 2.5).
    '
    ' Late-bound, so no project reference is required.
    Dim re As Object, c As Range
    Dim allMatches As Object

    Set re = CreateObject("VBScript.RegExp")
    ' Digits, optionally followed by ".digits". The earlier character class
    ' [\d+\.]+ also accepted bare "+" or "." runs, so text such as
    ' "Inj. 160MG" returned "." instead of 160.
    re.Pattern = "\d+(?:\.\d+)?"
    ' Only the first match is used, so there is no need to collect them all.
    re.Global = False

    For Each c In ActiveSheet.Range("A1:A10").Cells
        Set allMatches = re.Execute(c.Value)
        If allMatches.Count > 0 Then
            c.Offset(0, 1).Value = allMatches(0).Value
        Else
            c.Offset(0, 1).Value = "(Not matched)"
        End If
    Next c
End Sub
If it's always 3 digits, then use \s\d{3}: https://regex101.com/r/lEc4mN/1
Option Explicit
Private Sub splitUpRegexPattern()
    ' For each cell in A1:A10 of the active sheet, finds the first group of
    ' three digits that is preceded by a whitespace character and writes
    ' those digits to the column on the right; otherwise writes
    ' "(Not matched)". The result is also echoed to the Immediate window.
    '
    ' Requires a reference that provides the early-bound RegExp types.
    Dim regEx As New RegExp
    Dim Myrange As Range
    Dim C As Range
    Dim Matches As MatchCollection

    ' Configure once; the settings are the same for every cell.
    With regEx
        .Global = False          ' only the first hit is used
        .MultiLine = True
        .IgnoreCase = False
        ' \s\d{3} with the digits captured: the whole match would include
        ' the leading whitespace character, the capture group does not.
        .Pattern = "\s(\d{3})"
    End With

    Set Myrange = ActiveSheet.Range("A1:A10")
    For Each C In Myrange
        Set Matches = regEx.Execute(C.Value)
        If Matches.Count > 0 Then
            Debug.Print Matches(0).SubMatches(0)
            C.Offset(0, 1).Value = Matches(0).SubMatches(0)
        Else
            C.Offset(0, 1).Value = "(Not matched)"
            Debug.Print "Not Found "
        End If
    Next C
End Sub

RegEx to extract first set of digits from a string

I am trying to extract the first set of digits only with regex function from col A in Vba.
PRECEDEX 200 mcg 2 mL FTV should print only 200. Currently my code prints all the numbers.
Private Sub splitUpRegexPattern()
' For every cell in E3:E1500 of the active sheet, replaces each run of
' non-digit characters with a single space and writes the result one column
' to the right; cells containing no non-digit character get "(Not matched)".
' Every digit run in the text therefore ends up in the output.
Dim Regex As New RegExp
Dim strPattern As String
Dim strInput As String
Dim strReplace As String
Dim Myrange As Range
Set Myrange = ActiveSheet.Range("E3:E1500")
' C is not declared in this Sub, so it is an implicit Variant loop variable.
For Each C In Myrange
' \D+ matches one or more consecutive NON-digit characters.
strPattern = "\D+"
' Always true here: strPattern was assigned a non-empty literal just above.
If strPattern <> "" Then
strInput = C.Value
' Assigned but not used: the Replace call below passes the literal " ".
strReplace = "$1"
With Regex
' Global = True: every non-digit run is replaced, not just the first.
.Global = True
.MultiLine = True
.IgnoreCase = False
.Pattern = strPattern
End With
If Regex.test(strInput) Then
C.Offset(0, 1) = Regex.Replace(strInput, " ")
Else
C.Offset(0, 1) = "(Not matched)"
End If
End If
Next
End Sub
You should just use \d+ pattern, and use .Execute rather than .Replace method to actually extract the digits (you also need to use RegExp.Global=False to find only the first match).
Use
Sub splitUpRegexPattern()
    ' Writes the first run of digits found in each cell of E3:E1500 (active
    ' sheet) to the column on its right, or "(Not matched)" when the cell
    ' contains no digit.
    '
    ' Requires a reference that provides the early-bound RegExp types.
    Dim Regex As New RegExp
    Dim strInput As String
    Dim Myrange As Range
    Dim c As Range
    Dim mtch As MatchCollection

    ' Configure once; the settings are the same for every cell.
    With Regex
        .Global = False          ' stop after the first match
        .MultiLine = True
        .IgnoreCase = False
        .Pattern = "\d+"
    End With

    Set Myrange = ActiveSheet.Range("E3:E1500")
    For Each c In Myrange
        strInput = c.Value
        ' Execute doubles as the test: an empty collection means no match.
        Set mtch = Regex.Execute(strInput)
        If mtch.Count > 0 Then
            c.Offset(0, 1).Value = mtch.Item(0).Value
        Else
            c.Offset(0, 1).Value = "(Not matched)"
        End If
    Next c
End Sub
Here, Set mtch = Regex.Execute(strInput) tries to find the match and if a match is found (If mtch.Count > 0), the value (mtch.Item(0).Value) is added to the next column on the right.

Regular expression to match year?

I'm new to regular expressions in excel vba, been looking at a few questions about it on stack overflow, found a great one at the following link "How to use Regular Expressions (Regex) in Microsoft Excel both in-cell and loops"
There was some very useful code here that I thought I might try to learn and adapt for my purposes, I'm trying to match a 4 digit string representing a year from a cell on a spreadsheet ie. "2016 was a good year" would yield "2016".
I used some slightly altered code from that question, and it manages to recognize that a string contains a year. However, I'm not sure how to separate and extract the year from the rest of the cell contents, i.e. getting 2016 on its own in an adjacent cell. Are there any changes I should make?
Private Sub splitUpRegexPattern()
' For every cell in D2:D244 of the active sheet, tests whether the text
' contains four consecutive digits. If so, writes regEx.Replace(input, "$1")
' five columns to the right; otherwise writes "(Not matched)" there.
Dim regEx As New RegExp
Dim strPattern As String
Dim strInput As String
Dim strReplace As String
Dim Myrange As Range
Set Myrange = ActiveSheet.Range("D2:D244")
' c is not declared in this Sub, so it is an implicit Variant loop variable.
For Each c In Myrange
strPattern = "([0-9]{4})" 'looks for (4 consecutive numbers)
' Always true here: strPattern was assigned a non-empty literal just above.
If strPattern <> "" Then
strInput = c.Value
' Assigned but not used: the Replace call below passes the literal "$1".
strReplace = "$1"
' The regex object is reconfigured identically on every iteration.
With regEx
.Global = True
.MultiLine = True
.IgnoreCase = False
.Pattern = strPattern
End With
If regEx.Test(strInput) Then
' Replacing each match with its own group 1 reproduces the input text, so
' the whole cell content - not just the year - lands in the output cell.
c.Offset(0, 5) = regEx.Replace(strInput, "$1") 'puts the string in an adjacent cell
Else
c.Offset(0, 5) = "(Not matched)"
End If
End If
Next
End Sub
You could significantly improve your code as below:
Use variant arrays rather than a range
Move the RegExp out of the loop (you are setting it the same way for each cell)
Your RegExp parameters can be reduced for what you want (minor).
Private Sub splitUpRegexPattern()
    ' Extracts the first stand-alone 4-digit number from each cell of
    ' D2:D244 on the active sheet and writes it five columns to the right,
    ' or "(Not matched)" when none is found. Values are read into and written
    ' from Variant arrays so the sheet is touched only twice.
    '
    ' Late-bound, so no project reference is required.
    Dim regEx As Object
    Dim hits As Object
    Dim rngSource As Range
    Dim X
    Dim Y
    Dim lngCnt As Long

    Set regEx = CreateObject("vbscript.regexp")

    ' Resolve the source range once so the read and the write below are
    ' guaranteed to refer to the same sheet.
    Set rngSource = ActiveSheet.Range("D2:D244")
    X = rngSource.Value2
    Y = X                        ' same shape as the input; every element is overwritten

    With regEx
        .MultiLine = True
        .Pattern = "\b[0-9]{4}\b" 'looks for (4 consecutive numbers)
        For lngCnt = 1 To UBound(X)
            ' Execute doubles as the test: an empty collection means no match.
            Set hits = .Execute(X(lngCnt, 1))
            If hits.Count > 0 Then
                Y(lngCnt, 1) = hits.Item(0).Value
            Else
                Y(lngCnt, 1) = "(Not matched)"
            End If
        Next lngCnt
    End With

    rngSource.Offset(0, 5).Value2 = Y
End Sub
user1016274, thanks, your comment really helped, had to do some searching on it, but I found the answer
using regEx.Execute(strInput) I managed to return the string matched:
Private Sub splitUpRegexPattern()
    ' For every cell in D2:D244 of the active sheet, extracts the first run
    ' of four consecutive digits and writes it five columns to the right;
    ' cells without such a run get "(Not matched)".
    '
    ' Requires a reference that provides the early-bound RegExp types.
    Dim regEx As New RegExp
    Dim strInput As String
    Dim Myrange As Range
    Dim c As Range
    Dim found As MatchCollection

    ' Configure once; the settings are the same for every cell.
    With regEx
        .Global = False          ' only the first match is used
        .MultiLine = True
        .IgnoreCase = False
        .Pattern = "([0-9]{4})" 'looks for (4 consecutive numbers)
    End With

    Set Myrange = ActiveSheet.Range("D2:D244")
    For Each c In Myrange
        strInput = c.Value
        ' Execute (rather than Replace) yields just the matched text;
        ' SubMatches.Item(0) is the captured 4-digit group.
        Set found = regEx.Execute(strInput)
        If found.Count > 0 Then
            c.Offset(0, 5).Value = found.Item(0).SubMatches.Item(0)
        Else
            c.Offset(0, 5).Value = "(Not matched)"
        End If
    Next c
End Sub

excel VB regexp 5.5 capturing group

I have a problem using regexp in excel macro, by calling regex.execute(string), instead of getting an array of returned capturing groups, I always get single return which is the whole string specified in the pattern.
By using the same pattern in http://www.regexr.com/, I can see the return nicely grouped. What am I missing from this:
Private Sub ParseFileName(strInput As String)
' Tests strInput against a file-name pattern with six capture groups and
' writes the number of matches found (or "(Not matched)") to the cell one
' column to the right of the active cell.
Dim regEx As New RegExp
Dim strPattern As String
' Untyped (Variant); despite the name it receives the collection returned
' by Execute below.
Dim strReplace
'Sample string \\Work_DIR\FTP\Results\RevA\FTP_01_01_06_Results\4F\ACC2X2R33371_SASSSD_run1
strPattern = "FTP_(\w+)_Results\\(\w+)\\([\d,\D]+)_(SAS|SATA)(HDD|SSD)_run(\d)"
With regEx
.Global = True
.MultiLine = False
.IgnoreCase = False
.Pattern = strPattern
End With
If regEx.Test(strInput) Then
' Execute returns one item per match of the WHOLE pattern, so Count is the
' number of full matches, not the number of capture groups.
Set strReplace = regEx.Execute(strInput)
ActiveCell.Offset(0, 1) = strReplace.Count
Else
ActiveCell.Offset(0, 1) = "(Not matched)"
End If
End sub
In the end, strReplace.Count always shows 1, which is the whole string FTP_01_01_06_Results\4F\ACC2X8R133371_SASSSD_run1
Use .SubMatches to get capturing groups values:
Private Sub ParseFileName(strInput As String)
    ' Parses a results path such as
    ' \\Work_DIR\FTP\Results\RevA\FTP_01_01_06_Results\4F\ACC2X2R33371_SASSSD_run1
    ' and writes, one column to the right of the active cell:
    '   row +0      : the number of full-pattern matches, or "(Not matched)"
    '   rows +1..+n : the capture groups of the first match, in order
    '
    ' strInput - the path / file name to parse.
    '
    ' Requires a reference that provides the early-bound RegExp types.
    Dim regEx As New RegExp
    Dim found As MatchCollection
    Dim i As Long

    With regEx
        .Global = True
        .MultiLine = False
        .IgnoreCase = False
        .Pattern = "FTP_(\w+)_Results\\(\w+)\\([\d,\D]+)_(SAS|SATA)(HDD|SSD)_run(\d)"
    End With

    ' Execute doubles as the test: an empty collection means no match.
    Set found = regEx.Execute(strInput)
    If found.Count > 0 Then
        ActiveCell.Offset(0, 1).Value = found.Count
        ' Capture groups live on each Match in SubMatches (0-based). Loop to
        ' SubMatches.Count - 1 so the code follows the pattern if groups are
        ' added or removed.
        With found(0).SubMatches
            For i = 0 To .Count - 1
                ActiveCell.Offset(i + 1, 1).Value = .Item(i)
            Next i
        End With
    Else
        ActiveCell.Offset(0, 1).Value = "(Not matched)"
    End If
End Sub