How do I remove the dot at the end of each substring - regex

In trying to remove the dot at the end of each substring A6.3., A6.5. I came up with the following solution. However I get a run-time error "5017"
This is what I have
With CreateObject("VBScript.RegExp")
.Global = True
.IgnoreCase = True
.Pattern = "(?<=[1-9]\.[1-9])\."
RemoveNumbers = .Replace("A6.3., A6.5. ", "")
End With
MsgBox RemoveNumbers
The outcome should be: A6.3, A6.5

Just remove dot without regex:
Sub RemoveLastDot()
myValues = Split("A6.3., A6.5.", ",")
myRslt = ""
myFirst = ""
For Each myVal In myValues
If InStrB(Trim(myVal), ".") = Len(Trim(myVal)) Then
myRslt = myRslt & myFirst & Left(Trim(myVal), Len(Trim(myVal)) - 1)
else
myRslt = myRslt & myFirst & Trim(myVal)
End If
myFirst = ", "
Next
MsgBox myRslt
End Sub

Related

Word VBA Macro to comment out nested parenthesis

I have a macro as follows:
Sub CommentOutParenthsLocal()
'
' CommentBubble Macro
'
'
Dim myRange As Range
Set myRange = Selection.Range
Set oScope = myRange.Duplicate
searchText = "\(*\)"
With myRange.Find
.MatchWildcards = True
Do While .Execute(findText:=searchText, Forward:=True) = True
If myRange.InRange(oScope) Then
If Len(myRange.Text) > 4 Then
ActiveDocument.Comments.Add myRange, myRange.Text
myRange.Text = ""
End If
Else
Exit Do
End If
Loop
End With
End Sub
However, this doesn't work if I have nested parenthesis for example This is my (nested parenthesis (document ) in full)
It will match to the first right parenthesis not the outermost. Is there a way to write a regular expression where it matches?
Try:
Sub Demo()
Application.ScreenUpdating = False
Dim Rng As Range
With Selection
Set Rng = .Range
With .Find
.ClearFormatting
.Replacement.ClearFormatting
.Text = "\([!\(]#\)"
.Replacement.Text = ""
.Forward = True
.Wrap = wdFindStop
.Format = False
.MatchWildcards = True
.Execute
End With
Do While .Find.Found
If Not .InRange(Rng) Then Exit Do
If Len(.Text) > 4 Then
.Comments.Add .Range, .Text
.Text = ""
End If
.Collapse wdCollapseEnd
.Find.Execute
Loop
End With
Application.ScreenUpdating = True
End Sub
As advice, please try to use Option Explicit at the start of every Module/Class/Form. This will prevent you from using variables that you haven't declared.
The code below will reduce
This is my (nested parenthesis (document ) in full)
To
This is my
with
(nested parenthesis (document ) in full)
added as a comment.
Option Explicit
Sub CommentOutParenthsLocal()
'
' CommentBubble Macro
'
'
Dim myRange As Range
Set myRange = Selection.Range
Dim oScope As Word.Range
Set oScope = myRange.Duplicate
Dim searchtext As String
searchtext = "\(*\)"
With myRange.Find
.MatchWildcards = True
Do While .Execute(findText:=searchtext, Forward:=True) = True
myRange.Select
If myRange.InRange(oScope) Then
Dim myCount As Long
Dim myText As String
myText = myRange.Text
myCount = Len(myText) - Len(Replace(myText, "(", vbNullString)) - 1
Do Until myCount = 0
myRange.MoveEndUntil cset:=")"
myRange.MoveEnd Count:=1
myCount = myCount - 1
Loop
If Len(myRange.Text) > 4 Then
ActiveDocument.Comments.Add myRange, myRange.Text
myRange.Text = ""
End If
Else
Exit Do
End If
myRange.Start = myRange.End + 1
myRange.End = oScope.End
Loop
End With
End Sub

Match all dates without an asterisk in front

I'm trying to use negative lookback to match all dates without an asterisk in front but it doesn't seem to be working.
(?<!\\*)(\b(?:0[1-9]|[0-2])/(?:0[1-9]|[12]\d|3[01])/(?:19\d{2}|[2-9]\d{3})\b)
This is the string I'm trying to match:
02/02/2019 *03/20/2019 AB CART 9000341 FAXED TO INSTITUTION
Here's the full code for what I have. It extracts the most recent date preceding the word faxed. The problem is if there is a date with an asterisk in front of it (such as *03/20/2019) it chooses that instead of the date (02/02/2019)
This is the Function:
Option Explicit
Function lastFaxedDt(s As String) As Date
Dim re As RegExp, MC As MatchCollection
Const sPat As String = "(\b(?:0[1-9]|1[0-2])/(?:0[1-9]|[12]\d|3[01])/(?:19\d{2}|[2-9]\d{3})\b)(?=.*?faxed)"
Set re = New RegExp
With re
.Pattern = sPat
.IgnoreCase = True
.Global = True
If .Test(s) = True Then
Set MC = .Execute(s)
lastFaxedDt = CDate(MC(MC.Count - 1))
End If
End With
End Function
This is the Macro:
Sub ExtractDate()
marker = 0
Set objShell = CreateObject("Shell.Application")
IE_count = objShell.Windows.Count
For x = 0 To (IE_count - 1)
On Error Resume Next
my_url = objShell.Windows(x).document.Location
my_title = objShell.Windows(x).document.Title
If my_title Like "RFT" & "*" Then
Set IE = objShell.Windows(x)
marker = 1
Exit For
Else
End If
Next
Dim Text As String
Text = Trim$(IE.document.getElementById("ctl00_ContentPlaceHolder1_txtNotes").innerText)
ExtractedDate = lastFaxedDt(Text)
If ExtractedDate = "12:00:00 AM" Then
ExtractedDate = "0"
Else
End If
ExtractedDate = CLng(ExtractedDate)
MaxDate = Application.WorksheetFunction.Max(ExtractedDate)
If MaxDate = "0" Then
MsgBox "No Date Found"
Else
End If
MaxDate = CDate(MaxDate)
Dim ws5 As Worksheet: Set ws5 = ActiveWorkbook.ActiveSheet
ws5.Range("C" & (ActiveCell.Row)).Value = MaxDate
Range("C" & (ActiveCell.Row)).NumberFormat = "[$-409]d-mmm;#"
End Sub
As mentioned in the comments, VBA does not support Lookbehinds. To work around this, you can replace your Lookbehind with the following:
(?:^|[^*])
And then find the date in the capturing group (sub-match) instead of the full match. In this case, your function should look something like this:
Function lastFaxedDt(s As String) As Date
Const sPat As String = _
"(?:^|[^*])" & _
"(\b(?:0[1-9]|1[0-2])/(?:0[1-9]|[12]\d|3[01])/(?:19\d{2}|[2-9]\d{3})\b)" & _
"(?=.*?faxed)"
Dim re As New RegExp, matches As MatchCollection
With re
.Pattern = sPat
.IgnoreCase = True
.Global = True
Set matches = .Execute(s)
If matches.Count > 0 Then
Dim lastMatch As Match: Set lastMatch = matches(matches.Count - 1)
lastFaxedDt = CDate(lastMatch.SubMatches.Item(0))
Else
' TODO: handle the case where no matches are found
End If
End With
End Function
Usage:
Dim s As String
s = "02/02/2019 *03/20/2019 AB CART 9000341 FAXED TO INSTITUTION"
MsgBox lastFaxedDt(s) ' 02/02/2019

Extract all lines of a file between 2 delimiters without them in VBScript Regex

I try to Extract all lines of a file between 2 strings in another file and without these delimiters.
Example:
[General]
Description=Description
[extractSection]
First Line extracted. It is not an ini section
Last Line extracted
[OthersSection]
blablabla
It seems to work with this script. One of my first vbs.
Set objFS = CreateObject("Scripting.FileSystemObject")
strFile = "E:\Temp\Test.txt"
strTemp = "E:\Temp\Temp.txt"
If objFS.FileExists(strTemp) Then objFS.DeleteFile(strTemp)
Set objFile = objFS.OpenTextFile(strFile)
Do Until objFile.AtEndOfStream
strLine = objFile.ReadLine
If isReading = True Then
If instr(strline,"[") Then
Set objOutFile = objFS.CreateTextFile(strTemp, True)
objOutFile.Write(strLine1)
objOutFile.Close
Exit Do
Else
strline1 = strline1 & strline & vbNewLine
End If
Else
If instr(LCase(strline),"[extractsection]") Then
isReading = True
End If
End If
Loop
objFile.Close
But it seems not very optimized, I have files up to 8Mb.
I would like to try the same thing using Regex. I never used, I have to learn.
I have this as beginning: \[extractsection\]([\s\S]*?)\[[\s\S]
But I would like without the delimiters.
Thank you Wiktor. It seems it is OK with (?<=\[extractSection\]\n)(.*(?:\n(?!\[).*)*) Just let me know what is best (Ram | speed) vs my ReadLine script on top, please
You can give a try for this vbscript without a Regex :
Option Explicit
Dim strFile,strTemp,Full_String,First_Delimiter,Second_Delimiter,Extracted_Data
strFile = "E:\Temp\Test.txt"
strTemp = "E:\Temp\Temp.txt"
Full_String = ReadFileText(strFile)
First_Delimiter = "[extractSection]"
Second_Delimiter = "[OthersSection]"
Extracted_Data = String_Between(Full_String,First_Delimiter,Second_Delimiter)
wscript.echo Extracted_Data
Write2File Extracted_Data,strTemp
'************************************************************************************************
Function String_Between(ByVal Full_String, ByVal First_Delimiter, ByVal Second_Delimiter)
Dim Pos,Pos2
Pos = InStr(Full_String, First_Delimiter)
Pos2 = InStr(Full_String, Second_Delimiter)
If Pos = 0 Or Pos2 = 0 Then
String_Between = "Missing Delimiter"
Exit Function
End If
String_Between = Mid(Full_String, Pos + Len(First_Delimiter), Pos2 - (Pos + Len(First_Delimiter)))
End Function
'***********************************************************************************************
Function ReadFileText(sFile)
Dim objFSO,oTS,sText
Set objFSO = CreateObject("Scripting.FileSystemObject")
If Not objFSO.FileExists(sFile) Then
MsgBox "CRITICAL ERROR " & VbCrLF & "The File "& DblQuote(sFile) &_
" dosen't exists !",VbCritical,"CRITICAL ERROR"
Wscript.Quit
Else
Set oTS = objFSO.OpenTextFile(sFile)
sText = oTS.ReadAll
oTS.close
set oTS = nothing
Set objFSO = nothing
ReadFileText = sText
End if
End Function
'*********************************************************************************************
Sub Write2File(strText,OutputFile)
Dim fs,ts
Const ForWriting = 2
Set fs = CreateObject("Scripting.FileSystemObject")
Set ts = fs.OpenTextFile(OutputFile,ForWriting,True)
ts.WriteLine strText
ts.Close
End Sub
'*********************************************************************************************
Function DblQuote(Str)
DblQuote = Chr(34) & Str & Chr(34)
End Function
'**********************************************************************************************
And this one with RegEx
Option Explicit
Dim strFile,strTemp,Full_String,First_Delimiter,Second_Delimiter,Extracted_Data
strFile = "E:\Temp\Test.txt"
strTemp = "E:\Temp\Temp.txt"
Full_String = ReadFileText(strFile)
First_Delimiter = "[extractSection]"
Second_Delimiter = "[OthersSection]"
Extracted_Data = ExtractData(Full_String,First_Delimiter,Second_Delimiter)
wscript.echo Extracted_Data
Write2File Extracted_Data,strTemp
'***********************************************************************************************
Function ExtractData(Full_String,Start_Delim,End_Delim)
Dim fso,f,r,Matches,Contents,Data
Start_Delim = Replace(Start_Delim,"[","\[")
Start_Delim = Replace(Start_Delim,"]","\]")
End_Delim = Replace(End_Delim,"[","\[")
End_Delim = Replace(End_Delim,"]","\]")
Set r=new regexp
r.pattern = "(?:^|(?:\r\n))(:?"& Start_Delim &"\r\n)([\s\S]*?)(?:\r\n)(?:"& End_Delim &")"
Set Matches = r.Execute(Full_String)
If Matches.Count > 0 Then Data = Matches(0).SubMatches(1)
ExtractData = Data
End Function
'***********************************************************************************************
Function ReadFileText(sFile)
Dim objFSO,oTS,sText
Set objFSO = CreateObject("Scripting.FileSystemObject")
If Not objFSO.FileExists(sFile) Then
MsgBox "CRITICAL ERROR " & VbCrLF & "The File "& DblQuote(sFile) &_
" dosen't exists !",VbCritical,"CRITICAL ERROR"
Wscript.Quit
Else
Set oTS = objFSO.OpenTextFile(sFile)
sText = oTS.ReadAll
oTS.close
set oTS = nothing
Set objFSO = nothing
ReadFileText = sText
End if
End Function
'*********************************************************************************************
Sub Write2File(strText,OutputFile)
Dim fs,ts
Const ForWriting = 2
Set fs = CreateObject("Scripting.FileSystemObject")
Set ts = fs.OpenTextFile(OutputFile,ForWriting,True)
ts.WriteLine strText
ts.Close
End Sub
'*********************************************************************************************
Function DblQuote(Str)
DblQuote = Chr(34) & Str & Chr(34)
End Function
'**********************************************************************************************

Regular expression to substitute a pattern in VB script

I am trying to write a regular expression in VB script to substitute some patterns.
My string may contain zero or more following patterns -
&USERID
&USERID
&USERID.
&USERID.
&USERID(n)
&USERID(n)
&USERID(n).
&USERID(n).
&USERID(n1, n2)
&USERID(n1, n2)
&USERID(n1, n2).
&USERID(n1, n2).
Sample string -
C:\temp\&USERID_&USERID._&USERID(4)_&USERID(2,2)..txt
If USERID=ABCDEF, then once substituted the resultant string should look like -
C:\temp\ABCDEF_ABCDEF_ABCD_BC.txt
The number in the bracket denotes the number of characters to substitute. Range can be specified using comma separated numbers. In order to achieve this I wrote a regular expression as follows -
"((&USERID\(\d+,\d+\)\.)|(&USERID\(\d+,\d+\)\.)|(&USERID\(\d+,\d+\))|(&USERID\(\d+,\d+\)))|((&USERID\(\d\)\.)|(&USERID\(\d\)\.)|(&USERID\(\d\))|(&USERID\(\d\))|(&USERID\.)|(&USERID\.))"
Using VBScript.RegExp I match the pattern and obtain collection of the matches. Iterating over each match object, I substitute either the complete USERID or part of it based on subscript.
The regular expression works fine. BUT it is very slow compared to string manipulation function.
Can above pattern be optimized?
Update:
I accepted the answer which solves one of my problem. Based on the regular expression, I tried to solve another find and replace problem as follows -
I have following patterns
DATE
DATE(MMDDYYYY)
DATE(DDMMYYYY)
DATE(YYYYMMDD)
DATE(YYYY)
DATE(MM)
DATE(DD)
DATE(DDMONYYYY)
DATE(MON)
DATE(MONTH)
DATE(YYDDD)
DATE(YYYYDDD)
It may have a terminating "." at the end.
Function replaceDate(matchString, label, position, sourceString)
If label = "MMDDYYYY" or label = "" then
replaceDate = "<MMDDYYYY>"
ElseIf label = "DDMMYYYY" then
replaceDate = "<DDMMYYYY>"
ElseIf label = "YYYYMMDD" then
replaceDate = "<YYYYMMDD>"
ElseIf label = "YYYY" then
replaceDate = "<YYYY>"
ElseIf label = "MM" then
replaceDate = "<MM>"
ElseIf label = "DD" then
replaceDate = "<DD>"
ElseIf label = "DDMONYYYY" then
replaceDate = "<DDMONYYYY>"
ElseIf label = "MON" then
replaceDate = "<MON>"
ElseIf label = "MONTH" then
replaceDate = "<MONTH>"
ElseIf label = "YYDD" then
replaceDate = "<YYYYDDD>"
Else
replaceDate = ""
end if
End Function
With new RegExp
.Global = True
.IgnoreCase = False
.Pattern = "(?:&(?:amp;)?)?DATE(?:\((MMDDYYYY|DDMMYYYY|YYYYMMDD|YYYY|MM|DD|DDMONYYYY|MON|MONTH|YYDDD|YYYYDDD)?\))?\.?"
strTempValue = .Replace(strTempValue, GetRef("replaceDate"))
End with
Without more data it is not easy to test, but you can try if this performs better
Dim USERID
USERID = "ABCDEF"
Dim originalString
originalString = "C:\temp\&USERID_&USERID._&USERID(4)_&USERID(2,2)..txt"
Dim convertedString
Function replaceUSERID(matchString, n1, n2, position, sourceString)
n1 = CLng("0" & Trim(n1))
n2 = CLng("0" & Trim(Replace(n2, ",", "")))
If n1 < 1 Then
replaceUSERID = USERID
ElseIf n2 > 0 Then
replaceUSERID = Mid(USERID, n1, n2)
Else
replaceUSERID = Left(USERID, n1)
End If
End Function
With New RegExp
.Pattern = "(?:&(?:amp;)?)?USERID(?:\((\s*\d+\s*)(,\s*\d+\s*)?\))?\.?"
.Global = True
.IgnoreCase = False
convertedString = .Replace(originalString, GetRef("replaceUSERID"))
End With
WScript.Echo originalString
WScript.Echo convertedString
For a multiple "label" replacement
Option Explicit
Dim dicLabels
Set dicLabels = WScript.CreateObject("Scripting.Dictionary")
With dicLabels
.Add "USERID", "ABCDEF"
.Add "LUSER", "ABCDEF"
.Add "ID", "GHIJKL"
End With
Dim originalString
originalString = "C:\temp\&USERID_&USERID._&USERID(4)_&USERID(2,2)_ID(2,3)_&LUSER..txt"
Dim convertedString
Function replaceLabels(matchString, label, n1, n2, position, sourceString)
If Not dicLabels.Exists(label) Then
replaceLabels = matchString
Else
n1 = CLng("0" & Trim(n1))
n2 = CLng("0" & Trim(Replace(n2,",","")))
replaceLabels = dicLabels.Item(label)
If n1 > 0 Then
If n2 > 0 Then
replaceLabels = Mid(dicLabels.Item(label), n1, n2)
Else
replaceLabels = Left(dicLabels.Item(label), n1)
End If
End If
End If
End Function
With New RegExp
.Pattern = "(?:&(?:amp;)?)?("& Join(dicLabels.Keys, "|") &")(?:\((\s*\d+\s*)(,\s*\d+\s*)?\))?\.?"
.Global = True
.IgnoreCase = False
convertedString = .Replace(originalString, GetRef("replaceLabels"))
End With
WScript.Echo originalString
WScript.Echo convertedString

RegEx to extract email

I need to extract only the email from a spreadsheet in Excel. I've found some example VBA code here at this StackOverflow link, courtesy of Portland Runner.
I created an Excel module and it seems to be working fine, except it only returns the first uppercase character of the address into the cell and it's ignoring the email.
For example:
Text | Result
----------------------------------------|------------------------------
My email address is address#gmail.com | My email address is
Yes Address#gmail.com | Yes A
Below is the code I'm using:
Function simpleCellRegex(Myrange As Range) As String
Dim regEx As New RegExp
Dim strPattern As String
Dim strInput As String
Dim strReplace As String
Dim strOutput As String
strPattern = "[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*#(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?"
If strPattern <> "" Then
strInput = Myrange.Value
strReplace = ""
With regEx
.Global = True
.MultiLine = True
.IgnoreCase = False
.Pattern = strPattern
End With
If regEx.test(strInput) Then
simpleCellRegex = regEx.Replace(strInput, strReplace)
Else
simpleCellRegex = "Not matched"
End If
End If
End Function
I do not have enough experience with VBA to really diagnose what might be happening here, hopefully someone will be able to spot what I'm doing wrong.
Working Code
Function simpleCellRegex(Myrange As Range) As String
Dim regEx As New RegExp
Dim strPattern As String
Dim strInput As String
Dim strReplace As String
Dim strOutput As String
strPattern = "[A-Za-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*#(?:[a-z0-9](?:[a-zA-Z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?"
If strPattern <> "" Then
strInput = Myrange.Value
strReplace = ""
With regEx
.Global = True
.MultiLine = True
.IgnoreCase = True
.Pattern = strPattern
End With
If regEx.Test(strInput) Then
Set matches = regEx.Execute(strInput)
simpleCellRegex = matches(0).Value
Else
simpleCellRegex = "Not matched"
End If
End If
End Function
When You return strInput You just get the same string as the input.
You need to return Value that has been found using RegExp.
Try
Set matches = regEx.Execute(strInput)
simpleCellRegex = matches(1).Value
Instead of
simpleCellRegex = regEx.Replace(strInput, strReplace)
You can change the line
simpleCellRegex = regEx.Replace(strInput, strReplace)
To
simpleCellRegex = strInput
Because you are not making any replacement
The easiest way to do this is by installing the software called KUtool. After installing, highlight the content you want to extract emails==>Click ku tools at the top middle==>click on text==>extract emails.
You can also use the following code.(ALT+F1==>INSERT MODULE)
Function ExtractEmailFun(extractStr As String) As String
'Update 20130829
Dim CharList As String
On Error Resume Next
CheckStr = "[A-Za-z0-9._-]"
OutStr = ""
Index = 1
Do While True
Index1 = VBA.InStr(Index, extractStr, "#")
getStr = ""
If Index1 > 0 Then
For p = Index1 - 1 To 1 Step -1
If Mid(extractStr, p, 1) Like CheckStr Then
getStr = Mid(extractStr, p, 1) & getStr
Else
Exit For
End If
Next
getStr = getStr & "#"
For p = Index1 + 1 To Len(extractStr)
If Mid(extractStr, p, 1) Like CheckStr Then
getStr = getStr & Mid(extractStr, p, 1)
Else
Exit For
End If
Next
Index = Index1 + 1
If OutStr = "" Then
OutStr = getStr
Else
OutStr = OutStr & Chr(10) & getStr
End If
Else
Exit Do
End Ifenter code here
Loop
ExtractEmailFun = OutStr
End Function
You can also go the code way
Open excell, click on ALT +F1, Click on insert Module and paste this code
Click save and enter the formula(Column=ExtractEmailFun(A1)) in a blank cell. press enter and your emails will be extracted. Hope this will help
Try the below pattern
strPattern = "^([a-zA-Z0-9_\-\.]+)#[a-z0-9-]+(\.[a-z0-9-]+)*(\.[a-z]{2,3})$"