How can I select and crop certain characters from a string? - regex

For this code, I am having a problem in using the MID and INSTR functions:
Set objFSO = CreateObject("Scripting.FileSystemObject")
Set file = objFSO.OpenTextFile("sample.txt" , ForReading)
Const ForReading = 1
Dim re
Dim controller
Dim action
Set re = new regexp
re.pattern = "(contextPath\s*?[+]\s*?[""][/]\w+?[?]action[=]\w+?[""])"
re.IgnoreCase = True
re.Global = True
Dim line
Do Until file.AtEndOfStream
line = file.ReadLine
For Each m In re.Execute(line)
var = m.Submatches(0)
'I am having a problem with the next two lines:
controller = Mid(var, 1, InStr(var, "contextPath\")) & "[?]action[=]\w+?[""]n"
action = Mid(var, 1, InStr(var, "contextPath\s*?[+]\s*?[""][/]\w+?[?]action[=]")) & """"
Wscript.Echo "controller :" & controller
Wscript.Echo "action: " & action
Next
Loop
With the text file "sample.txt":
contextPath+"/GIACOrderOfPaymentController?action=showORDetails"
contextPath +"/GIACPremDepositController?action=showPremDep"
contextPath+ "/GIACCommPaytsController?action=showCommPayts"
(Notice the spaces beside the plus(+) sign)
How can I make the output look like this:
controller: GIACOrderOfPaymentController
controller: GIACPremDepositController
controller: GIACCommPaytsController
action: showORDetails
action: showPremDep
action: showCommPayts

Instead of capturing the full line, capture the needed data
Option Explicit
Const ForReading = 1
Dim re
Set re = New RegExp
With re
.Pattern = "contextPath\s*\+\s*\""/(\w+)\?action=(\w+)\"""
.IgnoreCase = True
.Global = True
End With
Dim controllers, actions
Set controllers = CreateObject("Scripting.Dictionary")
Set actions = CreateObject("Scripting.Dictionary")
Dim file
Set file = CreateObject("Scripting.FileSystemObject").OpenTextFile("sample.txt" , ForReading)
Dim line, m
Do Until file.AtEndOfStream
line = file.ReadLine
For Each m In re.Execute(line)
controllers.Add "K" & controllers.Count, m.Submatches(0)
actions.Add "K" & actions.Count, m.Submatches(1)
Next
Loop
Dim item
For Each item in controllers.Items()
WScript.Echo "controller: " & item
Next
WScript.Echo ""
For Each item in actions.Items()
WScript.Echo "action: " & item
Next

Related

Regex Adding Unnecessary Spaces During Replace

I'm currently having difficulty with a VBScript I'm writing that contains several read and replaces from a text file. The expression I'm using finds the expression and replaces it, but adds three tab spaces afterwords, making the original line below it mess up the formatting. Here's a picture of what I'm talking about:
Here's a pastebin of the before and after, rather than an image:
https://pastebin.com/Uw3H59QK
Here's my RegExp code:
Set fso = CreateObject("Scripting.FileSystemObject")
Dim strPath
strPath = SelectFolder( "" )
If strPath = vbNull Then
WScript.Echo "Script Cancelled - No files have been modified" 'if the user cancels the open folder dialog
Else
WScript.Echo "Selected Folder: """ & strPath & """" 'prompt that tells you the folder you selected
End If
Function SelectFolder( myStartFolder )
Dim objFolder, objItem, objShell
Dim objFolderItems
On Error Resume Next
SelectFolder = vbNull
Set objShell = CreateObject( "Shell.Application" )
Set objFolder = objShell.BrowseForFolder( 0, "Please select the .dat file location folder", 0, myStartFolder)
set objFolderItems = objFolder.Items
If IsObject( objFolder ) Then SelectFolder = objFolder.Self.Path
Set objFolder = Nothing
Set objShell = Nothing
On Error Goto 0
End Function
Set re = New RegExp 'Replacing Position Lines
re.Pattern = "Pos = \((.*)\)"
re.Global = True
re.IgnoreCase = True
For Each f in fso.GetFolder(strPath).Files
If LCase(fso.GetExtensionName(f.Name)) = "txt" Then
text = f.OpenAsTextStream.ReadAll 'reading the text file
f.OpenAsTextStream(2).Write re.Replace(text, """Position"" : mathutils.Vector(($1)),")
count = count + 1
End If
Next
Set reAngles = New RegExp 'Replacing Angles
reAngles.Pattern = "Angles = \((.*)\)"
reAngles.Global = True
reAngles.IgnoreCase = True
For Each f in fso.GetFolder(strPath).files
If LCase(fso.GetExtensionName(f.Name)) = "txt" Then
text = f.OpenAsTextStream.ReadAll
f.OpenAsTextStream(2).Write reAngles.Replace(text, """Angles"" : mathutils.Vector(($1)),")
End If
Next
Set reNames = New RegExp 'Replacing Names
reNames.Pattern = "Name = (.*)"
reNames.Global = True
'reNames.Multiline = True
reNames.IgnoreCase = True
For Each f in fso.GetFolder(strPath).files
If LCase(fso.GetExtensionName(f.Name)) = "txt" Then
text = f.OpenAsTextStream.ReadAll
f.OpenAsTextStream(2).Write reNames.Replace(text, """Name"" : ""$1"",")
End If
Next
My best guess is that the wildcard is grabbing more info than needed...but I'm unsure how to fix that. I used a lot of these expressions in Notepad++ so I was hoping to translate them to a VBS easily!

Extract all lines of a file between 2 delimiters without them in VBScript Regex

I try to Extract all lines of a file between 2 strings in another file and without these delimiters.
Example:
[General]
Description=Description
[extractSection]
First Line extracted. It is not an ini section
Last Line extracted
[OthersSection]
blablabla
It seems to work with this script. One of my first vbs.
Set objFS = CreateObject("Scripting.FileSystemObject")
strFile = "E:\Temp\Test.txt"
strTemp = "E:\Temp\Temp.txt"
If objFS.FileExists(strTemp) Then objFS.DeleteFile(strTemp)
Set objFile = objFS.OpenTextFile(strFile)
Do Until objFile.AtEndOfStream
strLine = objFile.ReadLine
If isReading = True Then
If instr(strline,"[") Then
Set objOutFile = objFS.CreateTextFile(strTemp, True)
objOutFile.Write(strLine1)
objOutFile.Close
Exit Do
Else
strline1 = strline1 & strline & vbNewLine
End If
Else
If instr(LCase(strline),"[extractsection]") Then
isReading = True
End If
End If
Loop
objFile.Close
But it seems not very optimized, I have files up to 8Mb.
I would like to try the same thing using Regex. I never used, I have to learn.
I have this as beginning: \[extractsection\]([\s\S]*?)\[[\s\S]
But I would like without the delimiters.
Thank you Wiktor. It seems it is OK with (?<=\[extractSection\]\n)(.*(?:\n(?!\[).*)*) Just let me know what is best (Ram | speed) vs my ReadLine script on top, please
You can give a try for this vbscript without a Regex :
Option Explicit
Dim strFile,strTemp,Full_String,First_Delimiter,Second_Delimiter,Extracted_Data
strFile = "E:\Temp\Test.txt"
strTemp = "E:\Temp\Temp.txt"
Full_String = ReadFileText(strFile)
First_Delimiter = "[extractSection]"
Second_Delimiter = "[OthersSection]"
Extracted_Data = String_Between(Full_String,First_Delimiter,Second_Delimiter)
wscript.echo Extracted_Data
Write2File Extracted_Data,strTemp
'************************************************************************************************
Function String_Between(ByVal Full_String, ByVal First_Delimiter, ByVal Second_Delimiter)
Dim Pos,Pos2
Pos = InStr(Full_String, First_Delimiter)
Pos2 = InStr(Full_String, Second_Delimiter)
If Pos = 0 Or Pos2 = 0 Then
String_Between = "Missing Delimiter"
Exit Function
End If
String_Between = Mid(Full_String, Pos + Len(First_Delimiter), Pos2 - (Pos + Len(First_Delimiter)))
End Function
'***********************************************************************************************
Function ReadFileText(sFile)
Dim objFSO,oTS,sText
Set objFSO = CreateObject("Scripting.FileSystemObject")
If Not objFSO.FileExists(sFile) Then
MsgBox "CRITICAL ERROR " & VbCrLF & "The File "& DblQuote(sFile) &_
" dosen't exists !",VbCritical,"CRITICAL ERROR"
Wscript.Quit
Else
Set oTS = objFSO.OpenTextFile(sFile)
sText = oTS.ReadAll
oTS.close
set oTS = nothing
Set objFSO = nothing
ReadFileText = sText
End if
End Function
'*********************************************************************************************
Sub Write2File(strText,OutputFile)
Dim fs,ts
Const ForWriting = 2
Set fs = CreateObject("Scripting.FileSystemObject")
Set ts = fs.OpenTextFile(OutputFile,ForWriting,True)
ts.WriteLine strText
ts.Close
End Sub
'*********************************************************************************************
Function DblQuote(Str)
DblQuote = Chr(34) & Str & Chr(34)
End Function
'**********************************************************************************************
And this one with RegEx
Option Explicit
Dim strFile,strTemp,Full_String,First_Delimiter,Second_Delimiter,Extracted_Data
strFile = "E:\Temp\Test.txt"
strTemp = "E:\Temp\Temp.txt"
Full_String = ReadFileText(strFile)
First_Delimiter = "[extractSection]"
Second_Delimiter = "[OthersSection]"
Extracted_Data = ExtractData(Full_String,First_Delimiter,Second_Delimiter)
wscript.echo Extracted_Data
Write2File Extracted_Data,strTemp
'***********************************************************************************************
Function ExtractData(Full_String,Start_Delim,End_Delim)
Dim fso,f,r,Matches,Contents,Data
Start_Delim = Replace(Start_Delim,"[","\[")
Start_Delim = Replace(Start_Delim,"]","\]")
End_Delim = Replace(End_Delim,"[","\[")
End_Delim = Replace(End_Delim,"]","\]")
Set r=new regexp
r.pattern = "(?:^|(?:\r\n))(:?"& Start_Delim &"\r\n)([\s\S]*?)(?:\r\n)(?:"& End_Delim &")"
Set Matches = r.Execute(Full_String)
If Matches.Count > 0 Then Data = Matches(0).SubMatches(1)
ExtractData = Data
End Function
'***********************************************************************************************
Function ReadFileText(sFile)
Dim objFSO,oTS,sText
Set objFSO = CreateObject("Scripting.FileSystemObject")
If Not objFSO.FileExists(sFile) Then
MsgBox "CRITICAL ERROR " & VbCrLF & "The File "& DblQuote(sFile) &_
" dosen't exists !",VbCritical,"CRITICAL ERROR"
Wscript.Quit
Else
Set oTS = objFSO.OpenTextFile(sFile)
sText = oTS.ReadAll
oTS.close
set oTS = nothing
Set objFSO = nothing
ReadFileText = sText
End if
End Function
'*********************************************************************************************
Sub Write2File(strText,OutputFile)
Dim fs,ts
Const ForWriting = 2
Set fs = CreateObject("Scripting.FileSystemObject")
Set ts = fs.OpenTextFile(OutputFile,ForWriting,True)
ts.WriteLine strText
ts.Close
End Sub
'*********************************************************************************************
Function DblQuote(Str)
DblQuote = Chr(34) & Str & Chr(34)
End Function
'**********************************************************************************************

Extract IP Addresses in Email body

I am trying to extract all IP addresses in the body of an Outlook message from the following example.
I tried replacing the regex from:
With Reg1
.Pattern = "((P130\w*)\s*(\w*)\s*(\w*)\s*(\w*)\s*([\d-\.]*))"
End With
To:
With Reg1
.Pattern = "((25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?))"
End With
But it only matches one octet.
Sample Text:
The IP from 192.168.10.2 needs attention.
The IP from 192.168.11.3 needs attention.
The IP from 192.168.12.4 needs attention.
Currently it only matches 168
Added extra brackets and now matches the first IP in the body of the message but not the rest.
Full code below:
Option Explicit
Private Const xlUp As Long = -4162
Sub CopyToExcel(olItem As Outlook.MailItem)
Dim xlApp As Object
Dim xlWB As Object
Dim xlSheet As Object
Dim vText, vText2, vText3, vText4, vText5 As Variant
Dim sText As String
Dim rCount As Long
Dim bXStarted As Boolean
Dim enviro As String
Dim strPath As String
Dim Reg1 As Object
Dim M1 As Object
Dim M As Object
enviro = CStr(Environ("USERPROFILE"))
'the path of the workbook
strPath = enviro & "\Documents\test.xlsx"
On Error Resume Next
Set xlApp = GetObject(, "Excel.Application")
If Err <> 0 Then
Application.StatusBar = "Please wait while Excel source is opened ... "
Set xlApp = CreateObject("Excel.Application")
bXStarted = True
End If
On Error GoTo 0
'Open the workbook to input the data
Set xlWB = xlApp.Workbooks.Open(strPath)
Set xlSheet = xlWB.Sheets("Sheet1")
'Find the next empty line of the worksheet
rCount = xlSheet.Range("B" & xlSheet.Rows.Count).End(xlUp).Row
rCount = rCount + 1
sText = olItem.Body
Set Reg1 = CreateObject("VBScript.RegExp")
' \s* = invisible spaces
' \d* = match digits
' \w* = match alphanumeric
With Reg1
' .Pattern = "((P130\w*)\s*(\w*)\s*(\w*)\s*(\w*)\s*([\d-\.]*))"
.Pattern = "((25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?))"
'.Pattern = "^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])$"
End With
If Reg1.Test(sText) Then
' each "(\w*)" and the "(\d)" are assigned a vText variable
Set M1 = Reg1.Execute(sText)
For Each M In M1
vText = Trim(M.SubMatches(1))
vText2 = Trim(M.SubMatches(2))
vText3 = Trim(M.SubMatches(3))
vText4 = Trim(M.SubMatches(4))
' vText5 = Trim(M.SubMatches(5))
Next
End If
xlSheet.Range("B" & rCount) = vText
xlSheet.Range("c" & rCount) = vText2
xlSheet.Range("d" & rCount) = vText3
xlSheet.Range("e" & rCount) = vText4
xlSheet.Range("f" & rCount) = vText5
xlWB.Close 1
If bXStarted Then
xlApp.Quit
End If
Set M = Nothing
Set M1 = Nothing
Set Reg1 = Nothing
Set xlApp = Nothing
Set xlWB = Nothing
Set xlSheet = Nothing
End Sub
^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])$
http://www.regextester.com/22
Regular expression to match DNS hostname or IP Address?

Regex pattern not working in VB Script

I have this VBS code:
Option Explicit
Dim reMethod, reInterface
Dim vaction
Dim fileService
Dim mService
Dim lineService
Dim objFSO
Const ForReading = 1
Set objFSO = CreateObject("Scripting.FileSystemObject")
Set fileService = objFSO.OpenTextFile("GIISAssuredController.java" , ForReading)
Set reMethod = new regexp
reMethod.Pattern = """\w+?""\.equals\(ACTION\)[\s\S]*?\{[\s\S]*?\.([^(]*)\("
reMethod.IgnoreCase = True
reMethod.Global = True
Do Until fileService.AtEndOfStream
lineService = fileService.ReadLine
For Each mService In reMethod.Execute(lineService)
vaction = mService.Submatches(0)
Set reInterface = new regexp
Wscript.Echo vaction
Next
Loop
And 'GIISAssuredController.java':
} else if ("hello".equals(ACTION)) {
Integer assuredNo = giisAssuredService.saveAssured(assured);
The regex pattern is not working.
I am expecting the output to be is:
saveAssured
But instead, it's not echoing anything. I tried the regex pattern here > https://regex101.com/r/kH3aZ4/1, and it's getting the 'saveAssured' string.
This question is related to: Multiline REGEX using VB Script
If the expression needs to match a text that spawns over multiple lines, but you read the file line by line and test line by line, there will never be a match.
Option Explicit
Const ForReading = 1
Dim code
code = CreateObject("Scripting.FileSystemObject" _
).OpenTextFile("GIISAssuredController.java" , ForReading _
).ReadAll()
Dim mService
With new RegExp
.Pattern = """\w+?""\.equals\(ACTION\)[\s\S]*?\{[\s\S]*?\.([^(]*)\("
.IgnoreCase = True
.Global = True
For Each mService in .Execute(code)
WScript.Echo mService.Submatches(0)
Next
End With

How to replace 'at' with #

I have about 17k emails containing orders, news, contacts etc. going back 11 years.
Users' email addresses have been shoddily encrypted to stop crawlers and spam by changing the # to either *#* or 'at'.
I am trying to create a comma separated list to build a database of our users.
The code works with writing the file and looping the folders because if I write the senders email address to the file where I am currently using the body of the email then it prints fine.
The problem is, the Replaces aren't changing *at* etc to #.
First of all, why not?
Is there a better way for me to be doing this as a whole?
Private Sub Form_Load()
Dim objOutlook As New Outlook.Application
Dim objNameSpace As Outlook.NameSpace
Dim objInbox As MAPIFolder
Dim objFolder As MAPIFolder
Dim fldName As String
fldName = "TEST"
' Get the MAPI reference
Set objNameSpace = objOutlook.GetNamespace("MAPI")
' Pick up the Inbox
Set objInbox = objNameSpace.GetDefaultFolder(olFolderInbox)
'Loop through the folders under the Inbox
For Each objFolder In objInbox.Folders
RecurseFolders fldName, objFolder
Next objFolder
End Sub
Public Sub RecurseFolders(targetFolder As String, currentFolder As MAPIFolder)
If currentFolder.Name = targetFolder Then
GetEmails currentFolder
Else
Dim objFolder As MAPIFolder
If currentFolder.Folders.Count > 0 Then
For Each objFolder In currentFolder.Folders
RecurseFolders targetFolder, objFolder
Next
End If
End If
End Sub
Sub WriteToATextFile(e As String)
MyFile = "c:\" & "emailist.txt"
'set and open file for output
fnum = FreeFile()
Open MyFile For Append As fnum
Print #fnum, e; ","
Close #fnum
End Sub
Sub GetEmails(folder As MAPIFolder)
Dim objMail As MailItem
' Read through all the items
For i = 1 To folder.Items.Count
Set objMail = folder.Items(i)
GetEmail objMail.Body
Next i
End Sub
Sub GetEmail(s As String)
Dim txt = s
Do Until InStr(txt, "#") <= 0
Dim tleft As Integer
Dim tright As Integer
Dim start As Integer
Dim text As String
Dim email As String
text = Replace(text, " at ", "#", VbCompareMethod.vbTextCompare)
text = Replace(text, "'at'", "#", VbCompareMethod.vbTextCompare)
text = Replace(text, "*at*", "#", VbCompareMethod.vbTextCompare)
text = Replace(text, "*at*", "#", VbCompareMethod.vbTextCompare)
text = Replace(text, "<", " ", VbCompareMethod.vbTextCompare)
text = Replace(text, ">", " ", VbCompareMethod.vbTextCompare)
text = Replace(text, ":", " ", VbCompareMethod.vbTextCompare)
'one two ab#bd.com one two
tleft = InStr(text, "#") '11
WriteToATextFile Str(tleft)
WriteToATextFile Str(Len(text))
start = InStrRev(text, " ", Len(text) - tleft)
'WriteToATextFile Str(start)
'WriteToATextFile Str(Len(text))
'start = Len(text) - tleft
text = left(text, start)
'ab#bd.com one two
tright = InStr(text, " ") '9
email = left(text, tright)
WriteToATextFile email
text = right(text, Len(text) - Len(email))
GetEmail txt
Loop
End Sub
What about using a regex (Regular Expression)?
Something like:
Public Function ReplaceAT(ByVal sInput as String)
Dim RegEx As Object
Set RegEx = CreateObject("vbscript.regexp")
With RegEx
.Global = True
.IgnoreCase = True
.MultiLine = True
.Pattern = "( at |'at'|<at>)"
End With
ReplaceAT = RegEx.Replace(sInput, "#")
Set RegEx = Nothing
End Function
Just replace the regexp with every cases you might get.
See http://www.regular-expressions.info/ for more tips and infos.
I've taken a crack at this to extract emails such as this sample below which will take out the three email addresses in yellow in the sample message below to a csv file
Any valids emails are written to a csv file Set objTF = objFSO.createtextfile("c:\myemail.csv")
This code scans all emails in a folder called temp under Inbox I cut out your recursive portion of testing and simplicity
There are four string manipulations
This line converts any non printing blank spaces to normal spaces strMsgBody = Replace(strMsgBody, Chr(160), Chr(32) (unlikely but it happened in my testing)
Regex1 converts any " at " or "at" etc into "#" "(\s+at\s+|'at'|<at>|\*at\*|at)"
Regex2 converts any " dot " or "dot" etc into "." "(\s+dot\s+|'dot'|<dot>|\*dot\*|dot)"
Regex3 converts any of "<" ">" or ":" into "" .Pattern = "[<:>]"
Regex4 extracts any valid email from the emailbody
Any valid emails are written to the csv file using objTF.writeline objRegM
Code below
Public Test()
Dim objOutlook As New Outlook.Application
Dim objNameSpace As Outlook.NameSpace
Dim objFolder As MAPIFolder
Dim strfld As String
Dim objRegex As Object
Dim objRegMC As Object
Dim objRegM As Object
Dim objFSO As Object
Dim oMailItem As MailItem
Dim objTF As Object
Dim strMsgBody As String
Set objRegex = CreateObject("vbscript.regexp")
Set objFSO = CreateObject("scripting.filesystemobject")
Set objTF = objFSO.createtextfile("c:\myemail.csv")
With objRegex
.Global = True
.MultiLine = True
.ignorecase = True
strfld = "temp"
'Get the MAPI reference
Set objNameSpace = objOutlook.GetNamespace("MAPI")
'Pick up the Inbox
Set objFolder = objNameSpace.GetDefaultFolder(olFolderInbox)
Set objFolder = objFolder.Folders(strfld)
For Each oMailItem In objFolder.Items
strMsgBody = oMailItem.Body
strMsgBody = Replace(strMsgBody, Chr(160), Chr(32))
.Pattern = "(\s+at\s+|'at'|<at>|\*at\*|at)"
strMsgBody = .Replace(strMsgBody, "#")
.Pattern = "(\s+dot\s+|'dot'|<dot>|\*dot\*|dot)"
strMsgBody = .Replace(strMsgBody, ".")
.Pattern = "[<:>]"
strMsgBody = .Replace(strMsgBody, vbNullString)
.Pattern = "[\w-\.]{1,}\#([\da-zA-Z-]{1,}\.){1,}[\da-zA-Z-]{2,3}"
If .Test(strMsgBody) Then
Set objRegMC = .Execute(strMsgBody)
For Each objRegM In objRegMC
objTF.writeline objRegM
Next
End If
Next
End With
objTF.Close
End Sub