How to replace 'at' with # - regex

I have about 17k emails containing orders, news, contacts etc. going back 11 years.
Users' email addresses have been shoddily encrypted to stop crawlers and spam by changing the # to either *#* or 'at'.
I am trying to create a comma separated list to build a database of our users.
The code works with writing the file and looping the folders because if I write the senders email address to the file where I am currently using the body of the email then it prints fine.
The problem is, the Replaces aren't changing *at* etc to #.
First of all, why not?
Is there a better way for me to be doing this as a whole?
Private Sub Form_Load()
Dim objOutlook As New Outlook.Application
Dim objNameSpace As Outlook.NameSpace
Dim objInbox As MAPIFolder
Dim objFolder As MAPIFolder
Dim fldName As String
fldName = "TEST"
' Get the MAPI reference
Set objNameSpace = objOutlook.GetNamespace("MAPI")
' Pick up the Inbox
Set objInbox = objNameSpace.GetDefaultFolder(olFolderInbox)
'Loop through the folders under the Inbox
For Each objFolder In objInbox.Folders
RecurseFolders fldName, objFolder
Next objFolder
End Sub
Public Sub RecurseFolders(targetFolder As String, currentFolder As MAPIFolder)
If currentFolder.Name = targetFolder Then
GetEmails currentFolder
Else
Dim objFolder As MAPIFolder
If currentFolder.Folders.Count > 0 Then
For Each objFolder In currentFolder.Folders
RecurseFolders targetFolder, objFolder
Next
End If
End If
End Sub
Sub WriteToATextFile(e As String)
MyFile = "c:\" & "emailist.txt"
'set and open file for output
fnum = FreeFile()
Open MyFile For Append As fnum
Print #fnum, e; ","
Close #fnum
End Sub
Sub GetEmails(folder As MAPIFolder)
Dim objMail As MailItem
' Read through all the items
For i = 1 To folder.Items.Count
Set objMail = folder.Items(i)
GetEmail objMail.Body
Next i
End Sub
Sub GetEmail(s As String)
Dim txt = s
Do Until InStr(txt, "#") <= 0
Dim tleft As Integer
Dim tright As Integer
Dim start As Integer
Dim text As String
Dim email As String
text = Replace(text, " at ", "#", VbCompareMethod.vbTextCompare)
text = Replace(text, "'at'", "#", VbCompareMethod.vbTextCompare)
text = Replace(text, "*at*", "#", VbCompareMethod.vbTextCompare)
text = Replace(text, "*at*", "#", VbCompareMethod.vbTextCompare)
text = Replace(text, "<", " ", VbCompareMethod.vbTextCompare)
text = Replace(text, ">", " ", VbCompareMethod.vbTextCompare)
text = Replace(text, ":", " ", VbCompareMethod.vbTextCompare)
'one two ab#bd.com one two
tleft = InStr(text, "#") '11
WriteToATextFile Str(tleft)
WriteToATextFile Str(Len(text))
start = InStrRev(text, " ", Len(text) - tleft)
'WriteToATextFile Str(start)
'WriteToATextFile Str(Len(text))
'start = Len(text) - tleft
text = left(text, start)
'ab#bd.com one two
tright = InStr(text, " ") '9
email = left(text, tright)
WriteToATextFile email
text = right(text, Len(text) - Len(email))
GetEmail txt
Loop
End Sub

What about using a regex (Regular Expression)?
Something like:
Public Function ReplaceAT(ByVal sInput as String)
Dim RegEx As Object
Set RegEx = CreateObject("vbscript.regexp")
With RegEx
.Global = True
.IgnoreCase = True
.MultiLine = True
.Pattern = "( at |'at'|<at>)"
End With
ReplaceAT = RegEx.Replace(sInput, "#")
Set RegEx = Nothing
End Function
Just replace the regexp with every cases you might get.
See http://www.regular-expressions.info/ for more tips and infos.

I've taken a crack at this to extract emails such as this sample below which will take out the three email addresses in yellow in the sample message below to a csv file
Any valids emails are written to a csv file Set objTF = objFSO.createtextfile("c:\myemail.csv")
This code scans all emails in a folder called temp under Inbox I cut out your recursive portion of testing and simplicity
There are four string manipulations
This line converts any non printing blank spaces to normal spaces strMsgBody = Replace(strMsgBody, Chr(160), Chr(32) (unlikely but it happened in my testing)
Regex1 converts any " at " or "at" etc into "#" "(\s+at\s+|'at'|<at>|\*at\*|at)"
Regex2 converts any " dot " or "dot" etc into "." "(\s+dot\s+|'dot'|<dot>|\*dot\*|dot)"
Regex3 converts any of "<" ">" or ":" into "" .Pattern = "[<:>]"
Regex4 extracts any valid email from the emailbody
Any valid emails are written to the csv file using objTF.writeline objRegM
Code below
Public Test()
Dim objOutlook As New Outlook.Application
Dim objNameSpace As Outlook.NameSpace
Dim objFolder As MAPIFolder
Dim strfld As String
Dim objRegex As Object
Dim objRegMC As Object
Dim objRegM As Object
Dim objFSO As Object
Dim oMailItem As MailItem
Dim objTF As Object
Dim strMsgBody As String
Set objRegex = CreateObject("vbscript.regexp")
Set objFSO = CreateObject("scripting.filesystemobject")
Set objTF = objFSO.createtextfile("c:\myemail.csv")
With objRegex
.Global = True
.MultiLine = True
.ignorecase = True
strfld = "temp"
'Get the MAPI reference
Set objNameSpace = objOutlook.GetNamespace("MAPI")
'Pick up the Inbox
Set objFolder = objNameSpace.GetDefaultFolder(olFolderInbox)
Set objFolder = objFolder.Folders(strfld)
For Each oMailItem In objFolder.Items
strMsgBody = oMailItem.Body
strMsgBody = Replace(strMsgBody, Chr(160), Chr(32))
.Pattern = "(\s+at\s+|'at'|<at>|\*at\*|at)"
strMsgBody = .Replace(strMsgBody, "#")
.Pattern = "(\s+dot\s+|'dot'|<dot>|\*dot\*|dot)"
strMsgBody = .Replace(strMsgBody, ".")
.Pattern = "[<:>]"
strMsgBody = .Replace(strMsgBody, vbNullString)
.Pattern = "[\w-\.]{1,}\#([\da-zA-Z-]{1,}\.){1,}[\da-zA-Z-]{2,3}"
If .Test(strMsgBody) Then
Set objRegMC = .Execute(strMsgBody)
For Each objRegM In objRegMC
objTF.writeline objRegM
Next
End If
Next
End With
objTF.Close
End Sub

Related

Splitting a string and capitalizing letters based on cases

I have some column names with starting coding convention that I would like to transform, see example:
Original Target
------------- --------------
partID Part ID
completedBy Completed By
I have a function in VBA that splits the original string by capital letters:
Function SplitCaps(strIn As String) As String
Dim objRegex As Object
Set objRegex = CreateObject("vbscript.regexp")
With objRegex
.Global = True
.Pattern = "([a-z])([A-Z])"
SplitCaps = .Replace(strIn, "$1 $2")
End With
End Function
I wrap this function within PROPER, for example, PROPER(SplitCaps(A3)) produces the desired result for the third row but leaves the "D" in ID uncapitalized.
Original Actual
------------- --------------
partID Part Id
completedBy Completed By
Can anyone think of a solution to add cases to this function?
split the word and loop the results and test whether it is all caps before using Proper. then join them back:
Sub kjl()
Dim str As String
str = "partID"
Dim strArr() As String
strArr = Split(SplitCaps(str), " ")
Dim i As Long
For i = 0 To UBound(strArr)
If UCase(strArr(i)) <> strArr(i) Then
strArr(i) = Application.Proper(strArr(i))
End If
Next i
str = Join(strArr, " ")
Debug.Print str
End Sub
If you want a formula to do what you are asking then:
=TEXTJOIN(" ",TRUE,IF(EXACT(UPPER(TRIM(MID(SUBSTITUTE(SplitCaps(A1)," ",REPT(" ",999)),{1,999},999))),TRIM(MID(SUBSTITUTE(SplitCaps(A1)," ",REPT(" ",999)),{1,999},999))),TRIM(MID(SUBSTITUTE(SplitCaps(A1)," ",REPT(" ",999)),{1,999},999)),PROPER(TRIM(MID(SUBSTITUTE(SplitCaps(A1)," ",REPT(" ",999)),{1,999},999)))))
Entered as an array formula by confirming with Ctrl-Shift-Enter instead of Enter when exiting edit mode.
Or use the code above as a Function:
Function propSplitCaps(str As String)
Dim strArr() As String
strArr = Split(SplitCaps(str), " ")
Dim i As Long
For i = 0 To UBound(strArr)
If UCase(strArr(i)) <> strArr(i) Then
strArr(i) = Application.Proper(strArr(i))
End If
Next i
propSplitCaps = Join(strArr, " ")
End Function
and call it =propSplitCaps(A1)
Instead of using the Proper function, just capitalize the first letter of each word after you have split the string on the transition.
Option Explicit
Function Cap(s As String) As String
Dim RE As RegExp, MC As MatchCollection, M As Match
Const sPatSplit = "([a-z])([A-Z])"
Const sPatFirstLtr As String = "\b(\w)"
Const sSplit As String = "$1 $2"
Set RE = New RegExp
With RE
.Global = True
.Pattern = sPatSplit
.IgnoreCase = False
If .Test(s) = True Then
s = .Replace(s, sSplit)
.Pattern = sPatFirstLtr
Set MC = .Execute(s)
For Each M In MC
s = WorksheetFunction.Replace(s, M.FirstIndex + 1, 1, UCase(M))
Next M
End If
End With
Cap = s
End Function

Extract text from 2 strings from selected Outlook email

I have code to import email body data from Outlook to Excel. I only need Name, ID, code from the email.
I have done everything except to extract the ID from a fixed sentence:
cn=SVCLMCH,OU=Users,OU=CX,DC=dm001,DC=corp,DC=dcsa,DC=com
The id is SVCLMCH in this case, that means I need to extract the text between "cn=" and ",OU=Users".
Sub import_code()
Dim O As Outlook.Application
Set O = New Outlook.Application
Dim ONS As Outlook.Namespace
Set ONS = O.GetNamespace("MAPI")
Dim OMAIL As Outlook.MailItem
Set OMAIL = Nothing
Dim ws As Object
Set ws = ThisWorkbook.Worksheets("Import code from Outlook")
Dim rcount As Long
Dim vText As Variant
Dim sText As String
Dim i As Long
If O.ActiveExplorer.Selection.Count = 0 Then
msgbox "No Items selected!", vbCritical, "Error"
End If
On Error Resume Next
'Process each selected record
rcount = ws.UsedRange.Rows.Count
For Each OMAIL In O.ActiveExplorer.Selection
sText = OMAIL.Body
vText = Split(sText, Chr(13))
'Find the next empty line of the worksheet
rcount = rcount + 1
'Check each line of text in the message body
For i = UBound(vText) To 0 Step -1
If InStr(1, vText(i), "Password Generated and set for:") > 0 Then
vItem = Split(vText(i), Chr(58))
ws.Range("A" & rcount) = Trim(vItem(1))
End If
If InStr(1, vText(i), "cn=") > 0 Then
vItem = Split(vText(i), Chr(58))
ws.Range("b" & rcount) = Trim(vItem(1))
End If
If InStr(1, vText(i), "Password:") > 0 Then
vItem = Split(vText(i), Chr(58))
ws.Range("c" & rcount) = Trim(vItem(1))
End If
Next i
Next OMAIL
End Sub
The trick here is to use the Split() function
Dim Arr() As String
Dim j As Integer
Dim k As Integer
Dim strvar As String
Dim strval As String
Dim strID As String
If InStr(1, vtext(i), "cn=") > 0 Then
' split the whole line in an array - "," beeing the value separator
Arr = Split(vtext(i), ",")
' loop through all array elements
For j = 0 To UBound(r) - 1
' find the position of =
k = InStr(Arr(j), "=")
strvar = Left(Arr(j), k - 1) ' this is the variable name, ie "cn"
strval = Mid(Arr(j), k + 1) ' this is the value, ie "SVCLMCH"
' now do what you want with a specific variable
Select Case strvar
Case "cn"
strID = strval
Case Else
' do nothing
End Select
Next j
End If
you can use a helper function like this:
Function GetID(strng As String)
Dim el As Variant
For Each el In Split(strng, ",")
If InStr(1, el, "cn=") > 0 Then
GetID = Mid(el, InStr(1, el, "cn=") + 3)
Exit Function
End If
Next
End Function
and your main code would exploit it as:
If InStr(1, vText(i), "cn=") > 0 Then ws.Range("b" & rcount) = GetID(CStr(vText(i)))
Use Regular Expression extract the ID from the sentence
Example Pattern = "(?<=cn=)([^.]+)(?=\,OU=Users)"
https://regex101.com/r/67u84s/2
Code Example
Option Explicit
Private Sub Examplea()
Dim Matches As Variant
Dim RegEx As Object
Set RegEx = CreateObject("VbScript.RegExp")
Dim olApp As Object
Set olApp = CreateObject("Outlook.Application")
Dim Item As Outlook.MailItem
Set Item = olApp.ActiveExplorer.Selection.Item(1)
Dim Pattern As String
Pattern = "(?<=cn=)([^.]+)(?=\,OU=Users)"
With RegEx
.Global = False
.Pattern = Pattern
.IgnoreCase = True
Set Matches = .Execute(Item.Body)
End With
If Matches.Count > 0 Then
Debug.Print Matches(0).SubMatches(0)
With ThisWorkbook.Sheets("Sheet1")
.Range("A1").Value = Trim(Matches(0).SubMatches(0))
End With
End If
End Sub

Filter items with an email body that contains a less than symbol `<`

I'm trying to filter items with an email body that contains a less than symbol <.
Here is a sample email body that contains less than symbol.
Our drive E: is now < 10%.
Sub CodeSubjectForward(Item As Outlook.MailItem)
Dim M1 As MatchCollection
Dim M As Match
Set Reg1 = New RegExp
With Reg1
.Pattern = "([<]\s*(\w*)\s*)"
.Global = True
End With
If Reg1.Test(Item.Body) Then
Set M1 = Reg1.Execute(Item.Body)
For Each M In M1
Next
End If
Item.Save
Set myForward = Item.Forward
myForward.Recipients.Add "alias#domain.com"
myForward.Send
End Sub
Should be something like this
Public Sub FWItem(Item As Outlook.mailitem)
Dim Email As Outlook.mailitem
Dim Matches As Variant
Dim RegExp As Object
Dim Pattern As String
Set RegExp = CreateObject("VbScript.RegExp")
If TypeOf Item Is Outlook.mailitem Then
Pattern = "(\s[<]\s)"
With RegExp
.Global = False
.Pattern = Pattern
.IgnoreCase = True
Set Matches = .Execute(Item.Body)
End With
If Matches.Count > 0 Then
Debug.Print Item.subject ' Print on Immediate Window
Set Email = Item.Forward
Email.subject = Item.subject
Email.Recipients.Add "0m3r#Email.com"
Email.Save
Email.Send
End If
End If
Set RegExp = Nothing
Set Matches = Nothing
Set Email = Nothing
Set Item = Nothing
End Sub
https://regex101.com/r/KOFM8E/1/

Extract IP Addresses in Email body

I am trying to extract all IP addresses in the body of an Outlook message from the following example.
I tried replacing the regex from:
With Reg1
.Pattern = "((P130\w*)\s*(\w*)\s*(\w*)\s*(\w*)\s*([\d-\.]*))"
End With
To:
With Reg1
.Pattern = "((25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?))"
End With
But it only matches one octet.
Sample Text:
The IP from 192.168.10.2 needs attention.
The IP from 192.168.11.3 needs attention.
The IP from 192.168.12.4 needs attention.
Currently it only matches 168
Added extra brackets and now matches the first IP in the body of the message but not the rest.
Full code below:
Option Explicit
Private Const xlUp As Long = -4162
Sub CopyToExcel(olItem As Outlook.MailItem)
Dim xlApp As Object
Dim xlWB As Object
Dim xlSheet As Object
Dim vText, vText2, vText3, vText4, vText5 As Variant
Dim sText As String
Dim rCount As Long
Dim bXStarted As Boolean
Dim enviro As String
Dim strPath As String
Dim Reg1 As Object
Dim M1 As Object
Dim M As Object
enviro = CStr(Environ("USERPROFILE"))
'the path of the workbook
strPath = enviro & "\Documents\test.xlsx"
On Error Resume Next
Set xlApp = GetObject(, "Excel.Application")
If Err <> 0 Then
Application.StatusBar = "Please wait while Excel source is opened ... "
Set xlApp = CreateObject("Excel.Application")
bXStarted = True
End If
On Error GoTo 0
'Open the workbook to input the data
Set xlWB = xlApp.Workbooks.Open(strPath)
Set xlSheet = xlWB.Sheets("Sheet1")
'Find the next empty line of the worksheet
rCount = xlSheet.Range("B" & xlSheet.Rows.Count).End(xlUp).Row
rCount = rCount + 1
sText = olItem.Body
Set Reg1 = CreateObject("VBScript.RegExp")
' \s* = invisible spaces
' \d* = match digits
' \w* = match alphanumeric
With Reg1
' .Pattern = "((P130\w*)\s*(\w*)\s*(\w*)\s*(\w*)\s*([\d-\.]*))"
.Pattern = "((25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?))"
'.Pattern = "^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])$"
End With
If Reg1.Test(sText) Then
' each "(\w*)" and the "(\d)" are assigned a vText variable
Set M1 = Reg1.Execute(sText)
For Each M In M1
vText = Trim(M.SubMatches(1))
vText2 = Trim(M.SubMatches(2))
vText3 = Trim(M.SubMatches(3))
vText4 = Trim(M.SubMatches(4))
' vText5 = Trim(M.SubMatches(5))
Next
End If
xlSheet.Range("B" & rCount) = vText
xlSheet.Range("c" & rCount) = vText2
xlSheet.Range("d" & rCount) = vText3
xlSheet.Range("e" & rCount) = vText4
xlSheet.Range("f" & rCount) = vText5
xlWB.Close 1
If bXStarted Then
xlApp.Quit
End If
Set M = Nothing
Set M1 = Nothing
Set Reg1 = Nothing
Set xlApp = Nothing
Set xlWB = Nothing
Set xlSheet = Nothing
End Sub
^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])$
http://www.regextester.com/22
Regular expression to match DNS hostname or IP Address?

VBA Regex Pattern: failing when underscore in my pattern

Looking for some help with my macro that loops through subfolders and brings back data from the workbooks that match my filename pattern, because the name changes each month.
It works seamlessly if the pattern is "[0-9][0-9][0-9][0-9][0-9][0-9] Filename"
But fails if "[0-9][0-9][0-9][0-9]_[0-9][0-9] Filename"
Any ideas on how to handle the underscore please?
This fails "[0-9][0-9][0-9][0-9][_][0-9][0-9] Filename"
Thanks heaps
GWS
Option Explicit
Option Base 1
Private Const PORTFOLIO_CODE As String = "G030"
Private Sub ExtractData()
' get workbook list
Dim wbList As Collection
Set wbList = New Collection
Application.DisplayAlerts = False
RecursiveFileSearch _
"O:\Sales and Marketing\Monthly Reports\", _
"[0-9][0-9][0-9][0-9][_][0-9][0-9] Monthly Report.xlsm", _ 'fails to find any workbooks
'"[0-9][0-9][0-9][0-9][0-9][0-9] Monthly Report.xlsm", _ 'would work except my file names contain underscores
wbList
Dim resultOffset As Integer
wsResult.Name = result
resultOffset = 1
Dim wbName As Variant, wbOpen As Workbook, wsFund As Worksheet
For Each wbName In wbList
' loop through workbook list
' - open workbook, hidden
Application.ScreenUpdating = False
Set wbOpen = Workbooks.Open(Filename:=wbName, ReadOnly:=True)
wbOpen.Windows(1).Visible = False
' - get worksheet for fund
Set wsFund = wbOpen.Worksheets(PORTFOLIO_CODE)
Application.ScreenUpdating = True
' - find top of data
Dim valueDate As Date
valueDate = WorksheetFunction.EoMonth(DateSerial(2000 + CInt(Left(wbOpen.Name, 2)), CInt(Mid(wbOpen.Name, 3, 2)), 1), 0)
Debug.Print valueDate, wbOpen.Name
ThisWorkbook.Worksheets(PORTFOLIO_CODE).Activate
Dim baseData As Excel.Range
Set baseData = wsFund.Range("AQ:AQ").Find("Currency")
If Not baseData Is Nothing Then
' - loop through data
Dim rowOffset As Integer
rowOffset = 0
wsResult.Range("A1").Offset(resultOffset, 0).Value = valueDate ' baseData.Offset(rowOffset, 0).Value
wsResult.Range("A1").Offset(resultOffset, 1).Value = baseData.Offset(rowOffset, 0).Value
wsResult.Range("A1").Offset(resultOffset, 2).Value = baseData.Offset(rowOffset, 5).Value
resultOffset = resultOffset + 1
End If
' - close workbook
wbOpen.Close SaveChanges:=False
DoEvents
Next
Application.DisplayAlerts = True
End Sub
RecursiveFileSearch
Sub RecursiveFileSearch( _
ByVal targetFolder As String, _
ByRef filePattern As String, _
ByRef matchedFiles As Collection _
)
Dim oRegExp As New VBScript_RegExp_55.RegExp
oRegExp.Global = False
oRegExp.IgnoreCase = True
oRegExp.MultiLine = False
oRegExp.Pattern = filePattern
Dim oFSO As Scripting.FileSystemObject
Set oFSO = New Scripting.FileSystemObject
'Get the folder oect associated with the target directory
Dim oFolder As Variant
Set oFolder = oFSO.GetFolder(targetFolder)
'Loop through the files current folder
Dim oFile As Variant
For Each oFile In oFolder.Files
If oRegExp.test(oFile.Name) Then
matchedFiles.Add oFile
End If
Next
'Loop through the each of the sub folders recursively
Dim oSubFolders As Object
Set oSubFolders = oFolder.Subfolders
Dim oSubfolder As Variant
For Each oSubfolder In oSubFolders
RecursiveFileSearch oSubfolder, filePattern, matchedFiles
Next
'Garbage Collection
Set oFolder = Nothing
Set oFile = Nothing
Set oSubFolders = Nothing
Set oSubfolder = Nothing
Set oFSO = Nothing
Set oRegExp = Nothing
End Sub
Perhaps:
\d{4}_\d{2}.*Monthly Report\.xlsm
My code was escaping the () and . to override the defined regex behavior. Portland Runner suggestion solved the question. ^[0-9]{3,4}[_][0-9]{2} SAMPSON International Shares Passive (Hedged) Trust Mandate Monthly Report.xlsm