Parsing mean temperature from weather web site HTML - regex

Hi I want to use VBA to pull data from weather web site. What I'm trying to do is to get number 6 from this HTML code:
</tr>
<tr>
<td class="indent"><span>Temperatura średnia</span></td>
<td>
<span class="wx-data"><span class="wx-value">6</span><span class="wx-unit"> ° C</span></span>
</td>
<td>
-
</td>
<td> </td>
</tr>
<tr>
<td class="indent"><span>Temperatura maksymalna</span></td>
<td>
<span class="wx-data"><span class="wx-value">7</span><span class="wx-unit"> ° C</span></span>
</td>
<td>
<span class="wx-data"><span class="wx-value">8</span><span class="wx-unit"> ° C</span></span>
</td>
I tried code like this:
Private Sub CommandButton1_Click()
Dim IE As Object
' Create InternetExplorer Object
Set IE = CreateObject("InternetExplorer.Application")
' You can uncoment Next line To see form results
IE.Visible = False
' URL to get data from
IE.Navigate "https://www.wunderground.com/history/airport/EPGD/2016/10/24/DailyHistory.html?req_city=Pruszcz%20Gdanski&req_statename=Polska&reqdb.zip=00000&reqdb.magic=86&reqdb.wmo=12140"
' Statusbar
Application.StatusBar = "Loading, Please wait..."
' Wait while IE loading...
Do While IE.Busy
Application.Wait DateAdd("s", 1, Now)
Loop
Application.StatusBar = "Searching for value. Please wait..."
Dim dd As String
dd = IE.Document.getElementsByClassName("Temperatura średnia")(0).innerText
MsgBox dd
' Show IE
IE.Visible = True
' Clean up
Set IE = Nothing
Application.StatusBar = ""
End Sub
Without any result (the code does nothing). I will appreciate any help.

Here is the example using XHR and RegEx to retrieve all table data from the webpage:
Option Explicit
Sub ExtractDataWunderground()
Dim aResult() As String
Dim sContent As String
Dim i As Long
Dim j As Long
' retrieve html content
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", "https://www.wunderground.com/history/airport/EPGD/2016/10/24/DailyHistory.html", False
.Send
sContent = .ResponseText
End With
' parse with regex
With CreateObject("VBScript.RegExp")
.Global = True
.MultiLine = True
.IgnoreCase = True
' minor html simplification
.Pattern = "<span[^>]*>|</span>|[\r\n\t]*"
sContent = .Replace(sContent, "")
' match each table row
.Pattern = "<tr><td class=""indent"">(.*?)</td><td>(.*?)</td><td>(.*?)</td><td>(.*?)</td></tr>"
With .Execute(sContent)
ReDim aResult(1 To .Count, 1 To 4)
' each row
For i = 1 To .Count
With .Item(i - 1)
' each cell
For j = 1 To 4
aResult(i, j) = DecodeHTMLEntities(.SubMatches(j - 1))
Next
End With
Next
End With
End With
' output result
Cells.Delete
Output Cells(1, 1), aResult
MsgBox "Completed"
End Sub
Function DecodeHTMLEntities(sText As String) As String
Static oHtmlfile As Object
Static oDiv As Object
If oHtmlfile Is Nothing Then
Set oHtmlfile = CreateObject("htmlfile")
oHtmlfile.Open
Set oDiv = oHtmlfile.createElement("div")
End If
oDiv.innerHTML = sText
DecodeHTMLEntities = oDiv.innerText
End Function
Sub Output(oDstRng As Range, aCells As Variant)
With oDstRng
.Parent.Select
With .Resize( _
UBound(aCells, 1) - LBound(aCells, 1) + 1, _
UBound(aCells, 2) - LBound(aCells, 2) + 1 _
)
.NumberFormat = "#"
.Value = aCells
.Columns.AutoFit
End With
End With
End Sub
The output is as follows for me:
To extract the mean temperature only you can get the value from the first match having 0 index, since the mean temperature is in the first row of the table:
Sub ExtractMeanTempWunderground()
Dim sContent As String
' retrieve html content
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", "https://www.wunderground.com/history/airport/EPGD/2016/10/24/DailyHistory.html", False
.Send
sContent = .ResponseText
End With
' parse with regex
With CreateObject("VBScript.RegExp")
.Global = True
.MultiLine = True
.IgnoreCase = True
' minor html simplification
.Pattern = "<span[^>]*>|</span>|[\r\n\t]*"
sContent = .Replace(sContent, "")
' match each table row
.Pattern = "<tr><td class=""indent"">.*?</td><td>(.*?)</td><td>.*?</td><td>.*?</td></tr>"
With .Execute(sContent)
If .Count = 15 Then
' get the first row value only
MsgBox DecodeHTMLEntities(.Item(0).SubMatches(0))
Else
MsgBox "Data structure inconsistence detected"
End If
End With
End With
End Sub
Function DecodeHTMLEntities(sText As String) As String
Static oHtmlfile As Object
Static oDiv As Object
If oHtmlfile Is Nothing Then
Set oHtmlfile = CreateObject("htmlfile")
oHtmlfile.Open
Set oDiv = oHtmlfile.createElement("div")
End If
oDiv.innerHTML = sText
DecodeHTMLEntities = oDiv.innerText
End Function
Note, such methods will work until the webpage structure is changed.

Related

Crawling web page links through Visaul Basic 2008

I have tried to convert one of my Visual Basic 6 programs to Visual Basic.NET. This program basically crawls email and mobile number data from a web page link. This program works great in Visual Basic 6 but after converting to Visual Basic.NET is not providing any function or result in .NET.
Imports VBScript_RegExp_55
Imports System.Collections.Specialized
Imports System.Diagnostics
Public Class autoextratorform
Dim emailsearch As New ListViewItem()
Dim numbersearch As New ListViewItem
Private Declare Function SendMessageByString _
Lib "user32" _
Alias "SendMessageA" _
(ByVal hwnd As Long, _
ByVal wMsg As Long, _
ByVal wParam As Long, _
ByVal lParam As String) _
As Long
Private Const LB_SELECTSTRING = &H18C
Private Declare Function ShellExecute Lib "shell32.dll" Alias "ShellExecuteA" (ByVal hwnd As Long, ByVal lpOperation As String, ByVal lpFile As String, ByVal lpParameters As String, ByVal lpDirectory As String, ByVal nShowCmd As Long) As Long
Private WithEvents cIE As InternetHtml
Private WithEvents cExtLinks As cExtractLinks
Private cGSearch As cGoogleSearch
Dim Excel As Object
Dim Excelsheet As Object
Dim a() As String
Dim b() As String
Dim c() As String
Dim i As Integer
' Needs reference to Microsoft VBscript Regular Expressions I recomend ver 5.5.
'
Dim baseurl As String ' var to store base url so we can build the full path
Dim dVisited ' Dictionary to hold visited urls = I think Missing here
Dim dEmail ' dictionary to hold emails = I think Missing here
Dim dnumber ' = I think Missing here
Dim dweb '= I think Missing here
' We are putting the emails in a list also, for user feed back
'It would be less momery intensive and faster to just keep these in the dictionry object
'which allows to easily tell if the email already exist
Dim regxPage ' var to hold regular expression to extract urls
Dim regxEmail ' var to hold regular expression to extract emails
Dim regnumber
Dim regweb
Dim Match, Matches As String ' we use these to store are regx matches
Dim Match1, Matches1 As String
Dim Match2, Matches2 As String
' Regular expressions are super powerfull and have been a part of unix for a long time
' goto the form load event to see the regex initialization
' to learn more about regular expressions and to download the latest scripting runtime see
' http://msdn.microsoft.com/scripting/default.htm?/scripting/vbscript/techinfo/vbsdocs.htm
Dim email, pageurl, mnumber, sweb As String
' The above is only because dictionary.exist does not work directly on Match var
Dim stopcrawl As Integer ' Used to exit crawl loop
Private Sub ComboBox1_SelectedIndexChanged(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles ComboBox1.SelectedIndexChanged
Select Case ComboBox1.SelectedIndex
Case 0
ListBox2.Items.Clear()
ListBox2.Items.Add("http://classads.jang.com.pk/default.asp?cityid=1")
ListBox2.Items.Add("http://www.merapakistan.com/classifieds/")
ListBox2.Items.Add("http://olx.com.pk/")
ListBox2.SelectedIndex = 0
txtStartUrl.Text = ListBox2.Text
Case 1
ListBox2.Items.Clear()
ListBox2.Items.Add("http://www.locanto.com.pk/")
ListBox2.Items.Add("http://pakistanclassifiedads.com/")
ListBox2.Items.Add("http://pkr.com.pk/")
txtStartUrl.Text = ListBox2.Text
Case 2
ListBox2.Items.Clear()
ListBox2.Items.Add("http://adpost.pk/")
ListBox2.Items.Add("http://page.com.pk/")
ListBox2.Items.Add("http://www.clasf.pk/")
ListBox2.SelectedIndex = 0
txtStartUrl.Text = ListBox2.Text
Case 3
ListBox2.Items.Clear()
ListBox2.Items.Add("http://www.nelaam.com/")
ListBox2.Items.Add("http://www.asani.com.pk/")
ListBox2.Items.Add("http://freepost.com.pk/")
ListBox2.SelectedIndex = 0
txtStartUrl.Text = ListBox2.Text
End Select
End Sub
Private Sub Button1_Click(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles Command1.Click
stopcrawl = 0 ' set stop crawl so we do not exit loop
If txtStartUrl.Text & "" = "" Then
lblresult.Text = ("Please Load your Desired Site List !")
Exit Sub
ElseIf txtStartUrl.Text & "" = "http://" Then
lblresult.Text = ("Please Load your Desired Site List !")
Exit Sub
ElseIf txtStartUrl.Text = "" Then
lblresult.Text = ("Please Load your Desired Site List !")
Exit Sub
End If
' the above should really check for a valid url, but I am a lazy PERL programmer
ListBox1.Items.Add(txtStartUrl.Text) 'add item to list
Label16.Text = (ListBox2.Items.Count - 1)
Label8.Text = (ListBox2.Items.Count)
lblresult.Text = " Start service for searching Email address and Mobile Numbers"
Command1.Enabled = False
ComboBox1.Enabled = False
LaVolpeButton4.Enabled = True
'UPGRADE_ISSUE: (2064) ComboBox property Combo1.Locked was not upgraded. More Information: https://docs.mobilize.net/vbuc/ewis#2064
LaVolpeButton8.Enabled = False
LaVolpeButton9.Enabled = False
txtStartUrl.ReadOnly = True
crawl() ' and were off
End Sub
Private Sub autoextrator_Load(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles MyBase.Load
With listnumber
.Columns.Clear()
.Columns.Add("Count!", CInt(Width * 0.1))
.Columns.Add("Mobile Number!", CInt(Width * 0.2))
.Columns.Add("Web URL!", CInt(Width * 0.68))
End With
With listemail
.Columns.Clear()
.Columns.Add("Count!", CInt(Width * 0.1))
.Columns.Add("Email Address!", CInt(Width * 0.2))
.Columns.Add("Web URL!", CInt(Width * 0.68))
End With
'initialize dictionary and regx objects
dVisited = CreateObject("Scripting.Dictionary")
dVisited.CompareMode = CompareMethod.Binary
dEmail = CreateObject("Scripting.Dictionary")
dEmail.CompareMode = CompareMethod.Binary
dnumber = CreateObject("Scripting.Dictionary")
dnumber.CompareMode = CompareMethod.Binary
dweb = CreateObject("Scripting.Dictionary")
dweb.CompareMode = CompareMethod.Binary
Dim counting1 As String
Dim counting2 As String
regxPage = New VBScript_RegExp_55.RegExp() ' Create a regular expression.
regxPage.Pattern = "HREF=""[^""#]+[.][^""#]+" ' Set pattern."
regxPage.IgnoreCase = True ' Set case insensitivity.
regxPage.Global = True ' Set global applicability.
regxEmail = New VBScript_RegExp_55.RegExp() ' Create a regular expression.
regxEmail.Pattern = "\b[a-z0-9._%+-]+#[a-z0-9.-]+\.[a-z]{2,4}\b" ' Set pattern."
regxEmail.IgnoreCase = True ' Set case insensitivity.
regxEmail.Global = True ' Set global applicability.
regnumber = New VBScript_RegExp_55.RegExp()
counting1 = "(\+92)-?\d{3}-?\d{7}"
counting2 = "(?:0092|0|\+92)-?\d{3}-?\d{7}"
regnumber.Pattern = counting1
regnumber.Pattern = counting2
regnumber.IgnoreCase = True
regnumber.Global = True
cIE = New InternetHtml()
cGSearch = New cGoogleSearch()
cExtLinks = New cExtractLinks()
End Sub
Sub crawl()
While ListBox1.Items.Count > 0 ' loop while list has data
If stopcrawl = 1 Then GoTo exitcrawl
getpage(ListBox1.Items(0)) ' This is the heart of the prog, except for the regx = I think Missing here
' stuff in the form load event
ListBox1.Items.Remove(0) ' remove item from list
End While
ListBox2.Items.Remove(ListBox2.SelectedIndex)
If ListBox2.SelectedIndex < ListBox2.Items.Count - 1 Then
ListBox2.SelectedIndex += 1
txtStartUrl.Text = ListBox2.Items.ToString
exitcrawl:
If ListBox2.Items.Count = 0 Then
MessageBox.Show("Searching has completed !", "Done ! searching", MessageBoxButtons.OK, MessageBoxIcon.Information)
txtStartUrl.ReadOnly = False
txtStartUrl.Text = ""
Label3.Text = "-----------------------------------------------------------"
LaVolpeButton4.Enabled = False
Command1.Enabled = True
LaVolpeButton8.Enabled = True
LaVolpeButton9.Enabled = True
End If
End If
End Sub
Sub getpage(ByVal page As String)
On Error Resume Next
If dVisited.Contains(page) Then
Exit Sub
Else
dVisited.Add(page, 1) ' add page to dVisited dictionary
Label6.Text = CStr(ListBox1.Items.Count)
Label14.Text = CStr(dVisited.Count)
Label3.Text = page
End If
baseurl = getpath(page) ' build full url - see getpath
TextBox1.Text = ""
If ListBox1.Items.Count > 5000 Then Exit Sub ' sets the maximum cache (so we don't run out of mem)
Using wc As New System.Net.WebClient
TextBox1.Text = wc.DownloadString(page)
End Using
Matches = regxPage.Execute(TextBox1.Text) ' Execute search.
For Each Me.Match In Matches
pageurl = Match
If InStr(1, pageurl, "http://", vbTextCompare) Then
If dVisited.Exists(pageurl) = False Then ListBox1.Items.Add(Mid(pageurl, 7))
Else
If dVisited.Exists(baseurl & Mid(pageurl, 7)) = False Then ListBox1.Items.Add(baseurl & Mid(pageurl, 7))
End If
Next
' search for email
Matches = regxEmail.Execute(TextBox1.Text) ' Execute search.
For Each Me.Match In Matches ' Iterate Matches collection.
' check if email exist
email = Match
Debug.Print(email & dEmail.Exists(email))
If dEmail.Exists(email) = False Then
dEmail.Add((email), 1)
Dim d As Integer
d = listemail.Items.Count
d = d + 1
emailsearch = listemail.Items.Add(d)
emailsearch.SubItems.Add(email.ToString)
emailsearch.SubItems.Add(pageurl.ToString)
ListBox3.Items.Add(email)
Label10.Text = listemail.Items.Count
End If
Next
Matches1 = regnumber.Execute(TextBox1.Text)
For Each Me.Match1 In Matches1
mnumber = Match1
Debug.Print(mnumber & dnumber.Exists(mnumber))
If dnumber.Exists(mnumber) = False Then
dnumber.Add((mnumber), 1)
Dim c As Integer
c = listnumber.Items.Count
c = c + 1
numbersearch = listnumber.Items.Add(c)
numbersearch.SubItems.Add(mnumber.ToString)
numbersearch.SubItems.Add(pageurl.ToString)
Label12.Text = listnumber.Items.Count
End If
Next
End Sub
Function getpath(ByVal URL As String) As String
' look for the last / and get a string up to that location
Dim lastbar As Integer = URL.LastIndexOf("/") + 1
Dim tmppath As String = URL.Substring(0, Math.Min(lastbar, URL.Length))
If tmppath = "http://" Then tmppath = URL ' full path already so return url
Return tmppath
End Function
Public Function StripOut(ByVal From As String, ByVal What As String) As String
Dim result As String = ""
result = From
For i As Integer = 1 To Strings.Len(What)
result = result.Replace(What.Substring(i - 1, Math.Min(1, What.Length - (i - 1))), "")
Next i
Return result
End Function
Private Sub LaVolpeButton4_Click(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles LaVolpeButton4.Click
stopcrawl = 1
MsgBox(" Service has Stoped by USER !")
LaVolpeButton4.Enabled = False
Command1.Enabled = True
End Sub
Private Sub LaVolpeButton8_Click(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles LaVolpeButton8.Click
Dim strData As String = ""
Dim strQ() As String
Dim intFile As Integer
' Set up the Common Dialog initial Directory
' to the application path. Filter for Text and All files
' Prompt the user to select a file
'
cmd.InitialDirectory = My.Application.Info.DirectoryPath
cmd.Filter = "Text Files(*.txt)|*.txt|All Files(*.*)|*.*"
cmd.Title = "Select Email Addresses File"
cmd.ShowDialog()
If cmd.FileName <> "" Then
'
' If the user selected a file
' open and read the entire contents
' then split it into records
'
intFile = FileSystem.FreeFile()
FileSystem.FileOpen(intFile, cmd.FileName, OpenMode.Input)
strData = FileSystem.InputString(intFile, FileSystem.LOF(intFile))
FileSystem.FileClose(intFile)
strQ = strData.Split(CChar(Environment.NewLine))
'
' Populate the texbox array with the questions
' (either the number of textboxes in the control array or
' number of questions, which ever is the smaller)
For Each strQ_item As String In strQ
ListBox2.Items.Add(strQ_item)
Next strQ_item
txtStartUrl.Text = ListBox2.Text
End If
End Sub
End Class
2- This is Visual basic 6 Code that works perfect.
Dim baseurl As String ' var to store base url so we can build the full path
Dim dVisited As Dictionary ' Dictionary to hold visited urls
Dim dEmail As Dictionary ' dictionary to hold emails
Dim dnumber As Dictionary
Dim dweb As Dictionary
Sub crawl()
While List1.ListCount > 0 ' loop while list has data
If stopcrawl = 1 Then GoTo exitcrawl
getpage List1.List(0) ' This is the heart of the prog, except for the regx
' stuff in the form load event
List1.RemoveItem (0) ' remove item from list
Wend
List2.RemoveItem (List2.ListIndex)
If List2.ListIndex < List2.ListCount - 1 Then
List2.ListIndex = List2.ListIndex + 1
txtStartUrl = List2.Text
Command1_Click
Else
exitcrawl:
If List2.ListCount = 0 Then
MsgBox "Searching has completed !", vbInformation, "Done ! searching"
WebBrowser1.Stop
txtStartUrl.Locked = False
txtStartUrl.Text = ""
Label3.Caption = "-----------------------------------------------------------"
LaVolpeButton4.Enabled = False
Command1.Enabled = True
LaVolpeButton8.Enabled = True
LaVolpeButton9.Enabled = True
End If
End If
End Sub

Filter items with an email body that contains a less than symbol `<`

I'm trying to filter items with an email body that contains a less than symbol <.
Here is a sample email body that contains less than symbol.
Our drive E: is now < 10%.
Sub CodeSubjectForward(Item As Outlook.MailItem)
Dim M1 As MatchCollection
Dim M As Match
Set Reg1 = New RegExp
With Reg1
.Pattern = "([<]\s*(\w*)\s*)"
.Global = True
End With
If Reg1.Test(Item.Body) Then
Set M1 = Reg1.Execute(Item.Body)
For Each M In M1
Next
End If
Item.Save
Set myForward = Item.Forward
myForward.Recipients.Add "alias#domain.com"
myForward.Send
End Sub
Should be something like this
Public Sub FWItem(Item As Outlook.mailitem)
Dim Email As Outlook.mailitem
Dim Matches As Variant
Dim RegExp As Object
Dim Pattern As String
Set RegExp = CreateObject("VbScript.RegExp")
If TypeOf Item Is Outlook.mailitem Then
Pattern = "(\s[<]\s)"
With RegExp
.Global = False
.Pattern = Pattern
.IgnoreCase = True
Set Matches = .Execute(Item.Body)
End With
If Matches.Count > 0 Then
Debug.Print Item.subject ' Print on Immediate Window
Set Email = Item.Forward
Email.subject = Item.subject
Email.Recipients.Add "0m3r#Email.com"
Email.Save
Email.Send
End If
End If
Set RegExp = Nothing
Set Matches = Nothing
Set Email = Nothing
Set Item = Nothing
End Sub
https://regex101.com/r/KOFM8E/1/

IE fast automation

I am currently trying to web scrape some exchange rates from a website called X-Rates, using VBA. My current problem is that it just takes too long to run. I have narrowed it down to the Do Events of my IE object.
My question: Is there a better way of doing this (maybe some better efficiency of code), or my logic is just flawed?
Here's what the code does:
1 - Loop for each country (1-9 = offsetCurr);
2- Convert to exchange rate and preserve value on cell
'Define variables
Dim strElm As String
Dim i As Integer
Dim ie As InternetExplorer
Dim period As Variant
Dim offsetCurr As Integer
Dim offsetDesc As String
'Define period
period = Application.InputBox("What's the year and period?", "Period", , , , , 2)
'Define start row
i = 2
Application.ScreenUpdating = False
On Error GoTo ErrHandler
For offsetCurr = 1 To 9
If offsetCurr = 1 Then
'ARS to EURO
Set ie = New InternetExplorer
offsetDesc = "ARS"
Cells(i, 1).Value = period
Cells(i, 2).Value = offsetDesc
ie.navigate "http://www.x-rates.com/calculator/?from=ARS&to=EUR&amount=1"
Do While ie.readyState <> READYSTATE_COMPLETE
DoEvents
Loop
d = ie.document.getElementsByClassName("ccOutputRslt")(0).innerText
strElm = d
Cells(i, 3).Value = strElm
ie.Quit
Set ie = Nothing
'ARS to USD
Set ie = New InternetExplorer
ie.navigate "http://www.x-rates.com/calculator/?from=ARS&to=USD&amount=1"
Do While ie.readyState <> READYSTATE_COMPLETE
DoEvents
Loop
d = ie.document.getElementsByClassName("ccOutputRslt")(0).innerText
strElm = d
Cells(i, 4).Value = strElm
'Quit IE for automation purposes
ie.Quit
Set ie = Nothing
'ARS to GBP
Set ie = New InternetExplorer
ie.navigate "http://www.x-rates.com/calculator/?from=ARS&to=GBP&amount=1"
Do While ie.readyState <> READYSTATE_COMPLETE
DoEvents
Loop
d = ie.document.getElementsByClassName("ccOutputRslt")(0).innerText
strElm = d
Cells(i, 5).Value = strElm
ie.Quit
Set ie = Nothing
End If
ErrHandler:
If Err.Number <> 0 Then
Msg = "Error #" & Str(Err.Number) & " was generated by " & Err.Source & "." & Chr(13) & "Error description: " & Err.Description
MsgBox Msg, , "Error", Err.HelpFile, Err.HelpContext
Exit Sub
End If
End Sub
I know it's a significant amount of code, if needed I can edit the question to make it simpler.
Here is an example showing how to retrieve rates via XHR:
Option Explicit
Sub TestGetRate()
Dim sCrcy As Variant
For Each sCrcy In Array("EUR", "USD", "GBP")
Debug.Print GetRate("ARS", sCrcy)
Next
End Sub
Function GetRate(sFromCrcy, sToCrcy)
Dim sUrl, sContent
sUrl = "http://www.x-rates.com/calculator/?from=" & sFromCrcy & "&to=" & sToCrcy & "&amount=1"
With CreateObject("MSXML2.XMLHttp")
.Open "GET", sUrl, False
.send
sContent = .ResponseText
End With
With CreateObject("VBScript.RegExp")
.Global = True
.MultiLine = True
.IgnoreCase = False
.Pattern = "<span class=""ccOutputRslt"">(.*?)<span class=""ccOutputTrail"">(.*?)</span><span class=""ccOutputCode"">(.*?)</span></span>"
With .Execute(sContent).Item(0)
GetRate = .SubMatches(0) & .SubMatches(1) & .SubMatches(2)
End With
End With
End Function
Output is as follows for me:
0.061688 EUR
0.070373 USD
0.048865 GBP
It looks like you are starting a new instance of IE and closing it completely for each of the 9 loops. Try starting IE once at beginning, then loop through each currency type, then quit IE.

Conditional Regular Expression in VBA

I am parsing multiple HTML files using RegEx in Excel VBA (i know not the best thing to do) but I have this case which can either be - Scenario 1:
<span class="big vc vc_2 "><strong><i class="icon icon-angle-circled-down text-danger"></i>£51,038</strong> <span class="small">(-2.12%)</span></span>
or could be - Scenario 2:
<span class="big vc vc_2 "><strong><i class="icon icon-angle-circled-up text-success"></i>£292,539</strong> <span class="small">(14.13%)</span></span>
If the class ends in danger, I want to return -51038 and -2.12%
If the class ends in success, I want to return +292539 and 14.13%
The code I have been using for the second scenario and works fine is:
Sub Test()
With CreateObject("VBScript.RegExp")
.Global = True
.MultiLine = True
.IgnoreCase = False
.Pattern = "<i class=""icon icon-angle-circled-up text-success""></i>([\s\S]*?)<"
sValue = HtmlSpecialCharsDecode(.Execute(sContent).Item(0).SubMatches(0))
End With
sValue = CleanString(sValue)
End sub
Function HtmlSpecialCharsDecode(sText)
With CreateObject("htmlfile")
.Open
With .createElement("textarea")
.innerHTML = sText
HtmlSpecialCharsDecode = .Value
End With
End With
End Function
Function CleanString(strIn As String) As String
Dim objRegex
Set objRegex = CreateObject("vbscript.regexp")
With objRegex
.Global = True
.Pattern = "[^\d]+"
CleanString = .Replace(strIn, vbNullString)
End With
End Function
All you need to do is add some more capturing groups with "or" conditions in them. In your case, you want the group (success|danger) (also (up|down) based on the examples). Then, instead of just checking the only submatch, check for the conditions that you put in your pattern:
Dim regex As Object
Dim matches As Object
Dim expr As String
expr = "<i class=""icon icon-angle-circled-(up|down) text-(success|danger)""></i>(.*?)</.*\((.*)%\)<.*"
Set regex = CreateObject("VBScript.RegExp")
With regex
.Global = True
.MultiLine = True
.IgnoreCase = False
.Pattern = expr
Set matches = .Execute(sContent)
End With
Dim isDanger As Boolean
If matches.Count > 0 Then
isDanger = (HtmlSpecialCharsDecode(matches.item(0).SubMatches(1)) = "danger")
sValue1 = HtmlSpecialCharsDecode(matches.item(0).SubMatches(2))
sValue2 = HtmlSpecialCharsDecode(matches.item(0).SubMatches(3))
End If
If isDanger Then
'Was "danger"
Debug.Print -CLng(CleanString(sValue1))
Debug.Print -CDbl(sValue2)
Else
'Was "success"
Debug.Print CLng(CleanString(sValue1))
Debug.Print CDbl(sValue2)
End If

How to replace 'at' with #

I have about 17k emails containing orders, news, contacts etc. going back 11 years.
Users' email addresses have been shoddily encrypted to stop crawlers and spam by changing the # to either *#* or 'at'.
I am trying to create a comma separated list to build a database of our users.
The code works with writing the file and looping the folders because if I write the senders email address to the file where I am currently using the body of the email then it prints fine.
The problem is, the Replaces aren't changing *at* etc to #.
First of all, why not?
Is there a better way for me to be doing this as a whole?
Private Sub Form_Load()
Dim objOutlook As New Outlook.Application
Dim objNameSpace As Outlook.NameSpace
Dim objInbox As MAPIFolder
Dim objFolder As MAPIFolder
Dim fldName As String
fldName = "TEST"
' Get the MAPI reference
Set objNameSpace = objOutlook.GetNamespace("MAPI")
' Pick up the Inbox
Set objInbox = objNameSpace.GetDefaultFolder(olFolderInbox)
'Loop through the folders under the Inbox
For Each objFolder In objInbox.Folders
RecurseFolders fldName, objFolder
Next objFolder
End Sub
Public Sub RecurseFolders(targetFolder As String, currentFolder As MAPIFolder)
If currentFolder.Name = targetFolder Then
GetEmails currentFolder
Else
Dim objFolder As MAPIFolder
If currentFolder.Folders.Count > 0 Then
For Each objFolder In currentFolder.Folders
RecurseFolders targetFolder, objFolder
Next
End If
End If
End Sub
Sub WriteToATextFile(e As String)
MyFile = "c:\" & "emailist.txt"
'set and open file for output
fnum = FreeFile()
Open MyFile For Append As fnum
Print #fnum, e; ","
Close #fnum
End Sub
Sub GetEmails(folder As MAPIFolder)
Dim objMail As MailItem
' Read through all the items
For i = 1 To folder.Items.Count
Set objMail = folder.Items(i)
GetEmail objMail.Body
Next i
End Sub
Sub GetEmail(s As String)
Dim txt = s
Do Until InStr(txt, "#") <= 0
Dim tleft As Integer
Dim tright As Integer
Dim start As Integer
Dim text As String
Dim email As String
text = Replace(text, " at ", "#", VbCompareMethod.vbTextCompare)
text = Replace(text, "'at'", "#", VbCompareMethod.vbTextCompare)
text = Replace(text, "*at*", "#", VbCompareMethod.vbTextCompare)
text = Replace(text, "*at*", "#", VbCompareMethod.vbTextCompare)
text = Replace(text, "<", " ", VbCompareMethod.vbTextCompare)
text = Replace(text, ">", " ", VbCompareMethod.vbTextCompare)
text = Replace(text, ":", " ", VbCompareMethod.vbTextCompare)
'one two ab#bd.com one two
tleft = InStr(text, "#") '11
WriteToATextFile Str(tleft)
WriteToATextFile Str(Len(text))
start = InStrRev(text, " ", Len(text) - tleft)
'WriteToATextFile Str(start)
'WriteToATextFile Str(Len(text))
'start = Len(text) - tleft
text = left(text, start)
'ab#bd.com one two
tright = InStr(text, " ") '9
email = left(text, tright)
WriteToATextFile email
text = right(text, Len(text) - Len(email))
GetEmail txt
Loop
End Sub
What about using a regex (Regular Expression)?
Something like:
Public Function ReplaceAT(ByVal sInput as String)
Dim RegEx As Object
Set RegEx = CreateObject("vbscript.regexp")
With RegEx
.Global = True
.IgnoreCase = True
.MultiLine = True
.Pattern = "( at |'at'|<at>)"
End With
ReplaceAT = RegEx.Replace(sInput, "#")
Set RegEx = Nothing
End Function
Just replace the regexp with every cases you might get.
See http://www.regular-expressions.info/ for more tips and infos.
I've taken a crack at this to extract emails such as this sample below which will take out the three email addresses in yellow in the sample message below to a csv file
Any valids emails are written to a csv file Set objTF = objFSO.createtextfile("c:\myemail.csv")
This code scans all emails in a folder called temp under Inbox I cut out your recursive portion of testing and simplicity
There are four string manipulations
This line converts any non printing blank spaces to normal spaces strMsgBody = Replace(strMsgBody, Chr(160), Chr(32) (unlikely but it happened in my testing)
Regex1 converts any " at " or "at" etc into "#" "(\s+at\s+|'at'|<at>|\*at\*|at)"
Regex2 converts any " dot " or "dot" etc into "." "(\s+dot\s+|'dot'|<dot>|\*dot\*|dot)"
Regex3 converts any of "<" ">" or ":" into "" .Pattern = "[<:>]"
Regex4 extracts any valid email from the emailbody
Any valid emails are written to the csv file using objTF.writeline objRegM
Code below
Public Test()
Dim objOutlook As New Outlook.Application
Dim objNameSpace As Outlook.NameSpace
Dim objFolder As MAPIFolder
Dim strfld As String
Dim objRegex As Object
Dim objRegMC As Object
Dim objRegM As Object
Dim objFSO As Object
Dim oMailItem As MailItem
Dim objTF As Object
Dim strMsgBody As String
Set objRegex = CreateObject("vbscript.regexp")
Set objFSO = CreateObject("scripting.filesystemobject")
Set objTF = objFSO.createtextfile("c:\myemail.csv")
With objRegex
.Global = True
.MultiLine = True
.ignorecase = True
strfld = "temp"
'Get the MAPI reference
Set objNameSpace = objOutlook.GetNamespace("MAPI")
'Pick up the Inbox
Set objFolder = objNameSpace.GetDefaultFolder(olFolderInbox)
Set objFolder = objFolder.Folders(strfld)
For Each oMailItem In objFolder.Items
strMsgBody = oMailItem.Body
strMsgBody = Replace(strMsgBody, Chr(160), Chr(32))
.Pattern = "(\s+at\s+|'at'|<at>|\*at\*|at)"
strMsgBody = .Replace(strMsgBody, "#")
.Pattern = "(\s+dot\s+|'dot'|<dot>|\*dot\*|dot)"
strMsgBody = .Replace(strMsgBody, ".")
.Pattern = "[<:>]"
strMsgBody = .Replace(strMsgBody, vbNullString)
.Pattern = "[\w-\.]{1,}\#([\da-zA-Z-]{1,}\.){1,}[\da-zA-Z-]{2,3}"
If .Test(strMsgBody) Then
Set objRegMC = .Execute(strMsgBody)
For Each objRegM In objRegMC
objTF.writeline objRegM
Next
End If
Next
End With
objTF.Close
End Sub