VBScript Regexp Does Not Give Result As Expected - regex

Dim regEx
Set regEx = New RegExp
With regEx
.Pattern = "\[QUOTE=(.*?)\](.*?)\[\/QUOTE\]"
.IgnoreCase = True
.Global = True
.MultiLine = True
End With
string1="[QUOTE=P2]A[/QUOTE]B[QUOTE=P3][QUOTE=P1]C[/QUOTE]D[/QUOTE]E"
response.write regEx.Replace(string1, "")
I want BE as a result but I get BD[/QUOTE]E
Where is the problem?

Just do some conversions step by step to get the necessary structure, then retrieve result:
string1 = "[QUOTE=P2]A[/QUOTE]B[QUOTE=P3][QUOTE=P1]C[/QUOTE]D[/QUOTE]E"
With New RegExp
.IgnoreCase = True
.Global = True
.MultiLine = True
.Pattern = "\[QUOTE=(.*?)\]"
string1 = .Replace(string1, "[")
.Pattern = "\[\/QUOTE\]"
string1 = .Replace(string1, "]")
.Pattern = "\[[^[]]*?\]"
Do While .Test(string1)
string1 = .Replace(string1, "")
Loop
End With
response.write string1

Related

Unable to parse two fields from all the containers out of some json response in the right way

I'm trying to fetch two fields from each container from some json response using regex. When I execute the script that I've written so far can produce the two fields from all the containers. However, the way I've defined the last loop doesn't seem to be an ideal one. To be clearer, I used the count of name and created a loop to parse the required fields. If the count of names and changeAmount are different the results will be real messy. How can I rectify the loop to scrape the two fields in the right way?
I've tried with (working script):
Sub FetchContent()
Const Url$ = "https://api-global.morningstar.com/sal-service/v1/stock/ownership/v1/0P000000GY/OwnershipData/mutualfund/20/data?locale=en&clientId=MDC&benchmarkId=category&version=3.21.1"
Dim elem As Object, oelem As Object, I&, R&, S$
Dim Http As Object, Rgxp As Object, wb As Workbook, ws As Worksheet
Set wb = ThisWorkbook
Set ws = wb.Worksheets("Sheet1")
Set Http = CreateObject("MSXML2.XMLHTTP")
Set Rgxp = CreateObject("VBScript.RegExp")
With Http
.Open "GET", Url, False
.setRequestHeader "User-Agent", "Mozilla/5.0 (Windows NT 6.1; ) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36"
.setRequestHeader "ApiKey", "lstzFDEOhfFNMLikKa0am9mgEKLBl49T"
.send
S = .responseText
End With
With Rgxp
.Global = True
.MultiLine = True
.Pattern = "name"":""(.*?)"""
Set elem = .Execute(S)
.Pattern = "changeAmount"":(.*?),"
Set oelem = .Execute(S)
End With
For I = 0 To elem.Count - 1
R = R + 1: ws.Cells(R, 1) = elem(I).SubMatches(0)
ws.Cells(R, 2) = oelem(I).SubMatches(0)
Next I
End Sub
The following content represents how the first three containers look like:
{
"secId": "FOUSA00FQU",
"name": "Vanguard Total Stock Mkt Idx Inv",
"totalSharesHeld": 2.564925871507663,
"totalAssets": 4.16033,
"currentShares": 115913617,
"changeAmount": -1331374,
"changePercentage": -1.1355487246359206,
"date": "2020-04-30T00:00:00.000",
"trend": "_PO_",
"starRating": "4"
},
{
"secId": "FOUSA00FS1",
"name": "Vanguard 500 Index Investor",
"totalSharesHeld": 1.8912105957275436,
"totalAssets": 5.08629,
"currentShares": 85467211,
"changeAmount": -487891,
"changePercentage": -0.5676114490562759,
"date": "2020-04-30T00:00:00.000",
"trend": "_PO_",
"starRating": "4"
},
{
"secId": "FEUSA00001",
"name": "SPDR\u00ae S&P 500 ETF Trust",
"totalSharesHeld": 0.994538610986949,
"totalAssets": 5.07929,
"currentShares": 44944990,
"changeAmount": -436740,
"changePercentage": -0.9623696584506585,
"date": "2020-04-30T00:00:00.000",
"trend": "_PO_",
"starRating": "5"
}
How can I fetch the two fields from all the containers?
PS I'm not after any solution related to any json converter.
Option Explicit
Sub FetchContent()
Const url = "https://api-global.morningstar.com/sal-service/v1/stock/ownership/v1/0P000000GY/OwnershipData/mutualfund/20/data?locale=en&clientId=MDC&benchmarkId=category&version=3.21.1"
Dim wb As Workbook
Set wb = ThisWorkbook
Dim ws As Worksheet
Set ws = wb.Worksheets("Sheet1")
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", url, False
.setRequestHeader "User-Agent", "Mozilla/5.0 (Windows NT 6.1; ) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36"
.setRequestHeader "ApiKey", "lstzFDEOhfFNMLikKa0am9mgEKLBl49T"
.send
Dim resp
resp = .responseText
End With
With CreateObject("VBScript.RegExp")
.Global = True
.MultiLine = True
.pattern = "\{[^}]*""name""\:""(.*?)"",.*?""changeAmount""\:([-.\d]*),"
Dim r
r = 1
Dim item
For Each item In .Execute(resp)
ws.Cells(r, 1) = decodeJsonString(item.SubMatches(0))
ws.Cells(r, 2) = decodeJsonString(item.SubMatches(1))
r = r + 1
Next
End With
End Sub
Function decodeJsonString(jsonString)
With CreateObject("VBScript.RegExp")
.Global = True
.MultiLine = True
decodeJsonString = Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace( _
jsonString, _
"\""", """"), _
"\\", "\" & vbNullChar), _
"\/", "/"), _
"\b", Chr(8)), _
"\f", Chr(12)), _
"\n", vbLf), _
"\r", vbCr), _
"\t", vbTab)
.Global = False
.pattern = "\\u[0-9a-fA-F]{4}"
Do While .test(decodeJsonString)
decodeJsonString = .Replace(decodeJsonString, ChrW(("&H" & Right(.Execute(decodeJsonString)(0).Value, 4)) * 1))
Loop
decodeJsonString = Replace(decodeJsonString, "\" & vbNullChar, "\")
End With
End Function
You forgot opening quote:
.Pattern = """name"":""(.*?)"""
.Pattern = """changeAmount"":(.*?),"
This is another way I found success with apart from what omegastripes has showed in his answer:
Sub FetchContent()
Const Url$ = "https://api-global.morningstar.com/sal-service/v1/stock/ownership/v1/0P000000GY/OwnershipData/mutualfund/20/data?locale=en&clientId=MDC&benchmarkId=category&version=3.21.1"
Dim elem As Object, I&, R&, S$, wb As Workbook
Dim Http As Object, Rgxp As Object, ws As Worksheet
Dim subElem As Object, subElemAno As Object
Set wb = ThisWorkbook
Set ws = wb.Worksheets("Sheet1")
Set Http = CreateObject("MSXML2.XMLHTTP")
Set Rgxp = CreateObject("VBScript.RegExp")
With Http
.Open "GET", Url, False
.setRequestHeader "User-Agent", "Mozilla/5.0 (Windows NT 6.1; ) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36"
.setRequestHeader "ApiKey", "lstzFDEOhfFNMLikKa0am9mgEKLBl49T"
.send
S = .responseText
End With
With Rgxp
.Global = True
.MultiLine = True
.Pattern = "{""secId[\s\S]+?}"
Set elem = .Execute(S)
For I = 0 To elem.Count - 1
.Pattern = "name"":""(.*?)"""
Set subElem = .Execute(elem(I).Value)
If subElem.Count > 0 Then
R = R + 1: ws.Cells(R, 1) = subElem(0).SubMatches(0)
End If
.Pattern = "changeAmount"":(.*?),"
Set subElemAno = .Execute(elem(I).Value)
If subElemAno.Count > 0 Then
ws.Cells(R, 2) = subElemAno(0).SubMatches(0)
End If
Next I
End With
End Sub

Regex + Word Macro for Replacing Styles

I've got a bunch of documents with disparate styles that I've been adding to a long Macro that finds and replaces these styles with the correct ones. Right now, I'm just adding to a list as I find a wrong style. For example, there can be Heading 1, heading 1, H1, or h1. I want to write a find and replace function for each of those for the moment. What would be cooler is if I could write a catch all macro for these sorts of things using Regex: (h|H).{6}\s1 (not the best Regex writer, so bear with that). Ideally that would catch anything the variations of heading 1 (though it would not catch the h1, H1 cases, though I could add that easily enough.
I know that VBA supports Regex. I've added the reference to it. I also know how this would work for replacing specific text. I'm not replacing text though. ONLY formatting. I haven't played around with it too much. I just want to know if I can use the Regex when working specifically with a style. Here's what the functions look like right now:
Selection.Find.ClearFormatting
Selection.Find.Style = ActiveDocument.Styles("Heading 1")
Selection.Find.Replacement.ClearFormatting
Selection.Find.Replacement.Style = ActiveDocument.Styles("SSC TOC 2")
With Selection.Find
.Text = ""
.Replacement.Text = ""
.Forward = True
.Wrap = wdFindContinue
.Format = True
.MatchCase = False
.MatchWholeWord = False
.MatchWildcards = False
.MatchSoundsLike = False
.MatchAllWordForms = False
End With
Selection.Find.Execute Replace:=wdReplaceAll
I simply recorded that. Now, would I be able to put Regex in place of that string, like so:
Selection.Find.ClearFormatting
Selection.Find.Style = ActiveDocument.Styles(someRegex function (h|H).{6}\s1)
Selection.Find.Replacement.ClearFormatting
Selection.Find.Replacement.Style = ActiveDocument.Styles("SSC TOC 2")
With Selection.Find
.Text = ""
.Replacement.Text = ""
.Forward = True
.Wrap = wdFindContinue
.Format = True
.MatchCase = False
.MatchWholeWord = False
.MatchWildcards = False
.MatchSoundsLike = False
.MatchAllWordForms = False
End With
Selection.Find.Execute Replace:=wdReplaceAll
Basically just using a function someRegex function (h|H).{6}\s1 in place of the string literal. Is there any way to do this? Would appreciate any guidance or help!
You could use something along the lines the following macro to delete all unused user-defined Styles (except Linked Styles) in a document, and to clean up the various H1, etc. Styles you mentioned:
Sub CleanUpStyles()
Application.ScreenUpdating = False
Dim Doc As Document, bDel As Boolean, bHid As Boolean
Dim Rng As Range, StlNm As String, i As Long
bHid = ActiveWindow.View.ShowHiddenText
ActiveWindow.View.ShowHiddenText = True
Set Doc = ActiveDocument
With Doc
For i = .Styles.Count To 1 Step -1
With .Styles(i)
If .BuiltIn = False And .Linked = False Then
bDel = True: StlNm = .NameLocal
For Each Rng In Doc.StoryRanges
With Rng
With .Find
.ClearFormatting
.Format = True
.Style = StlNm
.Execute
End With
If .Find.Found = True Then
If StlNm Like "[Hh]*#" Then
If StlNm <> "Heading " & Right(StlNm, 1) Then
.Style = "Heading " & Right(StlNm, 1)
bDel = True
End If
Else
bDel = False
End If
Exit For
End If
End With
Next
If bDel = True Then .Delete
End If
End With
Next
End With
ActiveWindow.View.ShowHiddenText = bHid
Application.ScreenUpdating = True
End Sub

VBA Code is picking up a column not called out

Sub UpdateDMDCLCSIM()
Dim SIM_DM_DCLC As Worksheet
Dim TextFileUpdated As Date
Set SIM_DM_DCLC = ThisWorkbook.Sheets(Sheet52.Name)
TextFileUpdated = DateValue(FileDateTime("\\networkshare\dept\DCGSI\Extracts\SIM_DM_DCLC.csv"))
Application.DisplayAlerts = False
Application.StatusBar = "Importing latest DM DCLC SIM Data..."
With SIM_DM_DCLC.QueryTables.Add(Connection:= _
"TEXT;\\networkshare\dept\DCGSI\Extracts\SIM_DM_DCLC.csv" _
, Destination:=SIM_DM_DCLC.Range("$A$1"))
.Name = "SIM_DM_DCLC"
.FieldNames = True
.RowNumbers = False
.FillAdjacentFormulas = False
.PreserveFormatting = True
.RefreshOnFileOpen = False
.RefreshStyle = xlInsertDeleteCells
.SavePassword = False
.SaveData = True
.AdjustColumnWidth = True
.RefreshPeriod = 0
.TextFilePromptOnRefresh = False
.TextFilePlatform = 936
.TextFileStartRow = 1
.TextFileParseType = xlDelimited
.TextFileTextQualifier = xlTextQualifierDoubleQuote
.TextFileConsecutiveDelimiter = False
.TextFileTabDelimiter = True
.TextFileSemicolonDelimiter = False
.TextFileCommaDelimiter = True
.TextFileSpaceDelimiter = False
.TextFileTrailingMinusNumbers = True
.Refresh BackgroundQuery:=False
End With
'Change to MySQL date format.
SIM_DM_DCLC.Range("I:K", "P:T").Replace Chr(84), " "
SIM_DM_DCLC.Range("I:K", "P:T").Replace Chr(90), ""
SIM_DM_DCLC.Range("I:K", "P:T").NumberFormat = "yyyy-mm-dd hh:mm:ss"
Okay so this opens a csv that is downloaded to a network share and fixes some dates. The dates in the original file are formatted YYYY-MM-DDTHH:MM:SSZ and this is supposed to strip the T and Z from those dates in the appropriate columns. The issue I am having is that for some strange reason it is processing column L in the file and I can't figure out why.
So I looked up some code for regex replace in VBA and tried to refactor the code to use the following code to try and fix the issue:
Sub UpdateDMDCLCSIM()
On Error GoTo ErrorHandler
Dim SIM_DM_DCLC As Worksheet
Dim TextFileUpdated As Date
Set SIM_DM_DCLC = ThisWorkbook.Sheets(Sheet52.Name)
TextFileUpdated = DateValue(FileDateTime("\\networksharem\dept\DCGSI\Extracts\SIM_DM_DCLC.csv"))
Application.DisplayAlerts = False
Application.StatusBar = "Importing latest DM DCLC SIM Data..."
With SIM_DM_DCLC.QueryTables.Add(Connection:= _
"TEXT;\\networkshare\dept\DCGSI\Extracts\SIM_DM_DCLC.csv" _
, Destination:=SIM_DM_DCLC.Range("$A$1"))
.Name = "SIM_DM_DCLC"
.FieldNames = True
.RowNumbers = False
.FillAdjacentFormulas = False
.PreserveFormatting = True
.RefreshOnFileOpen = False
.RefreshStyle = xlInsertDeleteCells
.SavePassword = False
.SaveData = True
.AdjustColumnWidth = True
.RefreshPeriod = 0
.TextFilePromptOnRefresh = False
.TextFilePlatform = 936
.TextFileStartRow = 1
.TextFileParseType = xlDelimited
.TextFileTextQualifier = xlTextQualifierDoubleQuote
.TextFileConsecutiveDelimiter = False
.TextFileTabDelimiter = True
.TextFileSemicolonDelimiter = False
.TextFileCommaDelimiter = True
.TextFileSpaceDelimiter = False
.TextFileTrailingMinusNumbers = True
.Refresh BackgroundQuery:=False
End With
'Change to MySQL date format.
Set regex = CreateObject("VBScript.RegExp")
regex.Pattern = "/^(\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2}):(\d{2})Z)$/"
For Each cell In SIM_DM_DCLC.UsedRange
If cell.Value <> "" Then cell.Value = regex.Replace(cell.Value, "/^(\d{4})-(\d{2})-(\d{2}) (\d{2}):(\d{2}):(\d{2})$/")
Next cell
Pretty sure that the 5017 - Application-defined or object-defined error I am getting on the regex.Replace means I have something wrong with the regex piece. Just not sure what it is.
Well you have to check to an actual match and not just a blank; here is the updated and appropriate section of code.
'Change to MySQL date format.
Set regex = CreateObject("VBScript.RegExp")
regex.Pattern = "^(\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2}):(\d{2})Z)$"
For Each cell In SIM_DM_DCLC.UsedRange
If cell.Value = regex.Pattern Then cell.Value = regex.Replace(cell.Value, "^(\d{4})-(\d{2})-(\d{2}) (\d{2}):(\d{2}):(\d{2})$")
Next cell

VBA Using regex for replacing stuff

I never did VBA before, and I am not skilled in programming in any way xD
I am trying do remove some things from a list that contains html descriptions, but it is not working.
Any advice what horrible things I did wrong here?
Function entferne_sonstige_zeichen(description)
entferne_sonstige_zeichen = description
Dim oRegExp As RegExp
Set oRegExp = New RegExp
With oRegExp
.IgnoreCase = False
.Global = True
.MultiLine = True
.Pattern = "[^A-Za-z\d,/-]"
End With
Dim ReplacePattern As String
ReplacePattern = ""
description = oRegExp.Replace(ReplacePattern)
End Function
Maybe...
Function entferne_sonstige_zeichen(str As String) As String
Dim objRegex As Object
Set objRegex = CreateObject("vbscript.regexp")
With objRegex
.IgnoreCase = False
.Global = True
.MultiLine = True
.Pattern = "[^A-Za-z\d,/-]"
.ignorecase = True
entferne_sonstige_zeichen = .Replace(str, vbNullString)
End With
End Function

MS Word VBA How to Search and copy selected text to beginning of the Paragraph

I want to search and copy year specified in the paragraph and copy it to beginning of the paragraph. following is the code i am working with, it does selects the year but doesn't copy it to the beginning:
Sub CopyYeartoFirst()
'
' Macro1 Macro
'
' Selection.Find.ClearFormatting
With ActiveDocument.Content
With Selection.Find
.Text = "[0-9]{4}"
.Replacement.Text = ""
.Forward = True
.Wrap = wdFindContinue
.Format = False
.MatchCase = False
.MatchWholeWord = False
.MatchAllWordForms = False
.MatchSoundsLike = False
.MatchWildcards = True
.Execute
End With
While .Find.Found = True
Selection.Copy
Selection.HomeKey Unit:=wdLine
Selection.PasteAndFormat (wdPasteDefault)
'Selection.TypeText Text:=" -- "
.Find.Execute
'Selection.Find.ClearFormatting
Wend
End With
End Sub
You will need to use something like this before pasting your search results
Dim oRng As Range
Set oRng = Selection.Paragraphs(1).Range
oRng.Collapse wdCollapseStart
oRng.Select