I am using Swift 3 and trying to access captured groups.
// Pattern: one of the trigger phrases, then any text (newlines included), then an
// amount such as " 39.15". The final (\d\d) is capture group 8.
let regexp = "((ALREADY PAID | NOT ALR | PROVIDER MAY | READY | MAY BILL | BILL YOU | PAID)((.|\\n)*))(( \\d+)(\\.+|-+)(\\d\\d))"
// check if some substring is in the recognized text
// range(of:options:) only yields the range of the whole match, not of individual groups.
if let range = stringText.range(of:regexp, options: .regularExpression) {
// NOTE(review): `range` was computed on `stringText` but is applied to
// `tesseract.recognizedText` — presumably the same text; confirm.
let result = tesseract.recognizedText.substring(with:range)
}
I want to be able to extract out the last two numbers captured (\d\d) so if the text was: ALREADY PAID asfasdfadsfasdf 39.15, it would extract 15. Here is a regex builder that shows what I want. Normally, I would be able to do $8 to get the 8th group that was extracted but I don't know how to do that in Swift 3.
http://regexr.com/3fh1e
Swift 4, Swift 5
extension String {
    /// Collects the capture groups of every match of `regexPattern` in the receiver.
    ///
    /// - Parameter regexPattern: An `NSRegularExpression` pattern.
    /// - Returns: One array per match; element 0 is the whole match and element `n`
    ///   is capture group `n`. A group whose range cannot be converted to a
    ///   `Range<String.Index>` is reported as `""`. An invalid pattern is logged
    ///   and yields `[]`.
    func groups(for regexPattern: String) -> [[String]] {
        let expression: NSRegularExpression
        do {
            expression = try NSRegularExpression(pattern: regexPattern)
        } catch let error {
            print("invalid regex: \(error.localizedDescription)")
            return []
        }

        let wholeString = NSRange(startIndex..., in: self)
        var allGroups: [[String]] = []
        for match in expression.matches(in: self, range: wholeString) {
            var captured: [String] = []
            for groupIndex in 0..<match.numberOfRanges {
                if let swiftRange = Range(match.range(at: groupIndex), in: self) {
                    captured.append(String(self[swiftRange]))
                } else {
                    captured.append("")
                }
            }
            allGroups.append(captured)
        }
        return allGroups
    }
}
example:
// Each match is reported as [whole match, outer group, inner group].
let res = "1my 2own 3string".groups(for:"(([0-9]+)[a-z]+) ")
(lldb) po res ▿ 2 elements
▿ 0 : 3 elements
- 0 : "1my "
- 1 : "1my"
- 2 : "1"
▿ 1 : 3 elements
- 0 : "2own "
- 1 : "2own"
- 2 : "2"
but I don't know how to do that in Swift 3.
When you receive a match from NSRegularExpression, what you get is an NSTextCheckingResult. You call rangeAt to get a specific capture group.
Example:
let s = "hey ho ha"
let pattern = "(h).*(h).*(h)"
// our goal is capture group 3, "h" in "ha"
let regex = try! NSRegularExpression(pattern: pattern)
// The search range is given in UTF-16 units, hence s.utf16.count.
let result = regex.matches(in:s, range:NSMakeRange(0, s.utf16.count))
// Index 0 is the whole match; indices 1... are the capture groups.
let third = result[0].rangeAt(3) // <-- !!
third.location // 7
third.length // 1
As ever, a simple extension seems to be the way around Swift's bizarre overcomplication...
extension NSTextCheckingResult {
    /// Returns the text covered by every range of this result.
    ///
    /// - Parameter testedString: The string the result was produced from.
    /// - Returns: Element 0 is the whole match, element `n` is capture group `n`.
    ///   A group that did not participate in the match (its range has location
    ///   `NSNotFound`), or whose range does not fit `testedString`, is returned
    ///   as `""` instead of crashing on a force-unwrap.
    func groups(testedString:String) -> [String] {
        return (0 ..< numberOfRanges).map { index -> String in
            guard let swiftRange = Range(range(at: index), in: testedString) else {
                return ""
            }
            return String(testedString[swiftRange])
        }
    }
}
Use it like this:
// NSRange is measured in UTF-16 code units, so build it from the string's indices;
// `someString.count` (a Character count) is too short for text containing emoji
// or other characters outside the Basic Multilingual Plane.
if let match = myRegex.firstMatch(in: someString, range: NSRange(someString.startIndex..., in: someString)) {
    let groups = match.groups(testedString: someString)
    //... do something with groups
}
A slightly altered version based on @Vyacheslav's answer, with a different error-handling approach:
/// Failures that can occur while turning match ranges back into substrings.
enum ParsingError: Error {
    // You can pass more info here with parameter(s) if you want, e.g. `case let invalidRange(originalString, failedAtRange)`
    case invalidRange
}

/// Adopted by components that need access to a `StringUtility`.
protocol StringUtilityRequired {
    var stringUtility: StringUtility { get }
}

extension StringUtilityRequired {
    /// Default implementation: hands out a freshly created utility value.
    var stringUtility: StringUtility {
        return StringUtility()
    }
}

/// Regex helpers packaged as a single-case enum.
enum StringUtility {
    // This component is stateless; it doesn't have any side effect
    case pure

    init() {
        self = .pure
    }

    /// Returns the capture groups of every match of `pattern` in `str`.
    ///
    /// - Returns: One array per match; index 0 is the whole match.
    /// - Throws: The `NSRegularExpression` initializer's error for an invalid
    ///   pattern, or `ParsingError.invalidRange` when a group's range cannot be
    ///   converted to a `Range<String.Index>` in `str`.
    func groups(_ str: String, pattern: String) throws -> [[String]] {
        let expression = try NSRegularExpression(pattern: pattern)
        let searchRange = NSRange(str.startIndex..., in: str)

        var collected: [[String]] = []
        for match in expression.matches(in: str, range: searchRange) {
            var captures: [String] = []
            for groupIndex in 0 ..< match.numberOfRanges {
                guard let swiftRange = Range(match.range(at: groupIndex), in: str) else {
                    throw ParsingError.invalidRange
                }
                captures.append(String(str[swiftRange]))
            }
            collected.append(captures)
        }
        return collected
    }
}
Usage:
/// Example adopter: obtains `stringUtility` from the protocol's default implementation.
struct MyComponent: StringUtilityRequired {
    /// Prints the capture groups found in a sample string.
    /// - Throws: Whatever `StringUtility.groups(_:pattern:)` throws.
    func myFunc() throws {
        // The backslash must be doubled: "\s" is not a valid escape sequence in a
        // Swift string literal, so the original line did not compile.
        let groups = try stringUtility.groups("Test 123", pattern: "(.+)\\s(.+)")
        print(groups)
    }
}
Related
I was trying to change hello_world to helloWorld by this snippet of code (Swift 3.0):
import Foundation
let oldLine = "hello_world"
// NOTE(review): NSRange lengths are UTF-16 counts; `characters.count` only
// coincides with that for text such as this ASCII sample.
let fullRange = NSRange(location: 0, length: oldLine.characters.count)
let newLine = NSMutableString(string: oldLine)
// Group 1 is the underscore, group 2 the word character following it.
let regex = try! NSRegularExpression(pattern: "(_)(\\w)", options: [])
// Replaces each match in place inside `newLine` using the template below.
regex.replaceMatches(in: newLine, options: [], range: fullRange,
withTemplate: "\\L$2")
The result was newLine = "helloLworld"
I used "\\L$2" as template because I saw this answer: https://stackoverflow.com/a/20742304/5282792 saying \L$2 is the pattern for the second group's uppercase in replacement template. But it didn't work in NSRegularExpression.
So can I replace a string with its uppercase with a replacement template pattern in NSRegularExpression.
One way to work with your case is subclassing NSRegularExpression and override replacementString(for:in:offset:template:) method.
/// Regular expression whose replacement text is the upper-cased content of
/// capture group 2; the template string is ignored.
class ToUpperRegex: NSRegularExpression {
    /// - Parameters:
    ///   - result: The match being replaced.
    ///   - string: The string being searched (possibly already partly replaced).
    ///   - offset: Shift to add to `result`'s locations so they line up with `string`.
    ///   - templ: Unused.
    /// - Returns: Group 2 upper-cased, or `""` when the match has no usable group 2.
    override func replacementString(for result: NSTextCheckingResult, in string: String, offset: Int, template templ: String) -> String {
        guard result.numberOfRanges > 2 else {
            return ""
        }
        // `range(at:)` is the current spelling of Swift 3's `rangeAt(_:)`.
        let groupRange = result.range(at: 2)
        // A group that did not participate in the match reports NSNotFound.
        guard groupRange.location != NSNotFound else {
            return ""
        }
        // Honour `offset` so the lookup stays correct when earlier replacements
        // have already changed the length of `string`.
        let shifted = NSRange(location: groupRange.location + offset, length: groupRange.length)
        let source = string as NSString
        guard shifted.location >= 0, NSMaxRange(shifted) <= source.length else {
            return ""
        }
        return source.substring(with: shifted).uppercased()
    }
}
let oldLine = "hello_world"
// The range length is the UTF-16 count.
let fullRange = NSRange(0..<oldLine.utf16.count) //<-
let tuRegex = try! ToUpperRegex(pattern: "(_)(\\w)")
// The template is empty because ToUpperRegex builds the replacement itself.
let newLine = tuRegex.stringByReplacingMatches(in: oldLine, range: fullRange, withTemplate: "")
print(newLine) //->helloWorld
This doesn't answer the question pertaining to regex, but might be of interest to readers who don't necessarily need regex to perform this task (and can use native Swift instead):
extension String {
    /// Splits the receiver on `separators` and joins the pieces in lower camel case.
    ///
    /// - Parameter separators: Characters that delimit words; empty pieces are dropped.
    /// - Returns: The first piece lower-cased followed by the remaining pieces with
    ///   only their first character upper-cased. Returns the receiver unchanged
    ///   when splitting yields no pieces.
    func camelCased(givenSeparators separators: [Character]) -> String {
        // String is itself a collection of Characters; the `characters` view used
        // by the Swift 3 version is no longer available.
        let chunks = split { separators.contains($0) }
        guard let firstChunk = chunks.first else { return self }
        let rest = chunks.dropFirst().map { String($0).onlyFirstCharacterUppercased }
        return firstChunk.lowercased() + rest.joined()
    }

    // helper (uppercase first char, lowercase rest)
    var onlyFirstCharacterUppercased: String {
        guard let firstChar = first else { return self }
        return String(firstChar).uppercased() + dropFirst().lowercased()
    }
}
/* Example usage */
let oldLine1 = "hello_world"
let oldLine2 = "fOo_baR BAX BaZ_fOX"
print(oldLine1.camelCased(givenSeparators: ["_"])) // helloWorld
print(oldLine2.camelCased(givenSeparators: ["_", " "])) // fooBarBaxBazFox
I'm trying to parse a Localizable.strings file for a small project in Swift on macOS.
I just want to retrieve all the keys and values inside a file to sort them into a dictionary.
To do so I used regex with the NSRegularExpression cocoa class.
Here is what those files look like:
"key 1" = "Value 1";
"key 2" = "Value 2";
"key 3" = "Value 3";
Here is my code that is supposed to get the keys and values from the file loaded into a String :
/// Work-in-progress parser: runs the key/value pattern over each line of `text`
/// and prints what was matched. The dictionary is returned empty because the
/// line that would fill it is still commented out.
static func getDictionaryFormText(text: String) -> [String: String] {
    let exp = "\"(.*)\"[ ]*=[ ]*\"(.*)\";"
    var dict: [String : String] = [:]
    text.components(separatedBy: "\n").forEach { line in
        let match = self.matches(for: exp, in: line)
        // Following line can be uncommented when working
        //dict[match[0]] = match[1]
        print("(\(match.count)) matches = \(match)")
    }
    return dict
}
/// Returns the full text of every match of `regex` in `text` (whole matches only,
/// not individual capture groups). An invalid pattern is logged and yields `[]`.
static func matches(for regex: String, in text: String) -> [String] {
    let expression: NSRegularExpression
    do {
        expression = try NSRegularExpression(pattern: regex)
    } catch let error as NSError {
        print("invalid regex: \(error.localizedDescription)")
        return []
    }

    let bridged = text as NSString
    let wholeText = NSRange(location: 0, length: bridged.length)
    var found: [String] = []
    for hit in expression.matches(in: text, range: wholeText) {
        found.append(bridged.substring(with: hit.range))
    }
    return found
}
When running this code with the provided Localizable example here is the output :
(1) matches = ["\"key 1\" = \"Value 1\";"]
(1) matches = ["\"key 2\" = \"Value 2\";"]
(1) matches = ["\"key 3\" = \"Value 3\";"]
It looks like the match doesn't stop after the first " occurrence. When I try the same expression \"(.*)\"[ ]*=[ ]*\"(.*)\"; on regex101.com the output is correct, though. What am I doing wrong?
Your function (from Swift extract regex matches ?) matches the entire pattern
only. If you are interested in the particular capture groups then
you have to access them with rangeAt() as for example in
Convert a JavaScript Regex to a Swift Regex (not yet updated for Swift 3).
However there is a much simpler solution, because .strings files actually use one possible format of property lists, and
can be directly read into a dictionary. Example:
// Locate Localizable.strings in the main bundle and load it as a dictionary;
// both steps are optional-bound, so nothing is printed if either fails.
if let url = Bundle.main.url(forResource: "Localizable", withExtension: "strings"),
let stringsDict = NSDictionary(contentsOf: url) as? [String: String] {
print(stringsDict)
}
Output:
["key 1": "Value 1", "key 2": "Value 2", "key 3": "Value 3"]
For anyone interested I got the original function working. I needed it for a small command-line script where the NSDictionary(contentsOf: URL) wasn't working.
/// Returns the capture groups of the first match of `regex` in `text`.
///
/// - Parameters:
///   - regex: An `NSRegularExpression` pattern.
///   - text: The string to search.
/// - Returns: One string per capture group (the whole match is not included).
///   A group that did not participate in the match is returned as `""`.
///   Returns `[]` when the pattern is invalid (after logging) or does not match.
func matches(for regex: String, in text: String) -> [String] {
    do {
        let regex = try NSRegularExpression(pattern: regex)
        let nsString = text as NSString
        guard let result = regex.firstMatch(in: text, options: [], range: NSRange(location: 0, length: nsString.length)) else {
            return [] // pattern does not match the string
        }
        return (1 ..< result.numberOfRanges).map { index -> String in
            let groupRange = result.range(at: index)
            // Optional groups that were skipped report NSNotFound; calling
            // substring(with:) on such a range would raise an exception.
            return groupRange.location == NSNotFound ? "" : nsString.substring(with: groupRange)
        }
    } catch {
        print("invalid regex: \(error.localizedDescription)")
        return []
    }
}
/// Extracts `"key" = "value";` pairs from `text`, one line at a time.
///
/// - Returns: The pairs in file order; lines for which the pattern does not yield
///   exactly two captures are skipped.
func getParsedText(text: String) -> [(key: String, text: String)] {
    let exp = "\"(.*)\"[ ]*=[ ]*\"(.*)\";"
    return text.components(separatedBy: "\n").compactMap { line -> (key: String, text: String)? in
        let captured = matches(for: exp, in: line)
        guard captured.count == 2 else { return nil }
        return (key: captured[0], text: captured[1])
    }
}
Call it using something like this.
// `url` must point at the .strings file; try! traps if it cannot be read as UTF-8.
let text = try! String(contentsOf: url, encoding: .utf8)
let stringDict = getParsedText(text: text)
Really nice solution parsing directly to dictionary, but if someone wants to also parse the comments you can use a small library I made for this csv2strings.
import libcsv2strings
// Optional chaining (`?.`) always produces an optional, so the declared type
// must be `StringsFile?`; the non-optional annotation did not type-check.
let contents: StringsFile? = StringsFileParser(stringsFilePath: "path/to/Localizable.strings")?.parse()
It parses the file to a StringsFile model
/// Top-level model of an Apple strings file.
public struct StringsFile {
/// The translation entries held by this model.
let entries: [Translation]
/// Model of a strings file translation item
public struct Translation {
/// Key of the entry.
let translationKey: String
/// Translated text of the entry.
let translation: String
/// Comment attached to the entry, if any (presumably the comment next to it in the file; confirm).
let comment: String?
}
}
Eventually I want to be able to input a string like "\mycard{front1}{back1} \mycard{front2}{back2} \mycard{front3}{back3}" and return the front and back of each card.
I found this website on NSRegularExpression, but I'm having a hard time adjusting it to my problem.
Here is what I have so far.
import Foundation
// Converts an NSRange into a Range<String.Index> for `str` (Swift 2 syntax).
// Returns nil when either bound cannot be expressed as a String.Index of `str`.
func rangeFromNSRange(nsRange: NSRange, forString str: String) -> Range<String.Index>? {
// Advance through the UTF-16 view by location, then by length; both steps are
// limited to the view's endIndex.
let fromUTF16 = str.utf16.startIndex.advancedBy(nsRange.location, limit: str.utf16.endIndex)
let toUTF16 = fromUTF16.advancedBy(nsRange.length, limit: str.utf16.endIndex)
if let from = String.Index(fromUTF16, within: str), let to = String.Index(toUTF16, within: str) {
return from ..< to
}
return nil
}
do {
// let input = "My name is Taylor Swift"
// let regex = try NSRegularExpression(pattern: "My name is (.*)", options: NSRegularExpressionOptions.CaseInsensitive)
let input = "mycard{front}{back}"
// NOTE(review): `{` and `}` are regex metacharacters and are not escaped in this pattern.
let regex = try NSRegularExpression(pattern: "mycard{(.*)}{(.*)}", options: NSRegularExpressionOptions.CaseInsensitive)
// NOTE(review): the range length is a Character count here; NSRange expects UTF-16 units.
let matches = regex.matchesInString(input, options: [], range: NSMakeRange(0, input.characters.count))
if let match = matches.first {
// Only capture group 1 is read.
let range = match.rangeAtIndex(1)
if let swiftRange = rangeFromNSRange(range, forString: input) {
let name = input.substringWithRange(swiftRange)
}
}
} catch {
// regex was bad!
}
As stated in my comment you need to escape the { and }. That results in the following regex: mycard\\{(.*)\\}\\{(.*)\\}.
You then might want to change your match logic a little bit to output the expected results:
// Print every capture group of the first match; index 0 (the whole match) is skipped.
if let match = matches.first {
for i in 1..<match.numberOfRanges {
let range = match.rangeAtIndex(i)
if let swiftRange = rangeFromNSRange(range, forString: input) {
let name = input.substringWithRange(swiftRange)
print(name)
}
}
}
Which outputs
front
back
If you want to match multiple cards use the following regex:
mycard\\{([^{]*)\\}\\{([^{]*)\\}
Then iterate over the matches
// Print every capture group of every match; index 0 (the whole match) is skipped.
for match in matches {
for i in 1..<match.numberOfRanges {
let range = match.rangeAtIndex(i)
if let swiftRange = rangeFromNSRange(range, forString: input) {
let name = input.substringWithRange(swiftRange)
print(name)
}
}
}
For the input mycard{front}{back} mycard{front1}{back1} the output correctly is
front
back
front1
back1
I gave up on regex. I just don't think it will do the trick here. I came up with another solution.
import Foundation
extension String {
    /// Character at integer offset `r`, or nil when `r` is out of bounds.
    subscript (r: Int) -> Character? {
        guard r >= 0 else { return nil }
        return dropFirst(r).first
    }

    /// Substring covering the integer offsets in `r`; the bounds must lie inside the string.
    subscript (r: Range<Int>) -> String {
        let lower = index(startIndex, offsetBy: r.lowerBound)
        let upper = index(startIndex, offsetBy: r.upperBound)
        return String(self[lower..<upper])
    }

    /// Collects the contents of every top-level `{...}` group, keeping nested
    /// braces inside a group verbatim.
    ///
    /// The earlier version stopped one character before the end of the string,
    /// so a group closed by the final character was never reported. It also
    /// re-walked the string from the start for every character. This version
    /// makes a single pass over the indices and includes the last character.
    ///
    /// - Returns: The groups in order of appearance, or nil when there are none.
    func parseBrackets () -> [String]? {
        var groups: [String] = []
        var depth = 0
        var groupStart = startIndex
        for position in indices {
            switch self[position] {
            case "{":
                depth += 1
                if depth == 1 {
                    groupStart = index(after: position)
                }
            case "}":
                if depth == 1 {
                    groups.append(String(self[groupStart..<position]))
                }
                depth -= 1
            default:
                break
            }
        }
        return groups.isEmpty ? nil : groups
    }
}
let testString = "mycard{f{}ront}{termins{x}{n}} mycard{front1}{back1} mycard{front2}{back2}"
let list = testString.parseBrackets()
// parseBrackets() returns nil when no group is found, so fall back to an empty
// list instead of force-unwrapping.
for a in list ?? [] {
    print(a)
}
Which gives the desired output
f{}ront
termins{x}{n}
front1
back1
front2
I have a string:
let inputText:String = "myemail_at_gmail.com_organizer#company.com"
I want to get in output: myemail#gmail.com
So I need to write 1st some pattern that matches this rule:
<email_prefix>_at_<domain>_organizer#company.com
after that I can combine:
<email_prefix>#<domain>
I use following class:
/// Thin wrapper around a case-insensitive NSRegularExpression (Swift 1.x syntax).
class Regex {
    let internalExpression: NSRegularExpression
    let pattern: String

    /// Compiles `pattern`; traps if the pattern is invalid (the result is force-unwrapped).
    init(_ pattern: String) {
        self.pattern = pattern
        var error: NSError?
        self.internalExpression = NSRegularExpression(pattern: pattern, options: .CaseInsensitive, error: &error)!
    }

    /// Returns true when the pattern matches anywhere in `input`.
    func test(input: String) -> Bool {
        // NSRange is measured in UTF-16 units, so use the NSString length rather
        // than the Character count, which is shorter for emoji and similar text.
        let matches = self.internalExpression.matchesInString(input, options: nil, range:NSMakeRange(0, (input as NSString).length))
        return matches.count > 0
    }
}
and look for regex syntax:
// NOTE(review): \w does not match ".", so the second \w+ cannot cover a domain such as "gmail.com".
if Regex("^\\w+_at_\\w+_organizer#company.com$") // id doesn't work
.test(inputText) {
// Rebuilds the address from the text before "_at_" and the domain between "_at_" and the suffix.
let result:String = inputText.split("_at_")[0] + "#" + inputText.split("_at_")[1].split("_organizer#company.com")[0]
}
This one doesn't work: "^\\w+_at_\\w+_organizer#company.com$"
This one works but its not completed: "\\w+_organizer#company.com$"
Please help,
Ok, I found solution:
Since I work with email, I need to validate the email name and the email domain separately:
let inputText:String = "myemail_at_gmail.com_organizer#company.com"
// Local part and domain are validated separately. The "." in the fixed suffix is
// escaped so it matches a literal dot rather than any character.
if Regex("^[A-Z0-9a-z._%+-]+_at_[A-Za-z0-9.-]+\\.[A-Za-z]{2,4}_organizer#company\\.com$")
    .test(inputText) {
    let result:String = inputText.split("_at_")[0] + "#" +
        inputText.split("_at_")[1].split("_organizer#company.com")[0]
    print(result) // myemail#gmail.com
}
In regular expressions you can group different matches to easily "pattern match" a given match.
// Repeatedly takes the first regex match out of `source`, storing its text in
// `result`, until no match is left.
while match != nil {
    match = source.rangeOfString(regex, options: .RegularExpressionSearch)
    if let m = match {
        // The closing parenthesis of append(...) was missing here.
        result.append(source.substringWithRange(m))
        source.replaceRange(m, with: "")
    }
}
The above works fine to find the range of the match, but it cannot tell me the group. For instance, if I search for words enclosed in "" I would like to match "word" but quickly fetch only word.
Is it possible to do so in swift?
Swift is pretty ugly right now with regular expressions -- let's hope for more-native support soon! The method on NSRegularExpression you want is matchesInString. Here's how to use it:
let string = "This is my \"string\" of \"words\"."
// Lazy (.+?) so each quoted word is matched on its own.
let re = NSRegularExpression(pattern: "\"(.+?)\"", options: nil, error: nil)!
let matches = re.matchesInString(string, options: nil, range: NSRange(location: 0, length: string.utf16Count))
println("number of matches: \(matches.count)")
for match in matches as [NSTextCheckingResult] {
// range at index 0: full match
// range at index 1: first capture group
let substring = (string as NSString).substringWithRange(match.rangeAtIndex(1))
println(substring)
}
Output:
number of matches: 2
string
words
You can use this if you want to collect the matched strings.
(My answer is derived from Nate Cook's very helpful answer.)
Updated for Swift 2.1
extension String {
// Returns the text of capture group 1 for every match of `pattern` in the
// receiver; an invalid pattern yields an empty array.
// NOTE(review): assumes the pattern defines at least one capture group that
// takes part in every match — rangeAtIndex(1) is used unconditionally.
func regexMatches(pattern: String) -> Array<String> {
let re: NSRegularExpression
do {
re = try NSRegularExpression(pattern: pattern, options: [])
} catch {
return []
}
let matches = re.matchesInString(self, options: [], range: NSRange(location: 0, length: self.utf16.count))
var collectMatches: Array<String> = []
for match in matches {
// range at index 0: full match
// range at index 1: first capture group
let substring = (self as NSString).substringWithRange(match.rangeAtIndex(1))
collectMatches.append(substring)
}
return collectMatches
}
}
Updated for Swift 3.0
extension String {
    /// Returns the text of the first capture group for every match of `pattern`.
    ///
    /// - Parameter pattern: An `NSRegularExpression` pattern.
    /// - Returns: Group 1 of each match, in order. Matches without a usable
    ///   group 1 (the pattern has no capture group, or the group did not take
    ///   part in that match) are skipped rather than raising an exception.
    ///   An invalid pattern yields `[]`.
    func regexMatches(pattern: String) -> Array<String> {
        guard let re = try? NSRegularExpression(pattern: pattern, options: []) else {
            return []
        }
        let nsSelf = self as NSString
        let matches = re.matches(in: self, options: [], range: NSRange(location: 0, length: nsSelf.length))
        return matches.compactMap { match -> String? in
            // range at index 0: full match
            // range at index 1: first capture group
            guard match.numberOfRanges > 1 else { return nil }
            // `range(at:)` is the current spelling of Swift 3's `rangeAt(_:)`.
            let groupRange = match.range(at: 1)
            guard groupRange.location != NSNotFound else { return nil }
            return nsSelf.substring(with: groupRange)
        }
    }
}
How about this one, added as an extension to String? It returns all matches with all of their groups. Here self is the String; if you don't want an extension, add a String parameter and replace every self with that parameter.
// Returns, for every match of `regex` in self, the text of each range of that
// match (index 0 is the whole match). An invalid pattern is logged and yields [].
// Swift 2 syntax; intended to live in a String extension.
func matchesForRegexInTextAll(regex: String!) -> [[String]] {
do {
let regex = try NSRegularExpression(pattern: regex, options: [])
let nsString = self as NSString
var resultsFinal = [[String]]()
let results = regex.matchesInString(self,
options: [], range: NSMakeRange(0, nsString.length))
for result in results {
var internalString = [String]()
// NOTE(review): a group that did not take part in the match presumably
// reports NSNotFound here — confirm substringWithRange tolerates that.
for var i = 0; i < result.numberOfRanges; ++i{
internalString.append(nsString.substringWithRange(result.rangeAtIndex(i)))
}
resultsFinal.append(internalString)
}
return resultsFinal
} catch let error as NSError {
print("invalid regex: \(error.localizedDescription)")
return []
}
}
All the answers provided are good, but nonetheless I am going to provide my String extension written in Swift 2.2.
Noted differences:
only use the first match
supports multiple captured groups
a more accurate function name (it is capture groups, not matches)
.
extension String {
    /// Returns the capture groups of the first match of `pattern` (Swift 2.2 syntax).
    ///
    /// - Returns: The groups in order, or nil when the pattern is invalid, does
    ///   not match, or defines no capture groups.
    func capturedGroups(withRegex pattern: String) -> [String]? {
        var regex: NSRegularExpression
        do {
            regex = try NSRegularExpression(pattern: pattern, options: [])
        } catch {
            return nil
        }

        // NSRange is measured in UTF-16 units; the Character count is shorter for
        // emoji and similar text and would cut the search range short.
        let matches = regex.matchesInString(self, options: [], range: NSRange(location:0, length: self.utf16.count))

        guard let match = matches.first else { return nil }

        // Note: Index 1 is 1st capture group, 2 is 2nd, ..., while index 0 is full match which we don't use
        let lastRangeIndex = match.numberOfRanges - 1
        guard lastRangeIndex >= 1 else { return nil }

        var results = [String]()

        for i in 1...lastRangeIndex {
            let capturedGroupIndex = match.rangeAtIndex(i)
            let matchedString = (self as NSString).substringWithRange(capturedGroupIndex)
            results.append(matchedString)
        }

        return results
    }
}
To use:
// The single capture group matches "bcde", so the result is ["bcde"] (wrapped in an Optional).
"abcdefg".capturedGroups(withRegex: "a(.*)f")
Updated for Swift 4
/**
 String extension that extracts the captured groups of the first match of a regex pattern.

 - parameter pattern: regex pattern
 - Returns: the captured groups in order (the whole match is not included). A group
   that did not take part in the match is returned as an empty string. The result is
   empty when the pattern is invalid, does not match, or has no capture groups.
 */
public func capturedGroups(withRegex pattern: String) -> [String] {
    guard let regex = try? NSRegularExpression(pattern: pattern, options: []) else {
        return []
    }

    // NSRange is measured in UTF-16 units; `self.count` (Characters) is shorter for
    // emoji and similar text and would cut the search range short.
    let nsSelf = self as NSString
    let wholeRange = NSRange(location: 0, length: nsSelf.length)
    guard let match = regex.firstMatch(in: self, options: [], range: wholeRange) else {
        return []
    }

    // Index 0 is the whole match; the capture groups start at index 1.
    return (1 ..< match.numberOfRanges).map { index -> String in
        let groupRange = match.range(at: index)
        // Skipped optional groups report NSNotFound, which substring(with:) rejects.
        return groupRange.location == NSNotFound ? "" : nsSelf.substring(with: groupRange)
    }
}
To use:
// The single capture group matches "bcde", so the result is ["bcde"].
"abcdefg".capturedGroups(withRegex: "a(.*)f")
Gist on github: https://gist.github.com/unshapedesign/1b95f78d7f74241f706f346aed5384ff