How to replace a string with its uppercase with NSRegularExpression in Swift? - regex

I was trying to change hello_world to helloWorld by this snippet of code (Swift 3.0):
import Foundation
let oldLine = "hello_world"
let fullRange = NSRange(location: 0, length: oldLine.characters.count)
let newLine = NSMutableString(string: oldLine)
let regex = try! NSRegularExpression(pattern: "(_)(\\w)", options: [])
regex.replaceMatches(in: newLine, options: [], range: fullRange,
withTemplate: "\\L$2")
The result was newLine = "helloLworld"
I used "\\L$2" as template because I saw this answer: https://stackoverflow.com/a/20742304/5282792 saying \L$2 is the pattern for the second group's uppercase in replacement template. But it didn't work in NSRegularExpression.
So can I replace a string with its uppercase with a replacement template pattern in NSRegularExpression.

One way to work with your case is subclassing NSRegularExpression and override replacementString(for:in:offset:template:) method.
class ToUpperRegex: NSRegularExpression {
override func replacementString(for result: NSTextCheckingResult, in string: String, offset: Int, template templ: String) -> String {
guard result.numberOfRanges > 2 else {
return ""
}
let matchingString = (string as NSString).substring(with: result.rangeAt(2)) as String
return matchingString.uppercased()
}
}
let oldLine = "hello_world"
let fullRange = NSRange(0..<oldLine.utf16.count) //<-
let tuRegex = try! ToUpperRegex(pattern: "(_)(\\w)")
let newLine = tuRegex.stringByReplacingMatches(in: oldLine, range: fullRange, withTemplate: "")
print(newLine) //->helloWorld

This doesn't answer the question pertaining regex, but might be of interest for readers not necessarily needing to use regex to perform this task (rather, using native Swift)
extension String {
func camelCased(givenSeparators separators: [Character]) -> String {
let charChunks = characters.split { separators.contains($0) }
guard let firstChunk = charChunks.first else { return self }
return String(firstChunk).lowercased() + charChunks.dropFirst()
.map { String($0).onlyFirstCharacterUppercased }.joined()
}
// helper (uppercase first char, lowercase rest)
var onlyFirstCharacterUppercased: String {
let chars = characters
guard let firstChar = chars.first else { return self }
return String(firstChar).uppercased() + String(chars.dropFirst()).lowercased()
}
}
/* Example usage */
let oldLine1 = "hello_world"
let oldLine2 = "fOo_baR BAX BaZ_fOX"
print(oldLine1.camelCased(givenSeparators: ["_"])) // helloWorld
print(oldLine2.camelCased(givenSeparators: ["_", " "])) // fooBarBazBazFox

Related

Swift 3 - How do I extract captured groups in regular expressions?

I am using Swift 3 and trying to access captured groups.
let regexp = "((ALREADY PAID | NOT ALR | PROVIDER MAY | READY | MAY BILL | BILL YOU | PAID)((.|\\n)*))(( \\d+)(\\.+|-+)(\\d\\d))"
// check if some substring is in the recognized text
if let range = stringText.range(of:regexp, options: .regularExpression) {
let result = tesseract.recognizedText.substring(with:range)
}
I want to be able to extract out the last two numbers captured (\d\d) so if the text was: ALREADY PAID asfasdfadsfasdf 39.15, it would extract 15. Here is a regex builder that shows what I want. Normally, I would be able to do $8 to get the 8th group that was extracted but I don't know how to do that in Swift 3.
http://regexr.com/3fh1e
Swift 4, Swift 5
extension String {
func groups(for regexPattern: String) -> [[String]] {
do {
let text = self
let regex = try NSRegularExpression(pattern: regexPattern)
let matches = regex.matches(in: text,
range: NSRange(text.startIndex..., in: text))
return matches.map { match in
return (0..<match.numberOfRanges).map {
let rangeBounds = match.range(at: $0)
guard let range = Range(rangeBounds, in: text) else {
return ""
}
return String(text[range])
}
}
} catch let error {
print("invalid regex: \(error.localizedDescription)")
return []
}
}
}
example:
let res = "1my 2own 3string".groups(for:"(([0-9]+)[a-z]+) ")
(lldb) po res ▿ 2 elements
▿ 0 : 3 elements
- 0 : "1my "
- 1 : "1my"
- 2 : "1"
▿ 1 : 3 elements
- 0 : "2own "
- 1 : "2own"
- 2 : "2"
but I don't know how to do that in Swift 3.
When you receive a match from NSRegularExpression, what you get is an NSTextCheckingResult. You call rangeAt to get a specific capture group.
Example:
let s = "hey ho ha"
let pattern = "(h).*(h).*(h)"
// our goal is capture group 3, "h" in "ha"
let regex = try! NSRegularExpression(pattern: pattern)
let result = regex.matches(in:s, range:NSMakeRange(0, s.utf16.count))
let third = result[0].rangeAt(3) // <-- !!
third.location // 7
third.length // 1
As ever, a simple extension seems to be the way around swift's bizarre overcomplication...
extension NSTextCheckingResult {
func groups(testedString:String) -> [String] {
var groups = [String]()
for i in 0 ..< self.numberOfRanges
{
let group = String(testedString[Range(self.range(at: i), in: testedString)!])
groups.append(group)
}
return groups
}
}
Use it like this:
if let match = myRegex.firstMatch(in: someString, range: NSMakeRange(0, someString.count)) {
let groups = match.groups(testedString: someString)
//... do something with groups
}
A slightly altered version based on #Vyacheslav's answer with different error handling approach:
enum ParsingError: Error {
// You can pass more info here with parameter(s) if you want, e.g. `case let invalidRange(originalString, failedAtRange)`
case invalidRange
}
protocol StringUtilityRequired {
var stringUtility: StringUtility { get }
}
extension StringUtilityRequired {
var stringUtility: StringUtility { StringUtility() }
}
enum StringUtility {
func groups(_ str: String, pattern: String) throws -> [[String]] {
let regex = try NSRegularExpression(pattern: pattern)
let matches = regex.matches(in: str, range: NSRange(str.startIndex..., in: str))
return try matches.map { match throws in
return try (0 ..< match.numberOfRanges).map { range throws in
let rangeBounds = match.range(at: range)
guard let range = Range(rangeBounds, in: str) else {
throw ParsingError.invalidRange
}
return String(str[range])
}
}
}
// This component is stateless; it doesn't have any side effect
case pure
init() { self = .pure }
}
Usage:
struct MyComponent: StringUtilityRequired {
func myFunc() throws {
let groups = try stringUtility.groups("Test 123", pattern: "(.+)\s(.+)")
print(groups)
}
}

trying to parse a Localizable.string file for a small project in swift on MacOS

I'm trying to parse a Localizable.string file for a small project in swift on MacOS.
I just want to retrieve all the keys and values inside a file to sort them into a dictionary.
To do so I used regex with the NSRegularExpression cocoa class.
Here is what those file look like :
"key 1" = "Value 1";
"key 2" = "Value 2";
"key 3" = "Value 3";
Here is my code that is supposed to get the keys and values from the file loaded into a String :
static func getDictionaryFormText(text: String) -> [String: String] {
var dict: [String : String] = [:]
let exp = "\"(.*)\"[ ]*=[ ]*\"(.*)\";"
for line in text.components(separatedBy: "\n") {
let match = self.matches(for: exp, in: line)
// Following line can be uncommented when working
//dict[match[0]] = match[1]
print("(\(match.count)) matches = \(match)")
}
return dict
}
static func matches(for regex: String, in text: String) -> [String] {
do {
let regex = try NSRegularExpression(pattern: regex)
let nsString = text as NSString
let results = regex.matches(in: text, range: NSRange(location: 0, length: nsString.length))
return results.map { nsString.substring(with: $0.range) }
} catch let error as NSError {
print("invalid regex: \(error.localizedDescription)")
return []
}
}
When running this code with the provided Localizable example here is the output :
(1) matches = ["\"key 1\" = \"Value 1\";"]
(1) matches = ["\"key 2\" = \"Value 2\";"]
(1) matches = ["\"key 3\" = \"Value 3\";"]
It sounds like the match doesn't stop after the first " occurence. When i try the same expression \"(.*)\"[ ]*=[ ]*\"(.*)\"; on regex101.com the output is correct though. What am i doing wrong ?
Your function (from Swift extract regex matches ?) matches the entire pattern
only. If you are interested in the particular capture groups then
you have to access them with rangeAt() as for example in
Convert a JavaScript Regex to a Swift Regex (not yet updated for Swift 3).
However there is a much simpler solution, because .strings files actually use one possible format of property lists, and
can be directly read into a dictionary. Example:
if let url = Bundle.main.url(forResource: "Localizable", withExtension: "strings"),
let stringsDict = NSDictionary(contentsOf: url) as? [String: String] {
print(stringsDict)
}
Output:
["key 1": "Value 1", "key 2": "Value 2", "key 3": "Value 3"]
For anyone interested I got the original function working. I needed it for a small command-line script where the NSDictionary(contentsOf: URL) wasn't working.
func matches(for regex: String, in text: String) -> [String] {
do {
let regex = try NSRegularExpression(pattern: regex)
let nsString = text as NSString
guard let result = regex.firstMatch(in: text, options: [], range: NSRange(location: 0, length: nsString.length)) else {
return [] // pattern does not match the string
}
return (1 ..< result.numberOfRanges).map {
nsString.substring(with: result.range(at: $0))
}
} catch let error as NSError {
print("invalid regex: \(error.localizedDescription)")
return []
}
}
func getParsedText(text: String) -> [(key: String, text: String)] {
var dict: [(key: String, text: String)] = []
let exp = "\"(.*)\"[ ]*=[ ]*\"(.*)\";"
for line in text.components(separatedBy: "\n") {
let match = matches(for: exp, in: line)
if match.count == 2 {
dict.append((key: match[0], text: match[1]))
}
}
return dict
}
Call it using something like this.
let text = try! String(contentsOf: url, encoding: .utf8)
let stringDict = getParsedText(text: text)
Really nice solution parsing directly to dictionary, but if someone wants to also parse the comments you can use a small library I made for this csv2strings.
import libcsv2strings
let contents: StringsFile = StringsFileParser(stringsFilePath: "path/to/Localizable.strings")?.parse()
It parses the file to a StringsFile model
/// Top level model of a Apple's strings file
public struct StringsFile {
let entries: [Translation]
/// Model of a strings file translation item
public struct Translation {
let translationKey: String
let translation: String
let comment: String?
}
}

How to fetch words from text with regex in Swift iOS?

I have as string:
let inputText:String = "myemail_at_gmail.com_organizer#company.com"
I want to get in output: myemail#gmail.com
So I need to write 1st some pattern that matches this rule:
<email_prefix>_at_<domain>_organizer#company.com
after that I can combine:
<email_prefix>#<domain>
I use following class:
class Regex {
let internalExpression: NSRegularExpression
let pattern: String
init(_ pattern: String) {
self.pattern = pattern
var error: NSError?
self.internalExpression = NSRegularExpression(pattern: pattern, options: .CaseInsensitive, error: &error)!
}
func test(input: String) -> Bool {
let matches = self.internalExpression.matchesInString(input, options: nil, range:NSMakeRange(0, count(input)))
return matches.count > 0
}
}
and look for regex syntax:
if Regex("^\\w+_at_\\w+_organizer#company.com$") // id doesn't work
.test(inputText) {
let result:String = inputText.split("_at_")[0] + "#" + inputText.split("_at_")[1].split("_organizer#company.com")[0]
}
This one doesn't work: "^\\w+_at_\\w+_organizer#company.com$"
This one works but its not completed: "\\w+_organizer#company.com$"
Please help,
Ok, I found solution:
since i work with email, i need validate email name and email domain separatly:
let inputText:String = "myemail_at_gmail.com_organizer#company.com"
if Regex("^[A-Z0-9a-z._%+-]+_at_[A-Za-z0-9.-]+\\.[A-Za-z]{2,4}_organizer#company.com$")
.test(inputText) {
let result:String = inputText.split("_at_")[0] + "#" +
inputText.split("_at_")[1].split("_organizer#company.com")[0]
print(result) // myemail#gmail.com
}

How to extract all email address from text in swift

I have a text
var txt = "+abc#gmail.com heyyyyy cool +def#gmail.com"
I want to extract the email address from the text and store it in an array. I want to do it with regular expression. I found the regular expression, but i am not able to save the email in to an array.
i tried
let regEx = "/(\\+[a-zA-Z0-9._-]+#[a-zA-Z0-9._-]+\\.[a-zA-Z0-9._-]+)/gi"
if let email = NSPredicate(format: "SELF MATCHES %#", regEx) {
//what to do here
}
Or am i doing wrong?
I know this is a basic question. Please help
Thanks in advance.
These code worked for me. You can checkout email regex from here.
SWIFT 5
func extractEmailAddrIn(text: String) -> [String] {
var results = [String]()
let emailRegex = "[A-Z0-9a-z._%+-]+#[A-Za-z0-9.-]+\\.[A-Za-z]{2,64}"
let nsText = text as NSString
do {
let regExp = try NSRegularExpression(pattern: emailRegex, options: .caseInsensitive)
let range = NSMakeRange(0, text.count)
let matches = regExp.matches(in: text, options: .reportProgress, range: range)
for match in matches {
let matchRange = match.range
results.append(nsText.substring(with: matchRange))
}
} catch (let error) {
print(error)
}
return results
}
SWIFT 3
func extractEmailAddrIn(text: String) -> [String] {
var results = [String]()
let emailRegex = "[A-Z0-9a-z._%+-]+#[A-Za-z0-9.-]+\\.[A-Za-z]{2,64}"
let nsText = text as NSString
do {
let regExp = try NSRegularExpression(pattern: emailRegex, options: NSRegularExpressionOptions.CaseInsensitive)
let range = NSMakeRange(0, text.characters.count)
let matches = regExp.matchesInString(text, options: .ReportProgress, range: range)
for match in matches {
let matchRange = match.range
results.append(nsText.substringWithRange(matchRange))
}
} catch _ {
}
return results
}
I recommend using the NSRegularExpression class instead of NSPredicate. The format for the regular expressions is from the ICU.
Here is one way to do it:
let pattern = "(\\+[a-zA-Z0-9._-]+#[a-zA-Z0-9._-]+\\.[a-zA-Z0-9._-]+)"
let regexp = NSRegularExpression(pattern: pattern, options: NSRegularExpressionOptions.CaseInsensitive, error: nil)
let str = "+abc#gmail.com heyyyyy cool +def#gmail.com" as NSString
var results = [String]()
regexp?.enumerateMatchesInString(str, options: NSMatchingOptions(0), range: NSRange(location: 0, length: str.length), usingBlock: { (result: NSTextCheckingResult!, _, _) in
results.append(str.substringWithRange(result.range))
})
// Gives [+abc#gmail.com, +def#gmail.com]
Looks like your regex name is wrong. You declare it as regEx but in your NSPredicate you use emailRegEx.
Do you need the plus sign in the mail address?
Without + sign in the address:
([a-zA-Z0-9._-]+#[a-zA-Z0-9._-]+\\.[a-zA-Z0-9._-]+)
Result: ["abc#gmail.com", "def#gmail.com"]
With + sign in the address:
([\\+a-zA-Z0-9._-]+#[a-zA-Z0-9._-]+\\.[a-zA-Z0-9._-]+)
Result: ["+abc#gmail.com", "+def#gmail.com"]
Here is the String extensions I have created to extract emails from It works well in swift 4.
extension String {
func getEmails() -> [String] {
if let regex = try? NSRegularExpression(pattern: "[A-Z0-9a-z._%+-]+#[A-Za-z0-9.-]+\\.[A-Za-z]{2,6}", options: .caseInsensitive)
{
let string = self as NSString
return regex.matches(in: self, options: [], range: NSRange(location: 0, length: string.length)).map {
string.substring(with: $0.range).lowercased()
}
}
return []
}
}
Usage
let test = "Precision Construction John Smith cONSTRUCTION WORKER 123 Main St, Ste. 30o
www.precsioncontructien.com adil#gmail.com fraz#gmail.pk janesmit#precisiencenstruction.com 555.555.5SS5"
let emails = test.getEmails()
print(emails)
// results ["adil#gmail.com", "fraz#gmail.pk", "janesmit#precisiencenstruction.com"]

How to group search regular expressions using swift

In regular expressions you can group different matches to easily "pattern match" a given match.
while match != nil {
match = source.rangeOfString(regex, options: .RegularExpressionSearch)
if let m = match {
result.append(source.substringWithRange(m)
source.replaceRange(m, with: "")
}
}
The above works find to find a range of the match, but it cannot tell me the group. For instance if I search for words encapsulated in "" I would like to match a "word" but quickly fetch only word
Is it possible to do so in swift?
Swift is pretty ugly right now with regular expressions -- let's hope for more-native support soon! The method on NSRegularExpression you want is matchesInString. Here's how to use it:
let string = "This is my \"string\" of \"words\"."
let re = NSRegularExpression(pattern: "\"(.+?)\"", options: nil, error: nil)!
let matches = re.matchesInString(string, options: nil, range: NSRange(location: 0, length: string.utf16Count))
println("number of matches: \(matches.count)")
for match in matches as [NSTextCheckingResult] {
// range at index 0: full match
// range at index 1: first capture group
let substring = (string as NSString).substringWithRange(match.rangeAtIndex(1))
println(substring)
}
Output:
number of matches: 2
string
words
You can use this if you want to collect the matched strings.
(My answer is derived from Nate Cooks very helpful answer.)
Updated for Swift 2.1
extension String {
func regexMatches(pattern: String) -> Array<String> {
let re: NSRegularExpression
do {
re = try NSRegularExpression(pattern: pattern, options: [])
} catch {
return []
}
let matches = re.matchesInString(self, options: [], range: NSRange(location: 0, length: self.utf16.count))
var collectMatches: Array<String> = []
for match in matches {
// range at index 0: full match
// range at index 1: first capture group
let substring = (self as NSString).substringWithRange(match.rangeAtIndex(1))
collectMatches.append(substring)
}
return collectMatches
}
}
Updated for Swift 3.0
extension String {
func regexMatches(pattern: String) -> Array<String> {
let re: NSRegularExpression
do {
re = try NSRegularExpression(pattern: pattern, options: [])
} catch {
return []
}
let matches = re.matches(in: self, options: [], range: NSRange(location: 0, length: self.utf16.count))
var collectMatches: Array<String> = []
for match in matches {
// range at index 0: full match
// range at index 1: first capture group
let substring = (self as NSString).substring(with: match.rangeAt(1))
collectMatches.append(substring)
}
return collectMatches
}}
how about this guys, add as extension to String? )) all matches, all groups ) self = String if you want to add not as extension then add String parameter and replace all self to your parameter :)
func matchesForRegexInTextAll(regex: String!) -> [[String]] {
do {
let regex = try NSRegularExpression(pattern: regex, options: [])
let nsString = self as NSString
var resultsFinal = [[String]]()
let results = regex.matchesInString(self,
options: [], range: NSMakeRange(0, nsString.length))
for result in results {
var internalString = [String]()
for var i = 0; i < result.numberOfRanges; ++i{
internalString.append(nsString.substringWithRange(result.rangeAtIndex(i)))
}
resultsFinal.append(internalString)
}
return resultsFinal
} catch let error as NSError {
print("invalid regex: \(error.localizedDescription)")
return []
}
}
All the answers provided are good, but nonetheless I am going to provide my String extension written in Swift 2.2.
Noted differences:
only use the first match
supports multiple captured groups
a more accurate function name (it is capture groups, not matches)
.
extension String {
func capturedGroups(withRegex pattern: String) -> [String]? {
var regex: NSRegularExpression
do {
regex = try NSRegularExpression(pattern: pattern, options: [])
} catch {
return nil
}
let matches = regex.matchesInString(self, options: [], range: NSRange(location:0, length: self.characters.count))
guard let match = matches.first else { return nil }
// Note: Index 1 is 1st capture group, 2 is 2nd, ..., while index 0 is full match which we don't use
let lastRangeIndex = match.numberOfRanges - 1
guard lastRangeIndex >= 1 else { return nil }
var results = [String]()
for i in 1...lastRangeIndex {
let capturedGroupIndex = match.rangeAtIndex(i)
let matchedString = (self as NSString).substringWithRange(capturedGroupIndex)
results.append(matchedString)
}
return results
}
}
To use:
// Will match "bcde"
"abcdefg".capturedGroups(withRegex: "a(.*)f")
Updated for Swift 4
/**
String extension that extract the captured groups with a regex pattern
- parameter pattern: regex pattern
- Returns: captured groups
*/
public func capturedGroups(withRegex pattern: String) -> [String] {
var results = [String]()
var regex: NSRegularExpression
do {
regex = try NSRegularExpression(pattern: pattern, options: [])
} catch {
return results
}
let matches = regex.matches(in: self, options: [], range: NSRange(location:0, length: self.count))
guard let match = matches.first else { return results }
let lastRangeIndex = match.numberOfRanges - 1
guard lastRangeIndex >= 1 else { return results }
for i in 1...lastRangeIndex {
let capturedGroupIndex = match.range(at: i)
let matchedString = (self as NSString).substring(with: capturedGroupIndex)
results.append(matchedString)
}
return results
}
To use:
// Will match "bcde"
"abcdefg".capturedGroups(withRegex: "a(.*)f")
Gist on github: https://gist.github.com/unshapedesign/1b95f78d7f74241f706f346aed5384ff