Swift 2 NSRegularExpression - regex

Eventually I want to be able to input a string like "\mycard{front1}{back1} \mycard{front2}{back2} \mycard{front3}{back3}" and return the front and back of each card.
I found this website on NSRegularExpression, but I'm having a hard time adjusting it to my problem.
Here is what I have so far.
import Foundation
func rangeFromNSRange(nsRange: NSRange, forString str: String) -> Range<String.Index>? {
let fromUTF16 = str.utf16.startIndex.advancedBy(nsRange.location, limit: str.utf16.endIndex)
let toUTF16 = fromUTF16.advancedBy(nsRange.length, limit: str.utf16.endIndex)
if let from = String.Index(fromUTF16, within: str), let to = String.Index(toUTF16, within: str) {
return from ..< to
}
return nil
}
do {
// let input = "My name is Taylor Swift"
// let regex = try NSRegularExpression(pattern: "My name is (.*)", options: NSRegularExpressionOptions.CaseInsensitive)
let input = "mycard{front}{back}"
let regex = try NSRegularExpression(pattern: "mycard{(.*)}{(.*)}", options: NSRegularExpressionOptions.CaseInsensitive)
let matches = regex.matchesInString(input, options: [], range: NSMakeRange(0, input.characters.count))
if let match = matches.first {
let range = match.rangeAtIndex(1)
if let swiftRange = rangeFromNSRange(range, forString: input) {
let name = input.substringWithRange(swiftRange)
}
}
} catch {
// regex was bad!
}

As stated in my comment you need to escape the { and }. That results in the following regex: mycard\\{(.*)\\}\\{(.*)\\}.
You then might want to change your match logic a little bit to output the expected results:
if let match = matches.first {
for i in 1..<match.numberOfRanges {
let range = match.rangeAtIndex(i)
if let swiftRange = rangeFromNSRange(range, forString: input) {
let name = input.substringWithRange(swiftRange)
print(name)
}
}
}
Which outputs
front
back
If you want to match multiple cards use the following regex:
mycard\\{([^{]*)\\}\\{([^{]*)\\}
Then iterate over the matches
for match in matches {
for i in 1..<match.numberOfRanges {
let range = match.rangeAtIndex(i)
if let swiftRange = rangeFromNSRange(range, forString: input) {
let name = input.substringWithRange(swiftRange)
print(name)
}
}
}
For the input mycard{front}{back} mycard{front1}{back1} the output correctly is
front
back
front1
back1

I gave up on regex. I just don't think it will do the trick here. I came up with another solution.
import Foundation
extension String {
subscript (r: Int) -> Character? {
var cur = 0
for char in self.characters {
if cur == r {
return char
}
cur += 1
}
return nil
}
subscript (r: Range<Int>) -> String {
return substringWithRange(Range(start: startIndex.advancedBy(r.startIndex), end: startIndex.advancedBy(r.endIndex)))
}
func parseBrackets () -> [String]? {
var list: [String] = []
var level = 0
var start = 0
for var i=0; i < self.characters.count - 1; i++ {
if self[i] == "{" {
level += 1
if level == 1 {
start = i + 1
}
} else if self[i] == "}" {
if level == 1 {
list.append(self[start..<i])
}
level -= 1
}
}
if list.count > 0 {
return list
} else {
return nil
}
}
}
let testString = "mycard{f{}ront}{termins{x}{n}} mycard{front1}{back1} mycard{front2}{back2}"
let list = testString.parseBrackets()
for a in list! {
print(a)
}
Which gives the desired output
f{}ront
termins{x}{n}
front1
back1
front2

Related

How to read a timecode track with AVAsset and Swift 3?

I'd like to read the time value of a timecode track. There is an
excellent documentation from Apple (see Technical Note 2310)
but it's written in Objective C.
I have translated the core logic to Swift 3. It works exactly as the
ObjC version, which means that a CMSampleBuffer from a timecode
track is read and converted to a CMBlockBuffer. It fails when I
create the data pointer CMBlockBufferGetDataPointer (in the
timecodeFrame() func), which means, that the raw data is always
giving me 0 frames. So it boils down to the question, how do I
handle the raw data correctly?
import Foundation
import AVFoundation
import CoreMedia
let movie = URL(fileURLWithPath: "videoWithTimecodeTrack.mov")
let asset = AVAsset(url: movie)
asset.loadValuesAsynchronously(forKeys: ["tracks"]) {
var error: NSError?
guard asset.statusOfValue(forKey: "tracks", error: &error) == AVKeyValueStatus.loaded
else { if let error = error { return print(error) } }
readStartTimecode(asset: asset)
}
func readStartTimecode(ofAsset asset: AVAsset) {
let timecodeTracks = asset.tracks(withMediaType: AVMediaTypeTimecode)
guard let timecodeTrack = timecodeTracks.first,
let assetReader = try? AVAssetReader(asset: asset) else { return }
let readerOutput = AVAssetReaderTrackOutput(track: timecodeTrack, outputSettings: nil)
assetReader.add(readerOutput)
guard assetReader.startReading() else { return }
while let sampleBuffer = readerOutput.copyNextSampleBuffer() {
if let frame = timecodeFrame(sampleBuffer: sampleBuffer) {
print("timecodeFrame: \(frame)")
}
}
}
func timecodeFrame(sampleBuffer: CMSampleBuffer) -> UInt32? {
guard let blockBuffer = CMSampleBufferGetDataBuffer(sampleBuffer),
let formatDescription = CMSampleBufferGetFormatDescription(sampleBuffer)
else { return nil }
var rawData: UnsafeMutablePointer<Int8>? = nil
var length: Int = 0
var totalLength: Int = 0
let status = CMBlockBufferGetDataPointer(blockBuffer, 0, &length, &totalLength, &rawData)
guard status == kCMBlockBufferNoErr,
let frameRead = rawData?.pointee
else { return nil }
let type = CMFormatDescriptionGetMediaSubType(formatDescription)
if type == kCMTimeCodeFormatType_TimeCode32 {
let frame = UInt32(frameRead)
let bigFrame = CFSwapInt32BigToHost(frame)
print("kCMTimeCodeFormatType_TimeCode32: \(bigFrame)")
}
if type == kCMTimeCodeFormatType_TimeCode64 {
print("kCMTimeCodeFormatType_TimeCode64")
// todo
}
return nil
}
Edit: the Objective C version of the data pointer retrieval looks like this:
size_t length = 0;
size_t totalLength = 0;
char *rawData = NULL;
CMBlockBufferGetDataPointer(blockBuffer, 0, &length, &totalLength, &rawData);
if (status == kCMBlockBufferNoErr) {
int32_t *frameNumberRead = (int32_t *)rawData;
(int)Endian32_Swap(*frameNumberRead)]
}
The solution is to not convert the Int8 data like UInt32(rawData.pointee) but to access the UnsafeMutablePointer<Int8> pointer's memory as a different type (temporarily). This would look like this:
if let frames = rawData?.withMemoryRebound(to: UInt32.self, capacity: 1, { CFSwapInt32BigToHost($0.pointee) }) {
return frames
}
The full function would look like this:
func timecodeFrame(sampleBuffer: CMSampleBuffer) -> UInt32? {
guard let blockBuffer = CMSampleBufferGetDataBuffer(sampleBuffer),
let formatDescription = CMSampleBufferGetFormatDescription(sampleBuffer)
else { return nil }
var rawData: UnsafeMutablePointer<Int8>? = nil
var length: Int = 0
var totalLength: Int = 0
let status = CMBlockBufferGetDataPointer(blockBuffer, 0, &length, &totalLength, &rawData)
guard status == kCMBlockBufferNoErr else { return nil }
let type = CMFormatDescriptionGetMediaSubType(formatDescription)
if type == kCMTimeCodeFormatType_TimeCode32 {
if let frames = rawData?.withMemoryRebound(to: UInt32.self, capacity: 1, { CFSwapInt32BigToHost($0.pointee) }) {
return frames
}
}
if type == kCMTimeCodeFormatType_TimeCode64 {
if let frames = rawData?.withMemoryRebound(to: UInt64.self, capacity: 1, { CFSwapInt64BigToHost($0.pointee) }) {
return UInt32(frames)
}
}
return nil
}
I hope this is useful to others who want to read the start timecode of a video's timecode track.

Returning a substring after a specified character

If I have a string, e.g. spider, how do you create a new string that starts at the first vowel and ends with the last character of the initial string.
For example:
- spider would be ider
- elephant would be elephant
- campus would be ampus
Thank you for the help.
Simple solution with a custom CharacterSet as String extension
extension String {
func substringFromFirstVowel() -> String
{
let vowelCharacterSet = CharacterSet(charactersIn: "aeiouAEIOU")
guard let range = self.rangeOfCharacter(from: vowelCharacterSet) else { return self }
return self.substring(from: range.lowerBound)
}
}
"elephant".substringFromFirstVowel() // elephant
"spider".substringFromFirstVowel() // ider
"campus".substringFromFirstVowel() // ampus
Try this little function
func firstVowel(input : String) -> String {
var firstVowel = true
let vowels = "aAeEiIoOuU".characters
var result = ""
for char in input.characters {
if(!firstVowel) {
result.append(char)
}
if(vowels.contains(char) && firstVowel) {
firstVowel = false
result.append(char)
}
}
return result
}
print(firstVowels(input: "elephant")) //prints elephant
print(firstVowels(input: "Spider")) //prints ider

Swift 3 - How do I extract captured groups in regular expressions?

I am using Swift 3 and trying to access captured groups.
let regexp = "((ALREADY PAID | NOT ALR | PROVIDER MAY | READY | MAY BILL | BILL YOU | PAID)((.|\\n)*))(( \\d+)(\\.+|-+)(\\d\\d))"
// check if some substring is in the recognized text
if let range = stringText.range(of:regexp, options: .regularExpression) {
let result = tesseract.recognizedText.substring(with:range)
}
I want to be able to extract out the last two numbers captured (\d\d) so if the text was: ALREADY PAID asfasdfadsfasdf 39.15, it would extract 15. Here is a regex builder that shows what I want. Normally, I would be able to do $8 to get the 8th group that was extracted but I don't know how to do that in Swift 3.
http://regexr.com/3fh1e
Swift 4, Swift 5
extension String {
func groups(for regexPattern: String) -> [[String]] {
do {
let text = self
let regex = try NSRegularExpression(pattern: regexPattern)
let matches = regex.matches(in: text,
range: NSRange(text.startIndex..., in: text))
return matches.map { match in
return (0..<match.numberOfRanges).map {
let rangeBounds = match.range(at: $0)
guard let range = Range(rangeBounds, in: text) else {
return ""
}
return String(text[range])
}
}
} catch let error {
print("invalid regex: \(error.localizedDescription)")
return []
}
}
}
example:
let res = "1my 2own 3string".groups(for:"(([0-9]+)[a-z]+) ")
(lldb) po res ▿ 2 elements
▿ 0 : 3 elements
- 0 : "1my "
- 1 : "1my"
- 2 : "1"
▿ 1 : 3 elements
- 0 : "2own "
- 1 : "2own"
- 2 : "2"
but I don't know how to do that in Swift 3.
When you receive a match from NSRegularExpression, what you get is an NSTextCheckingResult. You call rangeAt to get a specific capture group.
Example:
let s = "hey ho ha"
let pattern = "(h).*(h).*(h)"
// our goal is capture group 3, "h" in "ha"
let regex = try! NSRegularExpression(pattern: pattern)
let result = regex.matches(in:s, range:NSMakeRange(0, s.utf16.count))
let third = result[0].rangeAt(3) // <-- !!
third.location // 7
third.length // 1
As ever, a simple extension seems to be the way around swift's bizarre overcomplication...
extension NSTextCheckingResult {
func groups(testedString:String) -> [String] {
var groups = [String]()
for i in 0 ..< self.numberOfRanges
{
let group = String(testedString[Range(self.range(at: i), in: testedString)!])
groups.append(group)
}
return groups
}
}
Use it like this:
if let match = myRegex.firstMatch(in: someString, range: NSMakeRange(0, someString.count)) {
let groups = match.groups(testedString: someString)
//... do something with groups
}
A slightly altered version based on #Vyacheslav's answer with different error handling approach:
enum ParsingError: Error {
// You can pass more info here with parameter(s) if you want, e.g. `case let invalidRange(originalString, failedAtRange)`
case invalidRange
}
protocol StringUtilityRequired {
var stringUtility: StringUtility { get }
}
extension StringUtilityRequired {
var stringUtility: StringUtility { StringUtility() }
}
enum StringUtility {
func groups(_ str: String, pattern: String) throws -> [[String]] {
let regex = try NSRegularExpression(pattern: pattern)
let matches = regex.matches(in: str, range: NSRange(str.startIndex..., in: str))
return try matches.map { match throws in
return try (0 ..< match.numberOfRanges).map { range throws in
let rangeBounds = match.range(at: range)
guard let range = Range(rangeBounds, in: str) else {
throw ParsingError.invalidRange
}
return String(str[range])
}
}
}
// This component is stateless; it doesn't have any side effect
case pure
init() { self = .pure }
}
Usage:
struct MyComponent: StringUtilityRequired {
func myFunc() throws {
let groups = try stringUtility.groups("Test 123", pattern: "(.+)\s(.+)")
print(groups)
}
}

Swift 3 Variable self-removes

I have very-very strange things.
In my simple function I create variable which contains dictionary of settings parameters. It is set as 'let', so inner loop just reads it.
In a random moment of loop time it crashes with "unresolved settings".
It seems like smth makes it nil. Who does it?
private static func preferencesFilter(userIDs: [Int], access: String) -> [User] {
self.sharedInstance.delegate?.updateActionLabel(label: "Filter")
var result = [VKUser]()
let settings = self.parseSettings()
let progressFraction = 1.00 / Float(userIDs.count)
var n = 0
for userID in userIDs {
if sharedInstance.stopped {
return []
}
n += 1
let user = VKUser.getUser(id: userID, access_token: access_token)
if settings["gender"] != nil {
if user.sex == settings["gender"] as! String {
if (user.born?.isBetweeen(date1: settings["minAge"] as! Date, date2: settings["maxAge"] as! Date))! {
if settings["country"] != nil {
if user.country == settings["country"] as! String {
result.append(user)
}
}
else {
result.append(user)
}
}
}
}
else {
if (user.born?.isBetweeen(date1: settings["minAge"] as! Date, date2: settings["maxAge"] as! Date))! {
if settings["country"] != nil {
if user.country == settings["country"] as! String {
result.append(user)
}
}
else {
result.append(user)
}
}
}
self.sharedInstance.delegate?.updateProgress(value: Float(n) * progressFraction)
}
return result
}
I refactored your code into something more swift like:
private static func preferencesFilter(userIDs: [Int], access_token: String) -> [User]? {
guard userIDs.count > 0 else {
return [User]() // no input, return empty list
}
let settings = self.parseSettings()
guard let minAge = settings["minAge"] as? Date,
let maxAge = settings["maxAge"] as? Date
else {
return nil
}
let country = settings["country"] as? String // specified or nil
let gender = settings["gender"] as? String // specified or nil
sharedInstance.delegate?.updateActionLabel(label: "Filter")
var result = [VKUser]()
let progressFraction = 1.00 / Float(userIDs.count)
var n = 0
for userID in userIDs {
if !sharedInstance.stopped {
n += 1
let user = VKUser.getUser(id: userID, access_token: access_token)
var shouldInclude = true
if user.sex != gender { // wrong sex or no required gender specified
shouldInclude = false
}
if user.country != country { // wrong country or no required country specified
shouldInclude = false
}
if let born = user.born {
if !born.isBetweeen(date1: minAge, date2: maxAge) {
shouldInclude = false
}
} else { // no user.born date, cant check if in range
shouldInclude = false
}
if shouldInclude {
result.append(user)
}
sharedInstance.delegate?.updateProgress(value: Float(n) * progressFraction)
}
}
return result
}
Is that what you intended to write? How is that running for you?
Can you change this into a non-static method? Makes more sense to me.
You can see it returns an optional now, since the method might fail with a nil. Your calling code should handle that correctly.

How to group search regular expressions using swift

In regular expressions you can group different matches to easily "pattern match" a given match.
while match != nil {
match = source.rangeOfString(regex, options: .RegularExpressionSearch)
if let m = match {
result.append(source.substringWithRange(m)
source.replaceRange(m, with: "")
}
}
The above works find to find a range of the match, but it cannot tell me the group. For instance if I search for words encapsulated in "" I would like to match a "word" but quickly fetch only word
Is it possible to do so in swift?
Swift is pretty ugly right now with regular expressions -- let's hope for more-native support soon! The method on NSRegularExpression you want is matchesInString. Here's how to use it:
let string = "This is my \"string\" of \"words\"."
let re = NSRegularExpression(pattern: "\"(.+?)\"", options: nil, error: nil)!
let matches = re.matchesInString(string, options: nil, range: NSRange(location: 0, length: string.utf16Count))
println("number of matches: \(matches.count)")
for match in matches as [NSTextCheckingResult] {
// range at index 0: full match
// range at index 1: first capture group
let substring = (string as NSString).substringWithRange(match.rangeAtIndex(1))
println(substring)
}
Output:
number of matches: 2
string
words
You can use this if you want to collect the matched strings.
(My answer is derived from Nate Cooks very helpful answer.)
Updated for Swift 2.1
extension String {
func regexMatches(pattern: String) -> Array<String> {
let re: NSRegularExpression
do {
re = try NSRegularExpression(pattern: pattern, options: [])
} catch {
return []
}
let matches = re.matchesInString(self, options: [], range: NSRange(location: 0, length: self.utf16.count))
var collectMatches: Array<String> = []
for match in matches {
// range at index 0: full match
// range at index 1: first capture group
let substring = (self as NSString).substringWithRange(match.rangeAtIndex(1))
collectMatches.append(substring)
}
return collectMatches
}
}
Updated for Swift 3.0
extension String {
func regexMatches(pattern: String) -> Array<String> {
let re: NSRegularExpression
do {
re = try NSRegularExpression(pattern: pattern, options: [])
} catch {
return []
}
let matches = re.matches(in: self, options: [], range: NSRange(location: 0, length: self.utf16.count))
var collectMatches: Array<String> = []
for match in matches {
// range at index 0: full match
// range at index 1: first capture group
let substring = (self as NSString).substring(with: match.rangeAt(1))
collectMatches.append(substring)
}
return collectMatches
}}
how about this guys, add as extension to String? )) all matches, all groups ) self = String if you want to add not as extension then add String parameter and replace all self to your parameter :)
func matchesForRegexInTextAll(regex: String!) -> [[String]] {
do {
let regex = try NSRegularExpression(pattern: regex, options: [])
let nsString = self as NSString
var resultsFinal = [[String]]()
let results = regex.matchesInString(self,
options: [], range: NSMakeRange(0, nsString.length))
for result in results {
var internalString = [String]()
for var i = 0; i < result.numberOfRanges; ++i{
internalString.append(nsString.substringWithRange(result.rangeAtIndex(i)))
}
resultsFinal.append(internalString)
}
return resultsFinal
} catch let error as NSError {
print("invalid regex: \(error.localizedDescription)")
return []
}
}
All the answers provided are good, but nonetheless I am going to provide my String extension written in Swift 2.2.
Noted differences:
only use the first match
supports multiple captured groups
a more accurate function name (it is capture groups, not matches)
.
extension String {
func capturedGroups(withRegex pattern: String) -> [String]? {
var regex: NSRegularExpression
do {
regex = try NSRegularExpression(pattern: pattern, options: [])
} catch {
return nil
}
let matches = regex.matchesInString(self, options: [], range: NSRange(location:0, length: self.characters.count))
guard let match = matches.first else { return nil }
// Note: Index 1 is 1st capture group, 2 is 2nd, ..., while index 0 is full match which we don't use
let lastRangeIndex = match.numberOfRanges - 1
guard lastRangeIndex >= 1 else { return nil }
var results = [String]()
for i in 1...lastRangeIndex {
let capturedGroupIndex = match.rangeAtIndex(i)
let matchedString = (self as NSString).substringWithRange(capturedGroupIndex)
results.append(matchedString)
}
return results
}
}
To use:
// Will match "bcde"
"abcdefg".capturedGroups(withRegex: "a(.*)f")
Updated for Swift 4
/**
String extension that extract the captured groups with a regex pattern
- parameter pattern: regex pattern
- Returns: captured groups
*/
public func capturedGroups(withRegex pattern: String) -> [String] {
var results = [String]()
var regex: NSRegularExpression
do {
regex = try NSRegularExpression(pattern: pattern, options: [])
} catch {
return results
}
let matches = regex.matches(in: self, options: [], range: NSRange(location:0, length: self.count))
guard let match = matches.first else { return results }
let lastRangeIndex = match.numberOfRanges - 1
guard lastRangeIndex >= 1 else { return results }
for i in 1...lastRangeIndex {
let capturedGroupIndex = match.range(at: i)
let matchedString = (self as NSString).substring(with: capturedGroupIndex)
results.append(matchedString)
}
return results
}
To use:
// Will match "bcde"
"abcdefg".capturedGroups(withRegex: "a(.*)f")
Gist on github: https://gist.github.com/unshapedesign/1b95f78d7f74241f706f346aed5384ff