Number of occurrences of substring in string in Swift - regex

My main string is "hello Swift Swift and Swift" and substring is Swift.
I need to get the number of times the substring "Swift" occurs in the mentioned string.
This code can determine whether the pattern exists.
var string = "hello Swift Swift and Swift"
if string.rangeOfString("Swift") != nil {
println("exists")
}
Now I need to know the number of occurrence.

A simple approach would be to split on "Swift", and subtract 1 from the number of parts:
let s = "hello Swift Swift and Swift"
let tok = s.components(separatedBy:"Swift")
print(tok.count-1)
This code prints 3.
Edit: Before Swift 3 syntax the code looked like this:
let tok = s.componentsSeparatedByString("Swift")

Should you want to count characters rather than substrings:
extension String {
func count(of needle: Character) -> Int {
return reduce(0) {
$1 == needle ? $0 + 1 : $0
}
}
}

Optimising dwsolbergs solution to count faster. Also faster than componentsSeparatedByString.
extension String {
/// stringToFind must be at least 1 character.
func countInstances(of stringToFind: String) -> Int {
assert(!stringToFind.isEmpty)
var count = 0
var searchRange: Range<String.Index>?
while let foundRange = range(of: stringToFind, options: [], range: searchRange) {
count += 1
searchRange = Range(uncheckedBounds: (lower: foundRange.upperBound, upper: endIndex))
}
return count
}
}
Usage:
// return 2
"aaaa".countInstances(of: "aa")
If you want to ignore accents, you may replace options: [] with options: .diacriticInsensitive like dwsolbergs did.
If you want to ignore case, you may replace options: [] with options: .caseInsensitive like ConfusionTowers suggested.
If you want to ignore both accents and case, you may replace options: [] with options: [.caseInsensitive, .diacriticInsensitive] like ConfusionTowers suggested.
If, on the other hand, you want the fastest comparison possible and you can guarantee some canonical form for composed character sequences, then you may consider option .literal and it will only perform exact matchs.

Swift 5 Extension
extension String {
func numberOfOccurrencesOf(string: String) -> Int {
return self.components(separatedBy:string).count - 1
}
}
Example use
let string = "hello Swift Swift and Swift"
let numberOfOccurrences = string.numberOfOccurrencesOf(string: "Swift")
// numberOfOccurrences = 3

I'd recommend an extension to string in Swift 3 such as:
extension String {
func countInstances(of stringToFind: String) -> Int {
var stringToSearch = self
var count = 0
while let foundRange = stringToSearch.range(of: stringToFind, options: .diacriticInsensitive) {
stringToSearch = stringToSearch.replacingCharacters(in: foundRange, with: "")
count += 1
}
return count
}
}
It's a loop that finds and removes each instance of the stringToFind, incrementing the count on each go-round. Once the searchString no longer contains any stringToFind, the loop breaks and the count returns.
Note that I'm using .diacriticInsensitive so it ignore accents (for example résume and resume would both be found). You might want to add or change the options depending on the types of strings you want to find.

I needed a way to count substrings that may contain the start of the next matched substring. Leveraging dwsolbergs extension and Strings range(of:options:range:locale:) method I came up with this String extension
extension String
{
/**
Counts the occurrences of a given substring by calling Strings `range(of:options:range:locale:)` method multiple times.
- Parameter substring : The string to search for, optional for convenience
- Parameter allowOverlap : Bool flag indicating whether the matched substrings may overlap. Count of "🐼🐼" in "🐼🐼🐼🐼" is 2 if allowOverlap is **false**, and 3 if it is **true**
- Parameter options : String compare-options to use while counting
- Parameter range : An optional range to limit the search, default is **nil**, meaning search whole string
- Parameter locale : Locale to use while counting
- Returns : The number of occurrences of the substring in this String
*/
public func count(
occurrencesOf substring: String?,
allowOverlap: Bool = false,
options: String.CompareOptions = [],
range searchRange: Range<String.Index>? = nil,
locale: Locale? = nil) -> Int
{
guard let substring = substring, !substring.isEmpty else { return 0 }
var count = 0
let searchRange = searchRange ?? startIndex..<endIndex
var searchStartIndex = searchRange.lowerBound
let searchEndIndex = searchRange.upperBound
while let rangeFound = range(of: substring, options: options, range: searchStartIndex..<searchEndIndex, locale: locale)
{
count += 1
if allowOverlap
{
searchStartIndex = index(rangeFound.lowerBound, offsetBy: 1)
}
else
{
searchStartIndex = rangeFound.upperBound
}
}
return count
}
}

why not just use some length maths??
extension String {
func occurences(of search:String) -> Int {
guard search.count > 0 else {
preconditionFailure()
}
let shrunk = self.replacingOccurrences(of: search, with: "")
return (self.count - shrunk.count)/search.count
}
}

Try this
var mainString = "hello Swift Swift and Swift"
var count = 0
mainString.enumerateSubstrings(in: mainString.startIndex..<mainString.endIndex, options: .byWords) { (subString, subStringRange, enclosingRange, stop) in
if case let s? = subString{
if s.caseInsensitiveCompare("swift") == .orderedSame{
count += 1
}
}
}
print(count)

For the sake of completeness – and because there is a regex tag – this is a solution with Regular Expression
let string = "hello Swift Swift and Swift"
let regex = try! NSRegularExpression(pattern: "swift", options: .caseInsensitive)
let numberOfOccurrences = regex.numberOfMatches(in: string, range: NSRange(string.startIndex..., in: string))
The option .caseInsensitive is optional.

My solution, maybe it will be better to use String.Index instead of Int range but I think in such way it is a bit easier to read.
extension String {
func count(of char: Character, range: (Int, Int)? = nil) -> Int {
let range = range ?? (0, self.count)
return self.enumerated().reduce(0) {
guard ($1.0 >= range.0) && ($1.0 < range.1) else { return $0 }
return ($1.1 == char) ? $0 + 1 : $0
}
}
}

Solution which uses a higher order functions
func subStringCount(str: String, substr: String) -> Int {
{ $0.isEmpty ? 0 : $0.count - 1 } ( str.components(separatedBy: substr))
}
Unit Tests
import XCTest
class HigherOrderFunctions: XCTestCase {
func testSubstringWhichIsPresentInString() {
XCTAssertEqual(subStringCount(str: "hello Swift Swift and Swift", substr: "Swift"), 3)
}
func testSubstringWhichIsNotPresentInString() {
XCTAssertEqual(subStringCount(str: "hello", substr: "Swift"), 0)
}
}

Another way using RegexBuilder in iOS 16+ & swift 5.7+.
import RegexBuilder
let text = "hello Swift Swift and Swift"
let match = text.matches(of: Regex{"Swift"})
print(match.count) // prints 3
Using this as a function
func countSubstrings(string : String, subString : String)-> Int{
return string.matches(of: Regex{subString}).count
}
print(countSubstrings(string: text, subString: "Swift")) //prints 3
Using this as an Extension
extension String {
func countSubstrings(subString : String)-> Int{
return self.matches(of: Regex{subString}).count
}
}
print(text.countSubstrings(subString: "Swift")) // prints 3

Related

Most efficient way to remove leading zeros from Swift 3 string

I have a string such as "00123456" that I would like to have in an string "123456", with the leading zeros removed.
I've found several examples for Objective-C but not sure best way to do so with Swift 3.
Thanks
You can do that with Regular Expression
let string = "00123456"
let trimmedString = string.replacingOccurrences(of: "^0+", with: "", options: .regularExpression)
The benefit is no double conversion and no force unwrapping.
Just convert the string to an int and then back to a string again. It will remove the leading zeros.
let numberString = "00123456"
let numberAsInt = Int(numberString)
let backToString = "\(numberAsInt!)"
Result: "123456"
First, create Validator then use it in any class.
This is an example and it works :)
This is swift 4.0
class PhoneNumberExcludeZeroValidator {
func validate(_ value: String) -> String {
var subscriberNumber = value
let prefixCase = "0"
if subscriberNumber.hasPrefix(prefixCase) {
subscriberNumber.remove(at: subscriberNumber.startIndex)
}
return subscriberNumber
}
}
example for usage:
if let countryCallingCode = countryCallingCodeTextField.text, var subscriberNumber = phoneNumberTextField.text {
subscriberNumber = PhoneNumberExcludeZeroValidator().validate(subscriberNumber)
let phoneNumber = "\(countryCallingCode)\(subscriberNumber)"
registerUserWith(phoneNumber: phoneNumber)
}
let number = "\(String(describing: Int(text)!))"

Without using NSRegularExpression, How can I get all matches of my string regular expression?

Swift 3 introduced String.range(of:options). Then, with this function, is possible match a part of string without creating a NSRegularExpression object, for example:
let text = "it is need #match my both #hashtag!"
let match = text.range(of: "(?:^#|\\s#)[\\p{L}0-9_]*", options: .regularExpression)!
print(text[match]) // #math
But, is possible match both occurrences of the regexp (that is, #match and #hashtag), instead of only the first?
let text = "it is need #match my both #hashtag!"
// create an object to store the ranges found
var ranges: [Range<String.Index>] = []
// create an object to store your search position
var start = text.startIndex
// create a while loop to find your regex ranges
while let range = text.range(of: "(?:^#|\\s#)[\\p{L}0-9_]*", options: .regularExpression, range: start..<text.endIndex) {
// append your range found
ranges.append(range)
// and change the startIndex of your string search
start = range.lowerBound < range.upperBound ? range.upperBound : text.index(range.lowerBound, offsetBy: 1, limitedBy: text.endIndex) ?? text.endIndex
}
ranges.forEach({print(text[$0])})
This will print
#match
#hashtag
If you need to use it more than once in your code you should add this extension to your project:
extension StringProtocol {
func ranges<S: StringProtocol>(of string: S, options: String.CompareOptions = []) -> [Range<Index>] {
var result: [Range<Index>] = []
var start = startIndex
while start < endIndex,
let range = self[start...].range(of: string, options: options) {
result.append(range)
start = range.lowerBound < range.upperBound ?
range.upperBound : index(after: range.lowerBound)
}
return result
}
}
usage:
let text = "it is need #match my both #hashtag!"
let pattern = "(?<!\\S)#[\\p{L}0-9_]*"
let ranges = text.ranges(of: pattern, options: .regularExpression)
let matches = ranges.map{text[$0]}
print(matches) // ["#match", "#hashtag"]

Swift 3 conversion emcompassing muliple issues

I have started converting to swift 3 while removing NS classes as much as possible, but ran into a snag with his code:
var S: String = ADataItem.description_text;
// FRegExBufui_Image is of type NSRegularExpression
let matches: [NSTextCheckingResult] = FRegexBufUI_Image.matches(in: S, options: NSRegularExpression.MatchingOptions(), range: NSRange(location: 0, length: S.characters.count));
if matches.count > 0 {
for m in 0 ..< matches.count {
S = S.substring(with: match.rangeAt(m));
I get error
Cannot convert value of type 'NSRange' (aka '_NSRange') to expected
argument type 'Range'
(aka'Range')
I think maybe the reason for the problem is I am now mixing swift datatypes/classes with NS.
The mos clean solution here... is that simply casting NSRange to Range? Or is there a way to go fully Swift when I need to use regular expressions as well?
A Swift Range and an NSRange are different things. It looks like the function is expecting a Swift range which you can create using the ..< operator. Instead of
NSRange(location: 0, length: S.characters.count)
write
0 ..< S.characters.count
Note that the above two things are not identical in semantics although they both represent the same set of characters. The NSRange takes the start location and the length of the character sequence. The Swift Range uses the lower and upper bound (the upper bound is excluded).
The easiest way is to bridge the string to NSString
let matches = FRegexBufUI_Image.matches(in: S, options: NSRegularExpression.MatchingOptions(), range: NSRange(location: 0, length: S.characters.count));
for match in matches { // don't use ugly C-style index based loops
let substring = (S as NSString).substring(with: match.rangeAt(m))
}
If you don't want to use mixed types implement this String extension which converts Range<String.Index> to NSRange:
extension String {
func range(from nsRange: NSRange) -> Range<String.Index>? {
guard
let from16 = utf16.index(utf16.startIndex, offsetBy: nsRange.location, limitedBy: utf16.endIndex),
let to16 = utf16.index(from16, offsetBy: nsRange.length, limitedBy: utf16.endIndex),
let from = String.Index(from16, within: self),
let to = String.Index(to16, within: self)
else { return nil }
return from ..< to
}
func substring(withNSRange range : NSRange) -> String
{
let swiftRange = self.range(from : range)
return swiftRange != nil ? self.substring(with: swiftRange!) : self
}
}
and use it:
for match in matches { // don't use ugly C-style index based loops
let substring = S.substring(withNSRange: match.rangeAt(m))
}
Edit:
In Swift 4+ the extension has become obsolete. There is a convenience initializer to create Range<String.Index> from NSRange
for match in matches { // don't use ugly C-style index based loops
let stringRange = Range(match.range(at: m), in: S)!
let substring = String(S[stringRange])
}

How to use regex with Swift?

I am making an app in Swift and I need to catch 8 numbers from a string.
Here's the string:
index.php?page=index&l=99182677
My pattern is:
&l=(\d{8,})
And here's my code:
var yourAccountNumber = "index.php?page=index&l=99182677"
let regex = try! NSRegularExpression(pattern: "&l=(\\d{8,})", options: NSRegularExpressionOptions.CaseInsensitive)
let range = NSMakeRange(0, yourAccountNumber.characters.count)
let match = regex.matchesInString(yourAccountNumber, options: NSMatchingOptions.Anchored, range: range)
Firstly, I don't know what the NSMatchingOptions means, on the official Apple library, I don't get all the .Anchored, .ReportProgress, etc stuff. Anyone would be able to lighten me up on this?
Then, when I print(match), nothing seems to contain on that variable ([]).
I am using Xcode 7 Beta 3, with Swift 2.0.
ORIGINAL ANSWER
Here is a function you can leverage to get captured group texts:
import Foundation
extension String {
func firstMatchIn(string: NSString!, atRangeIndex: Int!) -> String {
var error : NSError?
let re = NSRegularExpression(pattern: self, options: .CaseInsensitive, error: &error)
let match = re.firstMatchInString(string, options: .WithoutAnchoringBounds, range: NSMakeRange(0, string.length))
return string.substringWithRange(match.rangeAtIndex(atRangeIndex))
}
}
And then:
var result = "&l=(\\d{8,})".firstMatchIn(yourAccountNumber, atRangeIndex: 1)
The 1 in atRangeIndex: 1 will extract the text captured by (\d{8,}) capture group.
NOTE1: If you plan to extract 8, and only 8 digits after &l=, you do not need the , in the limiting quantifier, as {8,} means 8 or more. Change to {8} if you plan to capture just 8 digits.
NOTE2: NSMatchingAnchored is something you would like to avoid if your expected result is not at the beginning of a search range. See documentation:
Specifies that matches are limited to those at the start of the search range.
NOTE3: Speaking about "simplest" things, I'd advise to avoid using look-arounds whenever you do not have to. Look-arounds usually come at some cost to performance, and if you are not going to capture overlapping text, I'd recommend to use capture groups.
UPDATE FOR SWIFT 2
I have come up with a function that will return all matches with all capturing groups (similar to preg_match_all in PHP). Here is a way to use it for your scenario:
func regMatchGroup(regex: String, text: String) -> [[String]] {
do {
var resultsFinal = [[String]]()
let regex = try NSRegularExpression(pattern: regex, options: [])
let nsString = text as NSString
let results = regex.matchesInString(text,
options: [], range: NSMakeRange(0, nsString.length))
for result in results {
var internalString = [String]()
for var i = 0; i < result.numberOfRanges; ++i{
internalString.append(nsString.substringWithRange(result.rangeAtIndex(i)))
}
resultsFinal.append(internalString)
}
return resultsFinal
} catch let error as NSError {
print("invalid regex: \(error.localizedDescription)")
return [[]]
}
}
// USAGE:
let yourAccountNumber = "index.php?page=index&l=99182677"
let matches = regMatchGroup("&l=(\\d{8,})", text: yourAccountNumber)
if (matches.count > 0) // If we have matches....
{
print(matches[0][1]) // Print the first one, Group 1.
}
It may be easier just to use the NSString method instead of NSRegularExpression.
var yourAccountNumber = "index.php?page=index&l=99182677"
println(yourAccountNumber) // index.php?page=index&l=99182677
let regexString = "(?<=&l=)\\d{8,}+"
let options :NSStringCompareOptions = .RegularExpressionSearch | .CaseInsensitiveSearch
if let range = yourAccountNumber.rangeOfString(regexString, options:options) {
let digits = yourAccountNumber.substringWithRange(range)
println("digits: \(digits)")
}
else {
print("Match not found")
}
The (?<=&l=) means precedes but not part of.
In detail:
Look-behind assertion. True if the parenthesized pattern matches text preceding the current input position, with the last character of the match being the input character just before the current position. Does not alter the input position. The length of possible strings matched by the look-behind pattern must not be unbounded (no * or + operators.)
In general performance considerations of a look-behind without instrumented proof is just premature optimization. That being said there may be other valid reasons for and against look-arounds in regular expressions.
ICU User Guide: Regular Expressions
For Swift 2, you can use this extension of String:
import Foundation
extension String {
func firstMatchIn(string: NSString!, atRangeIndex: Int!) -> String {
do {
let re = try NSRegularExpression(pattern: self, options: NSRegularExpressionOptions.CaseInsensitive)
let match = re.firstMatchInString(string as String, options: .WithoutAnchoringBounds, range: NSMakeRange(0, string.length))
return string.substringWithRange(match!.rangeAtIndex(atRangeIndex))
} catch {
return ""
}
}
}
You can get the account-number with:
var result = "&l=(\\d{8,})".firstMatchIn(yourAccountNumber, atRangeIndex: 1)
Replace NSMatchingOptions.Anchored with NSMatchingOptions() (no options)

extract the first word from a string - regex

I have the following string:
str1 = "cat-one,cat2,cat-3";
OR
str1 = "catone,cat-2,cat3";
OR
str1 = "catone";
OR
str1 = "cat-one";
The point here is words may/may not have "-"s in it
Using regex:
How could I extract the 1st word?
Appreciate any help on this.
Thanks,
L
It's pretty easy, just include allowed characters in brackets:
^([\w\-]+)
An approach not using a regex: assuming the first word is delimited always by a comma "," you can do this:
var str1 = "cat-one";
var i = str1.indexOf(",");
var firstTerm = i == -1 ? str1 : str1.substring(0, i);
Edit: Assumed this was a javascript question, for some reason.
If someone, one day would like to do it in Swift here you go with an extension :
extension String {
func firstWord() -> String? {
var error : NSError?
let internalExpression = NSRegularExpression(pattern: "^[a-zA-Z0-9]*", options: .CaseInsensitive, error: &error)!
let matches = internalExpression.matchesInString(self, options: nil, range:NSMakeRange(0, countElements(self)))
if (matches.count > 0) {
let range = (matches[0] as NSTextCheckingResult).range
return (self as NSString).substringWithRange(range)
}
return nil
}
}
To use it just write:
myString.firstWord()