Extracting numbers from a string in a dataframe

Extracting numbers from a string in a dataframe - regex

I was hoping somebody would be able to show me a way to extract data from a character vector.
The dataframe is as below
structure(list(Sensitivity = structure(c(1L, 5L, 4L, 4L, 4L,
4L, 3L, 5L, 2L), .Label = c(" 1.01 [ 0.21, 2.91]", " 89.60 [ 85.56, 92.82]",
" 92.95 [ 89.43, 95.59]", " 99.66 [ 98.14, 99.99]", " 100.00 [ 98.77, 100.00]"
), class = "factor"), Specificity = structure(c(8L, 1L, 3L, 4L,
2L, 5L, 6L, 1L, 7L), .Label = c(" 27.17 [ 25.15, 29.26]", " 44.96 [ 42.67, 47.26]",
" 53.31 [ 51.00, 55.61]", " 69.90 [ 67.75, 71.99]", " 70.23 [ 68.08, 72.31]",
" 90.18 [ 88.73, 91.50]", " 91.70 [ 90.35, 92.92]", " 100.00 [ 99.80, 100.00]"
), class = "factor")), .Names = c("Sensitivity", "Specificity"
), class = "data.frame", row.names = c(NA, -9L))
As an example, for the first element of the first column I would ideally get three columns of data: 1.01, 0.21 and 2.91.
The first and second numerical values are separated by a "[" and the second and third by a ",". I am not au fait with grep; I have tried using it but am going wrong somewhere!

Here is a regular expression solution you can try, using str_extract_all from the stringr package. The pattern \\d+\\.\\d+ matches decimal numbers: one or more digits, followed by a literal ., followed by one or more digits.
library(stringr)
lapply(df, function(col) do.call(rbind, str_extract_all(col, "\\d+\\.\\d+")))
$Sensitivity
[,1] [,2] [,3]
[1,] "1.01" "0.21" "2.91"
[2,] "100.00" "98.77" "100.00"
[3,] "99.66" "98.14" "99.99"
[4,] "99.66" "98.14" "99.99"
[5,] "99.66" "98.14" "99.99"
[6,] "99.66" "98.14" "99.99"
[7,] "92.95" "89.43" "95.59"
[8,] "100.00" "98.77" "100.00"
[9,] "89.60" "85.56" "92.82"
$Specificity
[,1] [,2] [,3]
[1,] "100.00" "99.80" "100.00"
[2,] "27.17" "25.15" "29.26"
[3,] "53.31" "51.00" "55.61"
[4,] "69.90" "67.75" "71.99"
[5,] "44.96" "42.67" "47.26"
[6,] "70.23" "68.08" "72.31"
[7,] "90.18" "88.73" "91.50"
[8,] "27.17" "25.15" "29.26"
[9,] "91.70" "90.35" "92.92"

Try this:
cbind(
matrix(as.numeric(unlist(strsplit(unlist(strsplit(gsub("]","",
dat$Sensitivity), ",")),"\\["))),ncol=3,byrow = T)
,
matrix(as.numeric(unlist(strsplit(unlist(strsplit(gsub("]","",
dat$Specificity), ",")),"\\["))),ncol=3,byrow = T)
)
[,1] [,2] [,3] [,4] [,5] [,6]
[1,] 1.01 0.21 2.91 100.00 99.80 100.00
[2,] 100.00 98.77 100.00 27.17 25.15 29.26
[3,] 99.66 98.14 99.99 53.31 51.00 55.61
[4,] 99.66 98.14 99.99 69.90 67.75 71.99
[5,] 99.66 98.14 99.99 44.96 42.67 47.26
[6,] 99.66 98.14 99.99 70.23 68.08 72.31
[7,] 92.95 89.43 95.59 90.18 88.73 91.50
[8,] 100.00 98.77 100.00 27.17 25.15 29.26
[9,] 89.60 85.56 92.82 91.70 90.35 92.92

Here is an option using base R that extracts the numbers and returns them as numeric values
lst <- lapply(d1, function(x) read.csv(text=gsub("[][]", ", ", x), header=FALSE)[-4])
lst
#$Sensitivity
# V1 V2 V3
#1 1.01 0.21 2.91
#2 100.00 98.77 100.00
#3 99.66 98.14 99.99
#4 99.66 98.14 99.99
#5 99.66 98.14 99.99
#6 99.66 98.14 99.99
#7 92.95 89.43 95.59
#8 100.00 98.77 100.00
#9 89.60 85.56 92.82
#$Specificity
# V1 V2 V3
#1 100.00 99.80 100.00
#2 27.17 25.15 29.26
#3 53.31 51.00 55.61
#4 69.90 67.75 71.99
#5 44.96 42.67 47.26
#6 70.23 68.08 72.31
#7 90.18 88.73 91.50
#8 27.17 25.15 29.26
#9 91.70 90.35 92.92
If needed, the list of data.frames can be converted to a single data.frame by cbinding
do.call(cbind, lst)

Related

Faster way to capture regex

I want to use regex to capture substrings - I already have a working solution, but I wonder if there is a faster solution. I am applying applyCaptureRegex on a vector with about 400.000 entries.
exampleData <- as.data.frame(c("[hg19:21:34809787-34809808:+]","[hg19:11:105851118-105851139:+]","[hg19:17:7482245-7482266:+]","[hg19:6:19839915-19839936:+]"))
captureRegex <- function(captRegEx, str) {
  # Every full match of the pattern inside the first element of `str`.
  hits <- regmatches(str, gregexpr(captRegEx, str))[[1]]
  # Re-match each hit on its own with regexec() so the capture groups
  # are returned alongside the full match.
  extract_groups <- function(m) {
    regmatches(m, regexec(captRegEx, m))
  }
  sapply(hits, extract_groups)
}
applyCaptureRegex <- function(mir, r) {
  # Run captureRegex() on the first field of every row of `mir`.
  per_row <- apply(mir, 1, function(row) captureRegex(r, row[1]))
  # Flatten everything and refold it into rows of five columns.
  matrix(unlist(per_row), ncol = 5, byrow = TRUE)
}
Usage and results:
> captureRegex("\\[[a-z0-9]+:([0-9]+):([0-9]+)-([0-9]+):([-+])\\]","[hg19:12:125627828-125627847:-]")
$`[hg19:12:125627828-125627847:-]`
[1] "[hg19:12:125627828-125627847:-]" "12" "125627828" "125627847" "-"
> applyCaptureRegex(exampleData,"\\[[a-z0-9]+:([0-9]+):([0-9]+)-([0-9]+):([-+])\\]")
[,1] [,2] [,3] [,4] [,5]
[1,] "[hg19:21:34809787-34809808:+]" "21" "34809787" "34809808" "+"
[2,] "[hg19:11:105851118-105851139:+]" "11" "105851118" "105851139" "+"
[3,] "[hg19:17:7482245-7482266:+]" "17" "7482245" "7482266" "+"
[4,] "[hg19:6:19839915-19839936:+]" "6" "19839915" "19839936" "+"
Thank you!

Why reinvent the wheel? You have several library packages to choose from with functions that return a character matrix with one column for each capturing group in your pattern.
stri_match_all_regex — stringi
x <- c('[hg19:21:34809787-34809808:+]', '[hg19:11:105851118-105851139:+]', '[hg19:17:7482245-7482266:+]', '[hg19:6:19839915-19839936:+]')
do.call(rbind, stri_match_all_regex(x, '\\[[^:]+:(\\d+):(\\d+)-(\\d+):([-+])]'))
# [,1] [,2] [,3] [,4] [,5]
# [1,] "[hg19:21:34809787-34809808:+]" "21" "34809787" "34809808" "+"
# [2,] "[hg19:11:105851118-105851139:+]" "11" "105851118" "105851139" "+"
# [3,] "[hg19:17:7482245-7482266:+]" "17" "7482245" "7482266" "+"
# [4,] "[hg19:6:19839915-19839936:+]" "6" "19839915" "19839936" "+"
str_match — stringr
str_match(x, '\\[[^:]+:(\\d+):(\\d+)-(\\d+):([-+])]')
strapplyc — gsubfn
strapplyc(x, "(\\[[^:]+:(\\d+):(\\d+)-(\\d+):([-+])])", simplify = rbind)
Below is a benchmark comparison of all combined solutions.
x <- rep(c('[hg19:21:34809787-34809808:+]',
'[hg19:11:105851118-105851139:+]',
'[hg19:17:7482245-7482266:+]',
'[hg19:6:19839915-19839936:+]'), 1000)
applyCaptureRegex <- function(mir, r) {
  # Full match plus capture groups for a single input string.
  match_one <- function(x) {
    regmatches(x, regexec(r, x))[[1]]
  }
  # One character vector per element of `mir`, stacked as matrix rows.
  rows <- lapply(mir, match_one)
  do.call(rbind, rows)
}
# Benchmark wrappers: each takes the character vector `x1` and applies one
# extraction approach to it.
gsubfn <- function(x1) {
  # Unlike the other three wrappers, this pattern is wrapped in an outer group.
  pattern <- '(\\[[^:]+:(\\d+):(\\d+)-(\\d+):([-+])])'
  strapplyc(x1, pattern, simplify = rbind)
}
regmtch <- function(x1) {
  pattern <- '\\[[^:]+:(\\d+):(\\d+)-(\\d+):([-+])]'
  applyCaptureRegex(x1, pattern)
}
stringr <- function(x1) {
  pattern <- '\\[[^:]+:(\\d+):(\\d+)-(\\d+):([-+])]'
  str_match(x1, pattern)
}
stringi <- function(x1) {
  pattern <- '\\[[^:]+:(\\d+):(\\d+)-(\\d+):([-+])]'
  # stri_match_all_regex() output is row-bound into a single result.
  per_string <- stri_match_all_regex(x1, pattern)
  do.call(rbind, per_string)
}
require(microbenchmark)
microbenchmark(gsubfn(x), regmtch(x), stringr(x), stringi(x))
Result
Unit: milliseconds
expr min lq mean median uq max neval
gsubfn(x) 372.27072 382.82179 391.21837 388.32396 396.27361 449.03091 100
regmtch(x) 394.03164 409.87523 419.42936 417.76770 427.08208 456.92460 100
stringr(x) 65.81644 70.28327 76.02298 75.43162 78.92567 116.18026 100
stringi(x) 15.88171 16.53047 17.52434 16.96127 17.76007 23.94449 100

Regular expression matching and replacement R

I am attempting to write a regular expression that replaces each element in this matrix with only the two comma-separated numbers that follow the first colon (for example, "0/1:18,4:21:56:56,0,591" should become "18,4"). Some elements are "./.:.:.:.:.", which I would like to change to "0,0".
head(data)
Offspring-95_CAATCG Offspring-96_AAACGG Offspring-97_ACTCTT
[1,] "./.:1,7:8:18:262,0,18" "0/1:18,4:21:56:56,0,591" "0/0:27,0:27:78:0,78,723"
[2,] "0/0:49,0:49:99:0,147,1891" "0/0:107,0:107:99:0,319,4185" "1/1:0,22:22:66:902,66,0"
[3,] "0/0:42,0:42:99:0,126,1324" "./.:.:.:.:." "0/1:35,88:117:99:3152,0,718"
I have tried:
try <- gsub("\\:[0-9]*\\,[0-9]*\\:", \\1, data)
The desired output is:
Offspring-95_CAATCG Offspring-96_AAACGG Offspring-97_ACTCTT
[1,] "1,7" "18,4" "27,0"
[2,] "49,0" "107,0" "0,22"
[3,] "42,0" "0,0" "35,88"
Thanks,

This could be done by
sub('[^:]+:([^:]+).*', '\\1', data)
# Offspring.95_CAATCG Offspring.96_AAACGG Offspring.97_ACTCTT
#[1,] "1,7" "18,4" "27,0"
#[2,] "49,0" "107,0" "0,22"
#[3,] "9,4" "33,13" "13,0"
Visualization
[^:]+:([^:]+).*
Debuggex Demo
Or using regmatches from base R
data[] <- regmatches(data, regexpr('(?<=:)[0-9]+,[0-9]+', data, perl=TRUE))
Visualization
(?<=:)[0-9]+,[0-9]+
Debuggex Demo
The above regex can be used with stringr or stringi (for big dataset)
library(stringr)
`dim<-`(str_extract(data, perl('(?<=:)[0-9]+,[0-9]+')), dim(data))
# [,1] [,2] [,3]
#[1,] "1,7" "18,4" "27,0"
#[2,] "49,0" "107,0" "0,22"
#[3,] "9,4" "33,13" "13,0"
Or
library(stringi)
`dim<-`(stri_extract(data, regex='(?<=:)[0-9]+,[0-9]+'), dim(data))
# [,1] [,2] [,3]
#[1,] "1,7" "18,4" "27,0"
#[2,] "49,0" "107,0" "0,22"
#[3,] "9,4" "33,13" "13,0"
Update
data1[] <- sub('[^:]+:([^:]+).*', '\\1', data1)
data1[!grepl(',', data1)] <- '0,0'
data1
# Offspring.95_CAATCG Offspring.96_AAACGG Offspring.97_ACTCTT
#[1,] "1,7" "18,4" "27,0"
#[2,] "49,0" "107,0" "0,22"
#[3,] "42,0" "0,0" "35,88"
data
data <- structure(c("./.:1,7:8:18:262,0,18", "0/0:49,0:49:99:0,147,1891",
"0/1:9,4:13:99:129,0,334", "0/1:18,4:21:56:56,0,591",
"0/0:107,0:107:99:0,319,4185",
"0/1:33,13:44:99:317,0,1150", "0/0:27,0:27:78:0,78,723",
"1/1:0,22:22:66:902,66,0", "0/0:13,0:13:39:0,39,528"), .Dim = c(3L, 3L),
.Dimnames = list(NULL, c("Offspring.95_CAATCG", "Offspring.96_AAACGG",
"Offspring.97_ACTCTT")))
data1 <- structure(c("./.:1,7:8:18:262,0,18", "0/0:49,0:49:99:0,147,1891",
"0/0:42,0:42:99:0,126,1324", "0/1:18,4:21:56:56,0,591",
"0/0:107,0:107:99:0,319,4185",
"./.:.:.:.:.", "0/0:27,0:27:78:0,78,723", "1/1:0,22:22:66:902,66,0",
"0/1:35,88:117:99:3152,0,718"), .Dim = c(3L, 3L), .Dimnames = list(
NULL, c("Offspring.95_CAATCG", "Offspring.96_AAACGG", "Offspring.97_ACTCTT"
)))

Not regex subbing but probably pretty darn quick.
apply(data, 2, function(x) sapply(strsplit(x, ":"), "[[", 2))
## Offspring.95_CAATCG Offspring.96_AAACGG Offspring.97_ACTCTT
## [1,] "1,7" "18,4" "27,0"
## [2,] "49,0" "107,0" "0,22"
## [3,] "9,4" "33,13" "13,0"

Try this:
out<-list()
for(i in seq(ncol(data)))
out[[i]]<-gsub('[^:]*:([0-9]+,[0-9]+).*','\\1',data[,i])
out<-as.data.frame(out)
dimnames(out)<-dimnames(data)
out

R using regexpr, with multiple pattern

I would like to find the string just after some patterns. My code seems to work, but I cannot finish the job.
Here is an illustration:
pattern <- c("Iligan", "Cabeseria 25|Sta. Lucia", "Capitol", "Osmeña",
"Nowhere", "Aglayan")
# I want to match the string just after each pattern. For example I'm going to
# match City just after Iligan.
target <-c("Iligan City", "Sta. Lucia, Ozamiz City", " Oroquieta City",
"Osmeña St. Dipolog City", "Lucia St., Zamboanga City",
"Aglayan str, Oroquieta City", "Gingoog City", "Capitol br., Ozamiz City",
"Dumaguete City", "Poblacion, Misamis")
#The matches seems to work fine
(matches <- sapply(pattern,FUN=function(x){regexpr(paste0("
(?<=\\b",x,"\\b ",")","[\\w-*\\.]*"),target,perl=T)}))
print (matches)
#But I cannot get the results. I would need use the column of each matrix
#at a time
villain <- lapply(matches,FUN = function(x)(regmatches(target,x)))
Do you have a solution to this problem?
Update 1
For the sake of being precise here is the desired output.
results <- c("City", "St.", "br.")
#[1] "City" "St." "br."

There are some helpers in the stringr package that can simplify the process:
pattern <- c("Iligan", "Cabeseria 25|Sta. Lucia", "Capitol", "Osmeña",
"Nowhere", "Aglayan")
target <-c("Iligan City", "Sta. Lucia, Ozamiz City", " Oroquieta City",
"Osmeña St. Dipolog City", "Lucia St., Zamboanga City",
"Aglayan str, Oroquieta City", "Gingoog City", "Capitol br., Ozamiz City",
"Dumaguete City", "Poblacion, Misamis")
matchPat <- function(x) {
  # Look-behind for the whole-word pattern `x` followed by a space, then
  # capture the run of word characters, "-", "*" and "." that comes after it.
  lookbehind_pattern <- paste0("(?<=\\b", x, "\\b ", ")", "[\\w-*\\.]*")
  # `target` is read from the enclosing environment; non-matches come back NA.
  unlist(str_extract(target, perl(lookbehind_pattern)))
}
matches <- sapply(pattern, matchPat)
print(matches)
## Iligan Cabeseria 25|Sta. Lucia Capitol Osmeña Nowhere Aglayan
## [1,] "City" NA NA NA NA NA
## [2,] NA NA NA NA NA NA
## [3,] NA NA NA NA NA NA
## [4,] NA NA NA "St." NA NA
## [5,] NA NA NA NA NA NA
## [6,] NA NA NA NA NA "str"
## [7,] NA NA NA NA NA NA
## [8,] NA NA "br." NA NA NA
## [9,] NA NA NA NA NA NA
## [10,] NA NA NA NA NA NA
This can be simplified further if you don't need indicators for non-matches, but no sample/expected output was provided.

Pasting character vectors, removing NA's and separators between NAs

I've got several character vectors that I want to paste together. The problem is that some of the character vectors are pretty sparse. So, when I paste them, I get NA's and extra separators. How can I efficiently remove the NA's and extra separators while still joining the vectors?
I've got something like:
n1 = c("goats", "goats", "spatula", NA, "rectitude", "boink")
n2 = c("forever", NA, "...yes", NA, NA, NA)
cbind(paste(n1,n2, sep=", "))
which gives me:
[1,] "goats, forever"
[2,] "goats, NA"
[3,] "spatula, ...yes"
[4,] "NA, NA"
[5,] "rectitude, NA"
[6,] "boink, NA"
but I want:
[1,] "goats, forever"
[2,] "goats"
[3,] "spatula, ...yes"
[4,] <NA>
[5,] "rectitude"
[6,] "boink"
There are clearly inefficient and tedious ways of doing this with a lot of regular expressions and string splitting. But anything quick/simple?

Not a lot of regex, just 1 line and 1 more to replace NA
n1 <- c("goats", "goats", "spatula", NA, "rectitude", "boink")
n2 <- c("forever", NA, "...yes", NA, NA, NA)
n3 <- cbind(paste(n1,n2, sep=", "))
n3 <- gsub("(, )?NA", "", n3)
n3[n3==""] <- NA

Code (no regex or string splitting):
vec <- apply(cbind(n1,n2),1,function(x)
ifelse(all(is.na(x)), NA, paste(na.omit(x),collapse=", ")) )
Result:
> vec # as a vector
[1] "goats, forever" "goats" "spatula, ...yes" NA "rectitude" "boink"
> cbind(vec) # as a matrix
vec
[1,] "goats, forever"
[2,] "goats"
[3,] "spatula, ...yes"
[4,] NA
[5,] "rectitude"
[6,] "boink"

Here's an option using the qdap package (though the other options seem better to me as they use base R):
library(qdap)
gsub(" ", ", ", blank2NA(Trim(gsub("NA", "", paste(n1, n2)))))
## [1] "goats, forever" "goats" "spatula, ...yes" NA
## [5] "rectitude" "boink"
Or...
## gsub(" ", ", ", blank2NA(gsub("NA| NA", "", paste(n1, n2))))

Regex group capture in R with multiple capture-groups

In R, is it possible to extract group capture from a regular expression match? As far as I can tell, none of grep, grepl, regexpr, gregexpr, sub, or gsub return the group captures.
I need to extract key-value pairs from strings that are encoded thus:
\((.*?) :: (0\.[0-9]+)\)
I can always just do multiple full-match greps, or do some outside (non-R) processing, but I was hoping I could do it all within R. Is there a function or a package that provides such a function to do this?

str_match(), from the stringr package, will do this. It returns a character matrix with one column for each group in the match (and one for the whole match):
> s = c("(sometext :: 0.1231313213)", "(moretext :: 0.111222)")
> str_match(s, "\\((.*?) :: (0\\.[0-9]+)\\)")
[,1] [,2] [,3]
[1,] "(sometext :: 0.1231313213)" "sometext" "0.1231313213"
[2,] "(moretext :: 0.111222)" "moretext" "0.111222"

gsub does this, from your example:
gsub("\\((.*?) :: (0\\.[0-9]+)\\)","\\1 \\2", "(sometext :: 0.1231313213)")
[1] "sometext 0.1231313213"
You need to double-escape the backslashes inside the quoted string (write \\ wherever the regex needs \) so that they reach the regex engine intact.
Hope this helps.

Try regmatches() and regexec():
regmatches("(sometext :: 0.1231313213)",regexec("\\((.*?) :: (0\\.[0-9]+)\\)","(sometext :: 0.1231313213)"))
[[1]]
[1] "(sometext :: 0.1231313213)" "sometext" "0.1231313213"

gsub() can do this and return only the capture group:
However, in order for this to work, you must explicitly select elements outside your capture group as mentioned in the gsub() help.
(...) elements of character vectors 'x' which are not substituted will be returned unchanged.
So if your text to be selected lies in the middle of some string, adding .* before and after the capture group should allow you to only return it.
gsub(".*\\((.*?) :: (0\\.[0-9]+)\\).*","\\1 \\2", "(sometext :: 0.1231313213)")
[1] "sometext 0.1231313213"

Solution with strcapture from the utils:
x <- c("key1 :: 0.01",
"key2 :: 0.02")
strcapture(pattern = "(.*) :: (0\\.[0-9]+)",
x = x,
proto = list(key = character(), value = double()))
#> key value
#> 1 key1 0.01
#> 2 key2 0.02

This is how I ended up working around this problem. I used two separate regexes to match the first and second capture groups and run two gregexpr calls, then pull out the matched substrings:
regex.string <- "(?<=\\().*?(?= :: )"
regex.number <- "(?<= :: )\\d\\.\\d+"
match.string <- gregexpr(regex.string, str, perl=T)[[1]]
match.number <- gregexpr(regex.number, str, perl=T)[[1]]
strings <- mapply(function (start, len) substr(str, start, start+len-1),
match.string,
attr(match.string, "match.length"))
numbers <- mapply(function (start, len) as.numeric(substr(str, start, start+len-1)),
match.number,
attr(match.number, "match.length"))

I like perl compatible regular expressions. Probably someone else does too...
Here is a function that does perl compatible regular expressions and matches the functionality of functions in other languages that I am used to:
# Match a Perl-compatible regular expression against a single string and
# return the full match followed by its capture groups.
#
# expr: the regular expression (passed to regexpr() with perl = TRUE).
# str:  the string to search; only the first match is reported.
#
# Returns a character vector: element 1 is the full match and elements 2..k+1
# are the k capture groups, in order. Returns character(0) when nothing
# matches.
regexpr_perl <- function(expr, str) {
  hit <- regexpr(expr, str, perl = TRUE)
  hit_length <- attr(hit, "match.length")

  # regexpr() reports a match.length of -1 when the pattern does not match.
  if (hit_length < 0) {
    return(character(0))
  }

  capture_start <- attr(hit, "capture.start")
  capture_length <- attr(hit, "capture.length")

  whole <- substr(str, hit, hit + hit_length - 1)

  # seq_along() covers zero, one or many capture groups. The previous
  # `length(capture_start) > 1` guard skipped the single-group case and left
  # that group as an empty string.
  groups <- vapply(
    seq_along(capture_start),
    function(i) {
      first <- capture_start[[i]]
      substr(str, first, first + capture_length[[i]] - 1)
    },
    character(1)
  )

  c(whole, groups)
}

As suggested in the stringr package, this can be achieved using either str_match() or str_extract().
Adapted from the manual:
library(stringr)
strings <- c(" 219 733 8965", "329-293-8753 ", "banana",
"239 923 8115 and 842 566 4692",
"Work: 579-499-7527", "$1000",
"Home: 543.355.3679")
phone <- "([2-9][0-9]{2})[- .]([0-9]{3})[- .]([0-9]{4})"
Extracting and combining our groups:
str_extract_all(strings, phone, simplify=T)
# [,1] [,2]
# [1,] "219 733 8965" ""
# [2,] "329-293-8753" ""
# [3,] "" ""
# [4,] "239 923 8115" "842 566 4692"
# [5,] "579-499-7527" ""
# [6,] "" ""
# [7,] "543.355.3679" ""
Indicating groups with an output matrix (we're interested in columns 2+):
str_match_all(strings, phone)
# [[1]]
# [,1] [,2] [,3] [,4]
# [1,] "219 733 8965" "219" "733" "8965"
#
# [[2]]
# [,1] [,2] [,3] [,4]
# [1,] "329-293-8753" "329" "293" "8753"
#
# [[3]]
# [,1] [,2] [,3] [,4]
#
# [[4]]
# [,1] [,2] [,3] [,4]
# [1,] "239 923 8115" "239" "923" "8115"
# [2,] "842 566 4692" "842" "566" "4692"
#
# [[5]]
# [,1] [,2] [,3] [,4]
# [1,] "579-499-7527" "579" "499" "7527"
#
# [[6]]
# [,1] [,2] [,3] [,4]
#
# [[7]]
# [,1] [,2] [,3] [,4]
# [1,] "543.355.3679" "543" "355" "3679"

This can be done using the package unglue, taking the example from the selected answer:
# install.packages("unglue")
library(unglue)
s <- c("(sometext :: 0.1231313213)", "(moretext :: 0.111222)")
unglue_data(s, "({x} :: {y})")
#> x y
#> 1 sometext 0.1231313213
#> 2 moretext 0.111222
Or starting from a data frame
df <- data.frame(col = s)
unglue_unnest(df, col, "({x} :: {y})",remove = FALSE)
#> col x y
#> 1 (sometext :: 0.1231313213) sometext 0.1231313213
#> 2 (moretext :: 0.111222) moretext 0.111222
You can get the raw regex from the unglue pattern, optionally with named capture groups:
unglue_regex("({x} :: {y})")
#> ({x} :: {y})
#> "^\\((.*?) :: (.*?)\\)$"
unglue_regex("({x} :: {y})",named_capture = TRUE)
#> ({x} :: {y})
#> "^\\((?<x>.*?) :: (?<y>.*?)\\)$"
More info : https://github.com/moodymudskipper/unglue/blob/master/README.md

We Keep Coding

c++ django amazon-web-services regex python-2.7 google-cloud-platform list unit-testing opengl ember.js

Extracting numbers from a string in a dataframe - regex

Related

Faster way to capture regex

Regular expression matching and replacement R

R using regexpr, with multiple pattern

Pasting character vectors, removing NA's and separators between NAs

Regex group capture in R with multiple capture-groups

Categories

Resources