apply nested foreach loops on a list - list

I am trying to apply nested foreach loops to a list. When using nested for loops my codes works. But when I try to use foreach loops I do not get the full results (in a list), but just a few values.
This is my nested for loops code:
library(sn)
library(mnormt)
library(mokken)
library(polycor)
library(foreach)
library(parallel)
data("DS14")
data<-DS14[,3:5] # for testing I only use 3 variables
source("C:/Users/.../code to apply function fit_skewnorm (Kolbe et al., 2021).R")
# Kolbe et al. for reference: https://doi.org/10.3390/psych3040037
# see Appendix B
allresults_skew <- replicate(ncol(data)-1, matrix(NA,ncol(data),9), simplify = FALSE)
for(p in 1:ncol(data)){
for(q in 2:ncol(data)){
if(q<=p){
next}
tryCatch({ # a function to continue with loop in case of errors
obsn = table(data[,p], data[,q])
ncats1 = nrow(obsn)
ncats2 = ncol(obsn)
ntot = sum(obsn)
obsp = obsn/ntot
proportions2 = matrix(colSums(obsp), 1, ncats2)
proportions1 = matrix(rowSums(obsp), ncats1 , 1)
premultiplier = matrix(0, ncats1, ncats1)
for(l in 1:ncats1)for(m in 1:l)premultiplier[l,m] = 1
postmultiplier = matrix(0, ncats2, ncats2)
for(l in 1:ncats2)for(m in l:ncats2)postmultiplier[l,m] = 1
cumulprops2 = proportions2 %*% postmultiplier
cumulprops1 = premultiplier %*% proportions1
nthresholds1 = ncats1 - 1
nthresholds2 = ncats2 - 1
thresholds1 = matrix(0, 1, nthresholds1)
for(l in 1:nthresholds1)thresholds1[l] = qnorm(cumulprops1[l])
thresholds2 = matrix(0, 1, nthresholds2)
for(l in 1:nthresholds2)thresholds2[l] = qnorm(cumulprops2[l])
pcorr = polycor::polychor(obsn)
results_fit = fit_skewnorm(c("th1" = thresholds1, "th2" = thresholds2, "corr" = pcorr, "alpha" = c(2 ,2)))
allresults_skew[[p]][q,1] <- p
allresults_skew[[p]][q,2] <- q
allresults_skew[[p]][q,3] <- results_fit[,1]
allresults_skew[[p]][q,4] <- results_fit[,2]
allresults_skew[[p]][q,5] <- results_fit[,3]
allresults_skew[[p]][q,6] <- results_fit[,4]
allresults_skew[[p]][q,7] <- results_fit[,5]
allresults_skew[[p]][q,8] <- results_fit[,6]
allresults_skew[[p]][q,9] <- results_fit[,7]
}, error=function(e){cat("ERROR :",conditionMessage(e), "\n")}) # part of tryCatch
}
}
Then, allresults_skew is:
[[1]]
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9]
[1,] NA NA NA NA NA NA NA NA NA
[2,] 1 2 19.97874 13 0.095741675130554 0.2705112 0 1.4656923 0.7528304
[3,] 1 3 65.49704 13 0.000000005354567 0.8426818 0 0.2512463 2.2963329
[[2]]
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9]
[1,] NA NA NA NA NA NA NA NA NA
[2,] NA NA NA NA NA NA NA NA NA
[3,] 2 3 31.14632 13 0.003209404 0.2753952 0 0.7247398 0.5957852
My current nested foreach loops look like this:
allresults_skew <- replicate(ncol(data)-1, matrix(NA,ncol(data),9), simplify = FALSE)
no_cores <- detectCores(logical = TRUE)
cl <- makeCluster(no_cores-1)
registerDoParallel(cl)
getDoParWorkers()
foreach(i = 1:ncol(data),.combine = 'cbind') %:%
foreach(j = 2:ncol(data), .combine = 'rbind') %dopar% {
if(j<=i){
return(NA)}
tryCatch({ # a function to continue with loop in case of errors
#progress(i, ncol(data)-1)
obsn = table(data[,i], data[,j])
ncats1 = nrow(obsn)
ncats2 = ncol(obsn)
ntot = sum(obsn)
obsp = obsn/ntot
proportions2 = matrix(colSums(obsp), 1, ncats2)
proportions1 = matrix(rowSums(obsp), ncats1 , 1)
premultiplier = matrix(0, ncats1, ncats1)
for(l in 1:ncats1)for(m in 1:l)premultiplier[l,m] = 1
postmultiplier = matrix(0, ncats2, ncats2)
for(l in 1:ncats2)for(m in l:ncats2)postmultiplier[l,m] = 1
cumulprops2 = proportions2 %*% postmultiplier
cumulprops1 = premultiplier %*% proportions1
nthresholds1 = ncats1 - 1
nthresholds2 = ncats2 - 1
thresholds1 = matrix(0, 1, nthresholds1)
for(l in 1:nthresholds1)thresholds1[l] = qnorm(cumulprops1[l])
thresholds2 = matrix(0, 1, nthresholds2)
for(l in 1:nthresholds2)thresholds2[l] = qnorm(cumulprops2[l])
pcorr = polycor::polychor(obsn)
results_fit = fit_skewnorm(c("th1" = thresholds1, "th2" = thresholds2, "corr" = pcorr, "alpha" = c(2 ,2)))
allresults_skew[[i]][j,1] <- i
allresults_skew[[i]][j,2] <- j
allresults_skew[[i]][j,3] <- results_fit[,1]
allresults_skew[[i]][j,4] <- results_fit[,2]
allresults_skew[[i]][j,5] <- results_fit[,3]
allresults_skew[[i]][j,6] <- results_fit[,4]
allresults_skew[[i]][j,7] <- results_fit[,5]
allresults_skew[[i]][j,8] <- results_fit[,6]
allresults_skew[[i]][j,9] <- results_fit[,7]
}, error=function(e){cat("ERROR :",conditionMessage(e), "\n")}) # part of tryCatch
NULL
}
stopCluster(cl)
After these foreach loops are run, I get this matrix:
[,1] [,2] [,3]
result.1 0.7528304 NA NA
result.2 2.2963329 0.5957852 NA
And asking for allresults_skew, gives me:
[[1]]
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9]
[1,] NA NA NA NA NA NA NA NA NA
[2,] NA NA NA NA NA NA NA NA NA
[3,] NA NA NA NA NA NA NA NA NA
[[2]]
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9]
[1,] NA NA NA NA NA NA NA NA NA
[2,] NA NA NA NA NA NA NA NA NA
[3,] NA NA NA NA NA NA NA NA NA
So the foreach loops just gives values from the last columns from the for loops, but only directly after the code ran. Using allresults_skew the matrices are still completely NA.
I would be grateful if anyone could help me this and tell me what to change.
I am also uncertain whether to choose either .combine = 'c', 'rbind', or 'cbind' in the two foreach loops. But I assume this does not explain why the code does not result in a list that includes 3 x 9 matrices.

I found a solution that works for me:
allresults_skew <- foreach(i = 1:ncol(data)) %:% foreach(j = 2:ncol(data)) %dopar% {
if(j<=i){
return(NA)}
tryCatch({ # a function to continue with loop in case of errors
#progress(i, ncol(data)-1)
obsn = table(data[,i], data[,j])
ncats1 = nrow(obsn)
ncats2 = ncol(obsn)
ntot = sum(obsn)
obsp = obsn/ntot
proportions2 = matrix(colSums(obsp), 1, ncats2)
proportions1 = matrix(rowSums(obsp), ncats1 , 1)
premultiplier = matrix(0, ncats1, ncats1)
for(l in 1:ncats1)for(m in 1:l)premultiplier[l,m] = 1
postmultiplier = matrix(0, ncats2, ncats2)
for(l in 1:ncats2)for(m in l:ncats2)postmultiplier[l,m] = 1
cumulprops2 = proportions2 %*% postmultiplier
cumulprops1 = premultiplier %*% proportions1
nthresholds1 = ncats1 - 1
nthresholds2 = ncats2 - 1
thresholds1 = matrix(0, 1, nthresholds1)
for(l in 1:nthresholds1)thresholds1[l] = qnorm(cumulprops1[l])
thresholds2 = matrix(0, 1, nthresholds2)
for(l in 1:nthresholds2)thresholds2[l] = qnorm(cumulprops2[l])
pcorr = polycor::polychor(obsn)
results_fit = fit_skewnorm(c("th1" = thresholds1, "th2" = thresholds2, "corr" = pcorr, "alpha" = c(2 ,2)))
}, error=function(e){cat("ERROR :",conditionMessage(e), "\n")}) # part of tryCatch
#NULL
}
stopCluster(cl)
Then, using
allresults_skew0 <- unlist(allresults_skew, recursive = FALSE)
allresults_skew0 <- Reduce(rbind,allresults_skew0)
gave me
chisq df p corr conv alpha1 alpha2
1 19.97874 13 0.095741675130554 0.2705112 0 1.4656923 0.7528304
2 65.49704 13 0.000000005354567 0.8426818 0 0.2512463 2.2963329
3 NA NA NA NA NA NA NA
4 31.14632 13 0.003209403883258 0.2753952 0 0.7247398 0.5957852
5 NA NA NA NA NA NA NA
6 NA NA NA NA NA NA NA

Related

Can I modify a row in-place with Rcpp?

Say, I would like to increase the first row of a matrix by one. The obvious approach is A.row(0) = A.row(0) + 1;, but it creates a new row instead of modifying the existing one, and may cause some performance issue when the matrix is large.
From a previous answer, I learned that I can do a point aliasing, but it only works for the whole matrix, not for individual rows.
library(Rcpp)
cppFunction('
void increaseFirstRow(NumericMatrix& A) {
NumericVector B = A.row(0);
B = B + 1;
}')
A <- diag(c(1.0, 2.0, 3.0))
increaseFirstRow(A)
The output is shown below. Note that the first row is not changed.
> A
[,1] [,2] [,3]
[1,] 1 0 0
[2,] 0 2 0
[3,] 0 0 3
Under the formulation above, I think you wanted to obtain a reference to specific parts of the matrix. The following work across matrix types:
*Matrix::Row = x( 0 , Rcpp::_); // first row
*Matrix::Column = x( Rcpp::_ , 0); // first column
*Matrix::Sub = x( Rcpp::Range(0, 1) , Rcpp::Range(2, 3)); // first 2 rows and 3 -4th column.
In your case, that would be:
#include <Rcpp.h>
// [[Rcpp::export]]
void row_ref(Rcpp::NumericMatrix M) {
// Create a reference to the 1st row in M.
Rcpp::NumericMatrix::Row x = M.row(0);
// Increase the first row in M.
x = x + 10;
}
Example:
(A <- diag(c(1.0, 2.0, 3.0)))
# [,1] [,2] [,3]
# [1,] 1 0 0
# [2,] 0 2 0
# [3,] 0 0 3
row_ref(A)
A
# [,1] [,2] [,3]
# [1,] 11 10 10
# [2,] 0 2 0
# [3,] 0 0 3
Here is a simple solution in RcppArmadillo, and, following an edit, in Rcpp itself:
Code with Example
#include <RcppArmadillo.h>
// [[Rcpp::depends(RcppArmadillo)]]
// [[Rcpp::export]]
void incRow(arma::mat& M) {
M.row(0) = M.row(0) + 1;
}
// [[Rcpp::export]]
void incRow2(Rcpp::NumericMatrix M) {
M(0,Rcpp::_) = M(0,Rcpp::_) + 1;
}
/*** R
A <- diag(c(1.0, 2.0, 3.0))
incRow(A)
A
incRow2(A)
A
*/
Output
R> Rcpp::sourceCpp("/tmp/armarow.cpp")
R> A <- diag(c(1.0, 2.0, 3.0))
R> incRow(A)
R> A
[,1] [,2] [,3]
[1,] 2 1 1
[2,] 0 2 0
[3,] 0 0 3
R> incRow2(A)
R> A
[,1] [,2] [,3]
[1,] 3 2 2
[2,] 0 2 0
[3,] 0 0 3
R>

Error: std::bad_alloc Rstudio

After running this code:
t1 <-Sys.time()
df.m <- left_join(df.h,daRta3,by=c("year","month","MA","day"))
t2 <- Sys.time()
difftime(t2,t1)
I have this error.
Error: std::bad_alloc
The dimension of the matrix that I have tried to create is 74495*2695 = 180.10^6 rows.
The computer in which I run the code has 20 GB of RAM
I tried the memory.limit() but it did not solve my issue.
Examine cardinality of your join key
Is the c("year","month","MA","day") unique in both df.h and daRta3?
What are the most frequent values?
NA values. left_join can treat NA values as equal or different:
> tibble(x = c(NA, NA, NA)) %>% left_join(., ., by = 'x')
# A tibble: 9 x 1
x
<lgl>
1 NA
2 NA
3 NA
4 NA
5 NA
6 NA
7 NA
8 NA
9 NA
> tibble(x = c(NA, NA, NA)) %>% left_join(., ., by = 'x', na_matches = 'never')
# A tibble: 3 x 1
x
<lgl>
1 NA
2 NA
3 NA
If order and values in c("year","month","MA","day") can be guaranteed to be the same then simple cbind or bind_cols might be an efficient solution

Split a vector of strings over a character to return a matrix

I have
rownames(results.summary)
[1] "2 - 1" "3 - 1" "4 - 1"
What I want is to return a matrix of
2 1
3 1
4 1
The way Ive done it as:
for(i in 1:length(rownames(results.summary)){
current.split <- unlist(strsplit(rownames(results.summary)[i], "-"))
matrix.results$comparison.group[i] <- trim(current.split[1])
matrix.results$control.group[i] <- trim(current.split[2])
}
The trim function basically removes any whitespace on either end.
I've been learning regex and was wondering if there's perhaps a more elegant vectorized solution?
No need to use strsplit, just read it using read.table:
read.table(text=vec,sep='-',strip.white = TRUE) ## see #flodel comment
V1 V2
1 2 1
2 3 1
3 4 1
where vec is :
vec <- c("2 - 1", "3 - 1", "4 - 1")
This should work:
vv <- c("2 - 1", "3 - 1", "4 - 1")
matrix(as.numeric(unlist(strsplit(vv, " - "))), ncol = 2, byrow = TRUE)
# [,1] [,2]
# [1,] 2 1
# [2,] 3 1
# [3,] 4 1
You can also try scan
vec <- c("2 - 1", "3 - 1", "4 - 1")
s <- scan(text = vec, what = integer(), sep = "-", quiet = TRUE)
matrix(s, length(s)/2, byrow = TRUE)
# [,1] [,2]
# [1,] 2 1
# [2,] 3 1
# [3,] 4 1
Another option is cSplit.
library(splitstackshape)
cSplit(data.frame(vec), "vec", sep = " - ", fixed=TRUE)
# vec_1 vec_2
# 1: 2 1
# 2: 3 1
# 3: 4 1
You can use str_match from the package stringr for this:
library(stringr)
##
x <- c("2 - 1","3 - 1","4 - 1")
##
cmat <- str_match(x, "(\\d).+(\\d)")[,-1]
> apply(cmat,2,as.numeric)
[,1] [,2]
[1,] 2 1
[2,] 3 1
[3,] 4 1
Using reshape2 colsplit
library(reshape2)
colsplit(x, " - ", c("A", "B"))
# A B
# 1 2 1
# 2 3 1
# 3 4 1
Or using tidyrs separate
library(tidyr)
separate(data.frame(x), x, c("A", "B"), sep = " - ")
# A B
# 1 2 1
# 2 3 1
# 3 4 1

R using regexpr, with multiple pattern

I would like to find the string just after some patterns. My code seem to work but I cannot finish the job.
Here is an illustration:
pattern <- c("Iligan", "Cabeseria 25|Sta. Lucia", "Capitol", "Osmeña",
"Nowhere", "Aglayan")
# I want to match the string just after each pattern. For example I'm going to
# match City just after Iligan.
target <-c("Iligan City", "Sta. Lucia, Ozamiz City", " Oroquieta City",
"Osmeña St. Dipolog City", "Lucia St., Zamboanga City",
"Aglayan str, Oroquieta City", "Gingoog City", "Capitol br., Ozamiz City",
"Dumaguete City", "Poblacion, Misamis")
#The matches seems to work fine
(matches <- sapply(pattern,FUN=function(x){regexpr(paste0("
(?<=\\b",x,"\\b ",")","[\\w-*\\.]*"),target,perl=T)}))
print (matches)
#But I cannot get the results. I would need use the column of each matrix
#at a time
villain <- lapply(matches,FUN = function(x)(regmatches(target,x)))
Do you have a solution to this problem.
unpdate 1
For the sake of being precise here is the desired output.
results <- c("City", "St.", "br.")
#[1] "City" "St." "br."
There are some helpers in the stringr package that can simplify the process:
pattern <- c("Iligan", "Cabeseria 25|Sta. Lucia", "Capitol", "Osmeña",
"Nowhere", "Aglayan")
target <-c("Iligan City", "Sta. Lucia, Ozamiz City", " Oroquieta City",
"Osmeña St. Dipolog City", "Lucia St., Zamboanga City",
"Aglayan str, Oroquieta City", "Gingoog City", "Capitol br., Ozamiz City",
"Dumaguete City", "Poblacion, Misamis")
matchPat <- function(x) {
unlist(str_extract(target, perl(paste0("(?<=\\b", x, "\\b ",")","[\\w-*\\.]*"))))
}
matches <- sapply(pattern, matchPat)
print(matches)
## Iligan Cabeseria 25|Sta. Lucia Capitol Osmeña Nowhere Aglayan
## [1,] "City" NA NA NA NA NA
## [2,] NA NA NA NA NA NA
## [3,] NA NA NA NA NA NA
## [4,] NA NA NA "St." NA NA
## [5,] NA NA NA NA NA NA
## [6,] NA NA NA NA NA "str"
## [7,] NA NA NA NA NA NA
## [8,] NA NA "br." NA NA NA
## [9,] NA NA NA NA NA NA
## [10,] NA NA NA NA NA NA
This can be simplified further if you don't need indicators for non-matches, but no sample/expected output was provided.

Combine rownames from different lists in a dataframe

I have a question about lists in R. I have a list within 16 list containing a list with variables like this:
x
[[1]]
A 1 3
B 4 2
[[2]]
C 23 4
D 9 22
E 4 54
The A,B,C and D are rownames in the lists. Now I want to create a file that paste only the rownames in a dataframe. Each row in the dataframe contains 1 list in the total list.
A B
C D E
Can anyone help me with this? I thought maybe someting like do.call(rbind, rownames(x))
EDIT! 05-08-2011
Is there a way to save the rownames list by list? So in the end there are no NA's in the data and the data is unequal?
Thank you all!
Making an assumption about the nature of x, if we use:
x <- list(matrix(c(1,4,3,2), ncol = 2,
dimnames = list(c("A","B"), NULL)),
matrix(c(23,9,4,4,22,54), ncol = 2,
dimnames = list(c("C","D","E"), NULL)))
which gives:
> x
[[1]]
[,1] [,2]
A 1 3
B 4 2
[[2]]
[,1] [,2]
C 23 4
D 9 22
E 4 54
Then
> lapply(x, rownames)
[[1]]
[1] "A" "B"
[[2]]
[1] "C" "D" "E"
seems the only plausible answer. Unless we pad the ("A","B") vector with something, we can't use a matrix or a data frame because the component lengths do not match. Hence one of the reasons the do.call() idea fails:
> do.call(rbind, rownames(x))
Error in do.call(rbind, rownames(x)) : second argument must be a list
> do.call(rbind, lapply(x, rownames))
[,1] [,2] [,3]
[1,] "A" "B" "A"
[2,] "C" "D" "E"
Warning message:
In function (..., deparse.level = 1) :
number of columns of result is not a multiple of vector length (arg 1)
To pad the result with NA and get a data frame, we could do:
out <- lapply(x, rownames)
foo <- function(x, max, repl = NA) {
if(length(x) == max)
out <- x
else {
out <- rep(repl, max)
out[seq_along(x)] <- x
}
out
}
out <- lapply(out, foo, max = max(sapply(out, length)))
(out <- do.call(rbind, out))
The last line gives:
> (out <- do.call(rbind, out))
[,1] [,2] [,3]
[1,] "A" "B" NA
[2,] "C" "D" "E"
If you want that nicely printed, then
> print(format(out), quote = FALSE)
[,1] [,2] [,3]
[1,] A B NA
[2,] C D E
is an option inside R.
This should do it:
lapply(x, function(curdfr){paste(rownames(curdfr))})
This results in a vector with each element the space-separated rownames of the elements of the list.
Your sample data:
x <- list(
matrix(c(1,4,3,2), nrow = 2, dimnames = list(LETTERS[1:2])),
matrix(c(23,9,4,4,22,54), nrow = 3, dimnames = list(LETTERS[3:5]))
)
What you want:
unlist(lapply(x, rownames))
Or, if you are keen on do.call, then this is equivalent:
do.call(c, lapply(x, rownames))