WinBugs if else using step function - if-statement

I want to construct a model like
Model3 <- function() {
# Likelihood of the model
for (i in 1:n) {
response2[i] ~ dbern(p[i])
logit(p[i]) <- m[i]
m[i] <- mfe[i]
mfe[i] <- beta[1] + f[i]
# the f functions or the if-else step like
f_branch[i, 2] <- alpha
f_branch[i, 1] <- 1+alpha*exp(-((1/phi)*X2[i,1])^2)
# the decision
if_branch[i] <- 1 + step(-(X2[i,1] - 2))
f[i] <- f_branch[i, if_branch[i]]
f[i] <- f_branch[i, if_branch[i]]
}
# Prediction step
for(ii in 1:ndist) {
if_branch2[ii] <- 1 + step(-(Dist[ii] - 2))
fhat[ii] <- f_branch2[ii, if_branch2[ii]]
fhat[ii] <- f_branch2[ii, if_branch2[ii]]
f_branch2[ii, 2] <- alpha
f_branch2[ii, 1] <- 1 + alpha*exp(-((1/phi)*Dist[ii])^2)
}
# Prior distribution of the fixed effects parameters
beta[1] ~ dnorm(0, 1.0E-6)}
# priors for the f function
alpha ~ dgamma(1, 1)
phi ~ dunif(0, 2)
}
And I am getting an error: Attempt to redefine node f[1]

In this case, the error message is pretty self-explanatory - you define f[1], and then define it again on the following line, since you have repeated the line:
f[i] <- f_branch[i, if_branch[i]]
Remove the second instance of that line, and that error should disappear.
You have also duplicated the line:
fhat[ii] <- f_branch2[ii, if_branch2[ii]]
so you'll need to remove the second instance of that as well.

Related

Problem generating wavelet variance/covarance

How do I solve the error?
Here's my script:
A=x
B=y
wf <- "la8"
J <- 8
N<- length(A)
A.modwt <- modwt(A, wf, J)
B.modwt <- modwt(B, wf, J)
A.modwt.bw <- brick.wall(A.modwt, wf)
B.modwt.bw <- brick.wall(B.modwt, wf)
WCOV = wave.covariance(A.modwt.bw, B.modwt.bw)
Error in x[c((n%/%2):n, 1:(n%/%2 - 1))] :
only 0's may be mixed with negative subscripts

Splitting string columns FAST in R

I have a data frame with 107 columns and 745000 rows (much bigger than in my example).
The case is, that I have character type columns which I want to separate, because they seem to contain some type-ish ending at the end of each sequence.
I want to saparate these type-ending parts to new columns.
I have made my own solution, but it seem to be far too slow for iterating through all the 745000 rows 53 times.
So I embed my solution in the following code, with some arbitrary data:
set.seed(1)
code_1 <- paste0(round(runif(5000, 100000, 999999)), "_", round(runif(1000, 1, 15)))
code_2 <- sample(c(paste0(round(runif(10, 100000, 999999)), "_", round(runif(10, 1, 15))), NA), 5000, replace = TRUE)
code_3 <- sample(c(paste0(round(runif(3, 100000, 999999)), "_", round(runif(3, 1, 15))), NA), 5000, replace = TRUE)
code_4 <- sample(c(paste0(round(runif(1, 100000, 999999)), "_", round(runif(1, 1, 15))), NA), 5000, replace = TRUE)
code_type_1 <- rep(NA, 5000)
code_type_2 <- rep(NA, 5000)
code_type_3 <- rep(NA, 5000)
code_type_4 <- rep(NA, 5000)
df <- data.frame(cbind(code_1,
code_2,
code_3,
code_4,
code_type_1,
code_type_2,
code_type_3,
code_type_4),
stringsAsFactors = FALSE)
df_new <- data.frame(code_1 = character(),
code_2 = character(),
code_3 = character(),
code_4 = character(),
code_type_1 = character(),
code_type_2 = character(),
code_type_3 = character(),
code_type_4 = character(),
stringsAsFactors = FALSE)
for (i in 1:4) {
i_t <- i + 4
temp <- strsplit(df[, c(i)], "[_]")
for (j in 1:nrow(df)) {
df_new[c(j), c(i)] <- unlist(temp[j])[1]
df_new[c(j), c(i_t)] <- ifelse(is.na(unlist(temp[j])[1]), NA, unlist(temp[j])[2])
}
print(i)
}
for (i in 1:8) {
df_new[, c(i)] <- factor(df_new[, c(i)])
}
Do anyone have some ideas how to speed things up here?
First we pre-allocate the results data.frame to the desired final length. This is very important; see The R Inferno, Circle 2. Then we vectorize the inner loop. We also use fixed = TRUE and avoid the regex in strsplit.
system.time({
df_new1 <- data.frame(code_1 = character(nrow(df)),
code_2 = character(nrow(df)),
code_3 = character(nrow(df)),
code_4 = character(nrow(df)),
code_type_1 = character(nrow(df)),
code_type_2 = character(nrow(df)),
code_type_3 = character(nrow(df)),
code_type_4 = character(nrow(df)),
stringsAsFactors = FALSE)
for (i in 1:4) {
i_t <- i + 4
temp <- do.call(rbind, strsplit(df[, c(i)], "_", fixed = TRUE))
df_new1[, i] <- temp[,1]
df_new1[, i_t] <- ifelse(is.na(temp[,1]), NA, temp[,2])
}
df_new1[] <- lapply(df_new1, factor)
})
# user system elapsed
# 0.029 0.000 0.029
all.equal(df_new, df_new1)
#[1] TRUE
Of course, there are ways to make this even faster, but this is close to your original approach and should be sufficient.
Here's another way, using gsub inside a custom function in combination with purrr::dmap() - which is equivalent to lapply, but outputs a data.frame instead of a list.
library(purrr)
# Define function which gets rid of everything after and including "_"
replace01 <- function(df, ptrn = "_.*")
dmap(df[,1:4], gsub, pattern = ptrn, replacement = "")
# Because "pattern" is argument we can change it to get 2nd part, then cbind()
test <- cbind(replace01(df),
replace01(df, ptrn = ".*_"))
Note that the output here character columns, you can always convert them to factor if you like.
Another possibility:
setNames(do.call(rbind.data.frame, lapply(1:nrow(df), function(i) {
x <- stri_split_fixed(df[i, 1:4], "_", 2, simplify=TRUE)
y <- c(x[,1], x[,2])
y[y==""] <- NA
y
})), colnames(df)) -> df_new
or
setNames(do.call(rbind.data.frame, lapply(1:nrow(df), function(i) {
x <- stri_split_fixed(df[i, 1:4], "_", 2, simplify=TRUE)
c(x[,1], x[,2])
})), colnames(df)) -> df_new
df_new[df_new==""] <- NA
df_new
which is marginally faster:
Unit: milliseconds
expr min lq mean median uq max neval cld
na_after 669.8357 718.1301 724.8803 723.5521 732.9998 790.1405 10 a
na_inner 719.3362 738.1569 766.4267 762.1594 791.6198 825.0269 10 b

R deleting duplicates when duplicates slightly differ by 1 or 2 letters

I am collecting tweets with the twitteR package and get many duplicates. This code works fine:
tweets <- searchTwitter(keyword, n=500, lang="en", since=NULL, until=NULL, retryOnRateLimit=100)
mydata <- sapply(tweets, function(x) x$getText())
mydata <- unique(mydata, incomparables = F, nmax = NA)
The problem is that it actually doesn't delete any duplicates because it doesn't recognises them as such. The duplicate tweets typically contain shortened URLS that differ by 1 or 2 digits. So I tried to clean the tweets of URLS with this code:
tweets <- searchTwitter(keyword, n=500, lang="en", since=NULL, until=NULL,
retryOnRateLimit=100)
mydata <- sapply(tweets, function(x) x$getText())
mydata <- data.frame(tweetsText, stringsAsFactors = FALSE)
names(mydata) <- c('words')
removeURL <- function(x) gsub("http[[:alnum:]]*", "", x)
mydata$words <- removeURL(mydata$words)
removeURL <- function(x) gsub("https[[:alnum:]]*", "", x)
mydata$words <- removeURL(mydata$words)
mydata$words <- unique(mydata$words, incomparables = F, nmax = NA)
Now I get the error message:
Error in $<-.data.frame(*tmp*, "words", value = c("Tripping around #DisneySprings.....) : replacement has 295 rows, data has 300
Advice? Thanks!
Your error is easily reproducible:
mydata <- data.frame(list(w = c(0, 1, 0, 1)))
mydata$words <- c(0, 1, 1)
# Error in `$<-.data.frame`(`*tmp*`, "words", value = c(0, 1, 1)) :
# replacement has 3 rows, data has 4
This just means that you need to assign a vector of the same length as the length of the data frame.
To filter out duplicate values you need to change the last line of your code to:
res <- mydata[!duplicated(mydata$words), ]

How can i find the sum of a list of functions in R?

So i have a list of functions.I want to create a for loop that returns (obviously as a function) the sum of them.
In order to create a list of functions inside a for loop i am using this code
##CODE
f=dnorm
h=function(x){log(f(x))}
S=c(-3,-2,-1,0,1,2,3)
K=matrix(rep(1:length(S),2),ncol=2)
for(i in 1:length(S)){
K[i,]=c(S[i],h(S[i]))
}
funcs=list()
## LOOP TO DEFINE THE LINES
for(i in 1:6){
## Make function name
funcName <- paste( 'hl', i,i+1, sep = '' )
## Make function
func1 = paste('function(x){ (K[',i,'+1,2]-K[',i,',2])/(K[',i,'+1,1]-K[',i,',1])*x+
K[',i,'+1,2]-((K[',i,'+1,2]-K[',i,',2])/(K[',i,'+1,1]-K[',i,',1]))*K[',i,'+1,1]}',sep
= '')
funcs[[funcName]] = eval(parse(text=func1))
}
which creates a list of 6 functions. How can I get their sum? I tried using the apply commands but either my syntax is not correct or they do not work.
P.S I am actually trying to write my one code for the ars command.
As Nick pointed out, "the sum of functions" doesn't make sense. I'm wildly guessing that you want to evaluate at function at some point (at S?) and then take the sum of those values. This should do the trick.
rowSums(sapply(funcs, function(f) f(S)))
Much of your code can be written more cleanly, and in a vectorised way.
f <- dnorm
h <- function(x) log(f(x))
S <- -3:3
K <- cbind(S, h(S)) #No need to define this twice; no need to use a loop
i <- seq_len(6)
funcNames <- paste('hl', i, i+1, sep = '') #paste is vectorised
#You can avoid using `paste`/`eval`/`parse` with this function to create the functions
#Can possibly be done even more cleanly by using local
makeFunc <- function(i)
{
evalq(substitute(
function(x)
{
(K[i + 1, 2] - K[i, 2]) / (K[i + 1, 1] - K[i, 1]) * x +
K[i + 1, 2] -
((K[i + 1, 2] - K[i, 2]) / (K[i + 1, 1] - K[i, 1])) * K[i + 1, 1]
},
list(i = i)
))
}
funcs <- lapply(i, makeFunc)
names(funcs) <- funcNames
rowSums(sapply(funcs, function(f) f(S)))

trying to append a list, but something breaks

I'm trying to create an empty list which will have as many elements as there are num.of.walkers. I then try to append, to each created element, a new sub-list (length of new sub-list corresponds to a value in a.
When I fiddle around in R everything goes smooth:
list.of.dist[[1]] <- vector("list", a[1])
list.of.dist[[2]] <- vector("list", a[2])
list.of.dist[[3]] <- vector("list", a[3])
list.of.dist[[4]] <- vector("list", a[4])
I then try to write a function. Here is my feeble attempt that results in an error. Can someone chip in what am I doing wrong?
countNumberOfWalks <- function(walk.df) {
list.of.walkers <- sort(unique(walk.df$label))
num.of.walkers <- length(unique(walk.df$label))
#Pre-allocate objects for further manipulation
list.of.dist <- vector("list", num.of.walkers)
a <- c()
# Count the number of walks per walker.
for (i in list.of.walkers) {
a[i] <- nrow(walk.df[walk.df$label == i,])
}
a <- as.vector(a)
# Add a sublist (length = number of walks) for each walker.
for (i in i:num.of.walkers) {
list.of.dist[[i]] <- vector("list", a[i])
}
return(list.of.dist)
}
> num.of.walks.per.walker <- countNumberOfWalks(walk.df)
Error in vector("list", a[i]) : vector size cannot be NA
Assuming 'walk.df' is something like:
walk.df <- data.frame(label=sample(1:10,100,T),var2=1:100)
then:
countNumberOfWalks <- function(walk.df) {
list.of.walkers <- sort(unique(walk.df$label))
num.of.walkers <- length(unique(walk.df$label))
list.of.dist <- vector("list", num.of.walkers)
for (i in 1:num.of.walkers) {
list.of.dist[[i]] <- vector("list",
nrow(walk.df[walk.df$label == list.of.walkers[i],]))}
return(list.of.dist)
}
Will achieve what you're after.