Output result not changing when I choose input values in dropdown menu - shiny

I have a problem with the shiny app in that the plots does not change when I choose different input values in the app. In my example I want to choose different "miRNA" resulting in a different survival-plot
Here is my app:
library(dplyr)
require(survminer)
library(tidyverse)
require(reshape2)
library(shiny)
library(tidyr)
require(survival)
example data:
df.miRNA.cpm <- structure(list(`86` = c(5.57979757386892, 17.0240095264258, 4.28380151026145,
13.0457611762755, 12.5531123449841), `175` = c(5.21619202802748,
15.2849097474841, 2.46719979911461, 10.879496005461, 9.66416497290915
), `217` = c(5.42796072966512, 17.1413407297933, 5.15230233060323,
12.2646127361351, 12.1031024927547), `394` = c(-1.1390337316217,
15.1021660424984, 4.63168157763046, 11.1299079134792, 9.55572588729967
), `444` = c(5.06134249676025, 14.5442494311861, -0.399445049232868,
7.45775961504073, 9.92629675808998)), row.names = c("hsa_let_7a_3p",
"hsa_let_7a_5p", "hsa_let_7b_3p", "hsa_let_7b_5p", "hsa_let_7c_5p"
), class = "data.frame")
df.miRNA.cpm$miRNA <- rownames(df.miRNA.cpm)
ss.survival.shiny.miRNA.miRNA <- structure(list(ID = c("86", "175", "217", "394", "444"), TimeDiff = c(71.0416666666667,
601.958333333333, 1130, 1393, 117.041666666667), Status = c(1L,
1L, 0L, 0L, 1L)), row.names = c(NA, 5L), class = "data.frame")
Joint the two example data frames:
data_prep.miRNA <- df.miRNA.cpm %>%
tidyr::pivot_longer(-miRNA, names_to = "ID") %>%
left_join(ss.survival.shiny.miRNA.miRNA)
Example of the joined data:
> data_prep.miRNA
# A tibble: 153,033 x 5
miRNA ID value TimeDiff Status
<chr> <chr> <dbl> <dbl> <int>
1 hsa_let_7a_3p 86 5.58 71.0 1
2 hsa_let_7a_3p 175 5.22 602. 1
3 hsa_let_7a_3p 217 5.43 1130 0
4 hsa_let_7a_3p 394 -1.14 1393 0
5 hsa_let_7a_3p 444 5.06 117. 1
6 hsa_let_7a_3p 618 4.37 1508 0
7 hsa_let_7a_3p 640 2.46 1409 0
8 hsa_let_7a_3p 829 0.435 919. 0
9 hsa_let_7a_3p 851 -1.36 976. 0
10 hsa_let_7a_3p 998 3.87 1196. 0
# … with 153,023 more rows
Shiny:
ui.miRNA <- fluidPage(
selectInput("MicroRNA", "miRNA", choices = unique(data_prep.miRNA$miRNA)),
plotOutput("myplot"))
server <- function(input, output, session) {
data_selected <- reactive({
filter(data_prep.miRNA, miRNA %in% input$MicroRNA)
})
output$myplot <- renderPlot({
survfit(Surv(TimeDiff, Status) ~ cut(value, quantile(value, probs = c(0, .4, .8)), include.lowest=TRUE),data = data_selected())
ggsurvplot(fitSurv)
})
}
shinyApp(ui.miRNA, server)

You have to assign the output of survfit to fitSurv:
output$myplot <- renderPlot({
fitSurv <- survfit(Surv(TimeDiff, Status) ~ cut(value, quantile(value, probs = c(0, .4, .8)), include.lowest=TRUE),data = data_selected())
ggsurvplot(fitSurv)
})

Related

geom_smooth not showing in ggplot function

I am trying to add geom_smooth(method = 'loess'), however this is not showing up in the plot. I believe it is something about the numeric values, that geom_smooth is not recognizing the input as numeric?
> head(CH12F3.miRNA_prep.miRNA)
miRNA variable value
1 mmu-let-7a-1-3p 0h 0.5098628
2 mmu-let-7a-5p 0h 0.4286451
3 mmu-let-7b-3p 0h 0.0000000
4 mmu-let-7b-5p 0h 1.4925830
5 mmu-let-7c-2-3p 0h 1.0715206
6 mmu-let-7c-5p 0h 1.3836720
server <- function(input, output, session) {
data_selected <- reactive({
filter(CH12F3.miRNA_prep.miRNA, miRNA %in% input$MicroRNA)
})
output$myplot <- renderPlot({
ggplot(data_selected(), aes_string("variable", "value", colour = "variable")) +
geom_point() + theme_classic(base_size = 12) +
labs(colour="Time Point",x="Time",y="Expression (cpm,log2)\nTreated/Control")+
theme(axis.text.x = element_text(angle = 45,hjust = 1)) + geom_smooth(method = 'loess')
} )
}
In your ggplot, try aes(as.numeric(variable), as.numeric(value), color=variable) instead of aes_string().

split string into non-overlapping segments

I wish to split strings into non-overlapping segments where the endpoints of a segment are numbers within a field of dots. I can do this using the code below. However, this code seems to be overly complex and involves nested for-loops. Is there a simpler way, ideally using regex in base R?
Here is an example and the desired.result.
my.data <- read.table(text = '
my.string cov1 cov2
11....... 1 A
1.1.2.1.1 2 B
1234..... 3 C
1...2...3 4 C
..3..4... 5 D
', header = TRUE, stringsAsFactors = FALSE)
desired.result <- read.table(text = '
my.string cov1 cov2
11....... 1 A
1.1...... 2 B
..1.2.... 2 B
....2.1.. 2 B
......1.1 2 B
12....... 3 C
.23...... 3 C
..34..... 3 C
1...2.... 4 C
....2...3 4 C
..3..4... 5 D
', header = TRUE, stringsAsFactors = FALSE, na.strings = 'NA')
new.data <- data.frame(do.call(rbind, strsplit(my.data$my.string,'')), stringsAsFactors = FALSE)
n.segments <- rowSums(!(new.data[1:ncol(new.data)] == '.')) - 1
my.end.points <- do.call(rbind, gregexpr("[0-9]", my.data$my.string, perl=TRUE))
my.end.point.char <- do.call(rbind, strsplit(my.data$my.string, ""))
my.end.point.char <- t(apply(my.end.point.char, 1, as.numeric))
new.strings <- matrix('.', nrow = sum(n.segments), ncol = max(nchar(my.data$my.string)))
new.cov <- as.data.frame(matrix(NA, nrow = sum(n.segments), ncol = (ncol(my.data) - 1)))
m <- 1
for(i in 1:nrow(new.data)) {
for(j in 1:n.segments[i]) {
for(k in 1:ncol(new.strings)) {
new.strings[m, my.end.points[i, j ] ] <- my.end.point.char[i, my.end.points[i, j ]]
new.strings[m, my.end.points[i, (j+1)] ] <- my.end.point.char[i, my.end.points[i,(j+1)]]
new.cov[m,] <- my.data[i, c(2:ncol(my.data))]
}
m <- m + 1
}
}
my.result <- data.frame(my.string = apply(new.strings, 1, function(x) paste0(x, collapse = '')), stringsAsFactors = FALSE)
my.result <- data.frame(my.result, new.cov)
colnames(my.result) <- names(my.data)
all.equal(desired.result, my.result)
# [1] TRUE
w <- nchar(my.data$my.string[1L]);
dps <- character(w+1L); dps[1L] <- ''; for (i in seq_len(w)) dps[i+1L] <- paste0(dps[i],'.');
x <- Map(my.data$my.string,gregexpr('[^.]',my.data$my.string),f=function(s,g)
if (length(g)<3L) s else sapply(seq_len(length(g)-1L),function(gi)
paste0(dps[g[gi]],substr(s,g[gi],g[gi+1L]),dps[w-g[gi+1L]+1L])
)
);
res <- transform(my.data[rep(seq_len(nrow(my.data)),sapply(x,length)),],my.string=unlist(x));
res;
## my.string cov1 cov2
## 1 11....... 1 A
## 2 1.1...... 2 B
## 2.1 ..1.2.... 2 B
## 2.2 ....2.1.. 2 B
## 2.3 ......1.1 2 B
## 3 12....... 3 C
## 3.1 .23...... 3 C
## 3.2 ..34..... 3 C
## 4 1...2.... 4 C
## 4.1 ....2...3 4 C
## 5 ..3..4... 5 D
Note: You can replace the sapply(x,length) piece with lengths(x) if you have a recent enough version of R.
Benchmarking
library(microbenchmark);
bgoldst <- function(my.data) { w <- nchar(my.data$my.string[1L]); dps <- character(w+1L); dps[1L] <- ''; for (i in seq_len(w)) dps[i+1L] <- paste0(dps[i],'.'); x <- Map(my.data$my.string,gregexpr('[^.]',my.data$my.string),f=function(s,g) if (length(g)<3L) s else sapply(seq_len(length(g)-1L),function(gi) paste0(dps[g[gi]],substr(s,g[gi],g[gi+1L]),dps[w-g[gi+1L]+1L]))); transform(my.data[rep(seq_len(nrow(my.data)),sapply(x,length)),],my.string=unlist(x)); };
rawr <- function(my.data) { f <- function(x, m) { y <- gsub('.', '\\.', x); cs <- attr(m, "capture.start"); cl <- attr(m, "capture.length"); Vectorize(`substr<-`)(y, cs, cl + cs - 1, Vectorize(substr)(x, cs, cl + cs - 1)); }; m <- gregexpr('(?=([0-9][.]*[0-9]))', my.data$my.string, perl = TRUE); strs <- Map(f, my.data$my.string, m); tmp <- `rownames<-`(my.data[rep(1:nrow(my.data), sapply(strs,length)), ], NULL); tmp$my.string <- unlist(strs); tmp; };
carroll <- function(my.data) { strings <- sapply(my.data$my.string, function(x) { stri_match_all_regex(x, "(?=([0-9]{1}\\.*[0-9]{1}))")[[1]][,2]; }); strpos <- lapply(1:length(strings), function(x) { y <- {nchar(sub(perl=T,'^\\.*\\K.*','',my.data$my.string[x]))+c(0, cumsum(nchar(strings[[x]])-1))}; return(y[-length(y)]); }); w <- nchar(my.data$my.string[1L]); output.result <- data.frame(my.string = cbind(unlist(sapply(1:length(strings), function(y) { cbind(sapply(1:length(strings[[y]]), function(x) { leftstr <- paste0(paste0(rep(".", strpos[[y]][[x]]), collapse=""), strings[[y]][x]); rightstr <- paste0(rep(".", w-nchar(leftstr)), collapse=""); paste0(leftstr, rightstr, collapse=""); })); }))), my.data[unlist(sapply(1:length(strings), function(x) { rep(x, sapply(strings, length)[x]); })), c(2,3)], stringsAsFactors=FALSE); row.names(output.result) <- NULL; output.result; };
## OP's sample input
my.data <- read.table(text = '
my.string cov1 cov2
11....... 1 A
1.1.2.1.1 2 B
1234..... 3 C
1...2...3 4 C
..3..4... 5 D
', header = TRUE, stringsAsFactors = FALSE);
ex <- bgoldst(my.data);
all.equal(ex,rawr(my.data),check.attributes=F);
## [1] TRUE
all.equal(ex,carroll(my.data),check.attributes=F);
## [1] TRUE
microbenchmark(bgoldst(my.data),rawr(my.data),carroll(my.data));
## Unit: microseconds
## expr min lq mean median uq max neval
## bgoldst(my.data) 422.094 451.816 483.5305 476.6195 503.775 801.421 100
## rawr(my.data) 1096.502 1160.863 1277.7457 1236.7720 1298.996 3092.785 100
## carroll(my.data) 1130.287 1176.900 1224.6911 1213.2515 1247.249 1525.437 100
## scale test
set.seed(1L);
NR <- 1e4; NS <- 30L; probDot <- 3/4;
x <- c('.',0:9); probs <- c(probDot,rep((1-probDot)/10,10L)); my.data <- data.frame(my.string=do.call(paste0,as.data.frame(replicate(NS,sample(x,NR,T,probs)))),cov1=sample(seq_len(NR)),cov2=sample(make.unique(rep(LETTERS,len=NR))),stringsAsFactors=F);
repeat { w <- which(sapply(gregexpr('[^.]',my.data$my.string),length)==1L); if (length(w)==0L) break; my.data$my.string[w] <- do.call(paste0,as.data.frame(replicate(NS,sample(x,length(w),T,probs)))); }; ## prevent single-digit strings, which rawr and carroll solutions don't support
ex <- bgoldst(my.data);
all.equal(ex,rawr(my.data),check.attributes=F);
## [1] TRUE
all.equal(ex,carroll(my.data),check.attributes=F);
## [1] TRUE
microbenchmark(bgoldst(my.data),rawr(my.data),carroll(my.data),times=1L);
## Unit: milliseconds
## expr min lq mean median uq max neval
## bgoldst(my.data) 904.887 904.887 904.887 904.887 904.887 904.887 1
## rawr(my.data) 2736.462 2736.462 2736.462 2736.462 2736.462 2736.462 1
## carroll(my.data) 108575.001 108575.001 108575.001 108575.001 108575.001 108575.001 1
my.data <- read.table(text = '
my.string cov1 cov2
11....... 1 A
1.1.2.1.1 2 B
1234..... 3 C
1...2...3 4 C
..3..4... 5 D
', header = TRUE, stringsAsFactors = FALSE)
f <- function(x, m) {
if (nchar(gsub('.', '', x, fixed = TRUE)) < 2L) return(x)
y <- gsub('.', '\\.', x)
cs <- attr(m, "capture.start")
cl <- attr(m, "capture.length")
Vectorize(`substr<-`)(y, cs, cl + cs - 1, Vectorize(substr)(x, cs, cl + cs - 1))
}
m <- gregexpr('(?=([0-9][.]*[0-9]))', my.data$my.string, perl = TRUE)
strs <- Map(f, my.data$my.string, m)
tmp <- `rownames<-`(my.data[rep(1:nrow(my.data), lengths(strs)), ], NULL)
tmp$my.string <- unlist(strs)
# my.string cov1 cov2
# 1 11....... 1 A
# 2 1.1...... 2 B
# 3 ..1.2.... 2 B
# 4 ....2.1.. 2 B
# 5 ......1.1 2 B
# 6 12....... 3 C
# 7 .23...... 3 C
# 8 ..34..... 3 C
# 9 1...2.... 4 C
# 10 ....2...3 4 C
# 11 ..3..4... 5 D
identical(tmp, desired.result)
# [1] TRUE
Here's an option. Not clean, but neither is the problem.
library(stringi)
## isolate the strings, allowing overlap via positive lookaheads
strings <- sapply(my.data$my.string, function(x) {
stri_match_all_regex(x, "(?=([0-9]{1}\\.*[0-9]{1}))")[[1]][,2]
})
Identify the offsets at the start of each group.
## identify the . offsets
strpos <- lapply(1:length(strings), function(x) {
y <- {nchar(sub(perl=T,'^\\.*\\K.*','',my.data$my.string[x]))+c(0, cumsum(nchar(strings[[x]])-1))}
return(y[-length(y)])
})
Build up the data.frame with only 2 sapply loops.
## collate the results using sapply
w <- nchar(my.data$my.string[1L]);
output.result <- data.frame(
my.string = cbind(unlist(sapply(1:length(strings), function(y) {
cbind(sapply(1:length(strings[[y]]), function(x) {
leftstr <- paste0(paste0(rep(".", strpos[[y]][[x]]), collapse=""), strings[[y]][x])
rightstr <- paste0(rep(".", w-nchar(leftstr)), collapse="")
paste0(leftstr, rightstr, collapse="")
}))
}))),
my.data[unlist(sapply(1:length(strings), function(x) {
rep(x, sapply(strings, length)[x])
})), c(2,3)], stringsAsFactors=FALSE
)
row.names(output.result) <- NULL
output.result
my.string cov1 cov2
1 11....... 1 A
2 1.1...... 2 B
3 ..1.2.... 2 B
4 ....2.1.. 2 B
5 ......1.1 2 B
6 12....... 3 C
7 .23...... 3 C
8 ..34..... 3 C
9 1...2.... 4 C
10 ....2...3 4 C
11 ..3..4... 5 D
identical(desired.result, output.result)
[1] TRUE

How can I convert Degree minute sec to Decimal in R?

I have this dataframe:
Lat Long
59 44 50 151 45 11
59 49 28 154 52 56
59 46 42 150 45 15
How can I convert this into decimal columns?
lat is in dd mm ss and long is in ddd mm ss
I found a similar solution here, but couldn't adapt the regex for my case.
Converting geo coordinates from degree to decimal
Try this function:
angle2dec <- function(angle) {
angle <- as.character(angle)
x <- do.call(rbind, strsplit(angle, split=' '))
x <- apply(x, 1L, function(y) {
y <- as.numeric(y)
y[1] + y[2]/60 + y[3]/3600
})
return(x)
}
Then you can apply it to each column in your data frame:
new_df <- apply(df, 2L, angle2dec)
new_df
Lat Long
[1,] 59.74722 151.7531
[2,] 59.82444 154.8822
[3,] 59.77833 150.7542
or just
df$Lat <- angle2dec(df$Lat)
df$Long <- angle2dec(df$Long)
May I suggest the tidyr approach:
df <- data.frame( Lat=c("59 44 50","59 49 28","59 46 42"),
Long=c("151 45 11","154 52 56","150 45 15"))
library(tidyr); library(dplyr)
df %>%
separate(Lat, paste("lat",c("d","m","s"), sep="_") ) %>%
separate(Long, paste("long",c("d","m","s"), sep="_" ) ) %>%
mutate_each(funs(as.numeric)) %>%
transmute(lat_dec=lat_d + lat_m/60 + lat_s/60^2,
long_dec=long_d + long_m/60 + long_s/60^2)
# lat_dec long_dec
# 1 59.74722 151.7531
# 2 59.82444 154.8822
# 3 59.77833 150.7542
Here's an idea using splitstackshape:
library(dplyr)
library(splitstackshape)
df %>%
cSplit(c("Lat", "Long"), sep = " ") %>%
transmute(Lat = Lat_1 + Lat_2 / 60 + Lat_3 / 60^2,
Long = Long_1 + Long_2 / 60 + Long_3 / 60^2)
Which gives:
# Lat Long
#1: 59.74722 151.7531
#2: 59.82444 154.8822
#3: 59.77833 150.7542

Rearranging the structure of many txt files and then merging them in one data frame

I would appreciate your help with this a lot!
I have ~4.5k txt files which look like this:
Simple statistics using MSPA parameters: 8_3_1_1 on input file: 20130815 104359 875 000000 0528 0548_result.tif
MSPA-class [color]: Foreground/data pixels [%] Frequency
============================================================
CORE(s) [green]: -- 0
CORE(m) [green]: 48.43/13.45 1
CORE(l) [green]: -- 0
ISLET [brown]: 3.70/ 1.03 20
PERFORATION [blue]: 0.00/ 0.00 0
EDGE [black]: 30.93/ 8.59 11
LOOP [yellow]: 9.66/ 2.68 6
BRIDGE [red]: 0.00/ 0.00 0
BRANCH [orange]: 7.28/ 2.02 40
Background [grey]: --- /72.22 11
Missing [white]: 0.00 0
I want to read all txt files from a directory into R and then perform a rearranging task on them before merging them together.
The values in the txt files can change, so in places where there is a 0.00 now, could be a relevant number in some files (so we need those). For the fields where there are -- now, it would be good if the script could test if there are -- , or a number. If there are the --, then it should turn them into NAs. On the other hand, real 0.00 values are of value and I need them. There is only one value for the Missing white column (or row here), that value should then be copied into both columns, foreground% and data pixels%.
The general rearranging which I need is to make all the data available as columns with only 1 row per txt file. For every row of data in the txt file here, there should be 3 columns in the output file (foreground%, data pixel% and frequency for every color). The name of the row should be the image name which is mentioned in the beginning of the file, here: 20130815 104359 875 000000 0528 0548
The rest can be omitted.
The output should look something like this:
I am working on this simultaneously but am not sure which direction to take. So any help is more than welcome!
Best,
Moritz
This puts it in the format you want, I think, but the example doesn't match your image so I can't be sure:
(lf <- list.files('~/desktop', pattern = '^image\\d+.txt', full.names = TRUE))
# [1] "/Users/rawr/desktop/image001.txt" "/Users/rawr/desktop/image002.txt"
# [3] "/Users/rawr/desktop/image003.txt"
lapply(lf, function(xx) {
rl <- readLines(con <- file(xx), warn = FALSE)
close(con)
## assuming the file name is after "file: " until the end of the string
## and ends in .tif
img_name <- gsub('.*file:\\s+(.*).tif', '\\1', rl[1])
## removes each string up to and including the ===== string
rl <- rl[-(1:grep('==', rl))]
## remove leading whitespace
rl <- gsub('^\\s+', '', rl)
## split the remaining lines by larger chunks of whitespace
mat <- do.call('rbind', strsplit(rl, '\\s{2, }'))
## more cleaning, setting attributes, etc
mat[mat == '--'] <- NA
mat <- cbind(image_name = img_name, `colnames<-`(t(mat[, 2]), mat[, 1]))
as.data.frame(mat)
})
I created three files using your example and made each one slightly different to show how this would work on a directory with several files:
# [[1]]
# image_name CORE(s) [green]: CORE(m) [green]: CORE(l) [green]: ISLET [brown]: PERFORATION [blue]: EDGE [black]: LOOP [yellow]: BRIDGE [red]: BRANCH [orange]: Background [grey]: Missing [white]:
# 1 20130815 104359 875 000000 0528 0548_result <NA> 48.43/13.45 <NA> 3.70/ 1.03 0.00/ 0.00 30.93/ 8.59 9.66/ 2.68 0.00/ 0.00 7.28/ 2.02 --- /72.22 0.00
#
# [[2]]
# image_name CORE(s) [green]: CORE(m) [green]: CORE(l) [green]: ISLET [brown]: PERFORATION [blue]: EDGE [black]: LOOP [yellow]: BRIDGE [red]: BRANCH [orange]: Background [grey]: Missing [white]:
# 1 20139341 104359 875 000000 0528 0548_result 23 48.43/13.45 23 <NA> 0.00/ 0.00 30.93/ 8.59 9.66/ 2.68 0.00/ 0.00 7.28/ 2.02 --- /72.22 0.00
#
# [[3]]
# image_name CORE(s) [green]: CORE(m) [green]: CORE(l) [green]: ISLET [brown]: PERFORATION [blue]: EDGE [black]: LOOP [yellow]: BRIDGE [red]: BRANCH [orange]: Background [grey]: Missing [white]:
# 1 20132343 104359 875 000000 0528 0548_result <NA> <NA> <NA> <NA> <NA> 30.93/ 8.59 9.66/ 2.68 0.00/ 0.00 7.28/ 2.02 <NA> 0.00
EDIT
made a few changes to extract all the info:
(lf <- list.files('~/desktop', pattern = '^image\\d+.txt', full.names = TRUE))
# [1] "/Users/rawr/desktop/image001.txt" "/Users/rawr/desktop/image002.txt"
# [3] "/Users/rawr/desktop/image003.txt"
res <- lapply(lf, function(xx) {
rl <- readLines(con <- file(xx), warn = FALSE)
close(con)
img_name <- gsub('.*file:\\s+(.*).tif', '\\1', rl[1])
rl <- rl[-(1:grep('==', rl))]
rl <- gsub('^\\s+', '', rl)
mat <- do.call('rbind', strsplit(rl, '\\s{2, }'))
dat <- as.data.frame(mat, stringsAsFactors = FALSE)
tmp <- `colnames<-`(do.call('rbind', strsplit(dat$V2, '[-\\/\\s]+', perl = TRUE)),
c('Foreground','Data pixels'))
dat <- cbind(dat[, -2], tmp, image_name = img_name)
dat[] <- lapply(dat, as.character)
dat[dat == ''] <- NA
names(dat)[1:2] <- c('MSPA-class','Frequency')
zzz <- reshape(dat, direction = 'wide', idvar = 'image_name', timevar = 'MSPA-class')
names(zzz)[-1] <- gsub('(.*)\\.(.*) (?:.*)', '\\2_\\1', names(zzz)[-1], perl = TRUE)
zzz
})
here is the result (I just transformed into a long matrix so it would be easier to read. the real results are in a very wide data frame, one for each file):
`rownames<-`(matrix(res[[1]]), names(res[[1]]))
# [,1]
# image_name "20130815 104359 875 000000 0528 0548_result"
# CORE(s)_Frequency "0"
# CORE(s)_Foreground "NA"
# CORE(s)_Data pixels "NA"
# CORE(m)_Frequency "1"
# CORE(m)_Foreground "48.43"
# CORE(m)_Data pixels "13.45"
# CORE(l)_Frequency "0"
# CORE(l)_Foreground "NA"
# CORE(l)_Data pixels "NA"
# ISLET_Frequency "20"
# ISLET_Foreground "3.70"
# ISLET_Data pixels "1.03"
# PERFORATION_Frequency "0"
# PERFORATION_Foreground "0.00"
# PERFORATION_Data pixels "0.00"
# EDGE_Frequency "11"
# EDGE_Foreground "30.93"
# EDGE_Data pixels "8.59"
# LOOP_Frequency "6"
# LOOP_Foreground "9.66"
# LOOP_Data pixels "2.68"
# BRIDGE_Frequency "0"
# BRIDGE_Foreground "0.00"
# BRIDGE_Data pixels "0.00"
# BRANCH_Frequency "40"
# BRANCH_Foreground "7.28"
# BRANCH_Data pixels "2.02"
# Background_Frequency "11"
# Background_Foreground "NA"
# Background_Data pixels "72.22"
# Missing_Frequency "0"
# Missing_Foreground "0.00"
# Missing_Data pixels "0.00"
with your sample data:
lf <- list.files('~/desktop/data', pattern = '.txt', full.names = TRUE)
`rownames<-`(matrix(res[[1]]), names(res[[1]]))
# [,1]
# image_name "20130815 103704 780 000000 0372 0616"
# CORE(s)_Frequency "0"
# CORE(s)_Foreground "NA"
# CORE(s)_Data pixels "NA"
# CORE(m)_Frequency "1"
# CORE(m)_Foreground "54.18"
# CORE(m)_Data pixels "15.16"
# CORE(l)_Frequency "0"
# CORE(l)_Foreground "NA"
# CORE(l)_Data pixels "NA"
# ISLET_Frequency "11"
# ISLET_Foreground "3.14"
# ISLET_Data pixels "0.88"
# PERFORATION_Frequency "0"
# PERFORATION_Foreground "0.00"
# PERFORATION_Data pixels "0.00"
# EDGE_Frequency "1"
# EDGE_Foreground "34.82"
# EDGE_Data pixels "9.75"
# LOOP_Frequency "1"
# LOOP_Foreground "4.96"
# LOOP_Data pixels "1.39"
# BRIDGE_Frequency "0"
# BRIDGE_Foreground "0.00"
# BRIDGE_Data pixels "0.00"
# BRANCH_Frequency "20"
# BRANCH_Foreground "2.89"
# BRANCH_Data pixels "0.81"
# Background_Frequency "1"
# Background_Foreground "NA"
# Background_Data pixels "72.01"
# Missing_Frequency "0"
# Missing_Foreground "0.00"
# Missing_Data pixels "0.00"
I copied and pasted your data on a text file and adjusted the space in order to have consistency between them. You might want to do it or if you can attach a text file, it would be easy to work with. You may use pastebin - http://en.wikipedia.org/wiki/Pastebin
First set your working directory as follows:
setwd("path of your file")
#EDIT: Create a single data frame of all files
split.row.data <- function(x){
a1 = sub("( )+(.*)", '\\2', x)
b1 = unlist(strsplit(sub("( )+(.*)", '\\2', (strsplit(a1, ":"))[[1]][2]), " "))
c1 = unlist(strsplit(b1[1], "/"))
if(length(c1) == 1){
if(which(b1[1:2] %in% "") == 1){
c1 = c(NA, c1)
}else if(which(b1[1:2] %in% "") == 2){
c1 = c(c1, NA)
}
}
c1[which(c1 %in% c("--", "--- "))] <- NA
return(c(unlist(strsplit(strsplit(a1, ":")[[1]][1], " ")),
c1,
b1[length(b1)]))
}
df2 <- data.frame(matrix(nrow = 1, ncol = 6), stringsAsFactors = FALSE)
file_list = list.files('~/desktop', pattern = '^image\\d+.txt', full.names = TRUE)
for (infile in file_list){
file_data <- readLines(con <- file(infile))
close(con)
filename = sub("(.*)(input file:)(.*)(.tif)", "\\3", file_data[3])
a2 <- file_data[7:length(file_data)]
d1 = lapply(a2, function(x) split.row.data(x))
df1 <- data.frame(matrix(nrow= length(d1), ncol = 5), stringsAsFactors = FALSE)
for(i in 1:length(d1)){df1[i, ] <- d1[[i]]}
df1 <- cbind(data.frame(rep(filename, nrow(df1)), stringsAsFactors = FALSE),
df1)
colnames(df1) <- colnames(df2)
df2 <- rbind(df2, df1)
}
df2 <- df2[2:nrow(df2), ]
df2[,4] <- as.numeric(df2[,4])
df2[,5] <- as.numeric(df2[,5])
df2[,6] <- as.numeric(df2[,6])
e1 = unlist(lapply(df2[,3], function(x) gsub(']', '', x)))
df2[,3] = unlist(lapply(e1, function(x) gsub("[[]", '', x)))
header_names <- unlist(lapply(strsplit(file_data[5], "/"), function(x) strsplit(x, " ")))
colnames(df2) <- c("filename",
strsplit(header_names[1], " ")[[1]][2],
"color",
header_names[2:length(header_names)])
row.names(df2) <- 1:nrow(df2)
output:
print(head(df2))
filename MSPA-class color Foreground data pixels [%] Frequency
1 20130815 103739 599 000000 0944 0788 CORE(s) green NA NA 0
2 20130815 103739 599 000000 0944 0788 CORE(m) green 63.46 17.41 1
3 20130815 103739 599 000000 0944 0788 CORE(l) green NA NA 0
4 20130815 103739 599 000000 0944 0788 ISLET brown 0.00 0.00 0
5 20130815 103739 599 000000 0944 0788 PERFORATION blue 0.00 0.00 0
6 20130815 103739 599 000000 0944 0788 EDGE black 35.00 9.60 1
#get data for only "background" from "MSPA-class" column
df2_background <- df2[which(df2[, "MSPA-class"] %in% "Background"), ]
print(df2_background)
filename MSPA-class color Foreground data pixels [%] Frequency
11 20130815 103739 599 000000 0944 0788 Background grey NA 72.57 1
22 20130815 143233 712 000000 1048 0520 Background grey NA 77.51 1
33 20130902 163929 019 000000 0394 0290 Background grey NA 54.55 6

Replace the first N dots of a string revisited

In January I asked how to replace the first N dots of a string: replace the first N dots of a string
DWin's answer was very helpful. Can it be generalized?
df.1 <- read.table(text = '
my.string other.stuff
1111111111111111 120
..............11 220
11.............. 320
1............... 320
.......1........ 420
................ 820
11111111111111.1 120
', header = TRUE)
nn <- 14
# this works:
df.1$my.string <- sub("^\\.{14}", paste(as.character(rep(0, nn)), collapse = ""),
df.1$my.string)
# this does not work:
df.1$my.string <- sub("^\\.{nn}", paste(as.character(rep(0, nn)), collapse = ""),
df.1$my.string)
Using sprintf you can have the desired output
nn <- 3
sub(sprintf("^\\.{%s}", nn),
paste(rep(0, nn), collapse = ""), df.1$my.string)
## [1] "1111111111111111" "000...........11" "11.............."
## [4] "1..............." "000....1........" "000............."
## [7] "11111111111111.1"
pattstr <- paste0("\\.", paste0( rep(".",nn), collapse="") )
pattstr
#[1] "\\..............."
df.1$my.string <- sub(pattstr,
paste0( rep("0", nn), collapse=""),
df.1$my.string)
> df.1
my.string other.stuff
1 1111111111111111 120
2 000000000000001 220
3 11.............. 320
4 100000000000000 320
5 00000000000000. 420
6 00000000000000. 820
7 11111111111111.1 120