Error : length of 'dimnames' [2] not equal to array extent - but number of columns IS equal to number of columnnames - rbind

maybe you can help me with a small problem. I picked some data out of a bigger data file and combined those data in the end to a table. In the last line I want to put columnnames to that table. Before I put those columnnames to that table the length is = 115. But when I put the columnnames to that table the length is suddenly = 112 and the above mentioned error occurs. But I also counted the number of columnnames and there are 115. Maybe do you have clue what to do?
Thank you in advance and kind regards,
Julian
Code:
setwd("/home/julian/Schreibtisch/Test")
# Alle einzulesenden Daten
dirnames <- dir()
for(i in 1:length(dirnames)){
# Basis-Angaben
INSIST_basics_1 <- read.csv(dirnames[i])[ ,c('mouse.x', 'TN.Nr.')]
INSIST_basics_1 <- subset(INSIST_basics_1, mouse.x > 0)
INSIST_basics_1 <- INSIST_basics_1[,c('TN.Nr.')]
INSIST_basics_1 <- c(INSIST_basics_1)
INSIST_basics_1 <- rbind(INSIST_basics_1)
INSIST_basics_2 <- read.csv(dirnames[i])[ ,c('mouse.x', 'Termin')]
INSIST_basics_2 <- subset(INSIST_basics_2, mouse.x > 0)
INSIST_basics_2 <- INSIST_basics_2[,c('Termin')]
INSIST_basics_2 <- c(INSIST_basics_2)
INSIST_basics_2 <- rbind(INSIST_basics_2)
INSIST_basics_3 <- read.csv(dirnames[i])[ ,c('mouse.x', 'Zuweisung')]
INSIST_basics_3 <- subset(INSIST_basics_3, mouse.x > 0)
INSIST_basics_3 <- INSIST_basics_3[,c('Zuweisung')]
INSIST_basics_3 <- c(INSIST_basics_3)
INSIST_basics_3 <- rbind(INSIST_basics_3)
INSIST_basics <- c(INSIST_basics_1,INSIST_basics_2,INSIST_basics_3)
INSIST_basics <- rbind(INSIST_basics)
# VAS
INSIST_VAS <- read.csv(dirnames[i])[ ,c('Word', 'trials_4.thisIndex', 'trials_6.thisIndex', 'trials_10.thisIndex', 'trials_7.thisIndex', 'trials_8.thisIndex', 'slider.response')]
INSIST_VAS <- INSIST_VAS[-which(INSIST_VAS$Word == ""), ]
INSIST_VAS_1 <- subset(INSIST_VAS, trials_4.thisIndex >= 0)
INSIST_VAS_1 <- INSIST_VAS_1[order(INSIST_VAS_1$trials_4.thisIndex),]
INSIST_VAS_1 <- INSIST_VAS_1[,c('slider.response')]
INSIST_VAS_1 <- (INSIST_VAS_1 - 1) *100
INSIST_VAS_2 <- subset(INSIST_VAS, trials_6.thisIndex >= 0)
INSIST_VAS_2 <- INSIST_VAS_2[order(INSIST_VAS_2$trials_6.thisIndex),]
INSIST_VAS_2 <- INSIST_VAS_2[,c('slider.response')]
INSIST_VAS_2 <- (INSIST_VAS_2 - 1) *100
INSIST_VAS_3 <- subset(INSIST_VAS, trials_10.thisIndex >= 0)
INSIST_VAS_3 <- INSIST_VAS_3[order(INSIST_VAS_3$trials_10.thisIndex),]
INSIST_VAS_3 <- INSIST_VAS_3[,c('slider.response')]
INSIST_VAS_3 <- (INSIST_VAS_3 - 1) *100
INSIST_VAS_4 <- subset(INSIST_VAS, trials_7.thisIndex >= 0)
INSIST_VAS_4 <- INSIST_VAS_4[order(INSIST_VAS_4$trials_7.thisIndex),]
INSIST_VAS_4 <- INSIST_VAS_4[,c('slider.response')]
INSIST_VAS_4 <- (INSIST_VAS_4 - 1) *100
INSIST_VAS_5 <- subset(INSIST_VAS, trials_8.thisIndex >= 0)
INSIST_VAS_5 <- INSIST_VAS_5[order(INSIST_VAS_5$trials_8.thisIndex),]
INSIST_VAS_5 <- INSIST_VAS_5[,c('slider.response')]
INSIST_VAS_5 <- (INSIST_VAS_5 - 1) *100
INSIST_VAS_all <- c(INSIST_VAS_1,INSIST_VAS_2,INSIST_VAS_3,INSIST_VAS_4,INSIST_VAS_5)
INSIST_VAS_all <- rbind(INSIST_VAS_all)
# FB tDCS
INSIST_tDCS <- read.csv(dirnames[i])[ ,c('itemIndex','questions', 'ratings')]
INSIST_tDCS <- INSIST_tDCS[-which(INSIST_tDCS$questions == ""), ]
INSIST_tDCS <- INSIST_tDCS[,c('ratings')]
INSIST_tDCS <- c(INSIST_tDCS)
INSIST_tDCS <- rbind(INSIST_tDCS)
# FB Zustand
INSIST_Zustand <- read.csv(dirnames[i])[ ,c('questionText', 'trials_9.thisIndex', 'trials_13.thisIndex', 'trials_15.thisIndex', 'slider_13.response')]
INSIST_Zustand <- INSIST_Zustand[-which(INSIST_Zustand$questionText == ""), ]
INSIST_Zustand_1 <- subset(INSIST_Zustand, trials_9.thisIndex >= 0)
INSIST_Zustand_1 <- INSIST_Zustand_1[order(INSIST_Zustand_1$trials_9.thisIndex),]
INSIST_Zustand_1 <- INSIST_Zustand_1[,c('slider_13.response')]
INSIST_Zustand_1 <- (INSIST_Zustand_1 - 1) *100
INSIST_Zustand_2 <- subset(INSIST_Zustand, trials_13.thisIndex >= 0)
INSIST_Zustand_2 <- INSIST_Zustand_2[order(INSIST_Zustand_2$trials_13.thisIndex),]
INSIST_Zustand_2 <- INSIST_Zustand_2[,c('slider_13.response')]
INSIST_Zustand_2 <- (INSIST_Zustand_2 - 1) *100
INSIST_Zustand_3 <- subset(INSIST_Zustand, trials_15.thisIndex >= 0)
INSIST_Zustand_3 <- INSIST_Zustand_3[order(INSIST_Zustand_3$trials_15.thisIndex),]
INSIST_Zustand_3 <- INSIST_Zustand_3[,c('slider_13.response')]
INSIST_Zustand_3 <- (INSIST_Zustand_3 - 1) *100
INSIST_Zustand_all <- c(INSIST_Zustand_1,INSIST_Zustand_2,INSIST_Zustand_3)
INSIST_Zustand_all <- rbind(INSIST_Zustand_all)
# Szenarien
INSIST_Szenarien <- read.csv(dirnames[i])[ ,c('scenario', 'trials_11.thisIndex', 'rating_2.response')]
INSIST_Szenarien <- INSIST_Szenarien[-which(INSIST_Szenarien$scenario == ""), ]
INSIST_Szenarien <- INSIST_Szenarien[order(INSIST_Szenarien$trials_11.thisIndex),]
INSIST_Szenarien <- INSIST_Szenarien[,c('rating_2.response')]
INSIST_Szenarien <- c(INSIST_Szenarien)
INSIST_Szenarien <- rbind(INSIST_Szenarien)
# Alle Abschnitte zusammenfuehren
INSIST_tab <- c(INSIST_basics, INSIST_VAS_all, INSIST_tDCS, INSIST_Zustand_all, INSIST_Szenarien)
INSIST_tab <- rbind(INSIST_tab)
colnames(INSIST_tab) <- c( "TN_Nr", "Termin", "Zuweisung", "Hungrig_1", "Satt_1", "Durstig_1", "Aengstlich_1", "Froehlich_1", "Gestresst_1", "Schlaefrig_1", "Konzentriert_1", "Traurig_1", "Essen_generell_1", "Essen_suess_1", "Essen_herzhaft_1", "Hungrig_2", "Satt_2", "Durstig_2", "Aengstlich_2", "Froehlich_2", "Gestresst_2", "Schlaefrig_2", "Konzentriert_2", "Traurig_2", "Essen_generell_2", "Essen_suess_2", "Essen_herzhaft_2", "Hungrig_3", "Satt_3", "Durstig_3", "Aengstlich_3", "Froehlich_3", "Gestresst_3", "Schlaefrig_3", "Konzentriert_3", "Traurig_3", "Essen_generell_3", "Essen_suess_3", "Essen_herzhaft_3", "Hungrig_4", "Satt_4", "Durstig_4", "Aengstlich_4", "Froehlich_4", "Gestresst_4", "Schlaefrig_4", "Konzentriert_4", "Traurig_4", "Essen_generell_4", "Essen_suess_4", "Essen_herzhaft_4", "Hungrig_5", "Satt_5", "Durstig_5", "Aengstlich_5", "Froehlich_5", "Gestresst_5", "Schlaefrig_5", "Konzentriert_5", "Traurig_5", "Essen_generell_5", "Essen_suess_5", "Essen_herzhaft_5", "tDCS_Jucken_1", "tDCS_Schmerzen_1", "tDCS_Brennen_1", "tDCS_Waerme_1", "tDCS_Metall_1", "tDCS_Ermuedung_1", "tDCS_Kopf_1", "tDCS_Jucken_2", "tDCS_Schmerzen_2", "tDCS_Brennen_2", "tDCS_Waerme_2", "tDCS_Metall_2", "tDCS_Ermuedung_2", "tDCS_Kopf_2","tDCS_Jucken_3", "tDCS_Schmerzen_3", "tDCS_Brennen_3", "tDCS_Waerme_3", "tDCS_Metall_3", "tDCS_Ermuedung_3", "tDCS_Kopf_3", 'Zustand_Herr_1', 'Zustand_kontrollieren_1', 'Zustand_Versuchungen_1', 'Zustand_Stimmung_1', 'Zustand_Druck_1', 'Zustand_Kontrolle_1', 'Zustand_willensstark_1', 'Zustand_erschrocken_1', 'Zustand_ueberrascht_1', 'Zustand_Herr_2', 'Zustand_kontrollieren_2', 'Zustand_Versuchungen_2', 'Zustand_Stimmung_2', 'Zustand_Druck_2', 'Zustand_Kontrolle_2', 'Zustand_willensstark_2', 'Zustand_erschrocken_2', 'Zustand_ueberrascht_2','Zustand_Herr_3', 'Zustand_kontrollieren_3', 'Zustand_Versuchungen_3', 'Zustand_Stimmung_3', 'Zustand_Druck_3', 'Zustand_Kontrolle_3', 'Zustand_willensstark_3', 'Zustand_erschrocken_3', 'Zustand_ueberrascht_3','Kino', 'Pralinen', 'Zuhause', 'Weihnachtsessen' ) # Spaltennamen
}```

Related

shiny DT::renderDT() multiple tables

I can't get renderDT() to display multiple data tables that my script creates. The code below. Reading the input table works, the progress indicator progresses through each line, but the hg38 and hg19 tabs are empty in the display.
If I move the hg38 renderDT() inside of the my_data <- reactive( it will display the hg38 table, but I get the following error and nothing in the hg19 tab
```Warning: Error in $: object of type 'closure' is not subsettable```
```105: <Anonymous>```
If I move both renderDT() inside the my_data <- reactive( I get nothing in either tab. I'm clearly misunderstanding something, but I'm not sure what.
library(shiny)
library(DT)
library("dplyr")
library(GenomeInfoDb)
library(BSgenome)
library("MafDb.gnomAD.r2.1.hs37d5")
library("MafH5.gnomAD.v3.1.2.GRCh38")
library(BSgenome.Hsapiens.UCSC.hg19)
library(BSgenome.Hsapiens.UCSC.hg38)
mafdb_hg19 <- MafDb.gnomAD.r2.1.hs37d5
mafdb_hg38 <- MafH5.gnomAD.v3.1.2.GRCh38
hg19 <- BSgenome.Hsapiens.UCSC.hg19
hg38 <- BSgenome.Hsapiens.UCSC.hg38
ui <- fluidPage(
sidebarLayout(
sidebarPanel(
fileInput("file1", "Choose CSV File", accept = ".csv"),
#checkboxInput("header", "Header", TRUE),
width = 2
),
mainPanel(
tabsetPanel(
tabPanel("Primers(input)", DT::dataTableOutput("primers")),
tabPanel("SNPs(hg38)", DT::dataTableOutput("hg38")),
tabPanel("SNPs(hg19)", DT::dataTableOutput("hg19"))
)
)
)
)
server <- function(input, output){
primer_file <- reactive(
{
file <- input$file1
ext <- tools::file_ext(file$datapath)
req(file)
validate(need(ext == "csv", "Please upload a csv file"))
primer_file <- as.data.frame(read.csv(file$datapath, header = TRUE))
return(primer_file)
})
output$primers <- DT::renderDT(primer_file())
my_data <- reactive(
{
primers_hg38 <- data.frame(primer_id=character(),
seq=character(),
chr=character(),
hg38_pos=integer(),
AF_allpopmax_hg38=integer(),
stringsAsFactors=FALSE)
primers_hg19 <- data.frame(primer_id=character(),
seq=character(),
chr=character(),
hg19_pos=integer(),
AF_afr=integer(),AF_amr=integer(), AF_asj=integer(), AF_eas=integer(), AF_fin=integer(), AF_nfe=integer(), AF_oth=integer(),
stringsAsFactors=FALSE)
progress <- shiny::Progress$new()
# Make sure it closes when we exit this reactive, even if there's an error
on.exit(progress$close())
progress$set(message = "Calculating", value = 0)
lastrow <- nrow(primer_file())
firstrow=1
for (no in (firstrow:lastrow))
{
row = primer_file()[no,]
temp_chr <- row$chr
temp_FP <- row$FP
temp_RP <- row$RP
progress$inc(1/lastrow, detail = paste("line ", no))
###################lets do hg38 first######################
subject_hg38 <- hg38[[temp_chr]]
products_hg38 <- matchProbePair(temp_FP,temp_RP,subject_hg38)
amp_start_hg38 = (start(products_hg38))
fp_end_hg38 = (start(products_hg38) + as.integer(nchar(row$FP)) - 1)
rp_start_hg38 = ( (start(products_hg38) + width(products_hg38)) - as.integer(nchar(row$RP)) )
amp_end_hg38 = (start(products_hg38) + width(products_hg38) - 1)
fp_range_hg38 <- GRanges(seqnames=temp_chr, IRanges(start=amp_start_hg38:fp_end_hg38, width=1))
fp_scores_hg38 <- gscores(mafdb_hg38,fp_range_hg38,pop="AF_allpopmax")
fp_scores_hg38 <- as.data.frame(fp_scores_hg38)
rp_range_hg38 <- GRanges(seqnames=temp_chr, IRanges(start=rp_start_hg38:amp_end_hg38, width=1))
rp_scores_hg38 <- gscores(mafdb_hg38,rp_range_hg38,pop="AF_allpopmax")
rp_scores_hg38 <- as.data.frame(rp_scores_hg38)
#primers_hg38 <- data.frame(primer_id=character(),
# seq=character(),
# chr=character(),
# hg38_pos=integer(),
# AF_allpopmax_hg38=integer(),
# stringsAsFactors=FALSE)
primers_hg38 <- bind_rows(primers_hg38,setNames(data.frame(primer_id <- paste0(row$ID,"_F"),
seq <- temp_FP,
chr <- fp_scores_hg38$seqnames,
hg38_pos <- fp_scores_hg38$start,
AF_allpopmax_hg38 <- fp_scores_hg38$AF_allpopmax, stringsAsFactors=FALSE),c("primer_id", "seq", "chr","hg38_pos","AF_allpopmax_hg38")))
#names(primers_hg38) <- c("primer_id","seq","chr","hg38_pos","AF_allpopmax_hg38")
primers_hg38 <- bind_rows(primers_hg38,setNames(data.frame(primer_id <- paste0(row$ID,"_R"),
seq <- temp_RP,
chr <- rp_scores_hg38$seqnames,
hg38_pos <- rp_scores_hg38$start,
AF_allpopmax_hg38 <- rp_scores_hg38$AF_allpopmax,stringsAsFactors=FALSE),c("primer_id", "seq", "chr","hg38_pos","AF_allpopmax_hg38")))
##########################now hg19######################################
#names(primers_hg38) <- c("primer_id","seq","chr","hg38_pos","AF_allpopmax_hg38")
subject_hg19 <- hg19[[temp_chr]]
products_hg19 <- matchProbePair(temp_FP,temp_RP,subject_hg19)
amp_start_hg19 = start(products_hg19)
fp_end_hg19 = ( start(products_hg19) + as.integer(nchar(row$FP)) - 1)
rp_start_hg19 = ( (start(products_hg19) + width(products_hg19)) - as.integer(nchar(row$RP)) )
amp_end_hg19 = (start(products_hg19) + width(products_hg19) - 1)
fp_range_hg19 <- GRanges(seqnames=temp_chr, IRanges(start=amp_start_hg19:fp_end_hg19, width=1))
fp_scores_hg19 <- gscores(mafdb_hg19,fp_range_hg19,pop=c("AF_afr","AF_amr","AF_asj","AF_eas","AF_fin","AF_nfe","AF_oth"))
fp_scores_hg19 <- as.data.frame(fp_scores_hg19)
rp_range_hg19 <- GRanges(seqnames=temp_chr, IRanges(start=rp_start_hg19:amp_end_hg19, width=1))
rp_scores_hg19 <- gscores(mafdb_hg19,rp_range_hg19,pop=c("AF_afr","AF_amr","AF_asj","AF_eas","AF_fin","AF_nfe","AF_oth"))
rp_scores_hg19 <- as.data.frame(rp_scores_hg19)
primers_hg19 <- bind_rows(primers_hg19,setNames(data.frame(primer_id <- paste0(row$ID,"_F"),
seq <- temp_FP,
chr <- fp_scores_hg19$seqnames,
hg19_pos <- fp_scores_hg19$start,
AF_afr <- fp_scores_hg19$AF_afr,
AF_amr <- fp_scores_hg19$AF_amr,
AF_asj <- fp_scores_hg19$AF_asj,
AF_eas <- fp_scores_hg19$AF_eas,
AF_fin <- fp_scores_hg19$AF_fin,
AF_nfe<- fp_scores_hg19$AF_nfe,
AF_oth<- fp_scores_hg19$AF_oth,
stringsAsFactors=FALSE),
c("primer_id", "seq", "chr","hg19_pos","AF_afr","AF_amr","AF_asj","AF_eas","AF_fin","AF_nfe","AF_oth")))
primers_hg19 <- bind_rows(primers_hg19,setNames(data.frame(primer_id <- paste0(row$ID,"_R"),
seq <- temp_RP,
chr <- rp_scores_hg19$seqnames,
hg19_pos <- rp_scores_hg19$start,
AF_afr <- rp_scores_hg19$AF_afr,
AF_amr <- rp_scores_hg19$AF_amr,
AF_asj <- rp_scores_hg19$AF_asj,
AF_eas <- rp_scores_hg19$AF_eas,
AF_fin <- rp_scores_hg19$AF_fin,
AF_nfe<- rp_scores_hg19$AF_nfe,
AF_oth<- rp_scores_hg19$AF_oth,
stringsAsFactors=FALSE),
c("primer_id", "seq", "chr","hg19_pos","AF_afr","AF_amr","AF_asj","AF_eas","AF_fin","AF_nfe","AF_oth")))
}
return(list(as.data.frame(primers_hg38), as.data.frame(primers_hg19)))
})
output$hg38 <- DT::renderDT(my_data()$primers_hg38)
#, options = list(paging = F, info = F, search = list(),
# dom = "Brtip", fixedColumns = T, fixedHeader = T,
# buttons = c("copy", "excel")),
# fillContainer = TRUE)
output$hg19 <- DT::renderDT(my_data()$primers_hg19, options = list(paging = F, info = F, search = list(),
dom = "Brtip", fixedColumns = T, fixedHeader = T,
buttons = c("copy", "excel")),
fillContainer = TRUE)
}
shinyApp(ui, server)
Please consider posting a MRE in the future. If you access the data as my_data()[[1]] it should work. However, if you define a named list, your method works. Take a look at an MRE below.
library(DT)
ui <- fluidPage(
DTOutput("t1"), DTOutput("t2")
)
server <- function(input, output) {
myd <- reactive(list(iris,mtcars))
myd2 <- reactive(list(ab=rock,cd=pressure))
output$t1 <- renderDT(head(myd()[[2]])) ### works
# output$t1 <- renderDT(myd()$mtcars) ### does not work
output$t2 <- renderDT(head(myd2()$ab)) ### works as a named list
}
shinyApp(ui = ui, server = server)

How can I get a table to print under a picture using a loop in a .rmd with word_document?

I am trying to create a .rmd file that takes all of the pictures for a field day and the notes that was taken and create a report. I am able to get the pictures to plot but the no matter what I try the table with the notes does not want to print. Below is the loop I am utilizing:
for(i in 1:nrow(subset_Inventory_data)) {
singlept <- subset_Inventory_data[i,]
picture <- pictureLookup[singlept$GlobalID == pictureLookup$REL_GLOBAL,]
#PRINT PICTURE
plot(image_read(paste(baseURL,picture$UID,sep = "")) %>%
# image_resize("400x400") %>%
image_rotate(degrees = 90)
)
#creating table underneath picture
Categories <- c("Latitude", "Longitude", "Road Width", "Conditon", "Lock Present","Additional Notes")
sum_table <- data.frame(Category = character(),
Information = character(),
stringsAsFactors = FALSE)
sum_table <- rbind(sum_table,Categories,
stringsAsFactors = FALSE)
colnames(sum_table) <- Categories
sum_table$Latitude <- sprintf("%f",singlept$LAT)
sum_table$Longitude <-sprintf("%f",singlept$LONG)
sum_table$`Road Width` <- paste(singlept$Gate_Width,"feet")
sum_table$Conditon <- singlept$Condition
sum_table$`Lock Present` <- singlept$GlobalID
sum_table$`Additional Notes` <- singlept$General_Notes
#TRIED FLEXTABLE
ft <- flextable(sum_table)
ft <- fontsize(ft, size = 12)
ft <- autofit(ft)
print(ft)
#TRIED KABLE
print(kable(sum_table,"latex"))
}

valuebox missing true/false statement

I am attempting to have two if else statements in a renderValuebox in shiny R. Here's the code:
observe({
a <- which(blkname==input$e0)
start <- input$dateRange[1]
end <- input$dateRange[2]
confInt <- input$CI
inc <- input$increment
output$Name <- renderText(paste0("Overview of ",input$e0," block"))
start1 <- as.character(format(input$dateRange[1], format="%B %d %Y"))
end1 <- as.character(format(input$dateRange[2], format="%B %d %Y"))
output$Head <- renderText(paste0("Distribution of dry matter across ", input$e0, " between ",start1, " and ", end1))
pts$DryMatter <- pts$DryMatter+(pts$Amend*inc)
pts$DryMatter <- round(pts$DryMatter, digits=2)
grap <- pts[which(pts$Date>start&pts$Date<end),]
grap <- grap[which(grap$paddock==a),]
grap$DryMatter<- round(grap$DryMatter, digits=2)
yup <- round(mean(grap$DryMatter), digits=2)
nump <- nrow(grap)
grapdevdm <- grap[which(grap$Amend==min(grap$Amend)),]
grapdev <- sd(grapdevdm$DryMatter)
grapsampnum <- nrow(grapdevdm)
grapdevmean <- mean(grapdevdm$DryMatter)
cv <- (grapdev/grapdevmean)*100
sampReq <-(1.7*1.7)*(cv*cv)/((100-confInt)^2)
~
output$treenum <- renderValueBox({
valueBox(
paste0(nrow(grap), "Trees sampled out of ", sampReq, "required."),
if(nrow(grap)>=sampReq){
icon = icon("thumbs-up", lib = "glyphicon")}else
{icon = icon("thumbs-down", lib = "glyphicon")},
if(nrow(grap)>=sampReq){ color = "green"} else {colour = "red"}
)
})
I am receiving Warning: Error in if: missing value where TRUE/FALSE needed
However both if else is completed, maybe I am missing something.
Cheers,

split string into non-overlapping segments

I wish to split strings into non-overlapping segments where the endpoints of a segment are numbers within a field of dots. I can do this using the code below. However, this code seems to be overly complex and involves nested for-loops. Is there a simpler way, ideally using regex in base R?
Here is an example and the desired.result.
my.data <- read.table(text = '
my.string cov1 cov2
11....... 1 A
1.1.2.1.1 2 B
1234..... 3 C
1...2...3 4 C
..3..4... 5 D
', header = TRUE, stringsAsFactors = FALSE)
desired.result <- read.table(text = '
my.string cov1 cov2
11....... 1 A
1.1...... 2 B
..1.2.... 2 B
....2.1.. 2 B
......1.1 2 B
12....... 3 C
.23...... 3 C
..34..... 3 C
1...2.... 4 C
....2...3 4 C
..3..4... 5 D
', header = TRUE, stringsAsFactors = FALSE, na.strings = 'NA')
new.data <- data.frame(do.call(rbind, strsplit(my.data$my.string,'')), stringsAsFactors = FALSE)
n.segments <- rowSums(!(new.data[1:ncol(new.data)] == '.')) - 1
my.end.points <- do.call(rbind, gregexpr("[0-9]", my.data$my.string, perl=TRUE))
my.end.point.char <- do.call(rbind, strsplit(my.data$my.string, ""))
my.end.point.char <- t(apply(my.end.point.char, 1, as.numeric))
new.strings <- matrix('.', nrow = sum(n.segments), ncol = max(nchar(my.data$my.string)))
new.cov <- as.data.frame(matrix(NA, nrow = sum(n.segments), ncol = (ncol(my.data) - 1)))
m <- 1
for(i in 1:nrow(new.data)) {
for(j in 1:n.segments[i]) {
for(k in 1:ncol(new.strings)) {
new.strings[m, my.end.points[i, j ] ] <- my.end.point.char[i, my.end.points[i, j ]]
new.strings[m, my.end.points[i, (j+1)] ] <- my.end.point.char[i, my.end.points[i,(j+1)]]
new.cov[m,] <- my.data[i, c(2:ncol(my.data))]
}
m <- m + 1
}
}
my.result <- data.frame(my.string = apply(new.strings, 1, function(x) paste0(x, collapse = '')), stringsAsFactors = FALSE)
my.result <- data.frame(my.result, new.cov)
colnames(my.result) <- names(my.data)
all.equal(desired.result, my.result)
# [1] TRUE
w <- nchar(my.data$my.string[1L]);
dps <- character(w+1L); dps[1L] <- ''; for (i in seq_len(w)) dps[i+1L] <- paste0(dps[i],'.');
x <- Map(my.data$my.string,gregexpr('[^.]',my.data$my.string),f=function(s,g)
if (length(g)<3L) s else sapply(seq_len(length(g)-1L),function(gi)
paste0(dps[g[gi]],substr(s,g[gi],g[gi+1L]),dps[w-g[gi+1L]+1L])
)
);
res <- transform(my.data[rep(seq_len(nrow(my.data)),sapply(x,length)),],my.string=unlist(x));
res;
## my.string cov1 cov2
## 1 11....... 1 A
## 2 1.1...... 2 B
## 2.1 ..1.2.... 2 B
## 2.2 ....2.1.. 2 B
## 2.3 ......1.1 2 B
## 3 12....... 3 C
## 3.1 .23...... 3 C
## 3.2 ..34..... 3 C
## 4 1...2.... 4 C
## 4.1 ....2...3 4 C
## 5 ..3..4... 5 D
Note: You can replace the sapply(x,length) piece with lengths(x) if you have a recent enough version of R.
Benchmarking
library(microbenchmark);
bgoldst <- function(my.data) { w <- nchar(my.data$my.string[1L]); dps <- character(w+1L); dps[1L] <- ''; for (i in seq_len(w)) dps[i+1L] <- paste0(dps[i],'.'); x <- Map(my.data$my.string,gregexpr('[^.]',my.data$my.string),f=function(s,g) if (length(g)<3L) s else sapply(seq_len(length(g)-1L),function(gi) paste0(dps[g[gi]],substr(s,g[gi],g[gi+1L]),dps[w-g[gi+1L]+1L]))); transform(my.data[rep(seq_len(nrow(my.data)),sapply(x,length)),],my.string=unlist(x)); };
rawr <- function(my.data) { f <- function(x, m) { y <- gsub('.', '\\.', x); cs <- attr(m, "capture.start"); cl <- attr(m, "capture.length"); Vectorize(`substr<-`)(y, cs, cl + cs - 1, Vectorize(substr)(x, cs, cl + cs - 1)); }; m <- gregexpr('(?=([0-9][.]*[0-9]))', my.data$my.string, perl = TRUE); strs <- Map(f, my.data$my.string, m); tmp <- `rownames<-`(my.data[rep(1:nrow(my.data), sapply(strs,length)), ], NULL); tmp$my.string <- unlist(strs); tmp; };
carroll <- function(my.data) { strings <- sapply(my.data$my.string, function(x) { stri_match_all_regex(x, "(?=([0-9]{1}\\.*[0-9]{1}))")[[1]][,2]; }); strpos <- lapply(1:length(strings), function(x) { y <- {nchar(sub(perl=T,'^\\.*\\K.*','',my.data$my.string[x]))+c(0, cumsum(nchar(strings[[x]])-1))}; return(y[-length(y)]); }); w <- nchar(my.data$my.string[1L]); output.result <- data.frame(my.string = cbind(unlist(sapply(1:length(strings), function(y) { cbind(sapply(1:length(strings[[y]]), function(x) { leftstr <- paste0(paste0(rep(".", strpos[[y]][[x]]), collapse=""), strings[[y]][x]); rightstr <- paste0(rep(".", w-nchar(leftstr)), collapse=""); paste0(leftstr, rightstr, collapse=""); })); }))), my.data[unlist(sapply(1:length(strings), function(x) { rep(x, sapply(strings, length)[x]); })), c(2,3)], stringsAsFactors=FALSE); row.names(output.result) <- NULL; output.result; };
## OP's sample input
my.data <- read.table(text = '
my.string cov1 cov2
11....... 1 A
1.1.2.1.1 2 B
1234..... 3 C
1...2...3 4 C
..3..4... 5 D
', header = TRUE, stringsAsFactors = FALSE);
ex <- bgoldst(my.data);
all.equal(ex,rawr(my.data),check.attributes=F);
## [1] TRUE
all.equal(ex,carroll(my.data),check.attributes=F);
## [1] TRUE
microbenchmark(bgoldst(my.data),rawr(my.data),carroll(my.data));
## Unit: microseconds
## expr min lq mean median uq max neval
## bgoldst(my.data) 422.094 451.816 483.5305 476.6195 503.775 801.421 100
## rawr(my.data) 1096.502 1160.863 1277.7457 1236.7720 1298.996 3092.785 100
## carroll(my.data) 1130.287 1176.900 1224.6911 1213.2515 1247.249 1525.437 100
## scale test
set.seed(1L);
NR <- 1e4; NS <- 30L; probDot <- 3/4;
x <- c('.',0:9); probs <- c(probDot,rep((1-probDot)/10,10L)); my.data <- data.frame(my.string=do.call(paste0,as.data.frame(replicate(NS,sample(x,NR,T,probs)))),cov1=sample(seq_len(NR)),cov2=sample(make.unique(rep(LETTERS,len=NR))),stringsAsFactors=F);
repeat { w <- which(sapply(gregexpr('[^.]',my.data$my.string),length)==1L); if (length(w)==0L) break; my.data$my.string[w] <- do.call(paste0,as.data.frame(replicate(NS,sample(x,length(w),T,probs)))); }; ## prevent single-digit strings, which rawr and carroll solutions don't support
ex <- bgoldst(my.data);
all.equal(ex,rawr(my.data),check.attributes=F);
## [1] TRUE
all.equal(ex,carroll(my.data),check.attributes=F);
## [1] TRUE
microbenchmark(bgoldst(my.data),rawr(my.data),carroll(my.data),times=1L);
## Unit: milliseconds
## expr min lq mean median uq max neval
## bgoldst(my.data) 904.887 904.887 904.887 904.887 904.887 904.887 1
## rawr(my.data) 2736.462 2736.462 2736.462 2736.462 2736.462 2736.462 1
## carroll(my.data) 108575.001 108575.001 108575.001 108575.001 108575.001 108575.001 1
my.data <- read.table(text = '
my.string cov1 cov2
11....... 1 A
1.1.2.1.1 2 B
1234..... 3 C
1...2...3 4 C
..3..4... 5 D
', header = TRUE, stringsAsFactors = FALSE)
f <- function(x, m) {
if (nchar(gsub('.', '', x, fixed = TRUE)) < 2L) return(x)
y <- gsub('.', '\\.', x)
cs <- attr(m, "capture.start")
cl <- attr(m, "capture.length")
Vectorize(`substr<-`)(y, cs, cl + cs - 1, Vectorize(substr)(x, cs, cl + cs - 1))
}
m <- gregexpr('(?=([0-9][.]*[0-9]))', my.data$my.string, perl = TRUE)
strs <- Map(f, my.data$my.string, m)
tmp <- `rownames<-`(my.data[rep(1:nrow(my.data), lengths(strs)), ], NULL)
tmp$my.string <- unlist(strs)
# my.string cov1 cov2
# 1 11....... 1 A
# 2 1.1...... 2 B
# 3 ..1.2.... 2 B
# 4 ....2.1.. 2 B
# 5 ......1.1 2 B
# 6 12....... 3 C
# 7 .23...... 3 C
# 8 ..34..... 3 C
# 9 1...2.... 4 C
# 10 ....2...3 4 C
# 11 ..3..4... 5 D
identical(tmp, desired.result)
# [1] TRUE
Here's an option. Not clean, but neither is the problem.
library(stringi)
## isolate the strings, allowing overlap via positive lookaheads
strings <- sapply(my.data$my.string, function(x) {
stri_match_all_regex(x, "(?=([0-9]{1}\\.*[0-9]{1}))")[[1]][,2]
})
Identify the offsets at the start of each group.
## identify the . offsets
strpos <- lapply(1:length(strings), function(x) {
y <- {nchar(sub(perl=T,'^\\.*\\K.*','',my.data$my.string[x]))+c(0, cumsum(nchar(strings[[x]])-1))}
return(y[-length(y)])
})
Build up the data.frame with only 2 sapply loops.
## collate the results using sapply
w <- nchar(my.data$my.string[1L]);
output.result <- data.frame(
my.string = cbind(unlist(sapply(1:length(strings), function(y) {
cbind(sapply(1:length(strings[[y]]), function(x) {
leftstr <- paste0(paste0(rep(".", strpos[[y]][[x]]), collapse=""), strings[[y]][x])
rightstr <- paste0(rep(".", w-nchar(leftstr)), collapse="")
paste0(leftstr, rightstr, collapse="")
}))
}))),
my.data[unlist(sapply(1:length(strings), function(x) {
rep(x, sapply(strings, length)[x])
})), c(2,3)], stringsAsFactors=FALSE
)
row.names(output.result) <- NULL
output.result
my.string cov1 cov2
1 11....... 1 A
2 1.1...... 2 B
3 ..1.2.... 2 B
4 ....2.1.. 2 B
5 ......1.1 2 B
6 12....... 3 C
7 .23...... 3 C
8 ..34..... 3 C
9 1...2.... 4 C
10 ....2...3 4 C
11 ..3..4... 5 D
identical(desired.result, output.result)
[1] TRUE

trying to append a list, but something breaks

I'm trying to create an empty list which will have as many elements as there are num.of.walkers. I then try to append, to each created element, a new sub-list (length of new sub-list corresponds to a value in a.
When I fiddle around in R everything goes smooth:
list.of.dist[[1]] <- vector("list", a[1])
list.of.dist[[2]] <- vector("list", a[2])
list.of.dist[[3]] <- vector("list", a[3])
list.of.dist[[4]] <- vector("list", a[4])
I then try to write a function. Here is my feeble attempt that results in an error. Can someone chip in what am I doing wrong?
countNumberOfWalks <- function(walk.df) {
list.of.walkers <- sort(unique(walk.df$label))
num.of.walkers <- length(unique(walk.df$label))
#Pre-allocate objects for further manipulation
list.of.dist <- vector("list", num.of.walkers)
a <- c()
# Count the number of walks per walker.
for (i in list.of.walkers) {
a[i] <- nrow(walk.df[walk.df$label == i,])
}
a <- as.vector(a)
# Add a sublist (length = number of walks) for each walker.
for (i in i:num.of.walkers) {
list.of.dist[[i]] <- vector("list", a[i])
}
return(list.of.dist)
}
> num.of.walks.per.walker <- countNumberOfWalks(walk.df)
Error in vector("list", a[i]) : vector size cannot be NA
Assuming 'walk.df' is something like:
walk.df <- data.frame(label=sample(1:10,100,T),var2=1:100)
then:
countNumberOfWalks <- function(walk.df) {
list.of.walkers <- sort(unique(walk.df$label))
num.of.walkers <- length(unique(walk.df$label))
list.of.dist <- vector("list", num.of.walkers)
for (i in 1:num.of.walkers) {
list.of.dist[[i]] <- vector("list",
nrow(walk.df[walk.df$label == list.of.walkers[i],]))}
return(list.of.dist)
}
Will achieve what you're after.