Outputting the results from bife object to Latex in Rmarkdown? - r-markdown

I'm estimating a fixed-effects probit model using the bife package in R. I'm trying to extract the output into something I can use with either stargazer or texreg so I can output them into a paper using Rmarkdown to create a LaTeX object. I'm aware I can manually extract the coefficients and standard errors, etc., but I'm wondering if there isn't a more efficient way to coerce this object into something that'd work with either package.
Here's a reproducible example:
install.packages("bife")
library(bife)
data("iris")
iris$big <- ifelse(iris$Sepal.Length > median(iris$Sepal.Length),1,0)
output <- bife(big ~ Sepal.Width + Petal.Length | Species, data=iris, "logit")

I think I found an alternative solution for this one, even if it is probably too late
Basically, first, I went on the repository of the package "texreg" and found this function:
extract.bife <- function(model,
include.loglik = TRUE,
include.deviance = TRUE,
include.nobs = TRUE,
...) {
s <- summary(model)
coefficient.names <- rownames(s$cm)
co <- s$cm[, 1]
se <- s$cm[, 2]
pval <- s$cm[, 4]
gof <- numeric()
gof.names <- character()
gof.decimal <- logical()
if (include.loglik == TRUE) {
lik <- logLik(model)
gof <- c(gof, lik)
gof.names <- c(gof.names, "Log Likelihood")
gof.decimal <- c(gof.decimal, TRUE)
}
if (include.deviance == TRUE) {
gof <- c(gof, deviance(model))
gof.names <- c(gof.names, "Deviance")
gof.decimal <- c(gof.decimal, TRUE)
}
if (include.nobs == TRUE) {
n <- s$nobs["nobs"]
gof <- c(gof, n)
gof.names <- c(gof.names, "Num. obs.")
gof.decimal <- c(gof.decimal, FALSE)
}
tr <- createTexreg(
coef.names = coefficient.names,
coef = co,
se = se,
pvalues = pval,
gof.names = gof.names,
gof = gof,
gof.decimal = gof.decimal
)
return(tr)
}
So for your example, just apply it on your model and use the function texreg and you may have a Latex-"like" output
tr <- extract.bife(output)
texreg(tr)
I hope it will help!
Best

Related

shiny DT::renderDT() multiple tables

I can't get renderDT() to display multiple data tables that my script creates. The code below. Reading the input table works, the progress indicator progresses through each line, but the hg38 and hg19 tabs are empty in the display.
If I move the hg38 renderDT() inside of the my_data <- reactive( it will display the hg38 table, but I get the following error and nothing in the hg19 tab
```Warning: Error in $: object of type 'closure' is not subsettable```
```105: <Anonymous>```
If I move both renderDT() inside the my_data <- reactive( I get nothing in either tab. I'm clearly misunderstanding something, but I'm not sure what.
library(shiny)
library(DT)
library("dplyr")
library(GenomeInfoDb)
library(BSgenome)
library("MafDb.gnomAD.r2.1.hs37d5")
library("MafH5.gnomAD.v3.1.2.GRCh38")
library(BSgenome.Hsapiens.UCSC.hg19)
library(BSgenome.Hsapiens.UCSC.hg38)
mafdb_hg19 <- MafDb.gnomAD.r2.1.hs37d5
mafdb_hg38 <- MafH5.gnomAD.v3.1.2.GRCh38
hg19 <- BSgenome.Hsapiens.UCSC.hg19
hg38 <- BSgenome.Hsapiens.UCSC.hg38
ui <- fluidPage(
sidebarLayout(
sidebarPanel(
fileInput("file1", "Choose CSV File", accept = ".csv"),
#checkboxInput("header", "Header", TRUE),
width = 2
),
mainPanel(
tabsetPanel(
tabPanel("Primers(input)", DT::dataTableOutput("primers")),
tabPanel("SNPs(hg38)", DT::dataTableOutput("hg38")),
tabPanel("SNPs(hg19)", DT::dataTableOutput("hg19"))
)
)
)
)
server <- function(input, output){
primer_file <- reactive(
{
file <- input$file1
ext <- tools::file_ext(file$datapath)
req(file)
validate(need(ext == "csv", "Please upload a csv file"))
primer_file <- as.data.frame(read.csv(file$datapath, header = TRUE))
return(primer_file)
})
output$primers <- DT::renderDT(primer_file())
my_data <- reactive(
{
primers_hg38 <- data.frame(primer_id=character(),
seq=character(),
chr=character(),
hg38_pos=integer(),
AF_allpopmax_hg38=integer(),
stringsAsFactors=FALSE)
primers_hg19 <- data.frame(primer_id=character(),
seq=character(),
chr=character(),
hg19_pos=integer(),
AF_afr=integer(),AF_amr=integer(), AF_asj=integer(), AF_eas=integer(), AF_fin=integer(), AF_nfe=integer(), AF_oth=integer(),
stringsAsFactors=FALSE)
progress <- shiny::Progress$new()
# Make sure it closes when we exit this reactive, even if there's an error
on.exit(progress$close())
progress$set(message = "Calculating", value = 0)
lastrow <- nrow(primer_file())
firstrow=1
for (no in (firstrow:lastrow))
{
row = primer_file()[no,]
temp_chr <- row$chr
temp_FP <- row$FP
temp_RP <- row$RP
progress$inc(1/lastrow, detail = paste("line ", no))
###################lets do hg38 first######################
subject_hg38 <- hg38[[temp_chr]]
products_hg38 <- matchProbePair(temp_FP,temp_RP,subject_hg38)
amp_start_hg38 = (start(products_hg38))
fp_end_hg38 = (start(products_hg38) + as.integer(nchar(row$FP)) - 1)
rp_start_hg38 = ( (start(products_hg38) + width(products_hg38)) - as.integer(nchar(row$RP)) )
amp_end_hg38 = (start(products_hg38) + width(products_hg38) - 1)
fp_range_hg38 <- GRanges(seqnames=temp_chr, IRanges(start=amp_start_hg38:fp_end_hg38, width=1))
fp_scores_hg38 <- gscores(mafdb_hg38,fp_range_hg38,pop="AF_allpopmax")
fp_scores_hg38 <- as.data.frame(fp_scores_hg38)
rp_range_hg38 <- GRanges(seqnames=temp_chr, IRanges(start=rp_start_hg38:amp_end_hg38, width=1))
rp_scores_hg38 <- gscores(mafdb_hg38,rp_range_hg38,pop="AF_allpopmax")
rp_scores_hg38 <- as.data.frame(rp_scores_hg38)
#primers_hg38 <- data.frame(primer_id=character(),
# seq=character(),
# chr=character(),
# hg38_pos=integer(),
# AF_allpopmax_hg38=integer(),
# stringsAsFactors=FALSE)
primers_hg38 <- bind_rows(primers_hg38,setNames(data.frame(primer_id <- paste0(row$ID,"_F"),
seq <- temp_FP,
chr <- fp_scores_hg38$seqnames,
hg38_pos <- fp_scores_hg38$start,
AF_allpopmax_hg38 <- fp_scores_hg38$AF_allpopmax, stringsAsFactors=FALSE),c("primer_id", "seq", "chr","hg38_pos","AF_allpopmax_hg38")))
#names(primers_hg38) <- c("primer_id","seq","chr","hg38_pos","AF_allpopmax_hg38")
primers_hg38 <- bind_rows(primers_hg38,setNames(data.frame(primer_id <- paste0(row$ID,"_R"),
seq <- temp_RP,
chr <- rp_scores_hg38$seqnames,
hg38_pos <- rp_scores_hg38$start,
AF_allpopmax_hg38 <- rp_scores_hg38$AF_allpopmax,stringsAsFactors=FALSE),c("primer_id", "seq", "chr","hg38_pos","AF_allpopmax_hg38")))
##########################now hg19######################################
#names(primers_hg38) <- c("primer_id","seq","chr","hg38_pos","AF_allpopmax_hg38")
subject_hg19 <- hg19[[temp_chr]]
products_hg19 <- matchProbePair(temp_FP,temp_RP,subject_hg19)
amp_start_hg19 = start(products_hg19)
fp_end_hg19 = ( start(products_hg19) + as.integer(nchar(row$FP)) - 1)
rp_start_hg19 = ( (start(products_hg19) + width(products_hg19)) - as.integer(nchar(row$RP)) )
amp_end_hg19 = (start(products_hg19) + width(products_hg19) - 1)
fp_range_hg19 <- GRanges(seqnames=temp_chr, IRanges(start=amp_start_hg19:fp_end_hg19, width=1))
fp_scores_hg19 <- gscores(mafdb_hg19,fp_range_hg19,pop=c("AF_afr","AF_amr","AF_asj","AF_eas","AF_fin","AF_nfe","AF_oth"))
fp_scores_hg19 <- as.data.frame(fp_scores_hg19)
rp_range_hg19 <- GRanges(seqnames=temp_chr, IRanges(start=rp_start_hg19:amp_end_hg19, width=1))
rp_scores_hg19 <- gscores(mafdb_hg19,rp_range_hg19,pop=c("AF_afr","AF_amr","AF_asj","AF_eas","AF_fin","AF_nfe","AF_oth"))
rp_scores_hg19 <- as.data.frame(rp_scores_hg19)
primers_hg19 <- bind_rows(primers_hg19,setNames(data.frame(primer_id <- paste0(row$ID,"_F"),
seq <- temp_FP,
chr <- fp_scores_hg19$seqnames,
hg19_pos <- fp_scores_hg19$start,
AF_afr <- fp_scores_hg19$AF_afr,
AF_amr <- fp_scores_hg19$AF_amr,
AF_asj <- fp_scores_hg19$AF_asj,
AF_eas <- fp_scores_hg19$AF_eas,
AF_fin <- fp_scores_hg19$AF_fin,
AF_nfe<- fp_scores_hg19$AF_nfe,
AF_oth<- fp_scores_hg19$AF_oth,
stringsAsFactors=FALSE),
c("primer_id", "seq", "chr","hg19_pos","AF_afr","AF_amr","AF_asj","AF_eas","AF_fin","AF_nfe","AF_oth")))
primers_hg19 <- bind_rows(primers_hg19,setNames(data.frame(primer_id <- paste0(row$ID,"_R"),
seq <- temp_RP,
chr <- rp_scores_hg19$seqnames,
hg19_pos <- rp_scores_hg19$start,
AF_afr <- rp_scores_hg19$AF_afr,
AF_amr <- rp_scores_hg19$AF_amr,
AF_asj <- rp_scores_hg19$AF_asj,
AF_eas <- rp_scores_hg19$AF_eas,
AF_fin <- rp_scores_hg19$AF_fin,
AF_nfe<- rp_scores_hg19$AF_nfe,
AF_oth<- rp_scores_hg19$AF_oth,
stringsAsFactors=FALSE),
c("primer_id", "seq", "chr","hg19_pos","AF_afr","AF_amr","AF_asj","AF_eas","AF_fin","AF_nfe","AF_oth")))
}
return(list(as.data.frame(primers_hg38), as.data.frame(primers_hg19)))
})
output$hg38 <- DT::renderDT(my_data()$primers_hg38)
#, options = list(paging = F, info = F, search = list(),
# dom = "Brtip", fixedColumns = T, fixedHeader = T,
# buttons = c("copy", "excel")),
# fillContainer = TRUE)
output$hg19 <- DT::renderDT(my_data()$primers_hg19, options = list(paging = F, info = F, search = list(),
dom = "Brtip", fixedColumns = T, fixedHeader = T,
buttons = c("copy", "excel")),
fillContainer = TRUE)
}
shinyApp(ui, server)
Please consider posting a MRE in the future. If you access the data as my_data()[[1]] it should work. However, if you define a named list, your method works. Take a look at an MRE below.
library(DT)
ui <- fluidPage(
DTOutput("t1"), DTOutput("t2")
)
server <- function(input, output) {
myd <- reactive(list(iris,mtcars))
myd2 <- reactive(list(ab=rock,cd=pressure))
output$t1 <- renderDT(head(myd()[[2]])) ### works
# output$t1 <- renderDT(myd()$mtcars) ### does not work
output$t2 <- renderDT(head(myd2()$ab)) ### works as a named list
}
shinyApp(ui = ui, server = server)

Merge multiple lists with different element lengths into a data frame in a function

I have a function to extract rules of Decision Tree
data(iris)
names(iris)[names(iris) == "Sepal.Length"] <- "SL"
names(iris)[names(iris) == "Sepal.Width"] <- "SW"
names(iris)[names(iris) == "Petal.Length"] <- "PL"
names(iris)[names(iris) == "Petal.Width"] <- "PW"
library(rpart)
set.seed(10)
pohon <- rpart(Species ~ ., iris,
method='class',
control=rpart.control(minsplit = 5, cp=0))
library(reshape)
rules.rpart <- function(model){
if (!inherits(model, "rpart")) stop("Not a legitimate rpart tree")
frm <- model$frame
names <- row.names(frm)
ylevels <- attr(model, "ylevels")
ds.size <- model$frame[1,]$n
for (i in 1:nrow(frm))
{
if (frm[i,1] == "<leaf>")
{
prediksi=ylevels[frm[i,]$yval]
pth <- path.rpart(model, nodes=as.numeric(names[i]), print.it=F)
urutan=unlist(pth)[-1]
ur <- pth[-1]
a=paste(urutan)
a1=t(data.frame(a))
df=data.frame(prediksi,a1)
print(bind_rows(list(df)))
}}}
rules.rpart(pohon)
bb <- rules.rpart(pohon)
bb
My questions is:
How can I convert the output into a single data frame from several lists (different number of lists) with different element lengths?
Why I can't define the output into an object named "bb"? why does bb become NULL when called?

How can I get a table to print under a picture using a loop in a .rmd with word_document?

I am trying to create a .rmd file that takes all of the pictures for a field day and the notes that was taken and create a report. I am able to get the pictures to plot but the no matter what I try the table with the notes does not want to print. Below is the loop I am utilizing:
for(i in 1:nrow(subset_Inventory_data)) {
singlept <- subset_Inventory_data[i,]
picture <- pictureLookup[singlept$GlobalID == pictureLookup$REL_GLOBAL,]
#PRINT PICTURE
plot(image_read(paste(baseURL,picture$UID,sep = "")) %>%
# image_resize("400x400") %>%
image_rotate(degrees = 90)
)
#creating table underneath picture
Categories <- c("Latitude", "Longitude", "Road Width", "Conditon", "Lock Present","Additional Notes")
sum_table <- data.frame(Category = character(),
Information = character(),
stringsAsFactors = FALSE)
sum_table <- rbind(sum_table,Categories,
stringsAsFactors = FALSE)
colnames(sum_table) <- Categories
sum_table$Latitude <- sprintf("%f",singlept$LAT)
sum_table$Longitude <-sprintf("%f",singlept$LONG)
sum_table$`Road Width` <- paste(singlept$Gate_Width,"feet")
sum_table$Conditon <- singlept$Condition
sum_table$`Lock Present` <- singlept$GlobalID
sum_table$`Additional Notes` <- singlept$General_Notes
#TRIED FLEXTABLE
ft <- flextable(sum_table)
ft <- fontsize(ft, size = 12)
ft <- autofit(ft)
print(ft)
#TRIED KABLE
print(kable(sum_table,"latex"))
}

R deleting duplicates when duplicates slightly differ by 1 or 2 letters

I am collecting tweets with the twitteR package and get many duplicates. This code works fine:
tweets <- searchTwitter(keyword, n=500, lang="en", since=NULL, until=NULL, retryOnRateLimit=100)
mydata <- sapply(tweets, function(x) x$getText())
mydata <- unique(mydata, incomparables = F, nmax = NA)
The problem is that it actually doesn't delete any duplicates because it doesn't recognises them as such. The duplicate tweets typically contain shortened URLS that differ by 1 or 2 digits. So I tried to clean the tweets of URLS with this code:
tweets <- searchTwitter(keyword, n=500, lang="en", since=NULL, until=NULL,
retryOnRateLimit=100)
mydata <- sapply(tweets, function(x) x$getText())
mydata <- data.frame(tweetsText, stringsAsFactors = FALSE)
names(mydata) <- c('words')
removeURL <- function(x) gsub("http[[:alnum:]]*", "", x)
mydata$words <- removeURL(mydata$words)
removeURL <- function(x) gsub("https[[:alnum:]]*", "", x)
mydata$words <- removeURL(mydata$words)
mydata$words <- unique(mydata$words, incomparables = F, nmax = NA)
Now I get the error message:
Error in $<-.data.frame(*tmp*, "words", value = c("Tripping around #DisneySprings.....) : replacement has 295 rows, data has 300
Advice? Thanks!
Your error is easily reproducible:
mydata <- data.frame(list(w = c(0, 1, 0, 1)))
mydata$words <- c(0, 1, 1)
# Error in `$<-.data.frame`(`*tmp*`, "words", value = c(0, 1, 1)) :
# replacement has 3 rows, data has 4
This just means that you need to assign a vector of the same length as the length of the data frame.
To filter out duplicate values you need to change the last line of your code to:
res <- mydata[!duplicated(mydata$words), ]

rankall : returning the correct data frame to rank hospitals on performance

this is a solution(not working well) to a coursera problem. I'm trying to rank a data frame containing the names of hospitals based on their performance on 3 different conditions. (I found another to this question at How to subset a row from list based on condition). I think I'm not subsetting right and I don't return the correct data frame at the end. really new to programming and R. thank you for your help.
rankall <- function(outcome, num = 'best'){
data <- read.csv('outcome-of-care-measures.csv', colClasses = 'character')
data[,11] <- as.numeric(data[,11])
data[,17] <- as.numeric(data[,17])
data[17] <- as.numeric(data[,23])
states <- sort(unique(data$State))
conditions <- data[c(11,17,23)]
if(!state %in% states){stop('invalid state')}
if(!outcome %in% conditions){stop('invalid outcome')}
for (i in 1:length(states)){
statedata <-data[data$State == state[i],]
if(outcome == 'heart attack'){column <- (statedata[,11]}
if(outcome == 'heart failure') {column <-(statedata[,17]}
if(outcome == 'pneumonia') {column <- statedata[,23]}
rankedhospitals <- c()
rankcondition <- rank(column, na.last = NA)
if (num == 'best'){num <- 1}
if(num == 'worst'){num <- nrow(rankcondition)}
rankedhospitals[i] <- statedata$Hospital.Name[order(column, statedata$Hospital.Name)[num]]
rankedhospitals <- cbind(rankedhospitals,states[num,2])
}
return (c('rankedhospitals', 'states'))
}