Functions dplyr with rlang::last_error() in purrr::map loop in r - list

I'm using a function to calculate the length of linestring per cell by ID and store in a list, convert each element of the list into a RasterLayer and turn that list into a RasterStack, average all layers and get a single raster.
#function
# build_length_raster <- function(one_df) {
intersect_list <- by(
one_df ,
one_df$sub_id,
function(subid_df) sf::st_intersection(grid2, subid_df) %>%
dplyr::mutate(length = as.numeric(sf::st_length(.))) %>%
sf::st_drop_geometry()
)
list_length_grid <- purrr::map(intersect_list, function(x)
x %>% dplyr::left_join(x=grid2, by="cell", copy=T) %>%
dplyr::mutate(length=length) %>%
dplyr::mutate_if(is.numeric,coalesce,0)
)
list_length_raster <- purrr::map(list_length_grid, function(x)
raster::rasterize(x, r, field="length", na.rm=F, background=0)
)
list_length_raster2 <- unlist(list_length_raster, recursive=F)
raster_stack <- raster::stack(list_length_raster2)
raster_mean <- raster::stackApply(
raster_stack,
indices = rep(1,nlayers(raster_stack)),
fun = "mean", na.rm = TRUE)
#}
The function presents a step where, in order for the resulting grid of st_intersection() to have the same number of cells as it had initially, I use left_join(by="cell" column).Then I use mutate() to replace the NA's with 0. When I run the function steps for one dataframe from the list, it works perfectly, but when I put it inside map() to do this in a list, I get this error, which seems to refer to the dplyr functions:
final_list <- purrr::map(mylist, build_length_raster)
> rlang::last_error()
<error/rlang_error>
Join columns must be present in data.
x Problem with `cell`.
Backtrace:
1. purrr::map(mylist, build_length_raster)
15. dplyr:::left_join.data.frame(., x = grid, by = "cell", copy = T)
16. dplyr:::join_mutate(...)
17. dplyr:::join_cols(...)
18. dplyr:::standardise_join_by(by, x_names = x_names, y_names = y_names)
19. dplyr:::check_join_vars(by$y, y_names)
Run `rlang::last_trace()` to see the full context.
Is there a way to solved this problem?
MYDATA example
library(tidyverse)
library(sf)
library(purrr)
library(raster)
#data example
id <- c("844", "844", "844", "844", "844","844", "844", "844", "844", "844",
"844", "844", "845", "845", "845", "845", "845","845", "845", "845",
"845","845", "845", "845")
sub_id <- c("2017_844_1", "2017_844_1", "2017_844_1", "2017_844_1", "2017_844_2",
"2017_844_2", "2017_844_2", "2017_844_2", "2017_844_3", "2017_844_3",
"2017_844_3", "2017_844_3", "2017_845_1", "2017_845_1", "2017_845_1",
"2017_845_1", "2017_845_2","2017_845_2", "2017_845_2", "2017_845_2",
"2017_845_3","2017_845_3", "2017_845_3", "2017_845_3")
lat <- c(-30.6456, -29.5648, -27.6667, -31.5587, -30.6934, -29.3147, -23.0538,
-26.5877, -26.6923, -23.40865, -23.1143, -23.28331, -31.6456, -24.5648,
-27.6867, -31.4587, -30.6784, -28.3447, -23.0466, -27.5877, -26.8524,
-23.8855, -24.1143, -23.5874)
long <- c(-50.4879, -49.8715, -51.8716, -50.4456, -50.9842, -51.9787, -41.2343,
-40.2859, -40.19599, -41.64302, -41.58042, -41.55057, -50.4576, -48.8715,
-51.4566, -51.4456, -50.4477, -50.9937, -41.4789, -41.3859, -40.2536,
-41.6502, -40.5442, -41.4057)
df <- tibble(id = as.factor(id), sub_id = as.factor(sub_id), lat, long)
#converting ​to sf
df.sf <- df %>%
​sf::st_as_sf(coords = c("long", "lat"), crs = 4326)
#creating grid
xy <- sf::st_coordinates(df.sf)
grid = sf::st_make_grid(sf::st_bbox(df.sf),
​cellsize = .1, square = FALSE) %>%
​sf::st_as_sf()
#creating raster
r <- raster::raster(grid, res=0.1)
#return grid because raster function changes number of cells
grid2 <- rasterToPolygons(r, na.rm=F) %>%
st_as_sf() %>% mutate(cell=1:ncell(r))
#creating linestring to each sub_id
df.line <- df.sf %>%
dplyr::group_by(sub_id, id) %>%
dplyr::summarize() %>%
sf::st_cast("LINESTRING")
#creating ID list
mylist<- split(df.line, df.line$id)
#separating one dataframe of list to test function
one_df <- df.line[df.line$id=="844",]
one_df$id <- droplevels(one_df$id)
one_df$sub_id <- droplevels(one_df$sub_id)

The specific error is caused because intersect_list has empty items in the list, which cannot be joined because they are empty, and hence have no columns to join by. If you modified the map function to only use non-empty items of intersect_list you would not get that error.
As you noted in the comments, removing the empty list entries with keep(intersect_list, ~ !is.null(.)) before mapping left_join onto the list items will fix the error.
However, I don't think this is the most elegant way to solve this problem. I might misunderstand what the goal is, but if it's to produce a raster from the total length of lines within each grid cell, I think a simpler approach without using purrr might work.
This is not the exact same as your product, but I'm keeping it simpler rn to illustrate an alternate approach. Here is a sum of the lengths in each cell as a stars object (similar to raster but plays better with the tidyverse and sf).
I'm starting off from your objects one_df and grid:
# Turn multiple lines into single MULTILINESTRING:
one_df %>%
st_union() ->
union_df
# Intersection of each grid cell with the MULTILINESTRING geometry:
grid %>%
st_intersection(union_df) ->
grid_lines
# Get lengths:
grid_lines %>%
mutate(length = st_length(x)) %>%
st_drop_geometry() ->
grid_lengths
# Join the calculated lengths back with the spatial grid,
# most of which will have NA for length
grid %>%
left_join(grid_lengths, by = "cell") ->
grid_with_lengths
# Rasterize the length field of the grid
grid_with_lengths %>%
dplyr::select(length) %>%
stars::st_rasterize() ->
length_stars
length_stars %>% mapview::mapview()

Related

How to remove specific number of events in a tbl_regression (gtsummary package)

I'm using tbl_regression from the gtsummary package to show the results of cox proportional hazards models. Due to circumstances regarding sensitive personal information, I am not allowed to show strata with a number of observations less than 5. I can, however, still show the estimates, CIs etc. for those strata, but not how many persons have had the event if the number is less than 5. In these number of events strata with less than 5 observations, I would like to insert just a line to indicate this.
From what I have read, the modify_table_body function is perhaps the correct function to achieve this. However, I cannot manage to find out how to use it correctly. Is there any way to define that the regression table should not show N_event less than 5 but still show HRs, CIs, person years ect. for those given stratas?
Below is my preliminary code in which I thought maybe should be followed by "%>% modify_table_body()".
Thank you in advance for your help!
Best,
Mathilde
cox_cat_cns2 <- coxph(Surv(TTD_year, Dod_status) ~ Highest_Edu_Household + Diag_year + Age_household_mom_num + Age_household_dad_num + Country_origin_household, data = data_cox_cat_cns)
cox_cat_cns_adj_table <- tbl_regression(cox_cat_cns2,
label = c(Highest_Edu_Household ~ "Highest parental education",
Diag_year ~ "Year of diagnosis",
Age_household_mom_num ~ "Mother's age at diagnosis",
Age_household_dad_num ~ "Father's age at diagnosis",
Country_origin_household ~ "Parents' country of origin"),
exponentiate = TRUE) %>%
add_nevent(location = "level") %>%
bold_labels() %>%
italicize_levels() %>%
modify_table_styling(
columns = estimate,
rows = reference_row %in% TRUE,
missing_symbol = "Ref.") %>%
modify_footnote(everything() ~ NA, abbreviation = TRUE) %>%
modify_table_styling(
column = p.value,
hide = TRUE) %>%
modify_header(
label = "",
stat_nevent = "**Events (N)**",
exposure ~ "**Person years**")
You can 1. define a new function to "style" the number of events that collapses any counts less than 5 as "<5", then 2. use that function to style the column in the resulting table. Example below!
library(gtsummary)
#> #Uighur
library(survival)
library(dplyr)
style_number5 <- function(x, ...) {
ifelse(
x < 5,
paste0("<", style_number(5, ...)),
style_number(x, ...)
)
}
style_number5(4:6)
#> [1] "<5" "5" "6"
tbl <-
trial %>%
slice(1:45) %>%
coxph(Surv(ttdeath, death) ~ stage, data = .) %>%
tbl_regression(exponentiate = TRUE) %>%
add_nevent(location = "level") %>%
modify_fmt_fun(stat_nevent ~ style_number5)
Created on 2022-04-21 by the reprex package (v2.0.1)

Merge multiple lists with different element lengths into a data frame in a function

I have a function to extract rules of Decision Tree
data(iris)
names(iris)[names(iris) == "Sepal.Length"] <- "SL"
names(iris)[names(iris) == "Sepal.Width"] <- "SW"
names(iris)[names(iris) == "Petal.Length"] <- "PL"
names(iris)[names(iris) == "Petal.Width"] <- "PW"
library(rpart)
set.seed(10)
pohon <- rpart(Species ~ ., iris,
method='class',
control=rpart.control(minsplit = 5, cp=0))
library(reshape)
rules.rpart <- function(model){
if (!inherits(model, "rpart")) stop("Not a legitimate rpart tree")
frm <- model$frame
names <- row.names(frm)
ylevels <- attr(model, "ylevels")
ds.size <- model$frame[1,]$n
for (i in 1:nrow(frm))
{
if (frm[i,1] == "<leaf>")
{
prediksi=ylevels[frm[i,]$yval]
pth <- path.rpart(model, nodes=as.numeric(names[i]), print.it=F)
urutan=unlist(pth)[-1]
ur <- pth[-1]
a=paste(urutan)
a1=t(data.frame(a))
df=data.frame(prediksi,a1)
print(bind_rows(list(df)))
}}}
rules.rpart(pohon)
bb <- rules.rpart(pohon)
bb
My questions is:
How can I convert the output into a single data frame from several lists (different number of lists) with different element lengths?
Why I can't define the output into an object named "bb"? why does bb become NULL when called?

Save results for each file of a list of files looping through a factor variable in R. Vector does not update

I am using a list of files, and I am trying to create a data frame that contains: for each sample, the percentage of two particular "GT" types by the levels of another factor variable called "chr" (with 1 to 24 levels).
It would have to look like this:
The problem I keep getting is that the vector never gets updated for the ith sample, it only keeps the first vector created. And then I am not sure how to save that updated vector on my data frame (df).
vector_chr <- vector();
for (i in seq_along(list_files)) {
GT <- list_files[[i]][,9]
chr <- list_files[[i]][,3]
GT$chr <- chr$chr # creating one df with both GT and chr
for (j in unique(GT$chr)){
dat_list = split(GT, GT$chr) # split data frames by chr (1 to 24)
table <- table(dat_list[[j]][,1]) # take GT and make a table
sum <- sum(table[3:4]) # sum GTs 3 and 4
perc <- sum/nrow(GT)
vector_chr <- c(vector_chr,perc) # assign the 24 percentages to a vector
}
df <- data.frame(matrix(ncol = 25, nrow = length(files)))
x <- c("Sample", "chr1", "chr2", "chr3",
"chr4", "chr5", "chr6", "chr7", "chr8", "chr9", "chr10",
"chr11", "chr12","chr13", "chr14", "chr15", "chr16",
"chr17", "chr18", "chr19", "chr20", "chr21", "chr22",
"chrX", "chrXY")
colnames(df) <- x
df$Sample <- names(list_files)
df[i,2:25] <- vector_chr # assign the 24 percentages for EACH sample
}

How can I get a table to print under a picture using a loop in a .rmd with word_document?

I am trying to create a .rmd file that takes all of the pictures for a field day and the notes that was taken and create a report. I am able to get the pictures to plot but the no matter what I try the table with the notes does not want to print. Below is the loop I am utilizing:
for(i in 1:nrow(subset_Inventory_data)) {
singlept <- subset_Inventory_data[i,]
picture <- pictureLookup[singlept$GlobalID == pictureLookup$REL_GLOBAL,]
#PRINT PICTURE
plot(image_read(paste(baseURL,picture$UID,sep = "")) %>%
# image_resize("400x400") %>%
image_rotate(degrees = 90)
)
#creating table underneath picture
Categories <- c("Latitude", "Longitude", "Road Width", "Conditon", "Lock Present","Additional Notes")
sum_table <- data.frame(Category = character(),
Information = character(),
stringsAsFactors = FALSE)
sum_table <- rbind(sum_table,Categories,
stringsAsFactors = FALSE)
colnames(sum_table) <- Categories
sum_table$Latitude <- sprintf("%f",singlept$LAT)
sum_table$Longitude <-sprintf("%f",singlept$LONG)
sum_table$`Road Width` <- paste(singlept$Gate_Width,"feet")
sum_table$Conditon <- singlept$Condition
sum_table$`Lock Present` <- singlept$GlobalID
sum_table$`Additional Notes` <- singlept$General_Notes
#TRIED FLEXTABLE
ft <- flextable(sum_table)
ft <- fontsize(ft, size = 12)
ft <- autofit(ft)
print(ft)
#TRIED KABLE
print(kable(sum_table,"latex"))
}

Can one add a data.frame to itself?

I want to append or add a data.frame to itself...
Much in the same way the one adds:
n <- n + t
I have a function that creates a data.frame.
I have been using:
g <- function(compareA,compareB) {
for (i in 1:1000) {
ttr <- t.test(compareA, compareA, var.equal = TRUE)
tt_pvalues[i] <- ttr$p.value
}
name_tag <- paste(nameA, nameB, sep = "_Vs_")
tt_titles <- data.frame(name_tag, tt_titles)
# character vector which I want to add to a list
ALL_pvalues <- data.frame(tt_pvalues, ALL_pvalues)
# adding a numeric vector of values to a larger data.frame
}
Would cbind be better here?
There are two methods that would "add or append" data to a data.frame by columns and one that would append by rows. Assuming tag is the data.frame, and tt_titles is a vector of the same length that 'tag' has rows, then either of these would work:
tag <- cbind(tag, tt_titles)
# tt_titles could also be a data.frame with same number of rows
Or:
tag[["tt_titles"]] <- tt_titles
Now let's assume that we have instead two data.frames with the same column.names:
bigger.df <- rbind(tag, tag2)