提问人:Caleb Sytsma 提问时间:8/15/2023 最后编辑:Caleb Sytsma 更新时间:8/16/2023 访问量:100
如何在 R 中使用 foreach() 解决“外部指针无效”
How to resolve "external pointer is not valid" with foreach() in R
问:
我正在编写一个代码,用于计算与每个人口像素单元格的给定距离内的公园面积。这两个数据集都是 30m 栅格,我从中提取类和 xy 坐标(以米为单位)以创建数据帧。由于数据集非常庞大,我正在尝试逐个县进行计算。
人口数据的 TIFF 大致代表每个县,因此我在读取它们的同一调用中使用它们来裁剪公园栅格。我的代码运行良好,但我一直在努力使它成为一个并行过程,因为在整个国家/地区执行此操作的处理时间约为一整个工作日。我已经将我的原始代码改编为使用 foreach(),它比原始代码更有效地使用我的机器。
当我手动运行每个步骤时,此代码起作用,但在我尝试并行运行时抛出以下错误:“{ 中的错误:任务 1 失败 - ”i 在参数中:'Count = dist_Euclidean(...)'。由错误引起:!在为函数“as.data.frame”选择方法时计算参数“x”时出错:外部指针无效” 需要强调的是,如果我手动设置“code”和“i”并在不运行 foreach() 的情况下单击它,则此代码会为我提供我想要的确切输出。问题出在并行化上。
**注意:我知道提供您使用的数据是最佳做法,但这些数据集非常大。“县”和“州”是美国人口普查局的形状文件,“公园”是我对全国公园区域的栅格(30m 像素)。“rasterlist”提取我拥有的每个群体 TIFF 的文件名。我认为我无法提供这些数据进行测试,所以希望这可以是一个理论练习?
pkgList <- c("parallel", "parallelly","doParallel","foreach","terra","sf","dplyr","tidyr","stringi","stringr")
for(package.i in pkgList) {
suppressPackageStartupMessages(library(package.i, character.only = TRUE))} ; rm(package.i)
rasterlist <- as.list(gsub("._","",list.files(path = "tifs", pattern = '.tif$', all.files = F, full.names = T)))
states <- st_read("StateLines/cb_2018_us_state_500k.shp")
counties <- st_read("CountyLines/cb_2018_us_county_500k.shp") %>%
mutate(State = stri_replace_all_fixed(STATEFP, states$STATEFP, states$NAME, vectorize_all = FALSE),
code_st = paste(STATEFP,"-", sep = ""),
State = gsub(" ", "", State), StateCode = paste(State, "-", sep = ""),
NAME = case_when(NAME == "Miami-Dade" ~ "MiamiDade", TRUE ~ NAME)) %>% #stupid Miami-Dade
filter(!State %in% c("Alaska","Hawaii","CommonwealthoftheNorthernMarianaIslands","UnitedStatesVirginIslands","AmericanSamoa","Guam","PuertoRico")) #I only have pop data for the continental US at the moment
parks <- rast("ParksProject.tif")
dist_Euclidean <- function(x, y, dfrm, accDist) {
count <- 0
dfrm1 <- data.frame(x=x,y=y)
for (i in 1:nrow(dfrm1)){
hold <- dfrm1[i,]
count <- append(count, length(which(sqrt((dfrm$x - hold$x)^2 + (dfrm$y - hold$y)^2)<accDist)))}
return(count[-1])}
accessDist <- 500 ###Distance used to define "access", in meters
cl <- makeCluster(availableCores(omit = 1, constraints = "connections", logical = FALSE), type = "PSOCK")
registerDoParallel(cl)
for (code in unique(counties$code_st)) {
df.state <- as.character(rasterlist[str_detect(rasterlist, code)])
counties_temp <- st_transform(counties %>% filter(code_st == code), st_crs(parks))
parks_state <- crop(parks, extend(ext(counties_temp), accessDist/30+5))
write.csv(
foreach(i = 1:length(df.state), .packages = pkgList, .combine = "rbind") %dopar% {
temp <- as.data.frame(rast(df.state[i]), xy = TRUE) %>%
mutate(File = paste(df.state[i]),
State = stri_replace_all_fixed(File, counties_temp$code_st, counties_temp$StateCode, vectorize_all = FALSE),
State = stri_replace_all_fixed(State, counties_temp$COUNTYFP, counties_temp$NAME, vectorize_all = FALSE)) %>%
separate_wider_delim(State, names = c("Trash1","Trash2","State","County"), delim = c("-")) %>%
mutate(County = str_remove(County, ".tif")) %>% select(-c("Trash1","Trash2")) %>%
rename("Pop" = starts_with("neon-"))%>% mutate(Count = dist_Euclidean(x, y, as.data.frame(crop(parks_state, extend(ext(rast(df.state[i])),accessDist/30+5)), xy = TRUE), accDist = accessDist))
}, (paste(unique(counties_temp$State), "Parks.csv", sep = "")))
rm(df.state, counties_temp, parks_state)
gc()
}
stopCluster(cl)
重要的一点:在我尝试将 dist_Euclidean 函数添加到其中之前,它与 foreach() 循环并行运行。如果我下降%>%突变(计数= dist_Euclidean...accessDist)),这有效 - 它只是创建人口数据的数据帧。问题似乎出在公园数据帧创建中,但只有当我尝试将其作为并行过程进行时。我尝试使用 foreach 中的 .export 调用手动将parks_state栅格添加到每个并行化中,但这并不能解决问题。
这是怎么回事?
编辑::添加回溯输出
> traceback()
9: stop(simpleError(msg, call = expr))
8: e$fun(obj, substitute(ex), parent.frame(), e$data)
7: foreach(i = 1:length(df.state), .packages = pkgList, .combine = "rbind") %dopar%
{
temp <- as.data.frame(rast(df.state[i]), xy = TRUE) %>%
mutate(File = paste(df.state[i]), State = stri_replace_all_fixed(File,
counties_temp$code_st, counties_temp$StateCode,
vectorize_all = FALSE), State = stri_replace_all_fixed(State,
counties_temp$COUNTYFP, counties_temp$NAME, vectorize_all = FALSE)) %>%
separate_wider_delim(State, names = c("Trash1", "Trash2",
"State", "County"), delim = c("-")) %>% mutate(County = str_remove(County,
".tif")) %>% select(-c("Trash1", "Trash2")) %>% rename(Pop = starts_with("neon-")) %>%
mutate(Count = dist_Euclidean(x, y, as.data.frame(crop(parks_state,
extend(ext(rast(df.state[i])), accessDist/30 +
5)), xy = TRUE), accDist = accessDist))
}
6: is.data.frame(x)
5: utils::write.table(foreach(i = 1:length(df.state), .packages = pkgList,
.combine = "rbind") %dopar% {
temp <- as.data.frame(rast(df.state[i]), xy = TRUE) %>% mutate(File = paste(df.state[i]),
State = stri_replace_all_fixed(File, counties_temp$code_st,
counties_temp$StateCode, vectorize_all = FALSE),
State = stri_replace_all_fixed(State, counties_temp$COUNTYFP,
counties_temp$NAME, vectorize_all = FALSE)) %>% separate_wider_delim(State,
names = c("Trash1", "Trash2", "State", "County"), delim = c("-")) %>%
mutate(County = str_remove(County, ".tif")) %>% select(-c("Trash1",
"Trash2")) %>% rename(Pop = starts_with("neon-")) %>%
mutate(Count = dist_Euclidean(x, y, as.data.frame(crop(parks_state,
extend(ext(rast(df.state[i])), accessDist/30 + 5)),
xy = TRUE), accDist = accessDist))
}, (paste(unique(counties_temp$State), "Parks.csv", sep = "")),
col.names = NA, sep = ",", dec = ".", qmethod = "double")
4: eval(expr, p)
3: eval(expr, p)
2: eval.parent(Call)
1: write.csv(foreach(i = 1:length(df.state), .packages = pkgList,
.combine = "rbind") %dopar% {
temp <- as.data.frame(rast(df.state[i]), xy = TRUE) %>% mutate(File = paste(df.state[i]),
State = stri_replace_all_fixed(File, counties_temp$code_st,
counties_temp$StateCode, vectorize_all = FALSE),
State = stri_replace_all_fixed(State, counties_temp$COUNTYFP,
counties_temp$NAME, vectorize_all = FALSE)) %>% separate_wider_delim(State,
names = c("Trash1", "Trash2", "State", "County"), delim = c("-")) %>%
mutate(County = str_remove(County, ".tif")) %>% select(-c("Trash1",
"Trash2")) %>% rename(Pop = starts_with("neon-")) %>%
mutate(Count = dist_Euclidean(x, y, as.data.frame(crop(parks_state,
extend(ext(rast(df.state[i])), accessDist/30 + 5)),
xy = TRUE), accDist = accessDist))
}, (paste(unique(counties_temp$State), "Parks.csv", sep = "")))
答:
问题在于 terra SpatRasters 是不可导出的 (https://future.futureverse.org/articles/future-4-non-exportable-objects.html)。 感谢 HenrikB 链接上面的帮助文章,该文章解释了该文章和其他不可导出的类/类型。我只需要找到一个解决方法!
评论
debugonceIdist_Euclidean)
%>% mutate(Count=