如何查找指定距离之外的多边形-解网

问：

我正在尝试获取与其他地区在指定距离之外的地区的汇总统计数据。我已经能够使用 dnearneig（）来查找指定距离内地区的汇总统计数据，但我无法弄清楚如何将其转换为外部情况，或者使用 dnearneigh（）的结果作为输入来告诉 R“不要考虑这些地区”。我也尝试过使用 st_distance（）/st_is_within_distance（），但运气不佳。任何建议将不胜感激。

这是我到目前为止在指定距离内的情况的代码。

packages <- c("readxl",  #Allows us to read in excel files
             "writexl", #Allows us to write to excel files 
             "haven",  #
             "stargazer",
             "tidyr",
             "dplyr",
             "labelled",
             "tidyverse",
             "expss", 
             "Hmisc",
             "foreign",
             "spdep",
             "sf",
             "qpcR",
             "plm",
             "ivreg",
             "sjlabelled",
             "Statamarkdown",
             "vctrs")
# library(tables)
# rm(list=setdiff(ls(), c("df_jslc_2000_2019","df_jslc_allyears")))

pacman::p_load(packages, character.only = TRUE)

 df_year <- df[df$YEAR==year,]
 # print(df_year)
 shp_centroid_3 <- st_centroid(df_year$geometry)
 # print(shp_centroid_3)
 print("we made it 2 lines")
 # within_km_10_pre<- st_is_within_distance(df_year$geometry,df_year$geometry,10000)
 ##Filter out self
 # within_km_10 <- within_km_10_pre[which(lengths(within_km_10_pre) >1)]
 within_km_10 <- dnearneigh(shp_centroid_3,5000,10000)

 print(within_km_10)
 shp_centroid_3$TOT_EXP <- df_year$TOTAL_E
 shp_centroid_3$Z_SCORE <- df_year$Z_b_100
 shp_centroid_3$Z_BEACH_100_ORIGINAL <- df_year$Z__100_
 shp_centroid_3$Z_WATER <- df_year$Z_WATER
 shp_centroid_3$Z_WAT_SC <- df_year$Z_WAT_S
 
 
 tourism_10km_mean1 <- unlist(lapply(within_km_10, \(v){mean(shp_centroid_3$TOT_EXP[v],na.rm=TRUE)}))
 # print(tourism_10km_mean1)
 tourism_10km_sum <- unlist(lapply(within_km_10, \(v){sum(shp_centroid_3$TOT_EXP[v],na.rm = TRUE)}))
 tourism_10km_score <- unlist(lapply(within_km_10, \(v){mean(shp_centroid_3$Z_SCORE[v],na.rm=TRUE)}))
 tourism_10km_score_beach_original <- unlist(lapply(within_km_10, \(v){mean(shp_centroid_3$Z_BEACH_100_ORIGINAL[v],na.rm=TRUE)}))
 tourism_10km_water_zscore <- unlist(lapply(within_km_10, \(v){mean(shp_centroid_3$Z_WATER[v],na.rm=TRUE)}))
 tourism_10km_water_zscore_scaled <- unlist(lapply(within_km_10, \(v){mean(shp_centroid_3$Z_WAT_SC[v],na.rm=TRUE)}))

R 空间 shapefile 最近邻

nc以包中的数据集为例sf
获取多边形质心
st_is_within_distance(..., dist = 50000)在质心上为每条记录构建“内部”索引列表，50000M 更适合这个特定的示例数据集
构建嵌套数据集，其中每条记录都会获得一大堆不在距离内的记录，其子集带有否定的“内”索引;假设它仅用于属性统计并排除所有几何图形
从嵌套的 Tibbles 中收集一些示例统计信息

library(sf)
#> Linking to GEOS 3.9.3, GDAL 3.5.2, PROJ 8.2.1; sf_use_s2() is TRUE
library(dplyr, warn.conflicts = FALSE)
library(ggplot2)

nc <- read_sf(system.file("shape/nc.shp", package="sf")) %>% 
  select(NAME, AREA, starts_with("BIR")) %>% 
  mutate(cntr = st_centroid(geometry),
         within_dist = st_is_within_distance(cntr, dist = 50000))

# ignore geometries for now for more compact output
nc_df <- st_drop_geometry(nc) %>% select(-cntr)

# build a nested tibble, each county row gets a 
# tibble of all counties outside of distance, use rowwise grouping to 
# subset nc_df with "-within_dist" (negated within_dist) of current row
nc_nested <- nc_df %>% 
  rowwise() %>%
  mutate(out_dist = (nc_df[-within_dist, ]) %>% select(-within_dist) %>% list()) %>% 
  # add some summary stats from nested tibbles:
  mutate(out_bir74_mean = mean(out_dist$BIR74),
         out_bir79_mean = mean(out_dist$BIR79)) %>% 
  ungroup() %>% 
  select(-within_dist)
nc_nested
#> # A tibble: 100 × 7
#>    NAME         AREA BIR74 BIR79 out_dist          out_bir74_mean out_bir79_mean
#>    <chr>       <dbl> <dbl> <dbl> <list>                     <dbl>          <dbl>
#>  1 Ashe        0.114  1091  1364 <tibble [96 × 4]>          3374.          4323.
#>  2 Alleghany   0.061   487   542 <tibble [96 × 4]>          3355.          4304.
#>  3 Surry       0.143  3188  3616 <tibble [95 × 4]>          3371.          4325.
#>  4 Currituck   0.07    508   830 <tibble [96 × 4]>          3407.          4357.
#>  5 Northampton 0.153  1421  1606 <tibble [97 × 4]>          3335.          4273.
#>  6 Hertford    0.097  1452  1838 <tibble [95 × 4]>          3417.          4377.
#>  7 Camden      0.062   286   350 <tibble [94 × 4]>          3467.          4434.
#>  8 Gates       0.091   420   594 <tibble [93 × 4]>          3480.          4453.
#>  9 Warren      0.118   968  1190 <tibble [95 × 4]>          3345.          4284.
#> 10 Stokes      0.124  1612  2038 <tibble [95 × 4]>          3238.          4148.
#> # ℹ 90 more rows

可视化“外部”几何形状：

# collect details for plot
lee = tibble::lst(
  polygon = nc[nc$NAME == "Lee","geometry"],
  within_range = st_centroid(polygon) %>% st_buffer(50000),
  outside_idx = -unlist(nc$within_dist[nc$NAME == "Lee"])
)

ggplot() +
  geom_sf(data = nc, fill = NA) +
  geom_sf(data = nc[lee$outside_idx, ], aes(fill = "outside of dist."), alpha = .5) +
  geom_sf(data = lee$within_range, fill = "green", alpha = .2) +
  geom_sf(data = lee$polygon, aes(fill = "Lee")) +
  geom_sf(data = nc, aes(geometry = cntr)) +
  scale_fill_manual(values = c("red", "gold"), name = NULL) +
  theme(legend.position = "bottom")

^{创建于 2023-08-31 使用 reprex v2.0.2}

rowwise()，也通过分组来工作——每一行都是一个组。此处用于关闭该行分组。实际上，这是一个非常好的问题，这是您应该注意的问题，在使用如下所示的嵌套结构（即从嵌套的 tibble 中提取统计信息）时，应该首先调用，但是当只处理常规列时，您很可能希望将其关闭或只是更改分组 first。这个答案几乎没有其他关于使用嵌套数据集的例子。ungroup()out_distrowwise()group_by()

1赞 margusl 9/1/2023

还有一些 dplyr.tidyverse.org/articles/rowwise.html 解释了如何以及在什么情况下可以使用行。

0赞 Matt 9/1/2023

啊，明白了。感谢您的资源。

1赞 I_O 9/1/2023 #2

另一种方法：

多边形的空间数据帧示例：

library(sf)
library(dplyr)

df_polys <- 
  list(
    A = st_polygon(list(matrix(c(1, 1, 2, 1, 2, 4, 1, 1), , 2, byrow = TRUE))),
    B = st_polygon(list(matrix(c(8, 1, 10, 1, 10, 5, 8, 5, 8, 1), , 2, byrow = TRUE))),
    C = st_point(c(9, 9)) |> st_buffer(3.2)
  ) |>
  st_as_sfc() |>
  st_as_sf() |>
  rename(geometry = x) |>
  mutate(id = LETTERS[row_number()], .before = 1)

看起来像这样：


    buffer_dist = 2
    
    library(ggplot2)
    df_polys |>
    ggplot() +
      geom_sf() +
      geom_sf(aes(geometry = st_buffer(geometry, buffer_dist)), alpha = 0)

从可能的多边形组合列表（= Spatial DataFrame 的行组合）开始，查看每个多边形是否与它的任何相邻多边形相交，以最大允许距离进行缓冲：df_polysbuffer_dist


combn(nrow(df_polys), 2) |>
  t() |>
  as.data.frame() |>
  setNames(nm = paste0('row_index', 1:2)) |>
  ## --- pairwise intersection check starts here
  rowwise() |>
  mutate(within = st_intersects(df_polys[row_index1,] |> 
                                st_buffer(buffer_size),
                                df_polys[row_index2,]
                                ) |> as.logical(),
         id1 = df_polys$id[row_index1],
         id2 = df_polys$id[row_index2]
         )

结果

# Rowwise: 
  row_index1 row_index2 within id1   id2  
       <int>      <int> <lgl>  <chr> <chr>
1          1          2 NA     A     B    
2          1          3 NA     A     C    
3          2          3 TRUE   B     C

根据需要过滤多边形属性并将其与行索引匹配

如何查找指定距离之外的多边形

How to Find the Polygons Outside of a Specified Distance

评论

步骤：

评论

评论