对数据框 R 中的多行使用 dplyr 合并函数

Using the dplyr coalesce function for multiple rows from data frame R

提问人:user8229029 提问时间:11/11/2023 更新时间:11/11/2023 访问量:58

问:

我有几行数据,我想使用 R 函数合并这些数据。如何在这组 5 行数据上使用它?我认为问题是合并函数无法将每一行识别为向量。我试过把它转换成五个列表,但这一切似乎都很低效。

library(tidyverse)
my_data <- structure(list(Station_Number = c("0001", "0001", "0001", "0001","0001"), 
Station_Name = c("Ithaca 3E", "Ithaca 3E", "Ithaca 3E","Ithaca 3E", "Ithaca 3E"), NWSLI 
= c("ITMN1", "ITMN1", "ITMN1", "ITMN1", "ITMN1"), Station_Legacy_Name = c("Mead", Mead", 
"Mead", "Mead", "Mead"), Lat = c(41.1528611, 41.1528611, 41.1528611, 41.1528611, 
41.1528611), Lon = c(-96.4914444, -96.4914444, -96.4914444, -96.4914444, -96.4914444), 
Elev_m = c("353.7", "353.7", "353.7", "353.7", "353.7"), TIMESTAMP = c("2015-04-16 
10:00:00", "2015-04-16 10:00:00", "2015-04-16 10:00:00", "2015-04-16 10:00:00", "2015- 
04-16 10:00:00"), RECORD = c(0L, 0L, 0L, 0L, 0L), Ms_veg_10cm = c(NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_), Ms_veg_50cm = c(NA_real_, NA_real_, NA_real_, NA_real_, 
NA_real_), Ms_veg_100cm = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), 
Ms_veg_25cm = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), Ta_2m_Avg = 
c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), RH_2m_Avg = c(NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_), Ts_bare_10cm_Avg = c(NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_), Solar_2m_Avg = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), 
WS_ms_S_WVT = c(0.698, NA, NA, NA, NA), WS_ms_U_WVT = c(0.586, NA, NA, NA, NA), 
WindDir_DU_WVT = c(212.4, NA, NA, NA, NA), WindDir_SDU_WVT = c(23.94, NA, NA, NA, NA), 
Rain_1m_Tot = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_
), Pres_2m = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_
), BattVolts_Min = c(13.28, NA, NA, NA, NA), Ta_C_Avg = c(17.83, 
NA, NA, NA, NA), RH_Avg = c(66.69, NA, NA, NA, NA), Ts_C_Avg = c(11.61, 
NA, NA, NA, NA), Solar_Avg = c(NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_), Theta_10 = c(NA, -4.73, NA, NA, NA), 
Theta_25 = c(NA, NA, -4.848, NA, NA), Theta_50 = c(NA, NA, 
NA, -4.932, NA), Theta_100 = c(NA, NA, NA, NA, -4.949), Rain_mm_Tot = c(0, 
NA, NA, NA, NA), BP_mbar = c(976.4229, NA, NA, NA, NA), source = c("E:\\Data_Collected_withDups/Mead/NE-Mead-CR1000-Table101-dat (2).backup", 
"E:\\Data_Collected_withDups/Mead/NE-Mead-CR1000-Table301-dat", 
"E:\\Data_Collected_withDups/Mead/NE-Mead-CR1000-Table302-dat", 
"E:\\Data_Collected_withDups/Mead/NE-Mead-CR1000-Table303-dat", 
"E:\\Data_Collected_withDups/Mead/NE-Mead-CR1000-Table304-dat"
)), row.names = c(44257L, 74695L, 76522L, 78349L, 80176L), class = "data.frame")

我想要一个看起来像这样的数据行:

structure(list(Station_Number = "0001", Station_Name = "Ithaca 3E", 
NWSLI = "ITMN1", Station_Legacy_Name = "Mead", Lat = 41.1528611, 
Lon = -96.4914444, Elev_m = "353.7", TIMESTAMP = "2015-04-16 10:00:00", 
RECORD = 0L, Ms_veg_10cm = NA_real_, Ms_veg_50cm = NA_real_, 
Ms_veg_100cm = NA_real_, Ms_veg_25cm = NA_real_, Ta_2m_Avg = NA_real_, 
RH_2m_Avg = NA_real_, Ts_bare_10cm_Avg = NA_real_, Solar_2m_Avg = NA_real_, 
WS_ms_S_WVT = 0.698, WS_ms_U_WVT = 0.586, WindDir_DU_WVT = 212.4, 
WindDir_SDU_WVT = 23.94, Rain_1m_Tot = NA_real_, Pres_2m = NA_real_, 
BattVolts_Min = 13.28, Ta_C_Avg = 17.83, RH_Avg = 66.69, 
Ts_C_Avg = 11.61, Solar_Avg = NA_real_, Theta_10 = -4.73, 
Theta_25 = NA_real_, Theta_50 = NA_real_, Theta_100 = NA_real_, 
Rain_mm_Tot = 0, BP_mbar = 976.4229, source = "E:\\Data_Collected_withDups/Mead/NE-Mead-CR1000-Table101-dat (2).backup"), row.names = 1L, class = "data.frame")

我试过:

coalesce(unlist(my_data[1:5,]))

和其他版本,但没有运气。我如何使用合并或任何其他方法将其合并为我想要的内容?

r dplyr tidyverse coalesce

评论


答:

0赞 Nicholas Ray 11/11/2023 #1

这可能很难一概而论,但这将实现你所追求的目标。

my_data <- data.frame(
     Station_Number = c("0001", "0001", "0001", "0001","0001"),
     Station_Name = c("Ithaca 3E", "Ithaca 3E", "Ithaca 3E","Ithaca 3E",
                      "Ithaca 3E"),
     NWSLI = c("ITMN1", "ITMN1", "ITMN1", "ITMN1", "ITMN1"),
     Station_Legacy_Name = c("Mead","Mead","Mead", "Mead", "Mead"),
     Lat = c(41.1528611, 41.1528611, 41.1528611, 41.1528611, 41.1528611),
     Lon = c(-96.4914444, -96.4914444, -96.4914444, -96.4914444, -96.4914444),
     Elev_m = c("353.7", "353.7", "353.7", "353.7", "353.7"),
     TIMESTAMP = c("2015-04-16 10:00:00", "2015-04-16 10:00:00",
                   "2015-04-16 10:00:00", "2015-04-16 10:00:00",
                   "2015-04-16 10:00:00"),
     RECORD = c(0L, 0L, 0L, 0L, 0L),
     Ms_veg_10cm = c(NA_real_, NA_real_,NA_real_, NA_real_, NA_real_),
     Ms_veg_50cm = c(NA_real_, NA_real_, NA_real_, NA_real_,NA_real_),
     Ms_veg_100cm = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_),
     Ms_veg_25cm = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_),
     Ta_2m_Avg = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_),
     RH_2m_Avg = c(NA_real_, NA_real_,NA_real_, NA_real_, NA_real_),
     Ts_bare_10cm_Avg = c(NA_real_, NA_real_, NA_real_,NA_real_, NA_real_),
     Solar_2m_Avg = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_),
     WS_ms_S_WVT = c(0.698, NA, NA, NA, NA),
     WS_ms_U_WVT = c(0.586, NA, NA, NA, NA),
     WindDir_DU_WVT = c(212.4, NA, NA, NA, NA),
     WindDir_SDU_WVT = c(23.94, NA, NA, NA, NA),
     Rain_1m_Tot = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_),
     Pres_2m = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), 
     BattVolts_Min = c(13.28, NA, NA, NA, NA), 
     Ta_C_Avg = c(17.83,NA, NA, NA, NA),
     RH_Avg = c(66.69, NA, NA, NA, NA),
     Ts_C_Avg = c(11.61,NA, NA, NA, NA),
     Solar_Avg = c(NA_real_, NA_real_, NA_real_,NA_real_, NA_real_),
     Theta_10 = c(NA, -4.73, NA, NA, NA),
     Theta_25 = c(NA, NA, -4.848, NA, NA),
     Theta_50 = c(NA, NA,NA, -4.932, NA),
     Theta_100 = c(NA, NA, NA, NA, -4.949),
     Rain_mm_Tot = c(0,NA, NA, NA, NA),
     BP_mbar = c(976.4229, NA, NA, NA, NA),
     row.names = c(44257L, 74695L, 76522L, 78349L, 80176L)
)
library(zoo)
for (i in 1:ncol(my_data)) {
  my_data[,i]<-ifelse(is.na(my_data[1,i]),na.locf(my_data[,i],fromLast=TRUE),
                      na.locf(my_data[,i]))
}
my_data<-my_data[1,]

这取决于这样一个事实,即您想要压缩(或合并)的列都只有一个观测值。您可以只用该观测值填充其他(空)行,然后从该数据框中选择一行。

1赞 Jon Spring 11/11/2023 #2

这会将任何非 NA 填充到下面的任何 NA 中。然后我们可以采取最后一行。

my_data |>
  tidyr::fill(everything()) |>
  tail(1)

结果

      Station_Number Station_Name NWSLI Station_Legacy_Name      Lat       Lon Elev_m           TIMESTAMP RECORD Ms_veg_10cm Ms_veg_50cm Ms_veg_100cm Ms_veg_25cm Ta_2m_Avg RH_2m_Avg Ts_bare_10cm_Avg Solar_2m_Avg WS_ms_S_WVT WS_ms_U_WVT WindDir_DU_WVT WindDir_SDU_WVT Rain_1m_Tot Pres_2m BattVolts_Min Ta_C_Avg RH_Avg Ts_C_Avg Solar_Avg Theta_10 Theta_25 Theta_50 Theta_100 Rain_mm_Tot  BP_mbar
80176           0001    Ithaca 3E ITMN1                Mead 41.15286 -96.49144  353.7 2015-04-16 10:00:00      0          NA          NA           NA          NA        NA        NA               NA           NA       0.698       0.586          212.4           23.94          NA      NA         13.28    17.83  66.69    11.61        NA    -4.73   -4.848   -4.932    -4.949           0 976.4229

评论

1赞 user8229029 11/11/2023
效果很好。我稍微修改了一下,dupped60_2_rows |> tidyr::fill(everything(), .direction = 'up') |> head(1)