提问人:user8229029 提问时间:11/11/2023 更新时间:11/11/2023 访问量:58
对数据框 R 中的多行使用 dplyr 合并函数
Using the dplyr coalesce function for multiple rows from data frame R
问:
我有几行数据,我想使用 R 函数合并这些数据。如何在这组 5 行数据上使用它?我认为问题是合并函数无法将每一行识别为向量。我试过把它转换成五个列表,但这一切似乎都很低效。
library(tidyverse)
my_data <- structure(list(Station_Number = c("0001", "0001", "0001", "0001","0001"),
Station_Name = c("Ithaca 3E", "Ithaca 3E", "Ithaca 3E","Ithaca 3E", "Ithaca 3E"), NWSLI
= c("ITMN1", "ITMN1", "ITMN1", "ITMN1", "ITMN1"), Station_Legacy_Name = c("Mead", Mead",
"Mead", "Mead", "Mead"), Lat = c(41.1528611, 41.1528611, 41.1528611, 41.1528611,
41.1528611), Lon = c(-96.4914444, -96.4914444, -96.4914444, -96.4914444, -96.4914444),
Elev_m = c("353.7", "353.7", "353.7", "353.7", "353.7"), TIMESTAMP = c("2015-04-16
10:00:00", "2015-04-16 10:00:00", "2015-04-16 10:00:00", "2015-04-16 10:00:00", "2015-
04-16 10:00:00"), RECORD = c(0L, 0L, 0L, 0L, 0L), Ms_veg_10cm = c(NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_), Ms_veg_50cm = c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_), Ms_veg_100cm = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_),
Ms_veg_25cm = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), Ta_2m_Avg =
c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), RH_2m_Avg = c(NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_), Ts_bare_10cm_Avg = c(NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_), Solar_2m_Avg = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_),
WS_ms_S_WVT = c(0.698, NA, NA, NA, NA), WS_ms_U_WVT = c(0.586, NA, NA, NA, NA),
WindDir_DU_WVT = c(212.4, NA, NA, NA, NA), WindDir_SDU_WVT = c(23.94, NA, NA, NA, NA),
Rain_1m_Tot = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_
), Pres_2m = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_
), BattVolts_Min = c(13.28, NA, NA, NA, NA), Ta_C_Avg = c(17.83,
NA, NA, NA, NA), RH_Avg = c(66.69, NA, NA, NA, NA), Ts_C_Avg = c(11.61,
NA, NA, NA, NA), Solar_Avg = c(NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_), Theta_10 = c(NA, -4.73, NA, NA, NA),
Theta_25 = c(NA, NA, -4.848, NA, NA), Theta_50 = c(NA, NA,
NA, -4.932, NA), Theta_100 = c(NA, NA, NA, NA, -4.949), Rain_mm_Tot = c(0,
NA, NA, NA, NA), BP_mbar = c(976.4229, NA, NA, NA, NA), source = c("E:\\Data_Collected_withDups/Mead/NE-Mead-CR1000-Table101-dat (2).backup",
"E:\\Data_Collected_withDups/Mead/NE-Mead-CR1000-Table301-dat",
"E:\\Data_Collected_withDups/Mead/NE-Mead-CR1000-Table302-dat",
"E:\\Data_Collected_withDups/Mead/NE-Mead-CR1000-Table303-dat",
"E:\\Data_Collected_withDups/Mead/NE-Mead-CR1000-Table304-dat"
)), row.names = c(44257L, 74695L, 76522L, 78349L, 80176L), class = "data.frame")
我想要一个看起来像这样的数据行:
structure(list(Station_Number = "0001", Station_Name = "Ithaca 3E",
NWSLI = "ITMN1", Station_Legacy_Name = "Mead", Lat = 41.1528611,
Lon = -96.4914444, Elev_m = "353.7", TIMESTAMP = "2015-04-16 10:00:00",
RECORD = 0L, Ms_veg_10cm = NA_real_, Ms_veg_50cm = NA_real_,
Ms_veg_100cm = NA_real_, Ms_veg_25cm = NA_real_, Ta_2m_Avg = NA_real_,
RH_2m_Avg = NA_real_, Ts_bare_10cm_Avg = NA_real_, Solar_2m_Avg = NA_real_,
WS_ms_S_WVT = 0.698, WS_ms_U_WVT = 0.586, WindDir_DU_WVT = 212.4,
WindDir_SDU_WVT = 23.94, Rain_1m_Tot = NA_real_, Pres_2m = NA_real_,
BattVolts_Min = 13.28, Ta_C_Avg = 17.83, RH_Avg = 66.69,
Ts_C_Avg = 11.61, Solar_Avg = NA_real_, Theta_10 = -4.73,
Theta_25 = NA_real_, Theta_50 = NA_real_, Theta_100 = NA_real_,
Rain_mm_Tot = 0, BP_mbar = 976.4229, source = "E:\\Data_Collected_withDups/Mead/NE-Mead-CR1000-Table101-dat (2).backup"), row.names = 1L, class = "data.frame")
我试过:
coalesce(unlist(my_data[1:5,]))
和其他版本,但没有运气。我如何使用合并或任何其他方法将其合并为我想要的内容?
答:
0赞
Nicholas Ray
11/11/2023
#1
这可能很难一概而论,但这将实现你所追求的目标。
my_data <- data.frame(
Station_Number = c("0001", "0001", "0001", "0001","0001"),
Station_Name = c("Ithaca 3E", "Ithaca 3E", "Ithaca 3E","Ithaca 3E",
"Ithaca 3E"),
NWSLI = c("ITMN1", "ITMN1", "ITMN1", "ITMN1", "ITMN1"),
Station_Legacy_Name = c("Mead","Mead","Mead", "Mead", "Mead"),
Lat = c(41.1528611, 41.1528611, 41.1528611, 41.1528611, 41.1528611),
Lon = c(-96.4914444, -96.4914444, -96.4914444, -96.4914444, -96.4914444),
Elev_m = c("353.7", "353.7", "353.7", "353.7", "353.7"),
TIMESTAMP = c("2015-04-16 10:00:00", "2015-04-16 10:00:00",
"2015-04-16 10:00:00", "2015-04-16 10:00:00",
"2015-04-16 10:00:00"),
RECORD = c(0L, 0L, 0L, 0L, 0L),
Ms_veg_10cm = c(NA_real_, NA_real_,NA_real_, NA_real_, NA_real_),
Ms_veg_50cm = c(NA_real_, NA_real_, NA_real_, NA_real_,NA_real_),
Ms_veg_100cm = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_),
Ms_veg_25cm = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_),
Ta_2m_Avg = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_),
RH_2m_Avg = c(NA_real_, NA_real_,NA_real_, NA_real_, NA_real_),
Ts_bare_10cm_Avg = c(NA_real_, NA_real_, NA_real_,NA_real_, NA_real_),
Solar_2m_Avg = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_),
WS_ms_S_WVT = c(0.698, NA, NA, NA, NA),
WS_ms_U_WVT = c(0.586, NA, NA, NA, NA),
WindDir_DU_WVT = c(212.4, NA, NA, NA, NA),
WindDir_SDU_WVT = c(23.94, NA, NA, NA, NA),
Rain_1m_Tot = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_),
Pres_2m = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_),
BattVolts_Min = c(13.28, NA, NA, NA, NA),
Ta_C_Avg = c(17.83,NA, NA, NA, NA),
RH_Avg = c(66.69, NA, NA, NA, NA),
Ts_C_Avg = c(11.61,NA, NA, NA, NA),
Solar_Avg = c(NA_real_, NA_real_, NA_real_,NA_real_, NA_real_),
Theta_10 = c(NA, -4.73, NA, NA, NA),
Theta_25 = c(NA, NA, -4.848, NA, NA),
Theta_50 = c(NA, NA,NA, -4.932, NA),
Theta_100 = c(NA, NA, NA, NA, -4.949),
Rain_mm_Tot = c(0,NA, NA, NA, NA),
BP_mbar = c(976.4229, NA, NA, NA, NA),
row.names = c(44257L, 74695L, 76522L, 78349L, 80176L)
)
library(zoo)
for (i in 1:ncol(my_data)) {
my_data[,i]<-ifelse(is.na(my_data[1,i]),na.locf(my_data[,i],fromLast=TRUE),
na.locf(my_data[,i]))
}
my_data<-my_data[1,]
这取决于这样一个事实,即您想要压缩(或合并)的列都只有一个观测值。您可以只用该观测值填充其他(空)行,然后从该数据框中选择一行。
1赞
Jon Spring
11/11/2023
#2
这会将任何非 NA 填充到下面的任何 NA 中。然后我们可以采取最后一行。
my_data |>
tidyr::fill(everything()) |>
tail(1)
结果
Station_Number Station_Name NWSLI Station_Legacy_Name Lat Lon Elev_m TIMESTAMP RECORD Ms_veg_10cm Ms_veg_50cm Ms_veg_100cm Ms_veg_25cm Ta_2m_Avg RH_2m_Avg Ts_bare_10cm_Avg Solar_2m_Avg WS_ms_S_WVT WS_ms_U_WVT WindDir_DU_WVT WindDir_SDU_WVT Rain_1m_Tot Pres_2m BattVolts_Min Ta_C_Avg RH_Avg Ts_C_Avg Solar_Avg Theta_10 Theta_25 Theta_50 Theta_100 Rain_mm_Tot BP_mbar
80176 0001 Ithaca 3E ITMN1 Mead 41.15286 -96.49144 353.7 2015-04-16 10:00:00 0 NA NA NA NA NA NA NA NA 0.698 0.586 212.4 23.94 NA NA 13.28 17.83 66.69 11.61 NA -4.73 -4.848 -4.932 -4.949 0 976.4229
评论
1赞
user8229029
11/11/2023
效果很好。我稍微修改了一下,dupped60_2_rows |> tidyr::fill(everything(), .direction = 'up') |> head(1)
评论