如何根据条件重置列中度时(degree hours)的累积

How to reset cumulative accumulation of degree hours in a column based on a condition

提问人:Ahsk 提问时间:11/9/2023 最后编辑:Ahsk 更新时间:11/10/2023 访问量:67

问:

我根据特定条件计算了 `index_new` 列。但是,当变量 `dry_hours` 超过 5 时,我无法将累积计算重置为 0(`zero`)。

这是我的代码:

# Value subtracted from `temp` for every row that contributes to the running sum.
base_temperature <- 44

# Asker's original pipeline. It builds `index_new` as a running sum of
# (temp - base_temperature) within each `event`.
# Because the only grouping variable is `event`, the running sum is never
# restarted inside an event: flagged rows merely add 0 to it.
df <- df %>%
 # rle() gives the lengths of consecutive runs of (lwd == 0); sequence() turns
 # each run into 1, 2, 3, ...; ifelse() keeps that counter only on rows where
 # lwd == 0 and writes 0 everywhere else.
 mutate(dry_hours = ifelse(lwd== 0, sequence(rle(lwd == 0)$lengths), 0)) %>%
  # TRUE marks rows that must add 0 to the running sum: lwd == 0, more than 5
  # consecutive lwd == 0 rows, or temp outside the closed range 44..86.
  mutate(zero_index = lwd == 0 | dry_hours > 5 | temp < 44 | temp > 86) %>%
  group_by(event) %>%
  # Cumulative sum per event; flagged rows contribute 0, so the previous total
  # is carried forward rather than reset.
  mutate(index_new = cumsum(ifelse(zero_index, 0,  temp - base_temperature))) %>%
  # Drop the helper flag and place the existing `index` column right before
  # `index_new` for side-by-side comparison.
  select(-zero_index) %>%
  relocate(index, .before = index_new)

这是可重复的例子


# Reproducible example data in dput() form: a grouped tibble with 102 rows,
# grouped by `event` (rows 1-51 are event 1, rows 52-102 are event 2).
# Columns: event, lwd, temp, dry_hours, index, index_new.
# NOTE(review): `index` appears to hold the desired result and `index_new` the
# asker's current output (the first answer filters on index != index_new) -
# confirm against the question text.
df <- structure(list(event = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 
 2, 2, 2, 2, 2, 2), lwd = c(1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 
 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 
 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
 1, 1, 1, 1, 1, 1), temp = c(40, 41, 42, 43, 44, 45, 46, 47, 48, 
 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 
 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 
 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 40, 41, 42, 43, 44, 45, 
 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 
 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 
 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90), dry_hours = c(0, 
 0, 0, 1, 2, 3, 4, 5, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 
 3, 4, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 0, 0, 0, 0, 
 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), index = c(0, 
 0, 0, 0, 0, 0, 0, 0, 0, 5, 11, 18, 26, 35, 45, 56, 68, 81, 95, 
 110, 110, 110, 110, 110, 130, 151, 173, 196, 220, 245, 245, 245, 
 245, 245, 245, 0, 0, 33, 67, 102, 138, 175, 213, 252, 292, 333, 
 375, 375, 375, 375, 375, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 11, 18, 
 26, 35, 45, 56, 68, 81, 95, 110, 110, 110, 110, 110, 130, 151, 
 173, 196, 220, 245, 271, 298, 326, 355, 385, 416, 448, 481, 515, 
 550, 586, 623, 661, 700, 740, 781, 823, 823, 823, 823, 823), 
 index_new = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 11, 18, 26, 35, 
 45, 56, 68, 81, 95, 110, 110, 110, 110, 110, 130, 151, 173, 
 196, 220, 245, 245, 245, 245, 245, 245, 245, 245, 278, 312, 
 347, 383, 420, 458, 497, 537, 578, 620, 620, 620, 620, 620, 
 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 11, 18, 26, 35, 45, 56, 68, 
 81, 95, 110, 110, 110, 110, 110, 130, 151, 173, 196, 220, 
 245, 271, 298, 326, 355, 385, 416, 448, 481, 515, 550, 586, 
 623, 661, 700, 740, 781, 823, 823, 823, 823, 823)), 
 class = c("grouped_df", "tbl_df", "tbl", "data.frame"), row.names = c(NA, -102L),
 groups = structure(list(event = c(1, 2), .rows = structure(list(1:51, 52:102),
 ptype = integer(0), class = c("vctrs_list_of", 
 "vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
 ), row.names = c(NA, -2L), .drop = TRUE))

R DataFrame DPLYR 操作 数据 整理

评论


答:

1赞 Gregor Thomas 11/10/2023 #1

如果希望计算按 `event` 分组,并在 `dry_hours` 超过 5 时重置,则需要把 `dry_hours` 超过 5 的累计次数也加入分组。将 `group_by(event)` 改为 `group_by(event, cumsum(dry_hours > 5))`:

# Same pipeline as in the question, with one change: the running sum is grouped
# by `event` AND by cumsum(dry_hours > 5). That counter increases by one on
# every row where dry_hours > 5, so each such row opens a new group and the
# cumulative sum restarts from there.
# Uses `df` and `base_temperature` as defined in the question.
# The unnamed grouping expression is kept in the result as a column called
# `cumsum(dry_hours > 5)` (visible in the printed output below).
df %>%
 # Counter over consecutive rows with lwd == 0; 0 on rows where lwd != 0.
 mutate(dry_hours = ifelse(lwd== 0, sequence(rle(lwd == 0)$lengths), 0)) %>%
  # TRUE marks rows that add 0 to the running sum.
  mutate(zero_index = lwd == 0 | dry_hours > 5 | temp < 44 | temp > 86) %>%
  group_by(event, cumsum(dry_hours > 5)) %>%
  # Cumulative sum within each (event, reset counter) group.
  mutate(index_new = cumsum(ifelse(zero_index, 0,  temp - base_temperature))) %>%
  select(-zero_index) %>%
  relocate(index, .before = index_new) |>
  ungroup() |>
  # Verification step: compare the recomputed `index_new` with the `index`
  # column already present in the data.
  filter(index != index_new) ## keep only rows that do not match
# A tibble: 0 × 7
# ℹ 7 variables: event <dbl>, lwd <dbl>, temp <dbl>, dry_hours <dbl>, index <dbl>, index_new <dbl>,
#   cumsum(dry_hours > 5) <int>

## all rows match!
1赞 Adriano Mello 11/10/2023 #2

每当 `lwd == 0` 时,`dry_hours` 就开始计数,对吧?所以,只要 `lwd == 0`,无论 `dry_hours` 是多少,`zero_index` 都为真。也许你需要重新检查这一行:

  # Line under discussion: `lwd == 0` is OR-ed with the other conditions, so any
  # row with lwd == 0 is flagged TRUE whatever the value of dry_hours.
  mutate(zero_index = lwd == 0 | dry_hours > 5 | temp < 44 | temp > 86)