如何递增非连续重复的组

How to increment non consecutive duplicated groups

提问人:TarJae 提问时间:11/14/2023 最后编辑:ThomasIsCodingTarJae 更新时间:11/14/2023 访问量:54

问:

我该如何计算 col2?

每当 col1 中的某个值在被其他值打断之后再次出现时,col2 都应该递增;也就是说,要创建一个分组变量,使同一个值每开始一段新的连续序列时,该变量就加一。

# Sample data frame (28 rows): col1 holds the run labels and col2 holds the
# counter the question wants to reproduce (same values as the expected output).
structure(list(col1 = c("A", "A", "A", "A", "B", "B", "B", "C", 
"C", "C", "C", "A", "A", "E", "E", "E", "F", "F", "F", "G", "G", 
"G", "C", "C", "C", "A", "A", "A"), col2 = c(1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L)), class = "data.frame", row.names = c(NA, 
-28L))

期望输出:

   col1 col2
1     A    1
2     A    1
3     A    1
4     A    1
5     B    1
6     B    1
7     B    1
8     C    1
9     C    1
10    C    1
11    C    1
12    A    2
13    A    2
14    E    1
15    E    1
16    E    1
17    F    1
18    F    1
19    F    1
20    G    1
21    G    1
22    G    1
23    C    2
24    C    2
25    C    2
26    A    3
27    A    3
28    A    3
R 序列

评论


答:

4赞 stefan 11/14/2023 #1

一种选择是像这样使用两次 consecutive_id:

# Attach dplyr; warn.conflicts = FALSE silences the name-masking messages.
library(dplyr, warn.conflicts = FALSE)

# Step 1: give every run of identical adjacent col1 values its own id.
# Step 2: within each col1 value, renumber those run ids starting from 1, so
#         col2 counts how many separate runs of that value have occurred.
df |>
  select(-col2) |>
  mutate(col2 = consecutive_id(col1)) |>
  mutate(col2 = consecutive_id(col2), .by = col1)
#>    col1 col2
#> 1     A    1
#> 2     A    1
#> 3     A    1
#> 4     A    1
#> 5     B    1
#> 6     B    1
#> 7     B    1
#> 8     C    1
#> 9     C    1
#> 10    C    1
#> 11    C    1
#> 12    A    2
#> 13    A    2
#> 14    E    1
#> 15    E    1
#> 16    E    1
#> 17    F    1
#> 18    F    1
#> 19    F    1
#> 20    G    1
#> 21    G    1
#> 22    G    1
#> 23    C    2
#> 24    C    2
#> 25    C    2
#> 26    A    3
#> 27    A    3
#> 28    A    3
2赞 ThomasIsCoding 11/14/2023 #2

使用rle + ave

# Add col3: for each run of identical adjacent col1 values, the number of runs
# of that same value seen so far, repeated over every row of the run.
transform(
  df,
  col3 = {
    runs <- rle(col1)
    # Rank each run among the runs that share its value (1, 2, 3, ...).
    run_rank <- ave(seq_along(runs$values), runs$values, FUN = seq_along)
    # Expand the per-run rank back to one entry per row.
    rep(run_rank, runs$lengths)
  }
)

   col1 col2 col3
1     A    1    1
2     A    1    1
3     A    1    1
4     A    1    1
5     B    1    1
6     B    1    1
7     B    1    1
8     C    1    1
9     C    1    1
10    C    1    1
11    C    1    1
12    A    2    2
13    A    2    2
14    E    1    1
15    E    1    1
16    E    1    1
17    F    1    1
18    F    1    1
19    F    1    1
20    G    1    1
21    G    1    1
22    G    1    1
23    C    2    2
24    C    2    2
25    C    2    2
26    A    3    3
27    A    3    3
28    A    3    3
1赞 Chris 11/14/2023 #3

还有一个比较脆弱的 base R 函数:

#' Number interrupted runs of `col1`
#'
#' Splits `groups$col1` into runs of identical adjacent values and numbers each
#' run by how many runs of the same value have occurred so far (1 for the first
#' run of a value, 2 for the second, ...).
#'
#' @param groups A data frame with a column `col1` (atomic vector or factor).
#' @return An unnamed list of two data frames:
#'   1. `groups` with `col2` set to the run counter for every row;
#'   2. a per-run summary with columns `start`, `end` (row positions of the
#'      run), `vals` (the run's value) and `val2` (the run counter).
increment_groups <- function(groups) {
  stopifnot("col1" %in% names(groups))

  keys <- groups[["col1"]]
  # rle() only accepts atomic vectors, so compare factors by their labels.
  if (is.factor(keys)) {
    keys <- as.character(keys)
  }

  # Compute the run-length encoding once and derive everything from it.
  runs <- rle(keys)
  end <- cumsum(runs$lengths)
  start <- end - runs$lengths + 1L
  vals <- runs$values

  # Map each run to an integer id of its value (match() also gives NA a
  # regular id), then count occurrences within each id.
  key_id <- match(vals, unique(vals))
  val2 <- ave(seq_along(key_id), key_id, FUN = seq_along)

  df1 <- data.frame(start, end, vals, val2)

  # Expand the per-run counter back to one entry per row.
  groups$col2 <- rep(val2, times = runs$lengths)

  list(groups, df1)
}

在警告的烟雾散去后,它给出了

incr_interrupted_groups  = increment_groups(groups)
<---Warnings -snip --->
incr_interrupted_groups
[[1]]
   col1 col2
1     A    1
2     A    1
3     A    1
4     A    1
5     B    1
6     B    1
7     B    1
8     C    1
9     C    1
10    C    1
11    C    1
12    A    2
13    A    2
14    E    1
15    E    1
16    E    1
17    F    1
18    F    1
19    F    1
20    G    1
21    G    1
22    G    1
23    C    2
24    C    2
25    C    2
26    A    3
27    A    3
28    A    3

[[2]]
  start end vals val2
1     1   4    A    1
2     5   7    B    1
3     8  11    C    1
4    12  13    A    2
5    14  16    E    1
6    17  19    F    1
7    20  22    G    1
8    23  25    C    2
9    26  28    A    3

评论

0赞 TarJae 11/14/2023
点赞!感谢你付出的巨大努力!