如何在 dplyr 中命名group_split输出列表-解网

问：

我有以下使用dplyr group_split的过程：

library(tidyverse)
set.seed(1)
iris %>% sample_n(size = 5) %>% 
    group_by(Species) %>% 
    group_split()

结果是：

[[1]]
# A tibble: 2 x 5
  Sepal.Length Sepal.Width Petal.Length Petal.Width Species
         <dbl>       <dbl>        <dbl>       <dbl> <fct>  
1          5           3.5          1.6         0.6 setosa 
2          5.1         3.8          1.5         0.3 setosa 

[[2]]
# A tibble: 2 x 5
  Sepal.Length Sepal.Width Petal.Length Petal.Width Species   
         <dbl>       <dbl>        <dbl>       <dbl> <fct>     
1          5.9         3            4.2         1.5 versicolor
2          6.2         2.2          4.5         1.5 versicolor

[[3]]
# A tibble: 1 x 5
  Sepal.Length Sepal.Width Petal.Length Petal.Width Species  
         <dbl>       <dbl>        <dbl>       <dbl> <fct>    
1          6.2         3.4          5.4         2.3 virginica

我想实现的是按分组名称（即物种）命名此列表。产生这个（手工完成）：

$setosa
# A tibble: 2 x 5
  Sepal.Length Sepal.Width Petal.Length Petal.Width Species
         <dbl>       <dbl>        <dbl>       <dbl> <fct>  
1          5           3.5          1.6         0.6 setosa 
2          5.1         3.8          1.5         0.3 setosa 

$versicolor
# A tibble: 2 x 5
  Sepal.Length Sepal.Width Petal.Length Petal.Width Species   
         <dbl>       <dbl>        <dbl>       <dbl> <fct>     
1          5.9         3            4.2         1.5 versicolor
2          6.2         2.2          4.5         1.5 versicolor

$virginica
# A tibble: 1 x 5
  Sepal.Length Sepal.Width Petal.Length Petal.Width Species  
         <dbl>       <dbl>        <dbl>       <dbl> <fct>    
1          6.2         3.4          5.4         2.3 virginica

我怎样才能做到这一点？

更新

我尝试了这个新数据，现在的命名称为：Cluster

df <- structure(list(Cluster = c("Cluster9", "Cluster11", "Cluster1", 
"Cluster9", "Cluster6", "Cluster12", "Cluster9", "Cluster11", 
"Cluster8", "Cluster8"), gene_name = c("Tbc1d8", "Vimp", "Grhpr", 
"H1f0", "Zfp398", "Pikfyve", "Ankrd13a", "Fgfr1op2", "Golga7", 
"Lars2"), p_value = c(3.46629097620496e-47, 3.16837338947245e-62, 
1.55108439059684e-06, 9.46078511685542e-131, 0.000354049720507017, 
0.0146807415917158, 1.42799750295289e-38, 2.0697825959399e-08, 
4.13777221466668e-06, 3.92889640704683e-184), morans_test_statistic = c(14.3797687352223, 
16.6057085487911, 4.66393667525872, 24.301453902967, 3.38642377758137, 
2.17859882998961, 12.9350063459509, 5.48479186018979, 4.4579286289179, 
28.9144540271157), morans_I = c(0.0814728893885783, 0.0947505609609695, 
0.0260671534007409, 0.138921824574569, 0.018764800166045, 0.0119813199210325, 
0.0736554862590782, 0.0309849638728409, 0.0250591347318986, 0.165310420808725
), q_value = c(1.57917584337356e-46, 1.62106594498462e-61, 3.43312171446844e-06, 
6.99503520654745e-130, 0.000683559649593623, 0.0245476826213791, 
5.96116678335584e-38, 4.97603701391971e-08, 8.9649490080526e-06, 
3.48152096326702e-183)), row.names = c(NA, -10L), class = c("tbl_df", 
"tbl", "data.frame"))

使用 Ronak Shah 的方法，我得到的结果不一致：

df %>% group_split(Cluster) %>% setNames(unique(df$Cluster))
$Cluster9
# A tibble: 1 x 6
  Cluster  gene_name    p_value morans_test_statistic morans_I    q_value
  <chr>    <chr>          <dbl>                 <dbl>    <dbl>      <dbl>
1 Cluster1 Grhpr     0.00000155                  4.66   0.0261 0.00000343

$Cluster11
# A tibble: 2 x 6
  Cluster   gene_name  p_value morans_test_statistic morans_I  q_value
  <chr>     <chr>        <dbl>                 <dbl>    <dbl>    <dbl>
1 Cluster11 Vimp      3.17e-62                 16.6    0.0948 1.62e-61
2 Cluster11 Fgfr1op2  2.07e- 8                  5.48   0.0310 4.98e- 8

$Cluster1
# A tibble: 1 x 6
  Cluster   gene_name p_value morans_test_statistic morans_I q_value
  <chr>     <chr>       <dbl>                 <dbl>    <dbl>   <dbl>
1 Cluster12 Pikfyve    0.0147                  2.18   0.0120  0.0245

$Cluster6
# A tibble: 1 x 6
  Cluster  gene_name  p_value morans_test_statistic morans_I  q_value
  <chr>    <chr>        <dbl>                 <dbl>    <dbl>    <dbl>
1 Cluster6 Zfp398    0.000354                  3.39   0.0188 0.000684

$Cluster12
# A tibble: 2 x 6
  Cluster  gene_name   p_value morans_test_statistic morans_I   q_value
  <chr>    <chr>         <dbl>                 <dbl>    <dbl>     <dbl>
1 Cluster8 Golga7    4.14e-  6                  4.46   0.0251 8.96e-  6
2 Cluster8 Lars2     3.93e-184                 28.9    0.165  3.48e-183

$Cluster8
# A tibble: 3 x 6
  Cluster  gene_name   p_value morans_test_statistic morans_I   q_value
  <chr>    <chr>         <dbl>                 <dbl>    <dbl>     <dbl>
1 Cluster9 Tbc1d8    3.47e- 47                  14.4   0.0815 1.58e- 46
2 Cluster9 H1f0      9.46e-131                  24.3   0.139  7.00e-130
3 Cluster9 Ankrd13a  1.43e- 38                  12.9   0.0737 5.96e- 38

请注意，其中有。$Cluster9Cluster1

请告知如何去做？

r dplyr split tidyverse tidytable

library(dplyr)

df <- iris %>% sample_n(size = 5) 

df %>%
   group_split(Species) %>%
   setNames(unique(df$Species))


#$setosa
# A tibble: 1 x 5
#  Sepal.Length Sepal.Width Petal.Length Petal.Width Species
#         <dbl>       <dbl>        <dbl>       <dbl> <fct>  
#1            5         3.4          1.5         0.2 setosa 

#$versicolor
# A tibble: 1 x 5
#  Sepal.Length Sepal.Width Petal.Length Petal.Width Species   
#         <dbl>       <dbl>        <dbl>       <dbl> <fct>     
#1            6         3.4          4.5         1.6 versicolor

#$virginica
# A tibble: 3 x 5
#  Sepal.Length Sepal.Width Petal.Length Petal.Width Species  
#         <dbl>       <dbl>        <dbl>       <dbl> <fct>    
#1          7.3         2.9          6.3         1.8 virginica
#2          6.9         3.1          5.1         2.3 virginica
#3          7.7         3            6.1         2.3 virginica

奇怪的是，它没有直接命名列表，因为它应该是命名列表的替代方案。group_splitbase::split

split(df, df$Species)

该文件说：

group_split（）的工作方式类似于 base：：split（），但

它使用 group_by（）中的分组结构，因此受数据掩码的约束
它不会根据分组来命名列表的元素，因为这通常会丢失信息并且令人困惑。

对于更新的数据集，它不起作用，因为在我们使用的命名时，它以与它们出现的顺序相同的顺序获取数据，而，根据其值的递增顺序拆分数据。（所以拆分的顺序是,, ...）克服此问题的一种方法是使用 .uniquegroup_splitCluster1Cluster11Cluster2Clusterfactorlevelsunique

df <- df %>%
      mutate(Cluster = factor(Cluster, levels = unique(Cluster))) 

df %>%
   group_split(Cluster) %>%
   setNames(unique(df$Cluster))

或者，如果您不希望它们作为因素

df %>%
  group_split(Cluster) %>%
  setNames(sort(unique(df$Cluster)))

有没有办法在不调用原始数据帧的情况下实现这一点？例如，当您使用 mutate 时，您将 mutated DataFrame 保存在对象中。但是，如果我们有一长串管道，并且我们想避免保存生成的对象，但仍然通过一些不在原始数据帧中但由管道创建的变量来命名表，这可能吗？

1赞 Ronak Shah 8/12/2020

这取决于你的情况，但我认为这应该是可能的。如果变量是在管道中创建的，则可以用于引用对象。可用于引用以前创建的列。..$column_name

1赞 Tung 4/16/2021

来自开发人员：“我们坚信给它起名字是一种反模式，因此这不会发生在中，但我们确实提供了一些工具，您可以使用它来制作带有名称的自己的版本。github.com/tidyverse/dplyr/issues/4223dplyrdplyr

0赞 Nick 2/25/2020 #2

使用一个循环来访问每个 df 中 Cluster 的唯一元素，然后将它们分配为各自的名称。for

x.names <-NULL
for (i in 1:length(df)){
  x.names[i]<-c(unique(df[[i]]$Cluster))
  names(df)<-x.names
}

8赞 FB74 4/3/2020 #3

我遇到了同样的问题，并使用了这个 2 步解决方案：

df= df %>% group_by(Cluster)
df= df %>% group_split() %>% set_names(unlist(group_keys(df)))

df$Cluster1
# A tibble: 1 x 6
  Cluster  gene_name    p_value morans_test_statistic morans_I    q_value
  <chr>    <chr>          <dbl>                 <dbl>    <dbl>      <dbl>
1 Cluster1 Grhpr     0.00000155                  4.66   0.0261 0.00000343
df$Cluster9
# A tibble: 3 x 6
  Cluster  gene_name   p_value morans_test_statistic morans_I   q_value
  <chr>    <chr>         <dbl>                 <dbl>    <dbl>     <dbl>
1 Cluster9 Tbc1d8    3.47e- 47                  14.4   0.0815 1.58e- 46
2 Cluster9 H1f0      9.46e-131                  24.3   0.139  7.00e-130
3 Cluster9 Ankrd13a  1.43e- 38                  12.9   0.0737 5.96e- 38

$setosa
  Sepal.Length Sepal.Width Petal.Length Petal.Width Species
4          5.5         3.5          1.3         0.2  setosa
5          5.3         3.7          1.5         0.2  setosa

$versicolor
  Sepal.Length Sepal.Width Petal.Length Petal.Width    Species
3            5         2.3          3.3           1 versicolor

$virginica
  Sepal.Length Sepal.Width Petal.Length Petal.Width   Species
1          7.7         2.6          6.9         2.3 virginica
2          7.2         3.0          5.8         1.6 virginica

也适用于上述数据帧：

df %>% 
  split(f = as.factor(.$Cluster))

为您提供：

$Cluster1
# A tibble: 1 x 6
  Cluster  gene_name    p_value morans_test_statistic morans_I    q_value
  <chr>    <chr>          <dbl>                 <dbl>    <dbl>      <dbl>
1 Cluster1 Grhpr     0.00000155                  4.66   0.0261 0.00000343

$Cluster11
# A tibble: 2 x 6
  Cluster   gene_name  p_value morans_test_statistic morans_I  q_value
  <chr>     <chr>        <dbl>                 <dbl>    <dbl>    <dbl>
1 Cluster11 Vimp      3.17e-62                 16.6    0.0948 1.62e-61
2 Cluster11 Fgfr1op2  2.07e- 8                  5.48   0.0310 4.98e- 8

$Cluster12
# A tibble: 1 x 6
  Cluster   gene_name p_value morans_test_statistic morans_I q_value
  <chr>     <chr>       <dbl>                 <dbl>    <dbl>   <dbl>
1 Cluster12 Pikfyve    0.0147                  2.18   0.0120  0.0245

$Cluster6
# A tibble: 1 x 6
  Cluster  gene_name  p_value morans_test_statistic morans_I  q_value
  <chr>    <chr>        <dbl>                 <dbl>    <dbl>    <dbl>
1 Cluster6 Zfp398    0.000354                  3.39   0.0188 0.000684

$Cluster8
# A tibble: 2 x 6
  Cluster  gene_name   p_value morans_test_statistic morans_I   q_value
  <chr>    <chr>         <dbl>                 <dbl>    <dbl>     <dbl>
1 Cluster8 Golga7    4.14e-  6                  4.46   0.0251 8.96e-  6
2 Cluster8 Lars2     3.93e-184                 28.9    0.165  3.48e-183

$Cluster9
# A tibble: 3 x 6
  Cluster  gene_name   p_value morans_test_statistic morans_I   q_value
  <chr>    <chr>         <dbl>                 <dbl>    <dbl>     <dbl>
1 Cluster9 Tbc1d8    3.47e- 47                  14.4   0.0815 1.58e- 46
2 Cluster9 H1f0      9.46e-131                  24.3   0.139  7.00e-130
3 Cluster9 Ankrd13a  1.43e- 38                  12.9   0.0737 5.96e- 38

9赞 Tung 4/16/2021 #6

如果要将数据框拆分为多个组并具有命名列表，则 tidytable 包具有用于此的 group_split.（） 函数。

### pacman will check and install missing packages if needed
if (!require("pacman")) install.packages("pacman")
pacman::p_load(gapminder)
pacman::p_load(tidytable)

按一组拆分。使用以下选项将组保留在数据框中.keep

gapminder_split_1group <- gapminder %>% 
  group_split.(continent, .keep = FALSE, .named = TRUE)
gapminder_split_1group
#> $Asia
#> # A tidytable: 396 x 5
#>    country      year lifeExp      pop gdpPercap
#>    <fct>       <int>   <dbl>    <int>     <dbl>
#>  1 Afghanistan  1952    28.8  8425333      779.
#>  2 Afghanistan  1957    30.3  9240934      821.
#>  3 Afghanistan  1962    32.0 10267083      853.
#>  4 Afghanistan  1967    34.0 11537966      836.
#>  5 Afghanistan  1972    36.1 13079460      740.
#>  6 Afghanistan  1977    38.4 14880372      786.
#>  7 Afghanistan  1982    39.9 12881816      978.
#>  8 Afghanistan  1987    40.8 13867957      852.
#>  9 Afghanistan  1992    41.7 16317921      649.
#> 10 Afghanistan  1997    41.8 22227415      635.
#> # ... with 386 more rows
#> 
#> $Europe
#> # A tidytable: 360 x 5
#>    country  year lifeExp     pop gdpPercap
#>    <fct>   <int>   <dbl>   <int>     <dbl>
#>  1 Albania  1952    55.2 1282697     1601.
#>  2 Albania  1957    59.3 1476505     1942.
#>  3 Albania  1962    64.8 1728137     2313.
#>  4 Albania  1967    66.2 1984060     2760.
#>  5 Albania  1972    67.7 2263554     3313.
#>  6 Albania  1977    68.9 2509048     3533.
#>  7 Albania  1982    70.4 2780097     3631.
#>  8 Albania  1987    72   3075321     3739.
#>  9 Albania  1992    71.6 3326498     2497.
#> 10 Albania  1997    73.0 3428038     3193.
#> # ... with 350 more rows
#> 
#> $Africa
#> # A tidytable: 624 x 5
#>    country  year lifeExp      pop gdpPercap
#>    <fct>   <int>   <dbl>    <int>     <dbl>
#>  1 Algeria  1952    43.1  9279525     2449.
#>  2 Algeria  1957    45.7 10270856     3014.
#>  3 Algeria  1962    48.3 11000948     2551.
#>  4 Algeria  1967    51.4 12760499     3247.
#>  5 Algeria  1972    54.5 14760787     4183.
#>  6 Algeria  1977    58.0 17152804     4910.
#>  7 Algeria  1982    61.4 20033753     5745.
#>  8 Algeria  1987    65.8 23254956     5681.
#>  9 Algeria  1992    67.7 26298373     5023.
#> 10 Algeria  1997    69.2 29072015     4797.
#> # ... with 614 more rows
#> 
#> $Americas
#> # A tidytable: 300 x 5
#>    country    year lifeExp      pop gdpPercap
#>    <fct>     <int>   <dbl>    <int>     <dbl>
#>  1 Argentina  1952    62.5 17876956     5911.
#>  2 Argentina  1957    64.4 19610538     6857.
#>  3 Argentina  1962    65.1 21283783     7133.
#>  4 Argentina  1967    65.6 22934225     8053.
#>  5 Argentina  1972    67.1 24779799     9443.
#>  6 Argentina  1977    68.5 26983828    10079.
#>  7 Argentina  1982    69.9 29341374     8998.
#>  8 Argentina  1987    70.8 31620918     9140.
#>  9 Argentina  1992    71.9 33958947     9308.
#> 10 Argentina  1997    73.3 36203463    10967.
#> # ... with 290 more rows
#> 
#> $Oceania
#> # A tidytable: 24 x 5
#>    country    year lifeExp      pop gdpPercap
#>    <fct>     <int>   <dbl>    <int>     <dbl>
#>  1 Australia  1952    69.1  8691212    10040.
#>  2 Australia  1957    70.3  9712569    10950.
#>  3 Australia  1962    70.9 10794968    12217.
#>  4 Australia  1967    71.1 11872264    14526.
#>  5 Australia  1972    71.9 13177000    16789.
#>  6 Australia  1977    73.5 14074100    18334.
#>  7 Australia  1982    74.7 15184200    19477.
#>  8 Australia  1987    76.3 16257249    21889.
#>  9 Australia  1992    77.6 17481977    23425.
#> 10 Australia  1997    78.8 18565243    26998.
#> # ... with 14 more rows

分为 2 组

gapminder_split_2group <- gapminder %>% 
  group_split.(continent, country, .keep = FALSE, .named = TRUE)
head(gapminder_split_2group)
#> $Asia.Afghanistan
#> # A tidytable: 12 x 4
#>     year lifeExp      pop gdpPercap
#>    <int>   <dbl>    <int>     <dbl>
#>  1  1952    28.8  8425333      779.
#>  2  1957    30.3  9240934      821.
#>  3  1962    32.0 10267083      853.
#>  4  1967    34.0 11537966      836.
#>  5  1972    36.1 13079460      740.
#>  6  1977    38.4 14880372      786.
#>  7  1982    39.9 12881816      978.
#>  8  1987    40.8 13867957      852.
#>  9  1992    41.7 16317921      649.
#> 10  1997    41.8 22227415      635.
#> 11  2002    42.1 25268405      727.
#> 12  2007    43.8 31889923      975.
#> 
#> $Europe.Albania
#> # A tidytable: 12 x 4
#>     year lifeExp     pop gdpPercap
#>    <int>   <dbl>   <int>     <dbl>
#>  1  1952    55.2 1282697     1601.
#>  2  1957    59.3 1476505     1942.
#>  3  1962    64.8 1728137     2313.
#>  4  1967    66.2 1984060     2760.
#>  5  1972    67.7 2263554     3313.
#>  6  1977    68.9 2509048     3533.
#>  7  1982    70.4 2780097     3631.
#>  8  1987    72   3075321     3739.
#>  9  1992    71.6 3326498     2497.
#> 10  1997    73.0 3428038     3193.
#> 11  2002    75.7 3508512     4604.
#> 12  2007    76.4 3600523     5937.
#> 
#> $Africa.Algeria
#> # A tidytable: 12 x 4
#>     year lifeExp      pop gdpPercap
#>    <int>   <dbl>    <int>     <dbl>
#>  1  1952    43.1  9279525     2449.
#>  2  1957    45.7 10270856     3014.
#>  3  1962    48.3 11000948     2551.
#>  4  1967    51.4 12760499     3247.
#>  5  1972    54.5 14760787     4183.
#>  6  1977    58.0 17152804     4910.
#>  7  1982    61.4 20033753     5745.
#>  8  1987    65.8 23254956     5681.
#>  9  1992    67.7 26298373     5023.
#> 10  1997    69.2 29072015     4797.
#> 11  2002    71.0 31287142     5288.
#> 12  2007    72.3 33333216     6223.
#> 
#> $Africa.Angola
#> # A tidytable: 12 x 4
#>     year lifeExp      pop gdpPercap
#>    <int>   <dbl>    <int>     <dbl>
#>  1  1952    30.0  4232095     3521.
#>  2  1957    32.0  4561361     3828.
#>  3  1962    34    4826015     4269.
#>  4  1967    36.0  5247469     5523.
#>  5  1972    37.9  5894858     5473.
#>  6  1977    39.5  6162675     3009.
#>  7  1982    39.9  7016384     2757.
#>  8  1987    39.9  7874230     2430.
#>  9  1992    40.6  8735988     2628.
#> 10  1997    41.0  9875024     2277.
#> 11  2002    41.0 10866106     2773.
#> 12  2007    42.7 12420476     4797.
#> 
#> $Americas.Argentina
#> # A tidytable: 12 x 4
#>     year lifeExp      pop gdpPercap
#>    <int>   <dbl>    <int>     <dbl>
#>  1  1952    62.5 17876956     5911.
#>  2  1957    64.4 19610538     6857.
#>  3  1962    65.1 21283783     7133.
#>  4  1967    65.6 22934225     8053.
#>  5  1972    67.1 24779799     9443.
#>  6  1977    68.5 26983828    10079.
#>  7  1982    69.9 29341374     8998.
#>  8  1987    70.8 31620918     9140.
#>  9  1992    71.9 33958947     9308.
#> 10  1997    73.3 36203463    10967.
#> 11  2002    74.3 38331121     8798.
#> 12  2007    75.3 40301927    12779.
#> 
#> $Oceania.Australia
#> # A tidytable: 12 x 4
#>     year lifeExp      pop gdpPercap
#>    <int>   <dbl>    <int>     <dbl>
#>  1  1952    69.1  8691212    10040.
#>  2  1957    70.3  9712569    10950.
#>  3  1962    70.9 10794968    12217.
#>  4  1967    71.1 11872264    14526.
#>  5  1972    71.9 13177000    16789.
#>  6  1977    73.5 14074100    18334.
#>  7  1982    74.7 15184200    19477.
#>  8  1987    76.3 16257249    21889.
#>  9  1992    77.6 17481977    23425.
#> 10  1997    78.8 18565243    26998.
#> 11  2002    80.4 19546792    30688.
#> 12  2007    81.2 20434176    34435.

^{创建于 2021-04-15 由 reprex 软件包（v2.0.0）}

2赞 Florian 8/7/2021 #7

一个可选的附加解决方案，用于删除多余的列：

iris %>% sample_n(size = 5) %>%
split(.$Species) %>%
map(~select(., -Species))

12赞 deeenes 3/24/2022 #8

解决方案如下

依赖工具tidyverse
适合线性管道
不依赖于因子或向量的顺序，可以应用于任何数据框列表

它与 Kim 的解决方案几乎相同，但是更合适的功能选择使其可读性和更简单。

library(dplyr)
library(purrr)

iris %>%
dplyr::group_split(Species) %>%
purrr::set_names(purrr::map_chr(., ~.x$Species[1]))

library(tidyverse)
list_cyl <- mtcars %>% 
   group_by(cyl) %>%
   # apply any functions to the grouped data 
   # to create a list column where each row is the grouped data
   # here are some examples
   do(# res = select_all(.),
      res = as_tibble(.)) %>%
   ungroup() %>%
   # set name of each grouped data
   mutate(res = set_names(res, paste0("Cylinder: ", cyl))) %>%
   # extract the list column
   pull(res)
   
list_cyl
$`Cylinder: 4`
# A tibble: 11 × 11
     mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb
   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
 1  22.8     4 108      93  3.85  2.32  18.6     1     1     4     1
 2  24.4     4 147.     62  3.69  3.19  20       1     0     4     2
 3  22.8     4 141.     95  3.92  3.15  22.9     1     0     4     2
 4  32.4     4  78.7    66  4.08  2.2   19.5     1     1     4     1
 5  30.4     4  75.7    52  4.93  1.62  18.5     1     1     4     2
 6  33.9     4  71.1    65  4.22  1.84  19.9     1     1     4     1
 7  21.5     4 120.     97  3.7   2.46  20.0     1     0     3     1
 8  27.3     4  79      66  4.08  1.94  18.9     1     1     4     1
 9  26       4 120.     91  4.43  2.14  16.7     0     1     5     2
10  30.4     4  95.1   113  3.77  1.51  16.9     1     1     5     2
11  21.4     4 121     109  4.11  2.78  18.6     1     1     4     2

$`Cylinder: 6`
# A tibble: 7 × 11
    mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb
  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1  21       6  160    110  3.9   2.62  16.5     0     1     4     4
2  21       6  160    110  3.9   2.88  17.0     0     1     4     4
3  21.4     6  258    110  3.08  3.22  19.4     1     0     3     1
4  18.1     6  225    105  2.76  3.46  20.2     1     0     3     1
5  19.2     6  168.   123  3.92  3.44  18.3     1     0     4     4
6  17.8     6  168.   123  3.92  3.44  18.9     1     0     4     4
7  19.7     6  145    175  3.62  2.77  15.5     0     1     5     6

$`Cylinder: 8`
# A tibble: 14 × 11
     mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb
   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
 1  18.7     8  360    175  3.15  3.44  17.0     0     0     3     2
 2  14.3     8  360    245  3.21  3.57  15.8     0     0     3     4
 3  16.4     8  276.   180  3.07  4.07  17.4     0     0     3     3
 4  17.3     8  276.   180  3.07  3.73  17.6     0     0     3     3
 5  15.2     8  276.   180  3.07  3.78  18       0     0     3     3
 6  10.4     8  472    205  2.93  5.25  18.0     0     0     3     4
 7  10.4     8  460    215  3     5.42  17.8     0     0     3     4
 8  14.7     8  440    230  3.23  5.34  17.4     0     0     3     4
 9  15.5     8  318    150  2.76  3.52  16.9     0     0     3     2
10  15.2     8  304    150  3.15  3.44  17.3     0     0     3     2
11  13.3     8  350    245  3.73  3.84  15.4     0     0     3     4
12  19.2     8  400    175  3.08  3.84  17.0     0     0     3     2
13  15.8     8  351    264  4.22  3.17  14.5     0     1     5     4
14  15       8  301    335  3.54  3.57  14.6     0     1     5     8

我更喜欢这种方法而不是 + 方法，因为它不需要从原始数据集的现有列（即我的示例中的 mtcars）中获取名称group_split()setNames(unique(*grouping column*))

例如，假设我想创建一个 mtcars 数据集列表，该列表按气缸数和变速箱类型进行拆分。

Using the `group_split()` + `setNames(unique(*grouping column*))` approach:
list_cyl_am1 <- mtcars %>%
  # create cylinder + transmission type group identifier
  mutate(am_text = ifelse(am == 1, "Manual", "Automatic"),
         grp_id = paste0("Cylinder: ", cyl, " & ", am_text, " Transmission")) %>%
  group_split(grp_id) %>%
  setNames(unique(mtcars$grp_id))
names(list_cyl_am1)
NULL

但是使用我的方法，它会给我提供我需要的东西

list_cyl_am2 <- mtcars %>%
  # create cylinder + transmission type group identifier
  mutate(am_text = ifelse(am == 1, "Manual", "Automatic"),
         grp_id = paste0("Cylinder: ", cyl, " & ", am_text, " Transmission")) %>%
  group_by(grp_id) %>%
  # apply some functions to the grouped data
  do(res = as_tibble(.)) %>%
  ungroup() %>%
  # set name
  mutate(res = set_names(res, grp_id)) %>%
  # extract the list column
  pull(res)

names(list_cyl_am2)
"Cylinder: 4 & Automatic Transmission" "Cylinder: 4 & Manual Transmission"   
"Cylinder: 6 & Automatic Transmission" "Cylinder: 6 & Manual Transmission"   
"Cylinder: 8 & Automatic Transmission" "Cylinder: 8 & Manual Transmission"

上一个：如何在 dplyr 链中筛选时保留基本数据帧行名

下一个：如何将 NA 替换为一组值

如何在 dplyr 中命名group_split输出列表

How to name the list of the group_split output in dplyr

评论

评论

评论

评论