将矢量列和列表列组成的 R 数据帧处理为“长”格式

Manipulating an R dataframe consisting of vector and list columns to 'long' format

提问人:voppikode 提问时间:1/28/2022 最后编辑:voppikode 更新时间:1/28/2022 访问量:108

问:

请参阅随附的 dput。我需要将有问题的数据框转换为由五列组成的表单:Area、Group、Seats、Votes (%) 和 ShapleyShubik。每个特定区域的行数应取决于该区域中的组数。我相信这个期望的最终结果有点像所引用的“长格式”数据。

structure(list(Area = c("Germany", "France", "Italy", "Spain"
), data = list(structure(list(Group = c("Group1", "Group2 ", 
"Group3 ", "Group4  ", "Group5 ", "Group6  ", "Group7 ", "Group8 ", 
"Group9   "), Seats = c(2L, 13L, 23L, 9L, 11L, 5L, 18L, 3L, 1L
), NeededQuota = c(43L, 43L, 43L, 43L, 43L, 43L, 43L, 43L, 43L
)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA, 
-9L)), structure(list(Group = c("Group2 ", "Group4  ", "Group6  ", 
"Group1", "Group7 ", "Group3 "), Seats = c(5L, 5L, 1L, 6L, 1L, 
9L), NeededQuota = c(14L, 14L, 14L, 14L, 14L, 14L)), class = c("tbl_df", 
"tbl", "data.frame"), row.names = c(NA, -6L)), structure(list(
    Group = c("Group4  ", "Group7 ", "Group5 ", "Group2 ", "Group8 ", 
    "Group9   ", "Group1", "Group6  ", "Group3 "), Seats = c(8L, 
    14L, 2L, 10L, 2L, 3L, 2L, 6L, 28L), NeededQuota = c(38L, 
    38L, 38L, 38L, 38L, 38L, 38L, 38L, 38L)), class = c("tbl_df", 
"tbl", "data.frame"), row.names = c(NA, -9L)), structure(list(
    Group = c("Group6  ", "Group2 ", "Group7 ", "Group3 ", "Group4  ", 
    "Group9   ", "Group5 ", "Group10"), Seats = c(10L, 9L, 1L, 
    3L, 4L, 1L, 2L, 1L), NeededQuota = c(16L, 16L, 16L, 16L, 
    16L, 16L, 16L, 16L)), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -8L))), models = list(structure(list(Results = structure(c(2, 
0.0235294117647059, 0.0261904761904762, 13, 0.152941176470588, 
0.142857142857143, 23, 0.270588235294118, 0.3, 9, 0.105882352941176, 
0.107142857142857, 11, 0.129411764705882, 0.121428571428571, 
5, 0.0588235294117647, 0.0476190476190476, 18, 0.211764705882353, 
0.214285714285714, 3, 0.0352941176470588, 0.0333333333333333, 
1, 0.0117647058823529, 0.00714285714285714), .Dim = c(3L, 9L), .Dimnames = list(
    c("Votes", "Votes (%)", "Shapley-Shubik"), c("Group1", "Group2 ", 
    "Group3 ", "Group4  ", "Group5 ", "Group6  ", "Group7 ", 
    "Group8 ", "Group9   "))), Distribution = c(2L, 13L, 23L, 
9L, 11L, 5L, 18L, 3L, 1L), function (object, contr, how.many, 
    ...) 
{
    if (isFALSE(as.logical(Sys.getenv("_R_OPTIONS_STRINGS_AS_FACTORS_")))) 
        object <- as.factor(object)
    if (!nlevels(object)) 
        stop("object not interpretable as a factor")
    if (!missing(contr) && is.name(Xcontr <- substitute(contr))) 
        contr <- switch(as.character(Xcontr), poly = "contr.poly", 
            helmert = "contr.helmert", sum = "contr.sum", treatment = "contr.treatment", 
            SAS = "contr.SAS", contr)
    if (missing(contr)) {
        oc <- getOption("contrasts")
        contr <- if (length(oc) < 2L) 
            if (is.ordered(object)) 
                contr.poly
            else contr.treatment
        else oc[1 + is.ordered(object)]
    }
    if (missing(how.many) && missing(...)) 
        contrasts(object) <- contr
    else {
        if (is.character(contr)) 
            contr <- get(contr, mode = "function")
        if (is.function(contr)) 
            contr <- contr(nlevels(object), ...)
        contrasts(object, how.many) <- contr
    }
    object
}, Method = "PowerIndex", Quota = 43L, Names = c("Group1", "Group2 ", 
"Group3 ", "Group4  ", "Group5 ", "Group6  ", "Group7 ", "Group8 ", 
"Group9   ")), class = "ShapleyShubik"), structure(list(Results = structure(c(5, 
0.185185185185185, 0.166666666666667, 5, 0.185185185185185, 0.166666666666667, 
1, 0.037037037037037, 0, 6, 0.222222222222222, 0.166666666666667, 
1, 0.037037037037037, 0, 9, 0.333333333333333, 0.5), .Dim = c(3L, 
6L), .Dimnames = list(c("Votes", "Votes (%)", "Shapley-Shubik"
), c("Group2 ", "Group4  ", "Group6  ", "Group1", "Group7 ", 
"Group3 "))), Distribution = c(5L, 5L, 1L, 6L, 1L, 9L), function (object, 
    contr, how.many, ...) 
{
    if (isFALSE(as.logical(Sys.getenv("_R_OPTIONS_STRINGS_AS_FACTORS_")))) 
        object <- as.factor(object)
    if (!nlevels(object)) 
        stop("object not interpretable as a factor")
    if (!missing(contr) && is.name(Xcontr <- substitute(contr))) 
        contr <- switch(as.character(Xcontr), poly = "contr.poly", 
            helmert = "contr.helmert", sum = "contr.sum", treatment = "contr.treatment", 
            SAS = "contr.SAS", contr)
    if (missing(contr)) {
        oc <- getOption("contrasts")
        contr <- if (length(oc) < 2L) 
            if (is.ordered(object)) 
                contr.poly
            else contr.treatment
        else oc[1 + is.ordered(object)]
    }
    if (missing(how.many) && missing(...)) 
        contrasts(object) <- contr
    else {
        if (is.character(contr)) 
            contr <- get(contr, mode = "function")
        if (is.function(contr)) 
            contr <- contr(nlevels(object), ...)
        contrasts(object, how.many) <- contr
    }
    object
}, Method = "PowerIndex", Quota = 14L, Names = c("Group2 ", "Group4  ", 
"Group6  ", "Group1", "Group7 ", "Group3 ")), class = "ShapleyShubik"), 
    structure(list(Results = structure(c(8, 0.106666666666667, 
    0.096031746031746, 14, 0.186666666666667, 0.131746031746032, 
    2, 0.0266666666666667, 0.0198412698412698, 10, 0.133333333333333, 
    0.131746031746032, 2, 0.0266666666666667, 0.0198412698412698, 
    3, 0.04, 0.0198412698412698, 2, 0.0266666666666667, 0.0198412698412698, 
    6, 0.08, 0.0484126984126984, 28, 0.373333333333333, 0.512698412698413
    ), .Dim = c(3L, 9L), .Dimnames = list(c("Votes", "Votes (%)", 
    "Shapley-Shubik"), c("Group4  ", "Group7 ", "Group5 ", "Group2 ", 
    "Group8 ", "Group9   ", "Group1", "Group6  ", "Group3 "))), 
        Distribution = c(8L, 14L, 2L, 10L, 2L, 3L, 2L, 6L, 28L
        ), function (object, contr, how.many, ...) 
        {
            if (isFALSE(as.logical(Sys.getenv("_R_OPTIONS_STRINGS_AS_FACTORS_")))) 
                object <- as.factor(object)
            if (!nlevels(object)) 
                stop("object not interpretable as a factor")
            if (!missing(contr) && is.name(Xcontr <- substitute(contr))) 
                contr <- switch(as.character(Xcontr), poly = "contr.poly", 
                  helmert = "contr.helmert", sum = "contr.sum", 
                  treatment = "contr.treatment", SAS = "contr.SAS", 
                  contr)
            if (missing(contr)) {
                oc <- getOption("contrasts")
                contr <- if (length(oc) < 2L) 
                  if (is.ordered(object)) 
                    contr.poly
                  else contr.treatment
                else oc[1 + is.ordered(object)]
            }
            if (missing(how.many) && missing(...)) 
                contrasts(object) <- contr
            else {
                if (is.character(contr)) 
                  contr <- get(contr, mode = "function")
                if (is.function(contr)) 
                  contr <- contr(nlevels(object), ...)
                contrasts(object, how.many) <- contr
            }
            object
        }, Method = "PowerIndex", Quota = 38L, Names = c("Group4  ", 
        "Group7 ", "Group5 ", "Group2 ", "Group8 ", "Group9   ", 
        "Group1", "Group6  ", "Group3 ")), class = "ShapleyShubik"), 
    structure(list(Results = structure(c(10, 0.32258064516129, 
    0.323809523809524, 9, 0.290322580645161, 0.261904761904762, 
    1, 0.032258064516129, 0.0285714285714286, 3, 0.0967741935483871, 
    0.0952380952380952, 4, 0.129032258064516, 0.157142857142857, 
    1, 0.032258064516129, 0.0285714285714286, 2, 0.0645161290322581, 
    0.0761904761904762, 1, 0.032258064516129, 0.0285714285714286
    ), .Dim = c(3L, 8L), .Dimnames = list(c("Votes", "Votes (%)", 
    "Shapley-Shubik"), c("Group6  ", "Group2 ", "Group7 ", "Group3 ", 
    "Group4  ", "Group9   ", "Group5 ", "Group10"))), Distribution = c(10L, 
    9L, 1L, 3L, 4L, 1L, 2L, 1L), function (object, contr, how.many, 
        ...) 
    {
        if (isFALSE(as.logical(Sys.getenv("_R_OPTIONS_STRINGS_AS_FACTORS_")))) 
            object <- as.factor(object)
        if (!nlevels(object)) 
            stop("object not interpretable as a factor")
        if (!missing(contr) && is.name(Xcontr <- substitute(contr))) 
            contr <- switch(as.character(Xcontr), poly = "contr.poly", 
                helmert = "contr.helmert", sum = "contr.sum", 
                treatment = "contr.treatment", SAS = "contr.SAS", 
                contr)
        if (missing(contr)) {
            oc <- getOption("contrasts")
            contr <- if (length(oc) < 2L) 
                if (is.ordered(object)) 
                  contr.poly
                else contr.treatment
            else oc[1 + is.ordered(object)]
        }
        if (missing(how.many) && missing(...)) 
            contrasts(object) <- contr
        else {
            if (is.character(contr)) 
                contr <- get(contr, mode = "function")
            if (is.function(contr)) 
                contr <- contr(nlevels(object), ...)
            contrasts(object, how.many) <- contr
        }
        object
    }, Method = "PowerIndex", Quota = 16L, Names = c("Group6  ", 
    "Group2 ", "Group7 ", "Group3 ", "Group4  ", "Group9   ", 
    "Group5 ", "Group10")), class = "ShapleyShubik"))), row.names = c(NA, 
-4L), groups = structure(list(Area = c("France", "Germany", "Italy", 
"Spain"), .rows = structure(list(2L, 1L, 3L, 4L), ptype = integer(0), class = c("vctrs_list_of", 
"vctrs_vctr", "list"))), row.names = c(NA, -4L), class = c("tbl_df", 
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"))

我最初以为我必须解开结构,并尝试

ByArea <- outputdata %>%
group_by(Area) %>%
unnest()

但它会产生一个错误,告诉 .Input must be a vector, not a <ShapleyShubik> object

编辑: 所需输出的头部如下:

Area        Group      Seats   Seats(%)        ShapleyShubik
Germany     Group1       2      0.25              0.1234
Germany     Group2       2      0.25              0.1234
Germany     Group3       4      0.50              0.7532
r dplyr 重塑 数据操作

评论

0赞 Onyambu 1/28/2022
我无法告诉您需要的输出。至于是否甚至.尝试两者并选择为您提供所需输出的那个unnest(outputdata, data)unnest_wider(outputdata, data)
1赞 camille 1/28/2022
什么是ShapleyShubik?它似乎是您的一列的类,但它来自什么包?也许您可以先将数据样本简化为不需要外部包即可正确读取它的东西
0赞 Onyambu 1/28/2022
似乎你想要unnest(outputdata, data)
0赞 voppikode 1/28/2022
@camille ShapleyShubik 来自包 GameTheory。我使用它的命令 ShapleyShubik() 来计算不同国家/地区不同群体的 Shapley-Shubik 功率指数。
0赞 voppikode 1/28/2022
@Onyambu 我需要的输出是返回的,但最右边的两列需要是每个组的 Votes (%) 和 ShapleyShubik 值。现在它只是带有列表的“模型”。我想以某种方式为组分配这些值。我正在编辑原始消息所需的输出。unnest(outputdata,data)

答:

1赞 Kat 1/28/2022 #1

看起来你对 SO 相当陌生;欢迎来到社区!为了快速获得最佳答案,最好使您的问题可重复。你在这里有数据,但没有库。

无论哪种方式,我想我都可以提供帮助。这是使用几个调用 的包。tidyverse

library(tidyverse)
showMe <- map_dfr(1:4, # there are four power models in this object
                         # first capture the country for each group
                         # create a vector of repeats the length of the model
                  ~cbind(Area = rep(outputdata$Area[[.x]], 
                                    times = ncol(outputdata$models[[.x]][["Results"]])),
                         # now capture the results
                         t(outputdata$models[[.x]][["Results"]]) %>% 
                           as.data.frame() %>% 
                           # move the groups from row names to a column
                           mutate(Groups = rownames(.), 
                                  .before = 1)))

这是您此时将看到的内容:

#                   Area    Groups Votes  Votes (%) Shapley-Shubik
# Group1...1     Germany    Group1     2 0.02352941    0.026190476
# Group2 ...2    Germany   Group2     13 0.15294118    0.142857143
# Group3 ...3    Germany   Group3     23 0.27058824    0.300000000
# Group4  ...4   Germany  Group4       9 0.10588235    0.107142857
# Group5 ...5    Germany   Group5     11 0.12941176    0.121428571
# Group6  ...6   Germany  Group6       5 0.05882353    0.047619048
# Group7 ...7    Germany   Group7     18 0.21176471    0.214285714
# Group8 ...8    Germany   Group8      3 0.03529412    0.033333333

接下来,删除行名并修剪字段中的空格。Groups

# now remove rownames, then remove whitespace from groups
rownames(showMe) <- NULL
showMe$Groups <- trimws(showMe$Groups)
showMe
#       Area  Groups Votes  Votes (%) Shapley-Shubik
# 1  Germany  Group1     2 0.02352941    0.026190476
# 2  Germany  Group2    13 0.15294118    0.142857143
# 3  Germany  Group3    23 0.27058824    0.300000000
# 4  Germany  Group4     9 0.10588235    0.107142857
# 5  Germany  Group5    11 0.12941176    0.121428571
# 6  Germany  Group6     5 0.05882353    0.047619048
# 7  Germany  Group7    18 0.21176471    0.214285714