提问人:voppikode 提问时间:1/28/2022 最后编辑:voppikode 更新时间:1/28/2022 访问量:108
将矢量列和列表列组成的 R 数据帧处理为“长”格式
Manipulating an R dataframe consisting of vector and list columns to 'long' format
问:
请参阅下面附上的 dput 输出。我需要把这个数据框转换成由五列组成的形式:Area、Group、Seats、Votes (%) 和 ShapleyShubik。每个区域对应的行数应取决于该区域中的组数。我认为期望的最终结果类似于通常所说的“长格式”数据。
structure(list(Area = c("Germany", "France", "Italy", "Spain"
), data = list(structure(list(Group = c("Group1", "Group2 ",
"Group3 ", "Group4 ", "Group5 ", "Group6 ", "Group7 ", "Group8 ",
"Group9 "), Seats = c(2L, 13L, 23L, 9L, 11L, 5L, 18L, 3L, 1L
), NeededQuota = c(43L, 43L, 43L, 43L, 43L, 43L, 43L, 43L, 43L
)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA,
-9L)), structure(list(Group = c("Group2 ", "Group4 ", "Group6 ",
"Group1", "Group7 ", "Group3 "), Seats = c(5L, 5L, 1L, 6L, 1L,
9L), NeededQuota = c(14L, 14L, 14L, 14L, 14L, 14L)), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -6L)), structure(list(
Group = c("Group4 ", "Group7 ", "Group5 ", "Group2 ", "Group8 ",
"Group9 ", "Group1", "Group6 ", "Group3 "), Seats = c(8L,
14L, 2L, 10L, 2L, 3L, 2L, 6L, 28L), NeededQuota = c(38L,
38L, 38L, 38L, 38L, 38L, 38L, 38L, 38L)), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -9L)), structure(list(
Group = c("Group6 ", "Group2 ", "Group7 ", "Group3 ", "Group4 ",
"Group9 ", "Group5 ", "Group10"), Seats = c(10L, 9L, 1L,
3L, 4L, 1L, 2L, 1L), NeededQuota = c(16L, 16L, 16L, 16L,
16L, 16L, 16L, 16L)), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -8L))), models = list(structure(list(Results = structure(c(2,
0.0235294117647059, 0.0261904761904762, 13, 0.152941176470588,
0.142857142857143, 23, 0.270588235294118, 0.3, 9, 0.105882352941176,
0.107142857142857, 11, 0.129411764705882, 0.121428571428571,
5, 0.0588235294117647, 0.0476190476190476, 18, 0.211764705882353,
0.214285714285714, 3, 0.0352941176470588, 0.0333333333333333,
1, 0.0117647058823529, 0.00714285714285714), .Dim = c(3L, 9L), .Dimnames = list(
c("Votes", "Votes (%)", "Shapley-Shubik"), c("Group1", "Group2 ",
"Group3 ", "Group4 ", "Group5 ", "Group6 ", "Group7 ",
"Group8 ", "Group9 "))), Distribution = c(2L, 13L, 23L,
9L, 11L, 5L, 18L, 3L, 1L), function (object, contr, how.many,
...)
{
if (isFALSE(as.logical(Sys.getenv("_R_OPTIONS_STRINGS_AS_FACTORS_"))))
object <- as.factor(object)
if (!nlevels(object))
stop("object not interpretable as a factor")
if (!missing(contr) && is.name(Xcontr <- substitute(contr)))
contr <- switch(as.character(Xcontr), poly = "contr.poly",
helmert = "contr.helmert", sum = "contr.sum", treatment = "contr.treatment",
SAS = "contr.SAS", contr)
if (missing(contr)) {
oc <- getOption("contrasts")
contr <- if (length(oc) < 2L)
if (is.ordered(object))
contr.poly
else contr.treatment
else oc[1 + is.ordered(object)]
}
if (missing(how.many) && missing(...))
contrasts(object) <- contr
else {
if (is.character(contr))
contr <- get(contr, mode = "function")
if (is.function(contr))
contr <- contr(nlevels(object), ...)
contrasts(object, how.many) <- contr
}
object
}, Method = "PowerIndex", Quota = 43L, Names = c("Group1", "Group2 ",
"Group3 ", "Group4 ", "Group5 ", "Group6 ", "Group7 ", "Group8 ",
"Group9 ")), class = "ShapleyShubik"), structure(list(Results = structure(c(5,
0.185185185185185, 0.166666666666667, 5, 0.185185185185185, 0.166666666666667,
1, 0.037037037037037, 0, 6, 0.222222222222222, 0.166666666666667,
1, 0.037037037037037, 0, 9, 0.333333333333333, 0.5), .Dim = c(3L,
6L), .Dimnames = list(c("Votes", "Votes (%)", "Shapley-Shubik"
), c("Group2 ", "Group4 ", "Group6 ", "Group1", "Group7 ",
"Group3 "))), Distribution = c(5L, 5L, 1L, 6L, 1L, 9L), function (object,
contr, how.many, ...)
{
if (isFALSE(as.logical(Sys.getenv("_R_OPTIONS_STRINGS_AS_FACTORS_"))))
object <- as.factor(object)
if (!nlevels(object))
stop("object not interpretable as a factor")
if (!missing(contr) && is.name(Xcontr <- substitute(contr)))
contr <- switch(as.character(Xcontr), poly = "contr.poly",
helmert = "contr.helmert", sum = "contr.sum", treatment = "contr.treatment",
SAS = "contr.SAS", contr)
if (missing(contr)) {
oc <- getOption("contrasts")
contr <- if (length(oc) < 2L)
if (is.ordered(object))
contr.poly
else contr.treatment
else oc[1 + is.ordered(object)]
}
if (missing(how.many) && missing(...))
contrasts(object) <- contr
else {
if (is.character(contr))
contr <- get(contr, mode = "function")
if (is.function(contr))
contr <- contr(nlevels(object), ...)
contrasts(object, how.many) <- contr
}
object
}, Method = "PowerIndex", Quota = 14L, Names = c("Group2 ", "Group4 ",
"Group6 ", "Group1", "Group7 ", "Group3 ")), class = "ShapleyShubik"),
structure(list(Results = structure(c(8, 0.106666666666667,
0.096031746031746, 14, 0.186666666666667, 0.131746031746032,
2, 0.0266666666666667, 0.0198412698412698, 10, 0.133333333333333,
0.131746031746032, 2, 0.0266666666666667, 0.0198412698412698,
3, 0.04, 0.0198412698412698, 2, 0.0266666666666667, 0.0198412698412698,
6, 0.08, 0.0484126984126984, 28, 0.373333333333333, 0.512698412698413
), .Dim = c(3L, 9L), .Dimnames = list(c("Votes", "Votes (%)",
"Shapley-Shubik"), c("Group4 ", "Group7 ", "Group5 ", "Group2 ",
"Group8 ", "Group9 ", "Group1", "Group6 ", "Group3 "))),
Distribution = c(8L, 14L, 2L, 10L, 2L, 3L, 2L, 6L, 28L
), function (object, contr, how.many, ...)
{
if (isFALSE(as.logical(Sys.getenv("_R_OPTIONS_STRINGS_AS_FACTORS_"))))
object <- as.factor(object)
if (!nlevels(object))
stop("object not interpretable as a factor")
if (!missing(contr) && is.name(Xcontr <- substitute(contr)))
contr <- switch(as.character(Xcontr), poly = "contr.poly",
helmert = "contr.helmert", sum = "contr.sum",
treatment = "contr.treatment", SAS = "contr.SAS",
contr)
if (missing(contr)) {
oc <- getOption("contrasts")
contr <- if (length(oc) < 2L)
if (is.ordered(object))
contr.poly
else contr.treatment
else oc[1 + is.ordered(object)]
}
if (missing(how.many) && missing(...))
contrasts(object) <- contr
else {
if (is.character(contr))
contr <- get(contr, mode = "function")
if (is.function(contr))
contr <- contr(nlevels(object), ...)
contrasts(object, how.many) <- contr
}
object
}, Method = "PowerIndex", Quota = 38L, Names = c("Group4 ",
"Group7 ", "Group5 ", "Group2 ", "Group8 ", "Group9 ",
"Group1", "Group6 ", "Group3 ")), class = "ShapleyShubik"),
structure(list(Results = structure(c(10, 0.32258064516129,
0.323809523809524, 9, 0.290322580645161, 0.261904761904762,
1, 0.032258064516129, 0.0285714285714286, 3, 0.0967741935483871,
0.0952380952380952, 4, 0.129032258064516, 0.157142857142857,
1, 0.032258064516129, 0.0285714285714286, 2, 0.0645161290322581,
0.0761904761904762, 1, 0.032258064516129, 0.0285714285714286
), .Dim = c(3L, 8L), .Dimnames = list(c("Votes", "Votes (%)",
"Shapley-Shubik"), c("Group6 ", "Group2 ", "Group7 ", "Group3 ",
"Group4 ", "Group9 ", "Group5 ", "Group10"))), Distribution = c(10L,
9L, 1L, 3L, 4L, 1L, 2L, 1L), function (object, contr, how.many,
...)
{
if (isFALSE(as.logical(Sys.getenv("_R_OPTIONS_STRINGS_AS_FACTORS_"))))
object <- as.factor(object)
if (!nlevels(object))
stop("object not interpretable as a factor")
if (!missing(contr) && is.name(Xcontr <- substitute(contr)))
contr <- switch(as.character(Xcontr), poly = "contr.poly",
helmert = "contr.helmert", sum = "contr.sum",
treatment = "contr.treatment", SAS = "contr.SAS",
contr)
if (missing(contr)) {
oc <- getOption("contrasts")
contr <- if (length(oc) < 2L)
if (is.ordered(object))
contr.poly
else contr.treatment
else oc[1 + is.ordered(object)]
}
if (missing(how.many) && missing(...))
contrasts(object) <- contr
else {
if (is.character(contr))
contr <- get(contr, mode = "function")
if (is.function(contr))
contr <- contr(nlevels(object), ...)
contrasts(object, how.many) <- contr
}
object
}, Method = "PowerIndex", Quota = 16L, Names = c("Group6 ",
"Group2 ", "Group7 ", "Group3 ", "Group4 ", "Group9 ",
"Group5 ", "Group10")), class = "ShapleyShubik"))), row.names = c(NA,
-4L), groups = structure(list(Area = c("France", "Germany", "Italy",
"Spain"), .rows = structure(list(2L, 1L, 3L, 4L), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, -4L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
我最初以为必须先对该结构解除嵌套(unnest),于是尝试了:
# Asker's attempt: group the rows by Area, then call unnest() without naming
# any column to unnest.
# NOTE(review): with no columns given, unnest() presumably tries to expand
# every list-column, including `models`, whose elements carry the class
# "ShapleyShubik" rather than being plain vectors/data frames -- that matches
# the error reported right after this snippet; confirm against the tidyr docs.
ByArea <- outputdata %>%
group_by(Area) %>%
unnest()
但它会产生一个错误,提示:Input must be a vector, not a <ShapleyShubik> object.
编辑: 所需输出的头部如下:
Area Group Seats Seats(%) ShapleyShubik
Germany Group1 2 0.25 0.1234
Germany Group2 2 0.25 0.1234
Germany Group3 4 0.50 0.7532
答:
1赞
Kat
1/28/2022
#1
看起来你刚接触 SO 不久;欢迎来到社区!为了尽快获得高质量的回答,最好让你的问题可复现。你在这里提供了数据,但没有说明用到了哪些库。
无论如何,我想我可以帮上忙。下面的做法用到了由 tidyverse 加载的几个包。
library(tidyverse)
# Build one long data frame with a row per (Area, Group) pair.
# For every row of `outputdata`, the model's `Results` matrix (rows: Votes,
# Votes (%), Shapley-Shubik; one column per group) is transposed so that each
# group becomes a row, and the matching Area name is prepended.
showMe <- map_dfr(
  # iterate over however many models there are instead of a hard-coded 1:4;
  # seq_len() also yields an empty sequence when there are no rows
  seq_len(nrow(outputdata)),
  function(i) {
    # look the results matrix up once; it is needed for both size and content
    res <- outputdata$models[[i]][["Results"]]
    cbind(
      # repeat the area name once per group (column) in this model
      Area = rep(outputdata$Area[[i]], times = ncol(res)),
      # groups are the matrix columns, so transpose to get one row per group
      t(res) %>%
        as.data.frame() %>%
        # move the groups from row names to a leading column
        mutate(Groups = rownames(.),
               .before = 1)
    )
  }
)
这是您此时将看到的内容:
# Area Groups Votes Votes (%) Shapley-Shubik
# Group1...1 Germany Group1 2 0.02352941 0.026190476
# Group2 ...2 Germany Group2 13 0.15294118 0.142857143
# Group3 ...3 Germany Group3 23 0.27058824 0.300000000
# Group4 ...4 Germany Group4 9 0.10588235 0.107142857
# Group5 ...5 Germany Group5 11 0.12941176 0.121428571
# Group6 ...6 Germany Group6 5 0.05882353 0.047619048
# Group7 ...7 Germany Group7 18 0.21176471 0.214285714
# Group8 ...8 Germany Group8 3 0.03529412 0.033333333
接下来,删除行名,并去除 Groups 字段中多余的空格。
# Tidy up the combined table:
#   1. reset the row names to the default running index
#   2. strip leading/trailing whitespace from the group labels
row.names(showMe) <- NULL
showMe[["Groups"]] <- trimws(showMe[["Groups"]], which = "both")
# display the cleaned result
showMe
# Area Groups Votes Votes (%) Shapley-Shubik
# 1 Germany Group1 2 0.02352941 0.026190476
# 2 Germany Group2 13 0.15294118 0.142857143
# 3 Germany Group3 23 0.27058824 0.300000000
# 4 Germany Group4 9 0.10588235 0.107142857
# 5 Germany Group5 11 0.12941176 0.121428571
# 6 Germany Group6 5 0.05882353 0.047619048
# 7 Germany Group7 18 0.21176471 0.214285714
评论
unnest(outputdata, data)
unnest_wider(outputdata, data)
unnest(outputdata, data)
unnest(outputdata,data)