箱线图,每个箱子有多个均值,显示个体变化

Boxplot with multiple means per box showing individual variation

提问人:Sharklady 提问时间:6/29/2023 最后编辑:jay.sfSharklady 更新时间:6/29/2023 访问量:52

问:

我正在尝试制作一个箱形图,该箱形图不仅显示了每个箱子(红点)数据的总体平均值,还显示了我正在查看的每种行为的数据集中包含的 9 个人的平均值。因此,与其绘制如下所示的所有原始数据,不如让它绘制每个人在每种行为下的平均值

这是当前的图表,我想要每框大约 9 个平均值 + 以红色显示的总体平均值。

Using standard jitter I get this mess

library(ggplot2)

# Boxplot of Roll_Avg for each behaviour. Every raw observation is jittered on
# top of the boxes, and the large red dot marks the overall mean of each box.
ggplot(
  data = Seen2,
  mapping = aes(x = Behaviour, y = Roll_Avg, fill = Behaviour)
) +
  # Outliers are hidden on the box layer.
  geom_boxplot(outlier.shape = NA) +
  # The fill = Behaviour mapping is inherited from ggplot().
  geom_point(
    size = 2,
    position = position_jitter(width = 0.2, height = 0.1)
  ) +
  stat_summary(
    fun = mean, geom = "point",
    shape = 20, size = 5, color = "red", fill = "red"
  ) +
  theme_classic() +
  my_scale +
  theme(axis.text.y = element_text(size = 16, angle = 0)) +
  ylim(-30, 30)

编辑

我现在需要用特定颜色将这些行为排序。代码在添加抖动之前工作正常,但现在它不会对它们进行排序。查看包含@Mark修复的完整代码。

enter image description here

# Behaviours in the order they should appear on the x axis. The names must
# match the values stored in Seen2$Behaviour exactly (dots, not spaces);
# any value that matches no level would be turned into NA by factor().
behaviour_levels <- c(
  "Burst", "High.energy.swimming", "Medium.energy.swimming",
  "Low.energy.swimming", "Travel", "Ascending", "Descending"
)

# Apply the ordering to the data frame that is actually plotted.
Seen2$Behaviour <- factor(Seen2$Behaviour, levels = behaviour_levels)

# One fill colour per behaviour, paired by position with the ordered levels.
# NOTE(review): confirm that each colour is paired with the intended behaviour.
my_colors <- c(
  "#CCFFFF", "#000000", "#7F7F7F", "#336699",
  "#008080", "#00CCFF", "#264AE2"
)
names(my_colors) <- levels(Seen2$Behaviour)
my_scale <- scale_fill_manual(name = "Behaviour", values = my_colors)

# `means` (one row per Behaviour and SharkID) is created elsewhere; build it
# from Seen2 after the factor conversion above so both layers share the same
# levels.
ggplot(Seen2, aes(x = Behaviour, y = Roll_Avg, fill = Behaviour)) +
  geom_boxplot(outlier.shape = NA) +
  geom_point(
    data = means, size = 2,
    position = position_jitter(width = 0.2, height = 0.1)
  ) +
  # Large red dot: overall mean of each box.
  stat_summary(
    fun = mean, geom = "point",
    shape = 20, size = 5, color = "red", fill = "red"
  ) +
  theme_classic() +
  my_scale +
  theme(axis.text.y = element_text(size = 16, angle = 0)) +
  # ylim() removes observations outside the limits before the statistics are
  # computed (Seen2 contains values of -90).
  ylim(-30, 30)

数据

Seen2 <- structure(list(SharkID = c(9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 
6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 
7L, 7L, 7L, 7L, 7L, 7L), Behaviour = c("Low.energy.swimming", 
"Low.energy.swimming", "Low.energy.swimming", "Low.energy.swimming", 
"Low.energy.swimming", "High.energy.swimming", "High.energy.swimming", 
"High.energy.swimming", "High.energy.swimming", "High.energy.swimming", 
"High.energy.swimming", "Travel", "Travel", "Travel", "Travel", 
"Travel", "Burst", "Burst", "Burst", "Burst", "Burst", "Ascending", 
"Ascending", "Ascending", "Ascending", "Ascending", "Ascending", 
"Ascending", "Descending", "Descending", "Descending", "Descending", 
"Descending", "Descending", "High.energy.swimming", "High.energy.swimming", 
"High.energy.swimming", "High.energy.swimming", "High.energy.swimming", 
"High.energy.swimming", "High.energy.swimming", "Medium.energy.swimming", 
"Medium.energy.swimming", "Medium.energy.swimming", "Medium.energy.swimming", 
"Medium.energy.swimming", "High.energy.swimming", "High.energy.swimming", 
"High.energy.swimming", "High.energy.swimming", "Medium.energy.swimming", 
"Medium.energy.swimming", "Medium.energy.swimming", "Medium.energy.swimming", 
"Medium.energy.swimming", "Medium.energy.swimming", "Medium.energy.swimming", 
"Medium.energy.swimming", "Medium.energy.swimming", "High.energy.swimming", 
"High.energy.swimming", "High.energy.swimming", "High.energy.swimming", 
"High.energy.swimming", "Burst", "Burst", "Burst", "Burst", "Burst", 
"Burst", "High.energy.swimming", "High.energy.swimming", "High.energy.swimming", 
"High.energy.swimming", "High.energy.swimming", "High.energy.swimming", 
"High.energy.swimming", "Medium.energy.swimming", "Medium.energy.swimming", 
"Medium.energy.swimming", "Medium.energy.swimming", "Medium.energy.swimming", 
"Low.energy.swimming", "Low.energy.swimming", "Low.energy.swimming", 
"Low.energy.swimming", "Low.energy.swimming", "Low.energy.swimming", 
"Low.energy.swimming", "Low.energy.swimming", "Low.energy.swimming", 
"Low.energy.swimming", "Low.energy.swimming", "High.energy.swimming", 
"High.energy.swimming", "High.energy.swimming", "High.energy.swimming", 
"High.energy.swimming", "Burst", "Burst", "Burst", "Burst", "Burst", 
"Burst", "Burst", "Medium.energy.swimming", "Medium.energy.swimming", 
"Medium.energy.swimming", "Medium.energy.swimming", "Medium.energy.swimming", 
"Medium.energy.swimming"), Roll_Avg = c(3.97084, 3.90604, 3.90738, 
3.80425, 3.4154, -0.993225, -0.940408, -0.55992, -0.791121, -1.83573, 
-3.41667, -14.0837, -14.9381, -16.4732, -16.6994, -15.5318, -18.2402, 
-19.4427, -22.8129, -27.009, -27.3907, 17.3778, 13.4861, 7.82564, 
4.63057, 6.94956, 14.3372, 22.0873, -11.5397, -11.7741, -11.4795, 
-10.7844, -10.5135, -11.0162, -90, -90, 11.0157, 6.13595, 2.2689, 
-0.710414, -5.56132, -12.0987, -9.70231, -7.13388, -5.41693, 
-4.23157, 2.11092, 2.19057, 1.5597, 0.637742, 1.17135, 3.41601, 
4.71664, 4.61525, -0.813111, -4.45238, -7.43746, -9.11626, -9.94338, 
-11.0361, -11.8852, -10.472, -5.12697, 2.61247, 9.80993, 17.307, 
10.5466, -4.01104, -7.40708, -2.72602, -5.43834, -5.22419, -4.8472, 
-4.43957, -1.67914, 2.39693, 7.84736, -9.7158, -8.70349, -8.22463, 
-8.22878, -9.43265, -0.527293, -0.283262, -0.614311, -0.380123, 
-0.344986, 7.73204, 7.47037, 7.00224, 7.01661, 7.38737, 7.83069, 
-1.83138, -1.7847, -1.68084, -1.61196, -1.49905, -1.61391, -1.46356, 
-0.986477, -0.806394, -0.883015, -0.840026, -0.727501, -1.15641, 
-1.28692, -1.38961, -1.43838, -1.42089, -1.27225)), class = "data.frame", row.names = c(NA, 
-111L))
R 箱线图 抖动

评论


答:

0赞 Mark 6/29/2023 #1

您可以做的是,为要绘制的样本创建一个单独的数据帧,然后在 geom_point() 调用中引用该数据帧:

library(tidyverse)

# Fix the display order of the behaviours on the x axis.
behaviour_levels <- c(
  "Burst", "High.energy.swimming", "Medium.energy.swimming",
  "Low.energy.swimming", "Travel", "Ascending", "Descending"
)

Seen2 <- Seen2 %>%
  mutate(Behaviour = factor(Behaviour, levels = behaviour_levels))

# One row per behaviour and individual: the mean Roll_Avg of each shark within
# each behaviour. Every row is already an individual's mean, so no random
# sampling is needed; .groups = "drop" returns an ungrouped data frame.
means <- Seen2 %>%
  group_by(Behaviour, SharkID) %>%
  summarise(Roll_Avg = mean(Roll_Avg), .groups = "drop")

ggplot(Seen2, aes(x = Behaviour, y = Roll_Avg, fill = Behaviour)) +
  geom_boxplot(outlier.shape = NA) +
  # x, y and fill are inherited from ggplot(); only the data are swapped.
  # The seed makes the jitter identical every time the plot is drawn.
  geom_point(
    data = means, size = 2,
    position = position_jitter(width = 0.2, height = 0.1, seed = 123)
  ) +
  # Large red dot: overall mean of each box.
  stat_summary(
    fun = mean, geom = "point",
    shape = 20, size = 5, color = "red", fill = "red"
  ) +
  theme_classic() +
  theme(axis.text.y = element_text(size = 16, angle = 0)) +
  # ylim() removes observations outside the limits before the statistics are
  # computed (Seen2 contains values of -90).
  ylim(-30, 30)

boxplot of shark things

评论

0赞 Sharklady 6/29/2023
谢谢@Mark - 它不会让我发表评论。我现在面临的问题是,试图将我的行为按顺序排列:爆发、高能量、中等能量、低能量、水面游泳、颜色上升和下降。当我在没有geom_point的情况下运行我的原始代码时,行为是有序的,没有问题。我已将信息添加到我的原始问题中。
0赞 Mark 6/29/2023
@Sharklady 请查看 stackoverflow.com/a/41417136/4145280
0赞 Sharklady 6/29/2023
谢谢@Mark,我过去不需要使用它。我能够使用 behavssec$Behaviour <- factor( Seen2$Behaviour,levels = c....决定我的顺序并与我的颜色一起工作,但加入 jitter 后,它似乎以某种方式覆盖了这一设置,或者使其无法生效。
0赞 Mark 6/29/2023
@Sharklady 已更新代码。看起来您使用的级别名称缺少点(例如,"High energy swimming" 应该是 "High.energy.swimming")。
0赞 Sharklady 6/29/2023
该代码不起作用。它不是找到每个人的平均值,而是找到每次我运行绘图时看起来 9 个随机数据点都在变化的平均值。它不是为每种行为采取每个人的手段。
0赞 jay.sf 6/29/2023 #2

首先,稍微缩短标签。

# Shorten labels such as "High.energy.swimming" to "High*". fixed = TRUE makes
# the dots literal; as a regular expression each "." would match any character.
Seen3$Behaviour <- gsub(".energy.swimming", "*", Seen3$Behaviour, fixed = TRUE)

接下来,进行一些预处理,使用 tapply 计算每种行为和每条鲨鱼的均值(请注意,箱线图中的中心线实际上显示的是中位数,下面的代码使用的是 median)。

# Behaviour x SharkID matrix holding the median Roll_Avg of each combination
# (NAs ignored); combinations that do not occur in the data are NA.
m <- tapply(
  Seen3$Roll_Avg,
  list(Seen3$Behaviour, Seen3$SharkID),
  FUN = median, na.rm = TRUE
)

接下来,使用 boxplot,

# Base-graphics boxplot of Roll_Avg per behaviour, drawn with grey borders.
boxplot(
  Roll_Avg ~ Behaviour, data = Seen3,
  col = 0, border = "grey40", ylab = "Moving average"
)

在为鲨鱼定义了六种颜色之后,

# One colour per column of m (one per SharkID), with value v lowered to 0.75.
clr <- rainbow(n = ncol(m), v = 0.75)

添加 jitter 后的均值(或中位数)。

set.seed(666)  ## to fix the jitter
# m is Behaviour x SharkID and is flattened column by column, so the x
# position of each value is its row (behaviour) index. Index clr by column so
# every point gets its own shark's colour and agrees with the legend; passing
# clr directly would recycle it element by element, ignoring the columns.
points(jitter(as.vector(row(m))), m, col = clr[col(m)], pch = 20)

最后,添加一个不错的 legend。

# Legend mapping each colour to its shark ID (the column names of m), laid out
# in two columns.
legend(
  "bottomleft",
  title = "Shark", legend = colnames(m),
  col = clr, pch = 20, ncol = 2
)
# Footnote for the "*" suffix in the shortened labels: left-aligned (adj = 0)
# at x = ncol(m); xpd = TRUE allows it to be drawn outside the plot region.
text(
  x = ncol(m), y = min(m, na.rm = TRUE) * 1.75, labels = "*swimming",
  bty = "n", xpd = TRUE, adj = 0, cex = .9
)

enter image description here

Note: If you (really?) want to remove the outliers from display, include a pch=NA in the boxplot() call.


Data:

## Seen2 with outliers removed according to the Tukey criterion
Seen3 <- structure(list(SharkID = c(9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 
6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 
7L, 7L, 7L, 7L, 7L, 7L), Behaviour = c("Low.energy.swimming", 
"Low.energy.swimming", "Low.energy.swimming", "Low.energy.swimming", 
"Low.energy.swimming", "High.energy.swimming", "High.energy.swimming", 
"High.energy.swimming", "High.energy.swimming", "High.energy.swimming", 
"High.energy.swimming", "Travel", "Travel", "Travel", "Travel", 
"Travel", "Burst", "Burst", "Burst", "Burst", "Burst", "Ascending", 
"Ascending", "Ascending", "Ascending", "Ascending", "Ascending", 
"Ascending", "Descending", "Descending", "Descending", "Descending", 
"Descending", "Descending", "High.energy.swimming", "High.energy.swimming", 
"High.energy.swimming", "High.energy.swimming", "High.energy.swimming", 
"High.energy.swimming", "High.energy.swimming", "Medium.energy.swimming", 
"Medium.energy.swimming", "Medium.energy.swimming", "Medium.energy.swimming", 
"Medium.energy.swimming", "High.energy.swimming", "High.energy.swimming", 
"High.energy.swimming", "High.energy.swimming", "Medium.energy.swimming", 
"Medium.energy.swimming", "Medium.energy.swimming", "Medium.energy.swimming", 
"Medium.energy.swimming", "Medium.energy.swimming", "Medium.energy.swimming", 
"Medium.energy.swimming", "Medium.energy.swimming", "High.energy.swimming", 
"High.energy.swimming", "High.energy.swimming", "High.energy.swimming", 
"High.energy.swimming", "Burst", "Burst", "Burst", "Burst", "Burst", 
"Burst", "High.energy.swimming", "High.energy.swimming", "High.energy.swimming", 
"High.energy.swimming", "High.energy.swimming", "High.energy.swimming", 
"High.energy.swimming", "Medium.energy.swimming", "Medium.energy.swimming", 
"Medium.energy.swimming", "Medium.energy.swimming", "Medium.energy.swimming", 
"Low.energy.swimming", "Low.energy.swimming", "Low.energy.swimming", 
"Low.energy.swimming", "Low.energy.swimming", "Low.energy.swimming", 
"Low.energy.swimming", "Low.energy.swimming", "Low.energy.swimming", 
"Low.energy.swimming", "Low.energy.swimming", "High.energy.swimming", 
"High.energy.swimming", "High.energy.swimming", "High.energy.swimming", 
"High.energy.swimming", "Burst", "Burst", "Burst", "Burst", "Burst", 
"Burst", "Burst", "Medium.energy.swimming", "Medium.energy.swimming", 
"Medium.energy.swimming", "Medium.energy.swimming", "Medium.energy.swimming", 
"Medium.energy.swimming"), Roll_Avg = c(3.97084, 3.90604, 3.90738, 
3.80425, 3.4154, -0.993225, -0.940408, -0.55992, -0.791121, -1.83573, 
-3.41667, -14.0837, -14.9381, -16.4732, -16.6994, -15.5318, -18.2402, 
-19.4427, -22.8129, NA, NA, 17.3778, 13.4861, 7.82564, 4.63057, 
6.94956, 14.3372, NA, -11.5397, -11.7741, -11.4795, -10.7844, 
-10.5135, -11.0162, NA, NA, 11.0157, 6.13595, 2.2689, -0.710414, 
-5.56132, -12.0987, -9.70231, -7.13388, -5.41693, -4.23157, 2.11092, 
2.19057, 1.5597, 0.637742, 1.17135, 3.41601, 4.71664, 4.61525, 
-0.813111, -4.45238, -7.43746, -9.11626, -9.94338, -11.0361, 
-11.8852, -10.472, -5.12697, 2.61247, 9.80993, 17.307, 10.5466, 
-4.01104, -7.40708, -2.72602, -5.43834, -5.22419, -4.8472, -4.43957, 
-1.67914, 2.39693, 7.84736, -9.7158, -8.70349, -8.22463, -8.22878, 
-9.43265, -0.527293, -0.283262, -0.614311, -0.380123, -0.344986, 
7.73204, 7.47037, 7.00224, 7.01661, 7.38737, 7.83069, -1.83138, 
-1.7847, -1.68084, -1.61196, -1.49905, -1.61391, -1.46356, -0.986477, 
-0.806394, -0.883015, -0.840026, -0.727501, -1.15641, -1.28692, 
-1.38961, -1.43838, -1.42089, -1.27225)), row.names = c(NA, -111L
), class = "data.frame")