提问人:Ahsk 提问时间:10/7/2023 最后编辑:Ahsk 更新时间:10/10/2023 访问量:86
如何绘制每个类别中值的散点图?
How to plot a scatter plot for values in each category?
问:
我有一个包含降水和风速数据的数据集。我使用 `cut_number` 函数将风速(`max_ws`)分为五个相等的类别,并将降雨量分为五类:0 毫米、0.01 至 2.50 毫米、2.51 至 5.00 毫米、5.01 至 7.50 毫米和 >7.50 毫米。
我想创建一个图,显示 `rain_category` 和 `ws_category` 的每个组合的降水量。所以 x 轴是 `rain_category`,y 轴是 `ws_category`,散点图中的数据点表示 `precipitation` 列。
这是我的数据的可重现示例:
# Reproducible sample data (looks like dput() output): a 366-row tibble with the
# numeric columns `max_ws` and `precipitation` plus two factor columns,
# `ws_category` (5 wind-speed bins) and `rain_category` (5 rain bins).
# NOTE: the rain levels are stored as ">7.50", "0", "0.01 to 2.50", ... so the
# largest bin sorts first on a plot axis unless the factor is re-levelled.
df <- structure(list(max_ws = c(2.4, 1.1, 0, 2.9, 3.8, 4.1, 3.9, 3.8,
2.6, 3.8, 4.2, 2.1, 2.9, 1.5, 2, 2.2, 3.1, 2.9, 3.1, 4.3, 4.1,
4.7, 3.1, 2.7, 5.7, 5.8, 3.8, 2.9, 0.3, 1.6, 0.8, 0, 1.9, 1.2,
4.3, 0.9, 2.4, 3.7, 4.8, 4.5, 3.5, 0, 2.3, 3.2, 3.2, 5, 3.3,
3.6, 2.4, 2.8, 4.7, 5.3, 4.4, 1.6, 5.3, 5.5, 4.6, 2.7, 3.5, 2.5,
2.3, 3.5, 4.7, 3.8, 4.4, 2.8, 5.4, 3.3, 4.7, 4, 3.3, 3.1, 2,
1.7, 2.7, 3.2, 3, 4.6, 4, 3.6, 3.2, 4.5, 3.8, 4.1, 3.3, 2, 3.2,
4.1, 4.3, 4.6, 4.5, 3.9, 3.1, 3.9, 4.6, 3.7, 3.4, 4.9, 3.2, 3.8,
4.6, 4, 1.9, 2.4, 3.3, 4.4, 3.4, 5.1, 4.6, 4.9, 3.4, 4, 3.6,
4.9, 4, 5.3, 5.6, 4.4, 5.5, 5.9, 5.8, 3.9, 5.1, 3.8, 3.3, 4.8,
3.7, 3.6, 4.3, 3, 4.8, 5.6, 4.3, 3, 4.8, 2.7, 4.4, 2.5, 4.5,
2.8, 3.4, 4.7, 4.1, 4.2, 4.5, 4.9, 4.5, 2.9, 3.2, 3, 1.6, 2.4,
3.3, 2.8, 3, 1.9, 3, 3.8, 3.1, 4.9, 5.3, 3.6, 3.8, 3.8, 2.5,
3.5, 3.8, 4.2, 4.9, 4, 3.9, 4, 3.9, 5.3, 4.5, 4.5, 4.8, 3.3,
2.7, 3.3, 3.5, 3.9, 4.8, 3.3, 2.9, 3, 4.5, 4.2, 3.6, 5.5, 6,
4.4, 4.6, 4.7, 2.9, 3.7, 2.5, 4.1, 3.2, 4.6, 4.7, 2.9, 2.9, 1.7,
3.6, 3.1, 3.6, 4.1, 3.4, 2.8, 3.3, 4.2, 3, 3.3, 2.4, 3.6, 2.8,
2.9, 4.3, 4, 3, 2, 2.3, 3.7, 3.8, 4.4, 4.3, 4.7, 3.5, 2.6, 3.9,
3.5, 2.8, 2.4, 3.7, 3.2, 2.5, 4.8, 3.7, 3.4, 2.9, 3.4, 2.5, 4,
2.2, 3.7, 2.6, 2.6, 2.3, 2.6, 3.1, 2.5, 3.1, 3.2, 3.9, 3.1, 2,
4.7, 2.3, 3.7, 3.3, 3.7, 3, 4.1, 3.6, 2.5, 3.3, 5.6, 4.5, 3.3,
3.6, 3.7, 4, 3.9, 4.2, 3.3, 4.5, 2.9, 6.2, 3, 3.7, 2.1, 3.2,
1.9, 3.3, 4, 3.6, 4.3, 3.7, 5.2, 3.9, 3.7, 2.9, 2.4, 3.8, 3.2,
3.1, 2.5, 2.8, 3.2, 3.8, 3.2, 4.6, 3.3, 4.2, 3.9, 4.4, 4.4, 3.6,
3, 4, 3.4, 4.3, 3.5, 2.5, 3.7, 3.3, 3.3, 1.2, 1.9, 2.9, 3.4,
1.4, 2.7, 3, 4.2, 5, 2, 3.7, 8, 5.7, 1.8, 3.3, 3.8, 2.7, 4.5,
3.6, 4.2, 5.2, 4.1, 4.9, 4.1, 2.9, 4.8, 4.9, 3.7, 2.7, 2.8, 5.2,
3.9, 3, 2.8, 1.4, 2.9, 5.9, 5.2, 4.2, 4.3, 6, 5.6, 4.1, 5.5,
4.2, 4.9, 5.7, 5.8), precipitation = c(0.1, 0, 0, 0, 0, 0, 0,
0, 0, 0, 3, 0, 0, 0, 0, 0, 2.8, 0.6, 4.8, 9.8, 2.3, 0, 0, 0,
0.1, 2.3, 0.2, 0, 0, 0, 0, 0, 0, 0, 0.2, 0.1, 4.3, 10.4, 3, 5.6,
0, 0, 0, 0, 0.5, 3.3, 4.2, 2.4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1.7, 0.1, 0, 0, 0, 2.5, 0.1, 0, 10, 0, 0.8, 0, 0, 0, 0, 0, 0,
0, 0.6, 0.3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.6, 0.4, 0.5, 0.5,
0.1, 0, 0, 0, 2.2, 1.9, 0, 8, 6, 0, 3.6, 0, 0, 0, 0.3, 0, 1,
1.1, 1.5, 1.1, 4.3, 0.9, 0.8, 0, 0.3, 2.7, 0.7, 0, 0, 0, 3.8,
0, 0.1, 0, 0.8, 0, 0.1, 12.1, 4.2, 0, 0, 0, 0, 3.1, 2.4, 0, 0.4,
0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.8, 19, 1, 0, 0, 3, 0, 4.8,
0.2, 2.9, 0.1, 1.6, 1.5, 0, 0, 0, 2, 5.3, 0, 6, 0, 0, 2.5, 0.4,
4.4, 20.7, 6.1, 3.4, 2.8, 0, 0.2, 3.7, 0.6, 0, 0, 0, 4.2, 0,
0, 7.3, 10.3, 1, 4.3, 0.2, 4.2, 0.7, 4, 7.7, 3.1, 19.1, 2.6,
0.9, 0, 0, 0, 0, 0, 0, 11.2, 0.6, 1.9, 4.4, 0, 0, 0.4, 0.6, 0,
5.4, 2.6, 3.4, 5, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0.4, 0, 0, 13.9,
0, 0.1, 2, 1.9, 3.3, 1.5, 0, 0, 0, 5.5, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0.1, 4.5, 0.9, 0.2, 3.9, 0, 0, 0, 0.7, 2, 0, 6.7, 1.4,
8.8, 10.9, 2, 3.8, 10.1, 0.1, 0, 0, 3.3, 0, 5.2, 1.9, 24.9, 2,
1.9, 0.1, 0.9, 0, 0, 10.5, 3.4, 0.2, 1.1, 2.1, 0.5, 0, 0, 0,
0, 0, 5.4, 0.8, 0.2, 0, 0, 0.3, 7.1, 0.2, 0.1, 3.9, 1.7, 3.2,
3.6, 0.4, 4.8, 0.3, 1, 0.9, 1.1, 0, 0, 0, 0, 0, 0, 2.3, 1, 0,
0, 0, 0, 0, 2.2, 0.1, 1.7, 0.3, 0, 0.7, 0, 1.9, 0.1, 3.2, 1.9,
1.4, 0, 0, 7.3, 8.7, 1.2, 5, 2.2, 0, 8.6, 3.7, 2.3, 5.1, 0.2,
0, 0, 3.5, 22, 1, 8.7, 2.6, 3.5, 0.2, 0.7, 0.9, 6.3, 7.8), ws_category = structure(c(1L,
1L, 1L, 2L, 3L, 4L, 4L, 3L, 2L, 3L, 4L, 1L, 2L, 1L, 1L, 1L, 2L,
2L, 2L, 4L, 4L, 5L, 2L, 2L, 5L, 5L, 3L, 2L, 1L, 1L, 1L, 1L, 1L,
1L, 4L, 1L, 1L, 3L, 5L, 4L, 3L, 1L, 1L, 2L, 2L, 5L, 3L, 3L, 1L,
2L, 5L, 5L, 4L, 1L, 5L, 5L, 4L, 2L, 3L, 1L, 1L, 3L, 5L, 3L, 4L,
2L, 5L, 3L, 5L, 4L, 3L, 2L, 1L, 1L, 2L, 2L, 2L, 4L, 4L, 3L, 2L,
4L, 3L, 4L, 3L, 1L, 2L, 4L, 4L, 4L, 4L, 4L, 2L, 4L, 4L, 3L, 3L,
5L, 2L, 3L, 4L, 4L, 1L, 1L, 3L, 4L, 3L, 5L, 4L, 5L, 3L, 4L, 3L,
5L, 4L, 5L, 5L, 4L, 5L, 5L, 5L, 4L, 5L, 3L, 3L, 5L, 3L, 3L, 4L,
2L, 5L, 5L, 4L, 2L, 5L, 2L, 4L, 1L, 4L, 2L, 3L, 5L, 4L, 4L, 4L,
5L, 4L, 2L, 2L, 2L, 1L, 1L, 3L, 2L, 2L, 1L, 2L, 3L, 2L, 5L, 5L,
3L, 3L, 3L, 1L, 3L, 3L, 4L, 5L, 4L, 4L, 4L, 4L, 5L, 4L, 4L, 5L,
3L, 2L, 3L, 3L, 4L, 5L, 3L, 2L, 2L, 4L, 4L, 3L, 5L, 5L, 4L, 4L,
5L, 2L, 3L, 1L, 4L, 2L, 4L, 5L, 2L, 2L, 1L, 3L, 2L, 3L, 4L, 3L,
2L, 3L, 4L, 2L, 3L, 1L, 3L, 2L, 2L, 4L, 4L, 2L, 1L, 1L, 3L, 3L,
4L, 4L, 5L, 3L, 2L, 4L, 3L, 2L, 1L, 3L, 2L, 1L, 5L, 3L, 3L, 2L,
3L, 1L, 4L, 1L, 3L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 4L, 2L, 1L,
5L, 1L, 3L, 3L, 3L, 2L, 4L, 3L, 1L, 3L, 5L, 4L, 3L, 3L, 3L, 4L,
4L, 4L, 3L, 4L, 2L, 5L, 2L, 3L, 1L, 2L, 1L, 3L, 4L, 3L, 4L, 3L,
5L, 4L, 3L, 2L, 1L, 3L, 2L, 2L, 1L, 2L, 2L, 3L, 2L, 4L, 3L, 4L,
4L, 4L, 4L, 3L, 2L, 4L, 3L, 4L, 3L, 1L, 3L, 3L, 3L, 1L, 1L, 2L,
3L, 1L, 2L, 2L, 4L, 5L, 1L, 3L, 5L, 5L, 1L, 3L, 3L, 2L, 4L, 3L,
4L, 5L, 4L, 5L, 4L, 2L, 5L, 5L, 3L, 2L, 2L, 5L, 4L, 2L, 2L, 1L,
2L, 5L, 5L, 4L, 4L, 5L, 5L, 4L, 5L, 4L, 5L, 5L, 5L), levels = c("[0,2.5]",
"(2.5,3.2]", "(3.2,3.8]", "(3.8,4.6]", "(4.6,11.6]"), class = "factor"),
rain_category = structure(c(3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 4L, 2L, 2L, 2L, 2L, 2L, 4L, 3L, 4L, 1L, 3L, 2L, 2L,
2L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 4L, 1L,
4L, 5L, 2L, 2L, 2L, 2L, 3L, 4L, 4L, 3L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 3L, 3L, 2L, 2L, 2L, 3L, 3L, 2L, 1L, 2L,
3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 3L, 3L,
2L, 1L, 5L, 2L, 4L, 2L, 2L, 2L, 3L, 2L, 3L, 3L, 3L, 3L, 4L,
3L, 3L, 2L, 3L, 4L, 3L, 2L, 2L, 2L, 4L, 2L, 3L, 2L, 3L, 2L,
3L, 1L, 4L, 2L, 2L, 2L, 2L, 4L, 3L, 2L, 3L, 2L, 3L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 1L, 3L, 2L, 2L, 4L, 2L, 4L,
3L, 4L, 3L, 3L, 3L, 2L, 2L, 2L, 3L, 5L, 2L, 5L, 2L, 2L, 3L,
3L, 4L, 1L, 5L, 4L, 4L, 2L, 3L, 4L, 3L, 2L, 2L, 2L, 4L, 2L,
2L, 5L, 1L, 3L, 4L, 3L, 4L, 3L, 4L, 1L, 4L, 1L, 4L, 3L, 2L,
2L, 2L, 2L, 2L, 2L, 1L, 3L, 3L, 4L, 2L, 2L, 3L, 3L, 2L, 5L,
4L, 4L, 4L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 2L, 2L, 3L, 2L, 2L,
1L, 2L, 3L, 3L, 3L, 4L, 3L, 2L, 2L, 2L, 5L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 3L, 4L, 3L, 3L, 4L, 2L, 2L, 2L, 3L, 3L,
2L, 5L, 3L, 1L, 1L, 3L, 4L, 1L, 3L, 2L, 2L, 4L, 2L, 5L, 3L,
1L, 3L, 3L, 3L, 3L, 2L, 2L, 1L, 4L, 3L, 3L, 3L, 3L, 2L, 2L,
2L, 2L, 2L, 5L, 3L, 3L, 2L, 2L, 3L, 5L, 3L, 3L, 4L, 3L, 4L,
4L, 3L, 4L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L,
2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 2L, 3L, 2L, 3L, 3L, 4L,
3L, 3L, 2L, 2L, 5L, 1L, 3L, 4L, 3L, 2L, 1L, 4L, 3L, 5L, 3L,
2L, 2L, 4L, 1L, 3L, 1L, 4L, 4L, 3L, 3L, 3L, 5L, 1L), levels = c(">7.50",
"0", "0.01 to 2.50", "2.51 to 5.00", "5.01 to 7.50"), class = "factor")), row.names = c(NA,
-366L), class = c("tbl_df", "tbl", "data.frame"))
当我尝试绘制时,每个类别中只有一个数据点,但我的数据中有很多数据点。我错过了哪一步?
# Asker's attempt: draw one marker per observation at its
# (rain_category, ws_category) cell. Both axes are discrete, so every
# observation in a cell lands on the same coordinates and only a single marker
# is visible per cell. `fill` is mapped to precipitation, but the default point
# shape has no fill area, so the mapping is not shown on the markers.
p <- ggplot(
  data = df,
  mapping = aes(x = rain_category, y = ws_category, fill = precipitation)
) +
  theme_minimal() +
  geom_point(size = 3)
p
答:
3赞
Seth
10/7/2023
#1
通过以这种方式对数据进行分箱,所有点在 `ws_category` 和 `rain_category` 的每个交点处都重叠。你的白板草图看起来更像是连续数据。如果要保留分箱后的数据标记,但又想了解网格上每个分箱中的点数,则可以添加随机噪声(抖动)。但如果这给人的印象是,分箱中更靠右的点具有更大的值,就可能会导致错误的结论。
编辑:根据评论中提出的问题,在最后添加了一个汇总数据图。
library(tidyverse)
# Scatter of the binned data with random jitter so that observations sharing a
# (rain_category, ws_category) cell no longer sit exactly on top of each other.
# Marker colour encodes the measured precipitation. The jitter is random, so
# the exact marker positions differ from run to run.
ggplot(
  data = df,
  mapping = aes(x = rain_category, y = ws_category, color = precipitation)
) +
  theme_minimal() +
  geom_point(
    position = position_jitterdodge(
      jitter.height = 0.25,
      dodge.width = 0.25
    ),
    size = 1
  )
考虑分面,正如@MrFlick在评论中建议的那样。每对类别都是单独绘制的,允许您使用实际测量值绘制彼此相对的点。
# One panel per (ws_category, rain_category) pair: wind-speed bins form the
# panel rows and rain bins the panel columns. Inside each panel the raw
# measurements are plotted (precipitation on x, max_ws on y), and every panel
# gets its own axis ranges via scales = "free".
ggplot(data = df, mapping = aes(x = precipitation, y = max_ws)) +
  facet_grid(
    rows = vars(ws_category),
    cols = vars(rain_category),
    scales = "free"
  ) +
  geom_point()
汇总绘图的降水数据
在创建绘图之前,这将计算每对 `ws_category` 和 `rain_category` 的平均降水量并统计其中的站点(观测)数。`fct_relevel()` 用于更改因子水平的顺序,将 `>7.50` 放在最后。
library(tidyverse)
# Heat map of the binned data: one tile per (ws_category, rain_category) pair,
# filled by the mean precipitation of that pair and labelled with the number
# of observations it contains.
df %>%
  summarise(
    avg_precipitation = mean(precipitation),
    n = n(),
    .by = c(ws_category, rain_category)
  ) %>%
  # Move the ">7.50" level to the end so the rain bins appear in increasing
  # order along the x axis.
  mutate(
    rain_category = fct_relevel(rain_category, ">7.50", after = Inf)
  ) %>%
  ggplot(
    aes(
      x = rain_category,
      y = ws_category,
      fill = avg_precipitation,
      label = n
    )
  ) +
  geom_tile() +
  geom_text(size = 10) +
  labs(
    x = "Rain Category",
    y = "WS Category",
    fill = "Average Precipitation"
  )
创建于 2023-10-07 使用 reprex v2.0.2
评论
0赞
Seth
10/8/2023
嗨,@Ahsk。我用一个新图表编辑了答案,该图表绘制了汇总数据而不是单个观察结果。我希望这会有所帮助!
评论
# Suggestion from the comments: plot the raw measurements (max_ws on x,
# precipitation on y) in a grid of panels, with wind-speed bins as panel rows
# and rain bins as panel columns. All panels share the same axis scales.
ggplot(data = df, mapping = aes(x = max_ws, y = precipitation)) +
  geom_point() +
  facet_grid(rows = vars(ws_category), cols = vars(rain_category))