如何在 gnuplot 或 python 中进行数据分箱?

How to do data binning in gnuplot or python?

提问人:Rabia Sönmez 提问时间:9/9/2023 最后编辑:Rabia Sönmez 更新时间:9/9/2023 访问量:28

问:

我正在尝试在 Python 中进行数据分箱。我想得到的图是 original_graph,但实际得到的却是 my_plot。我想把数据按纬度每 2 度、海拔每 100 公里进行分箱。期待您的帮助,我的代码如下:

import csv
import numpy as np
import matplotlib.pyplot as plt
# Input file. Column 0 holds the value used for colouring, column 1 the
# altitude and column 2 the latitude.
csv_file = 'new.csv'

latitude_data, altitude_data, color_data = [], [], []

# 'utf-8-sig' drops a leading byte-order mark, if the file has one.
with open(csv_file, 'r', encoding='utf-8-sig') as file:
    for record in csv.reader(file):
        lat, alt, value = float(record[2]), float(record[1]), float(record[0])
        latitude_data.append(lat)
        altitude_data.append(alt)
        color_data.append(value)

latitude_bin_width = 2.0  # degrees of latitude per bin
altitude_bin_width = 100  # kilometres of altitude per bin

# Bin count per axis: how many whole widths fit into the data span, plus one.
latitude_span = max(latitude_data) - min(latitude_data)
altitude_span = max(altitude_data) - min(altitude_data)
num_latitude_bins = int(latitude_span / latitude_bin_width) + 1
num_altitude_bins = int(altitude_span / altitude_bin_width) + 1

# H holds the number of points per (latitude, altitude) bin.
# NOTE: nothing below draws H; only the raw points are plotted.
H, xedges, yedges = np.histogram2d(
    latitude_data,
    altitude_data,
    bins=[num_latitude_bins, num_altitude_bins],
)

# Scatter plot of the unbinned points, coloured by the first CSV column.
plt.figure(figsize=(12, 8))
plt.scatter(
    latitude_data,
    altitude_data,
    c=color_data,
    cmap='jet',
    marker='o',
    alpha=0.5,
    s=100,
)
plt.colorbar(label='First Column Value', format='%.0e')

plt.show()


python-3.x 分箱

评论


答:

0赞 Marco Parola 9/9/2023 #1

您似乎想先按指定的纬度和海拔分箱宽度创建二维直方图,然后在其上叠加按数据着色的散点图。您的代码已经基本正确:直方图已经用 np.histogram2d 函数计算出来了,只是还需要用 imshow 把它绘制出来:

import csv
import numpy as np
import matplotlib.pyplot as plt

csv_file = 'new.csv'

latitude_bin_width = 2.0  # Two-degree range in latitude
altitude_bin_width = 100  # 100-kilometer range in altitude


def bin_edges(values, bin_width):
    """Return bin edges spaced exactly ``bin_width`` apart that cover ``values``.

    The first edge is ``min(values)`` and further edges follow every
    ``bin_width`` until the whole data range is covered.  Passing these
    edges to ``np.histogram2d`` keeps every bin at the requested width;
    passing only a bin *count* would instead split ``[min, max]`` into equal
    parts whose width is generally not ``bin_width``.

    Args:
        values: Non-empty sequence of numbers.
        bin_width: Positive width of each bin.

    Returns:
        1-D NumPy array of ``int((max - min) / bin_width) + 2`` edges.

    Raises:
        ValueError: If ``values`` is empty or ``bin_width`` is not positive.
    """
    if bin_width <= 0:
        raise ValueError("bin_width must be positive")
    lowest, highest = min(values), max(values)  # ValueError on empty input
    num_bins = int((highest - lowest) / bin_width) + 1
    # Multiply instead of accumulating so rounding error does not build up.
    return lowest + bin_width * np.arange(num_bins + 1)


def load_columns(path):
    """Read the CSV at ``path`` and return its first three columns as floats.

    Returns:
        Tuple ``(latitudes, altitudes, color_values)`` of lists, taken from
        the third, second and first column of every row respectively.
    """
    latitudes, altitudes, color_values = [], [], []
    # newline='' is what the csv module documentation asks for on open().
    with open(path, 'r', encoding='utf-8-sig', newline='') as file:
        for row in csv.reader(file):
            latitudes.append(float(row[2]))  # Third column (latitude)
            altitudes.append(float(row[1]))  # Second column (altitude)
            color_values.append(float(row[0]))  # First column
    return latitudes, altitudes, color_values


def main():
    """Plot the per-bin point counts with the raw points drawn on top."""
    latitude_data, altitude_data, color_data = load_columns(csv_file)

    # Explicit edges give bins of exactly 2 degrees by 100 km.
    H, xedges, yedges = np.histogram2d(
        latitude_data,
        altitude_data,
        bins=[
            bin_edges(latitude_data, latitude_bin_width),
            bin_edges(altitude_data, altitude_bin_width),
        ],
    )

    # Create a 2D histogram plot.  H is indexed [latitude, altitude], so it
    # is transposed to put latitude on the x axis; the extent comes from the
    # bin edges so that the image cells line up with the bins.
    plt.figure(figsize=(12, 8))
    plt.imshow(H.T, origin='lower',
               extent=[xedges[0], xedges[-1], yedges[0], yedges[-1]],
               cmap='jet', aspect='auto', interpolation='none')

    plt.colorbar(label='Frequency')

    # Scatter plot your original data points
    plt.scatter(latitude_data, altitude_data, c=color_data, cmap='jet',
                marker='o', alpha=0.5, s=100)

    plt.xlabel('Latitude')
    plt.ylabel('Altitude (km)')
    plt.title('Scatter Plot with 2D Histogram')

    plt.show()


if __name__ == "__main__":
    main()