提问人:Syuuuu 提问时间:8/8/2023 更新时间:8/9/2023 访问量:26
如何在customdatagenerator中获取正确的confusion_matrix数据
How to get correct confusion_matrix data in customdatagenerator
问:
我正在构建 confusion_matrix,但得到的 y_true 形状总是不对。
我认为我的 y_label 是正确的,我有 62 个验证(val)数据。
我不知道应该在何处声明 y_true。
ValueError
Found input variables with inconsistent numbers of samples: [63, 62]
File "C:\Labbb\inceptionResnetV2\InceptionResnetV2_1.py", line 213, in <module>
sns.heatmap(confusion_matrix(y_true, y_pred),
ValueError: Found input variables with inconsistent numbers of samples: [63, 62]
我尝试在 get_data 中向 self.y_true 追加标签,用 def get_y_true 返回 self.y_true,在 on_epoch_end 中执行“self.y_true = []”,并设置 shuffle=False。
下面是 CustomDataGenerator。
我应该在哪里声明“self.y_true = []”?
# Root folders of the training and validation data sets.
train_dir = r'C:\Labbb\mergeimage_npy\512512\npy\train'
valid_dir = r'C:\Labbb\mergeimage_npy\512512\npy\val'

# Sub-folder names are "image<k>" / "label<k>"; index 5 is not used.
image_folders = ['image' + str(k) for k in (0, 1, 2, 3, 4, 6, 7)]
label_folders = ['label' + str(k) for k in (0, 1, 2, 3, 4, 6, 7)]
class CustomDataGenerator(Sequence):
    """Batch generator yielding ``(X, y)`` pairs of images and one-hot labels.

    Images are loaded with ``np.load``; each label file's first line is
    ``"<path> <label>"`` and the integer after the last space is the class.
    ``Sequence`` is presumably ``keras.utils.Sequence`` (import not shown).

    Every label read by :meth:`get_data` is also appended to ``self.y_true``.
    NOTE(review): the list grows on *every* ``__getitem__`` call, so any
    extra call made by the framework leaves more labels than samples (the
    post reports 63 labels for 62 predictions) -- confirm before relying on
    it for metrics.
    """

    def __init__(self, image_folders, label_folders, dir, dim=(512, 512),
                 batch_size=1, n_classes=7, n_channels=8, shuffle=True):
        """Store the configuration and build the initial path lists."""
        self.image_folders = image_folders
        ...  # remaining attribute assignments are elided in the original post
        self.image_paths = []
        self.label_paths = []
        self.on_epoch_end()

    def __len__(self):
        """Return the number of batches, rounding up for a short last batch."""
        return int(np.ceil(len(self.image_paths) / self.batch_size))

    def __getitem__(self, index):
        """Return batch number ``index`` as an ``(X, y)`` tuple."""
        start = index * self.batch_size
        stop = start + self.batch_size
        batch = zip(self.image_paths[start:stop], self.label_paths[start:stop])
        return self.get_data(batch)

    def on_epoch_end(self):
        """Rebuild the path lists, clear ``y_true`` and reshuffle if enabled."""
        self.image_paths = []
        self.label_paths = []
        self.y_true = []
        for folder in self.image_folders:
            image_folder_path = os.path.join(self.dir, folder)
            for file_name in os.listdir(image_folder_path):
                self.image_paths.append(
                    os.path.join(image_folder_path, file_name))
        for folder in self.label_folders:
            ...  # loop body elided in the original post
        if self.shuffle:
            self._shuffle_pairs()

    def _shuffle_pairs(self):
        """Apply one random permutation to both lists so pairs stay aligned.

        Shuffling the two lists independently would attach each image to a
        random label.

        Raises:
            ValueError: if the two path lists differ in length.
        """
        if len(self.image_paths) != len(self.label_paths):
            raise ValueError(
                "image_paths and label_paths must have the same length, "
                "got %d and %d"
                % (len(self.image_paths), len(self.label_paths)))
        order = np.random.permutation(len(self.image_paths))
        self.image_paths = [self.image_paths[i] for i in order]
        self.label_paths = [self.label_paths[i] for i in order]

    def get_data(self, batch):
        """Load one batch and return ``(X, y)``.

        Args:
            batch: iterable of ``(image_path, label_path)`` pairs.

        Returns:
            ``X`` with shape ``(len(batch), *dim, n_channels)`` and ``y`` with
            shape ``(len(batch), n_classes)``.
        """
        # Materialise the zip so the arrays match the real batch length; a
        # short last batch must not leave uninitialised rows behind.
        batch = list(batch)
        X = np.empty((len(batch), *self.dim, self.n_channels))
        y = np.empty((len(batch), self.n_classes))
        for i, (image_path, label_path) in enumerate(batch):
            image = np.load(image_path)
            with open(label_path, 'r') as f:
                line = f.readline().strip()
            # The class index is the token after the last space.
            _, label = line.rsplit(' ', 1)
            label = int(label)
            self.y_true.append(label)
            X[i,] = image
            y[i,] = to_categorical(label, num_classes=self.n_classes)
        return X, y

    def get_y_true(self):
        """Return the list of integer labels collected by ``get_data``."""
        return self.y_true
下面是获取 y_true 和 y_pred 并构建 confusion_matrix 的代码。
在这里,“y_true = val_datagen.get_y_true()”应该放在“Y_pred = model.predict”这一行之前还是之后?
# Training data is shuffled; validation data keeps its order.
train_datagen = CustomDataGenerator(
    image_folders, label_folders, train_dir, **params, shuffle=True
)
val_datagen = CustomDataGenerator(
    image_folders, label_folders, valid_dir, **params, shuffle=False
)

# get_y_true() hands back the generator's own label list, which get_data()
# appends to while predict() iterates over the batches.
y_true = val_datagen.get_y_true()
Y_pred = model.predict(val_datagen)
# Predicted class = index of the highest score in each row.
y_pred = np.argmax(Y_pred, axis=1)

fig, ax = plt.subplots(figsize=(12, 6))
sns.heatmap(
    confusion_matrix(y_true, y_pred),
    annot=True,
    fmt="d",
    cmap='Greens',
    ax=ax,
)
答:
0赞
mhenning
8/9/2023
#1
我无法测试它,但它现在应该可以工作了。我将 `self.y_true=[]` 从 `on_epoch_end()` 移至 `__getitem__()`,只有在调用第一个批次时才会重置它。如果放在纪元(epoch)开始时的回调中会更好。这仅在数据集至少被调用一次后才起作用,因为图像和标签是在 `get_data()` 中一次一个批次加载的。
但我不确定批量大小为 1 时加载了多少图像。似乎一个批次拿到的是图像文件夹路径,该文件夹中是否有多张图像?
# Root folders of the training and validation data sets.
train_dir = r'C:\Labbb\mergeimage_npy\512512\npy\train'
valid_dir = r'C:\Labbb\mergeimage_npy\512512\npy\val'

# Sub-folder names are "image<k>" / "label<k>"; index 5 is not used.
image_folders = ['image' + str(k) for k in (0, 1, 2, 3, 4, 6, 7)]
label_folders = ['label' + str(k) for k in (0, 1, 2, 3, 4, 6, 7)]
class CustomDataGenerator(Sequence):
    """Batch generator yielding ``(X, y)`` pairs of images and one-hot labels.

    Images are loaded with ``np.load``; each label file's first line is
    ``"<path> <label>"`` and the integer after the last space is the class.
    ``Sequence`` is presumably ``keras.utils.Sequence`` (import not shown).

    ``self.y_true`` collects the integer labels of the batches served since
    the most recent request for batch 0. Because that request rebinds the
    attribute to a new list, call :meth:`get_y_true` *after* iterating the
    generator (e.g. after ``model.predict``), not before.
    """

    def __init__(self, image_folders, label_folders, dir, dim=(512, 512),
                 batch_size=1, n_classes=7, n_channels=8, shuffle=True):
        """Store the configuration and collect the image/label paths."""
        self.image_folders = image_folders
        ...  # remaining attribute assignments are elided in the original post
        self.image_paths = []
        self.label_paths = []
        # Created up front so get_y_true() does not raise AttributeError
        # when it is called before the first batch has been requested.
        self.y_true = []
        self.init_paths()

    def __len__(self):
        """Return the number of batches, rounding up for a short last batch."""
        return int(np.ceil(len(self.image_paths) / self.batch_size))

    def __getitem__(self, index):
        """Return batch number ``index`` as an ``(X, y)`` tuple."""
        if index == 0:  # this line here should fix it
            # A new pass starts at batch 0, so drop labels from earlier passes.
            self.y_true = []
        start = index * self.batch_size
        stop = start + self.batch_size
        batch = zip(self.image_paths[start:stop], self.label_paths[start:stop])
        return self.get_data(batch)

    def init_paths(self):
        """Fill the path lists from the configured folders; shuffle if enabled."""
        for folder in self.image_folders:
            image_folder_path = os.path.join(self.dir, folder)
            for file_name in os.listdir(image_folder_path):
                self.image_paths.append(
                    os.path.join(image_folder_path, file_name))
        for folder in self.label_folders:
            ...  # loop body elided in the original post
        if self.shuffle:
            self._shuffle_pairs()

    def on_epoch_end(self):
        """Reshuffle the sample order between epochs when shuffling is on."""
        if self.shuffle:
            self._shuffle_pairs()

    def _shuffle_pairs(self):
        """Apply one random permutation to both lists so pairs stay aligned.

        Shuffling the two lists independently would attach each image to a
        random label.

        Raises:
            ValueError: if the two path lists differ in length.
        """
        if len(self.image_paths) != len(self.label_paths):
            raise ValueError(
                "image_paths and label_paths must have the same length, "
                "got %d and %d"
                % (len(self.image_paths), len(self.label_paths)))
        order = np.random.permutation(len(self.image_paths))
        self.image_paths = [self.image_paths[i] for i in order]
        self.label_paths = [self.label_paths[i] for i in order]

    def get_data(self, batch):
        """Load one batch and return ``(X, y)``.

        Args:
            batch: iterable of ``(image_path, label_path)`` pairs.

        Returns:
            ``X`` with shape ``(len(batch), *dim, n_channels)`` and ``y`` with
            shape ``(len(batch), n_classes)``.
        """
        # Materialise the zip so the arrays match the real batch length; a
        # short last batch must not leave uninitialised rows behind.
        batch = list(batch)
        X = np.empty((len(batch), *self.dim, self.n_channels))
        y = np.empty((len(batch), self.n_classes))
        for i, (image_path, label_path) in enumerate(batch):
            image = np.load(image_path)
            with open(label_path, 'r') as f:
                line = f.readline().strip()
            # The class index is the token after the last space.
            _, label = line.rsplit(' ', 1)
            label = int(label)
            self.y_true.append(label)
            X[i,] = image
            y[i,] = to_categorical(label, num_classes=self.n_classes)
        return X, y

    def get_y_true(self):
        """Return the integer labels collected since batch 0 was last served."""
        return self.y_true
评论