由于 on-batch-end（）问题导致训练缓慢-解网

问：

我正在按照 Keras 博客中的示例，使用预训练权重并学习顶层模型的权重，在我的数据上对 VGG19 模型进行微调。我在一个集群上运行代码，每个任务有 32 个 CPU 和两块 Tesla K20 GPU。我收到了几条警告消息： UserWarning：与批量更新（0.118864）相比，方法 on_batch_end（）速度较慢。请检查您的回调。显然有什么东西拖慢了我的训练阶段。这是我的代码：

import numpy as np
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Dropout, Flatten, Dense
from keras import applications
from keras import backend as K
from keras import optimizers
from keras.models import Model
# Use the TensorFlow image layout (height, width, channels).
# `set_image_dim_ordering('tf')` is the legacy Keras 1 spelling and is not
# available in later Keras 2 releases; `set_image_data_format` is the
# Keras 2 equivalent.
K.set_image_data_format('channels_last')
# dimensions of our images.
img_width, img_height = 48, 48
# where the trained top-classifier weights are written / read back
top_model_weights_path = 'modelvgg19_10k.h5'
# image folders read with flow_from_directory (one sub-folder per class)
train_data_dir = 'data13/train'
validation_data_dir = 'data13/validation'
# number of images used from each folder
nb_train_samples = 16000
nb_validation_samples = 4000
# training schedule for the top classifier
epochs = 50
batch_size = 200
def save_bottlebeck_features():
    """Run the train and validation images through VGG19 and save the outputs.

    The ImageNet-pretrained VGG19 convolutional base (no dense top) is applied
    to the images under ``train_data_dir`` and ``validation_data_dir``. The
    resulting feature arrays are written with ``np.save`` to the files
    ``bottleneck_features_train`` and ``bottleneck_features_validation``.

    The generators are created with ``class_mode=None`` (images only) and
    ``shuffle=False`` so that the features come out in directory order;
    ``train_top_model`` relies on that order when it builds its labels.
    """
    datagen = ImageDataGenerator(rescale=1. / 255)
    model = applications.VGG19(include_top=False, weights='imagenet',
                               input_shape=(img_height, img_width, 3))

    # (image folder, number of samples, output file) for each split
    jobs = (
        (train_data_dir, nb_train_samples, 'bottleneck_features_train'),
        (validation_data_dir, nb_validation_samples,
         'bottleneck_features_validation'),
    )
    for data_dir, sample_count, out_path in jobs:
        generator = datagen.flow_from_directory(
            data_dir,
            # Keras expects target_size as (height, width)
            target_size=(img_height, img_width),
            batch_size=batch_size,
            class_mode=None,
            shuffle=False)
        features = model.predict_generator(
            generator, sample_count // batch_size)
        # Write through an explicit file object so the exact file name is
        # kept (np.save would append ".npy" to a bare path) and the handle
        # is closed deterministically.
        with open(out_path, 'wb') as out_file:
            np.save(out_file, features)

def train_top_model():
    """Train a small dense classifier on the saved bottleneck features.

    Loads the arrays written by ``save_bottlebeck_features``, fits a
    Flatten -> Dense(256, relu) -> Dropout(0.5) -> Dense(1, sigmoid) model
    with RMSprop and binary cross-entropy, and saves its weights to
    ``top_model_weights_path``.

    Labels are built by position: the first half of each array is labelled 0
    and the second half 1.
    NOTE(review): this assumes both classes hold the same number of images
    and that the features are stored in class order -- confirm against the
    data folders.
    """
    # Context managers close the feature files as soon as they are read.
    with open('bottleneck_features_train', 'rb') as train_file:
        train_data = np.load(train_file)
    train_labels = np.array([0] * (nb_train_samples // 2) + [1] * (nb_train_samples // 2))

    with open('bottleneck_features_validation', 'rb') as validation_file:
        validation_data = np.load(validation_file)
    validation_labels = np.array([0] * (nb_validation_samples // 2) + [1] * (nb_validation_samples // 2))

    model = Sequential()
    # input shape is one sample of the saved features (batch axis dropped)
    model.add(Flatten(input_shape=train_data.shape[1:]))
    model.add(Dense(256, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(optimizer='rmsprop',
                  loss='binary_crossentropy', metrics=['accuracy'])
    model.fit(train_data, train_labels,
              epochs=epochs,
              batch_size=batch_size,
              validation_data=(validation_data, validation_labels))
    model.save_weights(top_model_weights_path)

# Stage 1: cache the VGG19 features, then train the top classifier on them.
save_bottlebeck_features()
train_top_model()


# Stage 2: fine-tune VGG19 together with the trained top classifier.

# path to the model weights files.
# NOTE(review): weights_path is not used below (the base model loads
# 'imagenet' weights); kept in case other code reads it.
weights_path = '../keras/examples/vgg19_weights.h5'
top_model_weights_path = 'modelvgg19_10k.h5'
# dimensions of our images.
img_width, img_height = 48, 48

train_data_dir = 'data13/train'
validation_data_dir = 'data13/validation'
nb_train_samples = 16000
nb_validation_samples = 4000
epochs = 80
batch_size = 200

# build the VGG19 network
base_model = applications.VGG19(weights='imagenet', include_top=False, input_shape=(48,48,3))
print('Model loaded.')

# build a classifier model to put on top of the convolutional model
top_model = Sequential()
top_model.add(Flatten(input_shape=base_model.output_shape[1:]))
top_model.add(Dense(256, activation='relu'))
top_model.add(Dropout(0.5))
top_model.add(Dense(1, activation='sigmoid'))

# note that it is necessary to start with a fully-trained
# classifier, including the top classifier,
# in order to successfully do fine-tuning
top_model.load_weights(top_model_weights_path)

# add the model on top of the convolutional base
model = Model(inputs=base_model.input, outputs=top_model(base_model.output))

# set the first 15 layers to non-trainable (weights will not be updated)
# NOTE(review): the index 15 looks carried over from the VGG16 example, where
# it means "up to the last conv block". VGG19 has more conv layers per block,
# so the cut-off may land elsewhere -- confirm against model.summary().
for layer in model.layers[:15]:
    layer.trainable = False

# compile the model with a SGD/momentum optimizer
# and a very slow learning rate.
model.compile(loss='binary_crossentropy',
              optimizer=optimizers.SGD(lr=1e-4, momentum=0.9),
              metrics=['accuracy'])

# prepare data augmentation configuration
train_datagen = ImageDataGenerator(rescale=1. / 255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True)

# validation images are only rescaled, never augmented
test_datagen = ImageDataGenerator(rescale=1. / 255)

train_generator = train_datagen.flow_from_directory(
    train_data_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='binary')

validation_generator = test_datagen.flow_from_directory(
    validation_data_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='binary')

model.summary()

# fine-tune the model
# (the original listing ended with a stray "}" after the closing parenthesis,
# which is a syntax error; it has been removed)
model.fit_generator(
    train_generator,
    steps_per_epoch=nb_train_samples // batch_size,
    epochs=epochs,
    validation_data=validation_generator,
    validation_steps=nb_validation_samples // batch_size,
    verbose=1)

请帮我解决这个问题，以加快训练速度。

回调 GPU Keras 训练数据

0赞 papayiannis 7/23/2018

您实际上没有使用任何回调，所以我认为问题一定出在每个 epoch 结束时的指标计算上。如果去掉指标，是否仍会出现该警告？

答： 暂无答案

上一个：如何在同步nodejs函数中等待promise？

下一个：为什么我的 Button 的命令在我创建 Button 时立即执行，而不是在我单击它时执行？[复制]

由于 on-batch-end（）问题导致训练缓慢

Slow training due to on-batch-end() issue

评论

由于 on-batch-end（） 问题导致训练缓慢

Slow training due to on-batch-end() issue

评论

由于 on-batch-end（）问题导致训练缓慢