Я обучаю небольшую модель, похожую на VGG, и загружаю предобученные веса VGG16 в первые свёрточные слои, которые совпадают с соответствующими слоями VGG16. Однако модель ведёт себя довольно странно и вообще ничему не учится: значение функции потерь не меняется, точность остаётся прежней. Что не так и как это исправить?
from keras import applications
from keras.preprocessing.image import ImageDataGenerator
from keras import optimizers
from keras.models import Sequential, Model
from keras.layers import Dropout, Flatten, Dense, GlobalAveragePooling2D
from keras import backend as k
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, TensorBoard, EarlyStopping
from keras import layers
from keras import models
from keras import optimizers
from keras.layers import Dropout
from keras.regularizers import l2
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
import matplotlib.pyplot as plt
from keras.preprocessing.image import ImageDataGenerator
import os
# Small VGG-style convolutional base: only the first two VGG16 blocks.
# The layer names mirror VGG16's so that pretrained weights can be matched
# to them. padding='same' is what the VGG16 architecture uses for every
# convolution; the Keras default ('valid') would shrink each feature map by
# two pixels per convolution and no longer reproduce the original blocks.
base_model = models.Sequential()
base_model.add(layers.Conv2D(64, (3, 3), activation='relu', padding='same',
                             name='block1_conv1', input_shape=(224, 224, 3)))
base_model.add(layers.Conv2D(64, (3, 3), activation='relu', padding='same',
                             name='block1_conv2'))
base_model.add(layers.MaxPooling2D((2, 2), name='block1_pool'))
base_model.add(layers.Conv2D(128, (3, 3), activation='relu', padding='same',
                             name='block2_conv1'))
base_model.add(layers.Conv2D(128, (3, 3), activation='relu', padding='same',
                             name='block2_conv2'))
base_model.add(layers.MaxPooling2D((2, 2), name='block2_pool'))
base_model.summary()
# Pasted summary output kept verbatim as a no-op string literal.
# NOTE(review): its spatial sizes (256/128/64) presumably come from a run with
# a 256x256 input; with the 224x224 input_shape declared above the sizes are
# 224/112/56. The parameter counts do not depend on the input size.
"""
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
_________________________________________________________________
block1_conv1 (Conv2D) (None, 256, 256, 64) 1792
_________________________________________________________________
block1_conv2 (Conv2D) (None, 256, 256, 64) 36928
_________________________________________________________________
block1_pool (MaxPooling2D) (None, 128, 128, 64) 0
_________________________________________________________________
block2_conv1 (Conv2D) (None, 128, 128, 128) 73856
_________________________________________________________________
block2_conv2 (Conv2D) (None, 128, 128, 128) 147584
_________________________________________________________________
block2_pool (MaxPooling2D) (None, 64, 64, 128) 0
=================================================================
Total params: 260,160.0
Trainable params: 260,160.0
Non-trainable params: 0.0
"""
# Classification head stacked on top of the convolutional base.
# A Dropout(0.5) layer after Flatten was tried earlier and is currently
# disabled.
head_layers = (
    layers.Flatten(),
    layers.Dense(256, activation='relu'),
    # One sigmoid unit is enough because there are only two classes.
    layers.Dense(1, activation='sigmoid'),
)
for head_layer in head_layers:
    base_model.add(head_layer)
# Training configuration. `callbacks` and `decay` are defined here but are not
# read anywhere in the visible part of the script.
epochs = 50
callbacks = []
decay = 0.0
#schedule = None
# Callbacks that were used previously and are currently disabled:
#earlyStopping = EarlyStopping(monitor='val_loss', patience=10, verbose=0, mode='min')
#mcp_save = ModelCheckpoint('.mdl_wts.hdf5', save_best_only=True, monitor='val_loss', mode='min')
#reduce_lr_loss = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=3, verbose=1, epsilon=1e-5, mode='min')

# SGD with Nesterov momentum and a deliberately small learning rate.
sgd = optimizers.SGD(lr=1e-4, decay=1e-6, momentum=0.9, nesterov=True)
base_model.compile(
    optimizer=sgd,
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
# Load the ImageNet-pretrained VGG16 convolutional stack (no dense top) with
# the same input size as base_model; it serves as the source of weights.
vgg = applications.VGG16(
    include_top=False,
    weights='imagenet',
    input_shape=(224, 224, 3),
)
print('Model loaded.')
# Pasted list of layer names kept verbatim as a no-op string literal.
# NOTE(review): it contains block*_conv4, dense, dropout and
# global_average_pooling entries, so it presumably was printed from a
# different model than the VGG16(include_top=False) loaded above - confirm.
"""
['block1_conv1',
'block1_conv2',
'block1_pool',
'block2_conv1',
'block2_conv2',
'block2_pool',
'block3_conv1',
'block3_conv2',
'block3_conv3',
'block3_conv4',
'block3_pool',
'block4_conv1',
'block4_conv2',
'block4_conv3',
'block4_conv4',
'block4_pool',
'block5_conv1',
'block5_conv2',
'block5_conv3',
'block5_conv4',
'block5_pool',
'dense_1',
'dense_2',
'dense_3',
'dropout_1',
'global_average_pooling2d_1',
'input_1']
"""
# Copy the pretrained kernels and biases of the first two VGG16 blocks into
# the identically named layers of base_model. Looking layers up by name
# instead of by position keeps the copy correct even if layers are inserted
# into or removed from base_model later on.
# NOTE(review): the copied layers stay trainable; if they are meant to act as
# a fixed feature extractor they would need trainable = False followed by a
# recompile - confirm the intent.
pretrained_layer_names = (
    'block1_conv1',
    'block1_conv2',
    'block2_conv1',
    'block2_conv2',
)
for layer_name in pretrained_layer_names:
    source_layer = vgg.get_layer(layer_name)
    base_model.get_layer(layer_name).set_weights(source_layer.get_weights())
# NOTE(review): CUDA_VISIBLE_DEVICES is set only here, after the models have
# already been built; the backend may have picked its devices by then, in
# which case this assignment has no effect - confirm, or move it above the
# keras imports.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

train_dir = '/home/d/Desktop/s/data/train'
eval_dir = '/home/d/Desktop/s/data/eval'
test_dir = '/home/d/Desktop/s/data/test'

# NOTE(review): all generators scale pixels to [0, 1]. The ImageNet VGG16
# weights are documented to expect keras.applications.vgg16.preprocess_input
# (BGR order, per-channel mean subtraction, no scaling) - confirm whether the
# rescale should be replaced by that preprocessing function.

# Training images are rescaled and randomly augmented.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
)
# Validation and test images are only rescaled, never augmented.
val_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

# `shuffle` must be a real boolean: any non-empty string, including 'False',
# is truthy, so the string form silently shuffled the test set as well.
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(224, 224),
    class_mode='binary',
    batch_size=16,
    shuffle=True,
    seed=42,
)
val_generator = val_datagen.flow_from_directory(
    eval_dir,
    target_size=(224, 224),
    class_mode='binary',
    batch_size=16,
    shuffle=True,
    seed=42,
)
# The test images are served one at a time, without labels and in a fixed
# order, so that predictions can be matched back to the files.
test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=(224, 224),
    class_mode=None,
    batch_size=1,
    shuffle=False,
    seed=42,
)
# Training. The step counts are the number of whole batches each generator
# can supply from its directory; `epochs` comes from the configuration above.
train_steps = train_generator.n // train_generator.batch_size
val_steps = val_generator.n // val_generator.batch_size
history = base_model.fit_generator(
    train_generator,
    steps_per_epoch=train_steps,
    epochs=epochs,
    validation_data=val_generator,
    validation_steps=val_steps,
)
# Currently not passed to fit_generator:
#callbacks=[earlyStopping, mcp_save, reduce_lr_loss])
# Saving the trained model is currently disabled:
#base_model.save_weights('/home/d/Desktop/s/base_model_weights.h5')
#base_model.save('/home/d/Desktop/s/base_model_keras.h5')
# Plot the training and validation curves recorded in the History object.
metrics = history.history
# Keras releases differ in how they name the accuracy metric in the history
# ('acc' in older ones, 'accuracy' in newer ones), so accept either key.
acc_key = 'acc' if 'acc' in metrics else 'accuracy'
acc = metrics[acc_key]
val_acc = metrics['val_' + acc_key]
loss = metrics['loss']
val_loss = metrics['val_loss']
# Use a separate name for the x axis so the integer `epochs` training setting
# is not overwritten by a range object.
epoch_axis = range(1, len(acc) + 1)

# Figure 1: training vs. validation accuracy.
plt.plot(epoch_axis, acc, 'b', label='Training accuracy')
plt.plot(epoch_axis, val_acc, 'r', label='Validation accuracy')
plt.title('Training and Validation accuracy')
plt.legend()

# Figure 2: training vs. validation loss.
plt.figure()
plt.plot(epoch_axis, loss, 'b', label='Training loss')
plt.plot(epoch_axis, val_loss, 'r', label='Validation loss')
plt.title('Training and Validation loss')
plt.legend()
plt.show()
Обучение идёт очень долго, и от эпохи к эпохе тоже ничего не меняется:
2625/4002 [==================>...........] - ETA: 3:49 - loss: 7.9723 - acc: 0.5053
2626/4002 [==================>...........] - ETA: 3:49 - loss: 7.9720 - acc: 0.5053
2627/4002 [==================>...........] - ETA: 3:49 - loss: 7.9735 - acc: 0.5052
2628/4002 [==================>...........] - ETA: 3:48 - loss: 7.9732 - acc: 0.5052
2629/4002 [==================>...........] - ETA: 3:48 - loss: 7.9732 - acc: 0.5052
2630/4002 [==================>...........] - ETA: 3:48 - loss: 7.9729 - acc: 0.5052
2631/4002 [==================>...........] - ETA: 3:48 - loss: 7.9725 - acc: 0.5052
2632/4002 [==================>...........] - ETA: 3:48 - loss: 7.9729 - acc: 0.5052
2633/4002 [==================>...........] - ETA: 3:48 - loss: 7.9733 - acc: 0.5052
2634/4002 [==================>...........] - ETA: 3:47 - loss: 7.9722 - acc: 0.5053
2635/4002 [==================>...........] - ETA: 3:47 - loss: 7.9730 - acc: 0.5052
2636/4002 [==================>...........] - ETA: 3:47 - loss: 7.9719 - acc: 0.5053
2637/4002 [==================>...........] - ETA: 3:47 - loss: 7.9727 - acc: 0.5052
2638/4002 [==================>...........] - ETA: 3:47 - loss: 7.9731 - acc: 0.5052
2639/4002 [==================>...........] - ETA: 3:47 - loss: 7.9732 - acc: 0.5052