Notebook based on the first example of https://blog.keras.io/building-powerful-image-classification-models-using-very-little-data.html
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Input
from tensorflow.keras.layers import Activation, Dropout, Flatten, Dense
from tensorflow.keras import backend as K
from tensorflow.keras import optimizers
from tensorflow.keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
from tensorflow.keras import applications
from tensorflow.keras.callbacks import EarlyStopping, TensorBoard, ModelCheckpoint, CSVLogger
from tensorflow.keras.utils import plot_model, to_categorical
from tensorflow.keras.models import load_model
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
from IPython.display import Image, display
import pandas as pd
import numpy as np
import utils_keras
import utils_classification
from IPython.core.display import HTML
%matplotlib inline
# dimensions of our images.
img_width, img_height = 224, 224
num_classes=2
epochs = 8
batch_size = 32
#========== PATHS =========
name_data='heatcows'
path = '/home/sheila/datasets/cows/heat/'
dir_data = path #+'dataset-training' # TRAIN AND VALIDATION datasets are the same
path_outputs = '/home/sheila/datasets/cows/outputs/'
modelname='VGG16'
path_cnn = path_outputs + name_data+'-'+modelname
path_ftr_train = path_cnn+'-fts-train'
path_ftr_val = path_cnn+'-fts-val'
path_whole = path_cnn +'-topmodel-1000-256'
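The output folder (and the preview folder used later for save_to_dir) must exist before anything is written into them; a small sketch:
import os
# Create the output/preview directories if they are missing.
os.makedirs(path_outputs, exist_ok=True)
os.makedirs('/home/sheila/datasets/cows/previewheat/', exist_ok=True)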
datagen = ImageDataGenerator(
rotation_range=10,
width_shift_range=0.2,
height_shift_range=0.2,
shear_range=0.15,
zoom_range=[0.8,1.1], brightness_range=[0.4,1.0],
horizontal_flip=True,
fill_mode='nearest')
i = 0
for batch in datagen.flow_from_directory(path + 'onheat', batch_size=5, save_to_dir='/home/sheila/datasets/cows/previewheat/', save_format='jpg'):
i += 1
if i > 600:
break # otherwise the generator would loop indefinitely
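For comparison, the blog post this notebook is based on previews augmentation on a single image with datagen.flow rather than flow_from_directory; a sketch with a hypothetical file name:
# Preview augmentations of one image, as in the original blog post.
img = load_img(path + 'onheat/example.jpg')  # 'example.jpg' is a hypothetical file
x = img_to_array(img)                        # array of shape (height, width, 3)
x = x.reshape((1,) + x.shape)                # batch of one
i = 0
for batch in datagen.flow(x, batch_size=1, save_to_dir='/home/sheila/datasets/cows/previewheat/', save_format='jpg'):
    i += 1
    if i > 20:
        break  # the generator loops indefinitely otherwise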
model_cnn_name='VGG16'
model_cnn = applications.VGG16(include_top=False, weights='imagenet')
#modelname='mobilenet'
#model_pretrained = applications.mobilenet.MobileNet(input_shape=(224, 224, 3), include_top=False, weights='imagenet')
#modelname='resnet50'
#model_pretrained = applications.ResNet50(input_shape=(224, 224, 3), include_top=False, weights='imagenet')
#modelname='inceptionV3'
#model_pretrained = applications.InceptionV3(input_shape=(224, 224, 3), include_top=False, weights='imagenet')
plot_model(model_cnn, to_file = path_outputs + 'plot-'+model_cnn_name + '.png', show_shapes=True)
# display(Image(filename=path_outputs + 'plot-' + model_cnn_name + '.png'))
Saving the output of the pretrained network on my data. No modification is made to that network.
# class_mode='categorical' -> 2D one-hot encoded labels; the labels are the second element of each batch
def save_bottleneck_features(model,path_fts_train, path_fts_val):
datagen = ImageDataGenerator(rescale=1. / 255, validation_split=0.1)# no augmentation
arguments = {'directory': dir_data, 'target_size': (img_height, img_width), 'batch_size': batch_size,
'class_mode': None, # just data, no labels (no training)
'shuffle': False} # features in order, so we keep track of the elements for next phase
    # We could also allow shuffling and still recover the label of each element, but that would have
    # to be handled manually rather than through fit or predict. We need these labels for the next step.
# Generating the dataset
generator = datagen.flow_from_directory(**arguments, subset='training')
utils_keras.print_generator_details(generator)
class_names = utils_keras.get_class_names(generator)
steps = utils_keras.get_steps_for_epoch(generator)
    x_fts_train = model.predict(generator, steps=steps)  # predict_generator is deprecated; predict accepts generators
print('- Shape x_fts_train', x_fts_train.shape)
np.savez(file=path_fts_train, x_fts_train=x_fts_train, # save features training and labels
y_train=generator.classes, class_names=class_names) # classes of each element indicated in their numeric index
generator = datagen.flow_from_directory(**arguments, subset='validation') # same directory
steps = utils_keras.get_steps_for_epoch(generator)
    x_fts_val = model.predict(generator, steps=steps)
np.savez(file=path_fts_val, x_fts_val=x_fts_val, y_val=generator.classes,
class_names=class_names) # saving features validation
save_bottleneck_features(model_cnn, path_ftr_train, path_ftr_val)
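utils_keras is a local helper module that is not included in this notebook. Judging from how it is called above, minimal implementations could look like the following sketch (names and exact behavior are assumptions):
# Assumed minimal versions of the utils_keras helpers used above.
def get_class_names(generator):
    # class_indices maps name -> index; return the names ordered by index
    return [name for name, _ in sorted(generator.class_indices.items(), key=lambda kv: kv[1])]
def get_steps_for_epoch(generator):
    # enough steps to pass over every sample exactly once
    return int(np.ceil(generator.samples / generator.batch_size))
def print_generator_details(generator):
    print('samples:', generator.samples, '| class_indices:', generator.class_indices)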
def get_saved_features(path_ftr_train, path_ftr_val):
db_train = np.load(path_ftr_train + '.npz')
print('npz train', db_train.files)
x_fts_train = db_train['x_fts_train']
y_train = db_train['y_train']
class_names = db_train['class_names']
db_val = np.load(path_ftr_val + '.npz')
x_fts_val = db_val['x_fts_val']
y_val = db_val['y_val']
y_train = to_categorical(y_train, num_classes)
y_val = to_categorical(y_val, num_classes)
print('x_fts_train = ', x_fts_train.shape)
print('y_train = ', y_train.shape)
print('x_fts_val = ', x_fts_val.shape)
print('y_val = ', y_val.shape)
print('class_names = ', class_names)
return x_fts_train, y_train, x_fts_val, y_val, class_names
x_fts_train, y_train, x_fts_val, y_val, class_names = get_saved_features(path_ftr_train, path_ftr_val)
def create_model_top(input_shape):
inputs = Input(shape=input_shape)
x = Flatten()(inputs)
x = Dense(1000, activation='relu', name='dense_1000')(x)
x = Dropout(0.5, name='dropout_1000')(x)
x = Dense(256, activation='relu', name='dense_256')(x)
x = Dropout(0.5, name='dropout_256')(x)
    pred_layer = Dense(num_classes, activation='softmax', name='softmax_out')(x)
model = Model(inputs=inputs, outputs=pred_layer)
plot_model(model, to_file=path_outputs + "plot-model_top.png", show_shapes=True)
return model
model_top = create_model_top(input_shape=x_fts_train.shape[1:])
model_top.summary()
plot_model(model_top, show_shapes=True)
In Python, I can retrain a model many times by re-running this function; it does not create a new model each time. I am calling train_model_top on the same object, so its weights are updated in place on every call (see the sketch after the training cell below).
def train_model_top(model_top, x_fts_train, y_train, x_fts_val, y_val, class_names, epochs, batch_size, path_whole):
callbacks = [EarlyStopping(monitor='val_accuracy', patience=10), TensorBoard(log_dir=path_outputs + 'logs'),
ModelCheckpoint(path_whole + '-model_weights_best.h5', monitor='val_loss', save_best_only=True), #_{epoch:02d}
CSVLogger(path_outputs + 'log.csv')]
    # * The callbacks save the model with the smallest val_loss and stop training once val_accuracy has not improved for 10 epochs
model_top.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
# optimizer=optimizers.SGD(lr=0.01,momentum=0.9),
# optimizer='adam',
history = model_top.fit(x_fts_train, y_train, epochs=epochs, batch_size=batch_size,
validation_data=(x_fts_val, y_val), callbacks=callbacks, shuffle=True, verbose=1)
model_top.save_weights(path_whole + '-weights.h5') # save just weights. Opposite:load_weights
    model_top.save(path_whole + '-model_weights_last.h5')  # save model and weights; note {epoch:02d} is only interpolated by ModelCheckpoint, not by save. Opposite: load_model
utils_keras.plot_learning_from_history(model_top.history, figsize=(13, 7), filename=path_whole + 'historyplot')
display(pd.DataFrame(model_top.history.history))
    print(model_top.history.params)
    return history
history = train_model_top(model_top, x_fts_train, y_train, x_fts_val, y_val, class_names, epochs, batch_size, path_whole)
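Because fit updates the model in place, re-running the training call resumes from the current weights instead of starting over; a sketch (commented out so it does not retrain on execution):
# history_2 = model_top.fit(x_fts_train, y_train, epochs=epochs, batch_size=batch_size,
#                           validation_data=(x_fts_val, y_val))  # continues training, does not restart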
def plot_cm(model, x_fts_train, y_train, x_fts_val, y_val):
y_val_pred = model.predict(x_fts_val)
y_train_pred = model.predict(x_fts_train)
cm_val = confusion_matrix(utils_classification.invert_categorical(y_val), utils_classification.invert_categorical(y_val_pred))
print("The confusion matrix of the validation set is:"); display(pd.DataFrame(cm_val))
fig1 = utils_classification.plot_confusion_matrix(cm_val, class_names, normalize=False,
fill_numbers=True, figsize=(5, 5), rotation_xaxis=90,
colorbar=False, file_name=path_whole + 'cm')
fig2 = utils_classification.plot_confusion_matrix(cm_val, class_names, normalize=True,
fill_numbers=True, figsize=(5, 5), rotation_xaxis=90,
colorbar=False, file_name=path_whole + 'cm-normalized')
display(fig2) # fig1
cm_train = confusion_matrix(utils_classification.invert_categorical(y_train), utils_classification.invert_categorical(y_train_pred))
print("The confusion matrix of the training set is:"); display(pd.DataFrame(cm_val))
fig3 = utils_classification.plot_confusion_matrix(cm_train, class_names, normalize=False,
fill_numbers=True, figsize=(5, 5), rotation_xaxis=90,
colorbar=False, file_name=path_whole + 'cm-train')
fig4 = utils_classification.plot_confusion_matrix(cm_train, class_names, normalize=True,
fill_numbers=True, figsize=(5, 5), rotation_xaxis=90,
colorbar=False, file_name=path_whole + 'cm-train-normalized')
    display(fig4)  # fig3
    return cm_val, cm_train
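utils_classification is also a local module; plausible minimal versions of two of the helpers used here (plot_confusion_matrix, which only draws, is omitted). These are assumptions, not the actual implementations:
# Assumed minimal versions of two utils_classification helpers.
def invert_categorical(y_one_hot):
    # undo to_categorical: one-hot rows -> integer class indices
    return np.argmax(y_one_hot, axis=1)
def compute_accuracy_from_cm(cm):
    # accuracy = correctly classified (diagonal) / total samples
    return np.trace(cm) / np.sum(cm)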
#==== Running with the last epoch values ==========
cm_val, cm_train = plot_cm(model_top, x_fts_train, y_train, x_fts_val, y_val)
model_best = load_model(path_whole + '-model_weights_best.h5')
cm_val, cm_train = plot_cm(model_best, x_fts_train, y_train, x_fts_val, y_val)
y_val_pred = model_best.predict(x_fts_val)
y_val_pred[0]
y_val
Therefore, when using it for multiclass data, a one-vs-rest change is needed: each class in turn becomes the positive class and all the remaining classes the negative class.
We repeat this process for each class.
import sklearn.metrics  # only confusion_matrix was imported from sklearn above
fpr = dict()
tpr = dict()
roc_auc = dict()
for i in range(num_classes):
fpr[i], tpr[i], _ = sklearn.metrics.roc_curve(y_val[:, i], y_val_pred[:, i])
roc_auc[i] = sklearn.metrics.auc(fpr[i], tpr[i])
fpr
plt.plot(fpr[1], tpr[1], label='ROC curve (area = %0.2f)' % roc_auc[1])  # class index 1 (num_classes=2, so valid indices are 0 and 1)
plt.legend()
for i in range(num_classes):
plt.plot(fpr[i], tpr[i], label='ROC AUC={:.3f} - {} '.format(roc_auc[i], class_names[i]))
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title("ROC Curves per class")
plt.legend()
precision = dict()
recall = dict()
average_precision = dict()
for i in range(num_classes):
precision[i], recall[i], _ = sklearn.metrics.precision_recall_curve(y_val[:, i], y_val_pred[:, i])
average_precision[i] = sklearn.metrics.average_precision_score(y_val[:, i], y_val_pred[:, i])
for i in range(num_classes):
    plt.step(recall[i], precision[i], where='post', label='AP={:.3f} - {} '.format(average_precision[i], class_names[i]))
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.title("Precision-Recall curves per class")
plt.legend()
This is perhaps the third part of the Keras cats-vs-dogs tutorial.
The weights saved here are the last ones, reflecting the state in which the model finished after the fit call. However, thanks to the ModelCheckpoint callback, the model with the lowest validation loss (model_weights_best) was also saved, and it is not necessarily the same. The quantity to monitor when saving the best model is configurable.
model_top.save_weights(path_whole+'-weights.h5') # save just weights. Opposite:load_weights
model_top.save(path_whole+'-model_weights.h5') # save model and weights. Opposite:load_model
## Analyzing saved model and best model ##
model_last = load_model(path_whole + '-model_weights.h5')
model_best = load_model(path_whole + '-model_weights_best.h5')
#np.all(model_last==model_best)
#np.all(model_last.layers[0].get_weights()[0] == model_best.layers[0].get_weights()[0])
#print(model.layers[1].get_weights()[0])
#model = model_last
model_top = model_best
#print('model best',model_best.layers[1].get_weights()[0])
#print('model last',model_last.layers[1].get_weights()[0])
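To check whether the last and the best model actually differ, their weight arrays can be compared directly; a small sketch:
# True means every weight array of the two models is identical.
same = all(np.array_equal(w_last, w_best)
           for w_last, w_best in zip(model_last.get_weights(), model_best.get_weights()))
print('last == best:', same)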
print('Expected accuracy = ', history.history['val_accuracy']) # this is from the last one
print('Manual accuracy =', utils_classification.compute_accuracy_from_cm(cm_val)) # this is from the best model acc
The whole model combines the weights of the pretrained convolutional base with the recently trained dense model added on top.
def create_model_whole(model_cnn, model_top, path_whole):
output_whole = model_top(model_cnn.output)
model_whole = Model(inputs=model_cnn.input, outputs=output_whole)
model_whole.summary()
model_whole.save(path_whole + '_model_whole.h5') # save whole model + weights
    plot_model(model_whole, expand_nested=True, show_shapes=True, to_file=path_outputs + "plot-model_whole.png")
    return model_whole
model_whole = create_model_whole(model_cnn, model_top, path_whole)
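A minimal sketch of running the assembled model on a single raw image (the file name is hypothetical):
# Predict one image end to end; 'example.jpg' is a hypothetical file name.
img = load_img(path + 'onheat/example.jpg', target_size=(img_height, img_width))
x = img_to_array(img) / 255.  # same rescaling used during feature extraction
x = np.expand_dims(x, axis=0)
probs = model_whole.predict(x)
print(class_names[np.argmax(probs[0])], probs[0])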