In [1]:
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Input
from tensorflow.keras.layers import Activation, Dropout, Flatten, Dense
from tensorflow.keras import backend as K
from tensorflow.keras import optimizers
from tensorflow.keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
from tensorflow.keras import applications
from tensorflow.keras.callbacks import EarlyStopping, TensorBoard, ModelCheckpoint, CSVLogger
from tensorflow.keras.utils import plot_model, to_categorical
from tensorflow.keras.models import load_model

import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
from IPython.display import Image, display
import pandas as pd
import numpy as np

import utils_keras
import utils_classification
from IPython.core.display import HTML

%matplotlib inline
Using TensorFlow backend.

0) Preparing the Dataset / Generators

In [2]:
# dimensions of our images.
img_width, img_height = 224, 224
num_classes=2
epochs = 8
batch_size = 32

#========== PATHS =========
name_data='heatcows'
path = '/home/sheila/datasets/cows/heat/'
dir_data = path #+'dataset-training' # TRAIN AND VALIDATION datasets are the same
path_outputs = '/home/sheila/datasets/cows/outputs/'
modelname='VGG16'
path_cnn    = path_outputs + name_data+'-'+modelname
path_ftr_train = path_cnn+'-fts-train'
path_ftr_val   = path_cnn+'-fts-val'
path_whole = path_cnn +'-topmodel-1000-256'

Augmenting Data

In [3]:
# ImageDataGenerator, array_to_img, img_to_array and load_img are already imported from tensorflow.keras.preprocessing.image above

datagen = ImageDataGenerator(
        rotation_range=10,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.15,
        zoom_range=[0.8,1.1], brightness_range=[0.4,1.0],
        horizontal_flip=True,
        fill_mode='nearest')

# NOTE: flow_from_directory treats each subdirectory of the given directory as a class,
# so pointing it at a single class folder directly finds no images (see the output below).
i = 0
for batch in datagen.flow_from_directory(path+'/onheat', batch_size=5,
                                          save_to_dir='/home/sheila/datasets/cows/previewheat/', save_format='jpg'):
    i += 1
    if i > 600:
        break  # otherwise the generator would loop indefinitely
Found 0 images belonging to 0 classes.
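
The preview above finds no images because flow_from_directory expects the directory it is given to contain one subfolder per class. A minimal workaround sketch (not part of the original run, assuming the images live under path + 'onheat/'): point the generator at the dataset root and restrict it to that class with the classes argument.

# Sketch: preview augmentations for the 'onheat' class only
preview_gen = datagen.flow_from_directory(path, classes=['onheat'], batch_size=5,
                                          save_to_dir='/home/sheila/datasets/cows/previewheat/',
                                          save_format='jpg')
for i, batch in enumerate(preview_gen):
    if i >= 600:
        break  # the generator loops indefinitely, so stop manually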

1) Defining Architecture

1.a) Pretrained architecture

Loading VGG 16 architecture.

In [4]:
model_cnn_name='VGG16'
model_cnn = applications.VGG16(include_top=False, weights='imagenet')

#modelname='mobilenet'
#model_pretrained = applications.mobilenet.MobileNet(input_shape=(224, 224, 3), include_top=False, weights='imagenet')

#modelname='resnet50'
#model_pretrained = applications.ResNet50(input_shape=(224, 224, 3), include_top=False, weights='imagenet')

#modelname='inceptionV3'
#model_pretrained = applications.InceptionV3(input_shape=(224, 224, 3), include_top=False, weights='imagenet')

plot_model(model_cnn, to_file = path_outputs + 'plot-'+model_cnn_name + '.png', show_shapes=True)
# display(Image(filename=to_file))
Out[4]:

Saving features obtained by running my data through a pre-trained model

Saving the output of the pre-trained network over my data. No modification is made to the network itself.

In [5]:
# class_mode='categorical' -> 2D one-hot encoded labels; the labels are the second element of each batch
def save_bottleneck_features(model, path_fts_train, path_fts_val):
    datagen = ImageDataGenerator(rescale=1. / 255, validation_split=0.1)  # no augmentation
    arguments = {'directory': dir_data, 'target_size': (img_height, img_width), 'batch_size': batch_size,
            'class_mode': None,  # just data, no labels (no training)
            'shuffle': False}  # keep the features in order so they can be matched to labels in the next phase
    # We could also shuffle and keep track of the label of each element, but that would have to be
    # handled manually rather than through fit or predict, and this data is needed for the next step.
    
    # Generating the dataset
    generator = datagen.flow_from_directory(**arguments, subset='training') 
    utils_keras.print_generator_details(generator)  
    class_names = utils_keras.get_class_names(generator)
    steps = utils_keras.get_steps_for_epoch(generator)
    x_fts_train = model.predict_generator(generator=generator, steps=steps)
    print('- Shape x_fts_train', x_fts_train.shape)
    np.savez(file=path_fts_train, x_fts_train=x_fts_train, # save features training and labels
        y_train=generator.classes, class_names=class_names) # classes of each element indicated in their numeric index

    
    generator = datagen.flow_from_directory(**arguments, subset='validation') # same directory
    steps = utils_keras.get_steps_for_epoch(generator)
    x_fts_val = model.predict_generator(generator, steps)
    np.savez(file=path_fts_val, x_fts_val=x_fts_val, y_val=generator.classes,
            class_names=class_names) # saving features validation    

Saving Features

In [6]:
save_bottleneck_features(model_cnn, path_ftr_train, path_ftr_val)
Found 4670 images belonging to 2 classes.
===== History GENERATOR Characteristics =====
Generator class_indices =  {'nonheat': 0, 'onheat': 1}
Generator classes =  [0 0 0 ... 1 1 1]
Generator Batch Size =  32
Generator Total Batches Seen =  0
Generator Num Classes=  2
Generator Number of Samples=  4670
Generator len(classes)=  4670
Generator len(Filenames) =  4670
- Shape x_fts_train (4670, 7, 7, 512)
Found 518 images belonging to 2 classes.

Opening saved Features

In [7]:
def get_saved_features(path_ftr_train, path_ftr_val):
    db_train = np.load(path_ftr_train + '.npz')
    print('npz train', db_train.files)
    x_fts_train = db_train['x_fts_train']
    y_train = db_train['y_train']
    class_names = db_train['class_names']
    db_val = np.load(path_ftr_val + '.npz')
    x_fts_val = db_val['x_fts_val']
    y_val = db_val['y_val']
    y_train = to_categorical(y_train, num_classes)
    y_val = to_categorical(y_val, num_classes)

    print('x_fts_train = ', x_fts_train.shape)
    print('y_train = ', y_train.shape)
    print('x_fts_val = ', x_fts_val.shape)
    print('y_val = ', y_val.shape)
    print('class_names = ', class_names)
    return x_fts_train, y_train, x_fts_val, y_val, class_names
x_fts_train, y_train, x_fts_val, y_val, class_names = get_saved_features(path_ftr_train, path_ftr_val)
npz train ['x_fts_train', 'y_train', 'class_names']
x_fts_train =  (4670, 7, 7, 512)
y_train =  (4670, 2)
x_fts_val =  (518, 7, 7, 512)
y_val =  (518, 2)
class_names =  ['nonheat' 'onheat']

Loading already saved Features (the same steps as the function above, written inline)

In [8]:
db_train = np.load(path_ftr_train+'.npz')
print('npz train',db_train.files)

x_fts_train = db_train['x_fts_train']
y_train     = db_train['y_train']
class_names = db_train['class_names']

db_val = np.load(path_ftr_val+'.npz')
x_fts_val = db_val['x_fts_val']
y_val     = db_val['y_val']

y_train = to_categorical(y_train, num_classes)
y_val = to_categorical(y_val, num_classes)

print('x_fts_train = ', x_fts_train.shape)
print('y_train = ', y_train.shape)
print('x_fts_val = ', x_fts_val.shape)
print('y_val = ', y_val.shape)
print('class_names = ',class_names)
npz train ['x_fts_train', 'y_train', 'class_names']
x_fts_train =  (4670, 7, 7, 512)
y_train =  (4670, 2)
x_fts_val =  (518, 7, 7, 512)
y_val =  (518, 2)
class_names =  ['nonheat' 'onheat']

1.b) Create the architecture to train on top

In [9]:
def create_model_top(input_shape):
    inputs = Input(shape=input_shape)
    x = Flatten()(inputs)
    x = Dense(1000, activation='relu', name='dense_1000')(x)
    x = Dropout(0.5, name='dropout_1000')(x)
    x = Dense(256, activation='relu', name='dense_256')(x)
    x = Dropout(0.5, name='dropout_256')(x)
    pred_layer = Dense(num_classes, activation='softmax', name='softmax_20')(x)
    model = Model(inputs=inputs, outputs=pred_layer)
    plot_model(model, to_file=path_outputs + "plot-model_top.png", show_shapes=True)
    return model
model_top = create_model_top(input_shape=x_fts_train.shape[1:])
model_top.summary()
plot_model(model_top, show_shapes=True)
Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
input_2 (InputLayer)         [(None, 7, 7, 512)]       0         
_________________________________________________________________
flatten (Flatten)            (None, 25088)             0         
_________________________________________________________________
dense_1000 (Dense)           (None, 1000)              25089000  
_________________________________________________________________
dropout_1000 (Dropout)       (None, 1000)              0         
_________________________________________________________________
dense_256 (Dense)            (None, 256)               256256    
_________________________________________________________________
dropout_256 (Dropout)        (None, 256)               0         
_________________________________________________________________
softmax_20 (Dense)           (None, 2)                 514       
=================================================================
Total params: 25,345,770
Trainable params: 25,345,770
Non-trainable params: 0
_________________________________________________________________
Out[9]:

2) Compiling and Fitting - just the top part - the CNN features are unmodified

In Python, I can retrain a model many times simply by running this function again. It does not create a new model on each call: train_model_top is applied to the same object, which is modified in place every time (see the sketch after the training output below).

In [10]:
def train_model_top(model_top, x_fts_train, y_train, x_fts_val, y_val, class_names, epochs, batch_size, path_whole):
    # ModelCheckpoint keeps the weights with the lowest val_loss seen so far; EarlyStopping stops
    # training once val_accuracy has not improved for 10 epochs.
    callbacks = [EarlyStopping(monitor='val_accuracy', patience=10), TensorBoard(log_dir=path_outputs + 'logs'),
        ModelCheckpoint(path_whole + '-model_weights_best.h5', monitor='val_loss', save_best_only=True), # add '_{epoch:02d}' to keep one file per epoch
        CSVLogger(path_outputs + 'log.csv')]

    model_top.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
    # optimizer=optimizers.SGD(lr=0.01, momentum=0.9),
    # optimizer='adam',

    history = model_top.fit(x_fts_train, y_train, epochs=epochs, batch_size=batch_size,
                        validation_data=(x_fts_val, y_val), callbacks=callbacks, shuffle=True, verbose=1)

    model_top.save_weights(path_whole + '-weights.h5')  # save just the weights (counterpart: load_weights)
    model_top.save(path_whole + '-model_weights_last_epoch.h5')  # save the model + weights (counterpart: load_model)
    utils_keras.plot_learning_from_history(history, figsize=(13, 7), filename=path_whole + 'historyplot')
    display(pd.DataFrame(history.history))
    print(history.params)
    return history

history = train_model_top(model_top, x_fts_train, y_train, x_fts_val, y_val, class_names, epochs, batch_size, path_whole)
Train on 4670 samples, validate on 518 samples
Epoch 1/8
  32/4670 [..............................] - ETA: 2:40 - loss: 1.5056 - accuracy: 0.5312
WARNING: Logging before flag parsing goes to stderr.
W0822 15:09:52.557423 140141793986368 callbacks.py:244] Method (on_train_batch_end) is slow compared to the batch update (0.104025). Check your callbacks.
4670/4670 [==============================] - 4s 864us/sample - loss: 1.6801 - accuracy: 0.6964 - val_loss: 0.2413 - val_accuracy: 0.9112
Epoch 2/8
4670/4670 [==============================] - 5s 1ms/sample - loss: 0.2764 - accuracy: 0.9024 - val_loss: 0.1799 - val_accuracy: 0.9324
Epoch 3/8
4670/4670 [==============================] - 5s 1ms/sample - loss: 0.1560 - accuracy: 0.9505 - val_loss: 0.0936 - val_accuracy: 0.9749
Epoch 4/8
4670/4670 [==============================] - 2s 502us/sample - loss: 0.0939 - accuracy: 0.9743 - val_loss: 0.1056 - val_accuracy: 0.9556
Epoch 5/8
4670/4670 [==============================] - 5s 1ms/sample - loss: 0.0914 - accuracy: 0.9767 - val_loss: 0.0833 - val_accuracy: 0.9807
Epoch 6/8
4670/4670 [==============================] - 2s 509us/sample - loss: 0.0717 - accuracy: 0.9831 - val_loss: 0.2323 - val_accuracy: 0.9363
Epoch 7/8
4670/4670 [==============================] - 5s 1ms/sample - loss: 0.0367 - accuracy: 0.9927 - val_loss: 0.0560 - val_accuracy: 0.9903
Epoch 8/8
4670/4670 [==============================] - 2s 497us/sample - loss: 0.0441 - accuracy: 0.9934 - val_loss: 0.0822 - val_accuracy: 0.9865
Width=10.075 - height=2.4022727272727264
       loss  accuracy  val_loss  val_accuracy
0  1.680113  0.696360  0.241296      0.911197
1  0.276414  0.902355  0.179908      0.932432
2  0.155966  0.950535  0.093636      0.974903
3  0.093917  0.974304  0.105602      0.955598
4  0.091431  0.976660  0.083308      0.980695
5  0.071653  0.983083  0.232319      0.936293
6  0.036681  0.992719  0.056023      0.990348
7  0.044138  0.993362  0.082249      0.986486
{'batch_size': 32, 'epochs': 8, 'steps': 146, 'samples': 4670, 'verbose': 0, 'do_validation': True, 'metrics': ['loss', 'accuracy', 'val_loss', 'val_accuracy']}
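
As mentioned above, calling train_model_top again on the same object simply continues training from the current weights. A minimal sketch (not executed here):

# Continue training the same, already trained, model_top for a few more epochs
history_more = train_model_top(model_top, x_fts_train, y_train, x_fts_val, y_val,
                               class_names, epochs=4, batch_size=batch_size, path_whole=path_whole)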

Results through Confusion Matrices

In [11]:
def plot_cm(model, x_fts_train, y_train, x_fts_val, y_val):
    
    y_val_pred = model.predict(x_fts_val)
    y_train_pred = model.predict(x_fts_train)    
    
    cm_val = confusion_matrix(utils_classification.invert_categorical(y_val), utils_classification.invert_categorical(y_val_pred))
    print("The confusion matrix of the validation set is:"); display(pd.DataFrame(cm_val))
    
    fig1 = utils_classification.plot_confusion_matrix(cm_val, class_names, normalize=False,
                                                      fill_numbers=True, figsize=(5, 5), rotation_xaxis=90,
                                                      colorbar=False, file_name=path_whole + 'cm')
    
    fig2 = utils_classification.plot_confusion_matrix(cm_val, class_names, normalize=True,
                                                      fill_numbers=True, figsize=(5, 5), rotation_xaxis=90,
                                                      colorbar=False, file_name=path_whole + 'cm-normalized')
    display(fig2) # fig1
    
    cm_train = confusion_matrix(utils_classification.invert_categorical(y_train), utils_classification.invert_categorical(y_train_pred))
    print("The confusion matrix of the training set is:"); display(pd.DataFrame(cm_val))
    
    fig3 = utils_classification.plot_confusion_matrix(cm_train, class_names, normalize=False,
                                                      fill_numbers=True, figsize=(5, 5), rotation_xaxis=90,
                                                      colorbar=False, file_name=path_whole + 'cm-train')
    fig4 = utils_classification.plot_confusion_matrix(cm_train, class_names, normalize=True,
                                                      fill_numbers=True, figsize=(5, 5), rotation_xaxis=90,
                                                      colorbar=False, file_name=path_whole + 'cm-train-normalized')
    
    display(fig4) # fig3
    return cm_train, cm_val

Results with the model from the last epoch (possibly slightly overfitted, although early stopping limits this)

In [12]:
#====  Running with the last epoch values ==========
cm_train, cm_val = plot_cm(model_top, x_fts_train, y_train, x_fts_val, y_val)
The confusion matrix of the validation set is:
     0    1
0  236    6
1    1  275

Results with the model from the best epoch (lowest validation loss)

In [13]:
model_best = load_model(path_whole + '-model_weights_best.h5')
cm_train, cm_val = plot_cm(model_best, x_fts_train, y_train, x_fts_val, y_val)
The confusion matrix of the validation set is:
     0    1
0  238    4
1    1  275

ROC Curve and ROC Area Under Curve (AUC)

In [14]:
y_val_pred = model_best.predict(x_fts_val)
y_val_pred[0]
y_val
Out[14]:
array([[1., 0.],
       [1., 0.],
       [1., 0.],
       ...,
       [0., 1.],
       [0., 1.],
       [0., 1.]], dtype=float32)

Remember that the ROC curve is defined for binary classification.

Therefore, to use it in a multi-class setting, each class is in turn treated as the positive class and all the remaining classes as the negative class (one-vs-rest).

--> Error --> sklearn.metrics.roc_curve(y_val, y_val_pred) cannot be called directly on the full one-hot matrices

We repeat this process for each class.

In [15]:
import sklearn.metrics  # needed: `from sklearn.metrics import confusion_matrix` above does not bind the name `sklearn`

fpr = dict()
tpr = dict()
roc_auc = dict()

for i in range(num_classes):
    fpr[i], tpr[i], _ = sklearn.metrics.roc_curve(y_val[:, i], y_val_pred[:, i])
    roc_auc[i] = sklearn.metrics.auc(fpr[i], tpr[i])

fpr
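
Since this is a two-class problem, the one-vs-rest loop above is equivalent to a single binary ROC curve built from the positive-class ('onheat') scores. A minimal check (not part of the original run):

# Equivalent binary ROC using only the positive-class column
fpr_bin, tpr_bin, _ = sklearn.metrics.roc_curve(y_val[:, 1], y_val_pred[:, 1])
print('Binary ROC AUC =', sklearn.metrics.auc(fpr_bin, tpr_bin))
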
In [ ]:
plt.plot(fpr[1], tpr[1],  label='ROC curve (area = %0.2f)' % roc_auc[1])  # single curve for the positive class ('onheat')
plt.legend()
In [16]:
for i in range(num_classes):
    plt.plot(fpr[i], tpr[i], label='ROC AUC={:.3f} - {} '.format(roc_auc[i], class_names[i]))
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title("ROC Curves per class")
plt.legend()

Precision Recall

In [19]:
precision = dict()
recall = dict()
average_precision = dict()
for i in range(num_classes):
    precision[i], recall[i], _ = sklearn.metrics.precision_recall_curve(y_val[:, i], y_val_pred[:, i])
    average_precision[i] = sklearn.metrics.average_precision_score(y_val[:, i], y_val_pred[:, i])
In [20]:
for i in range(num_classes):
    plt.step(recall[i], precision[i], where='post',
             label='Average precision={:.3f} - {}'.format(average_precision[i], class_names[i]))
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.title("Precision-Recall curves per class")
plt.legend()
In [ ]:
 

==> Check the rest from this point later -

Perhaps it is the third part of the Keras cats-vs-dogs tutorial.


Set the path for all the files to be saved

The weights saved here are the last ones, reflecting the state the model was in when fit finished. However, thanks to the ModelCheckpoint callback, the model with the lowest validation loss (model_weights_best) was also saved during training, and it is not necessarily the same one. The quantity to monitor when saving the best model is configurable.
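
For instance (a sketch only, with a hypothetical filename), to keep the checkpoint with the highest validation accuracy instead of the lowest validation loss, the callback can be configured as:

# Hypothetical alternative: checkpoint the weights with the best (highest) val_accuracy
checkpoint_acc = ModelCheckpoint(path_whole + '-model_weights_best_acc.h5',
                                 monitor='val_accuracy', mode='max', save_best_only=True)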

In [23]:
model_top.save_weights(path_whole+'-weights.h5')  # save just the weights (counterpart: load_weights)
model_top.save(path_whole+'-model_weights.h5')  # save the model + weights (counterpart: load_model)

## Analyzing saved model and best model ##
model_last = load_model(path_whole+'-model_weights.h5')
model_best = load_model(path_whole+'-model_weights_best.h5')


#np.all(model_last==model_best)
#np.all(model_last.layers[0].get_weights()[0] == model_best.layers[0].get_weights()[0])

#print(model.layers[1].get_weights()[0])
#model = model_last
model_top = model_best
#print('model best',model_best.layers[1].get_weights()[0])
#print('model last',model_last.layers[1].get_weights()[0])

Verifying accuracy

In [24]:
print('Expected accuracy = ', history.history['val_accuracy'])  # per-epoch validation accuracy from the training run
print('Manual accuracy =', utils_classification.compute_accuracy_from_cm(cm_val))  # accuracy from the best model's validation confusion matrix
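
As a sanity check on the manual value: from the best model's validation confusion matrix above, accuracy = (238 + 275) / 518 ≈ 0.990, which matches the best val_accuracy reported during training (0.990348, epoch 7).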

Building a Complete model

The model combines the weights of the pre-trained part with the dense model just trained on top.

In [17]:
def create_model_whole(model_cnn, model_top, path_whole):
    output_whole = model_top(model_cnn.output)
    model_whole = Model(inputs=model_cnn.input, outputs=output_whole)
    model_whole.summary()
    model_whole.save(path_whole + '_model_whole.h5')  # save whole model + weights
    plot_model(model_whole, expand_nested=True, show_shapes=True, to_file=path_outputs+"plot-model_whole.png")

create_model_whole(model_cnn, model_top, path_whole)
Model: "model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
input_1 (InputLayer)         [(None, None, None, 3)]   0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, None, None, 64)    1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, None, None, 64)    36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, None, None, 64)    0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, None, None, 128)   73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, None, None, 128)   147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, None, None, 128)   0         
_________________________________________________________________
block3_conv1 (Conv2D)        (None, None, None, 256)   295168    
_________________________________________________________________
block3_conv2 (Conv2D)        (None, None, None, 256)   590080    
_________________________________________________________________
block3_conv3 (Conv2D)        (None, None, None, 256)   590080    
_________________________________________________________________
block3_pool (MaxPooling2D)   (None, None, None, 256)   0         
_________________________________________________________________
block4_conv1 (Conv2D)        (None, None, None, 512)   1180160   
_________________________________________________________________
block4_conv2 (Conv2D)        (None, None, None, 512)   2359808   
_________________________________________________________________
block4_conv3 (Conv2D)        (None, None, None, 512)   2359808   
_________________________________________________________________
block4_pool (MaxPooling2D)   (None, None, None, 512)   0         
_________________________________________________________________
block5_conv1 (Conv2D)        (None, None, None, 512)   2359808   
_________________________________________________________________
block5_conv2 (Conv2D)        (None, None, None, 512)   2359808   
_________________________________________________________________
block5_conv3 (Conv2D)        (None, None, None, 512)   2359808   
_________________________________________________________________
block5_pool (MaxPooling2D)   (None, None, None, 512)   0         
_________________________________________________________________
model (Model)                (None, 2)                 25345770  
=================================================================
Total params: 40,060,458
Trainable params: 40,060,458
Non-trainable params: 0
_________________________________________________________________
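
As a quick sanity check (a sketch only: the image path is hypothetical, and the 1/255 rescaling mirrors the feature-extraction generator), the assembled model can be applied directly to a raw image:

# Load the assembled model and classify one raw image
model_whole = load_model(path_whole + '_model_whole.h5')
img = load_img('/home/sheila/datasets/cows/heat/onheat/example.jpg',  # hypothetical example image
               target_size=(img_height, img_width))
x = img_to_array(img) / 255.0   # same rescaling used when the bottleneck features were extracted
x = np.expand_dims(x, axis=0)   # add the batch dimension
probs = model_whole.predict(x)[0]
print(dict(zip(class_names, probs)))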