In [1]:
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Input
from tensorflow.keras.layers import Activation, Dropout, Flatten, Dense
from tensorflow.keras import backend as K
from tensorflow.keras import optimizers
from tensorflow.keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
from tensorflow.keras import applications
from tensorflow.keras.callbacks import EarlyStopping, TensorBoard, ModelCheckpoint, CSVLogger
from tensorflow.keras.utils import plot_model, to_categorical
from tensorflow.keras.models import load_model

import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
from IPython.display import Image, display
import pandas as pd
import numpy as np

import utils_keras
import utils_classification
from IPython.core.display import HTML

%matplotlib inline
Using TensorFlow backend.

0) Preparing the Dataset / Generators

In [2]:
# dimensions of our images.
img_width, img_height = 224, 224
num_classes=2
epochs = 8
batch_size = 32

#========== PATHS =========
name_data='heatcows'
path = '/home/sheila/datasets/cows/heat/'
dir_data = path #+'dataset-training' # TRAIN AND VALIDATION datasets are the same
path_outputs = '/home/sheila/datasets/cows/outputs/'
modelname='VGG16'
path_cnn    = path_outputs + name_data+'-'+modelname
path_ftr_train = path_cnn+'-fts-train'
path_ftr_val   = path_cnn+'-fts-val'
path_whole = path_cnn +'-topmodel-1000-256'

Augmenting Data

In [3]:
# ImageDataGenerator, array_to_img, img_to_array and load_img are already imported from tensorflow.keras.preprocessing.image above

datagen = ImageDataGenerator(
        rotation_range=10,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.15,
        zoom_range=[0.8,1.1], brightness_range=[0.4,1.0],
        horizontal_flip=True,
        fill_mode='nearest')

# NOTE: flow_from_directory treats each subdirectory of the given directory as a class,
# so pointing it at a single class folder directly finds no images (see the output below).
i = 0
for batch in datagen.flow_from_directory(path+'/onheat', batch_size=5,
                                          save_to_dir='/home/sheila/datasets/cows/previewheat/', save_format='jpg'):
    i += 1
    if i > 600:
        break  # otherwise the generator would loop indefinitely
Found 0 images belonging to 0 classes.
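
The preview above finds no images because flow_from_directory expects the directory it is given to contain one subfolder per class. A minimal workaround sketch (not part of the original run, assuming the images live under path + 'onheat/'): point the generator at the dataset root and restrict it to that class with the classes argument.

# Sketch: preview augmentations for the 'onheat' class only
preview_gen = datagen.flow_from_directory(path, classes=['onheat'], batch_size=5,
                                          save_to_dir='/home/sheila/datasets/cows/previewheat/',
                                          save_format='jpg')
for i, batch in enumerate(preview_gen):
    if i >= 600:
        break  # the generator loops indefinitely, so stop manually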

1) Defining Architecture

1.a) Pretrained architecture

Loading VGG 16 architecture.

In [4]:
model_cnn_name='VGG16'
model_cnn = applications.VGG16(include_top=False, weights='imagenet')

#modelname='mobilenet'
#model_pretrained = applications.mobilenet.MobileNet(input_shape=(224, 224, 3), include_top=False, weights='imagenet')

#modelname='resnet50'
#model_pretrained = applications.ResNet50(input_shape=(224, 224, 3), include_top=False, weights='imagenet')

#modelname='inceptionV3'
#model_pretrained = applications.InceptionV3(input_shape=(224, 224, 3), include_top=False, weights='imagenet')

plot_model(model_cnn, to_file = path_outputs + 'plot-'+model_cnn_name + '.png', show_shapes=True)
# display(Image(filename=to_file))
Out[4]:

Saving features obtained by running my data through a pre-trained model

Saving the output of the pre-trained network over my data. No modification is made to the network itself.

In [5]:
# class_mode='categorical' -> 2D one-hot encoded labels; the labels are the second element of each batch
def save_bottleneck_features(model, path_fts_train, path_fts_val):
    datagen = ImageDataGenerator(rescale=1. / 255, validation_split=0.1)  # no augmentation
    arguments = {'directory': dir_data, 'target_size': (img_height, img_width), 'batch_size': batch_size,
            'class_mode': None,  # just data, no labels (no training)
            'shuffle': False}  # keep the features in order so they can be matched to labels in the next phase
    # We could also shuffle and keep track of the label of each element, but that would have to be
    # handled manually rather than through fit or predict, and this data is needed for the next step.
    
    # Generating the dataset
    generator = datagen.flow_from_directory(**arguments, subset='training') 
    utils_keras.print_generator_details(generator)  
    class_names = utils_keras.get_class_names(generator)
    steps = utils_keras.get_steps_for_epoch(generator)
    x_fts_train = model.predict_generator(generator=generator, steps=steps)
    print('- Shape x_fts_train', x_fts_train.shape)
    np.savez(file=path_fts_train, x_fts_train=x_fts_train, # save features training and labels
        y_train=generator.classes, class_names=class_names) # classes of each element indicated in their numeric index

    
    generator = datagen.flow_from_directory(**arguments, subset='validation') # same directory
    steps = utils_keras.get_steps_for_epoch(generator)
    x_fts_val = model.predict_generator(generator, steps)
    np.savez(file=path_fts_val, x_fts_val=x_fts_val, y_val=generator.classes,
            class_names=class_names) # saving features validation    

Saving Features

In [6]:
save_bottleneck_features(model_cnn, path_ftr_train, path_ftr_val)
Found 4670 images belonging to 2 classes.
===== History GENERATOR Characteristics =====
Generator class_indices =  {'nonheat': 0, 'onheat': 1}
Generator classes =  [0 0 0 ... 1 1 1]
Generator Batch Size =  32
Generator Total Batches Seen =  0
Generator Num Classes=  2
Generator Number of Samples=  4670
Generator len(classes)=  4670
Generator len(Filenames) =  4670
- Shape x_fts_train (4670, 7, 7, 512)
Found 518 images belonging to 2 classes.

Opening saved Features

In [7]:
def get_saved_features(path_ftr_train, path_ftr_val):
    db_train = np.load(path_ftr_train + '.npz')
    print('npz train', db_train.files)
    x_fts_train = db_train['x_fts_train']
    y_train = db_train['y_train']
    class_names = db_train['class_names']
    db_val = np.load(path_ftr_val + '.npz')
    x_fts_val = db_val['x_fts_val']
    y_val = db_val['y_val']
    y_train = to_categorical(y_train, num_classes)
    y_val = to_categorical(y_val, num_classes)

    print('x_fts_train = ', x_fts_train.shape)
    print('y_train = ', y_train.shape)
    print('x_fts_val = ', x_fts_val.shape)
    print('y_val = ', y_val.shape)
    print('class_names = ', class_names)
    return x_fts_train, y_train, x_fts_val, y_val, class_names
x_fts_train, y_train, x_fts_val, y_val, class_names = get_saved_features(path_ftr_train, path_ftr_val)
npz train ['x_fts_train', 'y_train', 'class_names']
x_fts_train =  (4670, 7, 7, 512)
y_train =  (4670, 2)
x_fts_val =  (518, 7, 7, 512)
y_val =  (518, 2)
class_names =  ['nonheat' 'onheat']

Loading already saved Features (the same steps as the function above, written inline)

In [8]:
db_train = np.load(path_ftr_train+'.npz')
print('npz train',db_train.files)

x_fts_train = db_train['x_fts_train']
y_train     = db_train['y_train']
class_names = db_train['class_names']

db_val = np.load(path_ftr_val+'.npz')
x_fts_val = db_val['x_fts_val']
y_val     = db_val['y_val']

y_train = to_categorical(y_train, num_classes)
y_val = to_categorical(y_val, num_classes)

print('x_fts_train = ', x_fts_train.shape)
print('y_train = ', y_train.shape)
print('x_fts_val = ', x_fts_val.shape)
print('y_val = ', y_val.shape)
print('class_names = ',class_names)
npz train ['x_fts_train', 'y_train', 'class_names']
x_fts_train =  (4670, 7, 7, 512)
y_train =  (4670, 2)
x_fts_val =  (518, 7, 7, 512)
y_val =  (518, 2)
class_names =  ['nonheat' 'onheat']

1.b) Create the architecture to train on top

In [9]:
def create_model_top(input_shape):
    inputs = Input(shape=input_shape)
    x = Flatten()(inputs)
    x = Dense(1000, activation='relu', name='dense_1000')(x)
    x = Dropout(0.5, name='dropout_1000')(x)
    x = Dense(256, activation='relu', name='dense_256')(x)
    x = Dropout(0.5, name='dropout_256')(x)
    pred_layer = Dense(num_classes, activation='softmax', name='softmax_20')(x)
    model = Model(inputs=inputs, outputs=pred_layer)
    plot_model(model, to_file=path_outputs + "plot-model_top.png", show_shapes=True)
    return model
model_top = create_model_top(input_shape=x_fts_train.shape[1:])
model_top.summary()
plot_model(model_top, show_shapes=True)
Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
input_2 (InputLayer)         [(None, 7, 7, 512)]       0         
_________________________________________________________________
flatten (Flatten)            (None, 25088)             0         
_________________________________________________________________
dense_1000 (Dense)           (None, 1000)              25089000  
_________________________________________________________________
dropout_1000 (Dropout)       (None, 1000)              0         
_________________________________________________________________
dense_256 (Dense)            (None, 256)               256256    
_________________________________________________________________
dropout_256 (Dropout)        (None, 256)               0         
_________________________________________________________________
softmax_20 (Dense)           (None, 2)                 514       
=================================================================
Total params: 25,345,770
Trainable params: 25,345,770
Non-trainable params: 0
_________________________________________________________________
Out[9]:

2) Compiling and Fitting - just the top part - the CNN features are unmodified

In Python, I can retrain a model many times simply by running this function again. It does not create a new model on each call: train_model_top is applied to the same object, which is modified in place every time (see the sketch after the training output below).

In [10]:
def train_model_top(model_top, x_fts_train, y_train, x_fts_val, y_val, class_names, epochs, batch_size, path_whole):
    # ModelCheckpoint keeps the weights with the lowest val_loss seen so far; EarlyStopping stops
    # training once val_accuracy has not improved for 10 epochs.
    callbacks = [EarlyStopping(monitor='val_accuracy', patience=10), TensorBoard(log_dir=path_outputs + 'logs'),
        ModelCheckpoint(path_whole + '-model_weights_best.h5', monitor='val_loss', save_best_only=True), # add '_{epoch:02d}' to keep one file per epoch
        CSVLogger(path_outputs + 'log.csv')]

    model_top.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
    # optimizer=optimizers.SGD(lr=0.01, momentum=0.9),
    # optimizer='adam',

    history = model_top.fit(x_fts_train, y_train, epochs=epochs, batch_size=batch_size,
                        validation_data=(x_fts_val, y_val), callbacks=callbacks, shuffle=True, verbose=1)

    model_top.save_weights(path_whole + '-weights.h5')  # save just the weights (counterpart: load_weights)
    model_top.save(path_whole + '-model_weights_last_epoch.h5')  # save the model + weights (counterpart: load_model)
    utils_keras.plot_learning_from_history(history, figsize=(13, 7), filename=path_whole + 'historyplot')
    display(pd.DataFrame(history.history))
    print(history.params)
    return history

history = train_model_top(model_top, x_fts_train, y_train, x_fts_val, y_val, class_names, epochs, batch_size, path_whole)
Train on 4670 samples, validate on 518 samples
Epoch 1/8
  32/4670 [..............................] - ETA: 2:40 - loss: 1.5056 - accuracy: 0.5312
WARNING: Logging before flag parsing goes to stderr.
W0822 15:09:52.557423 140141793986368 callbacks.py:244] Method (on_train_batch_end) is slow compared to the batch update (0.104025). Check your callbacks.
4670/4670 [==============================] - 4s 864us/sample - loss: 1.6801 - accuracy: 0.6964 - val_loss: 0.2413 - val_accuracy: 0.9112
Epoch 2/8
4670/4670 [==============================] - 5s 1ms/sample - loss: 0.2764 - accuracy: 0.9024 - val_loss: 0.1799 - val_accuracy: 0.9324
Epoch 3/8
4670/4670 [==============================] - 5s 1ms/sample - loss: 0.1560 - accuracy: 0.9505 - val_loss: 0.0936 - val_accuracy: 0.9749
Epoch 4/8
4670/4670 [==============================] - 2s 502us/sample - loss: 0.0939 - accuracy: 0.9743 - val_loss: 0.1056 - val_accuracy: 0.9556
Epoch 5/8
4670/4670 [==============================] - 5s 1ms/sample - loss: 0.0914 - accuracy: 0.9767 - val_loss: 0.0833 - val_accuracy: 0.9807
Epoch 6/8
4670/4670 [==============================] - 2s 509us/sample - loss: 0.0717 - accuracy: 0.9831 - val_loss: 0.2323 - val_accuracy: 0.9363
Epoch 7/8
4670/4670 [==============================] - 5s 1ms/sample - loss: 0.0367 - accuracy: 0.9927 - val_loss: 0.0560 - val_accuracy: 0.9903
Epoch 8/8
4670/4670 [==============================] - 2s 497us/sample - loss: 0.0441 - accuracy: 0.9934 - val_loss: 0.0822 - val_accuracy: 0.9865
Width=10.075 - height=2.4022727272727264
       loss  accuracy  val_loss  val_accuracy
0  1.680113  0.696360  0.241296      0.911197
1  0.276414  0.902355  0.179908      0.932432
2  0.155966  0.950535  0.093636      0.974903
3  0.093917  0.974304  0.105602      0.955598
4  0.091431  0.976660  0.083308      0.980695
5  0.071653  0.983083  0.232319      0.936293
6  0.036681  0.992719  0.056023      0.990348
7  0.044138  0.993362  0.082249      0.986486
{'batch_size': 32, 'epochs': 8, 'steps': 146, 'samples': 4670, 'verbose': 0, 'do_validation': True, 'metrics': ['loss', 'accuracy', 'val_loss', 'val_accuracy']}
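
As mentioned above, calling train_model_top again on the same object simply continues training from the current weights. A minimal sketch (not executed here):

# Continue training the same, already trained, model_top for a few more epochs
history_more = train_model_top(model_top, x_fts_train, y_train, x_fts_val, y_val,
                               class_names, epochs=4, batch_size=batch_size, path_whole=path_whole)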

Results through Confusion Matrices

In [11]:
def plot_cm(model, x_fts_train, y_train, x_fts_val, y_val):
    
    y_val_pred = model.predict(x_fts_val)
    y_train_pred = model.predict(x_fts_train)    
    
    cm_val = confusion_matrix(utils_classification.invert_categorical(y_val), utils_classification.invert_categorical(y_val_pred))
    print("The confusion matrix of the validation set is:"); display(pd.DataFrame(cm_val))
    
    fig1 = utils_classification.plot_confusion_matrix(cm_val, class_names, normalize=False,
                                                      fill_numbers=True, figsize=(5, 5), rotation_xaxis=90,
                                                      colorbar=False, file_name=path_whole + 'cm')
    
    fig2 = utils_classification.plot_confusion_matrix(cm_val, class_names, normalize=True,
                                                      fill_numbers=True, figsize=(5, 5), rotation_xaxis=90,
                                                      colorbar=False, file_name=path_whole + 'cm-normalized')
    display(fig2) # fig1
    
    cm_train = confusion_matrix(utils_classification.invert_categorical(y_train), utils_classification.invert_categorical(y_train_pred))
    print("The confusion matrix of the training set is:"); display(pd.DataFrame(cm_val))
    
    fig3 = utils_classification.plot_confusion_matrix(cm_train, class_names, normalize=False,
                                                      fill_numbers=True, figsize=(5, 5), rotation_xaxis=90,
                                                      colorbar=False, file_name=path_whole + 'cm-train')
    fig4 = utils_classification.plot_confusion_matrix(cm_train, class_names, normalize=True,
                                                      fill_numbers=True, figsize=(5, 5), rotation_xaxis=90,
                                                      colorbar=False, file_name=path_whole + 'cm-train-normalized')
    
    display(fig4) # fig3
    return cm_train, cm_val

Results with the model from the last epoch (possibly slightly overfitted, although early stopping limits this)

In [12]:
#====  Running with the last epoch values ==========
cm_train, cm_val = plot_cm(model_top, x_fts_train, y_train, x_fts_val, y_val)
The confusion matrix of the validation set is:
     0    1
0  236    6
1    1  275

Results with the model from the best epoch (lowest validation loss)

In [13]:
model_best = load_model(path_whole + '-model_weights_best.h5')
cm_train, cm_val = plot_cm(model_best, x_fts_train, y_train, x_fts_val, y_val)
The confusion matrix of the validation set is:
     0    1
0  238    4
1    1  275

ROC Curve and ROC Area Under Curve (AUC)

In [14]:
y_val_pred = model_best.predict(x_fts_val)
y_val_pred[0]
y_val
Out[14]:
array([[1., 0.],
       [1., 0.],
       [1., 0.],
       ...,
       [0., 1.],
       [0., 1.],
       [0., 1.]], dtype=float32)

Remember that the ROC curve is defined for binary classification.

Therefore, to use it in a multi-class setting, each class is in turn treated as the positive class and all the remaining classes as the negative class (one-vs-rest).

--> Error --> sklearn.metrics.roc_curve(y_val, y_val_pred) cannot be called directly on the full one-hot matrices

We repeat this process for each class.

In [15]:
import sklearn.metrics  # needed: `from sklearn.metrics import confusion_matrix` above does not bind the name `sklearn`

fpr = dict()
tpr = dict()
roc_auc = dict()

for i in range(num_classes):
    fpr[i], tpr[i], _ = sklearn.metrics.roc_curve(y_val[:, i], y_val_pred[:, i])
    roc_auc[i] = sklearn.metrics.auc(fpr[i], tpr[i])

fpr
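
Since this is a two-class problem, the one-vs-rest loop above is equivalent to a single binary ROC curve built from the positive-class ('onheat') scores. A minimal check (not part of the original run):

# Equivalent binary ROC using only the positive-class column
fpr_bin, tpr_bin, _ = sklearn.metrics.roc_curve(y_val[:, 1], y_val_pred[:, 1])
print('Binary ROC AUC =', sklearn.metrics.auc(fpr_bin, tpr_bin))
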
In [ ]:
plt.plot(fpr[1], tpr[1],  label='ROC curve (area = %0.2f)' % roc_auc[1])  # single curve for the positive class ('onheat')
plt.legend()
In [16]:
for i in range(num_classes):
    plt.plot(fpr[i], tpr[i], label='ROC AUC={:.3f} - {} '.format(roc_auc[i], class_names[i]))
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title("ROC Curves per class")
plt.legend()

Precision Recall

In [19]:
precision = dict()
recall = dict()
average_precision = dict()
for i in range(num_classes):
    precision[i], recall[i], _ = sklearn.metrics.precision_recall_curve(y_val[:, i], y_val_pred[:, i])
    average_precision[i] = sklearn.metrics.average_precision_score(y_val[:, i], y_val_pred[:, i])
In [20]:
for i in range(num_classes):
    plt.step(recall[i], precision[i], where='post',
             label='Average precision={:.3f} - {}'.format(average_precision[i], class_names[i]))
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.title("Precision-Recall curves per class")
plt.legend()
In [ ]:
 

==> Check the rest from this point later -

Perhaps it is the third part of the Keras cats-vs-dogs tutorial.


Set the path for all the files to be saved

The weights saved here are the last ones, reflecting the state the model was in when fit finished. However, thanks to the ModelCheckpoint callback, the model with the lowest validation loss (model_weights_best) was also saved during training, and it is not necessarily the same one. The quantity to monitor when saving the best model is configurable.
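
For instance (a sketch only, with a hypothetical filename), to keep the checkpoint with the highest validation accuracy instead of the lowest validation loss, the callback can be configured as:

# Hypothetical alternative: checkpoint the weights with the best (highest) val_accuracy
checkpoint_acc = ModelCheckpoint(path_whole + '-model_weights_best_acc.h5',
                                 monitor='val_accuracy', mode='max', save_best_only=True)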

In [23]:
model_top.save_weights(path_whole+'-weights.h5')  # save just the weights (counterpart: load_weights)
model_top.save(path_whole+'-model_weights.h5')  # save the model + weights (counterpart: load_model)

## Analyzing saved model and best model ##
model_last = load_model(path_whole+'-model_weights.h5')
model_best = load_model(path_whole+'-model_weights_best.h5')


#np.all(model_last==model_best)
#np.all(model_last.layers[0].get_weights()[0] == model_best.layers[0].get_weights()[0])

#print(model.layers[1].get_weights()[0])
#model = model_last
model_top = model_best
#print('model best',model_best.layers[1].get_weights()[0])
#print('model last',model_last.layers[1].get_weights()[0])

Verifying accuracy

In [24]:
print('Expected accuracy = ', history.history['val_accuracy'])  # per-epoch validation accuracy from the training run
print('Manual accuracy =', utils_classification.compute_accuracy_from_cm(cm_val))  # accuracy from the best model's validation confusion matrix
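
As a sanity check on the manual value: from the best model's validation confusion matrix above, accuracy = (238 + 275) / 518 ≈ 0.990, which matches the best val_accuracy reported during training (0.990348, epoch 7).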

Building a Complete model

The model combines the weights of the pre-trained part with the dense model just trained on top.

In [17]:
def create_model_whole(model_cnn, model_top, path_whole):
    output_whole = model_top(model_cnn.output)
    model_whole = Model(inputs=model_cnn.input, outputs=output_whole)
    model_whole.summary()
    model_whole.save(path_whole + '_model_whole.h5')  # save whole model + weights
    plot_model(model_whole, expand_nested=True, show_shapes=True, to_file=path_outputs+"plot-model_whole.png")

create_model_whole(model_cnn, model_top, path_whole)
Model: "model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
input_1 (InputLayer)         [(None, None, None, 3)]   0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, None, None, 64)    1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, None, None, 64)    36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, None, None, 64)    0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, None, None, 128)   73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, None, None, 128)   147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, None, None, 128)   0         
_________________________________________________________________
block3_conv1 (Conv2D)        (None, None, None, 256)   295168    
_________________________________________________________________
block3_conv2 (Conv2D)        (None, None, None, 256)   590080    
_________________________________________________________________
block3_conv3 (Conv2D)        (None, None, None, 256)   590080    
_________________________________________________________________
block3_pool (MaxPooling2D)   (None, None, None, 256)   0         
_________________________________________________________________
block4_conv1 (Conv2D)        (None, None, None, 512)   1180160   
_________________________________________________________________
block4_conv2 (Conv2D)        (None, None, None, 512)   2359808   
_________________________________________________________________
block4_conv3 (Conv2D)        (None, None, None, 512)   2359808   
_________________________________________________________________
block4_pool (MaxPooling2D)   (None, None, None, 512)   0         
_________________________________________________________________
block5_conv1 (Conv2D)        (None, None, None, 512)   2359808   
_________________________________________________________________
block5_conv2 (Conv2D)        (None, None, None, 512)   2359808   
_________________________________________________________________
block5_conv3 (Conv2D)        (None, None, None, 512)   2359808   
_________________________________________________________________
block5_pool (MaxPooling2D)   (None, None, None, 512)   0         
_________________________________________________________________
model (Model)                (None, 2)                 25345770  
=================================================================
Total params: 40,060,458
Trainable params: 40,060,458
Non-trainable params: 0
_________________________________________________________________
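
As a quick sanity check (a sketch only: the image path is hypothetical, and the 1/255 rescaling mirrors the feature-extraction generator), the assembled model can be applied directly to a raw image:

# Load the assembled model and classify one raw image
model_whole = load_model(path_whole + '_model_whole.h5')
img = load_img('/home/sheila/datasets/cows/heat/onheat/example.jpg',  # hypothetical example image
               target_size=(img_height, img_width))
x = img_to_array(img) / 255.0   # same rescaling used when the bottleneck features were extracted
x = np.expand_dims(x, axis=0)   # add the batch dimension
probs = model_whole.predict(x)[0]
print(dict(zip(class_names, probs)))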