Simple Neural Network

29/10/2019


In this short tutorial, I show how to implement a simple neural network that classifies hand-written digits. The implementation is based on the Keras framework and uses the open-source MNIST dataset of hand-written digits.

The Jupyter Notebook can be downloaded from here. You can also download the PDF slides of a lesson on this topic that I taught at the University of St. Gallen in Switzerland.

Load Modules

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
np.random.seed(2019)

%matplotlib inline
# For retina displays:
# %config InlineBackend.figure_format = 'retina'

This notebook requires the Keras module. You can install it with the shell command pip install keras.

from keras.models     import Sequential
from keras.layers     import Activation
from keras.optimizers import SGD
from keras.layers     import Dense
from keras.utils      import to_categorical
from keras.datasets   import mnist
from keras.metrics    import categorical_accuracy
from keras.callbacks  import EarlyStopping
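
With recent TensorFlow versions that bundle Keras, the same classes live under the tensorflow.keras namespace instead; a hedged equivalent of the imports above:

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.datasets import mnist
from tensorflow.keras.metrics import categorical_accuracy
from tensorflow.keras.callbacks import EarlyStopping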

The seaborn module is optional; it is used only to make the plots nicer. You can install it with pip install seaborn.

import seaborn as sns
sns.set(style='white', font_scale=1.3, rc={
    'lines.linewidth': 3,
    'axes.grid': True, 'grid.linestyle': ':',
    'axes.spines.left': True,
    'axes.spines.bottom': True,
    'axes.spines.right': True,
    'axes.spines.top': True,
    'axes.edgecolor': '.5',
})

Load MNIST Dataset

(x_trai, y_trai), (x_test, y_test) = mnist.load_data()
L_trai = len(x_trai)
L_test = len(x_test)

# Reshape the images into vectors of dim 28*28=784
X_trai = x_trai.reshape((L_trai, -1))
X_test = x_test.reshape((L_test, -1))

# The labels need to be one-hot encoded: each digit becomes a vector of 10 indicators
Y_test = to_categorical(y_test)
Y_trai = to_categorical(y_trai)

print("We have %d samples in the training set"%L_trai)
print("We have %d samples in the test set"%L_test)
We have 60000 samples in the training set
We have 10000 samples in the test set
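
To see what to_categorical does, here is a tiny illustrative example: each label becomes a length-10 vector of indicators, with a single 1 at the position of the digit.

to_categorical([0, 3, 9], num_classes=10)
# array([[1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
#        [0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
#        [0., 0., 0., 0., 0., 0., 0., 0., 0., 1.]], dtype=float32)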

Plot some random digits

NX, NY = 2, 6
fig, ax = plt.subplots(NX,NY,figsize=(1.5*NY,1.5*NX))
for i in range(NX):
    for j in range(NY):
        img = x_trai[np.random.randint(L_trai)]
        ax[i][j].imshow(img, cmap='Greys');
        ax[i][j].set_xticklabels([]); ax[i][j].set_xticks([])
        ax[i][j].set_yticklabels([]); ax[i][j].set_yticks([])
plt.tight_layout()
#plt.savefig("digits_examples.pdf")

[Figure: a random sample of hand-written digits from the MNIST training set]

Shallow Neural Network

Only one hidden layer

# Define the shallow neural network

# Stop training once the validation loss has not improved for 5 consecutive epochs
stop_rule = EarlyStopping(monitor='val_loss', patience=5, verbose=1, mode='auto')

model = Sequential()
model.add(Dense(300, input_dim=784, activation="relu"))
model.add(Dense(10))              # 10 is the output dimension
model.add(Activation("softmax"))  # Provides a probability for each digit 0,...,9
model.compile(loss="categorical_crossentropy", optimizer=SGD(), metrics=[categorical_accuracy])
history = model.fit(
    X_trai, Y_trai, validation_split=0.2,
    epochs=30, batch_size=512, verbose=1,
    callbacks=[stop_rule],
)
df = pd.DataFrame(history.history).rename({
    'loss':'Training Loss', 'val_loss':'Validation Loss'
},axis=1)
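
To make the last two layers and the loss more concrete, here is a minimal numpy sketch with made-up logits: softmax maps the 10 raw outputs to probabilities, and categorical cross-entropy is the negative log-probability the model assigns to the true class.

logits = np.array([1.0, 2.0, 0.5, 0.1, 3.0, 0.2, 0.3, 0.4, 0.1, 0.6])  # made-up raw outputs
probs = np.exp(logits) / np.exp(logits).sum()    # softmax: non-negative, sums to 1
y_true = to_categorical([4], num_classes=10)[0]  # suppose the true digit is 4
loss = -np.sum(y_true * np.log(probs))           # categorical cross-entropy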

fig, ax1 = plt.subplots(1,1, figsize=(10,5))
# Skip the first epoch, whose large loss would distort the log-scale plot
df.iloc[1:][['Training Loss', 'Validation Loss']].plot(style='-o', ax=ax1, logy=True);

[Figure: training and validation loss per epoch, log scale]

Out-of-sample predictions

preds = model.predict(X_test)
# The predicted class is the one with the highest predicted probability
preds = np.argmax(preds,axis=1)
reals = np.argmax(Y_test,axis=1)

accuracy_in  = df.iloc[-1]['categorical_accuracy']  # training accuracy at the final epoch
accuracy_out = np.mean(preds==reals)
print("In-sample accuracy:     {0:0.2f}% ".format(accuracy_in*100))
print("Out-of-sample accuracy: {0:0.2f}%".format(accuracy_out*100))
In-sample accuracy:     88.45% 
Out-of-sample accuracy: 86.96%
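
A single accuracy number hides which digits get confused with which. A minimal confusion-matrix sketch using only numpy (not part of the original notebook):

# conf_mat[r, p] counts test images with true label r predicted as p
conf_mat = np.zeros((10, 10), dtype=int)
for r, p in zip(reals, preds):
    conf_mat[r, p] += 1
print(conf_mat.diagonal().sum() / conf_mat.sum())  # recovers the out-of-sample accuracy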
NX, NY = 2, 6
fig, ax = plt.subplots(NX,NY,figsize=(1.5*NY,2*NX))
for i in range(NX):
    for j in range(NY):
        s = np.random.randint(len(preds))
        img = x_test[s]
        colors = 'Greens' if reals[s]==preds[s] else 'Reds'
        ax[i][j].imshow(img, cmap=colors)
        ax[i][j].set_title("Number:    %d \nPredicted: %d"%(reals[s], preds[s]))
        ax[i][j].set_xticklabels([]); ax[i][j].set_xticks([])
        ax[i][j].set_yticklabels([]); ax[i][j].set_yticks([])
plt.tight_layout()

[Figure: test digits with true and predicted labels; green for correct, red for wrong]

Go Deeper

4 hidden layers

stop_rule = EarlyStopping(monitor='val_loss', patience=5, verbose=1, mode='auto')

# Define the neural network
deep_model = Sequential()
deep_model.add(Dense(300, input_dim=784, activation="relu"))
deep_model.add(Dense(150, activation="relu"))
deep_model.add(Dense(100, activation="relu"))
deep_model.add(Dense(50 , activation="relu"))
deep_model.add(Dense(10))              # 10 is the output dimension
deep_model.add(Activation("softmax"))  # Provides a probability for each digit 0,...,9
deep_model.compile(loss="categorical_crossentropy", optimizer=SGD(), metrics=[categorical_accuracy])

history = deep_model.fit(
    X_trai, Y_trai, validation_split=0.2,
    epochs=30, batch_size=512, verbose=1,
    callbacks=[stop_rule],
)
df = pd.DataFrame(history.history).rename({
    'loss':'Training Loss', 'val_loss':'Validation Loss'
},axis=1)
df.index = df.index + 1  # label epochs starting from 1 in the plot
fig, ax1 = plt.subplots(1,1, figsize=(10,5))
df[['Training Loss', 'Validation Loss']].plot(style='-o', ax=ax1, logy=True);

[Figure: training and validation loss per epoch for the deep model, log scale]

Out-of-sample predictions

preds = deep_model.predict(X_test)
# The predicted class is the one with the highest predicted probability
preds = np.argmax(preds,axis=1)
reals = np.argmax(Y_test,axis=1)

accuracy_in  = df.iloc[-1]['categorical_accuracy']
accuracy_out = np.mean(preds==reals)
print("In-sample accuracy:     {0:0.2f}% ".format(accuracy_in*100))
print("Out-of-sample accuracy: {0:0.2f}%".format(accuracy_out*100))
In-sample accuracy:     99.58% 
Out-of-sample accuracy: 94.51%
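
The deeper network is clearly more accurate out of sample, but the gap between 99.58% in-sample and 94.51% out-of-sample accuracy hints at overfitting. A common remedy, not used in this notebook, is dropout; a hedged sketch of how the first hidden layer could be regularized:

from keras.layers import Dropout

reg_model = Sequential()
reg_model.add(Dense(300, input_dim=784, activation="relu"))
reg_model.add(Dropout(0.3))  # randomly zero 30% of the activations at each training step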
NX, NY = 2, 6
fig, ax = plt.subplots(NX,NY,figsize=(1.5*NY,2*NX))
for i in range(NX):
    for j in range(NY):
        s = np.random.randint(len(preds))
        img = x_test[s]
        colors = 'Greens' if reals[s]==preds[s] else 'Reds'
        ax[i][j].imshow(img, cmap=colors)
        ax[i][j].set_title("Number:    %d \nPredicted: %d"%(reals[s], preds[s]))
        ax[i][j].set_xticklabels([]); ax[i][j].set_xticks([])
        ax[i][j].set_yticklabels([]); ax[i][j].set_yticks([])
plt.tight_layout()

[Figure: test digits with true and predicted labels for the deep model; green for correct, red for wrong]

Author: Andrea Barbon

