FASHION MNIST with Python (DAY 8) - CNN¶
DATA SOURCE : https://www.kaggle.com/zalando-research/fashionmnist (Kaggle, Fashion MNIST)
FASHION MNIST with Python (DAY 1) : http://deepstat.tistory.com/35
FASHION MNIST with Python (DAY 2) : http://deepstat.tistory.com/36
FASHION MNIST with Python (DAY 3) : http://deepstat.tistory.com/37
FASHION MNIST with Python (DAY 4) : http://deepstat.tistory.com/38
FASHION MNIST with Python (DAY 5) : http://deepstat.tistory.com/39
FASHION MNIST with Python (DAY 6) : http://deepstat.tistory.com/40
FASHION MNIST with Python (DAY 7) : http://deepstat.tistory.com/41
Datasets¶
Importing numpy, pandas, pyplot¶
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
Loading datasets¶
data_train = pd.read_csv("../datasets/fashion-mnist_train.csv")
data_test = pd.read_csv("../datasets/fashion-mnist_test.csv")
data_train_y = data_train.label
y_test = data_test.label
# scale the 0-255 pixel values into [0, 1)
data_train_x = data_train.drop("label",axis=1)/256
x_test = data_test.drop("label",axis=1)/256
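pyplot is imported above but not otherwise used in this section; a quick look at one row makes the 784-pixel layout concrete. The display snippet below is an illustration added here, not a cell from the original notebook:

plt.imshow(data_train_x.iloc[0].values.reshape(28, 28), cmap = "gray")  # one 28x28 grayscale image
plt.title("label = " + str(data_train_y.iloc[0]))                       # class labels run 0-9
plt.show()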
Splitting validation and training sets¶
np.random.seed(0)
# hold out two disjoint validation sets of 10,000 rows each; the remaining 40,000 rows are used for training
valid2_idx = np.random.choice(60000, 10000, replace=False)
valid1_idx = np.random.choice(list(set(range(60000)) - set(valid2_idx)), 10000, replace=False)
train_idx = list(set(range(60000)) - set(valid1_idx) - set(valid2_idx))
x_train = data_train_x.iloc[train_idx,:]
y_train = data_train_y.iloc[train_idx]
x_valid1 = data_train_x.iloc[valid1_idx,:]
y_valid1 = data_train_y.iloc[valid1_idx]
x_valid2 = data_train_x.iloc[valid2_idx,:]
y_valid2 = data_train_y.iloc[valid2_idx]
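Since the three index sets are drawn without replacement from the same 60,000 rows, they should be disjoint, leaving 40,000 rows for training and 10,000 for each validation set. A quick check, added here as an illustration:

# sanity check: 40,000 train / 10,000 valid1 / 10,000 valid2, no overlap
assert len(train_idx) == 40000 and len(valid1_idx) == 10000 and len(valid2_idx) == 10000
assert not (set(train_idx) & set(valid1_idx)) and not (set(train_idx) & set(valid2_idx))
assert not (set(valid1_idx) & set(valid2_idx))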
Convolutional Neural Network (CNN)¶
Importing TensorFlow¶
import tensorflow as tf
from sklearn.metrics import confusion_matrix
Defining weight_variables and bias_variables¶
def weight_variables(shape):
    # weights drawn from a truncated normal (default stddev = 1.0)
    initial = tf.truncated_normal(shape)
    return tf.Variable(initial)

def bias_variables(shape):
    # biases initialized to zero
    initial = tf.zeros(shape)
    return tf.Variable(initial)
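Note that tf.truncated_normal defaults to a standard deviation of 1.0, which is large for convolution weights. A smaller scale is a common alternative; the variant below is a sketch, not what this notebook actually ran:

def weight_variables_scaled(shape, stddev=0.1):
    # hypothetical variant: a smaller initial scale often makes early training more stable
    return tf.Variable(tf.truncated_normal(shape, stddev=stddev))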
Defining conv2d and maxpool¶
def conv2d(x, W):
    # stride-1 convolution with VALID padding: no zero-padding, so each 5x5 kernel shrinks the feature map by 4
    return tf.nn.conv2d(x, W, strides = [1, 1, 1, 1], padding = 'VALID')

def maxpool(x):
    # 2x2 max-pooling with stride 2 halves the spatial dimensions
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides = [1, 2, 2, 1], padding = 'VALID')
Constructing the CNN¶
Convolution, Maxout, Max-pooling, Batch Normalization, Dropout, Softmax, Cross-Entropy, Adam
Model : input -> [convolution -> maxout -> dropout] -> [convolution -> batch normalization -> maxout -> maxpool -> dropout] -> [convolution -> batch normalization -> maxout -> dropout] -> flatten -> [batch normalization -> inner product -> softmax] -> output
Loss : cross-entropy
Optimizer : Adam
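Every convolution uses VALID padding, so it is worth tracing the spatial dimensions before reading the code. The arithmetic below is a sanity check added here, not a cell from the original notebook; it reproduces the 6*6*4 size used to flatten layer 3:

# VALID 5x5 conv: size -> size - 4 ; 2x2 maxpool: size -> size // 2
size = 28
size -= 4    # layer 1 conv : 28 -> 24
size -= 4    # layer 2 conv : 24 -> 20
size //= 2   # layer 2 maxpool : 20 -> 10
size -= 4    # layer 3 conv : 10 -> 6
print(size)  # 6, so the flattened layer has 6*6*4 = 144 units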
Inputs¶
x = tf.placeholder("float", [None,784])
x_image = tf.reshape(x, [-1,28,28,1])   # flat 784-pixel rows reshaped to 28x28 grayscale images
y = tf.placeholder("int64", [None])
y_dummies = tf.one_hot(y, depth = 10)   # one-hot encoding of the 10 class labels
drop_prob = tf.placeholder("float")     # dropout rate; ignored when training is False
training = tf.placeholder("bool")
l1_w = weight_variables([5,5,1,4*8])    # 5x5 kernels, 1 input channel, 32 output channels
l1_b = bias_variables([4*8])
l1_conv = conv2d(x_image, l1_w) + l1_b
l1_maxout = tf.contrib.layers.maxout(l1_conv, 8)    # max over groups of 4 channels: 32 -> 8
l1_dropout = tf.layers.dropout(l1_maxout, rate = drop_prob, training = training)

l2_w = weight_variables([5,5,8,4*8])
l2_conv = conv2d(l1_dropout, l2_w)
# training flag so BN uses batch statistics while training and moving averages at test time
l2_batch_normalization = tf.layers.batch_normalization(l2_conv, training = training)
l2_maxout = tf.contrib.layers.maxout(l2_batch_normalization, 8)
l2_maxpool = maxpool(l2_maxout)
l2_dropout = tf.layers.dropout(l2_maxpool, rate = drop_prob, training = training)

l3_w = weight_variables([5,5,8,8*4])
l3_conv = conv2d(l2_dropout, l3_w)
l3_batch_normalization = tf.layers.batch_normalization(l3_conv, training = training)
l3_maxout = tf.contrib.layers.maxout(l3_batch_normalization, 4)    # 32 -> 4 channels
l3_dropout = tf.layers.dropout(l3_maxout, rate = drop_prob, training = training)
l3_reshape = tf.reshape(l3_dropout, [-1,6*6*4])    # flatten the 6x6x4 feature maps

l4_w = weight_variables([6*6*4,10])
l4_b = bias_variables([10])
l4_batch_normalization = tf.layers.batch_normalization(l3_reshape, training = training)
l4_inner_prod = tf.matmul(l4_batch_normalization, l4_w) + l4_b
l4_log_softmax = tf.nn.log_softmax(l4_inner_prod)
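tf.contrib.layers.maxout(inputs, num_units) splits the channel dimension into num_units groups and keeps the element-wise maximum of each group, which is why each convolution produces 4*8 channels that maxout then reduces to 8 (or 4). A minimal shape check, added here as an illustration:

demo = tf.ones([1, 24, 24, 32])                # 32 channels in
demo_out = tf.contrib.layers.maxout(demo, 8)   # max over groups of 4 channels
print(demo_out.shape)                          # (1, 24, 24, 8)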
Cross-entropy¶
xent_loss = -tf.reduce_sum( tf.multiply(y_dummies,l4_log_softmax) )
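Multiplying the one-hot labels by the log-softmax and summing gives the (summed, not averaged) cross-entropy. An equivalent and numerically safer formulation, shown as a sketch rather than what this notebook runs, applies TensorFlow's fused op directly to the logits:

# hypothetical equivalent: fused softmax + cross-entropy on the raw logits
xent_loss_fused = tf.reduce_sum(
    tf.nn.softmax_cross_entropy_with_logits_v2(labels = y_dummies, logits = l4_inner_prod))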
Accuracy¶
pred_labels = tf.argmax(l4_log_softmax,axis=1)
acc = tf.reduce_mean(tf.cast(tf.equal(y, pred_labels),"float"))
Training the Model¶
lr = tf.placeholder("float")
# run the batch-normalization moving-average updates together with every training step
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
    train_step = tf.train.AdamOptimizer(lr).minimize(xent_loss)
saver = tf.train.Saver()
sess = tf.Session()
sess.run(tf.global_variables_initializer())
batch_size = 1000
for i in range(8001):
batch_obs = np.random.choice(x_train.shape[0],batch_size,replace=False)
batch_train_x = x_train.iloc[batch_obs]
batch_train_y = y_train.iloc[batch_obs]
feed_dict = {x : batch_train_x, y : batch_train_y, drop_prob : .125, training : True, lr : 0.1}
_, tmp = sess.run([train_step,xent_loss], feed_dict = feed_dict)
if i % 2000 == 0:
print("step " + str(i) + " training cross-entropy : " + str(tmp))
if i % 4000 == 0:
feed_dict = {x : batch_train_x, y : batch_train_y, drop_prob : .125, training : False}
train_acc = sess.run(acc, feed_dict = feed_dict)
feed_dict = {x : x_valid1, y : y_valid1, drop_prob : .125, training : False}
valid1_acc = sess.run(acc, feed_dict = feed_dict)
print("step " + str(i) + " training_acc = " + str(train_acc) + " valid_acc = " + str(valid1_acc))
save_path = saver.save(sess, "./CNN/model.ckpt")
print("Model saved in path: " + save_path)
batch_size = 1000
for i in range(16001):
batch_obs = np.random.choice(x_train.shape[0],batch_size,replace=False)
batch_train_x = x_train.iloc[batch_obs]
batch_train_y = y_train.iloc[batch_obs]
feed_dict = {x : batch_train_x, y : batch_train_y, drop_prob : .125, training : True, lr : 0.01}
_, tmp = sess.run([train_step,xent_loss], feed_dict = feed_dict)
if i % 4000 == 0:
print("step " + str(i) + " training cross-entropy : " + str(tmp))
if i % 8000 == 0:
feed_dict = {x : batch_train_x, y : batch_train_y, drop_prob : .125, training : False}
train_acc = sess.run(acc, feed_dict = feed_dict)
feed_dict = {x : x_valid1, y : y_valid1, drop_prob : .125, training : False}
valid1_acc = sess.run(acc, feed_dict = feed_dict)
print("step " + str(i) + " training_acc = " + str(train_acc) + " valid_acc = " + str(valid1_acc))
save_path = saver.save(sess, "./CNN/model.ckpt")
print("Model saved in path: " + save_path)
batch_size = 1000
for i in range(200001):
batch_obs = np.random.choice(x_train.shape[0],batch_size,replace=False)
batch_train_x = x_train.iloc[batch_obs]
batch_train_y = y_train.iloc[batch_obs]
feed_dict = {x : batch_train_x, y : batch_train_y, drop_prob : .125, training : True, lr : 0.001}
_, tmp = sess.run([train_step,xent_loss], feed_dict = feed_dict)
if i % 20000 == 0:
print("step " + str(i) + " training cross-entropy : " + str(tmp))
if i % 40000 == 0:
feed_dict = {x : batch_train_x, y : batch_train_y, drop_prob : .125, training : False}
train_acc = sess.run(acc, feed_dict = feed_dict)
feed_dict = {x : x_valid1, y : y_valid1, drop_prob : .125, training : False}
valid1_acc = sess.run(acc, feed_dict = feed_dict)
print("step " + str(i) + " training_acc = " + str(train_acc) + " valid_acc = " + str(valid1_acc))
save_path = saver.save(sess, "./CNN/model.ckpt")
print("Model saved in path: " + save_path)
batch_size = 1000
for i in range(400001):
batch_obs = np.random.choice(x_train.shape[0],batch_size,replace=False)
batch_train_x = x_train.iloc[batch_obs]
batch_train_y = y_train.iloc[batch_obs]
feed_dict = {x : batch_train_x, y : batch_train_y, drop_prob : .125, training : True, lr : 0.0001}
_, tmp = sess.run([train_step,xent_loss], feed_dict = feed_dict)
if i % 40000 == 0:
print("step " + str(i) + " training cross-entropy : " + str(tmp))
if i % 80000 == 0:
feed_dict = {x : batch_train_x, y : batch_train_y, drop_prob : .125, training : False}
train_acc = sess.run(acc, feed_dict = feed_dict)
feed_dict = {x : x_valid1, y : y_valid1, drop_prob : .125, training : False}
valid1_acc = sess.run(acc, feed_dict = feed_dict)
print("step " + str(i) + " training_acc = " + str(train_acc) + " valid_acc = " + str(valid1_acc))
save_path = saver.save(sess, "./CNN/model.ckpt")
print("Model saved in path: " + save_path)
Training Accuracy¶
feed_dict1 = {x : x_train.iloc[0:8000], y : y_train.iloc[0:8000], drop_prob : .125, training : False}
MLP_predict_train1, MLP_train_acc1 = sess.run([pred_labels,acc], feed_dict = feed_dict1)
feed_dict2 = {x : x_train.iloc[8000:16000], y : y_train.iloc[8000:16000], drop_prob : .125, training : False}
MLP_predict_train2, MLP_train_acc2 = sess.run([pred_labels,acc], feed_dict = feed_dict2)
feed_dict3 = {x : x_train.iloc[16000:24000], y : y_train.iloc[16000:24000], drop_prob : .125, training : False}
MLP_predict_train3, MLP_train_acc3 = sess.run([pred_labels,acc], feed_dict = feed_dict3)
feed_dict4 = {x : x_train.iloc[24000:32000], y : y_train.iloc[24000:32000], drop_prob : .125, training : False}
MLP_predict_train4, MLP_train_acc4 = sess.run([pred_labels,acc], feed_dict = feed_dict4)
feed_dict5 = {x : x_train.iloc[32000:40000], y : y_train.iloc[32000:40000], drop_prob : .125, training : False}
MLP_predict_train5, MLP_train_acc5 = sess.run([pred_labels,acc], feed_dict = feed_dict5)
MLP_predict_train = np.concatenate((MLP_predict_train1, MLP_predict_train2,
                                    MLP_predict_train3, MLP_predict_train4, MLP_predict_train5), axis=None)
MLP_train_acc = np.mean((MLP_train_acc1, MLP_train_acc2, MLP_train_acc3, MLP_train_acc4, MLP_train_acc5))
print(confusion_matrix(MLP_predict_train,y_train))
print("TRAINING ACCURACY =",MLP_train_acc)
Validation Accuracy¶
feed_dict = {x : x_valid1, y : y_valid1, drop_prob : .125, training : False}
MLP_predict_valid1, MLP_valid1_acc = sess.run([pred_labels,acc], feed_dict = feed_dict)
print(confusion_matrix(MLP_predict_valid1,y_valid1))
print("VALIDATION ACCURACY =",MLP_valid1_acc)
{"TRAIN_ACC" : MLP_train_acc , "VALID_ACC" : MLP_valid1_acc}