CNN for FASHION MNIST with Tensorflow (test accuracy 0.9308)

DATA SOURCE : https://www.kaggle.com/zalando-research/fashionmnist (Kaggle, Fashion MNIST)

Datasets

Importing numpy, pandas, pyplot

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

Loading datasets

In [2]:
data_train = pd.read_csv("../datasets/fashion-mnist_train.csv")
data_test = pd.read_csv("../datasets/fashion-mnist_test.csv")
In [3]:
data_train_y = data_train.label
y_test = data_test.label
In [4]:
data_train_x = data_train.drop("label",axis=1)/256
x_test = data_test.drop("label",axis=1)/256
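
As a quick sanity check (a minimal sketch, assuming the standard Kaggle layout of a label column followed by 784 pixel columns), one row can be reshaped back to 28x28 and plotted:

# Display the first training image with its label.
img = data_train_x.iloc[0].values.reshape(28, 28)
plt.imshow(img, cmap="gray")
plt.title("label = " + str(data_train_y.iloc[0]))
plt.show()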

Splitting training and validation sets

In [5]:
np.random.seed(0)
valid2_idx = np.random.choice(60000,10000,replace = False)
valid1_idx = np.random.choice(list(set(range(60000)) - set(valid2_idx)),10000,replace=False)
train_idx = list(set(range(60000))-set(valid1_idx)-set(valid2_idx))

x_train = data_train_x.iloc[train_idx,:]
y_train = data_train_y.iloc[train_idx]

x_valid1 = data_train_x.iloc[valid1_idx,:]
y_valid1 = data_train_y.iloc[valid1_idx]

x_valid2 = data_train_x.iloc[valid2_idx,:]
y_valid2 = data_train_y.iloc[valid2_idx]
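
A quick check (a sketch) that the three index sets are pairwise disjoint and together cover all 60,000 rows:

# Training and the two validation splits should not overlap.
assert not set(train_idx) & set(valid1_idx)
assert not set(train_idx) & set(valid2_idx)
assert not set(valid1_idx) & set(valid2_idx)
print(len(train_idx), len(valid1_idx), len(valid2_idx))   # 40000 10000 10000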

CNN

Defining a minibatch class

In [6]:
class minibatchData:
    def __init__(self, X, Y):
        self.start_num = 0   # position of the next batch
        self.x = X
        self.y = Y

    def minibatch(self, batch_size):
        # Return the next consecutive slice and advance the counter,
        # wrapping to the start after a full pass; assumes the number
        # of rows is a multiple of batch_size (true for all splits here).
        self.outidx = range(self.start_num, self.start_num + batch_size)
        self.start_num = (self.start_num + batch_size) % (self.x.shape[0])
        return self.x.iloc[self.outidx, :], self.y.iloc[self.outidx]
In [7]:
train_minibatch_data = minibatchData(x_train, y_train)
valid1_minibatch_data = minibatchData(x_valid1, y_valid1)
valid2_minibatch_data = minibatchData(x_valid2, y_valid2)
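
A small usage sketch, run on a throwaway copy so the training iterator above is not advanced: each call returns the next consecutive slice, and the counter wraps to the start after a full pass.

demo = minibatchData(x_train, y_train)
bx, by = demo.minibatch(100)
print(bx.shape, by.shape)   # (100, 784) (100,)
print(demo.start_num)       # 100 -- the next call starts here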

Importing TensorFlow

In [8]:
import tensorflow as tf
from sklearn.metrics import confusion_matrix

Defining weight_variables and bias_variables

In [9]:
def weight_variables(shape):
    # Constant initialization: every weight starts at 1.
    initial = tf.ones(shape)
    return tf.Variable(initial)

def bias_variables(shape):
    # Every bias starts at 0.
    initial = tf.zeros(shape)
    return tf.Variable(initial)

Defining conv2d and maxpool

In [10]:
def conv2d(x,W):
    return tf.nn.conv2d(x, W, strides = [1, 1, 1, 1], padding = 'SAME')

def maxpool(x):
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides = [1, 2, 2, 1], padding = 'SAME')
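
With padding='SAME' and stride 1 the convolutions preserve spatial size, while each 2x2 max-pool halves it, rounding up. A small sketch of the shape bookkeeping the four pooling layers below will perform:

import math

size = 28
for layer in range(4):
    size = math.ceil(size / 2)   # 'SAME' pooling with stride 2
    print("after pool", layer + 1, ":", size)
# after pool 1 : 14, pool 2 : 7, pool 3 : 4, pool 4 : 2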

Layers

In [11]:
x = tf.placeholder("float", [None,784])
x_image = tf.reshape(x, [-1,28,28,1])
y = tf.placeholder("int64", [None,])
y_dummies = tf.one_hot(y,depth = 10)

drop_prob = tf.placeholder("float")
training = tf.placeholder("bool")
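
For reference, tf.one_hot turns each integer label into a 10-dimensional indicator row; a NumPy sketch of the same mapping:

print(np.eye(10)[3])   # label 3 -> [0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]
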
In [12]:
l1_w = weight_variables([5,5,1,128])
l1_b = bias_variables([128])
l1_conv = conv2d(x_image, l1_w) + l1_b
l1_relu = tf.nn.relu(l1_conv)
l1_maxpool = maxpool(l1_relu)
l1_dropout = tf.layers.dropout(l1_maxpool,rate = drop_prob, training = training)
In [13]:
l2_w = weight_variables([5,5,128,256])
l2_conv = conv2d(l1_dropout, l2_w)
# Note: no `training` flag is passed to the convolutional batch-norm
# layers (here and in layers 3-4), so they run in their default mode;
# only the dense-layer batch norms below receive the flag.
l2_batch_normalization = tf.layers.batch_normalization(l2_conv)
l2_leaky_relu = tf.nn.leaky_relu(l2_batch_normalization)
l2_maxpool = maxpool(l2_leaky_relu)
l2_dropout = tf.layers.dropout(l2_maxpool,rate = drop_prob, training = training)
In [14]:
l3_w = weight_variables([5,5,256,384])
l3_conv = conv2d(l2_dropout, l3_w)
l3_batch_normalization = tf.layers.batch_normalization(l3_conv)
l3_leaky_relu = tf.nn.leaky_relu(l3_batch_normalization)
l3_maxpool = maxpool(l3_leaky_relu)
l3_dropout = tf.layers.dropout(l3_maxpool,rate = drop_prob, training = training)
In [15]:
l4_w = weight_variables([5,5,384,512])
l4_conv = conv2d(l3_dropout, l4_w)
l4_batch_normalization = tf.layers.batch_normalization(l4_conv)
l4_leaky_relu = tf.nn.leaky_relu(l4_batch_normalization)
l4_maxpool = maxpool(l4_leaky_relu)
l4_dropout = tf.layers.dropout(l4_maxpool,rate = drop_prob, training = training)
In [16]:
l4_reshape = tf.reshape(l4_dropout,[-1,2048])   # flatten: 2 x 2 spatial x 512 channels = 2048
In [17]:
l5_w = weight_variables([2048,512])
l5_batch_normalization = tf.layers.batch_normalization(l4_reshape, training = training)
l5_inner_product = tf.matmul(l5_batch_normalization, l5_w)
l5_leaky_relu = tf.nn.leaky_relu(l5_inner_product)
l5_dropout = tf.layers.dropout(l5_leaky_relu,rate = drop_prob, training = training)
In [18]:
l6_w = weight_variables([512,128])
l6_batch_normalization = tf.layers.batch_normalization(l5_dropout, training = training)
l6_inner_product = tf.matmul(l6_batch_normalization, l6_w)
l6_leaky_relu = tf.nn.leaky_relu(l6_inner_product)
l6_dropout = tf.layers.dropout(l6_leaky_relu,rate = drop_prob, training = training)
In [19]:
l7_w = weight_variables([128,10])
l7_b = bias_variables([10])
l7_batch_normalization =  tf.layers.batch_normalization(l6_dropout, training = training)
l7_inner_product = tf.matmul(l7_batch_normalization, l7_w) + l7_b
l7_log_softmax = tf.nn.log_softmax(l7_inner_product)

Cross-entropy

In [20]:
xent_loss = -tf.reduce_mean( tf.multiply(y_dummies,l7_log_softmax) )
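
Note that tf.reduce_mean averages over the class dimension as well as the batch, so this value is one tenth of the conventional cross-entropy. A quick NumPy check (a sketch): a uniform prediction over 10 classes gives a loss of ln(10)/10, which matches the scale of the epoch-0 loss printed below.

# One-hot targets zero out 9 of 10 entries per row, so the mean over the
# batch x 10 matrix is -log(p_true) / 10; for uniform p_true = 1/10:
print(np.log(10) / 10)   # ~0.2303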

Accuracy

In [21]:
pred_labels = tf.argmax(l7_log_softmax,axis=1)
acc = tf.reduce_mean(tf.cast(tf.equal(y, pred_labels),"float"))

Training the Model

In [22]:
lr = tf.placeholder("float")
train_step = tf.train.AdamOptimizer(lr).minimize(xent_loss)
In [23]:
saver = tf.train.Saver()
sess = tf.Session()
sess.run(tf.global_variables_initializer())
In [24]:
epochs = 401
batch_size = 100

tmp_xent_loss_3 = [1.0,1.0,1.0]
learning_rate = 0.1
rep_num = int((x_train.shape[0])/batch_size)
max_valid1_acc = .0
valid1_rep_num = int((x_valid1.shape[0])/batch_size)

for i in range(epochs):
    tmp_loss_vec = [.0 for a in range(rep_num)]
    tmp_valid1_acc_vec = [.0 for a in range(valid1_rep_num)]
    tmp_train_acc_vec = [.0 for a in range(rep_num)]
    for j in range(rep_num):
        batch_train_x, batch_train_y = train_minibatch_data.minibatch(batch_size)
        feed_dict = {x : batch_train_x, y : batch_train_y, drop_prob : 3/8, training : True, lr : learning_rate}
        _, tmp_loss_vec[j] = sess.run([train_step,xent_loss], feed_dict = feed_dict)
    
    tmp_xent_loss_3 = [tmp_xent_loss_3[1], tmp_xent_loss_3[2], sum(tmp_loss_vec)/rep_num]
   
    if tmp_xent_loss_3[0] == min(tmp_xent_loss_3):
        learning_rate = learning_rate * .8
        print("lr = " + str(learning_rate) + "  xent : " + str(tmp_xent_loss_3[2]))

    for j in range(valid1_rep_num):
        batch_valid1_x, batch_valid1_y = valid1_minibatch_data.minibatch(batch_size)
        feed_dict = {x : batch_valid1_x, y : batch_valid1_y, drop_prob : 3/8, training : False}
        tmp_valid1_acc_vec[j] = sess.run(acc, feed_dict = feed_dict)

    valid1_acc = sum(tmp_valid1_acc_vec)/valid1_rep_num
    
    if valid1_acc >= max_valid1_acc:
        max_valid1_acc = valid1_acc
        print("epoch : " + str(i) + "  max_valid_acc = " + str(valid1_acc))
        save_path = saver.save(sess, "./CNN5/model.ckpt")
        
    if i % 25 == 0:
        print("epoch : " + str(i) + " -- training cross-entropy : " + str(tmp_xent_loss_3[2]))
        
    if i % 50 == 0:
        for j in range(rep_num):
            batch_train_x, batch_train_y = train_minibatch_data.minibatch(batch_size)
            feed_dict = {x : batch_train_x, y : batch_train_y, drop_prob : 3/8, training : False}
            tmp_train_acc_vec[j] = sess.run(acc, feed_dict = feed_dict)
            
        train_acc = sum(tmp_train_acc_vec)/rep_num
        print("epoch : " + str(i) + " training_acc = " + str(train_acc) + " valid_acc = " + str(valid1_acc))
epoch : 0  max_valid_acc = 0.10259999979287386
epoch : 0 -- training cross-entropy : 0.20075667399913072
epoch : 0 training_acc = 0.09974999983329326 valid_acc = 0.10259999979287386
epoch : 1  max_valid_acc = 0.10259999979287386
epoch : 2  max_valid_acc = 0.10409999992698431
.
.
.
epoch : 110  max_valid_acc = 0.9289000058174133
lr = 0.005497558138880004  xent : 0.002262152568653164
epoch : 113  max_valid_acc = 0.9294000029563904
lr = 0.004398046511104004  xent : 0.0021365167253588877
.
.
.
lr = 2.1872507247830263e-12  xent : 0.0013466358766333997
epoch : 400 -- training cross-entropy : 0.0013466358766333997
epoch : 400 training_acc = 0.9993000006675721 valid_acc = 0.9287000060081482
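
The learning-rate schedule above keeps a window of the last three epoch losses and multiplies the rate by 0.8 whenever the oldest loss in the window is still the smallest, i.e. the loss has not improved for two epochs. The rule in isolation (a sketch):

def maybe_decay(window, new_loss, lr, factor=0.8):
    # Slide the three-epoch window, then decay if the oldest entry
    # is still the minimum.
    window = [window[1], window[2], new_loss]
    if window[0] == min(window):
        lr = lr * factor
    return window, lr

window, rate = [1.0, 1.0, 1.0], 0.1
for loss in [0.9, 0.8, 0.85, 0.84]:
    window, rate = maybe_decay(window, loss, rate)
print(rate)   # ~0.08 -- decayed once, when 0.8 two epochs back stayed best
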
In [25]:
saver.restore(sess, "./CNN5/model.ckpt")
print("Model restored.")
INFO:tensorflow:Restoring parameters from ./CNN5/model.ckpt
Model restored.

Training Accuracy

In [26]:
batch_size = 1000
rep_num = int((x_train.shape[0])/batch_size)
tmp_train_acc_vec = [.0 for a in range(rep_num)]
CNN5_predict_train = []

for j in range(rep_num):
    batch_train_x, batch_train_y = train_minibatch_data.minibatch(batch_size)
    feed_dict = {x : batch_train_x, y : batch_train_y, drop_prob : 3/8, training : False}
    tmp_CNN5_predict_train, tmp_train_acc_vec[j] = sess.run([pred_labels,acc], feed_dict = feed_dict)
    CNN5_predict_train = np.concatenate([CNN5_predict_train, tmp_CNN5_predict_train])

CNN5_train_acc = sum(tmp_train_acc_vec)/rep_num
In [27]:
print(confusion_matrix(CNN5_predict_train,y_train))
print("TRAINING ACCURACY =",CNN5_train_acc)
[[3990    0    0    0    0    0    1    0    0    0]
 [   1 3990    3    2    1    0    4    0    0    0]
 [   0    0 4053    0    0    0    5    0    0    0]
 [   0    0    0 3926    0    0    0    0    0    0]
 [   0    0    0    0 4015    0    3    0    0    0]
 [   1    0    0    1    0 3932    0    0    0    0]
 [   1    0    0    0    0    0 3992    0    0    0]
 [   0    0    0    0    0    0    0 4096    0    0]
 [   0    0    0    0    0    0    0    0 3946    0]
 [   1    0    0    0    0    0    0    7    0 4029]]
TRAINING ACCURACY = 0.9992250084877015

Validation Accuracy

In [28]:
batch_size = 1000
valid1_rep_num = int((x_valid1.shape[0])/batch_size)
tmp_valid1_acc_vec = [.0 for a in range(valid1_rep_num)]
CNN5_predict_valid1 = []

for j in range(valid1_rep_num):
    batch_valid1_x, batch_valid1_y = valid1_minibatch_data.minibatch(batch_size)
    feed_dict = {x : batch_valid1_x, y : batch_valid1_y, drop_prob : 3/8, training : False}
    tmp_CNN5_predict_valid1, tmp_valid1_acc_vec[j] = sess.run([pred_labels,acc], feed_dict = feed_dict)
    CNN5_predict_valid1 = np.concatenate([CNN5_predict_valid1, tmp_CNN5_predict_valid1])

CNN5_valid1_acc = sum(tmp_valid1_acc_vec)/valid1_rep_num
In [29]:
print(confusion_matrix(CNN5_predict_valid1,y_valid1))
print("VALIDATION ACCURACY =",CNN5_valid1_acc)
[[ 904    0   13   10    0    0   79    0    2    0]
 [   7 1025    0   17    7    0    8    0    0    0]
 [   8    0  848    2   37    0   81    0    0    0]
 [  23    1    9  953   33    0   18    0    1    0]
 [   1    0   54   10  890    0   64    0    0    0]
 [   0    0    0    1    1 1055    0   11    2    5]
 [  67    0   21   10   23    0  726    0    3    0]
 [   0    0    0    0    0    2    0  908    0   14]
 [   5    0    0    9    4    1   11    0 1026    0]
 [   0    0    0    0    0    2    0   29    0  959]]
VALIDATION ACCURACY = 0.9293999969959259
In [30]:
{"TRAIN_ACC" : CNN5_train_acc , "VALID_ACC" : CNN5_valid1_acc}
Out[30]:
{'TRAIN_ACC': 0.9992250084877015, 'VALID_ACC': 0.9293999969959259}

Test Accuracy

Here the second held-out split, valid2, serves as the test set; the Kaggle test CSV loaded at the start (x_test, y_test) is not evaluated in this run.

In [31]:
batch_size = 1000
valid2_rep_num = int((x_valid2.shape[0])/batch_size)
tmp_valid2_acc_vec = [.0 for a in range(valid2_rep_num)]
CNN5_predict_valid2 = []

for j in range(valid2_rep_num):
    batch_valid2_x, batch_valid2_y = valid2_minibatch_data.minibatch(batch_size)
    feed_dict = {x : batch_valid2_x, y : batch_valid2_y, drop_prob : 3/8, training : False}
    tmp_CNN5_predict_valid2, tmp_valid2_acc_vec[j] = sess.run([pred_labels,acc], feed_dict = feed_dict)
    CNN5_predict_valid2 = np.concatenate([CNN5_predict_valid2, tmp_CNN5_predict_valid2])

CNN5_valid2_acc = sum(tmp_valid2_acc_vec)/valid2_rep_num
In [32]:
print(confusion_matrix(CNN5_predict_valid2,y_valid2))
print("TEST ACCURACY =",CNN5_valid2_acc)
[[ 897    0    9    9    0    0  102    0    0    0]
 [   3  979    1   15    2    0    5    0    1    0]
 [  17    1  924    3   37    0   82    0    3    0]
 [  17    3    7 1004   25    0   25    0    2    0]
 [   1    0   37   18  908    0   74    0    3    0]
 [   0    1    3    2    0 1001    2    5    3    9]
 [  51    0   18    8   14    0  710    0    3    0]
 [   0    0    0    0    0    7    0  914    1   13]
 [   5    0    0    0    3    0    8    0 1001    1]
 [   0    0    0    0    0    0    0   30    3  970]]
TEST ACCURACY = 0.9308000028133392
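
Since the predictions are passed to confusion_matrix as the first argument, rows index predicted labels and columns index true labels, so per-class recall is the diagonal divided by the column sums. A sketch:

cm = confusion_matrix(CNN5_predict_valid2, y_valid2)
# Column sums count the true examples of each class.
per_class_recall = np.diag(cm) / cm.sum(axis=0)
for label, r in enumerate(per_class_recall):
    print(label, round(float(r), 4))
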
In [33]:
{"TRAIN_ACC" : CNN5_train_acc , "VALID_ACC" : CNN5_valid1_acc , "TEST_ACC" : CNN5_valid2_acc}
Out[33]:
{'TRAIN_ACC': 0.9992250084877015,
 'VALID_ACC': 0.9293999969959259,
 'TEST_ACC': 0.9308000028133392}