FASHION MNIST with Python (DAY 8) - CNN¶
DATA SOURCE : https://www.kaggle.com/zalando-research/fashionmnist (Kaggle, Fashion MNIST)
FASHION MNIST with Python (DAY 1) : http://deepstat.tistory.com/35
FASHION MNIST with Python (DAY 2) : http://deepstat.tistory.com/36
FASHION MNIST with Python (DAY 3) : http://deepstat.tistory.com/37
FASHION MNIST with Python (DAY 4) : http://deepstat.tistory.com/38
FASHION MNIST with Python (DAY 5) : http://deepstat.tistory.com/39
FASHION MNIST with Python (DAY 6) : http://deepstat.tistory.com/40
FASHION MNIST with Python (DAY 7) : http://deepstat.tistory.com/41
Datasets¶
Importing numpy, pandas, pyplot¶
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
Loading datasets¶
data_train = pd.read_csv("../datasets/fashion-mnist_train.csv")
data_test = pd.read_csv("../datasets/fashion-mnist_test.csv")
data_train_y = data_train.label
y_test = data_test.label
# scale the 0-255 pixel values into [0, 1)
data_train_x = data_train.drop("label",axis=1)/256
x_test = data_test.drop("label",axis=1)/256
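pyplot is imported above but not otherwise used in this section; a quick look at one row makes the 784-pixel layout concrete. The display snippet below is an illustration added here, not a cell from the original notebook:

plt.imshow(data_train_x.iloc[0].values.reshape(28, 28), cmap = "gray")  # one 28x28 grayscale image
plt.title("label = " + str(data_train_y.iloc[0]))                       # class labels run 0-9
plt.show()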
Splitting validation and training sets¶
np.random.seed(0)
# hold out two disjoint validation sets of 10,000 rows each; the remaining 40,000 rows are used for training
valid2_idx = np.random.choice(60000, 10000, replace=False)
valid1_idx = np.random.choice(list(set(range(60000)) - set(valid2_idx)), 10000, replace=False)
train_idx = list(set(range(60000)) - set(valid1_idx) - set(valid2_idx))
x_train = data_train_x.iloc[train_idx,:]
y_train = data_train_y.iloc[train_idx]
x_valid1 = data_train_x.iloc[valid1_idx,:]
y_valid1 = data_train_y.iloc[valid1_idx]
x_valid2 = data_train_x.iloc[valid2_idx,:]
y_valid2 = data_train_y.iloc[valid2_idx]
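Since the three index sets are drawn without replacement from the same 60,000 rows, they should be disjoint, leaving 40,000 rows for training and 10,000 for each validation set. A quick check, added here as an illustration:

# sanity check: 40,000 train / 10,000 valid1 / 10,000 valid2, no overlap
assert len(train_idx) == 40000 and len(valid1_idx) == 10000 and len(valid2_idx) == 10000
assert not (set(train_idx) & set(valid1_idx)) and not (set(train_idx) & set(valid2_idx))
assert not (set(valid1_idx) & set(valid2_idx))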
Convolutional Neural Network (CNN)¶
Importing TensorFlow¶
import tensorflow as tf
from sklearn.metrics import confusion_matrix
Defining weight_variables and bias_variables¶
def weight_variables(shape):
    # weights drawn from a truncated normal (default stddev = 1.0)
    initial = tf.truncated_normal(shape)
    return tf.Variable(initial)

def bias_variables(shape):
    # biases initialized to zero
    initial = tf.zeros(shape)
    return tf.Variable(initial)
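Note that tf.truncated_normal defaults to a standard deviation of 1.0, which is large for convolution weights. A smaller scale is a common alternative; the variant below is a sketch, not what this notebook actually ran:

def weight_variables_scaled(shape, stddev=0.1):
    # hypothetical variant: a smaller initial scale often makes early training more stable
    return tf.Variable(tf.truncated_normal(shape, stddev=stddev))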
Defining conv2d and maxpool¶
def conv2d(x, W):
    # stride-1 convolution with VALID padding: no zero-padding, so each 5x5 kernel shrinks the feature map by 4
    return tf.nn.conv2d(x, W, strides = [1, 1, 1, 1], padding = 'VALID')

def maxpool(x):
    # 2x2 max-pooling with stride 2 halves the spatial dimensions
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides = [1, 2, 2, 1], padding = 'VALID')
Constructing the CNN¶
Convolution, Maxout, Max-pooling, Batch Normalization, Dropout, Softmax, Cross-Entropy, Adam
Model : input -> [convolution -> maxout -> dropout] -> [convolution -> batch normalization -> maxout -> maxpool -> dropout] -> [convolution -> batch normalization -> maxout -> dropout] -> flatten -> [batch normalization -> inner product -> softmax] -> output
Loss : cross-entropy
Optimizer : Adam
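Every convolution uses VALID padding, so it is worth tracing the spatial dimensions before reading the code. The arithmetic below is a sanity check added here, not a cell from the original notebook; it reproduces the 6*6*4 size used to flatten layer 3:

# VALID 5x5 conv: size -> size - 4 ; 2x2 maxpool: size -> size // 2
size = 28
size -= 4    # layer 1 conv : 28 -> 24
size -= 4    # layer 2 conv : 24 -> 20
size //= 2   # layer 2 maxpool : 20 -> 10
size -= 4    # layer 3 conv : 10 -> 6
print(size)  # 6, so the flattened layer has 6*6*4 = 144 units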
Inputs¶
x = tf.placeholder("float", [None,784])
x_image = tf.reshape(x, [-1,28,28,1])   # flat 784-pixel rows reshaped to 28x28 grayscale images
y = tf.placeholder("int64", [None])
y_dummies = tf.one_hot(y, depth = 10)   # one-hot encoding of the 10 class labels
drop_prob = tf.placeholder("float")     # dropout rate; ignored when training is False
training = tf.placeholder("bool")
l1_w = weight_variables([5,5,1,4*8])    # 5x5 kernels, 1 input channel, 32 output channels
l1_b = bias_variables([4*8])
l1_conv = conv2d(x_image, l1_w) + l1_b
l1_maxout = tf.contrib.layers.maxout(l1_conv, 8)    # max over groups of 4 channels: 32 -> 8
l1_dropout = tf.layers.dropout(l1_maxout, rate = drop_prob, training = training)

l2_w = weight_variables([5,5,8,4*8])
l2_conv = conv2d(l1_dropout, l2_w)
# training flag so BN uses batch statistics while training and moving averages at test time
l2_batch_normalization = tf.layers.batch_normalization(l2_conv, training = training)
l2_maxout = tf.contrib.layers.maxout(l2_batch_normalization, 8)
l2_maxpool = maxpool(l2_maxout)
l2_dropout = tf.layers.dropout(l2_maxpool, rate = drop_prob, training = training)

l3_w = weight_variables([5,5,8,8*4])
l3_conv = conv2d(l2_dropout, l3_w)
l3_batch_normalization = tf.layers.batch_normalization(l3_conv, training = training)
l3_maxout = tf.contrib.layers.maxout(l3_batch_normalization, 4)    # 32 -> 4 channels
l3_dropout = tf.layers.dropout(l3_maxout, rate = drop_prob, training = training)
l3_reshape = tf.reshape(l3_dropout, [-1,6*6*4])    # flatten the 6x6x4 feature maps

l4_w = weight_variables([6*6*4,10])
l4_b = bias_variables([10])
l4_batch_normalization = tf.layers.batch_normalization(l3_reshape, training = training)
l4_inner_prod = tf.matmul(l4_batch_normalization, l4_w) + l4_b
l4_log_softmax = tf.nn.log_softmax(l4_inner_prod)
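tf.contrib.layers.maxout(inputs, num_units) splits the channel dimension into num_units groups and keeps the element-wise maximum of each group, which is why each convolution produces 4*8 channels that maxout then reduces to 8 (or 4). A minimal shape check, added here as an illustration:

demo = tf.ones([1, 24, 24, 32])                # 32 channels in
demo_out = tf.contrib.layers.maxout(demo, 8)   # max over groups of 4 channels
print(demo_out.shape)                          # (1, 24, 24, 8)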
Cross-entropy¶
xent_loss = -tf.reduce_sum( tf.multiply(y_dummies,l4_log_softmax) )
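Multiplying the one-hot labels by the log-softmax and summing gives the (summed, not averaged) cross-entropy. An equivalent and numerically safer formulation, shown as a sketch rather than what this notebook runs, applies TensorFlow's fused op directly to the logits:

# hypothetical equivalent: fused softmax + cross-entropy on the raw logits
xent_loss_fused = tf.reduce_sum(
    tf.nn.softmax_cross_entropy_with_logits_v2(labels = y_dummies, logits = l4_inner_prod))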
Accuracy¶
pred_labels = tf.argmax(l4_log_softmax,axis=1)
acc = tf.reduce_mean(tf.cast(tf.equal(y, pred_labels),"float"))
Training the Model¶
lr = tf.placeholder("float")
# run the batch-normalization moving-average updates together with every training step
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
    train_step = tf.train.AdamOptimizer(lr).minimize(xent_loss)
saver = tf.train.Saver()
sess = tf.Session()
sess.run(tf.global_variables_initializer())
batch_size = 1000
for i in range(8001):
batch_obs = np.random.choice(x_train.shape[0],batch_size,replace=False)
batch_train_x = x_train.iloc[batch_obs]
batch_train_y = y_train.iloc[batch_obs]
feed_dict = {x : batch_train_x, y : batch_train_y, drop_prob : .125, training : True, lr : 0.1}
_, tmp = sess.run([train_step,xent_loss], feed_dict = feed_dict)
if i % 2000 == 0:
print("step " + str(i) + " training cross-entropy : " + str(tmp))
if i % 4000 == 0:
feed_dict = {x : batch_train_x, y : batch_train_y, drop_prob : .125, training : False}
train_acc = sess.run(acc, feed_dict = feed_dict)
feed_dict = {x : x_valid1, y : y_valid1, drop_prob : .125, training : False}
valid1_acc = sess.run(acc, feed_dict = feed_dict)
print("step " + str(i) + " training_acc = " + str(train_acc) + " valid_acc = " + str(valid1_acc))
save_path = saver.save(sess, "./CNN/model.ckpt")
print("Model saved in path: " + save_path)
batch_size = 1000
for i in range(16001):
batch_obs = np.random.choice(x_train.shape[0],batch_size,replace=False)
batch_train_x = x_train.iloc[batch_obs]
batch_train_y = y_train.iloc[batch_obs]
feed_dict = {x : batch_train_x, y : batch_train_y, drop_prob : .125, training : True, lr : 0.01}
_, tmp = sess.run([train_step,xent_loss], feed_dict = feed_dict)
if i % 4000 == 0:
print("step " + str(i) + " training cross-entropy : " + str(tmp))
if i % 8000 == 0:
feed_dict = {x : batch_train_x, y : batch_train_y, drop_prob : .125, training : False}
train_acc = sess.run(acc, feed_dict = feed_dict)
feed_dict = {x : x_valid1, y : y_valid1, drop_prob : .125, training : False}
valid1_acc = sess.run(acc, feed_dict = feed_dict)
print("step " + str(i) + " training_acc = " + str(train_acc) + " valid_acc = " + str(valid1_acc))
save_path = saver.save(sess, "./CNN/model.ckpt")
print("Model saved in path: " + save_path)
batch_size = 1000
for i in range(200001):
batch_obs = np.random.choice(x_train.shape[0],batch_size,replace=False)
batch_train_x = x_train.iloc[batch_obs]
batch_train_y = y_train.iloc[batch_obs]
feed_dict = {x : batch_train_x, y : batch_train_y, drop_prob : .125, training : True, lr : 0.001}
_, tmp = sess.run([train_step,xent_loss], feed_dict = feed_dict)
if i % 20000 == 0:
print("step " + str(i) + " training cross-entropy : " + str(tmp))
if i % 40000 == 0:
feed_dict = {x : batch_train_x, y : batch_train_y, drop_prob : .125, training : False}
train_acc = sess.run(acc, feed_dict = feed_dict)
feed_dict = {x : x_valid1, y : y_valid1, drop_prob : .125, training : False}
valid1_acc = sess.run(acc, feed_dict = feed_dict)
print("step " + str(i) + " training_acc = " + str(train_acc) + " valid_acc = " + str(valid1_acc))
save_path = saver.save(sess, "./CNN/model.ckpt")
print("Model saved in path: " + save_path)
batch_size = 1000
for i in range(400001):
batch_obs = np.random.choice(x_train.shape[0],batch_size,replace=False)
batch_train_x = x_train.iloc[batch_obs]
batch_train_y = y_train.iloc[batch_obs]
feed_dict = {x : batch_train_x, y : batch_train_y, drop_prob : .125, training : True, lr : 0.0001}
_, tmp = sess.run([train_step,xent_loss], feed_dict = feed_dict)
if i % 40000 == 0:
print("step " + str(i) + " training cross-entropy : " + str(tmp))
if i % 80000 == 0:
feed_dict = {x : batch_train_x, y : batch_train_y, drop_prob : .125, training : False}
train_acc = sess.run(acc, feed_dict = feed_dict)
feed_dict = {x : x_valid1, y : y_valid1, drop_prob : .125, training : False}
valid1_acc = sess.run(acc, feed_dict = feed_dict)
print("step " + str(i) + " training_acc = " + str(train_acc) + " valid_acc = " + str(valid1_acc))
save_path = saver.save(sess, "./CNN/model.ckpt")
print("Model saved in path: " + save_path)
Training Accuracy¶
feed_dict1 = {x : x_train.iloc[0:8000], y : y_train.iloc[0:8000], drop_prob : .125, training : False}
MLP_predict_train1, MLP_train_acc1 = sess.run([pred_labels,acc], feed_dict = feed_dict1)
feed_dict2 = {x : x_train.iloc[8000:16000], y : y_train.iloc[8000:16000], drop_prob : .125, training : False}
MLP_predict_train2, MLP_train_acc2 = sess.run([pred_labels,acc], feed_dict = feed_dict2)
feed_dict3 = {x : x_train.iloc[16000:24000], y : y_train.iloc[16000:24000], drop_prob : .125, training : False}
MLP_predict_train3, MLP_train_acc3 = sess.run([pred_labels,acc], feed_dict = feed_dict3)
feed_dict4 = {x : x_train.iloc[24000:32000], y : y_train.iloc[24000:32000], drop_prob : .125, training : False}
MLP_predict_train4, MLP_train_acc4 = sess.run([pred_labels,acc], feed_dict = feed_dict4)
feed_dict5 = {x : x_train.iloc[32000:40000], y : y_train.iloc[32000:40000], drop_prob : .125, training : False}
MLP_predict_train5, MLP_train_acc5 = sess.run([pred_labels,acc], feed_dict = feed_dict5)
MLP_predict_train = np.concatenate((MLP_predict_train1, MLP_predict_train2,
                                    MLP_predict_train3, MLP_predict_train4, MLP_predict_train5), axis=None)
MLP_train_acc = np.mean((MLP_train_acc1, MLP_train_acc2, MLP_train_acc3, MLP_train_acc4, MLP_train_acc5))
print(confusion_matrix(MLP_predict_train,y_train))
print("TRAINING ACCURACY =",MLP_train_acc)
Validation Accuracy¶
feed_dict = {x : x_valid1, y : y_valid1, drop_prob : .125, training : False}
MLP_predict_valid1, MLP_valid1_acc = sess.run([pred_labels,acc], feed_dict = feed_dict)
print(confusion_matrix(MLP_predict_valid1,y_valid1))
print("VALIDATION ACCURACY =",MLP_valid1_acc)
{"TRAIN_ACC" : MLP_train_acc , "VALID_ACC" : MLP_valid1_acc}