FASHION MNIST with Python (DAY 7) - MLP¶
DATA SOURCE : https://www.kaggle.com/zalando-research/fashionmnist (Kaggle, Fashion MNIST)
FASHION MNIST with Python (DAY 1) : http://deepstat.tistory.com/35
FASHION MNIST with Python (DAY 2) : http://deepstat.tistory.com/36
FASHION MNIST with Python (DAY 3) : http://deepstat.tistory.com/37
FASHION MNIST with Python (DAY 4) : http://deepstat.tistory.com/38
FASHION MNIST with Python (DAY 5) : http://deepstat.tistory.com/39
FASHION MNIST with Python (DAY 6) : http://deepstat.tistory.com/40
Datasets¶
Importing numpy, pandas, pyplot¶
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
Loading datasets¶
data_train = pd.read_csv("..\\datasets\\fashion-mnist_train.csv")
data_test = pd.read_csv("..\\datasets\\fashion-mnist_test.csv")
data_train_y = data_train.label
y_test = data_test.label
data_train_x = data_train.drop("label",axis=1)/256
x_test = data_test.drop("label",axis=1)/256
Splitting validation and training sets¶
np.random.seed(0)
valid2_idx = np.random.choice(60000,10000,replace = False)
valid1_idx = np.random.choice(list(set(range(60000)) - set(valid2_idx)),10000,replace=False)
train_idx = list(set(range(60000))-set(valid1_idx)-set(valid2_idx))
x_train = data_train_x.iloc[train_idx,:]
y_train = data_train_y.iloc[train_idx]
x_valid1 = data_train_x.iloc[valid1_idx,:]
y_valid1 = data_train_y.iloc[valid1_idx]
x_valid2 = data_train_x.iloc[valid2_idx,:]
y_valid2 = data_train_y.iloc[valid2_idx]
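A quick sanity check (optional, not part of the original notebook) confirms that the three splits are disjoint and sized 40000/10000/10000:

assert not set(train_idx) & set(valid1_idx)
assert not set(train_idx) & set(valid2_idx)
assert not set(valid1_idx) & set(valid2_idx)
print(x_train.shape, x_valid1.shape, x_valid2.shape)  # (40000, 784) (10000, 784) (10000, 784)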
Multilayer Perceptron (MLP)¶
Importing TensorFlow and scikit-learn¶
import tensorflow as tf
from sklearn.metrics import confusion_matrix
Defining weight_variables and bias_variables¶
def weight_variables(shape):
    # Weights drawn from a truncated normal (TF1 default stddev = 1.0)
    initial = tf.truncated_normal(shape)
    return tf.Variable(initial)

def bias_variables(shape):
    # Biases initialized the same way as the weights
    initial = tf.truncated_normal(shape)
    return tf.Variable(initial)
Constructing the MLP¶
Techniques : leaky ReLU, dropout, maxout, batch normalization, softmax, cross-entropy, Adam
Model : input
  -> [inner product -> leaky ReLU -> dropout]                                (Layer 1)
  -> [batch normalization -> inner product -> reshape -> maxout -> dropout]  (Layer 2)
  -> [inner product -> leaky ReLU -> dropout]                                (Layer 3)
  -> [batch normalization -> inner product -> reshape -> maxout -> softmax]  (Layer 4)
  -> output
Loss : cross-entropy
Optimizer : Adam
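Maxout is the least standard piece here: it splits the units into groups and keeps only the maximum of each group, which is exactly what the reshape-then-max trick in layers 2 and 4 implements. A minimal NumPy sketch (illustrative only; maxout_np is a made-up helper, not part of the model):

import numpy as np

def maxout_np(z, num_groups, group_size):
    # (batch, num_groups * group_size) -> (batch, num_groups): max within each group
    return z.reshape(-1, num_groups, group_size).max(axis=2)

z = np.arange(16.).reshape(2, 8)                 # batch of 2, 8 units each
print(maxout_np(z, num_groups=4, group_size=2))  # [[ 1.  3.  5.  7.] [ 9. 11. 13. 15.]]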
Inputs¶
x = tf.placeholder("float", [None,784])
y = tf.placeholder("int64", [None,])
y_dummies = tf.one_hot(y,depth = 10)
drop_prob = tf.placeholder("float")
training = tf.placeholder("bool")
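tf.one_hot turns each integer label into the indicator row that the cross-entropy below multiplies against the log-probabilities. A throwaway example (not in the original notebook):

with tf.Session() as tmp_sess:
    print(tmp_sess.run(tf.one_hot([3, 0], depth=10)))
# [[0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]
#  [1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]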
Layer1¶
[inner product -> leaky_relu -> dropout]
l1_w = weight_variables([784,640])
l1_b = bias_variables([640])
l1_inner_product = tf.matmul(x, l1_w) + l1_b
l1_leaky_relu = tf.nn.leaky_relu(l1_inner_product)
l1_dropout = tf.layers.dropout(l1_leaky_relu,rate = drop_prob, training = training)
Layer2¶
[batch normalization -> inner product -> reshape -> maxout -> dropout]
l2_w = weight_variables([640,640])
l2_b = bias_variables([640])
l2_batch_normalization = tf.layers.batch_normalization(l1_dropout, training = training)
l2_inner_product = tf.matmul(l2_batch_normalization, l2_w) + l2_b
l2_reshape = tf.reshape(l2_inner_product,[-1,80,8])
l2_maxout = tf.reshape(
tf.contrib.layers.maxout(l2_reshape,num_units=1),
[-1,80])
l2_dropout = tf.layers.dropout(l2_maxout,rate = drop_prob, training = training)
Layer3¶
[inner product -> leaky_relu -> dropout]
l3_w = weight_variables([80,80])
l3_b = bias_variables([80])
l3_inner_product = tf.matmul(l2_dropout, l3_w) + l3_b
l3_leaky_relu = tf.nn.leaky_relu(l3_inner_product)
l3_dropout = tf.layers.dropout(l3_leaky_relu,rate = drop_prob, training = training)
Layer4¶
[batch normalization -> inner product -> reshape -> maxout -> softmax]
l4_w = weight_variables([80,80])
l4_b = bias_variables([80])
l4_batch_normalization = tf.layers.batch_normalization(l3_dropout, training = training)
l4_inner_product = tf.matmul(l4_batch_normalization, l4_w) + l4_b
l4_reshape = tf.reshape(l4_inner_product,[-1,10,8])
l4_maxout = tf.reshape(
tf.contrib.layers.maxout(l4_reshape,num_units=1),
[-1,10])
l4_log_softmax = tf.nn.log_softmax(l4_maxout)
Cross-entropy¶
xent_loss = -tf.reduce_sum( tf.multiply(y_dummies,l4_log_softmax) )
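For reference, the same loss can be computed from the pre-softmax activations with TensorFlow's fused op (available in TF >= 1.5), which is more numerically stable; this is an equivalent formulation, not what the notebook actually runs:

xent_loss_v2 = tf.reduce_sum(
    tf.nn.softmax_cross_entropy_with_logits_v2(labels=y_dummies, logits=l4_maxout))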
Accuracy¶
pred_labels = tf.argmax(l4_log_softmax,axis=1)
acc = tf.reduce_mean(tf.cast(tf.equal(y, pred_labels),"float"))
Training the Model¶
lr = tf.placeholder("float")
# Run batch-norm's moving-average update ops with each training step;
# otherwise inference (training=False) would use the initial statistics.
with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
    train_step = tf.train.AdamOptimizer(lr).minimize(xent_loss)
saver = tf.train.Saver()
sess = tf.Session()
sess.run(tf.global_variables_initializer())
batch_size = 64
for i in range(200001):
batch_obs = np.random.choice(x_train.shape[0],batch_size,replace=False)
batch_train_x = x_train.iloc[batch_obs]
batch_train_y = y_train.iloc[batch_obs]
feed_dict = {x : batch_train_x, y : batch_train_y, drop_prob : .15, training : True, lr : 0.01}
_, tmp = sess.run([train_step,xent_loss], feed_dict = feed_dict)
if i % 10000 == 0:
print("step " + str(i) + " training cross-entropy : " + str(tmp))
if i % 40000 == 0:
feed_dict = {x : x_train, y : y_train, drop_prob : .15, training : False}
train_acc = sess.run(acc, feed_dict = feed_dict)
feed_dict = {x : x_valid1, y : y_valid1, drop_prob : .15, training : False}
valid1_acc = sess.run(acc, feed_dict = feed_dict)
print("step " + str(i) + " training_acc = " + str(train_acc) + " valid_acc = " + str(valid1_acc))
save_path = saver.save(sess, "./MLP/model.ckpt")
print("Model saved in path: " + save_path)
batch_size = 64
for i in range(200001):
batch_obs = np.random.choice(x_train.shape[0],batch_size,replace=False)
batch_train_x = x_train.iloc[batch_obs]
batch_train_y = y_train.iloc[batch_obs]
feed_dict = {x : batch_train_x, y : batch_train_y, drop_prob : .15, training : True, lr : 0.001}
_, tmp = sess.run([train_step,xent_loss], feed_dict = feed_dict)
if i % 10000 == 0:
print("step " + str(i) + " training cross-entropy : " + str(tmp))
if i % 40000 == 0:
feed_dict = {x : x_train, y : y_train, drop_prob : .15, training : False}
train_acc = sess.run(acc, feed_dict = feed_dict)
feed_dict = {x : x_valid1, y : y_valid1, drop_prob : .15, training : False}
valid1_acc = sess.run(acc, feed_dict = feed_dict)
print("step " + str(i) + " training_acc = " + str(train_acc) + " valid_acc = " + str(valid1_acc))
save_path = saver.save(sess, "./MLP/model.ckpt")
print("Model saved in path: " + save_path)
Training Accuracy¶
feed_dict = {x : x_train, y : y_train, drop_prob : .15, training : False}
MLP_predict_train, MLP_train_acc = sess.run([pred_labels,acc], feed_dict = feed_dict)
print(confusion_matrix(y_train, MLP_predict_train))  # rows = true labels, columns = predicted
print("TRAINING ACCURACY =",MLP_train_acc)
Validation Accuracy¶
feed_dict = {x : x_valid1, y : y_valid1, drop_prob : .15, training : False}
MLP_predict_valid1, MLP_valid1_acc = sess.run([pred_labels,acc], feed_dict = feed_dict)
print(confusion_matrix(y_valid1, MLP_predict_valid1))  # rows = true labels, columns = predicted
print("VALIDATION ACCURACY =",MLP_valid1_acc)
{"TRAIN_ACC" : MLP_train_acc , "VALID_ACC" : MLP_valid1_acc}