FASHION MNIST with Python (DAY 9) - MLP using reused variables
FASHION MNIST with Python (DAY 9)¶
DATA SOURCE : https://www.kaggle.com/zalando-research/fashionmnist (Kaggle, Fashion MNIST)
FASHION MNIST with Python (DAY 1) : http://deepstat.tistory.com/35
FASHION MNIST with Python (DAY 2) : http://deepstat.tistory.com/36
FASHION MNIST with Python (DAY 3) : http://deepstat.tistory.com/37
FASHION MNIST with Python (DAY 4) : http://deepstat.tistory.com/38
FASHION MNIST with Python (DAY 5) : http://deepstat.tistory.com/39
FASHION MNIST with Python (DAY 6) : http://deepstat.tistory.com/40
FASHION MNIST with Python (DAY 7) : http://deepstat.tistory.com/41
FASHION MNIST with Python (DAY 8) : http://deepstat.tistory.com/42
Datasets¶
Importing numpy, pandas, pyplot¶
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
Loading datasets¶
data_train = pd.read_csv("../datasets/fashion-mnist_train.csv")
data_test = pd.read_csv("../datasets/fashion-mnist_test.csv")
data_train_y = data_train.label
y_test = data_test.label
data_train_x = data_train.drop("label",axis=1)/256
x_test = data_test.drop("label",axis=1)/256
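A quick sanity check of the loaded frames (an added sketch, not part of the original notebook; it assumes the Kaggle CSVs sit in ../datasets/ as above):

print(data_train_x.shape, x_test.shape)   # expected: (60000, 784) and (10000, 784) after dropping the label column
print(data_train_y.shape, y_test.shape)   # expected: (60000,) and (10000,)
print(float(data_train_x.values.max()))   # just under 1.0, since the 0-255 pixels were divided by 256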
Splitting validation and training sets¶
np.random.seed(0)
valid2_idx = np.random.choice(60000,10000,replace = False)
valid1_idx = np.random.choice(list(set(range(60000)) - set(valid2_idx)),10000,replace=False)
train_idx = list(set(range(60000))-set(valid1_idx)-set(valid2_idx))
x_train = data_train_x.iloc[train_idx,:]
y_train = data_train_y.iloc[train_idx]
x_valid1 = data_train_x.iloc[valid1_idx,:]
y_valid1 = data_train_y.iloc[valid1_idx]
x_valid2 = data_train_x.iloc[valid2_idx,:]
y_valid2 = data_train_y.iloc[valid2_idx]
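The three index sets should partition the 60,000 training rows into 40,000 / 10,000 / 10,000 without overlap; a minimal check (an added sketch, not part of the original code):

print(len(train_idx), len(valid1_idx), len(valid2_idx))            # 40000 10000 10000
print(len(set(train_idx) | set(valid1_idx) | set(valid2_idx)))     # 60000, so the three splits are disjoint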
MLP with re-used variables¶
Importing TensorFlow¶
import tensorflow as tf
from sklearn.metrics import confusion_matrix
Defining weight_variables and bias_variables¶
def weight_variables(shape):
    initial = tf.truncated_normal(shape)
    return tf.Variable(initial)

def bias_variables(shape):
    initial = tf.truncated_normal(shape)
    return tf.Variable(initial)
Constructing the MLP with re-used variables¶
Linear, ReLU, leaky ReLU, ELU, SELU, Sigmoid, arctan, tanh, softsign, softplus, softmax, Maxout, Dropout, Batch Normalization, cross entropy, Adam
Model : input -> [inner product -> dropout] -> [batch normalization -> inner product -> [Linear, ReLU, leaky ReLU, ELU, SELU, Sigmoid, arctan, tanh, softsign, softplus, softmax, Maxout]*160 -> dropout]*10 -> [batch normalization -> inner product -> softmax] -> output
(Layer 1 maps the 784 input pixels to 1920 units; each of the ten shared middle layers maps those 1920 units into twelve activation branches of 160 units each, concatenated back to 1920; the final layer maps 1920 units to the 10 classes.)
Loss : cross entropy
Optimizer : Adam
def weight_reuse_layer(inputs, training, drop_prob):
    # All ten middle layers call this function; with reuse=tf.AUTO_REUSE every call after
    # the first one re-uses the variables created inside the "deepstat" scope instead of
    # creating new ones.
    with tf.variable_scope("deepstat", reuse=tf.AUTO_REUSE):
        w_linear = tf.get_variable("w_linear", [1920,160], initializer = tf.initializers.random_uniform(-1,1))
        b_linear = tf.get_variable("b_linear", [160], initializer = tf.initializers.random_uniform(-1,1))
        w_relu = tf.get_variable("w_relu", [1920,160], initializer = tf.initializers.random_uniform(-1,1))
        b_relu = tf.get_variable("b_relu", [160], initializer = tf.initializers.random_uniform(-1,1))
        w_leaky_relu = tf.get_variable("w_leaky_relu", [1920,160], initializer = tf.initializers.random_uniform(-1,1))
        b_leaky_relu = tf.get_variable("b_leaky_relu", [160], initializer = tf.initializers.random_uniform(-1,1))
        w_elu = tf.get_variable("w_elu", [1920,160], initializer = tf.initializers.random_uniform(-1,1))
        b_elu = tf.get_variable("b_elu", [160], initializer = tf.initializers.random_uniform(-1,1))
        w_selu = tf.get_variable("w_selu", [1920,160], initializer = tf.initializers.random_uniform(-1,1))
        b_selu = tf.get_variable("b_selu", [160], initializer = tf.initializers.random_uniform(-1,1))
        w_sigmoid = tf.get_variable("w_sigmoid", [1920,160], initializer = tf.initializers.random_uniform(-1,1))
        b_sigmoid = tf.get_variable("b_sigmoid", [160], initializer = tf.initializers.random_uniform(-1,1))
        w_atan = tf.get_variable("w_atan", [1920,160], initializer = tf.initializers.random_uniform(-1,1))
        b_atan = tf.get_variable("b_atan", [160], initializer = tf.initializers.random_uniform(-1,1))
        w_tanh = tf.get_variable("w_tanh", [1920,160], initializer = tf.initializers.random_uniform(-1,1))
        b_tanh = tf.get_variable("b_tanh", [160], initializer = tf.initializers.random_uniform(-1,1))
        w_softsign = tf.get_variable("w_softsign", [1920,160], initializer = tf.initializers.random_uniform(-1,1))
        b_softsign = tf.get_variable("b_softsign", [160], initializer = tf.initializers.random_uniform(-1,1))
        w_softplus = tf.get_variable("w_softplus", [1920,160], initializer = tf.initializers.random_uniform(-1,1))
        b_softplus = tf.get_variable("b_softplus", [160], initializer = tf.initializers.random_uniform(-1,1))
        w_log_softmax = tf.get_variable("w_log_softmax", [1920,160], initializer = tf.initializers.random_uniform(-1,1))
        b_log_softmax = tf.get_variable("b_log_softmax", [160], initializer = tf.initializers.random_uniform(-1,1))
        w_maxout = tf.get_variable("w_maxout", [1920,320], initializer = tf.initializers.random_uniform(-1,1))
        b_maxout = tf.get_variable("b_maxout", [320], initializer = tf.initializers.random_uniform(-1,1))

        # batch normalization, then one inner product per activation branch
        l_batch_normalization = tf.layers.batch_normalization(inputs, training = training)
        l_linear = tf.matmul(l_batch_normalization, w_linear) + b_linear
        l_relu = tf.nn.relu(tf.matmul(l_batch_normalization, w_relu) + b_relu)
        l_leaky_relu = tf.nn.leaky_relu(tf.matmul(l_batch_normalization, w_leaky_relu) + b_leaky_relu)
        l_elu = tf.nn.elu(tf.matmul(l_batch_normalization, w_elu) + b_elu)
        l_selu = tf.nn.selu(tf.matmul(l_batch_normalization, w_selu) + b_selu)
        l_sigmoid = tf.nn.sigmoid(tf.matmul(l_batch_normalization, w_sigmoid) + b_sigmoid)
        l_atan = tf.atan(tf.matmul(l_batch_normalization, w_atan) + b_atan)
        l_tanh = tf.nn.tanh(tf.matmul(l_batch_normalization, w_tanh) + b_tanh)
        l_softsign = tf.nn.softsign(tf.matmul(l_batch_normalization, w_softsign) + b_softsign)
        l_softplus = tf.nn.softplus(tf.matmul(l_batch_normalization, w_softplus) + b_softplus)
        l_log_softmax = tf.nn.log_softmax(tf.matmul(l_batch_normalization, w_log_softmax) + b_log_softmax)
        # Maxout: the 320 outputs are reshaped into 160 pairs and the maximum of each pair is kept
        l_maxout = tf.reshape(
            tf.contrib.layers.maxout(
                tf.reshape(
                    tf.matmul(
                        l_batch_normalization, w_maxout) + b_maxout,
                    [-1,160,2]),
                num_units=1),
            [-1,160])
        # concatenate the twelve 160-unit branches back into 1920 units, then dropout
        l_concat = tf.concat([
            l_linear,l_relu,l_leaky_relu,l_elu,l_selu,l_sigmoid,
            l_atan,l_tanh,l_softsign,l_softplus,l_log_softmax,l_maxout
            ], 1)
        l_dropout = tf.layers.dropout(l_concat, rate = drop_prob, training = training)
        return l_dropout
Inputs¶
x = tf.placeholder("float", [None,784])
y = tf.placeholder("int64", [None,])
y_dummies = tf.one_hot(y,depth = 10)
drop_prob = tf.placeholder("float")
training = tf.placeholder("bool")
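tf.one_hot turns each integer label into a length-10 indicator vector, which the cross-entropy below expects; for instance (a small added sketch, not in the original notebook):

with tf.Session() as tmp_sess:
    print(tmp_sess.run(tf.one_hot([3], depth=10)))
    # [[0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]]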
Layer1¶
[inner product -> dropout]
l1_w = weight_variables([784,1920])
l1_b = bias_variables([1920])
l1_inner_product = tf.matmul(x, l1_w) + l1_b
l1_dropout = tf.layers.dropout(l1_inner_product,rate = drop_prob, training = training)
Layer2-11¶
[batch normalization -> inner product -> [Linear, ReLU, leaky ReLU, ELU, SELU, Sigmoid, arctan, tanh, softsign, softplus, softmax, Maxout]*160 -> dropout]
l2 = weight_reuse_layer(l1_dropout, training, drop_prob)
l3 = weight_reuse_layer(l2, training, drop_prob)
l4 = weight_reuse_layer(l3, training, drop_prob)
l5 = weight_reuse_layer(l4, training, drop_prob)
l6 = weight_reuse_layer(l5, training, drop_prob)
l7 = weight_reuse_layer(l6, training, drop_prob)
l8 = weight_reuse_layer(l7, training, drop_prob)
l9 = weight_reuse_layer(l8, training, drop_prob)
l10 = weight_reuse_layer(l9, training, drop_prob)
l11 = weight_reuse_layer(l10, training, drop_prob)
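Because all ten calls above run under the same tf.variable_scope("deepstat", reuse=tf.AUTO_REUSE), they share one set of w_*/b_* variables rather than creating ten copies. A minimal sketch to confirm this once the graph is built (an added check, not in the original post):

shared_vars = [v.name for v in tf.global_variables()
               if v.name.startswith("deepstat/w_") or v.name.startswith("deepstat/b_")]
print(len(shared_vars))   # 24: one variable per w_*/b_* name, even though the layer is applied ten times
print(shared_vars[:4])    # e.g. ['deepstat/w_linear:0', 'deepstat/b_linear:0', ...]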
Layer12¶
[batch normalization -> inner product -> softmax]
l12_w = weight_variables([1920,10])
l12_b = bias_variables([10])
l12_batch_normalization = tf.layers.batch_normalization(l11, training = training)
l12_inner_product = tf.matmul(l12_batch_normalization, l12_w) + l12_b
l12_log_softmax = tf.nn.log_softmax(l12_inner_product)
Cross-entropy¶
xent_loss = -tf.reduce_sum( tf.multiply(y_dummies,l12_log_softmax) )
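The line above is the summed (not averaged) multi-class cross entropy over the batch: writing z for the layer-12 logits, p for their softmax (so l12_log_softmax holds log p), and y for the one-hot labels,

$$\mathrm{xent} = -\sum_{n}\sum_{k=1}^{10} y_{nk}\,\log p_{nk}, \qquad \log p_{nk} = z_{nk} - \log\sum_{j=1}^{10} e^{z_{nj}}.$$

Because it is a sum rather than a mean, its value scales with batch_size.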
Accuracy¶
pred_labels = tf.argmax(l12_log_softmax,axis=1)
acc = tf.reduce_mean(tf.cast(tf.equal(y, pred_labels),"float"))
Training the Model¶
lr = tf.placeholder("float")
train_step = tf.train.AdamOptimizer(lr).minimize(xent_loss)
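One caveat worth noting (an added remark, not from the original post): tf.layers.batch_normalization keeps the update ops for its moving mean and variance in the tf.GraphKeys.UPDATE_OPS collection, and the optimizer does not run them automatically. If those running statistics are meant to be refreshed during training, the usual TF1 pattern is to build the train step under a control dependency, roughly:

update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):                        # run the moving-average updates with every training step
    train_step = tf.train.AdamOptimizer(lr).minimize(xent_loss)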
saver = tf.train.Saver()
sess = tf.Session()
sess.run(tf.global_variables_initializer())
# stage 1: learning rate 0.1
batch_size = 512
for i in range(20001):
    batch_obs = np.random.choice(x_train.shape[0],batch_size,replace=False)
    batch_train_x = x_train.iloc[batch_obs]
    batch_train_y = y_train.iloc[batch_obs]
    feed_dict = {x : batch_train_x, y : batch_train_y, drop_prob : .125, training : True, lr : 0.1}
    _, tmp = sess.run([train_step,xent_loss], feed_dict = feed_dict)
    if i % 2000 == 0:
        print("step " + str(i) + " training cross-entropy : " + str(tmp))
    if i % 4000 == 0:
        feed_dict = {x : x_train, y : y_train, drop_prob : .125, training : False}
        train_acc = sess.run(acc, feed_dict = feed_dict)
        feed_dict = {x : x_valid1, y : y_valid1, drop_prob : .125, training : False}
        valid1_acc = sess.run(acc, feed_dict = feed_dict)
        print("step " + str(i) + " training_acc = " + str(train_acc) + " valid_acc = " + str(valid1_acc))
save_path = saver.save(sess, "./MLP_reuse/model.ckpt")
print("Model saved in path: " + save_path)
# stage 2: learning rate 0.01
batch_size = 512
for i in range(80001):
    batch_obs = np.random.choice(x_train.shape[0],batch_size,replace=False)
    batch_train_x = x_train.iloc[batch_obs]
    batch_train_y = y_train.iloc[batch_obs]
    feed_dict = {x : batch_train_x, y : batch_train_y, drop_prob : .125, training : True, lr : 0.01}
    _, tmp = sess.run([train_step,xent_loss], feed_dict = feed_dict)
    if i % 8000 == 0:
        print("step " + str(i) + " training cross-entropy : " + str(tmp))
    if i % 16000 == 0:
        feed_dict = {x : x_train, y : y_train, drop_prob : .125, training : False}
        train_acc = sess.run(acc, feed_dict = feed_dict)
        feed_dict = {x : x_valid1, y : y_valid1, drop_prob : .125, training : False}
        valid1_acc = sess.run(acc, feed_dict = feed_dict)
        print("step " + str(i) + " training_acc = " + str(train_acc) + " valid_acc = " + str(valid1_acc))
save_path = saver.save(sess, "./MLP_reuse/model.ckpt")
print("Model saved in path: " + save_path)
# stage 3: learning rate 0.001
batch_size = 512
for i in range(320001):
    batch_obs = np.random.choice(x_train.shape[0],batch_size,replace=False)
    batch_train_x = x_train.iloc[batch_obs]
    batch_train_y = y_train.iloc[batch_obs]
    feed_dict = {x : batch_train_x, y : batch_train_y, drop_prob : .125, training : True, lr : 0.001}
    _, tmp = sess.run([train_step,xent_loss], feed_dict = feed_dict)
    if i % 32000 == 0:
        print("step " + str(i) + " training cross-entropy : " + str(tmp))
    if i % 64000 == 0:
        feed_dict = {x : x_train, y : y_train, drop_prob : .125, training : False}
        train_acc = sess.run(acc, feed_dict = feed_dict)
        feed_dict = {x : x_valid1, y : y_valid1, drop_prob : .125, training : False}
        valid1_acc = sess.run(acc, feed_dict = feed_dict)
        print("step " + str(i) + " training_acc = " + str(train_acc) + " valid_acc = " + str(valid1_acc))
save_path = saver.save(sess, "./MLP_reuse/model.ckpt")
print("Model saved in path: " + save_path)
# stage 4: learning rate 0.0001
batch_size = 512
for i in range(1280001):
    batch_obs = np.random.choice(x_train.shape[0],batch_size,replace=False)
    batch_train_x = x_train.iloc[batch_obs]
    batch_train_y = y_train.iloc[batch_obs]
    feed_dict = {x : batch_train_x, y : batch_train_y, drop_prob : .125, training : True, lr : 0.0001}
    _, tmp = sess.run([train_step,xent_loss], feed_dict = feed_dict)
    if i % 128000 == 0:
        print("step " + str(i) + " training cross-entropy : " + str(tmp))
    if i % 256000 == 0:
        feed_dict = {x : x_train, y : y_train, drop_prob : .125, training : False}
        train_acc = sess.run(acc, feed_dict = feed_dict)
        feed_dict = {x : x_valid1, y : y_valid1, drop_prob : .125, training : False}
        valid1_acc = sess.run(acc, feed_dict = feed_dict)
        print("step " + str(i) + " training_acc = " + str(train_acc) + " valid_acc = " + str(valid1_acc))
save_path = saver.save(sess, "./MLP_reuse/model.ckpt")
print("Model saved in path: " + save_path)
# one final training step outside the loops, also reporting the accuracy on that single batch
# (i still holds 1280000 from the loop above, so the printed step number continues from it)
batch_size = 512
batch_obs = np.random.choice(x_train.shape[0],batch_size,replace=False)
batch_train_x = x_train.iloc[batch_obs]
batch_train_y = y_train.iloc[batch_obs]
feed_dict = {x : batch_train_x, y : batch_train_y, drop_prob : .125, training : True, lr : 0.0001}
_, tmp,tmp_acc = sess.run([train_step,xent_loss,acc], feed_dict = feed_dict)
print("step " + str(i) + " training cross-entropy : " + str(tmp) + " accuracy of training step : " + str(tmp_acc))
feed_dict = {x : x_train, y : y_train, drop_prob : .125, training : False}
train_acc = sess.run(acc, feed_dict = feed_dict)
feed_dict = {x : x_valid1, y : y_valid1, drop_prob : .125, training : False}
valid1_acc = sess.run(acc, feed_dict = feed_dict)
print("step " + str(i) + " training_acc = " + str(train_acc) + " valid_acc = " + str(valid1_acc))
save_path = saver.save(sess, "./MLP_reuse/model.ckpt")
print("Model saved in path: " + save_path)
Training Accuracy¶
feed_dict = {x : x_train, y : y_train, drop_prob : .125, training : False}  # evaluation: dropout and batch normalization in inference mode
MLP_predict_train, MLP_train_acc = sess.run([pred_labels,acc], feed_dict = feed_dict)
print(confusion_matrix(MLP_predict_train,y_train))
print("TRAINING ACCURACY =",MLP_train_acc)
Validation Accuracy¶
feed_dict = {x : x_valid1, y : y_valid1, drop_prob : .125, training : False}  # evaluation: dropout and batch normalization in inference mode
MLP_predict_valid1, MLP_valid1_acc = sess.run([pred_labels,acc], feed_dict = feed_dict)
print(confusion_matrix(MLP_predict_valid1,y_valid1))
print("VALIDATION ACCURACY =",MLP_valid1_acc)
{"TRAIN_ACC" : MLP_train_acc , "VALID_ACC" : MLP_valid1_acc}