Real Data Analysis
(TestAcc=0.9341) ResNet for FASHION MNIST
딥스탯
2018. 11. 20. 23:18
CNN ResNet for FASHION MNIST with TensorFlow
DATA SOURCE : https://www.kaggle.com/zalando-research/fashionmnist (Kaggle, Fashion MNIST)
- FASHION MNIST with Python (DAY 1) : http://deepstat.tistory.com/35
- FASHION MNIST with Python (DAY 2) : http://deepstat.tistory.com/36
- FASHION MNIST with Python (DAY 3) : http://deepstat.tistory.com/37
- FASHION MNIST with Python (DAY 4) : http://deepstat.tistory.com/38
- FASHION MNIST with Python (DAY 5) : http://deepstat.tistory.com/39
- FASHION MNIST with Python (DAY 6) : http://deepstat.tistory.com/40
- FASHION MNIST with Python (DAY 7) : http://deepstat.tistory.com/41
- FASHION MNIST with Python (DAY 8) : http://deepstat.tistory.com/42
- FASHION MNIST with Python (DAY 9) : http://deepstat.tistory.com/43
- FASHION MNIST with Python (DAY 10) : http://deepstat.tistory.com/44
Datasets
Importing numpy, pandas, pyplot
In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
Loading datasets
In [2]:
data_train = pd.read_csv("../datasets/fashion-mnist_train.csv")
data_test = pd.read_csv("../datasets/fashion-mnist_test.csv")
In [3]:
data_train_y = data_train.label
y_test = data_test.label
In [4]:
data_train_x = data_train.drop("label",axis=1)/256
x_test = data_test.drop("label",axis=1)/256
Splitting validation and training sets
In [5]:
np.random.seed(0)
valid_idx = np.random.choice(60000,10000,replace = False)
train_idx = list(set(range(60000))-set(valid_idx))
x_train = data_train_x.iloc[train_idx,:]
y_train = data_train_y.iloc[train_idx]
x_valid = data_train_x.iloc[valid_idx,:]
y_valid = data_train_y.iloc[valid_idx]
CNN
Making a Minibatch Class
In [6]:
class minibatchData:
    def __init__(self, X, Y):
        self.start_num = 0
        self.x = X
        self.y = Y

    def minibatch(self, batch_size):
        # Returns the next consecutive slice of the data; assumes the dataset size
        # is a multiple of batch_size, so the slice never runs past the end.
        self.outidx = range(self.start_num, (self.start_num + batch_size))
        self.start_num = (self.start_num + batch_size) % (self.x.shape[0])
        return self.x.iloc[self.outidx, :], self.y.iloc[self.outidx]
In [7]:
train_minibatch_data = minibatchData(x_train, y_train)
valid_minibatch_data = minibatchData(x_valid, y_valid)
test_minibatch_data = minibatchData(x_test, y_test)
Importing TensorFlow
In [8]:
import tensorflow as tf
from sklearn.metrics import confusion_matrix
Defining weight_variables and bias_variables
In [9]:
def weight_variables(shape):
    initial = tf.random_uniform(shape=shape, minval=-.1, maxval=.1)
    return tf.Variable(initial)

def bias_variables(shape):
    initial = tf.random_uniform(shape=shape, minval=0, maxval=.1)
    return tf.Variable(initial)
Defining conv2d, maxpool, and avgpool
In [10]:
def conv2d(x, W):
    return tf.nn.conv2d(x, W, strides = [1, 1, 1, 1], padding = 'SAME')

def maxpool(x):
    return tf.nn.max_pool(x, ksize=[1, 3, 3, 1], strides = [1, 2, 2, 1], padding = 'SAME')

def avgpool(x):
    return tf.nn.avg_pool(x, ksize=[1, 2, 2, 1], strides = [1, 2, 2, 1], padding = 'SAME')
Layers
In [11]:
x = tf.placeholder("float", [None,784])
x_image = tf.reshape(x, [-1,28,28,1])    # flat 784-pixel rows -> 28x28 grayscale images
y = tf.placeholder("int64", [None,])
y_dummies = tf.one_hot(y,depth = 10)     # one-hot labels for the 10 classes
drop_prob = tf.placeholder("float")      # dropout rate
training = tf.placeholder("bool")        # True during training (enables dropout)
In [12]:
l1_w = weight_variables([3,3,1,64])
l1_b = bias_variables([64])
l1_conv = conv2d(x_image, l1_w) + l1_b
l1_relu = tf.nn.relu(l1_conv)
l1_dropout = tf.layers.dropout(l1_relu,rate = drop_prob, training = training)
In [13]:
l2_w = weight_variables([1,1,64,16])
l2_b = bias_variables([16])
l2_conv = conv2d(l1_dropout, l2_w) + l2_b
l2_batch_normalization = tf.layers.batch_normalization(l2_conv)
l2_relu = tf.nn.relu(l2_batch_normalization)
l2_dropout = tf.layers.dropout(l2_relu, rate = drop_prob, training = training)
l3_w = weight_variables([3,3,16,16])
l3_b = bias_variables([16])
l3_conv = conv2d(l2_dropout, l3_w) + l3_b
l3_batch_normalization = tf.layers.batch_normalization(l3_conv)
l3_relu = tf.nn.relu(l3_batch_normalization)
l3_dropout = tf.layers.dropout(l3_relu, rate = drop_prob, training = training)
l4_w = weight_variables([1,1,16,64])
l4_b = bias_variables([64])
l4_conv = conv2d(l3_dropout, l4_w) + l4_b
l4_batch_normalization = tf.layers.batch_normalization(l4_conv)
l4_dropout = tf.layers.dropout(l4_batch_normalization, rate = drop_prob, training = training)
In [14]:
l4_add = tf.nn.relu(l4_dropout + l1_dropout)
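Cells [13] through [22] write out the same bottleneck residual pattern by hand: a 1x1 convolution that reduces 64 channels to 16, a 3x3 convolution at 16 channels, a 1x1 convolution that expands back to 64, and a ReLU over the sum with the block's input. As an illustration only (this helper is not part of the notebook; the name bottleneck_block and its signature are mine), the repeated pattern could be factored into one function built on the conv2d, weight_variables and bias_variables helpers defined above:
# Hypothetical helper (not used in this notebook): one bottleneck residual block,
# mirroring cells [13]-[14]: 1x1 reduce -> 3x3 -> 1x1 expand, plus an identity shortcut.
def bottleneck_block(inputs, in_ch, mid_ch, drop_prob, training):
    w1, b1 = weight_variables([1, 1, in_ch, mid_ch]), bias_variables([mid_ch])
    h = tf.layers.batch_normalization(conv2d(inputs, w1) + b1)
    h = tf.layers.dropout(tf.nn.relu(h), rate=drop_prob, training=training)
    w2, b2 = weight_variables([3, 3, mid_ch, mid_ch]), bias_variables([mid_ch])
    h = tf.layers.batch_normalization(conv2d(h, w2) + b2)
    h = tf.layers.dropout(tf.nn.relu(h), rate=drop_prob, training=training)
    w3, b3 = weight_variables([1, 1, mid_ch, in_ch]), bias_variables([in_ch])
    h = tf.layers.batch_normalization(conv2d(h, w3) + b3)
    h = tf.layers.dropout(h, rate=drop_prob, training=training)
    return tf.nn.relu(h + inputs)  # identity shortcut: input and output shapes match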
In [15]:
l5_w = weight_variables([1,1,64,16])
l5_b = bias_variables([16])
l5_conv = conv2d(l4_add, l5_w) + l5_b
l5_batch_normalization = tf.layers.batch_normalization(l5_conv)
l5_relu = tf.nn.relu(l5_batch_normalization)
l5_dropout = tf.layers.dropout(l5_relu, rate = drop_prob, training = training)
l6_w = weight_variables([3,3,16,16])
l6_b = bias_variables([16])
l6_conv = conv2d(l5_dropout, l6_w) + l6_b
l6_batch_normalization = tf.layers.batch_normalization(l6_conv)
l6_relu = tf.nn.relu(l6_batch_normalization)
l6_dropout = tf.layers.dropout(l6_relu, rate = drop_prob, training = training)
l7_w = weight_variables([1,1,16,64])
l7_b = bias_variables([64])
l7_conv = conv2d(l6_dropout, l7_w) + l7_b
l7_batch_normalization = tf.layers.batch_normalization(l7_conv)
l7_dropout = tf.layers.dropout(l7_batch_normalization, rate = drop_prob, training = training)
In [16]:
l7_add = tf.nn.relu(l7_dropout + l4_dropout)
In [17]:
l8_w = weight_variables([1,1,64,16])
l8_b = bias_variables([16])
l8_conv = conv2d(l7_add, l8_w) + l8_b
l8_batch_normalization = tf.layers.batch_normalization(l8_conv)
l8_relu = tf.nn.relu(l8_batch_normalization)
l8_dropout = tf.layers.dropout(l8_relu, rate = drop_prob, training = training)
l9_w = weight_variables([3,3,16,16])
l9_b = bias_variables([16])
l9_conv = conv2d(l8_dropout, l9_w) + l9_b
l9_batch_normalization = tf.layers.batch_normalization(l9_conv)
l9_relu = tf.nn.relu(l9_batch_normalization)
l9_dropout = tf.layers.dropout(l9_relu, rate = drop_prob, training = training)
l10_w = weight_variables([1,1,16,64])
l10_b = bias_variables([64])
l10_conv = conv2d(l9_dropout, l10_w) + l10_b
l10_batch_normalization = tf.layers.batch_normalization(l10_conv)
l10_dropout = tf.layers.dropout(l10_batch_normalization, rate = drop_prob, training = training)
In [18]:
l10_add = tf.nn.relu(l10_dropout + l7_dropout)
In [19]:
l11_w = weight_variables([1,1,64,16])
l11_b = bias_variables([16])
l11_conv = conv2d(l10_add, l11_w) + l11_b
l11_batch_normalization = tf.layers.batch_normalization(l11_conv)
l11_relu = tf.nn.relu(l11_batch_normalization)
l11_dropout = tf.layers.dropout(l11_relu, rate = drop_prob, training = training)
l12_w = weight_variables([3,3,16,16])
l12_b = bias_variables([16])
l12_conv = conv2d(l11_dropout, l12_w) + l12_b
l12_batch_normalization = tf.layers.batch_normalization(l12_conv)
l12_relu = tf.nn.relu(l12_batch_normalization)
l12_dropout = tf.layers.dropout(l12_relu, rate = drop_prob, training = training)
l13_w = weight_variables([1,1,16,64])
l13_b = bias_variables([64])
l13_conv = conv2d(l12_dropout, l13_w) + l13_b
l13_batch_normalization = tf.layers.batch_normalization(l13_conv)
l13_dropout = tf.layers.dropout(l13_batch_normalization, rate = drop_prob, training = training)
In [20]:
l13_add = tf.nn.relu(l13_dropout + l10_dropout)
In [21]:
l14_w = weight_variables([1,1,64,16])
l14_b = bias_variables([16])
l14_conv = conv2d(l13_add, l14_w) + l14_b
l14_batch_normalization = tf.layers.batch_normalization(l14_conv)
l14_relu = tf.nn.relu(l14_batch_normalization)
l14_dropout = tf.layers.dropout(l14_relu, rate = drop_prob, training = training)
l15_w = weight_variables([3,3,16,16])
l15_b = bias_variables([16])
l15_conv = conv2d(l14_dropout, l15_w) + l15_b
l15_batch_normalization = tf.layers.batch_normalization(l15_conv)
l15_relu = tf.nn.relu(l15_batch_normalization)
l15_dropout = tf.layers.dropout(l15_relu, rate = drop_prob, training = training)
l16_w = weight_variables([1,1,16,64])
l16_b = bias_variables([64])
l16_conv = conv2d(l15_dropout, l16_w) + l16_b
l16_batch_normalization = tf.layers.batch_normalization(l16_conv)
l16_dropout = tf.layers.dropout(l16_batch_normalization, rate = drop_prob, training = training)
In [22]:
l16_add = tf.nn.relu(l16_dropout + l13_dropout)
In [23]:
l17_w = weight_variables([1,1,64,32])
l17_b = bias_variables([32])
l17_conv = conv2d(l16_add, l17_w) + l17_b
l17_batch_normalization = tf.layers.batch_normalization(l17_conv)
l17_relu = tf.nn.relu(l17_batch_normalization)
l17_avgpool = avgpool(l17_relu)
l17_dropout = tf.layers.dropout(l17_avgpool, rate = drop_prob, training = training)
l18_w = weight_variables([3,3,32,32])
l18_b = bias_variables([32])
l18_conv = conv2d(l17_dropout, l18_w) + l18_b
l18_batch_normalization = tf.layers.batch_normalization(l18_conv)
l18_relu = tf.nn.relu(l18_batch_normalization)
l18_dropout = tf.layers.dropout(l18_relu, rate = drop_prob, training = training)
l19_w = weight_variables([1,1,32,128])
l19_b = bias_variables([128])
l19_conv = conv2d(l18_dropout, l19_w) + l19_b
l19_batch_normalization = tf.layers.batch_normalization(l19_conv)
l19_dropout = tf.layers.dropout(l19_batch_normalization, rate = drop_prob, training = training)
In [24]:
l16_w2 = weight_variables([1,1,64,128])
l16_conv2 = conv2d(l16_add, l16_w2)
l16_add2 = avgpool(l16_conv2)
In [25]:
l19_add = tf.nn.relu(l19_dropout + l16_add2)
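Cells [23] through [25] are the first block where the main path changes shape: the 1x1 convolution drops 64 channels to 32, the average pooling in cell [23] halves the spatial size to 14x14, and the final 1x1 convolution expands to 128 channels. Because the block's input (28x28x64) no longer matches its output (14x14x128), the shortcut in cell [24] is not the identity but a 1x1 projection followed by the same average pooling. A hypothetical downsampling variant of the earlier sketch (again, name and signature are mine, not the notebook's) could look like this:
# Hypothetical downsampling bottleneck (not the notebook's code): the main path halves
# H and W and changes the channel count, so the shortcut needs a 1x1 projection plus
# avg-pooling (cf. cell [24]) before the addition.
def downsample_block(inputs, in_ch, mid_ch, out_ch, drop_prob, training):
    w1, b1 = weight_variables([1, 1, in_ch, mid_ch]), bias_variables([mid_ch])
    h = tf.nn.relu(tf.layers.batch_normalization(conv2d(inputs, w1) + b1))
    h = tf.layers.dropout(avgpool(h), rate=drop_prob, training=training)  # halve H and W
    w2, b2 = weight_variables([3, 3, mid_ch, mid_ch]), bias_variables([mid_ch])
    h = tf.layers.batch_normalization(conv2d(h, w2) + b2)
    h = tf.layers.dropout(tf.nn.relu(h), rate=drop_prob, training=training)
    w3, b3 = weight_variables([1, 1, mid_ch, out_ch]), bias_variables([out_ch])
    h = tf.layers.batch_normalization(conv2d(h, w3) + b3)
    h = tf.layers.dropout(h, rate=drop_prob, training=training)
    w_sc = weight_variables([1, 1, in_ch, out_ch])
    shortcut = avgpool(conv2d(inputs, w_sc))  # projection shortcut to match the new shape
    return tf.nn.relu(h + shortcut)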
In [26]:
l20_w = weight_variables([1,1,128,32])
l20_b = bias_variables([32])
l20_conv = conv2d(l19_add, l20_w) + l20_b
l20_batch_normalization = tf.layers.batch_normalization(l20_conv)
l20_relu = tf.nn.relu(l20_batch_normalization)
l20_dropout = tf.layers.dropout(l20_relu, rate = drop_prob, training = training)
l21_w = weight_variables([3,3,32,32])
l21_b = bias_variables([32])
l21_conv = conv2d(l20_dropout, l21_w) + l21_b
l21_batch_normalization = tf.layers.batch_normalization(l21_conv)
l21_relu = tf.nn.relu(l21_batch_normalization)
l21_dropout = tf.layers.dropout(l21_relu, rate = drop_prob, training = training)
l22_w = weight_variables([1,1,32,128])
l22_b = bias_variables([128])
l22_conv = conv2d(l21_dropout, l22_w) + l22_b
l22_batch_normalization = tf.layers.batch_normalization(l22_conv)
l22_dropout = tf.layers.dropout(l22_batch_normalization, rate = drop_prob, training = training)
In [27]:
l22_add = tf.nn.relu(l22_dropout + l19_add)
In [28]:
l23_w = weight_variables([1,1,128,32])
l23_b = bias_variables([32])
l23_conv = conv2d(l22_add, l23_w) + l23_b
l23_batch_normalization = tf.layers.batch_normalization(l23_conv)
l23_relu = tf.nn.relu(l23_batch_normalization)
l23_dropout = tf.layers.dropout(l23_relu, rate = drop_prob, training = training)
l24_w = weight_variables([3,3,32,32])
l24_b = bias_variables([32])
l24_conv = conv2d(l23_dropout, l24_w) + l24_b
l24_batch_normalization = tf.layers.batch_normalization(l24_conv)
l24_relu = tf.nn.relu(l24_batch_normalization)
l24_dropout = tf.layers.dropout(l24_relu, rate = drop_prob, training = training)
l25_w = weight_variables([1,1,32,128])
l25_b = bias_variables([128])
l25_conv = conv2d(l24_dropout, l25_w) + l25_b
l25_batch_normalization = tf.layers.batch_normalization(l25_conv)
l25_dropout = tf.layers.dropout(l25_batch_normalization, rate = drop_prob, training = training)
In [29]:
l25_add = tf.nn.relu(l25_dropout + l22_add)
In [30]:
l26_w = weight_variables([1,1,128,32])
l26_b = bias_variables([32])
l26_conv = conv2d(l25_add, l26_w) + l26_b
l26_batch_normalization = tf.layers.batch_normalization(l26_conv)
l26_relu = tf.nn.relu(l26_batch_normalization)
l26_dropout = tf.layers.dropout(l26_relu, rate = drop_prob, training = training)
l27_w = weight_variables([3,3,32,32])
l27_b = bias_variables([32])
l27_conv = conv2d(l26_dropout, l27_w) + l27_b
l27_batch_normalization = tf.layers.batch_normalization(l27_conv)
l27_relu = tf.nn.relu(l27_batch_normalization)
l27_dropout = tf.layers.dropout(l27_relu, rate = drop_prob, training = training)
l28_w = weight_variables([1,1,32,128])
l28_b = bias_variables([128])
l28_conv = conv2d(l27_dropout, l28_w) + l28_b
l28_batch_normalization = tf.layers.batch_normalization(l28_conv)
l28_dropout = tf.layers.dropout(l28_batch_normalization, rate = drop_prob, training = training)
In [31]:
l28_add = tf.nn.relu(l28_dropout + l25_add)
In [32]:
l29_w = weight_variables([1,1,128,32])
l29_b = bias_variables([32])
l29_conv = conv2d(l28_add, l29_w) + l29_b
l29_batch_normalization = tf.layers.batch_normalization(l29_conv)
l29_relu = tf.nn.relu(l29_batch_normalization)
l29_dropout = tf.layers.dropout(l29_relu, rate = drop_prob, training = training)
l30_w = weight_variables([3,3,32,32])
l30_b = bias_variables([32])
l30_conv = conv2d(l29_dropout, l30_w) + l30_b
l30_batch_normalization = tf.layers.batch_normalization(l30_conv)
l30_relu = tf.nn.relu(l30_batch_normalization)
l30_dropout = tf.layers.dropout(l30_relu, rate = drop_prob, training = training)
l31_w = weight_variables([1,1,32,128])
l31_b = bias_variables([128])
l31_conv = conv2d(l30_dropout, l31_w) + l31_b
l31_batch_normalization = tf.layers.batch_normalization(l31_conv)
l31_dropout = tf.layers.dropout(l31_batch_normalization, rate = drop_prob, training = training)
In [33]:
l31_add = tf.nn.relu(l31_dropout + l28_add)
In [34]:
l32_w = weight_variables([1,1,128,32])
l32_b = bias_variables([32])
l32_conv = conv2d(l31_add, l32_w) + l32_b
l32_batch_normalization = tf.layers.batch_normalization(l32_conv)
l32_relu = tf.nn.relu(l32_batch_normalization)
l32_dropout = tf.layers.dropout(l32_relu, rate = drop_prob, training = training)
l33_w = weight_variables([3,3,32,32])
l33_b = bias_variables([32])
l33_conv = conv2d(l32_dropout, l33_w) + l33_b
l33_batch_normalization = tf.layers.batch_normalization(l33_conv)
l33_relu = tf.nn.relu(l33_batch_normalization)
l33_dropout = tf.layers.dropout(l33_relu, rate = drop_prob, training = training)
l34_w = weight_variables([1,1,32,128])
l34_b = bias_variables([128])
l34_conv = conv2d(l33_dropout, l34_w) + l34_b
l34_batch_normalization = tf.layers.batch_normalization(l34_conv)
l34_dropout = tf.layers.dropout(l34_batch_normalization, rate = drop_prob, training = training)
In [35]:
l34_add = tf.nn.relu(l34_dropout + l31_add)
In [36]:
l35_w = weight_variables([1,1,128,64])
l35_b = bias_variables([64])
l35_conv = conv2d(l34_add, l35_w) + l35_b
l35_batch_normalization = tf.layers.batch_normalization(l35_conv)
l35_relu = tf.nn.relu(l35_batch_normalization)
l35_avgpool = avgpool(l35_relu)
l35_dropout = tf.layers.dropout(l35_avgpool, rate = drop_prob, training = training)
l36_w = weight_variables([3,3,64,64])
l36_b = bias_variables([64])
l36_conv = conv2d(l35_dropout, l36_w) + l36_b
l36_batch_normalization = tf.layers.batch_normalization(l36_conv)
l36_relu = tf.nn.relu(l36_batch_normalization)
l36_dropout = tf.layers.dropout(l36_relu, rate = drop_prob, training = training)
l37_w = weight_variables([1,1,64,256])
l37_b = bias_variables([256])
l37_conv = conv2d(l36_dropout, l37_w) + l37_b
l37_batch_normalization = tf.layers.batch_normalization(l37_conv)
l37_dropout = tf.layers.dropout(l37_batch_normalization, rate = drop_prob, training = training)
In [37]:
l34_w2 = weight_variables([1,1,128,256])
l34_conv2 = conv2d(l34_add, l34_w2)
l34_add2 = avgpool(l34_conv2)
In [38]:
l37_add = tf.nn.relu(l37_dropout + l34_add2)
In [39]:
l38_w = weight_variables([1,1,256,64])
l38_b = bias_variables([64])
l38_conv = conv2d(l37_add, l38_w) + l38_b
l38_batch_normalization = tf.layers.batch_normalization(l38_conv)
l38_relu = tf.nn.relu(l38_batch_normalization)
l38_dropout = tf.layers.dropout(l38_relu, rate = drop_prob, training = training)
l39_w = weight_variables([3,3,64,64])
l39_b = bias_variables([64])
l39_conv = conv2d(l38_dropout, l39_w) + l39_b
l39_batch_normalization = tf.layers.batch_normalization(l39_conv)
l39_relu = tf.nn.relu(l39_batch_normalization)
l39_dropout = tf.layers.dropout(l39_relu, rate = drop_prob, training = training)
l40_w = weight_variables([1,1,64,256])
l40_b = bias_variables([256])
l40_conv = conv2d(l39_dropout, l40_w) + l40_b
l40_batch_normalization = tf.layers.batch_normalization(l40_conv)
l40_dropout = tf.layers.dropout(l40_batch_normalization, rate = drop_prob, training = training)
In [40]:
l40_add = tf.nn.relu(l40_dropout + l37_add)
In [41]:
l41_w = weight_variables([1,1,256,64])
l41_b = bias_variables([64])
l41_conv = conv2d(l40_add, l41_w) + l41_b
l41_batch_normalization = tf.layers.batch_normalization(l41_conv)
l41_relu = tf.nn.relu(l41_batch_normalization)
l41_dropout = tf.layers.dropout(l41_relu, rate = drop_prob, training = training)
l42_w = weight_variables([3,3,64,64])
l42_b = bias_variables([64])
l42_conv = conv2d(l41_dropout, l42_w) + l42_b
l42_batch_normalization = tf.layers.batch_normalization(l42_conv)
l42_relu = tf.nn.relu(l42_batch_normalization)
l42_dropout = tf.layers.dropout(l42_relu, rate = drop_prob, training = training)
l43_w = weight_variables([1,1,64,256])
l43_b = bias_variables([256])
l43_conv = conv2d(l42_dropout, l43_w) + l43_b
l43_batch_normalization = tf.layers.batch_normalization(l43_conv)
l43_dropout = tf.layers.dropout(l43_batch_normalization, rate = drop_prob, training = training)
In [42]:
l43_add = tf.nn.relu(l43_dropout + l40_add)
In [43]:
l44_w = weight_variables([1,1,256,64])
l44_b = bias_variables([64])
l44_conv = conv2d(l43_add, l44_w) + l44_b
l44_batch_normalization = tf.layers.batch_normalization(l44_conv)
l44_relu = tf.nn.relu(l44_batch_normalization)
l44_dropout = tf.layers.dropout(l44_relu, rate = drop_prob, training = training)
l45_w = weight_variables([3,3,64,64])
l45_b = bias_variables([64])
l45_conv = conv2d(l44_dropout, l45_w) + l45_b
l45_batch_normalization = tf.layers.batch_normalization(l45_conv)
l45_relu = tf.nn.relu(l45_batch_normalization)
l45_dropout = tf.layers.dropout(l45_relu, rate = drop_prob, training = training)
l46_w = weight_variables([1,1,64,256])
l46_b = bias_variables([256])
l46_conv = conv2d(l45_dropout, l46_w) + l46_b
l46_batch_normalization = tf.layers.batch_normalization(l46_conv)
l46_dropout = tf.layers.dropout(l46_batch_normalization, rate = drop_prob, training = training)
In [44]:
l46_add = tf.nn.relu(l46_dropout + l43_add)
In [45]:
l47_w = weight_variables([1,1,256,64])
l47_b = bias_variables([64])
l47_conv = conv2d(l46_add, l47_w) + l47_b
l47_batch_normalization = tf.layers.batch_normalization(l47_conv)
l47_relu = tf.nn.relu(l47_batch_normalization)
l47_dropout = tf.layers.dropout(l47_relu, rate = drop_prob, training = training)
l48_w = weight_variables([3,3,64,64])
l48_b = bias_variables([64])
l48_conv = conv2d(l47_dropout, l48_w) + l48_b
l48_batch_normalization = tf.layers.batch_normalization(l48_conv)
l48_relu = tf.nn.relu(l48_batch_normalization)
l48_dropout = tf.layers.dropout(l48_relu, rate = drop_prob, training = training)
l49_w = weight_variables([1,1,64,256])
l49_b = bias_variables([256])
l49_conv = conv2d(l48_dropout, l49_w) + l49_b
l49_batch_normalization = tf.layers.batch_normalization(l49_conv)
l49_dropout = tf.layers.dropout(l49_batch_normalization, rate = drop_prob, training = training)
In [46]:
l49_add = tf.nn.relu(l49_dropout + l46_add)
In [47]:
l50_avgpool = tf.nn.avg_pool(l49_add, ksize=[1, 8, 8, 1], strides = [1, 8, 8, 1], padding = 'SAME')  # global average pooling: 7x7x256 -> 1x1x256
l50_flatten = tf.reshape(l50_avgpool, [-1,256])
l50_batch_normalization = tf.layers.batch_normalization(l50_flatten)
l50_w = weight_variables([256,10])
l50_b = bias_variables([10])
l50_inner_product = tf.matmul(l50_batch_normalization, l50_w) + l50_b   # logits for the 10 classes
l50_log_softmax = tf.nn.log_softmax(l50_inner_product)
Cross-entropy
In [48]:
xent_loss = -tf.reduce_mean( tf.multiply(y_dummies,l50_log_softmax) )
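Because tf.reduce_mean here averages over both the batch dimension and the 10 class positions, the value is one tenth of the usual mean per-example cross-entropy; that only rescales the loss (and therefore the effective learning rate), it does not change the direction of the gradients. For reference, the same objective up to that constant factor can be written with the built-in op (a sketch using the l50_inner_product logits above; xent_loss_builtin is not used anywhere in this notebook):
# Equivalent up to a factor of 10: sum over classes per example, then average over the batch.
xent_loss_builtin = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(labels=y_dummies, logits=l50_inner_product))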
Accuracy
In [49]:
pred_labels = tf.argmax(l50_log_softmax,axis=1)
acc = tf.reduce_mean(tf.cast(tf.equal(y, pred_labels),"float"))
Training the Model
In [50]:
lr = tf.placeholder("float")
train_step = tf.train.AdamOptimizer(lr).minimize(xent_loss)
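One caveat to keep in mind: every tf.layers.batch_normalization call above is made without its training argument, and the Adam step is not gated on the batch-norm update ops, so the moving mean and variance never move from their initial values (0 and 1) and each batch-norm layer effectively acts as a learned per-channel scale and shift. The network still trains well (the title reports a test accuracy of 0.9341), but the usual TF 1.x pattern would pass training=training to every batch_normalization call and run the update ops before each optimizer step. A minimal sketch of that pattern (train_step_bn is a hypothetical name, not used later in this notebook):
# Standard TF 1.x batch-norm training pattern (sketch, not the setup used here):
# run the moving-average update ops before each optimizer step.
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
    train_step_bn = tf.train.AdamOptimizer(lr).minimize(xent_loss)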
In [51]:
saver = tf.train.Saver()
In [52]:
best_valid_acc_vec = {}

for k in range(0,5):
    drop_probability = k/16
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        epochs = 301
        batch_size = 100
        tmp_xent_loss_3 = [1.0,1.0,1.0]   # mean training loss of the last three epochs
        learning_rate = 1/2**10
        rep_num = int((x_train.shape[0])/batch_size)
        max_valid_acc = .0
        valid_rep_num = int((x_valid.shape[0])/batch_size)
        for i in range(epochs):
            tmp_loss_vec = [.0 for a in range(rep_num)]
            tmp_valid_acc_vec = [.0 for a in range(valid_rep_num)]
            tmp_train_acc_vec = [.0 for a in range(rep_num)]

            # One pass over the training set.
            for j in range(rep_num):
                batch_train_x, batch_train_y = train_minibatch_data.minibatch(batch_size)
                feed_dict = {x : batch_train_x, y : batch_train_y, drop_prob : drop_probability, training : True, lr : learning_rate}
                _, tmp_loss_vec[j] = sess.run([train_step,xent_loss], feed_dict = feed_dict)
            tmp_xent_loss_3 = [tmp_xent_loss_3[1], tmp_xent_loss_3[2], sum(tmp_loss_vec)/rep_num]

            # Decay the learning rate when the loss has not improved for two epochs.
            if tmp_xent_loss_3[0] == min(tmp_xent_loss_3):
                learning_rate = learning_rate * 7/8

            # Validation accuracy; checkpoint the model whenever it improves.
            for j in range(valid_rep_num):
                batch_valid_x, batch_valid_y = valid_minibatch_data.minibatch(batch_size)
                feed_dict = {x : batch_valid_x, y : batch_valid_y, drop_prob : drop_probability, training : False}
                tmp_valid_acc_vec[j] = sess.run(acc, feed_dict = feed_dict)
            valid_acc = sum(tmp_valid_acc_vec)/valid_rep_num
            if valid_acc > max_valid_acc:
                max_valid_acc = valid_acc
                best_valid_acc_vec[k] = max_valid_acc
                print("DP : " + str(k) + "/16 epoch : " + str(i) + " max_valid_acc = " + str(valid_acc))
                save_path = saver.save(sess, "./CNNres/model" + str(k) + ".ckpt")

            # Periodic progress report with training accuracy.
            if i % 50 == 0:
                print("DP : " + str(k) + "/16 epoch : " + str(i) + " -- training cross-entropy : " + str(tmp_xent_loss_3[2]))
                for j in range(rep_num):
                    batch_train_x, batch_train_y = train_minibatch_data.minibatch(batch_size)
                    feed_dict = {x : batch_train_x, y : batch_train_y, drop_prob : drop_probability, training : False}
                    tmp_train_acc_vec[j] = sess.run(acc, feed_dict = feed_dict)
                train_acc = sum(tmp_train_acc_vec)/rep_num
                print("DP : " + str(k) + "/16 epoch : " + str(i) + " training_acc = " + str(train_acc) + " valid_acc = " + str(valid_acc))

            # Stop early once the loss has been essentially flat for three epochs.
            if (tmp_xent_loss_3[0] - tmp_xent_loss_3[1])**2 + (tmp_xent_loss_3[1] - tmp_xent_loss_3[2])**2 < 1e-10:
                print("DP : " + str(k) + "/16 converged" + " epoch : " + str(i))
                break
In [53]:
print(best_valid_acc_vec)
print(max(best_valid_acc_vec, key=best_valid_acc_vec.get))  # dropout setting k with the highest validation accuracy
In [63]:
sess = tf.Session()
saver.restore(sess, "./CNNres/model3.ckpt")
print("Model restored.")
Training Accuracy
In [64]:
batch_size = 1000
rep_num = int((x_train.shape[0])/batch_size)
tmp_train_acc_vec = [.0 for a in range(rep_num)]
CNNres_predict_train = []
for j in range(rep_num):
    batch_train_x, batch_train_y = train_minibatch_data.minibatch(batch_size)
    # drop_prob is ignored here because training is False (dropout is only applied during training).
    feed_dict = {x : batch_train_x, y : batch_train_y, drop_prob : 1/8, training : False}
    tmp_CNNres_predict_train, tmp_train_acc_vec[j] = sess.run([pred_labels,acc], feed_dict = feed_dict)
    CNNres_predict_train = np.concatenate([CNNres_predict_train, tmp_CNNres_predict_train])
CNNres_train_acc = sum(tmp_train_acc_vec)/rep_num
In [65]:
print(confusion_matrix(CNNres_predict_train,y_train))
print("TRAINING ACCURACY =",CNNres_train_acc)
Validation Accuracy
In [66]:
batch_size = 1000
valid_rep_num = int((x_valid.shape[0])/batch_size)
tmp_valid_acc_vec = [.0 for a in range(valid_rep_num)]
CNNres_predict_valid = []
for j in range(valid_rep_num):
    batch_valid_x, batch_valid_y = valid_minibatch_data.minibatch(batch_size)
    feed_dict = {x : batch_valid_x, y : batch_valid_y, drop_prob : 1/8, training : False}
    tmp_CNNres_predict_valid, tmp_valid_acc_vec[j] = sess.run([pred_labels,acc], feed_dict = feed_dict)
    CNNres_predict_valid = np.concatenate([CNNres_predict_valid, tmp_CNNres_predict_valid])
CNNres_valid_acc = sum(tmp_valid_acc_vec)/valid_rep_num
In [67]:
print(confusion_matrix(CNNres_predict_valid,y_valid))
print("VALIDATION ACCURACY =",CNNres_valid_acc)
In [68]:
{"TRAIN_ACC" : CNNres_train_acc , "VALID_ACC" : CNNres_valid_acc}
Out[68]:
Test Accuracy
In [69]:
batch_size = 1000
test_rep_num = int((x_test.shape[0])/batch_size)
tmp_test_acc_vec = [.0 for a in range(test_rep_num)]
CNNres_predict_test = []
for j in range(test_rep_num):
    batch_test_x, batch_test_y = test_minibatch_data.minibatch(batch_size)
    feed_dict = {x : batch_test_x, y : batch_test_y, drop_prob : 1/8, training : False}
    tmp_CNNres_predict_test, tmp_test_acc_vec[j] = sess.run([pred_labels,acc], feed_dict = feed_dict)
    CNNres_predict_test = np.concatenate([CNNres_predict_test, tmp_CNNres_predict_test])
CNNres_test_acc = sum(tmp_test_acc_vec)/test_rep_num
In [70]:
print(confusion_matrix(CNNres_predict_test,y_test))
print("TEST ACCURACY =",CNNres_test_acc)
In [71]:
{"TRAIN_ACC" : CNNres_train_acc , "VALID_ACC" : CNNres_valid_acc , "TEST_ACC" : CNNres_test_acc}
Out[71]: