'''
Recognition using a CNN + 2 fully connected layers.

Can be trained in two modes:
  full == cnn + fnn, all layers are trainable
  half == fnn only; the top fully connected layers are trained while the
          bottom CNN is fixed, with parameters supplied by the autoencoder

The input CNN features come from an autoencoder with 3 CNN layers and
2 fully connected layers (cnn + fnn).

Boosting mode:
  (a) randomly permute the [0..29] directories into two parts,
      training and testing;
  (b) verify testing performance before and after training;
  (c) save the updated model parameters if training improves performance,
      discard them if not;
  (d) repeat.
'''
from utils import *

import os
import sys
import datetime
from select import select

import numpy as np
import tensorflow as tf

# network parameters; filled in with tf.Variables by convolutional_neural_network()
W_conv1 = 0.0
W_conv2 = 0.0
W_conv3 = 0.0
W_fc1   = 0.0
W_fc2   = 0.0
W_out2  = 0.0
b_conv1 = 0.0
b_conv2 = 0.0
b_conv3 = 0.0
b_fc1   = 0.0
b_fc2   = 0.0
b_out2  = 0.0

Type  = ['half', 'full']   # recognised training modes
nType = 'full'

l1 = 256     # CNN layer 1, 2, and 3 sizes
l2 = 512
l3 = 1024
# fdim = dim/(2**3)  # 6x6 grid after 3 layers
fSize = 3    # filter size in CNN
fc = 1024    # fully connected layer size

def backupNetwork(sess):
    '''Snapshot the current variable values so a bad training iteration can
    be rolled back by restoreNetwork().  The values live in the TF session
    and must be read out with sess.run(); copying the Python variable
    handles alone would not preserve them.'''
    global netBackup
    netBackup = sess.run([W_conv1, W_conv2, W_conv3, W_fc1, W_fc2, W_out2,
                          b_conv1, b_conv2, b_conv3, b_fc1, b_fc2, b_out2])

def restoreNetwork(sess):
    '''Roll the variable values back to the snapshot taken by backupNetwork().'''
    allVars = [W_conv1, W_conv2, W_conv3, W_fc1, W_fc2, W_out2,
               b_conv1, b_conv2, b_conv3, b_fc1, b_fc2, b_out2]
    for var, val in zip(allVars, netBackup):
        var.load(val, sess)  # assign the saved value inside the session
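# The three convolutional stages in convolutional_neural_network() below all
# repeat the same conv -> normalize -> ReLU -> maxpool pattern.  The helper
# here is a minimal illustrative sketch of one such stage, assuming the
# conv2d, maxpool2d, and normalizeNN helpers from utils keep the signatures
# used below; it is not called by this file, which writes the stages out
# explicitly.
def convStage(inp, W, b, sess):
    h = conv2d(inp, W) + b    # convolution plus per-channel bias
    h = normalizeNN(h, sess)  # normalization layer from utils
    h = tf.nn.relu(h)         # nonlinearity
    return maxpool2d(h)       # max pooling halves each spatial dimension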
def convolutional_neural_network(x, sess):
    '''Build the recognition network: three conv/normalize/ReLU/maxpool
    stages followed by two fully connected layers and the output layer.'''
    global Tval, W_fc1, W_fc2, W_out2, b_fc1, b_fc2, b_out2
    global W_conv1, W_conv2, W_conv3, b_conv1, b_conv2, b_conv3

    W_conv1 = tf.Variable(tf.random_normal([fSize, fSize,  1, l1]), name="W_conv1")
    W_conv2 = tf.Variable(tf.random_normal([fSize, fSize, l1, l2]), name="W_conv2")
    W_conv3 = tf.Variable(tf.random_normal([fSize, fSize, l2, l3]), name="W_conv3")
    W_fc1   = tf.Variable(tf.random_normal([dimX8*dimY8*l3, fc]),   name="W_fc1")
    W_fc2   = tf.Variable(tf.random_normal([fc, fc]),               name="W_fc2")
    W_out2  = tf.Variable(tf.random_normal([fc, nOs]),              name="W_out2")

    b_conv1 = tf.Variable(tf.random_normal([l1]),  name="b_conv1")
    b_conv2 = tf.Variable(tf.random_normal([l2]),  name="b_conv2")
    b_conv3 = tf.Variable(tf.random_normal([l3]),  name="b_conv3")
    b_fc1   = tf.Variable(tf.random_normal([fc]),  name="b_fc1")
    b_fc2   = tf.Variable(tf.random_normal([fc]),  name="b_fc2")
    b_out2  = tf.Variable(tf.random_normal([nOs]), name="b_out2")

    weights = {"W_conv1": W_conv1, "W_conv2": W_conv2, "W_conv3": W_conv3,
               "W_fc1": W_fc1, "W_fc2": W_fc2, "W_out2": W_out2}
    biases  = {"b_conv1": b_conv1, "b_conv2": b_conv2, "b_conv3": b_conv3,
               "b_fc1": b_fc1, "b_fc2": b_fc2, "b_out2": b_out2}

    x = tf.reshape(x, shape=[-1, dimX, dimY, 1])

    conv1 = conv2d(x, weights['W_conv1']) + biases['b_conv1']
    conv1 = normalizeNN(conv1, sess)
    conv1 = tf.nn.relu(conv1)
    conv1 = maxpool2d(conv1)

    conv2 = conv2d(conv1, weights['W_conv2']) + biases['b_conv2']
    conv2 = normalizeNN(conv2, sess)
    conv2 = tf.nn.relu(conv2)
    conv2 = maxpool2d(conv2)

    conv3 = conv2d(conv2, weights['W_conv3']) + biases['b_conv3']
    conv3 = normalizeNN(conv3, sess)
    conv3 = tf.nn.relu(conv3)
    conv3 = maxpool2d(conv3)

    fconv3 = tf.reshape(conv3, [-1, dimX8*dimY8*l3])  # flatten conv3 output to a linear array

    # NOTE: sess.run(choice) below executes while the graph is being built,
    # so each dropout branch is frozen at construction time using whatever
    # value Tval holds then; toggling Tval later does not alter the graph.
    choice = tf.where(Train, True, False)

    fc1 = tf.matmul(fconv3, weights['W_fc1']) + biases['b_fc1']
    fc1 = normalizeNN(fc1, sess)
    fc1 = tf.nn.relu(fc1)
    if sess.run(choice, feed_dict={Train: Tval}):
        fc1 = tf.nn.dropout(fc1, kRate)
    else:
        fc1 = tf.nn.dropout(fc1, 1.0)

    fc2 = tf.matmul(fc1, weights['W_fc2']) + biases['b_fc2']
    fc2 = normalizeNN(fc2, sess)
    fc2 = tf.nn.relu(fc2)
    if sess.run(choice, feed_dict={Train: Tval}):
        fc2 = tf.nn.dropout(fc2, kRate)
    else:
        fc2 = tf.nn.dropout(fc2, 1.0)

    out2 = tf.nn.relu(tf.matmul(fc2, weights['W_out2']) + biases['b_out2'])
    return out2
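# Shape walk-through for convolutional_neural_network (the concrete sizes
# below assume a 48x48 input, consistent with the "6x6 grid after 3 layers"
# comment near the top, and are illustrative only; the real values of dimX,
# dimY, dimX8, dimY8, and nOs come from utils):
#   input    [batch, 48, 48,    1]
#   conv1    [batch, 24, 24,  256]   after 2x maxpool
#   conv2    [batch, 12, 12,  512]   after 2x maxpool
#   conv3    [batch,  6,  6, 1024]   after 2x maxpool
#   fconv3   [batch, 6*6*1024]       flattened
#   fc1, fc2 [batch, 1024]
#   out2     [batch, nOs]            two-way decision in this program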
def train_neural_network(x):
    global Tval, W_fc1, W_fc2, W_out2, b_fc1, b_fc2, b_out2, correct, accuracy

    with tf.Session() as sess:
        # draw a random positive/negative weighting for this run; the class
        # weights sum to 1, e.g. pnratio = 0.8 gives [1/1.8, 0.8/1.8] ~
        # [0.556, 0.444], and matmul with the one-hot labels y picks out
        # each sample's class weight
        pnratio = np.random.uniform(0.6, 0.9)
        class_weight  = tf.constant([[1/(1+pnratio), pnratio/(1+pnratio)]])
        sample_weight = tf.transpose(tf.matmul(y, tf.transpose(class_weight)))  # [1, batchsize]

        prediction = convolutional_neural_network(x, sess)  # [batchsize, 2]
        xent = sample_weight * tf.nn.softmax_cross_entropy_with_logits(
                   logits=prediction, labels=y, name='xent')  # [1, batchsize]
        cost = tf.reduce_mean(xent)  # scalar

        correct  = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
        accuracy = tf.reduce_mean(tf.cast(correct, 'float'))

        if nType == 'full':    # train the whole network, cnn + fnn
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            with tf.control_dependencies(update_ops):
                optimizer = tf.train.AdadeltaOptimizer(1., 0.95, 1e-6).minimize(cost)
        elif nType == 'half':  # train only the fnn (cnn weights supplied by the autoencoder)
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            with tf.control_dependencies(update_ops):
                optimizer = tf.train.AdadeltaOptimizer(1., 0.95, 1e-6).minimize(
                    cost, var_list=[W_fc1, W_fc2, W_out2, b_fc1, b_fc2, b_out2])
        else:
            print 'network type not understood: ', nType
            sys.exit(1)

        # using 2-norm instead of cross entropy:
        # xent = sample_weight*tf.pow(tf.matmul(prediction-y, tf.transpose(class_weight)), 2)
        # cost = tf.reduce_mean(xent)
        # other optimizer options:
        # optimizer = tf.train.GradientDescentOptimizer(0.01).minimize(cost)
        # optimizer = tf.train.AdamOptimizer(0.01).minimize(cost)

        sess.run(tf.global_variables_initializer())

        saver    = tf.train.Saver()
        saverCNN = tf.train.Saver({"W_conv1": W_conv1, "W_conv2": W_conv2, "W_conv3": W_conv3,
                                   "b_conv1": b_conv1, "b_conv2": b_conv2, "b_conv3": b_conv3})

        file2 = '/home/yfwang/Documents/rogue1/microwave/rec/saved_models/boost_auto_cnn+fnn_full_' + sys.argv[1] + '.ckpt'
        if os.path.isfile(file2 + '.meta'):  # already conducted some run
            saver.restore(sess, file2)
            print 'model ' + file2 + ' from previous run restored'

        count   = 20
        badIter = 0
        while True:
            files = np.random.permutation(NDIRS-1) + 1  # permute directories 1..NDIRS-1 (830 directories)
            trainData = files[0:10]   # use 10 directories to train
            testData  = files[10:15]  # and 5 directories to test

            print '\n\niteration: ', sys.argv[1], ',', count, '\n\n'
            count += 1

            if count >= maxCount:
                # final pass: measure the error over all directories, 10 at a time
                posErrFnl = [0]*(NDIRS/10)
                negErrFnl = [0]*(NDIRS/10)
                for i in range(NDIRS/10):
                    iBeg = i*10
                    iEnd = (i+1)*10
                    testData = range(iBeg, iEnd)
                    Tval = False
                    posErrFnl[i], negErrFnl[i], validFnl = testErr(testData, pnratio, accuracy)
                save_path = saver.save(sess, file2)
                print 'final err +:', sum(posErrFnl)/len(posErrFnl), '-:', sum(negErrFnl)/len(negErrFnl), 'model:', save_path, '\n\n\n'
                return
            if badIter == maxBad:
                print 'give up, too many bad iterations ' + file2
                return

            Tval = False
            posErrBef, negErrBef, validBef = testErr(testData, pnratio, accuracy)

            print '*****************************************************************'
            print '[pos,neg]:', sess.run(class_weight), 'pnratio:', pnratio, 'network:', nType, 'filter size:', fSize
            print '*****************************************************************'

            tX, tY = inputData(trainData, pnratio)
            dSize = len(tX)

            backupNetwork(sess)
            print datetime.datetime.now()

            Tval = True
            for epoch in range(maxEpochs):
                epoch_loss = 0
                for i in range(dSize/bSize):
                    sidx = i*bSize
                    eidx = (i+1)*bSize
                    epoch_x = tX[sidx:eidx]
                    epoch_y = tY[sidx:eidx]
                    _, c = sess.run([optimizer, cost], feed_dict={x: epoch_x, y: epoch_y})
                    epoch_loss += c
                print 'Epoch', epoch, 'completed out of', maxEpochs, 'loss:', epoch_loss
            print datetime.datetime.now()

            Tval = False
            posErrAft, negErrAft, validAft = testErr(testData, pnratio, accuracy)
            if not validBef and not validAft:
                badIter += 1

            print count, 'pos err (bef, aft): ', posErrBef, posErrAft, ' neg err (bef, aft): ', negErrBef, negErrAft,
            if ((not validBef and validAft) or   # became valid, or
                (validBef and validAft and       # stayed valid and the + measure did not drop
                 (posErrAft >= posErrBef)
                 # alternative acceptance tests:
                 # (posErrAft>=posErrBef and negErrAft-negErrBef>-epsilon) or  # + or - gets better, - or + cannot get much worse
                 # (negErrAft>=negErrBef and posErrAft-posErrBef>-epsilon) or
                 # posErrAft-posErrBef + negErrAft-negErrBef > 0               # + and - together must improve
                )):
                save_path = saver.save(sess, file2)
                print 'model saved in file: %s' % save_path
            else:
                restoreNetwork(sess)
                print 'model not saved and the old model restored'

            print 'keyboard interruption to terminate learning in next 10 secs ... ',
            rlist, wlist, xlist = select([sys.stdin], [], [], 10)
            if rlist:
                print 'wrap up prematurely ...',
                count = maxCount
            print 'continue.'

print datetime.datetime.now()
train_neural_network(x)
print datetime.datetime.now()
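# Usage sketch (the script name below is illustrative, not fixed by this file):
#   python boost_cnn_fnn.py 3
# sys.argv[1] only tags the checkpoint name under saved_models/, so separate
# boosting runs keep separate checkpoint files.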