'''
Recognition using a CNN + 2 fully connected layers.

Can be trained in two modes:
  full == cnn + fnn, all layers are trainable
  half == fnn only; the top fully connected layers are trained while the
          bottom CNN is fixed, with parameters supplied by the autoencoder

The input CNN features come from an autoencoder with 3 CNN layers and
2 fully connected layers (cnn + fnn).

Boosting mode:
  (a) randomly permute the [0..29] directories into two parts,
      training and testing;
  (b) verify testing performance before and after training;
  (c) save the updated model parameters if training improves performance,
      discard them if not;
  (d) repeat.
'''
from utils import *

import os
import sys
import datetime
from select import select

import numpy as np
import tensorflow as tf

# network parameters; filled in with tf.Variables by convolutional_neural_network()
W_conv1 = 0.0
W_conv2 = 0.0
W_conv3 = 0.0
W_fc1   = 0.0
W_fc2   = 0.0
W_out2  = 0.0
b_conv1 = 0.0
b_conv2 = 0.0
b_conv3 = 0.0
b_fc1   = 0.0
b_fc2   = 0.0
b_out2  = 0.0

Type  = ['half', 'full']   # recognised training modes
nType = 'full'

l1 = 256     # CNN layer 1, 2, and 3 sizes
l2 = 512
l3 = 1024
# fdim = dim/(2**3)  # 6x6 grid after 3 layers
fSize = 3    # filter size in CNN
fc = 1024    # fully connected layer size

def backupNetwork(sess):
    '''Snapshot the current variable values so a bad training iteration can
    be rolled back by restoreNetwork().  The values live in the TF session
    and must be read out with sess.run(); copying the Python variable
    handles alone would not preserve them.'''
    global netBackup
    netBackup = sess.run([W_conv1, W_conv2, W_conv3, W_fc1, W_fc2, W_out2,
                          b_conv1, b_conv2, b_conv3, b_fc1, b_fc2, b_out2])

def restoreNetwork(sess):
    '''Roll the variable values back to the snapshot taken by backupNetwork().'''
    allVars = [W_conv1, W_conv2, W_conv3, W_fc1, W_fc2, W_out2,
               b_conv1, b_conv2, b_conv3, b_fc1, b_fc2, b_out2]
    for var, val in zip(allVars, netBackup):
        var.load(val, sess)  # assign the saved value inside the session
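# The three convolutional stages in convolutional_neural_network() below all
# repeat the same conv -> normalize -> ReLU -> maxpool pattern.  The helper
# here is a minimal illustrative sketch of one such stage, assuming the
# conv2d, maxpool2d, and normalizeNN helpers from utils keep the signatures
# used below; it is not called by this file, which writes the stages out
# explicitly.
def convStage(inp, W, b, sess):
    h = conv2d(inp, W) + b    # convolution plus per-channel bias
    h = normalizeNN(h, sess)  # normalization layer from utils
    h = tf.nn.relu(h)         # nonlinearity
    return maxpool2d(h)       # max pooling halves each spatial dimension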
def convolutional_neural_network(x, sess):
    '''Build the recognition network: three conv/normalize/ReLU/maxpool
    stages followed by two fully connected layers and the output layer.'''
    global Tval, W_fc1, W_fc2, W_out2, b_fc1, b_fc2, b_out2
    global W_conv1, W_conv2, W_conv3, b_conv1, b_conv2, b_conv3

    W_conv1 = tf.Variable(tf.random_normal([fSize, fSize,  1, l1]), name="W_conv1")
    W_conv2 = tf.Variable(tf.random_normal([fSize, fSize, l1, l2]), name="W_conv2")
    W_conv3 = tf.Variable(tf.random_normal([fSize, fSize, l2, l3]), name="W_conv3")
    W_fc1   = tf.Variable(tf.random_normal([dimX8*dimY8*l3, fc]),   name="W_fc1")
    W_fc2   = tf.Variable(tf.random_normal([fc, fc]),               name="W_fc2")
    W_out2  = tf.Variable(tf.random_normal([fc, nOs]),              name="W_out2")

    b_conv1 = tf.Variable(tf.random_normal([l1]),  name="b_conv1")
    b_conv2 = tf.Variable(tf.random_normal([l2]),  name="b_conv2")
    b_conv3 = tf.Variable(tf.random_normal([l3]),  name="b_conv3")
    b_fc1   = tf.Variable(tf.random_normal([fc]),  name="b_fc1")
    b_fc2   = tf.Variable(tf.random_normal([fc]),  name="b_fc2")
    b_out2  = tf.Variable(tf.random_normal([nOs]), name="b_out2")

    weights = {"W_conv1": W_conv1, "W_conv2": W_conv2, "W_conv3": W_conv3,
               "W_fc1": W_fc1, "W_fc2": W_fc2, "W_out2": W_out2}
    biases  = {"b_conv1": b_conv1, "b_conv2": b_conv2, "b_conv3": b_conv3,
               "b_fc1": b_fc1, "b_fc2": b_fc2, "b_out2": b_out2}

    x = tf.reshape(x, shape=[-1, dimX, dimY, 1])

    conv1 = conv2d(x, weights['W_conv1']) + biases['b_conv1']
    conv1 = normalizeNN(conv1, sess)
    conv1 = tf.nn.relu(conv1)
    conv1 = maxpool2d(conv1)

    conv2 = conv2d(conv1, weights['W_conv2']) + biases['b_conv2']
    conv2 = normalizeNN(conv2, sess)
    conv2 = tf.nn.relu(conv2)
    conv2 = maxpool2d(conv2)

    conv3 = conv2d(conv2, weights['W_conv3']) + biases['b_conv3']
    conv3 = normalizeNN(conv3, sess)
    conv3 = tf.nn.relu(conv3)
    conv3 = maxpool2d(conv3)

    fconv3 = tf.reshape(conv3, [-1, dimX8*dimY8*l3])  # flatten conv3 output to a linear array

    # NOTE: sess.run(choice) below executes while the graph is being built,
    # so each dropout branch is frozen at construction time using whatever
    # value Tval holds then; toggling Tval later does not alter the graph.
    choice = tf.where(Train, True, False)

    fc1 = tf.matmul(fconv3, weights['W_fc1']) + biases['b_fc1']
    fc1 = normalizeNN(fc1, sess)
    fc1 = tf.nn.relu(fc1)
    if sess.run(choice, feed_dict={Train: Tval}):
        fc1 = tf.nn.dropout(fc1, kRate)
    else:
        fc1 = tf.nn.dropout(fc1, 1.0)

    fc2 = tf.matmul(fc1, weights['W_fc2']) + biases['b_fc2']
    fc2 = normalizeNN(fc2, sess)
    fc2 = tf.nn.relu(fc2)
    if sess.run(choice, feed_dict={Train: Tval}):
        fc2 = tf.nn.dropout(fc2, kRate)
    else:
        fc2 = tf.nn.dropout(fc2, 1.0)

    out2 = tf.nn.relu(tf.matmul(fc2, weights['W_out2']) + biases['b_out2'])
    return out2
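# Shape walk-through for convolutional_neural_network (the concrete sizes
# below assume a 48x48 input, consistent with the "6x6 grid after 3 layers"
# comment near the top, and are illustrative only; the real values of dimX,
# dimY, dimX8, dimY8, and nOs come from utils):
#   input    [batch, 48, 48,    1]
#   conv1    [batch, 24, 24,  256]   after 2x maxpool
#   conv2    [batch, 12, 12,  512]   after 2x maxpool
#   conv3    [batch,  6,  6, 1024]   after 2x maxpool
#   fconv3   [batch, 6*6*1024]       flattened
#   fc1, fc2 [batch, 1024]
#   out2     [batch, nOs]            two-way decision in this program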
def train_neural_network(x):
    global Tval, W_fc1, W_fc2, W_out2, b_fc1, b_fc2, b_out2, correct, accuracy

    with tf.Session() as sess:
        # draw a random positive/negative weighting for this run; the class
        # weights sum to 1, e.g. pnratio = 0.8 gives [1/1.8, 0.8/1.8] ~
        # [0.556, 0.444], and matmul with the one-hot labels y picks out
        # each sample's class weight
        pnratio = np.random.uniform(0.6, 0.9)
        class_weight  = tf.constant([[1/(1+pnratio), pnratio/(1+pnratio)]])
        sample_weight = tf.transpose(tf.matmul(y, tf.transpose(class_weight)))  # [1, batchsize]

        prediction = convolutional_neural_network(x, sess)  # [batchsize, 2]
        xent = sample_weight * tf.nn.softmax_cross_entropy_with_logits(
                   logits=prediction, labels=y, name='xent')  # [1, batchsize]
        cost = tf.reduce_mean(xent)  # scalar

        correct  = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
        accuracy = tf.reduce_mean(tf.cast(correct, 'float'))

        if nType == 'full':    # train the whole network, cnn + fnn
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            with tf.control_dependencies(update_ops):
                optimizer = tf.train.AdadeltaOptimizer(1., 0.95, 1e-6).minimize(cost)
        elif nType == 'half':  # train only the fnn (cnn weights supplied by the autoencoder)
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            with tf.control_dependencies(update_ops):
                optimizer = tf.train.AdadeltaOptimizer(1., 0.95, 1e-6).minimize(
                    cost, var_list=[W_fc1, W_fc2, W_out2, b_fc1, b_fc2, b_out2])
        else:
            print 'network type not understood: ', nType
            sys.exit(1)

        # using 2-norm instead of cross entropy:
        # xent = sample_weight*tf.pow(tf.matmul(prediction-y, tf.transpose(class_weight)), 2)
        # cost = tf.reduce_mean(xent)
        # other optimizer options:
        # optimizer = tf.train.GradientDescentOptimizer(0.01).minimize(cost)
        # optimizer = tf.train.AdamOptimizer(0.01).minimize(cost)

        sess.run(tf.global_variables_initializer())

        saver    = tf.train.Saver()
        saverCNN = tf.train.Saver({"W_conv1": W_conv1, "W_conv2": W_conv2, "W_conv3": W_conv3,
                                   "b_conv1": b_conv1, "b_conv2": b_conv2, "b_conv3": b_conv3})

        file2 = '/home/yfwang/Documents/rogue1/microwave/rec/saved_models/boost_auto_cnn+fnn_full_' + sys.argv[1] + '.ckpt'
        if os.path.isfile(file2 + '.meta'):  # already conducted some run
            saver.restore(sess, file2)
            print 'model ' + file2 + ' from previous run restored'

        count   = 20
        badIter = 0
        while True:
            files = np.random.permutation(NDIRS-1) + 1  # permute directories 1..NDIRS-1 (830 directories)
            trainData = files[0:10]   # use 10 directories to train
            testData  = files[10:15]  # and 5 directories to test

            print '\n\niteration: ', sys.argv[1], ',', count, '\n\n'
            count += 1

            if count >= maxCount:
                # final pass: measure the error over all directories, 10 at a time
                posErrFnl = [0]*(NDIRS/10)
                negErrFnl = [0]*(NDIRS/10)
                for i in range(NDIRS/10):
                    iBeg = i*10
                    iEnd = (i+1)*10
                    testData = range(iBeg, iEnd)
                    Tval = False
                    posErrFnl[i], negErrFnl[i], validFnl = testErr(testData, pnratio, accuracy)
                save_path = saver.save(sess, file2)
                print 'final err +:', sum(posErrFnl)/len(posErrFnl), '-:', sum(negErrFnl)/len(negErrFnl), 'model:', save_path, '\n\n\n'
                return
            if badIter == maxBad:
                print 'give up, too many bad iterations ' + file2
                return

            Tval = False
            posErrBef, negErrBef, validBef = testErr(testData, pnratio, accuracy)

            print '*****************************************************************'
            print '[pos,neg]:', sess.run(class_weight), 'pnratio:', pnratio, 'network:', nType, 'filter size:', fSize
            print '*****************************************************************'

            tX, tY = inputData(trainData, pnratio)
            dSize = len(tX)

            backupNetwork(sess)
            print datetime.datetime.now()

            Tval = True
            for epoch in range(maxEpochs):
                epoch_loss = 0
                for i in range(dSize/bSize):
                    sidx = i*bSize
                    eidx = (i+1)*bSize
                    epoch_x = tX[sidx:eidx]
                    epoch_y = tY[sidx:eidx]
                    _, c = sess.run([optimizer, cost], feed_dict={x: epoch_x, y: epoch_y})
                    epoch_loss += c
                print 'Epoch', epoch, 'completed out of', maxEpochs, 'loss:', epoch_loss
            print datetime.datetime.now()

            Tval = False
            posErrAft, negErrAft, validAft = testErr(testData, pnratio, accuracy)
            if not validBef and not validAft:
                badIter += 1

            print count, 'pos err (bef, aft): ', posErrBef, posErrAft, ' neg err (bef, aft): ', negErrBef, negErrAft,
            if ((not validBef and validAft) or   # became valid, or
                (validBef and validAft and       # stayed valid and the + measure did not drop
                 (posErrAft >= posErrBef)
                 # alternative acceptance tests:
                 # (posErrAft>=posErrBef and negErrAft-negErrBef>-epsilon) or  # + or - gets better, - or + cannot get much worse
                 # (negErrAft>=negErrBef and posErrAft-posErrBef>-epsilon) or
                 # posErrAft-posErrBef + negErrAft-negErrBef > 0               # + and - together must improve
                )):
                save_path = saver.save(sess, file2)
                print 'model saved in file: %s' % save_path
            else:
                restoreNetwork(sess)
                print 'model not saved and the old model restored'

            print 'keyboard interruption to terminate learning in next 10 secs ... ',
            rlist, wlist, xlist = select([sys.stdin], [], [], 10)
            if rlist:
                print 'wrap up prematurely ...',
                count = maxCount
            print 'continue.'

print datetime.datetime.now()
train_neural_network(x)
print datetime.datetime.now()
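# Usage sketch (the script name below is illustrative, not fixed by this file):
#   python boost_cnn_fnn.py 3
# sys.argv[1] only tags the checkpoint name under saved_models/, so separate
# boosting runs keep separate checkpoint files.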