'''
Uses 3 fully connected feature layers + 3 fully connected recognition layers
for recognition.

Can be trained in two modes:
    full == all layers are trainable
    half == only the top fully connected layers are trained; the bottom
            feature layers are fixed, with parameters supplied by the
            autoencoder
'''

# nIs, nOs, Train, Tval, kRate, x, y, NDIRS, maxCount, maxBad, maxEpochs,
# bSize, epsilon, normalizeNN, testErr, inputData, select, np, tf, os, sys,
# datetime are expected to come in through this wildcard import
from utils import *

# placeholders for the weights and biases; the actual tf.Variable objects
# are created in fully_connected_neural_network()
W_encoder1 = 0.0
W_encoder2 = 0.0
W_encoder3 = 0.0
b_encoder1 = 0.0
b_encoder2 = 0.0
b_encoder3 = 0.0

W_fc1  = 0.0
W_fc2  = 0.0
W_fc3  = 0.0
W_out2 = 0.0
b_fc1  = 0.0
b_fc2  = 0.0
b_fc3  = 0.0
b_out2 = 0.0

Type  = [ 'full', 'half' ]      # recognized training modes
nType = 'full'

h1 = 1024       # NN layer one
h2 = 512        # NN layer two
h3 = 256        # NN layer three
fc = 1024       # fully connected layer size


def backupNetwork() :
    # Snapshot the current network so a bad training iteration can be rolled
    # back by restoreNetwork().  Note that these assignments copy Python
    # references to the tf.Variable objects, not the values held inside the
    # session; see the value-based sketch at the end of this file.
    global W_fc1, W_fc2, W_fc3, W_out2, b_fc1, b_fc2, b_fc3, b_out2
    global WB_fc1, WB_fc2, WB_fc3, WB_out2, bB_fc1, bB_fc2, bB_fc3, bB_out2
    global W_encoder1, W_encoder2, W_encoder3, b_encoder1, b_encoder2, b_encoder3
    global WB_encoder1, WB_encoder2, WB_encoder3, bB_encoder1, bB_encoder2, bB_encoder3

    WB_fc1      = W_fc1
    WB_fc2      = W_fc2
    WB_fc3      = W_fc3
    WB_out2     = W_out2
    WB_encoder1 = W_encoder1
    WB_encoder2 = W_encoder2
    WB_encoder3 = W_encoder3

    bB_fc1      = b_fc1
    bB_fc2      = b_fc2
    bB_fc3      = b_fc3
    bB_out2     = b_out2
    bB_encoder1 = b_encoder1
    bB_encoder2 = b_encoder2
    bB_encoder3 = b_encoder3


def restoreNetwork() :
    # roll back to the snapshot taken by backupNetwork()
    global W_fc1, W_fc2, W_fc3, W_out2, b_fc1, b_fc2, b_fc3, b_out2
    global WB_fc1, WB_fc2, WB_fc3, WB_out2, bB_fc1, bB_fc2, bB_fc3, bB_out2
    global W_encoder1, W_encoder2, W_encoder3, b_encoder1, b_encoder2, b_encoder3
    global WB_encoder1, WB_encoder2, WB_encoder3, bB_encoder1, bB_encoder2, bB_encoder3

    W_fc1      = WB_fc1
    W_fc2      = WB_fc2
    W_fc3      = WB_fc3
    W_out2     = WB_out2
    W_encoder1 = WB_encoder1
    W_encoder2 = WB_encoder2
    W_encoder3 = WB_encoder3

    b_fc1      = bB_fc1
    b_fc2      = bB_fc2
    b_fc3      = bB_fc3
    b_out2     = bB_out2
    b_encoder1 = bB_encoder1
    b_encoder2 = bB_encoder2
    b_encoder3 = bB_encoder3


def fully_connected_neural_network(x, sess) :
    global Tval, W_fc1, W_fc2, W_fc3, W_out2, b_fc1, b_fc2, b_fc3, b_out2
    global W_encoder1, W_encoder2, W_encoder3, b_encoder1, b_encoder2, b_encoder3

    W_encoder1 = tf.Variable(tf.random_normal([nIs, h1]), name="W_encoder1")
    W_encoder2 = tf.Variable(tf.random_normal([h1,  h2]), name="W_encoder2")
    W_encoder3 = tf.Variable(tf.random_normal([h2,  h3]), name="W_encoder3")
    W_fc1      = tf.Variable(tf.random_normal([h3,  fc]), name="W_fc1")
    W_fc2      = tf.Variable(tf.random_normal([fc,  fc]), name="W_fc2")
    W_fc3      = tf.Variable(tf.random_normal([fc,  fc]), name="W_fc3")
    W_out2     = tf.Variable(tf.random_normal([fc, nOs]), name="W_out2")

    b_encoder1 = tf.Variable(tf.random_normal([h1]),  name="b_encoder1")
    b_encoder2 = tf.Variable(tf.random_normal([h2]),  name="b_encoder2")
    b_encoder3 = tf.Variable(tf.random_normal([h3]),  name="b_encoder3")
    b_fc1      = tf.Variable(tf.random_normal([fc]),  name="b_fc1")
    b_fc2      = tf.Variable(tf.random_normal([fc]),  name="b_fc2")
    b_fc3      = tf.Variable(tf.random_normal([fc]),  name="b_fc3")
    b_out2     = tf.Variable(tf.random_normal([nOs]), name="b_out2")

    weights = { "W_encoder1": W_encoder1,
                "W_encoder2": W_encoder2,
                "W_encoder3": W_encoder3,
                "W_fc1"     : W_fc1,
                "W_fc2"     : W_fc2,
                "W_fc3"     : W_fc3,
                "W_out2"    : W_out2 }
    biases  = { "b_encoder1": b_encoder1,
                "b_encoder2": b_encoder2,
                "b_encoder3": b_encoder3,
                "b_fc1"     : b_fc1,
                "b_fc2"     : b_fc2,
                "b_fc3"     : b_fc3,
                "b_out2"    : b_out2 }

    # NOTE: each sess.run(choice, ...) below is evaluated once, while the
    # graph is being built, so the dropout mode selected by Tval at
    # construction time is frozen into the graph; see the placeholder-based
    # sketch at the end of this file
    choice = tf.where(Train, True, False)

    encoder1 = tf.matmul(x, weights['W_encoder1']) + biases['b_encoder1']
    encoder1 = normalizeNN(encoder1, sess)
    encoder1 = tf.nn.sigmoid(encoder1)
    if sess.run(choice, feed_dict={Train : Tval}) :
        encoder1 = tf.nn.dropout(encoder1, kRate)
    else :
        encoder1 = tf.nn.dropout(encoder1, 1.0)

    encoder2 = tf.matmul(encoder1, weights['W_encoder2']) + biases['b_encoder2']
    encoder2 = normalizeNN(encoder2, sess)
    encoder2 = tf.nn.sigmoid(encoder2)
    if sess.run(choice, feed_dict={Train : Tval}) :
        encoder2 = tf.nn.dropout(encoder2, kRate)
    else :
        encoder2 = tf.nn.dropout(encoder2, 1.0)

    encoder3 = tf.matmul(encoder2, weights['W_encoder3']) + biases['b_encoder3']
    encoder3 = normalizeNN(encoder3, sess)
    encoder3 = tf.nn.sigmoid(encoder3)
    if sess.run(choice, feed_dict={Train : Tval}) :
        encoder3 = tf.nn.dropout(encoder3, kRate)
    else :
        encoder3 = tf.nn.dropout(encoder3, 1.0)

    fc1 = tf.matmul(encoder3, weights['W_fc1']) + biases['b_fc1']
    fc1 = normalizeNN(fc1, sess)
    fc1 = tf.nn.relu(fc1)
    if sess.run(choice, feed_dict={Train : Tval}) :
        fc1 = tf.nn.dropout(fc1, kRate)
    else :
        fc1 = tf.nn.dropout(fc1, 1.0)

    fc2 = tf.matmul(fc1, weights['W_fc2']) + biases['b_fc2']
    fc2 = normalizeNN(fc2, sess)
    fc2 = tf.nn.relu(fc2)
    if sess.run(choice, feed_dict={Train : Tval}) :
        fc2 = tf.nn.dropout(fc2, kRate)
    else :
        fc2 = tf.nn.dropout(fc2, 1.0)

    fc3 = tf.matmul(fc2, weights['W_fc3']) + biases['b_fc3']
    fc3 = normalizeNN(fc3, sess)
    fc3 = tf.nn.relu(fc3)
    if sess.run(choice, feed_dict={Train : Tval}) :
        fc3 = tf.nn.dropout(fc3, kRate)
    else :
        fc3 = tf.nn.dropout(fc3, 1.0)

    out2 = tf.nn.relu(tf.matmul(fc3, weights['W_out2']) + biases['b_out2'])
    return out2


def train_neural_network(x) :
    global Tval, W_fc1, W_fc2, W_out2, b_fc1, b_fc2, b_out2, correct, accuracy

    with tf.Session() as sess:
        pnratio       = np.random.uniform(0.6, 0.9)
        class_weight  = tf.constant( [[ 1/(1+pnratio), pnratio/(1+pnratio) ]] )
        sample_weight = tf.transpose( tf.matmul(y, tf.transpose(class_weight)) )    # [1, batchsize]

        prediction = fully_connected_neural_network(x, sess)                        # [batchsize, 2]
        xent = sample_weight * tf.nn.softmax_cross_entropy_with_logits(
                   logits=prediction, labels=y, name='xent')                        # [1, batchsize]
        cost = tf.reduce_mean(xent)                                                 # scalar

        correct  = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
        accuracy = tf.reduce_mean(tf.cast(correct, 'float'))

        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            if nType == 'full' :    # train the whole network, feature + fc layers
                optimizer = tf.train.AdadeltaOptimizer(1., 0.95, 1e-6).minimize(cost)
            elif nType == 'half' :  # train only the fc layers (feature weights supplied by the autoencoder)
                optimizer = tf.train.AdadeltaOptimizer(1., 0.95, 1e-6).minimize(cost,
                                var_list=[W_fc1, W_fc2, W_fc3, W_out2, b_fc1, b_fc2, b_fc3, b_out2])
            else :
                print 'network type not understood: ', nType
                sys.exit(0)

        # using 2 norm
        # xent = sample_weight*tf.pow(tf.matmul(prediction-y, tf.transpose(class_weight)), 2)
        # cost = tf.reduce_mean(xent)
        # using other optimizer options
        # optimizer = tf.train.GradientDescentOptimizer(0.01).minimize(cost)
        # optimizer = tf.train.AdamOptimizer(0.01).minimize(cost)

        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        saverAUTO = tf.train.Saver( { "W_encoder1" : W_encoder1,
                                      "W_encoder2" : W_encoder2,
                                      "W_encoder3" : W_encoder3,
                                      "b_encoder1" : b_encoder1,
                                      "b_encoder2" : b_encoder2,
                                      "b_encoder3" : b_encoder3 } )

        '''
        file1 = '/home/yfwang/Documents/rogue1/microwave/rec/saved_models/model_auto_fnn3_wnorm_.ckpt'
        file2 = '/home/yfwang/Documents/rogue1/microwave/rec/saved_models/model_boost_auto_fnn3_wnorm_' + nType + '_' + sys.argv[1] + '.ckpt'
        file3 = '/home/yfwang/Documents/rogue1/microwave/rec/saved_models/model_boost_auto_fnn3_wnorm_' + nType + '_' + sys.argv[1] + '.ckpt.meta'
        if not os.path.isfile(file3) :
            saverAUTO.restore(sess, file1)
            print 'model ' + file1 + ' extracted from Autoencoder restored'
        else :
            saver.restore(sess, file2)
            print 'cnn model ' + file2 + ' from previous run restored'
        '''

        file2 = '/home/yfwang/Documents/rogue1/microwave/rec/saved_models/boost_auto_fnn3_full_' + sys.argv[1] + '.ckpt'
        if os.path.isfile(file2 + '.meta') :    # already conducted some run
            saver.restore(sess, file2)
            print 'densenet model ' + file2 + ' from previous run restored'

        count   = 0
        badIter = 0
        while 1 :
            files     = np.random.permutation(NDIRS-1)+1    # there are 830 directories
            trainData = files[0:10]                         # use 10 files to train
            testData  = files[10:15]

            print '\n\niteration: ', sys.argv[1], ',', count, '\n\n'
            count += 1

            if count >= maxCount :
                # final pass: evaluate over all directories, 10 at a time;
                # the save must precede the print so save_path is bound
                posErrFnl = [0]*(NDIRS/10)
                negErrFnl = [0]*(NDIRS/10)
                for i in range(NDIRS/10) :
                    iBeg = i*10
                    iEnd = (i+1)*10
                    testData = range(iBeg, iEnd)
                    Tval = False
                    posErrFnl[i], negErrFnl[i], validFnl = testErr(testData, pnratio, accuracy)
                save_path = saver.save(sess, file2)
                print 'final err +:', sum(posErrFnl)/len(posErrFnl), '-:', sum(negErrFnl)/len(negErrFnl), 'model:', save_path, '\n\n\n'
                return

            if badIter == maxBad :
                print 'give up, too many bad iterations ' + file2
                return

            Tval = False
            posErrBef, negErrBef, validBef = testErr(testData, pnratio, accuracy)

            print '*****************************************************************'
            print '[pos,neg]:', sess.run(class_weight), 'pnratio:', pnratio, 'network:', nType
            print '*****************************************************************'

            tX, tY = inputData(trainData, pnratio)
            dSize  = len(tX)

            backupNetwork()     # back up the current network
            print datetime.datetime.now()

            Tval = True
            for epoch in range(maxEpochs) :
                epoch_loss = 0
                for i in range(dSize/bSize) :
                    sidx = i*bSize
                    eidx = (i+1)*bSize
                    epoch_x = tX[sidx:eidx]
                    epoch_y = tY[sidx:eidx]
                    # discard the optimizer result; do not clobber the loop index
                    _, c = sess.run([optimizer, cost], feed_dict={x: epoch_x, y: epoch_y})
                    epoch_loss += c
                print 'Epoch', epoch, 'completed out of', maxEpochs, 'loss:', epoch_loss
            print datetime.datetime.now()

            Tval = False
            posErrAft, negErrAft, validAft = testErr(testData, pnratio, accuracy)
            if not validBef and not validAft :
                badIter += 1

            print count, 'pos err (bef, aft): ', posErrBef, posErrAft, ' neg err (bef, aft): ', negErrBef, negErrAft,
            if ( (not validBef and validAft) or     # valid aft but not bef, or valid both before and after
                 ( validBef and validAft and
                   ( posErrAft >= posErrBef )
                   # ( posErrAft>=posErrBef and negErrAft-negErrBef>-epsilon ) or   # + or - gets better, - or + cannot get much worse
                   # ( negErrAft>=negErrBef and posErrAft-posErrBef>-epsilon ) or
                   # posErrAft-posErrBef + negErrAft-negErrBef>0                    # + and - together must improve
                 ) ) :
                save_path = saver.save(sess, file2)
                print '%s model saved in file: %s' % (nType, save_path)
            else :
                restoreNetwork()    # restore the previous network
                print 'model not saved and previous CNN model restored'

            print 'keyboard interruption to terminate learning in next 10 secs ... ',
            rlist, wlist, xlist = select([sys.stdin], [], [], 10)
            if rlist :
                count = maxCount
                print 'wrap up prematurely ...',
            print 'continue.'


print datetime.datetime.now()
train_neural_network(x)
print datetime.datetime.now()
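

# ---------------------------------------------------------------------------
# A minimal sketch, never called, of the usual TF1 alternative to the
# sess.run(choice, ...) tests in fully_connected_neural_network() above:
# feeding a keep_prob placeholder lets one graph serve both training and
# evaluation, instead of freezing the dropout rate into the graph at
# construction time.  Every name below (dropout_placeholder_sketch,
# keep_prob, inp, w, b, hidden) is illustrative only, not part of the
# model above.
def dropout_placeholder_sketch() :
    keep_prob = tf.placeholder(tf.float32, name='keep_prob')
    inp    = tf.placeholder(tf.float32, [None, h3])
    w      = tf.Variable(tf.random_normal([h3, fc]))
    b      = tf.Variable(tf.random_normal([fc]))
    hidden = tf.nn.relu(tf.matmul(inp, w) + b)
    hidden = tf.nn.dropout(hidden, keep_prob)   # the same node serves both modes
    # a training step would feed    feed_dict={..., keep_prob: kRate}
    # an evaluation step would feed feed_dict={..., keep_prob: 1.0}
    return inp, keep_prob, hidden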
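

# ---------------------------------------------------------------------------
# A value-based sketch, never called, of backupNetwork()/restoreNetwork():
# the versions above copy Python references to the tf.Variable objects, so
# the values held inside the session are not actually snapshotted or rolled
# back.  The variant below snapshots and restores real values, assuming the
# caller passes the session and the list of variables; all names here are
# illustrative only.
_value_snapshot = {}

def backup_values(sess, variables) :
    # read the current value of every variable out of the session
    for v in variables :
        _value_snapshot[v.name] = sess.run(v)

def restore_values(sess, variables) :
    # write the snapshotted values back into the session without
    # adding new assign ops to the graph
    for v in variables :
        v.load(_value_snapshot[v.name], sess)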