#!/usr/bin/env python
import sys
from optparse import OptionParser
import random
import os
import os.path
import numpy as np

# strings for optionparser
# NOTE(review): the usage text advertises two positional arguments
# (ic_file, rate_file), but main() rejects any positional argument --
# confirm which of the two is intended.
usage_str = "usage: %prog [options] ic_file rate_file"
version_str = "%prog 0.9"

# Thresholds read by is_valid():
# a paired base adjacent to a strand break must be followed by
# (guaranteed_continuity - 1) further stacked steps of the same helix.
guaranteed_continuity = 4
# two strands count as connected only when they share strictly more than
# this many base pairs.
guaranteed_connection = 6

def _helix_continues(plist, start, step, n_steps):
    """Return True if a stacked helix runs for n_steps from index start.

    Walking from ``start`` in direction ``step`` (+1 or -1), each
    neighbouring base must be paired with the neighbour of the previous
    base's partner, i.e. the partner index moves by ``-step`` per step.
    Running off either end of ``plist`` counts as a broken helix (instead
    of raising IndexError or wrapping around via negative indices).
    """
    n_bases = len(plist)
    for k in range(n_steps):
        here = start + k * step
        there = here + step
        if here < 0 or there < 0 or here >= n_bases or there >= n_bases:
            return False
        if plist[there] != plist[here] - step:
            return False
    return True


def _find_root(parent, x):
    """Return the representative of x in the union-find forest ``parent``."""
    while parent[x] != x:
        # Path halving keeps the trees shallow.
        parent[x] = parent[parent[x]]
        x = parent[x]
    return x


def is_valid(struc):
    """Decide whether a dot-paren-plus structure passes both filters.

    The structure is accepted when

    1. every paired base sitting directly next to a strand break ('+')
       starts a helix that continues for ``guaranteed_continuity - 1``
       further stacked steps away from the break, and
    2. all strands belong to one connected complex, where two strands are
       linked if they share strictly more than ``guaranteed_connection``
       base pairs.  Links are transitive.

    Parameters:
        struc: structure string made of '(', ')', '.' and '+'.

    Returns:
        True if both conditions hold, False otherwise.
    """
    plist, breaks = dpp_to_plist(struc)
    n_bases = len(plist)
    g_len = guaranteed_continuity - 1

    # --- 1. helix continuity on both sides of every strand break --------
    for b in breaks:
        # First base after the break: helix must continue towards higher
        # indices.  (b == n_bases means an empty trailing strand.)
        if b < n_bases and plist[b] != -1:
            if not _helix_continues(plist, b, 1, g_len):
                return False
        # Last base before the break: helix must continue towards lower
        # indices.  (b == 0 means an empty leading strand.)
        if b > 0 and plist[b - 1] != -1:
            if not _helix_continues(plist, b - 1, -1, g_len):
                return False

    # --- 2. strand connectivity -----------------------------------------
    n_strands = len(breaks) + 1

    # identity[k] is the index of the strand that base k belongs to.
    identity = []
    lastm = 0
    for strand, m in enumerate(breaks):
        identity.extend([strand] * (m - lastm))
        lastm = m
    identity.extend([len(breaks)] * (n_bases - lastm))

    # connection_count[a, b] = number of base pairs between strands a and b
    # (symmetric, because plist stores every pair in both directions).
    connection_count = np.zeros((n_strands, n_strands), dtype=int)
    for i, j in enumerate(plist):
        if j >= 0:
            connection_count[identity[i], identity[j]] += 1

    # Union-find so that connectivity is transitive: a naive single-pass
    # "take the smaller label" merge can leave a strand with a stale label
    # even though it is linked to the rest through an intermediate strand.
    parent = list(range(n_strands))
    for i in range(n_strands):
        for j in range(i + 1, n_strands):
            if connection_count[i, j] > guaranteed_connection:
                root_i = _find_root(parent, i)
                root_j = _find_root(parent, j)
                if root_i != root_j:
                    parent[max(root_i, root_j)] = min(root_i, root_j)

    root = _find_root(parent, 0)
    for strand in range(1, n_strands):
        if _find_root(parent, strand) != root:
            return False
    return True


def dpp_to_plist(struc):
    """Convert a dot-paren-plus string into a pair list and break list.

    Bases are numbered from 0 in the order they appear; '+' characters
    mark strand breaks and do not consume a base index.

    Parameters:
        struc: structure string made of '(', ')', '.' and '+'.  Any other
            character is skipped without advancing the base index.

    Returns:
        (plist, breaks) where plist[k] is the index of the base paired
        with base k, -1 for an unpaired base, and -5 for a slot that was
        never assigned; breaks lists, for every '+', the index of the
        first base after it.

    Raises:
        IndexError: if a ')' has no matching '('.
    """
    partners = [-5] * (len(struc) - struc.count('+'))
    nicks = []
    open_positions = []
    pos = 0

    for symbol in struc:
        if symbol == '+':
            # A break sits *before* the base that comes next.
            nicks.append(pos)
            continue

        if symbol == '(':
            open_positions.append(pos)
        elif symbol == ')':
            mate = open_positions.pop()
            partners[pos] = mate
            partners[mate] = pos
        elif symbol == '.':
            partners[pos] = -1
        else:
            # Unrecognised character: ignore it entirely.
            continue

        pos += 1

    return partners, nicks



def _ensure_dir(path):
    """Create directory ``path`` (with parents) unless it already exists."""
    try:
        os.makedirs(path)
    except OSError:
        # Only "already exists" is harmless; permission problems or a
        # regular file in the way must not be silently ignored.
        if not os.path.isdir(path):
            raise


def main(argv=None):
    """Generate random multi-strand MFE structures and save the valid ones.

    For every requested total length and every trial, random 'ACGU'
    sequences split over ``--nstrands`` strands are folded with
    getMFEStruct() until is_valid() accepts the resulting structure.  The
    structure and an all-'N' constraint line of the same layout are then
    written to ``strucs/<length>/<trial>.fold`` (both numbers zero-padded
    to four digits).

    The NUPACK installation directory is taken from the ``NUPACKHOME``
    environment variable when set, otherwise the historical default
    '/home/wolfe/install/' is used.

    Parameters:
        argv: full argument vector including the program name; defaults to
            sys.argv.

    Returns:
        2 if positional arguments were given, None on success.
    """
    if argv is None:
        argv = sys.argv
    parser = OptionParser(usage=usage_str, version=version_str)

    parser.add_option('-n', '--ntrials', dest='ntrials',
            help='Number of trials per length', default=30, type='int')
    parser.add_option('-l', '--lengths', dest='lengths',
            help='comma-separated list of lengths', default='100,200')
    parser.add_option('--nstrands', dest='nstrands',
            help='the number of strands', default=4, type='int')

    (options, args) = parser.parse_args(argv[1:])

    # Honour the conventional environment variable; fall back to the
    # previously hard-coded location so existing setups keep working.
    NUPACKHOME = os.environ.get('NUPACKHOME', '/home/wolfe/install/')

    n_trials = options.ntrials
    lengths = [int(s) for s in options.lengths.split(',')]
    n_strands = options.nstrands

    # get input options
    if len(args) != 0:
        sys.stderr.write("No arguments\n")
        parser.print_help()
        return 2

    if n_strands < 1:
        # Avoids a ZeroDivisionError below; parser.error() exits with 2.
        parser.error('--nstrands must be at least 1')

    path = 'strucs'
    _ensure_dir(path)

    dir_format = os.path.join(path, '%04i')
    name_format = os.path.join(path, '%04i', '%04i.fold')

    for length in lengths:
        length_dir = dir_format % length
        print(length_dir)
        _ensure_dir(length_dir)

        # Ceiling division: strands are generated long enough to cover
        # `length` bases and the surplus is trimmed off the end below.
        strand_len = length // n_strands
        if length % n_strands > 0:
            strand_len += 1

        # `length` bases plus one '+' between each pair of strands.
        total_chars = length + n_strands - 1

        # The constraint line does not depend on the random sequence.
        constraint = '+'.join(['N' * strand_len] * n_strands)[:total_chars]

        for trial in range(n_trials):
            name = name_format % (length, trial)

            # ".+." is never accepted by is_valid(), so the loop body runs
            # at least once.
            struc = ".+."
            while not is_valid(struc):
                seq = '+'.join(
                    ''.join(random.choice('ACGU') for _ in range(strand_len))
                    for _ in range(n_strands))
                seq = seq[:total_chars]
                struc = getMFEStruct(seq, 37, 'rna1999', NUPACKHOME)
            print(struc)

            with open(name, 'w') as f:
                f.write('%s\n%s\n' % (struc, constraint))


# ############################################################### #
def getMFEStruct(sequence,T,material,NUPACKHOME):
    """Return the MFE structure of ``sequence`` as computed by NUPACK.

    USAGE: mfeStruct = getMFEStruct(sequence,T,material,NUPACKHOME)

    Originally written by Justin Bois, copied from the old MFE utilities
    for BE/ChE 163.

    Writes a NUPACK multi-strand input file into the current directory,
    runs ``<NUPACKHOME>/bin/mfe -multi`` on it, and parses the result file.
    Both scratch files are removed afterwards, also when the run or the
    parsing fails.

    Parameters:
        sequence: base sequence; strands are separated by '+'.
        T: temperature in degrees Celsius (passed to ``-T`` with one
            decimal place).
        material: string passed to ``-material``, e.g. 'dna', 'rna' or
            'rna37' (see NUPACK User Guide).
        NUPACKHOME: directory containing NUPACK, e.g. '/home/wolfe/3.0'.
            A trailing slash is tolerated.

    Returns:
        The first whitespace-delimited token on the third line after the
        leading comment/blank lines of the ``.mfe`` file, i.e. the minimum
        free energy structure in dot-paren notation.

    Raises:
        OSError: if the mfe executable cannot be started.
        IOError: if mfe did not produce an output file.
        ValueError: if the output file has no data after its header.
    """
    import os
    import random
    import re
    import subprocess

    # Name of file for I/O
    fname = 'junk_file_delete_me%d' % random.randint(100000,999999)
    inputFileName = '%s.in' % fname
    outputfile = '%s.mfe' % fname

    strands = sequence.split('+')

    try:
        # Make the input file: strand count, one strand per line, then the
        # strand ordering "1 2 ... n ".
        with open(inputFileName, 'w') as f:
            f.write('%i\n' % len(strands))
            for seq in strands:
                f.write('%s\n' % seq)
            for i in range(1, len(strands) + 1):
                f.write('%i ' % i)
            f.write('\n')

        # Run mfe directly (no shell), so paths containing spaces or shell
        # metacharacters are passed through unchanged.  The exit status is
        # deliberately not checked; a failed run surfaces below when the
        # output file cannot be opened.
        mfe_exe = os.path.join(NUPACKHOME, 'bin', 'mfe')
        subprocess.call([mfe_exe, '-multi', '-T', '%.1f' % T,
                         '-material', material, fname])

        # Parse the output
        with open(outputfile, 'r') as f:
            # Blow through comments and blank lines
            line = f.readline()
            while line and line[0] in '%\n\0':
                line = f.readline()
            if not line:
                raise ValueError(
                    'no MFE record found in NUPACK output %s' % outputfile)

            # `line` is now the entry containing the number of bases;
            # the next line is the free energy of the MFE structure ...
            f.readline()
            # ... and the one after that is the dot-paren structure.
            line = f.readline()

        mfeStruct = re.split(r'\s+', line)[0]
    finally:
        # Remove junk files, whatever happened above.
        for junk in (inputFileName, outputfile):
            try:
                os.remove(junk)
            except OSError:
                # Same best-effort semantics as `rm -f`.
                pass

    return mfeStruct
# ############################################################### #

# Script entry point: run main() and use its return value as the process
# exit status (None is treated as success by sys.exit).
if __name__ == "__main__":
    sys.exit(main())

