#!/bin/env python

import sys
import optparse
import os
import re
import subprocess

#### Classes ###################################################################

#### Functions #################################################################

def mdb_update(db, obj, var, val, mdb_table=None):
    """Print one mdbUpdate command line to standard output.

    Nothing is executed here; the command is only printed.  All values are
    interpolated verbatim with ``%s`` (no quoting or escaping is applied).

    Args:
        db: Database name, printed as the first mdbUpdate argument.
        obj: Object name, printed as ``-obj=<obj>``.
        var: Variable name, printed as ``-setVar=<var>``.
        val: Value to assign, printed as ``-setVal=<val>``.
        mdb_table: Optional table name.  When truthy, ``-table=<mdb_table>``
            is inserted right after the database name.
    """
    # Build the optional fragment once instead of duplicating the whole
    # format string in two branches.
    table_arg = (" -table=%s" % (mdb_table,)) if mdb_table else ""
    # print(...) with a single pre-formatted string behaves the same under
    # the Python 2 print statement and the Python 3 print function.
    print("mdbUpdate %s%s -obj=%s -setVar=%s -setVal=%s"
          % (db, table_arg, obj, var, val))

def get_obj_for_file(db, file, mdb_table=None):
    """Return the metaObject name that mdbPrint reports for a file name.

    Runs ``mdbPrint <db> [-table=<mdb_table>] -vars=fileName=<file> -byVar``
    and reads the object name from the second line of its standard output,
    which must look like ``metaObject <name> ...``.

    Args:
        db: Database name passed as the first mdbPrint argument.
        file: File name to look up (matched through ``fileName=<file>``).
        mdb_table: Optional table name; adds ``-table=<mdb_table>`` when
            truthy.

    Returns:
        The second whitespace-separated field of the second output line, or
        None when mdbPrint prints fewer than two lines.

    Raises:
        AssertionError: If mdbPrint exits with a non-zero status, or if the
            second output line is not of the form ``metaObject <name>``.
        OSError: If the mdbPrint executable cannot be started.
    """
    command = ["mdbPrint", "%s" % (db,)]
    if mdb_table:
        command.append("-table=%s" % (mdb_table,))
    command.extend(["-vars=fileName=%s" % (file,), "-byVar"])

    # Pass an argument list and skip the shell: file names containing spaces,
    # quotes, '$' or backticks then reach mdbPrint unchanged instead of being
    # interpreted (or executed) by the shell.
    # universal_newlines=True yields text output on both Python 2 and 3.
    proc = subprocess.Popen(command,
                            stdout=subprocess.PIPE,
                            stdin=subprocess.PIPE,
                            stderr=subprocess.PIPE,
                            universal_newlines=True)
    stdout_text, stderr_text = proc.communicate()

    # Raised explicitly (still as AssertionError, the type the original
    # assert produced) so the checks are not stripped by "python -O".
    if proc.returncode != 0:
        raise AssertionError("command %s returned non-zero (%d): %s"
                             % (" ".join(command), proc.returncode,
                                stderr_text.strip()))

    # splitlines() does not produce a trailing empty element, so a single
    # line of output counts as "not found" instead of raising IndexError.
    stdout_lines = stdout_text.splitlines()
    if len(stdout_lines) < 2:
        return None

    fields = stdout_lines[1].split()
    if len(fields) < 2 or fields[0] != "metaObject":
        raise AssertionError("unexpected mdbPrint output line: %r"
                             % (stdout_lines[1],))
    return fields[1]

#### Main ######################################################################

def _load_accessions(accessions_filename):
    """Extract the series and sample accessions from the accessions table.

    The file content is split into tokens on runs of whitespace and of the
    characters \\xc2 / \\xa0 (presumably to cope with no-break spaces pasted
    into the table).  A token starting with ``GSE<digits>`` becomes the
    series accession (the last one seen wins).  A token starting with
    ``GSM<digits>`` is a sample accession and is stored under the token that
    immediately follows it.

    Args:
        accessions_filename: Path of the accessions table to read.

    Returns:
        A ``(series_accession, sample_accessions)`` tuple.  The first item is
        None when no GSE token was found; the second maps the token after
        each GSM accession to that accession.
    """
    series_accession_re = re.compile(r"GSE\d+")
    sample_accession_re = re.compile(r"GSM\d+")

    with open(accessions_filename) as accessions_file:
        table_text = accessions_file.read()

    # A non-capturing group keeps the separators out of the result, so the
    # list holds only tokens and "the next token" is simply index + 1.
    tokens = re.split(r"(?:\xc2|\xa0|\s)+", table_text)

    series_accession = None
    sample_accessions = {}
    for index, token in enumerate(tokens):
        if series_accession_re.match(token):
            series_accession = token
        elif sample_accession_re.match(token):
            if index + 1 < len(tokens):
                sample_accessions[tokens[index + 1]] = token
            else:
                # The table ended right after the accession; there is no
                # following token to file it under.
                sys.stderr.write("No token follows sample accession %s in %s\n"
                                 % (token, accessions_filename))
    return series_accession, sample_accessions


def main(argv=None):
    """Print the mdbUpdate commands that attach accessions to a composite.

    Expects three positional arguments: database, manifest file
    (``ucsc_encode_dcc_<composite>.soft``) and accessions table.  Composite
    level commands are printed first.  Then, for every
    ``!Sample_(raw|supplementary)_file_<n> = <file>`` line in the manifest,
    the file's metaObject is looked up and a ``geoSampleAccession`` command
    is printed, using the accession of the most recent ``^SAMPLE = <name>``
    line.  Problems with individual files are reported on stderr and skipped.

    Args:
        argv: Full argument vector including the program name; defaults to
            ``sys.argv``.

    Returns:
        None.

    Raises:
        SystemExit: With status 10 (after printing help) when the number of
            positional arguments is not three; with status 2 on invalid
            options or when the manifest file name does not match
            ``ucsc_encode_dcc_<composite>.soft``.
    """
    # Kept as a module-level global, as in the original, in case code outside
    # this function reads it.
    global options

    if argv is None:
        argv = sys.argv

    # parse the args
    parser = optparse.OptionParser(usage="%prog [options] database manifest.soft accessions_table",
        version="%prog 0.9")
    parser.add_option("-t", "--table", dest="table", action="store", default=None)
    parser.add_option("-v", "--verbose", dest="verbose", action="store_true", default=False)

    # Parse the argv we were handed (minus the program name) rather than
    # silently falling back to sys.argv.
    (options, args) = parser.parse_args(argv[1:])

    # output a usage message
    if len(args) != 3:
        parser.print_help()
        sys.exit(10)

    # get the positional args
    database, manifest_filename, accessions_filename = args

    # patterns to match manifest content
    composite_name_re = re.compile(r"ucsc_encode_dcc_(.*)\.soft")
    sample_line_re = re.compile(r"\^SAMPLE = (.*)")
    file_line_re = re.compile(r"!Sample_(raw|supplementary)_file_\d+ = (.*)")

    # get the composite name
    composite_match = composite_name_re.search(manifest_filename)
    if composite_match is None:
        parser.error("manifest file name %r does not look like "
                     "ucsc_encode_dcc_<composite>.soft" % (manifest_filename,))
    composite_name = composite_match.group(1)
    if options.verbose:
        sys.stderr.write("Using composite name: %s\n" % composite_name)

    # get the series and sample accessions
    series_accession, sample_accessions = _load_accessions(accessions_filename)

    # process the series accession
    print("# add composite level metadata")
    mdb_update(database, composite_name, "objType", "composite", options.table)
    if series_accession is None:
        # Do not emit a command that would store the literal string "None".
        sys.stderr.write("Could not find a series accession in %s\n"
                         % accessions_filename)
    else:
        mdb_update(database, composite_name, "geoSeriesAccession", series_accession, options.table)
    mdb_update(database, composite_name, "composite", composite_name, options.table)

    print("# add file level metadata")
    # read the manifest file
    current_sample = None
    with open(manifest_filename) as manifest_file:
        for line in manifest_file:
            # get the sample name
            sample_match = sample_line_re.match(line)
            if sample_match:
                current_sample = sample_match.group(1)
                if options.verbose:
                    sys.stderr.write("Processing sample: %s\n" % current_sample)
                continue

            # process files
            file_match = file_line_re.match(line)
            if not file_match:
                continue
            current_file = file_match.group(2)
            if options.verbose:
                sys.stderr.write("Processing file: %s\n" % current_file)

            # get the metaObject for the file
            object_name = get_obj_for_file(database, current_file, options.table)
            if object_name is None:  # didn't find metaObject
                sys.stderr.write("Could not find metaObject for file %s\n" % current_file)
                continue

            # A sample missing from the accessions table (or a file line seen
            # before any ^SAMPLE line) is reported and skipped instead of
            # aborting the run with a KeyError.
            sample_accession = sample_accessions.get(current_sample)
            if sample_accession is None:
                sys.stderr.write("Could not find sample accession for sample %s (file %s)\n"
                                 % (current_sample, current_file))
                continue

            mdb_update(database, object_name, "geoSampleAccession", sample_accession, options.table)

#### Module ####################################################################

# Run main() only when this file is executed as a script; main()'s return
# value is handed to sys.exit() as the process exit status.
if __name__ == "__main__":
    sys.exit(main())
