# for emacs: -*- mode: sh; -*-

# This file describes browser build for the dm6
# Drosophila melanogaster -- BDGP Release 6.0 + ISO1 MT (Aug, 2014)
#  The FlyBase Consortium/Berkeley Drosophila Genome Project/Celera Genomics
#  BDGP Release 6 + ISO1 MT


#############################################################################
# fetch sequence from new style download directory (DONE - 2014-08-27 - Hiram)
    # NCBI has redesigned their FTP download site, new type of address
    #      and naming scheme
    mkdir -p /hive/data/genomes/dm6/genbank
    cd /hive/data/genomes/dm6/genbank

    rsync -L -a -P rsync://ftp.ncbi.nlm.nih.gov/genomes/genbank/invertebrate/Drosophila_melanogaster/latest_assembly_versions/GCA_000001215.4_Release_6_plus_ISO1_MT/ ./

    # measure what we have here:
    faSize GCA_000001215.4_Release_6_plus_ISO1_MT_assembly_structure/Primary_Assembly/assembled_chromosomes/FASTA/*.fna.gz \
     GCA_000001215.4_Release_6_plus_ISO1_MT_assembly_structure/non-nuclear/assembled_chromosomes/FASTA/chrMT.fna.gz \
     GCA_000001215.4_Release_6_plus_ISO1_MT_assembly_structure/Primary_Assembly/unplaced_scaffolds/FASTA/*.fna.gz \
GCA_000001215.4_Release_6_plus_ISO1_MT_assembly_structure/Primary_Assembly/unlocalized_scaffolds/FASTA/*.fna.gz

    # 143726002 bases (1152978 N's 142573024 real 142573024 upper 0 lower)
    #  in 1870 sequences in 11 files
    # Total size: mean 76858.8 sd 1382100.2 min 544 (DS485566.2)
    #   max 32079331 (AE014297.3) median 1577

#############################################################################
# fixup to UCSC naming scheme (DONE - 2014-08-27 - Hiram)
    mkdir /hive/data/genomes/dm6/ucsc
    cd /hive/data/genomes/dm6/ucsc
    # evolving these scripts over time, could become source tree scripts

    cat << '_EOF_' > ucscCompositeAgp.pl
#!/bin/env perl

use strict;
use warnings;

my %accToChr;

my $primary="../genbank/GCA_000001215.4_Release_6_plus_ISO1_MT_assembly_structure/Primary_Assembly";

open (FH, "<$primary/assembled_chromosomes/chr2acc") or
        die "can not read Primary_Assembly/assembled_chromosomes/chr2acc";
while (my $line = <FH>) {
    next if ($line =~ m/^#/);
    chomp $line;
    my ($chrN, $acc) = split('\s+', $line);
    $accToChr{$acc} = $chrN;
}
close (FH);

foreach my $acc (keys %accToChr) {
    my $chrN =  $accToChr{$acc};
    print "$acc $accToChr{$acc}\n";
    open (FH, "zcat $primary/assembled_chromosomes/AGP/chr${chrN}.comp.agp.gz|") or die "can not read chr${chrN}.comp.agp.gz";
    open (UC, ">chr${chrN}.agp") or die "can not write to chr${chrN}.agp";
    while (my $line = <FH>) {
        if ($line =~ m/^#/) {
            print UC $line;
        } else {
            $line =~ s/^$acc/chr${chrN}/;
            print UC $line;
        }
    }
    close (FH);
    close (UC);
    open (FH, "zcat $primary/assembled_chromosomes/FASTA/chr${chrN}.fna.gz|") or die "can not read chr${chrN}.fna.gz";
    open (UC, ">chr${chrN}.fa") or die "can not write to chr${chrN}.fa";
    while (my $line = <FH>) {
        if ($line =~ m/^>/) {
            printf UC ">chr${chrN}\n";
        } else {
            print UC $line;
        }
    }
    close (FH);
    close (UC);
}
'_EOF_'
    # << happy emacs

    cat << '_EOF_' > unlocalized.pl
#!/bin/env perl

use strict;
use warnings;

my %accToChr;
my %chrNames;

my $primary="../genbank/GCA_000001215.4_Release_6_plus_ISO1_MT_assembly_structure/Primary_Assembly";

open (FH, "<$primary/unlocalized_scaffolds/unlocalized.chr2scaf") or
        die "can not read Primary_Assembly/unlocalized_scaffolds/unlocalized.chr2scaf";
while (my $line = <FH>) {
    next if ($line =~ m/^#/);
    chomp $line;
    my ($chrN, $acc) = split('\s+', $line);
    $acc =~ s/\./v/;
    $accToChr{$acc} = $chrN;
    $chrNames{$chrN} += 1;
}
close (FH);

foreach my $chrN (keys %chrNames) {
    my $agpFile =  "$primary/unlocalized_scaffolds/AGP/chr$chrN.unlocalized.scaf.agp.gz";
    my $fastaFile =  "$primary/unlocalized_scaffolds/FASTA/chr$chrN.unlocalized.scaf.fna.gz";
    open (FH, "zcat $agpFile|") or die "can not read $agpFile";
    open (UC, ">chr${chrN}_random.agp") or die "can not write to chr${chrN}_random.agp";
    while (my $line = <FH>) {
        if ($line =~ m/^#/) {
            print UC $line;
        } else {
            chomp $line;
            my (@a) = split('\t', $line);
            my $acc = $a[0];
            $acc =~ s/\./v/;
            die "ERROR: chrN $chrN not correct for $acc"
                if ($accToChr{$acc} ne $chrN);
            my $ucscName = "chr${chrN}_${acc}_random";
            printf UC "%s", $ucscName;
            for (my $i = 1; $i < scalar(@a); ++$i) {
                printf UC "\t%s", $a[$i];
            }
            printf UC "\n";
        }
    }
    close (FH);
    close (UC);
    printf "chr%s\n", $chrN;
    open (FH, "zcat $fastaFile|") or die "can not read $fastaFile";
    open (UC, ">chr${chrN}_random.fa") or die "can not write to chr${chrN}_random.fa";
    while (my $line = <FH>) {
        if ($line =~ m/^>/) {
            chomp $line;
            my $acc = $line;
            $acc =~ s/ .*//;
            $acc =~ s/\./v/;
            $acc =~ s/>//;
            die "ERROR: chrN $chrN not correct for $acc"
                if ($accToChr{$acc} ne $chrN);
            my $ucscName = "chr${chrN}_${acc}_random";
            printf UC ">$ucscName\n";
        } else {
            print UC $line;
        }
    }
    close (FH);
    close (UC);
}
'_EOF_'
    # << happy emacs

    cat << '_EOF_' > unplaced.pl
#!/bin/env perl

use strict;
use warnings;

my $primary="../genbank/GCA_000001215.4_Release_6_plus_ISO1_MT_assembly_structure/Primary_Assembly";

my $agpFile =  "$primary/unplaced_scaffolds/AGP/unplaced.scaf.agp.gz";
my $fastaFile =  "$primary/unplaced_scaffolds/FASTA/unplaced.scaf.fna.gz";
open (FH, "zcat $agpFile|") or die "can not read $agpFile";
open (UC, ">chrUn.agp") or die "can not write to chrUn.agp";
while (my $line = <FH>) {
    if ($line =~ m/^#/) {
        print UC $line;
    } else {
        $line =~ s/\./v/;
        printf UC "chrUn_%s", $line;
    }
}
close (FH);
close (UC);

open (FH, "zcat $fastaFile|") or die "can not read $fastaFile";
open (UC, ">chrUn.fa") or die "can not write to chrUn.fa";
while (my $line = <FH>) {
    if ($line =~ m/^>/) {
        chomp $line;
        $line =~ s/ .*//;
        $line =~ s/\./v/;
        $line =~ s/>//;
        printf UC ">chrUn_$line\n";
    } else {
        print UC $line;
    }
}
close (FH);
close (UC);
'_EOF_'
    # << happy emacs

    chmod +x ucscCompositeAgp.pl unlocalized.pl unplaced.pl
    ./ucscCompositeAgp.pl
    ./unlocalized.pl
    ./unplaced.pl

    zcat ../genbank/GCA_000001215.4_Release_6_plus_ISO1_MT_assembly_structure/non-nuclear/assembled_chromosomes/AGP/chrMT.comp.agp.gz \
        | grep -v "^#" | sed -e 's/^KJ947872.2/chrM/' > chrM.agp

    zcat ../genbank/GCA_000001215.4_Release_6_plus_ISO1_MT_assembly_structure/non-nuclear/assembled_chromosomes/FASTA/chrMT.fna.gz \
        | sed -e 's/^>.*/>chrM/' > chrM.fa

    # verify this sequence is still the same as it was before
    #  these translations
    faSize *.fa

    # 143726002 bases (1152978 N's 142573024 real 142573024 upper 0 lower)
    #   in 1870 sequences in 11 files
    # Total size: mean 76858.8 sd 1382100.2 min 544 (chrUn_DS485566v2)
    #   max 32079331 (chr3R) median 1577

#############################################################################
# pick up photo (DONE - 2014-06-06 - Chin)
    mkdir /hive/data/genomes/dm6/photo
    cd /hive/data/genomes/dm6/photo
    wget --timestamping \
http://upload.wikimedia.org/wikipedia/commons/1/1d/Drosophila_melanogaster.jpg

    convert -geometry "190x300" Drosophila_melanogaster.jpg New-Drosophila_melanogaster.jpg

    # check in the New-Drosophila_melanogaster.jpg file into
    # the source tree:
# -rw-rw-r-- 1 9842 Jun  6 14:55 Drosophila_melanogaster.jpg

    # and get that installed in /usr/local/apache/htdocs/images/

#############################################################################
#  Initial database build (DONE - 2014-08-28 - Hiram)

    cd /hive/data/genomes/dm6
    cat << '_EOF_' > dm6.config.ra
# Config parameters for makeGenomeDb.pl:
db dm6
clade insect
genomeCladePriority 10
scientificName Drosophila melanogaster
commonName D. melanogaster
assemblyDate Aug. 2014
assemblyLabel The FlyBase Consortium/Berkeley Drosophila Genome Project/Celera Genomics
assemblyShortLabel BDGP Release 6 + ISO1 MT
orderKey 9060
# chrM mito sequence KJ947872.2 included
mitoAcc none
fastaFiles /hive/data/genomes/dm6/ucsc/chr*.fa
agpFiles /hive/data/genomes/dm6/ucsc/chr*.agp
# qualFiles /dev/null
dbDbSpeciesDir drosophila
photoCreditURL http://de.wikipedia.org/wiki/Benutzer:Mr.checker
photoCreditName Wikimedia Commons/Mr.checker
ncbiGenomeId 47
ncbiAssemblyId 202931
ncbiAssemblyName BDGP Release 6 + ISO1 MT
ncbiBioProject 164
genBankAccessionID GCA_000001215.4
taxId 7227
'_EOF_'
    # << happy emacs

    # stepwise to verify sequence and AGP file
    time makeGenomeDb.pl -workhorse=hgwdev -dbHost=hgwdev -fileServer=hgwdev \
	-stop=seq dm6.config.ra > seq.log 2>&1
    # real    0m9.989s

    # verify sequence and AGP are OK:
    time makeGenomeDb.pl -workhorse=hgwdev -dbHost=hgwdev -fileServer=hgwdev \
	-continue=agp -stop=agp dm6.config.ra > agp.log 2>&1
    # real    0m4.901s

    # then finish it off:
    time nice -n +19 makeGenomeDb.pl -workhorse=hgwdev -dbHost=hgwdev \
       -fileServer=hgwdev -continue=db dm6.config.ra > db.log 2>&1
    # real    1m27.449s

##########################################################################
# running repeat masker (DONE - 2014-08-28 - Hiram)
    mkdir /hive/data/genomes/dm6/bed/repeatMasker
    cd /hive/data/genomes/dm6/bed/repeatMasker
    time  doRepeatMasker.pl -buildDir=`pwd` \
	-bigClusterHub=ku -dbHost=hgwdev -workhorse=hgwdev \
	-smallClusterHub=ku dm6 > do.log 2>&1 &
    # real     44m51.83

    cat faSize.rmsk.txt
    # 143726002 bases (1152978 N's 142573024 real 112610723 upper
    #    29962301 lower) in 1870 sequences in 1 files
    # Total size: mean 76858.8 sd 1382100.2 min 544 (chrUn_DS485566v2)
    #   max 32079331 (chr3R) median 1577
    # %20.85 masked total, %21.02 masked real

    egrep -i "versi|relea" do.log
    # RepeatMasker version open-4.0.5
    #    January 31 2015 (open-4-0-5) version of RepeatMasker
    # CC   RELEASE 20140131;

    featureBits -countGaps dm6 rmsk
    # 29968013 bases of 143726002 (20.851%) in intersection
    # why is it different than the faSize above ?
    # because rmsk masks out some N's as well as bases, the count above
    #	separates out the N's from the bases, it doesn't show lower case N's

##########################################################################
# running simple repeat (DONE 2014-08-28 - Hiram)
    mkdir /hive/data/genomes/dm6/bed/simpleRepeat
    cd /hive/data/genomes/dm6/bed/simpleRepeat
    time doSimpleRepeat.pl -buildDir=`pwd` -bigClusterHub=ku \
	-dbHost=hgwdev -workhorse=hgwdev -smallClusterHub=ku \
	dm6 > do.log 2>&1 &
    #  real    23m17.471s

    cat fb.simpleRepeat
    #  3925949 bases of 142573024 (2.754%) in intersection

    # add to rmsk after it is done:
    cd /hive/data/genomes/dm6
    twoBitMask dm6.rmsk.2bit \
	-add bed/simpleRepeat/trfMask.bed dm6.2bit
    #	you can safely ignore the warning about fields >= 13

    twoBitToFa dm6.2bit stdout | faSize stdin > faSize.dm6.2bit.txt
    cat faSize.dm6.2bit.txt
    # 143726002 bases (1152978 N's 142573024 real 112531045 upper
    #   30041979 # lower) in 1870 sequences in 1 files
    # Total size: mean 76858.8 sd 1382100.2 min 544 (chrUn_DS485566v2)
    #   max 32079331 (chr3R) median 1577
    # %20.90 masked total, %21.07 masked real

    rm /gbdb/dm6/dm6.2bit
    ln -s `pwd`/dm6.2bit /gbdb/dm6/dm6.2bit

#########################################################################
# Verify all gaps are marked, add any N's not in gap as type 'other'
#	(DONE 2014-08-28 - Hiram)
    mkdir /hive/data/genomes/dm6/bed/gap
    cd /hive/data/genomes/dm6/bed/gap
    time nice -n +19 findMotif -motif=gattaca -verbose=4 \
	-strand=+ ../../dm6.unmasked.2bit > findMotif.txt 2>&1
    #	real    0m1.290s
    grep "^#GAP " findMotif.txt | sed -e "s/^#GAP //" > allGaps.bed
    time featureBits -countGaps dm6 -not gap -bed=notGap.bed
    # 143726002 bases of 143726002 (100.000%) in intersection


    featureBits dm6 -not gap -bed=notGap.bed allGaps.bed
    time featureBits -countGaps dm6 allGaps.bed notGap.bed -bed=new.gaps.bed
    # 143726002 bases of 143726002 (100.000%) in intersection
    time featureBits -countGaps dm6 allGaps.bed notGap.bed -bed=new.gaps.bed
    # 1152978 bases of 143726002 (0.802%) in intersection

    # there are some very large gaps that are unannotated:
    awk '{print $3-$2,$0}' new.gaps.bed  | sort -rn | head
    # 53860 chr3L     26952525        27006385        chr3L.7
    # 53800 chr3L     26882321        26936121        chr3L.6
    # 32000 chrX      22317804        22349804        chrX.5

    # the original 'component' AGP files have zero annotations in
    # the chromosomes.  In fact, there are no gaps annotated:
    hgsql -e 'select count(*) from gap;' dm6
#   +----------+
#   | count(*) |
#   +----------+
#   |        0 |
#   +----------+

    #	what is the highest index in the existing gap table:
    hgsql -N -e "select ix from gap;" dm6 | sort -n | tail -1
    #	(no output, assume 0)
    cat << '_EOF_' > mkGap.pl
#!/bin/env perl

use strict;
use warnings;

my $ix=0;

open (FH,"<new.gaps.bed") or die "can not read new.gaps.bed";
while (my $line = <FH>) {
    my ($chrom, $chromStart, $chromEnd, $rest) = split('\s+', $line);
    ++$ix;
    printf "%s\t%d\t%d\t%d\tN\t%d\tother\tyes\n", $chrom, $chromStart,
        $chromEnd, $ix, $chromEnd-$chromStart;
}
close (FH);
'_EOF_'
    # << happy emacs
    chmod +x ./mkGap.pl
    ./mkGap.pl > other.bed
    featureBits -countGaps dm6 other.bed
    #  1152978 bases of 143726002 (0.802%) in intersection

    wc -l other.bed
    #	572
    hgLoadBed -sqlTable=$HOME/kent/src/hg/lib/gap.sql \
	-noLoad dm6 otherGap other.bed
    #   Read 572 elements of size 8 from other.bed
    #	starting with this many
    hgsql -e "select count(*) from gap;" dm6
    #	 0
    hgsql dm6 -e 'load data local infile "bed.tab" into table gap;'
    #	result count:
    hgsql -e "select count(*) from gap;" dm6
    #	572
    # == 0 + 572
    # verify we aren't adding gaps where gaps already exist
    # this would output errors if that were true:
    gapToLift -minGap=1 dm6 nonBridged.lift -bedFile=nonBridged.bed
    # see example in danRer7.txt
    # there are no non-bridged gaps here:
    hgsql -N -e "select bridge from gap;" dm6 | sort | uniq -c
    #    572 yes

##########################################################################
## WINDOWMASKER (DONE - 2014-08-28 - Hiram)
    mkdir /hive/data/genomes/dm6/bed/windowMasker
    cd /hive/data/genomes/dm6/bed/windowMasker
    time nice -n +19 doWindowMasker.pl -buildDir=`pwd` -workhorse=hgwdev \
	-dbHost=hgwdev dm6 > do.log 2>&1 &
    #  real    6m13.489s

    # Masking statistics
    cat faSize.dm6.cleanWMSdust.txt
    #  143726002 bases (1152978 N's 142573024 real 108961589 upper
    #    33611435  lower) in 1870 sequences in 1 files
    # Total size: mean 76858.8 sd 1382100.2 min 544 (chrUn_DS485566v2)
    #   max 32079331 (chr3R) median 1577
    # %23.39 masked total, %23.57 masked real

    # how much does this window masker and repeat masker overlap:
    # if RM finished before this got here, the answer is in:
    cat fb.dm6.rmsk.windowmaskerSdust.txt
    # 16809665 bases of 143726002 (11.696%) in intersection

#############################################################################
# cytoBandIdeo - (OBSOLETE, redone below 2014-08-28 - Hiram)
    mkdir /hive/data/genomes/dm6/bed/cytoBand
    cd /hive/data/genomes/dm6/bed/cytoBand
    makeCytoBandIdeo.csh dm6

#########################################################################
# CHROMOSOME BANDS (DONE - 2014-10-09 - Hiram)
    ssh hgwdev
    cd /cluster/data/dm6/bed/cytoBand
    # As of 5.3, the GFF now includes what we need to make cytoBandIdeo
    # directly:

    zgrep chromosome_band ../../flyBase/dmel-all-r6.02.gff.gz > chromBand.gff

    cat << '_EOF_' > gffToBand.pl
#!/usr/bin/env perl

use warnings;
use strict;

open (FH, "<chromBand.gff") or die "can not read chromBand.gff";
while (my $line = <FH>) {
  chomp $line;
  my @w = split('\t', $line);
  $w[3]--;
  $w[3] = 0 if ($w[3] < 0);
  $w[8] =~ s/^.*Name=band-//;
  $w[8] =~ s/[;].*$//;
  if ($w[8] !~ /^\d+[A-Z]\d+$/) { next; }
  if ($w[4] <= 0) { next; }
  printf "chr%s\t%d\t%d\t%s\tn/a\n", $w[0], $w[3], $w[4], $w[8];
}
close (FH);
'_EOF_'
    # << happy emacs
    chmod +x gffToBand.pl
    ./gffToBand.pl > dm6.cytoBand.tab

    # re-use items from the previously loaded cytoBand data to add
    # to the ones defined for the 'real' cytoBand data:
    egrep -v -w "chr2L|chr2R|chr3L|chr3R|chr4|chrX" dm6.cytoBand \
       > dm6.combined.bed
    cat dm6.cytoBand.tab >> dm6.combined.bed

    featureBits -bed=notCovered.bed -not -countGaps dm6 dm6.combined.bed
    awk '{printf "%s\t%d\t%d\tn/a\tgneg\n", $1, $2, $3}' notCovered.bed \
       >> dm6.combined.bed

    featureBits -countGaps dm6 dm6.combined.bed

    maxLen=`awk '{print length($1)}' dm6.combined.bed | sort -rn | head -1`

    sed -e "s/12/${maxLen}/;" ~/kent/src/hg/lib/cytoBand.sql > dm6.cytoBand.sql

    hgLoadSqlTab dm6 cytoBand dm6.cytoBand.sql dm6.combined.bed

    checkTableCoords -verbose=2  dm6 cytoBand

    hgLoadSqlTab dm6 cytoBandIdeo dm6.cytoBand.sql dm6.combined.bed

    checkTableCoords -verbose=2  dm6 cytoBandIdeo

##########################################################################
# cpgIslands - (DONE - 2014-08-28 - Hiram)
    mkdir /hive/data/genomes/dm6/bed/cpgIslands
    cd /hive/data/genomes/dm6/bed/cpgIslands
    time doCpgIslands.pl -dbHost=hgwdev -bigClusterHub=ku \
      -workhorse=hgwdev -smallClusterHub=ku dm6 > do.log 2>&1 &
    #  real    35m11.446s

    cat fb.dm6.cpgIslandExt.txt
    #   15067148 bases of 142573024 (10.568%) in intersection

##############################################################################
# cpgIslands on UNMASKED sequence (DONE - 2014-08-28 - Hiram)
    mkdir /hive/data/genomes/dm6/bed/cpgIslandsUnmasked
    cd /hive/data/genomes/dm6/bed/cpgIslandsUnmasked

    # run stepwise so the loading can be done in a different table
    time doCpgIslands.pl -dbHost=hgwdev -bigClusterHub=ku -buildDir=`pwd` \
       -tableName=cpgIslandExtUnmasked \
          -maskedSeq=/hive/data/genomes/dm6/dm6.unmasked.2bit \
             -workhorse=hgwdev -smallClusterHub=ku dm6 > do.log 2>&1 &
    #  real    40m18.250s

    cat fb.dm6.cpgIslandExtUnmasked.txt
    # 21347147 bases of 142573024 (14.973%) in intersection

#########################################################################
# genscan - (DONE 2014-08-28 - Hiram)
    mkdir /hive/data/genomes/dm6/bed/genscan
    cd /hive/data/genomes/dm6/bed/genscan
    time doGenscan.pl -buildDir=`pwd` -workhorse=hgwdev -dbHost=hgwdev \
      -bigClusterHub=ku dm6 > do.log 2>&1  &
    # real    35m23.353s

    cat fb.dm6.genscan.txt
    #  24277318 bases of 142573024 (17.028%) in intersection

    cat fb.dm6.genscanSubopt.txt
    #  676185 bases of 142573024 (0.474%) in intersection

########################################################################
# MAKE 11.OOC FILE FOR BLAT/GENBANK (DONE 2014-08-28 - Hiram)
    # Use -repMatch=100 (based on size -- for human we use 1024, and
    # fly size is ~4.96% of human judging by gapless dm1 genome size from
    # featureBits -- we would use 50, but bump that up to 100
    # a bit to be more  conservative).
    #  143726002 bases (1152978 N's 142573024 real 108961589 upper
    calc \( 142573024 / 2897316137 \) \* 1024
    # ( 142573024 / 2897316137 ) * 1024 = 50.389661
    # round up to 100

    cd /hive/data/genomes/dm6
    blat dm6.2bit /dev/null /dev/null -tileSize=11 \
      -makeOoc=jkStuff/dm6.11.ooc -repMatch=100
    #	Wrote 4574 overused 11-mers to jkStuff/dm6.11.ooc

    # there are *only* bridged gaps, no lift file needed for genbank
    # Ignore the 2 yes non-bridged gap
    hgsql -N -e "select bridge from gap;" dm6 | sort | uniq -c
    #    572 yes

#########################################################################
# MAKE 11.OOC FILE FOR BLAT (DONE 2014-06-07 - Chin)
# dm3 11.ooc is needed to lift flyBase from dm3 to dm6 for encode3 submission

    # Use -repMatch=100 (based on size -- for human we use 1024, and
    # fly size is ~4.4% of human judging by gapless dm1 genome size from
    # featureBits -- we would use 45, but bump that up a bit to be more
    # conservative).
    cd /hive/data/genomes/dm3
    blat dm3.2bit /dev/null /dev/null -tileSize=11 \
      -makeOoc=jkStuff/dm3.11.ooc -repMatch=100
    # Wrote 7657 overused 11-mers to /hive/data/genomes/dm3/11.ooc

#############################################################################
# dm6 LIFTOVER to dm3 (DONE - 2015-01-20 - Hiram)
    sh hgwdev
    mkdir /hive/data/genomes/dm6/bed/blat.dm3.2015-01-20
    cd /hive/data/genomes/dm6/bed/blat.dm3.2015-01-20

    # -debug run to create run dir, preview scripts...
    doSameSpeciesLiftOver.pl -verbose=2 \
        -debug -ooc=/hive/data/genomes/dm6/jkStuff/dm6.11.ooc \
          -bigClusterHub=ku -dbHost=hgwdev -workhorse=hgwdev \
         dm6 dm3

    # if that appears OK, then running:
    time doSameSpeciesLiftOver.pl -verbose=2 \
        -ooc=/hive/data/genomes/dm6/jkStuff/dm6.11.ooc \
          -bigClusterHub=ku -dbHost=hgwdev -workhorse=hgwdev \
         dm6 dm3 > do.log 2>&1
    # about 36 minutes

    # verify the convert link on the browser is now active from hg19 to hg38

#############################################################################
# dm3 LIFTOVER to dm6 (DONE - 2014-08-28 - Chin)
# will move steps to dm3.txt when dm6 is released.
    sh hgwdev
    cd /hive/data/genomes/dm3
    ln -s jkStuff/dm3.11.ooc 11.ooc

    # -debug run to create run dir, preview scripts...
    doSameSpeciesLiftOver.pl -debug dm3 dm6
    cd /hive/data/genomes/dm3/bed/blat.dm6.2014-08-28

    time nice -n +19 doSameSpeciesLiftOver.pl -verbose=2 \
        -bigClusterHub=ku -dbHost=hgwdev -workhorse=hgwdev \
         dm3 dm6 > do.log 2>&1 &
    # real    22m18.988s

#########################################################################
# LIFTOVER FLYBASE 5.12 ANNOTATIONS FROM DM3 (working 2014-06-09 - Chin)
# NOTE:
# In order to make flyBase gene ui to work, more steps are needed, other wise,
# hgc will shows errors like "Doh, no go for CG3727-RB from flyBasePep"
# all the time. Major objevtive here is to create flyBaseGene tables
# so that we can do featureBits on it for encode3 submission.
    ssh hgwdev
    mkdir /hive/data/genomes/dm6/bed/flybase5.12.liftOver
    cd /hive/data/genomes/dm6/bed/flybase5.12.liftOver
    hgsql dm3 -N -e 'select * from flyBaseGene' | cut -f2- > dm3.flyBaseGene.tab
    liftOver -genePred dm3.flyBaseGene.tab \
      /hive/data/genomes/dm3/bed/liftOver/dm3ToDm6.over.chain.gz \
      flyBaseLiftGene.tab flyBaseLiftGene.unmapped.tab
    wc -l *.tab
    #  21243 dm3.flyBaseGene.tab
    #  21216 flyBaseLiftGene.tab
    #     54 flyBaseLiftGene.unmapped.tab

    ldHgGene -predTab dm6 flyBaseGene flyBaseLiftGene.tab

############################################################################
# set same default position asm dm3 (DONE - 2014-08-28 - Hiram)
    hgsql -e \
'update dbDb set defaultPos="chr2L:826,001-851,000" where name="dm6";' \
     hgcentraltest

############################################################################
# create ucscToINSDC name mapping (DONE - 2014-08-29 - Hiram)
    mkdir /hive/data/genomes/dm6/bed/ucscToINSDC
    cd /hive/data/genomes/dm6/bed/ucscToINSDC

    # this script has been maturing over time, it is close to complete.
    # to find a latest copy of it:
    # ls -ogrt /hive/data/genomes/*/bed/ucscToINSDC/translateNames.sh

    cp /hive/data/genomes/rn6/bed/ucscToINSDC/translateNames.sh .
    # needed to be fixed up here for new genbank organization

    # to see chrM accession:
    hgsql -e 'select * from gold;' dm6 | grep chrM
    # use that accession here:
    ./translateNames.sh KJ947872.2

    awk '{printf "%s\t0\t%d\n", $1,$2}' ../../chrom.sizes | sort \
        > name.coordinate.tab

    join name.coordinate.tab ucscToINSDC.txt | tr '[ ]' '[\t]' > ucscToINSDC.bed

    cut -f1 ucscToINSDC.bed | awk '{print length($0)}' | sort -n | tail -1
# 22

    # use the 22 in this sed:
    sed -e "s/21/22/" $HOME/kent/src/hg/lib/ucscToINSDC.sql \
        | hgLoadSqlTab dm6 ucscToINSDC stdin ucscToINSDC.bed
    checkTableCoords dm6 ucscToINSDC
    # should cover all bases
    featureBits -countGaps dm6 ucscToINSDC
    # 143726002 bases of 143726002 (100.000%) in intersection

#########################################################################
# fixup search rule for assembly track/gold table (DONE - 2014-07-09 - Hiram)
   export maxLen=`hgsql -N -e 'select frag from gold;' dm6 | awk '{print length($0)}' | sort -run | head -1`

   echo $maxLen
   #  10

export C=1
while [ $C -le $maxLen ];
do
  echo -n " $C: "
  hgsql -N -e 'select frag from gold;' dm6 | sort -u \
    | awk '{ print substr($0,'$C',1) }' | sort -u | xargs echo | sed -e 's/ //g'
  C=`echo $C | awk '{print $1+1}'`
done
#  1: ACDK
#  2: EJPS
#  3: 049
#  4: 0148
#  5: 34567
#  6: 0123456789
#  7: 0123456789
#  8: 0123456789
#  9: .
# 10: 123456

    # since we have the composite fragment IDs, there are a variety of
    # dot versions:
    hgsql -N -e 'select frag from gold;' dm6 | sed -e 's/.*\.//' \
       | sort | uniq -c  | sort -rn
#  1861 1
#     3 5
#     3 2
#     1 6
#     1 4
#     1 3

    # hence, add to trackDb/drosophila/dm6/trackDb.ra

searchTable gold
searchMethod prefix
searchType bed
shortCircuit 1
termRegex [ACDK][EJPS][0-9]+(\.[0-9]+)*
query select chrom,chromStart,chromEnd,frag from %s where frag like '%s%%'
searchPriority 8

  # test pattern:
   hgsql -N -e 'select frag from gold;' dm6 | wc -l
   # 1870
   hgsql -N -e 'select frag from gold;' dm6 \
     | egrep -e '[ACDK][EJPS][0-9]+(\.[0-9]+)*' | wc -l
   # 1870
   hgsql -N -e 'select frag from gold;' dm6 \
     | egrep -v -e '[ACDK][EJPS][0-9]+(\.[0-9]+)*' | wc -l
   # 0

############################################################################
# AUTO UPDATE GENBANK (DONE - 2012-05-04 - Hiram)
    # examine the file:
    /cluster/data/genbank/data/organism.lst
    # for your species to see what counts it has for:
# organism       mrnaCnt estCnt  refSeqCnt
# Drosophila melanogaster	61221	821011	30813

    # to decide which "native" mrna or ests you want to specify in genbank.conf

    ssh hgwdev
    cd $HOME/kent/src/hg/makeDb/genbank
    git pull
    # edit etc/genbank.conf to add before dm3 section::

# dm6 (D. melanogaster)
dm6.serverGenome = /hive/data/genomes/dm6/dm6.2bit
dm6.clusterGenome = /hive/data/genomes/dm6/dm6.2bit
dm6.ooc = /hive/data/genomes/dm6/jkStuff/dm6.11.ooc
dm6.lift = no
dm6.refseq.mrna.native.pslCDnaFilter  = ${finished.refseq.mrna.native.pslCDnaFilter}
dm6.refseq.mrna.xeno.pslCDnaFilter    = ${finished.refseq.mrna.xeno.pslCDnaFilter}
dm6.genbank.mrna.native.pslCDnaFilter = ${finished.genbank.mrna.native.pslCDnaFilter}
dm6.genbank.mrna.xeno.pslCDnaFilter   = ${finished.genbank.mrna.xeno.pslCDnaFilter}
dm6.genbank.est.native.pslCDnaFilter  = ${finished.genbank.est.native.pslCDnaFilter}
dm6.genbank.mrna.xeno.load = yes
# dm6.upstreamGeneTbl = refGene
# dm6.upstreamMaf = multiz15way
# /hive/data/genomes/dm3/bed/multiz15way/species.lst

    git commit -m "adding dm6 fly refs #4049" etc/genbank.conf
    git push
    make etc-update

    ssh hgwdev			# used to do this on "genbank" machine
    screen -S dm6           # long running job managed in screen
    cd /cluster/data/genbank
    time nice -n +19 ./bin/gbAlignStep -initial dm6
    #	logFile: var/build/logs/2014.08.29-10:41:04.dm6.initalign.log
    #   real    199m22.304s

    # load database when finished
    ssh hgwdev
    cd /cluster/data/genbank
    time nice -n +19 ./bin/gbDbLoadStep -drop -initialLoad dm6 &
    #	logFile: var/dbload/hgwdev/logs/2014.09.03-15:51:49.dm6.dbload.log
    #   real    19m24.163s

    # enable daily alignment and update of hgwdev (DONE - 2012-02-09 - Hiram)
    cd ~/kent/src/hg/makeDb/genbank
    git pull
    # add dm6 to:
        etc/align.dbs etc/hgwdev.dbs
    git commit -m "Added dm6 refs #4049" etc/align.dbs etc/hgwdev.dbs
    git push
    make etc-update

#########################################################################
#  BLATSERVERS ENTRY (DONE - 2014-08-29 - Hiram)
#	After getting a blat server assigned by the Blat Server Gods,
    ssh hgwdev

    hgsql -e 'INSERT INTO blatServers (db, host, port, isTrans, canPcr) \
	VALUES ("dm6", "blat4c", "17858", "1", "0"); \
	INSERT INTO blatServers (db, host, port, isTrans, canPcr) \
	VALUES ("dm6", "blat4c", "17859", "0", "1");' \
	    hgcentraltest
    #	test it with some sequence

##############################################################################
# LASTZ D. ananassae DroAna3 (DONE 2014-08-29 - Hiram)
    screen -S droAna3-dm6
    mkdir /hive/data/genomes/dm6/bed/lastzDroAna3.2014-08-29
    cd /hive/data/genomes/dm6/bed/lastzDroAna3.2014-08-29

    cat << '_EOF_' > DEF
# D. melanogaster vs. D. ananassae
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.52/bin/lastz
BLASTZ_H=2000
BLASTZ_Y=3400
BLASTZ_L=4000
BLASTZ_K=2200
BLASTZ_Q=/cluster/data/blastz/HoxD55.q

# TARGET - D. melanogaster
SEQ1_DIR=/hive/data/genomes/dm6/dm6.2bit
SEQ1_CHUNK=5000000
SEQ1_LAP=10000
SEQ1_LEN=/hive/data/genomes/dm6/chrom.sizes

# QUERY - D. ananassae
SEQ2_DIR=/hive/data/genomes/droAna3/droAna3.2bit
SEQ2_CHUNK=5000000
SEQ2_LAP=10000
SEQ2_LEN=/hive/data/genomes/droAna3/chrom.sizes

BASE=/hive/data/genomes/dm6/bed/lastzDroAna3.2014-08-29
TMPDIR=/dev/shm
'_EOF_'
    # << this line keeps emacs coloring happy
    time doBlastzChainNet.pl `pwd`/DEF \
        -syntenicNet -fileServer=hgwdev \
        -chainMinScore=3000  -chainLinearGap=loose \
        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > do.log 2>&1
    # real    64m19.065s

    cat fb.dm6.chainDroAna3Link.txt
    # 95925289 bases of 142573024 (67.282%) in intersection

    #   running the swap
    mkdir /hive/data/genomes/droAna3/bed/blastz.dm6.swap
    cd /hive/data/genomes/droAna3/bed/blastz.dm6.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
        /hive/data/genomes/dm6/bed/lastzDroAna3.2014-08-29/DEF \
        -swap -syntenicNet -chainMinScore=3000 -chainLinearGap=loose \
        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > swap.log 2>&1
    # real    10m43.121s
    cat fb.droAna3.chainDm6Link.txt
    # 125698535 bases of 213918817 (58.760%) in intersection

###########################################################################
# LASTZ D. sechellia DroSec1 (DONE 2014-08-29 - Hiram)
    screen -S droSec1-dm6
    mkdir /hive/data/genomes/dm6/bed/lastzDroSec1.2014-08-29
    cd /hive/data/genomes/dm6/bed/lastzDroSec1.2014-08-29

    cat << '_EOF_' > DEF
# D. melanogaster vs. D. sechellia
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.52/bin/lastz
BLASTZ_H=2000
BLASTZ_Y=3400
BLASTZ_L=4000
BLASTZ_K=2200
BLASTZ_Q=/cluster/data/blastz/HoxD55.q

# TARGET - D. melanogaster
SEQ1_DIR=/hive/data/genomes/dm6/dm6.2bit
SEQ1_CHUNK=5000000
SEQ1_LAP=10000
SEQ1_LEN=/hive/data/genomes/dm6/chrom.sizes

# QUERY - D. sechellia
SEQ2_DIR=/hive/data/genomes/droSec1/droSec1.2bit
SEQ2_CHUNK=5000000
SEQ2_LAP=10000
SEQ2_LEN=/hive/data/genomes/droSec1/chrom.sizes

BASE=/hive/data/genomes/dm6/bed/lastzDroSec1.2014-08-29
TMPDIR=/dev/shm
'_EOF_'
    # << happy emacs

    time doBlastzChainNet.pl `pwd`/DEF \
        -syntenicNet -fileServer=hgwdev \
        -chainMinScore=3000  -chainLinearGap=loose \
        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > do.log 2>&1
    # real    47m43.186s
    cat fb.dm6.chainDroSec1Link.txt
    # 117709624 bases of 142573024 (82.561%) in intersection

    #   running the swap
    mkdir /hive/data/genomes/droSec1/bed/blastz.dm6.swap
    cd /hive/data/genomes/droSec1/bed/blastz.dm6.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
        /hive/data/genomes/dm6/bed/lastzDroSec1.2014-08-29/DEF \
        -swap -syntenicNet -chainMinScore=3000 -chainLinearGap=loose \
        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > swap.log 2>&1
    # real    9m56.239s
    cat fb.droSec1.chainDm6Link.txt
    # 128262529 bases of 157238575 (81.572%) in intersection

###########################################################################
# LASTZ D. simulans DroSim1 (DONE 2014-08-29 - Hiram)
    screen -S droSim1-dm6
    mkdir /hive/data/genomes/dm6/bed/lastzDroSim1.2014-08-29
    cd /hive/data/genomes/dm6/bed/lastzDroSim1.2014-08-29

    cat << '_EOF_' > DEF
# D. melanogaster vs. D. simulans
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.52/bin/lastz
BLASTZ_H=2000
BLASTZ_Y=3400
BLASTZ_L=4000
BLASTZ_K=2200
BLASTZ_Q=/cluster/data/blastz/HoxD55.q

# TARGET - D. melanogaster
SEQ1_DIR=/hive/data/genomes/dm6/dm6.2bit
SEQ1_CHUNK=5000000
SEQ1_LAP=10000
SEQ1_LEN=/hive/data/genomes/dm6/chrom.sizes

# QUERY - D. simulans
SEQ2_DIR=/hive/data/genomes/droSim1/droSim1.2bit
SEQ2_CHUNK=5000000
SEQ2_LAP=10000
SEQ2_LEN=/hive/data/genomes/droSim1/chrom.sizes

BASE=/hive/data/genomes/dm6/bed/lastzDroSim1.2014-08-29
TMPDIR=/dev/shm
'_EOF_'
    # << happy emacs

    time doBlastzChainNet.pl `pwd`/DEF \
        -syntenicNet -fileServer=hgwdev \
        -chainMinScore=3000  -chainLinearGap=loose \
        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > do.log 2>&1
    # real    39m35.348s
    cat fb.dm6.chainDroSim1Link.txt
    # 114117667 bases of 142573024 (80.042%) in intersection

    #   running the swap
    mkdir /hive/data/genomes/droSim1/bed/blastz.dm6.swap
    cd /hive/data/genomes/droSim1/bed/blastz.dm6.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
        /hive/data/genomes/dm6/bed/lastzDroSim1.2014-08-29/DEF \
        -swap -syntenicNet -chainMinScore=3000 -chainLinearGap=loose \
        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > swap.log 2>&1
    # real    6m50.918s
    cat fb.droSim1.chainDm6Link.txt
    # 113676741 bases of 127256433 (89.329%) in intersection

###########################################################################
# LASTZ D. erecta DroEre2 (DONE 2014-08-29 - Hiram)
    screen -S droEre2-dm6
    mkdir /hive/data/genomes/dm6/bed/lastzDroEre2.2014-08-29
    cd /hive/data/genomes/dm6/bed/lastzDroEre2.2014-08-29

    cat << '_EOF_' > DEF
# D. melanogaster vs. D. erecta
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.52/bin/lastz
BLASTZ_H=2000
BLASTZ_Y=3400
BLASTZ_L=4000
BLASTZ_K=2200
BLASTZ_Q=/cluster/data/blastz/HoxD55.q

# TARGET - D. melanogaster
SEQ1_DIR=/hive/data/genomes/dm6/dm6.2bit
SEQ1_CHUNK=5000000
SEQ1_LAP=10000
SEQ1_LEN=/hive/data/genomes/dm6/chrom.sizes

# QUERY - D. erecta
SEQ2_DIR=/hive/data/genomes/droEre2/droEre2.2bit
SEQ2_CHUNK=5000000
SEQ2_LAP=10000
SEQ2_LEN=/hive/data/genomes/droEre2/chrom.sizes

BASE=/hive/data/genomes/dm6/bed/lastzDroEre2.2014-08-29
TMPDIR=/dev/shm
'_EOF_'
    # << happy emacs

    time doBlastzChainNet.pl `pwd`/DEF \
        -syntenicNet -fileServer=hgwdev \
        -chainMinScore=3000  -chainLinearGap=loose \
        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > do.log 2>&1
    # real    19m31.724s
    cat fb.dm6.chainDroEre2Link.txt
    # 114091585 bases of 142573024 (80.023%) in intersection

    #   running the swap
    mkdir /hive/data/genomes/droEre2/bed/blastz.dm6.swap
    cd /hive/data/genomes/droEre2/bed/blastz.dm6.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
        /hive/data/genomes/dm6/bed/lastzDroEre2.2014-08-29/DEF \
        -swap -syntenicNet -chainMinScore=3000 -chainLinearGap=loose \
        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > swap.log 2>&1
    # real    8m35.520s
    cat fb.droEre2.chainDm6Link.txt
    # 122430082 bases of 145084019 (84.386%) in intersection

###########################################################################
# LASTZ D. grimshawi DroGri2 (DONE 2014-08-29 - Hiram)
    screen -S droGri2-dm6
    mkdir /hive/data/genomes/dm6/bed/lastzDroGri2.2014-08-29
    cd /hive/data/genomes/dm6/bed/lastzDroGri2.2014-08-29

    cat << '_EOF_' > DEF
# D. melanogaster vs. D. grimshawi
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.52/bin/lastz
BLASTZ_H=2000
BLASTZ_Y=3400
BLASTZ_L=4000
BLASTZ_K=2200
BLASTZ_Q=/cluster/data/blastz/HoxD55.q

# TARGET - D. melanogaster
SEQ1_DIR=/hive/data/genomes/dm6/dm6.2bit
SEQ1_CHUNK=5000000
SEQ1_LAP=10000
SEQ1_LEN=/hive/data/genomes/dm6/chrom.sizes

# QUERY - D. grimshawi
SEQ2_DIR=/hive/data/genomes/droGri2/droGri2.2bit
SEQ2_CHUNK=5000000
SEQ2_LAP=10000
SEQ2_LEN=/hive/data/genomes/droGri2/chrom.sizes

BASE=/hive/data/genomes/dm6/bed/lastzDroGri2.2014-08-29
TMPDIR=/dev/shm
'_EOF_'
    # << happy emacs

    time doBlastzChainNet.pl `pwd`/DEF \
        -syntenicNet -fileServer=hgwdev \
        -chainMinScore=3000  -chainLinearGap=loose \
        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > do.log 2>&1
    # real    43m54.742s
    cat fb.dm6.chainDroGri2Link.txt
    # 72682996 bases of 142573024 (50.979%) in intersection

    #   running the swap
    mkdir /hive/data/genomes/droGri2/bed/blastz.dm6.swap
    cd /hive/data/genomes/droGri2/bed/blastz.dm6.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
        /hive/data/genomes/dm6/bed/lastzDroGri2.2014-08-29/DEF \
        -swap -syntenicNet -chainMinScore=3000 -chainLinearGap=loose \
        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > swap.log 2>&1
    # real    6m15.091s
    cat fb.droGri2.chainDm6Link.txt
    # 79673484 bases of 186090669 (42.814%) in intersection

###########################################################################
# LASTZ D. persimilis DroPer1 (DONE 2014-08-29 - Hiram)
    screen -S droPer1-dm6
    mkdir /hive/data/genomes/dm6/bed/lastzDroPer1.2014-08-29
    cd /hive/data/genomes/dm6/bed/lastzDroPer1.2014-08-29

    cat << '_EOF_' > DEF
# D. melanogaster vs. D. persimilis
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.52/bin/lastz
BLASTZ_H=2000
BLASTZ_Y=3400
BLASTZ_L=4000
BLASTZ_K=2200
BLASTZ_Q=/cluster/data/blastz/HoxD55.q

# TARGET - D. melanogaster
SEQ1_DIR=/hive/data/genomes/dm6/dm6.2bit
SEQ1_CHUNK=5000000
SEQ1_LAP=10000
SEQ1_LEN=/hive/data/genomes/dm6/chrom.sizes

# QUERY - D. persimilis
SEQ2_DIR=/hive/data/genomes/droPer1/droPer1.2bit
SEQ2_CHUNK=5000000
SEQ2_LAP=10000
SEQ2_LEN=/hive/data/genomes/droPer1/chrom.sizes

BASE=/hive/data/genomes/dm6/bed/lastzDroPer1.2014-08-29
TMPDIR=/dev/shm
'_EOF_'
    # << happy emacs

    time doBlastzChainNet.pl `pwd`/DEF \
        -syntenicNet -fileServer=hgwdev \
        -chainMinScore=3000  -chainLinearGap=loose \
        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > do.log 2>&1
    # real    49m9.697s
    cat fb.dm6.chainDroPer1Link.txt
    # 84187839 bases of 142573024 (59.049%) in intersection

    #   running the swap
    mkdir /hive/data/genomes/droPer1/bed/blastz.dm6.swap
    cd /hive/data/genomes/droPer1/bed/blastz.dm6.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
        /hive/data/genomes/dm6/bed/lastzDroPer1.2014-08-29/DEF \
        -swap -syntenicNet -chainMinScore=3000 -chainLinearGap=loose \
        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > swap.log 2>&1
    # real    13m28.190s
    cat fb.droPer1.chainDm6Link.txt
    # 92967575 bases of 175583556 (52.948%) in intersection

###########################################################################
# LASTZ D. virilis DroVir3 (DONE 2014-08-29 - Hiram)
    screen -S droVir3-dm6
    mkdir /hive/data/genomes/dm6/bed/lastzDroVir3.2014-08-29
    cd /hive/data/genomes/dm6/bed/lastzDroVir3.2014-08-29

    cat << '_EOF_' > DEF
# D. melanogaster vs. D. virilis
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.52/bin/lastz
BLASTZ_H=2000
BLASTZ_Y=3400
BLASTZ_L=4000
BLASTZ_K=2200
BLASTZ_Q=/cluster/data/blastz/HoxD55.q

# TARGET - D. melanogaster
SEQ1_DIR=/hive/data/genomes/dm6/dm6.2bit
SEQ1_CHUNK=5000000
SEQ1_LAP=10000
SEQ1_LEN=/hive/data/genomes/dm6/chrom.sizes

# QUERY - D. virilis
SEQ2_DIR=/hive/data/genomes/droVir3/droVir3.2bit
SEQ2_CHUNK=5000000
SEQ2_LAP=10000
SEQ2_LEN=/hive/data/genomes/droVir3/chrom.sizes

BASE=/hive/data/genomes/dm6/bed/lastzDroVir3.2014-08-29
TMPDIR=/dev/shm
'_EOF_'
    # << happy emacs

    time doBlastzChainNet.pl `pwd`/DEF \
        -syntenicNet -fileServer=hgwdev \
        -chainMinScore=3000  -chainLinearGap=loose \
        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > do.log 2>&1
    # real    53m38.503s
    cat fb.dm6.chainDroVir3Link.txt
    # 74048111 bases of 142573024 (51.937%) in intersection

    #   running the swap
    mkdir /hive/data/genomes/droVir3/bed/blastz.dm6.swap
    cd /hive/data/genomes/droVir3/bed/blastz.dm6.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
        /hive/data/genomes/dm6/bed/lastzDroVir3.2014-08-29/DEF \
        -swap -syntenicNet -chainMinScore=3000 -chainLinearGap=loose \
        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > swap.log 2>&1
    # real    6m56.701s
    cat fb.droVir3.chainDm6Link.txt
    # 82924532 bases of 189205863 (43.828%) in intersection

###########################################################################
# LASTZ T. castaneum TriCas2 (DONE 2014-08-29 - Hiram)
    screen -S triCas2-dm6
    mkdir /hive/data/genomes/dm6/bed/lastzTriCas2.2014-08-29
    cd /hive/data/genomes/dm6/bed/lastzTriCas2.2014-08-29

    cat << '_EOF_' > DEF
# D. melanogaster vs. T. castaneum
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.52/bin/lastz
BLASTZ_H=2000
BLASTZ_Y=3400
BLASTZ_L=4000
BLASTZ_K=2200
BLASTZ_Q=/cluster/data/blastz/HoxD55.q

# TARGET - D. melanogaster
SEQ1_DIR=/hive/data/genomes/dm6/dm6.2bit
SEQ1_CHUNK=5000000
SEQ1_LAP=10000
SEQ1_LEN=/hive/data/genomes/dm6/chrom.sizes

# QUERY - T. castaneum
SEQ2_DIR=/hive/data/genomes/triCas2/triCas2.2bit
SEQ2_CHUNK=5000000
SEQ2_LAP=10000
SEQ2_LEN=/hive/data/genomes/triCas2/chrom.sizes

BASE=/hive/data/genomes/dm6/bed/lastzTriCas2.2014-08-29
TMPDIR=/dev/shm
'_EOF_'
    # << happy emacs

    time doBlastzChainNet.pl `pwd`/DEF \
        -syntenicNet -fileServer=hgwdev \
        -chainMinScore=3000  -chainLinearGap=loose \
        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > do.log 2>&1
    # real    43m49.065s
    cat fb.dm6.chainTriCas2Link.txt
    # 20091664 bases of 142573024 (14.092%) in intersection

    #   running the swap
    mkdir /hive/data/genomes/triCas2/bed/blastz.dm6.swap
    cd /hive/data/genomes/triCas2/bed/blastz.dm6.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
        /hive/data/genomes/dm6/bed/lastzTriCas2.2014-08-29/DEF \
        -swap -syntenicNet -chainMinScore=3000 -chainLinearGap=loose \
        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > swap.log 2>&1
    # real    1m48.420s
    cat fb.triCas2.chainDm6Link.txt
    # 22441942 bases of 151549969 (14.808%) in intersection

###########################################################################
# LASTZ A. gambiae AnoGam1 (DONE 2014-08-29 - Hiram)
    screen -S anoGam1-dm6
    mkdir /hive/data/genomes/dm6/bed/lastzAnoGam1.2014-08-29
    cd /hive/data/genomes/dm6/bed/lastzAnoGam1.2014-08-29

    cat << '_EOF_' > DEF
# D. melanogaster vs. D. sechellia
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.52/bin/lastz
BLASTZ_H=2000
BLASTZ_Y=3400
BLASTZ_L=4000
BLASTZ_K=2200
BLASTZ_Q=/cluster/data/blastz/HoxD55.q

# TARGET - D. melanogaster
SEQ1_DIR=/hive/data/genomes/dm6/dm6.2bit
SEQ1_CHUNK=5000000
SEQ1_LAP=10000
SEQ1_LEN=/hive/data/genomes/dm6/chrom.sizes

# QUERY - A. gambiae
SEQ2_DIR=/hive/data/genomes/anoGam1/anoGam1.2bit
SEQ2_CHUNK=5000000
SEQ2_LAP=10000
SEQ2_LEN=/hive/data/genomes/anoGam1/chrom.sizes

BASE=/hive/data/genomes/dm6/bed/lastzAnoGam1.2014-08-29
TMPDIR=/dev/shm
'_EOF_'
    # << happy emacs

    time doBlastzChainNet.pl `pwd`/DEF \
        -syntenicNet -fileServer=hgwdev \
        -chainMinScore=3000  -chainLinearGap=loose \
        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > do.log 2>&1
    # real    85m43.807s
    cat fb.dm6.chainAnoGam1Link.txt
    # 20604473 bases of 142573024 (14.452%) in intersection

    #   running the swap
    mkdir /hive/data/genomes/anoGam1/bed/blastz.dm6.swap
    cd /hive/data/genomes/anoGam1/bed/blastz.dm6.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
        /hive/data/genomes/dm6/bed/lastzAnoGam1.2014-08-29/DEF \
        -swap -syntenicNet -chainMinScore=3000 -chainLinearGap=loose \
        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > swap.log 2>&1
    # real    2m1.764s
    cat fb.anoGam1.chainDm6Link.txt
    # 22154872 bases of 278268413 (7.962%) in intersection

###########################################################################
# LASTZ D. pseudoobscura DroPse3 (DONE 2014-08-29 - Hiram)
    screen -S droPse3-dm6
    mkdir /hive/data/genomes/dm6/bed/lastzDroPse3.2014-08-29
    cd /hive/data/genomes/dm6/bed/lastzDroPse3.2014-08-29

    cat << '_EOF_' > DEF
# D. melanogaster vs. D. pseudoobscura
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.52/bin/lastz
BLASTZ_H=2000
BLASTZ_Y=3400
BLASTZ_L=4000
BLASTZ_K=2200
BLASTZ_Q=/cluster/data/blastz/HoxD55.q

# TARGET - D. melanogaster
SEQ1_DIR=/hive/data/genomes/dm6/dm6.2bit
SEQ1_CHUNK=5000000
SEQ1_LAP=10000
SEQ1_LEN=/hive/data/genomes/dm6/chrom.sizes

# QUERY - D. pseudoobscura
SEQ2_DIR=/hive/data/genomes/droPse3/droPse3.2bit
SEQ2_CHUNK=5000000
SEQ2_LAP=10000
SEQ2_LEN=/hive/data/genomes/droPse3/chrom.sizes

BASE=/hive/data/genomes/dm6/bed/lastzDroPse3.2014-08-29
TMPDIR=/dev/shm
'_EOF_'
    # << happy emacs

    time doBlastzChainNet.pl `pwd`/DEF \
        -syntenicNet -fileServer=hgwdev \
        -chainMinScore=3000  -chainLinearGap=loose \
        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > do.log 2>&1
    # real    54m34.163s
    cat fb.dm6.chainDroPse3Link.txt
    # 85310450 bases of 142573024 (59.836%) in intersection

    #   running the swap
    mkdir /hive/data/genomes/droPse3/bed/blastz.dm6.swap
    cd /hive/data/genomes/droPse3/bed/blastz.dm6.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
        /hive/data/genomes/dm6/bed/lastzDroPse3.2014-08-29/DEF \
        -swap -syntenicNet -chainMinScore=3000 -chainLinearGap=loose \
        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > swap.log 2>&1
    # real    6m27.317s
    cat fb.droPse3.chainDm6Link.txt
    # 89188686 bases of 149047912 (59.839%) in intersection

###########################################################################
# LASTZ D. willistoni DroWil2 (DONE 2014-08-29 - Hiram)
    screen -S droWil2-dm6
    mkdir /hive/data/genomes/dm6/bed/lastzDroWil2.2014-08-29
    cd /hive/data/genomes/dm6/bed/lastzDroWil2.2014-08-29

    cat << '_EOF_' > DEF
# D. melanogaster vs. D. willistoni
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.52/bin/lastz
BLASTZ_H=2000
BLASTZ_Y=3400
BLASTZ_L=4000
BLASTZ_K=2200
BLASTZ_Q=/cluster/data/blastz/HoxD55.q

# TARGET - D. melanogaster
SEQ1_DIR=/hive/data/genomes/dm6/dm6.2bit
SEQ1_CHUNK=5000000
SEQ1_LAP=10000
SEQ1_LEN=/hive/data/genomes/dm6/chrom.sizes

# QUERY - D. willistoni
SEQ2_DIR=/hive/data/genomes/droWil2/droWil2.2bit
SEQ2_CHUNK=5000000
SEQ2_LAP=10000
SEQ2_LEN=/hive/data/genomes/droWil2/chrom.sizes

BASE=/hive/data/genomes/dm6/bed/lastzDroWil2.2014-08-29
TMPDIR=/dev/shm
'_EOF_'
    # << happy emacs

    time doBlastzChainNet.pl `pwd`/DEF \
        -syntenicNet -fileServer=hgwdev \
        -chainMinScore=3000  -chainLinearGap=loose \
        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > do.log 2>&1
    # real    12m46.450s
    cat fb.dm6.chainDroWil2Link.txt
    # 73328691 bases of 142573024 (51.432%) in intersection

    #   running the swap
    mkdir /hive/data/genomes/droWil2/bed/blastz.dm6.swap
    cd /hive/data/genomes/droWil2/bed/blastz.dm6.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
        /hive/data/genomes/dm6/bed/lastzDroWil2.2014-08-29/DEF \
        -swap -syntenicNet -chainMinScore=3000 -chainLinearGap=loose \
        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > swap.log 2>&1
    # real    7m21.824s
    cat fb.droWil2.chainDm6Link.txt
    # 90409687 bases of 223610028 (40.432%) in intersection

###########################################################################
# LASTZ D. yakuba DroYak3 (DONE 2014-08-29 - Hiram)
    screen -S droYak3-dm6
    mkdir /hive/data/genomes/dm6/bed/lastzDroYak3.2014-08-29
    cd /hive/data/genomes/dm6/bed/lastzDroYak3.2014-08-29

    cat << '_EOF_' > DEF
# D. melanogaster vs. D. yakuba
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.52/bin/lastz
BLASTZ_H=2000
BLASTZ_Y=3400
BLASTZ_L=4000
BLASTZ_K=2200
BLASTZ_Q=/cluster/data/blastz/HoxD55.q

# TARGET - D. melanogaster
SEQ1_DIR=/hive/data/genomes/dm6/dm6.2bit
SEQ1_CHUNK=5000000
SEQ1_LAP=10000
SEQ1_LEN=/hive/data/genomes/dm6/chrom.sizes

# QUERY - D. yakuba
SEQ2_DIR=/hive/data/genomes/droYak3/droYak3.2bit
SEQ2_CHUNK=5000000
SEQ2_LAP=10000
SEQ2_LEN=/hive/data/genomes/droYak3/chrom.sizes

BASE=/hive/data/genomes/dm6/bed/lastzDroYak3.2014-08-29
TMPDIR=/dev/shm
'_EOF_'
    # << happy emacs

    time doBlastzChainNet.pl `pwd`/DEF \
        -syntenicNet -fileServer=hgwdev \
        -chainMinScore=3000  -chainLinearGap=loose \
        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > do.log 2>&1
    # real    68m39.923s
    cat fb.dm6.chainDroYak3Link.txt
    # 114859096 bases of 142573024 (80.562%) in intersection

    #   running the swap
    mkdir /hive/data/genomes/droYak3/bed/blastz.dm6.swap
    cd /hive/data/genomes/droYak3/bed/blastz.dm6.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
        /hive/data/genomes/dm6/bed/lastzDroYak3.2014-08-29/DEF \
        -swap -syntenicNet -chainMinScore=3000 -chainLinearGap=loose \
        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > swap.log 2>&1
    # real    8m13.655s
    cat fb.droYak3.chainDm6Link.txt
    # 131647225 bases of 162598019 (80.965%) in intersection

###########################################################################
# LASTZ A. mellifera ApiMel4 (DONE 2014-08-29 - Hiram)
    screen -S apiMel4-dm6
    mkdir /hive/data/genomes/dm6/bed/lastzApiMel4.2014-08-29
    cd /hive/data/genomes/dm6/bed/lastzApiMel4.2014-08-29

    cat << '_EOF_' > DEF
# D. melanogaster vs. A. mellifera
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.52/bin/lastz
BLASTZ_H=2000
BLASTZ_Y=3400
BLASTZ_L=4000
BLASTZ_K=2200
BLASTZ_Q=/cluster/data/blastz/HoxD55.q

# TARGET - D. melanogaster
SEQ1_DIR=/hive/data/genomes/dm6/dm6.2bit
SEQ1_CHUNK=5000000
SEQ1_LAP=10000
SEQ1_LEN=/hive/data/genomes/dm6/chrom.sizes

# QUERY - A. mellifera
SEQ2_DIR=/hive/data/genomes/apiMel4/apiMel4.2bit
SEQ2_CHUNK=5000000
SEQ2_LAP=10000
SEQ2_LEN=/hive/data/genomes/apiMel4/chrom.sizes

BASE=/hive/data/genomes/dm6/bed/lastzApiMel4.2014-08-29
TMPDIR=/dev/shm
'_EOF_'
    # << happy emacs

    time doBlastzChainNet.pl `pwd`/DEF \
        -syntenicNet -fileServer=hgwdev \
        -chainMinScore=3000  -chainLinearGap=loose \
        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > do.log 2>&1
    # real    42m58.632s
    cat fb.dm6.chainApiMel4Link.txt
    # 11667011 bases of 142573024 (8.183%) in intersection

    #   running the swap
    mkdir /hive/data/genomes/apiMel4/bed/blastz.dm6.swap
    cd /hive/data/genomes/apiMel4/bed/blastz.dm6.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
        /hive/data/genomes/dm6/bed/lastzApiMel4.2014-08-29/DEF \
        -swap -syntenicNet -chainMinScore=3000 -chainLinearGap=loose \
        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > swap.log 2>&1
    # real    1m19.091s
    cat fb.apiMel4.chainDm6Link.txt
    # 12280818 bases of 229123650 (5.360%) in intersection

###########################################################################
# LASTZ D. mojavensis DroMoj3 (DONE 2014-08-29 - Hiram)
    screen -S droMoj3-dm6
    mkdir /hive/data/genomes/dm6/bed/lastzDroMoj3.2014-08-29
    cd /hive/data/genomes/dm6/bed/lastzDroMoj3.2014-08-29

    cat << '_EOF_' > DEF
# D. melanogaster vs. D. mojavensis
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.52/bin/lastz
BLASTZ_H=2000
BLASTZ_Y=3400
BLASTZ_L=4000
BLASTZ_K=2200
BLASTZ_Q=/cluster/data/blastz/HoxD55.q

# TARGET - D. melanogaster
SEQ1_DIR=/hive/data/genomes/dm6/dm6.2bit
SEQ1_CHUNK=5000000
SEQ1_LAP=10000
SEQ1_LEN=/hive/data/genomes/dm6/chrom.sizes

# QUERY - D. mojavensis
SEQ2_DIR=/hive/data/genomes/droMoj3/droMoj3.2bit
SEQ2_CHUNK=5000000
SEQ2_LAP=10000
SEQ2_LEN=/hive/data/genomes/droMoj3/chrom.sizes

BASE=/hive/data/genomes/dm6/bed/lastzDroMoj3.2014-08-29
TMPDIR=/dev/shm
'_EOF_'
    # << happy emacs

    time doBlastzChainNet.pl `pwd`/DEF \
        -syntenicNet -fileServer=hgwdev \
        -chainMinScore=3000  -chainLinearGap=loose \
        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > do.log 2>&1
    # real    76m3.636s
    cat fb.dm6.chainDroMoj3Link.txt
    # 71474162 bases of 142573024 (50.132%) in intersection

    #   running the swap
    mkdir /hive/data/genomes/droMoj3/bed/blastz.dm6.swap
    cd /hive/data/genomes/droMoj3/bed/blastz.dm6.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
        /hive/data/genomes/dm6/bed/lastzDroMoj3.2014-08-29/DEF \
        -swap -syntenicNet -chainMinScore=3000 -chainLinearGap=loose \
        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > swap.log 2>&1
    # real    5m40.109s
    cat fb.droMoj3.chainDm6Link.txt
    # 74578701 bases of 180207831 (41.385%) in intersection

###########################################################################
# LASTZ D. pseudoobscura DP4 (Done 2014-08-29 - Hiram)
    screen -S dp4-dm6
    mkdir /hive/data/genomes/dm6/bed/lastzDP4.2014-08-29
    cd /hive/data/genomes/dm6/bed/lastzDP4.2014-08-29

    cat << '_EOF_' > DEF
# D. melanogaster vs. D. pseudoobscura
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.52/bin/lastz
BLASTZ_H=2000
BLASTZ_Y=3400
BLASTZ_L=4000
BLASTZ_K=2200
BLASTZ_Q=/cluster/data/blastz/HoxD55.q

# TARGET - D. melanogaster
SEQ1_DIR=/hive/data/genomes/dm6/dm6.2bit
SEQ1_CHUNK=5000000
SEQ1_LAP=10000
SEQ1_LEN=/hive/data/genomes/dm6/chrom.sizes

# QUERY - D. pseudoobscura
SEQ2_DIR=/hive/data/genomes/dp4/dp4.2bit
SEQ2_CHUNK=5000000
SEQ2_LAP=10000
SEQ2_LEN=/hive/data/genomes/dp4/chrom.sizes

BASE=/hive/data/genomes/dm6/bed/lastzDP4.2014-08-29
TMPDIR=/dev/shm
'_EOF_'
    # << happy emacs

    time doBlastzChainNet.pl `pwd`/DEF \
        -syntenicNet -fileServer=hgwdev \
        -chainMinScore=3000  -chainLinearGap=loose \
        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > do.log 2>&1
    # real    46m17.831s
    cat fb.dm6.chainDp4Link.txt
    # 85549372 bases of 142573024 (60.004%) in intersection

    #   running the swap
    mkdir /hive/data/genomes/dp4/bed/blastz.dm6.swap
    cd /hive/data/genomes/dp4/bed/blastz.dm6.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
        /hive/data/genomes/dm6/bed/lastzDP4.2014-08-29/DEF \
        -swap -syntenicNet -chainMinScore=3000 -chainLinearGap=loose \
        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > swap.log 2>&1
    # real    6m45.065s
    cat fb.dp4.chainDm6Link.txt
    # 88406867 bases of 146066673 (60.525%) in intersection

###########################################################################
# LASTZ D. albomicans DroAlb1 (DONE 2014-08-29 - Hiram)
    screen -S droAlb1-dm6
    mkdir /hive/data/genomes/dm6/bed/lastzDroAlb1.2014-08-29
    cd /hive/data/genomes/dm6/bed/lastzDroAlb1.2014-08-29

    cat << '_EOF_' > DEF
# D. melanogaster vs. D. albomicans
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.52/bin/lastz
BLASTZ_H=2000
BLASTZ_Y=3400
BLASTZ_L=4000
BLASTZ_K=2200
BLASTZ_Q=/cluster/data/blastz/HoxD55.q

# TARGET - D. melanogaster
SEQ1_DIR=/hive/data/genomes/dm6/dm6.2bit
SEQ1_CHUNK=5000000
SEQ1_LAP=10000
SEQ1_LEN=/hive/data/genomes/dm6/chrom.sizes

# QUERY - D. albomicans
SEQ2_DIR=/hive/data/genomes/droAlb1/droAlb1.2bit
SEQ2_CHUNK=5000000
SEQ2_LAP=10000
SEQ2_LEN=/hive/data/genomes/droAlb1/chrom.sizes

BASE=/hive/data/genomes/dm6/bed/lastzDroAlb1.2014-08-29
TMPDIR=/dev/shm
'_EOF_'
    # << this line keeps emacs coloring happy
    time doBlastzChainNet.pl `pwd`/DEF \
        -syntenicNet -fileServer=hgwdev \
        -chainMinScore=3000  -chainLinearGap=loose \
        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > do.log 2>&1
    # real    86m58.011s
    cat fb.dm6.chainDroAlb1Link.txt
    # 63507363 bases of 142573024 (44.544%) in intersection

    #   running the swap
    mkdir /hive/data/genomes/droAlb1/bed/blastz.dm6.swap
    cd /hive/data/genomes/droAlb1/bed/blastz.dm6.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
        /hive/data/genomes/dm6/bed/lastzDroAlb1.2014-08-29/DEF \
        -swap -syntenicNet -chainMinScore=3000 -chainLinearGap=loose \
        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > swap.log 2>&1
    # real    7m39.524s
    cat fb.droAlb1.chainDm6Link.txt
    # 82594337 bases of 216120500 (38.217%) in intersection

###########################################################################
# LASTZ D. biarmipes DroBia2 (DONE 2014-08-29 - Hiram)
    screen -S droBia2-dm6
    mkdir /hive/data/genomes/dm6/bed/lastzDroBia2.2014-08-29
    cd /hive/data/genomes/dm6/bed/lastzDroBia2.2014-08-29

    cat << '_EOF_' > DEF
# D. melanogaster vs. D. biarmipes
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.52/bin/lastz
BLASTZ_H=2000
BLASTZ_Y=3400
BLASTZ_L=4000
BLASTZ_K=2200
BLASTZ_Q=/cluster/data/blastz/HoxD55.q

# TARGET - D. melanogaster
SEQ1_DIR=/hive/data/genomes/dm6/dm6.2bit
SEQ1_CHUNK=5000000
SEQ1_LAP=10000
SEQ1_LEN=/hive/data/genomes/dm6/chrom.sizes

# QUERY - D. biarmipes
SEQ2_DIR=/hive/data/genomes/droBia2/droBia2.2bit
SEQ2_CHUNK=5000000
SEQ2_LAP=10000
SEQ2_LEN=/hive/data/genomes/droBia2/chrom.sizes

BASE=/hive/data/genomes/dm6/bed/lastzDroBia2.2014-08-29
TMPDIR=/dev/shm
'_EOF_'
    # << happy emacs

    time doBlastzChainNet.pl `pwd`/DEF \
        -syntenicNet -fileServer=hgwdev \
        -chainMinScore=3000  -chainLinearGap=loose \
        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > do.log 2>&1
    # real    67m19.563s
    cat fb.dm6.chainDroBia2Link.txt
    # 106989297 bases of 142573024 (75.042%) in intersection

    #   running the swap
    mkdir /hive/data/genomes/droBia2/bed/blastz.dm6.swap
    cd /hive/data/genomes/droBia2/bed/blastz.dm6.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
        /hive/data/genomes/dm6/bed/lastzDroBia2.2014-08-29/DEF \
        -swap -syntenicNet -chainMinScore=3000 -chainLinearGap=loose \
        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > swap.log 2>&1
    # real    7m41.031s
    cat fb.droBia2.chainDm6Link.txt
    # 113609151 bases of 168601808 (67.383%) in intersection

###########################################################################
# LASTZ D. bipectinata DroBip2 (DONE 2014-08-29 - Hiram)
    screen -S droBip2-dm6
    mkdir /hive/data/genomes/dm6/bed/lastzDroBip2.2014-08-29
    cd /hive/data/genomes/dm6/bed/lastzDroBip2.2014-08-29

    cat << '_EOF_' > DEF
# D. melanogaster vs. D. bipectinata
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.52/bin/lastz
BLASTZ_H=2000
BLASTZ_Y=3400
BLASTZ_L=4000
BLASTZ_K=2200
BLASTZ_Q=/cluster/data/blastz/HoxD55.q

# TARGET - D. melanogaster
SEQ1_DIR=/hive/data/genomes/dm6/dm6.2bit
SEQ1_CHUNK=5000000
SEQ1_LAP=10000
SEQ1_LEN=/hive/data/genomes/dm6/chrom.sizes

# QUERY - D. bipectinata
SEQ2_DIR=/hive/data/genomes/droBip2/droBip2.2bit
SEQ2_CHUNK=5000000
SEQ2_LAP=10000
SEQ2_LEN=/hive/data/genomes/droBip2/chrom.sizes

BASE=/hive/data/genomes/dm6/bed/lastzDroBip2.2014-08-29
TMPDIR=/dev/shm
'_EOF_'
    # << happy emacs

    time doBlastzChainNet.pl `pwd`/DEF \
        -syntenicNet -fileServer=hgwdev \
        -chainMinScore=3000  -chainLinearGap=loose \
        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > do.log 2>&1
    # real    72m58.502s

    cat fb.dm6.chainDroBip2Link.txt
    # 93905122 bases of 142573024 (65.865%) in intersection

    #   running the swap
    mkdir /hive/data/genomes/droBip2/bed/blastz.dm6.swap
    cd /hive/data/genomes/droBip2/bed/blastz.dm6.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
        /hive/data/genomes/dm6/bed/lastzDroBip2.2014-08-29/DEF \
        -swap -syntenicNet -chainMinScore=3000 -chainLinearGap=loose \
        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > swap.log 2>&1
    # real    6m57.737s
    cat fb.droBip2.chainDm6Link.txt
    # 100268467 bases of 166411051 (60.253%) in intersection

###########################################################################
# LASTZ D. elegans DroEle2 (DONE 2014-08-29 - Hiram)
    screen -S droEle2-dm6
    mkdir /hive/data/genomes/dm6/bed/lastzDroEle2.2014-08-29
    cd /hive/data/genomes/dm6/bed/lastzDroEle2.2014-08-29

    cat << '_EOF_' > DEF
# D. melanogaster vs. D. elegans
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.52/bin/lastz
BLASTZ_H=2000
BLASTZ_Y=3400
BLASTZ_L=4000
BLASTZ_K=2200
BLASTZ_Q=/cluster/data/blastz/HoxD55.q

# TARGET - D. melanogaster
SEQ1_DIR=/hive/data/genomes/dm6/dm6.2bit
SEQ1_CHUNK=5000000
SEQ1_LAP=10000
SEQ1_LEN=/hive/data/genomes/dm6/chrom.sizes

# QUERY - D. elegans
SEQ2_DIR=/hive/data/genomes/droEle2/droEle2.2bit
SEQ2_CHUNK=5000000
SEQ2_LAP=10000
SEQ2_LEN=/hive/data/genomes/droEle2/chrom.sizes

BASE=/hive/data/genomes/dm6/bed/lastzDroEle2.2014-08-29
TMPDIR=/dev/shm
'_EOF_'
    # << happy emacs

    time doBlastzChainNet.pl `pwd`/DEF \
        -syntenicNet -fileServer=hgwdev \
        -chainMinScore=3000  -chainLinearGap=loose \
        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > do.log 2>&1
    # real    52m32.697s
    cat fb.dm6.chainDroEle2Link.txt
    # 107563805 bases of 142573024 (75.445%) in intersection

    #   running the swap
    mkdir /hive/data/genomes/droEle2/bed/blastz.dm6.swap
    cd /hive/data/genomes/droEle2/bed/blastz.dm6.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
        /hive/data/genomes/dm6/bed/lastzDroEle2.2014-08-29/DEF \
        -swap -syntenicNet -chainMinScore=3000 -chainLinearGap=loose \
        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > swap.log 2>&1
    # real    7m37.855s
    cat fb.droEle2.chainDm6Link.txt
    # 115222391 bases of 170533638 (67.566%) in intersection

###########################################################################
# LASTZ D. eugracilis DroEug2 (DONE  2014-08-29 - Hiram)
    screen -S droEug2-dm6
    mkdir /hive/data/genomes/dm6/bed/lastzDroEug2.2014-08-29
    cd /hive/data/genomes/dm6/bed/lastzDroEug2.2014-08-29

    cat << '_EOF_' > DEF
# D. melanogaster vs. D. eugracilis
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.52/bin/lastz
BLASTZ_H=2000
BLASTZ_Y=3400
BLASTZ_L=4000
BLASTZ_K=2200
BLASTZ_Q=/cluster/data/blastz/HoxD55.q

# TARGET - D. melanogaster
SEQ1_DIR=/hive/data/genomes/dm6/dm6.2bit
SEQ1_CHUNK=5000000
SEQ1_LAP=10000
SEQ1_LEN=/hive/data/genomes/dm6/chrom.sizes

# QUERY - D. eugracilis
SEQ2_DIR=/hive/data/genomes/droEug2/droEug2.2bit
SEQ2_CHUNK=5000000
SEQ2_LAP=10000
SEQ2_LEN=/hive/data/genomes/droEug2/chrom.sizes

BASE=/hive/data/genomes/dm6/bed/lastzDroEug2.2014-08-29
TMPDIR=/dev/shm
'_EOF_'
    # << happy emacs

    time doBlastzChainNet.pl `pwd`/DEF \
        -syntenicNet -fileServer=hgwdev \
        -chainMinScore=3000  -chainLinearGap=loose \
        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > do.log 2>&1
    # real    81m4.137s
    cat fb.dm6.chainDroEug2Link.txt
    # 107307060 bases of 142573024 (75.265%) in intersection

    #   running the swap
    mkdir /hive/data/genomes/droEug2/bed/blastz.dm6.swap
    cd /hive/data/genomes/droEug2/bed/blastz.dm6.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
        /hive/data/genomes/dm6/bed/lastzDroEug2.2014-08-29/DEF \
        -swap -syntenicNet -chainMinScore=3000 -chainLinearGap=loose \
        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > swap.log 2>&1
    # real    7m6.555s
    cat fb.droEug2.chainDm6Link.txt
    # 114576241 bases of 156328338 (73.292%) in intersection

###########################################################################
# LASTZ D. ficusphila DroFic2 (DONE 2014-08-29 - Hiram)
    screen -S droFic2-dm6
    mkdir /hive/data/genomes/dm6/bed/lastzDroFic2.2014-08-29
    cd /hive/data/genomes/dm6/bed/lastzDroFic2.2014-08-29

    cat << '_EOF_' > DEF
# D. melanogaster vs. D. ficusphila
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.52/bin/lastz
BLASTZ_H=2000
BLASTZ_Y=3400
BLASTZ_L=4000
BLASTZ_K=2200
BLASTZ_Q=/cluster/data/blastz/HoxD55.q

# TARGET - D. melanogaster
SEQ1_DIR=/hive/data/genomes/dm6/dm6.2bit
SEQ1_CHUNK=5000000
SEQ1_LAP=10000
SEQ1_LEN=/hive/data/genomes/dm6/chrom.sizes

# QUERY - D. ficusphila
SEQ2_DIR=/hive/data/genomes/droFic2/droFic2.2bit
SEQ2_CHUNK=5000000
SEQ2_LAP=10000
SEQ2_LEN=/hive/data/genomes/droFic2/chrom.sizes

BASE=/hive/data/genomes/dm6/bed/lastzDroFic2.2014-08-29
TMPDIR=/dev/shm
'_EOF_'
    # << happy emacs

    time doBlastzChainNet.pl `pwd`/DEF \
        -syntenicNet -fileServer=hgwdev \
        -chainMinScore=3000  -chainLinearGap=loose \
        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > do.log 2>&1
    # real    62m0.114s
    cat fb.dm6.chainDroFic2Link.txt
    # 106559757 bases of 142573024 (74.740%) in intersection

    #   running the swap
    mkdir /hive/data/genomes/droFic2/bed/blastz.dm6.swap
    cd /hive/data/genomes/droFic2/bed/blastz.dm6.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
        /hive/data/genomes/dm6/bed/lastzDroFic2.2014-08-29/DEF \
        -swap -syntenicNet -chainMinScore=3000 -chainLinearGap=loose \
        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > swap.log 2>&1
    # real    6m54.593s

    cat fb.droFic2.chainDm6Link.txt
    # 112386864 bases of 151050603 (74.403%) in intersection

###########################################################################
# LASTZ D. kikkawai DroKik2 (DONE 2014-08-29 - Hiram)
    screen -S droKik2-dm6
    mkdir /hive/data/genomes/dm6/bed/lastzDroKik2.2014-08-29
    cd /hive/data/genomes/dm6/bed/lastzDroKik2.2014-08-29

    cat << '_EOF_' > DEF
# D. melanogaster vs. D. kikkawai
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.52/bin/lastz
BLASTZ_H=2000
BLASTZ_Y=3400
BLASTZ_L=4000
BLASTZ_K=2200
BLASTZ_Q=/cluster/data/blastz/HoxD55.q

# TARGET - D. melanogaster
SEQ1_DIR=/hive/data/genomes/dm6/dm6.2bit
SEQ1_CHUNK=5000000
SEQ1_LAP=10000
SEQ1_LEN=/hive/data/genomes/dm6/chrom.sizes

# QUERY - D. kikkawai
SEQ2_DIR=/hive/data/genomes/droKik2/droKik2.2bit
SEQ2_CHUNK=5000000
SEQ2_LAP=10000
SEQ2_LEN=/hive/data/genomes/droKik2/chrom.sizes

BASE=/hive/data/genomes/dm6/bed/lastzDroKik2.2014-08-29
TMPDIR=/dev/shm
'_EOF_'
    # << happy emacs

    time doBlastzChainNet.pl `pwd`/DEF \
        -syntenicNet -fileServer=hgwdev \
        -chainMinScore=3000  -chainLinearGap=loose \
        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > do.log 2>&1
    # real    68m49.238s
    cat fb.dm6.chainDroKik2Link.txt
    # 100642052 bases of 142573024 (70.590%) in intersection

    #   running the swap
    mkdir /hive/data/genomes/droKik2/bed/blastz.dm6.swap
    cd /hive/data/genomes/droKik2/bed/blastz.dm6.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
        /hive/data/genomes/dm6/bed/lastzDroKik2.2014-08-29/DEF \
        -swap -syntenicNet -chainMinScore=3000 -chainLinearGap=loose \
        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > swap.log 2>&1
    # real    7m5.278s
    cat fb.droKik2.chainDm6Link.txt
    # 105765944 bases of 163450645 (64.708%) in intersection

###########################################################################
# LASTZ D. miranda DroMir2 (DONE 2014-08-29 - Hiram)
    screen -S droMir2-dm6
    mkdir /hive/data/genomes/dm6/bed/lastzDroMir2.2014-08-29
    cd /hive/data/genomes/dm6/bed/lastzDroMir2.2014-08-29

    cat << '_EOF_' > DEF
# D. melanogaster vs. D. miranda
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.52/bin/lastz
BLASTZ_H=2000
BLASTZ_Y=3400
BLASTZ_L=4000
BLASTZ_K=2200
BLASTZ_Q=/cluster/data/blastz/HoxD55.q

# TARGET - D. melanogaster
SEQ1_DIR=/hive/data/genomes/dm6/dm6.2bit
SEQ1_CHUNK=5000000
SEQ1_LAP=10000
SEQ1_LEN=/hive/data/genomes/dm6/chrom.sizes

# QUERY - D. miranda
SEQ2_DIR=/hive/data/genomes/droMir2/droMir2.2bit
SEQ2_CHUNK=5000000
SEQ2_LAP=10000
SEQ2_LEN=/hive/data/genomes/droMir2/chrom.sizes

BASE=/hive/data/genomes/dm6/bed/lastzDroMir2.2014-08-29
TMPDIR=/dev/shm
'_EOF_'
    # << happy emacs

    time doBlastzChainNet.pl `pwd`/DEF \
        -syntenicNet -fileServer=hgwdev \
        -chainMinScore=3000  -chainLinearGap=loose \
        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > do.log 2>&1
    # real    46m36.289s
    cat fb.dm6.chainDroMir2Link.txt
    # 84557634 bases of 142573024 (59.308%) in intersection

    #   running the swap
    mkdir /hive/data/genomes/droMir2/bed/blastz.dm6.swap
    cd /hive/data/genomes/droMir2/bed/blastz.dm6.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
        /hive/data/genomes/dm6/bed/lastzDroMir2.2014-08-29/DEF \
        -swap -syntenicNet -chainMinScore=3000 -chainLinearGap=loose \
        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > swap.log 2>&1
    # real    real    4m55.434s

    cat fb.droMir2.chainDm6Link.txt
    # 84386358 bases of 132586288 (63.646%) in intersection

###########################################################################
# LASTZ D. suzukii DroSuz1 (DONE 2014-08-29 - Hiram)
    screen -S droSuz1-dm6
    mkdir /hive/data/genomes/dm6/bed/lastzDroSuz1.2014-08-29
    cd /hive/data/genomes/dm6/bed/lastzDroSuz1.2014-08-29

    cat << '_EOF_' > DEF
# D. melanogaster vs. D. suzukii
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.52/bin/lastz
BLASTZ_H=2000
BLASTZ_Y=3400
BLASTZ_L=4000
BLASTZ_K=2200
BLASTZ_Q=/cluster/data/blastz/HoxD55.q

# TARGET - D. melanogaster
SEQ1_DIR=/hive/data/genomes/dm6/dm6.2bit
SEQ1_CHUNK=5000000
SEQ1_LAP=10000
SEQ1_LEN=/hive/data/genomes/dm6/chrom.sizes

# QUERY - D. suzukii
SEQ2_DIR=/hive/data/genomes/droSuz1/droSuz1.2bit
SEQ2_CHUNK=5000000
SEQ2_LAP=10000
SEQ2_LEN=/hive/data/genomes/droSuz1/chrom.sizes

BASE=/hive/data/genomes/dm6/bed/lastzDroSuz1.2014-08-29
TMPDIR=/dev/shm
'_EOF_'
    # << happy emacs

    time doBlastzChainNet.pl `pwd`/DEF \
        -syntenicNet -fileServer=hgwdev \
        -chainMinScore=3000  -chainLinearGap=loose \
        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > do.log 2>&1
    # real    69m12.803s
    cat fb.dm6.chainDroSuz1Link.txt
    # 106346119 bases of 142573024 (74.591%) in intersection

    #   running the swap
    mkdir /hive/data/genomes/droSuz1/bed/blastz.dm6.swap
    cd /hive/data/genomes/droSuz1/bed/blastz.dm6.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
        /hive/data/genomes/dm6/bed/lastzDroSuz1.2014-08-29/DEF \
        -swap -syntenicNet -chainMinScore=3000 -chainLinearGap=loose \
        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > swap.log 2>&1
    # real    9m7.373s
    cat fb.droSuz1.chainDm6Link.txt
    # 129874262 bases of 202303443 (64.198%) in intersection

###########################################################################
# LASTZ D. rhopaloa DroRho2 (DONE 2014-08-29 - Hiram)
    screen -S droRho2-dm6
    mkdir /hive/data/genomes/dm6/bed/lastzDroRho2.2014-08-29
    cd /hive/data/genomes/dm6/bed/lastzDroRho2.2014-08-29

    cat << '_EOF_' > DEF
# D. melanogaster vs. D. rhopaloa
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.52/bin/lastz
BLASTZ_H=2000
BLASTZ_Y=3400
BLASTZ_L=4000
BLASTZ_K=2200
BLASTZ_Q=/cluster/data/blastz/HoxD55.q

# TARGET - D. melanogaster
SEQ1_DIR=/hive/data/genomes/dm6/dm6.2bit
SEQ1_CHUNK=5000000
SEQ1_LAP=10000
SEQ1_LEN=/hive/data/genomes/dm6/chrom.sizes

# QUERY - D. rhopaloa
SEQ2_DIR=/hive/data/genomes/droRho2/droRho2.2bit
SEQ2_CHUNK=5000000
SEQ2_LAP=10000
SEQ2_LEN=/hive/data/genomes/droRho2/chrom.sizes

BASE=/hive/data/genomes/dm6/bed/lastzDroRho2.2014-08-29
TMPDIR=/dev/shm
'_EOF_'
    # << happy emacs

    time doBlastzChainNet.pl `pwd`/DEF \
        -syntenicNet -fileServer=hgwdev \
        -chainMinScore=3000  -chainLinearGap=loose \
        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > do.log 2>&1
    # real    79m31.589s
    cat fb.dm6.chainDroRho2Link.txt
    # 106844545 bases of 142573024 (74.940%) in intersection

    #   running the swap
    mkdir /hive/data/genomes/droRho2/bed/blastz.dm6.swap
    cd /hive/data/genomes/droRho2/bed/blastz.dm6.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
        /hive/data/genomes/dm6/bed/lastzDroRho2.2014-08-29/DEF \
        -swap -syntenicNet -chainMinScore=3000 -chainLinearGap=loose \
        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > swap.log 2>&1
    # real  10m15.734s
    cat fb.droRho2.chainDm6Link.txt
    # 126832194 bases of 193889799 (65.415%) in intersection

###########################################################################
# LASTZ D. takahashii DroTak2 (DONE 2014-08-29 - Hiram)
    screen -S droTak2-dm6
    mkdir /hive/data/genomes/dm6/bed/lastzDroTak2.2014-08-29
    cd /hive/data/genomes/dm6/bed/lastzDroTak2.2014-08-29

    cat << '_EOF_' > DEF
# D. melanogaster vs. D. takahashii
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.52/bin/lastz
BLASTZ_H=2000
BLASTZ_Y=3400
BLASTZ_L=4000
BLASTZ_K=2200
BLASTZ_Q=/cluster/data/blastz/HoxD55.q

# TARGET - D. melanogaster
SEQ1_DIR=/hive/data/genomes/dm6/dm6.2bit
SEQ1_CHUNK=5000000
SEQ1_LAP=10000
SEQ1_LEN=/hive/data/genomes/dm6/chrom.sizes

# QUERY - D. takahashii
SEQ2_DIR=/hive/data/genomes/droTak2/droTak2.2bit
SEQ2_CHUNK=5000000
SEQ2_LAP=10000
SEQ2_LEN=/hive/data/genomes/droTak2/chrom.sizes

BASE=/hive/data/genomes/dm6/bed/lastzDroTak2.2014-08-29
TMPDIR=/dev/shm
'_EOF_'
    # << happy emacs

    time doBlastzChainNet.pl `pwd`/DEF \
        -syntenicNet -fileServer=hgwdev \
        -chainMinScore=3000  -chainLinearGap=loose \
        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > do.log 2>&1
    # real     66m20.654s
    cat fb.dm6.chainDroTak2Link.txt
    # 107653248 bases of 142573024 (75.507%) in intersection

    #   running the swap
    mkdir /hive/data/genomes/droTak2/bed/blastz.dm6.swap
    cd /hive/data/genomes/droTak2/bed/blastz.dm6.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
        /hive/data/genomes/dm6/bed/lastzDroTak2.2014-08-29/DEF \
        -swap -syntenicNet -chainMinScore=3000 -chainLinearGap=loose \
        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > swap.log 2>&1
    # real    7m15.787s
    cat fb.droTak2.chainDm6Link.txt
    # 114989522 bases of 181031891 (63.519%) in intersection

###########################################################################
# LASTZ M. domestica MusDom2 (DONE 2014-08-29 - Hiram)
    screen -S musDom2-dm6
    mkdir /hive/data/genomes/dm6/bed/lastzMusDom2.2014-08-29
    cd /hive/data/genomes/dm6/bed/lastzMusDom2.2014-08-29

    cat << '_EOF_' > DEF
# D. melanogaster vs. M. domestica
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.52/bin/lastz
BLASTZ_H=2000
BLASTZ_Y=3400
BLASTZ_L=4000
BLASTZ_K=2200
BLASTZ_Q=/cluster/data/blastz/HoxD55.q

# TARGET - D. melanogaster
SEQ1_DIR=/hive/data/genomes/dm6/dm6.2bit
SEQ1_CHUNK=5000000
SEQ1_LAP=10000
SEQ1_LEN=/hive/data/genomes/dm6/chrom.sizes

# QUERY - M. domestica
SEQ2_DIR=/hive/data/genomes/musDom2/musDom2.2bit
SEQ2_CHUNK=5000000
SEQ2_LAP=10000
SEQ2_LEN=/hive/data/genomes/musDom2/chrom.sizes

BASE=/hive/data/genomes/dm6/bed/lastzMusDom2.2014-08-29
TMPDIR=/dev/shm
'_EOF_'
    # << happy emacs

    time doBlastzChainNet.pl `pwd`/DEF \
        -syntenicNet -fileServer=hgwdev \
        -chainMinScore=3000  -chainLinearGap=loose \
        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > do.log 2>&1
    # Elapsed time: 81m30s
    cat fb.dm6.chainMusDom2Link.txt
    # 27485718 bases of 142573024 (19.278%) in intersection

    #   running the swap
    mkdir /hive/data/genomes/musDom2/bed/blastz.dm6.swap
    cd /hive/data/genomes/musDom2/bed/blastz.dm6.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
        /hive/data/genomes/dm6/bed/lastzMusDom2.2014-08-29/DEF \
        -swap -syntenicNet -chainMinScore=3000 -chainLinearGap=loose \
        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > swap.log 2>&1
    # real    3m35.469s
    cat fb.musDom2.chainDm6Link.txt
    # 33622643 bases of 691751600 (4.861%) in intersection

#########################################################################
## 27-Way Multiz (DONE - 2014-09-05 - Hiram)
    ssh hgwdev
    mkdir /hive/data/genomes/dm6/bed/multiz27way
    cd /hive/data/genomes/dm6/bed/multiz27way

    # worked up a phylo tree with the help of
    # http://phylot.biobyte.de/
    # http://itol.embl.de/
    # and editor: http://treegraph.bioinfweb.info/

    cp -p ~/kent/src/hg/utils/phyloTrees/dm6.27way.nh ./


    #	what that looks like:
    ~/kent/src/hg/utils/phyloTrees/asciiTree.pl dm6.27way.nh
(((((((((((((((((dm6:0.1,
                (droSim1:0.1,
                droSec1:0.1):0.1):0.1,
               (droYak3:0.1,
               droEre2:0.1):0.1):0.1,
              (droBia2:0.1,
              droSuz1:0.1):0.1):0.1,
             (droAna3:0.1,
             droBip2:0.1):0.1):0.1,
            droEug2:0.1):0.1,
           droEle2:0.1):0.1,
          droKik2:0.1):0.1,
         droTak2:0.1):0.1,
        droRho2:0.1):0.1,
       droFic2:0.1):0.1,
      ((droPse3:0.1,
       droPer1:0.1):0.1,
      droMir2:0.1):0.1):0.1,
     droWil2:0.1):0.1,
    (((droVir3:0.1,
      droMoj3:0.1):0.1,
     droAlb1:0.1):0.1,
    droGri2:0.1):0.1):0.1,
   musDom2:0.1):0.1,
  anoGam1:0.1):0.1,
 apiMel4:0.1):0.1,
triCas2:0.1);

    # extract species list from that .nh file
    sed 's/[a-z][a-z]*_//g; s/:[0-9\.][0-9\.]*//g; s/;//; /^ *$/d' \
        dm6.27way.nh | xargs echo | sed 's/ //g; s/,/ /g' \
        | sed 's/[()]//g; s/,/ /g' | tr '[ ]' '[\n]' > species.list.txt

    # construct db to common name translation list:
    cat species.list.txt | while read DB
do
hgsql -N -e "select name,organism from dbDb where name=\"${DB}\";" hgcentraltest
done | sed -e "s/\t/->/; s/ /_/g;" | sed -e 's/$/;/' | sed -e 's/\./_/g' \
        > db.to.name.txt
    # construct db to scientific name translation list:
    cat species.list.txt | while read DB
do
hgsql -N -e "select name,scientificName from dbDb where name=\"${DB}\";" hgcentraltest
done | sed -e "s/\t/->/; s/ /_/g;" | sed -e 's/$/;/' | sed -e 's/\./_/g' \
     | sed -e 's/pseudoobscura_pseudoobscura/pseudoobscura/;' \
        > db.to.sciName.txt

    # construct a common name .nh file:
    /cluster/bin/phast/tree_doctor --rename \
    "`cat db.to.name.txt`" dm6.27way.nh | sed -e 's/00*)/)/g; s/00*,/,/g' \
       | $HOME/kent/src/hg/utils/phyloTrees/asciiTree.pl /dev/stdin \
         > dm6.27way.commonNames.nh

    # construct a scientific name .nh file:
    /cluster/bin/phast/tree_doctor --rename \
    "`cat db.to.sciName.txt`" dm6.27way.nh | sed -e 's/00*)/)/g; s/00*,/,/g' \
       | $HOME/kent/src/hg/utils/phyloTrees/asciiTree.pl /dev/stdin \
         > dm6.27way.scientificNames.nh
    # construct a scientific name .nh file:

    cat dm6.27way.scientificNames.nh
(((((((((((((((((Drosophila_melanogaster:0.1,
                (Drosophila_simulans:0.1,
                Drosophila_sechellia:0.1):0.1):0.1,
               (Drosophila_yakuba:0.1,
               Drosophila_erecta:0.1):0.1):0.1,
              (Drosophila_biarmipes:0.1,
              Drosophila_suzukii:0.1):0.1):0.1,
             (Drosophila_ananassae:0.1,
             Drosophila_bipectinata:0.1):0.1):0.1,
            Drosophila_eugracilis:0.1):0.1,
           Drosophila_elegans:0.1):0.1,
          Drosophila_kikkawai:0.1):0.1,
         Drosophila_takahashii:0.1):0.1,
        Drosophila_rhopaloa:0.1):0.1,
       Drosophila_ficusphila:0.1):0.1,
      ((Drosophila_pseudoobscura:0.1,
       Drosophila_persimilis:0.1):0.1,
      Drosophila_miranda:0.1):0.1):0.1,
     Drosophila_willistoni:0.1):0.1,
    (((Drosophila_virilis:0.1,
      Drosophila_mojavensis:0.1):0.1,
     Drosophila_albomicans:0.1):0.1,
    Drosophila_grimshawi:0.1):0.1):0.1,
   Musca_domestica:0.1):0.1,
  Anopheles_gambiae:0.1):0.1,
 Apis_mellifera:0.1):0.1,
Tribolium_castaneum:0.1);

    #	Use this specification in the phyloGif tool:
    #	http://genome.ucsc.edu/cgi-bin/phyloGif
    #	to obtain a png image for src/hg/htdocs/images/phylo/dm6_27way.png

    /cluster/bin/phast/all_dists dm6.27way.nh | grep dm6 \
        | sed -e "s/dm6.//" | sort -k2n > 27way.distances.txt
    #	Use this output to create the table below
    head 27way.distances.txt
droSec1 0.300000
droSim1 0.300000
droEre2 0.400000
droYak3 0.400000
droBia2 0.500000
droSuz1 0.500000
droAna3 0.600000
droBip2 0.600000
droEug2 0.600000
droEle2 0.700000

    cat << '_EOF_' > sizeStats.pl
#!/usr/bin/env perl

use strict;
use warnings;

open (FH, "<27way.distances.txt") or
        die "can not read 27way.distances.txt";

my $count = 0;
while (my $line = <FH>) {
    chomp $line;
    my ($D, $dist) = split('\s+', $line);
    my $chain = "chain" . ucfirst($D);
    my $B="/hive/data/genomes/dm6/bed/lastz.$D/fb.dm6." .
        $chain . "Link.txt";
    my $chainLinkMeasure =
        `awk '{print \$5}' ${B} 2> /dev/null | sed -e "s/(//; s/)//"`;
    chomp $chainLinkMeasure;
    $chainLinkMeasure = 0.0 if (length($chainLinkMeasure) < 1);
    $chainLinkMeasure =~ s/\%//;
    my $swapFile="/hive/data/genomes/${D}/bed/lastz.dm6/fb.${D}.chainDm6Link.txt";
    my $swapMeasure = "N/A";
    if ( -s $swapFile ) {
	$swapMeasure =
	    `awk '{print \$5}' ${swapFile} 2> /dev/null | sed -e "s/(//; s/)//"`;
	chomp $swapMeasure;
	$swapMeasure = 0.0 if (length($swapMeasure) < 1);
	$swapMeasure =~ s/\%//;
    }
    my $orgName=
    `hgsql -N -e "select organism from dbDb where name='$D';" hgcentraltest`;
    chomp $orgName;
    if (length($orgName) < 1) {
        $orgName="N/A";
    }
    ++$count;
    printf "# %02d  %.4f (%% %06.3f) (%% %06.3f) - %s %s\n", $count, $dist,
        $chainLinkMeasure, $swapMeasure, $orgName, $D;
}
close (FH);
'_EOF_'
    # << happy emacs
    chmod +x ./sizeStats.pl
    ./sizeStats.pl
#

#	If you can fill in all the numbers in this table, you are ready for
#	the multiple alignment procedure

#       featureBits chainLink measures
#               chainLink
#  N distance  on dm6  on other     other species
# 01  0.3000 (% 82.561) (% 81.572) - D. sechellia droSec1
# 02  0.3000 (% 80.042) (% 89.329) - D. simulans droSim1
# 03  0.4000 (% 80.023) (% 84.386) - D. erecta droEre2
# 04  0.4000 (% 80.562) (% 80.965) - D. yakuba droYak3
# 05  0.5000 (% 75.042) (% 67.383) - D. biarmipes droBia2
# 06  0.5000 (% 74.591) (% 64.198) - D. suzukii droSuz1
# 07  0.6000 (% 67.282) (% 58.760) - D. ananassae droAna3
# 08  0.6000 (% 65.865) (% 60.253) - D. bipectinata droBip2
# 09  0.6000 (% 75.265) (% 73.292) - D. eugracilis droEug2
# 10  0.7000 (% 75.445) (% 67.566) - D. elegans droEle2
# 11  0.8000 (% 70.590) (% 64.708) - D. kikkawai droKik2
# 12  0.9000 (% 75.507) (% 63.519) - D. takahashii droTak2
# 13  1.0000 (% 74.940) (% 65.415) - D. rhopaloa droRho2
# 14  1.1000 (% 74.740) (% 74.403) - D. ficusphila droFic2
# 15  1.3000 (% 59.308) (% 63.646) - D. miranda droMir2
# 16  1.3000 (% 51.432) (% 40.432) - D. willistoni droWil2
# 17  1.4000 (% 59.049) (% 52.949) - D. persimilis droPer1
# 18  1.4000 (% 59.836) (% 59.839) - D. pseudoobscura droPse3
# 19  1.5000 (% 50.979) (% 42.814) - D. grimshawi droGri2
# 20  1.5000 (% 19.278) (% 04.861) - M. domestica musDom2
# 21  1.6000 (% 14.452) (% 07.962) - A. gambiae anoGam1
# 22  1.6000 (% 44.544) (% 38.217) - D. albomicans droAlb1
# 23  1.7000 (% 08.183) (% 05.360) - A. mellifera apiMel4
# 24  1.7000 (% 50.132) (% 41.385) - D. mojavensis droMoj3
# 25  1.7000 (% 51.937) (% 43.828) - D. virilis droVir3
# 26  1.8000 (% 14.092) (% 14.808) - T. castaneum triCas2

# None of this concern for distances matters in building the first step, the
# maf files.

    # create species list and stripped down tree for autoMZ
    sed 's/[a-z][a-z]*_//g; s/:[0-9\.][0-9\.]*//g; s/;//; /^ *$/d' \
	dm6.27way.nh | xargs echo | sed 's/ //g; s/,/ /g' > tree.nh

    sed 's/[()]//g; s/,/ /g' tree.nh > species.list
# dm6 droSim1 droSec1 droYak3 droEre2 droBia2 droSuz1 droAna3 droBip2 droEug2
# droEle2 droKik2 droTak2 droRho2 droFic2 droPse3 droPer1 droMir2 droWil2
# droVir3 droMoj3 droAlb1 droGri2 musDom2 anoGam1 apiMel4 triCas2

    #	bash shell syntax here ...
    cd /hive/data/genomes/dm6/bed/multiz27way
    export H=/hive/data/genomes/dm6/bed
    mkdir mafLinks
    # want syntenic net for all the Drosophilas
    for G in droAlb1 droAna3 droBia2 droBip2 droEle2 droEre2 droEug2 droFic2 droGri2 droKik2 droMir2 droMoj3 droPer1 droPse3 droRho2 droSec1 droSim1 droSuz1 droTak2 droVir3 droWil2 droYak3
    do
      mkdir mafLinks/$G
      echo ln -s ${H}/lastz.$G/axtChain/dm6.${G}.synNet.maf.gz ./mafLinks/$G
      ln -s ${H}/lastz.$G/axtChain/dm6.${G}.synNet.maf.gz ./mafLinks/$G
    done

    # and basic maf net for the others:
    for G in anoGam1 apiMel4 musDom2 triCas2
do
      mkdir mafLinks/$G
      echo ln -s ${H}/lastz.$G/mafNet/dm6.${G}.net.maf.gz ./mafLinks/$G
      ln -s ${H}/lastz.$G/mafNet/dm6.${G}.net.maf.gz ./mafLinks/$G
done

    # verify the symLinks are good:
    ls -ogrtL mafLinks/*/*
# -rw-rw-r-- 1 57989657 Aug 29 11:52 mafLinks/droAna3/dm6.droAna3.synNet.maf.gz
# -rw-rw-r-- 1 66653928 Aug 29 13:31 mafLinks/droEre2/dm6.droEre2.synNet.maf.gz
# -rw-rw-r-- 1 55414479 Aug 29 13:48 mafLinks/droSim1/dm6.droSim1.synNet.maf.gz
# -rw-rw-r-- 1 64261526 Aug 29 13:55 mafLinks/droSec1/dm6.droSec1.synNet.maf.gz
# -rw-rw-r-- 1 43326252 Aug 29 13:58 mafLinks/droGri2/dm6.droGri2.synNet.maf.gz
# -rw-rw-r-- 1 14470342 Aug 29 14:03 mafLinks/triCas2/dm6.triCas2.net.maf.gz
# -rw-rw-r-- 1 53914019 Aug 29 14:06 mafLinks/droPer1/dm6.droPer1.synNet.maf.gz
# -rw-rw-r-- 1  8871501 Aug 29 14:09 mafLinks/apiMel4/dm6.apiMel4.net.maf.gz
# -rw-rw-r-- 1 44299779 Aug 29 14:12 mafLinks/droVir3/dm6.droVir3.synNet.maf.gz
# -rw-rw-r-- 1 56305568 Aug 29 14:19 mafLinks/droPse3/dm6.droPse3.synNet.maf.gz
# -rw-rw-r-- 1 58119972 Aug 29 14:31 mafLinks/droMir2/dm6.droMir2.synNet.maf.gz
# -rw-rw-r-- 1 68523213 Aug 29 14:31 mafLinks/droEle2/dm6.droEle2.synNet.maf.gz
# -rw-rw-r-- 1 68100446 Aug 29 14:35 mafLinks/droYak3/dm6.droYak3.synNet.maf.gz
# -rw-rw-r-- 1 64321608 Aug 29 14:43 mafLinks/droBia2/dm6.droBia2.synNet.maf.gz
# -rw-rw-r-- 1 44761222 Aug 29 14:43 mafLinks/droWil2/dm6.droWil2.synNet.maf.gz
# -rw-rw-r-- 1 68869400 Aug 29 14:44 mafLinks/droFic2/dm6.droFic2.synNet.maf.gz
# -rw-rw-r-- 1 42864649 Aug 29 14:45 mafLinks/droMoj3/dm6.droMoj3.synNet.maf.gz
# -rw-rw-r-- 1 14470901 Aug 29 14:47 mafLinks/anoGam1/dm6.anoGam1.net.maf.gz
# -rw-rw-r-- 1 61398493 Aug 29 14:51 mafLinks/droBip2/dm6.droBip2.synNet.maf.gz
# -rw-rw-r-- 1 66734812 Aug 29 14:52 mafLinks/droKik2/dm6.droKik2.synNet.maf.gz
# -rw-rw-r-- 1 67398540 Aug 29 14:54 mafLinks/droTak2/dm6.droTak2.synNet.maf.gz
# -rw-rw-r-- 1 60781324 Aug 29 14:54 mafLinks/droSuz1/dm6.droSuz1.synNet.maf.gz
# -rw-rw-r-- 1 28941891 Aug 29 15:01 mafLinks/droAlb1/dm6.droAlb1.synNet.maf.gz
# -rw-rw-r-- 1 69660184 Aug 29 15:02 mafLinks/droEug2/dm6.droEug2.synNet.maf.gz
# -rw-rw-r-- 1 61713626 Aug 29 15:06 mafLinks/droRho2/dm6.droRho2.synNet.maf.gz
# -rw-rw-r-- 1 19994083 Aug 29 15:08 mafLinks/musDom2/dm6.musDom2.net.maf.gz

    # split the maf files into a set of hashed named files
    # this hash named split keeps the same chr/contig names in the same
    # named hash file.
    mkdir /hive/data/genomes/dm6/bed/multiz27way/mafSplit
    cd /hive/data/genomes/dm6/bed/multiz27way/mafSplit
    for D in `sed -e "s/dm6 //" ../species.list`
do
    echo "${D}"
    mkdir $D
    cd $D
    echo "mafSplit -byTarget -useHashedName=8 /dev/null . ../../mafLinks/${D}/*.maf.gz"
    mafSplit -byTarget -useHashedName=8 /dev/null . \
	../../mafLinks/${D}/*.maf.gz
    cd ..
done

    # construct a list of all possible maf file names.
    # they do not all exist in each of the species directories
    find . -type f | wc -l
    # 678
    find . -type f | grep ".maf$" | xargs -L 1 basename | sort -u > maf.list
    wc -l maf.list
    # 161 maf.list

    mkdir /hive/data/genomes/dm6/bed/multiz27way/splitRun
    cd /hive/data/genomes/dm6/bed/multiz27way/splitRun
    mkdir maf run
    cd run
    mkdir penn
    cp -p /cluster/bin/penn/multiz.2009-01-21_patched/multiz penn
    cp -p /cluster/bin/penn/multiz.2009-01-21_patched/maf_project penn
    cp -p /cluster/bin/penn/multiz.2009-01-21_patched/autoMZ penn

    #	set the db and pairs directories here
    cat > autoMultiz.csh << '_EOF_'
#!/bin/csh -ef
set db = dm6
set c = $1
set result = $2
set run = `/bin/pwd`
set tmp = /dev/shm/$db/multiz.$c
set pairs = /hive/data/genomes/dm6/bed/multiz27way/mafSplit
/bin/rm -fr $tmp
/bin/mkdir -p $tmp
/bin/cp -p ../../tree.nh ../../species.list $tmp
pushd $tmp > /dev/null
foreach s (`/bin/sed -e "s/$db //" species.list`)
    set in = $pairs/$s/$c
    set out = $db.$s.sing.maf
    if (-e $in.gz) then
        /bin/zcat $in.gz > $out
        if (! -s $out) then
            echo "##maf version=1 scoring=autoMZ" > $out
        endif
    else if (-e $in) then
        /bin/ln -s $in $out
    else
        echo "##maf version=1 scoring=autoMZ" > $out
    endif
end
set path = ($run/penn $path); rehash
$run/penn/autoMZ + T=$tmp E=$db "`cat tree.nh`" $db.*.sing.maf $c \
        > /dev/null
popd > /dev/null
/bin/rm -f $result
/bin/cp -p $tmp/$c $result
/bin/rm -fr $tmp
'_EOF_'
# << happy emacs
    chmod +x autoMultiz.csh

    cat  << '_EOF_' > template
#LOOP
./autoMultiz.csh $(file1) {check out line+ /hive/data/genomes/dm6/bed/multiz27way/splitRun/maf/$(root1).maf}
#ENDLOOP
'_EOF_'
# << happy emacs

    ln -s ../../mafSplit/maf.list maf.list
    ssh ku
    cd /hive/data/genomes/dm6/bed/multiz27way/splitRun/run
    gensub2 maf.list single template stdout > jobList
    para -ram=8g create jobList
# Completed: 161 of 161 jobs
# CPU time in finished jobs:     172027s    2867.12m    47.79h    1.99d  0.005 y
# IO & Wait Time:                  1065s      17.75m     0.30h    0.01d  0.000 y
# Average job time:                1075s      17.92m     0.30h    0.01d
# Longest finished job:           42777s     712.95m    11.88h    0.50d
# Submission to last job:         42793s     713.22m    11.89h    0.50d

    # combine into one file  (the 1>&2 redirect sends the echo to stderr)
    cd /hive/data/genomes/dm6/bed/multiz27way
    head -1 splitRun/maf/053.maf > multiz27way.maf
    for F in splitRun/maf/*.maf
do
    echo "${F}" 1>&2
    egrep -v "^#" ${F}
done >> multiz27way.maf
    tail -1 splitRun/maf/053.maf >> multiz27way.maf
# -rw-rw-r--  1 5808772759 Sep  5 15:14 multiz27way.maf

    # Load into database
    ssh hgwdev
    cd /hive/data/genomes/dm6/bed/multiz27way
    mkdir /gbdb/dm6/multiz27way
    ln -s `pwd`/multiz27way.maf /gbdb/dm6/multiz27way
    cd /dev/shm
    time hgLoadMaf dm6 multiz27way
    # Loaded 2110160 mafs in 1 files from /gbdb/dm6/multiz27way
    # real    5m42.300s

    time hgLoadMafSummary -verbose=2 -minSize=30000 \
	-mergeGap=1500 -maxSize=200000 dm6 multiz27waySummary \
	/gbdb/dm6/multiz27way/multiz27way.maf
    # Created 157893 summary blocks from 34026658 components and 2110160 mafs from /gbdb/dm6/multiz27way/multiz27way.maf
    # real    2m44.856s


    wc -l multiz27way*.tab
    #  2110160 multiz27way.tab
    #   157893 multiz27waySummary.tab
    #  2268053 total

-rw-rw-r-- 1 102915136 Sep  5 15:20 multiz27way.tab
-rw-rw-r-- 1   7215723 Sep  5 15:35 multiz27waySummary.tab

    rm multiz27way*.tab

#######################################################################
# GAP ANNOTATE MULTIZ27WAY MAF AND LOAD TABLES (DONE - 2014-11-13 - Hiram)
    # mafAddIRows has to be run on single chromosome maf files, it does not
    #   function correctly when more than one reference sequence
    #   are in a single file.
    mkdir -p /hive/data/genomes/dm6/bed/multiz27way/anno/mafSplit
    cd /hive/data/genomes/dm6/bed/multiz27way/anno/mafSplit
    time mafSplit -byTarget -useFullSequenceName \
        /dev/null . ../../multiz27way.maf

    ls | wc
    # 277     277    6007

    cd /hive/data/genomes/dm6/bed/multiz27way/anno
    # skipped making the N.bed files - all dbs should have one already
    # check for N.bed files everywhere:
    for DB in `cat ../species.list.txt`
do
    if [ ! -s /hive/data/genomes/${DB}/${DB}.N.bed ]; then
        echo "MISS: ${DB}"
        cd /hive/data/genomes/${DB}
        twoBitInfo -nBed ${DB}.2bit ${DB}.N.bed
    else
        echo "  OK: ${DB}"
    fi
    cd /hive/data/genomes/dm6/bed/multiz27way/anno
done

    for DB in `cat ../species.list.txt`
    do
      echo "${DB} "
      ln -s  /hive/data/genomes/${DB}/${DB}.N.bed ${DB}.bed
      echo ${DB}.bed  >> nBeds
      ln -s  /hive/data/genomes/${DB}/chrom.sizes ${DB}.len
      echo ${DB}.len  >> sizes
    done

    # make sure they all are successful symLinks:
    ls -ogrtL

    cd /hive/data/genomes/dm6/bed/multiz27way/anno
    mkdir result
    cat << '_EOF_' > template
#LOOP
mafAddIRows -nBeds=nBeds $(path1) /hive/data/genomes/dm6/dm6.2bit {check out exists+ result/$(file1)}
#ENDLOOP
'_EOF_'
    # << happy emacs

    ls mafSplit/*.maf > maf.list
    ssh ku
    cd /hive/data/genomes/dm6/bed/multiz27way/anno
    gensub2 maf.list single template jobList

    para create jobList
    para try ... check ... etc
    para push
# Completed: 276 of 277 jobs
# Other count: 1 jobs
# CPU time in finished jobs:        285s       4.75m     0.08h    0.00d  0.000 y
# IO & Wait Time:                  4730s      78.84m     1.31h    0.05d  0.000 y
# Average job time:                  18s       0.30m     0.01h    0.00d
# Longest finished job:             103s       1.72m     0.03h    0.00d
# Submission to last job:           195s       3.25m     0.05h    0.00d
    # one job was completed manually due to ku problems

    # combine into one file
    head -q -n 1 result/chr2L.maf > dm6.27way.maf
    for F in result/*.maf
    do
	grep -h -v "^#" ${F}
    done >> dm6.27way.maf
    #   these maf files do not have the end marker, this does nothing:
    #   tail -q -n 1 result/chr2L.maf > dm6.27way.maf
    #   tail -q -n 1 result/GL172637.maf >> dm6.27way.maf
    # How about an official end marker:
    echo "##eof maf" >> dm6.27way.maf

    # Load into database
    rm /gbdb/dm6/multiz27way/multiz27way.maf   # remove old symlink
    ln -s `pwd`/dm6.27way.maf /gbdb/dm6/multiz27way/multiz27way.maf
    cd /dev/shm
    time nice -n +19 hgLoadMaf dm6 multiz27way

    #  real    1m43.417s

    time hgLoadMafSummary -verbose=2 -minSize=30000 \
	-mergeGap=1500 -maxSize=200000 dm6 multiz27waySummary \
	/gbdb/dm6/multiz27way/multiz27way.maf
    # Created 157893 summary blocks from 34026658 components and 2115903 mafs
    #  from /gbdb/dm6/multiz27way/multiz27way.maf
    # real    2m41.434s

    wc -l multiz27way*.tab
    # 2115903 multiz27way.tab
    #  157893 multiz27waySummary.tab
    # 2273796 total

    rm multiz27way*.tab

#########################################################################
# multiz27way MAF FRAMES (DONE - 2014-11-13 - Hiram)
    ssh hgwdev
    mkdir /hive/data/genomes/dm6/bed/multiz27way/frames
    cd /hive/data/genomes/dm6/bed/multiz27way/frames
    mkdir genes

    cat << '_EOF_' > showGenes.sh
#!/bin/sh
cat ../species.list.txt | while read db
do
    echo -n "${db}: "
    hgsql $db -N -e "show tables like '%Gene%'" | while read table
    do
        if [ $table = "ensGene" -o $table = "refGene" -o \
           $table = "mgcGenes" -o $table = "knownGene" -o \
           $table = "xenoRefGene" -o $table = "flyBaseGene" ]; then
           count=`hgsql $db -N -e "select count(*) from $table"`
           echo -n "${table}: ${count}, "
        else
           echo -n "${table}: n/a, "
        fi
    done
    orgName=`hgsql hgcentraltest -N -e "select scientificName from dbDb where name='$db'"`
    orgId=`hgsql dm6 -N -e "select id from organism where name='$orgName'"`
    if [ "x${orgId}y" = "xy" ]; then
        echo "Mrnas: 0"
    else
        count=`hgsql dm6 -N -e "select count(*) from gbCdnaInfo where organism=$orgId"`
        echo "Mrnas: ${count}"
    fi
done
'_EOF_'
    # << happy emacs
    chmod +x showGenes.sh
    ./showGenes.sh > survey.txt

    # Getting numbers of items in gene tracks for each of the assemblies
    #   to pick which is best gene set to use:
    cat survey.txt
dm6: flyBaseGene: 30264, flyBaseGenePep: n/a, refGene: 36039, xenoRefGene: 49470, Mrnas: 907097
droSim1: dmRefGene: n/a, xenoRefGene: 251522, Mrnas: 341
droSec1: xenoRefGene: 222725, Mrnas: 17
droYak3: Mrnas: 815
droEre2: ensGene: 15902, ensemblToGeneName: n/a, Mrnas: 42
droBia2: Mrnas: 0
droSuz1: Mrnas: 1
droAna3: ensGene: 16061, ensemblToGeneName: n/a, Mrnas: 30
droBip2: Mrnas: 3
droEug2: Mrnas: 5
droEle2: Mrnas: 1
droKik2: Mrnas: 2
droTak2: Mrnas: 1
droRho2: Mrnas: 4
droFic2: Mrnas: 1
droPse3: Mrnas: 18
droPer1: xenoRefGene: 220792, Mrnas: 34
droMir2: Mrnas: 2
droWil2: Mrnas: 20
droVir3: Mrnas: 84
droMoj3: Mrnas: 25
droAlb1: Mrnas: 1
droGri2: Mrnas: 10
musDom2: Mrnas: 6447
anoGam1: ensGene: 15802, xenoRefGene: 106775, Mrnas: 64135
apiMel4: Mrnas: 121235
triCas2: Mrnas: 1376

    # therefore:
    # refGene - dm6
    # ensGene - droEre2 droAna3 anoGam1

    #------------------------------------------------------------------------
    # get the genes for all genomes
    # mRNAs with CDS.  single select to get cds+psl, then split that up and
    # create genePred

    # using ensGene for: cavPor3 panTro4 canFam3 ailMel1 monDom5 galGal4 melGal1
    for qDB in droEre2 droAna3
    do
      echo hgsql -N -e \"'select * from 'ensGene\;\" ${qDB}
	  hgsql -N -e "select * from ensGene" ${qDB} | cut -f 2-11 \
	  | genePredSingleCover stdin stdout | gzip -2c > genes/$qDB.gp.gz
    done

    # seems to be an older ensGene here, no 'bin' column:
    for qDB in anoGam1
    do
      echo hgsql -N -e \"'select * from 'ensGene\;\" ${qDB}
	  hgsql -N -e "select * from ensGene" ${qDB} \
	  | genePredSingleCover stdin stdout | gzip -2c > genes/$qDB.gp.gz
    done

    # Using refGene for: dm6 - want the full extended genePred:
    for qDB in dm6
do
  hgsql -N -e "select * from refGene" ${qDB} | cut -f2- \
     | genePredSingleCover stdin stdout | gzip -2c \
        > genes/$qDB.gp.gz
  echo "${qDB} done"
done

    # verify counts for genes are reasonable:
    for T in genes/*.gz
do
  echo -n "# $T: "
  zcat $T | cut -f1 | sort | uniq -c | wc -l
done

# genes/anoGam1.gp.gz: 14267
# genes/dm6.gp.gz: 13823
# genes/droAna3.gp.gz: 15022
# genes/droEre2.gp.gz: 14992

    ssh hgwdev
    cd /hive/data/genomes/dm6/bed/multiz27way/frames
    time (cat ../multiz27way.maf \
	| genePredToMafFrames dm6 stdin stdout \
          dm6 genes/dm6.gp.gz droAna3 genes/droAna3.gp.gz \
          droEre2 genes/droEre2.gp.gz anoGam1 genes/anoGam1.gp.gz \
		| gzip > multiz27way.mafFrames.gz)
    #	real    2m22.546s

    # verify there are frames on everything:
    zcat multiz27way.mafFrames.gz | awk '{print $4}' | sort | uniq -c

    # 164994 anoGam1
    #  55050 dm6
    # 118998 droAna3
    #  77171 droEre2

    ssh hgwdev
    cd /hive/data/genomes/dm6/bed/multiz27way/frames
    time hgLoadMafFrames dm6 multiz27wayFrames multiz27way.mafFrames.gz
    # real    0m4.457s

##############################################################################
# construct downloads files and pushQ entry (DONE - 2014-10-14 - Hiram)
#   before starting downloads, the joinerCheck should be clean
#   after dm6 is added to all.joiner:
    joinerCheck -tableCoverage -database=dm6 all.joiner
    joinerCheck -keys -database=dm6 all.joiner

    cd /hive/data/genomes/dm6
    makeDownloads.pl -dbHost=hgwdev -workhorse=hgwdev dm6 \
	> downloads.log 2>&1
    # real    28m50.409s

    # examine the goldenPath/*/README.txt files to verify the text

    # ready for pushQ entry (DONE - 2014-10-14 - Hiram)
    mkdir /hive/data/genomes/dm6/pushQ
    cd /hive/data/genomes/dm6/pushQ
    makePushQSql.pl dm6 > dm6.sql 2> stderr.out
    # real    1m52.294s
    # some errors are legitimate and OK:
     head stderr.out
# WARNING: hgwdev does not have /gbdb/dm6/wib/gc5Base.wib
# WARNING: hgwdev does not have /gbdb/dm6/wib/quality.wib
# WARNING: hgwdev does not have /gbdb/dm6/bbi/qualityBw/quality.bw
# WARNING: dm6 does not have seq

    scp -p dm6.sql qateam@hgwbeta:/tmp
    ssh qateam@hgwbeta './bin/x86_64/hgsql qapushq < /tmp/dm6.sql'

#########################################################################
# Phylogenetic tree from 27-way (DONE - 2014-11-13 - Hiram)

    mkdir /hive/data/genomes/dm6/bed/multiz27way/4d
    cd /hive/data/genomes/dm6/bed/multiz27way/4d

    # the split annotated maf's are in:
    ../anno/result/*.maf

    # using refGene for dm6, only transcribed genes and nothing
    #	from the randoms and other misc.
    hgsql dm6 -Ne \
    "select * from refGene WHERE cdsEnd > cdsStart;" | cut -f 2- \
	| egrep -v "_random|chrUn" > refGene.gp
    # verify primary chroms only:
    cut -f2 refGene.gp | sort | uniq -c | sort -rn | less
    #  7213 chr3R
    #  6581 chr2L
    #  6167 chr2R
    #  5914 chr3L
    #  5577 chrX
    #   300 chr4
    #    31 chrY

    genePredSingleCover refGene.gp stdout | sort > refGeneNP.gp
    wc -l *.gp
    #  31783 refGene.gp
    #  13955 refGeneNP.gp

    ssh ku
    mkdir /hive/data/genomes/dm6/bed/multiz27way/4d/run
    cd /hive/data/genomes/dm6/bed/multiz27way/4d/run
    mkdir ../mfa

    # newer versions of msa_view have a slightly different operation
    # the sed of the gp file inserts the reference species in the chr name

    cat << '_EOF_' > 4d.csh
#!/bin/csh -fe
set PHASTBIN = /cluster/bin/phast.build/cornellCVS/phast.2010-12-30/bin
set r = "/hive/data/genomes/dm6/bed/multiz27way"
set c = $1
set infile = $r/anno/result/$2
set outfile = $3
cd /scratch/tmp
# 'clean' maf
perl -wpe 's/^s ([^.]+)\.\S+/s $1/' $infile > $c.maf
awk -v C=$c '$2 == C {print}' $r/4d/refGeneNR.gp | sed -e "s/\t$c\t/\tdm6.$c\t/" > $c.gp
set NL=`wc -l $c.gp| gawk '{print $1}'`
if ("$NL" != "0") then
    $PHASTBIN/msa_view --4d --features $c.gp -i MAF $c.maf -o SS > $c.ss
    $PHASTBIN/msa_view -i SS --tuple-size 1 $c.ss > $r/4d/run/$outfile
else
    echo "" > $r/4d/run/$outfile
endif
rm -f $c.gp $c.maf $c.ss
'_EOF_'
    # << happy emacs
    chmod +x 4d.csh

    ls -1S /hive/data/genomes/dm6/bed/multiz27way/anno/result/*.maf \
	| egrep -v "_random|chrUn" | sed -e "s#.*multiz27way/anno/result/##" \
	> maf.list

    cat << '_EOF_' > template
#LOOP
4d.csh $(root1) $(path1) {check out line+ ../mfa/$(root1).mfa}
#ENDLOOP
'_EOF_'
    # << happy emacs

    # the tac puts the quick jobs at the front
    gensub2 maf.list single template stdout | tac > jobList
    para create jobList
    para try ... check
    para -maxJob=5 push
    para time
# Completed: 8 of 8 jobs
# CPU time in finished jobs:        502s       8.37m     0.14h    0.01d  0.000 y
# IO & Wait Time:                    39s       0.64m     0.01h    0.00d  0.000 y
# Average job time:                  68s       1.13m     0.02h    0.00d
# Longest finished job:             118s       1.97m     0.03h    0.00d
# Submission to last job:           125s       2.08m     0.03h    0.00d

    # combine mfa files
    ssh hgwdev
    cd /hive/data/genomes/dm6/bed/multiz27way/4d
    # one file result is useless:
# -rw-rw-r-- 1       1 Nov 13 14:13 chrM.mfa
# -rw-rw-r-- 1   48002 Nov 13 14:13 chrY.mfa
# -rw-rw-r-- 1  207788 Nov 13 14:13 chr4.mfa
# -rw-rw-r-- 1 3658928 Nov 13 14:14 chr2R.mfa
# -rw-rw-r-- 1 3532514 Nov 13 14:14 chr2L.mfa
# -rw-rw-r-- 1 3198092 Nov 13 14:14 chrX.mfa
# -rw-rw-r-- 1 3670700 Nov 13 14:14 chr3L.mfa
# -rw-rw-r-- 1 4577630 Nov 13 14:14 chr3R.mfa

    rm -f mfa/chrM.mfa

    #want comma-less species.list.txt
    /cluster/bin/phast.build/cornellCVS/phast.2010-12-30/bin/msa_view \
	--aggregate "`cat ../species.list.txt`" mfa/*.mfa | sed s/"> "/">"/ \
	    > 4d.all.mfa
    # check they are all in there:
    grep "^>" 4d.all.mfa
# >dm6
# >droSim1
# >droSec1
# >droYak3
# >droEre2
....
# >droAlb1
# >droGri2
# >musDom2
# >anoGam1
# >apiMel4
# >triCas2

    # create species list and stripped down tree for autoMZ
    sed 's/[a-z][a-z]*_//g; s/:[0-9\.][0-9\.]*//g; s/;//; /^ *$/d' \
        ../dm6.27way.nh > tmp.nh
    echo `cat tmp.nh` > tree-commas.nh
    echo `cat tree-commas.nh` | sed 's/ //g; s/,/ /g' > tree.nh
    # tree-commas.nh:
# (((((((((((((((((dm6, (droSim1, droSec1)), (droYak3, droEre2)),
# (droBia2, droSuz1)), (droAna3, droBip2)), droEug2), droEle2), droKik2),
# droTak2), droRho2), droFic2), ((droPse3, droPer1), droMir2)), droWil2),
# (((droVir3, droMoj3), droAlb1), droGri2)), musDom2), anoGam1), apiMel4),
# triCas2)

    # use phyloFit to create tree model (output is phyloFit.mod)
    time /cluster/bin/phast.build/cornellCVS/phast.2010-12-30/bin/phyloFit \
	    --EM --precision MED --msa-format FASTA --subst-mod REV \
		--tree tree-commas.nh 4d.all.mfa
    #	real    22m8.563s

    mv phyloFit.mod all.mod

    grep TREE all.mod
# TREE: (((((((((((((((((dm6:0.0542029,(droSim1:0.0213103,
# droSec1:0.0227973):0.0242621):0.0585014,(droYak3:0.0889658,
# droEre2:0.0771526):0.0254381):0.181996,(droBia2:0.126739,
# droSuz1:0.0934222):0.0964756):0.0125386,(droAna3:0.132676,
# droBip2:0.134892):0.531515):0.00264138,droEug2:0.318569):0.0369765,
# droEle2:0.207426):0.0025605,droKik2:0.485329):0.00243957,
# droTak2:0.226746):0.0143511,droRho2:0.195624):0.0495906,
# droFic2:0.257704):0.290027,((droPse3:0.0138526,droPer1:0.014979):0.00969914,
# droMir2:0.0172853):0.398344):0.150107,droWil2:0.550935):0.0418275,
# (((droVir3:0.242786,droMoj3:0.321598):0.090819,droAlb1:0.436249):0.0143777,
# droGri2:0.318782):0.162991):0.222902,musDom2:0.772795):0.268197,
# anoGam1:1.14709):0.387299,apiMel4:0.865834):0.588712,triCas2:0.588712);

##############################################################################
# phastCons 27-way (DONE - 2014-11-27 - Hiram)

    # split 27way mafs into 10M chunks and generate sufficient statistics
    # files for # phastCons
    ssh ku
    mkdir -p /hive/data/genomes/dm6/bed/multiz27way/cons/SS
    cd /hive/data/genomes/dm6/bed/multiz27way/cons/SS
    mkdir result done

    # XXX beware missed jobs in this procedure.  The first time through this,
    # the chr3R was missed.  It was caught up manually 2014-12-04

    cat << '_EOF_' > mkSS.csh
#!/bin/csh -ef
set c = $1
set MAF = /hive/data/genomes/dm6/bed/multiz27way/anno/result/$c.maf
set WINDOWS = /hive/data/genomes/dm6/bed/multiz27way/cons/SS/result/$c
set WC = `cat $MAF | wc -l`
set NL = `grep "^#" $MAF | wc -l`
if ( -s $2 ) then
    exit 0
endif
if ( -s $2.running ) then
    exit 0
endif

date >> $2.running

rm -fr $WINDOWS
mkdir $WINDOWS
pushd $WINDOWS > /dev/null
if ( $WC != $NL ) then
/cluster/bin/phast.build/cornellCVS/phast.2010-12-30/bin/msa_split \
    $MAF -i MAF -o SS -r $WINDOWS/$c -w 10000000,0 -I 1000 -B 5000
endif
popd > /dev/null
date >> $2
rm -f $2.running
'_EOF_'
    # << happy emacs

    chmod +x mkSS.csh

    cat << '_EOF_' > template
#LOOP
mkSS.csh $(root1) {check out line+ done/$(root1)}
#ENDLOOP
'_EOF_'
    # << happy emacs

    #	do the easy ones first to see some immediate results
    ls -1S -r ../../anno/result | sed -e "s/.maf//" > maf.list

    gensub2 maf.list single template jobList
    para create jobList
    para try ... check ... etc

# Completed: 276 of 277 jobs
# CPU time in finished jobs:        896s      14.93m     0.25h    0.01d  0.000 y
# IO & Wait Time:                 16749s     279.16m     4.65h    0.19d  0.001 y
# Average job time:                  64s       1.07m     0.02h    0.00d
# Longest finished job:             283s       4.72m     0.08h    0.00d
# Submission to last job:          1017s      16.95m     0.28h    0.01d
    # one job was finished manually due to ku problems

    #	 Not all files have a result due to low alignment
    wc -l jobList
    #  277 jobList
    find ./result -type f | wc
    #  158     158    8397

    # Run phastCons
    #	This job is I/O intensive in its output files, beware where this
    #	takes place or do not run too many at once.
    ssh ku
    mkdir -p /hive/data/genomes/dm6/bed/multiz27way/cons/run.cons
    cd /hive/data/genomes/dm6/bed/multiz27way/cons/run.cons

    #	there are going to be only one phastCons run using
    #	this same script.  It triggers off of the current working directory
    #	$cwd:t which is the "grp" in this script.  It is:
    #	all

    cat << '_EOF_' > doPhast.csh
#!/bin/csh -fe
set PHASTBIN = /cluster/bin/phast.build/cornellCVS/phast.2010-12-30/bin
set c = $1
set cX = $1:r
set f = $2
set len = $3
set cov = $4
set rho = $5
set grp = $cwd:t
set cons = /hive/data/genomes/dm6/bed/multiz27way/cons
set tmp = $cons/tmp/$f
mkdir -p $tmp
set ssSrc = $cons
set useGrp = "$grp.mod"
ln -s $ssSrc/SS/result/$c/$f.ss $tmp
ln -s $cons/$grp/$grp.mod $tmp
pushd $tmp > /dev/null
$PHASTBIN/phastCons $f.ss $useGrp \
    --rho $rho --expected-length $len --target-coverage $cov --quiet \
    --seqname $c --idpref $c --most-conserved $f.bed --score > $f.pp
popd > /dev/null
mkdir -p pp/$c bed/$c
sleep 4
touch pp/$c bed/$c
rm -f pp/$c/$f.pp
rm -f bed/$c/$f.bed
mv $tmp/$f.pp pp/$c
mv $tmp/$f.bed bed/$c
rm -fr $tmp
'_EOF_'
    # << happy emacs
    chmod +x doPhast.csh

    #	this template will serve for all runs
    #	root1 == chrom name, file1 == ss file name without .ss suffix

    cat << '_EOF_' > template
#LOOP
../run.cons/doPhast.csh $(root1) $(file1) 45 0.3 0.3 {check out line+ pp/$(root1)/$(file1).pp}
#ENDLOOP
'_EOF_'
    # << happy emacs

    find ../SS -type f | grep ".ss$" | sed -e 's/.ss$//' > ss.list
    wc -l ss.list
    # 158 ss.list

    # Create parasol batch and run it
    # run for all species
    mkdir /hive/data/genomes/dm6/bed/multiz27way/cons/all
    cd /hive/data/genomes/dm6/bed/multiz27way/cons/all
    #	Using the .mod tree
    cp -p ../../4d/all.mod ./all.mod

    gensub2 ../run.cons/ss.list single ../run.cons/template jobList
    para create jobList
    para try ... check ...
    para push
# Completed: 158 of 158 jobs
# CPU time in finished jobs:       1356s      22.59m     0.38h    0.02d  0.000 y
# IO & Wait Time:                  1391s      23.19m     0.39h    0.02d  0.000 y
# Average job time:                  17s       0.29m     0.00h    0.00d
# Longest finished job:             228s       3.80m     0.06h    0.00d
# Submission to last job:           900s      15.00m     0.25h    0.01d

    cd /hive/data/genomes/dm6/bed/multiz27way/cons/all

    find ./bed -type f | grep ".bed$" | xargs cat | sort -k1,1 -k2,2n \
	| awk '{printf "%s\t%d\t%d\tlod=%d\t%s\n", $1, $2, $3, $5, $5;}' \
	    > tmpMostConserved.bed

    /cluster/bin/scripts/lodToBedScore tmpMostConserved.bed > mostConserved.bed

    # load into database
    time hgLoadBed dm6 phastConsElements27way mostConserved.bed
    # Read 1427898 elements of size 5 from mostConserved.bed
    # real    0m13.345s

    # Read 1078721 elements of size 5 from mostConserved.bed
    # real    0m9.166s

    # on human we often try for 5% overall cov, and 70% CDS cov
    # most bets are off here for that goal, these alignments are too few
    #	and too far between
    featureBits dm6 -enrichment refGene:cds phastConsElements27way
    # refGene:cds 16.046%, phastConsElements27way 51.675%, both 13.124%,
    #   cover 81.79%, enrich 1.58x

    # Create merged posterier probability file and wiggle track data files
    cd /hive/data/genomes/dm6/bed/multiz27way/cons/all
    mkdir downloads

    time (find ./pp -type f | sed -e "s#^./##; s#\.# d #g; s#-# m #;" \
	| sort -k1,1 -k3,3n | sed -e "s# d #.#g; s# m #-#g;" | xargs cat \
	| gzip -c > downloads/phastCons27way.wigFix.gz)
    #  real    1m12.361s

    # check integrity of data with wigToBigWig
    time (zcat downloads/phastCons27way.wigFix.gz \
	| wigToBigWig -verbose=2 stdin /hive/data/genomes/dm6/chrom.sizes \
	    phastCons27way.bw) > bigWig.log 2>&1 &
    tail bigWig.log

    # pid=34798: VmPeak:     1054096 kB
    # real    1m8.421s

    bigWigInfo phastCons27way.bw
# version: 4
# isCompressed: yes
# isSwapped: 0
# primaryDataSize: 172,458,601
# primaryIndexSize: 3,804,252
# zoomLevels: 10
# chromCount: 151
# basesCovered: 117,294,910
# mean: 0.627817
# min: 0.000000
# max: 1.000000
# std: 0.446656

    #	encode those files into wiggle data
    time (zcat downloads/phastCons27way.wigFix.gz \
	| wigEncode stdin phastCons27way.wig phastCons27way.wib)
    # Converted stdin, upper limit 1.00, lower limit 0.00
    # real    0m25.328s

    du -hsc *.wi?
    #    112M    phastCons27way.wib
    #    11M     phastCons27way.wig
    #    123M    total

    # Load gbdb and database with wiggle.
    ln -s `pwd`/phastCons27way.wib /gbdb/dm6/multiz27way/phastCons27way.wib
    time nice -n +19 hgLoadWiggle -pathPrefix=/gbdb/dm6/multiz27way \
	dm6 phastCons27way phastCons27way.wig
    #   real    0m0.854s

    # use to set trackDb.ra entries for wiggle min and max
    # and verify table is loaded correctly

    wigTableStats.sh dm6 phastCons27way
    # db.table      min max mean count sumData stdDev viewLimits
    # dm6.phastCons27way 0 1 0.618592 89437277 5.53252e+07 0.448759
    #  viewLimits=0:1

    #  Create histogram to get an overview of all the data
    time nice -n +19 hgWiggle -doHistogram -db=dm6 \
	-hBinSize=0.001 -hBinCount=1000 -hMinVal=0.0 -verbose=2 \
	    phastCons27way > histogram.data 2>&1
    #   real    0m5.630s
    #	create plot of histogram:

    cat << '_EOF_' | gnuplot > histo.png
set terminal png small x000000 xffffff xc000ff x66ff66 xffff00 x00ffff
set size 1.4, 0.8
set key left box
set grid noxtics
set grid ytics
set title " D. melanogaster dm6 Histogram phastCons27way track"
set xlabel " phastCons27way score"
set ylabel " Relative Frequency"
set y2label " Cumulative Relative Frequency (CRF)"
set y2range [0:1]
set y2tics
set yrange [0:0.02]

plot "histogram.data" using 2:5 title " RelFreq" with impulses, \
        "histogram.data" using 2:7 axes x1y2 title " CRF" with lines
'_EOF_'
    #	<< happy emacs

    display histo.png &

#############################################################################
# phyloP for 27-way (DONE - 2014-11-13 - Hiram)
    # run phyloP with score=LRT
    ssh ku
    mkdir /cluster/data/dm6/bed/multiz27way/consPhyloP
    cd /cluster/data/dm6/bed/multiz27way/consPhyloP

    mkdir run.phyloP
    cd run.phyloP
    # Adjust model file base composition background and rate matrix to be
    # representative of the chromosomes in play
    grep BACKGROUND ../../cons/all/all.mod | awk '{printf "%0.3f\n", $3 + $4}'
    # 0.662
    /cluster/bin/phast.build/cornellCVS/phast.2010-12-30/bin/modFreqs \
	../../cons/all/all.mod 0.662 > all.mod
    # verify, the BACKGROUND should now be paired up:
    grep BACK all.mod
    # BACKGROUND: 0.169000 0.331000 0.331000 0.169000

    #	following the pattern from rn5 with grp: "all" (no other grp)
    cat << '_EOF_' > doPhyloP.csh
#!/bin/csh -fe
set PHASTBIN = /cluster/bin/phast.build/cornellCVS/phast.2010-12-30/bin
set f = $1
set file1 = $f:t
set out = $2
set cName = $f:t:r
set n = $f:r:e
set grp = $cwd:t
set cons = /hive/data/genomes/dm6/bed/multiz27way/consPhyloP
set tmp = $cons/tmp/$grp/$f
rm -fr $tmp
mkdir -p $tmp
set ssSrc = "/hive/data/genomes/dm6/bed/multiz27way/cons/SS/result/$f"
set useGrp = "$grp.mod"
ln -s $cons/run.phyloP/$grp.mod $tmp
pushd $tmp > /dev/null
$PHASTBIN/phyloP --method LRT --mode CONACC --wig-scores --chrom $cName \
    -i SS $useGrp $ssSrc.ss > $file1.wigFix
popd > /dev/null
mkdir -p $out:h
sleep 4
mv $tmp/$file1.wigFix $out
rm -fr $tmp
rmdir --ignore-fail-on-non-empty $cons/tmp/$grp/$cName
rmdir --ignore-fail-on-non-empty $cons/tmp/$grp
rmdir --ignore-fail-on-non-empty $cons/tmp
'_EOF_'
    # << happy emacs
    chmod +x doPhyloP.csh

    # Create list of chunks
    find ../../cons/SS/result -type f | grep ".ss$" \
	| sed -e "s/.ss$//; s#^../../cons/SS/result/##" > ss.list
    # make sure the list looks good
    wc -l ss.list
    #   162 ss.list

    # Create template file
    #	file1 == $chr/$chunk/file name without .ss suffix
    cat << '_EOF_' > template
#LOOP
../run.phyloP/doPhyloP.csh $(path1) {check out line+ wigFix/$(dir1)/$(file1).wigFix}
#ENDLOOP
'_EOF_'
    # << happy emacs

    ######################   Running all species  #######################
    # setup run for all species
    mkdir /hive/data/genomes/dm6/bed/multiz27way/consPhyloP/all
    cd /hive/data/genomes/dm6/bed/multiz27way/consPhyloP/all
    rm -fr wigFix
    mkdir wigFix

    gensub2 ../run.phyloP/ss.list single ../run.phyloP/template jobList
    # the -ram=8g will allow only one job per node to slow this down since
    #	it would run too fast otherwise.  Either run on one of the small
    #	klusters or use the -ram=8g on the para create
    para -ram=8g create jobList
    para try ... check ... etc ...
    para -maxJob=64 push
    para time > run.time
# Completed: 158 of 158 jobs
# CPU time in finished jobs:      45635s     760.58m    12.68h    0.53d  0.001 y
# IO & Wait Time:                  2235s      37.26m     0.62h    0.03d  0.000 y
# Average job time:                 303s       5.05m     0.08h    0.00d
# Longest finished job:            6072s     101.20m     1.69h    0.07d
# Submission to last job:          6125s     102.08m     1.70h    0.07d

    # make downloads
    mkdir downloads

    time (find ./wigFix -type f | sed -e "s#^./##; s#\.# d #g; s#-# m #;" \
	| sort -k1,1 -k3,3n | sed -e "s# d #.#g; s# m #-#g;" | xargs cat \
	| gzip -c > downloads/phyloP27way.wigFix.gz)
    #   real    2m6.787s

    # check integrity of data with wigToBigWig
    time (zcat downloads/phyloP27way.wigFix.gz \
	| wigToBigWig -verbose=2 stdin /hive/data/genomes/dm6/chrom.sizes \
	phyloP27way.bw) > bigWig.log 2>&1
    egrep "real|VmPeak" bigWig.log
    # pid=523: VmPeak:       1372692 kB
    # real    1m13.231s

    bigWigInfo phyloP27way.bw
# version: 4
# isCompressed: yes
# isSwapped: 0
# primaryDataSize: 328,237,985
# primaryIndexSize: 3,804,252
# zoomLevels: 10
# chromCount: 151
# basesCovered: 117,294,910
# mean: 1.206203
# min: -8.216000
# max: 7.073000
# std: 1.669809

    #	encode those files into wiggle data
    time (zcat downloads/phyloP27way.wigFix.gz \
	| wigEncode stdin phyloP27way.wig phyloP27way.wib)
    #   Converted stdin, upper limit 7.07, lower limit -8.22
    #   real    0m35.814s

    du -hsc *.wi?
    #   112M    phyloP27way.wib
    #   12M     phyloP27way.wig
    #   124M    total

    # Load gbdb and database with wiggle.
    ln -s `pwd`/phyloP27way.wib /gbdb/dm6/multiz27way/phyloP27way.wib
    hgLoadWiggle -pathPrefix=/gbdb/dm6/multiz27way dm6 \
	phyloP27way phyloP27way.wig

    # use to set trackDb.ra entries for wiggle min and max
    # and verify table is loaded correctly

    wigTableStats.sh dm6 phyloP27way
    # db.table      min max mean count sumData stdDev viewLimits
    # dm6.phyloP27way -8.216 7.073 1.2062 117294910 1.41482e+08 1.66981
    # viewLimits=-7.14284:7.073

    calc 8.216\+7.7073
    #  8.216+7.7073 = 15.923300

    #  Create histogram to get an overview of all the data
    time nice -n +19 hgWiggle -doHistogram -db=dm6 \
	-hBinSize=0.0161 -hBinCount=1000 -hMinVal=-8.3 -verbose=2 \
	    phyloP27way > histogram.data 2>&1
    #	real    0m7.284s

    # find out the range for the 2:5 graph
    grep -v chrom histogram.data | grep "^[0-9]" | ave -col=5 stdin
    # Q1 0.000012
    # median 0.000255
    # Q3 0.001728
    # average 0.001134
    # min 0.000000
    # max 0.009261
    # count 882
    # total 0.999997
    # standard deviation 0.001655

    #	create plot of histogram:

    cat << '_EOF_' | gnuplot > histo.png
set terminal png small x000000 xffffff xc000ff x66ff66 xffff00 x00ffff
set size 1.4, 0.8
set key left box
set grid noxtics
set grid ytics
set title " D. melanogaster dm6 Histogram phyloP27way track"
set xlabel " phyloP27way score"
set ylabel " Relative Frequency"
set y2label " Cumulative Relative Frequency (CRF)"
set y2range [0:1]
set y2tics
set yrange [0:0.007]
set xrange [-3:6]

plot "histogram.data" using 2:5 title " RelFreq" with impulses, \
        "histogram.data" using 2:7 axes x1y2 title " CRF" with lines
'_EOF_'
    #	<< happy emacs

    display histo.png &

#########################################################################
# Construct download files for 27-way (DONE - 2014-11-20 - Hiram)
    mkdir /usr/local/apache/htdocs-hgdownload/goldenPath/dm6/multiz27way
    mkdir /usr/local/apache/htdocs-hgdownload/goldenPath/dm6/multiz27way/maf
    mkdir /usr/local/apache/htdocs-hgdownload/goldenPath/dm6/multiz27way/alignments
    mkdir /usr/local/apache/htdocs-hgdownload/goldenPath/dm6/phastCons27way
    mkdir /usr/local/apache/htdocs-hgdownload/goldenPath/dm6/phastCons27way/dm6.27way.phastCons
    mkdir /usr/local/apache/htdocs-hgdownload/goldenPath/dm6/phyloP27way
    mkdir /usr/local/apache/htdocs-hgdownload/goldenPath/dm6/phyloP27way/dm6.27way.phyloP27way

    mkdir /hive/data/genomes/dm6/bed/multiz27way/downloads
    cd /hive/data/genomes/dm6/bed/multiz27way/downloads
    mkdir multiz27way phastCons27way phyloP27way
    cd multiz27way
    mkdir maf alignments
    cd maf
    time rsync -a -P ../../../anno/result/ ./
    #   real    xxx
    time gzip *.maf
    #   real    real    8m46.494s
    time md5sum *.maf.gz > md5sum.txt
    #   real    0m6.206s

    ln -s `pwd`/*.maf.gz `pwd`/md5sum.txt \
        /usr/local/apache/htdocs-hgdownload/goldenPath/dm6/multiz27way/maf
    cd ..
    du -hsc maf ../../anno/result/
    # 1.1G    maf
    # 7.4G    ../../anno/result/
    # 8.5G    total

    grep TREE ../../4d/all.mod | sed -e 's/TREE: //' \
       | ~/kent/src/hg/utils/phyloTrees/asciiTree.pl /dev/stdin \
          > dm6.27way.nh

   ~/kent/src/hg/utils/phyloTrees/commonNames.sh dm6.27way.nh \
       | ~/kent/src/hg/utils/phyloTrees/asciiTree.pl /dev/stdin \
          > dm6.27way.commonNames.nh
   ~/kent/src/hg/utils/phyloTrees/scientificNames.sh dm6.27way.nh \
       | ~/kent/src/hg/utils/phyloTrees/asciiTree.pl /dev/stdin \
          > dm6.27way.scientificNames.nh
    md5sum *.gz *.nh > md5sum.txt

    ln -s `pwd`/*.nh `pwd`/*.txt `pwd`/*.maf.gz \
        /usr/local/apache/htdocs-hgdownload/goldenPath/dm6/multiz27way

    #####################################################################
    cd /hive/data/genomes/dm6/bed/multiz27way/downloads/phastCons27way
    ln -s ../../cons/all/downloads/phastCons27way.wigFix.gz .

    ln -s ../../cons/all/all.mod dm6.27way.phastCons.mod
    ln -s ../../cons/all/phastCons27way.bw dm6.27way.phastCons.bw
    time md5sum *.mod *.bw *.gz > md5sum.txt
    #  real    0m4.631s

    ln -s `pwd`/*.gz `pwd`/*.mod `pwd`/*.bw `pwd`/*.txt \
  /usr/local/apache/htdocs-hgdownload/goldenPath/dm6/phastCons27way/

    # obtain the README.txt from hg38/phastCons7way and update for this
    #   situation, from:
    # /hive/data/genomes/hg38/bed/multiz7way/downloads/phastCons7way/README.txt
    ln -s `pwd`/*.mod `pwd`/*.bw `pwd`/*.txt \
      /usr/local/apache/htdocs-hgdownload/goldenPath/dm6/phastCons27way

    #####################################################################
    cd /hive/data/genomes/dm6/bed/multiz27way/downloads/phyloP27way
    mkdir dm6.27way.phyloP27way
    cd dm6.27way.phyloP27way
    ln -s ../../../consPhyloP/all/downloads/*.gz .
    time md5sum *.gz > md5sum.txt &
    # real    3m37.827s

    ln -s `pwd`/*.gz `pwd`/md5sum.txt \
      /usr/local/apache/htdocs-hgdownload/goldenPath/dm6/phyloP27way/dm6.27way.phyloP27way

    cd ..
    ln -s ../../consPhyloP/run.phyloP/all.mod dm6.27way.phyloP27way.mod

    ln -s ../../consPhyloP/all/phyloP27way.bw dm6.27way.phyloP27way.bw

    time md5sum *.mod *.bw > md5sum.txt &
    #  real    4m44.724s

    # obtain the README.txt from hg38/phyloP7way and update for this
    #   situation, from:
    # /hive/data/genomes/hg38/bed/multiz7way/downloads/phyloP7way/README.txt
    ln -s `pwd`/*.mod `pwd`/*.bw `pwd`/*.txt \
      /usr/local/apache/htdocs-hgdownload/goldenPath/dm6/phyloP27way

    ############################################################################
    # creating upstream mafs (DONE - 2014-11-20 - Hiram)
    ssh hgwdev

    mkdir /hive/data/genomes/dm6/bed/multiz27way/downloads/multiz27way
    cd /hive/data/genomes/dm6/bed/multiz27way/downloads/multiz27way
    # run this bash script
    cat << '_EOF_' > mkUpstream.sh
#!/bin/sh
DB=dm6
GENE=refGene
NWAY=multiz27way
export DB GENE

for S in 1000 2000 5000
do
    echo "making upstream${S}.${GENE}.maf"
    echo "featureBits ${DB} ${GENE}:upstream:${S} -fa=/dev/null -bed=stdout"
    featureBits ${DB} ${GENE}:upstreamAll:${S} -fa=/dev/null -bed=stdout \
        | perl -wpe 's/_up[^\t]+/\t0/' | sort -k1,1 -k2,2n \
        | mafFrags ${DB} ${NWAY} stdin stdout \
                -orgs=/hive/data/genomes/${DB}/bed/${NWAY}/species.list.txt \
        | gzip -c > upstream${S}.${GENE}.maf.gz
    echo "done upstream${S}.${GENE}.maf.gz"
done
'_EOF_'
    # << happy emacs
    chmod +x mkUpstream.sh
    time ./mkUpstream.sh
    #   real    69m25.852s

    # FIXUP/VERIFY the genbank.conf file to indicate this gene choice for the
    #   upstream automatic generation
    ~/kent/src/hg/makeDb/genbank/etc/genbank.conf
# dm6.upstreamGeneTbl = refGene
# dm6.upstreamMaf = multiz27way /hive/data/genomes/dm6/bed/multiz27way/species.list.txt

    git commit -m "dm6 GeneTbl upstreamMaf set to 27-way refs #4049" genbank.conf
    git push
    make etc-update

#############################################################################
# hgPal downloads (DONE - 2014-11-20 - Hiram)
#   FASTA from 27-way for refGene

    ssh hgwdev
    screen -S dm6HgPal
    mkdir /hive/data/genomes/dm6/bed/multiz27way/pal
    cd /hive/data/genomes/dm6/bed/multiz27way/pal
    cat ../species.list | tr '[ ]' '[\n]' > order.list

    export mz=multiz27way
    export gp=refGene
    export db=dm6
    export I=0
    mkdir exonAA exonNuc
    for C in `sort -nk2 ../../../chrom.sizes | cut -f1`
    do
        I=`echo $I | awk '{print $1+1}'`
	echo "mafGene -chrom=$C -exons -noTrans $db $mz $gp order.list stdout | gzip -c > exonNuc/$C.exonNuc.fa.gz &"
	echo "mafGene -chrom=$C -exons $db $mz $gp order.list stdout | gzip -c > exonAA/$C.exonAA.fa.gz &"
        if [ $I -gt 6 ]; then
            echo "date"
            echo "wait"
            I=0
        fi
    done > $gp.jobs
    echo "date" >> $gp.jobs
    echo "wait" >> $gp.jobs

    time ./$gp.jobs > $gp.jobs.log 2>&1
    #   real    20m2.909s

    time zcat exonAA/*.gz | gzip -c > $gp.$mz.exonAA.fa.gz
    #   real    0m32.720s

    time zcat exonNuc/*.gz | gzip -c > $gp.$mz.exonNuc.fa.gz
    #   real    5m53.612s

    # we're only distributing exons at this time
    export pd=/usr/local/apache/htdocs-hgdownload/goldenPath/$db/$mz/alignments
    mkdir -p $pd
    md5sum *.fa.gz > md5sum.txt
    ln -s `pwd`/$gp.$mz.exonAA.fa.gz $pd/$gp.exonAA.fa.gz
    ln -s `pwd`/$gp.$mz.exonNuc.fa.gz $pd/$gp.exonNuc.fa.gz
    ln -s `pwd`/md5sum.txt $pd/

    rm -rf exonAA exonNuc

#############################################################################
# make refGenePfam table (Done - 2016-09-19 - Brian Raney)
# Pfam in Refseq Genes

export dir=/cluster/data/dm6/bed/pfam
mkdir -p $dir
cd $dir
rm -rf  splitProt
mkdir  splitProt
refSeqGet -protSeqs=refSeq.faa dm6
#20090154 bases (15768554 N's 4321600 real 4321600 upper 0 lower) in 30430 sequences in 1 files
refSeqGet -rnaSeqs=refSeq.fa dm6
hgsql dm6 -Ne "select * from refSeqAli" | cut -f 2- > refGene.psl

faSplit sequence refSeq.faa 10000 splitProt/
mkdir -p result
ls -1 splitProt > prot.list
# /hive/data/outside/pfam/hmmpfam -E 0.1 /hive/data/outside/pfam/current/Pfam_fs \
cat << '_EOF_' > doPfam
#!/bin/csh -ef
/hive/data/outside/pfam/Pfam29.0/PfamScan/hmmer-3.1b2-linux-intel-x86_64/binaries/hmmsearch   --domtblout /scratch/tmp/pfam.$2.pf --noali -o /dev/null -E 0.1 /hive/data/outside/pfam/Pfam29.0/Pfam-A.hmm     splitProt/$1
mv /scratch/tmp/pfam.$2.pf $3
_EOF_
    # << happy emacs
chmod +x doPfam
cat << '_EOF_' > template
#LOOP
doPfam $(path1) $(root1) {check out line+ result/$(root1).pf}
#ENDLOOP
_EOF_
gensub2 prot.list single template jobList

ssh ku "cd $dir; para make jobList"
ssh ku "cd $dir; para time > run.time"
cat run.time

# Completed: 9690 of 9690 jobs
# CPU time in finished jobs:    2667749s   44462.49m   741.04h   30.88d  0.085
# y
# IO & Wait Time:                148194s    2469.89m    41.16h    1.72d  0.005
# y
# Average job time:                 291s       4.84m     0.08h    0.00d
# Longest finished job:             511s       8.52m     0.14h    0.01d
# Submission to last job:          8704s     145.07m     2.42h    0.10d
#
# Make up pfamDesc.tab by converting pfam to a ra file first
cat << '_EOF_' > makePfamRa.awk
/^NAME/ {print}
/^ACC/ {print}
/^DESC/ {print}
/^TC/ {print $1,$3; printf("\n");}
_EOF_
awk -f makePfamRa.awk  /hive/data/outside/pfam/Pfam29.0/Pfam-A.hmm  > pfamDesc.ra
raToTab -cols=ACC,NAME,DESC,TC pfamDesc.ra stdout |  awk -F '\t' '{printf("%s\t%s\t%s\t%g\n", $1, $2, $3, $4);}' | sort > pfamDesc.tab

# Convert output to tab-separated file.
cd $dir
catDir result | sed '/^#/d' > allResults.tab
awk 'BEGIN {OFS="\t"} { print $5,$1,$18-1,$19,$4,$14}' allResults.tab | sort > allUcscPfam.tab
join  -t $'\t' -j 1  allUcscPfam.tab pfamDesc.tab | tawk '{if ($6 > $9) print $2, $3, $4, $5, $6, $1}' > ucscPfam.tab

cd $dir

cat refSeq.faa | sed 's/\.[0-9]*//' > refSeq.noVers.faa
cat refSeq.fa | sed 's/\.[0-9]*//' > refSeq.noVers.fa
sort ucscPfam.tab | sed 's/\.[0-9]*//' > sortPfam.tab
faCount refSeq.noVers.faa | awk '{print $1, $2}' > protein.sizes

hgsql dm6 -Ne "select protAcc, mrnaAcc from refLink, refGene where mrnaAcc=refGene.name" | awk '{if (NF == 2) print}' |sort > sortMap.txt
while read prot mrna
do
echo ./doBlat $prot $mrna
done > blat.jobs < sortMap.txt

ssh ku  "cd $dir; para make blat.jobs"

cat results/* > proteinToMrna.psl
tawk '{print $10, $16, $17}'  proteinToMrna.psl | sort > sortCdsOut.tab
awk '{print $10, $11}' refGene.psl > gene.sizes
join sortCdsOut.tab sortPfam.tab |  join /dev/stdin sortMap.txt | awk '{print $9, $2 + 3 * $4, $2  + 3 * $5, $6}' | bedToPsl gene.sizes stdin domainToGene.psl

pslMap domainToGene.psl refGene.psl stdout | pslToBed stdin stdout | bedOrBlocks -useName stdin domainToGenome.bed

hgLoadBed dm6 refGenePfam domainToGenome.bed

##############################################################################
# Crispr track. See ../crisprTrack/README.txt (2016-09-15 max)
# Command: doCrispr.sh dm6 ensGene

##############################################################################
# LASTZ A. gambiae AnoGam3 (DONE 2017-12-20 - Hiram)
    screen -S anoGam3-dm6
    mkdir /hive/data/genomes/dm6/bed/lastzAnoGam3.2017-12-20
    cd /hive/data/genomes/dm6/bed/lastzAnoGam3.2017-12-20

    printf '# D. melanogaster vs. A. gambiae
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz
BLASTZ_H=2000
BLASTZ_Y=3400
BLASTZ_L=4000
BLASTZ_K=2200
BLASTZ_Q=/cluster/data/blastz/HoxD55.q

# TARGET - D. melanogaster
SEQ1_DIR=/hive/data/genomes/dm6/dm6.2bit
SEQ1_CHUNK=5000000
SEQ1_LAP=10000
SEQ1_LEN=/hive/data/genomes/dm6/chrom.sizes

# QUERY - A. gambiae
SEQ2_DIR=/hive/data/genomes/anoGam3/anoGam3.2bit
SEQ2_CHUNK=5000000
SEQ2_LAP=10000
SEQ2_LEN=/hive/data/genomes/anoGam3/chrom.sizes

BASE=/hive/data/genomes/dm6/bed/lastzAnoGam3.2017-12-20
TMPDIR=/dev/shm
' > DEF

    time (doBlastzChainNet.pl `pwd`/DEF \
	-syntenicNet -fileServer=hgwdev \
	-chainMinScore=3000  -chainLinearGap=loose \
	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku) > do.log 2>&1
    # real    8m50.927s

    cat fb.dm6.chainAnoGam3Link.txt
    # 19641460 bases of 142573024 (13.776%) in intersection

    time (doRecipBest.pl -load -workhorse=hgwdev \
	-buildDir=`pwd` dm6 anoGam3) > rBest.log 2>&1 &
    # real    44m34.312s

    cat fb.dm6.chainRBestAnoGam3Link.txt
    # 15964959 bases of 142573024 (11.198%) in intersection

    #   running the swap
    mkdir /hive/data/genomes/anoGam3/bed/blastz.dm6.swap
    cd /hive/data/genomes/anoGam3/bed/blastz.dm6.swap
    time (doBlastzChainNet.pl -verbose=2 \
        /hive/data/genomes/dm6/bed/lastzAnoGam3.2017-12-20/DEF \
        -swap -syntenicNet -chainMinScore=3000 -chainLinearGap=loose \
        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku) > swap.log 2>&1
    # real    2m0.901s

    cat fb.anoGam3.chainDm6Link.txt
    # 20168390 bases of 264424304 (7.627%) in intersection

    time (doRecipBest.pl -load -workhorse=hgwdev \
	-buildDir=`pwd` anoGam3 dm6) > rBest.log 2>&1 &
    # real    46m7.441s

    cat fb.anoGam3.chainRBestDm6Link.txt
    # 16040573 bases of 264424304 (6.066%) in intersection

###########################################################################
# LASTZ D. simulans DroSim2 (DONE 2018-09-12 - Hiram)
    screen -S droSim2-dm6
    mkdir /hive/data/genomes/dm6/bed/lastzDroSim2.2018-09-12
    cd /hive/data/genomes/dm6/bed/lastzDroSim2.2018-09-12

    cat << '_EOF_' > DEF
    printf '# D. melanogaster vs. D. simulans
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.52/bin/lastz
BLASTZ_H=2000
BLASTZ_Y=3400
BLASTZ_L=4000
BLASTZ_K=2200
BLASTZ_Q=/cluster/data/blastz/HoxD55.q

# TARGET - D. melanogaster
SEQ1_DIR=/hive/data/genomes/dm6/dm6.2bit
SEQ1_CHUNK=5000000
SEQ1_LAP=10000
SEQ1_LEN=/hive/data/genomes/dm6/chrom.sizes

# QUERY - D. simulans
SEQ2_DIR=/hive/data/genomes/droSim2/droSim2.2bit
SEQ2_CHUNK=5000000
SEQ2_LAP=10000
SEQ2_LEN=/hive/data/genomes/droSim2/chrom.sizes

BASE=/hive/data/genomes/dm6/bed/lastzDroSim2.2018-09-12
TMPDIR=/dev/shm
' > DEF

    time (doBlastzChainNet.pl `pwd`/DEF -verbose=2 \
        -chainMinScore=3000  -chainLinearGap=loose \
        -syntenicNet -fileServer=hgwdev \
        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku) > do.log 2>&1

    # real    39m35.348s
    cat fb.dm6.chainDroSim2Link.txt
    # 117723253 bases of 142573024 (82.570%) in intersection

    cat fb.dm6.chainSynDroSim2Link.txt
    # 108599452 bases of 142573024 (76.171%) in intersection

    time (doRecipBest.pl -load -workhorse=hgwdev -buildDir=`pwd` dm6 droSim2) > rbest.log 2>&1 &
    # real    6m47.792s

    cat fb.dm6.chainRBest.DroSim2.txt
    # 110510644 bases of 142573024 (77.512%) in intersection

    #   running the swap
    mkdir /hive/data/genomes/droSim2/bed/blastz.dm6.swap
    cd /hive/data/genomes/droSim2/bed/blastz.dm6.swap
    time (doBlastzChainNet.pl -verbose=2 \
      /hive/data/genomes/dm6/bed/lastzDroSim2.2018-09-12/DEF \
        -swap -chainMinScore=3000 -chainLinearGap=loose \
          -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
            -syntenicNet) > swap.log 2>&1 &
    # real    4m32.910s

    cat fb.droSim2.chainDm6Link.txt
    # 115776543 bases of 124963774 (92.648%) in intersection
    cat fb.droSim2.chainSynDm6Link.txt
    # 108004181 bases of 124963774 (86.428%) in intersection

    time (doRecipBest.pl -load -workhorse=hgwdev -buildDir=`pwd` droSim2 dm6) > rbest.log 2>&1 &
    # real    7m22.120s

    cat fb.droSim2.chainRBest.Dm6.txt
    # 110784245 bases of 124963774 (88.653%) in intersection

###########################################################################

# ReMap refs #28960 (2022-04-13 Gerardo)
cd /hive/data/genomes/dm6/bed
mkdir reMap
cd reMap
wget https://remap.univ-amu.fr/storage/public/hubReMap2022/dm6/bigBed/remap2022_all_macs2_dm6_v1_0.bb
mv remap2022_all_macs2_dm6_v1_0.bb reMap2022.bb
wget https://remap.univ-amu.fr/storage/public/hubReMap2022/dm6/bigBed/dm6.bw
mv dm6.bw reMapDensity2022.bw
cd /gbdb/dm6
mkdir reMap
cd reMap
ln -s /hive/data/genomes/dm6/bed/reMap/reMap2022.bb
ln -s /hive/data/genomes/dm6/bed/reMap/reMapDensity2022.bw
cd ~/kent/src/hg/makeDb/trackDb/drosophila/dm6/
wget https://remap.univ-amu.fr/storage/public/hubReMap2022/remapHub2022.html
cp remapHub2022.html  reMap.html
cd ~/kent/src/hg/makeDb/trackDb
curl https://remap.univ-amu.fr/storage/public/hubReMap2022/dm6/trackDb.txt > drosophila/dm6/reMap.ra
vi drosophila/dm6/reMap.ra
vi drosophila/dm6/trackDb.ra 
vi drosophila/dm6/reMap.html
