# for emacs: -*- mode: sh; -*-

# This file describes browser build for the mm10
# Mus musculus (mouse)

#	DATE:   07-Dec-2011
#	ORGANISM:       Mus musculus
#	TAXID:  10090
#	ASSEMBLY LONG NAME:     Genome Reference Consortium Mouse Build 38
#	ASSEMBLY SHORT NAME:    GRCm38
#	ASSEMBLY SUBMITTER:     Genome Reference Consortium
#	ASSEMBLY TYPE:  Haploid + alternate loci
#	NUMBER OF ASSEMBLY-UNITS:       16
#	ASSEMBLY ACCESSION:     GCA_000001635.2

#	rsync://ftp.ncbi.nlm.nih.gov/genbank/genomes/Eukaryotes/vertebrates_mammals/Mus_musculus/GRCm38/

#	Genome ID:
#	http://www.ncbi.nlm.nih.gov/genome/52

#	Taxonomy:
#	http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=10090
#	http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=39442

#	GRC information
#	http://www.ncbi.nlm.nih.gov/projects/genome/assembly/grc/mouse/

#	Mitochondrial sequence:
#	http://www.ncbi.nlm.nih.gov/bioproject/13767
#	C57BL/6J sequence:
#	http://www.ncbi.nlm.nih.gov/bioproject/51977
#	Finishing project:
#	http://www.ncbi.nlm.nih.gov/bioproject/20689

#	Assembly ID: 327618
#	http://www.ncbi.nlm.nih.gov/genome/assembly/327618/

#	Celera Assembly
# http://www.ncbi.nlm.nih.gov/Traces/wgs/?val=AAHY00

#############################################################################
# fetch sequence from genbank (DONE - 2012-01-30 - Hiram)
    mkdir -p /hive/data/genomes/mm10/genbank
    cd /hive/data/genomes/mm10/genbank

    rsync -a -P \
rsync://ftp.ncbi.nlm.nih.gov/genbank/genomes/Eukaryotes/vertebrates_mammals/Mus_musculus/GRCm38/ ./

    # measure sequence to be used here
    faSize Primary_Assembly/assembled_chromosomes/FASTA/*.fa.gz \
	Primary_Assembly/unplaced_scaffolds/FASTA/*.fa.gz \
	Primary_Assembly/unlocalized_scaffolds/FASTA/*.fa.gz \
	non-nuclear/assembled_chromosomes/FASTA/chrMT.fa.gz
    # 2730871774 bases (78088274 N's 2652783500 real 2652783500 upper 0 lower)
    # in 66 sequences in 29 files
    #	Total size: mean 41376845.1 sd 63617337.3 min 1976
    #	(gi|371559559|gb|JH584295.1|) max 195471971
    #	(gi|371561115|gb|CM000994.2|) median 184189

#############################################################################
# fixup names for UCSC standards (DONE - 2012-02-06 - Hiram)
    mkdir /hive/data/genomes/mm10/ucsc
    cd /hive/data/genomes/mm10/ucsc

    ########################  Assembled Chromosomes
    cat << '_EOF_' > toUcsc.pl
#!/bin/env perl

# toUcsc.pl - rewrite the GenBank assembled-chromosome AGP and FASTA files
# with UCSC chromosome names.
#
# Reads the chr2acc table (chromosome name, GenBank accession) and, for each
# chromosome N listed there, writes chrN.agp and chrN.fa into the current
# directory:
#   - AGP: comment lines are copied through; on every other line a leading
#     accession is replaced by "chrN"
#   - FASTA: each header line is replaced by ">chrN"; sequence lines are
#     copied through unchanged
# One "accession chromosome" line per chromosome is printed to stdout.
# Dies when an input can not be opened, when zcat reports a failure, or when
# an output file can not be written.

use strict;
use warnings;

my %accToChr;

open (FH, "<../genbank/Primary_Assembly/assembled_chromosomes/chr2acc") or
        die "can not read Primary_Assembly/assembled_chromosomes/chr2acc";
while (my $line = <FH>) {
    next if ($line =~ m/^#/);
    next if ($line !~ m/\S/);   # a blank line carries no chr/accession pair
    chomp $line;
    my ($chrN, $acc) = split('\s+', $line);
    $accToChr{$acc} = $chrN;
}
close (FH);

foreach my $acc (keys %accToChr) {
    my $chrN =  $accToChr{$acc};
    print "$acc $accToChr{$acc}\n";
    open (FH, "zcat ../genbank/Primary_Assembly/assembled_chromosomes/AGP/chr${chrN}.agp.gz|") or die "can not read chr${chrN}.agp.gz";
    open (UC, ">chr${chrN}.agp") or die "can not write to chr${chrN}.agp";
    while (my $line = <FH>) {
        # \Q..\E: the accession contains a '.', which has to match literally
        # instead of acting as the regex "any character" wildcard
        $line =~ s/^\Q$acc\E/chr${chrN}/ if ($line !~ m/^#/);
        print UC $line;
    }
    # open() on a pipe succeeds even when zcat can not read its file,
    # that failure only becomes visible in the status returned by close()
    close (FH) or die "zcat failed on chr${chrN}.agp.gz";
    close (UC) or die "can not finish writing chr${chrN}.agp";
    open (FH, "zcat ../genbank/Primary_Assembly/assembled_chromosomes/FASTA/chr${chrN}.fa.gz|") or die "can not read chr${chrN}.fa.gz";
    open (UC, ">chr${chrN}.fa") or die "can not write to chr${chrN}.fa";
    while (my $line = <FH>) {
        if ($line =~ m/^>/) {
            # print, not printf: the name is data, not a format string
            print UC ">chr${chrN}\n";
        } else {
            print UC $line;
        }
    }
    close (FH) or die "zcat failed on chr${chrN}.fa.gz";
    close (UC) or die "can not finish writing chr${chrN}.fa";
}
'_EOF_'
    # << happy emacs
    chmod +x toUcsc.pl
    time ./toUcsc.pl
    #	real    0m53.256s
    faSize chr*.fa
    #	2725521370 bases (77999939 N's 2647521431 real 2647521431 upper 0
    #	lower) in 21 sequences in 21 files
    #	Total size: mean 129786731.9 sd 33408399.1 min 61431566 (chr19)
    #	max 195471971 (chr1) median 124902244

    ########################  Unplaced scaffolds
    cat << '_EOF_' > unplaced.pl
#!/bin/env perl

# unplaced.pl - rewrite the GenBank unplaced scaffold AGP and FASTA files
# with UCSC chrUn_<accession> names.
#
# Writes unplaced.agp and unplaced.fa into the current directory:
#   - AGP: comment lines are copied through; on every other line the ".1"
#     version suffix is removed from the first column and "chrUn_" is
#     prepended
#   - FASTA: each header is reduced to the accession following "gb|",
#     without its ".1" version, and written as ">chrUn_<accession>"
# Dies when an accession does not carry version .1, when zcat reports a
# failure, or when a file can not be opened or written.

use strict;
use warnings;

my $agpFile =  "../genbank/Primary_Assembly/unplaced_scaffolds/AGP/unplaced.scaf.agp.gz";
my $fastaFile =  "../genbank/Primary_Assembly/unplaced_scaffolds/FASTA/unplaced.scaf.fa.gz";
open (FH, "zcat $agpFile|") or die "can not read $agpFile";
open (UC, ">unplaced.agp") or die "can not write to unplaced.agp";
while (my $line = <FH>) {
    if ($line =~ m/^#/) {
        print UC $line;
    } else {
        # only strip the version at the end of the first column; an
        # unanchored s/\.1// would remove ".1" from a later column
        # whenever the scaffold accession itself is not version .1
        $line =~ s/^([^\t]+)\.1\t/$1\t/
            or die "ERROR: acc not .1: $line";
        print UC "chrUn_", $line;
    }
}
# a zcat failure only shows up in the close() status of the pipe
close (FH) or die "zcat failed on $agpFile";
close (UC) or die "can not finish writing unplaced.agp";

open (FH, "zcat $fastaFile|") or die "can not read $fastaFile";
open (UC, ">unplaced.fa") or die "can not write to unplaced.fa";
while (my $line = <FH>) {
    if ($line =~ m/^>/) {
        chomp $line;
        $line =~ s/.*gb\|//;
        $line =~ s/\.1\|.*//;
        # anything left over beyond the bare accession means the header
        # did not have the expected gb|<accession>.1| form
        die "ERROR: acc not .1: $line" if ($line =~ m/[.|]/);
        # print, not printf: the name is data, not a format string
        print UC ">chrUn_$line\n";
    } else {
        print UC $line;
    }
}
close (FH) or die "zcat failed on $fastaFile";
close (UC) or die "can not finish writing unplaced.fa";
'_EOF_'
    # << happy emacs
    chmod +x unplaced.pl
    time ./unplaced.pl
    #	real    0m0.119s
    # make sure none of the names got to be over 31 characters long:
    grep -v "^#" unplaced.agp | cut -f1 | sort | uniq -c | sort -rn
    # not much in that sequence:
    faSize unplaced.fa
    #	803895 bases (62411 N's 741484 real 741484 upper 0 lower)
    #	in 22 sequences in 1 files
    #	Total size: mean 36540.7 sd 21518.0 min 20208 (chrUn_GL456368)
    #	max 114452 (chrUn_JH584304) median 28772

    ########## chrM
    zcat ../genbank/non-nuclear/assembled_chromosomes/FASTA/chrMT.fa.gz \
	| sed -e "s/^>.*/>chrM/" > chrM.fa
    zcat ../genbank/non-nuclear/assembled_chromosomes/AGP/chrMT.comp.agp.gz \
	| sed -e "s/^AY172335.1/chrM/" > chrM.agp

    ########################  Unlocalized scaffolds
    cat << '_EOF_' > unlocalized.pl
#!/bin/env perl

# unlocalized.pl - rewrite the GenBank unlocalized scaffold AGP and FASTA
# files with UCSC chrN_<accession>_random names.
#
# Reads unlocalized.chr2scaf (chromosome name, scaffold accession) and, for
# each chromosome N listed there, writes chrN_random.agp and chrN_random.fa
# into the current directory.  The name of each chromosome is printed to
# stdout as it is processed.
# Dies when an accession is not version .1, when an accession is not
# assigned to the chromosome whose file it appears in, when zcat reports a
# failure, or when a file can not be opened or written.

use strict;
use warnings;

my %accToChr;
my %chrNames;

# ucscRandomName - construct the UCSC name for one unlocalized scaffold
#   $acc  - accession including its version suffix, as found in the input
#   $chrN - chromosome whose files are currently being processed
# returns "chr<chrN>_<accession without .1>_random"
# dies when the version is not .1 or when chr2scaf does not assign
#   $acc to $chrN
sub ucscRandomName {
    my ($acc, $chrN) = @_;
    my $accNo1 = $acc;
    # the dot is escaped: a bare "." would match any character before
    # the final "1", not just the version separator
    my $stripped = ($accNo1 =~ s/\.1$//);
    die "ERROR: acc not .1: $acc" if (!$stripped || $accNo1 =~ m/\./);
    # exists test first, so an unknown accession produces this error
    # without an uninitialized value warning from the comparison
    die "ERROR: chrN $chrN not correct for $acc"
        if (!exists $accToChr{$acc} || $accToChr{$acc} ne $chrN);
    return "chr${chrN}_${accNo1}_random";
}

open (FH, "<../genbank/Primary_Assembly/unlocalized_scaffolds/unlocalized.chr2scaf") or
        die "can not read Primary_Assembly/unlocalized_scaffolds/unlocalized.chr2scaf";
while (my $line = <FH>) {
    next if ($line =~ m/^#/);
    chomp $line;
    my ($chrN, $acc) = split('\s+', $line);
    $accToChr{$acc} = $chrN;
    $chrNames{$chrN} += 1;
}
close (FH);

foreach my $chrN (keys %chrNames) {
    my $agpFile =  "../genbank/Primary_Assembly/unlocalized_scaffolds/AGP/chr$chrN.unlocalized.scaf.agp.gz";
    my $fastaFile =  "../genbank/Primary_Assembly/unlocalized_scaffolds/FASTA/chr$chrN.unlocalized.scaf.fa.gz";
    open (FH, "zcat $agpFile|") or die "can not read $agpFile";
    open (UC, ">chr${chrN}_random.agp") or die "can not write to chr${chrN}_random.agp";
    while (my $line = <FH>) {
        if ($line =~ m/^#/) {
            print UC $line;
        } else {
            chomp $line;
            my (@a) = split('\t', $line);
            # replace the first column, keep the remaining columns as read
            print UC join("\t", ucscRandomName($a[0], $chrN), @a[1 .. $#a]), "\n";
        }
    }
    # a zcat failure only shows up in the close() status of the pipe
    close (FH) or die "zcat failed on $agpFile";
    close (UC) or die "can not finish writing chr${chrN}_random.agp";
    printf "chr%s\n", $chrN;
    open (FH, "zcat $fastaFile|") or die "can not read $fastaFile";
    open (UC, ">chr${chrN}_random.fa") or die "can not write to chr${chrN}_random.fa";
    while (my $line = <FH>) {
        if ($line =~ m/^>/) {
            chomp $line;
            # the accession is the text between "gb|" and the next "|"
            my $acc = $line;
            $acc =~ s/.*gb\|//;
            $acc =~ s/\|.*//;
            # print, not printf: the name is data, not a format string
            print UC ">", ucscRandomName($acc, $chrN), "\n";
        } else {
            print UC $line;
        }
    }
    close (FH) or die "zcat failed on $fastaFile";
    close (UC) or die "can not finish writing chr${chrN}_random.fa";
}
'_EOF_'
    # << happy emacs
    chmod +x unlocalized.pl
    time ./unlocalized.pl
    #	real    0m0.430s
    faSize chr*_random.fa
    #	4530210 bases (25924 N's 4504286 real 4504286 upper 0 lower)
    #	in 22 sequences in 6 files
    #	Total size: mean 205918.6 sd 184688.0 min 1976 (chr4_JH584295_random)
    #	max 953012 (chr5_JH584299_random) median 191905
    # verify none of the names are longer than 31 characters:
    grep -h -v "^#" chr*_random.agp | cut -f1 | sort | uniq -c | sort -nr


    # compress all these fasta and agp files:
    gzip *.fa *.agp
    #	verify all the sequence is still here after all this rigamarole:
    time faSize *.fa.gz
    #	2730871774 bases (78088274 N's 2652783500 real 2652783500 upper 0
    #	lower) in 66 sequences in 29 files
    #	Total size: mean 41376845.1 sd 63617337.3 min 1976
    #	(chr4_JH584295_random) max 195471971 (chr1) median 184189

#############################################################################
#   Initial browser build (DONE - 2012-01-06 - Hiram)
    cd /hive/data/genomes/mm10
    cat << '_EOF_' > mm10.config.ra
# Config parameters for makeGenomeDb.pl:
db mm10
clade mammal
genomeCladePriority 40
scientificName Mus musculus
commonName Mouse
assemblyDate Dec. 2011
assemblyLabel Genome Reference Consortium Mouse Build 38 (GCA_000001635.2)
assemblyShortLabel GRCm38
orderKey 1209
mitoAcc none
fastaFiles /hive/data/genomes/mm10/ucsc/*.fa.gz
agpFiles /hive/data/genomes/mm10/ucsc/*.agp.gz
dbDbSpeciesDir mouse
taxId   10090
ncbiAssemblyId   327618
ncbiAssemblyName   GRCm38
'_EOF_'
    # << happy emacs

    time makeGenomeDb.pl -stop=agp mm10.config.ra > agp.log 2>&1
    #	real    3m4.568s
    # check the end of agp.log to verify it is OK
    time makeGenomeDb.pl -workhorse=hgwdev -fileServer=hgwdev \
	-continue=db mm10.config.ra > db.log 2>&1
    #	real    20m51.374s
    # verify the end of db.log indicates success


#############################################################################
# running repeat masker (DONE - 2012-02-06 - Hiram)
    mkdir /hive/data/genomes/mm10/bed/repeatMasker
    cd /hive/data/genomes/mm10/bed/repeatMasker
    time doRepeatMasker.pl -buildDir=`pwd` -noSplit \
	-bigClusterHub=swarm -dbHost=hgwdev -workhorse=hgwdev \
	-smallClusterHub=encodek mm10 > do.log 2>&1 &
    #	real    609m48.767s

    cat faSize.rmsk.txt
    #	2730871774 bases (78088274 N's 2652783500 real 1456094545 upper
    #	1196688955 lower) in 66 sequences in 1 files
    #	Total size: mean 41376845.1 sd 63617337.3 min 1976
    #	(chr4_JH584295_random) max 195471971 (chr1) median 184189
    #	%43.82 masked total, %45.11 masked real

    grep -i versi do.log
# RepeatMasker version development-$Id: RepeatMasker,v 1.26 2011/09/26 16:19:44 angie Exp $
#    April 26 2011 (open-3-3-0) version of RepeatMasker

    time featureBits -countGaps mm10 rmsk
    #	1196694219 bases of 2730871774 (43.821%) in intersection
    #	real    0m30.460s
    # why is it different than the faSize above ?
    # because rmsk masks out some N's as well as bases, the count above
    #	separates out the N's from the bases, it doesn't show lower case N's

##########################################################################
# running simple repeat (DONE - 2012-02-06 - Hiram)
    mkdir /hive/data/genomes/mm10/bed/simpleRepeat
    cd /hive/data/genomes/mm10/bed/simpleRepeat
    time doSimpleRepeat.pl -buildDir=`pwd` -bigClusterHub=swarm \
	-dbHost=hgwdev -workhorse=hgwdev -smallClusterHub=encodek \
	mm10 > do.log 2>&1 &
    #	real    16m35.603s

    #	batch failed, one job failed:
    # ./TrfRun.csh /hive/data/genomes/mm10/TrfPart/062/062.lst.bed
    # which is the chrM sequence - it has no simple repeats
    # create an empty output file result:
    touch /hive/data/genomes/mm10/TrfPart/062/062.lst.bed
    # go to encodek and create the run.time file to signal this step is done
    cd /hive/data/genomes/mm10/bed/simpleRepeat/run.cluster
    para time > run.time
# Completed: 70 of 71 jobs
# Crashed: 1 jobs
# CPU time in finished jobs:      13103s     218.38m     3.64h    0.15d  0.000 y
# IO & Wait Time:                   163s       2.72m     0.05h    0.00d  0.000 y
# Average job time:                 190s       3.16m     0.05h    0.00d
# Longest finished job:             392s       6.53m     0.11h    0.00d
# Submission to last job:           894s      14.90m     0.25h    0.01d


    # continue procedure:
    time doSimpleRepeat.pl -buildDir=`pwd` -bigClusterHub=swarm \
	-dbHost=hgwdev -workhorse=hgwdev -smallClusterHub=encodek \
	-continue=filter mm10 > filter.log 2>&1 &
    #	real    1m20.021s

    cat fb.simpleRepeat
    #	92161833 bases of 2652783500 (3.474%) in intersection

    # when RepeatMasker is done, add this mask to the sequence:
    cd /hive/data/genomes/mm10
    twoBitMask mm10.rmsk.2bit \
	-add bed/simpleRepeat/trfMask.bed mm10.2bit
    #	you can safely ignore the warning about fields >= 13

    twoBitToFa mm10.2bit stdout | faSize stdin > faSize.mm10.2bit.txt
    cat faSize.mm10.2bit.txt
    #	2730871774 bases (78088274 N's 2652783500 real 1454267808 upper
    #	1198515692 lower) in 66 sequences in 1 files
    #	Total size: mean 41376845.1 sd 63617337.3 min 1976
    #	(chr4_JH584295_random) max 195471971 (chr1) median 184189
    #	%43.89 masked total, %45.18 masked real

    # set SymLink in gbdb to this masked sequence
    rm /gbdb/mm10/mm10.2bit
    ln -s `pwd`/mm10.2bit /gbdb/mm10/mm10.2bit

#########################################################################
# Verify all gaps are marked, add any N's not in gap as type 'other'
#	(DONE - 2012-02-06 - Hiram)
    mkdir /hive/data/genomes/mm10/bed/gap
    cd /hive/data/genomes/mm10/bed/gap
    time nice -n +19 findMotif -motif=gattaca -verbose=4 \
	-strand=+ ../../mm10.unmasked.2bit > findMotif.txt 2>&1
    #	real    1m0.372s
    grep "^#GAP " findMotif.txt | sed -e "s/^#GAP //" > allGaps.bed
    time featureBits -countGaps mm10 -not gap -bed=notGap.bed
    #	2658879040 bases of 2730871774 (97.364%) in intersection
    #	real    0m13.067s

    time featureBits -countGaps mm10 allGaps.bed notGap.bed -bed=new.gaps.bed
    #	6095540 bases of 2730871774 (0.223%) in intersection
    #	real    0m15.177s

    #	what is the highest index in the existing gap table:
    hgsql -N -e "select ix from gap;" mm10 | sort -n | tail -1
    #	54
    cat << '_EOF_' > mkGap.pl
#!/bin/env perl

# mkGap.pl - print gap table rows for the intervals in new.gaps.bed.
# Each input interval becomes one tab separated row on stdout: chrom, start,
# end, index, "N", size, "other", "yes".  The index keeps counting up from
# the largest ix value reported by the query of the existing mm10 gap table.

use strict;
use warnings;

my $gapIx = `hgsql -N -e "select ix from gap;" mm10 | sort -n | tail -1`;
chomp $gapIx;

open (BED, "<new.gaps.bed") or die "can not read new.gaps.bed";
while (my $bedLine = <BED>) {
    # only the first three columns of the bed line are used
    my ($chrom, $start, $end) = split('\s+', $bedLine);
    $gapIx++;
    my $size = $end - $start;
    printf "%s\t%d\t%d\t%d\tN\t%d\tother\tyes\n",
        $chrom, $start, $end, $gapIx, $size;
}
close (BED);
'_EOF_'
    # << happy emacs
    chmod +x ./mkGap.pl
    ./mkGap.pl > other.bed
    wc -l other.bed
    #	384
    featureBits -countGaps mm10 other.bed
    #	6095540 bases of 2730871774 (0.223%) in intersection
    hgLoadBed -sqlTable=$HOME/kent/src/hg/lib/gap.sql \
	-noLoad mm10 otherGap other.bed
    # verify no overlap with gap table:
    time featureBits -countGaps mm10 gap other.bed
    #	0 bases of 2730871774 (0.000%) in intersection
    #	real    0m1.281s

    # verify no errors before adding to the table:
    time gapToLift -minGap=1 mm10 nonBridged.before.lift \
	-bedFile=nonBridged.before.bed > before.gapToLift.txt 2>&1 &
    #	real    0m7.205s
    # check for warnings in before.gapToLift.txt, should be empty:
    #	-rw-rw-r-- 1     1633 Jan  6 15:20 before.gapToLift.txt
    # it indicates that there are telomeres adjacent to centromeres
    #	and heterochromatin
    #	starting with this many:
    hgsql -e "select count(*) from gap;" mm10
    #	302
    # append the new 'other' gap rows to the gap table; the SQL statement
    #	is "load data local infile".  bed.tab is presumably the file left
    #	behind by the 'hgLoadBed -noLoad' run above - verify before loading
    hgsql mm10 -e 'load data local infile "bed.tab" into table gap;'
    #	result count:
    hgsql -e "select count(*) from gap;" mm10
    #	686
    # == 302 + 384
    # verify we aren't adding gaps where gaps already exist
    # this would output errors if that were true:
    gapToLift -minGap=1 mm10 nonBridged.lift -bedFile=nonBridged.bed
    # same set of warnings as before: telomeres, centromeres and heterochromatin
    # there should be no errors or other output, checked bridged gaps:
    hgsql -N -e "select bridge from gap;" mm10 | sort | uniq -c
    #	191 no
    #	495 yes

##########################################################################
## WINDOWMASKER (DONE - 2012-02-06 - Hiram)
    mkdir /hive/data/genomes/mm10/bed/windowMasker
    cd /hive/data/genomes/mm10/bed/windowMasker
    time nice -n +19 doWindowMasker.pl -buildDir=`pwd` -workhorse=hgwdev \
	-dbHost=hgwdev mm10 > do.log 2>&1 &
    #	real    167m12.012s

    # Masking statistics
    twoBitToFa mm10.wmsk.2bit stdout | faSize stdin
    #	2730871774 bases (78088274 N's 2652783500 real 1686407708 upper
    #	966375792 lower) in 66 sequences in 1 files
    #	Total size: mean 41376845.1 sd 63617337.3 min 1976
    #	(chr4_JH584295_random) max 195471971 (chr1) median 184189
    #	%35.39 masked total, %36.43 masked real


    twoBitToFa mm10.wmsk.sdust.2bit stdout | faSize stdin
    #	2730871774 bases (78088274 N's 2652783500 real 1670424648 upper
    #	982358852 lower) in 66 sequences in 1 files
    #	Total size: mean 41376845.1 sd 63617337.3 min 1976
    #	(chr4_JH584295_random) max 195471971 (chr1) median 184189
    #	%35.97 masked total, %37.03 masked real

    hgLoadBed mm10 windowmaskerSdust windowmasker.sdust.bed.gz
    #	Loaded 12655947 elements of size 3

    featureBits -countGaps mm10 windowmaskerSdust
    #	1060447084 bases of 2730871774 (38.832%) in intersection

    #	eliminate the gaps from the masking
    featureBits mm10 -not gap -bed=notGap.bed
    #	2652783500 bases of 2652783500 (100.000%) in intersection
    time nice -n +19 featureBits mm10 windowmaskerSdust notGap.bed \
        -bed=stdout | gzip -c > cleanWMask.bed.gz
    #	982358852 bases of 2652783500 (37.031%) in intersection
    #	real    1m42.449s

    #	reload track to get it clean
    hgLoadBed mm10 windowmaskerSdust cleanWMask.bed.gz
    #	Loaded 12655987  elements of size 4
    time featureBits -countGaps mm10 windowmaskerSdust
    #	982358852 bases of 2730871774 (35.972%) in intersection
    #	real    1m13.889s

    #	do *not* need to mask with this clean result since RepeatMasker
    #	does a very good job here.  Using RM masking instead.
    zcat cleanWMask.bed.gz \
	| twoBitMask ../../mm10.unmasked.2bit stdin \
	    -type=.bed mm10.cleanWMSdust.2bit
    twoBitToFa mm10.cleanWMSdust.2bit stdout | faSize stdin \
        > mm10.cleanWMSdust.faSize.txt
    cat mm10.cleanWMSdust.faSize.txt

    # how much does this window masker and repeat masker overlap:
    time featureBits -countGaps mm10 rmsk windowmaskerSdust
    #	753614881 bases of 2730871774 (27.596%) in intersection
    #	real    1m42.691s
    # RM by itself:
    time featureBits -countGaps mm10 rmsk
    #	1196694219 bases of 2730871774 (43.821%) in intersection
    #	real    0m30.460s

#############################################################################
# PREPARE LINEAGE SPECIFIC REPEAT FILES FOR BLASTZ (DONE - 2012-02-07 - Hiram)
    ssh encodek
    mkdir /hive/data/genomes/mm10/bed/linSpecRep
    cd /hive/data/genomes/mm10/bed/linSpecRep

    # split the RM output by chromosome name into separate files
    mkdir rmsk dateRepeats
    head -3 ../repeatMasker/mm10.sorted.fa.out > rmsk.header.txt
    headRest 3 ../repeatMasker/mm10.sorted.fa.out \
	| splitFileByColumn -ending=.out -col=5 -head=rmsk.header.txt stdin rmsk

    ls -1S rmsk/* > rmOut.list

    cat << '_EOF_' > mkLSR
#!/bin/csh -fe
# mkLSR - run DateRepeats on one per-sequence RepeatMasker .out file
#   $1: path to the .out file, as listed in rmOut.list
# DateRepeats is expected to leave its result beside the input, named
# $1_homo-sapiens_rattus_canis-lupus-familiaris_bos-taurus; that file is
# then moved into dateRepeats/ where the batch template checks for it.
#
# remove any result left in dateRepeats/ by an earlier run.  The name has
# to be the one the mv below creates: the file name part of $1 (:t strips
# the directory) followed by the canis-lupus-familiaris suffix
rm -f dateRepeats/${1:t}_homo-sapiens_rattus_canis-lupus-familiaris_bos-taurus
/scratch/data/genomes/RepeatMasker/DateRepeats \
    $1 -query mouse -comp human -comp rat -comp dog -comp cow
mv $1_homo-sapiens_rattus_canis-lupus-familiaris_bos-taurus dateRepeats
'_EOF_'
    #	<< happy emacs
    chmod +x mkLSR

    cat << '_EOF_' > template
#LOOP
./mkLSR $(path1) {check out line+ dateRepeats/$(file1)_homo-sapiens_rattus_canis-lupus-familiaris_bos-taurus}
#ENDLOOP
'_EOF_'
    #	<< happy emacs

    gensub2 rmOut.list single template jobList
    para create jobList
    para try ... check ... push ... etc...
    para time
# Completed: 66 of 66 jobs
# CPU time in finished jobs:       1743s      29.05m     0.48h    0.02d  0.000 y
# IO & Wait Time:                   190s       3.16m     0.05h    0.00d  0.000 y
# Average job time:                  29s       0.49m     0.01h    0.00d
# Longest finished job:              65s       1.08m     0.02h    0.00d
# Submission to last job:           160s       2.67m     0.04h    0.00d

    mkdir notInHuman notInRat notInDog notInCow
    # one extractRepeats run per comparison species for every DateRepeats
    # result; the numeric argument 1-4 pairs with the notInHuman, notInRat,
    # notInDog, notInCow output directories in that order
    for dateOut in dateRepeats/chr*.out_homo-sapiens*
    do
	seqName=`basename ${dateOut}`
	seqName=${seqName%%.out*}
	echo $seqName
	specIx=0
	for specDir in notInHuman notInRat notInDog notInCow
	do
	    specIx=$((specIx + 1))
	    /cluster/bin/scripts/extractRepeats ${specIx} ${dateOut} \
		> ${specDir}/${seqName}.out.spec
	done
    done

    #	notInDog, and notInCow ended up being identical.
    #	The notInRat and notInHuman are different
    #	To check identical
    find . -name "*.out.spec" | \
	while read FN; do echo `cat ${FN} | sum -r` ${FN}; done \
	| sort -k1,1n | sort -t"/" -k3,3 > check.same
    # this produces a count of 2 for the sums for Cow and Dog, all the same
    egrep "Cow|Dog" check.same | awk '{print $1}' | sort | uniq -c | sort -rn
    # this does not produce a count of 2 for the sums for Cow and Human
    egrep "Cow|Human" check.same | awk '{print $1}' | sort | uniq -c | sort -rn
    #	Copy to data/genomes staging for cluster replication
    mkdir /hive/data/genomes/staging/data/genomes/mm10
    rsync -a -P ./notInRat/ /hive/data/genomes/staging/data/genomes/mm10/notInRat/
    rsync -a -P ./notInHuman/ /hive/data/genomes/staging/data/genomes/mm10/notInHuman/
    rsync -a -P ./notInCow/ /hive/data/genomes/staging/data/genomes/mm10/notInOthers/


    # We also need the nibs for the lastz runs with lineage specific repeats
    mkdir /hive/data/genomes/mm10/nib
    cd /hive/data/genomes/mm10
    cut -f1 chrom.sizes | while read C
do
    twoBitToFa -seq=${C} mm10.2bit stdout | faToNib -softMask stdin nib/${C}.nib
    ls -og nib/$C.nib
done
    # verify one is properly masked:
    nibFrag -masked nib/chrM.nib 0 16299 + stdout | less
    # compare to the same sequence in the 2bit file the nibs were made from
    #	(twoBitToFa reads a .2bit file, not a fasta file):
    twoBitToFa -seq=chrM mm10.2bit stdout | less

    #	Copy to data/genomes staging for cluster replication
    rsync -a -P ./nib/ /hive/data/genomes/staging/data/genomes/mm10/nib/

#########################################################################
# MAKE 11.OOC FILE FOR BLAT/GENBANK (DONE - 2012-02-08 - Hiram)
    # Use -repMatch=650, based on size -- for human we use 1024
    # use the "real" number from the faSize measurement,
    # hg19 is 2897316137, calculate the ratio factor for 1024:
    calc \( 2652783500 / 2897316137 \) \* 1024
    #	( 2652783500 / 2897316137 ) * 1024 = 937.574699

    # round up to 1000  (mm9 used 912)

    cd /hive/data/genomes/mm10
    time blat mm10.2bit /dev/null /dev/null -tileSize=11 \
      -makeOoc=jkStuff/mm10.11.ooc -repMatch=1000
    #	Wrote 27208 overused 11-mers to jkStuff/mm10.11.ooc
    #	real    2m9.568s

    #	at repMatch=900:
    #	Wrote 31822 overused 11-mers to jkStuff/mm10.11.ooc

    # there are non-bridged gaps, make lift file for genbank
    hgsql -N -e "select bridge from gap;" mm10 | sort | uniq -c
    #	191 no
    #	495 yes
    cd /hive/data/genomes/mm10/jkStuff
    gapToLift mm10 mm10.nonBridged.lift -bedFile=mm10.nonBridged.bed
    # largest non-bridged contig:
    awk '{print $3-$2,$0}' mm10.nonBridged.bed | sort -nr | head
    #	116378660 chr2  59120641        175499301       chr2.02

    #	copy all of this stuff to the klusters:
    cd /hive/data/genomes/mm10
    # -p: the staging directory already exists when the lineage specific
    #	repeat procedure in this file has been run, plain mkdir would fail
    mkdir -p /hive/data/genomes/staging/data/genomes/mm10
    cp -p jkStuff/mm10.11.ooc jkStuff/mm10.nonBridged.lift chrom.sizes \
	mm10.2bit /hive/data/genomes/staging/data/genomes/mm10
    # request rsync copy from cluster admin

#########################################################################
# AUTO UPDATE GENBANK (DONE - 2012-02-08 - Hiram)
    # examine the file:
    /cluster/data/genomes/genbank/data/genomes/organism.lst
    # for your species to see what counts it has for:
# organism       mrnaCnt estCnt  refSeqCnt
# Mus musculus    334577  4853663 26288
    # to decide which "native" mrna or ests you want to specify in genbank.conf
    # of course, mm10 has plenty of everything

    ssh hgwdev
    cd $HOME/kent/src/hg/makeDb/genbank
    git pull
    # edit etc/genbank.conf to add mm10 just after mm9 and commit to GIT
# mm10
mm10.serverGenome = /hive/data/genomes/mm10/mm10.2bit
mm10.clusterGenome = /scratch/data/genomes/mm10/mm10.2bit
mm10.ooc = /scratch/data/genomes/mm10/mm10.11.ooc
mm10.align.unplacedChroms = chr*
mm10.lift = /scratch/data/genomes/mm10/mm10.nonBridged.lift
mm10.refseq.mrna.native.pslCDnaFilter  = ${finished.refseq.mrna.native.pslCDnaFilter}
mm10.refseq.mrna.xeno.pslCDnaFilter    = ${finished.refseq.mrna.xeno.pslCDnaFilter}
mm10.genbank.mrna.native.pslCDnaFilter = ${finished.genbank.mrna.native.pslCDnaFilter}
mm10.genbank.mrna.xeno.pslCDnaFilter   = ${finished.genbank.mrna.xeno.pslCDnaFilter}
mm10.genbank.est.native.pslCDnaFilter  = ${finished.genbank.est.native.pslCDnaFilter}
mm10.downloadDir = mm10
mm10.refseq.mrna.xeno.load  = yes
mm10.refseq.mrna.xeno.loadDesc = yes
mm10.mgc = yes
mm10.genbank.mrna.blatTargetDb = yes
# mm10.ccds.ncbiBuild = 37.2
# mm10.upstreamGeneTbl = refGene
# mm10.upstreamMaf = multiz30way
# /hive/data/genomes/mm10/bed/multiz30way/species.list

    # end of section added to etc/genbank.conf
    git commit -m "adding mm10 definitions" genbank.conf
    git push
    make etc-update

    ssh hgwdev			# used to do this on "genbank" machine
    screen			# long running job managed in screen
    cd /cluster/data/genomes/genbank
    time nice -n +19 ./bin/gbAlignStep -initial mm10 &
    #	var/build/logs/2012.02.08-11:38:50.mm10.initalign.log
    #	real    795m52.388s

    # load database when finished
    ssh hgwdev
    cd /cluster/data/genomes/genbank
    time nice -n +19 ./bin/gbDbLoadStep -drop -initialLoad mm10 &
    #	logFile: var/dbload/hgwdev/logs/2012.02.09-10:05:25.dbload.log
    #	real    114m56.461s

    # enable daily alignment and update of hgwdev (DONE - 2012-02-09 - Hiram)
    cd ~/kent/src/hg/makeDb/genbank
    git pull
    # add mm10 to:
        etc/align.dbs
        etc/hgwdev.dbs
    git commit -m "Added mm10." etc/align.dbs etc/hgwdev.dbs
    git push
    make etc-update

############################################################################
# running cpgIsland business (DONE - 2012-02-09 - Hiram)
    mkdir /hive/data/genomes/mm10/bed/cpgIsland
    cd /hive/data/genomes/mm10/bed/cpgIsland
    # use a previous binary for this program
    ln -s ../../../mm9/bed/cpgIsland/hg3rdParty/cpgIslands/cpglh.exe .

    mkdir -p hardMaskedFa
    cut -f1 ../../chrom.sizes | while read C
do
    echo ${C}
    twoBitToFa ../../mm10.2bit:$C stdout \
	| maskOutFa stdin hard hardMaskedFa/${C}.fa
done

    ssh swarm
    cd /hive/data/genomes/mm10/bed/cpgIsland
    mkdir results
    cut -f1 ../../chrom.sizes > chr.list
    cat << '_EOF_' > template
#LOOP
./runOne $(root1) {check out exists results/$(root1).cpg}
#ENDLOOP
'_EOF_'
    # << happy emacs

    #	the faCount business is to make sure there is enough sequence to
    #	work with in the fasta.  cpglh.exe does not like files with too many
    #	N's - it gets stuck.
    cat << '_EOF_' > runOne
#!/bin/csh -fe
# runOne - run cpglh.exe on one hard masked sequence
#   $1: sequence name, the input file is hardMaskedFa/$1.fa
#   $2: result file to create
# C is taken from the faCount line for the sequence (header and total lines
# are filtered out) as column 2 minus column 7 - presumably the sequence
# length minus the N count, TODO confirm against the faCount column layout.
set C = `faCount hardMaskedFa/$1.fa | egrep -v "^#seq|^total" | awk '{print  $2 - $7 }'`
if ( $C > 200 ) then
    # write to a scratch file named with this shell's process id, then move
    # the finished output to the result file
    ./cpglh.exe hardMaskedFa/$1.fa > /scratch/tmp/$1.$$
    mv /scratch/tmp/$1.$$ $2
else
    # not enough sequence to work with: leave an empty result file
    touch $2
endif
'_EOF_'
    # << happy emacs
    chmod +x runOne

    gensub2 chr.list single template jobList
    para create jobList
    para try
    para check ... etc
    para time
# Completed: 66 of 66 jobs
# CPU time in finished jobs:        191s       3.19m     0.05h    0.00d  0.000 y
# IO & Wait Time:                   189s       3.14m     0.05h    0.00d  0.000 y
# Average job time:                   6s       0.10m     0.00h    0.00d
# Longest finished job:              19s       0.32m     0.01h    0.00d
# Submission to last job:            51s       0.85m     0.01h    0.00d

    # Transform cpglh output to bed +
    # For every line of the concatenated results the awk program:
    #	- subtracts one from the start in column 2 (presumably converting a
    #	  1-based start to the 0-based bed start - confirm with cpglh output)
    #	- computes width as column 3 minus that adjusted start
    #	- prints ten tab separated fields: $1, start, $3, "$5 $6" (as one
    #	  space separated field), width, $6, width*$7*0.01 rounded to an
    #	  integer, 100*2*$6/width to one decimal, $7, $9
    catDir results | awk '{
$2 = $2 - 1;
width = $3 - $2;
printf("%s\t%d\t%s\t%s %s\t%s\t%s\t%0.0f\t%0.1f\t%s\t%s\n",
       $1, $2, $3, $5,$6, width,
       $6, width*$7*0.01, 100.0*2*$6/width, $7, $9);
}' > cpgIsland.bed

    # verify longest unique chrom name:
    cut -f1 cpgIsland.bed | awk '{print length($0)}' | sort -rn | head -1
    #	20
    # update the length 14 in the template to be 20 (the longest name found above):
    sed -e "s/14/20/" $HOME/kent/src/hg/lib/cpgIslandExt.sql > cpgIslandExt.sql

    cd /hive/data/genomes/mm10/bed/cpgIsland
    hgLoadBed mm10 cpgIslandExt -tab -sqlTable=cpgIslandExt.sql cpgIsland.bed
# Loaded 16023 elements of size 10

    featureBits mm10 cpgIslandExt
    #	10495450 bases of 2652783500 (0.396%) in intersection
    # compare to previous:
    featureBits mm9 cpgIslandExt
    #	10496250 bases of 2620346127 (0.401%) in intersection

    # there should be no output from checkTableCoords:
    checkTableCoords -verboseBlocks -table=cpgIslandExt mm10

    #	cleanup, unless you want to move them to the genscan procedure below
    rm -fr hardMaskedFa

#########################################################################
# GENSCAN GENE PREDICTIONS (DONE - 2012-02-09,10 - Hiram)
    mkdir /hive/data/genomes/mm10/bed/genscan
    cd /hive/data/genomes/mm10/bed/genscan
    # use a previously existing genscan binary
    ln -s ../../../mm9/bed/genscan/hg3rdParty .

    # create hard masked .fa files
    mkdir -p hardMaskedFa
    cut -f1 ../../chrom.sizes | while read C
do
    echo ${C}
    twoBitToFa ../../mm10.2bit:$C stdout \
	| maskOutFa stdin hard hardMaskedFa/${C}.fa
done

    # Generate a list file, genome.list, of all the hard-masked contig chunks:
    find ./hardMaskedFa/ -type f | sed -e 's#^./##' > genome.list

    wc -l genome.list
    #	66 genome.list

    # Run on small cluster (more mem than big cluster).
    ssh encodek
    cd /hive/data/genomes/mm10/bed/genscan
    # Make 3 subdirectories for genscan to put their output files in
    mkdir gtf pep subopt
    # Create template file, template, for gensub2.  For example (3-line file):
    cat << '_EOF_' > template
#LOOP
/cluster/bin/x86_64/gsBig {check in exists+ $(path1)} {check out exists gtf/$(root1).gtf} -trans={check out exists pep/$(root1).pep} -subopt={check out exists subopt/$(root1).bed} -exe=hg3rdParty/genscanlinux/genscan -par=hg3rdParty/genscanlinux/HumanIso.smat -tmp=/tmp -window=2400000
#ENDLOOP
'_EOF_'
    # << emacs
    gensub2 genome.list single template jobList
    para create jobList
    para try
    para check ... etc...
    para time
# Crashed: 2 jobs
# CPU time in finished jobs:     171336s    2855.60m    47.59h    1.98d  0.005 y
# IO & Wait Time:                   261s       4.35m     0.07h    0.00d  0.000 y
# Average job time:                2640s      44.00m     0.73h    0.03d
# Longest finished job:           22618s     376.97m     6.28h    0.26d
# Submission to last job:         28682s     478.03m     7.97h    0.33d

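    # one of the two crashed jobs was just a stray line in the jobList:
    # a line with the string '_EOF_' got in there.  Most likely cause: the
    # here-doc above is opened with << '_EOF_', whose terminator is the bare
    # word _EOF_, so a literally pasted '_EOF_' line (with the quotes) becomes
    # part of the template instead of ending it.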

    # as with mm9, chr7 did not work.  Break it up into pieces
    mkdir /hive/data/genomes/mm10/bed/genscan/chr7Split
    cd /hive/data/genomes/mm10/bed/genscan/chr7Split
    grep chr7 ../../../jkStuff/mm10.nonBridged.lift | grep -v random \
	> chr7.nonBridged.lift
    faToTwoBit ../hardMaskedFa/chr7.fa chr7.2bit
    ~/kent/src/hg/utils/lft2BitToFa.pl chr7.2bit chr7.nonBridged.lift \
	| sed -e "s/chr7./chr7_/" > chr7.nonBridged.fa
    faSplit sequence chr7.nonBridged.fa 100 split7/chr7_

    ln -s ../../../../mm9/bed/genscan/hg3rdParty .
    # build cmdList.sh: one backgrounded gsBig command per file in split7,
    # followed by a 'wait' so the script only returns when all are finished.
    # Output names are plain gtf/<piece>.gtf, pep/<piece>.pep and
    # subopt/<piece>.bed so that the chr7_* globs used below will find them
    # (no gensub2-style braces here, this is not a parasol template)
    echo '#!/bin/sh' > cmdList.sh
    ls split7 | while read F
do
echo "/cluster/bin/x86_64/gsBig split7/${F} gtf/${F}.gtf -trans=pep/${F}.pep -subopt=subopt/${F}.bed -exe=hg3rdParty/genscanlinux/genscan -par=hg3rdParty/genscanlinux/HumanIso.smat -tmp=/tmp -window=2400000 &"
done >> cmdList.sh
    echo "wait" >> cmdList.sh
    chmod +x cmdList.sh
    mkdir gtf pep subopt
    time ./cmdList.sh > run.log 2>&1
    # about 20 minutes

    # fix the names in the lift file
    cat chr7.nonBridged.lift | sed -e "s/chr7./chr7_/" > chr7.lift

    # the sed mangling will provide unique names for them all, but they
    #	will not be in the strict numerical order that genscan usually produces
    cat gtf/chr7_*.gtf | liftUp -type=.gtf stdout chr7.lift error stdin \
	| sed -e "s/chr7_0\([0-4]\)\./chr7.\1/g" > chr7.gtf
    cat subopt/chr7_*.bed | liftUp -type=.bed stdout chr7.lift error stdin \
	| sed -e "s/chr7_0\([0-4]\)\./chr7.\1/g" > chr7.subopt.bed
    cat pep/chr7_*.pep | sed -e "s/chr7_0\([0-4]\)\./chr7.\1/g" > chr7.pep
    cp -p chr7.pep ../pep
    cp -p chr7.gtf ../gtf
    cp -p chr7.subopt.bed ../subopt/chr7.bed

    find ./gtf -type f | xargs -n 256 endsInLf -zeroOk

    # Concatenate results:
    cd /hive/data/genomes/mm10/bed/genscan
    find ./gtf -type f | xargs cat > genscan.gtf
    find ./pep -type f | xargs cat > genscan.pep
    find ./subopt -type f | xargs cat > genscanSubopt.bed

    # Load into the database (without -genePredExt because no frame info):
    # Don't load the Pep anymore -- redundant since it's from genomic.
    ssh hgwdev
    cd /hive/data/genomes/mm10/bed/genscan
    # to construct a local file with the genePred business:
    gtfToGenePred genscan.gtf genscan.gp
    # this produces exactly the same thing and loads the table:
    ldHgGene -gtf mm10 genscan genscan.gtf
    #	Read 45012 transcripts in 323529 lines in 1 files
    #	45012 groups 59 seqs 1 sources 1 feature types
    #	45012 gene predictions
    hgLoadBed mm10 genscanSubopt genscanSubopt.bed
    #	Read 526572 elements of size 6 from genscanSubopt.bed
    featureBits mm10 genscan
    #	55743040 bases of 2652783500 (2.101%) in intersection
    # previously:
    featureBits mm9 genscan
    #	55293837 bases of 2620346127 (2.110%) in intersection

#########################################################################
# CREATE MICROSAT TRACK (DONE - 2012-02-09 - Hiram)
    ssh hgwdev
    # work in the same /hive/data/genomes/mm10/bed hierarchy as every other
    # track so the relative ../simpleRepeat path below resolves
    mkdir /hive/data/genomes/mm10/bed/microsat
    cd /hive/data/genomes/mm10/bed/microsat
    # keep simpleRepeat rows where column 5 is 2 or 3, column 6 >= 15,
    # column 8 == 100 and column 9 == 0; the item name is <col6>x<col16>
    # (presumably di-/tri-nucleotide repeats with at least 15 perfect
    #  copies -- confirm against the simpleRepeat.bed column layout)
    awk '($5==2 || $5==3) && $6 >= 15 && $8 == 100 && $9 == 0 {printf("%s\t%s\t%s\t%dx%s\n", $1, $2, $3, $6, $16);}' \
	../simpleRepeat/simpleRepeat.bed > microsat.bed
    hgLoadBed mm10 microsat microsat.bed
    #	Read 197237 elements of size 4 from microsat.bed

#########################################################################
#  BLATSERVERS ENTRY (DONE - 2012-02-09 - Hiram)
#	After getting a blat server assigned by the Blat Server Gods,
    ssh hgwdev

    hgsql -e 'INSERT INTO blatServers (db, host, port, isTrans, canPcr) \
	VALUES ("mm10", "blat13", "17832", "1", "0"); \
	INSERT INTO blatServers (db, host, port, isTrans, canPcr) \
	VALUES ("mm10", "blat13", "17833", "0", "1");' \
	    hgcentraltest
    #	test it with some sequence

############################################################################
# set default position the same as was mm9 via blat
#	(DONE - 2012-02-09 - Hiram)
    hgsql -e \
'update dbDb set defaultPos="chr12:56694976-56714605" where name="mm10";' \
	hgcentraltest

############################################################################
# constructing downloads (DONE - 2012-02-09 - Hiram)
    cd /hive/data/genomes/mm10
    # some of the smaller bits are missing the simple repeat results
    time makeDownloads.pl -allowMissedTrfs -workhorse=hgwdev mm10
    #	real    41m42.408s

    # edit the README files in goldenPath/*/README.txt

#########################################################################
# create pushQ entry (DONE - 2012-02-09 - Hiram)
    # first make sure all.joiner is up to date and has this new organism
    # a keys check should be clean:
    cd ~/kent/src/hg/makeDb/schema
    # restrict the check to the mm10 database only
    joinerCheck -database=mm10 -keys all.joiner

    mkdir /hive/data/genomes/mm10/pushQ
    cd /hive/data/genomes/mm10/pushQ
    makePushQSql.pl mm10 > mm10.sql 2> stderr.out
    # check stderr.out for no significant problems, it is common to see:
# WARNING: hgwdev does not have /gbdb/mm10/wib/gc5Base.wib
# WARNING: hgwdev does not have /gbdb/mm10/wib/quality.wib
# WARNING: hgwdev does not have /gbdb/mm10/bbi/quality.bw
# WARNING: mm10 does not have seq
# WARNING: mm10 does not have extFile
# *** All done!

    # which are not real problems
    # if some tables are not identified:
# WARNING: Could not tell (from trackDb, all.joiner and hardcoded lists of
# supporting and genbank tables) which tracks to assign these tables to:
#  list of tables will be in the output
# put them in manually after loading the pushQ entry
    scp -p mm10.sql hgwbeta:/tmp
    ssh hgwbeta
    cd /tmp
    hgsql qapushq < mm10.sql

#########################################################################
# lifting ensGene track from mm9 (DONE - 2012-02-22 - Hiram)
    # no gene tracks yet on mm10.  liftOver mm9 ensGene to mm10
    # history of mm9 ensGene indicates it is the same as v64 release
    #	with v65 being identical
    mkdir /hive/data/genomes/mm10/bed/ensGene
    cd /hive/data/genomes/mm10/bed/ensGene
    hgsql -N -e "select * from ensGene;" mm9 | cut -f2- > mm9.ensGene.gp
    liftOver -genePred mm9.ensGene.gp \
	/gbdb/mm9/liftOver/mm9ToMm10.over.chain.gz \
	mm10.lifted.ensGene.gp unmapped.ensGene.gp
    wc -l *.gp
    #	95651 mm10.lifted.ensGene.gp
    #	95883 mm9.ensGene.gp
    #	464 unmapped.ensGene.gp

    hgLoadGenePred -skipInvalid -genePredExt mm10 ensGene mm10.lifted.ensGene.gp
    #	Warning: skipping 118 invalid genePreds

    # make a list of what did get loaded:
    hgsql -N -e "select name from ensGene;" mm10 \
	| sort -u > mm10.name.ensGene.txt
    wc -l mm10.name.ensGene.txt
    #	95533 mm10.name.ensGene.txt

    hgsql -N -e "select * from ensPep;" mm9 | sort > mm9.ensPep.tab
    hgsql -N -e "select * from ensGtp;" mm9 | sort -k2,2 > mm9.ensGtp.tab
    hgsql -N -e "select * from ensemblToGeneName;" mm9 | sort -k1,1 \
	> mm9.ensemblToGeneName.tab
    hgsql -N -e "select * from ensemblSource;" mm9 | sort -k1,1 \
	> mm9.ensemblSource.tab

    # select out ensGtp records that match with the names in mm10 ensGene:
    join -1 2 -2 1 -o "1.1,1.2,1.3" mm9.ensGtp.tab mm10.name.ensGene.txt \
	| tr '[ ]' '[\t]' > mm10.ensGtp.tab
    wc -l *.ensGtp.tab
    #	95533 mm10.ensGtp.tab
    #	95883 mm9.ensGtp.tab

    # select out ensPep records that match with the names in mm10 ensGene:
    join -1 1 -2 2 -o "1.1,1.2" mm9.ensPep.tab mm10.ensGtp.tab \
	| tr '[ ]' '[\t]' > mm10.ensPep.tab

    wc -l mm9.ensPep.tab mm10.ensPep.tab
    #	55798 mm9.ensPep.tab
    #	55485 mm10.ensPep.tab

    # select out ensemblSource records that match the mm10 ensGene names:
    join -1 1 -2 1 -o "1.1,1.2" mm9.ensemblSource.tab mm10.name.ensGene.txt \
	| tr '[ ]' '[\t]' > mm10.ensemblSource.tab
    wc -l mm9.ensemblSource.tab mm10.ensemblSource.tab
    #	95883 mm9.ensemblSource.tab
    #	95533 mm10.ensemblSource.tab

    # select out ensemblToGeneName records that match the mm10 ensGene names:
    join -1 1 -2 1 -o "1.1,1.2" mm9.ensemblToGeneName.tab \
	mm10.name.ensGene.txt | tr '[ ]' '[\t]' > mm10.ensemblToGeneName.tab
    wc -l mm9.ensemblToGeneName.tab mm10.ensemblToGeneName.tab
    #	95883 mm9.ensemblToGeneName.tab
    #	95533 mm10.ensemblToGeneName.tab

    hgPepPred mm10 tab ensPep mm10.ensPep.tab
    hgLoadSqlTab mm10 ensGtp ~/kent/src/hg/lib/ensGtp.sql mm10.ensGtp.tab
    sed -e "s/15/18/" ~/kent/src/hg/lib/ensemblSource.sql > ensemblSource.sql
    hgLoadSqlTab mm10 ensemblSource ensemblSource.sql mm10.ensemblSource.tab

    # find sizes for indexes
    # NL = length of the longest value in column 1 of the tab file,
    # VL = length of the longest value in column 2
  NL=`awk '{print length($1)}' mm10.ensemblToGeneName.tab | sort -rn | head -1`
  VL=`awk '{print length($2)}' mm10.ensemblToGeneName.tab | sort -rn | head -1`
    # construct sql definition with appropriate index sizes
    # the knownTo.sql definition is reused as a starting point: the table
    # name becomes ensemblToGeneName, the name index becomes a PRIMARY KEY
    # with prefix length $NL, and the value index prefix length becomes $VL
    # (double quotes on the sed expression so $NL and $VL are expanded)
    sed -e "s/ knownTo / ensemblToGeneName /; s/known gene/ensGen/; s/INDEX(name(12)/PRIMARY KEY(name($NL)/; s/value(12)/value($VL)/" \
	~/kent/src/hg/lib/knownTo.sql > ensemblToGeneName.sql

    # load the tab file using the table definition generated above
    hgLoadSqlTab mm10 ensemblToGeneName ensemblToGeneName.sql \
	mm10.ensemblToGeneName.tab

hgsql -e 'INSERT INTO trackVersion \
    (db, name, who, version, updateTime, comment, source, dateReference) \
    VALUES("mm10", "ensGene", "hiram", "65", now(), \
        "lifted from mm9 ensGene 65", \
        "lifted from mm9 ensGene 65", \
        "dec2011" );' hgFixed

#########################################################################
# Swap lastz Human hg19 (DONE - 2012-03-08 - Hiram)
    # original alignment to hg19
    cd /hive/data/genomes/hg19/bed/lastzMm10.2012-03-07
    cat fb.hg19.chainMm10Link.txt
    #	1021265143 bases of 2897316137 (35.249%) in intersection

    #	and the swap
    mkdir /hive/data/genomes/mm10/bed/blastz.hg19.swap
    cd /hive/data/genomes/mm10/bed/blastz.hg19.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/hg19/bed/lastzMm10.2012-03-07/DEF \
	-swap -noLoadChainSplit -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
    #	real    72m32.794s
    cat fb.mm10.chainHg19Link.txt
    #	1014045890 bases of 2652783500 (38.226%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/mm10/bed
    ln -s blastz.hg19.swap lastz.hg19

#########################################################################
# LASTZ RAT Rn4 (DONE - 2012-03-08 - Hiram)
    mkdir /hive/data/genomes/mm10/bed/lastzRn4.2012-03-08
    cd /hive/data/genomes/mm10/bed/lastzRn4.2012-03-08

    cat << '_EOF_' > DEF
# mouse vs rat
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
# Specially tuned blastz parameters from Webb Miller
BLASTZ_O=600
BLASTZ_E=150
BLASTZ_Y=15000
BLASTZ_T=2
BLASTZ_K=4500
BLASTZ_Q=/scratch/data/blastz/human_chimp.v2.q

# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: Rat Rn4
SEQ2_DIR=/scratch/data/rn4/rn4.2bit
SEQ2_LEN=/scratch/data/rn4/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LAP=0

BASE=/hive/data/genomes/mm10/bed/lastzRn4.2012-03-08
TMPDIR=/scratch/tmp
'_EOF_'
    # << happy emacs

    #	establish a screen to control this job
    screen -S rn4Mm10
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	`pwd`/DEF \
	-bigClusterHub=swarm -chainMinScore=5000 -chainLinearGap=medium \
	-noLoadChainSplit -syntenicNet -workhorse=hgwdev \
	-smallClusterHub=encodek > do.log 2>&1 &
    #	real    129m48.444s
    cat fb.mm10.chainRn4Link.txt
    #	1449612208 bases of 2652783500 (54.645%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/mm10/bed
    ln -s lastzRn4.2012-03-08 lastz.rn4

    #	and the swap
    mkdir /hive/data/genomes/rn4/bed/blastz.mm10.swap
    cd /hive/data/genomes/rn4/bed/blastz.mm10.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzRn4.2012-03-08/DEF \
	-swap -bigClusterHub=swarm -chainMinScore=5000 -chainLinearGap=medium \
	-noLoadChainSplit -syntenicNet -workhorse=hgwdev \
	-smallClusterHub=encodek > swap.log 2>&1 &
    #	real    71m10.645s
    cat fb.rn4.chainMm10Link.txt
    #	1449012636 bases of 2571531505 (56.348%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/rn4/bed
    ln -s blastz.mm10.swap lastz.mm10

#########################################################################
# LASTZ Gorilla gorGor3 (DONE - 2012-03-08 - Hiram)
    mkdir /hive/data/genomes/mm10/bed/lastzGorGor3.2012-03-08
    cd /hive/data/genomes/mm10/bed/lastzGorGor3.2012-03-08

    cat << '_EOF_' > DEF
# gorilla vs mouse
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: Gorilla GorGor3
SEQ2_DIR=/scratch/data/gorGor3/gorGor3.2bit
SEQ2_LEN=/scratch/data/gorGor3/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LAP=0
SEQ2_LIMIT=300

BASE=/hive/data/genomes/mm10/bed/lastzGorGor3.2012-03-08
TMPDIR=/scratch/tmp
'_EOF_'
    # << happy emacs

    #	establish a screen to control this job
    screen -S mm10GorGor3
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	`pwd`/DEF \
	-syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
    #	real    625m17.180s
    cat fb.mm10.chainGorGor3Link.txt
    #	901610588 bases of 2652783500 (33.987%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/mm10/bed
    ln -s lastzGorGor3.2012-03-08 lastz.gorGor3

    mkdir /hive/data/genomes/gorGor3/bed/blastz.mm10.swap
    cd /hive/data/genomes/gorGor3/bed/blastz.mm10.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzGorGor3.2012-03-08/DEF \
	-swap -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
    #	real    91m3.616s
    cat fb.gorGor3.chainMm10Link.txt
    #	969595533 bases of 2822760080 (34.349%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/gorGor3/bed
    ln -s blastz.mm10.swap lastz.mm10

##############################################################################
# LASTZ Gibbon nomLeu1 (DONE - 2012-03-08 - Hiram)
    mkdir /hive/data/genomes/mm10/bed/lastzNomLeu1.2012-03-08
    cd /hive/data/genomes/mm10/bed/lastzNomLeu1.2012-03-08

    cat << '_EOF_' > DEF
# gibbon vs mouse
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: Gibbon NomLeu1
SEQ2_DIR=/scratch/data/nomLeu1/nomLeu1.2bit
SEQ2_LEN=/scratch/data/nomLeu1/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LAP=0
SEQ2_LIMIT=300

BASE=/hive/data/genomes/mm10/bed/lastzNomLeu1.2012-03-08
TMPDIR=/scratch/tmp
'_EOF_'
    # << happy emacs

    #	establish a screen to control this job
    screen -S mm10NomLeu1
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	`pwd`/DEF \
	-syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
    #	real    556m26.589s
    cat fb.mm10.chainNomLeu1Link.txt
    #	905455766 bases of 2652783500 (34.132%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/mm10/bed
    ln -s lastzNomLeu1.2012-03-08 lastz.nomLeu1

    mkdir /hive/data/genomes/nomLeu1/bed/blastz.mm10.swap
    cd /hive/data/genomes/nomLeu1/bed/blastz.mm10.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzNomLeu1.2012-03-08/DEF \
	-swap -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
    #	real    66m50.839s
    cat fb.nomLeu1.chainMm10Link.txt
    #	892362811 bases of 2756591777 (32.372%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/nomLeu1/bed
    ln -s blastz.mm10.swap lastz.mm10

##############################################################################
# LASTZ Rhesus rheMac3 (DONE - 2012-03-08 - Hiram)
    mkdir /hive/data/genomes/mm10/bed/lastzRheMac3.2012-03-08
    cd /hive/data/genomes/mm10/bed/lastzRheMac3.2012-03-08

    cat << '_EOF_' > DEF
# rhesus vs mouse
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: Rhesus RheMac3
SEQ2_DIR=/scratch/data/rheMac3/rheMac3.2bit
SEQ2_LEN=/scratch/data/rheMac3/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LAP=0
SEQ2_LIMIT=300

BASE=/hive/data/genomes/mm10/bed/lastzRheMac3.2012-03-08
TMPDIR=/scratch/tmp
'_EOF_'
    # << happy emacs

    #	establish a screen to control this job
    screen -S mm10RheMac3
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	`pwd`/DEF \
	-syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
    #	real    596m55.622s
    cat fb.mm10.chainRheMac3Link.txt
    #	900117108 bases of 2652783500 (33.931%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/mm10/bed
    ln -s lastzRheMac3.2012-03-08 lastz.rheMac3

    mkdir /hive/data/genomes/rheMac3/bed/blastz.mm10.swap
    cd /hive/data/genomes/rheMac3/bed/blastz.mm10.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzRheMac3.2012-03-08/DEF \
	-swap -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
    #	real    69m5.839s
    cat fb.rheMac3.chainMm10Link.txt
    #	883164992 bases of 2639145830 (33.464%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/rheMac3/bed
    ln -s blastz.mm10.swap lastz.mm10

##############################################################################
# LASTZ Baboon papHam1 (DONE - 2012-03-09 - Hiram)
    mkdir /hive/data/genomes/mm10/bed/lastzPapHam1.2012-03-09
    cd /hive/data/genomes/mm10/bed/lastzPapHam1.2012-03-09

    cat << '_EOF_' > DEF
# baboon vs mouse
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: Baboon PapHam1
SEQ2_DIR=/scratch/data/papHam1/papHam1.2bit
SEQ2_LEN=/scratch/data/papHam1/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LAP=0
SEQ2_LIMIT=500

BASE=/hive/data/genomes/mm10/bed/lastzPapHam1.2012-03-09
TMPDIR=/scratch/tmp
'_EOF_'
    # << happy emacs

    #	establish a screen to control this job
    screen -S mm10PapHam1
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	`pwd`/DEF \
	-syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
    #	real    1138m52.716s
    cat fb.mm10.chainPapHam1Link.txt
    #	890718423 bases of 2652783500 (33.577%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/mm10/bed
    ln -s lastzPapHam1.2012-03-09 lastz.papHam1

    # better to have reciprocal best for this one since it is low coverage:
    cd /hive/data/genomes/mm10/bed/lastzPapHam1.2012-03-09
    time doRecipBest.pl mm10 papHam1 -buildDir=`pwd` -workhorse=hgwdev \
	> best.log 2>&1 &
    #	real    899m48.908s

    #	and the swap, papHam1 becomes the target genome
    mkdir /hive/data/genomes/papHam1/bed/blastz.mm10.swap
    cd /hive/data/genomes/papHam1/bed/blastz.mm10.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzPapHam1.2012-03-09/DEF \
	-swap -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
    #	real    548m15.438s
    # the swap writes its featureBits summary named for the new target
    # (papHam1); the measurement below is against the papHam1 genome size
    cat fb.papHam1.chainMm10Link.txt
    #	878016290 bases of 2741867288 (32.023%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/papHam1/bed
    ln -s blastz.mm10.swap lastz.mm10

##############################################################################
# Swap ponAbe2 lastz (DONE - 2012-03-09 - Hiram)
    # original alignment result:
    cd /hive/data/genomes/ponAbe2/bed/lastzMm10.2012-03-08
    cat fb.ponAbe2.chainMm10Link.txt
    #	946932454 bases of 3093572278 (30.610%) in intersection

    #	and the swap
    mkdir /hive/data/genomes/mm10/bed/blastz.ponAbe2.swap
    cd /hive/data/genomes/mm10/bed/blastz.ponAbe2.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/ponAbe2/bed/lastzMm10.2012-03-08/DEF \
	-swap -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
    #	real    72m38.550s
    cat fb.mm10.chainPonAbe2Link.txt
    #	915093866 bases of 2652783500 (34.496%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/mm10/bed
    ln -s blastz.ponAbe2.swap lastz.ponAbe2

##############################################################################
# LASTZ Squirrel monkey saiBol1 (DONE - 2012-03-09 - Hiram)
    mkdir /hive/data/genomes/mm10/bed/lastzSaiBol1.2012-03-09
    cd /hive/data/genomes/mm10/bed/lastzSaiBol1.2012-03-09

    cat << '_EOF_' > DEF
# squirrel monkey vs mouse
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: Squirrel monkey SaiBol1
SEQ2_DIR=/hive/data/genomes/saiBol1/saiBol1.2bit
SEQ2_LEN=/hive/data/genomes/saiBol1/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LAP=0
SEQ2_LIMIT=50

BASE=/hive/data/genomes/mm10/bed/lastzSaiBol1.2012-03-09
TMPDIR=/scratch/tmp
'_EOF_'
    # << happy emacs

    #	establish a screen to control this job
    screen -S mm10SaiBol1
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	`pwd`/DEF \
	-syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
    #	real    538m42.643s
    cat fb.mm10.chainSaiBol1Link.txt
    #	857872391 bases of 2652783500 (32.339%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/mm10/bed
    ln -s lastzSaiBol1.2012-03-09 lastz.saiBol1

    mkdir /hive/data/genomes/saiBol1/bed/blastz.mm10.swap
    cd /hive/data/genomes/saiBol1/bed/blastz.mm10.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzSaiBol1.2012-03-09/DEF \
	-swap -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
    #	real    59m36.306s
    cat fb.saiBol1.chainMm10Link.txt
    #	838457857 bases of 2477131095 (33.848%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/saiBol1/bed
    ln -s blastz.mm10.swap lastz.mm10

##############################################################################
# LASTZ Marmoset calJac3 (DONE - 2012-03-09 - Hiram)
    mkdir /hive/data/genomes/mm10/bed/lastzCalJac3.2012-03-09
    cd /hive/data/genomes/mm10/bed/lastzCalJac3.2012-03-09

    cat << '_EOF_' > DEF
# marmoset vs mouse
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: Marmoset monkey CalJac3
SEQ2_DIR=/scratch/data/calJac3/calJac3.2bit
SEQ2_LEN=/scratch/data/calJac3/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LAP=0
SEQ2_LIMIT=100

BASE=/hive/data/genomes/mm10/bed/lastzCalJac3.2012-03-09
TMPDIR=/scratch/tmp
'_EOF_'
    # << happy emacs

    #	establish a screen to control this job
    screen -S mm10CalJac3
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	`pwd`/DEF \
	-syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
    #	real    529m39.657s
    cat fb.mm10.chainCalJac3Link.txt
    #	860830771 bases of 2652783500 (32.450%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/mm10/bed
    ln -s lastzCalJac3.2012-03-09 lastz.calJac3

    mkdir /hive/data/genomes/calJac3/bed/blastz.mm10.swap
    cd /hive/data/genomes/calJac3/bed/blastz.mm10.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzCalJac3.2012-03-09/DEF \
	-swap -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
    #	real    67m21.635s
    cat fb.calJac3.chainMm10Link.txt
    #	861565545 bases of 2752505800 (31.301%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/calJac3/bed
    ln -s blastz.mm10.swap lastz.mm10

##############################################################################
# LASTZ Chimp PanTro4 (DONE - 2012-03-09 - Hiram)
    # the working directory must be the same directory named by BASE in
    # the DEF file below, and by the lastz.panTro4 symlink made afterwards
    mkdir /hive/data/genomes/mm10/bed/lastzPanTro4.2012-03-09
    cd /hive/data/genomes/mm10/bed/lastzPanTro4.2012-03-09

    cat << '_EOF_' > DEF
# chimp vs mouse
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz

# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=10000000
SEQ1_LAP=10000

# QUERY: Chimp PanTro4
SEQ2_DIR=/hive/data/genomes/panTro4/panTro4.2bit
SEQ2_LEN=/hive/data/genomes/panTro4/chrom.sizes
SEQ2_CHUNK=20000000
SEQ2_LAP=0
SEQ2_LIMIT=200

BASE=/hive/data/genomes/mm10/bed/lastzPanTro4.2012-03-09
TMPDIR=/scratch/tmp
'_EOF_'
    # << happy emacs

    #	establish a screen to control this job
    screen -S mm10PanTro4
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	`pwd`/DEF \
	-syntenicNet -noLoadChainSplit \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
    #	real    682m53.046s
    cat fb.mm10.chainPanTro4Link.txt
    #	919836299 bases of 2652783500 (34.674%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/mm10/bed
    ln -s lastzPanTro4.2012-03-09 lastz.panTro4

    mkdir /hive/data/genomes/panTro4/bed/blastz.mm10.swap
    cd /hive/data/genomes/panTro4/bed/blastz.mm10.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzPanTro4.2012-03-09/DEF \
	-swap -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
    #	real    73m23.855s
    cat fb.panTro4.chainMm10Link.txt
    #	926540065 bases of 2902338967 (31.924%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/panTro4/bed
    ln -s blastz.mm10.swap lastz.mm10

##############################################################################
# LASTZ tarsier tarSyr1 (DONE - 2012-03-10 - Hiram)
    mkdir /hive/data/genomes/mm10/bed/lastzTarSyr1.2012-03-10
    cd /hive/data/genomes/mm10/bed/lastzTarSyr1.2012-03-10

    cat << '_EOF_' > DEF
# tarsier vs mouse
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: tarsier TarSyr1
SEQ2_DIR=/scratch/data/tarSyr1/tarSyr1.2bit
SEQ2_LEN=/scratch/data/tarSyr1/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LAP=0
SEQ2_LIMIT=800

BASE=/hive/data/genomes/mm10/bed/lastzTarSyr1.2012-03-10
TMPDIR=/scratch/tmp
'_EOF_'
    # << happy emacs

    #	establish a screen to control this job
    screen -S mm10TarSyr1
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	`pwd`/DEF \
	-syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
    #	real    2457m45.759s
    cat fb.mm10.chainTarSyr1Link.txt
    #	651517559 bases of 2652783500 (24.560%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/mm10/bed
    ln -s lastzTarSyr1.2012-03-10 lastz.tarSyr1

    # better to have reciprocal best for this one since it is low coverage:
    cd /hive/data/genomes/mm10/bed/lastzTarSyr1.2012-03-10
    time doRecipBest.pl mm10 tarSyr1 -buildDir=`pwd` -workhorse=hgwdev \
	> best.log 2>&1 &
    #	real    1176m19.336s

    mkdir /hive/data/genomes/tarSyr1/bed/blastz.mm10.swap
    cd /hive/data/genomes/tarSyr1/bed/blastz.mm10.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzTarSyr1.2012-03-10/DEF \
	-swap -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
    #	real    746m30.852s
    cat fb.tarSyr1.chainMm10Link.txt
    #	691746721 bases of 2768536343 (24.986%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/tarSyr1/bed
    ln -s blastz.mm10.swap lastz.mm10

##############################################################################
# Swap chimp panTro3 to Mm10 (DONE - 2012-03-12 - Hiram)
    # original alignment on panTro3
    cd /hive/data/genomes/panTro3/bed/lastzMm10.2012-03-08
    cat fb.panTro3.chainMm10Link.txt
    #	929073028 bases of 2900529764 (32.031%) in intersection

    # and this swap:
    mkdir /hive/data/genomes/mm10/bed/blastz.panTro3.swap
    cd /hive/data/genomes/mm10/bed/blastz.panTro3.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/panTro3/bed/lastzMm10.2012-03-08/DEF \
	-swap -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
    #	real    68m46.408s
    cat fb.mm10.chainPanTro3Link.txt
    #	922491113 bases of 2652783500 (34.774%) in intersection

##############################################################################
# LASTZ bushbaby otoGar3 (DONE - 2012-03-13 - Hiram)
    # mm10 is the target (SEQ1) and otoGar3 the query (SEQ2) in the DEF below
    mkdir /hive/data/genomes/mm10/bed/lastzOtoGar3.2012-03-13
    cd /hive/data/genomes/mm10/bed/lastzOtoGar3.2012-03-13

    # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
    #	number of jobs, 50,000 to something under 100,000
    # the quoted '_EOF_' delimiter keeps the shell from expanding anything
    #	inside the here-document, so DEF is written exactly as shown
    cat << '_EOF_' > DEF
# bushbaby vs mouse
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: bushbaby OtoGar3
SEQ2_DIR=/hive/data/genomes/otoGar3/otoGar3.2bit
SEQ2_LEN=/hive/data/genomes/otoGar3/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LAP=0
SEQ2_LIMIT=30

BASE=/hive/data/genomes/mm10/bed/lastzOtoGar3.2012-03-13
TMPDIR=/scratch/tmp
'_EOF_'
    # << happy emacs

    #	establish a screen to control this job
    screen -S mm10OtoGar3
    # runs in the background at nice +19; stdout and stderr go to do.log
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	`pwd`/DEF \
	-syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
    #	real    757m32.438s
    cat fb.mm10.chainOtoGar3Link.txt
    #	790408953 bases of 2652783500 (29.795%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/mm10/bed
    ln -s lastzOtoGar3.2012-03-13 lastz.otoGar3

    # swap step: the same DEF is passed with -swap and the work is done in
    #	the otoGar3 tree; stdout and stderr go to swap.log
    mkdir /hive/data/genomes/otoGar3/bed/blastz.mm10.swap
    cd /hive/data/genomes/otoGar3/bed/blastz.mm10.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzOtoGar3.2012-03-13/DEF \
	-swap -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
    #	real    61m18.952s
    cat fb.otoGar3.chainMm10Link.txt
    #	776907989 bases of 2359530453 (32.926%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/otoGar3/bed
    ln -s blastz.mm10.swap lastz.mm10

##############################################################################
# LASTZ mouse lemur micMur1 (DONE - 2012-03-13 - Hiram)
    # mm10 is the target (SEQ1) and micMur1 the query (SEQ2) in the DEF below
    mkdir /hive/data/genomes/mm10/bed/lastzMicMur1.2012-03-13
    cd /hive/data/genomes/mm10/bed/lastzMicMur1.2012-03-13

    # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
    #	number of jobs, 50,000 to something under 100,000
    # the quoted '_EOF_' delimiter keeps the shell from expanding anything
    #	inside the here-document, so DEF is written exactly as shown
    cat << '_EOF_' > DEF
# mouse lemur vs mouse
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: mouse lemur MicMur1
SEQ2_DIR=/scratch/data/micMur1/micMur1.2bit
SEQ2_LEN=/scratch/data/micMur1/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LAP=0
SEQ2_LIMIT=400

BASE=/hive/data/genomes/mm10/bed/lastzMicMur1.2012-03-13
TMPDIR=/scratch/tmp
'_EOF_'
    # << happy emacs

    #	establish a screen to control this job
    screen -S mm10MicMur1
    # runs in the background at nice +19; stdout and stderr go to do.log
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	`pwd`/DEF \
	-syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
    #	real    687m41.863s
    cat fb.mm10.chainMicMur1Link.txt
    #	706607444 bases of 2652783500 (26.636%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/mm10/bed
    ln -s lastzMicMur1.2012-03-13 lastz.micMur1

    # better to have reciprocal best for this one since it is low coverage:
    # doRecipBest.pl is pointed at the lastz build directory via
    #	-buildDir=`pwd`; its output is collected in best.log
    cd /hive/data/genomes/mm10/bed/lastzMicMur1.2012-03-13
    time doRecipBest.pl mm10 micMur1 -buildDir=`pwd` -workhorse=hgwdev \
	> best.log 2>&1 &
    #	real    877m18.105s

    # swap step: the same DEF is passed with -swap and the work is done in
    #	the micMur1 tree; stdout and stderr go to swap.log
    mkdir /hive/data/genomes/micMur1/bed/blastz.mm10.swap
    cd /hive/data/genomes/micMur1/bed/blastz.mm10.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzMicMur1.2012-03-13/DEF \
	-swap -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
    #	real    116m54.411s
    cat fb.micMur1.chainMm10Link.txt
    #	696025630 bases of 1852394361 (37.574%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/micMur1/bed
    ln -s blastz.mm10.swap lastz.mm10

##############################################################################
# LASTZ squirrel speTri2 (DONE - 2012-03-15 - Hiram)
    # mm10 is the target (SEQ1) and speTri2 the query (SEQ2) in the DEF below
    mkdir /hive/data/genomes/mm10/bed/lastzSpeTri2.2012-03-15
    cd /hive/data/genomes/mm10/bed/lastzSpeTri2.2012-03-15

    # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
    #	number of jobs, 50,000 to something under 100,000
    # the quoted '_EOF_' delimiter keeps the shell from expanding anything
    #	inside the here-document, so DEF is written exactly as shown
    cat << '_EOF_' > DEF
# squirrel vs mouse
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: squirrel SpeTri2
SEQ2_DIR=/hive/data/genomes/speTri2/speTri2.2bit
SEQ2_LEN=/hive/data/genomes/speTri2/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LAP=0
SEQ2_LIMIT=50

BASE=/hive/data/genomes/mm10/bed/lastzSpeTri2.2012-03-15
TMPDIR=/scratch/tmp
'_EOF_'
    # << happy emacs

    #	establish a screen to control this job
    screen -S mm10SpeTri2
    # runs in the background at nice +19; stdout and stderr go to do.log
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	`pwd`/DEF \
	-syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
    #	real    935m27.893s
    cat fb.mm10.chainSpeTri2Link.txt
    #	907715417 bases of 2652783500 (34.217%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/mm10/bed
    ln -s lastzSpeTri2.2012-03-15 lastz.speTri2

    # swap step: the same DEF is passed with -swap and the work is done in
    #	the speTri2 tree; stdout and stderr go to swap.log
    mkdir /hive/data/genomes/speTri2/bed/blastz.mm10.swap
    cd /hive/data/genomes/speTri2/bed/blastz.mm10.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzSpeTri2.2012-03-15/DEF \
	-swap -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
    #	real    74m41.819s
    cat fb.speTri2.chainMm10Link.txt
    #	906956512 bases of 2311060300 (39.244%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/speTri2/bed
    ln -s blastz.mm10.swap lastz.mm10

##############################################################################
# LASTZ kangaroo rat dipOrd1 (DONE - 2012-03-15 - Hiram)
    #	establish a screen to control this job
    screen -S mm10DipOrd1
    # mm10 is the target (SEQ1) and dipOrd1 the query (SEQ2) in the DEF below
    mkdir /hive/data/genomes/mm10/bed/lastzDipOrd1.2012-03-15
    cd /hive/data/genomes/mm10/bed/lastzDipOrd1.2012-03-15

    # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
    #	number of jobs, 50,000 to something under 100,000
    # the quoted '_EOF_' delimiter keeps the shell from expanding anything
    #	inside the here-document, so DEF is written exactly as shown
    cat << '_EOF_' > DEF
# kangaroo rat vs mouse
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: kangaroo rat DipOrd1
SEQ2_DIR=/scratch/data/dipOrd1/dipOrd1.2bit
SEQ2_LEN=/scratch/data/dipOrd1/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LAP=0
SEQ2_LIMIT=400

BASE=/hive/data/genomes/mm10/bed/lastzDipOrd1.2012-03-15
TMPDIR=/scratch/tmp
'_EOF_'
    # << happy emacs

    # runs in the background at nice +19; stdout and stderr go to do.log
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	`pwd`/DEF \
	-syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
    #	real    867m19.972s
    cat fb.mm10.chainDipOrd1Link.txt
    #	516232678 bases of 2652783500 (19.460%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/mm10/bed
    ln -s lastzDipOrd1.2012-03-15 lastz.dipOrd1

    # better to have reciprocal best for this one since it is low coverage:
    # doRecipBest.pl is pointed at the lastz build directory via
    #	-buildDir=`pwd`; its output is collected in best.log
    cd /hive/data/genomes/mm10/bed/lastzDipOrd1.2012-03-15
    time doRecipBest.pl mm10 dipOrd1 -buildDir=`pwd` -workhorse=hgwdev \
	> best.log 2>&1 &
    #	real    914m20.405s

    # swap step: the same DEF is passed with -swap and the work is done in
    #	the dipOrd1 tree; stdout and stderr go to swap.log
    mkdir /hive/data/genomes/dipOrd1/bed/blastz.mm10.swap
    cd /hive/data/genomes/dipOrd1/bed/blastz.mm10.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzDipOrd1.2012-03-15/DEF \
	-swap -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
    #	real    115m1.497s
    cat fb.dipOrd1.chainMm10Link.txt
    #	507580668 bases of 1844961421 (27.512%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/dipOrd1/bed
    ln -s blastz.mm10.swap lastz.mm10

##############################################################################
# LASTZ Naked mole-rat hetGla1 (DONE - 2012-03-15 - Hiram)
    #	establish a screen to control this job
    screen -S mm10HetGla1
    # mm10 is the target (SEQ1) and hetGla1 the query (SEQ2) in the DEF below
    mkdir /hive/data/genomes/mm10/bed/lastzHetGla1.2012-03-15
    cd /hive/data/genomes/mm10/bed/lastzHetGla1.2012-03-15

    # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
    #	number of jobs, 50,000 to something under 100,000
    # the quoted '_EOF_' delimiter keeps the shell from expanding anything
    #	inside the here-document, so DEF is written exactly as shown
    cat << '_EOF_' > DEF
# Naked mole-rat vs mouse
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: Naked mole-rat HetGla1
SEQ2_DIR=/scratch/data/hetGla1/hetGla1.2bit
SEQ2_LEN=/scratch/data/hetGla1/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LAP=0
SEQ2_LIMIT=200

BASE=/hive/data/genomes/mm10/bed/lastzHetGla1.2012-03-15
TMPDIR=/scratch/tmp
'_EOF_'
    # << happy emacs

    # runs in the background at nice +19; stdout and stderr go to do.log
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	`pwd`/DEF \
	-syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
    #	real    745m15.097s
    cat fb.mm10.chainHetGla1Link.txt
    #	853221843 bases of 2652783500 (32.163%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/mm10/bed
    ln -s lastzHetGla1.2012-03-15 lastz.hetGla1

    # swap step: the same DEF is passed with -swap and the work is done in
    #	the hetGla1 tree; stdout and stderr go to swap.log
    mkdir /hive/data/genomes/hetGla1/bed/blastz.mm10.swap
    cd /hive/data/genomes/hetGla1/bed/blastz.mm10.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzHetGla1.2012-03-15/DEF \
	-swap -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
    #	real    74m26.471s
    cat fb.hetGla1.chainMm10Link.txt
    #	885195861 bases of 2430064805 (36.427%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/hetGla1/bed
    ln -s blastz.mm10.swap lastz.mm10

##############################################################################
# LASTZ horse equCab2 (DONE - 2012-03-16 - Hiram)
    # establish a screen to control this job with a name to indicate what it is
    screen -S mm10EquCab2
    # mm10 is the target (SEQ1) and equCab2 the query (SEQ2) in the DEF below
    mkdir /hive/data/genomes/mm10/bed/lastzEquCab2.2012-03-16
    cd /hive/data/genomes/mm10/bed/lastzEquCab2.2012-03-16

    # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
    #	number of jobs, 50,000 to something under 100,000
    # the quoted '_EOF_' delimiter keeps the shell from expanding anything
    #	inside the here-document, so DEF is written exactly as shown
    cat << '_EOF_' > DEF
# horse vs mouse
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: horse EquCab2
SEQ2_DIR=/scratch/data/equCab2/equCab2.2bit
SEQ2_LEN=/scratch/data/equCab2/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LAP=0
SEQ2_LIMIT=1

BASE=/hive/data/genomes/mm10/bed/lastzEquCab2.2012-03-16
TMPDIR=/scratch/tmp
'_EOF_'
    # << happy emacs

    # runs in the background at nice +19; stdout and stderr go to do.log
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	`pwd`/DEF \
	-syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
    #	real    566m34.024s
    cat fb.mm10.chainEquCab2Link.txt
    #	912967841 bases of 2652783500 (34.415%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/mm10/bed
    ln -s lastzEquCab2.2012-03-16 lastz.equCab2

    # swap step: the same DEF is passed with -swap and the work is done in
    #	the equCab2 tree; stdout and stderr go to swap.log
    mkdir /hive/data/genomes/equCab2/bed/blastz.mm10.swap
    cd /hive/data/genomes/equCab2/bed/blastz.mm10.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzEquCab2.2012-03-16/DEF \
	-swap -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
    #	real    87m2.261s
    cat fb.equCab2.chainMm10Link.txt
    #	901995882 bases of 2428790173 (37.138%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/equCab2/bed
    ln -s blastz.mm10.swap lastz.mm10

##############################################################################
# LASTZ guinea pig cavPor3 (DONE - 2012-03-16 - Hiram)
    # establish a screen to control this job with a name to indicate what it is
    screen -S mm10CavPor3
    # mm10 is the target (SEQ1) and cavPor3 the query (SEQ2) in the DEF below
    mkdir /hive/data/genomes/mm10/bed/lastzCavPor3.2012-03-16
    cd /hive/data/genomes/mm10/bed/lastzCavPor3.2012-03-16

    # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
    #	number of jobs, 50,000 to something under 100,000
    # the quoted '_EOF_' delimiter keeps the shell from expanding anything
    #	inside the here-document, so DEF is written exactly as shown
    cat << '_EOF_' > DEF
# guinea pig vs mouse
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: guinea pig CavPor3
SEQ2_DIR=/scratch/data/cavPor3/cavPor3.2bit
SEQ2_LEN=/scratch/data/cavPor3/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LAP=0
SEQ2_LIMIT=10

BASE=/hive/data/genomes/mm10/bed/lastzCavPor3.2012-03-16
TMPDIR=/scratch/tmp
'_EOF_'
    # << happy emacs

    # runs in the background at nice +19; stdout and stderr go to do.log
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	`pwd`/DEF \
	-syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
    #	real    1523m35.729s
    cat fb.mm10.chainCavPor3Link.txt
    #	754642254 bases of 2652783500 (28.447%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/mm10/bed
    ln -s lastzCavPor3.2012-03-16 lastz.cavPor3

    # swap step: the same DEF is passed with -swap and the work is done in
    #	the cavPor3 tree; stdout and stderr go to swap.log
    mkdir /hive/data/genomes/cavPor3/bed/blastz.mm10.swap
    cd /hive/data/genomes/cavPor3/bed/blastz.mm10.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzCavPor3.2012-03-16/DEF \
	-swap -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
    #	real    80m23.870s
    cat fb.cavPor3.chainMm10Link.txt
    #	775452752 bases of 2663369733 (29.115%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/cavPor3/bed
    ln -s blastz.mm10.swap lastz.mm10

##############################################################################
# LASTZ alpaca vicPac1 (DONE - 2012-03-16 - Hiram)
    # establish a screen to control this job with a name to indicate what it is
    screen -S mm10VicPac1
    # mm10 is the target (SEQ1) and vicPac1 the query (SEQ2) in the DEF below
    mkdir /hive/data/genomes/mm10/bed/lastzVicPac1.2012-03-16
    cd /hive/data/genomes/mm10/bed/lastzVicPac1.2012-03-16

    # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
    #	number of jobs, 50,000 to something under 100,000
    # the quoted '_EOF_' delimiter keeps the shell from expanding anything
    #	inside the here-document, so DEF is written exactly as shown
    cat << '_EOF_' > DEF
# alpaca vs mouse
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: alpaca VicPac1
SEQ2_DIR=/scratch/data/vicPac1/vicPac1.2bit
SEQ2_LEN=/scratch/data/vicPac1/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LAP=0
SEQ2_LIMIT=700

BASE=/hive/data/genomes/mm10/bed/lastzVicPac1.2012-03-16
TMPDIR=/scratch/tmp
'_EOF_'
    # << happy emacs

    # runs in the background at nice +19; stdout and stderr go to do.log
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	`pwd`/DEF \
	-syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
    #	real    2049m38.674s
    cat fb.mm10.chainVicPac1Link.txt
    #	600477253 bases of 2652783500 (22.636%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/mm10/bed
    ln -s lastzVicPac1.2012-03-16 lastz.vicPac1

    # better to have reciprocal best for this one since it is low coverage:
    # doRecipBest.pl is pointed at the lastz build directory via
    #	-buildDir=`pwd`; its output is collected in best.log
    cd /hive/data/genomes/mm10/bed/lastzVicPac1.2012-03-16
    time doRecipBest.pl mm10 vicPac1 -buildDir=`pwd` -workhorse=hgwdev \
	> best.log 2>&1 &
    #	real    824m37.107s

    # swap step: the same DEF is passed with -swap and the work is done in
    #	the vicPac1 tree; stdout and stderr go to swap.log
    mkdir /hive/data/genomes/vicPac1/bed/blastz.mm10.swap
    cd /hive/data/genomes/vicPac1/bed/blastz.mm10.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzVicPac1.2012-03-16/DEF \
	-swap -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
    #	real    159m21.952s
    cat fb.vicPac1.chainMm10Link.txt
    #	610885692 bases of 1922910435 (31.769%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/vicPac1/bed
    ln -s blastz.mm10.swap lastz.mm10

##############################################################################
# LASTZ dolphin turTru1 (DONE - 2012-03-16 - Hiram)
    # establish a screen to control this job with a name to indicate what it is
    screen -S mm10TurTru1
    # mm10 is the target (SEQ1) and turTru1 the query (SEQ2) in the DEF below
    mkdir /hive/data/genomes/mm10/bed/lastzTurTru1.2012-03-16
    cd /hive/data/genomes/mm10/bed/lastzTurTru1.2012-03-16

    # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
    #	number of jobs, 50,000 to something under 100,000
    # the quoted '_EOF_' delimiter keeps the shell from expanding anything
    #	inside the here-document, so DEF is written exactly as shown
    cat << '_EOF_' > DEF
# dolphin vs mouse
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: dolphin TurTru1
SEQ2_DIR=/scratch/data/turTru1/turTru1.2bit
SEQ2_LEN=/scratch/data/turTru1/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LAP=0
SEQ2_LIMIT=300

BASE=/hive/data/genomes/mm10/bed/lastzTurTru1.2012-03-16
TMPDIR=/scratch/tmp
'_EOF_'
    # << happy emacs

    # runs in the background at nice +19; stdout and stderr go to do.log
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	`pwd`/DEF \
	-syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
    #	real    1484m14.609s
    cat fb.mm10.chainTurTru1Link.txt
    #	762961671 bases of 2652783500 (28.761%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/mm10/bed
    ln -s lastzTurTru1.2012-03-16 lastz.turTru1

    # better to have reciprocal best for this one since it is low coverage:
    # doRecipBest.pl is pointed at the lastz build directory via
    #	-buildDir=`pwd`; its output is collected in best.log
    cd /hive/data/genomes/mm10/bed/lastzTurTru1.2012-03-16
    time doRecipBest.pl mm10 turTru1 -buildDir=`pwd` -workhorse=hgwdev \
	> best.log 2>&1 &
    #	real    733m37.272s

    # swap step: the same DEF is passed with -swap and the work is done in
    #	the turTru1 tree; stdout and stderr go to swap.log
    mkdir /hive/data/genomes/turTru1/bed/blastz.mm10.swap
    cd /hive/data/genomes/turTru1/bed/blastz.mm10.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzTurTru1.2012-03-16/DEF \
	-swap -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
    #	real    79m38.703s
    cat fb.turTru1.chainMm10Link.txt
    #	744359707 bases of 2298444090 (32.385%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/turTru1/bed
    ln -s blastz.mm10.swap lastz.mm10

##############################################################################
# LASTZ tree shrew tupBel1 (DONE - 2012-03-16 - Hiram)
    # establish a screen to control this job with a name to indicate what it is
    screen -S mm10TupBel1
    # mm10 is the target (SEQ1) and tupBel1 the query (SEQ2) in the DEF below
    mkdir /hive/data/genomes/mm10/bed/lastzTupBel1.2012-03-16
    cd /hive/data/genomes/mm10/bed/lastzTupBel1.2012-03-16

    # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
    #	number of jobs, 50,000 to something under 100,000
    # the quoted '_EOF_' delimiter keeps the shell from expanding anything
    #	inside the here-document, so DEF is written exactly as shown
    cat << '_EOF_' > DEF
# tree shrew vs mouse
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: tree shrew TupBel1
SEQ2_DIR=/scratch/data/tupBel1/tupBel1.2bit
SEQ2_LEN=/scratch/data/tupBel1/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LAP=0
SEQ2_LIMIT=400

BASE=/hive/data/genomes/mm10/bed/lastzTupBel1.2012-03-16
TMPDIR=/scratch/tmp
'_EOF_'
    # << happy emacs

    # runs in the background at nice +19; stdout and stderr go to do.log
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	`pwd`/DEF \
	-syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
    #	real    1731m30.449s
    cat fb.mm10.chainTupBel1Link.txt
    #	524337666 bases of 2652783500 (19.766%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/mm10/bed
    ln -s lastzTupBel1.2012-03-16 lastz.tupBel1

    # better to have reciprocal best for this one since it is low coverage:
    # doRecipBest.pl is pointed at the lastz build directory via
    #	-buildDir=`pwd`; its output is collected in best.log
    cd /hive/data/genomes/mm10/bed/lastzTupBel1.2012-03-16
    time doRecipBest.pl mm10 tupBel1 -buildDir=`pwd` -workhorse=hgwdev \
	> best.log 2>&1 &
    #	real    1090m30.429s

    # swap step: the same DEF is passed with -swap and the work is done in
    #	the tupBel1 tree; stdout and stderr go to swap.log
    mkdir /hive/data/genomes/tupBel1/bed/blastz.mm10.swap
    cd /hive/data/genomes/tupBel1/bed/blastz.mm10.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzTupBel1.2012-03-16/DEF \
	-swap -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
    #	real    136m7.163s
    cat fb.tupBel1.chainMm10Link.txt
    #	537379661 bases of 2137225476 (25.144%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/tupBel1/bed
    ln -s blastz.mm10.swap lastz.mm10

##############################################################################
# LASTZ pig susScr2 (DONE - 2012-03-16 - Hiram)
    # establish a screen to control this job with a name to indicate what it is
    screen -S mm10SusScr2
    # mm10 is the target (SEQ1) and susScr2 the query (SEQ2) in the DEF below
    mkdir /hive/data/genomes/mm10/bed/lastzSusScr2.2012-03-16
    cd /hive/data/genomes/mm10/bed/lastzSusScr2.2012-03-16

    # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
    #	number of jobs, 50,000 to something under 100,000
    # the quoted '_EOF_' delimiter keeps the shell from expanding anything
    #	inside the here-document, so DEF is written exactly as shown
    cat << '_EOF_' > DEF
# pig vs mouse
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: pig SusScr2
SEQ2_DIR=/scratch/data/susScr2/susScr2.2bit
SEQ2_LEN=/scratch/data/susScr2/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LAP=0
SEQ2_LIMIT=1

BASE=/hive/data/genomes/mm10/bed/lastzSusScr2.2012-03-16
TMPDIR=/scratch/tmp
'_EOF_'
    # << happy emacs

    # runs in the background at nice +19; stdout and stderr go to do.log
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	`pwd`/DEF \
	-syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
    #	real    1272m57.727s
    cat fb.mm10.chainSusScr2Link.txt
    #	616716602 bases of 2652783500 (23.248%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/mm10/bed
    ln -s lastzSusScr2.2012-03-16 lastz.susScr2

    # swap step: the same DEF is passed with -swap and the work is done in
    #	the susScr2 tree; stdout and stderr go to swap.log
    mkdir /hive/data/genomes/susScr2/bed/blastz.mm10.swap
    cd /hive/data/genomes/susScr2/bed/blastz.mm10.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzSusScr2.2012-03-16/DEF \
	-swap -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
    #	real    62m47.465s
    cat fb.susScr2.chainMm10Link.txt
    #	656498040 bases of 2231298548 (29.422%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/susScr2/bed
    ln -s blastz.mm10.swap lastz.mm10

##############################################################################
# LASTZ rabbit oryCun2 (DONE - 2012-03-16 - Hiram)
    # establish a screen to control this job with a name to indicate what it is
    screen -S mm10OryCun2
    # mm10 is the target (SEQ1) and oryCun2 the query (SEQ2) in the DEF below
    mkdir /hive/data/genomes/mm10/bed/lastzOryCun2.2012-03-16
    cd /hive/data/genomes/mm10/bed/lastzOryCun2.2012-03-16

    # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
    #	number of jobs, 50,000 to something under 100,000
    # the quoted '_EOF_' delimiter keeps the shell from expanding anything
    #	inside the here-document, so DEF is written exactly as shown
    cat << '_EOF_' > DEF
# rabbit vs mouse
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: rabbit OryCun2
SEQ2_DIR=/scratch/data/oryCun2/oryCun2.2bit
SEQ2_LEN=/scratch/data/oryCun2/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LAP=0
SEQ2_LIMIT=20

BASE=/hive/data/genomes/mm10/bed/lastzOryCun2.2012-03-16
TMPDIR=/scratch/tmp
'_EOF_'
    # << happy emacs

    # runs in the background at nice +19; stdout and stderr go to do.log
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	`pwd`/DEF \
	-syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
    #	real    1412m58.641s
    cat fb.mm10.chainOryCun2Link.txt
    #	669778489 bases of 2652783500 (25.248%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/mm10/bed
    ln -s lastzOryCun2.2012-03-16 lastz.oryCun2

    # swap step: the same DEF is passed with -swap and the work is done in
    #	the oryCun2 tree; stdout and stderr go to swap.log
    mkdir /hive/data/genomes/oryCun2/bed/blastz.mm10.swap
    cd /hive/data/genomes/oryCun2/bed/blastz.mm10.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzOryCun2.2012-03-16/DEF \
	-swap -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
    #	real    64m40.959s
    cat fb.oryCun2.chainMm10Link.txt
    #	668643668 bases of 2604023284 (25.677%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/oryCun2/bed
    ln -s blastz.mm10.swap lastz.mm10

##############################################################################
# LASTZ sloth choHof1 (DONE - 2012-03-19 - Hiram)
    # establish a screen to control this job with a name to indicate what it is
    screen -S mm10ChoHof1
    # mm10 is the target (SEQ1) and choHof1 the query (SEQ2) in the DEF below
    mkdir /hive/data/genomes/mm10/bed/lastzChoHof1.2012-03-19
    cd /hive/data/genomes/mm10/bed/lastzChoHof1.2012-03-19

    # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
    #	number of jobs, 50,000 to something under 100,000
    # the quoted '_EOF_' delimiter keeps the shell from expanding anything
    #	inside the here-document, so DEF is written exactly as shown
    cat << '_EOF_' > DEF
# sloth vs mouse
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: sloth ChoHof1
SEQ2_DIR=/scratch/data/choHof1/choHof1.2bit
SEQ2_LEN=/scratch/data/choHof1/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LAP=0
SEQ2_LIMIT=800

BASE=/hive/data/genomes/mm10/bed/lastzChoHof1.2012-03-19
TMPDIR=/scratch/tmp
'_EOF_'
    # << happy emacs

    # first attempt: runs in the background at nice +19; stdout and stderr
    #	go to do.log
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	`pwd`/DEF \
	-syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
    # hgwdev was rebooted during the first swarm run; continuing from the
    #	cat step (-continue=cat), this time logging to cat.log:
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	`pwd`/DEF \
	-continue=cat -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > cat.log 2>&1 &
    # NOTE(review): presumably this elapsed time covers only the continued
    #	run, not the interrupted first attempt -- confirm
    #	Elapsed time: 65m26s
    cat fb.mm10.chainChoHof1Link.txt
    #	477994856 bases of 2652783500 (18.019%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/mm10/bed
    ln -s lastzChoHof1.2012-03-19 lastz.choHof1

    # better to have reciprocal best for this one since it is low coverage:
    # doRecipBest.pl is pointed at the lastz build directory via
    #	-buildDir=`pwd`; its output is collected in best.log
    cd /hive/data/genomes/mm10/bed/lastzChoHof1.2012-03-19
    time doRecipBest.pl mm10 choHof1 -buildDir=`pwd` -workhorse=hgwdev \
	> best.log 2>&1 &
    #	real    1171m56.481s

    # swap step: the same DEF is passed with -swap and the work is done in
    #	the choHof1 tree; stdout and stderr go to swap.log
    mkdir /hive/data/genomes/choHof1/bed/blastz.mm10.swap
    cd /hive/data/genomes/choHof1/bed/blastz.mm10.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzChoHof1.2012-03-19/DEF \
	-swap -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
    #	real    1613m3.348s
    cat fb.choHof1.chainMm10Link.txt
    #	488047499 bases of 2060419685 (23.687%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/choHof1/bed
    ln -s blastz.mm10.swap lastz.mm10

##############################################################################
# LASTZ megabat pteVam1 (DONE - 2012-03-19 - Hiram)
    # establish a screen to control this job with a name to indicate what it is
    screen -S mm10PteVam1
    # mm10 is the target (SEQ1) and pteVam1 the query (SEQ2) in the DEF below
    mkdir /hive/data/genomes/mm10/bed/lastzPteVam1.2012-03-19
    cd /hive/data/genomes/mm10/bed/lastzPteVam1.2012-03-19

    # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
    #	number of jobs, 50,000 to something under 100,000
    # the quoted '_EOF_' delimiter keeps the shell from expanding anything
    #	inside the here-document, so DEF is written exactly as shown
    cat << '_EOF_' > DEF
# megabat vs mouse
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: megabat PteVam1
SEQ2_DIR=/scratch/data/pteVam1/pteVam1.2bit
SEQ2_LEN=/scratch/data/pteVam1/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LAP=0
SEQ2_LIMIT=200

BASE=/hive/data/genomes/mm10/bed/lastzPteVam1.2012-03-19
TMPDIR=/scratch/tmp
'_EOF_'
    # << happy emacs

    # runs in the background at nice +19; stdout and stderr go to do.log
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	`pwd`/DEF \
	-syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
    #	real    1843m33.186s
    cat fb.mm10.chainPteVam1Link.txt
    #	725414059 bases of 2652783500 (27.345%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/mm10/bed
    ln -s lastzPteVam1.2012-03-19 lastz.pteVam1

    # better to have reciprocal best for this one since it is low coverage:
    # doRecipBest.pl is pointed at the lastz build directory via
    #	-buildDir=`pwd`; its output is collected in best.log
    cd /hive/data/genomes/mm10/bed/lastzPteVam1.2012-03-19
    time doRecipBest.pl mm10 pteVam1 -buildDir=`pwd` -workhorse=hgwdev \
	> best.log 2>&1 &
    #	real    743m57.901s

    # swap step: the same DEF is passed with -swap and the work is done in
    #	the pteVam1 tree; stdout and stderr go to swap.log
    mkdir /hive/data/genomes/pteVam1/bed/blastz.mm10.swap
    cd /hive/data/genomes/pteVam1/bed/blastz.mm10.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzPteVam1.2012-03-19/DEF \
	-swap -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
    #	Elapsed time: 75m35s
    cat fb.pteVam1.chainMm10Link.txt
    #	710519911 bases of 1839436660 (38.627%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/pteVam1/bed
    ln -s blastz.mm10.swap lastz.mm10

##############################################################################
# LASTZ elephant loxAfr3 (DONE - 2012-03-19 - Hiram)
    # establish a screen to control this job with a name to indicate what it is
    screen -S mm10LoxAfr3
    # mm10 is the target (SEQ1) and loxAfr3 the query (SEQ2) in the DEF below
    mkdir /hive/data/genomes/mm10/bed/lastzLoxAfr3.2012-03-19
    cd /hive/data/genomes/mm10/bed/lastzLoxAfr3.2012-03-19

    # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
    #	number of jobs, 50,000 to something under 100,000
    # the quoted '_EOF_' delimiter keeps the shell from expanding anything
    #	inside the here-document, so DEF is written exactly as shown
    cat << '_EOF_' > DEF
# elephant vs mouse
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: elephant LoxAfr3
SEQ2_DIR=/scratch/data/loxAfr3/loxAfr3.2bit
SEQ2_LEN=/scratch/data/loxAfr3/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LAP=0
SEQ2_LIMIT=50

BASE=/hive/data/genomes/mm10/bed/lastzLoxAfr3.2012-03-19
TMPDIR=/scratch/tmp
'_EOF_'
    # << happy emacs

    # runs in the background at nice +19; stdout and stderr go to do.log
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	`pwd`/DEF \
	-syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
    #	real    1848m11.111s
    cat fb.mm10.chainLoxAfr3Link.txt
    #	685029753 bases of 2652783500 (25.823%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/mm10/bed
    ln -s lastzLoxAfr3.2012-03-19 lastz.loxAfr3

    # swap step: the same DEF is passed with -swap and the work is done in
    #	the loxAfr3 tree; stdout and stderr go to swap.log
    mkdir /hive/data/genomes/loxAfr3/bed/blastz.mm10.swap
    cd /hive/data/genomes/loxAfr3/bed/blastz.mm10.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzLoxAfr3.2012-03-19/DEF \
	-swap -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
    #	Elapsed time: 73m14s
    cat fb.loxAfr3.chainMm10Link.txt
    #	674108752 bases of 3118565340 (21.616%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/loxAfr3/bed
    ln -s blastz.mm10.swap lastz.mm10

##############################################################################
# LASTZ cat felCat4 (DONE - 2012-03-19 - Hiram)
    # establish a screen to control this job with a name to indicate what it is
    screen -S mm10FelCat4
    mkdir /hive/data/genomes/mm10/bed/lastzFelCat4.2012-03-19
    cd /hive/data/genomes/mm10/bed/lastzFelCat4.2012-03-19

    # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
    #	number of jobs, 50,000 to something under 100,000
    cat << '_EOF_' > DEF
# cat vs mouse
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: cat FelCat4
SEQ2_DIR=/scratch/data/felCat4/felCat4.2bit
SEQ2_LEN=/scratch/data/felCat4/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LAP=0
SEQ2_LIMIT=300

BASE=/hive/data/genomes/mm10/bed/lastzFelCat4.2012-03-19
TMPDIR=/scratch/tmp
'_EOF_'
    # << happy emacs

    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	`pwd`/DEF \
	-syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
    #	real    2010m48.963s
    cat fb.mm10.chainFelCat4Link.txt
    #	637531191 bases of 2652783500 (24.033%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/mm10/bed
    ln -s lastzFelCat4.2012-03-19 lastz.felCat4

    # better to have reciprocal best for this one since it is low coverage:
    cd /hive/data/genomes/mm10/bed/lastzFelCat4.2012-03-19
    time doRecipBest.pl mm10 felCat4 -buildDir=`pwd` -workhorse=hgwdev \
	> best.log 2>&1 &
    #	real    1135m12.207s

    mkdir /hive/data/genomes/felCat4/bed/blastz.mm10.swap
    cd /hive/data/genomes/felCat4/bed/blastz.mm10.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzFelCat4.2012-03-19/DEF \
	-swap -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
    #	Elapsed time: 88m12s
    cat fb.felCat4.chainMm10Link.txt
    #	616167655 bases of 1990635005 (30.953%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/felCat4/bed
    ln -s blastz.mm10.swap lastz.mm10

##############################################################################
# LASTZ panda ailMel1 (DONE - 2012-03-19 - Hiram)
    # establish a screen to control this job with a name to indicate what it is
    screen -S mm10AilMel1
    mkdir /hive/data/genomes/mm10/bed/lastzAilMel1.2012-03-19
    cd /hive/data/genomes/mm10/bed/lastzAilMel1.2012-03-19

    # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
    #	number of jobs, 50,000 to something under 100,000
    cat << '_EOF_' > DEF
# panda vs mouse
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: panda AilMel1
SEQ2_DIR=/scratch/data/ailMel1/ailMel1.2bit
SEQ2_LEN=/scratch/data/ailMel1/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LAP=0
SEQ2_LIMIT=200

BASE=/hive/data/genomes/mm10/bed/lastzAilMel1.2012-03-19
TMPDIR=/scratch/tmp
'_EOF_'
    # << happy emacs

    # forgot to copy to the log
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	`pwd`/DEF \
	-syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium
    #	real    1914m15.921s
    cat fb.mm10.chainAilMel1Link.txt
    #	821806974 bases of 2652783500 (30.979%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/mm10/bed
    ln -s lastzAilMel1.2012-03-19 lastz.ailMel1

    mkdir /hive/data/genomes/ailMel1/bed/blastz.mm10.swap
    cd /hive/data/genomes/ailMel1/bed/blastz.mm10.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzAilMel1.2012-03-19/DEF \
	-swap -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
    #	Elapsed time: 65m50s
    cat fb.ailMel1.chainMm10Link.txt
    #	798482731 bases of 2245312831 (35.562%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/ailMel1/bed
    ln -s blastz.mm10.swap lastz.mm10

##############################################################################
# LASTZ dog canFam3 (DONE - 2012-03-19 - Hiram)
    # establish a screen to control this job with a name to indicate what it is
    screen -S mm10CanFam3
    mkdir /hive/data/genomes/mm10/bed/lastzCanFam3.2012-03-19
    cd /hive/data/genomes/mm10/bed/lastzCanFam3.2012-03-19

    # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
    #	number of jobs, 50,000 to something under 100,000
    cat << '_EOF_' > DEF
# dog vs mouse
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: dog CanFam3
SEQ2_DIR=/hive/data/genomes/canFam3/canFam3.2bit
SEQ2_LEN=/hive/data/genomes/canFam3/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LAP=0
SEQ2_LIMIT=20

BASE=/hive/data/genomes/mm10/bed/lastzCanFam3.2012-03-19
TMPDIR=/scratch/tmp
'_EOF_'
    # << happy emacs

    # forgot to copy to the log
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	`pwd`/DEF \
	-syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
    #	real    1883m21.850s
    cat fb.mm10.chainCanFam3Link.txt
    #	773114990 bases of 2652783500 (29.144%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/mm10/bed
    ln -s lastzCanFam3.2012-03-19 lastz.canFam3

    mkdir /hive/data/genomes/canFam3/bed/blastz.mm10.swap
    cd /hive/data/genomes/canFam3/bed/blastz.mm10.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzCanFam3.2012-03-19/DEF \
	-swap -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
    #	Elapsed time: 63m22s
    cat fb.canFam3.chainMm10Link.txt
    #	756678903 bases of 2392715236 (31.624%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/canFam3/bed
    ln -s blastz.mm10.swap lastz.mm10

##############################################################################
# LASTZ armadillo dasNov2 (DONE - 2012-03-21 - Hiram)
    # establish a screen to control this job with a name to indicate what it is
    screen -S mm10DasNov2
    mkdir /hive/data/genomes/mm10/bed/lastzDasNov2.2012-03-21
    cd /hive/data/genomes/mm10/bed/lastzDasNov2.2012-03-21

    # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
    #	number of jobs, 50,000 to something under 100,000
    cat << '_EOF_' > DEF
# armadillo vs mouse
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: armadillo DasNov2
SEQ2_DIR=/scratch/data/dasNov2/dasNov2.2bit
SEQ2_LEN=/scratch/data/dasNov2/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LAP=0
SEQ2_LIMIT=800

BASE=/hive/data/genomes/mm10/bed/lastzDasNov2.2012-03-21
TMPDIR=/scratch/tmp
'_EOF_'
    # << happy emacs

    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	`pwd`/DEF \
	-syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
    #	real    2655m49.904s
    cat fb.mm10.chainDasNov2Link.txt
    #	451070039 bases of 2652783500 (17.004%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/mm10/bed
    ln -s lastzDasNov2.2012-03-21 lastz.dasNov2

    # better to have reciprocal best for this one since it is low coverage:
    cd /hive/data/genomes/mm10/bed/lastzDasNov2.2012-03-21
    time doRecipBest.pl mm10 dasNov2 -buildDir=`pwd` -workhorse=hgwdev \
	> best.log 2>&1 &
    #	real    1163m1.023s

    mkdir /hive/data/genomes/dasNov2/bed/blastz.mm10.swap
    cd /hive/data/genomes/dasNov2/bed/blastz.mm10.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzDasNov2.2012-03-21/DEF \
	-swap -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
    #	real    201m9.701s
    cat fb.dasNov2.chainMm10Link.txt
    #	461142417 bases of 2371493872 (19.445%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/dasNov2/bed
    ln -s blastz.mm10.swap lastz.mm10

##############################################################################
# LASTZ microbat myoLuc2 (DONE - 2012-03-21 - Hiram)
    # establish a screen to control this job with a name to indicate what it is
    screen -S mm10MyoLuc2
    mkdir /hive/data/genomes/mm10/bed/lastzMyoLuc2.2012-03-21
    cd /hive/data/genomes/mm10/bed/lastzMyoLuc2.2012-03-21

    # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
    #	number of jobs, 50,000 to something under 100,000
    cat << '_EOF_' > DEF
# microbat vs mouse
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: microbat MyoLuc2
SEQ2_DIR=/scratch/data/myoLuc2/myoLuc2.2bit
SEQ2_LEN=/scratch/data/myoLuc2/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LAP=0
SEQ2_LIMIT=50

BASE=/hive/data/genomes/mm10/bed/lastzMyoLuc2.2012-03-21
TMPDIR=/scratch/tmp
'_EOF_'
    # << happy emacs

    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	`pwd`/DEF \
	-syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
    #	real    1033m38.184s
    cat fb.mm10.chainMyoLuc2Link.txt
    #	646292112 bases of 2652783500 (24.363%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/mm10/bed
    ln -s lastzMyoLuc2.2012-03-21 lastz.myoLuc2

    # better to have reciprocal best for this one since it is low coverage:
    cd /hive/data/genomes/mm10/bed/lastzMyoLuc2.2012-03-21
    time doRecipBest.pl mm10 myoLuc2 -buildDir=`pwd` -workhorse=hgwdev \
	> best.log 2>&1 &
    #	real    29m16.249s

    mkdir /hive/data/genomes/myoLuc2/bed/blastz.mm10.swap
    cd /hive/data/genomes/myoLuc2/bed/blastz.mm10.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzMyoLuc2.2012-03-21/DEF \
	-swap -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
    #	real    54m5.607s
    cat fb.myoLuc2.chainMm10Link.txt
    #	661704053 bases of 1966419868 (33.650%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/myoLuc2/bed
    ln -s blastz.mm10.swap lastz.mm10

##############################################################################
# LASTZ cow bosTau7 (DONE - 2012-03-21 - Hiram)
    # establish a screen to control this job with a name to indicate what it is
    screen -S mm10BosTau7
    mkdir /hive/data/genomes/mm10/bed/lastzBosTau7.2012-03-21
    cd /hive/data/genomes/mm10/bed/lastzBosTau7.2012-03-21

    # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
    #	number of jobs, 50,000 to something under 100,000
    cat << '_EOF_' > DEF
# cow vs mouse
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: cow BosTau7
SEQ2_DIR=/scratch/data/bosTau7/bosTau7.2bit
SEQ2_LEN=/scratch/data/bosTau7/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LAP=0
SEQ2_LIMIT=50

BASE=/hive/data/genomes/mm10/bed/lastzBosTau7.2012-03-21
TMPDIR=/scratch/tmp
'_EOF_'
    # << happy emacs

    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	`pwd`/DEF \
	-syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
    #	real    1151m20.445s
    cat fb.mm10.chainBosTau7Link.txt
    #	696498363 bases of 2652783500 (26.255%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/mm10/bed
    ln -s lastzBosTau7.2012-03-21 lastz.bosTau7

    mkdir /hive/data/genomes/bosTau7/bed/blastz.mm10.swap
    cd /hive/data/genomes/bosTau7/bed/blastz.mm10.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzBosTau7.2012-03-21/DEF \
	-swap -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
    #	real    77m58.759s
    cat fb.bosTau7.chainMm10Link.txt
    #	711923052 bases of 2804673174 (25.383%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/bosTau7/bed
    ln -s blastz.mm10.swap lastz.mm10

##############################################################################
# LASTZ sheep oviAri1 (DONE - 2012-03-21 - Hiram)
    # establish a screen to control this job with a name to indicate what it is
    screen -S mm10OviAri1
    mkdir /hive/data/genomes/mm10/bed/lastzOviAri1.2012-03-21
    cd /hive/data/genomes/mm10/bed/lastzOviAri1.2012-03-21

    # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
    #	number of jobs, 50,000 to something under 100,000
    cat << '_EOF_' > DEF
# sheep vs mouse
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: sheep OviAri1
SEQ2_DIR=/scratch/data/oviAri1/oviAri1.2bit
SEQ2_LEN=/scratch/data/oviAri1/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LAP=0
SEQ2_LIMIT=50

BASE=/hive/data/genomes/mm10/bed/lastzOviAri1.2012-03-21
TMPDIR=/scratch/tmp
'_EOF_'
    # << happy emacs

    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	`pwd`/DEF \
	-syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
    #	real    892m33.068s
    cat fb.mm10.chainOviAri1Link.txt
    #	406955832 bases of 2652783500 (15.341%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/mm10/bed
    ln -s lastzOviAri1.2012-03-21 lastz.oviAri1

    # better to have reciprocal best for this one since it is low coverage:
    cd /hive/data/genomes/mm10/bed/lastzOviAri1.2012-03-21
    time doRecipBest.pl mm10 oviAri1 -buildDir=`pwd` -workhorse=hgwdev \
	> best.log 2>&1 &
    #	real    1183m43.488s

    mkdir /hive/data/genomes/oviAri1/bed/blastz.mm10.swap
    cd /hive/data/genomes/oviAri1/bed/blastz.mm10.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzOviAri1.2012-03-21/DEF \
	-swap -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
    #	real    30m5.554s
    cat fb.oviAri1.chainMm10Link.txt
    #	383499897 bases of 1201271277 (31.925%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/oviAri1/bed
    ln -s blastz.mm10.swap lastz.mm10

##############################################################################
# LASTZ rock hyrax proCap1 (DONE - 2012-03-21 - Hiram)
    # establish a screen to control this job with a name to indicate what it is
    screen -S mm10ProCap1
    mkdir /hive/data/genomes/mm10/bed/lastzProCap1.2012-03-21
    cd /hive/data/genomes/mm10/bed/lastzProCap1.2012-03-21

    # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
    #	number of jobs, 50,000 to something under 100,000
    cat << '_EOF_' > DEF
# rock hyrax vs mouse
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: rock hyrax ProCap1
SEQ2_DIR=/scratch/data/proCap1/proCap1.2bit
SEQ2_LEN=/scratch/data/proCap1/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LAP=0
SEQ2_LIMIT=600

BASE=/hive/data/genomes/mm10/bed/lastzProCap1.2012-03-21
TMPDIR=/scratch/tmp
'_EOF_'
    # << happy emacs

    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	`pwd`/DEF \
	-syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
    #	real    2859m51.317s
    cat fb.mm10.chainProCap1Link.txt
    #	401804601 bases of 2652783500 (15.147%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/mm10/bed
    ln -s lastzProCap1.2012-03-21 lastz.proCap1

    # better to have reciprocal best for this one since it is low coverage:
    cd /hive/data/genomes/mm10/bed/lastzProCap1.2012-03-21
    time doRecipBest.pl mm10 proCap1 -buildDir=`pwd` -workhorse=hgwdev \
	> best.log 2>&1 &
    #	real    1083m57.139s

    mkdir /hive/data/genomes/proCap1/bed/blastz.mm10.swap
    cd /hive/data/genomes/proCap1/bed/blastz.mm10.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzProCap1.2012-03-21/DEF \
	-swap -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
    #	real    165m10.285s
    cat fb.proCap1.chainMm10Link.txt
    #	390409777 bases of 2407847681 (16.214%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/proCap1/bed
    ln -s blastz.mm10.swap lastz.mm10

##############################################################################
# LASTZ pika ochPri2 (DONE - 2012-03-22 - Hiram)
    # establish a screen to control this job with a name to indicate what it is
    screen -S mm10OchPri2
    mkdir /hive/data/genomes/mm10/bed/lastzOchPri2.2012-03-22
    cd /hive/data/genomes/mm10/bed/lastzOchPri2.2012-03-22

    # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
    #	number of jobs, 50,000 to something under 100,000
    cat << '_EOF_' > DEF
# pika vs mouse
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: pika OchPri2
SEQ2_DIR=/scratch/data/ochPri2/ochPri2.2bit
SEQ2_LEN=/scratch/data/ochPri2/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LAP=0
SEQ2_LIMIT=500

BASE=/hive/data/genomes/mm10/bed/lastzOchPri2.2012-03-22
TMPDIR=/scratch/tmp
'_EOF_'
    # << happy emacs

    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	`pwd`/DEF \
	-syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
    #	real    2578m43.648s
    cat fb.mm10.chainOchPri2Link.txt
    #	385766335 bases of 2652783500 (14.542%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/mm10/bed
    ln -s lastzOchPri2.2012-03-22 lastz.ochPri2

    # better to have reciprocal best for this one since it is low coverage:
    cd /hive/data/genomes/mm10/bed/lastzOchPri2.2012-03-22
    time doRecipBest.pl mm10 ochPri2 -buildDir=`pwd` -workhorse=hgwdev \
	> best.log 2>&1 &
    #	real    1036m29.080s

    mkdir /hive/data/genomes/ochPri2/bed/blastz.mm10.swap
    cd /hive/data/genomes/ochPri2/bed/blastz.mm10.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzOchPri2.2012-03-22/DEF \
	-swap -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
    #	real    103m34.369s
    cat fb.ochPri2.chainMm10Link.txt
    #	382959642 bases of 1923624051 (19.908%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/ochPri2/bed
    ln -s blastz.mm10.swap lastz.mm10

##############################################################################
# LASTZ hedgehog eriEur1 (DONE - 2012-03-22 - Hiram)
    # establish a screen to control this job with a name to indicate what it is
    screen -S mm10EriEur1
    mkdir /hive/data/genomes/mm10/bed/lastzEriEur1.2012-03-22
    cd /hive/data/genomes/mm10/bed/lastzEriEur1.2012-03-22

    # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
    #	number of jobs, 50,000 to something under 100,000
    cat << '_EOF_' > DEF
# hedgehog vs mouse
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: hedgehog EriEur1
SEQ2_DIR=/scratch/data/eriEur1/eriEur1.2bit
SEQ2_LEN=/scratch/data/eriEur1/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LAP=0
SEQ2_LIMIT=700

BASE=/hive/data/genomes/mm10/bed/lastzEriEur1.2012-03-22
TMPDIR=/scratch/tmp
'_EOF_'
    # << happy emacs

    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	`pwd`/DEF \
	-syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
    #	real    3006m41.470s
    cat fb.mm10.chainEriEur1Link.txt
    #	261447061 bases of 2652783500 (9.856%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/mm10/bed
    ln -s lastzEriEur1.2012-03-22 lastz.eriEur1

    # better to have reciprocal best for this one since it is low coverage:
    cd /hive/data/genomes/mm10/bed/lastzEriEur1.2012-03-22
    time doRecipBest.pl mm10 eriEur1 -buildDir=`pwd` -workhorse=hgwdev \
	> best.log 2>&1 &
    #	real    1171m41.349s

    mkdir /hive/data/genomes/eriEur1/bed/blastz.mm10.swap
    cd /hive/data/genomes/eriEur1/bed/blastz.mm10.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzEriEur1.2012-03-22/DEF \
	-swap -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
    #	real    241m24.183s
    cat fb.eriEur1.chainMm10Link.txt
    #	261605017 bases of 2133134836 (12.264%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/eriEur1/bed
    ln -s blastz.mm10.swap lastz.mm10

##############################################################################
# LASTZ tenrec echTel1 (DONE - 2012-03-22 - Hiram)
    # establish a screen to control this job with a name to indicate what it is
    screen -S mm10EchTel1
    mkdir /hive/data/genomes/mm10/bed/lastzEchTel1.2012-03-22
    cd /hive/data/genomes/mm10/bed/lastzEchTel1.2012-03-22

    # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
    #	number of jobs, 50,000 to something under 100,000
    cat << '_EOF_' > DEF
# tenrec vs mouse
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: tenrec EchTel1
SEQ2_DIR=/scratch/data/echTel1/echTel1.2bit
SEQ2_LEN=/scratch/data/echTel1/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LAP=0
SEQ2_LIMIT=700

BASE=/hive/data/genomes/mm10/bed/lastzEchTel1.2012-03-22
TMPDIR=/scratch/tmp
'_EOF_'
    # << happy emacs

    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	`pwd`/DEF \
	-syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
    #	real    3047m28.723s
    cat fb.mm10.chainEchTel1Link.txt
    #	290413150 bases of 2652783500 (10.947%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/mm10/bed
    ln -s lastzEchTel1.2012-03-22 lastz.echTel1

    # better to have reciprocal best for this one since it is low coverage:
    cd /hive/data/genomes/mm10/bed/lastzEchTel1.2012-03-22
    time doRecipBest.pl mm10 echTel1 -buildDir=`pwd` -workhorse=hgwdev \
	> best.log 2>&1 &
    #	real    1201m39.275s

    mkdir /hive/data/genomes/echTel1/bed/blastz.mm10.swap
    cd /hive/data/genomes/echTel1/bed/blastz.mm10.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzEchTel1.2012-03-22/DEF \
	-swap -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
    #	real    269m52.619s
    cat fb.echTel1.chainMm10Link.txt
    #	298082139 bases of 2111581369 (14.117%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/echTel1/bed
    ln -s blastz.mm10.swap lastz.mm10

##############################################################################
# LASTZ shrew sorAra1 (DONE - 2012-03-22 - Hiram)
    # establish a screen to control this job with a name to indicate what it is
    screen -S mm10SorAra1
    mkdir /hive/data/genomes/mm10/bed/lastzSorAra1.2012-03-22
    cd /hive/data/genomes/mm10/bed/lastzSorAra1.2012-03-22

    # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
    #	number of jobs, 50,000 to something under 100,000
    cat << '_EOF_' > DEF
# shrew vs mouse
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: shrew SorAra1
SEQ2_DIR=/scratch/data/sorAra1/sorAra1.2bit
SEQ2_LEN=/scratch/data/sorAra1/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LAP=0
SEQ2_LIMIT=500

BASE=/hive/data/genomes/mm10/bed/lastzSorAra1.2012-03-22
TMPDIR=/scratch/tmp
'_EOF_'
    # << happy emacs

    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	`pwd`/DEF \
	-syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
    #	real    2600m22.528s
    cat fb.mm10.chainSorAra1Link.txt
    #	248874412 bases of 2652783500 (9.382%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/mm10/bed
    ln -s lastzSorAra1.2012-03-22 lastz.sorAra1

    # better to have reciprocal best for this one since it is low coverage:
    cd /hive/data/genomes/mm10/bed/lastzSorAra1.2012-03-22
    time doRecipBest.pl mm10 sorAra1 -buildDir=`pwd` -workhorse=hgwdev \
	> best.log 2>&1 &
    #	real    1074m22.651s

    mkdir /hive/data/genomes/sorAra1/bed/blastz.mm10.swap
    cd /hive/data/genomes/sorAra1/bed/blastz.mm10.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzSorAra1.2012-03-22/DEF \
	-swap -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
    #	real    141m38.806s
    cat fb.sorAra1.chainMm10Link.txt
    #	248692550 bases of 1832864697 (13.569%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/sorAra1/bed
    ln -s blastz.mm10.swap lastz.mm10

##############################################################################
# LASTZ wallaby macEug2 (DONE - 2012-03-22 - Hiram)
    # establish a screen to control this job with a name to indicate what it is
    screen -S mm10MacEug2
    mkdir /hive/data/genomes/mm10/bed/lastzMacEug2.2012-03-22
    cd /hive/data/genomes/mm10/bed/lastzMacEug2.2012-03-22

    # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
    #	number of jobs, 50,000 to something under 100,000
    cat << '_EOF_' > DEF
# wallaby vs mouse
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: wallaby MacEug2
SEQ2_DIR=/scratch/data/macEug2/macEug2.2bit
SEQ2_LEN=/scratch/data/macEug2/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LAP=0
SEQ2_LIMIT=500

BASE=/hive/data/genomes/mm10/bed/lastzMacEug2.2012-03-22
TMPDIR=/scratch/tmp
'_EOF_'
    # << happy emacs

    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	`pwd`/DEF \
	-syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
    #	real    2893m50.341s
    cat fb.mm10.chainMacEug2Link.txt
    #	115481931 bases of 2652783500 (4.353%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/mm10/bed
    ln -s lastzMacEug2.2012-03-22 lastz.macEug2

    # better to have reciprocal best for this one since it is low coverage:
    cd /hive/data/genomes/mm10/bed/lastzMacEug2.2012-03-22
    time doRecipBest.pl mm10 macEug2 -buildDir=`pwd` -workhorse=hgwdev \
	> best.log 2>&1 &
    #	real    1032m58.798s

    mkdir /hive/data/genomes/macEug2/bed/blastz.mm10.swap
    cd /hive/data/genomes/macEug2/bed/blastz.mm10.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzMacEug2.2012-03-22/DEF \
	-swap -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
    #	real    130m7.404s
    cat fb.macEug2.chainMm10Link.txt
    #	112811810 bases of 2536076957 (4.448%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/macEug2/bed
    ln -s blastz.mm10.swap lastz.mm10

##############################################################################
# LASTZ RAT Rn5 (DONE - 2012-03-23 - Hiram)
    # establish a screen to control this job with a name to indicate what it is
    screen -S mm10Rn5
    mkdir /hive/data/genomes/mm10/bed/lastzRn5.2012-03-23
    cd /hive/data/genomes/mm10/bed/lastzRn5.2012-03-23

    cat << '_EOF_' > DEF
# mouse vs rat
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
# From tuning experiment between mouse chr12:15000000-25000000 and
#       rat chr6:38000000-48000000
BLASTZ_O=600
BLASTZ_E=55
BLASTZ_Y=5000
BLASTZ_T=2
BLASTZ_K=3000
BLASTZ_L=3000
BLASTZ_Q=/hive/data/genomes/mm10/bed/lastzRn5.2012-03-23/mouse_rat_2.q

BLASTZ_ABRIDGE_REPEATS=1

# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_SMSK=/scratch/data/mm10/notInRat
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: Rat Rn5
SEQ2_DIR=/hive/data/genomes/rn5/rn5.2bit
SEQ2_LEN=/hive/data/genomes/rn5/chrom.sizes
SEQ2_SMSK=/hive/data/genomes/rn5/bed/linSpecRep/notInMouse
SEQ2_CHUNK=10000000
SEQ2_LAP=0
SEQ2_LIMIT=10

BASE=/hive/data/genomes/mm10/bed/lastzRn5.2012-03-23
TMPDIR=/scratch/tmp
'_EOF_'
    # << happy emacs

    #	establish a screen to control this job
    #	NOTE(review): a screen session named mm10Rn5 was already started at the
    #	top of this section; this second, differently named session looks
    #	redundant - confirm which one actually hosted the run
    screen -S rn5Mm10
    # initial run using the DEF file written above; it is backgrounded and
    #	all of its output is collected in do.log
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	`pwd`/DEF \
	-bigClusterHub=swarm -chainMinScore=5000 -chainLinearGap=medium \
	-noLoadChainSplit -syntenicNet -workhorse=hgwdev \
	-smallClusterHub=encodek > do.log 2>&1 &
    # broken lastz run when SMSK files did not exist for some of the
    #	Rn5 contigs - made empty files for those and completed, continuing:
    # same options as the initial run, resumed with -continue=cat and logged
    #	separately in cat.log
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	-continue=cat `pwd`/DEF \
	-bigClusterHub=swarm -chainMinScore=5000 -chainLinearGap=medium \
	-noLoadChainSplit -syntenicNet -workhorse=hgwdev \
	-smallClusterHub=encodek > cat.log 2>&1 &
    #	real    285m28.458s
    cat fb.mm10.chainRn5Link.txt
    #	1786721927 bases of 2652783500 (67.353%) in intersection
    # FYI: rn4 was:
    #	1449612208 bases of 2652783500 (54.645%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/mm10/bed
    ln -s lastzRn5.2012-03-23 lastz.rn5

    #	and the swap
    mkdir /hive/data/genomes/rn5/bed/blastz.mm10.swap
    cd /hive/data/genomes/rn5/bed/blastz.mm10.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzRn5.2012-03-23/DEF \
	-swap -bigClusterHub=swarm -chainMinScore=5000 -chainLinearGap=medium \
	-noLoadChainSplit -syntenicNet -workhorse=hgwdev \
	-smallClusterHub=encodek > swap.log 2>&1 &
    #	real    121m21.029s
    cat fb.rn5.chainMm10Link.txt
    #	1808154679 bases of 2572853723 (70.278%) in intersection
    # FYI, rn4 was:
    #	1449012636 bases of 2571531505 (56.348%) in intersection

    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/rn5/bed
    ln -s blastz.mm10.swap lastz.mm10

#########################################################################
# LASTZ Manatee triMan1 (DONE - 2012-03-29 - Hiram)
    # establish a screen to control this job with a name to indicate what it is
    screen -S mm10TriMan1
    mkdir /hive/data/genomes/mm10/bed/lastzTriMan1.2012-03-29
    cd /hive/data/genomes/mm10/bed/lastzTriMan1.2012-03-29

    # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
    #	number of jobs, 50,000 to something under 100,000
    cat << '_EOF_' > DEF
# manatee vs mouse
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: manatee TriMan1
SEQ2_DIR=/hive/data/genomes/triMan1/triMan1.2bit
SEQ2_LEN=/hive/data/genomes/triMan1/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LAP=0
SEQ2_LIMIT=30

BASE=/hive/data/genomes/mm10/bed/lastzTriMan1.2012-03-29
TMPDIR=/scratch/tmp
'_EOF_'
    # << happy emacs

    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	`pwd`/DEF \
	-syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
    #	real    1455m24.772s
    cat fb.mm10.chainTriMan1Link.txt
    #	704207702 bases of 2652783500 (26.546%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/mm10/bed
    ln -s lastzTriMan1.2012-03-29 lastz.triMan1

    mkdir /hive/data/genomes/triMan1/bed/blastz.mm10.swap
    cd /hive/data/genomes/triMan1/bed/blastz.mm10.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzTriMan1.2012-03-29/DEF \
	-swap -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
    #	real    62m33.530s
    cat fb.triMan1.chainMm10Link.txt
    #	682557025 bases of 2769099677 (24.649%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/triMan1/bed
    ln -s blastz.mm10.swap lastz.mm10

#########################################################################
# lastz Opossum monDom5 (DONE - 2012-03-29 - Hiram)
    # establish a screen to control this job with a name to indicate what it is
    screen -S mm10MonDom5
    mkdir /hive/data/genomes/mm10/bed/lastzMonDom5.2012-03-29
    cd /hive/data/genomes/mm10/bed/lastzMonDom5.2012-03-29

    cat << '_EOF_' > DEF
# Mouse vs. opossum
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
BLASTZ_Y=3400
BLASTZ_L=6000
BLASTZ_K=2200
BLASTZ_Q=/scratch/data/blastz/HoxD55.q

# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: Opossum monDom5
SEQ2_DIR=/scratch/data/monDom5/monDom5.2bit
SEQ2_LEN=/scratch/data/monDom5/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LAP=0

BASE=/hive/data/genomes/mm10/bed/lastzMonDom5.2012-03-29
TMPDIR=/scratch/tmp
'_EOF_'
    # << happy emacs

    # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
    #	number of jobs, 50,000 to something under 100,000
    #	Can't do this when there is only a single small set of chroms
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
        `pwd`/DEF \
        -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
        -chainMinScore=5000 -chainLinearGap=loose > do.log 2>&1 &
    #	real    1792m40.071s

    cat fb.mm10.chainMonDom5Link.txt
    #	254245903 bases of 2652783500 (9.584%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/mm10/bed
    ln -s lastzMonDom5.2012-03-29 lastz.monDom5

    # add syntenic net and recip best per user request 2020-11 - Hiram
    # The symlink step above left the shell in /hive/data/genomes/mm10/bed,
    #	so return to the build directory first: `pwd`/DEF, -buildDir=`pwd`,
    #	the log files and the fb.*.txt results below are all relative to it.
    cd /hive/data/genomes/mm10/bed/lastzMonDom5.2012-03-29
    # resume the existing run at the syntenicNet step, same chain parameters
    #	as the original run
    time (doBlastzChainNet.pl -verbose=2 `pwd`/DEF \
	-continue=syntenicNet -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
	-chainMinScore=5000 -chainLinearGap=loose) > synNet.log 2>&1 &
    # real    4m58.921s

    cat fb.mm10.chainSynMonDom5Link.txt
    # 181282548 bases of 2652783500 (6.834%) in intersection

    # reciprocal best, mm10 as target and monDom5 as query
    #	(-workhorse was previously given twice; once is sufficient)
    time (doRecipBest.pl -load -workhorse=hgwdev mm10 monDom5 \
      -buildDir=`pwd`) > rbest.log 2>&1 &
    # real    1024m21.233s

    cat fb.mm10.chainRBest.MonDom5.txt
    # 208921944 bases of 2652783500 (7.876%) in intersection


    #	and for the swap
    mkdir /hive/data/genomes/monDom5/bed/blastz.mm10.swap
    cd /hive/data/genomes/monDom5/bed/blastz.mm10.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzMonDom5.2012-03-29/DEF \
        -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
        -swap -chainMinScore=5000 -chainLinearGap=loose > do.log 2>&1 &
    #	real    73m49.230s
    cat  fb.monDom5.chainMm10Link.txt
    #	252291401 bases of 3501660299 (7.205%) in intersection

    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/monDom5/bed
    ln -s blastz.mm10.swap lastz.mm10

    # add syntenic net and recip best per user request 2020-11 - Hiram
    # Work from the swap build directory: the symlink step above left the
    #	shell in /hive/data/genomes/monDom5/bed, where -buildDir=`pwd` and
    #	the fb.*.txt result files below would not resolve correctly.
    cd /hive/data/genomes/monDom5/bed/blastz.mm10.swap
    # resume the swap at the syntenicNet step, same chain parameters as the
    #	original swap
    time (doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzMonDom5.2012-03-29/DEF \
	-continue=syntenicNet -swap -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=hgwdev -bigClusterHub=ku \
	-chainMinScore=5000 -chainLinearGap=loose) > synNet.log 2>&1 &
    # real    4m52.417s
    # NOTE(review): this displays the all-chain measurement again (same file
    #	and numbers as recorded for the original swap); the syntenic figure
    #	is presumably in fb.monDom5.chainSynMm10Link.txt and was not
    #	recorded here - confirm
    cat fb.monDom5.chainMm10Link.txt
    # 252291401 bases of 3501660299 (7.205%) in intersection

    # reciprocal best, monDom5 as target and mm10 as query
    #	(-workhorse was previously given twice; once is sufficient)
    time (doRecipBest.pl -load -workhorse=hgwdev monDom5 mm10 \
      -buildDir=`pwd`) > rbest.log 2>&1 &
    # real    1013m15.298s

    cat fb.monDom5.chainRBest.Mm10.txt
    # 209696912 bases of 3501660299 (5.988%) in intersection

#########################################################################
# lastz Tasmanian Devil sarHar1 (DONE - 2012-03-29 - Hiram)
    # establish a screen to control this job with a name to indicate what it is
    screen -S mm10SarHar1
    mkdir /hive/data/genomes/mm10/bed/lastzSarHar1.2012-03-29
    cd /hive/data/genomes/mm10/bed/lastzSarHar1.2012-03-29

    cat << '_EOF_' > DEF
# Mouse vs. tasmanian devil
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
BLASTZ_Y=3400
BLASTZ_L=6000
BLASTZ_K=2200
BLASTZ_Q=/scratch/data/blastz/HoxD55.q

# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: tasmanian devil sarHar1
SEQ2_DIR=/scratch/data/sarHar1/sarHar1.2bit
SEQ2_LEN=/scratch/data/sarHar1/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LAP=0

BASE=/hive/data/genomes/mm10/bed/lastzSarHar1.2012-03-29
TMPDIR=/scratch/tmp
'_EOF_'
    # << happy emacs

    # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
    #	number of jobs, 50,000 to something under 100,000
    # when not present, SEQ2_LIMIT is a default 100
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
        `pwd`/DEF \
        -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
        -chainMinScore=5000 -chainLinearGap=loose > do.log 2>&1 &
    #	real    1208m55.866s

    cat fb.mm10.chainSarHar1Link.txt
    #	224935746 bases of 2652783500 (8.479%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/mm10/bed
    ln -s lastzSarHar1.2012-03-29 lastz.sarHar1

    #	and for the swap
    mkdir /hive/data/genomes/sarHar1/bed/blastz.mm10.swap
    cd /hive/data/genomes/sarHar1/bed/blastz.mm10.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzSarHar1.2012-03-29/DEF \
        -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
        -swap -chainMinScore=5000 -chainLinearGap=loose > do.log 2>&1 &
    #	real    45m53.015s
    cat  fb.sarHar1.chainMm10Link.txt
    #	231249436 bases of 2931539702 (7.888%) in intersection

    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/sarHar1/bed
    ln -s blastz.mm10.swap lastz.mm10

#########################################################################
# lastz budgerigar melUnd1 (DONE - 2012-03-29 - Hiram)
    # establish a screen to control this job with a name to indicate what it is
    screen -S mm10MelUnd1
    mkdir /hive/data/genomes/mm10/bed/lastzMelUnd1.2012-03-29
    cd /hive/data/genomes/mm10/bed/lastzMelUnd1.2012-03-29

    cat << '_EOF_' > DEF
# Mouse vs. budgerigar
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
BLASTZ_Y=3400
BLASTZ_L=6000
BLASTZ_K=2200
BLASTZ_Q=/scratch/data/blastz/HoxD55.q

# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: budgerigar melUnd1
SEQ2_DIR=/hive/data/genomes/melUnd1/melUnd1.2bit
SEQ2_LEN=/hive/data/genomes/melUnd1/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LAP=0
SEQ2_LIMIT=50

BASE=/hive/data/genomes/mm10/bed/lastzMelUnd1.2012-03-29
TMPDIR=/scratch/tmp
'_EOF_'
    # << happy emacs

    # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
    #	number of jobs, 50,000 to something under 100,000
    # when not present, SEQ2_LIMIT is a default 100
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
        `pwd`/DEF \
        -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
        -chainMinScore=5000 -chainLinearGap=loose > do.log 2>&1 &
    #	real    883m58.198s

    cat fb.mm10.chainMelUnd1Link.txt
    #	95217653 bases of 2652783500 (3.589%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/mm10/bed
    ln -s lastzMelUnd1.2012-03-29 lastz.melUnd1

    #	and for the swap
    mkdir /hive/data/genomes/melUnd1/bed/blastz.mm10.swap
    cd /hive/data/genomes/melUnd1/bed/blastz.mm10.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzMelUnd1.2012-03-29/DEF \
        -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
        -swap -chainMinScore=5000 -chainLinearGap=loose > do.log 2>&1 &
    #	real    9m9.260s
    cat  fb.melUnd1.chainMm10Link.txt
    #	79867911 bases of 1086614815 (7.350%) in intersection

    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/melUnd1/bed
    ln -s blastz.mm10.swap lastz.mm10

#########################################################################
# lastz platypus ornAna1 (DONE - 2012-03-29 - Hiram)
    # establish a screen to control this job with a name to indicate what it is
    screen -S mm10OrnAna1
    mkdir /hive/data/genomes/mm10/bed/lastzOrnAna1.2012-03-29
    cd /hive/data/genomes/mm10/bed/lastzOrnAna1.2012-03-29

    cat << '_EOF_' > DEF
# Mouse vs. platypus
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
BLASTZ_Y=3400
BLASTZ_L=6000
BLASTZ_K=2200
BLASTZ_Q=/scratch/data/blastz/HoxD55.q

# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: platypus ornAna1
SEQ2_DIR=/scratch/data/ornAna1/ornAna1.2bit
SEQ2_LEN=/scratch/data/ornAna1/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LAP=0
SEQ2_LIMIT=400

BASE=/hive/data/genomes/mm10/bed/lastzOrnAna1.2012-03-29
TMPDIR=/scratch/tmp
'_EOF_'
    # << happy emacs

    # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
    #	number of jobs, 50,000 to something under 100,000
    # when not present, SEQ2_LIMIT is a default 100
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
        `pwd`/DEF \
        -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
        -chainMinScore=5000 -chainLinearGap=loose > do.log 2>&1 &
    #	real    1264m1.056s

    cat fb.mm10.chainOrnAna1Link.txt
    #	141873792 bases of 2652783500 (5.348%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/mm10/bed
    ln -s lastzOrnAna1.2012-03-29 lastz.ornAna1

    # add syntenic net and recip best per user request 2020-11 - Hiram
    # The symlink step above left the shell in /hive/data/genomes/mm10/bed,
    #	so return to the build directory first: `pwd`/DEF, -buildDir=`pwd`
    #	and the log files below are all relative to it.
    cd /hive/data/genomes/mm10/bed/lastzOrnAna1.2012-03-29
    # resume the existing run at the syntenicNet step, same chain parameters
    #	as the original run
    time (doBlastzChainNet.pl -verbose=2 `pwd`/DEF \
	-continue=syntenicNet -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
	-chainMinScore=5000 -chainLinearGap=loose) > synNet.log 2>&1 &
    # real    2m3.337s

    # reciprocal best, mm10 as target and ornAna1 as query
    #	(-workhorse was previously given twice; once is sufficient)
    time (doRecipBest.pl -load -workhorse=hgwdev mm10 ornAna1 \
      -buildDir=`pwd`) > rbest.log 2>&1 &
    # real    196m25.100s

    #	and for the swap
    mkdir /hive/data/genomes/ornAna1/bed/blastz.mm10.swap
    cd /hive/data/genomes/ornAna1/bed/blastz.mm10.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzOrnAna1.2012-03-29/DEF \
        -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
        -swap -chainMinScore=5000 -chainLinearGap=loose > do.log 2>&1 &
    #	real    49m45.308s
    cat  fb.ornAna1.chainMm10Link.txt
    #	135101083 bases of 1842236818 (7.334%) in intersection

    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/ornAna1/bed
    ln -s blastz.mm10.swap lastz.mm10

    # add syntenic net and recip best per user request 2020-11 - Hiram
    # Work from the swap build directory: the symlink step above left the
    #	shell in /hive/data/genomes/ornAna1/bed, where -buildDir=`pwd` and
    #	the fb.*.txt result files below would not resolve correctly.
    cd /hive/data/genomes/ornAna1/bed/blastz.mm10.swap
    # resume the swap at the syntenicNet step, same chain parameters as the
    #	original swap
    time (doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzOrnAna1.2012-03-29/DEF \
	-continue=syntenicNet -swap -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=hgwdev -bigClusterHub=ku \
	-chainMinScore=5000 -chainLinearGap=loose) > synNet.log 2>&1 &
    # real    3m17.422s
    cat fb.ornAna1.chainSynMm10Link.txt
    # 76550450 bases of 1842236818 (4.155%) in intersection

    # reciprocal best, ornAna1 as target and mm10 as query
    #	(-workhorse was previously given twice; once is sufficient)
    time (doRecipBest.pl -load -workhorse=hgwdev ornAna1 mm10 \
      -buildDir=`pwd`) > rbest.log 2>&1 &
    # real    207m50.665s

    cat fb.ornAna1.chainRBest.Mm10.txt
    # 114682047 bases of 1842236818 (6.225%) in intersection

#########################################################################
# lastz turtle chrPic1 (DONE - 2012-03-29 - Hiram)
    # establish a screen to control this job with a name to indicate what it is
    screen -S mm10ChrPic1
    mkdir /hive/data/genomes/mm10/bed/lastzChrPic1.2012-03-29
    cd /hive/data/genomes/mm10/bed/lastzChrPic1.2012-03-29

    cat << '_EOF_' > DEF
# Mouse vs. turtle
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
BLASTZ_Y=3400
BLASTZ_L=6000
BLASTZ_K=2200
BLASTZ_Q=/scratch/data/blastz/HoxD55.q

# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: turtle chrPic1
SEQ2_DIR=/hive/data/genomes/chrPic1/chrPic1.2bit
SEQ2_LEN=/hive/data/genomes/chrPic1/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LAP=0
SEQ2_LIMIT=200

BASE=/hive/data/genomes/mm10/bed/lastzChrPic1.2012-03-29
TMPDIR=/scratch/tmp
'_EOF_'
    # << happy emacs

    # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
    #	number of jobs, 50,000 to something under 100,000
    # when not present, SEQ2_LIMIT is a default 100
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
        `pwd`/DEF \
        -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
        -chainMinScore=5000 -chainLinearGap=loose > do.log 2>&1 &
    #	real    1243m2.518s
    cat fb.mm10.chainChrPic1Link.txt
    #	125499965 bases of 2652783500 (4.731%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/mm10/bed
    ln -s lastzChrPic1.2012-03-29 lastz.chrPic1

    #	and for the swap
    mkdir /hive/data/genomes/chrPic1/bed/blastz.mm10.swap
    cd /hive/data/genomes/chrPic1/bed/blastz.mm10.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzChrPic1.2012-03-29/DEF \
        -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
        -swap -chainMinScore=5000 -chainLinearGap=loose > swap.log 2>&1 &
    #	real    19m26.835s
    cat  fb.chrPic1.chainMm10Link.txt
    #	118436838 bases of 2158289746 (5.488%) in intersection

    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/chrPic1/bed
    ln -s blastz.mm10.swap lastz.mm10

#########################################################################
# lastz chicken galGal4 (DONE - 2012-04-02 - Hiram)
    # establish a screen to control this job with a name to indicate what it is
    screen -S mm10GalGal4
    mkdir /hive/data/genomes/mm10/bed/lastzGalGal4.2012-04-02
    cd /hive/data/genomes/mm10/bed/lastzGalGal4.2012-04-02

    cat << '_EOF_' > DEF
# Mouse vs. chicken
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
BLASTZ_Y=3400
BLASTZ_L=6000
BLASTZ_K=2200
BLASTZ_Q=/scratch/data/blastz/HoxD55.q

# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: chicken galGal4
SEQ2_DIR=/hive/data/genomes/galGal4/galGal4.2bit
SEQ2_LEN=/hive/data/genomes/galGal4/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LAP=0
SEQ2_LIMIT=30

BASE=/hive/data/genomes/mm10/bed/lastzGalGal4.2012-04-02
TMPDIR=/scratch/tmp
'_EOF_'
    # << happy emacs

    # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
    #	number of jobs, 50,000 to something under 100,000
    # when not present, SEQ2_LIMIT is a default 100
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
        `pwd`/DEF \
        -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
        -chainMinScore=5000 -chainLinearGap=loose > do.log 2>&1 &
    #	real    109m21.068s
    #	broken swarm cluster, continuing:
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
        -continue=cat `pwd`/DEF \
        -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
        -chainMinScore=5000 -chainLinearGap=loose > cat.log 2>&1 &
    #	real    57m24.155s
    cat fb.mm10.chainGalGal4Link.txt
    #	97510773 bases of 2652783500 (3.676%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/mm10/bed
    ln -s lastzGalGal4.2012-04-02 lastz.galGal4

    #	and for the swap
    mkdir /hive/data/genomes/galGal4/bed/blastz.mm10.swap
    cd /hive/data/genomes/galGal4/bed/blastz.mm10.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzGalGal4.2012-04-02/DEF \
        -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
        -swap -chainMinScore=5000 -chainLinearGap=loose > swap.log 2>&1 &
    #	real    95m50.996s
    cat  fb.galGal4.chainMm10Link.txt
    #	83660034 bases of 1032854810 (8.100%) in intersection

    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/galGal4/bed
    ln -s blastz.mm10.swap lastz.mm10

#########################################################################
# lastz zebra finch taeGut1 (DONE - 2012-04-02 - Hiram)
    # establish a screen to control this job with a name to indicate what it is
    screen -S mm10TaeGut1
    mkdir /hive/data/genomes/mm10/bed/lastzTaeGut1.2012-04-02
    cd /hive/data/genomes/mm10/bed/lastzTaeGut1.2012-04-02

    cat << '_EOF_' > DEF
# Mouse vs. zebra finch
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
BLASTZ_Y=3400
BLASTZ_L=6000
BLASTZ_K=2200
BLASTZ_Q=/scratch/data/blastz/HoxD55.q

# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: zebra finch taeGut1
SEQ2_DIR=/scratch/data/taeGut1/taeGut1.2bit
SEQ2_LEN=/scratch/data/taeGut1/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LAP=0
SEQ2_LIMIT=5

BASE=/hive/data/genomes/mm10/bed/lastzTaeGut1.2012-04-02
TMPDIR=/scratch/tmp
'_EOF_'
    # << happy emacs

    # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
    #	number of jobs, 50,000 to something under 100,000
    # when not present, SEQ2_LIMIT is a default 100
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
        `pwd`/DEF \
        -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
        -chainMinScore=5000 -chainLinearGap=loose > do.log 2>&1 &
    #	real    106m11.612s
    # broken swarm, continuing:
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
        -continue=cat `pwd`/DEF \
        -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
        -chainMinScore=5000 -chainLinearGap=loose > cat.log 2>&1 &
    #	real    29m11.090s
    cat fb.mm10.chainTaeGut1Link.txt
    #	95469341 bases of 2652783500 (3.599%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/mm10/bed
    ln -s lastzTaeGut1.2012-04-02 lastz.taeGut1

    #	and for the swap
    mkdir /hive/data/genomes/taeGut1/bed/blastz.mm10.swap
    cd /hive/data/genomes/taeGut1/bed/blastz.mm10.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzTaeGut1.2012-04-02/DEF \
        -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
        -swap -chainMinScore=5000 -chainLinearGap=loose > swap.log 2>&1 &
    #	real    37m17.483s
    cat  fb.taeGut1.chainMm10Link.txt
    #	89312133 bases of 1222864691 (7.304%) in intersection

    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/taeGut1/bed
    ln -s blastz.mm10.swap lastz.mm10

#########################################################################
# lastz lizard anoCar2 (DONE - 2012-04-02 - Hiram)
    # establish a screen to control this job with a name to indicate what it is
    screen -S mm10AnoCar2
    mkdir /hive/data/genomes/mm10/bed/lastzAnoCar2.2012-04-02
    cd /hive/data/genomes/mm10/bed/lastzAnoCar2.2012-04-02

    cat << '_EOF_' > DEF
# Mouse vs. lizard
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
BLASTZ_Y=3400
BLASTZ_L=6000
BLASTZ_K=2200
BLASTZ_Q=/scratch/data/blastz/HoxD55.q

# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: lizard anoCar2
SEQ2_DIR=/scratch/data/anoCar2/anoCar2.2bit
SEQ2_LEN=/scratch/data/anoCar2/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LAP=0
SEQ2_LIMIT=15

BASE=/hive/data/genomes/mm10/bed/lastzAnoCar2.2012-04-02
TMPDIR=/scratch/tmp
'_EOF_'
    # << happy emacs

    # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
    #	number of jobs, 50,000 to something under 100,000
    # when not present, SEQ2_LIMIT is a default 100
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
        `pwd`/DEF \
        -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
        -chainMinScore=5000 -chainLinearGap=loose > do.log 2>&1 &
    #	real    103m17.133s
    # broken swarm, continuing:
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
        -continue=cat `pwd`/DEF \
        -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
        -chainMinScore=5000 -chainLinearGap=loose > cat.log 2>&1 &
    #	real    43m2.183s
    cat fb.mm10.chainAnoCar2Link.txt
    #	88356459 bases of 2652783500 (3.331%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/mm10/bed
    ln -s lastzAnoCar2.2012-04-02 lastz.anoCar2

    #	and for the swap
    mkdir /hive/data/genomes/anoCar2/bed/blastz.mm10.swap
    cd /hive/data/genomes/anoCar2/bed/blastz.mm10.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzAnoCar2.2012-04-02/DEF \
        -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
        -swap -chainMinScore=5000 -chainLinearGap=loose > swap.log 2>&1 &
    #	real    97m50.599s
    cat  fb.anoCar2.chainMm10Link.txt
    #	84865552 bases of 1701353770 (4.988%) in intersection

    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/anoCar2/bed
    ln -s blastz.mm10.swap lastz.mm10

#########################################################################
# lastz turkey melGal1 (DONE - 2012-04-02 - Hiram)
    # establish a screen to control this job with a name to indicate what it is
    screen -S mm10MelGal1
    mkdir /hive/data/genomes/mm10/bed/lastzMelGal1.2012-04-02
    cd /hive/data/genomes/mm10/bed/lastzMelGal1.2012-04-02

    cat << '_EOF_' > DEF
# Mouse vs. turkey
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
BLASTZ_Y=3400
BLASTZ_L=6000
BLASTZ_K=2200
BLASTZ_Q=/scratch/data/blastz/HoxD55.q

# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: turkey melGal1
SEQ2_DIR=/scratch/data/melGal1/melGal1.2bit
SEQ2_LEN=/scratch/data/melGal1/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LAP=0
SEQ2_LIMIT=15

BASE=/hive/data/genomes/mm10/bed/lastzMelGal1.2012-04-02
TMPDIR=/scratch/tmp
'_EOF_'
    # << happy emacs

    # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
    #	number of jobs, 50,000 to something under 100,000
    # when not present, SEQ2_LIMIT is a default 100
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
        `pwd`/DEF \
        -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
        -chainMinScore=5000 -chainLinearGap=loose > do.log 2>&1 &
    #	real    101m17.902s
    # broken swarm, continuing:
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
        -continue=cat `pwd`/DEF \
        -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
        -chainMinScore=5000 -chainLinearGap=loose > cat.log 2>&1 &
    #	real    20m47.771s
    cat fb.mm10.chainMelGal1Link.txt
    #	93132953 bases of 2652783500 (3.511%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/mm10/bed
    ln -s lastzMelGal1.2012-04-02 lastz.melGal1

    #	and for the swap
    mkdir /hive/data/genomes/melGal1/bed/blastz.mm10.swap
    cd /hive/data/genomes/melGal1/bed/blastz.mm10.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzMelGal1.2012-04-02/DEF \
        -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
        -swap -chainMinScore=5000 -chainLinearGap=loose > swap.log 2>&1 &
    #	real    88m39.591s
    cat  fb.melGal1.chainMm10Link.txt
    #	76848161 bases of 935922386 (8.211%) in intersection

    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/melGal1/bed
    ln -s blastz.mm10.swap lastz.mm10

#########################################################################
# lastz frog xenTro3 (DONE - 2012-04-02 - Hiram)
    # establish a screen to control this job with a name to indicate what it is
    screen -S mm10XenTro3
    mkdir /hive/data/genomes/mm10/bed/lastzXenTro3.2012-04-02
    cd /hive/data/genomes/mm10/bed/lastzXenTro3.2012-04-02

    cat << '_EOF_' > DEF
# Mouse vs. frog
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
BLASTZ_Y=3400
BLASTZ_L=6000
BLASTZ_K=2200
BLASTZ_Q=/scratch/data/blastz/HoxD55.q

# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: frog xenTro3
SEQ2_DIR=/scratch/data/xenTro3/xenTro3.2bit
SEQ2_LEN=/scratch/data/xenTro3/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LAP=0
SEQ2_LIMIT=40

BASE=/hive/data/genomes/mm10/bed/lastzXenTro3.2012-04-02
TMPDIR=/scratch/tmp
'_EOF_'
    # << happy emacs

    # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
    #	number of jobs, 50,000 to something under 100,000
    # when not present, SEQ2_LIMIT is a default 100
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
        `pwd`/DEF \
        -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
        -chainMinScore=5000 -chainLinearGap=loose > do.log 2>&1 &
    #	real    99m10.611s
    # broken swarm, continuing:
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
        -continue=cat `pwd`/DEF \
        -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
        -chainMinScore=5000 -chainLinearGap=loose > cat.log 2>&1 &
    #	real    37m52.678s
    cat fb.mm10.chainXenTro3Link.txt
    #	82900338 bases of 2652783500 (3.125%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/mm10/bed
    ln -s lastzXenTro3.2012-04-02 lastz.xenTro3

    #	and for the swap
    mkdir /hive/data/genomes/xenTro3/bed/blastz.mm10.swap
    cd /hive/data/genomes/xenTro3/bed/blastz.mm10.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzXenTro3.2012-04-02/DEF \
        -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
        -swap -chainMinScore=5000 -chainLinearGap=loose > swap.log 2>&1 &
    #	real    53m19.485s
    cat  fb.xenTro3.chainMm10Link.txt
    #	90345130 bases of 1358334882 (6.651%) in intersection

    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/xenTro3/bed
    ln -s blastz.mm10.swap lastz.mm10

#########################################################################
# lastz coelacanth latCha1 (DONE - 2012-04-02 - Hiram)
    # establish a screen to control this job with a name to indicate what it is
    screen -S mm10LatCha1
    mkdir /hive/data/genomes/mm10/bed/lastzLatCha1.2012-04-02
    cd /hive/data/genomes/mm10/bed/lastzLatCha1.2012-04-02

    cat << '_EOF_' > DEF
# Mouse vs. coelacanth
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
BLASTZ_Y=3400
BLASTZ_L=6000
BLASTZ_K=2200
BLASTZ_Q=/scratch/data/blastz/HoxD55.q

# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: coelacanth latCha1
SEQ2_DIR=/hive/data/genomes/latCha1/latCha1.2bit
SEQ2_LEN=/hive/data/genomes/latCha1/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LAP=0
SEQ2_LIMIT=50

BASE=/hive/data/genomes/mm10/bed/lastzLatCha1.2012-04-02
TMPDIR=/scratch/tmp
'_EOF_'
    # << happy emacs

    # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
    #	number of jobs, 50,000 to something under 100,000
    # when not present, SEQ2_LIMIT is a default 100
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
        `pwd`/DEF \
        -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
        -chainMinScore=5000 -chainLinearGap=loose > do.log 2>&1 &
    #	real    95m34.477s
    #	broken swarm, continuing:
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
        -continue=cat `pwd`/DEF \
        -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
        -chainMinScore=5000 -chainLinearGap=loose > cat.log 2>&1 &
    #	real    214m7.324s
    cat fb.mm10.chainLatCha1Link.txt
    #	72036116 bases of 2652783500 (2.715%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/mm10/bed
    ln -s lastzLatCha1.2012-04-02 lastz.latCha1

    #	and for the swap
    mkdir /hive/data/genomes/latCha1/bed/blastz.mm10.swap
    cd /hive/data/genomes/latCha1/bed/blastz.mm10.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzLatCha1.2012-04-02/DEF \
        -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
        -swap -chainMinScore=5000 -chainLinearGap=loose > swap.log 2>&1 &
    #	real    14m44.600s
    cat  fb.latCha1.chainMm10Link.txt
    #	73798131 bases of 2183592768 (3.380%) in intersection

    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/latCha1/bed
    ln -s blastz.mm10.swap lastz.mm10

#########################################################################
# lastz atlantic cod gadMor1 (DONE - 2012-04-02 - Hiram)
    # establish a screen to control this job with a name to indicate what it is
    screen -S mm10GadMor1
    mkdir /hive/data/genomes/mm10/bed/lastzGadMor1.2012-04-02
    cd /hive/data/genomes/mm10/bed/lastzGadMor1.2012-04-02

    cat << '_EOF_' > DEF
# Mouse vs. atlantic cod
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
BLASTZ_Y=3400
BLASTZ_L=6000
BLASTZ_K=2200
BLASTZ_Q=/scratch/data/blastz/HoxD55.q

# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: atlantic cod gadMor1
SEQ2_DIR=/hive/data/genomes/gadMor1/gadMor1.2bit
SEQ2_LEN=/hive/data/genomes/gadMor1/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LAP=0
SEQ2_LIMIT=700

BASE=/hive/data/genomes/mm10/bed/lastzGadMor1.2012-04-02
TMPDIR=/scratch/tmp
'_EOF_'
    # << happy emacs

    # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
    #	number of jobs, 50,000 to something under 100,000
    # when not present, SEQ2_LIMIT is a default 100
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
        `pwd`/DEF \
        -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
        -chainMinScore=5000 -chainLinearGap=loose > do.log 2>&1 &
    #	real    91m23.642s
    # broken swarm, continuing:
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
        -continue=cat `pwd`/DEF \
        -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
        -chainMinScore=5000 -chainLinearGap=loose > cat.log 2>&1 &
    #	real    39m41.194s
    cat fb.mm10.chainGadMor1Link.txt
    #	45795692 bases of 2652783500 (1.726%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/mm10/bed
    ln -s lastzGadMor1.2012-04-02 lastz.gadMor1

    #	and for the swap
    mkdir /hive/data/genomes/gadMor1/bed/blastz.mm10.swap
    cd /hive/data/genomes/gadMor1/bed/blastz.mm10.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzGadMor1.2012-04-02/DEF \
        -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
        -swap -chainMinScore=5000 -chainLinearGap=loose > swap.log 2>&1 &
    #	real    62m58.963s
    cat  fb.gadMor1.chainMm10Link.txt
    #	41406507 bases of 608038597 (6.810%) in intersection

    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/gadMor1/bed
    ln -s blastz.mm10.swap lastz.mm10

#########################################################################
# lastz nile tilapia oreNil1 (DONE - 2012-04-02 - Hiram)
    # establish a screen to control this job with a name to indicate what it is
    screen -S mm10OreNil1
    mkdir /hive/data/genomes/mm10/bed/lastzOreNil1.2012-04-02
    cd /hive/data/genomes/mm10/bed/lastzOreNil1.2012-04-02

    cat << '_EOF_' > DEF
# Mouse vs. nile tilapia
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
BLASTZ_Y=3400
BLASTZ_L=6000
BLASTZ_K=2200
BLASTZ_Q=/scratch/data/blastz/HoxD55.q

# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: nile tilapia oreNil1
SEQ2_DIR=/scratch/data/oreNil1/oreNil1.2bit
SEQ2_LEN=/scratch/data/oreNil1/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LAP=0
SEQ2_LIMIT=10

BASE=/hive/data/genomes/mm10/bed/lastzOreNil1.2012-04-02
TMPDIR=/scratch/tmp
'_EOF_'
    # << happy emacs

    # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
    #	number of jobs, 50,000 to something under 100,000
    # when not present, SEQ2_LIMIT is a default 100
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
        `pwd`/DEF \
        -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
        -chainMinScore=5000 -chainLinearGap=loose > do.log 2>&1 &
    #	real    89m6.727s
    #	broken swarm, continuing:
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
        -continue=cat `pwd`/DEF \
        -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
        -chainMinScore=5000 -chainLinearGap=loose > cat.log 2>&1 &
    #	real    24m3.960s
    cat fb.mm10.chainOreNil1Link.txt
    #	51915568 bases of 2652783500 (1.957%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/mm10/bed
    ln -s lastzOreNil1.2012-04-02 lastz.oreNil1

    #	and for the swap
    mkdir /hive/data/genomes/oreNil1/bed/blastz.mm10.swap
    cd /hive/data/genomes/oreNil1/bed/blastz.mm10.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzOreNil1.2012-04-02/DEF \
        -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
        -swap -chainMinScore=5000 -chainLinearGap=loose > swap.log 2>&1 &
    #	real    90m55.298s
    cat  fb.oreNil1.chainMm10Link.txt
    #	49709461 bases of 816084674 (6.091%) in intersection

    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/oreNil1/bed
    ln -s blastz.mm10.swap lastz.mm10

#########################################################################
# lastz stickleback gasAcu1 (DONE - 2012-04-02 - Hiram)
    # establish a screen to control this job with a name to indicate what it is
    screen -S mm10GasAcu1
    mkdir /hive/data/genomes/mm10/bed/lastzGasAcu1.2012-04-02
    cd /hive/data/genomes/mm10/bed/lastzGasAcu1.2012-04-02

    cat << '_EOF_' > DEF
# Mouse vs. stickleback
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
BLASTZ_Y=3400
BLASTZ_L=6000
BLASTZ_K=2200
BLASTZ_Q=/scratch/data/blastz/HoxD55.q

# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: stickleback gasAcu1
SEQ2_DIR=/scratch/data/gasAcu1/gasAcu1.2bit
SEQ2_LEN=/scratch/data/gasAcu1/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LAP=0
SEQ2_LIMIT=1

BASE=/hive/data/genomes/mm10/bed/lastzGasAcu1.2012-04-02
TMPDIR=/scratch/tmp
'_EOF_'
    # << happy emacs

    # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
    #	number of jobs, 50,000 to something under 100,000
    # when not present, SEQ2_LIMIT is a default 100
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
        `pwd`/DEF \
        -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
        -chainMinScore=5000 -chainLinearGap=loose > do.log 2>&1 &
    #	real    87m5.963s
    # broken swarm, continuing:
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
        -continue=cat `pwd`/DEF \
        -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
        -chainMinScore=5000 -chainLinearGap=loose > cat.log 2>&1 &
    #	real    9m49.199s
    cat fb.mm10.chainGasAcu1Link.txt
    #	53469711 bases of 2652783500 (2.016%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/mm10/bed
    ln -s lastzGasAcu1.2012-04-02 lastz.gasAcu1

    #	and for the swap
    mkdir /hive/data/genomes/gasAcu1/bed/blastz.mm10.swap
    cd /hive/data/genomes/gasAcu1/bed/blastz.mm10.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzGasAcu1.2012-04-02/DEF \
        -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
        -swap -chainMinScore=5000 -chainLinearGap=loose > swap.log 2>&1 &
    #	real    12m58.072s
    cat  fb.gasAcu1.chainMm10Link.txt
    #	48802831 bases of 446627861 (10.927%) in intersection

    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/gasAcu1/bed
    ln -s blastz.mm10.swap lastz.mm10

#########################################################################
# lastz fugu fr3 (DONE - 2012-04-02 - Hiram)
    # establish a screen to control this job with a name to indicate what it is
    screen -S mm10Fr3
    mkdir /hive/data/genomes/mm10/bed/lastzFr3.2012-04-02
    cd /hive/data/genomes/mm10/bed/lastzFr3.2012-04-02

    cat << '_EOF_' > DEF
# Mouse vs. fugu
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
BLASTZ_Y=3400
BLASTZ_L=6000
BLASTZ_K=2200
BLASTZ_Q=/scratch/data/blastz/HoxD55.q

# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: fugu fr3
SEQ2_DIR=/scratch/data/fr3/fr3.2bit
SEQ2_LEN=/scratch/data/fr3/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LAP=0
SEQ2_LIMIT=30

BASE=/hive/data/genomes/mm10/bed/lastzFr3.2012-04-02
TMPDIR=/scratch/tmp
'_EOF_'
    # << happy emacs

    # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
    #	number of jobs, 50,000 to something under 100,000
    # when not present, SEQ2_LIMIT is a default 100
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
        `pwd`/DEF \
        -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
        -chainMinScore=5000 -chainLinearGap=loose > do.log 2>&1 &
    #	real    84m37.070s
    # broken swarm, continuing:
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
        -continue=cat `pwd`/DEF \
        -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
        -chainMinScore=5000 -chainLinearGap=loose > cat.log 2>&1 &
    #	real    171m16.627s
    cat fb.mm10.chainFr3Link.txt
    #	47460021 bases of 2652783500 (1.789%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/mm10/bed
    ln -s lastzFr3.2012-04-02 lastz.fr3

    #	and for the swap
    mkdir /hive/data/genomes/fr3/bed/blastz.mm10.swap
    cd /hive/data/genomes/fr3/bed/blastz.mm10.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzFr3.2012-04-02/DEF \
        -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
        -swap -chainMinScore=5000 -chainLinearGap=loose > swap.log 2>&1 &
    #	real    7m13.151s
    cat  fb.fr3.chainMm10Link.txt
    #	42586058 bases of 350961831 (12.134%) in intersection

    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/fr3/bed
    ln -s blastz.mm10.swap lastz.mm10

#########################################################################
# lastz tetraodon tetNig2 (DONE - 2012-04-02 - Hiram)
    # establish a screen to control this job with a name to indicate what it is
    screen -S mm10TetNig2
    mkdir /hive/data/genomes/mm10/bed/lastzTetNig2.2012-04-02
    cd /hive/data/genomes/mm10/bed/lastzTetNig2.2012-04-02

    cat << '_EOF_' > DEF
# Mouse vs. tetraodon
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
BLASTZ_Y=3400
BLASTZ_L=6000
BLASTZ_K=2200
BLASTZ_Q=/scratch/data/blastz/HoxD55.q

# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: tetraodon tetNig2
SEQ2_DIR=/scratch/data/tetNig2/tetNig2.2bit
SEQ2_LEN=/scratch/data/tetNig2/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LAP=0
SEQ2_LIMIT=30

BASE=/hive/data/genomes/mm10/bed/lastzTetNig2.2012-04-02
TMPDIR=/scratch/tmp
'_EOF_'
    # << happy emacs

    # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
    #	number of jobs, 50,000 to something under 100,000
    # when not present, SEQ2_LIMIT is a default 100
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
        `pwd`/DEF \
        -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
        -chainMinScore=5000 -chainLinearGap=loose > do.log 2>&1 &
    # NOTE(review): no run time or failure note was recorded for the run
    #   above.  Presumably, as in the other lastz runs in this file, the
    #   command below is a restart at the 'cat' step after that run
    #   stopped on the cluster - TODO confirm.  Both commands are put in
    #   the background, so do not start them together.
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
        -continue=cat `pwd`/DEF \
        -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
        -chainMinScore=5000 -chainLinearGap=loose > cat.log 2>&1 &
    #	real    13m21.638s
    cat fb.mm10.chainTetNig2Link.txt
    #	46035322 bases of 2652783500 (1.735%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/mm10/bed
    ln -s lastzTetNig2.2012-04-02 lastz.tetNig2

    #	and for the swap
    mkdir /hive/data/genomes/tetNig2/bed/blastz.mm10.swap
    cd /hive/data/genomes/tetNig2/bed/blastz.mm10.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzTetNig2.2012-04-02/DEF \
        -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
        -swap -chainMinScore=5000 -chainLinearGap=loose > swap.log 2>&1 &
    #	real    7m24.115s
    cat  fb.tetNig2.chainMm10Link.txt
    #	41242926 bases of 302314788 (13.642%) in intersection

    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/tetNig2/bed
    ln -s blastz.mm10.swap lastz.mm10

#########################################################################
# lastz zebrafish danRer7 (DONE - 2012-04-02 - Hiram)
    # establish a screen to control this job with a name to indicate what it is
    screen -S mm10DanRer7
    mkdir /hive/data/genomes/mm10/bed/lastzDanRer7.2012-04-02
    cd /hive/data/genomes/mm10/bed/lastzDanRer7.2012-04-02

    cat << '_EOF_' > DEF
# Mouse vs. zebrafish
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
BLASTZ_Y=3400
BLASTZ_L=6000
BLASTZ_K=2200
BLASTZ_Q=/scratch/data/blastz/HoxD55.q

# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: zebrafish danRer7
SEQ2_DIR=/scratch/data/danRer7/danRer7.2bit
SEQ2_LEN=/scratch/data/danRer7/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LAP=0
SEQ2_LIMIT=50

BASE=/hive/data/genomes/mm10/bed/lastzDanRer7.2012-04-02
TMPDIR=/scratch/tmp
'_EOF_'
    # << happy emacs

    # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
    #	number of jobs, 50,000 to something under 100,000
    # when not present, SEQ2_LIMIT is a default 100
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
        `pwd`/DEF \
        -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
        -chainMinScore=5000 -chainLinearGap=loose > do.log 2>&1 &
    #	real    80m32.118s
    # broken swarm, continuing:
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
        -continue=cat `pwd`/DEF \
        -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
        -chainMinScore=5000 -chainLinearGap=loose > cat.log 2>&1 &
    #	real    40m27.762s
    cat fb.mm10.chainDanRer7Link.txt
    #	69028912 bases of 2652783500 (2.602%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/mm10/bed
    ln -s lastzDanRer7.2012-04-02 lastz.danRer7

    #	and for the swap
    mkdir /hive/data/genomes/danRer7/bed/blastz.mm10.swap
    cd /hive/data/genomes/danRer7/bed/blastz.mm10.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzDanRer7.2012-04-02/DEF \
        -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
        -swap -chainMinScore=5000 -chainLinearGap=loose > swap.log 2>&1 &
    #	real    109m49.939s
    cat  fb.danRer7.chainMm10Link.txt
    #	72001768 bases of 1409770109 (5.107%) in intersection

    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/danRer7/bed
    ln -s blastz.mm10.swap lastz.mm10

#########################################################################
# lastz medaka oryLat2 (DONE - 2012-04-02 - Hiram)
    # establish a screen to control this job with a name to indicate what it is
    screen -S mm10OryLat2
    mkdir /hive/data/genomes/mm10/bed/lastzOryLat2.2012-04-02
    cd /hive/data/genomes/mm10/bed/lastzOryLat2.2012-04-02

    cat << '_EOF_' > DEF
# Mouse vs. medaka
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
BLASTZ_Y=3400
BLASTZ_L=6000
BLASTZ_K=2200
BLASTZ_Q=/scratch/data/blastz/HoxD55.q

# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: medaka oryLat2
SEQ2_DIR=/scratch/data/oryLat2/oryLat2.2bit
SEQ2_LEN=/scratch/data/oryLat2/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LAP=0
SEQ2_LIMIT=50

BASE=/hive/data/genomes/mm10/bed/lastzOryLat2.2012-04-02
TMPDIR=/scratch/tmp
'_EOF_'
    # << happy emacs

    # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
    #	number of jobs, 50,000 to something under 100,000
    # when not present, SEQ2_LIMIT is a default 100
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
        `pwd`/DEF \
        -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
        -chainMinScore=5000 -chainLinearGap=loose > do.log 2>&1 &
    #	real    78m53.408s
    #	broken swarm, continuing:
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
        -continue=cat `pwd`/DEF \
        -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
        -chainMinScore=5000 -chainLinearGap=loose > cat.log 2>&1 &
    #	real    113m29.462s
    cat fb.mm10.chainOryLat2Link.txt
    #	51344841 bases of 2652783500 (1.936%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/mm10/bed
    ln -s lastzOryLat2.2012-04-02 lastz.oryLat2

    #	and for the swap
    mkdir /hive/data/genomes/oryLat2/bed/blastz.mm10.swap
    cd /hive/data/genomes/oryLat2/bed/blastz.mm10.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzOryLat2.2012-04-02/DEF \
        -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
        -swap -chainMinScore=5000 -chainLinearGap=loose > swap.log 2>&1 &
    #	real    7m52.846s
    cat  fb.oryLat2.chainMm10Link.txt
    #	45954178 bases of 700386597 (6.561%) in intersection

    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/oryLat2/bed
    ln -s blastz.mm10.swap lastz.mm10

#########################################################################
# lastz lamprey petMar1 (DONE - 2012-04-02 - Hiram)
    # establish a screen to control this job with a name to indicate what it is
    screen -S mm10PetMar1
    mkdir /hive/data/genomes/mm10/bed/lastzPetMar1.2012-04-02
    cd /hive/data/genomes/mm10/bed/lastzPetMar1.2012-04-02

    cat << '_EOF_' > DEF
# Mouse vs. lamprey
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
BLASTZ_Y=3400
BLASTZ_L=6000
BLASTZ_K=2200
BLASTZ_Q=/scratch/data/blastz/HoxD55.q

# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: lamprey petMar1
SEQ2_DIR=/scratch/data/petMar1/petMar1.2bit
SEQ2_LEN=/scratch/data/petMar1/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LAP=0
SEQ2_LIMIT=200

BASE=/hive/data/genomes/mm10/bed/lastzPetMar1.2012-04-02
TMPDIR=/scratch/tmp
'_EOF_'
    # << happy emacs

    # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
    #	number of jobs, 50,000 to something under 100,000
    # when not present, SEQ2_LIMIT is a default 100
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
        `pwd`/DEF \
        -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
        -chainMinScore=5000 -chainLinearGap=loose > do.log 2>&1 &
    #	real    77m3.923s
    # broken swarm, continuing:
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
        -qRepeats=windowmaskerSdust -continue=cat `pwd`/DEF \
        -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
        -chainMinScore=5000 -chainLinearGap=loose > cat.log 2>&1 &
    # missing qRepeats specification
    rm axtChain/mm10.petMar1.net
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
        -qRepeats=windowmaskerSdust -continue=load `pwd`/DEF \
        -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
        -chainMinScore=5000 -chainLinearGap=loose > load.log 2>&1 &
    #	real    6m31.527s
    cat fb.mm10.chainPetMar1Link.txt
    #	29205053 bases of 2652783500 (1.101%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/mm10/bed
    ln -s lastzPetMar1.2012-04-02 lastz.petMar1

    #	and for the swap
    mkdir /hive/data/genomes/petMar1/bed/blastz.mm10.swap
    cd /hive/data/genomes/petMar1/bed/blastz.mm10.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzPetMar1.2012-04-02/DEF \
        -qRepeats=windowmaskerSdust -workhorse=hgwdev \
	-smallClusterHub=encodek -bigClusterHub=swarm \
        -swap -chainMinScore=5000 -chainLinearGap=loose > swap.log 2>&1 &
    #   real    17m40.196s
    cat  fb.petMar1.chainMm10Link.txt
    #   26274715 bases of 831696438 (3.159%) in intersection

    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/petMar1/bed
    ln -s blastz.mm10.swap lastz.mm10

#########################################################################
## 60-Way Multiz (DONE - 2011-09-28 - Hiram)
    ssh hgwdev
    mkdir /hive/data/genomes/mm10/bed/multiz60way
    cd /hive/data/genomes/mm10/bed/multiz60way

    # from the 62-way in the source tree, do not need ailMis1 and croPor1:
    /cluster/bin/phast/tree_doctor --prune ailMis1,croPor1 \
        /cluster/home/hiram/kent/src/hg/utils/phyloTrees/62way.nh > 60way.nh

    # note, newer assemblies: susScr3, dasNov3, felCat5, hetGla2, turTru2,
    #   nomLeu2, oreNil2

    #	what that looks like:
    cat 60way.nh
# (((((((((((((((((((hg19:0.006550,panTro4:0.006840):0.002220,
# gorGor3:0.008964):0.009693,ponAbe2:0.018940):0.003471,
# nomLeu2:0.022270):0.012040,(rheMac3:0.007991,
# papHam1:0.008042):0.029610):0.021830,(calJac3:0.030000,
# saiBol1:0.040000):0.039650):0.052090,tarSyr1:0.111400):0.020520,
# (micMur1:0.085600,otoGar3:0.119400):0.020520):0.015494,
# tupBel1:0.186203):0.004937,(((((mm10:0.084509,rn5:0.091589):0.197773,
# dipOrd1:0.211609):0.022992,(hetGla2:0.100000,
# cavPor3:0.125629):0.100000):0.010150,speTri2:0.148468):0.025746,
# (oryCun2:0.114227,ochPri2:0.201069):0.101463):0.015313):0.020593,
# (((susScr3:0.120000,(vicPac1:0.087275,(turTru2:0.064688,
# (oviAri1:0.100000,bosTau7:0.100000):0.023592):0.025153):0.020335):0.020000,
# ((equCab2:0.109397,(felCat5:0.098612,
# (canFam3:0.052458,ailMel1:0.050000):0.050000):0.049845):0.006219,
# (myoLuc2:0.142540,pteVam1:0.113399):0.033706):0.004508):0.011671,
# (eriEur1:0.221785,sorAra1:0.269562):0.056393):0.021227):0.023664,
# ((((loxAfr3:0.082242,proCap1:0.155358):0.026990,echTel1:0.245936):0.010000,
# triMan1:0.100000):0.049697,(dasNov3:0.116664,
# choHof1:0.096357):0.053145):0.006717):0.234728,(monDom5:0.125686,
# (sarHar1:0.100000,macEug2:0.072008):0.050000):0.215100):0.071664,
# ornAna1:0.456592):0.109504,(((((melGal1:0.100000,galGal4:0.065536):0.100000,
# taeGut1:0.171542):0.199223,melUnd1:0.100000):0.155143,
# anoCar2:0.539241):0.122371,chrPic1:0.200000):0.010000):0.050000,
# xenTro3:0.855573):0.100000,latCha1:0.855573):0.311354,
# ((((((tetNig2:0.224159,fr3:0.203847):0.097590,oreNil2:0.200000):0.097590,
# gasAcu1:0.316413):0.030000,oryLat2:0.511970):0.030000,
# gadMor1:0.350000):0.225640,danRer7:0.730752):0.147949):0.526688,
# petMar1:0.526688);

    #	rearrange to get mm10 on top:
    cat << '_EOF_' > mm10.60way.nh
(((((((((((((((mm10:0.084509,rn5:0.091589):0.197773,dipOrd1:0.211609):0.022992,
(hetGla2:0.1,cavPor3:0.125629):0.1):0.01015,speTri2:0.148468):0.025746,(oryCun2:0.114227,ochPri2:0.201069):0.101463):0.015313,
(((((((((hg19:0.00655,panTro4:0.00684):0.00222,gorGor3:0.008964):0.009693,ponAbe2:0.01894):0.003471,
nomLeu2:0.02227):0.01204,(rheMac3:0.007991,papHam1:0.008042):0.02961):0.02183,
(calJac3:0.03,saiBol1:0.04):0.03965):0.05209,tarSyr1:0.1114):0.02052,(micMur1:0.0856,otoGar3:0.1194):0.02052):0.015494,
tupBel1:0.186203):0.004937):0.020593,
((susScr3:0.12,(vicPac1:0.087275,(turTru2:0.064688,
(oviAri1:0.1,bosTau7:0.1):0.023592):0.025153):0.020335):0.01,
((((felCat5:0.098612,
(canFam3:0.052458,ailMel1:0.05):0.05):0.049845,equCab2:0.109397):0.006219,
(myoLuc2:0.14254,pteVam1:0.113399):0.033706):0.004508,(eriEur1:0.221785,
sorAra1:0.269562):0.056393):0.021227):0.01):0.013664,((((loxAfr3:0.082242,proCap1:0.155358):0.02699,
echTel1:0.245936):0.01,triMan1:0.1):0.049697,(dasNov3:0.116664,
choHof1:0.096357):0.053145):0.006717):0.234728,(monDom5:0.125686,(sarHar1:0.1,
macEug2:0.072008):0.05):0.2151):0.071664,ornAna1:0.456592):0.109504,
(((((melGal1:0.1,galGal4:0.065536):0.1,taeGut1:0.171542):0.199223,melUnd1:0.1):0.155143,anoCar2:0.539241):0.122371,
chrPic1:0.2):0.01):0.05,xenTro3:0.855573):0.1,latCha1:0.855573):0.311354,
((((((tetNig2:0.224159,fr3:0.203847):0.09759,oreNil2:0.2):0.09759,gasAcu1:0.316413):0.03,
oryLat2:0.51197):0.03,gadMor1:0.35):0.22564,danRer7:0.730752):0.147949):0.526688,petMar1:0.526688);
'_EOF_'
    # << happy emacs

    # extract species list from that .nh file
    sed 's/[a-z][a-z]*_//g; s/:[0-9\.][0-9\.]*//g; s/;//; /^ *$/d' \
        mm10.60way.nh | xargs echo | sed 's/ //g; s/,/ /g' \
        | sed 's/[()]//g; s/,/ /g' | tr '[ ]' '[\n]' > species.list.txt

    # construct db to name translation list:
    cat species.list.txt | while read DB
do
hgsql -N -e "select name,organism from dbDb where name=\"${DB}\";" hgcentraltest
done | sed -e "s/\t/->/; s/ /_/g;" | sed -e 's/$/;/' | sed -e 's/\./_/g' \
        > db.to.name.txt

    # construct a common name .nh file:
    /cluster/bin/phast/tree_doctor --rename \
    "`cat db.to.name.txt`" mm10.60way.nh | sed -e 's/00*)/)/g; s/00*,/,/g' \
        | sed -e 's/X__trop/X._trop/' > mm10.60way.commonNames.nh
# (((((((((((((((Mouse:0.084509,Rat:0.091589):0.197773,
# Kangaroo_rat:0.211609):0.022992,(Naked_mole:0.1,
# Guinea_pig:0.125629):0.1):0.01015,Squirrel:0.148468):0.025746,
# (Rabbit:0.114227,Pika:0.201069):0.101463):0.015313,
# (((((((((Human:0.00655,Chimp:0.00684):0.00222,Gorilla:0.008964):0.009693,
# Orangutan:0.01894):0.003471,Gibbon:0.02227):0.01204,
# (Chinese_rhesus:0.007991,Baboon:0.008042):0.02961):0.02183,
# (Marmoset:0.03,Squirrel_monkey:0.04):0.03965):0.05209,
# Tarsier:0.1114):0.02052,(Mouse_lemur:0.0856,
# Bushbaby:0.1194):0.02052):0.015494,Tree_shrew:0.186203):0.004937):0.020593,
# ((Pig:0.12,(Alpaca:0.087275,(Dolphin:0.064688,
# (Sheep:0.1,Cow:0.1):0.023592):0.025153):0.020335):0.01,
# ((((Cat:0.098612,(Dog:0.052458,Panda:0.05):0.05):0.049845,
# Horse:0.109397):0.006219,(Microbat:0.14254,
# Megabat:0.113399):0.033706):0.004508,(Hedgehog:0.221785,
# Shrew:0.269562):0.056393):0.021227):0.01):0.013664,
# ((((Elephant:0.082242,Rock_hyrax:0.155358):0.02699,
# Tenrec:0.245936):0.01,Manatee:0.1):0.049697,
# (Armadillo:0.116664,Sloth:0.096357):0.053145):0.006717):0.234728,
# (Opossum:0.125686,(Tasmanian_devil:0.1,
# Wallaby:0.072008):0.05):0.2151):0.071664,Platypus:0.456592):0.109504,
# (((((Turkey:0.1,Chicken:0.065536):0.1,Zebra_finch:0.171542):0.199223,
# Budgerigar:0.1):0.155143,Lizard:0.539241):0.122371,
# Painted_turtle:0.2):0.01):0.05,X._tropicalis:0.855573):0.1,
# Coelacanth:0.855573):0.311354,((((((Tetraodon:0.224159,
# Fugu:0.203847):0.09759,Nile_tilapia:0.2):0.09759,
# Stickleback:0.316413):0.03,Medaka:0.51197):0.03,
# Atlantic_cod:0.35):0.22564,Zebrafish:0.730752):0.147949):0.526688,
# Lamprey:0.526688);


    #	Use this specification in the phyloGif tool:
    #	http://genome.ucsc.edu/cgi-bin/phyloGif
    #	to obtain a png image for src/hg/htdocs/images/phylo/mm10_60way.png

    # List the tree distance from mm10 to every other species, nearest
    # first.  Only the all_dists lines that mention mm10 are kept, and the
    # "mm10<TAB>" field is removed so each line reads "<db> <distance>",
    # which is the format sizeStats.pl reads.
    # The tab is spelled \t (GNU sed): a literal "^I" typed into the
    # pattern is a caret followed by the letter I and matches nothing.
    /cluster/bin/phast/all_dists mm10.60way.nh | grep mm10 \
        | sed -e "s/mm10\t//" | sort -k2n > 60way.distances.txt
    #	Use this output to create the table below
    head 60way.distances.txt
# rn5     0.176098
# speTri2 0.463892
# micMur1 0.483034
# dipOrd1 0.493891
# vicPac1 0.504686
# hetGla2 0.505274
# hg19    0.505328
# gorGor3 0.505522
# panTro4 0.505618
# nomLeu2 0.505664
    cat << '_EOF_' > sizeStats.pl
#!/usr/bin/env perl

# sizeStats.pl - print one table row for each species listed in
# 60way.distances.txt: rank, distance from mm10, featureBits chainLink
# percent measured on mm10, the same measure on the other genome (from the
# swap), the organism name and the database name.
#
# Inputs, all at fixed locations:
#   60way.distances.txt in the current directory, "<db> <distance>" per line
#   /hive/data/genomes/mm10/bed/lastz.<db>/fb.mm10.chain<Db>Link.txt
#   /hive/data/genomes/<db>/bed/lastz.mm10/fb.<db>.chainMm10Link.txt
#   the dbDb table in hgcentraltest, queried with hgsql, for the organism
#
# A measurement whose file is missing, empty or unparsable prints as
# 00.000 and an organism that is not found prints as N/A, so unfinished
# alignments are easy to spot in the table.

use strict;
use warnings;

my $distances = "60way.distances.txt";

# Return the fifth white space separated field from the first line of
# $file with "(", ")" and "%" removed, e.g. 6.651 from
#   "90345130 bases of 1358334882 (6.651%) in intersection"
# Returns 0.0 when the file can not be opened, is empty, or that field is
# not a plain decimal number.
sub chainLinkPercent {
    my ($file) = @_;
    open (my $fb, "<", $file) or return 0.0;
    my $firstLine = <$fb>;
    close ($fb);
    return 0.0 if (! defined($firstLine));
    my @fields = split(' ', $firstLine);
    return 0.0 if (scalar(@fields) < 5);
    my $percent = $fields[4];
    $percent =~ s/[()%]//g;
    # only hand a real number to the %f conversions in the printf below
    return 0.0 if ($percent !~ m/^\d+(?:\.\d+)?$/);
    return $percent;
}

open (my $fh, "<", $distances) or
        die "can not read $distances: $!";

my $count = 0;
while (my $line = <$fh>) {
    chomp $line;
    # split(' ') skips leading white space; split('\s+') would instead
    # return an empty first field for such a line
    my ($D, $dist) = split(' ', $line);
    next if (! defined($dist));		# blank or incomplete line
    my $chain = "chain" . ucfirst($D);
    my $chainLinkMeasure = chainLinkPercent(
        "/hive/data/genomes/mm10/bed/lastz.$D/fb.mm10." . $chain . "Link.txt");
    my $swapMeasure = chainLinkPercent(
        "/hive/data/genomes/${D}/bed/lastz.mm10/fb.${D}.chainMm10Link.txt");
    my $orgName=
    `hgsql -N -e "select organism from dbDb where name='$D';" hgcentraltest`;
    # backticks can return undef when the command could not be started
    $orgName = "" if (! defined($orgName));
    chomp $orgName;
    if (length($orgName) < 1) {
        $orgName="N/A";
    }
    ++$count;
    printf "# %02d  %.4f (%% %06.3f) (%% %06.3f) - %s %s\n", $count, $dist,
        $chainLinkMeasure, $swapMeasure, $orgName, $D;
}
close ($fh);
'_EOF_'
    # << happy emacs
    chmod +x ./sizeStats.pl
    ./sizeStats.pl
#

#	If you can fill in all the numbers in this table, you are ready for
#	the multiple alignment procedure

#       featureBits chainLink measures
#               chainAnoCar2Link
#  N distance  on mm10  on other     other species
# 01  0.1761 (% 67.353) (% 70.278) - Rat rn5
# 02  0.4639 (% 34.217) (% 39.244) - Squirrel speTri2
# 03  0.4830 (% 26.636) (% 37.574) - Mouse lemur micMur1
# 04  0.4939 (% 19.460) (% 27.512) - Kangaroo rat dipOrd1
# 05  0.5047 (% 22.636) (% 31.769) - Alpaca vicPac1
# 06  0.5053 (% 32.753) (% 37.989) - Naked mole rat hetGla2
# 07  0.5053 (% 38.226) (% 35.249) - Human hg19
# 08  0.5055 (% 33.987) (% 34.349) - Gorilla gorGor3
# 09  0.5056 (% 34.674) (% 31.924) - Chimp panTro4
# 10  0.5057 (% 34.031) (% 32.274) - Gibbon nomLeu2
# 11  0.5058 (% 34.496) (% 30.610) - Orangutan ponAbe2
# 12  0.5073 (% 30.267) (% 33.492) - Dolphin turTru2
# 13  0.5088 (% 24.560) (% 24.986) - Tarsier tarSyr1
# 14  0.5090 (% 33.931) (% 33.464) - Chinese rhesus rheMac3
# 15  0.5090 (% 33.577) (% 32.023) - Baboon papHam1
# 16  0.5168 (% 29.795) (% 32.926) - Bushbaby otoGar3
# 17  0.5171 (% 25.685) (% 29.445) - Pig susScr3
# 18  0.5192 (% 32.450) (% 31.301) - Marmoset calJac3
# 19  0.5284 (% 34.415) (% 37.138) - Horse equCab2
# 20  0.5292 (% 32.339) (% 33.848) - Squirrel monkey saiBol1
# 21  0.5309 (% 28.447) (% 29.115) - Guinea pig cavPor3
# 22  0.5470 (% 18.019) (% 23.687) - Sloth choHof1
# 23  0.5472 (% 26.546) (% 24.649) - Manatee triMan1
# 24  0.5476 (% 19.766) (% 25.144) - Tree shrew tupBel1
# 25  0.5569 (% 25.248) (% 25.677) - Rabbit oryCun2
# 26  0.5599 (% 27.345) (% 38.627) - Megabat pteVam1
# 27  0.5662 (% 26.255) (% 25.383) - Cow bosTau7
# 28  0.5662 (% 15.341) (% 31.925) - Sheep oviAri1
# 29  0.5664 (% 25.823) (% 21.616) - Elephant loxAfr3
# 30  0.5673 (% 25.201) (% 21.066) - Armadillo dasNov3
# 31  0.5675 (% 29.725) (% 32.244) - Cat felCat5
# 32  0.5689 (% 30.979) (% 35.562) - Panda ailMel1
# 33  0.5713 (% 29.144) (% 31.624) - Dog canFam3
# 34  0.5891 (% 24.363) (% 33.650) - Microbat myoLuc2
# 35  0.6395 (% 15.147) (% 16.214) - Rock hyrax proCap1
# 36  0.6437 (% 14.542) (% 19.908) - Pika ochPri2
# 37  0.6865 (% 09.856) (% 12.264) - Hedgehog eriEur1
# 38  0.7031 (% 10.947) (% 14.117) - Tenrec echTel1
# 39  0.7343 (% 09.382) (% 13.569) - Shrew sorAra1
# 40  0.9626 (% 04.353) (% 04.448) - Wallaby macEug2
# 41  0.9663 (% 09.584) (% 07.205) - Opossum monDom5
# 42  0.9906 (% 08.479) (% 07.888) - Tasmanian devil sarHar1
# 43  1.0166 (% 04.731) (% 05.488) - Painted turtle chrPic1
# 44  1.1537 (% 05.348) (% 07.334) - Platypus ornAna1
# 45  1.1942 (% 03.589) (% 07.350) - Budgerigar melUnd1
# 46  1.4589 (% 03.676) (% 08.100) - Chicken galGal4
# 47  1.4649 (% 03.599) (% 07.304) - Zebra finch taeGut1
# 48  1.4782 (% 03.331) (% 04.988) - Lizard anoCar2
# 49  1.4934 (% 03.511) (% 08.211) - Turkey melGal1
# 50  1.7122 (% 03.125) (% 06.651) - X. tropicalis xenTro3
# 51  1.8122 (% 02.715) (% 03.380) - Coelacanth latCha1
# 52  1.9916 (% 01.726) (% 06.810) - Atlantic cod gadMor1
# 53  1.9992 (% 01.957) (% 06.091) - Nile tilapia oreNil2
# 54  2.0180 (% 02.016) (% 10.927) - Stickleback gasAcu1
# 55  2.1006 (% 01.789) (% 12.134) - Fugu fr3
# 56  2.1209 (% 01.735) (% 13.642) - Tetraodon tetNig2
# 57  2.1467 (% 02.602) (% 05.107) - Zebrafish danRer7
# 58  2.1835 (% 01.936) (% 06.561) - Medaka oryLat2
# 59  2.3214 (% 01.101) (% 03.159) - Lamprey petMar1

# None of this concern for distances matters in building the first step, the
# maf files.

    # create species list and stripped down tree for autoMZ
    sed 's/[a-z][a-z]*_//g; s/:[0-9\.][0-9\.]*//g; s/;//; /^ *$/d' \
	mm10.60way.nh > tmp.nh
    echo `cat tmp.nh` > tree-commas.nh
    echo `cat tree-commas.nh` | sed 's/ //g; s/,/ /g' > tree.nh
    sed 's/[()]//g; s/,/ /g' tree.nh > species.list

    #	bash shell syntax here ...
    cd /hive/data/genomes/mm10/bed/multiz60way
    export H=/hive/data/genomes/mm10/bed
    mkdir mafLinks
    for G in `sed -e "s/mm10 //" species.list`
    do
	mkdir mafLinks/$G
	if [ -s ${H}/lastz.${G}/mafRBestNet/chr1.maf.gz ]; then
	    echo "$G - recipBest"
	    ln -s ${H}/lastz.$G/mafRBestNet/*.maf.gz ./mafLinks/$G
	else
	    if [ -s ${H}/lastz.${G}/mafSynNet/chr1.maf.gz ]; then
		echo "$G - synNet"
		ln -s ${H}/lastz.$G/mafSynNet/*.maf.gz ./mafLinks/$G
	    else
		if [ -s ${H}/lastz.${G}/mafNet/chr1.maf.gz ]; then
		    echo "$G - mafNet"
		    ln -s ${H}/lastz.$G/mafNet/*.maf.gz ./mafLinks/$G
		else
		    echo "missing directory lastz.${G}/*Net"
		fi
	    fi
	fi
    done

    #	verify the alignment type is correct:
    for D in `grep -v mm10 /hive/users/hiram/bigWays/mm10.60way/ordered.list`
do
    ls -l mafLinks/$D/chr1.maf.gz | awk '{print $NF}'
done
    #	compare to the list at:
    #	http://genomewiki.ucsc.edu/index.php/Mm10_Genome_size_statistics

    #	need to split these things up into smaller pieces for
    #	efficient kluster run.
    cd /hive/data/genomes/mm10/bed/multiz60way
    mkdir mafSplit
    cd mafSplit
    #	mafSplitPos splits on gaps or repeat areas that will not have
    #	any chains, approx 5 Mbp intervals, gaps at least 10,000
    mafSplitPos -minGap=10000 mm10 5 stdout | sort -u \
	| sort -k1,1 -k2,2n > mafSplit.bed
    #	There is a splitRegions.pl script here (copied from previous hg19 46way)
    #	that can create a custom track from this mafSplit.bed file.
    #	Take a look at that in the browser and see if it looks OK,
    #	check the number of sections on each chrom to verify none are
    #	too large.  Despite the claim above, it does appear that some
    #	areas are split where actual chains exist.
    ./splitRegions.pl mafSplit.bed > splitRegions.ct

    # to see the sizes of the regions:
    grep "^chr" splitRegions.ct | awk '{print $3-$2,$0}' | sort -rn | less

    #	run a kluster job to split them all
    ssh swarm
    cd /hive/data/genomes/mm10/bed/multiz60way/mafSplit
    cat << '_EOF_' > runOne
#!/bin/csh -ef
# runOne <species db> <chrom>
#	Split the pairwise maf for one species and one mm10 chrom at the
#	positions listed in ../mafSplit.bed, leaving gzipped pieces named
#	mm10_<chrom>.NN.maf.gz in a directory named for the species.
set G = $1
set C = $2
mkdir -p $G
pushd $G > /dev/null
if ( -s ../../mafLinks/${G}/${C}.maf.gz ) then
    # clear out uncompressed pieces left over from an earlier attempt
    if ( -s mm10_${C}.00.maf ) then
        /bin/rm -f mm10_${C}.*.maf
    endif
    /cluster/bin/x86_64/mafSplit ../mafSplit.bed mm10_ ../../mafLinks/${G}/${C}.maf.gz
    /bin/gzip mm10_${C}.*.maf
else
    # no alignment file for this chrom: make an empty .00.maf and gzip it
    #	so the .00.maf.gz file named in the job template still appears
    /bin/touch mm10_${C}.00.maf
    /bin/gzip mm10_${C}.00.maf
endif
popd > /dev/null
'_EOF_'
    # << happy emacs
    chmod +x runOne

    cat << '_EOF_' > template
#LOOP
runOne $(root1) $(root2) {check out exists+ $(root1)/mm10_$(root2).00.maf.gz}
#ENDLOOP
'_EOF_'
    # << happy emacs

    for G in `sed -e "s/mm10 //" ../species.list`
do
    echo $G
done > species.list
    cut -f 1 ../../../chrom.sizes > chr.list

    gensub2 species.list chr.list template jobList
    para -ram=8g create jobList
    para try ... check ... push ... etc...
# Completed: 3894 of 3894 jobs
# CPU time in finished jobs:      18929s     315.49m     5.26h    0.22d  0.001 y
# IO & Wait Time:                 62908s    1048.46m    17.47h    0.73d  0.002 y
# Average job time:                  21s       0.35m     0.01h    0.00d
# Longest finished job:             346s       5.77m     0.10h    0.00d
# Submission to last job:           471s       7.85m     0.13h    0.01d

    # construct a list of all possible maf file names.
    # they do not all exist in each of the species directories
    find . -type f | grep "maf.gz" | wc -l
    # 19733
    find . -type f | grep ".maf.gz$" | xargs -L 1 basename | sort -u > maf.list
    wc -l maf.list
    #   336 maf.list

    mkdir /hive/data/genomes/mm10/bed/multiz60way/splitRun
    cd /hive/data/genomes/mm10/bed/multiz60way/splitRun
    mkdir maf run
    cd run
    mkdir penn
    cp -p /cluster/bin/penn/multiz.2009-01-21/multiz penn
    cp -p /cluster/bin/penn/multiz.2009-01-21/maf_project penn
    cp -p /cluster/bin/penn/multiz.2009-01-21/autoMZ penn

    #	set the db and pairs directories here
    cat > autoMultiz.csh << '_EOF_'
#!/bin/csh -ef
# autoMultiz.csh <split maf name> <result file>
#	Run autoMZ on one split region: collect the pairwise maf of that
#	name from every species directory under $pairs into a scratch
#	directory, run autoMZ there, and copy its output to <result file>.
set db = mm10
set c = $1
set result = $2
set run = `/bin/pwd`
set tmp = /scratch/tmp/$db/multiz.$c
set pairs = /hive/data/genomes/mm10/bed/multiz60way/mafSplit
# start from a clean scratch directory for this region
/bin/rm -fr $tmp
/bin/mkdir -p $tmp
/bin/cp -p ../../tree.nh ../../species.list $tmp
pushd $tmp > /dev/null
# one $db.<species>.sing.maf per species other than $db:
#	uncompressed copy of the .gz piece, or a symlink to an uncompressed
#	piece, or a header-only maf when there is no piece or it is empty
foreach s (`/bin/sed -e "s/$db //" species.list`)
    set in = $pairs/$s/$c
    set out = $db.$s.sing.maf
    if (-e $in.gz) then
        /bin/zcat $in.gz > $out
        if (! -s $out) then
            echo "##maf version=1 scoring=autoMZ" > $out
        endif
    else if (-e $in) then
        /bin/ln -s $in $out
    else
        echo "##maf version=1 scoring=autoMZ" > $out
    endif
end
# put the local penn directory first on the path
#	(presumably autoMZ finds multiz and maf_project by name - confirm)
set path = ($run/penn $path); rehash
$run/penn/autoMZ + T=$tmp E=$db "`cat tree.nh`" $db.*.sing.maf $c \
        > /dev/null
popd > /dev/null
# replace any previous result, then remove the scratch directory
/bin/rm -f $result
/bin/cp -p $tmp/$c $result
/bin/rm -fr $tmp
'_EOF_'
# << happy emacs
    chmod +x autoMultiz.csh

    cat  << '_EOF_' > template
#LOOP
./autoMultiz.csh $(root1) {check out line+ /hive/data/genomes/mm10/bed/multiz60way/splitRun/maf/$(root1)}
#ENDLOOP
'_EOF_'
# << happy emacs

    ln -s ../../mafSplit/maf.list maf.list
    ssh swarm
    cd /hive/data/genomes/mm10/bed/multiz60way/splitRun/run
    # the tac reverses the list to get the small jobs first
    gensub2 maf.list single template stdout | tac > jobList
    para -ram=8g create jobList
# Completed: 336 of 336 jobs
# CPU time in finished jobs:    2828651s   47144.19m   785.74h   32.74d  0.090 y
# IO & Wait Time:                200533s    3342.21m    55.70h    2.32d  0.006 y
# Average job time:                9015s     150.26m     2.50h    0.10d
# Longest finished job:           47029s     783.82m    13.06h    0.54d
# Submission to last job:         48982s     816.37m    13.61h    0.57d

    # put the split maf results back together into a single maf file
    #	eliminate duplicate comments
    ssh hgwdev
    cd /hive/data/genomes/mm10/bed/multiz60way/splitRun
    mkdir ../maf
    #	the sed edits take out partitioning name information from the comments
    #	so the multiple parts will condense to smaller number of lines
    #	this takes almost 2 hours of time, resulting in a bit over 150 Gb,
    #	almost all chrom files over 1 Gb, up to almost 10 Gb for chr2
    #	HOWEVER, this is actually not necessary to maintain these comments,
    #	they are lost during the mafAddIRows

    # write these two files with > (not >>) so that repeating this step
    #	replaces them instead of appending a second copy, a doubled
    #	template would generate every job twice
    cat << '_EOF_' > runOne
#!/bin/csh -fe
# runOne <chrom>
#	Assemble ../maf/<chrom>.maf from the split results
#	maf/mm10_<chrom>.*.maf: first header line, the remaining comment
#	lines (made unique), all alignment blocks taken in numeric order of
#	the split index, and last the final line of the split files.
set C = $1
# remove a compressed result left by an earlier run
if ( -s ../maf/${C}.maf.gz ) then
    rm -f ../maf/${C}.maf.gz
endif
head -q -n 1 maf/mm10_${C}.*.maf | sort -u > ../maf/${C}.maf
# drop the split index and the _MZ_ names from the comment lines so that
#	the comments from all the pieces collapse under sort -u
grep -h "^#" maf/mm10_${C}.*.maf | egrep -v "maf version=1|eof maf" | \
    sed -e "s#${C}.[0-9][0-9]*#${C}#g; s#_MZ_[^ ]* # #g;" \
        | sort -u >> ../maf/${C}.maf
grep -h -v "^#" `ls maf/mm10_${C}.*.maf | sort -t. -k2,2n` >> ../maf/${C}.maf
tail -q -n 1 maf/mm10_${C}.*.maf | sort -u >> ../maf/${C}.maf
'_EOF_'
    # << happy emacs
    chmod +x runOne

    cat << '_EOF_' > template
#LOOP
runOne $(root1) {check out exists+ ../maf/$(root1).maf}
#ENDLOOP
'_EOF_'
    # << happy emacs

    cut -f1 ../../../chrom.sizes > chr.list
    ssh encodek
    cd /hive/data/genomes/mm10/bed/multiz60way/splitRun
    gensub2 chr.list single template jobList
    para -ram=8g create jobList
    para try ... check ... push ... etc ...
# Completed: 62 of 66 jobs
# Crashed: 4 jobs
# CPU time in finished jobs:        461s       7.68m     0.13h    0.01d  0.000 y
# IO & Wait Time:                 17863s     297.72m     4.96h    0.21d  0.001 y
# Average job time:                 296s       4.93m     0.08h    0.00d
# Longest finished job:            1144s      19.07m     0.32h    0.01d
# Submission to last job:          1156s      19.27m     0.32h    0.01d

    # these four have empty results:
#       chrUn_GL456383
#       chrUn_GL456389
#       chrUn_GL456390
#       chrUn_GL456396

    # Load into database
    ssh hgwdev
    mkdir -p /gbdb/mm10/multiz60way
    cd /hive/data/genomes/mm10/bed/multiz60way/maf
    ln -s `pwd`/*.maf /gbdb/mm10/multiz60way

    # this generates an immense multiz60way.tab file in the directory
    #	where it is running.  Best to run this over in scratch.
    #   This is going to take all day.
    cd /scratch/tmp
    time nice -n +19 hgLoadMaf mm10 multiz60way
    #   Loaded 56185270 mafs in 66 files from /gbdb/mm10/multiz60way
    #   real    72m45.513s
# -rw-rw-r-- 1 2857704841 Apr 18 10:49 multiz60way.tab

    time cat /gbdb/mm10/multiz60way/*.maf \
        | nice -n +19 hgLoadMafSummary -verbose=2 -minSize=30000 \
	-mergeGap=1500 -maxSize=200000 mm10 multiz60waySummary stdin
    #   Created 12012784 summary blocks from 1074134156 components and
    #   56185270 mafs from stdin
    #   real    104m2.107s

    wc -l multiz60way*.tab
    #   56185270 multiz60way.tab
    #   12012784 multiz60waySummary.tab
    #   68198054 total
    #   -rw-rw-r-- 1 2857704841 Apr 18 10:49 multiz60way.tab
    #   -rw-rw-r-- 1  567210414 Apr 18 17:28 multiz60waySummary.tab

    rm multiz60way*.tab

#######################################################################
# GAP ANNOTATE MULTIZ60WAY MAF AND LOAD TABLES (DONE - 2012-05-31 - Hiram)
    # mafAddIRows has to be run on single chromosome maf files, it does not
    #	function correctly when more than one reference sequence
    #	are in a single file.
    mkdir -p /hive/data/genomes/mm10/bed/multiz60way/anno
    cd /hive/data/genomes/mm10/bed/multiz60way/anno

    cd /hive/data/genomes/mm10/bed/multiz60way/anno
    # check for N.bed files everywhere:
    for DB in `cat ../species.list`
do
    if [ ! -s /hive/data/genomes/${DB}/${DB}.N.bed ]; then
        echo "MISS: ${DB}"
        cd /hive/data/genomes/${DB}
        twoBitInfo -nBed ${DB}.2bit ${DB}.N.bed
    else
        echo "  OK: ${DB}"
    fi
done

    cd /hive/data/genomes/mm10/bed/multiz60way/anno
    for DB in `cat ../species.list`
do
    echo "${DB} "
    ln -s  /hive/data/genomes/${DB}/${DB}.N.bed ${DB}.bed
    echo ${DB}.bed  >> nBeds
    ln -s  /hive/data/genomes/${DB}/chrom.sizes ${DB}.len
    echo ${DB}.len  >> sizes
done
    # make sure they all are successful symLinks:
    ls -ogrtL

    screen -S mm10      # use a screen to control this longish job
    ssh swarm
    cd /hive/data/genomes/mm10/bed/multiz60way/anno
    mkdir result
    # NEXT TIME: this template should have a check out exists+ statement
    cat << '_EOF_' > template
#LOOP
mafAddIRows -nBeds=nBeds $(path1) /hive/data/genomes/mm10/mm10.2bit {check out line+ result/$(file1)}
#ENDLOOP
'_EOF_'
    # << happy emacs

    ls ../maf/*.maf > maf.list
    # the tac puts the short jobs first
    gensub2 maf.list single template stdout | tac > jobList
    # limit jobs to one per node with the ram=8g requirement
    para -ram=8g create jobList
    para try ... check ... push ...
# Completed: 46 of 66 jobs
# CPU time in finished jobs:        350s       5.83m     0.10h    0.00d  0.000 y
# IO & Wait Time:                   603s      10.06m     0.17h    0.01d  0.000 y
# Average job time:                  21s       0.35m     0.01h    0.00d
# Longest finished job:              54s       0.90m     0.01h    0.00d
# Submission to last job:           113s       1.88m     0.03h    0.00d

    # a number of these jobs did not finish due to memory limitations.
    # The jobs would sit on the nodes appearing to occupy 8 Gb of memory,
    # but did not see any swapping or CPU time accumulation.  Stop the
    # batch and run the rest manually on hgwdev:
#!/bin/sh

export maxMem=188743680
ulimit -S -m $maxMem -v $maxMem

mafAddIRows -nBeds=nBeds ../maf/chrX.maf /hive/data/genomes/mm10/mm10.2bit hgwdev/chrX.maf &
mafAddIRows -nBeds=nBeds ../maf/chr9.maf /hive/data/genomes/mm10/mm10.2bit hgwdev/chr9.maf &
mafAddIRows -nBeds=nBeds ../maf/chr8.maf /hive/data/genomes/mm10/mm10.2bit hgwdev/chr8.maf &
mafAddIRows -nBeds=nBeds ../maf/chr7.maf /hive/data/genomes/mm10/mm10.2bit hgwdev/chr7.maf &
wait
mafAddIRows -nBeds=nBeds ../maf/chr6.maf /hive/data/genomes/mm10/mm10.2bit hgwdev/chr6.maf &
mafAddIRows -nBeds=nBeds ../maf/chr5.maf /hive/data/genomes/mm10/mm10.2bit hgwdev/chr5.maf &
mafAddIRows -nBeds=nBeds ../maf/chr4.maf /hive/data/genomes/mm10/mm10.2bit hgwdev/chr4.maf &
mafAddIRows -nBeds=nBeds ../maf/chr3.maf /hive/data/genomes/mm10/mm10.2bit hgwdev/chr3.maf &
wait
... etc ...
    # the run time for those 20 jobs:
    #   real    159m49.217s

    # verify all result files have some content, look for 0 size files:
    find . -type f -size 0
    # should see none

    # combine into one file  (realized after this, that we do *not* need
    #                           this single file.  Individual files are OK.)
    head -q -n 1 result/chrM.maf > mm10.60way.maf
    time for F in hgwdev/*.maf result/*.maf
do
    grep -h -v "^#" ${F}
done >> mm10.60way.maf
    #   real    1082m47.484s -> 18 hours !
# -rw-rw-r-- 1 261567878241 Jun  8 10:30 mm10.60way.maf
    du -hsc mm10.60way.maf
    #   244G    mm10.60way.maf

    #	these maf files do not have the end marker, this does nothing:
    #	tail -q -n 1 result/chrM.maf >> mm10.60way.maf
    # How about an official end marker:
    echo "##eof maf" >> mm10.60way.maf

    # construct symlinks to get the individual maf files into gbdb:
    mkdir /gbdb/mm10/multiz60way/maf
    ln -s `pwd`/result/*.maf `pwd`/hgwdev/*.maf /gbdb/mm10/multiz60way/maf/

    # Load into database
    rm /gbdb/mm10/multiz60way/*.maf   # remove previous results
    cd /scratch/tmp
    time nice -n +19 hgLoadMaf -pathPrefix=/gbdb/mm10/multiz60way/maf \
        mm10 multiz60way
    #   Loaded 58087742 mafs in 66 files from /gbdb/mm10/multiz60way/maf
    #   real    868m28.108s

    time (cat /gbdb/mm10/multiz60way/maf/*.maf \
        | hgLoadMafSummary -verbose=2 -minSize=30000 \
	-mergeGap=1500 -maxSize=200000 mm10 multiz60waySummary stdin)

    #   -rw-rw-r-- 1 3009209972 Jun  9 03:23 multiz60way.tab
    #   -rw-rw-r-- 1  591235982 Jun 11 18:34 multiz60waySummary.tab

    rm multiz60way*.tab

#######################################################################
# MULTIZ60WAY MAF FRAMES (DONE - 2012-05-30 - Hiram)
    ssh hgwdev
    mkdir /hive/data/genomes/mm10/bed/multiz60way/frames
    cd /hive/data/genomes/mm10/bed/multiz60way/frames
#   survey all the genomes to find out what kinds of gene tracks they have
    cat << '_EOF_' > showGenes.csh
#!/bin/csh -fe
# showGenes.csh - survey the gene tracks available for each assembly
#	For every db in ../species.list print one line: the db name, the
#	row count of each ensGene, refGene, mgcGenes, knownGene or
#	xenoRefGene table found there, and the number of hg19.gbCdnaInfo
#	rows for that organism.
foreach db (`cat ../species.list`)
    echo -n "${db}: "
    set tables = `hgsql $db -N -e "show tables like '%Gene%'"`
    foreach table ($tables)
        if ($table == "ensGene" || $table == "refGene" || \
           $table == "mgcGenes" || $table == "knownGene" || \
           $table == "xenoRefGene" ) then
            set count = `hgsql $db -N -e "select count(*) from $table"`
            echo -n "${table}: ${count}, "
        endif
    end
    # the organism id is looked up by scientific name in hg19.organism
    set orgName = `hgsql hgcentraltest -N -e \
            "select scientificName from dbDb where name='$db'"`
    set orgId = `hgsql hg19 -N -e \
            "select id from organism where name='$orgName'"`
    # orgId must be quoted here: when the lookup finds nothing the
    #	unquoted variable expands to no words at all and csh stops
    #	with an Expression Syntax error instead of reporting zero
    if ("$orgId" == "") then
        echo "Mrnas: 0"
    else
        set count = `hgsql hg19 -N -e "select count(*) from gbCdnaInfo where organism=$orgId"`
        echo "Mrnas: ${count}"
    endif
end
'_EOF_'
    # << happy emacs
    chmod +x ./showGenes.csh
    time ./showGenes.csh > showGenes.txt
    #   real    9m11.678s

    #   rearrange that output to create five sections, and place these names
    #           in .list files here:
    #   1. knownGene: hg19
    #   2. refGene: bosTau7 danRer7 galGal4 mm10 rheMac3 rn5 susScr3 xenTro3
    #   3. ensGene: ailMel1 anoCar2 calJac3 cavPor3 choHof1 dipOrd1 echTel1
    #               equCab2 eriEur1 fr3 gasAcu1 gorGor3 loxAfr3 melGal1
    #               micMur1 monDom5 myoLuc2 ochPri2 ornAna1 oryCun2 oryLat2
    #               panTro4 ponAbe2 proCap1 pteVam1 sorAra1 taeGut1 tarSyr1
    #               tetNig2 tupBel1 vicPac1
    #   4. xenoRefGene: canFam3 chrPic1 dasNov3 felCat5 hetGla2 latCha1 macEug2
    #               nomLeu2 otoGar3 oviAri1 papHam1 petMar1 saiBol1 sarHar1
    #               triMan1
    #   5. genscan: gadMor1 melUnd1 oreNil2 speTri2 turTru2

    mkdir genes
    #   1. knownGene: hg19
    hgsql -N -e "select name,chrom,strand,txStart,txEnd,cdsStart,cdsEnd,exonCount,exonStarts,exonEnds from knownGene" hg19 \
      | genePredSingleCover stdin stdout | gzip -2c \
        > genes/hg19.gp.gz
    #   2. refGene, want the full extended genePred:
    for DB in `cat refGene.list`
do
hgsql -N -e "select * from refGene" ${DB} | cut -f2- \
      | genePredSingleCover stdin stdout | gzip -2c \
        > /scratch/tmp/${DB}.tmp.gz
    mv /scratch/tmp/${DB}.tmp.gz genes/$DB.gp.gz
    echo "${DB} done"
done
    #   3. ensGene, want the full extended genePred:
    for DB in `cat ensGene.list`
do
hgsql -N -e "select * from ensGene" ${DB} | cut -f2- \
      | genePredSingleCover stdin stdout | gzip -2c \
        > /scratch/tmp/${DB}.tmp.gz
    mv /scratch/tmp/${DB}.tmp.gz genes/$DB.gp.gz
    echo "${DB} done"
done
    #   4. xenoRefGene, want the full extended genePred:
    for DB in `cat xenoRG.list`
do
hgsql -N -e "select * from xenoRefGene" ${DB} | cut -f2- \
      | genePredSingleCover stdin stdout | gzip -2c \
        > /scratch/tmp/${DB}.tmp.gz
    mv /scratch/tmp/${DB}.tmp.gz genes/$DB.gp.gz
    echo "${DB} done"
done
    #   5. genscan: gadMor1 melUnd1 oreNil2 speTri2 turTru2
    # this was done in error the first time, mistakenly using
    # the xenoRefGene table instead of genscan
    for DB in `cat genscan.list`
do
hgsql -N -e "select * from genscan" ${DB} | cut -f2- \
      | genePredSingleCover stdin stdout | gzip -2c \
        > /scratch/tmp/${DB}.tmp.gz
    mv /scratch/tmp/${DB}.tmp.gz genes/$DB.gp.gz
    echo "${DB} done"
done

    # verify counts for genes are reasonable:
    for T in genes/*.gz
do
    echo -n "# $T: "
    zcat $T | cut -f1 | sort | uniq -c | wc -l
done

# genes/ailMel1.gp.gz: 19204
# genes/anoCar2.gp.gz: 17766
# genes/bosTau7.gp.gz: 12958
# genes/calJac3.gp.gz: 20843
# genes/canFam3.gp.gz: 20652
# genes/cavPor3.gp.gz: 18631
# genes/choHof1.gp.gz: 12403
# genes/chrPic1.gp.gz: 19433
# genes/danRer7.gp.gz: 13902
# genes/dasNov3.gp.gz: 29551
# genes/dipOrd1.gp.gz: 15784
# genes/echTel1.gp.gz: 16499
# genes/equCab2.gp.gz: 20403
# genes/eriEur1.gp.gz: 11712
# genes/felCat5.gp.gz: 19512
# genes/fr3.gp.gz: 18014
# genes/gadMor1.gp.gz: 27572
# genes/galGal4.gp.gz: 4892
# genes/gasAcu1.gp.gz: 20631
# genes/gorGor3.gp.gz: 20759
# genes/hetGla2.gp.gz: 25749
# genes/hg19.gp.gz: 20718
# genes/latCha1.gp.gz: 18786
# genes/loxAfr3.gp.gz: 19986
# genes/macEug2.gp.gz: 26006
# genes/melGal1.gp.gz: 14050
# genes/melUnd1.gp.gz: 15296
# genes/micMur1.gp.gz: 16240
# genes/mm10.gp.gz: 20985
# genes/monDom5.gp.gz: 19188
# genes/myoLuc2.gp.gz: 19685
# genes/nomLeu2.gp.gz: 22996
# genes/ochPri2.gp.gz: 15970
# genes/oreNil2.gp.gz: 18636
# genes/ornAna1.gp.gz: 17728
# genes/oryCun2.gp.gz: 18921
# genes/oryLat2.gp.gz: 19576
# genes/otoGar3.gp.gz: 24061
# genes/oviAri1.gp.gz: 17890
# genes/panTro4.gp.gz: 18647
# genes/papHam1.gp.gz: 27842
# genes/petMar1.gp.gz: 11089
# genes/ponAbe2.gp.gz: 19895
# genes/proCap1.gp.gz: 16043
# genes/pteVam1.gp.gz: 16966
# genes/rheMac3.gp.gz: 5580
# genes/rn5.gp.gz: 16393
# genes/saiBol1.gp.gz: 23419
# genes/sarHar1.gp.gz: 20694
# genes/sorAra1.gp.gz: 13156
# genes/speTri2.gp.gz: 22377
# genes/susScr3.gp.gz: 3771
# genes/taeGut1.gp.gz: 17354
# genes/tarSyr1.gp.gz: 13615
# genes/tetNig2.gp.gz: 19539
# genes/triMan1.gp.gz: 19514
# genes/tupBel1.gp.gz: 15407
# genes/turTru2.gp.gz: 28375
# genes/vicPac1.gp.gz: 11754
# genes/xenTro3.gp.gz: 8447

    # kluster job to annotate each maf file
    screen -S mm10      # manage long running procedure with screen
    ssh swarm
    cd /hive/data/genomes/mm10/bed/multiz60way/frames
    cat << '_EOF_' > runOne
#!/bin/csh -fe
# runOne <chrom> <gene db>
#	Run genePredToMafFrames on ../maf/<chrom>.maf with the gene
#	predictions in genes/<gene db>.gp.gz, the gzipped result goes to
#	parts/<chrom>.<gene db>.mafFrames.gz

set C = $1
set G = $2

cat ../maf/${C}.maf | genePredToMafFrames mm10 stdin stdout \
        ${G} genes/${G}.gp.gz | gzip > parts/${C}.${G}.mafFrames.gz
'_EOF_'
    # << happy emacs
    chmod +x runOne

    # older instructions excluded mm10 from the gene.list
    #   this was a mistake.  mm10 can be annotated too.
    #   Mistakenly did this the first run through, had to manually
    #   do the mm10 genes separately on hgwdev after this was done
    ls ../maf | sed -e "s/.maf//" > chr.list
    ls genes | sed -e "s/.gp.gz//" > gene.list

    cat << '_EOF_' > template
#LOOP
runOne $(root1) $(root2) {check out exists+ parts/$(root1).$(root2).mafFrames.gz}
#ENDLOOP
'_EOF_'
    # << happy emacs

    mkdir parts
    gensub2 chr.list gene.list template jobList
    para -ram=8g create jobList
    para try ... check ... push
# Completed: 3960 of 3960 jobs
# CPU time in finished jobs:      85610s    1426.83m    23.78h    0.99d  0.003 y
# IO & Wait Time:               2030956s   33849.27m   564.15h   23.51d  0.064 y
# Average job time:                 534s       8.91m     0.15h    0.01d
# Longest finished job:            3877s      64.62m     1.08h    0.04d
# Submission to last job:         12974s     216.23m     3.60h    0.15d

    # collect all results into one file:
    cd /hive/data/genomes/mm10/bed/multiz60way/frames
    find ./parts -type f | while read F
do
    zcat ${F}
done | sort -k1,1 -k2,2n > multiz60wayFrames.bed
    #   -rw-rw-r-- 1 1164299719 May 30 11:28 multiz60wayFrames.bed

    # verify there are frames on everything:
    cut -f4 multiz60wayFrames.bed | sort | uniq -c | sort -n \
        > annotation.survey.txt
    # should be 60 species:
    wc -l annotation.survey.txt
    #   60 annotation.survey.txt
    # and the minimum numbers:
    head annotation.survey.txt
    #   43900 susScr3
    #   59839 rheMac3
    #   153246 petMar1
    #   162501 choHof1
    # ... etc ...

    #   load the resulting file
    ssh hgwdev
    cd /hive/data/genomes/mm10/bed/multiz60way/frames
    time gzip multiz60wayFrames.bed
    #   real    0m51.826s
    # reloading this table 2012-10-11 with more accurate frames:
    time hgLoadMafFrames mm10 multiz60wayFrames multiz60wayFrames.bed.gz
    #   real    3m2.449s
    time featureBits -countGaps mm10 multiz60wayFrames
    #   57707702 bases of 2730871774 (2.113%) in intersection
    #   real    1m45.141s

    # reload table to fix frames problems 2014-03-19 - Hiram
    time featureBits -countGaps mm10 multiz60wayFrames
    # 79955378 bases of 2730871774 (2.928%) in intersection

    #   enable the trackDb entries:
# frames multiz60wayFrames
# irows on
    #   appears to work OK

#########################################################################
# Phylogenetic tree from 60-way (DONE - 2012-05-31 - 2012-06-12 - Hiram)
    mkdir /hive/data/genomes/mm10/bed/multiz60way/4d
    cd /hive/data/genomes/mm10/bed/multiz60way/4d

    # the annotated maf's are in:
    #	../anno/result/*.maf

    # using ensGene for mm10, only transcribed genes and nothing
    #	from the randoms and other misc.
    hgsql mm10 -Ne \
    "select * from ensGene WHERE cdsEnd > cdsStart;" | cut -f 2-20 \
	| egrep -E -v "chrM|chrUn|random|_hap" > ensGene.gp
    wc -l *.gp
    #   55423 ensGene.gp

    genePredSingleCover ensGene.gp stdout | sort > ensGeneNR.gp
    wc -l ensGeneNR.gp
    #	22457 ensGeneNR.gp

    ssh encodek
    mkdir /hive/data/genomes/mm10/bed/multiz60way/4d/run
    cd /hive/data/genomes/mm10/bed/multiz60way/4d/run
    mkdir ../mfa

    # newer versions of msa_view have a slightly different operation
    # the sed of the gp file inserts the reference species in the chr name
    cat << '_EOF_' > 4d.csh
#!/bin/csh -fe
# 4d.csh <chrom> <maf file name> <output file>
#	<maf file name> is read from $r/anno/result and <output file> is
#	written relative to $r/4d/run.  Runs msa_view --4d on one chrom
#	using the ensGeneNR.gp genes on that chrom, working in /scratch/tmp.
set PHASTBIN = /cluster/bin/phast.build/cornellCVS/phast.2010-12-30/bin
set r = "/hive/data/genomes/mm10/bed/multiz60way"
set c = $1
set infile = $r/anno/result/$2
set outfile = $3
cd /scratch/tmp
# 'clean' maf: on the s lines keep only the part of the source name before
#	the first dot (db.chrom becomes db)
perl -wpe 's/^s ([^.]+)\.\S+/s $1/' $infile > $c.maf
# genes on this chrom only, with the chrom column rewritten to mm10.<chrom>
awk -v C=$c '$2 == C {print}' $r/4d/ensGeneNR.gp | sed -e "s/\t$c\t/\tmm10.$c\t/" > $c.gp
set NL=`wc -l $c.gp| gawk '{print $1}'`
if ("$NL" != "0") then
    $PHASTBIN/msa_view --4d --features $c.gp -i MAF $c.maf -o SS > $c.ss
    $PHASTBIN/msa_view -i SS --tuple-size 1 $c.ss > $r/4d/run/$outfile
else
    # no genes on this chrom: the output file holds a single newline
    echo "" > $r/4d/run/$outfile
endif
# remove the scratch copies
rm -f $c.gp $c.maf $c.ss
'_EOF_'
    # << happy emacs
    chmod +x 4d.csh

    ls -1S /hive/data/genomes/mm10/bed/multiz60way/anno/result/*.maf \
	| sed -e "s#.*multiz60way/anno/result/##" \
	> maf.list

    cat << '_EOF_' > template
#LOOP
4d.csh $(root1) $(path1) {check out line+ ../mfa/$(root1).mfa}
#ENDLOOP
'_EOF_'
    # << happy emacs

    # the tac puts the quick jobs at the front
    gensub2 maf.list single template stdout | tac > jobList
    para create jobList
    para try ... check
    para -maxJob=5 push
    para time
# Completed: 66 of 66 jobs
# CPU time in finished jobs:      13176s     219.60m     3.66h    0.15d  0.000 y
# IO & Wait Time:                 31790s     529.84m     8.83h    0.37d  0.001 y
# Average job time:                 681s      11.36m     0.19h    0.01d
# Longest finished job:            2883s      48.05m     0.80h    0.03d
# Submission to last job:          2925s      48.75m     0.81h    0.03d

    # combine mfa files
    ssh hgwdev
    cd /hive/data/genomes/mm10/bed/multiz60way/4d
    # remove the broken empty files, size 0 and size 1:
    find ./mfa -type f -size 0 | xargs rm -f
    # most interesting, this did not identify files of size 1:
#    find ./mfa -type f -size 1
    ls -og mfa | awk '$3 == 1' | awk '{print $NF}' > empty.list
    sed -e "s#^#mfa/##" empty.list | xargs rm -f
    #want comma-less species.list
    /cluster/bin/phast.build/cornellCVS/phast.2010-12-30/bin/msa_view \
	--aggregate "`cat ../species.list`" mfa/*.mfa | sed s/"> "/">"/ \
	    > 4d.all.mfa
    # check they are all in there:
    grep "^>" 4d.all.mfa | wc -l
    #   60

    # use phyloFit to create tree model (output is phyloFit.mod)
    time nice -n +19 \
	/cluster/bin/phast.build/cornellCVS/phast.2010-12-30/bin/phyloFit \
	    --EM --precision MED --msa-format FASTA --subst-mod REV \
		--tree ../tree-commas.nh 4d.all.mfa
    #   real    98m59.203s
    mv phyloFit.mod all.mod

    grep TREE all.mod
#TREE: (((((((((((((((mm10:0.0855383,rn5:0.0922719):0.202381,dipOrd1:0.210819):0.0258471,(hetGla2:0.0917322,cavPor3:0.136876):0.0994271):0.00910944,speTri2:0.145483):0.0274969,(oryCun2:0.109639,ochPri2:0.200966):0.102067):0.0141654,(((((((((hg19:0.00674057,panTro4:0.00692231):0.00309904,gorGor3:0.00918625):0.00954082,ponAbe2:0.0191843):0.00356049,nomLeu2:0.0218207):0.0116848,(rheMac3:0.00814945,papHam1:0.0079848):0.0289473):0.0208338,(calJac3:0.0342405,saiBol1:0.0333221):0.0359171):0.0594469,tarSyr1:0.137467):0.011091,(micMur1:0.0918138,otoGar3:0.127231):0.0351527):0.0153171,tupBel1:0.18879):0.0042463):0.0214646,((susScr3:0.121641,(vicPac1:0.109818,(turTru2:0.0635753,(oviAri1:0.0392493,bosTau7:0.0315816):0.0939861):0.0203711):0.00368417):0.0444758,((((felCat5:0.0897448,(canFam3:0.0888602,ailMel1:0.0767935):0.021837):0.05011,equCab2:0.109367):0.00605998,(myoLuc2:0.137144,pteVam1:0.114013):0.0339604):0.00395001,(eriEur1:0.226934,sorAra1:0.270619):0.0628319):0.00292667):0.0291403):0.0231397,((((loxAfr3:0.078841,proCap1:0.160295):0.00825096,echTel1:0.266786):0.0031636,triMan1:0.0685675):0.0736043,(dasNov3:0.112086,choHof1:0.0974658):0.0535724):0.00739115):0.245967,(monDom5:0.139913,(sarHar1:0.132596,macEug2:0.111778):0.0294309):0.21273):0.0770867,ornAna1:0.50425):0.135096,(((((melGal1:0.067697,galGal4:0.05253):0.13729,taeGut1:0.202681):0.00899388,melUnd1:0.127774):0.216078,anoCar2:0.575186):0.0128221,chrPic1:0.201659):0.137011):0.113527,xenTro3:0.943162):0.0646458,latCha1:0.596956):0.463611,((((((tetNig2:0.223213,fr3:0.198755):0.263107,oreNil2:0.33649):0.0139699,gasAcu1:0.314841):0.0573697,oryLat2:0.430105):0.185668,gadMor1:0.562778):0.169352,danRer7:0.753326):0.117017):0.501088,petMar1:0.501088);

    #   four different subset lists:
    paste glire.list euarchontoglires.list placental.list all.list
# mm10    mm10    mm10    mm10
# rn5     rn5     rn5     rn5
# dipOrd1 dipOrd1 dipOrd1 dipOrd1
# hetGla2 hetGla2 hetGla2 hetGla2
# cavPor3 cavPor3 cavPor3 cavPor3
# speTri2 speTri2 speTri2 speTri2
# oryCun2 oryCun2 oryCun2 oryCun2
# ochPri2 ochPri2 ochPri2 ochPri2
#         tupBel1 tupBel1 tupBel1
#         hg19    hg19    hg19
#         gorGor3 gorGor3 gorGor3
#         panTro4 panTro4 panTro4
#         nomLeu2 nomLeu2 nomLeu2
#         ponAbe2 ponAbe2 ponAbe2
#         tarSyr1 tarSyr1 tarSyr1
#         rheMac3 rheMac3 rheMac3
#         papHam1 papHam1 papHam1
#         otoGar3 otoGar3 otoGar3
#         calJac3 calJac3 calJac3
#         micMur1 micMur1 micMur1
#         saiBol1 saiBol1 saiBol1
#                 equCab2 equCab2
#                 vicPac1 vicPac1
#                 turTru2 turTru2
#                 susScr3 susScr3
#                 bosTau7 bosTau7
#                 oviAri1 oviAri1
#                 pteVam1 pteVam1
#                 myoLuc2 myoLuc2
#                 felCat5 felCat5
#                 canFam3 canFam3
#                 ailMel1 ailMel1
#                 eriEur1 eriEur1
#                 sorAra1 sorAra1
#                 choHof1 choHof1
#                 dasNov3 dasNov3
#                 proCap1 proCap1
#                 echTel1 echTel1
#                 triMan1 triMan1
#                 loxAfr3 loxAfr3
#                         macEug2
#                         sarHar1
#                         monDom5
#                         ornAna1
#                         galGal4
#                         taeGut1
#                         melGal1
#                         melUnd1
#                         anoCar2
#                         chrPic1
#                         xenTro3
#                         latCha1
#                         gadMor1
#                         gasAcu1
#                         fr3
#                         oreNil2
#                         tetNig2
#                         danRer7
#                         oryLat2
#                         petMar1

    # on organisms that do not have all species in all files, the file names
    #	need to be filtered.  Using this perl script to extract from
    # the full mfa files, only the subset of species from the four lists:
    cat << '_EOF_' > filterMfa.pl
#!/usr/bin/env perl

# filterMfa.pl - extract a subset of species from every mfa/*.mfa file
#
# usage: filterMfa.pl <subset.list>
#
#   <subset.list> - one db name per line (e.g. glire.list)
#
# For each mfa/<name>.mfa in the current directory, writes
# <subset>Mfa/<name>.mfa (the list file name with its ".list" suffix
# replaced by "Mfa") containing only the sequences whose "> dbName"
# header names a db found in the list.  The output directory must
# already exist.  Exits 255 on a usage error and dies on any file error.

use strict;
use warnings;

my $argc = scalar(@ARGV);

if ($argc != 1) {
    printf STDERR "usage: filterMfa.pl <subset.list>\n";
    exit 255;
}

# set of db names to keep, keyed by name
my %dbList;
my $listFile = shift;
open (my $listFh, '<', $listFile) or die "can not read $listFile: $!";
# plain print: the file name must not be interpreted as a printf format
print STDERR "using list: $listFile\n";
while (my $db = <$listFh>) {
    chomp $db;
    $dbList{$db} = 1;
}
close ($listFh);

# glire.list -> glireMfa ; only a trailing literal ".list" is removed
(my $dirName = $listFile) =~ s/\.list$//;
$dirName .= "Mfa";

# glob instead of `ls`: no subshell, and no noise when nothing matches
for my $mfaFile (glob("mfa/*.mfa")) {
    (my $chr = $mfaFile) =~ s#^mfa/##;
    my $outFile = "$dirName/$chr";
#    printf STDERR "processing: %s into %s\n", $mfaFile, $outFile;
    open (my $in, '<', $mfaFile) or die "can not read $mfaFile: $!";
    open (my $out, '>', $outFile) or die "can not write to $outFile: $!";
    # true while the lines being read belong to a sequence that is kept
    my $inGroup = 0;
    while (my $line = <$in>) {
        if ($line =~ m/^> /) {
            chomp $line;
            # header is "> dbName"; the first field is the ">" itself
            my (undef, $faDbName) = split('\s+', $line);
            if (defined($faDbName) && exists($dbList{$faDbName})) {
                $inGroup = 1;
                print $out "> $faDbName\n";
            } else {
                $inGroup = 0;
            }
        } elsif ($inGroup) {
            print $out $line;
        }
    }
    close ($in);
    # buffered write errors (disk full, quota) only surface at close
    close ($out) or die "can not finish writing $outFile: $!";
}
'_EOF_'
    # << happy emacs
    chmod +x filterMfa.pl

    mkdir glireMfa euarchontogliresMfa placentalMfa vertebrateMfa

    # extract each set from the full mfa files, run msa_view on
    #   each subset and construct .nh tree for that subset
    for N in glire euarchontoglires placental vertebrate
do
    ./filterMfa.pl ${N}.list
    /cluster/bin/phast.build/cornellCVS/phast.2010-12-30/bin/msa_view \
	--aggregate "`cat ${N}.list|xargs echo`" ${N}Mfa/*.mfa \
        | sed s/"> "/">"/ > 4d.${N}.mfa
    /cluster/bin/phast.build/cornellCVS/phast.2010-12-30/bin/tree_doctor \
        --no-branchlen --prune-all-but="`cat ${N}.list|xargs echo`" \
        ../tree-commas.nh > tree-commas.${N}.nh
done

    ### XXX ### MOST INTERESTING, this phyloFit operation was repeated
    ### to verify that the full 60 species vertebrate operation produced the
    ### same result as the original "all" subset.  This phyloFit appears to
    ### produce a different result each time ?
    # use phyloFit to create tree model (output is phyloFit.mod)
    for N in glire euarchontoglires placental vertebrate
do
    time nice -n +19 \
	/cluster/bin/phast.build/cornellCVS/phast.2010-12-30/bin/phyloFit \
	    --EM --precision MED --msa-format FASTA --subst-mod REV \
		--tree ./tree-commas.${N}.nh 4d.${N}.mfa
    mv phyloFit.mod ${N}.mod
    grep TREE ${N}.mod | sed 's/TREE\:\ //' > ${N}.Nway.nh
done
    #   real    0m15.747s
    #   real    4m5.526s
    #   real    20m45.982s
    #   real    141m21.248s

#######################################################################
# phastCons 60-way (DONE - 2012-06-12, 2012-08-21 - Hiram)
    #	was unable to split the full chrom MAF files, now working on the
    #	maf files as they were split up during multiz

    # split 60way mafs into 10M chunks and generate sufficient statistics
    # files for phastCons
    ssh encodek
    mkdir -p /hive/data/genomes/mm10/bed/multiz60way/cons/ss
    mkdir -p /hive/data/genomes/mm10/bed/multiz60way/cons/msa.split
    cd /hive/data/genomes/mm10/bed/multiz60way/cons/msa.split

    cat << '_EOF_' > doSplit.csh
#!/bin/csh -ef
# doSplit.csh <chrom> <doneFile>
#
# Splits the annotated multiz60way MAF for one chromosome into
# sufficient-statistics (SS) window files under cons/ss/<chrom>/ using
# msa_split.  <doneFile> is written on completion; <doneFile>.running
# marks a job in progress.  Exits 0 without doing anything when either
# marker already exists and is non-empty.
set c = $1
set MAF = /hive/data/genomes/mm10/bed/multiz60way/anno/result/$c.maf
set WINDOWS = /hive/data/genomes/mm10/bed/multiz60way/cons/ss/$c

# cheap checks first: do not read the MAF at all for a finished or
# in-progress chromosome
if ( -s $2 ) then
    exit 0
endif
if ( -s $2.running ) then
    exit 0
endif

# WC = total lines, NL = comment lines; equal counts mean the MAF holds
# no alignment blocks and msa_split is skipped below
set WC = `cat $MAF | wc -l`
set NL = `grep "^#" $MAF | wc -l`

date >> $2.running

# always start from an empty output directory
rm -fr $WINDOWS
mkdir $WINDOWS
pushd $WINDOWS > /dev/null
if ( $WC != $NL ) then
/cluster/bin/phast.build/cornellCVS/phast.2010-12-30/bin/msa_split \
    $MAF -i MAF -o SS -r $WINDOWS/$c -w 10000000,0 -I 1000 -B 5000
endif
popd > /dev/null
date >> $2
rm -f $2.running
'_EOF_'
    # << happy emacs
    chmod +x doSplit.csh

    cat << '_EOF_' > template
#LOOP
doSplit.csh $(root1) {check out line+ $(root1).done}
#ENDLOOP
'_EOF_'
    # << happy emacs

    #	do the easy ones first to see some immediate results
    ls -1S -r ../../anno/result | sed -e "s/.maf//;" > maf.list

    gensub2 maf.list single template jobList
    para -ram=8g create jobList
    para try ... check ... etc
# Completed: 64 of 66 jobs
# Crashed: 2 jobs
# CPU time in finished jobs:     347730s    5795.49m    96.59h    4.02d  0.011 y
# IO & Wait Time:                102813s    1713.56m    28.56h    1.19d  0.003 y
# Average job time:                7040s     117.33m     1.96h    0.08d
# Longest finished job:           42666s     711.10m    11.85h    0.49d
# Submission to last job:        150336s    2505.60m    41.76h    1.74d
    # finish the last two on hgwdev with more memory.
# linux data memory, in 1024-byte units
export M=188743680
ulimit -S -m $M -v $M
./doSplit.csh chr1 chr1.done &
./doSplit.csh chr2 chr2.done
wait
    #   real    864m53.235s

    # Run phastCons
    #	This job is I/O intensive in its output files, beware where this
    #	takes place or do not run too many at once.
    ssh swarm
    mkdir -p /hive/data/genomes/mm10/bed/multiz60way/cons/run.cons
    cd /hive/data/genomes/mm10/bed/multiz60way/cons/run.cons

    #	there are going to be several different phastCons runs using
    #	this same script.  They trigger off of the current working directory
    #	$cwd:t which is the "grp" in this script.  It is one of:
    #	e.g.: all glire euarchontoglires primate

    cat << '_EOF_' > doPhast.csh
#!/bin/csh -fe
# doPhast.csh <chrom> <ssFile> <expectedLength> <targetCoverage> <rho>
#
# Runs phastCons on one SS chunk ($cons/ss/<chrom>/<ssFile>.ss) using the
# tree model <grp>.mod, where <grp> is the name of the directory the
# script is started from.  Results are left in ./pp/<chrom>/<ssFile>.pp
# (scores) and ./bed/<chrom>/<ssFile>.bed (most conserved elements),
# relative to that starting directory.
set PHASTBIN = /cluster/bin/phast.build/cornellCVS/phast.2010-12-30/bin
set c = $1
set f = $2
set len = $3
set cov = $4
set rho = $5
# group name is the last component of the current working directory
set grp = $cwd:t
set cons = /hive/data/genomes/mm10/bed/multiz60way/cons
# per-chunk scratch directory, removed at the end of the script
set tmp = $cons/tmp/$f
mkdir -p $tmp
set ssSrc = $cons/ss
set useGrp = "$grp.mod"
# link the inputs into the scratch directory; the non-inf file is
# linked only when the group has a non-empty one
if (-s $cons/$grp/$grp.non-inf) then
  ln -s $cons/$grp/$grp.mod $tmp
  ln -s $cons/$grp/$grp.non-inf $tmp
  ln -s $ssSrc/$c/$f.ss $tmp
else
  ln -s $ssSrc/$c/$f.ss $tmp
  ln -s $cons/$grp/$grp.mod $tmp
endif
pushd $tmp > /dev/null
# same phastCons call in both branches, except that the contents of
# $grp.non-inf are passed to --not-informative when that file exists
if (-s $grp.non-inf) then
  $PHASTBIN/phastCons $f.ss $useGrp \
    --rho $rho --expected-length $len --target-coverage $cov --quiet \
    --not-informative `cat $grp.non-inf` \
    --seqname $c --idpref $c --most-conserved $f.bed --score > $f.pp
else
  $PHASTBIN/phastCons $f.ss $useGrp \
    --rho $rho --expected-length $len --target-coverage $cov --quiet \
    --seqname $c --idpref $c --most-conserved $f.bed --score > $f.pp
endif
popd > /dev/null
# back in the group directory: make the result directories, then move
# the results out of scratch, replacing any earlier copies
mkdir -p pp/$c bed/$c
# NOTE(review): the sleep/touch pair is presumably a workaround for
# shared-filesystem latency after mkdir - confirm before removing
sleep 4
touch pp/$c bed/$c
rm -f pp/$c/$f.pp
rm -f bed/$c/$f.bed
mv $tmp/$f.pp pp/$c
mv $tmp/$f.bed bed/$c
rm -fr $tmp
'_EOF_'
    # << happy emacs
    chmod a+x doPhast.csh

    #	this template will serve for all runs
    #	root1 == chrom name, file1 == ss file name without .ss suffix
    cat << '_EOF_' > template
#LOOP
../run.cons/doPhast.csh $(root1) $(file1) 45 0.3 0.3 {check out line+ pp/$(root1)/$(file1).pp}
#ENDLOOP
'_EOF_'
    # << happy emacs

    ls -1S ../ss/chr*/chr* | sed -e "s/.ss$//" > ss.list

    # Create parasol batch and run it
    ############################ run for all species
    cd /hive/data/genomes/mm10/bed/multiz60way/cons
    mkdir all
    cd all
    cp -p ../../4d/all.mod ./all.mod

    gensub2 ../run.cons/ss.list single ../run.cons/template jobList
    para -ram=8g create jobList
    para try ... check ... push ... etc.
# Completed: 314 of 314 jobs
# CPU time in finished jobs:      36286s     604.77m    10.08h    0.42d  0.001 y
# IO & Wait Time:                 10101s     168.35m     2.81h    0.12d  0.000 y
# Average job time:                 148s       2.46m     0.04h    0.00d
# Longest finished job:             219s       3.65m     0.06h    0.00d
# Submission to last job:          4383s      73.05m     1.22h    0.05d

    # create Most Conserved track
    cd /hive/data/genomes/mm10/bed/multiz60way/cons/all
    cut -f1 ../../../../chrom.sizes | while read C
do
    ls -d bed/${C} 2> /dev/null | while read D
    do
        cat ${D}/${C}*.bed
    done | sort -k1,1 -k2,2n \
    | awk '{printf "%s\t%d\t%d\tlod=%d\t%s\n", "'${C}'", $2, $3, $5, $5;}'
done > tmpMostConserved.bed
    #   -rw-rw-r--  1 230642249 Jun 15 11:48 tmpMostConserved.bed
    /cluster/bin/scripts/lodToBedScore tmpMostConserved.bed > mostConserved.bed
    #   -rw-rw-r--  1 236425914 Jun 15 11:52 mostConserved.bed

    # load into database
    ssh hgwdev
    cd /hive/data/genomes/mm10/bed/multiz60way/cons/all
    time nice -n +19 hgLoadBed mm10 phastConsElements60way mostConserved.bed
    #   Read 6748481 elements of size 5 from mostConserved.bed
    #   real    2m20.950s

    # Try for 5% overall cov, and 70% CDS cov
    featureBits mm10 -enrichment refGene:cds phastConsElements60way
    #	--rho 0.3 --expected-length 45 --target-coverage 0.3
    #   refGene:cds 1.281%, phastConsElements60way 6.517%,
    #   both 0.913%, cover 71.29%, enrich 10.94x
    time featureBits mm10 -enrichment ensGene:cds phastConsElements60way
    #   ensGene:cds 1.357%, phastConsElements60way 6.517%, both 0.942%, cover
    #   69.39%, enrich 10.65x
    #   real    0m54.109s
    time featureBits mm10 -enrichment knownGene:cds phastConsElements60way
    #   knownGene:cds 1.325%, phastConsElements60way 6.517%, both 0.930%,
    #   cover 70.18%, enrich 10.77x
    #   real    0m50.472s

    # Create merged posterior probability file and wiggle track data files
    cd /hive/data/genomes/mm10/bed/multiz60way/cons/all
    mkdir downloads

    for D in `ls -d pp/chr* | sed -e 's#pp/##'`
do
    echo "working: $D"
    find ./pp/${D} -type f | sed -e "s#^./##; s#\.# d #g; s#-# m #;" \
	| sort -k1,1 -k3,3n | sed -e "s# d #.#g; s# m #-#g;" | xargs cat \
        | gzip -c > downloads/${D}.phastCons60way.wigFix.gz
done
    #   real    102m58.496s

    #	encode those files into wiggle data
    time (zcat downloads/*.wigFix.gz \
	| wigEncode stdin phastCons60way.wig phastCons60way.wib)
    #   Converted stdin, upper limit 1.00, lower limit 0.00
    #   real    9m32.980s
    du -hsc *.wi?
    #   1.8G    phastCons60way.wib
    #   298M    phastCons60way.wig
    #   2.1G    total

    #	encode into a bigWig file:
    #	(warning wigToBigWig process grows to about 36 Gb)
    #	in bash, to avoid the 32 Gb memory limit, set 180 Gb here:
sizeG=188743680
export sizeG
ulimit -d $sizeG
ulimit -v $sizeG
    time (zcat downloads/*.wigFix.gz \
        | wigToBigWig stdin ../../../../chrom.sizes phastCons60way.bw)
    #   real    27m1.039s
    #   -rw-rw-r--  1 4671685725 Jun 18 10:24 phastCons60way.bw
    bigWigInfo phastCons60way.bw
version: 4
isCompressed: yes
isSwapped: 0
primaryDataSize: 3,333,510,917
primaryIndexSize: 100,774,056
zoomLevels: 10
chromCount: 59
basesCovered: 1,929,686,275
mean: 0.149660
min: 0.000000
max: 1.000000
std: 0.282516

    #	if you wanted to use the bigWig file, loading bigWig table:
    #   but we don't use the bigWig file
    mkdir /gbdb/mm10/bbi
    ln -s `pwd`/phastCons60way.bw /gbdb/mm10/bbi
    hgsql mm10 -e 'drop table if exists phastCons60way; \
            create table phastCons60way (fileName varchar(255) not null); \
            insert into phastCons60way values
	("/gbdb/mm10/bbi/phastCons60way.bw");'

    # Load gbdb and database with wiggle.
    ssh hgwdev
    cd /hive/data/genomes/mm10/bed/multiz60way/cons/all
    ln -s `pwd`/phastCons60way.wib /gbdb/mm10/multiz60way/phastCons60way.wib
    time nice -n +19 hgLoadWiggle -pathPrefix=/gbdb/mm10/multiz60way mm10 \
	phastCons60way phastCons60way.wig
    #   real    0m54.546s

    wigTableStats.sh mm10 phastCons60way
# db.table      min max mean count sumData
# mm10.phastCons60way     0 1 0.14966 1929686275 2.88797e+08
#       stdDev viewLimits
#       0.282516 viewLimits=0:1

    #  Create histogram to get an overview of all the data
    ssh hgwdev
    cd /hive/data/genomes/mm10/bed/multiz60way/cons/all
    time nice -n +19 hgWiggle -doHistogram -db=mm10 \
	-hBinSize=0.001 -hBinCount=1000 -hMinVal=0.0 -verbose=2 \
	    phastCons60way > histogram.data 2>&1
    #	real    7m37.212s

    #	create plot of histogram:

    cat << '_EOF_' | gnuplot > histo.png
# Histogram plot of the phastCons60way scores read from histogram.data;
# the PNG is written to stdout.
# NOTE(review): the x-prefixed hex values are old-style png terminal
# color settings - confirm they are accepted by the installed gnuplot
set terminal png small x000000 xffffff xc000ff x66ff66 xffff00 x00ffff
set size 1.4, 0.8
set key left box
# grid lines at the y tics only
set grid noxtics
set grid ytics
set title " Mouse Mm10 Histogram phastCons60way track"
set xlabel " phastCons60way score"
set ylabel " Relative Frequency"
set y2label " Cumulative Relative Frequency (CRF)"
# the second y axis carries the cumulative curve, fixed to 0..1
set y2range [0:1]
set y2tics
# first y axis is limited to 0..0.02
set yrange [0:0.02]

# columns 2:5 as impulses on the first y axis (RelFreq),
# columns 2:7 as a line on the second y axis (CRF)
plot "histogram.data" using 2:5 title " RelFreq" with impulses, \
        "histogram.data" using 2:7 axes x1y2 title " CRF" with lines
'_EOF_'
    #	<< happy emacs

    display histo.png &

    ########################################################################
    ### Create a phastCons data set for Glires

    # setup glire-only run
    ssh swarm
    mkdir /hive/data/genomes/mm10/bed/multiz60way/cons/glire
    cd /hive/data/genomes/mm10/bed/multiz60way/cons/glire
    # glire-only: get the glire only tree from the 4d directory
    cp -p ../../4d/glire.mod ./glire.mod
    # and all the others become the non-informative list for phastCons to ignore
    sort ../../4d/glire.list > glire.list
    sort ../../4d/vertebrate.list > vertebrate.list
    comm -13 glire.list vertebrate.list | xargs echo \
        | sed -e "s/ /,/g" > glire.non-inf

    gensub2 ../run.cons/ss.list single ../run.cons/template jobList
    para -ram=8g create jobList
    para try ... check ... push ... etc.
# Completed: 314 of 314 jobs
# CPU time in finished jobs:      12411s     206.85m     3.45h    0.14d  0.000 y
# IO & Wait Time:                117850s    1964.16m    32.74h    1.36d  0.004 y
# Average job time:                 415s       6.91m     0.12h    0.00d
# Longest finished job:             658s      10.97m     0.18h    0.01d
# Submission to last job:           796s      13.27m     0.22h    0.01d

    cd /hive/data/genomes/mm10/bed/multiz60way/cons/glire
    # create Most Conserved track
    cut -f1 ../../../../chrom.sizes | while read C
do
    ls -d bed/${C} 2> /dev/null | while read D
    do
        cat ${D}/${C}*.bed
    done | sort -k1,1 -k2,2n \
    | awk '{printf "%s\t%d\t%d\tlod=%d\t%s\n", "'${C}'", $2, $3, $5, $5;}'
done > tmpMostConserved.bed
    #   real    0m32.945s
    /cluster/bin/scripts/lodToBedScore tmpMostConserved.bed > mostConserved.bed
    #   real    0m19.122s

    featureBits mm10 mostConserved.bed
    #   117058023 bases of 2652783500 (4.413%) in intersection
    #   real    0m21.506s

    # load into database
    ssh hgwdev
    cd /hive/data/genomes/mm10/bed/multiz60way/cons/glire
    time nice -n +19 hgLoadBed mm10 phastConsElements60wayGlire \
	mostConserved.bed
    #	Loaded 1336504 elements of size 6
    #	real    0m13.672s
    # verify coverage
    time featureBits mm10 phastConsElements60wayGlire
    #   117058023 bases of 2652783500 (4.413%) in intersection
    #   real    0m15.041s

    #	--rho 0.3 --expected-length 45 --target-coverage 0.3
    featureBits mm10 -enrichment refGene:cds phastConsElements60wayGlire
    #   refGene:cds 1.282%, phastConsElements60wayGlire 4.413%,
    #   both 0.944%, cover 73.60%, enrich 16.68x

    featureBits mm10 -enrichment knownGene:cds phastConsElements60wayGlire
    #   knownGene:cds 1.325%, phastConsElements60wayGlire 4.413%,
    #   both 0.957%, cover 72.22%, enrich 16.37x

#	Create the downloads .pp files, from which the phastCons wiggle data
    #	is calculated
    # sort by chromName, chromStart so that items are in numerical order
    #  for wigEncode
    cd /hive/data/genomes/mm10/bed/multiz60way/cons/glire
    mkdir downloads
    for D in `ls -d pp/chr* | sed -e 's#pp/##'`
do
    echo "working: $D"
    find ./pp/${D} -type f | sed -e "s#^./##; s#\.# d #g; s#-# m #;" \
	| sort -k1,1 -k3,3n | sed -e "s# d #.#g; s# m #-#g;" | xargs cat \
        | gzip -c > downloads/${D}.phastCons60way.glire.wigFix.gz
done

    # Create merged posterior probability file and wiggle track data files
    time (zcat downloads/chr*.wigFix.gz \
	 | wigEncode stdin phastCons60wayGlire.wig phastCons60wayGlire.wib) &
    #   Converted stdin, upper limit 1.00, lower limit 0.00
    #   real    10m26.712s

    #	encode to bigWig
    #	(warning wigToBigWig process grows to about 36 Gb)
    #	in bash, to avoid the 32 Gb memory limit:
export sizeG=188743680
ulimit -d $sizeG
ulimit -v $sizeG

    time (zcat downloads/*.wigFix.gz \
        | wigToBigWig -verbose=2 stdin ../../../../chrom.sizes \
        phastCons60wayGlire.bw > bigWig.log 2>&1) &
    #   real    52m17.108s
    grep VmPeak bigWig.log
    # pid=5552: VmPeak:     20926360 kB

    bigWigInfo phastCons60wayGlire.bw
version: 4
isCompressed: yes
isSwapped: 0
primaryDataSize: 3,631,413,425
primaryIndexSize: 100,774,056
zoomLevels: 10
chromCount: 59
basesCovered: 1,929,686,275
mean: 0.142675
min: 0.000000
max: 1.000000
std: 0.252347

    #	if desired to use the bigWig file, loading bigWig table:
    ln -s `pwd`/phastCons60wayGlire.bw /gbdb/mm10/bbi
    hgsql mm10 -e 'drop table if exists phastCons60wayGlire; \
            create table phastCons60wayGlire \
		(fileName varchar(255) not null); \
            insert into phastCons60wayGlire values
	("/gbdb/mm10/bbi/phastCons60wayGlire.bw");'

    ## load table with wiggle data
    ssh hgwdev
    cd /hive/data/genomes/mm10/bed/multiz60way/cons/glire
    ln -s `pwd`/phastCons60wayGlire.wib \
	/gbdb/mm10/multiz60way/phastCons60wayGlire.wib
    time nice -n +19 hgLoadWiggle -pathPrefix=/gbdb/mm10/multiz60way mm10 \
	phastCons60wayGlire phastCons60wayGlire.wig
    #   real    0m56.786s

    wigTableStats.sh mm10 phastCons60wayGlire
# db.table      min max mean count sumData
# mm10.phastCons60wayGlire     0 1 0.142675 1929686275 2.75318e+08
#	stdDev viewLimits
#       0.252347 viewLimits=0:1

    #  Create histogram to get an overview of all the data
    time nice -n +19 hgWiggle -doHistogram \
	-hBinSize=0.001 -hBinCount=1000 -hMinVal=0.0 -verbose=2 \
	    -db=mm10 phastCons60wayGlire  > histogram.data 2>&1
    #	real    4m28.743s

    #	create plot of histogram:

    cat << '_EOF_' | gnuplot > histo.png
# Histogram plot of the phastCons60wayGlire scores read from
# histogram.data; the PNG is written to stdout.
# NOTE(review): the x-prefixed hex values are old-style png terminal
# color settings - confirm they are accepted by the installed gnuplot
set terminal png small x000000 xffffff xc000ff x66ff66 xffff00 x00ffff
set size 1.4, 0.8
set key left box
# grid lines at the y tics only
set grid noxtics
set grid ytics
# title names the mm10 assembly this track was built on (was "Hg19")
set title " Mouse Mm10 Histogram phastCons60wayGlire track"
set xlabel " phastCons60wayGlire score"
set ylabel " Relative Frequency"
set y2label " Cumulative Relative Frequency (CRF)"
# the second y axis carries the cumulative curve, fixed to 0..1
set y2range [0:1]
set y2tics
# first y axis is limited to 0..0.02
set yrange [0:0.02]

# columns 2:5 as impulses on the first y axis (RelFreq),
# columns 2:7 as a line on the second y axis (CRF)
plot "histogram.data" using 2:5 title " RelFreq" with impulses, \
        "histogram.data" using 2:7 axes x1y2 title " CRF" with lines
'_EOF_'
    #	<< happy emacs

    display histo.png &

    ########################################################################
    ### Create a phastCons data set for Euarchontoglires

    # setup euarchontoglires-only run
    ssh swarm
    mkdir /hive/data/genomes/mm10/bed/multiz60way/cons/euarchontoglires
    cd /hive/data/genomes/mm10/bed/multiz60way/cons/euarchontoglires
    # euarchontoglires-only: get the euarchontoglires only tree from the 4d directory
    cp -p ../../4d/euarchontoglires.mod ./euarchontoglires.mod
    # and all the others become the non-informative list for phastCons to ignore
    sort ../../4d/euarchontoglires.list > euarchontoglires.list
    sort ../../4d/vertebrate.list > vertebrate.list
    comm -13 euarchontoglires.list vertebrate.list | xargs echo \
        | sed -e "s/ /,/g" > euarchontoglires.non-inf

    gensub2 ../run.cons/ss.list single ../run.cons/template jobList
    para -ram=8g create jobList
    para try ... check ... push ... etc.
# Completed: 314 of 314 jobs
# CPU time in finished jobs:      17421s     290.36m     4.84h    0.20d  0.001 y
# IO & Wait Time:                 37430s     623.83m    10.40h    0.43d  0.001 y
# Average job time:                 175s       2.91m     0.05h    0.00d
# Longest finished job:             343s       5.72m     0.10h    0.00d
# Submission to last job:          2403s      40.05m     0.67h    0.03d

    cd /hive/data/genomes/mm10/bed/multiz60way/cons/euarchontoglires
    # create Most Conserved track
    cut -f1 ../../../../chrom.sizes | while read C
do
    ls -d bed/${C} 2> /dev/null | while read D
    do
        cat ${D}/${C}*.bed
    done | sort -k1,1 -k2,2n \
    | awk '{printf "%s\t%d\t%d\tlod=%d\t%s\n", "'${C}'", $2, $3, $5, $5;}'
done > tmpMostConserved.bed
    #   real    0m32.945s
    /cluster/bin/scripts/lodToBedScore tmpMostConserved.bed > mostConserved.bed
    #   real    0m19.122s

    featureBits mm10 mostConserved.bed
    #   127113541 bases of 2652783500 (4.792%) in intersection
    #   real    0m21.506s

    # load into database
    ssh hgwdev
    cd /hive/data/genomes/mm10/bed/multiz60way/cons/euarchontoglires
    time nice -n +19 hgLoadBed mm10 phastConsElements60wayEuarchontoGlires \
	mostConserved.bed
    #	Loaded 2327130 elements of size 6
    #	real    0m24.591s
    # verify coverage
    time featureBits mm10 phastConsElements60wayEuarchontoGlires
    #   127113541 bases of 2652783500 (4.792%) in intersection
    #   real    0m18.857s

    #	--rho 0.3 --expected-length 45 --target-coverage 0.3
    featureBits mm10 -enrichment refGene:cds phastConsElements60wayEuarchontoGlires
    #   refGene:cds 1.282%, phastConsElements60wayEuarchontoGlires 4.792%,
    #   both 0.929%, cover 72.46%, enrich 15.12x

    featureBits mm10 -enrichment knownGene:cds phastConsElements60wayEuarchontoGlires
    #   knownGene:cds 1.325%, phastConsElements60wayEuarchontoGlires 4.792%,
    #   both 0.943%, cover 71.16%, enrich 14.85x

    #	Create the downloads .pp files, from which the phastCons wiggle data
    #	is calculated
    # sort by chromName, chromStart so that items are in numerical order
    #  for wigEncode
    cd /hive/data/genomes/mm10/bed/multiz60way/cons/euarchontoglires
    mkdir downloads
    for D in `ls -d pp/chr* | sed -e 's#pp/##'`
do
    echo "working: $D"
    find ./pp/${D} -type f | sed -e "s#^./##; s#\.# d #g; s#-# m #;" \
	| sort -k1,1 -k3,3n | sed -e "s# d #.#g; s# m #-#g;" | xargs cat \
        | gzip -c > downloads/${D}.phastCons60way.euarchontoglires.wigFix.gz
done

    # Create merged posterior probability file and wiggle track data files
    time (zcat downloads/chr*.wigFix.gz \
	 | wigEncode stdin phastCons60wayEuarchontoGlires.wig phastCons60wayEuarchontoGlires.wib \
        > wigEncode.log 2>&1) &
    # Converted stdin, upper limit 1.00, lower limit 0.00
    #   real    9m49.080s

    #	encode to bigWig
    #	(warning wigToBigWig process grows to about 36 Gb)
    #	in bash, to avoid the 32 Gb memory limit:
export sizeG=188743680
ulimit -d $sizeG
ulimit -v $sizeG

    time (zcat downloads/*.wigFix.gz \
        | wigToBigWig stdin ../../../../chrom.sizes phastCons60wayEuarchontoGlires.bw \
        > bigWig.log 2>&1 ) &
    #   real    26m0.111s
    bigWigInfo phastCons60wayEuarchontoGlires.bw
version: 4
isCompressed: yes
isSwapped: 0
primaryDataSize: 3,411,704,465
primaryIndexSize: 100,774,056
zoomLevels: 10
chromCount: 59
basesCovered: 1,929,686,275
mean: 0.133253
min: 0.000000
max: 1.000000
std: 0.256320

    #	if desired to use the bigWig file, loading bigWig table:
    ln -s `pwd`/phastCons60wayEuarchontoGlires.bw /gbdb/mm10/bbi
    hgsql mm10 -e 'drop table if exists phastCons60wayEuarchontoGlires; \
            create table phastCons60wayEuarchontoGlires \
		(fileName varchar(255) not null); \
            insert into phastCons60wayEuarchontoGlires values
	("/gbdb/mm10/bbi/phastCons60wayEuarchontoGlires.bw");'

    ## load table with wiggle data
    ssh hgwdev
    cd /hive/data/genomes/mm10/bed/multiz60way/cons/euarchontoglires
    ln -s `pwd`/phastCons60wayEuarchontoGlires.wib \
	/gbdb/mm10/multiz60way/phastCons60wayEuarchontoGlires.wib
    time nice -n +19 hgLoadWiggle -pathPrefix=/gbdb/mm10/multiz60way mm10 \
	phastCons60wayEuarchontoGlires phastCons60wayEuarchontoGlires.wig
    #   real    0m50.676s

    time wigTableStats.sh mm10 phastCons60wayEuarchontoGlires
# db.table      min max mean count sumData
# mm10.phastCons60wayEuarchontoGlires  0 1 0.133253 1929686275 2.57137e+08
#	stdDev viewLimits
#       0.25632 viewLimits=0:1
    #   real    0m21.964s

    #  Create histogram to get an overview of all the data
    time nice -n +19 hgWiggle -doHistogram \
	-hBinSize=0.001 -hBinCount=1000 -hMinVal=0.0 -verbose=2 \
	    -db=mm10 phastCons60wayEuarchontoGlires  > histogram.data 2>&1
    #	real    3m31.112s

    #	create plot of histogram:

    cat << '_EOF_' | gnuplot > histo.png
# Histogram plot of the phastCons60wayEuarchontoGlires scores read from
# histogram.data; the PNG is written to stdout.
# NOTE(review): the x-prefixed hex values are old-style png terminal
# color settings - confirm they are accepted by the installed gnuplot
set terminal png small x000000 xffffff xc000ff x66ff66 xffff00 x00ffff
set size 1.4, 0.8
set key left box
# grid lines at the y tics only
set grid noxtics
set grid ytics
set title " Mouse Mm10 Histogram phastCons60wayEuarchontoGlires track"
set xlabel " phastCons60wayEuarchontoGlires score"
set ylabel " Relative Frequency"
set y2label " Cumulative Relative Frequency (CRF)"
# the second y axis carries the cumulative curve, fixed to 0..1
set y2range [0:1]
set y2tics
# first y axis is limited to 0..0.02
set yrange [0:0.02]

# columns 2:5 as impulses on the first y axis (RelFreq),
# columns 2:7 as a line on the second y axis (CRF)
plot "histogram.data" using 2:5 title " RelFreq" with impulses, \
        "histogram.data" using 2:7 axes x1y2 title " CRF" with lines
'_EOF_'
    #	<< happy emacs

    display histo.png &

    ########################################################################
    ### Create a phastCons data set for primate ***### This was constructed
    ### and examined, but not used in the release

    # setup primate-only run
    ssh swarm
    mkdir /hive/data/genomes/mm10/bed/multiz60way/cons/primate
    cd /hive/data/genomes/mm10/bed/multiz60way/cons/primate
    # primate-only: get the primate only tree from the 4d directory
    cp -p ../../4d/primate.mod ./primate.mod
    # and all the others become the non-informative list for phastCons to ignore
    cat ../../4d/glire.list ../../4d/placental.list ../../4d/vertebrate.list \
        | grep -v mm10 | sort | xargs echo | sed -e "s/ /,/g" \
        > primate.non-inf

    gensub2 ../run.cons/ss.list single ../run.cons/template jobList
    para -ram=8g create jobList
    para try ... check ... push ... etc.
# Completed: 314 of 314 jobs
# CPU time in finished jobs:      13884s     231.39m     3.86h    0.16d  0.000 y
# IO & Wait Time:                130791s    2179.86m    36.33h    1.51d  0.004 y
# Average job time:                 461s       7.68m     0.13h    0.01d
# Longest finished job:             741s      12.35m     0.21h    0.01d
# Submission to last job:           910s      15.17m     0.25h    0.01d

    cd /hive/data/genomes/mm10/bed/multiz60way/cons/primate
    # create Most Conserved track
    cut -f1 ../../../../chrom.sizes | while read C
do
    ls -d bed/${C} 2> /dev/null | while read D
    do
        cat ${D}/${C}*.bed
    done | sort -k1,1 -k2,2n \
    | awk '{printf "%s\t%d\t%d\tlod=%d\t%s\n", "'${C}'", $2, $3, $5, $5;}'
done > tmpMostConserved.bed

    /cluster/bin/scripts/lodToBedScore tmpMostConserved.bed > mostConserved.bed
    #   real    0m27.199s

    featureBits mm10 mostConserved.bed
    #   112908553 bases of 2652783500 (4.256%) in intersection

    # load into database
    ssh hgwdev
    cd /hive/data/genomes/mm10/bed/multiz60way/cons/primate
    time nice -n +19 hgLoadBed mm10 phastConsElements60wayPrimate \
	mostConserved.bed
    #	Loaded 1119924 elements of size 6
    #	real    0m17.423s
    # verify coverage
    featureBits mm10 phastConsElements60wayPrimate
    #   112908553 bases of 2652783500 (4.256%) in intersection
    #   real    0m13.684s

    #	--rho 0.3 --expected-length 45 --target-coverage 0.3
    featureBits mm10 -enrichment refGene:cds phastConsElements60wayPrimate
    #   refGene:cds 1.281%, phastConsElements60wayPrimate 4.256%,
    #   both 0.897%, cover 69.98%, enrich 16.44x

    featureBits mm10 -enrichment knownGene:cds phastConsElements60wayPrimate
    #   knownGene:cds 1.325%, phastConsElements60wayPrimate 4.256%,
    #   both 0.909%, cover 68.64%, enrich 16.13x

    featureBits mm10 -enrichment ensGene:cds phastConsElements60wayPrimate
    #   ensGene:cds 1.357%, phastConsElements60wayPrimate 4.256%, both 0.913%,
    #   cover 67.30%, enrich 15.81x

    #	Create the downloads .pp files, from which the phastCons wiggle data
    #	is calculated
    # sort by chromName, chromStart so that items are in numerical order
    cd /hive/data/genomes/mm10/bed/multiz60way/cons/primate
    mkdir downloads
    for D in `ls -d pp/chr* | sed -e 's#pp/##'`
do
    echo "working: $D"
    find ./pp/${D} -type f | sed -e "s#^./##; s#\.# d #g; s#-# m #;" \
	| sort -k1,1 -k3,3n | sed -e "s# d #.#g; s# m #-#g;" | xargs cat \
        | gzip -c > downloads/${D}.phastCons60way.primate.wigFix.gz
done

    # Create merged posterior probability file and wiggle track data files
    zcat downloads/chr*.wigFix.gz \
	 | wigEncode stdin phastCons60wayPrimate.wig phastCons60wayPrimate.wib
    # Converted stdin, upper limit 1.00, lower limit 0.00
    #	real    12m22.465s

    #	encode to bigWig
    #	(warning wigToBigWig process grows to about 36 Gb)
    #	in bash, to avoid the 32 Gb memory limit:
export sizeG=188743680
ulimit -d $sizeG
ulimit -v $sizeG

    zcat downloads/*.wigFix.gz \
        | wigToBigWig stdin ../../../../chrom.sizes phastCons60wayPrimate.bw
    #    real 31m44.517s
    bigWigInfo phastCons60wayPrimate.bw
version: 4
isCompressed: yes
isSwapped: 0
primaryDataSize: 2,431,379,060
primaryIndexSize: 100,774,056
zoomLevels: 10
chromCount: 59
basesCovered: 1,929,686,275
mean: 0.093847
min: 0.000000
max: 1.000000
std: 0.233892

    #	if desired to use the bigWig file, loading bigWig table:
    ln -s `pwd`/phastCons60wayPrimate.bw /gbdb/mm10/bbi
    hgsql mm10 -e 'drop table if exists phastCons60wayPrimate; \
            create table phastCons60wayPrimate \
		(fileName varchar(255) not null); \
            insert into phastCons60wayPrimate values
	("/gbdb/mm10/bbi/phastCons60wayPrimate.bw");'

    ## load table with wiggle data
    ssh hgwdev
    cd /hive/data/genomes/mm10/bed/multiz60way/cons/primate
    ln -s `pwd`/phastCons60wayPrimate.wib \
	/gbdb/mm10/multiz60way/phastCons60wayPrimate.wib
    time nice -n +19 hgLoadWiggle -pathPrefix=/gbdb/mm10/multiz60way mm10 \
	phastCons60wayPrimate phastCons60wayPrimate.wig
    #   real    1m24.188s

    wigTableStats.sh mm10 phastCons60wayPrimate
# db.table      min max mean count sumData
# mm10.phastCons60wayPrimate 0 1 0.0938475 1929686275 1.81096e+08
#       0.233892 viewLimits=0:1

    #  Create histogram to get an overview of all the data
    time nice -n +19 hgWiggle -doHistogram \
	-hBinSize=0.001 -hBinCount=1000 -hMinVal=0.0 -verbose=2 \
	    -db=mm10 phastCons60wayPrimate  > histogram.data 2>&1
    #   real    7m3.198s

    #	create plot of histogram:

    cat << '_EOF_' | gnuplot > histo.png
set terminal png small x000000 xffffff xc000ff x66ff66 xffff00 x00ffff
set size 1.4, 0.8
set key left box
set grid noxtics
set grid ytics
set title " Mouse Mm10 Histogram phastCons60wayPrimate track"
set xlabel " phastCons60wayPrimate score"
set ylabel " Relative Frequency"
set y2label " Cumulative Relative Frequency (CRF)"
set y2range [0:1]
set y2tics
set yrange [0:0.02]

plot "histogram.data" using 2:5 title " RelFreq" with impulses, \
        "histogram.data" using 2:7 axes x1y2 title " CRF" with lines
'_EOF_'
    #	<< happy emacs

    display histo.png &

#########################################################################
    ### Create a phastCons data set for Placental

    # setup placental-only run
    ssh swarm
    mkdir /hive/data/genomes/mm10/bed/multiz60way/cons/placental
    cd /hive/data/genomes/mm10/bed/multiz60way/cons/placental

    # placental-only: get the placental only tree from the 4d directory
    cp -p ../../4d/placental.mod ./placental.mod
    # and all the others become the non-informative list for phastCons to ignore
    sort ../../4d/placental.list > placental.list
    sort ../../4d/vertebrate.list > vertebrate.list
    comm -13 placental.list vertebrate.list | xargs echo \
        | sed -e "s/ /,/g" > placental.non-inf

    gensub2 ../run.cons/ss.list single ../run.cons/template jobList
    para create jobList
    para try ... check ... push ... etc.
# Completed: 314 of 314 jobs
# CPU time in finished jobs:      27853s     464.21m     7.74h    0.32d  0.001 y
# IO & Wait Time:                128981s    2149.69m    35.83h    1.49d  0.004 y
# Average job time:                 499s       8.32m     0.14h    0.01d
# Longest finished job:             785s      13.08m     0.22h    0.01d
# Submission to last job:          5970s      99.50m     1.66h    0.07d

    cd /hive/data/genomes/mm10/bed/multiz60way/cons/placental
    # create Most Conserved track
    cut -f1 ../../../../chrom.sizes | while read C
do
    ls -d bed/${C} 2> /dev/null | while read D
    do
        cat ${D}/${C}*.bed
    done | sort -k1,1 -k2,2n \
    | awk '{printf "%s\t%d\t%d\tlod=%d\t%s\n", "'${C}'", $2, $3, $5, $5;}'
done > tmpMostConserved.bed
    #   real    0m44.506s
    /cluster/bin/scripts/lodToBedScore tmpMostConserved.bed > mostConserved.bed
    #   real    0m44.170s

    featureBits mm10 mostConserved.bed
    #   144041584 bases of 2652783500 (5.430%) in intersection
    #   real    0m54.927s

    # load into database
    ssh hgwdev
    cd /hive/data/genomes/mm10/bed/multiz60way/cons/placental
    time nice -n +19 hgLoadBed mm10 phastConsElements60wayPlacental \
	mostConserved.bed
    #	Loaded 5257437 elements of size 6
    #	real    0m56.788s

    # verify coverage, should be the same as the file measured above
    time featureBits mm10 phastConsElements60wayPlacental
    #   144041584 bases of 2652783500 (5.430%) in intersection
    #   real    0m39.537s

    #	--rho 0.3 --expected-length 45 --target-coverage 0.3
    time featureBits mm10 -enrichment refGene:cds phastConsElements60wayPlacental
    #   refGene:cds 1.282%, phastConsElements60wayPlacental 5.430%,
    #   both 0.920%, cover 71.73%, enrich 13.21x
    #   real    0m39.833s

    time featureBits mm10 -enrichment knownGene:cds phastConsElements60wayPlacental
    #   knownGene:cds 1.325%, phastConsElements60wayPlacental 5.430%,
    #   both 0.934%, cover 70.47%, enrich 12.98x
    #   real    0m44.567s

    time featureBits mm10 -enrichment ensGene:cds phastConsElements60wayPlacental
    #   ensGene:cds 1.357%, phastConsElements60wayPlacental 5.430%,
    #   both 0.941%, cover 69.32%, enrich 12.77x
    #   real    0m43.093s

    #	Create the downloads .pp files, from which the phastCons wiggle data
    #	is calculated
    # sort by chromName, chromStart so that items are in numerical order
    #  for wigEncode
    cd /hive/data/genomes/mm10/bed/multiz60way/cons/placental
    mkdir downloads
    for D in `ls -d pp/chr* | sed -e 's#pp/##'`
do
    echo "working: $D"
    find ./pp/${D} -type f | sed -e "s#^./##; s#\.# d #g; s#-# m #;" \
	| sort -k1,1 -k3,3n | sed -e "s# d #.#g; s# m #-#g;" | xargs cat \
        | gzip -c > downloads/${D}.phastCons60way.placental.wigFix.gz
done

    # Create merged posterior probability file and wiggle track data files
    time (zcat downloads/chr*.wigFix.gz \
	 | wigEncode stdin phastCons60wayPlacental.wig \
        phastCons60wayPlacental.wib > wigEncode.log 2>&1) &
    # Converted stdin, upper limit 1.00, lower limit 0.00
    #   real    9m48.237s

    #	encode to bigWig
    #	(warning wigToBigWig process grows to about 36 Gb)
    #	in bash, to avoid the 32 Gb memory limit:
export sizeG=188743680
ulimit -d $sizeG
ulimit -v $sizeG

    time (zcat downloads/*.wigFix.gz \
        | wigToBigWig stdin ../../../../chrom.sizes \
        phastCons60wayPlacental.bw > bigWig.log 2>&1) &
    #   real    25m18.556s
    bigWigInfo phastCons60wayPlacental.bw
version: 4
isCompressed: yes
isSwapped: 0
primaryDataSize: 3,271,676,156
primaryIndexSize: 100,774,056
zoomLevels: 10
chromCount: 59
basesCovered: 1,929,686,275
mean: 0.135703
min: 0.000000
max: 1.000000
std: 0.266432

    #	if desired to use the bigWig file, loading bigWig table:
    ln -s `pwd`/phastCons60wayPlacental.bw /gbdb/mm10/bbi
    hgsql mm10 -e 'drop table if exists phastCons60wayPlacental; \
            create table phastCons60wayPlacental \
		(fileName varchar(255) not null); \
            insert into phastCons60wayPlacental values
	("/gbdb/mm10/bbi/phastCons60wayPlacental.bw");'

    ## load table with wiggle data
    ssh hgwdev
    cd /hive/data/genomes/mm10/bed/multiz60way/cons/placental
    ln -s `pwd`/phastCons60wayPlacental.wib \
	/gbdb/mm10/multiz60way/phastCons60wayPlacental.wib
    time nice -n +19 hgLoadWiggle -pathPrefix=/gbdb/mm10/multiz60way mm10 \
	phastCons60wayPlacental phastCons60wayPlacental.wig
    #   real    0m41.999s

    time wigTableStats.sh mm10 phastCons60wayPlacental
# db.table      min max mean count sumData
# mm10.phastCons60wayPlacental 0 1 0.135703 1929686275 2.61864e+08
#	stdDev viewLimits
#       0.266432 # viewLimits=0:1
    #   real    0m21.723s


    #  Create histogram to get an overview of all the data
    time nice -n +19 hgWiggle -doHistogram \
	-hBinSize=0.001 -hBinCount=1000 -hMinVal=0.0 -verbose=2 \
	    -db=mm10 phastCons60wayPlacental  > histogram.data 2>&1
    #   real    2m39.659s

    #	create plot of histogram:

    cat << '_EOF_' | gnuplot > histo.png
set terminal png small x000000 xffffff xc000ff x66ff66 xffff00 x00ffff
set size 1.4, 0.8
set key left box
set grid noxtics
set grid ytics
set title " Mouse Mm10 Histogram phastCons60wayPlacental track"
set xlabel " phastCons60wayPlacental score"
set ylabel " Relative Frequency"
set y2label " Cumulative Relative Frequency (CRF)"
set y2range [0:1]
set y2tics
set yrange [0:0.02]

plot "histogram.data" using 2:5 title " RelFreq" with impulses, \
        "histogram.data" using 2:7 axes x1y2 title " CRF" with lines
'_EOF_'
    #	<< happy emacs

    display histo.png &

#########################################################################
    ### Create a phastCons data set for Vertebrate

    # setup vertebrate-only run
    ssh swarm
    mkdir /hive/data/genomes/mm10/bed/multiz60way/cons/vertebrate
    cd /hive/data/genomes/mm10/bed/multiz60way/cons/vertebrate

    # vertebrate-only: get the vertebrate only tree from the 4d directory
    cp -p ../../4d/vertebrate.mod ./vertebrate.mod
    # they are all in this one, no need for non-informative list

    gensub2 ../run.cons/ss.list single ../run.cons/template jobList
    para create jobList
    para try ... check ... push ... etc.
# Completed: 313 of 314 jobs
# Crashed: 1 jobs
# CPU time in finished jobs:      36058s     600.97m    10.02h    0.42d  0.001 y
# IO & Wait Time:                125496s    2091.59m    34.86h    1.45d  0.004 y
# Average job time:                 516s       8.60m     0.14h    0.01d
# Longest finished job:             912s      15.20m     0.25h    0.01d
# Submission to last job:          2681s      44.68m     0.74h    0.03d
    # the one failed job was completed manually on hgwdev

    cd /hive/data/genomes/mm10/bed/multiz60way/cons/vertebrate
    # create Most Conserved track
    cut -f1 ../../../../chrom.sizes | while read C
do
    ls -d bed/${C} 2> /dev/null | while read D
    do
        cat ${D}/${C}*.bed
    done | sort -k1,1 -k2,2n \
    | awk '{printf "%s\t%d\t%d\tlod=%d\t%s\n", "'${C}'", $2, $3, $5, $5;}'
done > tmpMostConserved.bed
    #   real    0m44.506s
    /cluster/bin/scripts/lodToBedScore tmpMostConserved.bed > mostConserved.bed

    time featureBits mm10 mostConserved.bed
    #   172842314 bases of 2652783500 (6.516%) in intersection
    #   real    1m23.298s

    # load into database
    ssh hgwdev
    cd /hive/data/genomes/mm10/bed/multiz60way/cons/vertebrate
    time nice -n +19 hgLoadBed mm10 phastConsElements60wayVertebrate \
	mostConserved.bed
    #   Read 6747163 elements of size 5 from mostConserved.bed
    #   real    1m15.122s

    # verify coverage
    featureBits mm10 phastConsElements60wayVertebrate
    #   172842314 bases of 2652783500 (6.516%) in intersection

    #	--rho 0.3 --expected-length 45 --target-coverage 0.3
    featureBits mm10 -enrichment refGene:cds phastConsElements60wayVertebrate
    #   refGene:cds 1.282%, phastConsElements60wayVertebrate 6.516%,
    #   both 0.914%, cover 71.26%, enrich 10.94x

    time featureBits mm10 -enrichment ensGene:cds phastConsElements60wayVertebrate
    #   ensGene:cds 1.357%, phastConsElements60wayVertebrate 6.516%,
    #   both 0.942%, cover 69.39%, enrich 10.65x
    #   real    0m51.139s

    time featureBits mm10 -enrichment knownGene:cds phastConsElements60wayVertebrate
    #   knownGene:cds 1.325%, phastConsElements60wayVertebrate 6.516%,
    #   both 0.930%, cover 70.18%, enrich 10.77x
    #   real    0m51.545s

    #	Create the downloads .pp files, from which the phastCons wiggle data
    #	is calculated
    # sort by chromName, chromStart so that items are in numerical order
    #  for wigEncode
    cd /hive/data/genomes/mm10/bed/multiz60way/cons/vertebrate
    mkdir downloads
    for D in `ls -d pp/chr* | sed -e 's#pp/##'`
do
    echo "working: $D"
    find ./pp/${D} -type f | sed -e "s#^./##; s#\.# d #g; s#-# m #;" \
	| sort -k1,1 -k3,3n | sed -e "s# d #.#g; s# m #-#g;" | xargs cat \
        | gzip -c > downloads/${D}.phastCons60way.vertebrate.wigFix.gz
done

    # Create merged posterior probability file and wiggle track data files
    time (zcat downloads/chr*.wigFix.gz \
	 | wigEncode stdin phastCons60wayVertebrate.wig \
        phastCons60wayVertebrate.wib > wigEncode.log 2>&1 ) &
    # Converted stdin, upper limit 1.00, lower limit 0.00
    #   real    9m48.554s

    #	encode to bigWig
    #	(warning wigToBigWig process grows to about 36 Gb)
    #	in bash, to avoid the 32 Gb memory limit:
export sizeG=188743680
ulimit -d $sizeG
ulimit -v $sizeG

    time (zcat downloads/*.wigFix.gz \
        | wigToBigWig stdin ../../../../chrom.sizes \
        phastCons60wayVertebrate.bw > bigWig.log 2>&1) &
    #   real    25m8.630s

    bigWigInfo phastCons60wayVertebrate.bw
version: 4
isCompressed: yes
isSwapped: 0
primaryDataSize: 3,333,348,984
primaryIndexSize: 100,774,056
zoomLevels: 10
chromCount: 59
basesCovered: 1,929,686,275
mean: 0.149646
min: 0.000000
max: 1.000000
std: 0.282502

    #	if desired to use the bigWig file, loading bigWig table:
    ln -s `pwd`/phastCons60wayVertebrate.bw /gbdb/mm10/bbi
    hgsql mm10 -e 'drop table if exists phastCons60wayVertebrate; \
            create table phastCons60wayVertebrate \
		(fileName varchar(255) not null); \
            insert into phastCons60wayVertebrate values
	("/gbdb/mm10/bbi/phastCons60wayVertebrate.bw");'

    ## load table with wiggle data
    ssh hgwdev
    cd /hive/data/genomes/mm10/bed/multiz60way/cons/vertebrate
    ln -s `pwd`/phastCons60wayVertebrate.wib \
	/gbdb/mm10/multiz60way/phastCons60wayVertebrate.wib
    time nice -n +19 hgLoadWiggle -pathPrefix=/gbdb/mm10/multiz60way mm10 \
	phastCons60wayVertebrate phastCons60wayVertebrate.wig
    #   real    0m45.432s

    time wigTableStats.sh mm10 phastCons60wayVertebrate
# db.table      min max mean count sumData
# mm10.phastCons60wayVertebrate 0 1 0.149646 1929686275 2.8877e+08
#	stdDev viewLimits
#       0.282502 viewLimits=0:1
    #   real    0m22.224s

    #  Create histogram to get an overview of all the data
    time nice -n +19 hgWiggle -doHistogram \
	-hBinSize=0.001 -hBinCount=1000 -hMinVal=0.0 -verbose=2 \
	    -db=mm10 phastCons60wayVertebrate  > histogram.data 2>&1
    #   real    2m52.041s

    #	create plot of histogram:

    cat << '_EOF_' | gnuplot > histo.png
set terminal png small x000000 xffffff xc000ff x66ff66 xffff00 x00ffff
set size 1.4, 0.8
set key left box
set grid noxtics
set grid ytics
set title " Mouse Mm10 Histogram phastCons60wayVertebrate track"
set xlabel " phastCons60wayVertebrate score"
set ylabel " Relative Frequency"
set y2label " Cumulative Relative Frequency (CRF)"
set y2range [0:1]
set y2tics
set yrange [0:0.02]

plot "histogram.data" using 2:5 title " RelFreq" with impulses, \
        "histogram.data" using 2:7 axes x1y2 title " CRF" with lines
'_EOF_'
    #	<< happy emacs

    display histo.png &

#########################################################################
# phyloP conservation for 60-way (DONE - 2012-06-15 - 2012-08-21 - Hiram)
#
# Vertebrate, Glire, Primate, Placental
#
    # split SS files into 1M chunks, this business needs smaller files
    #   to complete

    # many of these jobs run too much memory to finish on a kluster node
    # can run all of this on hgwdev

    mkdir /hive/data/genomes/mm10/bed/multiz60way/consPhyloP
    cd /hive/data/genomes/mm10/bed/multiz60way/consPhyloP
    mkdir ss run.split
    cd run.split

    cat << '_EOF_' > doSplit.csh
#!/bin/csh -ef
# doSplit.csh <chrom> <doneFile>
#
# Split the annotated MAF of one chromosome into SS windows with msa_split.
# <doneFile> and <doneFile>.running are marker files: when either one is
# non-empty the script exits without doing anything, so a finished or an
# in-progress chromosome is not started a second time.
set c = $1
set MAF = /hive/data/genomes/mm10/bed/multiz60way/anno/result/$c.maf
set WINDOWS = /hive/data/genomes/mm10/bed/multiz60way/consPhyloP/ss/$c
# check the markers first, before any work is done on the MAF file
if ( -s $2 ) then
    exit 0
endif
if ( -s $2.running ) then
    exit 0
endif

# WC is the total line count, NL the count of lines starting with #
# each count reads the whole MAF, so they are done only when work is needed
set WC = `cat $MAF | wc -l`
set NL = `grep "^#" $MAF | wc -l`

date >> $2.running

# start from an empty output directory for this chromosome
rm -fr $WINDOWS
mkdir $WINDOWS
pushd $WINDOWS > /dev/null
# msa_split is skipped when every line of the MAF starts with #
if ( $WC != $NL ) then
/cluster/bin/phast.build/cornellCVS/phast.2010-12-30/bin/msa_split \
    $MAF -i MAF -o SS -r $WINDOWS/$c -w 1000000,0 -I 1000 -B 5000
endif
popd > /dev/null
# record completion and release the running marker
date >> $2
rm -f $2.running
'_EOF_'
# << happy emacs
    # the template below runs this as ./doSplit.csh, it must be executable
    chmod +x doSplit.csh

    #	do the easy ones first to see some immediate results
    ls -1S -r ../../anno/result | sed -e "s/.maf//;" > maf.list

    cat << '_EOF_' > template
#LOOP
./doSplit.csh $(root1) $(root1).done
#ENDLOOP
'_EOF_'
# << happy emacs

    gensub2 maf.list single template jobList
    # copy the jobList to runEm.sh, edit to make all the commands run in
    #   the background, with wait statements every few commands to run
    #   a small number of these at once, no more than four at once with
    #   the large chroms, the small randoms can run a bunch at once, they
    #   finish quickly.
    time ./runEm.sh
    # about 11h30m

    # run phyloP with score=LRT
    ssh swarm
    cd /cluster/data/mm10/bed/multiz60way/consPhyloP
    mkdir run.phyloP
    cd run.phyloP

    # Adjust model file base composition background and rate matrix to be
    # representative of the chromosomes in play
    grep BACKGROUND ../../cons/all/all.mod | awk '{printf "%0.3f\n", $3 + $4}'
    #	0.525
    /cluster/bin/phast.build/cornellCVS/phast.2010-12-30/bin/modFreqs \
	../../cons/all/all.mod 0.525 > all.mod
    grep BACKGROUND ../../cons/glire/glire.mod \
	| awk '{printf "%0.3f\n", $3 + $4}'
    #	0.531
    /cluster/bin/phast.build/cornellCVS/phast.2010-12-30/bin/modFreqs \
	../../cons/glire/glire.mod 0.531 > glire.mod
    grep BACKGROUND ../../cons/primate/primate.mod \
	| awk '{printf "%0.3f\n", $3 + $4}'
    #	0.509
    /cluster/bin/phast.build/cornellCVS/phast.2010-12-30/bin/modFreqs \
	../../cons/primate/primate.mod 0.509 > primate.mod
    grep BACKGROUND ../../cons/euarchontoglires/euarchontoglires.mod \
	| awk '{printf "%0.3f\n", $3 + $4}'
    #	0.518
    /cluster/bin/phast.build/cornellCVS/phast.2010-12-30/bin/modFreqs \
	../../cons/euarchontoglires/euarchontoglires.mod 0.518 \
        > euarchontoglires.mod

    grep BACKGROUND ../../cons/placental/placental.mod \
	| awk '{printf "%0.3f\n", $3 + $4}'
    #	0.525
    /cluster/bin/phast.build/cornellCVS/phast.2010-12-30/bin/modFreqs \
	../../cons/placental/placental.mod 0.525 > placental.mod
    grep BACKGROUND ../../cons/vertebrate/vertebrate.mod \
	| awk '{printf "%0.3f\n", $3 + $4}'
    #	0.525
    /cluster/bin/phast.build/cornellCVS/phast.2010-12-30/bin/modFreqs \
	../../cons/vertebrate/vertebrate.mod 0.525 > vertebrate.mod

    cat << '_EOF_' > doPhyloP.csh
#!/bin/csh -fex
# doPhyloP.csh <chunk> <outFile>
#
# Run phyloP (LRT method, CONACC mode, wiggle output) on one SS chunk.
#   <chunk>   chunk path relative to the ss directory, without the .ss
#             suffix, e.g. chrM/chrM.1-16296
#   <outFile> path of the resulting wigFix file
# The model is selected by the name of the directory the job is run from:
# run from .../consPhyloP/<grp> it uses run.phyloP/<grp>.mod
set PHASTBIN = /cluster/bin/phast.build/cornellCVS/phast.2010-12-30/bin
set f = $1
# file name part of the chunk path
set ssFile = $1:t
echo "ssFile: $ssFile"
set out = $2
# directory part of the chunk path, passed to phyloP as the --chrom name
set cName = $f:h
echo "cName: $cName"
# last component of the current working directory
set grp = $cwd:t
set cons = /hive/data/genomes/mm10/bed/multiz60way/consPhyloP
# private scratch directory for this group and chunk, recreated empty
set tmp = $cons/tmp/$grp/$f
rm -fr $tmp
mkdir -p $tmp
set ssSrc = "$cons/ss/$cName/$ssFile"
set useGrp = "$grp.mod"
# the model is referenced by its short name from inside the scratch directory
ln -s $cons/run.phyloP/$grp.mod $tmp
pushd $tmp > /dev/null
echo source: $ssSrc.ss
$PHASTBIN/phyloP --method LRT --mode CONACC --wig-scores --chrom $cName \
    -i SS $useGrp $ssSrc.ss > $ssFile.wigFix
popd > /dev/null
mkdir -p $out:h
# NOTE(review): purpose of this pause is not recorded, presumably it lets
# the file system settle before the move - confirm before removing
sleep 4
mv $tmp/$ssFile.wigFix $out
rm -fr $tmp
'_EOF_'
    # << happy emacs
    chmod +x doPhyloP.csh

    # Create list of chunks
    find ../ss -type f | sed -e "s/.ss$//; s#../ss/##;" > ss.list

    # Create template file
    #	file1 == $chr/$chunk/file name without .ss suffix
    cat << '_EOF_' > template
#LOOP
../run.phyloP/doPhyloP.csh $(path1) {check out line+ wigFix/$(dir1)/$(file1).wigFix}
#ENDLOOP
'_EOF_'
    # << happy emacs

    ######################   Running all species  #######################
    # setup run for all species
    mkdir /hive/data/genomes/mm10/bed/multiz60way/consPhyloP/all
    cd /hive/data/genomes/mm10/bed/multiz60way/consPhyloP/all
    rm -fr wigFix
    mkdir wigFix

    gensub2 ../run.phyloP/ss.list single ../run.phyloP/template jobList
    para create jobList
    para try ... check ... push ... etc ...
    para time
# Completed: 2708 of 2708 jobs
# CPU time in finished jobs:    1832980s   30549.67m   509.16h   21.22d  0.058 y
# IO & Wait Time:                217434s    3623.90m    60.40h    2.52d  0.007 y
# Average job time:                 757s      12.62m     0.21h    0.01d
# Longest finished job:            1458s      24.30m     0.41h    0.02d
# Submission to last job:          3647s      60.78m     1.01h    0.04d

    # missed chrM in the original run:
    ../run.phyloP/doPhyloP.csh chrM/chrM.1-16296 wigFix/chrM/chrM.1-16296.wigFix

    ssh hgwdev
    # the cluster run above was set up in consPhyloP/all (not under
    # run.phyloP); that is where wigFix/ lives and the directory from which
    # ../../../../chrom.sizes used below resolves to the mm10 chrom.sizes
    cd /hive/data/genomes/mm10/bed/multiz60way/consPhyloP/all
    mkdir downloads
    # one gzipped wigFix file per chromosome directory.  The chunk files
    # are concatenated in order of chunk start: the first sed turns each "."
    # and the first "-" of the path into space separated tokens so that the
    # start coordinate becomes field 3 for the numeric sort, the second sed
    # restores the original path
    for D in `ls -d wigFix/chr* | sed -e 's#wigFix/##'`
do
    echo "working: $D"
    find ./wigFix/${D} -type f | sed -e "s#^./##; s#\.# d #g; s#-# m #;" \
	| sort -k1,1 -k3,3n | sed -e "s# d #.#g; s# m #-#g;" | xargs cat \
        | gzip -c > downloads/${D}.phyloP60way.wigFix.gz
done
    #   real    38m15.538s

    zcat downloads/*.wigFix.gz \
	| wigEncode stdin phyloP60way.wig phyloP60way.wib > wigEncode.log 2>&1 &
    #   Converted stdin, upper limit 7.53, lower limit -20.00
    #   real    27m53.384s

export sizeG=188743680
ulimit -d $sizeG
ulimit -v $sizeG
    time (zcat downloads/*.wigFix.gz \
        | wigToBigWig stdin ../../../../chrom.sizes phyloP60way.bw)
    #   real    30m10.440s

    bigWigInfo phyloP60way.bw
version: 4
isCompressed: yes
isSwapped: 0
primaryDataSize: 4,533,501,426
primaryIndexSize: 100,775,272
zoomLevels: 10
chromCount: 59
basesCovered: 1,929,686,275
mean: 0.169761
min: -20.000000
max: 7.532000
std: 0.942744

    #	if you wanted to use the bigWig file, loading bigWig table:
    ln -s `pwd`/phyloP60way.bw /gbdb/mm10/bbi
    hgsql mm10 -e 'drop table if exists phyloP60wayAll; \
            create table phyloP60wayAll \
		(fileName varchar(255) not null); \
            insert into phyloP60wayAll values
	("/gbdb/mm10/bbi/phyloP60way.bw");'

    #	loading the wiggle table:
    ln -s `pwd`/phyloP60way.wib /gbdb/mm10/multiz60way
    time nice -n +19 hgLoadWiggle -pathPrefix=/gbdb/mm10/multiz60way mm10 \
	phyloP60wayAll phyloP60way.wig
    #   real    1m16.934s

    wigTableStats.sh mm10 phyloP60wayAll
# db.table      min max mean count sumData
# mm10.phyloP60wayAll     -20 7.532 0.169761 1929686275 3.27586e+08
#	stdDev viewLimits
#       0.942744 viewLimits=-4.54396:4.88348
    #	that range is: 4.54396+4.88348 = 9.42744; the -hBinSize=0.0942744 used
    #   below spans that range in 100 bins (9.42744/1000 = 0.00942744 would
    #   be the bin size to get 1,000 bins)

    #  Create histogram to get an overview of all the data
    time nice -n +19 hgWiggle -doHistogram \
	-hBinSize=0.0942744 -hBinCount=1000 -hMinVal=-4.54396 -verbose=2 \
	    -db=mm10 phyloP60wayAll > histogram.data 2>&1
    #   real    5m58.309s

    #	create plot of histogram:

    cat << '_EOF_' | gnuplot > histo.png
set terminal png small x000000 xffffff xc000ff x66ff66 xffff00 x00ffff
set size 1.4, 0.8
set key left box
set grid noxtics
set grid ytics
set title " Mouse Mm10 Histogram phyloP60way track, all 60 vertebrates"
set xlabel " phyloP60way score, all 60 vertebrates"
set ylabel " Relative Frequency"
set y2label " Cumulative Relative Frequency (CRF)"
set y2range [0:1]
set y2tics
set yrange [0:0.2]
set xrange [-2:2]

plot "histogram.data" using 2:5 title " RelFreq" with impulses, \
        "histogram.data" using 2:7 axes x1y2 title " CRF" with lines
'_EOF_'
    #	<< happy emacs

    display histo.png &

    ######################   Running the glire  #######################
    mkdir /hive/data/genomes/mm10/bed/multiz60way/consPhyloP/glire
    cd /hive/data/genomes/mm10/bed/multiz60way/consPhyloP/glire
    rm -fr wigFix
    mkdir wigFix

    gensub2 ../run.phyloP/ss.list single ../run.phyloP/template jobList
    para create jobList
    para try ... check ... push ... etc ...
    para time
# Completed: 2709 of 2709 jobs
# CPU time in finished jobs:     206723s    3445.39m    57.42h    2.39d  0.007 y
# IO & Wait Time:                256366s    4272.76m    71.21h    2.97d  0.008 y
# Average job time:                 171s       2.85m     0.05h    0.00d
# Longest finished job:             487s       8.12m     0.14h    0.01d
# Submission to last job:          1926s      32.10m     0.54h    0.02d

    cd /hive/data/genomes/mm10/bed/multiz60way/consPhyloP/glire
    mkdir downloads
    # NOTE: in the original run these files were written as
    #   downloads/${D}.phastCons60way.glire.wigFix.gz
    # which was a copy and paste error; these are phyloP scores, so the
    # phyloP60way name is used here
    # one gzipped wigFix file per chromosome directory, chunk files
    # concatenated in order of chunk start (field 3 after the first sed)
    for D in `ls -d wigFix/chr* | sed -e 's#wigFix/##'`
do
    echo "working: $D"
    find ./wigFix/${D} -type f | sed -e "s#^./##; s#\.# d #g; s#-# m #;" \
	| sort -k1,1 -k3,3n | sed -e "s# d #.#g; s# m #-#g;" | xargs cat \
        | gzip -c > downloads/${D}.phyloP60way.glire.wigFix.gz
done

    time (zcat downloads/chr*.wigFix.gz  \
	| wigEncode stdin phyloP60wayGlire.wig phyloP60wayGlire.wib \
	> wigEncode.log 2>&1) &
    #   Converted stdin, upper limit 1.17, lower limit -4.35
    #   real     20m31.753s

export sizeG=188743680
ulimit -d $sizeG
ulimit -v $sizeG
    time (zcat downloads/chr*.wigFix.gz \
	| wigToBigWig stdin ../../../../chrom.sizes phyloP60wayGlire.bw) &
    #   real    37m9.063s
    bigWigInfo phyloP60wayGlire.bw
version: 4
isCompressed: yes
isSwapped: 0
primaryDataSize: 3,158,091,915
primaryIndexSize: 100,775,272
zoomLevels: 10
chromCount: 59
basesCovered: 1,929,686,275
mean: 0.073187
min: -4.346000
max: 1.165000
std: 0.602992

    #	if you wanted to use the bigWig file, loading bigWig table:
    ln -s `pwd`/phyloP60wayGlire.bw /gbdb/mm10/bbi
    hgsql mm10 -e 'drop table if exists phyloP60wayGlire; \
            create table phyloP60wayGlire \
		(fileName varchar(255) not null); \
            insert into phyloP60wayGlire values
	("/gbdb/mm10/bbi/phyloP60wayGlire.bw");'

    #	loading the wiggle table:
    ln -s `pwd`/phyloP60wayGlire.wib /gbdb/mm10/multiz60way
    time nice -n +19 hgLoadWiggle -pathPrefix=/gbdb/mm10/multiz60way mm10 \
	phyloP60wayGlire phyloP60wayGlire.wig
    #   real    0m58.536s

    wigTableStats.sh mm10 phyloP60wayGlire
# db.table      min max mean count sumData
# mm10.phyloP60wayGlire -4.346 1.165 0.0731873 1929686275 1.41229e+08
#	stdDev viewLimits
#       0.602992 viewLimits=-2.94177:1.165
    #	that range is: 4.346+1.165 = 5.511 -> hBinSize=0.005511

    #  Create histogram to get an overview of all the data
    time nice -n +19 hgWiggle -doHistogram \
	-hBinSize=0.005511 -hBinCount=1000 -hMinVal=-4.346 -verbose=2 \
	    -db=mm10 phyloP60wayGlire > histogram.data 2>&1
    #   real    8m23.088s

    #	create plot of histogram:
    cat << '_EOF_' | gnuplot > histo.png
# histogram of the phyloP60wayGlire scores from histogram.data, written
# as a PNG on stdout
set terminal png small x000000 xffffff xc000ff x66ff66 xffff00 x00ffff
set size 1.4, 0.8
set key left box
set grid noxtics
set grid ytics
set title " Mouse Mm10 Histogram phyloP60wayGlire track"
set xlabel " phyloP60wayGlire score"
set ylabel " Relative Frequency"
set y2label " Cumulative Relative Frequency (CRF)"
set y2range [0:1]
set y2tics
set yrange [0:0.15]
set xrange [-2:1.2]

# column 5 is drawn as impulses on the left axis (RelFreq), column 7 as a
# line on the right axis (CRF), both against column 2
plot "histogram.data" using 2:5 title " RelFreq" with impulses, \
        "histogram.data" using 2:7 axes x1y2 title " CRF" with lines
'_EOF_'
    #	<< happy emacs

    display histo.png &

    ###################   Running the euarchontoglires  #######################
    mkdir /hive/data/genomes/mm10/bed/multiz60way/consPhyloP/euarchontoglires
    cd /hive/data/genomes/mm10/bed/multiz60way/consPhyloP/euarchontoglires
    rm -fr wigFix
    mkdir wigFix

    gensub2 ../run.phyloP/ss.list single ../run.phyloP/template jobList
    para create jobList
    para try ... check ... push ... etc ...
    para time
# Completed: 2709 of 2709 jobs
# CPU time in finished jobs:     542547s    9042.45m   150.71h    6.28d  0.017 y
# IO & Wait Time:                 75914s    1265.23m    21.09h    0.88d  0.002 y
# Average job time:                 228s       3.80m     0.06h    0.00d
# Longest finished job:             430s       7.17m     0.12h    0.00d
# Submission to last job:          4149s      69.15m     1.15h    0.05d

    cd /hive/data/genomes/mm10/bed/multiz60way/consPhyloP/euarchontoglires
    mkdir downloads
    # NOTE: in the original run these files were written as
    #   downloads/${D}.phastCons60way.euarchontoglires.wigFix.gz
    # which was a copy and paste error; these are phyloP scores, so the
    # phyloP60way name is used here
    # one gzipped wigFix file per chromosome directory, chunk files
    # concatenated in order of chunk start (field 3 after the first sed)
    for D in `ls -d wigFix/chr* | sed -e 's#wigFix/##'`
do
    echo "working: $D"
    find ./wigFix/${D} -type f | sed -e "s#^./##; s#\.# d #g; s#-# m #;" \
	| sort -k1,1 -k3,3n | sed -e "s# d #.#g; s# m #-#g;" | xargs cat \
        | gzip -c > downloads/${D}.phyloP60way.euarchontoglires.wigFix.gz
done

    time (zcat downloads/chr*.wigFix.gz  \
	| wigEncode stdin phyloP60wayEuarchontoGlires.wig phyloP60wayEuarchontoGlires.wib \
	> wigEncode.log 2>&1) &
    #   Converted stdin, upper limit 1.75, lower limit -12.70
    #   real    10m52.064s

export sizeG=188743680
ulimit -d $sizeG
ulimit -v $sizeG
    time (zcat downloads/chr*.wigFix.gz \
	| wigToBigWig stdin ../../../../chrom.sizes phyloP60wayEuarchontoGlires.bw) &
    #   real    26m47.912s
    bigWigInfo phyloP60wayEuarchontoGlires.bw
version: 4
isCompressed: yes
isSwapped: 0
primaryDataSize: 3,970,501,521
primaryIndexSize: 100,775,272
zoomLevels: 10
chromCount: 59
basesCovered: 1,929,686,275
mean: 0.078739
min: -12.704000
max: 1.753000
std: 0.689759

    #	if you wanted to use the bigWig file, loading bigWig table:
    ln -s `pwd`/phyloP60wayEuarchontoGlires.bw /gbdb/mm10/bbi
    hgsql mm10 -e 'drop table if exists phyloP60wayEuarchontoGlires; \
            create table phyloP60wayEuarchontoGlires \
		(fileName varchar(255) not null); \
            insert into phyloP60wayEuarchontoGlires values
	("/gbdb/mm10/bbi/phyloP60wayEuarchontoGlires.bw");'

    #	loading the wiggle table:
    ln -s `pwd`/phyloP60wayEuarchontoGlires.wib /gbdb/mm10/multiz60way
    time nice -n +19 hgLoadWiggle -pathPrefix=/gbdb/mm10/multiz60way mm10 \
	phyloP60wayEuarchontoGlires phyloP60wayEuarchontoGlires.wig
    #   real    0m51.777s

    time wigTableStats.sh mm10 phyloP60wayEuarchontoGlires
# db.table      min max mean count
# mm10.phyloP60wayEuarchontoGlires -12.704 1.753 0.0787387 1929686275
#	sumData stdDev viewLimits
#       1.51941e+08 0.689759 viewLimits=-3.37006:1.753
    #   real    0m26.197s

    #	that range is: 12.704+1.753 = 14.457 -> hBinSize=0.014457

    #  Create histogram to get an overview of all the data
    time nice -n +19 hgWiggle -doHistogram \
	-hBinSize=0.014457 -hBinCount=1000 -hMinVal=-12.704 -verbose=2 \
	    -db=mm10 phyloP60wayEuarchontoGlires > histogram.data 2>&1
    #   real    3m22.205s

    #	create plot of histogram:
    cat << '_EOF_' | gnuplot > histo.png
set terminal png small x000000 xffffff xc000ff x66ff66 xffff00 x00ffff
set size 1.4, 0.8
set key left box
set grid noxtics
set grid ytics
set title " Mouse Mm10 Histogram phyloP60wayEuarchontoGlires track"
set xlabel " phyloP60wayEuarchontoGlires score"
set ylabel " Relative Frequency"
set y2label " Cumulative Relative Frequency (CRF)"
set y2range [0:1]
set y2tics
set yrange [0:0.15]
set xrange [-2:1.2]

plot "histogram.data" using 2:5 title " RelFreq" with impulses, \
        "histogram.data" using 2:7 axes x1y2 title " CRF" with lines
'_EOF_'
    #	<< happy emacs

    display histo.png &

    ######################   Running the primate  #######################
    ### ***### This was constructed
    ### and examined, but not used in the release
    mkdir /hive/data/genomes/mm10/bed/multiz60way/consPhyloP/primate
    cd /hive/data/genomes/mm10/bed/multiz60way/consPhyloP/primate
    rm -fr wigFix
    mkdir wigFix

    gensub2 ../run.phyloP/ss.list single ../run.phyloP/template jobList
    para -ram=8g create jobList
    para try ... check ... push ... etc ...
    para time
# Completed: 2709 of 2709 jobs
# CPU time in finished jobs:     307901s    5131.68m    85.53h    3.56d  0.010 y
# IO & Wait Time:                 42937s     715.62m    11.93h    0.50d  0.001 y
# Average job time:                 130s       2.16m     0.04h    0.00d
# Longest finished job:             234s       3.90m     0.07h    0.00d
# Submission to last job:          5975s      99.58m     1.66h    0.07d

    # the cluster run above was set up in consPhyloP/primate (not under
    # run.phyloP); that is where wigFix/ lives and the directory from which
    # ../../../../chrom.sizes used below resolves to the mm10 chrom.sizes
    cd /hive/data/genomes/mm10/bed/multiz60way/consPhyloP/primate
    mkdir downloads
    # NOTE: in the original run these files were written as
    #   downloads/${D}.phastCons60way.primate.wigFix.gz
    # which was a copy and paste error; these are phyloP scores, so the
    # phyloP60way name is used here
    # one gzipped wigFix file per chromosome directory, chunk files
    # concatenated in order of chunk start (field 3 after the first sed)
    for D in `ls -d wigFix/chr* | sed -e 's#wigFix/##'`
do
    echo "working: $D"
    find ./wigFix/${D} -type f | sed -e "s#^./##; s#\.# d #g; s#-# m #;" \
	| sort -k1,1 -k3,3n | sed -e "s# d #.#g; s# m #-#g;" | xargs cat \
        | gzip -c > downloads/${D}.phyloP60way.primate.wigFix.gz
done

    time (zcat downloads/chr*.wigFix.gz \
	| wigEncode stdin phyloP60wayPrimate.wig phyloP60wayPrimate.wib \
	> wigEncode.log 2>&1) &
    #   real    9m37.055s
    #   Converted stdin, upper limit 0.93, lower limit -10.63
export sizeG=188743680
ulimit -d $sizeG
ulimit -v $sizeG
    time (zcat downloads/chr*.wigFix.gz \
	| wigToBigWig stdin ../../../../chrom.sizes phyloP60wayPrimate.bw) &
    #   real    24m18.842s
    bigWigInfo phyloP60wayPrimate.bw
version: 4
isCompressed: yes
isSwapped: 0
primaryDataSize: 2,715,332,211
primaryIndexSize: 100,775,272
zoomLevels: 10
chromCount: 59
basesCovered: 1,929,686,275
mean: 0.060017
min: -10.633000
max: 0.930000
std: 0.518027

    #	loading bigWig table:
    ln -s `pwd`/phyloP60wayPrimate.bw /gbdb/mm10/bbi
    hgsql mm10 -e 'drop table if exists phyloP60wayPrimate; \
            create table phyloP60wayPrimate \
		(fileName varchar(255) not null); \
            insert into phyloP60wayPrimate values
	("/gbdb/mm10/bbi/phyloP60wayPrimate.bw");'

    #	loading the wiggle table:
    ln -s `pwd`/phyloP60wayPrimate.wib /gbdb/mm10/multiz60way
    time nice -n +19 hgLoadWiggle -pathPrefix=/gbdb/mm10/multiz60way mm10 \
	phyloP60wayPrimate phyloP60wayPrimate.wig
    #   real    0m45.837s

    wigTableStats.sh mm10 phyloP60wayPrimate
# db.table      min max mean count sumData
# mm10.phyloP60wayPrimate  -10.633 0.93 0.0600168 1929686275 1.15814e+08
#	stdDev viewLimits
#       0.518027 viewLimits=-2.53012:0.93
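    #	that range is: 10.633+0.93 = 11.563 for the hBinSize=0.11563
    #	(NOTE: 11.563 spread over 1000 bins is 0.011563; the 0.11563 used
    #	below is ten times that, unlike the other phyloP60way histograms,
    #	and the yrange chosen for the plot reflects the larger bins)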

    #  Create histogram to get an overview of all the data
    time nice -n +19 hgWiggle -doHistogram \
	-hBinSize=0.11563 -hBinCount=1000 -hMinVal=-10.633 -verbose=2 \
	    -db=mm10 phyloP60wayPrimate > histogram.data 2>&1
    #   real    4m36.379s
    # to see yrange:
    grep -v "^#" histogram.data | ave -col=5 stdin

    #	create plot of histogram:

    # Render histogram.data as histo.png: columns 2:5 are drawn as impulses
    # (relative frequency, left axis) and columns 2:7 as a line on the
    # second y axis (cumulative relative frequency).
    cat << '_EOF_' | gnuplot > histo.png
set terminal png small x000000 xffffff xc000ff x66ff66 xffff00 x00ffff
set size 1.4, 0.8
set key left box
set grid noxtics
set grid ytics
set title " Mouse Mm10 Histogram phyloP60wayPrimate track"
set xlabel " phyloP60wayPrimate score"
set ylabel " Relative Frequency"
set y2label " Cumulative Relative Frequency (CRF)"
set y2range [0:1]
set y2tics
set yrange [0:0.472]
set xrange [-2.5:1.0]

plot "histogram.data" using 2:5 title " RelFreq" with impulses, \
        "histogram.data" using 2:7 axes x1y2 title " CRF" with lines
'_EOF_'
    #	<< happy emacs

    display histo.png &

    ######################   Running the placental  #######################
    mkdir /hive/data/genomes/mm10/bed/multiz60way/consPhyloP/placental
    cd /hive/data/genomes/mm10/bed/multiz60way/consPhyloP/placental
    rm -fr wigFix
    mkdir wigFix

    gensub2 ../run.phyloP/ss.list single ../run.phyloP/template jobList
    para create jobList
    para try ... check ... push ... etc ...
    para time
# Completed: 2709 of 2709 jobs
# CPU time in finished jobs:    1188036s   19800.60m   330.01h   13.75d  0.038 y
# IO & Wait Time:                209859s    3497.65m    58.29h    2.43d  0.007 y
# Average job time:                 516s       8.60m     0.14h    0.01d
# Longest finished job:            1672s      27.87m     0.46h    0.02d
# Submission to last job:          6336s     105.60m     1.76h    0.07d

    # Gather the placental wigFix chunks into one gzipped file per chromosome.
    cd /hive/data/genomes/mm10/bed/multiz60way/consPhyloP/placental
    mkdir downloads
    for D in `ls -d wigFix/chr* | sed -e 's#wigFix/##'`
do
    echo "working: $D"
    # first sed: drop the leading "./", turn every "." into " d " and the
    # first "-" into " m " so the path splits into whitespace fields;
    # sort: order by field 1, then numerically by field 3;
    # second sed: restore the "." and "-" before xargs cat concatenates.
    find ./wigFix/${D} -type f | sed -e "s#^./##; s#\.# d #g; s#-# m #;" \
	| sort -k1,1 -k3,3n | sed -e "s# d #.#g; s# m #-#g;" | xargs cat \
        | gzip -c > downloads/${D}.phastCons60way.placental.wigFix.gz
    # XXX - copy and paste error, should have been phyloP60way and not phastCons
done
    time (zcat downloads/chr*.wigFix.gz \
	| wigEncode stdin phyloP60wayPlacental.wig phyloP60wayPlacental.wib \
	> wigEncode.log 2>&1) &
    #	Converted stdin, upper limit 3.30, lower limit -20.00
    #   real    11m54.289s

# limit the data-segment (-d) and virtual-memory (-v) size of this shell
# before starting the wigToBigWig conversion
export sizeG=188743680
ulimit -d $sizeG
ulimit -v $sizeG
    # stream all per-chromosome wigFix files into a single bigWig; the
    # redirect goes on its own continuation line so the backslash does not
    # escape a space and hand wigToBigWig a stray argument
    time (zcat downloads/chr*.wigFix.gz \
	| wigToBigWig stdin ../../../../chrom.sizes phyloP60wayPlacental.bw \
        > bigWig.log 2>&1) &
    #   real    28m4.576s
    bigWigInfo phyloP60wayPlacental.bw
version: 4
isCompressed: yes
isSwapped: 0
primaryDataSize: 4,423,832,009
primaryIndexSize: 100,775,272
zoomLevels: 10
chromCount: 59
basesCovered: 1,929,686,275
mean: 0.109489
min: -20.000000
max: 3.296000
std: 0.810657

    #	loading bigWig table if that is what you wanted to do:
    ln -s `pwd`/phyloP60wayPlacental.bw /gbdb/mm10/bbi
    hgsql mm10 -e 'drop table if exists phyloP60wayPlacental; \
            create table phyloP60wayPlacental \
		(fileName varchar(255) not null); \
            insert into phyloP60wayPlacental values
	("/gbdb/mm10/bbi/phyloP60wayPlacental.bw");'

    #	loading the wiggle table:
    ln -s `pwd`/phyloP60wayPlacental.wib /gbdb/mm10/multiz60way
    time nice -n +19 hgLoadWiggle -pathPrefix=/gbdb/mm10/multiz60way mm10 \
	phyloP60wayPlacental phyloP60wayPlacental.wig
    #   real    0m50.284s

    wigTableStats.sh mm10 phyloP60wayPlacental
# db.table      min max mean count sumData
# mm10.phyloP60wayPlacental -20 3.296 0.109489 1929686275 2.11279e+08
#       stdDev viewLimits
#       0.810657 viewLimits=-3.9438:3.296

    #	that range is: 20+3.296 = 23.296 for hBinSize=0.023296
    #  Create histogram to get an overview of all the data
    time nice -n +19 hgWiggle -doHistogram \
	-hBinSize=0.023296 -hBinCount=1000 -hMinVal=-20 -verbose=2 \
	    -db=mm10 phyloP60wayPlacental > histogram.data 2>&1
    #   real    3m24.650s
    # to see yrange:
    grep -v "^#" histogram.data | ave -col=5 stdin

    #	create plot of histogram:

    cat << '_EOF_' | gnuplot > histo.png
set terminal png small x000000 xffffff xc000ff x66ff66 xffff00 x00ffff
set size 1.4, 0.8
set key left box
set grid noxtics
set grid ytics
set title " Mouse Mm10 Histogram phyloP60wayPlacental track"
set xlabel " phyloP60wayPlacental score"
set ylabel " Relative Frequency"
set y2label " Cumulative Relative Frequency (CRF)"
set y2range [0:1]
set y2tics
set yrange [0:0.084]
set xrange [-2.5:2.5]

plot "histogram.data" using 2:5 title " RelFreq" with impulses, \
        "histogram.data" using 2:7 axes x1y2 title " CRF" with lines
'_EOF_'
    #	<< happy emacs

    display histo.png &

    ######################   Running the vertebrate  #######################
    mkdir /hive/data/genomes/mm10/bed/multiz60way/consPhyloP/vertebrate
    cd /hive/data/genomes/mm10/bed/multiz60way/consPhyloP/vertebrate
    rm -fr wigFix
    mkdir wigFix

    gensub2 ../run.phyloP/ss.list single ../run.phyloP/template jobList
    para create jobList
    para try ... check ... push ... etc ...
    para time
# Completed: 2709 of 2709 jobs
# CPU time in finished jobs:    1825414s   30423.56m   507.06h   21.13d  0.058 y
# IO & Wait Time:                211040s    3517.34m    58.62h    2.44d  0.007 y
# Average job time:                 752s      12.53m     0.21h    0.01d
# Longest finished job:            1530s      25.50m     0.42h    0.02d
# Submission to last job:          6045s     100.75m     1.68h    0.07d

    # Gather the vertebrate wigFix chunks into one gzipped file per chromosome.
    cd /hive/data/genomes/mm10/bed/multiz60way/consPhyloP/vertebrate
    mkdir downloads
    for D in `ls -d wigFix/chr* | sed -e 's#wigFix/##'`
do
    echo "working: $D"
    # first sed: drop the leading "./", turn every "." into " d " and the
    # first "-" into " m " so the path splits into whitespace fields;
    # sort: order by field 1, then numerically by field 3;
    # second sed: restore the "." and "-" before xargs cat concatenates.
    find ./wigFix/${D} -type f | sed -e "s#^./##; s#\.# d #g; s#-# m #;" \
	| sort -k1,1 -k3,3n | sed -e "s# d #.#g; s# m #-#g;" | xargs cat \
        | gzip -c > downloads/${D}.phastCons60way.vertebrate.wigFix.gz
    # XXX - copy and paste error, should have been phyloP60way and not phastCons
done

    time (zcat downloads/chr*.wigFix.gz \
	| wigEncode stdin phyloP60wayVertebrate.wig phyloP60wayVertebrate.wib \
	> wigEncode.log 2>&1) &
    #	Converted stdin, upper limit 7.53, lower limit -20.00
    #   real    12m2.774s

export sizeG=188743680
ulimit -d $sizeG
ulimit -v $sizeG
    time (zcat downloads/chr*.wigFix.gz \
	| wigToBigWig stdin ../../../../chrom.sizes phyloP60wayVertebrate.bw \
        > bigWig.log 2>&1) &
    #   real    27m6.791s
    bigWigInfo phyloP60wayVertebrate.bw
version: 4
isCompressed: yes
isSwapped: 0
primaryDataSize: 4,529,467,614
primaryIndexSize: 100,775,272
zoomLevels: 10
chromCount: 59
basesCovered: 1,929,686,275
mean: 0.169653
min: -20.000000
max: 7.532000
std: 0.942808

    #	loading bigWig table:
    ln -s `pwd`/phyloP60wayVertebrate.bw /gbdb/mm10/bbi
    hgsql mm10 -e 'drop table if exists phyloP60wayVertebrate; \
            create table phyloP60wayVertebrate \
		(fileName varchar(255) not null); \
            insert into phyloP60wayVertebrate values
	("/gbdb/mm10/bbi/phyloP60wayVertebrate.bw");'

    #	loading the wiggle table:
    ln -s `pwd`/phyloP60wayVertebrate.wib /gbdb/mm10/multiz60way
    time nice -n +19 hgLoadWiggle -pathPrefix=/gbdb/mm10/multiz60way mm10 \
	phyloP60wayVertebrate phyloP60wayVertebrate.wig
    #   real    0m56.535s

    time wigTableStats.sh mm10 phyloP60wayVertebrate
# db.table      min max mean count sumData
# mm10.phyloP60wayVertebrate -20 7.532 0.169653 1929686275 3.27377e+08
#	stdDev viewLimits
#       0.942808 viewLimits=-4.54439:4.88369
    #   real    0m25.320s

    #	that range is: 20+7.532 = 27.532 for hBinSize=0.027532

    #  Create histogram to get an overview of all the data
    time nice -n +19 hgWiggle -doHistogram \
	-hBinSize=0.027532 -hBinCount=1000 -hMinVal=-20 -verbose=2 \
	    -db=mm10 phyloP60wayVertebrate > histogram.data 2>&1
    #   real    3m26.565s
    # to see yrange:
    egrep -v "^#|udcfileOpen" histogram.data  | ave -col=5 stdin

    #	create plot of histogram:

    cat << '_EOF_' | gnuplot > histo.png
set terminal png small x000000 xffffff xc000ff x66ff66 xffff00 x00ffff
set size 1.4, 0.8
set key left box
set grid noxtics
set grid ytics
set title " Mouse Mm10 Histogram phyloP60wayVertebrate track"
set xlabel " phyloP60wayVertebrate score"
set ylabel " Relative Frequency"
set y2label " Cumulative Relative Frequency (CRF)"
set y2range [0:1]
set y2tics
set yrange [0:0.1123]
set xrange [-2.5:2.5]

plot "histogram.data" using 2:5 title " RelFreq" with impulses, \
        "histogram.data" using 2:7 axes x1y2 title " CRF" with lines
'_EOF_'
    #	<< happy emacs

    display histo.png &

#########################################################################
# construct download files for 60-way (DONE - 2012-06-27 - 2012-08-21 - Hiram)
mkdir /usr/local/apache/htdocs-hgdownload/goldenPath/mm10/multiz60way
mkdir /usr/local/apache/htdocs-hgdownload/goldenPath/mm10/multiz60way/maf
mkdir /usr/local/apache/htdocs-hgdownload/goldenPath/mm10/multiz60way/alignments
mkdir /usr/local/apache/htdocs-hgdownload/goldenPath/mm10/phastCons60way
mkdir /usr/local/apache/htdocs-hgdownload/goldenPath/mm10/phastCons60way/glire
mkdir /usr/local/apache/htdocs-hgdownload/goldenPath/mm10/phastCons60way/primate
mkdir /usr/local/apache/htdocs-hgdownload/goldenPath/mm10/phastCons60way/euarchontoglire
mkdir /usr/local/apache/htdocs-hgdownload/goldenPath/mm10/phastCons60way/placental
mkdir /usr/local/apache/htdocs-hgdownload/goldenPath/mm10/phastCons60way/vertebrate
mkdir /usr/local/apache/htdocs-hgdownload/goldenPath/mm10/phastCons60way/mm10.60way.phastCons
mkdir /usr/local/apache/htdocs-hgdownload/goldenPath/mm10/phyloP60way
mkdir /usr/local/apache/htdocs-hgdownload/goldenPath/mm10/phyloP60way/glire
mkdir /usr/local/apache/htdocs-hgdownload/goldenPath/mm10/phyloP60way/primate
mkdir /usr/local/apache/htdocs-hgdownload/goldenPath/mm10/phyloP60way/euarchontoglire
mkdir /usr/local/apache/htdocs-hgdownload/goldenPath/mm10/phyloP60way/placental
mkdir /usr/local/apache/htdocs-hgdownload/goldenPath/mm10/phyloP60way/vertebrate
mkdir /usr/local/apache/htdocs-hgdownload/goldenPath/mm10/phyloP60way/mm10.60way.phyloP60way
    mkdir /hive/data/genomes/mm10/bed/multiz60way/downloads
    cd /hive/data/genomes/mm10/bed/multiz60way/downloads
    mkdir multiz60way phastCons60way phyloP60way
    cd multiz60way
    mkdir maf alignments
    cd maf
    time cp -p ../../../anno/result/chr*.maf .
    #   real    735m35.723s
    time gzip *.maf
    #   real    700m23.340s
    md5sum *.maf.gz > md5sum.txt
    ln -s `pwd`/*.maf.gz `pwd`/md5sum.txt \
        /usr/local/apache/htdocs-hgdownload/goldenPath/mm10/multiz60way/maf
    cd ..
    du -hsc maf
    #   24G     maf
    du -hsc ../../anno/result/
    #   244G    ../../anno/result/
    ln -s ../../mm10.60way.nh .
    ln -s ../../mm10.60way.commonNames.nh .
    ln -s `pwd`/*.nh \
        /usr/local/apache/htdocs-hgdownload/goldenPath/mm10/multiz60way

    #####################################################################
    cd /hive/data/genomes/mm10/bed/multiz60way/downloads/phastCons60way
    mkdir glire euarchontoglire primate placental vertebrate mm10.60way.phastCons
    cd glire
    ln -s ../../../cons/glire/downloads/chr*.gz .
    time md5sum *.gz > md5sum.txt &
    ln -s `pwd`/*.gz `pwd`/md5sum.txt \
        /usr/local/apache/htdocs-hgdownload/goldenPath/mm10/phastCons60way/glire
    #   real    5m50.001s
    cd ../euarchontoglire
    ln -s ../../../cons/euarchontoglires/downloads/chr*.gz .
    time md5sum *.gz > md5sum.txt &
    #   real    1m14.103s
    ln -s `pwd`/*.gz `pwd`/md5sum.txt \
      /usr/local/apache/htdocs-hgdownload/goldenPath/mm10/phastCons60way/euarchontoglire
    cd ../primate
    ln -s ../../../cons/primate/downloads/chr*.gz .
    time md5sum *.gz > md5sum.txt &
    ln -s `pwd`/*.gz `pwd`/md5sum.txt \
      /usr/local/apache/htdocs-hgdownload/goldenPath/mm10/phastCons60way/primate
    #   real    5m39.288s
    cd ../placental
    ln -s ../../../cons/placental/downloads/chr*.gz .
    time md5sum *.gz > md5sum.txt &
    ln -s `pwd`/*.gz `pwd`/md5sum.txt \
  /usr/local/apache/htdocs-hgdownload/goldenPath/mm10/phastCons60way/placental
    #   real    5m9.762s
    cd ../vertebrate
    ln -s ../../../cons/vertebrate/downloads/chr*.gz .
    time md5sum *.gz > md5sum.txt &
    ln -s `pwd`/*.gz `pwd`/md5sum.txt \
  /usr/local/apache/htdocs-hgdownload/goldenPath/mm10/phastCons60way/vertebrate
    #   real    0m45.408s
    cd ../mm10.60way.phastCons
    ln -s ../../../cons/all/downloads/chr*.gz .
    time md5sum *.gz > md5sum.txt &
    ln -s `pwd`/*.gz `pwd`/md5sum.txt \
  /usr/local/apache/htdocs-hgdownload/goldenPath/mm10/phastCons60way/mm10.60way.phastCons
    #   real    6m11.158s
    cd ..
    ln -s ../../cons/all/all.mod mm10.60way.phastCons.mod
    ln -s ../../cons/glire/glire.mod mm10.60way.phastCons.glire.mod
    ln -s ../../cons/primate/primate.mod mm10.60way.phastCons.primate.mod
    ln -s ../../cons/euarchontoglires/euarchontoglires.mod mm10.60way.phastCons.euarchontoglire.mod
    ln -s ../../cons/placental/placental.mod mm10.60way.phastCons.placental.mod
    ln -s ../../cons/vertebrate/vertebrate.mod mm10.60way.phastCons.vertebrate.mod
    ln -s ../../cons/all/phastCons60way.bw mm10.60way.phastCons.bw
    ln -s ../../cons/glire/phastCons60wayGlire.bw \
        mm10.60way.phastCons60wayGlire.bw
    ln -s ../../cons/placental/phastCons60wayPlacental.bw \
        mm10.60way.phastCons60wayPlacental.bw
    ln -s ../../cons/euarchontoglires/phastCons60wayEuarchontoGlires.bw \
        mm10.60way.phastCons60wayEuarchontoGlire.bw
    ln -s ../../cons/primate/phastCons60wayPrimate.bw \
        mm10.60way.phastCons60wayPrimate.bw
    ln -s ../../cons/vertebrate/phastCons60wayVertebrate.bw \
        mm10.60way.phastCons60wayVertebrate.bw
    # checksum the model and bigWig links in this directory
    time md5sum *.mod *.bw > md5sum.txt
    #   real    20m11.260s
    # obtain the README.txt from hg19/phastCons46way and update for this
    #   situation
    # publish md5sum.txt together with the files it covers, the same way
    # the phyloP60way download directory is populated
    ln -s `pwd`/*.mod `pwd`/*.bw `pwd`/md5sum.txt `pwd`/README.txt \
      /usr/local/apache/htdocs-hgdownload/goldenPath/mm10/phastCons60way

    #####################################################################
    cd /hive/data/genomes/mm10/bed/multiz60way/downloads/phyloP60way
    mkdir glire euarchontoglire primate placental vertebrate mm10.60way.phyloP60way
    cd glire
    ln -s ../../../consPhyloP/glire/downloads/chr*.gz .
    time md5sum *.gz > md5sum.txt &
    ln -s `pwd`/*.gz `pwd`/md5sum.txt \
        /usr/local/apache/htdocs-hgdownload/goldenPath/mm10/phyloP60way/glire
    #   real    6m5.733s
    cd ../euarchontoglire
    ln -s ../../../consPhyloP/euarchontoglires/downloads/chr*.gz .
    time md5sum *.gz > md5sum.txt &
    ln -s `pwd`/*.gz `pwd`/md5sum.txt \
/usr/local/apache/htdocs-hgdownload/goldenPath/mm10/phyloP60way/euarchontoglire
    #   real    5m40.272s
    cd ../primate
    ln -s ../../../consPhyloP/primate/downloads/chr*.gz .
    time md5sum *.gz > md5sum.txt &
    ln -s `pwd`/*.gz `pwd`/md5sum.txt \
        /usr/local/apache/htdocs-hgdownload/goldenPath/mm10/phyloP60way/primate
    #   real    7m22.623s
    cd ../placental
    ln -s ../../../consPhyloP/placental/downloads/chr*.gz .
    time md5sum *.gz > md5sum.txt &
    ln -s `pwd`/*.gz `pwd`/md5sum.txt \
      /usr/local/apache/htdocs-hgdownload/goldenPath/mm10/phyloP60way/placental
    #   real    7m39.269s
    cd ../vertebrate
    ln -s ../../../consPhyloP/vertebrate/downloads/chr*.gz .
    time md5sum *.gz > md5sum.txt &
    ln -s `pwd`/*.gz `pwd`/md5sum.txt \
      /usr/local/apache/htdocs-hgdownload/goldenPath/mm10/phyloP60way/vertebrate
    cd ../mm10.60way.phyloP60way
    ln -s ../../../consPhyloP/all/downloads/chr*.gz .
    time md5sum *.gz > md5sum.txt &
    ln -s `pwd`/*.gz `pwd`/md5sum.txt \
      /usr/local/apache/htdocs-hgdownload/goldenPath/mm10/phyloP60way/mm10.60way.phyloP60way
    #   real    8m5.777s

    cd ..
    ln -s ../../consPhyloP/run.phyloP/all.mod mm10.60way.phyloP60way.mod
    ln -s ../../consPhyloP/run.phyloP/glire.mod ./mm10.phyloP.glire.mod
    ln -s ../../consPhyloP/run.phyloP/placental.mod ./mm10.phyloP.placental.mod
    ln -s ../../consPhyloP/run.phyloP/euarchontoglires.mod ./mm10.phyloP.euarchontoglire.mod
    ln -s ../../consPhyloP/run.phyloP/primate.mod ./mm10.phyloP.primate.mod
    ln -s ../../consPhyloP/run.phyloP/vertebrate.mod ./mm10.60way.vertebrate.mod

    ln -s ../../consPhyloP/all/phyloP60way.bw mm10.60way.phyloP60way.bw
    ln -s ../../consPhyloP/glire/phyloP60wayGlire.bw \
        mm10.60way.phyloP60wayGlire.bw
    ln -s ../../consPhyloP/vertebrate/phyloP60wayVertebrate.bw \
        mm10.60way.phyloP60wayVertebrate.bw
    ln -s ../../consPhyloP/placental/phyloP60wayPlacental.bw \
        mm10.60way.phyloP60wayPlacental.bw
    ln -s ../../consPhyloP/euarchontoglires/phyloP60wayEuarchontoGlires.bw \
        mm10.60way.phyloP60wayEuarchontoglire.bw
    ln -s ../../consPhyloP/primate/phyloP60wayPrimate.bw \
        mm10.60way.phyloP60wayPrimate.bw

    time md5sum *.mod *.bw > md5sum.txt &
    #   real    20m17.082s

    # obtain the README.txt from hg19/phyloP46way and update for this
    #   situation
    ln -s `pwd`/*.mod `pwd`/*.bw `pwd`/md5sum.txt `pwd`/README.txt \
      /usr/local/apache/htdocs-hgdownload/goldenPath/mm10/phyloP60way

    ###########################################################################
    ## create upstream refGene maf files
    # Build the upstream files in downloads/multiz60way: that is the
    # directory created earlier (downloads/maf was never made), it holds the
    # *.nh links, and the md5sum/ln steps that follow expect *.nh and
    # *.maf.gz side by side in the current directory.
    cd /hive/data/genomes/mm10/bed/multiz60way/downloads/multiz60way
    # bash script
#!/bin/sh
# For each upstream size S: featureBits writes the refGene upstream:S
# regions as bed to stdout, perl replaces the "_up..." tail of the name
# with a tab and 0, the result is sorted by column 1 then numerically by
# column 2, mafFrags reads it against the multiz60way table using the
# species.list ordering, and the output is gzipped to upstream${S}.maf.gz.
for S in 1000 2000 5000
do
    echo "making upstream${S}.maf"
    featureBits mm10 refGene:upstream:${S} -fa=/dev/null -bed=stdout \
        | perl -wpe 's/_up[^\t]+/\t0/' | sort -k1,1 -k2,2n \
        | /cluster/bin/$MACHTYPE/mafFrags mm10 multiz60way \
                stdin stdout \
                -orgs=/hive/data/genomes/mm10/bed/multiz60way/species.list \
        | gzip -c > upstream${S}.maf.gz
    echo "done upstream${S}.maf.gz"
done
    #   real    199m45.558s

    md5sum *.nh *.maf.gz > md5sum.txt
    #   real    27m59.778s

    # obtain the README.txt from hg19/multiz46way and update for this
    #   situation
    ln -s `pwd`/*.nh `pwd`/*.maf.gz `pwd`/*.txt \
        /usr/local/apache/htdocs-hgdownload/goldenPath/mm10/multiz60way

#############################################################################
# hgPal downloads (DONE - 2012-07-05 - 2012-07-09 - Hiram)
#   FASTA from 60-way for refGene

    ssh hgwdev
    screen -S mm10HgPal
    mkdir /hive/data/genomes/mm10/bed/multiz60way/pal
    cd /hive/data/genomes/mm10/bed/multiz60way/pal
    # order.list: species.list with each space turned into a newline
    tr '[ ]' '[\n]' < ../species.list > order.list

    export mz=multiz60way gp=refGene db=mm10
    export I=0
    mkdir exonAA exonNuc
    # Write $gp.jobs: for every chromosome name (chrom.sizes sorted
    # numerically on its second column) emit two backgrounded mafGene
    # commands, one nucleotide (-noTrans) and one amino acid.  After every
    # 7th chromosome, and once more at the very end, emit "date" and "wait"
    # so no more than 14 commands are running when the list is executed.
    {
        for C in $(sort -nk2 ../../../chrom.sizes | cut -f1)
        do
            I=$((I + 1))
            printf '%s\n' "mafGene -chrom=$C -exons -noTrans $db $mz $gp order.list stdout | gzip -c > exonNuc/$C.exonNuc.fa.gz &"
            printf '%s\n' "mafGene -chrom=$C -exons $db $mz $gp order.list stdout | gzip -c > exonAA/$C.exonAA.fa.gz &"
            if [ "$I" -ge 7 ]; then
                printf 'date\nwait\n'
                I=0
            fi
        done
        printf 'date\nwait\n'
    } > $gp.jobs

    time sh -x $gp.jobs > $gp.jobs.log 2>&1 &
    #   real    93m34.376s

    mz=multiz60way
    gp=refGene
    db=mm10
    time zcat exonAA/*.gz | gzip -c > $gp.$mz.exonAA.fa.gz
    #   real    1m16.821s
    zcat exonNuc/*.gz | gzip -c > $gp.$mz.exonNuc.fa.gz

    rm -rf exonAA exonNuc

    # we're only distributing exons at the moment
    mz=multiz60way
    gp=refGene
    db=mm10
    pd=/usr/local/apache/htdocs-hgdownload/goldenPath/$db/$mz/alignments
    mkdir -p $pd
    md5sum *.fa.gz > md5sum.txt
    ln -s `pwd`/$gp.$mz.exonAA.fa.gz $pd/$gp.exonAA.fa.gz
    ln -s `pwd`/$gp.$mz.exonNuc.fa.gz $pd/$gp.exonNuc.fa.gz
    ln -s `pwd`/md5sum.txt $pd/


#########################################################################
# lastz nile tilapia oreNil2 (DONE - 2012-04-02 - Hiram)
    # establish a screen to control this job with a name to indicate what it is
    screen -S mm10OreNil2
    mkdir /hive/data/genomes/mm10/bed/lastzOreNil2.2012-04-11
    cd /hive/data/genomes/mm10/bed/lastzOreNil2.2012-04-11

    cat << '_EOF_' > DEF
# Mouse vs. nile tilapia
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
BLASTZ_Y=3400
BLASTZ_L=6000
BLASTZ_K=2200
BLASTZ_Q=/scratch/data/blastz/HoxD55.q

# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: nile tilapia oreNil2
SEQ2_DIR=/hive/data/genomes/oreNil2/oreNil2.2bit
SEQ2_LEN=/hive/data/genomes/oreNil2/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LAP=0
SEQ2_LIMIT=10

BASE=/hive/data/genomes/mm10/bed/lastzOreNil2.2012-04-11
TMPDIR=/scratch/tmp
'_EOF_'
    # << happy emacs

    # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
    #	number of jobs, 50,000 to something under 100,000
    # when not present, SEQ2_LIMIT is a default 100
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
        `pwd`/DEF \
        -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
        -chainMinScore=5000 -chainLinearGap=loose > do.log 2>&1 &
    #   real    108m51.232s

    cat fb.mm10.chainOreNil2Link.txt
    #   51909908 bases of 2652783500 (1.957%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/mm10/bed
    ln -s lastzOreNil2.2012-04-11 lastz.oreNil2

    #	and for the swap
    mkdir /hive/data/genomes/oreNil2/bed/blastz.mm10.swap
    cd /hive/data/genomes/oreNil2/bed/blastz.mm10.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzOreNil2.2012-04-11/DEF \
        -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
        -swap -chainMinScore=5000 -chainLinearGap=loose > swap.log 2>&1 &
    #   real    9m8.213s
    cat  fb.oreNil2.chainMm10Link.txt
    #   49704887 bases of 816084674 (6.091%) in intersection

    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/oreNil2/bed
    ln -s blastz.mm10.swap lastz.mm10

#########################################################################
# LASTZ pig susScr3 (DONE - 2012-04-13 - Hiram)
    # establish a screen to control this job with a name to indicate what it is
    screen -S mm10SusScr3
    mkdir /hive/data/genomes/mm10/bed/lastzSusScr3.2012-04-13
    cd /hive/data/genomes/mm10/bed/lastzSusScr3.2012-04-13

    # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
    #	number of jobs, 50,000 to something under 100,000
    cat << '_EOF_' > DEF
# pig vs mouse
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: pig SusScr3
SEQ2_DIR=/hive/data/genomes/susScr3/susScr3.2bit
SEQ2_LEN=/hive/data/genomes/susScr3/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LAP=0
SEQ2_LIMIT=30

BASE=/hive/data/genomes/mm10/bed/lastzSusScr3.2012-04-13
TMPDIR=/scratch/tmp
'_EOF_'
    # << happy emacs

    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	`pwd`/DEF \
	-syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
    #   real    1086m29.992s
    cat fb.mm10.chainSusScr3Link.txt
    #   681359766 bases of 2652783500 (25.685%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/mm10/bed
    ln -s lastzSusScr3.2012-04-13 lastz.susScr3

    mkdir /hive/data/genomes/susScr3/bed/blastz.mm10.swap
    cd /hive/data/genomes/susScr3/bed/blastz.mm10.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzSusScr3.2012-04-13/DEF \
	-swap -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
    #   real    104m56.258s
    cat fb.susScr3.chainMm10Link.txt
    #   743574150 bases of 2525294057 (29.445%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/susScr3/bed
    ln -s blastz.mm10.swap lastz.mm10

##############################################################################
# LASTZ armadillo dasNov3 (DONE - 2012-04-13 - Hiram)
    # establish a screen to control this job with a name to indicate what it is
    screen -S mm10DasNov3
    mkdir /hive/data/genomes/mm10/bed/lastzDasNov3.2012-04-13
    cd /hive/data/genomes/mm10/bed/lastzDasNov3.2012-04-13

    # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
    #	number of jobs, 50,000 to something under 100,000
    cat << '_EOF_' > DEF
# armadillo vs mouse
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: armadillo DasNov3
SEQ2_DIR=/hive/data/genomes/dasNov3/dasNov3.2bit
SEQ2_LEN=/hive/data/genomes/dasNov3/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LAP=0
SEQ2_LIMIT=200

BASE=/hive/data/genomes/mm10/bed/lastzDasNov3.2012-04-13
TMPDIR=/scratch/tmp
'_EOF_'
    # << happy emacs

    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	`pwd`/DEF \
	-syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
    #   real    1125m34.124s
    cat fb.mm10.chainDasNov3Link.txt
    #   668529920 bases of 2652783500 (25.201%) in intersection

    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/mm10/bed
    ln -s lastzDasNov3.2012-04-13 lastz.dasNov3

    # better to have reciprocal best for this one since it is low coverage:
    cd /hive/data/genomes/mm10/bed/lastzDasNov3.2012-04-13
    time doRecipBest.pl mm10 dasNov3 -buildDir=`pwd` -workhorse=hgwdev \
	> best.log 2>&1 &
    #   real    116m51.114s

    mkdir /hive/data/genomes/dasNov3/bed/blastz.mm10.swap
    cd /hive/data/genomes/dasNov3/bed/blastz.mm10.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzDasNov3.2012-04-13/DEF \
	-swap -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
    #	real    150m51.653s
    cat fb.dasNov3.chainMm10Link.txt
    #   695161920 bases of 3299882059 (21.066%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/dasNov3/bed
    ln -s blastz.mm10.swap lastz.mm10

##############################################################################
# LASTZ cat felCat5 (DONE - 2012-04-13 - Hiram)
    # establish a screen to control this job with a name to indicate what it is
    screen -S mm10FelCat5
    mkdir /hive/data/genomes/mm10/bed/lastzFelCat5.2012-04-13
    cd /hive/data/genomes/mm10/bed/lastzFelCat5.2012-04-13

    # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
    #	number of jobs, 50,000 to something under 100,000
    cat << '_EOF_' > DEF
# cat vs mouse
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: cat FelCat5
SEQ2_DIR=/hive/data/genomes/felCat5/felCat5.2bit
SEQ2_LEN=/hive/data/genomes/felCat5/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LAP=0
SEQ2_LIMIT=30

BASE=/hive/data/genomes/mm10/bed/lastzFelCat5.2012-04-13
TMPDIR=/scratch/tmp
'_EOF_'
    # << happy emacs

    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	`pwd`/DEF \
	-syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
    #   real    1029m54.494s
    cat fb.mm10.chainFelCat5Link.txt
    #   788544084 bases of 2652783500 (29.725%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/mm10/bed
    ln -s lastzFelCat5.2012-04-13 lastz.felCat5

    # better to have reciprocal best for this one since it is low coverage:
    cd /hive/data/genomes/mm10/bed/lastzFelCat5.2012-04-13
    time doRecipBest.pl mm10 felCat5 -buildDir=`pwd` -workhorse=hgwdev \
	> best.log 2>&1 &
    #	real    106m30.011s

    mkdir /hive/data/genomes/felCat5/bed/blastz.mm10.swap
    cd /hive/data/genomes/felCat5/bed/blastz.mm10.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzFelCat5.2012-04-13/DEF \
	-swap -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
    #	 real 124m25.850s
    cat fb.felCat5.chainMm10Link.txt
    #   762344436 bases of 2364296207 (32.244%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/felCat5/bed
    ln -s blastz.mm10.swap lastz.mm10

##############################################################################
# LASTZ naked mole rat hetGla2 (DONE - 2012-04-14 - Hiram)
    #	establish a screen to control this job
    screen -S mm10HetGla2
    mkdir /hive/data/genomes/mm10/bed/lastzHetGla2.2012-04-14
    cd /hive/data/genomes/mm10/bed/lastzHetGla2.2012-04-14

    # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
    #	number of jobs, 50,000 to something under 100,000
    cat << '_EOF_' > DEF
# naked mole rat vs mouse
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: naked mole rat HetGla2
SEQ2_DIR=/hive/data/genomes/hetGla2/hetGla2.2bit
SEQ2_LEN=/hive/data/genomes/hetGla2/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LAP=0
SEQ2_LIMIT=30

BASE=/hive/data/genomes/mm10/bed/lastzHetGla2.2012-04-14
TMPDIR=/scratch/tmp
'_EOF_'
    # << happy emacs

    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	`pwd`/DEF \
	-syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
    #   real    690m7.626s
    cat fb.mm10.chainHetGla2Link.txt
    #	853221843 bases of 2652783500 (32.163%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/mm10/bed
    ln -s lastzHetGla2.2012-04-14 lastz.hetGla2

    mkdir /hive/data/genomes/hetGla2/bed/blastz.mm10.swap
    cd /hive/data/genomes/hetGla2/bed/blastz.mm10.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzHetGla2.2012-04-14/DEF \
	-swap -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
    #   real    92m24.775s
    cat fb.hetGla2.chainMm10Link.txt
    #   879356778 bases of 2314771103 (37.989%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/hetGla2/bed
    ln -s blastz.mm10.swap lastz.mm10

##############################################################################
# LASTZ dolphin turTru2 (DONE - 2012-04-14 - Hiram)
    # establish a screen to control this job with a name to indicate what it is
    screen -S mm10TurTru2
    mkdir /hive/data/genomes/mm10/bed/lastzTurTru2.2012-04-14
    cd /hive/data/genomes/mm10/bed/lastzTurTru2.2012-04-14

    # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
    #	number of jobs, 50,000 to something under 100,000
    cat << '_EOF_' > DEF
# dolphin vs mouse
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: dolphin TurTru2
SEQ2_DIR=/hive/data/genomes/turTru2/turTru2.2bit
SEQ2_LEN=/hive/data/genomes/turTru2/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LAP=0
SEQ2_LIMIT=500

BASE=/hive/data/genomes/mm10/bed/lastzTurTru2.2012-04-14
TMPDIR=/scratch/tmp
'_EOF_'
    # << happy emacs

    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	`pwd`/DEF \
	-syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
    #   real    624m36.508s
    cat fb.mm10.chainTurTru2Link.txt
    #   802921354 bases of 2652783500 (30.267%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/mm10/bed
    ln -s lastzTurTru2.2012-04-14 lastz.turTru2

    # better to have reciprocal best for this one since it is low coverage:
    cd /hive/data/genomes/mm10/bed/lastzTurTru2.2012-04-14
    time doRecipBest.pl mm10 turTru2 -buildDir=`pwd` -workhorse=hgwdev \
	> best.log 2>&1 &
    #   real    44m47.753s

    mkdir /hive/data/genomes/turTru2/bed/blastz.mm10.swap
    cd /hive/data/genomes/turTru2/bed/blastz.mm10.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzTurTru2.2012-04-14/DEF \
	-swap -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
    #   real    124m17.088s
    cat fb.turTru2.chainMm10Link.txt
    #   781169007 bases of 2332402443 (33.492%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/turTru2/bed
    ln -s blastz.mm10.swap lastz.mm10

##############################################################################
# LASTZ Gibbon nomLeu2 (DONE - 2012-04-14 - Hiram)
    screen -S mm10NomLeu2
    mkdir /hive/data/genomes/mm10/bed/lastzNomLeu2.2012-04-14
    cd /hive/data/genomes/mm10/bed/lastzNomLeu2.2012-04-14

    cat << '_EOF_' > DEF
# gibbon vs mouse
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: Gibbon NomLeu2
SEQ2_DIR=/hive/data/genomes/nomLeu2/nomLeu2.2bit
SEQ2_LEN=/hive/data/genomes/nomLeu2/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LAP=0
SEQ2_LIMIT=100

BASE=/hive/data/genomes/mm10/bed/lastzNomLeu2.2012-04-14
TMPDIR=/scratch/tmp
'_EOF_'
    # << happy emacs

    # the mm10NomLeu2 screen session was already established at the top of
    #	this section; starting it again here would only nest a second screen
    #	with the same name inside the first
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	`pwd`/DEF \
	-syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
    #   real    621m38.251s
    cat fb.mm10.chainNomLeu2Link.txt
    #   902774780 bases of 2652783500 (34.031%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/mm10/bed
    ln -s lastzNomLeu2.2012-04-14 lastz.nomLeu2

    mkdir /hive/data/genomes/nomLeu2/bed/blastz.mm10.swap
    cd /hive/data/genomes/nomLeu2/bed/blastz.mm10.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzNomLeu2.2012-04-14/DEF \
	-swap -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
    #   real 92m24.775s
    cat fb.nomLeu2.chainMm10Link.txt
    #   889660339 bases of 2756609047 (32.274%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/nomLeu2/bed
    ln -s blastz.mm10.swap lastz.mm10

#####################################################################
# tRNAs track (DONE 2012-04-02 Chin)
#
    # Please refer to the generic tRNAs track build documentation
    #   ~/kent/src/hg/makeDb/doc/tRNAsTrack.txt
    # for details about how the track was built.

##############################################################################
# orfeome 2012-03-16  (markd)

enabled ORFeome tracks in etc/genbank.conf and reload genbank

############################################################################
# construct liftOver to mm9 (DONE - 2012-04-30 - Hiram)
    screen -S 10        # manage this longish running job in a screen
    mkdir /hive/data/genomes/mm10/bed/blat.mm9.2012-04-30
    cd /hive/data/genomes/mm10/bed/blat.mm9.2012-04-30
    # check it with -debug first to see if it is going to work:
    time doSameSpeciesLiftOver.pl -buildDir=`pwd` -bigClusterHub=swarm \
        -ooc=/scratch/data/mm10/mm10.11.ooc \
        -debug -dbHost=hgwdev -workhorse=hgwdev mm10 mm9 > do.log 2>&1
    # if that is OK, then run it:
    time doSameSpeciesLiftOver.pl -buildDir=`pwd` -bigClusterHub=swarm \
        -ooc=/scratch/data/mm10/mm10.11.ooc \
        -dbHost=hgwdev -workhorse=hgwdev mm10 mm9 > do.log 2>&1
    #   real    95m21.635s

    # verify this file exists.  'og' is not a standard command (presumably a
    #	personal alias for 'ls -og', which matches the listing format recorded
    #	below), so spell it out to work in any account:
    ls -og -L /gbdb/mm10/liftOver/mm10ToMm9.over.chain.gz
# -rw-rw-r-- 1 535855 Feb  9 12:07 /gbdb/mm9/liftOver/mm9ToMm10.over.chain.gz
    # NOTE(review): the listing recorded above is for the mm9ToMm10 chain
    #	under /gbdb/mm9, not the mm10ToMm9 file checked here -- confirm

    # and try out the conversion on genome-test from mm10 to mm9
############################################################################
# EXONIPHY MM10, lifted from hg19 (DONE - braney 2012-05-29)
#	needed for ucscGenes building
    # create a syntenic liftOver chain file
    cd /cluster/data/hg19/bed/lastz.mm10/axtChain
    time nice -n +19 netFilter -syn hg19.mm10.net.gz \
	| netChainSubset -verbose=0 stdin hg19.mm10.all.chain.gz stdout \
	| chainStitchId stdin stdout | gzip -c > hg19.mm10.syn.chain.gz
    #real    2m38.915s
    #user    3m29.458s
    #sys     0m16.033s

    #	slightly smaller than the ordinary liftOver chain file:
-rw-rw-r-- 1 78419424 Mar  7 18:40 hg19.mm10.over.chain.gz
-rw-rw-r-- 1 74588027 May 29 12:29 hg19.mm10.syn.chain.gz

    # exoniphyMm10.gp is prepared as follows
    mkdir /cluster/data/mm10/bed/exoniphy
    cd /cluster/data/mm10/bed/exoniphy
    hgsql hg19 -e "select * from exoniphy" -N | cut  -f 2-16 > exoniphyHg19.gp
    time nice -n +19 liftOver -genePred exoniphyHg19.gp \
	/cluster/data/hg19/bed/lastz.mm10/axtChain/hg19.mm10.syn.chain.gz \
	    exoniphyMm10.gp unmapped
    # real    16m0.334s
    # user    15m46.462s
    # sys     0m7.115s

    wc -l *
    # 186601 exoniphyHg19.gp
    # 178821 exoniphyMm10.gp
    # 15560 unmapped

    cd /cluster/data/mm10/bed/exoniphy
    nice -n +19 hgLoadGenePred -genePredExt mm10 exoniphy exoniphyMm10.gp
    nice -n +19 featureBits mm10 exoniphy
    # 26795543 bases of 2652783500 (1.010%) in intersection
    nice -n +19 featureBits mm9 exoniphy
    #	25931742 bases of 2620346127 (0.990%) in intersection

##############################################################################
# LASTZ cow bosTau6 (DONE - 2012-06-19 - Chin)
    # establish a screen to control this job with a name to indicate
    # what it is
    screen -S mm10BosTau6
    mkdir /hive/data/genomes/mm10/bed/lastzBosTau6.2012-06-19
    cd /hive/data/genomes/mm10/bed/lastzBosTau6.2012-06-19

    # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
    #   number of jobs, 50,000 to something under 100,000
    cat << '_EOF_' > DEF
# cow vs mouse
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: cow BosTau6
SEQ2_DIR=/scratch/data/bosTau6/bosTau6.2bit
SEQ2_LEN=/scratch/data/bosTau6/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LAP=0
SEQ2_LIMIT=50

BASE=/hive/data/genomes/mm10/bed/lastzBosTau6.2012-06-19
TMPDIR=/scratch/tmp
'_EOF_'
    # << happy emacs

    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
    `pwd`/DEF \
    -syntenicNet \
    -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
    -chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
    #   real    212m21.604s

    cat fb.mm10.chainBosTau6Link.txt
    #   700039696 bases of 2652783500 (26.389%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/mm10/bed
    ln -s lastzBosTau6.2012-06-19 lastz.bosTau6

    # swap
    mkdir /hive/data/genomes/bosTau6/bed/blastz.mm10.swap
    cd /hive/data/genomes/bosTau6/bed/blastz.mm10.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
    /hive/data/genomes/mm10/bed/lastzBosTau6.2012-06-19/DEF \
    -swap -syntenicNet \
    -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
    -chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
    #   real    72m13.925s
    cat fb.bosTau6.chainMm10Link.txt
    #   688651806 bases of 2649682029 (25.990%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/bosTau6/bed
    ln -s blastz.mm10.swap lastz.mm10

##############################################################################
# lastz Medium Ground Finch geoFor1 (DONE - 2012-07-29 - Hiram)
    # establish a screen to control this job with a name to indicate what it is
    screen -S mm10
    mkdir /hive/data/genomes/mm10/bed/lastzGeoFor1.2012-07-29
    cd /hive/data/genomes/mm10/bed/lastzGeoFor1.2012-07-29

    cat << '_EOF_' > DEF
# Mouse vs. medium ground finch
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
BLASTZ_Y=3400
BLASTZ_L=6000
BLASTZ_K=2200
BLASTZ_Q=/scratch/data/blastz/HoxD55.q

# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: Medium Ground Finch GeoFor1
SEQ2_DIR=/hive/data/genomes/geoFor1/geoFor1.2bit
SEQ2_LEN=/hive/data/genomes/geoFor1/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LAP=0
SEQ2_LIMIT=50

BASE=/hive/data/genomes/mm10/bed/lastzGeoFor1.2012-07-29
TMPDIR=/scratch/tmp
'_EOF_'
    # << happy emacs

    # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
    #	number of jobs, 50,000 to something under 100,000
    # when not present, SEQ2_LIMIT is a default 100
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
        `pwd`/DEF \
        -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
        -chainMinScore=5000 -chainLinearGap=loose > do.log 2>&1 &
    #   real    251m4.194s
    cat fb.mm10.chainGeoFor1Link.txt
    #   93984241 bases of 2652783500 (3.543%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/mm10/bed
    ln -s lastzGeoFor1.2012-07-29 lastz.geoFor1

    #	and for the swap
    mkdir /hive/data/genomes/geoFor1/bed/blastz.mm10.swap
    cd /hive/data/genomes/geoFor1/bed/blastz.mm10.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzGeoFor1.2012-07-29/DEF \
        -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
        -swap -chainMinScore=5000 -chainLinearGap=loose > swap.log 2>&1 &
    #   real    10m0.875s
    cat  fb.geoFor1.chainMm10Link.txt
    #   80273915 bases of 1041286029 (7.709%) in intersection

    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/geoFor1/bed
    ln -s blastz.mm10.swap lastz.mm10

#########################################################################
# construct assembly fragments table (DONE - 2012-09-11 - Hiram)
    mkdir /hive/data/genomes/mm10/bed/assemblyFrags
    cd /hive/data/genomes/mm10/bed/assemblyFrags
    # (a stray 'zgrep -h -v "^#" "${F}"' used to sit here; F is never set in
    #	this procedure, so the command had nothing to read and was removed)

    # Each pipeline below turns AGP rows into six column bed rows:
    #	- drop AGP comment lines (leading '#') and gap rows (column 5 is "N")
    #	- emit: object name, start-1 (AGP starts are 1-based, bed starts are
    #	  0-based), end, component id (column 6), score 0, orientation
    #	  (column 9)
    #	- rename the GenBank object accession to the UCSC chrom name

    # assembled chromosomes: CM00nnnn.2 accessions become chr1 ... chrY
    zgrep -h -v "^#" ../../genbank/Primary_Assembly/assembled_chromosomes/AGP/*.comp.agp.gz \
    | awk '$5 != "N"' \
        | awk '{printf "%s\t%d\t%d\t%s\t0\t%s\n", $1,$2-1,$3,$6,$9}' \
    | sed -e 's/CM000994.2/chr1/; s/CM000995.2/chr2/; s/CM000996.2/chr3/; s/CM000997.2/chr4/; s/CM000998.2/chr5/; s/CM000999.2/chr6/; s/CM001000.2/chr7/; s/CM001001.2/chr8/; s/CM001002.2/chr9/; s/CM001003.2/chr10/; s/CM001004.2/chr11/; s/CM001005.2/chr12/; s/CM001006.2/chr13/; s/CM001007.2/chr14/; s/CM001008.2/chr15/; s/CM001009.2/chr16/; s/CM001010.2/chr17/; s/CM001011.2/chr18/; s/CM001012.2/chr19/; s/CM001013.2/chrX/; s/CM001014.2/chrY/;' > chr.asmFrag.bed

    # unlocalized scaffolds: accession becomes chrN_<accession>_random
    zgrep -h -v "^#" ../../genbank/Primary_Assembly/unlocalized_scaffolds/AGP/*.agp.gz \
        | awk '$5 != "N"' \
        | awk '{printf "%s\t%d\t%d\t%s\t0\t%s\n", $1,$2-1,$3,$6,$9}' \
    | sed -e "s#GL456233.1#chrX_GL456233_random#; s#GL456216.1#chr4_GL456216_random#; s#JH584299.1#chr5_JH584299_random#; s#JH584301.1#chrY_JH584301_random#; s#JH584300.1#chrY_JH584300_random#; s#JH584303.1#chrY_JH584303_random#; s#JH584302.1#chrY_JH584302_random#; s#JH584298.1#chr5_JH584298_random#; s#JH584297.1#chr5_JH584297_random#; s#JH584296.1#chr5_JH584296_random#; s#JH584295.1#chr4_JH584295_random#; s#JH584294.1#chr4_JH584294_random#; s#JH584293.1#chr4_JH584293_random#; s#JH584292.1#chr4_JH584292_random#; s#GL456354.1#chr5_GL456354_random#; s#GL456350.1#chr4_GL456350_random#; s#GL456221.1#chr1_GL456221_random#; s#GL456219.1#chr7_GL456219_random#; s#GL456213.1#chr1_GL456213_random#; s#GL456212.1#chr1_GL456212_random#; s#GL456211.1#chr1_GL456211_random#; s#GL456210.1#chr1_GL456210_random#;" > chrUL.asmFrag.bed

    # unplaced scaffolds: strip the ".1" version suffix that precedes the
    #	first tab, then prefix the accession with chrUn_
    zgrep -h -v "^#" ../../genbank/Primary_Assembly/unplaced_scaffolds/AGP/*.agp.gz \
        | awk '$5 != "N"' | sed -e 's/\.1\t/\t/' \
        | awk '{printf "chrUn_%s\t%d\t%d\t%s\t0\t%s\n", $1,$2-1,$3,$6,$9}' \
        > chrUn.asmFrag.bed

    cat chr.asmFrag.bed chrUL.asmFrag.bed chrUn.asmFrag.bed > mm10.asmFrag.bed
    # add the chrM identity: one fragment spanning the whole mitochondrial
    #	sequence.  chrM is 16299 bases (this row previously ended at 1629,
    #	which would leave more bases uncovered than the featureBits result
    #	below allows) -- confirm against chrM in ../../chrom.sizes
    echo -e "chrM\t0\t16299\tAY172335.1\t0\t+" >> mm10.asmFrag.bed
    hgLoadBed mm10 assemblyFrags mm10.asmFrag.bed
    featureBits mm10 assemblyFrags
    #   2652769048 bases of 2652783500 (99.999%) in intersection
    # should be silent when all chr names are correct:
    checkTableCoords mm10 assemblyFrags

#########################################################################
# construct ucscToEnsembl table (DONE - 2012-09-11 - Hiram)
    mkdir /hive/data/genomes/mm10/ensembl
    cd /hive/data/genomes/mm10/ensembl
    wget --timestamping \
'ftp://ftp.ensembl.org/pub/release-68/fasta/mus_musculus/dna/Mus_musculus.GRCm38.68.dna.toplevel.fa.gz'

    wget --timestamping \
'ftp://ftp.ensembl.org/pub/release-68/fasta/mus_musculus/dna/Mus_musculus.GRCm38.68.dna.nonchromosomal.fa.gz'

    faCount *.fa.gz > faCount.txt
    egrep -v "total|seq" faCount.txt  | awk '{print $1,$2}' \
        | sort -u | sort -k2nr | sed -e "s/ /\t/" > ensembl.chrom.sizes

    # Build the UCSC <-> Ensembl name map by matching sequences on their
    #	length: both size lists are rewritten as "size<tab>name" and sorted,
    #	then joined on the first column (the size).
    # NOTE(review): this pairs names correctly only when no two sequences
    #	share a length -- verify the row count of ucscToEnsembl.tab against
    #	chrom.sizes before loading
    mkdir /hive/data/genomes/mm10/bed/ucscToEnsembl
    cd /hive/data/genomes/mm10/bed/ucscToEnsembl
    awk '{printf "%d\t%s\n", $2,$1}' ../../chrom.sizes | sort > sizes.chrom.ucsc
    awk '{printf "%d\t%s\n", $2,$1}' ../../ensembl/ensembl.chrom.sizes \
        | sort > sizes.chrom.ensembl
    # join output is: size, UCSC name, Ensembl name; keep only the two names
    join sizes.chrom.ucsc sizes.chrom.ensembl \
        | awk '{printf "%s\t%s\n", $2,$3}' > ucscToEnsembl.tab

    # longest UCSC name, used as the PRIMARY KEY prefix length in the
    #	table definition that follows
    cut -f1 ucscToEnsembl.tab | awk '{print length($1)}' | sort -rn | head -1
    #   20

    cat << '_EOF_' > ucscToEnsembl.sql
# UCSC to Ensembl chr name translation
CREATE TABLE ucscToEnsembl (
    ucsc varchar(255) not null,        # UCSC chromosome name
    ensembl varchar(255) not null,     # Ensembl chromosome name
              #Indices
    PRIMARY KEY(ucsc(20))
);
'_EOF_'

    hgLoadSqlTab mm10 ucscToEnsembl ucscToEnsembl.sql ucscToEnsembl.tab

#########################################################################
# GRC Incident database (DONE - 2012-09-21 - Hiram)
    # updated the automatic scripts to include the build of this track
    #   on Mm10
    # this procedure is run as a cron job in Hiram's account:

    #	43 09 * * * /hive/data/outside/grc/incidentDb/runUpdate.sh makeItSo

    # using the two scripts there: runUpdate.sh and update.sh
    # which are checked into the source tree as files:
    #	src/hg/utils/automation/grcIncidentUpdate.sh
    #	src/hg/utils/automation/grcRunIncidentUpdate.sh

    # they fetch the XML files from NCBI, convert them to SQL text
    # files, construct a bigBed file, and push it to genomewiki if
    # it is an update from the previous version

    # the table in the dataBase is: grcIncidentDb
    # which is the URL to the bb file, a single row:
    #   http://genomewiki.ucsc.edu/images/a/a4/Mm10.grcIncidentDb.bb

    # construct the table after running the script once manually:
    hgBbiDbLink mm10 grcIncidentDb \
        "http://genomewiki.ucsc.edu/images/a/a4/Mm10.grcIncidentDb.bb"

#########################################################################
# GRCm38.p1 patch 1 (DONE - 2012-09-21 - Hiram)
    mkdir /hive/data/genomes/mm10/bed/patch1
    cd /hive/data/genomes/mm10/bed/patch1
    rsync -a -P rsync://ftp.ncbi.nlm.nih.gov/genbank/genomes/Eukaryotes/vertebrates_mammals/Mus_musculus/GRCm38.p1/ ./genbank/
    # slight modifications to this script from hg19 patch9 work:
    ./gatherNames.pl genbank > ucscNames.patch1.txt
    # examine the names for sanity:
    awk '{print $NF}' ucscNames.patch1.txt | sort
    # and they should not be longer than 31 characters:
    awk '{print $NF}' ucscNames.patch1.txt | sort | awk '{print length($0)}' \
        | sort -n | tail
    # script from hg19 patch9, update the variable patchName
     ./mkTables.pl patches.chrom.sizes ucscNames.patch1.txt genbank/PATCHES/alt_scaffolds/AGP/alt.scaf.agp.gz
    # output to stdout is the contents of alt.scaf.agp.gz
    # constructs ctgPos.txt chromInfo.txt gap.txt gold.txt
    # script from hg19 patch9, update the variable patchName
    ./mkCtgPos2.pl ucscNames.patch1.txt patches.chrom.sizes > ctgPos2.txt
    # NOTE(review): ../patch5 is not created anywhere in this procedure;
    #	presumably this script comes from the hg19 patch work mentioned
    #	above -- confirm the source location
    cp -p ../patch5/mkHapLocate.pl .
    # the NCBI hierarchy was rsync'ed into ./genbank/ above, so the placement
    #	file is under genbank/PATCHES/ (same prefix as the alt.scaf.agp.gz
    #	argument given to mkTables.pl)
    ./mkHapLocate.pl ctgPos.txt \
	genbank/PATCHES/alt_scaffolds/alt_scaffold_placement.txt \
	> haplotypeLocations.bed
    cp -p haplotypeLocations.bed altSequence.bed

    ./mkFasta.pl ucscNames.patch1.txt > mm10.patch1.fa
    # the build of mm10Patch1 can be seen in mm10Patch1.txt

    egrep -v "32,32,190" altSequence.bed  \
	| awk '{printf "%s\t%d\t%d\t%s\t%d\t%s\n", $1,$2,$3,$4,$5,$6}' \
	    > altSeqPatchesP1.tab
    # no haplotypes yet, this is nothing:
    egrep "32,32,190" altSequence.bed  \
	| awk '{printf "%s\t%d\t%d\t%s\t%d\t%s\n", $1,$2,$3,$4,$5,$6}' \
	    > altSeqHaplotypesP1.tab
    # verify none lost
    wc -l altSequence.bed altSeqPatchesP1.tab altSeqHaplotypesP1.tab
    #   9 altSequence.bed
    #   9 altSeqPatchesP1.tab
    #   0 altSeqHaplotypesP1.tab

    # not necessary, there are none yet:
    hgLoadBed mm10 altSeqHaplotypesP1 altSeqHaplotypesP1.tab
    #	Loaded 75 elements of size 6
    hgLoadBed mm10 altSeqPatchesP1 altSeqPatchesP1.tab
    #   Read 9 elements of size 6 from altSeqPatchesP1.tab

    #    these tables are part of mouse/mm10/altSeqComposite1.ra

##############################################################################
# Haplotype track (WORKING - 2012-10-01 - Hiram)

# Warning: these are all actually alternate scaffolds from OTHER mouse strains
# These haplotypes are NOT from mm10.  Probably the table should have been called NonMm10Haplotypes!

# The directory after genbank/ identifies the strain, e.g. 129S2_SvPas
#../../../mm10/genbank/129S2_SvPas/alt_scaffolds/alt_scaffold_placement.txt
#../../../mm10/genbank/129P2_OlaHsd/alt_scaffolds/alt_scaffold_placement.txt
#../../../mm10/genbank/NOD_ShiLtJ/alt_scaffolds/alt_scaffold_placement.txt
#../../../mm10/genbank/A_J/alt_scaffolds/alt_scaffold_placement.txt
#../../../mm10/genbank/CAST_Ei/alt_scaffolds/alt_scaffold_placement.txt
#../../../mm10/genbank/129X1_SvJ/alt_scaffolds/alt_scaffold_placement.txt
#../../../mm10/genbank/AKR_J/alt_scaffolds/alt_scaffold_placement.txt
#../../../mm10/genbank/RIII/alt_scaffolds/alt_scaffold_placement.txt
#../../../mm10/genbank/129S6_SvEvTac/alt_scaffolds/alt_scaffold_placement.txt
#../../../mm10/genbank/129S7_SvEvBrd-Hprt-b-m2/alt_scaffolds/alt_scaffold_placement.txt
#../../../mm10/genbank/BALB_c/alt_scaffolds/alt_scaffold_placement.txt
#../../../mm10/genbank/129S1_SvImJ/alt_scaffolds/alt_scaffold_placement.txt
#../../../mm10/genbank/NOD_MrkTac/alt_scaffolds/alt_scaffold_placement.txt


    cat << '_EOF_' > mkBedFile.pl
#!/usr/bin/env perl

# mkBedFile.pl - convert NCBI alt_scaffold_placement.txt files into bedDetail
#	rows, printed to stdout, for loading into the mm10Haplotypes table.
#
# usage: ./mkBedFile.pl <genbankDir> > mm10Haplotypes.bedDetail
#
# Every regular file under <genbankDir> whose path contains "placement.txt"
# and "alt_scaffolds", and does not contain "UNKNOWN", is read.  The path of
# each file processed is echoed to stderr.  Exits 255 with a usage message
# when the argument is missing or is not a directory.

use strict;
use warnings;
use File::Find;

# print the usage message to stderr and exit 255
sub usage {
    print STDERR "usage: ./mkBedFile.pl ../../mm10/genbank > mm10Haplotypes.bed\n";
    print STDERR "expecting the Mus_musculus/GRCm38.p1/ hierarchy in ./genbank from NCBI\n";
    exit 255;
}

usage() if (scalar(@ARGV) != 1);

my $patchDir = shift;

if ( ! -d $patchDir ) {
    print STDERR "ERROR: given directory $patchDir is not a directory or does not exist\n";
    usage();
}

# Collect the placement files without going through the shell, so that a
# directory name with spaces or shell metacharacters is handled safely.
my @placeList;
find(sub {
        # regular files only, as 'find -type f' would select
        return if (-l $_ || ! -f $_);
        my $path = $File::Find::name;
        return unless ($path =~ m/placement\.txt/);
        return unless ($path =~ m/alt_scaffolds/);
        return if ($path =~ m/UNKNOWN/);
        push @placeList, $path;
    }, $patchDir);

# sorted so that the output order is reproducible from run to run
foreach my $placeFile (sort @placeList) {
    printf STDERR "# %s\n", $placeFile;
    open (my $fh, '<', $placeFile) or die "can not read $placeFile: $!";
    while (my $line = <$fh>) {
        next if ($line =~ m/^#/);       # header/comment lines
        chomp $line;
        my @a = split('\s+', $line);
        # Fields used below, by index (names as given in the header line of
        # alt_scaffold_placement.txt -- confirm against the file):
        #   0 alt_asm_name   3 alt_scaf_acc   5 parent_name   7 region_name
        #   8 ori           11 parent_start  12 parent_stop
        next if (scalar(@a) < 13);      # blank or truncated line
        next if ($a[11] eq "na");       # scaffold not placed on the reference
        $a[8] = "+" if ($a[8] eq "b");  # the strand column holds only + or -
        my $descr = sprintf("<B>Region&nbsp;name:&nbsp;</B>%s", $a[7]);
        # NCBI placement coordinates are 1-based inclusive; bed chromStart is
        # 0-based, hence parent_start - 1
        printf "chr%s\t%d\t%d\t%s\t0\t%s\t%s\t%s\n",
                $a[5], $a[11] - 1, $a[12], $a[0], $a[8], $a[3], $descr;
    }
    close ($fh);
}
'_EOF_'
    # << happy emacs
    chmod +x mkBedFile.pl

    # the script exits with its usage message unless given exactly one
    #	argument: the directory holding the per-strain alt_scaffolds
    #	hierarchies (the ../../../mm10/genbank/ paths listed above)
    ./mkBedFile.pl ../../../mm10/genbank > mm10Haplotypes.bedDetail
    cat << '_EOF_' > mm10Haplotypes.sql
CREATE TABLE mm10Haplotypes (
    chrom varchar(255) not null,   # Reference sequence chromosome or scaffold
    chromStart int unsigned not null,   # Start position in chromosome
    chromEnd int unsigned not null,     # End position in chromosome
    name varchar(255) not null, # Short Name of item
    score int unsigned, # Score from 0-1000
    strand char(1),     # + or -
    id varchar(255) not null,   # ID to bed used in URL to link back
    description longblob not null, # Long description of item for the details page
    #Indices
    INDEX(chrom, chromStart)
);
'_EOF_'

    hgLoadSqlTab mm10 mm10Haplotypes mm10Haplotypes.sql mm10Haplotypes.bedDetail

    # trackDb entry:
track mm10Haplotypes
shortLabel Alt. strains
longLabel Alternate mouse strains, mapped to reference as haplotypes
group varRep
priority 111
visibility hide
type bedDetail 8
url http://www.ncbi.nlm.nih.gov/nuccore/$$
urlLabel NCBI Nucleotide:

##########################################################################
##  CYTOBAND - ideogram track (DONE - 2012-10-19 - Hiram)
    ssh hgwdev
    mkdir -p /hive/data/outside/ncbi/ideogram/2012-10
    cd /hive/data/outside/ncbi/ideogram/2012-10
    # fetch all the ideogram files:
    rsync -a -P rsync://ftp.ncbi.nlm.nih.gov/pub/gdp/ ./

    mkdir /hive/data/genomes/mm10/bed/cytoband
    cd /hive/data/genomes/mm10/bed/cytoband

    # Create bed file
    $HOME/kent/src/utils/ncbi/createNcbiCytoBand.pl \
/hive/data/outside/ncbi/ideogram/2012-10/ideogram_10090_GCF_000000055.19_NA_V2

    ## can now verify before load:
    $HOME/kent/src/utils/ncbi/cytoBandVerify.pl
    #	everything checks out OK on 21 chroms
    # Load the bed file
    hgLoadBed -noBin -sqlTable=$HOME/kent/src/hg/lib/cytoBand.sql \
	mm10 cytoBand cytoBand.bed
    #   Read 403 elements of size 5 from cytoBand.bed
    # Make cytoBandIdeo track for ideogram gif on hgTracks page.
    # For mouse cytoBandIdeo is just a replicate of the cytoBand track.
    hgsql -e "drop table cytoBandIdeo;" mm10
    hgsql mm10 -e "create table cytoBandIdeo (index(chrom(10),chromStart)) as select * from cytoBand;"

##########################################################################
# CYTOBANDIDEO update -  (DONE - 2013-02-27 - kuhn)
# adding rows for chroms with no cytology
# this is just for navigation/orientation on those chroms

    # Adds one whole-sequence 'gneg' row to cytoBandIdeo for every chromInfo
    #	sequence that has no cytoBandIdeo rows, then reloads the table.
    # note: 'set name=value' is csh syntax; run these in csh/tcsh
    set db=mm10
    set sql=~/kent/src/hg/lib/cytoBandIdeo.sql
    # make backup of existing table
    hgsql -e "CREATE TABLE cytoBandIdeoCopy SELECT * FROM cytoBandIdeo" $db
    # dump existing table
    hgsql -N -e "SELECT * FROM cytoBandIdeo" $db > $db.cytoBandIdeo

    # find chroms already covered
    hgsql -N -e 'SELECT chrom FROM cytoBandIdeo' $db \
       | sort -u > $db.coveredNames
    # make cytoBand records for chroms not already covered:
    #	grep -w -v -f drops chromInfo rows matching a covered name as a whole
    #	word; awk then writes chrom, 0, size, empty band name, gneg
    hgsql -N -e 'SELECT chrom, size FROM chromInfo' $db \
      | grep -wvf $db.coveredNames \
      | awk '{print $1"\t0\t"$2"\t\tgneg"}' > $db.cytoBandNew
    # check line counts of the files written so far
    wc -l $db.*
    # combine and sort (bedSort sorts the file in place)
    cat $db.cytoBandNew $db.cytoBandIdeo > $db.cytoBandIdeoFull
    bedSort $db.cytoBandIdeoFull $db.cytoBandIdeoFull
    # replace existing table
    hgsql -e "DROP TABLE cytoBandIdeo" $db
    hgLoadSqlTab $db cytoBandIdeo $sql $db.cytoBandIdeoFull
    # check the reloaded table, and then drop the cytoBandIdeoCopy backup

##########################################################################
# lastz Lamprey petMar2 (DONE - 2012-10-17 - Hiram)
    # establish a screen to control this job with a name to indicate what it is
    screen -S petMar2
    mkdir /hive/data/genomes/mm10/bed/lastzPetMar2.2012-10-19
    cd /hive/data/genomes/mm10/bed/lastzPetMar2.2012-10-19

    cat << '_EOF_' > DEF
# Mouse vs. Lamprey
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
BLASTZ_Y=3400
BLASTZ_L=6000
BLASTZ_K=2200
BLASTZ_M=50
BLASTZ_Q=/scratch/data/blastz/HoxD55.q

# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: Lamprey PetMar2
SEQ2_DIR=/hive/data/genomes/petMar2/petMar2.2bit
SEQ2_LEN=/hive/data/genomes/petMar2/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LAP=0
SEQ2_LIMIT=60

BASE=/hive/data/genomes/mm10/bed/lastzPetMar2.2012-10-19
TMPDIR=/scratch/tmp
'_EOF_'
    # << happy emacs

    # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
    #	number of jobs, 50,000 to something under 100,000
    # when not present, SEQ2_LIMIT is a default 100
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
        `pwd`/DEF \
        -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-qRepeats=windowmaskerSdust \
        -chainMinScore=5000 -chainLinearGap=loose > do.log 2>&1 &
    #   real    218m29.078s

    cat fb.mm10.chainPetMar2Link.txt
    #   28262565 bases of 2652783500 (1.065%) in intersection

    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/mm10/bed
    ln -s lastzPetMar2.2012-10-19 lastz.petMar2

    #	and for the swap
    mkdir /hive/data/genomes/petMar2/bed/blastz.mm10.swap
    cd /hive/data/genomes/petMar2/bed/blastz.mm10.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzPetMar2.2012-10-19/DEF \
        -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
        -swap -chainMinScore=5000 -chainLinearGap=loose > swap.log 2>&1 &
    #   real    7m2.754s
    # the swap onto petMar2 from mm10 writes the featureBits summary to
    #	fb.petMar2.chainMm10Link.txt (not chainHg19Link)
    cat  fb.petMar2.chainMm10Link.txt
    #	20923095 bases of 647368134 (3.232%) in intersection

    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/petMar2/bed
    ln -s blastz.mm10.swap lastz.mm10

#########################################################################
# lastz White Rhino cerSim1 (DONE - 2012-10-23 - Hiram)
    # establish a screen to control this job with a name to indicate what it is
    screen -S mm10CerSim1
    mkdir /hive/data/genomes/mm10/bed/lastzCerSim1.2012-10-23
    cd /hive/data/genomes/mm10/bed/lastzCerSim1.2012-10-23

    cat << '_EOF_' > DEF
# Mouse vs. White Rhino
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz

# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000
SEQ1_LIMIT=10

# QUERY: White Rhino CerSim1
SEQ2_DIR=/hive/data/genomes/cerSim1/cerSim1.2bit
SEQ2_LEN=/hive/data/genomes/cerSim1/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LAP=0
SEQ2_LIMIT=20

BASE=/hive/data/genomes/mm10/bed/lastzCerSim1.2012-10-23
TMPDIR=/scratch/tmp
'_EOF_'
    # << happy emacs

    # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
    #	number of jobs, 50,000 to something under 100,000
    # when not present, SEQ2_LIMIT is a default 100
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
        `pwd`/DEF \
        -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
        -chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
    #   real    992m45.890s

    cat fb.mm10.chainCerSim1Link.txt
    #   942281365 bases of 2652783500 (35.520%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/mm10/bed
    ln -s lastzCerSim1.2012-10-23 lastz.cerSim1

    #	and for the swap
    mkdir /hive/data/genomes/cerSim1/bed/blastz.mm10.swap
    cd /hive/data/genomes/cerSim1/bed/blastz.mm10.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzCerSim1.2012-10-23/DEF \
        -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
        -swap -chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
    #   real    62m44s
    cat  fb.cerSim1.chainMm10Link.txt
    #	926131511 bases of 2366858012 (39.129%) in intersection

    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/cerSim1/bed
    ln -s blastz.mm10.swap lastz.mm10

#########################################################################
# QPCR PRIMERS (DONE - 2012-12-10 - Chin)
# The track name is changed to "qPCR Primers"
# Reload table with new track_mouse.BED (2013-01-28)
    # Download
    mkdir /hive/data/outside/Weizmann/qPcrPrimers
    cd /hive/data/outside/Weizmann/qPcrPrimers
    wget http://www.weizmann.ac.il/complex/compphys/software/Amit/primers/mouse/track_mouse.BED
    mkdir -p /hive/data/genomes/mm10/bed/qPcrPrimers
    cat track_mouse.BED | grep -v track \
     > /hive/data/genomes/mm10/bed/qPcrPrimers/qPcrPrimers_mm10.bed

    cd /hive/data/genomes/mm10/bed/qPcrPrimers
    hgLoadBed -bedDetail -tab -renameSqlTable \
      -sqlTable=$HOME/kent/src/hg/lib/bedDetail.sql \
      mm10 qPcrPrimers qPcrPrimers_mm10.bed
    # Reading qPcrPrimers_mm10.bed
    # Read 518230 elements of size 14 from qPcrPrimers_mm10.bed
    # Sorted
    # Creating table definition for qPcrPrimers
    # Saving bed.tab
    # Loading mm10

    # set the description column to NULL
    hgsql mm10 -ne "UPDATE qPcrPrimers SET description = NULL;"

#########################################################################
# DBSNP B137 / SNP137 (DONE 12/20/12 angie)
# Redmine #7043

    mkdir -p /hive/data/outside/dbSNP/137/mouse
    cd /hive/data/outside/dbSNP/137/mouse
    # Look at the directory listing of ftp://ftp.ncbi.nih.gov/snp/database/organism_data/
    # to find the subdir name to use as orgDir below (mouse_10090 in this case).
    # Then click into that directory and look for file names like
    #    b(1[0-9][0-9])_*_([0-9]+_[0-9])
    # -- use the first num for build and the second num_num for buildAssembly.
    # jkStuff/liftContigs.lft maps NCBI contig names to chroms; use that for liftUp.
    #
    # Some trial and error was required to get the config.ra just right --
    # the b* filenames don't include buildAssembly!
    # patch contigs needed to be filtered out:
    cat > config.ra <<EOF
db mm10
orgDir mouse_10090
build 137
buildAssembly
liftUp /hive/data/genomes/mm10/jkStuff/liftContigs.lft
EOF
    ~/kent/src/hg/utils/automation/doDbSnp.pl config.ra >& do.log & tail -f do.log
    # Script ended with feedback about needing refAssemblyLabel because dbSNP
    # mapped to more than one assembly; add the label that clearly corresponds to
    # mm10, GRCm38, to config.ra and try again:
    cat > config.ra <<EOF
db mm10
orgDir mouse_10090
build 137
buildAssembly
liftUp /hive/data/outside/dbSNP/137/mouse/suggested.lft
refAssemblyLabel GRCm38
EOF
    ~/kent/src/hg/utils/automation/doDbSnp.pl -continue=loadDbSnp \
      config.ra >>& do.log & tail -f do.log
    # Script ended with feedback about unrecognized NT_* contigs from dbSNP.
    # Inspect the script-generated suggested.lft for liftUp; it's usually right.
    # For contigs that are labeled as part of GRCm38 but not liftable to mm10,
    # listed in script-generated cantLiftUpSeqNames.txt, do some entrez
    # nucleotide searches for contig IDs and convince yourself that they're all
    # for alt assembly sequences that we don't include in mm10 (e.g. patches,
    # other strains).  Then tell the script to filter out those contigs:
    cut -f 2 cantLiftUpSeqNames.txt > ignoreAltAssemblyContigs.txt
    cat > config.ra <<EOF
db mm10
orgDir mouse_10090
build 137
buildAssembly
liftUp /hive/data/outside/dbSNP/137/mouse/suggested.lft
refAssemblyLabel GRCm38
ignoreDbSnpContigsFile /hive/data/outside/dbSNP/137/mouse/ignoreAltAssemblyContigs.txt
EOF
    ~/kent/src/hg/utils/automation/doDbSnp.pl -continue=loadDbSnp \
      config.ra >>& do.log & tail -f do.log
# ...
#MultipleAlignments      1667342 This variant aligns in more than one location.
#ObservedMismatch        4561144 UCSC reference allele does not match any observed allele from dbSNP.
#
# *** All done!
    # That is an unusually high count of ObservedMismatch... follow up with dbSNP.


#############################################################################
# FILTER SNP137 (DONE 12/21/12 angie)
    # Redmine #7043
    # Make several tracks that are filtered subsets of snp137:
    # First, filter out the multiply-aligned and/or weight >1 SNPs -> snp137Mult
    # Second, siphon off the common variants -> snp137Common
    # Third, take the (uniquely mapped, not known to be common) variants
    # w/dbSNP's "clinically-assoc" flag -> snp137Flagged
    cd /hive/data/outside/dbSNP/137/mouse
    # Route every snp137 row to exactly one gzipped subset file and print the
    # per-subset row counts.  Tab-separated fields used (0-based): 16 and 17
    # decide Mult; 20 (allele count), 22 (per-allele N values) and 23
    # (per-allele frequencies) decide Common; 24 decides Flagged.
    zcat snp137.bed.gz \
    | perl -we \
      '$minTotal2N = 10; \
       ($multCount, $comCount, $flagCount, $miscCount) = (0,0,0,0); \
       # Every failure below dies with a message naming the file or line. \
       open($mult,      "| gzip -c > snp137Mult.bed.gz") \
         || die "cannot start gzip for snp137Mult.bed.gz\n"; \
       open($common,    "| gzip -c > snp137Common.bed.gz") \
         || die "cannot start gzip for snp137Common.bed.gz\n"; \
       open($flagged,   "| gzip -c > snp137Flagged.bed.gz") \
         || die "cannot start gzip for snp137Flagged.bed.gz\n"; \
       open($misc,      "| gzip -c > snp137Misc.bed.gz") \
         || die "cannot start gzip for snp137Misc.bed.gz\n"; \
       while (<>) { \
         @w = split("\t"); \
         if ($w[16] > 1 || $w[17] =~ /MultipleAlignments/) { \
           print $mult $_; \
           $multCount++; \
         } else { \
           my ($alleleFreqCount, $nStr, $freqStr) = ($w[20], $w[22], $w[23]); \
           my @alNs = split(",", $nStr); \
           die "line $.: number of allele Ns differs from alleleFreqCount\n" \
             unless scalar(@alNs) == $alleleFreqCount; \
           my @freqs = split(",", $freqStr); \
           die "line $.: number of allele freqs differs from alleleFreqCount\n" \
             unless scalar(@freqs) == $alleleFreqCount; \
           my ($total2N, $maxAlleleFreq) = (0, 0); \
           for (my $i = 0;  $i < $alleleFreqCount;  $i++) { \
             $total2N += $alNs[$i]; \
             $maxAlleleFreq = $freqs[$i] if ($freqs[$i] > $maxAlleleFreq); \
           } \
           # Common: at least two alleles, enough samples, and no allele above 99%. \
           if ($alleleFreqCount >= 2 && $total2N >= $minTotal2N && $maxAlleleFreq <= 0.99) { \
             print $common $_; \
             $comCount++; \
           } elsif($w[24] =~ /clinically-assoc/)  { \
             print $flagged $_; \
             $flagCount++; \
           } else { \
             print $misc $_; \
             $miscCount++; \
           } \
         } \
       } \
       # close() on a pipe reports a non-zero gzip exit, so check each one. \
       close($mult)    || die "gzip failed writing snp137Mult.bed.gz\n"; \
       close($common)  || die "gzip failed writing snp137Common.bed.gz\n"; \
       close($flagged) || die "gzip failed writing snp137Flagged.bed.gz\n"; \
       close($misc)    || die "gzip failed writing snp137Misc.bed.gz\n"; \
       print "snp137Mult:    $multCount\nsnp137Common:  $comCount\nsnp137Flagged: $flagCount\n" . \
             "leftover:      $miscCount\n";'
#snp137Mult:    1671771
#snp137Common:  2709532
#snp137Flagged: 0
#leftover:      66537658
    # It's expected for snp137Flagged to be empty because that's for human SNPs.

    # Load tables
    foreach subset (Mult Common)
      hgLoadBed -tab -onServer -tmpDir=/data/tmp -allowStartEqualEnd -renameSqlTable \
        mm10 snp137$subset -sqlTable=snp137.sql snp137$subset.bed.gz
    end


############################################################################
# DBSNP CODING ANNOTATIONS (137) (DONE 12/21/12 angie)
    # Redmine #7043
    cd /hive/data/outside/dbSNP/137/mouse
    # ncbiFuncAnnotations.txt has NCBI coords: 0-based, fully closed.
    # For anything except an insertion (0 bases between flanks),
    # we need to add 1 to the end coord.  For an insertion, we need
    # to add 1 to the start coord.  Make a hash of the insertion IDs,
    # then look up each ID in ncbiFuncAnnotations.txt to tell which
    # transform to apply.
    # Note: sort -u with the keys below is too restrictive -- we need full line uniq.
    zcat ncbiFuncAnnotations.txt.gz \
    | perl -we 'open($IDS, "zcat ncbiFuncInsertions.ctg.bed.gz |") || die "ids: $!"; \
              while (<$IDS>) { chomp; $ids{$_} = 1; } \
              close($IDS); \
              %coding = (2=>1, 3=>1, 4=>1, 8=>1, 9=>1, 41=>1, 42=>1, 43=>1, 44=>1, 45=>1); \
              while (<>) { \
                chomp;  @w = split("\t"); # id, ctg, start, end, ... \
                next unless $coding{$w[5]}; \
                $bed4 = join("\t", $w[1], $w[2], $w[3], $w[0]); \
                if (exists $ids{$bed4} && $w[3] == $w[2]+1) { \
                  $w[2]++; # 2-base insertions: increment start coord \
                } else { \
                  $w[3]++; # increment end coord to get half-open \
                } \
                print join("\t", @w) . "\n"; \
              }' \
    | sort -k1n,1n -k2,2 -k3n,3n -k5,5 -k6n,6n \
    | uniq \
      > ncbiCodingAnnotations.txt
    wc -l ncbiCodingAnnotations.txt
#1884989 ncbiCodingAnnotations.txt

    # How many & what kinds of function types?
    cut -f 6 ncbiCodingAnnotations.txt \
    | sort -n | uniq -c
# 371388 3   (coding-synon)
#1301099 8   (cds-reference -- ignored)
#   3465 41  (nonsense)
# 199148 42  (missense)
#    319 43  (stop-loss)
#   7422 44  (frameshift)
#   2148 45  (cds-indel)
    # In b137, the functional annotations include non-coding (frame = NULL),
    # which we'll exclude here because this is supposed to be just coding stuff...
    # probably need to update how we show dbSNP's func annos anyway, e.g.
    # it is a shame that we toss out codon number and transcript offset.
    # Gather up multiple annotation lines into one line per {snp, gene, frame}:
    # Collapse the sorted rows of ncbiCodingAnnotations.txt into one output row
    # per {rs ID, contig, start, transcript, frame}, with comma-separated lists
    # of function codes, nucleotides, codons and amino acids; then lift the
    # contig coordinates to chromosome coordinates.
    perl -e  'sub flushGroup { \
                # Print the accumulated group; the fxn 8 (cds-reference) row goes first. \
                # Does nothing before the first row, so empty input prints nothing. \
                return unless defined $lastRs; \
                if (defined $refRow) { \
                  $fxns = "$refRow->[0],$fxns";  $nts = "$refRow->[1],$nts"; \
                  $aas = "$refRow->[2],$aas";    $codons = "$refRow->[3],$codons"; \
                } \
                my $lineOut = "$lastCtg\t$lastS\t$lastE\trs$lastRs\t$lastTx\t$lastFrm\t" . \
                      "$count\t$fxns\t$nts\t$codons\t$aas\n"; \
                $lineOut =~ s@NULL@n/a@g; \
                print $lineOut; \
                $refRow = undef;  ($count, $fxns, $nts, $codons, $aas) = (); \
              } \
              while (<>) { chomp; \
                my ($rsId, $ctg, $s, $e, $txId, $fxn, $frm, $nt, $aa, $codon) = split("\t"); \
                # Skip reference rows that carry no coding information at all. \
                next if ($fxn == 8 && ($frm eq "NULL" && $aa eq "NULL" && $codon eq "NULL")); \
                # A change in any key field ends the current group. \
                flushGroup() \
                  if (defined $lastRs && \
                      ($lastRs != $rsId || $lastCtg ne $ctg || $lastS != $s || \
                       $lastTx ne $txId || $lastFrm ne $frm)); \
                ($lastRs, $lastCtg, $lastS, $lastE, $lastTx, $lastFrm) = \
                    ($rsId, $ctg, $s, $e, $txId, $frm); \
                $count++; \
                if ($fxn == 8) { \
                  $refRow = [$fxn, $nt, $aa, $codon]; \
                } else { \
                  $fxns .= "$fxn,";  $nts .= "$nt,";  $aas .= "$aa,";  $codons .= "$codon,"; \
                } \
              } \
              flushGroup();' \
      ncbiCodingAnnotations.txt \
    | liftUp snp137CodingDbSnp.bed /hive/data/outside/dbSNP/137/mouse/suggested.lft warn stdin

    hgLoadBed mm10 snp137CodingDbSnp -sqlTable=$HOME/kent/src/hg/lib/snp125Coding.sql \
      -renameSqlTable -tab -notItemRgb -allowStartEqualEnd \
      snp137CodingDbSnp.bed
#Read 552120 elements of size 11 from snp137CodingDbSnp.bed


#########################################################################
# RETROPOSED GENES ucscRetro track VERSION 2
# (2013-04-03 - 2013-04-17, baertsch,hartera DONE)
# Working directory for this RetroFinder run, plus the mm10 directory that
# receives the final track files.  (The run directory was previously written
# as /hive/hive/..., which did not match the cd that follows.)
mkdir -p /hive/groups/gencode/pseudogenes/retroFinder/mm10.20130403
mkdir -p /hive/data/genomes/mm10/bed/retro/
cd /hive/groups/gencode/pseudogenes/retroFinder/mm10.20130403
cat << '_EOF_' > DEF

RETRO_OPTIONS="-verbose=4 -minAli=0.98 -nearTop=0.005 "
RUNDATE="2013-04-03"
DB=mm10
SCORETHRESH=510
GENOMENAME='Mus musculus'
GBDB=mm
DATE=20130403
MRNABASE=/hive/data/genomes/$DB/bed/mrnaBlastz
TMPMRNA=/hive/groups/gencode/pseudogenes/retroFinder/mm10.${DATE}/mrnaBlastz/$DB
TMPEST=/hive/groups/gencode/pseudogenes/retroFinder/mm10.${DATE}/est/$DB
BINDIR=/hive/users/hartera/GencodeWG/retroFinder/trunk/bin
EST=all_est
SPLICED_EST=intronEst
SPLIT_EST=0
SPLIT_SPLICED_EST=1
SCRIPT=/hive/users/hartera/GencodeWG/retroFinder/trunk/src/pipeline
GENOME=/hive/data/genomes
TWOBIT=$GENOME/$DB/$DB.2bit
RETRODIR=$GENOME/$DB/bed/retro
BASE=/hive/groups/gencode/pseudogenes/retroFinder/mm10.${DATE}/retro
VERSION=2
OUTDIR=${BASE}/${DB}.${VERSION}
RESULT=$OUTDIR/result
LOG=$OUTDIR/log
OUT=$OUTDIR/out
OVERLAPDIR=$OUTDIR/run.o
TABLE=ucscRetroInfo$VERSION
ORTHOTABLE=ucscRetroOrtho$VERSION
ALIGN=ucscRetroAli$VERSION
LOCAL=/scratch/data/$DB
NIB=$LOCAL/nib
RMSK=rmsk
NET1=netHg19
NET2=netCanFam3
NET3=netRn5
GENE1=knownGene
GENE2=refGene
GENE3=ensGene
CLUSTER=swarm
SPECIES="hg19 mm10"
ROOTDIR="/cluster/home/$USER/public_html/retro/mm10Apr13"
WEBROOT=$ROOTDIR/retro.$RUNDATE
WEBSERVER=http://hgwdev-hartera.soe.ucsc.edu
EXPDIR=exp
GENEPFAM=knownGene
PFAM=knownToPfam
PFAMIDFIELD=name
PFAMDOMAIN=value
ARRAY=gnfAtlas2
AFFYPROBE=affyGnf1m
ARRAYMEDIAN=hgFixed.gnfMouseAtlas2Median
ARRAYRATIO=hgFixed.gnfMouseAtlas2AllRatio
ARRAYABS=hgFixed.gnfMouseAtlas2All
ARRAYEXP=hgFixed.gnfMouseAtlas2MedianExps
ARRAYEXPALL=hgFixed.gnfMouseAtlas2AllExps
# ARRAYLOOKUP=knownToGnfAtlas2
#ARRAYPSLS="/hive/data/genomes/mm9/bed/geneAtlas2/affyGnf1m.psl"
ALTSPLICE=sibTxGraph
SPLITBYAGE=splitRetrosByAgeMouse
PDB=proteins121210
BREAKS=0,8,16,24,32
XLIM=34
YLIM=0.1
YLIM1=4000
YLIM2=160
MAXDIVERGENCE=32
'_EOF_'
    # << happy emacs
chmod +x DEF
mkdir mrnaBlastz
cd mrnaBlastz
cp ../DEF .
# Create S1.len:
cp /hive/data/genomes/mm10/chrom.sizes S1.len
# Edit S1.len and remove chrM and random chroms then copy over to mm10
# genomes directory
mkdir -p /hive/data/genomes/mm10/bed/mrnaBlastz
cp S1.len /hive/data/genomes/mm10/bed/mrnaBlastz

screen
# Run steps 1 to 6 of RetroFinder pipeline from scripts in CCDS SVN source tree:
retroFinder/trunk/src/pipeline/ucscStep1.sh DEF
# check cluster job on swarm
retroFinder/trunk/src/pipeline/ucscStep2.sh DEF
retroFinder/trunk/src/pipeline/ucscStep3.sh DEF
#check cluster job
retroFinder/trunk/src/pipeline/ucscStep4.sh DEF
#check cluster job
    # Load the track
retroFinder/trunk/src/pipeline/ucscStep5.sh DEF
cd /hive/groups/gencode/pseudogenes/retroFinder/mm10.20130403/retro/mm10.2
retroFinder/trunk/src/pipeline/filterMrna.sh DEF
retroFinder/trunk/src/pipeline/filterEst.sh DEF
retroFinder/trunk/src/pipeline/analyseExpress.sh DEF
cd /hive/groups/gencode/pseudogenes/retroFinder/mm10.20130403/mrnaBlastz
retroFinder/trunk/src/pipeline/ucscStep6.sh DEF
#added ucscRetroAli to trackDb.ra
# copied the
# /hive/groups/gencode/pseudogenes/retroFinder/mm10.20130403/retro/mm10.2/trackDb.retro
# entry to kent/src/hg/makeDb/trackDb/mouse/mm10/trackDb.ra
# and edited it to add the version number and date.
# Scripts copied ucscRetroAli2.psl, ucscRetroInfo2.bed and ucscRetroCds2.tab
# to /hive/data/genomes/mm10/bed/retro/

##############################################################################
# LASTZ shrew sorAra2 (DONE - 2013-06-12 - Hiram)
    # establish a screen to control this job with a name to indicate what it is
    screen -S mm10SorAra2
    mkdir /hive/data/genomes/mm10/bed/lastzSorAra2.2013-06-12
    cd /hive/data/genomes/mm10/bed/lastzSorAra2.2013-06-12

    # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
    #	number of jobs, 50,000 to something under 100,000
    cat << '_EOF_' > DEF
# shrew vs mouse
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: shrew SorAra2
SEQ2_DIR=/hive/data/genomes/sorAra2/sorAra2.2bit
SEQ2_LEN=/hive/data/genomes/sorAra2/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LAP=0
SEQ2_LIMIT=40

BASE=/hive/data/genomes/mm10/bed/lastzSorAra2.2013-06-12
TMPDIR=/scratch/tmp
'_EOF_'
    # << happy emacs

    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	`pwd`/DEF \
	-syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
    # real    785m32.163s

    cat fb.mm10.chainSorAra2Link.txt
    #   354499462 bases of 2652783500 (13.363%) in intersection

    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/mm10/bed
    ln -s lastzSorAra2.2013-06-12 lastz.sorAra2

    # better to have reciprocal best for this one since it is low coverage:
    cd /hive/data/genomes/mm10/bed/lastzSorAra2.2013-06-12
    time doRecipBest.pl mm10 sorAra2 -buildDir=`pwd` -workhorse=hgwdev \
	> best.log 2>&1 &
    #	real    24m38.069s

    mkdir /hive/data/genomes/sorAra2/bed/blastz.mm10.swap
    cd /hive/data/genomes/sorAra2/bed/blastz.mm10.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzSorAra2.2013-06-12/DEF \
	-swap -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
    # real    39m53.463s
    cat fb.sorAra2.chainMm10Link.txt
    #  343760052 bases of 2192103426 (15.682%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/sorAra2/bed
    ln -s blastz.mm10.swap lastz.mm10

##############################################################################
# LASTZ tenrec echTel2 (DONE - 2013-06-12 - Hiram)
    # establish a screen to control this job with a name to indicate what it is
    screen -S mm10EchTel2
    mkdir /hive/data/genomes/mm10/bed/lastzEchTel2.2013-06-12
    cd /hive/data/genomes/mm10/bed/lastzEchTel2.2013-06-12

    # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
    #	number of jobs, 50,000 to something under 100,000
    cat << '_EOF_' > DEF
# tenrec vs mouse
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: tenrec EchTel2
SEQ2_DIR=/hive/data/genomes/echTel2/echTel2.2bit
SEQ2_LEN=/hive/data/genomes/echTel2/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LAP=0
SEQ2_LIMIT=30

BASE=/hive/data/genomes/mm10/bed/lastzEchTel2.2013-06-12
TMPDIR=/scratch/tmp
'_EOF_'
    # << happy emacs

    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	`pwd`/DEF \
	-syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
    #   real    1006m3.874s

    cat fb.mm10.chainEchTel2Link.txt
    #	384570981 bases of 2652783500 (14.497%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/mm10/bed
    ln -s lastzEchTel2.2013-06-12 lastz.echTel2

    # better to have reciprocal best for this one since it is low coverage:
    cd /hive/data/genomes/mm10/bed/lastzEchTel2.2013-06-12
    time doRecipBest.pl mm10 echTel2 -buildDir=`pwd` -workhorse=hgwdev \
	> best.log 2>&1 &
    #   real    27m58.816s

    # and, for the swap
    mkdir /hive/data/genomes/echTel2/bed/blastz.mm10.swap
    cd /hive/data/genomes/echTel2/bed/blastz.mm10.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzEchTel2.2013-06-12/DEF \
	-swap -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
    #   real    43m0.194s
    cat fb.echTel2.chainMm10Link.txt
    #	380872172 bases of 2605196361 (14.620%) in intersection

    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/echTel2/bed
    ln -s blastz.mm10.swap lastz.mm10

##############################################################################
# LASTZ alpaca vicPac2 (DONE - 2013-06-19 - Hiram)
    # establish a screen to control this job with a name to indicate what it is
    screen -S mm10VicPac2
    mkdir /hive/data/genomes/mm10/bed/lastzVicPac2.2013-06-19
    cd /hive/data/genomes/mm10/bed/lastzVicPac2.2013-06-19

    # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
    #	number of jobs, 50,000 to something under 100,000
    cat << '_EOF_' > DEF
# mouse vs alpaca
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: alpaca VicPac2
SEQ2_DIR=/hive/data/genomes/vicPac2/vicPac2.2bit
SEQ2_LEN=/hive/data/genomes/vicPac2/chrom.sizes
SEQ2_CHUNK=20000000
SEQ2_LAP=0
SEQ2_LIMIT=500

BASE=/hive/data/genomes/mm10/bed/lastzVicPac2.2013-06-19
TMPDIR=/scratch/tmp
'_EOF_'
    # << happy emacs

    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	`pwd`/DEF \
	-syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
    #   real    2156m48.687s

    cat fb.mm10.chainVicPac2Link.txt
    #	797843091 bases of 2652783500 (30.076%) in intersection

    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/mm10/bed
    ln -s lastzVicPac2.2013-06-19 lastz.vicPac2

    # better to have reciprocal best for this one since it is low coverage:
    cd /hive/data/genomes/mm10/bed/lastzVicPac2.2013-06-19
    time doRecipBest.pl mm10 vicPac2 -buildDir=`pwd` -workhorse=hgwdev \
	> best.log 2>&1 &
    #	real    33m49.271s

    mkdir /hive/data/genomes/vicPac2/bed/blastz.mm10.swap
    cd /hive/data/genomes/vicPac2/bed/blastz.mm10.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzVicPac2.2013-06-19/DEF \
	-swap -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
    #   real    85m53.924s

    cat fb.vicPac2.chainMm10Link.txt
    #	783682127 bases of 2078582856 (37.703%) in intersection

    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/vicPac2/bed
    ln -s blastz.mm10.swap lastz.mm10

#########################################################################
# create ucscToINSDC name mapping (DONE - 2013-08-15 - Hiram)
    # this allows the "ensembl" blue bar button to appear
    mkdir /hive/data/genomes/mm10/bed/ucscToINSDC
    cd /hive/data/genomes/mm10/bed/ucscToINSDC

    cat << '_EOF_' > translateNames.sh
#!/bin/sh
# Print one "ucscName<TAB>accession" line per mm10 sequence:
# assembled chromosomes, unplaced scaffolds (chrUn_*), unlocalized
# scaffolds (chrN_*_random) and chrM.

# assembled chromosomes: prefix the chromosome name with "chr"
grep -v "^#" ../../genbank/Primary_Assembly/assembled_chromosomes/chr2acc \
   | sed -e 's/^/chr/'

# unplaced scaffolds: ACC.V -> chrUn_ACC <TAB> ACC.V
# (the dot between accession and version is matched literally)
zcat ../../genbank/Primary_Assembly/unplaced_scaffolds/AGP/unplaced.scaf.agp.gz | grep -v "^#" | cut -f1 | sort -u \
   | sed -e 's/^\([A-Za-z0-9]*\)\.\([0-9]*\)/chrUn_\1\t\1.\2/;'

# unlocalized scaffolds: CHR <TAB> ACC.V -> chrCHR_ACC_random <TAB> ACC.V
grep -v "^#" \
  ../../genbank/Primary_Assembly/unlocalized_scaffolds/unlocalized.chr2scaf \
    | sed -e 's/^\([A-Za-z0-9]*\)\t\([A-Za-z0-9]*\)\.\([0-9]*\)/chr\1_\2_random\t\2.\3/;'


# printf rather than "echo -e": the -e option is not portable under /bin/sh
printf 'chrM\tNC_005089.1\n'
'_EOF_'
    # << happy emacs

    chmod +x translateNames.sh
    ./translateNames.sh | sort > ucscToINSDC.txt
    join <(sort ../../chrom.sizes) ucscToINSDC.txt \
        | awk '{printf "%s\t0\t%d\t%s\n", $1, $2, $3}' > ucscToINSDC.tab

    # maximum size of UCSC chrom name for SQL index
    cut -f1 ucscToINSDC.tab | awk '{print length($0)}' | sort -n | tail -1
    #   20

    sed -e 's/21/20/' $HOME/kent/src/hg/lib/ucscToINSDC.sql \
      | hgLoadSqlTab mm10 ucscToINSDC stdin ucscToINSDC.tab

    # verify the track link to INSDC functions

##############################################################################
# MGI LIFTOVER FROM mm9 ( 2013-11-14 Pauline)
    ssh kolossus
    mkdir /cluster/data/mm10/bed/jaxLiftOver
    cd /cluster/data/mm10/bed/jaxLiftOver

    liftOver -minBlocks=0.5 /cluster/data/mm9/bed/jax/2011_06/jaxQtl.bed \
      /cluster/data/mm9/bed/liftOver/mm9ToMm10.over.chain.gz \
      -bedPlus=6 -tab jaxQtlLift.{bed,unmapped}
    wc -l jaxQtlLift.{bed,unmapped}
#Old  1539 jaxQtlLift.bed
#Old    12 jaxQtlLift.unmapped
#     1883 jaxQtlLift.bed
#       14 jaxQtlLift.unmapped
# Numbers are of same order of magnitude (yay?) proceeding...

    # Load lifted track tables and original auxiliary tables:
    ssh hgwdev
    cd /cluster/data/mm10/bed/jaxLiftOver

    # jaxQTLLift
#didn't run this sed command (prob already been done to this file?)
    sed -e 's/jaxQTL/jaxQTLLift/g'\
      ~/kent/src/hg/lib/jaxQTL.sql  > jaxQTLLift.sql

#ran this (used this instead of hgLoadBed at Hiram's suggestion):
# NOTE(review): the table name is written as jaxQtl in every command here so
# that the load, the coordinate check and the reload all refer to the same
# table; earlier text mixed JaxQtl / JaxQTLLift / jaxQtl -- confirm that no
# stray JaxQtl table was left behind in mm10.
    hgLoadSqlTab  mm10 jaxQtl $HOME/kent/src/hg/lib/jaxQtl.sql \
/cluster/data/mm10/bed/jaxLiftOver/jaxQtlLift.bed

    checkTableCoords mm10 jaxQtl
#got no output (yay!)

#found out hgLoadSqlTab doesn't load a positionally sorted table, sorting bed
#file and reloading:

    sort -k1,1 -k2,2n jaxQtlLift.bed > jaxQtlLiftSorted.bed

    hgLoadSqlTab  mm10 jaxQtl $HOME/kent/src/hg/lib/jaxQtl.sql \
/cluster/data/mm10/bed/jaxLiftOver/jaxQtlLiftSorted.bed


##############################################################################
# DBSNP B138 / SNP138 (DONE 1/17/14 angie)
    # RedMine #12490
    screen
    mkdir -p /hive/data/outside/dbSNP/138/mouse
    cd /hive/data/outside/dbSNP/138/mouse
    # Look at the directory listing of ftp://ftp.ncbi.nih.gov/snp/database/organism_data/
    # to find the subdir name to use as orgDir below (mouse_10090 in this case).
    # Then click into that directory and look for file names like
    #    b(1[0-9][0-9])_
    # -- use the first num for build setting in config.ra
    # The buildAssembly setting in config.ra is empty because dbSNP stopped including
    # that in file names.
    cat > config.ra <<EOF
db mm10
orgDir mouse_10090
build 138
buildAssembly
EOF
    ~/kent/src/hg/utils/automation/doDbSnp.pl config.ra >& do.log & tail -f do.log
    # Some trial and error was required to get the config.ra just right.
    # First stop: need a refAssemblyLabel:
# *** This release contains more than one assembly label.
# *** Please examine this list in case we need to exclude any of these:
#
#GRCm38.p1
#Mm_Celera
# *** Add refAssemblyLabel to config.ra.  If keeping all labels, it will
# *** look like this:
#
#refAssemblyLabel GRCm38.p1,Mm_Celera
#
# *** Edit out any of those that are not included in mm10 (e.g. Celera).
# *** Then restart this script with -continue=loadDbSnp .
    cat >> config.ra <<EOF
refAssemblyLabel GRCm38.p1
EOF
    ~/kent/src/hg/utils/automation/doDbSnp.pl config.ra -continue=loadDbSnp >>& do.log &
    tail -f do.log
    # Second stop: need to grab the NCBI Assembly Reports file for GRCm38; the
    # script will do its best to deduce the needed liftUp entries and contigs
    # to ignore (because they are for alternate mouse strains, or patch contigs etc).
#*** b138_ContigInfo has coords for 119 sequences; these have been written to
#*** /hive/data/outside/dbSNP/138/mouse/suggested.lft .
#*** 152 lines of b138_ContigInfo.bcp.gz either had no lift-coords
#*** or had unrecognized chrom names; see
#*** /hive/data/outside/dbSNP/138/mouse/cantLiftUpSeqNames.txt .
#
#*** You must account for those in config.ra, in the liftUp file
#*** and/or ignoreDbSnpContigsFile or the ignoreDbSnpContigs regex.
#*** Then run again with -continue=loadDbSnp .
#
#*** NOTE: If you add the ncbiAssemblyReportFile setting to config.ra and
#***       run again with -continue=loadDbSnp, this script may be able to
#***       construct those files for you.
    # Look at the doDbSnp.pl -help message for instructions about how to find the
    # Assembly Reports file for GRCm38 on the NCBI web site.
    wget ftp://ftp.ncbi.nlm.nih.gov/genomes/ASSEMBLY_REPORTS/All/GCF_000001635.22.assembly.txt
    cat >> config.ra <<EOF
ncbiAssemblyReportFile GCF_000001635.22.assembly.txt
EOF
    ~/kent/src/hg/utils/automation/doDbSnp.pl config.ra -continue=loadDbSnp >>& do.log &
    tail -f do.log
    # Third stop: review the list of dbSNP contigs that we can't map, and if they're
    # all contigs not in our assembly, tell config.ra to ignore them.
#*** b138_ContigInfo has coords for 119 sequences; these have been written to
#*** /hive/data/outside/dbSNP/138/mouse/suggested.lft .
#
#*** GCF_000001635.22.assembly.txt has mappings for 44 sequences;
#*** these have been written to
#*** /hive/data/outside/dbSNP/138/mouse/suggested.lft .
#
#*** 108 lines of b138_ContigInfo.bcp.gz contained contig names that
#*** could not be mapped to chrom.size via their GenBank contig mappings; see
#*** /hive/data/outside/dbSNP/138/mouse/cantLiftUpSeqNames.txt .
#
#*** You must account for all 271 contig_acc values in config.ra,
#*** in the liftUp file and/or ignoreDbSnpContigsFile.
#*** Then run again with -continue=loadDbSnp .
    cut -f 2 cantLiftUpSeqNames.txt > contigsNotInUCSC.txt
    cat >> config.ra <<EOF
liftUp suggested.lft
ignoreDbSnpContigsFile contigsNotInUCSC.txt
EOF
    ~/kent/src/hg/utils/automation/doDbSnp.pl config.ra -continue=loadDbSnp >>& do.log &
    tail -f do.log

    # The script died with an error implying that a perl command in a pipe got
    # empty input from sort which was getting input from an hgsql query to join
    # Batch submitter handles with rs# snp_id's.  Looks like the mysql connection
    # was lost or something.  Anyway, re-running that part of addToDbSnp.csh
    # in 2 parts and continuing manually through the end of addToDbSnp.csh:
    pushd `cat workingDir `
    hgsql mm10snp138 -NBe 'select SNPSubSNPLink.snp_id, handle from SubSNP, SNPSubSNPLink, Batch \
                       where SubSNP.subsnp_id = SNPSubSNPLink.subsnp_id and \
                             SubSNP.batch_id = Batch.batch_id' \
    | sort -k1n,1n -k2,2 -u \
      > tmp.txt
    # Collapse the sorted (snp_id, handle) pairs in tmp.txt into one row per
    # snp_id: id, number of handles, comma-terminated list of handles.
    # Stops with a clear message if tmp.txt is empty instead of writing a
    # row of undefined values.
    perl -we 'while (<>) { \
              chomp; my ($id, $handle) = split("\t"); \
              if (defined $prevId && $prevId != $id) { \
                print "$prevId\t$handleCount\t$handleBlob\n"; \
                $handleCount = 0;  $handleBlob = ""; \
              } \
              $handleCount++; \
              $handleBlob .= "$handle,"; \
              $prevId = $id; \
            } \
            die "tmp.txt had no rows; was the hgsql query output empty?\n" \
              unless defined $prevId; \
            print "$prevId\t$handleCount\t$handleBlob\n";' \
      tmp.txt > ucscHandles.txt

    cat > ucscHandles.sql <<EOF
CREATE TABLE ucscHandles (
        snp_id int NOT NULL,
        handleCount int unsigned NOT NULL,
        handles longblob NOT NULL,
        INDEX snp_id (snp_id)
);
EOF
    hgLoadSqlTab mm10snp138 ucscHandles{,.sql,.txt}

    # I added 'if (0) then' around the parts of addToDbSnp.csh that completed successfully;
    # complete the step by running the modified script:
    # Pop back out of workingDir
    popd
    addToDbSnp.csh >>& do.log &
    tail -f do.log

    # Now continue with the next step:
    ~/kent/src/hg/utils/automation/doDbSnp.pl config.ra -continue=bigJoin >>& do.log &
    tail -f do.log
# *** All done!


##############################################################################
# FILTER SNP138 (DONE 1/17/14 angie)
   cd /hive/data/outside/dbSNP/138/mouse
   zcat snp138.bed.gz \
   | ~/kent/src/hg/utils/automation/categorizeSnps.pl
#Mult:     3066546
#Common:   8082414
#Flagged:  0
#leftover: 60824824
   foreach f ({Mult,Common}.bed.gz)
     mv $f snp138$f
   end
   # Load tables
   foreach subset (Mult Common)
     hgLoadBed -tab -onServer -tmpDir=/data/tmp -allowStartEqualEnd -renameSqlTable \
       mm10 snp138$subset -sqlTable=snp138.sql snp138$subset.bed.gz
   end


##############################################################################
# DBSNP CODING ANNOTATIONS (138) (DONE 1/17/14 angie)
   cd /hive/data/outside/dbSNP/138/mouse
   # ncbiFuncAnnotations.txt has NCBI coords: 0-based, fully closed.
   # For anything except an insertion (0 bases between flanks),
   # we need to add 1 to the end coord.  For an insertion, we need
   # to add 1 to the start coord.  Make a hash of the insertion IDs,
   # then look up each ID in ncbiFuncAnnotations.txt to tell which
   # transform to apply.
   # Note: sort -u with the keys below is too restrictive -- we need full line uniq.
   zcat ncbiFuncAnnotations.txt.gz \
   | perl -we 'open($IDS, "zcat ncbiFuncInsertions.ctg.bed.gz |") || die "ids: $!"; \
             while (<$IDS>) { chomp; $ids{$_} = 1; } \
             close($IDS); \
             %coding = (2=>1, 3=>1, 4=>1, 8=>1, 9=>1, 41=>1, 42=>1, 43=>1, 44=>1, 45=>1); \
             while (<>) { \
               chomp;  @w = split("\t"); # id, ctg, start, end, ... \
               next unless $coding{$w[5]}; \
               $bed4 = join("\t", $w[1], $w[2], $w[3], $w[0]); \
               if (exists $ids{$bed4} && $w[3] == $w[2]+1) { \
                 $w[2]++; # 2-base insertions: increment start coord \
               } else { \
                 $w[3]++; # increment end coord to get half-open \
               } \
               print join("\t", @w) . "\n"; \
             }' \
   | sort -k1n,1n -k2,2 -k3n,3n -k5,5 -k6n,6n \
   | uniq \
     > ncbiCodingAnnotations.txt
   wc -l ncbiCodingAnnotations.txt
#1584257 ncbiCodingAnnotations.txt
   # How many & what kinds of function types?
   cut -f 6 ncbiCodingAnnotations.txt \
   | sort -n | uniq -c
# 372821 3   (coding-synon)
# 552828 8   (cds-reference -- ignored)
#    376 41  (nonsense)
# 181984 42  (missense)
#     49 43  (stop-loss)
#   3382 44  (frameshift)
# 472817 45  (cds-indel)

   # In b138, the functional annotations include non-coding (frame = NULL),
   # which we'll exclude here because this is supposed to be just coding stuff...
   # probably need to update how we show dbSNP's func annos anyway, e.g.
   # it is a shame that we toss out codon number and transcript offset.
   # Gather up multiple annotation lines into one line per {snp, gene, frame}:
   # Collapse the sorted rows of ncbiCodingAnnotations.txt into one output row
   # per {rs ID, contig, start, transcript, frame}, with comma-separated lists
   # of function codes, nucleotides, codons and amino acids; then lift the
   # contig coordinates to chromosome coordinates.
   perl -e  'sub flushGroup { \
               # Print the accumulated group; the fxn 8 (cds-reference) row goes first. \
               # Does nothing before the first row, so empty input prints nothing. \
               return unless defined $lastRs; \
               if (defined $refRow) { \
                 $fxns = "$refRow->[0],$fxns";  $nts = "$refRow->[1],$nts"; \
                 $aas = "$refRow->[2],$aas";    $codons = "$refRow->[3],$codons"; \
               } \
               my $lineOut = "$lastCtg\t$lastS\t$lastE\trs$lastRs\t$lastTx\t$lastFrm\t" . \
                     "$count\t$fxns\t$nts\t$codons\t$aas\n"; \
               $lineOut =~ s@NULL@n/a@g; \
               print $lineOut; \
               $refRow = undef;  ($count, $fxns, $nts, $codons, $aas) = (); \
             } \
             while (<>) { chomp; \
               my ($rsId, $ctg, $s, $e, $txId, $fxn, $frm, $nt, $aa, $codon) = split("\t"); \
               # Skip reference rows that carry no coding information at all. \
               next if ($fxn == 8 && ($frm eq "NULL" && $aa eq "NULL" && $codon eq "NULL")); \
               # A change in any key field ends the current group. \
               flushGroup() \
                 if (defined $lastRs && \
                     ($lastRs != $rsId || $lastCtg ne $ctg || $lastS != $s || \
                      $lastTx ne $txId || $lastFrm ne $frm)); \
               ($lastRs, $lastCtg, $lastS, $lastE, $lastTx, $lastFrm) = \
                   ($rsId, $ctg, $s, $e, $txId, $frm); \
               $count++; \
               if ($fxn == 8) { \
                 $refRow = [$fxn, $nt, $aa, $codon]; \
               } else { \
                 $fxns .= "$fxn,";  $nts .= "$nt,";  $aas .= "$aa,";  $codons .= "$codon,"; \
               } \
             } \
             flushGroup();' \
     ncbiCodingAnnotations.txt \
   | liftUp snp138CodingDbSnp.bed suggested.lft warn stdin
   hgLoadBed mm10 snp138CodingDbSnp -sqlTable=$HOME/kent/src/hg/lib/snp125Coding.sql \
     -renameSqlTable -tab -notItemRgb -allowStartEqualEnd \
     snp138CodingDbSnp.bed
#Read 1025678 elements of size 11 from snp138CodingDbSnp.bed


##############################################################################
# SEGMENTAL DUPLICATIONS (WORKING 4/14/14 Pauline)
    # File emailed from John Huddleston (jlhudd@uw.edu) in the Eichler Lab.
    # Working directory for the genomicSuperDups track.
    mkdir /hive/data/genomes/mm10/bed/genomicSuperDups
    cd /hive/data/genomes/mm10/bed/genomicSuperDups

    # Fetch the table from the mouseparalogy site.
    wget --timestamping 'http://mouseparalogy.gs.washington.edu/GRCm38/genomicSuperDup.tab'

    mv genomicSuperDup.tab mm10_WGAC.tab

    # Load only rows where both ($3 - $2) and ($9 - $8) are at least 1000;
    # presumably these are the sizes of the two copies of each duplication --
    # confirm the column meanings against genomicSuperDups.sql.
    awk '($3 - $2) >= 1000 && ($9 - $8) >= 1000 {print;}' mm10_WGAC.tab \
    | hgLoadBed mm10 genomicSuperDups stdin \
      -tab -sqlTable=$HOME/kent/src/hg/lib/genomicSuperDups.sql

    # mm8 version of track had issue where strand values were "+" and "_" --
    # checked and found same issue - so ran same fix:
    hgsql mm10 -e 'update genomicSuperDups set strand = "-" where strand = "_";'

    #new mm10 version has a lot more stuff than version on mm8:
    #featureBits mm8 genomicSuperDups
    #157417547 bases of 2567283971 (6.132%) in intersection
    #featureBits mm10 genomicSuperDups
    #214917441 bases of 2652783500 (8.102%) in intersection
    #select count(*) from genomicSuperDups;
    #659775 (vs. 277816 in mm8)
#
#########################################################################
# hgPal downloads (DONE braney 2009-11-03)
#   FASTA from 60way for refGene, knownGene, knownCanonical

    # Work on hgwdev, inside a screen session running bash.
    ssh hgwdev
    screen
    bash
    # Start from an empty pal directory.
    rm -rf /cluster/data/mm10/bed/multiz60way/pal
    mkdir /cluster/data/mm10/bed/multiz60way/pal
    cd /cluster/data/mm10/bed/multiz60way/pal
    # order.lst: the entries of ../species.list, one per line; passed to
    # every mafGene command below.
    for i in `cat ../species.list`; do echo $i; done > order.lst

    mz=multiz60way
    gp=refGene
    db=mm10
    mkdir exonAA exonNuc ppredAA ppredNuc
    # Write (not run) four mafGene commands per chrom into $gp.jobs:
    # with/without -exons and with/without -noTrans.  Chroms are taken from
    # chrom.sizes sorted numerically on the size column.
    for j in `sort -nk 2 /cluster/data/$db/chrom.sizes | awk '{print $1}'`
    do
	echo "date"
	echo "mafGene -chrom=$j  $db $mz $gp order.lst stdout | \
	    gzip -c > ppredAA/$j.ppredAA.fa.gz"
	echo "mafGene -chrom=$j -noTrans $db $mz $gp order.lst stdout | \
	    gzip -c > ppredNuc/$j.ppredNuc.fa.gz"
	echo "mafGene -chrom=$j -exons -noTrans $db $mz $gp order.lst stdout | \
	    gzip -c > exonNuc/$j.exonNuc.fa.gz"
	echo "mafGene -chrom=$j -exons $db $mz $gp order.lst stdout | \
	    gzip -c > exonAA/$j.exonAA.fa.gz"
    done > $gp.jobs

    # Run the job file in the background and follow its log.
    nice time sh -x $gp.jobs > $gp.jobs.log 2>&1 &
    sleep 1
    tail -f $gp.jobs.log

# 1817.21user 233.92system 4:54:04elapsed 11%CPU (0avgtext+0avgdata
# 920192maxresident)k
# 6024inputs+0outputs (7major+1648126minor)pagefaults 0swaps

    mz=multiz60way
    gp=refGene
    db=mm10
    # Merge the per-chrom files into one file per output type.
    zcat exonAA/*.gz | gzip -c > $gp.$mz.exonAA.fa.gz
    zcat exonNuc/*.gz | gzip -c > $gp.$mz.exonNuc.fa.gz
    zcat ppredAA/*.gz | gzip -c > $gp.$mz.ppredAA.fa.gz
    zcat ppredNuc/*.gz | gzip -c > $gp.$mz.ppredNuc.fa.gz

    # The per-chrom directories are recreated by the next gene set.
    rm -rf exonAA exonNuc ppredAA ppredNuc

    # we're only distributing exons at the moment
    mz=multiz60way
    gp=refGene
    db=mm10
    # NOTE(review): this gene set links into htdocs-hgdownload while the
    # knownGene and knownCanonical steps of this section link into htdocs --
    # confirm which download root is intended.
    pd=/usr/local/apache/htdocs-hgdownload/goldenPath/$db/$mz/alignments
    mkdir -p $pd
    ln -s `pwd`/$gp.$mz.exonAA.fa.gz $pd/$gp.exonAA.fa.gz
    ln -s `pwd`/$gp.$mz.exonNuc.fa.gz $pd/$gp.exonNuc.fa.gz

    # Same procedure as for refGene, now for the knownGene gene set.
    mz=multiz60way
    gp=knownGene
    db=mm10
    mkdir exonAA exonNuc ppredAA ppredNuc
    # Write (not run) four mafGene commands per chrom into $gp.$mz.jobs.
    for j in `sort -nk 2 /cluster/data/$db/chrom.sizes | awk '{print $1}'`
    do
	echo "date"
	echo "mafGene -chrom=$j  $db $mz $gp order.lst stdout | \
	    gzip -c > ppredAA/$j.ppredAA.fa.gz"
	echo "mafGene -chrom=$j -noTrans $db $mz $gp order.lst stdout | \
	    gzip -c > ppredNuc/$j.ppredNuc.fa.gz"
	echo "mafGene -chrom=$j -exons -noTrans $db $mz $gp order.lst stdout | \
	    gzip -c > exonNuc/$j.exonNuc.fa.gz"
	echo "mafGene -chrom=$j -exons $db $mz $gp order.lst stdout | \
	    gzip -c > exonAA/$j.exonAA.fa.gz"
    done > $gp.$mz.jobs

    # Run the job file in the background and follow its log.
    time sh -x $gp.$mz.jobs > $gp.$mz.job.log 2>&1 &
    sleep 1
    tail -f $gp.$mz.job.log

# oops... missed the timing


    mz=multiz60way
    gp=knownGene
    db=mm10

    # Merge the per-chrom files; the c*.gz glob only picks up files whose
    # names begin with "c".
    zcat exonAA/c*.gz | gzip -c > $gp.$mz.exonAA.fa.gz
    zcat exonNuc/c*.gz | gzip -c > $gp.$mz.exonNuc.fa.gz
    zcat ppredAA/c*.gz | gzip -c > $gp.$mz.ppredAA.fa.gz
    zcat ppredNuc/c*.gz | gzip -c > $gp.$mz.ppredNuc.fa.gz

    rm -rf exonAA exonNuc ppredAA ppredNuc

    mz=multiz60way
    gp=knownGene
    db=mm10
    # Only the exon files are linked into the download directory.
    pd=/usr/local/apache/htdocs/goldenPath/$db/$mz/alignments
    mkdir -p $pd
    ln -s `pwd`/$gp.$mz.exonAA.fa.gz $pd/$gp.exonAA.fa.gz
    ln -s `pwd`/$gp.$mz.exonNuc.fa.gz $pd/$gp.exonNuc.fa.gz

    # now do the canonical set
    cd /cluster/data/mm10/bed/multiz60way/pal
    mz=multiz60way
    gp=knownCanonical
    db=mm10
    # One <chrom>.known.bed per chrom: chrom, chromStart, chromEnd, transcript
    # from the knownCanonical table; tail -n +2 drops the hgsql header row.
    for j in `awk '{print $1}' /cluster/data/mm10/chrom.sizes`
    do
	echo "select chrom, chromStart, chromEnd, transcript from knownCanonical where chrom='$j'" | hgsql $db | tail -n +2 > $j.known.bed
    done

    mkdir exonAA exonNuc ppredAA ppredNuc
    # Write (not run) four mafGene commands per chrom.  The gene table given
    # to mafGene is knownGene, restricted by -geneBeds to the canonical set.
    for j in `sort -nk 2 /cluster/data/$db/chrom.sizes | awk '{print $1}'`
    do
	echo "date"
	echo "mafGene -geneBeds=$j.known.bed  $db $mz knownGene order.lst stdout | \
	    gzip -c > ppredAA/$j.ppredAA.fa.gz"
	echo "mafGene -geneBeds=$j.known.bed -noTrans $db $mz knownGene order.lst stdout | \
	    gzip -c > ppredNuc/$j.ppredNuc.fa.gz"
	echo "mafGene -geneBeds=$j.known.bed -exons -noTrans $db $mz knownGene order.lst stdout | \
	    gzip -c > exonNuc/$j.exonNuc.fa.gz"
	echo "mafGene -geneBeds=$j.known.bed -exons $db $mz knownGene order.lst stdout | \
	    gzip -c > exonAA/$j.exonAA.fa.gz"
    done > $gp.$mz.jobs

    # Run the job file in the background and follow its log.
    time sh -x $gp.$mz.jobs > $gp.$mz.job.log 2>&1 &
    sleep 1
    tail -f $gp.$mz.job.log

# real    302m20.489s
# user    27m31.179s
# sys     5m30.071s


    # The per-chrom bed files are no longer needed once the jobs are done.
    rm *.known.bed
    mz=multiz60way
    gp=knownCanonical
    db=mm10
    # Merge the per-chrom files; the c*.gz glob only picks up files whose
    # names begin with "c".
    zcat exonAA/c*.gz | gzip -c > $gp.$mz.exonAA.fa.gz
    zcat exonNuc/c*.gz | gzip -c > $gp.$mz.exonNuc.fa.gz
    zcat ppredAA/c*.gz | gzip -c > $gp.$mz.ppredAA.fa.gz
    zcat ppredNuc/c*.gz | gzip -c > $gp.$mz.ppredNuc.fa.gz

    rm -rf exonAA exonNuc ppredAA ppredNuc

    mz=multiz60way
    gp=knownCanonical
    db=mm10
    # Only the exon files are linked into the download directory.
    pd=/usr/local/apache/htdocs/goldenPath/$db/$mz/alignments
    mkdir -p $pd
    ln -s `pwd`/$gp.$mz.exonAA.fa.gz $pd/$gp.exonAA.fa.gz
    ln -s `pwd`/$gp.$mz.exonNuc.fa.gz $pd/$gp.exonNuc.fa.gz
##############################################################################
# LASTZ Rhesus rheMac2 (DONE - 2014-05-23 - Hiram)
    # Build directory for the mm10 (target) vs rheMac2 (query) alignment.
    mkdir /hive/data/genomes/mm10/bed/lastzRheMac2.2014-05-23
    cd /hive/data/genomes/mm10/bed/lastzRheMac2.2014-05-23

    # DEF is the parameter file handed to doBlastzChainNet.pl below.
    cat << '_EOF_' > DEF
# rhesus vs mouse
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.52/bin/lastz
# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: Rhesus RheMac2
SEQ2_DIR=/scratch/data/rheMac2/rheMac2.2bit
SEQ2_LEN=/scratch/data/rheMac2/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LAP=0
SEQ2_LIMIT=300

BASE=/hive/data/genomes/mm10/bed/lastzRheMac2.2014-05-23
TMPDIR=/scratch/tmp
'_EOF_'
    # << happy emacs

    #	establish a screen to control this job
    screen -S mm10RheMac2
    # Run the pipeline in the background at lowest priority; all output
    # goes to do.log.
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	`pwd`/DEF \
	-syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
    #	real    257m21.255s

    # Coverage of mm10 by the rheMac2 chains.
    cat fb.mm10.chainRheMac2Link.txt
    #	895296744 bases of 2652783500 (33.749%) in intersection

    # Swap: same DEF, run from the rheMac2 side with -swap.
    mkdir /hive/data/genomes/rheMac2/bed/blastz.mm10.swap
    cd /hive/data/genomes/rheMac2/bed/blastz.mm10.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzRheMac2.2014-05-23/DEF \
	-swap -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
    #	real    74m36.098s

    # Coverage of rheMac2 by the mm10 chains.
    cat fb.rheMac2.chainMm10Link.txt
    #	875700775 bases of 2646704109 (33.086%) in intersection

############################################################################
# FaceBase Microarray track (DONE - 2014-05-21 - Pauline)
    # establish a screen to control this job with a name to indicate what it is
    # Working directory for the FaceBase microarray track.
    mkdir /hive/data/genomes/mm10/bed/FaceBase24SampleTypesAvg
    cd /hive/data/genomes/mm10/bed/FaceBase24SampleTypesAvg

    # Fetch the bed file from the FaceBase hub and load it as the
    # FaceBase24SampleTypesAvg table.
    wget --timestamping http://genomebrowser.facebase.org/myHub/mm10/FaceBase_24Samp_Types_Averaged.bed
    hgLoadBed mm10 FaceBase24SampleTypesAvg FaceBase_24Samp_Types_Averaged.bed

    #For microarray tracks also need to add a section to
    #/cluster/home/pauline/kent/src/hg/makeDb/hgCgiData/Mouse/microarrayGroups.ra

##############################################################################
# RepeatMasker Visualization track (DONE - 2014-07-25 - Hiram)
    # Working directory for the RepeatMasker visualization tables.
    mkdir /hive/data/genomes/mm10/bed/rmskJoined
    cd /hive/data/genomes/mm10/bed/rmskJoined

    # Inputs are linked in from the existing repeatMasker directory.
    ln -s ../repeatMasker/mm10.sorted.fa.out .
    ln -s ../repeatMasker/mm10.fa.align.gz .

    # working on fixing this script for the next release of RM
    # since mm10 was an older version of RM, this conversion needs the
    # bedtools, thus the extra PATH business

    export PATH=/cluster/bin/bedtools:$PATH
        /scratch/data/RepeatMasker140131/util/nextVerRmToUCSCTables.pl \
            -out mm10.sorted.fa.out -align mm10.fa.align.gz

    # Load the converter's outputs: the joined bed file, the alignment tsv,
    # and the .out file itself.  Each loader's output is kept in a log file.
    hgLoadBed -sqlTable=$HOME/kent/src/hg/lib/rmskJoined.sql \
        -renameSqlTable -verbose=4 -tab \
            -type=bed4+9 -as=$HOME/kent/src/hg/lib/rmskJoined.as mm10 \
                rmskJoinedBaseline mm10.sorted.fa.join.bed \
                    > loadJoined.log 2>&1

    # NOTE(review): this path is hard-coded to one user's home directory
    # while the hgLoadBed command above uses $HOME.
    hgLoadSqlTab mm10 rmskAlignBaseline \
        /cluster/home/hiram/kent/src/hg/lib/rmskAlign.sql \
            mm10.fa.align.tsv > loadAlign.log 2>&1

    hgLoadOutJoined -verbose=2 mm10 mm10.sorted.fa.out > loadOut.log 2>&1

    featureBits -countGaps mm10 rmskJoinedBaseline
    #    2243474717 bases of 2730871774 (82.152%) in intersection

##############################################################################
# cloneEnds (DONE - 2014-08-11 - Steve)

    # Working directory for all of the clone end processing.
    mkdir /hive/data/genomes/mm10/bed/cloneEnds
    cd /hive/data/genomes/mm10/bed/cloneEnds

    # fetch the NCBI INSDC name correspondence file:
    rsync -a -P rsync://ftp.ncbi.nlm.nih.gov/genomes/ASSEMBLY_REPORTS/All/GCF_000001635.23.assembly.txt ./

    # fetch the clone reports
    # NOTE(review): the reports are for GCF_000001635.22 while the assembly
    # report fetched above is GCF_000001635.23 -- confirm the sequence names
    # are the same in both versions.
    mkdir reports
    rsync -a -P \
rsync://ftp.ncbi.nih.gov/repository/clone/reports/Mus_musculus/*.GCF_000001635.22.103.*.gff \
       ./reports/

    # script to establish refSeq to UCSC chrom names:

    cat << '_EOF_' > refSeqNames.pl
#!/usr/bin/env perl

# Read the NCBI assembly report GCF_000001635.23.assembly.txt from the
# current directory and print, to stdout, one tab-separated line per
# sequence:  <column 7 of the report>  <UCSC chrom name>
#
# The UCSC name is built from column 3 (chromosome), column 5 (contig
# accession) and column 2 (sequence type):
#   name in column 1 without "_"  -> chrN
#   alt-scaffold                  -> chrN_<contig>_alt
#   unlocalized-scaffold          -> chrN_<contig>_random
#   unplaced-scaffold             -> chrUn_<contig>
# where <contig> has its ".version" suffix removed and MT becomes M.

use strict;
use warnings;

my $assemblyReport = "GCF_000001635.23.assembly.txt";

open (my $fh, "<", $assemblyReport) or die "can not read $assemblyReport: $!";
while (my $line = <$fh>) {
  chomp $line;
  next if ($line =~ m/^#/);
  my @a = split('\t', $line);
  my $chrN = $a[2];
  my $refSeq = $a[6];
  my $contig = $a[4];
  my $type = $a[1];
  # lines too short to carry all of the needed columns are ignored
  next if (!defined $type);
  next if (!defined $refSeq);
  next if (!defined $contig);
  my $suffix = "";
  if ($type eq "alt-scaffold") {
     $suffix = "_alt";
  } elsif ($type eq "unlocalized-scaffold") {
     $suffix = "_random";
  } elsif ($type eq "unplaced-scaffold") {
     $chrN = "Un";
  }
  $chrN = "M" if ($chrN eq "MT");
  if ($a[0] =~ m/_/) {
    # remove the whole trailing version number, not just its first digit
    $contig =~ s/\.[0-9]+$//;
    printf "%s\tchr%s_%s%s\n", $refSeq, $chrN, $contig, $suffix;
  } else {
    printf "%s\tchr%s\n", $refSeq, $chrN;
  }
}
close ($fh);
'_EOF_'
    # << happy emacs

    chmod +x refSeqNames.pl

    # refSeq.ucscName.tab: two columns, sequence name and UCSC chrom name;
    # it is read by the GFF conversion script further below.
    ./refSeqNames.pl > refSeq.ucscName.tab

    # establish full library list:
    # the library name is the part of each report file name before the
    # first "."
    ls reports/*.GCF_000001635.22.103.*.gff | sed -e 's#reports/##' \
       | cut -d"." -f1 | sort -u > library.list.txt

    # a script to scan the GFF files, with the refSeq.ucscName.tab
    # name correspondence to construct bed files

    cat << '_EOF_' > mm10.pl
#!/usr/bin/env perl

# Convert NCBI clone report GFF files into UCSC bed items.
#
# usage: ./mm10.pl <report.gff> [moreReports.gff]
#
# Files read in addition to the GFF arguments:
#   refSeq.ucscName.tab (current directory): sequence name -> UCSC chrom
#   /hive/data/genomes/mm10/chrom.sizes: UCSC chrom -> chrom size
#
# Output:
#   STDOUT  one bed12 line (two blocks) per clone insert for which both a
#           clone_insert_start and a clone_insert_end record were accepted
#   STDERR  one bed6 line for every GFF record on a known chrom, mixed with
#           "# ..." diagnostic lines

use strict;
use warnings;

my $argc = scalar(@ARGV);

if ($argc < 1) {
  print STDERR "usage: ./mm10.pl <report.gff> [moreReports.gff]\n";
  exit 255;
}

my %refSeqToUcsc;   # key is refSeq name, value is UCSC chrom name
open (my $namesFh, "<", "refSeq.ucscName.tab") or die "can not read refSeq.ucscName.tab: $!";
while (my $line = <$namesFh>) {
  chomp $line;
  my ($refSeq, $ucsc) = split('\t', $line);
  next if (!defined $ucsc);   # blank or single column line
  $refSeqToUcsc{$refSeq} = $ucsc;
}
close ($namesFh);

my %chromSizes;    # key is UCSC chrom name, value is chrom size
open (my $sizesFh, "<", "/hive/data/genomes/mm10/chrom.sizes") or die "can not read mm10/chrom.sizes: $!";
while (my $line = <$sizesFh>) {
  chomp $line;
  my ($chr, $size) = split('\t', $line);
  next if (!defined $size);   # blank or single column line
  $chromSizes{$chr} = $size;
}
close ($sizesFh);

# each GFF file is processed independently, the hashes below are per file
foreach my $file (@ARGV) {
my %starts;   # key is parent ID, value is "chromStart\tchromEnd" of its clone_insert_start
my %ends;	# key is parent ID, value is "chromStart\tchromEnd" of its clone_insert_end
my %parents;	# key is an ID without '-', value is the UCSC chrom it was seen on
my %endNames;   # key is parent ID, value is the Name of the parent clone_insert

# diagnostics use print, not printf, so a '%' in the data is never taken
# as a format directive
print STDERR "# processing $file\n";

open (my $gffFh, "<", $file) or die "can not read $file: $!";
while (my $line = <$gffFh>) {
  chomp $line;
  next if ($line=~ m/^#/);
  my @a = split('\t', $line);
  next if (scalar(@a) < 1);
  # columns 1, 3, 4, 5, 7 and 9 are all used below
  if (scalar(@a) < 9) {
    print STDERR "# ERR: expected 9 columns, found " . scalar(@a) . " at $file line $.\n";
    next;
  }
  # reduce a name of the form ref|NAME| to NAME
  my $contig = $a[0];
  $contig =~ s/ref.//;
  $contig =~ s/\|//;
  my $ucscChr = $refSeqToUcsc{$contig};
  if (!defined($ucscChr)) {
    print STDERR "# ERR: contig not in refSeqToUcsc: '$contig'\n";
    next;
  }
  next if (! exists($chromSizes{$ucscChr}));
  my $chromSize = $chromSizes{$ucscChr};
  # GFF start is 1-based, bed start is 0-based
  my $chromStart = $a[3] - 1;
  my $chromEnd = $a[4];
  # coordinates past the end of the chrom are clipped and reported
  if ($chromStart > $chromSize) {
    print STDERR "# warning chromStart over size $ucscChr $chromStart $chromEnd\n";
    $chromStart = $chromSize-1;
  }
  if ($chromEnd > $chromSize) {
    my $overRun = $chromEnd - $chromSize;
    print STDERR "# warning chromEnd over size by $overRun -> $ucscChr $chromStart $chromEnd\n";
    $chromEnd = $chromSize;
  }
  # pick ID, Parent and Name out of the tag=value;tag=value attribute column
  my $id="notFound";
  my $name="notFound";
  my $parent="notFound";
  my @b = split(';', $a[8]);
  foreach my $attribute (@b) {
     my ($tag, $value) = split('=', $attribute);
     if ($tag eq "ID") {
        $id = $value;
        # an ID without '-' is recorded as a possible parent
        if ($id !~ m/-/) {
          if (exists($parents{$id})) {
            print STDERR "# WARN: duplicate parent: $id\n";
          } else {
            $parents{$id} = $ucscChr;
          }
        }
     } elsif ($tag eq "Parent") {
        $parent = $value;
     } elsif ($tag eq "Name") {
        $name = $value;
     }
  }
  # a start or end record is kept only when its parent was already seen,
  # no other start/end was kept for that parent, and it is on the same
  # chrom as the parent
  my $type="notFound";
  my $insertType = $a[2];
  if ($insertType =~ m/clone_insert_start/) {
     $type = "start";
     if ($parent eq "notFound") {
       print STDERR "# ERR: can not find parent for start $name Ttype $id\n";
     } else {
       if (!exists($parents{$parent})) {
         print STDERR "# ERR: start found $name  with no parent $parent declared\n";
       } elsif (exists($starts{$parent})) {
         print STDERR "# ERR: duplicate start for $parent\n";
       } elsif ($ucscChr eq $parents{$parent}) {
         $starts{$parent} = sprintf("%s\t%s", $chromStart, $chromEnd);
       } else {
         print STDERR "# ERR: start on different chrom $ucscChr than parent $parent $parents{$parent}\n";
       }
     }
  } elsif ($insertType =~ m/clone_insert_end/) {
     $type = "end";
     if ($parent eq "notFound") {
       print STDERR "# ERR: can not find parent for end $name Ttype $id\n";
     } else {
       if (!exists($parents{$parent})) {
         print STDERR "# ERR: end found $name  with no parent $parent declared\n";
       } elsif (exists($ends{$parent})) {
         print STDERR "# ERR: duplicate end for $parent\n";
       } elsif ($ucscChr eq $parents{$parent}) {
         $ends{$parent} = sprintf("%s\t%s", $chromStart, $chromEnd);
       } else {
         print STDERR "# ERR: end on different chrom $ucscChr than parent $parent $parents{$parent}\n";
       }
     }
  } elsif ($insertType =~ m/clone_insert/) {
     $type = "insert";
     $endNames{$id} = $name;
  }
  $name =~ s/gi\|//g;
  $id =~ s/gi\|//g;
  # bed6 item for this record, on STDERR together with the diagnostics
  printf STDERR "%s\t%d\t%d\t%s_%s_%s\t0\t%s\n", $ucscChr, $chromStart, $chromEnd, $name, $type, $id, $a[6];
}       # while (my $line = <$gffFh>)

close ($gffFh);

# one bed12 item per parent that has both a start and an end
foreach my $parent (keys %parents) {
  if (! exists($starts{$parent}) ) {
    print STDERR "# ERR: no start for $parent\n";
  } elsif (! exists($ends{$parent}) ) {
    print STDERR "# ERR: no end for $parent\n";
  } else {
    my $strand = "+";
    my $chrStart = 0;
    my $chrEnd = 0;
    my $blockStart = 0;
    my ($sStart, $sEnd) = split('\t', $starts{$parent});
    my ($eStart, $eEnd) = split('\t', $ends{$parent});
    my $startSize = $sEnd - $sStart;
    my $endSize = $eEnd - $eStart;
    if ($eStart < $sStart) {
      # the end record comes first on the chrom: negative strand, and the
      # end record becomes the first block
      $chrStart = $eStart;
      $chrEnd = $sEnd;
      $blockStart = $sStart - $chrStart;
      $strand = "-";
      $startSize = $eEnd - $eStart;
      $endSize = $sEnd - $sStart;
    } else {
      $chrStart = $sStart;
      $chrEnd = $eEnd;
      $blockStart = $eStart - $chrStart;
    }
    # the first block must not run into the second block
    if ($startSize > $blockStart) {
      print STDERR "# startSize > blockStart $endNames{$parent}\n";
    } else {
      printf "%s\t%d\t%d\t%s\t0\t%s\t%d\t%d\t0\t2\t%d,%d\t0,%d\n", $parents{$parent}, $chrStart, $chrEnd, $endNames{$parent}, $strand, $chrStart, $chrEnd, $startSize, $endSize, $blockStart;
    }
  }
}

}
'_EOF_'
    # << happy emacs

    chmod +x mm10.pl

    # process GFF files into bed files into separateLibs/ directory
    # db is the fifth "/"-separated field of the physical working directory,
    # which is mm10 when run from /hive/data/genomes/mm10/bed/cloneEnds.
    # The script's stdout is sorted into <db>.<lib>.bed; its stderr is
    # captured in tmp.bed6 and then sorted into <db>.<lib>.items.bed6.
for L in `cat library.list.txt`
do
   export db="`pwd -P | awk -F'/' '{print $5}'`"
   export destDir="separateLibs/${L}"
   echo "working: ${L}"
   mkdir -p "${destDir}"
   ./${db}.pl reports/${L}.GCF_000001635.22.103.*.gff \
       2> ${destDir}/tmp.bed6 | sort -k1,1 -k2,2n > ${destDir}/${db}.${L}.bed
   sort -k1,1 -k2,2n ${destDir}/tmp.bed6 > ${destDir}/${db}.${L}.items.bed6
done

    # use only those libraries with more than 20,000 clone ends
    # (line counts of the .bed files; the sed leaves only the library name)
    wc -l separateLibs/*/*.bed | sort -n | grep -v total | awk '$1 > 20000' \
        | sed -e 's#.*separateLibs/##; s#/.*##' > libs.over20K.list

    # note those libraries with less than 20,000 clone ends
    # NOTE(review): a library with exactly 20000 lines lands in neither list.
    wc -l separateLibs/*/*.bed | grep -v total | awk '$1 < 20000' | sed -e 's#.*separateLibs/##; s#/.*##' > libs.under20K.list

    # filter out bad ends: items shorter than three times the median item
    # length of their library go to mm10.median3X.bed, all others go to
    # mm10.badMap.bed.  The library list is libs.over20K.list.
cat libs.over20K.list | while read L
do
   # lengths.txt is only computed when missing or empty
   if [ ! -s separateLibs/${L}/lengths.txt ]; then
      awk '{print $3-$2}' separateLibs/${L}/mm10.${L}.bed > separateLibs/${L}/lengths.txt
   fi
   median3X=`ave separateLibs/${L}/lengths.txt | grep median | awk '{printf "%d", $2*3}'`
   awk '($3-$2) < '$median3X'' separateLibs/${L}/mm10.${L}.bed > separateLibs/${L}/mm10.median3X.bed
   awk '($3-$2) >= '$median3X'' separateLibs/${L}/mm10.${L}.bed > separateLibs/${L}/mm10.badMap.bed
   # report how many items the filter removed
   before=`cat separateLibs/${L}/mm10.${L}.bed | wc -l`
   after=`cat separateLibs/${L}/mm10.median3X.bed | wc -l`
   dropped=`echo $before $after | awk '{print $1-$2}'`
   perCent=`echo $dropped $before | awk '{printf "%.2f", 100*$1/$2}'`
   echo "$L $before - $after = $dropped -> % $perCent dropped"
done

# B6Ng01 96548 - 95837 = 711 -> % 0.74 dropped
# C3H 42705 - 42378 = 327 -> % 0.77 dropped
# CH29 51200 - 50621 = 579 -> % 1.13 dropped
# DN 101826 - 100472 = 1354 -> % 1.33 dropped
# MHPN 59859 - 58582 = 1277 -> % 2.13 dropped
# MHPP 29074 - 28550 = 524 -> % 1.80 dropped
# MSMg01 81802 - 78772 = 3030 -> % 3.70 dropped
# RP23 83424 - 83062 = 362 -> % 0.43 dropped
# RP24 51112 - 50849 = 263 -> % 0.51 dropped
# WI1 326662 - 324259 = 2403 -> % 0.74 dropped
# bMQ 73519 - 72540 = 979 -> % 1.33 dropped

   # loading the median3X files
# one table per library, named cloneEnd<library> with no separator, which
# is the form used by the score fixup loops later in this section
# (cloneEndB6Ng01, cloneEndC3H, ...); load logs go to filteredEnds/
mkdir -p filteredEnds
for L in `cat libs.over20K.list`
do
    echo $L 1>&2
    hgLoadBed -type=bed12 mm10 cloneEnd${L} \
       separateLibs/${L}/mm10.median3X.bed \
        > filteredEnds/loadBed.${L}.log 2>&1
done

    # construct multiple mapped ends:
# count how often each item name occurs across all filtered libraries
cat separateLibs/*/mm10.median3X.bed | cut -f4 | sort | uniq -c | sort -rn > allEnds.names.count.txt

# names that occur more than once
awk '$1 > 1' allEnds.names.count.txt | awk '{print $2}' | sort > multiples.names.txt

# join needs a single character separator and both inputs sorted on the
# join field, so sort on the name column only and spell the tab as $'\t'
# (bash quoting) so that it can not be turned into spaces by an editor
cat separateLibs/*/mm10.median3X.bed | sort -t$'\t' -k4,4 > allEnds.nameSorted.bed
join -t$'\t' -o "2.1,2.2,2.3,2.4,2.5,2.6,2.7,2.8,2.9,2.10,2.11,2.12" -2 4 multiples.names.txt allEnds.nameSorted.bed | sort -k1,1 -k2,2n > allEnds.multiple.locations.bed

# table name without a separator, as used by the score fixup loops later
# in this section (cloneEndmultipleMaps)
hgLoadBed -type=bed12 mm10 cloneEndmultipleMaps \
   allEnds.multiple.locations.bed > load.multipleMaps.log 2>&1

    # construct bad mapped ends:
# collect the mm10.badMap.bed files of all libraries in libs.over20K.list
# into one bed file sorted by chrom and start, and load it as one table
mkdir -p filteredDroppedEnds

for L in `cat libs.over20K.list`
do
    echo $L 1>&2
    cat separateLibs/${L}/mm10.badMap.bed
done | sort -k1,1 -k2,2n > filteredDroppedEnds/badEnds.bed

hgLoadBed -type=bed12 mm10 cloneEndbadEnds filteredDroppedEnds/badEnds.bed \
       > filteredDroppedEnds/loadBed.badEnds.log 2>&1

    # construct coverage bigWig files:
# one bedGraph per strand: items of all filtered libraries are selected by
# their strand column and passed to bedItemOverlapCount
cat separateLibs/*/mm10.median3X.bed | awk '$6 == "+"' | sort -k1,1 -k2,2n \
    | bedItemOverlapCount mm10 stdin > allEnds.forward.bedGraph

cat separateLibs/*/mm10.median3X.bed | awk '$6 == "-"' | sort -k1,1 -k2,2n \
    | bedItemOverlapCount mm10 stdin > allEnds.reverse.bedGraph

# convert each bedGraph to bigWig
bedGraphToBigWig allEnds.forward.bedGraph /hive/data/genomes/mm10/chrom.sizes \
   cloneEnd_coverageForward.bw

bedGraphToBigWig allEnds.reverse.bedGraph /hive/data/genomes/mm10/chrom.sizes \
   cloneEnd_coverageReverse.bw

    # link the bigWig files into /gbdb and register them in the database
    mkdir /gbdb/mm10/bbi/cloneEnd
    ln -s `pwd`/cloneEnd_coverageForward.bw /gbdb/mm10/bbi/cloneEnd
    ln -s `pwd`/cloneEnd_coverageReverse.bw /gbdb/mm10/bbi/cloneEnd

    # NOTE(review): the other cloneEnd tables are referred to without an
    # underscore in the score fixup loops later in this section -- confirm
    # the names of these two tables in the database.
    hgBbiDbLink mm10 cloneEnd_coverageForward \
        /gbdb/mm10/bbi/cloneEnd/cloneEnd_coverageForward.bw
    hgBbiDbLink mm10 cloneEnd_coverageReverse \
        /gbdb/mm10/bbi/cloneEnd/cloneEnd_coverageReverse.bw

    ### Fixup the scores to indicate how many multiple mappings as mentioned
    ### in the hg19 bacEnds description page: one mapping: score = 1000
    ### multiple mappings: score = 1500/count
    ### the sort | uniq -c | awk does this score calculation with the name
    ###   in column 1
    ### The join puts the existing table together with those scores
    ### DONE - 2016-03-02 - Hiram

    mkdir /hive/data/genomes/mm10/bed/cloneEnds/addCounts
    cd /hive/data/genomes/mm10/bed/cloneEnds/addCounts
    # one directory per intermediate file type, each named once
    mkdir score noScore withScore
    for table in cloneEndB6Ng01 cloneEndC3H cloneEndCH29 cloneEndDN \
cloneEndMHPN cloneEndMHPP cloneEndMSMg01 cloneEndRP23 cloneEndRP24 \
cloneEndWI1 cloneEndbMQ cloneEndbadEnds cloneEndmultipleMaps
do
  # score/: name and new score; 1000 for a name that occurs once in the
  # table, otherwise 1500 divided by the number of occurrences
  hgsql -N -e "select name from $table;" mm10 | sort | uniq -c |
      awk '{ if (1 == $1) {printf "%s\t1000\n", $2} else {printf "%s\t%d\n", $2, 1500/$1} }' \
         | sort > score/mm10.$table.score.tab
  # noScore/: the table as it is now, sorted for the join on column 5 (name)
  hgsql -N -e "select * from $table order by name;" mm10 \
      | sort -k5 > noScore/mm10.$table.tab
  # join prints the name first, then the other 12 table columns, then the
  # new score as column 14; awk restores the table column order with the
  # new score in place of the old one.  The tab separator is written as
  # $'\t' (bash quoting) so that it can not be turned into spaces by an
  # editor.
  join -t$'\t' -1 5 noScore/mm10.$table.tab score/mm10.$table.score.tab \
  | awk '{printf "%d\t%s\t%d\t%d\t%s\t%d\t%s\t%d\t%d\t%d\t%d\t%s\t%s\n", $2,$3,$4,$5,$1,$14,$7,$8,$9,$10,$11,$12,$13}' \
    | sort -k2,2 -k3,3n > withScore/mm10.$table.withScore.tab
  # replace the table contents with the rescored rows
  hgsql -e "delete from $table;" mm10
  hgsql -e "load data local infile \"withScore/mm10.$table.withScore.tab\" into table $table;" mm10
done

    # row count of each table after the reload, in the order listed
    for table in cloneEndB6Ng01 cloneEndC3H cloneEndCH29 cloneEndDN \
cloneEndMHPN cloneEndMHPP cloneEndMSMg01 cloneEndRP23 cloneEndRP24 \
cloneEndWI1 cloneEndbMQ cloneEndbadEnds cloneEndmultipleMaps
do
    hgsql -N -e "select count(*) from $table;" mm10 | cat
done
#     95837
#     42378
#     50621
#    100472
#     58582
#     28550
#     78772
#     83062
#     50849
#    324259
#     72540
#     11809
#      4269

##############################################################################
# LASTZ Cow bosTau8 (DONE - 2014-10-15 - Steve)
    # Build directory for the mm10 (target) vs bosTau8 (query) alignment.
    mkdir /hive/data/genomes/mm10/bed/lastzBosTau8.2014-10-15
    cd /hive/data/genomes/mm10/bed/lastzBosTau8.2014-10-15

    # DEF is the parameter file handed to doBlastzChainNet.pl below.
    cat << '_EOF_' > DEF
# mouse vs cow
# maximum M allowed with lastz is only 254
BLASTZ_M=254

# TARGET: Mouse mm10
SEQ1_DIR=/hive/data/genomes/mm10/nib
SEQ1_LEN=/hive/data/genomes/mm10/chrom.sizes
SEQ1_CHUNK=10000000
SEQ1_LAP=10000

# QUERY: Cow bosTau8
SEQ2_DIR=/hive/data/genomes/bosTau8/bosTau8.2bit
SEQ2_LEN=/hive/data/genomes/bosTau8/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LAP=0

BASE=/hive/data/genomes/mm10/bed/lastzBosTau8.2014-10-15
TMPDIR=/scratch/tmp
'_EOF_'
    # << happy emacs
    # Run the pipeline in the foreground; all output goes to do.log.
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
        `pwd`/DEF \
        -syntenicNet \
        -noLoadChainSplit \
        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
        -chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1
    # real    181m30.700s
    cat fb.mm10.chainBosTau8Link.txt
    # 698722925 bases of 2652783500 (26.339%) in intersection
    # Create link
    cd /hive/data/genomes/mm10/bed
    ln -s  lastzBosTau8.2014-10-15 lastz.bosTau8

    #   and the swap
    # Same DEF, run from the bosTau8 side with -swap.
    mkdir /hive/data/genomes/bosTau8/bed/blastz.mm10.swap
    cd /hive/data/genomes/bosTau8/bed/blastz.mm10.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
        /hive/data/genomes/mm10/bed/lastzBosTau8.2014-10-15/DEF \
        -swap -syntenicNet  \
        -noLoadChainSplit \
        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
        -chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1
    #   real    58m4.272s
    cat fb.bosTau8.chainMm10Link.txt
    # 687270584 bases of 2649307237 (25.942%) in intersection
    # Create link
    cd /hive/data/genomes/bosTau8/bed
    ln -s blastz.mm10.swap lastz.mm10

##############################################################################
##############################################################################
# TransMap V3 tracks. see makeDb/doc/transMapTracks.txt (2014-12-21 markd)
##############################################################################
# LASTZ mouse/mm10 sheep/oviAri3 - (DONE - 2015-01-08 - Hiram)
    # Build directory for the mm10 (target) vs oviAri3 (query) alignment.
    mkdir /hive/data/genomes/mm10/bed/lastzOviAri3.2015-01-08
    cd /hive/data/genomes/mm10/bed/lastzOviAri3.2015-01-08

    # Keep a copy of the tuned scoring matrix in the build directory; the
    # DEF below points BLASTZ_Q at this copy.
    cp -p \
/hive/users/hiram/multiz/100way/mm10.oviAri3/mm10.oviAri3.tuning.top400.txt \
     ./mm10.oviAri3.tuning.Q.txt

    # DEF is the parameter file handed to doBlastzChainNet.pl below.
    cat << '_EOF_' > DEF
# mouse vs sheep
# parameters obtained from a tuning run of lastz_D
# /hive/users/hiram/multiz/100way/mm10.oviAri3/mm10.oviAri3.tuning.top400.txt

BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.54/bin/lastz
BLASTZ_T=2
BLASTZ_O=400
BLASTZ_E=30
BLASTZ_M=254
BLASTZ_X=890
BLASTZ_Y=3400
BLASTZ_Q=/hive/data/genomes/mm10/bed/lastzOviAri3.2015-01-08/mm10.oviAri3.tuning.Q.txt
#       A     C     G     T
# A    89  -172   -40  -184
# C  -172   100  -121   -40
# G   -40  -121   100  -172
# T  -184   -40  -172    89

# TARGET: mouse mm10
SEQ1_DIR=/hive/data/genomes/mm10/mm10.2bit
SEQ1_LEN=/hive/data/genomes/mm10/chrom.sizes
SEQ1_CHUNK=40000000
SEQ1_LIMIT=2
SEQ1_LAP=10000

# QUERY: sheep oviAri3
SEQ2_DIR=/hive/data/genomes/oviAri3/oviAri3.2bit
SEQ2_LEN=/hive/data/genomes/oviAri3/chrom.sizes
SEQ2_CHUNK=20000000
SEQ2_LIMIT=10
SEQ2_LAP=0

BASE=/hive/data/genomes/mm10/bed/lastzOviAri3.2015-01-08
TMPDIR=/dev/shm
'_EOF_'
    # << happy emacs

    # Run the pipeline; all output goes to do.log.
    time (doBlastzChainNet.pl `pwd`/DEF -verbose=2 \
        -chainMinScore=3000 -chainLinearGap=medium \
          -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
            -syntenicNet) > do.log 2>&1
    # real    75m27.412s

    cat fb.mm10.chainOviAri3Link.txt
    # 432006690 bases of 2652783500 (16.285%) in intersection

    # Reciprocal best step, run in the background from the build directory.
    time (doRecipBest.pl -buildDir=`pwd` mm10 oviAri3) > rbest.log 2>&1 &
    # real    17m24.577s

    # and for the swap:
    mkdir /hive/data/genomes/oviAri3/bed/blastz.mm10.swap
    cd /hive/data/genomes/oviAri3/bed/blastz.mm10.swap

    # Same DEF, run from the oviAri3 side with -swap.
    time (doBlastzChainNet.pl -verbose=2 \
      /hive/data/genomes/mm10/bed/lastzOviAri3.2015-01-08/DEF \
        -swap -chainMinScore=3000 -chainLinearGap=medium \
          -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
            -syntenicNet) > swap.log 2>&1
    #  real    31m27.481s

    cat fb.oviAri3.chainMm10Link.txt
    #422549165 bases of 2534335866 (16.673%) in intersection

    time (doRecipBest.pl -buildDir=`pwd` oviAri3 mm10) > rbest.log 2>&1
    # real    16m45.956s

#########################################################################
# RETROFINDER RETROPOSED GENES ucscRetro track VERSION 6
# (2015-01-02 - 2015-01-07, hartera, DONE)
# Create and enter the run directory; mkdir and cd must name the same path
# (it is also the RUNDIR that the DEF file of this section resolves to).
ssh hgwdev
mkdir -p /hive/groups/gencode/pseudogenes/retroFinder/mm10.20150102
cd /hive/groups/gencode/pseudogenes/retroFinder/mm10.20150102

# DEF holds the settings for this RetroFinder run as shell variable
# assignments; it is passed to the ucscStep*.sh pipeline scripts below.
# NOTE(review): BASE is assigned twice in a row; both assignments expand to
# the same path, so the first one is redundant.
cat << '_EOF_' > DEF

RETRO_OPTIONS="-verbose=4 -minAli=0.98 -nearTop=0.005 "
RUNDATE="2015-01-02"
DB=mm10
SCORETHRESH=510
GENOMENAME='Mus musculus'
GBDB=mm
DATE=20150102
VERSION=6
RUNDIR=/hive/groups/gencode/pseudogenes/retroFinder/$DB.$DATE
BINDIR=/hive/users/hartera/GencodeWG/retroFinder/branches/version2/bin
KENTDIR=/cluster/home/hartera/kent
KENTBINDIR=/cluster/bin/x86_64
MRNABASE=/hive/data/genomes/$DB/bed/mrnaBlastz.$VERSION
TMPMRNA=$RUNDIR/mrnaBlastz/$DB
TMPEST=$RUNDIR/est/$DB
USEALTSEQS=0
EST=all_est
SPLICED_EST=intronEst
SPLIT_EST=0
SPLIT_SPLICED_EST=1
LASTZPROG=/cluster/bin/penn/x86_64/lastz
SCRIPT=/hive/users/hartera/GencodeWG/retroFinder/branches/version2/src/pipeline
GENOME=/hive/data/genomes
TWOBIT=$GENOME/$DB/$DB.2bit
RETRODIR=$GENOME/$DB/bed/retro
BASE=$RUNDIR/retro
BASE=/hive/groups/gencode/pseudogenes/retroFinder/mm10.${DATE}/retro
OUTDIR=${BASE}/version${VERSION}/${DB}
RESULT=$OUTDIR/result
RESULTSPLIT=$OUTDIR/resultSplit
LOG=$OUTDIR/log
OUT=$OUTDIR/out
OVERLAPDIR=$OUTDIR/run.o
TABLE=ucscRetroInfo$VERSION
ORTHOTABLE=ucscRetroOrtho$VERSION
ALIGN=ucscRetroAli$VERSION
LOCAL=/scratch/data/$DB
NIB=$LOCAL/nib
RMSK=rmsk
NET1=netHg38
NET2=netCanFam3
NET3=netRn5
GENE1=knownGene
GENE2=refGene
GENE3=wgEncodeGencodeCompVM4
CLUSTER=ku
SPECIES="hg38 mm10"
ROOTDIR="~/public_html/retro/mm10Jul14"
WEBROOT=$ROOTDIR/retro.$RUNDATE
WEBSERVER=http://hgwdev-hartera.soe.ucsc.edu
SHUFFLEDIR=shuffle
SHUFFLEROOT=$WEBROOT/$SHUFFLEDIR
DUPDIR=dups
DUPROOT=$WEBROOT/$DUPDIR
AGEDIR=age
AGEROOT=$WEBROOT/$AGEDIR
EXPDIR=exp
GENEPFAM=knownGene
PFAM=knownToPfam
PFAMIDFIELD=name
PFAMDOMAIN=value
ARRAY=gnfAtlas2
AFFYPROBE=affyGnf1m
ARRAYMEDIAN=hgFixed.gnfMouseAtlas2Median
ARRAYRATIO=hgFixed.gnfMouseAtlas2AllRatio
ARRAYABS=hgFixed.gnfMouseAtlas2All
ARRAYEXP=hgFixed.gnfMouseAtlas2MedianExps
ARRAYEXPALL=hgFixed.gnfMouseAtlas2AllExps
# ARRAYLOOKUP=knownToGnfAtlas2
#ARRAYPSLS="/hive/data/genomes/mm9/bed/geneAtlas2/affyGnf1m.psl"
ALTSPLICE=sibTxGraph
SPLITBYAGE=$SCRIPT/splitRetrosByAgeMouse
PDB=proteins140122
BREAKS=0,8,16,24,32
XLIM=34
YLIM=0.1
YLIM1=4000
YLIM2=160
MAXDIVERGENCE=32
'_EOF_'
    # << happy emacs
chmod +x DEF

mkdir -p /hive/data/genomes/mm10/bed/retro
mkdir -p /hive/data/genomes/mm10/bed/mrnaBlastz.6
cd /hive/data/genomes/mm10/bed/mrnaBlastz.6
# Create S1.len file
foreach c (1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 X Y)
  echo $c
  hgsql -Ne "select chrom, size from chromInfo where chrom='chr${c}';" mm10 \
    >> S1.len
end

# NOTE: in future, use /hive/data/genomes/mm10/chrom.sizes for S1.len
# and just remove randoms and chrM.
cd /hive/groups/gencode/pseudogenes/retroFinder/mm10.20150102
mkdir mrnaBlastz
cd mrnaBlastz
cp ../DEF .
cp /hive/data/genomes/mm10/bed/mrnaBlastz.6/S1.len .

screen
# Run steps 1 to 5 of RetroFinder pipeline from scripts in CCDS SVN source tree:
retroFinder/branches/version2/src/pipeline/ucscStep1.sh DEF
# check cluster jobs on ku
retroFinder/branches/version2/src/pipeline/ucscStep2.sh DEF
retroFinder/branches/version2/src/pipeline/ucscStep3.sh DEF
#check cluster jobs on ku
retroFinder/branches/version2/src/pipeline/ucscStep4.sh DEF
#check cluster jobs on ku
    # Load the track
retroFinder/branches/version2/src/pipeline/ucscStep5.sh DEF
cd /hive/groups/gencode/pseudogenes/retroFinder/mm10.20150102/retro/version6/mm10
retroFinder/branches/version2/src/pipeline/filterMrna.sh
retroFinder/branches/version2/src/pipeline/filterEst.sh
# Check cluster jobs on ku
retroFinder/branches/version2/src/pipeline/analyseExpress.sh
# Check cluster jobs on ku
# added ucscRetroAli6 to kent/src/hg/makeDb/trackDb/mouse/mm10/trackDb.ra
# copied
# /hive/groups/gencode/pseudogenes/retroFinder/mm10.20150102/retro/version6/mm10/trackDb.retro
# entry to kent/src/hg/makeDb/trackDb/mouse/mm10/trackDb.ra
# and edited it to remove the full data and add:
# dataVersion Jan. 2015
# Scripts copied ucscRetroAli6.psl, ucscRetroInfo6.bed and ucscRetroCds6.tab
# to /hive/data/genomes/mm10/bed/retro/

##############################################################################
# LASTZ mouse/mm10 tarsier/tarSyr2 - (DONE - 2015-03-27 - Hiram)
    mkdir /hive/data/genomes/mm10/bed/lastzTarSyr2.2015-03-27
    cd /hive/data/genomes/mm10/bed/lastzTarSyr2.2015-03-27

    cat << '_EOF_' > DEF
# tarsier vs mouse
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.54/bin/lastz
# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: Tarsier TarSyr2
SEQ2_DIR=/hive/data/genomes/tarSyr2/tarSyr2.2bit
SEQ2_LEN=/hive/data/genomes/tarSyr2/chrom.sizes
SEQ2_CHUNK=20000000
SEQ2_LIMIT=800
SEQ2_LAP=0

BASE=/hive/data/genomes/mm10/bed/lastzTarSyr2.2015-03-27
TMPDIR=/dev/shm
'_EOF_'
    # << happy emacs

    time (doBlastzChainNet.pl -verbose=2 `pwd`/DEF \
      -chainMinScore=3000 -chainLinearGap=medium \
        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku) > do.log 2>&1
    # real    301m17.238s

    time (doBlastzChainNet.pl -verbose=2 `pwd`/DEF \
      -chainMinScore=3000 -chainLinearGap=medium \
        -continue=syntenicNet -syntenicNet -workhorse=hgwdev \
          -smallClusterHub=ku -bigClusterHub=ku) > synNet.log 2>&1
    # real    16m5.061s

    cat fb.mm10.chainTarSyr2Link.txt
    # 856877439 bases of 2652783500 (32.301%) in intersection

    time (doRecipBest.pl -buildDir=`pwd` mm10 tarSyr2) > rbest.log 2>&1 &
    #  real    27m4.048s

    # and for the swap:
    mkdir /hive/data/genomes/tarSyr2/bed/blastz.mm10.swap
    cd /hive/data/genomes/tarSyr2/bed/blastz.mm10.swap

    time (doBlastzChainNet.pl -verbose=2 \
      /hive/data/genomes/mm10/bed/lastzTarSyr2.2015-03-27/DEF \
        -swap -chainMinScore=3000 -chainLinearGap=medium \
          -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
            -syntenicNet) > swap.log 2>&1
    #  real    181m7.042s

    cat fb.tarSyr2.chainMm10Link.txt
    #  900229088 bases of 3405755564 (26.433%) in intersection

    time (doRecipBest.pl -buildDir=`pwd` tarSyr2 mm10) > rbest.log 2>&1
    #  real    77m29.742s

#########################################################################
# UCSC to RefSeq name correspondence (DONE - 2015-04-15 - Hiram)

    mkdir /hive/data/genomes/mm10/bed/ucscToRefSeq
    cd /hive/data/genomes/mm10/bed/ucscToRefSeq

    rsync -avPL \
  rsync://ftp.ncbi.nlm.nih.gov/genomes/genbank/vertebrate_mammalian/Mus_musculus/all_assembly_versions/GCA_000001635.5_GRCm38.p3/GCA_000001635.5_GRCm38.p3_assembly_report.txt ./

    # this assembly_report has "UCSC-style-name" in column 10
    # but it does not name everything

    # columns 5 and 7 are the INSDC and RefSeq names
    grep -v "^#" GCA_000001635.5_GRCm38.p3_assembly_report.txt \
      | awk -F'\t' '{printf "%s\t%s\n", $5,$7}' | sort > insdc.refSeq.tab

    # chrM/MT confusion fixed by sed
    hgsql -N -e 'select name,chrom,chromStart,chromEnd from ucscToINSDC;' mm10 \
      | sed -e 's/NC_005089.1/AY172335.1/;' | sort > insdc.ucsc.tab

    join insdc.ucsc.tab insdc.refSeq.tab | tr '[ ]' '[\t]' \
       | cut -f2- > ucsc.refSeq.tab

    export chrSize=`cut -f1 ucsc.refSeq.tab | awk '{print length($0)}' | sort -n | tail -1`
    sed -e "s/21/$chrSize/" $HOME/kent/src/hg/lib/ucscToINSDC.sql \
       | sed -e 's/INSDC/RefSeq/g;' > ucscToRefSeq.sql
    hgLoadSqlTab mm10 ucscToRefSeq ./ucscToRefSeq.sql ucsc.refSeq.tab

    checkTableCoords  mm10 -table=ucscToRefSeq
    featureBits -countGaps mm10 ucscToRefSeq
    # 2730871774 bases of 2730871774 (100.000%) in intersection

    # fixup 2016-04-11 - Hiram
    # the chrM name is not correct, it was RefSeq instead of Genbank/INSDC:
    hgsql -e 'select * from ucscToINSDC where name="NC_005089.1";' mm10
+-------+------------+----------+-------------+
| chrom | chromStart | chromEnd | name        |
+-------+------------+----------+-------------+
| chrM  |          0 |    16299 | NC_005089.1 |
+-------+------------+----------+-------------+

    hgsql -e 'update ucscToINSDC set name="AY172335.1" where name="NC_005089.1";' mm10

    hgsql -e 'select * from ucscToINSDC where name="AY172335.1";' mm10
+-------+------------+----------+------------+
| chrom | chromStart | chromEnd | name       |
+-------+------------+----------+------------+
| chrM  |          0 |    16299 | AY172335.1 |
+-------+------------+----------+------------+



#########################################################################
# download and load ncbiGene track ( DONE - 2015-06-09 - Brian)

db=mm10
mkdir  /cluster/data/genomes/$db/bed/ncbiGene
cd  /cluster/data/genomes/$db/bed/ncbiGene

# NCBI GFF3 annotation to fetch; gff3File is the local file name wget writes
ftpFile=ftp://ftp.ncbi.nlm.nih.gov/genomes/M_musculus/GFF/ref_GRCm38.p3_top_level.gff3.gz
gff3File=`basename $ftpFile`

# liftUp spec built from ucscToRefSeq, one line per sequence:
#   offset(0)  RefSeq name  chromEnd  UCSC chrom  chromEnd
echo "select * from ucscToRefSeq" | hgsql $db | tail -n +2 | awk '{print 0, $4, $3, $1, $3}' > refSeqToUcsc.lft
# remove any previous local copy (the local name, not the URL) so that a
# re-run does not leave wget writing to $gff3File.1 beside a stale file
rm -f $gff3File
wget $ftpFile

/cluster/home/braney/bin/x86_64/gff3ToGenePred -useName -warnAndContinue -attrsOut=attrs -bad=bad.gp $gff3File stdout 2> convertErr.txt | liftUp -type=.gp -extGenePred lift.gp refSeqToUcsc.lft warn  stdin 2> liftErr.txt
wc -l lift.gp
# 108567 lift.gp
wc -l bad.gp
# 189

tawk '{print $1}'  attrs | sort | uniq > meta
wc -l meta
# 110847 meta
for i in product Dbxref gene gbkey
do
    echo $i
    tawk -v attr=$i '$2==attr {print $1,$3}' attrs | sort | uniq | join -t $'\t' /dev/stdin meta > out
    mv out meta
done
wc -l meta
# 109420 meta

egrep "^N(M|R|P)" lift.gp > curated.gp
egrep "^X(M|R)" lift.gp > predicted.gp

wc -l curated.gp predicted.gp
#33545 curated.gp
#70587 predicted.gp
#104132 total

cat curated.gp predicted.gp | awk '{print $1}' | sort -u > tmp1
cat meta | awk '{print $1}' | sort -u > tmp2
join -v 1 tmp1 tmp2 | wc -l
# 0

grep dropping convertErr.txt | wc -l
#    189

awk '/isn/ {print $1}' liftErr.txt | sort -u
#    NT_166322.1
#    NT_187001.1

# load the curated and predicted sets as separate genePredExt tables
hgLoadGenePred -genePredExt $db ncbiRefCurated curated.gp
hgLoadGenePred -genePredExt $db ncbiRefPredicted predicted.gp
# $kent is not set anywhere in this section; use the $HOME/kent source tree
# as the other hgLoadSqlTab/hgLoadBed steps in this file do
hgLoadSqlTab $db ncbiRefLink $HOME/kent/src/hg/lib/ncbiRefLink.sql meta

hgsql -e 'INSERT INTO trackVersion \
    (db, name, who, version, updateTime, comment, source, dateReference) \
    VALUES("mm10", "ncbiRefSeq", "braney", "105", now(), \
    "http://www.ncbi.nlm.nih.gov/genome/annotation_euk/Mus_musculus/105/", \
    "ftp://ftp.ncbi.nlm.nih.gov/genomes/M_musculus", \
    "9 February 2015" );' hgFixed

#
#############################################################################
# hgPal downloads (DONE braney 2015-06-02)
#   CDS FASTA from 60-way for knownGene

    ssh hgwdev
    screen -S mm10HgPal
    mkdir /hive/data/genomes/mm10/bed/multiz60way/pal
    cd /hive/data/genomes/mm10/bed/multiz60way/pal
    cat ../species.list | tr '[ ]' '[\n]' > order.lst

    export mz=multiz60way
    export gp=knownGene
    export db=mm10
    export I=0
    mkdir exonAA exonNuc
    for C in `sort -nk2 ../../../chrom.sizes | cut -f1`
    do
        I=`echo $I | awk '{print $1+1}'`
	echo "mafGene -chrom=$C -exons -noTrans $db $mz $gp order.lst stdout | gzip -c > exonNuc/$C.exonNuc.fa.gz &"
	echo "mafGene -chrom=$C -exons $db $mz $gp order.lst stdout | gzip -c > exonAA/$C.exonAA.fa.gz &"
        if [ $I -gt 6 ]; then
            echo "date"
            echo "wait"
            I=0
        fi
    done > $gp.jobs
    echo "date" >> $gp.jobs
    echo "wait" >> $gp.jobs

    time nice sh -x $gp.jobs > $gp.jobs.log 2>&1 &
    #   real    80m36.763s

    mz=multiz60way
    gp=knownGene
    db=mm10
    time zcat exonAA/*.gz | gzip -c > $gp.$mz.exonAA.fa.gz
    #   real    1m16.821s
    zcat exonNuc/*.gz | gzip -c > $gp.$mz.exonNuc.fa.gz

    rm -rf exonAA exonNuc

    # we're only distributing exons at the moment
    mz=multiz60way
    gp=knownGene
    db=mm10
    pd=/usr/local/apache/htdocs-hgdownload/goldenPath/$db/$mz/alignments
    mkdir -p $pd
    md5sum *.fa.gz > md5sum.txt
    ln -s `pwd`/$gp.$mz.exonAA.fa.gz $pd/$gp.exonAA.fa.gz
    ln -s `pwd`/$gp.$mz.exonNuc.fa.gz $pd/$gp.exonNuc.fa.gz
    ln -s `pwd`/md5sum.txt $pd/

#
#############################################################################
# hgPal downloads (DONE jcasper 2016-06-22)
#   CDS FASTA from 60-way for knownGene - rebuilt for mm10 ucsc genes v16

    ssh hgwdev
    screen -S mm10HgPal
    mkdir /hive/data/genomes/mm10/bed/multiz60way/pal.ucsc16
    cd /hive/data/genomes/mm10/bed/multiz60way/pal.ucsc16
    cat ../species.list | tr '[ ]' '[\n]' > order.lst

    export mz=multiz60way
    export gp=knownGene
    export db=mm10
    export I=0
    mkdir exonAA exonNuc
    for C in `sort -nk2 ../../../chrom.sizes | cut -f1`
    do
        I=`echo $I | awk '{print $1+1}'`
	echo "mafGene -chrom=$C -exons -noTrans $db $mz $gp order.lst stdout | gzip -c > exonNuc/$C.exonNuc.fa.gz &"
	echo "mafGene -chrom=$C -exons $db $mz $gp order.lst stdout | gzip -c > exonAA/$C.exonAA.fa.gz &"
        if [ $I -gt 6 ]; then
            echo "date"
            echo "wait"
            I=0
        fi
    done > $gp.jobs
    echo "date" >> $gp.jobs
    echo "wait" >> $gp.jobs

    time nice sh -x $gp.jobs > $gp.jobs.log 2>&1
    #   real    87m59.962s

    mz=multiz60way
    gp=knownGene
    db=mm10
    time zcat exonAA/*.gz | gzip -c > $gp.$mz.exonAA.fa.gz
    #   real    1m48.725s
    zcat exonNuc/*.gz | gzip -c > $gp.$mz.exonNuc.fa.gz

    rm -rf exonAA exonNuc

    # we're only distributing exons at the moment
    mz=multiz60way
    gp=knownGene
    db=mm10
    pd=/usr/local/apache/htdocs-hgdownload/goldenPath/$db/$mz/alignments
    mkdir -p $pd
    rm -f $pd/$gp.exonAA.fa.gz $pd/$gp.exonNuc.fa.gz $pd/md5sum.txt
    ln -s `pwd`/$gp.$mz.exonAA.fa.gz $pd/$gp.exonAA.fa.gz
    ln -s `pwd`/$gp.$mz.exonNuc.fa.gz $pd/$gp.exonNuc.fa.gz
    (cd $pd && md5sum *.fa.gz) > md5sum.txt
    ln -s `pwd`/md5sum.txt $pd/


###########################################################################
# GENEID GENE PREDICTIONS (DONE - 2015-06-26 - Hiram)
    ssh hgwdev
    mkdir /hive/data/genomes/mm10/bed/geneid
    cd /hive/data/genomes/mm10/bed/geneid
    wget --timestamping \
http://genome.crg.es/genepredictions/M.musculus/mm10/geneid_v1.4/00README
    wget --timestamping \
http://genome.crg.es/genepredictions/M.musculus/mm10/geneid_v1.4/mm10.geneid.prot
    wget --timestamping \
http://genome.crg.es/genepredictions/M.musculus/mm10/geneid_v1.4/mm10.geneid.gtf
    ldHgGene -gtf -genePredExt mm10 geneid mm10.geneid.gtf

    #  Read 36771 transcripts in 287332 lines in 1 files
    #  36771 groups 66 seqs 1 sources 3 feature types
    #  36771 gene predictions

    featureBits -enrichment mm10 refGene:CDS geneid
# refGene:CDS 1.292%, geneid 1.584%, both 1.028%, cover 79.51%, enrich 50.19x
    featureBits -enrichment mm9 refGene:CDS geneid
# refGene:CDS 1.305%, geneid 1.590%, both 1.040%, cover 79.65%, enrich 50.11x

    featureBits -countGaps mm10 geneid
# 42028722 bases of 2730871774 (1.539%) in intersection
    featureBits -countGaps mm9 geneid
# 41651898 bases of 2725765481 (1.528%) in intersection

##########################################################################
# SGP GENES (DONE - 2015-07-30 - Hiram)
    mkdir /hive/data/genomes/mm10/bed/sgpGene
    cd /hive/data/genomes/mm10/bed/sgpGene
    wget --timestamping \
http://genome.crg.es/genepredictions/M.musculus/mm10/SGP2/hg38/00README
    wget --timestamping \
http://genome.crg.es/genepredictions/M.musculus/mm10/SGP2/hg38/mm10.sgp2.gtf
    wget --timestamping \
http://genome.crg.es/genepredictions/M.musculus/mm10/SGP2/hg38/mm10.sgp2.gff3

    ldHgGene -gtf -genePredExt mm10 sgpGene mm10.sgp2.gtf
    # Read 35235 transcripts in 287314 lines in 1 files
    #   35235 groups 60 seqs 1 sources 3 feature types
    # 35235 gene predictions

    featureBits -enrichment mm10 refGene:CDS sgpGene
# refGene:CDS 1.292%, sgpGene 1.430%, both 1.101%, cover 85.21%, enrich 59.59x

    featureBits -enrichment mm9 refGene:CDS sgpGene
# refGene:CDS 1.305%, sgpGene 1.439%, both 1.113%, cover 85.23%, enrich 59.23x
#########################################################################
# lastz zebrafish danRer10 (DONE - 2015-09-11 - Hiram)
    # establish a screen to control this job with a name to indicate what it is
    screen -S mm10DanRer10
    mkdir /hive/data/genomes/mm10/bed/lastzDanRer10.2015-09-11
    cd /hive/data/genomes/mm10/bed/lastzDanRer10.2015-09-11

    cat << '_EOF_' > DEF
# Mouse vs. zebrafish
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz
BLASTZ_Y=3400
BLASTZ_L=6000
BLASTZ_K=2200
BLASTZ_Q=/scratch/data/blastz/HoxD55.q

# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: zebrafish danRer10
SEQ2_DIR=/hive/data/genomes/danRer10/danRer10.2bit
SEQ2_LEN=/hive/data/genomes/danRer10/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LAP=0
SEQ2_LIMIT=50

BASE=/hive/data/genomes/mm10/bed/lastzDanRer10.2015-09-11
TMPDIR=/dev/shm
'_EOF_'
    # << happy emacs

    # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
    #	number of jobs, 50,000 to something under 100,000
    # when not present, SEQ2_LIMIT is a default 100
    time (doBlastzChainNet.pl -verbose=2 `pwd`/DEF \
        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
        -syntenicNet -chainMinScore=5000 -chainLinearGap=loose) > do.log 2>&1
    #	real    198m3.073s

    cat fb.mm10.chainDanRer10Link.txt
    #	73464192 bases of 2652783500 (2.769%) in intersection

    time (doRecipBest.pl -buildDir=`pwd` mm10 danRer10) > rbest.log 2>&1 &
    #    real    7m8.599s

    #	and for the swap
    mkdir /hive/data/genomes/danRer10/bed/blastz.mm10.swap
    cd /hive/data/genomes/danRer10/bed/blastz.mm10.swap
    time (doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzDanRer10.2015-09-11/DEF \
        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
        -swap -chainMinScore=5000 -chainLinearGap=loose) > swap.log 2>&1 &
    #	real    16m8.387s

    cat  fb.danRer10.chainMm10Link.txt
    #	71611488 bases of 1369683683 (5.228%) in intersection

    time (doRecipBest.pl -buildDir=`pwd` danRer10 mm10) > rbest.log 2>&1
    #    real    7m34.259s

#########################################################################
# DBSNP 142 / SNP142 (DONE 2015-11-20 braney)
    # RedMine #15934
    screen -S mm10dbSnp
    mkdir -p /hive/data/outside/dbSNP/142/mouse_mm10
    cd /hive/data/outside/dbSNP/142/mouse_mm10
    # Look at the directory listing of ftp://ftp.ncbi.nih.gov/snp/database/organism_data/
    # to find the subdir name to use as orgDir below (mouse_10090 in this case).
    # Then click into that directory and look for file names like
    #    b(1[0-9][0-9])_
    # -- use the first num for build setting in config.ra
    # The buildAssembly setting in config.ra is empty because dbSNP stopped including
    # that in file names.
    cat > config.ra <<EOF
db mm10
orgDir mouse_10090
build 142
buildAssembly
refAssemblyLabel GRCm38.p2
ncbiAssemblyReportFile GCF_000001635.22.assembly.txt
ignoreDbSnpContigsFile dbSnpContigsNotInUcsc.txt
liftUp suggested.lft
EOF

#actually ran the script a few times to get the above config.ra with values suggested

    ~/kent/src/hg/utils/automation/doDbSnp.pl config.ra >& do.log & tail -f do.log
    tail -f do.log
# *** All done!


##############################################################################
# FILTER SNP142 (DONE 2015-11-21 braney)
   cd /hive/data/outside/dbSNP/142/mouse_mm10
   zcat snp142.bed.gz \
   | ~/kent/src/hg/utils/automation/categorizeSnps.pl
#Mult:     3276456
#Common:   8213470
#Flagged:  0
#leftover: 70731318

   foreach f ({Mult,Common}.bed.gz)
     mv $f snp142$f
   end
   # Load tables
   foreach subset (Mult Common)
     hgLoadBed -tab -onServer -tmpDir=/data/tmp -allowStartEqualEnd -renameSqlTable \
       mm10 snp142$subset -sqlTable=snp142.sql snp142$subset.bed.gz
   end


##############################################################################
# DBSNP CODING ANNOTATIONS (142) (DONE 2015-11-21 braney)
   # same dbSNP 142 build directory as the steps above (underscore, not hyphen)
   cd /hive/data/outside/dbSNP/142/mouse_mm10
   # ncbiFuncAnnotations.txt has NCBI coords: 0-based, fully closed.
   # For anything except an insertion (0 bases between flanks),
   # we need to add 1 to the end coord.  For an insertion, we need
   # to add 1 to the start coord.  Make a hash of the insertion IDs,
   # then look up each ID in ncbiFuncAnnotations.txt to tell which
   # transform to apply.
   # Note: sort -u with the keys below is too restrictive -- we need full line uniq.
   zcat ncbiFuncAnnotations.txt.gz \
   | perl -we 'open($IDS, "zcat ncbiFuncInsertions.ctg.bed.gz |") || die "ids: $!"; \
             while (<$IDS>) { chomp; $ids{$_} = 1; } \
             close($IDS); \
             %coding = (2=>1, 3=>1, 4=>1, 8=>1, 9=>1, 41=>1, 42=>1, 43=>1, 44=>1, 45=>1); \
             while (<>) { \
               chomp;  @w = split("\t"); # id, ctg, start, end, ... \
               next unless $coding{$w[5]}; \
               $bed4 = join("\t", $w[1], $w[2], $w[3], $w[0]); \
               if (exists $ids{$bed4} && $w[3] == $w[2]+1) { \
                 $w[2]++; # 2-base insertions: increment start coord \
               } else { \
                 $w[3]++; # increment end coord to get half-open \
               } \
               print join("\t", @w) . "\n"; \
             }' \
   | sort -k1n,1n -k2,2 -k3n,3n -k5,5 -k6n,6n \
   | uniq \
     > ncbiCodingAnnotations.txt
   wc -l ncbiCodingAnnotations.txt
#3854299 ncbiCodingAnnotations.txt
   # How many & what kinds of function types?
   cut -f 6 ncbiCodingAnnotations.txt \
   | sort -n | uniq -c
# 1258578 3 (coding-synon)
# 1882006 8 (cds-reference -- ignored)
#    4717 41  (nonsense)
#  624020 42  (missense)
#     745 43  (stop-loss)
#   14806 44  (frameshift)
#   69427 45  (cds-indel)


   # In b142, the functional annotations include non-coding (frame = NULL),
   # which we'll exclude here because this is supposed to be just coding stuff...
   # probably need to update how we show dbSNP's func annos anyway, e.g.
   # it is a shame that we toss out codon number and transcript offset.
   # Gather up multiple annotation lines into one line per {snp, gene, frame}:
   perl -e  'while (<>) { chomp; \
               my ($rsId, $ctg, $s, $e, $txId, $fxn, $frm, $nt, $aa, $codon) = split("\t"); \
               next if ($fxn == 8 && ($frm eq "NULL" && $aa eq "NULL" && $codon eq "NULL")); \
               if (defined $lastRs && \
                   ($lastRs != $rsId || $lastCtg ne $ctg || $lastS != $s || \
                    $lastTx ne $txId || $lastFrm ne $frm)) { \
                 if (defined $refRow) { \
                   $fxns = "$refRow->[0],$fxns";  $nts = "$refRow->[1],$nts"; \
                   $aas = "$refRow->[2],$aas";    $codons = "$refRow->[3],$codons"; \
                 } \
                 $lineOut = "$lastCtg\t$lastS\t$lastE\trs$lastRs\t$lastTx\t$lastFrm\t" . \
                       "$count\t$fxns\t$nts\t$codons\t$aas\n"; \
                 $lineOut =~ s@NULL@n/a@g; \
                 print $lineOut; \
                 $refRow = undef;  @rows = ();  ($count, $fxns, $nts, $codons, $aas) = (); \
               } \
               ($lastRs, $lastCtg, $lastS, $lastE, $lastTx, $lastFrm) = \
                   ($rsId, $ctg, $s, $e, $txId, $frm); \
               $count++; \
               if ($fxn == 8) { \
                 $refRow = [$fxn, $nt, $aa, $codon]; \
               } else { \
                $fxns .= "$fxn,";  $nts .= "$nt,";  $aas .= "$aa,";  $codons .= "$codon,"; \
               } \
             } \
             if (defined $refRow) { \
               $fxns = "$refRow->[0],$fxns";  $nts = "$refRow->[1],$nts"; \
               $aas = "$refRow->[2],$aas";    $codons = "$refRow->[3],$codons"; \
             } \
             $lineOut = "$lastCtg\t$lastS\t$lastE\trs$lastRs\t$lastTx\t$lastFrm\t" . \
                   "$count\t$fxns\t$nts\t$codons\t$aas\n"; \
             $lineOut =~ s@NULL@n/a@g; \
             print $lineOut;' \
     ncbiCodingAnnotations.txt \
   | liftUp snp142CodingDbSnp.bed suggested.lft warn stdin
   hgLoadBed mm10 snp142CodingDbSnp -sqlTable=$HOME/kent/src/hg/lib/snp125Coding.sql \
     -renameSqlTable -tab -notItemRgb -allowStartEqualEnd \
     snp142CodingDbSnp.bed
#Read 1951211 elements of size 11 from snp142CodingDbSnp.bed
##############################################################################
# SNPMASKED SEQUENCE FOR SNP142 (DONE 2015-11-21 braney)
    mkdir /hive/data/genomes/mm10/snp142Mask
    cd /hive/data/genomes/mm10/snp142Mask
    # Identify rsIds with various problems -- we will exclude those.
    zcat /hive/data/outside/dbSNP/142/mouse_mm10/snp142.bed.gz \
    | awk '$18 ~ /MultipleAlignments|ObservedTooLong|ObservedWrongFormat|ObservedMismatch|MixedObserved/ {print $4;}' \
      | sort -u \
      > snp142ExcludeRsIds.txt
    zcat /hive/data/outside/dbSNP/142/mouse_mm10/snp142.bed.gz \
    | grep -vFwf snp142ExcludeRsIds.txt \
      > snp142Cleaned.bed
    wc -l snp142Cleaned.bed
#76837455 snp142Cleaned.bed

    # Substitutions:
    mkdir substitutions
    snpMaskSingle snp142Cleaned.bed /hive/data/genomes/mm10/mm10.2bit stdout diffObserved.txt \
    | faSplit byname stdin substitutions/
#Masked 66976283 snps in 66976283 out of 2729124706 genomic bases
# /hive/data/genomes/mm10/mm10.2bit has 2730871774 total bases,
#but the total number of bases in sequences for which we masked snps is 2729124706 (difference is 1747068)

    # Check that 1747068 is the total #bases in sequences with nothing in snp142Cleaned:
    grep -Fw single snp142Cleaned.bed | cut -f 1 | uniq > /data/tmp/1
    grep -vwf /data/tmp/1 ../chrom.sizes \
    | awk 'BEGIN {TOTAL = 0;}  {TOTAL += $2;}  END {printf "%d\n", TOTAL;}'
#1726860
    calc 1747068-1726860
#20208

    # warnings about differing observed strings at same base position:
    wc -l diffObserved.txt
#2 diffObserved.txt
    # peanuts!  good.
    # Make sure that sizes are identical, first diffs are normal -> IUPAC,
    # and first diffs' case is preserved:
    mkdir tmpFa
    cd tmpFa
    # split the unmasked genome by sequence name; presumably this leaves
    # <seqName>.fa files in tmpFa/ (the faCmp below expects them there)
    twoBitToFa /hive/data/genomes/mm10/mm10.2bit stdout | faSplit byname stdin tmpFa
    cd ..
    # At this point the masked files are still substitutions/chr*.fa -- they
    # are renamed to .subst.fa and gzipped further below -- so compare each
    # one against the same-named unmasked file.
    foreach f (substitutions/chr*.fa)
      faCmp $f tmpFa/$f:t |& grep -v "that differ"
    end
#chr1 in substitutions/chr1.fa differs from chr1 at ../1/chr1.fa at base 10107 (y != c)
#chr10 in substitutions/chr10.fa differs from chr10 at ../10/chr10.fa at base 60493 (R != A)
#...
#(output OK -- ambiguous bases replacing [agct] at SNP positions)
    foreach f (substitutions/chr*.fa)
      echo $f:t:r
      mv $f $f:r.subst.fa
    end
    # Fire off a bunch of gzip jobs in parallel:
    ls -1 substitutions/*.fa | split -l 5
    foreach f (x??)
      gzip `cat $f` &
    end
    # Wait for backgrounded gzip jobs to complete
    rm x??

    # Insertions & deletions not done.  To date we have only offered substs for download.
    # If there is user demand, use template from snp131 above.

    # Clean up and prepare for download:
    gzip snp142Cleaned.bed &
    foreach d (substitutions)
      pushd $d
        md5sum *.gz > md5sum.txt
        cp /hive/data/genomes/hg38/snp142Mask/$d/README.txt .
      popd
    end
    # Edit the README.txt.

    # Create download links on hgwdev.
    mkdir /usr/local/apache/htdocs-hgdownload/goldenPath/mm10/snp142Mask
    ln -s /hive/data/genomes/mm10/snp142Mask/substitutions/* \
      /usr/local/apache/htdocs-hgdownload/goldenPath/mm10/snp142Mask/

##############################################################################
# LASTZ Rhesus rheMac8 (DONE - 2016-02-10 - Hiram)
    mkdir /hive/data/genomes/mm10/bed/lastzRheMac8.2016-02-10
    cd /hive/data/genomes/mm10/bed/lastzRheMac8.2016-02-10

    printf '# rhesus vs mouse
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz
# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: Rhesus RheMac8
SEQ2_DIR=/hive/data/genomes/rheMac8/rheMac8.2bit
SEQ2_LEN=/hive/data/genomes/rheMac8/chrom.sizes
SEQ2_CHUNK=20000000
SEQ2_LAP=0
SEQ2_LIMIT=500

BASE=/hive/data/genomes/mm10/bed/lastzRheMac8.2016-02-10
TMPDIR=/dev/shm
' > DEF

    #	establish a screen to control this job
    screen -S mm10RheMac8
    time (doBlastzChainNet.pl -verbose=2 \
	`pwd`/DEF \
	-syntenicNet \
	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
	-chainMinScore=3000 -chainLinearGap=medium) > do.log 2>&1
    #   real    239m18.376s

    cat fb.mm10.chainRheMac8Link.txt
    #	918841829 bases of 2652783500 (34.637%) in intersection

    time (doRecipBest.pl -buildDir=`pwd` mm10 rheMac8) > rbest.log 2>&1 &
    # real    421m31.807s

    mkdir /hive/data/genomes/rheMac8/bed/blastz.mm10.swap
    cd /hive/data/genomes/rheMac8/bed/blastz.mm10.swap
    time (doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzRheMac8.2016-02-10/DEF \
	-swap -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
	-chainMinScore=3000 -chainLinearGap=medium) > swap.log 2>&1
    #	real    110m33.219s

    cat fb.rheMac8.chainMm10Link.txt
    #	917131079 bases of 3142093174 (29.189%) in intersection

    time (doRecipBest.pl -buildDir=`pwd` rheMac8 mm10) > rbest.log 2>&1
    # real    409m8.252s

##############################################################################
# Patents (26 Feb 2016, Max)
# convert SAM to BED
cd /hive/data/genomes/hg19/bed/patents/data/
samtools view -S -t ensGenomeMm10/Mus_musculus.GRCm38.75.dna.toplevel.fa.fai Mus_musculus.GRCm38.75.s90c50.sam -h > mm10.sam
# convert to bed
function sam2psl_pierre() { java -Dfile.encoding=UTF8 -Xmx500m    -cp "/cluster/bin/jvarkit/htsjdk-1.133/dist/commons-jexl-2.1.1.jar:/cluster/bin/jvarkit/htsjdk-1.133/dist/commons-logging-1.1.1.jar:/cluster/bin/jvarkit/htsjdk-1.133/dist/htsjdk-1.133.jar:/cluster/bin/jvarkit/htsjdk-1.133/dist/snappy-java-1.0.3-rc3.jar:/cluster/bin/jvarkit/dist-1.133/sam2psl.jar" com.github.lindenb.jvarkit.tools.misc.SamToPsl $*; }
sam2psl_pierre mm10.sam 2> /dev/null > mm10.psl
pslToBed mm10.psl mm10.bed
# strip the BAM flag field from the BED name
# careful: this line includes tab characters
sed -ri 's/_(16|0)	/	/g' mm10.bed

# now join meta with bed file
cd ../mm10
# sort by name
# The -S10G parameter is only supported in newer sort versions;
# if it complains, just remove it. It will just take longer.
# mm10.bed was written in ../data above, and the join reads ../data/mm10.s4.bed,
# so sort from and into ../data
time sort -k4,4 -S10G --parallel=20 ../data/mm10.bed > ../data/mm10.s4.bed
join -t $'\t' -1 4 -2 1 ../data/mm10.s4.bed ../data/seqAndPatentSummary.tab -o '1.1 1.2 1.3 1.4 1.5 1.6 1.7 1.8 1.9 1.10 1.11 1.12 2.2 2.3 2.4 2.5 2.6 2.7 2.8 2.9 2.10 2.11 2.12' | patSeqFilterBulkAndAnnotate ../data/htPatents.txt patBulk.bed patNonBulk.bed -c ../data/seqCounts.tab
bedSort patNonBulk.bed patNonBulk.bed
bedSort patBulk.bed patBulk.bed
bedToBigBed patNonBulk.bed /cluster/data/genomes/mm10/chrom.sizes patNonBulk.bb -tab -as=../patSummary.as -type=bed12+
bedToBigBed patBulk.bed /cluster/data/genomes/mm10/chrom.sizes patBulk.bb -tab -as=../patSummary.as -type=bed12+
# these bigBeds are in mm10 coordinates, so register them in mm10, not hg19
# NOTE(review): the /gbdb/mm10/bbi/ location is assumed -- confirm the .bb
# files are linked there before running
hgBbiDbLink mm10 patBulk /gbdb/mm10/bbi/patBulk.bb
hgBbiDbLink mm10 patNonBulk /gbdb/mm10/bbi/patNonBulk.bb

##############################################################################
# LASTZ Rat rn6 (DONE - 2016-04-09 - Jonathan)
    mkdir /hive/data/genomes/mm10/bed/lastzRn6.2016-04-07
    cd /hive/data/genomes/mm10/bed/lastzRn6.2016-04-07

    printf '# rat vs mouse
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz
# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: Rat Rn6
SEQ2_DIR=/hive/data/genomes/rn6/rn6.2bit
SEQ2_LEN=/hive/data/genomes/rn6/chrom.sizes
SEQ2_CHUNK=20000000
SEQ2_LAP=0
SEQ2_LIMIT=500

BASE=/hive/data/genomes/mm10/bed/lastzRn6.2016-04-07
TMPDIR=/dev/shm
' > DEF

    #	establish a screen to control this job
    screen -S mm10Rn6
    time (doBlastzChainNet.pl -verbose=2 \
	`pwd`/DEF \
	-syntenicNet \
	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
	-chainMinScore=5000 -chainLinearGap=medium) > do.log 2>&1
    #   real    501m43.495s

    cat fb.mm10.chainRn6Link.txt
    #	1880453869 bases of 2652783500 (70.886%) in intersection

    time (doRecipBest.pl -buildDir=`pwd` mm10 rn6) > rbest.log 2>&1 &
    # real    766m50.090s

    mkdir /hive/data/genomes/rn6/bed/blastz.mm10.swap
    cd /hive/data/genomes/rn6/bed/blastz.mm10.swap
    time (doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzRn6.2016-04-07/DEF \
	-swap -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
	-chainMinScore=5000 -chainLinearGap=medium) > swap.log 2>&1
    #	real    234m59.393s

    cat fb.rn6.chainMm10Link.txt
    #   1938597957 bases of 2729860805 (71.015%) in intersection

    time (doRecipBest.pl -buildDir=`pwd` rn6 mm10) > rbest.log 2>&1
    # real    882m38.624s

#########################################################################
## 4-Way Multiz for UCSC Genes construction (TBD - 2016-04-06 - Jonathan)
# mm10, hg38, canFam3, rn6

    mkdir /hive/data/genomes/mm10/bed/multiz4way
    cd /hive/data/genomes/mm10/bed/multiz4way

    # extract a tree for the 4 we need
    /cluster/bin/phast/tree_doctor \
	--prune-all-but hg38,mm10,canFam3,rn6 $HOME/kent/src/hg/utils/phyloTrees/191way.nh > 4way.nh

    # this looks like:
    ((hg38:0.145908,(mm10:0.084509,rn6:0.091589):0.271974):0.020593,canFam3:0.165928);


    # Use this specification in the phyloGif tool:
    #	http://genome.ucsc.edu/cgi-bin/phyloGif
    #	to obtain a gif image for htdocs/images/phylo/mm10_4way.gif

    /cluster/bin/phast/all_dists 4way.nh > 4way.distances.txt
    #	Use this output to create the table below
    grep -i mm10 4way.distances.txt | sort -k3,3n
#
#	If you can fill in all the numbers in this table, you are ready for
#	the multiple alignment procedure
#
#                         featureBits chainLink measures
#                                        chainMm10Link   chain    linearGap
#    distance                      on mm10    on other   minScore
#  1  0.176098 - rat rn6        (% 70.886) (% 71.015)       5000     medium
#  2  0.502391 - human hg38     (% 35.372) (% 31.653)       3000     medium
#  3  0.543004 - dog canFam3    (% 29.144) (% 31.624)       3000     medium

    #   using the syntenic nets
    cd /cluster/data/mm10/bed/multiz4way
    mkdir mafLinks
    cd mafLinks
    mkdir rn6 canFam3 hg38

    for D in hg38 canFam3 rn6
do
    cd $D
    ln -s ../../../lastz.${D}/mafSynNet/*.maf.gz ./
    cd ..
done

    #   determine what is the newest version of multiz and use that
    cd /hive/data/genomes/mm10/bed/multiz4way
    mkdir penn
    cp -p /cluster/bin/penn/multiz.2009-01-21_patched/multiz penn
    cp -p /cluster/bin/penn/multiz.2009-01-21_patched/maf_project penn
    cp -p /cluster/bin/penn/multiz.2009-01-21_patched/autoMZ penn

    # the autoMultiz cluster run
    ssh ku
    cd /hive/data/genomes/mm10/bed/multiz4way

    # create species list and stripped down tree for autoMZ
    sed 's/[a-z][a-z]*_//g; s/:[0-9\.][0-9\.]*//g; s/;//; /^ *$/d' \
    4way.nh > tmp.nh
    echo `cat tmp.nh` | sed 's/ //g; s/,/ /g' > tree.nh
    sed 's/[()]//g; s/,/ /g' tree.nh > species.lst

    mkdir run maf
    cd run

    #   NOTE: you need to set the db and multiz dirname properly in this
    #   script
    cat > autoMultiz << '_EOF_'
#!/bin/csh -ef
set db = mm10
set c = $1
set maf = $2
set binDir = /hive/data/genomes/mm10/bed/multiz4way/penn
set tmp = /dev/shm/$db/multiz.$c
set pairs = /hive/data/genomes/mm10/bed/multiz4way/mafLinks
rm -fr $tmp
mkdir -p $tmp
cp ../{tree.nh,species.lst} $tmp
pushd $tmp
foreach s (`cat species.lst`)
    set in = $pairs/$s/$c.maf
    set out = $db.$s.sing.maf
    if ($s == $db) then
    continue
    endif
    if (-e $in.gz) then
    zcat $in.gz > $out
    else if (-e $in) then
    cp $in $out
    else
    echo "##maf version=1 scoring=autoMZ" > $out
    endif
end
set path = ($binDir $path); rehash
$binDir/autoMZ + T=$tmp E=$db "`cat tree.nh`" $db.*.sing.maf $c.maf
popd
cp $tmp/$c.maf $maf
rm -fr $tmp
'_EOF_'
    # << happy emacs
    chmod +x autoMultiz

cat  << '_EOF_' > template
#LOOP
./autoMultiz $(root1) {check out line+ /hive/data/genomes/mm10/bed/multiz4way/maf/$(root1).maf}
#ENDLOOP
'_EOF_'
    # << happy emacs

    cut -f1 /cluster/data/mm10/chrom.sizes > chrom.lst
    gensub2 chrom.lst single template jobList
    para create jobList
    # 66 jobs
    para try ... check ... push ... etc ...

# Completed: 66 of 66 jobs
# CPU time in finished jobs:      34495s     574.91m     9.58h    0.40d  0.001 y
# IO & Wait Time:                   826s      13.77m     0.23h    0.01d  0.000 y
# Average job time:                 535s       8.92m     0.15h    0.01d
# Longest finished job:            2765s      46.08m     0.77h    0.03d
# Submission to last job:          2776s      46.27m     0.77h    0.03d

    #   combine results into a single file for loading and gbdb reference
    #   1. take the "#" header lines from one per-chrom maf, leaving out the
    #      "eof maf" trailer line
    #   2. append every non-"#" line from all the per-chrom maf files
    #   3. finish with the "eof maf" trailer line from that same maf
    #   NOTE(review): assumes all maf/*.maf files carry the same header as
    #   chr1_GL456210_random.maf - only that one header is kept
    cd /hive/data/genomes/mm10/bed/multiz4way
    grep "^#" maf/chr1_GL456210_random.maf | grep -v "eof maf" > multiz4way.maf
    grep -h -v "^#" maf/*.maf >> multiz4way.maf
    grep "^#" maf/chr1_GL456210_random.maf | grep "eof maf" >> multiz4way.maf

    #	makes a 6.5 Gb file:
    #   -rw-rw-r-- 1 6928752890 Apr 12 10:18 multiz4way.maf

    # Load into database
    ssh hgwdev
    cd /hive/data/genomes/mm10/bed/multiz4way
    mkdir /gbdb/mm10/multiz4way
    ln -s /hive/data/genomes/mm10/bed/multiz4way/multiz4way.maf \
	/gbdb/mm10/multiz4way
    #	the hgLoadMaf generates huge tmp files, locate them in /dev/shm
    cd /dev/shm
    time nice -n +19 hgLoadMaf mm10 multiz4way
    #   Loaded 5300158 mafs in 1 files from /gbdb/mm10/multiz4way
    #   real    1m41.656s

    cd /hive/data/genomes/mm10/bed/multiz4way
    time (cat /gbdb/mm10/multiz4way/*.maf \
        | hgLoadMafSummary -verbose=2 -minSize=10000 \
	-mergeGap=500 -maxSize=50000 mm10 multiz4waySummary stdin)
    # Created 1310955 summary blocks from 9774995 components and 5300158 mafs
    # real    2m27.913s
    mv /dev/shm/multiz4way.tab .
# -rw-rw-r-- 1 277435502 Apr 12 12:11 multiz4way.tab
# -rw-rw-r-- 1  59271980 Apr 12 12:16 multiz4waySummary.tab
    wc -l multiz4way*.tab
    # 5300158 multiz4way.tab
    # 1310955 multiz4waySummary.tab
    # 6611113 total

#########################################################################
# LASTZ mouse/mm10 vs. chicken/galGal5 - (DONE - 2016-04-20 - Hiram)
    mkdir /hive/data/genomes/mm10/bed/lastzGalGal5.2016-04-20
    cd /hive/data/genomes/mm10/bed/lastzGalGal5.2016-04-20

    printf "# Mouse vs. chicken
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz
BLASTZ_Y=3400
BLASTZ_L=6000
BLASTZ_K=2200
BLASTZ_Q=/scratch/data/blastz/HoxD55.q
#      A    C    G    T
#     91  -90  -25 -100
#    -90  100 -100  -25
#    -25 -100  100  -90
#   -100  -25  -90  91

# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: chicken galGal5
SEQ2_DIR=/hive/data/genomes/galGal5/galGal5.2bit
SEQ2_LEN=/hive/data/genomes/galGal5/chrom.sizes
SEQ2_CHUNK=20000000
SEQ2_LAP=0
SEQ2_LIMIT=50

BASE=/hive/data/genomes/mm10/bed/lastzGalGal5.2016-04-20
TMPDIR=/dev/shm
" > DEF

    time (doBlastzChainNet.pl `pwd`/DEF -verbose=2 \
        -chainMinScore=5000 -chainLinearGap=loose \
          -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
            -syntenicNet) > do.log 2>&1
    # real    112m25.946s

    cat fb.mm10.chainGalGal5Link.txt
    # 102343350 bases of 2652783500 (3.858%) in intersection

    time (doRecipBest.pl -buildDir=`pwd` mm10 galGal5) > rbest.log 2>&1 &
    # real    170m24.948s

    # and for the swap:
    mkdir /hive/data/genomes/galGal5/bed/blastz.mm10.swap
    cd /hive/data/genomes/galGal5/bed/blastz.mm10.swap

    time (doBlastzChainNet.pl -verbose=2 \
      /hive/data/genomes/mm10/bed/lastzGalGal5.2016-04-20/DEF \
        -swap -chainMinScore=5000 -chainLinearGap=loose \
          -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
            -syntenicNet) > swap.log 2>&1
    #  real    12m17.175s

    cat fb.galGal5.chainMm10Link.txt
    # 95753452 bases of 1218501075 (7.858%) in intersection

    time (doRecipBest.pl -buildDir=`pwd` galGal5 mm10) > rbest.log 2>&1
    # real    138m37.610s

#########################################################################
# LASTZ mouse/mm10 vs. Malayan flying lemur/galVar1 - (DONE - 2016-04-26 - Hiram)
    mkdir /hive/data/genomes/mm10/bed/lastzGalVar1.2016-04-26
    cd /hive/data/genomes/mm10/bed/lastzGalVar1.2016-04-26

    printf "# mouse vs Malayan flying lemur
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz
BLASTZ_O=400
BLASTZ_E=30
BLASTZ_M=254
# default BLASTZ_Q score matrix:
#       A     C     G     T
# A    91  -114   -31  -123
# C  -114   100  -125   -31
# G   -31  -125   100  -114
# T  -123   -31  -114    91

# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: Malayan flying lemur galVar1
SEQ2_DIR=/hive/data/genomes/galVar1/galVar1.2bit
SEQ2_LEN=/hive/data/genomes/galVar1/chrom.sizes
SEQ2_CHUNK=20000000
SEQ2_LIMIT=400
SEQ2_LAP=0

BASE=/hive/data/genomes/mm10/bed/lastzGalVar1.2016-04-26
TMPDIR=/dev/shm
" > DEF
    # << happy emacs

    time (doBlastzChainNet.pl `pwd`/DEF -verbose=2 \
        -chainMinScore=3000 -chainLinearGap=medium \
          -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
            -syntenicNet) > do.log 2>&1
    # real    340m23.106s

    cat fb.mm10.chainGalVar1Link.txt
    # 944876157 bases of 2652783500 (35.618%) in intersection

    time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` mm10 galVar1) \
      > rbest.log 2>&1 &
    # real    694m27.183s

    # and for the swap:
    mkdir /hive/data/genomes/galVar1/bed/blastz.mm10.swap
    cd /hive/data/genomes/galVar1/bed/blastz.mm10.swap

    time (doBlastzChainNet.pl -verbose=2 \
      /hive/data/genomes/mm10/bed/lastzGalVar1.2016-04-26/DEF \
        -swap -chainMinScore=3000 -chainLinearGap=medium \
          -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
            -syntenicNet) > swap.log 2>&1
    #  real    173m45.678s

    cat fb.galVar1.chainMm10Link.txt
    # 1008272821 bases of 2802917674 (35.972%) in intersection

    time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` galVar1 mm10) \
       > rbest.log 2>&1
    # real    856m16.458s

#########################################################################
# lastz Chinese softshell turtle pelSin1 (DONE - 2016-05-10 - Hiram)
    # establish a screen to control this job with a name to indicate what it is
    screen -S mm10PelSin1
    mkdir /hive/data/genomes/mm10/bed/lastzPelSin1.2016-05-10
    cd /hive/data/genomes/mm10/bed/lastzPelSin1.2016-05-10

    printf '# Mouse vs. Chinese softshell turtle
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz
BLASTZ_Y=3400
BLASTZ_L=6000
BLASTZ_K=2200
BLASTZ_Q=/scratch/data/blastz/HoxD55.q

# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: Chinese softshell turtle pelSin1
SEQ2_DIR=/hive/data/genomes/pelSin1/pelSin1.2bit
SEQ2_LEN=/hive/data/genomes/pelSin1/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LAP=0
SEQ2_LIMIT=100

BASE=/hive/data/genomes/mm10/bed/lastzPelSin1.2016-05-10
TMPDIR=/dev/shm
' > DEF

    # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
    #	number of jobs, 50,000 to something under 100,000
    # when not present, SEQ2_LIMIT is a default 100
     time (doBlastzChainNet.pl -verbose=2 \
        `pwd`/DEF \
        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
        -chainMinScore=5000 -chainLinearGap=loose) > do.log 2>&1
    # real    156m43.981s

    cat fb.mm10.chainPelSin1Link.txt
    #	113023930 bases of 2652783500 (4.261%) in intersection

    # forgot to include syntenicNet:
     time (doBlastzChainNet.pl -verbose=2 \
        -continue=syntenicNet -syntenicNet `pwd`/DEF \
        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
        -chainMinScore=5000 -chainLinearGap=loose) > synNet.log 2>&1 &
    # real    2m9.196s


    time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` mm10 pelSin1) \
       > rbest.log 2>&1 &
    # real    221m37.947s

    #	and for the swap
    mkdir /hive/data/genomes/pelSin1/bed/blastz.mm10.swap
    cd /hive/data/genomes/pelSin1/bed/blastz.mm10.swap
    time (doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzPelSin1.2016-05-10/DEF \
        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
        -syntenicNet -swap -chainMinScore=5000 -chainLinearGap=loose) \
            > swap.log 2>&1
    #	real    16m3.703s

    cat  fb.pelSin1.chainMm10Link.txt
    #	102485355 bases of 2106639384 (4.865%) in intersection

    time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` pelSin1 mm10) \
       > rbest.log 2>&1
    # real    198m33.448s

#########################################################################
# LASTZ mouse/mm10 Bonobo/panPan2 - (DONE - 2016-05-24 - Hiram)
    mkdir /hive/data/genomes/mm10/bed/lastzPanPan2.2016-05-24
    cd /hive/data/genomes/mm10/bed/lastzPanPan2.2016-05-24

    printf '# mouse vs bonobo
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz
BLASTZ_M=254

# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000
# the default matrix is:
#       A     C     G     T
# A    91  -114   -31  -123
# C  -114   100  -125   -31
# G   -31  -125   100  -114
# T  -123   -31  -114    91

# QUERY: bonobo panPan2
SEQ2_DIR=/hive/data/genomes/panPan2/panPan2.2bit
SEQ2_LEN=/hive/data/genomes/panPan2/chrom.sizes
SEQ2_CHUNK=20000000
SEQ2_LIMIT=30
SEQ2_LAP=0

BASE=/hive/data/genomes/mm10/bed/lastzPanPan2.2016-05-24
TMPDIR=/dev/shm
' > DEF

    time (doBlastzChainNet.pl `pwd`/DEF -verbose=2 \
        -chainMinScore=3000 -chainLinearGap=medium \
          -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
            -syntenicNet) > do.log 2>&1
    # real    360m9.534s

    cat fb.mm10.chainPanPan2Link.txt
    # 928638440 bases of 2652783500 (35.006%) in intersection

    time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` mm10 panPan2) \
      > rbest.log 2>&1 &
    # real    765m26.648s

    # and for the swap:
    mkdir /hive/data/genomes/panPan2/bed/blastz.mm10.swap
    cd /hive/data/genomes/panPan2/bed/blastz.mm10.swap

    time (doBlastzChainNet.pl -verbose=2 \
      /hive/data/genomes/mm10/bed/lastzPanPan2.2016-05-24/DEF \
        -swap -chainMinScore=3000 -chainLinearGap=medium \
          -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
            -syntenicNet) > swap.log 2>&1
    #  real    106m54.032s

    cat fb.panPan2.chainMm10Link.txt
    # 911279510 bases of 2725937399 (33.430%) in intersection

    time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` panPan2 mm10) \
       > rbest.log 2>&1
    # real    620m0.039s

#########################################################################
# LASTZ mouse/mm10 Chimp/panTro5 - (DONE - 2016-08-03 - Hiram)
    mkdir /hive/data/genomes/mm10/bed/lastzPanTro5.2016-08-03
    cd /hive/data/genomes/mm10/bed/lastzPanTro5.2016-08-03

    printf '# mouse vs chimp
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz
BLASTZ_M=254

# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000
SEQ1_LIMIT=10
# the default matrix is:
#       A     C     G     T
# A    91  -114   -31  -123
# C  -114   100  -125   -31
# G   -31  -125   100  -114
# T  -123   -31  -114    91

# QUERY: chimp panTro5
SEQ2_DIR=/hive/data/genomes/panTro5/panTro5.2bit
SEQ2_LEN=/hive/data/genomes/panTro5/chrom.sizes
SEQ2_CHUNK=20000000
SEQ2_LIMIT=100
SEQ2_LAP=0

BASE=/hive/data/genomes/mm10/bed/lastzPanTro5.2016-08-03
TMPDIR=/dev/shm
' > DEF

    time (doBlastzChainNet.pl `pwd`/DEF -verbose=2 \
        -chainMinScore=3000 -chainLinearGap=medium \
          -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
            -syntenicNet) > do.log 2>&1
    #  real    273m27.335s

    cat fb.mm10.chainPanTro5Link.txt
    # 935711523 bases of 2652783500 (35.273%) in intersection

    time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` mm10 panTro5) \
      > rbest.log 2>&1 &
    # real    624m28.225s

    # and for the swap:
    mkdir /hive/data/genomes/panTro5/bed/blastz.mm10.swap
    cd /hive/data/genomes/panTro5/bed/blastz.mm10.swap

    time (doBlastzChainNet.pl -verbose=2 \
      /hive/data/genomes/mm10/bed/lastzPanTro5.2016-08-03/DEF \
        -swap -chainMinScore=3000 -chainLinearGap=medium \
          -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
            -syntenicNet) > swap.log 2>&1
    #  real    98m32.623s

    cat fb.panTro5.chainMm10Link.txt
    # 965636631 bases of 3132620660 (30.825%) in intersection

    time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` panTro5 mm10) \
       > rbest.log 2>&1
    # real    560m21.432s

#########################################################################
# Crispr track. See ../crisprTrack/README.txt (2016-09-15 max)
# Command: doCrispr.sh mm10 ensGene
##############################################################################

############################################################################################
# Mouse strains VCF (DONE - 2016-11-08 - Hiram)

    mkdir /hive/data/genomes/mm10/bed/strainsVCF
    cd /hive/data/genomes/mm10/bed/strainsVCF

    # download files:
wget --timestamping \
ftp://ftp-mouse.sanger.ac.uk/REL-1505-SNPs_Indels/mgp.v5.merged.indels.dbSNP142.normed.vcf.gz.tbi

wget --timestamping \
ftp://ftp-mouse.sanger.ac.uk/REL-1505-SNPs_Indels/mgp.v5.merged.snps_all.dbSNP142.vcf.gz.tbi

wget --timestamping \
ftp://ftp-mouse.sanger.ac.uk/REL-1505-SNPs_Indels/mgp.v5.merged.indels.dbSNP142.normed.vcf.gz

wget --timestamping \
ftp://ftp-mouse.sanger.ac.uk/REL-1505-SNPs_Indels/mgp.v5.merged.snps_all.dbSNP142.vcf.gz

    # change to UCSC chrom names:

    zcat mgp.v5.merged.snps_all.dbSNP142.vcf.gz \
       | sed -e "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/" \
          > ucscNames.mgp.v5.merged.snps_all.dbSNP142.vcf

    # The sed above only rewrites lines that begin with a chrom name; the
    # header lines begin with '#' and still carry the original names.
    # need to fixup the chrom names in the header, extract the header:
    grep "^#" ucscNames.mgp.v5.merged.snps_all.dbSNP142.vcf > original.header.txt
    # copy that and edit it (manually) to fixup the names:
    #   NOTE(review): presumably the ##contig=<ID=...> lines are the ones
    #   that need the chr prefix - confirm against original.header.txt
    cp original.header.txt ucscNames.header.txt

    # extract the lines not in the header
    grep -v "^#" ucscNames.mgp.v5.merged.snps_all.dbSNP142.vcf > ucscNames.notHeader.txt

    # put it back together, edited header first, then the data lines:
    #   (the header file is ucscNames.header.txt, as created by the cp above)
    cat ucscNames.header.txt ucscNames.notHeader.txt > ucsc.mgpV5MergedSNPsAlldbSNP142.vcf

    # tabix gzip (about 2 hours)
    #   bgzip replaces $name with $name.gz; tabix is then given that .gz
    #   file and writes the index $name.gz.tbi next to it
    export name="ucsc.mgpV5MergedSNPsAlldbSNP142.vcf"
    /cluster/bin/tabix-0.2.6/bgzip $name
    /cluster/bin/tabix-0.2.6/tabix -p vcf $name.gz

    # symlink to gbdb
    mkdir /gbdb/mm10/mouseStrains
    ln -s `pwd`/ucsc.mgpV5MergedSNPsAlldbSNP142.vcf.gz \
          /gbdb/mm10/mouseStrains/mgpV5MergedSNPsAlldbSNP142.vcf.gz
    ln -s `pwd`/ucsc.mgpV5MergedSNPsAlldbSNP142.vcf.gz.tbi \
          /gbdb/mm10/mouseStrains/mgpV5MergedSNPsAlldbSNP142.vcf.gz.tbi

    hgBbiDbLink mm10 strainSNPs /gbdb/mm10/mouseStrains/mgpV5MergedSNPsAlldbSNP142.vcf.gz

    # trackDb entry in trackDb/mouse/mm10/trackDb.ra:

track strainSNPs
shortLabel Mouse SNPs
longLabel Annotated SNPs from mouse strain comparison analysis
group varRep
type vcfTabix
visibility hide
hapClusterHeight 78

#############################################################################
# lastz turkey melGal5 (DONE - 2017-01-19 - Hiram)
    # establish a screen to control this job with a name to indicate what it is
    screen -S mm10MelGal5
    mkdir /hive/data/genomes/mm10/bed/lastzMelGal5.2017-01-19
    cd /hive/data/genomes/mm10/bed/lastzMelGal5.2017-01-19

    printf '# Mouse vs. turkey
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz
BLASTZ_Y=3400
BLASTZ_L=6000
BLASTZ_K=2200
BLASTZ_Q=/scratch/data/blastz/HoxD55.q

# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: turkey melGal5
SEQ2_DIR=/hive/data/genomes/melGal5/melGal5.2bit
SEQ2_LEN=/hive/data/genomes/melGal5/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LAP=0
SEQ2_LIMIT=300

BASE=/hive/data/genomes/mm10/bed/lastzMelGal5.2017-01-19
TMPDIR=/dev/shm
' > DEF

    # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
    #	number of jobs, 50,000 to something under 100,000
    # when not present, SEQ2_LIMIT is a default 100
    time (doBlastzChainNet.pl -verbose=2 `pwd`/DEF \
        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
        -syntenicNet -chainMinScore=5000 -chainLinearGap=loose) > do.log 2>&1
    #	real    160m46.030s

    cat fb.mm10.chainMelGal5Link.txt
    #	94675126 bases of 2652783500 (3.569%) in intersection

    time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` mm10 melGal5) \
           > rbest.log 2>&1 &
    # real    379m35.317s

    #	and for the swap
    mkdir /hive/data/genomes/melGal5/bed/blastz.mm10.swap
    cd /hive/data/genomes/melGal5/bed/blastz.mm10.swap
    time (doBlastzChainNet.pl -verbose=2 -syntenicNet \
	/hive/data/genomes/mm10/bed/lastzMelGal5.2017-01-19/DEF \
        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
        -swap -chainMinScore=5000 -chainLinearGap=loose) > swap.log 2>&1
    #	real    31m37.466s

    cat  fb.melGal5.chainMm10Link.txt
    #	81470789 bases of 1093044709 (7.454%) in intersection

    time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` melGal5 mm10) \
           > rbest.log 2>&1
    # real    356m16.099s

#############################################################################
# LASTZ mouse/mm10 Pig-tailed macaque/macNem1 - (DONE - 2017-02-28 - Hiram)
    mkdir /hive/data/genomes/mm10/bed/lastzMacNem1.2017-02-28
    cd /hive/data/genomes/mm10/bed/lastzMacNem1.2017-02-28

    printf '# mouse vs Pig-tailed macaque
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz
BLASTZ_M=254

# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000
SEQ1_LIMIT=5
# the default matrix is:
#       A     C     G     T
# A    91  -114   -31  -123
# C  -114   100  -125   -31
# G   -31  -125   100  -114
# T  -123   -31  -114    91

# QUERY: Pig-tailed macaque macNem1
SEQ2_DIR=/hive/data/genomes/macNem1/macNem1.2bit
SEQ2_LEN=/hive/data/genomes/macNem1/chrom.sizes
SEQ2_CHUNK=20000000
SEQ2_LIMIT=30
SEQ2_LAP=0

BASE=/hive/data/genomes/mm10/bed/lastzMacNem1.2017-02-28
TMPDIR=/dev/shm
' > DEF

    time (doBlastzChainNet.pl `pwd`/DEF -verbose=2 \
        -chainMinScore=3000 -chainLinearGap=medium \
          -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
            -syntenicNet) > do.log 2>&1
    #  real    370m19.213s

    cat fb.mm10.chainMacNem1Link.txt
    # 918083212 bases of 2652783500 (34.608%) in intersection

    time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` mm10 macNem1) \
      > rbest.log 2>&1 &
    # real    344m11.369s

    # and for the swap:
    mkdir /hive/data/genomes/macNem1/bed/blastz.mm10.swap
    cd /hive/data/genomes/macNem1/bed/blastz.mm10.swap

    time (doBlastzChainNet.pl -verbose=2 \
      /hive/data/genomes/mm10/bed/lastzMacNem1.2017-02-28/DEF \
        -swap -chainMinScore=3000 -chainLinearGap=medium \
          -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
            -syntenicNet) > swap.log 2>&1
    #  real    65m14.074s

    cat fb.macNem1.chainMm10Link.txt
    # 905682728 bases of 2838503083 (31.907%) in intersection

    time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` macNem1 mm10) \
       > rbest.log 2>&1
    # real    321m2.285s

#############################################################################
# LASTZ mouse/mm10 Angolan colobus/colAng1 - (DONE - 2017-02-28 - Hiram)
    mkdir /hive/data/genomes/mm10/bed/lastzColAng1.2017-02-28
    cd /hive/data/genomes/mm10/bed/lastzColAng1.2017-02-28

    printf '# mouse vs Angolan colobus
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz
BLASTZ_M=254

# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000
SEQ1_LIMIT=5
# the default matrix is:
#       A     C     G     T
# A    91  -114   -31  -123
# C  -114   100  -125   -31
# G   -31  -125   100  -114
# T  -123   -31  -114    91

# QUERY: Angolan colobus colAng1
SEQ2_DIR=/hive/data/genomes/colAng1/colAng1.2bit
SEQ2_LEN=/hive/data/genomes/colAng1/chrom.sizes
SEQ2_CHUNK=20000000
SEQ2_LIMIT=30
SEQ2_LAP=0

BASE=/hive/data/genomes/mm10/bed/lastzColAng1.2017-02-28
TMPDIR=/dev/shm
' > DEF

    time (doBlastzChainNet.pl `pwd`/DEF -verbose=2 \
        -chainMinScore=3000 -chainLinearGap=medium \
          -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
            -syntenicNet) > do.log 2>&1
    #  real    376m8.949s

    cat fb.mm10.chainColAng1Link.txt
    # 902325064 bases of 2652783500 (34.014%) in intersection

    time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` mm10 colAng1) \
      > rbest.log 2>&1 &
    # real    343m38.692s

    # and for the swap:
    mkdir /hive/data/genomes/colAng1/bed/blastz.mm10.swap
    cd /hive/data/genomes/colAng1/bed/blastz.mm10.swap

    time (doBlastzChainNet.pl -verbose=2 \
      /hive/data/genomes/mm10/bed/lastzColAng1.2017-02-28/DEF \
        -swap -chainMinScore=3000 -chainLinearGap=medium \
          -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
            -syntenicNet) > swap.log 2>&1
    #  real    62m44.125s

    cat fb.colAng1.chainMm10Link.txt
    # 885418780 bases of 2679973137 (33.038%) in intersection

    time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` colAng1 mm10) \
       > rbest.log 2>&1
    # real    296m19.689s

#############################################################################
# LASTZ mouse/mm10 Gray mouse lemur/micMur3 - (DONE - 2017-03-03 - Hiram)
    mkdir /hive/data/genomes/mm10/bed/lastzMicMur3.2017-03-03
    cd /hive/data/genomes/mm10/bed/lastzMicMur3.2017-03-03

    printf '# mouse vs Gray mouse lemur
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz
BLASTZ_M=254

# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000
SEQ1_LIMIT=4
# the default matrix is:
#       A     C     G     T
# A    91  -114   -31  -123
# C  -114   100  -125   -31
# G   -31  -125   100  -114
# T  -123   -31  -114    91

# QUERY: Gray mouse lemur micMur3
SEQ2_DIR=/hive/data/genomes/micMur3/micMur3.2bit
SEQ2_LEN=/hive/data/genomes/micMur3/chrom.sizes
SEQ2_CHUNK=20000000
SEQ2_LIMIT=20
SEQ2_LAP=0

BASE=/hive/data/genomes/mm10/bed/lastzMicMur3.2017-03-03
TMPDIR=/dev/shm
' > DEF

    time (doBlastzChainNet.pl `pwd`/DEF -verbose=2 \
        -chainMinScore=3000 -chainLinearGap=medium \
          -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
            -syntenicNet) > do.log 2>&1
    #  real    2192m13.661s

    cat fb.mm10.chainMicMur3Link.txt
    # 907817373 bases of 2652783500 (34.221%) in intersection

    time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` mm10 micMur3) \
      > rbest.log 2>&1 &
    # real    522m5.587s

    # and for the swap:
    mkdir /hive/data/genomes/micMur3/bed/blastz.mm10.swap
    cd /hive/data/genomes/micMur3/bed/blastz.mm10.swap

    time (doBlastzChainNet.pl -verbose=2 \
      /hive/data/genomes/mm10/bed/lastzMicMur3.2017-03-03/DEF \
        -swap -chainMinScore=3000 -chainLinearGap=medium \
          -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
            -syntenicNet) > swap.log 2>&1
    #  real    71m4.702s

    cat fb.micMur3.chainMm10Link.txt
    # 905011854 bases of 2386321975 (37.925%) in intersection

    time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` micMur3 mm10) \
       > rbest.log 2>&1
    # real    508m58.716s

#############################################################################
# LASTZ mouse/mm10 Chinese tree shrew/tupChi1 - (DONE - 2017-03-09 - Hiram)
    mkdir /hive/data/genomes/mm10/bed/lastzTupChi1.2017-03-09
    cd /hive/data/genomes/mm10/bed/lastzTupChi1.2017-03-09

    printf '# mouse vs Chinese tree shrew
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz
BLASTZ_M=254

# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000
SEQ1_LIMIT=4
# the default matrix is:
#       A     C     G     T
# A    91  -114   -31  -123
# C  -114   100  -125   -31
# G   -31  -125   100  -114
# T  -123   -31  -114    91

# QUERY: Chinese tree shrew tupChi1
SEQ2_DIR=/hive/data/genomes/tupChi1/tupChi1.2bit
SEQ2_LEN=/hive/data/genomes/tupChi1/chrom.sizes
SEQ2_CHUNK=20000000
SEQ2_LIMIT=200
SEQ2_LAP=0

BASE=/hive/data/genomes/mm10/bed/lastzTupChi1.2017-03-09
TMPDIR=/dev/shm
' > DEF

    time (doBlastzChainNet.pl `pwd`/DEF -verbose=2 \
        -chainMinScore=3000 -chainLinearGap=medium \
          -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
            -syntenicNet) > do.log 2>&1
    #  real    224m24.608s

    cat fb.mm10.chainTupChi1Link.txt
    #  683463709 bases of 2652783500 (25.764%) in intersection

    time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` mm10 tupChi1) \
      > rbest.log 2>&1 &
    # real    385m2.239s

    # and for the swap:
    mkdir /hive/data/genomes/tupChi1/bed/blastz.mm10.swap
    cd /hive/data/genomes/tupChi1/bed/blastz.mm10.swap

    time (doBlastzChainNet.pl -verbose=2 \
      /hive/data/genomes/mm10/bed/lastzTupChi1.2017-03-09/DEF \
        -swap -chainMinScore=3000 -chainLinearGap=medium \
          -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
            -syntenicNet) > swap.log 2>&1
    #  real    71m4.702s

    cat fb.tupChi1.chainMm10Link.txt
    # 708757944 bases of 2706389135 (26.188%) in intersection

    time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` tupChi1 mm10) \
       > rbest.log 2>&1
    # real    508m10.564s

#############################################################################
# LASTZ mouse/mm10 Chinese pangolin/manPen1 - (DONE - 2017-03-15 - Hiram)
    mkdir /hive/data/genomes/mm10/bed/lastzManPen1.2017-03-15
    cd /hive/data/genomes/mm10/bed/lastzManPen1.2017-03-15

    # write the DEF parameter file for doBlastzChainNet.pl
    #   target is mm10, query is Chinese pangolin manPen1;
    #   only BLASTZ_M is set here, no BLASTZ_Q score matrix file is named
    printf '# Mouse vs. Chinese pangolin
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz
BLASTZ_M=254

# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: Chinese pangolin manPen1
SEQ2_DIR=/hive/data/genomes/manPen1/manPen1.2bit
SEQ2_LEN=/hive/data/genomes/manPen1/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LAP=0
SEQ2_LIMIT=180

BASE=/hive/data/genomes/mm10/bed/lastzManPen1.2017-03-15
TMPDIR=/dev/shm
' > DEF

    time (doBlastzChainNet.pl -verbose=2 `pwd`/DEF \
        -syntenicNet -fileServer=hgwdev \
        -chainMinScore=3000 -chainLinearGap=medium \
        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku) > do.log 2>&1
    #  real    404m9.925s

    cat fb.mm10.chainManPen1Link.txt
    #  724400544 bases of 2652783500 (27.307%) in intersection

    time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` mm10 manPen1) \
      > rbest.log 2>&1 &
    # real    499m21.668s

    # and for the swap:
    mkdir /hive/data/genomes/manPen1/bed/blastz.mm10.swap
    cd /hive/data/genomes/manPen1/bed/blastz.mm10.swap

    time (doBlastzChainNet.pl -verbose=2 -swap \
        /hive/data/genomes/mm10/bed/lastzManPen1.2017-03-15/DEF \
        -syntenicNet -fileServer=hgwdev \
        -chainMinScore=3000 -chainLinearGap=medium \
        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku) > swap.log 2>&1
    #  real    71m4.702s

    cat fb.manPen1.chainMm10Link.txt
    # 710179682 bases of 1999066070 (35.526%) in intersection

    time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` manPen1 mm10) \
       > rbest.log 2>&1
    # real    495m7.361s

#############################################################################
# LASTZ mouse/mm10 vs. Golden eagle/aquChr2 - (DONE - 2017-03-16 - Hiram)
    mkdir /hive/data/genomes/mm10/bed/lastzAquChr2.2017-03-16
    cd /hive/data/genomes/mm10/bed/lastzAquChr2.2017-03-16

    printf "# Mouse vs. Golden eagle
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz
BLASTZ_Y=3400
BLASTZ_L=6000
BLASTZ_K=2200
BLASTZ_Q=/scratch/data/blastz/HoxD55.q
#      A    C    G    T
#     91  -90  -25 -100
#    -90  100 -100  -25
#    -25 -100  100  -90
#   -100  -25  -90  91

# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: Golden eagle aquChr2
SEQ2_DIR=/hive/data/genomes/aquChr2/aquChr2.2bit
SEQ2_LEN=/hive/data/genomes/aquChr2/chrom.sizes
SEQ2_CHUNK=20000000
SEQ2_LAP=0
SEQ2_LIMIT=50

BASE=/hive/data/genomes/mm10/bed/lastzAquChr2.2017-03-16
TMPDIR=/dev/shm
" > DEF

    time (doBlastzChainNet.pl `pwd`/DEF -verbose=2 \
        -chainMinScore=5000 -chainLinearGap=loose \
          -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
            -syntenicNet) > do.log 2>&1
    # real    217m29.467s

    cat fb.mm10.chainAquChr2Link.txt
    # 105013175 bases of 2652783500 (3.959%) in intersection

    time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` mm10 aquChr2) \
         > rbest.log 2>&1 &
    # real    196m24.435s

    # and for the swap:
    mkdir /hive/data/genomes/aquChr2/bed/blastz.mm10.swap
    cd /hive/data/genomes/aquChr2/bed/blastz.mm10.swap

    time (doBlastzChainNet.pl -verbose=2 \
      /hive/data/genomes/mm10/bed/lastzAquChr2.2017-03-16/DEF \
        -swap -chainMinScore=5000 -chainLinearGap=loose \
          -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
            -syntenicNet) > swap.log 2>&1
    #  real    9m16.569s

    cat fb.aquChr2.chainMm10Link.txt
    # 89023131 bases of 1180019022 (7.544%) in intersection

    time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` aquChr2 mm10) \
         > rbest.log 2>&1
    # real    132m43.886s

#########################################################################
# LASTZ bison bisBis1 (DONE - 2017-03-17 - Hiram)
    mkdir /hive/data/genomes/mm10/bed/lastzBisBis1.2017-03-17
    cd /hive/data/genomes/mm10/bed/lastzBisBis1.2017-03-17

    printf '# Mouse vs. Bison
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz
# maximum M allowed with lastz is only 254
BLASTZ_M=254

# TARGET: Mouse mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000
SEQ1_LIMIT=50

# QUERY: bison bisBis1
SEQ2_DIR=/hive/data/genomes/bisBis1/bisBis1.2bit
SEQ2_LEN=/hive/data/genomes/bisBis1/chrom.sizes
SEQ2_CHUNK=20000000
SEQ2_LAP=0
SEQ2_LIMIT=900

BASE=/hive/data/genomes/mm10/bed/lastzBisBis1.2017-03-17
TMPDIR=/dev/shm
' > DEF

    time (doBlastzChainNet.pl -verbose=2 \
        `pwd`/DEF \
        -syntenicNet -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
        -chainMinScore=3000 -chainLinearGap=medium) > do.log 2>&1
    # real    576m23.128s

    cat fb.mm10.chainBisBis1Link.txt
    # 688337604 bases of 2652783500 (25.948%) in intersection

    time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` mm10 bisBis1) \
         > rbest.log 2>&1 &
    # real    430m48.078s

    #   and the swap
    mkdir /hive/data/genomes/bisBis1/bed/blastz.mm10.swap
    cd /hive/data/genomes/bisBis1/bed/blastz.mm10.swap
    time (doBlastzChainNet.pl -verbose=2 \
        /hive/data/genomes/mm10/bed/lastzBisBis1.2017-03-17/DEF \
        -swap -syntenicNet  \
        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
        -chainMinScore=3000 -chainLinearGap=medium) > swap.log 2>&1
    #   real    169m28.369s

    cat fb.bisBis1.chainMm10Link.txt
    # 682104798 bases of 2757854331 (24.733%) in intersection

    time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` bisBis1 mm10) \
         > rbest.log 2>&1
    # real    445m5.636s

############################################################################
# lastz frog xenTro9 (DONE - 2017-03-29 - Hiram)
    # establish a screen to control this job with a name to indicate what it is
    screen -S mm10XenTro9
    mkdir /hive/data/genomes/mm10/bed/lastzXenTro9.2017-03-29
    cd /hive/data/genomes/mm10/bed/lastzXenTro9.2017-03-29

    printf '# Mouse vs. frog
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz
BLASTZ_Y=3400
BLASTZ_L=6000
BLASTZ_K=2200
BLASTZ_Q=/scratch/data/blastz/HoxD55.q

# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000
SEQ1_LIMIT=10

# QUERY: frog xenTro9
SEQ2_DIR=/hive/data/genomes/xenTro9/xenTro9.2bit
SEQ2_LEN=/hive/data/genomes/xenTro9/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LAP=0
SEQ2_LIMIT=20

BASE=/hive/data/genomes/mm10/bed/lastzXenTro9.2017-03-29
TMPDIR=/dev/shm
' > DEF

    time (doBlastzChainNet.pl -verbose=2 `pwd`/DEF \
        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
          -syntenicNet -chainMinScore=5000 -chainLinearGap=loose) \
              > do.log 2>&1 &
    #	real    806m23.459s

    cat fb.mm10.chainXenTro9Link.txt
    #	87053836 bases of 2652783500 (3.282%) in intersection

    time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` mm10 xenTro9) \
         > rbest.log 2>&1 &
    # real    617m41.376s

    #	and for the swap
    mkdir /hive/data/genomes/xenTro9/bed/blastz.mm10.swap
    cd /hive/data/genomes/xenTro9/bed/blastz.mm10.swap
    time (doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzXenTro9.2017-03-29/DEF \
        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
        -syntenicNet -swap -chainMinScore=5000 -chainLinearGap=loose) \
	> swap.log 2>&1 &
    #	real    25m54.516s

    cat fb.xenTro9.chainMm10Link.txt
    #	90150612 bases of 1369865365 (6.581%) in intersection

    time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` xenTro9 mm10) \
         > rbest.log 2>&1 &
    # real    597m52.740s

#########################################################################
# lastz frog xenLae2 (DONE - 2017-03-29 - Hiram)
    # establish a screen to control this job with a name to indicate what it is
    screen -S mm10XenLae2
    mkdir /hive/data/genomes/mm10/bed/lastzXenLae2.2017-03-29
    cd /hive/data/genomes/mm10/bed/lastzXenLae2.2017-03-29

    printf '# Mouse vs. frog
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz
BLASTZ_Y=3400
BLASTZ_L=6000
BLASTZ_K=2200
BLASTZ_Q=/scratch/data/blastz/HoxD55.q

# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000
SEQ1_LIMIT=10

# QUERY: frog xenLae2
SEQ2_DIR=/hive/data/genomes/xenLae2/xenLae2.2bit
SEQ2_LEN=/hive/data/genomes/xenLae2/chrom.sizes
SEQ2_CHUNK=20000000
SEQ2_LAP=0
SEQ2_LIMIT=300

BASE=/hive/data/genomes/mm10/bed/lastzXenLae2.2017-03-29
TMPDIR=/dev/shm
' > DEF

    time (doBlastzChainNet.pl -verbose=2 `pwd`/DEF \
        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
          -syntenicNet -chainMinScore=5000 -chainLinearGap=loose) \
              > do.log 2>&1 &
    #	real    1044m10.115s

    cat fb.mm10.chainXenLae2Link.txt
    #	82272699 bases of 2652783500 (3.101%) in intersection

    time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` mm10 xenLae2) \
         > rbest.log 2>&1 &
    # real    656m46.337s

    #	and for the swap
    mkdir /hive/data/genomes/xenLae2/bed/blastz.mm10.swap
    cd /hive/data/genomes/xenLae2/bed/blastz.mm10.swap
    time (doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzXenLae2.2017-03-29/DEF \
        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
        -swap -chainMinScore=5000 -chainLinearGap=loose) > swap.log 2>&1
    #	real    26m14.884s
    time (doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzXenLae2.2017-03-29/DEF \
        -continue=syntenicNet -workhorse=hgwdev -smallClusterHub=ku \
	-bigClusterHub=ku -syntenicNet -swap -chainMinScore=5000 \
	-chainLinearGap=loose) > syntenicNet.log 2>&1 &
    # real    1m52.642s

    cat  fb.xenLae2.chainMm10Link.txt
    #	116001603 bases of 2408724787 (4.816%) in intersection

    time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` xenLae2 mm10) \
         > rbest.log 2>&1 &
    # real    746m4.542s

#########################################################################
# lastz turtle chrPic2 (DONE - 2017-04-05 - Hiram)
    # establish a screen to control this job with a name to indicate what it is
    screen -S mm10ChrPic2
    mkdir /hive/data/genomes/mm10/bed/lastzChrPic2.2017-04-05
    cd /hive/data/genomes/mm10/bed/lastzChrPic2.2017-04-05

    printf '# Mouse vs. turtle
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz
BLASTZ_Y=3400
BLASTZ_L=6000
BLASTZ_K=2200
BLASTZ_Q=/scratch/data/blastz/HoxD55.q

# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: turtle chrPic2
SEQ2_DIR=/hive/data/genomes/chrPic2/chrPic2.2bit
SEQ2_LEN=/hive/data/genomes/chrPic2/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LAP=0
SEQ2_LIMIT=300

BASE=/hive/data/genomes/mm10/bed/lastzChrPic2.2017-04-05
TMPDIR=/dev/shm
' > DEF

     time (doBlastzChainNet.pl -verbose=2 `pwd`/DEF \
        -syntenicNet -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
        -chainMinScore=5000 -chainLinearGap=loose) > do.log 2>&1 &
    #	real    865m16.816s

    # ku difficulties due to /dev/shm/ being full, continuing:
     time (doBlastzChainNet.pl -verbose=2 `pwd`/DEF \
        -syntenicNet -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
     -continue=cat -chainMinScore=5000 -chainLinearGap=loose) > cat.log 2>&1 &
    # real    13m13.959s

    # one big chain causing trouble, continuing:
     time (doBlastzChainNet.pl -verbose=2 `pwd`/DEF \
        -syntenicNet -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
     -continue=chainMerge -chainMinScore=5000 -chainLinearGap=loose) > chainMerge.log 2>&1 &
    # real    11m47.232s

    cat fb.mm10.chainChrPic2Link.txt
    #	112560591 bases of 2652783500 (4.243%) in intersection

    time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` mm10 chrPic2) \
         > rbest.log 2>&1 &
    # real    114m27.445s

    #	and for the swap
    mkdir /hive/data/genomes/chrPic2/bed/blastz.mm10.swap
    cd /hive/data/genomes/chrPic2/bed/blastz.mm10.swap
    time (doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzChrPic2.2017-04-05/DEF \
        -syntenicNet -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
        -swap -chainMinScore=5000 -chainLinearGap=loose) > swap.log 2>&1 &
    #	real    12m2.676s

    cat  fb.chrPic2.chainMm10Link.txt
    #	106063993 bases of 2173204089 (4.881%) in intersection

    time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` chrPic2 mm10) \
         > rbest.log 2>&1 &
    # real    110m9.546s

##############################################################################
# LASTZ Chinese hamster ovary cell line CHO-K1  criGriChoV1
#	(DONE - 2017-04-13 - Hiram)
    #	establish a screen to control this job
    screen -S mm10criGriChoV1
    mkdir /hive/data/genomes/mm10/bed/lastzCriGriChoV1.2017-04-13
    cd /hive/data/genomes/mm10/bed/lastzCriGriChoV1.2017-04-13

    printf '# Chinese hamster ovary cell line vs mouse
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz

# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000
SEQ1_LIMIT=40

# QUERY: Chinese hamster ovary cell line CHO-K1  criGriChoV1
SEQ2_DIR=/hive/data/genomes/criGriChoV1/criGriChoV1.2bit
SEQ2_LEN=/hive/data/genomes/criGriChoV1/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LIMIT=250
SEQ2_LAP=0

BASE=/hive/data/genomes/mm10/bed/lastzCriGriChoV1.2017-04-13
TMPDIR=/dev/shm
' > DEF

    time (doBlastzChainNet.pl -verbose=2 `pwd`/DEF \
	-noDbNameCheck -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
	-chainMinScore=3000 -chainLinearGap=medium) > do.log 2>&1 &
    #	real    575m28.254s

    cat fb.mm10.chainCriGriChoV1Link.txt
    #	1553371182 bases of 2652783500 (58.556%) in intersection

    # run doRecipBest.pl for mm10 vs. criGriChoV1 in this build directory,
    # in the background; options are given once, ahead of the two db names,
    # in the same form as the other lastz sections of this document
    time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` mm10 criGriChoV1) \
         > rbest.log 2>&1 &
    #	real    732m16.081s

    mkdir /hive/data/genomes/criGriChoV1/bed/blastz.mm10.swap
    cd /hive/data/genomes/criGriChoV1/bed/blastz.mm10.swap
    time (doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzCriGriChoV1.2017-04-13/DEF \
	-noDbNameCheck -swap -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
	-chainMinScore=3000 -chainLinearGap=medium) > swap.log 2>&1 &
    #	real    157m21.977s

    cat fb.criGriChoV1.chainMm10Link.txt
    #	1513594461 bases of 2318132242 (65.294%) in intersection

    # run doRecipBest.pl for the swap direction (criGriChoV1 vs. mm10) in this
    # build directory, in the background; options are given once, ahead of
    # the two db names, in the same form as the other lastz sections
    time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` criGriChoV1 mm10) \
         > rbest.log 2>&1 &
    # real    769m8.998s

##############################################################################
## 4-Way Multiz (DONE - 2017-04-20 - Hiram)
    ssh hgwdev
    mkdir /hive/data/genomes/mm10/bed/tupChi1Multiz4way
    cd /hive/data/genomes/mm10/bed/tupChi1Multiz4way

    # from the 213-way in the source tree, select out the 4 used here:
    /cluster/bin/phast/tree_doctor \
        --prune-all-but hg38,galVar1,mm10,tupChi1 \
        /cluster/home/hiram/kent/src/hg/utils/phyloTrees/213way.nh \
          > mm10.4way.nh
    cat mm10.4way.nh
    # ((hg38:0.143908,(tupChi1:0.120000,galVar1:0.080000):0.054937):0.002000,
    #    mm10:0.356483);

    # using TreeGraph2 on Mac desktop to rearrange tree to get mm10 at top:
# (mm10:0.356483,(hg38:0.143908,(tupChi1:0.12,galVar1:0.08):0.054937):0.002);

    #	what that looks like:
 ~/kent/src/hg/utils/phyloTrees/asciiTree.pl mm10.4way.nh | sed -e 's/^/# /;'

# (mm10:0.356483,
# (hg38:0.143908,
# (tupChi1:0.12,
# galVar1:0.08):0.054937):0.002);

    # extract species list from that .nh file
    sed 's/[a-z][a-z]*_//g; s/:[0-9\.][0-9\.]*//g; s/;//; /^ *$/d' \
        mm10.4way.nh | xargs echo | sed 's/ //g; s/,/ /g' \
        | sed 's/[()]//g; s/,/ /g' | tr '[ ]' '[\n]' > species.list.txt

    # construct db to name translation list:
    cat species.list.txt | while read DB
do
hgsql -N -e "select name,organism from dbDb where name=\"${DB}\";" hgcentraltest
done | sed -e "s/\t/->/; s/ /_/g;" | sed -e 's/$/;/' | sed -e 's/\./_/g' \
        | sed -e 's/-nosed/_nosed/; s/-eating/_eating/;' > db.to.name.txt

    # construct a common name .nh file:
    /cluster/bin/phast/tree_doctor --rename \
    "`cat db.to.name.txt`" mm10.4way.nh | sed -e 's/00*)/)/g; s/00*,/,/g' \
       | $HOME/kent/src/hg/utils/phyloTrees/asciiTree.pl /dev/stdin \
         > mm10.4way.commonNames.nh
    cat mm10.4way.commonNames.nh | sed -e 's/^/# /;'
# (Mouse:0.356483,
# (Human:0.143908,
# (Chinese_tree_shrew:0.12,
# Malayan_flying_lemur:0.08):0.054937):0.002);

#	Use this specification in the phyloGif tool:
#	http://genome.ucsc.edu/cgi-bin/phyloGif
#	to obtain a png image for src/hg/htdocs/images/phylo/mm10_4way.png

    ~/kent/src/hg/utils/phyloTrees/asciiTree.pl mm10.4way.nh > t.nh
    ~/kent/src/hg/utils/phyloTrees/scientificNames.sh t.nh \
       | $HOME/kent/src/hg/utils/phyloTrees/asciiTree.pl /dev/stdin \
          > mm10.4way.scientificNames.nh
    rm -f t.nh
    cat mm10.4way.scientificNames.nh | sed -e 's/^/# /;'
# (Mus_musculus:0.356483,
# (Homo_sapiens:0.143908,
# (Tupaia_chinensis:0.12,
# Galeopterus_variegatus:0.08):0.054937):0.002);

    /cluster/bin/phast/all_dists mm10.4way.nh | grep mm10 \
        | sed -e "s/mm10.//" | sort -k2n > 4way.distances.txt
    #	Use this output to create the table below
    cat 4way.distances.txt | sed -e 's/^/# /;'
# galVar1       0.493420
# hg38  0.502391
# tupChi1       0.533420

    # Write out and run sizeStats.pl, a small perl script that prints one
    # table row per line of 4way.distances.txt (db name and distance).
    # For each db it reads the fifth field (the percent figure) from:
    #   /hive/data/genomes/mm10/bed/lastz.<db>/fb.mm10.chain<Db>Link.txt
    # and, when that file exists with non-zero size, from the swap:
    #   /hive/data/genomes/<db>/bed/lastz.mm10/fb.<db>.chainMm10Link.txt
    # otherwise the swap measure stays "N/A".  The organism name is looked
    # up in hgcentraltest.dbDb.
    # The script is emitted with printf, therefore within the format string:
    #   %% produces a single percent sign, \\ produces a single backslash,
    #   and the '"'"' sequence produces a single quote character.
    printf '#!/usr/bin/env perl

use strict;
use warnings;

open (FH, "<4way.distances.txt") or
        die "can not read 4way.distances.txt";

my $count = 0;
while (my $line = <FH>) {
    chomp $line;
    my ($D, $dist) = split('"'"'\\s+'"'"', $line);
    my $chain = "chain" . ucfirst($D);
    my $B="/hive/data/genomes/mm10/bed/lastz.$D/fb.mm10." .
        $chain . "Link.txt";
    my $chainLinkMeasure =
        `awk '"'"'{print \\$5}'"'"' ${B} 2> /dev/null | sed -e "s/(//; s/)//"`;
    chomp $chainLinkMeasure;
    $chainLinkMeasure = 0.0 if (length($chainLinkMeasure) < 1);
    $chainLinkMeasure =~ s/\\%%//;
    my $swapFile="/hive/data/genomes/${D}/bed/lastz.mm10/fb.${D}.chainMm10Link.txt";
    my $swapMeasure = "N/A";
    if ( -s $swapFile ) {
	$swapMeasure =
	    `awk '"'"'{print \\$5}'"'"' ${swapFile} 2> /dev/null | sed -e "s/(//; s/)//"`;
	chomp $swapMeasure;
	$swapMeasure = 0.0 if (length($swapMeasure) < 1);
	$swapMeasure =~ s/\\%%//;
    }
    my $orgName=
    `hgsql -N -e "select organism from dbDb where name='"'"'$D'"'"';" hgcentraltest`;
    chomp $orgName;
    if (length($orgName) < 1) {
        $orgName="N/A";
    }
    ++$count;
    printf "# %%02d  %%.4f (%%%% %%06.3f) (%%%% %%06.3f) - %%s %%s\\n", $count, $dist,
        $chainLinkMeasure, $swapMeasure, $orgName, $D;
}
close (FH);
' > sizeStats.pl
    chmod +x ./sizeStats.pl
    ./sizeStats.pl

#	If you can fill in all the numbers in this table, you are ready for
#	the multiple alignment procedure

#       featureBits chainLink measures
#               chainLink
#  N distance  on mm10  on other     other species
# 01  0.4934 (% 35.618) (% 35.972) - Malayan flying lemur galVar1
# 02  0.5024 (% 35.372) (% 31.653) - Human hg38
# 03  0.5334 (% 25.764) (% 26.188) - Chinese tree shrew tupChi1

# None of this concern for distances matters in building the first step, the
# maf files.  The distances will be better calibrated later.

    # create species list and stripped down tree for autoMZ
    sed 's/[a-z][a-z]*_//g; s/:[0-9\.][0-9\.]*//g; s/;//; /^ *$/d' \
	mm10.4way.nh | xargs echo | sed 's/ //g; s/,/ /g' > tree.nh

    sed 's/[()]//g; s/,/ /g' tree.nh > species.list
    # mm10 hg38 tupChi1 galVar1


    # survey N50 for each
    for db in `cat species.list`
do
n50.pl /hive/data/genomes/$db/chrom.sizes
done
#       reading: /hive/data/genomes/mm10/chrom.sizes
#       contig count: 66, total size: 2730871774, one half size: 1365435887
# cumulative    N50 count       contig  contig size
# 1312176979      8       chr7    145441459
# 1365435887 one half size
# 1442871972      9       chr10   130694993
#       reading: /hive/data/genomes/hg38/chrom.sizes
#       contig count: 455, total size: 3209286105, one half size: 1604643052
# cumulative    N50 count       contig  contig size
# 1547391171      8       chrX    156040895
# 1604643052 one half size
# 1692529807      9       chr8    145138636
#       reading: /hive/data/genomes/tupChi1/chrom.sizes
#       contig count: 50750, total size: 2846580235, one half size: 1423290117
# cumulative    N50 count       contig  contig size
# 1419920836      231     KB321095        3691413
# 1423290117 one half size
# 1423590960      232     KB321106        3670124
#       reading: /hive/data/genomes/galVar1/chrom.sizes
#       contig count: 179514, total size: 3187660572, one half size: 1593830286
# cumulative    N50 count       contig  contig size
# 1593691350      3422    NW_007730159v1  245222
# 1593830286 one half size
# 1593936539      3423    NW_007729331v1  245189

    #	bash shell syntax here ...
    # Populate mafLinks/<db>/ with symlinks to the pairwise maf.gz files
    # found under the mm10 lastz.<db> build directories; these are the
    # inputs read by the multiz run.  Each ln command is echoed first, with
    # the variables expanded, so the log shows what was linked from where.
    cd /hive/data/genomes/mm10/bed/tupChi1Multiz4way
    export H=/hive/data/genomes/mm10/bed
    mkdir mafLinks
    # good assemblies can use syntenic net:
    #  hg38
    for G in hg38
    do
      mkdir mafLinks/$G
      echo "ln -s ${H}/lastz.$G/mafSynNet/*.maf.gz ./mafLinks/$G"
      ln -s ${H}/lastz.$G/mafSynNet/*.maf.gz ./mafLinks/$G
    done

    # other assemblies using recip best net:
    #  galVar1 tupChi1
    for G in galVar1 tupChi1
    do
      mkdir mafLinks/$G
      echo "ln -s ${H}/lastz.$G/mafRBestNet/*.maf.gz ./mafLinks/$G"
      ln -s ${H}/lastz.$G/mafRBestNet/*.maf.gz ./mafLinks/$G
    done

    # verify the symLinks are good:
    ls -ogrtL mafLinks/*/* | sed -e 's/^/# /; s/-rw-rw-r-- 1//;'
    ls -ogrtL mafLinks/*/* | sed -e 's/^/# /; s/-rw-rw-r-- 1//;' | head
#  52322575 Apr 10  2015 mafLinks/hg38/chr1.maf.gz
#  35696060 Apr 10  2015 mafLinks/hg38/chr10.maf.gz
#  36383118 Apr 10  2015 mafLinks/hg38/chr11.maf.gz

    ls -ogrtL mafLinks/*/* | sed -e 's/^/# /; s/-rw-rw-r-- 1//;' | tail
#      3104 Mar 10 00:08 mafLinks/tupChi1/chrUn_GL456379.maf.gz
#       143 Mar 10 00:08 mafLinks/tupChi1/chrUn_GL456381.maf.gz
#      1221 Mar 10 00:08 mafLinks/tupChi1/chrUn_GL456382.maf.gz

# XXX - do not need to split - Thu Apr 20 15:02:02 PDT 2017

    mkdir /hive/data/genomes/mm10/bed/tupChi1Multiz4way/splitRun
    cd /hive/data/genomes/mm10/bed/tupChi1Multiz4way/splitRun
    # construct a list of all possible maf file names.

    find ../mafLinks | grep maf.gz | sed -e 's#../mafLinks/##;' \
        | xargs -L 1 basename | sed -e 's/.gz//;' | sort -u > maf.list

    wc -l maf.list
    # 52 maf.list

    mkdir maf run
    cd run
    mkdir penn
    cp -p /cluster/bin/penn/multiz.2009-01-21_patched/multiz penn
    cp -p /cluster/bin/penn/multiz.2009-01-21_patched/maf_project penn
    cp -p /cluster/bin/penn/multiz.2009-01-21_patched/autoMZ penn

    #	set the db and pairs directories here
    #	autoMultiz.csh is the cluster job script: argument 1 is a maf file
    #	name from maf.list, argument 2 is the result file to write.
    #	It works in /dev/shm/$db/multiz.$c: copies in tree.nh and
    #	species.list, then for each species other than $db unpacks
    #	$pairs/<species>/<maf>.gz (or links an uncompressed maf, or writes
    #	a bare maf header line when there is no pairwise file), runs the
    #	local copy of autoMZ on those files, copies the result out, and
    #	removes the temporary directory.
    #	The script is written with printf only; an earlier unterminated
    #	"cat << '_EOF_'" here-document opener has been removed since it would
    #	have consumed the printf and all following lines as its input.
    printf '#!/bin/csh -ef
set db = mm10
set c = $1
set result = $2
set run = `/bin/pwd`
set tmp = /dev/shm/$db/multiz.$c
set pairs = /hive/data/genomes/mm10/bed/tupChi1Multiz4way/mafLinks
/bin/rm -fr $tmp
/bin/mkdir -p $tmp
/bin/cp -p ../../tree.nh ../../species.list $tmp
pushd $tmp > /dev/null
foreach s (`/bin/sed -e "s/$db //" species.list`)
    set in = $pairs/$s/$c
    set out = $db.$s.sing.maf
    if (-e $in.gz) then
        /bin/zcat $in.gz > $out
        if (! -s $out) then
            echo "##maf version=1 scoring=autoMZ" > $out
        endif
    else if (-e $in) then
        /bin/ln -s $in $out
    else
        echo "##maf version=1 scoring=autoMZ" > $out
    endif
end
set path = ($run/penn $path); rehash
$run/penn/autoMZ + T=$tmp E=$db "`cat tree.nh`" $db.*.sing.maf $c \
        > /dev/null
popd > /dev/null
/bin/rm -f $result
/bin/cp -p $tmp/$c $result
/bin/rm -fr $tmp
' > autoMultiz.csh

    chmod +x autoMultiz.csh

    printf '#LOOP
./autoMultiz.csh $(file1) {check out line+ /hive/data/genomes/mm10/bed/tupChi1Multiz4way/splitRun/maf/$(root1).maf}
#ENDLOOP
' > template

    ln -s ../maf.list maf.list
    ssh ku
    cd /hive/data/genomes/mm10/bed/tupChi1Multiz4way/splitRun/run
    gensub2 maf.list single template jobList
    para create jobList
    para try ... check ... push ... etc...
# Completed: 52 of 52 jobs
# CPU time in finished jobs:      44671s     744.52m    12.41h    0.52d  0.001 y
# IO & Wait Time:                  1129s      18.81m     0.31h    0.01d  0.000 y
# Average job time:                 881s      14.68m     0.24h    0.01d
# Longest finished job:            3537s      58.95m     0.98h    0.04d
# Submission to last job:          5634s      93.90m     1.56h    0.07d

    # combine into one file  (the 1>&2 redirect sends the echo to stderr)
    cd /hive/data/genomes/mm10/bed/tupChi1Multiz4way
    head -1 splitRun/maf/chr2.maf > tupChi1Multiz4way.maf
    time for F in splitRun/maf/*.maf
do
    echo "${F}" 1>&2
    egrep -v "^#" ${F}
done >> tupChi1Multiz4way.maf
    # real    0m16.400s

    tail -1 splitRun/maf/chr2.maf >> tupChi1Multiz4way.maf
# -rw-rw-r-- 1 5228617390 Apr 20 17:41 tupChi1Multiz4way.maf

    # Load into database
    ssh hgwdev
    cd /hive/data/genomes/mm10/bed/tupChi1Multiz4way
    mkdir /gbdb/mm10/tupChi1Multiz4way
    ln -s `pwd`/tupChi1Multiz4way.maf /gbdb/mm10/tupChi1Multiz4way
    cd /dev/shm
    time hgLoadMaf mm10 tupChi1Multiz4way
# Loaded 5635229 mafs in 1 files from /gbdb/mm10/tupChi1Multiz4way
# real    1m26.208s


    time hgLoadMafSummary -verbose=2 -minSize=30000 \
	-mergeGap=1500 -maxSize=200000 mm10 tupChi1Multiz4waySummary \
	/gbdb/mm10/tupChi1Multiz4way/tupChi1Multiz4way.maf
# Created 743966 summary blocks from 10080651 components and 5635229 mafs from /gbdb/mm10/tupChi1Multiz4way/tupChi1Multiz4way.maf
# real    1m45.053s

# -rw-rw-r-- 1 294659136 Apr 20 21:40 tupChi1Multiz4way.tab
# -rw-rw-r-- 1  34525860 Apr 20 22:09 tupChi1Multiz4waySummary.tab

    wc -l tupChi1Multiz4way*.tab
#  5635229 tupChi1Multiz4way.tab
#   743966 tupChi1Multiz4waySummary.tab

    rm tupChi1Multiz4way*.tab

##############################################################################
# GAP ANNOTATE MULTIZ4WAY MAF AND LOAD TABLES (DONE - 2017-04-20 - Hiram)
    # mafAddIRows has to be run on single chromosome maf files, it does not
    #	function correctly when more than one reference sequence
    #	is in a single file.  Need to split up the maf file into individual
    #   maf files
    mkdir -p /hive/data/genomes/mm10/bed/tupChi1Multiz4way/anno/mafSplit
    cd /hive/data/genomes/mm10/bed/tupChi1Multiz4way/anno/mafSplit

    time mafSplit -outDirDepth=2 -byTarget -useFullSequenceName \
        /dev/null . ../../tupChi1Multiz4way.maf
    #   real    1m25.202s
    find . -type f | wc -l
    #   52

    # check for N.bed files everywhere:
    cd /hive/data/genomes/mm10/bed/tupChi1Multiz4way/anno
    for DB in `cat ../species.list`
do
    if [ ! -s /hive/data/genomes/${DB}/${DB}.N.bed ]; then
        echo "MISS: ${DB}"
#         cd /hive/data/genomes/${DB}
#         twoBitInfo -nBed ${DB}.2bit ${DB}.N.bed
    else
        echo "  OK: ${DB}"
    fi
done

    cd /hive/data/genomes/mm10/bed/tupChi1Multiz4way/anno
    for DB in `cat ../species.list`
do
    echo "${DB} "
    ln -s  /hive/data/genomes/${DB}/${DB}.N.bed ${DB}.bed
    echo ${DB}.bed  >> nBeds
    ln -s  /hive/data/genomes/${DB}/chrom.sizes ${DB}.len
    echo ${DB}.len  >> sizes
done
    # make sure they all are successful symLinks:
    ls -ogrtL

    screen -S gapAnno      # use a screen to control this longish job
    ssh ku
    cd /hive/data/genomes/mm10/bed/tupChi1Multiz4way/anno
    mkdir result
    find ./mafSplit -type d | sed -e 's#./mafSplit/##' | while read D
do
    echo mkdir -p result/${D}
    mkdir -p result/${D}
done
    printf '#LOOP
mafAddIRows -nBeds=nBeds mafSplit/$(path1) /hive/data/genomes/mm10/mm10.2bit {check out exists+ result/$(path1)}
#ENDLOOP
' > template
    # << happy emacs

    find ./mafSplit -type f | sed -e 's#^./mafSplit/##' > maf.list
    gensub2 maf.list single template jobList
    # there isn't the usual job limit problem here, only 52 jobs
    para create jobList
    para try ... check ... push ...
# Completed: 52 of 52 jobs
# CPU time in finished jobs:        749s      12.48m     0.21h    0.01d  0.000 y
# IO & Wait Time:                   119s       1.99m     0.03h    0.00d  0.000 y
# Average job time:                  17s       0.28m     0.00h    0.00d
# Longest finished job:              65s       1.08m     0.02h    0.00d
# Submission to last job:           110s       1.83m     0.03h    0.00d

    # verify all result files have some content, look for 0 size files:
    find ./result -type f -size 0
    # should see none
    # or in this manner:
    find ./result -type f | xargs ls -og | sort -k3nr | tail

    # combine into one file  (the 1>&2 redirect sends the echo to stderr)
    head -q -n 1 result/4/1/chrUn_GL456381.maf > mm10.4way.maf
    time find ./result -type f | while read F
do
    echo "${F}" 1>&2
    grep -h -v "^#" ${F}
done >> mm10.4way.maf
    # real    0m33.237s

    #	these maf files do not have the end marker, this does nothing:
    #	tail -q -n 1 result/4/0/NW_007804317v1.maf >> mm10.4way.maf
    # How about an official end marker:
    echo "##eof maf" >> mm10.4way.maf
    ls -og
# -rw-rw-r-- 1 7580362629 Apr 20 22:27 mm10.4way.maf

    du -hsc mm10.4way.maf
    # 7.1G     mm10.4way.maf

    # construct symlinks to get the individual maf files into gbdb:
    rm /gbdb/mm10/tupChi1Multiz4way/tupChi1Multiz4way.maf   # remove previous results
    ln -s `pwd`/mm10.4way.maf /gbdb/mm10/tupChi1Multiz4way/tupChi1Multiz4way.maf

    # Load into database
    cd /dev/shm
    time hgLoadMaf -pathPrefix=/gbdb/mm10/tupChi1Multiz4way mm10 tupChi1Multiz4way
    # Loaded 6931895 mafs in 1 files from /gbdb/mm10/tupChi1Multiz4way
    # real    1m59.548s

    time hgLoadMafSummary -verbose=2 -minSize=30000 \
	-mergeGap=1500 -maxSize=200000 mm10 tupChi1Multiz4waySummary \
        /gbdb/mm10/tupChi1Multiz4way/tupChi1Multiz4way.maf
    # Created 743966 summary blocks from 10080651 components and 6931895 mafs from /gbdb/mm10/tupChi1Multiz4way/tupChi1Multiz4way.maf
    # real    2m14.237s

    # -rw-rw-r-- 1  362918923 Apr 20 22:30 tupChi1Multiz4way.tab
    # -rw-rw-r-- 1   36013792 Apr 20 22:33 tupChi1Multiz4waySummary.tab

    rm tupChi1Multiz4way*.tab

######################################################################
# MULTIZ4WAY MAF FRAMES (DONE - 2017-04-20 - Hiram)
    ssh hgwdev
    mkdir /hive/data/genomes/mm10/bed/tupChi1Multiz4way/frames
    cd /hive/data/genomes/mm10/bed/tupChi1Multiz4way/frames
#   survey all the genomes to find out what kinds of gene tracks they have
    printf '#!/bin/csh -fe
foreach db (`cat ../species.list`)
    printf "# ${db}: "
    set tables = `hgsql $db -N -e "show tables" | egrep "Gene|ncbiRefSeq"`
    foreach table ($tables)
        if ($table == "ensGene" || $table == "refGene" || \
           $table == "ncbiRefSeq" || $table == "mgcGenes" || \
           $table == "knownGene" || $table == "xenoRefGene" ) then
           set count = `hgsql $db -N -e "select count(*) from $table"`
            echo -n "${table}: ${count}, "
        endif
    end
    set orgName = `hgsql hgcentraltest -N -e \
            "select scientificName from dbDb where name='"'"'$db'"'"'"`
    set orgId = `hgsql $db -N -e \
            "select id from organism where name='"'"'$orgName'"'"'"`
    if ($orgId == "") then
        echo "Mrnas: 0"
    else
        set count = `hgsql $db -N -e "select count(*) from gbCdnaInfo where organism=$orgId"`
        echo "Mrnas: ${count}"
    endif
end
' > showGenes.csh

    chmod +x ./showGenes.csh
    time ./showGenes.csh
# mm10: ensGene: 103734, knownGene: 63759, mgcGenes: 26777, ncbiRefSeq: 107894, refGene: 36869, xenoRefGene: 179145, Mrnas: 5367574
# hg38: ensGene: 208239, knownGene: 197782, mgcGenes: 35305, ncbiRefSeq: 159322, refGene: 69527, xenoRefGene: 184852, Mrnas: 11481766
# tupChi1: refGene: 206, xenoRefGene: 343637, Mrnas: 50709
# galVar1: ncbiRefSeq: 41547, xenoRefGene: 499145, Mrnas: 0

# real    0m41.291s

    # from that summary, use these gene sets:
    # knownGene - hg38 mm10
    # ncbiRefSeq - galVar1
    # xenoRefGene - tupChi1

    mkdir genes
    #   1. knownGene: hg38 mm10
    for DB in hg38 mm10
do
    hgsql -N -e "select name,chrom,strand,txStart,txEnd,cdsStart,cdsEnd,exonCount,exonStarts,exonEnds from knownGene" ${DB} \
      | genePredSingleCover stdin stdout | gzip -2c \
        > genes/${DB}.gp.gz
    printf "# ${DB}: "
    genePredCheck -db=${DB} genes/${DB}.gp.gz
done
# hg38: checked: 21375 failed: 0
# mm10: checked: 21100 failed: 0

    #   2. xenoRefGene: tupChi1
    for DB in tupChi1
do
hgsql -N -e "select name,chrom,strand,txStart,txEnd,cdsStart,cdsEnd,exonCount,exonStarts,exonEnds
from xenoRefGene" ${DB} \
      | genePredSingleCover stdin stdout | gzip -2c \
        > /dev/shm/${DB}.tmp.gz
    mv /dev/shm/${DB}.tmp.gz genes/$DB.gp.gz
    printf "# ${DB}: "
    genePredCheck -db=${DB} genes/${DB}.gp.gz
done
# tupChi1: checked: 30481 failed: 0

    #   3. ncbiRefSeq for galVar1
    for DB in galVar1
do
hgsql -N -e "select * from ncbiRefSeq" ${DB} | cut -f2- \
      | genePredSingleCover stdin stdout | gzip -2c \
        > /dev/shm/${DB}.tmp.gz
    mv /dev/shm/${DB}.tmp.gz genes/$DB.gp.gz
    echo -n "# ${DB}: "
    genePredCheck -db=${DB} genes/${DB}.gp.gz
done
# galVar1: checked: 23389 failed: 0

    # verify counts for genes are reasonable:
    for T in genes/*.gz
do
    echo -n "# $T: "
    zcat $T | cut -f1 | sort | uniq -c | wc -l
done
# genes/galVar1.gp.gz: 23054
# genes/hg38.gp.gz: 21375
# genes/mm10.gp.gz: 21100
# genes/tupChi1.gp.gz: 25028

    time (cat ../anno/mm10.4way.maf \
	| genePredToMafFrames mm10 stdin stdout \
          `cat ../species.list.txt | xargs echo \
            | sed -e "s#\([a-zA-Z0-9]*\)#\1 genes/\1.gp.gz#g;"` \
		| gzip > tupChi1Multiz4wayFrames.bed.gz)
    # real    1m35.311s

    # verify there are frames on everything, should be 4 species:
    zcat tupChi1Multiz4wayFrames.bed.gz | awk '{print $4}' | sort | uniq -c \
       | sed -e 's/^/# /;'
#  233262 galVar1
#  231021 hg38
#  190782 mm10
#  245209 tupChi1

    #   load the resulting file
    ssh hgwdev
    cd /hive/data/genomes/mm10/bed/tupChi1Multiz4way/frames
    time hgLoadMafFrames mm10 tupChi1Multiz4wayFrames tupChi1Multiz4wayFrames.bed.gz
    #   real    0m9.566s

    time featureBits -countGaps mm10 tupChi1Multiz4wayFrames
    # 38594412 bases of 2730871774 (1.413%) in intersection
    # real    0m5.681s

    #   enable the trackDb entries:
# frames tupChi1Multiz4wayFrames
# irows on
    #   appears to work OK

#########################################################################
# Phylogenetic tree from 4-way (DONE - 2017-04-20 - Hiram)
    mkdir /hive/data/genomes/mm10/bed/tupChi1Multiz4way/4d
    cd /hive/data/genomes/mm10/bed/tupChi1Multiz4way/4d

    # using knownGene for mm10, only transcribed genes and nothing
    #	from the randoms and other misc.
    hgsql -Ne "select name,chrom,strand,txStart,txEnd,cdsStart,cdsEnd,exonCount,exonStarts,exonEnds from knownGene where cdsEnd > cdsStart;" mm10 \
      | egrep -E -v "chrM|chrUn|random|_alt" > knownGene.gp
    wc -l *.gp
    #     93916 knownGene.gp

    # verify it is only on the chroms:
    cut -f2 knownGene.gp | sort | uniq -c | sort -rn | sed -e 's/^/    # /;'
    #    3949 chr2
    #    3861 chr7
    #    3496 chr11
    #    2789 chr5
    #    2782 chr4
    #    2698 chr1
    #    2585 chr9
    #    2395 chr6
    #    2304 chr3
    #    2238 chr17
    #    2206 chr8
    #    2166 chr10
    #    1930 chrX
    #    1773 chr14
    #    1717 chr15
    #    1654 chr13
    #    1509 chr12
    #    1496 chr19
    #    1489 chr16
    #    1125 chr18
    #     193 chrY

    genePredSingleCover knownGene.gp stdout | sort > knownGeneNR.gp
    wc -l knownGeneNR.gp
    #	21054 knownGeneNR.gp

    genePredCheck -db=mm10 knownGeneNR.gp
    #  checked: 21054 failed: 0

    # the annotated maf is (long listing without owner and group columns):
    ls -og ../anno/mm10.4way.maf
# -rw-rw-r-- 1 7580362629 Apr 20 22:27 ../anno/mm10.4way.maf

    mkdir annoSplit
    cd annoSplit
    time mafSplit -verbose=2 -outDirDepth=2 -byTarget -useFullSequenceName \
	/dev/null . ../../anno/mm10.4way.maf
    # real    2m13.529s

    find . -type f | wc -l
    #   52
    ssh ku
    mkdir /hive/data/genomes/mm10/bed/tupChi1Multiz4way/4d/run
    cd /hive/data/genomes/mm10/bed/tupChi1Multiz4way/4d/run
    mkdir ../mfa

    # newer versions of msa_view have a slightly different operation
    # the sed of the gp file inserts the reference species in the chr name
    #
    # 4d.csh is the cluster job script, called from the template with:
    #   $1 - maf file name; its root (extension removed) is the sequence name
    #   $2 - path of that maf file relative to 4d/annoSplit/
    #   $3 - result file path relative to 4d/mfa/
    # Working in /dev/shm, it selects the knownGeneNR.gp rows whose second
    # column is that sequence name, rewrites the name to mm10.<name>, and
    # when at least one gene row was selected runs msa_view twice:
    # --4d with those gene features on the maf to make an SS file, then
    # --tuple-size 1 on the SS file to make the result file.
    # With no gene rows the result file is written as a single empty line.
    # Within the printf format string, \\ produces one backslash and the
    # '"'"' sequence produces a single quote character.
    printf '#!/bin/csh -fe
set PHASTBIN = /cluster/bin/phast.build/cornellCVS/phast.2010-12-30/bin
set GP = knownGeneNR.gp
set r = "/hive/data/genomes/mm10/bed/tupChi1Multiz4way"
set c = $1:r
set infile = $r/4d/annoSplit/$2
set outDir = $r/4d/mfa/$3:h
set outfile = $r/4d/mfa/$3
/bin/mkdir -p $outDir
cd /dev/shm
/bin/awk -v C=$c '"'"'$2 == C {print}'"'"' $r/4d/$GP | sed -e "s/\\t$c\\t/\\tmm10.$c\\t/" > $c.gp
set NL=`wc -l $c.gp| gawk '"'"'{print $1}'"'"'`
echo $NL
if ("$NL" != "0") then
    $PHASTBIN/msa_view --4d --features $c.gp -i MAF $infile -o SS > $c.ss
    $PHASTBIN/msa_view -i SS --tuple-size 1 $c.ss > $outfile
else
    echo "" > $outfile
endif
/bin/rm -f /dev/shm/$c.gp /dev/shm/$c.ss
' > 4d.csh

    chmod +x 4d.csh

    find ../annoSplit -type f | sed -e "s#../annoSplit/##" > maf.list
    wc -l maf.list
# 52 maf.list

    printf '#LOOP
4d.csh $(file1) $(path1) {check out line+ ../mfa/$(dir1)/$(dir2)$(root1).mfa}
#ENDLOOP
' > template

    gensub2 maf.list single template jobList
    # do not have the usual problem with fast jobs here, only 52 of them total
    para create jobList
    para try ... check
    para time
# Completed: 52 of 52 jobs
# CPU time in finished jobs:        615s      10.26m     0.17h    0.01d  0.000 y
# IO & Wait Time:                   122s       2.03m     0.03h    0.00d  0.000 y
# Average job time:                  14s       0.24m     0.00h    0.00d
# Longest finished job:              50s       0.83m     0.01h    0.00d
# Submission to last job:            86s       1.43m     0.02h    0.00d

    # Not all results have contents, or finish successfully, that is OK
    # it is because not all contigs have genes, only gene sequences are measured

    # combine mfa files
    ssh hgwdev
    cd /hive/data/genomes/mm10/bed/tupChi1Multiz4way/4d
    # remove the broken empty files, size 0 and size 1 byte.
    # A bare 'find -size 1' counts 512-byte blocks (rounded up), not bytes,
    # which is why it does not reliably pick out the one-byte files.
    # Test the byte count explicitly: -size -2c is "fewer than 2 bytes".
    find ./mfa -type f -size -2c | xargs rm -f
    # verify with a listing: any file still smaller than 2 bytes is
    # recorded in empty.list and removed
    find ./mfa -type f | xargs ls -og | awk '$3 < 2' | awk '{print $NF}' \
        > empty.list
    cat empty.list | xargs rm -f
    # see what is left:
    ls -ogrt mfa/*/*/*.mfa | sort -k3nr | wc
    #       21     147    1081

    # want comma-less species.list
    time /cluster/bin/phast.build/cornellCVS/phast.2010-12-30/bin/msa_view \
	--aggregate "`cat ../species.list`" mfa/*/*/*.mfa | sed s/"> "/">"/ \
	    > 4d.all.mfa
    # real    0m1.256s

    # check they are all in there:
    grep "^>" 4d.all.mfa | wc -l
    # 4
    grep "^>" 4d.all.mfa | sed -e 's/^/# /;'
# >mm10
# >hg38
# >tupChi1
# >galVar1

    sed 's/[a-z][a-z]*_//g; s/:[0-9\.][0-9\.]*//g; s/;//; /^ *$/d' \
	../mm10.4way.nh | xargs echo | sed -e 's/ //g' > tree_commas.nh
    # tree_commas.nh looks like:
    # (mm10,(hg38,(tupChi1,galVar1)))

    # use phyloFit to create tree model (output is phyloFit.mod)
    time /cluster/bin/phast.build/cornellCVS/phast.2010-12-30/bin/phyloFit \
	    --EM --precision MED --msa-format FASTA --subst-mod REV \
		--tree tree_commas.nh 4d.all.mfa
    #  real    0m0.727s

    mv phyloFit.mod all.mod

    grep TREE all.mod
# TREE:
# (mm10:0.170506,(hg38:0.114771,
#	(tupChi1:0.187178,galVar1:0.105148):0.011794):0.170506);

    # compare these calculated lengths to the tree extracted from 191way:
    grep TREE all.mod | sed -e 's/TREE: //' \
       | /cluster/bin/phast/all_dists /dev/stdin | grep mm10 \
          | sed -e "s/mm10.//;"  | sort > new.dists
    /cluster/bin/phast/all_dists ../mm10.4way.nh | grep mm10 \
        | sed -e "s/mm10.//;" | sort > old.dists
     # printing out the 'new', the 'old' the 'difference' and percent difference
    join new.dists old.dists | awk '{
  printf "#\t%s\t%8.6f\t%8.6f\t%8.6f\t%8.6f\n", $1, $2, $3, $2-$3, 100*($2-$3)/$3 }' \
      | sort -k3n
#       hg38    0.455783        0.502391        -0.046608       -9.277236
#       galVar1 0.457954        0.493420        -0.035466       -7.187791
#       tupChi1 0.539984        0.533420        0.006564        1.230550

#########################################################################
# phastCons 4-way (DONE - 2017-04-20 - Hiram)
    # split 4way mafs into 10M chunks and generate sufficient statistics
    # files for phastCons
    ssh ku
    mkdir -p /hive/data/genomes/mm10/bed/tupChi1Multiz4way/cons/SS
    cd /hive/data/genomes/mm10/bed/tupChi1Multiz4way/cons/SS
    mkdir result done

    # Generate mkSS.csh, the per-maf cluster job.  Arguments:
    #   $1 d - sub-directory of the maf under anno/result
    #   $2 c - maf file name without the .maf suffix
    #   $3   - marker file written on completion (done/$d/$c in the template)
    # The job exits immediately when $3 or $3.running is already non-empty.
    # msa_split is only run when the maf has lines other than "#" comment
    # lines (total line count differs from the comment line count).
    # Results are written into cons/SS/result/$d/$c using output root $c;
    # that directory is removed and recreated on every run.
    printf '#!/bin/csh -ef
set d = $1
set c = $2
set doneDir = done/$d
set MAF = /hive/data/genomes/mm10/bed/tupChi1Multiz4way/anno/result/$d/$c.maf
set WINDOWS = /hive/data/genomes/mm10/bed/tupChi1Multiz4way/cons/SS/result/$d/$c
set WC = `cat $MAF | wc -l`
set NL = `grep "^#" $MAF | wc -l`
if ( -s $3 ) then
    exit 0
endif
if ( -s $3.running ) then
    exit 0
endif

/bin/mkdir -p $doneDir
/bin/date >> $3.running

/bin/rm -fr $WINDOWS
/bin/mkdir -p $WINDOWS
pushd $WINDOWS > /dev/null
if ( $WC != $NL ) then
/cluster/bin/phast.build/cornellCVS/phast.2010-12-30/bin/msa_split \\
    $MAF -i MAF -o SS -r $WINDOWS/$c -w 10000000,0 -I 1000 -B 5000
endif
popd > /dev/null
/bin/date >> $3
/bin/rm -f $3.running
' > mkSS.csh

    chmod +x mkSS.csh

    printf '#LOOP
mkSS.csh $(dir1) $(root1) {check out line+ done/$(dir1)/$(root1)}
#ENDLOOP
' > template

    find ../../anno/result -type f | sed -e "s#../../anno/result/##" > maf.list
    wc -l maf.list
# 52 maf.list

    ssh ku
    cd /hive/data/genomes/mm10/bed/tupChi1Multiz4way/cons/SS

    gensub2 maf.list single template jobList
    # no need to worry about fast jobs, only 52 jobs here
    para create jobList
    para try ... check ... etc
    para push
# Completed: 52 of 52 jobs
# CPU time in finished jobs:       1064s      17.74m     0.30h    0.01d  0.000 y
# IO & Wait Time:                   180s       3.00m     0.05h    0.00d  0.000 y
# Average job time:                  24s       0.40m     0.01h    0.00d
# Longest finished job:              89s       1.48m     0.02h    0.00d
# Submission to last job:           127s       2.12m     0.04h    0.00d


    find ./result -type f | wc -l
    # 290

    # Run phastCons
    #	This job is I/O intensive in its output files, beware where this
    #	takes place or do not run too many at once.
    ssh ku
    mkdir -p /hive/data/genomes/mm10/bed/tupChi1Multiz4way/cons/run.cons
    cd /hive/data/genomes/mm10/bed/tupChi1Multiz4way/cons/run.cons

    #	This is setup for multiple runs based on subsets, but only running
    #   the 'all' subset here.
    #   It triggers off of the current working directory
    #	$cwd:t which is the "grp" in this script.  Running:
    #	all

    # Generate doPhast.csh, the per-chunk phastCons cluster job.
    # Arguments, as assigned at the top of the script:
    #   $1 c   - chunk name, used for --seqname/--idpref and output names
    #   $2 d   - sub-directory of the chunk under $cons/SS/result
    #   $3 f   - SS file name within that sub-directory
    #   $4 len - phastCons --expected-length
    #   $5 cov - phastCons --target-coverage
    #   $6 rho - phastCons --rho
    # The group name is the tail of the current working directory ($cwd:t)
    # and selects the model $cons/$grp/$grp.mod, plus the optional
    # $cons/$grp/$grp.non-inf list passed as --not-informative.
    # Each job works in its own $cons/tmp/${d}_${c} directory, then moves
    # $c.pp and $c.bed into pp/$d and bed/$d under the working directory.
    # Every line continuation inside the printf format is written as a
    # doubled backslash so the generated script receives a single one
    # without depending on how printf treats an unrecognized escape.
    printf '#!/bin/csh -fe
set PHASTBIN = /cluster/bin/phast.build/cornellCVS/phast.2010-12-30/bin
set c = $1
set d = $2
set f = $3
set len = $4
set cov = $5
set rho = $6
set grp = $cwd:t
set cons = /hive/data/genomes/mm10/bed/tupChi1Multiz4way/cons
set tmp = $cons/tmp/${d}_${c}
mkdir -p $tmp
set ssSrc = $cons/SS/result
set useGrp = "$grp.mod"
if (-s $cons/$grp/$grp.non-inf) then
  ln -s $cons/$grp/$grp.mod $tmp
  ln -s $cons/$grp/$grp.non-inf $tmp
  ln -s $ssSrc/$d/$f $tmp
else
  ln -s $ssSrc/$d/$f $tmp
  ln -s $cons/$grp/$grp.mod $tmp
endif
pushd $tmp > /dev/null
if (-s $grp.non-inf) then
  $PHASTBIN/phastCons $f $useGrp \\
    --rho $rho --expected-length $len --target-coverage $cov --quiet \\
    --not-informative `cat $grp.non-inf` \\
    --seqname $c --idpref $c --most-conserved $c.bed --score > $c.pp
else
  $PHASTBIN/phastCons $f $useGrp \\
    --rho $rho --expected-length $len --target-coverage $cov --quiet \\
    --seqname $c --idpref $c --most-conserved $c.bed --score > $c.pp
endif
popd > /dev/null
mkdir -p pp/$d bed/$d
sleep 4
touch pp/$d bed/$d
rm -f pp/$d/$c.pp
rm -f bed/$d/$c.bed
mv $tmp/$c.pp pp/$d
mv $tmp/$c.bed bed/$d
rm -fr $tmp
rmdir --ignore-fail-on-non-empty $cons/tmp/$d:h
' > doPhast.csh

    chmod +x doPhast.csh

    #	this template will serve for all runs
    #	root1 == chrom name, file1 == ss file name without .ss suffix
    printf '#LOOP
../run.cons/doPhast.csh $(root1) $(dir1) $(file1) 45 0.3 0.3 {check out line+ pp/$(dir1)/$(root1).pp}
#ENDLOOP
' > template

    find ../SS/result -type f | sed -e "s#../SS/result/##" > ss.list
    wc -l ss.list
    #	290 ss.list

    # Create parasol batch and run it
    # run for all species
    cd /hive/data/genomes/mm10/bed/tupChi1Multiz4way/cons
    mkdir -p all
    cd all
    #	Using the .mod tree
    cp -p ../../4d/all.mod ./all.mod

    gensub2 ../run.cons/ss.list single ../run.cons/template jobList
    para -maxJob=100 create jobList
    para try ... check ...
    para push
# Completed: 290 of 290 jobs
# CPU time in finished jobs:       5576s      92.93m     1.55h    0.06d  0.000 y
# IO & Wait Time:                  1995s      33.25m     0.55h    0.02d  0.000 y
# Average job time:                  26s       0.44m     0.01h    0.00d
# Longest finished job:              33s       0.55m     0.01h    0.00d
# Submission to last job:            66s       1.10m     0.02h    0.00d

    # create Most Conserved track
    cd /hive/data/genomes/mm10/bed/tupChi1Multiz4way/cons/all
    time cut -f1 ../../../../chrom.sizes | while read C
do
    ls -d bed/?/?/${C} 2> /dev/null | while read D
    do
        echo ${D}/${C}*.bed 1>&2
        cat ${D}/${C}*.bed
    done | sort -k1,1 -k2,2n \
    | awk '{printf "%s\t%d\t%d\tlod=%d\t%s\n", "'${C}'", $2, $3, $5, $5;}'
done > tmpMostConserved.bed
    # real    0m12.570s

    time /cluster/bin/scripts/lodToBedScore tmpMostConserved.bed \
         > mostConserved.bed
    # real    0m7.235s

# -rw-rw-r-- 1 28670932 Apr 21 00:01 tmpMostConserved.bed
# -rw-rw-r-- 1 29438194 Apr 21 00:02 mostConserved.bed

    # load into database
    ssh hgwdev
    cd /hive/data/genomes/mm10/bed/tupChi1Multiz4way/cons/all
    time hgLoadBed mm10 tupChi1PhastConsElements4way mostConserved.bed
    # Read 841312 elements of size 5 from mostConserved.bed
    # real    0m7.635s

    # on human we often try for 5% overall cov, and 70% CDS cov
    # most bets are off here for that goal, these alignments are too few
    #	and too far between
    #	--rho 0.3 --expected-length 45 --target-coverage 0.3
    time featureBits mm10 -enrichment knownGene:cds tupChi1PhastConsElements4way
# knownGene:cds 1.333%, tupChi1PhastConsElements4way 4.368%, both 0.924%,
#	cover 69.30%, enrich 15.86x
#  real    0m8.883s

    # Create merged posterior probability file and wiggle track data files
    cd /hive/data/genomes/mm10/bed/tupChi1Multiz4way/cons/all
    mkdir downloads

    # the third sed fixes the chrom names, removing the partition extensions
    time (find ./pp -type f | sed -e "s#^./##; s#\.# d #g; s#-# m #;" \
	| sort -k1,1 -k3,3n | sed -e "s# d #.#g; s# m #-#g;" | xargs cat \
	| sed -e 's/\.[0-9][0-9]*-[0-9][0-9]* start/ start/' \
        | gzip -c > downloads/phastCons4way.wigFix.gz)
    #   real    13m32.808s

# -rw-rw-r-- 1 1452731444 Apr 21 00:18 phastCons4way.wigFix.gz

    # check integrity of data with wigToBigWig
    time (zcat downloads/phastCons4way.wigFix.gz \
	| wigToBigWig -verbose=2 stdin /hive/data/genomes/mm10/chrom.sizes \
	    phastCons4way.bw) > bigWig.log 2>&1
    egrep "real|VmPeak" bigWig.log
    # pid=19728: VmPeak:    12564976 kB
    # real    17m36.198s

    bigWigInfo phastCons4way.bw | sed -e 's/^/# /;'
# version: 4
# isCompressed: yes
# isSwapped: 0
# primaryDataSize: 2,285,833,964
# primaryIndexSize: 63,248,068
# zoomLevels: 10
# chromCount: 37
# basesCovered: 1,155,614,560
# mean: 0.166872
# min: 0.000000
# max: 1.000000
# std: 0.286694

    #	encode those files into wiggle data
    time (zcat downloads/phastCons4way.wigFix.gz \
	| wigEncode stdin phastCons4way.wig phastCons4way.wib)
    # Converted stdin, upper limit 1.00, lower limit 0.00
    #  real    6m26.433s

    du -hsc *.wi?
    #  1.1G    phastCons4way.wib
    #  184M    phastCons4way.wig

    # Load gbdb and database with wiggle.
    ln -s `pwd`/phastCons4way.wib /gbdb/mm10/tupChi1Multiz4way/phastCons4way.wib
    time hgLoadWiggle -pathPrefix=/gbdb/mm10/tupChi1Multiz4way \
	mm10 tupChi1PhastCons4way phastCons4way.wig
    #   real    0m22.540s

    # use to set trackDb.ra entries for wiggle min and max
    # and verify table is loaded correctly

    time wigTableStats.sh mm10 tupChi1PhastCons4way
# db.table                 min max mean       count sumData
# mm10.tupChi1PhastCons4way  0 1 0.166872 1155614560 1.9284e+08
#     stdDev  viewLimits
#	0.286694 viewLimits=0:1

# real    0m9.615s

    #  Create histogram to get an overview of all the data
    time hgWiggle -doHistogram -db=mm10 \
	-hBinSize=0.001 -hBinCount=1000 -hMinVal=0.0 -verbose=2 \
	    tupChi1PhastCons4way > histogram.data 2>&1
    #	real    1m9.916s

    #	create plot of histogram:

    printf 'set terminal png small x000000 xffffff xc000ff x66ff66 xffff00 x00ffff font \
"/usr/share/fonts/default/Type1/n022004l.pfb"
set size 1.4, 0.8
set key left box
set grid noxtics
set grid ytics
set title " Mouse mm10 Histogram tupChi1PhastCons4way track"
set xlabel " phastCons4way score"
set ylabel " Relative Frequency"
set y2label " Cumulative Relative Frequency (CRF)"
set y2range [0:1]
set y2tics
set yrange [0:0.02]

plot "histogram.data" using 2:5 title " RelFreq" with impulses, \\
        "histogram.data" using 2:7 axes x1y2 title " CRF" with lines
' | gnuplot > histo.png

    display histo.png &

#########################################################################
# phyloP for 4-way (DONE - 2017-04-20 - Hiram)
    # run phyloP with score=LRT
    ssh ku
    mkdir /cluster/data/mm10/bed/tupChi1Multiz4way/consPhyloP
    cd /cluster/data/mm10/bed/tupChi1Multiz4way/consPhyloP

    mkdir run.phyloP
    cd run.phyloP
    # Adjust model file base composition background and rate matrix to be
    # representative of the chromosomes in play
    grep BACKGROUND ../../4d/all.mod | awk '{printf "%0.3f\n", $3 + $4}'
    #	0.571
    /cluster/bin/phast.build/cornellCVS/phast.2010-12-30/bin/modFreqs \
	../../4d/all.mod 0.571 > all.mod
    # verify, the BACKGROUND should now be paired up:
    grep BACK all.mod
    #   BACKGROUND: 0.219000 0.281000 0.281000 0.219000

    # Generate doPhyloP.csh, the per-chunk phyloP cluster job.  Arguments:
    #   $1 f   - SS chunk path relative to cons/SS/result, without the
    #            .ss suffix (the script appends .ss itself)
    #   $2 out - destination path of the resulting .wigFix file
    # The value given to --chrom is the tail of $1 with its last
    # dot-extension removed ($f:t:r).  The group name is the tail of the
    # current working directory and selects run.phyloP/$grp.mod.
    # phyloP runs in a private $cons/tmp/$grp/$f directory with
    # --method LRT --mode CONACC --wig-scores; the result is then moved
    # to $out and the temporary directories are removed if empty.
    printf '#!/bin/csh -fe
set PHASTBIN = /cluster/bin/phast.build/cornellCVS/phast.2010-12-30/bin
set f = $1
set d = $f:h
set file1 = $f:t
set out = $2
set cName = $f:t:r
set grp = $cwd:t
set cons = /hive/data/genomes/mm10/bed/tupChi1Multiz4way/consPhyloP
set tmp = $cons/tmp/$grp/$f
/bin/rm -fr $tmp
/bin/mkdir -p $tmp
set ssSrc = "/hive/data/genomes/mm10/bed/tupChi1Multiz4way/cons/SS/result/$f"
set useGrp = "$grp.mod"
/bin/ln -s $cons/run.phyloP/$grp.mod $tmp
pushd $tmp > /dev/null
$PHASTBIN/phyloP --method LRT --mode CONACC --wig-scores --chrom $cName \\
    -i SS $useGrp $ssSrc.ss > $file1.wigFix
popd > /dev/null
/bin/mkdir -p $out:h
sleep 4
/bin/touch $out:h
/bin/mv $tmp/$file1.wigFix $out
/bin/rm -fr $tmp
/bin/rmdir --ignore-fail-on-non-empty $cons/tmp/$grp/$d
/bin/rmdir --ignore-fail-on-non-empty $cons/tmp/$grp/$d:h
/bin/rmdir --ignore-fail-on-non-empty $cons/tmp/$grp
/bin/rmdir --ignore-fail-on-non-empty $cons/tmp
' > doPhyloP.csh

    chmod +x doPhyloP.csh

    # Create list of chunks
    find ../../cons/SS/result -type f | grep ".ss$" \
	| sed -e "s/.ss$//; s#^../../cons/SS/result/##" > ss.list
    # make sure the list looks good
    wc -l ss.list
    #	290 ss.list

    # Create template file
    #	file1 == $chr/$chunk/file name without .ss suffix
    printf '#LOOP
../run.phyloP/doPhyloP.csh $(path1) {check out line+ wigFix/$(dir1)/$(file1).wigFix}
#ENDLOOP
' > template

    ######################   Running all species  #######################
    # setup run for all species
    mkdir /hive/data/genomes/mm10/bed/tupChi1Multiz4way/consPhyloP/all
    cd /hive/data/genomes/mm10/bed/tupChi1Multiz4way/consPhyloP/all
    rm -fr wigFix
    mkdir wigFix

    gensub2 ../run.phyloP/ss.list single ../run.phyloP/template jobList
    #	beware overwhelming the cluster with these fast running high I/O jobs
    para create jobList
    para try ... check ... push ... etc ...
    para -maxJob=53 push
    para time > run.time
# Completed: 290 of 290 jobs
# CPU time in finished jobs:       1042s      17.37m     0.29h    0.01d  0.000 y
# IO & Wait Time:                  2008s      33.47m     0.56h    0.02d  0.000 y
# Average job time:                  11s       0.18m     0.00h    0.00d
# Longest finished job:              22s       0.37m     0.01h    0.00d
# Submission to last job:            84s       1.40m     0.02h    0.00d

    mkdir downloads

    time (find ./wigFix -type f | sed -e "s#^./##; s#\.# d #g; s#-# m #;" \
	| sort -k1,1 -k3,3n | sed -e "s# d #.#g; s# m #-#g;" | xargs cat \
	| gzip -c > downloads/phyloP4way.wigFix.gz)
    #   real    12m14.234s
# -rw-rw-r-- 1 1357982519 Apr 21 12:39 phyloP4way.wigFix.gz


    # check integrity of data with wigToBigWig
    time (zcat downloads/phyloP4way.wigFix.gz \
	| wigToBigWig -verbose=2 stdin /hive/data/genomes/mm10/chrom.sizes \
	phyloP4way.bw) > bigWig.log 2>&1
    egrep "real|VmPeak" bigWig.log
    # pid=77432: VmPeak:    12564972 kB
    # real    17m47.787s

    bigWigInfo phyloP4way.bw | sed -e 's/^/# /;'
# version: 4
# isCompressed: yes
# isSwapped: 0
# primaryDataSize: 1,672,367,975
# primaryIndexSize: 63,248,068
# zoomLevels: 10
# chromCount: 37
# basesCovered: 1,155,614,560
# mean: 0.108291
# min: -2.306000
# max: 0.719000
# std: 0.585706

    #	encode those files into wiggle data
    time (zcat downloads/phyloP4way.wigFix.gz \
	| wigEncode stdin phyloP4way.wig phyloP4way.wib)
    # Converted stdin, upper limit 0.72, lower limit -2.31
    #    real    6m41.352s

    du -hsc *.wi?
    # 1.1G    phyloP4way.wib
    # 188M    phyloP4way.wig

    # Load gbdb and database with wiggle.
    ln -s `pwd`/phyloP4way.wib /gbdb/mm10/tupChi1Multiz4way/phyloP4way.wib
    time hgLoadWiggle -pathPrefix=/gbdb/mm10/tupChi1Multiz4way mm10 \
	tupChi1PhyloP4way phyloP4way.wig
    # real    0m22.598s

    # use to set trackDb.ra entries for wiggle min and max
    # and verify table is loaded correctly

    wigTableStats.sh mm10 tupChi1PhyloP4way
# db.table                 min     max    mean      count   sumData
# mm10.tupChi1PhyloP4way  -2.306 0.719 0.108291 1155614560 1.25143e+08
#       stdDev viewLimits
#	0.585706 viewLimits=-2.306:0.719

    #	that range is: 0.719+2.306 = 3.025 for hBinSize=0.003025

    #  Create histogram to get an overview of all the data
    time hgWiggle -doHistogram \
	-hBinSize=0.003025 -hBinCount=1000 -hMinVal=-2.306 -verbose=2 \
	    -db=mm10 tupChi1PhyloP4way > histogram.data 2>&1
    # real    1m4.763s

    # find the Y range for the 2:5 graph
    grep -v chrom histogram.data | grep "^[0-9]" | ave -col=5 stdin \
      | sed -e 's/^/# /;'
# Q1 0.000068
# median 0.000261
# Q3 0.001051
# average 0.001280
# min 0.000000
# max 0.075274
# count 781
# total 1.000007
# standard deviation 0.003947

    # find the X range for the 2:5 graph
    grep "^[0-9]" histogram.data | ave -col=2 stdin \
      | sed -e 's/^/# /;'
# Q1 -1.558820
# median -0.965925
# Q3 -0.366975
# average -0.917927
# min -2.306000
# max 0.719000
# count 781
# total -716.901065
# standard deviation 0.798757

    #	create plot of histogram:
    printf 'set terminal png small x000000 xffffff xc000ff x66ff66 xffff00 x00ffff font \
"/usr/share/fonts/default/Type1/n022004l.pfb"
set size 1.4, 0.8
set key left box
set grid noxtics
set grid ytics
set title " Mouse mm10 Histogram tupChi1PhyloP4way track"
set xlabel " phyloP4way score"
set ylabel " Relative Frequency"
set y2label " Cumulative Relative Frequency (CRF)"
set y2range [0:1]
set y2tics
set xtics
set xrange [-2.6:0.85]
set yrange [0:0.033]

plot "histogram.data" using 2:5 title " RelFreq" with impulses, \
        "histogram.data" using 2:7 axes x1y2 title " CRF" with lines
' | gnuplot > histo.png

    display histo.png &
    # appears to have an odd hole in the data just past X=0 ?

#############################################################################
# hgPal downloads (DONE - 2017-04-21 - Hiram)
#   FASTA from 4-way for knownGene, refGene and knownCanonical

    ssh hgwdev
    screen -S mm10HgPal
    mkdir /hive/data/genomes/mm10/bed/tupChi1Multiz4way/pal
    cd /hive/data/genomes/mm10/bed/tupChi1Multiz4way/pal
    cat ../species.list | tr '[ ]' '[\n]' > order.list

    # this for loop takes about 2.5 hours on this large count contig assembly
    export mz=tupChi1Multiz4way
    export gp=knownGene
    export db=mm10
    export I=0
    export D=0
    mkdir exonAA exonNuc
    printf '#!/bin/sh\n' > $gp.jobs

    time for C in `sort -nk2 ../../../chrom.sizes | cut -f1`
    do
        I=`echo $I | awk '{print $1+1}'`
        D=`echo $D | awk '{print $1+1}'`
        dNum=`echo $D | awk '{printf "%03d", int($1/1000)}'`
        mkdir -p exonNuc/${dNum} > /dev/null
        mkdir -p exonAA/${dNum} > /dev/null
	echo "mafGene -chrom=$C -exons -noTrans $db $mz $gp order.list stdout | gzip -c > exonNuc/${dNum}/$C.exonNuc.fa.gz &"
	echo "mafGene -chrom=$C -exons $db $mz $gp order.list stdout | gzip -c > exonAA/${dNum}/$C.exonAA.fa.gz &"
        if [ $I -gt 16 ]; then
            echo "date"
            echo "wait"
            I=0
        fi
    done >> $gp.jobs
    # real    0m0.772s


    echo "date" >> $gp.jobs
    echo "wait" >> $gp.jobs

    chmod +x  knownGene.jobs

    time (./$gp.jobs) > $gp.jobs.log 2>&1 &
    # real    11m18.851s

    export mz=multiz4way
    export gp=knownGene
    time find ./exonAA -type f | grep exonAA.fa.gz | xargs zcat \
     | gzip -c > $gp.$mz.exonAA.fa.gz
    #  real    0m8.492s

    time find ./exonNuc -type f | grep exonNuc.fa.gz | xargs zcat \
     | gzip -c > $gp.$mz.exonNuc.fa.gz
    #   real    0m39.199s

# -rw-rw-r-- 1 33908467 Apr 21 18:49 knownGene.multiz4way.exonAA.fa.gz
# -rw-rw-r-- 1 55392688 Apr 21 18:49 knownGene.multiz4way.exonNuc.fa.gz

    export mz=multiz4way
    export gp=knownGene
    export db=mm10
    export pd=/usr/local/apache/htdocs-hgdownload/goldenPath/$db/$mz/alignments
    mkdir -p $pd
    md5sum *.fa.gz > md5sum.txt
    ln -s `pwd`/$gp.$mz.exonAA.fa.gz $pd/$gp.exonAA.fa.gz
    ln -s `pwd`/$gp.$mz.exonNuc.fa.gz $pd/$gp.exonNuc.fa.gz
    ln -s `pwd`/md5sum.txt $pd/

    rm -rf exonAA exonNuc

#############################################################################
# construct download files for 4-way (DONE - 2017-04-21 - Hiram)
    mkdir /usr/local/apache/htdocs-hgdownload/goldenPath/mm10/multiz4way
    mkdir /usr/local/apache/htdocs-hgdownload/goldenPath/mm10/phastCons4way
    mkdir /usr/local/apache/htdocs-hgdownload/goldenPath/mm10/phyloP4way
    mkdir /hive/data/genomes/mm10/bed/tupChi1Multiz4way/downloads
    cd /hive/data/genomes/mm10/bed/tupChi1Multiz4way/downloads
    mkdir multiz4way phastCons4way phyloP4way
    cd multiz4way
    time cp -p ../../anno/mm10.4way.maf .
    #   real    0m15.285s

    # -rw-rw-r-- 1 7580362629 Apr 20 22:27 mm10.4way.maf

    du -hsc *
    #  7.1G     mm10.4way.maf

    time gzip *.maf
    #   real    27m2.122s

    # -rw-rw-r-- 1 2040574809 Apr 20 22:27 mm10.4way.maf.gz

    du -hsc *.maf.gz
    #  2.0G    mm10.4way.maf.gz

    ###########################################################################
    ## create upstream refGene maf files
    cd /hive/data/genomes/mm10/bed/tupChi1Multiz4way/downloads/tupChi1Multiz4way
    # bash script
#!/bin/sh
export geneTbl="knownGene"
for S in 1000 2000 5000
do
    echo "making upstream${S}.maf"
    featureBits mm10 ${geneTbl}:upstream:${S} -fa=/dev/null -bed=stdout \
        | perl -wpe 's/_up[^\t]+/\t0/' | sort -k1,1 -k2,2n \
        | /cluster/bin/$MACHTYPE/mafFrags mm10 tupChi1Multiz4way \
            stdin stdout \
              -orgs=/hive/data/genomes/mm10/bed/tupChi1Multiz4way/species.list \
        | gzip -c > upstream${S}.${geneTbl}.maf.gz
    echo "done upstream${S}.${geneTbl}.maf.gz"
done
    #   real    12m55.050s

    md5sum *.maf.gz *.nh upstream*.gz README.txt >> md5sum.txt

    # some other symlinks were already made above
    # obtain the README.txt from tupChi1/multiz4way and update for this
    #   situation
    ln -s `pwd`/upstream*.gz `pwd`/README.txt \
        /usr/local/apache/htdocs-hgdownload/goldenPath/mm10/tupChi1Multiz4way

    grep TREE ../../4d/all.mod | awk '{print $NF}' \
      | ~/kent/src/hg/utils/phyloTrees/asciiTree.pl /dev/stdin \
         > mm10.4way.nh
    ~/kent/src/hg/utils/phyloTrees/commonNames.sh mm10.4way.nh \
      | ~/kent/src/hg/utils/phyloTrees/asciiTree.pl /dev/stdin \
         > mm10.4way.commonNames.nh
    ~/kent/src/hg/utils/phyloTrees/scientificNames.sh mm10.4way.nh \
	| $HOME/kent/src/hg/utils/phyloTrees/asciiTree.pl /dev/stdin \
	    > mm10.4way.scientificNames.nh
    time md5sum *.nh *.maf.gz > md5sum.txt
    #   real    0m35.144s

    ln -s `pwd`/* \
        /usr/local/apache/htdocs-hgdownload/goldenPath/mm10/multiz4way

    du -hsc *.maf.gz ../../anno/mm10.4way.maf
    #  3.0G     mm10.4way.maf.gz
    #  13G     ../../anno/mm10.4way.maf

    # obtain the README.txt from tupChi1/multiz4way and update for this
    #   situation

    #####################################################################
    cd /hive/data/genomes/mm10/bed/tupChi1Multiz4way/downloads/phastCons4way

    ln -s ../../cons/all/downloads/phastCons4way.wigFix.gz \
        ./mm10.phastCons4way.wigFix.gz
    ln -s ../../cons/all/phastCons4way.bw ./mm10.phastCons4way.bw
    ln -s ../../cons/all/all.mod ./mm10.phastCons4way.mod
    time md5sum *.gz *.mod *.bw > md5sum.txt
    #   real    0m20.354s

    # obtain the README.txt from tupChi1/phastCons4way and update for this
    #   situation
    ln -s `pwd`/* \
      /usr/local/apache/htdocs-hgdownload/goldenPath/mm10/phastCons4way

    #####################################################################
    cd /hive/data/genomes/mm10/bed/tupChi1Multiz4way/downloads/phyloP4way

    ln -s ../../consPhyloP/all/downloads/phyloP4way.wigFix.gz \
        ./mm10.phyloP4way.wigFix.gz
    ln -s ../../consPhyloP/run.phyloP/all.mod mm10.phyloP4way.mod
    ln -s ../../consPhyloP/all/phyloP4way.bw mm10.phyloP4way.bw

    time md5sum *.mod *.bw *.gz > md5sum.txt
    #   real    0m12.264s

    # obtain the README.txt from tupChi1/phyloP4way and update for this
    #   situation
    ln -s `pwd`/* \
      /usr/local/apache/htdocs-hgdownload/goldenPath/mm10/phyloP4way

#############################################################################
# wiki page for 4-way (DONE - 2017-04-21 - Hiram)
    mkdir /hive/users/hiram/bigWays/mm10.4way
    cd /hive/users/hiram/bigWays
    echo "mm10" > mm10.4way/ordered.list
    awk '{print $1}' /hive/data/genomes/mm10/bed/tupChi1Multiz4way/4way.distances.txt \
       >> mm10.4way/ordered.list

    # sizeStats.sh catches up the cached measurements required for data
    # in the tables.  They are usually already mostly done, only new
    # assemblies will have updates.
    ./sizeStats.sh mm10.4way/ordered.list
    # dbDb.sh constructs mm10.4way/Mm10_4-way_conservation_alignment.html
    # may need to add new assembly references to srcReference.list and
    # urlReference.list
    ./dbDb.sh mm10 4way
    # sizeStats.pl constructs mm10.4way/Mm10_4-way_Genome_size_statistics.html
    # this requires entries in coverage.list for new sequences
    ./sizeStats.pl mm10 4way

    # defCheck.pl constructs Mm10_4-way_conservation_lastz_parameters.html
    ./defCheck.pl mm10 4way

    # this constructs the html pages in mm10.4way/:
# -rw-rw-r-- 1 2800 Apr 21 21:22 Mm10_4-way_conservation_alignment.html
# -rw-rw-r-- 1 4199 Apr 21 21:22 Mm10_4-way_Genome_size_statistics.html
# -rw-rw-r-- 1 2995 Apr 21 21:22 Mm10_4-way_conservation_lastz_parameters.html

    # add those pages to the genomewiki.  Their page names are the
    # names of the .html files without the .html:
#  Mm10_4-way_conservation_alignment
#  Mm10_4-way_Genome_size_statistics
#  Mm10_4-way_conservation_lastz_parameters

    # when you view the first one you enter, it will have links to the
    # missing two.

##############################################################################
# LASTZ Chinese hamster criGri1 (DONE - 2017-05-12 - Hiram)
    #	establish a screen to control this job
    screen -S mm10criGri1
    mkdir /hive/data/genomes/mm10/bed/lastzCriGri1.2017-05-12
    cd /hive/data/genomes/mm10/bed/lastzCriGri1.2017-05-12

    printf '# mouse vs. Chinese hamster
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz

# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000
SEQ1_LIMIT=40

# QUERY: Chinese hamster criGri1
SEQ2_DIR=/hive/data/genomes/criGri1/criGri1.2bit
SEQ2_LEN=/hive/data/genomes/criGri1/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LIMIT=100
SEQ2_LAP=0

BASE=/hive/data/genomes/mm10/bed/lastzCriGri1.2017-05-12
TMPDIR=/dev/shm
' > DEF

    time (doBlastzChainNet.pl -verbose=2 `pwd`/DEF \
	-syntenicNet -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
	-chainMinScore=3000 -chainLinearGap=medium) > do.log 2>&1
    #	real    289m42.628s

    cat fb.mm10.chainCriGri1Link.txt
    #	1577848220 bases of 2652783500 (59.479%) in intersection

    time (doRecipBest.pl -workhorse=hgwdev mm10 criGri1 \
      -buildDir=`pwd` -workhorse=hgwdev) > rbest.log 2>&1 &
    #	real    797m59.816s

    mkdir /hive/data/genomes/criGri1/bed/blastz.mm10.swap
    cd /hive/data/genomes/criGri1/bed/blastz.mm10.swap
    time (doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzCriGri1.2017-05-12/DEF \
	-noDbNameCheck -swap -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
	-chainMinScore=3000 -chainLinearGap=medium) > swap.log 2>&1
    #	real    172m50.552s

    cat fb.criGri1.chainMm10Link.txt
    #	1589449878 bases of 2301325917 (69.067%) in intersection

    time (doRecipBest.pl -workhorse=hgwdev criGri1 mm10 \
      -buildDir=`pwd` -workhorse=hgwdev) > rbest.log 2>&1 &
    # real    846m34.982s

##############################################################################
# ncbiRefSeq composite gene track (DONE - 2017-05-26 - Hiram)
    mkdir  /hive/data/genomes/mm10/bed/ncbiRefSeq.p5
    cd  /hive/data/genomes/mm10/bed/ncbiRefSeq.p5

    ~/kent/src/hg/utils/automation/doNcbiRefSeq.pl -buildDir=`pwd` \
      -bigClusterHub=ku -dbHost=hgwdev \
      -stop=download -fileServer=hgwdev -smallClusterHub=ku -workhorse=hgwdev \
      refseq vertebrate_mammalian Mus_musculus \
      GCF_000001635.25_GRCm38.p5 mm10

    ~/kent/src/hg/utils/automation/doNcbiRefSeq.pl -buildDir=`pwd` \
      -debug -bigClusterHub=ku -dbHost=hgwdev \
      -continue=process -stop=process -fileServer=hgwdev -smallClusterHub=ku \
      -workhorse=hgwdev refseq vertebrate_mammalian Mus_musculus \
      GCF_000001635.25_GRCm38.p5 mm10

    ~/kent/src/hg/utils/automation/doNcbiRefSeq.pl -buildDir=`pwd` \
      -debug -bigClusterHub=ku -dbHost=hgwdev \
      -continue=load -stop=load -fileServer=hgwdev -smallClusterHub=ku \
      -workhorse=hgwdev refseq vertebrate_mammalian Mus_musculus \
      GCF_000001635.25_GRCm38.p5 mm10

    # There are some ncRNAs missing
    faSize -detailed mm10.rna.fa \
      | pslCheck -querySizes=stdin -targetSizes=../../chrom.sizes \
       -db=mm10 ncbiRefSeqPsl
    #  checked: 85224 failed: 18 errors: 18

    # and joinerCheck is not completely clean:
 joinerCheck -identifier=ncbiRefSeq -keys -database=mm10 all.joiner
# Checking keys on database mm10
#  mm10.ncbiRefSeqLink.id - hits 107479 of 107479 (100.000%) ok
#  mm10.ncbiRefSeqCurated.name - hits 32217 of 32217 (100.000%) ok
#  mm10.ncbiRefSeqPredicted.name - hits 52989 of 52989 (100.000%) ok
#  mm10.ncbiRefSeqPsl.qName - hits 85206 of 85224 (99.979%)
# Error: 18 of 85224 elements (0.021%) of mm10.ncbiRefSeqPsl.qName are not in key ncbiRefSeq.name line 6045 of all.joiner
# Example miss: NR_033199.1
#  mm10.ncbiRefSeqCds.id - hits 76076 of 76076 (100.000%) ok
#  mm10.seqNcbiRefSeq.acc - hits 85205 of 85205 (100.000%) ok

    # The reason for these difficulties is because some of the original
    # GFF items were dropped due to unprocessedRoots.  The fix is to eliminate
    # the rest of these unprocessedRoots from PSL loaded file.

    # discovered that it didn't help to add them in, (procedure included below)
    #   then featureBits went bad:
 joinerCheck -identifier=ncbiRefSeq -keys -database=mm10 all.joiner
# Checking keys on database mm10
#  mm10.ncbiRefSeqLink.id - hits 107479 of 107479 (100.000%) ok
#  mm10.ncbiRefSeqCurated.name - hits 32217 of 32217 (100.000%) ok
#  mm10.ncbiRefSeqPredicted.name - hits 52989 of 52989 (100.000%) ok
#  mm10.ncbiRefSeqPsl.qName - hits 85206 of 85224 (99.979%)
# Error: 18 of 85224 elements (0.021%) of mm10.ncbiRefSeqPsl.qName are not in key ncbiRefSeq.name line 6045 of all.joiner
# Example miss: NR_033199.1
#  mm10.ncbiRefSeqCds.id - hits 76076 of 76076 (100.000%) ok
#  mm10.seqNcbiRefSeq.acc - hits 85205 of 85222 (99.980%)
# Error: 17 of 85222 elements (0.020%) of mm10.seqNcbiRefSeq.acc are not in key ncbiRefSeq.name line 6047 of all.joiner
# Example miss: NR_015480.1

    # eliminate items from PSL file,
    # compare name lists:
    # one sorted, unique list of query names from the PSL table and one of
    # item names from the ncbiRefSeq table, so that comm can compare them
    hgsql -N -e 'select qName from ncbiRefSeqPsl;' mm10 \
      | sort -u > ncbiRefSeqPsl.qName
    hgsql -N -e 'select name from ncbiRefSeq;' mm10 \
      | sort -u > ncbiRefSeq.name
    wc -l ncbiRefSeqPsl.qName ncbiRefSeq.name
#   85220 ncbiRefSeqPsl.qName
#  107479 ncbiRefSeq.name
    # comm -12: names present in both lists
    comm -12 ncbiRefSeqPsl.qName ncbiRefSeq.name | wc -l
#   85203
    # need to eliminate 17 items from the PSL track:
    # comm -23: names present only in the PSL list
    comm -23 ncbiRefSeqPsl.qName ncbiRefSeq.name | wc -l
#       17
    # look at the PSL rows that are about to be removed
    # NOTE(review): hgsql is run without -N here, so the line count below
    # presumably includes a column header line for each query - confirm
    comm -23 ncbiRefSeqPsl.qName ncbiRefSeq.name | while read N
do
   hgsql -e "select * from ncbiRefSeqPsl where qName=\"$N\";" mm10
done | wc -l
#         35
    # delete every PSL row whose qName has no matching ncbiRefSeq.name
    comm -23 ncbiRefSeqPsl.qName ncbiRefSeq.name | while read N
do
   hgsql -e "delete from ncbiRefSeqPsl where qName=\"$N\";" mm10
done

    # re-extract the PSL query names now that the delete has been run and
    # confirm the counts
    hgsql -N -e 'select qName from ncbiRefSeqPsl;' mm10 \
      | sort -u > ncbiRefSeqPsl.clean.qName
    wc -l ncbiRefSeqPsl.clean.qName ncbiRefSeq.name
#   85203 ncbiRefSeqPsl.clean.qName
#  107479 ncbiRefSeq.name

    # all of the remaining PSL query names are present in ncbiRefSeq.name
    comm -12 ncbiRefSeqPsl.clean.qName ncbiRefSeq.name | wc -l
#   85203

    # joinerCheck is now clean
    joinerCheck -identifier=ncbiRefSeq -keys -database=mm10 all.joiner
# Checking keys on database mm10
#  mm10.ncbiRefSeqLink.id - hits 107479 of 107479 (100.000%) ok
#  mm10.ncbiRefSeqCurated.name - hits 32217 of 32217 (100.000%) ok
#  mm10.ncbiRefSeqPredicted.name - hits 52989 of 52989 (100.000%) ok
#  mm10.ncbiRefSeqPsl.qName - hits 85206 of 85206 (100.000%) ok
#  mm10.ncbiRefSeqCds.id - hits 76076 of 76076 (100.000%) ok
#  mm10.seqNcbiRefSeq.acc - hits 85205 of 85205 (100.000%) ok

    # and pslCheck is now clean:
    # first with query sizes measured from the fasta file under /gbdb
    faSize -detailed /gbdb/mm10/ncbiRefSeq/seqNcbiRefSeq.rna.fa \
       | pslCheck -querySizes=stdin -targetSizes=../../../chrom.sizes \
           -db=mm10 ncbiRefSeqPsl
    # checked: 85206 failed: 0 errors: 0

    # and again with query sizes taken from the seqNcbiRefSeq table
    hgsql -N -e 'select acc,size from seqNcbiRefSeq;' mm10 \
      | pslCheck -querySizes=stdin -targetSizes=../../../chrom.sizes \
         -db=mm10 ncbiRefSeqPsl
    # checked: 85206 failed: 0 errors: 0

    ### update hgFixed.trackVersion
    # sets the version string on the trackVersion row with ix=1706
    hgsql -e 'update trackVersion set version="2016-12-16" where ix=1706;' hgFixed

    ### XXX obsolete procedure that does not fix the problem
    ### (kept as a record: fetch the missing RNA sequences and add them to
    ###  the sequence file that is loaded into seqNcbiRefSeq)
    mkdir /hive/data/genomes/mm10/bed/ncbiRefSeq.p5/missingRna
    cd  /hive/data/genomes/mm10/bed/ncbiRefSeq.p5/missingRna

    # determine missing sequences ids

    faSize -detailed ../mm10.rna.fa \
      | pslCheck -querySizes=stdin -targetSizes=../../../chrom.sizes \
       -db=mm10 ncbiRefSeqPsl > pslCheck.ncbiRefSeq.rna.fa.txt 2>&1

    # drop the "does not exist" and summary lines, keep columns 5 and 4
    # of the remaining pslCheck messages
    egrep -v "does not exist|errors:" pslCheck.ncbiRefSeq.rna.fa.txt \
      | awk '{printf "%s\t%s\n", $5,$4}' | sort -u > idWithRange.seqListFile.tab

    # fetch RNA sequences from entrez:
    mkdir ncbiRna

    # column 2 is the sequence id with a :start-end range, strip the range
    # and fetch one fasta file per id into ncbiRna/
cut -f2 idWithRange.seqListFile.tab | sed -e 's#:[0-9]\+-[0-9]\+##;' \
  | while read id
do
   wget -O /dev/stdout \
  "http://www.ncbi.nlm.nih.gov/sviewer/viewer.fcgi?db=nuccore&dopt=fasta&sendto=on&id=$id" \
     | sed -e 's/ Mus musculus .*//;' | sed -e '/^$/d' > ncbiRna/$id.fa
done

    # ncbiRna is a directory (one $id.fa file per fetched id), the fasta
    # files inside it are what needs to be appended to the original RNA set
    cat ../mm10.rna.fa ncbiRna/*.fa > mm10.seqNcbiRefSeq.rna.fa

    # replace the /gbdb link with this combined file and reload the seq table
    rm -f /gbdb/mm10/ncbiRefSeq/seqNcbiRefSeq.rna.fa
    ln -s `pwd`/mm10.seqNcbiRefSeq.rna.fa \
        /gbdb/mm10/ncbiRefSeq/seqNcbiRefSeq.rna.fa
    hgLoadSeq -drop -seqTbl=seqNcbiRefSeq -extFileTbl=extNcbiRefSeq mm10 \
        /gbdb/mm10/ncbiRefSeq/seqNcbiRefSeq.rna.fa

    # now have clean pslCheck, verify both with the file and the seq table:
    faSize -detailed /gbdb/mm10/ncbiRefSeq/seqNcbiRefSeq.rna.fa \
       | pslCheck -querySizes=stdin -targetSizes=../../../chrom.sizes \
           -db=mm10 ncbiRefSeqPsl
    #  checked: 85224 failed: 0 errors: 0

    hgsql -N -e 'select acc,size from seqNcbiRefSeq;' mm10 \
      | pslCheck -querySizes=stdin -targetSizes=../../../chrom.sizes \
         -db=mm10 ncbiRefSeqPsl

    #  checked: 85224 failed: 0 errors: 0

##############################################################################
# LASTZ zebrafish danRer11 (DONE - 2017-06-12 - Chris)
    #	establish a screen to control this job
    screen -S mm10danRer11
    mkdir /hive/data/genomes/mm10/bed/lastzDanRer11.2017-06-12
    cd /hive/data/genomes/mm10/bed/lastzDanRer11.2017-06-12

    # write the DEF parameter file that is handed to doBlastzChainNet.pl
    printf '# mouse vs. zebrafish
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz

# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000
SEQ1_LIMIT=40

# QUERY: zebrafish danRer11
SEQ2_DIR=/hive/data/genomes/danRer11/danRer11.2bit
SEQ2_LEN=/hive/data/genomes/danRer11/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LIMIT=100
SEQ2_LAP=0

BASE=/hive/data/genomes/mm10/bed/lastzDanRer11.2017-06-12
TMPDIR=/dev/shm
' > DEF

    time (doBlastzChainNet.pl -verbose=2 `pwd`/DEF \
	-syntenicNet -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
	-chainMinScore=3000 -chainLinearGap=medium) > do.log 2>&1
    #	real    289m42.628s
    cat fb.mm10.chainDanRer11Link.txt
    # 36448414 bases of 2652783500 (1.374%) in intersection

    time (doRecipBest.pl -workhorse=hgwdev mm10 danRer11 \
      -buildDir=`pwd`) > rbest.log 2>&1 &

    # swap run in the danRer11 build directory, reusing the mm10 DEF file
    mkdir /hive/data/genomes/danRer11/bed/blastz.mm10.swap
    cd /hive/data/genomes/danRer11/bed/blastz.mm10.swap
    time (doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzDanRer11.2017-06-12/DEF \
	-noDbNameCheck -swap -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
	-chainMinScore=3000 -chainLinearGap=medium) > swap.log 2>&1
    #	real    172m50.552s

    cat fb.danRer11.chainMm10Link.txt
    #	45558857 bases of 1674677181 (2.720%) in intersection
    # NOTE(review): the next line was recorded here without a comment marker;
    #	its total (2301325917) does not match the danRer11 total on the line
    #	above, so it appears to be left over from a different alignment:
    #	1589449878 bases of 2301325917 (69.067%) in intersection

    time (doRecipBest.pl -workhorse=hgwdev danRer11 mm10 \
      -buildDir=`pwd`) > rbest.log 2>&1 &
    # real    846m34.982s

##############################################################################
# LASTZ Killer whale orcOrc1 (DONE - 2017-06-15 - Hiram)
    #	establish a screen to control this job
    screen -S mm10orcOrc1
    mkdir /hive/data/genomes/mm10/bed/lastzOrcOrc1.2017-06-15
    cd /hive/data/genomes/mm10/bed/lastzOrcOrc1.2017-06-15

    # write the DEF parameter file that is handed to doBlastzChainNet.pl
    printf '# killer whale vs mouse
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz
# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: Killer whale orcOrc1
SEQ2_DIR=/hive/data/genomes/orcOrc1/orcOrc1.2bit
SEQ2_LEN=/hive/data/genomes/orcOrc1/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LAP=0
SEQ2_LIMIT=30

BASE=/hive/data/genomes/mm10/bed/lastzOrcOrc1.2017-06-15
TMPDIR=/dev/shm
' > DEF

    time (doBlastzChainNet.pl -verbose=2 `pwd`/DEF \
	-syntenicNet -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
	-chainMinScore=3000 -chainLinearGap=medium) > do.log 2>&1
    #	real    192m26.791s

    cat fb.mm10.chainOrcOrc1Link.txt
    # 832909116 bases of 2652783500 (31.398%) in intersection

    time (doRecipBest.pl -workhorse=hgwdev mm10 orcOrc1 \
      -buildDir=`pwd`) > rbest.log 2>&1 &
    # real    276m44.875s

    # swap run in the orcOrc1 build directory, reusing the mm10 DEF file
    mkdir /hive/data/genomes/orcOrc1/bed/blastz.mm10.swap
    cd /hive/data/genomes/orcOrc1/bed/blastz.mm10.swap
    time (doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzOrcOrc1.2017-06-15/DEF \
	-swap -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
	-chainMinScore=3000 -chainLinearGap=medium) > swap.log 2>&1
    #	real    72m53.064s

    cat fb.orcOrc1.chainMm10Link.txt
    #	809350350 bases of 2249582125 (35.978%) in intersection

    time (doRecipBest.pl -workhorse=hgwdev orcOrc1 mm10 \
      -buildDir=`pwd`) > rbest.log 2>&1
    # real    214m50.810s

##############################################################################
# LASTZ Baboon papAnu3 (DONE - 2017-06-21 - Hiram)
    #	establish a screen to control this job
    screen -S mm10papAnu3
    mkdir /hive/data/genomes/mm10/bed/lastzPapAnu3.2017-06-21
    cd /hive/data/genomes/mm10/bed/lastzPapAnu3.2017-06-21

    # write the DEF parameter file that is handed to doBlastzChainNet.pl
    printf '# mouse vs. baboon
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz

# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000
SEQ1_LIMIT=40

# QUERY: baboon papAnu3
SEQ2_DIR=/hive/data/genomes/papAnu3/papAnu3.2bit
SEQ2_LEN=/hive/data/genomes/papAnu3/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LIMIT=180
SEQ2_LAP=0

BASE=/hive/data/genomes/mm10/bed/lastzPapAnu3.2017-06-21
TMPDIR=/dev/shm
' > DEF

    time (doBlastzChainNet.pl -verbose=2 `pwd`/DEF \
	-syntenicNet -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
	-chainMinScore=3000 -chainLinearGap=medium) > do.log 2>&1 &
    #	real    474m39.013s

    cat fb.mm10.chainPapAnu3Link.txt
    #	910628118 bases of 2652783500 (34.327%) in intersection

    time (doRecipBest.pl -workhorse=hgwdev mm10 papAnu3 \
      -buildDir=`pwd`) > rbest.log 2>&1 &
    # real    644m20.659s

    # swap run in the papAnu3 build directory, reusing the mm10 DEF file
    mkdir /hive/data/genomes/papAnu3/bed/blastz.mm10.swap
    cd /hive/data/genomes/papAnu3/bed/blastz.mm10.swap
    time (doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzPapAnu3.2017-06-21/DEF \
	-swap -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
	-chainMinScore=3000 -chainLinearGap=medium) > swap.log 2>&1
    #	real    66m35.501s

    cat fb.papAnu3.chainMm10Link.txt
    #	897929517 bases of 2893270787 (31.035%) in intersection

    time (doRecipBest.pl -workhorse=hgwdev papAnu3 mm10 \
      -buildDir=`pwd`) > rbest.log 2>&1 &
    # real    578m46.893s

##############################################################################
# LASTZ pig susScr11 (DONE - 2017-07-31 - Hiram)
    #	establish a screen to control this job
    screen -S mm10susScr11
    mkdir /hive/data/genomes/mm10/bed/lastzSusScr11.2017-07-31
    cd /hive/data/genomes/mm10/bed/lastzSusScr11.2017-07-31

    # write the DEF parameter file that is handed to doBlastzChainNet.pl
    printf '# mouse vs. pig
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz

# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=40000000
SEQ1_LAP=10000
SEQ1_LIMIT=1

# QUERY: pig susScr11
SEQ2_DIR=/hive/data/genomes/susScr11/susScr11.2bit
SEQ2_LEN=/hive/data/genomes/susScr11/chrom.sizes
SEQ2_CHUNK=20000000
SEQ2_LIMIT=1
SEQ2_LAP=0

BASE=/hive/data/genomes/mm10/bed/lastzSusScr11.2017-07-31
TMPDIR=/dev/shm
' > DEF

    time (doBlastzChainNet.pl -verbose=2 `pwd`/DEF \
	-syntenicNet -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
	-chainMinScore=3000 -chainLinearGap=medium) > do.log 2>&1 &
    #	real    567m0.166s

    cat fb.mm10.chainSusScr11Link.txt
    #	731012356 bases of 2652783500 (27.556%) in intersection

    time (doRecipBest.pl -workhorse=hgwdev mm10 susScr11 \
      -buildDir=`pwd`) > rbest.log 2>&1 &
    # real    455m39.565s

    # swap run in the susScr11 build directory, reusing the mm10 DEF file
    mkdir /hive/data/genomes/susScr11/bed/blastz.mm10.swap
    cd /hive/data/genomes/susScr11/bed/blastz.mm10.swap
    time (doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzSusScr11.2017-07-31/DEF \
	-swap -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
	-chainMinScore=3000 -chainLinearGap=medium) > swap.log 2>&1
    #	real    61m6.153s

    cat fb.susScr11.chainMm10Link.txt
    #	715277290 bases of 2472073034 (28.934%) in intersection

    time (doRecipBest.pl -workhorse=hgwdev susScr11 mm10 \
      -buildDir=`pwd`) > rbest.log 2>&1 &
    # real    358m15.340s

##############################################################################
# lastz nile tilapia oreNil3 (DONE - 2017-07-31 - Hiram)
    # establish a screen to control this job with a name to indicate what it is
    screen -S mm10OreNil3
    mkdir /hive/data/genomes/mm10/bed/lastzOreNil3.2017-07-31
    cd /hive/data/genomes/mm10/bed/lastzOreNil3.2017-07-31

    # write the DEF parameter file that is handed to doBlastzChainNet.pl
    printf '# Mouse vs. nile tilapia
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz
BLASTZ_Y=3400
BLASTZ_L=6000
BLASTZ_K=2200
BLASTZ_Q=/scratch/data/blastz/HoxD55.q

# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000
SEQ1_LIMIT=5

# QUERY: nile tilapia oreNil3
SEQ2_DIR=/hive/data/genomes/oreNil3/oreNil3.2bit
SEQ2_LEN=/hive/data/genomes/oreNil3/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LAP=0
SEQ2_LIMIT=10

BASE=/hive/data/genomes/mm10/bed/lastzOreNil3.2017-07-31
TMPDIR=/scratch/tmp
' > DEF

    # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
    #	number of jobs, 50,000 to something under 100,000
    # when not present, SEQ2_LIMIT is a default 100
    time (doBlastzChainNet.pl -verbose=2 \
        `pwd`/DEF \
        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
        -syntenicNet -chainMinScore=5000 -chainLinearGap=loose) > do.log 2>&1 &
    #   real    307m32.926s

    cat fb.mm10.chainOreNil3Link.txt
    #   54152663 bases of 2652783500 (2.041%) in intersection

    time (doRecipBest.pl -workhorse=hgwdev mm10 oreNil3 \
      -buildDir=`pwd`) > rbest.log 2>&1 &
    # real    243m27.139s

    #	and for the swap
    mkdir /hive/data/genomes/oreNil3/bed/blastz.mm10.swap
    cd /hive/data/genomes/oreNil3/bed/blastz.mm10.swap
    time (doBlastzChainNet.pl -verbose=2 -syntenicNet \
	/hive/data/genomes/mm10/bed/lastzOreNil3.2017-07-31/DEF \
        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
        -swap -chainMinScore=5000 -chainLinearGap=loose) > swap.log 2>&1 &
    #   real    8m5.590s

    cat  fb.oreNil3.chainMm10Link.txt
    #   55291586 bases of 1009856516 (5.475%) in intersection

    time (doRecipBest.pl -workhorse=hgwdev oreNil3 mm10 \
      -buildDir=`pwd`) > rbest.log 2>&1 &
    # real    230m56.580s

#########################################################################
# crispr 10K track (DONE - Hiram - 2017-07-28)
    # the script was still under development while this track was built,
    # which is why it is run here one step at a time:
    mkdir /hive/data/genomes/mm10/bed/crispr.10K
    cd /hive/data/genomes/mm10/bed/crispr.10K

    # step: guides
    time (~/kent/src/hg/utils/automation/doCrispr.pl -stop=guides \
      -buildDir=$(pwd) mm10 ensGene) > guides.log 2>&1
    # real    78m39.898s
# Completed: 99 of 99 jobs
# CPU time in finished jobs:      12182s     203.04m     3.38h    0.14d  0.000 y
# IO & Wait Time:                  1076s      17.93m     0.30h    0.01d  0.000 y
# Average job time:                 134s       2.23m     0.04h    0.00d
# Longest finished job:             181s       3.02m     0.05h    0.00d
# Submission to last job:          4567s      76.12m     1.27h    0.05d

    # step: specScores
    ~/kent/src/hg/utils/automation/doCrispr.pl \
      -continue=specScores -stop=specScores -buildDir=$(pwd) mm10 ensGene
# Completed: 945820 of 1558824 jobs
# CPU time in finished jobs:  352722192s 5878703.20m 97978.39h 4082.43d 11.185 y
# IO & Wait Time:             1367298315s 22788305.25m 379805.09h 15825.21d 43.357 y
# Average job time:                1819s      30.31m     0.51h    0.02d
# Longest finished job:            8656s     144.27m     2.40h    0.10d
# Submission to last job:       2172942s   36215.70m   603.60h   25.15d

    # after ku reboot, finishing:
# Completed: 613973 of 613973 jobs
# CPU time in finished jobs:  155165030s 2586083.83m 43101.40h 1795.89d  4.920 y
# IO & Wait Time:             584008656s 9733477.60m 162224.63h 6759.36d 18.519 y
# Average job time:                1204s      20.07m     0.33h    0.01d
# Longest finished job:            8978s     149.63m     2.49h    0.10d
# Submission to last job:       1137188s   18953.13m   315.89h   13.16d


    # step: effScores
    ~/kent/src/hg/utils/automation/doCrispr.pl \
      -continue=effScores -stop=effScores -buildDir=$(pwd) mm10 ensGene
# Completed: 13518 of 13518 jobs
# CPU time in finished jobs:    6244711s  104078.52m  1734.64h   72.28d  0.198 y
# IO & Wait Time:                 32457s     540.95m     9.02h    0.38d  0.001 y
# Average job time:                 464s       7.74m     0.13h    0.01d
# Longest finished job:            2373s      39.55m     0.66h    0.03d
# Submission to last job:         15145s     252.42m     4.21h    0.18d

    # step: offTargets
    ~/kent/src/hg/utils/automation/doCrispr.pl \
      -continue=offTargets -stop=offTargets -buildDir=$(pwd) mm10 ensGene
# Completed: 77942 of 77942 jobs
# CPU time in finished jobs:    1397706s   23295.10m   388.25h   16.18d  0.044 y
# IO & Wait Time:                313616s    5226.94m    87.12h    3.63d  0.010 y
# Average job time:                  22s       0.37m     0.01h    0.00d
# Longest finished job:              35s       0.58m     0.01h    0.00d
# Submission to last job:          9239s     153.98m     2.57h    0.11d


    # step: load
    ~/kent/src/hg/utils/automation/doCrispr.pl \
      -continue=load -stop=load -buildDir=$(pwd) mm10 ensGene
    # real    235m41.378s

    # step: cleanup
    time (~/kent/src/hg/utils/automation/doCrispr.pl -verbose=2 \
       -continue=cleanup mm10 -fileServer=hgwdev -buildDir=$(pwd) \
       -smallClusterHub=hgwdev -bigClusterHub=ku -workhorse=hgwdev) \
         > cleanup.log 2>&1
    # real    100m50.151s
    ##########################################################################
    # FIXUP broken files (working - Max and Hiram - 2018-04,05)

    # Max generated a new specScores.tab; merge in the chrM specScores and
    # reduce the result to a unique set of lines in a new specScores.tab file

    cd /hive/data/genomes/mm10/bed/crispr.10K/uniqSpecScores

    # header line first, then the sorted unique data lines are appended
    printf "targetSeq\tmitSpecScore\tofftargetCount\ttargetGenomeGeneLocus\n" \
	> max.withChrM.specScores.tab

    grep -h -v targetSeq ../specScores.max.tab ../addChrM/specScores.tab \
	| $HOME/bin/x86_64/gnusort -S100G --parallel=32 -u \
	>> max.withChrM.specScores.tab
    # real    1m39.468s

    # this new file is much larger than before:
# -rw-rw-r-- 1 3616703851 Jul 31  2017 withChrM.specScores.tab
# -rw-rw-r-- 1 5580638498 May 15 14:55 max.withChrM.specScores.tab

    # Now generate a new crispr.bed and crispr.bb file

    mkdir  /hive/data/genomes/mm10/bed/crispr.10K/maxBed
    cd  /hive/data/genomes/mm10/bed/crispr.10K/maxBed
    # setup new inputs: the new specScores from uniqSpecScores, everything
    # else from addChrM
    ln -s ../uniqSpecScores/max.withChrM.specScores.tab .
    for F in withChrM.allGuides.bed withChrM.effScores.tab \
	withChrM.offtargets.offsets.tab offTargets
    do
      ln -s ../addChrM/$F $F
    done

    time (/cluster/software/bin/python \
      /hive/data/outside/crisprTrack/scripts/createBigBed.py mm10 \
      withChrM.allGuides.bed max.withChrM.specScores.tab \
      withChrM.effScores.tab withChrM.offtargets.offsets.tab) \
	> newBed.log 2>&1
    # real    232m5.379s

# -rw-rw-r-- 1 27947769791 May 15 17:55 crispr.bed
# -rw-rw-r-- 1  6911180170 May 15 18:42 crispr.bb

##############################################################################
# LASTZ Gorilla gorGor5 (DONE - 2017-08-04 - Hiram)
    #	establish a screen to control this job
    screen -S mm10gorGor5
    mkdir /hive/data/genomes/mm10/bed/lastzGorGor5.2017-08-04
    cd /hive/data/genomes/mm10/bed/lastzGorGor5.2017-08-04

    # write the DEF parameter file that is handed to doBlastzChainNet.pl
    printf '# mouse vs. gorilla
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz

# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000
SEQ1_LIMIT=1

# QUERY: gorilla gorGor5
SEQ2_DIR=/hive/data/genomes/gorGor5/gorGor5.2bit
SEQ2_LEN=/hive/data/genomes/gorGor5/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LIMIT=130
SEQ2_LAP=0

BASE=/hive/data/genomes/mm10/bed/lastzGorGor5.2017-08-04
TMPDIR=/dev/shm
' > DEF

    time (doBlastzChainNet.pl -verbose=2 `pwd`/DEF \
	-syntenicNet -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
	-chainMinScore=3000 -chainLinearGap=medium) > do.log 2>&1 &
    #	real    170m18.102s

    cat fb.mm10.chainGorGor5Link.txt
    #	934147601 bases of 2652783500 (35.214%) in intersection

    time (doRecipBest.pl -workhorse=hgwdev mm10 gorGor5 \
      -buildDir=`pwd`) > rbest.log 2>&1 &
    # real    327m34.879s

    # swap run in the gorGor5 build directory, reusing the mm10 DEF file
    mkdir /hive/data/genomes/gorGor5/bed/blastz.mm10.swap
    cd /hive/data/genomes/gorGor5/bed/blastz.mm10.swap
    time (doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzGorGor5.2017-08-04/DEF \
	-swap -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
	-chainMinScore=3000 -chainLinearGap=medium) > swap.log 2>&1
    #	real    72m34.088s

    cat fb.gorGor5.chainMm10Link.txt
    #	990002546 bases of 3080431298 (32.138%) in intersection

    time (doRecipBest.pl -workhorse=hgwdev gorGor5 mm10 \
      -buildDir=`pwd`) > rbest.log 2>&1 &
    # real    297m3.002s

##############################################################################
# refSeqFuncElems NCBI refSeq functional elements, REDONE 2017-11-29 Angie
# previously done 2017-08-08 by Chris E

mkdir /hive/data/genomes/mm10/bed/refSeqFuncElems.2017-11-29
cd /hive/data/genomes/mm10/bed/refSeqFuncElems.2017-11-29

# NOTE FOR NEXT TIME: these annotations may eventually be folded into the main
# release GFF3 (the one doNcbiRefSeq.pl extracts the ncbiRefSeq* tables from),
# in which case the interim GFF would no longer be needed.
wget ftp://ftp.ncbi.nlm.nih.gov/genomes/Mus_musculus/GFF_interim/interim_GRCm38.p6_top_level_2017-09-26.gff3.gz

# Mapping of RefSeq accessions (NC_*, NT_*, NW_*) to mm10 chrom names
hgsql mm10 -NBe 'select alias, chrom from chromAlias where source = "refseq" order by alias' \
  > refSeqToChrom.tab
# One "^chromName" pattern per line, for use with grep -f below
cut -f 2 refSeqToChrom.tab | sed 's/^/^/' > chrom.tab

# Functional Elements are picked out with Terence Murphy's list of feature
# types (plus the multi-type attribute regulatory_class), and the RefSeq
# accession in column 1 is replaced by the mm10 chrom name.
# subColumn -miss keeps it from quitting on a patch contig that has no mm10
# chrom; grep -f chrom.tab then drops those patch contig annotations.
zcat interim_GRCm38.p6_top_level_2017-09-26.gff3.gz |
  grep -P "(\t(CAAT_signal|GC_rich_promoter_region|TATA_box|enhancer|insulator|locus_control_region|mobile_genetic_element|origin_of_replication|promoter|protein_binding_site|recombination_feature|regulatory_region|repeat_region|sequence_feature|sequence_secondary_structure|silencer|stem_loop)\t|regulatory_class=)" |
  subColumn -miss=/dev/null 1 stdin refSeqToChrom.tab stdout |
  grep -f chrom.tab > funcElems.gff
wc -l funcElems.gff
#1968 funcElems.gff

# GFF to BED+, sorted by chrom then start
~/kent/src/hg/utils/automation/parseRefSeqFuncElems funcElems.gff /dev/stdout |
  sort -k1,1 -k2n,2n > refSeqFuncElems.bed
wc -l refSeqFuncElems.bed
#1968 refSeqFuncElems.bed

# bigBed file, linked from /gbdb
bedToBigBed -tab -type=bed9+7 -as=$HOME/kent/src/hg/lib/refSeqFuncElems.as \
  refSeqFuncElems.bed /hive/data/genomes/mm10/chrom.sizes refSeqFuncElems.bb
rm -f /gbdb/mm10/ncbiRefSeq/refSeqFuncElems.bb
ln -s $(pwd)/refSeqFuncElems.bb /gbdb/mm10/ncbiRefSeq/

##############################################################################
# ncbiRefSeq composite (DONE - 2017-11-16 - Angie)
# Previously done 2017-09-28; redone 11-16 to include mito "rna" from chrM genomic seq

    mkdir /hive/data/genomes/mm10/bed/ncbiRefSeq.p5.2017-11-16
    cd /hive/data/genomes/mm10/bed/ncbiRefSeq.p5.2017-11-16

    # run in the background with all output going to do.log, and follow the
    # log while it runs
    time (~/kent/src/hg/utils/automation/doNcbiRefSeq.pl -buildDir=`pwd` \
      refseq vertebrate_mammalian Mus_musculus \
      GCF_000001635.25_GRCm38.p5 mm10) > do.log 2>&1 & tail -f do.log
    #  *** All done !  Elapsed time: 17m36s
    # real    17m35.651s

    cat fb.ncbiRefSeq.mm10.txt
    # 105516336 bases of 2652783500 (3.978%) in intersection

##############################################################################
# LASTZ Drill manLeu1 (DONE - 2017-09-25 - Hiram)
    mkdir /hive/data/genomes/mm10/bed/lastzManLeu1.2017-09-25
    cd /hive/data/genomes/mm10/bed/lastzManLeu1.2017-09-25

    # DEF parameter file handed to doBlastzChainNet.pl below
    cat << '_EOF_' > DEF
# drill vs mouse
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz
# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: Drill ManLeu1
SEQ2_DIR=/hive/data/genomes/manLeu1/manLeu1.2bit
SEQ2_LEN=/hive/data/genomes/manLeu1/chrom.sizes
SEQ2_CHUNK=20000000
SEQ2_LAP=0
SEQ2_LIMIT=30

BASE=/hive/data/genomes/mm10/bed/lastzManLeu1.2017-09-25
TMPDIR=/dev/shm
_EOF_

    #	run under a screen session so the job can be controlled
    screen -S mm10ManLeu1
    time (doBlastzChainNet.pl -verbose=2 $(pwd)/DEF -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
	-chainMinScore=3000 -chainLinearGap=medium) > do.log 2>&1
    #   real    233m12.288s

    cat fb.mm10.chainManLeu1Link.txt
    #	905203366 bases of 2652783500 (34.123%) in intersection

    time (doRecipBest.pl -workhorse=hgwdev -buildDir=$(pwd) mm10 manLeu1) \
	> rbest.log 2>&1 &
    # real    362m58.840s

    #	swap run in the manLeu1 build directory, same DEF file
    mkdir /hive/data/genomes/manLeu1/bed/blastz.mm10.swap
    cd /hive/data/genomes/manLeu1/bed/blastz.mm10.swap
    time (doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzManLeu1.2017-09-25/DEF \
	-swap -syntenicNet -workhorse=hgwdev \
	-smallClusterHub=ku -bigClusterHub=ku \
	-chainMinScore=3000 -chainLinearGap=medium) > swap.log 2>&1
    #	real    64m55.226s

    cat fb.manLeu1.chainMm10Link.txt
    #	895668222 bases of 2721424086 (32.912%) in intersection

    time (doRecipBest.pl -workhorse=hgwdev -buildDir=$(pwd) manLeu1 mm10) \
	> rbest.log 2>&1
    # real    338m57.422s

##############################################################################
# LASTZ Ma's night monkey aotNan1 (DONE - 2017-09-25 - Hiram)
    mkdir /hive/data/genomes/mm10/bed/lastzAotNan1.2017-09-25
    cd /hive/data/genomes/mm10/bed/lastzAotNan1.2017-09-25

    # DEF parameter file handed to doBlastzChainNet.pl below
    cat << '_EOF_' > DEF
# Ma_s night monkey vs mouse
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz
# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: Ma_s night monkey AotNan1
SEQ2_DIR=/hive/data/genomes/aotNan1/aotNan1.2bit
SEQ2_LEN=/hive/data/genomes/aotNan1/chrom.sizes
SEQ2_CHUNK=20000000
SEQ2_LAP=0
SEQ2_LIMIT=30

BASE=/hive/data/genomes/mm10/bed/lastzAotNan1.2017-09-25
TMPDIR=/dev/shm
_EOF_

    #	run under a screen session so the job can be controlled
    screen -S mm10AotNan1
    time (doBlastzChainNet.pl -verbose=2 $(pwd)/DEF -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
	-chainMinScore=3000 -chainLinearGap=medium) > do.log 2>&1
    #   real    400m13.309s

    cat fb.mm10.chainAotNan1Link.txt
    #	889500682 bases of 2652783500 (33.531%) in intersection

    time (doRecipBest.pl -workhorse=hgwdev -buildDir=$(pwd) mm10 aotNan1) \
	> rbest.log 2>&1 &
    # real    352m12.077s

    #	swap run in the aotNan1 build directory, same DEF file
    mkdir /hive/data/genomes/aotNan1/bed/blastz.mm10.swap
    cd /hive/data/genomes/aotNan1/bed/blastz.mm10.swap
    time (doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzAotNan1.2017-09-25/DEF \
	-swap -syntenicNet -workhorse=hgwdev \
	-smallClusterHub=ku -bigClusterHub=ku \
	-chainMinScore=3000 -chainLinearGap=medium) > swap.log 2>&1
    #	real    68m48.755s

    cat fb.aotNan1.chainMm10Link.txt
    #	893851318 bases of 2714439490 (32.929%) in intersection

    time (doRecipBest.pl -workhorse=hgwdev -buildDir=$(pwd) aotNan1 mm10) \
	> rbest.log 2>&1
    # real    383m10.761s

##############################################################################
# LASTZ Hawaiian monk seal neoSch1 (DONE - 2017-09-25 - Hiram)
    #	establish a screen to control this job
    screen -S mm10neoSch1
    mkdir /hive/data/genomes/mm10/bed/lastzNeoSch1.2017-09-25
    cd /hive/data/genomes/mm10/bed/lastzNeoSch1.2017-09-25

    # write the DEF parameter file that is handed to doBlastzChainNet.pl
    printf '# mouse vs. Hawaiian monk seal
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz

# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=40000000
SEQ1_LAP=10000
SEQ1_LIMIT=1

# QUERY: Hawaiian monk seal neoSch1
SEQ2_DIR=/hive/data/genomes/neoSch1/neoSch1.2bit
SEQ2_LEN=/hive/data/genomes/neoSch1/chrom.sizes
SEQ2_CHUNK=20000000
SEQ2_LIMIT=20
SEQ2_LAP=0

BASE=/hive/data/genomes/mm10/bed/lastzNeoSch1.2017-09-25
TMPDIR=/dev/shm
' > DEF

    time (doBlastzChainNet.pl -verbose=2 `pwd`/DEF \
	-syntenicNet -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
	-chainMinScore=3000 -chainLinearGap=medium) > do.log 2>&1 &
    #	real    324m0.457s

    cat fb.mm10.chainNeoSch1Link.txt
    #	827926012 bases of 2652783500 (31.210%) in intersection

    time (doRecipBest.pl -load -workhorse=hgwdev mm10 neoSch1 \
      -buildDir=`pwd`) > rbest.log 2>&1 &
    # real    307m18.396s

    cat fb.mm10.chainRBestNeoSch1Link.txt
    #   788489846 bases of 2652783500 (29.723%) in intersection

    # swap run in the neoSch1 build directory, reusing the mm10 DEF file
    mkdir /hive/data/genomes/neoSch1/bed/blastz.mm10.swap
    cd /hive/data/genomes/neoSch1/bed/blastz.mm10.swap
    time (doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzNeoSch1.2017-09-25/DEF \
	-swap -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
	-chainMinScore=3000 -chainLinearGap=medium) > swap.log 2>&1
    #	real    59m27.809s

    cat fb.neoSch1.chainMm10Link.txt
    #	804021579 bases of 2400839308 (33.489%) in intersection
    cat fb.neoSch1.chainSynMm10Link.txt
    #   776155245 bases of 2400839308 (32.328%) in intersection

    time (doRecipBest.pl -load -workhorse=hgwdev neoSch1 mm10 \
      -buildDir=`pwd`) > rbest.log 2>&1 &
    # real    242m31.157s

    cat fb.neoSch1.chainRBestMm10Link.txt
    # 787537751 bases of 2400839308 (32.803%) in intersection

##############################################################################
# LASTZ Sooty mangabey cerAty1 (DONE - 2017-09-27 - Hiram)
    mkdir /hive/data/genomes/mm10/bed/lastzCerAty1.2017-09-27
    cd /hive/data/genomes/mm10/bed/lastzCerAty1.2017-09-27

    # DEF parameter file handed to doBlastzChainNet.pl below
    cat << '_EOF_' > DEF
# Sooty mangabey vs mouse
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz
# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: Sooty mangabey CerAty1
SEQ2_DIR=/hive/data/genomes/cerAty1/cerAty1.2bit
SEQ2_LEN=/hive/data/genomes/cerAty1/chrom.sizes
SEQ2_CHUNK=20000000
SEQ2_LAP=0
SEQ2_LIMIT=30

BASE=/hive/data/genomes/mm10/bed/lastzCerAty1.2017-09-27
TMPDIR=/dev/shm
_EOF_

    #	run under a screen session so the job can be controlled
    screen -S mm10CerAty1
    time (doBlastzChainNet.pl -verbose=2 $(pwd)/DEF -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
	-chainMinScore=3000 -chainLinearGap=medium) > do.log 2>&1
    #   real    371m15.075s

    cat fb.mm10.chainCerAty1Link.txt
    #	917680202 bases of 2652783500 (34.593%) in intersection

    time (doRecipBest.pl -workhorse=hgwdev -buildDir=$(pwd) mm10 cerAty1) \
	> rbest.log 2>&1 &
    # real    345m49.786s

    #	swap run in the cerAty1 build directory, same DEF file
    mkdir /hive/data/genomes/cerAty1/bed/blastz.mm10.swap
    cd /hive/data/genomes/cerAty1/bed/blastz.mm10.swap
    time (doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzCerAty1.2017-09-27/DEF \
	-swap -syntenicNet -workhorse=hgwdev \
	-smallClusterHub=ku -bigClusterHub=ku \
	-chainMinScore=3000 -chainLinearGap=medium) > swap.log 2>&1
    #	real    68m6.225s

    cat fb.cerAty1.chainMm10Link.txt
    #	903892923 bases of 2787289397 (32.429%) in intersection

    time (doRecipBest.pl -workhorse=hgwdev -buildDir=$(pwd) cerAty1 mm10) \
	> rbest.log 2>&1
    # real    305m14.804s

##############################################################################
# LASTZ Coquerel's sifaka to mouse/Mm10 (DONE - 2017-09-28 - Hiram)
    mkdir /hive/data/genomes/mm10/bed/lastzProCoq1.2017-09-28
    cd /hive/data/genomes/mm10/bed/lastzProCoq1.2017-09-28

    # DEF parameter file handed to doBlastzChainNet.pl below
    cat << '_EOF_' > DEF
# Coquerel_s sifaka vs mouse
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz
# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: proCoq1 - Coquerel_s sifaka - Propithecus coquereli
SEQ2_DIR=/hive/data/genomes/proCoq1/proCoq1.2bit
SEQ2_LEN=/hive/data/genomes/proCoq1/chrom.sizes
SEQ2_CHUNK=20000000
SEQ2_LAP=0
SEQ2_LIMIT=50

BASE=/hive/data/genomes/mm10/bed/lastzProCoq1.2017-09-28
TMPDIR=/dev/shm
_EOF_

    #	run under a screen session so the job can be controlled
    screen -S mm10ProCoq1
    time (doBlastzChainNet.pl -verbose=2 $(pwd)/DEF -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
	-chainMinScore=3000 -chainLinearGap=medium) > do.log 2>&1
    #   real    294m43.931s

    cat fb.mm10.chainProCoq1Link.txt
    #	882327683 bases of 2652783500 (33.260%) in intersection

    time (doRecipBest.pl -workhorse=hgwdev -buildDir=$(pwd) mm10 proCoq1) \
	> rbest.log 2>&1 &
    # real    411m5.774s

    #	swap run in the proCoq1 build directory, same DEF file
    mkdir /hive/data/genomes/proCoq1/bed/blastz.mm10.swap
    cd /hive/data/genomes/proCoq1/bed/blastz.mm10.swap
    time (doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzProCoq1.2017-09-28/DEF \
	-swap -syntenicNet -workhorse=hgwdev \
	-smallClusterHub=ku -bigClusterHub=ku \
	-chainMinScore=3000 -chainLinearGap=medium) > swap.log 2>&1
    #	real    62m48.333s

    cat fb.proCoq1.chainMm10Link.txt
    #	863635783 bases of 2083764538 (41.446%) in intersection

    time (doRecipBest.pl -workhorse=hgwdev -buildDir=$(pwd) proCoq1 mm10) \
	> rbest.log 2>&1
    # real    357m54.198s

##############################################################################
# LASTZ White-faced sapajou to mouse/Mm10 (DONE - 2017-09-28 - Hiram)
    mkdir /hive/data/genomes/mm10/bed/lastzCebCap1.2017-09-28
    cd /hive/data/genomes/mm10/bed/lastzCebCap1.2017-09-28

    printf '# White-faced sapajou vs mouse
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz
# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: cebCap1 - White-faced sapajou -  Cebus capucinus imitator
SEQ2_DIR=/hive/data/genomes/cebCap1/cebCap1.2bit
SEQ2_LEN=/hive/data/genomes/cebCap1/chrom.sizes
SEQ2_CHUNK=20000000
SEQ2_LAP=0
SEQ2_LIMIT=20

BASE=/hive/data/genomes/mm10/bed/lastzCebCap1.2017-09-28
TMPDIR=/dev/shm
' > DEF

    #	establish a screen to control this job
    screen -S mm10CebCap1
    time (doBlastzChainNet.pl -verbose=2 \
	`pwd`/DEF \
	-syntenicNet \
	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
	-chainMinScore=3000 -chainLinearGap=medium) > do.log 2>&1
    #   real    293m40.906s

    cat fb.mm10.chainCebCap1Link.txt
    #	882776669 bases of 2652783500 (33.277%) in intersection

    time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` mm10 cebCap1) \
	> rbest.log 2>&1 &
    # real    334m0.458s

    mkdir /hive/data/genomes/cebCap1/bed/blastz.mm10.swap
    cd /hive/data/genomes/cebCap1/bed/blastz.mm10.swap
    time (doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzCebCap1.2017-09-28/DEF \
	-swap -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
	-chainMinScore=3000 -chainLinearGap=medium) > swap.log 2>&1
    #	real    63m12.596s

    cat fb.cebCap1.chainMm10Link.txt
    #	871126707 bases of 2610518382 (33.370%) in intersection

    time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` cebCap1 mm10) \
	> rbest.log 2>&1
    # real    299m3.923s

##############################################################################
# LASTZ White-faced sapajou/cebCap1 vs. mouse/Mm10 (DONE - 2017-10-03 - Hiram)
    mkdir /hive/data/genomes/mm10/bed/lastzCebCap1.2017-10-03
    cd /hive/data/genomes/mm10/bed/lastzCebCap1.2017-10-03

    printf '# White-faced sapajou vs. mouse
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz
# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: cebCap1 - White-faced sapajou - Cebus capucinus imitator
SEQ2_DIR=/hive/data/genomes/cebCap1/cebCap1.2bit
SEQ2_LEN=/hive/data/genomes/cebCap1/chrom.sizes
SEQ2_CHUNK=20000000
SEQ2_LAP=0
SEQ2_LIMIT=18

BASE=/hive/data/genomes/mm10/bed/lastzCebCap1.2017-10-03
TMPDIR=/dev/shm
' > DEF

    #	establish a screen to control this job
    screen -S mm10CebCap1
    time (doBlastzChainNet.pl -verbose=2 \
	`pwd`/DEF \
	-syntenicNet \
	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
	-chainMinScore=3000 -chainLinearGap=medium) > do.log 2>&1
    #   real    206m12.413s

    cat fb.mm10.chainCebCap1Link.txt
    #	882776669 bases of 2652783500 (33.277%) in intersection

    time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` mm10 cebCap1) \
	> rbest.log 2>&1 &
    # real    331m49.541s

    mkdir /hive/data/genomes/cebCap1/bed/blastz.mm10.swap
    cd /hive/data/genomes/cebCap1/bed/blastz.mm10.swap
    time (doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzCebCap1.2017-10-03/DEF \
	-swap -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
	-chainMinScore=3000 -chainLinearGap=medium) > swap.log 2>&1
    #	real    63m12.596s

    cat fb.cebCap1.chainMm10Link.txt
    #	871126707 bases of 2610518382 (33.370%) in intersection

    time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` cebCap1 mm10) \
	> rbest.log 2>&1
    # real    299m3.923s

##############################################################################
# LASTZ Sclater's lemur vs. mouse/Mm10 (DONE - 2017-10-04 - Hiram)
    mkdir /hive/data/genomes/mm10/bed/lastzEulFla1.2017-10-04
    cd /hive/data/genomes/mm10/bed/lastzEulFla1.2017-10-04

    printf '# Sclater_s lemur vs. mouse
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz
# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: eulFla1 - Sclater_s lemur - Eulemur flavifrons
SEQ2_DIR=/hive/data/genomes/eulFla1/eulFla1.2bit
SEQ2_LEN=/hive/data/genomes/eulFla1/chrom.sizes
SEQ2_CHUNK=20000000
SEQ2_LAP=0
SEQ2_LIMIT=18

BASE=/hive/data/genomes/mm10/bed/lastzEulFla1.2017-10-04
TMPDIR=/dev/shm
' > DEF

    #	establish a screen to control this job
    screen -S mm10EulFla1
    time (doBlastzChainNet.pl -verbose=2 \
	`pwd`/DEF \
	-syntenicNet \
	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
	-chainMinScore=3000 -chainLinearGap=medium) > do.log 2>&1
    #   real    144m17.701s

    cat fb.mm10.chainEulFla1Link.txt
    #	916687191 bases of 2652783500 (34.556%) in intersection

    time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` mm10 eulFla1) \
	> rbest.log 2>&1 &
    # real    330m53.327s

    mkdir /hive/data/genomes/eulFla1/bed/blastz.mm10.swap
    cd /hive/data/genomes/eulFla1/bed/blastz.mm10.swap
    time (doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzEulFla1.2017-10-04/DEF \
	-swap -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
	-chainMinScore=3000 -chainLinearGap=medium) > swap.log 2>&1
    #	real    65m26.113s

    cat fb.eulFla1.chainMm10Link.txt
    #	887070088 bases of 2094103399 (42.360%) in intersection

    time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` eulFla1 mm10) \
	> rbest.log 2>&1
    # real    270m35.579s

##############################################################################
# LASTZ Black lemur vs. mouse/Mm10 (DONE - 2017-10-05 - Hiram)
    mkdir /hive/data/genomes/mm10/bed/lastzEulMac1.2017-10-05
    cd /hive/data/genomes/mm10/bed/lastzEulMac1.2017-10-05

    printf '# Black lemur vs. mouse
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz
# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: eulMac1 - Black lemur - Eulemur macaco
SEQ2_DIR=/hive/data/genomes/eulMac1/eulMac1.2bit
SEQ2_LEN=/hive/data/genomes/eulMac1/chrom.sizes
SEQ2_CHUNK=20000000
SEQ2_LAP=0
SEQ2_LIMIT=50

BASE=/hive/data/genomes/mm10/bed/lastzEulMac1.2017-10-05
TMPDIR=/dev/shm
' > DEF

    #	establish a screen to control this job
    screen -S mm10EulMac1
    time (doBlastzChainNet.pl -verbose=2 \
	`pwd`/DEF \
	-syntenicNet \
	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
	-chainMinScore=3000 -chainLinearGap=medium) > do.log 2>&1
    #   real    167m31.736s

    cat fb.mm10.chainEulMac1Link.txt
    #	925968814 bases of 2652783500 (34.906%) in intersection

    time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` mm10 eulMac1) \
	> rbest.log 2>&1 &
    # real    334m49.287s

    mkdir /hive/data/genomes/eulMac1/bed/blastz.mm10.swap
    cd /hive/data/genomes/eulMac1/bed/blastz.mm10.swap
    time (doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzEulMac1.2017-10-05/DEF \
	-swap -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
	-chainMinScore=3000 -chainLinearGap=medium) > swap.log 2>&1
    #	real    64m52.738s

    cat fb.eulMac1.chainMm10Link.txt
    #	895308387 bases of 2101039320 (42.613%) in intersection

    time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` eulMac1 mm10) \
	> rbest.log 2>&1
    # real    267m17.552s

##############################################################################
# LASTZ Damara mole rat vs. mouse/Mm10 (DONE - 2018-01-01 - Hiram)
    mkdir /hive/data/genomes/mm10/bed/lastzFukDam1.2018-01-01
    cd /hive/data/genomes/mm10/bed/lastzFukDam1.2018-01-01

    printf '# Damara mole rat vs. mouse
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz
# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: Damara mole rat
SEQ2_DIR=/hive/data/genomes/fukDam1/fukDam1.2bit
SEQ2_LEN=/hive/data/genomes/fukDam1/chrom.sizes
SEQ2_CHUNK=20000000
SEQ2_LIMIT=200
SEQ2_LAP=0

BASE=/hive/data/genomes/mm10/bed/lastzFukDam1.2018-01-01
TMPDIR=/dev/shm
' > DEF

    #	establish a screen to control this job
    time (doBlastzChainNet.pl -verbose=2 \
	`pwd`/DEF \
	-syntenicNet \
	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
	-chainMinScore=3000 -chainLinearGap=medium) > do.log 2>&1 &
    #   real    403m29.477s

    cat fb.mm10.chainFukDam1Link.txt
    # 803448015 bases of 2652783500 (30.287%) in intersection

    time (doRecipBest.pl -load -workhorse=hgwdev -buildDir=`pwd` mm10 fukDam1) \
	> rbest.log 2>&1 &
    # real    391m52.435s

    cat fb.mm10.chainRBestFukDam1Link.txt
    # 760138280 bases of 2652783500 (28.654%) in intersection

    mkdir /hive/data/genomes/fukDam1/bed/blastz.mm10.swap
    cd /hive/data/genomes/fukDam1/bed/blastz.mm10.swap
    time (doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzFukDam1.2018-01-01/DEF \
	-swap -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
	-chainMinScore=3000 -chainLinearGap=medium) > swap.log 2>&1
    #	real    79m46.564s

    cat fb.fukDam1.chainMm10Link.txt
    # 803988546 bases of 2285984782 (35.170%) in intersection
    cat fb.fukDam1.chainSynMm10Link.txt
    # 741604346 bases of 2285984782 (32.441%) in intersection

    time (doRecipBest.pl -load -workhorse=hgwdev -buildDir=`pwd` fukDam1 mm10) \
	> rbest.log 2>&1
    # real    417m52.847s

    cat fb.fukDam1.chainRBestMm10Link.txt
    # 760190877 bases of 2285984782 (33.254%) in intersection

##############################################################################
# LASTZ Kangaroo rat vs. mouse/Mm10 (DONE - 2018-01-01 - Hiram)
    mkdir /hive/data/genomes/mm10/bed/lastzDipOrd2.2018-01-01
    cd /hive/data/genomes/mm10/bed/lastzDipOrd2.2018-01-01

    printf '# Kangaroo rat vs. mouse
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz
# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: Kangaroo rat
SEQ2_DIR=/hive/data/genomes/dipOrd2/dipOrd2.2bit
SEQ2_LEN=/hive/data/genomes/dipOrd2/chrom.sizes
SEQ2_CHUNK=20000000
SEQ2_LIMIT=200
SEQ2_LAP=0

BASE=/hive/data/genomes/mm10/bed/lastzDipOrd2.2018-01-01
TMPDIR=/dev/shm
' > DEF

    #	establish a screen to control this job
    time (doBlastzChainNet.pl -verbose=2 \
	`pwd`/DEF \
	-syntenicNet \
	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
	-chainMinScore=3000 -chainLinearGap=medium) > do.log 2>&1 &
    #   real    351m30.983s

    cat fb.mm10.chainDipOrd2Link.txt
    #	645178768 bases of 2652783500 (24.321%) in intersection

    time (doRecipBest.pl -load -workhorse=hgwdev -buildDir=`pwd` mm10 dipOrd2) \
	> rbest.log 2>&1 &
    # real    439m56.601s

    cat fb.mm10.chainRBestDipOrd2Link.txt
    # 605074450 bases of 2652783500 (22.809%) in intersection

    mkdir /hive/data/genomes/dipOrd2/bed/blastz.mm10.swap
    cd /hive/data/genomes/dipOrd2/bed/blastz.mm10.swap
    time (doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzDipOrd2.2018-01-01/DEF \
	-swap -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
	-chainMinScore=3000 -chainLinearGap=medium) > swap.log 2>&1
    #	real    79m46.564s

    cat fb.dipOrd2.chainMm10Link.txt
    # 631879699 bases of 2065314047 (30.595%) in intersection
    cat fb.dipOrd2.chainSynMm10Link.txt
    # 581661824 bases of 2065314047 (28.163%) in intersection

    time (doRecipBest.pl -load -workhorse=hgwdev -buildDir=`pwd` dipOrd2 mm10) \
	> rbest.log 2>&1
    # real    412m53.879s

    cat fb.dipOrd2.chainRBestMm10Link.txt
    # 605056621 bases of 2065314047 (29.296%) in intersection

##############################################################################
# LASTZ Chinese hamster ovary cell line CHO-K1  criGriChoV2
#	(DONE - 2018-01-05 - Hiram)
    #	establish a screen to control this job
    screen -S mm10criGriChoV2
    mkdir /hive/data/genomes/mm10/bed/lastzCriGriChoV2.2018-01-05
    cd /hive/data/genomes/mm10/bed/lastzCriGriChoV2.2018-01-05

    printf '# Chinese hamster ovary cell line vs mouse
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz

# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000
SEQ1_LIMIT=40

# QUERY: Chinese hamster ovary cell line CHO-K1  criGriChoV2
SEQ2_DIR=/hive/data/genomes/criGriChoV2/criGriChoV2.2bit
SEQ2_LEN=/hive/data/genomes/criGriChoV2/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LIMIT=20
SEQ2_LAP=0

BASE=/hive/data/genomes/mm10/bed/lastzCriGriChoV2.2018-01-05
TMPDIR=/dev/shm
' > DEF

    time (doBlastzChainNet.pl -verbose=2 `pwd`/DEF \
	-noDbNameCheck -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
	-chainMinScore=3000 -chainLinearGap=medium) > do.log 2>&1 &
    #	real    575m28.254s

    cat fb.mm10.chainCriGriChoV2Link.txt
    #	1583859515 bases of 2652783500 (59.706%) in intersection

    time (doRecipBest.pl -load -workhorse=hgwdev mm10 criGriChoV2 \
      -buildDir=`pwd` -workhorse=hgwdev) > rbest.log 2>&1 &
    #	real    1098m32.629s

    cat fb.mm10.chainRBestCriGriChoV2Link.txt
    # 1451345011 bases of 2652783500 (54.710%) in intersection

    mkdir /hive/data/genomes/criGriChoV2/bed/blastz.mm10.swap
    cd /hive/data/genomes/criGriChoV2/bed/blastz.mm10.swap
    time (doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzCriGriChoV2.2018-01-05/DEF \
	-noDbNameCheck -swap -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
	-chainMinScore=3000 -chainLinearGap=medium) > swap.log 2>&1 &
    #	real    196m59.409s

    cat fb.criGriChoV2.chainMm10Link.txt
    #	1605002950 bases of 2323924942 (69.064%) in intersection
    cat fb.criGriChoV2.chainSynMm10Link.txt
    #   1443603212 bases of 2323924942 (62.119%) in intersection

    time (doRecipBest.pl -load -workhorse=hgwdev criGriChoV2 mm10 \
      -buildDir=`pwd` -workhorse=hgwdev) > rbest.log 2>&1 &
    # real    1187m10.728s

    cat fb.criGriChoV2.chainRBestMm10Link.txt
    # 1452526554 bases of 2323924942 (62.503%) in intersection

##############################################################################
# LASTZ Baboon papAnu4 (DONE - 2018-01-08 - Hiram)
    #	establish a screen to control this job
    screen -S mm10papAnu4
    mkdir /hive/data/genomes/mm10/bed/lastzPapAnu4.2018-01-08
    cd /hive/data/genomes/mm10/bed/lastzPapAnu4.2018-01-08

    printf '# mouse vs. baboon
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz

# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000
SEQ1_LIMIT=40

# QUERY: baboon papAnu4
SEQ2_DIR=/hive/data/genomes/papAnu4/papAnu4.2bit
SEQ2_LEN=/hive/data/genomes/papAnu4/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LIMIT=180
SEQ2_LAP=0

BASE=/hive/data/genomes/mm10/bed/lastzPapAnu4.2018-01-08
TMPDIR=/dev/shm
' > DEF

    time (doBlastzChainNet.pl -verbose=2 `pwd`/DEF \
	-syntenicNet -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
	-chainMinScore=3000 -chainLinearGap=medium) > do.log 2>&1 &
    #	real    783m49.438s

    cat fb.mm10.chainPapAnu4Link.txt
    #	919405716 bases of 2652783500 (34.658%) in intersection

    time (doRecipBest.pl -load -workhorse=hgwdev mm10 papAnu4 \
      -buildDir=`pwd` -workhorse=hgwdev) > rbest.log 2>&1 &
    # real    582m15.183s

    cat fb.mm10.chainRBestPapAnu4Link.txt
    # 875366631 bases of 2652783500 (32.998%) in intersection

    mkdir /hive/data/genomes/papAnu4/bed/blastz.mm10.swap
    cd /hive/data/genomes/papAnu4/bed/blastz.mm10.swap
    time (doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzPapAnu4.2018-01-08/DEF \
	-swap -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
	-chainMinScore=3000 -chainLinearGap=medium) > swap.log 2>&1
    #	real    80m51.648s

    cat fb.papAnu4.chainMm10Link.txt
    #	907806517 bases of 2937004939 (30.909%) in intersection
    cat fb.papAnu4.chainSynMm10Link.txt
    #	866781916 bases of 2937004939 (29.512%) in intersection

    time (doRecipBest.pl -load -workhorse=hgwdev papAnu4 mm10 \
      -buildDir=`pwd` -workhorse=hgwdev) > rbest.log 2>&1 &
    # real    521m7.590s

    cat fb.papAnu4.chainRBestMm10Link.txt
    # 874097827 bases of 2937004939 (29.762%) in intersection

##############################################################################
# LASTZ guinea pig cavApe1 (DONE - 2018-01-08 - Hiram)
    # establish a screen to control this job with a name to indicate what it is
    screen -S mm10CavApe1
    mkdir /hive/data/genomes/mm10/bed/lastzCavApe1.2018-01-08
    cd /hive/data/genomes/mm10/bed/lastzCavApe1.2018-01-08

    printf '# guinea pig vs mouse
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz
# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: guinea pig CavApe1
SEQ2_DIR=/hive/data/genomes/cavApe1/cavApe1.2bit
SEQ2_LEN=/hive/data/genomes/cavApe1/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LAP=0
SEQ2_LIMIT=10

BASE=/hive/data/genomes/mm10/bed/lastzCavApe1.2018-01-08
TMPDIR=/dev/shm
' > DEF

    time (doBlastzChainNet.pl `pwd`/DEF -verbose=2 \
	-syntenicNet -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
	-chainMinScore=3000 -chainLinearGap=medium) > do.log 2>&1 &
    #	real    514m28.099s

    cat fb.mm10.chainCavApe1Link.txt
    #	424603451 bases of 2652783500 (16.006%) in intersection

    time (doRecipBest.pl -load -workhorse=hgwdev mm10 cavApe1 \
      -buildDir=`pwd`) > rbest.log 2>&1 &
    # real    481m13.804s

    cat fb.mm10.chainRBestCavApe1Link.txt
    # 394844156 bases of 2652783500 (14.884%) in intersection

    # and for the swap
    mkdir /hive/data/genomes/cavApe1/bed/blastz.mm10.swap
    cd /hive/data/genomes/cavApe1/bed/blastz.mm10.swap
    time (doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzCavApe1.2018-01-08/DEF \
	-swap -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
	-chainMinScore=3000 -chainLinearGap=medium) > swap.log 2>&1 &
    #	real    38m53.866s

    cat fb.cavApe1.chainMm10Link.txt
    #	420563721 bases of 1749140834 (24.044%) in intersection
    cat fb.cavApe1.chainSynMm10Link.txt
    # 364825271 bases of 1749140834 (20.857%) in intersection

    time (doRecipBest.pl -load -workhorse=hgwdev cavApe1 mm10 \
      -buildDir=`pwd`) > rbest.log 2>&1 &
    # real    438m45.544s

    cat fb.cavApe1.chainRBestMm10Link.txt
    # 395976886 bases of 1749140834 (22.638%) in intersection

##############################################################################
# lastz Collared flycatcher ficAlb1 (DONE - 2018-01-09 - Hiram)
    # establish a screen to control this job with a name to indicate what it is
    screen -S mm10
    mkdir /hive/data/genomes/mm10/bed/lastzFicAlb1.2018-01-09
    cd /hive/data/genomes/mm10/bed/lastzFicAlb1.2018-01-09

    printf '# Mouse vs.  Collared flycatcher
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz
BLASTZ_Y=3400
BLASTZ_L=6000
BLASTZ_K=2200
BLASTZ_Q=/scratch/data/blastz/HoxD55.q

# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: Collard flycatcher/FicAlb1
SEQ2_DIR=/hive/data/genomes/ficAlb1/ficAlb1.2bit
SEQ2_LEN=/hive/data/genomes/ficAlb1/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LAP=0
SEQ2_LIMIT=50

BASE=/hive/data/genomes/mm10/bed/lastzFicAlb1.2018-01-09
TMPDIR=/dev/shm
' > DEF

    time (doBlastzChainNet.pl -verbose=2 \
        `pwd`/DEF \
        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
        -syntenicNet -chainMinScore=5000 -chainLinearGap=loose) > do.log 2>&1 &
    #   real    167m34.472s

    cat fb.mm10.chainFicAlb1Link.txt
    #   98177848 bases of 2652783500 (3.701%) in intersection

    time (doRecipBest.pl -load -workhorse=hgwdev mm10 ficAlb1 \
      -buildDir=`pwd` -workhorse=hgwdev) > rbest.log 2>&1 &
    # real    246m1.019s

    cat fb.mm10.chainRBestFicAlb1Link.txt
    # 76370866 bases of 2652783500 (2.879%) in intersection

    #	and for the swap
    mkdir /hive/data/genomes/ficAlb1/bed/blastz.mm10.swap
    cd /hive/data/genomes/ficAlb1/bed/blastz.mm10.swap
    time (doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzFicAlb1.2018-01-09/DEF \
        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
        -swap -chainMinScore=5000 -chainLinearGap=loose) > swap.log 2>&1 &
    # real    8m5.637s

    cat  fb.ficAlb1.chainMm10Link.txt
    #   85384367 bases of 1102325870 (7.746%) in intersection

    time (doRecipBest.pl -load -workhorse=hgwdev ficAlb1 mm10 \
      -buildDir=`pwd` -workhorse=hgwdev) > rbest.log 2>&1 &
    # real    209m22.159s

    cat fb.ficAlb1.chainRBestMm10Link.txt
    # 76183235 bases of 1102325870 (6.911%) in intersection

##########################################################################
# lastz Lamprey petMar3 (DONE - 2018-01-25 - Hiram)
    # establish a screen to control this job with a name to indicate what it is
    screen -S petMar3
    mkdir /hive/data/genomes/mm10/bed/lastzPetMar3.2018-01-25
    cd /hive/data/genomes/mm10/bed/lastzPetMar3.2018-01-25

    printf '# Mouse vs. Lamprey
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz
BLASTZ_Y=3400
BLASTZ_L=6000
BLASTZ_K=2200
BLASTZ_M=50
BLASTZ_Q=/scratch/data/blastz/HoxD55.q

# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: Lamprey PetMar3
SEQ2_DIR=/hive/data/genomes/petMar3/petMar3.2bit
SEQ2_LEN=/hive/data/genomes/petMar3/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LAP=0
SEQ2_LIMIT=60

BASE=/hive/data/genomes/mm10/bed/lastzPetMar3.2018-01-25
TMPDIR=/dev/shm
' > DEF

    # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
    #	number of jobs, 50,000 to something under 100,000
    # when not present, SEQ2_LIMIT is a default 100
    time (doBlastzChainNet.pl -verbose=2 \
        `pwd`/DEF \
        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
        -syntenicNet -chainMinScore=5000 -chainLinearGap=loose) > do.log 2>&1 &
    #   real    119m5.528s

    cat fb.mm10.chainPetMar3Link.txt
    #   36835173 bases of 2652783500 (1.389%) in intersection

    time (doRecipBest.pl -workhorse=hgwdev -load -buildDir=`pwd` mm10 petMar3) \
	> rbest.log 2>&1 &
    # real    201m40.789s

    cat fb.mm10.chainRBestPetMar3Link.txt
    # 21623456 bases of 2652783500 (0.815%) in intersection

    #	and for the swap
    mkdir /hive/data/genomes/petMar3/bed/blastz.mm10.swap
    cd /hive/data/genomes/petMar3/bed/blastz.mm10.swap
    time (doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzPetMar3.2018-01-25/DEF \
        -syntenicNet -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
        -swap -chainMinScore=5000 -chainLinearGap=loose) > swap.log 2>&1 &
    # real    7m57.582s

    #   real    7m2.754s
    cat  fb.petMar3.chainMm10Link.txt
    #	39217857 bases of 1043181598 (3.759%) in intersection

    cat fb.petMar3.chainSynMm10Link.txt
    # 1381239 bases of 1043181598 (0.132%) in intersection

    time (doRecipBest.pl -workhorse=hgwdev -load -buildDir=`pwd` petMar3 mm10) \
	> rbest.log 2>&1 &
    # real    206m59.727s

    cat fb.petMar3.chainRBestMm10Link.txt
    # 21335101 bases of 1043181598 (2.045%) in intersection

#########################################################################
# lastz garter snake/thaSir1 (DONE - 2018-03-13 - Hiram)
    # establish a screen to control this job with a name to indicate what it is
    screen -S mm10ThaSir1
    mkdir /hive/data/genomes/mm10/bed/lastzThaSir1.2018-03-13
    cd /hive/data/genomes/mm10/bed/lastzThaSir1.2018-03-13

    # note: first time with this new 1.04.00 version of lastz

    printf '# Mouse vs. garter snake
BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.00/bin/lastz
BLASTZ_Y=3400
BLASTZ_L=6000
BLASTZ_K=2200
BLASTZ_M=50
BLASTZ_Q=/scratch/data/blastz/HoxD55.q
#      A    C    G    T
#     91  -90  -25 -100
#    -90  100 -100  -25
#    -25 -100  100  -90
#   -100  -25  -90  91

# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: garter snake thaSir1
SEQ2_DIR=/hive/data/genomes/thaSir1/thaSir1.2bit
SEQ2_LEN=/hive/data/genomes/thaSir1/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LAP=0
SEQ2_LIMIT=15

BASE=/hive/data/genomes/mm10/bed/lastzThaSir1.2018-03-13
TMPDIR=/dev/shm
' > DEF

    # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
    #	number of jobs, 50,000 to something under 100,000
    # when not present, SEQ2_LIMIT is a default 100
    time (doBlastzChainNet.pl -verbose=2 \
        `pwd`/DEF -syntenicNet \
        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
        -chainMinScore=5000 -chainLinearGap=loose) > do.log 2>&1 &
    #	real    112m40.572s

    cat fb.mm10.chainThaSir1Link.txt
    #	78464036 bases of 2652783500 (2.958%) in intersection

    time (doRecipBest.pl -workhorse=hgwdev -load -buildDir=`pwd` mm10 thaSir1) \
	> rbest.log 2>&1 &
    # real    266m17.520s

    cat fb.mm10.chainRBestThaSir1Link.txt
    # 54099233 bases of 2652783500 (2.039%) in intersection

    #	and for the swap
    mkdir /hive/data/genomes/thaSir1/bed/blastz.mm10.swap
    cd /hive/data/genomes/thaSir1/bed/blastz.mm10.swap
    time (doBlastzChainNet.pl -verbose=2 -syntenicNet \
	/hive/data/genomes/mm10/bed/lastzThaSir1.2018-03-13/DEF \
          -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
           -swap -chainMinScore=5000 -chainLinearGap=loose) > swap.log 2>&1 &
    #	real    11m28.892s

    cat  fb.thaSir1.chainMm10Link.txt
    #	63814138 bases of 1122701795 (5.684%) in intersection
    cat fb.thaSir1.chainSynMm10Link.txt
    # 20728394 bases of 1122701795 (1.846%) in intersection

    time (doRecipBest.pl -workhorse=hgwdev -load -buildDir=`pwd` thaSir1 mm10) \
	> rbest.log 2>&1 &
    # real    234m31.934s

    cat fb.thaSir1.chainRBestMm10Link.txt
    # 54778217 bases of 1122701795 (4.879%) in intersection

##############################################################################
# LASTZ cat felCat9 (DONE - 2018-03-14 - Hiram)
    # establish a screen to control this job with a name to indicate what it is
    screen -S mm10FelCat9
    mkdir /hive/data/genomes/mm10/bed/lastzFelCat9.2018-03-14
    cd /hive/data/genomes/mm10/bed/lastzFelCat9.2018-03-14

    printf '# cat vs mouse
BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.00/bin/lastz
# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: cat FelCat9
SEQ2_DIR=/hive/data/genomes/felCat9/felCat9.2bit
SEQ2_LEN=/hive/data/genomes/felCat9/chrom.sizes
SEQ2_CHUNK=20000000
SEQ2_LAP=0
SEQ2_LIMIT=50

BASE=/hive/data/genomes/mm10/bed/lastzFelCat9.2018-03-14
TMPDIR=/dev/shm
' > DEF

    time (doBlastzChainNet.pl -verbose=2 \
	`pwd`/DEF \
	-syntenicNet \
	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
	-chainMinScore=3000 -chainLinearGap=medium) > do.log 2>&1 &
    #   real    395m23.091s

    cat fb.mm10.chainFelCat9Link.txt
    #   801023018 bases of 2652783500 (30.196%) in intersection

    time (doRecipBest.pl -load  mm10 felCat9 -buildDir=`pwd` \
	-workhorse=hgwdev) > rbest.log 2>&1 &
    #	real    486m55.606s

    cat fb.mm10.chainRBestFelCat9Link.txt
    # 761411281 bases of 2652783500 (28.702%) in intersection

    mkdir /hive/data/genomes/felCat9/bed/blastz.mm10.swap
    cd /hive/data/genomes/felCat9/bed/blastz.mm10.swap
    time (doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzFelCat9.2018-03-14/DEF \
	-swap -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
	-chainMinScore=3000 -chainLinearGap=medium) > swap.log 2>&1 &
    #	 real    70m51.860s

    cat fb.felCat9.chainMm10Link.txt
    #   779862191 bases of 2476453204 (31.491%) in intersection
    cat fb.felCat9.chainSynMm10Link.txt
    # 754481540 bases of 2476453204 (30.466%) in intersection

    time (doRecipBest.pl -load  felCat9 mm10 -buildDir=`pwd` \
	-workhorse=hgwdev) > rbest.log 2>&1 &
    # real    375m4.937s

    cat fb.felCat9.chainRBestMm10Link.txt
    # 760753851 bases of 2476453204 (30.719%) in intersection

##############################################################################
# LASTZ Beaver casCan1 vs. mouse/Mm10 (DONE - 2018-03-19 - Hiram)
    mkdir /hive/data/genomes/mm10/bed/lastzCasCan1.2018-03-19
    cd /hive/data/genomes/mm10/bed/lastzCasCan1.2018-03-19

    # note: using the 1.04.00 version of lastz (first used for thaSir1, 2018-03-13)

    printf '# Beaver vs. mouse
BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.00/bin/lastz
# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LIMIT=50
SEQ1_LAP=10000

# QUERY: Beaver
SEQ2_DIR=/hive/data/genomes/casCan1/casCan1.2bit
SEQ2_LEN=/hive/data/genomes/casCan1/chrom.sizes
SEQ2_CHUNK=20000000
SEQ2_LIMIT=50
SEQ2_LAP=0

BASE=/hive/data/genomes/mm10/bed/lastzCasCan1.2018-03-19
TMPDIR=/dev/shm
' > DEF

    #	establish a screen to control this job
    time (doBlastzChainNet.pl -verbose=2 \
	`pwd`/DEF \
	-syntenicNet \
	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
	-chainMinScore=3000 -chainLinearGap=medium) > do.log 2>&1 &
    #   real    455m47.982s

    cat fb.mm10.chainCasCan1Link.txt
    #	969752969 bases of 2652783500 (36.556%) in intersection

    time (doRecipBest.pl -load -workhorse=hgwdev -buildDir=`pwd` mm10 casCan1) \
	> rbest.log 2>&1 &
    # real    981m12.451s

    cat fb.mm10.chainRBestCasCan1Link.txt
    # 912108399 bases of 2652783500 (34.383%) in intersection

    mkdir /hive/data/genomes/casCan1/bed/blastz.mm10.swap
    cd /hive/data/genomes/casCan1/bed/blastz.mm10.swap
    time (doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzCasCan1.2018-03-19/DEF \
	-swap -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
	-chainMinScore=3000 -chainLinearGap=medium) > swap.log 2>&1
    #	real    100m12.450s

    cat fb.casCan1.chainMm10Link.txt
    # 1027587643 bases of 2517974654 (40.810%) in intersection
    cat fb.casCan1.chainSynMm10Link.txt
    # 876969229 bases of 2517974654 (34.828%) in intersection

    time (doRecipBest.pl -load -workhorse=hgwdev -buildDir=`pwd` casCan1 mm10) \
	> rbest.log 2>&1
    # real    1280m7.127s

    cat fb.casCan1.chainRBestMm10Link.txt
    # 911437520 bases of 2517974654 (36.197%) in intersection

##############################################################################
# LASTZ mouse/mm10 Chimp/panTro6 - (DONE - 2018-03-24 - Hiram)
    mkdir /hive/data/genomes/mm10/bed/lastzPanTro6.2018-03-24
    cd /hive/data/genomes/mm10/bed/lastzPanTro6.2018-03-24

    printf '# mouse vs chimp
BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.00/bin/lastz
BLASTZ_M=254

# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000
SEQ1_LIMIT=10
# the default matrix is:
#       A     C     G     T
# A    91  -114   -31  -123
# C  -114   100  -125   -31
# G   -31  -125   100  -114
# T  -123   -31  -114    91

# QUERY: chimp panTro6
SEQ2_DIR=/hive/data/genomes/panTro6/panTro6.2bit
SEQ2_LEN=/hive/data/genomes/panTro6/chrom.sizes
SEQ2_CHUNK=20000000
SEQ2_LIMIT=40
SEQ2_LAP=0

BASE=/hive/data/genomes/mm10/bed/lastzPanTro6.2018-03-24
TMPDIR=/dev/shm
' > DEF

    time (doBlastzChainNet.pl `pwd`/DEF -verbose=2 \
        -chainMinScore=3000 -chainLinearGap=medium \
          -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
            -syntenicNet) > do.log 2>&1 &
    #  real    347m21.874s

    cat fb.mm10.chainPanTro6Link.txt
    # 935720585 bases of 2652783500 (35.273%) in intersection

    time (doRecipBest.pl -load -workhorse=hgwdev -buildDir=`pwd` \
	mm10 panTro6) > rbest.log 2>&1 &
    # real    565m15.871s

    cat fb.mm10.chainRBestPanTro6Link.txt
    # 891553355 bases of 2652783500 (33.608%) in intersection

    # and for the swap:
    mkdir /hive/data/genomes/panTro6/bed/blastz.mm10.swap
    cd /hive/data/genomes/panTro6/bed/blastz.mm10.swap

    time (doBlastzChainNet.pl -verbose=2 \
      /hive/data/genomes/mm10/bed/lastzPanTro6.2018-03-24/DEF \
        -swap -chainMinScore=3000 -chainLinearGap=medium \
          -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
            -syntenicNet) > swap.log 2>&1
    #  real    78m57.631s

    cat fb.panTro6.chainMm10Link.txt
    # 934668641 bases of 3018592990 (30.964%) in intersection
    cat fb.panTro6.chainSynMm10Link.txt
    # 889944141 bases of 3018592990 (29.482%) in intersection

    time (doRecipBest.pl -load -workhorse=hgwdev -buildDir=`pwd` \
	panTro6 mm10) > rbest.log 2>&1 &
    # real    504m47.811s

    cat fb.panTro6.chainRBestMm10Link.txt
    # 890065520 bases of 3018592990 (29.486%) in intersection

##############################################################################
# LASTZ mouse/mm10 Orangutan/ponAbe3 - (DONE - 2018-03-26 - Hiram)
    mkdir /hive/data/genomes/mm10/bed/lastzPonAbe3.2018-03-26
    cd /hive/data/genomes/mm10/bed/lastzPonAbe3.2018-03-26

    printf '# mouse vs orangutan
BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.00/bin/lastz
BLASTZ_M=254

# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000
SEQ1_LIMIT=5
# the default matrix is:
#       A     C     G     T
# A    91  -114   -31  -123
# C  -114   100  -125   -31
# G   -31  -125   100  -114
# T  -123   -31  -114    91

# QUERY: orangutan ponAbe3
SEQ2_DIR=/hive/data/genomes/ponAbe3/ponAbe3.2bit
SEQ2_LEN=/hive/data/genomes/ponAbe3/chrom.sizes
SEQ2_CHUNK=20000000
SEQ2_LIMIT=20
SEQ2_LAP=0

BASE=/hive/data/genomes/mm10/bed/lastzPonAbe3.2018-03-26
TMPDIR=/dev/shm
' > DEF

    time (doBlastzChainNet.pl `pwd`/DEF -verbose=2 \
        -chainMinScore=3000 -chainLinearGap=medium \
          -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
            -syntenicNet) > do.log 2>&1 &
    #  real    461m46.426s

    cat fb.mm10.chainPonAbe3Link.txt
    # 936755064 bases of 2652783500 (35.312%) in intersection

    time (doRecipBest.pl -load -workhorse=hgwdev -buildDir=`pwd` \
	mm10 ponAbe3) > rbest.log 2>&1 &
    # real    554m41.676s

    cat fb.mm10.chainRBestPonAbe3Link.txt
    # 892145302 bases of 2652783500 (33.631%) in intersection

    # and for the swap:
    mkdir /hive/data/genomes/ponAbe3/bed/blastz.mm10.swap
    cd /hive/data/genomes/ponAbe3/bed/blastz.mm10.swap

    time (doBlastzChainNet.pl -verbose=2 \
      /hive/data/genomes/mm10/bed/lastzPonAbe3.2018-03-26/DEF \
        -swap -chainMinScore=3000 -chainLinearGap=medium \
          -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
            -syntenicNet) > swap.log 2>&1
    #  real    78m29.160s

    cat fb.ponAbe3.chainMm10Link.txt
    # 929970181 bases of 3043444524 (30.557%) in intersection
    cat fb.ponAbe3.chainSynMm10Link.txt
    # 890801507 bases of 3043444524 (29.270%) in intersection

    time (doRecipBest.pl -load -workhorse=hgwdev -buildDir=`pwd` \
	ponAbe3 mm10) > rbest.log 2>&1 &
    # real    496m49.168s

    cat fb.ponAbe3.chainRBestMm10Link.txt
    # 890774155 bases of 3043444524 (29.269%) in intersection

#########################################################################
# LASTZ mouse/mm10 sheep/oviAri4 - (DONE - 2018-04-25 - Hiram)
    mkdir /hive/data/genomes/mm10/bed/lastzOviAri4.2018-04-25
    cd /hive/data/genomes/mm10/bed/lastzOviAri4.2018-04-25

    printf '# mouse vs sheep
BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.00/bin/lastz
BLASTZ_M=254
# default BLASTZ_Q score matrix:
#       A     C     G     T
# A    91  -114   -31  -123
# C  -114   100  -125   -31
# G   -31  -125   100  -114
# T  -123   -31  -114    91

# TARGET: mouse mm10
SEQ1_DIR=/hive/data/genomes/mm10/mm10.2bit
SEQ1_LEN=/hive/data/genomes/mm10/chrom.sizes
SEQ1_CHUNK=40000000
SEQ1_LIMIT=2
SEQ1_LAP=10000

# QUERY: sheep oviAri4
SEQ2_DIR=/hive/data/genomes/oviAri4/oviAri4.2bit
SEQ2_LEN=/hive/data/genomes/oviAri4/chrom.sizes
SEQ2_CHUNK=20000000
SEQ2_LIMIT=10
SEQ2_LAP=0

BASE=/hive/data/genomes/mm10/bed/lastzOviAri4.2018-04-25
TMPDIR=/dev/shm
' > DEF
    # << happy emacs

    time (doBlastzChainNet.pl `pwd`/DEF -verbose=2 \
        -chainMinScore=3000 -chainLinearGap=medium \
          -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
            -syntenicNet) > do.log 2>&1 &
    # Command failed:
    # ssh -x -o 'StrictHostKeyChecking = no' -o 'BatchMode = yes' hgwdev \
    #   nice /hive/data/genomes/mm10/bed/lastzOviAri4.2018-04-25/axtChain/netSynteny.csh
    #
    # real    237m24.916s

    # used the wrong version of doBlastzChainNet.pl which failed at the
    # syntenic net step. Clean up and re-try with the fixed up script:
    rm mm10.oviAri4.syn.chain.gz
    rm mm10.oviAri4.syn.net.gz

    time (doBlastzChainNet.pl `pwd`/DEF -verbose=2 \
        -continue=syntenicNet \
        -chainMinScore=3000 -chainLinearGap=medium \
          -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
            -syntenicNet) >> do.log 2>&1 &
    # real  18m40.051s

    cat fb.mm10.chainOviAri4Link.txt
    # 693504453 bases of 2652783500 (26.143%) in intersection

    time (doRecipBest.pl -load -workhorse=hgwdev -buildDir=`pwd` mm10 oviAri4) > rbest.log 2>&1 &
    # real    485m29.546s

    # and for the swap:
    mkdir /hive/data/genomes/oviAri4/bed/blastz.mm10.swap
    cd /hive/data/genomes/oviAri4/bed/blastz.mm10.swap

    time (doBlastzChainNet.pl -verbose=2 \
      /hive/data/genomes/mm10/bed/lastzOviAri4.2018-04-25/DEF \
        -swap -chainMinScore=3000 -chainLinearGap=medium \
          -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
            -syntenicNet) > swap.log 2>&1 &
    #  real    63m12.935s

    cat fb.oviAri4.chainMm10Link.txt
    # 680117358 bases of 2587515673 (26.285%) in intersection

    time (doRecipBest.pl -load -workhorse=hgwdev -buildDir=`pwd` oviAri4 mm10) > rbest.log 2>&1 &
    # real    437m1.637s

#########################################################################
# RepeatMasker Visualization track update (TBD - 2018-05-15 - ChrisL)

    screen -S rmskJoined.2018-05-15

    # if this is an update to an already existing rmsk build, re-run
    # masking with new libraries. Otherwise skip to rmskJoined below
    mkdir /hive/data/genomes/mm10/bed/repeatMasker.2018-05-15
    cd /hive/data/genomes/mm10/bed/repeatMasker.2018-05-15

    time (doRepeatMasker.pl -stop=mask -bigClusterHub=ku \
       -workhorse=hgwdev -dbHost=hgwdev -buildDir=`pwd` mm10) > mask.log 2>&1 &
    # real    705m12.538s

    # fill in the empty grep pattern below to get rid of the missing id items (not necessary this run):
    # grep -v "" \
    #     mm10.fa.out > clean.mm10.fa.out
    # mv clean.mm10.fa.out mm10.fa.out

    # finish the last step of doCat.csh, if necessary:
    # /cluster/bin/scripts/extractNestedRepeats.pl mm10.fa.out | sort -k1,1 -k2,2n > mm10.nestedRepeats.bed

    # rmskJoinedCurrent steps
    mkdir /hive/data/genomes/mm10/bed/rmskJoined.2018-05-15
    cd /hive/data/genomes/mm10/bed/rmskJoined.2018-05-15

    ln -s ../repeatMasker.2018-05-15/mm10.sorted.fa.out .
    ln -s ../repeatMasker.2018-05-15/mm10.fa.align .

    time (/scratch/data/RepeatMasker/util/rmToUCSCTables.pl \
        -out mm10.sorted.fa.out -align mm10.fa.align.gz) > rerun.log 2>&1 &
    # real    102m53.576s

    # confirm the counts are different from the previous version:
    # wc -l ../rmskJoined/mm10.fa.align.tsv ../rmskJoined/mm10.sorted.fa.join.bed ../rmskJoined/mm10.sorted.fa.out.tsv
    #    5918456 ../rmskJoined/mm10.fa.align.tsv
    #    4657599 ../rmskJoined/mm10.sorted.fa.join.bed
    #    5249545 ../rmskJoined/mm10.sorted.fa.out.tsv
    #   15825600 total
    # wc -l *.tsv
    #    5888031 mm10.fa.align.tsv
    #    4646880 mm10.sorted.fa.join.tsv
    #    5235053 mm10.sorted.fa.out.tsv
    #   15769964 total

    # substitute rmskJoinedBaseline for rmskJoinedCurrent if this is the first version for this assembly
    hgLoadBed -sqlTable=$HOME/kent/src/hg/lib/rmskJoined.sql \
        -renameSqlTable -verbose=4 -tab \
            -type=bed4+9 -as=$HOME/kent/src/hg/lib/rmskJoined.as mm10 \
                rmskJoinedCurrent mm10.sorted.fa.join.tsv \
                    > loadJoined.log 2>&1
    # Error line 1028733 of mm10.sorted.fa.join.tsv:
    # chromStart after chromEnd (21000277 > 21000266)
    # is it the only one?
    awk -F'\t' '{if ($2 > $3) sum+=1} END {print sum}' mm10.sorted.fa.join.tsv
    # 1

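    # remove it and run the hgLoadBed command above again
    # (note: this filter keeps only rows with chromStart < chromEnd, so any
    # zero-length rows, where chromStart == chromEnd, would be dropped too):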
    awk -F'\t' '{if ($2 < $3) print;}' mm10.sorted.fa.join.tsv  > mm10.sorted.fa.join.cleaned
    mv mm10.sorted.fa.join.cleaned mm10.sorted.fa.join.tsv

    # substitute rmskAlignBaseline for rmskAlignCurrent if this is the first version for this assembly
    hgLoadSqlTab mm10 rmskAlignCurrent \
        /cluster/home/chmalee/kent/src/hg/lib/rmskAlign.sql \
            mm10.fa.align.tsv > loadAlign.log 2>&1

    # substitute rmskOutBaseline for rmskOutCurrent if this is the first version for this assembly
    hgLoadOutJoined -verbose=2 -table=rmskOutCurrent mm10 mm10.sorted.fa.out > loadOut.log 2>&1

    featureBits -countGaps mm10 rmskJoinedBaseline
    # 2243948952 bases of 2730871774 (82.170%) in intersection
    featureBits -countGaps mm10 rmskJoinedCurrent
    # 2249729653 bases of 2730871774 (82.381%) in intersection

#########################################################################
# LASTZ mouse/mm10 horse/equCab3 - (DONE - 2018-05-25 - Hiram)
    mkdir /hive/data/genomes/mm10/bed/lastzEquCab3.2018-05-25
    cd /hive/data/genomes/mm10/bed/lastzEquCab3.2018-05-25

    printf '# mouse vs horse
BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.00/bin/lastz
BLASTZ_M=254
# default BLASTZ_Q score matrix:
#       A     C     G     T
# A    91  -114   -31  -123
# C  -114   100  -125   -31
# G   -31  -125   100  -114
# T  -123   -31  -114    91

# TARGET: mouse mm10
SEQ1_DIR=/hive/data/genomes/mm10/mm10.2bit
SEQ1_LEN=/hive/data/genomes/mm10/chrom.sizes
SEQ1_CHUNK=40000000
SEQ1_LIMIT=2
SEQ1_LAP=10000

# QUERY: horse equCab3
SEQ2_DIR=/hive/data/genomes/equCab3/equCab3.2bit
SEQ2_LEN=/hive/data/genomes/equCab3/chrom.sizes
SEQ2_CHUNK=20000000
SEQ2_LIMIT=10
SEQ2_LAP=0

BASE=/hive/data/genomes/mm10/bed/lastzEquCab3.2018-05-25
TMPDIR=/dev/shm
' > DEF
    # << happy emacs

    time (doBlastzChainNet.pl `pwd`/DEF -verbose=2 \
        -chainMinScore=3000 -chainLinearGap=medium \
          -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
            -syntenicNet) > do.log 2>&1 &
    # real    605m50.368s

    cat fb.mm10.chainEquCab3Link.txt
    # 921489718 bases of 2652783500 (34.737%) in intersection

    cat fb.mm10.chainSynEquCab3Link.txt
    # 876836391 bases of 2652783500 (33.053%) in intersection

    time (doRecipBest.pl -load -workhorse=hgwdev -buildDir=`pwd` mm10 equCab3) > rbest.log 2>&1 &
    # real    398m20.685s

    cat fb.mm10.chainRBest.EquCab3.txt
    # 876785778 bases of 2652783500 (33.052%) in intersection

    # and for the swap:
    mkdir /hive/data/genomes/equCab3/bed/blastz.mm10.swap
    cd /hive/data/genomes/equCab3/bed/blastz.mm10.swap

    time (doBlastzChainNet.pl -verbose=2 \
      /hive/data/genomes/mm10/bed/lastzEquCab3.2018-05-25/DEF \
        -swap -chainMinScore=3000 -chainLinearGap=medium \
          -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
            -syntenicNet) > swap.log 2>&1 &
    #  real    83m14.250s

    cat fb.equCab3.chainMm10Link.txt
    # 930516778 bases of 2497530654 (37.257%) in intersection
    cat fb.equCab3.chainSynMm10Link.txt
    # 897238830 bases of 2497530654 (35.925%) in intersection

    time (doRecipBest.pl -load -workhorse=hgwdev -buildDir=`pwd` equCab3 mm10) > rbest.log 2>&1 &
    # real    318m40.520s

    cat fb.equCab3.chainRBest.Mm10.txt
    # 875954606 bases of 2497530654 (35.073%) in intersection

#########################################################################
# LASTZ mouse/mm10 Minke whale/balAcu1 - (DONE - 2018-06-13 - Hiram)
    mkdir /hive/data/genomes/mm10/bed/lastzBalAcu1.2018-06-13
    cd /hive/data/genomes/mm10/bed/lastzBalAcu1.2018-06-13

    printf '# mouse vs Minke whale
BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.00/bin/lastz
BLASTZ_M=254
# default BLASTZ_Q score matrix:
#       A     C     G     T
# A    91  -114   -31  -123
# C  -114   100  -125   -31
# G   -31  -125   100  -114
# T  -123   -31  -114    91

# TARGET: mouse mm10
SEQ1_DIR=/hive/data/genomes/mm10/mm10.2bit
SEQ1_LEN=/hive/data/genomes/mm10/chrom.sizes
SEQ1_CHUNK=40000000
SEQ1_LIMIT=2
SEQ1_LAP=10000

# QUERY: Minke whale balAcu1
SEQ2_DIR=/hive/data/genomes/balAcu1/balAcu1.2bit
SEQ2_LEN=/hive/data/genomes/balAcu1/chrom.sizes
SEQ2_CHUNK=20000000
SEQ2_LIMIT=40
SEQ2_LAP=0

BASE=/hive/data/genomes/mm10/bed/lastzBalAcu1.2018-06-13
TMPDIR=/dev/shm
' > DEF
    # << happy emacs

    time (doBlastzChainNet.pl `pwd`/DEF -verbose=2 \
        -chainMinScore=3000 -chainLinearGap=medium \
          -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
            -syntenicNet) > do.log 2>&1 &
    # real    190m45.265s

    cat fb.mm10.chainBalAcu1Link.txt
    # 851790136 bases of 2652783500 (32.109%) in intersection

    cat fb.mm10.chainSynBalAcu1Link.txt
    # 806407823 bases of 2652783500 (30.399%) in intersection

    time (doRecipBest.pl -load -workhorse=hgwdev -buildDir=`pwd` mm10 balAcu1) > rbest.log 2>&1 &
    # real    287m58.329s

    cat fb.mm10.chainRBest.BalAcu1.txt
    # 811435554 bases of 2652783500 (30.588%) in intersection

    # and for the swap:
    mkdir /hive/data/genomes/balAcu1/bed/blastz.mm10.swap
    cd /hive/data/genomes/balAcu1/bed/blastz.mm10.swap

    time (doBlastzChainNet.pl -verbose=2 \
      /hive/data/genomes/mm10/bed/lastzBalAcu1.2018-06-13/DEF \
        -swap -chainMinScore=3000 -chainLinearGap=medium \
          -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
            -syntenicNet) > swap.log 2>&1 &
    #  real    67m0.560s

    cat fb.balAcu1.chainMm10Link.txt
    # 832845143 bases of 2286657046 (36.422%) in intersection

    cat fb.balAcu1.chainSynMm10Link.txt
    # 802734600 bases of 2286657046 (35.105%) in intersection

    time (doRecipBest.pl -load -workhorse=hgwdev -buildDir=`pwd` balAcu1 mm10) > rbest.log 2>&1 &
    # real    241m51.110s

    cat fb.balAcu1.chainRBest.Mm10.txt
    # 810427625 bases of 2286657046 (35.442%) in intersection

##############################################################################
# LASTZ mouse/mm10 Axolotl/ambMex1 - (DONE - 2018-07-09 - Hiram)
    mkdir /hive/data/genomes/mm10/bed/lastzAmbMex1.2018-07-09
    cd /hive/data/genomes/mm10/bed/lastzAmbMex1.2018-07-09

    printf '# mouse vs Axolotl
BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.00/bin/lastz
BLASTZ_M=254
# default BLASTZ_Q score matrix:
#       A     C     G     T
# A    91  -114   -31  -123
# C  -114   100  -125   -31
# G   -31  -125   100  -114
# T  -123   -31  -114    91

# TARGET: mouse mm10
SEQ1_DIR=/hive/data/genomes/mm10/mm10.2bit
SEQ1_LEN=/hive/data/genomes/mm10/chrom.sizes
SEQ1_CHUNK=40000000
SEQ1_LIMIT=2
SEQ1_LAP=10000

# QUERY: Axolotl ambMex1
SEQ2_DIR=/hive/data/genomes/ambMex1/ambMex1.2bit
SEQ2_LEN=/hive/data/genomes/ambMex1/chrom.sizes
SEQ2_CHUNK=80000000
SEQ2_LIMIT=800
SEQ2_LAP=0

BASE=/hive/data/genomes/mm10/bed/lastzAmbMex1.2018-07-09
TMPDIR=/dev/shm
' > DEF
    # << happy emacs

    time (doBlastzChainNet.pl `pwd`/DEF -verbose=2 \
        -chainMinScore=5000 -chainLinearGap=loose \
          -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
            -syntenicNet) > do.log 2>&1 &
    # real    881m7.910s

    cat fb.mm10.chainAmbMex1Link.txt
    # 52143617 bases of 2652783500 (1.966%) in intersection

    cat fb.mm10.chainSynAmbMex1Link.txt
    # 2686570 bases of 2652783500 (0.101%) in intersection

    time (doRecipBest.pl -load -workhorse=hgwdev -buildDir=`pwd` mm10 ambMex1) > rbest.log 2>&1 &
    # real    478m39.331s

    # something odd went haywire at the download step
    time (doRecipBest.pl -load -continue=download -workhorse=hgwdev -buildDir=`pwd` mm10 ambMex1) > download.log 2>&1 &
    # real    1m42.883s

    cat fb.mm10.chainRBest.AmbMex1.txt
    # 36938030 bases of 2652783500 (1.392%) in intersection

    # and for the swap:
    mkdir /hive/data/genomes/ambMex1/bed/blastz.mm10.swap
    cd /hive/data/genomes/ambMex1/bed/blastz.mm10.swap

    time (doBlastzChainNet.pl -verbose=2 \
      /hive/data/genomes/mm10/bed/lastzAmbMex1.2018-07-09/DEF \
        -swap -chainMinScore=5000 -chainLinearGap=loose \
          -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
            -syntenicNet) > swap.log 2>&1 &
    #  real    39m28.757s

    cat fb.ambMex1.chainMm10Link.txt
    # 87124587 bases of 28366694468 (0.307%) in intersection

    cat fb.ambMex1.chainSynMm10Link.txt
    # 2893381 bases of 28366694468 (0.010%) in intersection

    time (doRecipBest.pl -load -workhorse=hgwdev -buildDir=`pwd` ambMex1 mm10) > rbest.log 2>&1 &
    # real    568m10.621s

    # something odd went haywire at the download step
    time (doRecipBest.pl -load -continue=download -workhorse=hgwdev -buildDir=`pwd` ambMex1 mm10) > download.log 2>&1 &
    # real    3m16.404s

    cat fb.ambMex1.chainRBest.Mm10.txt
    # 38584422 bases of 28366694468 (0.136%) in intersection

##############################################################################
# LASTZ mouse/mm10 vs. chicken/galGal6 - (DONE - 2018-10-12 - Hiram)
    mkdir /hive/data/genomes/mm10/bed/lastzGalGal6.2018-10-12
    cd /hive/data/genomes/mm10/bed/lastzGalGal6.2018-10-12

    printf "# Mouse vs. chicken
BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.00/bin/lastz
BLASTZ_Y=3400
BLASTZ_L=6000
BLASTZ_K=2200
BLASTZ_Q=/scratch/data/blastz/HoxD55.q
#      A    C    G    T
#     91  -90  -25 -100
#    -90  100 -100  -25
#    -25 -100  100  -90
#   -100  -25  -90  91

# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: chicken galGal6
SEQ2_DIR=/hive/data/genomes/galGal6/galGal6.2bit
SEQ2_LEN=/hive/data/genomes/galGal6/chrom.sizes
SEQ2_CHUNK=20000000
SEQ2_LAP=0
SEQ2_LIMIT=50

BASE=/hive/data/genomes/mm10/bed/lastzGalGal6.2018-10-12
TMPDIR=/dev/shm
" > DEF

    time (doBlastzChainNet.pl `pwd`/DEF -verbose=2 \
        -chainMinScore=5000 -chainLinearGap=loose \
          -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
            -syntenicNet) > do.log 2>&1
    # real    84m14.188s

    cat fb.mm10.chainGalGal6Link.txt
    # 101151132 bases of 2652783500 (3.813%) in intersection
    cat fb.mm10.chainSynGalGal6Link.txt
    # 70707720 bases of 2652783500 (2.665%) in intersection

    time (doRecipBest.pl -load -workhorse=hgwdev -buildDir=`pwd` mm10 galGal6) > rbest.log 2>&1 &
    # real    116m19.316s

    cat fb.mm10.chainRBest.GalGal6.txt
    # 79649474 bases of 2652783500 (3.002%) in intersection

    # and for the swap:
    mkdir /hive/data/genomes/galGal6/bed/blastz.mm10.swap
    cd /hive/data/genomes/galGal6/bed/blastz.mm10.swap

    time (doBlastzChainNet.pl -verbose=2 \
      /hive/data/genomes/mm10/bed/lastzGalGal6.2018-10-12/DEF \
        -swap -chainMinScore=5000 -chainLinearGap=loose \
          -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
            -syntenicNet) > swap.log 2>&1
    #  real    6m41.043s

    cat fb.galGal6.chainMm10Link.txt
    # 88539346 bases of 1055588482 (8.388%) in intersection


    time (doRecipBest.pl -load -workhorse=hgwdev -buildDir=`pwd` galGal6 mm10) > rbest.log 2>&1 &
    # real    94m11.007s

    cat fb.galGal6.chainRBest.Mm10.txt
    # 79474812 bases of 1055588482 (7.529%) in intersection

#########################################################################
# LASTZ mouse/mm10 cow/bosTau9 - (DONE - 2018-11-08 - Hiram)
    mkdir /hive/data/genomes/mm10/bed/lastzBosTau9.2018-11-08
    cd /hive/data/genomes/mm10/bed/lastzBosTau9.2018-11-08

    printf '# mouse vs cow
BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.00/bin/lastz
BLASTZ_M=254
# default BLASTZ_Q score matrix:
#       A     C     G     T
# A    91  -114   -31  -123
# C  -114   100  -125   -31
# G   -31  -125   100  -114
# T  -123   -31  -114    91

# TARGET: mouse mm10
SEQ1_DIR=/hive/data/genomes/mm10/mm10.2bit
SEQ1_LEN=/hive/data/genomes/mm10/chrom.sizes
SEQ1_CHUNK=40000000
SEQ1_LIMIT=2
SEQ1_LAP=10000

# QUERY: cow bosTau9
SEQ2_DIR=/hive/data/genomes/bosTau9/bosTau9.2bit
SEQ2_LEN=/hive/data/genomes/bosTau9/chrom.sizes
SEQ2_CHUNK=20000000
SEQ2_LIMIT=10
SEQ2_LAP=0

BASE=/hive/data/genomes/mm10/bed/lastzBosTau9.2018-11-08
TMPDIR=/dev/shm
' > DEF
    # << happy emacs

    time (doBlastzChainNet.pl `pwd`/DEF -verbose=2 \
        -chainMinScore=3000 -chainLinearGap=medium \
          -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
            -syntenicNet) > do.log 2>&1 &
    # real    211m46.258s

    cat fb.mm10.chainBosTau9Link.txt
    # 703580224 bases of 2652783500 (26.522%) in intersection
    cat fb.mm10.chainSynBosTau9Link.txt
    # 659095603 bases of 2652783500 (24.845%) in intersection

    time (doRecipBest.pl -load -workhorse=hgwdev -buildDir=`pwd` mm10 bosTau9) > rbest.log 2>&1 &
    # real    214m24.819s

    cat fb.mm10.chainRBest.BosTau9.txt
    # 667950653 bases of 2652783500 (25.179%) in intersection

    # and for the swap:
    mkdir /hive/data/genomes/bosTau9/bed/blastz.mm10.swap
    cd /hive/data/genomes/bosTau9/bed/blastz.mm10.swap

    time (doBlastzChainNet.pl -verbose=2 \
      /hive/data/genomes/mm10/bed/lastzBosTau9.2018-11-08/DEF \
        -swap -chainMinScore=3000 -chainLinearGap=medium \
          -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
            -syntenicNet) > swap.log 2>&1 &
    #  real    41m22.962s

    cat fb.bosTau9.chainMm10Link.txt
    # 695248613 bases of 2715853792 (25.600%) in intersection
    cat fb.bosTau9.chainSynMm10Link.txt
    # 660591041 bases of 2715853792 (24.324%) in intersection

    time (doRecipBest.pl -load -workhorse=hgwdev -buildDir=`pwd` bosTau9 mm10) > rbest.log 2>&1 &
    # real    204m36.465s

    cat fb.bosTau9.chainRBest.Mm10.txt
    # 667305554 bases of 2715853792 (24.571%) in intersection

##############################################################################
# LASTZ mouse/mm10 vs. Japanese quail/cotJap2 - (DONE - 2018-11-15 - Hiram)
    mkdir /hive/data/genomes/mm10/bed/lastzCotJap2.2018-11-15
    cd /hive/data/genomes/mm10/bed/lastzCotJap2.2018-11-15

    printf "# Mouse vs. Japanese quail
BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.00/bin/lastz
BLASTZ_Y=3400
BLASTZ_L=6000
BLASTZ_K=2200
BLASTZ_Q=/scratch/data/blastz/HoxD55.q
#      A    C    G    T
#     91  -90  -25 -100
#    -90  100 -100  -25
#    -25 -100  100  -90
#   -100  -25  -90  91

# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: Japanese quail cotJap2
SEQ2_DIR=/hive/data/genomes/cotJap2/cotJap2.2bit
SEQ2_LEN=/hive/data/genomes/cotJap2/chrom.sizes
SEQ2_CHUNK=20000000
SEQ2_LAP=0
SEQ2_LIMIT=50

BASE=/hive/data/genomes/mm10/bed/lastzCotJap2.2018-11-15
TMPDIR=/dev/shm
" > DEF

    time (doBlastzChainNet.pl `pwd`/DEF -verbose=2 \
        -chainMinScore=5000 -chainLinearGap=loose \
          -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
            -syntenicNet) > do.log 2>&1
    # real    82m16.032s

    cat fb.mm10.chainCotJap2Link.txt
    # 97251364 bases of 2652783500 (3.666%) in intersection
    cat fb.mm10.chainSynCotJap2Link.txt
    # 67653818 bases of 2652783500 (2.550%) in intersection

    time (doRecipBest.pl -load -workhorse=hgwdev -buildDir=`pwd` mm10 cotJap2) > rbest.log 2>&1 &
    # real    104m58.905s

    cat fb.mm10.chainRBest.CotJap2.txt
    # 76298136 bases of 2652783500 (2.876%) in intersection

    # and for the swap:
    mkdir /hive/data/genomes/cotJap2/bed/blastz.mm10.swap
    cd /hive/data/genomes/cotJap2/bed/blastz.mm10.swap

    time (doBlastzChainNet.pl -verbose=2 \
      /hive/data/genomes/mm10/bed/lastzCotJap2.2018-11-15/DEF \
        -swap -chainMinScore=5000 -chainLinearGap=loose \
          -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
            -syntenicNet) > swap.log 2>&1
    #  real    6m37.873s

    cat fb.cotJap2.chainMm10Link.txt
    # 82592561 bases of 917263224 (9.004%) in intersection
    cat fb.cotJap2.chainSynMm10Link.txt
    # 66583746 bases of 917263224 (7.259%) in intersection

    # mistakenly started this on ku; it failed at the download step since
    # it could not see the /gbdb/mm10/ hierarchy:
    time (doRecipBest.pl -load -workhorse=hgwdev -buildDir=`pwd` cotJap2 mm10) > rbest.log 2>&1 &
    # real    79m48.767s

    # continue on hgwdev
    time (doRecipBest.pl -load -workhorse=hgwdev -continue=download -buildDir=`pwd` cotJap2 mm10) > rbest.download.log 2>&1 &
    # real    1m40.970s

    cat fb.cotJap2.chainRBest.Mm10.txt
    # 76078816 bases of 917263224 (8.294%) in intersection

#########################################################################
# 2019-01-17: tabula muris track (max)
# download 7Tb of data from Amazon, using token, CZI pays (got token by email, via Angela Pisco, James Webber)
export AWS_ACCESS_KEY_ID=xxxxx
export AWS_SESSION_TOKEN=xxxxx
aws s3 sync s3://czbiohub-tabula-muris/tabula_muris_bam_files/ . --delete
cd ~/projects/czi/cbData/ucsc/tabulaMuris
csvToTab TM_facs_metadata.csv > TM_facs_metadata.tsv
cat TM_facs_metadata.csv | tr '.' '-' | csvToTab > TM_facs_metadata.fix.tsv
# this is not necessary anymore, the new mm10.sizes file comes with cbTrackHub and
# includes the ERCCs
hgsql -N -e 'select alias,chrom from chromAlias;' mm10 > mm10.chromAlias.tab
faSize ERCC92.fa -detailed > ERCC.sizes
cat /hive/data/genomes/mm10/chrom.sizes ERCC.sizes > mm10ercc.sizes

# the next one requires single cell browser, from https://github.com/maximilianh/cellBrowser
cbTrackHub mm10 bam/ TM_facs_metadata.fix.tsv cell_ontology_class hub/ --name "TabulaMuris"

#########################################################################
# LIFTOVER TO GRCm38B (DONE - 2019-03-01 - Hiram)
    ssh hgwdev
    mkdir /hive/data/genomes/mm10/bed/blat.GRCm38B.2019-03-01
    cd /hive/data/genomes/mm10/bed/blat.GRCm38B.2019-03-01
    doSameSpeciesLiftOver.pl -verbose=2 \
	-fileServer=hgwdev \
        -debug -bigClusterHub=ku -dbHost=hgwdev -workhorse=hgwdev \
        -ooc=/hive/data/genomes/mm10/jkStuff/mm10.11.ooc \
         mm10 GRCm38B
    doSameSpeciesLiftOver.pl -verbose=2 \
	-debug -fileServer=hgwdev \
	-query2Bit=/hive/data/genomes/mm10/mm10.2bit \
	-querySizes=/hive/data/genomes/mm10/chrom.sizes \
	-target2Bit=/hive/data/genomes/GRCm38B/GRCm38B.2bit \
	-targetSizes=/hive/data/genomes/GRCm38B/chrom.sizes \
        -bigClusterHub=ku -dbHost=hgwdev -workhorse=hgwdev \
        -ooc=/hive/data/genomes/mm10/mm10.11.ooc mm10 GRCm38B

    time (doSameSpeciesLiftOver.pl -verbose=2 \
	-fileServer=hgwdev \
	-query2Bit=/hive/data/genomes/mm10/mm10.2bit \
	-querySizes=/hive/data/genomes/mm10/chrom.sizes \
	-target2Bit=/hive/data/genomes/GRCm38B/GRCm38B.2bit \
	-targetSizes=/hive/data/genomes/GRCm38B/chrom.sizes \
        -bigClusterHub=ku -dbHost=hgwdev -workhorse=hgwdev \
        -ooc=/hive/data/genomes/mm10/mm10.11.ooc \
         mm10 GRCm38B) > doLiftOverToGRCm38B.log 2>&1
    # real    156m50.777s

    # see if the liftOver menus function in the browser from mm10 to GRCm38B

#########################################################################
#############################################################################
# hgPal downloads (rebuilt knownGene and knownCanonical 2019-04-01 braney )

    ssh hgwdev
    mkdir /hive/data/genomes/mm10/bed/multiz60way/pal.ucsc18
    cd /hive/data/genomes/mm10/bed/multiz60way/pal.ucsc18
    cat ../species.list | tr '[ ]' '[\n]' > order.list

    # Write $gp.jobs: walking chrom.sizes in ascending numeric order of its
    # second column, emit one backgrounded mafGene command for nucleotide
    # exons and one for amino-acid exons per chromosome.  After every seventh
    # chromosome a "date" and a "wait" are emitted, so at most 14 mafGene
    # pipelines run concurrently when the job file is executed.
    export db=mm10
    export mz=multiz60way
    export gp=knownGene
    mkdir exonAA exonNuc
    export I=0
    for C in $(sort -nk2 ../../../chrom.sizes | cut -f1)
    do
        I=$((I + 1))
        printf '%s\n' \
          "mafGene -chrom=$C -exons -noTrans $db $mz $gp order.list stdout | gzip -c > exonNuc/$C.exonNuc.fa.gz &" \
          "mafGene -chrom=$C -exons $db $mz $gp order.list stdout | gzip -c > exonAA/$C.exonAA.fa.gz &"
        if [ "$I" -gt 6 ]; then
            printf 'date\nwait\n'
            I=0
        fi
    done > $gp.jobs
    # final barrier for the last, possibly partial, batch
    printf 'date\nwait\n' >> $gp.jobs

    time sh -x ./$gp.jobs > $gp.jobs.log 2>&1 &
    # real    59m23.279s

    time zcat exonAA/*.gz | gzip -c > $gp.$mz.exonAA.fa.gz
    # real    1m35.590s
    time zcat exonNuc/*.gz | gzip -c > $gp.$mz.exonNuc.fa.gz
    # real    7m46.538s

    export mz=multiz60way
    export gp=knownGene
    export db=mm10
    export pd=/usr/local/apache/htdocs-hgdownload/goldenPath/$db/$mz/alignments
    rm -rf $pd
    mkdir -p $pd
    ln -s `pwd`/$gp.$mz.exonAA.fa.gz $pd/$gp.exonAA.fa.gz
    ln -s `pwd`/$gp.$mz.exonNuc.fa.gz $pd/$gp.exonNuc.fa.gz

    rm -rf exonAA exonNuc

    cd /hive/data/genomes/mm10/bed/multiz60way/pal
    # Same job-list generation as for knownGene, here for the ncbiRefSeq gene
    # set: two backgrounded mafGene pipelines per chromosome (nucleotide and
    # amino acid), with a "date" / "wait" barrier after every seventh
    # chromosome and once more at the very end of the job file.
    export db=mm10
    export mz=multiz60way
    export gp=ncbiRefSeq
    mkdir exonAA exonNuc
    export I=0
    {
        for C in $(sort -nk2 ../../../chrom.sizes | cut -f1)
        do
            I=$((I + 1))
            echo "mafGene -chrom=$C -exons -noTrans $db $mz $gp order.list stdout | gzip -c > exonNuc/$C.exonNuc.fa.gz &"
            echo "mafGene -chrom=$C -exons $db $mz $gp order.list stdout | gzip -c > exonAA/$C.exonAA.fa.gz &"
            if [ "$I" -gt 6 ]; then
                echo "date"
                echo "wait"
                I=0
            fi
        done
        # closing barrier for the last batch
        echo "date"
        echo "wait"
    } > $gp.jobs

    time sh -x $gp.jobs > $gp.jobs.log 2>&1
    # real    126m0.688s

    export mz=multiz60way
    export gp=ncbiRefSeq
    export db=mm10
    time zcat exonAA/*.gz | gzip -c > $gp.$mz.exonAA.fa.gz
    # real    2m56.817s
    time zcat exonNuc/*.gz | gzip -c > $gp.$mz.exonNuc.fa.gz
    # real    14m8.080s

    rm -rf exonAA exonNuc

    # we're only distributing exons at the moment
    export mz=multiz60way
    export gp=ncbiRefSeq
    export db=mm10
    export pd=/usr/local/apache/htdocs-hgdownload/goldenPath/$db/$mz/alignments
    ln -s `pwd`/$gp.$mz.exonAA.fa.gz $pd/$gp.exonAA.fa.gz
    ln -s `pwd`/$gp.$mz.exonNuc.fa.gz $pd/$gp.exonNuc.fa.gz

    ### And knownCanonical
    cd /hive/data/genomes/mm10/bed/multiz60way/pal
    export mz=multiz60way
    export gp=knownCanonical
    export db=mm10
    mkdir exonAA exonNuc knownCanonical

    time cut -f1 ../../../chrom.sizes | while read C
    do
        echo $C 1>&2
	hgsql mm10 -N -e "select chrom, chromStart, chromEnd, transcript from knownCanonical where chrom='$C'" > knownCanonical/$C.known.bed
    done
    #   real    0m15.897s

    ls knownCanonical/*.known.bed | while read F
    do
      if [ -s $F ]; then
         echo $F | sed -e 's#knownCanonical/##; s/.known.bed//'
      fi
    done | while read C
    do
	echo "date"
	echo "mafGene -geneBeds=knownCanonical/$C.known.bed -exons -noTrans $db $mz knownGene order.list stdout | \
	    gzip -c > exonNuc/$C.exonNuc.fa.gz"
	echo "mafGene -geneBeds=knownCanonical/$C.known.bed -exons $db $mz knownGene order.list stdout | \
	    gzip -c > exonAA/$C.exonAA.fa.gz"
    done > $gp.$mz.jobs

    time sh -x $gp.$mz.jobs > $gp.$mz.job.log 2>&1
    # 267m58.813s

    # the per-chromosome gene beds were written into knownCanonical/ (not
    # into the current directory), so that is where they must be removed
    rm knownCanonical/*.known.bed
    export mz=multiz60way
    export gp=knownCanonical
    export db=mm10
    # merge the per-chromosome results into one download file per type;
    # the two pipelines run in parallel in the background
    zcat exonAA/c*.gz | gzip -c > $gp.$mz.exonAA.fa.gz &
    zcat exonNuc/c*.gz | gzip -c > $gp.$mz.exonNuc.fa.gz &
    # about 6 minutes
    # both background pipelines must finish before their inputs are deleted
    wait

    rm -rf exonAA exonNuc

    export mz=multiz60way
    export gp=knownCanonical
    export db=mm10
    export pd=/usr/local/apache/htdocs-hgdownload/goldenPath/$db/$mz/alignments
    mkdir -p $pd
    ln -s `pwd`/$gp.$mz.exonAA.fa.gz $pd/$gp.exonAA.fa.gz
    ln -s `pwd`/$gp.$mz.exonNuc.fa.gz $pd/$gp.exonNuc.fa.gz

    cd  $pd
    md5sum *.fa.gz > md5sum.txt
##############################################################################
# LASTZ Rat regenRn0 (DONE - 2019-07-01 - Jonathan)
    mkdir /hive/data/genomes/mm10/bed/lastzRegenRn0.2019-07-01
    cd /hive/data/genomes/mm10/bed/lastzRegenRn0.2019-07-01

    printf '# rat vs mouse
BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.00/bin/lastz
# TARGET: Mouse Mm10
SEQ1_DIR=/scratch/data/mm10/mm10.2bit
SEQ1_LEN=/scratch/data/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: Rat RegenRn0
SEQ2_DIR=/hive/data/genomes/regenRn0/regenRn0.2bit
SEQ2_LEN=/hive/data/genomes/regenRn0/chrom.sizes
SEQ2_CHUNK=20000000
SEQ2_LAP=0
SEQ2_LIMIT=500

BASE=/hive/data/genomes/mm10/bed/lastzRegenRn0.2019-07-01
TMPDIR=/dev/shm
' > DEF

    #	establish a screen to control this job
    screen -S mm10RegenRn0
    time (doBlastzChainNet.pl -verbose=2 `pwd`/DEF \
	-noDbNameCheck -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
	-syntenicNet -chainMinScore=5000 -chainLinearGap=medium) > do.log 2>&1
    #   real    196m22.733s

    cat fb.mm10.chainRegenRn0Link.txt
    #	1843678500 bases of 2652783500 (69.500%) in intersection
    cat fb.mm10.chainSynRegenRn0Link.txt
    #   1720395177 bases of 2652783500 (64.852%) in intersection

    time (doRecipBest.pl -workhorse=hgwdev -load -buildDir=`pwd` mm10 regenRn0) > rbest.log 2>&1 &
    # real    494m43.241s

    cat fb.mm10.chainRBest.RegenRn0.txt
    # 1694384084 bases of 2652783500 (63.872%) in intersection

    mkdir /hive/data/genomes/regenRn0/bed/blastz.mm10.swap
    cd /hive/data/genomes/regenRn0/bed/blastz.mm10.swap
    time (doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzRegenRn0.2019-07-01/DEF \
	-swap -syntenicNet -noDbNameCheck \
	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
	-chainMinScore=5000 -chainLinearGap=medium) > swap.log 2>&1
    #	real    106m31.449s

    cat fb.regenRn0.chainMm10Link.txt
    #   1803664991 bases of 2534810853 (71.156%) in intersection
    cat fb.regenRn0.chainSynMm10Link.txt
    #   1712372147 bases of 2534810853 (67.554%) in intersection

    time (doRecipBest.pl -workhorse=hgwdev -load -buildDir=`pwd` regenRn0 mm10) > rbest.log 2>&1
    # real    536m51.292s

    cat fb.regenRn0.chainRBest.Mm10.txt
    # 1695272967 bases of 2534810853 (66.880%) in intersection

##############################################################################
# LASTZ Rhesus rheMac10 (DONE - 2019-07-03 - Hiram)
    mkdir /hive/data/genomes/mm10/bed/lastzRheMac10.2019-07-03
    cd /hive/data/genomes/mm10/bed/lastzRheMac10.2019-07-03

    printf '# rhesus vs mouse
BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.00/bin/lastz
# TARGET: Mouse Mm10
SEQ1_DIR=/hive/data/genomes/mm10/mm10.2bit
SEQ1_LEN=/hive/data/genomes/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: Rhesus RheMac10
SEQ2_DIR=/hive/data/genomes/rheMac10/rheMac10.2bit
SEQ2_LEN=/hive/data/genomes/rheMac10/chrom.sizes
SEQ2_CHUNK=20000000
SEQ2_LAP=0
SEQ2_LIMIT=500

BASE=/hive/data/genomes/mm10/bed/lastzRheMac10.2019-07-03
TMPDIR=/dev/shm
' > DEF

    #	establish a screen to control this job
    screen -S mm10RheMac10
    time (doBlastzChainNet.pl -verbose=2 `pwd`/DEF \
	-syntenicNet -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
	-chainMinScore=3000 -chainLinearGap=medium) > do.log 2>&1
    #   real    211m21.922s

    cat fb.mm10.chainRheMac10Link.txt
    #	923559693 bases of 2652783500 (34.815%) in intersection
    cat fb.mm10.chainSynRheMac10Link.txt
    #   878479553 bases of 2652783500 (33.115%) in intersection

    time (doRecipBest.pl -workhorse=hgwdev -load -buildDir=`pwd` mm10 rheMac10) > rbest.log 2>&1 &
    # real    315m43.465s

    cat fb.mm10.chainRBest.RheMac10.txt
    # 879885863 bases of 2652783500 (33.168%) in intersection

    mkdir /hive/data/genomes/rheMac10/bed/blastz.mm10.swap
    cd /hive/data/genomes/rheMac10/bed/blastz.mm10.swap
    time (doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzRheMac10.2019-07-03/DEF \
	-swap -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
	-chainMinScore=3000 -chainLinearGap=medium) > swap.log 2>&1
    #	real    52m48.045s

    cat fb.rheMac10.chainMm10Link.txt
    #	918551088 bases of 2936892733 (31.276%) in intersection
    cat fb.rheMac10.chainSynMm10Link.txt
    #   876230433 bases of 2936892733 (29.835%) in intersection

    time (doRecipBest.pl -workhorse=hgwdev -load -buildDir=`pwd` rheMac10 mm10) > rbest.log 2>&1
    # real    303m40.303s

    cat fb.rheMac10.chainRBest.Mm10.txt
    # 878542993 bases of 2936892733 (29.914%) in intersection

#########################################################################
# LASTZ Rat regenRn1 (DONE - 2019-09-12 - Hiram)
    mkdir /hive/data/genomes/mm10/bed/lastzRegenRn1.2019-09-12
    cd /hive/data/genomes/mm10/bed/lastzRegenRn1.2019-09-12

    printf '# rat vs mouse
BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.00/bin/lastz
# TARGET: Mouse Mm10
SEQ1_DIR=/hive/data/genomes/mm10/mm10.2bit
SEQ1_LEN=/hive/data/genomes/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: Rat RegenRn1
SEQ2_DIR=/hive/data/genomes/regenRn1/regenRn1.2bit
SEQ2_LEN=/hive/data/genomes/regenRn1/chrom.sizes
SEQ2_CHUNK=20000000
SEQ2_LAP=0
SEQ2_LIMIT=500

BASE=/hive/data/genomes/mm10/bed/lastzRegenRn1.2019-09-12
TMPDIR=/dev/shm
' > DEF

    #	establish a screen to control this job
    screen -S mm10RegenRn1
    time (doBlastzChainNet.pl -verbose=2 `pwd`/DEF \
	-noDbNameCheck -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
	-syntenicNet -chainMinScore=5000 -chainLinearGap=medium) > do.log 2>&1
    #   real    185m22.610s


    cat fb.mm10.chainRegenRn1Link.txt
    #	1699005386 bases of 2652783500 (64.046%) in intersection
    cat fb.mm10.chainSynRegenRn1Link.txt
    #   1591231371 bases of 2652783500 (59.983%) in intersection

    time (doRecipBest.pl -workhorse=hgwdev -load -buildDir=`pwd` mm10 regenRn1) > rbest.log 2>&1 &
    # real    516m24.464s

    cat fb.mm10.chainRBest.RegenRn1.txt
    # 1565180527 bases of 2652783500 (59.001%) in intersection

    mkdir /hive/data/genomes/regenRn1/bed/blastz.mm10.swap
    cd /hive/data/genomes/regenRn1/bed/blastz.mm10.swap
    time (doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzRegenRn1.2019-09-12/DEF \
	-swap -syntenicNet -noDbNameCheck \
	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
	-chainMinScore=5000 -chainLinearGap=medium) > swap.log 2>&1
    #	real    94m19.060s

    cat fb.regenRn1.chainMm10Link.txt
    #   1634389849 bases of 2282482188 (71.606%) in intersection
    cat fb.regenRn1.chainSynMm10Link.txt
    #   1564752158 bases of 2282482188 (68.555%) in intersection

    time (doRecipBest.pl -workhorse=hgwdev -load -buildDir=`pwd` regenRn1 mm10) > rbest.log 2>&1
    # real    488m25.471s

    cat fb.regenRn1.chainRBest.Mm10.txt
    # 1565313924 bases of 2282482188 (68.579%) in intersection

##############################################################################
# crispr whole genome (DONE - 2019-10-08 - Hiram)
    mkdir /hive/data/genomes/mm10/bed/crisprAll
    cd /hive/data/genomes/mm10/bed/crisprAll

    # the large shoulder argument will cause the entire genome to be scanned
    time (~/kent/src/hg/utils/automation/doCrispr.pl -verbose=2 -stop=ranges \
    mm10 knownGene -shoulder=250000000 -tableName=crisprAll -fileServer=hgwdev \
    -buildDir=`pwd` -smallClusterHub=hgwdev -bigClusterHub=ku \
      -workhorse=hgwdev) > ranges.log 2>&1
    # real    2m58.652s

    time (~/kent/src/hg/utils/automation/doCrispr.pl -verbose=2 \
       -continue=guides -stop=guides \
    mm10 knownGene -shoulder=250000000 -tableName=crisprAll -fileServer=hgwdev \
    -buildDir=`pwd` -smallClusterHub=hgwdev -bigClusterHub=ku \
      -workhorse=hgwdev) > guides.log 2>&1
    # real    8m40.172s
# Completed: 100 of 100 jobs
# CPU time in finished jobs:      11503s     191.72m     3.20h    0.13d  0.000 y
# IO & Wait Time:                   254s       4.23m     0.07h    0.00d  0.000 y
# Average job time:                 118s       1.96m     0.03h    0.00d
# Longest finished job:             386s       6.43m     0.11h    0.00d
# Submission to last job:           387s       6.45m     0.11h    0.00d

    time (~/kent/src/hg/utils/automation/doCrispr.pl -verbose=2 \
       -continue=specScoreJobList -stop=specScores \
    mm10 knownGene -shoulder=250000000 -tableName=crisprAll -fileServer=hgwdev \
    -buildDir=`pwd` -smallClusterHub=hgwdev -bigClusterHub=ku \
      -workhorse=hgwdev) > specScores.log 2>&1
# Completed: 2947877 of 2947877 jobs
# CPU time in finished jobs:  262070681s 4367844.69m 72797.41h 3033.23d  8.310 y
# IO & Wait Time:               6243499s  104058.31m  1734.31h   72.26d  0.198 y
# Average job time:                  91s       1.52m     0.03h    0.00d
# Longest finished job:             217s       3.62m     0.06h    0.00d
# Submission to last job:        274925s    4582.08m    76.37h    3.18d

# Number of specScores: 220282678

# real    5619m9.026s
# user    617m24.869s
# sys     52m8.371s

    ### remember to get back to hgwdev to run this
    time (~/kent/src/hg/utils/automation/doCrispr.pl -verbose=2 \
       -continue=effScores -stop=load \
    mm10 knownGene -shoulder=250000000 -tableName=crisprAll -fileServer=hgwdev \
    -buildDir=`pwd` -smallClusterHub=hgwdev -bigClusterHub=ku \
      -workhorse=hgwdev) > load.log 2>&1
    # real    970m21.487s
    # user    0m0.983s
    # sys     0m2.219s

# effScores:
# Completed: 27697 of 27697 jobs
# CPU time in finished jobs:   14348277s  239137.94m  3985.63h  166.07d  0.455 y
# IO & Wait Time:                150120s    2502.01m    41.70h    1.74d  0.005 y
# Average job time:                 523s       8.72m     0.15h    0.01d
# Longest finished job:            1966s      32.77m     0.55h    0.02d
# Submission to last job:         15067s     251.12m     4.19h    0.17d

# offTargets:
# Completed: 147394 of 147394 jobs
# CPU time in finished jobs:    2213680s   36894.66m   614.91h   25.62d  0.070 y
# IO & Wait Time:               2663355s   44389.25m   739.82h   30.83d  0.084 y
# Average job time:                  33s       0.55m     0.01h    0.00d
# Longest finished job:              68s       1.13m     0.02h    0.00d

    # cleaning up 2021-04-24 - Hiram
    time (~/kent/src/hg/utils/automation/doCrispr.pl -verbose=2 \
       -continue=cleanup mm10 -tableName=crisprAll -fileServer=hgwdev \
    -buildDir=`pwd` -smallClusterHub=hgwdev -bigClusterHub=ku \
      -workhorse=hgwdev) > cleanup.log 2>&1
    # real    430m18.499s

#########################################################################

# For ENCODE 3 tracks, see doc/encode3/mouse.txt

##############################################################################
# LASTZ Gorilla gorGor6 (DONE - 2019-11-20 - Hiram)
    #	establish a screen to control this job
    screen -S mm10gorGor6
    mkdir /hive/data/genomes/mm10/bed/lastzGorGor6.2019-11-20
    cd /hive/data/genomes/mm10/bed/lastzGorGor6.2019-11-20

    printf '# mouse vs. gorilla
BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.00/bin/lastz

# TARGET: Mouse Mm10
SEQ1_DIR=/hive/data/genomes/mm10/mm10.2bit
SEQ1_LEN=/hive/data/genomes/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000
SEQ1_LIMIT=1

# QUERY: gorilla gorGor6
SEQ2_DIR=/hive/data/genomes/gorGor6/gorGor6.2bit
SEQ2_LEN=/hive/data/genomes/gorGor6/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LIMIT=130
SEQ2_LAP=0

BASE=/hive/data/genomes/mm10/bed/lastzGorGor6.2019-11-20
TMPDIR=/dev/shm
' > DEF

    time (doBlastzChainNet.pl -verbose=2 `pwd`/DEF \
	-syntenicNet -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
	-chainMinScore=3000 -chainLinearGap=medium) > do.log 2>&1 &
    #	real    518m27.777s

    cat fb.mm10.chainGorGor6Link.txt
    #	929953885 bases of 2652783500 (35.056%) in intersection
    cat fb.mm10.chainSynGorGor6Link.txt
    #   882047357 bases of 2652783500 (33.250%) in intersection

    time (doRecipBest.pl -workhorse=hgwdev -load  mm10 gorGor6 \
      -buildDir=`pwd` -workhorse=hgwdev) > rbest.log 2>&1 &
    # real    270m54.749s

    cat fb.mm10.chainRBest.GorGor6.txt
    # 885135149 bases of 2652783500 (33.366%) in intersection

    mkdir /hive/data/genomes/gorGor6/bed/blastz.mm10.swap
    cd /hive/data/genomes/gorGor6/bed/blastz.mm10.swap
    time (doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzGorGor6.2019-11-20/DEF \
	-swap -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=hgwdev -bigClusterHub=ku \
	-chainMinScore=3000 -chainLinearGap=medium) > swap.log 2>&1
    #	real    72m34.088s

    cat fb.gorGor6.chainMm10Link.txt
    #	1017872526 bases of 2999027915 (33.940%) in intersection
    cat fb.gorGor6.chainSynMm10Link.txt
    #    880983055 bases of 2999027915 (29.376%) in intersection

    time (doRecipBest.pl -load -workhorse=hgwdev gorGor6 mm10 \
      -buildDir=`pwd` -workhorse=hgwdev) > rbest.log 2>&1 &
    # real    237m38.959s

    cat fb.gorGor6.chainRBest.Mm10.txt
    # 883663662 bases of 2999027915 (29.465%) in intersection

##############################################################################
# LASTZ Chinese hamster ovary cell line CHO-K1  regenCho1
#	(DONE - 2019-11-26 - Hiram)
    #	establish a screen to control this job
    mkdir /hive/data/genomes/mm10/bed/lastzRegenCho1.2019-11-26
    cd /hive/data/genomes/mm10/bed/lastzRegenCho1.2019-11-26

    printf '# Chinese hamster ovary cell line vs mouse
BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.00/bin/lastz

# TARGET: Mouse Mm10
SEQ1_DIR=/hive/data/genomes/mm10/mm10.2bit
SEQ1_LEN=/hive/data/genomes/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000
SEQ1_LIMIT=40

# QUERY: Chinese hamster ovary cell line CHO-K1  regenCho1
SEQ2_DIR=/hive/data/genomes/regenCho1/regenCho1.2bit
SEQ2_LEN=/hive/data/genomes/regenCho1/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LIMIT=20
SEQ2_LAP=0

BASE=/hive/data/genomes/mm10/bed/lastzRegenCho1.2019-11-26
TMPDIR=/dev/shm
' > DEF

    time (doBlastzChainNet.pl -verbose=2 `pwd`/DEF \
	-noDbNameCheck -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
	-chainMinScore=3000 -chainLinearGap=medium) > do.log 2>&1 &
    #	real    340m52.020s

    cat fb.mm10.chainRegenCho1Link.txt
    #	1525566783 bases of 2652783500 (57.508%) in intersection
    cat fb.mm10.chainSynRegenCho1Link.txt
    #   1410851403 bases of 2652783500 (53.184%) in intersection

    time (doRecipBest.pl -load -workhorse=hgwdev mm10 regenCho1 \
      -buildDir=`pwd` -workhorse=hgwdev) > rbest.log 2>&1 &
    #	real    664m46.970s

    cat fb.mm10.chainRBest.RegenCho1.txt
    # 1395524606 bases of 2652783500 (52.606%) in intersection

    mkdir /hive/data/genomes/regenCho1/bed/blastz.mm10.swap
    cd /hive/data/genomes/regenCho1/bed/blastz.mm10.swap
    time (doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzRegenCho1.2019-11-26/DEF \
	-noDbNameCheck -swap -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
	-chainMinScore=3000 -chainLinearGap=medium) > swap.log 2>&1 &
    #	real    101m20.296s

    cat fb.regenCho1.chainMm10Link.txt
    #	1522181082 bases of 2266312740 (67.166%) in intersection
    cat fb.regenCho1.chainSynMm10Link.txt
    #   1397889394 bases of 2266312740 (61.681%) in intersection

    time (doRecipBest.pl -load -workhorse=hgwdev regenCho1 mm10 \
      -buildDir=`pwd` -workhorse=hgwdev) > rbest.log 2>&1 &
    # real    660m29.571s

    cat fb.regenCho1.chainRBest.Mm10.txt
    # 1396267649 bases of 2266312740 (61.610%) in intersection

##############################################################################
# clinvar lift, done, Fri Jan 31 06:12:46 PST 2020, max
doClinvarLift mm10
# clinvarLift.bed is measured against hg38, clinvarLift.mm10.bed against mm10;
# the recorded results follow each command
featureBits hg38 clinvarLift.bed
# 620623 bases of 3095998939 (0.020%) in intersection
wc -l clinvarLift.bed
# 610774 clinvarLift.bed
featureBits mm10 clinvarLift.mm10.bed
# 581378 bases of 2652783500 (0.022%) in intersection
wc -l clinvarLift.mm10.bed
# 575023 clinvarLift.mm10.bed
##############################################################################
# LASTZ Southern sea otter enhLutNer1 (DONE - 2020-04-16 - Jonathan)
    #	establish a screen to control this job
    mkdir /hive/data/genomes/mm10/bed/lastzEnhLutNer1.2020-04-15
    cd /hive/data/genomes/mm10/bed/lastzEnhLutNer1.2020-04-15

    printf '# Southern sea otter vs mouse
BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.00/bin/lastz

# TARGET: Mouse Mm10
SEQ1_DIR=/hive/data/genomes/mm10/mm10.2bit
SEQ1_LEN=/hive/data/genomes/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000
SEQ1_LIMIT=40

# QUERY: Southern sea otter enhLutNer1
SEQ2_DIR=/hive/data/genomes/enhLutNer1/enhLutNer1.2bit
SEQ2_LEN=/hive/data/genomes/enhLutNer1/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LIMIT=180
SEQ2_LAP=0

BASE=/hive/data/genomes/mm10/bed/lastzEnhLutNer1.2020-04-15
TMPDIR=/dev/shm
' > DEF

    time (doBlastzChainNet.pl -verbose=2 `pwd`/DEF \
	-noDbNameCheck -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
	-chainMinScore=3000 -chainLinearGap=medium) > do.log 2>&1 &
    #	real    140m43.505s

    cat fb.mm10.chainEnhLutNer1Link.txt
    #   772059271 bases of 2652783500 (29.104%) in intersection
    cat fb.mm10.chainSynEnhLutNer1Link.txt
    #   717097454 bases of 2652783500 (27.032%) in intersection

    time (doRecipBest.pl -load -workhorse=hgwdev mm10 enhLutNer1 \
      -buildDir=`pwd` -workhorse=hgwdev) > rbest.log 2>&1 &
    #	real    174m28.167s

    cat fb.mm10.chainRBest.EnhLutNer1.txt
    # 734878489 bases of 2652783500 (27.702%) in intersection

    mkdir /hive/data/genomes/enhLutNer1/bed/blastz.mm10.swap
    cd /hive/data/genomes/enhLutNer1/bed/blastz.mm10.swap
    time (doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzEnhLutNer1.2020-04-15/DEF \
	-noDbNameCheck -swap -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
	-chainMinScore=3000 -chainLinearGap=medium) > swap.log 2>&1 &
    #	real    48m44.604s

    cat fb.enhLutNer1.chainMm10Link.txt
    #   787727864 bases of 2413653822 (32.636%) in intersection
    cat fb.enhLutNer1.chainSynMm10Link.txt
    #   712950571 bases of 2413653822 (29.538%) in intersection

    time (doRecipBest.pl -load -workhorse=hgwdev enhLutNer1 mm10 \
      -buildDir=`pwd`) > rbest.log 2>&1 &
    # real    167m45.490s

    cat fb.enhLutNer1.chainRBest.Mm10.txt
    # 734620004 bases of 2413653822 (30.436%) in intersection

##############################################################################
# LASTZ German shepherd canFam4 (DONE - 2020-04-02 - Hiram)
    #	establish a screen to control this job
    mkdir /hive/data/genomes/mm10/bed/lastzCanFam4.2020-04-02
    cd /hive/data/genomes/mm10/bed/lastzCanFam4.2020-04-02

    printf '# German shepard canFam4 vs mouse
BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.03/bin/lastz
BLASTZ_M=254

# TARGET: Mouse Mm10
SEQ1_DIR=/hive/data/genomes/mm10/mm10.2bit
SEQ1_LEN=/hive/data/genomes/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000
SEQ1_LIMIT=40

# QUERY: German shepard canFam4
SEQ2_DIR=/hive/data/genomes/canFam4/canFam4.2bit
SEQ2_LEN=/hive/data/genomes/canFam4/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LIMIT=20
SEQ2_LAP=0

BASE=/hive/data/genomes/mm10/bed/lastzCanFam4.2020-04-02
TMPDIR=/dev/shm
' > DEF

    time (doBlastzChainNet.pl -verbose=2 `pwd`/DEF \
	-syntenicNet \
	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
	-chainMinScore=3000 -chainLinearGap=medium) > do.log 2>&1 &
    #	real    483m10.607s

    cat fb.mm10.chainCanFam4Link.txt
    #	777883731 bases of 2652783500 (29.323%) in intersection
    cat fb.mm10.chainSynCanFam4Link.txt
    #   736602602 bases of 2652783500 (27.767%) in intersection

    time (doRecipBest.pl -load -workhorse=hgwdev mm10 canFam4 \
      -buildDir=`pwd` -workhorse=hgwdev) > rbest.log 2>&1 &
    #	real    219m16.168s

    cat fb.mm10.chainRBest.CanFam4.txt
    # 741307883 bases of 2652783500 (27.945%) in intersection

    mkdir /hive/data/genomes/canFam4/bed/blastz.mm10.swap
    cd /hive/data/genomes/canFam4/bed/blastz.mm10.swap
    time (doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzCanFam4.2020-04-02/DEF \
	-swap -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
	-chainMinScore=3000 -chainLinearGap=medium) > swap.log 2>&1 &
    #	real    50m20.639s

    cat fb.canFam4.chainMm10Link.txt
    #	772902855 bases of 2481941580 (31.141%) in intersection
    cat fb.canFam4.chainSynMm10Link.txt
    #   737924732 bases of 2481941580 (29.732%) in intersection

    time (doRecipBest.pl -load -workhorse=hgwdev canFam4 mm10 \
      -buildDir=`pwd` -workhorse=hgwdev) > rbest.log 2>&1 &
    # real    173m38.016s

    cat fb.canFam4.chainRBest.Mm10.txt
    # 740357755 bases of 2481941580 (29.830%) in intersection

##############################################################################
# LASTZ woodchuck/Marmota monax/GCA_901343595.1 (DONE - 2020-05-29 - Hiram)
    #	establish a screen to control this job
    mkdir /hive/data/genomes/mm10/bed/lastzGCA_901343595v1.2020-05-29
    cd /hive/data/genomes/mm10/bed/lastzGCA_901343595v1.2020-05-29

    # do NOT want dots in the name of the sequence, eliminate the .1 -> v1
    ln -s /hive/data/genomes/asmHubs/genbankBuild/GCA/901/343/595/GCA_901343595.1_MONAX5/GCA_901343595.1_MONAX5.2bit GCA_901343595v1_MONAX5.2bit
    ln -s /hive/data/genomes/asmHubs/genbankBuild/GCA/901/343/595/GCA_901343595.1_MONAX5/GCA_901343595.1_MONAX5.chrom.sizes GCA_901343595v1_MONAX5.chrom.sizes

    printf '# woodchuck/Marmota monax/GCA_901343595.1 vs mouse
BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.03/bin/lastz
BLASTZ_M=254

# TARGET: Mouse Mm10
SEQ1_DIR=/hive/data/genomes/mm10/mm10.2bit
SEQ1_LEN=/hive/data/genomes/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000
SEQ1_LIMIT=40

# QUERY: woodchuck/Marmota monax/GCA_901343595.1
SEQ2_DIR=/hive/data/genomes/mm10/bed/lastzGCA_901343595v1.2020-05-29/GCA_901343595v1_MONAX5.2bit
SEQ2_LEN=/hive/data/genomes/mm10/bed/lastzGCA_901343595v1.2020-05-29/GCA_901343595v1_MONAX5.chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LIMIT=400
SEQ2_LAP=0

BASE=/hive/data/genomes/mm10/bed/lastzGCA_901343595v1.2020-05-29
TMPDIR=/dev/shm
' > DEF

    time (doBlastzChainNet.pl -verbose=2 `pwd`/DEF \
	-noDbNameCheck -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
	-chainMinScore=3000 -chainLinearGap=medium) > do.log 2>&1 &
    #  real    125m59.598s

    cat fb.mm10.chainGCA_901343595v1_MONAX5Link.txt
    # 923698956 bases of 2652783500 (34.820%) in intersection
    cat fb.mm10.chainSynGCA_901343595v1_MONAX5Link.txt
    # 851857022 bases of 2652783500 (32.112%) in intersection

    time (~/kent/src/hg/utils/automation/doRecipBest.pl -load \
       -workhorse=hgwdev mm10 GCA_901343595v1_MONAX5 \
      -query2Bit=/hive/data/genomes/mm10/bed/lastzGCA_901343595v1.2020-05-29/GCA_901343595v1_MONAX5.2bit \
      -querySizes=/hive/data/genomes/mm10/bed/lastzGCA_901343595v1.2020-05-29/GCA_901343595v1_MONAX5.chrom.sizes \
      -buildDir=`pwd` -workhorse=hgwdev) > rbest.log 2>&1 &
    #	real    226m4.391s

    cat fb.mm10.chainRBest.GCA_901343595v1_MONAX5.txt
    # 877175980 bases of 2652783500 (33.066%) in intersection

    mkdir /hive/data/genomes/asmHubs/genbankBuild/GCA/901/343/595/GCA_901343595.1_MONAX5/trackData/blastz.mm10.swap
    cd /hive/data/genomes/asmHubs/genbankBuild/GCA/901/343/595/GCA_901343595.1_MONAX5/trackData/blastz.mm10.swap
    time (doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzGCA_901343595v1.2020-05-29/DEF \
        -noDbNameCheck -swapDir=`pwd` -skipDownload -trackHub \
	-swap -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
	-chainMinScore=3000 -chainLinearGap=medium) > swap.log 2>&1 &

    # real    25m28.210s
    # ran into problem of expired certificate at genome-source.gi.ucsc.edu
    # finished netChains.csh manually, then continuing:
    time (~/kent/src/hg/utils/automation/doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzGCA_901343595v1.2020-05-29/DEF \
        -noDbNameCheck -swapDir=`pwd` -skipDownload -trackHub \
	-continue=load -swap -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
	-chainMinScore=3000 -chainLinearGap=medium) > load.log 2>&1 &
    #	real    37m23.399s

    cat fb.GCA_901343595v1_MONAX5.chain.Mm10Link.txt
    # 974411924 bases of 2625891882 (37.108%) in intersection
    cat fb.GCA_901343595v1_MONAX5.chainSyn.Mm10Link.txt
    # 865165537 bases of 2625891882 (32.947%) in intersection

    time (~/kent/src/hg/utils/automation/doRecipBest.pl -load \
      -skipDownload -trackHub -workhorse=hgwdev GCA_901343595v1_MONAX5 mm10 \
      -target2Bit=/hive/data/genomes/mm10/bed/lastzGCA_901343595v1.2020-05-29/GCA_901343595v1_MONAX5.2bit \
      -targetSizes=/hive/data/genomes/mm10/bed/lastzGCA_901343595v1.2020-05-29/GCA_901343595v1_MONAX5.chrom.sizes \
      -buildDir=`pwd` -workhorse=hgwdev) > rbest.log 2>&1 &
    # real    277m4.130s

    cat fb.GCA_901343595v1_MONAX5.chainRBest.Mm10.txt
    # 877160501 bases of 2625891882 (33.404%) in intersection

##############################################################################
# ENCODE Registry of Candidate cis-Regulatory Elements
#
# 2020-05-12  kate
#
# From ENCODE 3 Data Analysis Center at U Mass Med Center (Zlab)
# Data contacts:  Henry Pratt, Jill Moore, Zhiping Weng PI
#
# RM #24668
#
# Download BED file (hosted on their integrative hub)

cd /hive/data/outside/encode3/ccre
mkdir mouse
cd mouse
wget http://gcp.wenglab.org/hubs/integrative1/data/mm10/cta/mm10-ccres.bigbed
# the steps below operate on a plain BED file, so convert the download
bigBedToBed mm10-ccres.bigbed mm10-ccREs.bed

###
# Add scores
# save under the name the following commands expect
wget -nd -O Mouse-maxZ-DNase.txt.gz https://users.wenglab.org/moorej3/mouse-maxz-dnase.txt.gz
gunzip Mouse-maxZ-DNase.txt.gz

# sort both files so that "paste" lines them up row by row
sort Mouse-maxZ-DNase.txt > Mouse-maxZ-DNase.sorted.txt
# noting that order of accessions in score file doesn't match bed file ;-(
sort -k 4 mm10-ccREs.bed > mm10-ccREs.sorted.bed
paste mm10-ccREs.sorted.bed Mouse-maxZ-DNase.sorted.txt > ccres.prescored.bed
# sanity check ids match
# NOTE(review): no check command was recorded here; confirm by eye that the
# accession from the bed file and the one from the score file agree per row

# score using zscore, min(zscore*100, 1000), and reformat
# (column 13 of the pasted file is used as the z-score and is also kept
# as the last output column)
awk '{OFS="\t"; print $1, $2, $3, $4, ($13>10)? 1000 : int($13 * 100), $6, $7, $8, $9, $10, $13}' \
        ccres.prescored.bed | bedSort stdin ccres.scored.bed
# NOTE: "set name = value" is csh/tcsh syntax; run these lines in a
# csh-family shell (in sh/bash write name=value instead)
set f = encodeCcreCombined

# Reformat to add fields for filtering and mouseover, etc.
set bin = ~/kent/src/hg/makeDb/outside/encode3/ccre
perl $bin/makeCcreCombined.pl < ccres.scored.bed > $f.bed
set lib = ~/kent/src/hg/lib
bedToBigBed -tab -type=bed9+6 -as=$lib/$f.as $f.bed /hive/data/genomes/mm10/chrom.sizes $f.bb
# make the bigBed visible to the browser via a /gbdb symlink
mkdir -p /gbdb/mm10/encode3/ccre
ln -s `pwd`/$f.bb /gbdb/mm10/encode3/ccre


##############################################################################
# LASTZ mouse/mm10 bonobo/panPan3 - (DONE - 2020-06-15 - Hiram)
    mkdir /hive/data/genomes/mm10/bed/lastzPanPan3.2020-06-15
    cd /hive/data/genomes/mm10/bed/lastzPanPan3.2020-06-15

    printf '# mouse vs bonobo
BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.03/bin/lastz
BLASTZ_M=254

# TARGET: Mouse Mm10
SEQ1_DIR=/hive/data/genomes/mm10/mm10.2bit
SEQ1_LEN=/hive/data/genomes/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000
# the default matrix is:
#       A     C     G     T
# A    91  -114   -31  -123
# C  -114   100  -125   -31
# G   -31  -125   100  -114
# T  -123   -31  -114    91

# QUERY: bonobo panPan3
SEQ2_DIR=/hive/data/genomes/panPan3/panPan3.2bit
SEQ2_LEN=/hive/data/genomes/panPan3/chrom.sizes
SEQ2_CHUNK=20000000
SEQ2_LIMIT=30
SEQ2_LAP=0

BASE=/hive/data/genomes/mm10/bed/lastzPanPan3.2020-06-15
TMPDIR=/dev/shm
' > DEF

    time (doBlastzChainNet.pl `pwd`/DEF -verbose=2 \
        -chainMinScore=3000 -chainLinearGap=medium \
          -workhorse=hgwdev -smallClusterHub=hgwdev -bigClusterHub=ku \
            -syntenicNet) > do.log 2>&1
    # real    385m29.712s

    cat fb.mm10.chainPanPan3Link.txt
    # 935579510 bases of 2652783500 (35.268%) in intersection
    cat fb.mm10.chainSynPanPan3Link.txt
    # 888900388 bases of 2652783500 (33.508%) in intersection

    time (doRecipBest.pl -load -workhorse=hgwdev -buildDir=`pwd` mm10 panPan3) \
      > rbest.log 2>&1 &
    # real    329m28.051s
    cat fb.mm10.chainRBest.PanPan3.txt
    # 890894306 bases of 2652783500 (33.583%) in intersection

    # and for the swap:
    mkdir /hive/data/genomes/panPan3/bed/blastz.mm10.swap
    cd /hive/data/genomes/panPan3/bed/blastz.mm10.swap

    time (doBlastzChainNet.pl -verbose=2 \
      /hive/data/genomes/mm10/bed/lastzPanPan3.2020-06-15/DEF \
        -swap -chainMinScore=3000 -chainLinearGap=medium \
          -workhorse=hgwdev -smallClusterHub=hgwdev -bigClusterHub=ku \
            -syntenicNet) > swap.log 2>&1
    #  real    55m23.982s

    cat fb.panPan3.chainMm10Link.txt
    # 954214151 bases of 3015350297 (31.645%) in intersection
    cat fb.panPan3.chainSynMm10Link.txt
    # 887980807 bases of 3015350297 (29.449%) in intersection

    time (doRecipBest.pl -load -workhorse=hgwdev -buildDir=`pwd` panPan3 mm10) \
       > rbest.log 2>&1
    # real    305m11.756s

    cat fb.panPan3.chainRBest.Mm10.txt
    # 889360051 bases of 3015350297 (29.494%) in intersection

##############################################################################
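# LASTZ Great Dane canFam5 (DONE - 2020-07-29 - Hiram)
    # NOTE: the DEF comments below say "German shepard" while this title says
    #   Great Dane; the two labels disagree, canFam5 is what was aligned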
    #	establish a screen to control this job
    mkdir /hive/data/genomes/mm10/bed/lastzCanFam5.2020-07-29
    cd /hive/data/genomes/mm10/bed/lastzCanFam5.2020-07-29

    printf '# German shepard canFam5 vs mouse
BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.03/bin/lastz
BLASTZ_M=254

# TARGET: Mouse Mm10
SEQ1_DIR=/hive/data/genomes/mm10/mm10.2bit
SEQ1_LEN=/hive/data/genomes/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000
SEQ1_LIMIT=40

# QUERY: German shepard canFam5
SEQ2_DIR=/hive/data/genomes/canFam5/canFam5.2bit
SEQ2_LEN=/hive/data/genomes/canFam5/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LIMIT=20
SEQ2_LAP=0

BASE=/hive/data/genomes/mm10/bed/lastzCanFam5.2020-07-29
TMPDIR=/dev/shm
' > DEF

    time (doBlastzChainNet.pl -verbose=2 `pwd`/DEF \
	-syntenicNet \
	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
	-chainMinScore=3000 -chainLinearGap=medium) > do.log 2>&1 &
    #	real    1052m41.484s

    cat fb.mm10.chainCanFam5Link.txt
    #	776486006 bases of 2652783500 (29.271%) in intersection
    cat fb.mm10.chainSynCanFam5Link.txt
    #   735561772 bases of 2652783500 (27.728%) in intersection

    time (doRecipBest.pl -load -workhorse=hgwdev mm10 canFam5 \
      -buildDir=`pwd` -workhorse=hgwdev) > rbest.log 2>&1 &
    #	real    214m12.449s

    cat fb.mm10.chainRBest.CanFam5.txt
    # 740117947 bases of 2652783500 (27.900%) in intersection

    mkdir /hive/data/genomes/canFam5/bed/blastz.mm10.swap
    cd /hive/data/genomes/canFam5/bed/blastz.mm10.swap

    time (doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzCanFam5.2020-07-29/DEF \
	-swap -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
	-chainMinScore=3000 -chainLinearGap=medium) > swap.log 2>&1 &
    #	real    44m9.935s

    cat fb.canFam5.chainMm10Link.txt
    #	759821061 bases of 2337131234 (32.511%) in intersection
    cat fb.canFam5.chainSynMm10Link.txt
    #   731350605 bases of 2337131234 (31.293%) in intersection

    time (doRecipBest.pl -load -workhorse=hgwdev canFam5 mm10 \
      -buildDir=`pwd` -workhorse=hgwdev) > rbest.log 2>&1 &
    # real    162m30.634s

    cat fb.canFam5.chainRBest.Mm10.txt
    # 739177732 bases of 2337131234 (31.628%) in intersection

##############################################################################
# LIFTOVER TO mm39 (DONE - 2020-07-30 - Hiram)
    ssh hgwdev
    mkdir /hive/data/genomes/mm10/bed/blat.mm39.2020-07-30
    cd /hive/data/genomes/mm10/bed/blat.mm39.2020-07-30
    doSameSpeciesLiftOver.pl -verbose=2 \
        -debug -bigClusterHub=ku -dbHost=hgwdev -workhorse=hgwdev \
        -ooc=/hive/data/genomes/mm10/jkStuff/mm10.11.ooc \
        -target2Bit=/hive/data/genomes/mm10/mm10.2bit \
        -targetSizes=/hive/data/genomes/mm10/chrom.sizes \
         mm10 mm39
    time (doSameSpeciesLiftOver.pl -verbose=2 \
        -bigClusterHub=ku -dbHost=hgwdev -workhorse=hgwdev \
        -target2Bit=/hive/data/genomes/mm10/mm10.2bit \
        -targetSizes=/hive/data/genomes/mm10/chrom.sizes \
        -ooc=/hive/data/genomes/mm10/jkStuff/mm10.11.ooc \
         mm10 mm39) > doLiftOverToMm39.log 2>&1
    # real    257m19.983s

    # see if the liftOver menus function in the browser from mm10 to mm39

##############################################################################
# LASTZ mouse mm10 vs marmoset calJac4 (DONE - 2020-08-03 - Hiram)
    # NOTE: XXX The date here 08-03 is incorrect, it should be 09-03
    #	establish a screen to control this job
    mkdir /hive/data/genomes/mm10/bed/lastzCalJac4.2020-08-03
    cd /hive/data/genomes/mm10/bed/lastzCalJac4.2020-08-03

    printf '# mouse vs marmoset calJac4
BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.03/bin/lastz
BLASTZ_M=254

# TARGET: Mouse Mm10
SEQ1_DIR=/hive/data/genomes/mm10/mm10.2bit
SEQ1_LEN=/hive/data/genomes/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000
SEQ1_LIMIT=40

# QUERY: marmoset calJac4
SEQ2_DIR=/hive/data/genomes/calJac4/calJac4.2bit
SEQ2_LEN=/hive/data/genomes/calJac4/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LIMIT=20
SEQ2_LAP=0

BASE=/hive/data/genomes/mm10/bed/lastzCalJac4.2020-08-03
TMPDIR=/dev/shm
' > DEF

    time (doBlastzChainNet.pl -verbose=2 `pwd`/DEF \
	-syntenicNet \
	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
	-chainMinScore=3000 -chainLinearGap=medium) > do.log 2>&1 &
    #	real    232m20.046s

    cat fb.mm10.chainCalJac4Link.txt
    #	877278264 bases of 2652783500 (33.070%) in intersection
    cat fb.mm10.chainSynCalJac4Link.txt
    #   830868888 bases of 2652783500 (31.321%) in intersection

    time (doRecipBest.pl -load -workhorse=hgwdev mm10 calJac4 \
      -buildDir=`pwd` -workhorse=hgwdev) > rbest.log 2>&1 &
    #	real    322m7.141s

    cat fb.mm10.chainRBest.CalJac4.txt
    # 835445771 bases of 2652783500 (31.493%) in intersection

    mkdir /hive/data/genomes/calJac4/bed/blastz.mm10.swap
    cd /hive/data/genomes/calJac4/bed/blastz.mm10.swap

    time (doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzCalJac4.2020-08-03/DEF \
	-swap -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=hgwdev -bigClusterHub=ku \
	-chainMinScore=3000 -chainLinearGap=medium) > swap.log 2>&1 &
    #	real    51m16.400s

    cat fb.calJac4.chainMm10Link.txt
    #	882506277 bases of 2859817025 (30.859%) in intersection
    cat fb.calJac4.chainSynMm10Link.txt
    #   831171319 bases of 2859817025 (29.064%) in intersection

    time (doRecipBest.pl -load -workhorse=hgwdev calJac4 mm10 \
      -buildDir=`pwd` -workhorse=hgwdev) > rbest.log 2>&1 &
    # real    308m53.845s

    cat fb.calJac4.chainRBest.Mm10.txt
    # 833789913 bases of 2859817025 (29.155%) in intersection

##############################################################################
# LASTZ Rat rn7 (DONE - 2021-02-15 - Hiram)
    mkdir /hive/data/genomes/mm10/bed/lastzRn7.2021-02-15
    cd /hive/data/genomes/mm10/bed/lastzRn7.2021-02-15

    printf '# rat vs mouse
BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.03/bin/lastz
# TARGET: Mouse Mm10
SEQ1_DIR=/hive/data/genomes/mm10/mm10.2bit
SEQ1_LEN=/hive/data/genomes/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: Rat Rn7
SEQ2_DIR=/hive/data/genomes/rn7/rn7.2bit
SEQ2_LEN=/hive/data/genomes/rn7/chrom.sizes
SEQ2_CHUNK=20000000
SEQ2_LAP=0
SEQ2_LIMIT=500

BASE=/hive/data/genomes/mm10/bed/lastzRn7.2021-02-15
TMPDIR=/dev/shm
' > DEF

    #	establish a screen to control this job
    screen -S mm10Rn7
    time (doBlastzChainNet.pl -verbose=2 `pwd`/DEF \
	-syntenicNet \
	-workhorse=hgwdev -smallClusterHub=hgwdev -bigClusterHub=ku \
	-chainMinScore=5000 -chainLinearGap=medium) > do.log 2>&1 &
    #   real    245m21.728s

    sed -e 's/^/    # /;' fb.mm10.chainRn7Link.txt
    # 1896928045 bases of 2652783500 (71.507%) in intersection
    sed -e 's/^/    # /;' fb.mm10.chainSynRn7Link.txt
    # 1787142074 bases of 2652783500 (67.369%) in intersection

    time (doRecipBest.pl -load -workhorse=hgwdev -buildDir=`pwd` \
	 mm10 rn7) > rbest.log 2>&1 &
    # real    578m13.711s

    sed -e 's/^/    # /;' fb.mm10.chainRBest.Rn7.txt
    # 1753198266 bases of 2652783500 (66.089%) in intersection

    mkdir /hive/data/genomes/rn7/bed/blastz.mm10.swap
    cd /hive/data/genomes/rn7/bed/blastz.mm10.swap
    time (doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzRn7.2021-02-15/DEF \
	-swap -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=hgwdev -bigClusterHub=ku \
	-chainMinScore=5000 -chainLinearGap=medium) > swap.log 2>&1
    #	real    112m36.899s

    sed -e 's/^/    # /;' fb.rn7.chainMm10Link.txt
    # 1853300495 bases of 2626580772 (70.559%) in intersection
    sed -e 's/^/    # /;' fb.rn7.chainSynMm10Link.txt
    # 1762899567 bases of 2626580772 (67.118%) in intersection

    time (doRecipBest.pl -load -workhorse=hgwdev -buildDir=`pwd` \
	rn7 mm10) > rbest.log 2>&1
    # real    599m24.766s

    sed -e 's/^/    # /;' fb.rn7.chainRBest.Mm10.txt 
    # 1753558422 bases of 2626580772 (66.762%) in intersection

##############################################################################
# lastz frog xenTro10 (DONE - 2021-02-22 - Hiram)
    # establish a screen to control this job with a name to indicate what it is
    screen -S mm10XenTro10
    mkdir /hive/data/genomes/mm10/bed/lastzXenTro10.2021-02-22
    cd /hive/data/genomes/mm10/bed/lastzXenTro10.2021-02-22

    printf '# Mouse vs. frog
BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.03/bin/lastz
BLASTZ_Y=3400
BLASTZ_L=6000
BLASTZ_K=2200
BLASTZ_Q=/hive/data/staging/data/blastz/HoxD55.q

# TARGET: Mouse Mm10
SEQ1_DIR=/hive/data/genomes/mm10/mm10.2bit
SEQ1_LEN=/hive/data/genomes/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000
SEQ1_LIMIT=10

# QUERY: frog xenTro10
SEQ2_DIR=/hive/data/genomes/xenTro10/xenTro10.2bit
SEQ2_LEN=/hive/data/genomes/xenTro10/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LAP=0
SEQ2_LIMIT=20

BASE=/hive/data/genomes/mm10/bed/lastzXenTro10.2021-02-22
TMPDIR=/dev/shm
' > DEF

    time (doBlastzChainNet.pl -verbose=2 `pwd`/DEF \
        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
          -syntenicNet -chainMinScore=5000 -chainLinearGap=loose) \
              > do.log 2>&1 &
    #	real    288m14.529s

    sed -e 's/^/    # /;' fb.mm10.chainXenTro10Link.txt
    # 96546694 bases of 2652783500 (3.639%) in intersection
    sed -e 's/^/    # /;' fb.mm10.chainSynXenTro10Link.txt
    # 34676951 bases of 2652783500 (1.307%) in intersection

    time (doRecipBest.pl -load -workhorse=hgwdev -buildDir=`pwd` mm10 \
       xenTro10) > rbest.log 2>&1 &
    # real    396m41.983s

    sed -e 's/^/    # /;' fb.mm10.chainRBest.XenTro10.txt
    # 62288287 bases of 2652783500 (2.348%) in intersection

    #	and for the swap
    mkdir /hive/data/genomes/xenTro10/bed/blastz.mm10.swap
    cd /hive/data/genomes/xenTro10/bed/blastz.mm10.swap
    time (doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzXenTro10.2021-02-22/DEF \
        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
        -syntenicNet -swap -chainMinScore=5000 -chainLinearGap=loose) \
	> swap.log 2>&1 &
    #	real    24m33.940s

    sed -e 's/^/    # /;' fb.xenTro10.chainMm10Link.txt
    # 121679610 bases of 1448461978 (8.401%) in intersection
    sed -e 's/^/    # /;' fb.xenTro10.chainSynMm10Link.txt
    # 35210769 bases of 1448461978 (2.431%) in intersection

  time (doRecipBest.pl -load -workhorse=hgwdev -buildDir=`pwd` xenTro10 mm10) \
         > rbest.log 2>&1 &
    # real    372m38.637s

    sed -e 's/^/    # /;' fb.xenTro10.chainRBest.Mm10.txt
    # 58901471 bases of 1448461978 (4.066%) in intersection

##############################################################################
# LASTZ Ryukyu mouse GCF_900094665.1 (DONE - 2021-04-26 - Hiram)
    mkdir /hive/data/genomes/mm10/bed/lastzGCF_900094665.1.2021-04-26
    cd /hive/data/genomes/mm10/bed/lastzGCF_900094665.1.2021-04-26

    printf '# GCF_900094665.1 Mus caroli (Ryukyu mouse) vs mm10
BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.03/bin/lastz
BLASTZ_T=2
BLASTZ_O=400
BLASTZ_E=30
BLASTZ_M=254
# default BLASTZ_Q score matrix:
#       A     C     G     T
# A    91  -114   -31  -123
# C  -114   100  -125   -31
# G   -31  -125   100  -114
# T  -123   -31  -114    91

# TARGET: Mouse Mm10
SEQ1_DIR=/hive/data/genomes/mm10/mm10.2bit
SEQ1_LEN=/hive/data/genomes/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000
SEQ1_LIMIT=10

# QUERY: Mus croli - Ryukyu mouse GCF_900094665.1
SEQ2_DIR=/hive/data/genomes/asmHubs/GCF/900/094/665/GCF_900094665.1/GCF_900094665.1.2bit
SEQ2_LEN=/hive/data/genomes/asmHubs/GCF/900/094/665/GCF_900094665.1/GCF_900094665.1.chrom.sizes.txt
SEQ2_CHUNK=20000000
SEQ2_LAP=0
SEQ2_LIMIT=100

BASE=/hive/data/genomes/mm10/bed/lastzGCF_900094665.1.2021-04-26
TMPDIR=/dev/shm
' > DEF

export targetDb="mm10"
export asmId="GCF_900094665.1"
export gcPath="GCF/900/094/665"
cd /hive/data/genomes/$targetDb/bed/lastz${asmId}.2021-04-26
time (doBlastzChainNet.pl -trackHub -noDbNameCheck -verbose=2 `pwd`/DEF \
   -workhorse=hgwdev -smallClusterHub=hgwdev -bigClusterHub=ku \
     -syntenicNet -chainMinScore=3000 -chainLinearGap=medium) > do.log 2>&1
cat fb.${targetDb}.chain.${asmId}Link.txt
cat fb.${targetDb}.chainSyn.${asmId}Link.txt

grep -w real do.log | sed -e 's/^/    # /;'
    # real      207m59.745s

sed -e 's/^/    # /;' fb.$targetDb.chain.${asmId}Link.txt
    # 2303277151 bases of 2818974548 (81.706%) in intersection
sed -e 's/^/    # /;' fb.$targetDb.chainSyn.${asmId}Link.txt
    # 2187910131 bases of 2818974548 (77.614%) in intersection

time (doRecipBest.pl -trackHub -load -workhorse=hgwdev -buildDir=`pwd` \
-query2Bit="/hive/data/genomes/asmHubs/$gcPath/${asmId}/${asmId}.2bit" \
-querySizes="/hive/data/genomes/asmHubs/$gcPath/${asmId}/${asmId}.chrom.sizes.txt" \
$targetDb ${asmId}) >> rbest.log 2>&1
grep -w real rbest.log | sed -e 's/^/    # /;'
    # real      274m57.907s

sed -e 's/^/    # /;' fb.$targetDb.chainRBest.$asmId.txt
    # 2074680070 bases of 2818974548 (73.597%) in intersection

# total time for all the above:
    # real    482m57.733s

#######################################
### the swap to the assembly hub
export target="mm10"
export Target="Mm10"
export query="GCF_900094665.1"
export asmId="GCF_900094665.1_CAROLI_EIJ_v1.1"
export gcPath="GCF/900/094/665"

mkdir -p /hive/data/genomes/asmHubs/refseqBuild/$gcPath/$asmId/trackData/blastz.$target.swap
cd /hive/data/genomes/asmHubs/refseqBuild/$gcPath/$asmId/trackData/blastz.$target.swap

time (doBlastzChainNet.pl -trackHub -noDbNameCheck -verbose=2 -swapDir=`pwd` \
    /hive/data/genomes/${target}/bed/lastz.${query}/DEF -syntenicNet \
  -workhorse=hgwdev -smallClusterHub=hgwdev -bigClusterHub=ku \
    -swap -chainMinScore=3000 -chainLinearGap=medium) >> swap.log 2>&1
grep -w real swap.log | sed -e 's/^/    # /;'
    # real    554m2.489s

sed -e 's/^/    # /;' fb.${query}.chain.${Target}Link.txt
    # 2116460904 bases of 2553121441 (82.897%) in intersection
sed -e 's/^/    # /;' fb.${query}.chainSyn.${Target}Link.txt
    # 2081173211 bases of 2553121441 (81.515%) in intersection

time (doRecipBest.pl -trackHub -load -workhorse=hgwdev -buildDir=`pwd` \
-target2Bit="/hive/data/genomes/asmHubs/$gcPath/${query}/${query}.2bit" \
-targetSizes="/hive/data/genomes/asmHubs/$gcPath/${query}/${query}.chrom.sizes.txt" \
$query $target) >> rbest.log 2>&1
grep -w real rbest.log | sed -e 's/^/    # /;'
    # real      246m55.342s

sed -e 's/^/    # /;' fb.${query}.chainRBest.${Target}.txt
    # 2078102689 bases of 2553121441 (81.395%) in intersection

# Complete run time for all the swap operation:
    # real    367m14.987s

##############################################################################
# LASTZ dog boxer Tasha canFam6 (DONE - 2021-05-17 - Hiram)
    #	establish a screen to control this job
    mkdir /hive/data/genomes/mm10/bed/lastzCanFam6.2021-05-17
    cd /hive/data/genomes/mm10/bed/lastzCanFam6.2021-05-17

    printf '# boxer Tasha canFam6 vs mouse
BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.03/bin/lastz
BLASTZ_M=254

# TARGET: Mouse Mm10
SEQ1_DIR=/hive/data/genomes/mm10/mm10.2bit
SEQ1_LEN=/hive/data/genomes/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000
SEQ1_LIMIT=40

# QUERY: boxer Tasha canFam6
SEQ2_DIR=/hive/data/genomes/canFam6/canFam6.2bit
SEQ2_LEN=/hive/data/genomes/canFam6/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LIMIT=20
SEQ2_LAP=0

BASE=/hive/data/genomes/mm10/bed/lastzCanFam6.2021-05-17
TMPDIR=/dev/shm
' > DEF

    time (~/kent/src/hg/utils/automation/doBlastzChainNet.pl `pwd`/DEF \
	-verbose=2 -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
	-chainMinScore=3000 -chainLinearGap=medium) > do.log 2>&1 &
    #	real    276m30.999s

    sed -e 's/^/    # /;' fb.mm10.chainCanFam6Link.txt
    # 791039766 bases of 2739603606 (28.874%) in intersection
    sed -e 's/^/    # /;' fb.mm10.chainSynCanFam6Link.txt
    # 747308674 bases of 2739603606 (27.278%) in intersection

    time (doRecipBest.pl -load -workhorse=hgwdev mm10 canFam6 \
      -buildDir=`pwd` -workhorse=hgwdev) > rbest.log 2>&1 &
    #	real    260m22.664s

    sed -e 's/^/    # /;' fb.mm10.chainRBest.CanFam6.txt
    # 731553248 bases of 2739603606 (26.703%) in intersection

    mkdir /hive/data/genomes/canFam6/bed/blastz.mm10.swap
    cd /hive/data/genomes/canFam6/bed/blastz.mm10.swap

    time (doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzCanFam6.2021-05-17/DEF \
	-swap -syntenicNet \
	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
	-chainMinScore=3000 -chainLinearGap=medium) > swap.log 2>&1 &
    #   real    48m21.892s

    sed -e 's/^/    # /;' fb.canFam6.chainMm10Link.txt
    # 747891292 bases of 2312743346 (32.338%) in intersection
    sed -e 's/^/    # /;' fb.canFam6.chainSynMm10Link.txt
    # 721253430 bases of 2312743346 (31.186%) in intersection

    time (doRecipBest.pl -load -workhorse=hgwdev canFam6 mm10 \
      -buildDir=`pwd` -workhorse=hgwdev) > rbest.log 2>&1 &
    # real    165m54.013s

    sed -e 's/^/    # /;' fb.canFam6.chainRBest.Mm10.txt
    # 730713145 bases of 2312743346 (31.595%) in intersection

##############################################################################

# JASPAR mouse track addition by Daniel 10/11/21
cd ~/kent/src/hg/makeDb/trackDb
vi mouse/jaspar.ra
curl http://expdata.cmmt.ubc.ca/JASPAR/UCSC_tracks/mm10/trackDb.txt >> mouse/jaspar.ra
cd /hive/data/genomes/mm10/bed
mkdir jaspar
cd jaspar
wget http://expdata.cmmt.ubc.ca/JASPAR/downloads/UCSC_tracks/2022/JASPAR2022_mm10.bb
wget http://expdata.cmmt.ubc.ca/JASPAR/downloads/UCSC_tracks/2020/JASPAR2020_mm10.bb
mv JASPAR2022_mm10.bb JASPAR2022.bb
mv JASPAR2020_mm10.bb JASPAR2020.bb
cd /gbdb/mm10
mkdir jaspar
cd jaspar
ln -s /hive/data/genomes/mm10/bed/jaspar/JASPAR2022.bb .
ln -s /hive/data/genomes/mm10/bed/jaspar/JASPAR2020.bb .

###############################################################################

# JASPAR 2022 bigBed update 12/7/21
cd /hive/data/genomes/mm10/bed/jaspar
wget http://expdata.cmmt.ubc.ca/JASPAR/downloads/UCSC_tracks/2022/JASPAR2022_mm10.bb
rm JASPAR2022.bb
mv JASPAR2022_mm10.bb JASPAR2022.bb
ls -lh
ls -lh /gbdb/mm10/jaspar

###############################################################################

# JASPAR 2022 bigBed update 1/3/22
cd /hive/data/genomes/mm10/bed/jaspar
wget http://expdata.cmmt.ubc.ca/JASPAR/downloads/UCSC_tracks/2022/JASPAR2022_mm10.bb
rm JASPAR2022.bb
mv JASPAR2022_mm10.bb JASPAR2022.bb
ls -lh
ls -lh /gbdb/mm10/jaspar


# TOGA annotations track 
# got the following files from Michael
# query_annotation.bed
# togaData.tab

mkdir /cluster/data/mm10/bed/togaBigBed
cd /cluster/data/mm10/bed/togaBigBed

# join(1) needs both inputs ordered on the join field alone; sort on exactly
# that tab-separated column so the order does not depend on the rest of the
# line or on the collation locale
sort -t $'\t' -k4,4 query_annotation.bed > sorted.query.bed
sort -t $'\t' -k1,1 togaData.tab > sorted.togaData.tab
# join prints the key first, then the other columns of each input
join -t $'\t' -1 4 sorted.query.bed sorted.togaData.tab  > joined.tab
# rebuild BED column order: joined columns 2-4, then the key, then the rest
cut -f 2-4 joined.tab > part1.tab
cut -f 1 joined.tab > part2.tab
cut -f 5-31 joined.tab > part3.tab
paste part1.tab part2.tab part3.tab | sort -k1,1 -k2,2n > merge.bed

bedToBigBed -tab -type=bed12+19 merge.bed /cluster/data/mm10/chrom.sizes toga.bb -as=$HOME/kent/src/hg/lib/togaData.as

# same procedure for the nucleotide table; only the key and the columns
# from 13 onward of the joined file are kept after the first three
sort -t $'\t' -k1,1 togaNucl.tab > sorted.togaNucl.tab
join -t $'\t' -1 4 sorted.query.bed sorted.togaNucl.tab  > joined.tab
cut -f 2-4 joined.tab > part1.tab
cut -f 1 joined.tab > part2.tab
cut -f 13-21 joined.tab > part3.tab
paste part1.tab part2.tab part3.tab | sort -k1,1 -k2,2n -k 5,5n > merge.bed

bedToBigBed -tab -type=bed4+9 merge.bed /cluster/data/mm10/chrom.sizes togaNucl.bb -as=$HOME/kent/src/hg/lib/togaNucl.as

# and again for the inactivating mutation table
sort -t $'\t' -k1,1 togaInActMut.tab > sorted.togaInactMut.tab
join -t $'\t' -1 4 sorted.query.bed sorted.togaInactMut.tab  > joined.tab
cut -f 2-4 joined.tab > part1.tab
cut -f 1 joined.tab > part2.tab
cut -f 13-20 joined.tab > part3.tab
paste part1.tab part2.tab part3.tab | sort -k1,1 -k2,2n > merge.bed

bedToBigBed -tab -type=bed4+6 merge.bed /cluster/data/mm10/chrom.sizes togaInactMut.bb -as=$HOME/kent/src/hg/lib/togaInactMut.as

###############################################################################
# ReMap refs #28960 (2022-04-13 Gerardo)
cd /hive/data/genomes/mm10/bed
mkdir reMap
cd reMap
wget https://remap.univ-amu.fr/storage/public/hubReMap2022/mm10/bigBed/remap2022_all_macs2_mm10_v1_0.bb
mv remap2022_all_macs2_mm10_v1_0.bb reMap2022.bb
# the density bigWig is served under the name test.bw; rename it locally
wget https://remap.univ-amu.fr/storage/public/hubReMap2022/mm10/bigBed/test.bw
mv test.bw reMapDensity2022.bw
cd /gbdb/mm10
mkdir reMap
cd reMap
ln -s /hive/data/genomes/mm10/bed/reMap/reMap2022.bb .
ln -s /hive/data/genomes/mm10/bed/reMap/reMapDensity2022.bw .
cd ~/kent/src/hg/makeDb/trackDb
# start from the trackDb stanza published with the ReMap hub, then hand edit
curl https://remap.univ-amu.fr/storage/public/hubReMap2022/mm10/trackDb.txt > mouse/mm10/reMap.ra
vi mouse/mm10/reMap.ra
# mm10 trackDb files are under mouse/, the same directory as reMap.ra above
vi mouse/mm10/trackDb.ra

##############################################################################
# LASTZ Mouse Mm10 vs. house mouse GCA_001624535.1

# should be able to run this from anywhere, this time it was run from:
    cd kent/src/hg/utils/automation

  time (~/kent/src/hg/utils/automation/pairLastz.sh \
        mm10 GCA_001624535.1_FVB_NJ_v1 mammal mammal) \
           > mm10.GCA_001624535.1_20220422.log 2>&1 &
  # check the total time
grep -w real  mm10.GCA_001624535.1_20220422.log | tail -1 | sed -e 's/^/    # /;'
    # real      1211m31.498s
##############################################################################
# LASTZ Mouse Mm10 vs. house mouse GCA_001624535.1
#    (DONE - 2022-04-22 - Gerardo)

    mkdir /hive/data/genomes/mm10/bed/lastzGCA_001624535.1.2022-04-22
    cd /hive/data/genomes/mm10/bed/lastzGCA_001624535.1.2022-04-22

    printf '# house mouse GCA_001624535.1 vs. Mouse Mm10
BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.03/bin/lastz

# TARGET: Mouse  mm10
SEQ1_DIR=/hive/data/genomes/mm10/mm10.2bit
SEQ1_LEN=/hive/data/genomes/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000
SEQ1_LIMIT=40

# QUERY: house mouse 2016-04-26 GCA_001624535.1_FVB_NJ_v1
SEQ2_DIR=/hive/data/genomes/asmHubs/GCA/001/624/535/GCA_001624535.1/GCA_001624535.1.2bit
SEQ2_LEN=/hive/data/genomes/asmHubs/GCA/001/624/535/GCA_001624535.1/GCA_001624535.1.chrom.sizes.txt
SEQ2_CHUNK=20000000
SEQ2_LAP=0
SEQ2_LIMIT=100

BASE=/hive/data/genomes/mm10/bed/lastzGCA_001624535.1.2022-04-22
TMPDIR=/dev/shm

' > DEF

    time (~/kent/src/hg/utils/automation/doBlastzChainNet.pl -trackHub -noDbNameCheck -verbose=2 `pwd`/DEF -syntenicNet \
       -qAsmId GCA_001624535.1_FVB_NJ_v1 -workhorse=hgwdev -smallClusterHub=hgwdev -bigClusterHub=ku \
        -chainMinScore=3000 -chainLinearGap=medium) > do.log 2>&1
    grep -w real do.log | sed -e 's/^/    # /;'
    # real	671m40.907s

    sed -e 's/^/    # /;' fb.mm10.chainGCA_001624535.1Link.txt
    # 2460323049 bases of 2818974548 (87.277%) in intersection
    sed -e 's/^/    # /;' fb.mm10.chainSynGCA_001624535.1Link.txt
    # 2350111883 bases of 2818974548 (83.368%) in intersection

    time (~/kent/src/hg/utils/automation/doRecipBest.pl -trackHub -load -workhorse=hgwdev -buildDir=`pwd` \
       \
      -query2Bit="/hive/data/genomes/asmHubs/GCA/001/624/535/GCA_001624535.1/GCA_001624535.1.2bit" \
-querySizes="/hive/data/genomes/asmHubs/GCA/001/624/535/GCA_001624535.1/GCA_001624535.1.chrom.sizes.txt" \
        mm10 GCA_001624535.1) > rbest.log 2>&1

    grep -w real rbest.log | sed -e 's/^/    # /;'
    # real	101m41.454s

    sed -e 's/^/    # /;' fb.mm10.chainRBest.GCA_001624535.1.txt
    # 2249950647 bases of 2818974548 (79.815%) in intersection

    ### and for the swap

    cd /hive/data/genomes/asmHubs/allBuild/GCA/001/624/535/GCA_001624535.1_FVB_NJ_v1/trackData/blastz.mm10.swap

   time (~/kent/src/hg/utils/automation/doBlastzChainNet.pl -trackHub -noDbNameCheck -swap -verbose=2 \
   -qAsmId GCA_001624535.1_FVB_NJ_v1 /hive/data/genomes/mm10/bed/lastzGCA_001624535.1.2022-04-22/DEF -swapDir=`pwd` \
  -syntenicNet -workhorse=hgwdev -smallClusterHub=hgwdev -bigClusterHub=ku \
    -chainMinScore=3000 -chainLinearGap=medium) > swap.log 2>&1

    grep -w real swap.log | sed -e 's/^/    # /;'
    # real	344m25.859s

    sed -e 's/^/    # /;' fb.GCA_001624535.1.chainMm10Link.txt
    # 2272307441 bases of 2588619290 (87.781%) in intersection
    sed -e 's/^/    # /;' fb.GCA_001624535.1.chainSynMm10Link.txt
    # 2258459310 bases of 2588619290 (87.246%) in intersection
# reciprocal best for the swap direction; the hub assembly is the target here,
# so its 2bit and chrom.sizes are passed explicitly
    time (~/kent/src/hg/utils/automation/doRecipBest.pl -trackHub -load -workhorse=hgwdev -buildDir=`pwd` \
   -target2Bit="/hive/data/genomes/asmHubs/GCA/001/624/535/GCA_001624535.1/GCA_001624535.1.2bit" \
-targetSizes="/hive/data/genomes/asmHubs/GCA/001/624/535/GCA_001624535.1/GCA_001624535.1.chrom.sizes.txt" \
   GCA_001624535.1 mm10) > rbest.log 2>&1

    grep -w real rbest.log | sed -e 's/^/    # /;'
    # real	93m42.796s

    sed -e 's/^/    # /;' fb.GCA_001624535.1.chainRBest.Mm10.txt
    # 2255354454 bases of 2588619290 (87.126%) in intersection


# total time reported for the pairLastz.sh run above:
#    real	1211m31.498s
#    user	0m1.396s
#    sys	0m1.411s

##############################################################################
# LIFTOVER TO GCA_001632575.1_C3H_HeJ_v1 (DONE - 2023-01-19 - Hiram)
    ssh hgwdev
    mkdir /hive/data/genomes/mm10/bed/blat.GCA_001632575.1.2023-01-19
    cd /hive/data/genomes/mm10/bed/blat.GCA_001632575.1.2023-01-19
    doSameSpeciesLiftOver.pl -verbose=2 -buildDir=`pwd` \
        -debug -bigClusterHub=ku -dbHost=hgwdev -workhorse=hgwdev \
        -target2Bit=/hive/data/genomes/mm10/mm10.2bit \
        -targetSizes=/hive/data/genomes/mm10/chrom.sizes \
 -query2Bit=/hive/data/genomes/asmHubs/genbankBuild/GCA/001/632/575/GCA_001632575.1_C3H_HeJ_v1/GCA_001632575.1_C3H_HeJ_v1.2bit \
 -querySizes=/hive/data/genomes/asmHubs/genbankBuild/GCA/001/632/575/GCA_001632575.1_C3H_HeJ_v1/GCA_001632575.1_C3H_HeJ_v1.chrom.sizes \
        -ooc=/hive/data/genomes/mm10/jkStuff/mm10.11.ooc \
         mm10 GCA_001632575.1
    time (doSameSpeciesLiftOver.pl -verbose=2 -buildDir=`pwd` \
        -bigClusterHub=ku -dbHost=hgwdev -workhorse=hgwdev \
        -target2Bit=/hive/data/genomes/mm10/mm10.2bit \
        -targetSizes=/hive/data/genomes/mm10/chrom.sizes \
 -query2Bit=/hive/data/genomes/asmHubs/genbankBuild/GCA/001/632/575/GCA_001632575.1_C3H_HeJ_v1/GCA_001632575.1_C3H_HeJ_v1.2bit \
 -querySizes=/hive/data/genomes/asmHubs/genbankBuild/GCA/001/632/575/GCA_001632575.1_C3H_HeJ_v1/GCA_001632575.1_C3H_HeJ_v1.chrom.sizes \
        -ooc=/hive/data/genomes/mm10/jkStuff/mm10.11.ooc \
         mm10 GCA_001632575.1) > doLiftOverToGCA_001632575.1.log 2>&1
    # real    190m50.501s

    # see if the liftOver menus function in the browser from mm10
    #    to GCA_001632575.1

##############################################################################
# LIFTOVER GCA_001632575.1_C3H_HeJ_v1 TO mm10 (DONE - 2023-01-19 - Hiram)
#   Here the GenArk assembly GCA_001632575.1 is the target and mm10 is the
#   query; the work is done in the assembly hub's own trackData directory.
    ssh hgwdev
    # going to need an ooc for this GenArk hub: blat -makeOoc writes the
    # overused 11-mer file that is handed to -ooc= in the runs below
    cd  /hive/data/genomes/asmHubs/genbankBuild/GCA/001/632/575/GCA_001632575.1_C3H_HeJ_v1
    time blat GCA_001632575.1_C3H_HeJ_v1.2bit /dev/null /dev/null -tileSize=11 \
      -makeOoc=GCA_001632575.1_C3H_HeJ_v1.11.ooc -repMatch=1000
# Wrote 14128 overused 11-mers to GCA_001632575.1_C3H_HeJ_v1.11.ooc
# real    0m46.685s


    mkdir /hive/data/genomes/asmHubs/genbankBuild/GCA/001/632/575/GCA_001632575.1_C3H_HeJ_v1/trackData/blat.mm10.2023-01-19
    cd /hive/data/genomes/asmHubs/genbankBuild/GCA/001/632/575/GCA_001632575.1_C3H_HeJ_v1/trackData/blat.mm10.2023-01-19
    # first invocation carries -debug (presumably a dry run to check the
    # setup -- confirm against the script usage); the second is the real,
    # timed run with its output captured in doLiftOverToMm10.log
    doSameSpeciesLiftOver.pl -verbose=2 -buildDir=`pwd` \
        -debug -bigClusterHub=ku -dbHost=hgwdev -workhorse=hgwdev \
 -target2Bit=/hive/data/genomes/asmHubs/genbankBuild/GCA/001/632/575/GCA_001632575.1_C3H_HeJ_v1/GCA_001632575.1_C3H_HeJ_v1.2bit \
 -targetSizes=/hive/data/genomes/asmHubs/genbankBuild/GCA/001/632/575/GCA_001632575.1_C3H_HeJ_v1/GCA_001632575.1_C3H_HeJ_v1.chrom.sizes \
        -query2Bit=/hive/data/genomes/mm10/mm10.2bit \
        -querySizes=/hive/data/genomes/mm10/chrom.sizes \
        -ooc=/hive/data/genomes/asmHubs/genbankBuild/GCA/001/632/575/GCA_001632575.1_C3H_HeJ_v1/GCA_001632575.1_C3H_HeJ_v1.11.ooc \
         GCA_001632575.1 mm10
    time (doSameSpeciesLiftOver.pl -verbose=2 -buildDir=`pwd` \
        -bigClusterHub=ku -dbHost=hgwdev -workhorse=hgwdev \
 -target2Bit=/hive/data/genomes/asmHubs/genbankBuild/GCA/001/632/575/GCA_001632575.1_C3H_HeJ_v1/GCA_001632575.1_C3H_HeJ_v1.2bit \
 -targetSizes=/hive/data/genomes/asmHubs/genbankBuild/GCA/001/632/575/GCA_001632575.1_C3H_HeJ_v1/GCA_001632575.1_C3H_HeJ_v1.chrom.sizes \
        -query2Bit=/hive/data/genomes/mm10/mm10.2bit \
        -querySizes=/hive/data/genomes/mm10/chrom.sizes \
        -ooc=/hive/data/genomes/asmHubs/genbankBuild/GCA/001/632/575/GCA_001632575.1_C3H_HeJ_v1/GCA_001632575.1_C3H_HeJ_v1.11.ooc \
         GCA_001632575.1 mm10) > doLiftOverToMm10.log 2>&1

    # real    244m28.766s

    # see if the liftOver menus function in the browser from GCA_001632575.1
    #    to mm10

##############################################################################
# LIFTOVER TO GCA_001624675.1_NOD_ShiLtJ_v1 (DONE - 2023-01-20 - Hiram)
#   mm10 is the target and the GenArk assembly GCA_001624675.1 is the query;
#   the existing mm10 11.ooc file is reused.
    ssh hgwdev
    mkdir /hive/data/genomes/mm10/bed/blat.GCA_001624675.1.2023-01-20
    cd /hive/data/genomes/mm10/bed/blat.GCA_001624675.1.2023-01-20
    # first invocation carries -debug (presumably a dry run to check the
    # setup -- confirm against the script usage); the second is the real,
    # timed run with its output captured in the log file
    doSameSpeciesLiftOver.pl -verbose=2 -buildDir=`pwd` \
        -debug -bigClusterHub=ku -dbHost=hgwdev -workhorse=hgwdev \
        -target2Bit=/hive/data/genomes/mm10/mm10.2bit \
        -targetSizes=/hive/data/genomes/mm10/chrom.sizes \
 -query2Bit=/hive/data/genomes/asmHubs/genbankBuild/GCA/001/624/675/GCA_001624675.1_NOD_ShiLtJ_v1/GCA_001624675.1_NOD_ShiLtJ_v1.2bit \
 -querySizes=/hive/data/genomes/asmHubs/genbankBuild/GCA/001/624/675/GCA_001624675.1_NOD_ShiLtJ_v1/GCA_001624675.1_NOD_ShiLtJ_v1.chrom.sizes \
        -ooc=/hive/data/genomes/mm10/jkStuff/mm10.11.ooc \
         mm10 GCA_001624675.1

    time (doSameSpeciesLiftOver.pl -verbose=2 -buildDir=`pwd` \
        -bigClusterHub=ku -dbHost=hgwdev -workhorse=hgwdev \
        -target2Bit=/hive/data/genomes/mm10/mm10.2bit \
        -targetSizes=/hive/data/genomes/mm10/chrom.sizes \
 -query2Bit=/hive/data/genomes/asmHubs/genbankBuild/GCA/001/624/675/GCA_001624675.1_NOD_ShiLtJ_v1/GCA_001624675.1_NOD_ShiLtJ_v1.2bit \
 -querySizes=/hive/data/genomes/asmHubs/genbankBuild/GCA/001/624/675/GCA_001624675.1_NOD_ShiLtJ_v1/GCA_001624675.1_NOD_ShiLtJ_v1.chrom.sizes \
        -ooc=/hive/data/genomes/mm10/jkStuff/mm10.11.ooc \
         mm10 GCA_001624675.1) > doLiftOverToGCA_001624675.1.log 2>&1
    # real    160m19.129s

    # see if the liftOver menus function in the browser from mm10
    #    to GCA_001624675.1

##############################################################################
# LIFTOVER GCA_001624675.1_NOD_ShiLtJ_v1 TO mm10 (DONE - 2023-01-20 - Hiram)
#   Here the GenArk assembly GCA_001624675.1 is the target and mm10 is the
#   query; the work is done in the assembly hub's own trackData directory.
    ssh hgwdev
    # going to need an ooc for this GenArk hub: blat -makeOoc writes the
    # overused 11-mer file that is handed to -ooc= in the runs below
    cd  /hive/data/genomes/asmHubs/genbankBuild/GCA/001/624/675/GCA_001624675.1_NOD_ShiLtJ_v1
    time blat GCA_001624675.1_NOD_ShiLtJ_v1.2bit /dev/null /dev/null -tileSize=11 \
      -makeOoc=GCA_001624675.1_NOD_ShiLtJ_v1.11.ooc -repMatch=1000

# Wrote 13801 overused 11-mers to GCA_001624675.1_NOD_ShiLtJ_v1.11.ooc
# real    0m29.753s

    mkdir /hive/data/genomes/asmHubs/genbankBuild/GCA/001/624/675/GCA_001624675.1_NOD_ShiLtJ_v1/trackData/blat.mm10.2023-01-20
    cd /hive/data/genomes/asmHubs/genbankBuild/GCA/001/624/675/GCA_001624675.1_NOD_ShiLtJ_v1/trackData/blat.mm10.2023-01-20
    # first invocation carries -debug (presumably a dry run to check the
    # setup -- confirm against the script usage); the second is the real,
    # timed run with its output captured in doLiftOverToMm10.log
    doSameSpeciesLiftOver.pl -verbose=2 -buildDir=`pwd` \
        -debug -bigClusterHub=ku -dbHost=hgwdev -workhorse=hgwdev \
 -target2Bit=/hive/data/genomes/asmHubs/genbankBuild/GCA/001/624/675/GCA_001624675.1_NOD_ShiLtJ_v1/GCA_001624675.1_NOD_ShiLtJ_v1.2bit \
 -targetSizes=/hive/data/genomes/asmHubs/genbankBuild/GCA/001/624/675/GCA_001624675.1_NOD_ShiLtJ_v1/GCA_001624675.1_NOD_ShiLtJ_v1.chrom.sizes \
        -query2Bit=/hive/data/genomes/mm10/mm10.2bit \
        -querySizes=/hive/data/genomes/mm10/chrom.sizes \
        -ooc=/hive/data/genomes/asmHubs/genbankBuild/GCA/001/624/675/GCA_001624675.1_NOD_ShiLtJ_v1/GCA_001624675.1_NOD_ShiLtJ_v1.11.ooc \
         GCA_001624675.1 mm10
    time (doSameSpeciesLiftOver.pl -verbose=2 -buildDir=`pwd` \
        -bigClusterHub=ku -dbHost=hgwdev -workhorse=hgwdev \
 -target2Bit=/hive/data/genomes/asmHubs/genbankBuild/GCA/001/624/675/GCA_001624675.1_NOD_ShiLtJ_v1/GCA_001624675.1_NOD_ShiLtJ_v1.2bit \
 -targetSizes=/hive/data/genomes/asmHubs/genbankBuild/GCA/001/624/675/GCA_001624675.1_NOD_ShiLtJ_v1/GCA_001624675.1_NOD_ShiLtJ_v1.chrom.sizes \
        -query2Bit=/hive/data/genomes/mm10/mm10.2bit \
        -querySizes=/hive/data/genomes/mm10/chrom.sizes \
        -ooc=/hive/data/genomes/asmHubs/genbankBuild/GCA/001/624/675/GCA_001624675.1_NOD_ShiLtJ_v1/GCA_001624675.1_NOD_ShiLtJ_v1.11.ooc \
         GCA_001624675.1 mm10) > doLiftOverToMm10.log 2>&1

    # real    193m24.137s

    # see if the liftOver menus function in the browser from GCA_001624675.1
    #    to mm10

##############################################################################
# FANTOM5 refs #21605 (2023-06-09 Gerardo)
# Clone the RIKEN FANTOM5 hub locally, link its mm10 big files into
# /gbdb/mm10/fantom5, and derive fantom5.ra from the hub's mm10 trackDb.txt.
cd /hive/data/outside/
mkdir fantom5
cd fantom5
hubClone -download https://fantom.gsc.riken.jp/5/datahub/hub.txt
cd /gbdb/mm10
mkdir fantom5
cd fantom5
# Making symlinks for big files
# (glob directly rather than parsing ls output; quote the name)
for file in /hive/data/outside/fantom5/riken_f5/mm10/*.b*; do
    [ -e "$file" ] || continue    # pattern matched nothing
    ln -s "$file" .
done
cd /hive/data/outside/fantom5/riken_f5/mm10/
cp trackDb.txt fantom5.ra
vi fantom5.ra
# Indented subtracks
# Changing bigDataUrl
# Removing non-alpha characters
cd ~/kent/src/hg/makeDb/trackDb/mouse/mm10/
# take the mm10 fantom5.ra edited just above, not the hg38 one
cp /hive/data/outside/fantom5/riken_f5/mm10/fantom5.ra .
vi trackDb.ra
# line added to trackDb.ra:
#include fantom5.ra alpha
##############################################################################
# LASTZ Mouse Mm10 vs. chicken GCF_016699485.2
#    (DONE - 2023-05-29 - Gerardo)
#    mm10 is the target (SEQ1) and the chicken GenArk assembly the query
#    (SEQ2); chain/net and reciprocal best are run here, then swapped into
#    the chicken assembly hub build directory.

    mkdir /hive/data/genomes/mm10/bed/lastzGCF_016699485.2.2023-05-29
    cd /hive/data/genomes/mm10/bed/lastzGCF_016699485.2.2023-05-29

    printf '# chicken GCF_016699485.2 vs. Mouse Mm10
BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.03/bin/lastz

# TARGET: Mouse  mm10
SEQ1_DIR=/hive/data/genomes/mm10/mm10.2bit
SEQ1_LEN=/hive/data/genomes/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000
SEQ1_LIMIT=40

# QUERY: chicken 2021-01-19 GCF_016699485.2_bGalGal1.mat.broiler.GRCg7b
SEQ2_DIR=/hive/data/genomes/asmHubs/GCF/016/699/485/GCF_016699485.2/GCF_016699485.2.2bit
SEQ2_LEN=/hive/data/genomes/asmHubs/GCF/016/699/485/GCF_016699485.2/GCF_016699485.2.chrom.sizes.txt
SEQ2_CHUNK=20000000
SEQ2_LAP=0
SEQ2_LIMIT=100

BASE=/hive/data/genomes/mm10/bed/lastzGCF_016699485.2.2023-05-29
TMPDIR=/dev/shm

' > DEF

    time (~/kent/src/hg/utils/automation/doBlastzChainNet.pl -trackHub -noDbNameCheck -verbose=2 `pwd`/DEF -syntenicNet \
       -qAsmId GCF_016699485.2_bGalGal1.mat.broiler.GRCg7b -workhorse=hgwdev -smallClusterHub=hgwdev -bigClusterHub=ku \
        -chainMinScore=5000 -chainLinearGap=loose) > do.log 2>&1
    grep -w real do.log | sed -e 's/^/    # /;'
    # real	460m9.556s

    sed -e 's/^/    # /;' fb.mm10.chainGCF_016699485.2Link.txt
    # 69911555 bases of 2818974548 (2.480%) in intersection
    sed -e 's/^/    # /;' fb.mm10.chainSynGCF_016699485.2Link.txt
    # 50043505 bases of 2818974548 (1.775%) in intersection

    time (~/kent/src/hg/utils/automation/doRecipBest.pl -trackHub -load -workhorse=hgwdev -buildDir=`pwd` \
       \
      -query2Bit="/hive/data/genomes/asmHubs/GCF/016/699/485/GCF_016699485.2/GCF_016699485.2.2bit" \
-querySizes="/hive/data/genomes/asmHubs/GCF/016/699/485/GCF_016699485.2/GCF_016699485.2.chrom.sizes.txt" \
        mm10 GCF_016699485.2) > rbest.log 2>&1

    grep -w real rbest.log | sed -e 's/^/    # /;'
    # real	51m15.277s

    sed -e 's/^/    # /;' fb.mm10.chainRBest.GCF_016699485.2.txt
    # 53269354 bases of 2818974548 (1.890%) in intersection

    ### and for the swap

    cd /hive/data/genomes/asmHubs/allBuild/GCF/016/699/485/GCF_016699485.2_bGalGal1.mat.broiler.GRCg7b/trackData/blastz.mm10.swap

   time (~/kent/src/hg/utils/automation/doBlastzChainNet.pl -trackHub -noDbNameCheck -swap -verbose=2 \
   -qAsmId GCF_016699485.2_bGalGal1.mat.broiler.GRCg7b /hive/data/genomes/mm10/bed/lastzGCF_016699485.2.2023-05-29/DEF -swapDir=`pwd` \
  -syntenicNet -workhorse=hgwdev -smallClusterHub=hgwdev -bigClusterHub=ku \
    -chainMinScore=5000 -chainLinearGap=loose) > swap.log 2>&1

    grep -w real swap.log | sed -e 's/^/    # /;'
    # real	3m59.186s

    sed -e 's/^/    # /;' fb.GCF_016699485.2.chainMm10Link.txt
    # 56470179 bases of 1053332251 (5.361%) in intersection
    sed -e 's/^/    # /;' fb.GCF_016699485.2.chainSynMm10Link.txt
    # 48075570 bases of 1053332251 (4.564%) in intersection

    time (~/kent/src/hg/utils/automation/doRecipBest.pl -trackHub -load -workhorse=hgwdev -buildDir=`pwd` \
    \
   -target2bit="/hive/data/genomes/asmHubs/GCF/016/699/485/GCF_016699485.2/GCF_016699485.2.2bit" \
-targetSizes="/hive/data/genomes/asmHubs/GCF/016/699/485/GCF_016699485.2/GCF_016699485.2.chrom.sizes.txt" \
   GCF_016699485.2 mm10) > rbest.log 2>&1

    grep -w real rbest.log | sed -e 's/^/    # /;'
    # real	34m17.704s

    sed -e 's/^/    # /;' fb.GCF_016699485.2.chainRBest.Mm10.txt
    # 53161662 bases of 1053332251 (5.047%) in intersection

    # elapsed time recorded for the whole procedure (all four steps above):
    # real	549m48.888s
    # user	0m1.670s
    # sys	0m2.038s
##############################################################################
# LASTZ Mouse Mm10 vs. domestic ferret GCF_011764305.1
#    (DONE - 2023-05-29 - Gerardo)
#    mm10 is the target (SEQ1) and the ferret GenArk assembly the query
#    (SEQ2); chain/net and reciprocal best are run here, then swapped into
#    the ferret assembly hub build directory.

    mkdir /hive/data/genomes/mm10/bed/lastzGCF_011764305.1.2023-05-29
    cd /hive/data/genomes/mm10/bed/lastzGCF_011764305.1.2023-05-29

    printf '# domestic ferret GCF_011764305.1 vs. Mouse Mm10
BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.03/bin/lastz

# TARGET: Mouse  mm10
SEQ1_DIR=/hive/data/genomes/mm10/mm10.2bit
SEQ1_LEN=/hive/data/genomes/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000
SEQ1_LIMIT=40

# QUERY: domestic ferret 2020-03-27 GCF_011764305.1_ASM1176430v1.1
SEQ2_DIR=/hive/data/genomes/asmHubs/GCF/011/764/305/GCF_011764305.1/GCF_011764305.1.2bit
SEQ2_LEN=/hive/data/genomes/asmHubs/GCF/011/764/305/GCF_011764305.1/GCF_011764305.1.chrom.sizes.txt
SEQ2_CHUNK=20000000
SEQ2_LAP=0
SEQ2_LIMIT=100

BASE=/hive/data/genomes/mm10/bed/lastzGCF_011764305.1.2023-05-29
TMPDIR=/dev/shm

' > DEF

    time (~/kent/src/hg/utils/automation/doBlastzChainNet.pl -trackHub -noDbNameCheck -verbose=2 `pwd`/DEF -syntenicNet \
       -qAsmId GCF_011764305.1_ASM1176430v1.1 -workhorse=hgwdev -smallClusterHub=hgwdev -bigClusterHub=ku \
        -chainMinScore=3000 -chainLinearGap=medium) > do.log 2>&1
    grep -w real do.log | sed -e 's/^/    # /;'
    # real	887m53.003s

    sed -e 's/^/    # /;' fb.mm10.chainGCF_011764305.1Link.txt
    # 786269484 bases of 2818974548 (27.892%) in intersection
    sed -e 's/^/    # /;' fb.mm10.chainSynGCF_011764305.1Link.txt
    # 736073378 bases of 2818974548 (26.111%) in intersection

    time (~/kent/src/hg/utils/automation/doRecipBest.pl -trackHub -load -workhorse=hgwdev -buildDir=`pwd` \
       \
      -query2Bit="/hive/data/genomes/asmHubs/GCF/011/764/305/GCF_011764305.1/GCF_011764305.1.2bit" \
-querySizes="/hive/data/genomes/asmHubs/GCF/011/764/305/GCF_011764305.1/GCF_011764305.1.chrom.sizes.txt" \
        mm10 GCF_011764305.1) > rbest.log 2>&1

    grep -w real rbest.log | sed -e 's/^/    # /;'
    # real	136m18.830s

    sed -e 's/^/    # /;' fb.mm10.chainRBest.GCF_011764305.1.txt
    # 725535934 bases of 2818974548 (25.738%) in intersection

    ### and for the swap

    cd /hive/data/genomes/asmHubs/allBuild/GCF/011/764/305/GCF_011764305.1_ASM1176430v1.1/trackData/blastz.mm10.swap

   time (~/kent/src/hg/utils/automation/doBlastzChainNet.pl -trackHub -noDbNameCheck -swap -verbose=2 \
   -qAsmId GCF_011764305.1_ASM1176430v1.1 /hive/data/genomes/mm10/bed/lastzGCF_011764305.1.2023-05-29/DEF -swapDir=`pwd` \
  -syntenicNet -workhorse=hgwdev -smallClusterHub=hgwdev -bigClusterHub=ku \
    -chainMinScore=3000 -chainLinearGap=medium) > swap.log 2>&1

    grep -w real swap.log | sed -e 's/^/    # /;'
    # real	45m42.440s

    sed -e 's/^/    # /;' fb.GCF_011764305.1.chainMm10Link.txt
    # 755800573 bases of 2577107489 (29.327%) in intersection
    sed -e 's/^/    # /;' fb.GCF_011764305.1.chainSynMm10Link.txt
    # 716112438 bases of 2577107489 (27.787%) in intersection

    time (~/kent/src/hg/utils/automation/doRecipBest.pl -trackHub -load -workhorse=hgwdev -buildDir=`pwd` \
    \
   -target2bit="/hive/data/genomes/asmHubs/GCF/011/764/305/GCF_011764305.1/GCF_011764305.1.2bit" \
-targetSizes="/hive/data/genomes/asmHubs/GCF/011/764/305/GCF_011764305.1/GCF_011764305.1.chrom.sizes.txt" \
   GCF_011764305.1 mm10) > rbest.log 2>&1

    grep -w real rbest.log | sed -e 's/^/    # /;'
    # real	115m58.410s

    sed -e 's/^/    # /;' fb.GCF_011764305.1.chainRBest.Mm10.txt
    # 724974151 bases of 2577107489 (28.131%) in intersection

    # elapsed time recorded for the whole procedure (all four steps above):
    # real	1185m57.217s
    # user	0m2.091s
    # sys	0m2.151s
##############################################################################
# LASTZ Mouse Mm10 vs. Sumatran orangutan GCF_028885655.1
#    (DONE - 2023-05-29 - Gerardo)
#    mm10 is the target (SEQ1) and the orangutan GenArk assembly the query
#    (SEQ2); chain/net and reciprocal best are run here, then swapped into
#    the orangutan assembly hub build directory.

    mkdir /hive/data/genomes/mm10/bed/lastzGCF_028885655.1.2023-05-29
    cd /hive/data/genomes/mm10/bed/lastzGCF_028885655.1.2023-05-29

    printf '# Sumatran orangutan GCF_028885655.1 vs. Mouse Mm10
BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.03/bin/lastz

# TARGET: Mouse  mm10
SEQ1_DIR=/hive/data/genomes/mm10/mm10.2bit
SEQ1_LEN=/hive/data/genomes/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000
SEQ1_LIMIT=40

# QUERY: Sumatran orangutan 2023-02-28 GCF_028885655.1_NHGRI_mPonAbe1-v1.1-hic.freeze_pri
SEQ2_DIR=/hive/data/genomes/asmHubs/GCF/028/885/655/GCF_028885655.1/GCF_028885655.1.2bit
SEQ2_LEN=/hive/data/genomes/asmHubs/GCF/028/885/655/GCF_028885655.1/GCF_028885655.1.chrom.sizes.txt
SEQ2_CHUNK=20000000
SEQ2_LAP=0
SEQ2_LIMIT=100

BASE=/hive/data/genomes/mm10/bed/lastzGCF_028885655.1.2023-05-29
TMPDIR=/dev/shm

' > DEF

    time (~/kent/src/hg/utils/automation/doBlastzChainNet.pl -trackHub -noDbNameCheck -verbose=2 `pwd`/DEF -syntenicNet \
       -qAsmId GCF_028885655.1_NHGRI_mPonAbe1-v1.1-hic.freeze_pri -workhorse=hgwdev -smallClusterHub=hgwdev -bigClusterHub=ku \
        -chainMinScore=3000 -chainLinearGap=medium) > do.log 2>&1
    grep -w real do.log | sed -e 's/^/    # /;'
    # real	935m21.904s

    sed -e 's/^/    # /;' fb.mm10.chainGCF_028885655.1Link.txt
    # 968671735 bases of 2818974548 (34.363%) in intersection
    sed -e 's/^/    # /;' fb.mm10.chainSynGCF_028885655.1Link.txt
    # 917832766 bases of 2818974548 (32.559%) in intersection

    time (~/kent/src/hg/utils/automation/doRecipBest.pl -trackHub -load -workhorse=hgwdev -buildDir=`pwd` \
       \
      -query2Bit="/hive/data/genomes/asmHubs/GCF/028/885/655/GCF_028885655.1/GCF_028885655.1.2bit" \
-querySizes="/hive/data/genomes/asmHubs/GCF/028/885/655/GCF_028885655.1/GCF_028885655.1.chrom.sizes.txt" \
        mm10 GCF_028885655.1) > rbest.log 2>&1

    grep -w real rbest.log | sed -e 's/^/    # /;'
    # real	249m25.816s

    sed -e 's/^/    # /;' fb.mm10.chainRBest.GCF_028885655.1.txt
    # 895237861 bases of 2818974548 (31.758%) in intersection

    ### and for the swap

    cd /hive/data/genomes/asmHubs/allBuild/GCF/028/885/655/GCF_028885655.1_NHGRI_mPonAbe1-v1.1-hic.freeze_pri/trackData/blastz.mm10.swap

   time (~/kent/src/hg/utils/automation/doBlastzChainNet.pl -trackHub -noDbNameCheck -swap -verbose=2 \
   -qAsmId GCF_028885655.1_NHGRI_mPonAbe1-v1.1-hic.freeze_pri /hive/data/genomes/mm10/bed/lastzGCF_028885655.1.2023-05-29/DEF -swapDir=`pwd` \
  -syntenicNet -workhorse=hgwdev -smallClusterHub=hgwdev -bigClusterHub=ku \
    -chainMinScore=3000 -chainLinearGap=medium) > swap.log 2>&1

    grep -w real swap.log | sed -e 's/^/    # /;'
    # real	53m48.020s

    sed -e 's/^/    # /;' fb.GCF_028885655.1.chainMm10Link.txt
    # 969760022 bases of 3365490689 (28.815%) in intersection
    sed -e 's/^/    # /;' fb.GCF_028885655.1.chainSynMm10Link.txt
    # 905523135 bases of 3365490689 (26.906%) in intersection

    time (~/kent/src/hg/utils/automation/doRecipBest.pl -trackHub -load -workhorse=hgwdev -buildDir=`pwd` \
    \
   -target2bit="/hive/data/genomes/asmHubs/GCF/028/885/655/GCF_028885655.1/GCF_028885655.1.2bit" \
-targetSizes="/hive/data/genomes/asmHubs/GCF/028/885/655/GCF_028885655.1/GCF_028885655.1.chrom.sizes.txt" \
   GCF_028885655.1 mm10) > rbest.log 2>&1

    grep -w real rbest.log | sed -e 's/^/    # /;'
    # real	232m8.721s

    sed -e 's/^/    # /;' fb.GCF_028885655.1.chainRBest.Mm10.txt
    # 893784537 bases of 3365490689 (26.557%) in intersection

    # elapsed time recorded for the whole procedure (all four steps above):
    # real	1470m50.153s
    # user	0m2.905s
    # sys	0m2.377s
##############################################################################
# LASTZ chimpanzee GCF_028858775.1 vs. Mouse Mm10
#    (DONE - 2023-05-29 - Gerardo)
#    The chimpanzee GenArk assembly is the target (SEQ1) and mm10 the query
#    (SEQ2); the alignment is built in the assembly hub's trackData
#    directory and then swapped into /hive/data/genomes/mm10/bed.

    mkdir /hive/data/genomes/asmHubs/allBuild/GCF/028/858/775/GCF_028858775.1_NHGRI_mPanTro3-v1.1-hic.freeze_pri/trackData/lastzMm10.2023-05-29
    cd /hive/data/genomes/asmHubs/allBuild/GCF/028/858/775/GCF_028858775.1_NHGRI_mPanTro3-v1.1-hic.freeze_pri/trackData/lastzMm10.2023-05-29

    printf '# Mouse Mm10 vs. chimpanzee GCF_028858775.1
BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.03/bin/lastz

# TARGET: chimpanzee 2023-02-27 GCF_028858775.1_NHGRI_mPanTro3-v1.1-hic.freeze_pri
SEQ1_DIR=/hive/data/genomes/asmHubs/GCF/028/858/775/GCF_028858775.1/GCF_028858775.1.2bit
SEQ1_LEN=/hive/data/genomes/asmHubs/GCF/028/858/775/GCF_028858775.1/GCF_028858775.1.chrom.sizes.txt
SEQ1_CHUNK=20000000
SEQ1_LAP=10000
SEQ1_LIMIT=40

# QUERY: Mouse  mm10
SEQ2_DIR=/hive/data/genomes/mm10/mm10.2bit
SEQ2_LEN=/hive/data/genomes/mm10/chrom.sizes
SEQ2_CHUNK=20000000
SEQ2_LAP=0
SEQ2_LIMIT=100

BASE=/hive/data/genomes/asmHubs/allBuild/GCF/028/858/775/GCF_028858775.1_NHGRI_mPanTro3-v1.1-hic.freeze_pri/trackData/lastzMm10.2023-05-29
TMPDIR=/dev/shm

' > DEF

    time (~/kent/src/hg/utils/automation/doBlastzChainNet.pl -trackHub -noDbNameCheck -verbose=2 `pwd`/DEF -syntenicNet \
      -tAsmId GCF_028858775.1_NHGRI_mPanTro3-v1.1-hic.freeze_pri  -workhorse=hgwdev -smallClusterHub=hgwdev -bigClusterHub=ku \
        -chainMinScore=3000 -chainLinearGap=medium) > do.log 2>&1
    grep -w real do.log | sed -e 's/^/    # /;'
    # real	962m5.922s

    sed -e 's/^/    # /;' fb.GCF_028858775.1.chainMm10Link.txt
    # 1052550747 bases of 3225356997 (32.634%) in intersection
    sed -e 's/^/    # /;' fb.GCF_028858775.1.chainSynMm10Link.txt
    # 903800311 bases of 3225356997 (28.022%) in intersection

    time (~/kent/src/hg/utils/automation/doRecipBest.pl -trackHub -load -workhorse=hgwdev -buildDir=`pwd` \
      -target2Bit="/hive/data/genomes/asmHubs/GCF/028/858/775/GCF_028858775.1/GCF_028858775.1.2bit" \
-targetSizes="/hive/data/genomes/asmHubs/GCF/028/858/775/GCF_028858775.1/GCF_028858775.1.chrom.sizes.txt" \
       \
        GCF_028858775.1 mm10) > rbest.log 2>&1

    grep -w real rbest.log | sed -e 's/^/    # /;'
    # real	253m21.822s

    sed -e 's/^/    # /;' fb.GCF_028858775.1.chainRBest.Mm10.txt
    # 893733597 bases of 3225356997 (27.710%) in intersection

    ### and for the swap

    cd /hive/data/genomes/mm10/bed/blastz.GCF_028858775.1.swap

   time (~/kent/src/hg/utils/automation/doBlastzChainNet.pl -trackHub -noDbNameCheck -swap -verbose=2 \
  -tAsmId GCF_028858775.1_NHGRI_mPanTro3-v1.1-hic.freeze_pri  /hive/data/genomes/asmHubs/allBuild/GCF/028/858/775/GCF_028858775.1_NHGRI_mPanTro3-v1.1-hic.freeze_pri/trackData/lastzMm10.2023-05-29/DEF -swapDir=`pwd` \
  -syntenicNet -workhorse=hgwdev -smallClusterHub=hgwdev -bigClusterHub=ku \
    -chainMinScore=3000 -chainLinearGap=medium) > swap.log 2>&1

    grep -w real swap.log | sed -e 's/^/    # /;'
    # real	53m57.464s

    sed -e 's/^/    # /;' fb.mm10.chainGCF_028858775.1Link.txt
    # 968374021 bases of 2818974548 (34.352%) in intersection
    sed -e 's/^/    # /;' fb.mm10.chainSynGCF_028858775.1Link.txt
    # 917752702 bases of 2818974548 (32.556%) in intersection

    time (~/kent/src/hg/utils/automation/doRecipBest.pl -trackHub -load -workhorse=hgwdev -buildDir=`pwd` \
   -query2bit="/hive/data/genomes/asmHubs/GCF/028/858/775/GCF_028858775.1/GCF_028858775.1.2bit" \
-querySizes="/hive/data/genomes/asmHubs/GCF/028/858/775/GCF_028858775.1/GCF_028858775.1.chrom.sizes.txt" \
    \
   mm10 GCF_028858775.1) > rbest.log 2>&1

    grep -w real rbest.log | sed -e 's/^/    # /;'
    # real	260m16.557s

    sed -e 's/^/    # /;' fb.mm10.chainRBest.GCF_028858775.1.txt
    # 895305709 bases of 2818974548 (31.760%) in intersection

    # elapsed time recorded for the whole procedure (all four steps above):
    # real	1529m47.858s
    # user	0m3.258s
    # sys	0m2.169s
##############################################################################
# LASTZ western lowland gorilla GCF_029281585.1 vs. Mouse Mm10
#    (DONE - 2023-05-29 - Gerardo)
#    The gorilla GenArk assembly is the target (SEQ1) and mm10 the query
#    (SEQ2); the alignment is built in the assembly hub's trackData
#    directory and then swapped into /hive/data/genomes/mm10/bed.

    mkdir /hive/data/genomes/asmHubs/allBuild/GCF/029/281/585/GCF_029281585.1_NHGRI_mGorGor1-v1.1-0.2.freeze_pri/trackData/lastzMm10.2023-05-29
    cd /hive/data/genomes/asmHubs/allBuild/GCF/029/281/585/GCF_029281585.1_NHGRI_mGorGor1-v1.1-0.2.freeze_pri/trackData/lastzMm10.2023-05-29

    printf '# Mouse Mm10 vs. western lowland gorilla GCF_029281585.1
BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.03/bin/lastz

# TARGET: western lowland gorilla 2023-03-20 GCF_029281585.1_NHGRI_mGorGor1-v1.1-0.2.freeze_pri
SEQ1_DIR=/hive/data/genomes/asmHubs/GCF/029/281/585/GCF_029281585.1/GCF_029281585.1.2bit
SEQ1_LEN=/hive/data/genomes/asmHubs/GCF/029/281/585/GCF_029281585.1/GCF_029281585.1.chrom.sizes.txt
SEQ1_CHUNK=20000000
SEQ1_LAP=10000
SEQ1_LIMIT=40

# QUERY: Mouse  mm10
SEQ2_DIR=/hive/data/genomes/mm10/mm10.2bit
SEQ2_LEN=/hive/data/genomes/mm10/chrom.sizes
SEQ2_CHUNK=20000000
SEQ2_LAP=0
SEQ2_LIMIT=100

BASE=/hive/data/genomes/asmHubs/allBuild/GCF/029/281/585/GCF_029281585.1_NHGRI_mGorGor1-v1.1-0.2.freeze_pri/trackData/lastzMm10.2023-05-29
TMPDIR=/dev/shm

' > DEF

    time (~/kent/src/hg/utils/automation/doBlastzChainNet.pl -trackHub -noDbNameCheck -verbose=2 `pwd`/DEF -syntenicNet \
      -tAsmId GCF_029281585.1_NHGRI_mGorGor1-v1.1-0.2.freeze_pri  -workhorse=hgwdev -smallClusterHub=hgwdev -bigClusterHub=ku \
        -chainMinScore=3000 -chainLinearGap=medium) > do.log 2>&1
    grep -w real do.log | sed -e 's/^/    # /;'
    # real	969m31.867s

    sed -e 's/^/    # /;' fb.GCF_029281585.1.chainMm10Link.txt
    # 1163587557 bases of 3600562452 (32.317%) in intersection
    sed -e 's/^/    # /;' fb.GCF_029281585.1.chainSynMm10Link.txt
    # 901996582 bases of 3600562452 (25.052%) in intersection

    time (~/kent/src/hg/utils/automation/doRecipBest.pl -trackHub -load -workhorse=hgwdev -buildDir=`pwd` \
      -target2Bit="/hive/data/genomes/asmHubs/GCF/029/281/585/GCF_029281585.1/GCF_029281585.1.2bit" \
-targetSizes="/hive/data/genomes/asmHubs/GCF/029/281/585/GCF_029281585.1/GCF_029281585.1.chrom.sizes.txt" \
       \
        GCF_029281585.1 mm10) > rbest.log 2>&1

    grep -w real rbest.log | sed -e 's/^/    # /;'
    # real	267m18.829s

    sed -e 's/^/    # /;' fb.GCF_029281585.1.chainRBest.Mm10.txt
    # 893796215 bases of 3600562452 (24.824%) in intersection

    ### and for the swap

    cd /hive/data/genomes/mm10/bed/blastz.GCF_029281585.1.swap

   time (~/kent/src/hg/utils/automation/doBlastzChainNet.pl -trackHub -noDbNameCheck -swap -verbose=2 \
  -tAsmId GCF_029281585.1_NHGRI_mGorGor1-v1.1-0.2.freeze_pri  /hive/data/genomes/asmHubs/allBuild/GCF/029/281/585/GCF_029281585.1_NHGRI_mGorGor1-v1.1-0.2.freeze_pri/trackData/lastzMm10.2023-05-29/DEF -swapDir=`pwd` \
  -syntenicNet -workhorse=hgwdev -smallClusterHub=hgwdev -bigClusterHub=ku \
    -chainMinScore=3000 -chainLinearGap=medium) > swap.log 2>&1

    grep -w real swap.log | sed -e 's/^/    # /;'
    # real	54m56.098s

    sed -e 's/^/    # /;' fb.mm10.chainGCF_029281585.1Link.txt
    # 969007315 bases of 2818974548 (34.374%) in intersection
    sed -e 's/^/    # /;' fb.mm10.chainSynGCF_029281585.1Link.txt
    # 917900547 bases of 2818974548 (32.562%) in intersection

    time (~/kent/src/hg/utils/automation/doRecipBest.pl -trackHub -load -workhorse=hgwdev -buildDir=`pwd` \
   -query2bit="/hive/data/genomes/asmHubs/GCF/029/281/585/GCF_029281585.1/GCF_029281585.1.2bit" \
-querySizes="/hive/data/genomes/asmHubs/GCF/029/281/585/GCF_029281585.1/GCF_029281585.1.chrom.sizes.txt" \
    \
   mm10 GCF_029281585.1) > rbest.log 2>&1

    grep -w real rbest.log | sed -e 's/^/    # /;'
    # real	270m58.663s

    sed -e 's/^/    # /;' fb.mm10.chainRBest.GCF_029281585.1.txt
    # 895198565 bases of 2818974548 (31.756%) in intersection

    # elapsed time recorded for the whole procedure (all four steps above):
    # real	1562m52.789s
    # user	0m4.517s
    # sys	0m2.098s
##############################################################################
## allGaps track per user request redmine 32104 - DONE - Hiram - 2023-09-11
##   Builds a bigBed of the N regions reported by twoBitInfo -nBed for the
##   mm10.p6 sequence and compares its coverage with the standard gap track.

    mkdir /hive/data/genomes/mm10/bed/allGaps
    cd /hive/data/genomes/mm10/bed/allGaps
    # bedToBigBed needs its input sorted by chrom, then start
    twoBitInfo -nBed ../../mm10.p6.2bit stdout \
       | sort -k1,1 -k2,2n > mm10.p6.allGaps.bed
    bedToBigBed -type=bed3 mm10.p6.allGaps.bed \
        ../../chrom.sizes.p6 mm10.p6.allGaps.bb

    # link the bigBed just built (not the bed text file) into /gbdb
    ln -s `pwd`/mm10.p6.allGaps.bb /gbdb/mm10/bbi/allGaps.bb

    #  all these gaps:
    featureBits -countGaps mm10 mm10.p6.allGaps.bed > fb.allGaps.txt 2>&1
    #	79435853 bases of 2818974548 (2.818%) in intersection

    # standard AGP defined gap track:
    # NOTE(review): mm10.gap.bed is not produced by any command recorded in
    # this section -- presumably exported from the mm10 gap table; confirm.
    featureBits -countGaps mm10 mm10.gap.bed > fb.gap.txt 2>&1
    #	79370942 bases of 2818974548 (2.816%) in intersection

    # with both, should be identical to the standard gap track:
    featureBits -countGaps mm10 mm10.gap.bed mm10.p6.allGaps.bed
    #	79370942 bases of 2818974548 (2.816%) in intersection

##############################################################################
# VISTA Enhancers track refs #16044  (2023-09-20 Gerardo)
# Lift the mm9 VISTA enhancer bed file over to mm10, convert it to bigBed,
# and link the result into /gbdb/mm10/vistaEnhancers.
cd /hive/data/outside/
mkdir vistaEnhancers
cd vistaEnhancers
mkdir mm10
cd mm10
liftOver -bedPlus=9 \
    /hive/data/outside/vistaEnhancers/mm9/sortedVistaEnhancers.bed \
    /hive/data/gbdb/mm9/liftOver/mm9ToMm10.over.chain.gz \
    vistaEnhancers.bed unMapped
bedToBigBed -tab -type=bed9+ \
    -as=/hive/data/outside/vistaEnhancers/mm9/vistaEnhancers.as \
    vistaEnhancers.bed \
    https://hgdownload.soe.ucsc.edu/goldenPath/mm10/bigZips/mm10.chrom.sizes \
    vistaEnhancers.bb
cd /gbdb/mm10
mkdir vistaEnhancers
cd vistaEnhancers
# Symlink the bigBed here (no link name given, so it lands in this directory)
ln -s /hive/data/outside/vistaEnhancers/mm10/vistaEnhancers.bb
cd ~/kent/src/hg/makeDb/trackDb/mouse/mm10
vi trackDb.ra
#############################################################################

#############################################################################
# JASPAR 2024 bigBed update 11/13/24
# Fetch the JASPAR 2024 mm10 bigBed and link it into /gbdb/mm10/jaspar.

cd /hive/data/genomes/mm10/bed/jaspar
wget https://frigg.uio.no/JASPAR/JASPAR_genome_browser_tracks/current/mm10/JASPAR2024_mm10.bb
mv JASPAR2024_mm10.bb JASPAR2024.bb
# use an absolute target: a relative one is resolved against the directory
# that holds the link (/gbdb/mm10/jaspar), which would make the link point
# at itself
ln -s `pwd`/JASPAR2024.bb /gbdb/mm10/jaspar/JASPAR2024.bb

##############################################################################
# LASTZ Mouse Mm10 vs. water buffalo GCF_019923935.1
#    (DONE - 2024-01-03 - mspeir)
#    NOTE(review): this section carries the same header, commands and
#    recorded numbers as the section that follows it, but ends in a pasted
#    swap trace instead of the swap commands -- it looks like an earlier
#    paste of the same run; confirm before relying on it.

    mkdir /hive/data/genomes/mm10/bed/lastzGCF_019923935.1.2024-01-03
    cd /hive/data/genomes/mm10/bed/lastzGCF_019923935.1.2024-01-03

    printf '# water buffalo GCF_019923935.1 vs. Mouse Mm10
BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.03/bin/lastz

# TARGET: Mouse  mm10
SEQ1_DIR=/hive/data/genomes/mm10/mm10.2bit
SEQ1_LEN=/hive/data/genomes/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000
SEQ1_LIMIT=40

# QUERY: water buffalo 2021-09-10 GCF_019923935.1_NDDB_SH_1
SEQ2_DIR=/hive/data/genomes/asmHubs/GCF/019/923/935/GCF_019923935.1/GCF_019923935.1.2bit
SEQ2_LEN=/hive/data/genomes/asmHubs/GCF/019/923/935/GCF_019923935.1/GCF_019923935.1.chrom.sizes.txt
SEQ2_CHUNK=20000000
SEQ2_LAP=0
SEQ2_LIMIT=100

BASE=/hive/data/genomes/mm10/bed/lastzGCF_019923935.1.2024-01-03
TMPDIR=/dev/shm

' > DEF

    time (~/kent/src/hg/utils/automation/doBlastzChainNet.pl -trackHub -noDbNameCheck -verbose=2 `pwd`/DEF -syntenicNet \
       -qAsmId GCF_019923935.1_NDDB_SH_1 -workhorse=hgwdev -smallClusterHub=hgwdev -bigClusterHub=ku \
        -chainMinScore=3000 -chainLinearGap=medium) > do.log 2>&1
    grep -w real do.log | sed -e 's/^/    # /;'
    # real	198m37.349s

    sed -e 's/^/    # /;' fb.mm10.chainGCF_019923935.1Link.txt
    # 726061208 bases of 2818974548 (25.756%) in intersection
    sed -e 's/^/    # /;' fb.mm10.chainSynGCF_019923935.1Link.txt
    # 676398917 bases of 2818974548 (23.995%) in intersection

    time (~/kent/src/hg/utils/automation/doRecipBest.pl -trackHub -load -workhorse=hgwdev -buildDir=`pwd` \
       \
      -query2Bit="/hive/data/genomes/asmHubs/GCF/019/923/935/GCF_019923935.1/GCF_019923935.1.2bit" \
-querySizes="/hive/data/genomes/asmHubs/GCF/019/923/935/GCF_019923935.1/GCF_019923935.1.chrom.sizes.txt" \
        mm10 GCF_019923935.1) > rbest.log 2>&1

    grep -w real rbest.log | sed -e 's/^/    # /;'
    # real	221m47.012s

    sed -e 's/^/    # /;' fb.mm10.chainRBest.GCF_019923935.1.txt
    # 667467634 bases of 2818974548 (23.678%) in intersection

    ### and for the swap
    # (pasted output from the swap run, kept as comments so that none of it
    #  is executed if this section is replayed)
# swap into: /hive/data/genomes/asmHubs/allBuild/GCF/019/923/935/GCF_019923935.1_NDDB_SH_1/trackData/blastz.mm10.swap
# running /hive/data/genomes/asmHubs/allBuild/GCF/019/923/935/GCF_019923935.1_NDDB_SH_1/trackData/blastz.mm10.swap/runSwap.sh
# + cd /hive/data/genomes/asmHubs/refseqBuild/GCF/019/923/935/GCF_019923935.1_NDDB_SH_1
# + export defaultName=GCF_019923935.1_NDDB_SH_1
# + defaultName=GCF_019923935.1_NDDB_SH_1
# + export asmId=GCF_019923935.1_NDDB_SH_1
# + asmId=GCF_019923935.1_NDDB_SH_1
# + export buildDir=/hive/data/genomes/asmHubs/refseqBuild/GCF/019/923/935/GCF_019923935.1_NDDB_SH_1
# + buildDir=/hive/data/genomes/asmHubs/refseqBuild/GCF/019/923/935/GCF_019923935.1_NDDB_SH_1
# + rm -f GCF_019923935.1_NDDB_SH_1.chromAlias.txt
# + ln -s trackData/chromAlias/GCF_019923935.1_NDDB_SH_1.chromAlias.txt .
# + '[' -s trackData/chromAlias/GCF_019923935.1_NDDB_SH_1.chromAlias.bb ']'
# + rm -f GCF_019923935.1_NDDB_SH_1.chromAlias.bb
# + ln -s trackData/chromAlias/GCF_019923935.1_NDDB_SH_1.chromAlias.bb .
# + /cluster/home/mspeir/kent/src/hg/utils/automation/asmHubTrackDb.sh GCF_019923935.1_NDDB_SH_1 GCF_019923935.1_NDDB_SH_1 /hive/data/genomes/asmHubs/refseqBuild/GCF/019/923/935/GCF_019923935.1_NDDB_SH_1
# # no ensGene found
# composite chainNet
# constructing synNet.bb links GCF_019923935.1_NDDB_SH_1 hg38
# constructing rbestNet.bb links GCF_019923935.1_NDDB_SH_1 hg38
# constructing synNet.bb links GCF_019923935.1_NDDB_SH_1 mm10
# constructing rbestNet.bb links GCF_019923935.1_NDDB_SH_1 mm10
##############################################################################
# LASTZ Mouse Mm10 vs. water buffalo GCF_019923935.1
#    (DONE - 2024-01-03 - mspeir)

    mkdir /hive/data/genomes/mm10/bed/lastzGCF_019923935.1.2024-01-03
    cd /hive/data/genomes/mm10/bed/lastzGCF_019923935.1.2024-01-03

    printf '# water buffalo GCF_019923935.1 vs. Mouse Mm10
BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.03/bin/lastz

# TARGET: Mouse  mm10
SEQ1_DIR=/hive/data/genomes/mm10/mm10.2bit
SEQ1_LEN=/hive/data/genomes/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000
SEQ1_LIMIT=40

# QUERY: water buffalo 2021-09-10 GCF_019923935.1_NDDB_SH_1
SEQ2_DIR=/hive/data/genomes/asmHubs/GCF/019/923/935/GCF_019923935.1/GCF_019923935.1.2bit
SEQ2_LEN=/hive/data/genomes/asmHubs/GCF/019/923/935/GCF_019923935.1/GCF_019923935.1.chrom.sizes.txt
SEQ2_CHUNK=20000000
SEQ2_LAP=0
SEQ2_LIMIT=100

BASE=/hive/data/genomes/mm10/bed/lastzGCF_019923935.1.2024-01-03
TMPDIR=/dev/shm

' > DEF

    time (~/kent/src/hg/utils/automation/doBlastzChainNet.pl -trackHub -noDbNameCheck -verbose=2 `pwd`/DEF -syntenicNet \
       -qAsmId GCF_019923935.1_NDDB_SH_1 -workhorse=hgwdev -smallClusterHub=hgwdev -bigClusterHub=ku \
        -chainMinScore=3000 -chainLinearGap=medium) > do.log 2>&1
    grep -w real do.log | sed -e 's/^/    # /;'
    # real	198m37.349s

    sed -e 's/^/    # /;' fb.mm10.chainGCF_019923935.1Link.txt
    # 726061208 bases of 2818974548 (25.756%) in intersection
    sed -e 's/^/    # /;' fb.mm10.chainSynGCF_019923935.1Link.txt
    # 676398917 bases of 2818974548 (23.995%) in intersection

    time (~/kent/src/hg/utils/automation/doRecipBest.pl -trackHub -load -workhorse=hgwdev -buildDir=`pwd` \
       \
      -query2Bit="/hive/data/genomes/asmHubs/GCF/019/923/935/GCF_019923935.1/GCF_019923935.1.2bit" \
-querySizes="/hive/data/genomes/asmHubs/GCF/019/923/935/GCF_019923935.1/GCF_019923935.1.chrom.sizes.txt" \
        mm10 GCF_019923935.1) > rbest.log 2>&1

    grep -w real rbest.log | sed -e 's/^/    # /;'
    # real	221m47.012s

    sed -e 's/^/    # /;' fb.mm10.chainRBest.GCF_019923935.1.txt
    # 667467634 bases of 2818974548 (23.678%) in intersection

    ### and for the swap

    cd /hive/data/genomes/asmHubs/allBuild/GCF/019/923/935/GCF_019923935.1_NDDB_SH_1/trackData/blastz.mm10.swap

   time (~/kent/src/hg/utils/automation/doBlastzChainNet.pl -trackHub -noDbNameCheck -swap -verbose=2 \
   -qAsmId GCF_019923935.1_NDDB_SH_1 /hive/data/genomes/mm10/bed/lastzGCF_019923935.1.2024-01-03/DEF -swapDir=`pwd` \
  -syntenicNet -workhorse=hgwdev -smallClusterHub=hgwdev -bigClusterHub=ku \
    -chainMinScore=3000 -chainLinearGap=medium) > swap.log 2>&1

    grep -w real swap.log | sed -e 's/^/    # /;'
    # real	109m24.788s

    sed -e 's/^/    # /;' fb.GCF_019923935.1.chainMm10Link.txt
    # 692375708 bases of 2622460639 (26.402%) in intersection
    sed -e 's/^/    # /;' fb.GCF_019923935.1.chainSynMm10Link.txt
    # 657128856 bases of 2622460639 (25.058%) in intersection

    # reciprocal best in the swapped direction: GCF_019923935.1 is the target
    # here and mm10 the query; run from the swap directory (-buildDir=`pwd`)
    time (~/kent/src/hg/utils/automation/doRecipBest.pl -trackHub -load -workhorse=hgwdev -buildDir=`pwd` \
       -target2bit="/hive/data/genomes/asmHubs/GCF/019/923/935/GCF_019923935.1/GCF_019923935.1.2bit" \
       -targetSizes="/hive/data/genomes/asmHubs/GCF/019/923/935/GCF_019923935.1/GCF_019923935.1.chrom.sizes.txt" \
       GCF_019923935.1 mm10) > rbest.log 2>&1

    grep -w real rbest.log | sed -e 's/^/    # /;'
    # real	195m35.449s

    sed -e 's/^/    # /;' fb.GCF_019923935.1.chainRBest.Mm10.txt
    # 666765450 bases of 2622460639 (25.425%) in intersection

    # total elapsed time for the four steps above (sum of their 'real' times):
    # real	725m31.677s
    # user	0m3.111s
    # sys	0m2.760s

##############################################################################
# LASTZ Mouse Mm10 vs. sheep GCF_016772045.2
#    (DONE - 2024-03-15 - Gerardo)

    mkdir /hive/data/genomes/mm10/bed/lastzGCF_016772045.2.2024-03-15
    cd /hive/data/genomes/mm10/bed/lastzGCF_016772045.2.2024-03-15

    printf '# sheep GCF_016772045.2 vs. Mouse Mm10
BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.03/bin/lastz

# TARGET: Mouse  mm10
SEQ1_DIR=/hive/data/genomes/mm10/mm10.2bit
SEQ1_LEN=/hive/data/genomes/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000
SEQ1_LIMIT=40

# QUERY: sheep 2023-07-20 GCF_016772045.2_ARS-UI_Ramb_v3.0
SEQ2_DIR=/hive/data/genomes/asmHubs/GCF/016/772/045/GCF_016772045.2/GCF_016772045.2.2bit
SEQ2_LEN=/hive/data/genomes/asmHubs/GCF/016/772/045/GCF_016772045.2/GCF_016772045.2.chrom.sizes.txt
SEQ2_CHUNK=20000000
SEQ2_LAP=0
SEQ2_LIMIT=100

BASE=/hive/data/genomes/mm10/bed/lastzGCF_016772045.2.2024-03-15
TMPDIR=/dev/shm

' > DEF

    time (~/kent/src/hg/utils/automation/doBlastzChainNet.pl -trackHub -noDbNameCheck -verbose=2 `pwd`/DEF -syntenicNet \
       -qAsmId GCF_016772045.2_ARS-UI_Ramb_v3.0 -workhorse=hgwdev -smallClusterHub=hgwdev -bigClusterHub=ku \
        -chainMinScore=3000 -chainLinearGap=medium) > do.log 2>&1
    grep -w real do.log | sed -e 's/^/    # /;'
    # real	228m40.184s

    sed -e 's/^/    # /;' fb.mm10.chainGCF_016772045.2Link.txt
    # 715231844 bases of 2818974548 (25.372%) in intersection
    sed -e 's/^/    # /;' fb.mm10.chainSynGCF_016772045.2Link.txt
    # 667233860 bases of 2818974548 (23.669%) in intersection

    time (~/kent/src/hg/utils/automation/doRecipBest.pl -trackHub -load -workhorse=hgwdev -buildDir=`pwd` \
       \
      -query2Bit="/hive/data/genomes/asmHubs/GCF/016/772/045/GCF_016772045.2/GCF_016772045.2.2bit" \
-querySizes="/hive/data/genomes/asmHubs/GCF/016/772/045/GCF_016772045.2/GCF_016772045.2.chrom.sizes.txt" \
        mm10 GCF_016772045.2) > rbest.log 2>&1

    grep -w real rbest.log | sed -e 's/^/    # /;'
    # real	205m28.879s

    sed -e 's/^/    # /;' fb.mm10.chainRBest.GCF_016772045.2.txt
    # 658850497 bases of 2818974548 (23.372%) in intersection

    ### and for the swap

    cd /hive/data/genomes/asmHubs/allBuild/GCF/016/772/045/GCF_016772045.2_ARS-UI_Ramb_v3.0/trackData/blastz.mm10.swap

   time (~/kent/src/hg/utils/automation/doBlastzChainNet.pl -trackHub -noDbNameCheck -swap -verbose=2 \
   -qAsmId GCF_016772045.2_ARS-UI_Ramb_v3.0 /hive/data/genomes/mm10/bed/lastzGCF_016772045.2.2024-03-15/DEF -swapDir=`pwd` \
  -syntenicNet -workhorse=hgwdev -smallClusterHub=hgwdev -bigClusterHub=ku \
    -chainMinScore=3000 -chainLinearGap=medium) > swap.log 2>&1

    grep -w real swap.log | sed -e 's/^/    # /;'
    # real	39m9.020s

    sed -e 's/^/    # /;' fb.GCF_016772045.2.chainMm10Link.txt
    # 691608459 bases of 2654063983 (26.058%) in intersection
    sed -e 's/^/    # /;' fb.GCF_016772045.2.chainSynMm10Link.txt
    # 647504274 bases of 2654063983 (24.397%) in intersection

    # reciprocal best in the swapped direction: GCF_016772045.2 is the target
    # here and mm10 the query; run from the swap directory (-buildDir=`pwd`)
    time (~/kent/src/hg/utils/automation/doRecipBest.pl -trackHub -load -workhorse=hgwdev -buildDir=`pwd` \
       -target2bit="/hive/data/genomes/asmHubs/GCF/016/772/045/GCF_016772045.2/GCF_016772045.2.2bit" \
       -targetSizes="/hive/data/genomes/asmHubs/GCF/016/772/045/GCF_016772045.2/GCF_016772045.2.chrom.sizes.txt" \
       GCF_016772045.2 mm10) > rbest.log 2>&1

    grep -w real rbest.log | sed -e 's/^/    # /;'
    # real	190m23.152s

    sed -e 's/^/    # /;' fb.GCF_016772045.2.chainRBest.Mm10.txt
    # 658848325 bases of 2654063983 (24.824%) in intersection

    # total elapsed time for the four steps above (sum of their 'real' times):
    # real	663m45.533s
    # user	0m3.053s
    # sys	0m2.728s
##############################################################################
##############################################################################
# ENCODE4 triplets - Jeltje April 2025

mkdir -p april2025
#wget -O april2025/mouse_ucsc_transcripts.gtf "https://zenodo.org/records/15116042/files/mouse_ucsc_transcripts.gtf?download=1"
#wget -O  april2025/filt_ab_tpm_mouse.tsv "https://zenodo.org/records/15116042/files/filt_ab_tpm_mouse.tsv?download=1"
#wget -O  april2025/mouse_protein_summary.tsv "https://zenodo.org/records/15116042/files/mouse_protein_summary.tsv?download=1"
#wget -O  april2025/mouse_sample_info.tsv https://zenodo.org/records/15116042/files/lr_mouse_library_data_summary.tsv?download=1

gtfFile='april2025/mouse_ucsc_transcripts.gtf'
quantFile='april2025/filt_ab_tpm_mouse.tsv'  # really counts per million since every read is full length
protFile='april2025/mouse_protein_summary.tsv'
sampleFile='april2025/mouse_sample_info.tsv'
# this outputs bed12 + extra ID fields, topval expressions for mouseover and an expression html table
./gtfToBed.py $gtfFile $quantFile $protFile $sampleFile transcripts.bed > missing.ids
bedSort transcripts.bed transcripts.bed 

cat << '_EOF_' > encode4.as
table encode4
"Bed 12+8 file with annotation source and values per sample in a html table."
    (
    string chrom;      "Chromosome (or contig, scaffold, etc.)"
    uint   chromStart; "Start position in chromosome"
    uint   chromEnd;   "End position in chromosome"
    string name;       "Name of item"
    uint   score;      "Score from 0-1000"
    char[1] strand;    "+ or -"
    uint thickStart;   "Start of where display should be thick (start codon)"
    uint thickEnd;     "End of where display should be thick (stop codon)"
    uint reserved;     "Used as itemRgb as of 2004-11-22"
    int blockCount;    "Number of blocks"
    int[blockCount] blockSizes; "Comma separated list of block sizes"
    int[blockCount] chromStarts; "Start positions relative to chromStart"
    string source;      "Annotation source"
    string gene_id;     "gene ID"	
    string gene_name;   "gene name"	
    string transcript_id;   "transcript ID"	
    string transcript_name;   "transcript name"	
    float maxScore;   "Highest expression score (counts per million)"
    lstring maxScoreHtml;   "Highest expression score and sample(s)"
    lstring expr_table; "Expression values per sample in TPM"
    )
_EOF_

bedToBigBed -type=bed12+8 -as=encode4.as -tab transcripts.bed /hive/data/genomes/mm10/chrom.sizes encode4.bb


##############################################################################
# Recount3 - Jeltje April 2025

####################################
# recount3 intron tracks ticket 34886
# Jeltje January 2025
# NOTE: The sra files are so large that the trackDb.ra file needs a maxWindowToDraw limit
# or else the browser window won't load within the set time

# autoSql schema for the bigBed built below: see kent/src/hg/lib/recount3.as

# process_dataset COMPILATION
#   Fetch the Snaptron junctions file for one compilation (e.g. srav1m) unless
#   COMPILATION.tsv.bgz already exists in the current directory, then run
#   junctionsToBed.py on it to write COMPILATION.bed and decCOMPILATION.bed,
#   and sort the decorator bed in place.
#   Returns non-zero if the argument is missing or the download or the
#   conversion fails; otherwise returns the status of the final bedSort.
process_dataset() {
	local dset=$1
	if [ -z "$dset" ]; then
	    echo "usage: process_dataset <compilation>" >&2
	    return 1
	fi
	if [ ! -f "$dset.tsv.bgz" ]; then
	    # wget -O creates its output file even when the transfer fails, so
	    # download to a temporary name and rename only on success; otherwise
	    # a failed fetch would leave a file that the -f test above accepts
	    # on the next run.
	    if ! wget -nv -O "$dset.tsv.bgz.tmp" "https://snaptron.cs.jhu.edu/data/$dset/junctions.bgz"; then
	        rm -f "$dset.tsv.bgz.tmp"
	        return 1
	    fi
	    mv "$dset.tsv.bgz.tmp" "$dset.tsv.bgz"
	fi
	# stop here on failure so bedSort is not run on a missing/partial file
	~/kent/src/hg/makeDb/outside/recount3/junctionsToBed.py --junctions "$dset.tsv.bgz" --bed "$dset.bed" --decorator "dec$dset.bed" --compilation "$dset" || return 1
#	bedSort $dset.bed $dset.bed
	bedSort "dec$dset.bed" "dec$dset.bed"
}

dset=srav1m
process_dataset $dset

bedToBigBed -type=bed9+6 -tab -as=${HOME}/kent/src/hg/lib/recount3.as $dset.bed /hive/data/genomes/mm10/chrom.sizes $dset.bb &
bedToBigBed -type=bed12+ -as=${HOME}/kent/src/hg/lib/decoration.as dec$dset.bed /hive/data/genomes/mm10/chrom.sizes dec$dset.bb &
wait

#rm srav1m.bed srav1m.tsv junctions.bgz
# 2025-08-04 markd: update to add size column for filter

##############################################################################
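# ENCODE4 triplets - Jeltje April 2025
# NOTE: this section is a verbatim repeat of the "ENCODE4 triplets" section
# that appears earlier in this file, before the Recount3 section.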

mkdir -p april2025
#wget -O april2025/mouse_ucsc_transcripts.gtf "https://zenodo.org/records/15116042/files/mouse_ucsc_transcripts.gtf?download=1"
#wget -O  april2025/filt_ab_tpm_mouse.tsv "https://zenodo.org/records/15116042/files/filt_ab_tpm_mouse.tsv?download=1"
#wget -O  april2025/mouse_protein_summary.tsv "https://zenodo.org/records/15116042/files/mouse_protein_summary.tsv?download=1"
#wget -O  april2025/mouse_sample_info.tsv https://zenodo.org/records/15116042/files/lr_mouse_library_data_summary.tsv?download=1

gtfFile='april2025/mouse_ucsc_transcripts.gtf'
quantFile='april2025/filt_ab_tpm_mouse.tsv'  # really counts per million since every read is full length
protFile='april2025/mouse_protein_summary.tsv'
sampleFile='april2025/mouse_sample_info.tsv'
# this outputs bed12 + extra ID fields, topval expressions for mouseover and an expression html table
./gtfToBed.py $gtfFile $quantFile $protFile $sampleFile transcripts.bed > missing.ids
bedSort transcripts.bed transcripts.bed 

cat << '_EOF_' > encode4.as
table encode4
"Bed 12+8 file with annotation source and values per sample in a html table."
    (
    string chrom;      "Chromosome (or contig, scaffold, etc.)"
    uint   chromStart; "Start position in chromosome"
    uint   chromEnd;   "End position in chromosome"
    string name;       "Name of item"
    uint   score;      "Score from 0-1000"
    char[1] strand;    "+ or -"
    uint thickStart;   "Start of where display should be thick (start codon)"
    uint thickEnd;     "End of where display should be thick (stop codon)"
    uint reserved;     "Used as itemRgb as of 2004-11-22"
    int blockCount;    "Number of blocks"
    int[blockCount] blockSizes; "Comma separated list of block sizes"
    int[blockCount] chromStarts; "Start positions relative to chromStart"
    string source;      "Annotation source"
    string gene_id;     "gene ID"	
    string gene_name;   "gene name"	
    string transcript_id;   "transcript ID"	
    string transcript_name;   "transcript name"	
    float maxScore;   "Highest expression score (counts per million)"
    lstring maxScoreHtml;   "Highest expression score and sample(s)"
    lstring expr_table; "Expression values per sample in TPM"
    )
_EOF_

bedToBigBed -type=bed12+8 -as=encode4.as -tab transcripts.bed /hive/data/genomes/mm10/chrom.sizes encode4.bb
#############################################################################
# VISTA Enhancers update 2025 #35531 (2025-04-30 Gerardo)

# Rebuild the VISTA enhancers bigBed in the otto directory from the mm10 BED
# published in the vista-data GitLab repository, then link it into /gbdb.
cd /hive/data/outside/otto/vista
wget -q  https://gitlab.com/egsb-mfgl/vista-data/-/raw/main/locus_ucsc_mm10.bed  -O vista.mm10.latest.bed

# chrom.sizes is read from the download server; stdout and stderr are discarded
bedToBigBed -tab -sort -type=bed9+1 -as=vista.as vista.mm10.latest.bed \
  https://hgdownload.soe.ucsc.edu/goldenPath/mm10/bigZips/mm10.chrom.sizes vista.mm10.latest.bb \
  > /dev/null 2>&1

mv vista.mm10.latest.bb vista.mm10.bb
cd /gbdb/mm10/vistaEnhancers/
# Making symlink for bigBed files
ln -s /hive/data/outside/otto/vista/vista.mm10.bb vistaEnhancers.bb
# mm10 trackDb lives under the mouse organism directory, not human
cd ~/kent/src/hg/makeDb/trackDb/mouse/mm10
vi trackDb.ra
##############################################################################
# LASTZ Mouse Mm10 vs. Ferret MusFur1
#    (DONE - 2025-06-16 - Gerardo)

    mkdir /hive/data/genomes/mm10/bed/lastzMusFur1.2025-06-16
    cd /hive/data/genomes/mm10/bed/lastzMusFur1.2025-06-16

    printf '# Ferret MusFur1 vs. Mouse Mm10
BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.03/bin/lastz

# TARGET: Mouse  mm10
SEQ1_DIR=/hive/data/genomes/mm10/mm10.2bit
SEQ1_LEN=/hive/data/genomes/mm10/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000
SEQ1_LIMIT=40

# QUERY: Ferret  musFur1
SEQ2_DIR=/hive/data/genomes/musFur1/musFur1.2bit
SEQ2_LEN=/hive/data/genomes/musFur1/chrom.sizes
SEQ2_CHUNK=20000000
SEQ2_LAP=0
SEQ2_LIMIT=100

BASE=/hive/data/genomes/mm10/bed/lastzMusFur1.2025-06-16
TMPDIR=/dev/shm

' > DEF

    time (~/kent/src/hg/utils/automation/doBlastzChainNet.pl  -verbose=2 `pwd`/DEF -syntenicNet \
        -workhorse=hgwdev -smallClusterHub=hgwdev -fileServer=hgwdev -bigClusterHub=hgwdev \
        -chainMinScore=3000 -chainLinearGap=medium) > do.log 2>&1
    grep -w real do.log | sed -e 's/^/    # /;'
    # real	254m4.751s

    sed -e 's/^/    # /;' fb.mm10.chainMusFur1Link.txt
    # 777998409 bases of 2739603606 (28.398%) in intersection
    sed -e 's/^/    # /;' fb.mm10.chainSynMusFur1Link.txt
    # 724245099 bases of 2739603606 (26.436%) in intersection

    time (~/kent/src/hg/utils/automation/doRecipBest.pl  -load -workhorse=hgwdev -buildDir=`pwd` \
       \
       \
        mm10 musFur1) > rbest.log 2>&1

    grep -w real rbest.log | sed -e 's/^/    # /;'
    # real	104m2.352s

    sed -e 's/^/    # /;' fb.mm10.chainRBest.MusFur1.txt
    # 718345345 bases of 2739603606 (26.221%) in intersection

    ### and for the swap

    cd /hive/data/genomes/musFur1/bed/blastz.mm10.swap

   time (~/kent/src/hg/utils/automation/doBlastzChainNet.pl  -swap -verbose=2 \
    /hive/data/genomes/mm10/bed/lastzMusFur1.2025-06-16/DEF -swapDir=`pwd` \
  -syntenicNet -workhorse=hgwdev -smallClusterHub=hgwdev -fileServer=hgwdev -bigClusterHub=hgwdev \
    -chainMinScore=3000 -chainLinearGap=medium) > swap.log 2>&1

    grep -w real swap.log | sed -e 's/^/    # /;'
    # real	46m56.431s

    sed -e 's/^/    # /;' fb.musFur1.chainMm10Link.txt
    # 733367553 bases of 2277906570 (32.195%) in intersection
    sed -e 's/^/    # /;' fb.musFur1.chainSynMm10Link.txt
    # 700100333 bases of 2277906570 (30.734%) in intersection

    # reciprocal best in the swapped direction: musFur1 is the target here
    # and mm10 the query; run from the swap directory (-buildDir=`pwd`)
    time (~/kent/src/hg/utils/automation/doRecipBest.pl  -load -workhorse=hgwdev -buildDir=`pwd` \
       musFur1 mm10) > rbest.log 2>&1

    grep -w real rbest.log | sed -e 's/^/    # /;'
    # real	73m47.683s

    sed -e 's/^/    # /;' fb.musFur1.chainRBest.Mm10.txt
    # 717968097 bases of 2277906570 (31.519%) in intersection

    # total elapsed time for the four steps above (sum of their 'real' times):
    # real	478m51.661s
    # user	0m1.056s
    # sys	0m0.706s
##############################################################################
