# for emacs: -*- mode: sh; -*-
user    0m0.063s
sys     0m0.066s


#       DATE:   03-Oct-2011
#       ORGANISM:       Tursiops truncatus
#       TAXID:  9739
#       ASSEMBLY LONG NAME:     Ttru_1.4
#       ASSEMBLY SHORT NAME:    Ttru_1.4
#       ASSEMBLY SUBMITTER:     Baylor College of Medicine
#       ASSEMBLY TYPE:  Haploid
#       NUMBER OF ASSEMBLY-UNITS:       1
#       ASSEMBLY ACCESSION:     GCA_000151865.2

#       FTP-RELEASE DATE: 03-Jan-2012

#       http://www.ncbi.nlm.nih.gov/genome/769
#       http://www.ncbi.nlm.nih.gov/genome/assembly/325108/
#       http://www.ncbi.nlm.nih.gov/bioproject/20365
#       http://www.ncbi.nlm.nih.gov/bioproject/34125 - chrMt NC_012059.1

#       http://www.ncbi.nlm.nih.gov/Traces/wgs/?val=ABRN02
#       Genome Coverage : 2.5x Sanger; 3.5x 454; 30x Illumina

#       http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=9739

# rsync://ftp.ncbi.nlm.nih.gov/genbank/genomes/Eukaryotes/vertebrates_mammals/Tursiops_truncatus/Ttru_1.4/

##########################################################################
# Download sequence (DONE - 2012-04-11 - Hiram)
    mkdir /hive/data/genomes/turTru2
    cd /hive/data/genomes/turTru2
    mkdir genbank
    cd genbank
    time rsync -a -P \
rsync://ftp.ncbi.nlm.nih.gov/genbank/genomes/Eukaryotes/vertebrates_mammals/Tursiops_truncatus/Ttru_1.4/ ./
    #   real    14m52.432s

    # verify the size of the sequence here:
    faSize Primary_Assembly/unplaced_scaffolds/FASTA/unplaced.scaf.fa.gz
    #   2551980185 bases (219594130 N's 2332386055 real 2332386055 upper
    #   0 lower) in 240900 sequences in 1 files
    #   Total size: mean 10593.5 sd 37887.7
    #   min 200 (gi|366711530|gb|ABRN02115282.1|)
    #   max 1003560 (gi|366982705|gb|JH472452.1|) median 706

##########################################################################
# fixup names for UCSC standards (DONE - 2012-04-13 - Hiram)
    mkdir /hive/data/genomes/turTru2/ucsc
    cd /hive/data/genomes/turTru2/ucsc

    ########################  Unplaced scaffolds
    # verify we don't have any .acc numbers different from .1
    zcat \
    ../genbank/Primary_Assembly/unplaced_scaffolds/AGP/unplaced.scaf.agp.gz \
	| cut -f1 | egrep "^JH|^ABRN" \
	| sed -e 's/^JH[0-9][0-9]*//; s/^ABRN[0-9][0-9]*//' | sort | uniq -c
    #   868326 .1

    # this is like the unplaced.pl script in other assemblies except it
    #	does not add chrUn_ to the names since they are all just scaffolds

    cat << '_EOF_' > unplaced.pl
#!/bin/env perl

use strict;
use warnings;

my $agpFile =  "../genbank/Primary_Assembly/unplaced_scaffolds/AGP/unplaced.scaf.agp.gz";
my $fastaFile =  "../genbank/Primary_Assembly/unplaced_scaffolds/FASTA/unplaced.scaf.fa.gz";
open (FH, "zcat $agpFile|") or die "can not read $agpFile";
open (UC, "|gzip -c > unplaced.agp.gz") or die "can not write to unplaced.agp.gz";
while (my $line = <FH>) {
    if ($line =~ m/^#/) {
        print UC $line;
    } else {
        $line =~ s/\.1//;    
        printf UC "%s", $line;
    }
}
close (FH);
close (UC);

open (FH, "zcat $fastaFile|") or die "can not read $fastaFile";
open (UC, "|gzip -c > unplaced.fa.gz") or die "can not write to unplaced.fa.gz";
while (my $line = <FH>) {
    if ($line =~ m/^>/) {
        chomp $line;
        $line =~ s/.*gb\|//;
        $line =~ s/\|.*//;
        $line =~ s/\.1//;    
        printf UC ">$line\n";
    } else {
        print UC $line;
    }
}
close (FH);
close (UC);
'_EOF_'
    # << happy emacs
    chmod +x unplaced.pl
    time ./unplaced.pl
    #   real    11m26.151s

    # verify nothing lost from original:
    time faSize *.fa.gz
    #   2551980185 bases (219594130 N's 2332386055 real 2332386055 upper
    #   0 lower) in 240900 sequences in 1 files
    #   Total size: mean 10593.5 sd 37887.7 min 200 (ABRN02115282)
    #   max 1003560 (JH472452) median 706

    # make sure none of the names got to be over 31 characers long:
    zcat *.agp.gz | grep -v "^#" | cut -f1 | awk '{print length($0)}' \
        | sort -rn | uniq -c | head
    #   181924 12
    #   686402 8

##########################################################################
# Initial makeGenomeDb.pl (DONE - 2012-04-13 - Hiram)
    cd /hive/data/genomes/turTru2
    cat << '_EOF_' > turTru2.config.ra
# Config parameters for makeGenomeDb.pl:
db turTru2
clade mammal
genomeCladePriority 35
scientificName Tursiops truncatus
commonName Dolphin
assemblyDate Oct. 2011
assemblyLabel Baylor College of Medicine Ttru_1.4 (NCBI project 20365, GCA_000151865.2, WGS ABRN02)
assemblyShortLabel Baylor Ttru_1.4
ncbiAssemblyName Ttru_1.4
ncbiAssemblyId 325108
orderKey 2364
mitoAcc NC_012059
fastaFiles /hive/data/genomes/turTru2/ucsc/*.fa.gz
agpFiles /hive/data/genomes/turTru2/ucsc/*.agp.gz
# qualFiles none
dbDbSpeciesDir dolphin
taxId 9739
'_EOF_'
    # << happy emacs

    # verify sequence and agp are OK
    time makeGenomeDb.pl -workhorse=hgwdev -fileServer=hgwdev -dbHost=hgwdev \
        -stop=agp turTru2.config.ra > agp.log 2>&1
    #   real    1m56.055s
    # verify OK:
    tail -1 agp.log
    #   *** All done!  (through the 'agp' step)

    # finish it off
    time makeGenomeDb.pl -continue=db -workhorse=hgwdev -fileServer=hgwdev \
        -dbHost=hgwdev turTru2.config.ra > db.log 2>&1
    #   real    21m20.515s
    #	add the trackDb entries to the source tree, and the 2bit link:
    ln -s `pwd`/turTru2.unmasked.2bit /gbdb/turTru2/turTru2.2bit
    #	browser should function now

#########################################################################
# running repeat masker (DONE - 2012-04-13 - Hiram)
    mkdir /hive/data/genomes/turTru2/bed/repeatMasker
    cd /hive/data/genomes/turTru2/bed/repeatMasker
    time doRepeatMasker.pl -buildDir=`pwd` -noSplit \
	-bigClusterHub=swarm -dbHost=hgwdev -workhorse=hgwdev \
	-smallClusterHub=encodek turTru2 > do.log 2>&1 &
    #   real    1304m42.773s

    cat faSize.rmsk.txt
    #   2551996573 bases (219594130 N's 2332402443 real 1317197601 upper
    #   1015204842 lower) in 240901 sequences in 1 files
    #   Total size: mean 10593.5 sd 37887.7 min 200 (ABRN02115282)
    #   max 1003560 (JH472452) median 706
    #   %39.78 masked total, %43.53 masked real

    egrep -i "versi|relea" do.log
#    April 26 2011 (open-3-3-0) version of RepeatMasker
# CC   RELEASE 20110920; 
# RepeatMasker version development-$Id: RepeatMasker,v 1.26 2011/09/26 16:19:44 angie Exp $

    time featureBits -countGaps turTru2 rmsk
    #   1019407879 bases of 2551996573 (39.946%) in intersection
    #   real    2m37.291s

    # why is it different than the faSize above ?
    # because rmsk masks out some N's as well as bases, the count above
    #	separates out the N's from the bases, it doesn't show lower case N's

##########################################################################
# running simple repeat (DONE - 2012-04-13 - Hiram)
    mkdir /hive/data/genomes/turTru2/bed/simpleRepeat
    cd /hive/data/genomes/turTru2/bed/simpleRepeat
    time doSimpleRepeat.pl -buildDir=`pwd` -bigClusterHub=swarm \
	-dbHost=hgwdev -workhorse=hgwdev -smallClusterHub=encodek \
	turTru2 > do.log 2>&1 &
    #   real    180m2.834s
    cat fb.simpleRepeat 
    #   35337050 bases of 2332423317 (1.515%) in intersection

    # add to rmsk after it is done:
    cd /hive/data/genomes/turTru2
    twoBitMask turTru2.rmsk.2bit \
	-add bed/simpleRepeat/trfMask.bed turTru2.2bit
    #	you can safely ignore the warning about fields >= 13

    twoBitToFa turTru2.2bit stdout | faSize stdin > faSize.turTru2.2bit.txt
    cat faSize.turTru2.2bit.txt
    #   2551996573 bases (219594130 N's 2332402443 real 1316500848 upper
    #   1015901595 lower) in 240901 sequences in 1 files
    #   Total size: mean 10593.5 sd 37887.7 min 200 (ABRN02115282)
    #   max 1003560 (JH472452) median 706
    #   %39.81 masked total, %43.56 masked real

    rm /gbdb/turTru2/turTru2.2bit
    ln -s `pwd`/turTru2.2bit /gbdb/turTru2/turTru2.2bit

#########################################################################
# Verify all gaps are marked, add any N's not in gap as type 'other'
#	(DONE - 2012-04-13 - Hiram)
    mkdir /hive/data/genomes/turTru2/bed/gap
    cd /hive/data/genomes/turTru2/bed/gap
    time nice -n +19 findMotif -motif=gattaca -verbose=4 \
	-strand=+ ../../turTru2.unmasked.2bit > findMotif.txt 2>&1
    #	real    0m26.240s
    grep "^#GAP " findMotif.txt | sed -e "s/^#GAP //" > allGaps.bed
    featureBits turTru2 -not gap -bed=notGap.bed
    #   2332423317 bases of 2332423317 (100.000%) in intersection

    time featureBits turTru2 allGaps.bed notGap.bed -bed=new.gaps.bed
    #   20874 bases of 2332423317 (0.001%) in intersection
    #   real    496m11.739s
    wc -l new.gaps.bed
    #   20823 new.gaps.bed
    # the biggest one here is 4 bases:
    awk '{print $3-$2,$0}' new.gaps.bed | sort -rn | less

    #	what is the highest index in the existing gap table:
    hgsql -N -e "select ix from gap;" turTru2 | sort -n | tail -1
    #	216
    cat << '_EOF_' > mkGap.pl
#!/bin/env perl

use strict;
use warnings;

my $ix=`hgsql -N -e "select ix from gap;" turTru2 | sort -n | tail -1`;
chomp $ix;

open (FH,"<new.gaps.bed") or die "can not read new.gaps.bed";
while (my $line = <FH>) {
    my ($chrom, $chromStart, $chromEnd, $rest) = split('\s+', $line);
    ++$ix;
    printf "%s\t%d\t%d\t%d\tN\t%d\tother\tyes\n", $chrom, $chromStart,
        $chromEnd, $ix, $chromEnd-$chromStart;
}
close (FH);
'_EOF_'
    # << happy emacs
    chmod +x ./mkGap.pl
    ./mkGap.pl > other.bed
    featureBits -countGaps turTru2 other.bed
    #   20874 bases of 2551996573 (0.001%) in intersection
    #   real    11m57.952s
    wc -l other.bed
    #	20823
    hgLoadBed -sqlTable=$HOME/kent/src/hg/lib/gap.sql \
	-noLoad turTru2 otherGap other.bed
    #   Read 20823 elements of size 8 from other.bed
    #	starting with this many
    hgsql -e "select count(*) from gap;" turTru2
    #	 313713
    hgsql turTru2 -e 'load data local infile "bed.tab" into table gap;'
    #	result count:
    hgsql -e "select count(*) from gap;" turTru2
    #	334536
    # == 20823 + 313713
    # verify we aren't adding gaps where gaps already exist
    # this would output errors if that were true:
    gapToLift -minGap=1 turTru2 nonBridged.lift -bedFile=nonBridged.bed
    #   one of these new ones is adjacent to an existing gap:

    # see example in danRer7.txt
    # are there non-bridged gaps here:
    hgsql -N -e "select bridge from gap;" turTru2 | sort | uniq -c
    #   334536 yes

##########################################################################
## WINDOWMASKER (DONE - 2012-04-13 - Hiram)
    mkdir /hive/data/genomes/turTru2/bed/windowMasker
    cd /hive/data/genomes/turTru2/bed/windowMasker
    time nice -n +19 doWindowMasker.pl -buildDir=`pwd` -workhorse=hgwdev \
	-dbHost=hgwdev turTru2 > do.log 2>&1 &
    #   real    181m2.383s

    # Masking statistics
    twoBitToFa turTru2.wmsk.2bit stdout | faSize stdin
    #   2551996573 bases (219594130 N's 2332402443 real 1557512287 upper
    #   774890156 lower) in 240901 sequences in 1 files
    #   Total size: mean 10593.5 sd 37887.7 min 200 (ABRN02115282)
    #   max 1003560 (JH472452) median 706
    #   %30.36 masked total, %33.22 masked real

    twoBitToFa turTru2.wmsk.sdust.2bit stdout | faSize stdin
    #   2551996573 bases (219594130 N's 2332402443 real 1545284164 upper
    #   787118279 lower) in 240901 sequences in 1 files
    #   Total size: mean 10593.5 sd 37887.7 min 200 (ABRN02115282)
    #   max 1003560 (JH472452) median 706
    #   %30.84 masked total, %33.75 masked real

    hgLoadBed turTru2 windowmaskerSdust windowmasker.sdust.bed.gz
    #   Read 13508939 elements of size 3 from windowmasker.sdust.bed.gz

    time featureBits -countGaps turTru2 windowmaskerSdust
    #   1006694538 bases of 2551996573 (39.447%) in intersection
    #   real    3m33.405s

    #	eliminate the gaps from the masking
    time featureBits turTru2 -not gap -bed=notGap.bed
    #   2332402443 bases of 2332402443 (100.000%) in intersection
    #   real    1m30.308s
    time nice -n +19 featureBits turTru2 windowmaskerSdust notGap.bed \
        -bed=stdout | gzip -c > cleanWMask.bed.gz
    #   787118279 bases of 2332402443 (33.747%) in intersection
    #   real    318m12.378s

    #	reload track to get it clean
    hgLoadBed turTru2 windowmaskerSdust cleanWMask.bed.gz
    #   Read 13532042 elements of size 4 from cleanWMask.bed.gz
    time featureBits -countGaps turTru2 windowmaskerSdust
    #   787118279 bases of 2551996573 (30.843%) in intersection
    #   real    3m20.475s

    zcat cleanWMask.bed.gz \
	| twoBitMask ../../turTru2.unmasked.2bit stdin \
	    -type=.bed turTru2.cleanWMSdust.2bit
    twoBitToFa turTru2.cleanWMSdust.2bit stdout | faSize stdin \
        > turTru2.cleanWMSdust.faSize.txt
    cat turTru2.cleanWMSdust.faSize.txt
    #   2551996573 bases (219594130 N's 2332402443 real 1545284164 upper
    #   787118279 lower) in 240901 sequences in 1 files
    #   Total size: mean 10593.5 sd 37887.7 min 200 (ABRN02115282)
    #   max 1003560 (JH472452) median 706
    #   %30.84 masked total, %33.75 masked real

    # how much does this window masker and repeat masker overlap:
    time featureBits -countGaps turTru2 rmsk windowmaskerSdust
    #   553188559 bases of 2551996573 (21.677%) in intersection
    #   real    5m33.965s

#########################################################################
# MASK SEQUENCE WITH WM+TRF (DONE - 2012-04-13 - Hiram)
    #	since rmsk masks so very little of the genome, use the clean WM mask
    #	on the genome sequence
#    cd /hive/data/genomes/turTru2
#    twoBitMask -add bed/windowMasker/turTru2.cleanWMSdust.2bit \
#	bed/simpleRepeat/trfMask.bed turTru2.2bit
    #	safe to ignore the warnings about BED file with >=13 fields
#    twoBitToFa turTru2.2bit stdout | faSize stdin > faSize.turTru2.txt
#    cat faSize.turTru2.txt
    #   927696114 bases (111611440 N's 816084674 real 607935500 upper
    #   208149174 lower) in 5678 sequences in 1 files
    #   Total size: mean 163384.3 sd 1922194.0 min 1000 (AERX01077754-1)
    #   max 51042256 (chrLG7) median 2009
    #   %22.44 masked total, %25.51 masked real

    #	create symlink to gbdb
#    rm /gbdb/turTru2/turTru2.2bit
#    ln -s `pwd`/turTru2.2bit /gbdb/turTru2/turTru2.2bit

########################################################################
# cpgIslands - (DONE - 2011-04-23 - Hiram)
    mkdir /hive/data/genomes/turTru2/bed/cpgIslands
    cd /hive/data/genomes/turTru2/bed/cpgIslands
    time doCpgIslands.pl turTru2 > do.log 2>&1
    #    fixing broken command in the 'head' calculation
    doCpgIslands.pl  -continue=cleanup turTru2
    #   Elapsed time: 8m21s

    cat fb.turTru2.cpgIslandExt.txt
    #   29423228 bases of 2332402443 (1.261%) in intersection

#########################################################################
# genscan - (DONE - 2011-04-26 - Hiram)
    mkdir /hive/data/genomes/turTru2/bed/genscan
    cd /hive/data/genomes/turTru2/bed/genscan
    time doGenscan.pl turTru2 > do.log 2>&1
    # recovering from power failure, kluster run had just finished
    # and it had just started on makeBed:
    time ./doMakeBed.csh
    #   real    298m41.971s
    time doGenscan.pl -continue=load turTru2 > load.log 2>&1
    #   real    17m53.512s
    # failed out of inodes on the hive FS, continuing:
    time doGenscan.pl -continue=cleanup turTru2 > cleanup.log 2>&1
    #   real    20m48.384s

    cat fb.turTru2.genscan.txt
    #   53945792 bases of 2332402443 (2.313%) in intersection
    cat fb.turTru2.genscanSubopt.txt
    #   59031653 bases of 2332402443 (2.531%) in intersection

#########################################################################
# MAKE 11.OOC FILE FOR BLAT/GENBANK (DONE - 2012-05-07 - Hiram)
    # Use -repMatch=900, based on size -- for human we use 1024
    # use the "real" number from the faSize measurement,
    # hg19 is 2897316137, calculate the ratio factor for 1024:
    calc \( 2332402443 / 2897316137 \) \* 1024
    #	( 2332402443 / 2897316137 ) * 1024 = 824.342249

    # round up to 850

    cd /hive/data/genomes/turTru2
    time blat turTru2.2bit /dev/null /dev/null -tileSize=11 \
      -makeOoc=jkStuff/turTru2.11.ooc -repMatch=850
    #   Wrote 27158 overused 11-mers to jkStuff/turTru2.11.ooc
    #   real    1m20.824s

    # there are no non-bridged gaps, no lift file needed for genbank
    hgsql -N -e "select bridge from gap;" turTru2 | sort | uniq -c
    #   334536 yes
#    cd /hive/data/genomes/turTru2/jkStuff
#    gapToLift turTru2 turTru2.nonBridged.lift -bedFile=turTru2.nonBridged.bed
    # largest non-bridged contig:
#    awk '{print $3-$2,$0}' turTru2.nonBridged.bed | sort -nr | head
    #   123773608 chrX  95534   123869142       chrX.01

#########################################################################
# AUTO UPDATE GENBANK (DONE - 2012-05-07 - Hiram)
    # examine the file:
    /cluster/data/genbank/data/organism.lst
    # for your species to see what counts it has for:
# organism       mrnaCnt estCnt  refSeqCnt
# Tursiops truncatus	102	8040	0
    # to decide which "native" mrna or ests you want to specify in genbank.conf

    ssh hgwdev  
    cd $HOME/kent/src/hg/makeDb/genbank
    git pull
    # edit etc/genbank.conf to add turTru2 just after ce2
# turTru2 (Dolphin)
turTru2.serverGenome = /hive/data/genomes/turTru2/turTru2.2bit
turTru2.clusterGenome = /hive/data/genomes/turTru2/turTru2.2bit
turTru2.ooc = /hive/data/genomes/turTru2/jkStuff/turTru2.11.ooc
turTru2.lift = no
turTru2.refseq.mrna.native.pslCDnaFilter  = ${lowCover.refseq.mrna.native.pslCDnaFilter}
turTru2.refseq.mrna.xeno.pslCDnaFilter    = ${lowCover.refseq.mrna.xeno.pslCDnaFilter}
turTru2.genbank.mrna.native.pslCDnaFilter = ${lowCover.genbank.mrna.native.pslCDnaFilter}
turTru2.genbank.mrna.xeno.pslCDnaFilter   = ${lowCover.genbank.mrna.xeno.pslCDnaFilter}
turTru2.genbank.est.native.pslCDnaFilter  = ${lowCover.genbank.est.native.pslCDnaFilter}
turTru2.refseq.mrna.native.load = no
turTru2.refseq.mrna.xeno.load = yes
turTru2.genbank.mrna.native.load = no
turTru2.genbank.mrna.xeno.load = yes
turTru2.genbank.est.native.load = no
turTru2.downloadDir = turTru2
turTru2.perChromTables = no

    # end of section added to etc/genbank.conf
    git commit -m "adding turTru2 Dolphin" etc/genbank.conf
    git push
    make etc-update

    git pull
    # Edit src/lib/gbGenome.c to add new species.
    git commit -m "adding definition for turTruNames Tursiops truncatus" \
        src/lib/gbGenome.c
    git push
    make install-server

    ssh hgwdev			# used to do this on "genbank" machine
    screen -S turTru2           # long running job managed in screen
    cd /cluster/data/genbank
    time nice -n +19 ./bin/gbAlignStep -initial turTru2 &
    #   var/build/logs/2012.06.08-10:05:05.turTru2.initalign.log
    #   real    1400m6.762s

    # load database when finished
    ssh hgwdev
    cd /cluster/data/genbank
    time nice -n +19 ./bin/gbDbLoadStep -drop -initialLoad turTru2 &
    #	logFile: var/dbload/hgwdev/logs/2012.06.12-13:43:53.dbload.log
    #   real    4m45.316s

    # enable daily alignment and update of hgwdev (DONE - 2012-06-12 - Hiram)
    cd ~/kent/src/hg/makeDb/genbank
    git pull
    # add turTru2 to:
    vi etc/align.dbs etc/hgwdev.dbs
    git commit -m "Added turTru2." etc/align.dbs etc/hgwdev.dbs
    git push
    make etc-update

##############################################################################
# LASTZi cow bosTau7 (DONE 2012-06-19 - Chin)
    #   original alignment done at bosTau7.txt
    cd /hive/data/genomes/bosTau7/bed/lastzTurTru2.2012-06-19

    cat fb.bosTau7.chainTurTru2Link.txt
    #   1696154184 bases of 2804673174 (60.476%) in intersection    
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/bosTau7/bed
    ln -s lastzTurTru2.2012-06-19 lastz.turTru2


    # and the swap
    mkdir /hive/data/genomes/turTru2/bed/blastz.bosTau7.swap
    cd /hive/data/genomes/turTru2/bed/blastz.bosTau7.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
    /hive/data/genomes/bosTau7/bed/lastzTurTru2.2012-06-19/DEF \
    -swap -syntenicNet \
    -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
    -chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
    #   real    532m53.448s
    cat fb.turTru2.chainBosTau7Link.txt
    #   1625706583 bases of 2332402443 (69.701%) in intersection
    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/turTru2/bed
    ln -s blastz.bosTau7.swap lastz.bosTau7

#########################################################################
# set default position to RHO gene displays  (DONE - 2012-07-26 - Hiram)
    hgsql -e \
'update dbDb set defaultPos="JH481929:22304-29703" where name="turTru2";' \
	hgcentraltest

############################################################################
# pushQ entry (DONE - 2012-07-26 - Hiram)
    mkdir /hive/data/genomes/turTru2/pushQ
    cd /hive/data/genomes/turTru2/pushQ
    # Mark says don't let the transMap track get there
    time makePushQSql.pl turTru2 2> stderr.txt | grep -v transMap > turTru2.sql
    #   real    3m41.168s
    # check the stderr.txt for bad stuff, these kinds of warnings are OK:
# WARNING: hgwdev does not have /gbdb/turTru2/wib/gc5Base.wib
# WARNING: hgwdev does not have /gbdb/turTru2/wib/quality.wib
# WARNING: hgwdev does not have /gbdb/turTru2/bbi/quality.bw
# WARNING: turTru2 does not have seq
# WARNING: turTru2 does not have extFile
# WARNING: turTru2 does not have estOrientInfo

    scp -p turTru2.sql hgwbeta:/tmp
    ssh hgwbeta "hgsql qapushq < /tmp/turTru2.sql"

############################################################################
# create ucscToINSDC name mapping (DONE - 2013-08-23 - Hiram)
    mkdir /hive/data/genomes/turTru2/bed/ucscToINSDC
    cd /hive/data/genomes/turTru2/bed/ucscToINSDC

    # copying these scripts from the previous load and improving them
    # with each instance
    ./translateNames.sh NC_012059.1
    ./verifyAll.sh
    ./join.sh

    sed -e "s/21/12/" $HOME/kent/src/hg/lib/ucscToINSDC.sql \
      | hgLoadSqlTab turTru2 ucscToINSDC stdin ucscToINSDC.tab
    checkTableCoords turTru2 ucscToINSDC
    featureBits -countGaps turTru2 ucscToINSDC
    # 2551996573 bases of 2551996573 (100.000%) in intersection

    # verify the track link to INSDC functions

##############################################################################
##############################################################################
# TransMap V3 tracks. see makeDb/doc/transMapTracks.txt (2014-12-21 markd)
##############################################################################
