# for emacs: -*- mode: sh; -*-

# This file describes browser build for the oreNil2
# Oreochromis niloticus - Nile Tilapia

# DATE:   31-Jan-2011
# ORGANISM:       Oreochromis niloticus
# TAXID:  8128
# ASSEMBLY LONG NAME:     Orenil1.1
# ASSEMBLY SHORT NAME:    Orenil1.1
# ASSEMBLY SUBMITTER:     Broad Institute
# ASSEMBLY TYPE:  Haploid
# NUMBER OF ASSEMBLY-UNITS:       1
# ASSEMBLY ACCESSION:     GCA_000188235.2

# FTP-RELEASE DATE: 09-Feb-2012

#	rsync://ftp.ncbi.nlm.nih.gov/genbank/genomes/Eukaryotes/vertebrates_other/Oreochromis_niloticus/Orenil1.1/

#	Genome ID:
#	http://www.ncbi.nlm.nih.gov/genome/197
#       http://www.ncbi.nlm.nih.gov/bioproject/59571
#       http://www.ncbi.nlm.nih.gov/nuccore/320434504
#       http://www.ncbi.nlm.nih.gov/Traces/wgs/?val=AERX01
#       269x

#	Taxonomy:
#       http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=8128

#	Mitochondrial sequence:
#       http://www.ncbi.nlm.nih.gov/bioproject/42843 - chrMt NC_013663.1

#	Assembly ID: 354508
#       http://www.ncbi.nlm.nih.gov/genome/assembly/354508/


#############################################################################
# fetch sequence from genbank (DONE - 2012-04-10 - Hiram)
    mkdir -p /hive/data/genomes/genomes/oreNil2/genbank
    cd /hive/data/genomes/genomes/oreNil2/genbank

    rsync -a -P \
rsync://ftp.ncbi.nlm.nih.gov/genbank/genomes/Eukaryotes/vertebrates_other/Oreochromis_niloticus/Orenil1.1/ ./

    # measure sequence to be used here
    faSize Primary_Assembly/assembled_chromosomes/FASTA/*.fa.gz \
        Primary_Assembly/unplaced_scaffolds/FASTA/*.fa.gz

    #   927679487 bases (111611440 N's 816068047 real 816068047 upper 0 lower)
    #   in 5677 sequences in 23 files
    #   Total size: mean 163410.2 sd 1922362.4
    #   min 1000 (gi|320342652|gb|AERX01077754.1|)
    #   max 51042256 (gi|374976115|gb|CM001450.1|) median 2009

#############################################################################
# fixup names for UCSC standards (DONE - 2012-04-10 - Hiram)
    mkdir /hive/data/genomes/oreNil2/ucsc
    cd /hive/data/genomes/oreNil2/ucsc

    ########################  Assembled Chromosomes
    cat << '_EOF_' > toUcsc.pl
#!/bin/env perl

use strict;
use warnings;

my %accToChr;

open (FH, "<../genbank/Primary_Assembly/assembled_chromosomes/chr2acc") or
        die "can not read Primary_Assembly/assembled_chromosomes/chr2acc";
while (my $line = <FH>) {
    next if ($line =~ m/^#/);
    chomp $line;
    my ($chrN, $acc) = split('\s+', $line);
    $accToChr{$acc} = $chrN;
}
close (FH);

foreach my $acc (keys %accToChr) {
    my $chrN =  $accToChr{$acc};
    print "$acc $accToChr{$acc}\n";
    open (FH, "zcat ../genbank/Primary_Assembly/assembled_chromosomes/AGP/chr${chrN}.agp.gz|") or die "can not read chr${chrN}.agp.gz";
    open (UC, ">chr${chrN}.agp") or die "can not write to chr${chrN}.agp";
    while (my $line = <FH>) {
        if ($line =~ m/^#/) {
            print UC $line;
        } else {
            $line =~ s/^$acc/chr${chrN}/;
            print UC $line;
        }
    }
    close (FH);
    close (UC);
    open (FH, "zcat ../genbank/Primary_Assembly/assembled_chromosomes/FASTA/chr${chrN}.fa.gz|") or die "can not read chr${chrN}.fa.gz";
    open (UC, ">chr${chrN}.fa") or die "can not write to chr${chrN}.fa";
    while (my $line = <FH>) {
        if ($line =~ m/^>/) {
            printf UC ">chr${chrN}\n";
        } else {
            print UC $line;
        }
    }
    close (FH);
    close (UC);
}
'_EOF_'
    # << happy emacs
    chmod +x toUcsc.pl
    time ./toUcsc.pl
    #	real    0m33.257s
    gzip *.fa *.agp
    faSize chr*.fa.gz
    #   657350972 bases (50870875 N's 606480097 real 606480097 upper 0 lower)
    #   in 22 sequences in 22 files
    #   Total size: mean 29879589.6 sd 7407420.0 min 17092887 (chrLG10)
    #   max 51042256 (chrLG7) median 31194787

    ########################  Unplaced scaffolds
    # verify we don't have any .acc numbers different from .1
    zcat \
    ../genbank/Primary_Assembly/unplaced_scaffolds/AGP/unplaced.scaf.agp.gz \
	| cut -f1 | egrep "^GL|^AERX" \
	| sed -e 's/^GL[0-9][0-9]*//; s/^AERX[0-9][0-9]*//' | sort | uniq -c
    #  71382 .1
    #   1737 .2

    # in that case, since they are not all .1, we need to use the full name
    #   modified to remove the .
    # this is like the unplaced.pl script in other assemblies except it
    #	does not add chrUn_ to the names since they are all just scaffolds

    cat << '_EOF_' > unplaced.pl
#!/bin/env perl

use strict;
use warnings;

my $agpFile =  "../genbank/Primary_Assembly/unplaced_scaffolds/AGP/unplaced.scaf.agp.gz";
my $fastaFile =  "../genbank/Primary_Assembly/unplaced_scaffolds/FASTA/unplaced.scaf.fa.gz";
open (FH, "zcat $agpFile|") or die "can not read $agpFile";
open (UC, "|gzip -c > unplaced.agp.gz") or die "can not write to unplaced.agp.gz";
while (my $line = <FH>) {
    if ($line =~ m/^#/) {
        print UC $line;
    } else {
        $line =~ s/\./-/;    
        printf UC "%s", $line;
    }
}
close (FH);
close (UC);

open (FH, "zcat $fastaFile|") or die "can not read $fastaFile";
open (UC, "|gzip -c > unplaced.fa.gz") or die "can not write to unplaced.fa.gz";
while (my $line = <FH>) {
    if ($line =~ m/^>/) {
        chomp $line;
        $line =~ s/.*gb\|//;
        $line =~ s/\|.*//;
        $line =~ s/\./-/;    
        printf UC ">$line\n";
    } else {
        print UC $line;
    }
}
close (FH);
close (UC);
'_EOF_'
    # << happy emacs
    chmod +x unplaced.pl
    time ./unplaced.pl
    #   real    1m5.894s
    # make sure none of the names got to be over 31 characers long:
    zcat unplaced.agp | grep -v "^#" | cut -f1 | awk '{print length($0)}' \
        | sort -rn | uniq -c | head
    #      4077 14
    #   69042 10

    faSize unplaced.fa.gz
    #   270328515 bases (60740565 N's 209587950 real 209587950 upper 0 lower)
    #   in 5655 sequences in 1 files
    #   Total size: mean 47803.5 sd 238106.9 min 1000 (AERX01077754-1)
    #   max 7888616 (GL831144-1) median 1992

    # verify we have the same sequence as delivered from genbank:
    faSize *.fa.gz
    #   927679487 bases (111611440 N's 816068047 real 816068047 upper 0 lower)
    #   in 5677 sequences in 23 files
    #   Total size: mean 163410.2 sd 1922362.4 min 1000 (AERX01077754-1)
    #   max 51042256 (chrLG7) median 2009

    # from genbank:
    #   927679487 bases (111611440 N's 816068047 real 816068047 upper 0 lower)
    #   in 5677 sequences in 23 files
    #   Total size: mean 163410.2 sd 1922362.4
    #   min 1000 (gi|320342652|gb|AERX01077754.1|)
    #   max 51042256 (gi|374976115|gb|CM001450.1|) median 2009

#############################################################################
#  Initial database build (DONE - 2012-04-10 - Hiram)

    cd /hive/data/genomes/oreNil2
    cat << '_EOF_' > oreNil2.config.ra
# Config parameters for makeGenomeDb.pl:
db oreNil2
clade vertebrate
genomeCladePriority 85
scientificName Oreochromis niloticus
commonName Nile tilapia
assemblyDate Jan. 2011
assemblyLabel Broad Institute of MIT and Harvard Orenil1.1 (GCA_000188235.1)
assemblyShortLabel Nile tilapia
ncbiAssemblyName Orenil1.1
ncbiAssemblyId 354508
orderKey 4679
mitoAcc NC_013663
fastaFiles /cluster/data/oreNil2/ucsc/*.fa.gz
agpFiles /cluster/data/oreNil2/ucsc/*.agp.gz
dbDbSpeciesDir oreNil
taxId 8128
'_EOF_'
    # << happy emacs

    # verify sequence and agp are OK
    time makeGenomeDb.pl -workhorse=hgwdev -fileServer=hgwdev -dbHost=hgwdev \
        -stop=agp oreNil2.config.ra > agp.log 2>&1
    #	real    1m6.870s
    # verify OK:
    tail -1 agp.log
    #   *** All done!  (through the 'agp' step)

    time makeGenomeDb.pl -continue=db oreNil2.config.ra > db.log 2>&1
    #   real    8m45.076s

##########################################################################
# running repeat masker (DONE - 2012-04-10 - Hiram)
    mkdir /hive/data/genomes/oreNil2/bed/repeatMasker
    cd /hive/data/genomes/oreNil2/bed/repeatMasker
    time doRepeatMasker.pl -buildDir=`pwd` -noSplit \
	-bigClusterHub=swarm -dbHost=hgwdev -workhorse=hgwdev \
	-smallClusterHub=encodek oreNil2 > do.log 2>&1 &
    #   real    67m27.295s

    cat faSize.rmsk.txt
    #   927696114 bases (111611440 N's 816084674 real 786292709 upper
    #   29791965 lower) in 5678 sequences in 1 files
    #   Total size: mean 163384.3 sd 1922194.0 min 1000 (AERX01077754-1)
    #   max 51042256 (chrLG7) median 2009
    #   %3.21 masked total, %3.65 masked real

    egrep -i "versi|relea" do.log
#    April 26 2011 (open-3-3-0) version of RepeatMasker
# CC   RELEASE 20110920; 
# RepeatMasker version development-$Id: RepeatMasker,v 1.26 2011/09/26 16:19:44 angie Exp $

    featureBits -countGaps oreNil2 rmsk
    #	30137751 bases of 927696114 (3.249%) in intersection

    # why is it different than the faSize above ?
    # because rmsk masks out some N's as well as bases, the count above
    #	separates out the N's from the bases, it doesn't show lower case N's

##########################################################################
# running simple repeat (DONE - 2012-04-10 - Hiram)
    mkdir /hive/data/genomes/oreNil2/bed/simpleRepeat
    cd /hive/data/genomes/oreNil2/bed/simpleRepeat
    time doSimpleRepeat.pl -buildDir=`pwd` -bigClusterHub=swarm \
	-dbHost=hgwdev -workhorse=hgwdev -smallClusterHub=encodek \
	oreNil2 > do.log 2>&1 &
    #   real    8m53.572s
    cat fb.simpleRepeat 
    #	7960749 bases of 866932349 (0.918%) in intersection

    # add to rmsk after it is done:
    cd /hive/data/genomes/oreNil2
    twoBitMask oreNil2.rmsk.2bit \
	-add bed/simpleRepeat/trfMask.bed oreNil2.2bit
    #	you can safely ignore the warning about fields >= 13

    twoBitToFa oreNil2.2bit stdout | faSize stdin > faSize.oreNil2.2bit.txt
    cat faSize.oreNil2.2bit.txt
    #	1511735326 bases (153400444 N's 1358334882 real 1024824487 upper
    #	333510395 lower) in 19550 sequences in 1 files
    #	%22.06 masked total, %24.55 masked real

    rm /gbdb/oreNil2/oreNil2.2bit
    ln -s `pwd`/oreNil2.2bit /gbdb/oreNil2/oreNil2.2bit

#########################################################################
# Verify all gaps are marked, add any N's not in gap as type 'other'
#	(DONE - 2012-04-11 - Hiram)
    mkdir /hive/data/genomes/oreNil2/bed/gap
    cd /hive/data/genomes/oreNil2/bed/gap
    time nice -n +19 findMotif -motif=gattaca -verbose=4 \
	-strand=+ ../../oreNil2.unmasked.2bit > findMotif.txt 2>&1
    #	real    3m21.738s
    grep "^#GAP " findMotif.txt | sed -e "s/^#GAP //" > allGaps.bed
    featureBits oreNil2 -not gap -bed=notGap.bed
    #	866932349 bases of 866932349 (100.000%) in intersection
    time featureBits oreNil2 allGaps.bed notGap.bed -bed=new.gaps.bed
    #   50847675 bases of 866932349 (5.865%) in intersection
    #   real    2m16.940s

    #	what is the highest index in the existing gap table:
    hgsql -N -e "select ix from gap;" oreNil2 | sort -n | tail -1
    #	1004
    cat << '_EOF_' > mkGap.pl
#!/bin/env perl

use strict;
use warnings;

my $ix=`hgsql -N -e "select ix from gap;" oreNil2 | sort -n | tail -1`;
chomp $ix;

open (FH,"<new.gaps.bed") or die "can not read new.gaps.bed";
while (my $line = <FH>) {
    my ($chrom, $chromStart, $chromEnd, $rest) = split('\s+', $line);
    ++$ix;
    printf "%s\t%d\t%d\t%d\tN\t%d\tother\tyes\n", $chrom, $chromStart,
        $chromEnd, $ix, $chromEnd-$chromStart;
}
close (FH);
'_EOF_'
    # << happy emacs
    chmod +x ./mkGap.pl
    ./mkGap.pl > other.bed
    featureBits -countGaps oreNil2 other.bed
    #	50847675 bases of 927696114 (5.481%) in intersection
    wc -l other.bed
    #	38113
    hgLoadBed -sqlTable=$HOME/kent/src/hg/lib/gap.sql \
	-noLoad oreNil2 otherGap other.bed
    #	starting with this many
    hgsql -e "select count(*) from gap;" oreNil2
    #	 33964
    hgsql oreNil2 -e 'load data local infile "bed.tab" into table gap;'
    #	result count:
    hgsql -e "select count(*) from gap;" oreNil2
    #	72077
    # == 33964 + 38113
    # verify we aren't adding gaps where gaps already exist
    # this would output errors if that were true:
    gapToLift -minGap=1 oreNil2 nonBridged.lift -bedFile=nonBridged.bed
    # see example in danRer7.txt
    # there are no non-bridged gaps here:
    hgsql -N -e "select bridge from gap;" oreNil2 | sort | uniq -c
    #   232 no
    #   71845 yes

##########################################################################
## WINDOWMASKER (DONE - 2012-04-10 - Hiram)
    mkdir /hive/data/genomes/oreNil2/bed/windowMasker
    cd /hive/data/genomes/oreNil2/bed/windowMasker
    time nice -n +19 doWindowMasker.pl -buildDir=`pwd` -workhorse=hgwdev \
	-dbHost=hgwdev oreNil2 > do.log 2>&1 &
    #   real    66m1.067s

    # Masking statistics
    twoBitToFa oreNil2.wmsk.2bit stdout | faSize stdin
    #   927696114 bases (111611440 N's 816084674 real 612807423 upper
    #   203277251 lower) in 5678 sequences in 1 files
    #   Total size: mean 163384.3 sd 1922194.0 min 1000 (AERX01077754-1)
    #   max 51042256 (chrLG7) median 2009
    #   %21.91 masked total, %24.91 masked real

    twoBitToFa oreNil2.wmsk.sdust.2bit stdout | faSize stdin
    #   927696114 bases (111611440 N's 816084674 real 608110449 upper
    #   207974225 lower) in 5678 sequences in 1 files
    #   Total size: mean 163384.3 sd 1922194.0 min 1000 (AERX01077754-1)
    #   max 51042256 (chrLG7) median 2009
    #   %22.42 masked total, %25.48 masked real

    hgLoadBed oreNil2 windowmaskerSdust windowmasker.sdust.bed.gz
    #	Loaded 5157020 elements of size 3

    featureBits -countGaps oreNil2 windowmaskerSdust
    #   319585665 bases of 927696114 (34.449%) in intersection

    #	eliminate the gaps from the masking
    featureBits oreNil2 -not gap -bed=notGap.bed
    #	816084674 bases of 816084674 (100.000%) in intersection
    time nice -n +19 featureBits oreNil2 windowmaskerSdust notGap.bed \
        -bed=stdout | gzip -c > cleanWMask.bed.gz
    #   207974225 bases of 816084674 (25.484%) in intersection
    #	real    1m44.114s
    #	reload track to get it clean
    hgLoadBed oreNil2 windowmaskerSdust cleanWMask.bed.gz
    #	Loaded 5181510 elements of size 4
    featureBits -countGaps oreNil2 windowmaskerSdust
    #   207974225 bases of 927696114 (22.418%) in intersection
    zcat cleanWMask.bed.gz \
	| twoBitMask ../../oreNil2.unmasked.2bit stdin \
	    -type=.bed oreNil2.cleanWMSdust.2bit
    twoBitToFa oreNil2.cleanWMSdust.2bit stdout | faSize stdin \
        > oreNil2.cleanWMSdust.faSize.txt
    cat oreNil2.cleanWMSdust.faSize.txt
    #   927696114 bases (111611440 N's 816084674 real 608110449 upper
    #   207974225 lower) in 5678 sequences in 1 files
    #   Total size: mean 163384.3 sd 1922194.0 min 1000 (AERX01077754-1)
    #   max 51042256 (chrLG7) median 2009
    #   %22.42 masked total, %25.48 masked real

    # how much does this window masker and repeat masker overlap:
    featureBits -countGaps oreNil2 rmsk windowmaskerSdust
    #   21445936 bases of 927696114 (2.312%) in intersection

#########################################################################
# MASK SEQUENCE WITH WM+TRF (DONE - 2012-04-11 - Hiram)
    #	since rmsk masks so very little of the genome, use the clean WM mask
    #	on the genome sequence
    cd /hive/data/genomes/oreNil2
    twoBitMask -add bed/windowMasker/oreNil2.cleanWMSdust.2bit \
	bed/simpleRepeat/trfMask.bed oreNil2.2bit
    #	safe to ignore the warnings about BED file with >=13 fields
    twoBitToFa oreNil2.2bit stdout | faSize stdin > faSize.oreNil2.txt
    cat faSize.oreNil2.txt
    #   927696114 bases (111611440 N's 816084674 real 607935500 upper
    #   208149174 lower) in 5678 sequences in 1 files
    #   Total size: mean 163384.3 sd 1922194.0 min 1000 (AERX01077754-1)
    #   max 51042256 (chrLG7) median 2009
    #   %22.44 masked total, %25.51 masked real

    #	create symlink to gbdb
    ssh hgwdev
    rm /gbdb/oreNil2/oreNil2.2bit
    ln -s `pwd`/oreNil2.2bit /gbdb/oreNil2/oreNil2.2bit

########################################################################
# cpgIslands - (DONE - 2011-04-20 - Hiram)
    mkdir /hive/data/genomes/oreNil2/bed/cpgIslands
    cd /hive/data/genomes/oreNil2/bed/cpgIslands
    time doCpgIslands.pl oreNil2 > do.log 2>&1
    #   Elapsed time: 11m10s

    time featureBits oreNil2 cpgIslandExt > fb.oreNil2.cpgIslandExt.txt 2>&1
    #   6076152 bases of 816084674 (0.745%) in intersection
    #   real    0m2.342s

#########################################################################
# genscan - (DONE - 2011-04-26 - Hiram)
    mkdir /hive/data/genomes/oreNil2/bed/genscan
    cd /hive/data/genomes/oreNil2/bed/genscan
    time doGenscan.pl oreNil2 > do.log 2>&1
    #   real    26m8.894s
    # three broken jobs rerunning at window size of 2000000:
./runGsBig.csh chrLG12 000 gtf/000/chrLG12.gtf pep/000/chrLG12.pep subopt/000/chrLG12.bed &
./runGsBig.csh chrLG20 000 gtf/000/chrLG20.gtf pep/000/chrLG20.pep subopt/000/chrLG20.bed &
./runGsBig.csh chrLG22 000 gtf/000/chrLG22.gtf pep/000/chrLG22.pep subopt/000/chrLG22.bed
    #   real    13m41.480s
    # continuing:
    time doGenscan.pl -continue=makeBed oreNil2 > makeBed.log 2>&1
    #   Elapsed time: 9m20s
    cat fb.oreNil2.genscan.txt
    #   38275835 bases of 816084674 (4.690%) in intersection
    cat fb.oreNil2.genscanSubopt.txt
    #   28219603 bases of 816084674 (3.458%) in intersection

#########################################################################
# MAKE 11.OOC FILE FOR BLAT/GENBANK (DONE - 2012-05-03 - Hiram)
    # Use -repMatch=900, based on size -- for human we use 1024
    # use the "real" number from the faSize measurement,
    # hg19 is 2897316137, calculate the ratio factor for 1024:
    calc \( 816084674 / 2897316137 \) \* 1024
    #	( 816084674 / 2897316137 ) * 1024 = 288.429245

    # round up to 300

    cd /hive/data/genomes/oreNil2
    time blat oreNil2.2bit /dev/null /dev/null -tileSize=11 \
      -makeOoc=jkStuff/oreNil2.11.ooc -repMatch=300
    #   Wrote 16614 overused 11-mers to jkStuff/oreNil2.11.ooc
    #   real    0m28.015s

    # there are non-bridged gaps, construct lift file needed for genbank
    hgsql -N -e "select bridge from gap;" oreNil2 | sort | uniq -c
    #   232 no
    #   71845 yes

    cd /hive/data/genomes/oreNil2/jkStuff
    gapToLift oreNil2 oreNil2.nonBridged.lift -bedFile=oreNil2.nonBridged.bed
    # largest non-bridged contig:
    awk '{print $3-$2,$0}' oreNil2.nonBridged.bed | sort -nr | head
    #   13623339 chrLG1 17571448        31194787        chrLG1.07

#########################################################################
# AUTO UPDATE GENBANK (DONE - 2012-05-03 - Hiram)
    # examine the file:
    /cluster/data/genbank/data/organism.lst
    # for your species to see what counts it has for:
# organism       mrnaCnt estCnt  refSeqCnt
# Oreochromis niloticus	351	120993	0
    # to decide which "native" mrna or ests you want to specify in genbank.conf

    ssh hgwdev  
    cd $HOME/kent/src/hg/makeDb/genbank
    git pull
    # edit etc/genbank.conf to add oreNil2 just after ce2
# oreNil2 (Nile tilapia)
oreNil2.serverGenome = /hive/data/genomes/oreNil2/oreNil2.2bit
oreNil2.clusterGenome = /hive/data/genomes/oreNil2/oreNil2.2bit
oreNil2.ooc = /hive/data/genomes/oreNil2/jkStuff/oreNil2.11.ooc
oreNil2.lift = /hive/data/genomes/oreNil2/jkStuff/oreNil2.nonBridged.lift
oreNil2.refseq.mrna.native.pslCDnaFilter  = ${lowCover.refseq.mrna.native.pslCDnaFilter}
oreNil2.refseq.mrna.xeno.pslCDnaFilter    = ${lowCover.refseq.mrna.xeno.pslCDnaFilter}
oreNil2.genbank.mrna.native.pslCDnaFilter = ${lowCover.genbank.mrna.native.pslCDnaFilter}
oreNil2.genbank.mrna.xeno.pslCDnaFilter   = ${lowCover.genbank.mrna.xeno.pslCDnaFilter}
oreNil2.genbank.est.native.pslCDnaFilter  = ${lowCover.genbank.est.native.pslCDnaFilter}
oreNil2.refseq.mrna.native.load = yes
oreNil2.refseq.mrna.xeno.load = yes
oreNil2.genbank.mrna.xeno.load = yes
oreNil2.genbank.est.native.load = yes
oreNil2.downloadDir = oreNil2
oreNil2.perChromTables = no

    # end of section added to etc/genbank.conf
    git commit -m "adding oreNil2 Nile tilapia" etc/genbank.conf
    git push
    make etc-update

    git pull
    # Edit src/lib/gbGenome.c to add new species.
    git commit -m "adding definition for oreNilNames" \
        src/lib/gbGenome.c
    git push
    make install-server

    ssh hgwdev			# used to do this on "genbank" machine
    screen -S oreNil2           # long running job managed in screen
    cd /cluster/data/genbank
    time nice -n +19 ./bin/gbAlignStep -initial oreNil2 &
    #   var/build/logs/2012.06.08-09:52:19.oreNil2.initalign.log
    #   real    1252m57.323s

    # load database when finished
    ssh hgwdev
    cd /cluster/data/genbank
    time nice -n +19 ./bin/gbDbLoadStep -drop -initialLoad oreNil2 &
    #	logFile:  var/dbload/hgwdev/logs/2012.06.11-10:16:13.dbload.log
    #   real    30m6.085s

    # enable daily alignment and update of hgwdev (DONE - 2012-05-11 - Hiram)
    cd ~/kent/src/hg/makeDb/genbank
    git pull
    # add oreNil2 to:
    vi etc/align.dbs etc/hgwdev.dbs
    git commit -m "Added oreNil2." etc/align.dbs etc/hgwdev.dbs
    git push
    make etc-update

#########################################################################
# set default position to RHO gene displays  (DONE - 2012-07-24 - Hiram)
    hgsql -e \
'update dbDb set defaultPos="chrLG5:26068730-26095423" where name="oreNil2";' \
	hgcentraltest

############################################################################
# pushQ entry (DONE - 2012-07-24 - Hiram)
    mkdir /hive/data/genomes/oreNil2/pushQ
    cd /hive/data/genomes/oreNil2/pushQ
    # Mark says don't let the transMap track get there
    time makePushQSql.pl oreNil2 2> stderr.txt | grep -v transMap > oreNil2.sql
    #   real    3m31.947s

    scp -p oreNil2.sql hgwbeta:/tmp
    ssh hgwbeta "hgsql qapushq < /tmp/oreNil2.sql"

############################################################################
# create ucscToINSDC name mapping (DONE - 2013-08-23 - Hiram)
    mkdir /hive/data/genomes/oreNil2/bed/ucscToINSDC
    cd /hive/data/genomes/oreNil2/bed/ucscToINSDC

    # copying these scripts from the previous load and improving them
    # with each instance
    ./translateNames.sh NC_013663.1
    ./verifyAll.sh
    ./join.sh

    sed -e "s/21/14/" $HOME/kent/src/hg/lib/ucscToINSDC.sql \
      | hgLoadSqlTab oreNil2 ucscToINSDC stdin ucscToINSDC.tab
    checkTableCoords oreNil2 ucscToINSDC
    featureBits -countGaps oreNil2 ucscToINSDC
    # 927696114 bases of 927696114 (100.000%) in intersection

    # verify the track link to INSDC functions

##############################################################################
##############################################################################
# TransMap V3 tracks. see makeDb/doc/transMapTracks.txt (2014-12-21 markd)
##############################################################################
##############################################################################

##############################################################################
# LIFTOVER TO oreNil3 (DONE - 2017-07-27 - Hiram )
    mkdir /hive/data/genomes/oreNil2/bed/blat.oreNil3.2017-07-27
    cd /hive/data/genomes/oreNil2/bed/blat.oreNil3.2017-07-27
    # -debug run to create run dir, preview scripts...
    doSameSpeciesLiftOver.pl -debug \
        -ooc /hive/data/genomes/oreNil2/jkStuff/oreNil2.11.ooc \
	   oreNil2 oreNil3
    # Real run:
    time (doSameSpeciesLiftOver.pl -verbose=2 \
        -ooc /hive/data/genomes/oreNil2/jkStuff/oreNil2.11.ooc \
	-bigClusterHub=ku -dbHost=hgwdev -workhorse=hgwdev \
	 oreNil2 oreNil3) > do.log 2>&1 &
    #   real    331m53.848s

    # verify this functions in the genome browser from oreNil2 to oreNil3

########################################################################
