# for emacs: -*- mode: sh; -*-

#	DATE:	03-Apr-2011
#	ORGANISM:	Mustela putorius furo
#	TAXID:	9669
#	ASSEMBLY LONG NAME:	MusPutFur1.0
#	ASSEMBLY SHORT NAME:	MusPutFur1.0
#	ASSEMBLY SUBMITTER:	Ferret Genome Sequencing Consortium
#	ASSEMBLY TYPE:	Haploid
#	NUMBER OF ASSEMBLY-UNITS:	1
#	Assembly Accession:	GCA_000215625.1



#       http://www.ncbi.nlm.nih.gov/genome/3295
#       http://www.ncbi.nlm.nih.gov/assembly/286418/
#       http://www.ncbi.nlm.nih.gov/bioproject/59869

#	http://www.ncbi.nlm.nih.gov/Traces/wgs/?val=AEYP01
#       Genome Coverage : 162x

#       http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=9669

#	rsync://ftp.ncbi.nlm.nih.gov/genbank/genomes/Eukaryotes/vertebrates_mammals/Mustela_putorius/MusPutFur1.0/

##########################################################################
# Download sequence (DONE - 2013-01-10 - Pauline)
    # XXX NOTE: There were duplicate contigs in this assembly.  To remove the
    # duplicates, the correspondingly Ensembl release was fetched:
    mkdir -p /hive/data/genomes/musFur1/ensembl
    cd /hive/data/genomes/musFur1/ensembl
    wget -r --cut-dirs=5 \
"ftp://ftp.ensembl.org/pub/release-70/fasta/mustela_putorius_furo/dna/"
    # run an faCount on the toplevel sequence:
    faCount Mustela_putorius_furo.MusPutFur1.0.70.dna.toplevel.fa.gz \
         > toplevel.faCount.txt
    # generate a chrom.sizes file from that:
    egrep -v "^#seq|^total" toplevel.faCount.txt | cut -f1,2 | sort -k2nr \
       | sed -e "s/\.1//" > ensembl.chrom.sizes
    # then from the previous attempt at the UCSC build of this sequence:
    twoBitInfo ../musFur1.unmasked.2bit stdout | sort -k2nr > musFur1.chrom.sizes
    # then, finding the dups by comparing those two chrom.sizes listings
    # to look for items with only one entry on the lists, which would be
    # the extra one in the duplicate contig assembly:
    egrepStr=`awk '{printf "|%s|%s", $1, $3}' musFur1.twoBitDup.txt | sed -e 's/^.//'`
    egrep -h "$egrepStr" ensembl.chrom.sizes musFur1.chrom.sizes | cut -f1 \
       | sort | uniq -c | awk '$1 < 2' \
          | awk '{print $2}' > remove.duplicate.list
    # now, to remove the duplicates:
    cd /hive/data/genomes/musFur1/genbank
    faSomeRecords -exclude ucsc.fa remove.duplicate.list ucsc.noDup.fa
    egrepStr=`cat remove.duplicate.list | xargs echo | sed -e 's/ /|/g'`
    egrep -v "$egrepStr" ucsc.agp > ucsc.noDup.agp

    # using these settings in the musFur1.config.ra file:
fastaFiles /hive/data/genomes/musFur1/genbank/ucsc.noDup.fa
agpFiles /hive/data/genomes/musFur1/genbank/ucsc.noDup.agp

    ########################################################################
    #  the following was the original setup for this build that included the
    # duplicates:
    mkdir /hive/data/genomes/musFur1
    cd /hive/data/genomes/musFur1
    mkdir genbank
    cd genbank
    time rsync -a -P \
rsync://ftp.ncbi.nlm.nih.gov/genbank/genomes/Eukaryotes/vertebrates_mammals/Mustela_putorius/MusPutFur1.0/ ./
    #   real    3m8.737s

    # verify the size of the sequence here:
    faSize Primary_Assembly/unplaced_scaffolds/FASTA/unplaced.scaf.fa.gz

# 2410863155 bases (132851443 N's 2278011712 real 2278011712 upper 0 lower) in 7782 sequences in 1 files
# Total size: mean 309799.9 sd 1997002.0 min 1000 (gi|334565269|gb|AEYP01117479.1|) max 52375790 (gi|334708367|gb|GL896898.1|) median 1448

zcat unplaced.scaf.agp.gz | awk '{print $1}' | sed -e 's/GL[0-9][0-9]*//; s/AEYP[0-9][0-9]*//' | sort | uniq
#
.1

zcat unplaced.scaf.agp.gz | sed -e 's/\.1//' > ../../../ucsc.agp

    zcat Primary_Assembly/unplaced_scaffolds/FASTA/unplaced.scaf.fa.gz \
        | sed -e "s/^>.*GL8/>GL8/; s/^>.*AEYP0/>AEYP0/; s/\.1. Mustela.*//" \
            > ucsc.fa

    # strip the names down to something reasonable, they can all be:
    # >GL[0-9]
    # since they are all .1 versions:
    zcat Primary_Assembly/unplaced_scaffolds/FASTA/unplaced.scaf.fa.gz \
        | sed -e "s/^>.*GL/>GL/; s/.1. Petromyzon.*//" \
        | gzip -c > ucsc.fa.gz
    zcat Primary_Assembly/unplaced_scaffolds/AGP/unplaced.scaf.agp.gz \
        | sed -e "s/^\(GL[0-9]*\).1/\1/;" | gzip -c > ucsc.agp.gz
    time checkAgpAndFa  ucsc.agp.gz ucsc.fa.gz 2>&1 | tail -2
# Valid Fasta file entry
# All AGP and FASTA entries agree - both files are valid
    #   real    0m12.951s

    mkdir /hive/data/genomes/musFur1/photograph
    cd /hive/data/genomes/musFur1/photograph
    wget --timestamping \
        http://www.nasa.gov/centers/kennedy/images/content/91159main_93pc780.jpg

    convert -geometry "220x300" 91159main_93pc780.jpg \
        Alligator_Mississippiensis.jpg
    # check this .jpg file into the source tree kent/src/hg/htdocs/images/
    git commit -m "gator photo from NASA Kennedy" Alligator_Mississippiensis.jpg
    # and copy to /usr/local/apache/htdocs/images
    cp -p Alligator_Mississippiensis.jpg /usr/local/apache/htdocs/images

##########################################################################
# Fetch photograph (DONE - 2013-03-05 - Hiram)
    # using the photo indicated on the NCBI genome project page:
    mkdir /hive/data/genomes/musFur1/photo
    cd /hive/data/genomes/musFur1/photo
    wget --timestamping \
         http://upload.wikimedia.org/wikipedia/commons/3/32/Ferret_2008.png
    convert -geometry "300x300" Ferret_2008.png Mustela_putorius_furo.png
    # checkin this file Mustela_putorius_furo.png to the source tree

##########################################################################
# Initial makeGenomeDb.pl (DONE - 2013-01-10,03-05 - Pauline, Hiram)
    # obtain a template for this from the source tree:
    # kent/src/hg/utils/automation/configFiles/
    # and check it back into the source tree when completed here:
    cd /hive/data/genomes/musFur1
    cat << '_EOF_' > musFur1.config.ra
# Config parameters for makeGenomeDb.pl:
db musFur1
clade vertebrate
genomeCladePriority 16
scientificName Mustela putorius furo
commonName Ferret
assemblyDate Apr. 2011
assemblyLabel Ferret Genome Sequencing Consortium
assemblyShortLabel MusPutFur1.0
orderKey 2430
mitoAcc none
fastaFiles /hive/data/genomes/musFur1/genbank/ucsc.noDup.fa
agpFiles /hive/data/genomes/musFur1/genbank/ucsc.noDup.agp
# qualFiles none
dbDbSpeciesDir ferret
photoCreditURL http://commons.wikimedia.org/wiki/File:Ferret_2008.png
photoCreditName Alfredo Gutirrez and Wikimedia Commons
ncbiGenomeId 3295
ncbiAssemblyId 286418
ncbiAssemblyName MusPutFur1.0
ncbiBioProject 59869
genBankAccessionID GCA_000215625.1
taxId 9669
'_EOF_'
    # << happy emacs
    time makeGenomeDb.pl -workhorse=hgwdev -fileServer=hgwdev -dbHost=hgwdev \
        -stop=agp musFur1.config.ra >>& agp.log
    #   real    1m6.334s
    # verify OK:
    tail -1 agp.log
    #   *** All done!  (through the 'agp' step)

    # finish it off
    time makeGenomeDb.pl -continue=db -workhorse=hgwdev -fileServer=hgwdev \
        -dbHost=hgwdev musFur1.config.ra > db.log 2>&1
    #   real    17m58.217s
    # XXX failed in the trackDb/html setup due to missing image:
    #   htdocs/images/Mustela_putorius_furo.{jpg|png|gif}

    #	add the trackDb entries to the source tree, and the 2bit link:
    ln -s `pwd`/musFur1.unmasked.2bit /gbdb/musFur1/musFur1.2bit
    #	browser should function now, add the files from the trackDb
    #   hierarchy here to the source tree

    #   after checking in the photograph and getting it into
    #   /usr/local/apache/htdocs/images:
    time makeGenomeDb.pl -workhorse=hgwdev -fileServer=hgwdev \
        -continue=trackDb -forceDescription -dbHost=hgwdev musFur1.config.ra

##########################################################################
# running repeat masker (DONE - 2013-03-05 - Hiram)
    mkdir /hive/data/genomes/musFur1/bed/repeatMasker
    cd /hive/data/genomes/musFur1/bed/repeatMasker
    time doRepeatMasker.pl -buildDir=`pwd` -noSplit \
	-bigClusterHub=swarm -dbHost=hgwdev -workhorse=hgwdev \
	-smallClusterHub=encodek musFur1 > do.log 2>&1 &
    #   real    843m37.578s

    cat faSize.rmsk.txt
    #   2410758013 bases (132851443 N's 2277906570 real 1390898176 upper
    #   887008394 lower) in 7741 sequences in 1 files
    #   Total size: mean 311427.2 sd 2002158.7 min 1000 (AEYP01117479)
    #      max 52375790 (GL896898) median 1445
    #   %36.79 masked total, %38.94 masked real

    egrep -i "versi|relea" do.log
    #    January 10 2013 (open-4-0-0) version of RepeatMasker
    #       CC   RELEASE 20120418;

    featureBits -countGaps musFur1 rmsk
    #   887930078 bases of 2410758013 (36.832%) in intersection

    # why is it different than the faSize above ?
    # because rmsk masks out some N's as well as bases, the count above
    #	separates out the N's from the bases, it doesn't show lower case N's

##########################################################################
# running simple repeat (DONE - 2013-03-05 - Hiram)
    mkdir /hive/data/genomes/musFur1/bed/simpleRepeat
    cd /hive/data/genomes/musFur1/bed/simpleRepeat
    time doSimpleRepeat.pl -buildDir=`pwd` -bigClusterHub=swarm \
	-dbHost=hgwdev -workhorse=hgwdev -smallClusterHub=encodek \
	musFur1 > do.log 2>&1 &
    #   real    9m42.005s

    cat fb.simpleRepeat
    #   25005240 bases of 2277906570 (1.098%) in intersection

    # add to rmsk after it is done:
    cd /hive/data/genomes/musFur1
    twoBitMask musFur1.rmsk.2bit \
	-add bed/simpleRepeat/trfMask.bed musFur1.2bit
    #	you can safely ignore the warning about fields >= 13

    twoBitToFa musFur1.2bit stdout | faSize stdin > faSize.musFur1.2bit.txt
    cat faSize.musFur1.2bit.txt

    #   2410758013 bases (132851443 N's 2277906570 real 1389998131 upper
    #   887908439 lower) in 7741 sequences in 1 files
    #   Total size: mean 311427.2 sd 2002158.7 min 1000 (AEYP01117479)
    #   max 52375790 (GL896898) median 1445
    #   %36.83 masked total, %38.98 masked real

    rm /gbdb/musFur1/musFur1.2bit
    ln -s `pwd`/musFur1.2bit /gbdb/musFur1/musFur1.2bit

#########################################################################
# Verify all gaps are marked, add any N's not in gap as type 'other'
#	(DONE - 2013-03-05 - Hiram)
    mkdir /hive/data/genomes/musFur1/bed/gap
    cd /hive/data/genomes/musFur1/bed/gap
    time nice -n +19 findMotif -motif=gattaca -verbose=4 \
	-strand=+ ../../musFur1.unmasked.2bit > findMotif.txt 2>&1
    #   real    0m12.841s
    grep "^#GAP " findMotif.txt | sed -e "s/^#GAP //" > allGaps.bed
    time featureBits musFur1 -not gap -bed=notGap.bed
    #   2277906570 bases of 2277906570 (100.000%) in intersection
    #   real    0m13.015s

    # can see now if allGaps.bed actually is all the gaps:
    hgsql -N -e "select size from gap;" musFur1 | ave stdin | grep total
# total 132851443.000000
    ave -col=5 allGaps.bed | grep total
# total 132851443.000000
    # same count, no new gaps

    # check if any non-bridged gaps here:
    hgsql -N -e "select bridge from gap;" musFur1 | sort | uniq -c
    #  109700 yes

##########################################################################
## WINDOWMASKER (DONE - 2013-03-05 - Hiram)
    mkdir /hive/data/genomes/musFur1/bed/windowMasker
    cd /hive/data/genomes/musFur1/bed/windowMasker
    time nice -n +19 doWindowMasker.pl -buildDir=`pwd` -workhorse=hgwdev \
	-dbHost=hgwdev musFur1 > do.log 2>&1 &
    #   real    185m5.285s

    # Masking statistics
    cat faSize.musFur1.wmsk.txt
    # 2410758013 bases (132851443 N's 2277906570 real 1575810421 upper
    #   702096149 lower) in 7741 sequences in 1 files
    # Total size: mean 311427.2 sd 2002158.7 min 1000 (AEYP01117479)
    #   max 52375790 (GL896898) median 1445
    # %29.12 masked total, %30.82 masked real

    cat faSize.musFur1.wmsk.sdust.txt
    #   2410758013 bases (132851443 N's 2277906570 real 1563711507 upper
    #   714195063 lower) in 7741 sequences in 1 files
    #   Total size: mean 311427.2 sd 2002158.7 min 1000 (AEYP01117479)
    #      max 52375790 (GL896898) median 1445
    #   %29.63 masked total, %31.35 masked real

    cat faSize.musFur1.cleanWMSdust.txt
    #   2410758013 bases (132851443 N's 2277906570 real 1563711507 upper
    #   714195063 lower) in 7741 sequences in 1 files
    #   Total size: mean 311427.2 sd 2002158.7 min 1000 (AEYP01117479)
    #   max 52375790 (GL896898) median 1445
    #   %29.63 masked total, %31.35 masked real

    cat fb.musFur1.windowmaskerSdust.clean.txt
    #   714195063 bases of 2410758013 (29.625%) in intersection

    # how much does this window masker and repeat masker overlap:
    # can be done after rmsk is done.  The script will often
    #   fail on this command in the doLoad.csh if RM is not yet
    #   complete and these are running at the same time:
    featureBits -countGaps musFur1 rmsk windowmaskerSdust
    #   453442864 bases of 2410758013 (18.809%) in intersection

    # if the script did fail on that command, finish it:
    time nice -n +19 doWindowMasker.pl -buildDir=`pwd` -workhorse=hgwdev \
	-continue=cleanup -dbHost=hgwdev musFur1 > cleanup.log 2>&1 &
    #   real    1m43.905s

##########################################################################
# cpgIslands - (DONE - 2013-03-06 - Hiram)
    mkdir /hive/data/genomes/musFur1/bed/cpgIslands
    cd /hive/data/genomes/musFur1/bed/cpgIslands
    time doCpgIslands.pl musFur1 > do.log 2>&1
    #   real    38m33.141s

    cat fb.musFur1.cpgIslandExt.txt
    #   40222518 bases of 2277906570 (1.766%) in intersection

#########################################################################
# genscan - (DONE - 2013-03-06 - Hiram)
    mkdir /hive/data/genomes/musFur1/bed/genscan
    cd /hive/data/genomes/musFur1/bed/genscan
    time doGenscan.pl musFur1 > do.log 2>&1
    #   real    33m41.740s
    # one job failed, changed the -window size to 2000000 to get
    #   it to complete
    ./runLastOne.csh GL896906 000 gtf/000/GL896906.gtf \
         pep/000/GL896906.pep subopt/000/GL896906.bed
    time doGenscan.pl -continue=makeBed musFur1 > bed.log 2>&1
    #   real    4m33.037s

    cat fb.musFur1.genscan.txt
    #   25257909 bases of 647368134 (3.902%) in intersection
    cat fb.musFur1.genscanSubopt.txt
    #   23439935 bases of 647368134 (3.621%) in intersection

#########################################################################
# MAKE 11.OOC FILE FOR BLAT/GENBANK (DONE - 2013-03-06 - Hiram)
    # Use -repMatch=400, based on size -- for human we use 1024
    # use the "real" number from the faSize measurement,
    # hg19 is 2897316137, calculate the ratio factor for 1024:
    calc \( 2277906570 / 2897316137 \) \* 1024
    #   ( 2277906570 / 2897316137 ) * 1024 = 805.081744

    # round up to 350

    cd /hive/data/genomes/musFur1
    time blat musFur1.2bit /dev/null /dev/null -tileSize=11 \
      -makeOoc=jkStuff/musFur1.11.ooc -repMatch=350
    #   Wrote 152295 overused 11-mers to jkStuff/musFur1.11.ooc
    #   real    0m59.156s

    # there are no non-bridged gaps, no lift file needed for genbank
    hgsql -N -e "select bridge from gap;" musFur1 | sort | uniq -c
    #   109700 yes
    # find other make doc where gapToLift is used to make nonBridged.bed file

#########################################################################
# AUTO UPDATE GENBANK (DONE - 2013-03-06 - Hiram)
    # examine the file:
    /cluster/data/genbank/data/organism.lst
    # for your species to see what counts it has for:
# organism       mrnaCnt estCnt  refSeqCnt
# Mustela putorius        9       0       0
# Mustela putorius furo   122937  4210    0
    # to decide which "native" mrna or ests you want to specify in genbank.conf

    ssh hgwdev
    cd $HOME/kent/src/hg/makeDb/genbank
    git pull
    # edit etc/genbank.conf to add musFur1 just before petMar1
# musFur1 (Ferret, Mustela putorius furo, taxId 9669)
musFur1.serverGenome = /hive/data/genomes/musFur1/musFur1.2bit
musFur1.clusterGenome = /hive/data/genomes/musFur1/musFur1.2bit
musFur1.ooc = /hive/data/genomes/musFur1/jkStuff/musFur1.11.ooc
musFur1.lift = no
musFur1.perChromTables = no
musFur1.refseq.mrna.native.pslCDnaFilter  = ${ordered.refseq.mrna.native.pslCDnaFilter}
musFur1.refseq.mrna.xeno.pslCDnaFilter    = ${ordered.refseq.mrna.xeno.pslCDnaFilter}
musFur1.genbank.mrna.native.pslCDnaFilter = ${ordered.genbank.mrna.native.pslCDnaFilter}
musFur1.genbank.mrna.xeno.pslCDnaFilter   = ${ordered.genbank.mrna.xeno.pslCDnaFilter}
musFur1.genbank.est.native.pslCDnaFilter  = ${ordered.genbank.est.native.pslCDnaFilter}
musFur1.refseq.mrna.native.load = yes
musFur1.refseq.mrna.xeno.load = yes
musFur1.genbank.mrna.xeno.load = no
musFur1.genbank.est.native.load = yes
musFur1.downloadDir = musFur1

    # end of section added to etc/genbank.conf
    git commit -m "adding musFur1 ferret redmine 6352" etc/genbank.conf
    git push
    make etc-update

    git pull
    # Edit src/lib/gbGenome.c to add new species.
#  musFurNames[] = {"Mustela putorius furo", "Mustela putorius", NULL};

    git commit -m "adding definition for musFurNames redmine 6352" \
        src/lib/gbGenome.c
    git push
    make install-server

    ssh hgwdev			# used to do this on "genbank" machine
    screen -S musFur1           # long running job managed in screen
    cd /cluster/data/genbank
    time nice -n +19 ./bin/gbAlignStep -initial musFur1 &
    #   var/build/logs/2013.03.06-09:49:44.musFur1.initalign.log
    #   real    325m17.355s

    # load database when finished
    ssh hgwdev
    cd /cluster/data/genbank
    time nice -n +19 ./bin/gbDbLoadStep -drop -initialLoad musFur1 &
    #   real    28m17.094s
    #   var/dbload/hgwdev/logs/2013.03.06-15:15:56.dbload.log
    #   real    31m56.575s

    # check the end of that dbload.log to see if it was successful
    #   hgwdev 2013.03.06-15:47:47 dbload: finish

    # enable daily alignment and update of hgwdev (DONE - 2013-03-06 - Hiram)
    cd ~/kent/src/hg/makeDb/genbank
    git pull
    # add musFur1 to:
    vi etc/align.dbs etc/hgwdev.dbs
    git commit -m "Added musFur1/ferret to the daily build" \
        etc/align.dbs etc/hgwdev.dbs
    git push
    make etc-update

############################################################################
# construct ucscToEnsembl chrom name translation (2013-03-06 - Hiram)
    mkdir /hive/data/genomes/musFur1/bed/ucscToEnsembl
    cd /hive/data/genomes/musFur1/bed/ucscToEnsembl

    # all the Ensembl names are the same as UCSC with the addition of .1
    cat ../../chrom.sizes | cut -f1 | sed -e 's/^\(.*\)/\1\t\1.1/' \
        | sort > ucscToEnsembl.tab

    # find length of ucsc names for key length in SQL below:
    awk '{print length($1)}' ucscToEnsembl.tab | sort -u
    #  12
    #   8

    cat << '_EOF_' > ucscToEnsembl.sql
# UCSC to Ensembl chr name translation
CREATE TABLE ucscToEnsembl (
    ucsc varchar(255) not null,        # UCSC chromosome name
    ensembl varchar(255) not null,     # Ensembl chromosome name
              #Indices
    PRIMARY KEY(ucsc(12))
);
'_EOF_'

    hgsql musFur1 < ucscToEnsembl.sql
    hgsql musFur1 \
-e 'LOAD DATA LOCAL INFILE "ucscToEnsembl.tab" INTO TABLE ucscToEnsembl'

############################################################################
#  Ensembl Genes version 70  (DONE - 2013-03-06 - Hiram)
     cd /hive/data/genomes/musFur1
     cat << '_EOF_' > musFur1.ensGene.ra
# required db variable
db musFur1
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to protect in perl:
nameTranslation 's/^MT/chrM/; s/\.1//'
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=70 -stop=process \
        musFur1.ensGene.ra > ensGene.process.log 2>&1
    # log indicates OK:
    #   genePredCheck -db=musFur1 musFur1.allGenes.gp.gz
    #   checked: 23963 failed: 0
    doEnsGeneUpdate.pl -ensVersion=70 -continue=load \
        musFur1.ensGene.ra > ensGeneV70.load.log 2>&1

    featureBits musFur1 ensGene
    # 52850166 bases of 2277906570 (2.320%) in intersection

#########################################################################
# set default position as recommended from Jeramiah Smith
#       (DONE - 2012-10-23 - Hiram)
    hgsql -e \
'update dbDb set defaultPos="GL476334:480870-830419" where name="musFur1";' \
	hgcentraltest

############################################################################
# downloads and pushQ entry (DONE - 2012-03-06 - Hiram)
    # after adding musFur1 to the all.joiner file and verifying that
    #   joinerCheck is clean, can construct the downloads:
    cd /hive/data/genomes/musFur1
    time makeDownloads.pl -workhorse=hgwdev musFur1
XXX - running - Thu Mar  7 10:02:01 PST 2013
    #   real    21m55.107s

    mkdir /hive/data/genomes/musFur1/pushQ
    cd /hive/data/genomes/musFur1/pushQ
    # Mark says don't let the transMap track get there
    time makePushQSql.pl musFur1 2> stderr.txt > musFur1.sql
    #   real    3m38.916s
    # will have to verify this one after loading on hgwbeta:
# WARNING: Could not tell (from trackDb, all.joiner and hardcoded lists of
# supporting and genbank tables) which tracks to assign these tables to:
#   ucscToEnsembl
    # the script should be fixed to place this in Ensembl Genes track

    # check the stderr.txt for bad stuff, these kinds of warnings are OK:
# WARNING: hgwdev does not have /gbdb/musFur1/wib/gc5Base.wib
# WARNING: hgwdev does not have /gbdb/musFur1/wib/quality.wib
# WARNING: hgwdev does not have /gbdb/musFur1/bbi/quality.bw
# WARNING: musFur1 does not have seq
# WARNING: musFur1 does not have extFile

    scp -p musFur1.sql hgwbeta:/tmp/
    ssh hgwbeta "hgsql qapushq < /tmp/musFur1.sql"

##########################################################################
#  BLATSERVERS ENTRY (DONE - 2012-10-23 - Hiram)
#	After getting a blat server assigned by the Blat Server Gods,
    ssh hgwdev

    hgsql -e 'INSERT INTO blatServers (db, host, port, isTrans, canPcr) \
	VALUES ("musFur1", "blat4b", "17838", "1", "0"); \
	INSERT INTO blatServers (db, host, port, isTrans, canPcr) \
	VALUES ("musFur1", "blat4b", "17839", "0", "1");' \
	    hgcentraltest
    #	test it with some sequence

############################################################################
# fixup search rule for assembly track/gold table (DONE - 2013-08-06 - Hiram)
    hgsql -N -e "select frag from gold;" musFur1 | sort | head -1
AEYP01000001.1
    hgsql -N -e "select frag from gold;" musFur1 | sort | tail -2
AEYP01117481.1
AEYP01117482.1

    # verify this rule will find them all or eliminate them all:
    hgsql -N -e "select frag from gold;" musFur1 | wc -l
    # 117441

    hgsql -N -e "select frag from gold;" musFur1 | egrep -e 'AEYP0[0-9]+(\.1)?' | wc -l
    # 117441

    hgsql -N -e "select frag from gold;" musFur1 | egrep -v -e 'AEYP0[0-9]+(\.1)?' | wc -l
    # 0

    # hence, add to trackDb/ferret/musFur1/trackDb.ra
searchTable gold
shortCircuit 1
termRegex AEYP0[0-9]+(\.1)?
query select chrom,chromStart,chromEnd,frag from %s where frag like '%s%%'
searchPriority 8

#########################################################################
# create ucscToINSDC name mapping (DONE - 2013-08-16 - Hiram)
    mkdir /hive/data/genomes/musFur1/bed/ucscToINSDC
    cd /hive/data/genomes/musFur1/bed/ucscToINSDC

    # copying these scripts from the previous load and improving them
    # with each instance
    ./translateNames.sh
    ./verifyAll.sh
    ./join.sh

    # verify the track link to INSDC functions

##############################################################################
##############################################################################
# TransMap V3 tracks. see makeDb/doc/transMapTracks.txt (2014-12-21 markd)
##############################################################################
