# for emacs: -*- mode: sh; -*-

# This file describes browser build for the ambMex2

#  Can use existing photograph (otherwise find one before starting here)

#########################################################################
#  Initial steps, find photograph (DONE - 2019-03-26 - Hiram)

# To start this initialBuild.txt document, from a previous assembly document:

mkdir ~/kent/src/hg/makeDb/doc/ambMex2
cd ~/kent/src/hg/makeDb/doc/ambMex2

# The template being copied is the galGal6 document, so galGal6 (not rouAeg1)
# is the database name that has to be rewritten to ambMex2; every DONE marker
# is reset to TBD so each section can be re-verified for this assembly.
sed -e 's/galGal6/ambMex2/g; s/GalGal6/AmbMex2/g; s/DONE/TBD/g;' \
  ../galGal6/initialBuild.txt > initialBuild.txt

mkdir -p /hive/data/genomes/ambMex2/genbank
cd /hive/data/genomes/ambMex2

#  Can use existing photograph
cp -p ../ambMex1/photoReference.txt ./
cat photoReference.txt
photoCreditURL  https://www.flickr.com/people/35871148@N04
photoCreditName Ruben Undheim/Flickr

## download from NCBI
cd /hive/data/genomes/ambMex2/genbank

time rsync --stats -L -a -P \
rsync://ftp.ncbi.nlm.nih.gov/genomes/genbank/vertebrate_other/Ambystoma_mexicanum/all_assembly_versions/GCA_002915635.2_ASM291563v2/ ./

    #	real    8m10.720s
# this information is from the top of 
#    ambMex2/genbank/GCA_002915635.2_ASM291563v2_assembly_report.txt

# Assembly name:  ASM291563v2
# Organism name:  Ambystoma mexicanum (axolotl)
# Infraspecific name:  strain=DD151
# Sex:  male
# Taxid:          8296
# BioSample:      SAMN06554622
# BioProject:     PRJNA378970
# Submitter:      Max Planck Society/University of Kentucky
# Date:           2018-12-04
# Assembly type:  haploid
# Release type:   major
# Assembly level: Chromosome
# Genome representation: full
# WGS project:    PGSH01
# Assembly method: MARVEL v. 2016-10-10; Joinmap v. 4.1; AllMaps MAY-2018
# Expected final version: no
# Genome coverage: 30.0x
# Sequencing technology: PacBio
# RefSeq category: Representative Genome
# GenBank assembly accession: GCA_002915635.2
#
## Assembly-Units:
## GenBank Unit Accession       RefSeq Unit Accession   Assembly-Unit name
## GCA_002915645.2              Primary Assembly

# check assembly size for later reference:

faSize G*v2_genomic.fna.gz
# 32396370977 bases (4029676509 N's 28366694468 real 28365740082 upper
#	954386 lower) in 98070 sequences in 1 files
# Total size: mean 330339.3 sd 20104120.1 min 1033 (PGSH01113832.1)
#	max 2030161756 (CM010939.1) median 40921
# %0.00 masked total, %0.00 masked real

#    real    6m32.968s

#############################################################################
# establish config.ra file (DONE - Hiram - 2019-04-09)
    cd /hive/data/genomes/ambMex2
    ~/kent/src/hg/utils/automation/prepConfig.pl ambMex2 vertebrate axolotl \
       genbank/*_assembly_report.txt > ambMex2.config.ra

    # compare with previous version to see if it is sane:
    diff ambMex2.config.ra ../ambMex1/ambMex1.config.ra

    # verify it really does look sane
    cat ambMex2.config.ra
# config parameters for makeGenomeDb.pl:
db ambMex2
clade vertebrate
# genomeCladePriority 70
scientificName Ambystoma mexicanum
commonName Axolotl
assemblyDate Dec. 2018
assemblyLabel Max Planck Society/University of Kentucky
assemblyShortLabel ASM291563v2
orderKey 1943
# no mito sequence needed
mitoAcc none
fastaFiles /hive/data/genomes/ambMex2/ucsc/*.fa.gz
agpFiles /hive/data/genomes/ambMex2/ucsc/*.agp
# qualFiles none
dbDbSpeciesDir axolotl
photoCreditURL  https://www.flickr.com/people/35871148@N04
photoCreditName Ruben Undheim/Flickr
ncbiGenomeId 381
ncbiAssemblyId 2130471
ncbiAssemblyName ASM291563v2
ncbiBioProject 378970
ncbiBioSample SAMN06554622
genBankAccessionID GCA_002915635.2
taxId 8296

#############################################################################
# setup UCSC named files (DONE - 2019-03-26 - Hiram)

    mkdir /hive/data/genomes/ambMex2/ucsc
    cd /hive/data/genomes/ambMex2/ucsc

    # check for duplicate sequences:
    time faToTwoBit -long -noMask ../genbank/G*v2_genomic.fna.gz genbank.2bit
    #  real    7m9.731s

    time twoBitDup genbank.2bit
    # real    2m3.641s

    # no output is a good result; otherwise, would have to eliminate duplicates.
    # the scripts creating the fasta here will be using this genbank.2bit file;
    # remove it later

    time ~/kent/src/hg/utils/automation/ucscCompositeAgp.pl \
      ../genbank/G*v2_genomic.fna.gz \
	../genbank/*_assembly_structure/Primary_Assembly
CM010927.1 chr1P
CM010928.1 chr1Q
CM010929.1 chr2P
CM010930.1 chr2Q
CM010931.1 chr3P
CM010932.1 chr3Q
CM010933.1 chr4P
CM010934.1 chr4Q
CM010935.1 chr5P
CM010936.1 chr5Q
CM010937.1 chr6P
CM010938.1 chr6Q
CM010939.1 chr7
CM010940.1 chr8
CM010941.1 chr9
CM010942.1 chr10
CM010943.1 chr11
CM010944.1 chr12
CM010945.1 chr13
CM010946.1 chr14

real    96m6.237s

    time ~/kent/src/hg/utils/automation/unplacedWithChroms.pl \
       ../genbank/*_assembly_structure/Primary_Assembly
# processed 98050 sequences into chrUn.fa.gz
# real    82m20.093s

    # there are no unlocalized sequences 

    time ~/kent/src/hg/utils/automation/unlocalizedWithChroms.pl \
       ../genbank/*_assembly_structure/Primary_Assembly
# can not read Primary_Assembly/unlocalized_scaffolds/unlocalized.chr2scaf at /cluster/home/hiram/kent/src/hg/utils/automation/unlocalizedWithChroms.pl line 23.

    # using mitochondrion NC_005797.1, to be specified in the config.ra file

    # verify fasta and AGPs agree
    time faToTwoBit -long *.fa.gz test.2bit
    # 

    time cat *.agp | checkAgpAndFa stdin test.2bit 2>&1 | tail -4
    # All AGP and FASTA entries agree - both files are valid
    # real    2m51.784s
XXX

    # and no sequence lost from original:
    twoBitToFa test.2bit stdout | faSize stdin
# 1065365425 bases (9784466 N's 1055580959 real 1055580959 upper 0 lower)
#	in 464 sequences in 1 files
# Total size: mean 2296046.2 sd 14494999.8 min 87 (chrUn_NW_020109844v1)
#	max 197608386 (chr1) median 10066

    # same numbers as above (except for upper/lower masking)
# 1065365425 bases (9784466 N's 1055580959 real 838536335 upper
#	217044624 lower) in 464 sequences in 1 files
# Total size: mean 2296046.2 sd 14494999.8 min 87 (NW_020109844.1)
#	max 197608386 (NC_006088.5) median 10066

    # no longer need these temporary 2bit files: test.2bit was built for the
    # AGP/fasta agreement check and genbank.2bit for the twoBitDup check
    rm test.2bit genbank.2bit

#############################################################################
#  Initial database build (DONE - 2019-04-12 - Hiram)

    # run this in debug mode so the jkStuff/makeUnmasked2bit.csh
    # script can be fixed up to add -long to the faToTwoBit command
    time (makeGenomeDb.pl -workhorse=hgwdev -dbHost=hgwdev -fileServer=hgwdev \
         -debug -stop=seq ambMex2.config.ra) > seq.log 2>&1
    # then, running the procedure:
    chmod +x jkStuff/*.csh
    ./jkStuff/getMito.csh
    time (./jkStuff/makeUnmasked2bit.csh ) >> seq.log 2>&1 &
    # real    24m36.006s

    # verify sequence and AGP are OK:
    time (makeGenomeDb.pl -workhorse=hgwdev -dbHost=hgwdev -fileServer=hgwdev \
         -continue=agp -stop=agp ambMex2.config.ra) > agp.log 2>&1
    # real    0m46.829s

    # then finish it off:
    time (makeGenomeDb.pl -workhorse=hgwdev -dbHost=hgwdev \
       -fileServer=hgwdev -continue=db ambMex2.config.ra) > db.log 2>&1
    # real    154m47.941s

    # trouble with the trackDb make: a new file is required in trackDb; fix the
    # script and rerun this step with the copy from the source tree
    time (~/kent/src/hg/utils/automation/makeGenomeDb.pl -workhorse=hgwdev -dbHost=hgwdev \
     -fileServer=hgwdev -continue=trackDb ambMex2.config.ra) > trackDb.log 2>&1
    # real    0m12.044s

    # check in the trackDb files created in TemporaryTrackDbCheckout/
    #    and add ambMex2 to trackDb/makefile

    # temporary symlink until masked sequence is available
    cd /hive/data/genomes/ambMex2
    ln -s `pwd`/ambMex2.unmasked.2bit /gbdb/ambMex2/ambMex2.2bit

##############################################################################
# cpgIslands on UNMASKED sequence (TBD - 2018-10-11 - Hiram)
    mkdir /hive/data/genomes/ambMex2/bed/cpgIslandsUnmasked
    cd /hive/data/genomes/ambMex2/bed/cpgIslandsUnmasked

    time (doCpgIslands.pl -dbHost=hgwdev -bigClusterHub=ku -buildDir=`pwd` \
       -tableName=cpgIslandExtUnmasked \
          -maskedSeq=/hive/data/genomes/ambMex2/ambMex2.unmasked.2bit \
             -workhorse=hgwdev -smallClusterHub=ku ambMex2) > do.log 2>&1
XXX - running - Fri Apr 12 23:24:42 PDT 2019
XXX - something is too large:
MALLOC failure reqesting -2147483648 bytes - aborting

    # real    2m11.881s

    cat fb.ambMex2.cpgIslandExtUnmasked.txt
    # 27399280 bases of 1055588482 (2.596%) in intersection

#############################################################################
# cytoBandIdeo - (DONE - 2019-04-12 - Hiram)
    mkdir /hive/data/genomes/ambMex2/bed/cytoBand
    cd /hive/data/genomes/ambMex2/bed/cytoBand
    makeCytoBandIdeo.csh ambMex2

#############################################################################
# run up idKeys files for chromAlias/ncbiRefSeq (DONE - 2019-04-15 - Hiram)
    mkdir /hive/data/genomes/ambMex2/bed/idKeys
    cd /hive/data/genomes/ambMex2/bed/idKeys

    time (doIdKeys.pl \
        -twoBit=/hive/data/genomes/ambMex2/ambMex2.unmasked.2bit \
        -buildDir=`pwd` ambMex2) > do.log 2>&1 &
    # real    29m20.505s

    cat ambMex2.keySignature.txt
    #  72abcdcc8a28b54cad2ff751c3494bed

#############################################################################
# gapOverlap (DONE - 2019-04-15 - Hiram)
    mkdir /hive/data/genomes/ambMex2/bed/gapOverlap
    cd /hive/data/genomes/ambMex2/bed/gapOverlap
    time (doGapOverlap.pl \
        -twoBit=/hive/data/genomes/ambMex2/ambMex2.unmasked.2bit ambMex2 ) \
        > do.log 2>&1 &
    # real    4m30.732s

    # only a few:
    wc -l bed.tab
    # 64 bed.tab

    cat fb.ambMex2.gapOverlap.txt
    # 16776 bases of 32396387346 (0.000%) in intersection

#############################################################################
# tandemDups (DONE - 2019-04-12 - Hiram)
    mkdir /hive/data/genomes/ambMex2/bed/tandemDups
    cd /hive/data/genomes/ambMex2/bed/tandemDups
    time (~/kent/src/hg/utils/automation/doTandemDup.pl \
  -twoBit=/hive/data/genomes/ambMex2/ambMex2.unmasked.2bit ambMex2) \
        > do.log 2>&1 &
XXX - running - Fri Apr 12 23:26:32 PDT 2019
    # real    97m29.383s

    cat fb.ambMex2.tandemDups.txt
    # 24887623 bases of 1065365425 (2.336%) in intersection

    bigBedInfo ambMex2.tandemDups.bb | sed -e 's/^/#  /;'
#  version: 4
#  fieldCount: 13
#  hasHeaderExtension: yes
#  isCompressed: yes
#  isSwapped: 0
#  extraIndexCount: 0
#  itemCount: 346,400
#  primaryDataSize: 8,843,385
#  primaryIndexSize: 38,860
#  zoomLevels: 9
#  chromCount: 407
#  basesCovered: 114,644,428
#  meanDepth (of bases covered): 21.207643
#  minDepth: 1.000000
#  maxDepth: 298.000000
#  std of depth: 35.518221

#########################################################################
# ucscToINSDC and ucscToRefSeq table/track (TBD - 2018-10-11 - Hiram)
    # construct idKeys for the refseq sequence
    mkdir /hive/data/genomes/ambMex2/refseq/idKeys
    cd /hive/data/genomes/ambMex2/refseq/idKeys
    faToTwoBit ../G.*v2_genomic.fna.gz ambMex2.refSeq.2bit

    time (doIdKeys.pl -buildDir=`pwd` \
        -twoBit=`pwd`/ambMex2.refSeq.2bit refseqAmbMex2)  > do.log 2>&1 &
    # real    0m48.786s

    cat refseqAmbMex2.keySignature.txt
    #  7850e2d5dabb6134fdc9d7083f1a3a54

    # and the genbank sequence needs keys too:
    mkdir /hive/data/genomes/ambMex2/refseq/idKeysGenbank
    cd /hive/data/genomes/ambMex2/refseq/idKeysGenbank
    faToTwoBit /hive/data/outside/ncbi/genomes/genbank/vertebrate_other/Gallus_gallus/all_assembly_versions/GCA_000002315.5_GRCg6a/GCA_000002315.5_GRCg6a_genomic.fna.gz ambMex2.genbank.2bit

    time (doIdKeys.pl -buildDir=`pwd` \
        -twoBit=`pwd`/ambMex2.genbank.2bit genbankAmbMex2)  > do.log 2>&1 &

    cat genbankAmbMex2.keySignature.txt
    #  a20fdad3318d371fcb34fcc66bab3752

    mkdir /hive/data/genomes/ambMex2/bed/chromAlias

    join -t$'\t' ../idKeys/ambMex2.idKeys.txt \
        ../../refseq/idKeysGenbank/genbankAmbMex2.idKeys.txt | cut -f2- \
          | sort -k1,1 | join -t$'\t' <(sort -k1,1 ../../chrom.sizes) - \
            | awk '{printf "%s\t0\t%d\t%s\n", $1, $2, $3}' \
               | sort -k1,1 -k2,2n > ucscToINSDC.bed

    join -t$'\t' ../idKeys/ambMex2.idKeys.txt \
        ../../refseq/idKeys/refseqAmbMex2.idKeys.txt | cut -f2- \
          | sort -k1,1 | join -t$'\t' <(sort -k1,1 ../../chrom.sizes) - \
            | awk '{printf "%s\t0\t%d\t%s\n", $1, $2, $3}' \
               | sort -k1,1 -k2,2n > ucscToRefSeq.bed

    # should be same line counts throughout:
    wc -l * ../../chrom.sizes
    #	463 ucscToINSDC.bed
    #	464 ucscToRefSeq.bed
    #	464 ../../chrom.sizes

    # need to find the accession for the INSDC equivalent to chrM:
    egrep chrM *
# ucscToRefSeq.bed:chrM   0       16775   NC_001323.1
    # lookup that accession at NCBI Entrez: X52392.1
    # and add to ucscToINSDC.bed:
    printf "chrM\t0\t16775\tX52392.1\n" >> ucscToINSDC.bed
    # verify:
    grep chrM *
# ucsc.genbank.tab:chrM   X52392.1
# ucsc.refseq.tab:chrM    NC_001323.1
# ucscToINSDC.bed:chrM    0       16775   X52392.1
# ucscToRefSeq.bed:chrM   0       16775   NC_001323.1

    export chrSize=`cut -f1 ucscToINSDC.bed | awk '{print length($0)}' | sort -n | tail -1`
    echo $chrSize
    # 27
    # use the $chrSize in this sed
    sed -e "s/21/$chrSize/" $HOME/kent/src/hg/lib/ucscToINSDC.sql \
         | hgLoadSqlTab ambMex2 ucscToINSDC stdin ucscToINSDC.bed
     # should be the same for ucscToRefSeq:
    export chrSize=`cut -f1 ucscToRefSeq.bed | awk '{print length($0)}' | sort -n | tail -1`
    echo $chrSize
    # 27
    sed -e "s/21/$chrSize/" $HOME/kent/src/hg/lib/ucscToINSDC.sql \
       | sed -e 's/INSDC/RefSeq/g;' \
         | hgLoadSqlTab ambMex2 ucscToRefSeq stdin ucscToRefSeq.bed

    # should be quiet for all OK
    checkTableCoords ambMex2

    # should cover %100 entirely:
    featureBits -countGaps ambMex2 ucscToINSDC
    # 1065365425 bases of 1065365425 (100.000%) in intersection
    featureBits -countGaps ambMex2 ucscToRefSeq
    # 1065365425 bases of 1065365425 (100.000%) in intersection

#########################################################################
# add chromAlias table (TBD - 2018-10-12 - ChrisL)

    mkdir /hive/data/genomes/ambMex2/bed/chromAlias
    cd /hive/data/genomes/ambMex2/bed/chromAlias

    hgsql -N -e 'select chrom,name from ucscToRefSeq;' ambMex2 \
        | sort -k1,1 > ucsc.refseq.tab
    hgsql -N -e 'select chrom,name from ucscToINSDC;' ambMex2 \
        | sort -k1,1 > ucsc.genbank.tab

    ### Adding Ensembl alias with v95 release, after idKeys made: 2019-01-16
    join -t$'\t' ../idKeys/ambMex2.idKeys.txt \
        ../../ens95/ensAmbMex2.idKeys.txt | cut -f2- \
          | sort -k1,1 | join -t$'\t' <(sort -k1,1 ../../chrom.sizes) - \
            | awk '{printf "%s\t0\t%d\t%s\n", $1, $2, $3}' \
               | sort -k1,1 -k2,2n > ucscToEns.bed
    cut -f1,4 ucscToEns.bed | sort > ucsc.ensembl.tab
    wc -l *.bed
#   2210 ucscToEns.bed
#   2211 ucscToINSDC.bed
#   2211 ucscToRefSeq.bed

    ~/kent/src/hg/utils/automation/chromAlias.pl ucsc.*.tab \
        > ambMex2.chromAlias.tab

# sanity check: for each alias source, compare the line count of its
# ucsc.<source>.tab file with the number of lines in the combined
# chromAlias table that mention that source
for t in refseq genbank ensembl; do
  c0=$(cat "ucsc.$t.tab" | wc -l)
  c1=$(grep "$t" ambMex2.chromAlias.tab | wc -l)
  # assume a mismatch, then clear the flag unless the counts really differ
  ok="ERROR"
  [ "$c0" -ne "$c1" ] || ok="OK"
  printf "# checking $t: $c0 =? $c1 $ok\n"
done
# checking refseq: 464 =? 464 OK
# checking genbank: 464 =? 464 OK
# checking ensembl: 464 =? 464 OK

    hgLoadSqlTab ambMex2 chromAlias ~/kent/src/hg/lib/chromAlias.sql \
        ambMex2.chromAlias.tab

#########################################################################
# fixup search rule for assembly track/gold table (TBD - 2018-10-11 - Hiram)
    # the trackDb species directory for this assembly is axolotl
    # (dbDbSpeciesDir in ambMex2.config.ra), not chicken
    cd ~/kent/src/hg/makeDb/trackDb/axolotl/ambMex2
    # NOTE(review): the fragment counts and the rule derived below appear to be
    # carried over from the chicken template (this assembly's WGS project is
    # PGSH01) - re-run the preview and re-derive the rule for ambMex2
    # preview prefixes and suffixes:
    hgsql -N -e "select frag from gold;" ambMex2 \
      | sed -e 's/[0-9][0-9]*//;' | sort | uniq -c
   1519 AADN.1
    124 AC.1
    313 AC.2
    328 AC.3
     74 AC.4
     20 AC.5
      1 AC.6
      1 NC_.1

    # implies a rule: '[AN][AC][D0-9_][N0-9][0-9]+(\.[0-9]+)?'

    # verify this rule will find them all and eliminate them all:
    hgsql -N -e "select frag from gold;" ambMex2 | wc -l
    # 2380

    hgsql -N -e "select frag from gold;" ambMex2 \
       | egrep -e '[AN][AC][D0-9_][N0-9][0-9]+(\.[0-9]+)?' | wc -l
    # 2380

    hgsql -N -e "select frag from gold;" ambMex2 \
       | egrep -v -e '[AN][AC][D0-9_][N0-9][0-9]+(\.[0-9]+)?' | wc -l
    # 0

    # hence, add to trackDb/axolotl/ambMex2/trackDb.ra
searchTable gold
shortCircuit 1
termRegex [AN][AC][D0-9_][N0-9][0-9]+(\.[0-9]+)?
query select chrom,chromStart,chromEnd,frag from %s where frag like '%s%%'
searchPriority 8

    # verify searches work in the position box

##########################################################################
# running repeat masker (DONE - 2019-04-12 - Hiram)
    mkdir /hive/data/genomes/ambMex2/bed/repeatMasker
    cd /hive/data/genomes/ambMex2/bed/repeatMasker
    time  (doRepeatMasker.pl -buildDir=`pwd` \
        -bigClusterHub=ku -dbHost=hgwdev -workhorse=hgwdev \
        -smallClusterHub=ku ambMex2) > do.log 2>&1
    # real    216m7.175s

    cat faSize.rmsk.txt
# 32396387346 bases (4029676509 N's 28366710837 real 28112571951 upper
#    254138886 lower) in 98071 sequences in 1 files
# Total size: mean 330336.1 sd 20104017.6 min 1033 (chrUn_PGSH01113832v1)
#    max 2030161756 (chr7) median 40920
# %0.78 masked total, %0.90 masked real

    egrep -i "versi|relea" do.log
# RepeatMasker version development-$Id: RepeatMasker,v 1.332 2017/04/17 19:01:11 rhubley Exp $
#    February 01 2017 (open-4-0-8) 1.332 version of RepeatMasker
# CC    Dfam_Consensus RELEASE 20181026;                            *
# CC    RepBase RELEASE 20181026;       

XXX - this standard run is useless; note the custom library used in the next procedure
    time featureBits -countGaps ambMex2 rmsk
    # 133395265 bases of 1065365425 (12.521%) in intersection
    # real    0m4.226s

    # why is it different than the faSize above ?
    # because rmsk masks out some N's as well as bases, the faSize count above
    #   separates out the N's from the bases, it doesn't show lower case N's

    # faster way to get the same result on high contig count assemblies:
    time hgsql -N -e 'select genoName,genoStart,genoEnd from rmsk;' ambMex2 \
        | bedSingleCover.pl stdin | ave -col=4 stdin | grep "^total"
    # total 133395265.000000
    #   real    0m3.198s

###############################################################################
# running repeat masker (DONE - 2020-06-19 - 2020-08-15 - Hiram)
    # using a custom library from Jeramiah Smith, developed with
    # RepeatModeler

    mkdir /hive/data/genomes/ambMex2/bed/repeatModeler
    cd /hive/data/genomes/ambMex2/bed/repeatModeler

    # note the file used for customLib; this took almost two months of running
    # time with little interference on the ku kluster

    doRepeatMasker.pl -buildDir=`pwd` -customLib=`pwd`/LTRs_all_repeats.fa \
       -bigClusterHub=ku -dbHost=hgwdev -workhorse=hgwdev \
          -smallClusterHub=hgwdev ambMex2
    cat run.cluster/run.time
# Completed: 65638 of 65638 jobs
# CPU time in finished jobs:  4047318392s 67455306.53m 1124255.11h 46843.96d 128.340 y
# IO & Wait Time:              11101559s  185025.99m  3083.77h  128.49d  0.352 y
# Average job time:               61830s    1030.51m    17.18h    0.72d
# Longest finished job:           77503s    1291.72m    21.53h    0.90d
# Submission to last job:       4811964s   80199.40m  1336.66h   55.69d

    # continuing after the kluster run is complete:
    doRepeatMasker.pl -buildDir=`pwd` -customLib=`pwd`/LTRs_all_repeats.fa \
       -bigClusterHub=ku -dbHost=hgwdev -workhorse=hgwdev \
          -continue=cat -smallClusterHub=hgwdev ambMex2
    # real    329m25.992s

    # much better result with this custom library:
    cat faSize.rmsk.txt
# 32396387346 bases (4029676509 N's 28366710837 real 10003444277 upper
#    18363266560 lower) in 98071 sequences in 1 files
# Total size: mean 330336.1 sd 20104017.6 min 1033 (chrUn_PGSH01113832v1)
#    max 2030161756 (chr7) median 40920
# %56.68 masked total, %64.74 masked real

    egrep -i "versi|relea" do.log
# RepeatMasker version development-$Id: RepeatMasker,v 1.332 2017/04/17 19:01:11 rhubley Exp $
# CC    Dfam_Consensus RELEASE 20181026;                            *
# CC    RepBase RELEASE 20181026;                                   *

    time featureBits -countGaps ambMex2 rmsk
    # 18368951822 bases of 32396387346 (56.701%) in intersection
    # real    4m34.562s

    # why is it different than the faSize above ?
    # because rmsk masks out some N's as well as bases, the faSize count above
    #   separates out the N's from the bases, it doesn't show lower case N's

    # faster way to get the same result on high contig count assemblies:
    time hgsql -N -e 'select genoName,genoStart,genoEnd from rmsk;' ambMex2 \
        | bedSingleCover.pl stdin | ave -col=4 stdin | grep "^total"
    # total 18368951822.000000
    # real    2m8.428s

###############################################################################
# running simple repeat (DONE - 2019-04-15 - Hiram)

    mkdir /hive/data/genomes/ambMex2/bed/simpleRepeat
    cd /hive/data/genomes/ambMex2/bed/simpleRepeat
    time (doSimpleRepeat.pl -buildDir=`pwd` -bigClusterHub=ku \
        -dbHost=hgwdev -workhorse=hgwdev -smallClusterHub=ku \
        -trf409=6 ambMex2) > do.log 2>&1
    # real    30m12.201s

    cat fb.simpleRepeat
    # 1399134851 bases of 32393621946 (4.319%) in intersection

    cd /hive/data/genomes/ambMex2
    # if using the Window Masker result:
    cd /hive/data/genomes/ambMex2
    twoBitMask bed/windowMasker/ambMex2.cleanWMSdust.2bit \
       -add bed/simpleRepeat/trfMask.bed  ambMex2.2bit
    #   you can safely ignore the warning about fields >= 13

    # or using RepeatMasker result add to rmsk after it is done:
    twoBitMask ambMex2.rmsk.2bit \
        -add bed/simpleRepeat/trfMask.bed ambMex2.2bit
    #   you can safely ignore the warning about fields >= 13
    twoBitToFa ambMex2.2bit stdout | faSize stdin > faSize.ambMex2.2bit.txt
    cat faSize.ambMex2.2bit.txt
# 32396387346 bases (4029676509 N's 28366710837 real 9998218507 upper
#	18368492330 lower) in 98071 sequences in 1 files
# Total size: mean 330336.1 sd 20104017.6 min 1033 (chrUn_PGSH01113832v1)
#	max 2030161756 (chr7) median 40920
# %56.70 masked total, %64.75 masked real

    rm /gbdb/ambMex2/ambMex2.2bit
    ln -s `pwd`/ambMex2.2bit /gbdb/ambMex2/ambMex2.2bit

#########################################################################
# CREATE MICROSAT TRACK (DONE - 2020-08-17 - Hiram)
    ssh hgwdev
    mkdir /cluster/data/ambMex2/bed/microsat
    cd /cluster/data/ambMex2/bed/microsat

    awk '($5==2 || $5==3) && $6 >= 15 && $8 == 100 && $9 == 0 {printf("%s\t%s\t%s\t%dx%s\n", $1, $2, $3, $6, $16);}' \
       ../simpleRepeat/simpleRepeat.bed > microsat.bed

    hgLoadBed ambMex2 microsat microsat.bed
    # Read 56937 elements of size 4 from microsat.bed

##########################################################################
## WINDOWMASKER (DONE - 2019-04-15 - Hiram)
    # Odd result here, WM masked all but 703 bases ?
    mkdir /hive/data/genomes/ambMex2/bed/windowMasker
    cd /hive/data/genomes/ambMex2/bed/windowMasker
    time (doWindowMasker.pl -buildDir=`pwd` -workhorse=hgwdev \
        -dbHost=hgwdev ambMex2) > do.log 2>&1
    # real    1747m17.123s

    # Masking statistics
    cat faSize.ambMex2.cleanWMSdust.txt
# 32396387346 bases (4029676509 N's 28366710837 real 703 upper 28366710134
#	lower) in 98071 sequences in 1 files
# Total size: mean 330336.1 sd 20104017.6 min 1033 (chrUn_PGSH01113832v1)
#	max 2030161756 (chr7) median 40920
# %87.56 masked total, %100.00 masked real

    cat fb.ambMex2.rmsk.windowmaskerSdust.txt
    # 18368939458 bases of 32396387346 (56.701%) in intersection

##########################################################################
# cpgIslands - (WORKING - 2018-10-11 - Hiram)
    mkdir /hive/data/genomes/ambMex2/bed/cpgIslands
    cd /hive/data/genomes/ambMex2/bed/cpgIslands
    time (doCpgIslands.pl -dbHost=hgwdev -bigClusterHub=ku \
      -workhorse=hgwdev -smallClusterHub=ku ambMex2) > do.log 2>&1
XXX - running last18 manually on hgwdev 2020-12-11 - Hiram
something is too large MALLOC failure reqesting -2147483648 bytes - aborting
    # real    2m5.105s

    cat fb.ambMex2.cpgIslandExt.txt
    # 16395346 bases of 1055588482 (1.553%) in intersection

##############################################################################
# genscan - (DONE - 2020-08-17 - Hiram)
XXX - waiting for ku to return after power fails - Mon Aug 17 12:11:48 PDT 2020
    mkdir /hive/data/genomes/ambMex2/bed/genscan
    cd /hive/data/genomes/ambMex2/bed/genscan
    time (doGenscan.pl -buildDir=`pwd` -workhorse=hgwdev -dbHost=hgwdev \
      -bigClusterHub=ku ambMex2) > do.log 2>&1
    # real    88m34.900s

    cat fb.ambMex2.genscan.txt
    # 23911678 bases of 1055588482 (2.265%) in intersection

    cat fb.ambMex2.genscanSubopt.txt
    # 24521608 bases of 1055588482 (2.323%) in intersection

#########################################################################
# Create kluster run files (DONE - 2020-08-17 - Hiram)

    # numerator is ambMex2 gapless bases "real" as reported by:
    featureBits -noRandom -noHap ambMex2 gap
    # 2765400 bases of 27505544706 (0.010%) in intersection
    #                   ^^^

    # denominator is hg19 gapless bases as reported by:
    #   featureBits -noRandom -noHap hg19 gap
    #     234344806 bases of 2861349177 (8.190%) in intersection
    # 1024 is threshold used for human -repMatch:
    calc \( 27505544706 / 2861349177 \) \* 1024
    #  ( 27505544706 / 2861349177 ) * 1024 = 9843.495511

    # ==> use -repMatch=9000 according to size scaled up from 1024 for human.
    #   and rounded down to nearest 1000
    # experimented with 9000, 8000, 7000 - using 7000 as it makes a
    #   reasonable number of overused 11-mers (counts listed below)
    cd /hive/data/genomes/ambMex2
    time blat ambMex2.2bit \
         /dev/null /dev/null -tileSize=11 -makeOoc=jkStuff/ambMex2.11.ooc \
        -repMatch=7000
    # real    4m11.198s

    # at repMatch 9000
    # Wrote 9042 overused 11-mers to jkStuff/ambMex2.11.ooc
    # at repMatch 8000
    # Wrote 13163 overused 11-mers to jkStuff/ambMex2.11.ooc
    # at repMatch 7000
    # Wrote 20332 overused 11-mers to jkStuff/ambMex2.11.ooc

    # there are no non-bridged gaps
    hgsql -N -e 'select bridge from gap;' ambMex2  | sort | uniq -c
    #  27654 yes
    # survey gap sizes:
    # all gaps are size 100
    hgsql -N -e 'select size from gap where bridge="yes" order by size;' \
       ambMex2  | ave stdin | sed -e 's/^/# /;'
# Q1 100.000000
# median 100.000000
# Q3 100.000000
# average 100.000000
# min 100.000000
# max 100.000000
# count 27654
# total 2765400.000000
# standard deviation 0.000000

    # minimum gap size is 100:
    gapToLift -verbose=2 -minGap=100 ambMex2 jkStuff/ambMex2.100baseGaps.lft \
        -allowBridged -bedFile=jkStuff/ambMex2.100baseGaps.bed
    wc -l jkStuff/ambMex*
    # 125725 jkStuff/ambMex2.100baseGaps.bed
    # 125725 jkStuff/ambMex2.100baseGaps.lft

    # to see the gaps used:
    bedInvert.pl chrom.sizes jkStuff/ambMex2.100baseGaps.bed | less
    # and their sizes:
    bedInvert.pl chrom.sizes jkStuff/ambMex2.100baseGaps.bed \
	| cut -f4 | sort -n | uniq -c | less
    #   27654 100

########################################################################
# lastz/chain/net swap human/hg38 (TBD - 2018-10-12 - Hiram)
    # original alignment
    cd /hive/data/genomes/hg38/bed/lastzAmbMex2.2018-10-12

    cat fb.hg38.chainAmbMex2Link.txt
    # 154079940 bases of 3095998939 (4.977%) in intersection
    cat fb.hg38.chainSynAmbMex2Link.txt
    # 95877644 bases of 3095998939 (3.097%) in intersection
    cat fb.hg38.chainRBest.AmbMex2.txt
    # 106665747 bases of 3095998939 (3.445%) in intersection

    # and for the swap:
    mkdir /hive/data/genomes/ambMex2/bed/blastz.hg38.swap
    cd /hive/data/genomes/ambMex2/bed/blastz.hg38.swap

    time (doBlastzChainNet.pl -verbose=2 \
      /hive/data/genomes/hg38/bed/lastzAmbMex2.2018-10-12/DEF \
        -swap -chainMinScore=5000 -chainLinearGap=loose \
          -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
            -syntenicNet) > swap.log 2>&1
    #  real    9m45.514s

    cat fb.ambMex2.chainHg38Link.txt
    # 120955955 bases of 1055588482 (11.459%) in intersection

    cat fb.ambMex2.chainSynHg38Link.txt
    # 92597630 bases of 1055588482 (8.772%) in intersection

    time (doRecipBest.pl -load -workhorse=hgwdev -buildDir=`pwd` ambMex2 hg38) > rbest.log 2>&1 &
    # real    139m24.408s

    cat fb.ambMex2.chainRBest.Hg38.txt
    # 106294585 bases of 1055588482 (10.070%) in intersection

#########################################################################
# lastz/chain/net swap mouse/mm10 (TBD - 2018-10-12 - Hiram)

    # original alignment
    cd /hive/data/genomes/mm10/bed/lastzAmbMex2.2018-10-12
    cat fb.mm10.chainAmbMex2Link.txt
    # 101151132 bases of 2652783500 (3.813%) in intersection
    cat fb.mm10.chainSynAmbMex2Link.txt
    # 70707720 bases of 2652783500 (2.665%) in intersection
    cat fb.mm10.chainRBest.AmbMex2.txt 
    # 79649474 bases of 2652783500 (3.002%) in intersection

    # and for the swap:
    mkdir /hive/data/genomes/ambMex2/bed/blastz.mm10.swap
    cd /hive/data/genomes/ambMex2/bed/blastz.mm10.swap

    time (doBlastzChainNet.pl -verbose=2 \
      /hive/data/genomes/mm10/bed/lastzAmbMex2.2018-10-12/DEF \
        -swap -chainMinScore=5000 -chainLinearGap=loose \
          -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
            -syntenicNet) > swap.log 2>&1
    #  real    6m41.043s

    cat fb.ambMex2.chainMm10Link.txt
    # 88539346 bases of 1055588482 (8.388%) in intersection

    time (doRecipBest.pl -load -workhorse=hgwdev -buildDir=`pwd` ambMex2 mm10) > rbest.log 2>&1 &
    # real    94m11.007s

    cat fb.ambMex2.chainRBest.Mm10.txt
    # 79474812 bases of 1055588482 (7.529%) in intersection

#########################################################################
# GENBANK AUTO UPDATE (TBD - 2018-10-12 - Hiram)
    ssh hgwdev
    cd $HOME/kent/src/hg/makeDb/genbank
    git pull
    # /cluster/data/genbank/data/organism.lst shows:
    # organism               mrnaCnt estCnt  refSeqCnt
    # Ambystoma mexicanum     7749    43323   0

    # edit etc/genbank.conf to add the following ambMex2 entry

# ambMex2 (Axolotl - Ambystoma mexicanum) GCA_002915635.2 - 30Gb total
ambMex2.serverGenome = /hive/data/genomes/ambMex2/ambMex2.2bit
ambMex2.ooc = /hive/data/genomes/ambMex2/jkStuff/ambMex2.11.ooc
ambMex2.lift = /hive/data/genomes/ambMex2/jkStuff/ambMex2.100baseGaps.lft
ambMex2.perChromTables = no
ambMex2.downloadDir = ambMex2
ambMex2.refseq.mrna.xeno.pslCDnaFilter    = ${ordered.refseq.mrna.xeno.pslCDnaFilter}
ambMex2.refseq.mrna.native.pslCDnaFilter  = ${ordered.refseq.mrna.native.pslCDnaFilter}
ambMex2.genbank.mrna.native.pslCDnaFilter = ${ordered.genbank.mrna.native.pslCDnaFilter}
ambMex2.genbank.mrna.xeno.pslCDnaFilter   = ${ordered.genbank.mrna.xeno.pslCDnaFilter}
ambMex2.genbank.est.native.pslCDnaFilter  = ${ordered.genbank.est.native.pslCDnaFilter}
ambMex2.genbank.est.xeno.pslCDnaFilter    = ${ordered.genbank.est.xeno.pslCDnaFilter}
# defaults yes: genbank.mrna.native.load genbank.mrna.native.loadDesc
# yes: genbank.est.native.load refseq.mrna.native.load
# yes: refseq.mrna.native.loadDesc refseq.mrna.xeno.load
# yes: refseq.mrna.xeno.loadDesc
# defaults no: genbank.mrna.xeno.load genbank.mrna.xeno.loadDesc
# no: genbank.est.native.loadDesc genbank.est.xeno.load
# no: genbank.est.xeno.loadDesc
# DO NOT NEED genbank.mrna.xeno except for human, mouse
# ambMex2.upstreamGeneTbl = ensGene
# ambMex2.upstreamMaf = multiz6way /hive/data/genomes/ambMex2/bed/multiz6way/species.list

    # verify the files specified exist before checking in the file:
  grep ^ambMex2 etc/genbank.conf | grep hive | awk '{print $NF}' | xargs ls -og
-rw-rw-r-- 1 8271637678 Aug 17 10:51 /hive/data/genomes/ambMex2/ambMex2.2bit
-rw-rw-r-- 1    7002521 Aug 17 12:05 /hive/data/genomes/ambMex2/jkStuff/ambMex2.100baseGaps.lft
-rw-rw-r-- 1      81336 Aug 17 11:46 /hive/data/genomes/ambMex2/jkStuff/ambMex2.11.ooc

    git commit -m "Added ambMex2; refs #23367" etc/genbank.conf
    git push
    # update /cluster/data/genbank/:
    make etc-update

    # enable daily alignment and update of hgwdev
    cd ~/kent/src/hg/makeDb/genbank
    git pull
    # add ambMex2 to:
    #   etc/align.dbs etc/hgwdev.dbs
    git commit -m "Added ambMex2 refs #23367" etc/hgwdev.dbs etc/align.dbs
    git push
    make etc-update

    # Notify Chris this is ready to go 2020-08-17

#############################################################################
# augustus gene track (TBD - 2018-10-12 - Hiram)

    mkdir /hive/data/genomes/ambMex2/bed/augustus
    cd /hive/data/genomes/ambMex2/bed/augustus
    time (doAugustus.pl -buildDir=`pwd` -bigClusterHub=ku \
        -species=chicken -dbHost=hgwdev \
           -workhorse=hgwdev ambMex2) > do.log 2>&1
    # real    48m48.597s

    cat fb.ambMex2.augustusGene.txt
    # 25827925 bases of 1055588482 (2.447%) in intersection

#########################################################################
# ncbiRefSeq (TBD - 2018-10-12 - Hiram)

    mkdir /hive/data/genomes/ambMex2/bed/ncbiRefSeq
    cd /hive/data/genomes/ambMex2/bed/ncbiRefSeq
    # running stepwise (download, process, load) just to be careful
    time (~/kent/src/hg/utils/automation/doNcbiRefSeq.pl -buildDir=`pwd` \
      -bigClusterHub=ku -dbHost=hgwdev \
      -stop=download -fileServer=hgwdev -smallClusterHub=ku -workhorse=hgwdev \
      refseq vertebrate_other Gallus_gallus \
      GCF_000002315.5_GRCg6a ambMex2) > download.log 2>&1
    # real    1m19.029s

    time (~/kent/src/hg/utils/automation/doNcbiRefSeq.pl -buildDir=`pwd` \
      -continue=process -bigClusterHub=ku -dbHost=hgwdev \
      -stop=process -fileServer=hgwdev -smallClusterHub=ku -workhorse=hgwdev \
      refseq vertebrate_other Gallus_gallus \
      GCF_000002315.5_GRCg6a ambMex2) > process.log 2>&1
    # real    2m6.030s

    time (~/kent/src/hg/utils/automation/doNcbiRefSeq.pl -buildDir=`pwd` \
      -continue=load -bigClusterHub=ku -dbHost=hgwdev \
      -stop=load -fileServer=hgwdev -smallClusterHub=ku -workhorse=hgwdev \
      refseq vertebrate_other Gallus_gallus \
      GCF_000002315.5_GRCg6a ambMex2) > load.log 2>&1
    # real    0m22.312s

    cat fb.ncbiRefSeq.ambMex2.txt
    #  88641893 bases of 1055588482 (8.397%) in intersection

    # need to add: include ../../refSeqComposite.ra alpha
    # to the chicken/ambMex2/trackDb.ra to turn on the track in the browser

    # there was one gene that claimed to have a protein, but the
    # protein sequence was not included in the protein.faa file
    # discovered from joinerCheck
    # manual fix to blank out this one protein, to see the entry
    hgsql -e 'select * from ncbiRefSeqLink where protAcc="NP_989875.1";' ambMex2
    hgsql -e 'update ncbiRefSeqLink set protAcc="" where protAcc="NP_989875.1";' ambMex2
    # this makes the 'protein' link disappear from the gene details page
    # curious that this gene is marked as a non-coding gene anyway ?
    # gene: FET1 at chr4:63,102,774-63,105,516-

    featureBits -enrichment ambMex2 refGene ncbiRefSeq 
 # refGene 1.374%, ncbiRefSeq 8.397%, both 1.370%, cover 99.73%, enrich 11.88x
    featureBits -enrichment ambMex2 ncbiRefSeq refGene
 # ncbiRefSeq 8.397%, refGene 1.374%, both 1.370%, cover 16.32%, enrich 11.88x

    featureBits -enrichment ambMex2 ncbiRefSeqCurated refGene
 # ncbiRefSeqCurated 1.368%, refGene 1.374%, both 1.364%, cover 99.71%, enrich 72.59x
    featureBits -enrichment ambMex2 refGene ncbiRefSeqCurated
 # refGene 1.374%, ncbiRefSeqCurated 1.368%, both 1.364%, cover 99.32%, enrich 72.59x

#########################################################################
# LIFTOVER TO galGal5 (TBD - 2018-10-11 - Hiram)
    ssh hgwdev
    mkdir /hive/data/genomes/ambMex2/bed/blat.galGal5.2018-10-11
    cd /hive/data/genomes/ambMex2/bed/blat.galGal5.2018-10-11
    doSameSpeciesLiftOver.pl -verbose=2 \
        -debug -bigClusterHub=ku -dbHost=hgwdev -workhorse=hgwdev \
        -ooc=/hive/data/genomes/ambMex2/jkStuff/ambMex2.11.ooc \
         ambMex2 galGal5
    time (doSameSpeciesLiftOver.pl -verbose=2 \
        -bigClusterHub=ku -dbHost=hgwdev -workhorse=hgwdev \
        -ooc=/hive/data/genomes/ambMex2/jkStuff/ambMex2.11.ooc \
         ambMex2 galGal5) > doLiftOverToGalGal5.log 2>&1
    # real    156m30.215s

    # see if the liftOver menus function in the browser from ambMex2 to galGal5

#########################################################################
# LIFTOVER TO galGal4 (TBD - 2018-10-12 - Hiram)
    ssh hgwdev
    mkdir /hive/data/genomes/ambMex2/bed/blat.galGal4.2018-10-12
    cd /hive/data/genomes/ambMex2/bed/blat.galGal4.2018-10-12
    doSameSpeciesLiftOver.pl -verbose=2 \
        -debug -bigClusterHub=ku -dbHost=hgwdev -workhorse=hgwdev \
        -ooc=/hive/data/genomes/ambMex2/jkStuff/ambMex2.11.ooc \
         ambMex2 galGal4
    time (doSameSpeciesLiftOver.pl -verbose=2 \
        -bigClusterHub=ku -dbHost=hgwdev -workhorse=hgwdev \
        -ooc=/hive/data/genomes/ambMex2/jkStuff/ambMex2.11.ooc \
         ambMex2 galGal4) > doLiftOverToGalGal4.log 2>&1 &
    # real    36m10.254s

    # see if the liftOver menus function in the browser from ambMex2 to galGal4

#########################################################################
#  BLATSERVERS ENTRY (TBD - 2018-10-12 - Hiram)
#	After getting a blat server assigned by the Blat Server Gods,
    ssh hgwdev

    hgsql -e 'INSERT INTO blatServers (db, host, port, isTrans, canPcr) \
	VALUES ("ambMex2", "blat1a", "17892", "1", "0"); \
	INSERT INTO blatServers (db, host, port, isTrans, canPcr) \
	VALUES ("ambMex2", "blat1a", "17893", "0", "1");' \
	    hgcentraltest
    #	test it with some sequence

############################################################################
## reset default position to MEPE gene (egg shell protein)
##  (TBD - 2018-10-12 - Hiram)

    # as found from the galGal5 to ambMex2 liftOver
    ssh hgwdev
    hgsql -e 'update dbDb set defaultPos="chr4:45667017-45672928"
	where name="ambMex2";' hgcentraltest

#########################################################################
# crispr 10K shoulders (TBD - 2018-10-16 - Hiram)
    # working on this script, adding the indexFa step:
    time (~/kent/src/hg/utils/automation/doCrispr.pl \
	-stop=indexFa -buildDir=`pwd` -smallClusterHub=ku ambMex2 ncbiRefSeq) \
	> indexFa.log 2>&1
    # real    23m26.694s

    time (~/kent/src/hg/utils/automation/doCrispr.pl \
       -continue=ranges -stop=guides -buildDir=`pwd` -smallClusterHub=ku \
           ambMex2 ncbiRefSeq) > guides.log 2>&1
    # real    2m50.758s

    # adding the /dev/shm/ setup rsync for the indexed Fa
    # performed manually to work out the procedure
    time (~/kent/src/hg/utils/automation/doCrispr.pl \
        -continue=specScores -stop=specScores -buildDir=`pwd` \
           -smallClusterHub=ku ambMex2 ncbiRefSeq) > specScores.log

    # had about half of ku for about half this time:
# Completed: 884922 of 884922 jobs
# CPU time in finished jobs:  35872791s  597879.85m  9964.66h  415.19d  1.138 y
# IO & Wait Time:               899261s   14987.69m   249.79h   10.41d  0.029 y
# Average job time:                 42s       0.69m     0.01h    0.00d
# Longest finished job:             88s       1.47m     0.02h    0.00d
# Submission to last job:        48045s     800.75m    13.35h    0.56d


    time find tmp/outGuides -type f | xargs cut -f3-6 > ../specScores.tab
    # real    236m17.220s
    wc -l specScores.tab
    # 66451712 specScores.tab

    time (~/kent/src/hg/utils/automation/doCrispr.pl \
	-continue=effScores -stop=load \
	    -buildDir=`pwd` -smallClusterHub=ku ambMex2 ncbiRefSeq) \
	> load.log
    # real    307m41.143s

#########################################################################
# all.joiner update, downloads and in pushQ - (TBD - 2018-10-17 - Hiram)
xyz
    cd $HOME/kent/src/hg/makeDb/schema
    # verify all the business is done for release
    ~/kent/src/hg/utils/automation/verifyBrowser.pl ambMex2

    # fix up all.joiner until these checks all produce clean output
    joinerCheck -database=ambMex2 -tableCoverage all.joiner
    joinerCheck -database=ambMex2 -times all.joiner
    joinerCheck -database=ambMex2 -keys all.joiner

    # when clean, check in:
    git commit -m 'adding rules for ambMex2 refs #23367' all.joiner
    git push
    # run up a 'make alpha' in hg/hgTables to get this all.joiner file
    # into the hgwdev/genome-test system

    cd /hive/data/genomes/ambMex2
    time (makeDownloads.pl ambMex2) > downloads.log 2>&1
    #  real    10m7.605s

    #   now ready for pushQ entry
    mkdir /hive/data/genomes/ambMex2/pushQ
    cd /hive/data/genomes/ambMex2/pushQ
  time (makePushQSql.pl -redmineList ambMex2) > ambMex2.pushQ.sql 2> stderr.out
    # real    9m58.779s

    # remove the extra chainNet files from the listings:
    sed -i -e "/etNig1/d" redmine.ambMex2.file.list
    sed -i -e "/asAcu1/d" redmine.ambMex2.file.list
    sed -i -e "/etNig1/d" redmine.ambMex2.table.list
    sed -i -e "/onAlb1/d" redmine.ambMex2.table.list
    sed -i -e "/asAcu1/d" redmine.ambMex2.table.list
    sed -i -e "/Stickleback/d" redmine.ambMex2.releaseLog.txt
    sed -i -e "/Tetraodon/d" redmine.ambMex2.releaseLog.txt
    sed -i -e "/sparrow/d" redmine.ambMex2.releaseLog.txt
    # remove the tandemDups and gapOverlap entries from the table list
    # and the release log:
    sed -i -e "/tandemDups/d" redmine.ambMex2.table.list
    sed -i -e "/Tandem Dups/d" redmine.ambMex2.releaseLog.txt
    sed -i -e "/gapOverlap/d" redmine.ambMex2.table.list
    sed -i -e "/Gap Overlaps/d" redmine.ambMex2.releaseLog.txt
    #  real    7m21.629s

    #   check for errors in stderr.out, some are OK, e.g.:
    # WARNING: hgwdev does not have /gbdb/ambMex2/wib/gc5Base.wib
    # WARNING: hgwdev does not have /gbdb/ambMex2/wib/quality.wib
    # WARNING: hgwdev does not have /gbdb/ambMex2/bbi/quality.bw
    # WARNING: ambMex2 does not have seq
    # WARNING: ambMex2 does not have extFile

    # add the path names of these listing files to the redmine issue,
    # one in each of the three appropriate entry boxes:

#	/hive/data/genomes/ambMex2/pushQ/redmine.ambMex2.file.list
#	/hive/data/genomes/ambMex2/pushQ/redmine.ambMex2.releaseLog.txt
#	/hive/data/genomes/ambMex2/pushQ/redmine.ambMex2.table.list

#########################################################################
