# for emacs: -*- mode: sh; -*-

# This file describes browser build for the chrPic1
#	Western Painted Turtle - Chrysemys picta bellii - Dec 2011


#	http://www.ncbi.nlm.nih.gov/genome/12107
#	http://www.ncbi.nlm.nih.gov/genome/assembly/326468/
#	http://www.ncbi.nlm.nih.gov/bioproject/78657 - WashU
#	http://www.ncbi.nlm.nih.gov/Traces/wgs/?val=AHGY01
#	15X coverage WGS

#	http://www.ncbi.nlm.nih.gov/bioproject/11857 - chrMt - U. Mich.
#	NC_002073.3 - AF069423.1
#	http://www.ncbi.nlm.nih.gov/nuccore/NC_002073.3

#	DATE:	29-Dec-2011
#	ORGANISM:	Chrysemys picta bellii
#	TAXID:	8478
#	ASSEMBLY LONG NAME:	Chrysemys_picta_bellii-3.0.1
#	ASSEMBLY SHORT NAME:	Chrysemys_picta_bellii-3.0.1
#	ASSEMBLY SUBMITTER:	International Painted Turtle Genome Sequencing Consortium
#	ASSEMBLY TYPE:	Haploid
#	NUMBER OF ASSEMBLY-UNITS:	2
#	ASSEMBLY ACCESSION:	GCA_000241765.1
#	##Below is a 2 column list with assembly-unit id and name.
#	##The Primary Assembly unit is listed first.
#	GCA_000241775.1	Primary Assembly
#	GCA_000241805.1	non-nuclear
#	FTP-RELEASE DATE: 10-Jan-2012


#	Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Coelomata;
#	Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata;
#	Teleostomi; Euteleostomi; Sarcopterygii; Tetrapoda; Amniota;
#	Sauropsida; Testudines; Cryptodira; Testudinoidea; Emydidae;
#	Chrysemys; Chrysemys picta

#############################################################################
# Fetch sequence from genbank (DONE - 2012-01-12 - Hiram)

    mkdir -p /hive/data/genomes/chrPic1/genbank
    cd /hive/data/genomes/chrPic1/genbank

    time rsync -a -P rsync://ftp.ncbi.nlm.nih.gov/genbank/genomes/Eukaryotes/vertebrates_other/Chrysemys_picta/Chrysemys_picta_bellii-3.0.1/ ./


    faSize Primary_Assembly/unplaced_scaffolds/FASTA/unplaced.scaf.fa.gz
    #	2589728838 bases (431455958 N's 2158272880 real 2158272880 upper
    #	0 lower) in 80983 sequences in 1 files
    #	Total size: mean 31978.7 sd 446227.0
    #	min 501 (gi|370893393|gb|AHGY01540527.1|)
    #	max 26452378 (gi|371559404|gb|JH584429.1|) median 753

    faSize non-nuclear/unlocalized_scaffolds/FASTA/chrMT.unlocalized.scaf.fa.gz
    #	18940 bases (0 N's 18940 real 18940 upper 0 lower) in 4 sequences
    #	in 1 files
    #	Total size: mean 4735.0 sd 7919.8
    #	min 563 (gi|370925593|gb|AHGY01508327.1|)
    #	max 16612 (gi|371051809|gb|AHGY01385006.1|) median 970

    # the automatic pickup of the chrMt sequence fails because the name
    #	in the fasta is only: "Chrysemys picta" and the script is
    #	expecting "Chrysemys picta bellii"
    # So, pick up the sequence and include it with the wild card
    #	specification of files here:

    wget -O NC_002073.NCBI.fa \
	"http://www.ncbi.nlm.nih.gov/sviewer/viewer.fcgi?db=nuccore&dopt=fasta&sendto=on&id=NC_002073"

    sed -e 's/^>.*/>chrM/' NC_002073.NCBI.fa > chrM.fa

    faCount chrM.fa
#seq    len     A       C       G       T       N       cpg
chrM    16866   5810    4376    2165    4515    0       331
    echo "chrM 1 16866 1 O NC_002073 1 16866 +" | tr '[ ]' '[\t]' > chrM.agp

#############################################################################
# process into UCSC naming scheme (DONE - 2012-01-12 - Hiram)
    mkdir /hive/data/genomes/chrPic1/ucsc
    cd /hive/data/genomes/chrPic1/ucsc

    # verify we don't have any .acc numbers different from .1
    zcat ../genbank/Primary_Assembly/unplaced_scaffolds/AGP/unplaced.scaf.agp.gz \
	 | cut -f1 | egrep "^JH|AHGY" \
	| sed -e 's/^JH[0-9][0-9]*//; s/^AHGY[0-9][0-9]*//' | sort | uniq -c
    #	1022441 .1

    # this is like the unplaced.pl script in other assemblies except it
    #	does not add chrUn_ to the names since they are all just scaffolds

    cat << '_EOF_' > unplaced.pl
#!/bin/env perl

# Rename the GenBank unplaced scaffolds to UCSC style names by dropping the
# ".1" accession version suffix from the sequence names.
#
# Reads (via zcat) the gzipped AGP and FASTA files under ../genbank and
# writes unplaced.agp and unplaced.fa into the current directory.
# No chrUn_ prefix is added, the names remain the bare scaffold accessions.
# Dies if an input can not be read (including a failing zcat) or an output
# can not be completely written.

use strict;
use warnings;

my $agpFile =  "../genbank/Primary_Assembly/unplaced_scaffolds/AGP/unplaced.scaf.agp.gz";
my $fastaFile =  "../genbank/Primary_Assembly/unplaced_scaffolds/FASTA/unplaced.scaf.fa.gz";

# AGP: comment lines are copied unchanged, on all other lines the version
# suffix is removed from the object name in the first column only, the
# component ids in the later columns keep their .1
# List form pipe open avoids passing the file name through a shell.
open (my $agpIn, '-|', 'zcat', $agpFile) or die "can not read $agpFile: $!";
open (my $agpOut, '>', 'unplaced.agp')
    or die "can not write to unplaced.agp: $!";
while (my $line = <$agpIn>) {
    if ($line !~ m/^#/) {
        $line =~ s/^([^\t]+)\.1\t/$1\t/;
    }
    print $agpOut $line;
}
# close on a pipe returns false when zcat exited non-zero
close ($agpIn) or die "zcat $agpFile failed, exit status: $?";
close ($agpOut) or die "can not finish writing unplaced.agp: $!";

# FASTA: header lines such as  >gi|NNN|gb|JH584429.1| description
# are reduced to  >JH584429  and the sequence lines are copied unchanged.
open (my $faIn, '-|', 'zcat', $fastaFile)
    or die "can not read $fastaFile: $!";
open (my $faOut, '>', 'unplaced.fa') or die "can not write to unplaced.fa: $!";
while (my $line = <$faIn>) {
    if ($line =~ m/^>/) {
        chomp $line;
        $line =~ s/.*gb\|//;
        $line =~ s/\.1\|.*//;
        # print, not printf: the name is data and must not be a format string
        print $faOut ">$line\n";
    } else {
        print $faOut $line;
    }
}
close ($faIn) or die "zcat $fastaFile failed, exit status: $?";
close ($faOut) or die "can not finish writing unplaced.fa: $!";
'_EOF_'
    # << happy emacs
    chmod +x unplaced.pl

    time ./unplaced.pl
    # real    1m4.707s
# -rw-rw-r-- 1   52300039 Jan 12 16:03 unplaced.agp
# -rw-rw-r-- 1 2627874889 Jan 12 16:03 unplaced.fa

    time gzip *.fa *.agp

    # verify nothing lost in the translation, should be the same as above
    #	except for the name translations
    faSize *.fa.gz
# 2589728838 bases (431455958 N's 2158272880 real 2158272880 upper 0 lower)
#	in 80983 sequences in 1 files
# Total size: mean 31978.7 sd 446227.0 min 501 (AHGY01540527)
#	max 26452378 (JH584429) median 753

    # the automatic pickup of the chrMt sequence fails because the name
    #	in the fasta is only: "Chrysemys picta" and the script is
    #	expecting "Chrysemys picta bellii"
    # So, pick up the sequence and include it with the wild card
    #	specification of files here:

    cat ../genbank/chrM.fa | gzip -c > chrM.fa.gz
    cat ../genbank/chrM.agp | gzip -c > chrM.agp.gz

#############################################################################
#   Initial browser build (DONE - 2012-03-27 - Hiram)
    cd /hive/data/genomes/chrPic1
    cat << '_EOF_' > chrPic1.config.ra
# Config parameters for makeGenomeDb.pl:
db chrPic1
clade vertebrate
genomeCladePriority 50
scientificName Chrysemys picta bellii
commonName Painted Turtle
assemblyDate Dec. 2011
assemblyLabel International Painted Turtle Genome Sequencing Consortium (GCA_000241765.1)
assemblyShortLabel v3.0.1
ncbiAssemblyName Chrysemys_picta_bellii-3.0.1
ncbiAssemblyId 326468
orderKey 4000
# chrM already included
mitoAcc none
fastaFiles /hive/data/genomes/chrPic1/ucsc/*.fa.gz
agpFiles /hive/data/genomes/chrPic1/ucsc/*.agp.gz
dbDbSpeciesDir turtle
taxId   8478
'_EOF_'
    # << happy emacs

    time makeGenomeDb.pl -workhorse=hgwdev -stop=agp chrPic1.config.ra \
	> agp.log 2>&1 
    #	real    2m2.893s
    # check the end of agp.log to verify it is OK
    time makeGenomeDb.pl -workhorse=hgwdev -fileServer=hgwdev \
	-continue=db chrPic1.config.ra > db.log 2>&1 
    #	real    17m30.996s

#############################################################################
# running repeat masker (DONE - 2012-03-27 - Hiram)
    mkdir /hive/data/genomes/chrPic1/bed/repeatMasker
    cd /hive/data/genomes/chrPic1/bed/repeatMasker
    time doRepeatMasker.pl -buildDir=`pwd` -noSplit \
	-bigClusterHub=swarm -dbHost=hgwdev -workhorse=hgwdev \
	-smallClusterHub=encodek chrPic1 > do.log 2>&1 &
    #	real    252m12.216s
    cat faSize.rmsk.txt
    #	2589745704 bases (431455958 N's 2158289746 real 2088449309 upper
    #	69840437 lower) in 80984 sequences in 1 files
    #	Total size: mean 31978.5 sd 446224.3 min 501 (AHGY01540527)
    #	max 26452378 (JH584429) median 753
    #	%2.70 masked total, %3.24 masked real


    grep -i versi do.log
# RepeatMasker version development-$Id: RepeatMasker,v 1.26 2011/09/26 16:19:44 angie Exp $
#    April 26 2011 (open-3-3-0) version of RepeatMasker

    featureBits -countGaps chrPic1 rmsk
    #	70001996 bases of 2589745704 (2.703%) in intersection
    # why is it different than the faSize above ?
    # because rmsk masks out some N's as well as bases; this featureBits
    #	-countGaps count includes those masked N's, while faSize counts the
    #	N's separately from the bases and does not show lower case N's

##########################################################################
# running simple repeat (DONE - 2012-03-27 - Hiram)
    mkdir /hive/data/genomes/chrPic1/bed/simpleRepeat
    cd /hive/data/genomes/chrPic1/bed/simpleRepeat
    time doSimpleRepeat.pl -buildDir=`pwd` -bigClusterHub=swarm \
	-dbHost=hgwdev -workhorse=hgwdev -smallClusterHub=memk \
	chrPic1 > do.log 2>&1 &
    #	real    43m42.360s

    cat fb.simpleRepeat 
    #	21563230 bases of 2158293365 (0.999%) in intersection

    # not going to add to rmsk here, using the window masker instead since
    #	it masks more sequence
    cd /hive/data/genomes/chrPic1
    twoBitMask chrPic1.rmsk.2bit \
	-add bed/simpleRepeat/trfMask.bed chrPic1.2bit
    #	you can safely ignore the warning about fields >= 13

    twoBitToFa chrPic1.2bit stdout | faSize stdin > faSize.chrPic1.2bit.txt
    cat faSize.chrPic1.2bit.txt
    #	2608572064 bases (131440969 N's 2477131095 real 1320629270 upper
    #	1156501825 lower) in 2685 sequences in 1 files
    #	Total size: mean 971535.2 sd 4827933.6 min 1003 (AGCE01151413)
    #	max 72162052 (JH378105) median 1553

    rm /gbdb/chrPic1/chrPic1.2bit
    ln -s `pwd`/chrPic1.2bit /gbdb/chrPic1/chrPic1.2bit

#########################################################################
# Verify all gaps are marked, add any N's not in gap as type 'other'
#	(DONE - 2012-03-27 - Hiram)
    mkdir /hive/data/genomes/chrPic1/bed/gap
    cd /hive/data/genomes/chrPic1/bed/gap
    time nice -n +19 findMotif -motif=gattaca -verbose=4 \
	-strand=+ ../../chrPic1.unmasked.2bit > findMotif.txt 2>&1
    #	real    0m23.801s
    grep "^#GAP " findMotif.txt | sed -e "s/^#GAP //" > allGaps.bed
    time featureBits -countGaps chrPic1 -not gap -bed=notGap.bed
    #	2158293365 bases of 2589745704 (83.340%) in intersection
    #	real    0m21.340s

    time featureBits -countGaps chrPic1 allGaps.bed notGap.bed -bed=new.gaps.bed
    #	3619 bases of 2589745704 (0.000%) in intersection
    #	real    216m26.278s

    #	what is the highest index in the existing gap table:
    hgsql -N -e "select ix from gap;" chrPic1 | sort -n | tail -1
    #	8964
    cat << '_EOF_' > mkGap.pl
#!/bin/env perl

# Turn the regions in new.gaps.bed (runs of N not yet in the gap table)
# into gap table rows printed to stdout, eight tab separated columns:
#   chrom, chromStart, chromEnd, ix, N, size, other, yes
# The ix values continue on from the highest ix currently in chrPic1.gap
# Dies if that highest ix can not be obtained, or if new.gaps.bed can not
# be read.

use strict;
use warnings;

my $ix=`hgsql -N -e "select ix from gap;" chrPic1 | sort -n | tail -1`;
chomp $ix;
# an empty or non-numeric answer means the query failed (or the table is
# empty); continuing would silently number the new gaps starting from 1
die "can not determine highest ix in chrPic1.gap, got: '$ix'"
    unless $ix =~ m/^\d+$/;

open (my $bedIn, '<', 'new.gaps.bed') or die "can not read new.gaps.bed: $!";
while (my $line = <$bedIn>) {
    next if $line =~ m/^\s*$/;    # ignore blank lines
    # only the first three bed columns are used
    my ($chrom, $chromStart, $chromEnd) = split('\s+', $line);
    ++$ix;
    printf "%s\t%d\t%d\t%d\tN\t%d\tother\tyes\n", $chrom, $chromStart,
        $chromEnd, $ix, $chromEnd-$chromStart;
}
close ($bedIn);
'_EOF_'
    # << happy emacs
    chmod +x ./mkGap.pl
    ./mkGap.pl > other.bed
    featureBits -countGaps chrPic1 other.bed
    #	3619 bases of 2589745704 (0.000%) in intersection
    wc -l other.bed
    #	3589
    hgLoadBed -sqlTable=$HOME/kent/src/hg/lib/gap.sql \
	-noLoad chrPic1 otherGap other.bed
    #	Read 3589 elements of size 8 from other.bed
    #	starting with this many
    hgsql -e "select count(*) from gap;" chrPic1
    #	470729
    hgsql chrPic1 -e 'load data local infile "bed.tab" into table gap;'
    #	result count:
    hgsql -e "select count(*) from gap;" chrPic1
    #	474318
    # == 470729 + 3589

    # verify we aren't adding gaps where gaps already exist
    # this would output errors if that were true:
    gapToLift -minGap=1 chrPic1 nonBridged.lift -bedFile=nonBridged.bed
    # see example in danRer7.txt

    # there are no non-bridged gaps here:
    hgsql -N -e "select bridge from gap;" chrPic1 | sort | uniq -c
    #	474318 yes

##########################################################################
## WINDOWMASKER (DONE - 2012-03-27 - Hiram)
    mkdir /hive/data/genomes/chrPic1/bed/windowMasker
    cd /hive/data/genomes/chrPic1/bed/windowMasker
    time nice -n +19 doWindowMasker.pl -buildDir=`pwd` -workhorse=hgwdev \
	-dbHost=hgwdev chrPic1 > do.log 2>&1 &
    #	real    225m32.368s

    # Masking statistics
    twoBitToFa chrPic1.wmsk.2bit stdout | faSize stdin
    #	2860591921 bases (676999153 N's 2183592768 real 1197030078 upper
    #	986562690 lower) in 22819 sequences in 1 files
    #	Total size: mean 125360.1 sd 395781.0 min 1000 (AFYH01291818)
    #	max 10736886 (JH126562) median 5475
    #	%34.49 masked total, %45.18 masked real

    twoBitToFa chrPic1.wmsk.sdust.2bit stdout | faSize stdin
    #	2860591921 bases (676999153 N's 2183592768 real 1186196719 upper
    #	997396049 lower) in 22819 sequences in 1 files
    #	Total size: mean 125360.1 sd 395781.0 min 1000 (AFYH01291818)
    #	max 10736886 (JH126562) median 5475
    #	%34.87 masked total, %45.68 masked real

    hgLoadBed chrPic1 windowmaskerSdust windowmasker.sdust.bed.gz
    #	Read 11886030 elements of size 3 from windowmasker.sdust.bed.gz

    featureBits -countGaps chrPic1 windowmaskerSdust
    #	1104610436 bases of 2589745704 (42.653%) in intersection

    #	eliminate the gaps from the masking
    featureBits chrPic1 -not gap -bed=notGap.bed
    #	2158289746 bases of 2158289746 (100.000%) in intersection

    time nice -n +19 featureBits chrPic1 windowmaskerSdust notGap.bed \
        -bed=stdout | gzip -c > cleanWMask.bed.gz
    #	673156757 bases of 2158289746 (31.189%) in intersection
    #	real    109m11.237s

    #	reload track to get it clean
    hgLoadBed chrPic1 windowmaskerSdust cleanWMask.bed.gz
    #	Read 11868504 elements of size 4 from cleanWMask.bed.gz

    time featureBits -countGaps chrPic1 windowmaskerSdust
    #	673156757 bases of 2589745704 (25.993%) in intersection
    #	real    1m39.393s

    #	mask with this clean result
    zcat cleanWMask.bed.gz \
	| twoBitMask ../../chrPic1.unmasked.2bit stdin \
	    -type=.bed chrPic1.cleanWMSdust.2bit
    twoBitToFa chrPic1.cleanWMSdust.2bit stdout | faSize stdin \
        > chrPic1.cleanWMSdust.faSize.txt
    cat chrPic1.cleanWMSdust.faSize.txt
    #	2589745704 bases (431455958 N's 2158289746 real 1485132989 upper
    #	673156757 lower) in 80984 sequences in 1 files
    #	Total size: mean 31978.5 sd 446224.3 min 501 (AHGY01540527)
    #	max 26452378 (JH584429) median 753
    #	%25.99 masked total, %31.19 masked real

    # how much does this window masker and repeat masker overlap:
    featureBits -countGaps chrPic1 rmsk windowmaskerSdust
    #	42492356 bases of 2589745704 (1.641%) in intersection
    #	real    2m15.424s

#########################################################################
# MASK SEQUENCE WITH WM+TRF (DONE - 2012-03-29 - Hiram)
    cd /hive/data/genomes/chrPic1
    twoBitMask -add bed/windowMasker/chrPic1.cleanWMSdust.2bit \
	bed/simpleRepeat/trfMask.bed chrPic1.2bit
    #	safe to ignore the warnings about BED file with >=13 fields
    twoBitToFa chrPic1.2bit stdout | faSize stdin > faSize.chrPic1.txt
    cat faSize.chrPic1.txt
    #	2589745704 bases (431455958 N's 2158289746 real 1485006823 upper
    #	673282923 lower) in 80984 sequences in 1 files
    #	Total size: mean 31978.5 sd 446224.3 min 501 (AHGY01540527)
    #	max 26452378 (JH584429) median 753
    #	%26.00 masked total, %31.20 masked real

    #	create symlink to gbdb
    ssh hgwdev
    rm /gbdb/chrPic1/chrPic1.2bit
    ln -s `pwd`/chrPic1.2bit /gbdb/chrPic1/chrPic1.2bit

#########################################################################
# cpgIslands - (DONE - 2012-04-24 - Hiram)
    mkdir /hive/data/genomes/chrPic1/bed/cpgIslands
    cd /hive/data/genomes/chrPic1/bed/cpgIslands
    time doCpgIslands.pl chrPic1 > do.log 2>&1
    #   real    187m22.286s

    cat fb.chrPic1.cpgIslandExt.txt
    #   15937502 bases of 2158289746 (0.738%) in intersection

#########################################################################
# genscan - (DONE - 2012-04-26 - Hiram)
    mkdir /hive/data/genomes/chrPic1/bed/genscan
    cd /hive/data/genomes/chrPic1/bed/genscan
    time doGenscan.pl chrPic1 > do.log 2>&1
    # one failed job:
./runGsBig.csh JH584641 000 gtf/000/JH584641.gtf pep/000/JH584641.pep subopt/000/JH584641.bed
    # rerunning with window size of 200000
    #   real    0m45.659s
    #   continuing:
    time doGenscan.pl -continue=makeBed chrPic1 > makeBed.log 2>&1
    #   real    73m21.769s

    cat fb.chrPic1.genscan.txt
    #   41301989 bases of 2158289746 (1.914%) in intersection
    cat fb.chrPic1.genscanSubopt.txt
    #   50395023 bases of 2158289746 (2.335%) in intersection

#########################################################################
# MAKE 11.OOC FILE FOR BLAT/GENBANK (DONE - 2012-05-03 - Hiram)
    # Use -repMatch=800, based on size -- for human we use 1024
    # use the "real" number from the faSize measurement,
    # hg19 is 2897316137, calculate the ratio factor for 1024:
    calc \( 2158272880 / 2897316137 \) \* 1024
    #	( 2158272880 / 2897316137 ) * 1024 = 762.799544

    # round up to 800

    cd /hive/data/genomes/chrPic1
    time blat chrPic1.2bit /dev/null /dev/null -tileSize=11 \
      -makeOoc=jkStuff/chrPic1.11.ooc -repMatch=800
    #   Wrote 13782 overused 11-mers to jkStuff/chrPic1.11.ooc
    #	real     0m58.297s

    # there are no non-bridged gaps, no lift file needed for genbank
    hgsql -N -e "select bridge from gap;" chrPic1 | sort | uniq -c
    #   474318 yes
#    cd /hive/data/genomes/chrPic1/jkStuff
#    gapToLift chrPic1 chrPic1.nonBridged.lift -bedFile=chrPic1.nonBridged.bed
    # largest non-bridged contig:
#    awk '{print $3-$2,$0}' chrPic1.nonBridged.bed | sort -nr | head
    #   123773608 chrX  95534   123869142       chrX.01

#########################################################################
# AUTO UPDATE GENBANK (DONE - 2012-05-03 - Hiram)
    # examine the file:
    /cluster/data/genbank/data/organism.lst
    # for your species to see what counts it has for:
# organism       mrnaCnt estCnt  refSeqCnt
# Choloepus hoffmanni	7	0	0
    # to decide which "native" mrna or ests you want to specify in genbank.conf

    ssh hgwdev
    cd $HOME/kent/src/hg/makeDb/genbank
    git pull
    # edit etc/genbank.conf to add chrPic1 just after ce2
# chrPic1 (painted turtle)
chrPic1.serverGenome = /hive/data/genomes/chrPic1/chrPic1.2bit
chrPic1.clusterGenome = /hive/data/genomes/chrPic1/chrPic1.2bit
chrPic1.ooc = /hive/data/genomes/chrPic1/jkStuff/chrPic1.11.ooc
chrPic1.lift = no
chrPic1.refseq.mrna.native.pslCDnaFilter  = ${lowCover.refseq.mrna.native.pslCDnaFilter}
chrPic1.refseq.mrna.xeno.pslCDnaFilter    = ${lowCover.refseq.mrna.xeno.pslCDnaFilter}
chrPic1.genbank.mrna.native.pslCDnaFilter = ${lowCover.genbank.mrna.native.pslCDnaFilter}
chrPic1.genbank.mrna.xeno.pslCDnaFilter   = ${lowCover.genbank.mrna.xeno.pslCDnaFilter}
chrPic1.genbank.est.native.pslCDnaFilter  = ${lowCover.genbank.est.native.pslCDnaFilter}
chrPic1.refseq.mrna.native.load = no
chrPic1.refseq.mrna.xeno.load = yes
chrPic1.genbank.mrna.xeno.load = yes
chrPic1.genbank.est.native.load = no
chrPic1.downloadDir = chrPic1
chrPic1.perChromTables = no

    # end of section added to etc/genbank.conf
    git commit -m "adding chrPic1 painted turtle" etc/genbank.conf
    git push
    make etc-update

    git pull
    # Edit src/lib/gbGenome.c to add new species.
    git commit -m "adding definition for chrPicNames Chrysemys picta bellii" \
        src/lib/gbGenome.c
#	Western Painted Turtle - Chrysemys picta bellii - Dec 2011
    git push
    make install-server

    ssh hgwdev			# used to do this on "genbank" machine
    screen -S chrPic1           # long running job managed in screen
    cd /cluster/data/genbank
    time nice -n +19 ./bin/gbAlignStep -initial chrPic1 &
    #	var/build/logs/2012.05.03-15:12:04.chrPic1.initalign.log
    #   real    2418m11.414s

    # load database when finished
    ssh hgwdev
    cd /cluster/data/genbank
    time nice -n +19 ./bin/gbDbLoadStep -drop -initialLoad chrPic1 &
    #	logFile:  var/dbload/hgwdev/logs/2012.05.08-13:23:32.dbload.log
    #	real    42m38.307s

    # enable daily alignment and update of hgwdev (DONE - 2012-02-09 - Hiram)
    cd ~/kent/src/hg/makeDb/genbank
    git pull
    # add chrPic1 to:
    vi etc/align.dbs etc/hgwdev.dbs
    git commit -m "Added chrPic1." etc/align.dbs etc/hgwdev.dbs
    git push
    make etc-update

#########################################################################
# create pushQ entry (DONE - 2012-05-24 - Hiram)
    # first make sure all.joiner is up to date and has this new organism
    # a keys check should be clean:
    cd ~/kent/src/hg/makeDb/schema
    joinerCheck -database=chrPic1 -keys all.joiner

    mkdir /hive/data/genomes/chrPic1/pushQ
    cd /hive/data/genomes/chrPic1/pushQ
    makePushQSql.pl chrPic1 > chrPic1.sql 2> stderr.out
    # check stderr.out for no significant problems, it is common to see:
# WARNING: hgwdev does not have /gbdb/chrPic1/wib/gc5Base.wib
# WARNING: hgwdev does not have /gbdb/chrPic1/wib/quality.wib
# WARNING: hgwdev does not have /gbdb/chrPic1/bbi/quality.bw
# WARNING: chrPic1 does not have seq
# WARNING: chrPic1 does not have extFile
    # which are no real problem
    # if some tables are not identified:
# WARNING: Could not tell (from trackDb, all.joiner and hardcoded lists of
# supporting and genbank tables) which tracks to assign these tables to:
#	<some table list ... >
    # put them in manually after loading the pushQ entry
    scp -p chrPic1.sql hgwbeta:/tmp
    ssh hgwbeta
    cd /tmp
    hgsql qapushq < chrPic1.sql

#########################################################################
# fetch a photo, found in Wiki Commons (DONE - 2012-06-07 - Hiram)
    mkdir /hive/data/genomes/chrPic1/photo
    cd /hive/data/genomes/chrPic1/photo
    wget --timestamping \
http://upload.wikimedia.org/wikipedia/commons/4/47/A4_Western_painted_turtle.jpg

    convert -quality 80 -geometry "350x200" A4_Western_painted_turtle.jpg \
        Chrysemys_picta_bellii.jpg

# original source URL:
http://digitalmedia.fws.gov/cdm4/item_viewer.php?CISOROOT=/natdiglib&CISOPTR=2894&CISOBOX=1&REC=4

# in the public domain courtesy of:
    #   U.S. Fish & Wildlife Service

#########################################################################
# fixup search rule for assembly track/gold table (DONE - 2012-06-07 - Hiram)
    hgsql -N -e "select frag from gold;" chrPic1 | sort | head -1
AHGY01000001.1
    hgsql -N -e "select frag from gold;" chrPic1 | sort | tail -2
AHGY01551716.1
NC_002073

    # hence, add to trackDb/turtle/trackDb.ra
searchTable gold
shortCircuit 1
termRegex [AN][HC][G_][Y0]0[0-9]+(\.1)?
query select chrom,chromStart,chromEnd,frag from %s where frag like '%s%%'
searchPriority 8

#########################################################################
# create ucscToINSDC name mapping (DONE - 2013-08-23 - Hiram)
    mkdir /hive/data/genomes/chrPic1/bed/ucscToINSDC
    cd /hive/data/genomes/chrPic1/bed/ucscToINSDC

    # copying these scripts from the previous load and improving them
    # with each instance
    ./translateNames.sh NC_002073.3
    ./verifyAll.sh
    ./join.sh

    sed -e "s/21/12/" $HOME/kent/src/hg/lib/ucscToINSDC.sql \
      | hgLoadSqlTab chrPic1 ucscToINSDC stdin ucscToINSDC.tab
    checkTableCoords chrPic1 ucscToINSDC
    featureBits -countGaps chrPic1 ucscToINSDC
    # 2589745704 bases of 2589745704 (100.000%) in intersection

    # verify the track link to INSDC functions

##############################################################################
