# for emacs: -*- mode: sh; -*-

# Caenorhabditis japonica
#	Washington University School of Medicine GSC and Sanger Institute

#  $Id: caeJap1.txt,v 1.9 2008/08/15 21:53:32 hiram Exp $

########################################################################
#  Download sequence (DONE - 2008-04-03 - Hiram)
    ssh kkstore02
    mkdir /cluster/store5/caeJap1
    ln -s /cluster/store5/caeJap1 /cluster/data/caeJap1
    mkdir /cluster/data/caeJap1/wustl
    cd /cluster/data/caeJap1/wustl

    wget -nd -r \
ftp://genome.wustl.edu/pub/organism/Invertebrates/Caenorhabditis_japonica/assembly/Caenorhabditis_japonica-3.0.2/output/

########################################################################
#  Construct chrUn assembly from their agp file (DONE - 2008-04-03 - Hiram)
    ssh kkstore02
    cd /cluster/data/caeJap1/wustl
    cat << '_EOF_' > mkChrUn.pl
#!/usr/bin/env perl

# Process unknown regions of mapped.agp.chromosome.agp to create one unknown chromosome.
#
# run thus:
#	zcat supercontigs.agp.gz | ./mkChrUn.pl > chrUn.agp

use warnings;
use strict;

# Running state shared by printUnGapped() and the main read loop.
my $seq = 1;	# next value for the sequence-number column (field 4) of the output
my $end = 0;	# fixed-up end position (field 3) of current chromosome
my $prevEnd = 0; # offset added to fields 2 and 3 of every line of the current ContigN
my $count = 0;	# lines seen so far for the current ContigN; zero only before the first Contig line
my $lastNum = "";	# ContigN number of the previous Contig line

my $ungappedLen = 1000;	# size of the non-bridged gap placed between contigs

# Contig.bed records where each input ContigN lands on chrUn.
# The bareword handle FH is kept because the rest of the script prints to
# it; the three-argument form keeps the mode separate from the file name,
# and $! reports why the open failed.
open (FH, '>', 'Contig.bed') or die "can not write to Contig.bed: $!";

my $debug = 0;	# set non-zero for extra diagnostic output
my $test = 0;	# number of gap lines emitted by printUnGapped()

sub printUnGapped
{
# Print one non-bridged gap line of $ungappedLen bases on chrUn, starting
# right after position $prev.  The main loop calls this each time a new
# ContigN begins after an earlier one.
#
# Uses and advances the file-level $seq counter and increments $test.
# Returns the new end position on chrUn, i.e. $prev + $ungappedLen.
    my ($prev) = @_;

    my $gapStart = $prev + 1;
    # The end used to be computed with a hard-coded 1000; derive it from
    # $ungappedLen so the coordinates, the size column and the returned
    # position always agree.
    my $gapEnd = $prev + $ungappedLen;

    print join("\t", "chrUn", $gapStart, $gapEnd, $seq++,
        "N", $ungappedLen, "contig", "no"), "\n";
    $test++;
    return $gapEnd;
}

my $ctgStart = 0;	# chrUn start of the ContigN currently being read
my $ctgEnd = 0;		# chrUn end of the ContigN most recently completed

# Main loop: read AGP lines from stdin (or files named on the command line).
# Only lines whose first field starts with "Contig<digits>" are used; every
# other line is skipped.  Each used line is re-based onto chrUn and printed
# to stdout; one record per ContigN is written to Contig.bed.
while(<>) {
    my @a = split /\s+/;
    next unless @a;	# blank line: nothing to match, and avoids an undef warning
    if($a[0] =~ /^Contig(\d+)/) {
        my $num = $1;
        my $newChr = $num ne $lastNum;

        # A new ContigN begins after at least one earlier ContigN: record
        # the finished one in Contig.bed, emit a non-bridged gap, and start
        # the new one at the end of that gap.
        if($newChr && $count) {
            $ctgEnd = $end;
            printf FH "chrUn\t%d\t%d\tContig%s\n", $ctgStart, $ctgEnd, $lastNum;
            $end = printUnGapped($end);
            $ctgStart = $end;
        }

        if($newChr) {
            print "chrUn$num\n" if($debug);
            # all lines of this ContigN are shifted by the current chrUn end
            $prevEnd = $end;
        }

        # fixup fields 2 & 3
        $a[1] += $prevEnd;
        $a[2] += $prevEnd;

        # fixup sequence
        $a[3] = $seq++;

        # Every field is followed by a tab, so each output line ends with a
        # trailing tab; this is kept so the output bytes do not change.
        print "chrUn\t";
        for my $i (1..8) {
            if(defined($a[$i])) {
                print $a[$i],"\t";
            }
        }
        print "\n";
        $end = $a[2];
        $lastNum = $num;
        $count = $newChr ? 1 : $count + 1;
    }
}

# Record the final ContigN, but only when at least one Contig line was read;
# otherwise a meaningless "chrUn 0 0 Contig" record would land in Contig.bed.
if($count) {
    $ctgEnd = $end;
    printf FH "chrUn\t%d\t%d\tContig%s\n", $ctgStart, $ctgEnd, $lastNum;
}
# Buffered write errors only surface at close, so check it.
close (FH) or die "can not close Contig.bed: $!";

print STDERR "test: $test\n" if($debug);
'_EOF_'
    # << happy emacs
    chmod +x mkChrUn.pl
    zcat supercontigs.agp.gz | ./mkChrUn.pl > chrUn.agp

    # fixup qual coordinates
    qaToQac contigs.fa.qual.gz stdout \
	| qacAgpLift chrUn.agp stdin chrUn.qual.qac

    #	and create a ctgPos2 file to show ContigN locations
    awk '{
size = $3-$2
printf "%s\t%d\tchrUn\t%d\t%d\tW\n", $4, size, $2, $3
}' Contig.bed > ctgPos2.tab

########################################################################
# build the database (DONE - 2008-04-03 - Hiram)
    ssh kkstore02
    cd /cluster/data/caeJap1
    cat << '_EOF_' > caeJap1.config.ra
# Config parameters for makeGenomeDb.pl:
db caeJap1
clade worm
genomeCladePriority 10
scientificName Caenorhabditis japonica
commonName C. japonica
assemblyDate Mar. 2008
assemblyLabel Washington University School of Medicine GSC C. japonica 3.0.2
orderKey 882
mitoAcc none
fastaFiles /cluster/data/caeJap1/wustl/contigs.fa.gz
agpFiles /cluster/data/caeJap1/wustl/chrUn.agp
qualFiles /cluster/data/caeJap1/wustl/chrUn.qual.qac
dbDbSpeciesDir worm
'_EOF_'
    # << happy emacs

    time nice -n +19 makeGenomeDb.pl caeJap1.config.ra

    #	check in the track Db stuff, fixup the gold, gap and description pages

########################################################################
#	repeat masker was run, but it did not mask much, so that table
#	was removed so that windowmaskerSdust could be the masking track
#	when fetching DNA from the browser
    ssh kkstore02
    mkdir /cluster/data/caeJap1/bed/repeatMasker
    cd /cluster/data/caeJap1/bed/repeatMasker
    time nice -n +19 doRepeatMasker.pl \
	-buildDir=/cluster/data/caeJap1/bed/repeatMasker \
	-bigClusterHub=pk caeJap1 > do.log

    cd /cluster/data/caeJap1
    twoBitToFa caeJap1.rmsk.2bit stdout | faSize stdin \
	> rmsk.faSize.txt
    cat rmsk.faSize.txt
    #	156378573 bases (27031392 N's 129347181 real 126720725 upper
    #	2626456 lower) in 1 sequences in 1 files
    #	%1.68 masked total, %2.03 masked real

    ssh hgwdev
    hgsql -e "drop table chrUn_rmsk;" caeJap1
    #	this leaves the interrupted repeats track showing on genome-test

########################################################################
# run window masker (DONE - 2008-04-03 - Hiram)
    ssh kkstore02
    mkdir /cluster/data/caeJap1/bed/windowMasker
    cd /cluster/data/caeJap1/bed/windowMasker
    doWindowMasker.pl -workhorse=kolossus \
        -buildDir=/cluster/data/caeJap1/bed/windowMasker caeJap1

    #	load this initial data to get ready to clean it
    ssh hgwdev
    cd /cluster/data/caeJap1/bed/windowMasker
    hgLoadBed caeJap1 windowmaskerSdust windowmasker.sdust.bed.gz

    #	eliminate the gaps from the masking
    featureBits caeJap1 -not gap -bed=notGap.bed
    time nice -n +19 featureBits caeJap1 windowmaskerSdust notGap.bed \
        -bed=stdout | gzip -c > cleanWMask.bed.gz
    #	reload track to get it clean
    hgLoadBed caeJap1 windowmaskerSdust cleanWMask.bed.gz

    #	mask the sequence with this clean mask
    zcat cleanWMask.bed.gz \
	| twoBitMask ../../caeJap1.unmasked.2bit stdin \
	    -type=.bed caeJap1.cleanWMSdust.2bit
    twoBitToFa caeJap1.cleanWMSdust.2bit stdout | faSize stdin \
        > caeJap1.cleanWMSdust.faSize.txt
    cat caeJap1.cleanWMSdust.faSize.txt
    #	156378573 bases (27031392 N's 129347181 real 72118110 upper
    #	57229071 lower) in 1 sequences in 1 files
    #	%36.60 masked total, %44.24 masked real

########################################################################
# run TRF simple repeats (DONE - 2008-04-03 - Hiram)
    ssh kkstore02
    mkdir /cluster/data/caeJap1/bed/simpleRepeat
    cd /cluster/data/caeJap1/bed/simpleRepeat
    time nice -n +19 doSimpleRepeat.pl \
	-buildDir=/cluster/data/caeJap1/bed/simpleRepeat \
	    -smallClusterHub=memk caeJap1 > do.log 2>&1

    #	add this mask to the window masker mask to produce the masked
    #	2bit file
    cd /cluster/data/caeJap1
    twoBitMask -add bed/windowMasker/caeJap1.cleanWMSdust.2bit \
        bed/simpleRepeat/trfMask.bed caeJap1.2bit
    twoBitToFa caeJap1.2bit stdout | faSize stdin \
        > faSize.caeJap1.wmskSdust.TRF.txt
    cat faSize.caeJap1.wmskSdust.TRF.txt
    # 156378573 bases (27031392 N's 129347181 real 71997717 upper
    # 57349464 lower) in 1 sequences in 1 files
    #	%36.67 masked total, %44.34 masked real

    #	create symlink to gbdb
    ssh hgwdev
    cd /gbdb/caeJap1
    ln -s /cluster/data/caeJap1/caeJap1.2bit .

########################################################################
#	add ctgPos2 track (DONE - 2008-04-03 - Hiram)
    ssh hgwdev
    cd /cluster/data/caeJap1/wustl
    hgLoadSqlTab caeJap1 ctgPos2 $HOME/kent/src/hg/lib/ctgPos2.sql ctgPos2.tab

#########################################################################
# MAKE 11.OOC FILE FOR BLAT/GENBANK (DONE - 2008-04-03 - Hiram)
    # Use -repMatch=100 (based on size -- for human we use 1024, and 
    # worm size is ~5.1% of human judging by gapless caeJap1 vs. hg18 genome 
    # size from featureBits. So we would use 50, but that yields a very
    # high number of tiles to ignore, especially for a small more compact 
    # genome.  Bump that up a bit to be more conservative.
    ssh kolossus
    # (A strictly size-proportional value would be -repMatch=50: for human
    # we use 1024, and C. japonica size is ~5.1% of human judging by gapless
    # caeJap1 vs. hg18 genome sizes from featureBits.  The command below
    # uses -repMatch=100 instead.)
    ssh kolossus
    cd /cluster/data/caeJap1
    blat caeJap1.2bit /dev/null /dev/null -tileSize=11 \
      -makeOoc=jkStuff/11.ooc -repMatch=100
    #	Wrote 14923 overused 11-mers to jkStuff/11.ooc

#########################################################################
#	make caeJap1.chrUn.lift
    ssh hgwdev
    cd /cluster/data/caeJap1/jkStuff

    awk '{
printf "%d\t%s\t%d\tchrUn\t156378573\n", $2, $4, $3-$2
}' ../wustl/Contig.bed > caeJap1.chrUn.lift

#########################################################################
# GENBANK AUTO UPDATE (DONE - 2008-04-04 - Hiram)
    # align with latest genbank process.
    ssh hgwdev
    cd ~/kent/src/hg/makeDb/genbank
    cvsup
    # edit src/lib/gbGenome.c to add caeJap names between caeRem and caePb
# static char *caeJapNames[] = {"Caenorhabditis japonica", NULL};
# {"caeJap", caeJapNames},
    cvs ci -m "Added C. japonica" src/lib/gbGenome.c
    make install-server

    # edit etc/genbank.conf to add caeJap1 just before caeRem1

# caeJap1 (C. japonica)
caeJap1.serverGenome = /cluster/data/caeJap1/caeJap1.2bit
caeJap1.clusterGenome = /cluster/data/caeJap1/caeJap1.2bit
caeJap1.ooc = /cluster/data/caeJap1/jkStuff/11.ooc
caeJap1.lift = /cluster/data/caeJap1/jkStuff/caeJap1.chrUn.lift
caeJap1.refseq.mrna.native.pslCDnaFilter  = ${lowCover.refseq.mrna.native.pslCDnaFilter}
caeJap1.refseq.mrna.xeno.pslCDnaFilter    = ${lowCover.refseq.mrna.xeno.pslCDnaFilter}
caeJap1.genbank.mrna.native.pslCDnaFilter = ${lowCover.genbank.mrna.native.pslCDnaFilter}
caeJap1.genbank.mrna.xeno.pslCDnaFilter   = ${lowCover.genbank.mrna.xeno.pslCDnaFilter}
caeJap1.genbank.est.native.pslCDnaFilter  = ${lowCover.genbank.est.native.pslCDnaFilter}
caeJap1.refseq.mrna.native.load = yes
caeJap1.refseq.mrna.xeno.load  = yes
caeJap1.refseq.mrna.xeno.loadDesc = yes
caeJap1.genbank.mrna.xeno.load = yes
caeJap1.genbank.est.native.load = yes
caeJap1.genbank.est.native.loadDesc = no
caeJap1.downloadDir = caeJap1
caeJap1.perChromTables = no

    cvs ci -m "Added caeJap1." etc/genbank.conf
    # update /cluster/data/genbank/:
    make etc-update

    ssh genbank
    screen	#	use a screen to manage this job
    cd /cluster/data/genbank
    time nice -n +19 bin/gbAlignStep -initial caeJap1 &
    #	logFile: var/build/logs/2008.04.04-00:01:44.caeJap1.initalign.log
    #	real    124m10.365s

    # load database when finished
    ssh hgwdev
    cd /cluster/data/genbank
    time nice -n +19 ./bin/gbDbLoadStep -drop -initialLoad caeJap1
    #	logFile: var/dbload/hgwdev/logs/2008.04.04-10:07:40.dbload.log
    #	real    16m14.090s

    # enable daily alignment and update of hgwdev
    cd ~/kent/src/hg/makeDb/genbank
    cvsup
    # add caeJap1 to:
        etc/align.dbs
        etc/hgwdev.dbs
    cvs ci -m "Added caeJap1 - C. japonica" etc/align.dbs etc/hgwdev.dbs
    make etc-update

############################################################################
#  BLATSERVERS ENTRY (DONE - 2008-04-04 - Hiram)
#	After getting a blat server assigned by the Blat Server Gods,
    ssh hgwdev

    hgsql -e 'INSERT INTO blatServers (db, host, port, isTrans, canPcr) \
	VALUES ("caeJap1", "blat15", "17790", "1", "0"); \
	INSERT INTO blatServers (db, host, port, isTrans, canPcr) \
	VALUES ("caeJap1", "blat15", "17791", "0", "1");' \
	    hgcentraltest
    #	test it with some sequence

############################################################################
## Reset default position to the unc-52 gene complex (DONE - 2008-04-04 - Hiram)
#	as found by a blat search
    ssh hgwdev
    hgsql -e 'update dbDb set defaultPos="chrUn:36686834-36727776"
	where name="caeJap1";' hgcentraltest

#########################################################################
## Make maskedContigs (DONE - 2008-06-09 - Hiram)
    mkdir /cluster/data/caeJap1/maskedContigs
    cd /cluster/data/caeJap1/maskedContigs
    ln -s ../jkStuff/caeJap1.chrUn.lift ./supers.lift
    cp -p /cluster/data/priPac1/jkStuff/lft2BitToFa.pl .
    ./lft2BitToFa.pl ../caeJap1.2bit supers.lift > supers.fa
    faSize supers.fa
    #	151722573 bases (22375392 N's 129347181 real
    #	71997717 upper 57349464 lower) in 4657 sequences in 1 files
    #	%37.80 masked total, %44.34 masked real
    twoBitToFa ../caeJap1.2bit stdout | faSize stdin
    #	156378573 bases (27031392 N's 129347181 real
    #	71997717 upper 57349464 lower) in 1 sequences in 1 files
    #	%36.67 masked total, %44.34 masked real
    faToTwoBit supers.fa caeJap1.TrfWM.supers.2bit
    twoBitInfo caeJap1.TrfWM.supers.2bit stdout \
	| sort -k2rn > caeJap1.TrfWM.supers.sizes

#########################################################################
# ELEGANS (ce6) PROTEINS TRACK (DONE -  Hiram - 2008-06-11-19)
    ssh kkstore02

    #	breaking up this target genome into manageable pieces
    #	using the contigs so we don't have genes spanning non-bridged gaps
    mkdir /cluster/data/caeJap1/blastDb
    cd /cluster/data/caeJap1
    #	extract the ContigN sequences (unmasked via -noMask) so that the
    #	pieces made below never span a non-bridged gap
    twoBitToFa -noMask maskedContigs/caeJap1.TrfWM.supers.2bit temp.fa
    #	NOTE: the next command is a stale leftover and is disabled.  It
    #	would overwrite temp.fa with the single chrUn sequence, which does
    #	not match the 4658 pieces recorded below or the later liftUp of the
    #	results from ContigN to chrUn coordinates.
    #	twoBitToFa caeJap1.unmasked.2bit temp.fa
    faSplit gap temp.fa 1000000 blastDb/x -lift=blastDb.lft
    #	4658 pieces of 4658 written
    rm temp.fa
    cd blastDb
    #	format each piece as a nucleotide (-p F) blast database
    for i in *.fa
    do
	/cluster/bluearc/blast229/formatdb -i $i -p F
    done
    rm *.fa

    ##	copy to san for kluster access
    mkdir -p /san/sanvol1/scratch/worms/caeJap1/blastDb
    cd /san/sanvol1/scratch/worms/caeJap1/blastDb
    rsync -a --progress --stats /cluster/data/caeJap1/blastDb/ .

    ## create the query protein set
    mkdir -p /cluster/data/caeJap1/bed/tblastn.ce6SG
    cd /cluster/data/caeJap1/bed/tblastn.ce6SG
    echo  /san/sanvol1/scratch/worms/caeJap1/blastDb/*.nsq | xargs ls -S \
	| sed "s/\.nsq//"  > query.lst
    wc -l query.lst
    # 4658 query.lst

   # we want around 100000 jobs
   calc `wc /cluster/data/ce6/bed/blat.ce6SG/ce6SG.psl | awk "{print \\\$1}"`/\(100000/`wc query.lst | awk "{print \\\$1}"`\)

    #	23741/(100000/4658) = 1105.855780

   mkdir -p /cluster/bluearc/worms/caeJap1/bed/tblastn.ce6SG/sgfa
   split -l 1100 /cluster/data/ce6/bed/blat.ce6SG/ce6SG.psl \
	/cluster/bluearc/worms/caeJap1/bed/tblastn.ce6SG/sgfa/sg
   ln -s /cluster/bluearc/worms/caeJap1/bed/tblastn.ce6SG/sgfa sgfa
   cd sgfa
   for i in *; do 
     nice pslxToFa $i $i.fa; 
     rm $i; 
   done
   cd ..
   ls -1S sgfa/*.fa > sg.lst
   mkdir -p /cluster/bluearc/worms/caeJap1/bed/tblastn.ce6SG/blastOut
   ln -s /cluster/bluearc/worms/caeJap1/bed/tblastn.ce6SG/blastOut
   for i in `cat sg.lst`; do  mkdir blastOut/`basename $i .fa`; done
   
   cd /cluster/data/caeJap1/bed/tblastn.ce6SG
   cat << '_EOF_' > template
#LOOP
blastSome $(path1) {check in line $(path2)} {check out exists blastOut/$(root2)/q.$(root1).psl }
#ENDLOOP
'_EOF_'
    # << happy emacs

   cat << '_EOF_' > blastSome
#!/bin/sh
# blastSome - one cluster job: tblastn one protein query file against one
# blast database piece and write the lifted result as a psl file.
#
# usage (see the gensub2 template): blastSome <dbPrefix> <queryFa> <outPsl>
#   $1  path prefix of a formatted blast database; all $1.* files are used
#   $2  fasta file of query proteins
#   $3  psl file to create
#
# Exit status: 0 once blastToPsl has succeeded, 1 otherwise.
# NOTE(review): exit 0 is also reached when pslCheck fails; in that case $3
# is not written and the scratch files are left in place.
CBIN=/cluster/bin/i386
DB=caeJap1
BLASTMAT=/scratch/data/blast229/data
SCR="/scratch/tmp/${DB}"
g=`/bin/basename $2`
D=`/bin/basename $1`
R=`/bin/basename $3`
inputFile=${SCR}/${D}_${g}
export CBIN DB BLASTMAT SCR g D R
# stage the database, the query and both lift files in the scratch
# directory, using names prefixed with the database piece name
/bin/mkdir -p ${SCR}
/bin/cp -p $1.* ${SCR}
/bin/cp -p $2 ${inputFile}
/bin/cp -p /cluster/store5/${DB}/blastDb.lft ${SCR}/${D}_blastDb.lft
/bin/cp -p /cluster/store3/worm/ce6/bed/blat.ce6SG/protein.lft \
	${SCR}/${D}_protein.lft
f=${SCR}/${R}.$g
# run blastall with successively smaller e-value cutoffs; the first run
# that exits successfully has its output renamed to $f.1 and ends the loop
for eVal in 0.01 0.001 0.0001 0.00001 0.000001 1E-09 1E-11
do
if /scratch/data/blast229/blastall -M BLOSUM80 -m 0 -F no \
        -e $eVal -p tblastn -d ${SCR}/${D} -i ${inputFile} -o $f.8
then
        /bin/mv $f.8 $f.1
        break;
fi
done
# $f.1 exists only if one of the blastall runs above succeeded
if test -f  $f.1
then
    if ${CBIN}/blastToPsl $f.1 $f.2
    then
        # first liftUp applies blastDb.lft (written by faSplit), the second
        # applies protein.lft to the query side (-pslQ)
        ${CBIN}/liftUp -nosort -type=".psl" -nohead $f.3 \
            ${SCR}/${D}_blastDb.lft carry $f.2 > /dev/null
        ${CBIN}/liftUp -nosort -type=".psl" -pslQ -nohead ${SCR}/${R}.tmp \
            ${SCR}/${D}_protein.lft warn $f.3 > /dev/null
        # only a result that passes pslCheck is moved into place as $3
        if ${CBIN}/pslCheck -prot ${SCR}/${R}.tmp
        then
            /bin/rm -f $3
            /bin/mv ${SCR}/${R}.tmp $3
            /bin/rm -f $f.1 $f.2 $f.3 $f.4 ${SCR}/${D}.* ${inputFile} \
		${SCR}/${D}_protein.lft ${SCR}/${D}_blastDb.lft
            /bin/rmdir --ignore-fail-on-non-empty ${SCR}
        fi
        exit 0
    fi
fi
# failure path: no blastall run succeeded, or blastToPsl failed
# NOTE(review): $3.tmp is never created by this script (the temporary psl
# is ${SCR}/${R}.tmp) and $f.4 is never written; removing them is harmless.
/bin/rm -f $f.1 $f.2 $3.tmp $f.8 $f.3 $f.4 ${SCR}/${D}.* ${inputFile} \
	${SCR}/${D}_protein.lft ${SCR}/${D}_blastDb.lft
/bin/rmdir --ignore-fail-on-non-empty ${SCR}
exit 1
'_EOF_'
    # << happy emacs
    chmod +x blastSome

    ssh kk
    cd /cluster/data/caeJap1/bed/tblastn.ce6SG
    gensub2 query.lst sg.lst template jobList
    para create jobList
#    para try, check, push, check etc.
# Completed: 102476 of 102476 jobs
# CPU time in finished jobs:   11629662s  193827.71m  3230.46h  134.60d  0.369 y
# IO & Wait Time:                427634s    7127.23m   118.79h    4.95d  0.014 y
# Average job time:                 118s       1.96m     0.03h    0.00d
# Longest finished job:             594s       9.90m     0.17h    0.01d
# Submission to last job:         91352s    1522.53m    25.38h    1.06d

    #	one of these jobs had a broken psl file.  Determined it was in
    #	the sgam group of jobs.  So, re-running that bit:
    ssh kk
    cd /cluster/data/caeJap1/bed/tblastn.ce6SG
    grep sgam jobList > sgam.jobList
    para create sgam.jobList
    para try, check, push ... etc ...
# Completed: 4658 of 4658 jobs
# CPU time in finished jobs:     575028s    9583.80m   159.73h    6.66d  0.018 y
# IO & Wait Time:                 13345s     222.42m     3.71h    0.15d  0.000 y
# Average job time:                 126s       2.11m     0.04h    0.00d
# Longest finished job:             565s       9.42m     0.16h    0.01d
# Submission to last job:          2669s      44.48m     0.74h    0.03d

    # do the cluster run for chaining
    ssh kk
    mkdir /cluster/data/caeJap1/bed/tblastn.ce6SG/chainRun
    cd /cluster/data/caeJap1/bed/tblastn.ce6SG/chainRun
    cat << '_EOF_' > template
#LOOP
chainOne $(path1)
#ENDLOOP
'_EOF_'
    # << happy emacs

    cat << '_EOF_' > chainOne
(cd $1; cat q.*.psl | simpleChain -prot -outPsl -maxGap=50000 stdin /cluster/bluearc/worms/caeJap1/bed/tblastn.ce6SG/blastOut/c.`basename $1`.psl)
'_EOF_'
    # << happy emacs
    chmod +x chainOne

    ls -1dS /cluster/bluearc/worms/caeJap1/bed/tblastn.ce6SG/blastOut/sg?? \
	> chain.lst
    gensub2 chain.lst single template jobList
    cd /cluster/data/caeJap1/bed/tblastn.ce6SG/chainRun
    para create jobList
    # para maxNode 30 # there are only 22 jobs
    para try, check, push, check etc.
# Completed: 22 of 22 jobs
# CPU time in finished jobs:        413s       6.89m     0.11h    0.00d  0.000 y
# IO & Wait Time:                  2123s      35.38m     0.59h    0.02d  0.000 y
# Average job time:                 115s       1.92m     0.03h    0.00d
# Longest finished job:             125s       2.08m     0.03h    0.00d
# Submission to last job:          7798s     129.97m     2.17h    0.09d

    ssh kkstore02
    cd /cluster/data/caeJap1/bed/tblastn.ce6SG/blastOut
    for i in sg??
    do
       cat c.$i.psl | awk "(\$13 - \$12)/\$11 > 0.6 {print}" > c60.$i.psl
       sort -rn c60.$i.psl | pslUniq stdin u.$i.psl
       awk "((\$1 / \$11) ) > 0.60 { print   }" c60.$i.psl > m60.$i.psl
       echo $i
    done
    sort -T /scratch/tmp -k 14,14 -k 16,16n -k 17,17n u.*.psl m60* | uniq \
	> /cluster/data/caeJap1/bed/tblastn.ce6SG/blastCe6SG.psl
    cd ..
    pslCheck blastCe6SG.psl
    #	checked: 24730 failed: 0 errors: 0

    # load table 
    ssh hgwdev
    cd /cluster/data/caeJap1/bed/tblastn.ce6SG
    liftUp -nohead -type=.psl stdout ../../jkStuff/caeJap1.chrUn.lift \
	error blastCe6SG.psl | sort -k14,14 -k16,16n \
	| hgLoadPsl -table=blastCe6SG caeJap1 stdin

    # check coverage
    featureBits caeJap1 blastCe6SG 
    #	14816965 bases of 129347181 (11.455%) in intersection
    featureBits cb3 blastCe6SG
    #	18431207 bases of 108433446 (16.998%) in intersection

    featureBits ce6 sangerGene
    #	28134889 bases of 100281426 (28.056%) in intersection

    ssh kkstore02
    rm -rf /cluster/data/caeJap1/bed/tblastn.ce6SG/blastOut
    rm -rf /cluster/bluearc/worms/caeJap1/bed/tblastn.ce6SG
    rm -rf /cluster/data/caeJap1/bed/tblastn.ce6SG/sgfa

###########################################################################
#  verify all.joiner entries (DONE - 2008-06-20 - Hiram)
    #	try to get joinerCheck tests clean output on these commands
    ssh hgwdev
    cd ~/kent/src/hg/makeDb/schema
    joinerCheck -database=caeJap1 -tableCoverage all.joiner
    #	Error: caeJap1.intronEst tables not in .joiner file
    #	it isn't clear how to get this to go away
    joinerCheck -database=caeJap1 -keys all.joiner
    joinerCheck -database=caeJap1 -times all.joiner

##########################################################################
#  make downloads (DONE - 2008-06-20 - Hiram)
    #	verify all tracks have html pages for their trackDb entries
    cd /cluster/data/caeJap1
    /cluster/bin/scripts/makeDownloads.pl caeJap1
    #	fix the README files
    #	re-build the upstream files:
    cd /cluster/data/caeJap1/goldenPath/bigZips
    for size in 1000 2000 5000
    do
	echo $size
	featureBits caeJap1 sangerGene:upstream:$size -fa=stdout \
	    | gzip -c > sangerGene.upstream$size.fa.gz
    done

    #	the cp below uses paths relative to the assembly directory, so
    #	return there first (the loop above ran in goldenPath/bigZips)
    cd /cluster/data/caeJap1
    cp -p bed/windowMasker/cleanWMask.bed.gz \
	goldenPath/bigZips/chromWMSdust.bed.gz

##########################################################################
#  create pushQ entry (DONE - 2008-06-20 - Hiram)
    #	verify all tracks have html pages for their trackDb entries
    ssh hgwdev
    cd /cluster/data/caeJap1/jkStuff
    /cluster/bin/scripts/makePushQSql.pl caeJap1 > caeJap1.pushQ.sql
# caeJap1 does not have seq
# caeJap1 does not have extFile
    scp -p caeJap1.pushQ.sql hgwbeta:/tmp
    ssh hgwbeta
    cd /tmp
    hgsql qapushq < caeJap1.pushQ.sql
    #	verify file sizes in the pushQ entries
 
##########################################################################
