# for emacs: -*- mode: sh; -*-


# This file describes browser build for the Medaka
# genome, Oryzias latipes, October 2005, MEDAKA1 from
#	Institute of Genetics (NIG) and the University of Tokyo
#	NIG: http://dolphin.lab.nig.ac.jp/medaka/
#	UTGB: http://medaka.utgenome.org/o
#	Data release policy: http://medaka.utgenome.org/#policy
#	Ensembl: http://www.ensembl.org/Oryzias_latipes/index.html
#
#	"$Id: oryLat1.txt,v 1.42 2008/07/01 16:53:02 markd Exp $"
#

##########################################################################
###  Fetch sequence	(DONE - 2006-11-22 - Hiram)
    ssh kkstore04
    mkdir /cluster/store8/oryLat1
    ln -s /cluster/store8/oryLat1 /cluster/data/oryLat1
    cd /cluster/data/oryLat1
    mkdir rawData
    cd rawData
    cat << '_EOF_' > fetch.sh
#!/bin/sh

for C in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
do
    wget --timestamping \
"ftp://ftp.ensembl.org/pub/current_oryzias_latipes/data/fasta/dna/Oryzias_latipes.MEDAKA1.41.dna.chromosome.${C}.fa.gz" \
-O chr${C}.fa.gz
done

wget --timestamping \
"ftp://ftp.ensembl.org/pub/current_oryzias_latipes/data/fasta/dna/Oryzias_latipes.MEDAKA1.41.dna.nonchromosomal.fa.gz" \
-O chrUn.fa.gz

wget --timestamping \
"ftp://ftp.ensembl.org/pub/current_oryzias_latipes/data/fasta/dna/Oryzias_latipes.MEDAKA1.41.dna.seqlevel.fa.gz" \
-O seqlevel.fa.gz
'_EOF_'
    # << happy emacs
    chmod +x fetch.sh
    ./fetch.sh
    #	There is also repeat masked sequence there too
    cat << '_EOF_' > fetchRM.sh
#!/bin/sh

for C in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
do
    wget --timestamping \
"ftp://ftp.ensembl.org/pub/current_oryzias_latipes/data/fasta/dna/Oryzias_latipes.MEDAKA1.41.dna_rm.chromosome.${C}.fa.gz" \
-O chr${C}.rm.fa.gz
done

wget --timestamping \
"ftp://ftp.ensembl.org/pub/current_oryzias_latipes/data/fasta/dna/Oryzias_latipes.MEDAKA1.41.dna_rm.nonchromosomal.fa.gz" \
-O chrUn.rm.fa.gz

wget --timestamping \
"ftp://ftp.ensembl.org/pub/current_oryzias_latipes/data/fasta/dna/Oryzias_latipes.MEDAKA1.41.dna_rm.seqlevel.fa.gz" \
-O seqlevel.rm.fa.gz
'_EOF_'
    # << happy emacs
    chmod +x fetchRM.sh
    ./fetchRM.sh

##########################################################################
##  Create AGP file from these sequence files
##	(DONE - 2006-11-28 - Hiram)
    #	Looks like we might be able to make an AGP file from the seqlevel.fa
    #	file.  Its fasta headers have position information, e.g.:
# >scaffold605_contig102266 dna:contig chromosome:MEDAKA1:12:103214:103945:-1
# >scaffold605_contig102265 dna:contig chromosome:MEDAKA1:12:103956:105102:-1
# >scaffold605_contig102264 dna:contig chromosome:MEDAKA1:12:105113:107631:-1
# ... etc

    #	Extract the headers out of the seqlevel.fa.gz file:
    zcat seqlevel.fa.gz | grep "^>" > seqlevel.fa.headers.txt

    #	use the following perl script on those headers to generate an agp file:

cat << '_EOF_' > allSeqLevel.pl
#!/usr/bin/env perl

use warnings;
use strict;

my %chrStartOrdered;

open (FH,"<seqlevel.fa.headers.txt") or die "can not open seqlevel.fa.headers.txt";
my $seqCount = 1;
my $unLen = 0;
my $unStart = 1;
my $unEnd = 1;
my $unGapSize = 100;
my $prevScaf = "";
while (my $line = <FH>) {
    chomp $line;
    my @a = split('\s',$line);
    #   first word on the line is a scaffold_contig identification
    $a[0] =~ s/^>//;
    #   third word on the line is the chrom position indication
    my @b = split(':',$a[2]);
    #   length of this scaffold_contig
    my $len = $b[4] - $b[3] + 1;
    my $strand = "+";
    #   a -1 indicates negative strand orientation for this scaffold_contig
    $strand = "-" if (-1 == $b[5]);
    my $key = "";       # key will be chrom.start
    if (($b[2] =~ m/^[0-9]+$/) && ($b[3] =~ m/^[0-9]+$/)) {
        $key = sprintf("%03d.%09d", $b[2], $b[3]);
    } else {
        my @scafContig = split('_',$a[0]);
        if (length($prevScaf) > 0) {
            if ($scafContig[0] =~ m/^$prevScaf$/) {
                $unGapSize = 10;
            } else {
                $unGapSize = 100;
            }
        } else {
            $unGapSize = 0;     # first contig seen, no gap yet.
        }
        $prevScaf = $scafContig[0];
        $unStart += $unGapSize;
        $key = sprintf("Un.%09d", $unStart);
        $unEnd = $unStart + $len - 1;
    $chrStartOrdered{$key} = sprintf "chrUn\t%d\t%d\t%d\tW\t%s\t1\t%d\t%s",
        $unStart,$unEnd, $seqCount++, $a[0], $len, $strand;
        $unStart = $unEnd + 1;
        next;
    }
    if ($b[4] =~ m/^[0-9]+$/) {
    $chrStartOrdered{$key} = sprintf "chr%s\t%d\t%d\t%d\tW\t%s\t1\t%d\t%s",
        $b[2],$b[3],$b[4], $seqCount++, $a[0], $len, $strand;
    } else {
        printf STDERR "non-numeric 4 ? '$line'\n";
        next;
    }
}

close(FH);

my $startChr = "chr1";
my $lastEnd = 0;
$seqCount = 1;
$prevScaf = "";
foreach my $key (sort keys(%chrStartOrdered)) {
    my ($chr, $start, $end, $seqNum, $type, $name, $one, $len, $strand) =
        split('\t',$chrStartOrdered{$key});
    if ($chr !~ m/^$startChr$/) {
        $lastEnd = 0;
        $startChr = $chr;
    }
    my @scafContig = split('_',$name);
    my $gapType = "contig";
    my $bridged = "yes";
    if (length($prevScaf) > 0) {
        if ($prevScaf !~ m/^$scafContig[0]$/) {
            $gapType = "scaffold";
            $bridged = "no";
        }
    }
    $prevScaf = $scafContig[0];
    if (1 == ($lastEnd+1)) {
        $gapType = "telomere";
        $bridged = "no";
    }
    if (($lastEnd +1) < $start) {
        my $gapLen = $start - $lastEnd - 1;
        if ($chr =~ m/chrUn/) {
            if (10 == $gapLen) {
                $gapType = "contig";
                $bridged = "yes";
            } else {
                $gapType = "scaffold";
                $bridged = "no";
            }
        }
        printf "%s\t%d\t%d\t%d\tN\t%d\t%s\t%s\n",
            $chr, $lastEnd+1, $start-1, $seqCount++, $gapLen, $gapType, $bridged;
    }
    printf "%s\t%d\t%d\t%d\t%s\t%s\t%s\t%s\t%s\n", $chr, $start, $end,
        $seqCount++, $type, $name, $one, $len, $strand;
    $lastEnd = $end;
}
'_EOF_'
    # << happy emacs
    chmod +x allSeqLevel.pl
    ./allSeqLevel.pl > allSeqLevel.agp 2> err

##########################################################################
#  Run the makeGenomeDb.pl script (DONE - 2006-11-28 - Hiram)
    # prepare for the makeGenomeDb.pl script:
    ssh hgwdev
    cd /cluster/data/oryLat1
    #	the config.ra script pretty much specifies everything
    cat << '_EOF_' >  config.ra
db oryLat1
scientificName Oryzias latipes
assemblyDate Oct. 2005
assemblyLabel MEDAKA1
#       Same order key as tetNig1
orderKey 40
#       NC_004387
mitoAcc 25057156
fastaFiles /cluster/data/oryLat1/useSequence/seq*.fa.gz
dbDbSpeciesDir medaka
agpFiles /cluster/data/oryLat1/useSequence/oryLat1.agp
commonName Medaka
clade Vertebrate
genomeCladePriority 105
'_EOF_'
    # << happy emacs

    #	create a special useSequence directory to collect the essentials into
    #	one single place
    mkdir useSequence
    cd useSequence
    ln -s ../rawData/seqlevel.fa.gz .
    ln -s ../rawData/allSeqLevel.agp ./oryLat1.agp

    cd /cluster/data/oryLat1
    #	Being brave, a complete run will do this entirely
    makeGenomeDb.pl config.ra > mgdb.out 2>&1
    #	This sequence creates and loads the following files into the
    #	new database oryLat1
    #	chr*_gap, chr*_gold, chromInfo, gc5Base, grp
    #	And, when you follow the instructions it gives at the end to check in
    #	the trackDb files it creates, and you do a make in your trackDb
    #	hierarchy, you will then create the trackDb and hgFindSpec tables
    #	(with a 'make alpha') or your specific trackDb_logname
    #	hgFindSpec_logname with a simple 'make' with no arguments.
    #	The sequence also adds an entry to the dbDb table to turn on this
    #	organism in the drop-down menus

    #	run the script one step at a time if there are difficulties
    #	(there were problems until I figured out how to synchronize the agp
    #	file with the sequence file)
    makeGenomeDb.pl config.ra -stop=seq -workhorse=hgwdev
    makeGenomeDb.pl config.ra -continue=agp -stop=agp -workhorse=hgwdev
    makeGenomeDb.pl config.ra -continue=db -stop=db -workhorse=hgwdev
    makeGenomeDb.pl config.ra -continue=dbDb -stop=dbDb -workhorse=hgwdev
    makeGenomeDb.pl config.ra -continue=trackDb -stop=trackDb -workhorse=hgwdev
    #	The trackDb step displays instructions on what to do to get the
    #	generated source checked into the source tree.

########################################################################
##  Verify the generated source is correct and matches the chrom source
##  obtained from Ensembl	(DONE - 2006-11-29 - Hiram)
    #	Extract the source from the 2bit file
    ssh kkstore04
    cd /cluster/data/oryLat1
    for C in ? ??
do
    echo $C
twoBitToFa -seq=chr${C} oryLat1.unmasked.2bit stdout | gzip > ${C}/chr${C}.fa.gz
done
    #	Compare extracted source to the rawData/chr*.fa.gz files
for C in ? ??
do
    echo -n "${C}: "
    faSize ${C}/chr${C}.fa.gz
    echo -n "${C}: "
    faSize rawData/chr${C}.fa.gz
done > checkFa.out
    #	Each faSize output line should be duplicated
    grep sequence checkFa.out | wc
    #	51     867    4876
    #	It is an odd number because there is a M/chrM.fa.gz
    #	but no rawData/chrM.fa.gz
    grep sequence checkFa.out | sort -u | wc
    #	27     459    2579
    #	the chrUn count is different in each case because the
    #	Ensembl chrUn.fa.gz is different that what was put together
    #	here with the seqlevel.fa.gz source.  They have the same sequence,
    #	just a different size of gaps between scaffolds, and Ensemble
    #	doesn't assemble it into a chrUn, they leave it as separate sequences:
    faSize Un/chrUn.fa.gz
    #	119265241 bases (1021750 N's 118243491 real 118243491 upper 0 lower)
    #	in 1 sequences in 1 files
    faSize rawData/chrUn.fa.gz
    #	144764649 bases (26521158 N's 118243491 real 118243491 upper 0 lower)
    #	in 7164 sequences in 1 files

#########################################################################
# REPEATMASKER (DONE - 2006-11-29 - Hiram)
    ssh kkstore04
    cd /cluster/data/oryLat1/bed
    # Run -debug to create the dir structure and preview the scripts:
    #	kk and kki currently down at this run, specify pk
    ~/kent/src/hg/utils/automation/doRepeatMasker.pl \
	oryLat1 -verbose=3 -debug -workhorse=hgwdev -bigClusterHub=pk \
	-smallClusterHub=pk > rmRun.debug.log 2>&1
    # Run it for real and tail the log to follow progress:
    ~/kent/src/hg/utils/automation/doRepeatMasker.pl \
	oryLat1 -verbose=3 -workhorse=hgwdev -bigClusterHub=pk \
	-smallClusterHub=pk > RepeatMasker.2006-11-29/do.log 2>&1 &
    tail -f RepeatMasker.2006-11-29/do.log
    #	When this finished, it created a masked 2bit:
    # 214145351 Nov 29 16:33 /cluster/data/oryLat1/oryLat1.rmsk.2bit
    #	But it did not change the /gbdb/oryLat1/oryLat1.2bit symlink

    # RepeatMasker and lib version from do.log:
    #    October 6 2006 (open-3-1-6) version of RepeatMasker
    #	CC   RELEASE 20061006;  

    # Calculate coverage to compare with other masking efforts
    featureBits oryLat1 rmsk
    #	21095035 bases of 700386597 (3.012%) in intersection
    #	It appears the Ensemble chr*.RM.fa.gz files have been hard masked.

#########################################################################
## WINDOWMASKER (DONE - 2006-11-29 - Hiram)
    #	Trying different repeat mask process to see if we get significant
    #	different masking
    ssh hgwdev
    cd /cluster/data/oryLat1/bed	    
    ~/kent/src/hg/utils/automation/doWindowMasker.pl oryLat1 \
	-workhorse=kolossus > wmRun.log 2>&1 &
    #	Save the log
    mv wmRun.log WindowMasker.2006-11-29
    #	Looks like there are two results:
    cd WindowMasker.2006-11-29
    ls -og *.2bit
# -rw-rw-r--  1 249361695 Nov 29 21:06 oryLat1.wmsk.2bit
# -rw-rw-r--  1 250245447 Nov 29 21:06 oryLat1.wmsk.sdust.2bit
    #	Let's see what got masked
    twoBitToFa oryLat1.wmsk.2bit stdout | gzip > oryLat1.wmsk.fa.gz
    twoBitToFa oryLat1.wmsk.sdust.2bit stdout | gzip > oryLat1.wmsk.sdust.fa.gz
    faSize oryLat1.wmsk.fa.gz > faSize.wmsk.txt
    faSize oryLat1.wmsk.sdust.fa.gz > faSize.wmsk.sdust.txt
    cat faSize.wmsk.sdust.txt
    #	843500808 bases (143114211 N's 700386597 real 468909282 upper
    #	231477315 lower) in 26 sequences in 1 files
    cat faSize.wmsk.txt
    #	843500808 bases (143114211 N's 700386597 real 474213482 upper
    #	226173115 lower) in 26 sequences in 1 files
    #	Percent masked sdust: %33.05 = 100*231477315/700386597
    #	Percent masked wmsk: %32.29 = 100*226173115/700386597

    #	Load this table
    hgLoadBed -strict oryLat1 windowmaskerSdust windowmasker.sdust.bed.gz

########################################################################
##  Check how much ensembl sequence is repeat masked
    ssh kkstore04
    cd /cluster/data/oryLat1/rawData
    faSize chr?.fa.gz chr1?.fa.gz chr2?.fa.gz > faSize.txt 2>&1 &
    faSize chr?.RM.fa.gz chr1?.RM.fa.gz chr2?.RM.fa.gz > faSize.RM.txt 2>&1 &
    cat faSize.RM.txt
    #	724218853 bases (171323306 N's 552895547 real 552895547 upper 0 lower)
    #	in 24 sequences in 24 files
    cat faSize.txt
    #	724218853 bases (142092461 N's 582126392 real 582126392 upper 0 lower)
    #	in 24 sequences in 24 files
    #	additional N's in the masked sequence: 29230845 = 171323306 - 142092461
    #	percent of real sequence knocked out %5.02 = 100*29230845/582126392

#########################################################################
# SIMPLE REPEATS (TRF) (DONE 2006-12-04 - Hiram)
    ssh kolossus
    mkdir /cluster/data/oryLat1/bed/simpleRepeat
    cd /cluster/data/oryLat1/bed/simpleRepeat
    time nice -n 19 twoBitToFa ../../oryLat1.unmasked.2bit stdout \
	| trfBig -trf=/cluster/bin/i386/trf stdin /dev/null \
	    -bedAt=simpleRepeat.bed -tempDir=/tmp > trf.log 2>&1 &
    # ~1h 20m
    # Make a filtered version for sequence masking:
    awk '{if ($5 <= 12) print;}' simpleRepeat.bed > trfMask.bed
    splitFileByColumn trfMask.bed trfMaskChrom

    # Load unfiltered repeats into the database:
    ssh hgwdev
    hgLoadBed oryLat1 simpleRepeat \
      /cluster/data/oryLat1/bed/simpleRepeat/simpleRepeat.bed \
      -sqlTable=$HOME/kent/src/hg/lib/simpleRepeat.sql
    featureBits oryLat1 simpleRepeat
    #	11740975 bases of 700386597 (1.676%) in intersection

#########################################################################
## Add TRF mask to WindowMasker masked sequence
    ssh kkstore04
    cd /cluster/data/oryLat1
    twoBitMask bed/WindowMasker.2006-11-29/oryLat1.wmsk.sdust.2bit \
	-add bed/simpleRepeat/trfMask.bed oryLat1.sdTrf.2bit

#########################################################################
## Send sequence to san for kluster runs (DONE - 2006-12-04 - Hiram)
    ssh kkstore04
    cd /cluster/data/oryLat1
    mkdir /san/sanvol1/scratch/oryLat1
    cp -p oryLat1.sdTrf.2bit /san/sanvol1/scratch/oryLat1/

#########################################################################
# BLASTZ/CHAIN/NET HG18 (DONE - 2006-12-04 - Hiram)
#  first time with everything, done later without the randoms and chrUn
    ssh kkstore04
    mkdir /cluster/data/oryLat1/bed/blastz.hg18.2006-12-04
    cd /cluster/data/oryLat1/bed/blastz.hg18.2006-12-04
    cat << '_EOF_' > DEF
# Medaka vs. Human

# Try "human-fugu" (more distant, less repeat-killed than mammal) params
# +M=50:
BLASTZ_H=2000
BLASTZ_Y=3400
BLASTZ_L=6000
BLASTZ_K=2200
BLASTZ_M=50
BLASTZ_Q=/cluster/data/blastz/HoxD55.q

# TARGET: Medaka oryLat1 (40M chunks covers the largest chroms in one gulp)
#	except for chrUn of course
SEQ1_DIR=/san/sanvol1/scratch/oryLat1/oryLat1.sdTrf.2bit
SEQ1_CHUNK=40000000
SEQ1_LAP=10000
SEQ1_LEN=/cluster/data/oryLat1/chrom.sizes

# QUERY: Human hg18
SEQ2_DIR=/san/sanvol1/scratch/hg18/hg18.sdTrf.2bit
SEQ2_LEN=/cluster/data/hg18/chrom.sizes
SEQ2_CHUNK=50000000
SEQ2_LAP=0
SEQ2_LIMIT=100

BASE=/cluster/data/oryLat1/bed/blastz.hg18.2006-12-04
TMPDIR=/scratch/tmp
'_EOF_'
    # << this line keeps emacs coloring happy

    time doBlastzChainNet.pl DEF -chainMinScore=2000 -chainLinearGap=loose \
      -bigClusterHub pk \
      -blastzOutRoot /cluster/bluearc/oryLat1Hg18 > do.log 2>&1 &
    #	real    312m34.518s
    #	user    0m0.151s
    #	sys     0m0.123s

    #	failed during the netClass operation in loadUp.csh due to missing
    #	rmsk table in oryLat1 - added options to netClass to allow
    #	specification of repeat table, and rerun the two commands here to
    #	finish off the loadUp.csh step:
    ssh hgwdev
    cd /cluster/data/oryLat1/bed/blastz.hg18.2006-12-04/axtChain
    netClass -verbose=2 -noAr -tRepeats=repeats -qRepeats=windowmaskerSdust \
        noClass.net oryLat1 hg18 oryLat1.hg18.net
    netFilter -minGap=10 oryLat1.hg18.net \
	| hgLoadNet -verbose=0 oryLat1 netHg18 stdin
    #	Then, continuing:
    ssh kkstore04
    cd /cluster/data/oryLat1/bed/blastz.hg18.2006-12-04
    time $HOME/kent/src/hg/utils/automation/doBlastzChainNet.pl DEF \
	-chainMinScore=2000 -chainLinearGap=loose \
	-tRepeats=repeats -qRepeats=windowmaskerSdust -bigClusterHub=pk \
	-continue=download \
	-blastzOutRoot /cluster/bluearc/oryLat1Hg18 > download.log 2>&1 &

    cd /cluster/data/oryLat1/bed
    ln -s blastz.hg18.2006-12-04 blastz.hg18
    ssh hgwdev
    cd /cluster/data/oryLat1/bed/blastz.hg18
    nice featureBits oryLat1 chainHg18Link > fb.oryLat1.hg18.txt 2>&1
    #	48309237 bases of 700386597 (6.898%) in intersection

    # Let's try a swap and see what happens
    ssh kkstore04
    mkdir /cluster/data/hg18/bed/blastz.oryLat1.swap
    cd /cluster/data/hg18/bed/blastz.oryLat1.swap
    time $HOME/kent/src/hg/utils/automation/doBlastzChainNet.pl -verbose=2 \
	/cluster/data/oryLat1/bed/blastz.hg18.2006-12-04/DEF \
	-chainMinScore=2000 -chainLinearGap=loose \
	-tRepeats=repeats -qRepeats=windowmaskerSdust -bigClusterHub=pk \
	-swap \
	-blastzOutRoot /cluster/bluearc/oryLat1Hg18 > swap.log 2>&1 &
    # fails at:
    # HgStepManager: executing step 'net'.
    # netChains: looks like previous stage was not successful
    #	(can't find [hg18.oryLat1.]all.chain[.gz]).

    # it will continue at net after this:
    time $HOME/kent/src/hg/utils/automation/doBlastzChainNet.pl -verbose=2 \
	/cluster/data/oryLat1/bed/blastz.hg18.2006-12-04/DEF \
	-chainMinScore=2000 -chainLinearGap=loose \
	-tRepeats=repeats -qRepeats=windowmaskerSdust -bigClusterHub=pk \
	-continue=net -swap \
	-blastzOutRoot /cluster/bluearc/oryLat1Hg18 > net_swap.log 2>&1 &
    #	failed on cleanUp because /cluster/bluearc/oryLat1Hg18 did not exist

    nice -n 19 featureBits hg18 chainOryLat1Link > fb.hg18.oryLat1.txt 2>&1
    #	57908796 bases of 2881515245 (2.010%) in intersection

#########################################################################
# BLASTZ/CHAIN/NET FR1 (DONE - 2006-12-04 - Hiram)
## first time, run later with fr1 scaffolds and oryLat1 chrUn out of the
##	picture
    ssh kkstore04
    mkdir /cluster/data/oryLat1/bed/blastz.fr1.2006-12-04
    cd /cluster/data/oryLat1/bed/blastz.fr1.2006-12-04
    cat << '_EOF_' > DEF
# Medaka vs. Fugu

# Try "human-fugu" (more distant, less repeat-killed than mammal) params
# +M=50:
BLASTZ_H=2000
BLASTZ_Y=3400
BLASTZ_L=6000
BLASTZ_K=2200
BLASTZ_M=50
BLASTZ_Q=/cluster/data/blastz/HoxD55.q

# TARGET: Medaka oryLat1 (40M chunks covers the largest chroms in one gulp)
#	except for chrUn of course
SEQ1_DIR=/san/sanvol1/scratch/oryLat1/oryLat1.sdTrf.2bit
SEQ1_CHUNK=40000000
SEQ1_LAP=10000
SEQ1_LEN=/cluster/data/oryLat1/chrom.sizes

# QUERY: Fugu fr1
SEQ2_DIR=/san/sanvol1/scratch/fr1/fr1.sdTrf.2bit
SEQ2_LEN=/cluster/data/fr1/chrom.sizes
SEQ2_CHUNK=40000000
SEQ2_LAP=0
SEQ2_LIMIT=100

BASE=/cluster/data/oryLat1/bed/blastz.fr1.2006-12-04
TMPDIR=/scratch/tmp
'_EOF_'
    # << this line keeps emacs coloring happy

    time doBlastzChainNet.pl DEF -chainMinScore=2000 -chainLinearGap=loose \
      -bigClusterHub pk \
      -blastzOutRoot /cluster/bluearc/oryLat1Fr1 > do.log 2>&1 &
    #	real    832m8.618s
    #	user    0m0.165s
    #	sys     0m0.109s

    #	failed during the netClass operation in loadUp.csh due to missing
    #	rmsk table in oryLat1 - added options to netClass to allow
    #	specification of repeat table, and rerun the two commands here to
    #	finish off the loadUp.csh step:
    ssh hgwdev
    cd /cluster/data/oryLat1/bed/blastz.fr1.2006-12-04/axtChain
    netClass -verbose=2 -noAr -tRepeats=repeats -qRepeats=windowmaskerSdust \
        noClass.net oryLat1 fr1 oryLat1.fr1.net
    netFilter -minGap=10 oryLat1.fr1.net \
	| hgLoadNet -verbose=0 oryLat1 netFr1 stdin
    #	Then, continuing:
    ssh kkstore04
    cd /cluster/data/oryLat1/bed/blastz.fr1.2006-12-04
    time $HOME/kent/src/hg/utils/automation/doBlastzChainNet.pl DEF \
	-chainMinScore=2000 -chainLinearGap=loose \
	-tRepeats=repeats -qRepeats=windowmaskerSdust -bigClusterHub=pk \
	-continue=download \
	-blastzOutRoot /cluster/bluearc/oryLat1Fr1 > download.log 2>&1 &


    cd /cluster/data/oryLat1/bed
    ln -s blastz.fr1.2006-12-04 blastz.fr1
    ssh hgwdev
    cd /cluster/data/oryLat1/bed/blastz.fr1
    nice -n 19 featureBits oryLat1 chainFr1Link > fb.oryLat1.fr1.txt 2>&1
    #	170744130 bases of 700386597 (24.379%) in intersection

    # Let's try a swap and see what happens
    ssh kkstore04
    mkdir /cluster/data/fr1/bed/blastz.oryLat1.swap
    cd /cluster/data/fr1/bed/blastz.oryLat1.swap
    time $HOME/kent/src/hg/utils/automation/doBlastzChainNet.pl -verbose=2 \
	/cluster/data/oryLat1/bed/blastz.fr1.2006-12-04/DEF \
	-chainMinScore=2000 -chainLinearGap=loose \
	-tRepeats=repeats -qRepeats=windowmaskerSdust -bigClusterHub=pk \
	-swap \
	-blastzOutRoot /cluster/bluearc/oryLat1Fr1 > swap.log 2>&1 &
    # fails at:
    # HgStepManager: executing step 'net'.
    # netChains: looks like previous stage was not successful
    #	(can't find [fr1.oryLat1.]all.chain[.gz]).

    # it will continue at net after this:
    time $HOME/kent/src/hg/utils/automation/doBlastzChainNet.pl -verbose=2 \
	/cluster/data/oryLat1/bed/blastz.fr1.2006-12-04/DEF \
	-chainMinScore=2000 -chainLinearGap=loose \
	-tRepeats=repeats -qRepeats=windowmaskerSdust -bigClusterHub=pk \
	-continue=net -swap > net_swap.log 2>&1 &

#########################################################################
# BLASTZ/CHAIN/NET danRer4 (DONE - 2006-12-04 - Hiram)
    ssh kkstore04
    mkdir /cluster/data/oryLat1/bed/blastz.danRer4.2006-12-04
    cd /cluster/data/oryLat1/bed/blastz.danRer4.2006-12-04
    cat << '_EOF_' > DEF
# Medaka vs. Zebrafish

# Try "human-fugu" (more distant, less repeat-killed than mammal) params
# +M=50:
BLASTZ_H=2000
BLASTZ_Y=3400
BLASTZ_L=6000
BLASTZ_K=2200
BLASTZ_M=50
BLASTZ_Q=/cluster/data/blastz/HoxD55.q

# TARGET: Medaka oryLat1 (40M chunks covers the largest chroms in one gulp)
#	except for chrUn of course
SEQ1_DIR=/san/sanvol1/scratch/oryLat1/oryLat1.sdTrf.2bit
SEQ1_CHUNK=40000000
SEQ1_LAP=10000
SEQ1_LEN=/cluster/data/oryLat1/chrom.sizes

# QUERY: Zebrafish danRer4
SEQ2_DIR=/san/sanvol1/scratch/danRer4/danRer4.sdTrf.2bit
SEQ2_LEN=/cluster/data/danRer4/chrom.sizes
SEQ2_CHUNK=40000000
SEQ2_LAP=0
SEQ2_LIMIT=100

BASE=/cluster/data/oryLat1/bed/blastz.danRer4.2006-12-04
TMPDIR=/scratch/tmp
'_EOF_'
    # << this line keeps emacs coloring happy

    time doBlastzChainNet.pl DEF -chainMinScore=2000 -chainLinearGap=loose \
      -bigClusterHub pk \
      -blastzOutRoot /cluster/bluearc/oryLat1DanRer4 > do.log 2>&1 &

    #	failed during the netClass operation in loadUp.csh due to missing
    #	rmsk table in oryLat1 - added options to netClass to allow
    #	specification of repeat table, and rerun the two commands here to
    #	finish off the loadUp.csh step:
    ssh hgwdev
    cd /cluster/data/oryLat1/bed/blastz.danRer4.2006-12-04/axtChain
    netClass -verbose=2 -noAr -tRepeats=repeats -qRepeats=windowmaskerSdust \
        noClass.net oryLat1 danRer4 oryLat1.danRer4.net
    netFilter -minGap=10 oryLat1.danRer4.net \
	| hgLoadNet -verbose=0 oryLat1 netDanRer4 stdin

    #	Then continuing
    ssh kkstore04
    cd /cluster/data/oryLat1/bed/blastz.danRer4.2006-12-04
    time $HOME/kent/src/hg/utils/automation/doBlastzChainNet.pl DEF \
	-chainMinScore=2000 -chainLinearGap=loose \
	-tRepeats=repeats -qRepeats=windowmaskerSdust -bigClusterHub=pk \
	-continue=download \
	-blastzOutRoot /cluster/bluearc/oryLat1DanRer4 > download.log 2>&1 &

    cd /cluster/data/oryLat1/bed
    ln -s blastz.danRer4.2006-12-04 blastz.danRer4
    ssh hgwdev
    cd /cluster/data/oryLat1/bed/blastz.danRer4
    nice -n 19 featureBits oryLat1 chainDanRer4Link >fb.oryLat1.danRer4.txt 2>&1
    #	161293587 bases of 700386597 (23.029%) in intersection

    # Let's try a swap and see what happens
    ssh kkstore04
    mkdir /cluster/data/danRer4/bed/blastz.oryLat1.swap
    cd /cluster/data/danRer4/bed/blastz.oryLat1.swap
    time $HOME/kent/src/hg/utils/automation/doBlastzChainNet.pl -verbose=2 \
	/cluster/data/oryLat1/bed/blastz.danRer4.2006-12-04/DEF \
	-chainMinScore=2000 -chainLinearGap=loose \
	-tRepeats=repeats -qRepeats=windowmaskerSdust -bigClusterHub=pk \
	-swap \
	-blastzOutRoot /cluster/bluearc/oryLat1DanRer4 > swap.log 2>&1 &

    time $HOME/kent/src/hg/utils/automation/doBlastzChainNet.pl -verbose=2 \
	/cluster/data/oryLat1/bed/blastz.danRer4.2006-12-04/DEF \
	-chainMinScore=2000 -chainLinearGap=loose \
	-tRepeats=repeats -qRepeats=windowmaskerSdust -bigClusterHub=pk \
	-continue=net -swap > swap.log 2>&1 &

#########################################################################
# BLASTZ/CHAIN/NET gasAcu1 (DONE - 2006-12-04 - Hiram)
## first time - run later with chrUn in the picture
    ssh kkstore04
    mkdir /cluster/data/oryLat1/bed/blastz.gasAcu1.2006-12-04
    cd /cluster/data/oryLat1/bed/blastz.gasAcu1.2006-12-04
    cat << '_EOF_' > DEF
# Medaka vs. Stippleback

# Try "human-fugu" (more distant, less repeat-killed than mammal) params
# +M=50:
BLASTZ_H=2000
BLASTZ_Y=3400
BLASTZ_L=6000
BLASTZ_K=2200
BLASTZ_M=50
BLASTZ_Q=/cluster/data/blastz/HoxD55.q

# TARGET: Medaka oryLat1 (40M chunks covers the largest chroms in one gulp)
#	except for chrUn of course
SEQ1_DIR=/san/sanvol1/scratch/oryLat1/oryLat1.sdTrf.2bit
SEQ1_CHUNK=40000000
SEQ1_LAP=10000
SEQ1_LEN=/cluster/data/oryLat1/chrom.sizes

# QUERY: Stippleback gasAcu1
SEQ2_DIR=/san/sanvol1/scratch/gasAcu1/gasAcu1.sdTrf.2bit
SEQ2_LEN=/cluster/data/gasAcu1/chrom.sizes
SEQ2_CHUNK=40000000
SEQ2_LAP=0
SEQ2_LIMIT=100

BASE=/cluster/data/oryLat1/bed/blastz.gasAcu1.2006-12-04
TMPDIR=/scratch/tmp
'_EOF_'
    # << this line keeps emacs coloring happy

    time doBlastzChainNet.pl DEF -chainMinScore=2000 -chainLinearGap=loose \
      -bigClusterHub pk \
      -blastzOutRoot /cluster/bluearc/oryLat1GasAcu1 > do.log 2>&1 &
    #	real    419m21.385s
    #	user    0m0.168s
    #	sys     0m0.123s
    time $HOME/kent/src/hg/utils/automation/doBlastzChainNet.pl DEF \
	-chainMinScore=2000 -chainLinearGap=loose \
	-tRepeats=repeats -qRepeats=windowmaskerSdust -bigClusterHub pk \
	-blastzOutRoot /cluster/bluearc/oryLat1GasAcu1 > do.log 2>&1 &

    time $HOME/kent/src/hg/utils/automation/doBlastzChainNet.pl DEF \
	-chainMinScore=2000 -chainLinearGap=loose \
	-tRepeats=repeats -qRepeats=windowmaskerSdust -bigClusterHub=pk \
	-continue=download \
	-blastzOutRoot /cluster/bluearc/oryLat1GasAcu1 > download.log 2>&1 &

    cd /cluster/data/oryLat1/bed
    ln -s blastz.gasAcu1.2006-12-04 blastz.gasAcu1
    ssh hgwdev
    cd /cluster/data/oryLat1/bed/blastz.gasAcu1
    nice -n 19 featureBits oryLat1 chainGasAcu1Link >fb.oryLat1.gasAcu1.txt 2>&1
    #	200841711 bases of 700386597 (28.676%) in intersection

    #	the 2006-12-22 run on oryLat1 was eventually used instead of a swap

#########################################################################
# BLASTZ/CHAIN/NET tetNig1 (DONE - 2006-12-04 - Hiram)
##  first time, run later without the chrUn in oryLat1 or randoms in tetNig1
    ssh kkstore04
    mkdir /cluster/data/oryLat1/bed/blastz.tetNig1.2006-12-04
    cd /cluster/data/oryLat1/bed/blastz.tetNig1.2006-12-04
    cat << '_EOF_' > DEF
# Medaka vs. Tetraodon

# Try "human-fugu" (more distant, less repeat-killed than mammal) params
# +M=50:
BLASTZ_H=2000
BLASTZ_Y=3400
BLASTZ_L=6000
BLASTZ_K=2200
BLASTZ_M=50
BLASTZ_Q=/cluster/data/blastz/HoxD55.q

# TARGET: Medaka oryLat1 (40M chunks covers the largest chroms in one gulp)
#	except for chrUn of course
SEQ1_DIR=/san/sanvol1/scratch/oryLat1/oryLat1.sdTrf.2bit
SEQ1_CHUNK=40000000
SEQ1_LAP=10000
SEQ1_LEN=/cluster/data/oryLat1/chrom.sizes

# QUERY: Tetraodon tetNig1
SEQ2_DIR=/san/sanvol1/scratch/tetNig1/tetNig1.sdTrf.2bit
SEQ2_LEN=/cluster/data/tetNig1/chrom.sizes
SEQ2_CHUNK=40000000
SEQ2_LAP=0
SEQ2_LIMIT=100

BASE=/cluster/data/oryLat1/bed/blastz.tetNig1.2006-12-04
TMPDIR=/scratch/tmp
'_EOF_'
    # << this line keeps emacs coloring happy

    time doBlastzChainNet.pl DEF -chainMinScore=2000 -chainLinearGap=loose \
      -bigClusterHub pk \
      -blastzOutRoot /cluster/bluearc/oryLat1TetNig1 > do.log 2>&1 &
    #	real    362m9.644s
    #	user    0m0.160s
    #	sys     0m0.116s
    #	failed during the netClass operation in loadUp.csh due to missing
    #	rmsk table in oryLat1 - added options to netClass to allow
    #	specification of repeat table, and rerun the two commands here to
    #	finish off the loadUp.csh step:
    ssh hgwdev
    cd /cluster/data/oryLat1/bed/blastz.tetNig1.2006-12-04/axtChain
    netClass -verbose=2 -noAr -tRepeats=repeats -qRepeats=windowmaskerSdust \
        noClass.net oryLat1 tetNig1 oryLat1.tetNig1.net
    netFilter -minGap=10 oryLat1.tetNig1.net \
	| hgLoadNet -verbose=0 oryLat1 netTetNig1 stdin
    #	Then, continuing:
    ssh kkstore04
    cd /cluster/data/oryLat1/bed/blastz.tetNig1.2006-12-04
    time $HOME/kent/src/hg/utils/automation/doBlastzChainNet.pl DEF \
	-chainMinScore=2000 -chainLinearGap=loose \
	-tRepeats=repeats -qRepeats=windowmaskerSdust -bigClusterHub=pk \
	-continue=download \
	-blastzOutRoot /cluster/bluearc/oryLat1TetNig1 > download.log 2>&1 &


    cd /cluster/data/oryLat1/bed
    ln -s blastz.tetNig1.2006-12-04 blastz.tetNig1
    ssh hgwdev
    cd /cluster/data/oryLat1/bed/blastz.tetNig1
    nice -n 19 featureBits oryLat1 chainTetNig1Link >fb.oryLat1.tetNig1.txt 2>&1
    #	161622943 bases of 700386597 (23.076%) in intersection

    #	the run from 2006-12-13 was used for the swap
    # Let's try a swap and see what happens
    ssh kkstore04
    mkdir /cluster/data/tetNig1/bed/blastz.oryLat1.swap
    cd /cluster/data/tetNig1/bed/blastz.oryLat1.swap
    time $HOME/kent/src/hg/utils/automation/doBlastzChainNet.pl -verbose=2 \
	/cluster/data/oryLat1/bed/blastz.tetNig1.2006-12-04/DEF \
	-chainMinScore=2000 -chainLinearGap=loose \
	-tRepeats=repeats -qRepeats=windowmaskerSdust -bigClusterHub=pk \
	-swap > swap.log 2>&1 &
    # fails at:
    # HgStepManager: executing step 'net'.
    # netChains: looks like previous stage was not successful
    #	(can't find [tetNig1.oryLat1.]all.chain[.gz]).
    time $HOME/kent/src/hg/utils/automation/doBlastzChainNet.pl -verbose=2 \
	/cluster/data/oryLat1/bed/blastz.tetNig1.2006-12-04/DEF \
	-chainMinScore=2000 -chainLinearGap=loose \
	-tRepeats=repeats -qRepeats=windowmaskerSdust -bigClusterHub=pk \
	-continue=net -swap > net_swap.log 2>&1 &

########################################################################
########################################################################
gasAcu1 #	200841711 bases of 700386597 (28.676%) in intersection
fr1     #	170744130 bases of 700386597 (24.379%) in intersection
tetNig1 #	161622943 bases of 700386597 (23.076%) in intersection
danRer4 #	161293587 bases of 700386597 (23.029%) in intersection
hg18    #	48309237 bases of 700386597 (6.898%) in intersection
########################################################################
on hg18 #	57908796 bases of 2881515245 (2.010%) in intersection
########################################################################
##
#                         featureBits chainLink measures
#                                           chainOryLat1Link  chain linearGap
#    distance                      on oryLat1    on other   minScore
#  1  0.xxxx - stickleback gasAcu1 (% 28.676) (% 38.909)       2000     loose
#  2  0.xxxx - fugu fr1            (% 24.379) (% 41.994)       2000     loose
#  3  0.xxxx - tetraodon tetNig1   (% 23.076) (no swap)        2000     loose
#  4  0.xxxx - zebrafish danRer4   (% 23.029) (% 13.084)       2000     loose
#  5  0.xxxx - human hg18          (%  6.898) (%  2.010)       2000     loose
######
## Second time around, without the randoms and chrUn sequences:
#  1  0.xxxx - stickleback gasAcu1 (% 28.110) (% 37.899)       2000     loose
#  2  0.xxxx - fugu fr1            (% 24.764) (% 40.018)       2000     loose
#  3  0.xxxx - tetraodon tetNig1   (% 17.585) (% 39.552)       2000     loose
#  4  0.xxxx - zebrafish danRer4   (% 22.755) (% 12.192)       2000     loose
#  5  0.xxxx - human hg18          (%  6.896) (%  1.791)       2000     loose
intersection
gasAcu1 #	163640802 bases of 582143106 (28.110%) in intersection
on gasAcu1 #	148335558 bases of 391398261 (37.899%) in intersection
tetNig1 #	102368873 bases of 582143106 (17.585%) in intersection
on tetNig1 #	81227466 bases of 205367312 (39.552%) in intersection
fr1 #	173441855 bases of 700386597 (24.764%) in intersection
on fr1 #	126265137 bases of 315518167 (40.018%) in intersection
danRer4 #	159369670 bases of 700386597 (22.755%) in intersection
on danRer4 #	188598788 bases of 1546950119 (12.192%) in intersection
hg18 #	48299069 bases of 700386597 (6.896%) in intersection
on hg18 #	51598883 bases of 2881515245 (1.791%) in intersection

#########################################################################
# BLASTZ/CHAIN/NET FR1 (DONE - 2006-12-08 - Hiram)
##  Second time, no chrUn in oryLat1, and align to fr1 scaffolds,
##	results lifted to fr1 chrUn coordinates
    ssh kkstore04
    mkdir /cluster/data/oryLat1/bed/blastz.fr1.2006-12-08
    cd /cluster/data/oryLat1/bed/blastz.fr1.2006-12-08
    cat << '_EOF_' > DEF
# Medaka vs. Fugu

# Try "human-fugu" (more distant, less repeat-killed than mammal) params
# +M=50:
BLASTZ_H=2000
BLASTZ_Y=3400
BLASTZ_L=6000
BLASTZ_K=2200
BLASTZ_M=50
BLASTZ_Q=/cluster/data/blastz/HoxD55.q

# TARGET: Medaka oryLat1 (40M chunks covers the largest chroms in one gulp)
#       no chrUn in this alignment run
SEQ1_DIR=/san/sanvol1/scratch/oryLat1/oryLat1.noUn.sdTrf.2bit
SEQ1_LEN=/san/sanvol1/scratch/oryLat1/oryLat1.noUn.sdTrf.sizes
SEQ1_CHUNK=40000000
SEQ1_LAP=10000

# QUERY: Fugu fr1
#       Align to the scaffolds, results lifed up to chrUn.sdTrf coordinates
SEQ2_DIR=/san/sanvol1/scratch/fr1/chrUn.sdTrf.2bit
SEQ2_LEN=/san/sanvol1/scratch/fr1/chrom.sizes
SEQ2_CTGDIR=/san/sanvol1/scratch/fr1/fr1.UnScaffolds.sdTrf.2bit
SEQ2_CTGLEN=/san/sanvol1/scratch/fr1/scaffold.sizes
SEQ2_LIFT=/san/sanvol1/scratch/fr1/UnScaffolds/ordered.lft
SEQ2_CHUNK=20000000
SEQ2_LAP=0
SEQ2_LIMIT=30

BASE=/cluster/data/oryLat1/bed/blastz.fr1.2006-12-08
TMPDIR=/scratch/tmp
'_EOF_'
    # << this line keeps emacs coloring happy

    time doBlastzChainNet.pl DEF -chainMinScore=2000 -chainLinearGap=loose \
	-tRepeats=repeats -qRepeats=windowmaskerSdust -bigClusterHub=pk \
	-blastzOutRoot /cluster/bluearc/oryLat1Fr1 > do.log 2>&1 &
    #	real    424m7.567s
    #	user    0m0.199s
    #	sys     0m0.160s

    cd /cluster/data/oryLat1/bed
    rm blastz.fr1
    ln -s blastz.fr1.2006-12-08 blastz.fr1
    ssh hgwdev
    cd /cluster/data/oryLat1/bed/blastz.fr1
    nice -n 19 featureBits oryLat1 chainFr1Link > fb.oryLat1.fr1.txt 2>&1
    #	173441855 bases of 700386597 (24.764%) in intersection

    # Let's try a swap and see what happens
    ssh kkstore04
    mkdir /cluster/data/fr1/bed/blastz.oryLat1.swap
    cd /cluster/data/fr1/bed/blastz.oryLat1.swap
    time doBlastzChainNet.pl -verbose=2 \
	/cluster/data/oryLat1/bed/blastz.fr1.2006-12-08/DEF \
	-chainMinScore=2000 -chainLinearGap=loose \
	-tRepeats=repeats -qRepeats=windowmaskerSdust -bigClusterHub=pk \
	-swap > swap.log 2>&1 &
    #	real    32m7.742s
    #	user    0m0.148s
    #	sys     0m0.097s

    ssh hgwdev
    cd /cluster/data/fr1/bed/blastz.oryLat1.swap
    time nice -n 19 featureBits fr1 chainOryLat1Link \
	> fb.fr1.chainOryLat1Link.txt 2>&1
    #	126265137 bases of 315518167 (40.018%) in intersection

#########################################################################
# BLASTZ/CHAIN/NET gasAcu1 (DONE - 2006-12-08 - Hiram)
## Second time, without chrUn in either assembly
    ssh kkstore04
    mkdir /cluster/data/oryLat1/bed/blastz.gasAcu1.2006-12-08
    cd /cluster/data/oryLat1/bed/blastz.gasAcu1.2006-12-08
    cat << '_EOF_' > DEF
# Medaka vs. Stippleback

# Try "human-fugu" (more distant, less repeat-killed than mammal) params
# +M=50:
BLASTZ_H=2000
BLASTZ_Y=3400
BLASTZ_L=6000
BLASTZ_K=2200
BLASTZ_M=50
BLASTZ_Q=/cluster/data/blastz/HoxD55.q

# TARGET: Medaka oryLat1 (40M chunks covers the largest chroms in one gulp)
#       no chrUn in this alignment run
SEQ1_DIR=/san/sanvol1/scratch/oryLat1/oryLat1.noUn.sdTrf.2bit
SEQ1_LEN=/san/sanvol1/scratch/oryLat1/oryLat1.noUn.sdTrf.sizes
SEQ1_CHUNK=40000000
SEQ1_LAP=10000

# QUERY: Stippleback gasAcu1, no chrUn in this run
SEQ2_DIR=/san/sanvol1/scratch/gasAcu1/gasAcu1.noUn.sdTrf.2bit
SEQ2_LEN=/san/sanvol1/scratch/gasAcu1/gasAcu1.noUn.sdTrf.sizes
SEQ2_CHUNK=40000000
SEQ2_LAP=0
SEQ2_LIMIT=100

BASE=/cluster/data/oryLat1/bed/blastz.gasAcu1.2006-12-08
TMPDIR=/scratch/tmp
'_EOF_'
    # << this line keeps emacs coloring happy

    time doBlastzChainNet.pl DEF -chainMinScore=2000 -chainLinearGap=loose \
	-tRepeats=repeats -qRepeats=windowmaskerSdust -bigClusterHub pk \
	-blastzOutRoot /cluster/bluearc/oryLat1GasAcu1 > do.log 2>&1 &
    #	real    99m16.562s

    cd /cluster/data/oryLat1/bed
    rm blastz.gasAcu1
    ln -s blastz.gasAcu1.2006-12-08 blastz.gasAcu1
    ssh hgwdev
    cd /cluster/data/oryLat1/bed/blastz.gasAcu1
    nice -n 19 featureBits -noRandom oryLat1 chainGasAcu1Link \
	>fb.oryLat1.gasAcu1.txt 2>&1 &
    #	163640802 bases of 582143106 (28.110%) in intersection

    # And to swap
    ## This sequence duplicated in gasAcu1.txt too
    ssh kkstore04
    cd /cluster/data/gasAcu1/bed
    rm blastz.oryLat1.swap
    mkdir /cluster/data/gasAcu1/bed/blastz.oryLat1.swap
    cd /cluster/data/gasAcu1/bed/blastz.oryLat1.swap
    time $HOME/kent/src/hg/utils/automation/doBlastzChainNet.pl -verbose=2 \
	/cluster/data/oryLat1/bed/blastz.gasAcu1.2006-12-08/DEF \
	-chainMinScore=2000 -chainLinearGap=loose \
	-tRepeats=repeats -qRepeats=windowmaskerSdust -bigClusterHub=pk \
	-swap > swap.log 2>&1 &
    #	real    26m32.549s

    ssh hgwdev
    cd /cluster/data/gasAcu1/bed/blastz.oryLat1.swap
    nice -n 19 featureBits -noRandom gasAcu1 chainOryLat1Link \
	>fb.gasAcu1.oryLat1.txt 2>&1 &
    #	148335558 bases of 391398261 (37.899%) in intersection

#########################################################################
# BLASTZ/CHAIN/NET tetNig1 (DONE - 2006-12-08 - Hiram)
## second time, no chrUn in oryLat1 and not randoms or chrUn in tetNig1
    ssh kkstore04
    mkdir /cluster/data/oryLat1/bed/blastz.tetNig1.2006-12-08
    cd /cluster/data/oryLat1/bed/blastz.tetNig1.2006-12-08
    cat << '_EOF_' > DEF
# Medaka vs. Tetraodon

# Try "human-fugu" (more distant, less repeat-killed than mammal) params
# +M=50:
BLASTZ_H=2000
BLASTZ_Y=3400
BLASTZ_L=6000
BLASTZ_K=2200
BLASTZ_M=50
BLASTZ_Q=/cluster/data/blastz/HoxD55.q

# TARGET: Medaka oryLat1 (40M chunks covers the largest chroms in one gulp)
#	except for chrUn of course
SEQ1_DIR=/san/sanvol1/scratch/oryLat1/oryLat1.sdTrf.2bit
SEQ1_CHUNK=40000000
SEQ1_LAP=10000
SEQ1_LEN=/cluster/data/oryLat1/chrom.sizes

# QUERY: Tetraodon tetNig1
SEQ2_DIR=/san/sanvol1/scratch/tetNig1/tetNig1.sdTrf.2bit
SEQ2_LEN=/cluster/data/tetNig1/chrom.sizes
SEQ2_CHUNK=40000000
SEQ2_LAP=0
SEQ2_LIMIT=100

BASE=/cluster/data/oryLat1/bed/blastz.tetNig1.2006-12-04
TMPDIR=/scratch/tmp
'_EOF_'
    # << this line keeps emacs coloring happy

    time doBlastzChainNet.pl DEF -chainMinScore=2000 -chainLinearGap=loose \
	-tRepeats=repeats -qRepeats=windowmaskerSdust -bigClusterHub=pk \
	-blastzOutRoot /cluster/bluearc/oryLat1TetNig1 > do.log 2>&1 &
    #	real 42m53.388s

    cd /cluster/data/oryLat1/bed
    rm blastz.tetNig1
    ln -s blastz.tetNig1.2006-12-08 blastz.tetNig1
    ssh hgwdev
    cd /cluster/data/oryLat1/bed/blastz.tetNig1
    nice -n 19 featureBits -noRandom oryLat1 chainTetNig1Link \
	>fb.oryLat1.tetNig1.txt 2>&1
    #	102368873 bases of 582143106 (17.585%) in intersection

    # Let's try a swap and see what happens
    ssh kkstore04
    cd /cluster/data/tetNig1/bed
    mv blastz.oryLat1.swap blastz.oryLat1.swap.2006-12-05
    mkdir blastz.oryLat1.swap
    cd /cluster/data/tetNig1/bed/blastz.oryLat1.swap
    time $HOME/kent/src/hg/utils/automation/doBlastzChainNet.pl -verbose=2 \
	/cluster/data/oryLat1/bed/blastz.tetNig1.2006-12-08/DEF \
	-chainMinScore=2000 -chainLinearGap=loose \
	-tRepeats=repeats -qRepeats=windowmaskerSdust -bigClusterHub=pk \
	-swap > swap.log 2>&1 &
    #	real    11m57.050s
    nice -n 19 featureBits -noRandom tetNig1 chainOryLat1Link \
	>fb.tetNig1.oryLat1.txt 2>&1
    #	81227466 bases of 205367312 (39.552%) in intersection

#########################################################################
# BLASTZ/CHAIN/NET danRer4 (DONE - 2006-12-08 - Hiram)
## Second time - now without chrUn in medata, or any of
##	the randoms or Un in danRer4
    ssh kkstore04
    mkdir /cluster/data/oryLat1/bed/blastz.danRer4.2006-12-08
    cd /cluster/data/oryLat1/bed/blastz.danRer4.2006-12-08
    cat << '_EOF_' > DEF
# Medaka vs. Zebrafish

# Try "human-fugu" (more distant, less repeat-killed than mammal) params
# +M=50:
BLASTZ_H=2000
BLASTZ_Y=3400
BLASTZ_L=6000
BLASTZ_K=2200
BLASTZ_M=50
BLASTZ_Q=/cluster/data/blastz/HoxD55.q

# TARGET: Medaka oryLat1 (40M chunks covers the largest chroms in one gulp)
#       no chrUn in this alignment run
SEQ1_DIR=/san/sanvol1/scratch/oryLat1/oryLat1.noUn.sdTrf.2bit
SEQ1_LEN=/san/sanvol1/scratch/oryLat1/oryLat1.noUn.sdTrf.sizes
SEQ1_CHUNK=40000000
SEQ1_LAP=10000

# QUERY: Zebrafish danRer4, no randoms or Un in this sequence
SEQ2_DIR=/san/sanvol1/scratch/danRer4/danRer4.noUn.sdTrf.2bit
SEQ2_LEN=/san/sanvol1/scratch/danRer4/danRer4.noUn.sdTrf.sizes
SEQ2_CHUNK=10000000
SEQ2_LIMIT=30
SEQ2_LAP=0

BASE=/cluster/data/oryLat1/bed/blastz.danRer4.2006-12-08
TMPDIR=/scratch/tmp
'_EOF_'
    # << this line keeps emacs coloring happy

    time doBlastzChainNet.pl DEF -chainMinScore=2000 -chainLinearGap=loose \
	-tRepeats=repeats -qRepeats=windowmaskerSdust -bigClusterHub=pk \
	-blastzOutRoot /cluster/bluearc/oryLat1DanRer4 > do.log 2>&1 &
    #	real    290m39.870s
    #	user    0m0.193s
    #	sys     0m0.164s


    cd /cluster/data/oryLat1/bed
    rm blastz.danRer4
    ln -s blastz.danRer4.2006-12-08 blastz.danRer4
    ssh hgwdev
    cd /cluster/data/oryLat1/bed/blastz.danRer4
    nice -n 19 featureBits oryLat1 chainDanRer4Link >fb.oryLat1.danRer4.txt 2>&1
    #	159369670 bases of 700386597 (22.755%) in intersection

    # Let's try a swap and see what happens
    ssh kkstore04
    mkdir /cluster/data/danRer4/bed/blastz.oryLat1.swap
    cd /cluster/data/danRer4/bed/blastz.oryLat1.swap
    time doBlastzChainNet.pl -verbose=2 \
	/cluster/data/oryLat1/bed/blastz.danRer4.2006-12-08/DEF \
	-chainMinScore=2000 -chainLinearGap=loose \
	-tRepeats=repeats -qRepeats=windowmaskerSdust -bigClusterHub=pk \
	-swap > swap.log 2>&1 &
    #	real    84m39.497s
    #	user    0m0.148s
    #	sys     0m0.086s

    ssh hgwdev
    cd /cluster/data/danRer4/bed/blastz.oryLat1.swap
    time nice -n 19 featureBits -noRandom danRer4 chainOryLat1Link \
	> fb.danRer4.chainOryLat1Link.txt 2>&1
    #	188598788 bases of 1546950119 (12.192%) in intersection

#########################################################################
# BLASTZ/CHAIN/NET HG18 (DONE - 2006-12-08 - Hiram)
#  second time without the randoms and chrUn
    ssh kkstore04
    mkdir /cluster/data/oryLat1/bed/blastz.hg18.2006-12-08
    cd /cluster/data/oryLat1/bed/blastz.hg18.2006-12-08
    cat << '_EOF_' > DEF
# Medaka vs. Human

# Try "human-fugu" (more distant, less repeat-killed than mammal) params
# +M=50:
BLASTZ_H=2000
BLASTZ_Y=3400
BLASTZ_L=6000
BLASTZ_K=2200
BLASTZ_M=50
BLASTZ_Q=/cluster/data/blastz/HoxD55.q

# TARGET: Medaka oryLat1 (40M chunks covers the largest chroms in one gulp)
#       no chrUn in this alignment run
SEQ1_DIR=/san/sanvol1/scratch/oryLat1/oryLat1.noUn.sdTrf.2bit
SEQ1_LEN=/san/sanvol1/scratch/oryLat1/oryLat1.noUn.sdTrf.sizes
SEQ1_CHUNK=40000000
SEQ1_LAP=10000

# QUERY: Human hg18, no randoms in this sequence
SEQ2_DIR=/san/sanvol1/scratch/hg18/hg18.noUn.sdTrf.2bit
SEQ2_LEN=/san/sanvol1/scratch/hg18/hg18.noUn.sdTrf.sizes
SEQ2_CHUNK=10000000
SEQ2_LIMIT=30
SEQ2_LAP=0

BASE=/cluster/data/oryLat1/bed/blastz.hg18.2006-12-08
TMPDIR=/scratch/tmp
'_EOF_'
    # << this line keeps emacs coloring happy

    time doBlastzChainNet.pl DEF -chainMinScore=2000 -chainLinearGap=loose \
	-tRepeats=repeats -qRepeats=windowmaskerSdust -bigClusterHub=pk \
	-blastzOutRoot /cluster/bluearc/oryLat1Hg18 > do.log 2>&1 &
    #	real    353m18.317s
    #	user    0m0.199s
    #	sys     0m0.170s

    cd /cluster/data/oryLat1/bed
    rm -f blastz.hg18
    ln -s blastz.hg18.2006-12-08 blastz.hg18
    ssh hgwdev
    cd /cluster/data/oryLat1/bed/blastz.hg18
    time nice -n 19 featureBits -noRandom oryLat1 chainHg18Link \
	> fb.oryLat1.hg18.txt 2>&1 &
    #	48299069 bases of 700386597 (6.896%) in intersection
    #	40936335 bases of 582143106 (7.032%) in intersection

    # Let's try a swap and see what happens
    ssh kkstore04
    mkdir /cluster/data/hg18/bed/blastz.oryLat1.swap
    cd /cluster/data/hg18/bed/blastz.oryLat1.swap
    time doBlastzChainNet.pl -verbose=2 \
	/cluster/data/oryLat1/bed/blastz.hg18.2006-12-08/DEF \
	-chainMinScore=2000 -chainLinearGap=loose \
	-tRepeats=repeats -qRepeats=windowmaskerSdust -bigClusterHub=pk \
	-swap > swap.log 2>&1 &
    #	real    39m35.656s
    #	user    0m0.147s
    #	sys     0m0.100s

    time nice -n 19 featureBits -noRandom hg18 chainOryLat1Link \
	> fb.hg18.oryLat1.txt 2>&1 &
    #	51598883 bases of 2881515245 (1.791%) in intersection
    #	51259072 bases of 2868834265 (1.787%) in intersection

#############################################################################
## make up chrUn sequence of its individual scaffolds
## and a 2bit for the whole thing for blastz runs (DONE - 2006-12-13 - Hiram)
    ssh kkstore04
    cd /cluster/data/oryLat1/jkStuff
    cp /cluster/data/mm7/jkStuff/agpToLift.pl .
    cp /cluster/data/mm7/jkStuff/lft2BitToFa.pl .
    cd /cluster/data/oryLat1/Un
    ../jkStuff/agpToLift.pl chrUn.agp > chrUn.lift
    time ../jkStuff/lft2BitToFa.pl ../oryLat1.sdTrf.2bit \
	chrUn.lift > chrUnScaffolds.fa
    # make a 2bit with this scaffold sequence
    faToTwoBit ../noUn/chr*.fa chrUnScaffolds.fa oryLat1UnScaffolds.2bit
    twoBitInfo *.2bit oryLat1UnScaffolds.sizes
    #	Verify it is complete
    twoBitToFa oryLat1UnScaffolds.2bit stdout \
	| faSize stdin > oryLat1UnScaffolds.faSize
    mv oryLat1UnScaffolds.* ..

#########################################################################
# BLASTZ/CHAIN/NET FR1 (DONE - 2006-12-13 - 2007-01-18 - Hiram)
##  Third time, chrUn in scaffolds in oryLat1, and align to fr1 scaffolds,
##	both results lifted properly to chrUn coordinates
    ssh kkstore04
    mkdir /cluster/data/oryLat1/bed/blastz.fr1.2006-12-13
    cd /cluster/data/oryLat1/bed/blastz.fr1.2006-12-13
    cat << '_EOF_' > DEF
# Medaka vs. Fugu

# Try "human-fugu" (more distant, less repeat-killed than mammal) params
# +M=50:
BLASTZ_H=2000
BLASTZ_Y=3400
BLASTZ_L=6000
BLASTZ_K=2200
BLASTZ_M=50
BLASTZ_Q=/cluster/data/blastz/HoxD55.q

# TARGET: Medaka oryLat1 (40M chunks covers the largest chroms in one gulp)
#       chrUn in Scaffolds for this alignment run
SEQ1_DIR=/san/sanvol1/scratch/oryLat1/oryLat1.sdTrf.2bit
SEQ1_LEN=/san/sanvol1/scratch/oryLat1/chrom.sizes
SEQ1_CTGDIR=/san/sanvol1/scratch/oryLat1/oryLat1UnScaffolds.2bit
SEQ1_CTGLEN=/san/sanvol1/scratch/oryLat1/oryLat1UnScaffolds.sizes
SEQ1_LIFT=/san/sanvol1/scratch/oryLat1/chrUn.lift
SEQ1_CHUNK=40000000
SEQ1_LAP=10000
SEQ1_LIMIT=90

# QUERY: Fugu fr1
#       Align to the scaffolds, results lifed up to chrUn.sdTrf coordinates
SEQ2_DIR=/san/sanvol1/scratch/fr1/chrUn.sdTrf.2bit
SEQ2_LEN=/san/sanvol1/scratch/fr1/chrom.sizes
SEQ2_CTGDIR=/san/sanvol1/scratch/fr1/fr1.UnScaffolds.sdTrf.2bit
SEQ2_CTGLEN=/san/sanvol1/scratch/fr1/scaffold.sizes
SEQ2_LIFT=/san/sanvol1/scratch/fr1/UnScaffolds/ordered.lft
SEQ2_CHUNK=20000000
SEQ2_LAP=0
SEQ2_LIMIT=90

BASE=/cluster/data/oryLat1/bed/blastz.fr1.2006-12-13
TMPDIR=/scratch/tmp
'_EOF_'
    # << this line keeps emacs coloring happy

    time doBlastzChainNet.pl DEF -chainMinScore=2000 -chainLinearGap=loose \
	-tRepeats=windowmaskerSdust -qRepeats=windowmaskerSdust \
	-verbose=2 -bigClusterHub=pk -blastzOutRoot \
	/cluster/bluearc/oryLat1Fr1 > do.log 2>&1 &
    ### Two jobs fail during the chain step.  axtChain gets a segfault
    #  by-passing those, continuing with chainMerge
    time doBlastzChainNet.pl DEF -chainMinScore=2000 -chainLinearGap=loose \
	-tRepeats=windowmaskerSdust -qRepeats=windowmaskerSdust \
	-continue=chainMerge -verbose=2 -bigClusterHub=pk \
	-blastzOutRoot /cluster/bluearc/oryLat1Fr1 > chainMerge.log 2>&1 &

    cd /cluster/data/oryLat1/bed
    rm blastz.fr1
    ln -s blastz.fr1.2006-12-13 blastz.fr1
    ssh hgwdev
    cd /cluster/data/oryLat1/bed/blastz.fr1
    nice -n 19 featureBits oryLat1 chainFr1Link > fb.oryLat1.fr1.txt 2>&1
    #	165968083 bases of 700386597 (23.697%) in intersection

    # Let's try a swap and see what happens
    ssh kkstore04
    mkdir /cluster/data/fr1/bed/blastz.oryLat1.swap
    cd /cluster/data/fr1/bed/blastz.oryLat1.swap
    time doBlastzChainNet.pl -verbose=2 \
	/cluster/data/oryLat1/bed/blastz.fr1.2006-12-13/DEF \
	-chainMinScore=2000 -chainLinearGap=loose \
	-tRepeats=windowmaskerSdust -qRepeats=windowmaskerSdust \
	-bigClusterHub=pk -swap > swap.log 2>&1 &
    time doBlastzChainNet.pl -verbose=2 \
	/cluster/data/oryLat1/bed/blastz.fr1.2006-12-13/DEF \
	-chainMinScore=2000 -chainLinearGap=loose \
	-tRepeats=windowmaskerSdust -qRepeats=windowmaskerSdust \
	-continue=net -bigClusterHub=pk -swap > net_swap.log 2>&1 &
    cat fb.fr1.chainOryLat1Link.txt
    #	129282429 bases of 315518167 (40.975%) in intersection

#########################################################################
# BLASTZ/CHAIN/NET HG18 (DONE - 2006-12-13 - Hiram)
#  third time with randoms and chrUn in scaffolds on both sequences
    ssh kkstore04
    mkdir /cluster/data/oryLat1/bed/blastz.hg18.2006-12-13
    cd /cluster/data/oryLat1/bed/blastz.hg18.2006-12-13
    cat << '_EOF_' > DEF
# Medaka vs. Human

# Try "human-fugu" (more distant, less repeat-killed than mammal) params
# +M=50:
BLASTZ_H=2000
BLASTZ_Y=3400
BLASTZ_L=6000
BLASTZ_K=2200
BLASTZ_M=50
BLASTZ_Q=/cluster/data/blastz/HoxD55.q

# TARGET: Medaka oryLat1 (40M chunks covers the largest chroms in one gulp)
#       chrUn in Scaffolds for this alignment run
SEQ1_DIR=/san/sanvol1/scratch/oryLat1/oryLat1.sdTrf.2bit
SEQ1_LEN=/san/sanvol1/scratch/oryLat1/chrom.sizes
SEQ1_CTGDIR=/san/sanvol1/scratch/oryLat1/oryLat1UnScaffolds.2bit
SEQ1_CTGLEN=/san/sanvol1/scratch/oryLat1/oryLat1UnScaffolds.sizes
SEQ1_LIFT=/san/sanvol1/scratch/oryLat1/chrUn.lift
SEQ1_CHUNK=40000000
SEQ1_LAP=10000
SEQ1_LIMIT=80

# QUERY: Human hg18, randoms in contigs, lifted to their chr*_random 
SEQ2_DIR=/san/sanvol1/scratch/hg18/hg18.sdTrf.2bit
SEQ2_LEN=/cluster/data/hg18/chrom.sizes
SEQ2_CTGDIR=/san/sanvol1/scratch/hg18/hg18.randomContigs.sdTrf.2bit
SEQ2_CTGLEN=/san/sanvol1/scratch/hg18/hg18.randomContigs.sdTrf.sizes
SEQ2_LIFT=/san/sanvol1/scratch/hg18/hg18.randomContigs.lift
SEQ2_CHUNK=10000000
SEQ2_LIMIT=80
SEQ2_LAP=0

BASE=/cluster/data/oryLat1/bed/blastz.hg18.2006-12-13
TMPDIR=/scratch/tmp
'_EOF_'
    # << this line keeps emacs coloring happy

    time doBlastzChainNet.pl DEF -chainMinScore=2000 -chainLinearGap=loose \
	-tRepeats=repeats -qRepeats=windowmaskerSdust -bigClusterHub=pk \
	-verbose=2 -blastzOutRoot /cluster/bluearc/oryLat1Hg18 > do.log 2>&1 &
    #	abandonded this effort, the swap from hg18 was good enough

#########################################################################
# BLASTZ/CHAIN/NET danRer4 (DONE - 2006-12-13 - Hiram)
## Third time - randoms and Un in scaffolds, both sequences
##  The actual run that was used was done on danRer4 and swapped to here
    ssh kkstore04
    mkdir /cluster/data/oryLat1/bed/blastz.danRer4.2006-12-13
    cd /cluster/data/oryLat1/bed/blastz.danRer4.2006-12-13
    cat << '_EOF_' > DEF
# Medaka vs. Zebrafish

# Try "human-fugu" (more distant, less repeat-killed than mammal) params
# +M=50:
BLASTZ_H=2000
BLASTZ_Y=3400
BLASTZ_L=6000
BLASTZ_K=2200
BLASTZ_M=50
BLASTZ_Q=/cluster/data/blastz/HoxD55.q

# TARGET: Medaka oryLat1 (40M chunks covers the largest chroms in one gulp)
#       chrUn in Scaffolds for this alignment run
SEQ1_DIR=/san/sanvol1/scratch/oryLat1/oryLat1.sdTrf.2bit
SEQ1_LEN=/san/sanvol1/scratch/oryLat1/chrom.sizes
SEQ1_CTGDIR=/san/sanvol1/scratch/oryLat1/oryLat1UnScaffolds.2bit
SEQ1_CTGLEN=/san/sanvol1/scratch/oryLat1/oryLat1UnScaffolds.sizes
SEQ1_LIFT=/san/sanvol1/scratch/oryLat1/chrUn.lift
SEQ1_CHUNK=40000000
SEQ1_LAP=10000
SEQ1_LIMIT=100

# QUERY: Zebrafish danRer4, no randoms or Un in this sequence
SEQ2_DIR=/san/sanvol1/scratch/danRer4/danRer4.sdTrf.2bit
SEQ2_LEN=/san/sanvol1/scratch/danRer4/chrom.sizes
SEQ2_CTGDIR=/san/sanvol1/scratch/danRer4/danRer4.randomContigs.sdTrf.2bit
SEQ2_CTGLEN=/san/sanvol1/scratch/danRer4/danRer4.randomContigs.sdTrf.sizes
SEQ2_LIFT=/san/sanvol1/scratch/danRer4/danRer4.randomContigs.lift
SEQ2_CHUNK=10000000
SEQ2_LIMIT=100
SEQ2_LAP=0

BASE=/cluster/data/oryLat1/bed/blastz.danRer4.2006-12-13
TMPDIR=/scratch/tmp
'_EOF_'
    # << this line keeps emacs coloring happy

    time doBlastzChainNet.pl DEF -chainMinScore=2000 -chainLinearGap=loose \
	-tRepeats=repeats -qRepeats=windowmaskerSdust -bigClusterHub=kk \
	-verbose=2 -blastzOutRoot /cluster/bluearc/oryLat1DanRer4 >do.log 2>&1 &
    time doBlastzChainNet.pl DEF -chainMinScore=2000 -chainLinearGap=loose \
	-tRepeats=windowmaskerSdust -qRepeats=windowmaskerSdust \
	-bigClusterHub=kk \
	-continue=cat -verbose=2 \
	-blastzOutRoot /cluster/bluearc/oryLat1DanRer4 >cat.log 2>&1 &
    time doBlastzChainNet.pl DEF -chainMinScore=2000 -chainLinearGap=loose \
	-tRepeats=windowmaskerSdust -qRepeats=windowmaskerSdust \
	-bigClusterHub=kk \
	-continue=chainMerge -verbose=2 \
	-blastzOutRoot /cluster/bluearc/oryLat1DanRer4 > chainMerge.log 2>&1 &
    #	abandon this effort here, the swap from danRer4 was good enough

#########################################################################
## SWAP danRer4 to oryLat1 (DONE - 2007-01-20 - Hiram)
##  This is the one that ended up being used
    ssh kkstore04
    cd /cluster/data/danRer4/bed/blastz.oryLat1.2007-01-19
    cat fb.danRer4.chainOryLat1Link.txt
    #	209746583 bases of 1626093931 (12.899%) in intersection

    mkdir /cluster/data/oryLat1/bed/blastz.swap.danRer4
    cd /cluster/data/oryLat1/bed/blastz.swap.danRer4
    time doBlastzChainNet.pl -verbose=2 \
	/cluster/data/danRer4/bed/blastz.oryLat1.2007-01-19/DEF \
	-chainMinScore=2000 -chainLinearGap=loose \
	-tRepeats=windowmaskerSdust -qRepeats=windowmaskerSdust \
	-swap -bigClusterHub=pk  > swap.log 2>&1 &

    cat fb.oryLat1.chainDanRer4Link.txt
    #	156014546 bases of 700386597 (22.275%) in intersection
    cd /cluster/data/oryLat1/bed
    ln -s blastz.swap.danRer4 blastz.danRer4

#########################################################################
# BLASTZ/CHAIN/NET tetNig1 (DONE - 2006-12-13 - Hiram)
## third time, chrUn and randoms are in contigs in each sequence
    ssh kkstore04
    mkdir /cluster/data/oryLat1/bed/blastz.tetNig1.2006-12-13
    cd /cluster/data/oryLat1/bed/blastz.tetNig1.2006-12-13
    cat << '_EOF_' > DEF
# Medaka vs. Tetraodon

# Try "human-fugu" (more distant, less repeat-killed than mammal) params
# +M=50:
BLASTZ_H=2000
BLASTZ_Y=3400
BLASTZ_L=6000
BLASTZ_K=2200
BLASTZ_M=50
BLASTZ_Q=/cluster/data/blastz/HoxD55.q

# TARGET: Medaka oryLat1 (40M chunks covers the largest chroms in one gulp)
#       chrUn in Scaffolds for this alignment run
SEQ1_DIR=/san/sanvol1/scratch/oryLat1/oryLat1.sdTrf.2bit
SEQ1_LEN=/san/sanvol1/scratch/oryLat1/chrom.sizes
SEQ1_CTGDIR=/san/sanvol1/scratch/oryLat1/oryLat1UnScaffolds.2bit
SEQ1_CTGLEN=/san/sanvol1/scratch/oryLat1/oryLat1UnScaffolds.sizes
SEQ1_LIFT=/san/sanvol1/scratch/oryLat1/chrUn.lift
SEQ1_CHUNK=40000000
SEQ1_LAP=10000
SEQ1_LIMIT=100

# QUERY: Tetraodon tetNig1
SEQ2_DIR=/san/sanvol1/scratch/tetNig1/tetNig1.sdTrf.2bit
SEQ2_LEN=/san/sanvol1/scratch/tetNig1/chrom.sizes
SEQ2_CTGDIR=/san/sanvol1/scratch/tetNig1/tetNig1.randomContigs.sdTrf.2bit
SEQ2_CTGLEN=/san/sanvol1/scratch/tetNig1/tetNig1.randomContigs.sdTrf.sizes
SEQ2_CHUNK=20000000
SEQ2_LAP=0
SEQ2_LIMIT=100

BASE=/cluster/data/oryLat1/bed/blastz.tetNig1.2006-12-13
TMPDIR=/scratch/tmp
'_EOF_'
    # << this line keeps emacs coloring happy

    time doBlastzChainNet.pl DEF -chainMinScore=2000 -chainLinearGap=loose \
	-tRepeats=windowmaskerSdust -qRepeats=windowmaskerSdust \
	-bigClusterHub=kk -verbose=2 -blastzOutRoot \
	/cluster/bluearc/oryLat1TetNig1 > do.log 2>&1 &
    time doBlastzChainNet.pl DEF -chainMinScore=2000 -chainLinearGap=loose \
 	-continue=cat \
	-tRepeats=windowmaskerSdust -qRepeats=windowmaskerSdust \
	-bigClusterHub=kk -verbose=2 \
	-blastzOutRoot /cluster/bluearc/oryLat1TetNig1 > cat.log 2>&1 &
    time doBlastzChainNet.pl DEF -chainMinScore=2000 -chainLinearGap=loose \
 	-continue=chainRun \
	-tRepeats=windowmaskerSdust -qRepeats=windowmaskerSdust \
	-bigClusterHub=kk -verbose=2 \
	-blastzOutRoot /cluster/bluearc/oryLat1TetNig1 > chain.log 2>&1 &

    cd /cluster/data/oryLat1/bed
    rm blastz.tetNig1
    ln -s blastz.tetNig1.2006-12-13 blastz.tetNig1
    ssh hgwdev
    cd /cluster/data/oryLat1/bed/blastz.tetNig1
    nice -n 19 featureBits -noRandom oryLat1 chainTetNig1Link \
	>fb.oryLat1.tetNig1.txt 2>&1
    #	160388236 bases of 700386597 (22.900%) in intersection

    # And the swap
    ssh kkstore04
    cd /cluster/data/tetNig1/bed
    mv blastz.oryLat1.swap blastz.oryLat1.swap.2006-12-08
    mkdir blastz.oryLat1.swap
    cd /cluster/data/tetNig1/bed/blastz.oryLat1.swap
    time doBlastzChainNet.pl -verbose=2 \
	/cluster/data/oryLat1/bed/blastz.tetNig1.2006-12-13/DEF \
	-chainMinScore=2000 -chainLinearGap=loose \
	-tRepeats=windowmaskerSdust -qRepeats=windowmaskerSdust \
	-swap -bigClusterHub=kk > swap.log 2>&1 &
    time doBlastzChainNet.pl -verbose=2 \
	/cluster/data/oryLat1/bed/blastz.tetNig1.2006-12-13/DEF \
	-chainMinScore=2000 -chainLinearGap=loose \
	-tRepeats=windowmaskerSdust -qRepeats=windowmaskerSdust \
	-continue=net -swap -bigClusterHub=kk > net-swap.log 2>&1 &

    ssh hgwdev
    cd /cluster/data/tetNig1/bed/blastz.oryLat1.swap
    nice -n 19 featureBits -noRandom tetNig1 chainOryLat1Link \
	>fb.tetNig1.oryLat1.txt 2>&1
    #	130327582 bases of 342403326 (38.063%) in intersection

#########################################################################
# BLASTZ/CHAIN/NET gasAcu1 (DONE - 2006-12-14 - Hiram)
## Third time, randoms and chrUn in contigs for both assemblies
    ssh kkstore04
    mkdir /cluster/data/oryLat1/bed/blastz.gasAcu1.2006-12-14
    cd /cluster/data/oryLat1/bed/blastz.gasAcu1.2006-12-14
    cat << '_EOF_' > DEF
# Medaka vs. Stippleback

# Try "human-fugu" (more distant, less repeat-killed than mammal) params
# +M=50:
BLASTZ_H=2000
BLASTZ_Y=3400
BLASTZ_L=6000
BLASTZ_K=2200
BLASTZ_M=50
BLASTZ_Q=/cluster/data/blastz/HoxD55.q

# TARGET: Medaka oryLat1 (40M chunks covers the largest chroms in one gulp)
#       chrUn in Scaffolds for this alignment run
SEQ1_DIR=/san/sanvol1/scratch/oryLat1/oryLat1.sdTrf.2bit
SEQ1_LEN=/san/sanvol1/scratch/oryLat1/chrom.sizes
SEQ1_CTGDIR=/san/sanvol1/scratch/oryLat1/oryLat1UnScaffolds.2bit
SEQ1_CTGLEN=/san/sanvol1/scratch/oryLat1/oryLat1UnScaffolds.sizes
SEQ1_LIFT=/san/sanvol1/scratch/oryLat1/chrUn.lift
SEQ1_CHUNK=40000000
SEQ1_LAP=10000
SEQ1_LIMIT=100

# QUERY: Stippleback gasAcu1, no chrUn in this run
SEQ2_DIR=/san/sanvol1/scratch/gasAcu1/gasAcu1.sdTrf.2bit
SEQ2_LEN=/san/sanvol1/scratch/gasAcu1/gasAcu1.sdTrf.sizes
SEQ2_CTGDIR=/san/sanvol1/scratch/gasAcu1/gasAcu1.randomContigs.sdTrf.2bit
SEQ2_CTGLEN=/san/sanvol1/scratch/gasAcu1/gasAcu1.randomContigs.sdTrf.sizes
SEQ2_LIFT=/san/sanvol1/scratch/gasAcu1/gasAcu1.randomContigs.lift
SEQ2_CHUNK=35000000
SEQ2_LAP=0
SEQ2_LIMIT=100

BASE=/cluster/data/oryLat1/bed/blastz.gasAcu1.2006-12-14
TMPDIR=/scratch/tmp
'_EOF_'
    # << happy emacs

    time doBlastzChainNet.pl DEF -chainMinScore=2000 -chainLinearGap=loose \
	-tRepeats=windowmaskerSdust -qRepeats=windowmaskerSdust \
	-bigClusterHub pk -blastzOutRoot \
	/cluster/bluearc/oryLat1GasAcu1 > do.log 2>&1 &
    #	this effort was abandoned and removed

#######################################################################
## BLASTZ Medaka chroms vs Stickleback random contigs
## Experiment only
##	(DONE - 2006-12-20 - Hiram)
    ssh kkstore04
    mkdir /cluster/data/oryLat1/bed/blastz.gasAcu1.ChrUn.2006-12-20
    cd /cluster/data/oryLat1/bed/blastz.gasAcu1.ChrUn.2006-12-20
    cat << '_EOF_' > DEF
# Medaka chroms vs Stickleback chroms random contigs

# Try "human-fugu" (more distant, less repeat-killed than mammal) params
# +M=50:
BLASTZ_H=2000
BLASTZ_Y=3400
BLASTZ_L=6000
BLASTZ_K=2200
BLASTZ_M=50
BLASTZ_Q=/cluster/data/blastz/HoxD55.q
    
# TARGET: Medaka oryLat1 no chrUn sequence, only chroms
#       The largest is 40 million bases, there are 25 of them
SEQ1_DIR=/san/sanvol1/scratch/oryLat1/oryLat1.noUn.sdTrf.2bit
SEQ1_LEN=/san/sanvol1/scratch/oryLat1/oryLat1.noUn.sdTrf.sizes
SEQ1_CHUNK=40000000
SEQ1_LAP=10000

# QUERY: Stickleback gasAcu1 chrUn only
#       chrUn in scaffolds for this alignment run
#       The largest contig is only 420,000 bases, there are 5,000 of them
#       and a total size of 63 million.
SEQ2_DIR=/san/sanvol1/scratch/gasAcu1/gasAcu1.chrUn.sdTrf.2bit
SEQ2_LEN=/san/sanvol1/scratch/gasAcu1/gasAcu1.chrUn.sdTrf.sizes
SEQ2_CTGDIR=/san/sanvol1/scratch/gasAcu1/gasAcu1.chrUnContigsOnly.sdTrf.2bit
SEQ2_CTGLEN=/san/sanvol1/scratch/gasAcu1/gasAcu1.chrUnContigsOnly.sdTrf.sizes
SEQ2_LIFT=/san/sanvol1/scratch/gasAcu1/chrUn.extraCloneGap.lift
SEQ2_CHUNK=1000000
SEQ2_LIMIT=40
SEQ2_LAP=0

BASE=/cluster/data/oryLat1/bed/blastz.gasAcu1.ChrUn.2006-12-20
TMPDIR=/scratch/tmp
'_EOF_'
    # << happy emacs

    time doBlastzChainNet.pl -verbose=2 DEF \
	-chainMinScore=2000 -chainLinearGap=loose \
	-tRepeats=windowmaskerSdust -qRepeats=windowmaskerSdust \
	-blastzOutRoot /cluster/bluearc/oryLat1GasAcu1Un \
	-bigClusterHub=pk > do.log 2>&1 &


#########################################################################
##  Reorder Fish organisms (DONE - 2006-12-22 - Hiram)
    hgsql -h genome-testdb hgcentraltest \
	-e "update dbDb set orderKey = 475 where name = 'oryLat1';"

#######################################################################
## Swap of Stickleback chroms vs Medaka chroms and random contigs
## (DONE - 2006-12-22 - Hiram)
    ssh kkstore04
    mkdir /cluster/data/oryLat1/bed/blastz.gasAcu1.swap
    cd /cluster/data/oryLat1/bed/blastz.gasAcu1.swap

    time doBlastzChainNet.pl -verbose=2 \
	/cluster/data/gasAcu1/bed/blastz.oryLat1.2006-12-22/DEF \
	-chainMinScore=2000 -chainLinearGap=loose \
	-tRepeats=windowmaskerSdust -qRepeats=windowmaskerSdust \
	-swap -bigClusterHub=pk > swap.log 2>&1 &
    #	real    28m51.553s
    #	user    0m0.137s
    #	sys     0m0.096s

    ssh hgwdev
    cd /cluster/data/oryLat1/bed/blastz.gasAcu1.swap
    time featureBits oryLat1 chainGasAcu1Link \
	> fb.oryLat1.chainGasAcu1Link.txt 2>&1
    #	189411142 bases of 700386597 (27.044%) in intersection

#######################################################################
## Stickleback gasAcu1  BLASTZ (DONE - 2007-01-11 - Hiram)
##	The above swap doesn't contain the the Stickleback random contigs
##	and it would be good to have the Medaka alignments back on Stickleback
##	randoms.  So, to get them, run this blastz of the Stickleback chroms
##	and random contigs, then swap it back to Stickleback, no Db loads as
##	we are going to extract the random chains and nets out of this business
##	(WORKIN - 2007-01-11 - Hiram)
    ssh kkstore04
    mkdir /cluster/data/oryLat1/bed/blastz.gasAcu1.2007-01-11
    cd /cluster/data/oryLat1/bed/blastz.gasAcu1.2007-01-11
    cat << '_EOF_' > DEF
# Medaka chroms vs. Stickleback chroms and randoms in scaffolds

# Try "human-fugu" (more distant, less repeat-killed than mammal) params
# +M=50:
BLASTZ_H=2000
BLASTZ_Y=3400
BLASTZ_L=6000
BLASTZ_K=2200
BLASTZ_M=50
BLASTZ_Q=/cluster/data/blastz/HoxD55.q

# TARGET: Medaka oryLat1 no chrUn sequence, only chroms
#       The largest is 40 million bases, there are 25 of them
SEQ1_DIR=/san/sanvol1/scratch/oryLat1/oryLat1.noUn.sdTrf.2bit
SEQ1_LEN=/san/sanvol1/scratch/oryLat1/oryLat1.noUn.sdTrf.sizes
SEQ1_CHUNK=40000000
SEQ1_LAP=10000
SEQ1_LIMIT=10

# QUERY: Stickleback gasAcu1 chroms and chrUn in contigs
#       The largest chrom is 32M bases, the largest contig 418,670 bases
#       The smallest chrom chrM is 15,742 bases, smallest contig 60 bases
#       there are 5,115 chroms and contig pieces
#       total size 1,089,690,673 bases
#       47107538 N's 1042583135 real 834274605 upper 208308530 lower
SEQ2_DIR=/san/sanvol1/scratch/gasAcu1/gasAcu1.sdTrf.2bit
SEQ2_LEN=/san/sanvol1/scratch/gasAcu1/gasAcu1.sdTrf.sizes
SEQ2_CTGDIR=/san/sanvol1/scratch/gasAcu1/gasAcu1.randomContigs.sdTrf.2bit
SEQ2_CTGLEN=/san/sanvol1/scratch/gasAcu1/gasAcu1.randomContigs.sdTrf.sizes
SEQ2_LIFT=/san/sanvol1/scratch/gasAcu1/chrUn.extraCloneGap.lift
SEQ2_CHUNK=1000000
SEQ2_LIMIT=20
SEQ2_LAP=0

BASE=/cluster/data/oryLat1/bed/blastz.gasAcu1.2007-01-11
TMPDIR=/scratch/tmp
'_EOF_'
    # << happy emacs

    time doBlastzChainNet.pl -verbose=2 DEF -smallClusterHub=pk \
	-chainMinScore=2000 -chainLinearGap=loose \
	-tRepeats=windowmaskerSdust -qRepeats=windowmaskerSdust \
	-blastzOutRoot /cluster/bluearc/oryLat1GasAcu1Un \
	-stop=net -bigClusterHub=pk > do.log 2>&1 &
    #	real    127m37.446s
    #	user    0m0.148s
    #	sys     0m0.111s
    ## And the swap
    mkdir /cluster/data/gasAcu1/bed/blastz.orylat1.swap
    cd /cluster/data/gasAcu1/bed/blastz.orylat1.swap
    time doBlastzChainNet.pl -verbose=2 -smallClusterHub=pk \
	/cluster/data/oryLat1/bed/blastz.gasAcu1.2007-01-11/DEF \
	-chainMinScore=2000 -chainLinearGap=loose \
	-tRepeats=windowmaskerSdust -qRepeats=windowmaskerSdust \
	-swap -stop=net -bigClusterHub=pk > swap.log 2>&1 &

#######################################################################
## BLASTZ FR1 fr1 chrUn in contigs, Medaka chroms only, no randoms
##	(DONE - 2006-12-22 - Hiram)
    ssh kkstore04
    mkdir /cluster/data/oryLat1/bed/blastz.fr1.2006-12-22
    cd /cluster/data/oryLat1/bed/blastz.fr1.2006-12-22
    cat << '_EOF_' > DEF
# Medaka vs. Fugu

# Try "human-fugu" (more distant, less repeat-killed than mammal) params
# +M=50:
BLASTZ_H=2000
BLASTZ_Y=3400
BLASTZ_L=6000
BLASTZ_K=2200
BLASTZ_M=50
BLASTZ_Q=/cluster/data/blastz/HoxD55.q

# TARGET: Medaka oryLat1 (40M chunks covers the largest chroms in one gulp)
#       no chrUn in this alignment run
SEQ1_DIR=/san/sanvol1/scratch/oryLat1/oryLat1.noUn.sdTrf.2bit
SEQ1_LEN=/san/sanvol1/scratch/oryLat1/oryLat1.noUn.sdTrf.sizes
SEQ1_CHUNK=40000000
SEQ1_LAP=10000

# QUERY: Fugu fr1
#       Align to the scaffolds, results lifed up to chrUn.sdTrf coordinates
#       20,397 scaffolds, largest 1.1 million
SEQ2_DIR=/san/sanvol1/scratch/fr1/chrUn.sdTrf.2bit
SEQ2_LEN=/san/sanvol1/scratch/fr1/chrom.sizes
SEQ2_CTGDIR=/san/sanvol1/scratch/fr1/fr1.UnScaffolds.sdTrf.2bit
SEQ2_CTGLEN=/san/sanvol1/scratch/fr1/scaffold.sizes
SEQ2_LIFT=/san/sanvol1/scratch/fr1/UnScaffolds/ordered.lft
SEQ2_CHUNK=2000000
SEQ2_LIMIT=10
SEQ2_LAP=0

BASE=/cluster/data/oryLat1/bed/blastz.fr1.2006-12-22
TMPDIR=/scratch/tmp
'_EOF_'
    # << happy emacs

    time doBlastzChainNet.pl DEF -chainMinScore=2000 -chainLinearGap=loose \
	-tRepeats=windowmaskerSdust -qRepeats=windowmaskerSdust \
	-bigClusterHub=pk \
	-blastzOutRoot /cluster/bluearc/oryLat1Fr1 > do.log 2>&1 &
    #	this effort was abandoned, the 2006-12-13 run was used instead

#########################################################################
# ENSEMBL GENES (DONE - 2007-01-05 - Hiram)
#  Good luck with the biomart interface.  It seems to be different each time
#  it is used.
    mkdir /cluster/data/oryLat1/bed/ensembl
    cd /cluster/data/oryLat1/bed/ensembl
    # Get the ensembl gene data from 
    # http://www.biomart.org/biomart/martview/
    #	
    # Follow this sequence through the pages:
    #	The default dataset will be Human.  In the right side
    #	frame, if you scroll it up and down, you will come to a pull-down
    #	dataset menu where you can select the organism.  There appear to be
    #	two pull-down menus in this frame, one for:
    #	Database: ENSEMBL 42 GENE (SANGER)
    #	and you can select the second:
    #	Dataset: Oryzias latipes genes (MEDAKA1)

    #	After selecting the Dataset, in the left frame, click on the
    #	"Attributes" (Features) label, now the right frame changes to radio
    #	buttons, Features, Sequences, Structures
    #	Click the "Structures" button, the three optional buttons can be
    #	expanded to select elements of these:
    #	REGION:
    #	GENE:
    #	EXON:
    #	REGION: has Chromosome checked
    #	GENE: has Ensembl Gene ID and Biotype selected
    #	EXON: has no selections
    #	In the GENE: menu:
    #	Unselect Biotype
    #	and Select
    #	Ensembl Gene ID
    #	Ensembl Transcript ID
    #	External Gene ID

    #	Then, in the black menu bar above these frames, click rhe "Results"
    #	it shows the first ten rows.  For this organism, there appear to be no
    #	External Gene ID in the HTML view.  Change the "Display maximum"
    #	"rows as" pull-down
    #	to GFF, and use the "Export all results to" pull-down to
    #	Compressed file (.gz), press the "Go" button to download.
    #	Save this file as oryLat1.ensembl42.gff.gz
    #	The regular chroms came with just their numbers, but the chrUn
    #	scaffolds arrived as scaffolds.  Will need to lift to chrUn.

    # Add "chr" to front of each line in the gene data gtf file to make 
    # it compatible with our software, and liftUp to get scaffolds onto chrUn
    #	The scaffolds and ultracontigs mentioned in this are not the scaffolds
    #	we have in our lift file for chrUn ...  can't use them.
    zcat oryLat1.ensembl42.gff.gz | egrep -v "scaffold|ultracontig" \
	| sed -e "s/^\([0-9][0-9]*\)/chr\1/" | gzip -c > ensembl42.gff.gz
    #	Verify names OK:
    zcat ensembl42.gff.gz | awk '{print $1}' | sort | uniq -c
    #	22938 chr1
    #	15887 chr10
    #	18645 chr11
    #	20162 chr12
    #	21474 chr13
    #	21302 chr14
    #	20148 chr15
    #	22978 chr16
    #	26164 chr17
    #	13671 chr18
    #	17109 chr19
    #	10988 chr2
    #	15705 chr20
    #	21361 chr21
    #	21030 chr22
    #	12984 chr23
    #	19009 chr24
    #	21767 chr3
    #	26204 chr4
    #	24335 chr5
    #	24300 chr6
    #	24329 chr7
    #	23323 chr8
    #	27795 chr9


    cd /cluster/data/oryLat1/bed/ensembl
    ldHgGene -gtf -genePredExt oryLat1 ensGene ensembl42.gff.gz
    #	Read 23087 transcripts in 493608 lines in 1 files
    #	23087 groups 24 seqs 1 sources 4 feature types
    #	23087 gene predictions

    # ensGtp associates geneId/transcriptId/proteinId for hgPepPred and 
    # hgKnownToSuper.  Use ensMart to create it as above, except:
    # for the Attributes, choose the "Features" box, and then In "GENE:"
    # select Ensembl Gene ID, Ensembl Transcript ID, Ensembl Peptide ID.  
    # Results, choose txt output and a gzipped file, save this as
    # ensGtp42.txt.gz
    #	Strip the first lines which is merely column heading labels
    zcat ensGtp42.txt.gz | headRest 1 stdin | sed -e "s/ /\t/g" > ensGtp.txt
    hgLoadSqlTab oryLat1 ensGtp ~/kent/src/hg/lib/ensGtp.sql ensGtp.txt
    hgsql -N -e "select count(*) from ensGtp;" oryLat1
    #	+-------+
    #	| 26148 |
    #	+-------+
    wc -l ensGtp.txt
    #	26148 ensGtp.txt

    # Load Ensembl peptides:
    # Get them from ensembl as above in the gene section except for
    # for the Attributes, choose the "Sequences" box, and then 
    # SEQUENCES: Peptide and 
    #	Header Information "Ensembl Transcript ID"
    #	Results output as FASTA
    # Save file as ensPep.fa.gz
    hgPepPred oryLat1 generic ensPep ensPep.fa

#########################################################################
# MAKE 11.OOC FILE FOR BLAT (DONE - 2007-01-18 - Hiram)
    # Use -repMatch=200 (based on size -- for human we use 1024, and 
    # medaka size is ~20% of human judging by gapless oryLat1 vs. hg18 
    # genome sizes from featureBits.
    ssh kolossus
    blat /cluster/data/oryLat1/oryLat1.2bit /dev/null /dev/null -tileSize=11 \
      -makeOoc=/cluster/bluearc/oryLat1/11.ooc -repMatch=200
# Wrote 46600 overused 11-mers to /cluster/bluearc/oryLat1/11.ooc

#########################################################################
# GENBANK AUTO UPDATE (DONE - 2007-01-18 - Hiram)
    # Make a liftAll.lft that specifies 5M chunks for genbank:
    ssh kkstore04
    cd /cluster/data/oryLat1
    simplePartition.pl oryLat1.2bit 5000000 /tmp/oryLat1P
    cat /tmp/oryLat1P/*/*.lft > jkStuff/liftAll.lft
    rm -r /tmp/oryLat1P

    # align with latest genbank process.
    ssh hgwdev
    cd ~/kent/src/hg/makeDb/genbank
    cvsup
    # edit etc/genbank.conf to add oryLat1 just after gasAcu1

# oryLat1 (G. Aculeatus)
oryLat1.serverGenome = /cluster/data/oryLat1/oryLat1.2bit
oryLat1.clusterGenome = /iscratch/i/oryLat1/oryLat1.2bit
oryLat1.refseq.mrna.native.pslCDnaFilter  = ${ordered.refseq.mrna.native.pslCDnaFilter}
oryLat1.refseq.mrna.xeno.pslCDnaFilter    = ${ordered.refseq.mrna.xeno.pslCDnaFilter}
oryLat1.genbank.mrna.native.pslCDnaFilter = ${ordered.genbank.mrna.native.pslCDnaFilter}
oryLat1.genbank.mrna.xeno.pslCDnaFilter   = ${ordered.genbank.mrna.xeno.pslCDnaFilter}
oryLat1.genbank.est.native.pslCDnaFilter  = ${ordered.genbank.est.native.pslCDnaFilter}
oryLat1.ooc = /cluster/bluearc/oryLat1/11.ooc
oryLat1.lift = /cluster/data/oryLat1/jkStuff/liftAll.lft
oryLat1.genbank.mrna.xeno.load = yes
oryLat1.downloadDir = oryLat1

    cvs ci -m "Added oryLat1." etc/genbank.conf
    # update /cluster/data/genbank/:
    make etc-update

    # Edit src/lib/gbGenome.c to add new species.
    cvs ci -m "Added Oryzias latipes (medaka)." src/lib/gbGenome.c
    make install-server

    ssh kkstore02
    cd /cluster/data/genbank
    nice -n +19 bin/gbAlignStep -initial oryLat1 &
    # load database when finished
    ssh hgwdev
    cd /cluster/data/genbank
    time nice -n +19 ./bin/gbDbLoadStep -drop -initialLoad oryLat1
    #	real    17m10.400s

    # enable daily alignment and update of hgwdev
    cd ~/kent/src/hg/makeDb/genbank
    cvsup
    # add oryLat1 to:
        etc/align.dbs
        etc/hgwdev.dbs
    cvs ci -m "Added oryLat1." etc/align.dbs etc/hgwdev.dbs
    make etc-update

   # add Xeno refseqs (2007-04-12 markd)
   # add this to genbank.com:
   oryLat1.refseq.mrna.xeno.load = yes

   ssk kkstore02
   cd /cluster/data/genbank
   (./bin/gbAlignStep -initial -srcDb=refseq oryLat1)|&mail markd&
   (./bin/gbDbLoadStep -srcDb=refseq oryLat1)|&mail markd&

#########################################################################
## 5-Way Multiz (DONE - 2007-01-21 - Hiram)
##  re-done with fr2 in place of fr1
    ssh hgwdev
    mkdir /cluster/data/oryLat1/bed/multiz5way
    cd /cluster/data/oryLat1/bed/multiz5way

    cp /cluster/data/fr2/bed/multiz5way/5way.nh .
    #	this file looks like:

    cat << '_EOF_' > 5way.nh
(((tetraodon_tetNig1:0.199381,fugu_fr2:0.239894):0.2,
    (stickleback_gasAcu1:0.2,medaka_oryLat1:0.2):0.2):0.292961,
	zebrafish_danRer4:0.782561);
'_EOF_'
    # << happy emacs

    #	Use this specification in the phyloGif tool:
    #	http://genome.ucsc.edu/cgi-bin/phyloGif
    #	to obtain a gif image for htdocs/images/phylo/oryLat1_5way.gif

    /cluster/bin/phast/all_dists 5way.nh > 5way.distances.txt
    #	Use this output to create the table below
    grep -y oryLat1 5way.distances.txt | sort -k3,3n
#
#	If you can fill in all the numbers in this table, you are ready for
#	the multiple alignment procedure
#
#                         featureBits chainLink measures
#                                        chainOryLat1Link   chain    linearGap
#    distance                      on oryLat1    on other   minScore
#  1  0.4000 - stickleback gasAcu1 (% 27.044) (% 38.308)       2000     loose
#  2  0.7994 - tetraodon tetNig1   (% 22.900) (% 38.063)       2000     loose
#  3  0.8399 - fugu fr2            (% 25.344) (% 36.611)       2000     loose
#  4  1.4755 - zebrafish danRer4   (% 22.275) (% 12.899)       2000     loose

    cd /cluster/data/oryLat1/bed/multiz5way
    #	bash shell syntax here ...
    export H=/cluster/data/oryLat1/bed
    mkdir mafLinks
    for G in gasAcu1 tetNig1 fr2 danRer4
    do
	mkdir mafLinks/$G
	if [ ! -d ${H}/blastz.${G}/mafNet ]; then
	echo "missing directory blastz.${G}/mafNet"
	else
	ln -s ${H}/blastz.$G/mafNet/*.maf.gz ./mafLinks/$G
	fi
    done

    #	Copy MAFs to some appropriate NFS server for kluster run
    ssh kkstore04
    mkdir /san/sanvol1/scratch/oryLat1/multiz5way
    cd /san/sanvol1/scratch/oryLat1/multiz5way
    time nice -n +19 rsync -a --copy-links --progress \
	/cluster/data/oryLat1/bed/multiz5way/mafLinks/ .
    #	15 seconds to copy 522 Mb

    mkdir penn
    cp -p /cluster/bin/penn/multiz.v11.x86_64/multiz-tba/multiz penn
    cp -p /cluster/bin/penn/multiz.v11.x86_64/multiz-tba/maf_project penn
    cp -p /cluster/bin/penn/multiz.v11.x86_64/multiz-tba/autoMZ penn

    # the autoMultiz cluster run
    ssh pk
    cd /cluster/data/oryLat1/bed/multiz5way/

    # create species list and stripped down tree for autoMZ
    sed 's/[a-z][a-z]*_//g; s/:[0-9\.][0-9\.]*//g; s/;//; /^ *$/d' \
	5way.nh > tmp.nh
    echo `cat tmp.nh` > tree-commas.nh
    echo `cat tree-commas.nh` | sed 's/ //g; s/,/ /g' > tree.nh
    sed 's/[()]//g; s/,/ /g' tree.nh > species.lst

    mkdir run maf
    cd run

    #	NOTE: you need to set the db and multiz dirname properly in this script
    cat > autoMultiz << '_EOF_'
#!/bin/csh -ef
set db = oryLat1
set c = $1
set maf = $2
set binDir = /san/sanvol1/scratch/$db/multiz5way/penn
set tmp = /scratch/tmp/$db/multiz.$c
set pairs = /san/sanvol1/scratch/$db/multiz5way
rm -fr $tmp
mkdir -p $tmp
cp ../{tree.nh,species.lst} $tmp
pushd $tmp
foreach s (`cat species.lst`)
    set in = $pairs/$s/$c.maf
    set out = $db.$s.sing.maf
    if ($s == $db) then
	continue
    endif
    if (-e $in.gz) then
	zcat $in.gz > $out
    else if (-e $in) then
	cp $in $out
    else
	echo "##maf version=1 scoring=autoMZ" > $out
    endif
end
set path = ($binDir $path); rehash
$binDir/autoMZ + T=$tmp E=$db "`cat tree.nh`" $db.*.sing.maf $c.maf
popd
cp $tmp/$c.maf $maf
rm -fr $tmp
'_EOF_'
    # << happy emacs
    chmod +x autoMultiz

cat  << '_EOF_' > template
#LOOP
./autoMultiz $(root1) {check out line+ /cluster/data/oryLat1/bed/multiz5way/maf/$(root1).maf}
#ENDLOOP
'_EOF_'
    # << happy emacs

    awk '{print $1}' /cluster/data/oryLat1/chrom.sizes > chrom.lst
    gensub2 chrom.lst single template jobList
    para create jobList
    # 23 jobs
    para try ... check ... push ... etc ...
# Completed: 26 of 26 jobs
# CPU time in finished jobs:       7534s     125.57m     2.09h    0.09d  0.000 y
# IO & Wait Time:                   161s       2.68m     0.04h    0.00d  0.000 y
# Average job time:                 296s       4.93m     0.08h    0.00d
# Longest finished job:            1148s      19.13m     0.32h    0.01d
# Submission to last job:          1158s      19.30m     0.32h    0.01d

    #	combine results into a single file for loading and gbdb reference
    ssh kkstore04
    cd /cluster/data/oryLat1/bed/multiz5way
    time nice -n +19 catDir maf > multiz5way.maf
    #	real    0m5.628s

    #	makes a 1.4 Gb file:
    #	-rw-rw-r--   1 1413115862 Feb  5 15:21 multiz5way.maf

    #	Create per-chrom individual maf files for downloads
    #	NOT NECESSARY HERE - DONE LATER WITH THE ANNOTATED MAFS
    ssh kkstore04
    cd /cluster/data/oryLat1/bed/multiz5way
    mkdir mafDownloads
    time for M in maf/chr*.maf
    do
	B=`basename $M`
	cp -p ${M} mafDownloads/${B}
	gzip mafDownloads/${B}
	echo ${B} done
    done
    #	real    5m9.273

    #	deliver to downloads *!* NOT NECESSARY HERE - DONE LATER WITH
    #		THE ANNOTATED MAFS
    ssh hgwdev
    ln -s /cluster/data/oryLat1/bed/multiz5way/mafDownloads \
	/usr/local/apache/htdocs/goldenPath/oryLat1/multiz5way

    # Load into database
    ssh hgwdev
    cd /cluster/data/oryLat1/bed/multiz5way
    mkdir /gbdb/oryLat1/multiz5way
    ln -s /cluster/data/oryLat1/bed/multiz5way/multiz5way.maf \
	/gbdb/oryLat1/multiz5way
    time nice -n +19 hgLoadMaf oryLat1 multiz5way
    #	Loaded 1559270 mafs in 1 files from /gbdb/oryLat1/multiz5way
    #	real    0m38.327s

    time nice -n +19 hgLoadMafSummary -minSize=10000 -mergeGap=500 \
	-maxSize=50000 oryLat1 multiz5waySummary multiz5way.maf
    #	Created 724676 summary blocks from 3597261 components
    #	and 1559270 mafs from multiz5way.maf
    #	real    0m47.574s

    #	Create tree image for details page
    #	You can get a better image from the phyloGif tool:
    #	http://genome.ucsc.edu/cgi-bin/phyloGif
    # cp 5way.nh treeImage.nh
    #	Edit treeImage.nh to make the names as desired
    #	Submit this treeImage.nh to the phyloGif program, or:
    # /cluster/bin/phast/draw_tree -b -s treeImage.nh > treeImage.ps

############################################################################
# ANNOTATE MULTIZ5WAY MAF AND LOAD TABLES (DONE - 2007-01-21 - Hiram)
## Reloaded 2007-04-13 to get the annotation correct
    ssh kolossus
    mkdir /cluster/data/oryLat1/bed/multiz5way/anno
    cd /cluster/data/oryLat1/bed/multiz5way/anno
    mkdir maf run
    cd run
    rm -f sizes nBeds
    ln -s  /cluster/data/oryLat1/chrom.sizes oryLat1.len
    twoBitInfo -nBed /cluster/data/oryLat1/oryLat1.sdTrf.{2bit,N.bed}
    ln -s  /cluster/data/oryLat1/oryLat1.sdTrf.N.bed oryLat1.bed
    for DB in `sed -e "s/ oryLat1//" /cluster/data/oryLat1/bed/multiz5way/species.lst`
    do
        ln -s  /cluster/data/${DB}/chrom.sizes ${DB}.len
        ln -s  /cluster/data/${DB}/${DB}.N.bed ${DB}.bed
	echo ${DB}.bed  >> nBeds
	echo ${DB}.len  >> sizes
	echo $DB
    done

    echo '#!/bin/csh -ef' > jobs.csh
    echo date >> jobs.csh
    # do smaller jobs first so you can see some progress immediately:
    for F in `ls -1rS ../../maf/*.maf`
    do
     echo mafAddIRows -nBeds=nBeds $F \
      /cluster/data/oryLat1/oryLat1.sdTrf.2bit ../maf/`basename $F` >> jobs.csh
     echo "echo $F" >> jobs.csh
    done 
    echo date >> jobs.csh
    chmod +x jobs.csh
    time nice -n +19 ./jobs.csh > jobs.log 2>&1 &
    #	to watch progress;
    tail -f jobs.log
    #	real    36m19.199s

    # Load anno/maf
    ssh hgwdev
    cd /cluster/data/oryLat1/bed/multiz5way/anno/maf
    mkdir -p /gbdb/oryLat1/multiz5way/anno/maf
    ln -s /cluster/data/oryLat1/bed/multiz5way/anno/maf/*.maf \
      /gbdb/oryLat1/multiz5way/anno/maf
    time nice -n +19 hgLoadMaf \
	-pathPrefix=/gbdb/oryLat1/multiz5way/anno/maf oryLat1 multiz5way
    #	Loaded 1918235 mafs in 26 files from /gbdb/oryLat1/multiz5way/anno/maf
    #	real    1m1.468s

    # Do the computation-intensive part of hgLoadMafSummary on a workhorse 
    # machine and then load on hgwdev:
    ssh kolossus
    cd /cluster/data/oryLat1/bed/multiz5way/anno/maf
    time cat *.maf | \
    	nice -n +19 hgLoadMafSummary oryLat1 -minSize=30000 -mergeGap=1500 \
	-maxSize=200000 -test multiz5waySummary stdin
    #	Created 413488 summary blocks from 3682815 components and 1918228 mafs
    #	from stdin
    #	real    1m20.644s

    ssh hgwdev
    cd /cluster/data/oryLat1/bed/multiz5way/anno/maf
    time nice -n +19 hgLoadSqlTab oryLat1 multiz5waySummary \
	~/kent/src/hg/lib/mafSummary.sql multiz5waySummary.tab
    #	real    0m6.295s

#######################################################################
# MULTIZ5WAY MAF FRAMES (DONE - 2007-01-21 - Hiram)
## re-done with fr2 in place of fr1 - 2007-02-05 - Hiram
    ssh hgwdev
    mkdir /cluster/data/oryLat1/bed/multiz5way/frames
    cd /cluster/data/oryLat1/bed/multiz5way/frames
    mkdir genes
    # The following is adapted from the gasAcu1 8-way sequence

    #------------------------------------------------------------------------
    # get the genes for all genomes
    # mRNAs with CDS.  single select to get cds+psl, then split that up and
    # create genePred
    # using refGene for danRer4
    for qDB in danRer4
    do
	geneTbl=refGene
      echo hgsql -N -e \"'select * from '$geneTbl\;\" ${qDB}
      hgsql -N -e "select * from $geneTbl" ${qDB} | cut -f 2-100 \
      | genePredSingleCover stdin stdout | gzip -2c \
        > /scratch/tmp/$qDB.tmp.gz
      mv /scratch/tmp/$qDB.tmp.gz genes/$qDB.gp.gz
      rm -f $tmpExt
    done

    # using genscan for tetNig1
    # using ensGene for oryLat1, gasAcu1, and fr2
    # genePreds; (must keep only the first 10 columns for knownGene)
    for qDB in gasAcu1 oryLat1 fr2 tetNig1
    do
      if [ $qDB = "gasAcu1" -o $qDB = "oryLat1" -o $qDB = "fr2" ]; then
        geneTbl=ensGene
      elif [ $qDB = "tetNig1" ]; then
        geneTbl=genscan
      fi
      echo hgsql -N -e \"'select * from '$geneTbl\;\" ${qDB}
      hgsql -N -e "select * from $geneTbl" ${qDB} | cut -f 1-10 \
      | genePredSingleCover stdin stdout | gzip -2c \
        > /scratch/tmp/$qDB.tmp.gz
      mv /scratch/tmp/$qDB.tmp.gz genes/$qDB.gp.gz
      rm -f $tmpExt
    done

    ###
    
    ssh kkstore04
    cd /cluster/data/oryLat1/bed/multiz5way/frames
    time cat ../maf/*.maf | nice -n +19 genePredToMafFrames oryLat1 \
	stdin stdout gasAcu1 genes/gasAcu1.gp.gz oryLat1 \
	genes/oryLat1.gp.gz fr2 genes/fr2.gp.gz tetNig1 \
	genes/tetNig1.gp.gz danRer4 genes/danRer4.gp.gz \
	| gzip > multiz5way.mafFrames.gz
    #	real    0m40.989s

    ssh hgwdev
    cd /cluster/data/oryLat1/bed/multiz5way/frames
    time nice -n +19 hgLoadMafFrames oryLat1 multiz5wayFrames \
	multiz5way.mafFrames.gz
    #	real    0m22.466s

#########################################################################
# MULTIZ5WAY DOWNLOADABLES (DONE - 2007-02-06 - Hiram)
    ssh hgwdev
    mkdir /cluster/data/oryLat1/bed/multiz5way/mafDownloads
    cd /cluster/data/oryLat1/bed/multiz5way

    # upstream mafs 
    #	rebuilt 2007-12-21 to fix difficulty in mafFrags when species.lst
    #	did not have fr2 as the first one
    #	There isn't any refGene table, trying ensGene instead
for S in 1000 2000 5000
do
    echo "making upstream${S}.maf"
    nice -n +19 $HOME/bin/$MACHTYPE/featureBits -verbose=2 oryLat1 \
        ensGene:upstream:${S} -fa=/dev/null -bed=stdout \
        | perl -wpe 's/_up[^\t]+/\t0/' \
        | $HOME/kent/src/hg/ratStuff/mafFrags/mafFrags oryLat1 multiz5way \
                stdin stdout -orgs=species.lst \
        | gzip -c > mafDownloads/ensGene.upstream${S}.maf.gz
    echo "done ensGene.upstream${S}.maf.gz"
done

    # re-done 2007-04-13 after proper annotation
    ssh kkstore04
    cd /cluster/data/oryLat1/bed/multiz5way
    time for M in anno/maf/chr*.maf
    do
	B=`basename $M`
	nice -n +19 gzip -c ${M} > mafDownloads/${B}.gz
	echo ${B}.gz done
    done
    #	real    6m13.232s

    cd mafDownloads
    nice -n +19 md5sum *.maf.gz > md5sum.txt
    # Make a README.txt

    ssh hgwdev
    mkdir /usr/local/apache/htdocs/goldenPath/oryLat1/multiz5way
    cd /usr/local/apache/htdocs/goldenPath/oryLat1/multiz5way
    ln -s /cluster/data/oryLat1/bed/multiz5way/mafDownloads/{*.gz,*.txt} .


############################################################################
# CREATE CONSERVATION WIGGLE WITH PHASTCONS
#		(DONE - 2007-01-23 - Hiram)
    # The following is adapted from the gasAcu1 8-way sequence

# Estimate phastCons parameters
    ssh kkstore04
    mkdir /cluster/data/oryLat1/bed/multiz5way/cons
    cd /cluster/data/oryLat1/bed/multiz5way/cons

    #	The automatic build process does not leave masked fa sequences for the
    #	chroms, so, create them here.
    twoBitToFa -seq=chr1 ../../../oryLat1.sdTrf.2bit chr1.fa
    # Create a starting-tree.mod based on chr1 (the largest one)
    time /cluster/bin/phast/$MACHTYPE/msa_split ../maf/chr1.maf \
	--refseq ./chr1.fa --in-format MAF \
	--windows 100000000,1000 --out-format SS \
	--between-blocks 5000 --out-root s1
    #	20 seconds

    time nice -n +19 /cluster/bin/phast/$MACHTYPE/phyloFit -i SS s1.*.ss \
--tree "(((tetNig1,fr2),(gasAcu1,oryLat1)),danRer4)" \
    --out-root starting-tree
    #	Fitting tree model to s1.1-39973033.ss using REV ...
    #	Writing model to starting-tree.mod ...
    #	real    0m8.798s

    rm s1.*.ss
    # add up the C and G:
    grep BACKGROUND starting-tree.mod | awk '{printf "%0.3f\n", $3 + $4;}'
    #	0.430
    #	This 0.430 is used in the --gc argument below

    # Create SS files on san filesystem
    #	Using a size of 10,000,000 to slow down the phastCons pk jobs
    ssh kkstore04
    mkdir -p  /san/sanvol1/scratch/oryLat1/cons/ss
    cd  /san/sanvol1/scratch/oryLat1/cons/ss
    time for C in `awk '{print $1}' /cluster/data/oryLat1/chrom.sizes`
    do
      if [ -s /cluster/data/oryLat1/bed/multiz5way/maf/${C}.maf ]; then
	mkdir -p ${C}
	mkdir -p /cluster/data/oryLat1/bed/multiz5way/cons/${C}
	echo msa_split $C
	if [ ! -s /cluster/data/oryLat1/bed/multiz5way/cons/${C}/${C}.fa ]; then
	    twoBitToFa -seq=${C} /cluster/data/oryLat1/oryLat1.sdTrf.2bit \
		/cluster/data/oryLat1/bed/multiz5way/cons/${C}/${C}.fa
	fi
	chrN=${C/chr/}
	chrN=${chrN/_random/}
	/cluster/bin/phast/$MACHTYPE/msa_split \
	    /cluster/data/oryLat1/bed/multiz5way/maf/${C}.maf \
	    --refseq /cluster/data/oryLat1/bed/multiz5way/cons/${C}/${C}.fa \
	    --in-format MAF --windows 10000000,0 --between-blocks 5000 \
	    --out-format SS --out-root ${C}/${C}
      fi
    done &
    #	real    7m49.718s

    # Create a random list of 50 1 mb regions  (do not use the _randoms)
    cd /san/sanvol1/scratch/oryLat1/cons/ss
    ls -1l chr*/chr*.ss | grep -v Un | \
	awk '$5 > 4000000 {print $9;}' | randomLines stdin 50 ../randomSs.list

    # Set up parasol directory to calculate trees on these 50 regions
    ssh pk
    mkdir /san/sanvol1/scratch/oryLat1/cons/treeRun1
    cd /san/sanvol1/scratch/oryLat1/cons/treeRun1
    mkdir tree log

    #	Tuning this loop should come back to here to recalculate 
    # Create little script that calls phastCons with right arguments
    #	--target-coverage of 0.20 is about right for mouse, will be
    #	tuned exactly below
    cat > makeTree.csh << '_EOF_'
#!/bin/csh -fe
set C=$1:h
mkdir -p log/${C} tree/${C}
    /cluster/bin/phast/$MACHTYPE/phastCons ../ss/$1 \
      /cluster/data/oryLat1/bed/multiz5way/cons/starting-tree.mod \
      --gc 0.430 --nrates 1,1 --no-post-probs --ignore-missing \
      --expected-length 10 --target-coverage 0.20 \
      --quiet --log log/$1 --estimate-trees tree/$1
'_EOF_'
    #	<< happy emacs
    chmod a+x makeTree.csh

    # Create gensub file
    cat > template << '_EOF_'
#LOOP
./makeTree.csh $(path1)
#ENDLOOP
'_EOF_'
    #	<< happy emacs

    # Make cluster job and run it
    gensub2 ../randomSs.list single template jobList
    para create jobList
    para try/push/check/etc
# Completed: 50 of 50 jobs
# CPU time in finished jobs:       8162s     136.03m     2.27h    0.09d  0.000 y
# IO & Wait Time:                   188s       3.14m     0.05h    0.00d  0.000 y
# Average job time:                 167s       2.78m     0.05h    0.00d
# Longest finished job:             315s       5.25m     0.09h    0.00d
# Submission to last job:           320s       5.33m     0.09h    0.00d

    # Now combine parameter estimates.  We can average the .mod files
    # using phyloBoot.  This must be done separately for the conserved
    # and nonconserved models
    ssh pk
    cd /san/sanvol1/scratch/oryLat1/cons/treeRun1
    ls -1 tree/chr*/*.cons.mod > cons.list
    /cluster/bin/phast/$MACHTYPE/phyloBoot --read-mods '*cons.list' \
	--output-average ave.cons.mod > cons_summary.txt
    ls -1 tree/chr*/*.noncons.mod > noncons.list
    /cluster/bin/phast/$MACHTYPE/phyloBoot --read-mods '*noncons.list' \
	--output-average ave.noncons.mod > noncons_summary.txt
    cp -p ave.*.mod ..
    cd ..
    cp -p ave.*.mod /cluster/data/oryLat1/bed/multiz5way/cons

    #	measuring entropy
    #	consEntopy <target coverage> <expected lengths>
    #		 ave.cons.mod ave.noncons.mod --NH 9.78
    #	never stops with the --NH argument
    time /cluster/bin/phast/$MACHTYPE/consEntropy --NH 9.7834 \
	0.10 12 ave.{cons,noncons}.mod
## 0.20 10 
## ERROR: too many iterations, not converging; try without --NH.
Transition parameters: gamma=0.100000, omega=12.000000, mu=0.083333, nu=0.009259
Relative entropy: H=0.811315 bits/site
Expected min. length: L_min=14.589283 sites
Expected max. length: L_max=8.948974 sites
Phylogenetic information threshold: PIT=L_min*H=11.836508 bits


    #	SKIP to here passing by the tuning numbers
    ssh pk
    # Create cluster dir to do main phastCons run
    mkdir /san/sanvol1/scratch/oryLat1/cons/consRun1
    cd /san/sanvol1/scratch/oryLat1/cons/consRun1
    mkdir ppRaw bed

    # Create script to run phastCons with right parameters
    #	This job is I/O intensive in its output files, thus it is all
    #	working over in /scratch/tmp/
    cat > doPhast << '_EOF_'
#!/bin/csh -fe
mkdir /scratch/tmp/${2}
cp -p ../ss/${1}/${2}.ss ../ave.{cons,noncons}.mod /scratch/tmp/${2}
pushd /scratch/tmp/${2} > /dev/null
/cluster/bin/phast/${MACHTYPE}/phastCons ${2}.ss ave.cons.mod,ave.noncons.mod \
    --expected-length 10 --target-coverage 0.20 \
    --quiet --seqname ${1} --idpref ${1} --viterbi ${2}.bed --score > ${2}.pp
popd > /dev/null
mkdir -p ppRaw/${1}
mkdir -p bed/${1}
mv /scratch/tmp/${2}/${2}.pp ppRaw/${1}
mv /scratch/tmp/${2}/${2}.bed bed/${1}
rm /scratch/tmp/${2}/ave.{cons,noncons}.mod
rm /scratch/tmp/${2}/${2}.ss
rmdir /scratch/tmp/${2}
'_EOF_'
    # << happy emacs
    chmod a+x doPhast

    #	root1 == chrom name, file1 == ss file name without .ss suffix
    # Create gsub file
    cat > template << '_EOF_'
#LOOP
./doPhast $(root1) $(file1)
#ENDLOOP
'_EOF_'
    #	<< happy emacs

    # Create parasol batch and run it
    ls -1 ../ss/chr*/chr*.ss | sed 's/.ss$//' > in.list

    gensub2 in.list single template jobList
    para create jobList
    para try/check/push/etc.
    #	These jobs are very fast and very I/O intensive, even on the san
    #	they will hang it up as they work at full tilt.
# Completed: 98 of 98 jobs
# CPU time in finished jobs:       2506s      41.76m     0.70h    0.03d  0.000 y
# IO & Wait Time:                  1062s      17.70m     0.30h    0.01d  0.000 y
# Average job time:                  36s       0.61m     0.01h    0.00d
# Longest finished job:              50s       0.83m     0.01h    0.00d
# Submission to last job:            96s       1.60m     0.03h    0.00d

    # combine predictions and transform scores to be in 0-1000 interval
    #	it uses a lot of memory, so on kolossus:
    ssh kolossus
    cd /san/sanvol1/scratch/oryLat1/cons/consRun1
    #	The sed's and the sort get the file names in chrom,start order
    #	You might like to verify it is correct by first looking at the
    #	list it produces:
    find ./bed -type f | sed -e "s#/# x #g; s#\.# y #g; s#-# z #g" \
	| sort -k7,7 -k9,9n \
	| sed -e "s# z #-#g; s# y #\.#g; s# x #/#g" | less
    #	if that looks right, then let it run:
    find ./bed -type f | sed -e "s#/# x #g; s#\.# y #g; s#-# z #g" \
	| sort -k7,7 -k9,9n \
	| sed -e "s# z #-#g; s# y #\.#g; s# x #/#g" | xargs cat \
	| awk '{printf "%s\t%d\t%d\tlod=%d\t%s\n", $1, $2, $3, $5, $5;}' \
	| /cluster/bin/scripts/lodToBedScore /dev/stdin > mostConserved5Way.bed
    #	~ 16 seconds
    cp -p mostConserved5Way.bed /cluster/data/oryLat1/bed/multiz5way

    # Figure out how much is actually covered by the bed file as so:
    #	Get the non-n genome size from faSize on all chroms:
    ssh kkstore04
    cd /cluster/data/oryLat1/bed/multiz5way/cons
    faSize chr*/*.fa
    #	843500808 bases (143114211 N's 700386597 real 468614281
    #	upper 231772316 lower) in 26 sequences in 26 files

    cd /san/sanvol1/scratch/oryLat1/cons/consRun1
    #	The 700386597 comes from the non-n genome as counted above.
    awk '
{sum+=$3-$2}
END{printf "%% %.2f = 100.0*%d/700386597\n",100.0*sum/700386597,sum}' \
	mostConserved5Way.bed
    #	%  6.84 = 100.0*47871534/700386597--exp-len 10 --tar-cov 0.20

    #	Aiming for %70 coverage in
    #	the following featureBits measurement on CDS:
    # Beware of negative scores when too high.  The logToBedScore
    # will output an error on any negative scores.

    HGDB_CONF=~/.hg.conf.read-only time nice -n +19 featureBits oryLat1 \
	-enrichment ensGene:cds mostConserved5Way.bed
    #	--expected-length 10 --target-coverage 0.20
    #	ensGene:cds 4.125%, mostConserved5Way.bed 6.835%, both 2.309%, cover
    #	55.97%, enrich 8.19x

    # Load most conserved track into database
    ssh hgwdev
    cd /cluster/data/oryLat1/bed/multiz5way
    # ended up using the set: --expected-length 10 --target-coverage 0.20
    time nice -n +19 hgLoadBed -strict oryLat1 phastConsElements5way \
	mostConserved5Way.bed
    #	Loaded 825134 elements of size 5
    #	real    0m18.391s

    #	should measure the same as above
    time nice -n +19 \
	featureBits oryLat1 -enrichment ensGene:cds phastConsElements5way
    #	At: --expected-length 10 --target-coverage 0.20
    #	ensGene:cds 4.125%, phastConsElements5way 6.835%, both 2.309%, cover
    #	55.97%, enrich 8.19x

    # Create merged posterier probability file and wiggle track data files
    ssh pk
    cd /san/sanvol1/scratch/oryLat1/cons/consRun1
    # the sed business gets the names sorted by chromName, chromStart
    #	so that everything goes in numerical order into wigEncode
    #	This was verified above to be correct
    time nice -n +19 find ./ppRaw -type f \
	| sed -e "s#/# x #g; s#\.# y #g; s#-# z #g" \
	| sort -k7,7 -k9,9n \
	| sed -e "s# z #-#g; s# y #\.#g; s# x #/#g" | xargs cat \
	    | wigEncode -noOverlap stdin \
		phastCons5.wig phastCons5.wib
    #	Converted stdin, upper limit 1.00, lower limit 0.00
    #	real    2m55.873s
    #	-rw-rw-r--   1 292833977 Feb  5 16:31 phastCons5.wib
    #	-rw-rw-r--   1  48444448 Feb  5 16:31 phastCons5.wig
    time nice -n +19 cp -p phastCons5.wi? /cluster/data/oryLat1/bed/multiz5way/
    #	real    1m21.329s

    #	prepare compressed copy of ascii data values for downloads
    ssh pk
    cd /san/sanvol1/scratch/oryLat1/cons/consRun1
    cat << '_EOF_' > gzipAscii.sh
#!/bin/sh

TOP=`pwd`
export TOP

mkdir -p phastCons5Scores

for D in ppRaw/chr*
do
    C=${D/ppRaw\/}
    out=phastCons5Scores/${C}.data.gz
    echo "========================== ${C} ${D}"
    find ./${D} -type f | sed -e "s#/# x #g; s#\.# y #g; s#-# z #g" \
	| sort -k7,7 -k9,9n \
	| sed -e "s# z #-#g; s# y #\.#g; s# x #/#g" | xargs cat |
	    gzip > ${out}
done
'_EOF_'
    #	<< happy emacs
    chmod +x gzipAscii.sh
    time nice -n +19 ./gzipAscii.sh
    #	real    5m25.145s

    #	copy them for downloads
    ssh kkstore04
    mkdir /cluster/data/oryLat1/bed/multiz5way/phastCons5Scores
    cd /cluster/data/oryLat1/bed/multiz5way/phastCons5Scores
    cp -p  /san/sanvol1/scratch/oryLat1/cons/consRun1/phastCons5Scores/* .
    # make a README.txt file here, and an md5sum

    ssh hgwdev
    mkdir /usr/local/apache/htdocs/goldenPath/oryLat1/phastCons5Scores
    cd /usr/local/apache/htdocs/goldenPath/oryLat1/phastCons5Scores
    ln -s /cluster/data/oryLat1/bed/multiz5way/phastCons5Scores/* .

    # Load gbdb and database with wiggle.
    ssh hgwdev
    cd /cluster/data/oryLat1/bed/multiz5way
    ln -s `pwd`/phastCons5.wib /gbdb/oryLat1/wib/phastCons5.wib
    # ended up using the set: --expected-length 10 --target-coverage 0.20
    time nice -n +19 hgLoadWiggle oryLat1 phastCons5 phastCons5.wig
    #	real    0m9.256s

    #  Create histogram to get an overview of all the data
    ssh hgwdev
    cd /cluster/data/oryLat1/bed/multiz5way
    time nice -n +19 hgWiggle -doHistogram \
	-hBinSize=0.001 -hBinCount=1000 -hMinVal=0.0 -verbose=2 \
	    -db=oryLat1 phastCons5 > histogram.data 2>&1
    #	real    0m30.744

    #	create plot of histogram:

    cat << '_EOF_' | gnuplot > histo.png
set terminal png small color \
        xffffff x000000 x000000 x444444 xaa4400 x00ffff xff0000
set size 1.4, 0.8
set key left box
set grid noxtics
set grid ytics
set title " Medaka oryLat1 Histogram phastCons5 track"
set xlabel " phastCons5 score - --expected-length 10 --target-coverage 0.20"
set ylabel " Relative Frequency"
set y2label " Cumulative Relative Frequency (CRF)"
set y2range [0:1]
set y2tics
set yrange [0:0.02]

plot "histogram.data" using 2:5 title " RelFreq" with impulses, \
        "histogram.data" using 2:7 axes x1y2 title " CRF" with lines
'_EOF_'
    #	<< happy emacs

    display histo.png &

    #	The mostConserved can also be characterized by a histogram
    awk '{print $3-$2}' mostConserved5Way.bed > mostCons.txt
    textHistogram -verbose=2 -autoScale=1000 -pValues mostCons.txt \
	> mostCons.histo.txt

    cat << '_EOF_' | gnuplot > mostCons.png
set terminal png small color \
        xffffff x000000 x000000 x444444 xaa4400 x00ffff xff0000
set size 1.4, 0.8
set key left box
set grid noxtics
set grid ytics
set title " Medaka oryLat1 histogram: lengths of mostConserved track elements"
set xlabel " mostConserved element length - --expected-length 10 --target-coverage 0.20"
set ylabel " # of elements at this length"
set y2label " Cumulative Relative Frequency (CRF)"
set y2range [0:1]
set xrange [0:200]
set y2tics
set boxwidth 2
set style fill solid

plot "mostCons.histo.txt" using 2:3 title " # of elements" with boxes, \
        "mostCons.histo.txt" using 2:6 axes x1y2 title " CRF" with lines
'_EOF_'
#	<< happy emacs

###########################################################################
# HUMAN (hg18) PROTEINS TRACK (DONE braney 2007-01-28)
    ssh kkstore04
    # bash  if not using bash shell already

    mkdir /cluster/data/oryLat1/blastDb
    cd /cluster/data/oryLat1
    cat noUn/chr*fa > temp.fa
    faSplit gap temp.fa 1000000 blastDb/x -lift=blastDb.lft
    rm temp.fa
    faSplit sequence Un/chrUnScaffolds.fa 150 blastDb/y
    cd blastDb
    for i in *.fa
    do
	/cluster/bluearc/blast229/formatdb -i $i -p F
    done
    rm *.fa

    mkdir -p /san/sanvol1/scratch/oryLat1/blastDb
    cd /cluster/data/oryLat1/blastDb
    for i in nhr nin nsq; 
    do 
	echo $i
	cp *.$i /san/sanvol1/scratch/oryLat1/blastDb
    done

    mkdir -p /cluster/data/oryLat1/bed/tblastn.hg18KG
    cd /cluster/data/oryLat1/bed/tblastn.hg18KG
    echo  /san/sanvol1/scratch/oryLat1/blastDb/*.nsq | xargs ls -S | sed "s/\.nsq//"  > query.lst
    wc -l query.lst
# 887 query.lst

   # we want around 150000 jobs
   calc `wc /cluster/data/hg18/bed/blat.hg18KG/hg18KG.psl | awk "{print \\\$1}"`/\(150000/`wc query.lst | awk "{print \\\$1}"`\)

# 36727/(150000/887) = 217.178993

   mkdir -p /cluster/bluearc/oryLat1/bed/tblastn.hg18KG/kgfa
   split -l 217 /cluster/data/hg18/bed/blat.hg18KG/hg18KG.psl  /cluster/bluearc/oryLat1/bed/tblastn.hg18KG/kgfa/kg
   ln -s /cluster/bluearc/oryLat1/bed/tblastn.hg18KG/kgfa kgfa
   cd kgfa
   for i in *; do 
     nice pslxToFa $i $i.fa; 
     rm $i; 
     done
   cd ..
   ls -1S kgfa/*.fa > kg.lst
   mkdir -p /cluster/bluearc/oryLat1/bed/tblastn.hg18KG/blastOut
   ln -s /cluster/bluearc/oryLat1/bed/tblastn.hg18KG/blastOut
   for i in `cat kg.lst`; do  mkdir blastOut/`basename $i .fa`; done
   tcsh
   cd /cluster/data/oryLat1/bed/tblastn.hg18KG
   cat << '_EOF_' > blastGsub
#LOOP
blastSome $(path1) {check in line $(path2)} {check out exists blastOut/$(root2)/q.$(root1).psl }
#ENDLOOP
'_EOF_'

   cat << '_EOF_' > blastSome
#!/bin/sh
BLASTMAT=/cluster/bluearc/blast229/data
export BLASTMAT
g=`basename $2`
f=/tmp/`basename $3`.$g
for eVal in 0.01 0.001 0.0001 0.00001 0.000001 1E-09 1E-11
do
if /cluster/bluearc/blast229/blastall -M BLOSUM80 -m 0 -F no -e $eVal -p tblastn -d $1 -i $2 -o $f.8
then
        mv $f.8 $f.1
        break;
fi
done
if test -f  $f.1
then
    if /cluster/bin/i386/blastToPsl $f.1 $f.2
    then
	liftUp -nosort -type=".psl" -nohead $f.3 /cluster/data/oryLat1/blastDb.lft carry $f.2
        liftUp -nosort -type=".psl" -pslQ -nohead $3.tmp /cluster/data/hg18/bed/blat.hg18KG/protein.lft warn $f.3

        if pslCheck -prot $3.tmp                                                  
        then                                                                      
            mv $3.tmp $3                                                          
            rm -f $f.1 $f.2 $f.3 $f.4
        fi
        exit 0                                                                    
    fi                                                                            
fi                                                                                
rm -f $f.1 $f.2 $3.tmp $f.8 $f.3 $f.4
exit 1
'_EOF_'
    # << happy emacs
    chmod +x blastSome
    gensub2 query.lst kg.lst blastGsub blastSpec
    exit 
    
    ssh pk
    cd /cluster/data/oryLat1/bed/tblastn.hg18KG
    para create blastSpec
#    para try, check, push, check etc.

    para time

# times munged due to recovered batches

    ssh kkstore04
    cd /cluster/data/oryLat1/bed/tblastn.hg18KG
    mkdir chainRun
    cd chainRun
    tcsh
    cat << '_EOF_' > chainGsub
#LOOP
chainOne $(path1)
#ENDLOOP
'_EOF_'

    cat << '_EOF_' > chainOne
(cd $1; cat q.*.psl | simpleChain -prot -outPsl -maxGap=100000 stdin /cluster/bluearc/oryLat1/bed/tblastn.hg18KG/blastOut/c.`basename $1`.psl)
'_EOF_'
    chmod +x chainOne
    ls -1dS /cluster/bluearc/oryLat1/bed/tblastn.hg18KG/blastOut/kg?? > chain.lst
    gensub2 chain.lst single chainGsub chainSpec
    # do the cluster run for chaining
    ssh kk
    cd /cluster/data/oryLat1/bed/tblastn.hg18KG/chainRun
    para create chainSpec
    para maxNode 30
    para try, check, push, check etc.

# Completed: 170 of 170 jobs
# CPU time in finished jobs:       3150s      52.51m     0.88h    0.04d  0.000 y
# IO & Wait Time:                 42932s     715.53m    11.93h    0.50d  0.001 y
# Average job time:                 271s       4.52m     0.08h    0.00d
# Longest finished job:             453s       7.55m     0.13h    0.01d
# Submission to last job:          1632s      27.20m     0.45h    0.02d

    ssh kkstore04
    cd /cluster/data/oryLat1/bed/tblastn.hg18KG/blastOut
    for i in kg??
    do
       cat c.$i.psl | awk "(\$13 - \$12)/\$11 > 0.6 {print}" > c60.$i.psl
       sort -rn c60.$i.psl | pslUniq stdin u.$i.psl
       awk "((\$1 / \$11) ) > 0.60 { print   }" c60.$i.psl > m60.$i.psl
       echo $i
    done
    sort -T /tmp -k 14,14 -k 16,16n -k 17,17n u.*.psl m60* | uniq > /cluster/data/oryLat1/bed/tblastn.hg18KG/unliftBlastHg18KG.psl
    cd ..
    pslCheck unliftBlastHg18KG.psl
    liftUp blastHg18KG.psl ../../Un/chrUn.lift carry unliftBlastHg18KG.psl

    # load table 
    ssh hgwdev
    cd /cluster/data/oryLat1/bed/tblastn.hg18KG
    hgLoadPsl oryLat1 blastHg18KG.psl

    # check coverage
    featureBits oryLat1 blastHg18KG 
# 18960975 bases of 700386597 (2.707%) in intersection

    featureBits oryLat1 ensGene:cds blastHg18KG  -enrichment
# ensGene:cds 4.125%, blastHg18KG 2.707%, both 2.279%, cover 55.26%, enrich 20.41x

    ssh kkstore04
    rm -rf /cluster/data/oryLat1/bed/tblastn.hg18KG/blastOut
    rm -rf /cluster/bluearc/oryLat1/bed/tblastn.hg18KG/blastOut
#end tblastn
##########################################################################

#########################################################################
# BLASTZ/CHAIN/NET FR2 (DONE - 2007-01-24 - Hiram)
##  no chrUn in oryLat1, and align to fr2 scaffolds,
##	results lifted to fr2 chrUn coordinates
    ssh kkstore04
    mkdir /cluster/data/oryLat1/bed/blastz.fr2.2007-01-24
    cd /cluster/data/oryLat1/bed/blastz.fr2.2007-01-24
    cat << '_EOF_' > DEF
# Medaka vs. Fugu

# Try "human-fugu" (more distant, less repeat-killed than mammal) params
# +M=50:
BLASTZ_H=2000
BLASTZ_Y=3400
BLASTZ_L=6000
BLASTZ_K=2200
BLASTZ_M=50
BLASTZ_Q=/cluster/data/blastz/HoxD55.q

# TARGET: Medaka oryLat1 (40M chunks covers the largest chroms in one gulp)
#       chrUn in Scaffolds for this alignment run
SEQ1_DIR=/san/sanvol1/scratch/oryLat1/oryLat1.sdTrf.2bit
SEQ1_LEN=/san/sanvol1/scratch/oryLat1/chrom.sizes
SEQ1_CHUNK=40000000
SEQ1_LIMIT=30
SEQ1_LAP=10000

# QUERY: Fugu fr2
#       Align to the scaffolds, results lifed up to chrUn.sdTrf coordinates
SEQ2_DIR=/san/sanvol1/scratch/fr2/fr2.2bit
SEQ2_LEN=/san/sanvol1/scratch/fr2/chrom.sizes
SEQ2_CTGDIR=/san/sanvol1/scratch/fr2/fr2.scaffolds.2bit
SEQ2_CTGLEN=/san/sanvol1/scratch/fr2/fr2.scaffolds.sizes
SEQ2_LIFT=/san/sanvol1/scratch/fr2/liftAll.lft
SEQ2_CHUNK=20000000
SEQ2_LIMIT=30
SEQ2_LAP=0

BASE=/cluster/data/oryLat1/bed/blastz.fr2.2007-01-24
TMPDIR=/scratch/tmp
'_EOF_'
    # << this line keeps emacs coloring happy

    time doBlastzChainNet.pl DEF -chainMinScore=2000 -chainLinearGap=loose \
	-tRepeats=windowmaskerSdust -qRepeats=windowmaskerSdust \
	-verbose=2 -bigClusterHub=pk \
	-blastzOutRoot /cluster/bluearc/oryLat1Fr2 > do.log 2>&1 &
    #	real    601m51.650s

    mkdir /cluster/data/fr2/bed/blastz.oryLat1.swap
    cd /cluster/data/fr2/bed/blastz.oryLat1.swap
    time doBlastzChainNet.pl -verbose=2 \
	/cluster/data/oryLat1/bed/blastz.fr2.2007-01-24/DEF \
	-chainMinScore=2000 -chainLinearGap=loose \
	-tRepeats=windowmaskerSdust -qRepeats=windowmaskerSdust \
	-swap -bigClusterHub=pk > swap.log 2>&1 &

    ssh hgwdev
    cd /cluster/data/oryLat1/bed/blastz.fr2.2007-01-24
    time nice -n +19 featureBits oryLat1 chainFr2Link \
	> fb.oryLat1.chainFr2Link.txt 2>&1
    #	177508958 bases of 700386597 (25.344%) in intersection
    cd /cluster/data/fr2/bed/blastz.oryLat1.swap]
    time nice -n +19 featureBits fr2 chainOryLat1Link \
	> fb.fr2.chainOryLat1Link.txt 2>&1
    #	143996507 bases of 393312790 (36.611%) in intersection

############################################################################
##  Set a more interesting default position, location of IGHMBP2 gene
##	DONE - 2007-02-08 - Hiram
    ssh hgwdev
    hgsql -e \
'update dbDb set defaultPos="chr6:11,027,149-11,038,707" where name = "oryLat1";' \
	-h genome-testdb hgcentraltest

############################################################################
#  BLATSERVERS ENTRY (DONE - 2007-02-08 - Hiram)
#	After getting a blat server assigned by the Blat Server Gods,
    ssh hgwdev

    hgsql -e 'INSERT INTO blatServers (db, host, port, isTrans, canPcr) \
	VALUES ("oryLat1", "blat3", "17788", "1", "0"); \
	INSERT INTO blatServers (db, host, port, isTrans, canPcr) \
	VALUES ("oryLat1", "blat3", "17789", "0", "1");' \
	    hgcentraltest
    #	test it with some sequence

############################################################################
## Medaka Photograph  (DONE - 2007-02-12 12:15 - Hiram)
# Permission to use photo received in email from moris@cb.k.u-tokyo.ac.jp
# Shinichi Morishita 2007-02-09 18:25 - Morishita Lab - Univ of Tokyo
    ssh hgwdev
    mkdir /cluster/data/oryLat1/photograph
    cd /cluster/data/oryLat1/photograph
    wget --timestamping "http://medaka.utgenome.org/picture/Medaka_hd_rR.jpg" \
	-O oryLat1.Kiyoshi.Naruse.jpg
    convert -quality 100 -crop "1464x600+0+300" -sharpen 0 -normalize \
	oryLat1.Kiyoshi.Naruse.jpg oryLat1.crop.jpg
    convert -quality 80 -geometry 300x200 oryLat1.crop.jpg \
	Oryzias_latipes.jpg
    # add this image to the browser doc source tree in images and copy it
    #	to /usr/local/apache/htdocs/images on hgwdev

############################################################################
##  DOWNLOADS - (DONE - 2007-02-12 - 2007-02-16 - Hiram)
    ssh hgwdev
    cd /cluster/data/oryLat1
    ~/kent/src/hg/utils/automation/makeDownloads.pl oryLat1 \
	> makeDownloads.out 2>&1
    #	This needs to be checked to see if it did the correct procedure
    #	since there isn't any valid Repeat masker output files
    #	Create WindowMasker separate files by chrom, for downloads
    ssh kkstore04
    cd /cluster/data/oryLat1/bed/WindowMasker.2006-11-29
    zcat windowmasker.sdust.bed.gz > chr.oryLat1.WMSdust.bed
    splitFileByColumn chr.oryLat1.WMSdust.bed chrWM
    #	Creating chrWM/chr1.oryLat1.WMSdust.bed
    #	Creating chrWM/chr2.oryLat1.WMSdust.bed
    #	... etc ...
    cd chrWM
    tar cvzf ../chromWMSdust.bed.tar.gz *.bed
    #	Verify this process didn't destroy anything:
    cat chrWM/*.bed | awk '{sum += $3-$2}END{printf "total size: %d\n",sum}'
    #	total size: 374591526
    zcat windowmasker.sdust.bed.gz \
	| awk '{sum += $3-$2}END{printf "total size: %d\n",sum}'
    #	total size: 374591526
    #	deliver to bigZips
    cd /cluster/data/oryLat1/goldenPath/bigZips
    ln -s \
    /cluster/data/oryLat1/bed/WindowMasker.2006-11-29/chromWMSdust.bed.tar.gz .
    
    # get GenBank native mRNAs
    ssh hgwdev
    cd /cluster/data/genbank
    ./bin/x86_64/gbGetSeqs -db=oryLat1 -native \
	GenBank mrna /cluster/data/oryLat1/goldenPath/bigZips/mrna.fa
    # get GenBank xeno mRNAs
    ./bin/x86_64/gbGetSeqs -db=oryLat1 -xeno \
	GenBank mrna /cluster/data/oryLat1/goldenPath/bigZips/xenoMrna.fa
    cd /cluster/data/oryLat1/goldenPath/bigZips
    ssh kkstore04
    gzip mrna.fa xenoMrna.fa
    md5sum *.gz > md5sum.txt
    #	Edit the README.txt file to be correct
    ssh hgwdev
    cd /usr/local/apache/htdocs/goldenPath/oryLat1/bigZips
    ln -s /cluster/data/oryLat1/goldenPath/bigZips/* .


############################################################################
##  swap human alignments done properly with oryLat1 contigs
##	(DONE - 2005-02-25 - Hiram)
    ## previous measurement on human
    ssh hgwdev
    cd /cluster/data/hg18/bed/blastz.oryLat1.2007-02-24
    nice -n +19 featureBits  hg18 chainOryLat1Link \
	> fb.hg18.chainOryLat1Link.txt 2>&1 &
    #	57393910 bases of 2881515245 (1.992%) in intersection

    ssh kkstore04
    mkdir /cluster/data/oryLat1/bed/blastz.hg18.swap
    cd /cluster/data/oryLat1/bed/blastz.hg18.swap
    time doBlastzChainNet.pl -chainMinScore=2000 -chainLinearGap=loose \
	/cluster/data/hg18/bed/blastz.oryLat1.2007-02-24/DEF \
	-tRepeats=windowmaskerSdust -qRepeats=windowmaskerSdust \
	-bigClusterHub=pk -verbose=2 -swap > swap.log 2>&1 &

    ssh hgwdev
    cd /cluster/data/oryLat1/bed/blastz.hg18.swap
    nice -n +19 featureBits  oryLat1 chainHg18Link \
	> fb.oryLat1.chainHg18Link.txt 2>&1 &
    #	48002423 bases of 700386597 (6.854%) in intersection

##########################################################################
## RepeatMasker run to cover all bases (DONE - 2007-03-07 - Hiram)
    ssh kkstore02
    mkdir /cluster/data/oryLat1/bed/RepeatMasker
    cd /cluster/data/oryLat1/bed/RepeatMasker
    time nice -n +19 doRepeatMasker.pl -verbose=2 -bigClusterHub=kk \
	-buildDir=/cluster/data/oryLat1/bed/RepeatMasker oryLat1 > do.log 2>&1 &

#########################################################################
## Remake the gold tables to be whole scaffolds only (DONE - 2007-03-21 - Hiram)
    cd /cluster/data/oryLat1/rawData
    mkdir gold_tables
    cat << '_EOF_' > mkGoldTab.pl
#!/usr/bin/env perl

use warnings;
use strict;

open (FH,"<allSeqLevel.agp") or die "Can not read allSeqLevel.agp";

my $scafName = "";
my $scafLen = 0;
my $scafType = "";
my $chrName = "";
my $chrStart = 0;
my $chrEnd = 0;
my $ix = 1;

while (my $line = <FH>) {
    if ($line =~ m/scaffold[0-9]+_contig/) {
        chomp $line;
        my ($chr, $start, $end, $seqId, $type, $scafContig, $ctgStart, $ctgEnd, 
$strand) = split('\s+',$line);
        --$start;
        --$ctgStart;
        my $ctgLen = $ctgEnd - $ctgStart;
        my $chrLen = $end - $start;
        die "lengths: $ctgLen != $chrLen" if ($ctgLen != $chrLen);
        $scafContig =~ s/_contig[0-9]+//;
        if (length($scafName) > 0) {
            if ($scafName ne $scafContig) {
                $scafLen = $chrEnd - $chrStart;
                printf "%s\t%d\t%d\t%d\t%s\t%s\t0\t%d\t+\n",
                    $chrName, $chrStart, $chrEnd, $ix++, $scafType,
                        $scafName, $scafLen;
                $scafName = $scafContig;
                $chrStart = $start;
            }
        } else {
            $scafName = $scafContig;
            $chrStart = $start;
        }
        $chrEnd = $end;
        $chrName = $chr;
        $scafType = $type;
    }
}
# and the last one
printf "%s\t%d\t%d\t%d\t%s\t%s\t0\t%d\t+\n",
    $chrName, $chrStart, $chrEnd, $ix++, $scafType, $scafName, $scafLen;

close(FH);
'_EOF_'
    # << happy emacs
    chmod +x ./mkGoldTab.pl
    ./mkGoldTab.pl | splitFileByColumn -ending=_gold.tab -tab stdin gold_tables

    cd gold_tables
    sed -e "s/agpFrag/chrZX_gold/" $HOME/kent/src/hg/lib/agpFrag.sql \
	> chr_gold.sql
    # edit that .sql file to add the bin column
    #	 bin smallint(6) NOT NULL default '0',

    ls *._gold.tab | sed -e "s/._gold.tab//" | while read C
do
    echo ${C}_gold
    sed -e "s/chrZX/${C}/" chr_gold.sql > t.sql
    exit 0
done
    ls *._gold.tab | sed -e "s/._gold.tab//" | while read C
do
    echo ${C}_gold
    sed -e "s/chrZX/${C}/" chr_gold.sql > t.sql
    hgLoadBed -sqlTable=t.sql oryLat1 ${C}_gold ${C}._gold.tab
    rm -f t.sql
done

#########################################################################
## Creating a ctgPos2 table (DONE - 2007-03-21 - Hiram)
    cd /cluster/data/oryLat1/rawData
    cat << '_EOF_' > mkCtgPos2.pl
#!/usr/bin/env perl

use warnings;
use strict;

open (FH,"<allSeqLevel.agp") or die "Can not read allSeqLevel.agp";

while (my $line = <FH>) {
    if ($line =~ m/scaffold[0-9]+_contig/) {
        chomp $line;
        my ($chr, $start, $end, $seqId, $type, $scafContig, $ctgStart, $ctgEnd, $strand) = split('\s+',$line);
        --$start;
        --$ctgStart;
        my $ctgLen = $ctgEnd - $ctgStart;
        my $chrLen = $end - $start;
        die "lengths: $ctgLen != $chrLen" if ($ctgLen != $chrLen);
        $scafContig =~ s/scaffold[0-9]+_//;
        printf "%s\t%d\t%s\t%d\t%d\t%s\n",
                $scafContig, $ctgLen, $chr, $start, $end, $type;
    }
}

close(FH);
'_EOF_'
    # << happy emacs
    chmod +x mkCtgPos2.pl
    ./mkCtgPos2.pl > ctgPos2.tab
    hgLoadSqlTab oryLat1 ctgPos2 ~/kent/src/hg/lib/ctgPos2.sql ctgPos2.tab

#########################################################################
##  Creating quality tracks for the chromosomes (DONE - 2007-03-23 - Hiram)
##  In email conversation with Budrul Ahsan ahsan@cb.k.u-tokyo.ac.jp
##  obtain quality scores from:  (existed only temporarily here)
    ssh kkstore04
    mkdir /cluster/data/oryLat1/bed/qual
    cd /cluster/data/oryLat1/bed/qual
    wget --timestamping 'http://dev.utgenome.org/~utgb/medaka/medakaqual.tar.gz'
    ##  These quality scores have scaffolds and ultracontigs mixed
    ##	together.  Our chrUn only has scaffolds.  Separate out just the
    ##	chrom scores from the scaffolds and ultracontigs
    cat << '_EOF_' > onlyChroms.pl
#!/usr/bin/env perl

use strict;
use warnings;

open (FH,"<medakaver1.0.UCSC.fasta.qual") or
        die "can not read medakaver1.0.UCSC.fasta.qual";
open (OF,">chroms.fa.qual") or
        die "can not write to chroms.fa.qual";

my $nextOf = 1;
while (my $line = <FH>) {
    if ($line =~ m/^>/) {
        if ($line =~ m/^>scaf|^>ult/) {
            if ($nextOf) {
                close (OF);
                open (OF,">chrUn.fa.qual") or
                    die "can not write to chrUn.fa.qual";
                $nextOf = 0;
            }
        } else {
            $line =~ s/^>/>chr/;
        }
    }
    printf OF "%s", $line;
}

close (OF);
close (FH);
'_EOF_'
    # << happy emacs
    chmod +x ./onlyChroms.pl
    ./onlyChroms.pl
    # some of the numbers in this are incorrect, too large.  Truncate them
    #	to 90 as per advice from Budrul in email
    time nice -n +19 sed -e \
"s/ 1[0-9][0-9]/ 90/g; s/ 2[0-9][0-9]/ 90/g; s/^1[0-9][0-9]/90/; s/^2[0-9][0-9]/90/;" \
	chroms.fa.qual | qaToQac stdin fixed.chroms.qac
    #	real    2m19.194s

    time nice -n +19 qacToWig -fixed fixed.chroms.qac stdout \
	| wigEncode stdin qual.wig qual.wib
    #	Converted stdin, upper limit 99.00, lower limit 0.00
    #	real    4m19.515s

    ssh hgwdev
    cd /cluster/data/oryLat1/bed/qual
    time nice -n +19 hgLoadWiggle -tmpDir=/scratch/tmp \
	-pathPrefix=/gbdb/oryLat1/wib oryLat1 quality qual.wig
    #	real    0m22.997s
    ln -s /cluster/data/oryLat1/bed/qual/qual.wib /gbdb/oryLat1/wib

##########################################################################
## Obtain a tar.gz file from Budrul that was their handoff to Ensembl
##  They only made this tar file available to me for a week, then they
##	remove it.
    ssh kkstore04
    mkdir /cluster/data/oryLat1/downloads
    cd /cluster/data/oryLat1/downloads
    wget --timestamping 'http://dev.utgenome.org/~utgb/medakaAGP/EMBL.tar.gz'
    tar xvzf EMBL.tar.gz
    #	EMBL/200510.chr_random.agp.txt
    #	EMBL/200510.scaffold.agp.txt
    #	EMBL/200510.contig.fasta
    #	EMBL/200510.ultracontig.agp.txt
    #	EMBL/200510.chr.agp.txt
    #	EMBL/200510.UCSC.fasta.qual

###########################################################################
## Fixup gold/gap table entries (DONE - 2007-04-11 - Hiram)
    ssh hgwdev
    mkdir /cluster/data/oryLat1/bed/gap
    cd /cluster/data/oryLat1/bed/gap
    hgsql -e "show tables;" oryLat1 | grep gap | while read T
do
    hgsql -N -e "select * from $T;" oryLat1 > ${T}.tab
    echo $T
done
    # change telomere to scaffold
    hgsql -e "show tables;" oryLat1 | grep _gap | while read T
do
    hgsql -N -e 'update '$T' set type="scaffold" where type="telomere";' oryLat1
done
    #	the gold and ctgPos2 tables on chrM are empty
    mkdir /cluster/data/oryLat1/bed/gold
    cd /cluster/data/oryLat1/bed/gold
    hgsql -e "show tables;" oryLat1 | grep gold | while read T
do
    hgsql -N -e "select * from $T;" oryLat1 > ${T}.tab
    echo $T
done
    hgsql -N -e "select * from chrM_gold;" hg18 > hg18.chrM_gold.tab
    #	Edit that to make it read:
# 585  chrM  0   16714  1   F  NC_004387.1   0 16714   +
    sed -e "s/agpFrag/chrM_gold/" $HOME/kent/src/hg/lib/agpFrag.sql \
	> chrM_gold.sql
    # edit that to add the bin column, then
    #	bin smallint(6) NOT NULL default '0',
    hgLoadSqlTab oryLat1 chrM_gold chrM_gold.sql chrM_gold.tab

    hgsql -e 'INSERT INTO ctgPos2
	(contig, size, chrom, chromStart, chromEnd, type)
	VALUES ("NC_004387.1", 16714, "chrM", 0, 16714, "F");' oryLat1

###########################################################################
## Drop some entries from the ensGtp table since they are not all loaded
## to keep joiner check happy
## (DONE - 2007-04-11 - Hiram)
    ssh hgwdev
    hgsql -N -e "select name from ensGene;" oryLat1 \
	| sort > name.ensGene.oryLat1.txt
    hgsql -N -e "select transcript from ensGtp;" oryLat1 \
	| sort > transcript.ensGtp.oryLat1
    wc -l name.ensGene.oryLat1.txt transcript.ensGtp.oryLat1
    #	23087 name.ensGene.oryLat1.txt
    #	26148 transcript.ensGtp.oryLat1
    comm -12 name.ensGene.oryLat1.txt transcript.ensGtp.oryLat1 | wc -l
    #	23087
    comm -13 name.ensGene.oryLat1.txt transcript.ensGtp.oryLat1 | wc -l
    #	3061
    comm -13 name.ensGene.oryLat1.txt transcript.ensGtp.oryLat1 \
	> delete.transcript.ensGtp
    for T in `cat delete.transcript.ensGtp`
do
    hgsql -e 'delete from ensGtp where transcript="'$T'";' oryLat1
    echo $T
done
    hgsql -e "select count(*) from ensGtp;" oryLat1
    #	23087
    hgsql -N -e "select name from ensPep;" oryLat1 | sort > name.ensPep.oryLat1
    comm -13 name.ensGene.oryLat1.txt name.ensPep.oryLat1 \
	> delete.name.ensPep
    for T in `cat delete.name.ensPep`
do
    hgsql -e 'delete from ensPep where name="'$T'";' oryLat1
    echo $T
done
    hgsql -N -e "select name from ensPep;" oryLat1 | wc

    runJoiner.csh oryLat1 ensGene
    #	found identifiers:
    #	ensemblTranscriptId
    #	Checking keys on database oryLat1
    #	 oryLat1.ensGtp.transcript - hits 23087 of 23087 ok
    #	 oryLat1.ensPep.name - hits 23073 of 23073 ok
    #	running -times flag

#########################################################################
##  Reset date on request (DONE - 2007-05-01 - Hiram)
    hgsql hgcentraltest \
	-e 'update dbDb set description = "Apr. 2006" where name = "oryLat1";'

##########################################################################
# NSCAN track - (2007-06-05 markd)
# uses danRer4 as informant, no pseudogene masking 
#
    cd /cluster/data/oryLat1/bed/nscan/

    # obtainedf NSCAN predictions from michael brent's group
    # at WUSTL
    wget -nv http://mblab.wustl.edu/predictions/Oryzias_latipes/oryLat1.gtf
    wget -nv http://mblab.wustl.edu/predictions/Oryzias_latipes/oryLat1.prot.fa

    gzip -9 oryLat1.*
    chmod a-w *.gz

    # load tracks.  Note that these have *utr features, rather than
    # exon features.  currently ldHgGene creates separate genePred exons
    # for these.
    ldHgGene -bin -gtf -genePredExt oryLat1 nscanGene oryLat1.gtf.gz
    hgPepPred oryLat1 generic nscanPep oryLat1.prot.fa.gz
    rm -f *.tab

    # update trackDb; need a oryLat1-specific page to describe informants
    medaka/oryLat1/nscanGene.html    
    medaka/oryLat1/trackDb.ra

##########################################################################
# SWAP DANRER5 BLASTZ RESULT TO GET DANRER5 CHAINS, NETS, AXTNET, MAFNET 
# AND ALIGNMENT DOWNLOADS ON ORYLAT1 (DONE, 2007-09-11 - 2007-09-12, hartera)
# CLEAN UP AND MAKE blastz.danRer5 LINK TO BLASTZ DIRECTORY 
# (DONE, 2007-09-17, hartera) 
    ssh hgwdev
    mkdir /cluster/data/oryLat1/bed/blastz.swap.danRer5
    cd /cluster/data/oryLat1/bed/blastz.swap.danRer5
    # blastz parameters used to align oryLat1 as query to danRer5 as target:
    # BLASTZ_H=2500
    # BLASTZ_M=50
    # BLASTZ_Q=/cluster/data/blastz/HoxD55.q
    time /cluster/bin/scripts/doBlastzChainNet.pl -verbose=2 \
        -bigClusterHub=pk -chainMinScore=5000 -chainLinearGap=loose \
        -qRepeats=windowmaskerSdust -swap \ 
        /cluster/data/danRer5/bed/blastz.oryLat1.2007-09-10/DEF \
         >& swap.log &
    # 0.070u 0.027s 3:30.52 0.0%      0+0k 0+0io 0pf+0w
    # netChains: looks like previous stage was not successful (can't find
    # [oryLat1.danRer5.]all.chain[.gz]).
    time /cluster/bin/scripts/doBlastzChainNet.pl -verbose=2 \
        -bigClusterHub=pk -chainMinScore=5000 -chainLinearGap=loose \
        -qRepeats=windowmaskerSdust -swap -continue net \
        /cluster/data/danRer5/bed/blastz.oryLat1.2007-09-10/DEF \
         >& swap2.log &
    # 0.153u 0.109s 31:55.61 0.0%     0+0k 0+0io 0pf+0w
  
    cat \
  /cluster/data/oryLat1/bed/blastz.danRer5.swap/fb.oryLat1.chainDanRer5Link.txt
    # 130699035 bases of 700386597 (18.661%) in intersection
    # look at coverage of ensGene CDS, there is no native RefSeqs 
    # track for medaka. oryLat1 ensGene has 18477 genes and 23087 transcripts.
    featureBits oryLat1 ensGene:cds chainDanRer5Link -enrichment
# ensGene:cds 4.125%, chainDanRer5Link 18.661%, both 3.582%, cover 86.85%,
# enrich 4.65x
    featureBits oryLat1 ensGene:cds chainDanRer4Link -enrichment
# ensGene:cds 4.125%, chainDanRer4Link 22.275%, both 3.552%, cover 86.12%,
# enrich 3.87x
    featureBits oryLat1 ensGene:cds netDanRer5 -enrichment
# ensGene:cds 4.125%, netDanRer5 72.605%, both 3.981%, cover 96.52%, enrich
# 1.33x
    featureBits oryLat1 ensGene:cds netDanRer4 -enrichment
# ensGene:cds 4.125%, netDanRer4 73.914%, both 3.948%, cover 95.72%, enrich
# 1.30x

    # clean up a little (2007-09-17, hartera)
    ssh kkstore04
    cd /cluster/data/oryLat1/bed
    mv ./blastz.swap.danRer5/swap.log ./blastz.danRer5.swap
    rm -r blastz.swap.danRer5
    ln -s blastz.danRer5.swap blastz.danRer5

#########################################################################
# BLASTZ/CHAIN/NET Medaka oryLat1 (DONE - 2008-01-16 - 2008-01-30 - Hiram)
#	with contigs for Lamprey
    ssh kkstore04
    screen # use screen to control this job
    mkdir /cluster/data/oryLat1/bed/blastz.petMar1.2008-01-15
    cd /cluster/data/oryLat1/bed/blastz.petMar1.2008-01-15

    cat << '_EOF_' > DEF
# Medaka vs. Lamprey

# using the "close" genome alignment parameters
#	see also: http://genomewiki.ucsc.edu/index.php/Mm9_multiple_alignment
BLASTZ_Y=9400
BLASTZ_L=3000
BLASTZ_K=3000
BLASTZ_M=50

# TARGET: Medaka oryLat1 (40M chunks covers the largest chroms in one gulp)
#       chrUn in Scaffolds for this alignment run
SEQ1_DIR=/san/sanvol1/scratch/oryLat1/oryLat1.sdTrf.2bit
SEQ1_LEN=/cluster/data/oryLat1/chrom.sizes
SEQ1_CHUNK=10000000
SEQ1_LIMIT=30
SEQ1_LAP=10000

# QUERY: Lamprey petMar1
SEQ2_DIR=/cluster/bluearc/scratch/data/petMar1/petMar1.2bit
SEQ2_LEN=/cluster/data/petMar1/chrom.sizes
SEQ2_CTGDIR=/cluster/bluearc/scratch/data/petMar1/petMar1.supers.2bit
SEQ2_CTGLEN=/cluster/data/petMar1/super.sizes
SEQ2_LIFT=/cluster/data/petMar1/jkStuff/petMar1.nonBridged.lft
SEQ2_CHUNK=10000000
SEQ2_LIMIT=150
SEQ2_LAP=0

BASE=/cluster/data/oryLat1/bed/blastz.petMar1.2008-01-16
TMPDIR=/scratch/tmp
'_EOF_'
    # << this line keeps emacs coloring happy

    time doBlastzChainNet.pl -verbose=2 `pwd`/DEF \
	-chainMinScore=3000 -chainLinearGap=medium \
	-tRepeats=windowmaskerSdust -qRepeats=windowmaskerSdust \
	-bigClusterHub=kk > do.log 2>&1 &
    #	real    218m14.787s
# Completed: 69774 of 70854 jobs
# Crashed: 1080 jobs
# CPU time in finished jobs:    3950133s   65835.55m  1097.26h   45.72d  0.125 y
# IO & Wait Time:               1218716s   20311.93m   338.53h   14.11d  0.039 y
# Average job time:                  74s       1.23m     0.02h    0.00d
# Longest running job:                0s       0.00m     0.00h    0.00d
# Longest finished job:            1629s      27.15m     0.45h    0.02d
# Submission to last job:         12951s     215.85m     3.60h    0.15d
    #	had some parasol bookeeping problems, it actually finished
    #	the kluster run but got confused.  Checked manually with
    #	para recover -> nothing in new job list, and check files in
    #	psl result directory finding the correct number of files for the
    #	the job count.  Make the run.time file and continuing:
    time doBlastzChainNet.pl -verbose=2 `pwd`/DEF \
	-continue=cat -chainMinScore=3000 -chainLinearGap=medium \
	-tRepeats=windowmaskerSdust -qRepeats=windowmaskerSdust \
	-bigClusterHub=kk > cat.log 2>&1 &
    #	real    7m47.963s
    #	something failed
    #	had the specification to the lft file incorrect, failed during
    #	chain run.  Fix that, finish the run, now continuing:
    time doBlastzChainNet.pl -verbose=2 `pwd`/DEF \
	-continue=chainMerge -chainMinScore=3000 -chainLinearGap=medium \
	-tRepeats=windowmaskerSdust -qRepeats=windowmaskerSdust \
	-bigClusterHub=kk > chainMerge.log 2>&1 &
    #	had the same failure as the first time test of this alignment.
    #	something is wrong with netChainSubset, needs to be debugged.
    #	ignore the step to create the "over.chain" file, finish the net
    #	step manually, then continuing:
    time doBlastzChainNet.pl -verbose=2 `pwd`/DEF \
	-continue=load -chainMinScore=3000 -chainLinearGap=medium \
	-tRepeats=windowmaskerSdust -qRepeats=windowmaskerSdust \
	-bigClusterHub=kk > load.log 2>&1 &
    #	real    3m53.686s
    cat fb.oryLat1.chainPetMar1Link.txt
    #	24990733 bases of 700386597 (3.568%) in intersection

    #	And, for the swap:
    mkdir /cluster/data/petMar1/bed/blastz.oryLat1.swap
    cd /cluster/data/petMar1/bed/blastz.oryLat1.swap
    time doBlastzChainNet.pl -verbose=2 -swap \
	/cluster/data/oryLat1/bed/blastz.petMar1.2008-01-16/DEF \
	-chainMinScore=3000 -chainLinearGap=medium \
	-tRepeats=windowmaskerSdust -qRepeats=windowmaskerSdust \
	-bigClusterHub=kk > swap.log 2>&1 &
    #	real    1m3.572s
    #	same problem in netChainSubset trying to make the "over.chain" file.
    #	skip that, finish the chains manually:
    #	real    14m35.340s
    #	then continuing:
    time doBlastzChainNet.pl -verbose=2 -swap \
	/cluster/data/oryLat1/bed/blastz.petMar1.2008-01-16/DEF \
	-continue=load -chainMinScore=3000 -chainLinearGap=medium \
	-tRepeats=windowmaskerSdust -qRepeats=windowmaskerSdust \
	-bigClusterHub=kk > load.log 2>&1 &
    cat fb.petMar1.chainOryLat1Link.txt
    #	23358156 bases of 831696438 (2.808%) in intersection

#########################################################################
# BLASTZ/CHAIN/NET Medaka oryLat1 (DONE - 2008-01-29 - Hiram)
#	with contigs for Lamprey
    ssh kkstore04
    screen # use screen to control this job
    mkdir /cluster/data/oryLat1/bed/blastz.petMar1.2008-01-29
    cd /cluster/data/oryLat1/bed/blastz.petMar1.2008-01-29

    cat << '_EOF_' > DEF
# Medaka vs. Lamprey

# using the "close" genome alignment parameters
#	see also: http://genomewiki.ucsc.edu/index.php/Mm9_multiple_alignment
BLASTZ_Y=9400
BLASTZ_L=3000
BLASTZ_K=3000
BLASTZ_M=50

# TARGET: Medaka oryLat1 (40M chunks covers the largest chroms in one gulp)
#       chrUn in Scaffolds for this alignment run
SEQ1_DIR=/san/sanvol1/scratch/oryLat1/oryLat1.sdTrf.2bit
SEQ1_LEN=/cluster/data/oryLat1/chrom.sizes
SEQ1_CHUNK=10000000
SEQ1_LIMIT=30
SEQ1_LAP=10000

# QUERY: Lamprey petMar1
SEQ2_DIR=/cluster/bluearc/scratch/data/petMar1/petMar1.2bit
SEQ2_LEN=/cluster/data/petMar1/chrom.sizes
SEQ2_CHUNK=10000000
SEQ2_LIMIT=150
SEQ2_LAP=0

BASE=/cluster/data/oryLat1/bed/blastz.petMar1.2008-01-29
TMPDIR=/scratch/tmp
'_EOF_'
    # << this line keeps emacs coloring happy

    time doBlastzChainNet.pl -verbose=2 `pwd`/DEF \
	-chainMinScore=3000 -chainLinearGap=medium \
	-tRepeats=windowmaskerSdust -qRepeats=windowmaskerSdust \
	-bigClusterHub=memk > do.log 2>&1 &
    #	real    1117m45.898s
    cat fb.oryLat1.chainPetMar1Link.txt
    #	24991574 bases of 700386597 (3.568%) in intersection

    mkdir /cluster/data/petMar1/bed/blastz.oryLat1.swap
    cd /cluster/data/petMar1/bed/blastz.oryLat1.swap
    time doBlastzChainNet.pl -verbose=2 -swap \
	/cluster/data/oryLat1/bed/blastz.petMar1.2008-01-29/DEF \
	-chainMinScore=3000 -chainLinearGap=medium \
	-tRepeats=windowmaskerSdust -qRepeats=windowmaskerSdust \
	-bigClusterHub=memk > swap.log 2>&1 &
    #	real    38m58.711s

    cat fb.petMar1.chainOryLat1Link.txt
    #	23359179 bases of 831696438 (2.809%) in intersection

############################################################################
#  oryLat1 - Medaka - Ensembl Genes (DONE - 2008-03-31 - 04-15 - hiram)
    ssh kkstore04
    cd /cluster/data/oryLat1
    cat << '_EOF_' > oryLat1.ensGene.ra
# required db variable
db oryLat1
# optional nameTranslation, the sed command that will transform
#	Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9][0-9]*\)/chr\1/; s/^MT/chrM/"
# ignore 2,687 genes that haven't lifted properly yet
skipInvalid yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=49 oryLat1.ensGene.ra
    featureBits oryLat1 ensGene
    #	29680340 bases of 700386597 (4.238%) in intersection

    #	version 49 update had a big change in the Medaka genes.
    #	Many of the new genes are in the Ensembl Gene Scaffold
    #	(actually here ultracontigs) coordinate space that will not
    #	translate to our chrUn assembly.  So, some fixups are
    #	necessary to get joinerCheck to be a bit more happy.
    ssh hgwdev
    cd ~/kent/src/hg/makeDb/schema
    joinerCheck -keys -database=oryLat1 -identifier=ensemblTranscriptId \
	all.joiner
# Checking keys on database oryLat1
# oryLat1.ensGtp.transcript - hits 22447 of 25134
# Error: 2687 of 25134 elements of oryLat1.ensGtp.transcript are not in key ensGene.name line 1726 of all.joiner
# Example miss: ENSORLT00000022895
# oryLat1.ensPep.name - hits 22053 of 24661
# Error: 2608 of 24661 elements of oryLat1.ensPep.name are not in key ensGene.name line 1728 of all.joiner
# Example miss: ENSORLT00000022872
    cd  /cluster/data/oryLat1/bed/ensGene.49
    grep "invalid chrom" *.errors.txt | cut -d\  -f2 | sort -u > badLifts.name
    hgsql -N -e "select transcript from ensGtp;" oryLat1 \
	| sort > ensGtp.transcript
    comm -13 ensGene.name ensGtp.transcript | sort > ensGtp.not.ensGene.name
    comm -13 ensGene.name ensPep.name | sort > ensPep.not.ensGene.name
    #	do the badLifts account for all the missing peptides:
    comm -12 badLifts.name ensPep.not.ensGene.name | wc -l
    #	2608 -> yes, this is the count of missing peptides.
    #	So, remove this set of business from ensPep table
    for N in `cat badLifts.name`
do
    hgsql -e "delete from ensPep where name=\"$N\";" oryLat1
    echo $N
done
    #	And from the ensGtp table
    for N in `cat badLifts.name`
do
    hgsql -e "delete from ensGtp where transcript=\"$N\";" oryLat1
    echo $N
done
############################################################################
############################################################################
# TRANSMAP vertebrate.2008-05-20 build  (2008-05-24 markd)

vertebrate-wide transMap alignments were built  Tracks are created and loaded
by a single Makefile. This is available from:
   svn+ssh://hgwdev.soe.ucsc.edu/projects/compbio/usr/markd/svn/projs/transMap/tags/vertebrate.2008-05-20

see doc/builds.txt for specific details.
############################################################################
############################################################################
# TRANSMAP vertebrate.2008-06-07 build  (2008-06-30 markd)

vertebrate-wide transMap alignments were built  Tracks are created and loaded
by a single Makefile. This is available from:
   svn+ssh://hgwdev.soe.ucsc.edu/projects/compbio/usr/markd/svn/projs/transMap/tags/vertebrate.2008-06-30

see doc/builds.txt for specific details.
############################################################################
