# sequence from Mattheiu Blanchette 1/28/05 in /cluster/store8/blanchem/hsa13/ancestors1
    ssh kksilo
    mkdir -p /cluster/store10/ancestor/eutPri1/sequence
    ln -s /cluster/store10/ancestor/eutPri1 /cluster/data/eutPri1
    cd /cluster/data/eutPri1/sequence
    g=PREDICTIONANCESTRAL7
    echo $g > list
    for i in /cluster/store8/blanchem/hsa13/ancestors1/*
    do 
	f=`basename $i .ancestors`
	faSomeRecords $i list stdout | sed "s/ $g/$f/" | tr -d '-' > $f.fa
	echo $i
    done
    cat *.fa > eutPri1.fa
    scaffoldFaToAgp eutPri1.fa  

    cd ..
    mkdir 15
    sed -e "s/chrUn/chr15/g" sequence/eutPri1.agp > 15/chr15.agp 
    sed -e "s/chrUn/chr15/g" sequence/eutPri1.lft > 15/chr15.lft 
    cat */*/*.lft > jkStuff/liftAll.lft  

    # Create chromosome FA file from AGP and file of masked scaffolds
    cd 15
    agpToFa -simpleMultiMixed chr15.agp chr15 chr15.fa ../sequence/eutPri1.fa 

# CREATING DATABASE 
    # Create the database.
    ssh hgwdev
    echo 'create database eutPri1' | hgsql ''

# STORE SEQUENCE AND ASSEMBLY INFORMATION 
    # Translate to nib
    cd /cluster/data/eutPri1
    mkdir nib
    faToNib -softMask 15/chr15.fa nib/chr15.nib

    faToTwoBit */chr*.fa eutPri1.2bit
    mkdir -p /gbdb/eutPri1/nib
    ln -s /cluster/data/eutPri1/eutPri1.2bit /gbdb/eutPri1/nib/
    ln -s /cluster/data/eutPri1/nib/chr15.nib  /gbdb/eutPri1/nib

    hgsql eutPri1  < $HOME/kent/src/hg/lib/chromInfo.sql             

    hgNibSeq -preMadeNib eutPri1 /gbdb/eutPri1/nib 15/chr15.fa
    echo "select chrom,size from chromInfo" | hgsql -N eutPri1 > chrom.sizes
    echo 15 > chrom.lst

    # create assembly and gap tracks
    hgGoldGapGl -noGl -chromLst=chrom.lst eutPri1 /cluster/data/eutPri1 .

    echo "create table grp (PRIMARY KEY(NAME)) select * from hg17.grp" | hgsql eutPri1 

    cd 15
    splitFaIntoContigs chr15.agp chr15.fa . -nSize=5000000 
    cd ..

    # stupid move to restore sanity
    mv 15 old.15
    mv old.15/15 15

# MAKE GCPERCENT 
    mkdir -p /cluster/data/eutPri1/bed/gcPercent
    cd /cluster/data/eutPri1/bed/gcPercent
    hgsql eutPri1  < ~/src/hg/lib/gcPercent.sql

    # load gcPercent table
    hgGcPercent eutPri1 ../../nib

    cd ../..

    echo 'INSERT INTO defaultDb VALUES ("Primate Ancestor", "eutPri1");' | hgsql -h genome-testdb hgcentraltest

    # Add dbDb and defaultDb entries:
    echo 'insert into dbDb (name, description, nibPath, organism,  \
          defaultPos, active, orderKey, genome, scientificName,  \
          htmlPath, hgNearOk)  \
          values("eutPri1", "Jan. 2005", \
          "/gbdb/eutPri1/nib", "Primate Ancestor", "chr15:13120186-13124117", 1, \
          46, "Primate Ancestor", "Primate Ancestor", \
          "/gbdb/eutPri1/html/description.html", 0);' \
    | hgsql -h genome-testdb hgcentraltest
    echo 'update defaultDb set name = "eutPri1" where genome = "Primate Ancestor"' | hgsql -h genome-testdb hgcentraltest
    echo 'insert into genomeClade (genome, clade, priority) values ("Primate Ancestor", "vertebrate",67);'  \
	| hgsql -h genome-testdb hgcentraltest

    # Make trackDb table so browser knows what tracks to expect:
    cd $HOME/kent/src/hg/makeDb/trackDb
    mkdir -p eutPri/eutPri1
    cvs add eutPri
    cvs add eutPri/eutPri1
    cvs ci eutPri

    # Edit that makefile to add eutPri1 in all the right places and do
    make update

# SIMPLE REPEATS (TRF) 
    ssh kksilo
    mkdir -p /cluster/data/eutPri1/bed/simpleRepeat
    cd /cluster/data/eutPri1/bed/simpleRepeat
    mkdir trf
    tcsh
    cp /dev/null jobs.csh
    foreach d (/cluster/data/eutPri1/*/chr*_?{,?})
      set ctg = $d:t
      foreach f ($d/${ctg}.fa)
        set fout = $f:t:r.bed
        echo $fout
        echo "/cluster/bin/i386/trfBig -trf=/cluster/bin/i386/trf $f /dev/null -bedAt=trf/$fout -tempDir=/tmp" \
        >> jobs.csh
      end
    end
    csh -ef jobs.csh >&! jobs.log &
    # check on this with
    tail -f jobs.log
    wc -l jobs.csh
    ls -1 trf | wc -l
    endsInLf trf/*
    # When job is done do:
    liftUp simpleRepeat.bed /cluster/data/eutPri1/jkStuff/liftAll.lft warn  trf/*.bed

    # Load into the database:
    ssh hgwdev
    hgLoadBed eutPri1 simpleRepeat \
      /cluster/data/eutPri1/bed/simpleRepeat/simpleRepeat.bed \
      -sqlTable=$HOME/kent/src/hg/lib/simpleRepeat.sql
    featureBits eutPri1 simpleRepeat
    # 1251045 bases of 88781993 (1.409%) in intersection

# REPEAT MASKING
    #- Make the run directory and job list:
    cd /cluster/data/eutPri1
    cd 15
    for i in chr*_*; do  cd $i; faSplit gap $i.fa 500000 "$i"_ -lift=$i.lft -minGapSize=100; cd ..; done

    cd ..
    cat << '_EOF_' > jkStuff/RMHuman
#!/bin/csh -fe

cd $1
pushd .
/bin/mkdir -p /tmp/eutPri1/$2
/bin/cp $2 /tmp/eutPri1/$2/
cd /tmp/eutPri1/$2
/cluster/bluearc/RepeatMasker/RepeatMasker -ali -s $2
popd
/bin/cp /tmp/eutPri1/$2/$2.out ./
if (-e /tmp/eutPri1/$2/$2.align) /bin/cp /tmp/eutPri1/$2/$2.align ./
if (-e /tmp/eutPri1/$2/$2.tbl) /bin/cp /tmp/eutPri1/$2/$2.tbl ./
if (-e /tmp/eutPri1/$2/$2.cat) /bin/cp /tmp/eutPri1/$2/$2.cat ./
/bin/rm -fr /tmp/eutPri1/$2/*
/bin/rmdir --ignore-fail-on-non-empty /tmp/eutPri1/$2
/bin/rmdir --ignore-fail-on-non-empty /tmp/eutPri1
'_EOF_'
    # << this line makes emacs coloring happy
    chmod +x jkStuff/RMHuman
    mkdir RMRun
    for i in `cat chrom.lst`
    do
	f=$i/*/*_*_*.fa
	for j in $f
	do
	    d=`dirname $j`
	    f=`basename $j`
	    echo /cluster/data/eutPri1/jkStuff/RMHuman
		 /cluster/data/eutPri1/$d $f \
	       '{'check out line+ /cluster/data/eutPri1/$d/$f.out'}' 
	done
    done > RMRun/RMJobs

    #- Do the run
    ssh kk
    cd /cluster/data/eutPri1/RMRun
    para create RMJobs
    para try, para check, para check, para push, para check,...
# Completed: 195 of 195 jobs
# CPU time in finished jobs:    1591364s   26522.74m   442.05h   18.42d  0.050 y
# IO & Wait Time:                  4786s      79.76m     1.33h    0.06d  0.000 y
# Average job time:                8185s     136.42m     2.27h    0.09d
# Longest job:                    10708s     178.47m     2.97h    0.12d
# Submission to last job:         10759s     179.32m     2.99h    0.12d


    #- Lift up the 500KB chunk .out's to 5MB ("pseudo-contig") level
    ssh kksilo
    cd /cluster/data/eutPri1
    foreach d (*/chr*_?{,?})
      set contig = $d:t
      echo $contig
      liftUp $d/$contig.fa.out $d/$contig.lft warn $d/${contig}_*.fa.out > /dev/null
    end

    #- Lift pseudo-contigs to chromosome level
    foreach c (`cat chrom.lst`)
      echo lifting $c
      cd $c
      if (-e lift/ordered.lft && ! -z lift/ordered.lft) then
        liftUp chr$c.fa.out lift/ordered.lft warn `cat lift/oOut.lst` \
        > /dev/null
      endif
      if (-e lift/random.lft && ! -z lift/random.lft) then
        liftUp chr${c}_random.fa.out lift/random.lft warn `cat lift/rOut.lst` \
        > /dev/null
      endif
      cd ..
    end

    #- Load the .out files into the database with:
    ssh hgwdev
    cd /cluster/data/eutPri1
    hgLoadOut eutPri1 */chr*.fa.out
  # Strange perc. field -14.9 line 53256 of 15/chr15.fa.out
  # Strange perc. field -54.5 line 53256 of 15/chr15.fa.out
  # Loading up table chr15_rmsk
  # note: 24 records dropped due to repStart > repEnd



# PROCESS SIMPLE REPEATS INTO MASK 
    # After the simpleRepeats track has been built, make a filtered version 
    # of the trf output: keep trf's with period <= 12:
    ssh kksilo
    cd /cluster/data/eutPri1/bed/simpleRepeat
    mkdir -p trfMask
    foreach f (trf/chr*.bed)
      awk '{if ($5 <= 12) print;}' $f > trfMask/$f:t
    end
    # Lift up filtered trf output to chrom coords as well:
    cd /cluster/data/eutPri1
    mkdir bed/simpleRepeat/trfMaskChrom
    foreach c (`cat chrom.lst`)
      if (-e $c/lift/ordered.lst) then
        perl -wpe 's@(\S+)@bed/simpleRepeat/trfMask/$1.bed@' \
          $c/lift/ordered.lst > $c/lift/oTrf.lst
        liftUp bed/simpleRepeat/trfMaskChrom/chr$c.bed \
          jkStuff/liftAll.lft warn `cat $c/lift/oTrf.lst`
      endif
      if (-e $c/lift/random.lst) then
        perl -wpe 's@(\S+)@bed/simpleRepeat/trfMask/$1.bed@' \
           $c/lift/random.lst > $c/lift/rTrf.lst
        liftUp bed/simpleRepeat/trfMaskChrom/chr${c}_random.bed \
          jkStuff/liftAll.lft warn `cat $c/lift/rTrf.lst`
      endif
    end
    # Here's the coverage for the filtered TRF:
    ssh hgwdev
    cat /cluster/data/eutPri1/bed/simpleRepeat/trfMaskChrom/*.bed > /tmp/filtTrf.bed
    featureBits eutPri1 /tmp/filtTrf.bed
#  535895 bases of 88781993 (0.604%) in intersection

# MASK SEQUENCE WITH REPEATMASKER AND SIMPLE REPEAT/TRF 
    ssh kksilo
    cd /cluster/data/eutPri1
    # Soft-mask (lower-case) the contig and chr .fa's, 
    # then make hard-masked versions from the soft-masked.  
    set trfCtg=bed/simpleRepeat/trfMask
    set trfChr=bed/simpleRepeat/trfMaskChrom
    foreach f (*/chr*.fa)
      echo "repeat- and trf-masking $f"
      maskOutFa -soft $f $f.out $f
      set chr = $f:t:r
      maskOutFa -softAdd $f $trfChr/$chr.bed $f
      echo "hard-masking $f"
      maskOutFa $f hard $f.masked
    end
# WARNING: negative rEnd: -10 chr15:5787338-5787377 FRAM
# WARNING: negative rEnd: -203 chr15:22088023-22089143 L1MEc
# WARNING: negative rEnd: -140 chr15:41818311-41818422 L1MA8

    foreach c (`cat chrom.lst`)
      echo "repeat- and trf-masking contigs of chr$c, chr${c}_random"
      foreach d ($c/chr*_?{,?})
        set ctg=$d:t
        set f=$d/$ctg.fa
        maskOutFa -soft $f $f.out $f
        maskOutFa -softAdd $f $trfCtg/$ctg.bed $f
        maskOutFa $f hard $f.masked
      end
    end
    #- Rebuild the nib files, using the soft masking in the fa:
    mkdir nib
    foreach f (*/chr*.fa)
      faToNib -softMask $f nib/$f:t:r.nib
    end
# Writing 88841993 bases in 44421005 bytes
    # Make one big 2bit file as well, and make a link to it in 
    # /gbdb/eutPri1/nib because hgBlat looks there:
    faToTwoBit */chr*.fa eutPri1.2bit
    ln -s /cluster/data/eutPri1/eutPri1.2bit /gbdb/eutPri1/nib/

# PUT MASKED SEQUENCE OUT FOR CLUSTER RUNS 
    ssh kkr1u00
    # Chrom-level mixed nibs that have been repeat- and trf-masked:
    rm -rf /iscratch/i/eutPri1/nib
    mkdir -p /iscratch/i/eutPri1/nib
    cp -p /cluster/data/eutPri1/nib/chr*.nib /iscratch/i/eutPri1/nib
    # Pseudo-contig fa that have been repeat- and trf-masked:
    rm -rf /iscratch/i/eutPri1/trfFa
    mkdir /iscratch/i/eutPri1/trfFa
    foreach d (/cluster/data/eutPri1/*/chr*_?{,?})
      cp $d/$d:t.fa /iscratch/i/eutPri1/trfFa
    end
    cp -p /cluster/data/eutPri1/eutPri1.2bit /iscratch/i/eutPri1/
    iSync

# MAKE 10.OOC, 11.OOC FILES FOR BLAT 
    ssh kolossus
    mkdir /cluster/data/eutPri1/bed/ooc
    mkdir -p /cluster/bluearc/eutPri1/
    cd /cluster/data/eutPri1/bed/ooc
    ls -1 /cluster/data/eutPri1/nib/chr*.nib > nib.lst
    blat nib.lst /dev/null /dev/null -tileSize=11 -makeOoc=/cluster/bluearc/eutPri1/11.ooc -repMatch=1024
# Wrote 8 overused 11-mers to /cluster/bluearc/eutPri1/11.ooc
    blat nib.lst /dev/null /dev/null -tileSize=10 -makeOoc=/cluster/bluearc/eutPri1/10.ooc -repMatch=843
# Wrote 67 overused 10-mers to /cluster/bluearc/eutPri1/10.ooc
    ssh kkr1u00
    cp -p /cluster/bluearc/eutPri1/*.ooc /iscratch/i/eutPri1/
    iSync


# MAKE Human Proteins track 
    ssh kksilo
    mkdir -p /cluster/data/eutPri1/blastDb
    cd /cluster/data/eutPri1/blastDb
    for i in `cat ../chrom.lst`; do ls -1 ../$i/*/*.fa  ; done > list  
    for i in `cat list`
    do
	f=`basename $i`
	sed "s/ .*$//" $i > $f 
	formatdb -i $f -p F
    done
    rm *.log *.fa list
    cd ..
    # for i in `cat chrom.lst`; do cat  $i/chr*/*.lft  ; done > jkStuff/subChr.lft

    ssh kkr1u00
    mkdir -p /iscratch/i/eutPri1/blastDb
    cd /cluster/data/eutPri1/blastDb
    for i in nhr nin nsq; do cp *.$i /iscratch/i/eutPri1/blastDb     ; echo $i; done

    cd
    iSync > sync.out

    mkdir -p /cluster/data/eutPri1/bed/tblastn.hg17KG
    cd /cluster/data/eutPri1/bed/tblastn.hg17KG
    echo  /iscratch/i/eutPri1/blastDb/*.nsq  | xargs ls -S | sed "s/\.nsq//"  > query.lst  
    # back to kksilo
    exit

    cd /cluster/data/eutPri1/bed/tblastn.hg17KG
    mkdir -p /cluster/bluearc/eutPri1/bed/tblastn.hg17KG/kgfa
    split -l 70 /cluster/data/hg17/bed/blat.hg17KG/hg17KG.psl /cluster/bluearc/eutPri1/bed/tblastn.hg17KG/kgfa/kg
    ln -s /cluster/bluearc/eutPri1/bed/tblastn.hg17KG/kgfa kgfa
    cd kgfa
    for i in *; do pslxToFa $i $i.fa; rm $i; done
    cd ..
    ls -1S kgfa/*.fa > kg.lst
    mkdir -p /cluster/bluearc/eutPri1/bed/tblastn.hg17KG/blastOut
    ln -s /cluster/bluearc/eutPri1/bed/tblastn.hg17KG/blastOut
    for i in `cat kg.lst`; do  mkdir blastOut/`basename $i .fa`; done
    tcsh
    cat << '_EOF_' > blastGsub
#LOOP
blastSome $(path1) {check in line $(path2)} {check out exists blastOut/$(root2)/q.$(root1).psl } 
#ENDLOOP
'_EOF_'
    cat << '_EOF_' > blastSome
#!/bin/sh
BLASTMAT=/iscratch/i/blast/data
export BLASTMAT
g=`basename $2`
f=/tmp/`basename $3`.$g
for eVal in 0.01 0.001 0.0001 0.00001 0.000001 1E-09 1E-11
do
if /scratch/blast/blastall -M BLOSUM80 -m 0 -F no -e $eVal -p tblastn -d $1 -i $2 -o $f.8
then
        mv $f.8 $f.1
        break;
fi
done
if test -f  $f.1
then
    if /cluster/bin/i386/blastToPsl $f.1 $f.2
    then
        liftUp -nosort -type=".psl" -nohead $f.3 ../../jkStuff/liftAll.lft carry $f.2       
        liftUp -nosort -type=".psl" -pslQ -nohead $3.tmp /cluster/data/hg17/bed/blat.hg17KG/protein.lft warn $f.3       
        if pslCheck -prot $3.tmp                                                                          
        then                                                                                              
            mv $3.tmp $3                                                                                  
            rm -f $f.1 $f.2 $f.3 
        fi
        exit 0                                                                                            
    fi                                                                                                    
fi                                                                                                        
rm -f $f.1 $f.2 $3.tmp $f.8 $f.3
exit 1
'_EOF_'

    chmod +x blastSome
    gensub2 query.lst kg.lst blastGsub blastSpec

    ssh kk
    cd /cluster/data/eutPri1/bed/tblastn.hg17KG
    para create blastSpec
    para push
# Completed: 6633 of 6633 jobs
# CPU time in finished jobs:    2509978s   41832.96m   697.22h   29.05d  0.080 y
# IO & Wait Time:                 57960s     966.00m    16.10h    0.67d  0.002 y
# Average job time:                 387s       6.45m     0.11h    0.00d
# Longest job:                     2437s      40.62m     0.68h    0.03d
# Submission to last job:          7992s     133.20m     2.22h    0.09d

    cat << '_EOF_' > chainGsub
#LOOP
chainSome $(path1)
#ENDLOOP
'_EOF_'

    cat << '_EOF_' > chainOne
(cd $1; cat q."$2"* | simpleChain -prot -outPsl -maxGap=200000 stdin ../c.`basename $1`.$2.psl)
'_EOF_'
    chmod +x chainOne

    for j in blastOut/kg??; do for i in `cat ../../chrom.lst`; do echo chainOne $j chr"$i"; done ; done > chainSpec

    ssh kki
    cd /cluster/data/eutPri1/bed/tblastn.hg17KG
    para create chainSpec
    para push
# Completed: 603 of 603 jobs
# CPU time in finished jobs:      34193s     569.88m     9.50h    0.40d  0.001 y
# IO & Wait Time:                  2951s      49.18m     0.82h    0.03d  0.000 y
# Average job time:                  62s       1.03m     0.02h    0.00d
# Longest job:                      814s      13.57m     0.23h    0.01d
# Submission to last job:          3282s      54.70m     0.91h    0.04d

    exit
    # back to kksilo
    cd /cluster/data/eutPri1/bed/tblastn.hg17KG/blastOut
    for i in kg??
    do 
	cat c.$i.*.psl | awk "(\$13 - \$12)/\$11 > 0.6 {print}" > c60.$i.psl
	sort -rn c60.$i.psl | pslUniq stdin u.$i.psl
	awk "((\$1 / \$11) ) > 0.90 { print   }" c60.$i.psl > m60.$i.psl
	echo $i
    done

    cat u.*.psl m60* | sort -T /tmp -k 14,14 -k 17,17n -k 17,17n  | uniq  > /cluster/data/eutPri1/bed/tblastn.hg17KG/blastHg17KG.psl
    cd ..
    ssh hgwdev
    cd /cluster/data/eutPri1/bed/tblastn.hg17KG
    hgLoadPsl eutPri1 blastHg17KG.psl

    # back to kksilo
    rm -rf blastOut
# End tblastn

# PRODUCING GENSCAN PREDICTIONS 
    ssh hgwdev
    mkdir /cluster/data/eutPri1/bed/genscan
    cd /cluster/data/eutPri1/bed/genscan
    # Check out hg3rdParty/genscanlinux to get latest genscan:
    cvs co hg3rdParty/genscanlinux
    # Run on small cluster (more mem than big cluster).
    ssh kki
    cd /cluster/data/eutPri1/bed/genscan
    # Make 3 subdirectories for genscan to put their output files in
    mkdir gtf pep subopt
    # Generate a list file, genome.list, of all the hard-masked contigs that 
    # *do not* consist of all-N's (which would cause genscan to blow up)
    rm -f genome.list
    touch genome.list
    foreach f ( `ls -1S /cluster/data/eutPri1/*/chr*_*/chr*_?{,?}.fa.masked` )
      egrep '[ACGT]' $f > /dev/null
      if ($status == 0) echo $f >> genome.list
    end
    wc -l genome.list
    # Create template file, gsub, for gensub2.  For example (3-line file):
    cat << '_EOF_' > gsub
#LOOP
/cluster/bin/i386/gsBig {check in line+ $(path1)} {check out line gtf/$(root1).gtf} -trans={check out line pep/$(root1).pep} -subopt={check out line subopt/$(root1).bed} -exe=hg3rdParty/genscanlinux/genscan -par=hg3rdParty/genscanlinux/HumanIso.smat -tmp=/tmp -window=2400000
#ENDLOOP
'_EOF_'
    # << this line makes emacs coloring happy
    gensub2 genome.list single gsub jobList
    para create jobList
    para try, check, push, check, ...
# Completed: 11 of 11 jobs
# CPU time in finished jobs:       1865s      31.09m     0.52h    0.02d  0.000 y
# IO & Wait Time:                    39s       0.64m     0.01h    0.00d  0.000 y
# Average job time:                 173s       2.88m     0.05h    0.00d
# Longest job:                      253s       4.22m     0.07h    0.00d
# Submission to last job:           253s       4.22m     0.07h    0.00d


    # Convert these to chromosome level files as so:
    ssh kksilo
    cd /cluster/data/eutPri1/bed/genscan
    liftUp genscan.gtf ../../jkStuff/liftAll.lft warn gtf/*.gtf
    liftUp genscanSubopt.bed ../../jkStuff/liftAll.lft warn subopt/*.bed
    cat pep/*.pep > genscan.pep

    # Load into the database as so:
    ssh hgwdev
    cd /cluster/data/eutPri1/bed/genscan
    ldHgGene eutPri1 genscan genscan.gtf
    hgPepPred eutPri1 generic genscanPep genscan.pep
    hgLoadBed eutPri1 genscanSubopt genscanSubopt.bed
#END GENSCAN

# MAKE Human Known Gene mRNAs Mapped by BLATZ track (hg17 
    # Create working directory and extract known gene mrna from database
    ssh kk
    mkdir -p /cluster/data/eutPri1/bed/blatz.hg17KG
    cd /cluster/data/eutPri1/bed/blatz.hg17KG

    # Do parasol job to align via blatz
    mkdir run0
    cd run0
    mkdir psl
    # awk '{print $1;}' /cluster/data/eutPri1/chrom.sizes > genome.lst
    ls -1S /iscratch/i/eutPri1/trfFa/*.fa > genome.lst
    for i in `cat genome.lst` ;  do f=`basename $i .fa`; mkdir psl/$f ; done 
    for i in  /iscratch/i/hg17/knownRna2/*.fa ; do echo $i; done > mrna.lst
    cat << '_EOF_' > doBlatz
#!/bin/sh
f=/tmp/`basename $1`.`basename $2`
if blatz -rna -minScore=6000 -out=psl $1 $2 $f.1
then
    if liftUp -nosort -type=".psl" -nohead $f.2 ../../../jkStuff/liftAll.lft carry $f.1
    then
	mv $f.2 $3
	rm $f.1
	exit 0
    fi
fi
rm $f.1 $f.2
exit 1
'_EOF_'
    chmod +x doBlatz
    cat << '_EOF_' > gsub
#LOOP
doBlatz   $(path1) $(path2) {check out line psl/$(root1)/$(root2).psl}
#ENDLOOP
'_EOF_'
    # << this line keeps emacs coloring happy
    gensub2 genome.lst mrna.lst gsub spec
    para create spec
    # Then do usual para try/para push/para time
# Completed: 6303 of 6303 jobs
# CPU time in finished jobs:     920417s   15340.29m   255.67h   10.65d  0.029 y
# IO & Wait Time:                 23131s     385.51m     6.43h    0.27d  0.001 y
# Average job time:                 150s       2.49m     0.04h    0.00d
# Longest job:                     1064s      17.73m     0.30h    0.01d
# Submission to last job:          1632s      27.20m     0.45h    0.02d

    # Do sort and near-besting on file server
    ssh kksilo
    cd /cluster/data/eutPri1/bed/blatz.hg17KG/run0
    catDir -r psl | pslFilter -minScore=100 -minAli=255 -noHead stdin stdout | sort -k 10 > ../filtered.psl
    cd ..
    pslReps filtered.psl nearBest.psl /dev/null -nohead -minAli=0.8 -nearTop=0.01 -minCover=0.8
    sort -k 14 nearBest.psl > blatzHg17KG.psl

    # Clean up
    rm -r run0/psl 

    # Load into database
    ssh hgwdev
    cd /cluster/data/eutPri1/bed/blatz.hg17KG
    hgLoadPsl eutPri1 blatzHg17KG.psl 

#END BLATZ

# BLASTZ SWAP FOR hg17 vs eutPri1 BLASTZ TO CREATE eutPri1 vs hg17 BLASTZ
    ssh kolossus
    mkdir -p /cluster/data/eutPri1/bed/blastz.hg17
    cd /cluster/data/eutPri1/bed/blastz.hg17
    set aliDir = /cluster/data/hg17/bed/blastz.eutPri1
    cp $aliDir/S1.len S2.len
    cp $aliDir/S2.len S1.len
    mkdir unsorted axtChrom
    cat $aliDir/axtChrom/chr*.axt \
    | axtSwap stdin $aliDir/S1.len $aliDir/S2.len stdout \
    | axtSplitByTarget stdin unsorted
    # Sort the shuffled .axt files.
    foreach f (unsorted/*.axt)
      echo sorting $f:t:r
      axtSort $f axtChrom/$f:t
    end
    du -sh $aliDir/axtChrom unsorted axtChrom
    # 409M    /cluster/data/hg17/bed/blastz.eutPri1/axtChrom
    # 409M    unsorted
    # 409M    axtChrom

    rm -r unsorted

# CHAIN HUMAN BLASTZ 
    # Run axtChain on little cluster
    ssh kki
    cd /cluster/data/eutPri1/bed/blastz.hg17
    mkdir -p axtChain/run1
    cd axtChain/run1
    mkdir out chain
    ls -1S /cluster/data/eutPri1/bed/blastz.hg17/axtChrom/*.axt  > input.lst
    cat << '_EOF_' > gsub
#LOOP
doChain {check in line+ $(path1)} {check out line+ chain/$(root1).chain} {check out exists out/$(root1).out}
#ENDLOOP
'_EOF_'
    # << this line makes emacs coloring happy

    cat << '_EOF_' > doChain
#!/bin/csh
axtChain $1 \
  /iscratch/i/eutPri1/nib \
  /iscratch/i/gs.18/build35/bothMaskedNibs $2 > $3
'_EOF_'
    # << this line makes emacs coloring happy
    chmod a+x doChain
    gensub2 input.lst single gsub jobList
    para create jobList
    para try, check, push, check...
# Completed: 1 of 1 jobs
# CPU time in finished jobs:        189s       3.16m     0.05h    0.00d  0.000 y
# IO & Wait Time:                    11s       0.18m     0.00h    0.00d  0.000 y
# Average job time:                 200s       3.33m     0.06h    0.00d
# Longest job:                      200s       3.33m     0.06h    0.00d
# Submission to last job:           200s       3.33m     0.06h    0.00d

    # now on the cluster server, sort chains
    ssh kksilo
    cd /cluster/data/eutPri1/bed/blastz.hg17/axtChain
    chainMergeSort run1/chain/*.chain > all.chain
    chainSplit chain all.chain
    rm run1/chain/*.chain

    # take a look at score distr's
    foreach f (chain/*.chain)
      grep chain $f | awk '{print $2;}' | sort -nr > /tmp/score.$f:t:r
      echo $f:t:r
      textHistogram -binSize=4000 /tmp/score.$f:t:r
      echo ""
    end

    mv all.chain all.chain.unfiltered
    chainFilter -minScore=50000 all.chain.unfiltered > all.chain
    rm chain/*
    chainSplit chain all.chain
    gzip all.chain.unfiltered

    # Load chains into database
    ssh hgwdev
    cd /cluster/data/eutPri1/bed/blastz.hg17/axtChain/chain
    foreach i (*.chain)
        set c = $i:r
        hgLoadChain eutPri1 ${c}_chainHg17 $i
    end
    featureBits eutPri1 chainHg17Link
    # 54236215 bases of 70435997 (77.001%) in intersection

# BLASTZ SWAP FOR borEut1 vs eutPri1 BLASTZ TO CREATE eutPri1 vs borEut1 BLASTZ
    ssh kolossus
    mkdir -p /cluster/data/eutPri1/bed/blastz.borEut1
    cd /cluster/data/eutPri1/bed/blastz.borEut1
    set aliDir = /cluster/data/borEut1/bed/blastz.eutPri1
    cp $aliDir/S1.len S2.len
    cp $aliDir/S2.len S1.len
    mkdir unsorted axtChrom
    cat $aliDir/axtChrom/chr*.axt \
    | axtSwap stdin $aliDir/S1.len $aliDir/S2.len stdout \
    | axtSplitByTarget stdin unsorted
    # Sort the shuffled .axt files.
    foreach f (unsorted/*.axt)
      echo sorting $f:t:r
      axtSort $f axtChrom/$f:t
    end
    du -sh $aliDir/axtChrom unsorted axtChrom
    # 409M    /cluster/data/borEut1/bed/blastz.eutPri1/axtChrom
    # 409M    unsorted
    # 409M    axtChrom

    rm -rf unsorted

# CHAIN DOG BLASTZ 
    # Run axtChain on little cluster
    ssh kki
    cd /cluster/data/eutPri1/bed/blastz.borEut1
    mkdir -p axtChain/run1
    cd axtChain/run1
    mkdir out chain
    ls -1S /cluster/data/eutPri1/bed/blastz.borEut1/axtChrom/*.axt  > input.lst
    cat << '_EOF_' > gsub
#LOOP
doChain {check in line+ $(path1)} {check out line+ chain/$(root1).chain} {check out exists out/$(root1).out}
#ENDLOOP
'_EOF_'
    # << this line makes emacs coloring happy

    cat << '_EOF_' > doChain
#!/bin/csh
axtChain $1 \
  /iscratch/i/eutPri1/nib \
  /iscratch/i/borEut1/nib $2 > $3
'_EOF_'
    # << this line makes emacs coloring happy
    chmod a+x doChain
    gensub2 input.lst single gsub jobList
    para create jobList
    para try, check, push, check...
# Completed: 1 of 1 jobs
# CPU time in finished jobs:        197s       3.28m     0.05h    0.00d  0.000 y
# IO & Wait Time:                    88s       1.47m     0.02h    0.00d  0.000 y
# Average job time:                 285s       4.75m     0.08h    0.00d
# Longest job:                      285s       4.75m     0.08h    0.00d
# Submission to last job:           285s       4.75m     0.08h    0.00d

    # now on the cluster server, sort chains
    ssh kksilo
    cd /cluster/data/eutPri1/bed/blastz.borEut1/axtChain
    chainMergeSort run1/chain/*.chain > all.chain
    chainSplit chain all.chain
    rm run1/chain/*.chain

    # take a look at score distr's
    foreach f (chain/*.chain)
      grep chain $f | awk '{print $2;}' | sort -nr > /tmp/score.$f:t:r
      echo $f:t:r
      textHistogram -binSize=4000 /tmp/score.$f:t:r
      echo ""
    end

    mv all.chain all.chain.unfiltered
    chainFilter -minScore=50000 all.chain.unfiltered > all.chain
    rm chain/*
    chainSplit chain all.chain
    gzip all.chain.unfiltered &

    # Load chains into database
    ssh hgwdev
    cd /cluster/data/eutPri1/bed/blastz.borEut1/axtChain/chain
    foreach i (*.chain)
        set c = $i:r
        hgLoadChain eutPri1 ${c}_chainCanFam1 $i
    end
    featureBits eutPri1 chainHg17Link
    # 54236215 bases of 70435997 (77.001%) in intersection

# BLASTZ Primate Ancestor (BORPRI1)
    ssh kk
    mkdir /cluster/data/borEut1/bed/blastz.eutPri1
    cd /cluster/data/borEut1/bed/blastz.eutPri1
    # Use default (Human-Mouse) settings for starters.
    cat << '_EOF_' > DEF
# dog vs. borEut
export PATH=/usr/bin:/bin:/usr/local/bin:/cluster/bin/penn:/cluster/bin/i386:/cluster/home/angie/schwartzbin

ALIGN=blastz-run
BLASTZ=blastz

# Default
BLASTZ_H=2000
BLASTZ_ABRIDGE_REPEATS=1

# TARGET: Dog
SEQ1_DIR=/iscratch/i/borEut1/nib
SEQ1_RMSK=
SEQ1_FLAG=
# SEQ1_SMSK=/scratch/hg/gs.18/build35/linSpecRep.notInDog
SEQ1_SMSK=
SEQ1_IN_CONTIGS=0
SEQ1_CHUNK=10000000
SEQ1_LAP=10000

# QUERY: BorEut
SEQ2_DIR=/iscratch/i/eutPri1/nib
SEQ2_RMSK=
SEQ2_FLAG=
#SEQ2_SMSK=/scratch/hg/borEut1/linSpecRep.notInHuman
SEQ2_SMSK=
SEQ2_IN_CONTIGS=0
SEQ2_CHUNK=10000000
SEQ2_LAP=0

BASE=/cluster/data/borEut1/bed/blastz.eutPri1

DEF=$BASE/DEF
RAW=$BASE/raw
CDBDIR=$BASE
SEQ1_LEN=$BASE/S1.len
SEQ2_LEN=$BASE/S2.len
'_EOF_'
    # << this line keeps emacs coloring happy

    /cluster/data/hg17/jkStuff/BlastZ_run0.sh
    cd run.0
    para push
# Completed: 2200 of 2200 jobs
# CPU time in finished jobs:     720384s   12006.40m   200.11h    8.34d  0.023 y
# IO & Wait Time:                 21690s     361.50m     6.03h    0.25d  0.001 y
# Average job time:                 337s       5.62m     0.09h    0.00d
# Longest job:                     2099s      34.98m     0.58h    0.02d
# Submission to last job:          2267s      37.78m     0.63h    0.03d

    ssh kki
    cd /cluster/data/borEut1/bed/blastz.eutPri1
    /cluster/data/hg17/jkStuff/BlastZ_run1.sh

    cd run.1
    para push
# Completed: 275 of 275 jobs
# CPU time in finished jobs:        243s       4.06m     0.07h    0.00d  0.000 y
# IO & Wait Time:                   774s      12.89m     0.21h    0.01d  0.000 y
# Average job time:                   4s       0.06m     0.00h    0.00d
# Longest job:                        8s       0.13m     0.00h    0.00d
# Submission to last job:            78s       1.30m     0.02h    0.00d

    ssh kk
    cd /cluster/data/borEut1/bed/blastz.eutPri1
    /cluster/data/hg17/jkStuff/BlastZ_run2.sh
    cd run.2
    para push

# Completed: 41 of 41 jobs
# CPU time in finished jobs:        266s       4.43m     0.07h    0.00d  0.000 y
# IO & Wait Time:                   822s      13.70m     0.23h    0.01d  0.000 y
# Average job time:                  27s       0.44m     0.01h    0.00d
# Longest job:                       54s       0.90m     0.01h    0.00d
# Submission to last job:            58s       0.97m     0.02h    0.00d

# END BLASTZ 

