# for emacs: -*- mode: sh; -*-


# This file describes browser build for the human-chimp ancestor
# genome, October 2006, homIni14
#

    ssh kkstore01
    mkdir -p /cluster/store10/homIni14
    ln -s /cluster/store10/homIni14 /cluster/data/homIni14
    cd /cluster/data/homIni14
    ls -d /san/sanvol1/scratch/rico/vers13/car* | sed "s/.*car//" > car.names
    mkdir jkStuff

    tcsh
    cat << '_EOF_' > jkStuff/fullCarToChr.sed
    s/ borEut.13.1$/chr1/
    s/ borEut.13.2$/chr2a/
    s/ borEut.13.3$/chr2b/
    s/ borEut.13.4$/chr3/
    s/ borEut.13.5$/chr4/
    s/ borEut.13.6$/chr5/
    s/ borEut.13.7$/chr6/
    s/ borEut.13.8$/chr7/
    s/ borEut.13.9$/chr8/
    s/ borEut.13.10$/chr9/
    s/ borEut.13.11$/chr10/
    s/ borEut.13.12$/chr11/
    s/ borEut.13.13$/chr12a/
    s/ borEut.13.14$/chr12b/
    s/ borEut.13.15$/chr13/
    s/ borEut.13.16$/chr14/
    s/ borEut.13.17$/chr16a/
    s/ borEut.13.18$/chr16b/
    s/ borEut.13.19$/chr17/
    s/ borEut.13.20$/chr18/
    s/ borEut.13.21$/chr19/
    s/ borEut.13.22$/chr20/
    s/ borEut.13.23$/chrX/
'_EOF_'
    cat << '_EOF_' > jkStuff/carToChr.sed
    s/^1$/1fart/
    s/^2$/2afart/
    s/^3$/2bfart/
    s/^4$/3fart/
    s/^5$/4fart/
    s/^6$/5fart/
    s/^7$/6fart/
    s/^8$/7fart/
    s/^9$/8fart/
    s/^10$/9fart/
    s/^11$/10fart/
    s/^12$/11fart/
    s/^13$/12afart/
    s/^14$/12bfart/
    s/^15$/13fart/
    s/^16$/14fart/
    s/^17$/16afart/
    s/^18$/16bfart/
    s/^19$/17fart/
    s/^20$/18fart/
    s/^21$/19fart/
    s/^22$/20fart/
    s/^23$/Xfart/
    s/fart//
'_EOF_'
    exit

    sed -f jkStuff/carToChr.sed car.names > chr.names
    # Build one job per entry in car.names: create the chromosome directory
    # (name mapped through carToChr.sed), unpack the borEut.13.<n>.gz sequence
    # and rewrite its " borEut.13.<n>" name to the chr name with
    # fullCarToChr.sed.
    for i in `cat car.names`
    do
	f=`echo $i | sed -f jkStuff/carToChr.sed`
	g=/san/sanvol1/scratch/rico/vers13/final-results/borEut.13.$i.gz
	echo " mkdir $f ; zcat $g | sed -f jkStuff/fullCarToChr.sed > $f/chr$f.fa "
    done > zcat.jobs
    # Run the jobs now: the faSplit step that follows reads the $f/chr$f.fa
    # files produced here.
    sh -x zcat.jobs

    for i in `cat chr.names`
    do 
	echo "mkdir -p $i/split; "
	echo "faSplit size $i/chr$i.fa 500000 -minGapSize=100 $i/split/\"$i\"_ -lift=$i/$i.lft"
    done > split.jobs
    screen
    sh -x split.jobs
    exit
    cat */*.lft > jkStuff/liftAll.lft

    # Collect symlinks to every split fasta piece in a single blastDb directory.
    mkdir blastDb
    for i in */split/*.fa
    do
	f=`basename $i`
	ln -s `pwd`/$i blastDb/$f
    done

    # Run formatdb on each piece, then remove the symlink so that only the
    # formatdb output files remain in blastDb.
    cd /cluster/data/homIni14/blastDb
    for i in *.fa
    do
	/cluster/bluearc/blast229/formatdb -p F -i $i
	rm $i
    done

    rm -rf  /san/sanvol1/scratch/homIni14/blastDb 
    mkdir -p /san/sanvol1/scratch/homIni14/blastDb 
    cp *.nhr /san/sanvol1/scratch/homIni14/blastDb 
    cp *.nin /san/sanvol1/scratch/homIni14/blastDb 
    cp *.nsq /san/sanvol1/scratch/homIni14/blastDb 

    mkdir -p /cluster/data/homIni14/bed/tblastn.hg17KG
    cd /cluster/data/homIni14/bed/tblastn.hg17KG
    echo  /san/sanvol1/scratch/homIni14/blastDb/*.nsq  | xargs ls -S | sed "s/\.nsq//"  > query.lst  

    f=/san/sanvol1/scratch/rico/vers13/final-results/correct.12.$i.gz
zcat ../../from*/correct.gz |sed 1d | tr -d '\012' |  sed "s/./& /g" | tr ' ' '\012' |   cat wig.head - | wigEncode stdin correct.wig correct.wib


# REPEAT MASKING
    #- Make the run directory and job list:
    cd /cluster/data/homIni14
    mkdir RMRun
    for i in `cat chr.names`
    do 
	for j in $i/split/*.fa  
	do 
	    echo "`pwd`/jkStuff/RMCanHg `pwd`/$i/split `basename $j` {check out line+ /cluster/data/homIni14/$j.out}"
	done 
    done > RMRun/RMJobs

    # Write jkStuff/RMCanHg <dir> <file.fa>: copies <dir>/<file.fa> into a
    # private /tmp/homIni14/<file.fa> work directory, runs RepeatMasker on it
    # there, copies the .out file (and the .align/.tbl/.cat files when they
    # exist) back into <dir>, then removes the work directory.
    tcsh
    cat << '_EOF_' > jkStuff/RMCanHg
#!/bin/csh -fe

cd $1
pushd .
/bin/mkdir -p /tmp/homIni14/$2
/bin/cp $2 /tmp/homIni14/$2/
cd /tmp/homIni14/$2
/cluster/bluearc/RepeatMasker/RepeatMasker -ali -s $2
popd
/bin/cp /tmp/homIni14/$2/$2.out ./
if (-e /tmp/homIni14/$2/$2.align) /bin/cp /tmp/homIni14/$2/$2.align ./
if (-e /tmp/homIni14/$2/$2.tbl) /bin/cp /tmp/homIni14/$2/$2.tbl ./
if (-e /tmp/homIni14/$2/$2.cat) /bin/cp /tmp/homIni14/$2/$2.cat ./
/bin/rm -fr /tmp/homIni14/$2/*
/bin/rmdir --ignore-fail-on-non-empty /tmp/homIni14/$2
/bin/rmdir --ignore-fail-on-non-empty /tmp/homIni14
'_EOF_'
    exit


    # << this line makes emacs coloring happy
    #- Do the run
    ssh pk
    cd /cluster/data/homIni14/RMRun
    chmod +x ../jkStuff/RMCanHg
    para create RMJobs
    # then: para try, para check, para check, para push, para check, ...

# Completed: 4221 of 4221 jobs
# CPU time in finished jobs:   28038273s  467304.55m  7788.41h  324.52d  0.889 y
# IO & Wait Time:                 22474s     374.56m     6.24h    0.26d  0.001 y
# Average job time:                6648s     110.80m     1.85h    0.08d
# Longest finished job:            8035s     133.92m     2.23h    0.09d
# Submission to last job:         99864s    1664.40m    27.74h    1.16d

    ssh kkstore01
    cd /cluster/data/homIni14
    for i in `cat chr.names`; do echo $i;liftUp $i/$i.fa.out $i/$i.lft warn $i/split/*.fa.out > /dev/null; done
    head -3 `head -1 chr.names`/*.fa.out > all.out
    for i in `cat chr.names`; do sed "1,3d" $i/$i.fa.out; done >> all.out

    #- Load the .out files into the database with:
    ssh hgwdev
    cd /cluster/data/homIni14

    hgLoadOut -nosplit homIni14 all.out
    featureBits homIni14 rmsk
# 683289559 bases of 2105190218 (32.457%) in intersection


# STORE SEQUENCE AND ASSEMBLY INFORMATION 
    # Translate to nib
    cd /cluster/data/homIni14
    mkdir nib
    # Name each nib after its fasta file (chr$i), matching the chr$i.nib names
    # used when the nibs are rebuilt after masking. Bare $i.nib names would
    # leave a second, stale set of nibs in nib/ that later *.nib globs pick up.
    for i in `cat chr.names`
    do
	faToNib $i/chr$i.fa nib/chr$i.nib
    done
    faToTwoBit */*.fa homIni14.2bit

# CREATING DATABASE 
    # Create the database.
    ssh hgwdev
    cd /cluster/data/homIni14
    echo 'create database homIni14' | hgsql ''
    mkdir -p /gbdb/homIni14/nib
    ln -s /cluster/data/homIni14/homIni14.2bit /gbdb/homIni14/

    # Make symbolic links from /gbdb/homIni14/nib to the real nibs.
    mkdir -p /gbdb/homIni14/nib
    ln -s /cluster/data/homIni14/nib/*.nib  /gbdb/homIni14/nib

    hgsql homIni14  < $HOME/kent/src/hg/lib/chromInfo.sql             

    # Register the pre-made nibs against the per-chromosome fasta files. In
    # this build they live in <name>/chr<name>.fa; no fa/ directory is ever
    # created, so the original fa/*.fa glob matched nothing.
    hgNibSeq -preMadeNib homIni14 /gbdb/homIni14/nib */chr*.fa
    # Dump chromosome names and sizes from chromInfo for later tools.
    echo "select chrom,size from chromInfo" | hgsql -N homIni14 > chrom.sizes

    # create assembly and gap tracks
    hgGoldGapGl -noGl -chromLst=chrom.lst homIni14 /cluster/data/homIni14 .

    echo "create table grp (PRIMARY KEY(NAME)) select * from hg17.grp" | hgsql homIni14 

    echo 'insert into blatServers values("homIni14", "blat1", "17778", "1","0"); \
          insert into blatServers values("homIni14", "blat1", "17779", "0","1");' \
      | hgsql -h genome-testdb hgcentraltest

    mkdir -p /cluster/data/homIni14/bed/gapRmsk
    cd /cluster/data/homIni14/bed/gapRmsk
    simpleGap /cluster/data/homIni14/nib gap.bed repeats.bed
    echo "drop table gap;" | hgsql homIni14
    hgsql homIni14 < ~/kent/src/hg/lib/gap.sql
    hgLoadBed -oldTable homIni14 gap gap.bed
    echo "create index chrom on gap (chrom(13), bin) ;" | hgsql homIni14
    echo "create index chrom_2 on gap  (chrom(13), chromStart);" | hgsql homIni14
    echo "create index chrom_3 on gap  (chrom(13), chromEnd);" | hgsql homIni14

# MAKE GCPERCENT 
    mkdir -p /cluster/data/homIni14/bed/gcPercent
    cd /cluster/data/homIni14/bed/gcPercent
    hgsql homIni14  < ~/kent/src/hg/lib/gcPercent.sql

    # load gcPercent table
    hgGcPercent homIni14 ../../nib

    cd ../..


    # Add dbDb and defaultDb entries:
    echo 'insert into dbDb (name, description, nibPath, organism, defaultPos, active, orderKey, genome, scientificName,  \
          htmlPath, hgNearOk)  values("homIni14", "Oct. 1. 2006", "/gbdb/homIni14/nib", "Hominidae", "", 1, \
          44, "Hominidae", "Hominidae", "/gbdb/homIni14/html/description.html", 0);' \
    | hgsql -h genome-testdb hgcentraltest
#    #echo 'INSERT INTO defaultDb VALUES ("Hominidae", "homIni14");' | hgsql -h genome-testdb hgcentraltest
    echo 'update defaultDb set name = "homIni14" where genome = "Hominidae"' | hgsql -h genome-testdb hgcentraltest
#    #echo 'insert into genomeClade (genome, clade, priority) values ("BoreoEutheria", "vertebrate",65);'  \
#	| hgsql -h genome-testdb hgcentraltest

#    echo 'INSERT INTO defaultDb VALUES ("Dog/Human", "homIni14");' | hgsql -h genome-testdb hgcentraltest

    # Make trackDb table so browser knows what tracks to expect:
    cd $HOME/kent/src/hg/makeDb/trackDb
    mkdir -p ancestor/homIni14
    cvs add ancestor/homIni14
    cp ancestor/borEut13/trackDb.ra ancestor/homIni14
    cvs add ancestor/homIni14/trackDb.ra
    cvs ci ancestor/homIni14/trackDb.ra

    # Edit that makefile to add homIni14 in all the right places and do
    make update

# SIMPLE REPEATS (TRF) 
    ssh kkstore01
    mkdir -p /cluster/data/homIni14/bed/simpleRepeat
    cd /cluster/data/homIni14/bed/simpleRepeat
    mkdir trf
    tcsh
    cp /dev/null jobs.csh
    foreach d (/cluster/data/homIni14/*/split)
      foreach f ($d/*.fa)
        set fout = $f:t:r.bed
        echo $fout
        echo "/cluster/bin/i386/trfBig -trf=/cluster/bin/i386/trf $f /dev/null -bedAt=trf/$fout -tempDir=/tmp" >> jobs.csh
      end
    end
    exit
    screen
    csh -ef jobs.csh 

    # check on this with
    cd /cluster/data/homIni14/bed/simpleRepeat
    wc -l jobs.csh
    ls -1 trf | wc -l
    endsInLf trf/*
    # When job is done do:
    liftUp simpleRepeat.bed /cluster/data/homIni14/jkStuff/liftAll.lft warn  trf/*.bed

    # Load into the database:
    ssh hgwdev
    hgLoadBed homIni14 simpleRepeat /cluster/data/homIni14/bed/simpleRepeat/simpleRepeat.bed -sqlTable=$HOME/kent/src/hg/lib/simpleRepeat.sql
    featureBits homIni14 simpleRepeat
# 18480235 bases of 2748518598 (0.672%) in intersection

# PROCESS SIMPLE REPEATS INTO MASK 
    # After the simpleRepeats track has been built, make a filtered version 
    # of the trf output: keep trf's with period <= 12:
    ssh kkstore01
    cd /cluster/data/homIni14/bed/simpleRepeat
    awk '{if ($5 <= 12) print;}' simpleRepeat.bed > simpleRepeat12.bed
    mkdir trfMask

    # Split the filtered TRF bed into one file per chromosome.
    # chr.names holds bare names (1, 2a, ..., X) while the sequences were
    # renamed to chr<name> by fullCarToChr.sed, so column 1 of the bed is
    # expected to read chr1, chr2a, ... (verify against simpleRepeat12.bed).
    # The name must reach awk as a quoted string: an unquoted 2a or X is parsed
    # as a number/variable and never matches.
    # Output files stay keyed by the bare name (trfMask/<name>.bed), which is
    # what the masking step reads.
    for i in `cat ../../chr.names`
    do
	echo "awk '\$1 == \"chr$i\"' simpleRepeat12.bed > trfMask/$i.bed"
    done > mask.jobs
    sh -x mask.jobs

    # Here's the coverage for the filtered TRF:
    ssh hgwdev
    featureBits homIni14 /cluster/data/homIni14/bed/simpleRepeat/simpleRepeat12.bed
# 3540823 bases of 1792058299 (0.198%) in intersection

# MASK SEQUENCE WITH REPEATMASKER AND SIMPLE REPEAT/TRF 
    ssh kkstore01
    cd /cluster/data/homIni14

    # The chromosome directories, <name>/<name>.fa.out files and
    # trfMask/<name>.bed files are all keyed by the names in chr.names
    # (1, 2a, 2b, ...), not by the CAR numbers in car.names, so iterate over
    # chr.names here (as the nib rebuild below also does).
    for i in `cat chr.names`
    do
	f=$i/chr$i.fa
	# soft-mask with the RepeatMasker output, add the filtered TRF
	# intervals, then write a hard-masked copy next to the fasta
	echo maskOutFa -soft $f $i/$i.fa.out $f
	echo maskOutFa -softAdd $f bed/simpleRepeat/trfMask/$i.bed $f
	echo maskOutFa $f hard $f.masked
    done > mask.jobs
    sh -x mask.jobs

    #- Rebuild the nib files, using the soft masking in the fa:
    for i in `cat chr.names`; do faToNib -softMask $i/chr$i.fa nib/chr$i.nib; done
    for i in `cat chr.names`; do cat $i/chr$i.fa; done | faToTwoBit stdin homIni14.2bit 

# PUT MASKED SEQUENCE OUT FOR CLUSTER RUNS 
    ssh kkr1u00
    # Chrom-level mixed nibs that have been repeat- and trf-masked:
    mkdir -p /iscratch/i/homIni14/nib
    cp -p /cluster/data/homIni14/homIni14.2bit /iscratch/i/homIni14
    cp -p /cluster/data/homIni14/nib/*.nib /iscratch/i/homIni14/nib
    # Pseudo-contig fa that have been repeat- and trf-masked:
    rm -rf /iscratch/i/homIni14/trfFa
    mkdir /iscratch/i/homIni14/trfFa
    foreach d (/cluster/data/homIni14/*/chr*_?{,?})
      cp $d/$d:t.fa /iscratch/i/homIni14/trfFa
    end
    cp -p /cluster/data/homIni14/homIni14.2bit /iscratch/i/homIni14/
    iSync

# MAKE 10.OOC, 11.OOC FILES FOR BLAT 
    ssh kolossus
    mkdir /cluster/data/homIni14/bed/ooc
    mkdir -p /cluster/bluearc/homIni14/
    cd /cluster/data/homIni14/bed/ooc
    ls -1 /cluster/data/homIni14/nib/*.nib > nib.lst
    blat nib.lst /dev/null /dev/null -tileSize=11 -makeOoc=/cluster/bluearc/homIni14/11.ooc -repMatch=1024
# Wrote 7685 overused 11-mers to /cluster/bluearc/homIni14/11.ooc
    blat nib.lst /dev/null /dev/null -tileSize=10 -makeOoc=/cluster/bluearc/homIni14/10.ooc -repMatch=843
# Wrote 99129 overused 10-mers to /cluster/bluearc/homIni14/10.ooc

    ssh kkr1u00
    cp -p /cluster/bluearc/homIni14/*.ooc /iscratch/i/homIni14/
    iSync > sync.out

    for i in `cat ../../car.names`; do echo "mafToAxt ../buildMaf/homIni14.$i.maf homIni14. hg17. stdout | axtChain stdin /cluster/data/homIni14/homIni14.2bit /cluster/data/hg17/hg17.2bit stdout | chainToPsl stdin /cluster/data/homIni14/chrom.sizes /cluster/data/hg17/chrom.sizes homIni14.lst hg17.lst $i.psl" ; done > jobs


# MAKE Human Proteins track 
    ssh kolossus
    mkdir -p /cluster/data/homIni14/bed/tblastn.hg17KG
    cd /cluster/data/homIni14/bed/tblastn.hg17KG
    mkdir expect.genes
    cd expect.genes
    tcsh
    cat << '_EOF_' > doOne
    grep human /san/sanvol1/scratch/rico/vers13/car$1/car |  sed s/human.// | sed "s/-/ /" | tr ' :' '\t\t' | overlapSelect -selectFmt=bed stdin /cluster/data/hg17/bed/blat.hg17KG/hg17KG.psl stdout  > $1.psl
'_EOF_'
    exit
    chmod +x doOne
    for i in `cat ../../../car.names`; do echo ./doOne $i; done > jobs  
    sh -x jobs
    for i in *.psl; do f=`basename $i .psl`; pslxToFa $i $f.fa; awk "{print \$10}" $i > $f.lst; done
    sort -u *.lst  > allInCars.lst  
    awk "{print \$10}" /cluster/data/hg17/bed/blat.hg17KG/hg17KG.psl  | sort -u > allInHg17.lst  
    diff allInCars.lst allInHg17.lst | grep ">" | sed "s/..//" > missingInCars.lst
    cd ..

    mkdir queries
    for i in `cat ../../chr.names`; do  grep b/"$i"_ query.lst > queries/$i.lst; done

    rm -rf /san/sanvol1/scratch/homIni14/bed/tblastn.hg17KG/blastOut
    mkdir -p /san/sanvol1/scratch/homIni14/bed/tblastn.hg17KG/blastOut
    ln -s /san/sanvol1/scratch/homIni14/bed/tblastn.hg17KG/blastOut
    for i in `cat ../../car.names`; do  mkdir blastOut/`basename $i .fa`; done
    tcsh
    cat << '_EOF_' > blastGsub
#LOOP
blastSome $(path1) {check in line $(path2)} {check out exists blastOut/$(root2)/q.$(root1).psl } 
#ENDLOOP
'_EOF_'
    cat << '_EOF_' > blastSome
#!/bin/sh
# blastSome <blastDb> <queryFa> <outPsl>
# Runs tblastn of <queryFa> against <blastDb>, converts the report to PSL,
# lifts it with liftAll.lft and then lifts the query side (-pslQ) with
# protein.lft, and installs the result as <outPsl> once pslCheck accepts it.
# Exits 0 only when <outPsl> has been written; every failure path removes
# the temporary files and exits 1.
BLASTMAT=/iscratch/i/blast/data
export BLASTMAT
g=`basename $2`
f=/tmp/`basename $3`.$g
# Try successively stricter e-values until one blastall run succeeds.
for eVal in 0.01 0.001 0.0001 0.00001 0.000001 1E-09 1E-11
do
if /scratch/blast/blastall -M BLOSUM80 -m 0 -F no -e $eVal -p tblastn -d $1 -i $2 -o $f.8
then
        mv $f.8 $f.1
        break;
fi
done
if test -f  $f.1
then
    if /cluster/bin/i386/blastToPsl $f.1 $f.2
    then
	liftUp -nosort -type=".psl" -nohead $f.3 ../../jkStuff/liftAll.lft carry $f.2
	liftUp -nosort -type=".psl" -pslQ -nohead $3.tmp /cluster/data/hg17/bed/blat.hg17KG/protein.lft warn $f.3

        if pslCheck -prot $3.tmp
        then
            mv $3.tmp $3
            rm -f $f.1 $f.2 $f.3  $f.4
            # success is reported only after the output is in place
            exit 0
        fi
        # pslCheck rejected the result: fall through to cleanup and exit 1
        # instead of reporting success without an output file
    fi
fi
rm -f $f.1 $f.2 $3.tmp $f.8 $f.3 $f.4
exit 1
'_EOF_'

    exit
    chmod +x blastSome
    for i in `cat ../../car.names`
    do 
	f=`echo $i | sed -f ../../jkStuff/carToChr.sed`
    	echo expect.genes/$i.fa | gensub2 queries/$f.lst stdin blastGsub stdout
    done > blastSpec
    # gensub2 query.lst kg.lst blastGsub blastSpec

    ssh pk
    cd /cluster/data/homIni14/bed/tblastn.hg17KG
    para create blastSpec
    para push

# Completed: 3644 of 3644 jobs
# CPU time in finished jobs:    1127288s   18788.13m   313.14h   13.05d  0.036 y
# IO & Wait Time:                143129s    2385.49m    39.76h    1.66d  0.005 y
# Average job time:                 349s       5.81m     0.10h    0.00d
# Longest running job:                0s       0.00m     0.00h    0.00d
# Longest finished job:            1322s      22.03m     0.37h    0.02d
# Submission to last job:          4477s      74.62m     1.24h    0.05d

    ssh kki
    cd /cluster/data/homIni14/bed/tblastn.hg17KG
    tcsh
    cat << '_EOF_' > chainGsub
#LOOP
chainOne $(path1)
#ENDLOOP
'_EOF_'

    cat << '_EOF_' > chainOne
(cd $1; cat q.*.psl | simpleChain -prot -outPsl -maxGap=250000 stdin ../c.`basename $1`.psl)
'_EOF_'
    chmod +x chainOne
    exit

    for i in `cat ../../car.names`
    do
	ls -1dS `pwd`/blastOut/$i 
    done > chain.lst
    gensub2 chain.lst single chainGsub chainSpec

    para create chainSpec
    para push

# Completed: 30 of 30 jobs
# CPU time in finished jobs:      32583s     543.05m     9.05h    0.38d  0.001 y
# IO & Wait Time:                   268s       4.46m     0.07h    0.00d  0.000 y
# Average job time:                1095s      18.25m     0.30h    0.01d
# Longest running job:                0s       0.00m     0.00h    0.00d
# Longest finished job:           20831s     347.18m     5.79h    0.24d
# Submission to last job:         20987s     349.78m     5.83h    0.24d

    ssh kkstore01
    cd /cluster/data/homIni14/bed/tblastn.hg17KG/blastOut
    for i in `cat /cluster/data/homIni14/car.names`
    do 
	awk "(\$13 - \$12)/\$11 > 0.6 {print}" c.$i.psl > c60.$i.psl
	sort -rn c60.$i.psl | pslUniq stdin u.$i.psl
	awk "((\$1 / \$11) ) > 0.60 { print   }" c60.$i.psl > m60.$i.psl
	echo $i
    done

    sort -T /tmp -k 14,14 -k 16,16n -k 17,17n u.*.psl m60* | uniq > /cluster/data/homIni14/bed/tblastn.hg17KG/blastHg17KG.psl
    cd ..
    mkdir found.genes
    for i in `cat ../../car.names`; do pslSomeRecords blastHg17KG.psl expect.genes/$i.lst found.genes/$i.psl; done
    cd found.genes
    for i in *.psl; do cut -f 10 $i | sort -u > `basename $i .psl`.lst;     done
    cd ..
    mkdir missing.genes
    for i in `cat ../../car.names`; 
    do 
	diff expect.genes/$i.lst found.genes/$i.lst | grep "<" | sed "s/..//" > missing.genes/$i.lst 
    done
        
    ssh hgwdev
    cd /cluster/data/homIni14/bed/tblastn.hg17KG
    hgLoadPsl homIni14 blastHg17KG.psl
# 28212551 bases of 1814471846 (1.555%) in intersection
# 27377895 bases of 1792058299 (1.528%) in intersection (in canHg11)

    # back to kkstore01
    rm -rf blastOut
# End tblastn

# PRODUCING GENSCAN PREDICTIONS 
    ssh hgwdev
    mkdir /cluster/data/homIni14/bed/genscan
    cd /cluster/data/homIni14/bed/genscan
    # Check out hg3rdParty/genscanlinux to get latest genscan:
    cvs co hg3rdParty/genscanlinux
    # Run on small cluster (more mem than big cluster).
    ssh kki
    cd /cluster/data/homIni14/bed/genscan
    # Make 3 subdirectories for genscan to put their output files in
    mkdir gtf pep subopt
    # Generate a list file, genome.list, of all the hard-masked contigs that 
    # *do not* consist of all-N's (which would cause genscan to blow up)
    rm -f genome.list
    touch genome.list
    foreach f ( `ls -1S /cluster/data/homIni14/*/chr*/chr*_?{,?}.fa.masked` )
      egrep '[ACGT]' $f > /dev/null
      if ($status == 0) echo $f >> genome.list
    end
    wc -l genome.list
    # Create template file, gsub, for gensub2.  For example (3-line file):
    cat << '_EOF_' > gsub
#LOOP
/cluster/bin/i386/gsBig {check in line+ $(path1)} {check out line gtf/$(root1).gtf} -trans={check out line pep/$(root1).pep} -subopt={check out line subopt/$(root1).bed} -exe=hg3rdParty/genscanlinux/genscan -par=hg3rdParty/genscanlinux/HumanIso.smat -tmp=/tmp -window=2400000
#ENDLOOP
'_EOF_'
    # << this line makes emacs coloring happy
    gensub2 genome.list single gsub jobList

    # did one job on kolossus

    # Convert these to chromosome level files as so:
    ssh kkstore01
    cd /cluster/data/homIni14/bed/genscan
    liftUp genscan.gtf ../../jkStuff/liftAll.lft warn gtf/*.gtf
    liftUp genscanSubopt.bed ../../jkStuff/liftAll.lft warn subopt/*.bed
    cat pep/*.pep > genscan.pep

    # Load into the database as so:
    ssh hgwdev
    cd /cluster/data/homIni14/bed/genscan
    ldHgGene homIni14 genscan genscan.gtf
    hgPepPred homIni14 generic genscanPep genscan.pep
    hgLoadBed homIni14 genscanSubopt genscanSubopt.bed
#END GENSCAN

# MAKE Human Known Gene mRNAs Mapped by BLATZ track (hg17)
    # Create working directory and extract known gene mrna from database
    mkdir -p /cluster/data/homIni14/bed/blatz.hg17KG
    cd /cluster/data/homIni14/bed/blatz.hg17KG
    pepPredToFa hg17 knownGeneMrna known.fa

    # Split known genes into pieces and load on /iscratch/i temp device
    ssh kkr1u00
    cd /iscratch/i/homIni14
    mkdir knownRna
    cd knownRna
    faSplit sequence /cluster/data/homIni14/bed/blatz.hg17KG/known.fa 580 known
    cd
    iSync > sync.out

    ssh kk
    cd /cluster/data/homIni14/bed/blatz.hg17KG

    # Do parasol job to align via blatz
    mkdir run0
    cd run0
    mkdir psl
    # awk '{print $1;}' /cluster/data/homIni14/chrom.sizes > genome.lst
    # Target list: the masked fasta pieces, largest first; one psl output
    # directory per target.
    ls -1S /iscratch/i/homIni14/trfFa/*.fa > genome.lst
    for i in `cat genome.lst` ;  do f=`basename $i .fa`; mkdir psl/$f ; done
    # Query list: the known-gene mRNA pieces that faSplit wrote to
    # /iscratch/i/homIni14/knownRna above (this previously pointed at the
    # canHg3 copy, which belongs to a different build).
    for i in  /iscratch/i/homIni14/knownRna/*.fa ; do echo $i; done > mrna.lst
    cat << '_EOF_' > doBlatz
#!/bin/sh
f=/tmp/`basename $1`.`basename $2`
if blatz -rna -minScore=6000 -out=psl $1 $2 $f.1
then
    if liftUp -nosort -type=".psl" -nohead $f.2 ../../../jkStuff/liftAll.lft carry $f.1
    then
	mv $f.2 $3
	rm $f.1
	exit 0
    fi
fi
rm $f.1 $f.2
exit 1
'_EOF_'
    chmod +x doBlatz
    cat << '_EOF_' > gsub
#LOOP
doBlatz   $(path1) $(path2) {check out line psl/$(root1)/$(root2).psl}
#ENDLOOP
'_EOF_'
    # << this line keeps emacs coloring happy
    gensub2 genome.lst mrna.lst gsub spec
    para create spec
    # Then do usual para try/para push/para time
# Completed: 1146 of 1146 jobs
# CPU time in finished jobs:     108857s    1814.28m    30.24h    1.26d  0.003 y
# IO & Wait Time:                819365s   13656.09m   227.60h    9.48d  0.026 y
# Average job time:                 810s      13.50m     0.22h    0.01d
# Longest running job:                0s       0.00m     0.00h    0.00d
# Longest finished job:            2985s      49.75m     0.83h    0.03d
# Submission to last job:          2997s      49.95m     0.83h    0.03d

# Completed: 6303 of 6303 jobs
# CPU time in finished jobs:     920417s   15340.29m   255.67h   10.65d  0.029 y
# IO & Wait Time:                 23131s     385.51m     6.43h    0.27d  0.001 y
# Average job time:                 150s       2.49m     0.04h    0.00d
# Longest job:                     1064s      17.73m     0.30h    0.01d
# Submission to last job:          1632s      27.20m     0.45h    0.02d

    # Do sort and near-besting on file server
    ssh kkstore01
    cd /cluster/data/homIni14/bed/blatz.hg17KG/run0
    catDir -r psl | pslFilter -minScore=100 -minAli=255 -noHead stdin stdout | sort -k 10 > ../filtered.psl
    cd ..
    pslReps filtered.psl nearBest.psl /dev/null -nohead -minAli=0.8 -nearTop=0.01 -minCover=0.8
    sort -k 14 nearBest.psl > blatzHg17KG.psl

    # Clean up
    rm -r run0/psl 

    # Load into database
    ssh hgwdev
    cd /cluster/data/homIni14/bed/blatz.hg17KG
    hgLoadPsl homIni14 blatzHg17KG.psl 

#END BLATZ

# BLASTZ SWAP FOR hg17 vs homIni14 BLASTZ TO CREATE homIni14 vs hg17 BLASTZ
    ssh kolossus
    mkdir -p /cluster/data/homIni14/bed/blastz.hg17
    cd /cluster/data/homIni14/bed/blastz.hg17
    set aliDir = /cluster/data/hg17/bed/blastz.homIni14
    cp $aliDir/S1.len S2.len
    cp $aliDir/S2.len S1.len
    mkdir unsorted axtChrom
    cat $aliDir/axtChrom/chr*.axt \
    | axtSwap stdin $aliDir/S1.len $aliDir/S2.len stdout \
    | axtSplitByTarget stdin unsorted
    # Sort the shuffled .axt files.
    foreach f (unsorted/*.axt)
      echo sorting $f:t:r
      axtSort $f axtChrom/$f:t
    end
    du -sh $aliDir/axtChrom unsorted axtChrom
    # 409M    /cluster/data/hg17/bed/blastz.homIni14/axtChrom
    # 409M    unsorted
    # 409M    axtChrom

    rm -r unsorted

# CHAIN HUMAN BLASTZ 
    # Run axtChain on little cluster
    ssh kki
    cd /cluster/data/homIni14/bed/blastz.hg17
    mkdir -p axtChain/run1
    cd axtChain/run1
    mkdir out chain
    ls -1S /cluster/data/homIni14/bed/blastz.hg17/axtChrom/*.axt  > input.lst
    cat << '_EOF_' > gsub
#LOOP
doChain {check in line+ $(path1)} {check out line+ chain/$(root1).chain} {check out exists out/$(root1).out}
#ENDLOOP
'_EOF_'
    # << this line makes emacs coloring happy

    cat << '_EOF_' > doChain
#!/bin/csh
axtChain $1 \
  /iscratch/i/homIni14/nib \
  /iscratch/i/gs.18/build35/bothMaskedNibs $2 > $3
'_EOF_'
    # << this line makes emacs coloring happy
    chmod a+x doChain
    gensub2 input.lst single gsub jobList
    para create jobList
    # then: para try, check, push, check...
# Completed: 1 of 1 jobs
# CPU time in finished jobs:        109s       1.81m     0.03h    0.00d  0.000 y
# IO & Wait Time:                   104s       1.73m     0.03h    0.00d  0.000 y
# Average job time:                 212s       3.53m     0.06h    0.00d
# Longest finished job:             212s       3.53m     0.06h    0.00d
# Submission to last job:           212s       3.53m     0.06h    0.00d

    # now on the cluster server, sort chains
    ssh kkstore01
    cd /cluster/data/homIni14/bed/blastz.hg17/axtChain
    chainMergeSort run1/chain/*.chain > all.chain
    chainSplit chain all.chain
    rm run1/chain/*.chain

    # take a look at score distr's
    foreach f (chain/*.chain)
      grep chain $f | awk '{print $2;}' | sort -nr > /tmp/score.$f:t:r
      echo $f:t:r
      textHistogram -binSize=4000 /tmp/score.$f:t:r
      echo ""
    end

    mv all.chain all.chain.unfiltered
    chainFilter -minScore=50000 all.chain.unfiltered > all.chain
    rm chain/*
    chainSplit chain all.chain
    gzip all.chain.unfiltered

    # Load chains into database
    ssh hgwdev
    cd /cluster/data/homIni14/bed/blastz.hg17/axtChain/chain
    foreach i (*.chain)
        set c = $i:r
        hgLoadChain homIni14 ${c}_chainHg17 $i
    end
    featureBits homIni14 chainHg17Link
    # 52252036 bases of 62796757 (83.208%) in intersection

# NET HUMAN BLASTZ 
    ssh kkstore01
    cd /cluster/data/homIni14/bed/blastz.hg17/axtChain
    chainPreNet all.chain ../S1.len ../S2.len stdout \
    | chainNet stdin -minSpace=1 ../S1.len ../S2.len stdout /dev/null \
    | netSyntenic stdin noClass.net

    # Add classification info using db tables:
    ssh hgwdev
    cd /cluster/data/homIni14/bed/blastz.hg17/axtChain
    netClass noClass.net homIni14 hg17 human.net

    # Make a 'syntenic' subset:
    ssh kkstore01
    cd /cluster/data/homIni14/bed/blastz.hg17/axtChain
    rm noClass.net
    # Make a 'syntenic' subset of these with
    netFilter -syn human.net > humanSyn.net

    # Load the nets into database 
    ssh hgwdev
    cd /cluster/data/homIni14/bed/blastz.hg17/axtChain
    netFilter -minGap=10 human.net |  hgLoadNet homIni14 netHg17 stdin
    netFilter -minGap=10 humanSyn.net | hgLoadNet homIni14 netSyntenyHg17 stdin
    # Add entries for chainHg17, netHg17, syntenyHg17 to ancestor/homIni14 trackDb


# GENERATE HG17 MAF FOR MULTIZ FROM NET 
    ssh kkstore01
    cd /cluster/data/homIni14/bed/blastz.hg17/axtChain
    netSplit human.net net
    cd /cluster/data/homIni14/bed/blastz.hg17
    mkdir axtNet
    foreach f (axtChain/net/*)
      set chr = $f:t:r
      netToAxt $f axtChain/chain/$chr.chain /cluster/data/homIni14/nib \
        /cluster/data/hg17/nib stdout | axtSort stdin axtNet/$chr.axt
    end
    mkdir mafNet
    foreach f (axtNet/chr*.axt)
      set maf = mafNet/$f:t:r.maf
      axtToMaf $f /cluster/data/homIni14/chrom.sizes /cluster/data/hg17/chrom.sizes $maf -tPrefix=homIni14. -qPrefix=hg17.
    end

# BLASTZ SWAP FOR canFam1 vs homIni14 BLASTZ TO CREATE homIni14 vs canFam1 BLASTZ
    ssh kolossus
    mkdir -p /cluster/data/homIni14/bed/blastz.canFam1
    cd /cluster/data/homIni14/bed/blastz.canFam1
    set aliDir = /cluster/data/canFam1/bed/blastz.homIni14
    cp $aliDir/S1.len S2.len
    cp $aliDir/S2.len S1.len
    mkdir unsorted axtChrom
    cat $aliDir/axtChrom/chr*.axt \
    | axtSwap stdin $aliDir/S1.len $aliDir/S2.len stdout \
    | axtSplitByTarget stdin unsorted
    # Sort the shuffled .axt files.
    foreach f (unsorted/*.axt)
      echo sorting $f:t:r
      axtSort $f axtChrom/$f:t
    end
    du -sh $aliDir/axtChrom unsorted axtChrom
    # 207M    /cluster/data/canFam1/bed/blastz.homIni14/axtChrom
    # 207M    unsorted
    # 207M    axtChrom

    rm -rf unsorted

# CHAIN DOG BLASTZ 
    # Run axtChain on little cluster
    ssh kki
    cd /cluster/data/homIni14/bed/blastz.canFam1
    mkdir -p axtChain/run1
    cd axtChain/run1
    mkdir out chain
    ls -1S /cluster/data/homIni14/bed/blastz.canFam1/axtChrom/*.axt  > input.lst
    cat << '_EOF_' > gsub
#LOOP
doChain {check in line+ $(path1)} {check out line+ chain/$(root1).chain} {check out exists out/$(root1).out}
#ENDLOOP
'_EOF_'
    # << this line makes emacs coloring happy

    cat << '_EOF_' > doChain
#!/bin/csh
axtChain $1 \
  /iscratch/i/homIni14/nib \
  /iscratch/i/canFam1/nib $2 > $3
'_EOF_'
    # << this line makes emacs coloring happy
    chmod a+x doChain
    gensub2 input.lst single gsub jobList
    para create jobList
    # then: para try, check, push, check...
# Completed: 1 of 1 jobs
# CPU time in finished jobs:         86s       1.44m     0.02h    0.00d  0.000 y
# IO & Wait Time:                     7s       0.11m     0.00h    0.00d  0.000 y
# Average job time:                  93s       1.55m     0.03h    0.00d
# Longest finished job:              93s       1.55m     0.03h    0.00d
# Submission to last job:            93s       1.55m     0.03h    0.00d

    # now on the cluster server, sort chains
    ssh kkstore01
    cd /cluster/data/homIni14/bed/blastz.canFam1/axtChain
    chainMergeSort run1/chain/*.chain > all.chain
    chainSplit chain all.chain
    rm run1/chain/*.chain

    # take a look at score distr's
    foreach f (chain/*.chain)
      grep chain $f | awk '{print $2;}' | sort -nr > /tmp/score.$f:t:r
      echo $f:t:r
      textHistogram -binSize=4000 /tmp/score.$f:t:r
      echo ""
    end

    mv all.chain all.chain.unfiltered
    chainFilter -minScore=50000 all.chain.unfiltered > all.chain
    rm chain/*
    chainSplit chain all.chain
    gzip all.chain.unfiltered &

    # Load chains into database
    ssh hgwdev
    cd /cluster/data/homIni14/bed/blastz.canFam1/axtChain/chain
    foreach i (*.chain)
        set c = $i:r
        hgLoadChain homIni14 ${c}_chainCanFam1 $i
    end
    featureBits homIni14 chainCanFam1Link
    # 51373603 bases of 62796757 (81.809%) in intersection

# GENBANK/REFSEQ alignments:
   # - added borEut to:
   #      /cluster/data/genbank/etc/genbank.conf
   #      src/hg/makeDb/genbank/src/lib/gbGenome.c
   #      src/hg/makeDb/genbank/src/align/gbBlat
   #   cvs commit the changes

     cd src/hg/makeDb/genbank
     jkmake install-server
     ssh kkstore01
     cd /cluster/data/genbank
     ./bin/gbAlignStep -initial homIni14 &

     # load into database
     ssh hgwdev
     cd /cluster/data/genbank/                
     ./bin/gbDbLoadStep  -initialLoad homIni14 &

# BLASTZ EutGli1
    # Create the blastz working directory for homIni14 (SEQ1) vs eutGli1 (SEQ2)
    # and write its DEF parameter file.  The heredoc delimiter is quoted, so
    # $BASE etc. are written into DEF literally rather than expanded here.
    # The BlastZ_run*.sh scripts are later started from this directory;
    # presumably they read DEF -- confirm against those scripts.
    # NOTE(review): the commented-out SEQ*_SMSK lines inside DEF mention other
    # assemblies and look like leftovers from the doc this was copied from.
    ssh kk
    mkdir /cluster/data/homIni14/bed/blastz.eutGli1
    cd /cluster/data/homIni14/bed/blastz.eutGli1
    cat << '_EOF_' > DEF
export PATH=/usr/bin:/bin:/usr/local/bin:/cluster/bin/penn:/cluster/bin/i386:/cluster/home/angie/schwartzbin

ALIGN=blastz-run
BLASTZ=blastz

# Default
BLASTZ_H=2000
BLASTZ_ABRIDGE_REPEATS=1

SEQ1_DIR=/iscratch/i/homIni14/nib
SEQ1_RMSK=
SEQ1_FLAG=
# SEQ1_SMSK=/scratch/hg/gs.18/build35/linSpecRep.notInDog
SEQ1_SMSK=
SEQ1_IN_CONTIGS=0
SEQ1_CHUNK=1000000
SEQ1_LAP=10000

SEQ2_DIR=/iscratch/i/eutGli1/nib
SEQ2_RMSK=
SEQ2_FLAG=
#SEQ2_SMSK=/scratch/hg/homIni14/linSpecRep.notInHuman
SEQ2_SMSK=
SEQ2_IN_CONTIGS=0
SEQ2_CHUNK=1000000
SEQ2_LAP=0

BASE=/cluster/data/homIni14/bed/blastz.eutGli1

DEF=$BASE/DEF
RAW=$BASE/raw
CDBDIR=$BASE
SEQ1_LEN=$BASE/S1.len
SEQ2_LEN=$BASE/S2.len
'_EOF_'
    # << this line keeps emacs coloring happy

    # Three-stage blastz run.  Each BlastZ_runN.sh is followed by a cd into
    # run.N and a "para push"; the commented tables are the parasol job
    # statistics recorded after each stage finished.
    # Stage 0 (still on kk, in the blastz.eutGli1 directory):
    /cluster/data/hg17/jkStuff/BlastZ_run0.sh
    cd run.0
    para push
# Completed: 4899 of 4899 jobs
# CPU time in finished jobs:     918787s   15313.12m   255.22h   10.63d  0.029 y
# IO & Wait Time:                 21355s     355.91m     5.93h    0.25d  0.001 y
# Average job time:                 192s       3.20m     0.05h    0.00d
# Longest job:                     2206s      36.77m     0.61h    0.03d
# Submission to last job:          9206s     153.43m     2.56h    0.11d

    # Stage 1, run from kki:
    ssh kki
    cd /cluster/data/homIni14/bed/blastz.eutGli1
    /cluster/data/hg17/jkStuff/BlastZ_run1.sh

    cd run.1
    para push
# Completed: 71 of 71 jobs
# CPU time in finished jobs:       1139s      18.99m     0.32h    0.01d  0.000 y
# IO & Wait Time:                   797s      13.28m     0.22h    0.01d  0.000 y
# Average job time:                  27s       0.45m     0.01h    0.00d
# Longest job:                       75s       1.25m     0.02h    0.00d
# Submission to last job:           282s       4.70m     0.08h    0.00d

    # Stage 2, back on kk (a single job, per the statistics that follow):
    ssh kk
    cd /cluster/data/homIni14/bed/blastz.eutGli1
    /cluster/data/hg17/jkStuff/BlastZ_run2.sh
    cd run.2
    para push

# Completed: 1 of 1 jobs
# CPU time in finished jobs:        362s       6.04m     0.10h    0.00d  0.000 y
# IO & Wait Time:                  1464s      24.39m     0.41h    0.02d  0.000 y
# Average job time:                1826s      30.43m     0.51h    0.02d
# Longest job:                     1826s      30.43m     0.51h    0.02d
# Submission to last job:          1826s      30.43m     0.51h    0.02d


# END BLASTZ eutGli1


# CHAIN EutGli1 BLASTZ 
    # Run axtChain on little cluster
    # Set up axtChain/run1 with out/ and chain/ output directories and list
    # the per-chromosome axt files (ls -S: largest first) as job inputs.
    ssh kki
    cd /cluster/data/homIni14/bed/blastz.eutGli1
    mkdir -p axtChain/run1
    cd axtChain/run1
    mkdir out chain
    ls -1S /cluster/data/homIni14/bed/blastz.eutGli1/axtChrom/*.axt  > input.lst
    # gensub2 template: one doChain job per line of input.lst, writing
    # chain/<root>.chain and out/<root>.out.
    cat << '_EOF_' > gsub
#LOOP
doChain {check in line+ $(path1)} {check out line+ chain/$(root1).chain} {check out exists out/$(root1).out}
#ENDLOOP
'_EOF_'
    # << this line makes emacs coloring happy

    # Job wrapper: $1 = input axt, $2 = output chain file, $3 = file that
    # captures axtChain's stdout.  Both nib directories are on /iscratch.
    cat << '_EOF_' > doChain
#!/bin/csh
/cluster/bin/x86_64/axtChain $1 \
  /iscratch/i/homIni14/nib \
  /iscratch/i/eutGli1/nib $2 > $3
'_EOF_'
    # << this line makes emacs coloring happy
    chmod a+x doChain
    # Expand the template into jobList and hand it to parasol.
    gensub2 input.lst single gsub jobList
    para create jobList
    # NOTE: the next line is shorthand for a sequence of para commands,
    # not a literal command line.
    para try, check, push, check...
# Completed: 1 of 1 jobs
# CPU time in finished jobs:        197s       3.28m     0.05h    0.00d  0.000 y
# IO & Wait Time:                    88s       1.47m     0.02h    0.00d  0.000 y
# Average job time:                 285s       4.75m     0.08h    0.00d
# Longest job:                      285s       4.75m     0.08h    0.00d
# Submission to last job:           285s       4.75m     0.08h    0.00d

    # now on the cluster server, sort chains
    # Merge the per-job chains into all.chain, split it into chain/, then
    # drop the per-job chain files.
    ssh kkstore01
    cd /cluster/data/homIni14/bed/blastz.eutGli1/axtChain
    chainMergeSort run1/chain/*.chain > all.chain
    chainSplit chain all.chain
    rm run1/chain/*.chain

    # take a look at score distr's
    # (csh loop; $f:t:r is the chain file name without directory or extension.
    #  The /tmp/score.* files written here are not removed by this step.)
    foreach f (chain/*.chain)
      grep chain $f | awk '{print $2;}' | sort -nr > /tmp/score.$f:t:r
      echo $f:t:r
      textHistogram -binSize=4000 /tmp/score.$f:t:r
      echo ""
    end

    # Keep the unfiltered set under a new name, rebuild all.chain with
    # chainFilter -minScore=50000, and re-split it into an emptied chain/.
    mv all.chain all.chain.unfiltered
    chainFilter -minScore=50000 all.chain.unfiltered > all.chain
    rm chain/*
    chainSplit chain all.chain
    # compress the unfiltered copy in the background
    gzip all.chain.unfiltered &

    # Load chains into database
    # One hgLoadChain call per split chain file; the table name is the file
    # name minus its extension ($i:r) plus the suffix _chainEutGli1.
    ssh hgwdev
    cd /cluster/data/homIni14/bed/blastz.eutGli1/axtChain/chain
    foreach i (*.chain)
        set c = $i:r
        hgLoadChain homIni14 ${c}_chainEutGli1 $i
    end
    # Coverage check; the recorded result follows.
    featureBits homIni14 chainEutGli1Link
    # 66936604 bases of 70435997 (95.032%) in intersection

# NET eutGli1 BLASTZ 
    # Build the net from the filtered chains: chainPreNet -> chainNet
    # (second net output discarded to /dev/null) -> netSyntenic.
    ssh kkstore01
    cd /cluster/data/homIni14/bed/blastz.eutGli1/axtChain
    chainPreNet all.chain ../S1.len ../S2.len stdout \
    | chainNet stdin -minSpace=1 ../S1.len ../S2.len stdout /dev/null \
    | netSyntenic stdin noClass.net

    # Add classification info using db tables:
    # (run on hgwdev; netClass is given both database names, homIni14 and eutGli1)
    ssh hgwdev
    cd /cluster/data/homIni14/bed/blastz.eutGli1/axtChain
    netClass -noAr noClass.net homIni14 eutGli1 eutGli1.net

    # Back on kkstore01: remove the unclassified net, then make the
    # 'syntenic' subset of the classified net.
    ssh kkstore01
    cd /cluster/data/homIni14/bed/blastz.eutGli1/axtChain
    rm noClass.net
    # Make a 'syntenic' subset of these with
    netFilter -syn eutGli1.net > eutGli1Syn.net

    # Load the nets into database 
    ssh hgwdev
    cd /cluster/data/homIni14/bed/blastz.eutGli1/axtChain
    netFilter -minGap=10 eutGli1.net |  hgLoadNet homIni14 netEutGli1 stdin
    netFilter -minGap=10 eutGli1Syn.net | hgLoadNet homIni14 netSyntenyEutGli1 stdin
    # Add entries for chainEutGli1, netEutGli1, netSyntenyEutGli1 to the homIni14 trackDb
    # (this note previously named the Hg17 tracks and "dog" -- apparently a
    #  leftover from the doc this section was copied from)


# GENERATE EutGli1 MAF FOR MULTIZ FROM NET 
    # Split the classified net into the net/ directory (netSplit).
    ssh kkstore01
    cd /cluster/data/homIni14/bed/blastz.eutGli1/axtChain
    netSplit eutGli1.net net
    cd /cluster/data/homIni14/bed/blastz.eutGli1
    mkdir axtNet
    # For each split net file, pair it with the chain file of the same base
    # name, convert to axt using both nib directories, and sort into axtNet/.
    foreach f (axtChain/net/*)
      set chr = $f:t:r
      netToAxt $f axtChain/chain/$chr.chain /cluster/data/homIni14/nib \
        /cluster/data/eutGli1/nib stdout | axtSort stdin axtNet/$chr.axt
    end
    mkdir mafNet
    # Convert each axt to MAF, prefixing sequence names with the database
    # name ("homIni14." for the target side, "eutGli1." for the query side).
    foreach f (axtNet/chr*.axt)
    set maf = mafNet/$f:t:r.maf
    axtToMaf $f /cluster/data/homIni14/chrom.sizes /cluster/data/eutGli1/chrom.sizes $maf -tPrefix=homIni14. -qPrefix=eutGli1.
    end
# BLASTZ EutPri1
    # Create the blastz working directory for homIni14 (SEQ1) vs eutPri1 (SEQ2)
    # and write its DEF parameter file.  The heredoc delimiter is quoted, so
    # $BASE etc. are written into DEF literally rather than expanded here.
    # The BlastZ_run*.sh scripts are later started from this directory;
    # presumably they read DEF -- confirm against those scripts.
    # NOTE(review): the commented-out SEQ*_SMSK lines inside DEF mention other
    # assemblies and look like leftovers from the doc this was copied from.
    ssh kk
    mkdir /cluster/data/homIni14/bed/blastz.eutPri1
    cd /cluster/data/homIni14/bed/blastz.eutPri1
    cat << '_EOF_' > DEF
export PATH=/usr/bin:/bin:/usr/local/bin:/cluster/bin/penn:/cluster/bin/i386:/cluster/home/angie/schwartzbin

ALIGN=blastz-run
BLASTZ=blastz

# Default
BLASTZ_H=2000
BLASTZ_ABRIDGE_REPEATS=1

SEQ1_DIR=/iscratch/i/homIni14/nib
SEQ1_RMSK=
SEQ1_FLAG=
# SEQ1_SMSK=/scratch/hg/gs.18/build35/linSpecRep.notInDog
SEQ1_SMSK=
SEQ1_IN_CONTIGS=0
SEQ1_CHUNK=1000000
SEQ1_LAP=10000

SEQ2_DIR=/iscratch/i/eutPri1/nib
SEQ2_RMSK=
SEQ2_FLAG=
#SEQ2_SMSK=/scratch/hg/homIni14/linSpecRep.notInHuman
SEQ2_SMSK=
SEQ2_IN_CONTIGS=0
SEQ2_CHUNK=1000000
SEQ2_LAP=0

BASE=/cluster/data/homIni14/bed/blastz.eutPri1

DEF=$BASE/DEF
RAW=$BASE/raw
CDBDIR=$BASE
SEQ1_LEN=$BASE/S1.len
SEQ2_LEN=$BASE/S2.len
'_EOF_'
    # << this line keeps emacs coloring happy

    # Three-stage blastz run.  Each BlastZ_runN.sh is followed by a cd into
    # run.N and a "para push"; the commented tables are the parasol job
    # statistics recorded after each stage finished.
    # Stage 0 (still on kk, in the blastz.eutPri1 directory):
    /cluster/data/hg17/jkStuff/BlastZ_run0.sh
    cd run.0
    para push
# Completed: 6319 of 6319 jobs
# CPU time in finished jobs:     144839s    2413.98m    40.23h    1.68d  0.005 y
# IO & Wait Time:                 21882s     364.71m     6.08h    0.25d  0.001 y
# Average job time:                  26s       0.44m     0.01h    0.00d
# Longest job:                     2517s      41.95m     0.70h    0.03d
# Submission to last job:          3900s      65.00m     1.08h    0.05d

    # Stage 1, run from kki:
    ssh kki
    cd /cluster/data/homIni14/bed/blastz.eutPri1
    /cluster/data/hg17/jkStuff/BlastZ_run1.sh

    cd run.1
    para push
# Completed: 71 of 71 jobs
# CPU time in finished jobs:         72s       1.21m     0.02h    0.00d  0.000 y
# IO & Wait Time:                   211s       3.51m     0.06h    0.00d  0.000 y
# Average job time:                   4s       0.07m     0.00h    0.00d
# Longest job:                       13s       0.22m     0.00h    0.00d
# Submission to last job:            52s       0.87m     0.01h    0.00d

    # Stage 2, back on kk (a single job, per the statistics that follow):
    ssh kk
    cd /cluster/data/homIni14/bed/blastz.eutPri1
    /cluster/data/hg17/jkStuff/BlastZ_run2.sh
    cd run.2
    para push

# Completed: 1 of 1 jobs
# CPU time in finished jobs:         52s       0.86m     0.01h    0.00d  0.000 y
# IO & Wait Time:                    26s       0.44m     0.01h    0.00d  0.000 y
# Average job time:                  78s       1.30m     0.02h    0.00d
# Longest job:                       78s       1.30m     0.02h    0.00d
# Submission to last job:            80s       1.33m     0.02h    0.00d

# END BLASTZ eutPri1

# CHAIN EutPri1 BLASTZ 
    # Run axtChain on little cluster
    # Set up axtChain/run1 with out/ and chain/ output directories and list
    # the per-chromosome axt files (ls -S: largest first) as job inputs.
    ssh kki
    cd /cluster/data/homIni14/bed/blastz.eutPri1
    mkdir -p axtChain/run1
    cd axtChain/run1
    mkdir out chain
    ls -1S /cluster/data/homIni14/bed/blastz.eutPri1/axtChrom/*.axt  > input.lst
    # gensub2 template: one doChain job per line of input.lst, writing
    # chain/<root>.chain and out/<root>.out.
    cat << '_EOF_' > gsub
#LOOP
doChain {check in line+ $(path1)} {check out line+ chain/$(root1).chain} {check out exists out/$(root1).out}
#ENDLOOP
'_EOF_'
    # << this line makes emacs coloring happy

    # Job wrapper: $1 = input axt, $2 = output chain file, $3 = file that
    # captures axtChain's stdout.  Both nib directories are on /iscratch.
    cat << '_EOF_' > doChain
#!/bin/csh
/cluster/bin/x86_64/axtChain $1 \
  /iscratch/i/homIni14/nib \
  /iscratch/i/eutPri1/nib $2 > $3
'_EOF_'
    # << this line makes emacs coloring happy
    chmod a+x doChain
    # Expand the template into jobList and hand it to parasol.
    gensub2 input.lst single gsub jobList
    para create jobList
    # NOTE: the next line is shorthand for a sequence of para commands,
    # not a literal command line.
    para try, check, push, check...
# Completed: 1 of 1 jobs
# CPU time in finished jobs:         72s       1.20m     0.02h    0.00d  0.000 y
# IO & Wait Time:                     7s       0.12m     0.00h    0.00d  0.000 y
# Average job time:                  79s       1.32m     0.02h    0.00d
# Longest job:                       79s       1.32m     0.02h    0.00d
# Submission to last job:            79s       1.32m     0.02h    0.00d

    # now on the cluster server, sort chains
    # Merge the per-job chains into all.chain, split it into chain/, then
    # drop the per-job chain files.
    ssh kkstore01
    cd /cluster/data/homIni14/bed/blastz.eutPri1/axtChain
    chainMergeSort run1/chain/*.chain > all.chain
    chainSplit chain all.chain
    rm run1/chain/*.chain

    # take a look at score distr's
    # (csh loop; $f:t:r is the chain file name without directory or extension.
    #  The /tmp/score.* files written here are not removed by this step.)
    foreach f (chain/*.chain)
      grep chain $f | awk '{print $2;}' | sort -nr > /tmp/score.$f:t:r
      echo $f:t:r
      textHistogram -binSize=4000 /tmp/score.$f:t:r
      echo ""
    end

    # Keep the unfiltered set under a new name, rebuild all.chain with
    # chainFilter -minScore=50000, and re-split it into an emptied chain/.
    mv all.chain all.chain.unfiltered
    chainFilter -minScore=50000 all.chain.unfiltered > all.chain
    rm chain/*
    chainSplit chain all.chain
    # compress the unfiltered copy in the background
    gzip all.chain.unfiltered &

    # Load chains into database
    # One hgLoadChain call per split chain file; the table name is the file
    # name minus its extension ($i:r) plus the suffix _chainEutPri1.
    ssh hgwdev
    cd /cluster/data/homIni14/bed/blastz.eutPri1/axtChain/chain
    foreach i (*.chain)
        set c = $i:r
        hgLoadChain homIni14 ${c}_chainEutPri1 $i
    end
    # Coverage check; the recorded result follows.
    featureBits homIni14 chainEutPri1Link
    # 65181722 bases of 70435997 (92.540%) in intersection

# NET eutPri1 BLASTZ 
    # Build the net from the filtered chains: chainPreNet -> chainNet
    # (second net output discarded to /dev/null) -> netSyntenic.
    ssh kkstore01
    cd /cluster/data/homIni14/bed/blastz.eutPri1/axtChain
    chainPreNet all.chain ../S1.len ../S2.len stdout \
    | chainNet stdin -minSpace=1 ../S1.len ../S2.len stdout /dev/null \
    | netSyntenic stdin noClass.net

    # Add classification info using db tables:
    # (run on hgwdev; netClass is given both database names, homIni14 and eutPri1)
    ssh hgwdev
    cd /cluster/data/homIni14/bed/blastz.eutPri1/axtChain
    netClass -noAr noClass.net homIni14 eutPri1 eutPri1.net

    # Back on kkstore01: remove the unclassified net, then make the
    # 'syntenic' subset of the classified net.
    ssh kkstore01
    cd /cluster/data/homIni14/bed/blastz.eutPri1/axtChain
    rm noClass.net
    # Make a 'syntenic' subset of these with
    netFilter -syn eutPri1.net > eutPri1Syn.net

    # Load the nets into database 
    ssh hgwdev
    cd /cluster/data/homIni14/bed/blastz.eutPri1/axtChain
    netFilter -minGap=10 eutPri1.net |  hgLoadNet homIni14 netEutPri1 stdin
    netFilter -minGap=10 eutPri1Syn.net | hgLoadNet homIni14 netSyntenyEutPri1 stdin
    # Add entries for chainEutPri1, netEutPri1, netSyntenyEutPri1 to the homIni14 trackDb
    # (this note previously named the Hg17 tracks and "dog" -- apparently a
    #  leftover from the doc this section was copied from)


# GENERATE EutPri1 MAF FOR MULTIZ FROM NET 
    # Split the classified net into the net/ directory (netSplit).
    ssh kkstore01
    cd /cluster/data/homIni14/bed/blastz.eutPri1/axtChain
    netSplit eutPri1.net net
    cd /cluster/data/homIni14/bed/blastz.eutPri1
    mkdir axtNet
    # For each split net file, pair it with the chain file of the same base
    # name, convert to axt using both nib directories, and sort into axtNet/.
    foreach f (axtChain/net/*)
      set chr = $f:t:r
      netToAxt $f axtChain/chain/$chr.chain /cluster/data/homIni14/nib \
        /cluster/data/eutPri1/nib stdout | axtSort stdin axtNet/$chr.axt
    end
    mkdir mafNet
    # Convert each axt to MAF, prefixing sequence names with the database
    # name ("homIni14." for the target side, "eutPri1." for the query side).
    foreach f (axtNet/chr*.axt)
    set maf = mafNet/$f:t:r.maf
    axtToMaf $f /cluster/data/homIni14/chrom.sizes /cluster/data/eutPri1/chrom.sizes $maf -tPrefix=homIni14. -qPrefix=eutPri1.
    end

# Load into database
  # Link multiz4way.maf into /gbdb/homIni14/multiz4way and load it with
  # hgLoadMaf (run on hgwdev).
  # NOTE(review): the "/cluster/bin/pen" line below looks truncated (other
  # commands in this file live under /cluster/bin/penn/); the intended
  # command cannot be recovered from this file.
  ssh hgwdev
  cd /cluster/data/homIni14/bed/multizDescend
  /cluster/bin/pen
  mkdir -p /gbdb/homIni14/multiz4way
  ln -s /cluster/data/homIni14/bed/multizDescend/multiz4way.maf /gbdb/homIni14/multiz4way
  hgLoadMaf homIni14 multiz4way


      # Pairwise tables: for each of the three assemblies, link
      # blastz.<db>/chr15.maf as <db>.maf under multiz4way/<db>_netBlastz and
      # load it as table <db>_netBlastz.
      # NOTE(review): the eutGli1/eutPri1 sections of this file write their
      # MAFs under mafNet/, not directly in blastz.<db>/ -- confirm chr15.maf
      # exists at the linked path.
      for i in eutGli1 eutPri1 hg17
      do
	  f="$i"_netBlastz
	  mkdir -p /gbdb/homIni14/multiz4way/$f
	  ln -s /cluster/data/homIni14/bed/blastz.$i/chr15.maf /gbdb/homIni14/multiz4way/$f/$i.maf
	  hgLoadMaf homIni14 $f -pathPrefix=/gbdb/homIni14/multiz4way/$f
      done

# Load the 9-way with ancestors
    # Project homIni14.maf onto homIni14 with maf_project, writing
    # homIni14.local.maf in bed/webbMaf.
    ssh kkilo
    mkdir -p /cluster/data/homIni14/bed/webbMaf
    cd /cluster/data/homIni14/bed/webbMaf
    /cluster/bin/penn/maf_project homIni14.maf  homIni14 > homIni14.local.maf
  # NOTE(review): the three lines below repeat the multiz4way link/load from
  # the previous section verbatim and do not reference the file produced
  # above; if that earlier step already ran, the ln -s will report that the
  # link exists.  Presumably a copy-paste leftover -- confirm before re-running.
  mkdir -p /gbdb/homIni14/multiz4way
  ln -s /cluster/data/homIni14/bed/multizDescend/multiz4way.maf /gbdb/homIni14/multiz4way
  hgLoadMaf homIni14 multiz4way

#load Webb's maf
  # Rename the species labels in Webb's alignment to browser database names,
  # project the result onto homIni14, and load it as table webbMaf.
  # Assumes the working directory is still /cluster/data/homIni14/bed/webbMaf
  # (the symlink below points at homIni14.local.maf there) -- confirm.
  mkdir -p /cluster/data/homIni14/bed/tba14way
    zcat ../../fromWebb/borEut.maf.gz | sed "s/chimp/panTro1/g" |sed "s/cow/bosTau1/g" |sed "s/primate/eutPri2.chr15/g" | sed "s/euarch/eutGli2.chr15/g" | sed "s/boreo/homIni14.chr15/g" | sed "s/macaca/rheMac1/g" > global.maf
    time /cluster/bin/penn/maf_project global.maf homIni14 > homIni14.local.maf
  mkdir -p /gbdb/homIni14/tba14way
  # The symlink and hgLoadMaf's -pathPrefix both use /gbdb/homIni14/webbMaf,
  # which nothing above creates; make it so the ln -s cannot fail on a
  # missing directory.  (The tba14way directories created above are not used
  # by the remaining commands in this step.)
  mkdir -p /gbdb/homIni14/webbMaf
  ln -s /cluster/data/homIni14/bed/webbMaf/homIni14.local.maf /gbdb/homIni14/webbMaf/webbMaf.maf
   # On "s" lines, where start+size ($3+$4) exceeds the recorded source size
   # ($6), rewrite the source size as start+size; all other lines pass through.
   # NOTE(review): fixed.local.maf and homIni14.order.maf are produced here but
   # the file linked into /gbdb is homIni14.local.maf -- confirm which one was
   # meant to be loaded.
   awk "! /^s/ { print} /^s/ {if (\$3 + \$4 > \$6) print \$1, \$2, \$3, \$4,\$5,\$3+\$4, \$7; else print \$0}" homIni14.local.maf > fixed.local.maf
  /cluster/bin/penn/maf_order homIni14.local.maf homIni14 mmHg3 panHg3 hg17 panTro1 rheMac1 rabbit mm6 rn3 canFam1 bosTau1 armadillo elephant > homIni14.order.maf
  hgLoadMaf homIni14 webbMaf -pathPrefix=/gbdb/homIni14/webbMaf


    # tba12way: fetch Webb's alignment, order the rows of the projected MAF,
    # and load it together with a summary table.
    # NOTE(review): no command in this step creates homIni14.local.maf in this
    # directory (the wget only fetches borEut.maf.gz); presumably it was
    # produced or copied as in the webbMaf step -- confirm.
    mkdir /cluster/data/homIni14/bed/tba12way
    cd /cluster/data/homIni14/bed/tba12way
    wget "http://www.bx.psu.edu/~webb/borEut.maf.gz"
    nice bash
    # Source names with everything from the first "." stripped, de-duplicated.
    grep "^s" *local* | sed "s/\..*$//" | sort -u > species.lst
    time /cluster/bin/penn/maf_order homIni14.local.maf homIni14 eutGli2 eutPri2 hg17 panTro1 rabbit mouse rat canFam1 bosTau1 armadillo elephant  > homIni14.order.maf
    # On "s" lines, where $3+$4 exceeds $6, rewrite $6 as $3+$4.  The output
    # (fixed.local.maf) is not used by the commands below.
    awk "! /^s/ { print} /^s/ {if (\$3 + \$4 > \$6) print \$1, \$2, \$3, \$4,\$5,\$3+\$4, \$7; else print \$0}" homIni14.local.maf > fixed.local.maf

    # Link the ordered MAF into /gbdb, load it, then load the summary table
    # from the same file.
    mkdir -p /gbdb/homIni14/tba12way
    ln -s /cluster/data/homIni14/bed/tba12way/homIni14.order.maf /gbdb/homIni14/tba12way/tba12way.maf
    hgLoadMaf homIni14 tba12way -pathPrefix=/gbdb/homIni14/tba12way
    cat homIni14.order.maf | hgLoadMafSummary -minSize=10000 -mergeGap=500 -maxSize=50000 homIni14 tba12waySummary stdin

# MAKE Mouse Proteins track 
    # Build nucleotide BLAST databases (formatdb -p F) for every piece named
    # in the second column of the .lft lift files.
    ssh kkstore01
    mkdir -p /cluster/data/homIni14/blastDb
    cd /cluster/data/homIni14/blastDb
    awk "{print \$2}" ../*/chr*/*.lft > subChr.lst
    for i in `cat subChr.lst`
    do
	ln -s ../*/chr*/$i.fa 
	echo formatdb -i $i.fa -p F
	formatdb -i $i.fa -p F
    done
    # Remove the formatdb logs, the fasta symlinks and "list"; only the
    # database files are kept.
    rm *.log *.fa list
    cd ..
    # Concatenate the per-chromosome lift files into jkStuff/subChr.lft
    # (this is the path the blastSome job script lifts through).
    for i in `cat chrom.lst`; do cat  $i/chr*/*.lft  ; done > jkStuff/subChr.lft

    # Copy the BLAST database files (.nhr/.nin/.nsq) to /iscratch, replacing
    # any previous copy, then run iSync (presumably distributes /iscratch to
    # the cluster nodes -- confirm).
    ssh kkr1u00
    rm -rf /iscratch/i/homIni14/blastDb
    mkdir -p /iscratch/i/homIni14/blastDb
    cd /cluster/data/homIni14/blastDb
    for i in nhr nin nsq; do cp *.$i /iscratch/i/homIni14/blastDb     ; echo $i; done

    cd
    iSync > sync.out

    # query.lst: the database paths on /iscratch with ".nsq" removed, in
    # ls -S order (largest file first).
    mkdir -p /cluster/data/homIni14/bed/tblastn.mm6KG
    cd /cluster/data/homIni14/bed/tblastn.mm6KG
    echo  /iscratch/i/homIni14/blastDb/*.nsq  | xargs ls -S | sed "s/\.nsq//"  > query.lst  
    # back to kkstore01
    exit

    # Split mm6KG.psl into 70-line pieces on bluearc (prefix "kg"), convert
    # each piece to fasta with pslxToFa, and keep only the .fa files.
    cd /cluster/data/homIni14/bed/tblastn.mm6KG
    rm -rf /cluster/bluearc/homIni14/bed/tblastn.mm6KG/kgfa
    mkdir -p /cluster/bluearc/homIni14/bed/tblastn.mm6KG/kgfa
    split -l 70 /cluster/data/mm6/bed/blat.mm6KG/mm6KG.psl /cluster/bluearc/homIni14/bed/tblastn.mm6KG/kgfa/kg
    ln -s /cluster/bluearc/homIni14/bed/tblastn.mm6KG/kgfa kgfa
    cd kgfa
    for i in *; do pslxToFa $i $i.fa; rm $i; done
    cd ..
    # kg.lst: the fasta pieces, largest first.
    ls -1S kgfa/*.fa > kg.lst
    # Fresh blastOut directory on bluearc, symlinked into the working
    # directory (ln -s with no link name creates ./blastOut), with one
    # sub-directory per fasta piece.
    rm -rf /cluster/bluearc/homIni14/bed/tblastn.mm6KG/blastOut
    mkdir -p /cluster/bluearc/homIni14/bed/tblastn.mm6KG/blastOut
    ln -s /cluster/bluearc/homIni14/bed/tblastn.mm6KG/blastOut
    for i in `cat kg.lst`; do  mkdir blastOut/`basename $i .fa`; done
    tcsh
    # gensub2 template: one blastSome job per (blast database, protein fasta)
    # pair; each job must produce blastOut/<fasta root>/q.<database root>.psl.
    cat << '_EOF_' > blastGsub
#LOOP
blastSome $(path1) {check in line $(path2)} {check out exists blastOut/$(root2)/q.$(root1).psl } 
#ENDLOOP
'_EOF_'
    # Job script.  The quoted heredoc delimiter writes it out verbatim.
    cat << '_EOF_' > blastSome
#!/bin/sh
# Usage: blastSome <blastDb> <proteinFa> <outPsl>
# tblastn the proteins in $2 against database $1, convert the report to PSL,
# lift target coordinates through subChr.lft and liftAll.lft and query
# coordinates through protein.lft, and install the checked result as $3.
# Exits 0 only when a PSL that passed pslCheck has been moved to $3; every
# other path removes the scratch files and exits 1.
BLASTMAT=/iscratch/i/blast/data
export BLASTMAT
g=`basename $2`
f=/tmp/`basename $3`.$g
# Retry with successively smaller e-value cutoffs until blastall succeeds.
for eVal in 0.01 0.001 0.0001 0.00001 0.000001 1E-09 1E-11
do
if /scratch/blast/blastall -M BLOSUM80 -m 0 -F no -e $eVal -p tblastn -d $1 -i $2 -o $f.8
then
        mv $f.8 $f.1
        break;
fi
done
if test -f  $f.1
then
    if /cluster/bin/i386/blastToPsl $f.1 $f.2
    then
        liftUp -nosort -type=".psl" -nohead $f.3 ../../jkStuff/subChr.lft carry $f.2
        liftUp -nosort -type=".psl" -nohead $f.4 ../../jkStuff/liftAll.lft carry $f.3
        liftUp -nosort -type=".psl" -pslQ -nohead $3.tmp /cluster/data/mm6/bed/blat.mm6KG/protein.lft warn $f.4

        if pslCheck -prot $3.tmp
        then
            mv $3.tmp $3
            rm -f $f.1 $f.2 $f.3  $f.4
            # Success is reported only here; a failed pslCheck now falls
            # through to the cleanup below instead of exiting 0 with
            # scratch files and $3.tmp left behind.
            exit 0
        fi
    fi
fi
# Failure path (blastall, blastToPsl or pslCheck): drop all scratch files.
rm -f $f.1 $f.2 $3.tmp $f.8 $f.3 $f.4
exit 1
'_EOF_'

    chmod +x blastSome
    # Cross query.lst (databases) with kg.lst (protein fasta pieces) into the
    # parasol job list blastSpec.
    gensub2 query.lst kg.lst blastGsub blastSpec

    # Run the tblastn job list on kk; the commented table is the parasol
    # summary recorded after the run.
    ssh kk
    cd /cluster/data/homIni14/bed/tblastn.mm6KG
    para create blastSpec
    para push
# Completed: 37620 of 37620 jobs
# CPU time in finished jobs:    1022938s   17048.96m   284.15h   11.84d  0.032 y
# IO & Wait Time:                275126s    4585.44m    76.42h    3.18d  0.009 y
# Average job time:                  35s       0.58m     0.01h    0.00d
# Longest finished job:             111s       1.85m     0.03h    0.00d
# Submission to last job:          1684s      28.07m     0.47h    0.02d


    # NOTE(review): this template names "chainSome", but the chaining step
    # that follows writes a script called chainOne and builds chainSpec with
    # a shell loop; chainGsub does not appear to be used -- confirm.
    cat << '_EOF_' > chainGsub
#LOOP
chainSome $(path1)
#ENDLOOP
'_EOF_'

    # Chain the per-database blast hits: one job per (protein batch,
    # chromosome) pair, run on kki.
    ssh kki
    cd /cluster/data/homIni14/bed/tblastn.mm6KG
    tcsh
    # chainOne <batchDir> <chrom>: inside <batchDir>, concatenate every
    # q.<chrom>* file and run simpleChain on it, writing
    # ../c.<batch name>.<chrom>.psl (i.e. next to the batch directories).
    cat << '_EOF_' > chainOne
(cd $1; cat q."$2"* | simpleChain -prot -outPsl -maxGap=200000 stdin ../c.`basename $1`.$2.psl)
'_EOF_'
    chmod +x chainOne

    # Job list: every blastOut/kg?? directory crossed with every entry of
    # ../../chrom.lst (prefixed with "chr").
    # NOTE(review): this loop is Bourne-shell syntax although tcsh was started
    # above; presumably it was run after leaving that tcsh -- confirm.
    for j in blastOut/kg??; do for i in `cat ../../chrom.lst`; do echo chainOne $j chr"$i"; done ; done > chainSpec

    para create chainSpec
    para push
# Completed: 285 of 285 jobs
# CPU time in finished jobs:        239s       3.98m     0.07h    0.00d  0.000 y
# IO & Wait Time:                   731s      12.19m     0.20h    0.01d  0.000 y
# Average job time:                   3s       0.06m     0.00h    0.00d
# Longest running job:                0s       0.00m     0.00h    0.00d
# Longest finished job:              14s       0.23m     0.00h    0.00d
# Submission to last job:            85s       1.42m     0.02h    0.00d

    exit
    # back to kkstore01
    # Per protein batch (kg??), using PSL column numbers:
    #   c60.* = chained hits where (qEnd - qStart)/qSize, i.e. ($13-$12)/$11,
    #           is above 0.6
    #   u.*   = c60 sorted numerically descending on the leading column
    #           (matches) and passed through pslUniq
    #   m60.* = c60 hits where matches/qSize, i.e. $1/$11, is above 0.90
    cd /cluster/data/homIni14/bed/tblastn.mm6KG/blastOut
    for i in kg??
    do
        cat c.$i.*.psl | awk "(\$13 - \$12)/\$11 > 0.6 {print}" > c60.$i.psl
        sort -rn c60.$i.psl | pslUniq stdin u.$i.psl
        awk "((\$1 / \$11) ) > 0.90 { print   }" c60.$i.psl > m60.$i.psl
        echo $i
    done

    # Merge both sets into the final PSL.
    # NOTE(review): the key "-k 17,17n" is given twice; with -u, lines that
    # agree on columns 14 and 17 collapse to one.  A tStart key (-k 16,16n)
    # may have been intended for the first of the two -- left as recorded,
    # confirm before re-running.
    sort -u -T /tmp -k 14,14 -k 17,17n -k 17,17n  u.*.psl m60* > /cluster/data/homIni14/bed/tblastn.mm6KG/blastMm6KG.psl
    cd ..
    ssh hgwdev
    cd /cluster/data/homIni14/bed/tblastn.mm6KG
    # Load the file written by the sort above.  This step previously named
    # blastHg17KG.psl, which nothing in this section creates.
    hgLoadPsl homIni14 blastMm6KG.psl
    # 1425966 bases of 64944656 (2.196%) 
    # 1368835 bases of 62796757 (2.180%) in borEut2

    # back to kkstore01
    rm -rf blastOut
# End tblastn


##########################################################################
# EVOFOLD  - RNA secondary structure predictions lifted from hg17
# Jakob Skou Pedersen, Aug 5, 2005
  # Work in bed/evofold on hgwdev.
  # NOTE(review): the intermediate file names below say "CanHg10" although
  # the data is for homIni14 -- presumably inherited from the doc this
  # section was adapted from; they are only local file names.
  ssh -C hgwdev
  mkdir -p /cluster/data/homIni14/bed/evofold
  cd /cluster/data/homIni14/bed/evofold

  # concatenating chain files
  cat /cluster/data/homIni14/bed/chains.hg17/hg17/*chain > hg17ToCanHg10.over.chain

  # lifting evofolds from hg17 to homIni14
  # (dump the evofold table from hg17 without the header row, then liftOver
  #  with -minMatch=1.0)
  echo "select chrom, chromStart, chromEnd, name, score, strand, size, secStr, conf from evofold;" | hgsql hg17 | sed -e 1d > foldsHg17.bed
  liftOver -minMatch=1.0 foldsHg17.bed hg17ToCanHg10.over.chain tmp.bed unmapped.bed
  # count the "#" lines in the unmapped output
  grep "#" unmapped.bed | wc -l
  # remove elements which are wrong size after lifting
  # (keep rows where chromEnd - chromStart still equals the size column, $7)
  awk '$3-$2 == $7' tmp.bed > foldsCanHg10.bed
  # clean up
  rm foldsHg17.bed unmapped.bed tmp.bed
  hgLoadBed -notItemRgb -sqlTable=/cluster/home/jsp/prog/kent/src/hg/lib/evofold.sql homIni14 evofold foldsCanHg10.bed


##########################################################################
#  RFAMSEEDFOLDS  - Rfam seed secondary structure annotation lifted from hg17
# Jakob Skou Pedersen, Aug 5, 2005
  # lifting rfamSeed from hg17 to homIni14
  # Uses hg17ToCanHg10.over.chain by relative name and has no cd of its own,
  # so it presumably runs in the evofold directory where that chain file is
  # written -- confirm.  It also overwrites foldsCanHg10.bed from that step.
  echo "select chrom, chromStart, chromEnd, name, score, strand, size, secStr, conf from RfamSeedFolds;" | hgsql hg17 | sed -e 1d > foldsHg17.bed
  liftOver -minMatch=1.0 foldsHg17.bed hg17ToCanHg10.over.chain tmp.bed unmapped.bed
  # count the "#" lines in the unmapped output
  grep "#" unmapped.bed | wc -l
  # remove elements which are of wrong size after lifting
  # (keep rows where chromEnd - chromStart still equals the size column, $7)
  awk '$3-$2 == $7' tmp.bed > foldsCanHg10.bed
  # clean up
  rm foldsHg17.bed unmapped.bed tmp.bed
  # unlike the evofold load, this one passes -noBin
  hgLoadBed -noBin -notItemRgb -sqlTable=/cluster/home/jsp/prog/kent/src/hg/lib/RfamSeedFolds.sql homIni14 RfamSeedFolds foldsCanHg10.bed




###
## ENTROPY (Done, 2006-10-09, bsuh)
##  Entropy wiggle track
# Working area for the entropy track; all computation happens in work/.
mkdir /cluster/data/homIni14/bed/entropy
mkdir /cluster/data/homIni14/bed/entropy/work
cd /cluster/data/homIni14/bed/entropy/work
# ln -s with no link name: creates ./final-results pointing at the path below.
ln -s /cluster/home/bsuh/file/san/rico/vers1/final-results
# NOTE(review): compile.exe, probToBed.pl, addChr_nochange.pl and
# encode_nochange.pl are not defined in this file and no step here copies
# them into work/; presumably they are on PATH or were placed by hand --
# confirm before re-running.
compile.exe "probToEntropy.c faProb.c"
mkdir tmp
probToBed.pl
cd tmp
ln -s ../addChr_nochange.pl
ln -s ../encode_nochange.pl
encode_nochange.pl
# Move the entropy.* outputs two levels up, into bed/entropy, then discard tmp.
mv entropy.* ../..
cd ..
rm -rf tmp
# Link the .wib files into /gbdb and load the matching .wig files as table
# "entropy".
cd /gbdb/homIni14/wib
ln -s /cluster/data/homIni14/bed/entropy/entropy.*.wib .
cd /cluster/data/homIni14/bed/entropy
hgLoadWiggle homIni14 entropy entropy.*.wig
