# for emacs: -*- mode: sh; -*-

# This file describes how we make the BlastTab tables for knownGenes
#	sgdGenes flyBaseGenes wormGenes

# Run on the occasion of the Ensembl v59 release to update the danRer6 ensembl
#	blast tabs on all other organisms.  In the process, discovered
#	some confusion in previous builds of blastTabs, therefore everything
#	was rebuilt.

############################################################################
# DONE 2010-08-18 in this case in this danRer6 directory
#	where you can find this procedure encapsulated in scripts.
#	the copy here is merely the contents of the scripts that run
#	these procedures.

mkdir /hive/data/genomes/danRer6/bed/ensGene.59/blastTab
cd /hive/data/genomes/danRer6/bed/ensGene.59/blastTab

export runDir="/hive/data/genomes/danRer6/bed/ensGene.59/blastTab"

############################################################################
# prepare peptide fasta files for all
mkdir -p ${runDir}
cd ${runDir}
# get all the protein sets to be used here
pepPredToFa danRer6 ensPep danRer6.ensembl.faa
pepPredToFa sacCer2 sgdPep sacCer2.sgd.faa
pepPredToFa dm3 flyBasePep dm3.flyBase.faa
pepPredToFa ce6 sangerPep ce6.sanger.faa
for D in hg19 mm9 rn4
do
pepPredToFa $D knownGenePep $D.known.faa
done

 
############################################################################
# construct each *.config.ra file
# known gene tables
for D in hg19 mm9 rn4
do
mkdir -p $D
echo "targetGenesetPrefix known
targetDb $D" > ${D}/${D}.config.ra
echo "queryDbs hg19 mm9 rn4 ce6 dm3 danRer6 sacCer2" \
        | sed -e "s/ ${D}//" >> ${D}/${D}.config.ra
echo "hg19Fa ${runDir}/hg19.known.faa
mm9Fa ${runDir}/mm9.known.faa
rn4Fa ${runDir}/rn4.known.faa
sacCer2Fa ${runDir}/sacCer2.sgd.faa
ce6Fa ${runDir}/ce6.sanger.faa
dm3Fa ${runDir}/dm3.flyBase.faa
danRer6Fa ${runDir}/danRer6.ensembl.faa
buildDir ${runDir}/${D}
scratchDir ${runDir}/${D}/tmp" >> ${D}/${D}.config.ra
done

# sgd gene tables
for D in sacCer2
do
mkdir -p $D
echo "targetGenesetPrefix sgd
targetDb $D
queryDbs hg19 mm9 rn4 ce6 dm3 danRer6
hg19Fa ${runDir}/hg19.known.faa
mm9Fa ${runDir}/mm9.known.faa
rn4Fa ${runDir}/rn4.known.faa
sacCer2Fa ${runDir}/sacCer2.sgd.faa
ce6Fa ${runDir}/ce6.sanger.faa
dm3Fa ${runDir}/dm3.flyBase.faa
danRer6Fa ${runDir}/danRer6.ensembl.faa
buildDir ${runDir}/${D}
scratchDir ${runDir}/${D}/tmp" > ${D}/${D}.config.ra
done

# flyBase table
for D in dm3
do
mkdir -p $D
echo "targetGenesetPrefix flyBase
targetDb $D
queryDbs hg19 mm9 rn4 ce6 sacCer2 danRer6
hg19Fa ${runDir}/hg19.known.faa
mm9Fa ${runDir}/mm9.known.faa
rn4Fa ${runDir}/rn4.known.faa
sacCer2Fa ${runDir}/sacCer2.sgd.faa
ce6Fa ${runDir}/ce6.sanger.faa
dm3Fa ${runDir}/dm3.flyBase.faa
danRer6Fa ${runDir}/danRer6.ensembl.faa
buildDir ${runDir}/${D}
scratchDir ${runDir}/${D}/tmp" > ${D}/${D}.config.ra
done

# sanger table
for D in ce6
do
mkdir -p $D
echo "targetGenesetPrefix sanger
targetDb $D
queryDbs hg19 mm9 rn4 dm3 sacCer2 danRer6
hg19Fa ${runDir}/hg19.known.faa
mm9Fa ${runDir}/mm9.known.faa
rn4Fa ${runDir}/rn4.known.faa
sacCer2Fa ${runDir}/sacCer2.sgd.faa
ce6Fa ${runDir}/ce6.sanger.faa
dm3Fa ${runDir}/dm3.flyBase.faa
danRer6Fa ${runDir}/danRer6.ensembl.faa
buildDir ${runDir}/${D}
scratchDir ${runDir}/${D}/tmp" > ${D}/${D}.config.ra
done

###########################################################################
# in each directory ce6 rn4 dm3 mm9 sacCer2 hg19
#	running the kluster job
cd sacCer6
time nice -n +19 doHgNearBlastp.pl -workhorse=hgwdev -clusterHub=pk \
	sacCer2.config.ra > do.log 2>&1
real    39m3.306s
user    0m0.589s
sys     0m0.628s

cd ../dm3
nice -n +19 doHgNearBlastp.pl -workhorse=hgwdev -clusterHub=swarm \
	dm3.config.ra > do.log 2>&1
cd ../ce6
nice -n +19 doHgNearBlastp.pl -workhorse=hgwdev -clusterHub=pk \
	ce6.config.ra > do.log 2>&1
cd ../mm9
nice -n +19 doHgNearBlastp.pl -workhorse=hgwdev -clusterHub=swarm \
	mm9.config.ra > do.log 2>&1
cd ../rn4
nice -n +19 doHgNearBlastp.pl -workhorse=hgwdev -clusterHub=swarm \
	rn4.config.ra > do.log 2>&1
cd ../hg19
nice -n +19 doHgNearBlastp.pl -workhorse=hgwdev -clusterHub=pk \
	hg19.config.ra > do.log 2>&1

###########################################################################
# running syntenic best on human, mouse, rat combinations
# mm9 against rn4; the commented lines that follow appear to be the output
#	recorded from this run (old/new unique counts for mm9.rnBlastTab)
synBlastp.csh mm9 rn4
# db=mm9
# otherDb=rn4
# genePredToFakePsl:
# pslMap via /gbdb/mm9/liftOver/mm9ToRn4.over.chain.gz :
# hgLoadPsl:
# Processing mm9.rn4.kg.psl
# hgMapToGene:
# mm9.rnBlastTab:
# old number of unique query values:
# 33419
# old number of unique target values
# 7090
# new number of unique query values:
# 15038
# new number of unique target values
# 6915
# mm9 against hg19; the commented lines that follow appear to be the output
#	recorded from this run (old/new unique counts for mm9.hgBlastTab)
synBlastp.csh mm9 hg19
# db=mm9
# otherDb=hg19
# genePredToFakePsl:
# pslMap via /gbdb/mm9/liftOver/mm9ToHg19.over.chain.gz :
# hgLoadPsl:
# Processing mm9.hg19.kg.psl
# hgMapToGene:
# mm9.hgBlastTab:
# old number of unique query values:
# 42100
# old number of unique target values
# 22528
# new number of unique query values:
# 38924
# new number of unique target values
# 21877
# hg19 against mm9; the commented lines that follow appear to be the output
#	recorded from this run (old/new unique counts for hg19.mmBlastTab)
synBlastp.csh hg19 mm9
# db=hg19
# otherDb=mm9
# genePredToFakePsl:
# pslMap via /gbdb/hg19/liftOver/hg19ToMm9.over.chain.gz :
# hgLoadPsl:
# Processing hg19.mm9.kg.psl
# hgMapToGene:
# hg19.mmBlastTab:
# old number of unique query values:
# 60635
# old number of unique target values
# 22100
# new number of unique query values:
# 57302
# new number of unique target values
# 21627
# hg19 against rn4; the commented lines that follow appear to be the output
#	recorded from this run (old/new unique counts for hg19.rnBlastTab)
synBlastp.csh hg19 rn4
# db=hg19
# otherDb=rn4
# genePredToFakePsl:
# pslMap via /gbdb/hg19/liftOver/hg19ToRn4.over.chain.gz :
# hgLoadPsl:
# Processing hg19.rn4.kg.psl
# hgMapToGene:
# hg19.rnBlastTab:
# old number of unique query values:
# 48744
# old number of unique target values
# 6894
# new number of unique query values:
# 25086
# new number of unique target values
# 6669
# rn4 against mm9; the commented lines that follow appear to be the output
#	recorded from this run (old/new unique counts for rn4.mmBlastTab)
synBlastp.csh rn4 mm9
# db=rn4
# otherDb=mm9
# genePredToFakePsl:
# pslMap via /gbdb/rn4/liftOver/rn4ToMm9.over.chain.gz :
# hgLoadPsl:
# Processing rn4.mm9.kg.psl
# hgMapToGene:
# rn4.mmBlastTab:
# old number of unique query values:
# 8024
# old number of unique target values
# 7095
# new number of unique query values:
# 7751
# new number of unique target values
# 6949
# rn4 against hg19; the commented lines that follow appear to be the output
#	recorded from this run (old/new unique counts for rn4.hgBlastTab)
synBlastp.csh rn4 hg19
# db=rn4
# otherDb=hg19
# genePredToFakePsl:
# pslMap via /gbdb/rn4/liftOver/rn4ToHg19.over.chain.gz :
# hgLoadPsl:
# Processing rn4.hg19.kg.psl
# hgMapToGene:
# rn4.hgBlastTab:
# old number of unique query values:
# 7993
# old number of unique target values
# 6915
# new number of unique query values:
# 7489
# new number of unique target values
# 6636

###########################################################################
#  running recip best to ce6 dm3 and sacCer2
# on human, mouse and rat
export TARGET=hg
export TARGET_DB=hg19
for DB in ce6 dm3 sacCer2
do
    echo $DB
    aToB=run.${TARGET_DB}.$DB
    bToA=run.$DB.${TARGET_DB}
    mkdir $aToB $bToA
    cat ../${DB}/$aToB/out/*.tab > $aToB/all.tab
    cat ../${DB}/$bToA/out/*.tab > $bToA/all.tab
    echo blastRecipBest $aToB/all.tab $bToA/all.tab \
        $aToB/recipBest.tab $bToA/recipBest.tab
    blastRecipBest $aToB/all.tab $bToA/all.tab \
        $aToB/recipBest.tab $bToA/recipBest.tab
    dbBlastTab=xxBlastTab
    case $DB in
        ce6) dbBlastTab=ceBlastTab;;
        dm3) dbBlastTab=dmBlastTab;;
        danRer6) dbBlastTab=drBlastTab;;
        sacCer2) dbBlastTab=scBlastTab;;
    esac
    echo hgLoadBlastTab ${TARGET_DB} $dbBlastTab $aToB/recipBest.tab
    hgLoadBlastTab ${TARGET_DB} $dbBlastTab $aToB/recipBest.tab
    echo hgLoadBlastTab $DB ${TARGET}BlastTab $bToA/recipBest.tab
    hgLoadBlastTab $DB ${TARGET}BlastTab $bToA/recipBest.tab
done

export TARGET=mm
export TARGET_DB=mm9
for DB in ce6 dm3 sacCer2
do
    echo $DB
    aToB=run.${TARGET_DB}.$DB
    bToA=run.$DB.${TARGET_DB}
    mkdir $aToB $bToA
    cat ../${DB}/$aToB/out/*.tab > $aToB/all.tab
    cat ../${DB}/$bToA/out/*.tab > $bToA/all.tab
    echo blastRecipBest $aToB/all.tab $bToA/all.tab \
        $aToB/recipBest.tab $bToA/recipBest.tab
    blastRecipBest $aToB/all.tab $bToA/all.tab \
        $aToB/recipBest.tab $bToA/recipBest.tab
    dbBlastTab=xxBlastTab
    case $DB in
        ce6) dbBlastTab=ceBlastTab;;
        dm3) dbBlastTab=dmBlastTab;;
        danRer6) dbBlastTab=drBlastTab;;
        sacCer2) dbBlastTab=scBlastTab;;
    esac
    echo hgLoadBlastTab ${TARGET_DB} $dbBlastTab $aToB/recipBest.tab
    hgLoadBlastTab ${TARGET_DB} $dbBlastTab $aToB/recipBest.tab
    echo hgLoadBlastTab $DB ${TARGET}BlastTab $bToA/recipBest.tab
    hgLoadBlastTab $DB ${TARGET}BlastTab $bToA/recipBest.tab
done

export TARGET=rn
export TARGET_DB=rn4
for DB in ce6 dm3 sacCer2
do
    echo $DB
    aToB=run.${TARGET_DB}.$DB
    bToA=run.$DB.${TARGET_DB}
    mkdir $aToB $bToA
    cat ../${DB}/$aToB/out/*.tab > $aToB/all.tab
    cat ../${DB}/$bToA/out/*.tab > $bToA/all.tab
    echo blastRecipBest $aToB/all.tab $bToA/all.tab \
        $aToB/recipBest.tab $bToA/recipBest.tab
    blastRecipBest $aToB/all.tab $bToA/all.tab \
        $aToB/recipBest.tab $bToA/recipBest.tab
    dbBlastTab=xxBlastTab
    case $DB in
        ce6) dbBlastTab=ceBlastTab;;
        dm3) dbBlastTab=dmBlastTab;;
        danRer6) dbBlastTab=drBlastTab;;
        sacCer2) dbBlastTab=scBlastTab;;
    esac
    echo hgLoadBlastTab ${TARGET_DB} $dbBlastTab $aToB/recipBest.tab
    hgLoadBlastTab ${TARGET_DB} $dbBlastTab $aToB/recipBest.tab
    echo hgLoadBlastTab $DB ${TARGET}BlastTab $bToA/recipBest.tab
    hgLoadBlastTab $DB ${TARGET}BlastTab $bToA/recipBest.tab
done

export TARGET=ce
export TARGET_DB=ce6
for DB in dm3 sacCer2
do
    echo $DB
    aToB=run.${TARGET_DB}.$DB
    bToA=run.$DB.${TARGET_DB}
    mkdir $aToB $bToA
    cat ../${DB}/$aToB/out/*.tab > $aToB/all.tab
    cat ../${DB}/$bToA/out/*.tab > $bToA/all.tab
    echo blastRecipBest $aToB/all.tab $bToA/all.tab \
        $aToB/recipBest.tab $bToA/recipBest.tab
    blastRecipBest $aToB/all.tab $bToA/all.tab \
        $aToB/recipBest.tab $bToA/recipBest.tab
    dbBlastTab=xxBlastTab
    case $DB in
        ce6) dbBlastTab=ceBlastTab;;
        dm3) dbBlastTab=dmBlastTab;;
        danRer6) dbBlastTab=drBlastTab;;
        sacCer2) dbBlastTab=scBlastTab;;
    esac
    echo hgLoadBlastTab ${TARGET_DB} $dbBlastTab $aToB/recipBest.tab
    hgLoadBlastTab ${TARGET_DB} $dbBlastTab $aToB/recipBest.tab
    echo hgLoadBlastTab $DB ${TARGET}BlastTab $bToA/recipBest.tab
    hgLoadBlastTab $DB ${TARGET}BlastTab $bToA/recipBest.tab
done


export TARGET=sc
export TARGET_DB=sacCer2
for DB in dm3 ce6
do
    echo $DB
    aToB=run.${TARGET_DB}.$DB
    bToA=run.$DB.${TARGET_DB}
    mkdir $aToB $bToA
    cat ../${DB}/$aToB/out/*.tab > $aToB/all.tab
    cat ../${DB}/$bToA/out/*.tab > $bToA/all.tab
    echo blastRecipBest $aToB/all.tab $bToA/all.tab \
        $aToB/recipBest.tab $bToA/recipBest.tab
    blastRecipBest $aToB/all.tab $bToA/all.tab \
        $aToB/recipBest.tab $bToA/recipBest.tab
    dbBlastTab=xxBlastTab
    case $DB in
        ce6) dbBlastTab=ceBlastTab;;
        dm3) dbBlastTab=dmBlastTab;;
        danRer6) dbBlastTab=drBlastTab;;
        sacCer2) dbBlastTab=scBlastTab;;
    esac
    echo hgLoadBlastTab ${TARGET_DB} $dbBlastTab $aToB/recipBest.tab
    hgLoadBlastTab ${TARGET_DB} $dbBlastTab $aToB/recipBest.tab
    echo hgLoadBlastTab $DB ${TARGET}BlastTab $bToA/recipBest.tab
    hgLoadBlastTab $DB ${TARGET}BlastTab $bToA/recipBest.tab
done

export TARGET=dm
export TARGET_DB=dm3
for DB in sacCer2 ce6
do
    echo $DB
    aToB=run.${TARGET_DB}.$DB
    bToA=run.$DB.${TARGET_DB}
    mkdir $aToB $bToA
    cat ../${DB}/$aToB/out/*.tab > $aToB/all.tab
    cat ../${DB}/$bToA/out/*.tab > $bToA/all.tab
    echo blastRecipBest $aToB/all.tab $bToA/all.tab \
        $aToB/recipBest.tab $bToA/recipBest.tab
    blastRecipBest $aToB/all.tab $bToA/all.tab \
        $aToB/recipBest.tab $bToA/recipBest.tab
    dbBlastTab=xxBlastTab
    case $DB in
        ce6) dbBlastTab=ceBlastTab;;
        dm3) dbBlastTab=dmBlastTab;;
        danRer6) dbBlastTab=drBlastTab;;
        sacCer2) dbBlastTab=scBlastTab;;
    esac
    echo hgLoadBlastTab ${TARGET_DB} $dbBlastTab $aToB/recipBest.tab
    hgLoadBlastTab ${TARGET_DB} $dbBlastTab $aToB/recipBest.tab
    echo hgLoadBlastTab $DB ${TARGET}BlastTab $bToA/recipBest.tab
    hgLoadBlastTab $DB ${TARGET}BlastTab $bToA/recipBest.tab
done

# Reciprocal best between danRer6 and every other assembly; the result is
#	loaded as drBlastTab into each of the other databases.
# NOTE(review): the loops for the other organisms load run.X.Y/recipBest.tab
#	into database X, while this one loads run.danRer6.<tDB> into <tDB>.
#	That matches the output recorded below, so it is left as run - confirm.
DB=danRer6
for tDB in hg19 mm9 rn4 ce6 dm3 sacCer2
do
    echo "$DB"
    aToB=run.${DB}.${tDB}
    bToA=run.${tDB}.${DB}
    # both directories are written to below; creating only $aToB made the
    #	redirect into $bToA/all.tab fail
    mkdir -p "$aToB" "$bToA"
    cat ../"${tDB}/$aToB"/out/*.tab > "$aToB/all.tab"
    cat ../"${tDB}/$bToA"/out/*.tab > "$bToA/all.tab"
    blastRecipBest "$aToB/all.tab" "$bToA/all.tab" \
        "$aToB/recipBest.tab" "$bToA/recipBest.tab"
    # DB is always danRer6 here, so this resolves to drBlastTab
    dbBlastTab=xxBlastTab
    case "$DB" in
        ce6) dbBlastTab=ceBlastTab;;
        dm3) dbBlastTab=dmBlastTab;;
        danRer6) dbBlastTab=drBlastTab;;
        sacCer2) dbBlastTab=scBlastTab;;
    esac
    echo "hgLoadBlastTab ${tDB} $dbBlastTab $aToB/recipBest.tab"
    hgLoadBlastTab "${tDB}" "$dbBlastTab" "$aToB/recipBest.tab"
done

# hgLoadBlastTab hg19 drBlastTab run.danRer6.hg19/recipBest.tab
# Loading database with 12146 rows

# hgLoadBlastTab mm9 drBlastTab run.danRer6.mm9/recipBest.tab
# Loading database with 11997 rows

# hgLoadBlastTab rn4 drBlastTab run.danRer6.rn4/recipBest.tab
# Loading database with 5142 rows

# hgLoadBlastTab ce6 drBlastTab run.danRer6.ce6/recipBest.tab
# Loading database with 4865 rows

# hgLoadBlastTab dm3 drBlastTab run.danRer6.dm3/recipBest.tab
# Loading database with 5765 rows

# hgLoadBlastTab sacCer2 drBlastTab run.danRer6.sacCer2/recipBest.tab
# Loading database with 2174 rows

###########################################################################
# updating drBlastTab for new EnsGene release (DONE - 2012-10-12 - Hiram)
#
# updating drBlastTab for ensGene v70 release (DONE 2013-04-26 - Chin)
# same steps are followed for all assemblies listed below, but
# also update the drBlastTab for sacCer2 and mm9.

    mkdir /hive/data/genomes/danRer7/bed/ensGene.68/blastTab
    cd /hive/data/genomes/danRer7/bed/ensGene.68/blastTab
    # The here-document ends at a bare _EOF_ line: the shell removes the
    # quotes from the opening word, so a quoted terminator never matches.
    cat << '_EOF_' > prepFasta.sh
#!/bin/sh
# prepFasta.sh - extract the peptide fasta file of every gene set and write
#	one <db>/<db>.config.ra per assembly under ${runDir}

export sacCer="sacCer3"
export ce="ce6"
export danRer="danRer7"
export dm="dm3"
export rn="rn4"
export hg="hg19"
export mm="mm10"
export rnGene="rgdGene2"
export runDir="/hive/data/genomes/danRer7/bed/ensGene.68/blastTab"

mkdir -p "${runDir}"
# do not scatter files into whatever directory the script was started from
cd "${runDir}" || exit 1
# get all the protein sets to be used here
pepPredToFa ${danRer} ensPep ${danRer}.ensembl.faa
pepPredToFa ${sacCer} sgdPep ${sacCer}.sgd.faa
pepPredToFa ${dm} flyBasePep ${dm}.flyBase.faa
pepPredToFa ${ce} sangerPep ${ce}.sanger.faa
pepPredToFa ${rn} ${rnGene}Pep ${rn}.${rnGene}.faa
for D in ${hg} ${mm}
do
pepPredToFa $D knownGenePep $D.known.faa
done

# writeConfig <targetGenesetPrefix> <targetDb> <queryDbs>
#	creates <targetDb>/<targetDb>.config.ra; queryDbs is the space
#	separated list of every other assembly, in the order to be recorded
writeConfig() {
    mkdir -p "$2"
    cat > "$2/$2.config.ra" << _RA_
targetGenesetPrefix $1
targetDb $2
queryDbs $3
${hg}Fa ${runDir}/${hg}.known.faa
${mm}Fa ${runDir}/${mm}.known.faa
${rn}Fa ${runDir}/${rn}.${rnGene}.faa
${sacCer}Fa ${runDir}/${sacCer}.sgd.faa
${ce}Fa ${runDir}/${ce}.sanger.faa
${dm}Fa ${runDir}/${dm}.flyBase.faa
${danRer}Fa ${runDir}/${danRer}.ensembl.faa
buildDir ${runDir}/$2
scratchDir ${runDir}/$2/tmp
_RA_
}

# known gene tables
writeConfig known "${hg}" "${mm} ${rn} ${ce} ${dm} ${danRer} ${sacCer}"
writeConfig known "${mm}" "${hg} ${rn} ${ce} ${dm} ${danRer} ${sacCer}"

# rat gene table
writeConfig "${rnGene}" "${rn}" "${hg} ${mm} ${ce} ${dm} ${danRer} ${sacCer}"

# sgd gene table
writeConfig sgd "${sacCer}" "${hg} ${mm} ${rn} ${ce} ${dm} ${danRer}"

# flyBase table
writeConfig flyBase "${dm}" "${hg} ${mm} ${rn} ${ce} ${sacCer} ${danRer}"

# sanger table
writeConfig sanger "${ce}" "${hg} ${mm} ${rn} ${dm} ${sacCer} ${danRer}"

# Zebrafish danRer7 Ensembl genes
writeConfig ensembl "${danRer}" "${hg} ${mm} ${rn} ${ce} ${dm} ${sacCer}"
_EOF_

    chmod +x prepFasta.sh
    ./prepFasta.sh

    # The here-document ends at a bare _EOF_ line: the shell removes the
    # quotes from the opening word, so a quoted terminator never matches.
    cat << '_EOF_' > runAll.sh
#!/bin/sh
# runAll.sh - run doHgNearBlastp.pl (with -noLoad) for every assembly,
#	each from inside its own directory under ${TOP}; the output of each
#	run is left in do.log in that directory

export sacCer="sacCer3"
export ce="ce6"
export danRer="danRer7"
export dm="dm3"
export rn="rn4"
export hg="hg19"
export mm="mm10"
export TOP="/hive/data/genomes/danRer7/bed/ensGene.68/blastTab"

for G in ${sacCer} ${ce} ${danRer} ${dm} ${rn} ${hg} ${mm}
do
    # skip an assembly whose directory is missing rather than running the
    #	job (and overwriting do.log) in the wrong place
    cd "${TOP}/${G}" || continue
    time nice -n +19 doHgNearBlastp.pl -noLoad -workhorse=hgwdev \
        -clusterHub=swarm ${G}.config.ra > do.log 2>&1
done
_EOF_

    chmod +x runAll.sh
    ./runAll.sh

    # need to also do mm9 and sacCer2, save mm10, sacCer3 result:
    mv danRer7 danRer7.mm10.sacCer3

    # The here-document ends at a bare _EOF_ line: the shell removes the
    # quotes from the opening word, so a quoted terminator never matches.
    cat << '_EOF_' > prepMm9.sh
#!/bin/sh
# prepMm9.sh - extract the mm9 known gene peptides, write the mm9 config
#	and a fresh danRer7 config whose only query database is mm9

export sacCer="sacCer3"
export ce="ce6"
export danRer="danRer7"
export dm="dm3"
export rn="rn4"
export hg="hg19"
export mm="mm9"
export rnGene="rgdGene2"
export runDir="/hive/data/genomes/danRer7/bed/ensGene.68/blastTab"

# do not scatter files into whatever directory the script was started from
cd "${runDir}" || exit 1
for D in ${mm}
do
pepPredToFa $D knownGenePep $D.known.faa
done

# known gene tables
for D in ${mm}
do
mkdir -p $D
echo "targetGenesetPrefix known
targetDb $D" > ${D}/${D}.config.ra
# the target itself is removed from the list of query databases
echo "queryDbs ${hg} ${mm} ${rn} ${ce} ${dm} ${danRer} ${sacCer}" \
        | sed -e "s/ ${D}//" >> ${D}/${D}.config.ra
echo "${hg}Fa ${runDir}/${hg}.known.faa
${mm}Fa ${runDir}/${mm}.known.faa
${rn}Fa ${runDir}/${rn}.${rnGene}.faa
${sacCer}Fa ${runDir}/${sacCer}.sgd.faa
${ce}Fa ${runDir}/${ce}.sanger.faa
${dm}Fa ${runDir}/${dm}.flyBase.faa
${danRer}Fa ${runDir}/${danRer}.ensembl.faa
buildDir ${runDir}/${D}
scratchDir ${runDir}/${D}/tmp" >> ${D}/${D}.config.ra
done

# Zebrafish danRer7 Ensembl genes, this time against mm9 only
for D in ${danRer}
do
mkdir -p $D
echo "targetGenesetPrefix ensembl
targetDb $D
queryDbs ${mm}
${hg}Fa ${runDir}/${hg}.known.faa
${mm}Fa ${runDir}/${mm}.known.faa
${rn}Fa ${runDir}/${rn}.${rnGene}.faa
${sacCer}Fa ${runDir}/${sacCer}.sgd.faa
${ce}Fa ${runDir}/${ce}.sanger.faa
${dm}Fa ${runDir}/${dm}.flyBase.faa
${danRer}Fa ${runDir}/${danRer}.ensembl.faa
buildDir ${runDir}/${D}
scratchDir ${runDir}/${D}/tmp" > ${D}/${D}.config.ra
done
_EOF_

    chmod +x prepMm9.sh
    ./prepMm9.sh

export TOP="/hive/data/genomes/danRer7/bed/ensGene.68/blastTab"

# run the danRer7 and mm9 jobs, each from inside its own directory
for G in danRer7 mm9
do
    # skip a missing directory rather than running the job (and
    #	overwriting do.log) in the wrong place
    cd "${TOP}/${G}" || continue
    time nice -n +19 doHgNearBlastp.pl -noLoad -workhorse=hgwdev \
        -clusterHub=swarm "${G}.config.ra" > do.log 2>&1
done

    # and the sacCer2, save the mm9 result
    # (the loop above leaves the shell in ${TOP}/mm9, and danRer7 lives in
    #  ${TOP}, so go back up before renaming it)
    cd "${TOP}" && mv danRer7 danRer7.mm9

    # Written to prepSacCer2.sh, the name that is made executable and run
    # below (it used to be written over prepMm9.sh).
    # The here-document ends at a bare _EOF_ line: the shell removes the
    # quotes from the opening word, so a quoted terminator never matches.
    cat << '_EOF_' > prepSacCer2.sh
#!/bin/sh
# prepSacCer2.sh - write the sacCer2 and danRer7 configs for a run of
#	just those two assemblies against each other

export sacCer="sacCer2"
export ce="ce6"
export danRer="danRer7"
export dm="dm3"
export rn="rn4"
export hg="hg19"
export mm="mm9"
export rnGene="rgdGene2"
export runDir="/hive/data/genomes/danRer7/bed/ensGene.68/blastTab"

# do not scatter files into whatever directory the script was started from
cd "${runDir}" || exit 1
# get all the protein sets to be used here
# NOTE(review): the extraction is commented out, so ${sacCer}.sgd.faa is
#	expected to be in ${runDir} already - confirm
# pepPredToFa ${sacCer} sgdPep ${sacCer}.sgd.faa

for D in ${sacCer}
do
mkdir -p $D
echo "targetGenesetPrefix sgd
targetDb $D
queryDbs ${danRer}
${sacCer}Fa ${runDir}/${sacCer}.sgd.faa
${danRer}Fa ${runDir}/${danRer}.ensembl.faa
buildDir ${runDir}/${D}
scratchDir ${runDir}/${D}/tmp" > ${D}/${D}.config.ra
done

# Zebrafish danRer7 Ensembl genes, this time against sacCer2 only
for D in ${danRer}
do
mkdir -p $D
echo "targetGenesetPrefix ensembl
targetDb $D
queryDbs ${sacCer}
${sacCer}Fa ${runDir}/${sacCer}.sgd.faa
${danRer}Fa ${runDir}/${danRer}.ensembl.faa
buildDir ${runDir}/${D}
scratchDir ${runDir}/${D}/tmp" > ${D}/${D}.config.ra
done
_EOF_

    chmod +x prepSacCer2.sh
    ./prepSacCer2.sh

export TOP="/hive/data/genomes/danRer7/bed/ensGene.68/blastTab"

# run the danRer7 and sacCer2 jobs, each from inside its own directory
for G in danRer7 sacCer2
do
    # skip a missing directory rather than running the job (and
    #	overwriting do.log) in the wrong place
    cd "${TOP}/${G}" || continue
    time nice -n +19 doHgNearBlastp.pl -noLoad -workhorse=hgwdev \
        -clusterHub=swarm "${G}.config.ra" > do.log 2>&1
done

    #####  Now run recip best business, this requires moving
    #### the different danRer7 directory results around to get them
    #### in place for mm9 and sacCer2 vs the mm10 sacCer3 results

    mkdir /hive/data/genomes/danRer7/bed/ensGene.68/blastTab/recip
    cd /hive/data/genomes/danRer7/bed/ensGene.68/blastTab/recip

    # The here-document ends at a bare _EOF_ line: the shell removes the
    # quotes from the opening word, so a quoted terminator never matches.
    # NOTE(review): no invocation of recipBest.sh is recorded here; the
    # commented "for tDB" lines suggest it was edited and run once per
    # list - confirm.
    cat << '_EOF_' > recipBest.sh
#!/bin/sh
# recipBest.sh - collect the blast output of both directions between
#	danRer7 and each tDB, and run blastRecipBest on the pair.
#	The load itself is only echoed here (hgLoadBlastTab is commented out).

export TOP="/hive/data/genomes/danRer7/bed/ensGene.68/blastTab/recip"
cd "$TOP" || exit 1

DB=danRer7
# for tDB in hg19 mm10 rn4 ce6 dm3 sacCer3
# for tDB in sacCer2
# for tDB in mm9
for tDB in mm10
do
    echo $DB
    aToB=run.${DB}.${tDB}
    bToA=run.${tDB}.${DB}
    mkdir -p $aToB $bToA
    cat ../${tDB}/$aToB/out/*.tab > $aToB/all.tab
    cat ../${tDB}/$bToA/out/*.tab > $bToA/all.tab
    blastRecipBest $aToB/all.tab $bToA/all.tab \
        $aToB/recipBest.tab $bToA/recipBest.tab
    # DB is always danRer7 here, so this resolves to drBlastTab
    dbBlastTab=xxBlastTab
    case $DB in
        ce6) dbBlastTab=ceBlastTab;;
        dm3) dbBlastTab=dmBlastTab;;
        danRer7) dbBlastTab=drBlastTab;;
        sacCer2) dbBlastTab=scBlastTab;;
    esac
    echo "hgLoadBlastTab ${tDB} $dbBlastTab $aToB/recipBest.tab"
#    hgLoadBlastTab ${tDB} $dbBlastTab $aToB/recipBest.tab
done
_EOF_
    # << happy emacs

    # survey tables before loading:
# per database: the query being run, the current drBlastTab row count,
#	and the line count of the recipBest.tab about to be loaded
for D in hg19 mm9 mm10 rn4 dm3 ce6 sacCer3 sacCer2; do
    printf 'hgsql -e "select count(*) from drBlastTab;" %s\n' "$D"
    hgsql -N -e "select count(*) from drBlastTab;" "$D" | cat
    wc -l "run.${D}.danRer7/recipBest.tab"
done > survey.result.before

    # load all the drBlastTab tables:
# show each input file and the command, then load it as drBlastTab
for D in ce6 dm3 hg19 mm10 mm9 rn4 sacCer2 sacCer3; do
    ls -og "run.$D.danRer7/recipBest.tab"
    printf '%s\n' \
        "hgLoadBlastTab $D drBlastTab -maxPer=1 run.$D.danRer7/recipBest.tab"
    hgLoadBlastTab "$D" drBlastTab -maxPer=1 "run.$D.danRer7/recipBest.tab"
done


    # survey tables after loading:
# per database: the query being run, the drBlastTab row count after the
#	load, and the line count of the recipBest.tab that was loaded
for D in hg19 mm9 mm10 rn4 dm3 ce6 sacCer3 sacCer2; do
    printf 'hgsql -e "select count(*) from drBlastTab;" %s\n' "$D"
    hgsql -N -e "select count(*) from drBlastTab;" "$D" | cat
    wc -l "run.${D}.danRer7/recipBest.tab"
done > survey.result.after

    # verify table row counts are not too much different:
    diff survey.result.before survey.result.after
    # recorded output of the diff, kept as comments so that the lines are
    # not executed (a bare "> 13076" would create an empty file by that name):
# 2c2
# < 13021
# ---
# > 13076
# 5c5
# < 12886
# ---
# > 12904
# 8c8
# < 12881
# ---
# > 12918
# 11c11
# < 7827
# ---
# > 8928
# 14c14
# < 5924
# ---
# > 5929
# 17c17
# < 4956
# ---
# > 4958
# 20c20
# < 3893
# ---
# > 2386
# 23c23
# < 2229
# ---
# > 2231

###########################################################################
