#########################################################################
# repeat masking (DONE - 2015-04-13 - Hiram)
#
# RepeatMasker doesn't do virus
# trf/simpleRepeats finds nothing
# windowMasker finds a couple of bits

# Build directory for the WindowMasker run.
mkdir /hive/data/genomes/eboVir2/bed/windowMasker
cd /hive/data/genomes/eboVir2/bed/windowMasker
# Full run, started in the background with stdout and stderr captured
# in do.log.
time (doWindowMasker.pl -buildDir="$(pwd)" \
   -workhorse=hgwdev eboVir2) > do.log 2>&1 &
# real    0m24.311s
# fails on final measurement with rmsk which does not exist
# Let the background run finish before resuming in the same build
# directory; otherwise the cleanup step would start while it is still active.
wait "$!"
time (doWindowMasker.pl -continue=cleanup -buildDir="$(pwd)" \
   -workhorse=hgwdev eboVir2) > cleanup.log 2>&1
#   real    0m4.435s

#########################################################################
# cpgIslandsUnmasked (DONE - 2014-09-13 - Hiram)

# Build directory for the CpG island run.
mkdir /hive/data/genomes/eboVir2/bed/cpgIslandsUnmasked
cd /hive/data/genomes/eboVir2/bed/cpgIslandsUnmasked
# Run against the unmasked 2bit with -tableName=cpgIslandExtUnmasked;
# stdout and stderr are captured in do.log.
time (
  doCpgIslands.pl \
    -buildDir="$(pwd)" \
    -bigClusterHub=ku \
    -tableName=cpgIslandExtUnmasked \
    -dbHost=hgwdev \
    -smallClusterHub=ku \
    -workhorse=hgwdev \
    -maskedSeq=/hive/data/genomes/eboVir2/eboVir2.unmasked.2bit \
    eboVir2
) > do.log 2>&1

#########################################################################
#  BLATSERVERS ENTRY (DONE - 2014-09-16 - Hiram)
#	After getting a blat server assigned by the Blat Server Gods,

# delete previous temporary
# Clears any existing eboVir2 rows from blatServers before the INSERT below.
hgsql -e 'DELETE FROM blatServers where db="eboVir2";' hgcentraltest

# Add two blat4d rows for eboVir2 to blatServers in hgcentraltest:
#   port 17850 with isTrans=1, canPcr=0
#   port 17851 with isTrans=0, canPcr=1
# NOTE(review): inside single quotes a Bourne-type shell passes each
# backslash-newline pair through to hgsql unchanged - confirm hgsql accepts
# the statement in that form.
hgsql -e 'INSERT INTO blatServers (db, host, port, isTrans, canPcr) \
	VALUES ("eboVir2", "blat4d", "17850", "1", "0"); \
	INSERT INTO blatServers (db, host, port, isTrans, canPcr) \
	VALUES ("eboVir2", "blat4d", "17851", "0", "1");' \
	    hgcentraltest
    # Manual follow-up: test the new entries with some sequence.

############################################################################
# lastz Marburg sequences (DONE - 2014-09-13 - Hiram)
mkdir /hive/data/genomes/eboVir2/bed/lastzMarVir1.2014-09-13
cd /hive/data/genomes/eboVir2/bed/lastzMarVir1.2014-09-13

# Write the lastz parameter file DEF into the current directory.
# Quoting the delimiter on the cat line keeps the body literal (no
# expansion). The closing line must be the bare word _EOF_, because the
# shell removes the quotes from the delimiter before matching; a closing
# line of '_EOF_' would never match and the here-document would run on to
# the end of the file.
cat << '_EOF_' > DEF
# Ebola virus 47 strains vs. 2 strains Marburg virus marVir1
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.52/bin/lastz
BLASTZ_H=2000
BLASTZ_Y=3400
BLASTZ_L=4000
BLASTZ_K=2200
BLASTZ_Q=/cluster/data/blastz/HoxD55.q

# TARGET - eboVir2 47 strains
SEQ1_DIR=/hive/data/genomes/eboVir2/eboVir2.2bit
SEQ1_CHUNK=200000
SEQ1_LAP=0
SEQ1_LEN=/hive/data/genomes/eboVir2/chrom.sizes

# QUERY - marVir1 2 strains
SEQ2_DIR=/hive/data/genomes/marVir1/marVir1.2bit
SEQ2_CHUNK=200000
SEQ2_LAP=0
SEQ2_LEN=/hive/data/genomes/marVir1/chrom.sizes

BASE=/hive/data/genomes/eboVir2/bed/lastzMarVir1.2014-09-13
TMPDIR=/dev/shm
_EOF_
  # << happy emacs

# Run doBlastzChainNet.pl on the DEF file in the current directory;
# stdout and stderr are captured in do.log.
time (
  doBlastzChainNet.pl "$(pwd)/DEF" \
    -syntenicNet \
    -fileServer=hgwdev \
    -tRepeats=windowmaskerSdust \
    -qRepeats=windowmaskerSdust \
    -chainMinScore=100 \
    -chainLinearGap=loose \
    -workhorse=hgwdev \
    -smallClusterHub=ku \
    -bigClusterHub=ku
) > do.log 2>&1
# real    1m50.424s

# The run failed at NET creation: neither browser has a simpleRepeats table.

# featureBits measurement of chainMarVir1Link on eboVir2, with -countGaps.
featureBits -countGaps eboVir2 chainMarVir1Link
# 640008 bases of 889112 (71.983%) in intersection

############################################################################
# genscan - run on advice from Brian Raney Ph.D (DONE - 2014-09-15 - Hiram)

# Build directory for the genscan run.
mkdir /hive/data/genomes/eboVir2/bed/genscan
cd /hive/data/genomes/eboVir2/bed/genscan
# Run doGenscan.pl in this directory; stdout and stderr go to do.log.
time (
  doGenscan.pl \
    -buildDir="$(pwd)" \
    -bigClusterHub=ku \
    -workhorse=hgwdev \
    -dbHost=hgwdev \
    eboVir2
) > do.log 2>&1

# Show the featureBits summary file found in the build directory.
cat fb.eboVir2.genscan.txt
#  673614 bases of 889109 (75.763%) in intersection

############################################################################

