# Hiram 2020-03-13

# Work area for the 44-way SNP view build; every relative path used below
# is resolved from this directory.
snpViewDir=/hive/data/genomes/wuhCor1/bed/snpView44Way
mkdir -p "$snpViewDir"
cd "$snpViewDir"

# Print each species named in a MAF file exactly once, in order of first
# appearance.  The species is the second column of an "s" line with
# everything from the first dot onward removed.
# A MAF with several alignment blocks repeats its "s" lines, so without the
# de-duplication every species would be listed (and later processed) once
# per block.
# Arguments: $@ - MAF file(s) to scan
# Outputs:   one species name per line on stdout
speciesFromMaf() {
  awk '/^s/ {
    name = $2
    sub(/\..*/, "", name)
    if (name != "" && !(name in seen)) {
      seen[name] = 1
      print name
    }
  }' "$@"
}

speciesFromMaf ../multiz44way/defraged.multiz44way.maf > species.lst

# Two sed scripts translate species names between their original spelling
# and a spelling with every '_' turned into '-':
#   foreSedScript.txt : original -> hyphenated
#   backSedScript.txt : hyphenated -> original
# NOTE(review): the reason the hyphenated form is needed is not recorded
# here - presumably a downstream tool mishandles '_'; confirm.
awk 'NF { dashed = $1; gsub(/_/, "-", dashed); printf "s/%s/%s/g\n", dashed, $1 }' \
  species.lst > backSedScript.txt
awk 'NF { dashed = $1; gsub(/_/, "-", dashed); printf "s/%s/%s/g\n", $1, dashed }' \
  species.lst > foreSedScript.txt

# Species list in the hyphenated spelling.
sed -f foreSedScript.txt species.lst > editSpecies.lst

# Build nonsyn.bed.
# Step 1: mafGene output for the singleCover44way gene set, with species
# names rewritten to their hyphenated spelling, is saved as nonsyn.faa.
mafGene -exons wuhCor1 multiz44way singleCover44way species.lst stdout \
  | sed -f foreSedScript.txt \
  > nonsyn.faa

# Step 2: paSNP output has its first ':' and first '-' turned into blanks
# (presumably splitting a "chrom:start-end" position into three columns -
# confirm against paSNP's output), then awk lays out twelve columns in
# BED12 order: start shifted down by one, score 1583, strand "+",
# thick range equal to the item, colour 255,0,0 (red) and a single block
# covering the whole item.  Species names are switched back to their
# original spelling at the end.
# NOTE(review): 1583 is presumably the Sequence Ontology number for
# missense_variant - confirm.
paSNP editSpecies.lst nonsyn.faa stdout \
  | sed -e 's/:/ /' -e 's/-/ /' \
  | awk '{
      start = $2 - 1
      print $1, start, $3, $4, 1583, "+", start, $3, "255,0,0", 1, $3 - start, 0
    }' \
  | sed -f backSedScript.txt \
  > nonsyn.bed

# Build syn.bed: same pipeline as nonsyn.bed, but mafGene is run with
# -uniqAA, the score is 1819 and the colour is 0,255,0 (green).
# NOTE(review): the gene table argument here is "singleCover", whereas the
# nonsyn run and the mafToSnpBed step both use "singleCover44way" - confirm
# which table name is intended.
# NOTE(review): 1819 is presumably the Sequence Ontology number for
# synonymous_variant - confirm.
mafGene -uniqAA -exons wuhCor1 multiz44way singleCover species.lst stdout \
  | sed -f foreSedScript.txt \
  > syn.faa

# First ':' and first '-' become blanks; start is shifted down by one and
# the item is emitted as a single-block, twelve-column record.
paSNP editSpecies.lst syn.faa stdout \
  | sed -e 's/:/ /' -e 's/-/ /' \
  | awk '{
      start = $2 - 1
      print $1, start, $3, $4, 1819, "+", start, $3, "0,255,0", 1, $3 - start, 0
    }' \
  | sed -f backSedScript.txt \
  > syn.bed

# Run mafToSnpBed over the defragmented 44-way MAF with the
# singleCover44way gene predictions and drop the first "wuhCor1." prefix
# from every output line.
# The dot is escaped so that only a literal "wuhCor1." is removed; an
# unescaped '.' would also strip "wuhCor1" followed by any other character.
mafToSnpBed wuhCor1 ../multiz44way/defraged.multiz44way.maf \
   ../multiz44way/mafFrames/singleCover44way.gp stdout \
      | sed 's/wuhCor1\.//' > single.bed

# Split single.bed into one file per variant class.  A line is selected by
# the text at its very end (the last column), and is re-emitted as twelve
# space-separated columns: the five input columns, strand "+", a thick
# range equal to the item, a per-class colour, and one block spanning the
# whole item.
# NOTE(review): the selectors are presumably Sequence Ontology numbers
# (1580 coding_sequence_variant, 1623/1624 UTR variants, 1627
# intron_variant, 1628 intergenic_variant) - confirm.
# NOTE(review): apart from " 0$" the patterns are not anchored on the left,
# so any line merely ending in those digits is selected as well.

# Coding variants (yellow) - these should all disappear on the merge.
awk '/1580$/ { print $1, $2, $3, $4, $5, "+", $2, $3, "255,255,0", 1, $3 - $2, 0 }' \
  single.bed > codingVariant.bed

# UTR variants (yellow): every 1623 line first, then every 1624 line.
{
  awk '/1623$/ { print $1, $2, $3, $4, $5, "+", $2, $3, "255,255,0", 1, $3 - $2, 0 }' \
    single.bed
  awk '/1624$/ { print $1, $2, $3, $4, $5, "+", $2, $3, "255,255,0", 1, $3 - $2, 0 }' \
    single.bed
} > utrVariant.bed

# Missing data: last column is exactly 0 (colour 240,240,180).
awk '/ 0$/ { print $1, $2, $3, $4, $5, "+", $2, $3, "240,240,180", 1, $3 - $2, 0 }' \
  single.bed > missing.bed

# Intergenic (black).
awk '/1628$/ { print $1, $2, $3, $4, $5, "+", $2, $3, "0,0,0", 1, $3 - $2, 0 }' \
  single.bed > intergenic.bed

# Intron (black).
awk '/1627$/ { print $1, $2, $3, $4, $5, "+", $2, $3, "0,0,0", 1, $3 - $2, 0 }' \
  single.bed > intron.bed

# Dump the chromInfo table of wuhCor1 (no header row) for bedSmash.
hgsql -N -e "select * from chromInfo" wuhCor1 > wuhCor1.chrom.sizes

# For every species, gather its records from all per-class BED files and
# pass them through bedSmash; the results for all species are concatenated
# into output.bed.
# The whole loop is redirected once with '>' so output.bed is always
# rebuilt from scratch: no "rm" that complains when the file does not exist
# yet, and no risk of appending to a stale copy left by an earlier run.
while read -r species
do
  [ -n "$species" ] || continue
  # progress report on stderr
  printf '%s\n' "$species" 1>&2
  grep -wh -- "$species" nonsyn.bed syn.bed codingVariant.bed utrVariant.bed \
      intron.bed intergenic.bed missing.bed \
        | bedSmash stdin wuhCor1.chrom.sizes stdout
done < species.lst > output.bed

# Keep only the first five columns of output.bed, and turn a score of
# exactly 1580 (the codingVariant class) into 0 so those items are shown as
# missing data instead of showing blue.
awk '{
  score = $5
  if (score == "1580") score = 0
  print $1, $2, $3, $4, score
}' output.bed > load.bed

# Load the five-column file as table mafSnp44way in wuhCor1.
hgLoadBed wuhCor1 mafSnp44way load.bed

# Build accToName.sed, one "s#accession#name#g;" command per input line:
#   - take tab-separated columns 1 and 3 of acc.date.description.list
#   - replace the first '.' on the line with 'v'
#   - replace every blank with '_'
#   - skip lines mentioning NC_045512v2
#     (NOTE(review): presumably because that is the reference sequence
#     name in column 1 of the BED file and must not be renamed - confirm)
# '#' is the sed delimiter; NOTE(review): a name containing '#', '&' or
# '\' would produce a broken or wrong substitution - verify the input.
cut -f1,3 ../multiz44way/acc.date.description.list \
  | sed -e 's/\./v/' -e 's/ /_/g' \
  | grep -v NC_045512v2 \
  | awk -F'\t' '{ printf "s#%s#%s#g;\n", $1, $2 }' \
  > accToName.sed


# Apply the renames to load.bed and load the result as a second table.
sed -f ./accToName.sed load.bed > strainLoad.bed

hgLoadBed wuhCor1 mafSnpStrainName44way strainLoad.bed
# Read 196342 elements of size 5 from strainLoad.bed
# Creating table definition for mafSnpStrainName44way, bedSize: 5
