# braney 2020-03-10

# Create the snpView working directory and run everything below from inside it.
workDir=/hive/data/genomes/wuhCor1/bed/snpView
mkdir -p "$workDir"
cd "$workDir"

# species.lst: field 2 of every "s" line in the alignment, with everything
# from the first "." onward removed.
awk '/^s/ {sub(/\..*/, "", $2); print $2}' \
  ../multiz119way/defraged.multiz119way.maf > species.lst

# Emit both sed scripts in a single pass over the names:
#   fd 3 -> backSedScript.txt : dashed form  -> original name
#   fd 4 -> foreSedScript.txt : original name -> dashed form ("_" becomes "-")
for name in $(cat species.lst); do
  dashed=${name//_/-}
  printf 's/%s/%s/g\n' "$dashed" "$name" >&3
  printf 's/%s/%s/g\n' "$name" "$dashed" >&4
done 3> backSedScript.txt 4> foreSedScript.txt

# editSpecies.lst: the species list with the dashed spelling applied.
sed -f foreSedScript.txt < species.lst > editSpecies.lst

# nonsyn.faa: mafGene -exons output for the singleCover genes, with species
# names switched to their dashed spelling.
mafGene -exons wuhCor1 multiz119way singleCover species.lst stdout \
  | sed -f foreSedScript.txt > nonsyn.faa

# nonsyn.bed: paSNP output with the first ":" and the first "-" on each line
# turned into spaces, reshaped into 12 space-separated columns (start is
# column 2 minus 1; column 5 is fixed at 1583, column 9 at "255,0,0"), then
# the original species spelling is restored.
# NOTE(review): 1583 / 1819 look like Sequence Ontology codes - confirm.
paSNP editSpecies.lst nonsyn.faa stdout \
  | sed -e 's/:/ /' -e 's/-/ /' \
  | awk '{start = $2 - 1; print $1, start, $3, $4, 1583, "+", start, $3, "255,0,0", 1, $3 - start, 0}' \
  | sed -f backSedScript.txt > nonsyn.bed

# syn.faa / syn.bed: same two steps with mafGene -uniqAA, column 5 fixed at
# 1819 and column 9 at "0,255,0".
mafGene -uniqAA -exons wuhCor1 multiz119way singleCover species.lst stdout \
  | sed -f foreSedScript.txt > syn.faa

paSNP editSpecies.lst syn.faa stdout \
  | sed -e 's/:/ /' -e 's/-/ /' \
  | awk '{start = $2 - 1; print $1, start, $3, $4, 1819, "+", start, $3, "0,255,0", 1, $3 - start, 0}' \
  | sed -f backSedScript.txt > syn.bed

# single.bed: mafToSnpBed output with the first "wuhCor1" + one following
# character removed from each line.
mafToSnpBed wuhCor1 ../multiz119way/defraged.multiz119way.maf \
    ../multiz119way/mafFrames/singleCover.gp stdout \
  | sed 's/wuhCor1.//' > single.bed

# snpClassBed REGEX RGB
#   For every single.bed line matching REGEX, print its first five fields
#   followed by "+", fields 2 and 3 again, RGB, 1, (field 3 - field 2) and 0.
snpClassBed() {
  awk -v re="$1" -v rgb="$2" \
    '$0 ~ re {print $1, $2, $3, $4, $5, "+", $2, $3, rgb, 1, $3 - $2, 0}' \
    single.bed
}

# these should all disappear on the merge
snpClassBed '1580$' '255,255,0' > codingVariant.bed

# Lines ending in 1623 come first, followed by the lines ending in 1624.
{
  snpClassBed '1623$' '255,255,0'
  snpClassBed '1624$' '255,255,0'
} > utrVariant.bed

snpClassBed ' 0$' '240,240,180' > missing.bed

snpClassBed '1628$' '0,0,0' > intergenic.bed

snpClassBed '1627$' '0,0,0' > intron.bed

# chrom.sizes: all chromInfo columns for wuhCor1, with the first line of the
# hgsql output dropped (presumably the column-header row - confirm).
echo "select * from chromInfo" | hgsql wuhCor1 | tail -n +2 > chrom.sizes

# The loop appends to output.bed, so clear any copy left by a previous run.
# -f keeps a first run, where the file does not exist yet, from reporting an
# error.
rm -f output.bed

# For each species: collect its rows (whole-word match on the name) from all
# per-class BED files, in the order listed, and append bedSmash's result.
for i in $(cat species.lst)
do
  echo "$i"
  grep -wh "$i" nonsyn.bed syn.bed codingVariant.bed utrVariant.bed intron.bed intergenic.bed missing.bed \
    | bedSmash stdin chrom.sizes stdout >> output.bed
done

# make codingVariants into missing data instead of showing blue:
# keep the first five columns of output.bed and rewrite a trailing 1580 to 0.
awk '{print $1,$2,$3,$4,$5}' output.bed | sed 's/ 1580$/ 0/' > load.bed

# accToName.sed: one "s#<col1>.<col1>#<col3>#g;" rule per line of
# nameList119.txt (columns 1 and 3, spaces replaced by "_").
#
# An earlier form of this step wrote rules keyed on <col1> alone (skipping
# NC_045512v2) to the same file. It was overwritten by the command below
# before anything read it, so it never affected strainLoad.bed. It is kept
# here, disabled, for the record:
#
#   cut -f1,3 ~/kent/src/hg/makeDb/doc/wuhCor1/nameList119.txt \
#      | sed -e 's/ /_/g;' | grep -v NC_045512v2 \
#        | awk -F$'\t' '{printf "s#%s#%s#g;\n", $1, $2}' > accToName.sed
#
# NOTE(review): confirm that the "<col1>.<col1>" form is the intended one.
cut -f1,3 ~/kent/src/hg/makeDb/doc/wuhCor1/nameList119.txt | sed -e 's/ /_/g;' \
  | awk -F$'\t' '{printf "s#%s.%s#%s#g;\n", $1, $1, $2}' > accToName.sed

# strainLoad.bed: load.bed with the rules above applied.
sed -f accToName.sed load.bed > strainLoad.bed

# verify enough correct names (distinct values in column 4), should be 118:
awk '{print $4}' strainLoad.bed | sort -u | wc -l
#     118

# and one chromosome name:
awk '{print $1}' strainLoad.bed | sort | uniq -c
#  205615 NC_045512v2

# Load both versions: names as in load.bed, and names after accToName.sed.
hgLoadBed wuhCor1 mafSnp119way load.bed
hgLoadBed wuhCor1 mafSnpStrainName119way strainLoad.bed

