# for emacs: -*- mode: sh; -*-
#########################################################################
# hivVax003Vax004 DATABASE BUILD (DONE 5/20/08, Fan)

    ssh hiv1
    mkdir -p /cluster/store12/medical/hiv/hivVax003Vax004
    cd /cluster/store12/medical/hiv/hivVax003Vax004

#########################################################################
# Create hivVax003Vax004 DB

    # pass the statement with -e, as every other hgsql call in this doc does
    hgsql -e 'create database hivVax003Vax004'

# Ask admin to copy over all tables from hiv1 to hivVax003Vax004

#########################################################################
# CREATE MAF TRACKS FOR VAX004

    mkdir -p /cluster/store12/medical/hiv/hivVax003Vax004/msa
    cd /cluster/store12/medical/hiv/hivVax003Vax004/msa

# create a script file, doall

    hgsql hivVax003Vax004 -N -e \
    'select id from dnaSeq where id like "%U%"'\
    |sed -e 's/ss/do1 ss/g' >doall

# create one line script file, do1, with the following line in it:

    hgsql hivVax003Vax004 -N -e  "select id, seq from vax004Msa where id='${1}'"

    chmod +x do*

# run the script to get the .tab file with all MSA sequences of VAX004
    doall >Vax003Vax004.tab
# convert .tab into .fa file
    tabToFa Vax003Vax004

# grab the base alignment sequence
    echo ">hivVax003Vax004" >Vax003Vax004.aln
    hgsql hivVax003Vax004 -N -e 'select seq from vax004Msa where id="HXB2"'  >> Vax003Vax004.aln

# prepare an interim file, jjAll.mfa
    cat Vax003Vax004.aln Vax003Vax004.fa >jjAll.mfa
    echo = >>jjAll.mfa

# Run xmfaToMafVax003Vax004 to create a precursor file for the final .maf

    # tool name as given in the comment above and used by the AE/B builds below
    xmfaToMafVax003Vax004 jjAll.mfa j.out  org1=hivVax003Vax004
    cat j.out|sed -e 's/\./_/g'|sed -e 's/_chr/\.chr/g' >chr1.tmp

    rm jjAll.mfa j.out

    cat chr1.tmp |sed -e 's/ss_U/U/g' >chr1.maf

# copy .maf to /gbdb.

    mkdir -p  /gbdb/hivVax003Vax004/vax004Maf 
    cp chr1.maf /gbdb/hivVax003Vax004/vax004Maf -p

    hgLoadMaf hivVax003Vax004 vax004Maf

# create another copy for protein MAF.

    mkdir -p  /gbdb/hivVax003Vax004/vax004AaMaf 
    cp -p chr1.maf /gbdb/hivVax003Vax004/vax004AaMaf
    hgLoadMaf hivVax003Vax004 vax004AaMaf
    
#########################################################################
# CREATE CONSERVATION TRACKS FOR VAX003 AE STRAIN

    mkdir -p /cluster/store12/medical/hiv/hivVax003Vax004/conservation/AE
    cd /cluster/store12/medical/hiv/hivVax003Vax004/conservation/AE

# create the .wig file and .fa file of the consensus sequence.
    gsidMsa hivVax003Vax004 vax003AEMsa HXB2 6228 vax003AECons.wig vax003AEConsensus.fa
# encode and load the wig file
    wigEncode vax003AECons.wig stdout vax003AECons.wib \
    | hgLoadWiggle hivVax003Vax004 vax003AECons stdin

# copy .wib file to /gbdb
    mkdir -p /gbdb/hivVax003Vax004/wib
    cp vax003AECons.wib /gbdb/hivVax003Vax004/wib

# do the same for protein conservation track

    mkdir aa
    cd aa

# create .wig file
    gsidAaMsa2 hivVax003Vax004 vax003AEMsa HXB2 6228 vax003AEAaCons.wig vax003AEAaConsensus.fa

# encode and load the .wib file   
    wigEncode vax003AEAaCons.wig stdout vax003AEAaCons.wib \
    | hgLoadWiggle hivVax003Vax004 vax003AEAaCons stdin

    cp vax003AEAaCons.wib /gbdb/hivVax003Vax004/wib

#########################################################################
# CREATE MAF TRACKS FOR VAX003 AE STRAIN

    mkdir -p /cluster/store12/medical/hiv/hivVax003Vax004/msa/AE
    cd /cluster/store12/medical/hiv/hivVax003Vax004/msa/AE

# create a script file, doall

    hgsql hivVax003Vax004 -N -e \
    'select id from dnaSeq where id like "%T%"'\
    |sed -e 's/ss/do1 ss/g' >doall

# create one line script file, do1, with the following line in it:

    hgsql hivVax003Vax004 -N -e  "select id, seq from vax003AEMsa where id='${1}'"

    chmod +x do*

# run the script to get the .tab file with all MSA sequences of VAX003 AE
    doall >Vax003Vax004.tab
# convert .tab into .fa file
    tabToFa Vax003Vax004

# grab the base alignment sequence
    echo ">hivVax003Vax004" >Vax003Vax004.aln
    hgsql hivVax003Vax004 -N -e 'select seq from vax003AEMsa where id="HXB2"'  >> Vax003Vax004.aln

# prepare an interim file, jjAll.mfa
    cat Vax003Vax004.aln Vax003Vax004.fa >jjAll.mfa
    echo = >>jjAll.mfa

# Run xmfaToMafVax003Vax004 to create a precursor file for the final .maf

    xmfaToMafVax003Vax004 jjAll.mfa j.out  org1=hivVax003Vax004
    cat j.out|sed -e 's/\./_/g'|sed -e 's/_chr/\.chr/g' >chr1.tmp

#    rm jjAll.mfa j.out

    cat chr1.tmp |sed -e 's/ss_T/T/g' >chr1.maf

# copy .maf to /gbdb.

    # The AE alignment gets its own vax003AEMaf / vax003AEAaMaf directories and
    # tables; writing chr1.maf into vax004Maf would overwrite the VAX004 MAF
    # file copied there by the VAX004 build earlier in this doc.
    mkdir -p  /gbdb/hivVax003Vax004/vax003AEMaf
    cp chr1.maf /gbdb/hivVax003Vax004/vax003AEMaf -p

    hgLoadMaf hivVax003Vax004 vax003AEMaf

# create another copy for protein MAF.

    mkdir -p  /gbdb/hivVax003Vax004/vax003AEAaMaf
    cp -p chr1.maf /gbdb/hivVax003Vax004/vax003AEAaMaf
    hgLoadMaf hivVax003Vax004 vax003AEAaMaf

#########################################################################
# COPY OVER MSA TABLES FOR VAX003 B STRAIN

    mkdir -p /cluster/store12/medical/hiv/hivVax003Vax004/msa/B
    cd /cluster/store12/medical/hiv/hivVax003Vax004/msa/B

# get table definition
# NOTE(review): as written, the mysqldump source and the hgsql target are the
# same database (hivVax003Vax004), and the insert below selects from the very
# table it inserts into.  Presumably the source was meant to be a different
# database -- confirm before re-running.
# NOTE(review): -p$HGPSWD places the password on the command line.
    mysqldump -d hivVax003Vax004 vax003BMsa -u medcat -p$HGPSWD|hgsql hivVax003Vax004

# load the table
    hgsql hivVax003Vax004 -e "insert into vax003BMsa select * from hivVax003Vax004.vax003BMsa"

#########################################################################
# CREATE CONSERVATION TRACKS FOR VAX003 B STRAIN

    mkdir -p /cluster/store12/medical/hiv/hivVax003Vax004/conservation/B
    cd /cluster/store12/medical/hiv/hivVax003Vax004/conservation/B

# create the .wig file and .fa file of the consensus sequence.
    gsidMsa hivVax003Vax004 vax003BMsa HXB2 6228 vax003BCons.wig vax003BConsensus.fa

# encode and load the wig file
    wigEncode vax003BCons.wig stdout vax003BCons.wib \
    | hgLoadWiggle hivVax003Vax004 vax003BCons stdin

# copy .wib file to /gbdb
    mkdir -p /gbdb/hivVax003Vax004/wib
    cp vax003BCons.wib /gbdb/hivVax003Vax004/wib

# do the same for protein conservation track

    mkdir aa
    cd aa

# create .wig file
    gsidAaMsa2 hivVax003Vax004 vax003BMsa HXB2 6228 vax003BAaCons.wig vax003BAaConsensus.fa

# encode and load the .wib file   
    wigEncode vax003BAaCons.wig stdout vax003BAaCons.wib \
    | hgLoadWiggle hivVax003Vax004 vax003BAaCons stdin

    cp vax003BAaCons.wib /gbdb/hivVax003Vax004/wib

#########################################################################
# CREATE MAF TRACKS FOR VAX003 B STRAIN

    mkdir -p /cluster/store12/medical/hiv/hivVax003Vax004/msa/B
    cd /cluster/store12/medical/hiv/hivVax003Vax004/msa/B

# create a script file, doall

    hgsql hivVax003Vax004 -N -e \
    'select id from dnaSeq where id like "%T%"'\
    |sed -e 's/ss/do1 ss/g' >doall

# create one line script file, do1, with the following line in it:

    hgsql hivVax003Vax004 -N -e  "select id, seq from vax003BMsa where id='${1}'"

    chmod +x do*

# run the script to get the .tab file with all MSA sequences of VAX003 B
    doall >Vax003Vax004.tab
# convert .tab into .fa file
    tabToFa Vax003Vax004

# grab the base alignment sequence
    echo ">hivVax003Vax004" >Vax003Vax004.aln
    hgsql hivVax003Vax004 -N -e 'select seq from vax003BMsa where id="HXB2"'  >> Vax003Vax004.aln

# prepare an interim file, jjAll.mfa
    cat Vax003Vax004.aln Vax003Vax004.fa >jjAll.mfa
    echo = >>jjAll.mfa

# Run xmfaToMafVax003Vax004 to create a precursor file for the final .maf

    xmfaToMafVax003Vax004 jjAll.mfa j.out  org1=hivVax003Vax004
    cat j.out|sed -e 's/\./_/g'|sed -e 's/_chr/\.chr/g' >chr1.tmp

    rm jjAll.mfa j.out

    cat chr1.tmp |sed -e 's/ss_T/T/g' >chr1.maf

# copy .maf to /gbdb.

    mkdir -p  /gbdb/hivVax003Vax004/vax003BMaf
    cp chr1.maf /gbdb/hivVax003Vax004/vax003BMaf -p
    
    hgLoadMaf hivVax003Vax004 vax003BMaf

# create another copy for protein MAF.

    mkdir -p  /gbdb/hivVax003Vax004/vax003BAaMaf 
    cp -p chr1.maf /gbdb/hivVax003Vax004/vax003BAaMaf
    hgLoadMaf hivVax003Vax004 vax003BAaMaf

#########################################################################
# Process, check, correct and load VAX003 clinical tables

    mkdir -p /data/home/fanhsu/medical/hiv/hivVax003Vax004/clinical
    cd /data/home/fanhsu/medical/hiv/hivVax003Vax004/clinical

# copy over original raw data files
    
    cp -p /cluster/store12/medical/vaxGen/fromEvie/VAX003/*.txt .
    ls -l *.txt

# shorten the file name and run processRaw to generate .sql def

    cp "VAX003 RNACD4 match with sequence ID_20080501_EMZ18Jun.txt" VAX003_RNACD4080501.txt
    processRaw VAX003_RNACD4080501.txt

    # create hivVax003Vax004Build DB to be used in this build process;
    # drop any previous copy first ("if exists" keeps a first-time run quiet)
    # and create it exactly once -- a second create would fail.
    hgsql hiv1 -e 'drop database if exists hivVax003Vax004Build'
    hgsql hiv1 -e 'create database hivVax003Vax004Build'

# load raw demographic and RNACD4 data
    hgsql hivVax003Vax004Build < GSID_DEMOG_SEQNO_003Raw.sql
    hgsql hivVax003Vax004Build < VAX003_RNACD4080501Raw.sql

    hgsql hivVax003Vax004Build -e \
    'load data local infile "GSID_DEMOG_SEQNO_003.txt" into table GSID_DEMOG_SEQNO_003Raw ignore 1 lines'

    hgsql hivVax003Vax004Build -e \
    'load data local infile "VAX003_RNACD4080501.txt" into table VAX003_RNACD4080501Raw ignore 1 lines'

# build initial gsidClinicRecTemp table ...

    hgsql hivVax003Vax004Build -N -e \
    'select "specId",GSID, MBLabcd, DRNACD4, "rna","cd4" from VAX003_RNACD4080501Raw' \
    >gsidClinicRecTemp.tab

    hgsql hivVax003Vax004Build -e 'drop table gsidClinicRecTemp'
    getDbTableDef hiv1 gsidClinicRecTemp >gsidClinicRecTemp.sql
    hgsql hivVax003Vax004Build < gsidClinicRecTemp.sql

    hgsql hivVax003Vax004Build -e \
    'load data local infile "gsidClinicRecTemp.tab" into table gsidClinicRecTemp'

# build subjLabcode table ...

    hgsql hivVax003Vax004Build -N -e \
    'select GSID, MBLabcd from VAX003_RNACD4080501Raw where MBLabcd!=""' \
    | sort -u > subjLabcode.tab

    hgsql hivVax003Vax004Build -e "drop table subjLabcode"

    getDbTableDef hiv1 subjLabcode > subjLabcode.sql
    hgsql hivVax003Vax004Build < subjLabcode.sql
    hgsql hivVax003Vax004Build -e \
    'load data local infile "subjLabcode.tab" into table subjLabcode'

# fill in labCode in gsidClinicRecTemp

    hgsql hivVax003Vax004Build -e \
    'update gsidClinicRecTemp t, subjLabcode l set t.labCode=l.labCode where t.subjId=l.subjId'

# fill in specimenId
    hgsql hivVax003Vax004Build -e \
    'update gsidClinicRecTemp t, GSID_DEMOG_SEQNO_003Raw r set t.specimenId=r.SpecimenNumber where t.subjId=r.subjId and r.SpecimenNumber !=""'

# fill in RNA 
    hgsql hivVax003Vax004Build -e \
    'update gsidClinicRecTemp t, VAX003_RNACD4080501Raw r set t.hivQuan=r.RNA where t.subjId=r.GSID and t.daysCollection=r.DRNACD4' 

# fill in CD4
    hgsql hivVax003Vax004Build -e \
    'update gsidClinicRecTemp t, VAX003_RNACD4080501Raw r set t.cd4Count=r.CD4ABS where t.subjId=r.GSID and t.daysCollection=r.DRNACD4'

# change RNA "399" to "200" (which will be displayed as "<400")
    hgsql hivVax003Vax004Build -e \
    'update gsidClinicRecTemp set hivQuan="200" where hivQuan = "399"'

# update cd4 NULL ...
    hgsql hivVax003Vax004Build -e \
    'update gsidClinicRecTemp set cd4Count="NULL" where cd4Count="."'

# Echo update daysCollection NULL ...
    hgsql hivVax003Vax004Build -e \
    'update gsidClinicRecTemp set daysCollection="NULL" where daysCollection="."'

# build gsidClinicRecNew table

    hgsql hivVax003Vax004Build -N -e 'select * from gsidClinicRecTemp ' \
    |uniq |sed -e 's/NULL/-1/g'  > gsidClinicRecNew.tab

    hgsql hivVax003Vax004Build -e 'drop table gsidClinicRecNew'
    hgsql hivVax003Vax004Build < gsidClinicRecNew.sql
    hgsql hivVax003Vax004Build -e \
    'load data local infile "gsidClinicRecNew.tab" into table gsidClinicRecNew'

# set NULLs

    hgsql hivVax003Vax004Build -e \
    'update gsidClinicRecNew set hivQuan=NULL where hivQuan=-1'
    hgsql hivVax003Vax004Build -e \
    'update gsidClinicRecNew set cd4Count=NULL where cd4Count=-1'

    hgsql hivVax003Vax004Build -e \
    'update gsidClinicRecNew set daysCollection=NULL where daysCollection=-1'

    hgsql hivVax003Vax004Build -e \
    'update gsidClinicRecNew set specimenId=NULL where specimenId="specId"'

# build gsidClinicRecWithSeqNew table

    hgsql hivVax003Vax004Build -N -e \
    'select c.* from GSID_DEMOG_SEQNO_003Raw r,gsidClinicRecNew c where SequenceDataStatus="Sequence data exist" and r.subjId=c.subjId and r.labCode=c.labCode' \
    |sort -u |sed -e 's/NULL/-1/g' >gsidClinicRecWithSeqNew.tmp

    hgsql hivVax003Vax004Build -e 'drop table gsidClinicRecWithSeqNew'
    hgsql hivVax003Vax004Build < gsidClinicRecWithSeqNew.sql
    hgsql hivVax003Vax004Build -e \
    'load data local infile "gsidClinicRecWithSeqNew.tmp" into table gsidClinicRecWithSeqNew'
    rm gsidClinicRecWithSeqNew.tmp

    hgsql hivVax003Vax004Build -e \
    'update gsidClinicRecWithSeqNew set hivQuan=NULL where hivQuan=-1'
    hgsql hivVax003Vax004Build -e \
    'update gsidClinicRecWithSeqNew set cd4Count=NULL where cd4Count=-1'

# compare with previous old data
    hgsql hivVax003Vax004Build -N -e 'select * from gsidClinicRecWithSeqNew' |sort -u >j.n
    hgsql hivVax003Vax004 -N -e 'select * from gsidClinicRecWithSeq'|sort -u  >j.o
    diff j.o j.n |grep -v "GSID4" >j.diff

# load the newly build data into hivVax003Vax004 tables

    hgsql hivVax003Vax004 -e 'delete from gsidClinicRec where subjId like "GSID3%"'
    hgsql hivVax003Vax004 -e 'delete from gsidClinicRecWithSeq where subjId like "GSID3%"'

   hgsql hivVax003Vax004 -e \
    "insert into gsidClinicRec select * from hivVax003Vax004Build.gsidClinicRecNew"

    hgsql hivVax003Vax004 -e \
    "insert into gsidClinicRecWithSeq select * from hivVax003Vax004Build.gsidClinicRecWithSeqNew"


#########################################################################
# Build the gsidSubjSeq table (used by Table View).

   gsidSubjSeq hivVax003Vax004 dnaSeqId > j.dna
   gsidSubjSeq hivVax003Vax004 aaSeqId > j.aa

   cut -f 1 j.dna >j.1
   cut -f 1 j.aa  >j.2

   cut -f 2 j.dna  >j.3
   cut -f 2 j.aa   >j.4

   paste j.1 j.3 j.4> gsidSubjSeq.tab

   hgsql hivVax003Vax004 -e 'delete from gsidSubjSeq'
   hgsql hivVax003Vax004 -e \
   'load data local infile "gsidSubjSeq.tab" into table gsidSubjSeq'

   rm j.1 j.2 j.3 j.4 j.dna j.aa

#################################################################################
# RE-BUILD CONSERVATION AND MAF TRACKS FOR VAX003 AE STRAIN (DONE, 7/10/08, Fan)

# First cut the vax003AEMsa sequences so that they start with VPV and end with REKR

# rename existing vax003AEMsa table as vax003AEMsaOld
    # pass the statement with -e, as every other hgsql call in this doc does
    hgsql hivVax003Vax004 -e 'rename table vax003AEMsa to vax003AEMsaOld'

# use BLAT to visually decide what are the appropriate starting and ending positions to cut.

    hgsql hivVax003Vax004 -N -e \
    'select id,substring(seq, 124, 1743) from vax003AEMsaOld' >vax003AEMsa.tab

    tabToFa vax003AEMsa
# use resulting vax003AEMsa.fa to check that the cut is correct, and then load the new MSA sequences.

    hgsql hivVax003Vax004 < ~/src/hg/lib/vax003AEMsa.sql
    hgsql hivVax003Vax004 -N -e 'load data local infile "vax003AEMsa.tab" into table vax003AEMsa'

# RE-BUILD CONSERVATION TRACKS FOR VAX003 AE STRAIN  

    mkdir -p \
    /cluster/store12/medical/hiv/hivVax003Vax004/conservation/AE/rebuild
    cd /cluster/store12/medical/hiv/hivVax003Vax004/conservation/AE/rebuild

# create the .wig file and .fa file of the consensus sequence.
    gsidMsa hivVax003Vax004 vax003AEMsa HXB2 6348 vax003AECons.wig vax003AEConsensus.fa
# encode and load the wig file
    wigEncode vax003AECons.wig stdout vax003AECons.wib \
    | hgLoadWiggle hivVax003Vax004 vax003AECons stdin

# copy .wib file to /gbdb
    mkdir -p /gbdb/hivVax003Vax004/wib
    cp vax003AECons.wib /gbdb/hivVax003Vax004/wib

# do the same for protein conservation track

    mkdir aa
    cd aa

# create .wig file
    gsidAaMsa2 hivVax003Vax004 vax003AEMsa HXB2 6348 vax003AEAaCons.wig vax003AEAaConsensus.fa

# encode and load the .wib file   
    wigEncode vax003AEAaCons.wig stdout vax003AEAaCons.wib \
    | hgLoadWiggle hivVax003Vax004 vax003AEAaCons stdin

    cp vax003AEAaCons.wib /gbdb/hivVax003Vax004/wib

# CREATE MAF TRACKS FOR VAX003 AE STRAIN

    mkdir -p /cluster/store12/medical/hiv/hivVax003Vax004/msa/AE/rebuild
    cd /cluster/store12/medical/hiv/hivVax003Vax004/msa/AE/rebuild

# create a script file, doall

    hgsql hivVax003Vax004 -N -e \
    'select id from dnaSeq where id like "%T%"'\
    |sed -e 's/ss/do1 ss/g' >doall

# create one line script file, do1, with the following line in it:

    hgsql hivVax003Vax004 -N -e  "select id, seq from vax003AEMsa where id='${1}'"

    chmod +x do*

# run the script to get the .tab file with all MSA sequences of VAX003 AE
    doall >Vax003Vax004.tab
# convert .tab into .fa file
    tabToFa Vax003Vax004

# grab the base alignment sequence
    echo ">hivVax003Vax004" >Vax003Vax004.aln
    hgsql hivVax003Vax004 -N -e 'select seq from vax003AEMsa where id="HXB2"'  >> Vax003Vax004.aln

# prepare an interim file, jjAll.mfa
    cat Vax003Vax004.aln Vax003Vax004.fa >jjAll.mfa
    echo = >>jjAll.mfa

# Run xmfaToMafVax003Vax004 to create a precursor file for the final .maf

    xmfaToMafVax003Vax004 jjAll.mfa j.out  org1=hivVax003Vax004
    cat j.out|sed -e 's/\./_/g'|sed -e 's/_chr/\.chr/g' >chr1.tmp

#    rm jjAll.mfa j.out

    cat chr1.tmp |sed -e 's/ss_T/T/g' >chr1.maf

# copy .maf to /gbdb.

    mkdir -p  /gbdb/hivVax003Vax004/vax003AEMaf
    cp chr1.maf /gbdb/hivVax003Vax004/vax003AEMaf -p

    hgLoadMaf hivVax003Vax004 vax003AEMaf

# create another copy for protein MAF.

    # create the directory that is actually copied into and loaded from;
    # without it the cp target below is not a directory
    mkdir -p  /gbdb/hivVax003Vax004/vax003AEAaMaf
    cp -p chr1.maf /gbdb/hivVax003Vax004/vax003AEAaMaf
    hgLoadMaf hivVax003Vax004 vax003AEAaMaf

#################################################################################
# RE-BUILD CONSERVATION AND MAF TRACKS FOR VAX003 B STRAIN (DONE, 7/10/08, Fan)
# First cut the vax003BMsa sequences so that they start with VPV and end with REKR

# rename existing vax003BMsa table as vax003BMsaOld
    # pass the statement with -e, as every other hgsql call in this doc does
    hgsql hivVax003Vax004 -e 'rename table vax003BMsa to vax003BMsaOld'

# use BLAT to visually decide what are the appropriate starting and ending positions to cut.

    hgsql hivVax003Vax004 -N -e \
    'select id,substring(seq, 121, 1620) from vax003BMsaOld' >vax003BMsa.tab

    tabToFa vax003BMsa
# use resulting vax003BMsa.fa to check that the cut is correct, then load the new MSA sequences.

    hgsql hivVax003Vax004 -e 'drop table vax003BMsa' 
    hgsql hivVax003Vax004 < ~/src/hg/lib/vax003BMsa.sql
    hgsql hivVax003Vax004 -N -e 'load data local infile "vax003BMsa.tab" into table vax003BMsa'

# RE-BUILD CONSERVATION TRACKS FOR VAX003 B STRAIN  

    mkdir -p \
    /cluster/store12/medical/hiv/hivVax003Vax004/conservation/B/rebuild
    cd /cluster/store12/medical/hiv/hivVax003Vax004/conservation/B/rebuild

# create the .wig file and .fa file of the consensus sequence.
    gsidMsa hivVax003Vax004 vax003BMsa HXB2 6348 vax003BCons.wig vax003BConsensus.fa
# encode and load the wig file
    wigEncode vax003BCons.wig stdout vax003BCons.wib \
    | hgLoadWiggle hivVax003Vax004 vax003BCons stdin

# copy .wib file to /gbdb
    mkdir -p /gbdb/hivVax003Vax004/wib
    cp vax003BCons.wib /gbdb/hivVax003Vax004/wib

# do the same for protein conservation track

    mkdir aa
    cd aa

# create .wig file
    # table name is vax003BMsa, the same table the nucleotide gsidMsa run reads
    gsidAaMsa2 hivVax003Vax004 vax003BMsa HXB2 6348 vax003BAaCons.wig vax003BAaConsensus.fa

# encode and load the .wib file   
    wigEncode vax003BAaCons.wig stdout vax003BAaCons.wib \
    | hgLoadWiggle hivVax003Vax004 vax003BAaCons stdin

    cp vax003BAaCons.wib /gbdb/hivVax003Vax004/wib

# CREATE MAF TRACKS FOR VAX003 B STRAIN

    mkdir -p /cluster/store12/medical/hiv/hivVax003Vax004/msa/B/rebuild
    cd /cluster/store12/medical/hiv/hivVax003Vax004/msa/B/rebuild

# create a script file, doall

    hgsql hivVax003Vax004 -N -e \
    'select id from dnaSeq where id like "%T%"'\
    |sed -e 's/ss/do1 ss/g' >doall

# create one line script file, do1, with the following line in it:

    hgsql hivVax003Vax004 -N -e  "select id, seq from vax003BMsa where id='${1}'"

    chmod +x do*

# run the script to get the .tab file with all MSA sequences of VAX003 B
    doall >Vax003Vax004.tab
# convert .tab into .fa file
    tabToFa Vax003Vax004

# grab the base alignment sequence
    echo ">hivVax003Vax004" >Vax003Vax004.aln
    hgsql hivVax003Vax004 -N -e 'select seq from vax003BMsa where id="HXB2"'  >> Vax003Vax004.aln

# prepare an interim file, jjAll.mfa
    cat Vax003Vax004.aln Vax003Vax004.fa >jjAll.mfa
    echo = >>jjAll.mfa

# Run xmfaToMafVax003Vax004 to create a precursor file for the final .maf

    xmfaToMafVax003Vax004 jjAll.mfa j.out  org1=hivVax003Vax004
    cat j.out|sed -e 's/\./_/g'|sed -e 's/_chr/\.chr/g' >chr1.tmp

#    rm jjAll.mfa j.out

    cat chr1.tmp |sed -e 's/ss_T/T/g' >chr1.maf

# copy .maf to /gbdb.

    mkdir -p  /gbdb/hivVax003Vax004/vax003BMaf
    cp chr1.maf /gbdb/hivVax003Vax004/vax003BMaf -p

    hgLoadMaf hivVax003Vax004 vax003BMaf

# create another copy for protein MAF.

    # create the directory that is actually copied into and loaded from
    mkdir -p  /gbdb/hivVax003Vax004/vax003BAaMaf
    cp -p chr1.maf /gbdb/hivVax003Vax004/vax003BAaMaf
    hgLoadMaf hivVax003Vax004 vax003BAaMaf

########################################################################################
# REBUILD THE gsidClinicRecWithSeq TABLE (DONE 11/03/08, Fan)

mkdir -p /hive/groups/gsid/medical/hiv/hivVax003Vax004/clinical/novRebuild

cd /hive/groups/gsid/medical/hiv/hivVax003Vax004/clinical/novRebuild

#copy table gsidClinicRecNew into gsidClinicRecNew2

cp ~/hg/lib/gsidClinicRec.sql gsidClinicRecNew2.sql
vi gsidClinicRecNew2.sql
hgsql  hivVax003Vax004Build -e 'drop table gsidClinicRecNew2'
hgsql  hivVax003Vax004Build < gsidClinicRecNew2.sql

hgsql hivVax003Vax004Build -N -e 'select * from gsidClinicRecNew' >gsidClinicRecNew2.tab

hgsql  hivVax003Vax004Build -e \
'load data local infile "gsidClinicRecNew2.tab" into table  gsidClinicRecNew2'

hgsql hivVax003Vax004Build -N -e 'select * from gsidClinicRecNew2' >j.tab
diff j.tab gsidClinicRecNew2.tab

# change 200 to 399 so that they are consistent between the two tables

hgsql hivVax003Vax004Build -e 'update gsidClinicRecNew2 set hivQuan=399 where hivQuan=200'
hgsql hivVax003Vax004Build -N -e 'select * from gsidClinicRecNew2' >jj.tab
diff j.tab jj.tab

# rebuild the gsidClinicRecWithSeq table for VAX003 subjects

hgsql hivVax003Vax004Build -N -e \
'select c.specimenId, c.subjId, c.labCode, c.daysCollection, r.RNA, r.CD4ABS from gsidClinicRecNew2 c, VAX003_RNACD4080501Raw r where c.specimenId=r.SpecimenNo and c.daysCollection=r.DRNACD4 and c.subjId=r.GSID and r.RNA=c.hivQuan and r.CD4ABS=c.cd4Count and r.SpecimenNo != ""' >j.out

cut -f 1-4 j.out >j.1

# revert back from 399 to 200.  The RNA value is the first of the two cut
# fields, so anchor on the start of the line: only a value of exactly 399 is
# rewritten, not one that merely ends in 399 (e.g. 1399).
cut -f 5-6 j.out  | sed -e 's/^399\t/200\t/' >j.2

paste j.1 j.2 >gsidClinicRecWithSeq.vax003.tab

hgsql hivVax003Vax004 -e 'delete from gsidClinicRecWithSeq where subjId like "GSID3%"'
hgsql hivVax003Vax004 -e 'load data local infile "gsidClinicRecWithSeq.vax003.tab" into table gsidClinicRecWithSeq'

# update the same table for the other 3 genomes

hgsql hivgne8v2 -e 'delete from gsidClinicRecWithSeq where subjId like "GSID3%"'
hgsql hivgne8v2 -e 'load data local infile "gsidClinicRecWithSeq.vax003.tab" into table gsidClinicRecWithSeq'

hgsql hivmn2 -e 'delete from gsidClinicRecWithSeq where subjId like "GSID3%"'
hgsql hivmn2 -e 'load data local infile "gsidClinicRecWithSeq.vax003.tab" into table gsidClinicRecWithSeq'

hgsql hiva244 -e 'delete from gsidClinicRecWithSeq where subjId like "GSID3%"'
hgsql hiva244 -e 'load data local infile "gsidClinicRecWithSeq.vax003.tab" into table gsidClinicRecWithSeq'

######################################################################################

# Create VAX003 subtype B Positive Selection tracks for hivVax003Vax004

cd /hive/groups/gsid/medical/hiv/hivVax003Vax004
mkdir posSelection
cd posSelection

# BLAT /hive/groups/gsid/medical/hiv/hiva244/posSelection/BMsaAaConsensus.fa
# against hivVax003Vax004 base genome, select psl without header option
# cut and paste the result into the file BMsa.psl

hgLoadPsl -keep -table=BMsaPsl -nobin hivVax003Vax004 BMsa.psl 

# will get the following error:
#Processing BMsa.psl
#Can't start query:
#LOAD DATA CONCURRENT  INFILE
#'/cluster/hive/groups/gsid/medical/hiv/hivVax003Vax004/posSelection/BMsa.psl'
#INTO TABLE BMsaPsl

#mySQL error 13: Can't get stat of
#'/cluster/hive/groups/gsid/medical/hiv/hivVax003Vax004/posSelection/BMsa.psl'
#(Errcode: 13)

# load manually then

# run the client-side ("local") load as a single hgsql statement instead of
# typing it into an interactive session
hgsql hivVax003Vax004 -e 'load data local infile "BMsa.psl" into table BMsaPsl'

# build the positive selection tracks for model 2 and model 8.

# the .bed file is the final gsidPosSelect argument (it is the file hgLoadBed
# loads next), so the wrapped line needs a backslash continuation
gsidPosSelect hivVax003Vax004  BMsaPsl posSelBuild pSelectBModel2 \
    posSelModel2.bed
hgLoadBed hivVax003Vax004 posSelModel2 posSelModel2.bed

gsidPosSelect hivVax003Vax004 BMsaPsl posSelBuild pSelectBModel8 \
    posSelModel8.bed
hgLoadBed hivVax003Vax004 posSelModel8 posSelModel8.bed

##########################################################################
# BUILD THE POSITIVE SELECTION TRACKS FOR VAX003 SUBTYPE AE

    ssh hiv1
    mkdir -p /hive/groups/gsid/medical/hiv/posSelection/AE/hivVax003Vax004
    cd /hive/groups/gsid/medical/hiv/posSelection/AE/hivVax003Vax004

# BLAT
# /cluster/hive/groups/gsid/medical/hiv/posSelection/AE/AEMsaAaConsensus.fa
# against hivVax003Vax004 base genome, select psl without header option
# cut and paste the result into the file AEMsa.psl

hgLoadPsl -keep -table=AEMsaPsl -nobin hivVax003Vax004 AEMsa.psl 

# will get the following error:
#Processing AEMsa.psl
#Can't start query:
#LOAD DATA CONCURRENT  INFILE
#'/cluster/hive/groups/gsid/medical/hiv/posSelection/AE/hivVax003Vax004/AEMsa.psl'
#INTO TABLE AEMsaPsl

#mySQL error 13: Can't get stat of
#'/cluster/hive/groups/gsid/medical/hiv/posSelection/AE/hivVax003Vax004/AEMsa.psl'
#(Errcode: 13)

# load manually then

# run the client-side ("local") load as a single hgsql statement instead of
# typing it into an interactive session
hgsql hivVax003Vax004 -e 'load data local infile "AEMsa.psl" into table AEMsaPsl'

# build positive selection tracks for model 2 and model 8.

# the .bed file is the final gsidPosSelect argument (it is the file hgLoadBed
# loads next), so the wrapped line needs a backslash continuation
gsidPosSelect hivVax003Vax004  AEMsaPsl posSelBuild pSelectAEModel2 \
    posSelAEModel2.bed
hgLoadBed hivVax003Vax004 posSelAEModel2 posSelAEModel2.bed

gsidPosSelect hivVax003Vax004 AEMsaPsl posSelBuild pSelectAEModel8 \
    posSelAEModel8.bed
hgLoadBed hivVax003Vax004 posSelAEModel8 posSelAEModel8.bed

##########################################################################
# BUILD THE POSITIVE SELECTION TRACKS FOR VAX004 (Done Fan, 3/2/09)

cd /cluster/hive/groups/gsid/medical/hiv/posSelection

mkdir vax004
cd vax004

# Since there is a large number (12) of subclasses and 4 HIV genomes,
# this has to be automated.  So create the do1, do2 and do3 scripts first.
# Please note that the do3 script works on all 4 HIV genomes.

# NOTE(review): the here-doc is closed by a line reading '_EOF_' with the
# quotes.  sh/bash compare the terminator line against the unquoted word
# (_EOF_), so this form presumably targets csh/tcsh -- confirm which shell this
# doc is pasted into.
cat << '_EOF_' >do1
#!/bin/sh
# do1 SUBCLASS
# Copies the PAML and nexus files for one VAX004 subclass into ./SUBCLASS,
# loads the model 2 / model 8 tables and the MSA table into hgFixed, then
# runs do2 for the first sequence id of the MSA table.

# refuse to run without a subclass name: with an empty $1 the clean-up
# below would expand to /*
: "${1:?usage: do1 subclass}"

#do1.1

mkdir -p "$1"
# start with clean slate (-f: an already-empty directory is not an error)
rm -f -- "$1"/*

cp -p /hive/groups/gsid/medical/vaxGen/fromKeith/posSelection/073008/PAML-outfiles/VAX004-$1-sites.paml "$1"

cp /hive/groups/gsid/medical/vaxGen/fromKeith/posSelection/073008/data/$1.nex "$1"

#do1.2

# stop here if the directory is unusable, so nothing below runs (or deletes
# j.tmp) in the wrong place
cd "$1" || exit 1
cat VAX004-$1-sites.paml|grep "+-" >j.tmp
get1stHalf j.tmp >$1Model2.paml
# protect a free-standing " * " as xxx, drop every other "*", restore it,
# squeeze each run of spaces to one, drop the first space on the line,
# then turn the remaining spaces into tabs
cat $1Model2.paml |\
sed -e 's/+-//g'|\
sed -e 's/ \* / xxx /g'|\
sed -e 's/\*//g'|\
sed -e 's/xxx/\*/g'|\
sed -e 's/  */ /g'|\
sed -e 's/ //'|\
sed -e 's/ /\t/g' > vax004$1Model2.tab

hgLoadSqlTab -notOnServer hgFixed vax004$1Model2 ~/src/hg/lib/posSelectModel.sql vax004$1Model2.tab

get2ndHalf j.tmp >$1Model8.paml
cat $1Model8.paml |\
sed -e 's/+-//g'|\
sed -e 's/ \* / xxx /g'|\
sed -e 's/\*//g'|\
sed -e 's/xxx/\*/g'|\
sed -e 's/  */ /g'|\
sed -e 's/ //'|\
sed -e 's/ /\t/g' > vax004$1Model8.tab

hgLoadSqlTab -notOnServer hgFixed vax004$1Model8 ~/src/hg/lib/posSelectModel.sql vax004$1Model8.tab

rm j.tmp

#do1.3

# keep the lines containing "U." and convert space-separated columns to tabs
cat $1.nex|grep 'U\.'|\
    sed -e 's/  */ /g'|\
    sed -e 's/ /\t/g' >vax004$1Msa.tab
chmod +rx *.tab

hgLoadSqlTab -notOnServer hgFixed vax004$1Msa /hive/groups/gsid/medical/hiv/posSelection/vax004/dnaSeq.sql vax004$1Msa.tab
cd ..

#do1.4

# generate and run a one-line script calling do2 with the first MSA id;
# ./ is used so neither script depends on "." being in PATH
hgsql -N -e "select concat('./do2 ${1} ', id) from hgFixed.vax004${1}Msa limit 1" >doit
chmod +x doit
./doit
'_EOF_'

chmod +x do1

# do2 SUBCLASS SEQID: runs gsidAaMsa2 on hgFixed table vax004<SUBCLASS>Msa with
# SEQID as its third argument, writing <SUBCLASS>Msa.wig and
# <SUBCLASS>MsaAaConsensus.fa into the SUBCLASS directory.
cat << '_EOF_' >do2
gsidAaMsa2 hgFixed vax004$1Msa $2 1 $1/$1Msa.wig $1/$1MsaAaConsensus.fa
'_EOF_'
chmod +x do2

cat << '_EOF_' >do3
#!/bin/sh
# do3 SUBCLASS
# For each of the four HIV genome databases: generate and run the blatit
# command line from hgcentralhiv1.blatServers, then build and load the
# model 2 and model 8 positive selection .bed files for SUBCLASS.

: "${1:?usage: do3 subclass}"

for db in hivVax003Vax004 hivmn2 hivgne8v2 hiva244
do
    # process $db
    hgsql hgcentralhiv1 -N -e "select concat('blatit ${1} ${db} ', port) from blatServers where db='${db}' and isTrans=1" >doBlat
    chmod +x doBlat
    ./doBlat

    # Work in the subclass directory inside a subshell: if the cd fails,
    # nothing runs in the wrong place and there is no "cd .." that could
    # leave the remaining genomes one directory too high.
    (
        cd "$1" || exit 1
        for model in Model2 Model8
        do
            gsidPosSelect ${db} vax004${1}MsaPsl hgFixed vax004${1}${model} posSelVax004${1}${model}.bed
            hgLoadBed     ${db} posSelVax004${1}${model}                  posSelVax004${1}${model}.bed
        done
    )
done
'_EOF_'

chmod +x do3

# Now run the scripts for all subclasses.

do1 Hispanic
do1 Midwest
do1 Northeast
do1 Other
do1 South
do1 Southwest
do1 Westcoast
do1 White
do1 Asian
do1 Black
do1 pla
do1 vac

# BTW, do1 calls do2

do3 Hispanic
do3 Midwest
do3 Northeast
do3 Other
do3 South
do3 Southwest
do3 Westcoast
do3 White
do3 Asian
do3 Black
do3 pla
do3 vac

##########################################################################
# BUILD IMMUNO TABLE FOR IMMUNOGENICITY DATA.  (DONE 9/29/09, Fan)

cd ~/kent/src/hg/lib

hgsql hgFixed -e 'drop table immunoRaw'
hgsql hgFixed < immunoRaw.sql

cd /cluster/hive/groups/gsid/medical/hiv/hivmnV3

# copy over raw data file.
cp -p /cluster/hive/groups/gsid/medical/hiv/immunogenicityData/orig/Antibody_Inf_LPLT_31Jul_forFH.txt jRaw.txt

# replace empty field with "-3", "N/A" with "-1", and "N/D" with "-2"
cat jRaw.txt|\
sed -e 's/\t\t/\t-3\t/g'|\
sed -e 's/\t\t/\t-3\t/g'|\
sed -e 's/\t\t/\t-3\t/g'|\
sed -e 's/\t\t/\t-3\t/g'|\
sed -e 's/\t\t/\t-3\t/g'|\
sed -e 's/N\/A/-1/g'|\
sed -e 's/N\/D/-2/g' >jRaw2.txt

# load data into immunoRaw table
hgsql hgFixed -e 'load data local infile "jRaw2.txt" into table immunoRaw ignore 1 lines'

hgsql hgFixed -e 'update immunoRaw set LastTrAntiGP120="-3" where LastTrAntiGP120=""'

# -N suppresses the column-name header row; the load into immuno below has no
# "ignore 1 lines", so a header row would be loaded as data
hgsql hgFixed -N -e 'select * from immunoRaw' >immuno.tab

# load data into immuno table
# NOTE(review): immuno.sql is read from the current directory
# (/cluster/hive/groups/gsid/medical/hiv/hivmnV3), whereas immunoRaw.sql was
# read from ~/kent/src/hg/lib -- confirm immuno.sql is present here.
hgsql hgFixed -e 'drop table immuno'
hgsql hgFixed < immuno.sql
hgsql hgFixed -e 'load data local infile "immuno.tab" into table immuno'

# replace -1 with NULL for SDayLastPTest.

hgsql hgFixed -e 'update immuno set SDayLastPTest = NULL where SDayLastPTest = -1'
#######################################################################################
