# for emacs: -*- mode: sh; -*-

# This file describes how we made the browser database on
# the Patch 9 release for the NCBI build 37 (February 2009 freeze) aka:
#	GRCh37.p9 - Genome Reference Consortium Human Reference 37

############################################################################
# gather sequence and AGP definitions (DONE - 2012-07-17 - Hiram)
    mkdir -p /hive/data/genomes/hg19Patch9/sequence
    cd /hive/data/genomes/hg19Patch9/sequence
    # a round about way here since patch9 sequence was already assembled.
    # there are perl and shell scripts in
    # ../../hg19/bed/additionalSequence/patch9
    #	which created the fasta file with UCSC names
    # see also in hg19.txt:
    # NCBI patch 9 (DONE - 2012-07-16 - Hiram)

    ln -s ../../hg19/bed/additionalSequence/patch9/hg19.patch9.fa .
    # check what might be different from previous
    faCount hg19.patch9.fa | grep -v total | grep -v seq \
        | sort > patch9.faCount.txt
    faCount ../../hg19/bed/additionalSequence/patch5/hg19.patch5.fa \
        | grep -v total | grep -v seq | sort > patch5.faCount.txt
    comm -12 patch5.faCount.txt patch9.faCount.txt | wc
    #   98     784    5609
    # it appears that seven have been removed since patch 5:
    comm -23 patch5.faCount.txt patch9.faCount.txt | wc
    #   7      56     385
    # and 54 added:
    comm -13 patch5.faCount.txt patch9.faCount.txt | wc
    #   54     432    2960
    
    # reuse the script from patch5
    sed -e "s/atch5/atch9/g" ../../hg19Patch5/sequence/patch5Agp.pl \
        > ./patch9Agp.pl
    # one little addition to this script since then:
    diff ./patch9Agp.pl ../../hg19Patch5/sequence/patch5Agp.pl
# 41d40
# <     $faName =~ s/.*gb.JH/JH/;

    ./patch9Agp.pl \
	../../hg19/bed/additionalSequence/patch9/patches.chrom.sizes \
	../../hg19/bed/additionalSequence/patch9/ucscNames.patch9.txt \
        ../../hg19/bed/additionalSequence/patch9/PATCHES/alt_scaffolds/AGP/alt.scaf.agp.gz \
    > hg19Patch9.agp

    # add in the haplotypes from hg19
for H in chr17_ctg5_hap1 chr4_ctg9_hap1 chr6_apd_hap1 chr6_cox_hap2 \
        chr6_dbb_hap3 chr6_mann_hap4 chr6_mcf_hap5 chr6_qbl_hap6 \
        chr6_ssto_hap7
do
    grep "^${H}" /hive/data/genomes/hg19/hg19.agp
    twoBitToFa ../../hg19/hg19.2bit:${H} ${H}.fa
done >> hg19Patch9.agp

    # and the chrM_rCRS
    echo -e "chrM_rCRS\t1\t16569\t1\tF\tNC_012920\t1\t16569\t+" \
	>> hg19Patch9.agp

    sed -e "s/^>.*/>chrM_rCRS/" \
	../../hg19/bed/additionalSequence/chrM/NC_012920.1.fa > chrM_rCRS.fa

    # verify we have correct sequence and AGP file:
    faToTwoBit *.fa patch9.2bit
    checkAgpAndFa  hg19Patch9.agp patch9.2bit 2>&1 | tail -3
    # All AGP and FASTA entries agree - both files are valid

    # compare the two chrom.sizes to see what is missing or has been added
    twoBitInfo patch9.2bit stdout | sort > patch9.chrom.sizes
    twoBitInfo ../../hg19Patch5/sequence/patch5.2bit stdout \
        | sort > patch5.chrom.sizes
    # 108 identical:
    comm -12 patch5.chrom.sizes patch9.chrom.sizes | wc
    #   108     216    2674
    # 8 have disappeared:
    comm -23 patch5.chrom.sizes patch9.chrom.sizes | wc
    #   7      14     170
    # and 54 new ones:
    comm -13 patch5.chrom.sizes patch9.chrom.sizes | wc
    #   54     108    1205

###########################################################################
# Build the browser (DONE - 2012-07-17 - Hiram)
    cd /hive/data/genomes/hg19Patch9
    cat << '_EOF_' > hg19Patch9.config.ra
# Config parameters for makeGenomeDb.pl:
db hg19Patch9
clade haplotypes
genomeCladePriority 138
scientificName Homo sapiens
commonName GRCh37.p9
assemblyDate Jul. 2012
assemblyLabel GRCh37 Patch 9 Genome Reference Consortium Human Reference 37
assemblyShortLabel GRCh37.p9
orderKey 144
mitoAcc none
fastaFiles /hive/data/genomes/hg19Patch9/sequence/*.fa
agpFiles /hive/data/genomes/hg19Patch9/sequence/hg19Patch9.agp
# qualFiles /dev/null
dbDbSpeciesDir human
photoCreditURL http://www.cbse.ucsc.edu/
photoCreditName CBSE
ncbiGenomeId 51
# this is patch8: 368578 can't find the patch 9 id right now
ncbiAssemblyId 368578
ncbiAssemblyName GRCh37.p9
ncbiBioProject 31257
genBankAccessionID GCA_000001405.10
taxId   9606
'_EOF_'
    # << happy emacs

    # you need to have the clade and genomeCladePriority since this unique
    # db name hg19Patch9 is always a 'new' genome

    # stop after agp to verify agp and fasta agree properly
    makeGenomeDb.pl -dbHost=hgwdev -fileServer=hgwdev -workhorse=hgwdev \
        -stop=agp hg19Patch9.config.ra > makeGenomeDb.log 2>&1
    makeGenomeDb.pl -dbHost=hgwdev -fileServer=hgwdev -workhorse=hgwdev \
	-continue=db hg19Patch9.config.ra > makeGenomeDb.db.log 2>&1

    featureBits -countGaps hg19Patch9 gap
    #   8743321 bases of 93963414 (9.305%) in intersection

###########################################################################
# RepeatMasker (WORKING - 2012-07-17 - Hiram)
    mkdir /hive/data/genomes/hg19Patch9/bed/repeatMasker
    cd /hive/data/genomes/hg19Patch9/bed/repeatMasker
    time doRepeatMasker.pl hg19Patch9 -buildDir=`pwd` -noSplit \
	-bigClusterHub=encodek \
        -dbHost=hgwdev -workhorse=hgwdev > do.log 2>&1 &
    # about 601m == 10h 10m
    cat faSize.rmsk.txt
# 93963414 bases (8743672 N's 85219742 real 40393959 upper 44825783 lower)
#       in 162 sequences in 1 files
# Total size: mean 580021.1 sd 1120614.0 min 16569 (chrM_rCRS)
#       max 7107865 (chr1_jh636052) median 201198
# %47.71 masked total, %52.60 masked real

###########################################################################
# TRF simple repeats (WORKING - 2012-07-17 - Hiram)
    mkdir /hive/data/genomes/hg19Patch9/bed/simpleRepeat
    cd /hive/data/genomes/hg19Patch9/bed/simpleRepeat
    time doSimpleRepeat.pl hg19Patch9 -buildDir=`pwd` -dbHost=hgwdev \
        -smallClusterHub=encodek -workhorse=hgwdev > do.log 2>&1 &
    #   real    7m58.604s
    cat fb.simpleRepeat 
# 2867144 bases of 85220093 (3.364%) in intersection

    cd /hive/data/genomes/hg19Patch9
    twoBitMask hg19Patch9.rmsk.2bit \
        -add bed/simpleRepeat/trfMask.bed hg19Patch9.2bit
    # safe to ignore warning: has >=13 fields
    twoBitToFa hg19Patch9.2bit stdout | faSize stdin \
	> faSize.hg19Patch9.2bit.txt
# 93963414 bases (8743672 N's 85219742 real 40352594 upper 44867148 lower)
#       in 162 sequences in 1 files
# Total size: mean 580021.1 sd 1120614.0 min 16569 (chrM_rCRS)
#       max 7107865 (chr1_jh636052) median 201198
# %47.75 masked total, %52.65 masked real

    time blat hg19Patch9.2bit \
        /dev/null /dev/null -tileSize=11 -makeOoc=jkStuff/hg19Patch9.11.ooc \
	-repMatch=1024
# Wrote 212 overused 11-mers to jkStuff/hg19Patch9.11.ooc

    rm /gbdb/hg19Patch9/hg19Patch9.2bit
    ln -s `pwd`/hg19Patch9.2bit /gbdb/hg19Patch9/

    # the makeGenomeDb.pl script changed the case of the genome name:
    hgsql -e 'update dbDb set genome="GRCh37.p9" where name="hg19Patch9";' \
	hgcentraltest

###########################################################################
# ctgPos track (DONE - 2012-07-17 - Hiram)
    mkdir /hive/data/genomes/hg19Patch9/bed/ctgPos
    cd /hive/data/genomes/hg19Patch9/bed/ctgPos
    for C in `cut -f1 ../../chrom.sizes | grep -v chrM_rCRS`
do
    ctgPos=`hgsql -N -e 'select * from ctgPos where chrom="'$C'";' hg19`
    if [ "x${ctgPos}y" = "xy" ]; then
        GL=`echo $C | sed -e "s/.*_gl//; s/.*_jh//"`
        glAcc=`grep -i ${GL} ../../../hg19/bed/additionalSequence/patch9/PATCHES/scaffold_localID2acc | cut -f2`
        glSize=`grep -i ${GL} ../../chrom.sizes | cut -f2`
        echo -e "$glAcc\t$glSize\t${C}\t0\t$glSize"
    else
        echo "$ctgPos"
    fi
done > ctgPos.txt

    echo -e "NC_012920.1\t16569\tchrM_rCRS\t0\t16569" >> ctgPos.txt

    # check length of ctg names:
    cut -f 1 ctgPos.txt | awk '{print length($0)}' | sort -n | tail -1
    #	11
    # and length of chrom names:
    cut -f 3 ctgPos.txt | awk '{print length($0)}' | sort -n | tail -1
    #	25
    # set those lengths in the indexes for the SQL create:
    sed -e "s/14/11/; s/16/25/" $HOME/kent/src/hg/lib/ctgPos.sql > ctgPos.sql

    hgLoadSqlTab hg19Patch9 ctgPos ctgPos.sql ctgPos.txt
    # should be %100 with gaps:
    featureBits -countGaps hg19Patch9 ctgPos
    #   93963414 bases of 93963414 (100.000%) in intersection

###########################################################################
# ctgPos2 track (DONE - 2012-07-17 - Hiram)
    mkdir /hive/data/genomes/hg19Patch9/bed/ctgPos2
    cd /hive/data/genomes/hg19Patch9/bed/ctgPos2

for C in `cut -f1 ../../chrom.sizes | grep -v chrM_rCRS`
do
    ctgPos2=`hgsql -N -e 'select * from ctgPos2 where chrom="'$C'";' hg19`
    if [ "x${ctgPos}y" = "xy" ]; then
        GL=`echo $C | sed -e "s/.*_gl//; s/.*_jh//"`
        glSize=`grep ${GL} /hive/data/genomes/hg19Patch9/chrom.sizes | cut -f2`
        ncbiChrName=`grep ${GL} ../../../hg19/bed/additionalSequence/patch9/PATCHES/scaffold_localID2acc | cut -f1`
        if [ "x${ncbiChrName}y" = "xy" ]; then
            GL=`echo $C | sed -e "s/_hap.*//" | sed -e "s/chr.*_/_/" | tr '[a-z]' '[A-Z]'`
            ncbiChrName=`grep -h ${GL} /hive/data/genomes/hg19/download/alternate_loci/ALT_REF_LOCI_?/localID2acc | cut -f1`
        fi
        echo -e "$ncbiChrName\t$glSize\t${C}\t0\t$glSize\tF"
    else
        echo -e "$ctgPos2\tF"
    fi
done > ctgPos2.tab

    echo -e "NC_012920.1\t16569\tchrM_rCRS\t0\t16569\tF" >> ctgPos2.tab

    # check length of ctg names:
    cut -f 1 ctgPos2.tab | awk '{print length($0)}' | sort -n | tail -1
    # 24
    # and length of chrom names:
    cut -f 3 ctgPos2.tab | awk '{print length($0)}' | sort -n | tail -1
    # 25

    sed -e "s/20/24/; s/16/25/" $HOME/kent/src/hg/lib/ctgPos2.sql \
	> ctgPos2.sql
    hgLoadSqlTab hg19Patch9 ctgPos2 ctgPos2.sql ctgPos2.tab

    # should be %100 with gaps
    featureBits -countGaps hg19Patch9 ctgPos2
    #	93963414 bases of 93963414 (100.000%) in intersection

###########################################################################
# altSequence track (DONE - 2012-07-17 - Hiram)
    # provide links to locations on reference genome where these patches and
    # haplotypes belong
    mkdir /hive/data/genomes/hg19Patch9/bed/altSequence
    cd /hive/data/genomes/hg19Patch9/bed/altSequence
    ln -s ../../../hg19/bed/additionalSequence/patch9/altSequence.bed \
	altSeq.bed.0

    cat altSeq.bed.0 | while read L
do
    C=`echo "${L}" | awk '{print $4}'`
    hg19C=`echo "${L}" | awk '{print $1}'`
    hg19S=`echo "${L}" | awk '{print $2}'`
    hg19E=`echo "${L}" | awk '{print $3}'`
    S=`grep "^${C}" ../../chrom.sizes | cut -f2`
    echo $C $S $hg19C $hg19S $hg19E | awk '{printf "%s\t0\t%d\t%s:%d-%d\t", $1, $2, $3, $4, $5}'
    echo "${L}" | awk '{printf "%d\t%s\t%d\t%d\t%s\n", $5,$6,$7,$8,$9}'
done | grep -v "chrM_rCRS:" > altSequence.tab

    hgLoadBed hg19Patch9 altSequence altSequence.tab
    # Loaded 162 elements of size 9 from altSequence.tab
    featureBits -countGaps hg19Patch9 altSequence
    #	93963414 bases of 93963414 (100.000%) in intersection

############################################################################
# create lift file on unBridged gaps for genbank splits (2012-07-17 - Hiram)
    mkdir /hive/data/genomes/hg19Patch9/bed/gap
    cd /hive/data/genomes/hg19Patch9/bed/gap
    # verify all gaps are properly in the gap table:
    time nice -n +19 findMotif -motif=gattaca -verbose=4 \
	-strand=+ ../../hg19Patch9.2bit > findMotif.txt 2>&1
    #	real    0m0.370s
    grep "^#GAP " findMotif.txt | sed -e "s/^#GAP //" > allGaps.bed
    featureBits hg19Patch9 -not gap -bed=notGap.bed
    #   85220093 bases of 85220093 (100.000%) in intersection
    featureBits hg19Patch9 allGaps.bed notGap.bed -bed=new.gaps.bed
    #   0 bases of 85220093 (0.000%) in intersectio
    #   there are no bases missing being counted as gaps

    # construct an unBridged gap file for genbank (there are no unbridged gaps)
    gapToLift hg19Patch9 hg19Patch9.unBridged.lift -bedFile=unBridged.lift.bed
    cp -p hg19Patch9.unBridged.lift ../../jkStuff

###########################################################################
# AUTO UPDATE GENBANK RUN  (NOT DONE - 2012-07-17 - Hiram)
    # we no longer run this.
    # align with latest genbank process.
    cd ~/kent/src/hg/makeDb/genbank
    git pull
    # edit etc/genbank.conf to add hg19Patch9 just before hg19Patch2

# hg19Patch9 - GRCh37.p9 - Genome Reference Consortium Human Reference 37
hg19Patch9.serverGenome = /hive/data/genomes/hg19Patch9/hg19Patch9.2bit
hg19Patch9.clusterGenome = /hive/data/genomes/hg19Patch9/hg19Patch9.2bit
hg19Patch9.ooc = /hive/data/genomes/hg19Patch9/hg19Patch9.11.ooc
hg19Patch9.lift = /hive/data/genomes/hg19Patch9/jkStuff/hg19Patch9.unBridged.lift
hg19Patch9.refseq.mrna.native.pslCDnaFilter  = ${finished.refseq.mrna.native.pslCDnaFilter}
hg19Patch9.refseq.mrna.xeno.pslCDnaFilter    = ${finished.refseq.mrna.xeno.pslCDnaFilter}
hg19Patch9.genbank.mrna.native.pslCDnaFilter = ${finished.genbank.mrna.native.pslCDnaFilter}
hg19Patch9.genbank.mrna.xeno.pslCDnaFilter   = ${finished.genbank.mrna.xeno.pslCDnaFilter}
hg19Patch9.genbank.est.native.pslCDnaFilter = ${finished.genbank.est.native.pslCDnaFilter}
hg19Patch9.genbank.est.xeno.pslCDnaFilter   = ${finished.genbank.est.xeno.pslCDnaFilter}
hg19Patch9.genbank.est.xeno.load = no
hg19Patch9.genbank.est.xeno.loadDesc = no
hg19Patch9.genbank.mrna.xeno.load = no
hg19Patch9.genbank.mrna.xeno.loadDesc = no
hg19Patch9.refseq.mrna.xeno.load  = no
hg19Patch9.refseq.mrna.xeno.loadDesc = no
hg19Patch9.mgc = yes
hg19Patch9.orfeome = yes
hg19Patch9.downloadDir = hg19Patch9
hg19Patch9.genbank.mrna.blatTargetDb = yes
hg19Patch9.perChromTables = no

    git commit -m "adding hg19Patch9" etc/genbank.conf
    git push

    # update /cluster/data/genbank/:
    make etc-update

    ssh hgwdev		#	genbank procedure only functions on hgwdev
    screen		#	use a screen to manage this job
    cd /cluster/data/genbank
    time nice -n +19 bin/gbAlignStep -initial hg19Patch9 &
    #	logFile: var/build/logs/2011.07.12-13:04:17.hg19Patch9.initalign.log
    #	real      667m37.192s

    # load database when finished
    ssh hgwdev
    screen	# use screen to manage this long running command
    cd /cluster/data/genbank
    time nice -n +19 ./bin/gbDbLoadStep -drop -initialLoad hg19Patch9 &
    #	logFile: var/dbload/hgwdev/logs/2011.07.13-09:59:27.dbload.log
    #	real    51m17.530s

    # the following has not been done, XXX - 2011-07-13 - Hiram
    # enable daily alignment and update of hgwdev
    cd ~/kent/src/hg/makeDb/genbank
    git pull
    # add hg19Patch9 to:
        etc/align.dbs
        etc/hgwdev.dbs
    git commit -m "Added hg19Patch9 - Human - GRCh37.p9" etc/align.dbs etc/hgwdev.dbs
    git push
    make etc-update

############################################################################
# new blat server for the hg19.patch9 sequence (WORKING - 2011-07-13 - Hiram)
    hgsql -e 'INSERT INTO blatServers (db, host, port, isTrans, canPcr) \
	VALUES ("hg19Patch9", "blatx", "17838", "1", "0"); \
	INSERT INTO blatServers (db, host, port, isTrans, canPcr) \
	VALUES ("hg19Patch9", "blatx", "17839", "0", "1");' \
	    hgcentraltest

############################################################################
# lastz alignment to hg19 (WORKING - 2012-07-18 - Hiram)
    mkdir /hive/data/genomes/hg19Patch9/bed/lastzHg19.2012-07-18
    cd /hive/data/genomes/hg19Patch9/bed/lastzHg19.2012-07-18
    #	construct a 2bit file of just the hg19 reference sequences
    # and all the business to run lastz on each haplotype with its
    # corresponding target sequence in hg19

rm -fr hg19Bits run.blastz hg19Bits.lift
mkdir hg19Bits
mkdir -p run.blastz/tParts
mkdir -p run.blastz/qParts
awk '{print $1}' ../altSequence/altSequence.tab | sort -u | while read H
do
    P=`grep "^${H}" ../altSequence/altSequence.tab | head -1 | awk '{print $4}'`
    HE=`grep "^${H}" ../altSequence/altSequence.tab | head -1 | awk '{print $3}'`
    C=`echo ${P} | sed -e "s/:.*//"`
    CE=`grep "^${C}" /hive/data/genomes/hg19/chrom.sizes | cut -f2 | head -1`
    SE=`echo ${P} | sed -e "s/.*://"`
    S=`echo ${SE} | sed -e "s/-.*//" | awk '{printf "%d", $1-1}'`
    if [ "${S}" -lt 0 ]; then
       S=0
    fi
    E=`echo ${SE} | sed -e "s/.*-//"`
    size=`echo $E $S | awk '{printf "%d", $1-$2}'`
    echo -e "$S\t$C.$S-$E\t$size\t$C\t$CE"
    echo hg19.2bit:${C}:$S-$E 1>&2
    if [ ! -s hg19Bits/$C.$S-$E.fa ]; then
	echo ">$C.$S-$E" > hg19Bits/$C.$S-$E.fa
	twoBitToFa /gbdb/hg19/hg19.2bit:${C}:$S-$E stdout \
	    | grep -v "^>" >> hg19Bits/$C.$S-$E.fa
    fi
    echo -e "/hive/data/genomes/hg19Patch9/hg19Patch9.2bit:$H:0-$HE" \
        > run.blastz/tParts/$H.lst
    echo -e "/hive/data/genomes/hg19Patch9/bed/lastzHg19.2012-07-18/hg19Bits.2bit:$C.$S-$E:0-$size" \
        > run.blastz/qParts/$H.lst
    echo -e "/cluster/bin/scripts/blastz-run-ucsc -outFormat psl tParts/$H.lst qParts/$H.lst ../DEF {check out exists ../psl/$H.psl}" \
	>> run.blastz/jobList
done | sort -u > hg19Bits.lift

    faToTwoBit hg19Bits/chr*.fa hg19Bits.2bit
    twoBitInfo hg19Bits.2bit stdout | sort -k2nr > hg19Bits.chrom.sizes

    cat << '_EOF_' > DEF
# human vs human
BLASTZ=lastz
# maximum M allowed with lastz is only 254
BLASTZ_M=254
# lastz does not like the O= and E= lines in the matrix file
BLASTZ_Q=/scratch/data/blastz/human_chimp.v2.q
BLASTZ_O=600
BLASTZ_E=150
# other parameters from hg18 vs venter1 lastz on advice from Webb
BLASTZ_K=10000
BLASTZ_Y=15000
BLASTZ_T=2

# TARGET: Human Hg19Patch9
SEQ1_DIR=/hive/data/genomes/hg19Patch9/hg19Patch9.2bit
SEQ1_LEN=/hive/data/genomes/hg19Patch9/chrom.sizes
SEQ1_CHUNK=5000000
SEQ1_LAP=10000
SEQ1_IN_CONTIGS=0
SEQ1_LIMIT=2

# QUERY: Human Hg19
SEQ2_DIR=/scratch/data/hg19/hg19.2bit
SEQ2_LEN=/scratch/data/hg19/chrom.sizes
SEQ2_CTGDIR=/hive/data/genomes/hg19Patch9/bed/lastzHg19.2012-07-18/hg19Bits.2bit
SEQ2_CTGLEN=/hive/data/genomes/hg19Patch9/bed/lastzHg19.2012-07-18/hg19Bits.chrom.sizes
SEQ2_LIFT=/hive/data/genomes/hg19Patch9/bed/lastzHg19.2012-07-18/hg19Bits.lift
SEQ2_CHUNK=5000000
SEQ2_LAP=0
SEQ2_IN_CONTIGS=0
SEQ2_LIMIT=2

BASE=/hive/data/genomes/hg19Patch9/bed/lastzHg19.2012-07-18
TMPDIR=/scratch/tmp
'_EOF_'
    # << happy emacs

    ssh swarm
    cd /hive/data/genomes/hg19Patch9/bed/lastzHg19.2012-07-18/run.blastz
    mkdir ../psl
    para create jobList
    para try ... check ... push
    para time
# Completed: 162 of 162 jobs
# CPU time in finished jobs:        429s       7.14m     0.12h    0.00d  0.000 y
# IO & Wait Time:                   572s       9.54m     0.16h    0.01d  0.000 y
# Average job time:                   6s       0.10m     0.00h    0.00d
# Longest finished job:             153s       2.55m     0.04h    0.00d
# Submission to last job:           225s       3.75m     0.06h    0.00d


    #	put together the individual results:
    cd /hive/data/genomes/hg19Patch9/bed/lastzHg19.2012-07-18
    mkdir pslParts
    cat psl/chr*.psl | gzip -c > pslParts/hg19Patch9.hg19.psl.gz

    #	constructing a chain from those results
    mkdir -p /hive/data/genomes/hg19Patch9/bed/lastzHg19.2012-07-18/axtChain/run
    cd /hive/data/genomes/hg19Patch9/bed/lastzHg19.2012-07-18/axtChain/run
time zcat ../../pslParts/hg19Patch9.hg19.psl.gz \
| axtChain -psl -verbose=0 -scoreScheme=/scratch/data/blastz/human_chimp.v2.q -minScore=2000 -linearGap=medium stdin \
    /hive/data/genomes/hg19Patch9/hg19Patch9.2bit \
    /hive/data/genomes/hg19Patch9/bed/lastzHg19.2012-07-18/hg19Bits.2bit \
    stdout \
| chainAntiRepeat /hive/data/genomes/hg19Patch9/hg19Patch9.2bit \
    /hive/data/genomes/hg19Patch9/bed/lastzHg19.2012-07-18/hg19Bits.2bit \
    stdin hg19Patch9.hg19.preLift.chain
    #   real    0m48.446s
liftUp -chainQ hg19Patch9.hg19.lifted.chain \
    ../../hg19Bits.lift carry hg19Patch9.hg19.preLift.chain

    # constructing the net files:
cd /hive/data/genomes/hg19Patch9/bed/lastzHg19.2012-07-18/axtChain

chainMergeSort run/hg19Patch9.hg19.lifted.chain \
        | nice gzip -c > hg19Patch9.hg19.all.chain.gz
chainSplit chain hg19Patch9.hg19.all.chain.gz
# Make nets ("noClass", i.e. without rmsk/class stats which are added later):
time chainPreNet  hg19Patch9.hg19.all.chain.gz \
    /hive/data/genomes/hg19Patch9/chrom.sizes \
    /scratch/data/hg19/chrom.sizes stdout \
    | chainNet  stdin -minSpace=1 /hive/data/genomes/hg19Patch9/chrom.sizes \
        /scratch/data/hg19/chrom.sizes stdout /dev/null \
    | netSyntenic stdin noClass.net

# Make liftOver chains:
netChainSubset -verbose=0 noClass.net hg19Patch9.hg19.all.chain.gz stdout \
    | chainStitchId stdin stdout | gzip -c > hg19Patch9.hg19.over.chain.gz

# Make axtNet for download: one .axt per hg19Patch9 seq.
netSplit noClass.net net
cd ..
mkdir -p axtNet
foreach f (axtChain/net/*.net)
netToAxt $f axtChain/chain/$f:t:r.chain \
  /hive/data/genomes/hg19Patch9/hg19Patch9.2bit \
        /scratch/data/hg19/hg19.2bit stdout \
  | axtSort stdin stdout \
  | gzip -c > axtNet/$f:t:r.hg19Patch9.hg19.net.axt.gz
end

# Make mafNet for multiz: one .maf per hg19Patch9 seq.
mkdir -p mafNet
foreach f (axtNet/*.hg19Patch9.hg19.net.axt.gz)
  axtToMaf -tPrefix=hg19Patch9. -qPrefix=hg19. $f \
        /hive/data/genomes/hg19Patch9/chrom.sizes \
        /scratch/data/hg19/chrom.sizes \
        stdout \
  | gzip -c > mafNet/$f:t:r:r:r:r:r.maf.gz
end

    # swap that business to hg19
    mkdir /hive/data/genomes/hg19/bed/blastz.hg19Patch9.swap
    cd /hive/data/genomes/hg19/bed/blastz.hg19Patch9.swap
    time doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/hg19Patch9/bed/lastzHg19.2012-07-18/DEF \
	-swap -noDbNameCheck  -stop=load \
	-noLoadChainSplit -chainMinScore=2000 \
	-chainLinearGap=medium -workhorse=hgwdev \
	-smallClusterHub=encodek -bigClusterHub=swarm > swap.load.log 2>&1
    #   real    2m49.392s

    cat fb.hg19.chainHg19Patch9Link.txt 
    #   55576147 bases of 2897316137 (1.918%) in intersection

    # and then fixup the chains to include the haplotypes
    cd /hive/data/genomes/hg19/bed/blastz.hg19Patch9.swap/axtChain
    # split up each chain by the hg19Patch9 query sequences
    mkdir -p queryChains
    chainSplit -q queryChains hg19.hg19Patch9.all.chain.gz

    # then run a 'lift over' chain/net on each single one
    mkdir -p singleLiftOver

for F in queryChains/*.chain
do
    C=`basename ${F}`
    B=`echo ${C} | sed -e "s/.chain//"`
    chainPreNet -inclHap ${F} /scratch/data/hg19/chrom.sizes \
        /hive/data/genomes/hg19Patch9/chrom.sizes stdout \
    | chainNet -inclHap stdin -minSpace=1 /scratch/data/hg19/chrom.sizes \
        /hive/data/genomes/hg19Patch9/chrom.sizes singleLiftOver/${B}.raw.net \
        /dev/null
    netSyntenic singleLiftOver/${B}.raw.net singleLiftOver/${B}.noClass.net
    netFilter -chimpSyn singleLiftOver/${B}.noClass.net > singleLiftOver/${B}.chimpSyn.net
    netChainSubset -verbose=0 singleLiftOver/${B}.noClass.net \
        ${F} stdout \
    | chainStitchId stdin stdout > singleLiftOver/${C}
    echo "${F} -> singleLiftOver/${C}"
done
    # put the chains together into one file
    chainMergeSort singleLiftOver/chr*.chain | gzip -c \
	> hg19.hg19Patch9.single.over.chain.gz

    # construct psl files from those chains
    chainToPsl hg19.hg19Patch9.single.over.chain.gz \
	/hive/data/genomes/hg19/chrom.sizes \
        /hive/data/genomes/hg19Patch9/chrom.sizes \
        /hive/data/genomes/hg19/hg19.2bit \
        /hive/data/genomes/hg19Patch9/hg19Patch9.2bit \
        hg19.hg19Patch9.over.psl
    # chainToPsl appears to have a problem, note errors from pslCheck:
    pslCheck -db=hg19 hg19.hg19Patch9.over.psl
# checked: 884 failed: 45 errors: 45

    pslRecalcMatch hg19.hg19Patch9.over.psl \
	/hive/data/genomes/hg19/hg19.2bit \
	/hive/data/genomes/hg19Patch9/hg19Patch9.2bit \
	fixup.hg19.hg19Patch9.over.psl
    pslCheck -db=hg19 fixup.hg19.hg19Patch9.over.psl
    checked: 884 failed: 0 errors: 0

    # load this PSL track
    hgLoadPsl hg19 -table=altSeqLiftOverPslP9 fixup.hg19.hg19Patch9.over.psl
    # to replace this table in the current track:
    hgLoadPsl hg19 -table=altSeqLiftOverPsl fixup.hg19.hg19Patch9.over.psl

############################################################################
# Add this sequence to hg19 (DONE - 2012-07-18 - Hiram)
    mkdir /hive/data/genomes/hg19Patch9/bed/altSequence/seqExt
    cd /hive/data/genomes/hg19Patch9/bed/altSequence/seqExt
    twoBitToFa ../../../hg19Patch9.2bit hg19Patch9.fa
    mkdir -p /gbdb/hg19/hg19Patch9 hg19Patch9
    faSplit byname hg19Patch9.fa ./hg19Patch9/
    ln -s `pwd`/hg19Patch9/*.fa /gbdb/hg19/hg19Patch9
    hgLoadSeq -drop -seqTbl=seqHg19Patch9 -extFileTbl=extHg19Patch9 hg19 \
        /gbdb/hg19/hg19Patch9/*.fa

############################################################################
