# for emacs: -*- mode: sh; -*-

# tracks for Ed's Neandertal paper
# This starts from empty hg18 on a private server (genome-nt).

#########################################################################
# CREATE NEANDERTAL TRACK GROUP (DONE 3/3/10 angie)
    hgsql hg18 < ~/kent/src/hg/lib/grp.sql
    hgsql hg18 -e \
      "insert into grp values ('neandertal', 'Neandertal Assembly and Analysis', 1.5);"
    # Edit hg.conf to have this grp setting:
db.grp=ntdb:grp
    # where ntdb is a profile (ntdb.user= etc) defined earlier in hg.conf.  


#########################################################################
# ASSEMBLED BAM TRACK TABLES (DONE 3/3/10 angie)
    # Copied Ed's *-hg18.bam into local gbdb/hg18
    mkdir -p /data/NT/gbdb/hg18/neandertal/seqAlis
    cp -p /hive/users/angie/testBams/*-hg18.bam /data/NT/gbdb/hg18/neandertal/seqAlis/
    foreach t (Feld1 Mez1 Sid1253)
      hgsql hg18 -e "drop table if exists bam$t; create table bam$t (fileName varchar(255) not null);"
      hgsql hg18 -e "insert into bam$t values ('/data/NT/gbdb/hg18/neandertal/seqAlis/$t-hg18.bam');"
    end
    foreach i (16 25 26)
      hgsql hg18 -e "drop table if exists bamVi33dot$1; create table bamVi33dot$1 (fileName varchar(255) not null);"
      hgsql hg18 -e "insert into bamVi33dot$1 values ('/data/NT/gbdb/hg18/neandertal/seqAlis/Vi33.$i-hg18.bam');"
    end
    hgsql hg18 -e "drop table if exists bamAll; create table bamAll (fileName varchar(255) not null);"
    hgsql hg18 -e "insert into bamAll values ('/data/NT/gbdb/hg18/neandertal/seqAlis/all-hg18.bam');"


#########################################################################
# RAW READ ALIGNMENTS BAM TRACK TABLES (DONE 3/8/10 angie)
    mkdir /data/NT/gbdb/hg18/neandertal/seqLibs
    cp /hive/users/angie/testBams/by_library/SL*.hg18.bam* /data/NT/gbdb/hg18/neandertal/seqLibs
    chmod a+r /data/NT/gbdb/hg18/neandertal/seqLibs/*
    cd /data/NT/gbdb/hg18/neandertal/seqLibs
    foreach sl (37 39 61 49 15 16 17 18 19 20 21 22 28 29 33 42 46 47 32 6 7 8 10 11 12 \
		13 14 24 26 27 30 31 9)
      samtools index SL$sl.hg18.bam
      hgsql hg18 -e "create table bamSL$sl (fileName varchar(255) not null);"
      hgsql hg18 -e "insert into bamSL$sl values ('/data/NT/gbdb/hg18/neandertal/seqLibs/SL$sl.hg18.bam');"
    end
#*** NOTE: We ended up discarding this track -- too low-level to be of interest.
#*** Removed table & files 4/12/10.


#########################################################################
# HUMAN READ ALIGNMENTS BAM TRACK TABLES (DONE 3/8/10 angie)
    # Copied Ed's MMS*.hg18.bam into local gbdb/hg18
    hgsql hg18 -e "drop table bamMMS4"
    hgsql hg18 -e "drop table bamMMS5"
    hgsql hg18 -e "drop table bamMMS6"
    hgsql hg18 -e "drop table bamMMS7"
    hgsql hg18 -e "drop table bamMMS8"
    hgsql hg18 -e "create table bamMMS4 (fileName varchar(255) not null);"
    hgsql hg18 -e "insert into bamMMS4 values ('/data/NT/gbdb/hg18/neandertal/humans/MMS4_HGDP00778_Han.paired.qualfilt.rmdup.entropy1.0.sort.bam');"

    hgsql hg18 -e "create table bamMMS5 (fileName varchar(255) not null);"
    hgsql hg18 -e "insert into bamMMS5 values ('/data/NT/gbdb/hg18/neandertal/humans/MMS5_HGDP00542_Papuan.paired.qualfilt.rmdup.entropy1.0.sort.bam');"

    hgsql hg18 -e "create table bamMMS6 (fileName varchar(255) not null);"
    hgsql hg18 -e "insert into bamMMS6 values ('/data/NT/gbdb/hg18/neandertal/humans/MMS6_HGDP00927_Yoruba.paired.qualfilt.rmdup.entropy1.0.sort.bam');"

    hgsql hg18 -e "create table bamMMS7 (fileName varchar(255) not null);"
    hgsql hg18 -e "insert into bamMMS7 values ('/data/NT/gbdb/hg18/neandertal/humans/MMS7_HGDP01029_San.paired.qualfilt.rmdup.entropy1.0.sort.bam');"

    hgsql hg18 -e "create table bamMMS8 (fileName varchar(255) not null);"
    hgsql hg18 -e "insert into bamMMS8 values ('/data/NT/gbdb/hg18/neandertal/humans/MMS8_HGDP00521_French.paired.qualfilt.rmdup.entropy1.0.sort.bam');"


#########################################################################
# PER-INDIVIDUAL READS (DONE 3/9/10 angie)
    mkdir /hive/users/angie/testBams/rawReads
    cd /hive/users/angie/testBams/rawReads
    # Headers of these two files are identical, so no pre-processing:
    samtools merge SLMez1.hg18.bam ../by_library/SL{39,61}.hg18.bam
    # Right number of alignments in output?
    samtools view ../by_library/SL39.hg18.bam | wc -l
#625768
    samtools view ../by_library/SL61.hg18.bam | wc -l
#640684
    expr 625768 + 640684
#1266452
    samtools view SLMez1.hg18.bam |wc -l
#1266452
    # Do the same for other individs according to mapping in Ed's Lib2Bone.txt:
    samtools merge SLVi33.16.hg18.bam \
      ../by_library/SL{15,16,17,18,19,20,21,22,28,29,33,42,46,47}.hg18.bam
    samtools merge SLVi33.25.hg18.bam ../by_library/SL{6,7,8,32}.hg18.bam
    samtools merge SLVi33.26.hg18.bam \
      ../by_library/SL{9,10,11,12,13,14,24,26,27,30,31}.hg18.bam
    foreach f (*.bam)
      samtools index $f
    end
    # Make database tables on genome-nt for files copied into local /gbdb:
    foreach i (Feld1 Mez1 Sid1253)
      hgsql hg18 -e "create table bamSL$i (fileName varchar(255) not null);"
      hgsql hg18 -e "insert into bamSL$i values ('/data/NT/gbdb/hg18/neandertal/seqAlis/SL$i.hg18.bam');"
    end
    foreach i (16 25 26)
      hgsql hg18 -e "create table bamSLVi33dot$i (fileName varchar(255) not null);"
      hgsql hg18 -e "insert into bamSLVi33dot$i values ('/data/NT/gbdb/hg18/neandertal/seqAlis/SLVi33.$i.hg18.bam');"
    end

#########################################################################
# NEANDERTAL ALLELES FOR CODING DIFFS BETW CHIMP AND HUMAN (DONE 3/23/10 angie)
    # Copied file emailed from Hernan Burbano 3/15/10 to
    # /hive/users/angie/testBams/hernan/neandertal_table_for_UCSC.tsv.gz
    cd /hive/users/angie/testBams/hernan/
    zcat neandertal_table_for_UCSC.tsv.gz | head -1
#Refseq ID      chr     position        strand  human_nucleotide        chimpanzee_nucleotide   coverage        human_codon     chimpanzee_codon        number_ancestral_reads  number_derived_reads    number_third_state_reads        neandertal_state
    # Make a bed 6 with names that encode the SNP and read info, and view as CT:
    zcat neandertal_table_for_UCSC.tsv.gz \
    | perl -wpe '$_="", next if /^#/; chomp; @w=split("\t"); \
               $w[12] =~ s/^(\w).*/$1/; \
               $other = $w[11] > 0 ? "+$w[11]?" : ""; \
               $name = "$w[12]:$w[9]$w[5]>$w[10]$w[4]$other($w[8]>$w[7])"; \
               $_ = join("\t", $w[1], $w[2], $w[2]+1, $name, $w[6]*100, $w[3]) . "\n"' \
    | sort -k1,1 -k2n,2n \
      > ntSnps.bed
    # Ed asked for coloring to indicate Ancestral vs. Derived:
    zcat neandertal_table_for_UCSC.tsv.gz \
    | perl -wpe '$_="", next if /^#/; chomp; @w=split("\t"); \
               $other = $w[11] > 0 ? "+$w[11]?" : ""; \
               $name = "$w[9]$w[5]>$w[10]$w[4]$other($w[8]>$w[7])"; \
               $color = ($w[12] =~/^A/) ? "0,0,200" : ($w[12] =~ /^D/) ? "0,180,0" : "200,0,0"; \
               $_ = join("\t", $w[1], $w[2], $w[2]+1, $name, $w[6]*100, $w[3], \
                         $w[2], $w[2]+1, $color) . "\n"' \
    | sort -k1,1 -k2n,2n \
      > ntHumChimpCodingDiff.bed
    hgLoadBed -tab -maxChromNameLength=13 hg18 ntHumChimpCodingDiff ntHumChimpCodingDiff.bed
#Loaded 11345 elements of size 9


#########################################################################
# SELECTIVE SWEEP SCAN SCORES (DONE 3/24/10 angie)
    # Ed copied hg18.sss.CpGmask.vbf.zscore.tar.gz to /hive/users/angie/testBams/ .
    # Ed's description of the data:
# The table has a row for every position that is observed to be
# polymorphic in the modern human HGDP data, for which 3 alleles are
# present. The columns have these data:
# 1. chromosome
# 2. position
# 3. Allele string for human data (A=ancestral; D=derived; 0=no data)
# 4. probability that a Neandertal allele would be derived, given the
# configuration of ancestral/derived alleles in column 3
# 5. Observed number of neandertal derived alleles at this site
# 6. Observed number of neandertal ancestral alleles at this site
# 7. Total number of expected neandertal derived alleles in the 100kb
# window around this site
# 8. Observed number of neandertal derived alleles in the 100 kb window
# around this site
# 9. The total number of polymorphic sites in this 100 kb window
# 10. The variance of the number of expected number of derived alleles
# in this 100 kb window
# 11. The z-score of the difference between the observed and the
# expected. That is (column 8 - column 7) / column 10
# 
# For the browser track, I think only columns 1, 2, 10, and 11 are
# necessary. Columns 1 and 2 give the position and the value to be
# displayed is column 11 +/- column 10.
    cd /hive/users/angie/testBams
    tar xvzf hg18.sss.CpGmask.vbf.zscore.tar.gz
    cd /hive/users/angie/testBams/SSS
    # Since we don't have a way to superimpose 2 wiggles (to show value +- 
    # some other value), I will make 4 subtracks to run by Ed:
    # variance (col 10), z-score (col 11), z-score + variance, z-score - variance
    awk '{print $1 "\t" ($2-1) "\t" $2 "\t" $10;}' chr*.txt \
      > ntSssVariance.bed
    awk '{print $1 "\t" ($2-1) "\t" $2 "\t" $11;}' chr*.txt \
      > ntSssZScore.bed
    awk '{print $1 "\t" ($2-1) "\t" $2 "\t" ($11+$10);}' chr*.txt \
      > ntSssZScorePlusVar.bed
    awk '{print $1 "\t" ($2-1) "\t" $2 "\t" ($11-$10);}' chr*.txt \
      > ntSssZScoreMinusVar.bed
    foreach f (nt*.bed)
      hgLoadBed -bedGraph=4 -maxChromNameLength=13 hg18 $f:r $f
    end
#Loaded 5617139 elements of size 4
    # Those are pretty large for bedBraph - consider bigWig.
    # Actually, Ed didn't like those as much as the following one, so ignore them.

    # Make a bigWig with two scores per snp: z-score + variance at the snp position,
    # and z-score - variance at the next base position.
    # By Ed's request, drop the chrY data.
#There's more than one value for chr1 base 4881364 (in coordinates that start with 1).
    cp /dev/null ntSssZScorePMVar.varStep
    foreach f (chr*.txt)
      set chr = $f:r:r:r:r:r
      if ($chr == "chrY") then
        echo skipping $chr
        continue
      endif
      echo "variableStep chrom=$chr" >> ntSssZScorePMVar.varStep
      awk 'BEGIN{prev=-1;} \
           {if ($2 != prev) { print $2 "\t" ($11+$10); } \
            print ($2+1) "\t" ($11-$10);  prev = $2+1;}' $f \
	>> ntSssZScorePMVar.varStep
    end
    wigToBigWig ntSssZScorePMVar.varStep /hive/data/genomes/hg18/chrom.sizes ntSssZScorePMVar.bw
    # Copying instead of linking because testBams is not readable
    cp -p ntSssZScorePMVar.bw /data/NT/gbdb/hg18/neandertal/bbi/
    hgsql hg18 -e 'drop table if exists ntSssZScorePMVar; \
            create table ntSssZScorePMVar (fileName varchar(255) not null); \
            insert into ntSssZScorePMVar values \
              ("/data/NT/gbdb/hg18/neandertal/bbi/ntSssZScorePMVar.bw");'


#########################################################################
# SELECTIVE SWEEP SCAN TOP 5% REGIONS (DONE 3/24/10 angie - re-shaded 5/5)
    # Ed copied z-p5.CpGmask.vbf.txt to /hive/users/angie/testBams/ .
    # Ed's description of the data:
# This file contains the regions that are in the 5% tail of our score
# distribution, sorted by genetic width. The columns of this
# space-delimited table are:
# 1. chromosome
# 2. start of region
# 3. end of region
# 4. length of region
# 5. number of polymorphic sites in region
# 6. The recombination rate in the region from Myers et al (cM/Mb)
# 7. The genetic width of the region (cM)
# 8. Total number of observed neandertal derived alleles in the region
# 9. Total number of observed neandertal ancestral alleles in the region
# 10. Expected number of neandertal derived alleles in the region
# 11.  The z-score of the region
# 
# For these, the only thing that should be displayed is the region and
# the z-score.
    # Max (absolute) score is -8.7, min (abs) is -4.3.  Make scores for shading, and make
    # this bed not bedGraph so we can click on items to see length etc.
    awk '{print $1 "\t" ($2-1) "\t" $3 "\t" $11 "\t" int(400 + 600*(-$11-4.3)/4.4);}' z-p5.CpGmask.vbf.txt \
    | sort -k1,1 -k2n,2n \
      > ntSssTop5p.bed
    hgLoadBed hg18 ntSssTop5p ntSssTop5p.bed
#Loaded 212 elements of size 4


#########################################################################
# MITOCHONDRION ALIGNMENT (DONE 4/19/10 angie)
    # Download Ed's previously published Neandertal mitochondrial genome from
    # http://www.ncbi.nlm.nih.gov/nuccore/196123578
# NOTE FOR RELEASE: move this from /data/NT/mito to /hive/data/genomes/hg18/bed/ntMito and 
# reload tables.
    mkdir /data/NT/mito
    cd /data/NT/mito
    wget --timestamping -O ntM.fa \
      'http://www.ncbi.nlm.nih.gov/sviewer/viewer.fcgi?tool=portal&db=nuccore&val=196123578&dopt=fasta&sendto=on&log$=seqview'
    # Edit fasta header line to remove everything except "NC_011137.1"
    blat -noHead /hive/data/genomes/hg19/M/chrM.fa ntM.fa ntM.psl
    cat ntM.psl 
#16361   194     0       0       2       10      4       16      +       NC_011137.1     16565   0       16565   chrM    16571   0       16571   5       309,204,15660,76,306,   0,310,514,16174,16259,  0,312,520,16181,16265,
    hgLoadPsl hg18 -table=ntMito ntM.psl
    mkdir /data/NT/gbdb/hg18/neandertal/ntMito
    ln -s `pwd`/ntM.fa /data/NT/gbdb/hg18/neandertal/ntMito/
    hgLoadSeq hg18 /data/NT/gbdb/hg18/neandertal/ntMito/ntM.fa


#########################################################################
# SELECTIVE SWEEP SCAN SNPs (DONE 5/4/10 angie)
    # Ed copied hg18.sss.CpGmask.vbf.zscore.tar.gz to /hive/users/angie/testBams/ .
    # Ed's description of the data:
# The table has a row for every position that is observed to be
# polymorphic in the modern human HGDP data, for which 3 alleles are
# present. The columns have these data:
# 1. chromosome
# 2. position
# 3. Allele string for human data (A=ancestral; D=derived; 0=no data)
#    The order of the individuals is hg18, San, Yoruban, Han, Papuan, French.
# 4. probability that a Neandertal allele would be derived, given the
# configuration of ancestral/derived alleles in column 3
# 5. Observed number of neandertal derived alleles at this site
# 6. Observed number of neandertal ancestral alleles at this site
# 7. Total number of expected neandertal derived alleles in the 100kb
# window around this site
# 8. Observed number of neandertal derived alleles in the 100 kb window
# around this site
# 9. The total number of polymorphic sites in this 100 kb window
# 10. The variance of the number of expected number of derived alleles
# in this 100 kb window
# 11. The z-score of the difference between the observed and the
# expected. That is (column 8 - column 7) / column 10
    # For the SNPs, Ed suggested coloring ones red where most modern humans have
    # the derived allele but Neandertals have only ancestral.  
    cd /hive/users/angie/testBams/SSS
    perl -wpe 'chomp; @w=split; $w[2] =~ s/0/_/g; \
               $tmp = $w[2]; $dCount = ($tmp =~ s/D//g); \
               $rgb = ($dCount >= 4 && $w[4] == 0) ? "220,0,0" : "0,0,0"; \
               $_ = join("\t", $w[0], $w[1]-1, $w[1], $w[2].":$w[4]D,$w[5]A", 0, "+", \
                         $w[1]-1, $w[1], $rgb) . "\n";' \
      chr*.txt \
      > ntSssSnps.bed
    hgLoadBed hg18 ntSssSnps ntSssSnps.bed
    # Drop data on chrY:
    hgsql hg18 -e 'delete from ntSssSnps where chrom = "chrY"'

#########################################################################
# OUT-OF-AFRICA HAPLOTYPES (DONE 5/4/10 angie)
    mkdir /data/NT/ntOoaHaplo
    cd /data/NT/ntOoaHaplo
    # Edited Table 5 of Ed's paper into a bed 4 + file, ntOoaHaplo.bed .
    # Made ntOoaHaplo.as .
    autoSql ntOoaHaplo.as ntOoaHaplo
    # Edited ntOoaHaplo.sql to add bin column, allow NULL for extra fields.
    hgLoadBed hg18 ntOoaHaplo ntOoaHaplo.bed \
      -sqlTable=ntOoaHaplo.sql -tab
    # Some warnings about missing values in row 7, no problem.


#########################################################################
# MIGRATE TRACKS TO HGWDEV (DONE 5/6/10 angie)
#ASAP:
    # Make chmod-700 subdirs of hg18/bed and panTro2/bed
    umask 77
    mkdir /hive/data/genomes/hg18/bed/{ntHumChimpCodingDiff,ntSss,ntOoaHaplo,ntSeqContigs,ntSeqReads,ntModernHumans,ntMito}
    mkdir /hive/data/genomes/panTro2/bed/{ntSeqContigs,ntSeqReads}
    umask 2

    # Hard-link data files from testBams to subDirs, make /gbdb/ links to the subdirs
    ln /hive/users/angie/testBams/rawReads/* /hive/data/genomes/hg18/bed/ntSeqReads
    ln /hive/users/angie/testBams/rawReadsChimp/* /hive/data/genomes/panTro2/bed/ntSeqReads
    chmod 444 /hive/data/genomes/{hg18,panTro2}/bed/ntSeqReads/*
    foreach db (hg18 panTro2)
      set dbAbbrev = `echo $db | sed -e 's/panTro2/pt2/'`
      mkdir -p /gbdb/$db/neandertal/seqAlis/
      ln -s /hive/data/genomes/$db/bed/ntSeqReads/*-$dbAbbrev.bam /gbdb/$db/neandertal/seqAlis/
      rsync -av /hive/users/angie/testBams/*-$dbAbbrev.bam /hive/data/genomes/$db/bed/ntSeqContigs/
      chmod 664 /hive/data/genomes/$db/bed/ntSeqContigs/*
      ln -s /hive/data/genomes/$db/bed/ntSeqContigs/*-$dbAbbrev.bam /gbdb/$db/neandertal/seqAlis/
    end
    # Got sysadmins to make MMS* world-readable.
    ln /hive/users/angie/testBams/MMS*.bam* /hive/data/genomes/hg18/bed/ntModernHumans/
    mkdir /gbdb/hg18/neandertal/humans
    ln -s /hive/data/genomes/hg18/bed/ntModernHumans/MMS*.bam* /gbdb/hg18/neandertal/humans

    ln /hive/users/angie/testBams/hernan/* /hive/data/genomes/hg18/bed/ntHumChimpCodingDiff/
    ln /hive/users/angie/testBams/hg18.sss.CpGmask.vbf.zscore.tar.gz /hive/data/genomes/hg18/bed/ntSss/

    mkdir /hive/data/genomes/hg18/bed/ntSss/SSS
    ln /hive/users/angie/testBams/SSS/* /hive/data/genomes/hg18/bed/ntSss/SSS/
    mkdir /gbdb/hg18/neandertal/bbi/
    ln -s /hive/data/genomes/hg18/bed/ntSss/SSS/ntSssZScorePMVar.bw /gbdb/hg18/neandertal/bbi/
    ln /hive/users/angie/testBams/{z-p5.CpGmask.vbf.txt,ntSssTop5p.bed} /hive/data/genomes/hg18/bed/ntSss/

    # on genome-nt:
    rsync -av /data/NT/mito/* /hive/data/genomes/hg18/bed/ntMito/
    # back on hgwdev:
    mkdir /gbdb/hg18/neandertal/ntMito
    ln -s /hive/data/genomes/hg18/bed/ntMito/ntM.fa /gbdb/hg18/neandertal/ntMito/

    # on genome-nt:
    rsync -av /data/NT/ntOoaHaplo/* /hive/data/genomes/hg18/bed/ntOoaHaplo/

    # Load bed tables with fake names:
    cd /hive/data/genomes/hg18/bed/ntHumChimpCodingDiff
    hgLoadBed -tab hg18 hernan ntHumChimpCodingDiff.bed
#Loaded 11345 elements of size 9
    cd /hive/data/genomes/hg18/bed/ntSss
    hgLoadBed hg18 ashTop5 ntSssTop5p.bed
#Loaded 212 elements of size 5
    cd /hive/data/genomes/hg18/bed/ntSss/SSS
    hgLoadBed hg18 ashSsss ntSssSnps.bed
#Loaded 5617139 elements of size 9
    hgsql hg18 -e 'delete from ashSsss where chrom = "chrY"'

    # Write up overview of downloadable /gbdb, table dumps

# 10:30 or later:
    # copy over .html files to ~/kent/src/...
    # copy over trackDb.ra entries
    # copy /data/NT/ntOoaHaplo/ntOoaHaplo.{as,sql} to kent/src/hg/lib
    # ? copy overview page to ~/browser/ ?

    # migrate pushQ to hgwbeta:
    ssh genome-nt
    hgsqldump qapushq neandertal \
    | sed -re 's/genome-nt/hgwdev/g;  s@/data/NT/gbdb@/gbdb@g;  s/\),\(/),\n(/g' \
      > /hive/users/angie/testBams/pushQ.sql
    # Read carefully to check for any additional necessary edits.
    ssh hgwbeta
    hgsql qapushq < /hive/users/angie/testBams/pushQ.sql

# 10:50:
    # if twiddling thumbs :), take it live on hgwdev-angie
    # load database tables
    foreach db (hg18 panTro2)
      set dbAbbrev = `echo $db | sed -e 's/panTro2/pt2/'`
      foreach t (Feld1 Mez1 Sid1253)
        hgsql $db -e "drop table if exists bam$t; create table bam$t (fileName varchar(255) not null);"
        hgsql $db -e "insert into bam$t values ('/gbdb/$db/neandertal/seqAlis/$t-$dbAbbrev.bam');"
      end
      foreach i (16 25 26)
        hgsql $db -e "drop table if exists bamVi33dot$i; create table bamVi33dot$i (fileName varchar(255) not null);"
        hgsql $db -e "insert into bamVi33dot$i values ('/gbdb/$db/neandertal/seqAlis/Vi33.$i-$dbAbbrev.bam');"
      end
      hgsql $db -e "drop table if exists bamAll; create table bamAll (fileName varchar(255) not null);"
      hgsql $db -e "insert into bamAll values ('/gbdb/$db/neandertal/seqAlis/all-$dbAbbrev.bam');"
    end
    foreach db (hg18 panTro2)
      set dbAbbrev = `echo $db | sed -e 's/panTro2/pt2/'`
      foreach i (Feld1 Mez1 Sid1253)
        hgsql $db -e "drop table if exists bamSL$i; create table bamSL$i (fileName varchar(255) not null);"
        hgsql $db -e "insert into bamSL$i values ('/gbdb/$db/neandertal/seqAlis/SL$i.$dbAbbrev.bam');"
      end
      foreach i (16 25 26)
echo         hgsql $db -e "drop table if exists bamSLVi33dot$i; create table bamSLVi33dot$i (fileName varchar(255) not null);"
echo         hgsql $db -e "insert into bamSLVi33dot$i values ('/gbdb/$db/neandertal/seqAlis/SLVi33.$i.$dbAbbrev.bam');"
      end
    end
    foreach n (4 5 6 7 8)
      hgsql hg18 -e "create table bamMMS$n (fileName varchar(255) not null);"
    end
    hgsql hg18 -e "insert into bamMMS4 values ('/gbdb/hg18/neandertal/humans/MMS4_HGDP00778_Han.paired.qualfilt.rmdup.entropy1.0.sort.bam');"
    hgsql hg18 -e "insert into bamMMS5 values ('/gbdb/hg18/neandertal/humans/MMS5_HGDP00542_Papuan.paired.qualfilt.rmdup.entropy1.0.sort.bam');"
    hgsql hg18 -e "insert into bamMMS6 values ('/gbdb/hg18/neandertal/humans/MMS6_HGDP00927_Yoruba.paired.qualfilt.rmdup.entropy1.0.sort.bam');"
    hgsql hg18 -e "insert into bamMMS7 values ('/gbdb/hg18/neandertal/humans/MMS7_HGDP01029_San.paired.qualfilt.rmdup.entropy1.0.sort.bam');"
    hgsql hg18 -e "insert into bamMMS8 values ('/gbdb/hg18/neandertal/humans/MMS8_HGDP00521_French.paired.qualfilt.rmdup.entropy1.0.sort.bam');"
    hgsql hg18 -e 'drop table if exists ntSssZScorePMVar; \
            create table ntSssZScorePMVar (fileName varchar(255) not null); \
            insert into ntSssZScorePMVar values \
              ("/gbdb/hg18/neandertal/bbi/ntSssZScorePMVar.bw");'
    cd /hive/data/genomes/hg18/bed/ntMito
    hgLoadPsl hg18 -table=ntMito ntM.psl
    hgLoadSeq hg18 /gbdb/hg18/neandertal/ntMito/ntM.fa
    cd /hive/data/genomes/hg18/bed/ntOoaHaplo
    hgLoadBed hg18 ntOoaHaplo ntOoaHaplo.bed \
      -sqlTable=ntOoaHaplo.sql -tab
#Loaded 13 elements of size 15

# 11am: take it live!
    ssh hgwdev
    # add neandertal group to hg18.grp and panTro2.grp, between compGeno and varRep
    hgsql hg18 -e "INSERT INTO grp VALUES ('neandertal', 'Neandertal Assembly and Analysis', 6.5, 0);"
    hgsql panTro2 -e "INSERT INTO grp VALUES ('neandertal', 'Neandertal Assembly and Analysis', 6.5);"
    # chmod 775 the new bed/ subdirs
    chmod 775 /hive/data/genomes/hg18/bed/{ntHumChimpCodingDiff,ntSss,ntOoaHaplo,ntSeqContigs,ntSeqReads,ntModernHumans,ntMito}
    chmod 775 /hive/data/genomes/panTro2/bed/{ntSeqContigs,ntSeqReads}
    # rename fake-named database tables to real names
    hgsql hg18 -e 'rename table hernan to ntHumChimpCodingDiff'
    hgsql hg18 -e 'rename table ashTop5 to ntSssTop5p'
    hgsql hg18 -e 'rename table ashSsss to ntSssSnps'
    # check in trackDb, html, ntOoaHaplo.{as,sql}
    # make alpha in trackDb on hgwdev
    # run buildTableDescriptions.pl on hgwdev
    cd ~/kentclean/src
    cvsup
    ~/kent/src/test/buildTableDescriptions.pl -db hg18
    ~/kent/src/test/buildTableDescriptions.pl -db panTro2
    # check in overview page in browser CVS, make + make alpha on hgwdev
    cd ~/browser
    cvsup
    cvs add Neandertal
    cvs add Neandertal/index.html
    # other files... images?
    cvs ci -m "Migrating from genome-nt" Neandertal
    make alpha

    # add tracks to tablesIgnored in all.joiner
    # add this file to CVS and add note about track histories in this file to hg18.txt (& panTro2)

# When tracks are on hgwbeta:
    # run buildTableDescriptions.pl on hgwbeta
    ssh hgwbeta
    cd ~/kentfix && cvsup
    ssh qateam@hgwbeta crontab -l | grep buildTableD
    # run that job.


#########################################################################
# Lifting to hg19 (WORKING - 2010-12-15 - Hiram)
    #### H-C Coding Diffs
    mkdir -p /hive/data/genomes/hg19/bed/homNea/codingDiff
    cd /hive/data/genomes/hg19/bed/homNea/codingDiff
    hgsql -N -e "select * from ntHumChimpCodingDiff;" hg18 \
        | cut -f2- > ntHumChimpCodingDiff.hg18.tab
    liftOver -bedPlus=8 -tab ntHumChimpCodingDiff.hg18.tab \
/usr/local/apache/htdocs-hgdownload/goldenPath/hg18/liftOver/hg18ToHg19.over.chain.gz \
    ntHumChimpCodingDiff.hg19.tab unMapped.tab
# -rw-rw-r-- 1   0 Dec 15 10:49 unMapped.tab
    hgLoadBed -tab hg19 ntHumChimpCodingDiff ntHumChimpCodingDiff.hg19.tab

    #### 5% Lowest S - Selective Sweep Scan (S): 5% Smallest S scores
    mkdir /hive/data/genomes/hg19/bed/homNea/SssTop5p
    cd /hive/data/genomes/hg19/bed/homNea/SssTop5p
    hgsql -N -e "select * from ntSssTop5p;" hg18 \
        | cut -f2- > ntSssTop5p.hg18.tab
    liftOver ntSssTop5p.hg18.tab \
/usr/local/apache/htdocs-hgdownload/goldenPath/hg18/liftOver/hg18ToHg19.over.chain.gz \
    ntSssTop5p.hg19.tab unMapped.tab
# -rw-rw-r-- 1   0 Dec 15 10:49 unMapped.tab
    hgLoadBed -tab hg19 ntSssTop5p ntSssTop5p.hg19.tab

    #### Sel Swp Scan (S) - Selective Sweep Scan (S) on Neandertal vs. Human Polymorphisms (Z-Score +- Variance)
    mkdir /hive/data/genomes/hg19/bed/homNea/SssZScorePMVar
    cd /hive/data/genomes/hg19/bed/homNea/SssZScorePMVar
    bigWigToBedGraph /gbdb/hg18/neandertal/bbi/ntSssZScorePMVar.bw \
	ntSssZScorePMVa.hg18.bedGraph
    liftOver -bedPlus=3 -tab ntSssZScorePMVa.hg18.bedGraph \
/usr/local/apache/htdocs-hgdownload/goldenPath/hg18/liftOver/hg18ToHg19.over.chain.gz \
    ntSssZScorePMVa.hg19.bedGraph unMapped.tab
    grep "^chr" unMapped.tab | wc
    #	1981    7924   62659
    grep -v "^chr" unMapped.tab | sort | uniq -c
    #	1981 #Deleted in new
    grep "^chr" unMapped.tab | cut -f1 | sort | uniq -c
#      2 chr10
#     38 chr12
#      5 chr13
#     15 chr14
#     22 chr15
#    532 chr17
#     16 chr19
#     25 chr2
#      2 chr22
#     48 chr3
#     85 chr4
#     23 chr5
#     24 chr6
#    814 chr7
#    198 chr8
#    132 chrX
    sort -k1,1 -k2,2n ntSssZScorePMVa.hg19.bedGraph > sorted.bedGraph
    bedGraphToBigWig sorted.bedGraph \
	/hive/data/genomes/hg19/chrom.sizes ntSssZScorePMVar.bw
    rm -f sorted.bedGraph
    bigWigInfo ntSssZScorePMVar.bw
# version: 4
# isCompressed: yes
# isSwapped: 0
# primaryDataSize: 71,065,376
# primaryIndexSize: 357,392
# zoomLevels: 10
# chromCount: 23
# basesCovered: 11,101,027
# mean: -0.023223
# min: -8.833200
# max: 33.637199
# std: 4.037342
    bigWigInfo /gbdb/hg18/neandertal/bbi/ntSssZScorePMVar.bw
# version: 3
# isCompressed: yes
# isSwapped: 0
# primaryDataSize: 53,391,942
# primaryIndexSize: 357,400
# zoomLevels: 7
# chromCount: 23
# basesCovered: 11,103,008
# mean: -0.023066
# min: -8.833200
# max: 33.637199
# std: 4.037344
   ls -ogL /gbdb/hg18/neandertal/bbi/ntSssZScorePMVar.bw *.bw
# -rw-rw-r-- 1  56244661 Mar 24  2010 /gbdb/hg18/neandertal/bbi/ntSssZScorePMVar.bw
# -rw-rw-r-- 1 107739421 Dec 15 10:11 ntSssZScorePMVar.bw
    mkdir /gbdb/hg19/neandertal/bbi
    ln -s `pwd`/ntSssZScorePMVar.bw /gbdb/hg19/neandertal/bbi

    hgBbiDbLink database trackName fileName
    hgBbiDbLink hg19 ntSssZScorePMVar \
	/gbdb/hg19/neandertal/bbi/ntSssZScorePMVar.bw

    #### S SNPs - SNPS Used for Selective Sweep Scan (S)
    mkdir /hive/data/genomes/hg19/bed/homNea/SssSnps
    cd /hive/data/genomes/hg19/bed/homNea/SssSnps
    hgsql -N -e "select * from ntSssSnps;" hg18 | cut -f2- > ntSssSnps.hg18.bed
    liftOver -bedPlus=8 -tab ntSssSnps.hg18.bed \
/usr/local/apache/htdocs-hgdownload/goldenPath/hg18/liftOver/hg18ToHg19.over.chain.gz \
    ntSssSnps.hg19.bed unMapped.tab
     grep "^chr" unMapped.tab | wc
    #	1197   10773   74797
    grep -v "^chr" unMapped.tab | sort | uniq -c
    #	1197 #Deleted in new
    hgLoadBed hg19 ntSssSnps ntSssSnps.hg19.bed
    #	Loaded 5614241 elements of size 9

    #### Cand. Gene Flow - Candidate Regions for Gene Flow from Neandertal to
    #### Non-African Modern Humans
    
    mkdir /hive/data/genomes/hg19/bed/homNea/OoaHaplo
    cd /hive/data/genomes/hg19/bed/homNea/OoaHaplo
    hgsql -N -e "select * from ntOoaHaplo;" hg18 | cut -f2- \
	> ntOoaHaplo.hg18.bed
    liftOver -bedPlus=8 -tab ntOoaHaplo.hg18.bed \
/usr/local/apache/htdocs-hgdownload/goldenPath/hg18/liftOver/hg18ToHg19.over.chain.gz \
    ntOoaHaplo.hg19.bed unMapped.tab
# -rw-rw-r-- 1   0 Dec 15 10:49 unMapped.tab

    hgLoadBed hg19 ntOoaHaplo ntOoaHaplo.hg19.bed \
      -sqlTable=$HOME/kent/src/hg/lib/ntOoaHaplo.sql -tab

    #### Neandertal Mito - Neandertal Mitochondrial Sequence (Vi33.16, 2008)
    mkdir /hive/data/genomes/hg19/bed/homNea/Mito
    cd /hive/data/genomes/hg19/bed/homNea/Mito
    hgsql -N -e "select * from ntMito;" hg18 | cut -f2- \
	> ntMito.hg18.psl
    # it is the same chrM in hg19, no need to lift

    hgLoadPsl hg19 -table=ntMito ntMito.hg18.psl
    mkdir /gbdb/hg19/neandertal/ntMito
    ln -s /hive/data/genomes/hg18/bed/ntMito/ntM.fa \
	/gbdb/hg19/neandertal/ntMito/
    hgLoadSeq hg19 /gbdb/hg19/neandertal/ntMito/ntM.fa
