# for emacs: -*- mode: sh; -*-


# This file describes how we made the browser database on the 
# Chicken (Gallus gallus) January 2004 release.


# CREATE BUILD DIRECTORY (DONE 1/23/04 angie)
    ssh kksilo
    mkdir /cluster/store6/galGal1
    ln -s /cluster/store6/galGal1 /cluster/data/galGal1


# DOWNLOAD MITOCHONDRION GENOME SEQUENCE (DONE 1/23/04 angie)
    mkdir /cluster/data/galGal1/M
    cd /cluster/data/galGal1/M
    # go to http://www.ncbi.nih.gov/ and search Nucleotide for 
    # "gallus mitochondrion genome".  That shows the gi number:
    # 5834843 
    # Use that number in the entrez linking interface to get fasta:
    wget -O chrM.fa \
      'http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Text&db=Nucleotide&uid=5834843&dopt=FASTA'
    # Edit chrM.fa: make sure the long fancy header line says it's the 
    # Gallus gallus mitochondrion complete genome, and then replace the 
    # header line with just ">chrM".


# DOWNLOAD GENOME SEQUENCE (DONE 1/28/04 angie)
    ssh kksilo
    cd /cluster/data/galGal1
    # Contig sequences have been stable for a while:
    wget ftp://genome.wustl.edu/private/chicken_040105/Fasta/chicken_040105.contigs.tar.gz
    # Official location (password-protected):
    # http://genome.wustl.edu/projects/chicken/chicken_analysis/chicken_040105.contigs.tar.gz
    mkdir contigs
    cd contigs
    tar xvfz ../chicken_040105.contigs.tar.gz
    foreach f (chicken_031214.pcap.contigs*)
      set n = `echo $f | sed -e 's/chicken_031214.pcap.contigs//'`
      mv $f contigs$n.fa
    end
    faSize *.fa
#1054180845 bases (107580 N's 1054073265 real) in 111864 sequences in 100 files
#Total size: mean 9423.8 sd 19990.9 min 52 (Contig28908.1) max 441791 (Contig132.21) median 2122
#N count: mean 1.0 sd 5.1
    mkdir /cluster/bluearc/galGal1
    cat ../contigs/*.fa > /cluster/bluearc/galGal1/allContigs.fa
    # Get qual scores too
    wget ftp://genome.wustl.edu/private/chicken_040105/Qual/chicken_040105.contigs.qual.tar.gz
    # Official location (password-protected):
    # http://genome.wustl.edu/projects/chicken/chicken_analysis/chicken_040105.contigs.qual.tar.gz
    

# BUILD AND CHECK CHROM-LEVEL SEQUENCE (DONE 1/30/04 angie)
    # 1/30/04: ZW_random has been made a part of Z.
    # on hgwdev, "hgsql galGal1" and get rid of any chrZW_random tables
    ssh kolossus
    cd /cluster/data/galGal1
    wget ftp://genome.wustl.edu/private/lhillier/old/chicken_agp.040129.tar.gz
    # Note: this January 30th tarfile is labeled January 29 -- rename it 
    # to avoid confusion with several Jan. 29 agp downloads.
    mv chicken_agp.040129.tar.gz chicken_agp.040130.tar.gz
    # Official location (password-protected):
    # http://genome.wustl.edu/projects/chicken/chicken_analysis/chicken_agp.040129.tar.gz
    # I downloaded that and cmp'd to chicken_agp.040130.tar.gz -- same.
    tar xvzf chicken_agp.040130.tar.gz
    cd chicken_agp
    # confirmed that the only AGP changes from chicken_agp_29_third were 
    # that ZW_random went away and Z got some new stuff at the beginning.
    cp /dev/null ../chrom.lst
    foreach f (*.agp)
      set chr = `echo $f:r | sed -e 's/^chr//'`
      set base = `echo $chr | sed -e 's/_random$//'`
      mkdir -p ../$base
      cp -p $f ../$base
      if ("$chr" == "$base") echo $chr >> ../chrom.lst
    end
    # OK, tack chrM on there too:
    echo M >> ../chrom.lst
    cd /cluster/data/galGal1
    # get rid of the ZW_random and make a clean slate for Z (not Z_random)
    rm -r ZW Z/chrZ.fa* Z/chrZ_? nib/chrZW_random.nib
    foreach c (`cat chrom.lst`)
      foreach agp ($c/chr$c{,_random}.agp)
        if (-e $agp) then
          set fa = $agp:r.fa
          echo building $fa from $agp
          agpToFa -simpleMultiMixed $agp $agp:t:r $fa \
            /cluster/bluearc/galGal1/allContigs.fa
        endif
      end
    end
    # checkAgpAndFa prints out way too much info -- keep the end/stderr only:
    foreach c (`cat chrom.lst`)
      foreach agp ($c/chr$c{,_random}.agp)
        if (-e $agp) then
          set fa = $agp:r.fa
          echo checking consistency of $agp and $fa
          checkAgpAndFa $agp $fa | tail -1
        endif
      end
    end
    faSize */chr*.fa
#1172160385 bases (118070345 N's 1054090040 real) in 47 sequences in 47 files
#Total size: mean 24939582.7 sd 47753836.2 min 2535 (chrE50C23) max 229650211 (chrUn) median 7900282
#N count: mean 2512135.0 sd 12440150.2


# COMPARE BASE COUNTS WITH WUSTL'S (DONE 2/1/04 angie)
    # Compare LaDeana's size summary file with our chrom faSize output:
    cd /cluster/data/galGal1
    # Download from password-protected location:
    # http://genome.wustl.edu/projects/chicken/chicken_analysis/chicken.localized_per_chromosome
    # Actually, we can only compare total bases with faSize because faSize 
    # doesn't differentiate between an N from a gap and an N base.  
    tail +4 chicken.localized_per_chromosome \
    | perl -wpe 'next if (/^\s*$/); \
                 if (s/^(\w+)\s+(\d+)\s+(\d+)\s*$//) { \
                   ($c, $noGapBP, $totalBP) = ($1, $2, $3); \
                   $c =~ s/Unlocalized/Un/; \
                   $chr = "chr$c"; \
                   $c =~ s/_random//; \
                   $faSize = "faSize $c/$chr.fa"; \
                   $faSize = `$faSize`; \
                   if ($faSize =~ /^(\d+) bases \((\d+) N.s (\d+) real/) { \
                     if ($totalBP != $1) { \
                       print "$chr totalBP: WUSTL $totalBP, us $1!\n"; \
                     } \
                   } else { print "parsed faSize wrong:\n$faSize\n"; } \
                 } else { print "what is this?\n$_\n"; }'


# BREAK UP SEQUENCE INTO 5 MB CHUNKS AT CONTIGS/GAPS (DONE 1/30/04 angie)
    ssh kksilo
    cd /cluster/data/galGal1
    foreach c (`cat chrom.lst`)
      foreach agp ($c/chr$c{,_random}.agp)
        if (-e $agp) then
          set fa = $agp:r.fa
          echo splitting $agp and $fa
          cp -p $agp $agp.bak
          cp -p $fa $fa.bak
          splitFaIntoContigs $agp $fa . -nSize=5000000
        endif
      end
    end
    # splitFaIntoContigs makes new dirs for _randoms.  Move their contents 
    # back into the main chrom dirs and get rid of the _random dirs.
    foreach d (*_random)
      set base = `echo $d | sed -e 's/_random$//'`
      mv $d/lift/oOut.lst $base/lift/rOut.lst
      mv $d/lift/ordered.lft $base/lift/random.lft
      mv $d/lift/ordered.lst $base/lift/random.lst
      rmdir $d/lift
      mv $d/* $base
      rmdir $d
    end
    # Make a "pseudo-contig" for processing chrM too:
    mkdir M/chrM_1
    sed -e 's/chrM/chrM_1/' M/chrM.fa > M/chrM_1/chrM_1.fa
    mkdir M/lift
    echo "chrM_1/chrM_1.fa.out" > M/lift/oOut.lst
    echo "chrM_1" > M/lift/ordered.lst
    echo "0       M/chrM_1        16775 chrM    16775" > M/lift/ordered.lft


# MAKE JKSTUFF AND BED DIRECTORIES (DONE 1/23/04 angie)
    # This used to hold scripts -- better to keep them inline here so 
    # they're in CVS.  Now it should just hold lift file(s) and 
    # temporary scripts made by copy-paste from this file.  
    mkdir /cluster/data/galGal1/jkStuff
    # This is where most tracks will be built:
    mkdir /cluster/data/galGal1/bed


# CREATING DATABASE (DONE 1/28/04 angie)
    ssh hgwdev
    echo 'create database galGal1' | hgsql ''
    # Use df to make sure there is at least 75G free on hgwdev:/var/lib/mysql
    df -h /var/lib/mysql
    # /dev/sdc1             1.8T  221G  1.5T  14% /var/lib/mysql


# CREATING GRP TABLE FOR TRACK GROUPING (DONE 1/28/04 angie)
    ssh hgwdev
    echo "create table grp (PRIMARY KEY(NAME)) select * from hg16.grp" \
      | hgsql galGal1


# MAKE CHROMINFO TABLE WITH (TEMPORARILY UNMASKED) NIBS (DONE 1/30/04 angie)
    # Make nib/, unmasked until RepeatMasker and TRF steps are done.
    # Do this now so we can load up RepeatMasker and run featureBits; 
    # can also load up other tables that don't depend on masking.  
    ssh kksilo
    cd /cluster/data/galGal1
    mkdir nib
    foreach c (`cat chrom.lst`)
      foreach f ($c/chr${c}{,_random}.fa)
        if (-e $f) then
          echo "nibbing $f"
          /cluster/bin/i386/faToNib $f nib/$f:t:r.nib
        endif
      end
    end

    # Make symbolic links from /gbdb/galGal1/nib to the real nibs.
    ssh hgwdev
    mkdir -p /gbdb/galGal1/nib
    foreach f (/cluster/data/galGal1/nib/chr*.nib)
      ln -s $f /gbdb/galGal1/nib
    end
    # Load /gbdb/galGal1/nib paths into database and save size info.
    cd /cluster/data/galGal1
    hgsql galGal1  < $HOME/kent/src/hg/lib/chromInfo.sql
    hgNibSeq -preMadeNib galGal1 /gbdb/galGal1/nib */chr*.fa
    echo "select chrom,size from chromInfo" | hgsql -N galGal1 > chrom.sizes
    # take a look at chrom.sizes, should be 47 lines
    wc chrom.sizes


# REPEAT MASKING (DONE 1/31/04 angie)
    #- Split contigs into 500kb chunks, at gaps if possible:
    ssh kksilo
    cd /cluster/data/galGal1
    foreach c (`cat chrom.lst`)
      foreach d ($c/chr${c}*_?{,?})
        cd $d
        echo "splitting $d"
        set contig = $d:t
        ~/bin/i386/faSplit gap $contig.fa 500000 ${contig}_ -lift=$contig.lft \
            -minGapSize=100
        cd ../..
      end
    end

    #- Make the run directory and job list:
    cd /cluster/data/galGal1
    cat << '_EOF_' > jkStuff/RMChicken
#!/bin/csh -fe

cd $1
pushd .
/bin/mkdir -p /tmp/galGal1/$2
/bin/cp $2 /tmp/galGal1/$2/
cd /tmp/galGal1/$2
/cluster/bluearc/RepeatMasker/RepeatMasker -ali -s -spec chicken $2
popd
/bin/cp /tmp/galGal1/$2/$2.out ./
if (-e /tmp/galGal1/$2/$2.align) /bin/cp /tmp/galGal1/$2/$2.align ./
if (-e /tmp/galGal1/$2/$2.tbl) /bin/cp /tmp/galGal1/$2/$2.tbl ./
if (-e /tmp/galGal1/$2/$2.cat) /bin/cp /tmp/galGal1/$2/$2.cat ./
/bin/rm -fr /tmp/galGal1/$2/*
/bin/rmdir --ignore-fail-on-non-empty /tmp/galGal1/$2
/bin/rmdir --ignore-fail-on-non-empty /tmp/galGal1
'_EOF_'
    # << this line makes emacs coloring happy
    chmod +x jkStuff/RMChicken
    mkdir RMRun
    rm -f RMRun/RMJobs
    touch RMRun/RMJobs
    foreach c (`cat chrom.lst`)
      foreach d ($c/chr${c}{,_random}_?{,?})
          set ctg = $d:t
          foreach f ( $d/${ctg}_?{,?}.fa )
            set f = $f:t
            echo /cluster/data/galGal1/jkStuff/RMChicken \
                 /cluster/data/galGal1/$d $f \
               '{'check out line+ /cluster/data/galGal1/$d/$f.out'}' \
              >> RMRun/RMJobs
          end
      end
    end

    #- Do the run
    ssh kk
    cd /cluster/data/galGal1/RMRun
    para create RMJobs
    para try, para check, para check, para push, para check,...
#Completed: 2783 of 2783 jobs
#Average job time:                1448s      24.13m     0.40h    0.02d
#Longest job:                     3201s      53.35m     0.89h    0.04d
#Submission to last job:          5738s      95.63m     1.59h    0.07d

    #- Lift up the 500KB chunk .out's to 5MB ("pseudo-contig") level
    ssh kksilo
    cd /cluster/data/galGal1
    foreach d (*/chr*_?{,?})
      set contig = $d:t
      echo $contig
      liftUp $d/$contig.fa.out $d/$contig.lft warn $d/${contig}_*.fa.out \
        > /dev/null
    end

    #- Lift pseudo-contigs to chromosome level
    foreach c (`cat chrom.lst`)
      echo lifting $c
      cd $c
      if (-e lift/ordered.lft && ! -z lift/ordered.lft) then
        liftUp chr$c.fa.out lift/ordered.lft warn `cat lift/oOut.lst` \
        > /dev/null
      endif
      if (-e lift/random.lft && ! -z lift/random.lft) then
        liftUp chr${c}_random.fa.out lift/random.lft warn `cat lift/rOut.lst` \
        > /dev/null
      endif
      cd ..
    end

    #- Load the .out files into the database with:
    ssh hgwdev
    cd /cluster/data/galGal1
    hgLoadOut galGal1 */chr*.fa.out


# VERIFY REPEATMASKER RESULTS (DONE 1/30/04 angie)
    # Eyeball some repeat annotations in the browser, compare to lib seqs.
    # Run featureBits on galGal1 and on a comparable genome build, and compare:
    ssh hgwdev
    featureBits galGal1 rmsk
#103553237 bases of 1054197620 (9.823%) in intersection
    # No comparable species -- in conf call 01/21/04, Arian said about 
    # 8-10% should be covered.


# MAKE LIFTALL.LFT (DONE 1/30/04 angie)
    ssh kksilo
    cd /cluster/data/galGal1
    cat */lift/{ordered,random}.lft > jkStuff/liftAll.lft


# PUT UNMASKED CHUNKS ON BLUEARC (DONE 1/30/04 angie)
    # Some of Terry's staging scripts require a flat dir containing 
    # all contigs; also, doesn't hurt to set aside unmasked seq.
    ssh kksilo
    cd /cluster/data/galGal1
    mkdir /cluster/bluearc/galGal1/unmaskedChunks
    foreach d (*/chr*_?{,?})
      cp -p $d/${d:t}.fa /cluster/bluearc/galGal1/unmaskedChunks
    end


# MAKE STS MARKERS TRACK (IN PROGRESS 1/30/04 angie -- need more info from Terry)
    # Doing a dry run to see if I can duplicate Terry's mapping of 
    # markers to the prelim release.
    set base = /cluster/data/galGal1/bed/markers
    mkdir -p $base/primers $base/sts
    # Need to find out how Terry built these:
    cp -p /cluster/bluearc/gg1/markers/sts/convert.pl $base
    cp -p /cluster/bluearc/gg1/markers/stsAlias.bed $base
    cp -p /cluster/bluearc/gg1/markers/stsInfoGG.bed $base
    cp -p /cluster/bluearc/gg1/10.ooc $base

    # Need to find out where Terry got these files:
    cp -p /cluster/bluearc/gg1/markers/primers/all.primers{,.fa} $base/primers
    cp -p /cluster/bluearc/gg1/markers/sts/all.sequence.fa $base/sts

    # Run e-PCR on primers
    mkdir $base/primers/epcr
    cd $base/primers/epcr
    mkdir epcr.out
    /cluster/bin/scripts/createCloneList \
      /cluster/bluearc/galGal1/unmaskedChunks
    # That script creates in.lst/$n.in files, where $n is 
    # the contigs number *plus 1*!  in.lst/0.in is empty and makes 
    # para create complain, so get rid of it.  
    rm in.lst/0.in
    egrep -v '/0.in$' files.lst > tmp.lst; mv tmp.lst files.lst
    echo $base/primers/all.primers > epcr.lst
    cat << '_EOF_' > template
#LOOP
/cluster/bin/scripts/runEpcr {check in line+ $(path1)} {check in line+ $(path2)} {check out exists epcr.out/$(root2).epcr}
#ENDLOOP
'_EOF_'
    # << this line keeps emacs coloring happy
    /cluster/bin/i386/gensub2 epcr.lst files.lst template jobList
    ssh kk
    set base = /cluster/data/galGal1/bed/markers
    cd $base/primers/epcr
    para create jobList
    para try, check, push, check, ...
#Completed: 251 of 251 jobs
#Average job time:                  30s       0.50m     0.01h    0.00d
#Longest job:                       45s       0.75m     0.01h    0.00d
#Submission to last job:            46s       0.77m     0.01h    0.00d
    cat `ls -1 epcr.out/* | sort -g` > all.epcr

    # blat primers -- with old blat.2 !
    mkdir $base/primers/blat
    cd $base/primers/blat
    mkdir primers.out
    ls -1S /cluster/bluearc/galGal1/unmaskedChunks/*.fa > contigs.lst
    echo $base/primers/all.primers.fa > primers.lst
    cat << '_EOF_' > template
#LOOP
/cluster/home/kent/bin/i386/blat.2 -tileSize=10 -ooc=../../10.ooc -minMatch=1 -minScore=1 -minIdentity=75 {check in line+ $(path1)} {check in line+ $(path2)} {check out line+ primers.out/$(root1).$(root2).psl}
#ENDLOOP
'_EOF_'
    # << this line keeps emacs coloring happy
    /cluster/bin/i386/gensub2 contigs.lst primers.lst template jobList
    ssh kk
    set base = /cluster/data/galGal1/bed/markers
    cd $base/primers/blat
    para create jobList
    para try, check, push, check, ...
#Completed: 261 of 261 jobs
#Average job time:                  18s       0.30m     0.00h    0.00d
#Longest job:                       27s       0.45m     0.01h    0.00d
#Submission to last job:            28s       0.47m     0.01h    0.00d
    /cluster/bin/i386/pslSort dirs primers.psl temp primers.out
    rm -r temp

    # Make primers.final from e-PCR and blat results:
    cd $base/primers
    filterPrimers -chicken ../stsInfoGG.bed blat/primers.psl all.primers \
      epcr/all.epcr > primers.psl.filter
    extractPslInfo primers.psl.filter
    mv primers.psl.filter.initial primers.initial
    sort -k 4n primers.initial > primers.final

    # blat sts sequences
    mkdir $base/sts/blat
    cd $base/sts/blat
    mkdir stsMarkers.out
    ls -1S /cluster/bluearc/galGal1/unmaskedChunks/*.fa > contigs.lst
    echo $base/sts/all.sequence.fa > stsMarkers.lst
    cat << '_EOF_' > template
#LOOP
/cluster/bin/i386/blat {check in line+ $(path1)} {check in line+ $(path2)} {check out line+ stsMarkers.out/$(root1).psl}
#ENDLOOP
'_EOF_'
    # << this line keeps emacs coloring happy
    /cluster/bin/i386/gensub2 contigs.lst stsMarkers.lst template jobList
    ssh kk
    set base = /cluster/data/galGal1/bed/markers
    cd $base/sts/blat
    para create jobList
    para try, check, push, check, ...
#Completed: 261 of 261 jobs
#Average job time:                  96s       1.59m     0.03h    0.00d
#Longest job:                      199s       3.32m     0.06h    0.00d
#Submission to last job:           199s       3.32m     0.06h    0.00d
    /cluster/bin/i386/pslSort dirs raw.psl temp stsMarkers.out
    /cluster/bin/i386/pslReps -nearTop=0.01 -minCover=0.6 -minAli=0.8 \
      -noIntrons raw.psl stsMarkers.psl /dev/null
    rm -rf temp
    rm -f raw.psl

    cd $base/sts
    filterPsl blat/stsMarkers.psl
    extractUniqPsl stsMarkers.psl.filter stsMarkers.psl.filter
    pslReps stsMarkers.psl.filter.dup.psl stsMarkers.psl.filter.1 /dev/null \
      -noIntrons -minCover=0.80 -minAli=0.90 -nearTop=0.05
    mv stsMarkers.psl.filter stsMarkers.psl.filter.orig
    cat stsMarkers.psl.filter.1 stsMarkers.psl.filter.uniq.psl \
      > stsMarkers.psl.filter
    extractPslInfo -h stsMarkers.psl.filter
    rm stsMarkers.psl.filter.1*
    rm stsMarkers.psl.filter.dup.psl stsMarkers.psl.filter.names \
      stsMarkers.psl.filter.uniq.psl
    extractPslInfo -h stsMarkers.psl.filter
    mv stsMarkers.psl.filter.initial stsMarkers.initial
    sort -k 4n stsMarkers.initial > stsMarkers.final.orig
    ../convert.pl ../stsAlias.bed stsMarkers.final.orig > stsMarkers.final

    cd $base
    combineSeqPrimerPos $base/sts/stsMarkers.final \
      $base/primers/primers.final > stsMarkers_pos.rdb
    # This step needs work -- output is 0-length
    createSTSbed stsInfoGG.bed stsMarkers_pos.rdb > stsMap.bed

    ssh hgwdev
    cd /cluster/data/galGal1/bed/markers
    # Make an stsMarkersTmp so we can at least see the locations
    awk '{printf "%s\t%s\t%s\t%s\t%d\n", $1, $2, $3, $4, $5 * 1000};' \
      sts/stsMarkers.final.orig \
    | liftUp -type=.bed stsMarkersTmp.bed ../../jkStuff/liftAll.lft warn stdin
    hgLoadBed galGal1 stsMarkerTmp stsMarkersTmp.bed 
    # lift & load all_sts_primer
    liftUp primers.filter.lifted.psl ../../jkStuff/liftAll.lft warn \
      primers/primers.psl.filter
    hgLoadPsl -table=all_sts_primer -fastLoad \
      galGal1 primers.filter.lifted.psl
    # lift & load all_sts_seq
    liftUp stsMarkers.filter.lifted.psl ../../jkStuff/liftAll.lft warn \
      sts/stsMarkers.psl.filter
    hgLoadPsl -table=all_sts_seq -fastLoad \
      galGal1 stsMarkers.filter.lifted.psl
    # load sequences
    mkdir /gbdb/galGal1/sts
    ln -s /cluster/data/galGal1/bed/markers/sts/all.sequence.fa \
      /gbdb/galGal1/sts/
    ln -s /cluster/data/galGal1/bed/markers/primers/all.primers.fa \
      /gbdb/galGal1/sts/
    hgLoadSeq galGal1 /gbdb/galGal1/sts/*.fa


# BACENDS (DONE 1/31/04 angie)
    ssh kksilo
    mkdir /cluster/data/galGal1/bed/bacends
    cd /cluster/data/galGal1/bed/bacends
    # download
    wget ftp://ftp.ncbi.nih.gov/genomes/BACENDS/gallus_gallus/AllBACends.mfa.gz
    wget ftp://ftp.ncbi.nih.gov/genomes/BACENDS/gallus_gallus/cl_acc_gi_len.gz
    gunzip *.gz
    /cluster/bin/scripts/convertBacEndPairInfo cl_acc_gi_len >& pairInfo.log
    # Next time around, don't keep the version number, so that the 
    # bacends.fa accs will match the cl_acc_gi_len accs.  Use this 
    # replacement instead: 's/^>gi\|\d+\|gb\|(\w+)\.\d+\|.*/>$1/'
    perl -wpe 's/^>gi\|\d+\|gb\|(\w+\.\d+)\|.*/>$1/' AllBACends.mfa \
      > bacends.fa
    faSize bacends.fa
#146921771 bases (2724913 N's 144196858 real) in 135555 sequences in 1 files
#Total size: mean 1083.9 sd 184.7 min 64 (CC322877.1) max 2431 (CC263290.1) median 1095
#N count: mean 20.1 sd 73.1
    mkdir /cluster/bluearc/galGal1/bacends
    faSplit sequence bacends.fa 20 /cluster/bluearc/galGal1/bacends/bacends
    ls -1S /cluster/bluearc/galGal1/bacends/*.fa > bacends.lst
    ls -1S /cluster/bluearc/galGal1/unmaskedChunks/*.fa > contigs.lst
    cat << '_EOF_' > template
#LOOP
/cluster/bin/i386/blat $(path1) $(path2) -tileSize=10 -ooc=../markers/10.ooc {check out line+ psl/$(root1)_$(root2).psl}
#ENDLOOP
'_EOF_'
    # << this line keeps emacs coloring happy
    mkdir psl
    gensub2 contigs.lst bacends.lst template jobList
    ssh kk
    cd /cluster/data/galGal1/bed/bacends
    para create jobList
    para try, check, push, check, ...
#Completed: 5220 of 5220 jobs
#Average job time:                 105s       1.75m     0.03h    0.00d
#Longest job:                     2431s      40.52m     0.68h    0.03d
#Submission to last job:          2608s      43.47m     0.72h    0.03d
    # back on kksilo, filter and lift results:
    pslSort dirs raw.psl temp psl
    pslReps -nearTop=0.02 -minCover=0.60 -minAli=0.85 -noIntrons raw.psl \
      bacEnds.psl /dev/null
    liftUp bacEnds.lifted.psl ../../jkStuff/liftAll.lft warn bacEnds.psl
    # pslSort ??
    rm -r temp
    rm raw.psl

    extractUniqPsl bacEnds.lifted.psl bacEnds.psl.filter
    pslReps bacEnds.psl.filter.dup.psl bacEnds.psl.filter.1 /dev/null \
      -noIntrons -minCover=0.80 -minAli=0.90 -nearTop=0.05
    extractUniqPsl bacEnds.psl.filter.1 bacEnds.psl.filter.1
    rm bacEnds.psl.filter.1
    pslReps bacEnds.psl.filter.1.dup.psl bacEnds.psl.filter.2 /dev/null \
      -noIntrons -minCover=0.90 -minAli=0.95 -nearTop=0.01
    extractUniqPsl bacEnds.psl.filter.2 bacEnds.psl.filter.2
    rm bacEnds.psl.filter.2
    pslReps bacEnds.psl.filter.2.dup.psl bacEnds.psl.filter.3 /dev/null \
      -noIntrons -minCover=0.95 -minAli=0.98 -nearTop=0.01
    cat bacEnds.psl.filter.3 \
        bacEnds.psl.filter.uniq.psl \
        bacEnds.psl.filter.1.uniq.psl \
        bacEnds.psl.filter.2.uniq.psl \
    > bacEnds.psl.filter.lifted
    rm *.names bacEnds.psl.filter.3 bacEnds.psl.filter.uniq.psl \
       bacEnds.psl.filter.1.uniq.psl bacEnds.psl.filter.2.uniq.psl

    extractPslInfo -h -be bacEnds.psl.filter.lifted
    mv bacEnds.psl.filter.lifted.initial bacEnds.initial
    # Oops, should have stripped the version from bacends.fa acc's so 
    # they would match cl_acc_gi_len accs!
    perl -wpe 's/^(\w+\s+\d+\s+\d+\s+\w+)\.\d+/$1/' bacEnds.initial \
      > bacEndsNoVers.initial
    mv bacEndsNoVers.initial bacEnds.initial
    findAccession -agp -chicken bacEnds.initial /cluster/data/galGal1
    # rm bacEnds.initial
    sort -k 4 bacEnds.initial.acc > bacEnds.sort
    compileBacEnds bacEnds.sort bacEndPairs.txt bacEndSingles.txt \
      > bacEnds.temp
    sorttbl chrom chromStart < bacEnds.temp > bacEnds.rdb
    rm bacEnds.initial.acc bacEnds.sort bacEnds.temp

    createBACpairs bacEnds.rdb bacEndPairs.txt > temp.rdb
    sorttbl chrom1 -r ord1 chromStart1 < temp.rdb > bacEnds.pairs.rdb
    rm temp.rdb
    createBACpairs -bed bacEnds.rdb bacEndPairs.txt > bacEndPairs.rdb
    sorttbl chrom chromStart < bacEndPairs.rdb \
    | headchg -del > bacEndPairs.bed    
    rm bacEndPairs.rdb
    createBacPairsBad -bed bacEnds.rdb bacEndPairs.txt > bacEndPairsBad.rdb
    sorttbl chrom chromStart < bacEndPairsBad.rdb \
    | headchg -del > bacEndPairsBad.bed    
    rm bacEndPairsBad.rdb
    sorttbl chrom chromStart < bacEndPairsLong.rdb \
    | headchg -del > bacEndPairsLong.bed
    rm bacEndPairsLong.rdb
    # This made something 0-length, but I'm not going to pursue for now:
    findMissingEnd -max 500000 singleBac.txt bacEnds.psl.filter.2.dup.psl \
      bacEnds.psl.filter.1.dup.psl bacEnds.psl.filter.dup.psl > bacEndsMiss.psl
    mv bacEnds.psl.filter.lifted bacEnds.psl.filter.lifted.orig
    cat bacEnds.psl.filter.lifted.orig bacEndsMiss.psl \
      > bacEnds.psl.filter.lifted
    rm bacEnds.psl.filter.2.dup.psl bacEnds.psl.filter.1.dup.psl \
       bacEnds.psl.filter.dup.psl
    # Doh... again, having to strip those damn version numbers:
    perl -wpe \
      '@w=split(/\t/); if (defined $w[9]){ $w[9] =~ s/(\w+)\.\d+/$1/; } \
       $_=join("\t",@w)' \
      bacEnds.psl.filter.lifted > tmp
    mv tmp bacEnds.psl.filter.lifted
    extractPslLoad bacEnds.psl.filter.lifted bacEndPairs.bed \
      bacEndPairsBad.bed bacEndPairsLong.bed > bacEnds.psl.rdb
    sorttbl tname tstart < bacEnds.psl.rdb | headchg -del > bacEnds.load.psl
    rm bacEnds.psl.rdb
    # OK, now before loading sequences, we really have to fix bacends.fa:
    perl -wpe 's/^>(\w+)\.\d+/>$1/' bacends.fa \
      > tmp
    mv tmp bacends.fa

    # Load seq's and alignments
    ssh hgwdev
    cd /cluster/data/galGal1/bed/bacends
    mkdir /gbdb/galGal1/bacends
    ln -s /cluster/data/galGal1/bed/bacends/bacends.fa /gbdb/galGal1/bacends
    hgLoadSeq galGal1 /gbdb/galGal1/bacends/bacends.fa
    hgLoadPsl -table=all_bacends -fastLoad \
      galGal1 /cluster/data/galGal1/bed/bacends/bacEnds.load.psl
    # Don't know which two rows prompted this... but all seem to have loaded.
#load of all_bacends did not go as planned: 76325 record(s), 0 row(s) skipped, 2 warning(s) loading psl.tab
    ~/bin/i386/hgLoadBed -sqlTable=$HOME/kent/src/hg/lib/bacEndPairs.sql \
      -hasBin galGal1 bacEndPairs bacEndPairs.bed

    sed -e 's/bacEndPairs/bacEndPairsBad/g' \
      $HOME/kent/src/hg/lib/bacEndPairs.sql > bacEndPairsBad.sql
    ~/bin/i386/hgLoadBed -sqlTable=bacEndPairsBad.sql \
      -hasBin galGal1 bacEndPairsBad bacEndPairsBad.bed
    # Doh!  12 lines of that have negative starts.  Well, trim those 
    # for now and load the rest, then go back and find the neg-start 
    # cause...
    egrep -v '	-[0-9]+' bacEndPairsBad.bed > bacEndPairsBadFudge.bed
    ~/bin/i386/hgLoadBed -sqlTable=bacEndPairsBad.sql \
      -hasBin galGal1 bacEndPairsBad bacEndPairsBadFudge.bed

    sed -e 's/bacEndPairs/bacEndPairsLong/g' \
      $HOME/kent/src/hg/lib/bacEndPairs.sql > bacEndPairsLong.sql
    ~/bin/i386/hgLoadBed -sqlTable=bacEndPairsLong.sql \
      -hasBin galGal1 bacEndPairsLong bacEndPairsLong.bed


# SIMPLE REPEATS (TRF) (DONE 1/30/04 angie)
    # TRF runs pretty quickly now... it takes a few hours total runtime, 
    # so instead of binrsyncing and para-running, just do this on the
    # local fileserver
    ssh kksilo
    mkdir /cluster/data/galGal1/bed/simpleRepeat
    cd /cluster/data/galGal1/bed/simpleRepeat
    mkdir trf
    cp /dev/null jobs.csh
    foreach d (/cluster/data/galGal1/*/chr*_?{,?})
      set ctg = $d:t
      foreach f ($d/${ctg}.fa)
        set fout = $f:t:r.bed
        echo $fout
        echo "/cluster/bin/i386/trfBig -trf=/cluster/bin/i386/trf $f /dev/null -bedAt=trf/$fout -tempDir=/tmp" \
        >> jobs.csh
      end
    end
    csh jobs.csh >&! jobs.log &
    # check on this with
    tail -f jobs.log
    wc -l jobs.csh
    ls -1 trf | wc -l
    # When job is done do:
    liftUp simpleRepeat.bed /cluster/data/galGal1/jkStuff/liftAll.lft warn \
      trf/*.bed

    # Load into the database:
    ssh hgwdev
    hgLoadBed galGal1 simpleRepeat \
      /cluster/data/galGal1/bed/simpleRepeat/simpleRepeat.bed \
      -sqlTable=$HOME/src/hg/lib/simpleRepeat.sql
    featureBits galGal1 simpleRepeat
    # 8436953 bases of 1054197620 (0.800%) in intersection
    # Seems awful low even though chicken is supposed to be a 
    # low-repeat-density animal.  Run it by someone...


# PROCESS SIMPLE REPEATS INTO MASK (DONE 1/30/04/ angie)
    # After the simpleRepeats track has been built, make a filtered version 
    # of the trf output: keep trf's with period <= 12:
    ssh kksilo
    cd /cluster/data/galGal1/bed/simpleRepeat
    mkdir -p trfMask
    foreach f (trf/chr*.bed)
      awk '{if ($5 <= 12) print;}' $f > trfMask/$f:t
    end
    # Lift up filtered trf output to chrom coords as well:
    cd /cluster/data/galGal1
    mkdir bed/simpleRepeat/trfMaskChrom
    foreach c (`cat chrom.lst`)
      if (-e $c/lift/ordered.lst) then
        perl -wpe 's@(\S+)@bed/simpleRepeat/trfMask/$1.bed@' \
          $c/lift/ordered.lst > $c/lift/oTrf.lst
        liftUp bed/simpleRepeat/trfMaskChrom/chr$c.bed \
          jkStuff/liftAll.lft warn `cat $c/lift/oTrf.lst`
      endif
      if (-e $c/lift/random.lst) then
        perl -wpe 's@(\S+)@bed/simpleRepeat/trfMask/$1.bed@' \
           $c/lift/random.lst > $c/lift/rTrf.lst
        liftUp bed/simpleRepeat/trfMaskChrom/chr${c}_random.bed \
          jkStuff/liftAll.lft warn `cat $c/lift/rTrf.lst`
      endif
    end
    # Here's the coverage for the filtered TRF:
    ssh hgwdev
    cat /cluster/data/galGal1/bed/simpleRepeat/trfMaskChrom/*.bed \
      > /tmp/filtTrf.bed
    featureBits galGal1 /tmp/filtTrf.bed
    # 4512560 bases of 1054197620 (0.428%) in intersection


# MASK SEQUENCE WITH REPEATMASKER AND SIMPLE REPEAT/TRF (DONE 1/31/04 angie)
    ssh kksilo
    cd /cluster/data/galGal1
    # Soft-mask (lower-case) the contig and chr .fa's, 
    # then make hard-masked versions from the soft-masked.  
    set trfCtg=bed/simpleRepeat/trfMask
    set trfChr=bed/simpleRepeat/trfMaskChrom
    foreach f (*/chr*.fa)
      echo "repeat- and trf-masking $f"
      maskOutFa -soft $f $f.out $f
      set chr = $f:t:r
      maskOutFa -softAdd $f $trfChr/$chr.bed $f
      echo "hard-masking $f"
      maskOutFa $f hard $f.masked
    end
    foreach c (`cat chrom.lst`)
      echo "repeat- and trf-masking contigs of chr$c, chr${c}_random"
      foreach d ($c/chr*_?{,?})
        set ctg=$d:t
        set f=$d/$ctg.fa
        maskOutFa -soft $f $f.out $f
        maskOutFa -softAdd $f $trfCtg/$ctg.bed $f
        maskOutFa $f hard $f.masked
      end
    end
    #- Rebuild the nib files, using the soft masking in the fa:
    mkdir nib
    foreach f (*/chr*.fa)
      faToNib -softMask $f nib/$f:t:r.nib
    end


# GOLD AND GAP TRACKS (DONE 1/30/04 angie)
    ssh hgwdev
    cd /cluster/data/galGal1
    hgGoldGapGl -noGl -chromLst=chrom.lst galGal1 /cluster/data/galGal1 .
    # featureBits fails if there's no chrM_gap, so make one:
    # echo "create table chrM_gap like chr1_gap" | hgsql galGal1
    # oops, that won't work until v4.1, so do this for the time being:
    echo "create table chrM_gap select * from chr1_gap where 0=1" \
    | hgsql galGal1


# MAKE GCPERCENT (DONE 1/30/04 angie -- DROPPED 2/4/04, replaced by gc20kBase)
     ssh hgwdev
     mkdir /cluster/data/galGal1/bed/gcPercent
     cd /cluster/data/galGal1/bed/gcPercent
     hgsql galGal1  < $HOME/kent/src/hg/lib/gcPercent.sql
     hgGcPercent galGal1 ../../nib


# MAKE HGCENTRALTEST ENTRY AND TRACKDB TABLE FOR GALGAL1 (DONE 1/28/04 angie)
    ssh hgwdev
    # Add dbDb and defaultDb entries:
    echo 'insert into dbDb (name, description, nibPath, organism,  \
          defaultPos, active, orderKey, genome, scientificName,  \
          htmlPath, hgNearOk)  \
          values("galGal1", "Jan. 2004", \
          "/gbdb/galGal1/nib", "Chicken", "chr13:13124250-13128200", 1, \
          35, "Chicken", "Gallus gallus", \
          "/gbdb/galGal1/html/description.html", 0);' \
    | hgsql -h genome-testdb hgcentraltest
    echo 'insert into defaultDb values("Chicken", "galGal1");' \
        | hgsql -h genome-testdb hgcentraltest

    # Make trackDb table so browser knows what tracks to expect:
    ssh hgwdev
    cd $HOME/kent/src/hg/makeDb/trackDb
    cvs up -d -P
    # Edit that makefile to add galGal1 in all the right places and do
    make update
    make alpha
    cvs commit makefile


# MAKE HGCENTRALTEST BLATSERVERS ENTRY FOR GALGAL1 (TODO)
    ssh hgwdev
    echo 'insert into blatServers values("galGal1", "blat?", "17778", "1"); \
          insert into blatServers values("galGal1", "blat?", "17779", "0");' \
      | hgsql -h genome-testdb hgcentraltest


# MAKE DESCRIPTION/SAMPLE POSITION HTML PAGE (DONE 1/28/04 angie)
    ssh hgwdev
    mkdir /gbdb/galGal1/html
    # Write ~/kent/src/hg/makeDb/trackDb/chicken/galGal1/description.html 
    # with a description of the assembly and some sample position queries.  
    chmod a+r $HOME/kent/src/hg/makeDb/trackDb/chicken/galGal1/description.html
    # Check it in and copy (ideally using "make alpha" in trackDb) to 
    # /gbdb/galGal1/html


# MAKE DOWNLOADABLE SEQUENCE FILES (TODO)
    ssh kksilo
    cd /cluster/data/galGal1
    #- Build the .zip files
    cat << '_EOF_' > jkStuff/zipAll.csh
rm -rf zip
mkdir zip
zip -j zip/chromAgp.zip */chr*.agp
zip -j zip/chromOut.zip */chr*.fa.out
zip -j zip/chromFa.zip */chr*.fa
zip -j zip/chromFaMasked.zip */chr*.fa.masked
cd bed/simpleRepeat
zip ../../zip/chromTrf.zip trfMaskChrom/chr*.bed
cd ../..
cd /cluster/data/genbank
./bin/i386/gbGetSeqs -db=galGal1 -native GenBank mrna \
        /cluster/data/galGal1/zip/mrna.fa
cd /cluster/data/galGal1/zip
zip -j mrna.zip mrna.fa
'_EOF_'
    # << this line makes emacs coloring happy
    csh ./jkStuff/zipAll.csh |& tee zip/zipAll.log
    cd zip
    #- Look at zipAll.log to make sure all file lists look reasonable.  
    #- Check zip file integrity:
    foreach f (*.zip)
      unzip -t $f > $f.test
      tail -1 $f.test
    end
    wc -l *.zip.test

    #- Copy the .zip files to hgwdev:/usr/local/apache/...
    ssh hgwdev
    cd /cluster/data/galGal1/zip
    set gp = /usr/local/apache/htdocs/goldenPath/galGal1
    mkdir -p $gp/chromosomes
    foreach f ( ../*/chr*.fa )
      zip -j $gp/chromosomes/$f:t.zip $f
    end
    mkdir -p $gp/bigZips
    cp -p *.zip $gp/bigZips

    cd $gp
    # Take a look at bigZips/* and chromosomes/*, update their README.txt's
    # Can't make refGene upstream sequence files - no refSeq for chicken.
    # Maybe ensGene when we get that.


# PUT MASKED SEQUENCE OUT FOR CLUSTER RUNS (DONE 1/31/04 angie)
    ssh kksilo
    # Chrom-level mixed nibs that have been repeat- and trf-masked:
    rm -rf /cluster/bluearc/galGal1/nib
    mkdir /cluster/bluearc/galGal1/nib
    cp -p nib/chr*.nib /cluster/bluearc/galGal1/nib
    # Pseudo-contig fa that have been repeat- and trf-masked:
    rm -rf /cluster/bluearc/galGal1/trfFa
    mkdir /cluster/bluearc/galGal1/trfFa
    foreach d (*/chr*_?{,?})
      cp $d/$d:t.fa /cluster/bluearc/galGal1/trfFa
    end


# MAKE CHICKEN LINEAGE-SPECIFIC REPEATS (DONE 2/18/04 angie)
    # In an email 2/13/04, Arian said we could treat all human repeats as 
    # lineage-specific, but could exclude these from chicken as ancestral:
    # L3, L3a, L3b, MIR, MIR3, MIRb, MIRm
    ssh kksilo
    cd /cluster/data/galGal1
    mkdir /cluster/bluearc/galGal1/linSpecRep
    foreach f (*/chr*.fa.out)
      awk '$10 !~/^(L3|L3a|L3b|MIR|MIR3|MIRb|MIRm)$/ {print;}' $f \
      > /cluster/bluearc/galGal1/linSpecRep/$f:t:r:r.out.spec
    end


# SWAP BLASTZ HUMAN-CHICKEN TO CHICKEN-HUMAN (HG16) (DONE 2/19/04 angie)
    ssh kolossus
    mkdir /cluster/data/galGal1/bed/blastz.hg16.swap.2004-02-18
    cd /cluster/data/galGal1/bed/blastz.hg16.swap.2004-02-18
    set aliDir = /cluster/data/hg16/bed/blastz.galGal1.2004-02-18
    cp $aliDir/S1.len S2.len
    cp $aliDir/S2.len S1.len
    mkdir unsorted axtChrom
    cat $aliDir/axtChrom/chr*.axt \
    | axtSwap stdin $aliDir/S1.len $aliDir/S2.len stdout \
    | axtSplitByTarget stdin unsorted
    # Sort the shuffled .axt files.
    foreach f (unsorted/*.axt)
      echo sorting $f:t:r
      axtSort $f axtChrom/$f:t
    end
    du -sh $aliDir/axtChrom unsorted axtChrom
#1.4G    /cluster/data/hg16/bed/blastz.galGal1.2004-02-18/axtChrom
#1.4G    unsorted
#1.4G    axtChrom
    rm -r unsorted


# CHAIN HUMAN BLASTZ (DONE 2/19/04 angie)
    # Run axtChain on little cluster
    ssh kkr1u00
    cd /cluster/data/galGal1/bed/blastz.hg16.swap.2004-02-18
    mkdir -p axtChain/run1
    cd axtChain/run1
    mkdir out chain
    ls -1S /cluster/data/galGal1/bed/blastz.hg16.swap.2004-02-18/axtChrom/*.axt \
      > input.lst
    cat << '_EOF_' > gsub
#LOOP
doChain {check in exists $(path1)} {check out line+ chain/$(root1).chain} {check out line+ out/$(root1).out}
#ENDLOOP
'_EOF_'
    # << this line makes emacs coloring happy

    # Make our own linear gap file with reduced gap penalties, 
    # in hopes of getting longer chains:
    cat << '_EOF_' > ../../chickenHumanTuned.gap
tablesize	11
smallSize	111
position	1	2	3	11	111	2111	12111	32111	72111	152111	252111
qGap	325	360	400	450	600	1100	3600	7600	15600	31600	56600
tGap	325	360	400	450	600	1100	3600	7600	15600	31600	56600
bothGap	625	660	700	750	900	1400	4000	8000	16000	32000	57000
'_EOF_'
    # << this line makes emacs coloring happy

    cat << '_EOF_' > doChain
#!/bin/csh
axtFilter -notQ=chrUn_random $1 \
| ~/bin/i386/axtChain -scoreScheme=/cluster/data/blastz/HoxD55.q \
                      -linearGap=../../chickenHumanTuned.gap \
                      -minScore=5000 stdin \
  /cluster/bluearc/galGal1/nib \
  /cluster/bluearc/scratch/hg/gs.17/build34/bothMaskedNibs $2 > $3
'_EOF_'
    # << this line makes emacs coloring happy
    chmod a+x doChain
    gensub2 input.lst single gsub jobList
    para create jobList
    para try, check, push, check...
#Completed: 45 of 45 jobs
#Average job time:                 292s       4.86m     0.08h    0.00d
#Longest job:                      906s      15.10m     0.25h    0.01d
#Submission to last job:          2233s      37.22m     0.62h    0.03d

    # now on the cluster server, sort chains
    ssh kksilo
    cd /cluster/data/galGal1/bed/blastz.hg16.swap.2004-02-18/axtChain
    chainMergeSort run1/chain/*.chain > all.chain
    chainSplit chain all.chain
    rm run1/chain/*.chain

    # take a look at score distr's
    foreach f (chain/*.chain)
      grep chain $f | awk '{print $2;}' | sort -nr > /tmp/score.$f:t:r
      echo $f:t:r
      textHistogram -binSize=10000 /tmp/score.$f:t:r
      echo ""
    end

    # Load chains into database
    ssh hgwdev
    cd /cluster/data/galGal1/bed/blastz.hg16.swap.2004-02-18/axtChain/chain
    foreach i (*.chain)
        set c = $i:r
        echo loading $c
        hgLoadChain galGal1 ${c}_chainHg16 $i
    end


# NET HUMAN BLASTZ (DONE 2/19/04 angie)
    ssh kksilo
    cd /cluster/data/galGal1/bed/blastz.hg16.swap.2004-02-18/axtChain
    chainPreNet all.chain ../S1.len ../S2.len stdout \
    | chainNet stdin -minSpace=1 ../S1.len ../S2.len stdout /dev/null \
    | netSyntenic stdin noClass.net

    # Add classification info using db tables:
    ssh hgwdev
    cd /cluster/data/galGal1/bed/blastz.hg16.swap.2004-02-18/axtChain
    netClass noClass.net galGal1 hg16 human.net

    # Make a 'syntenic' subset:
    ssh kksilo
    cd /cluster/data/galGal1/bed/blastz.hg16.swap.2004-02-18/axtChain
    rm noClass.net
    # Make a 'syntenic' subset of these with
    netFilter -syn human.net > humanSyn.net

    # Load the nets into database 
    ssh hgwdev
    cd /cluster/data/galGal1/bed/blastz.hg16.swap.2004-02-18/axtChain
    netFilter -minGap=10 human.net |  hgLoadNet galGal1 netHg16 stdin
    netFilter -minGap=10 humanSyn.net | hgLoadNet galGal1 netSyntenyHg16 stdin
    # Add entries for chaing16, netHg16, syntenyHg16 to chicken/galGal1 trackDb


# RUN AXTBEST ON CHICKEN/HUMAN (DONE 2/19/04 angie)
    ssh kolossus
    cd /cluster/data/galGal1/bed/blastz.hg16.swap.2004-02-18
    mkdir axtBest pslBest
    foreach f (axtChrom/chr*.axt)
      set chr=$f:t:r
      echo axtBesting $chr
      axtBest $f $chr axtBest/$chr.axt -minScore=300
      axtToPsl axtBest/$chr.axt S1.len S2.len pslBest/$chr.psl
    end
    ssh hgwdev
    cd /cluster/data/galGal1/bed/blastz.hg16.swap.2004-02-18
    cat pslBest/chr*.psl | hgLoadPsl -table=blastzBestHg16 galGal1 stdin


# BLASTZ SELF (DONE 2/23/04 angie)
# lavToAxt has a new -dropSelf option that *will* replace axtDropOverlap,
# once I fix a bug in it...
    ssh kk
    mkdir -p /cluster/data/galGal1/bed/blastz.galGal1.2004-02-19
    cd /cluster/data/galGal1/bed/blastz.galGal1.2004-02-19
    # Abridge repeats (linSpecRep relative to human but should be fine right?)
    cat << '_EOF_' > DEF
# chicken vs. chicken
export PATH=/usr/bin:/bin:/usr/local/bin:/cluster/bin/penn:/cluster/bin/i386:/cluster/home/angie/schwartzbin

ALIGN=blastz-run
BLASTZ=blastz
BLASTZ_H=2000
BLASTZ_ABRIDGE_REPEATS=1

# TARGET
# Chicken
SEQ1_DIR=/cluster/bluearc/galGal1/nib
SEQ1_RMSK=
SEQ1_FLAG=
SEQ1_SMSK=/cluster/bluearc/galGal1/linSpecRep
SEQ1_IN_CONTIGS=0
SEQ1_CHUNK=10000000
SEQ1_LAP=10000

# QUERY
# Human
SEQ2_DIR=/cluster/bluearc/galGal1/nib
SEQ2_RMSK=
SEQ2_FLAG=
SEQ2_SMSK=/cluster/bluearc/galGal1/linSpecRep
SEQ2_IN_CONTIGS=0
SEQ2_CHUNK=10000000
SEQ2_LAP=10000

BASE=/cluster/data/galGal1/bed/blastz.galGal1.2004-02-19

DEF=$BASE/DEF
RAW=$BASE/raw
CDBDIR=$BASE
SEQ1_LEN=$BASE/S1.len
SEQ2_LEN=$BASE/S2.len
'_EOF_'
    # << this line makes emacs coloring happy

    # Save the DEF file in the current standard place
    chmod +x DEF
    cp DEF ~angie/hummus/DEF.galGal1-galGal1

    # source the DEF file
    bash
    . ./DEF
    mkdir -p $BASE/run
    ~angie/hummus/make-joblist $DEF > $BASE/run/j
    sh $BASE/xdir.sh
    cd $BASE/run
    # now edit j to prefix path to executable name
    sed -e 's#^#/cluster/bin/penn/#' j > j2
    wc -l j*
    head j2
    # make sure the j2 edits are OK, then use it:
    mv j2 j
    para create j
    para try, check, push, check, ...
#Completed: 21025 of 21025 jobs
#Average job time:                 400s       6.67m     0.11h    0.00d
#Longest job:                     1473s      24.55m     0.41h    0.02d
#Submission to last job:          9966s     166.10m     2.77h    0.12d

    # --- normalize (PennSpeak for lift)
    ssh kkr1u00
    cd /cluster/data/galGal1/bed/blastz.galGal1.2004-02-19
    # run bash shell if not running it already
    source DEF
    mkdir -p $BASE/run.1
    mkdir -p $BASE/lav
    # create a new job list to convert out files to lav
    /cluster/bin/scripts/blastz-make-out2lav $DEF $BASE \
                        > run.1/jobList
    cd run.1
    wc -l jobList
    head jobList
    # make sure the job list is OK
    para create jobList
    para push
#Completed: 145 of 145 jobs
#Average job time:                  46s       0.77m     0.01h    0.00d
#Longest job:                      328s       5.47m     0.09h    0.00d
#Submission to last job:          1192s      19.87m     0.33h    0.01d

    # convert lav files to axt
    ssh kkr1u00
    cd /cluster/data/galGal1/bed/blastz.galGal1.2004-02-19
    mkdir axtChrom
    # a new run directory
    mkdir run.2
    cd run.2
    # Use blastz-self-chromlav2axt, which includes axtDropOverlap in 
    # the pipe (that helps to keep axtSort from barfing).
    # usage:  blastz-self-chromlav2axt lav-dir axt-file seq1-dir seq2-dir
    cat << '_EOF_' > gsub
#LOOP
/cluster/bin/scripts/blastz-self-chromlav2axt /cluster/data/galGal1/bed/blastz.galGal1.2004-02-19/lav/$(root1) {check out line+ /cluster/data/galGal1/bed/blastz.galGal1.2004-02-19/axtChrom/$(root1).axt} /cluster/bluearc/galGal1/nib /cluster/bluearc/galGal1/nib /cluster/data/galGal1/bed/blastz.galGal1.2004-02-19/S1.len /cluster/data/galGal1/bed/blastz.galGal1.2004-02-19/S2.len
#ENDLOOP
'_EOF_'
    # << this line makes emacs coloring happy
    \ls -1S /cluster/data/galGal1/bed/blastz.galGal1.2004-02-19/lav > chrom.list
    gensub2 chrom.list single gsub jobList
    wc -l jobList
    head jobList
    para create jobList
    para try, check, push, check,...
#Completed: 46 of 47 jobs
#Crashed: 1 jobs
#Average job time:                  34s       0.56m     0.01h    0.00d
#Longest job:                      294s       4.90m     0.08h    0.00d
#Submission to last job:           668s      11.13m     0.19h    0.01d
    # chrUn crashed -- run on kolossus, in two passes!
    # actually, it was a segv in lavToAxt (fixed), but this works anyway:
    ssh kolossus
    cd /cluster/data/galGal1/bed/blastz.galGal1.2004-02-19
    set base=`pwd`
    set seq1_dir=/cluster/data/galGal1/nib
    set seq2_dir=/cluster/data/galGal1/nib
    foreach chr (chrUn)
      set out=axtChrom/$chr.axt
      echo "Translating $chr lav to $out"
      foreach d (lav/$chr/*.lav)
        set smallout=$d.axt
        echo $smallout
        ~/bin/$MACHTYPE/lavToAxt $d $seq1_dir $seq2_dir stdout \
        | axtDropOverlap stdin $base/S1.len $base/S2.len stdout \
        | axtSort stdin $smallout
      end
      cat `ls -1 lav/$chr/*.lav.axt | sort -g` | axtSort stdin $out
    end


# CHAIN SELF BLASTZ (TODO 2/19/04 angie)
    # Run axtChain on little cluster
    ssh kkr1u00
    cd /cluster/data/galGal1/bed/blastz.galGal1.2004-02-19
    mkdir -p axtChain/run1
    cd axtChain/run1
    mkdir out chain
    ls -1S /cluster/data/galGal1/bed/blastz.galGal1.2004-02-19/axtChrom/*.axt \
      > input.lst
    cat << '_EOF_' > gsub
#LOOP
doChain {check in exists $(path1)} {check out line+ chain/$(root1).chain} {check out line+ out/$(root1).out}
#ENDLOOP
'_EOF_'
    # << this line makes emacs coloring happy

    cat << '_EOF_' > doChain
#!/bin/csh
axtFilter -notQ=chrUn $1 \
| ~/bin/i386/axtChain stdin \
  /cluster/bluearc/galGal1/nib \
  /cluster/bluearc/galGal1/nib $2 > $3
'_EOF_'
    # << this line makes emacs coloring happy
    chmod a+x doChain
    gensub2 input.lst single gsub jobList
    para create jobList
    para try, check, push, check...
#Completed: 47 of 47 jobs
#Average job time:                 140s       2.33m     0.04h    0.00d
#Longest job:                      766s      12.77m     0.21h    0.01d
#Submission to last job:           766s      12.77m     0.21h    0.01d

    # now on the cluster server, sort chains
    ssh kksilo
    cd /cluster/data/galGal1/bed/blastz.galGal1.2004-02-19/axtChain
    chainMergeSort run1/chain/*.chain > all.chain
    chainSplit chain all.chain
    rm run1/chain/*.chain
    # trim to minScore=20000
    mkdir chainFilt
    foreach f (chain/*.chain)
      chainFilter -minScore=20000 $f > chainFilt/$f:t
    end

    # Load chains into database
    ssh hgwdev
    cd /cluster/data/galGal1/bed/blastz.galGal1.2004-02-19/axtChain/chainFilt
    foreach i (*.chain)
        set c = $i:r
        echo loading $c
        hgLoadChain galGal1 ${c}_chainSelf $i
    end


# NET SELF BLASTZ (TODO 2/19/04 angie)
    ssh kksilo
    cd /cluster/data/galGal1/bed/blastz.galGal1.2004-02-19/axtChain
    chainPreNet all.chain ../S1.len ../S2.len stdout \
    | chainNet stdin -minSpace=1 ../S1.len ../S2.len stdout /dev/null \
    | netSyntenic stdin noClass.net

    # Add classification info using db tables:
    ssh hgwdev
    cd /cluster/data/galGal1/bed/blastz.galGal1.2004-02-19/axtChain
    netClass -noAr noClass.net galGal1 galGal1 self.net

    # Make a 'syntenic' subset:
    ssh kksilo
    cd /cluster/data/galGal1/bed/blastz.galGal1.2004-02-19/axtChain
    rm noClass.net
    netFilter -syn self.net > selfSyn.net

    # Load the nets into database 
    ssh hgwdev
    cd /cluster/data/galGal1/bed/blastz.galGal1.2004-02-19/axtChain
    netFilter -minGap=10 self.net |  hgLoadNet galGal1 netSelf stdin
    netFilter -minGap=10 selfSyn.net | hgLoadNet galGal1 netSyntenySelf stdin
    # Add entries for chainSelf, netSelf, netSyntenySelf to 
    # chicken/galGal1 trackDb


# RUN AXTBEST ON SELF (TODO 2/19/04 angie)
    # chain/net seem kinda sparse, let's try axtBest...
    ssh kolossus
    cd /cluster/data/galGal1/bed/blastz.galGal1.2004-02-19
    mkdir axtBest pslBest
    foreach f (axtChrom/chr*.axt)
      set chr=$f:t:r
      echo axtBesting $chr
      axtBest $f $chr axtBest/$chr.axt -minScore=300
      axtToPsl axtBest/$chr.axt S1.len S2.len pslBest/$chr.psl
    end


# MAKE 11.OOC FILE FOR BLAT (DONE 2/1/04 angie)
    ssh kksilo
    mkdir /cluster/data/galGal1/bed/ooc
    cd /cluster/data/galGal1/bed/ooc
    ls -1 /cluster/bluearc/galGal1/nib/chr*.nib > nib.lst
    blat nib.lst /dev/null /dev/null -tileSize=11 \
      -makeOoc=/cluster/bluearc/galGal1/11.ooc -repMatch=1024
#Wrote 1053 overused 11-mers to /cluster/bluearc/galGal1/11.ooc


# AUTO UPDATE GENBANK MRNA RUN  (DONE 2/1/04 angie)
    # Update genbank config and source in CVS:
    cd ~/kent/src/hg/makeDb/genbank
    cvsup .
    # See if /cluster/data/genbank/etc/genbank.conf has had any un-checked-in
    # edits, check them in if necessary:
    diff /cluster/data/genbank/etc/genbank.conf etc/genbank.conf

    # Edit etc/genbank.conf and add these lines:
# galGal1 (chicken)
galGal1.genome = /cluster/bluearc/galGal1/nib/chr*.nib
galGal1.lift = /cluster/data/galGal1/jkStuff/liftAll.lft
galGal1.refseq.mrna.native.load = no
galGal1.genbank.mrna.xeno.load = yes
galGal1.genbank.est.xeno.load = no
galGal1.downloadDir = galGal1

    cvs ci etc/genbank.conf
    # Since chicken is a new species for us, edit src/lib/gbGenome.c.  
    # Pick some other browser species, & monkey-see monkey-do.  
    cvs diff src/lib/gbGenome.c
    make
    cvs ci src/lib/gbGenome.c
    # Edit src/align/gbBlat to add /cluster/bluearc/galGal1/11.ooc
    cvs diff src/align/gbBlat
    make
    cvs ci src/align/gbBlat
    # Install to /cluster/data/genbank:
    make install-server

    ssh eieio
    cd /cluster/data/genbank
    # This is an -initial run, mRNA only:
    nice bin/gbAlignStep -iserver=no -clusterRootDir=/cluster/bluearc/genbank \
      -srcDb=genbank -type=mrna -verbose=1 -initial galGal1
    # Load results:
    ssh hgwdev
    cd /cluster/data/genbank
    nice bin/gbDbLoadStep -verbose=1 -drop -initialLoad galGal1
    # Clean up:
    rm -r /cluster/bluearc/genbank/work/initial.galGal1

    ssh eieio
    # -initial for ESTs:
    nice bin/gbAlignStep -iserver=no -clusterRootDir=/cluster/bluearc/genbank \
      -srcDb=genbank -type=est -verbose=1 -initial galGal1
    # Load results:
    ssh hgwdev
    cd /cluster/data/genbank
    nice bin/gbDbLoadStep -verbose=1 galGal1
    # Clean up:
    rm -r /cluster/bluearc/genbank/work/initial.galGal1


# PRODUCING GENSCAN PREDICTIONS (DONE 2/3/04 angie)
    ssh hgwdev
    mkdir /cluster/data/galGal1/bed/genscan
    cd /cluster/data/galGal1/bed/genscan
    # Check out hg3rdParty/genscanlinux to get latest genscan:
    cvs co hg3rdParty/genscanlinux
    # Run on small cluster (more mem than big cluster).
    ssh kkr1u00
    # Make 3 subdirectories for genscan to put their output files in
    mkdir gtf pep subopt
    # Generate a list file, genome.list, of all the hard-masked contigs that 
    # *do not* consist of all-N's (which would cause genscan to blow up)
    rm -f genome.list
    touch genome.list
    foreach f ( `ls -1S /cluster/data/galGal1/*/chr*_*/chr*_?{,?}.fa.masked` )
      egrep '[ACGT]' $f > /dev/null
      if ($status == 0) echo $f >> genome.list
    end
    wc -l genome.list
    # Create template file, gsub, for gensub2.  For example (3-line file):
    cat << '_EOF_' > gsub
#LOOP
/cluster/bin/i386/gsBig {check in line+ $(path1)} {check out line gtf/$(root1).gtf} -trans={check out line pep/$(root1).pep} -subopt={check out line subopt/$(root1).bed} -exe=hg3rdParty/genscanlinux/genscan -par=hg3rdParty/genscanlinux/HumanIso.smat -tmp=/tmp -window=2400000
#ENDLOOP
'_EOF_'
    # << this line makes emacs coloring happy
    gensub2 genome.list single gsub jobList
    para create jobList
    para try, check, push, check, ...
#Completed: 259 of 261 jobs
#Crashed: 2 jobs
#Average job time:                2920s      48.67m     0.81h    0.03d
#Longest job:                   141367s    2356.12m    39.27h    1.64d
#Submission to last job:        144447s    2407.45m    40.12h    1.67d
    # If there are crashes, diagnose with "para problems".  
    # If a job crashes due to genscan running out of memory, re-run it 
    # manually with "-window=1200000" instead of "-window=2400000".
    /cluster/bin/i386/gsBig /cluster/data/galGal1/5/chr5_5/chr5_5.fa.masked gtf/chr5_5.fa.gtf -trans=pep/chr5_5.fa.pep -subopt=subopt/chr5_5.fa.bed -exe=hg3rdParty/genscanlinux/genscan -par=hg3rdParty/genscanlinux/HumanIso.smat -tmp=/tmp -window=1200000
    /cluster/bin/i386/gsBig /cluster/data/galGal1/20/chr20_1/chr20_1.fa.masked gtf/chr20_1.fa.gtf -trans=pep/chr20_1.fa.pep -subopt=subopt/chr20_1.fa.bed -exe=hg3rdParty/genscanlinux/genscan -par=hg3rdParty/genscanlinux/HumanIso.smat -tmp=/tmp -window=1200000
    
    # Convert these to chromosome level files as so:
    ssh kksilo
    cd /cluster/data/galGal1/bed/genscan
    liftUp genscan.gtf ../../jkStuff/liftAll.lft warn gtf/*.gtf
    liftUp genscanSubopt.bed ../../jkStuff/liftAll.lft warn subopt/*.bed
    cat pep/*.pep > genscan.pep

    # Load into the database as so:
    ssh hgwdev
    cd /cluster/data/galGal1/bed/genscan
    ldHgGene galGal1 genscan genscan.gtf
    hgPepPred galGal1 generic genscanPep genscan.pep
    hgLoadBed galGal1 genscanSubopt genscanSubopt.bed


# PRODUCING FUGU BLAT ALIGNMENTS (DONE 2/6/04)
    ssh kk
    mkdir /cluster/data/galGal1/bed/blatFr1
    cd /cluster/data/galGal1/bed/blatFr1
    ls -1S /cluster/bluearc/fugu/fr1/trfFa/*.fa > fugu.lst
    ls -1S /cluster/bluearc/galGal1/trfFa/*.fa > chicken.lst
    cat << '_EOF_' > gsub
#LOOP
/cluster/bin/i386/blat -mask=lower -q=dnax -t=dnax {check in line+ $(path1)} {check in line+ $(path2)} {check out line+ psl/$(root1)_$(root2).psl}
#ENDLOOP
'_EOF_'
    # << this line makes emacs coloring happy
    mkdir psl
    gensub2 chicken.lst fugu.lst gsub spec
    para create spec
    para try, check, push, check, ...
    # cluster & bluearc were very busy => slow:
#Completed: 1827 of 1827 jobs
#Average job time:                1685s      28.08m     0.47h    0.02d
#Longest job:                     4121s      68.68m     1.14h    0.05d
#Submission to last job:         56039s     933.98m    15.57h    0.65d

    # Sort alignments:
    ssh kksilo
    cd /cluster/data/galGal1/bed/blatFr1
    pslCat -dir psl \
    | liftUp -type=.psl stdout /cluster/data/galGal1/jkStuff/liftAll.lft \
        warn stdin \
    | pslSortAcc nohead chrom /cluster/store2/temp stdin

    # load into database:
    ssh hgwdev
    cd /cluster/data/galGal1/bed/blatFr1/chrom
    cat *.psl | hgLoadPsl -fastLoad -table=blatFr1 galGal1 stdin
    mkdir /gbdb/galGal1/fuguSeq
    ln -s /cluster/data/fr1/fugu_v3.masked.fa /gbdb/galGal1/fuguSeq/
    cd /cluster/data/galGal1/bed/blatFr1
    hgLoadSeq galGal1 /gbdb/galGal1/fuguSeq/fugu_v3.masked.fa


# LOAD CPGISSLANDS (DONE 2/1/04 angie)
    ssh hgwdev
    mkdir -p /cluster/data/galGal1/bed/cpgIsland
    cd /cluster/data/galGal1/bed/cpgIsland
    # Build software from Asif Chinwalla (achinwal@watson.wustl.edu)
    cvs co hg3rdParty/cpgIslands
    cd hg3rdParty/cpgIslands
    make
    mv cpglh.exe /cluster/data/galGal1/bed/cpgIsland/
    
    ssh kksilo
    cd /cluster/data/galGal1/bed/cpgIsland
    foreach f (../../*/chr*.fa.masked)
      set fout=$f:t:r:r.cpg
      echo running cpglh on $f to $fout
      ./cpglh.exe $f > $fout.cpg
    end
    # Transform cpglh output to bed +
    cat << '_EOF_' > filter.awk
/* chr1\t1325\t3865\t754\tCpG: 183\t64.9\t0.7 */
/* Transforms to:  (tab separated columns above, spaces below) */
/* chr1  1325    3865    CpG: 183  754  183 489  64.9  0.7 */
{
width = $3-$2;
printf("%s\t%s\t%s\t%s %s\t%s\t%s\t%0.0f\t%0.1f\t%s\n",
  $1,$2,$3,$5,$6,width,$6,width*$7*0.01,100.0*2*$6/($3-$2),$7);}
'_EOF_'
    # << this line makes emacs coloring happy
    awk -f filter.awk chr*.cpg > cpgIsland.bed

    # load into database:
    ssh hgwdev
    cd /cluster/data/galGal1/bed/cpgIsland
    hgLoadBed galGal1 cpgIsland -tab -noBin \
      -sqlTable=$HOME/kent/src/hg/lib/cpgIsland.sql cpgIsland.bed


# LOAD SOFTBERRY GENES (TODO)
     cd /cluster/data/galGal1/bed
     mkdir softberry
     cd softberry
     wget \
        ftp://www.softberry.com/pub/SC_CHICKEN_JUN03/Softb_fgenesh_chicken_jun03.tar.gz
     tar xvzf Softb_fgenesh_chicken_jun03.tar.gz
     ldHgGene galGal1 softberryGene chr*.gff
     hgPepPred galGal1 softberry *.protein
     hgSoftberryHom galGal1 *.protein


# BUILD BLAST DATABASES
    cd /cluster/data/galGal1
    mkdir blastDb
     for i in `cat chrom.lst`; do for j in `echo $i/chr$i_*/chr*_*_*.fa`; do ln -s `pwd`/$j blastDb/`basename $j .fa`;
      done; done 
      cd blastDb
      for i in *; do formatdb -p F -i $i; done
# END BLAST


# TIGR GENE INDEX (TODO)
    mkdir -p /cluster/data/galGal1/bed/tigr
    cd /cluster/data/galGal1/bed/tigr
    wget ftp://ftp.tigr.org//pub/data/tgi/TGI_track_ChickenGenome_Jun2003.tgz
    tar xvfz TGI_track_ChickenGenome_Jun2003.tgz
    foreach f (*cattle*)
      set f1 = `echo $f | sed -e 's/cattle/cow/g'`
      mv $f $f1
    end
    foreach o (rat cow human pig chicken)
      setenv O $o
      foreach f (chr*_$o*s)
        tail +2 $f | perl -wpe 's /THC/TC/; s/(TH?C\d+)/$ENV{O}_$1/;' > $f.gff
      end
    end
    ldHgGene -exon=TC galGal1 tigrGeneIndex *.gff
       # 139456 gene predictions
    gzip *TCs *.gff


#  GC 5 BASE WIGGLE TRACK (DONE - 2004-02-04 - Hiram)
    #    working on kksilo, the fileserver for this data.
    ssh kksilo
    mkdir /cluster/data/galGal1/bed/gc5Base
    cd /cluster/data/galGal1/bed/gc5Base
    mkdir wigData5 dataLimits5
for n in ../../nib/*.nib
do
    c=`basename ${n} | sed -e "s/.nib//"`
    C=`echo $c | sed -e "s/chr//"`
    echo -n "working on ${c} - ${C} ... "
    hgGcPercent -chr=${c} -doGaps \
	-file=stdout -win=5 galGal1 ../../nib | grep -w GC | \
    awk '
{
    bases = $3 - $2
    perCent = $5/10.0
    printf "%d\t%.1f\n", $2+1, perCent
}' | wigAsciiToBinary \
        -dataSpan=5 -chrom=${c} -wibFile=wigData5/gc5Base_${C} \
        -name=${C} stdin > dataLimits5/${c}
echo "done ${c}"
done

    #	data is complete, load track on hgwdev
    ssh hgwdev
    cd /cluster/data/galGal1/bed/gc5Base
    hgLoadWiggle galGal1 gc5Base wigData5/*.wig
    #	create symlinks for .wib files
    mkdir /gbdb/galGal1/wib
    ln -s `pwd`/wigData5/*.wib /gbdb/galGal1/wib
    #	the trackDb entry
track gc5Base
shortLabel GC Counts
longLabel GC Percent in 5 base windows
group map
priority 23.5
visibility hide
autoScale Off
maxHeightPixels 128:36:16
graphTypeDefault Bar
gridDefault OFF
windowingFunction Mean
color 0,128,255
altColor 255,128,0
viewLimits 30:70
type wig 0 100 


# SUPERCONTIG LOCATIONS TRACK (DONE 2/5/04 angie)
    # Supercontig N is a collection of contained "ContigN.*".  
    # Extract Supercontig starts and ends from AGP into bed.  
    mkdir /cluster/data/galGal1/bed/supercontig
    cd /cluster/data/galGal1/bed/supercontig
    cat ../../chicken_agp/chr*.agp \
    | perl -we 'while (<>) { \
                  chop; @words = split("\t"); \
                  next if ($words[4] eq "N"); \
                  if ($words[5] =~ /^(Contig\d+)\.(\d+)$/) { \
                    $sup = $1;  $newNum = $2; \
                    $firstNum = $newNum if (! defined $firstNum); \
                    if (defined $lastSup && $lastSup ne $sup) { \
                      print "$chr\t$start\t$end\t$lastSup (.$firstNum-.$lastNum)\t1000\t$strand\n"; \
                      undef $start;  undef $end;  $firstNum = $newNum; \
                    } \
                    $chr = $words[0]; $cStart = $words[1]; $cEnd = $words[2]; \
                    $strand = $words[8]; \
                    $start = $cStart if (\!defined $start || $start > $cStart); \
                    $end = $cEnd if (\!defined $end || $end < $cEnd); \
                    $lastSup = $sup;  $lastNum = $newNum; \
                  } else { die "bad 6th word $words[5]\n"; } \
                } \
                print "$chr\t$start\t$end\t$lastSup (.$firstNum-.$lastNum)\t1000\t$strand\n";' \
    > supercontig.bed
    ssh hgwdev
    cd /cluster/data/galGal1/bed/supercontig
    hgLoadBed -tab galGal1 supercontig supercontig.bed


# PLOT MARKER GENETIC VS. GENOMIC POS (DONE 2/5/04 angie)
    ssh kksilo
    mkdir /cluster/data/galGal1/bed/markerPlots
    cd /cluster/data/galGal1/bed/markerPlots
    # Need to find out where Terry got this file:
    cp -p /cluster/bluearc/gg1/markers/genetic.map .
    mkdir gnuplot
    # This is a one-shot and I'm writing a perl script -- too long for inliner.
    chmod a+x ./plotMarkers.pl
    ./plotMarkers.pl genetic.map ../markers/stsMarkersTmp.bed
    mkdir gif
    ssh hgwdev
    cd /cluster/data/galGal1/bed/markerPlots
    foreach f (gnuplot/*.gnuplot)
      gnuplot < $f > tmp.ppm
      ppmtogif tmp.ppm > gif/$f:t:r.gif
      rm tmp.ppm
    end
    mkdir /usr/local/apache/htdocs/angie/chickenPlots
    cp -p /cluster/data/galGal1/bed/markerPlots/gif/* \
      /usr/local/apache/htdocs/angie/chickenPlots
    cp -p /cluster/data/galGal1/bed/markerPlots/*.txt \
      /usr/local/apache/htdocs/angie/chickenPlots
    cp -p /cluster/bluearc/gg1/markers/genetic.map \
      /usr/local/apache/htdocs/angie/chickenPlots/genetic_map.txt
    cp -p /cluster/data/galGal1/bed/markers/stsMarkersTmp.bed \
      /usr/local/apache/htdocs/angie/chickenPlots/stsMarkerPlacements.txt


# CREATING QUALITY SCORE TRACK (DONE 2/6/04 angie)
    ssh kksilo
    mkdir /cluster/data/galGal1/bed/qual
    cd /cluster/data/galGal1/bed/qual
    cat ../../chicken_agp/chr*.agp > assembly.agp
    tar xvzf ../../chicken_040105.contigs.qual.tar.gz
    cat chicken*.qual | qaToQac stdin stdout \
    | ~/bin/i386/chimpChromQuals assembly.agp stdin chrom.qac
    rm chicken*.qual
    mkdir wigData dataLimits
    foreach c (`cat ../../chrom.lst`)
      foreach agp (../../$c/chr$c.agp ../../$c/chr${c}_random.agp)
        if (-e $agp) then
          set chr = $agp:t:r
          set abbrev = `echo $chr | sed -e 's/^chr//;  s/_random/r/;'`
          echo $chr to $abbrev wiggle
          qacToWig chrom.qac -name=$chr stdout \
          | wigAsciiToBinary -dataSpan=1 -chrom=$chr \
              -wibFile=wigData/qual_$abbrev -name=$abbrev stdin \
              > dataLimits/$chr
        endif
      end
    end
    # Verify size of .wib file = chrom length
    foreach f (wigData/*.wib)
      set abbrev = $f:t:r
      set chr = `echo $abbrev | sed -e 's/^qual_/chr/;  s/r$/_random/;'`
      set wibLen = `ls -l $f | awk '{print $5;}'`
      set chromLen = `grep -w $chr ../../chrom.sizes | awk '{print $2;}'`
      if ($wibLen != $chromLen) then
        echo "ERROR: $chr size is $chromLen but wib size is $wibLen"
      else
        echo $chr OK.
      endif
    end
#ERROR: chr26 size is 4262107 but wib size is 3762107
    # That's OK -- chr26 ends in a 500000bp centromere gap -- not incl'd in wib

    # /gbdb & load:
    ssh hgwdev
    cd /cluster/data/galGal1/bed/qual
    mkdir -p /gbdb/galGal1/wib
    ln -s `pwd`/wigData/*.wib /gbdb/galGal1/wib
    hgLoadWiggle galGal1 quality wigData/*.wig

    #	To speed up display for whole chrom views, need to make zoomed
    #	data views.  Zoom to 1K points per pixel - Hiram 2004-02-17
    ssh kksilo
    cd /cluster/data/galGal1/bed/qual/wigData1K
    mkdir -p wigData1K
    mkdir -p dataLimits1K

    for c in `cat ../../chrom.lst`
    do
        if [ -f ../../${c}/chr${c}.agp ]; then
            echo "chr${c} quality 1K zoom"
            qacToWig chrom.qac -name=chr${c} stdout |
                wigZoom stdin | wigAsciiToBinary -dataSpan=1024 \
                -chrom=chr${c} -wibFile=wigData1K/qc1K_${c} \
                -name=${c} stdin > dataLimits1K/chr${c}
        fi
        if [ -f ../../${c}/chr${c}_random.agp ]; then
            echo "chr${c}_random quality 1K zoom"
            qacToWig chrom.qac -name=chr${c}_random stdout |
                wigZoom stdin | wigAsciiToBinary -dataSpan=1024 \
                -chrom=chr${c}_random -wibFile=wigData1K/qc1K_${c}r \
                -name=${c}_random stdin > dataLimits1K/chr${c}r
        fi
    done

    ssh hgwdev
    cd /cluster/data/galGal1/bed/qual/wigData1K
    # load in addition to existing data
    hgLoadWiggle -oldTable galGal1 quality *.wig
    #	create symlinks for .wib files
    ln -s `pwd`/*.wib /gbdb/galGal1/wib

# PLOT MRNA-MARKER GENETIC VS. GENOMIC POS (DONE 2/10/04 angie)
    ssh hgwdev
    mkdir /cluster/data/galGal1/bed/mrnaPlots
    cd /cluster/data/galGal1/bed/mrnaPlots
    # Need to find out where Terry got this file:
    cp -p /cluster/bluearc/gg1/markers/stsAlias.bed .
    echo 'select all_mrna.tName, all_mrna.tStart, all_mrna.tEnd, \
                 all_mrna.qName, 0, all_mrna.strand, geneName.name \
            from all_mrna, mrna, geneName \
            where all_mrna.qName = mrna.acc and mrna.geneName != 0 and \
                  mrna.geneName = geneName.id' \
    | hgsql galGal1 -N > mrnaWithNames.bed

    mkdir gnuplot
    # This is a one-shot and I'm writing a perl script -- too long for inliner.
    chmod a+x ./plotMrnaMarkers.pl
    ./plotMrnaMarkers.pl ../markerPlots/genetic.map stsAlias.bed \
      mrnaWithNames.bed 
    # Too few results to plot -- just emailed summary to LaDeana et al.


# COMPARE GENBANK MRNA ANNOTATED POSITION TO ALIGNED (DONE 2/11/04 angie)
    ssh hgwdev
    cd /cluster/data/galGal1/bed/mrnaPlots
    echo 'select tName, tStart, tEnd, qName, 0, strand from all_mrna' \
    | hgsql galGal1 -N > mrnaPos.bed
    # write another 1-shot perl script
    chmod a+x getMrnaGBPos.pl
    # eieio is the fileserver for genbank stuff...
    ssh eieio
    cd /cluster/data/galGal1/bed/mrnaPlots
    gunzip -c /cluster/store5/genbank/data/download/genbank.139.0/gbvrt*.gz \
    | ./getMrnaGBPos.pl mrnaPos.bed
    # emailed report files mrnaGB*.txt


