# for emacs: -*- mode: sh; -*-

# This file describes the construction of the browser database on
# the mouse strains from NCBI build 38 (Dec 2011 freeze) aka:
#	GRCm38 - Genome Reference Consortium Mouse Reference 38

############################################################################
# gather sequence and AGP definitions (DONE - 2012-09-28 - Hiram)
    mkdir -p  /hive/data/genomes/mm10Strains1/genbank
    cd /hive/data/genomes/mm10Strains1/genbank
    # modified script from hg19Patch10 build, scans the FASTA and AGP
    #   files from mm10/genbank/ for all the alternate sequence
    #   to generate a fasta file and AGP file here with chr prefixes
    #   for chrom names as is typical for UCSC browsers
    ./getSequence.pl ../../mm10/genbank
-rw-rw-r-- 1 18386569 Sep 28 11:01 ucsc.fa.gz
-rw-rw-r-- 1    27454 Sep 28 11:01 ucsc.agp

    # the script is:
    cat << '_EOF_' > getSequence.pl
#!/usr/bin/env perl

use strict;
use warnings;

my $debug = 1;
my $patchName = "patch1";

sub usage() {
    print STDERR "usage: ./gatherNames.pl ../../mm10/genbank > ucscNames.${patchName}.txt\n";
    print STDERR "expecting the Mus_musculus/GRCm38.p1/ hierarchy in ./genbank from NCBI\n";
    exit 255;
}

my $argc = scalar(@ARGV);

if ($argc != 1) {
    usage;
}

my $patchDir = shift;

if ( ! -d $patchDir ) {
    print STDERR "ERROR: given directory $patchDir is not a directory or does not exist";
    usage;
}

open (FA, "|gzip -c > ucsc.fa.gz") or die "can not write to ucsc.fa";
my %glSize;
my %ctgToChr;
my %ctgToFastaName;
# my $fasta = "$patchDir/PATCHES/alt_scaffolds/FASTA/alt.scaf.fa.gz";
my @fastaList = split('\n',`find $patchDir -type f | grep FASTA | grep alt_scaffolds | grep -v UNKNOWN | grep alt.scaf.fa`);
my @agpList = split('\n',`find $patchDir -type f | grep AGP | grep alt_scaffolds | grep -v UNKNOWN | grep alt.scaf.agp`);

for (my $i = 0; $i < scalar(@fastaList); ++$i) {
    my $fasta = $fastaList[$i];

# get lengths of alternate scaffolds
printf STDERR "# reading $fasta\n" if ($debug );
open (FH, "faCount $fasta|") or die "can not faCount $fasta";
while (my $line = <FH>) {
    next if ($line =~ m/^#/);
    next if ($line =~ m/^total/);
    my ($gi, $len, $rest) = split('\s+', $line, 3);
    my ($x, $acc, $y, $gl, $z) = split('\|', $gi);
    die "ERROR: already exists glSize for {$gl}" if (exists($glSize{$gl}));
    $glSize{$gl} = $len;
    printf STDERR "# glSize{%s} <- %d\n", $gl, $len if ($debug);
}
close (FH);

open (FH, "zcat $fasta|")
        or die "ERROR: can not read $fasta";
while ( my $line = <FH> ) {
    if ($line !~ m/^>/) {
        printf FA "%s", $line;
    } else {
    chomp $line;
printf STDERR "# '%s'\n", $line;
    my ($gi, $rest) = split('\s+',$line,2);
    my ($x, $acc, $y, $gl, $z) = split('\|', $gi);
    my $chr = $rest;
    if ($chr =~ m/ unplaced /) {
        printf STDERR "# WARN: unplaced: %s\n", $line;
        $chr = "Un";
    }
    $chr =~ s/Mus musculus strain .* chromosome //;
    $chr =~ s/Mus musculus chromosome //;
    $chr =~ s/ .*//;
    die "ERROR: already exists ctgToChr for {$gl} <- $chr" if (exists($ctgToChr{$gl}));
    $ctgToChr{$gl} = $chr;
printf STDERR "# ctgToChr{%s} <- %s\n", $gl, $chr;
    die "ERROR: already exists ctgToFastaName for {$gl} <- $chr" if (exists($ctgToFastaName{$gl}));
    $ctgToFastaName{$gl} = $gi;
    $ctgToFastaName{$gl} =~ s/\|$//;
    $ctgToFastaName{$gl} =~ s/^>//;
    printf STDERR "# ctgToFastaName{%s} <- %s\n", $gl, $ctgToFastaName{$gl} if ($debug);
    # fix the suffix .N to be _N
    $gl =~ s/\./_/;
    printf FA ">chr%s_%s\n", $chr, $gl;
    }
}
close (FH);
}       # for (my $i = 0; $i < scalar(@fastaList); ++$i)
close (FA);

my $agpSequenceCount = 0;
open (AG, ">ucsc.agp") or die "can not write to ucsc.agp";
for (my $i = 0; $i < scalar(@agpList); ++$i) {
    my $agp = $agpList[$i];
    printf STDERR "# reading $agp\n" if ($debug );
    open (FH, "zcat $agp|") or die "can not read $agp";
    while (my $line = <FH>) {
        next if ($line =~ m/^#/);
        chomp $line;
        my @a = split('\s+', $line);
        my $gl = $a[0];
        $a[3] = ++$agpSequenceCount;
        if (exists($ctgToChr{$gl})) {
            my $chr = $ctgToChr{$gl};
            $gl =~ s/\./_/;
            printf AG "chr%s_%s", $chr, $gl;
        } else {
            printf STDERR "WARN: can not find gl: $gl in ctgToChr\n";
            $gl =~ s/\./_/;
            printf AG "chrUn_%s", $gl;
        }
        for (my $i = 1; $i < scalar(@a); ++$i) {
            printf AG "\t%s", $a[$i];
        }
        printf AG "\n";
    }
    close (FH);
}
close (AG);
'_EOF_'
    # << happy emacs

    # verify OK:
    checkAgpAndFa ucsc.agp ucsc.fa.gz 2>&1 | tail -3
# FASTA sequence entry
# Valid Fasta file entry
# All AGP and FASTA entries agree - both files are valid

############################################################################
# initial database build (DONE - 2012-09-28 - Hiram)
    cd /hive/data/genomes/mm10Strains1
    cat << '_EOF_' > mm10Strains1.config.ra
# Config parameters for makeGenomeDb.pl:
db mm10Strains1
clade haplotypes
genomeCladePriority 200
scientificName Mus musculus
commonName mm10Haps
assemblyDate Dec. 2011
assemblyLabel GRCm38 Genome Reference Consortium Mouse Reference 38, alternate strains
assemblyShortLabel Mouse strains
orderKey 1300
mitoAcc none
fastaFiles /hive/data/genomes/mm10Strains1/genbank/ucsc.fa.gz
agpFiles /hive/data/genomes/mm10Strains1/genbank/ucsc.agp
# qualFiles /dev/null
dbDbSpeciesDir mouse
photoCreditURL http://www.jax.org/
photoCreditName Photo courtesy The Jackson Laboratory
ncbiGenomeId 52
ncbiAssemblyId   416688
ncbiAssemblyName   GRCm38.p1
ncbiBioProject 51977
genBankAccessionID GCA_000001635.3
taxId   10090
'_EOF_'
    # << happy emacs

    # verify fasta and AGP
    makeGenomeDb.pl -stop=agp mm10Strains1.config.ra > agp.log 2>&1
    # when OK, continue with database construction
    makeGenomeDb.pl -continue=db mm10Strains1.config.ra > db.log 2>&1

############################################################################
# RepeatMasker (DONE - 2012-10-04 - Hiram)
    mkdir /hive/data/genomes/mm10Strains1/bed/repeatMasker
    cd /hive/data/genomes/mm10Strains1/bed/repeatMasker
    time doRepeatMasker.pl mm10Strains1 -buildDir=`pwd` -noSplit \
	-bigClusterHub=swarm \
        -dbHost=hgwdev -workhorse=hgwdev > do.log 2>&1 &
    #   real    94m17.323s
    cat faSize.rmsk.txt
# 62219607 bases (1203474 N's 61016133 real 34371016 upper 26645117 lower)
#       in 94 sequences in 1 files
# Total size: mean 661910.7 sd 1106026.7 min 28130 (chr17_GL456021_2)
#       max 5956088 (chr6_GL456054_2) median 215601
# %42.82 masked total, %43.67 masked real

###########################################################################
# TRF simple repeats (DONE - 2012-10-04 - Hiram)
    mkdir /hive/data/genomes/mm10Strains1/bed/simpleRepeat
    cd /hive/data/genomes/mm10Strains1/bed/simpleRepeat
    time doSimpleRepeat.pl mm10Strains1 -buildDir=`pwd` -dbHost=hgwdev \
        -smallClusterHub=encodek -workhorse=hgwdev > do.log 2>&1 &
    #   real    4m57.822s
    cat fb.simpleRepeat
    #   2190891 bases of 61018268 (3.591%) in intersection

    # when RM is done, add this mask:
    cd /hive/data/genomes/mm10Strains1
    twoBitMask mm10Strains1.rmsk.2bit \
        -add bed/simpleRepeat/trfMask.bed mm10Strains1.2bit
    # safe to ignore warning: has >=13 fields
    twoBitToFa mm10Strains1.2bit stdout | faSize stdin \
	> faSize.mm10Strains1.2bit.txt
# 62219607 bases (1203474 N's 61016133 real 34322554 upper 26693579 lower)
#       in 94 sequences in 1 files
# Total size: mean 661910.7 sd 1106026.7 min 28130 (chr17_GL456021_2)
#       max 5956088 (chr6_GL456054_2) median 215601
# %42.90 masked total, %43.75 masked real

    rm /gbdb/mm10Strains1/mm10Strains1.2bit
    ln -s `pwd`/mm10Strains1.2bit /gbdb/mm10Strains1/

###########################################################################
# altSequence track patch10 (DONE - 2012-10-05 - Hiram)
    # provide links to locations on reference genome where these
    # haplotypes map to
    mkdir /hive/data/genomes/mm10Strains1/bed/altSequence
    cd /hive/data/genomes/mm10Strains1/bed/altSequence

    cat << '_EOF_' > hapLocate.pl
#!/usr/bin/env perl

use strict;
use warnings;

sub usage() {
    print STDERR "usage: ./gatherNames.pl ../../../mm10/genbank | sort -k1,1 -k2,2n > strains.to.mm10.bed\n";
    print STDERR "expecting the Mus_musculus/GRCm38.p1/ hierarchy in ../../../mm10/genbank from NCBI\n";
    print STDERR "also writes to the file: mm10.to.strains.bed\n";
    exit 255;
}

my $argc = scalar(@ARGV);

if ($argc != 1) { usage; }

my $genbankDir = shift;

if ( ! -d $genbankDir ) {
    print STDERR "ERROR: given directory $genbankDir is not a directory or does not exist";
    usage;
}
my @placeList = split('\n',`find $genbankDir -type f | grep alt_scaffold_placement.txt | grep -v UNKNOWN`);

open (MM, "|sort -k1,1 -k2,2n > mm10.to.strains.bed")
        or die "can not write to mm10.to.strains.bed";

for (my $i = 0; $i < scalar(@placeList); ++$i) {
    my $placement = $placeList[$i];
    open (FH, "sort -t'\t' -k6,6n $placement|") or die "ERROR: can not read $placement";
    while (my $line = <FH>) {
        chomp $line;
        next if ($line =~ m/^\s*#/);
        my ($altAsmName, $primAsmName, $altScafName, $altScafAcc, $parentType,
            $parentName, $parentAcc, $regionName, $ori, $altScafStart,
            $altScafStop, $parentStart, $parentStop, $altStartTail,
            $altStopTail) = split('\t',$line);
        if ($parentStart eq "na") {
            printf STDERR "# WARN: skipping no parentStart: %s\n", $line;
            next;
        }
        if ($parentName eq "na") {
            printf STDERR "# WARN: skipping no parentName: %s\n", $line;
            next;
        }
        my $altSize = $altScafStop - $altScafStart;
        my $ucscChrName = lc($altScafName);
        my @fields = split('_', $ucscChrName);
        my $ctgIndex = scalar(@fields) - 1;
        $ucscChrName = sprintf("chr%s_%s", $parentName, $altScafAcc);
        $ucscChrName =~ s/\./_/;
        $ori = "+" if ($ori eq "b");
        printf MM "chr%s\t%d\t%d\t%s:%d-%d\n", $parentName, $parentStart-1,
            $parentStop, $ucscChrName, $altScafStart, $altScafStop;
        printf "%s\t%d\t%d\tchr%s:%d-%d\n", $ucscChrName, $altScafStart-1,
            $altScafStop, $parentName, $parentStart, $parentStop;
    }
    close (FH);
}
close (MM);
'_EOF_'
    # << happy emacs
    chmod +x hapLocate.pl
    ./hapLocate.pl ../../../mm10/genbank \
        | sort -k1,1 -k2,2n > strains.to.mm10.bed

    hgLoadBed mm10Strains1 mm10Locations strains.to.mm10.bed
    #   Read 87 elements of size 4 from strains.to.mm10.bed

    featureBits -countGaps mm10Strains1 mm10Locations
    #   60596145 bases of 62219607 (97.391%) in intersection

############################################################################
# lastz alignment to mm10 (DONE - 2012-11-08 - Hiram)
    mkdir /hive/data/genomes/mm10Strains1/bed/lastzMm10.2012-10-05
    cd /hive/data/genomes/mm10Strains1/bed/lastzMm10.2012-10-05
    #	construct a 2bit file of just the mm10 reference sequences
    # and all the business to run lastz on each haplotype with its
    # corresponding target sequence in mm10

rm -fr mm10Bits run.blastz mm10Bits.lift
mkdir mm10Bits
mkdir -p run.blastz/tParts
mkdir -p run.blastz/qParts
# typical line in ../altSequence/mm10.to.strains.bed
#    location in mm10                      location in the altStrain
# chr1    60640808        63588262        chr1_JH584320_1:1-1003859
awk '{print $1,$2,$3,$4}' ../altSequence/mm10.to.strains.bed | while read L
do
    C=`echo $L | awk '{print $1}'`
    S=`echo $L | awk '{print $2}'`
    E=`echo $L | awk '{print $3}'`
    H=`echo $L | awk '{print $4}' | sed -e "s/:.*//"`
    HS=`echo $L | awk '{print $4}' | sed -e "s/-.*//"`
    HS=`echo $HS | sed -e "s/.*://" | awk '{print $1-1}'`
    HE=`echo $L | awk '{print $4}' | sed -e "s/.*-//"`
    CE=`grep "^${C}" /hive/data/genomes/mm10/chrom.sizes | cut -f2 | head -1`
    size=`echo $E $S | awk '{printf "%d", $1-$2}'`
    echo -e "$S\t$C.$S-$E\t$size\t$C\t$CE"
    echo mm10.2bit:${C}:$S-$E 1>&2
    if [ ! -s mm10Bits/$C.$S-$E.fa ]; then
	echo ">$C.$S-$E" > mm10Bits/$C.$S-$E.fa
	twoBitToFa /gbdb/mm10/mm10.2bit:${C}:$S-$E stdout \
	    | grep -v "^>" >> mm10Bits/$C.$S-$E.fa
    fi
    echo -e "/hive/data/genomes/mm10Strains1/mm10Strains1.2bit:$H:$HS-$HE" \
        > run.blastz/tParts/$H.lst
    echo -e "/hive/data/genomes/mm10Strains1/bed/lastzMm10.2012-10-05/mm10Bits.2bit:$C.$S-$E:$HS-$size" \
        > run.blastz/qParts/$H.lst
    echo -e "/cluster/bin/scripts/blastz-run-ucsc -outFormat psl tParts/$H.lst qParts/$H.lst ../DEF {check out exists ../psl/$H.psl}" \
	>> run.blastz/jobList
done | sort -u > mm10Bits.lift

    faToTwoBit mm10Bits/chr*.fa mm10Bits.2bit
    twoBitInfo mm10Bits.2bit stdout | sort -k2nr > mm10Bits.chrom.sizes

    # these lastz parameters are from the human-human self lastz
    cat << '_EOF_' > DEF
# mouse vs mouse
BLASTZ=lastz
# maximum M allowed with lastz is only 254
BLASTZ_M=254
BLASTZ_Q=/scratch/data/blastz/human_chimp.v2.q
BLASTZ_O=600
BLASTZ_E=150
BLASTZ_K=10000
BLASTZ_Y=15000
BLASTZ_T=2

# TARGET: Mouse Mm10Strains1
SEQ1_DIR=/hive/data/genomes/mm10Strains1/mm10Strains1.2bit
SEQ1_LEN=/hive/data/genomes/mm10Strains1/chrom.sizes
SEQ1_CHUNK=5000000
SEQ1_LAP=10000
SEQ1_IN_CONTIGS=0
SEQ1_LIMIT=2

# QUERY: Mouse Mm10
SEQ2_DIR=/scratch/data/mm10/mm10.2bit
SEQ2_LEN=/scratch/data/mm10/chrom.sizes
SEQ2_CTGDIR=/hive/data/genomes/mm10Strains1/bed/lastzMm10.2012-10-05/mm10Bits.2bit
SEQ2_CTGLEN=/hive/data/genomes/mm10Strains1/bed/lastzMm10.2012-10-05/mm10Bits.chrom.sizes
SEQ2_LIFT=/hive/data/genomes/mm10Strains1/bed/lastzMm10.2012-10-05/mm10Bits.lift
SEQ2_CHUNK=5000000
SEQ2_LAP=0
SEQ2_IN_CONTIGS=0
SEQ2_LIMIT=2

BASE=/hive/data/genomes/mm10Strains1/bed/lastzMm10.2012-10-05
TMPDIR=/scratch/tmp
'_EOF_'
    # << happy emacs

    ssh swarm
    cd /hive/data/genomes/mm10Strains1/bed/lastzMm10.2012-10-05/run.blastz
    mkdir ../psl
    para create jobList
    para try ... check ... push
    para time
# Completed: 87 of 87 jobs
# CPU time in finished jobs:        237s       3.95m     0.07h    0.00d  0.000 y
# IO & Wait Time:                   422s       7.04m     0.12h    0.00d  0.000 y
# Average job time:                   8s       0.13m     0.00h    0.00d
# Longest finished job:              72s       1.20m     0.02h    0.00d
# Submission to last job:          3359s      55.98m     0.93h    0.04d

    #	put together the individual results
    cd /hive/data/genomes/mm10Strains1/bed/lastzMm10.2012-10-05
    mkdir pslParts
    cat psl/chr*.psl | gzip -c > pslParts/mm10Strains1.mm10.psl.gz
    # lift the query bits to mm10 coordinates to properly show on this
    #   browser if the raw psl bits are to be seen
    cat psl/chr*.psl \
        | liftUp -pslQ -type=.psl stdout mm10Bits.lift error stdin \
        | gzip -c > mm10Strains1.liftedTo_mm10.psl.gz

    #	constructing a chain from those results
    mkdir -p /hive/data/genomes/mm10Strains1/bed/lastzMm10.2012-10-05/axtChain/run
    cd /hive/data/genomes/mm10Strains1/bed/lastzMm10.2012-10-05/axtChain/run
time zcat ../../pslParts/mm10Strains1.mm10.psl.gz \
| axtChain -psl -verbose=0 -scoreScheme=/scratch/data/blastz/human_chimp.v2.q -minScore=2000 -linearGap=medium stdin \
    /hive/data/genomes/mm10Strains1/mm10Strains1.2bit \
    /hive/data/genomes/mm10Strains1/bed/lastzMm10.2012-10-05/mm10Bits.2bit \
    stdout \
| chainAntiRepeat /hive/data/genomes/mm10Strains1/mm10Strains1.2bit \
    /hive/data/genomes/mm10Strains1/bed/lastzMm10.2012-10-05/mm10Bits.2bit \
    stdin mm10Strains1.mm10.preLift.chain
    #   real    0m44.175s
liftUp -chainQ mm10Strains1.mm10.lifted.chain \
    ../../mm10Bits.lift carry mm10Strains1.mm10.preLift.chain

    # constructing the net files:
cd /hive/data/genomes/mm10Strains1/bed/lastzMm10.2012-10-05/axtChain

chainMergeSort run/mm10Strains1.mm10.lifted.chain \
        | nice gzip -c > mm10Strains1.mm10.all.chain.gz
chainSplit chain mm10Strains1.mm10.all.chain.gz
# Make nets ("noClass", i.e. without rmsk/class stats which are added later):
time chainPreNet  mm10Strains1.mm10.all.chain.gz \
    /hive/data/genomes/mm10Strains1/chrom.sizes \
    /scratch/data/mm10/chrom.sizes stdout \
    | chainNet  stdin -minSpace=1 /hive/data/genomes/mm10Strains1/chrom.sizes \
        /scratch/data/mm10/chrom.sizes stdout /dev/null \
    | netSyntenic stdin noClass.net
    #   real    0m1.338s

# Make liftOver chains:
netChainSubset -verbose=0 noClass.net mm10Strains1.mm10.all.chain.gz stdout \
    | chainStitchId stdin stdout | gzip -c > mm10Strains1.mm10.over.chain.gz

# Make axtNet for download: one .axt per mm10Strains1 seq.
netSplit noClass.net net
cd ..
mkdir -p axtNet
foreach f (axtChain/net/*.net)
netToAxt $f axtChain/chain/$f:t:r.chain \
  /hive/data/genomes/mm10Strains1/mm10Strains1.2bit \
        /scratch/data/mm10/mm10.2bit stdout \
  | axtSort stdin stdout \
  | gzip -c > axtNet/$f:t:r.mm10Strains1.mm10.net.axt.gz
end

# Make mafNet for multiz: one .maf per mm10Strains1 seq.
mkdir -p mafNet
foreach f (axtNet/*.mm10Strains1.mm10.net.axt.gz)
  axtToMaf -tPrefix=mm10Strains1. -qPrefix=mm10. $f \
        /hive/data/genomes/mm10Strains1/chrom.sizes \
        /scratch/data/mm10/chrom.sizes \
        stdout \
  | gzip -c > mafNet/$f:t:r:r:r:r:r.maf.gz
end

    # swap that business to mm10
    mkdir /hive/data/genomes/mm10/bed/blastz.mm10Strains1.swap
    cd /hive/data/genomes/mm10/bed/blastz.mm10Strains1.swap
    time doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10Strains1/bed/lastzMm10.2012-10-05/DEF \
	-swap -noDbNameCheck  -stop=load \
	-noLoadChainSplit -chainMinScore=2000 \
	-chainLinearGap=medium -workhorse=hgwdev \
	-smallClusterHub=encodek -bigClusterHub=swarm > swap.load.log 2>&1
    #   real     2m51.872s

    cat fb.mm10.chainMm10Strains1Link.txt
    #   48691854 bases of 2652783500 (1.836%) in intersection

    # construct psl files from the liftOver chain:
    cd  /hive/data/genomes/mm10/bed/blastz.mm10Strains1.swap/axtChain
    chainToPsl mm10.mm10Strains1.over.chain.gz \
	/hive/data/genomes/mm10/chrom.sizes \
        /hive/data/genomes/mm10Strains1/chrom.sizes \
        /hive/data/genomes/mm10/mm10.2bit \
        /hive/data/genomes/mm10Strains1/mm10Strains1.2bit \
        mm10.mm10Strains1.over.psl
    # chainToPsl appears to have a problem, note errors from pslCheck:
    pslCheck -db=mm10 mm10.mm10Strains1.over.psl
    # checked: 578 failed: 51 errors: 51

    pslRecalcMatch mm10.mm10Strains1.over.psl \
	/hive/data/genomes/mm10/mm10.2bit \
	/hive/data/genomes/mm10Strains1/mm10Strains1.2bit \
	fixup.mm10.mm10Strains1.over.psl
    pslCheck -db=mm10 fixup.mm10.mm10Strains1.over.psl
    # checked: 946 failed: 0 errors: 0

    # load this PSL track
    hgLoadPsl mm10 -table=altSeqLiftOverPslStrains1 fixup.mm10.mm10Strains1.over.psl
    # using this name on this table since hgc code triggers off of
    # the beginning: "altSeqLiftOverPsl" to display the sequence
    # from the seqMm10Strains1 and extMm10Strains1 tables loaded below

############################################################################
# Add this sequence to mm10 (DONE - 2012-11-08 - Hiram)
    mkdir /hive/data/genomes/mm10Strains1/bed/altSequence/seqExt
    cd /hive/data/genomes/mm10Strains1/bed/altSequence/seqExt
    twoBitToFa ../../../mm10Strains1.2bit mm10Strains1.fa
    mkdir -p /gbdb/mm10/mm10Strains1 mm10Strains1
    faSplit byname mm10Strains1.fa ./mm10Strains1/
    ln -s `pwd`/mm10Strains1/*.fa /gbdb/mm10/mm10Strains1
    hgLoadSeq -drop -seqTbl=seqMm10Strains1 -extFileTbl=extMm10Strains1 mm10 \
        /gbdb/mm10/mm10Strains1/*.fa

############################################################################
