# for emacs: -*- mode: sh; -*-

    # Label multiz results by strain instead of accession.
    cd /hive/data/genomes/eboVir2/bed/multiz49way
    # Use strain names from http://epidemic.bio.ed.ac.uk/ebolavirus_sequences
    # (table from that page copied to text file strainInfo.txt)
    perl -we ' \
      open($STR, "strainInfo.txt") || die; \
      while (<$STR>) { \
        next if (/^accession/); \
        ($acc, undef, $strain, undef, $date) = split("\t"); \
        $date =~ s/-.*//; \
        if ($acc eq "KJ660346" || $acc eq "KJ660347" || $acc eq "KJ660348") { \
          $strain = "Guinea_$strain"; \
        } \
        $strain =~ s/ /_/g; \
        $accToStrain{$acc} = "$strain" . "_$date"; \
        print STDERR "$acc --> $strain" . "_$date\n"; \
      } \
      close ($STR); \
      while (<>) { \
        if (/^s ([\w.]+)(\s+.*)/) { \
          ($acc, $rest) = ($1, $2); \
          $acc =~ s/bundibugyo_NC_014373v1/FJ217161/; \
          $acc =~ s/reston_NC_004161v1/AF522874/; \
          $acc =~ s/sudan_NC_006432v1/AY729654/; \
          $acc =~ s/taiForest_NC_014372v1/FJ217162/; \
          $acc =~ s/zaire_NC_002549v1/AF086833/; \
          $acc =~ s/v\d$//; \
          $strain = $accToStrain{$acc}; \
          if ($strain) { \
            $_ = "s $strain$rest\n"; \
          } \
        } \
        print; \
      }' multiz49way.maf \
      > multiz49wayStrainNames.maf

    mkdir /gbdb/eboVir2/multiz49wayStrainNames
    ln -s `pwd`/multiz49wayStrainNames.maf /gbdb/eboVir2/multiz49wayStrainNames/
    pushd /data/tmp
    hgLoadMaf -pathPrefix=/gbdb/eboVir2/multiz49wayStrainNames eboVir2 multiz49wayStrainNames
    popd

    # Now translate the trackDb order from accessions into strain names.
    # I saved the accessions, 1 per line in order, in order.txt.
    perl -we ' \
      open($STR, "strainInfo.txt") || die; \
      while (<$STR>) { \
        next if (/^accession/); \
        ($acc, undef, $strain, undef, $date) = split("\t"); \
        $date =~ s/-.*//; \
        if ($acc eq "KJ660346" || $acc eq "KJ660347" || $acc eq "KJ660348") { \
          $strain = "Guinea_$strain"; \
        } \
        $strain =~ s/ /_/g; \
        $accToStrain{$acc} = "$strain" . "_$date"; \
        # print STDERR "$acc --> $strain" . "_$date\n"; \
      } \
      close ($STR); \
      while (<>) { \
        chomp;  $acc = $_; $origAcc = $acc; \
        $acc =~ s/bundibugyo_NC_014373v1/FJ217161/; \
        $acc =~ s/reston_NC_004161v1/AF522874/; \
        $acc =~ s/sudan_NC_006432v1/AY729654/; \
        $acc =~ s/taiForest_NC_014372v1/FJ217162/; \
        $acc =~ s/zaire_NC_002549v1/AF086833/; \
        $acc =~ s/v\d$//; \
        $strain = $accToStrain{$acc}; \
        if ($strain) { \
          print "$strain\n"; \
        } else { \
          print "$origAcc\n"; \
        } \
      }' order.txt \
      > strainOrder.txt
    # Now plop those into trackDb.multiz49wayStrainNames.ra after "sGroup Virus"

http://epidemic.bio.ed.ac.uk/ebolavirus_sequences has the location of AY729654 as Gulu, Uganda, but I believe that should be Gulu, Sudan per http://www.ncbi.nlm.nih.gov/nuccore/AY729654

Also, it has loc of AF272001 (mayinga) as Yambuku, DRC but per http://www.ncbi.nlm.nih.gov/nuccore/AF272001 the strain is from Zaire

Meanwhile our zaire_NC_002549v1 --> AF086833 which is not on that page.

