#!/usr/bin/env tclsh

#
# Sort positional tables to optimize them for display
#
# Regular expressions selecting the alignment tables to optimize.  Each
# pattern is matched against the table names of a database (see
# getMatchingTblSets); modifyDb passes every match to optimizeTbl as type
# "psl" with zero-based columns 13, 15 and 16 as chromosome, start and end.
# Keep comments outside the braces: inside them a "#" would become a list
# element, not a comment.
set pslTbls {
    ^all_mrna$
    ^chr.+_mrna$
    ^xenoMrna$
    ^all_est$
    ^chr.+_est$
    ^chr.+_intronEst$
    ^intronEst$
    ^refSeqAli$
    ^xenoRefSeqAli$
    ^mgcFullMrna$
    ^orfeomeMrna$
}

# ^xenoEst$ is deliberately not listed above: optimizing it takes far too long

# Regular expressions selecting the gene-annotation tables to optimize.
# modifyDb passes every matching table to optimizeTbl as type "genePred"
# with zero-based columns 1, 2 and 3 as chromosome, start and end.
set gpTbls {
    ^refGene$
    ^refFlat$
    ^xenoRefGene$
    ^xenoRefFlat$
    ^mgcGenes$
    ^orfeomeGenes$
}
# Regular expressions selecting the orientation-info tables to optimize.
# modifyDb passes every matching table to optimizeTbl as type "orientInfo"
# with zero-based columns 0, 1 and 2 as chromosome, start and end.
set oiTbls {
    ^mrnaOrientInfo$
    ^estOrientInfo$
}

# Optimize every known positional table of one database.
#
#   logFh - open channel that progress messages are written to
#   db    - name of the database to process
#
# The table list of the database is fetched once; each pattern set is then
# matched against it and every hit is handed to optimizeTbl together with
# the table type and the zero-based (bin-less) chromosome/start/end columns.
#
# NOTE(review): the genePred set is sorted on columns 1 2 3; confirm these
# really are chrom/start/end for all tables matched by gpTbls.
proc modifyDb {logFh db} {
    prLog $logFh "begin $db"
    set tables [getTableLst $db]

    # one row per table family: patterns, type, chrCol, startCol, endCol
    set families [list \
        $::pslTbls psl        13 15 16 \
        $::gpTbls  genePred    1  2  3 \
        $::oiTbls  orientInfo  0  1  2]

    foreach {patterns type chrCol startCol endCol} $families {
        foreach tbl [getMatchingTblSets $patterns $tables] {
            optimizeTbl $logFh $db $type $tbl $chrCol $startCol $endCol
        }
    }
    prLog $logFh "end $db"
}


# Job name and default locations.  logDir and the default workDir are
# relative paths, so they resolve against the directory selected below.
set task tblopt
set logDir misc/$task.logs
set workDir "data/tmp/gbopt"
set host [exec hostname]

# Change into the genbank root directory used on this host; one host also
# overrides the scratch work directory with an absolute path.
switch $host {
    bugle.soe.ucsc.edu {
        cd ~/compbio/genbank/genbank.fake
        set workDir "/scratch/tmp/gbopt"
    }
    hgwdev {
        cd /hive/data/outside/genbank
    }
    default {
        cd /genbank
    }
}


# Pick the file listing the databases to process on this host.  An empty
# dbFile makes loadDbs fall back to its built-in database list; any host
# not named here shares etc/rr.dbs with mgc.
switch $host {
    bugle.soe.ucsc.edu { set dbFile {} }
    hgwdev             { set dbFile etc/hgwdev.dbs }
    hgwbeta            { set dbFile etc/hgwbeta.dbs }
    mgc -
    default            { set dbFile etc/rr.dbs }
}

# Read the list of databases to process.
#
#   dbFile - path of a file with one database name per line, or the empty
#            string to use the built-in list {hg15test}
#
# Returns the list of database names.  Any line containing a "#" is treated
# as a comment and skipped; surrounding whitespace is trimmed and blank
# lines are ignored, so they can no longer turn into empty database names.
# A file without any usable line yields an empty list (previously this
# failed with "can't read dbs: no such variable").
proc loadDbs {dbFile} {
    if {$dbFile == ""} {
        return {hg15test}
    }
    set dbs {}
    set fh [open $dbFile]
    while {[gets $fh line] >= 0} {
        set line [string trim $line]
        if {$line eq "" || [string match "*\#*" $line]} {
            continue
        }
        lappend dbs $line
    }
    close $fh
    return $dbs
}

# Return the tables of database db, as the rows produced by running
# "show tables" through loadRows.
proc getTableLst {db} {
    set query "show tables"
    return [loadRows $db $query]
}

# Collect the tables matching any of several regular expressions.
#
#   tblRes - list of regular expressions
#   tables - list of table names to search
#
# Returns the matches grouped by pattern, in pattern order; a table that
# matches more than one pattern appears once per matching pattern.
proc getMatchingTblSets {tblRes tables} {
    set matched {}
    foreach pattern $tblRes {
        foreach hit [getMatchingTbls $pattern $tables] {
            lappend matched $hit
        }
    }
    return $matched
}
# Return the elements of tables that match the regular expression tblRe,
# in their original order.
proc getMatchingTbls {tblRe tables} {
    set hits {}
    foreach candidate $tables {
        if {[regexp -- $tblRe $candidate]} {
            lappend hits $candidate
        }
    }
    return $hits
}

# Log an SQL statement, run it against db with "hgsql -N -e", and return
# whatever the command printed.  An error raised by exec propagates to the
# caller.
proc callSql {logFh db sql} {
    prLog $logFh "callSql $db $sql"
    return [exec hgsql -N -e $sql $db]
}

# Log an SQL statement and run it against db with "hgsql -e", sending the
# command's standard output and standard error to the log channel.
proc runSql {logFh db sql} {
    prLog $logFh "runSql $db $sql"
    return [exec hgsql -e $sql $db >&@$logFh]
}

# Remove the scratch files belonging to table tbl from the global work
# directory: <tbl>.sql, <tbl>.txt and <tbl>_new.txt.  Files that do not
# exist are silently ignored (file delete -force).
proc delTmpFiles {tbl} {
    set stem $::workDir/$tbl
    file delete -force $stem.sql $stem.txt ${stem}_new.txt
}

# Rewrite table db.tbl so that its rows are stored sorted by chromosome,
# then numerically by start and end position.
#
#   logFh    - open channel that progress messages are written to
#   db       - database containing the table
#   type     - table type, used only in log messages
#   tbl      - name of the table to optimize
#   chrCol   - chromosome column
#   startCol - start column
#   endCol   - end column
#
# Column numbers are zero based and do not count a leading bin column.
#
# The table is dumped into the global work directory, the dump is sorted
# into <tbl>_new.txt, an empty <tbl>_new table is created and loaded from
# the sorted file, and finally <tbl>_new replaces the original table.
# Scratch files are removed before and after a successful run; on error
# they are left in place.
proc optimizeTbl {logFh db type tbl chrCol startCol endCol} {
    prLog $logFh "begin optimize $type $db.$tbl"
    set tblNew ${tbl}_new
    delTmpFiles $tbl
    prLog $logFh "dumping $db.$tbl"
    exec hgsqldump -T $::workDir $db $tbl

    # sort(1) numbers its key fields from 1, and a leading bin column
    # pushes every field one position further to the right
    if {[hasColumn $db $tbl bin]} {
        set adj 2
    } else {
        set adj 1
    }
    incr chrCol $adj
    incr startCol $adj
    incr endCol $adj

    set tblTxtTab $::workDir/$tbl.txt
    set tblNewTab $::workDir/$tblNew.txt
    prLog $logFh "sorting $db.$tbl"
    # The dump is tab separated, so the separator must be given explicitly.
    # By default sort splits fields at blank/non-blank transitions, which
    # miscounts columns as soon as a field is empty or contains a space.
    exec sort -t "\t" -k ${chrCol},${chrCol} -k ${startCol},${startCol}n -k ${endCol},${endCol}n $tblTxtTab >$tblNewTab
    mkTmpTbl $logFh $db $tbl $tblNew
    prLog $logFh "loading $db.$tbl"
    # presumably hgsqlimport, like mysqlimport, derives the target table
    # (<tbl>_new) from the file name -- TODO confirm
    exec hgsqlimport $db $tblNewTab
    installTbl $logFh $db $tblNew $tbl
    delTmpFiles $tbl
    prLog $logFh "end optimize $type $db.$tbl"
}

# Put tblNew in place of tbl.
#
# Any stale <tbl>_old is dropped first, then a single rename statement
# moves tbl to <tbl>_old and tblNew to tbl, and finally <tbl>_old is
# dropped.  Returns the result of the last callSql.
proc installTbl {logFh db tblNew tbl} {
    set backup "${tbl}_old"
    set dropBackup "drop table if exists $backup"
    set swap "rename table $tbl to $backup, $tblNew to $tbl"

    callSql $logFh $db $dropBackup
    callSql $logFh $db $swap
    return [callSql $logFh $db $dropBackup]
}

# Create an empty table newTbl with the same definition as tbl.
#
# The create statement is derived before anything is dropped, so a failure
# to build it leaves an existing newTbl untouched.  Returns the result of
# the create statement's callSql.
proc mkTmpTbl {logFh db tbl newTbl} {
    set ddl [getCreateTblNew $db $tbl $newTbl]
    callSql $logFh $db "drop table if exists $newTbl"
    return [callSql $logFh $db $ddl]
}

# Build the "CREATE TABLE" statement for newTbl from the definition of tbl.
#
# The first row of "show create table <tbl>" is expected to hold the table
# name in column 0 and the create statement in column 1.  The first
# occurrence of the quoted original name after CREATE TABLE is replaced by
# newTbl.  Raises an error if the row is for a different table or if the
# statement does not contain the expected "CREATE TABLE `<tbl>`" text.
proc getCreateTblNew {db tbl newTbl} {
    set firstRow [lindex [loadRows $db "show create table $tbl"] 0]
    set reportedTbl [lindex $firstRow 0]
    set createSql [lindex $firstRow 1]

    if {$reportedTbl != $tbl} {
        error "expect create for $tbl, got $reportedTbl"
    }
    set edited [regsub "CREATE TABLE `$tbl`" $createSql "CREATE TABLE `${newTbl}`" create]
    if {!$edited} {
        error "edit of create table failed for $createSql"
    }
    return $create
}

# Run an SQL query against db with "hgsql -N -e" and return its output as
# a list of rows: the output is split into lines at newlines and each line
# into columns at tabs.
proc loadRows {db sql} {
    set output [exec hgsql -N -e $sql $db]
    set rows {}
    foreach record [split $output \n] {
        set columns [split $record \t]
        lappend rows $columns
    }
    return $rows
}

# Return 1 if table tbl of database db has a column named col, else 0.
# The column names are taken from the first field of each row returned by
# "describe <tbl>".
proc hasColumn {db tbl col} {
    set found 0
    foreach fieldDesc [loadRows $db "describe $tbl"] {
        set fieldName [lindex $fieldDesc 0]
        if {$fieldName == $col} {
            set found 1
            break
        }
    }
    return $found
}

# Write msg to the channel logFh as one line, prefixed with the current
# local time formatted as YYYY-MM-DDThh:mm:ss.
proc prLog {logFh msg} {
    set stamp [clock format [clock seconds] -format %Y-%m-%dT%T]
    puts $logFh "$stamp $msg"
}

# Optimize the tables of every database listed in dbFile, one database
# after the other, in the order loadDbs returns them.
proc doModify {logFh dbFile} {
    foreach db [loadDbs $dbFile] {
        modifyDb $logFh $db
    }
}

# Main: prepare the work and log directories, optimize all configured
# databases and record the outcome in the per-host log file.

file mkdir $workDir
exec chmod a+rwx $workDir
# exported so that child processes started with exec inherit it
# (presumably so sort(1) keeps its temporary files in the work directory)
set env(TMPDIR) $workDir

# best effort: if the directory cannot be created, the open below fails
# with a clear message
catch {file mkdir $logDir}
set logFh [open $logDir/$host.log w]
fconfigure $logFh -buffering line
if {[catch {
    doModify $logFh $dbFile
} msg]} {
    prLog $logFh "Error: $msg"
    # keep the stack trace so the failing step can be located
    prLog $logFh $::errorInfo
    close $logFh
    exit 1
}
# Without -force, file delete raises an error for a non-empty directory.
# Leftover files must not turn a successful run into a failure, so the
# problem is logged instead of aborting before "Finished" is written.
if {[catch {file delete $workDir} msg]} {
    prLog $logFh "Warning: could not remove $workDir: $msg"
}
prLog $logFh "Finished"
close $logFh

