#!/usr/local/bin/perl -w
##---------------------------------------------------------------------------##
##  File:
##      @(#) WUBlastSearchEngine.pm
##  Author:
##      Robert M. Hubley   rhubley@systemsbiology.org
##  Description:
##      An implementation of SearchEngineI for the
##      the WU-Blast search engine.
##
#******************************************************************************
#* Copyright (C) Institute for Systems Biology 2003-2004 Developed by
#* Arian Smit and Robert Hubley.
#*
#* This work is licensed under the Open Source License v2.1.  To view a copy
#* of this license, visit http://www.opensource.org/licenses/osl-2.1.php or
#* see the license.txt file contained in this distribution.
#*
#******************************************************************************
# Implementation Details:
#
# bless(
#      'WUBlastSearchEngine' );
#
###############################################################################
# ChangeLog
#
#     $Log: WUBlastSearchEngine.pm,v $
#     Revision 1.1.1.1  2005/04/19 14:53:52  rhubley
#       Initial import
#
#     Revision 1.46  2004/09/29 17:58:42  rhubley
#       - Added code to ignore a couple of more "safe" WUBlast exeptions.
#         This particular one had to do with sequences which are shorter
#         than the search word length.  Of course there will not be a hit
#         to these sequences but it shouldn't raise an exception.
#
#     Revision 1.44  2004/09/09 22:43:48  rhubley
#     Cleanup before a distribution
#
#
###############################################################################
# To Do:
#
#

=head1 NAME

WUBlastSearchEngine

=head1 SYNOPSIS

use WUBlastSearchEngine

Usage: 

  use SearchEngineI;
  use WUBlastSearchEngine;
  use SearchResultCollection;
  
  my $wuEngine = WUBlastSearchEngine->new( 
                    pathToEngine=>"/usr/local/wublast/blastp" );

  $wuEngine->setMatrix( "/users/bob/simple.matrix" );
  $wuEngine->setQuery( "/users/bob/query.fasta" );
  $wuEngine->setSubject( "/users/bob/subject.fasta" );
  my $searchResults = $wuEngine->search();

=head1 DESCRIPTION

  A concrete implementation of the abstract class / interface SearchEngineI
  which use the WUBlast sequence search engine.

=head1 INSTANCE METHODS

=cut 

package WUBlastSearchEngine;
use strict;
use SearchEngineI;
use SearchResultCollection;
use Data::Dumper;
use FileHandle;
use File::Basename;
use Carp;
use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $VERSION);

require Exporter;

@ISA = qw(Exporter SearchEngineI);

@EXPORT = qw();

@EXPORT_OK = qw();

%EXPORT_TAGS = ( all => [ @EXPORT_OK ] );

#
# Version
#
my $VERSION = 0.1;
my $CLASS   = "WUBlastSearchEngine";
my $DEBUG   = 1;

##-------------------------------------------------------------------------##
## Constructor
##-------------------------------------------------------------------------##
sub new {
  my $class          = shift;
  my %nameValuePairs = @_;

  croak $CLASS
      . "::new: Missing path to search engine!\n\n"
      . "use \$searchEngine = $CLASS->new( pathToEngine=>\"/usr/local/"
      . "bin/blastp\")\n"
      if ( not defined $nameValuePairs{'pathToEngine'} );

  # Create ourself as a hash
  my $this = {};

  # Bless this hash in the name of the father, the son...
  bless $this, $class;

  $this->setPathToEngine( $nameValuePairs{'pathToEngine'} );

  return $this;
}

##-------------------------------------------------------------------------##
## Get and Set Methods
##-------------------------------------------------------------------------##

##-------------------------------------------------------------------------##

=over 4

=item Use: my $value = getPathToEngine( );

=item Use: my $oldValue = setPathToEngine( $value );

Get/Set the fully qualified path to the search engine
binary file.

=back 

=cut

##-------------------------------------------------------------------------##
sub getPathToEngine {
  my $this = shift;

  return $this->{'pathToEngine'};
}

sub setPathToEngine {
  my $this  = shift;
  my $value = shift;

  croak $CLASS. "::setPathToEngine( $value ): Program does not exist!"
      if ( not -x $value || `which $value` );

  my $result = `$value 2>&1`;
  if ( $result =~ /^BLASTP (\S+) \[.*/ ) {
    $this->{'version'} = $1;
  }

  my $oldValue = $this->{'pathToEngine'};
  $this->{'pathToEngine'} = $value;

  return $oldValue;
}

##-------------------------------------------------------------------------##
## General Object Methods
##-------------------------------------------------------------------------##

##-------------------------------------------------------------------------##

=over 4

=item Use: my ( $resultCode, $SearchResultCollectionI ) = search( );
                                                                                
=item Use: my ( $resultCode, $SearchResultCollectionI )
                          = search( matrix=>"7p16g.matrix",
                                    ...
                                  );

Run the search and return a SearchResultCollection.

=back 

=cut

##-------------------------------------------------------------------------##
sub search {
  my $this           = shift;
  my %nameValuePairs = @_;

  if ( %nameValuePairs ) {
    while ( my ( $name, $value ) = each( %nameValuePairs ) ) {
      my $method = "set" . _ucFirst( $name );
      unless ( $this->can( $method ) ) {
        croak( $CLASS . "::search: Instance variable $name doesn't exist." );
      }
      $this->$method( $value );
    }
  }

  # Test if engine is available
  my $engine = $this->getPathToEngine();
  if ( !defined $engine || !-f "$engine" ) {
    croak $CLASS
        . "::search: The path to the search engine is undefined or\n"
        . "is set incorrectly: $engine\n";
  }

  # Generate parameter line
  my $parameters    = "";
  my $spanParameter = "";
  my $value;
  if ( ( $value = $this->getSubject() ) ) {

    # Check if simple.lib or at.lib
    if ( $value =~ /simple.lib/ || $value =~ /at.lib/ ) {
      $spanParameter = " -span1";
    }
    else {
      $spanParameter = " -span2";
    }
    if ( -f "$value.ahd" ) {
      $parameters .= " $value";
    }
    else {
      croak $CLASS. "::search: Error...subject ($value) does not exist!\n";
    }
  }
  else {
    croak $CLASS. "::search: Error subject undefined!\n";
  }

  my $outputDirName;
  if ( ( $value = $this->getQuery() ) ) {
    if ( -f $value ) {
      $parameters .= " $value";
    }
    else {
      croak $CLASS. "::search: Error...query ($value) does not exist!\n";
    }
    $outputDirName = dirname( $value );
  }
  else {
    croak $CLASS. "::search: Error query undefined!\n";
  }

  # Constant parameters
  $parameters .= " -warnings -gapall -T=1000000 -p=1 -hspmax=2000000000";
  $parameters .= " V=0 B=100000000";
  $parameters .= $spanParameter;

  if ( defined( $value = $this->getGapInit() )
       && $value =~ /\d+/ )
  {
    $parameters .= " Q=" . abs( $value );
  }
  else {
    $parameters .= " Q=12";
  }

  if ( defined( $value = $this->getInsGapExt() )
       && $value =~ /\d+/ )
  {
    $parameters .= " R=" . abs( $value );
  }
  else {
    $parameters .= " R=2";
  }

  if ( defined( $value = $this->getMinMatch() )
       && $value =~ /\d+/ )
  {
    $parameters .= " W=$value";
  }
  else {
    $parameters .= " W=14";
  }

  if ( defined( $value = $this->getMinScore() )
       && $value =~ /\d+/ )
  {
    $parameters .= " S=$value";
    $parameters .= " gapS2=$value";
    $parameters .= " S2=" . int( $value / 2 );
    $parameters .= " X=$value";
    $parameters .= " gapX=" . ( $value * 2 );
  }
  else {
    $parameters .= " S=30";
    $parameters .= " gapS2=30";
    $parameters .= " S2=15";
    $parameters .= " X=30";
    $parameters .= " gapX=60";
  }

  if ( defined( $value = $this->getBandwidth() )
       && $value =~ /\d+/ )
  {
    $parameters .= " gapW=" . ( ( $value * 3 ) + 1 );
  }

  my ( $matrixRef, $matrixAlphabet, $matrixFreqRef ) = ();

  if ( defined( $value = $this->getMatrix() ) ) {

    # Test if matrix exists
    if ( -f $value ) {

      # Read in the matrix if we need to
      # adjust scores
      if ( $this->getScoreMode() == SearchEngineI::complexityAdjustedScoreMode )
      {
        ( $matrixRef, $matrixAlphabet, $matrixFreqRef ) = _readMatrix( $value );
      }

      # WUBLAST requires that the matrix filename parameter
      # be relative to a directory path specified in
      # environment variables.
      my @path   = split( /[\\\/]/, $value );
      my $matrix = pop @path;

      # WUBLAST expects the path to be above "aa" or "nt" sub
      # directories
      if ( $path[ $#path ] eq "aa" ) {
        pop @path;
      }

      # Set the environment
      $ENV{BLASTMAT}   = join( "/", @path );
      $ENV{WUBLASTMAT} = $ENV{BLASTMAT};

      # RepeatMasker encodes the GC level of the matrices
      # in their name. Ie. 14p35g.matrix = 14 % divergence
      #                                    35 % GC
      my ( $gc ) = $matrix =~ /^\d+p(\d+)g/;
      if ( defined $gc && $gc =~ /\d+/ ) {
        $this->{GC} = $gc / 100;
      }
      else {

        # Default GC level if we don't have any info
        $this->{GC} = .5;
      }
      $parameters .= " -matrix=$matrix";

    }
    else {
      croak $CLASS. "::search: Error...matrix ($value) does not exist!\n";
    }
  }

  # Invoke engine and handle errors
  print $CLASS. "::search() Invoking search engine as: $engine $parameters\n"
      if ( $DEBUG );
  my $POUTPUT = new FileHandle;
  my $errFile;
  do {
    $errFile = $outputDirName . "/wuResults-" . time() . ".err";
  } while ( -f $errFile );
  my $pid     = open( $POUTPUT, "$engine $parameters 2>$errFile |" );

  my $searchResultsCollection;

  # Create SearchResultCollection object from
  # the engine results.
  if ( $DEBUG ) {
    ## Create a debug file
    my $outFile;
    do {
      $outFile = $outputDirName . "/wuResults-" . time() . ".out";
    } while ( -f $outFile );
    open OUT, ">$outFile";
    while ( <$POUTPUT> ) {
      print OUT $_;
    }
    close OUT;
    close $POUTPUT;
    $searchResultsCollection = parseOutput( searchOutput => $outFile );
  }
  else {
    # Just pipe to the parser
    $searchResultsCollection = parseOutput( searchOutput => $POUTPUT );
  }
  close $POUTPUT;

  my $resultCode = ($?>>8);
  print "WUBlast returned a the following result code >$resultCode<\n" if ( $DEBUG );

  # This result code can occur if wublast encounters a sequence
  # (at the end of a query collection) which is short enough
  # that it would never score higher than the threshold. An
  # exit code of 16 | 17 occurs with a printed error of 
  # "FATAL:   There are no valid contexts in the requested search."
  # Look for this and ignore.  NOTE: In WUBLast 2.0 ( non free version )
  # you can use the -novalidctxok option to warn but not return a bad exit 
  # code.
  if ( $resultCode != 0 ) {
    open ERR,"<$errFile";
    my $errOk = 0;
    while ( <ERR> ) {
      if ( /^FATAL:/ ) {
        $errOk = 1 unless ( /context|cpus|P option|uses P|shorter/i ); 
      }
    }
    close ERR;
    $resultCode = 0 unless ( $errOk == 1 );
  }
  unlink $errFile unless ( $DEBUG );

  #
  # Postprocess the results
  #
  if ( defined $searchResultsCollection
       && $searchResultsCollection->size() > 0 )
  {

    #
    # Calculate the complexity adjusted score if necessary
    #
    my $minScore = $this->getMinScore();
    if ( $this->getScoreMode() == SearchEngineI::complexityAdjustedScoreMode ) {

      my $lambda = _calculateLambda( $matrixRef, $matrixFreqRef );

      for ( my $i = $searchResultsCollection->size() - 1 ; $i >= 0 ; $i-- ) {
        my $adjScore = _complexityAdjust(
                          $searchResultsCollection->get( $i )->getScore(),
                          $searchResultsCollection->get( $i )->getQueryString(),
                          $searchResultsCollection->get( $i )->getSubjString(),
                          $lambda,
                          $matrixAlphabet,
                          $matrixRef,
                          $matrixFreqRef
        );

        if ( defined $minScore && $adjScore < $minScore ) {
          $searchResultsCollection->remove( $i );
        }
        else {
          $searchResultsCollection->get( $i )->setScore( $adjScore );
        }
      }
    }

    #
    # Filter set using "masklevel" concept
    #

    # Sort scores high to low
    $searchResultsCollection->sort(
      sub ($$) {
        $_[ 1 ]->getScore() <=> $_[ 0 ]->getScore();
      }
    );
    if ( defined( $value = $this->getMaskLevel() )
         && $value < 101 )
    {
      my %deleteHash = ();    # Hash to hold indexes of filtered entries
      for ( my $i = 0 ; $i < $searchResultsCollection->size() ; $i++ ) {
        for ( my $j = $i + 1 ; $j < $searchResultsCollection->size() ; $j++ ) {
          my $overlap = 0;
          my $perc    = 0;
          my $result1 = $searchResultsCollection->get( $i );
          my $result2 = $searchResultsCollection->get( $j );

          # Get members
          my $begin1 = $result1->getQueryStart();
          my $begin2 = $result2->getQueryStart();
          my $end1   = $result1->getQueryEnd();
          my $end2   = $result2->getQueryEnd();

          # Check if they overlap
          next if ( $begin2 > $end1 || $begin1 > $end2 );
          next
              if ( $searchResultsCollection->get( $i )->getQueryName() ne
                   $searchResultsCollection->get( $j )->getQueryName() );

          # Calc overlap
          $overlap = $begin1 - $begin2 if ( $begin2 < $begin1 );
          $overlap += $end2 - $end1 if ( $end2 > $end1 );
          $perc = ( $overlap / ( $end2 - $begin2 + 1 ) ) * 100 if ( $overlap );
          if ( $perc < ( 100 - $this->getMaskLevel() ) ) {
            $deleteHash{$j} = 1;
          }
        }
      }

      # Remove all hits which were filtered above
      if ( keys( %deleteHash ) ) {
        foreach my $index ( sort { $b <=> $a } keys( %deleteHash ) ) {
          $searchResultsCollection->remove( $index );
        }
      }
    }

    # Secondary sort by query position
    $searchResultsCollection->sort(
      sub ($$) {
        $_[ 0 ]->getQueryStart() <=> $_[ 1 ]->getQueryStart();
      }
    );

  }

  return ( $resultCode, $searchResultsCollection );
}

##-------------------------------------------------------------------------##
## Private Methods
##-------------------------------------------------------------------------##

##-------------------------------------------------------------------------##
## Use: my _complexityAdjust( $swnScore, $qrySeq, $sbjSeq, $matLambda,
##                            $matAlphabet, $matScoresRef, $matFreqsRef );
##
##      $swnScore    : Smith-Waterman Raw Alignment Score
##      $qrySeq      : Query string from the alignment
##      $sbjSeq      : Subject string from the alignment
##      $matLambda   : Matrix Lambda parameter
##      $matAlphabet : Matrix alphabet string (in column order)
##      $matScoresRef: Score Matrix
##      $matFreqsRef : Matrix alphabet freq vector
##
## Returns
##      $adj_score : The complexity adjusted score according to
##                   to Phil Green's swat/cross_match program.
##-------------------------------------------------------------------------##
sub _complexityAdjust {
  my ( $swnScore, $qrySeq, $sbjSeq, $matLambda, $matAlphabet, $matScoresRef,
       $matFreqsRef )
      = @_;
  my $mCol      = 0;
  my $t_factor  = 0;
  my $t_sum     = 0;
  my $t_counts  = 0;
  my $n_letters = 0;
  my $baseIndex = 0;
  my $qBase     = "";
  my $sBase     = "";
  my $pos_score = 0;
  my @matCounts = ();
  my $adj_score = 0;

  # Creates a vector of base counts from the query sequence.  It
  # ignores base instances when they are part of a deletion or
  # are insertion characters "-".
  for ( $baseIndex = 0 ; $baseIndex < length( $qrySeq ) ; $baseIndex++ ) {
    $qBase = substr( $qrySeq, $baseIndex, 1 );
    $sBase = substr( $sbjSeq, $baseIndex, 1 );
    if ( $qBase ne "-" && $sBase ne "-" ) {
      $pos_score +=
          $$matScoresRef[ index( $matAlphabet, $qBase ) ]
          [ index( $matAlphabet, $sBase ) ];
      $matCounts[ index( $matAlphabet, $qBase ) ]++
          if ( ( index $matAlphabet, $qBase ) >= 0 );
    }
  }

  #
  #
  #
  for ( $mCol = 0 ; $mCol < length( $matAlphabet ) ; $mCol++ ) {
    if ( defined $matCounts[ $mCol ] && $matCounts[ $mCol ] > 0 ) {
      if ( $$matFreqsRef[ $mCol ] > 0 && log( $$matFreqsRef[ $mCol ] ) != 0 ) {
        $t_factor += $matCounts[ $mCol ] * log( $matCounts[ $mCol ] );
        $t_sum    += $matCounts[ $mCol ] * log( $$matFreqsRef[ $mCol ] );
        $t_counts += $matCounts[ $mCol ];
        $n_letters++;
      }
    }
  }

  #
  #
  #
  $t_factor -= $t_counts * log( $t_counts );
  $t_sum    -= $t_factor;

  #
  # Looks like Phil changed his mind here
  #
  #my $complexity_factor = 0.25;
  #if ( $n_letters > ( 1 / $complexity_factor )) {
  #  $t_factor /= $t_counts * log(1 / $n_letters);
  #}else {
  #  $t_factor /= $t_counts * log($complexity_factor);
  #}
  #
  #$old_adj_score = $swnScore + ($t_factor * $pos_score) - $pos_score + .5;

  #
  #
  #
  $adj_score = sprintf( "%0.0d", $swnScore + $t_sum / $matLambda + .999 );

  $adj_score = 0 if ( !( $adj_score =~ /\d+/ ) || $adj_score < 0 );
  return ( $adj_score );
}

##-------------------------------------------------------------------------##
## Use: my _calculateLambda( $matScoresRef, $matFreqsRef );
##
##      $matScoresRef: Score Matrix
##      $matFreqsRef : Matrix alphabet freq vector
##
## Returns
##      $lambda : The lambda parameter derived from the matrix
##                and the matrix alphabet frequencies.  This
##                is derived from Phil Green's swat/cross_match
##                programs.
##-------------------------------------------------------------------------##
sub _calculateLambda {
  my ( $matScoresRef, $matFreqsRef ) = @_;
  my $lambda_upper = 0;
  my $lambda_lower = 0;
  my $lambda       = 0.5;
  my $sum          = 0;

  do {
    $sum = _getSum( $lambda, $matScoresRef, $matFreqsRef );
    if ( $sum < 1.0 ) {
      $lambda_lower = $lambda;
      $lambda *= 2.0;
    }
  } while ( $sum < 1.0 );

  $lambda_upper = $lambda;

  while ( $lambda_upper - $lambda_lower > .00001 ) {
    $lambda = ( $lambda_lower + $lambda_upper ) / 2.0;
    $sum = _getSum( $lambda, $matScoresRef, $matFreqsRef );
    if ( $sum >= 1.0 ) {
      $lambda_upper = $lambda;
    }
    else {
      $lambda_lower = $lambda;
    }
  }
  return ( $lambda );
}

##-------------------------------------------------------------------------##
## Use: my _getSum( $lambda, $matScoresRef, $matFreqsRef );
##
##      $matScoresRef: Score Matrix
##      $matFreqsRef : Matrix alphabet frequency vector
##
## Returns
##      $sum : Good question....???  This is used by the
##             lambda estimation procedure.  _getSum is
##             derived from Phil Green's swat/cross_match
##             programs.
##-------------------------------------------------------------------------##
sub _getSum {
  my ( $lambda, $matScoresRef, $matFreqsRef ) = @_;
  my $check = 0;
  my $total = 0;
  my $i     = 0;
  my $j     = 0;

  for ( $i = 0 ; $i <= $#$matFreqsRef ; $i++ ) {
    for ( $j = 0 ; $j <= $#$matFreqsRef ; $j++ ) {
      if ( $$matFreqsRef[ $i ] && $$matFreqsRef[ $j ] ) {
        $total += $$matFreqsRef[ $i ] * $$matFreqsRef[ $j ] *
            exp( $lambda * $$matScoresRef[ $i ][ $j ] );
        $check += $$matFreqsRef[ $i ] * $$matFreqsRef[ $j ];
      }
    }
  }
  die "error in _getSum!! check=$check\n"
      if (    $check > 1.001
           || $check < .999 );

  return ( $total );
}

##-------------------------------------------------------------------------##
## Use: my ( \@matrix, $alphabet, \@freqArray ) =
##                                        _readMatrix( $matrixFileName );
##
##      $matFileName : Name of the file containing the WUBlast Matrix
##
## Returns
##      This is a very specific matrix reader for WUBlast matrices
##      which have been created for RepeatMasker.  This routine
##      assumes that the standard crossmatch matrix frequency line
##      is included in the comment section. Ie.:
##
##        # FREQS A 0.325 C 0.175 G 0.175 T 0.325
##
##      The routine returns the matrix stored as a 2-d array.  It also
##      returns the alphabet as a compressed string and the frequencies
##      as an array ( sizeof alphabet ).
##
##-------------------------------------------------------------------------##
sub _readMatrix {
  my ( $matrixFileName ) = @_;
  my %freqHash           = ();
  my @matrix             = ();
  my @rowValues          = ();
  my $row                = 0;
  my $alphabet           = "";
  my $i                  = 0;
  my @freqArray          = ();

  open MATRIX, "<$matrixFileName" || die "Can't open $matrixFileName!\n";

  while ( <MATRIX> ) {
    $_ = uc( $_ );
    chomp;
    if ( /FREQS\s+(.*)/ ) {
      %freqHash = split " ", $1;
    }
    if ( /^\s*[A-Z]\s+[A-Z]\s+[A-Z]\s+[A-Z]\s+/ ) {
      s/ //g;
      $alphabet = $_;
    }
    elsif ( $alphabet
            && /^\s*\S\s+[\d-]+\s+[\d-]+\s+[\d-]+\s+[\d-]+\s+/ )
    {
      @rowValues = split;
      shift @rowValues;
      $matrix[ $row++ ] = [ @rowValues ];
    }
  }
  close MATRIX;

  if ( scalar( keys %freqHash ) == 0 ) {
    $freqHash{"A"} = 0.25;
    $freqHash{"C"} = 0.25;
    $freqHash{"G"} = 0.25;
    $freqHash{"T"} = 0.25;
  }

  for ( $i = 0 ; $i < length( $alphabet ) ; $i++ ) {
    $freqArray[ $i ] = $freqHash{ substr( $alphabet, $i, 1 ) } || 0;
  }

  return ( \@matrix, $alphabet, \@freqArray );
}

##-------------------------------------------------------------------------##

=over 4
                                                                               
=item Use: my $SearchResultCollection = parseOutput(
                                     searchOutput => $filename|$FH,
                                     [excludeAlignments => 1],
                                     [scoreHighThresh => #],
                                     [scoreLowThresh => #],
                                     [subjPattern => ""],
                                     [queryPattern => ""] );
                                                                           
Parse the result of a search and return a SearchResultCollection.
                                                                          
=back
                                                                         
=cut

##-------------------------------------------------------------------------##
sub parseOutput {
  my %nameValueParams = @_;

  croak $CLASS. "::parseOutput() missing searchOutput parameter!\n"
      if ( !exists $nameValueParams{'searchOutput'} );

  my $WUFILE;
  if ( ref( $nameValueParams{'searchOutput'} ) !~ /GLOB|FileHandle/ ) {
    print $CLASS
        . "::parseOutput() Opening file "
        . $nameValueParams{'searchOutput'} . "\n"
        if ( $DEBUG );
    open $WUFILE, $nameValueParams{'searchOutput'}
        or die $CLASS
        . "::parseOutput: Unable to open "
        . "results file: $nameValueParams{'searchOutput'} : $!";
  }
  else {
    $WUFILE = $nameValueParams{'searchOutput'};
  }

  my $inAlignState = 0;
  my $sbjID        = "";
  my $qryID        = "";
  my $absIndex     = 0;
  my $score        = 0;
  my $adjScore     = 0;
  my $sbjSeq       = "";
  my $qrySeq       = "";
  my $sbjOrient    = "";
  my $qryOrient    = "";    # TODO: Check to see if this ever happens
  my $sbjStart     = 0;
  my $sbjEnd       = 0;
  my $qryStart     = 0;
  my $qryEnd       = 0;
  my $qryLength    = 0;
  my $sbjLength    = 0;
  my $matrix;

  my $resultColl = SearchResultCollection->new();

  while ( <$WUFILE> ) {

    #
    # Conditions for the end of a hit record:
    #   o Must have seen a score
    #   o Must be in the alignment state (ie. have seen Query: and
    #     Subj: recently)
    #   o Must see something which isn't either "Query:" "Subj:" or " "
    #     or must see the end of the file
    #
    if ( $inAlignState ) {
      if ( !/^(Query:|Sbjct:|\s{8}|\n|\r)/ || eof ) {

        $qryLength =~ s/,//g;

        #
        # Reorient if this is a reverse strand
        # hit.
        #
        if ( $sbjOrient eq "C" ) {
          $qrySeq = reverse $qrySeq;
          $qrySeq =~ tr/ACGTYRMKHBVD/TGCARYKMDVBH/;    # complement
          $sbjSeq = reverse $sbjSeq;
          $sbjSeq =~ tr/ACGTYRMKHBVD/TGCARYKMDVBH/;    # complement
          my $tmpEnd = $sbjEnd;
          $sbjEnd   = $sbjLength - $sbjStart + 1;
          $sbjStart = $sbjLength - $tmpEnd + 1;
        }

        #
        # Calculate percent divergence
        #           percent insertions
        #           percent deletions
        #
        my %baseFreq = ();
        my $mismatch = 0;
        for ( my $i = 0 ; $i < length( $qrySeq ) ; $i++ ) {
          my $qryBase = substr( $qrySeq, $i, 1 );
          my $sbjBase = substr( $sbjSeq, $i, 1 );
          next if ( $qryBase =~ /-|x/i || $sbjBase =~ /-|x/i );
          $baseFreq{$qryBase}++;
          $mismatch++ if ( $qryBase ne $sbjBase );
        }
        my $percDiv =
            sprintf( "%4.2f", $mismatch * 100 / ( $qryEnd + 1 - $qryStart ) );
        my $qgap = $qrySeq =~ tr/-/-/;
        my $sgap = $sbjSeq =~ tr/-/-/;
        my $percIns =
            sprintf( "%4.2f", $sgap * 100 / ( ( $sbjEnd + 1 ) - $sbjStart ) );
        my $percDel =
            sprintf( "%4.2f", $qgap * 100 / ( ( $qryEnd + 1 ) - $qryStart ) );

        my $result = SearchResult->new(
                                     queryName      => $qryID,
                                     queryStart     => $qryStart,
                                     queryEnd       => $qryEnd,
                                     queryRemaining => ( $qryLength - $qryEnd ),
                                     queryString    => $qrySeq,
                                     subjString     => $sbjSeq,
                                     orientation    => $sbjOrient,
                                     subjName       => $sbjID,
                                     subjStart      => $sbjStart,
                                     subjEnd        => $sbjEnd,
                                     subjRemaining  => ( $sbjLength - $sbjEnd ),
                                     queryString    => $qrySeq,
                                     pctDiverge     => $percDiv,
                                     pctInsert      => $percIns,
                                     pctDelete      => $percDel,
                                     score          => $score
        );

        $resultColl->add( $result );

        $score        = "";
        $sbjSeq       = "";
        $qrySeq       = "";
        $qryOrient    = "";
        $sbjStart     = 0;
        $sbjEnd       = 0;
        $qryStart     = 0;
        $qryEnd       = 0;
        $inAlignState = 0;
      }
    }

    #
    # Query alignment
    #
    if ( /Query:\s+(\d+)\s+(\S+)\s+(\d+)/ ) {
      $qrySeq .= $2;
      if ( $qryStart < 1 ) {
        $qryStart = min( $1, $3 );
        $qryEnd   = max( $1, $3 );
      }
      else {
        $qryStart = min( min( $1, $3 ), $qryStart );
        $qryEnd   = max( max( $1, $3 ), $qryEnd );
      }
      $qryOrient = "C" if ( $1 > $3 );
      $inAlignState = 1;
    }

    #
    # Subject alignment
    #
    if ( /Sbjct:\s+(\d+)\s+(\S+)\s+(\d+)/ ) {
      $sbjSeq .= $2;
      if ( $sbjStart < 1 ) {
        $sbjStart = min( $1, $3 );
        $sbjEnd   = max( $1, $3 );
      }
      else {
        $sbjStart = min( min( $1, $3 ), $sbjStart );
        $sbjEnd   = max( max( $1, $3 ), $sbjEnd );
      }
      $sbjOrient = "C" if ( $1 > $3 );
      $inAlignState = 1;
    }

    #
    # Query source name
    #
    if ( /^Query=\s+(\S+)/ ) {
      $qryID = $1;
    }

    #
    # Query length
    #
    $qryLength = $1 if ( /^\s+\(([,\d]+) letters/ );

    #
    # Database source name
    #
    #$this->{databaseName} = $1 if ( /Database:\s+(\S+)/ );

    if ( /^\s+Length = (\d+)\s*$/ ) {
      $sbjLength = $1;
    }

    #
    # Score
    #
    if ( /Score = (\d+)/ ) {
      $score = $1;
    }

    #
    # Hit description line
    #
    if ( /^>(.*)/ ) {
      $sbjID = $1;
      if ( $sbjID =~ /anti/ ) {
        $sbjOrient = "C";
      }
      else { $sbjOrient = ""; }
      ( $sbjID ) = $sbjID =~ /(\S+)/;
    }

    #
    # Grab the matrix from the parameter list
    #
    if ( /matrix=\s*(\S+)/ ) {
      my @path = split( /[\\\/]/ );
      $matrix = $path[ $#path ];

      # TODO: Go back and set matrix for all previous hits
    }

  }
  close $WUFILE;

  return $resultColl;
}

##-------------------------------------------------------------------------##
## Use: my min( $num1, $num2 );
##
##              $num1   :       A number to be compared
##              $num2   :       A number to be comprared
##
##      Returns:                The minimum of the two numbers
##
##-------------------------------------------------------------------------##
sub min {
  my ( $num1, $num2 ) = @_;
  if ( $num1 < $num2 ) {
    return ( $num1 );
  }
  else {
    return ( $num2 );
  }
}

##-------------------------------------------------------------------------##
## Use: my max( $num1, $num2 );
##
##              $num1   :       A number to be compared
##              $num2   :       A number to be comprared
##
##      Returns:                The maximum of the two numbers
##
##-------------------------------------------------------------------------##
sub max {
  my ( $num1, $num2 ) = @_;
  if ( $num1 < $num2 ) {
    return ( $num2 );
  }
  else {
    return ( $num1 );
  }
}

##-------------------------------------------------------------------------##
## Use: my _ucFirst( $string );
##
##   Uppercases the first character in a string and returns it.
##
##-------------------------------------------------------------------------##
sub _ucFirst {
  my $string = shift;

  if ( defined $string && $string ne "" ) {
    substr( $string, 0, 1 ) = uc( substr( $string, 0, 1 ) );
  }
  return $string;
}

##-------------------------------------------------------------------------##
## Serialization & Debug Routines
##-------------------------------------------------------------------------##

##-------------------------------------------------------------------------##
## Use: my $string = toString([$this]);
##
##      $this         : Normally passed implicitly
##
##  Returns
##
##      Uses the Data::Dumper to create a printable reprentation
##      of a data structure.  In this case the object data itself.
##
##-------------------------------------------------------------------------##
sub toString {
  my $this = shift;
  my $data_dumper = new Data::Dumper( [ $this ] );
  $data_dumper->Purity( 1 )->Terse( 1 )->Deepcopy( 1 );
  return $data_dumper->Dump();
}

##-------------------------------------------------------------------------##
## Use: my serializeOUT( $filename );
##
##	  $filename	: A filename to be created
##
##  Returns
##
##	Uses the Data::Dumper module to save out the data
##	structure as a text file.  This text file can be
##	read back into an object of this type.
##
##-------------------------------------------------------------------------##
sub serializeOUT {
  my $this     = shift;
  my $fileName = shift;

  my $data_dumper = new Data::Dumper( [ $this ] );
  $data_dumper->Purity( 1 )->Terse( 1 )->Deepcopy( 1 );
  open OUT, ">$fileName";
  print OUT $data_dumper->Dump();
  close OUT;
}

##-------------------------------------------------------------------------##
## Use: my serializeIN( $filename );
##
##	$filename	: A filename containing a serialized object
##
##  Returns
##
##	Uses the Data::Dumper module to read in data
##	from a serialized PERL object or data structure.
##
##-------------------------------------------------------------------------##
sub serializeIN {
  my $this         = shift;
  my $fileName     = shift;
  my $fileContents = "";
  my $oldSep       = $/;
  undef $/;
  my $in;
  open $in, "$fileName";
  $fileContents = <$in>;
  $/            = $oldSep;
  close $in;
  return eval( $fileContents );
}

1;
