#!/usr/bin/perl -w
# Script to split contigs as needed so as not to BLASTN too big a piece.
#
# Every contig of the input FASTA file is cut into overlapping sub-fragments.
# Each fragment is written to its own FASTA file inside the given directory,
# named "<contig>=<start>=<end>" with 1-based, inclusive coordinates.

use strict;
use warnings;

(@ARGV==2) || die ("usage: create-subfrags tmp-directory fasta-file\n");

my $dir=$ARGV[0];
my $file=$ARGV[1];

# Three-argument open: the file name is never interpreted as a mode or a pipe.
open(SEQ,'<',$file) || die ("Can't open $file: $!\n");

if (!-d($dir)) {
  mkdir($dir,0777) || die ("Can't create directory $dir: $!\n");
}

my $subseqlen=3000;  # nominal fragment length
my $overlap=1000;    # bases shared by two consecutive fragments
my $minimum=100;     # a leftover tail shorter than this joins the last fragment

while (1) {
  my ($seq,$name)=load_next_seq(\*SEQ);
  $name="" if (!defined($name));

  # End of input is signalled by an empty sequence AND an empty name. Testing
  # the sequence alone would stop at the first header-only record and silently
  # drop every contig that follows it.
  last if ($seq eq "" && $name eq "");

  my $seqlen=length($seq);
  my $i=0;
  while ($i<$seqlen) {
    my $subseq=substr($seq,$i,$subseqlen);
    my $start=$i+1;
    my $end=$i+length($subseq);
    my $is_last=0;
    if ($seqlen-$end<$minimum) { # fragment of contig remaining is too short
      $subseq=substr($seq,$i);
      $end=$seqlen;
      $is_last=1;
    }

    # NOTE(review): the contig name is used verbatim as a file name, so a name
    # containing "/" will make the open below fail.
    my $subname=$name."=".$start."=".$end;

    # print_seq writes to the global FILE handle, so it must be opened here.
    open(FILE,'>',"$dir/$subname") || die ("Can't create $dir/$subname: $!\n");
    print_seq($subname,$subseq);
    # Buffered write errors (e.g. disk full) only surface at close time.
    close(FILE) || die ("Can't write $dir/$subname: $!\n");

    last if ($is_last);
    $i+=($subseqlen-$overlap);
  }
}
close(SEQ);
 
# Read the next FASTA record from an open, seekable filehandle.
#
# Argument: a filehandle (glob reference or lexical handle).
# Returns:  ($seq, $header). $seq is the record's sequence lines joined,
#           upper-cased, with spaces, tabs and carriage returns removed.
#           $header is the first whitespace-delimited word after the ">".
#           Both are "" when there is nothing left to read.
#
# The handle is left at the start of the following record's header line, so
# successive calls walk through the file one record at a time. Sequence lines
# that precede the first named header are attributed to that header.
sub load_next_seq {
  my $fname=shift;
  my $seq="";
  my $header="";

  # Offset of the line about to be read. Starting from the current position
  # (not 0) means a call made at end of file never rewinds the handle.
  my $curpos=tell($fname);

  while (defined(my $line=<$fname>)) {
    # Strip only the line terminator (LF or CRLF). chop() would also remove
    # the last base of a final line that has no trailing newline.
    $line=~s/\r?\n\z//;
    if ($line=~/^>/) {
      if ($header ne "") {
        # This header belongs to the next record: un-read it and stop.
        seek($fname,$curpos,0)
          || die ("load_next_seq: can't seek back to start of next record: $!\n");
        last;
      }
      my ($word)=split(" ",substr($line,1));
      # A bare ">" yields no word; keep $header defined (it still counts as
      # "no header seen yet", as before).
      $header=defined($word) ? $word : "";
    }
    else {
      $seq.=uc($line);
    }
    $curpos=tell($fname);
  }

  $seq=~tr/ \t\r//d;
  return ($seq,$header);
}
 
# Write one FASTA record to the global FILE handle: a ">name" header line
# followed by the sequence wrapped at 60 characters per line. An empty
# sequence produces the header line only. The caller opens and closes FILE.
sub print_seq {
    my ($name, $seq) = @_;

    my $width = 60;

    print FILE ">$name\n";

    my $total  = length($seq);
    my $offset = 0;
    while ($offset < $total) {
        # substr() stops at the end of the string, so the final, shorter
        # line needs no special case.
        my $chunk = substr($seq, $offset, $width);
        print FILE "$chunk\n";
        $offset += $width;
    }
}
