#!/usr/bin/perl -w
# Removes overlapping CRITICA calls (with at least a given percentage of
# overlap). Of two overlapping calls, the one with the higher (worse)
# p-value is removed.

(@ARGV==2) || die ("usage: remove-overlaps percentage cds-file\n");

$percent=shift(@ARGV);
$file=shift(@ARGV);

if (($percent<1) || ($percent>100)) {
  die ("remove-overlaps: percentage needs to be within 1 to 100.\n");
}
$oldpvalue=1;
$oldcontig=" ";
open(MAP,"map-critica-orfs $file |") || die ("remove-overlaps: could not create map\n") ;
while(<MAP>) {
  if (length($_)<5) {next;}
  ($contig,$pvalue,$matrix,$start,$end,$comp,$dicod,@rest)=split(" ",$_);
  
  $overlap=pop(@rest);
  if ($oldcontig ne $contig) {
    $oldpvalue=1;
    $oldcontig=" ";
  }
  ($orig)=split("\t",$_);
  $line{$contig." ".$start." ".$end}=$orig;
  if ($overlap>=$percent) {
    if ($pvalue>$oldpvalue) {
      printf(STDERR "removing %20s %8.3e %7d %7d %7d %7d\n",$contig,
             $pvalue,$start,$end,$comp,$dicod);
      $line{$contig." ".$start." ".$end}=-1;
    }
    else {
      ($contig,$pvalue,$matrix,$start,$end,@rest)=split(" ",$oldline);
      printf(STDERR "removing %20s %8.3e %7d %7d %7d %7d\n",$contig,
             $pvalue,$start,$end,$comp,$dicod);
      $line{$contig." ".$start." ".$end}=-1;
    }
  }
  $oldline=$_;
  $oldcontig=$contig;
  $oldpvalue=$pvalue;
}

foreach $l (sort (keys %line)) {
  if (length($line{$l})>5) {
    printf $line{$l}."\n";
  }
}
