#!/usr/bin/perl

# Quantization lookup table (89 entries, indices 0..88).
# The main loop below indexes it with int(|value| * 2) and negates the
# result for negative inputs.  Each row is "(level) x run-length": the
# table is non-decreasing and the runs get longer as the level rises.
@quant = (
    0,
    ( 1) x  2, ( 2) x  2, ( 3) x  2,
    ( 4) x  3, ( 5) x  4, ( 6) x  4,
    ( 7) x  5, ( 8) x  6, ( 9) x  7,
    (10) x  9, (11) x 10,
    (12) x 34,
);


use POSIX;
use strict;
use warnings;
use Scalar::Util qw(looks_like_number);

# residue_entropy <groupsize>
#
# Reads lines of comma-separated numbers from STDIN.  Every line is cut into
# interleaved groups: with step = (value count) / groupsize, group i holds the
# elements i, i+step, i+2*step, ...  Each element is quantized through @quant
# (index int(|value| * 2), sign preserved) and the quantized levels of a group
# are joined into a ":a:b:c" key.  The script counts the distinct keys seen
# across all lines, prints each key, and finally prints the total.

our @quant;    # quantization table, assigned near the top of this file

my ($groupn) = @ARGV;
my %seen;         # set of group keys already encountered
my $count = 0;    # number of distinct keys
my $lines = 0;    # number of input lines consumed

if (!defined($groupn)) {
    print "Usage: residue_entropy <groupsize> \n";
    exit(1);
}

# The group size is used as a divisor below: zero or a non-numeric string
# would die with "Illegal division by zero" on the first input line, and a
# negative size would silently produce no groups.  Reject those up front.
unless (looks_like_number($groupn) && $groupn > 0) {
    print STDERR "residue_entropy: groupsize must be a positive number, got '$groupn'\n";
    print "Usage: residue_entropy <groupsize> \n";
    exit(1);
}

$| = 1;    # unbuffered STDOUT so the "\r" progress line updates in place

# Values whose doubled magnitude runs past the end of the table saturate at
# the table's last level instead of reading an undefined entry.
my $max_index = $#quant;

while (my $line = <STDIN>) {
    # chomp, not chop: a final line without a newline must keep its last char.
    chomp $line;
    my @nums = split(/,/, $line);
    $lines++;

    # NOTE(review): the last split field is deliberately never read and
    # $#nums is used as the value count -- presumably each input line ends
    # with a trailing ", " terminator so the final field is not a value.
    # Confirm against the producer of the input data.
    my $last = $#nums;

    # $step may be fractional when the count is not a multiple of the group
    # size; Perl truncates the fractional array subscripts below.
    my $step = $last / $groupn;

    for (my $i = 0; $i < $step; $i++) {
        my $key = "";
        for (my $j = $i; $j < $last; $j += $step) {
            my $value    = $nums[$j];
            my $negative = $value < 0;
            my $index    = int(($negative ? -$value : $value) * 2);
            $index = $max_index if $index > $max_index;
            $key .= ":" . ($negative ? -$quant[$index] : $quant[$index]);
        }

        if (!exists $seen{$key}) {
            $count++;
            $seen{$key} = 1;
        }
    }

    if (($lines % 1000) == 0) {
        print "\rworking... $lines lines, found $count values so far";
    }
}

# Dump every distinct key (hash order, i.e. unspecified), then the summary.
for my $key (keys %seen) {
    print "\t$key\n";
}

print "\r$count values total                                       \n";
print "Done.\n\n";