#!/usr/bin/perl

use strict;
use warnings;
use XML::LibXML;
use Encode qw(is_utf8 encode decode);
use Storable;
use Data::Dumper;

## everything printed to STDOUT goes through the :utf8 layer
binmode(STDOUT,":utf8");

## files and directories
my $home_dir=$ENV{'HOME'};
## every path below hangs off HOME; without it the paths would silently
## be built from an undefined value
defined $home_dir
  or die "HOME is not set in the environment, can not locate the data files\n";
## default place where to look for profiles,
## if nothing else is given as the first argument
my $where_to_search="$home_dir/opt/profiles";
my $queries_file="$home_dir/opt/queries";
my $titles_file="$home_dir/opt/titles.dump";

## Storable's retrieve() hands back undef when an I/O error occurs
## while reading, so check before passing the result on
my $titles=retrieve($titles_file);
defined $titles
  or die "could not retrieve titles from $titles_file\n";

## NOTE(review): analyse_titles is not defined in the part of the file
## reviewed here; get_talovaya below takes the same kind of argument --
## confirm which of the two is meant to be called.
my $title_extract=analyse_titles($titles);


## get_talovaya($titles)
##
## Computes, for every term found in the accepted titles of a profile,
## a ratio that compares how the term is used elsewhere with how it is
## used in the profile's own accepted titles.
##
## Argument:
##   $titles  hash reference of the form
##              { $shortid => { $status => [ $title, ... ], ... }, ... }
##            Only the statuses 'a' (accepted) and 'r' (refused) enter
##            the ratio directly; titles filed under any other status
##            still count towards the corpus-wide totals.  Entries with
##            a false $shortid ('' or 0) are skipped.
##
## Returns a hash reference { $shortid => { $term => $ratio } }, where
##
##            odr * other_use + (1 - odr) * refused_share
##   ratio = ---------------------------------------------
##                         accepted_share
##
##   odr            accepted titles / (accepted + refused titles)
##   other_use      uses of the term outside the profile's own 'a'/'r'
##                  titles, divided by the number of tokens outside them
##   refused_share  share of the term among the profile's refused tokens
##   accepted_share share of the term among the profile's accepted tokens
##
## A hash reference is always returned; it is empty when no profile has
## accepted terms.
##
## Terms are the lower-cased pieces obtained by splitting a title on
## non-word characters; purely numeric pieces and pieces shorter than
## two characters are dropped.
sub get_talovaya {
  my $titles=shift;

  ## The counters live in separate lexical tables, so that a shortid or
  ## status that happens to be called 'total', 'grand', 't' or 'odr'
  ## can not collide with the bookkeeping.
  my %term_count;    ## $term_count{$shortid}{$status}{$term}
  my %token_count;   ## $token_count{$shortid}{$status}
  my %corpus_count;  ## $corpus_count{$term}, over all profiles and statuses
  my $grand_total=0; ## number of counted tokens in the whole corpus
  my %odr;           ## own document ratio per shortid

  foreach my $shortid (keys %$titles) {
    if(not $shortid) {
      next;
    }
    my $by_status=$titles->{$shortid} || {};
    foreach my $status (keys %$by_status) {
      foreach my $title (@{$by_status->{$status}}) {
        foreach my $piece (split(/\W+/,$title)) {
          my $term=lc($piece);
          if($term=~m|^\d+$|) {
            next;
          }
          ## this also drops the empty leading field that split
          ## produces when the title starts with a separator
          if(length($term)<2) {
            next;
          }
          ## ++ on a missing entry starts from 0, no initialisation needed
          $term_count{$shortid}->{$status}->{$term}++;
          $token_count{$shortid}->{$status}++;
          $corpus_count{$term}++;
          $grand_total++;
        }
      }
    }
    ## now set the own document ratio
    my $number_of_accepted=0;
    if(defined $by_status->{'a'}) {
      $number_of_accepted=scalar(@{$by_status->{'a'}});
    }
    my $number_of_refused=0;
    if(defined $by_status->{'r'}) {
      $number_of_refused=scalar(@{$by_status->{'r'}});
    }
    my $number_of_judged=$number_of_accepted+$number_of_refused;
    ## a profile without any accepted or refused title has no accepted
    ## terms either, so its ratio is never read; 0 avoids a division by zero
    $odr{$shortid}=$number_of_judged>0
      ? $number_of_accepted/$number_of_judged
      : 0;
  }

  ## start with the talovaya
  my $talovaia={};
  foreach my $shortid (keys %$titles) {
    if(not $shortid) {
      next;
    }
    my $own_terms=$term_count{$shortid} || {};
    my $own_tokens=$token_count{$shortid} || {};
    my $accepted_terms=$own_terms->{'a'} || {};
    my $refused_terms=$own_terms->{'r'} || {};
    my $accepted_tokens=$own_tokens->{'a'} || 0;
    my $refused_tokens=$own_tokens->{'r'} || 0;
    ## tokens that do not come from this profile's own 'a'/'r' titles
    my $other_tokens=$grand_total-$accepted_tokens-$refused_tokens;

    ## calculate for accepted terms only
    foreach my $term (keys %$accepted_terms) {
      my $own_accepted=$accepted_terms->{$term};
      my $own_refused=$refused_terms->{$term} || 0;
      ## use of the term by others; when there is nothing outside this
      ## profile (e.g. a single profile) nobody else uses the term
      my $other_use=0;
      if($other_tokens>0) {
        $other_use=($corpus_count{$term}-$own_accepted-$own_refused)
          /$other_tokens;
      }
      my $numerator=$odr{$shortid}*$other_use;
      if($refused_tokens>0) {
        $numerator+=(1-$odr{$shortid})*$own_refused/$refused_tokens;
      }
      ## both parts are at least 1 here, because $term was counted
      ## among the accepted tokens of this profile
      my $denominator=$own_accepted/$accepted_tokens;
      $talovaia->{$shortid}->{$term}=$numerator/$denominator;
    }
  }
  return $talovaia;
}


