#!/usr/bin/perl

use strict;
use warnings;
use XML::LibXML;
use Encode qw(is_utf8 encode decode);
use Data::Dumper;
use List::Util qw(shuffle);

## files and directories
my $home_dir=$ENV{'HOME'};
## default place where to look for profiles,
## if nothing else is given as the first argument
my $where_to_search="$home_dir/opt/profiles";
my $queries_file="$home_dir/opt/queries";

## ns constants
my $acis_ns='http://acis.openlib.org';

## global variables
my $queries;

## global actions
my $parser = XML::LibXML->new();
## forbid network access while parsing
$parser->no_network(1);

## deal with potential argument: the first command line argument,
## when given, replaces the default search location set above; it
## may name a single profile file or a directory of profiles
if(defined($ARGV[0])) {
  $where_to_search=$ARGV[0];
}

## main call
work_with_files($where_to_search);

## initial texts
## NOTE: this declaration is only reached at run time after the main
## call above has already filled $texts through work_with_files. Keep
## it a bare declaration: an assignment here would overwrite the data
## collected by that call.
my $texts;

##
## Collect the texts of the profiles found at $input into the
## file-level variable $texts.
##
##   $input - either a single profile file, or a directory that is
##            searched recursively for regular files named '*.amf.xml'
##
## Anything else is reported as invalid input on standard output.
sub work_with_files {
  my $input=shift;
  if(-f $input) {
    $texts=extract_titles_from_file($input,$texts);
  }
  elsif(-d $input) {
    ## list form of the pipe open: find is started without a shell, so
    ## blanks or shell metacharacters in $input can not change the command
    my $find;
    unless(open $find,'-|','find',$input,'-type','f','-name','*.amf.xml') {
      warn "can not run find on '$input': $!\n";
      return;
    }
    while(my $file=<$find>) {
      chomp $file;
      $texts=extract_titles_from_file($file,$texts);
    }
    close $find;
  }
  else {
    print "invalid input: '$input'\n";
  }
}

## report the number of accepted and rejected texts per profile
gather_document_structure($texts);

##
## Count the texts collected per profile and print the result with
## Data::Dumper.
##
##   $texts - hash reference keyed by shortid; each value may hold an
##            array reference under 'a' (accepted texts) and under 'r'
##            (rejected texts)
##
## The printed structure maps every shortid to the number of entries
## in its 'a' and 'r' lists. A list that is missing counts as 0.
sub gather_document_structure {
  my $texts=shift;
  my $doc;
  foreach my $shortid (keys %{$texts || {}}) {
    foreach my $kind ('a','r') {
      ## a profile may lack one kind of text completely; dereferencing
      ## the undefined slot directly is fatal under strict refs
      my $list=$texts->{$shortid}->{$kind} || [];
      $doc->{$shortid}->{$kind}=scalar @{$list};
    }
  }
  ## sorted keys make the dump reproducible from run to run
  local $Data::Dumper::Sortkeys=1;
  print Dumper $doc;
}



##
## Parse one profile and collect its <text> elements.
##
##   $file - path of the profile to parse
##   $out  - optional hash reference to add the result to; entries made
##           by earlier calls are kept, so a caller can accumulate many
##           profiles in one structure. A new structure is started
##           when it is omitted.
##
## Returns $out, where $out->{$shortid}->{'a'} is an array reference of
## the <text> elements found below <isauthorof> elements, and
## $out->{$shortid}->{'r'} the same for <acis:hasnoconnectionto>
## elements. Both lists are always set, possibly empty. The shortid is
## taken from the first <person> element.
##
## A file that can not be opened or parsed, or that has no person
## shortid, is reported with warn and skipped: $out is returned as it
## was passed in.
sub extract_titles_from_file {
  my $file=shift;
  ## output structure computed from the profile(s)
  my $out=shift;
  my $fh;
  unless(open $fh,'<',$file) {
    warn "can not open '$file': $!\n";
    return $out;
  }
  binmode $fh; # drop all PerlIO layers possibly created by a use open pragma
  my $doc=eval { $parser->load_xml(IO => $fh) };
  ## keep the error, $@ is clobbered easily
  my $error=$@;
  close $fh;
  unless(defined $doc) {
    chomp $error;
    warn "can not parse '$file': $error\n";
    return $out;
  }
  my $root_element=$doc->documentElement;
  ## person element, we look only at the first one
  my ($person_element)=$root_element->getElementsByTagName('person');
  my $shortid_element;
  if(defined $person_element) {
    ($shortid_element)=$person_element->getElementsByTagName('acis:shortid');
  }
  unless(defined $shortid_element) {
    warn "no person shortid found in '$file'\n";
    return $out;
  }
  my $shortid=$shortid_element->textContent;
  ## 'r': texts that the person has not authored,
  ## 'a': texts that the person has authored
  ## NOTE(review): as before, the relation elements are looked up in
  ## the whole document, not only below the first person -- confirm
  ## that profiles hold a single person
  my %relation_of=('r' => 'acis:hasnoconnectionto', 'a' => 'isauthorof');
  foreach my $kind ('r','a') {
    my @text_elements;
    foreach my $relation_element ($root_element->getElementsByTagName($relation_of{$kind})) {
      push @text_elements,$relation_element->getElementsByTagName('text');
    }
    $out->{$shortid}->{$kind}=\@text_elements;
  }
  return $out;
}
  
##
## Normalize the white space of a string: leading and trailing white
## space is removed and every inner run of white space is replaced by
## a single blank.
##
##   $in - the string to normalize
##
## Returns the normalized copy; an undefined value is returned as is.
sub normalize_space {
  my $in=shift;
  return $in unless defined $in;
  $in=~s|^\s+||;
  $in=~s|\s+$||;
  ## /g: collapse every run of white space, not only the first one
  $in=~s|\s+| |g;
  return $in;
}

