#!/usr/bin/perl -w
# --------------------------------------------------------------------------------- 
# Author: James Brunskill (Library/University of Waikato) - brunskil@waikato.ac.nz 
# Original Release Date: 28/02/2007
# Description/Purpose:
# Compares two lists of ISBNs (one ISBN per line), looking for matches, and outputs statistics

# perl -S ISBN_Compare.pl list1.txt list2.txt
# 
# --------------------------------------------------------------------------------- 
use strict;
use IO::File;

#
#Stuff to pull in from the commandline
#
if (@ARGV < 2)
{
    die "Please include two list files\nUsage: $0 list1.txt list2.txt\n";
}

my $list1 = $ARGV[0];
my $list2 = $ARGV[1];

# load_isbn_counts($path, $label)
#
# Reads the list file at $path, one ISBN per line.  Leading/trailing
# whitespace (including LF and CRLF line endings) is stripped so that the
# same ISBN compares equal regardless of how each file terminates its lines,
# and blank lines are ignored.
#
# $label ("list1"/"list2") is only used to build the error message.
#
# Returns a two-element list:
#   - a hash ref mapping each distinct ISBN to the number of times it occurs
#   - the total number of (non-blank) entries read
# Dies if the file cannot be opened.
sub load_isbn_counts
{
    my ($path, $label) = @_;

    # Three-argument open: the file name can never be misread as a mode.
    open(my $fh, '<', $path)
        or die "couldn't open $label file '$path': $!\n";

    my %count_of;
    my $total = 0;

    while (my $line = <$fh>)
    {
        $line =~ s/\A\s+|\s+\z//g;
        next if $line eq '';

        $count_of{$line}++;
        $total++;
    }

    close($fh);

    return (\%count_of, $total);
}

# count_unique_entries($count_of, $other_count_of)
#
# Returns how many entries (duplicates included) of the list described by
# $count_of have an ISBN that does not appear in $other_count_of.
sub count_unique_entries
{
    my ($count_of, $other_count_of) = @_;

    my $unique = 0;
    for my $isbn (keys %{$count_of})
    {
        $unique += $count_of->{$isbn} unless exists $other_count_of->{$isbn};
    }

    return $unique;
}

# percentage($part, $whole)
#
# Returns $part as a percentage of $whole, or 0 when $whole is 0 (an empty
# list file) instead of dying with a division-by-zero error.
sub percentage
{
    my ($part, $whole) = @_;

    return $whole ? $part / $whole * 100 : 0;
}

my ($list1_count_of, $list1_total) = load_isbn_counts($list1, 'list1');
print "Loaded $list1_total ISBN's from the first list\n";

my ($list2_count_of, $list2_total) = load_isbn_counts($list2, 'list2');
print "Loaded $list2_total ISBN's from the second list\n";

# "Unique" is counted per entry of each list, so Unique never exceeds Total
# and can never go negative, even when a list repeats an ISBN.
my $list1_uniq = count_unique_entries($list1_count_of, $list2_count_of);
my $list2_uniq = count_unique_entries($list2_count_of, $list1_count_of);

# Number of distinct ISBNs present in both lists.
my $list_shared = grep { exists $list2_count_of->{$_} } keys %{$list1_count_of};

my $list1_per = percentage($list1_uniq, $list1_total);
my $list2_per = percentage($list2_uniq, $list2_total);

print "STATS:\n";
print "List 1\n";
print "Total \t Unique \t Percentage\n";
print "$list1_total \t $list1_uniq \t $list1_per\n";

print "List 2\n";
print "Total \t Unique \t Percentage\n";
print "$list2_total \t $list2_uniq \t $list2_per\n";


print "Total in common: $list_shared\n";

