#!/usr/bin/perl -w
# --------------------------------------------------------------------------------- 
# Author: James Brunskill (Library/University of Waikato) - brunskil@waikato.ac.nz 
# Original Release Date: 27/04/2007
# Description/Purpose:
#
# Generates set of html webpages and rss feeds for library lists based on data 
# pulled from the voyager database.
#
# --------------------------------------------------------------------------------- 

use strict;
use IO::File;
use DBI;


#Links to the library catalogue:
my $biblink = "yourcatalogueurl/Pwebrecon.cgi?BBID=";

#Voyager Access Details 
my $db_name = "yourdbname";
my $db_username = "your read only username";
my $db_password = "your read only password";
my $oracleconnectstr = "dbi:Oracle:host=202.175.129.177;sid=VGER;port=1521";

##
#Grab the command line arguements
##
if( $#ARGV < 0)
{
    die "Please specify an ini file. Eg. list_from_db.pl listname.ini";
}
my $inifilepath = $ARGV[0];

#Variables to use throughout the program

my %htmlfiles; #hashtable to hold the html output files
my %rssfiles; #hashtable to hold the rss output files

my %inifile; #hashtable to hold the variables read in from an ini file

my %categories; #hashtable to hold the categories


#Read data in from the ini file
readINIFile($inifilepath);

my $outputdir = $inifile{"outputdir"};
mkdir($outputdir);


## PROCESS DATA FROM THE INI FILE
my $displaynav = 1;#Boolean to say whether we want to display the navigation bar or not

if( $inifile{"displaylinks"} && $inifile{"displaylinks"} eq "no")
{
    $displaynav = 0; #Don't display the links to each file (alphabetical or categories)
}

my $useRSS = 0; #boolean to say whether we are using rss

if( $inifile{"userss"} && $inifile{"userss"} eq "yes")
{
    $useRSS = 1; #Write rss files for each category
}


my $htmlheaderpath = "";
my $htmlfooterpath = "";

my $rssheaderpath = "";
my $rssfooterpath = "";

#allow the iniFile to specify header and footer...
if( $inifile{"htmlheader"})
{
    $htmlheaderpath = $inifile{"htmlheader"};
}
if( $inifile{"htmlfooter"})
{
    $htmlfooterpath = $inifile{"htmlfooter"};
}
if( $inifile{"rssheader"})
{
    $rssheaderpath = $inifile{"rssheader"};
}
if( $inifile{"rssfooter"})
{
    $rssfooterpath = $inifile{"rssfooter"};
}



################################################################
# MAIN CODE                                                    #
################################################################

#1. RUN SQL QUERY

my $results = getSQLResults($inifile{"sqlfile"});

#2. GET LIST OF CATEGORIES
findCategories($results);

#3. Create RSS and HTML files for each category
initializeFiles($inifile{"outputdir"});

#4. OUTPUT BIBs to files for each category
my $cat = "";
foreach my $title (keys %$results) {
#DEBUG  print "Value of ID $title is $results->{$title}->{'BIB_ID'}\n";
    $cat = $results->{$title}->{'CATEGORY'};
    
    addToHTML($htmlfiles{$cat},$results->{$title}->{'BIB_ID'},$title);
    if($useRSS)
    {
	addToRSS($rssfiles{$cat},$results->{$title}->{'BIB_ID'},$title, $cat);
    }

}

#5. Include file footers and Close all files
finaliseFiles();


################################################################
# END MAIN CODE                                                #
################################################################



##
# Reads the SQL File and puts it into a string ready to be run by the program
# In future it could do some form of santising the sql statement if nessisary
#
# readSQLFile([str path to file])
##

sub readSQLFile
{
    my $sqlfilepath = shift;
    my $line = '';
    my $sqlquery = "";

    open (INFILE, $sqlfilepath)|| die "readSQLFile couldn't open file ($sqlfilepath)";

    while ($line = <INFILE>)
    {
	$sqlquery .= $line;
    }
    
    close(INFILE);

    return trim($sqlquery);
}



##
# Adds the bib data to an RSS File
##

sub addToRSS
{
#DEBUG    print "ADDING ITEM TO RSS FILE\n";

    my $OUTFILE = shift;
    my $bibid = shift;
    my $title = shift;
    my $cat = shift;
    
    $title = xmlify($title);
    $cat = xmlify($cat);
    
#DEBUG    print "RSS: $bibid | $title | $cat\n";

#OUTPUT FORMAT:
##<item>
##<title>Item Title</title>
##<link>Link to Item by BIB ID</link>
##<guid>Link to Item by BIB ID</guid>
##<category>CATEGORY</category>
##</item>
    

    print $OUTFILE "<item>\n";
    print $OUTFILE "<title>$title</title>\n";
    print $OUTFILE "<link>$biblink$bibid</link>\n";
    print $OUTFILE "<guid>$biblink$bibid</guid>\n";
    print $OUTFILE "<category>$cat</category>\n";
    print $OUTFILE "</item>\n";

}

##
# Adds the bib data to an html File
# addToHTML([filehandle],[bibid],[title]);
##

sub addToHTML
{

my $OUTFILE = shift;
my $bibid = shift;
my $title = shift;

$title = xmlify($title);


#DEBUG    print "HTML: $bibid | $title | $cat\n";

#OUTPUT FORMAT:
##<li><a href=\"(LINK TO ITEM BY BIB ID)\">(ITEM TITLE)</a></li>\

print $OUTFILE "<li><a href=\"$biblink$bibid\">$title</a></li>\n";


}


##
#readINIFile reads variables from an ini file and puts them in the inifile hashtable
#INI File format is 'variable name' : 'variable data'
#
# readINIFile([str path to file])
##

sub readINIFile
{
    my $infilepath = shift;
    my $line = '';
    my $variableName = '';
    
    open (INFILE, $infilepath)|| die "couldn't open the file!";
    
    while ($line = <INFILE>)
    {
	if( $line =~ m/^(.*):(.*)$/)
	{
	    #We have found that the line contains a "include"
	    $variableName = trim($1);#make sure we have just the name no extra chars like spaces or tabs
	    $inifile{$variableName} = trim($2);
	}
    }
    

    close(INFILE);
    
}

##
# REMOVE Blank and un-wanted charaters from the beginning and end of a string
##

sub trim {
    my $string = shift;
    for ($string) {
        s/^\s+//;
        s/\s+$//;
        s/^[-+\'\"]+//;#get rid of other unwanted chars
        s/[-+\'\"]+$//;#get rid of other unwanted chars
    }
    return $string;
}


##
# Replaces characters with html/xml enties
##

sub xmlify {
    my $string = shift;
    for ($string) {
       s/&/&amp;/g;
       s/</&lt;/g; 
       s/>/&gt;/g;
       s/\"/&quot;/g;
       s/\'/&apos;/g;

    }
    return $string;
}



##
# Inserts the contents of one file into another, 
# replacing variables between << and >> charaters with variables from the ini file.
##


sub insertFile
{
    my $OUTFILE = shift;
    my $infilepath = shift;
    my $category = shift;
    my $rssfilename = shift;
    $inifile{"category"} = $category;
    $inifile{"rssfilename"} = $rssfilename;
    
    
    my $line = '';
    my $variableName = '';
    open (INFILE, $infilepath)|| die "<insertFile> can't open inputfile ($infilepath)\n";
    
    while ($line = <INFILE>)
    {

	while( $line =~ m/<<(.*?)>>/g)
	{
	    #We have found that the line contains an "include"
	    $variableName = $1; # grab the result of the regexp so we can use it in the next one.
	    if($inifile{$variableName})
	    {
		$line =~ s/<<$1>>/$inifile{$variableName}/g;
# reset the regexp position so we can further replace inside our new line
		pos($line)=0;#This results in a slight overhead but it is worth it for the extra functionality
	    }
	    else
	    {
		$line =~ s/<<$variableName>>//g;
	    }


	}
	#print STDERR "line: $line\n";
	print $OUTFILE $line;
    }
    

    close(INFILE);
    
}




##
#findCategories creates a hash table with Category in it
##

sub findCategories
{
    my $results = shift;

    my $category = "";

#    print STDERR "Finding Categories\n";

    foreach my $title (keys %$results) {
	
	$category = $results->{$title}->{'CATEGORY'};

	unless ($categories{$category})
	{	
	    
#	    print STDERR "Added Category: $category\n";
	    $categories{$category} = 1;
	}
	
    }
    
        
}


##
# Inserts links to the various pages created into the file passed in
# It also appends or prepends the strings passed into the function to the file
##


sub insertNav
{
    my $OUTFILE = shift;
    my $stringbefore = shift;
    my $stringafter = shift;

    my $cat = "";
    my $catfile = "";

    print $OUTFILE $stringbefore;

    
    print $OUTFILE "\n<hr>\n<p style='text-align: center'>\n| ";
    foreach $cat (sort keys %categories)
    {
	if($cat ne "Number" && $cat ne "Numbers" && $cat ne "Other") #These categories should go on their own line in file
	{
	    $catfile = makefilename($cat,".shtml");
	    print $OUTFILE "<a href='$catfile'>$cat</a> | ";
	}
    }

    
    print $OUTFILE "\n<br>\n";
    if($categories{"Numbers"})
    {
	$catfile = makefilename("Numbers",".shtml");
	print $OUTFILE " <a href='$catfile'>Numbers</a> ";
    }
    elsif($categories{"Number"})
    {
	$catfile = makefilename("Number",".shtml");
	print $OUTFILE " <a href='$catfile'>Numbers</a> ";
    }
    elsif($categories{"Other"})
    {
	$catfile = makefilename("Other",".shtml");
	print $OUTFILE " <a href='$catfile'>Other</a>";
    }

	
    print $OUTFILE "</p>\n";
    print $OUTFILE "<hr>\n";

    print $OUTFILE $stringafter;
}



##
##initializeFiles creates a list of file handles based on the categories in %categories hash table
# initializeFiles([output directory])
##

sub initializeFiles
{
    my $dir = shift;
    my $htmlfilename = "";
    my $rssfilename = "";

#DEBUG    print STDERR "Initialising Files";

    foreach my $category (keys %categories)
    {		
	$htmlfilename = makefilename($category,".shtml");
	$rssfilename = makefilename($category, ".xml");

	#ADD HTML FILE
	unless ($htmlfiles{$category})
	{	
#	    print STDERR "cat = $category AND dir = $dir\n";
	    $htmlfiles{$category} = new IO::File ">$dir/$htmlfilename";
	    insertFile($htmlfiles{$category}, $htmlheaderpath, $category, $rssfilename);
	    if($displaynav)
	    {
		insertNav($htmlfiles{$category},"","\n<ul>\n");
	    }
	}
	

	#Add RSS File (If we are outputting rss)
	if($useRSS)
	{
	    
	    unless ($rssfiles{$category})
	    {	
		#print STDERR "cat = $category AND dir = $dir\n";
		$rssfiles{$category} = new IO::File ">$dir/$rssfilename";
		insertFile($rssfiles{$category}, $rssheaderpath, $category);
		
	    }
	

	}
    }    

}





##
# finaliseFiles closes all open files and adds the footer to them.
# 
##

sub finaliseFiles
{

#DEBUG    print STDERR "Finalising Files";

    foreach my $category (keys %categories)
    {		
	
	if($displaynav)
	{
	    insertNav($htmlfiles{$category},"\n</ul>\n","");
	}
	insertFile($htmlfiles{$category}, $htmlfooterpath, $category);
	close($htmlfiles{$category});
	
	
	#Add RSS File (If we are outputting rss)
	if($useRSS)
	{
	    
	    insertFile($rssfiles{$category}, $rssfooterpath, $category);
	    close($rssfiles{$category});
	}
	

	
    }    
    
}


##
# Runs the SQL Query from the file specified in the INI FILE 
##

sub getSQLResults
{
    my $sqlfile = shift;
        
# Connect to Voyager's Oracle database
    my $dbh = DBI->connect($oracleconnectstr,$db_username, $db_password)
	|| die("Could not connect: $DBI::errstr");
    


   my $sqlQuery = readSQLFile($sqlfile);
 
#DEBUG   print "SQL STRING = $sqlQuery";
    
    #Run Query
    my $results = $dbh->selectall_hashref($sqlQuery, 'TITLE')
	|| die("\n***\nError Running Query: $DBI::errstr\n***\n");
    
    return $results;
}


##
# Make File Name, takes 2 strings, on a name for the file (which is converted to a valid file name) and a file extention
##

sub makefilename {
    my $string = shift;
    my $ext = shift;

    $string = trim($string);
    
    $string = lc($string);
    
    $string =~ s/[\s\'\\\/]/_/g; #Replace invalid chars with underscores

    return $string . $ext;
}
