###########################################################################
#
# ArcPlug.pm --
# A component of the Greenstone digital library software
# from the New Zealand Digital Library Project at the 
# University of Waikato, New Zealand.
#
# Copyright (C) 1999 New Zealand Digital Library Project
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
###########################################################################

# plugin which recurses through an archives.inf file
# (i.e. the file generated in the archives directory
# when an import is done), processing each file it finds 

# 12-05-02 Added usage datastructure - John Thompson

package ArcPlug;

use util;
use BasPlug;
use plugin;
use arcinfo;
use gsprintf;

use strict;
no strict 'refs'; # allow filehandles to be variables and viceversa

# ArcPlug inherits from BasPlug; the inheritance list is filled in at
# compile time.
BEGIN {
    @ArcPlug::ISA = qw(BasPlug);
}

# ArcPlug declares no arguments of its own, so this list is empty.
my $arguments = [];

# Options record that new() appends to the "OptList" it is given.
my $options = {
    'name'     => "ArcPlug",
    'desc'     => "{ArcPlug.desc}",
    'abstract' => "no",
    'inherits' => "yes",
};
         
# Package-local shorthand for gsprintf::gsprintf: the argument list is
# handed on exactly as received and whatever gsprintf::gsprintf returns
# is returned to the caller.
sub gsprintf
{
    # called with the & sigil, so any prototype on the target is bypassed
    return &gsprintf::gsprintf(@_);
}

# Constructor.
#
# Arguments:
#   $class           - class name the new plugin object is blessed into
#   $pluginlist      - array ref; $class is appended to it
#   $inputargs       - handed on unchanged to the BasPlug constructor
#   $hashArgOptLists - hash ref; this plugin's argument list is appended to
#                      its "ArgList" entry and this plugin's options record
#                      to its "OptList" entry
#
# Returns the object created by the BasPlug constructor, re-blessed
# into $class.
sub new {
    my ($class) = shift (@_);
    my ($pluginlist, $inputargs, $hashArgOptLists) = @_;
    push(@$pluginlist, $class);

    if (defined $arguments) {
        push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments});
    }
    if (defined $options) {
        push(@{$hashArgOptLists->{"OptList"}}, $options);
    }

    # Explicit class-method call: the indirect object form
    # ("new BasPlug(...)") leaves it to parser heuristics to decide
    # whether this is a method call or a call to a sub named "new".
    my $self = BasPlug->new($pluginlist, $inputargs, $hashArgOptLists);

    return bless $self, $class;
}

# Mark every document listed in the archives information object as
# "Been Indexed" (status "B") and save the information back to file.
#
# Does nothing unless $self->{'archive_info'} is defined (read() sets it
# when it finds an archives.inf file). The information is saved under the
# name held in $self->{'archive_info_filename'}.
sub deinit {
    my ($self) = @_;

    my $archive_info = $self->{'archive_info'};

    if (defined $archive_info) {
        my $archive_info_filename = $self->{'archive_info_filename'};

        my $file_list = $archive_info->get_file_list();

        # change each file to "Been Indexed"; the status is overwritten
        # unconditionally, so the previous value is not looked up first
        foreach my $subfile (@$file_list) {
            # index 1 of each file list entry is the document OID
            $archive_info->set_status_info($subfile->[1], "B");
        }

        $archive_info->save_info($archive_info_filename);
    }
}

# Reports whether this class might recurse using $pluginfo.
# ArcPlug always answers 1.
sub is_recursive {
    my ($self) = @_;

    return 1;
}




# Process an archives directory by working through its archives.inf file.
#
# Arguments:
#   $pluginfo    - handed on unchanged to plugin::read for every entry
#   $base_dir    - base directory; might be ""
#   $file        - path below $base_dir; might include directories
#   $metadata    - not used: metadata is not carried on to the next level
#   $processor   - document processor; asked whether it is incremental
#                  capable and handed on to plugin::read
#   $maxdocs     - maximum number of documents to process, -1 for no limit;
#                  an undefined value is treated as -1
#   $total_count - number of documents counted so far by the caller;
#                  an undefined value is treated as 0
#   $gli         - handed on unchanged to plugin::read
#
# When the archives.inf file exists, the loaded archives information object
# and its filename are stored in $self->{'archive_info'} and
# $self->{'archive_info_filename'} (deinit reads them back).
#
# Returns the number of files processed, or undef if there is no
# archives.inf file under $base_dir/$file (can't process).
sub read {
    my $self = shift (@_);
    my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
    my $outhandle = $self->{'outhandle'};

    # An undefined $maxdocs used to compare as 0 and stop the loop before
    # the first entry; treat missing limits as "no limit" / "none so far".
    $maxdocs     = -1 unless defined $maxdocs;
    $total_count = 0  unless defined $total_count;

    # see if this has an archives information file within it
    my $archive_info_filename = &util::filename_cat($base_dir, $file, "archives.inf");

    # wasn't an archives directory, someone else will have to process it
    # (explicit undef kept so callers see the same value as before)
    return undef unless -e $archive_info_filename;

    # found an archives.inf file
    &gsprintf($outhandle, "ArcPlug: {common.processing} $archive_info_filename\n") if $self->{'verbosity'} > 1;

    # read in the archives information file
    my $archive_info = arcinfo->new();
    $self->{'archive_info'} = $archive_info;
    $self->{'archive_info_filename'} = $archive_info_filename;

    $archive_info->load_info($archive_info_filename);

    my $file_list = $archive_info->get_file_list();
    my $count = 0;

    # process each file
    foreach my $subfile (@$file_list) {
        last if ($maxdocs != -1 && ($total_count + $count) >= $maxdocs);

        my $tmp = &util::filename_cat($file, $subfile->[0]);
        next if $tmp eq $file;

        # When building incrementally, only documents whose status is "I"
        # are processed; otherwise every document is processed and the
        # status does not need to be looked up at all.
        if ($processor->is_incremental_capable() && $self->{'incremental'}) {
            my $index_status = $archive_info->get_status_info($subfile->[1]);
            next unless defined $index_status && $index_status eq "I";
        }

        # note: metadata is not carried on to the next level
        $count += &plugin::read($pluginfo, $base_dir, $tmp, {}, $processor, $maxdocs, ($total_count + $count), $gli);
    }

    return $count;
}

1;
