###########################################################################
#
# UnknownPlug.pm -- Plugin for files you know about but Greenstone doesn't
#
# A component of the Greenstone digital library software from the New
# Zealand Digital Library Project at the University of Waikato, New
# Zealand.
#
# Copyright (C) 2001 Gordon W. Paynter
# Copyright (C) 2001 New Zealand Digital Library Project
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
###########################################################################

# UnknownPlug - a plugin for unknown files

# This is a simple Plugin for importing files in formats that
# Greenstone doesn't know anything about.  A fictional document will
# be created for every such file, and the file itself will be passed
# to Greenstone as the "associated file" of the document.

# Here's an example where it is useful: I have a collection of
# pictures that include a couple of quicktime movie files with names
# like DCP_0163.MOV.  Rather than write a new plugin for quicktime
# movies, I add this line to the collection configuration file:

# plugin UnknownPlug -process_exp "\.MOV$" -assoc_field "movie"

# A document is created for each movie, with the associated movie
# file's name in the "movie" metadata field.  In the collection's
# format strings, I use the {If} macro to output different text for
# each type of file, like this:

# {If}{[movie],<HTML for displaying movie>}{If}{[Image],<HTML for displaying image>}

# You can also add extra metadata, such as the Title, Subject, and
# Duration, with metadata.xml files and RecPlug.  (If you want to use
# UnknownPlug with more than one type of file, you will have to add
# some sort of distinguishing metadata in this way.)



package UnknownPlug;

use BasPlug;

use strict;
no strict 'refs'; # allow filehandles to be variables and viceversa

# Set up inheritance at compile time: UnknownPlug derives from BasPlug.
BEGIN {
    @UnknownPlug::ISA = qw(BasPlug);
}

# Plugin-specific arguments.  Each entry gives the argument name, a
# description key (presumably resolved through the string resources --
# TODO confirm), its type, its default value and whether it is required.
#
#   assoc_field       - metadata field that receives the file's name
#   file_format       - value stored as FileFormat metadata
#   mime_type         - MIME type used for the associated file and MimeType metadata
#   srcicon           - icon macro name; wrapped in underscores for srcicon metadata
#   process_extension - plain extension used to build process_exp when none is given
my $arguments = [
    {
        'name' => 'assoc_field',
        'desc' => '{UnknownPlug.assoc_field}',
        'type' => 'string',
        'deft' => '',
        'reqd' => 'no',
    },
    {
        'name' => 'file_format',
        'desc' => '{UnknownPlug.file_format}',
        'type' => 'string',
        'deft' => '',
        'reqd' => 'no',
    },
    {
        'name' => 'mime_type',
        'desc' => '{UnknownPlug.mime_type}',
        'type' => 'string',
        'deft' => '',
        'reqd' => 'no',
    },
    {
        'name' => 'srcicon',
        'desc' => '{UnknownPlug.srcicon}',
        'type' => 'string',
        'deft' => 'iconunknown',
        'reqd' => 'no',
    },
    {
        'name' => 'process_extension',
        'desc' => '{UnknownPlug.process_extension}',
        'type' => 'string',
        'deft' => '',
        'reqd' => 'no',
    },
];

# Plugin description record; new() pushes it onto the shared option list.
my $options = {
    'name'     => 'UnknownPlug',
    'desc'     => '{UnknownPlug.desc}',
    'abstract' => 'no',
    'inherits' => 'yes',
    'args'     => $arguments,
};


# Constructor.
#
# Arguments:
#   $class           - class name to bless the new object into
#   $pluginlist      - array ref; this class name is appended to it
#   $inputargs       - passed through untouched to BasPlug's constructor
#   $hashArgOptLists - hash ref; this plugin's argument and option
#                      descriptions are appended to its "ArgList" and
#                      "OptList" entries
#
# Returns the BasPlug-constructed object re-blessed into $class.
sub new {
    my ($class) = shift (@_);
    my ($pluginlist, $inputargs, $hashArgOptLists) = @_;
    push(@$pluginlist, $class);

    push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments}) if defined $arguments;
    push(@{$hashArgOptLists->{"OptList"}}, $options)      if defined $options;

    # Explicit method-call syntax: this package defines its own new(), so
    # the indirect form ("new BasPlug (...)") would leave the parse up to
    # Perl's heuristics.
    my $self = BasPlug->new($pluginlist, $inputargs, $hashArgOptLists);

    # "-process_extension" is a simpler alternative to -process_exp for
    # non-regexp people.  It only applies when no process_exp was set.
    # The extension is inserted as-is (not escaped) between "\." and "$".
    if (!$self->{'process_exp'} && $self->{'process_extension'}) {
        $self->{'process_exp'} = "\\." . $self->{'process_extension'} . "\$";
    }

    return bless $self, $class;
}

# Default process expression for this plugin: the empty string, i.e.
# there is no built-in default and it must be configured explicitly.
sub get_default_process_exp {
    my $default_exp = '';
    return $default_exp;
}


# Associate the unknown file with the new document
#
# Arguments:
#   $self     - plugin object; its 'file_format', 'mime_type',
#               'assoc_field' and 'srcicon' entries are read
#   $filename - filename with full path
#   $file     - filename without path
#   $doc_obj  - document object that receives the associated file and
#               the metadata
#
# Returns 1 on success, or 0 (without touching $doc_obj) when either
# name is missing or empty.

sub associate_unknown_file {
    my $self = shift (@_);
    my ($filename, $file, $doc_obj) = @_;

    # check the filename is okay (undef is treated like the empty string)
    return 0 if (!defined $file || !defined $filename);
    return 0 if ($file eq "" || $filename eq "");

    # Add the file as an associated file ...
    my $section = $doc_obj->get_top_section();

    # Fall back to generic values when the options are unset or empty.
    my $file_format = $self->{'file_format'} || "unknown";
    my $mime_type   = $self->{'mime_type'}   || "unknown/unknown";
    my $assoc_field = $self->{'assoc_field'} || "unknown_file";

    $doc_obj->associate_file($filename, $file, $mime_type, $section);
    $doc_obj->add_metadata ($section, "FileFormat", $file_format);
    $doc_obj->add_metadata ($section, "MimeType", $mime_type);
    $doc_obj->add_metadata ($section, $assoc_field, $file);

    # srclink, srcicon and /srclink together form a linked icon: an
    # opening anchor pointing at the associated file, the icon macro
    # name wrapped in underscores, and the closing anchor.
    $doc_obj->add_metadata ($section, "srclink",
                            "<a href=\"_httpprefix_/collect/[collection]/index/assoc/[assocfilepath]/[$assoc_field]\">");
    $doc_obj->add_metadata ($section, "srcicon", "_".$self->{'srcicon'}."_");
    $doc_obj->add_metadata ($section, "/srclink", "</a>");

    return 1;
}



# The UnknownPlug read() function. This function does all the right
# things to make general options work for a given plugin.  UnknownPlug
# overrides read() because there is no need to read in the actual text
# of the file: the contents of the file are not text.
#
# Arguments (after $self):
#   $pluginfo, $base_dir, $metadata - passed on to process()
#   $file        - file name; may include leading directories
#   $processor   - supplies the OID settings and receives the document
#   $maxdocs, $total_count - not used here
#   $gli         - when true, progress/error tags are printed to STDERR
#
# Returns whatever read_block() reported when that is undef or 0 (the
# file is not processed by this plugin), -1 on an error during
# processing, and 1 when the file was processed.
#
# Note that $base_dir might be "" and that $file might include directories

sub read {
    my $self = shift (@_);
    my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;

    my $outhandle = $self->{'outhandle'};

    # Make sure we're processing the correct file
    my ($block_status,$filename) = $self->read_block(@_);
    return $block_status if ((!defined $block_status) || ($block_status==0));

    print STDERR "<Processing n='$file' p='UnknownPlug'>\n" if ($gli);
    print $outhandle "UnknownPlug processing \"$filename\"\n"
        if $self->{'verbosity'} > 1;

    #if there's a leading directory name, eat it...
    $file =~ s/^.*[\/\\]//;

    # create a new document (explicit method call rather than the
    # indirect "new doc (...)" form, since this package has its own new())
    my $doc_obj = doc->new($filename, "indexed_doc");
    $doc_obj->set_OIDtype ($processor->{'OIDtype'}, $processor->{'OIDmetadata'});
    $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Plugin", "$self->{'plugin_type'}");
    # set the filename as Source metadata to be consistent with other plugins
    $doc_obj->add_metadata($doc_obj->get_top_section(), "Source", &ghtml::dmsafe($file));
    $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "FileSize", (-s $filename));

    # URL metadata (even invalid ones) are used to support internal
    # links, so even if 'file_is_url' is off, still need to store info

    my $web_url = "http://$file";
    $doc_obj->add_metadata($doc_obj->get_top_section(), "URL", $web_url);


    # associate the file with the document; called as a method so that a
    # subclass can override associate_unknown_file()
    if ($self->associate_unknown_file($filename, $file, $doc_obj) != 1)
    {
        if ($gli) {
            print STDERR "<ProcessingError n='$file'>\n";
        }
        print $outhandle "UnknownPlug: couldn't process \"$filename\"\n";
        return -1; # error during processing
    }

    # use placeholder text so we don't break downstream plugins
    my $text = &gsprintf::lookup_string("{BasPlug.dummy_text}",1);

    # include any metadata passed in from previous plugins
    my $section = $doc_obj->get_top_section();
    $self->extra_metadata ($doc_obj, $section, $metadata);

    $self->title_fallback($doc_obj,$section,$file);

    # do plugin specific processing of doc_obj
    unless (defined ($self->process(\$text, $pluginfo, $base_dir, $file, $metadata, $doc_obj))) {
        print STDERR "<ProcessingError n='$file'>\n" if ($gli);
        return -1;
    }

    # do any automatic metadata extraction
    $self->auto_extract_metadata ($doc_obj);

    # add an OID
    $doc_obj->set_OID();
    $doc_obj->add_utf8_text($section, $text);

    # process the document
    $processor->process($doc_obj);

    $self->{'num_processed'} ++;
    return 1;
}


# UnknownPlug processing of doc_obj.  In practice we don't need to do
# anything here because the read function takes care of everything.
#
# Arguments (all unused here; listed to document the call made by read()):
#   $textref  - reference to the document text
#   $pluginfo, $base_dir, $file, $metadata - as received by read()
#   $doc_obj  - the document being built
#
# Always returns 1; read() treats an undefined return as an error.

sub process {
    my ($self, $textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj) = @_;

    return 1;
}


1;











