###########################################################################
#
# PPTPlug.pm -- plugin for importing Microsoft PowerPoint files.
#  (currently only versions 95 and 97)
#
# A component of the Greenstone digital library software
# from the New Zealand Digital Library Project at the 
# University of Waikato, New Zealand.
#
# Copyright (C) 2002 New Zealand Digital Library Project
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
###########################################################################

package PPTPlug;

use ConvertToPlug;
use strict;
no strict 'refs'; # allow filehandles to be variables and viceversa

# Establish inheritance at compile time: PPTPlug's only parent class is
# ConvertToPlug.
BEGIN {
    @PPTPlug::ISA = qw(ConvertToPlug);
}

# Entries offered for the "convert_to" enum argument. Every entry is a hash
# with a 'name' and a 'desc' string of the form
# {ConvertToPlug.convert_to.<name>}.
my $convert_to_list = [
    map { +{ 'name' => $_, 'desc' => '{ConvertToPlug.convert_to.' . $_ . '}' } }
        qw(auto html text pagedimg_jpg pagedimg_gif pagedimg_png)
];

# Argument descriptions for this plugin. Only "process_exp" is listed here
# (new() may append further entries); its default value is whatever
# get_default_process_exp() returns.
my $arguments = [
    {
        name => "process_exp",
        desc => "{BasPlug.process_exp}",
        type => "regexp",
        reqd => "no",
        deft => get_default_process_exp(),
    },
];

# Option record describing this plugin. 'args' holds a reference to the
# $arguments list itself, not a copy of it.
my $options = {
    name     => "PPTPlug",
    desc     => "{PPTPlug.desc}",
    abstract => "no",
    inherits => "yes",
    args     => $arguments,
};

# Constructor.
#
# Arguments (after the class name):
#   $pluginlist      - array ref; $class is appended to it
#   $inputargs       - array ref, handed on unchanged to ConvertToPlug
#   $hashArgOptLists - hash ref; this plugin's argument descriptions are
#                      appended to its "ArgList" entry and its option record
#                      to its "OptList" entry
#
# Returns the object built by ConvertToPlug, blessed into $class. When the
# object's 'info_only' field is set, it is returned straight away without
# configuring or loading any secondary plugins.
sub new {
    my ($class, $pluginlist, $inputargs, $hashArgOptLists) = @_;
    push(@$pluginlist, $class);

    # When GSDLOS names Windows, two extra arguments are offered. They are
    # appended to the file-level $arguments list (which $options->{'args'}
    # also refers to) and that list outlives this call, so append them only
    # if an earlier call has not already done so. Without this check every
    # construction would add another copy of both entries.
    my $on_windows = defined $ENV{'GSDLOS'} && $ENV{'GSDLOS'} =~ m/^windows$/i;
    my $already_added = grep { $_->{'name'} eq "windows_scripting" } @$arguments;
    if ($on_windows && !$already_added) {
        my $ws_arg = [ { 'name' => "convert_to",
                         'desc' => "{ConvertToPlug.convert_to}",
                         'type' => "enum",
                         'reqd' => "yes",
                         'list' => $convert_to_list,
                         'deft' => "html" },
                       { 'name' => "windows_scripting",
                         'desc' => "{PPTPlug.windows_scripting}",
                         'type' => "flag",
                         'reqd' => "no" }
                     ];
        push(@$arguments, @$ws_arg);
    }

    if (defined $arguments) { push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments}); }
    if (defined $options)   { push(@{$hashArgOptLists->{"OptList"}}, $options); }

    my $self = ConvertToPlug->new($pluginlist, $inputargs, $hashArgOptLists);

    if ($self->{'info_only'}) {
        # don't worry about any options etc
        return bless $self, $class;
    }

    # ppthtml outputs utf-8 already.
    # these are passed through to gsConvert.pl by ConvertToPlug.pm
    $self->{'convert_options'} = "-windows_scripting" if $self->{'windows_scripting'};
    my $secondary_plugin_options = $self->{'secondary_plugin_options'};

    # Exactly one secondary plugin gets an (initially empty) option list.
    # NOTE(review): the convert_to enum declared in this file lists
    # pagedimg_jpg/gif/png, not "PagedImg"; presumably ConvertToPlug
    # normalises the value before this point -- confirm.
    if ($self->{'windows_scripting'} && ($self->{'convert_to'} eq "PagedImg")) {
        $secondary_plugin_options->{'PagedImgPlug'} = [];
    } else {
        $secondary_plugin_options->{'HTMLPlug'} = [];
    }
    my $html_options = $secondary_plugin_options->{'HTMLPlug'};
    my $pageimg_options = $secondary_plugin_options->{'PagedImgPlug'};

    # With "auto" input encoding, settle on utf8 both for this plugin and
    # for whichever secondary plugin was selected above.
    if ($self->{'input_encoding'} eq "auto") {
        $self->{'input_encoding'} = "utf8";
        if (defined $secondary_plugin_options->{'HTMLPlug'}) {
            push(@$html_options, "-input_encoding", "utf8");
            push(@$html_options, "-extract_language") if $self->{'extract_language'};

            # Instruct HTMLPlug (when eventually accessed through read_into_doc_obj)
            # to extract these metadata fields from the HEAD META fields
            push(@$html_options, "-metadata_fields", "Title,GENERATOR,date,author<Creator>");
        }
        if (defined $secondary_plugin_options->{'PagedImgPlug'}) {
            push(@$pageimg_options, "-input_encoding", "utf8");
            push(@$pageimg_options, "-extract_language") if $self->{'extract_language'};
        }
    }

    $self = bless $self, $class;

    $self->load_secondary_plugins($class, $secondary_plugin_options, $hashArgOptLists);
    return $self;
}

# Returns the default process_exp pattern as a string: a case-insensitive
# match on names ending in ".ppt". Any arguments passed in are ignored.
sub get_default_process_exp {
    return q{(?i)\.ppt$};
}

# Returns the fixed file-type string "PPT". Any arguments passed in are
# ignored.
sub get_file_type {
    return "PPT";
}

# Rewrite a converted file in place.
#
# $conv_filename is given to textcat_get_language_encoding to obtain a
# language and an encoding; the file is then read with those via read_file,
# passed through unicode::ensure_utf8, and written back to the same filename
# with utf8_write_file. The value of that final call is returned.
sub convert_post_process
{
    my ($self, $conv_filename) = @_;

    my ($language, $encoding) =
        $self->textcat_get_language_encoding($conv_filename);

    # read_file fills $text through the reference ($text will be in utf8)
    my $text = "";
    $self->read_file($conv_filename, $encoding, $language, \$text);

    # turn any high bytes that aren't valid utf-8 into utf-8
    unicode::ensure_utf8(\$text);

    # and write the result back over the file it came from
    return $self->utf8_write_file(\$text, $conv_filename);
}

# Hand the file to process_type with the type string "ppt".
#
# Of the incoming arguments ($textref, $pluginfo, $base_dir, $file,
# $metadata, $doc_obj, $gli) only $base_dir, $file and $doc_obj are passed
# on. Returns whatever process_type returns.
sub process {
    my ($self, undef, undef, $base_dir, $file, undef, $doc_obj) = @_;

    return $self->process_type("ppt", $base_dir, $file, $doc_obj);
}

1;

