###########################################################################
#
# METSPlugout.pm -- the plugout module for METS archives
# A component of the Greenstone digital library software
# from the New Zealand Digital Library Project at the 
# University of Waikato, New Zealand.
#
# Copyright (C) 2006 New Zealand Digital Library Project
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
###########################################################################

package METSPlugout;

use strict;
no strict 'refs';

eval {require bytes};
use util;
use BasPlugout;
use docprint; # for escape_text

# Establish inheritance at compile time: METSPlugout is-a BasPlugout.
BEGIN {
    @METSPlugout::ISA = qw(BasPlugout);
}

# Arguments contributed by this plugout.  Each entry is a hash describing
# one option; the 'desc' values are placeholder keys in curly braces.
#   saveas_version - METS flavour to write; the rest of this file compares
#                    it against "fedora" and treats anything else as the
#                    Greenstone profile
#   xslt_txt       - handed to open_xslt_pipe() when writing doctxt.xml
#   xslt_mets      - handed to open_xslt_pipe() when writing docmets.xml
my $arguments = [
    {
        'name'      => "saveas_version",
        'desc'      => "{METSPlugout.version}",
        'type'      => "string",
        'deft'      => 'greenstone',
        'reqd'      => "yes",
        'hiddengli' => "no",
    },
    {
        'name'      => "xslt_txt",
        'desc'      => "{METSPlugout.xslt_txt}",
        'type'      => "string",
        'reqd'      => "no",
        'hiddengli' => "no",
    },
    {
        'name'      => "xslt_mets",
        'desc'      => "{METSPlugout.xslt_mets}",
        'type'      => "string",
        'reqd'      => "no",
        'hiddengli' => "no",
    },
];

# Descriptor for the plugout itself; 'args' points at the list above.
my $options = {
    'name'     => "METSPlugout",
    'desc'     => "{METSPlugout.desc}",
    'abstract' => "no",
    'inherits' => "yes",
    'args'     => $arguments,
};

# Constructor.
#
# Arguments (after the class name):
#   $plugoutlist     - array ref; this class name is appended to it
#   $inputargs       - passed through unchanged to the BasPlugout constructor
#   $hashArgOptLists - hash ref whose "ArgList" and "OptList" arrays are
#                      extended with this plugout's $arguments / $options
#
# Returns the object built by BasPlugout->new, re-blessed into $class.
sub new {
    my ($class) = shift (@_);
    my ($plugoutlist, $inputargs, $hashArgOptLists) = @_;
    push(@$plugoutlist, $class);

    # Pushing through a (possibly undefined) hash ref autovivifies it, so
    # $hashArgOptLists is always defined once these two lines have run.
    if (defined $arguments) { push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments}); }
    if (defined $options)   { push(@{$hashArgOptLists->{"OptList"}}, $options); }

    # Use an explicit Class->method call: the indirect-object form
    # "new BasPlugout(...)" relies on parser heuristics and is ambiguous in
    # a package that defines its own new().  The two-argument fallback call
    # was unreachable because of the autovivification noted above.
    my $self = BasPlugout->new($plugoutlist, $inputargs, $hashArgOptLists);

    return bless $self, $class;
}

# Save one document as a METS archive directory.
#
# Two files are produced in <output dir>/<$doc_dir>:
#   doctxt.xml  - the section text of the document
#   docmets.xml - the METS wrapper describing the document
# For each file open_xslt_pipe() is called first; if $self->{'xslt_writer'}
# is defined afterwards the output goes to that writer, otherwise to the
# handle returned by get_output_handler().
#
# Arguments:
#   $doc_obj - document object (queried for top section, metadata and OID)
#   $doc_dir - document directory, relative to the plugout's output dir
sub saveas {
    my ($self, $doc_obj, $doc_dir) = @_;
    my $mets_version = $self->{'saveas_version'};

    $self->process_assoc_files($doc_obj, $doc_dir, '');

    # Make sure both the archive root and the per-document directory exist.
    my $archive_root = $self->get_output_dir();
    if (!-e $archive_root) {
        &util::mk_all_dir($archive_root);
    }

    my $doc_out_dir = &util::filename_cat($archive_root, $doc_dir);
    if (!-e $doc_out_dir) {
        &util::mk_all_dir($doc_out_dir);
    }

    # ---- doctxt.xml: the document text --------------------------------
    my $txt_path = &util::filename_cat($doc_out_dir, "doctxt.xml");
    $self->open_xslt_pipe($txt_path, $self->{'xslt_txt'});

    my $txt_out = (defined $self->{'xslt_writer'})
        ? $self->{'xslt_writer'}
        : $self->get_output_handler($txt_path);

    $self->output_xml_header($txt_out);
    $self->output_txt_section($txt_out, $doc_obj, $doc_obj->get_top_section());
    $self->output_xml_footer($txt_out);

    if (defined $self->{'xslt_writer'}) {
        $self->close_xslt_pipe();
    }
    else {
        close($txt_out);
    }

    # ---- docmets.xml: the METS description ----------------------------
    my $mets_path = &util::filename_cat($doc_out_dir, "docmets.xml");

    # Prefer the Dublin Core title, fall back to the plain Title.
    my $title = $doc_obj->get_metadata_element($doc_obj->get_top_section(), "dc.Title");
    $title = $doc_obj->get_metadata_element($doc_obj->get_top_section(), "Title")
        unless defined $title;

    $self->open_xslt_pipe($mets_path, $self->{'xslt_mets'});

    my $mets_out = (defined $self->{'xslt_writer'})
        ? $self->{'xslt_writer'}
        : $self->get_output_handler($mets_path);

    $self->output_mets_xml_header($mets_out, $doc_obj->get_OID(), $title);
    $self->output_mets_section($mets_out, $doc_obj, $doc_obj->get_top_section(), $mets_version, $doc_out_dir);
    $self->output_mets_xml_footer($mets_out);

    if (defined $self->{'xslt_writer'}) {
        $self->close_xslt_pipe();
    }
    else {
        close($mets_out);
    }

    # Record docmets.xml (relative to the output dir) as the document file.
    $self->{'short_doc_file'} = &util::filename_cat($doc_dir, "docmets.xml");

    $self->store_output_info_reference($doc_obj);
}


# Write the XML declaration and the opening <mets:mets> root element.
#
# Arguments:
#   $handle    - file handle (or XSLT pipe) to print to
#   $OID       - document identifier, used to build the OBJID attribute
#   $doc_title - document title; only used for the LABEL attribute of the
#                Fedora profile, may be undef
#
# The profile comes from $self->{'saveas_version'}: "fedora" yields a
# FedoraObject root followed by a <mets:metsHdr>, anything else yields the
# Greenstone METS profile.
sub output_mets_xml_header {
    my $self = shift(@_);
    my ($handle, $OID, $doc_title) = @_;

    my $version = $self->{'saveas_version'};
    my $is_fedora = (defined $version && $version eq "fedora");

    my $extra_attr = "";
    if ($is_fedora) {
        my $fnamespace = $ENV{'FEDORA_PID_NAMESPACE'};
        my $oid_namespace = (defined $fnamespace) ? $fnamespace : "test";

        # The title is free text and ends up inside a double-quoted XML
        # attribute, so it has to be escaped; a raw '"', '&' or '<' would
        # make the whole file ill-formed.
        # NOTE(review): relies on docprint::escape_text escaping '"' as
        # well as '&', '<' and '>' - confirm against docprint.pm.
        my $label = (defined $doc_title) ? &docprint::escape_text($doc_title) : "";

        $extra_attr = "OBJID=\"$oid_namespace:$OID\" TYPE=\"FedoraObject\" LABEL=\"$label\"";
    }
    else {
        # Greenstone METS profile
        $extra_attr = "OBJID=\"$OID:2\"";
    }

    # NOTE(review): the XLink namespace used by the METS schema is
    # http://www.w3.org/1999/xlink; the value below is kept unchanged
    # because existing consumers may match on it - confirm before changing.
    print $handle '<?xml version="1.0" encoding="UTF-8" standalone="no"?>' . "\n";
    print $handle '<mets:mets xmlns:mets="http://www.loc.gov/METS/"' . "\n";
    print $handle '           xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"' . "\n";
    print $handle '           xmlns:gsdl3="http://www.greenstone.org/namespace/gsdlmetadata/1.0/"' . "\n";
    print $handle '           xmlns:xlink="http://www.w3.org/TR/xlink"' ."\n";
    print $handle '           xsi:schemaLocation="http://www.loc.gov/METS/' . "\n";
    print $handle '           http://www.loc.gov/standards/mets/mets.xsd' . "\n";
    print $handle '           http://www.greenstone.org/namespace/gsdlmetadata/1.0/' . "\n";
    print $handle '           http://www.greenstone.org/namespace/gsdlmetadata/1.0/gsdl_metadata.xsd"' . "\n";
    print $handle "  $extra_attr>\n";

    if ($is_fedora) {
        print $handle '<mets:metsHdr RECORDSTATUS="A"/>'. "\n"; # A = active
    }
}

# Close the <mets:mets> root element on the given output handle.
sub output_mets_xml_footer() {
    my ($self, $handle) = @_;
    print $handle "</mets:mets>\n";
}

# Print the body of doctxt.xml: the XML built by buffer_txt_section_xml()
# for $section of $doc_obj is written to $handle in one go.
sub output_txt_section {
    my ($self, $handle, $doc_obj, $section) = @_;

    my $section_xml = $self->buffer_txt_section_xml($doc_obj, $section);
    print $handle $section_xml;
}

# Build the <Section> XML for one section of the document text.
#
# The section's own text is escaped with docprint::escape_text and followed
# by the XML of every subsection (in 'subsection_order'), recursively, so
# the result mirrors the section hierarchy.  Returns "" when $doc_obj has
# no such section.
sub buffer_txt_section_xml {
    my ($self, $doc_obj, $section) = @_;

    my $node = $doc_obj->_lookup_section($section);
    if (!defined $node) {
        return "";
    }

    my @pieces = ("<Section>\n", scalar &docprint::escape_text("$node->{'text'}"));

    # Subsections are addressed as "<parent>.<child>".
    foreach my $child (@{$node->{'subsection_order'}}) {
        push(@pieces, scalar $self->buffer_txt_section_xml($doc_obj, "$section.$child"));
    }

    push(@pieces, "</Section>\n");

    my $xml = join("", @pieces);

    # Drop control characters, keeping only LF (\x0A) and CR (\x0D).
    # NOTE(review): TAB (\x09) is legal in XML 1.0 but is stripped here as
    # well - confirm whether that is intended.
    $xml =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;

    return $xml;
}

# Print the body of docmets.xml for $section of $doc_obj to $handle.
#
# Output order: the metadata section(s), then a <mets:fileSec> holding the
# per-section file groups and the whole-document file group, then - for
# every profile except "fedora" - the two <mets:structMap> elements.
# With $version "fedora" the file groups are additionally wrapped in a
# DATASTREAMS <mets:fileGrp> and no structMap is written.
sub output_mets_section {
    my ($self, $handle, $doc_obj, $section, $version, $working_dir) = @_;

    my $is_fedora = ($version eq "fedora");

    # descriptive / administrative metadata
    print $handle $self->buffer_mets_dmdSection_section_xml($doc_obj, $section, $version);

    # file section
    print $handle "<mets:fileSec>\n";
    print $handle "  <mets:fileGrp ID=\"DATASTREAMS\">\n" if $is_fedora;

    print $handle $self->buffer_mets_fileSection_section_xml($doc_obj, $section, $version);
    print $handle $self->buffer_mets_fileWhole_section_xml($doc_obj, $section, $version, $working_dir);

    print $handle "  </mets:fileGrp>\n" if $is_fedora;
    print $handle "</mets:fileSec>\n";

    # structural maps (Greenstone profile only)
    if (!$is_fedora) {
        # ORDER counter shared by the whole recursive section walk.
        my $order_counter = 0;

        print $handle "<mets:structMap ID=\"Section\" TYPE=\"Section\" LABEL=\"Section\">\n";
        print $handle $self->buffer_mets_StructMapSection_section_xml($doc_obj, $section, \$order_counter);
        print $handle "</mets:structMap>\n";

        print $handle '<mets:structMap ID="All" TYPE="Whole Document" LABEL="All">' . "\n";
        print $handle $self->buffer_mets_StructMapWhole_section_xml($doc_obj, $section);
        print $handle "</mets:structMap>\n";
    }
}

# Build the metadata XML for $section and, recursively, all its subsections.
#
# Arguments:
#   $doc_obj - document object (provides _lookup_section)
#   $section - section identifier; undef is treated as ""
#   $version - "fedora" wraps Dublin Core metadata (obtained from
#              $self->get_dc_metadata) in <mets:amdSec>/<mets:techMD>;
#              anything else writes every metadata pair of the section as
#              <gsdl3:Metadata> inside a <mets:dmdSec>
#
# Returns the XML as a string, or "" if the section does not exist.
sub buffer_mets_dmdSection_section_xml {
    my $self = shift(@_);
    my ($doc_obj, $section, $version) = @_;

    $section = "" unless defined $section;

    my $section_ptr = $doc_obj->_lookup_section($section);
    return "" unless defined $section_ptr;

    my $is_fedora = (defined $version && $version eq "fedora");

    # Section numbers are the section identifier prefixed with "1"; the
    # same number is used for the dmdSec ID and its GROUPID.
    my $section_num = "1" . $section;
    my $dmd_num = $section_num;

    my $all_text = "";

    my $label_attr = "";
    if ($is_fedora) {
        $all_text .= "<mets:amdSec ID=\"DC\" >\n";
        $all_text .= "  <mets:techMD ID=\"DC.0\">\n"; # .0 fedora version number?

        $label_attr = "LABEL=\"Dublin Core Metadata\"";
    }
    else {
        # TODO: check that GROUPID in dmdSec is valid, and whether a
        # <techMD> is required here as well.
        $all_text .= "<mets:dmdSec ID=\"DM$dmd_num\" GROUPID=\"$section_num\">\n";
    }

    $all_text .= "  <mets:mdWrap $label_attr MDTYPE=\"OTHER\" OTHERMDTYPE=\"gsdl3\" ID=\"gsdl$section_num\">\n";
    $all_text .= "    <mets:xmlData>\n";

    if ($is_fedora) {
        # Namespace declarations only: the closing '>' of the start tag is
        # added exactly once, on the line that emits the element.  (It used
        # to be part of this string too, which left a stray '>' as text
        # content of <oai_dc:dc>.)
        my $dc_namespace = "xmlns:dc=\"http://purl.org/dc/elements/1.1/\""
            . " xmlns:oai_dc=\"http://www.openarchives.org/OAI/2.0/oai_dc/\"";

        $all_text .= "  <oai_dc:dc $dc_namespace>\n";
        $all_text .= $self->get_dc_metadata($doc_obj, $section, "oai_dc");
        $all_text .= "  </oai_dc:dc>\n";
    }
    else {
        foreach my $data (@{$section_ptr->{'metadata'}}) {
            my $escaped_value = &docprint::escape_text($data->[1]);
            $all_text .= '      <gsdl3:Metadata name="'. $data->[0].'">'. $escaped_value. "</gsdl3:Metadata>\n";

            # dc.Title is additionally exposed under the plain name "Title".
            if ($data->[0] eq "dc.Title") {
                $all_text .= '      <gsdl3:Metadata name="Title">'. $escaped_value."</gsdl3:Metadata>\n";
            }
        }
    }

    $all_text .= "    </mets:xmlData>\n";
    $all_text .= "  </mets:mdWrap>\n";

    if ($is_fedora) {
        $all_text .= "  </mets:techMD>\n";
        $all_text .= "</mets:amdSec>\n";
    }
    else {
        $all_text .= "</mets:dmdSec>\n";
    }

    # Subsections are addressed as "<parent>.<child>".
    foreach my $subsection (@{$section_ptr->{'subsection_order'}}) {
        $all_text .= $self->buffer_mets_dmdSection_section_xml($doc_obj, "$section.$subsection", $version);
    }

    # Drop control characters, keeping only LF (\x0A) and CR (\x0D).
    $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;

    return $all_text;
}

# Build the nested <mets:div> structure for $section and its subsections.
#
# Arguments:
#   $doc_obj      - document object (provides _lookup_section)
#   $section      - section identifier; undef is treated as ""
#   $order_numref - reference to a counter; its current value becomes this
#                   div's ORDER and it is incremented for the next div, so
#                   one counter numbers the whole recursive walk
#
# Each div points at the section's FILEGROUP_PRELUDE file group and at its
# DM<section number> metadata section.  Returns "" for an unknown section.
sub buffer_mets_StructMapSection_section_xml(){
    my ($self, $doc_obj, $section, $order_numref) = @_;

    if (!defined $section) {
        $section = "";
    }

    my $node = $doc_obj->_lookup_section($section);
    if (!defined $node) {
        return "";
    }

    # Section numbers are the section identifier prefixed with "1".
    my $label = "1" . $section;
    my $metadata_id = "DM" . $label;

    my @pieces = (
        "  <mets:div ID=\"DS$label\" TYPE=\"Section\" \n",
        '      ORDER="' . ${$order_numref}++ . '" ORDERLABEL="' . $label . '" ' . "\n",
        "      LABEL=\"$label\" DMDID=\"$metadata_id\">\n",
        '    <mets:fptr FILEID="FILEGROUP_PRELUDE' . $label . '" />' . "\n",
    );

    # Child divs are nested inside this one.
    foreach my $child (@{$node->{'subsection_order'}}) {
        push(@pieces, scalar $self->buffer_mets_StructMapSection_section_xml($doc_obj, "$section.$child", $order_numref));
    }

    push(@pieces, "  </mets:div>\n");

    my $xml = join("", @pieces);

    # Drop control characters, keeping only LF (\x0A) and CR (\x0D).
    $xml =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;

    return $xml;
}


# Build the single <mets:div> that represents the whole document.
#
# One <mets:fptr FILEID="default.N"> is written for every
# "gsdlsourcefilename" and "gsdlassocfile" metadata entry of $section, with
# N counting those entries from 1 in metadata order.
#
# Arguments:
#   $doc_obj - document object (provides _lookup_section)
#   $section - section identifier
#
# Returns the XML as a string, or "" if the section does not exist.
sub buffer_mets_StructMapWhole_section_xml {
    my $self = shift(@_);
    my ($doc_obj, $section) = @_;

    my $section_ptr = $doc_obj->_lookup_section($section);
    return "" unless defined $section_ptr;

    my $fileID = 0;
    my $order_num = 0;

    # The label previously read "Whole Documemt" (typo).
    my $all_text = '  <mets:div ID="DSAll" TYPE="Document" ORDER="' . $order_num
        . '" ORDERLABEL="All" LABEL="Whole Document" DMDID="DM1">' . "\n";

    # Only the metadata names matter here; the values are not used, so
    # they are no longer escaped on every iteration.
    foreach my $data (@{$section_ptr->{'metadata'}}) {
        my $name = $data->[0];
        next unless $name eq "gsdlsourcefilename" || $name eq "gsdlassocfile";

        ++$fileID;
        $all_text .= '    <mets:fptr FILEID="default.' . $fileID . '" />' . "\n";
    }

    $all_text .= "  </mets:div>\n";

    # Drop control characters, keeping only LF (\x0A) and CR (\x0D).
    $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;

    return $all_text;
}

# Build the per-section <mets:fileGrp> entries for $section and all of its
# subsections.
#
# Every section gets a FILEGROUP_PRELUDE<n> group with one <mets:file>
# whose FLocat points into doctxt.xml through an xpointer that follows the
# nested <Section> elements down to this section's text.  With $version
# "fedora" the file additionally carries OWNERID="M".
#
# Returns the XML as a string, or "" if the section does not exist.
sub buffer_mets_fileSection_section_xml() {
    my ($self, $doc_obj, $section, $version) = @_;

    my $node = $doc_obj->_lookup_section($section);
    if (!defined $node) {
        return "";
    }

    # Section numbers are the section identifier prefixed with "1".
    my $label = "1" . $section;

    # Turn "1.2.3" into the inner part of /Section[1]/Section[2]/Section[3].
    (my $xpath_steps = $label) =~ s/\./]\/Section[/g;

    my $owner_attr = ($version eq "fedora") ? "OWNERID=\"M\"" : "";
    my $text_file = 'doctxt.xml';

    my @pieces = (
        '  <mets:fileGrp ID="FILEGROUP_PRELUDE' . $label . '">' . "\n",
        "    <mets:file MIMETYPE=\"text/xml\" ID=\"FILE$label\" $owner_attr >\n",
        '      <mets:FLocat LOCTYPE="URL" xlink:href="file:' . $text_file
            . '#xpointer(/Section[' . $xpath_steps
            . ']/text())" xlink:title="Hierarchical Document Structure"/>' . "\n",
        "    </mets:file>\n",
        "  </mets:fileGrp>\n",
    );

    # Groups of subsections follow their parent's group (they are siblings
    # in the output, not nested).
    foreach my $child (@{$node->{'subsection_order'}}) {
        push(@pieces, scalar $self->buffer_mets_fileSection_section_xml($doc_obj, "$section.$child", $version));
    }

    my $xml = join("", @pieces);

    # Drop control characters, keeping only LF (\x0A) and CR (\x0D).
    $xml =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;

    return $xml;
}

# Build the <mets:file> entries for the source file and the associated
# files recorded in the metadata of $section.
#
# Arguments:
#   $doc_obj     - document object (provides _lookup_section)
#   $section     - section identifier
#   $version     - "fedora" or anything else (Greenstone profile)
#   $working_dir - directory the document is written to; only used to build
#                  absolute URLs for Fedora when $ENV{FEDORA_PREFIX} is set
#
# Greenstone profile: one <mets:fileGrp ID="default"> containing a file for
# each "gsdlsourcefilename" and "gsdlassocfile" entry, with IDs default.N
# numbered in metadata order.
# Fedora profile: the source file is skipped and every associated file gets
# its own <mets:fileGrp>, named after the file with '/' replaced by '_'.
#
# "gsdlassocfile" values are expected to look like "file:mimetype:dir".
# Returns the XML as a string, or "" if the section does not exist.
sub buffer_mets_fileWhole_section_xml {
    my $self = shift(@_);
    my ($doc_obj, $section, $version, $working_dir) = @_;

    my $section_ptr = $doc_obj->_lookup_section($section);
    return "" unless defined $section_ptr;

    my $is_fedora = (defined $version && $version eq "fedora");

    my $all_text = "";
    my $fileID = 0;

    my $id_root      = $is_fedora ? "" : "default";
    my $opt_owner_id = $is_fedora ? "OWNERID=\"M\"" : "";

    if (!$is_fedora) {
        $all_text .= "  <mets:fileGrp ID=\"$id_root\">\n";
    }

    foreach my $data (@{$section_ptr->{'metadata'}}) {

        if (($data->[0] eq "gsdlsourcefilename") && !$is_fedora) {
            ++$fileID;

            # The file name goes into an XML attribute, so use the escaped
            # form (the raw value used to be written here).
            my $escaped_source = &docprint::escape_text($data->[1]);

            $all_text .= "    <mets:file MIMETYPE=\"text/xml\" ID=\"$id_root.$fileID\" $opt_owner_id >\n";
            $all_text .= "      <mets:FLocat LOCTYPE=\"URL\" xlink:href=\"file:$escaped_source\" />\n";
            $all_text .= "    </mets:file>\n";
        }

        if ($data->[0] eq "gsdlassocfile") {
            # Count the entry even if it turns out to be malformed, so the
            # numbering stays aligned with the default.N pointers of the
            # whole-document structMap, which counts every entry.
            ++$fileID;

            my $escaped_value = &docprint::escape_text($data->[1]);
            my ($assoc_file, $mime_type, $assoc_dir) =
                $escaped_value =~ m/^(.*?):(.*):(.*)$/;

            # Without this check a value that does not match would silently
            # reuse the captures of an earlier successful match.
            if (!defined $assoc_file) {
                print STDERR "METSPlugout: skipping malformed gsdlassocfile entry '$data->[1]'\n";
                next;
            }

            if ($is_fedora) {
                $id_root = $assoc_file;
                $id_root =~ s/\//_/g;
                $all_text .= "  <mets:fileGrp ID=\"$id_root\">\n";
            }

            my $assfilePath = ($assoc_dir eq "") ? $assoc_file : "$assoc_dir/$assoc_file";

            my $mime_attr   = "MIMETYPE=\"$mime_type\"";
            my $xlink_title = "xlink:title=\"$assoc_file\"";

            my $id_attr;
            my $xlink_href;

            if ($is_fedora) {
                $id_attr = "ID=\"$id_root.0\"";

                if (!defined $ENV{'FEDORA_PREFIX'}) {
                    $xlink_href = "xlink:href=\"$assfilePath\"";
                }
                else {
                    # Rewrite the on-disk location into a URL below /gsdl
                    # on the Fedora host.
                    my $gsdlhome = defined $ENV{'GSDLHOME'} ? $ENV{'GSDLHOME'} : "";
                    my $gsdl_href = "$working_dir/$assfilePath";

                    # \Q..\E: GSDLHOME is a path, not a pattern; characters
                    # such as '+', '.', '(' or '\' must match literally.
                    $gsdl_href =~ s/^\Q$gsdlhome\E(\/)?//;
                    $gsdl_href = "/gsdl/$gsdl_href";

                    my $fserver = $ENV{'FEDORA_HOSTNAME'};
                    my $fport = $ENV{'FEDORA_SERVER_PORT'};

                    my $fdomain = "http://$fserver:$fport";
                    $xlink_href = "xlink:href=\"$fdomain$gsdl_href\"";
                }
            }
            else {
                $id_attr = "ID=\"$id_root.$fileID\"";
                $xlink_href = "xlink:href=\"$assfilePath\"";
            }

            $all_text .= "    <mets:file $mime_attr $id_attr $opt_owner_id >\n";
            $all_text .= "      <mets:FLocat LOCTYPE=\"URL\" $xlink_href $xlink_title />\n";
            $all_text .= "    </mets:file>\n";

            if ($is_fedora) {
                $all_text .= "  </mets:fileGrp>\n";
            }
        }
    }

    if (!$is_fedora) {
        $all_text .= "  </mets:fileGrp>\n";
    }

    # Drop control characters, keeping only LF (\x0A) and CR (\x0D).
    $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;

    return $all_text;
}


1;
