/**
 *############################################################################
 * A component of the Greenstone Librarian Interface, part of the Greenstone
 * digital library suite from the New Zealand Digital Library Project at the
 * University of Waikato, New Zealand.
 *
 * Author: Michael Dewsnip, NZDL Project, University of Waikato, NZ
 *
 * Copyright (C) 2004 New Zealand Digital Library Project
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *############################################################################
 */

package org.greenstone.gatherer.metadata;


import java.io.*;
import java.util.*;
import org.greenstone.gatherer.DebugStream;
import org.greenstone.gatherer.util.Utility;


/**
 * This class represents one doc.xml file.
 *
 * The file is never parsed as XML. Instead it is read line by line:
 * skimFile() records, for each source file named by a gsdlsourcefilename
 * Metadata element, the line numbers at which the enclosing Description
 * elements start, and getMetadataExtractedFromFile() later re-reads only
 * those Description elements to collect the extracted metadata values.
 */
public class DocXMLFile
    extends File
{
    /** The text that introduces the value of a Metadata element's name attribute */
    private static final String NAME_ATTRIBUTE_START = " name=\"";

    /**
     * Maps a source file path (relative to the import folder) to an ArrayList of
     * Integers: the zero-based line numbers of the lines that start the
     * Description elements belonging to that source file. Filled in by skimFile().
     */
    private HashMap source_file_name_to_description_elements_mapping = new HashMap();


    /**
     * @param doc_xml_file_path the path of the doc.xml file this object represents
     */
    public DocXMLFile(String doc_xml_file_path)
    {
	super(doc_xml_file_path);
    }


    /**
     * Collects the extracted (non-namespaced, non-"gsdl") metadata that this doc.xml
     * file holds for the specified source file.
     *
     * Only source files recorded by an earlier call to skimFile() can be found.
     *
     * @param file the source file to look up; the part of its absolute path after
     *             the first occurrence of "import" is used as the lookup key
     * @return a list of MetadataValue objects; empty if this doc.xml file has no
     *         entry for the file, or if a "SourceSegment" element is encountered
     *         (bibliographic data is ignored completely). If reading fails part way
     *         through, the values gathered so far are returned.
     */
    public ArrayList getMetadataExtractedFromFile(File file)
    {
	// Build up a list of metadata extracted from this file
	ArrayList metadata_values = new ArrayList();

	String file_relative_path = file.getAbsolutePath();
	int import_index = file_relative_path.indexOf("import");
	if (import_index != -1) {
	    // Skip over "import" and the path separator that follows it
	    int relative_path_start = import_index + "import".length() + 1;
	    if (relative_path_start > file_relative_path.length()) {
		// The path ends with "import", so there is nothing below the import folder to look up
		return metadata_values;
	    }
	    file_relative_path = file_relative_path.substring(relative_path_start);
	}

	// Check whether this doc.xml file contains extracted metadata for the specified file
	ArrayList description_elements_list = (ArrayList) source_file_name_to_description_elements_mapping.get(file_relative_path);
	if (description_elements_list == null || description_elements_list.isEmpty()) {
	    // ...it doesn't
	    return metadata_values;
	}

	MetadataSet extracted_metadata_set = MetadataSetManager.getMetadataSet(MetadataSetManager.EXTRACTED_METADATA_NAMESPACE);

	// Parse the doc.xml file
	DebugStream.println("Applicable doc.xml file: " + this);
	BufferedReader buffered_reader = null;
	try {
	    buffered_reader = openReader();

	    int description_element_num = 0;
	    int next_description_element_start = ((Integer) description_elements_list.get(description_element_num)).intValue();
	    boolean in_relevant_description_element = false;

	    String line = null;
	    for (int line_num = 0; (line = buffered_reader.readLine()) != null; line_num++) {
		// Check if this line contains the start of a relevant Description element
		if (line_num == next_description_element_start) {
		    in_relevant_description_element = true;
		    continue;
		}

		// If we're not in a relevant Description element we don't care about anything
		if (!in_relevant_description_element) {
		    continue;
		}

		// Check if this line contains the end of the relevant Description element
		if (line.indexOf("</Description>") != -1) {
		    description_element_num++;
		    if (description_element_num == description_elements_list.size()) {
			// That was the last relevant Description element, so there is no need to read further
			break;
		    }

		    next_description_element_start = ((Integer) description_elements_list.get(description_element_num)).intValue();
		    in_relevant_description_element = false;
		    continue;
		}

		// If this line doesn't contain a complete Metadata element, we're not interested
		if (line.indexOf("<Metadata ") == -1 || line.indexOf("</Metadata>") == -1) {
		    continue;
		}

		// Extract the metadata element name (skipping lines with no usable name attribute)
		int name_index = findNameValueStart(line);
		if (name_index == -1) {
		    continue;
		}
		String metadata_element_name_full = line.substring(name_index, line.indexOf("\"", name_index));

		// If the metadata has a namespace it isn't extracted metadata, so we're not interested
		String metadata_set_namespace = MetadataTools.getMetadataSetNamespace(metadata_element_name_full);
		if (!metadata_set_namespace.equals("")) {
		    continue;
		}

		// Extracted metadata!
		String metadata_element_name = metadata_element_name_full;

		// We completely ignore bibliographic data (the reader is closed in the finally clause)
		if (metadata_element_name.equals("SourceSegment")) {
		    return new ArrayList();
		}

		// Ignore metadata starting with gsdl (used to be lowercase metadata and /metadata)
		if (metadata_element_name.startsWith("gsdl")) {
		    continue;
		}

		MetadataElement metadata_element = extracted_metadata_set.getMetadataElementWithName(metadata_element_name);
		if (metadata_element == null) {
		    // The lookup can fail (skimFile() handles the same case), so don't dereference null
		    DebugStream.println("Extracted metadata element not defined: " + metadata_element_name);
		    continue;
		}

		// The value is everything between the end of the opening tag and the last closing tag
		int value_index = line.indexOf(">", name_index) + ">".length();
		int value_end = line.lastIndexOf("</Metadata>");
		if (value_index > value_end) {
		    // Malformed line: the closing tag comes before the end of the opening tag
		    continue;
		}
		String metadata_element_value = line.substring(value_index, value_end);

		// Value trees are not stored for extracted metadata, so create a new value tree node now
		metadata_element.addMetadataValue(metadata_element_value);
		MetadataValueTreeNode metadata_value_tree_node = metadata_element.getMetadataValueTreeNode(metadata_element_value);

		// Add the new metadata value to the list
		MetadataValue metadata_value = new MetadataValue(metadata_element, metadata_value_tree_node);
		metadata_values.add(metadata_value);
	    }
	}
	catch (FileNotFoundException exception) {
	    DebugStream.printStackTrace(exception);
	}
	catch (IOException exception) {
	    DebugStream.printStackTrace(exception);
	}
	finally {
	    // Always release the file handle, whichever way the parsing ended
	    closeReader(buffered_reader);
	}

	return metadata_values;
    }


    /**
     * Every doc.xml file must be skimmed when a collection is opened, for two reasons:
     *   - To build a mapping from source file to its corresponding doc.xml file
     *   - To get a complete list of all extracted metadata elements
     *
     * Read errors are reported through DebugStream and end the skim early; anything
     * recorded before the error is kept.
     */
    public void skimFile()
    {
	MetadataSet extracted_metadata_set = MetadataSetManager.getMetadataSet(MetadataSetManager.EXTRACTED_METADATA_NAMESPACE);

	// Skim the doc.xml file as quickly as possible (don't parse as XML), looking at the Metadata elements
	DebugStream.println("Skimming doc.xml file " + this + "...");
	BufferedReader buffered_reader = null;
	try {
	    buffered_reader = openReader();

	    // Line number of the currently open Description element, or -1 when outside one
	    int description_element_start = -1;

	    String line = null;
	    for (int line_num = 0; (line = buffered_reader.readLine()) != null; line_num++) {
		// This line contains the start of a Description element
		if (line.indexOf("<Description>") != -1) {
		    if (description_element_start != -1) {
			System.err.println("Parse error: previous Description element unfinished!");
		    }
		    description_element_start = line_num;
		    continue;
		}

		// This line contains the end of a Description element
		if (line.indexOf("</Description>") != -1) {
		    if (description_element_start == -1) {
			System.err.println("Parse error: Description element unstarted!");
		    }
		    description_element_start = -1;
		    continue;
		}

		// If we're not in a Description element there shouldn't be any Metadata elements
		if (description_element_start == -1) {
		    continue;
		}

		// This line doesn't contain a Metadata element, so we're not interested
		if (line.indexOf("<Metadata ") == -1) {
		    DebugStream.println("Warning: Description element line doesn't contain Metadata element.");
		    continue;
		}

		// Extract the metadata element name (skipping lines with no usable name attribute)
		int name_index = findNameValueStart(line);
		if (name_index == -1) {
		    DebugStream.println("Warning: Metadata element line doesn't contain a name attribute.");
		    continue;
		}
		String metadata_element_name_full = line.substring(name_index, line.indexOf("\"", name_index));

		// If the metadata has a namespace it isn't extracted metadata, so we're not interested
		String metadata_set_namespace = MetadataTools.getMetadataSetNamespace(metadata_element_name_full);
		if (!metadata_set_namespace.equals("")) {
		    continue;
		}

		// Extracted metadata!
		String metadata_element_name = metadata_element_name_full;

		// Note which file this doc.xml is for
		if (metadata_element_name.equals("gsdlsourcefilename")) {
		    recordSourceFileName(line, name_index, description_element_start);
		}

		// Ignore metadata starting with gsdl (used to be lowercase metadata and /metadata)
		if (metadata_element_name.startsWith("gsdl")) {
		    continue;
		}

		MetadataElement metadata_element = extracted_metadata_set.getMetadataElementWithName(metadata_element_name);
		if (metadata_element == null) {
		    // This element isn't defined in ex.mds, so create it for this session
		    DebugStream.println("Extracted metadata element not defined: " + metadata_element_name);
		    extracted_metadata_set.addMetadataElementForThisSession(metadata_element_name);
		}
	    }
	}
	catch (FileNotFoundException exception) {
	    DebugStream.printStackTrace(exception);
	}
	catch (IOException exception) {
	    DebugStream.printStackTrace(exception);
	}
	finally {
	    // Always release the file handle, whichever way the skimming ended
	    closeReader(buffered_reader);
	}
    }


    /**
     * Opens this doc.xml file for line-by-line reading.
     *
     * NOTE(review): the file is decoded as UTF-8 on the assumption that doc.xml files
     * are always UTF-8 encoded; a plain FileReader would decode them using the
     * platform default charset instead. Confirm against the code that writes doc.xml.
     */
    private BufferedReader openReader()
	throws IOException
    {
	return new BufferedReader(new InputStreamReader(new FileInputStream(this), "UTF-8"));
    }


    /** Closes the reader if it was opened, reporting (rather than propagating) any failure to do so. */
    private static void closeReader(BufferedReader buffered_reader)
    {
	if (buffered_reader == null) {
	    return;
	}

	try {
	    buffered_reader.close();
	}
	catch (IOException exception) {
	    DebugStream.printStackTrace(exception);
	}
    }


    /**
     * Finds where the value of the name attribute starts in a Metadata element line.
     *
     * @return the index of the first character of the name value, or -1 if the line
     *         has no name attribute or the attribute value has no closing quote
     */
    private static int findNameValueStart(String line)
    {
	int attribute_index = line.indexOf(NAME_ATTRIBUTE_START);
	if (attribute_index == -1) {
	    return -1;
	}

	int name_index = attribute_index + NAME_ATTRIBUTE_START.length();
	if (line.indexOf("\"", name_index) == -1) {
	    return -1;
	}
	return name_index;
    }


    /**
     * Records that the Description element starting at the given line belongs to the
     * source file named by the gsdlsourcefilename Metadata element on this line.
     *
     * @param line the line holding the gsdlsourcefilename Metadata element
     * @param name_index the index in the line at which the name attribute value starts
     * @param description_element_start the line number at which the enclosing Description element starts
     */
    private void recordSourceFileName(String line, int name_index, int description_element_start)
    {
	// Extract the gsdlsourcefilename element value
	int value_index = line.indexOf(">", name_index) + ">".length();
	int value_end = line.indexOf("<", value_index);
	if (value_end == -1) {
	    DebugStream.println("Warning: gsdlsourcefilename Metadata element is not terminated on its line.");
	    return;
	}
	String gsdlsourcefilename_value = line.substring(value_index, value_end);

	// We're only interested in the path relative to the import folder
	int import_index = gsdlsourcefilename_value.indexOf("import");
	String relative_path = (import_index == -1) ? "" : gsdlsourcefilename_value.substring(import_index + "import".length());
	if (relative_path.length() == 0) {
	    // Warn about an odd gsdlsourcefilename, except if it is the Greenstone "tmp" directory
	    // This is true when the source files come from a zip file processed by ZIPPlug, for example
	    if (import_index != -1 || gsdlsourcefilename_value.indexOf("tmp") == -1) {
		// We don't really know what is going on...
		System.err.println("Warning: Could not understand gsdlsourcefilename " + gsdlsourcefilename_value);
	    }
	    return;
	}

	// The character straight after "import" is the path separator, which tells us the path style
	boolean is_unix_path = relative_path.startsWith("/");
	relative_path = relative_path.substring(1);

	// Make sure the path matches the OS that is running
	if (is_unix_path && Utility.isWindows()) {
	    // Convert path from Unix to Windows
	    relative_path = relative_path.replace('/', '\\');
	}
	else if (!is_unix_path && !Utility.isWindows()) {
	    // Convert path from Windows to Unix
	    relative_path = relative_path.replace('\\', '/');
	}

	// Remember this for quick access later
	ArrayList description_elements_list = (ArrayList) source_file_name_to_description_elements_mapping.get(relative_path);
	if (description_elements_list == null) {
	    description_elements_list = new ArrayList();
	    source_file_name_to_description_elements_mapping.put(relative_path, description_elements_list);
	}
	description_elements_list.add(new Integer(description_element_start));
    }
}
