/// <summary> Reads the metadata from an open METS stream and populates the provided digital resource package </summary>
/// <param name="Input_Stream"> Open stream from which to read the METS XML; this method closes the stream before returning </param>
/// <param name="Return_Package"> Digital resource object to populate with the read metadata </param>
/// <param name="Options"> Dictionary of reader options ( "METS_File_ReaderWriter:Minimize_File_Info" is honored here ) </param>
/// <param name="Error_Message"> [OUT] Message describing any error caught while parsing the XML; empty string on clean reads </param>
/// <returns> TRUE always; partially parsed packages are returned as-is with <paramref name="Error_Message"/> set </returns>
public bool Read_Metadata(Stream Input_Stream, SobekCM_Item Return_Package, Dictionary<string, object> Options, out string Error_Message)
{
    Error_Message = String.Empty;

    // Read the options from the dictionary of options
    bool minimizeFileInfo = false;
    if (Options != null)
    {
        if (Options.ContainsKey("METS_File_ReaderWriter:Minimize_File_Info"))
            bool.TryParse(Options["METS_File_ReaderWriter:Minimize_File_Info"].ToString(), out minimizeFileInfo);

        // NOTE(review): "METS_File_ReaderWriter:Support_Divisional_dmdSec_amdSec" was previously
        // parsed into a local that was never read, so that option is (still) ignored here.
    }

    // Keep a list of all the files created, by file id, as additional data is gathered
    // from the different locations ( amdSec, fileSec, structmap )
    Dictionary<string, SobekCM_File_Info> files_by_fileid = new Dictionary<string, SobekCM_File_Info>();

    // For now, to support the old way of doing downloads, build a list to hold
    // the deprecated download files.
    // NOTE(review): nothing in this method currently adds to this list, so the
    // backward-compatibility region below appears dormant -- confirm whether a
    // subordinate reader was expected to populate it before removing.
    List<Download_Info_DEPRECATED> deprecatedDownloads = new List<Download_Info_DEPRECATED>();

    // Need to store the unanalyzed sections of dmdSec and amdSec until we determine if
    // the scope is the whole package, or the top-level div.  We use lists as the value since
    // several sections may have NO id and the METS may even (incorrectly) have multiple sections
    // with the same ID
    Dictionary<string, List<Unanalyzed_METS_Section>> dmdSec = new Dictionary<string, List<Unanalyzed_METS_Section>>();
    Dictionary<string, List<Unanalyzed_METS_Section>> amdSec = new Dictionary<string, List<Unanalyzed_METS_Section>>();

    // Dictionaries store the link between dmdSec and amdSec id's to single divisions
    Dictionary<string, abstract_TreeNode> division_dmdids = new Dictionary<string, abstract_TreeNode>();
    Dictionary<string, abstract_TreeNode> division_amdids = new Dictionary<string, abstract_TreeNode>();

    try
    {
        // Step through each node of the METS XML ( the using block guarantees the
        // reader is disposed even if parsing throws part-way through )
        using (XmlReader r = new XmlTextReader(Input_Stream))
        {
            while (r.Read())
            {
                // Handle some processing instructions requested by Florida SUS's / FLVC (hope to deprecate)
                if ((r.NodeType == XmlNodeType.ProcessingInstruction) && (r.Name.ToLower() == "fcla"))
                {
                    string value = r.Value.ToLower();
                    if (value.IndexOf("fda=\"yes\"") >= 0)
                        get_or_create_daitss_info(Return_Package).toArchive = true;
                    if (value.IndexOf("fda=\"no\"") >= 0)
                        get_or_create_daitss_info(Return_Package).toArchive = false;
                }

                if (r.NodeType == XmlNodeType.Element)
                {
                    switch (r.Name.Replace("METS:", ""))
                    {
                        case "mets":
                            if (r.MoveToAttribute("OBJID"))
                                Return_Package.METS_Header.ObjectID = r.Value;
                            break;

                        case "metsHdr":
                            read_mets_header(r.ReadSubtree(), Return_Package);
                            break;

                        case "dmdSec":
                        case "dmdSecFedora":
                            add_unanalyzed_section(dmdSec, store_dmd_sec(r.ReadSubtree()));
                            break;

                        case "amdSec":
                            add_unanalyzed_section(amdSec, store_amd_sec(r.ReadSubtree()));
                            break;

                        case "fileSec":
                            read_file_sec(r.ReadSubtree(), minimizeFileInfo, files_by_fileid);
                            break;

                        case "structMap":
                            if (!r.IsEmptyElement)
                                read_struct_map(r.ReadSubtree(), Return_Package, files_by_fileid, division_dmdids, division_amdids);
                            break;

                        case "behaviorSec":
                            read_behavior_sec(r.ReadSubtree(), Return_Package);
                            break;
                    }
                }
            }
        }
    }
    catch (Exception ee)
    {
        // Report the problem through the out parameter ( previously this was silently
        // swallowed ), but continue with whatever was read so partially parsed
        // packages are still post-processed and returned, exactly as before
        Error_Message = "Error caught while reading the METS file: " + ee.Message;
    }

    Input_Stream.Close();

    // Load some options for interoperability between the subordinate readers
    Dictionary<string, object> options = new Dictionary<string, object>();
    options.Add("SobekCM_FileInfo_METS_amdSec_ReaderWriter:Files_By_FileID", files_by_fileid);

    #region Process the previously stored dmd sections

    // Now, process the previously stored dmd sections
    foreach (string thisDmdSecId in dmdSec.Keys)
    {
        // Could be multiple stored sections with the same (or no) ID
        foreach (Unanalyzed_METS_Section metsSection in dmdSec[thisDmdSecId])
        {
            using (XmlReader reader = XmlReader.Create(new StringReader(metsSection.Inner_XML)))
            {
                string mdtype = String.Empty;
                string othermdtype = String.Empty;
                while (reader.Read())
                {
                    if ((reader.NodeType == XmlNodeType.Element) && (reader.Name.ToLower().Replace("mets:", "") == "mdwrap"))
                    {
                        if (reader.MoveToAttribute("MDTYPE"))
                            mdtype = reader.Value;
                        if (reader.MoveToAttribute("OTHERMDTYPE"))
                            othermdtype = reader.Value;

                        // Not crazy about this part, but sometimes people do not use the OTHERMDTYPE
                        // attribute correctly, and just use the LABEL to differentiate the types
                        if ((mdtype == "OTHER") && (othermdtype.Length == 0) && (reader.MoveToAttribute("LABEL")))
                            othermdtype = reader.Value;

                        // Now, determine if this was a division-level read, or a package-wide read
                        if (division_dmdids.ContainsKey(thisDmdSecId))
                        {
                            // Division level dmdSec -- get the division this section belongs to
                            abstract_TreeNode node = division_dmdids[thisDmdSecId];

                            // Get an appropriate reader from the metadata configuration
                            iDivision_dmdSec_ReaderWriter rw = ResourceObjectSettings.MetadataConfig.Get_Division_DmdSec_ReaderWriter(mdtype, othermdtype);

                            // If no reader/writer is configured for this type, keep the raw section unanalyzed
                            if (rw == null)
                                node.Add_Unanalyzed_DMDSEC(metsSection);
                            else
                                rw.Read_dmdSec(reader, node, options);
                        }
                        else
                        {
                            // Package-level dmdSec -- get an appropriate reader from the metadata configuration
                            iPackage_dmdSec_ReaderWriter rw = ResourceObjectSettings.MetadataConfig.Get_Package_DmdSec_ReaderWriter(mdtype, othermdtype);

                            // If no reader/writer is configured for this type, keep the raw section unanalyzed
                            if (rw == null)
                                Return_Package.Add_Unanalyzed_DMDSEC(metsSection);
                            else
                                rw.Read_dmdSec(reader, Return_Package, options);
                        }
                    }
                }
            }
        }
    }

    #endregion

    #region Process the previously stored amd sections

    // Now, process the previously stored amd sections
    foreach (string thisAmdSecId in amdSec.Keys)
    {
        // Could be multiple stored sections with the same (or no) ID
        foreach (Unanalyzed_METS_Section metsSection in amdSec[thisAmdSecId])
        {
            using (XmlReader reader = XmlReader.Create(new StringReader(metsSection.Inner_XML)))
            {
                string mdtype = String.Empty;
                string othermdtype = String.Empty;
                while (reader.Read())
                {
                    if ((reader.NodeType == XmlNodeType.Element) && (reader.Name.ToLower().Replace("mets:", "") == "mdwrap"))
                    {
                        if (reader.MoveToAttribute("MDTYPE"))
                            mdtype = reader.Value;
                        if (reader.MoveToAttribute("OTHERMDTYPE"))
                            othermdtype = reader.Value;

                        // Package-level amdSec -- get an appropriate reader from the metadata configuration
                        iPackage_amdSec_ReaderWriter rw = ResourceObjectSettings.MetadataConfig.Get_Package_AmdSec_ReaderWriter(mdtype, othermdtype);

                        // If no reader/writer is configured for this type, keep the raw section unanalyzed
                        if (rw == null)
                            Return_Package.Add_Unanalyzed_AMDSEC(metsSection);
                        else
                            rw.Read_amdSec(reader, Return_Package, options);
                    }
                }
            }
        }
    }

    #endregion

    #region Special code used for moving downloads into the structure map system, and out of the old SobekCM METS section

    // For backward compatability, move from the old download system to the
    // new structure.  This has to happen here at the end so that we have access
    // to the complete file dictionary built from the fileSec.
    if (deprecatedDownloads.Count > 0)
    {
        // Get the list of downloads from the download tree
        List<SobekCM_File_Info> newStructureDownloads = Return_Package.Divisions.Download_Tree.All_Files;

        // Step through each download in the old system
        foreach (Download_Info_DEPRECATED thisDownload in deprecatedDownloads)
        {
            // Get the label (if there is one) and the file name
            string label = thisDownload.Label;
            string filename = thisDownload.FileName;

            if ((filename.Length == 0) && (thisDownload.File_ID.Length > 0))
            {
                // Download referenced by file id -- resolve against the fileSec dictionary
                SobekCM_File_Info thisDownloadFile;
                if (files_by_fileid.TryGetValue(thisDownload.File_ID, out thisDownloadFile))
                    add_download_to_tree(Return_Package, newStructureDownloads, thisDownloadFile, label);
            }
            else
            {
                // Download referenced directly by file name
                add_download_to_tree(Return_Package, newStructureDownloads, new SobekCM_File_Info(filename), label);
            }
        }
    }

    #endregion

    #region Special code for distributing any page-level coordinate information read from the old SobekCM coordinate metadata

    // Get the geospatial data
    GeoSpatial_Information geoSpatial = Return_Package.Get_Metadata_Module(GlobalVar.GEOSPATIAL_METADATA_MODULE_KEY) as GeoSpatial_Information;
    if ((geoSpatial != null) && (geoSpatial.Polygon_Count > 0))
    {
        // See if any has the page sequence filled out, which means it came from the old metadata system
        bool redistribute = false;
        foreach (Coordinate_Polygon thisPolygon in geoSpatial.Polygons)
        {
            if (thisPolygon.Page_Sequence > 0)
            {
                redistribute = true;
                break;
            }
        }

        // If we need to redistribute, get started!
        if (redistribute)
        {
            // Get the pages, by sequence
            List<abstract_TreeNode> pagesBySequence = Return_Package.Divisions.Physical_Tree.Pages_PreOrder;
            List<Coordinate_Polygon> polygonsToRemove = new List<Coordinate_Polygon>();

            // Step through each polygon, pushing page-sequenced polygons down to their page
            foreach (Coordinate_Polygon thisPolygon in geoSpatial.Polygons)
            {
                if ((thisPolygon.Page_Sequence > 0) && (thisPolygon.Page_Sequence <= pagesBySequence.Count))
                {
                    // Get the page ( sequence is one-based )
                    abstract_TreeNode thisPageFromSequence = pagesBySequence[thisPolygon.Page_Sequence - 1];

                    // We can assume this page does not already have the coordinates
                    GeoSpatial_Information thisPageCoord = new GeoSpatial_Information();
                    thisPageFromSequence.Add_Metadata_Module(GlobalVar.GEOSPATIAL_METADATA_MODULE_KEY, thisPageCoord);
                    thisPageCoord.Add_Polygon(thisPolygon);

                    // Flag this for removal from the package-level coordinates ( cannot
                    // remove while enumerating the polygon collection )
                    polygonsToRemove.Add(thisPolygon);
                }
            }

            // Now, remove all polygons flagged to be removed
            foreach (Coordinate_Polygon thisPolygon in polygonsToRemove)
            {
                geoSpatial.Remove_Polygon(thisPolygon);
            }
        }
    }

    #endregion

    #region Copy any serial hierarchy in the Behaviors.Serial_Info part into the bib portion, if not there

    // Do some final cleanup on the SERIAL HIERARCHY.
    // BUGFIX(review): previously the enumeration (Enum1/2/3) branch was mis-nested as the
    // ELSE of the "is the bib serial info empty" guard, which OVERWROTE existing serial
    // data, and the Day assignment applied outside the newspaper branch.  Re-nested so the
    // copy only happens when the bib portion is empty: newspapers get Year/Month/Day,
    // everything else gets Enum1/Enum2/Enum3.
    if ((Return_Package.Behaviors.hasSerialInformation) && (Return_Package.Behaviors.Serial_Info.Count > 0))
    {
        if ((Return_Package.Bib_Info.Series_Part_Info.Enum1.Length == 0) && (Return_Package.Bib_Info.Series_Part_Info.Year.Length == 0))
        {
            if (Return_Package.Bib_Info.SobekCM_Type == TypeOfResource_SobekCM_Enum.Newspaper)
            {
                // Newspapers use the chronological ( Year / Month / Day ) designations
                Return_Package.Bib_Info.Series_Part_Info.Year = Return_Package.Behaviors.Serial_Info[0].Display;
                Return_Package.Bib_Info.Series_Part_Info.Year_Index = Return_Package.Behaviors.Serial_Info[0].Order;
                if (Return_Package.Behaviors.Serial_Info.Count > 1)
                {
                    Return_Package.Bib_Info.Series_Part_Info.Month = Return_Package.Behaviors.Serial_Info[1].Display;
                    Return_Package.Bib_Info.Series_Part_Info.Month_Index = Return_Package.Behaviors.Serial_Info[1].Order;
                }
                if (Return_Package.Behaviors.Serial_Info.Count > 2)
                {
                    Return_Package.Bib_Info.Series_Part_Info.Day = Return_Package.Behaviors.Serial_Info[2].Display;
                    Return_Package.Bib_Info.Series_Part_Info.Day_Index = Return_Package.Behaviors.Serial_Info[2].Order;
                }
            }
            else
            {
                // All other resource types use the enumeration ( Enum1 / Enum2 / Enum3 ) designations
                Return_Package.Bib_Info.Series_Part_Info.Enum1 = Return_Package.Behaviors.Serial_Info[0].Display;
                Return_Package.Bib_Info.Series_Part_Info.Enum1_Index = Return_Package.Behaviors.Serial_Info[0].Order;
                if (Return_Package.Behaviors.Serial_Info.Count > 1)
                {
                    Return_Package.Bib_Info.Series_Part_Info.Enum2 = Return_Package.Behaviors.Serial_Info[1].Display;
                    Return_Package.Bib_Info.Series_Part_Info.Enum2_Index = Return_Package.Behaviors.Serial_Info[1].Order;
                }
                if (Return_Package.Behaviors.Serial_Info.Count > 2)
                {
                    Return_Package.Bib_Info.Series_Part_Info.Enum3 = Return_Package.Behaviors.Serial_Info[2].Display;
                    Return_Package.Bib_Info.Series_Part_Info.Enum3_Index = Return_Package.Behaviors.Serial_Info[2].Order;
                }
            }
        }
    }

    #endregion

    return true;
}

/// <summary> Gets the DAITSS metadata module from the package, creating and attaching a new one if absent </summary>
/// <param name="Return_Package"> Digital resource object holding the metadata modules </param>
/// <returns> The existing or newly attached <see cref="DAITSS_Info"/> module </returns>
private static DAITSS_Info get_or_create_daitss_info(SobekCM_Item Return_Package)
{
    DAITSS_Info daitssInfo = Return_Package.Get_Metadata_Module(GlobalVar.DAITSS_METADATA_MODULE_KEY) as DAITSS_Info;
    if (daitssInfo == null)
    {
        daitssInfo = new DAITSS_Info();
        Return_Package.Add_Metadata_Module(GlobalVar.DAITSS_METADATA_MODULE_KEY, daitssInfo);
    }
    return daitssInfo;
}

/// <summary> Stores an unanalyzed METS section in the dictionary keyed by section ID; lists are
/// used as the values since several sections may have no ID, or the METS may (incorrectly)
/// repeat an ID </summary>
/// <param name="Section_Dictionary"> Dictionary of section lists, keyed by section ID </param>
/// <param name="New_Section"> Newly read, unanalyzed METS section to store </param>
private static void add_unanalyzed_section(Dictionary<string, List<Unanalyzed_METS_Section>> Section_Dictionary, Unanalyzed_METS_Section New_Section)
{
    List<Unanalyzed_METS_Section> sectionList;
    if (!Section_Dictionary.TryGetValue(New_Section.ID, out sectionList))
    {
        sectionList = new List<Unanalyzed_METS_Section>();
        Section_Dictionary[New_Section.ID] = sectionList;
    }
    sectionList.Add(New_Section);
}

/// <summary> Adds a single deprecated download file to the download tree, unless a file with
/// the same system name is already present </summary>
/// <param name="Return_Package"> Digital resource object holding the download tree </param>
/// <param name="NewStructureDownloads"> Running list of files already in the download tree </param>
/// <param name="DownloadFile"> File to add to the download tree </param>
/// <param name="Label"> Label from the deprecated download entry ( may be empty ) </param>
private static void add_download_to_tree(SobekCM_Item Return_Package, List<SobekCM_File_Info> NewStructureDownloads, SobekCM_File_Info DownloadFile, string Label)
{
    // Ensure a file of this name doesn't already exist ( case-insensitive compare )
    string filenameKey = DownloadFile.System_Name.ToUpper().Trim();
    foreach (SobekCM_File_Info existingFile in NewStructureDownloads)
    {
        if (existingFile.System_Name.ToUpper().Trim() == filenameKey)
            return;
    }

    // Determine the label if it was missing or identical to the file name:
    // fall back to the file name stripped at the first period
    string label = Label;
    string filename = DownloadFile.System_Name;
    if ((label.Length == 0) || (label == filename))
    {
        label = filename;
        int first_period_index = label.IndexOf('.');
        if (first_period_index > 0)
        {
            label = label.Substring(0, first_period_index);
        }
    }

    // Add the root to the download tree, if not existing
    Division_TreeNode newRoot;
    if (Return_Package.Divisions.Download_Tree.Roots.Count == 0)
    {
        newRoot = new Division_TreeNode("Main", String.Empty);
        Return_Package.Divisions.Download_Tree.Roots.Add(newRoot);
    }
    else
    {
        newRoot = (Division_TreeNode) Return_Package.Divisions.Download_Tree.Roots[0];
    }

    // Add a page for this, with the provided label if there was one
    Page_TreeNode newPage = new Page_TreeNode(label);
    newRoot.Nodes.Add(newPage);

    // Now, add this file to the new page
    newPage.Files.Add(DownloadFile);

    // Add to the list of files added ( in case it appears twice )
    NewStructureDownloads.Add(DownloadFile);
}