/// <summary>
/// Reads the triples file <paramref name="filename"/> line by line and saves one node per run of
/// consecutive lines that share the same subject. Within such a run, the triple whose object has the
/// highest level according to <c>thisOntology.GetOClassLevel</c> is the one passed to <c>SaveTriple</c>
/// (on equal levels the earlier line wins).
/// </summary>
/// <param name="filename">Path of the triples file to read.</param>
/// <param name="thisOntology">Ontology used to look up the class level of each triple object.</param>
/// <param name="dictExistingNodes">Passed through unchanged to <c>SaveTriple</c>.</param>
/// <param name="dictDirectoryEntries">Passed through unchanged to <c>SaveTriple</c>.</param>
/// <param name="dictVertexIDs">Passed through unchanged to <c>SaveTriple</c>.</param>
/// <param name="LogMessage">Callback for progress/informational messages.</param>
/// <param name="LogError">Callback for error messages; also handed to the parser and to <c>SaveTriple</c>.</param>
/// <remarks>
/// Exceptions (including failure to open the file) are not propagated; they are reported through
/// <paramref name="LogError"/>. Processing stops early when <c>SaveTriple</c> returns false or when the
/// number of saved instances exceeds <c>Properties.Settings.Default.InsertLimit</c>.
/// </remarks>
private static void CreateNodes(String filename, Ontology thisOntology, Dictionary<string, List<string>> dictExistingNodes,
    Dictionary<string, uint> dictDirectoryEntries, Dictionary<string, long> dictVertexIDs, Action<string> LogMessage, Action<string> LogError)
{
    LogMessage("Begin CreateNodes(" + filename + ")");
    try
    {
        // No null check on the reader: the StreamReader constructor throws when the file
        // cannot be opened, and that exception is reported by the catch block below.
        using (StreamReader srNodes = new StreamReader(filename))
        {
            #region init local vars
            int iCurrentLevel = -1;          // level of selectedTriple; -1 until a level has been recorded
            String strCurrentTriple;
            Triple selectedTriple = null;    // best candidate so far for the subject currently being collected
            uint lineCount = 0;
            uint instanceCount = 0;
            bool saveFailed = false;         // set when SaveTriple reports failure inside the loop
            #endregion

            #region for each line
            while ((strCurrentTriple = srNodes.ReadLine()) != null)
            {
                #region some debug info
                if (lineCount % 100 == 0)
                {
                    Console.Write(".");
                }
                if (lineCount % 10000 == 0)
                {
                    LogMessage("CreateNodes: lineCount=" + lineCount + " instanceCount=" + instanceCount);
                    // NOTE(review): forced collections kept from the original, presumably to bound
                    // memory on very large input files - confirm they are still needed.
                    GC.Collect();
                    GC.Collect();
                }
                if (instanceCount > Properties.Settings.Default.InsertLimit)
                {
                    LogMessage("Quit execution due to InsertLimit setting");
                    break;
                }
                lineCount++;
                #endregion

                Triple currentTriple = NTripleParser.Split(strCurrentTriple, LogError);
                if (currentTriple == null)
                {
                    // NOTE(review): assumes Split returns null for a line it cannot parse (and reports
                    // it via LogError) - confirm. Skipping avoids a NullReferenceException that would
                    // otherwise abort the whole import.
                    continue;
                }

                #region some sample data for help
                /* currentTriple.Subject currentTriple.Predicate currentTriple.TripleObject
                <http://dbpedia.org/resource/Autism> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2002/07/owl#Thing> .
                <http://dbpedia.org/resource/Autism> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://dbpedia.org/ontology/Disease> .
                <http://dbpedia.org/resource/Alabama> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2002/07/owl#Thing> .
                <http://dbpedia.org/resource/Alabama> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://dbpedia.org/ontology/Place> .
                <http://dbpedia.org/resource/Alabama> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://dbpedia.org/ontology/PopulatedPlace> .
                <http://dbpedia.org/resource/Alabama> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://dbpedia.org/ontology/AdministrativeRegion> .
                */
                #endregion

                #region per unique subject only one save is done: the triple with the highest level within the ontology
                if (selectedTriple == null)
                {
                    selectedTriple = currentTriple;
                }

                int iCurrentTripleLevel = thisOntology.GetOClassLevel(currentTriple.TripleObject);
                if (selectedTriple.Subject.Equals(currentTriple.Subject))
                {
                    // Same subject as the pending candidate: keep whichever has the higher level
                    // (e.g. Thing, Species, Animal, Mammal --> only Mammal is kept).
                    if (iCurrentLevel < iCurrentTripleLevel)
                    {
                        selectedTriple = currentTriple;
                        iCurrentLevel = iCurrentTripleLevel;
                    }
                }
                else
                {
                    // Subject changed: flush the candidate of the previous subject.
                    if (!SaveTriple(selectedTriple, dictExistingNodes, dictDirectoryEntries, dictVertexIDs, LogError))
                    {
                        saveFailed = true;
                        break;
                    }
                    instanceCount++;

                    // Start collecting for the new subject.
                    selectedTriple = currentTriple;
                    iCurrentLevel = iCurrentTripleLevel;
                }
                #endregion
            } // end while
            #endregion

            #region finally - save last line
            // The candidate of the last subject is still pending here. Skip it when the loop ended
            // because SaveTriple failed: in that case selectedTriple is the triple that just failed,
            // and saving it again would only repeat the failure.
            if (selectedTriple != null && !saveFailed)
            {
                SaveTriple(selectedTriple, dictExistingNodes, dictDirectoryEntries, dictVertexIDs, LogError);
            }
            #endregion
        }
    }
    catch (Exception e)
    {
        LogError("Error creating instance file");
        LogError(e.Message);
        LogError(e.StackTrace);
    }
    LogMessage("End CreateNodes(" + filename + ")");
}