/// <summary>
/// Populates the splitter's working state: the camel-case splitter, the program
/// word counts (plus the base-10 log of their sum), the length-weighted global
/// word counts, and the prefix and suffix word lists.
/// </summary>
/// <param name="programWordCount">
/// Word counts for the program being analyzed; stored as-is in <c>ProgramWordCount</c>.
/// </param>
private void Initialize(Dictionary<string, int> programWordCount)
{
    CamelSplitter = new ConservativeIdSplitter();

    // Keep the supplied program counts and cache log10 of their total.
    ProgramWordCount = programWordCount;
    ulong totalOccurrences = 0;
    foreach (int occurrences in ProgramWordCount.Values)
    {
        totalOccurrences += (ulong)occurrences;
    }
    // NOTE: Math.Log10 yields negative infinity when the total is zero (e.g. no counts supplied).
    LogProgramTotalWordCount = Math.Log10(totalOccurrences);

    // Read the raw global counts from the file named by the configuration setting.
    var unweightedGlobalCounts = LibFileLoader.ReadWordCount(SwumConfiguration.GetFileSetting("SamuraiIdSplitter.GlobalWordCountFile"), false, IncludeIdentifier);

    // Scale each global count by (word length - 1)^1.5 so that longer words carry more weight.
    GlobalWordCount = new Dictionary<string, double>();
    foreach (var entry in unweightedGlobalCounts)
    {
        // NOTE: a zero-length key would give Math.Pow(-1, 1.5), which is NaN.
        double lengthWeight = Math.Pow(entry.Key.Length - 1.0, 1.5);
        GlobalWordCount[entry.Key] = entry.Value * lengthWeight;
    }

    // Read the prefix and suffix lists from the files named by the configuration settings.
    //TODO: the words must be in lowercase. Should we lowercase them on loading, or just assume/require that they're in lowercase in the file?
    Prefixes = LibFileLoader.ReadWordList(SwumConfiguration.GetFileSetting("SamuraiIdSplitter.Prefixesfile"));
    Suffixes = LibFileLoader.ReadWordList(SwumConfiguration.GetFileSetting("SamuraiIdSplitter.Suffixesfile"));
}