/// <summary>
/// Trains the character classifier and, when junk handling is enabled,
/// the junk classifier on the given dataset.
/// </summary>
/// <remarks>
/// Any of the three classifier components (<c>charclass</c>, <c>junkclass</c>,
/// <c>ulclass</c>) that is still empty is first created from the parameter of
/// the same name. Junk handling is enabled when the "junk" parameter is true
/// and <c>DisableJunk</c> is false.
/// </remarks>
/// <param name="ds">Dataset to train on; samples whose class equals <c>jc()</c> are treated as junk.</param>
/// <exception cref="NotImplementedException">
/// Thrown after junk training when the "ul" parameter is greater than zero and
/// <c>ulclass</c> is not empty.
/// </exception>
protected override void Train(IDataset ds)
{
    bool useJunk = PGetb("junk") && !DisableJunk;

    // Create any classifier component that has not been set yet.
    if (charclass.IsEmpty)
    {
        charclass.SetComponent(ComponentCreator.MakeComponent(PGet("charclass")));
        TryAttachCharClassifierEvent(charclass.Object);
    }
    if (junkclass.IsEmpty)
    {
        junkclass.SetComponent(ComponentCreator.MakeComponent(PGet("junkclass")));
        TryAttachJunkClassifierEvent(junkclass.Object);
    }
    if (ulclass.IsEmpty)
    {
        ulclass.SetComponent(ComponentCreator.MakeComponent(PGet("ulclass")));
    }

    Global.Debugf("info", "Training content classifier");
    if (useJunk && !junkclass.IsEmpty)
    {
        // With junk handling on, the character classifier only sees the
        // samples that are not labelled as junk.
        Intarray nonJunkIndexes = new Intarray();
        for (int i = 0; i < ds.nSamples(); i++)
        {
            if (ds.Cls(i) != jc())
            {
                nonJunkIndexes.Push(i);
            }
        }
        Datasubset nonJunkSubset = new Datasubset(ds, nonJunkIndexes);
        charclass.Object.XTrain(nonJunkSubset);
    }
    else
    {
        charclass.Object.XTrain(ds);
    }

    if (useJunk && !junkclass.IsEmpty)
    {
        Global.Debugf("info", "Training junk classifier");

        // Build a per-sample 0/1 label (1 = junk) and count the junk samples.
        Intarray junkLabels = new Intarray();
        int junkCount = 0;
        for (int i = 0; i < ds.nSamples(); i++)
        {
            bool isJunkSample = ds.Cls(i) == jc();
            junkLabels.Push(isJunkSample ? 1 : 0);
            if (isJunkSample)
            {
                junkCount++;
            }
        }

        if (junkCount > 0)
        {
            MappedDataset junkDataset = new MappedDataset(ds, junkLabels);
            junkclass.Object.XTrain(junkDataset);
        }
        else
        {
            // Nothing to learn from: warn and clear the junk classifier component.
            Global.Debugf("warn", "you are training a junk class but there are no samples to train on");
            junkclass.SetComponent(null);
        }

        // NOTE(review): this check is only reached when junk training is enabled;
        // it may have been intended to run unconditionally - confirm before moving it.
        if (PGeti("ul") > 0 && !ulclass.IsEmpty)
        {
            throw new NotImplementedException("ulclass not implemented");
        }
    }
}