/// <summary>
/// Micro-benchmark comparing two analysis strategies over increasing token counts:
/// (a) analyzing the input twice, once plain and once through a ModuloTokenFilter,
/// versus (b) analyzing once through a TeeSinkTokenFilter feeding a ModuloSinkFilter
/// sink. Before timing, verifies both strategies emit identical tokens; after each
/// timed run, asserts the accumulated position increments match.
/// </summary>
public virtual void Performance()
{
    int[] tokCount = new int[] { 100, 500, 1000, 2000, 5000, 10000 };
    int[] modCounts = new int[] { 1, 2, 5, 10, 20, 50, 100, 200, 500 };
    for (int k = 0; k < tokCount.Length; k++)
    {
        System.Text.StringBuilder buffer = new System.Text.StringBuilder();
        System.Console.Out.WriteLine("-----Tokens: " + tokCount[k] + "-----");
        for (int i = 0; i < tokCount[k]; i++)
        {
            // ToUpperInvariant: culture-insensitive upper-casing keeps the test
            // data identical on every machine (e.g. avoids the Turkish dotless-i
            // mapping that ToUpper() produces under tr-TR).
            buffer.Append(English.IntToEnglish(i).ToUpperInvariant()).Append(' ');
        }
        //make sure we produce the same tokens
        TeeSinkTokenFilter teeStream = new TeeSinkTokenFilter(new StandardFilter(new StandardTokenizer(Version.LUCENE_CURRENT, new System.IO.StringReader(buffer.ToString()))));
        TokenStream sink = teeStream.NewSinkTokenStream(new ModuloSinkFilter(this, 100));
        teeStream.ConsumeAllTokens();
        TokenStream stream = new ModuloTokenFilter(this, new StandardFilter(new StandardTokenizer(Version.LUCENE_CURRENT, new System.IO.StringReader(buffer.ToString()))), 100);
        ITermAttribute tfTok = stream.AddAttribute<ITermAttribute>();
        ITermAttribute sinkTok = sink.AddAttribute<ITermAttribute>();
        for (int i = 0; stream.IncrementToken(); i++)
        {
            Assert.IsTrue(sink.IncrementToken());
            Assert.IsTrue(tfTok.Equals(sinkTok), tfTok + " is not equal to " + sinkTok + " at token: " + i);
        }
        //simulate two fields, each being analyzed once, for 20 documents
        for (int j = 0; j < modCounts.Length; j++)
        {
            int tfPos = 0;
            // Stopwatch is a monotonic, high-resolution timer; the original
            // DateTime.Now.Ticks arithmetic is low-resolution (~15 ms) and can
            // jump when the system clock is adjusted mid-run.
            System.Diagnostics.Stopwatch timer = System.Diagnostics.Stopwatch.StartNew();
            for (int i = 0; i < 20; i++)
            {
                stream = new StandardFilter(new StandardTokenizer(Version.LUCENE_CURRENT, new System.IO.StringReader(buffer.ToString())));
                IPositionIncrementAttribute posIncrAtt = stream.GetAttribute<IPositionIncrementAttribute>();
                while (stream.IncrementToken())
                {
                    tfPos += posIncrAtt.PositionIncrement;
                }
                stream = new ModuloTokenFilter(this, new StandardFilter(new StandardTokenizer(Version.LUCENE_CURRENT, new System.IO.StringReader(buffer.ToString()))), modCounts[j]);
                posIncrAtt = stream.GetAttribute<IPositionIncrementAttribute>();
                while (stream.IncrementToken())
                {
                    tfPos += posIncrAtt.PositionIncrement;
                }
            }
            timer.Stop();
            System.Console.Out.WriteLine("ModCount: " + modCounts[j] + " Two fields took " + timer.ElapsedMilliseconds + " ms");
            int sinkPos = 0;
            //simulate one field with one sink
            timer.Restart();
            for (int i = 0; i < 20; i++)
            {
                teeStream = new TeeSinkTokenFilter(new StandardFilter(new StandardTokenizer(Version.LUCENE_CURRENT, new System.IO.StringReader(buffer.ToString()))));
                sink = teeStream.NewSinkTokenStream(new ModuloSinkFilter(this, modCounts[j]));
                IPositionIncrementAttribute posIncrAtt = teeStream.GetAttribute<IPositionIncrementAttribute>();
                while (teeStream.IncrementToken())
                {
                    sinkPos += posIncrAtt.PositionIncrement;
                }
                //System.out.println("Modulo--------");
                posIncrAtt = sink.GetAttribute<IPositionIncrementAttribute>();
                while (sink.IncrementToken())
                {
                    sinkPos += posIncrAtt.PositionIncrement;
                }
            }
            timer.Stop();
            System.Console.Out.WriteLine("ModCount: " + modCounts[j] + " Tee fields took " + timer.ElapsedMilliseconds + " ms");
            // Both strategies must have seen the same total of position increments.
            Assert.IsTrue(sinkPos == tfPos, sinkPos + " does not equal: " + tfPos);
        }
        System.Console.Out.WriteLine("- End Tokens: " + tokCount[k] + "-----");
    }
}