// Randomized test of Terms.Intersect(CompiledAutomaton, BytesRef).
//
// Phase 1 indexes AtLeast(300) distinct random strings through the AddDoc helper and
// remembers, for every term, the id it was indexed under (termToID).
// NOTE(review): AddDoc is defined elsewhere; it presumably writes the pending terms
// to field "f", the id to field "id", fills termToID and clears pendingTerms --
// confirm against the helper.
//
// Phase 2 repeatedly builds an automaton (empty on the first iteration, otherwise a
// union of a random mix of indexed and fresh random strings), intersects it with the
// terms of field "f" from an optional start term, and checks that the enum returns
// exactly the indexed terms that are also accepted, in sorted order, each with
// DocFreq == 1 and pointing at the document carrying the expected id.
//
// The writer, reader and directory are released in finally blocks so that a failing
// assertion does not leave them open.
public virtual void TestIntersectRandom()
{
    Directory dir = NewDirectory();
    try
    {
        RandomIndexWriter w = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
        HashSet<string> terms = new HashSet<string>();
        IDictionary<BytesRef, int?> termToID = new Dictionary<BytesRef, int?>();
        HashSet<BytesRef> termsSet = new HashSet<BytesRef>();
        BytesRef[] termsArray;
        IndexReader r;
        try
        {
            int numTerms = AtLeast(300);
            ICollection<string> pendingTerms = new List<string>();
            int id = 0;
            while (terms.Count != numTerms)
            {
                string s = RandomString;
                // HashSet.Add returns false for duplicates, so only new terms are queued.
                if (terms.Add(s))
                {
                    pendingTerms.Add(s);
                    // Flush the pending terms into a document roughly 1 time in 20.
                    if (Random().Next(20) == 7)
                    {
                        AddDoc(w, pendingTerms, termToID, id++);
                    }
                }
            }
            // Flush whatever is still pending after the last unique term was drawn.
            AddDoc(w, pendingTerms, termToID, id++);

            // Sorted array for binary search / ordered walking, plus a set for membership tests.
            termsArray = new BytesRef[terms.Count];
            int upto = 0;
            foreach (string s in terms)
            {
                BytesRef b = new BytesRef(s);
                termsArray[upto++] = b;
                termsSet.Add(b);
            }
            Array.Sort(termsArray);

            if (VERBOSE)
            {
                Console.WriteLine("\nTEST: indexed terms (unicode order):");
                foreach (BytesRef t in termsArray)
                {
                    Console.WriteLine(" " + t.Utf8ToString() + " -> id:" + termToID[t]);
                }
            }

            r = w.Reader;
        }
        finally
        {
            // The writer is closed as soon as the reader has been obtained (or indexing failed).
            w.Dispose();
        }

        try
        {
            // NOTE: intentional insanity!!
            FieldCache.Ints docIDToID = FieldCache.DEFAULT.GetInts(SlowCompositeReaderWrapper.Wrap(r), "id", false);

            for (int iter = 0; iter < 10 * RANDOM_MULTIPLIER; iter++)
            {
                // TODO: can we also test infinite As here...?

                // From the random terms, pick some ratio and compile an
                // automaton:
                HashSet<string> acceptTerms = new HashSet<string>();
                SortedSet<BytesRef> sortedAcceptTerms = new SortedSet<BytesRef>();
                // Drawn on every iteration (even the empty-automaton one) so the
                // sequence of random values does not depend on the branch taken.
                double keepPct = Random().NextDouble();
                Automaton a;
                if (iter == 0)
                {
                    if (VERBOSE)
                    {
                        Console.WriteLine("\nTEST: empty automaton");
                    }
                    a = BasicAutomata.MakeEmpty();
                }
                else
                {
                    if (VERBOSE)
                    {
                        Console.WriteLine("\nTEST: keepPct=" + keepPct);
                    }
                    foreach (string s in terms)
                    {
                        // Keep the indexed term with probability keepPct, otherwise
                        // substitute a fresh random string.
                        string s2;
                        if (Random().NextDouble() <= keepPct)
                        {
                            s2 = s;
                        }
                        else
                        {
                            s2 = RandomString;
                        }
                        acceptTerms.Add(s2);
                        sortedAcceptTerms.Add(new BytesRef(s2));
                    }
                    a = BasicAutomata.MakeStringUnion(sortedAcceptTerms);
                }

                if (Random().NextBoolean())
                {
                    if (VERBOSE)
                    {
                        Console.WriteLine("TEST: reduce the automaton");
                    }
                    a.Reduce();
                }

                CompiledAutomaton c = new CompiledAutomaton(a, true, false);

                BytesRef[] acceptTermsArray = new BytesRef[acceptTerms.Count];
                HashSet<BytesRef> acceptTermsSet = new HashSet<BytesRef>();
                int upto = 0;
                foreach (string s in acceptTerms)
                {
                    BytesRef b = new BytesRef(s);
                    acceptTermsArray[upto++] = b;
                    acceptTermsSet.Add(b);
                    // Sanity check: the compiled automaton must accept every term it was built from.
                    Assert.IsTrue(Accepts(c, b));
                }
                Array.Sort(acceptTermsArray);

                if (VERBOSE)
                {
                    Console.WriteLine("\nTEST: accept terms (unicode order):");
                    foreach (BytesRef t in acceptTermsArray)
                    {
                        Console.WriteLine(" " + t.Utf8ToString() + (termsSet.Contains(t) ? " (exists)" : ""));
                    }
                    Console.WriteLine(a.ToDot());
                }

                for (int iter2 = 0; iter2 < 100; iter2++)
                {
                    // No start term when nothing is accepted, or on a coin flip;
                    // otherwise a random accepted term.
                    BytesRef startTerm = (acceptTermsArray.Length == 0 || Random().NextBoolean())
                        ? null
                        : acceptTermsArray[Random().Next(acceptTermsArray.Length)];

                    if (VERBOSE)
                    {
                        Console.WriteLine("\nTEST: iter2=" + iter2 + " startTerm=" + (startTerm == null ? "<null>" : startTerm.Utf8ToString()));
                        if (startTerm != null)
                        {
                            // Trace the start term byte-by-byte through the run automaton.
                            int state = c.RunAutomaton.InitialState;
                            for (int idx = 0; idx < startTerm.Length; idx++)
                            {
                                int label = startTerm.Bytes[startTerm.Offset + idx] & 0xff;
                                Console.WriteLine(" state=" + state + " label=" + label);
                                state = c.RunAutomaton.Step(state, label);
                                Assert.IsTrue(state != -1);
                            }
                            Console.WriteLine(" state=" + state);
                        }
                    }

                    TermsEnum te = MultiFields.GetTerms(r, "f").Intersect(c, startTerm);

                    // loc = index of the first indexed term strictly after startTerm.
                    int loc;
                    if (startTerm == null)
                    {
                        loc = 0;
                    }
                    else
                    {
                        loc = Array.BinarySearch(termsArray, BytesRef.DeepCopyOf(startTerm));
                        if (loc < 0)
                        {
                            // Not found: convert the negative result into the insertion point.
                            loc = -(loc + 1);
                        }
                        else
                        {
                            // startTerm exists in index
                            loc++;
                        }
                    }
                    // Skip forward to the first indexed term that the automaton accepts.
                    while (loc < termsArray.Length && !acceptTermsSet.Contains(termsArray[loc]))
                    {
                        loc++;
                    }

                    DocsEnum docsEnum = null;
                    while (loc < termsArray.Length)
                    {
                        BytesRef expected = termsArray[loc];
                        BytesRef actual = te.Next();
                        if (VERBOSE)
                        {
                            Console.WriteLine("TEST: next() expected=" + expected.Utf8ToString() + " actual=" + (actual == null ? "null" : actual.Utf8ToString()));
                        }
                        Assert.AreEqual(expected, actual);
                        Assert.AreEqual(1, te.DocFreq());
                        docsEnum = TestUtil.Docs(Random(), te, null, docsEnum, DocsEnum.FLAG_NONE);
                        int docID = docsEnum.NextDoc();
                        Assert.IsTrue(docID != DocIdSetIterator.NO_MORE_DOCS);
                        // Expected value (the test's own bookkeeping) first, value read
                        // back from the index second, so failure messages read correctly.
                        Assert.AreEqual((int)termToID[expected], docIDToID.Get(docID));
                        // Advance to the next indexed term that is also accepted.
                        do
                        {
                            loc++;
                        } while (loc < termsArray.Length && !acceptTermsSet.Contains(termsArray[loc]));
                    }

                    // The enum must be exhausted once every expected term has been seen.
                    Assert.IsNull(te.Next());
                }
            }
        }
        finally
        {
            r.Dispose();
        }
    }
    finally
    {
        dir.Dispose();
    }
}