public void TestRandom()
{
    // Randomized cross-check of the block-join queries.
    //
    // We build two indices at once: one normalized (which
    // ToParentBlockJoinQuery/Collector and ToChildBlockJoinQuery can query)
    // and the other with the same docs, just fully denormalized (every child
    // doc carries a copy of its parent's fields). Each iteration then builds
    // a random query, runs it against both indices and compares the results,
    // first joining child -> parent and then parent -> child.

    // Used wherever numbers are parsed from / formatted into fixed,
    // machine-readable text, so the outcome does not depend on the culture
    // of the thread running the test.
    System.Globalization.CultureInfo invariantCulture = System.Globalization.CultureInfo.InvariantCulture;

    Directory dir = NewDirectory();
    Directory joinDir = NewDirectory();

    int numParentDocs = TestUtil.NextInt(Random(), 100 * RANDOM_MULTIPLIER, 300 * RANDOM_MULTIPLIER);
    // Alternative fixed size (disabled):
    // int numParentDocs = 30;

    // Values for parent fields:
    string[][] parentFields = GetRandomFields(numParentDocs / 2);
    // Values for child fields:
    string[][] childFields = GetRandomFields(numParentDocs);

    bool doDeletes = Random().NextBoolean();
    IList<int> toDelete = new List<int>();

    // TODO: parallel star join, nested join cases too!
    RandomIndexWriter w = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
    RandomIndexWriter joinW = new RandomIndexWriter(Random(), joinDir, Similarity, TimeZone);

    // ---- Index construction -------------------------------------------------
    for (int parentDocID = 0; parentDocID < numParentDocs; parentDocID++)
    {
        // parentDoc is only a template that gets cloned into every child doc
        // of the denormalized index; parentJoinDoc is the real parent doc of
        // the block in the join index.
        Document parentDoc = new Document();
        Document parentJoinDoc = new Document();
        Field id = NewStringField("parentID", "" + parentDocID, Field.Store.YES);
        parentDoc.Add(id);
        parentJoinDoc.Add(id);
        // Marker field matched by parentsFilter below:
        parentJoinDoc.Add(NewStringField("isParent", "x", Field.Store.NO));
        for (int field = 0; field < parentFields.Length; field++)
        {
            // Each parent field is present with probability 0.9:
            if (Random().NextDouble() < 0.9)
            {
                Field f = NewStringField("parent" + field, parentFields[field][Random().Next(parentFields[field].Length)], Field.Store.NO);
                parentDoc.Add(f);
                parentJoinDoc.Add(f);
            }
        }

        if (doDeletes)
        {
            // blockID lets us delete a parent together with all of its
            // children using a single term:
            parentDoc.Add(NewStringField("blockID", "" + parentDocID, Field.Store.NO));
            parentJoinDoc.Add(NewStringField("blockID", "" + parentDocID, Field.Store.NO));
        }

        IList<Document> joinDocs = new List<Document>();

        if (VERBOSE)
        {
            StringBuilder sb = new StringBuilder();
            sb.Append("parentID=").Append(parentDoc.Get("parentID"));
            for (int fieldID = 0; fieldID < parentFields.Length; fieldID++)
            {
                string parent = parentDoc.Get("parent" + fieldID);
                if (parent != null)
                {
                    sb.Append(" parent" + fieldID + "=" + parent);
                }
            }
            Console.WriteLine(" " + sb);
        }

        int numChildDocs = TestUtil.NextInt(Random(), 1, 20);
        for (int childDocID = 0; childDocID < numChildDocs; childDocID++)
        {
            // Denormalize: copy all parent fields into child doc:
            Document childDoc = TestUtil.CloneDocument(parentDoc);
            Document joinChildDoc = new Document();
            joinDocs.Add(joinChildDoc);

            Field childID = NewStringField("childID", "" + childDocID, Field.Store.YES);
            childDoc.Add(childID);
            joinChildDoc.Add(childID);

            for (int childFieldID = 0; childFieldID < childFields.Length; childFieldID++)
            {
                // Each child field is present with probability 0.9:
                if (Random().NextDouble() < 0.9)
                {
                    Field f = NewStringField("child" + childFieldID, childFields[childFieldID][Random().Next(childFields[childFieldID].Length)], Field.Store.NO);
                    childDoc.Add(f);
                    joinChildDoc.Add(f);
                }
            }

            if (VERBOSE)
            {
                StringBuilder sb = new StringBuilder();
                sb.Append("childID=").Append(joinChildDoc.Get("childID"));
                for (int fieldID = 0; fieldID < childFields.Length; fieldID++)
                {
                    string child = joinChildDoc.Get("child" + fieldID);
                    if (child != null)
                    {
                        sb.Append(" child" + fieldID + "=" + child);
                    }
                }
                Console.WriteLine(" " + sb);
            }

            if (doDeletes)
            {
                // childDoc already got blockID through the parentDoc clone.
                joinChildDoc.Add(NewStringField("blockID", "" + parentDocID, Field.Store.NO));
            }

            w.AddDocument(childDoc);
        }

        // Parent last:
        joinDocs.Add(parentJoinDoc);
        joinW.AddDocuments(joinDocs);

        if (doDeletes && Random().Next(30) == 7)
        {
            toDelete.Add(parentDocID);
        }
    }

    // Apply the same block deletions to both indices:
    foreach (int deleteID in toDelete)
    {
        if (VERBOSE)
        {
            Console.WriteLine("DELETE parentID=" + deleteID);
        }
        w.DeleteDocuments(new Term("blockID", "" + deleteID));
        joinW.DeleteDocuments(new Term("blockID", "" + deleteID));
    }

    IndexReader r = w.Reader;
    w.Dispose();
    IndexReader joinR = joinW.Reader;
    joinW.Dispose();

    if (VERBOSE)
    {
        Console.WriteLine("TEST: reader=" + r);
        Console.WriteLine("TEST: joinReader=" + joinR);
        for (int docIDX = 0; docIDX < joinR.MaxDoc; docIDX++)
        {
            Console.WriteLine(" docID=" + docIDX + " doc=" + joinR.Document(docIDX));
        }
    }

    IndexSearcher s = NewSearcher(r);
    IndexSearcher joinS = new IndexSearcher(joinR);

    Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("isParent", "x"))));

    // ---- Randomized query iterations ----------------------------------------
    int iters = 200 * RANDOM_MULTIPLIER;
    for (int iter = 0; iter < iters; iter++)
    {
        if (VERBOSE)
        {
            Console.WriteLine("TEST: iter=" + (1 + iter) + " of " + iters);
        }

        // Random query against child documents: a single term, a mixed
        // boolean query, or a two-clause boolean query.
        Query childQuery;
        if (Random().Next(3) == 2)
        {
            int childFieldID = Random().Next(childFields.Length);
            childQuery = new TermQuery(new Term("child" + childFieldID, childFields[childFieldID][Random().Next(childFields[childFieldID].Length)]));
        }
        else if (Random().Next(3) == 2)
        {
            BooleanQuery bq = new BooleanQuery();
            childQuery = bq;
            int numClauses = TestUtil.NextInt(Random(), 2, 4);
            // At most one MUST / MUST_NOT clause; all others are SHOULD:
            bool didMust = false;
            for (int clauseIDX = 0; clauseIDX < numClauses; clauseIDX++)
            {
                Query clause;
                BooleanClause.Occur occur;
                if (!didMust && Random().NextBoolean())
                {
                    occur = Random().NextBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.MUST_NOT;
                    clause = new TermQuery(RandomChildTerm(childFields[0]));
                    didMust = true;
                }
                else
                {
                    occur = BooleanClause.Occur.SHOULD;
                    int childFieldID = TestUtil.NextInt(Random(), 1, childFields.Length - 1);
                    clause = new TermQuery(new Term("child" + childFieldID, childFields[childFieldID][Random().Next(childFields[childFieldID].Length)]));
                }
                bq.Add(clause, occur);
            }
        }
        else
        {
            BooleanQuery bq = new BooleanQuery();
            childQuery = bq;
            bq.Add(new TermQuery(RandomChildTerm(childFields[0])), BooleanClause.Occur.MUST);
            int childFieldID = TestUtil.NextInt(Random(), 1, childFields.Length - 1);
            bq.Add(new TermQuery(new Term("child" + childFieldID, childFields[childFieldID][Random().Next(childFields[childFieldID].Length)])), Random().NextBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.MUST_NOT);
        }

        // Pick one of the four score modes at random:
        int x = Random().Next(4);
        ScoreMode agg;
        if (x == 0)
        {
            agg = ScoreMode.None;
        }
        else if (x == 1)
        {
            agg = ScoreMode.Max;
        }
        else if (x == 2)
        {
            agg = ScoreMode.Total;
        }
        else
        {
            agg = ScoreMode.Avg;
        }

        ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, agg);

        // To run against the block-join index:
        Query parentJoinQuery;

        // Same query as parentJoinQuery, but to run against
        // the fully denormalized index (so we can compare
        // results):
        Query parentQuery;

        if (Random().NextBoolean())
        {
            parentQuery = childQuery;
            parentJoinQuery = childJoinQuery;
        }
        else
        {
            // AND parent field w/ child field (clause order is randomized)
            BooleanQuery bq = new BooleanQuery();
            parentJoinQuery = bq;
            Term parentTerm = RandomParentTerm(parentFields[0]);
            if (Random().NextBoolean())
            {
                bq.Add(childJoinQuery, BooleanClause.Occur.MUST);
                bq.Add(new TermQuery(parentTerm), BooleanClause.Occur.MUST);
            }
            else
            {
                bq.Add(new TermQuery(parentTerm), BooleanClause.Occur.MUST);
                bq.Add(childJoinQuery, BooleanClause.Occur.MUST);
            }

            BooleanQuery bq2 = new BooleanQuery();
            parentQuery = bq2;
            if (Random().NextBoolean())
            {
                bq2.Add(childQuery, BooleanClause.Occur.MUST);
                bq2.Add(new TermQuery(parentTerm), BooleanClause.Occur.MUST);
            }
            else
            {
                bq2.Add(new TermQuery(parentTerm), BooleanClause.Occur.MUST);
                bq2.Add(childQuery, BooleanClause.Occur.MUST);
            }
        }

        Sort parentSort = GetRandomSort("parent", parentFields.Length);
        Sort childSort = GetRandomSort("child", childFields.Length);

        if (VERBOSE)
        {
            Console.WriteLine("\nTEST: query=" + parentQuery + " joinQuery=" + parentJoinQuery + " parentSort=" + parentSort + " childSort=" + childSort);
        }

        // Merge both sorts:
        IList<SortField> sortFields = new List<SortField>(Arrays.AsList(parentSort.GetSort()));
        sortFields.AddRange(Arrays.AsList(childSort.GetSort()));
        Sort parentAndChildSort = new Sort(sortFields.ToArray());

        // Search the denormalized index:
        TopDocs results = s.Search(parentQuery, null, r.NumDocs, parentAndChildSort);

        if (VERBOSE)
        {
            Console.WriteLine("\nTEST: normal index gets " + results.TotalHits + " hits");
            ScoreDoc[] hits = results.ScoreDocs;
            for (int hitIDX = 0; hitIDX < hits.Length; hitIDX++)
            {
                Document doc = s.Doc(hits[hitIDX].Doc);
                // Variant that also prints the score (disabled):
                // Console.WriteLine(" score=" + hits[hitIDX].Score + " parentID=" + doc.Get("parentID") + " childID=" + doc.Get("childID") + " (docID=" + hits[hitIDX].Doc + ")");
                Console.WriteLine(" parentID=" + doc.Get("parentID") + " childID=" + doc.Get("childID") + " (docID=" + hits[hitIDX].Doc + ")");
                FieldDoc fd = (FieldDoc)hits[hitIDX];
                if (fd.Fields != null)
                {
                    Console.Write(" ");
                    foreach (object o in fd.Fields)
                    {
                        if (o is BytesRef)
                        {
                            Console.Write(((BytesRef)o).Utf8ToString() + " ");
                        }
                        else
                        {
                            Console.Write(o + " ");
                        }
                    }
                    Console.WriteLine();
                }
            }
        }

        // Score tracking is switched off when the score mode is None and
        // chosen at random otherwise:
        bool trackScores;
        bool trackMaxScore;
        if (agg == ScoreMode.None)
        {
            trackScores = false;
            trackMaxScore = false;
        }
        else
        {
            trackScores = Random().NextBoolean();
            trackMaxScore = Random().NextBoolean();
        }

        // Search the block-join index:
        ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(parentSort, 10, trackScores, trackMaxScore);
        joinS.Search(parentJoinQuery, c);

        int hitsPerGroup = TestUtil.NextInt(Random(), 1, 20);
        // Alternative fixed size (disabled):
        // int hitsPerGroup = 100;
        TopGroups<int> joinResults = c.GetTopGroups(childJoinQuery, childSort, 0, hitsPerGroup, 0, true);

        if (VERBOSE)
        {
            Console.WriteLine("\nTEST: block join index gets " + (joinResults == null ? 0 : joinResults.Groups.Length) + " groups; hitsPerGroup=" + hitsPerGroup);
            if (joinResults != null)
            {
                IGroupDocs<int>[] groups = joinResults.Groups;
                for (int groupIDX = 0; groupIDX < groups.Length; groupIDX++)
                {
                    IGroupDocs<int> group = groups[groupIDX];
                    if (group.GroupSortValues != null)
                    {
                        Console.Write(" ");
                        foreach (object o in group.GroupSortValues)
                        {
                            if (o is BytesRef)
                            {
                                Console.Write(((BytesRef)o).Utf8ToString() + " ");
                            }
                            else
                            {
                                Console.Write(o + " ");
                            }
                        }
                        Console.WriteLine();
                    }

                    assertNotNull(group.GroupValue);
                    Document parentDoc = joinS.Doc(group.GroupValue);
                    Console.WriteLine(" group parentID=" + parentDoc.Get("parentID") + " (docID=" + group.GroupValue + ")");
                    for (int hitIDX = 0; hitIDX < group.ScoreDocs.Length; hitIDX++)
                    {
                        Document doc = joinS.Doc(group.ScoreDocs[hitIDX].Doc);
                        // Variant that also prints the score (disabled):
                        // Console.WriteLine(" score=" + group.ScoreDocs[hitIDX].Score + " childID=" + doc.Get("childID") + " (docID=" + group.ScoreDocs[hitIDX].Doc + ")");
                        Console.WriteLine(" childID=" + doc.Get("childID") + " child0=" + doc.Get("child0") + " (docID=" + group.ScoreDocs[hitIDX].Doc + ")");
                    }
                }
            }
        }

        if (results.TotalHits == 0)
        {
            assertNull(joinResults);
        }
        else
        {
            CompareHits(r, joinR, results, joinResults);

            // Also verify what Explain reports for the top parent hits.
            TopDocs b = joinS.Search(childJoinQuery, 10);
            foreach (ScoreDoc hit in b.ScoreDocs)
            {
                Explanation explanation = joinS.Explain(childJoinQuery, hit.Doc);
                // Blocks were added children-first with childID counting up
                // from 0 and the parent last, so the doc right before the
                // parent hit is read as the last child of the block, and its
                // childID is the offset back to the block's first child.
                Document document = joinS.Doc(hit.Doc - 1);
                // Parse and format with the invariant culture: the expected
                // description is fixed text and must not vary with the
                // culture of the current thread.
                int childId = Convert.ToInt32(document.Get("childID"), invariantCulture);
                assertTrue(explanation.IsMatch);
                assertEquals(hit.Score, explanation.Value, 0.0f);
                assertEquals(string.Format(invariantCulture, "Score based on child doc range from {0} to {1}", hit.Doc - 1 - childId, hit.Doc - 1), explanation.Description);
            }
        }

        // Test joining in the opposite direction (parent to
        // child):

        // Get random query against parent documents (same three shapes as
        // the child query above):
        Query parentQuery2;
        if (Random().Next(3) == 2)
        {
            int fieldID = Random().Next(parentFields.Length);
            parentQuery2 = new TermQuery(new Term("parent" + fieldID, parentFields[fieldID][Random().Next(parentFields[fieldID].Length)]));
        }
        else if (Random().Next(3) == 2)
        {
            BooleanQuery bq = new BooleanQuery();
            parentQuery2 = bq;
            int numClauses = TestUtil.NextInt(Random(), 2, 4);
            // At most one MUST / MUST_NOT clause; all others are SHOULD:
            bool didMust = false;
            for (int clauseIDX = 0; clauseIDX < numClauses; clauseIDX++)
            {
                Query clause;
                BooleanClause.Occur occur;
                if (!didMust && Random().NextBoolean())
                {
                    occur = Random().NextBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.MUST_NOT;
                    clause = new TermQuery(RandomParentTerm(parentFields[0]));
                    didMust = true;
                }
                else
                {
                    occur = BooleanClause.Occur.SHOULD;
                    int fieldID = TestUtil.NextInt(Random(), 1, parentFields.Length - 1);
                    clause = new TermQuery(new Term("parent" + fieldID, parentFields[fieldID][Random().Next(parentFields[fieldID].Length)]));
                }
                bq.Add(clause, occur);
            }
        }
        else
        {
            BooleanQuery bq = new BooleanQuery();
            parentQuery2 = bq;
            bq.Add(new TermQuery(RandomParentTerm(parentFields[0])), BooleanClause.Occur.MUST);
            int fieldID = TestUtil.NextInt(Random(), 1, parentFields.Length - 1);
            bq.Add(new TermQuery(new Term("parent" + fieldID, parentFields[fieldID][Random().Next(parentFields[fieldID].Length)])), Random().NextBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.MUST_NOT);
        }

        if (VERBOSE)
        {
            Console.WriteLine("\nTEST: top down: parentQuery2=" + parentQuery2);
        }

        // Maps parent query to child docs:
        ToChildBlockJoinQuery parentJoinQuery2 = new ToChildBlockJoinQuery(parentQuery2, parentsFilter, Random().NextBoolean());

        // To run against the block-join index:
        Query childJoinQuery2;

        // Same query as childJoinQuery2, but to run against
        // the fully denormalized index (so we can compare
        // results):
        Query childQuery2;

        // apply a filter to children
        Filter childFilter2, childJoinFilter2;

        if (Random().NextBoolean())
        {
            childQuery2 = parentQuery2;
            childJoinQuery2 = parentJoinQuery2;
            childFilter2 = null;
            childJoinFilter2 = null;
        }
        else
        {
            // Restrict the children by one child term. Each index decides
            // independently whether that restriction is expressed as a
            // filter or as an extra MUST clause.
            Term childTerm = RandomChildTerm(childFields[0]);
            if (Random().NextBoolean()) // filtered case
            {
                childJoinQuery2 = parentJoinQuery2;
                Filter f = new QueryWrapperFilter(new TermQuery(childTerm));
                childJoinFilter2 = Random().NextBoolean() ? new FixedBitSetCachingWrapperFilter(f) : f;
            }
            else
            {
                childJoinFilter2 = null;
                // AND child field w/ parent query:
                BooleanQuery bq = new BooleanQuery();
                childJoinQuery2 = bq;
                if (Random().NextBoolean())
                {
                    bq.Add(parentJoinQuery2, BooleanClause.Occur.MUST);
                    bq.Add(new TermQuery(childTerm), BooleanClause.Occur.MUST);
                }
                else
                {
                    bq.Add(new TermQuery(childTerm), BooleanClause.Occur.MUST);
                    bq.Add(parentJoinQuery2, BooleanClause.Occur.MUST);
                }
            }

            if (Random().NextBoolean()) // filtered case
            {
                childQuery2 = parentQuery2;
                Filter f = new QueryWrapperFilter(new TermQuery(childTerm));
                childFilter2 = Random().NextBoolean() ? new FixedBitSetCachingWrapperFilter(f) : f;
            }
            else
            {
                childFilter2 = null;
                BooleanQuery bq2 = new BooleanQuery();
                childQuery2 = bq2;
                if (Random().NextBoolean())
                {
                    bq2.Add(parentQuery2, BooleanClause.Occur.MUST);
                    bq2.Add(new TermQuery(childTerm), BooleanClause.Occur.MUST);
                }
                else
                {
                    bq2.Add(new TermQuery(childTerm), BooleanClause.Occur.MUST);
                    bq2.Add(parentQuery2, BooleanClause.Occur.MUST);
                }
            }
        }

        Sort childSort2 = GetRandomSort("child", childFields.Length);

        // Search denormalized index:
        if (VERBOSE)
        {
            Console.WriteLine("TEST: run top down query=" + childQuery2 + " filter=" + childFilter2 + " sort=" + childSort2);
        }
        TopDocs results2 = s.Search(childQuery2, childFilter2, r.NumDocs, childSort2);
        if (VERBOSE)
        {
            Console.WriteLine(" " + results2.TotalHits + " totalHits:");
            foreach (ScoreDoc sd in results2.ScoreDocs)
            {
                Document doc = s.Doc(sd.Doc);
                Console.WriteLine(" childID=" + doc.Get("childID") + " parentID=" + doc.Get("parentID") + " docID=" + sd.Doc);
            }
        }

        // Search join index:
        if (VERBOSE)
        {
            Console.WriteLine("TEST: run top down join query=" + childJoinQuery2 + " filter=" + childJoinFilter2 + " sort=" + childSort2);
        }
        TopDocs joinResults2 = joinS.Search(childJoinQuery2, childJoinFilter2, joinR.NumDocs, childSort2);
        if (VERBOSE)
        {
            Console.WriteLine(" " + joinResults2.TotalHits + " totalHits:");
            foreach (ScoreDoc sd in joinResults2.ScoreDocs)
            {
                Document doc = joinS.Doc(sd.Doc);
                Document parentDoc = GetParentDoc(joinR, parentsFilter, sd.Doc);
                Console.WriteLine(" childID=" + doc.Get("childID") + " parentID=" + parentDoc.Get("parentID") + " docID=" + sd.Doc);
            }
        }

        CompareChildHits(r, joinR, results2, joinResults2);
    }

    r.Dispose();
    joinR.Dispose();
    dir.Dispose();
    joinDir.Dispose();
}