Lucene.Net.Join.ToParentBlockJoinCollector.GetTopGroups C# (CSharp) Method

GetTopGroups() public method

Returns the TopGroups for the specified BlockJoinQuery. The groupValue of each GroupDocs will be the parent docID for that group. The number of documents within each group is calculated as the minimum of maxDocsPerGroup and the number of matched child documents for that group. Returns null if no groups matched.
Throws an IOException if there is a low-level I/O error.
public GetTopGroups ( Lucene.Net.Join.ToParentBlockJoinQuery query, Lucene.Net.Search.Sort withinGroupSort, int offset, int maxDocsPerGroup, int withinGroupOffset, bool fillSortFields ) : TopGroups
query Lucene.Net.Join.ToParentBlockJoinQuery Search query
withinGroupSort Lucene.Net.Search.Sort Sort criteria within groups
offset int Parent docs offset
maxDocsPerGroup int Upper bound on the number of documents per group
withinGroupOffset int Offset within each group of child docs
fillSortFields bool Specifies whether to add sort fields or not
return TopGroups
        /// <summary>
        /// Returns the <see cref="TopGroups{T}"/> for the specified block join query.
        /// The group value of each group is the parent docID for that group.
        /// </summary>
        /// <param name="query">Search query whose collected groups are requested.</param>
        /// <param name="withinGroupSort">Sort criteria within groups.</param>
        /// <param name="offset">Parent docs offset.</param>
        /// <param name="maxDocsPerGroup">Upper bound on the number of documents per group.</param>
        /// <param name="withinGroupOffset">Offset within each group of child docs.</param>
        /// <param name="fillSortFields">Specifies whether to add sort fields or not.</param>
        /// <returns>
        /// The accumulated groups, or <c>null</c> when the query is unknown and no hits were
        /// collected, or when <paramref name="offset"/> is at or past the number of collected groups.
        /// </returns>
        public virtual TopGroups<int> GetTopGroups(ToParentBlockJoinQuery query, Sort withinGroupSort, int offset, int maxDocsPerGroup, int withinGroupOffset, bool fillSortFields)
        {
            int? slot;
            if (!joinQueryID.TryGetValue(query, out slot) && totalHitCount == 0)
            {
                // The query was never registered and nothing was collected: no groups.
                return null;
            }

            if (sortedGroups == null)
            {
                if (offset >= queue.Size())
                {
                    return null;
                }
                // NOTE(review): sortQueue() is defined elsewhere; it is expected to populate
                // sortedGroups from queue - confirm against its implementation.
                sortQueue();
            }
            else if (offset >= sortedGroups.Length)
            {
                // Use the same bound as the unsorted path above so that repeated calls with the
                // same offset agree: an offset equal to the group count leaves no groups to return.
                return null;
            }

            // An unregistered query (slot == null) is passed on as slot -1.
            return AccumulateGroups(slot ?? -1, offset, maxDocsPerGroup, withinGroupOffset, withinGroupSort, fillSortFields);
        }

Usage Example

Example #1
0
        /// <summary>
        /// Randomized test that indexes the same documents twice: once as parent/child blocks
        /// (children first, parent last, added with a single AddDocuments call) and once fully
        /// denormalized (every child doc carries a copy of its parent's fields). For a number of
        /// iterations it builds random queries, runs them against both indices and hands both
        /// result sets to CompareHits / CompareChildHits (helpers defined elsewhere in the test
        /// class), first joining child-to-parent and then parent-to-child.
        /// </summary>
        public void TestRandom()
        {
            // We build two indices at once: one normalized (which
            // ToParentBlockJoinQuery/Collector,
            // ToChildBlockJoinQuery can query) and the other w/
            // the same docs, just fully denormalized:
            Directory dir = NewDirectory();
            Directory joinDir = NewDirectory();

            int numParentDocs = TestUtil.NextInt(Random(), 100 * RANDOM_MULTIPLIER, 300 * RANDOM_MULTIPLIER);
            //final int numParentDocs = 30;

            // Values for parent fields:
            string[][] parentFields = GetRandomFields(numParentDocs / 2);
            // Values for child fields:
            string[][] childFields = GetRandomFields(numParentDocs);

            // When deletes are enabled, every doc gets a "blockID" term so that a whole
            // parent/child block can be deleted from both indices after indexing.
            bool doDeletes = Random().NextBoolean();
            IList<int> toDelete = new List<int>();

            // TODO: parallel star join, nested join cases too!
            // "w" writes the denormalized index, "joinW" writes the block-join index.
            RandomIndexWriter w = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
            RandomIndexWriter joinW = new RandomIndexWriter(Random(), joinDir, Similarity, TimeZone);
            for (int parentDocID = 0; parentDocID < numParentDocs; parentDocID++)
            {
                Document parentDoc = new Document();
                Document parentJoinDoc = new Document();
                // The same Field instance is added to both the denormalized and the join document.
                Field id = NewStringField("parentID", "" + parentDocID, Field.Store.YES);
                parentDoc.Add(id);
                parentJoinDoc.Add(id);
                // Marker term matched by parentsFilter below to identify parent docs in the join index.
                parentJoinDoc.Add(NewStringField("isParent", "x", Field.Store.NO));
                for (int field = 0; field < parentFields.Length; field++)
                {
                    // Each parent field is present with probability 0.9.
                    if (Random().NextDouble() < 0.9)
                    {
                        Field f = NewStringField("parent" + field, parentFields[field][Random().Next(parentFields[field].Length)], Field.Store.NO);
                        parentDoc.Add(f);
                        parentJoinDoc.Add(f);
                    }
                }

                if (doDeletes)
                {
                    parentDoc.Add(NewStringField("blockID", "" + parentDocID, Field.Store.NO));
                    parentJoinDoc.Add(NewStringField("blockID", "" + parentDocID, Field.Store.NO));
                }

                // Collects the child docs (and finally the parent) that form one block in the join index.
                IList<Document> joinDocs = new List<Document>();

                if (VERBOSE)
                {
                    StringBuilder sb = new StringBuilder();
                    sb.Append("parentID=").Append(parentDoc.Get("parentID"));
                    for (int fieldID = 0; fieldID < parentFields.Length; fieldID++)
                    {
                        string parent = parentDoc.Get("parent" + fieldID);
                        if (parent != null)
                        {
                            sb.Append(" parent" + fieldID + "=" + parent);
                        }
                    }
                    Console.WriteLine("  " + sb);
                }

                int numChildDocs = TestUtil.NextInt(Random(), 1, 20);
                for (int childDocID = 0; childDocID < numChildDocs; childDocID++)
                {
                    // Denormalize: copy all parent fields into child doc:
                    Document childDoc = TestUtil.CloneDocument(parentDoc);
                    Document joinChildDoc = new Document();
                    joinDocs.Add(joinChildDoc);

                    Field childID = NewStringField("childID", "" + childDocID, Field.Store.YES);
                    childDoc.Add(childID);
                    joinChildDoc.Add(childID);

                    for (int childFieldID = 0; childFieldID < childFields.Length; childFieldID++)
                    {
                        // Each child field is present with probability 0.9.
                        if (Random().NextDouble() < 0.9)
                        {
                            Field f = NewStringField("child" + childFieldID, childFields[childFieldID][Random().Next(childFields[childFieldID].Length)], Field.Store.NO);
                            childDoc.Add(f);
                            joinChildDoc.Add(f);
                        }
                    }

                    if (VERBOSE)
                    {
                        StringBuilder sb = new StringBuilder();
                        sb.Append("childID=").Append(joinChildDoc.Get("childID"));
                        for (int fieldID = 0; fieldID < childFields.Length; fieldID++)
                        {
                            string child = joinChildDoc.Get("child" + fieldID);
                            if (child != null)
                            {
                                sb.Append(" child" + fieldID + "=" + child);
                            }
                        }
                        Console.WriteLine("    " + sb);
                    }

                    if (doDeletes)
                    {
                        // childDoc was cloned from parentDoc, which already has blockID added above;
                        // only the join child needs it added explicitly.
                        joinChildDoc.Add(NewStringField("blockID", "" + parentDocID, Field.Store.NO));
                    }

                    // The denormalized child is written immediately; join docs are written as one block below.
                    w.AddDocument(childDoc);
                }

                // Parent last:
                joinDocs.Add(parentJoinDoc);
                joinW.AddDocuments(joinDocs);

                // Randomly mark some parents (and thereby their whole block) for deletion.
                if (doDeletes && Random().Next(30) == 7)
                {
                    toDelete.Add(parentDocID);
                }
            }

            // Delete the marked blocks from both indices using the shared blockID term.
            foreach (int deleteID in toDelete)
            {
                if (VERBOSE)
                {
                    Console.WriteLine("DELETE parentID=" + deleteID);
                }
                w.DeleteDocuments(new Term("blockID", "" + deleteID));
                joinW.DeleteDocuments(new Term("blockID", "" + deleteID));
            }

            // Obtain a reader from each writer before disposing the writer.
            IndexReader r = w.Reader;
            w.Dispose();
            IndexReader joinR = joinW.Reader;
            joinW.Dispose();

            if (VERBOSE)
            {
                Console.WriteLine("TEST: reader=" + r);
                Console.WriteLine("TEST: joinReader=" + joinR);

                for (int docIDX = 0; docIDX < joinR.MaxDoc; docIDX++)
                {
                    Console.WriteLine("  docID=" + docIDX + " doc=" + joinR.Document(docIDX));
                }
            }

            // "s" searches the denormalized index, "joinS" searches the block-join index.
            IndexSearcher s = NewSearcher(r);

            IndexSearcher joinS = new IndexSearcher(joinR);

            // Identifies parent documents in the join index via the "isParent" marker term.
            Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("isParent", "x"))));

            int iters = 200 * RANDOM_MULTIPLIER;

            for (int iter = 0; iter < iters; iter++)
            {
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: iter=" + (1 + iter) + " of " + iters);
                }

                // Build a random query over child fields: a single term, a random
                // boolean of 2-4 clauses, or a two-clause boolean with a required first clause.
                Query childQuery;
                if (Random().Next(3) == 2)
                {
                    int childFieldID = Random().Next(childFields.Length);
                    childQuery = new TermQuery(new Term("child" + childFieldID, childFields[childFieldID][Random().Next(childFields[childFieldID].Length)]));
                }
                else if (Random().Next(3) == 2)
                {
                    BooleanQuery bq = new BooleanQuery();
                    childQuery = bq;
                    int numClauses = TestUtil.NextInt(Random(), 2, 4);
                    // At most one MUST / MUST_NOT clause is added; all others are SHOULD.
                    bool didMust = false;
                    for (int clauseIDX = 0; clauseIDX < numClauses; clauseIDX++)
                    {
                        Query clause;
                        BooleanClause.Occur occur;
                        if (!didMust && Random().NextBoolean())
                        {
                            occur = Random().NextBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.MUST_NOT;
                            clause = new TermQuery(RandomChildTerm(childFields[0]));
                            didMust = true;
                        }
                        else
                        {
                            occur = BooleanClause.Occur.SHOULD;
                            int childFieldID = TestUtil.NextInt(Random(), 1, childFields.Length - 1);
                            clause = new TermQuery(new Term("child" + childFieldID, childFields[childFieldID][Random().Next(childFields[childFieldID].Length)]));
                        }
                        bq.Add(clause, occur);
                    }
                }
                else
                {
                    BooleanQuery bq = new BooleanQuery();
                    childQuery = bq;

                    bq.Add(new TermQuery(RandomChildTerm(childFields[0])), BooleanClause.Occur.MUST);
                    int childFieldID = TestUtil.NextInt(Random(), 1, childFields.Length - 1);
                    bq.Add(new TermQuery(new Term("child" + childFieldID, childFields[childFieldID][Random().Next(childFields[childFieldID].Length)])), Random().NextBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.MUST_NOT);
                }

                // Pick one of the four score modes at random.
                int x = Random().Next(4);
                ScoreMode agg;
                if (x == 0)
                {
                    agg = ScoreMode.None;
                }
                else if (x == 1)
                {
                    agg = ScoreMode.Max;
                }
                else if (x == 2)
                {
                    agg = ScoreMode.Total;
                }
                else
                {
                    agg = ScoreMode.Avg;
                }

                ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, agg);

                // To run against the block-join index:
                Query parentJoinQuery;

                // Same query as parentJoinQuery, but to run against
                // the fully denormalized index (so we can compare
                // results):
                Query parentQuery;

                if (Random().NextBoolean())
                {
                    parentQuery = childQuery;
                    parentJoinQuery = childJoinQuery;
                }
                else
                {
                    // AND parent field w/ child field
                    // (clause order is randomized independently for the two queries)
                    BooleanQuery bq = new BooleanQuery();
                    parentJoinQuery = bq;
                    Term parentTerm = RandomParentTerm(parentFields[0]);
                    if (Random().NextBoolean())
                    {
                        bq.Add(childJoinQuery, BooleanClause.Occur.MUST);
                        bq.Add(new TermQuery(parentTerm), BooleanClause.Occur.MUST);
                    }
                    else
                    {
                        bq.Add(new TermQuery(parentTerm), BooleanClause.Occur.MUST);
                        bq.Add(childJoinQuery, BooleanClause.Occur.MUST);
                    }

                    BooleanQuery bq2 = new BooleanQuery();
                    parentQuery = bq2;
                    if (Random().NextBoolean())
                    {
                        bq2.Add(childQuery, BooleanClause.Occur.MUST);
                        bq2.Add(new TermQuery(parentTerm), BooleanClause.Occur.MUST);
                    }
                    else
                    {
                        bq2.Add(new TermQuery(parentTerm), BooleanClause.Occur.MUST);
                        bq2.Add(childQuery, BooleanClause.Occur.MUST);
                    }
                }

                Sort parentSort = GetRandomSort("parent", parentFields.Length);
                Sort childSort = GetRandomSort("child", childFields.Length);

                if (VERBOSE)
                {
                    Console.WriteLine("\nTEST: query=" + parentQuery + " joinQuery=" + parentJoinQuery + " parentSort=" + parentSort + " childSort=" + childSort);
                }

                // Merge both sorts:
                // (parent sort fields first, then child sort fields, for the denormalized search)
                IList<SortField> sortFields = new List<SortField>(Arrays.AsList(parentSort.GetSort()));
                sortFields.AddRange(Arrays.AsList(childSort.GetSort()));
                Sort parentAndChildSort = new Sort(sortFields.ToArray());

                // Reference results from the denormalized index.
                TopDocs results = s.Search(parentQuery, null, r.NumDocs, parentAndChildSort);

                if (VERBOSE)
                {
                    Console.WriteLine("\nTEST: normal index gets " + results.TotalHits + " hits");
                    ScoreDoc[] hits = results.ScoreDocs;
                    for (int hitIDX = 0; hitIDX < hits.Length; hitIDX++)
                    {
                        Document doc = s.Doc(hits[hitIDX].Doc);
                        //System.out.println("  score=" + hits[hitIDX].Score + " parentID=" + doc.Get("parentID") + " childID=" + doc.Get("childID") + " (docID=" + hits[hitIDX].Doc + ")");
                        Console.WriteLine("  parentID=" + doc.Get("parentID") + " childID=" + doc.Get("childID") + " (docID=" + hits[hitIDX].Doc + ")");
                        FieldDoc fd = (FieldDoc)hits[hitIDX];
                        if (fd.Fields != null)
                        {
                            Console.Write("    ");
                            foreach (object o in fd.Fields)
                            {
                                if (o is BytesRef)
                                {
                                    Console.Write(((BytesRef)o).Utf8ToString() + " ");
                                }
                                else
                                {
                                    Console.Write(o + " ");
                                }
                            }
                            Console.WriteLine();
                        }
                    }
                }

                // With ScoreMode.None score tracking is switched off; otherwise both flags are random.
                bool trackScores;
                bool trackMaxScore;
                if (agg == ScoreMode.None)
                {
                    trackScores = false;
                    trackMaxScore = false;
                }
                else
                {
                    trackScores = Random().NextBoolean();
                    trackMaxScore = Random().NextBoolean();
                }
                ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(parentSort, 10, trackScores, trackMaxScore);

                joinS.Search(parentJoinQuery, c);

                int hitsPerGroup = TestUtil.NextInt(Random(), 1, 20);
                //final int hitsPerGroup = 100;
                // May be null; the VERBOSE output and the assertions below handle that case.
                TopGroups<int> joinResults = c.GetTopGroups(childJoinQuery, childSort, 0, hitsPerGroup, 0, true);

                if (VERBOSE)
                {
                    Console.WriteLine("\nTEST: block join index gets " + (joinResults == null ? 0 : joinResults.Groups.Length) + " groups; hitsPerGroup=" + hitsPerGroup);
                    if (joinResults != null)
                    {
                        IGroupDocs<int>[] groups = joinResults.Groups;
                        for (int groupIDX = 0; groupIDX < groups.Length; groupIDX++)
                        {
                            IGroupDocs<int> group = groups[groupIDX];
                            if (group.GroupSortValues != null)
                            {
                                Console.Write("  ");
                                foreach (object o in group.GroupSortValues)
                                {
                                    if (o is BytesRef)
                                    {
                                        Console.Write(((BytesRef)o).Utf8ToString() + " ");
                                    }
                                    else
                                    {
                                        Console.Write(o + " ");
                                    }
                                }
                                Console.WriteLine();
                            }

                            // NOTE: this assertion only runs when VERBOSE is enabled.
                            assertNotNull(group.GroupValue);
                            Document parentDoc = joinS.Doc(group.GroupValue);
                            Console.WriteLine("  group parentID=" + parentDoc.Get("parentID") + " (docID=" + group.GroupValue + ")");
                            for (int hitIDX = 0; hitIDX < group.ScoreDocs.Length; hitIDX++)
                            {
                                Document doc = joinS.Doc(group.ScoreDocs[hitIDX].Doc);
                                //System.out.println("    score=" + group.ScoreDocs[hitIDX].Score + " childID=" + doc.Get("childID") + " (docID=" + group.ScoreDocs[hitIDX].Doc + ")");
                                Console.WriteLine("    childID=" + doc.Get("childID") + " child0=" + doc.Get("child0") + " (docID=" + group.ScoreDocs[hitIDX].Doc + ")");
                            }
                        }
                    }
                }

                if (results.TotalHits == 0)
                {
                    // No hits on the denormalized index means the collector must report no groups.
                    assertNull(joinResults);
                }
                else
                {
                    CompareHits(r, joinR, results, joinResults);
                    // Also verify Explain() for the top hits of the join query.
                    TopDocs b = joinS.Search(childJoinQuery, 10);
                    foreach (ScoreDoc hit in b.ScoreDocs)
                    {
                        Explanation explanation = joinS.Explain(childJoinQuery, hit.Doc);
                        // hit.Doc - 1 is read as the last child of the block (the parent is added last);
                        // its childID is then used to compute the docID of the block's first child.
                        // NOTE(review): relies on children occupying consecutive docIDs right before the parent - confirm.
                        Document document = joinS.Doc(hit.Doc - 1);
                        int childId = Convert.ToInt32(document.Get("childID"));
                        assertTrue(explanation.IsMatch);
                        assertEquals(hit.Score, explanation.Value, 0.0f);
                        assertEquals(string.Format("Score based on child doc range from {0} to {1}", hit.Doc - 1 - childId, hit.Doc - 1), explanation.Description);
                    }
                }

                // Test joining in the opposite direction (parent to
                // child):

                // Get random query against parent documents:
                Query parentQuery2;
                if (Random().Next(3) == 2)
                {
                    int fieldID = Random().Next(parentFields.Length);
                    parentQuery2 = new TermQuery(new Term("parent" + fieldID, parentFields[fieldID][Random().Next(parentFields[fieldID].Length)]));
                }
                else if (Random().Next(3) == 2)
                {
                    BooleanQuery bq = new BooleanQuery();
                    parentQuery2 = bq;
                    int numClauses = TestUtil.NextInt(Random(), 2, 4);
                    // At most one MUST / MUST_NOT clause is added; all others are SHOULD.
                    bool didMust = false;
                    for (int clauseIDX = 0; clauseIDX < numClauses; clauseIDX++)
                    {
                        Query clause;
                        BooleanClause.Occur occur;
                        if (!didMust && Random().NextBoolean())
                        {
                            occur = Random().NextBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.MUST_NOT;
                            clause = new TermQuery(RandomParentTerm(parentFields[0]));
                            didMust = true;
                        }
                        else
                        {
                            occur = BooleanClause.Occur.SHOULD;
                            int fieldID = TestUtil.NextInt(Random(), 1, parentFields.Length - 1);
                            clause = new TermQuery(new Term("parent" + fieldID, parentFields[fieldID][Random().Next(parentFields[fieldID].Length)]));
                        }
                        bq.Add(clause, occur);
                    }
                }
                else
                {
                    BooleanQuery bq = new BooleanQuery();
                    parentQuery2 = bq;

                    bq.Add(new TermQuery(RandomParentTerm(parentFields[0])), BooleanClause.Occur.MUST);
                    int fieldID = TestUtil.NextInt(Random(), 1, parentFields.Length - 1);
                    bq.Add(new TermQuery(new Term("parent" + fieldID, parentFields[fieldID][Random().Next(parentFields[fieldID].Length)])), Random().NextBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.MUST_NOT);
                }

                if (VERBOSE)
                {
                    Console.WriteLine("\nTEST: top down: parentQuery2=" + parentQuery2);
                }

                // Maps parent query to child docs:
                ToChildBlockJoinQuery parentJoinQuery2 = new ToChildBlockJoinQuery(parentQuery2, parentsFilter, Random().NextBoolean());

                // To run against the block-join index:
                Query childJoinQuery2;

                // Same query as parentJoinQuery, but to run against
                // the fully denormalized index (so we can compare
                // results):
                Query childQuery2;

                // apply a filter to children
                Filter childFilter2, childJoinFilter2;

                if (Random().NextBoolean())
                {
                    childQuery2 = parentQuery2;
                    childJoinQuery2 = parentJoinQuery2;
                    childFilter2 = null;
                    childJoinFilter2 = null;
                }
                else
                {
                    // Restrict children by one random child term. Each index independently expresses
                    // the restriction either as a Filter or as an extra MUST clause.
                    Term childTerm = RandomChildTerm(childFields[0]);
                    if (Random().NextBoolean()) // filtered case
                    {
                        childJoinQuery2 = parentJoinQuery2;
                        Filter f = new QueryWrapperFilter(new TermQuery(childTerm));
                        childJoinFilter2 = Random().NextBoolean() ? new FixedBitSetCachingWrapperFilter(f) : f;
                    }
                    else
                    {
                        childJoinFilter2 = null;
                        // AND child field w/ parent query:
                        BooleanQuery bq = new BooleanQuery();
                        childJoinQuery2 = bq;
                        if (Random().NextBoolean())
                        {
                            bq.Add(parentJoinQuery2, BooleanClause.Occur.MUST);
                            bq.Add(new TermQuery(childTerm), BooleanClause.Occur.MUST);
                        }
                        else
                        {
                            bq.Add(new TermQuery(childTerm), BooleanClause.Occur.MUST);
                            bq.Add(parentJoinQuery2, BooleanClause.Occur.MUST);
                        }
                    }

                    if (Random().NextBoolean()) // filtered case
                    {
                        childQuery2 = parentQuery2;
                        Filter f = new QueryWrapperFilter(new TermQuery(childTerm));
                        childFilter2 = Random().NextBoolean() ? new FixedBitSetCachingWrapperFilter(f) : f;
                    }
                    else
                    {
                        childFilter2 = null;
                        BooleanQuery bq2 = new BooleanQuery();
                        childQuery2 = bq2;
                        if (Random().NextBoolean())
                        {
                            bq2.Add(parentQuery2, BooleanClause.Occur.MUST);
                            bq2.Add(new TermQuery(childTerm), BooleanClause.Occur.MUST);
                        }
                        else
                        {
                            bq2.Add(new TermQuery(childTerm), BooleanClause.Occur.MUST);
                            bq2.Add(parentQuery2, BooleanClause.Occur.MUST);
                        }
                    }
                }

                Sort childSort2 = GetRandomSort("child", childFields.Length);

                // Search denormalized index:
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: run top down query=" + childQuery2 + " filter=" + childFilter2 + " sort=" + childSort2);
                }
                TopDocs results2 = s.Search(childQuery2, childFilter2, r.NumDocs, childSort2);
                if (VERBOSE)
                {
                    Console.WriteLine("  " + results2.TotalHits + " totalHits:");
                    foreach (ScoreDoc sd in results2.ScoreDocs)
                    {
                        Document doc = s.Doc(sd.Doc);
                        Console.WriteLine("  childID=" + doc.Get("childID") + " parentID=" + doc.Get("parentID") + " docID=" + sd.Doc);
                    }
                }

                // Search join index:
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: run top down join query=" + childJoinQuery2 + " filter=" + childJoinFilter2 + " sort=" + childSort2);
                }
                TopDocs joinResults2 = joinS.Search(childJoinQuery2, childJoinFilter2, joinR.NumDocs, childSort2);
                if (VERBOSE)
                {
                    Console.WriteLine("  " + joinResults2.TotalHits + " totalHits:");
                    foreach (ScoreDoc sd in joinResults2.ScoreDocs)
                    {
                        Document doc = joinS.Doc(sd.Doc);
                        // Join child docs do not carry parentID themselves, so it is read from the parent doc.
                        Document parentDoc = GetParentDoc(joinR, parentsFilter, sd.Doc);
                        Console.WriteLine("  childID=" + doc.Get("childID") + " parentID=" + parentDoc.Get("parentID") + " docID=" + sd.Doc);
                    }
                }

                CompareChildHits(r, joinR, results2, joinResults2);
            }

            // Release readers first, then the directories they were opened on.
            r.Dispose();
            joinR.Dispose();
            dir.Dispose();
            joinDir.Dispose();
        }
All Usage Examples Of Lucene.Net.Join.ToParentBlockJoinCollector::GetTopGroups