Lucene.Net.Search.TestFuzzyQuery.TestFuzziness C# (CSharp) Method

TestFuzziness() private method

private TestFuzziness ( ) : void
return void
        public virtual void TestFuzziness()
        {
            Directory directory = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), directory, Similarity, TimeZone);
            AddDoc("aaaaa", writer);
            AddDoc("aaaab", writer);
            AddDoc("aaabb", writer);
            AddDoc("aabbb", writer);
            AddDoc("abbbb", writer);
            AddDoc("bbbbb", writer);
            AddDoc("ddddd", writer);

            IndexReader reader = writer.Reader;
            IndexSearcher searcher = NewSearcher(reader);
            writer.Dispose();

            FuzzyQuery query = new FuzzyQuery(new Term("field", "aaaaa"), FuzzyQuery.DefaultMaxEdits, 0);
            ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(3, hits.Length);

            // same with prefix
            query = new FuzzyQuery(new Term("field", "aaaaa"), FuzzyQuery.DefaultMaxEdits, 1);
            hits = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(3, hits.Length);
            query = new FuzzyQuery(new Term("field", "aaaaa"), FuzzyQuery.DefaultMaxEdits, 2);
            hits = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(3, hits.Length);
            query = new FuzzyQuery(new Term("field", "aaaaa"), FuzzyQuery.DefaultMaxEdits, 3);
            hits = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(3, hits.Length);
            query = new FuzzyQuery(new Term("field", "aaaaa"), FuzzyQuery.DefaultMaxEdits, 4);
            hits = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(2, hits.Length);
            query = new FuzzyQuery(new Term("field", "aaaaa"), FuzzyQuery.DefaultMaxEdits, 5);
            hits = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);
            query = new FuzzyQuery(new Term("field", "aaaaa"), FuzzyQuery.DefaultMaxEdits, 6);
            hits = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);

            // test scoring
            query = new FuzzyQuery(new Term("field", "bbbbb"), FuzzyQuery.DefaultMaxEdits, 0);
            hits = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(3, hits.Length, "3 documents should match");
            IList<string> order = Arrays.AsList("bbbbb", "abbbb", "aabbb");
            for (int i = 0; i < hits.Length; i++)
            {
                string term = searcher.Doc(hits[i].Doc).Get("field");
                //System.out.println(hits[i].Score);
                Assert.AreEqual(order[i], term);
            }

            // test pq size by supplying maxExpansions=2
            // this query would normally return 3 documents, because 3 terms match (see above):
            query = new FuzzyQuery(new Term("field", "bbbbb"), FuzzyQuery.DefaultMaxEdits, 0, 2, false);
            hits = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(2, hits.Length, "only 2 documents should match");
            order = Arrays.AsList("bbbbb", "abbbb");
            for (int i = 0; i < hits.Length; i++)
            {
                string term = searcher.Doc(hits[i].Doc).Get("field");
                //System.out.println(hits[i].Score);
                Assert.AreEqual(order[i], term);
            }

            // not similar enough:
            query = new FuzzyQuery(new Term("field", "xxxxx"), FuzzyQuery.DefaultMaxEdits, 0);
            hits = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(0, hits.Length);
            query = new FuzzyQuery(new Term("field", "aaccc"), FuzzyQuery.DefaultMaxEdits, 0); // edit distance to "aaaaa" = 3
            hits = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(0, hits.Length);

            // query identical to a word in the index:
            query = new FuzzyQuery(new Term("field", "aaaaa"), FuzzyQuery.DefaultMaxEdits, 0);
            hits = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(3, hits.Length);
            Assert.AreEqual(searcher.Doc(hits[0].Doc).Get("field"), ("aaaaa"));
            // default allows for up to two edits:
            Assert.AreEqual(searcher.Doc(hits[1].Doc).Get("field"), ("aaaab"));
            Assert.AreEqual(searcher.Doc(hits[2].Doc).Get("field"), ("aaabb"));

            // query similar to a word in the index:
            query = new FuzzyQuery(new Term("field", "aaaac"), FuzzyQuery.DefaultMaxEdits, 0);
            hits = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(3, hits.Length);
            Assert.AreEqual(searcher.Doc(hits[0].Doc).Get("field"), ("aaaaa"));
            Assert.AreEqual(searcher.Doc(hits[1].Doc).Get("field"), ("aaaab"));
            Assert.AreEqual(searcher.Doc(hits[2].Doc).Get("field"), ("aaabb"));

            // now with prefix
            query = new FuzzyQuery(new Term("field", "aaaac"), FuzzyQuery.DefaultMaxEdits, 1);
            hits = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(3, hits.Length);
            Assert.AreEqual(searcher.Doc(hits[0].Doc).Get("field"), ("aaaaa"));
            Assert.AreEqual(searcher.Doc(hits[1].Doc).Get("field"), ("aaaab"));
            Assert.AreEqual(searcher.Doc(hits[2].Doc).Get("field"), ("aaabb"));
            query = new FuzzyQuery(new Term("field", "aaaac"), FuzzyQuery.DefaultMaxEdits, 2);
            hits = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(3, hits.Length);
            Assert.AreEqual(searcher.Doc(hits[0].Doc).Get("field"), ("aaaaa"));
            Assert.AreEqual(searcher.Doc(hits[1].Doc).Get("field"), ("aaaab"));
            Assert.AreEqual(searcher.Doc(hits[2].Doc).Get("field"), ("aaabb"));
            query = new FuzzyQuery(new Term("field", "aaaac"), FuzzyQuery.DefaultMaxEdits, 3);
            hits = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(3, hits.Length);
            Assert.AreEqual(searcher.Doc(hits[0].Doc).Get("field"), ("aaaaa"));
            Assert.AreEqual(searcher.Doc(hits[1].Doc).Get("field"), ("aaaab"));
            Assert.AreEqual(searcher.Doc(hits[2].Doc).Get("field"), ("aaabb"));
            query = new FuzzyQuery(new Term("field", "aaaac"), FuzzyQuery.DefaultMaxEdits, 4);
            hits = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(2, hits.Length);
            Assert.AreEqual(searcher.Doc(hits[0].Doc).Get("field"), ("aaaaa"));
            Assert.AreEqual(searcher.Doc(hits[1].Doc).Get("field"), ("aaaab"));
            query = new FuzzyQuery(new Term("field", "aaaac"), FuzzyQuery.DefaultMaxEdits, 5);
            hits = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(0, hits.Length);

            query = new FuzzyQuery(new Term("field", "ddddX"), FuzzyQuery.DefaultMaxEdits, 0);
            hits = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);
            Assert.AreEqual(searcher.Doc(hits[0].Doc).Get("field"), ("ddddd"));

            // now with prefix
            query = new FuzzyQuery(new Term("field", "ddddX"), FuzzyQuery.DefaultMaxEdits, 1);
            hits = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);
            Assert.AreEqual(searcher.Doc(hits[0].Doc).Get("field"), ("ddddd"));
            query = new FuzzyQuery(new Term("field", "ddddX"), FuzzyQuery.DefaultMaxEdits, 2);
            hits = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);
            Assert.AreEqual(searcher.Doc(hits[0].Doc).Get("field"), ("ddddd"));
            query = new FuzzyQuery(new Term("field", "ddddX"), FuzzyQuery.DefaultMaxEdits, 3);
            hits = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);
            Assert.AreEqual(searcher.Doc(hits[0].Doc).Get("field"), ("ddddd"));
            query = new FuzzyQuery(new Term("field", "ddddX"), FuzzyQuery.DefaultMaxEdits, 4);
            hits = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);
            Assert.AreEqual(searcher.Doc(hits[0].Doc).Get("field"), ("ddddd"));
            query = new FuzzyQuery(new Term("field", "ddddX"), FuzzyQuery.DefaultMaxEdits, 5);
            hits = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(0, hits.Length);

            // different field = no match:
            query = new FuzzyQuery(new Term("anotherfield", "ddddX"), FuzzyQuery.DefaultMaxEdits, 0);
            hits = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(0, hits.Length);

            reader.Dispose();
            directory.Dispose();
        }