/// <summary>
/// Exercises <see cref="FuzzyQuery"/> against a small index of five-character
/// terms, checking hit counts and hit order for a range of query terms and
/// prefix lengths, for a capped <c>maxExpansions</c>, and for a field that
/// was never queried with matching content.
/// </summary>
/// <remarks>
/// All equality assertions follow the <c>Assert.AreEqual(expected, actual)</c>
/// argument order so that failure messages label the values correctly.
/// </remarks>
public virtual void TestFuzziness()
{
    Directory directory = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random(), directory, Similarity, TimeZone);

    // Terms under test. AddDoc is defined elsewhere; the queries below read
    // these values back through the "field" field.
    AddDoc("aaaaa", writer);
    AddDoc("aaaab", writer);
    AddDoc("aaabb", writer);
    AddDoc("aabbb", writer);
    AddDoc("abbbb", writer);
    AddDoc("bbbbb", writer);
    AddDoc("ddddd", writer);

    IndexReader reader = writer.Reader;
    IndexSearcher searcher = NewSearcher(reader);
    writer.Dispose();

    // No required prefix: "aaaaa" plus the terms within the default edit distance.
    FuzzyQuery query = new FuzzyQuery(new Term("field", "aaaaa"), FuzzyQuery.DefaultMaxEdits, 0);
    ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(3, hits.Length);

    // same with prefix (third constructor argument is the prefix length)
    query = new FuzzyQuery(new Term("field", "aaaaa"), FuzzyQuery.DefaultMaxEdits, 1);
    hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(3, hits.Length);
    query = new FuzzyQuery(new Term("field", "aaaaa"), FuzzyQuery.DefaultMaxEdits, 2);
    hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(3, hits.Length);
    query = new FuzzyQuery(new Term("field", "aaaaa"), FuzzyQuery.DefaultMaxEdits, 3);
    hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(3, hits.Length);
    // A four-character prefix "aaaa" is expected to drop "aaabb".
    query = new FuzzyQuery(new Term("field", "aaaaa"), FuzzyQuery.DefaultMaxEdits, 4);
    hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(2, hits.Length);
    // Prefix covers the whole term: only the exact term is expected.
    query = new FuzzyQuery(new Term("field", "aaaaa"), FuzzyQuery.DefaultMaxEdits, 5);
    hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length);
    // Prefix length larger than the five-character term: still one hit expected.
    query = new FuzzyQuery(new Term("field", "aaaaa"), FuzzyQuery.DefaultMaxEdits, 6);
    hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length);

    // test scoring: hits are expected in the order listed, exact match first
    query = new FuzzyQuery(new Term("field", "bbbbb"), FuzzyQuery.DefaultMaxEdits, 0);
    hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(3, hits.Length, "3 documents should match");
    IList<string> order = Arrays.AsList("bbbbb", "abbbb", "aabbb");
    for (int i = 0; i < hits.Length; i++)
    {
        string term = searcher.Doc(hits[i].Doc).Get("field");
        Assert.AreEqual(order[i], term);
    }

    // test pq size by supplying maxExpansions=2
    // this query would normally return 3 documents, because 3 terms match (see above):
    query = new FuzzyQuery(new Term("field", "bbbbb"), FuzzyQuery.DefaultMaxEdits, 0, 2, false);
    hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(2, hits.Length, "only 2 documents should match");
    order = Arrays.AsList("bbbbb", "abbbb");
    for (int i = 0; i < hits.Length; i++)
    {
        string term = searcher.Doc(hits[i].Doc).Get("field");
        Assert.AreEqual(order[i], term);
    }

    // not similar enough:
    query = new FuzzyQuery(new Term("field", "xxxxx"), FuzzyQuery.DefaultMaxEdits, 0);
    hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(0, hits.Length);
    query = new FuzzyQuery(new Term("field", "aaccc"), FuzzyQuery.DefaultMaxEdits, 0); // edit distance to "aaaaa" = 3
    hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(0, hits.Length);

    // query identical to a word in the index:
    query = new FuzzyQuery(new Term("field", "aaaaa"), FuzzyQuery.DefaultMaxEdits, 0);
    hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(3, hits.Length);
    Assert.AreEqual("aaaaa", searcher.Doc(hits[0].Doc).Get("field"));
    // default allows for up to two edits:
    Assert.AreEqual("aaaab", searcher.Doc(hits[1].Doc).Get("field"));
    Assert.AreEqual("aaabb", searcher.Doc(hits[2].Doc).Get("field"));

    // query similar to a word in the index:
    query = new FuzzyQuery(new Term("field", "aaaac"), FuzzyQuery.DefaultMaxEdits, 0);
    hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(3, hits.Length);
    Assert.AreEqual("aaaaa", searcher.Doc(hits[0].Doc).Get("field"));
    Assert.AreEqual("aaaab", searcher.Doc(hits[1].Doc).Get("field"));
    Assert.AreEqual("aaabb", searcher.Doc(hits[2].Doc).Get("field"));

    // now with prefix
    query = new FuzzyQuery(new Term("field", "aaaac"), FuzzyQuery.DefaultMaxEdits, 1);
    hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(3, hits.Length);
    Assert.AreEqual("aaaaa", searcher.Doc(hits[0].Doc).Get("field"));
    Assert.AreEqual("aaaab", searcher.Doc(hits[1].Doc).Get("field"));
    Assert.AreEqual("aaabb", searcher.Doc(hits[2].Doc).Get("field"));
    query = new FuzzyQuery(new Term("field", "aaaac"), FuzzyQuery.DefaultMaxEdits, 2);
    hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(3, hits.Length);
    Assert.AreEqual("aaaaa", searcher.Doc(hits[0].Doc).Get("field"));
    Assert.AreEqual("aaaab", searcher.Doc(hits[1].Doc).Get("field"));
    Assert.AreEqual("aaabb", searcher.Doc(hits[2].Doc).Get("field"));
    query = new FuzzyQuery(new Term("field", "aaaac"), FuzzyQuery.DefaultMaxEdits, 3);
    hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(3, hits.Length);
    Assert.AreEqual("aaaaa", searcher.Doc(hits[0].Doc).Get("field"));
    Assert.AreEqual("aaaab", searcher.Doc(hits[1].Doc).Get("field"));
    Assert.AreEqual("aaabb", searcher.Doc(hits[2].Doc).Get("field"));
    // A four-character prefix "aaaa" is expected to drop "aaabb".
    query = new FuzzyQuery(new Term("field", "aaaac"), FuzzyQuery.DefaultMaxEdits, 4);
    hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(2, hits.Length);
    Assert.AreEqual("aaaaa", searcher.Doc(hits[0].Doc).Get("field"));
    Assert.AreEqual("aaaab", searcher.Doc(hits[1].Doc).Get("field"));
    // Prefix covers the whole query term, and "aaaac" was not added above: no hits expected.
    query = new FuzzyQuery(new Term("field", "aaaac"), FuzzyQuery.DefaultMaxEdits, 5);
    hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(0, hits.Length);

    // Query one character away from "ddddd", the only term starting with 'd'.
    query = new FuzzyQuery(new Term("field", "ddddX"), FuzzyQuery.DefaultMaxEdits, 0);
    hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length);
    Assert.AreEqual("ddddd", searcher.Doc(hits[0].Doc).Get("field"));

    // now with prefix
    query = new FuzzyQuery(new Term("field", "ddddX"), FuzzyQuery.DefaultMaxEdits, 1);
    hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length);
    Assert.AreEqual("ddddd", searcher.Doc(hits[0].Doc).Get("field"));
    query = new FuzzyQuery(new Term("field", "ddddX"), FuzzyQuery.DefaultMaxEdits, 2);
    hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length);
    Assert.AreEqual("ddddd", searcher.Doc(hits[0].Doc).Get("field"));
    query = new FuzzyQuery(new Term("field", "ddddX"), FuzzyQuery.DefaultMaxEdits, 3);
    hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length);
    Assert.AreEqual("ddddd", searcher.Doc(hits[0].Doc).Get("field"));
    query = new FuzzyQuery(new Term("field", "ddddX"), FuzzyQuery.DefaultMaxEdits, 4);
    hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length);
    Assert.AreEqual("ddddd", searcher.Doc(hits[0].Doc).Get("field"));
    // Prefix covers the whole query term, and "ddddX" was not added above: no hits expected.
    query = new FuzzyQuery(new Term("field", "ddddX"), FuzzyQuery.DefaultMaxEdits, 5);
    hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(0, hits.Length);

    // different field = no match:
    query = new FuzzyQuery(new Term("anotherfield", "ddddX"), FuzzyQuery.DefaultMaxEdits, 0);
    hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(0, hits.Length);

    reader.Dispose();
    directory.Dispose();
}