/// <summary>
/// End-to-end payload test. Builds an index whose terms carry payloads of known
/// content and lengths, then verifies:
/// (1) every payload byte round-trips through the index,
/// (2) payloads read correctly after lazy skipping (NextDoc / Advance without
///     reading intermediate payloads),
/// (3) payload lengths are tracked correctly across skip-list entry points, and
/// (4) a payload larger than the BufferedIndexOutput buffer is stored intact.
/// </summary>
/// <param name="dir">Directory in which the temporary test index is created.</param>
private void PerformTest(Directory dir)
{
PayloadAnalyzer analyzer = new PayloadAnalyzer();
IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetOpenMode(OpenMode_e.CREATE).SetMergePolicy(NewLogMergePolicy()));
// should be kept in sync with the skip interval value hard-coded in TermInfosWriter
const int skipInterval = 16;
const int numTerms = 5;
const string fieldName = "f1";
// one more doc than the skip interval, so every term's postings cross a skip point
int numDocs = skipInterval + 1;
// create content for the test documents with just a few terms
Term[] terms = GenerateTerms(fieldName, numTerms);
StringBuilder sb = new StringBuilder();
for (int i = 0; i < terms.Length; i++)
{
sb.Append(terms[i].Text());
sb.Append(" ");
}
string content = sb.ToString();
// Total payload bytes written below:
//   first two batches: 2 * numDocs docs, each doc emitting numTerms payloads of
//     length 1  ->  numTerms * numDocs * 2 bytes
//   third batch: doc i (i = 0..numDocs-1) emitting numTerms payloads of
//     length i  ->  numTerms * (0 + 1 + ... + (numDocs-1))
//                =  numTerms * numDocs * (numDocs - 1) / 2 bytes
int payloadDataLength = numTerms * numDocs * 2 + numTerms * numDocs * (numDocs - 1) / 2;
var payloadData = GenerateRandomData(payloadDataLength);
Document d = new Document();
d.Add(NewTextField(fieldName, content, Field.Store.NO));
// add the same document multiple times to have the same payload lengths for all
// occurrences within two consecutive skip intervals
int offset = 0;
// batch 1 & 2: payload length fixed at 1, so each doc consumes numTerms bytes
for (int i = 0; i < 2 * numDocs; i++)
{
analyzer = new PayloadAnalyzer(fieldName, payloadData, offset, 1);
offset += numTerms;
writer.AddDocument(d, analyzer);
}
// make sure we create more than one segment to test merging
writer.Commit();
// now make sure to have different payload lengths at the next skip point:
// doc i in this batch uses payload length i, so consecutive docs differ
for (int i = 0; i < numDocs; i++)
{
analyzer = new PayloadAnalyzer(fieldName, payloadData, offset, i);
offset += i * numTerms;
writer.AddDocument(d, analyzer);
}
writer.ForceMerge(1);
// flush
writer.Dispose();
/*
* Verify the index
* first we test if all payloads are stored correctly
*/
IndexReader reader = DirectoryReader.Open(dir);
var verifyPayloadData = new byte[payloadDataLength];
offset = 0;
var tps = new DocsAndPositionsEnum[numTerms];
// open one positions enum per term so their payloads can be read in write order
for (int i = 0; i < numTerms; i++)
{
tps[i] = MultiFields.GetTermPositionsEnum(reader, MultiFields.GetLiveDocs(reader), terms[i].Field, new BytesRef(terms[i].Text()));
}
// advance all enums in lock-step (every doc contains every term exactly once,
// so tps[0] driving the loop keeps them aligned) and concatenate the payloads
// back in the order they were generated
while (tps[0].NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
{
for (int i = 1; i < numTerms; i++)
{
tps[i].NextDoc();
}
int freq = tps[0].Freq();
for (int i = 0; i < freq; i++)
{
for (int j = 0; j < numTerms; j++)
{
tps[j].NextPosition();
BytesRef br = tps[j].Payload;
if (br != null)
{
Array.Copy(br.Bytes, br.Offset, verifyPayloadData, offset, br.Length);
offset += br.Length;
}
}
}
}
// reconstructed byte stream must match exactly what was written
AssertByteArrayEquals(payloadData, verifyPayloadData);
/*
* test lazy skipping
*/
DocsAndPositionsEnum tp = MultiFields.GetTermPositionsEnum(reader, MultiFields.GetLiveDocs(reader), terms[0].Field, new BytesRef(terms[0].Text()));
tp.NextDoc();
tp.NextPosition();
// NOTE: prior rev of this test was failing to first
// call next here:
tp.NextDoc();
// now we don't read this payload
tp.NextPosition();
BytesRef payload = tp.Payload;
Assert.AreEqual(1, payload.Length, "Wrong payload length.");
// doc 0 consumed bytes [0, numTerms), so doc 1's payload for terms[0]
// starts at payloadData[numTerms]
Assert.AreEqual(payload.Bytes[payload.Offset], payloadData[numTerms]);
tp.NextDoc();
tp.NextPosition();
// we don't read this payload and skip to a different document
tp.Advance(5);
tp.NextPosition();
payload = tp.Payload;
Assert.AreEqual(1, payload.Length, "Wrong payload length.");
// each earlier doc consumed numTerms bytes, so doc 5's first payload
// sits at payloadData[5 * numTerms]
Assert.AreEqual(payload.Bytes[payload.Offset], payloadData[5 * numTerms]);
/*
* Test different lengths at skip points
*/
tp = MultiFields.GetTermPositionsEnum(reader, MultiFields.GetLiveDocs(reader), terms[1].Field, new BytesRef(terms[1].Text()));
tp.NextDoc();
tp.NextPosition();
Assert.AreEqual(1, tp.Payload.Length, "Wrong payload length.");
// still inside the first two batches -> payload length 1
tp.Advance(skipInterval - 1);
tp.NextPosition();
Assert.AreEqual(1, tp.Payload.Length, "Wrong payload length.");
tp.Advance(2 * skipInterval - 1);
tp.NextPosition();
Assert.AreEqual(1, tp.Payload.Length, "Wrong payload length.");
// doc (3*skipInterval - 1) lies in the third batch; its batch-relative index
// is (3*skipInterval - 1) - 2*numDocs, which is also its payload length
tp.Advance(3 * skipInterval - 1);
tp.NextPosition();
Assert.AreEqual(3 * skipInterval - 2 * numDocs - 1, tp.Payload.Length, "Wrong payload length.");
reader.Dispose();
// test long payload: rebuild the index from scratch with a single term
analyzer = new PayloadAnalyzer();
writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetOpenMode(OpenMode_e.CREATE));
string singleTerm = "lucene";
d = new Document();
d.Add(NewTextField(fieldName, singleTerm, Field.Store.NO));
// add a payload whose length is greater than the buffer size of BufferedIndexOutput
payloadData = GenerateRandomData(2000);
analyzer.SetPayloadData(fieldName, payloadData, 100, 1500);
writer.AddDocument(d);
writer.ForceMerge(1);
// flush
writer.Dispose();
reader = DirectoryReader.Open(dir);
tp = MultiFields.GetTermPositionsEnum(reader, MultiFields.GetLiveDocs(reader), fieldName, new BytesRef(singleTerm));
tp.NextDoc();
tp.NextPosition();
BytesRef bref = tp.Payload;
// NOTE(review): this reassignment of verifyPayloadData is never read afterwards —
// the comparison below uses 'portion' and 'bref' directly; looks like dead code
verifyPayloadData = new byte[bref.Length];
var portion = new byte[1500];
// the stored payload must equal the 1500-byte slice starting at offset 100
Array.Copy(payloadData, 100, portion, 0, 1500);
AssertByteArrayEquals(portion, bref.Bytes, bref.Offset, bref.Length);
reader.Dispose();
}