/// <summary>
/// Builds the completion automata from the entries supplied by <paramref name="iterator"/>.
/// <para>
/// Every entry is written to a temporary file as a 4-byte encoded weight followed by the
/// entry bytes, the file is sorted offline, and each sorted entry is assigned to one of
/// <c>buckets</c> buckets according to its position in the sorted output. Entries whose
/// encoded weight equals that of the preceding entry reuse the preceding entry's bucket.
/// On success <c>higherWeightsCompletion</c>, <c>normalCompletion</c> and <c>count</c>
/// are replaced.
/// </para>
/// <para>
/// Temporary files are removed on every exit path; removal is best-effort and never
/// replaces an exception that is already propagating.
/// </para>
/// </summary>
/// <param name="iterator">Source of entries and their weights. Must not be null.</param>
/// <exception cref="System.ArgumentNullException"><paramref name="iterator"/> is null.</exception>
/// <exception cref="System.ArgumentException">
/// <paramref name="iterator"/> reports payloads or contexts, which this suggester does not support.
/// </exception>
public override void Build(IInputIterator iterator)
{
    if (iterator == null)
    {
        throw new System.ArgumentNullException("iterator");
    }
    if (iterator.HasPayloads)
    {
        throw new System.ArgumentException("this suggester doesn't support payloads");
    }
    if (iterator.HasContexts)
    {
        throw new System.ArgumentException("this suggester doesn't support contexts");
    }

    // Everything that needs cleanup is declared up front and created inside the try block,
    // so a failure half-way through setup still reaches the finally block below.
    FileInfo tempInput = null;
    FileInfo tempSorted = null;
    OfflineSorter.ByteSequencesWriter writer = null;
    OfflineSorter.ByteSequencesReader reader = null;
    ExternalRefSorter sorter = null;
    bool success = false;
    try
    {
        tempInput = FileSupport.CreateTempFile(typeof(FSTCompletionLookup).Name, ".input", OfflineSorter.DefaultTempDir());
        tempSorted = FileSupport.CreateTempFile(typeof(FSTCompletionLookup).Name, ".sorted", OfflineSorter.DefaultTempDir());
        writer = new OfflineSorter.ByteSequencesWriter(tempInput);

        // Push floats up front before sequences to sort them. For now, assume they are non-negative.
        // If negative floats are allowed some trickery needs to be done to find their byte order.
        count = 0;

        byte[] buffer = new byte[0];
        ByteArrayDataOutput output = new ByteArrayDataOutput(buffer);
        BytesRef spare;
        while ((spare = iterator.Next()) != null)
        {
            // Make room for the 4-byte weight prefix plus the entry bytes.
            if (spare.Length + 4 >= buffer.Length)
            {
                buffer = ArrayUtil.Grow(buffer, spare.Length + 4);
            }
            output.Reset(buffer);
            output.WriteInt(EncodeWeight(iterator.Weight));
            output.WriteBytes(spare.Bytes, spare.Offset, spare.Length);
            writer.Write(buffer, 0, output.Position);
        }
        writer.Dispose();

        // We don't know the distribution of scores and we need to bucket them, so we'll sort
        // and divide into equal buckets.
        OfflineSorter.SortInfo info = (new OfflineSorter()).Sort(tempInput, tempSorted);

        // The unsorted input is no longer needed; drop it early to free disk space.
        // A failed delete here must not abort the build, the finally block retries it.
        DeleteTempFileQuietly(tempInput);

        FSTCompletionBuilder builder = new FSTCompletionBuilder(buckets, sorter = new ExternalRefSorter(new OfflineSorter()), sharedTailLength);
        int inputLines = info.Lines;
        reader = new OfflineSorter.ByteSequencesReader(tempSorted);
        long line = 0;
        int previousBucket = 0;
        int previousScore = 0;
        ByteArrayDataInput input = new ByteArrayDataInput();
        BytesRef tmp1 = new BytesRef();
        BytesRef tmp2 = new BytesRef();
        while (reader.Read(tmp1))
        {
            input.Reset(tmp1.Bytes);
            int currentScore = input.ReadInt();
            int bucket;
            if (line > 0 && currentScore == previousScore)
            {
                // Same encoded weight as the previous entry: keep both in the same bucket.
                bucket = previousBucket;
            }
            else
            {
                // 'line' is a long, so the multiplication is carried out in 64 bits.
                bucket = (int)(line * buckets / inputLines);
            }
            previousScore = currentScore;
            previousBucket = bucket;

            // Only append the input, discard the weight.
            tmp2.Bytes = tmp1.Bytes;
            tmp2.Offset = input.Position;
            tmp2.Length = tmp1.Length - input.Position;
            builder.Add(tmp2, bucket);
            line++;
            count++;
        }

        // The two FSTCompletions share the same automaton.
        this.higherWeightsCompletion = builder.Build();
        this.normalCompletion = new FSTCompletion(higherWeightsCompletion.FST, false, exactMatchFirst);
        success = true;
    }
    finally
    {
        try
        {
            if (success)
            {
                IOUtils.Close(reader, writer, sorter);
            }
            else
            {
                IOUtils.CloseWhileHandlingException(reader, writer, sorter);
            }
        }
        finally
        {
            // Runs even when closing throws, so the temp files are never left behind
            // because of a close failure.
            DeleteTempFileQuietly(tempInput);
            DeleteTempFileQuietly(tempSorted);
        }
    }
}

/// <summary>
/// Best-effort removal of a temporary file: does nothing for a null reference and
/// ignores the failures <see cref="FileInfo.Delete"/> can raise, so that cleanup never
/// hides the exception that caused it.
/// </summary>
/// <param name="file">The temporary file to remove; may be null.</param>
private static void DeleteTempFileQuietly(FileInfo file)
{
    if (file == null)
    {
        return;
    }
    try
    {
        file.Delete();
    }
    catch (System.IO.IOException)
    {
        // Deliberately ignored: e.g. the file is still open elsewhere.
    }
    catch (System.UnauthorizedAccessException)
    {
        // Deliberately ignored: cleanup is best-effort.
    }
    catch (System.Security.SecurityException)
    {
        // Deliberately ignored: cleanup is best-effort.
    }
}