/// <summary>
/// Rebuilds a <see cref="CompactLabelToOrdinal"/> from a previously written file.
/// <para>
/// Part of the file is the labelRepository, which needs to be rehashed
/// and label offsets re-added to the object. I am unsure as to why we
/// can't just store these off in the file as well, but in keeping with
/// the spirit of the original code, I did it this way. (ssuppe)
/// </para>
/// </summary>
/// <param name="file">The file to read. Must not be <c>null</c>.</param>
/// <param name="loadFactor">
/// Load factor stored on the result and used to derive its threshold
/// (<c>loadFactor * capacity</c>).
/// </param>
/// <param name="numHashArrays">
/// Number of hash array slots to allocate; <c>2^numHashArrays</c> is also
/// passed to <c>DetermineCapacity</c> together with the loaded counter.
/// </param>
/// <returns>The populated instance.</returns>
/// <exception cref="ArgumentNullException"><paramref name="file"/> is <c>null</c>.</exception>
/// <exception cref="IOException">
/// A <see cref="SerializationException"/> was raised while reading the file.
/// </exception>
internal static CompactLabelToOrdinal Open(FileInfo file, float loadFactor, int numHashArrays)
{
    if (file == null)
    {
        throw new ArgumentNullException("file");
    }

    CompactLabelToOrdinal l2o = new CompactLabelToOrdinal();
    l2o.loadFactor = loadFactor;
    l2o.hashArrays = new HashArray[numHashArrays];

    try
    {
        // Both the stream and the reader are owned by using blocks so that the
        // file handle is released on every exit path, including a failure
        // while the reader itself is being constructed.
        using (FileStream stream = new FileStream(file.FullName, FileMode.Open, FileAccess.Read))
        using (BinaryReader dis = new BinaryReader(stream))
        {
            // TaxiReader needs to load the "counter" or occupancy (L2O) to know
            // the next unique facet. we used to load the delimiter too, but
            // never used it.
            l2o.counter = dis.ReadInt32();

            l2o.capacity = DetermineCapacity((int)Math.Pow(2, l2o.hashArrays.Length), l2o.counter);
            l2o.Init();

            // now read the chars
            l2o.labelRepository = CharBlockArray.Open(dis.BaseStream);
            l2o.collisionMap = new CollisionMap(l2o.labelRepository);

            // Calculate hash on the fly based on how CategoryPath hashes
            // itself. Maybe in the future we can call some static based methods
            // in CategoryPath so that this doesn't break again? I don't like
            // having code in two different places...
            int cid = 0;

            // Skip the initial offset, it's the CategoryPath(0,0), which isn't
            // a hashed value.
            int offset = 1;
            int lastStartOffset = offset;

            // This loop really relies on a well-formed input (assumes pretty blindly
            // that array offsets will work). Since the initial file is machine
            // generated, I think this should be OK.
            while (offset < l2o.labelRepository.Length)
            {
                // identical code to CategoryPath.hashFromSerialized. since we need to
                // advance offset, we cannot call the method directly. perhaps if we
                // could pass a mutable Integer or something...

                // First char of a label is its component count; it also seeds the hash.
                int length = (ushort)l2o.labelRepository.CharAt(offset++);
                int hash = length;

                // Each component is stored as a length char followed by that many
                // chars. A zero-component label simply skips this loop.
                for (int i = 0; i < length; i++)
                {
                    int len = (ushort)l2o.labelRepository.CharAt(offset++);
                    hash = hash * 31 + l2o.labelRepository.SubSequence(offset, offset + len).GetHashCode();
                    offset += len;
                }

                // Now that we've hashed the components of the label, do the
                // final part of the hash algorithm.
                hash = hash ^ (((int)((uint)hash >> 20)) ^ ((int)((uint)hash >> 12)));
                hash = hash ^ ((int)((uint)hash >> 7)) ^ ((int)((uint)hash >> 4));

                // Add the label, and let's keep going
                l2o.AddLabelOffset(hash, cid, lastStartOffset);
                cid++;
                lastStartOffset = offset;
            }
        }
    }
    catch (SerializationException se)
    {
        throw new IOException("Invalid file format. Cannot deserialize.", se);
    }

    l2o.threshold = (int)(l2o.loadFactor * l2o.capacity);
    return l2o;
}