Lucene.Net.Facet.Taxonomy.WriterCache.CompactLabelToOrdinal.Open C# (CSharp) Method

Open() internal static method

Opens the file and reloads the CompactLabelToOrdinal from it. The expected file is one produced by the Flush(Stream) method.
internal static Open ( FileInfo file, float loadFactor, int numHashArrays ) : CompactLabelToOrdinal
file System.IO.FileInfo
loadFactor float
numHashArrays int
return CompactLabelToOrdinal
        /// <summary>
        /// Opens <paramref name="file"/> and rebuilds a <see cref="CompactLabelToOrdinal"/> from it.
        /// <para/>
        /// Part of the file is the labelRepository, which needs to be rehashed
        /// and label offsets re-added to the object. I am unsure as to why we
        /// can't just store these off in the file as well, but in keeping with
        /// the spirit of the original code, I did it this way. (ssuppe)
        /// </summary>
        /// <param name="file">File to read; it is opened read-only and closed before this method returns.</param>
        /// <param name="loadFactor">Load factor stored on the returned instance and used to compute its threshold.</param>
        /// <param name="numHashArrays">Number of hash arrays allocated for the returned instance.</param>
        /// <returns>The reloaded <see cref="CompactLabelToOrdinal"/>.</returns>
        /// <exception cref="IOException">
        /// Thrown (wrapping the original <see cref="SerializationException"/>) when the
        /// file content cannot be deserialized.
        /// </exception>
        internal static CompactLabelToOrdinal Open(FileInfo file, float loadFactor, int numHashArrays)
        {
            CompactLabelToOrdinal l2o = new CompactLabelToOrdinal();
            l2o.loadFactor = loadFactor;
            l2o.hashArrays = new HashArray[numHashArrays];

            try
            {
                // The using statement disposes the reader (and with it the underlying
                // file stream) on both the success and the failure path.
                using (BinaryReader dis = new BinaryReader(new FileStream(file.FullName, FileMode.Open, FileAccess.Read)))
                {
                    // TaxiReader needs to load the "counter" or occupancy (L2O) to know
                    // the next unique facet. we used to load the delimiter too, but
                    // never used it.
                    l2o.counter = dis.ReadInt32();

                    l2o.capacity = DetermineCapacity((int)Math.Pow(2, l2o.hashArrays.Length), l2o.counter);
                    l2o.Init();

                    // now read the chars
                    l2o.labelRepository = CharBlockArray.Open(dis.BaseStream);

                    l2o.collisionMap = new CollisionMap(l2o.labelRepository);

                    // Calculate hash on the fly based on how CategoryPath hashes
                    // itself. Maybe in the future we can call some static based methods
                    // in CategoryPath so that this doesn't break again? I don't like
                    // having code in two different places...
                    int cid = 0;
                    // Skip the initial offset, it's the CategoryPath(0,0), which isn't
                    // a hashed value.
                    int offset = 1;
                    int lastStartOffset = offset;
                    // This loop really relies on a well-formed input (assumes pretty blindly
                    // that array offsets will work).  Since the initial file is machine
                    // generated, I think this should be OK.
                    while (offset < l2o.labelRepository.Length)
                    {
                        // identical code to CategoryPath.hashFromSerialized. since we need to
                        // advance offset, we cannot call the method directly. perhaps if we
                        // could pass a mutable Integer or something...
                        // The first char of an entry is its component count; a count of
                        // zero simply skips the loop below and leaves hash == 0.
                        int length = (ushort)l2o.labelRepository.CharAt(offset++);
                        int hash = length;
                        for (int i = 0; i < length; i++)
                        {
                            // Each component is stored as a length char followed by its chars.
                            int len = (ushort)l2o.labelRepository.CharAt(offset++);
                            hash = hash * 31 + l2o.labelRepository.SubSequence(offset, offset + len).GetHashCode();
                            offset += len;
                        }
                        // Now that we've hashed the components of the label, do the
                        // final part of the hash algorithm.
                        hash = hash ^ (((int)((uint)hash >> 20)) ^ ((int)((uint)hash >> 12)));
                        hash = hash ^ ((int)((uint)hash >> 7)) ^ ((int)((uint)hash >> 4));
                        // Add the label, and let's keep going
                        l2o.AddLabelOffset(hash, cid, lastStartOffset);
                        cid++;
                        lastStartOffset = offset;
                    }
                }
            }
            catch (SerializationException se)
            {
                throw new IOException("Invalid file format. Cannot deserialize.", se);
            }

            l2o.threshold = (int)(l2o.loadFactor * l2o.capacity);
            return l2o;
        }

Usage Example

Example #1
0
        /// <summary>
        /// Feeds the same randomly generated labels to a <c>LabelToOrdinalMap</c> and a
        /// <see cref="CompactLabelToOrdinal"/> and asserts that both always report the same
        /// ordinal, while periodically flushing the compact map to a file and reopening it.
        /// </summary>
        public virtual void TestL2O()
        {
            LabelToOrdinal expectedMap = new LabelToOrdinalMap();

            CompactLabelToOrdinal compactMap = new CompactLabelToOrdinal(2000000, 0.15f, 3);

            int iterations = AtLeast(10 * 1000);
            const int numUniqueValues = 50 * 1000;

            string[] candidates = new string[numUniqueValues];
            byte[] scratch = new byte[50];

            Random rnd = Random();

            // Fill the candidate table; a slot is only considered done once its
            // string does not contain the terminator char.
            int filled = 0;
            while (filled < numUniqueValues)
            {
                rnd.NextBytes(scratch);
                int byteCount = 1 + rnd.Next(scratch.Length);

                // This test is turning random bytes into a string,
                // this is asking for trouble.
                string candidate = Encoding.UTF8.GetString(scratch, 0, byteCount);

                // we cannot have empty path components, so eliminate all prefix as well
                // as middle consecutive delimiter chars.
                candidate = Regex.Replace(candidate, "/+", "/");
                if (candidate.StartsWith("/", StringComparison.Ordinal))
                {
                    candidate = candidate.Substring(1);
                }

                candidates[filled] = candidate;
                if (candidate.IndexOf(CompactLabelToOrdinal.TERMINATOR_CHAR) == -1)
                {
                    filled++;
                }
            }

            var tempDirectory = CreateTempDir("testLableToOrdinal");
            var cacheFile = new FileInfo(Path.Combine(tempDirectory.FullName, "CompactLabelToOrdinalTest.tmp"));
            int flushInterval = 10;
            char[] separator = "/".ToCharArray();

            // Phase 1: random lookups/insertions, with a flush + reopen round trip
            // every flushInterval iterations.
            for (int iteration = 0; iteration < iterations; iteration++)
            {
                bool roundTripNow = iteration > 0 && iteration % flushInterval == 0;
                if (roundTripNow)
                {
                    using (var output = new FileStream(cacheFile.FullName, FileMode.OpenOrCreate, FileAccess.ReadWrite))
                    {
                        compactMap.Flush(output);
                    }
                    compactMap = CompactLabelToOrdinal.Open(cacheFile, 0.15f, 3);

                    // FileInfo.Delete() returns nothing to assert on, so verify
                    // afterwards that the file is gone.
                    cacheFile.Delete();
                    assertFalse(File.Exists(cacheFile.FullName));

                    // Flush less and less often as the run progresses.
                    if (flushInterval < (iterations / 10))
                    {
                        flushInterval *= 10;
                    }
                }

                string path = candidates[rnd.Next(numUniqueValues)];
                FacetLabel label = path.Length == 0
                    ? new FacetLabel()
                    : new FacetLabel(path.Split(separator, StringSplitOptions.RemoveEmptyEntries));

                int expectedOrdinal = expectedMap.GetOrdinal(label);
                int compactOrdinal = compactMap.GetOrdinal(label);

                if (VERBOSE)
                {
                    Console.WriteLine("Testing label: " + label.ToString());
                }

                assertEquals(expectedOrdinal, compactOrdinal);

                // Unknown label: register it in both maps under the same new ordinal.
                if (expectedOrdinal == LabelToOrdinal.INVALID_ORDINAL)
                {
                    expectedOrdinal = compactMap.GetNextOrdinal();
                    expectedMap.AddLabel(label, expectedOrdinal);
                    compactMap.AddLabel(label, expectedOrdinal);
                }
            }

            // Phase 2: every candidate, whether it was added or not, must resolve
            // to the same ordinal in both maps.
            foreach (string path in candidates)
            {
                FacetLabel label = path.Length == 0
                    ? new FacetLabel()
                    : new FacetLabel(path.Split(separator, StringSplitOptions.RemoveEmptyEntries));

                int expectedOrdinal = expectedMap.GetOrdinal(label);
                int compactOrdinal = compactMap.GetOrdinal(label);

                if (VERBOSE)
                {
                    Console.WriteLine("Testing label 2: " + label.ToString());
                }

                assertEquals(expectedOrdinal, compactOrdinal);
            }
        }
All Usage Examples Of Lucene.Net.Facet.Taxonomy.WriterCache.CompactLabelToOrdinal::Open