BitMiracle.LibJpeg.Classic.Internal.huff_entropy_encoder.jpeg_gen_optimal_table C# (CSharp) Method

jpeg_gen_optimal_table() protected method

Generate the best Huffman code table for the given counts, fill htbl. The JPEG standard requires that no symbol be assigned a codeword of all one bits (so that padding bits added at the end of a compressed segment can't look like a valid code). Because of the canonical ordering of codewords, this just means that there must be an unused slot in the longest codeword length category. Section K.2 of the JPEG spec suggests reserving such a slot by pretending that symbol 256 is a valid symbol with count 1. In theory that's not optimal; giving it count zero but including it in the symbol set anyway should give a better Huffman code. But the theoretically better code actually seems to come out worse in practice, because it produces more all-ones bytes (which incur stuffed zero bytes in the final file). In any case the difference is tiny. The JPEG standard requires Huffman codes to be no more than 16 bits long. If some symbols have a very small but nonzero probability, the Huffman tree must be adjusted to meet the code length restriction. We currently use the adjustment method suggested in JPEG section K.2. This method is *not* optimal; it may not choose the best possible limited-length code. But typically only very-low-frequency symbols will be given less-than-optimal lengths, so the code is almost optimal. Experimental comparisons against an optimal limited-length-code algorithm indicate that the difference is microscopic --- usually less than a hundredth of a percent of total size. So the extra complexity of an optimal algorithm doesn't seem worthwhile.
protected jpeg_gen_optimal_table ( JHUFF_TBL htbl, long[] freq ) : void
htbl JHUFF_TBL
freq long[]
return void
        /// <summary>
        /// Builds a Huffman code table from the symbol counts in <paramref name="freq"/>
        /// and stores it in <paramref name="htbl"/>, following the procedure of
        /// section K.2 of the JPEG standard (including its adjustment that limits
        /// code lengths to 16 bits).
        /// </summary>
        /// <param name="htbl">
        /// Table to fill. Its Bits array receives the number of symbols per code
        /// length, its Huffval array receives the symbols ordered by code length,
        /// and Sent_table is set to false.
        /// </param>
        /// <param name="freq">
        /// Symbol counts indexed 0..256 (so at least 257 entries are required).
        /// Entry 256 is overwritten with 1 for the reserved pseudo-symbol, and the
        /// whole array is used as scratch space: its contents are destroyed.
        /// </param>
        protected void jpeg_gen_optimal_table(JHUFF_TBL htbl, long[] freq)
        {
            const int MAX_CLEN = 32;     /* assumed maximum initial code length */

            /* bits[k] = # of symbols with code length k.
            * Kept as int rather than byte: up to 257 symbols are tallied here, and
            * a single length can collect 256 of them (one dominant symbol plus 256
            * equally rare ones), which would wrap a byte counter around to zero.
            */
            int[] bits = new int[MAX_CLEN + 1];
            int[] codesize = new int[257];      /* codesize[k] = code length of symbol k */
            int[] others = new int[257];        /* next symbol in current branch of tree */
            int c1, c2;
            int p, i, j;
            long v;

            /* This algorithm is explained in section K.2 of the JPEG standard */
            for (i = 0; i < 257; i++)
            {
                others[i] = -1;     /* init links to empty */
            }

            freq[256] = 1;      /* make sure 256 has a nonzero count */
            /* Including the pseudo-symbol 256 in the Huffman procedure guarantees
            * that no real symbol is given code-value of all ones, because 256
            * will be placed last in the largest codeword category.
            */

            /* Huffman's basic algorithm to assign optimal code lengths to symbols */

            for (;;)
            {
                /* Find the smallest nonzero frequency, set c1 = its symbol */
                /* In case of ties, take the larger symbol number */
                /* The search starts from long.MaxValue instead of a fixed cap: with a
                * cap, any count (or merged subtree total) above it would never be
                * selected and the tree would be left only partially merged.
                */
                c1 = -1;
                v = long.MaxValue;
                for (i = 0; i <= 256; i++)
                {
                    if (freq[i] != 0 && freq[i] <= v)
                    {
                        v = freq[i];
                        c1 = i;
                    }
                }

                /* Find the next smallest nonzero frequency, set c2 = its symbol */
                /* In case of ties, take the larger symbol number */
                c2 = -1;
                v = long.MaxValue;
                for (i = 0; i <= 256; i++)
                {
                    if (freq[i] != 0 && freq[i] <= v && i != c1)
                    {
                        v = freq[i];
                        c2 = i;
                    }
                }

                /* Done if we've merged everything into one frequency */
                if (c2 < 0)
                {
                    break;
                }

                /* Else merge the two counts/trees */
                freq[c1] += freq[c2];
                freq[c2] = 0;

                /* Increment the codesize of everything in c1's tree branch */
                codesize[c1]++;
                while (others[c1] >= 0)
                {
                    c1 = others[c1];
                    codesize[c1]++;
                }

                others[c1] = c2;        /* chain c2 onto c1's tree branch */

                /* Increment the codesize of everything in c2's tree branch */
                codesize[c2]++;
                while (others[c2] >= 0)
                {
                    c2 = others[c2];
                    codesize[c2]++;
                }
            }

            /* Now count the number of symbols of each code length */
            for (i = 0; i <= 256; i++)
            {
                if (codesize[i] != 0)
                {
                    /* The JPEG standard seems to think that this can't happen, */
                    /* but I'm paranoid... */
                    if (codesize[i] > MAX_CLEN)
                    {
                        m_cinfo.ERREXIT(J_MESSAGE_CODE.JERR_HUFF_CLEN_OVERFLOW);
                    }

                    bits[codesize[i]]++;
                }
            }

            /* JPEG doesn't allow symbols with code lengths over 16 bits, so if the pure
            * Huffman procedure assigned any such lengths, we must adjust the coding.
            * Here is what the JPEG spec says about how this next bit works:
            * Since symbols are paired for the longest Huffman code, the symbols are
            * removed from this length category two at a time.  The prefix for the pair
            * (which is one bit shorter) is allocated to one of the pair; then,
            * skipping the BITS entry for that prefix length, a code word from the next
            * shortest nonzero BITS entry is converted into a prefix for two code words
            * one bit longer.
            */

            for (i = MAX_CLEN; i > 16; i--)
            {
                while (bits[i] > 0)
                {
                    j = i - 2;      /* find length of new prefix to be used */
                    while (bits[j] == 0)
                    {
                        j--;
                    }

                    bits[i] -= 2;       /* remove two symbols */
                    bits[i - 1]++;      /* one goes in this length */
                    bits[j + 1] += 2;       /* two new symbols in this length */
                    bits[j]--;      /* symbol of this length is now a prefix */
                }
            }

            /* Remove the count for the pseudo-symbol 256 from the largest codelength */
            /* (i is 16 here, left over from the loop above) */
            while (bits[i] == 0)        /* find largest codelength still in use */
            {
                i--;
            }
            bits[i]--;

            /* Return final symbol counts, one entry per slot of htbl.Bits
            * (presumably lengths 0..16 -- the array is declared elsewhere).
            * Only the 256 real symbols remain counted at this point, so each
            * value is narrowed back to a byte for storage.
            */
            for (i = 0; i < htbl.Bits.Length; i++)
            {
                htbl.Bits[i] = (byte)bits[i];
            }

            /* Return a list of the symbols sorted by code length */
            /* It's not real clear to me why we don't need to consider the codelength
            * changes made above, but the JPEG spec seems to think this works.
            */
            p = 0;
            for (i = 1; i <= MAX_CLEN; i++)
            {
                for (j = 0; j <= 255; j++)
                {
                    if (codesize[j] == i)
                    {
                        htbl.Huffval[p] = (byte)j;
                        p++;
                    }
                }
            }

            /* Set sent_table false so updated table will be written to JPEG file. */
            htbl.Sent_table = false;
        }
    }