/**
* Parses the samples of the image from the underlying content parser, ignoring all filters.
* The parser must be positioned immediately after the ID operator that ends the inline image's dictionary.
* On success the parser has consumed the EI operator that follows the image data.
* This only applies when no filter is present: the number of bytes read is the value returned by
* ComputeBytesPerRow multiplied by the image height.
* @param imageDictionary the dictionary of the inline image; must not contain a FILTER entry
* @param colorSpaceDic the color space dictionary handed to ComputeBytesPerRow (presumably used to resolve the image's color space - confirm against that helper)
* @param ps the content parser
* @return the samples of the image
* @throws ArgumentException if the image dictionary contains a FILTER entry
* @throws InlineImageParseException if the height entry is missing, the content stream ends before all
*         image data has been read, or the image data is not followed by EI
*/
private static byte[] ParseUnfilteredSamples(PdfDictionary imageDictionary, PdfDictionary colorSpaceDic, PdfContentParser ps)
{
    // special case: when no filter is specified, we just read the number of bits
    // per component, multiplied by the width and height.
    if (imageDictionary.Contains(PdfName.FILTER))
    {
        throw new ArgumentException("Dictionary contains filters");
    }

    PdfNumber h = imageDictionary.GetAsNumber(PdfName.HEIGHT);
    if (h == null)
    {
        // A malformed dictionary must surface as a parse error rather than a NullReferenceException.
        throw new InlineImageParseException("Inline image dictionary does not specify a height");
    }

    int bytesToRead = ComputeBytesPerRow(imageDictionary, colorSpaceDic) * h.IntValue;
    byte[] bytes = new byte[bytesToRead];
    PRTokeniser tokeniser = ps.GetTokeniser();

    // from the PDF spec: Unless the image uses ASCIIHexDecode or ASCII85Decode as one of its filters, the ID operator
    // shall be followed by a single white-space character, and the next character shall be interpreted as the first
    // byte of image data.
    // unfortunately, we've seen some PDFs where there is no space following the ID, so we have to capture this case
    // and handle it
    int shouldBeWhiteSpace = tokeniser.Read();

    // tokeniser treats 0 as whitespace, but for our purposes, we shouldn't
    bool isSeparator = PRTokeniser.IsWhitespace(shouldBeWhiteSpace) && shouldBeWhiteSpace != 0;

    int startIndex = 0;
    if (!isSeparator && bytesToRead > 0)
    {
        // -1 signals end of stream (same convention as in the loop below); it must not be stored as a 0xFF sample.
        if (shouldBeWhiteSpace == -1)
        {
            throw new InlineImageParseException("End of content stream reached before end of image data");
        }

        // No separator after ID: the character we just consumed is already the first sample byte.
        bytes[0] = (byte)shouldBeWhiteSpace;
        startIndex = 1;
    }
    // When bytesToRead is 0 there is no slot for a stray character; it is dropped and the EI check below
    // decides whether the stream is still well formed.

    for (int i = startIndex; i < bytesToRead; i++)
    {
        int ch = tokeniser.Read();
        if (ch == -1)
        {
            throw new InlineImageParseException("End of content stream reached before end of image data");
        }
        bytes[i] = (byte)ch;
    }

    // The null check covers a parser that has nothing left to return after the image data.
    PdfObject ei = ps.ReadPRObject();
    if (ei == null || !string.Equals(ei.ToString(), "EI", StringComparison.Ordinal))
    {
        throw new InlineImageParseException("EI not found after end of image data");
    }

    return bytes;
}