private static byte[] ParseUnfilteredSamples(PdfDictionary imageDictionary, PdfDictionary colorSpaceDic, PdfContentParser ps)
{
// special case: when no filter is specified, we just read the number of bits
// per component, multiplied by the width and height.
if (imageDictionary.Contains(PdfName.FILTER))
throw new ArgumentException("Dictionary contains filters");
PdfNumber h = imageDictionary.GetAsNumber(PdfName.HEIGHT);
int bytesToRead = ComputeBytesPerRow(imageDictionary, colorSpaceDic) * h.IntValue;
byte[] bytes = new byte[bytesToRead];
PRTokeniser tokeniser = ps.GetTokeniser();
int shouldBeWhiteSpace = tokeniser.Read(); // skip next character (which better be a whitespace character - I suppose we could check for this)
// from the PDF spec: Unless the image uses ASCIIHexDecode or ASCII85Decode as one of its filters, the ID operator shall be followed by a single white-space character, and the next character shall be interpreted as the first byte of image data.
// unfortunately, we've seen some PDFs where there is no space following the ID, so we have to capture this case and handle it
int startIndex = 0;
if (!PRTokeniser.IsWhitespace(shouldBeWhiteSpace) || shouldBeWhiteSpace == 0){ // tokeniser treats 0 as whitespace, but for our purposes, we shouldn't)
bytes[0] = (byte)shouldBeWhiteSpace;
startIndex++;
}
for (int i = startIndex; i < bytesToRead; i++){
int ch = tokeniser.Read();
if (ch == -1)
throw new InlineImageParseException("End of content stream reached before end of image data");
bytes[i] = (byte)ch;
}
PdfObject ei = ps.ReadPRObject();
if (!ei.ToString().Equals("EI"))
throw new InlineImageParseException("EI not found after end of image data");
return bytes;
}