/// <summary>
/// Reads the sample bytes of an inline image from the content stream.
/// When the image dictionary has no FILTER entry the work is delegated to
/// <c>ParseUnfilteredSamples</c>; otherwise bytes are read from the parser's tokeniser
/// until an <c>EI</c> operator surrounded by whitespace is found.
/// </summary>
/// <param name="imageDictionary">the inline image dictionary; checked for a FILTER entry</param>
/// <param name="colorSpaceDic">color space dictionary, only passed on to <c>ParseUnfilteredSamples</c></param>
/// <param name="ps">content parser whose tokeniser is positioned just after the ID operator</param>
/// <returns>
/// the bytes read before the terminating <c>&lt;ws&gt;EI&lt;ws&gt;</c> sequence (the terminator itself,
/// including its leading whitespace, is not part of the result). No filter decoding is done here.
/// </returns>
/// <exception cref="InlineImageParseException">if the tokeniser is exhausted before a terminator is seen</exception>
private static byte[] ParseInlineImageSamples(PdfDictionary imageDictionary, PdfDictionary colorSpaceDic, PdfContentParser ps)
{
    // by the time we get to here, we have already parsed the ID operator
    if (!imageDictionary.Contains(PdfName.FILTER)){
        return ParseUnfilteredSamples(imageDictionary, colorSpaceDic, ps);
    }
    // read all content until we reach an EI operator surrounded by whitespace.
    // The following algorithm has two potential issues: what if the image stream
    // contains <ws>EI<ws> ?
    // Plus, there are some streams that don't have the <ws> before the EI operator
    // it sounds like we would have to actually decode the content stream, which
    // I'd rather avoid right now.
    using (MemoryStream baos = new MemoryStream())
    using (MemoryStream accumulated = new MemoryStream()){
        // 'found' counts how many bytes of the <ws> 'E' 'I' <ws> terminator have been matched so far;
        // 'accumulated' holds exactly those matched bytes until we know whether they are data or terminator.
        int found = 0;
        int ch;
        PRTokeniser tokeniser = ps.GetTokeniser();
        while ((ch = tokeniser.Read()) != -1){
            bool isWhitespace = PRTokeniser.IsWhitespace(ch);
            if (found == 3 && isWhitespace){
                // complete terminator: the pending <ws>EI bytes are dropped, not emitted as image data
                return baos.ToArray();
            }
            if ((found == 1 && ch == 'E') || (found == 2 && ch == 'I')){
                // next expected letter of the terminator
                found++;
                accumulated.WriteByte((byte)ch);
                continue;
            }
            // Either nothing was pending or the partial match just failed:
            // whatever was held back turned out to be image data, so emit it.
            accumulated.WriteTo(baos);
            accumulated.SetLength(0);
            if (isWhitespace){
                // A whitespace byte can always be the start of the terminator, even when it is the
                // byte that broke a previous partial match (e.g. <ws>E<ws>EI<ws>). Hold it back and
                // restart matching from here instead of treating it as plain data.
                accumulated.WriteByte((byte)ch);
                found = 1;
            } else {
                baos.WriteByte((byte)ch);
                found = 0;
            }
        }
    }
    throw new InlineImageParseException("Could not find image data or EI");
}