/// <summary>
/// Splits <paramref name="pattern"/> into an ordered list of chunks: literal
/// text (<c>TextChunk</c>) and tags (<c>TagChunk</c>) delimited by the
/// <c>start</c> and <c>stop</c> strings of this instance.
/// </summary>
/// <remarks>
/// A delimiter immediately preceded by <c>escape</c> is treated as literal
/// text. After chunking, every occurrence of <c>escape</c> is removed from
/// text chunks; tag contents are left untouched. A tag of the form
/// <c>label:name</c> is split at the first ':' into label and rule/token name.
/// Delimiters are matched ordinally (character by character), independent of
/// the current culture.
/// </remarks>
/// <param name="pattern">The pattern text to split.</param>
/// <returns>The chunks in the order they appear in the pattern.</returns>
/// <exception cref="System.ArgumentException">
/// The number of start and stop delimiters differs, or the delimiters are not
/// strictly alternating (start, stop, start, stop, ...).
/// </exception>
internal virtual IList<Chunk> Split(string pattern)
{
    int p = 0;
    int n = pattern.Length;
    IList<Chunk> chunks = new List<Chunk>();

    // Hoisted out of the scan loop: these do not change per character.
    string escapedStart = escape + start;
    string escapedStop = escape + stop;

    // find all start and stop indexes first, then collect
    IList<int> starts = new List<int>();
    IList<int> stops = new List<int>();
    while (p < n)
    {
        // Escaped delimiters are tested first so they are skipped as text.
        if (MatchesAt(pattern, p, escapedStart))
        {
            p += escapedStart.Length;
        }
        else if (MatchesAt(pattern, p, escapedStop))
        {
            p += escapedStop.Length;
        }
        else if (MatchesAt(pattern, p, start))
        {
            starts.Add(p);
            p += start.Length;
        }
        else if (MatchesAt(pattern, p, stop))
        {
            stops.Add(p);
            p += stop.Length;
        }
        else
        {
            p++;
        }
    }

    if (starts.Count > stops.Count)
    {
        throw new ArgumentException("unterminated tag in pattern: " + pattern);
    }
    if (starts.Count < stops.Count)
    {
        throw new ArgumentException("missing start tag in pattern: " + pattern);
    }

    int ntags = starts.Count;
    for (int i = 0; i < ntags; i++)
    {
        // Each start must precede its own stop, and the next start must not
        // come before this stop (e.g. a start delimiter inside an open tag).
        // Without the second check, the text between tags would be requested
        // with an end index smaller than its begin index further below.
        bool stopBeforeStart = starts[i] >= stops[i];
        bool nextStartInsideTag = i + 1 < ntags && starts[i + 1] < stops[i];
        if (stopBeforeStart || nextStartInsideTag)
        {
            throw new ArgumentException("tag delimiters out of order in pattern: " + pattern);
        }
    }

    // collect into chunks now
    if (ntags == 0)
    {
        // No tags at all: the whole pattern is one text chunk.
        string text = Sharpen.Runtime.Substring(pattern, 0, n);
        chunks.Add(new TextChunk(text));
    }
    if (ntags > 0 && starts[0] > 0)
    {
        // copy text up to first tag into chunks
        string text = Sharpen.Runtime.Substring(pattern, 0, starts[0]);
        chunks.Add(new TextChunk(text));
    }
    for (int i = 0; i < ntags; i++)
    {
        // copy inside of <tag>
        string tag = Sharpen.Runtime.Substring(pattern, starts[i] + start.Length, stops[i]);
        string ruleOrToken = tag;
        string label = null;
        int colon = tag.IndexOf(':');
        if (colon >= 0)
        {
            label = Sharpen.Runtime.Substring(tag, 0, colon);
            ruleOrToken = Sharpen.Runtime.Substring(tag, colon + 1, tag.Length);
        }
        chunks.Add(new TagChunk(label, ruleOrToken));
        if (i + 1 < ntags)
        {
            // copy from end of <tag> to start of next
            string text = Sharpen.Runtime.Substring(pattern, stops[i] + stop.Length, starts[i + 1]);
            chunks.Add(new TextChunk(text));
        }
    }
    if (ntags > 0)
    {
        int afterLastTag = stops[ntags - 1] + stop.Length;
        if (afterLastTag < n)
        {
            // copy text from end of last tag to end
            string text = Sharpen.Runtime.Substring(pattern, afterLastTag, n);
            chunks.Add(new TextChunk(text));
        }
    }

    // strip out the escape sequences from text chunks but not tags
    for (int i = 0; i < chunks.Count; i++)
    {
        TextChunk tc = chunks[i] as TextChunk;
        if (tc != null)
        {
            string unescaped = tc.Text.Replace(escape, string.Empty);
            // Only replace the chunk when something was actually removed.
            if (unescaped.Length < tc.Text.Length)
            {
                chunks[i] = new TextChunk(unescaped);
            }
        }
    }
    return chunks;
}

/// <summary>
/// Returns true when <paramref name="token"/> occurs in <paramref name="text"/>
/// starting exactly at <paramref name="index"/>, using ordinal comparison.
/// </summary>
private static bool MatchesAt(string text, int index, string token)
{
    // Compares at most token.Length characters; a shorter remainder of text
    // compares unequal, so no explicit bounds check is needed.
    return string.CompareOrdinal(text, index, token, 0, token.Length) == 0;
}