private static IEnumerable<Token> Tokenize(TokenType tokenKind, string pattern, ref KeyValuePair<string, int>[] untokenizedParts)
{
//Do a bit of setup
var resultParts = new List<KeyValuePair<string, int>>();
var resultTokens = new List<Token>();
var regex = new Regex(pattern);
//Look through all of our currently untokenized data
foreach (var part in untokenizedParts)
{
//Find all of our available matches
List<Match> matches = regex.Matches(part.Key).OfType<Match>().ToList();
//If we don't have any, keep the data as untokenized and move to the next chunk
if (matches.Count == 0)
{
resultParts.Add(part);
continue;
}
//Store the untokenized data in a working copy and save the absolute index it reported itself at in the source file
string workingPart = part.Key;
int index = part.Value;
//Look through each of the matches that were found within this untokenized segment
foreach (Match match in matches)
{
//Calculate the effective start of the match within the working copy of the data
int effectiveStart = match.Index - (part.Key.Length - workingPart.Length);
resultTokens.Add(Token.Create(tokenKind, match, part.Value));
//If we didn't match at the beginning, save off the first portion to the set of untokenized data we'll give back
if (effectiveStart > 0)
{
string value = workingPart.Substring(0, effectiveStart);
resultParts.Add(new KeyValuePair<string, int>(value, index));
}
//Get rid of the portion of the working copy we've already used
if (match.Index + match.Length < part.Key.Length)
workingPart = workingPart.Substring(effectiveStart + match.Length);
else
workingPart = string.Empty;
//Update the current absolute index in the source file we're reporting to be at
index += effectiveStart + match.Length;
}
//If we've got remaining data in the working copy, add it back to the untokenized data
if (!string.IsNullOrEmpty(workingPart))
resultParts.Add(new KeyValuePair<string, int>(workingPart, index));
}
//Update the untokenized data to contain what we couldn't process with this pattern
untokenizedParts = resultParts.ToArray();
//Return the tokens we were able to extract
return resultTokens;
}