/// <summary>
/// Divides the text of a book into <see cref="UnitTestUSFMTextToken"/>s, starting a
/// new token at every backslash marker found in <paramref name="text"/>.
/// This method assigns Length, Chapter and Verse on the tokens and adjusts Offset for
/// verse and note tokens; every other token property is whatever NextToken produced.
/// Tricky things handled here:
/// 1) \v N abc... is split into two tokens, the first containing just the verse number.
/// 2) \f X abc... the footnote caller (X) is skipped so it is not part of the token text.
/// </summary>
/// <param name="text">The marked-up book text to tokenize. Any text that precedes the
/// first backslash is not placed in a token.</param>
/// <returns>The tokens encountered while inside a publishable paragraph (or a chapter),
/// in text order. See the review notes in the body for the verse-text token.</returns>
private List<UnitTestUSFMTextToken> DivideText(string text)
{
    UnitTestUSFMTextToken tok = null;
    List<UnitTestUSFMTextToken> tokens = new List<UnitTestUSFMTextToken>();
    string chapter = "1";
    string verse = "0";
    bool inPublishable = false;

    for (int i = 0; i < text.Length;)
    {
        // Ordinal (char) search: a marker always starts with a literal backslash.
        int ind = text.IndexOf('\\', i);

        if (tok != null) // if token in progress, set its length
        {
            // The token runs up to the next marker, or to the end of the text.
            int last = (ind == -1) ? text.Length : ind;
            tok.Length = last - tok.Offset;
        }

        if (ind == -1)
        {
            break; // quit if no more markers
        }

        tok = NextToken(text, ind); // start new token

        // Only paragraph-start tokens switch the publishable state on or off.
        // Sadly \c does not have the publishable property set in usfm.sty, so a
        // chapter token is treated as publishable explicitly.
        if (tok.IsParagraphStart)
        {
            inPublishable = tok.IsPublishable || tok.IsChapter;
        }

        if (inPublishable)
        {
            tokens.Add(tok);
        }

        if (tok.IsChapter)
        {
            chapter = GetCVNumber(text, tok.Offset);
            // Everything after \c is verse '0'.
            // This allows the title of Psalms (\d) which are present in the Hebrew
            // text to be considered verse text.
            verse = "0";
        }
        else if (tok.IsVerse)
        {
            // The token created above becomes the token holding just the verse number.
            // NOTE(review): Chapter/Verse are assigned below to the clone only, so the
            // verse-number token never receives them here - confirm this is intended.
            verse = GetCVNumber(text, tok.Offset);
            tok.Length = verse.Length;

            // Make another token to contain the verse text.
            // NOTE(review): unlike the number token, this one is added regardless of
            // inPublishable - confirm this is intended.
            tok = (UnitTestUSFMTextToken)tok.Clone();
            tok.CharStyleName = "";
            tok.Offset += verse.Length;
            // The clone inherited the number token's length. It is recomputed when the
            // next marker (or the end of text) is found, but must not linger if the
            // loop ends first because the offset already reached the end of the text.
            tok.Length = 0;
            tokens.Add(tok);

            // If the number is followed by a space, skip it. The bounds check covers
            // text that ends immediately after the verse number.
            if (tok.Offset < text.Length && char.IsWhiteSpace(text[tok.Offset]))
            {
                tok.Offset += 1;
            }
        }

        tok.Chapter = chapter;
        tok.Verse = verse;

        if (tok.IsNoteStart)
        {
            // Skip over the footnote caller: advance up to and including the first
            // whitespace character, but never past the start of the next marker.
            while (tok.Offset < text.Length)
            {
                char cc = text[tok.Offset];
                if (cc == '\\')
                {
                    break;
                }
                ++tok.Offset;
                if (char.IsWhiteSpace(cc))
                {
                    break;
                }
            }
        }

        // Resume the marker search where this token's text begins.
        i = tok.Offset;
    }

    return tokens;
}