/// ------------------------------------------------------------------------------------
/// <summary>
/// Gets a list of TextTokenSubstrings containing the references and character offsets
/// where repeated words occur.
/// </summary>
/// <remarks>
/// As a side effect this refreshes <c>characterCategorizer</c>, <c>ValidItems</c> and
/// <c>InvalidItems</c> from the checks data source, and replaces <c>m_repeatedWords</c>
/// with a new list on every call.
/// </remarks>
/// <param name="tokens">The tokens (from the data source) to check for repeated words.
/// The sequence is enumerated exactly once.</param>
/// <param name="desiredKey">If looking for occurrences of a specific repeated word,
/// set this to be that word; otherwise pass an empty string.</param>
/// <returns>The newly created list held in <c>m_repeatedWords</c>, which is shared with
/// both token processors (presumably they append the repeated words they find to it).
/// </returns>
/// ------------------------------------------------------------------------------------
public List<TextTokenSubstring> GetReferences(IEnumerable<ITextToken> tokens, string desiredKey)
{
    characterCategorizer = m_checksDataSource.CharacterCategorizer;

    // Get a string of words that may be validly repeated.
    // Words are separated by blanks.
    ValidItems = m_checksDataSource.GetParameterValue("RepeatableWords");

    // List of words that are known to be not repeatable.
    InvalidItems = m_checksDataSource.GetParameterValue("NonRepeatableWords");

    TextType prevTextType = TextType.Other;
    m_repeatedWords = new List<TextTokenSubstring>();

    // Note tokens and body (Verse/Other) tokens are fed to separate processors;
    // both are constructed with the same result list.
    ProcessRepeatedWords bodyProcessor =
        new ProcessRepeatedWords(characterCategorizer, m_repeatedWords, desiredKey);
    ProcessRepeatedWords noteProcessor =
        new ProcessRepeatedWords(characterCategorizer, m_repeatedWords, desiredKey);

    foreach (ITextToken tok in tokens)
    {
        // A new paragraph starts both processors over.
        if (tok.IsParagraphStart)
        {
            noteProcessor.Reset();
            bodyProcessor.Reset();
        }

        if (tok.TextType == TextType.Note)
        {
            // Each note is checked on its own.
            if (tok.IsNoteStart)
            {
                noteProcessor.Reset();
            }

            noteProcessor.ProcessToken(tok);
        }

        // When we leave a caption, we start over checking for repeated words.
        // A caption is a start of a paragraph, so we already start over
        // when we encounter a picture caption.
        // NOTE(review): this resets only the note processor, and it runs after a Note
        // token has already been processed above - confirm that this target and
        // position are what was intended.
        if (prevTextType == TextType.PictureCaption)
        {
            noteProcessor.Reset();
        }

        if (tok.TextType == TextType.Verse || tok.TextType == TextType.Other)
        {
            // Body text ends any note that was in progress.
            noteProcessor.Reset();
            bodyProcessor.ProcessToken(tok);
        }

        // A chapter number starts the body processor over.
        if (tok.TextType == TextType.ChapterNumber)
        {
            bodyProcessor.Reset();
        }

        prevTextType = tok.TextType;
    }

    return m_repeatedWords;
}