// Commented-out variant of MergeEnglishSpecialWord with an ArrayList-based
// signature. It is not compiled; kept here unchanged apart from translated comments.
//private String MergeEnglishSpecialWord(CExtractWords extractWords, ArrayList words, int start, ref int end)
//{
//    StringBuilder str = new StringBuilder();
//    int i;
//    for (i = start; i < words.Count; i++)
//    {
//        string word = (string)words[i];
//        // Stop scanning when the word is empty or is a separator such as
//        // a space, carriage return or line feed.
//        if (word.Trim() == "")
//        {
//            break;
//        }
//        // Stop scanning when a Chinese character is encountered.
//        if (word[0] >= 0x4e00 && word[0] <= 0x9fa5)
//        {
//            break;
//        }
//        str.Append(word);
//    }
//    String mergeString = str.ToString();
//    List<T_WordInfo> exWords = extractWords.ExtractFullText(mergeString);
//    if (exWords.Count == 1)
//    {
//        T_WordInfo info = (T_WordInfo)exWords[0];
//        if (info.Word.Length == mergeString.Length)
//        {
//            end = i;
//            return mergeString;
//        }
//    }
//    return null;
//}

/// <summary>
/// Merges English special words.
/// If the dictionary contains English special words such as U.S.A, C++ or C#,
/// the English and symbol tokens produced by the preliminary segmentation
/// have to be merged back into a single word.
/// Starting at <paramref name="current"/>, the nodes that follow are scanned for as
/// long as their type is Symbol or English. The text from the position of
/// <paramref name="current"/> up to the end of the last such node is looked up in the
/// word dictionary; when an entry exists, all of those nodes are removed from the list
/// and replaced by the merged word (type English).
/// </summary>
/// <param name="orginalText">Text the merged word is cut from; node positions are used as offsets into it.</param>
/// <param name="wordInfoList">Token list that is modified in place when a merge happens.</param>
/// <param name="current">
/// On entry, the first node of the candidate run (its own word type is not checked here).
/// When the method returns true, it refers to the node that followed the merged run,
/// or null if the run reached the end of the list. It is left untouched when false is returned.
/// </param>
/// <returns>
/// true if the nodes were merged; false if no Symbol/English node follows
/// <paramref name="current"/> or the dictionary has no entry for the combined text.
/// </returns>
private bool MergeEnglishSpecialWord(string orginalText, SuperLinkedList<WordInfo> wordInfoList, ref SuperLinkedListNode<WordInfo> current)
{
    // Walk forward over the Symbol/English nodes after 'current'. 'stop' ends up on
    // the first node that is not part of the run (or null at the end of the list).
    int endPos = -1;
    SuperLinkedListNode<WordInfo> stop;
    for (stop = current.Next; stop != null; stop = stop.Next)
    {
        if (stop.Value.WordType != WordType.Symbol && stop.Value.WordType != WordType.English)
        {
            break;
        }

        endPos = stop.Value.Position + stop.Value.Word.Length;
    }

    // Nothing mergeable follows 'current'.
    if (endPos < 0)
    {
        return false;
    }

    int first = current.Value.Position;
    int length = endPos - first;

    // Only merge when the combined text is a known dictionary word.
    WordAttribute attr = _WordDictionary.GetWordAttr(orginalText.Substring(first, length));
    if (attr == null)
    {
        return false;
    }

    // Drop every node of the run; afterwards 'current' equals 'stop'.
    while (current != stop)
    {
        SuperLinkedListNode<WordInfo> doomed = current;
        current = doomed.Next;
        wordInfoList.Remove(doomed);
    }

    WordInfo merged = new WordInfo(new PanGu.Dict.PositionLength(first, length, attr), orginalText, _Parameters);
    merged.WordType = WordType.English;
    merged.Rank = _Parameters.EnglishRank;

    // NOTE(review): String.ToLower() uses the current culture; confirm that is
    // intended for dictionary words (e.g. under a Turkish locale).
    if (_Options.EnglishSegment)
    {
        string lowered = merged.Word.ToLower();

        // Keep the original-cased word as well when it differs from the lower-case form.
        if (lowered != merged.Word)
        {
            InsertMergedWordInfo(wordInfoList, current, merged);
        }

        // The lower-case form is always emitted, ranked with EnglishLowerRank.
        merged = new WordInfo(lowered, merged.Position, merged.Pos, merged.Frequency,
            _Parameters.EnglishLowerRank, merged.WordType, merged.OriginalWordType);
    }
    else if (_Options.IgnoreCapital)
    {
        merged.Word = merged.Word.ToLower();
    }

    InsertMergedWordInfo(wordInfoList, current, merged);
    return true;
}

/// <summary>
/// Inserts <paramref name="info"/> before <paramref name="anchor"/>, or appends it
/// to the end of <paramref name="list"/> when <paramref name="anchor"/> is null.
/// </summary>
private static void InsertMergedWordInfo(SuperLinkedList<WordInfo> list, SuperLinkedListNode<WordInfo> anchor, WordInfo info)
{
    if (anchor == null)
    {
        list.AddLast(info);
        return;
    }

    list.AddBefore(anchor, info);
}