PanGu.Segment.PreSegment C# (CSharp) Method

PreSegment() private method

private SuperLinkedList<WordInfo> PreSegment(String text)
text: String
Returns: SuperLinkedList<WordInfo>
        /// <summary>
        /// Runs the pre-segmentation pass: takes the initial segmentation of
        /// <paramref name="text"/> and refines it in place, node by node.
        /// </summary>
        /// <remarks>
        /// Per node type:
        /// <list type="bullet">
        /// <item>Space nodes are removed when <c>_Options.IgnoreSpace</c> is set.</item>
        /// <item>SimplifiedChinese nodes are replaced by the words returned from the dictionary matcher.</item>
        /// <item>English nodes get their rank set and, depending on the options, lower-case, stem and
        /// letter/digit split variants inserted in front of them.</item>
        /// <item>Numeric and Symbol nodes get their rank set.</item>
        /// </list>
        /// English and Numeric nodes are additionally offered to <c>MergeEnglishSpecialWord</c>.
        /// </remarks>
        /// <param name="text">The original input text; also passed on to <c>MergeEnglishSpecialWord</c>.</param>
        /// <returns>The list created by <c>GetInitSegment</c>, after the in-place refinement.</returns>
        private SuperLinkedList<WordInfo> PreSegment(String text)
        {
            SuperLinkedList<WordInfo> result = GetInitSegment(text);

            SuperLinkedListNode<WordInfo> cur = result.First;

            while (cur != null)
            {
                if (_Options.IgnoreSpace && cur.Value.WordType == WordType.Space)
                {
                    // Read the successor before unlinking the node.
                    SuperLinkedListNode<WordInfo> spaceNode = cur;
                    cur = cur.Next;
                    result.Remove(spaceNode);
                    continue;
                }

                switch (cur.Value.WordType)
                {
                    case WordType.SimplifiedChinese:
                        cur = PreSegmentChineseNode(result, cur);
                        break;

                    case WordType.English:
                        cur.Value.Rank = _Parameters.EnglishRank;
                        cur.Value.Word = ConvertChineseCapitalToAsiic(cur.Value.Word);

                        if (_Options.EnglishSegment)
                        {
                            AddEnglishCaseAndStemVariants(result, cur);
                        }
                        else if (_Options.IgnoreCapital)
                        {
                            // Invariant casing keeps the produced tokens independent of the
                            // culture of the thread that happens to run the segmenter.
                            cur.Value.Word = cur.Value.Word.ToLowerInvariant();
                        }

                        if (_Options.EnglishMultiDimensionality)
                        {
                            AddEnglishSplitWords(result, cur);
                        }

                        // MergeEnglishSpecialWord receives cur by ref; only advance here
                        // when it reports that it did not merge.
                        if (!MergeEnglishSpecialWord(text, result, ref cur))
                        {
                            cur = cur.Next;
                        }

                        break;

                    case WordType.Numeric:
                        cur.Value.Word = ConvertChineseCapitalToAsiic(cur.Value.Word);
                        cur.Value.Rank = _Parameters.NumericRank;

                        if (!MergeEnglishSpecialWord(text, result, ref cur))
                        {
                            cur = cur.Next;
                        }

                        break;

                    case WordType.Symbol:
                        cur.Value.Rank = _Parameters.SymbolRank;
                        cur = cur.Next;
                        break;

                    default:
                        cur = cur.Next;
                        break;
                }
            }

            return result;
        }

        /// <summary>
        /// Replaces one Chinese node of <paramref name="result"/> with the words the
        /// dictionary matcher finds in it.
        /// </summary>
        /// <param name="result">The list that contains <paramref name="cur"/>.</param>
        /// <param name="cur">The Chinese node to expand; it is removed from the list.</param>
        /// <returns>The node at which the caller should continue, or null at the end of the list.</returns>
        private SuperLinkedListNode<WordInfo> PreSegmentChineseNode(
            SuperLinkedList<WordInfo> result, SuperLinkedListNode<WordInfo> cur)
        {
            string inputText = cur.Value.Word;
            WordType originalWordType = WordType.SimplifiedChinese;

            if (_Options.TraditionalChineseEnabled)
            {
                // NOTE(review): LocaleID 0 is passed to StrConv here and below - confirm the
                // conversion behaves as intended on hosts with a non-Chinese system locale.
                string simplified = Microsoft.VisualBasic.Strings.StrConv(cur.Value.Word,
                    Microsoft.VisualBasic.VbStrConv.SimplifiedChinese, 0);

                if (simplified != cur.Value.Word)
                {
                    // The conversion changed the text, so the node is treated as traditional
                    // Chinese and the dictionary is queried with the simplified form.
                    originalWordType = WordType.TraditionalChinese;
                    inputText = simplified;
                }
            }

            PanGu.Framework.AppendList<Dict.PositionLength> pls =
                _WordDictionary.GetAllMatchs(inputText, _Options.ChineseNameIdentify);
            PanGu.Match.ChsFullTextMatch chsMatch = new PanGu.Match.ChsFullTextMatch(_WordDictionary);
            chsMatch.Options = _Options;
            chsMatch.Parameters = _Parameters;

            // The matcher is handed the node's own word (not inputText) together with the
            // match positions that were looked up for inputText.
            SuperLinkedList<WordInfo> chsMatchWords = chsMatch.Match(pls.Items, cur.Value.Word, pls.Count);

            for (SuperLinkedListNode<WordInfo> matchNode = chsMatchWords.First;
                 matchNode != null;
                 matchNode = matchNode.Next)
            {
                WordInfo wi = matchNode.Value;

                // Shift the position by the position of the node being expanded.
                wi.Position += cur.Value.Position;
                wi.OriginalWordType = originalWordType;
                wi.WordType = originalWordType;

                if (!(_Options.OutputSimplifiedTraditional && _Options.TraditionalChineseEnabled))
                {
                    continue;
                }

                // Build the counterpart of the word in the other script.
                string newWord;
                WordType newWordType;

                if (originalWordType == WordType.SimplifiedChinese)
                {
                    newWord = Microsoft.VisualBasic.Strings.StrConv(wi.Word,
                        Microsoft.VisualBasic.VbStrConv.TraditionalChinese, 0);
                    newWordType = WordType.TraditionalChinese;
                }
                else
                {
                    newWord = Microsoft.VisualBasic.Strings.StrConv(wi.Word,
                        Microsoft.VisualBasic.VbStrConv.SimplifiedChinese, 0);
                    newWordType = WordType.SimplifiedChinese;
                }

                if (newWord != wi.Word)
                {
                    WordInfo newWordInfo = new WordInfo(wi);
                    newWordInfo.Word = newWord;
                    newWordInfo.OriginalWordType = originalWordType;
                    newWordInfo.WordType = newWordType;
                    newWordInfo.Rank = _Parameters.SimplifiedTraditionalRank;
                    newWordInfo.Position = wi.Position;

                    // Inserted before the current match node, so the loop does not visit it.
                    chsMatchWords.AddBefore(matchNode, newWordInfo);
                }
            }

            // Splice the matched words in after cur, then drop cur itself. The successor is
            // read from the node AddAfter returns (presumably the last node it inserted -
            // confirm against SuperLinkedList) before cur is removed.
            SuperLinkedListNode<WordInfo> tail = result.AddAfter(cur, chsMatchWords);
            SuperLinkedListNode<WordInfo> next = tail.Next;
            result.Remove(cur);
            return next;
        }

        /// <summary>
        /// Inserts the lower-case form and the stem of an English node in front of it,
        /// each only when it differs from the form it is derived from.
        /// </summary>
        /// <param name="result">The list that contains <paramref name="cur"/>.</param>
        /// <param name="cur">The English node; it is left unchanged.</param>
        private void AddEnglishCaseAndStemVariants(
            SuperLinkedList<WordInfo> result, SuperLinkedListNode<WordInfo> cur)
        {
            // Invariant casing: the tokens must not depend on the current thread culture.
            string lower = cur.Value.Word.ToLowerInvariant();

            if (lower != cur.Value.Word)
            {
                result.AddBefore(cur, new WordInfo(lower, cur.Value.Position, POS.POS_A_NX, 1,
                    _Parameters.EnglishLowerRank, WordType.English, WordType.English));
            }

            string stem = GetStem(lower);

            if (!string.IsNullOrEmpty(stem) && lower != stem)
            {
                result.AddBefore(cur, new WordInfo(stem, cur.Value.Position, POS.POS_A_NX, 1,
                    _Parameters.EnglishStemRank, WordType.English, WordType.English));
            }
        }

        /// <summary>
        /// Splits an English node that contains a digit or an underscore into the parts
        /// matched by <c>PATTERNS</c> and inserts those parts in front of the node.
        /// Nothing is inserted unless at least two non-empty parts are found.
        /// </summary>
        /// <param name="result">The list that contains <paramref name="cur"/>.</param>
        /// <param name="cur">The English node; it is left unchanged.</param>
        private void AddEnglishSplitWords(
            SuperLinkedList<WordInfo> result, SuperLinkedListNode<WordInfo> cur)
        {
            bool needSplit = false;

            foreach (char c in cur.Value.Word)
            {
                if ((c >= '0' && c <= '9') || (c == '_'))
                {
                    needSplit = true;
                    break;
                }
            }

            if (!needSplit)
            {
                return;
            }

            List<string> output;

            if (!Framework.Regex.GetMatchStrings(cur.Value.Word, PATTERNS, true, out output))
            {
                return;
            }

            // Only the distinction "fewer than two" / "two or more" non-empty parts matters.
            int outputCount = 0;

            foreach (string str in output)
            {
                if (!string.IsNullOrEmpty(str))
                {
                    outputCount++;

                    if (outputCount > 1)
                    {
                        break;
                    }
                }
            }

            if (outputCount <= 1)
            {
                return;
            }

            int position = cur.Value.Position;

            foreach (string splitWord in output)
            {
                if (string.IsNullOrEmpty(splitWord))
                {
                    continue;
                }

                // A part that starts with an ASCII digit is emitted as a numeric word.
                bool startsWithDigit = splitWord[0] >= '0' && splitWord[0] <= '9';

                WordInfo wi = new WordInfo(splitWord, startsWithDigit ? POS.POS_A_M : POS.POS_A_NX, 1);
                wi.Position = position;
                wi.Rank = startsWithDigit ? _Parameters.NumericRank : _Parameters.EnglishRank;
                wi.OriginalWordType = WordType.English;
                wi.WordType = startsWithDigit ? WordType.Numeric : WordType.English;

                result.AddBefore(cur, wi);

                // Each part is positioned directly after the previous non-empty part.
                position += splitWord.Length;
            }
        }