SolarixGrammarEngineNET.GrammarEngine2.Tokenize C# (CSharp) Method

Tokenize() public method

public Tokenize ( string Text, int LanguageID ) : string[]
Text string
LanguageID int
return string[]
        /// <summary>
        /// Forwards <paramref name="Text"/> and <paramref name="LanguageID"/> to the
        /// engine-level <c>sol_TokenizeFX</c> call, using this instance's engine handle.
        /// </summary>
        /// <param name="Text">Text handed to the tokenizer unchanged.</param>
        /// <param name="LanguageID">Language identifier handed to the tokenizer unchanged.</param>
        /// <returns>Whatever <c>sol_TokenizeFX</c> returns (presumably the tokens of <paramref name="Text"/> - confirm against the engine API).</returns>
        public string[] Tokenize( string Text, int LanguageID )
        {
            var engineHandle = GetEngineHandle();
            string[] tokenized = SolarixGrammarEngineNET.GrammarEngine.sol_TokenizeFX( engineHandle, Text, LanguageID );
            return tokenized;
        }

Usage Example

Example #1
0
    /// <summary>
    /// Normalizes a phrase: strips a trailing ".." and then a trailing "!", tokenizes the
    /// result with the grammar engine (Russian language), drops the first matching leading
    /// token sequence listed in <c>prefixes</c>, and removes the first occurrence of every
    /// token listed in <c>infixes</c>.
    /// </summary>
    /// <param name="phrase0">The raw input phrase.</param>
    /// <param name="gren">Grammar engine used to tokenize the phrase.</param>
    /// <returns>
    /// The remaining tokens joined with single spaces when a prefix or an infix was removed;
    /// otherwise the phrase with only the trailing punctuation stripped.
    /// </returns>
    /// <remarks>
    /// <c>prefixes</c> and <c>infixes</c> are defined outside this method. Prefixes are
    /// assumed to be lower-case, '|'-separated token sequences - TODO confirm at their declaration.
    /// </remarks>
    public string Preprocess(string phrase0, SolarixGrammarEngineNET.GrammarEngine2 gren)
    {
        string phrase = phrase0;

        // Literal punctuation checks: compare ordinally so the result does not depend on the current culture.
        if (phrase.EndsWith("..", StringComparison.Ordinal))
        {
            phrase = phrase.Substring(0, phrase.Length - 2);
        }

        if (phrase.EndsWith("!", StringComparison.Ordinal))
        {
            phrase = phrase.Substring(0, phrase.Length - 1);
        }

        string[]      tokens     = gren.Tokenize(phrase, SolarixGrammarEngineNET.GrammarEngineAPI.RUSSIAN_LANGUAGE);
        List <string> res_tokens = tokens.ToList();
        bool          changed    = false;

        // Lower-cased, '|'-joined view of the tokens; used only for prefix matching.
        string s = string.Join("|", tokens).ToLowerInvariant();

        foreach (string prefix in prefixes)
        {
            if (StartsWithWholeTokens(s, prefix))
            {
                // Example input left by the original author: "Ну и жара нынче стоит!"
                // (roughly "My, what heat we are having today!").
                int prefixTokenCount = prefix.Split("|".ToCharArray(), StringSplitOptions.RemoveEmptyEntries).Length;
                res_tokens = res_tokens.Skip(prefixTokenCount).ToList();
                changed    = true;
                break; // only the first matching prefix is stripped
            }
        }

        foreach (string infix in infixes)
        {
            // List<T>.Remove reports whether it removed something, so no separate Contains scan is needed.
            // Matching is case-sensitive against the original tokens, and only the first occurrence is removed.
            if (res_tokens.Remove(infix))
            {
                changed = true;
            }
        }

        if (changed)
        {
            return string.Join(" ", res_tokens);
        }

        return phrase;
    }

    // Returns true when the '|'-joined token string starts with the given prefix AND the match
    // ends on a token boundary, so that a prefix like "a|b" does not match "a|bcd|...".
    private static bool StartsWithWholeTokens(string joinedTokens, string prefix)
    {
        if (!joinedTokens.StartsWith(prefix, StringComparison.Ordinal))
        {
            return false;
        }

        // An empty prefix, or one that already ends with the separator, cannot cut a token in half.
        if (prefix.Length == 0 || prefix[prefix.Length - 1] == '|')
        {
            return true;
        }

        // Otherwise the prefix must be followed by the end of the string or by a separator.
        return joinedTokens.Length == prefix.Length || joinedTokens[prefix.Length] == '|';
    }