Lucene.Net.QueryParsers.TestQueryParser.TestCJK C# (CSharp) Method

TestCJK() public virtual method

public virtual TestCJK ( ) : void
returns void
        public virtual void TestCJK()
        {
            // Purpose: exercise the Ideographic Space (U+3000), which is as wide as a
            // CJK character cell (fullwidth), as a separator between query terms. Each
            // query below is expected to come back with plain spaces (U+0020) between
            // the terms.
            //
            // NOTE(review): AssertQueryEquals is defined outside this method; the null
            // second argument presumably selects its default analyzer -- confirm there.

            // Case 1: ASCII terms separated by U+3000.
            string asciiQuery = "term\u3000term\u3000term";
            string asciiExpected = "term\u0020term\u0020term";
            AssertQueryEquals(asciiQuery, null, asciiExpected);

            // Case 2: CJK terms separated by U+3000.
            //
            // The term is the two-character word that Google Translate gave for "term"
            // in Japanese at the time the note was written: code points 26399 and
            // 38291, i.e. '\u671f' and '\u9593'. Both occupy a full CJK character cell,
            // so this two-character word is enough for the test.
            //
            // History: the string previously used here was not that translation. Its
            // second and third characters ('\u201d' and '\u00a8') fail the letter check
            // when tokenized by LetterTokenizer (as they should in Java), so the word
            // was split differently than it would be if it consisted of letters as
            // defined by Unicode.
            string cjkQuery = "\u671f\u9593\u3000\u671f\u9593\u3000\u671f\u9593";
            string cjkExpected = "\u671f\u9593\u0020\u671f\u9593\u0020\u671f\u9593";
            AssertQueryEquals(cjkQuery, null, cjkExpected);
        }