Strabo.Core.TextRecognition.JaccardDistance.GetDistanceFast C# (CSharp) Method

GetDistanceFast() public method

public GetDistanceFast ( String source, String target ) : float
source String
target String
return float
        /// <summary>
        /// Computes the Jaccard coefficient between the padded q-gram sets of two strings.
        /// </summary>
        /// <remarks>
        /// Despite the method name, the returned value behaves as a similarity: it is the
        /// number of distinct q-grams common to both strings divided by the number of
        /// distinct q-grams occurring in either string, so identical q-gram sets give 1
        /// and disjoint sets give 0. The q-gram length is the member <c>n</c>, which is
        /// declared outside this method.
        /// </remarks>
        /// <param name="source">First string to compare.</param>
        /// <param name="target">Second string to compare.</param>
        /// <returns>
        /// 1 when both strings are empty; 0 when exactly one is empty or when either is
        /// shorter than <c>n</c>; otherwise |intersection| / |union| of the q-gram sets.
        /// </returns>
        public float GetDistanceFast(String source, String target)
        {
            int sourceLength = source.Length;
            int targetLength = target.Length;

            if (sourceLength == 0 || targetLength == 0)
            {
                // Two empty strings are treated as a perfect match; one empty string matches nothing.
                return sourceLength == targetLength ? 1 : 0;
            }

            // Strings shorter than one q-gram are scored as having nothing in common.
            if (sourceLength < n || targetLength < n)
            {
                return 0;
            }

            HashSet<string> sourceGrams = CollectPaddedQGrams(source);
            HashSet<string> targetGrams = CollectPaddedQGrams(target);

            int shared = 0;
            foreach (string gram in sourceGrams)
            {
                if (targetGrams.Contains(gram))
                {
                    shared++;
                }
            }

            // |A ∪ B| = |A| + |B| - |A ∩ B|, so a third "union" set is not needed.
            int unionCount = sourceGrams.Count + targetGrams.Count - shared;

            return (float)shared / (float)unionCount;
        }

        /// <summary>
        /// Collects the distinct q-grams of length <c>n</c> from <paramref name="text"/>,
        /// treating the text as if it had n - 1 NUL characters on each side.
        /// </summary>
        /// <param name="text">String to split into q-grams.</param>
        /// <returns>The set of distinct padded q-grams found in <paramref name="text"/>.</returns>
        private HashSet<string> CollectPaddedQGrams(String text)
        {
            HashSet<string> grams = new HashSet<string>();
            int length = text.Length;

            // One buffer is reused for every window; new string(char[]) copies its contents.
            char[] window = new char[n];

            // The window start runs from -(n - 1), where only the last slot holds a real
            // character, up to length - 1, where only the first slot does.
            for (int start = 1 - n; start < length; start++)
            {
                for (int offset = 0; offset < n; offset++)
                {
                    int index = start + offset;
                    bool outside = index < 0 || index >= length;
                    window[offset] = outside ? (char)0 : text[index];
                }

                grams.Add(new string(window));
            }

            return grams;
        }

Usage Example

        /// <summary>
        /// Manual smoke test: scores "stree" against two similar-looking strings with
        /// <c>GetDistanceFast</c>. Nothing is asserted or printed; the results are only
        /// held in locals, presumably for inspection in a debugger.
        /// </summary>
        public static void TestJaccard()
        {
            // NOTE(review): the constructor argument 2 is presumably the q-gram length - confirm.
            JaccardDistance scorer = new JaccardDistance(2);

            string baseline = "stree";
            string withTrailingT = "street";
            string reordered = "steere";

            double scoreVersusStreet = scorer.GetDistanceFast(baseline, withTrailingT);
            double scoreVersusSteere = scorer.GetDistanceFast(baseline, reordered);

            // Unused value kept from the original; it looks like a breakpoint anchor.
            double breakpointAnchor = 1;
        }