// GetDistance(String source, String target) : float
//   source : String
//   target : String
//   return : float
/// <summary>
/// Computes the Jaccard similarity of two strings over their sets of padded
/// q-grams: |shared q-grams| / |distinct q-grams in either string|.
/// </summary>
/// <param name="source">First string to compare.</param>
/// <param name="target">Second string to compare.</param>
/// <returns>
/// A value in [0, 1]. Returns 1 when both strings are empty, 0 when exactly one
/// is empty, and 0 when either string is shorter than the q-gram size <c>n</c>.
/// </returns>
/// <remarks>
/// Each string is padded with <c>n - 1</c> NUL characters on both sides before
/// the q-grams are extracted. The q-gram size <c>n</c> is read from the
/// enclosing type and is expected to be at least 1.
/// </remarks>
public float GetDistance(String source, String target)
{
    int sl = source.Length;
    int tl = target.Length;

    // Two empty strings are identical; an empty string shares nothing with a non-empty one.
    if (sl == 0 || tl == 0)
    {
        return sl == tl ? 1f : 0f;
    }

    // Strings shorter than a single q-gram are reported as completely dissimilar.
    if (sl < n || tl < n)
    {
        return 0;
    }

    HashSet<string> sourceGrams = BuildPaddedQGrams(source, n);
    HashSet<string> targetGrams = BuildPaddedQGrams(target, n);

    // Size of the intersection of the two q-gram sets.
    int matches = 0;
    foreach (string qgram in sourceGrams)
    {
        if (targetGrams.Contains(qgram))
        {
            matches++;
        }
    }

    // |A union B| = |A| + |B| - |A intersect B|; no third set is needed.
    int unionCount = sourceGrams.Count + targetGrams.Count - matches;
    return (float)matches / (float)unionCount;
}

/// <summary>
/// Builds the set of distinct q-grams of <paramref name="text"/> after padding
/// it with <c>size - 1</c> NUL characters on each side.
/// </summary>
private static HashSet<string> BuildPaddedQGrams(string text, int size)
{
    // A freshly allocated char[] is zero-filled, so the padding is already in
    // place; only the text has to be copied in, starting right after the
    // leading pad. Copying the whole string keeps its last characters intact
    // for every q-gram size, not just for size <= 2.
    char[] padded = new char[text.Length + 2 * size - 2];
    text.CopyTo(0, padded, size - 1, text.Length);

    HashSet<string> grams = new HashSet<string>();
    for (int start = 0; start + size <= padded.Length; start++)
    {
        grams.Add(new string(padded, start, size));
    }
    return grams;
}
/// <summary>
/// Scans one bucket of <c>_IndexDictionary</c> for the entries most similar to
/// <paramref name="word"/> (Jaccard similarity) and collects the best ones.
/// </summary>
/// <param name="word">Word to look up; it is lower-cased before comparison.</param>
/// <param name="maxSimilarity">Best similarity found so far; only entries that reach or beat it are collected.</param>
/// <param name="index">
/// Offset added to the word length to select the bucket; <c>_minWordLength</c>
/// is subtracted from it first.
/// </param>
/// <param name="equalMinDistanceDictWordList">
/// Receives the best-scoring entries. It is cleared whenever a strictly better
/// similarity is found and appended to on ties.
/// </param>
/// <returns>
/// The updated best similarity, or <paramref name="maxSimilarity"/> unchanged
/// when the selected bucket is out of range or null.
/// </returns>
public static double findSimilarDictionaryWord(string word, double maxSimilarity, int index, List<string> equalMinDistanceDictWordList)
{
    index = index - _minWordLength;
    word = word.ToLower();

    // Bucket selected by word length plus the adjusted offset.
    int bucketIndex = word.Length + index;
    if (bucketIndex < 0 || bucketIndex >= _IndexDictionary.Length)
    {
        return maxSimilarity;
    }

    var bucket = _IndexDictionary[bucketIndex];
    if (bucket == null)
    {
        return maxSimilarity;
    }

    // One comparer is enough for the whole scan.
    JaccardDistance jd = new JaccardDistance();

    for (int j = 0; j < bucket.Count; j++)
    {
        string candidate = bucket[j];
        double newSimilarity = jd.GetDistance(word, candidate);

        if (newSimilarity > maxSimilarity)
        {
            // Strictly better match: discard everything collected so far.
            equalMinDistanceDictWordList.Clear();
            equalMinDistanceDictWordList.Add(candidate);
            maxSimilarity = newSimilarity;
        }
        else if (newSimilarity == maxSimilarity)
        {
            // Tie with the current best: keep it alongside the others.
            equalMinDistanceDictWordList.Add(candidate);
        }
    }

    return maxSimilarity;
}