public Exec ( string a, string b ) : string
| a | string | 1st string |
| b | string | 2nd string |
| return | string | the patch string |
/// <summary>
/// Builds the patch string that describes how <paramref name="a"/> is turned
/// into <paramref name="b"/>.
/// <para>
/// A cost table (<c>net</c>) and a direction table (<c>way</c>) are filled for
/// every pair of prefixes of the two strings using the <c>INSERT</c>,
/// <c>DELETE</c>, <c>REPLACE</c> and <c>NOOP</c> costs. The recorded directions
/// are then followed backwards from the ends of both strings, emitting:
/// <c>D</c> + count (delete), <c>-</c> + count (unchanged characters),
/// <c>I</c> + character (insert) and <c>R</c> + character (replace).
/// A count is written as a single character: <c>'a'</c> is 1, <c>'b'</c> is 2,
/// and so on. A run of unchanged characters that is still pending when the
/// walk reaches the start of both strings is not written.
/// </para>
/// </summary>
/// <param name="a">1st string</param>
/// <param name="b">2nd string</param>
/// <returns>
/// the patch string, or <c>null</c> if either argument is <c>null</c>
/// </returns>
public string Exec(string a, string b)
{
    if (a == null || b == null)
    {
        return null;
    }

    // Direction codes: stored in 'way' and used as indices into 'go'.
    const int D = 0; // diagonal step, emitted as "no change"
    const int X = 1; // step along a, emitted as delete
    const int Y = 2; // step along b, emitted as insert
    const int R = 3; // diagonal step, emitted as replace

    int[] go = new int[4];
    int x;
    int y;
    int maxx = a.Length + 1;
    int maxy = b.Length + 1;

    /*
     * setup memory if needed => processing speed up
     * (the tables are only reallocated when the current ones are too small)
     */
    if ((maxx >= sizex) || (maxy >= sizey))
    {
        sizex = maxx + 8;
        sizey = maxy + 8;
        net = RectangularArrays.ReturnRectangularIntArray(sizex, sizey);
        way = RectangularArrays.ReturnRectangularIntArray(sizex, sizey);
    }

    /*
     * clear the network
     */
    for (x = 0; x < maxx; x++)
    {
        for (y = 0; y < maxy; y++)
        {
            net[x][y] = 0;
        }
    }

    /*
     * set known persistent values: along the borders only one direction
     * is possible
     */
    for (x = 1; x < maxx; x++)
    {
        net[x][0] = x;
        way[x][0] = X;
    }
    for (y = 1; y < maxy; y++)
    {
        net[0][y] = y;
        way[0][y] = Y;
    }

    for (x = 1; x < maxx; x++)
    {
        for (y = 1; y < maxy; y++)
        {
            // way on x: delete a character of a
            go[X] = net[x - 1][y] + DELETE;
            // way on y: insert a character of b
            go[Y] = net[x][y - 1] + INSERT;
            go[R] = net[x - 1][y - 1] + REPLACE;
            // diagonal costs NOOP when the characters are equal; a mismatch
            // is charged a fixed penalty of 100 instead
            go[D] = net[x - 1][y - 1]
                + ((a[x - 1] == b[y - 1]) ? NOOP : 100);

            // On equal cost X wins over D (>=); Y and R only win when they
            // are strictly cheaper than the current choice.
            int min = D;
            if (go[min] >= go[X])
            {
                min = X;
            }
            if (go[min] > go[Y])
            {
                min = Y;
            }
            if (go[min] > go[R])
            {
                min = R;
            }
            way[x][y] = min;
            // Store the full int cost; narrowing it would silently wrap for
            // values that do not fit into 16 bits.
            net[x][y] = go[min];
        }
    }

    // read the patch string
    StringBuilder result = new StringBuilder();
    // Counters start one below 'a', so the first increment yields 'a' (= 1).
    char @base = (char)('a' - 1);
    char deletes = @base;
    char equals = @base;
    x = maxx - 1;
    y = maxy - 1;
    while (x + y != 0)
    {
        switch (way[x][y])
        {
            case X:
                // delete
                if (equals != @base)
                {
                    result.Append('-').Append(equals);
                    equals = @base;
                }
                deletes++;
                x--;
                break;
            case Y:
                // insert
                if (deletes != @base)
                {
                    result.Append('D').Append(deletes);
                    deletes = @base;
                }
                if (equals != @base)
                {
                    result.Append('-').Append(equals);
                    equals = @base;
                }
                result.Append('I');
                result.Append(b[--y]);
                break;
            case R:
                // replace
                if (deletes != @base)
                {
                    result.Append('D').Append(deletes);
                    deletes = @base;
                }
                if (equals != @base)
                {
                    result.Append('-').Append(equals);
                    equals = @base;
                }
                result.Append('R');
                result.Append(b[--y]);
                x--;
                break;
            case D:
                // no change
                if (deletes != @base)
                {
                    result.Append('D').Append(deletes);
                    deletes = @base;
                }
                equals++;
                x--;
                y--;
                break;
        }
    }

    // Only a pending delete run is flushed at the end; a pending run of
    // unchanged characters is intentionally left out of the patch.
    if (deletes != @base)
    {
        result.Append('D').Append(deletes);
    }
    return result.ToString();
}
}
/// <summary>
/// Entry point to the DiffIt application.
/// <para>
/// The first argument is passed to <c>Get</c> with the indices 0 to 3 to obtain
/// the insert, delete, replace and no-op values handed to <c>Diff</c>. Every
/// further argument is the path to a file containing a stemmer table, except
/// that <c>-e</c> or <c>--encoding</c> followed by a charset name selects the
/// encoding used to read the tables (overriding the
/// <c>egothor:stemmer:charset</c> property, which defaults to <c>UTF-8</c>).
/// The program reads each file and writes the patch commands for the stems to
/// the console.
/// </para>
/// </summary>
/// <param name="args">
/// the cost argument, followed by the paths to files containing a stemmer
/// table and an optional encoding switch
/// </param>
/// <exception cref="ArgumentException">
/// the encoding switch is the last argument and no charset name follows it
/// </exception>
public static void Main(string[] args)
{
    int ins = Get(0, args[0]);
    int del = Get(1, args[0]);
    int rep = Get(2, args[0]);
    int nop = Get(3, args[0]);

    // LUCENENET specific - reformatted with :
    string charset = SystemProperties.GetProperty("egothor:stemmer:charset", "UTF-8");
    var stemmerTables = new List<string>();

    // LUCENENET specific
    // command line argument overrides environment variable or default, if supplied
    for (int i = 1; i < args.Length; i++)
    {
        if ("-e".Equals(args[i], StringComparison.Ordinal) || "--encoding".Equals(args[i], StringComparison.Ordinal))
        {
            if (i + 1 >= args.Length)
            {
                throw new ArgumentException("Missing charset name after " + args[i], "args");
            }

            // The charset name is the argument FOLLOWING the switch; consume
            // it here so it is not mistaken for a stemmer table path.
            charset = args[++i];
        }
        else
        {
            stemmerTables.Add(args[i]);
        }
    }

    foreach (var stemmerTable in stemmerTables)
    {
        Diff diff = new Diff(ins, del, rep, nop);

        // Resolve the encoding before the file is opened, so an unknown
        // charset name cannot leave an undisposed FileStream behind.
        Encoding encoding = Encoding.GetEncoding(charset);
        using (TextReader input = new StreamReader(new FileStream(stemmerTable, FileMode.Open, FileAccess.Read), encoding))
        {
            string line;
            while ((line = input.ReadLine()) != null)
            {
                try
                {
                    line = line.ToLowerInvariant();
                    StringTokenizer st = new StringTokenizer(line);
                    if (!st.MoveNext())
                    {
                        // no base token (stem) on a line
                        continue;
                    }

                    string stem = st.Current;
                    Console.WriteLine(stem + " -a");
                    while (st.MoveNext())
                    {
                        string token = st.Current;
                        if (!token.Equals(stem, StringComparison.Ordinal))
                        {
                            Console.WriteLine(stem + " " + diff.Exec(token, stem));
                        }
                    }
                }
                catch (InvalidOperationException /*x*/)
                {
                    // no base token (stem) on a line
                    // NOTE(review): kept from the original; presumably raised by
                    // StringTokenizer when no token is available - confirm.
                }
            }
        }
    }
}