/// <summary>
/// Reads synonym rules from <paramref name="in"/> one line at a time and registers
/// every resulting input/output pair through <c>Add</c>.
/// </summary>
/// <remarks>
/// <para>Empty lines and lines whose first character is <c>#</c> are skipped.</para>
/// <para>
/// A line that <c>Split</c> divides into two sides around <c>=&gt;</c> is an explicit
/// mapping: each comma-separated term on the left is mapped to each comma-separated
/// term on the right. More than two sides is rejected.
/// </para>
/// <para>
/// Any other line is a comma-separated list of terms. When <c>expand</c> is true every
/// term is mapped to every term in the list; otherwise every term is mapped to the
/// first term only.
/// </para>
/// <para>The reader is disposed before this method returns, whether or not parsing succeeds.</para>
/// </remarks>
/// <param name="in">Source of the synonym rules; disposed by this method.</param>
/// <exception cref="Exception">
/// An <see cref="System.ArgumentException"/> was raised while processing a line; the
/// message carries the 1-based line number and the original exception is the inner exception.
/// </exception>
public override void Parse(TextReader @in)
{
    int lineNumber = 0;
    try
    {
        string line;
        while ((line = @in.ReadLine()) != null)
        {
            lineNumber++;
            if (line.Length == 0 || line[0] == '#')
            {
                continue; // ignore empty lines and comments
            }

            CharsRef[] inputs;
            CharsRef[] outputs;

            // TODO: we could process this more efficiently.
            string[] sides = Split(line, "=>");
            if (sides.Length > 1) // explicit mapping
            {
                if (sides.Length != 2)
                {
                    throw new System.ArgumentException("more than one explicit mapping specified on the same line");
                }
                inputs = AnalyzeTerms(Split(sides[0], ","));
                outputs = AnalyzeTerms(Split(sides[1], ","));
            }
            else
            {
                inputs = AnalyzeTerms(Split(line, ","));
                if (expand)
                {
                    outputs = inputs;
                }
                else
                {
                    // Guard the inputs[0] access below. If Split yields no terms the
                    // unguarded index would raise IndexOutOfRangeException, which the
                    // catch below does not handle, losing the line number.
                    // NOTE(review): Split is not visible here — confirm whether it can
                    // return an empty array (e.g. for a line holding only separators).
                    if (inputs.Length == 0)
                    {
                        throw new System.ArgumentException("no synonym terms found on the line");
                    }
                    outputs = new CharsRef[] { inputs[0] };
                }
            }

            // currently we include the term itself in the map,
            // and use includeOrig = false always.
            // this is how the existing filter does it, but it's actually a bug,
            // especially if combined with ignoreCase = true
            for (int i = 0; i < inputs.Length; i++)
            {
                for (int j = 0; j < outputs.Length; j++)
                {
                    Add(inputs[i], outputs[j], false);
                }
            }
        }
    }
    catch (System.ArgumentException e)
    {
        // Re-wrap so the caller learns which line of the input was rejected.
        throw new Exception("Invalid synonym rule at line " + lineNumber, e);
    }
    finally
    {
        @in.Dispose();
    }
}

/// <summary>
/// Unescapes, trims and analyzes each raw term, returning the results in the same order.
/// </summary>
/// <param name="terms">Raw term strings as produced by <c>Split</c>.</param>
/// <returns>An array with one analyzed entry per element of <paramref name="terms"/>.</returns>
private CharsRef[] AnalyzeTerms(string[] terms)
{
    CharsRef[] analyzed = new CharsRef[terms.Length];
    for (int i = 0; i < analyzed.Length; i++)
    {
        analyzed[i] = Analyze(Unescape(terms[i]).Trim(), new CharsRef());
    }
    return analyzed;
}