public Dictionary<string, List<WordRecord>> Tag(ICollection<string> words)
{
    // Runs the external part-of-speech tagger over the given words and returns the
    // WordRecords it produced, grouped by word.
    //
    // words:   the words to tag; written to a temp file, one per line, as the tagger's input.
    // returns: a dictionary mapping each tagged word to the list of WordRecords read from the
    //          tagger's output. Words the tagger emitted without sub-elements are omitted.
    // throws:  ArgumentNullException if words is null. Failures from file I/O, process
    //          start-up, or XML loading/deserialization propagate to the caller.
    if (words == null)
    {
        throw new ArgumentNullException("words");
    }

    //create temp files for the input and output of the POS tagger process
    string inputFileName = Path.GetTempFileName();
    string outputFileName = Path.GetTempFileName();

    //the part-of-speech dictionary to be returned
    Dictionary<string, List<WordRecord>> posDict = new Dictionary<string, List<WordRecord>>();
    try
    {
        //write the input words to the temp file, one word per line
        using (StreamWriter inputFile = new StreamWriter(inputFileName))
        {
            foreach (string word in words)
            {
                inputFile.WriteLine(word);
            }
        }

        //Build the command line in a local. Assigning the result back to ProgramArguments
        //would make every later call append another "-i ... -o ..." pair to the previous one.
        //Both paths are quoted because the temp directory may contain spaces.
        string taggerArguments = string.Format("{0} -i \"{1}\" -o \"{2}\"", ProgramArguments, inputFileName, outputFileName);
        Console.WriteLine("ProgramArguments: {0}", taggerArguments);

        ProcessStartInfo tsi = new ProcessStartInfo(ProgramFileName, taggerArguments);
        tsi.WorkingDirectory = Path.GetDirectoryName(ProgramFileName);
        tsi.UseShellExecute = false; //start the executable directly rather than through the OS shell

        //start the POS Tagger process; the using block releases the process handle even if
        //Start or WaitForExit throws
        using (Process taggerProcess = new Process())
        {
            taggerProcess.StartInfo = tsi;
            taggerProcess.Start();
            //wait for the process to finish, before we start trying to read the output file
            taggerProcess.WaitForExit();
        }

        //read the output file
        XElement taggerOutput = XElement.Load(outputFileName);
        //typeof cannot return null, unlike a Type.GetType lookup by name
        XmlSerializer xs = new XmlSerializer(typeof(WordRecord));

        //query to get the valid WordRecord elements in the output and deserialize them to WordRecord objects
        //WordRecords that don't have sub-elements correspond to input words that the tagger couldn't tag. These are ignored.
        var validWordRecordQuery = from wr in taggerOutput.Elements("WordRecord")
                                   where wr.HasElements
                                   select (WordRecord)xs.Deserialize(wr.CreateReader());

        //place each WordRecord in the dictionary, mapped to its word
        foreach (WordRecord wr in validWordRecordQuery)
        {
            List<WordRecord> records;
            if (!posDict.TryGetValue(wr.word, out records))
            {
                //word is not already in the dictionary, so create and register a new list of word records
                records = new List<WordRecord>();
                posDict[wr.word] = records;
            }
            //add new word record to the list for the word
            records.Add(wr);
        }
    }
    finally
    {
        //clean up the temporary files that we created; the nested finally makes sure the
        //output file is still deleted when deleting the input file throws
        try
        {
            File.Delete(inputFileName);
        }
        finally
        {
            File.Delete(outputFileName);
        }
    }
    return posDict;
}