ABB.Swum.ExternalProcessTagger.Tag C# (CSharp) Method

Tag() public method

Calls an external part-of-speech tagger and runs it on each of the supplied words. Note that each call of this method creates (and cleans up) a separate process running the POS tagger. For performance reasons, it is better to tag words in large batches, rather than small, frequent batches.
public Tag ( ICollection<string> words ) : Dictionary<string, List<WordRecord>>
words ICollection<string> A collection of words to be tagged
return Dictionary<string, List<WordRecord>>
        /// <summary>
        /// Calls an external part-of-speech tagger and runs it on each of the supplied words.
        /// Each call creates (and cleans up) a separate tagger process and a pair of temporary
        /// files, so it is better to tag words in large batches than in small, frequent ones.
        /// </summary>
        /// <param name="words">A collection of words to be tagged.</param>
        /// <returns>
        /// A dictionary mapping each tagged word to the list of WordRecords the tagger produced for it.
        /// Words the tagger could not tag (WordRecord elements without sub-elements) are omitted.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="words"/> is null.</exception>
        public Dictionary<string, List<WordRecord>> Tag(ICollection<string> words)
        {
            if (words == null)
            {
                throw new ArgumentNullException("words");
            }

            //the part-of-speech dictionary to be returned
            Dictionary<string, List<WordRecord>> posDict = new Dictionary<string, List<WordRecord>>();

            //temp files for the input and output of the POS tagger process.
            //They are created inside the try block so that a failure while creating the second
            //file still cleans up the first one.
            string inputFileName = null;
            string outputFileName = null;

            try
            {
                inputFileName = Path.GetTempFileName();
                outputFileName = Path.GetTempFileName();

                //write the input words to the temp file, one word per line
                using (StreamWriter inputFile = new StreamWriter(inputFileName))
                {
                    foreach (string word in words)
                    {
                        inputFile.WriteLine(word);
                    }
                }

                //Build the command line in a local variable. Writing it back to ProgramArguments
                //would make every later call accumulate the "-i ... -o ..." switches of earlier calls.
                //Both paths are quoted so that temp directories containing spaces work.
                string arguments = string.Format("{0} -i \"{1}\" -o \"{2}\"", ProgramArguments, inputFileName, outputFileName);
                Console.WriteLine("ProgramArguments: {0}", arguments);

                ProcessStartInfo tsi = new ProcessStartInfo(ProgramFileName, arguments);
                tsi.WorkingDirectory = Path.GetDirectoryName(ProgramFileName);
                tsi.UseShellExecute = false; //necessary in order to redirect standard input/output

                //start the POS Tagger process; the using block releases the process handle even if
                //starting or waiting throws
                using (Process taggerProcess = new Process())
                {
                    taggerProcess.StartInfo = tsi;
                    taggerProcess.Start();

                    //wait for the process to finish, before we start trying to read the output file
                    taggerProcess.WaitForExit();
                }

                //read the output file
                XElement taggerOutput = XElement.Load(outputFileName);

                //typeof avoids the string-based type lookup, which can return null when the
                //type name is resolved from a different assembly
                XmlSerializer xs = new XmlSerializer(typeof(WordRecord));

                //deserialize the valid WordRecord elements and place each one in the dictionary, mapped to its word
                foreach (XElement element in taggerOutput.Elements("WordRecord"))
                {
                    //WordRecords that don't have sub-elements correspond to input words that the
                    //tagger couldn't tag. These are ignored.
                    if (!element.HasElements)
                    {
                        continue;
                    }

                    WordRecord wr;
                    using (var reader = element.CreateReader())
                    {
                        wr = (WordRecord)xs.Deserialize(reader);
                    }

                    List<WordRecord> list;
                    if (!posDict.TryGetValue(wr.word, out list))
                    {
                        //word is not already in the dictionary, so create a new list of word records
                        list = new List<WordRecord>();
                        posDict[wr.word] = list;
                    }

                    //add new word record to the list for the word
                    list.Add(wr);
                }
            }
            finally
            {
                //clean up whichever temporary files were actually created
                if (inputFileName != null)
                {
                    File.Delete(inputFileName);
                }
                if (outputFileName != null)
                {
                    File.Delete(outputFileName);
                }
            }

            return posDict;
        }
ExternalProcessTagger