UserSimulation.ErrorGenerator.GenerateErrorString C# (CSharp) Method

GenerateErrorString() public method

public GenerateErrorString ( string input, Classification c ) : string
input string
c Classification
Returns string
        /// <summary>
        /// Produces a perturbed copy of <paramref name="input"/> by applying the transposition
        /// and typo algorithms (<c>Transposize</c> then <c>Typoize</c>) using the dictionaries
        /// obtained from <paramref name="c"/>.
        /// </summary>
        /// <remarks>
        /// On each attempt one kind of error (typo or transposition) is chosen at random, in
        /// proportion to its estimated probability, and an index sampled for it is passed to the
        /// corresponding algorithm as the "guaranteed" error index; the other algorithm is run
        /// with index -1. Attempts repeat until the output differs from the input. If the chosen
        /// kind of error has zero probability at every index, the loop stops and the most recent
        /// output (the original input if no attempt has completed) is returned.
        /// NOTE(review): if the helpers can only ever reproduce the input (e.g. swapping two
        /// identical characters) this loop may not terminate; confirm against Transposize/Typoize.
        /// </remarks>
        /// <param name="input">The string to perturb.</param>
        /// <param name="c">Source of the typo and transposition dictionaries.</param>
        /// <returns>The perturbed string, or <paramref name="input"/> unchanged when no error can be produced.</returns>
        public string GenerateErrorString(string input, Classification c)
        {
            // get typo dict
            var td = c.GetTypoDict();

            // get transposition dict
            var trd = c.GetTranspositionDict();

            // convert the input into a char array
            var ochars = StringToOptCharArray(input);

            // add leading and trailing 'empty characters'
            var inputchars = AddLeadingTrailingSpace(ochars);

            // calculate the marginal probabilities of NOT making a typo for each char in the
            // padded input: (count of "typed as itself") / (count of all outcomes for that char)
            double[] PrsCharNotTypo = inputchars.Select(oc =>
            {
                var key = new Tuple<OptChar, string>(oc, OptCharToString(oc));
                int count;
                if (!td.TryGetValue(key, out count))
                {
                    count = 0;
                }
                // funny case to handle the fact that FSharpOption.None == null
                var cond_dist = td.Where(kvp => kvp.Key.Item1 == null ? oc == null : kvp.Key.Item1.Equals(oc));
                int total = cond_dist.Aggregate(0, (acc, kvp) => acc + kvp.Value);
                // no observations for this char: treat it as never mistyped
                return total == 0 ? 1.0 : (double)count / total;
            }).ToArray();

            // calculate the probability of making at least one error
            // might need log-probs here
            double PrTypo = 1.0 - PrsCharNotTypo.Aggregate(1.0, (acc, pr_not_typo) => acc * pr_not_typo);

            // calculate the marginal probabilities of NOT making a
            // transposition for each position in the input
            // note that we do NOT consider the empty strings here
            // For strings of length 1, the probability of not making a
            // transposition should be exactly 1.
            double[] PrsPosNotTrans;
            if (ochars.Length > 1)
            {
                // the count stored under key 0 does not depend on the position, so look it
                // up once (presumably key 0 means "character stayed in place" -- TODO confirm)
                int zeroCount;
                if (!trd.TryGetValue(0, out zeroCount))
                {
                    zeroCount = 0;
                }

                PrsPosNotTrans = ochars.Select((oc, idx) =>
                {
                    // only keys that keep idx + key inside [0, input.Length) are counted
                    int total = trd.Where(kvp => kvp.Key < input.Length - idx && kvp.Key >= -idx).Select(kvp => kvp.Value).Sum();
                    return total == 0 ? 1.0 : (double)zeroCount / total;
                }).ToArray();
            }
            else
            {
                PrsPosNotTrans = new[] { 1.0 };
            }

            // calculate the probability of having at least one transposition
            double PrTrans = 1.0 - PrsPosNotTrans.Aggregate(1.0, (acc, pr_not_trans) => acc * pr_not_trans);

            // calculate the relative probability of making a typo vs a transposition.
            // When neither error is possible the quotient would be 0/0 (NaN); use 0.0 so the
            // coin flip below deterministically takes the transposition branch, which then
            // detects that nothing can be produced and leaves the loop.
            double PrAnyError = PrTypo + PrTrans;
            double RelPrTypo = PrAnyError == 0.0 ? 0.0 : PrTypo / PrAnyError;

            // init with original input in case typos/transpositions prove to be impossible
            string output = input;

            // the while loop ensures that we do not return an unmodified string.
            // for most strings, returning an unmodified string is very unlikely
            do
            {
                // flip a coin to determine whether our guaranteed error is a typo or a transposition
                // (r is a random source declared outside this method)
                if (r.NextDouble() < RelPrTypo)
                {   // is a typo
                    // determine the index of the guaranteed typo
                    double[] PrsMistype = PrsCharNotTypo.Select(pr => 1.0 - pr).ToArray();
                    // if there are no possible typos then we just can't produce one
                    if (PrsMistype.Sum() == 0)
                    {
                        break;
                    }
                    var i = MultinomialSample(PrsMistype);
                    // run transposition algorithm & add leading/trailing empty chars
                    // we set the guaranteed transposition index to -1 to ensure that no
                    // transpositions are guaranteed
                    OptChar[] input_t = AddLeadingTrailingSpace(Transposize(ochars, trd, -1));
                    // run typo algorithm; i was sampled over the padded characters, so it is
                    // passed as-is and no adjustment for the leading space is made here
                    output = Typoize(input_t, td, i);
                }
                else
                {   // is a transposition
                    // determine the index of the guaranteed transposition
                    double[] PrsMistype = PrsPosNotTrans.Select(pr => 1.0 - pr).ToArray();
                    // if there are no possible transpositions then we just can't produce one
                    if (PrsMistype.Sum() == 0)
                    {
                        break;
                    }
                    var i = MultinomialSample(PrsMistype);
                    // run transposition algorithm & add leading/trailing empty chars
                    OptChar[] input_t = AddLeadingTrailingSpace(Transposize(ochars, trd, i));
                    // run typo algorithm; set guaranteed typo index to -1 to ensure that no
                    // typo is guaranteed
                    output = Typoize(input_t, td, -1);
                }
            } while (input == output);

            return output;
        }

Usage Example

Example #1
0
        /// <summary>
        /// Runs a fixed number of single-error trials: each trial picks one terminal input cell
        /// at random, perturbs its original value with <c>ErrorGenerator.GenerateErrorString</c>,
        /// and passes that one-entry error dictionary to <c>RunSimulation</c>.
        /// </summary>
        /// <param name="app">Excel application, forwarded to the preparation and simulation steps.</param>
        /// <param name="wbh">Workbook under test, forwarded to the preparation and simulation steps.</param>
        /// <param name="nboots">Forwarded unchanged to <c>RunSimulation</c>.</param>
        /// <param name="significance">Forwarded unchanged to <c>RunSimulation</c>.</param>
        /// <param name="threshold">Forwarded unchanged to <c>RunSimulation</c>.</param>
        /// <param name="c">Classification used to generate errors; also forwarded to <c>RunSimulation</c>.</param>
        /// <param name="r">Random source used to choose the input cell for each trial.</param>
        /// <param name="outfile">Forwarded unchanged to <c>RunSimulation</c>.</param>
        /// <param name="max_duration_in_ms">Forwarded unchanged to <c>RunSimulation</c>.</param>
        /// <param name="logfile">Forwarded unchanged to <c>RunSimulation</c>.</param>
        /// <param name="pb">Progress bar; its maximum is set to 5 before any work is done.</param>
        /// <param name="ignore_parse_errors">Forwarded unchanged to <c>Prep.PrepSimulation</c>.</param>
        /// <exception cref="Exception">A terminal input cell has no recorded original value.</exception>
        /// <exception cref="InvalidOperationException">The workbook has no terminal input cells to perturb.</exception>
        public static void RunProportionExperiment(Excel.Application app, Excel.Workbook wbh, int nboots, double significance, double threshold, UserSimulation.Classification c, Random r, String outfile, long max_duration_in_ms, String logfile, ProgBar pb, bool ignore_parse_errors)
        {
            // number of independent single-error trials to run
            const int NumTrials = 100;

            pb.setMax(5);

            // record initial state of spreadsheet
            var prepdata = Prep.PrepSimulation(app, wbh, pb, ignore_parse_errors);

            // init error generator
            var eg = new ErrorGenerator();

            // get inputs as an array of addresses to facilitate random selection
            // DATA INPUTS ONLY
            AST.Address[] inputs = prepdata.dag.terminalInputCells();

            // sanity check: all of the inputs should also be in prepdata.original_inputs
            foreach (AST.Address addr in inputs)
            {
                if (!prepdata.original_inputs.ContainsKey(addr))
                {
                    throw new Exception("Missing address!");
                }
            }

            // with no inputs, r.Next(0) yields 0 and inputs[0] would fail with an opaque
            // IndexOutOfRangeException; fail with an explicit message instead
            if (inputs.Length == 0)
            {
                throw new InvalidOperationException("Cannot run experiment: the workbook has no terminal input cells to perturb.");
            }

            for (int i = 0; i < NumTrials; i++)
            {
                // randomly choose an input address
                AST.Address rand_addr = inputs[r.Next(inputs.Length)];

                // get the value
                String input_value = prepdata.original_inputs[rand_addr];

                // perturb it
                String erroneous_input = eg.GenerateErrorString(input_value, c);

                // create an error dictionary with this one perturbed value
                var errors = new CellDict();
                errors.Add(rand_addr, erroneous_input);

                // run simulations; simulation code does insertion of errors and restore of originals
                RunSimulation(app, wbh, nboots, significance, threshold, c, r, outfile, max_duration_in_ms, logfile, pb, prepdata, errors);
            }
        }
All Usage Examples Of UserSimulation.ErrorGenerator::GenerateErrorString