/// <summary>
/// Rewrites number words found in <paramref name="value"/> as digits and returns the resulting text.
/// </summary>
/// <remarks>
/// The text goes through a fixed sequence of regex passes driven by the DIRECT_NUMS, ORDINALS,
/// TEN_PREFIXES and BIG_PREFIXES tables (declared elsewhere). Every number produced by a pass is
/// tagged with a temporary "&lt;num&gt;" marker so that later passes can combine neighbouring
/// numbers; all markers are stripped before the method returns.
/// A tens word or multiplier word that has no usable digit next to it is combined with 0
/// instead of raising a <see cref="System.FormatException"/>.
/// </remarks>
/// <param name="value">Text to scan for number words.</param>
/// <returns>The text with the replacements applied and every "&lt;num&gt;" marker removed.</returns>
public static string Numerize(string value)
{
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    // preprocess: collapse runs of spaces and turn a hyphen between two non-digits into a space
    // (will mutilate hyphenated-words but shouldn't matter for date extraction)
    var result = @" +|([^\d])-([^\d])".Compile().Replace(value, "$1 $2");

    // take the 'a' out so it doesn't turn into a 1, save the half for the end
    result = result.Replace("a half", "haAlf");

    // easy/direct replacements
    DIRECT_NUMS.ForEach<string, string>(
        (p, r) => result = Regex.Replace(result, p, "<num>" + r));

    // ordinals: the last two characters of the pattern text are re-appended after the number
    // NOTE(review): presumably that is the ordinal suffix ("th", "rd", ...) - confirm against ORDINALS
    ORDINALS.ForEach<string, string>(
        (p, r) => result = Regex.Replace(result, p, "<num>" + r + p.LastCharacters(2)));

    // ten, twenty, etc. followed by an already-converted single digit: add the two together.
    // The digit group is optional, so it may capture nothing (e.g. the following number has
    // several digits); ParseCapturedDigits maps that to 0 and the tens value is emitted alone.
    TEN_PREFIXES.ForEach<string, int>(
        (p, r) =>
            result = Regex.Replace(
                result,
                "(?:" + p + @") *<num>(\d(?=[^\d]|$))*",
                match => "<num>" + (r + ParseCapturedDigits(match.Groups[1].Value)).ToString(invariant)));

    // remaining ten, twenty, etc. that stand on their own
    TEN_PREFIXES.ForEach<string, int>(
        (p, r) => result = Regex.Replace(result, p, "<num>" + r.ToString(invariant)));

    // hundreds, thousands, millions, etc.: multiply the preceding digits by the prefix value,
    // then let Andition post-process the text before the next prefix is handled
    BIG_PREFIXES.ForEach<string, long>(
        (p, r) =>
        {
            result = Regex.Replace(
                result,
                @"(?:<num>)?(\d*) *" + p,
                match => "<num>" + (r * ParseCapturedDigits(match.Groups[1].Value)).ToString(invariant));
            result = Andition(result);
        });

    // fractional addition
    // Not combined with the previous block, and done textually rather than with floating-point
    // arithmetic: appending ".5" to the captured digit run avoids culture-specific decimal
    // separators, precision loss and extraneous ".0"s.
    result = Regex.Replace(
        result,
        @"(\d+)(?: |and |-)*haAlf",
        match => match.Groups[1].Value + ".5");

    return result.Replace("<num>", "");
}

/// <summary>
/// Converts the digits captured by one of the numerizer regex groups into a number,
/// treating an empty capture as zero.
/// </summary>
/// <param name="digits">The captured text; either empty or a run of digits.</param>
/// <returns>0 for an empty capture, otherwise the parsed value.</returns>
private static long ParseCapturedDigits(string digits)
{
    // A group that took no part in the match reports "" as its value, which long.Parse rejects.
    return digits.Length == 0
        ? 0L
        : long.Parse(
            digits,
            System.Globalization.NumberStyles.None,
            System.Globalization.CultureInfo.InvariantCulture);
}