/// <summary>
/// Builds the two candidate wildcard rules for the host of <paramref name="url"/>
/// (<c>*.x.tld</c> and <c>*.y.x.tld</c>) and decides which of them should be used.
/// </summary>
/// <param name="url">An absolute URL; only its host part is inspected.</param>
/// <returns>
/// An <c>AmbiguousRule</c> carrying both rule strings and a mode:
/// 0 = error (host has a single label), 1 = use <c>tld_rule</c>,
/// 2 = use <c>second_rule</c>, 3 = ambiguous (caller should offer both).
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="url"/> is null.</exception>
/// <exception cref="UriFormatException"><paramref name="url"/> is not a valid absolute URI.</exception>
public static AmbiguousRule generate_rule(string url)
{
    /*
    to solve issue #13
    there are a lot of second level domains, e.g. domain.info.au, domain.vic.au, ...
    so we check these rules:
    if url has only two parts (e.g. x.tld or www.x.tld) choose *.x.tld
    else if url has 3 parts or more (e.g. y.x.tld) and y != www:
    check the following rules: (x = second part after tld)
    1. (x is part of domain)
       if len(x) > 4: assume that x is not part of extension, and choose *.x.tld
    2. (x is part of extension)
       if len(x) <= 2 (e.g. y.id.au) then choose *.y.x.tld
       if x is in exceptions (com, net, org, edu, gov, asn, sch) choose *.y.x.tld
          because many TLDs have second level domains on these, e.g. chap.sch.ir
       if count(parts) == 4 and first part is www: e.g. www.news.com.au, choose *.y.x.tld
    if none of the rules apply, the case is ambiguous, display both options in a context menu.
    e.g. sealake.vic.au or something.fun.ir
    */

    // needed variables
    var domain = new Uri(url).Host;
    var parts = domain.Split('.');
    var count = parts.Length; // Split always yields at least one element
    var tld = parts[count - 1];

    // Hosts with fewer than three labels (e.g. localhost, google.com) are the common case,
    // so missing labels are detected by length and left empty instead of by catching
    // an IndexOutOfRangeException.
    var x = count >= 2 ? parts[count - 2] : ""; // second-level
    var y = count >= 3 ? parts[count - 3] : ""; // third-level

    // creating the patterns
    var rule_tld = String.Format("*.{0}.{1}", x, tld);
    var rule_second = String.Format("*.{0}.{1}.{2}", y, x, tld);

    var mode = 0; // 0 = error, 1 = use rule_tld (*.x.tld), 2 = use rule_second (*.y.x.tld), 3 = ambiguous

    // these conditions are based on the long comment above
    if (count == 2 || (count == 3 && y == "www"))
    {
        mode = 1;
    }
    else if (count >= 3)
    {
        if (x.Length > 4)
        {
            // long second-level label: treat it as the registered name, not part of the extension
            mode = 1;
        }
        else if (
            (x.Length <= 2) ||
            ((new[] { "com", "net", "org", "edu", "gov", "asn", "sch" }).Contains(x)) ||
            (count == 4 && parts[0] == "www")
            )
        {
            // x looks like part of a multi-label extension (e.g. com.au, id.au)
            mode = 2;
        }
        else
        {
            // 3-4 character label that is not a known extension part: cannot decide
            mode = 3;
        }
    }

    return new AmbiguousRule()
    {
        tld_rule = rule_tld,
        second_rule = rule_second,
        mode = mode
    };
}