/// <summary>
/// Builds the set of parent folders that are candidates for semi-aggregate analysis,
/// together with the data files each of them covers.
/// </summary>
/// <remarks>
/// Two kinds of keys are generated for every data file:
/// <list type="bullet">
/// <item><description>the file's immediate directory, unmodified, covering files located directly in that folder;</description></item>
/// <item><description>that directory and each of its ancestors with a trailing <c>*</c>, covering files located anywhere beneath that folder.</description></item>
/// </list>
/// Entries covering exactly one data file, or every distinct data file, are discarded, and
/// of several entries covering an identical set of files only the one sorted last is kept.
/// </remarks>
/// <param name="dataFilepaths">Paths of the data files to group; duplicate paths are counted once.</param>
/// <returns>
/// The remaining folder keys, sorted by the list's default string comparer, each mapped to
/// the set of data file paths it covers.
/// </returns>
private static SortedList<string, HashSet<string>> DetermineSemiAggregateParentFolders(ICollection<string> dataFilepaths)
{
    SortedList<string, HashSet<string>> parents = new SortedList<string, HashSet<string>>();
    // distinct input paths; the per-folder sets below are de-duplicated, so the
    // "covers every data file" test must be made against a de-duplicated total as well
    HashSet<string> distinct_data_filepaths = new HashSet<string>();

    // find all the parent folders and the data files they contain
    foreach(string data_filepath in dataFilepaths)
    {
        distinct_data_filepaths.Add(data_filepath);

        // an empty path is treated as having no parent; Path.GetDirectoryName("") throws on
        // .NET Framework and returns null on .NET Core
        string directory = string.IsNullOrEmpty(data_filepath) ? null : Path.GetDirectoryName(data_filepath);
        bool first = true;
        while(directory != null)
        {
            // the immediate folder is registered under its plain name first and then,
            // like every ancestor, under the "*"-suffixed (recursive) name
            string key = first ? directory : directory + "*";

            HashSet<string> parent_data_filepaths;
            if(!parents.TryGetValue(key, out parent_data_filepaths))
            {
                parent_data_filepaths = new HashSet<string>();
                parents.Add(key, parent_data_filepaths);
            }
            parent_data_filepaths.Add(data_filepath);

            if(first)
            {
                // stay on the same folder for one more pass so that it also gets its "*" key
                first = false;
            }
            else
            {
                // step up one level; an empty directory (end of a relative path) has no
                // parent and must not be handed to Path.GetDirectoryName (see above)
                directory = directory.Length == 0 ? null : Path.GetDirectoryName(directory);
            }
        }
    }

    // remove parent folders with only one data file or every data file, as they are
    // processed separately and not eligible for semi-aggregate analysis
    // (iterating backwards keeps the remaining indices valid while removing)
    for(int i = parents.Count - 1; i >= 0; i--)
    {
        int covered = parents.Values[i].Count;
        if(covered == 1 || covered == distinct_data_filepaths.Count)
        {
            parents.RemoveAt(i);
        }
    }

    // remove higher-level parent folders where a lower-level parent folder contains the same
    // data files: an entry is dropped whenever any entry sorted after it covers an identical set
    // NOTE(review): this relies on the list's key ordering placing a folder before its
    // descendants ("X" < "X*" < "X<separator>Y*") - confirm for the comparer/culture in use
    int j = 0;
    while(j < parents.Count - 1)
    {
        HashSet<string> current_data_filepaths = parents.Values[j];
        bool duplicated_later = false;
        for(int k = j + 1; k < parents.Count; k++)
        {
            if(parents.Values[k].SetEquals(current_data_filepaths))
            {
                duplicated_later = true;
                break;
            }
        }

        if(duplicated_later)
        {
            // the entry that followed now occupies index j, so j is not advanced
            parents.RemoveAt(j);
        }
        else
        {
            j++;
        }
    }

    return parents;
}