/// <summary>
/// Searches for the lowest-cost <see cref="MatchComponent"/> that covers the given connected
/// sub-graph. The search starts from one single-node component per node in
/// <paramref name="subGraph"/> and repeatedly extends every surviving state by one candidate
/// unit, keeping at most <c>MaxStates</c> states per level.
/// </summary>
/// <param name="subGraph">The connected component whose nodes and edges must all be covered.</param>
/// <param name="revEdgeDict">Forwarded unchanged to the pruning strategy when it selects candidate units.</param>
/// <param name="srcNodeStatisticsDict">Forwarded unchanged to <c>GetNextState</c> when a state is extended.</param>
/// <returns>
/// The cheapest complete component found, or <c>null</c> when no state ever satisfied the
/// completeness condition.
/// </returns>
private MatchComponent ConstructComponent(ConnectedComponent subGraph, Dictionary<string, MatchEdge> revEdgeDict,
    Dictionary<Tuple<string, bool>, Statistics> srcNodeStatisticsDict)
{
    MatchComponent optimalFinalComponent = null;
    Func<double, double, double> multiply = (cur, next) => cur * next;

    // Seed the search with one single-node component per node of the sub-graph.
    var componentStates = subGraph.Nodes.Select(node => new MatchComponent(node.Value)).ToList();

    // Each pass of this loop extends every state of the current level by one candidate unit.
    // The search ends when a level produces no successor states.
    while (componentStates.Any())
    {
        // Index and score of the most expensive state in nextComponentStates, as reported by
        // GetMostExpensiveMatchComponent. Computed lazily the first time the level fills up.
        int maxIndex = -1;
        double maxValue = Double.MinValue;
        var nextComponentStates = new List<MatchComponent>();

        foreach (var curComponent in componentStates)
        {
            var nodeUnits = GetNodeUnits(subGraph, curComponent);

            // A state is complete when nothing is left to attach, every active node is
            // included and every edge of the sub-graph is materialized.
            if (!nodeUnits.Any()
                && curComponent.ActiveNodeCount == subGraph.ActiveNodeCount
                && curComponent.EdgeMaterilizedDict.Count(e => e.Value == true) == subGraph.EdgeCount)
            {
                if (optimalFinalComponent == null || curComponent.Cost < optimalFinalComponent.Cost)
                {
                    optimalFinalComponent = curComponent;
                }
                // Complete states are never expanded further.
                continue;
            }

            var candidateUnits = _pruningStrategy.GetCandidateUnits(nodeUnits, curComponent, revEdgeDict);

            // Attach each candidate unit to the current component to generate the next states.
            foreach (var candidateUnit in candidateUnits)
            {
                // Pre-filter: if the lower bound of the total join cost already exceeds the
                // cost of the best complete component found so far, skip this candidate.
                if (optimalFinalComponent != null)
                {
                    double preMatOutgoingDegree = candidateUnit.PreMatOutgoingEdges
                        .Select(e => e.AverageDegree)
                        .Aggregate(1.0, multiply);
                    double postMatOutgoingDegree = candidateUnit.PostMatOutgoingEdges
                        .Select(e => e.AverageDegree)
                        .Aggregate(1.0, multiply);
                    double unmaterializedDegree = candidateUnit.UnmaterializedEdges
                        .Select(e => e.AverageDegree)
                        .Aggregate(1.0, multiply);
                    double preMatIncomingDegree = candidateUnit.PreMatIncomingEdges
                        .Select(e => e.AverageDegree)
                        .Aggregate(1.0, multiply);

                    // Multiplication order is kept left-to-right so the floating-point
                    // result matches the established estimate exactly.
                    double candidateSize = candidateUnit.TreeRoot.EstimatedRows *
                                           preMatOutgoingDegree *
                                           postMatOutgoingDegree *
                                           unmaterializedDegree;
                    double costLowerBound = curComponent.Cardinality * preMatIncomingDegree
                                            + candidateSize;

                    // For loop joins the bound is additionally capped by log base 512 of the
                    // root's estimated rows.
                    // NOTE(review): presumably models an index-seek cost; confirm the 512 base.
                    if (candidateUnit.JoinHint == JoinHint.Loop)
                    {
                        costLowerBound = Math.Min(costLowerBound,
                            Math.Log(candidateUnit.TreeRoot.EstimatedRows, 512));
                    }

                    if (curComponent.Cost + costLowerBound > optimalFinalComponent.Cost)
                    {
                        continue;
                    }
                }

                var newComponent = curComponent.GetNextState(candidateUnit, _statisticsCalculator, _graphMetaData,
                    srcNodeStatisticsDict);

                if (nextComponentStates.Count >= MaxStates)
                {
                    if (maxIndex < 0)
                    {
                        var mostExpensive = GetMostExpensiveMatchComponent(nextComponentStates);
                        maxIndex = mostExpensive.Item1;
                        maxValue = mostExpensive.Item2;
                    }

                    // Once the level is full, a new state may only replace the most expensive
                    // one. This also applies to the very first state that arrives at capacity;
                    // previously that state was appended unconditionally, so the level grew to
                    // MaxStates + 1 and admitted a state without any cost comparison.
                    if (maxIndex >= 0)
                    {
                        int compEdgeCount = newComponent.EdgeMaterilizedDict.Count;
                        compEdgeCount = compEdgeCount == 0 ? 1 : compEdgeCount;
                        if (newComponent.Cost / compEdgeCount < maxValue)
                        {
                            nextComponentStates[maxIndex] = newComponent;
                            var mostExpensive = GetMostExpensiveMatchComponent(nextComponentStates);
                            maxIndex = mostExpensive.Item1;
                            maxValue = mostExpensive.Item2;
                        }
                        continue;
                    }
                    // No valid index was reported (e.g. nothing to evict yet): fall through
                    // and append, as before.
                }

                nextComponentStates.Add(newComponent);
            }
        }

        componentStates = nextComponentStates;
    }

    return optimalFinalComponent;
}