/// <summary>
/// Selects the next action to explore from this node using a UCB-style rule:
/// each valid action is scored by its child's mean reward plus an exploration
/// bonus, and the highest-priority action wins (with small random jitter so
/// ties are broken stochastically).
/// </summary>
/// <param name="agent">The agent whose horizon, reward bounds, and environment
/// (valid action set) drive the selection.</param>
/// <returns>The chosen action, or -1 if the environment reports no valid actions.</returns>
public int SelectAction(Agent agent)
{
    Debug.Assert(agent.MaximumReward() != null, "MaximumReward() must be known before action selection");

    // Scale for the exploration term: horizon * max single-step reward is an
    // upper bound on the remaining return, normalizing the UCB bonus.
    double exploreBias = (double)agent.Horizon * agent.MaximumReward().Value;
    double explorationNumerator = this.ExplorationConstant * Math.Log(this.Visits);

    int bestAction = -1;
    double bestPriority = double.NegativeInfinity;

    foreach (int action in agent.Environment.ValidActions)
    {
        // Single dictionary lookup; node stays null when the action has no child yet.
        MonteCarloSearchNode node;
        this.Children.TryGetValue(action, out node);

        double priority;
        if (node == null || node.Visits == 0)
        {
            // Previously unexplored action: use the fixed optimistic bias.
            priority = this.UnexploredBias;
        }
        else
        {
            // UCB: exploitation (mean) + exploration bonus shrinking with visits.
            priority = node.Mean + exploreBias * Math.Sqrt(explorationNumerator / node.Visits);
        }

        // Tiny random offset breaks exact ties at random instead of always
        // favoring the first action enumerated.
        if (priority > (bestPriority + Utils.RandomDouble(0, 0.001)))
        {
            bestAction = action;
            bestPriority = priority;
        }
    }

    return bestAction;
}