/// <summary>
/// Selects the next action to take from this node using the UCB1 criterion:
/// each visited child is scored by its empirical mean reward plus an
/// exploration bonus; unvisited children receive a fixed optimistic bias so
/// they are tried first. Ties are broken by a small random jitter.
/// </summary>
/// <param name="agent">Agent supplying the horizon, maximum reward, and the set of valid actions.</param>
/// <returns>The selected action, or -1 if the agent's environment has no valid actions.</returns>
public int SelectAction(Agent agent)
{
    Debug.Assert(agent.MaximumReward() != null, "SelectAction requires agent.MaximumReward() to be non-null");

    // Scale for the exploration term: horizon * max single-step reward bounds
    // the total remaining reward, keeping the bonus in the reward's range.
    double exploreBias = (double)agent.Horizon * agent.MaximumReward().Value;

    // Numerator of the UCB exploration term; constant across all actions,
    // so it is hoisted out of the loop. NOTE(review): if this.Visits is 0,
    // Math.Log yields -infinity — presumably this node is always visited
    // before selection is invoked; confirm against the caller.
    double explorationNumerator = this.ExplorationConstant * Math.Log(this.Visits);

    int bestAction = -1;
    double bestPriority = double.NegativeInfinity;

    foreach (int action in agent.Environment.ValidActions)
    {
        // Single dictionary lookup (TryGetValue) instead of ContainsKey + indexer;
        // node remains null when the child has not been created yet.
        MonteCarloSearchNode node;
        this.Children.TryGetValue(action, out node);

        double priority;
        if (node == null || node.Visits == 0)
        {
            // Previously unexplored child: assign the fixed optimistic bias.
            priority = this.UnexploredBias;
        }
        else
        {
            // UCB1: empirical mean plus exploration bonus that shrinks as the
            // child accumulates visits.
            priority = node.Mean + exploreBias * Math.Sqrt(explorationNumerator / node.Visits);
        }

        // Small random jitter breaks ties between near-equal priorities so the
        // first-listed action is not systematically favored.
        if (priority > bestPriority + Utils.RandomDouble(0, 0.001))
        {
            bestAction = action;
            bestPriority = priority;
        }
    }

    return bestAction;
}