AIXI.MonteCarloSearchNode.SelectAction — C# (CSharp) method

SelectAction() — public method

public SelectAction ( Agent agent ) : int
agent Agent
Returns int
        /// <summary>
        /// Selects the next action to explore from this search node using a
        /// UCT-style rule: each valid action is scored by its child's mean
        /// reward plus an exploration bonus, and unexplored actions receive a
        /// fixed optimistic bias so they are tried at least once.
        /// </summary>
        /// <param name="agent">The agent supplying the horizon, maximum reward,
        /// and the environment's set of valid actions.</param>
        /// <returns>The action with the highest (jittered) priority, or -1 if
        /// the environment reports no valid actions.</returns>
        public int SelectAction(Agent agent)
        {
            // MaximumReward() must be known here; the exploration scale depends on it.
            Debug.Assert(agent.MaximumReward() != null, "this is weird place, - in selection action");

            // Exploration bonus scale: horizon * max single-step reward bounds
            // the total reward obtainable over the remaining planning horizon.
            double exploreBias = (double)agent.Horizon * agent.MaximumReward().Value;
            // ln(parent visits) numerator of the UCB1 exploration term,
            // shared by every child considered below.
            double explorationNumerator = this.ExplorationConstant * Math.Log(this.Visits);
            int bestAction = -1;
            double bestPriority = double.NegativeInfinity;

            foreach (int action in agent.Environment.ValidActions) {
                // Single dictionary lookup; node stays null when the action
                // has no child yet (TryGetValue defaults the out parameter).
                MonteCarloSearchNode node;
                this.Children.TryGetValue(action, out node);

                double priority;
                if (node == null || node.Visits == 0) {
                    // Previously unexplored action: fixed optimistic bias
                    // ensures every action gets sampled before exploitation.
                    priority = this.UnexploredBias;
                }
                else {
                    // UCB1-style score: exploitation (mean reward) plus
                    // exploration term shrinking with the child's visit count.
                    priority = node.Mean + exploreBias * Math.Sqrt(explorationNumerator / node.Visits);
                }

                // Small random jitter in the comparison breaks ties between
                // actions with (near-)equal priorities.
                if (priority > (bestPriority + Utils.RandomDouble(0, 0.001))) {
                    bestAction = action;
                    bestPriority = priority;
                }
            }

            return bestAction;
        }