/// <summary>
/// Performs one sampling pass starting at this node, updates this node's
/// running statistics (<c>Mean</c> and <c>Visits</c>) and returns the reward
/// collected by the pass.
/// </summary>
/// <remarks>
/// What happens depends on the node:
/// <list type="bullet">
/// <item><description>
/// Horizon of zero: returns 0 immediately and leaves <c>Mean</c> and
/// <c>Visits</c> untouched.
/// </description></item>
/// <item><description>
/// Chance node: asks the agent for a percept, descends into the child keyed
/// by the observation (creating it as a decision node when missing) with
/// <c>horizon - 1</c>, and adds the percept's reward to the child's result.
/// </description></item>
/// <item><description>
/// Any other node that has never been visited: the reward is whatever
/// <c>agent.Playout(horizon)</c> returns.
/// </description></item>
/// <item><description>
/// Any other node that has been visited before: selects an action, informs
/// the agent's model of it, and descends into the child keyed by that action
/// (creating it as a chance node when missing) with the same horizon.
/// </description></item>
/// </list>
/// </remarks>
/// <param name="agent">
/// Agent used to generate percepts, run playouts, select actions and receive
/// action updates. It is mutated by those calls.
/// </param>
/// <param name="horizon">
/// Number of percept steps still to be sampled; a value of 0 ends the recursion.
/// </param>
/// <returns>The reward accumulated by this sampling pass.</returns>
public double Sample(Agent agent, int horizon)
{
    if (horizon == 0)
    {
        // Nothing left to sample; statistics are deliberately not updated here.
        return 0.0;
    }

    double reward;
    if (this.Type == ChanceNode)
    {
        var percept = agent.GeneratePerceptAndUpdate();
        int observation = percept.Item1;
        int perceptReward = percept.Item2;

        // Single lookup: reuse the existing child or register a new decision node.
        MonteCarloSearchNode observationChild;
        if (!this.Children.TryGetValue(observation, out observationChild))
        {
            observationChild = new MonteCarloSearchNode(DecisionNode);
            this.Children[observation] = observationChild;
        }

        // A percept consumes one step of the horizon.
        reward = perceptReward + observationChild.Sample(agent, horizon - 1);
    }
    else if (this.Visits == 0)
    {
        // First visit to a non-chance node: estimate the reward with a playout.
        // NOTE(review): the original comment also mentioned "exceeded maximum tree
        // depth", but no depth check is visible in this method - confirm.
        reward = agent.Playout(horizon);
    }
    else
    {
        // Previously visited non-chance node: pick an action and follow it.
        int action = this.SelectAction(agent);
        agent.ModelUpdateAction(action);

        // Single lookup: reuse the existing child or register a new chance node.
        MonteCarloSearchNode actionChild;
        if (!this.Children.TryGetValue(action, out actionChild))
        {
            actionChild = new MonteCarloSearchNode(ChanceNode);
            this.Children[action] = actionChild;
        }

        // The horizon is passed on unchanged: in this method only the chance-node
        // branch decrements it, when it consumes a percept.
        reward = actionChild.Sample(agent, horizon);
    }

    // Fold the new reward into the running average, then count this visit.
    double previousVisits = this.Visits;
    this.Mean = (reward + (previousVisits * this.Mean)) / (1.0 + previousVisits);
    this.Visits = this.Visits + 1;
    return reward;
}