/// <summary>
/// Plays one round of rock-paper-scissors: records the agent's move, picks the
/// environment's move, and scores the round from the agent's point of view.
/// </summary>
/// <param name="action">The agent's move; checked with <c>IsValidAction</c> through a debug assertion.</param>
/// <returns>
/// A tuple whose first item is the environment's move (stored in <c>Observation</c>)
/// and whose second item is the agent's reward for this round (stored in <c>Reward</c>).
/// </returns>
public override Tuple<int, int> PerformAction(int action)
{
    // Naming caveat: the move the environment plays is what the agent sees as its
    // observation, and the agent's action is the move the environment plays against.
    Debug.Assert(this.IsValidAction(action), "non-valid action used " + action);
    this.Action = action;

    // Environment bias: when the previous round had the environment on Rock and the
    // agent's reward was RLose (i.e. the environment won), the environment plays Rock
    // again. In every other case its move is drawn from the valid observations.
    if ((this.Observation != Rock) || (this.Reward != RLose))
    {
        this.Observation = Utils.RandomElement(this.ValidObservations);
    }
    else
    {
        this.Observation = Rock;
    }

    int envMove = this.Observation;

    // The three combinations in which the environment's move beats the agent's.
    bool envBeatsAgent =
        (action == Rock && envMove == Paper) ||
        (action == Paper && envMove == Scissors) ||
        (action == Scissors && envMove == Rock);

    if (action == envMove)
    {
        // Same move on both sides.
        this.Reward = this.RDraw;
    }
    else if (envBeatsAgent)
    {
        // Agent lost; environment won.
        this.Reward = RLose;
    }
    else
    {
        // Any remaining combination is a win for the agent.
        this.Reward = RWin;
    }

    return new Tuple<int, int>(this.Observation, this.Reward);
}