/// <summary>
/// Chooses an action index from the given estimates: normally the action with the
/// highest estimate (greedy), but when a random draw falls below <c>epsilon</c>,
/// one of the remaining (non-greedy) actions is picked at random instead.
/// </summary>
/// <param name="actionEstimates">Estimated reward for each action; the array index
/// is the action identifier. Must contain at least one element.</param>
/// <returns>Index of the selected action, always within
/// <c>[0, actionEstimates.Length)</c>.</returns>
/// <exception cref="System.ArgumentNullException"><paramref name="actionEstimates"/>
/// is <c>null</c>.</exception>
/// <exception cref="System.ArgumentException"><paramref name="actionEstimates"/>
/// is empty.</exception>
public int ChooseAction( double[] actionEstimates )
{
    if ( actionEstimates == null )
    {
        throw new System.ArgumentNullException( "actionEstimates" );
    }

    // actions count
    int actionsCount = actionEstimates.Length;

    if ( actionsCount == 0 )
    {
        throw new System.ArgumentException( "At least one action estimate is required.", "actionEstimates" );
    }

    // find the best action (greedy); the strict comparison keeps the first
    // index among equal maxima
    double maxReward = actionEstimates[0];
    int greedyAction = 0;

    for ( int i = 1; i < actionsCount; i++ )
    {
        if ( actionEstimates[i] > maxReward )
        {
            maxReward = actionEstimates[i];
            greedyAction = i;
        }
    }

    // try to do exploration. The random draw is made first so the generator is
    // advanced exactly as before; exploration itself is only possible when there
    // is at least one non-greedy action. With a single action the previous code
    // returned index 1, which is outside the estimates array.
    if ( ( rand.NextDouble( ) < epsilon ) && ( actionsCount > 1 ) )
    {
        // draw from the (actionsCount - 1) non-greedy slots, then shift every
        // value at or above the greedy index up by one to skip over it
        // (assumes rand behaves like System.Random, where Next( max ) is
        // exclusive of max)
        int randomAction = rand.Next( actionsCount - 1 );

        if ( randomAction >= greedyAction )
        {
            randomAction++;
        }

        return randomAction;
    }

    return greedyAction;
}
}