// Chooses an action by sampling from the Boltzmann (softmax) distribution
// built from the given action estimates: action i gets the weight
// exp( actionEstimates[i] / temperature ) and is picked with probability
// weight / (sum of all weights).
//
// Parameters:
//   actionEstimates - estimated value of each action; must be non-null and
//                     contain at least one element.
//
// Returns:
//   Index of the selected action within actionEstimates.
//
// Throws:
//   ArgumentNullException - actionEstimates is null.
//   ArgumentException     - actionEstimates is empty.
//
// When the weights cannot be normalized (their sum overflows to infinity,
// underflows to zero, or is NaN) the method falls back to greedy selection
// and returns the first index holding the largest estimate. No random
// number is drawn in that case.
public int ChooseAction( double[] actionEstimates )
{
    if ( actionEstimates == null )
    {
        throw new ArgumentNullException( "actionEstimates" );
    }

    // actions count
    int actionsCount = actionEstimates.Length;
    if ( actionsCount == 0 )
    {
        // without this guard the greedy fallback below would index an empty array
        throw new ArgumentException( "At least one action estimate is required.", "actionEstimates" );
    }

    // unnormalized action weights and their total
    double[] actionProbabilities = new double[actionsCount];
    double probabilitiesSum = 0;

    for ( int i = 0; i < actionsCount; i++ )
    {
        double actionProbability = Math.Exp( actionEstimates[i] / temperature );

        actionProbabilities[i] = actionProbability;
        probabilitiesSum += actionProbability;
    }

    // A NaN sum (e.g. a NaN estimate, or 0/0 when both an estimate and the
    // temperature are zero) must be caught here as well: every comparison
    // against NaN is false, so the sampling loop below would otherwise fall
    // through and always return the last action.
    if ( double.IsNaN( probabilitiesSum ) || double.IsInfinity( probabilitiesSum ) || ( probabilitiesSum == 0 ) )
    {
        // do greedy selection when the weights cannot be normalized
        double maxReward = actionEstimates[0];
        int greedyAction = 0;

        for ( int i = 1; i < actionsCount; i++ )
        {
            // strict comparison keeps the first of several equal maxima
            if ( actionEstimates[i] > maxReward )
            {
                maxReward = actionEstimates[i];
                greedyAction = i;
            }
        }

        return greedyAction;
    }

    // get random number, which determines which action to choose
    // (presumably uniform in [0, 1) - confirm against the declaration of rand)
    double actionRandomNumber = rand.NextDouble( );

    // walk the cumulative distribution until it reaches the random number
    double sum = 0;
    for ( int i = 0; i < actionsCount; i++ )
    {
        sum += actionProbabilities[i] / probabilitiesSum;
        if ( actionRandomNumber <= sum )
        {
            return i;
        }
    }

    // rounding may leave the cumulative sum slightly below the random number;
    // the last action absorbs that remainder
    return actionsCount - 1;
}
}