/// <summary>
/// Chooses an action using the base policy, restricted to actions that are not currently tabu.
/// </summary>
/// <param name="actionEstimates">Action estimates, indexed by action number.</param>
/// <returns>
/// Index of the chosen action in the original (unfiltered) action numbering.
/// </returns>
/// <remarks>
/// Every call decrements the tabu counter of each action that is currently tabu
/// (non-zero counter). If all actions are tabu, there is nothing to filter on, so the
/// base policy chooses among the full set of estimates instead of being handed an
/// empty array.
/// </remarks>
public int ChooseAction( double[] actionEstimates )
{
    // count the actions that are currently allowed (tabu counter equal to zero)
    int allowedCount = 0;
    for ( int i = 0; i < actions; i++ )
    {
        if ( tabuActions[i] == 0 )
        {
            allowedCount++;
        }
    }

    if ( allowedCount == 0 )
    {
        // Every action is tabu: an empty estimate array would leave the base policy
        // nothing valid to pick and the index map nothing to translate. Age the tabu
        // counters as usual and fall back to the unrestricted action set.
        for ( int i = 0; i < actions; i++ )
        {
            tabuActions[i]--;
        }
        return basePolicy.ChooseAction( actionEstimates );
    }

    // estimates of the allowed actions, plus a map from the filtered
    // position back to the original action index
    double[] allowedActionEstimates = new double[allowedCount];
    int[] allowedActionMap = new int[allowedCount];

    for ( int i = 0, j = 0; i < actions; i++ )
    {
        if ( tabuActions[i] == 0 )
        {
            // allowed action
            allowedActionEstimates[j] = actionEstimates[i];
            allowedActionMap[j] = i;
            j++;
        }
        else
        {
            // decrease tabu time of tabu action
            tabuActions[i]--;
        }
    }

    // the base policy answers in filtered coordinates; translate back
    return allowedActionMap[basePolicy.ChooseAction( allowedActionEstimates )];
}