/// <summary>
/// Chooses an action, restricting the choice to actions that are not currently tabu.
/// </summary>
/// <param name="actionEstimates">Estimates of the actions; the first <c>actions</c>
/// entries are read, indexed by action.</param>
/// <returns>Index (in terms of <paramref name="actionEstimates"/>) of the chosen action.</returns>
/// <remarks>
/// An action is treated as tabu while its entry in <c>tabuActions</c> is non-zero. Every
/// call decrements the entry of each tabu action by one. The estimates of the remaining
/// actions are handed to <c>basePolicy</c>, and the index it returns is mapped back to the
/// original action index. If every action is tabu, the restriction is lifted for this call
/// and the base policy chooses among all actions, because an empty candidate list cannot
/// yield a valid action.
/// </remarks>
public int ChooseAction(double[] actionEstimates)
{
    // count the actions that are currently allowed (tabu entry equal to zero)
    int nonTabuActions = 0;
    for (int i = 0; i < actions; i++)
    {
        if (tabuActions[i] == 0)
        {
            nonTabuActions++;
        }
    }

    // With no allowed action the candidate list would be empty and the lookup in the
    // action map below could not succeed, so fall back to considering every action.
    bool allTabu = nonTabuActions == 0;
    int candidateCount = allTabu ? actions : nonTabuActions;

    // estimates of the candidate actions and the map back to their original indices
    double[] allowedActionEstimates = new double[candidateCount];
    int[] allowedActionMap = new int[candidateCount];

    for (int i = 0, j = 0; i < actions; i++)
    {
        bool isTabu = tabuActions[i] != 0;

        if (!isTabu || allTabu)
        {
            // candidate action
            allowedActionEstimates[j] = actionEstimates[i];
            allowedActionMap[j] = i;
            j++;
        }

        if (isTabu)
        {
            // decrease tabu time of tabu action
            tabuActions[i]--;
        }
    }

    // the base policy answers with an index into the candidate list; translate it back
    return allowedActionMap[basePolicy.ChooseAction(allowedActionEstimates)];
}