/// <summary>
/// Revises the stored estimate for taking <paramref name="action"/> in
/// <paramref name="previousState"/>, using the observed reward and the
/// best estimate currently stored for <paramref name="nextState"/>.
/// </summary>
/// <remarks>
/// The new value is
/// <c>(1 - learningRate) * old + learningRate * (reward + discountFactor * best)</c>,
/// where <c>best</c> is the largest of the first <c>actions</c> entries of
/// <c>qvalues[nextState]</c>. The entry <c>qvalues[previousState][action]</c>
/// is modified in place; nothing is returned.
/// </remarks>
/// <param name="previousState">Row index into <c>qvalues</c> for the state the action was taken from.</param>
/// <param name="action">Column index of the action whose estimate is updated.</param>
/// <param name="reward">Reward value blended into the estimate.</param>
/// <param name="nextState">Row index into <c>qvalues</c> for the state that followed the action.</param>
public void UpdateState(int previousState, int action, double reward, int nextState)
{
    // Estimates stored for the state that followed the action.
    double[] successorRow = qvalues[nextState];

    // Scan the first `actions` entries of that row for the largest estimate.
    double bestSuccessorValue = successorRow[0];
    int candidate = 1;
    while (candidate < actions)
    {
        double estimate = successorRow[candidate];
        if (estimate > bestSuccessorValue)
        {
            bestSuccessorValue = estimate;
        }
        candidate++;
    }

    // Estimates stored for the state the action was taken from. This is looked
    // up only after the scan above, so when both indices name the same row the
    // maximum reflects the values as they were before this update.
    double[] originRow = qvalues[previousState];

    // Weight kept from the old estimate, and the value the estimate moves toward.
    double keepWeight = 1.0 - learningRate;
    double target = reward + discountFactor * bestSuccessorValue;

    // Two separate stores, so the scaled old value is written back to the array
    // before the learned portion is added.
    originRow[action] = originRow[action] * keepWeight;
    originRow[action] = originRow[action] + learningRate * target;
}
}