// Applies a single Q-learning style update to the stored action-value table.
//
// The entry for (previousState, action) is blended with a new target value:
//   Q[previousState][action] = (1 - learningRate) * Q[previousState][action]
//                            + learningRate * (reward + discountFactor * max(Q[nextState][*]))
//
// previousState - index of the table row whose entry is updated
// action        - index of the entry within that row
// reward        - reward added to the discounted best estimate of the next state
// nextState     - index of the table row scanned for its largest estimate
public void UpdateState(int previousState, int action, double reward, int nextState)
{
    // Scan the next state's row for its largest estimate; only the first
    // `actions` entries are examined, starting from entry 0 as the baseline.
    double[] successorRow = qvalues[nextState];
    double bestSuccessorValue = successorRow[0];
    int candidate = 1;
    while (candidate < actions)
    {
        double estimate = successorRow[candidate];
        if (estimate > bestSuccessorValue)
        {
            bestSuccessorValue = estimate;
        }
        candidate++;
    }

    double[] currentRow = qvalues[previousState];

    // Target: immediate reward plus the discounted best estimate of the next state.
    double target = reward + discountFactor * bestSuccessorValue;

    // Update in place in two steps: first shrink the old estimate by
    // (1 - learningRate), then add the learningRate-weighted target.
    currentRow[action] *= (1.0 - learningRate);
    currentRow[action] += learningRate * target;
}
}