/// <summary>
/// Performs one temporal-difference-style update of the value function:
/// blends the scaled accumulated reward with the discounted greedy
/// look-ahead value, trains the network on the previous state toward that
/// target, then clears the accumulated reward.
/// </summary>
/// <param name="sensors">Current sensor readings, used to evaluate the greedy look-ahead value.</param>
private void updateValueFunction(double[] sensors)
{
    // Greedy estimate of the best value reachable from the current state.
    double lookAhead = greedyValue(sensors);

    // Map the accumulated reward from [-MaxReward, MaxReward] onto [0, 1].
    double normalizedReward = (reward + MaxReward) / (2.0 * MaxReward);

    // Training target: scaled reward plus discounted look-ahead, clamped to [0, 1].
    double target = normalizedReward + DiscountFactor * lookAhead;
    _observedValue[0] = Math.Min(1, Math.Max(0, target));

    // One backprop pass on the previous state toward the new target value.
    ((FastCyclicNetwork)Brain).Train(_prevState, _observedValue);

    // The reward has been consumed by this update; reset the accumulator.
    reward = 0;
}