social_learning.QLearningAgent.updateValueFunction C# (CSharp) 메소드

updateValueFunction() 개인적인 메소드

private updateValueFunction ( double[] sensors ) : void
sensors double[]
리턴 void
        private void updateValueFunction(double[] sensors)
        {
            // Add the discounted maximum reward we expect for the current stateActionPair
            var bestValue = greedyValue(sensors);

            // Scale the reward in the range [0,1]
            var scaledReward = (reward + MaxReward) / (2.0 * MaxReward);

            // Set the reward for the action we took plus the discounted look-ahead reward
            _observedValue[0] = Math.Max(0, Math.Min(1, scaledReward + DiscountFactor * bestValue));

            // Run a backprop epoch
            ((FastCyclicNetwork)Brain).Train(_prevState, _observedValue);

            // Reset the reward
            reward = 0;
        }