/// <summary>
/// Creates a Q-Learning agent whose neural network is trained with backprop
/// updates standing in for the tabular Q-value update rule.
/// </summary>
/// <param name="id">Unique identifier for this agent.</param>
/// <param name="speciesId">Identifier of the species this agent belongs to.</param>
/// <param name="brain">Controller network; must expose exactly one output.</param>
/// <param name="agentsNavigate">Whether agents are allowed to navigate.</param>
/// <param name="agentsHide">Whether agents are allowed to hide.</param>
/// <param name="numOrientationActions">Size of the discrete orientation action set.</param>
/// <param name="numVelocityActions">Size of the discrete velocity action set.</param>
/// <param name="world">World whose PlantEaten event this agent listens to.</param>
public QLearningAgent(int id, int speciesId, IBlackBox brain, bool agentsNavigate, bool agentsHide,
int numOrientationActions, int numVelocityActions, World world)
: base(id, speciesId, brain, agentsNavigate, agentsHide)
{
    // The learning update assumes a single network output.
    Debug.Assert(brain.OutputCount == 1, "Incorrect number of outputs in neural network!");

    _numOrientationActions = numOrientationActions;
    _numVelocityActions = numVelocityActions;
    _random = new Random();

    // Previous-state snapshot sized to the network's input layer; the observed
    // value buffer holds the single scalar fed back during learning.
    _prevState = new double[brain.InputCount];
    _observedValue = new double[1];

    // Subscribe via method-group conversion (identical to constructing the
    // delegate explicitly).
    // NOTE(review): this handler is never unsubscribed in this block — confirm
    // the agent's lifetime matches the world's, otherwise the event reference
    // keeps the agent reachable.
    world.PlantEaten += world_PlantEaten;

    MaxReward = 200;
    LearningRate = DEFAULT_LEARNING_RATE;
    DiscountFactor = DEFAULT_DISCOUNT_FACTOR;
    Epsilon = DEFAULT_EPSILON;

    // The backprop learning rate is equivalent to the Q-Learning learning rate.
    // Must run after LearningRate is assigned above.
    ((FastCyclicNetwork)Brain).BackpropLearningRate = LearningRate;
}