public QLearningAgent ( int id, int speciesId, IBlackBox brain, bool agentsNavigate, bool agentsHide, int numOrientationActions, int numVelocityActions, World world ) |
||
id | int | The unique ID of this teacher. |
speciesId | int | |
brain | IBlackBox | The neural network value function for this teacher. It should have (2 + # of sensors) input nodes and 1 output node. |
agentsNavigate | bool | |
agentsHide | bool | |
numOrientationActions | int | The number of buckets to discretize the orientation action space into. |
numVelocityActions | int | The number of buckets to discretize the velocity action space into. |
world | World | The world this teacher will be evaluated in. |
return | System |
/// <summary>
/// Creates a Q-learning agent that uses the supplied neural network as its value function
/// and listens for plant-eaten events raised by the given world.
/// </summary>
/// <param name="id">The unique ID of this agent; forwarded to the base constructor.</param>
/// <param name="speciesId">The species ID; forwarded to the base constructor.</param>
/// <param name="brain">
/// The neural network value function. It should have (2 + # of sensors) input nodes and
/// exactly 1 output node; only the output count is checked here, and only in debug builds.
/// </param>
/// <param name="agentsNavigate">Forwarded unchanged to the base constructor.</param>
/// <param name="agentsHide">Forwarded unchanged to the base constructor.</param>
/// <param name="numOrientationActions">The number of buckets to discretize the orientation action space into.</param>
/// <param name="numVelocityActions">The number of buckets to discretize the velocity action space into.</param>
/// <param name="world">The world this agent will be evaluated in; its PlantEaten event is subscribed to.</param>
public QLearningAgent(
    int id,
    int speciesId,
    IBlackBox brain,
    bool agentsNavigate,
    bool agentsHide,
    int numOrientationActions,
    int numVelocityActions,
    World world)
    : base(id, speciesId, brain, agentsNavigate, agentsHide)
{
    // Debug-build sanity check: the value network must produce a single output.
    Debug.Assert(brain.OutputCount == 1, "Incorrect number of outputs in neural network!");

    // Sizes of the discretized action spaces.
    _numOrientationActions = numOrientationActions;
    _numVelocityActions = numVelocityActions;

    // Working state: random source, a previous-state buffer sized to the network's
    // inputs, and a single-element buffer for the observed value.
    _random = new Random();
    _prevState = new double[brain.InputCount];
    _observedValue = new double[1];

    // NOTE(review): no matching unsubscribe is visible in this chunk - confirm the
    // handler is detached elsewhere if agents can be discarded while the world lives on.
    world.PlantEaten += world_PlantEaten;

    // Learning hyper-parameters start at their defaults.
    MaxReward = 200;
    LearningRate = DEFAULT_LEARNING_RATE;
    DiscountFactor = DEFAULT_DISCOUNT_FACTOR;
    Epsilon = DEFAULT_EPSILON;

    // The backprop learning rate is equivalent to the Q-Learning learning rate.
    // The cast throws InvalidCastException if Brain is not a FastCyclicNetwork.
    FastCyclicNetwork network = (FastCyclicNetwork)Brain;
    network.BackpropLearningRate = LearningRate;
}