/// <summary>
/// Background-thread worker that trains the agent with Q-Learning.
/// Runs <c>learningIterations</c> episodes, linearly annealing both the
/// exploration rate (epsilon) and the learning rate down towards zero,
/// and updates the UI with the current iteration number after each episode.
/// Stops early when <c>needToStop</c> is set.
/// </summary>
private void QLearningThread( )
{
    // exploration policy: tabu search wrapped around an epsilon-greedy policy
    TabuSearchExploration tabuPolicy = (TabuSearchExploration) qLearning.ExplorationPolicy;
    EpsilonGreedyExploration explorationPolicy = (EpsilonGreedyExploration) tabuPolicy.BasePolicy;

    int iteration = 0;

    // main learning loop - one pass per episode
    while ( ( !needToStop ) && ( iteration < learningIterations ) )
    {
        // fraction of the learning run completed so far, in [0, 1)
        double progress = (double) iteration / learningIterations;

        // anneal exploration and learning rates linearly towards zero
        explorationPolicy.Epsilon = explorationRate - progress * explorationRate;
        qLearning.LearningRate = learningRate - progress * learningRate;

        // start each episode with an empty tabu list and the agent on the start cell
        tabuPolicy.ResetTabuList( );
        int agentX = agentStartX;
        int agentY = agentStartY;

        // number of moves the agent makes before reaching the goal
        int steps = 0;

        // run one episode: move until the goal cell is reached (or stop is requested)
        while ( ( !needToStop ) && ( ( agentX != agentStopX ) || ( agentY != agentStopY ) ) )
        {
            steps++;

            int currentState = GetStateNumber( agentX, agentY );
            int action = qLearning.GetAction( currentState );

            // move the agent and collect the reward for the move
            double reward = UpdateAgentPosition( ref agentX, ref agentY, action );
            int nextState = GetStateNumber( agentX, agentY );

            // Q-function update for the (state, action) pair just taken
            qLearning.UpdateState( currentState, action, reward, nextState );

            // make action (action + 2) % 4 tabu for one step - presumably the
            // reverse move, to discourage immediate backtracking (depends on
            // the action-to-direction mapping; confirm against UpdateAgentPosition)
            tabuPolicy.SetTabuAction( ( action + 2 ) % 4, 1 );
        }

        // trace episode length for debugging
        System.Diagnostics.Debug.WriteLine( steps );

        iteration++;
        // show current iteration number in the UI (1-based)
        SetText( iterationBox, iteration.ToString( ) );
    }

    // learning finished (or was stopped) - re-enable settings controls
    EnableControls( true );
}