// Background-thread routine that animates the learned solution path.
// Exploration is switched off (epsilon = 0) so the agent follows only
// the greedy policy it has learned, until needToStop is signalled.
private void ShowSolutionThread( )
{
    // pick the exploration policy of whichever learner is in use
    TabuSearchExploration tabuPolicy = ( qLearning != null ) ?
        (TabuSearchExploration) qLearning.ExplorationPolicy :
        (TabuSearchExploration) sarsa.ExplorationPolicy;
    EpsilonGreedyExploration greedyPolicy = (EpsilonGreedyExploration) tabuPolicy.BasePolicy;

    // disable random exploration and clear the tabu list
    greedyPolicy.Epsilon = 0;
    tabuPolicy.ResetTabuList( );

    // agent begins its walk at the start cell
    int posX = agentStartX;
    int posY = agentStartY;

    // prepare the map to display: copy base map, mark start (2) and goal (3)
    Array.Copy( map, mapToDisplay, mapWidth * mapHeight );
    mapToDisplay[agentStartY, agentStartX] = 2;
    mapToDisplay[agentStopY, agentStopX] = 3;

    while ( !needToStop )
    {
        // show the current state of the map
        cellWorld.Map = mapToDisplay;
        // pause so the motion is visible to the user
        Thread.Sleep( 200 );

        // when the goal is reached, restart the walk from the start cell
        if ( ( posX == agentStopX ) && ( posY == agentStopY ) )
        {
            // restore the start/goal markers and reset the agent's position
            mapToDisplay[agentStartY, agentStartX] = 2;
            mapToDisplay[agentStopY, agentStopX] = 3;
            posX = agentStartX;
            posY = agentStartY;
            cellWorld.Map = mapToDisplay;
            Thread.Sleep( 200 );
        }

        // clear the agent's old cell
        mapToDisplay[posY, posX] = 0;

        // query the greedy action for the current state and move the agent
        // (the returned reward is not needed during playback)
        int currentState = GetStateNumber( posX, posY );
        int action = ( qLearning != null ) ?
            qLearning.GetAction( currentState ) :
            sarsa.GetAction( currentState );
        UpdateAgentPosition( ref posX, ref posY, action );

        // draw the agent at its new cell
        mapToDisplay[posY, posX] = 2;
    }

    // re-enable the settings controls once the demonstration stops
    EnableControls( true );
}