SampleApp.MainForm.SarsaThread C# (CSharp) Method

MainForm Class Documentation | Show file | Open project: accord-net/framework

SarsaThread() private method

private SarsaThread ( ) : void
return	void

        /// <summary>
        /// Runs SARSA learning episodes until a stop is requested through
        /// <c>needToStop</c> or <c>learningIterations</c> episodes have completed.
        /// </summary>
        /// <remarks>
        /// Before each episode the epsilon of the underlying epsilon-greedy policy and
        /// the SARSA learning rate are recomputed from <c>explorationRate</c> and
        /// <c>learningRate</c>, scaled down linearly by <c>iteration / learningIterations</c>.
        /// After each episode the step count is written to the debug output and the
        /// iteration counter is shown in <c>iterationBox</c>. When the outer loop ends,
        /// <c>EnableControls( true )</c> is called.
        /// </remarks>
        private void SarsaThread( )
        {
            // SARSA's exploration policy is a tabu search wrapper around an epsilon-greedy policy
            TabuSearchExploration tabuSearch = (TabuSearchExploration) sarsa.ExplorationPolicy;
            EpsilonGreedyExploration epsilonGreedy = (EpsilonGreedyExploration) tabuSearch.BasePolicy;

            // number of completed episodes
            int iteration = 0;

            // one pass of this loop is one learning episode
            while ( !needToStop && iteration < learningIterations )
            {
                // exploration rate and learning rate shrink linearly with the iteration number
                epsilonGreedy.Epsilon = explorationRate - ( (double) iteration / learningIterations ) * explorationRate;
                sarsa.LearningRate = learningRate - ( (double) iteration / learningIterations ) * learningRate;

                // every episode starts with an empty tabu list
                tabuSearch.ResetTabuList( );

                // current coordinates of the agent, starting from the start cell
                int agentX = agentStartX;
                int agentY = agentStartY;

                // number of moves made in this episode (the initial move below is the first)
                int stepCount = 1;

                // choose and perform the initial move, remembering the state/action pair
                int lastState = GetStateNumber( agentX, agentY );
                int lastAction = sarsa.GetAction( lastState );
                double reward = UpdateAgentPosition( ref agentX, ref agentY, lastAction );

                // keep moving until a stop is requested or the agent stands on the goal cell
                while ( !( needToStop || ( agentX == agentStopX && agentY == agentStopY ) ) )
                {
                    stepCount++;

                    // forbid one action for the next choice
                    // NOTE(review): ( action + 2 ) % 4 is presumably the reverse of the previous move - confirm against the action encoding
                    tabuSearch.SetTabuAction( ( lastAction + 2 ) % 4, 1 );

                    // state the agent is in now and the action it picks there
                    int currentState = GetStateNumber( agentX, agentY );
                    int chosenAction = sarsa.GetAction( currentState );

                    // learn from the transition that was just completed
                    sarsa.UpdateState( lastState, lastAction, reward, currentState, chosenAction );

                    // perform the chosen action and collect its reward
                    reward = UpdateAgentPosition( ref agentX, ref agentY, chosenAction );

                    lastState = currentState;
                    lastAction = chosenAction;
                }

                // the inner loop ended on the goal cell (not on a stop request):
                // apply the terminal-state update, which takes no next state/action
                if ( !needToStop )
                {
                    sarsa.UpdateState( lastState, lastAction, reward );
                }

                System.Diagnostics.Debug.WriteLine( stepCount );

                iteration++;

                // show the number of completed iterations
                SetText( iterationBox, iteration.ToString( ) );
            }

            // learning is over - enable settings controls again
            EnableControls( true );
        }

MainForm

AGVStep

Application_Idle

ApplyFilter

ClearCurrentImage

ClearDataSeries

ClearEstimation

CloseCamera

CloseCurrentVideoSource

CloseImage

CloseVideoSource

CopyImage