AIXI.Program.InteractionLoop C# (CSharp) Method

Program Class Documentation Show file Open project: GoodAI/SummerCamp
InteractionLoop() public static method

public static InteractionLoop ( Agent agent, AIXIEnvironment env, string>.Dictionary options ) : void
agent	Agent
env	AIXIEnvironment
options	string>.Dictionary
return	void
        public static void InteractionLoop(Agent agent, AIXIEnvironment env, Dictionary<string, string> options)
        {
            Random rnd;
            if (options.ContainsKey("random-seed"))
            {
                int seed;
                int.TryParse(options["random-seed"], out seed);
                rnd = new Random(seed);
            }
            else
            {
                rnd = new Random();
            }

            // Exploration = try random action
            // probability will decay exponentially as exploreRate * exploreDecay ** round_number
            var exploreRate = 0.0;
            if (options.ContainsKey("exploration"))
            {
                exploreRate = Utils.MyToDouble(options["exploration"]);
            }
            var explore = exploreRate > 0;

            var exploreDecay = 0.0;
            if (options.ContainsKey("explore-decay"))
            {
                exploreDecay = Utils.MyToDouble(options["explore-decay"]);
            }

            Debug.Assert(0.0 <= exploreRate);
            Debug.Assert(0.0 <= exploreDecay && exploreDecay <= 1.0);

            //automatic halting after certain number of rounds
            var terminateAge = 0;
            if (options.ContainsKey("terminate-age"))
            {
                terminateAge = Convert.ToInt32(options["terminate-age"]);
            }
            var terminateCheck = terminateAge > 0;
            Debug.Assert(0 <= terminateAge);

            // when learning period passes, agent will stop changing/improving model and just use it.
            var learningPeriod = 0;
            if (options.ContainsKey("learning-period"))
            {
                learningPeriod = Convert.ToInt32(options["learning-period"]);
            }
            Debug.Assert(0 <= learningPeriod);

            var cycle = 0;
            while (!env.IsFinished)
            {

                if (terminateCheck && agent.Age > terminateAge)
                {
                    break;
                }
                var cycleStartTime = DateTime.Now;
                var observation = env.Observation;
                var reward = env.Reward;

                if (learningPeriod > 0 && cycle > learningPeriod)
                {
                    explore = false;
                }

                //give observation and reward to agent.
                agent.ModelUpdatePercept(observation, reward);

                var explored = false;
                int action;

                if (explore && rnd.NextDouble() < exploreRate)
                {
                    explored = true;
                    action = agent.GenerateRandomAction();
                }
                else
                {
                    //get agents response to observation and reward
                    action = agent.Search();
                }

                //pass agent's action to environment
                env.PerformAction(action);
                agent.ModelUpdateAction(action);

                var timeTaken = DateTime.Now - cycleStartTime;

                Console.WriteLine("{0}:\t{1},{2},{3}\t{4},{5}  \t{6},{7}\t>{8},{9}",
                    cycle, observation, reward, action,
                    explored, exploreRate,
                    agent.TotalReward, agent.AverageReward(),
                    timeTaken, agent.ModelSize()
                    );

                if (explore)
                {
                    exploreRate *= exploreDecay;
                }
                cycle += 1;
            }
        }
Program
InteractionLoop
Main