/// <summary>
/// Runs the agent/environment interaction loop until the environment reports that it
/// is finished, or until the agent's age exceeds the configured terminate age.
/// </summary>
/// <remarks>
/// Each cycle reads the environment's current observation and reward, feeds them to the
/// agent, picks an action (a random one with probability <c>exploreRate</c> while
/// exploration is enabled, otherwise the result of <c>agent.Search()</c>), applies the
/// action to the environment, informs the agent of it, and prints one status line to
/// the console.
/// Recognised option keys (all optional):
/// <c>random-seed</c> (seed for the exploration RNG),
/// <c>exploration</c> (initial explore probability, default 0.0),
/// <c>explore-decay</c> (per-cycle multiplier of the explore probability, default 0.0),
/// <c>terminate-age</c> (stop once the agent's age exceeds this; 0 disables the check),
/// <c>learning-period</c> (exploration is switched off once the cycle counter exceeds
/// this; 0 disables the switch).
/// </remarks>
/// <param name="agent">Agent that receives percepts and chooses actions.</param>
/// <param name="env">Environment that supplies percepts and receives actions.</param>
/// <param name="options">String-valued configuration options, keyed as listed above.</param>
/// <exception cref="FormatException">
/// <c>terminate-age</c> or <c>learning-period</c> is present but is not a valid integer.
/// </exception>
public static void InteractionLoop(Agent agent, AIXIEnvironment env, Dictionary<string, string> options)
{
    // Scratch slot for TryGetValue; a single lookup per option instead of ContainsKey + indexer.
    string optionText;

    Random rnd;
    if (options.TryGetValue("random-seed", out optionText))
    {
        int seed;
        if (!int.TryParse(optionText, out seed))
        {
            // NOTE(review): an unparsable seed silently falls back to 0 (what TryParse
            // leaves in the out parameter) - confirm this is intended rather than an error.
            seed = 0;
        }
        rnd = new Random(seed);
    }
    else
    {
        rnd = new Random();
    }

    // Exploration = try random action.
    // The probability decays exponentially as exploreRate * exploreDecay ** round_number.
    var exploreRate = 0.0;
    if (options.TryGetValue("exploration", out optionText))
    {
        exploreRate = Utils.MyToDouble(optionText);
    }
    var explore = exploreRate > 0;

    // NOTE(review): with the 0.0 fallback the explore rate drops to zero after the first
    // cycle when "explore-decay" is absent - presumably callers always supply it; confirm.
    var exploreDecay = 0.0;
    if (options.TryGetValue("explore-decay", out optionText))
    {
        exploreDecay = Utils.MyToDouble(optionText);
    }
    Debug.Assert(0.0 <= exploreRate);
    Debug.Assert(0.0 <= exploreDecay && exploreDecay <= 1.0);

    // Automatic halting once the agent's age exceeds a certain number.
    var terminateAge = 0;
    if (options.TryGetValue("terminate-age", out optionText))
    {
        terminateAge = Convert.ToInt32(optionText);
    }
    var terminateCheck = terminateAge > 0;
    Debug.Assert(0 <= terminateAge);

    // Once the learning period has passed, random exploration is switched off.
    // The agent's model-update calls below are still made every cycle.
    var learningPeriod = 0;
    if (options.TryGetValue("learning-period", out optionText))
    {
        learningPeriod = Convert.ToInt32(optionText);
    }
    Debug.Assert(0 <= learningPeriod);

    // Monotonic timer for the per-cycle duration; unlike DateTime.Now differences it is
    // not affected by system clock adjustments.
    var cycleTimer = new Stopwatch();

    var cycle = 0;
    while (!env.IsFinished)
    {
        if (terminateCheck && agent.Age > terminateAge)
        {
            break;
        }

        cycleTimer.Restart();

        var observation = env.Observation;
        var reward = env.Reward;

        if (learningPeriod > 0 && cycle > learningPeriod)
        {
            explore = false;
        }

        // Give observation and reward to the agent.
        agent.ModelUpdatePercept(observation, reward);

        var explored = false;
        int action;
        // rnd is only consulted while exploration is still enabled.
        if (explore && rnd.NextDouble() < exploreRate)
        {
            explored = true;
            action = agent.GenerateRandomAction();
        }
        else
        {
            // Get the agent's response to the observation and reward.
            action = agent.Search();
        }

        // Pass the agent's action to the environment, then record it in the agent.
        env.PerformAction(action);
        agent.ModelUpdateAction(action);

        var timeTaken = cycleTimer.Elapsed;

        Console.WriteLine("{0}:\t{1},{2},{3}\t{4},{5} \t{6},{7}\t>{8},{9}",
            cycle, observation, reward, action,
            explored, exploreRate,
            agent.TotalReward, agent.AverageReward(),
            timeTaken, agent.ModelSize()
        );

        // The explore probability only decays while exploration is enabled.
        if (explore)
        {
            exploreRate *= exploreDecay;
        }
        cycle += 1;
    }
}