```diff
@@ -4,146 +4,22 @@
 using System.Linq;
 using UnityEngine;
 using MLAgents;
+using System;

 public class TrainerMAES : MonoBehaviour, ITrainer
 {

     /// Reference to the brain that uses this CoreBrainInternal
     protected Brain brain;
-    public ESOptimizerType optimizer;
-
-    public OptimizationModes optimizationMode;
-    public int iterationPerFrame = 20;
-    public int evaluationBatchSize = 8;
+
     public bool debugVisualization = true;

-    private Dictionary<AgentES, OptimizationData> currentOptimizingAgents;
-
-
-    public enum ESOptimizerType
-    {
-        MAES,
-        LMMAES
-    }
-
-    protected class OptimizationData
-    {
-        public OptimizationData(int populationSize, IMAES optimizerToUse, int dim)
-        {
-            samples = new OptimizationSample[populationSize];
-            for (int i = 0; i < populationSize; ++i)
-            {
-                samples[i] = new OptimizationSample(dim);
-            }
-            iteration = 0;
-            optimizer = optimizerToUse;
-        }
-
-        public int iteration;
-        public OptimizationSample[] samples;
-        public IMAES optimizer;
-    }
-
-
+

-    protected void FixedUpdate()
-    {
-        ContinueOptimization();
-    }
-    /// Create the reference to the brain
     public void Initialize()
     {
-        currentOptimizingAgents = new Dictionary<AgentES, OptimizationData>();
-    }
-
-
-
-
-    protected void AddOptimization(List<AgentES> agents)
-    {
-        foreach (var agent in agents)
-        {
-            currentOptimizingAgents[agent] = new OptimizationData(agent.populationSize, optimizer == ESOptimizerType.LMMAES ? (IMAES)new LMMAES() : (IMAES)new MAES(), agent.GetParamDimension());
-            currentOptimizingAgents[agent].optimizer.init(brain.brainParameters.vectorActionSize,
-                agent.populationSize, new double[brain.brainParameters.vectorActionSize], agent.initialStepSize, optimizationMode);
-            agent.OnEndOptimizationRequested += OnEndOptimizationRequested;
-        }
-    }
-
-    protected void ContinueOptimization()
-    {
-        for (int it = 0; it < iterationPerFrame; ++it)
-        {
-            List<AgentES> agentList = currentOptimizingAgents.Keys.ToList();
-            foreach (var agent in agentList)
-            {
-                var optData = currentOptimizingAgents[agent];
-                optData.optimizer.generateSamples(optData.samples);
-
-
-                agent.SetVisualizationMode(debugVisualization ? AgentES.VisualizationMode.Sampling : AgentES.VisualizationMode.None);
-
-                for (int s = 0; s <= optData.samples.Length / evaluationBatchSize; ++s)
-                {
-                    List<double[]> paramList = new List<double[]>();
-                    for (int b = 0; b < evaluationBatchSize; ++b)
-                    {
-                        int ind = s * evaluationBatchSize + b;
-                        if (ind < optData.samples.Length)
-                        {
-                            paramList.Add(optData.samples[ind].x);
-                        }
-                    }
-
-                    var values = agent.Evaluate(paramList);
-
-                    for (int b = 0; b < evaluationBatchSize; ++b)
-                    {
-                        int ind = s * evaluationBatchSize + b;
-                        if (ind < optData.samples.Length)
-                        {
-                            optData.samples[ind].objectiveFuncVal = values[b];
-                        }
-                    }
-
-                }
-                /*foreach (OptimizationSample s in optData.samples)
-                {
-                    float value = agent.Evaluate(new List<double[]> { s.x })[0];
-                    s.objectiveFuncVal = value;
-                }*/
-
-
-
-                optData.optimizer.update(optData.samples);
-                double bestScore = optData.optimizer.getBestObjectiveFuncValue();
-                //Debug.Log("Best shot score " + optData.optimizer.getBestObjectiveFuncValue());
-                agent.SetVisualizationMode(debugVisualization ? AgentES.VisualizationMode.Best : AgentES.VisualizationMode.None);
-                agent.Evaluate(new List<double[]> { optData.optimizer.getBest() });
-
-                optData.iteration++;
-                if ((optData.iteration >= agent.maxIteration && agent.maxIteration > 0) ||
-                    (bestScore <= agent.targetValue && optimizationMode == OptimizationModes.minimize) ||
-                    (bestScore >= agent.targetValue && optimizationMode == OptimizationModes.maximize))
-                {
-                    // optimization is done
-                    agent.OnReady(optData.optimizer.getBest());
-                    currentOptimizingAgents.Remove(agent);
-                }
-            }
-        }
     }

-    protected void OnEndOptimizationRequested(AgentES agent)
-    {
-        if (currentOptimizingAgents.ContainsKey(agent))
-        {
-            var optData = currentOptimizingAgents[agent];
-            agent.OnReady(optData.optimizer.getBest());
-            currentOptimizingAgents.Remove(agent);
-            agent.OnEndOptimizationRequested -= OnEndOptimizationRequested;
-        }
-    }


     public int GetStep()
```
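The removed `ContinueOptimization()` above is a standard ask–evaluate–tell loop: draw a population of candidate parameter vectors, score them in batches through `agent.Evaluate()`, then hand the scored samples back to the optimizer. Note that its batching bound `s <= optData.samples.Length / evaluationBatchSize` covers the remainder batch but also issues one empty `Evaluate()` call whenever the population size is an exact multiple of the batch size. Below is a minimal sketch of the same iteration with an exact batch count; it reuses the diff's `IMAES`, `OptimizationSample`, and `AgentES` names, and is illustrative rather than the repository's code:

```csharp
// One ask-evaluate-tell step over a population of candidate solutions.
// Assumes the IMAES/OptimizationSample/AgentES types visible in the diff above.
void RunOneIteration(IMAES optimizer, OptimizationSample[] samples, AgentES agent, int batchSize)
{
    optimizer.generateSamples(samples);                          // ask: sample new candidates

    int batches = (samples.Length + batchSize - 1) / batchSize;  // ceiling division, no empty trailing batch
    for (int s = 0; s < batches; ++s)
    {
        var paramList = new List<double[]>();
        for (int b = 0; b < batchSize && s * batchSize + b < samples.Length; ++b)
            paramList.Add(samples[s * batchSize + b].x);

        var values = agent.Evaluate(paramList);                  // evaluate: score the whole batch at once
        for (int b = 0; b < paramList.Count; ++b)
            samples[s * batchSize + b].objectiveFuncVal = values[b];
    }

    optimizer.update(samples);                                   // tell: update the search distribution
}
```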
```diff
@@ -158,28 +34,23 @@ public int GetMaxStep()

     public Dictionary<Agent, TakeActionOutput> TakeAction(Dictionary<Agent, AgentInfo> agentInfos)
     {
-        var agentList = agentInfos.Keys;
-        List<AgentES> agentsToOptimize = new List<AgentES>();
-        foreach (Agent a in agentList)
+        var result = new Dictionary<Agent, TakeActionOutput>();
+        foreach (var a in agentInfos)
         {
-            if (!(a is AgentES))
-            {
-                Debug.LogError("Agents using CoreBrainMAES must inherit from AgentES");
-            }
-            if (!currentOptimizingAgents.ContainsKey((AgentES)a))
-            {
-                agentsToOptimize.Add((AgentES)a);
-            }
-            else
+            AgentES agent = a.Key as AgentES;
+            if (agent != null)
             {
-                //Debug.LogError("new decision requested while last decision is not made yet");
+                if (agent.synchronizedDecision)
+                {
+
+                    result[agent] = new TakeActionOutput() { outputAction = Array.ConvertAll(agent.Optimize(), t => (float)t) };
+                }
+                else
+                {
+                    agent.OptimizeAsync();
+                }
             }
         }
-
-        if (agentsToOptimize.Count > 0)
-            AddOptimization(agentsToOptimize);
-
-
-        return new Dictionary<Agent, TakeActionOutput>();
+        return result;
     }

```
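With this commit the trainer no longer drives the evolution strategy itself: `TakeAction()` only dispatches, and each `AgentES` owns its optimization, either blocking (`Optimize()`) or in the background (`OptimizeAsync()`). The sketch below summarizes the `AgentES` surface the new `TakeAction()` relies on; the member names come from the call sites in the diff, but the exact signatures are assumptions, not the repository's actual declarations:

```csharp
// Sketch of the AgentES members referenced by TrainerMAES.TakeAction().
// Signatures are inferred from the diff's call sites; bodies are omitted.
public abstract class AgentES : Agent
{
    // When true the trainer blocks on Optimize() and returns the action
    // in the same decision step; otherwise optimization runs asynchronously.
    public bool synchronizedDecision;

    // Runs the MAES/LM-MA-ES loop to completion and returns the best
    // parameter vector found.
    public abstract double[] Optimize();

    // Starts the same optimization without blocking the decision step.
    public abstract void OptimizeAsync();
}
```

In the synchronous branch, `Array.ConvertAll(agent.Optimize(), t => (float)t)` narrows the `double[]` the optimizer works in to the `float[]` expected by `TakeActionOutput.outputAction`.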