move improvements

tcmxx · tcmxx · commit 689369fd74b6 · 2018-08-13T16:00:41.000+03:00
diff --git a/Assets/UnityTensorflow/Examples/IntelligentPool/BilliardMAESOnly-OneShot-UseTrainer.unity b/Assets/UnityTensorflow/Examples/IntelligentPool/BilliardMAESOnly-OneShot-UseTrainer.unity
@@ -402,10 +402,6 @@ MonoBehaviour:
   m_Script: {fileID: 11500000, guid: 9cfc7edd5a153124fae99222c7dfb7de, type: 3}
   m_Name: 
   m_EditorClassIdentifier: 
-  optimizer: 0
-  optimizationMode: 1
-  iterationPerFrame: 1
-  evaluationBatchSize: 8
   debugVisualization: 1
 --- !u!4 &949524765
 Transform:
@@ -521,7 +517,7 @@ Prefab:
         type: 2}
       propertyPath: onValueChanged.m_PersistentCalls.m_Calls.Array.data[0].m_Target
       value: 
-      objectReference: {fileID: 1914681085}
+      objectReference: {fileID: 1746133764}
     - target: {fileID: 114726768938732050, guid: db0f163ff1982d64985f44d81eaf13d1,
         type: 2}
       propertyPath: agentRef
@@ -567,9 +563,67 @@ Prefab:
       value: 0
       objectReference: {fileID: 0}
     - target: {fileID: 1202550805354182, guid: db0f163ff1982d64985f44d81eaf13d1, type: 2}
+      propertyPath: m_IsActive
+      value: 1
+      objectReference: {fileID: 0}
+    - target: {fileID: 114487051412096316, guid: db0f163ff1982d64985f44d81eaf13d1,
+        type: 2}
+      propertyPath: onValueChanged.m_PersistentCalls.m_Calls.Array.data[0].m_MethodName
+      value: OnAutoRequestToggled
+      objectReference: {fileID: 0}
+    - target: {fileID: 114487051412096316, guid: db0f163ff1982d64985f44d81eaf13d1,
+        type: 2}
+      propertyPath: onValueChanged.m_PersistentCalls.m_Calls.Array.data[0].m_Mode
+      value: 0
+      objectReference: {fileID: 0}
+    - target: {fileID: 1866456776103920, guid: db0f163ff1982d64985f44d81eaf13d1, type: 2}
+      propertyPath: m_IsActive
+      value: 1
+      objectReference: {fileID: 0}
+    - target: {fileID: 1010238890107942, guid: db0f163ff1982d64985f44d81eaf13d1, type: 2}
       propertyPath: m_IsActive
       value: 0
       objectReference: {fileID: 0}
+    - target: {fileID: 224424860102480288, guid: db0f163ff1982d64985f44d81eaf13d1,
+        type: 2}
+      propertyPath: m_AnchoredPosition.x
+      value: 221.2
+      objectReference: {fileID: 0}
+    - target: {fileID: 224424860102480288, guid: db0f163ff1982d64985f44d81eaf13d1,
+        type: 2}
+      propertyPath: m_AnchoredPosition.y
+      value: -24.2
+      objectReference: {fileID: 0}
+    - target: {fileID: 224138536779500158, guid: db0f163ff1982d64985f44d81eaf13d1,
+        type: 2}
+      propertyPath: m_AnchoredPosition.x
+      value: 221.19997
+      objectReference: {fileID: 0}
+    - target: {fileID: 224138536779500158, guid: db0f163ff1982d64985f44d81eaf13d1,
+        type: 2}
+      propertyPath: m_AnchoredPosition.y
+      value: -24.200012
+      objectReference: {fileID: 0}
+    - target: {fileID: 224280634846545994, guid: db0f163ff1982d64985f44d81eaf13d1,
+        type: 2}
+      propertyPath: m_AnchoredPosition.x
+      value: 340.32
+      objectReference: {fileID: 0}
+    - target: {fileID: 224280634846545994, guid: db0f163ff1982d64985f44d81eaf13d1,
+        type: 2}
+      propertyPath: m_AnchoredPosition.y
+      value: -25.95
+      objectReference: {fileID: 0}
+    - target: {fileID: 224588602336079318, guid: db0f163ff1982d64985f44d81eaf13d1,
+        type: 2}
+      propertyPath: m_AnchoredPosition.y
+      value: -98.3
+      objectReference: {fileID: 0}
+    - target: {fileID: 224588602336079318, guid: db0f163ff1982d64985f44d81eaf13d1,
+        type: 2}
+      propertyPath: m_AnchoredPosition.x
+      value: 272.1
+      objectReference: {fileID: 0}
     m_RemovedComponents: []
   m_ParentPrefab: {fileID: 100100000, guid: db0f163ff1982d64985f44d81eaf13d1, type: 2}
   m_IsPrefabParent: 0
@@ -590,6 +644,31 @@ MonoBehaviour:
   m_Script: {fileID: 11500000, guid: 310b3bf0b547f6c4f911257303cc614d, type: 3}
   m_Name: (Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)
   m_EditorClassIdentifier: 
+--- !u!1 &1218375778 stripped
+GameObject:
+  m_PrefabParentObject: {fileID: 1140438680430356, guid: 21bdff36071a0444eae34d1a5f0211ba,
+    type: 2}
+  m_PrefabInternal: {fileID: 1363941755}
+--- !u!114 &1218375779
+MonoBehaviour:
+  m_ObjectHideFlags: 0
+  m_PrefabParentObject: {fileID: 0}
+  m_PrefabInternal: {fileID: 0}
+  m_GameObject: {fileID: 1218375778}
+  m_Enabled: 1
+  m_EditorHideFlags: 0
+  m_Script: {fileID: 11500000, guid: a7d1d4617c19074448560370226ed445, type: 3}
+  m_Name: 
+  m_EditorClassIdentifier: 
+  iterationPerUpdate: 1
+  populationSize: 16
+  optimizerType: 0
+  initialStepSize: 1
+  mode: 1
+  maxIteration: 100
+  targetValue: 1.8
+  evaluationBatchSize: 8
+  iteration: 0
 --- !u!114 &1233761530
 MonoBehaviour:
   m_ObjectHideFlags: 0
@@ -715,6 +794,11 @@ Prefab:
       propertyPath: maxIteration
       value: 50
       objectReference: {fileID: 0}
+    - target: {fileID: 114924759199247932, guid: 21bdff36071a0444eae34d1a5f0211ba,
+        type: 2}
+      propertyPath: synchronizedDecision
+      value: 0
+      objectReference: {fileID: 0}
     m_RemovedComponents: []
   m_ParentPrefab: {fileID: 100100000, guid: 21bdff36071a0444eae34d1a5f0211ba, type: 2}
   m_IsPrefabParent: 0
@@ -797,6 +881,12 @@ MonoBehaviour:
   discretePlayerActions: []
   defaultAction: 0
   brain: {fileID: 1327012119}
+--- !u!114 &1746133764 stripped
+MonoBehaviour:
+  m_PrefabParentObject: {fileID: 114726768938732050, guid: db0f163ff1982d64985f44d81eaf13d1,
+    type: 2}
+  m_PrefabInternal: {fileID: 1040820092}
+  m_Script: {fileID: 11500000, guid: 28e4f2c0965d97b48a60dfb2d1ab593a, type: 3}
 --- !u!114 &1886696250
 MonoBehaviour:
   m_ObjectHideFlags: 0
diff --git a/Assets/UnityTensorflow/Examples/IntelligentPool/Prefabs/PlayerMLAgentWithDecision.prefab b/Assets/UnityTensorflow/Examples/IntelligentPool/Prefabs/PlayerMLAgentWithDecision.prefab
@@ -55,12 +55,12 @@ MonoBehaviour:
   m_Name: 
   m_EditorClassIdentifier: 
   iterationPerUpdate: 1
-  populationSize: 30
+  populationSize: 10
   optimizerType: 0
   initialStepSize: 0.7
   mode: 1
-  maxIteration: 20
-  targetValue: 8
+  maxIteration: 15
+  targetValue: 2
   evaluationBatchSize: 8
   iteration: 0
 --- !u!114 &114440804119060818
@@ -75,7 +75,6 @@ MonoBehaviour:
   m_Name: 
   m_EditorClassIdentifier: 
   useDecision: 1
-  useMAESParamsFromAgent: 1
   useHeuristic: 1
   log: 1
   logInterval: 20
@@ -112,11 +111,7 @@ MonoBehaviour:
     resetOnDone: 0
     onDemandDecision: 1
     numberOfActionsBetweenDecisions: 1
-  callOnReadyInAgentAction: 1
-  maxIteration: 15
-  targetValue: 0.85
-  populationSize: 10
-  initialStepSize: 1
+  synchronizedDecision: 1
   shootSequence: 1
   resetAfterOneShot: 1
   resetOnStart: 1
diff --git a/Assets/UnityTensorflow/Examples/IntelligentPool/Scripts/BilliardAgent.cs b/Assets/UnityTensorflow/Examples/IntelligentPool/Scripts/BilliardAgent.cs
@@ -1,4 +1,5 @@
-﻿using System;
+﻿using MLAgents;
+using System;
 using System.Collections;
 using System.Collections.Generic;
 using UnityEngine;
@@ -162,4 +163,6 @@ public override void SetVisualizationMode(VisualizationMode visMode)
             visColor = new Color(0, 0, 0, 0);
         }
     }
+
+
 }
diff --git a/Assets/UnityTensorflow/Examples/IntelligentPool/Scripts/BilliardUIMLAgent.cs b/Assets/UnityTensorflow/Examples/IntelligentPool/Scripts/BilliardUIMLAgent.cs
@@ -32,10 +32,10 @@ private void Awake()
 
     private void Start()
     {
-        populationSizeSliderRef.value = agentRef.populationSize;
-        maxItrSliderRef.value = agentRef.maxIteration;
-        populationSizeTextRef.text = "Population size: " + agentRef.populationSize.ToString();
-        maxItrTextRef.text = "Max Iter: " + agentRef.maxIteration;
+        populationSizeSliderRef.value = agentRef.Optimizer.populationSize;
+        maxItrSliderRef.value = agentRef.Optimizer.maxIteration;
+        populationSizeTextRef.text = "Population size: " + agentRef.Optimizer.populationSize.ToString();
+        maxItrTextRef.text = "Max Iter: " + agentRef.Optimizer.maxIteration;
 
         rewardShapingToggleRef.isOn = gameSystemRef.defaultArena.rewardShaping;
         autoRequestToggleRef.isOn = agentRef.autoRequestDecision;
@@ -45,8 +45,8 @@ private void Start()
 
     private void Update()
     {
-        populationSizeSliderRef.value = agentRef.populationSize;
-        maxItrSliderRef.value = agentRef.maxIteration;
+        populationSizeSliderRef.value = agentRef.Optimizer.populationSize;
+        maxItrSliderRef.value = agentRef.Optimizer.maxIteration;
         rewardShapingToggleRef.isOn = gameSystemRef.defaultArena.rewardShaping;
 
         predictedScoreTextRef.text = "Predicted score: " + gameSystemRef.bestScore;
@@ -56,14 +56,14 @@ private void Update()
 
     public void OnPopulationSliderChanged(float value)
     {
-        agentRef.populationSize = Mathf.RoundToInt(value);
-        populationSizeTextRef.text = "Population size: " + agentRef.populationSize.ToString();
+        agentRef.Optimizer.populationSize = Mathf.RoundToInt(value);
+        populationSizeTextRef.text = "Population size: " + agentRef.Optimizer.populationSize.ToString();
     }
 
     public void OnIterationSliderChanged(float value)
     {
-        agentRef.maxIteration = Mathf.RoundToInt(value);
-        maxItrTextRef.text = "Max Iter: " + agentRef.maxIteration;
+        agentRef.Optimizer.maxIteration = Mathf.RoundToInt(value);
+        maxItrTextRef.text = "Max Iter: " + agentRef.Optimizer.maxIteration;
 
     }
 
diff --git a/Assets/UnityTensorflow/Examples/Walker/Scripts/RLModelPPOHierarchy.cs b/Assets/UnityTensorflow/Examples/Walker/Scripts/RLModelPPOHierarchy.cs
@@ -214,14 +214,22 @@ public override float[] EvaluateValue(float[,] vectorObservation, List<float[,,,
             {
                 for (int i = 0; i < outputAction.GetLength(1); ++i)
                 {
-                    var std = Mathf.Sqrt(vars[i]);
-                    var dis = new NormalDistribution(outputAction[j, i], std);
-
-                    if (useProbability)
-                        actions[j, i] = (float)dis.Generate();
-                    else
-                        actions[j, i] = outputAction[j, i];
-                    actionProbs[j, i] = (float)dis.ProbabilityDensityFunction(actions[j, i]);
+                    try
+                    {
+                        var std = Mathf.Sqrt(vars[i]);
+                        var dis = new NormalDistribution(outputAction[j, i], std);
+                        // Draw from the distribution when useProbability is set; otherwise use the network output directly.
+                        if (useProbability)
+                            actions[j, i] = (float)dis.Generate();
+                        else
+                            actions[j, i] = outputAction[j, i];
+                        actionProbs[j, i] = (float)dis.ProbabilityDensityFunction(actions[j, i]);
+                    } catch (Exception e)   // best-effort fallback: keep running, but report the actual cause
+                    {
+                        Debug.LogWarning("NaN action from neural network detected. Force it to 0. (" + e.Message + ")");
+                        actions[j, i] = 0;
+                        actionProbs[j, i] = 1;
+                    }
                 }
             }
         }
@@ -276,8 +284,15 @@ public override float[] EvaluateValue(float[,] vectorObservation, List<float[,,,
                 for (int i = 0; i < outputAction.GetLength(1); ++i)
                 {
                     var std = Mathf.Sqrt(vars[i]);
-                    var dis = new NormalDistribution(outputAction[j, i], std);
+                    if (float.IsNaN(outputAction[j, i]) || float.IsNaN(std) || float.IsNaN(actions[j, i]))
+                    {   // NaN never compares equal to anything (including float.NaN), so IsNaN is required here.
+                        actionProbs[j, i] = 0.5f;
+                        Debug.LogWarning("not valid output action mean:" + outputAction[j, i] + " or std:" + std +" or action to evaluate: " + actions[j, i]);
+                        continue;
+                    }
 
+                    var dis = new NormalDistribution(outputAction[j, i], std);
+                    
                     actionProbs[j, i] = (float)dis.ProbabilityDensityFunction(actions[j, i]);
                 }
             }
diff --git a/Assets/UnityTensorflow/MAESOptimization/AgentES.cs b/Assets/UnityTensorflow/MAESOptimization/AgentES.cs
@@ -6,15 +6,22 @@
 using System;
 using System.Linq;
 
+[RequireComponent(typeof(ESOptimizer))]
 public abstract class AgentES : Agent, IESOptimizable
 {
-    //for asynchronized decision, set this to false.
-    public bool callOnReadyInAgentAction = true;
-    public int maxIteration;
-    public float targetValue;
-    public int populationSize = 16;
-    public float initialStepSize = 1;
 
+
+    /// <summary>ESOptimizer component found on this GameObject; assigned in Awake().</summary>
+    public ESOptimizer Optimizer { get; protected set; }
+
+    /// <summary>Caches the ESOptimizer. Virtual so a subclass that needs its own Awake() can override it and call base.Awake().</summary>
+    protected virtual void Awake()
+    {
+        Optimizer = GetComponent<ESOptimizer>();
+    }
+
+    // For asynchronous decisions, set this to false.
+    public bool synchronizedDecision = true;
     public event System.Action<AgentES> OnEndOptimizationRequested;
     /// <summary>
     /// return the value of an action.
@@ -25,7 +32,7 @@ public abstract class AgentES : Agent, IESOptimizable
 
 
     /// <summary>
-    /// Implement this instead 
+    /// Implement this instead of AgentAction().
     /// </summary>
     /// <param name="vectorAction"></param>
     public abstract void OnReady(double[] vectorAction);
@@ -37,24 +44,33 @@ public enum VisualizationMode
         Best,
         None
     }
-    public abstract void SetVisualizationMode(VisualizationMode visMode);
+    public virtual void SetVisualizationMode(VisualizationMode visMode) { }
 
     /// <summary>
     /// Don't override this method, implement OnActionReady() instead.
-    /// Set callOnReadyInAgentAction if you want this agent to react when calling AgentAction
+    /// Set synchronizedDecision to true if you want this agent to react when calling AgentAction
     /// </summary>
     /// <param name="vectorAction"></param>
     /// <param name="textAction"></param>
     public override void AgentAction(float[] vectorAction, string textAction)
     {
-        if (callOnReadyInAgentAction)
+        if (synchronizedDecision)
             OnReady(Array.ConvertAll(vectorAction, t => (double)t));
     }
     
+    public double[] Optimize(double[] initialMean = null)
+    {
+        return Optimizer.Optimize(this, initialMean);
+    }
+
+    public void OptimizeAsync(double[] initialMean = null)
+    {
+        Optimizer.StartOptimizingAsync(this, OnReady,initialMean);
+    }
 
     public void ForceEndOptimization()
     {
-        OnEndOptimizationRequested.Invoke(this);
+        Optimizer.StopOptimizing(OnReady);
     }
 
     public int GetParamDimension()

Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,5 @@`
`1`		`-using System;`
	`1`	`+using MLAgents;`
	`2`	`+using System;`
`2`	`3`	`using System.Collections;`
`3`	`4`	`using System.Collections.Generic;`
`4`	`5`	`using UnityEngine;`
`@@ -162,4 +163,6 @@ public override void SetVisualizationMode(VisualizationMode visMode)`
`162`	`163`	`visColor = new Color(0, 0, 0, 0);`
`163`	`164`	`}`
`164`	`165`	`}`
	`166`	`+`
	`167`	`+`
`165`	`168`	`}`