Skip to content

Commit 689369f

Browse files
committed
move improvements
1 parent 1f23b59 commit 689369f

File tree

6 files changed

+164
-45
lines changed

6 files changed

+164
-45
lines changed

Assets/UnityTensorflow/Examples/IntelligentPool/BilliardMAESOnly-OneShot-UseTrainer.unity

Lines changed: 95 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -402,10 +402,6 @@ MonoBehaviour:
402402
m_Script: {fileID: 11500000, guid: 9cfc7edd5a153124fae99222c7dfb7de, type: 3}
403403
m_Name:
404404
m_EditorClassIdentifier:
405-
optimizer: 0
406-
optimizationMode: 1
407-
iterationPerFrame: 1
408-
evaluationBatchSize: 8
409405
debugVisualization: 1
410406
--- !u!4 &949524765
411407
Transform:
@@ -521,7 +517,7 @@ Prefab:
521517
type: 2}
522518
propertyPath: onValueChanged.m_PersistentCalls.m_Calls.Array.data[0].m_Target
523519
value:
524-
objectReference: {fileID: 1914681085}
520+
objectReference: {fileID: 1746133764}
525521
- target: {fileID: 114726768938732050, guid: db0f163ff1982d64985f44d81eaf13d1,
526522
type: 2}
527523
propertyPath: agentRef
@@ -567,9 +563,67 @@ Prefab:
567563
value: 0
568564
objectReference: {fileID: 0}
569565
- target: {fileID: 1202550805354182, guid: db0f163ff1982d64985f44d81eaf13d1, type: 2}
566+
propertyPath: m_IsActive
567+
value: 1
568+
objectReference: {fileID: 0}
569+
- target: {fileID: 114487051412096316, guid: db0f163ff1982d64985f44d81eaf13d1,
570+
type: 2}
571+
propertyPath: onValueChanged.m_PersistentCalls.m_Calls.Array.data[0].m_MethodName
572+
value: OnAutoRequestToggled
573+
objectReference: {fileID: 0}
574+
- target: {fileID: 114487051412096316, guid: db0f163ff1982d64985f44d81eaf13d1,
575+
type: 2}
576+
propertyPath: onValueChanged.m_PersistentCalls.m_Calls.Array.data[0].m_Mode
577+
value: 0
578+
objectReference: {fileID: 0}
579+
- target: {fileID: 1866456776103920, guid: db0f163ff1982d64985f44d81eaf13d1, type: 2}
580+
propertyPath: m_IsActive
581+
value: 1
582+
objectReference: {fileID: 0}
583+
- target: {fileID: 1010238890107942, guid: db0f163ff1982d64985f44d81eaf13d1, type: 2}
570584
propertyPath: m_IsActive
571585
value: 0
572586
objectReference: {fileID: 0}
587+
- target: {fileID: 224424860102480288, guid: db0f163ff1982d64985f44d81eaf13d1,
588+
type: 2}
589+
propertyPath: m_AnchoredPosition.x
590+
value: 221.2
591+
objectReference: {fileID: 0}
592+
- target: {fileID: 224424860102480288, guid: db0f163ff1982d64985f44d81eaf13d1,
593+
type: 2}
594+
propertyPath: m_AnchoredPosition.y
595+
value: -24.2
596+
objectReference: {fileID: 0}
597+
- target: {fileID: 224138536779500158, guid: db0f163ff1982d64985f44d81eaf13d1,
598+
type: 2}
599+
propertyPath: m_AnchoredPosition.x
600+
value: 221.19997
601+
objectReference: {fileID: 0}
602+
- target: {fileID: 224138536779500158, guid: db0f163ff1982d64985f44d81eaf13d1,
603+
type: 2}
604+
propertyPath: m_AnchoredPosition.y
605+
value: -24.200012
606+
objectReference: {fileID: 0}
607+
- target: {fileID: 224280634846545994, guid: db0f163ff1982d64985f44d81eaf13d1,
608+
type: 2}
609+
propertyPath: m_AnchoredPosition.x
610+
value: 340.32
611+
objectReference: {fileID: 0}
612+
- target: {fileID: 224280634846545994, guid: db0f163ff1982d64985f44d81eaf13d1,
613+
type: 2}
614+
propertyPath: m_AnchoredPosition.y
615+
value: -25.95
616+
objectReference: {fileID: 0}
617+
- target: {fileID: 224588602336079318, guid: db0f163ff1982d64985f44d81eaf13d1,
618+
type: 2}
619+
propertyPath: m_AnchoredPosition.y
620+
value: -98.3
621+
objectReference: {fileID: 0}
622+
- target: {fileID: 224588602336079318, guid: db0f163ff1982d64985f44d81eaf13d1,
623+
type: 2}
624+
propertyPath: m_AnchoredPosition.x
625+
value: 272.1
626+
objectReference: {fileID: 0}
573627
m_RemovedComponents: []
574628
m_ParentPrefab: {fileID: 100100000, guid: db0f163ff1982d64985f44d81eaf13d1, type: 2}
575629
m_IsPrefabParent: 0
@@ -590,6 +644,31 @@ MonoBehaviour:
590644
m_Script: {fileID: 11500000, guid: 310b3bf0b547f6c4f911257303cc614d, type: 3}
591645
m_Name: (Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)
592646
m_EditorClassIdentifier:
647+
--- !u!1 &1218375778 stripped
648+
GameObject:
649+
m_PrefabParentObject: {fileID: 1140438680430356, guid: 21bdff36071a0444eae34d1a5f0211ba,
650+
type: 2}
651+
m_PrefabInternal: {fileID: 1363941755}
652+
--- !u!114 &1218375779
653+
MonoBehaviour:
654+
m_ObjectHideFlags: 0
655+
m_PrefabParentObject: {fileID: 0}
656+
m_PrefabInternal: {fileID: 0}
657+
m_GameObject: {fileID: 1218375778}
658+
m_Enabled: 1
659+
m_EditorHideFlags: 0
660+
m_Script: {fileID: 11500000, guid: a7d1d4617c19074448560370226ed445, type: 3}
661+
m_Name:
662+
m_EditorClassIdentifier:
663+
iterationPerUpdate: 1
664+
populationSize: 16
665+
optimizerType: 0
666+
initialStepSize: 1
667+
mode: 1
668+
maxIteration: 100
669+
targetValue: 1.8
670+
evaluationBatchSize: 8
671+
iteration: 0
593672
--- !u!114 &1233761530
594673
MonoBehaviour:
595674
m_ObjectHideFlags: 0
@@ -715,6 +794,11 @@ Prefab:
715794
propertyPath: maxIteration
716795
value: 50
717796
objectReference: {fileID: 0}
797+
- target: {fileID: 114924759199247932, guid: 21bdff36071a0444eae34d1a5f0211ba,
798+
type: 2}
799+
propertyPath: synchronizedDecision
800+
value: 0
801+
objectReference: {fileID: 0}
718802
m_RemovedComponents: []
719803
m_ParentPrefab: {fileID: 100100000, guid: 21bdff36071a0444eae34d1a5f0211ba, type: 2}
720804
m_IsPrefabParent: 0
@@ -797,6 +881,12 @@ MonoBehaviour:
797881
discretePlayerActions: []
798882
defaultAction: 0
799883
brain: {fileID: 1327012119}
884+
--- !u!114 &1746133764 stripped
885+
MonoBehaviour:
886+
m_PrefabParentObject: {fileID: 114726768938732050, guid: db0f163ff1982d64985f44d81eaf13d1,
887+
type: 2}
888+
m_PrefabInternal: {fileID: 1040820092}
889+
m_Script: {fileID: 11500000, guid: 28e4f2c0965d97b48a60dfb2d1ab593a, type: 3}
800890
--- !u!114 &1886696250
801891
MonoBehaviour:
802892
m_ObjectHideFlags: 0

Assets/UnityTensorflow/Examples/IntelligentPool/Prefabs/PlayerMLAgentWithDecision.prefab

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -55,12 +55,12 @@ MonoBehaviour:
5555
m_Name:
5656
m_EditorClassIdentifier:
5757
iterationPerUpdate: 1
58-
populationSize: 30
58+
populationSize: 10
5959
optimizerType: 0
6060
initialStepSize: 0.7
6161
mode: 1
62-
maxIteration: 20
63-
targetValue: 8
62+
maxIteration: 15
63+
targetValue: 2
6464
evaluationBatchSize: 8
6565
iteration: 0
6666
--- !u!114 &114440804119060818
@@ -75,7 +75,6 @@ MonoBehaviour:
7575
m_Name:
7676
m_EditorClassIdentifier:
7777
useDecision: 1
78-
useMAESParamsFromAgent: 1
7978
useHeuristic: 1
8079
log: 1
8180
logInterval: 20
@@ -112,11 +111,7 @@ MonoBehaviour:
112111
resetOnDone: 0
113112
onDemandDecision: 1
114113
numberOfActionsBetweenDecisions: 1
115-
callOnReadyInAgentAction: 1
116-
maxIteration: 15
117-
targetValue: 0.85
118-
populationSize: 10
119-
initialStepSize: 1
114+
synchronizedDecision: 1
120115
shootSequence: 1
121116
resetAfterOneShot: 1
122117
resetOnStart: 1

Assets/UnityTensorflow/Examples/IntelligentPool/Scripts/BilliardAgent.cs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
using System;
1+
using MLAgents;
2+
using System;
23
using System.Collections;
34
using System.Collections.Generic;
45
using UnityEngine;
@@ -162,4 +163,6 @@ public override void SetVisualizationMode(VisualizationMode visMode)
162163
visColor = new Color(0, 0, 0, 0);
163164
}
164165
}
166+
167+
165168
}

Assets/UnityTensorflow/Examples/IntelligentPool/Scripts/BilliardUIMLAgent.cs

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -32,10 +32,10 @@ private void Awake()
3232

3333
private void Start()
3434
{
35-
populationSizeSliderRef.value = agentRef.populationSize;
36-
maxItrSliderRef.value = agentRef.maxIteration;
37-
populationSizeTextRef.text = "Population size: " + agentRef.populationSize.ToString();
38-
maxItrTextRef.text = "Max Iter: " + agentRef.maxIteration;
35+
populationSizeSliderRef.value = agentRef.Optimizer.populationSize;
36+
maxItrSliderRef.value = agentRef.Optimizer.maxIteration;
37+
populationSizeTextRef.text = "Population size: " + agentRef.Optimizer.populationSize.ToString();
38+
maxItrTextRef.text = "Max Iter: " + agentRef.Optimizer.maxIteration;
3939

4040
rewardShapingToggleRef.isOn = gameSystemRef.defaultArena.rewardShaping;
4141
autoRequestToggleRef.isOn = agentRef.autoRequestDecision;
@@ -45,8 +45,8 @@ private void Start()
4545

4646
private void Update()
4747
{
48-
populationSizeSliderRef.value = agentRef.populationSize;
49-
maxItrSliderRef.value = agentRef.maxIteration;
48+
populationSizeSliderRef.value = agentRef.Optimizer.populationSize;
49+
maxItrSliderRef.value = agentRef.Optimizer.maxIteration;
5050
rewardShapingToggleRef.isOn = gameSystemRef.defaultArena.rewardShaping;
5151

5252
predictedScoreTextRef.text = "Predicted score: " + gameSystemRef.bestScore;
@@ -56,14 +56,14 @@ private void Update()
5656

5757
public void OnPopulationSliderChanged(float value)
5858
{
59-
agentRef.populationSize = Mathf.RoundToInt(value);
60-
populationSizeTextRef.text = "Population size: " + agentRef.populationSize.ToString();
59+
agentRef.Optimizer.populationSize = Mathf.RoundToInt(value);
60+
populationSizeTextRef.text = "Population size: " + agentRef.Optimizer.populationSize.ToString();
6161
}
6262

6363
public void OnIterationSliderChanged(float value)
6464
{
65-
agentRef.maxIteration = Mathf.RoundToInt(value);
66-
maxItrTextRef.text = "Max Iter: " + agentRef.maxIteration;
65+
agentRef.Optimizer.maxIteration = Mathf.RoundToInt(value);
66+
maxItrTextRef.text = "Max Iter: " + agentRef.Optimizer.maxIteration;
6767

6868
}
6969

Assets/UnityTensorflow/Examples/Walker/Scripts/RLModelPPOHierarchy.cs

Lines changed: 24 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -214,14 +214,22 @@ public override float[] EvaluateValue(float[,] vectorObservation, List<float[,,,
214214
{
215215
for (int i = 0; i < outputAction.GetLength(1); ++i)
216216
{
217-
var std = Mathf.Sqrt(vars[i]);
218-
var dis = new NormalDistribution(outputAction[j, i], std);
219-
220-
if (useProbability)
221-
actions[j, i] = (float)dis.Generate();
222-
else
223-
actions[j, i] = outputAction[j, i];
224-
actionProbs[j, i] = (float)dis.ProbabilityDensityFunction(actions[j, i]);
217+
try
218+
{
219+
var std = Mathf.Sqrt(vars[i]);
220+
var dis = new NormalDistribution(outputAction[j, i], std);
221+
222+
if (useProbability)
223+
actions[j, i] = (float)dis.Generate();
224+
else
225+
actions[j, i] = outputAction[j, i];
226+
actionProbs[j, i] = (float)dis.ProbabilityDensityFunction(actions[j, i]);
227+
}catch(Exception e)
228+
{
229+
Debug.LogWarning("NaN action from neural network detected. Force it to 0.");
230+
actions[j, i] = 0;
231+
actionProbs[j, i] = 1;
232+
}
225233
}
226234
}
227235
}
@@ -276,8 +284,15 @@ public override float[] EvaluateValue(float[,] vectorObservation, List<float[,,,
276284
for (int i = 0; i < outputAction.GetLength(1); ++i)
277285
{
278286
var std = Mathf.Sqrt(vars[i]);
279-
var dis = new NormalDistribution(outputAction[j, i], std);
287+
if (float.IsNaN(outputAction[j, i]) || float.IsNaN(std) || float.IsNaN(actions[j, i])) // `x == float.NaN` is always false (NaN never equals anything, itself included); float.IsNaN is the only reliable test
288+
{
289+
actionProbs[j, i] = 0.5f;
290+
Debug.LogWarning("not valid output action mean:" + outputAction[j, i] + " or std:" + std +" or action to evaluate: " + actions[j, i]);
291+
continue;
292+
}
280293

294+
var dis = new NormalDistribution(outputAction[j, i], std);
295+
281296
actionProbs[j, i] = (float)dis.ProbabilityDensityFunction(actions[j, i]);
282297
}
283298
}

Assets/UnityTensorflow/MAESOptimization/AgentES.cs

Lines changed: 27 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -6,15 +6,22 @@
66
using System;
77
using System.Linq;
88

9+
[RequireComponent(typeof(ESOptimizer))]
910
public abstract class AgentES : Agent, IESOptimizable
1011
{
11-
//for asynchronized decision, set this to false.
12-
public bool callOnReadyInAgentAction = true;
13-
public int maxIteration;
14-
public float targetValue;
15-
public int populationSize = 16;
16-
public float initialStepSize = 1;
1712

13+
14+
public ESOptimizer Optimizer { get; protected set; }
15+
16+
private void Awake()
17+
{
18+
Optimizer = GetComponent<ESOptimizer>();
19+
}
20+
21+
22+
23+
// For asynchronous decisions, set this to false.
24+
public bool synchronizedDecision = true;
1825
public event System.Action<AgentES> OnEndOptimizationRequested;
1926
/// <summary>
2027
/// return the value of an action.
@@ -25,7 +32,7 @@ public abstract class AgentES : Agent, IESOptimizable
2532

2633

2734
/// <summary>
28-
/// Implement this instead
35+
/// Implement this instead of AgentAction()
2936
/// </summary>
3037
/// <param name="vectorAction"></param>
3138
public abstract void OnReady(double[] vectorAction);
@@ -37,24 +44,33 @@ public enum VisualizationMode
3744
Best,
3845
None
3946
}
40-
public abstract void SetVisualizationMode(VisualizationMode visMode);
47+
public virtual void SetVisualizationMode(VisualizationMode visMode) { }
4148

4249
/// <summary>
4350
/// Don't override this method; implement OnReady() instead.
44-
/// Set callOnReadyInAgentAction if you want this agent to react when calling AgentAction
51+
/// Set synchronizedDecision to true if you want OnReady() to be invoked whenever AgentAction is called.
4552
/// </summary>
4653
/// <param name="vectorAction"></param>
4754
/// <param name="textAction"></param>
4855
public override void AgentAction(float[] vectorAction, string textAction)
4956
{
50-
if (callOnReadyInAgentAction)
57+
if (synchronizedDecision)
5158
OnReady(Array.ConvertAll(vectorAction, t => (double)t));
5259
}
5360

61+
public double[] Optimize(double[] initialMean = null)
62+
{
63+
return Optimizer.Optimize(this, initialMean);
64+
}
65+
66+
public void OptimizeAsync(double[] initialMean = null)
67+
{
68+
Optimizer.StartOptimizingAsync(this, OnReady,initialMean);
69+
}
5470

5571
public void ForceEndOptimization()
5672
{
57-
OnEndOptimizationRequested.Invoke(this);
73+
Optimizer.StopOptimizing(OnReady);
5874
}
5975

6076
public int GetParamDimension()

0 commit comments

Comments
 (0)