
Commit 79e321f

some small changes and documentation updates

1 parent 689369f

7 files changed, +133 -27 lines changed

Assets/UnityTensorflow/Learning/CoreBrainInternalTrainable.cs

Lines changed: 8 additions & 0 deletions

@@ -91,6 +91,14 @@ public void DecideAction(Dictionary<Agent, AgentInfo> newAgentInfos)
             agent.UpdateVectorAction(actionOutputs[agent].outputAction);
         }
 
+
+
+        if (trainerInterface.IsReadyUpdate() && trainerInterface.IsTraining() && trainerInterface.GetStep() <= trainerInterface.GetMaxStep())
+        {
+            trainerInterface.UpdateModel();
+        }
+
+
     }
 
     /// Displays the parameters of the CoreBrainInternal in the Inspector
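With this change, model updates are driven from the brain itself: a trainer only has to make IsReadyUpdate() return true, and CoreBrainInternalTrainable will call UpdateModel() on the next decision step, as long as IsTraining() is true and GetStep() has not passed GetMaxStep(). A minimal sketch of how a trainer might signal readiness; the experienceBuffer field and bufferSizeForUpdate threshold are hypothetical stand-ins, not part of this commit:

    // Sketch only: request a model update once enough experience is collected.
    // Assumes IsReadyUpdate() is abstract in the Trainer base class, as UpdateModel() is.
    public override bool IsReadyUpdate()
    {
        // CoreBrainInternalTrainable.DecideAction (above) calls UpdateModel()
        // when this returns true during training.
        return experienceBuffer.Count >= bufferSizeForUpdate;
    }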

Assets/UnityTensorflow/Learning/Mimic/TrainerMimic.cs

Lines changed: 2 additions & 2 deletions

@@ -126,8 +126,8 @@ public override Dictionary<Agent,TakeActionOutput> TakeAction(Dictionary<Agent,
 
         var agentList = new List<Agent>(agentInfos.Keys);
 
-        float[,] vectorObsAll = CreateVectorIInputBatch(agentInfos, agentList);
-        var visualObsAll = CreateVisualIInputBatch(agentInfos, agentList, BrainToTrain.brainParameters.cameraResolutions);
+        float[,] vectorObsAll = CreateVectorInputBatch(agentInfos, agentList);
+        var visualObsAll = CreateVisualInputBatch(agentInfos, agentList, BrainToTrain.brainParameters.cameraResolutions);
 
         float[,] actions = null;
         var evalOutput = modelSL.EvaluateAction(vectorObsAll, visualObsAll);

Assets/UnityTensorflow/Learning/NeuralEvolution/TrainerNeuralEvolution.cs

Lines changed: 2 additions & 2 deletions

@@ -139,8 +139,8 @@ public override Dictionary<Agent, TakeActionOutput> TakeAction(Dictionary<Agent,
 
         var agentList = new List<Agent>(agentInfos.Keys);
 
-        float[,] vectorObsAll = CreateVectorIInputBatch(agentInfos, agentList);
-        var visualObsAll = CreateVisualIInputBatch(agentInfos, agentList, BrainToTrain.brainParameters.cameraResolutions);
+        float[,] vectorObsAll = CreateVectorInputBatch(agentInfos, agentList);
+        var visualObsAll = CreateVisualInputBatch(agentInfos, agentList, BrainToTrain.brainParameters.cameraResolutions);
 
         float[,] actions = null;
         actions = modeNE.EvaluateAction(vectorObsAll, visualObsAll);

Assets/UnityTensorflow/Learning/PPO/RLNetworkAC.cs

Lines changed: 23 additions & 10 deletions

@@ -7,29 +7,42 @@
 #endif
 using MLAgents;
 /// <summary>
-/// actor critic network abstract class
+/// Actor-critic network abstract class. Inherit from this class if you want to build your own neural network structure for RLModelPPO.
 /// </summary>
 public abstract class RLNetworkAC : UnityNetwork
 {
 
 
     /// <summary>
-    ///
+    /// Implement this abstract method to build your own neural network.
     /// </summary>
-    /// <param name="inVectorstate"></param>
-    /// <param name="inVisualState"></param>
-    /// <param name="inMemery"></param>
-    /// <param name="inPrevAction"></param>
-    /// <param name="outActionSize"></param>
-    /// <param name="actionSpace"></param>
+    /// <param name="inVectorstate">input vector observation tensor</param>
+    /// <param name="inVisualState">input visual observation tensors</param>
+    /// <param name="inMemery">input memory tensor. Not in use right now</param>
+    /// <param name="inPrevAction">input previous action tensor. Not in use right now</param>
+    /// <param name="outActionSize">output action size</param>
+    /// <param name="actionSpace">action space</param>
     /// <param name="outAction">Output action. If the action space is continuous, it is the mean; if the action space is discrete, it is the probability of each action</param>
-    /// <param name="outValue"></param>
+    /// <param name="outValue">output value</param>
     /// <param name="outVariance">output variance. Only needed if the action space is continuous. It can either have a batch dimension or not for RLModelPPO</param>
-    /// <param name="discreteActionProbabilitiesFor"></param>
     public abstract void BuildNetwork(Tensor inVectorstate, List<Tensor> inVisualState, Tensor inMemery, Tensor inPrevAction, int outActionSize, SpaceType actionSpace,
         out Tensor outAction, out Tensor outValue, out Tensor outVariance);
 
+    /// <summary>
+    /// Return all weights of the neural network.
+    /// </summary>
+    /// <returns>List of tensors that are the weights of the neural network</returns>
     public abstract List<Tensor> GetWeights();
+
+    /// <summary>
+    /// Return all weights of the actor.
+    /// </summary>
+    /// <returns>List of tensors that are the weights used by the actor part of the neural network</returns>
     public abstract List<Tensor> GetActorWeights();
+
+    /// <summary>
+    /// Return all weights of the critic.
+    /// </summary>
+    /// <returns>List of tensors that are the weights used by the critic part of the neural network</returns>
     public abstract List<Tensor> GetCriticWeights();
 }
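For orientation, a skeleton of a custom subclass is sketched below. Only the override signatures come from the abstract members above; the class name and bodies are placeholders, since the actual layer construction depends on the tensor API the project builds on.

    // Sketch of a custom actor-critic network for RLModelPPO. Signatures match
    // the abstract members of RLNetworkAC; bodies are left unimplemented.
    public class MyActorCriticNetwork : RLNetworkAC
    {
        public override void BuildNetwork(Tensor inVectorstate, List<Tensor> inVisualState,
            Tensor inMemery, Tensor inPrevAction, int outActionSize, SpaceType actionSpace,
            out Tensor outAction, out Tensor outValue, out Tensor outVariance)
        {
            // Build hidden layers from the observation tensors, then assign the
            // action (mean or probabilities), value, and variance output tensors.
            throw new System.NotImplementedException();
        }

        // All trainable weights of the network.
        public override List<Tensor> GetWeights() { throw new System.NotImplementedException(); }

        // Weights used by the actor part only.
        public override List<Tensor> GetActorWeights() { throw new System.NotImplementedException(); }

        // Weights used by the critic part only.
        public override List<Tensor> GetCriticWeights() { throw new System.NotImplementedException(); }
    }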

Assets/UnityTensorflow/Learning/PPO/TrainerPPO.cs

Lines changed: 5 additions & 5 deletions

@@ -176,7 +176,7 @@ public override void ProcessExperience(Dictionary<Agent, AgentInfo> currentInfo,
         {
             //update process the episode data for PPO.
             float nextValue = iModelPPO.EvaluateValue(Matrix.Reshape(agentNewInfo.stackedVectorObservation.ToArray(), 1, agentNewInfo.stackedVectorObservation.Count),
-                CreateVisualIInputBatch(newInfo, new List<Agent>() { agent }, BrainToTrain.brainParameters.cameraResolutions))[0];
+                CreateVisualInputBatch(newInfo, new List<Agent>() { agent }, BrainToTrain.brainParameters.cameraResolutions))[0];
             var advantages = RLUtils.GeneralAdvantageEst(rewardsEpisodeHistory[agent].ToArray(),
                 valuesEpisodeHistory[agent].ToArray(), parametersPPO.rewardDiscountFactor, parametersPPO.rewardGAEFactor, nextValue);
             float[] targetValues = new float[advantages.Length];
@@ -227,8 +227,8 @@ public override Dictionary<Agent,TakeActionOutput> TakeAction(Dictionary<Agent,
         var result = new Dictionary<Agent, TakeActionOutput>();
         var agentList = new List<Agent>(agentInfos.Keys);
 
-        float[,] vectorObsAll = CreateVectorIInputBatch(agentInfos, agentList);
-        var visualObsAll = CreateVisualIInputBatch(agentInfos, agentList, BrainToTrain.brainParameters.cameraResolutions);
+        float[,] vectorObsAll = CreateVectorInputBatch(agentInfos, agentList);
+        var visualObsAll = CreateVisualInputBatch(agentInfos, agentList, BrainToTrain.brainParameters.cameraResolutions);
 
 
         float[,] actionProbs = null;
@@ -246,8 +246,8 @@ public override Dictionary<Agent,TakeActionOutput> TakeAction(Dictionary<Agent,
             //if this agent will use the decision, use it
             var info = agentInfos[agent];
             var action = agentDecision.Decide(info.stackedVectorObservation, info.visualObservations, new List<float>(actions.GetRow(i)));
-            float[,] vectorOb = CreateVectorIInputBatch(agentInfos, new List<Agent>() { agent });
-            var visualOb = CreateVisualIInputBatch(agentInfos, new List<Agent>() { agent }, BrainToTrain.brainParameters.cameraResolutions);
+            float[,] vectorOb = CreateVectorInputBatch(agentInfos, new List<Agent>() { agent });
+            var visualOb = CreateVisualInputBatch(agentInfos, new List<Agent>() { agent }, BrainToTrain.brainParameters.cameraResolutions);
             var probs = iModelPPO.EvaluateProbability(vectorOb, action.Reshape(1, action.Length), visualOb);
 
             var temp = new TakeActionOutput();
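Aside: in the first hunk, the value of the state reached after the episode's last step (nextValue) is passed to RLUtils.GeneralAdvantageEst as the bootstrap value. Judging from the parameter names (rewardDiscountFactor as \gamma, rewardGAEFactor as \lambda) this is standard generalized advantage estimation; that is an inference from the names, not something this diff confirms. The usual form is

    \delta_t = r_t + \gamma V(s_{t+1}) - V(s_t), \qquad
    \hat{A}_t = \sum_{l=0}^{T-t-1} (\gamma \lambda)^l \, \delta_{t+l}

with nextValue standing in for V(s_T) at the episode cut-off.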

Assets/UnityTensorflow/Learning/Trainer.cs

Lines changed: 92 additions & 7 deletions

@@ -23,28 +23,88 @@ public struct TakeActionOutput
     //public Dictionary<Agent, string> textAction;
 }
 
-
+/// <summary>
+/// Implement this interface on any MonoBehaviour to create your own trainer that CoreBrainInternalTrainable can use as its Trainer.
+/// </summary>
 public interface ITrainer
 {
+    /// <summary>
+    /// This will be called to give you a reference to the Brain.
+    /// </summary>
+    /// <param name="brain"></param>
     void SetBrain(Brain brain);
+
+    /// <summary>
+    /// Implement all of your initialization here.
+    /// </summary>
     void Initialize();
 
+    /// <summary>
+    /// Return the max steps of the training.
+    /// </summary>
+    /// <returns>max steps</returns>
     int GetMaxStep();
 
+    /// <summary>
+    /// Return the current step.
+    /// </summary>
+    /// <returns>current step</returns>
    int GetStep();
+
+    /// <summary>
+    /// This will be called every fixed update when training is enabled.
+    /// </summary>
    void IncrementStep();
 
+    /// <summary>
+    /// Reset your trainer.
+    /// </summary>
    void ResetTrainer();
 
+    /// <summary>
+    /// This will be called when an action is requested for an agent. Implement your logic to return the actions to take based on the agents' current states.
+    /// </summary>
+    /// <param name="agentInfos">the information of the agents that need actions</param>
+    /// <returns>a dictionary mapping each agent to the action it should take</returns>
    Dictionary<Agent, TakeActionOutput> TakeAction(Dictionary<Agent, AgentInfo> agentInfos);
+
+    /// <summary>
+    /// This will be called every loop when training is enabled. Record the info of the agents as needed by your algorithm.
+    /// </summary>
+    /// <param name="currentInfo">information of the agents before the action was taken</param>
+    /// <param name="newInfo">information of the agents after the action was taken</param>
+    /// <param name="actionOutput">the action taken</param>
    void AddExperience(Dictionary<Agent, AgentInfo> currentInfo, Dictionary<Agent, AgentInfo> newInfo, Dictionary<Agent, TakeActionOutput> actionOutput);
+
+    /// <summary>
+    /// Like AddExperience(), this is called every loop when training, right after AddExperience(). Process the collected episode data here; you can also do that work in AddExperience() instead.
+    /// </summary>
+    /// <param name="currentInfo">information of the agents before the action was taken</param>
+    /// <param name="newInfo">information of the agents after the action was taken</param>
    void ProcessExperience(Dictionary<Agent, AgentInfo> currentInfo, Dictionary<Agent, AgentInfo> newInfo);
+
+    /// <summary>
+    /// When this returns true, UpdateModel() will be called.
+    /// </summary>
+    /// <returns>whether it is ready to update the model</returns>
    bool IsReadyUpdate();
+
+    /// <summary>
+    /// Put all of your logic for training the model here. This is called when IsReadyUpdate() returns true.
+    /// </summary>
    void UpdateModel();
 
+    /// <summary>
+    /// Return whether training is enabled. AddExperience(), ProcessExperience() and UpdateModel() will not be called if this returns false.
+    /// </summary>
+    /// <returns>whether training is enabled</returns>
    bool IsTraining();
 }
 
+
+/// <summary>
+/// An abstract base class that saves you time implementing ITrainer. It provides helper functions and common plumbing; you can inherit from it instead of implementing ITrainer directly.
+/// </summary>
 public abstract class Trainer : MonoBehaviour, ITrainer
 {
 
@@ -91,10 +151,10 @@ protected virtual void FixedUpdate()
         if (isTraining)
             modelRef.SetLearningRate(parameters.learningRate);
 
-        if (IsReadyUpdate() && isTraining && GetStep() <= GetMaxStep())
+        /*if (IsReadyUpdate() && isTraining && GetStep() <= GetMaxStep()) //moved into CoreBrainInternalTrainable
         {
             UpdateModel();
-        }
+        }*/
     }
 
     public virtual void SetBrain(Brain brain)
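The commented-out FixedUpdate block above is the check that moved into CoreBrainInternalTrainable (first file in this commit). To make the interface's call order concrete, here is a minimal ITrainer sketch; the member signatures come from the interface above, while the bodies, fields, and assumed usings are illustrative only:

    // Minimal ITrainer sketch. Per the docs above, the per-loop order while
    // training is TakeAction -> AddExperience -> ProcessExperience, with
    // UpdateModel() invoked by CoreBrainInternalTrainable once IsReadyUpdate()
    // returns true. Assumes using UnityEngine, MLAgents and System.Collections.Generic.
    public class MyTrainer : MonoBehaviour, ITrainer
    {
        private Brain brain;
        private int step = 0;
        public int maxStep = 100000;    // illustrative training budget
        public bool isTraining = true;

        public void SetBrain(Brain brain) { this.brain = brain; }
        public void Initialize() { /* build your model here */ }
        public int GetMaxStep() { return maxStep; }
        public int GetStep() { return step; }
        public void IncrementStep() { step++; }   // called every fixed update while training
        public void ResetTrainer() { step = 0; }
        public bool IsTraining() { return isTraining; }

        public Dictionary<Agent, TakeActionOutput> TakeAction(Dictionary<Agent, AgentInfo> agentInfos)
        {
            // Decide an action for each agent from its current observations.
            return new Dictionary<Agent, TakeActionOutput>();
        }

        public void AddExperience(Dictionary<Agent, AgentInfo> currentInfo,
            Dictionary<Agent, AgentInfo> newInfo, Dictionary<Agent, TakeActionOutput> actionOutput)
        {
            // Record (state, action, next state) data as your algorithm needs.
        }

        public void ProcessExperience(Dictionary<Agent, AgentInfo> currentInfo,
            Dictionary<Agent, AgentInfo> newInfo)
        {
            // Post-process the collected data, e.g. at episode boundaries.
        }

        public bool IsReadyUpdate() { return false; }  // signal when an update is due
        public void UpdateModel() { /* train the model here */ }
    }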
@@ -134,7 +194,9 @@ public virtual void ResetTrainer()
     public abstract void UpdateModel();
 
 
-
+    /// <summary>
+    /// Save the model to the checkpoint path.
+    /// </summary>
     public void SaveModel()
     {
         var data = modelRef.SaveCheckpoint();
@@ -144,6 +206,10 @@ public void SaveModel()
         File.WriteAllBytes(fullPath, data);
         Debug.Log("Saved model checkpoint to " + fullPath);
     }
+
+    /// <summary>
+    /// Load the model from the checkpoint path.
+    /// </summary>
     public void LoadModel()
     {
         string fullPath = Path.GetFullPath(checkpointPath);
@@ -160,6 +226,12 @@ public void LoadModel()
     }
 
 
+    /// <summary>
+    /// Return the 3D float array of the texture image.
+    /// </summary>
+    /// <param name="tex">texture</param>
+    /// <param name="blackAndWhite">whether to return black and white</param>
+    /// <returns>HWC array of the image</returns>
     public static float[,,] TextureToArray(Texture2D tex, bool blackAndWhite)
     {
         int width = tex.width;
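The HWC note matters when indexing the result. A hypothetical usage sketch (myTexture2D and the channel-count assumption are not from this diff):

    // Hypothetical usage of TextureToArray. Per the HWC doc above, the first
    // index is the row (height), the second the column (width), the third the channel.
    float[,,] obs = Trainer.TextureToArray(myTexture2D, false);
    int height = obs.GetLength(0);    // H
    int width = obs.GetLength(1);     // W
    int channels = obs.GetLength(2);  // C: presumably 3 for color, 1 when blackAndWhite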
@@ -197,7 +269,15 @@
         Buffer.BlockCopy(resultTemp, 0, result, 0, height * width * pixels * sizeof(float));
         return result;
     }
-    public static List<float[,,,]> CreateVisualIInputBatch(Dictionary<Agent, AgentInfo> currentInfo, List<Agent> agentList, resolution[] cameraResolutions)
+
+    /// <summary>
+    /// Create the visual input batch that can be fed directly to the neural network, covering all agents' camera visual inputs.
+    /// </summary>
+    /// <param name="currentInfo">agents and their information with visual texture data</param>
+    /// <param name="agentList">list of agents to include in the output</param>
+    /// <param name="cameraResolutions">camera resolution data. Should be obtained from the Brain.</param>
+    /// <returns>list of visual input batch data. Each item in the list corresponds to an item in the cameraResolutions parameter</returns>
+    public static List<float[,,,]> CreateVisualInputBatch(Dictionary<Agent, AgentInfo> currentInfo, List<Agent> agentList, resolution[] cameraResolutions)
     {
         if (cameraResolutions == null || cameraResolutions.Length <= 0)
             return null;
@@ -218,8 +298,13 @@ public static List<float[,,,]> CreateVisualIInputBatch(Dictionary<Agent, AgentIn
         return observationMatrixList;
     }
 
-
-    public static float[,] CreateVectorIInputBatch(Dictionary<Agent, AgentInfo> currentInfo, List<Agent> agentList)
+    /// <summary>
+    /// Create vector observation batch data that can be fed directly to the neural network.
+    /// </summary>
+    /// <param name="currentInfo">agents and their information with vector observations</param>
+    /// <param name="agentList">list of agents to include in the output</param>
+    /// <returns>batch vector observation data</returns>
+    public static float[,] CreateVectorInputBatch(Dictionary<Agent, AgentInfo> currentInfo, List<Agent> agentList)
     {
         int obsSize = currentInfo[agentList[0]].stackedVectorObservation.Count;
         if (obsSize == 0)
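Usage of the two renamed helpers follows the pattern already visible in the TrainerMimic and TrainerPPO hunks above: build the agent list from the incoming dictionary, then pass the Brain's camera resolutions when batching visual observations.

    // Typical call pattern inside a Trainer subclass's TakeAction(), mirroring
    // the renamed call sites in this commit.
    var agentList = new List<Agent>(agentInfos.Keys);
    float[,] vectorObsAll = CreateVectorInputBatch(agentInfos, agentList);
    var visualObsAll = CreateVisualInputBatch(agentInfos, agentList,
        BrainToTrain.brainParameters.cameraResolutions);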
