TensorStack-AI · saddam213 · Nov 21, 2025
diff --git a/TensorStack.StableDiffusion/Config/AutoEncoderConfig.cs b/TensorStack.StableDiffusion/Config/AutoEncoderConfig.cs
@@ -1,5 +1,6 @@
 // Copyright (c) TensorStack. All rights reserved.
 // Licensed under the Apache 2.0 License.
+using System.Text.Json.Serialization;
 using TensorStack.Common;
 
 namespace TensorStack.StableDiffusion.Config
@@ -14,5 +15,11 @@ public record AutoEncoderModelConfig : ModelConfig
         public int LatentChannels { get; set; } = 4;
         public string DecoderModelPath { get; set; }
         public string EncoderModelPath { get; set; }
+
+        [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
+        public float[] LatentsStd { get; set; }
+
+        [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
+        public float[] LatentsMean { get; set; }
     }
 }
diff --git a/TensorStack.StableDiffusion/Enums/PipelineType.cs b/TensorStack.StableDiffusion/Enums/PipelineType.cs
@@ -11,6 +11,7 @@ public enum PipelineType
         StableCascade = 10,
         LatentConsistency = 20,
         Flux = 30,
-        Nitro = 40
+        Nitro = 40,
+        Wan = 50
     }
 }
diff --git a/TensorStack.StableDiffusion/Models/AutoEncoderModel.cs b/TensorStack.StableDiffusion/Models/AutoEncoderModel.cs
@@ -142,6 +142,7 @@ public virtual async Task<Tensor<float>> DecodeAsync(Tensor<float> inputTensor,
             if (!disableShift)
                 inputTensor.Add(ShiftFactor);
 
+            ApplyNormalization(inputTensor, Configuration.LatentsMean, Configuration.LatentsStd);
             var outputDimensions = new[] { 1, OutChannels, inputTensor.Dimensions[2] * Scale, inputTensor.Dimensions[3] * Scale };
             using (var modelParameters = new ModelParameters(Decoder.Metadata, cancellationToken))
             {
@@ -195,6 +196,39 @@ public virtual async Task<Tensor<float>> EncodeAsync(ImageTensor inputTensor, bo
         }
 
 
+        /// <summary>
+        /// Applies per-channel normalization to a latent tensor in-place, equivalent to:
+        /// <c>latents = latents / latentsStd + latentsMean</c>. Does nothing when either span is empty.
+        /// </summary>
+        /// <param name="latents">The latents; the buffer is treated as contiguous [batch, channels, ...] data.</param>
+        /// <param name="latentsMean">Per-channel mean values. Length must equal the number of channels in <paramref name="latents"/>.</param>
+        /// <param name="latentsStd">Per-channel standard deviation values. Length must equal the number of channels in <paramref name="latents"/>. Each value is inverted (1 / std) before applying to the tensor.</param>
+        /// <exception cref="ArgumentException">A span length differs from the channel count of <paramref name="latents"/>.</exception>
+        private static void ApplyNormalization(Tensor<float> latents, ReadOnlySpan<float> latentsMean, ReadOnlySpan<float> latentsStd)
+        {
+            if (latentsMean.IsEmpty || latentsStd.IsEmpty)
+                return;
+
+            var channels = latents.Dimensions[1];
+            if (latentsMean.Length != channels || latentsStd.Length != channels)
+                throw new ArgumentException($"Expected {channels} per-channel values, got {latentsMean.Length} mean and {latentsStd.Length} std.");
+
+            var data = latents.Memory.Span;
+            var planes = latents.Dimensions[0] * channels;
+            var strideC = planes == 0 ? 0 : data.Length / planes;
+
+            // One plane per (batch, channel) pair, so batched latents reuse the channel statistics for every item.
+            for (int p = 0; p < planes; p++)
+            {
+                var c = p % channels;
+                var inv = 1f / latentsStd[c];
+                var slice = data.Slice(p * strideC, strideC);
+                for (int i = 0; i < slice.Length; i++)
+                    slice[i] = slice[i] * inv + latentsMean[c];
+            }
+        }
+
+
         /// <summary>
         /// Performs application-defined tasks associated with freeing, releasing, or resetting unmanaged resources.
         /// </summary>

diff --git a/TensorStack.StableDiffusion/Models/TransformerWanModel.cs b/TensorStack.StableDiffusion/Models/TransformerWanModel.cs
@@ -0,0 +1,55 @@
+// Copyright (c) TensorStack. All rights reserved.
+// Licensed under the Apache 2.0 License.
+using System.Threading;
+using System.Threading.Tasks;
+using TensorStack.Common;
+using TensorStack.Common.Tensor;
+using TensorStack.StableDiffusion.Config;
+
+namespace TensorStack.StableDiffusion.Models
+{
+    /// <summary>
+    /// TransformerModel: Wan Conditional Transformer (MMDiT) architecture to denoise the encoded image latents.
+    /// </summary>
+    public class TransformerWanModel : TransformerModel
+    {
+        /// <summary>
+        /// Creates a new <see cref="TransformerWanModel"/>, forwarding the configuration to the base class.
+        /// </summary>
+        /// <param name="configuration">The transformer model configuration.</param>
+        public TransformerWanModel(TransformerModelConfig configuration)
+            : base(configuration) { }
+
+
+        /// <summary>
+        /// Executes one inference pass of the transformer, loading it first when it is not loaded yet.
+        /// </summary>
+        /// <param name="timestep">The timestep, bound as a scalar input.</param>
+        /// <param name="hiddenStates">The hidden states; their dimensions are also used for the output.</param>
+        /// <param name="encoderHiddenStates">The encoder hidden states.</param>
+        /// <param name="cancellationToken">The cancellation token.</param>
+        /// <returns>The first inference result, converted to a tensor.</returns>
+        public async Task<Tensor<float>> RunAsync(int timestep, Tensor<float> hiddenStates, Tensor<float> encoderHiddenStates, CancellationToken cancellationToken = default)
+        {
+            var isLoaded = Transformer.IsLoaded();
+            if (!isLoaded)
+            {
+                await Transformer.LoadAsync(cancellationToken: cancellationToken);
+            }
+
+            // Bind the inputs in order: hidden states, timestep, encoder hidden states.
+            using var parameters = new ModelParameters(Transformer.Metadata, cancellationToken);
+            parameters.AddInput(hiddenStates);
+            parameters.AddScalarInput(timestep);
+            parameters.AddInput(encoderHiddenStates);
+
+            // The output buffer takes the dimensions of the hidden states.
+            parameters.AddOutput(hiddenStates.Dimensions);
+
+            // Convert the first result before the using declarations dispose the results and parameters.
+            using var inferenceResults = await Transformer.RunInferenceAsync(parameters);
+            var output = inferenceResults[0].ToTensor();
+            return output;
+        }
+    }
+}