From 32aca9794cd6e7cf989c2c8962b7d98b1407b53b Mon Sep 17 00:00:00 2001
From: sa_ddam213
Date: Thu, 20 Nov 2025 10:00:19 +1300
Subject: [PATCH 1/3] QwenText/QwenImage pipelines

---
 .../Enums/PipelineType.cs                     |   3 +-
 .../Models/TransformerQwenModel.cs            |  62 +++
 .../Pipelines/Qwen/QwenBase.cs                | 451 ++++++++++++++++++
 .../Pipelines/Qwen/QwenConfig.cs              | 174 +++++++
 .../Pipelines/Qwen/QwenPipeline.cs            | 112 +++++
 .../Pipelines/Qwen/QwenConfig.cs              |   9 +
 .../Pipelines/Qwen/QwenPipeline.cs            | 255 ++++++++++
 7 files changed, 1065 insertions(+), 1 deletion(-)
 create mode 100644 TensorStack.StableDiffusion/Models/TransformerQwenModel.cs
 create mode 100644 TensorStack.StableDiffusion/Pipelines/Qwen/QwenBase.cs
 create mode 100644 TensorStack.StableDiffusion/Pipelines/Qwen/QwenConfig.cs
 create mode 100644 TensorStack.StableDiffusion/Pipelines/Qwen/QwenPipeline.cs
 create mode 100644 TensorStack.TextGeneration/Pipelines/Qwen/QwenConfig.cs
 create mode 100644 TensorStack.TextGeneration/Pipelines/Qwen/QwenPipeline.cs

diff --git a/TensorStack.StableDiffusion/Enums/PipelineType.cs b/TensorStack.StableDiffusion/Enums/PipelineType.cs
index c7aa346..1bd81a7 100644
--- a/TensorStack.StableDiffusion/Enums/PipelineType.cs
+++ b/TensorStack.StableDiffusion/Enums/PipelineType.cs
@@ -11,6 +11,7 @@ public enum PipelineType
         StableCascade = 10,
         LatentConsistency = 20,
         Flux = 30,
-        Nitro = 40
+        Nitro = 40,
+        Qwen = 50
     }
 }
diff --git a/TensorStack.StableDiffusion/Models/TransformerQwenModel.cs b/TensorStack.StableDiffusion/Models/TransformerQwenModel.cs
new file mode 100644
index 0000000..0f18ebe
--- /dev/null
+++ b/TensorStack.StableDiffusion/Models/TransformerQwenModel.cs
@@ -0,0 +1,62 @@
+// Copyright (c) TensorStack. All rights reserved.
+// Licensed under the Apache 2.0 License.
+using System.Threading;
+using System.Threading.Tasks;
+using TensorStack.Common;
+using TensorStack.Common.Tensor;
+using TensorStack.StableDiffusion.Config;
+
+namespace TensorStack.StableDiffusion.Models
+{
+    /// <summary>
+    /// TransformerModel: QwenImageTransformer2DModel
+    /// </summary>
+    public class TransformerQwenModel : TransformerModel
+    {
+        /// <summary>
+        /// Initializes a new instance of the <see cref="TransformerQwenModel"/> class.
+        /// </summary>
+        /// <param name="configuration">The configuration.</param>
+        public TransformerQwenModel(TransformerModelConfig configuration)
+            : base(configuration) { }
+
+
+        /// <summary>
+        /// Runs the Transformer model with the specified inputs
+        /// </summary>
+        /// <param name="timestep">The timestep.</param>
+        /// <param name="hiddenStates">The hidden states.</param>
+        /// <param name="encoderHiddenStates">The encoder hidden states.</param>
+        /// <param name="imgShapes">The image shapes.</param>
+        /// <param name="cancellationToken">The cancellation token that can be used by other objects or threads to receive notice of cancellation.</param>
+        public async Task<Tensor<float>> RunAsync(int timestep, Tensor<float> hiddenStates, Tensor<float> encoderHiddenStates, Tensor<long> imgShapes, CancellationToken cancellationToken = default)
+        {
+            if (!Transformer.IsLoaded())
+                await Transformer.LoadAsync(cancellationToken: cancellationToken);
+
+            var txtSequenceLength = encoderHiddenStates.Dimensions[1];
+            var encoderHiddenStatesMask = new Tensor<long>([1, txtSequenceLength]);
+            encoderHiddenStatesMask.Fill(1);
+            using (var transformerParams = new ModelParameters(Transformer.Metadata, cancellationToken))
+            {
+                // Inputs
+                transformerParams.AddInput(hiddenStates);
+                transformerParams.AddScalarInput(timestep);
+                transformerParams.AddInput(encoderHiddenStatesMask);
+                transformerParams.AddInput(encoderHiddenStates);
+                transformerParams.AddInput(imgShapes);
+                transformerParams.AddScalarInput(txtSequenceLength);
+
+                // Outputs
+                transformerParams.AddOutput(hiddenStates.Dimensions);
+
+                // Inference
+                using (var results = await Transformer.RunInferenceAsync(transformerParams))
+                {
+                    return results[0].ToTensor();
+                }
+            }
+        }
+
+    }
+}
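
A minimal sketch of how this wrapper is driven per denoising step; the real call site is QwenBase.RunInferenceAsync further down in this patch, and `config`, `scheduler`, `promptEmbeds` and the shape values here are illustrative, shown for the 1024x1024 / 128-token default:

    // Hypothetical standalone step loop; packed latents are [1, 4096, 64]
    // (a 64x64 token grid of 64 channels), prompt embeddings [1, 128, 3584].
    var transformer = new TransformerQwenModel(config.Transformer);
    var latents = scheduler.CreateRandomSample([1, 4096, 64]);
    foreach (var timestep in scheduler.GetTimesteps())
    {
        var noisePred = await transformer.RunAsync(
            timestep,
            scheduler.ScaleInput(timestep, latents),    // scaled latent input
            promptEmbeds,                               // conditional text embeddings
            new Tensor<long>([1, 64, 64]),              // image shapes placeholder
            cancellationToken: cancellationToken);
        latents = scheduler.Step(timestep, noisePred, latents).Sample;
    }

The same call shape survives patch 3 below, which only drops the mask and sequence-length bindings inside the wrapper.
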
diff --git a/TensorStack.StableDiffusion/Pipelines/Qwen/QwenBase.cs b/TensorStack.StableDiffusion/Pipelines/Qwen/QwenBase.cs
new file mode 100644
index 0000000..fbb4e9d
--- /dev/null
+++ b/TensorStack.StableDiffusion/Pipelines/Qwen/QwenBase.cs
@@ -0,0 +1,451 @@
+// Copyright (c) TensorStack. All rights reserved.
+// Licensed under the Apache 2.0 License.
+using Microsoft.Extensions.Logging;
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Threading;
+using System.Threading.Tasks;
+using TensorStack.Common;
+using TensorStack.Common.Tensor;
+using TensorStack.StableDiffusion.Common;
+using TensorStack.StableDiffusion.Enums;
+using TensorStack.StableDiffusion.Models;
+using TensorStack.StableDiffusion.Schedulers;
+using TensorStack.TextGeneration.Tokenizers;
+using QwenTextConfig = TensorStack.TextGeneration.Pipelines.Qwen.QwenConfig;
+using QwenTextPipeline = TensorStack.TextGeneration.Pipelines.Qwen.QwenPipeline;
+
+namespace TensorStack.StableDiffusion.Pipelines.Qwen
+{
+    public abstract class QwenBase : PipelineBase
+    {
+        /// <summary>
+        /// Initializes a new instance of the <see cref="QwenBase"/> class.
+        /// </summary>
+        /// <param name="transformer">The transformer.</param>
+        /// <param name="textEncoder">The text encoder.</param>
+        /// <param name="autoEncoder">The automatic encoder.</param>
+        /// <param name="logger">The logger.</param>
+        public QwenBase(TransformerQwenModel transformer, QwenTextPipeline textEncoder, AutoEncoderModel autoEncoder, ILogger logger = default) : base(logger)
+        {
+            Transformer = transformer;
+            TextEncoder = textEncoder;
+            AutoEncoder = autoEncoder;
+            Initialize();
+            Logger?.LogInformation("[QwenPipeline] Name: {Name}", Name);
+        }
+
+
+        /// <summary>
+        /// Initializes a new instance of the <see cref="QwenBase"/> class.
+        /// </summary>
+        /// <param name="configuration">The configuration.</param>
+        /// <param name="logger">The logger.</param>
+        public QwenBase(QwenConfig configuration, ILogger logger = default) : this(
+            new TransformerQwenModel(configuration.Transformer),
+            new QwenTextPipeline(new QwenTextConfig
+            {
+                OutputLastHiddenStates = true,
+                DecoderConfig = configuration.TextEncoder,
+                Tokenizer = new BPETokenizer(configuration.Tokenizer),
+            }),
+            new AutoEncoderModel(configuration.AutoEncoder),
+            logger)
+        {
+            Name = configuration.Name;
+        }
+
+
+        /// <summary>
+        /// Gets the type of the pipeline.
+        /// </summary>
+        public override PipelineType PipelineType => PipelineType.Qwen;
+
+        /// <summary>
+        /// Gets the friendly name.
+        /// </summary>
+        public override string Name { get; init; } = nameof(PipelineType.Qwen);
+
+        /// <summary>
+        /// Gets the TextEncoder.
+        /// </summary>
+        public QwenTextPipeline TextEncoder { get; init; }
+
+        /// <summary>
+        /// Gets the transformer.
+        /// </summary>
+        public TransformerQwenModel Transformer { get; init; }
+
+        /// <summary>
+        /// Gets the automatic encoder.
+        /// </summary>
+        public AutoEncoderModel AutoEncoder { get; init; }
+
+
+        /// <summary>
+        /// Loads the pipeline.
+        /// </summary>
+        /// <param name="cancellationToken">The cancellation token.</param>
+        public Task LoadAsync(CancellationToken cancellationToken = default)
+        {
+            // Qwen pipelines are lazy loaded on first run
+            return Task.CompletedTask;
+        }
+
+
+        /// <summary>
+        /// Unloads the pipeline.
+        /// </summary>
+        /// <param name="cancellationToken">The cancellation token.</param>
+        public async Task UnloadAsync(CancellationToken cancellationToken = default)
+        {
+            await Task.WhenAll
+            (
+                Transformer.UnloadAsync(),
+                TextEncoder.UnloadAsync(cancellationToken),
+                AutoEncoder.EncoderUnloadAsync(),
+                AutoEncoder.DecoderUnloadAsync()
+            );
+            Logger?.LogInformation("[{PipeLineType}] Pipeline Unloaded", PipelineType);
+        }
+
+
+        /// <summary>
+        /// Validates the options.
+        /// </summary>
+        /// <param name="options">The options.</param>
+        protected override void ValidateOptions(GenerateOptions options)
+        {
+            base.ValidateOptions(options);
+            if (!Transformer.HasControlNet && options.HasControlNet)
+                throw new ArgumentException("Model does not support ControlNet");
+        }
+
+
+        /// <summary>
+        /// Creates the prompt input embeddings.
+        /// </summary>
+        /// <param name="options">The options.</param>
+        /// <param name="cancellationToken">The cancellation token.</param>
+        protected async Task<PromptResult> CreatePromptAsync(IPipelineOptions options, CancellationToken cancellationToken = default)
+        {
+            var cachedPrompt = GetPromptCache(options);
+            if (cachedPrompt is not null)
+                return cachedPrompt;
+
+            // Conditional Prompt
+            var promptEmbeds = await TextEncoder.GetLastHiddenState(new TextGeneration.Common.GenerateOptions
+            {
+                Seed = options.Seed,
+                Prompt = options.Prompt,
+                MinLength = 128,
+                MaxLength = 128
+            }, cancellationToken);
+
+            // Unconditional prompt
+            var negativePromptEmbeds = await TextEncoder.GetLastHiddenState(new TextGeneration.Common.GenerateOptions
+            {
+                Seed = options.Seed,
+                Prompt = options.NegativePrompt,
+                MinLength = 128,
+                MaxLength = 128
+            }, cancellationToken);
+
+            return SetPromptCache(options, new PromptResult(promptEmbeds, default, negativePromptEmbeds, default));
+        }
+
+
+        /// <summary>
+        /// Decode the model latents to image
+        /// </summary>
+        /// <param name="options">The options.</param>
+        /// <param name="latents">The latents.</param>
+        /// <param name="cancellationToken">The cancellation token.</param>
+        protected async Task<ImageTensor> DecodeLatentsAsync(IPipelineOptions options, Tensor<float> latents, CancellationToken cancellationToken = default)
+        {
+            var timestamp = Logger.LogBegin(LogLevel.Debug, "[DecodeLatentsAsync] Begin AutoEncoder Decode");
+            var decoderResult = await AutoEncoder.DecodeAsync(latents, cancellationToken: cancellationToken);
+            if (options.IsLowMemoryEnabled || options.IsLowMemoryDecoderEnabled)
+                await AutoEncoder.DecoderUnloadAsync();
+
+            Logger.LogEnd(LogLevel.Debug, timestamp, "[DecodeLatentsAsync] AutoEncoder Decode Complete");
+            return decoderResult.AsImageTensor();
+        }
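
CreatePromptAsync pins both prompts to a fixed 128-token window (MinLength = MaxLength = 128), so the conditional and unconditional embeddings always share the shape [1, 128, 3584] and can be swapped per guidance pass without re-padding. A rough standalone equivalent of one encoder call (prompt text illustrative):

    var promptEmbeds = await TextEncoder.GetLastHiddenState(new TextGeneration.Common.GenerateOptions
    {
        Prompt = "A cat holding a sign that says hello world",
        MinLength = 128,   // short prompts are padded with EOS (see TokenizePromptAsync below)
        MaxLength = 128    // long prompts are capped
    }, cancellationToken);
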
+
+
+        /// <summary>
+        /// Encode the image to model latents
+        /// </summary>
+        /// <param name="options">The options.</param>
+        /// <param name="cancellationToken">The cancellation token.</param>
+        private async Task<Tensor<float>> EncodeLatentsAsync(IPipelineOptions options, CancellationToken cancellationToken = default)
+        {
+            var timestamp = Logger.LogBegin(LogLevel.Debug, "[EncodeLatentsAsync] Begin AutoEncoder Encode");
+            var cacheResult = GetEncoderCache(options);
+            if (cacheResult is not null)
+            {
+                Logger.LogEnd(LogLevel.Debug, timestamp, "[EncodeLatentsAsync] AutoEncoder Encode Complete, Cached Result.");
+                return cacheResult;
+            }
+
+            var inputTensor = options.InputImage.ResizeImage(options.Width, options.Height);
+            var encoderResult = await AutoEncoder.EncodeAsync(inputTensor, cancellationToken: cancellationToken);
+            if (options.IsLowMemoryEnabled || options.IsLowMemoryEncoderEnabled)
+                await AutoEncoder.EncoderUnloadAsync();
+
+            Logger.LogEnd(LogLevel.Debug, timestamp, "[EncodeLatentsAsync] AutoEncoder Encode Complete");
+            return SetEncoderCache(options, encoderResult);
+        }
+
+
+        /// <summary>
+        /// Run Transformer model inference
+        /// </summary>
+        /// <param name="options">The options.</param>
+        /// <param name="scheduler">The scheduler.</param>
+        /// <param name="prompt">The prompt.</param>
+        /// <param name="progressCallback">The progress callback.</param>
+        /// <param name="cancellationToken">The cancellation token.</param>
+        protected async Task<Tensor<float>> RunInferenceAsync(IPipelineOptions options, IScheduler scheduler, PromptResult prompt, IProgress progressCallback = null, CancellationToken cancellationToken = default)
+        {
+            var timestamp = Logger.LogBegin(LogLevel.Debug, "[RunInferenceAsync] Begin Transformer Inference");
+
+            // Prompt
+            var isGuidanceEnabled = IsGuidanceEnabled(options);
+            var conditionalEmbeds = prompt.PromptEmbeds;
+            var unconditionalEmbeds = prompt.NegativePromptEmbeds;
+
+            // Latents
+            var latents = await CreateLatentInputAsync(options, scheduler, cancellationToken);
+
+            // Create ImgShapes
+            var imgShapes = new Tensor<long>([1, 64, 64]); // TODO:
+
+            // Load Model
+            await LoadTransformerAsync(options, progressCallback, cancellationToken);
+
+            // Timesteps
+            var timesteps = scheduler.GetTimesteps();
+            for (int i = 0; i < timesteps.Count; i++)
+            {
+                var timestep = timesteps[i];
+                var steptime = Stopwatch.GetTimestamp();
+                cancellationToken.ThrowIfCancellationRequested();
+
+                // Inputs
+                var latentInput = scheduler.ScaleInput(timestep, latents);
+
+                // Inference
+                var conditional = await Transformer.RunAsync
+                (
+                    timestep,
+                    latentInput,
+                    conditionalEmbeds,
+                    imgShapes,
+                    cancellationToken: cancellationToken
+                );
+
+                // Guidance
+                if (isGuidanceEnabled)
+                {
+                    var unconditional = await Transformer.RunAsync
+                    (
+                        timestep,
+                        latentInput,
+                        unconditionalEmbeds,
+                        imgShapes,
+                        cancellationToken: cancellationToken
+                    );
+                    conditional = ApplyGuidance(conditional, unconditional, options.GuidanceScale);
+                }
+
+                // Scheduler
+                var stepResult = scheduler.Step(timestep, conditional, latents);
+
+                // Result
+                latents = stepResult.Sample;
+
+                // Progress
+                if (scheduler.IsFinalOrder)
+                    progressCallback.Notify(scheduler.CurrentStep, scheduler.TotalSteps, latents, steptime);
+
+                Logger.LogEnd(LogLevel.Debug, steptime, $"[RunInferenceAsync] Step: {i + 1}/{timesteps.Count}");
+            }
+
+            // Unload
+            if (options.IsLowMemoryEnabled || options.IsLowMemoryComputeEnabled)
+                await Transformer.UnloadAsync();
+
+            Logger.LogEnd(LogLevel.Debug, timestamp, "[RunInferenceAsync] Transformer Inference Complete");
+            return UnpackLatents(latents, options.Width, options.Height);
+        }
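
ApplyGuidance is inherited from PipelineBase and not part of this patch; the conventional classifier-free guidance blend it is expected to compute is `uncond + scale * (cond - uncond)`. A minimal element-wise sketch under that assumption (helper name hypothetical):

    static Tensor<float> ApplyGuidanceSketch(Tensor<float> cond, Tensor<float> uncond, float scale)
    {
        // output = uncond + scale * (cond - uncond); scale = 1 returns cond unchanged
        var result = new Tensor<float>(cond.Dimensions);
        var c = cond.Memory.Span;
        var u = uncond.Memory.Span;
        var r = result.Memory.Span;
        for (int i = 0; i < r.Length; i++)
            r[i] = u[i] + scale * (c[i] - u[i]);
        return result;
    }

Note that IsGuidanceEnabled below returns false for GuidanceScale <= 1, which skips the unconditional pass entirely and halves the per-step transformer cost.
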
+
+
+        /// <summary>
+        /// Create latent input.
+        /// </summary>
+        /// <param name="options">The options.</param>
+        /// <param name="scheduler">The scheduler.</param>
+        /// <param name="cancellationToken">The cancellation token.</param>
+        private async Task<Tensor<float>> CreateLatentInputAsync(IPipelineOptions options, IScheduler scheduler, CancellationToken cancellationToken = default)
+        {
+            if (options.HasInputImage)
+            {
+                var timestep = scheduler.GetStartTimestep();
+                var encoderResult = await EncodeLatentsAsync(options, cancellationToken);
+                var noiseTensor = scheduler.CreateRandomSample(encoderResult.Dimensions);
+                return PackLatents(scheduler.ScaleNoise(timestep, encoderResult, noiseTensor));
+            }
+
+            var height = options.Height * 2 / AutoEncoder.LatentChannels;
+            var width = options.Width * 2 / AutoEncoder.LatentChannels;
+            return PackLatents(scheduler.CreateRandomSample([1, AutoEncoder.LatentChannels, height, width]));
+        }
+
+
+        /// <summary>
+        /// Gets the model optimizations.
+        /// </summary>
+        /// <param name="generateOptions">The generate options.</param>
+        /// <param name="progressCallback">The progress callback.</param>
+        private ModelOptimization GetOptimizations(IPipelineOptions generateOptions, IProgress progressCallback = null)
+        {
+            var optimizations = new ModelOptimization(Optimization.None);
+            if (Transformer.HasOptimizationsChanged(optimizations))
+            {
+                progressCallback.Notify("Optimizing Pipeline...");
+            }
+            return optimizations;
+        }
+
+
+        /// <summary>
+        /// Determines whether classifier-free guidance is enabled
+        /// </summary>
+        /// <param name="options">The options.</param>
+        private bool IsGuidanceEnabled(IPipelineOptions options)
+        {
+            return options.GuidanceScale > 1;
+        }
+
+
+        /// <summary>
+        /// Load Transformer with optimizations
+        /// </summary>
+        /// <param name="options">The options.</param>
+        /// <param name="progressCallback">The progress callback.</param>
+        /// <param name="cancellationToken">The cancellation token.</param>
+        private async Task<ModelMetadata> LoadTransformerAsync(IPipelineOptions options, IProgress progressCallback = null, CancellationToken cancellationToken = default)
+        {
+            var optimizations = GetOptimizations(options, progressCallback);
+            return await Transformer.LoadAsync(optimizations, cancellationToken);
+        }
+
+        /// <summary>
+        /// Packs the latents.
+        /// </summary>
+        /// <param name="latents">The latents.</param>
+        /// <returns></returns>
+        protected Tensor<float> PackLatents(Tensor<float> latents)
+        {
+            var height = latents.Dimensions[2] / 2;
+            var width = latents.Dimensions[3] / 2;
+            latents = latents.Reshape([1, AutoEncoder.LatentChannels, height, 2, width, 2]);
+            latents = latents.Permute([0, 2, 4, 1, 3, 5]);
+            latents = latents.Reshape([1, height * width, AutoEncoder.LatentChannels * 4]);
+            return latents;
+        }
+
+
+        /// <summary>
+        /// Unpacks the latents.
+        /// </summary>
+        /// <param name="latents">The latents.</param>
+        /// <param name="width">The width.</param>
+        /// <param name="height">The height.</param>
+        /// <returns></returns>
+        protected Tensor<float> UnpackLatents(Tensor<float> latents, int width, int height)
+        {
+            var channels = latents.Dimensions[2];
+            height = height / AutoEncoder.LatentChannels;
+            width = width / AutoEncoder.LatentChannels;
+            latents = latents.Reshape([1, height, width, channels / 4, 2, 2]);
+            latents = latents.Permute([0, 3, 1, 4, 2, 5]);
+            latents = latents.Reshape([1, channels / (2 * 2), height * 2, width * 2]);
+            return latents;
+        }
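
A shape walk-through of the 2x2 patchify performed by PackLatents, for the default 1024x1024 output (LatentChannels = 16, VAE scale 16, so 128x128 latents):

    // [1, 16, 128, 128]      VAE latents (NCHW)
    // [1, 16, 64, 2, 64, 2]  split height/width into 2x2 patches
    // [1, 64, 64, 16, 2, 2]  move the patch dims next to the channels
    // [1, 4096, 64]          4096 tokens of 16 * 2 * 2 = 64 channels

which lines up with Transformer.InChannels = 64 / OutChannels = 16 in the QwenConfig defaults later in this patch; UnpackLatents inverts the same three steps. One subtlety: both methods derive the latent grid as width / LatentChannels (= width / 16), which only equals width / (VAE scale) because LatentChannels and Scale are both 16 for this model.
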
+
+
+        /// <summary>
+        /// Checks the state of the pipeline.
+        /// </summary>
+        /// <param name="options">The options.</param>
+        protected override async Task CheckPipelineState(IPipelineOptions options)
+        {
+            // Check Transformer/ControlNet status
+            if (options.HasControlNet && Transformer.IsLoaded())
+                await Transformer.UnloadAsync();
+            if (!options.HasControlNet && Transformer.IsControlNetLoaded())
+                await Transformer.UnloadControlNetAsync();
+
+            // Check LowMemory status
+            if ((options.IsLowMemoryEnabled || options.IsLowMemoryTextEncoderEnabled)) // TODO
+                await TextEncoder.UnloadAsync();
+            if ((options.IsLowMemoryEnabled || options.IsLowMemoryComputeEnabled) && Transformer.IsLoaded())
+                await Transformer.UnloadAsync();
+            if ((options.IsLowMemoryEnabled || options.IsLowMemoryComputeEnabled) && Transformer.IsControlNetLoaded())
+                await Transformer.UnloadControlNetAsync();
+            if ((options.IsLowMemoryEnabled || options.IsLowMemoryEncoderEnabled) && AutoEncoder.IsEncoderLoaded())
+                await AutoEncoder.EncoderUnloadAsync();
+            if ((options.IsLowMemoryEnabled || options.IsLowMemoryDecoderEnabled) && AutoEncoder.IsDecoderLoaded())
+                await AutoEncoder.DecoderUnloadAsync();
+        }
+
+
+        /// <summary>
+        /// Configures the supported schedulers.
+        /// </summary>
+        protected override IReadOnlyList<SchedulerType> ConfigureSchedulers()
+        {
+            return [SchedulerType.FlowMatchEulerDiscrete, SchedulerType.FlowMatchEulerDynamic];
+        }
+
+
+        /// <summary>
+        /// Configures the default SchedulerOptions.
+        /// </summary>
+        protected override GenerateOptions ConfigureDefaultOptions()
+        {
+            var options = new GenerateOptions
+            {
+                Steps = 28,
+                Shift = 1f,
+                Width = 1024,
+                Height = 1024,
+                GuidanceScale = 3.5f,
+                Scheduler = SchedulerType.FlowMatchEulerDiscrete
+            };
+            return options;
+        }
+
+
+        /// <summary>
+        /// Performs application-defined tasks associated with freeing, releasing, or resetting unmanaged resources.
+        /// </summary>
+        private bool _disposed;
+        protected override void Dispose(bool disposing)
+        {
+            if (_disposed)
+                return;
+            if (disposing)
+            {
+                TextEncoder?.Dispose();
+                Transformer?.Dispose();
+                AutoEncoder?.Dispose();
+            }
+            _disposed = true;
+        }
+    }
+}
diff --git a/TensorStack.StableDiffusion/Pipelines/Qwen/QwenConfig.cs b/TensorStack.StableDiffusion/Pipelines/Qwen/QwenConfig.cs
new file mode 100644
index 0000000..96afdb1
--- /dev/null
+++ b/TensorStack.StableDiffusion/Pipelines/Qwen/QwenConfig.cs
@@ -0,0 +1,174 @@
+// Copyright (c) TensorStack. All rights reserved.
+// Licensed under the Apache 2.0 License.
+using System;
+using System.IO;
+using System.Linq;
+using TensorStack.Common;
+using TensorStack.StableDiffusion.Config;
+using TensorStack.StableDiffusion.Enums;
+using TensorStack.TextGeneration.Common;
+using TensorStack.TextGeneration.Tokenizers;
+
+namespace TensorStack.StableDiffusion.Pipelines.Qwen
+{
+    public record QwenConfig : PipelineConfig
+    {
+        /// <summary>
+        /// Initializes a new instance of the <see cref="QwenConfig"/> class.
+        /// </summary>
+        public QwenConfig()
+        {
+            Tokenizer = new TokenizerConfig
+            {
+                BOS = 151643,
+                EOS = 151645
+            };
+            TextEncoder = new DecoderConfig
+            {
+                NumHeads = 28,
+                NumLayers = 28,
+                NumKVHeads = 4,
+                HiddenSize = 3584,
+                VocabSize = 152064
+            };
+            Transformer = new TransformerModelConfig
+            {
+                InChannels = 64,
+                OutChannels = 16,
+                JointAttention = 3584,
+                PooledProjection = 768,
+                IsOptimizationSupported = true
+            };
+            AutoEncoder = new AutoEncoderModelConfig
+            {
+                Scale = 16,
+                LatentChannels = 16,
+                ScaleFactor = 1
+            };
+        }
+
+        public string Name { get; init; } = "Qwen";
+        public override PipelineType Pipeline { get; } = PipelineType.Qwen;
+        public TokenizerConfig Tokenizer { get; init; }
+        public DecoderConfig TextEncoder { get; init; }
+        public TransformerModelConfig Transformer { get; init; }
+        public AutoEncoderModelConfig AutoEncoder { get; init; }
+
+
+        /// <summary>
+        /// Sets the execution provider for all models.
+        /// </summary>
+        /// <param name="executionProvider">The execution provider.</param>
+        public override void SetProvider(ExecutionProvider executionProvider)
+        {
+            TextEncoder.SetProvider(executionProvider);
+            Transformer.SetProvider(executionProvider);
+            AutoEncoder.SetProvider(executionProvider);
+        }
+
+
+        /// <summary>
+        /// Saves the configuration to file.
+        /// </summary>
+        /// <param name="configFile">The configuration file.</param>
+        /// <param name="useRelativePaths">if set to <c>true</c> use relative paths.</param>
+        public override void Save(string configFile, bool useRelativePaths = true)
+        {
+            ConfigService.Serialize(configFile, this, useRelativePaths);
+        }
+
+
+        /// <summary>
+        /// Create Qwen configuration from default values
+        /// </summary>
+        /// <param name="name">The name.</param>
+        /// <param name="modelType">Type of the model.</param>
+        /// <param name="executionProvider">The execution provider.</param>
+        /// <returns>QwenConfig.</returns>
+        public static QwenConfig FromDefault(string name, ModelType modelType, ExecutionProvider executionProvider = default)
+        {
+            var config = new QwenConfig { Name = name };
+            config.Transformer.ModelType = modelType;
+            config.SetProvider(executionProvider);
+            return config;
+        }
+
+
+        /// <summary>
+        /// Create Qwen configuration from JSON file
+        /// </summary>
+        /// <param name="configFile">The configuration file.</param>
+        /// <param name="executionProvider">The execution provider.</param>
+        /// <returns>QwenConfig.</returns>
+        public static QwenConfig FromFile(string configFile, ExecutionProvider executionProvider = default)
+        {
+            var config = ConfigService.Deserialize<QwenConfig>(configFile);
+            config.SetProvider(executionProvider);
+            return config;
+        }
+
+
+        /// <summary>
+        /// Create Qwen configuration from folder structure
+        /// </summary>
+        /// <param name="modelFolder">The model folder.</param>
+        /// <param name="modelType">Type of the model.</param>
+        /// <param name="executionProvider">The execution provider.</param>
+        public static QwenConfig FromFolder(string modelFolder, ModelType modelType, ExecutionProvider executionProvider = default)
+        {
+            return CreateFromFolder(modelFolder, default, modelType, executionProvider);
+        }
+
+
+        /// <summary>
+        /// Create Qwen configuration from folder structure
+        /// </summary>
+        /// <param name="modelFolder">The model folder.</param>
+        /// <param name="variant">The variant.</param>
+        /// <param name="modelType">Type of the model.</param>
+        /// <param name="executionProvider">The execution provider.</param>
+        /// <returns>QwenConfig.</returns>
+        public static QwenConfig FromFolder(string modelFolder, string variant, ModelType modelType, ExecutionProvider executionProvider = default)
+        {
+            return CreateFromFolder(modelFolder, variant, modelType, executionProvider);
+        }
+
+
+        /// <summary>
+        /// Create Qwen configuration from folder structure
+        /// </summary>
+        /// <param name="modelFolder">The model folder.</param>
+        /// <param name="variant">The variant.</param>
+        /// <param name="executionProvider">The execution provider.</param>
+        /// <returns>QwenConfig.</returns>
+        public static QwenConfig FromFolder(string modelFolder, string variant, ExecutionProvider executionProvider = default)
+        {
+            string[] typeOptions = ["Turbo", "Distilled", "Dist"];
+            var modelType = typeOptions.Any(v => variant.Contains(v, StringComparison.OrdinalIgnoreCase))
+                ? ModelType.Turbo
+                : ModelType.Base;
+            return CreateFromFolder(modelFolder, variant, modelType, executionProvider);
+        }
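
CreateFromFolder below assumes a diffusers-style ONNX export layout, with the folder name becoming the pipeline name:

    // QwenImage/
    //   tokenizer/               BPE tokenizer files
    //   text_encoder/model.onnx
    //   transformer/model.onnx   (+ optional controlnet.onnx)
    //   vae_encoder/model.onnx
    //   vae_decoder/model.onnx

Variant detection is purely name-based: a variant string containing "Turbo", "Distilled" or "Dist" maps to ModelType.Turbo, anything else to ModelType.Base.
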
+
+
+        /// <summary>
+        /// Create Qwen configuration from folder structure
+        /// </summary>
+        /// <param name="modelFolder">The model folder.</param>
+        /// <param name="variant">The variant.</param>
+        /// <param name="modelType">Type of the model.</param>
+        /// <param name="executionProvider">The execution provider.</param>
+        /// <returns>QwenConfig.</returns>
+        private static QwenConfig CreateFromFolder(string modelFolder, string variant, ModelType modelType, ExecutionProvider executionProvider)
+        {
+            var config = FromDefault(Path.GetFileNameWithoutExtension(modelFolder), modelType, executionProvider);
+            config.Tokenizer.Path = Path.Combine(modelFolder, "tokenizer");
+            config.TextEncoder.Path = GetVariantPath(modelFolder, "text_encoder", "model.onnx", variant);
+            config.Transformer.Path = GetVariantPath(modelFolder, "transformer", "model.onnx", variant);
+            config.AutoEncoder.DecoderModelPath = GetVariantPath(modelFolder, "vae_decoder", "model.onnx", variant);
+            config.AutoEncoder.EncoderModelPath = GetVariantPath(modelFolder, "vae_encoder", "model.onnx", variant);
+            var controlNetPath = GetVariantPath(modelFolder, "transformer", "controlnet.onnx", variant);
+            if (File.Exists(controlNetPath))
+                config.Transformer.ControlNetPath = controlNetPath;
+            return config;
+        }
+    }
+}
diff --git a/TensorStack.StableDiffusion/Pipelines/Qwen/QwenPipeline.cs b/TensorStack.StableDiffusion/Pipelines/Qwen/QwenPipeline.cs
new file mode 100644
index 0000000..cfc3de4
--- /dev/null
+++ b/TensorStack.StableDiffusion/Pipelines/Qwen/QwenPipeline.cs
@@ -0,0 +1,112 @@
+// Copyright (c) TensorStack. All rights reserved.
+// Licensed under the Apache 2.0 License.
+using Microsoft.Extensions.Logging;
+using System;
+using System.Threading;
+using System.Threading.Tasks;
+using TensorStack.Common;
+using TensorStack.Common.Pipeline;
+using TensorStack.Common.Tensor;
+using TensorStack.StableDiffusion.Common;
+using TensorStack.StableDiffusion.Enums;
+using TensorStack.StableDiffusion.Models;
+using QwenTextPipeline = TensorStack.TextGeneration.Pipelines.Qwen.QwenPipeline;
+
+namespace TensorStack.StableDiffusion.Pipelines.Qwen
+{
+    public class QwenPipeline : QwenBase, IPipeline<GenerateOptions, ImageTensor>
+    {
+        /// <summary>
+        /// Initializes a new instance of the <see cref="QwenPipeline"/> class.
+        /// </summary>
+        /// <param name="transformer">The transformer.</param>
+        /// <param name="textEncoder">The text encoder.</param>
+        /// <param name="autoEncoder">The automatic encoder.</param>
+        /// <param name="logger">The logger.</param>
+        public QwenPipeline(TransformerQwenModel transformer, QwenTextPipeline textEncoder, AutoEncoderModel autoEncoder, ILogger logger = null)
+            : base(transformer, textEncoder, autoEncoder, logger) { }
+
+        /// <summary>
+        /// Initializes a new instance of the <see cref="QwenPipeline"/> class.
+        /// </summary>
+        /// <param name="configuration">The configuration.</param>
+        /// <param name="logger">The logger.</param>
+        public QwenPipeline(QwenConfig configuration, ILogger logger = null)
+            : base(configuration, logger) { }
+
+
+        /// <summary>
+        /// Run ImageTensor pipeline.
+        /// </summary>
+        /// <param name="options">The options.</param>
+        /// <param name="progressCallback">The progress callback.</param>
+        /// <param name="cancellationToken">The cancellation token.</param>
+        public async Task<ImageTensor> RunAsync(GenerateOptions options, IProgress progressCallback = null, CancellationToken cancellationToken = default)
+        {
+            ValidateOptions(options);
+
+            var prompt = await CreatePromptAsync(options, cancellationToken);
+            using (var scheduler = CreateScheduler(options))
+            {
+                var latents = await RunInferenceAsync(options, scheduler, prompt, progressCallback, cancellationToken);
+                return await DecodeLatentsAsync(options, latents, cancellationToken);
+            }
+        }
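
A minimal end-to-end usage sketch (the folder path and `provider` instance are illustrative; the option values simply restate the ConfigureDefaultOptions defaults):

    var pipeline = QwenPipeline.FromFolder(@"M:\Models\QwenImage", ModelType.Base, provider);
    var image = await pipeline.RunAsync(new GenerateOptions
    {
        Prompt = "A photo of a corgi wearing sunglasses",
        NegativePrompt = "blurry, low quality",
        Seed = 42,
        Steps = 28,
        Width = 1024,
        Height = 1024,
        GuidanceScale = 3.5f,
        Scheduler = SchedulerType.FlowMatchEulerDiscrete
    });
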
+
+
+        /// <summary>
+        /// Create Qwen pipeline from QwenConfig file
+        /// </summary>
+        /// <param name="configFile">The configuration file.</param>
+        /// <param name="executionProvider">The execution provider.</param>
+        /// <param name="logger">The logger.</param>
+        /// <returns>QwenPipeline.</returns>
+        public static QwenPipeline FromConfig(string configFile, ExecutionProvider executionProvider, ILogger logger = default)
+        {
+            return new QwenPipeline(QwenConfig.FromFile(configFile, executionProvider), logger);
+        }
+
+
+        /// <summary>
+        /// Create Qwen pipeline from folder structure
+        /// </summary>
+        /// <param name="modelFolder">The model folder.</param>
+        /// <param name="modelType">Type of the model.</param>
+        /// <param name="executionProvider">The execution provider.</param>
+        /// <param name="logger">The logger.</param>
+        /// <returns>QwenPipeline.</returns>
+        public static QwenPipeline FromFolder(string modelFolder, ModelType modelType, ExecutionProvider executionProvider, ILogger logger = default)
+        {
+            return new QwenPipeline(QwenConfig.FromFolder(modelFolder, modelType, executionProvider), logger);
+        }
+
+
+        /// <summary>
+        /// Create Qwen pipeline from folder structure
+        /// </summary>
+        /// <param name="modelFolder">The model folder.</param>
+        /// <param name="variant">The variant.</param>
+        /// <param name="modelType">Type of the model.</param>
+        /// <param name="executionProvider">The execution provider.</param>
+        /// <param name="logger">The logger.</param>
+        /// <returns>QwenPipeline.</returns>
+        public static QwenPipeline FromFolder(string modelFolder, string variant, ModelType modelType, ExecutionProvider executionProvider, ILogger logger = default)
+        {
+            return new QwenPipeline(QwenConfig.FromFolder(modelFolder, variant, modelType, executionProvider), logger);
+        }
+
+
+        /// <summary>
+        /// Create Qwen pipeline from folder structure
+        /// </summary>
+        /// <param name="modelFolder">The model folder.</param>
+        /// <param name="variant">The variant.</param>
+        /// <param name="executionProvider">The execution provider.</param>
+        /// <param name="logger">The logger.</param>
+        /// <returns>QwenPipeline.</returns>
+        public static QwenPipeline FromFolder(string modelFolder, string variant, ExecutionProvider executionProvider, ILogger logger = default)
+        {
+            return new QwenPipeline(QwenConfig.FromFolder(modelFolder, variant, executionProvider), logger);
+        }
+    }
+}
diff --git a/TensorStack.TextGeneration/Pipelines/Qwen/QwenConfig.cs b/TensorStack.TextGeneration/Pipelines/Qwen/QwenConfig.cs
new file mode 100644
index 0000000..eee8524
--- /dev/null
+++ b/TensorStack.TextGeneration/Pipelines/Qwen/QwenConfig.cs
@@ -0,0 +1,9 @@
+using TensorStack.TextGeneration.Common;
+
+namespace TensorStack.TextGeneration.Pipelines.Qwen
+{
+    public record QwenConfig : TransformerConfig
+    {
+        public bool OutputLastHiddenStates { get; set; }
+    }
+}
diff --git a/TensorStack.TextGeneration/Pipelines/Qwen/QwenPipeline.cs b/TensorStack.TextGeneration/Pipelines/Qwen/QwenPipeline.cs
new file mode 100644
index 0000000..e1964f7
--- /dev/null
+++ b/TensorStack.TextGeneration/Pipelines/Qwen/QwenPipeline.cs
@@ -0,0 +1,255 @@
+// Copyright (c) TensorStack. All rights reserved.
+// Licensed under the Apache 2.0 License.
+
+using System;
+using System.IO;
+using System.Linq;
+using System.Threading;
+using System.Threading.Tasks;
+using TensorStack.Common;
+using TensorStack.Common.Pipeline;
+using TensorStack.Common.Tensor;
+using TensorStack.TextGeneration.Cache;
+using TensorStack.TextGeneration.Common;
+using TensorStack.TextGeneration.Processing;
+using TensorStack.TextGeneration.Tokenizers;
+
+namespace TensorStack.TextGeneration.Pipelines.Qwen
+{
+    public class QwenPipeline : DecoderPipeline,
+        IPipeline<GenerateOptions, GenerateResult>,
+        IPipeline<SearchOptions, GenerateResult[]>
+    {
+        /// <summary>
+        /// Initializes a new instance of the <see cref="QwenPipeline"/> class.
+        /// </summary>
+        /// <param name="configuration">The pipeline configuration.</param>
+        public QwenPipeline(QwenConfig configuration)
+            : base(configuration.Tokenizer, configuration.DecoderConfig)
+        {
+            Configuration = configuration;
+        }
+
+        public QwenConfig Configuration { get; }
+
+
+        /// <summary>
+        /// Runs the GreedySearch inference
+        /// </summary>
+        /// <param name="options">The options.</param>
+        /// <param name="progressCallback">The progress callback.</param>
+        /// <param name="cancellationToken">The cancellation token.</param>
+        /// <returns></returns>
+        public virtual async Task<GenerateResult> RunAsync(GenerateOptions options, IProgress progressCallback = null, CancellationToken cancellationToken = default)
+        {
+            await TokenizePromptAsync(options);
+            var sequence = await GreedySearchAsync(options, progressCallback, cancellationToken);
+            using (sequence)
+            {
+                return new GenerateResult
+                {
+                    Score = sequence.Score,
+                    Result = Tokenizer.Decode(sequence.Tokens),
+                    Tokens = sequence.Tokens,
+                    LastHiddenState = sequence.LastHiddenState
+                };
+            }
+        }
+
+
+        /// <summary>
+        /// Runs the BeamSearch inference
+        /// </summary>
+        /// <param name="options">The options.</param>
+        /// <param name="progressCallback">The progress callback.</param>
+        /// <param name="cancellationToken">The cancellation token that can be used by other objects or threads to receive notice of cancellation.</param>
+        public async Task<GenerateResult[]> RunAsync(SearchOptions options, IProgress progressCallback = null, CancellationToken cancellationToken = default)
+        {
+            await TokenizePromptAsync(options);
+
+            var sequences = await BeamSearchAsync(options, progressCallback, cancellationToken);
+            var results = new GenerateResult[sequences.Length];
+            for (int beam = 0; beam < sequences.Length; beam++)
+            {
+                var sequence = sequences[beam];
+                using (sequence)
+                {
+                    results[beam] = new GenerateResult
+                    {
+                        Beam = beam,
+                        Score = sequence.Score,
+                        PenaltyScore = sequence.PenaltyScore,
+                        Result = Tokenizer.Decode(sequence.Tokens),
+                        Tokens = sequence.Tokens,
+                        LastHiddenState = sequence.LastHiddenState
+                    };
+                }
+            }
+            return results;
+        }
+
+
+        /// <summary>
+        /// Gets the LastHiddenState.
+        /// </summary>
+        /// <param name="options">The options.</param>
+        /// <param name="cancellationToken">The cancellation token.</param>
+        public async Task<Tensor<float>> GetLastHiddenState(GenerateOptions options, CancellationToken cancellationToken = default)
+        {
+            await TokenizePromptAsync(options);
+            using (var sequence = await InitializeAsync(options))
+            {
+                return sequence.LastHiddenState;
+            }
+        }
+
+
+        /// <summary>
+        /// Tokenize the prompt
+        /// </summary>
+        /// <param name="options">The options.</param>
+        /// <returns>A Task representing the asynchronous operation.</returns>
+        protected override async Task TokenizePromptAsync(GenerateOptions options)
+        {
+            var tokenizerResult = await Tokenizer.EncodeAsync(options.Prompt);
+            var inputIds = tokenizerResult.InputIds.Span.Pad(Tokenizer.EOS, options.MinLength);
+            var mask = tokenizerResult.Mask.Span.Pad(0, options.MinLength);
+            TokenizerOutput = new TokenizerResult(inputIds, mask);
+        }
+
+
+        /// <summary>
+        /// Gets the token processors.
+        /// </summary>
+        /// <param name="options">The options.</param>
+        /// <returns>ITokenProcessor[].</returns>
+        protected override ITokenProcessor[] GetTokenProcessors(GenerateOptions options)
+        {
+            return
+            [
+                new EOSTokenProcessor(options.MinLength, Tokenizer.EOS),
+                new MaxLengthTokenProcessor(options.MaxLength)
+            ];
+        }
+
+
+        /// <summary>
+        /// Initialize the Decoder cache
+        /// </summary>
+        /// <param name="options">The options.</param>
+        /// <returns>A Task&lt;Sequence&gt; representing the asynchronous operation.</returns>
+        protected override async Task<Sequence> InitializeAsync(GenerateOptions options)
+        {
+            var modelMetadata = await Decoder.LoadAsync();
+            var kvCache = new KVCacheDecoder(modelMetadata, DecoderConfig.NumHeads, DecoderConfig.NumLayers, DecoderConfig.HiddenSize, DecoderConfig.NumKVHeads, options.MaxLength);
+            var sequence = new Sequence(kvCache, Tokenizer.BOS);
+            sequence.Initialize(0);
+
+            var position = TokenizerOutput.Length;
+            var inputIds = TokenizerOutput.InputIds;
+            var positionIds = GetPositionIds(modelMetadata, 0, position);
+            var attentionMask = new Tensor<long>([1, position], 1);
+            RunDecoderInternal(modelMetadata, sequence, inputIds, positionIds, attentionMask, false);
+            return sequence;
+        }
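
The two RunAsync overloads above give greedy and beam decoding; a usage sketch (pipeline construction via the Create factory shown further below; SearchOptions members beyond those used here are not part of this patch):

    var result = await pipeline.RunAsync(new GenerateOptions
    {
        Prompt = "Describe the colour of the sky.",
        MaxLength = 256
    });
    Console.WriteLine(result.Result);       // decoded text

    var beams = await pipeline.RunAsync(new SearchOptions
    {
        Prompt = "Describe the colour of the sky.",
        MaxLength = 256
    });
    Console.WriteLine(beams[0].Result);     // first beam
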
+
+
+        /// <summary>
+        /// Run decoder model
+        /// </summary>
+        /// <param name="sequence">The sequence.</param>
+        /// <returns>A Task&lt;Tensor`1&gt; representing the asynchronous operation.</returns>
+        protected override async Task<Tensor<float>> RunDecoderAsync(Sequence sequence)
+        {
+            var modelMetadata = await Decoder.LoadAsync();
+            var position = TokenizerOutput.Length + sequence.Tokens.Count;
+            var inputIds = new Tensor<long>([1, 1], sequence.Tokens[^1]);
+            var positionIds = GetPositionIds(modelMetadata, position);
+            var attentionMask = new Tensor<long>([1, position], 1);
+            return RunDecoderInternal(modelMetadata, sequence, inputIds, positionIds, attentionMask, true);
+        }
+
+
+        /// <summary>
+        /// Runs the decoder
+        /// </summary>
+        /// <param name="modelMetadata">The model metadata.</param>
+        /// <param name="sequence">The sequence.</param>
+        /// <param name="inputIds">The input ids.</param>
+        /// <param name="positionIds">The position ids.</param>
+        /// <param name="attentionMask">The attention mask.</param>
+        /// <param name="useBranchCache">if set to <c>true</c> [use branch cache].</param>
+        private Tensor<float> RunDecoderInternal(ModelMetadata modelMetadata, Sequence sequence, Tensor<long> inputIds, Tensor<long> positionIds, Tensor<long> attentionMask, bool useBranchCache)
+        {
+            using (var parameters = new ModelParameters(modelMetadata))
+            {
+                // Inputs
+                parameters.AddInput(inputIds);
+                parameters.AddInput(attentionMask);
+                if (positionIds != null)
+                    parameters.AddInput(positionIds);
+
+                foreach (var pastKeyValue in sequence.Cache)
+                    parameters.AddInput(pastKeyValue, false);
+
+                // Outputs
+                foreach (var output in modelMetadata.Outputs)
+                    parameters.AddOutput();
+
+                // Result
+                var modelResult = Decoder.RunInference(parameters);
+                using (var logitsResult = modelResult[0])
+                {
+                    var dimension = logitsResult.GetDimensions();
+                    var logits = logitsResult.ToTensor(dimension[1..]);
+                    var lastHiddenState = Configuration.OutputLastHiddenStates ? modelResult[^1].ToTensor() : default;
+                    var presentKeyValues = Configuration.OutputLastHiddenStates ? modelResult.ToArray()[1..^1] : modelResult.ToArray()[1..];
+                    sequence.UpdateCache(presentKeyValues, useBranchCache, lastHiddenState);
+                    return logits;
+                }
+            }
+        }
+
+
+        /// <summary>
+        /// Creates the QwenPipeline
+        /// </summary>
+        /// <param name="provider">The provider.</param>
+        /// <param name="modelPath">The model path.</param>
+        /// <param name="model">The decoder model.</param>
+        /// <returns>QwenPipeline.</returns>
+        public static QwenPipeline Create(ExecutionProvider provider, string modelPath, string model = "model.onnx")
+        {
+            // Qwen-2.5-VL
+            // https://huggingface.co/Qwen/Qwen-Image/blob/main/text_encoder/config.json
+            var numHeads = 28;
+            var numLayers = 28;
+            var hiddenSize = 3584;
+            var numKVHeads = 4;
+            var vocabSize = 152064;
+            var config = new QwenConfig
+            {
+                OutputLastHiddenStates = true,
+                Tokenizer = new BPETokenizer(new TokenizerConfig
+                {
+                    BOS = 151643,
+                    EOS = 151645,
+                    Path = modelPath
+                }),
+                DecoderConfig = new DecoderConfig
+                {
+                    Path = Path.Combine(modelPath, model),
+                    VocabSize = vocabSize,
+                    NumHeads = numHeads,
+                    NumLayers = numLayers,
+                    HiddenSize = hiddenSize,
+                    NumKVHeads = numKVHeads
+                }
+            };
+
+            config.DecoderConfig.SetProvider(provider);
+            return new QwenPipeline(config);
+        }
+
+    }
+}
\ No newline at end of file
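
The Create factory above also lets the text encoder run standalone; a sketch of pulling the fixed-size prompt embedding the image pipeline consumes (path and `provider` illustrative; the shape follows from MinLength/MaxLength = 128 and HiddenSize = 3584):

    var textEncoder = QwenPipeline.Create(provider, @"M:\Models\QwenImage\text_encoder");
    var lastHiddenState = await textEncoder.GetLastHiddenState(new GenerateOptions
    {
        Prompt = "A photo of a corgi",
        MinLength = 128,
        MaxLength = 128
    });
    // expected lastHiddenState shape: [1, 128, 3584]
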
From 8ee1e354d04218497ff1125402a44fe790344eab Mon Sep 17 00:00:00 2001
From: sa_ddam213
Date: Thu, 20 Nov 2025 12:45:38 +1300
Subject: [PATCH 2/3] Add AutoEncoder LatentsMean/LatentsStd normalization

---
 .../Config/AutoEncoderConfig.cs               |  9 ++++-
 .../Models/AutoEncoderModel.cs                | 34 ++++++++++++++++
 .../Pipelines/Qwen/QwenBase.cs                |  2 +-
 .../Pipelines/Qwen/QwenConfig.cs              | 40 ++++++++++++++++++-
 4 files changed, 82 insertions(+), 3 deletions(-)

diff --git a/TensorStack.StableDiffusion/Config/AutoEncoderConfig.cs b/TensorStack.StableDiffusion/Config/AutoEncoderConfig.cs
index e85251a..dc81919 100644
--- a/TensorStack.StableDiffusion/Config/AutoEncoderConfig.cs
+++ b/TensorStack.StableDiffusion/Config/AutoEncoderConfig.cs
@@ -1,5 +1,6 @@
 // Copyright (c) TensorStack. All rights reserved.
 // Licensed under the Apache 2.0 License.
+using System.Text.Json.Serialization;
 using TensorStack.Common;
 
 namespace TensorStack.StableDiffusion.Config
@@ -14,5 +15,11 @@ public record AutoEncoderModelConfig : ModelConfig
         public int LatentChannels { get; set; } = 4;
         public string DecoderModelPath { get; set; }
         public string EncoderModelPath { get; set; }
-    }
+
+        [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
+        public float[] LatentsStd { get; set; }
+
+        [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
+        public float[] LatentsMean { get; set; }
+    }
 }
diff --git a/TensorStack.StableDiffusion/Models/AutoEncoderModel.cs b/TensorStack.StableDiffusion/Models/AutoEncoderModel.cs
index d50be16..1049b33 100644
--- a/TensorStack.StableDiffusion/Models/AutoEncoderModel.cs
+++ b/TensorStack.StableDiffusion/Models/AutoEncoderModel.cs
@@ -142,6 +142,7 @@ public virtual async Task<Tensor<float>> DecodeAsync(Tensor<float> inputTensor,
             if (!disableShift)
                 inputTensor.Add(ShiftFactor);
 
+            ApplyNormalization(inputTensor, Configuration.LatentsMean, Configuration.LatentsStd);
             var outputDimensions = new[] { 1, OutChannels, inputTensor.Dimensions[2] * Scale, inputTensor.Dimensions[3] * Scale };
             using (var modelParameters = new ModelParameters(Decoder.Metadata, cancellationToken))
             {
@@ -195,6 +196,39 @@ public virtual async Task<Tensor<float>> EncodeAsync(ImageTensor inputTensor, bo
         }
 
 
+        /// <summary>
+        /// Applies per-channel normalization to a latent tensor in-place, equivalent to:
+        /// latents = latents / latentsStd + latentsMean
+        /// </summary>
+        /// <param name="latents">The latents.</param>
+        /// <param name="latentsMean">Per-channel mean values. Length must equal the number of channels in <paramref name="latents"/>.</param>
+        /// <param name="latentsStd">Per-channel standard deviation values. Length must equal the number of channels in <paramref name="latents"/>. Each value is inverted (1 / std) before applying to the tensor.</param>
+        private static void ApplyNormalization(Tensor<float> latents, ReadOnlySpan<float> latentsMean, ReadOnlySpan<float> latentsStd)
+        {
+            if (latentsMean.IsEmpty || latentsStd.IsEmpty)
+                return;
+
+            var dimensions = latents.Dimensions;
+            var channels = dimensions[1];
+
+            Span<float> invStd = stackalloc float[channels];
+            for (int c = 0; c < channels; c++)
+                invStd[c] = 1f / latentsStd[c];
+
+            var data = latents.Memory.Span;
+            var strideC = data.Length / channels;
+
+            for (int c = 0; c < channels; c++)
+            {
+                var mean = latentsMean[c];
+                var inv = invStd[c];
+                var slice = data.Slice(c * strideC, strideC);
+                for (int i = 0; i < slice.Length; i++)
+                    slice[i] = slice[i] * inv + mean;
+            }
+        }
+
+
         /// <summary>
         /// Performs application-defined tasks associated with freeing, releasing, or resetting unmanaged resources.
         /// </summary>
diff --git a/TensorStack.StableDiffusion/Pipelines/Qwen/QwenBase.cs b/TensorStack.StableDiffusion/Pipelines/Qwen/QwenBase.cs
index fbb4e9d..de78e18 100644
--- a/TensorStack.StableDiffusion/Pipelines/Qwen/QwenBase.cs
+++ b/TensorStack.StableDiffusion/Pipelines/Qwen/QwenBase.cs
@@ -165,7 +165,7 @@ protected async Task<PromptResult> CreatePromptAsync(IPipelineOptions options, C
         protected async Task<ImageTensor> DecodeLatentsAsync(IPipelineOptions options, Tensor<float> latents, CancellationToken cancellationToken = default)
         {
             var timestamp = Logger.LogBegin(LogLevel.Debug, "[DecodeLatentsAsync] Begin AutoEncoder Decode");
-            var decoderResult = await AutoEncoder.DecodeAsync(latents, cancellationToken: cancellationToken);
+            var decoderResult = await AutoEncoder.DecodeAsync(latents, disableShift: true, disableScale: true, cancellationToken: cancellationToken);
             if (options.IsLowMemoryEnabled || options.IsLowMemoryDecoderEnabled)
                 await AutoEncoder.DecoderUnloadAsync();
 
diff --git a/TensorStack.StableDiffusion/Pipelines/Qwen/QwenConfig.cs b/TensorStack.StableDiffusion/Pipelines/Qwen/QwenConfig.cs
index 96afdb1..4d1bff4 100644
--- a/TensorStack.StableDiffusion/Pipelines/Qwen/QwenConfig.cs
+++ b/TensorStack.StableDiffusion/Pipelines/Qwen/QwenConfig.cs
@@ -43,7 +43,45 @@ public QwenConfig()
             {
                 Scale = 16,
                 LatentChannels = 16,
-                ScaleFactor = 1
+                ScaleFactor = 1,
+                LatentsMean =
+                [
+                    -0.7571f,
+                    -0.7089f,
+                    -0.9113f,
+                    0.1075f,
+                    -0.1745f,
+                    0.9653f,
+                    -0.1517f,
+                    1.5508f,
+                    0.4134f,
+                    -0.0715f,
+                    0.5517f,
+                    -0.3632f,
+                    -0.1922f,
+                    -0.9497f,
+                    0.2503f,
+                    -0.2921f
+                ],
+                LatentsStd =
+                [
+                    2.8184f,
+                    1.4541f,
+                    2.3275f,
+                    2.6558f,
+                    1.2196f,
+                    1.7708f,
+                    2.6052f,
+                    2.0743f,
+                    3.2687f,
+                    2.1526f,
+                    2.8652f,
+                    1.5579f,
+                    1.6382f,
+                    1.1253f,
+                    2.8251f,
+                    1.916f
+                ]
             };
         }
 
From 1d95332e1e9c2d6fd114bb1a4063589269c7f4df Mon Sep 17 00:00:00 2001
From: sa_ddam213
Date: Thu, 20 Nov 2025 14:07:33 +1300
Subject: [PATCH 3/3] txt_seq_lens and encoder_hidden_states_mask inputs can be calculated in model

---
 .../Models/TransformerQwenModel.cs                     | 9 ++-------
 TensorStack.StableDiffusion/Pipelines/Qwen/QwenBase.cs | 2 +-
 2 files changed, 3 insertions(+), 8 deletions(-)

diff --git a/TensorStack.StableDiffusion/Models/TransformerQwenModel.cs b/TensorStack.StableDiffusion/Models/TransformerQwenModel.cs
index 0f18ebe..c88471f 100644
--- a/TensorStack.StableDiffusion/Models/TransformerQwenModel.cs
+++ b/TensorStack.StableDiffusion/Models/TransformerQwenModel.cs
@@ -29,23 +29,18 @@ public TransformerQwenModel(TransformerModelConfig configuration)
         /// <param name="encoderHiddenStates">The encoder hidden states.</param>
         /// <param name="imgShapes">The image shapes.</param>
         /// <param name="cancellationToken">The cancellation token that can be used by other objects or threads to receive notice of cancellation.</param>
-        public async Task<Tensor<float>> RunAsync(int timestep, Tensor<float> hiddenStates, Tensor<float> encoderHiddenStates, Tensor<long> imgShapes, CancellationToken cancellationToken = default)
+        public async Task<Tensor<float>> RunAsync(int timestep, Tensor<float> hiddenStates, Tensor<float> encoderHiddenStates, Tensor<long> imgShapes, CancellationToken cancellationToken = default)
         {
             if (!Transformer.IsLoaded())
                 await Transformer.LoadAsync(cancellationToken: cancellationToken);
 
-            var txtSequenceLength = encoderHiddenStates.Dimensions[1];
-            var encoderHiddenStatesMask = new Tensor<long>([1, txtSequenceLength]);
-            encoderHiddenStatesMask.Fill(1);
             using (var transformerParams = new ModelParameters(Transformer.Metadata, cancellationToken))
             {
                 // Inputs
                 transformerParams.AddInput(hiddenStates);
-                transformerParams.AddScalarInput(timestep);
-                transformerParams.AddInput(encoderHiddenStatesMask);
                 transformerParams.AddInput(encoderHiddenStates);
+                transformerParams.AddScalarInput(timestep);
                 transformerParams.AddInput(imgShapes);
-                transformerParams.AddScalarInput(txtSequenceLength);
 
                 // Outputs
                 transformerParams.AddOutput(hiddenStates.Dimensions);
diff --git a/TensorStack.StableDiffusion/Pipelines/Qwen/QwenBase.cs b/TensorStack.StableDiffusion/Pipelines/Qwen/QwenBase.cs
index de78e18..78c1c34 100644
--- a/TensorStack.StableDiffusion/Pipelines/Qwen/QwenBase.cs
+++ b/TensorStack.StableDiffusion/Pipelines/Qwen/QwenBase.cs
@@ -220,7 +220,7 @@ protected async Task<Tensor<float>> RunInferenceAsync(IPipelineOptions options,
             var latents = await CreateLatentInputAsync(options, scheduler, cancellationToken);
 
             // Create ImgShapes
-            var imgShapes = new Tensor<long>([1, 64, 64]); // TODO:
+            var imgShapes = new Tensor<long>([1, 64, 64]); // TODO: H/W
 
             // Load Model
             await LoadTransformerAsync(options, progressCallback, cancellationToken);
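
A quick numeric check of the patch-2 normalization direction (the decode path applies x / std + mean per channel; an encode path would apply the inverse, (x - mean) * std):

    // channel 0: std = 2.8184, mean = -0.7571
    // raw latent   x = 1.0
    // normalized   1.0 / 2.8184 + (-0.7571) = -0.4023

With patch 2, QwenBase.DecodeLatentsAsync passes disableShift/disableScale so the generic ShiftFactor/ScaleFactor path in AutoEncoderModel is bypassed and only this per-channel mean/std table is applied to Qwen latents.
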