diff --git a/Directory.Build.props b/Directory.Build.props index b14204e9a..dbd769f4c 100644 --- a/Directory.Build.props +++ b/Directory.Build.props @@ -70,7 +70,7 @@ net8.0 net8.0 $(SamplesFrameworks);net472 - $(DefineConstants);FEATURE_ASYNC_SAX_XML + $(DefineConstants);TASKS_SUPPORTED diff --git a/src/DocumentFormat.OpenXml.Framework/OpenXmlPartReader.cs b/src/DocumentFormat.OpenXml.Framework/OpenXmlPartReader.cs index 351344502..cf508c1ff 100644 --- a/src/DocumentFormat.OpenXml.Framework/OpenXmlPartReader.cs +++ b/src/DocumentFormat.OpenXml.Framework/OpenXmlPartReader.cs @@ -17,7 +17,7 @@ namespace DocumentFormat.OpenXml /// /// Represents the Open XML part reader class. /// - public class OpenXmlPartReader : OpenXmlReader + public partial class OpenXmlPartReader : OpenXmlReader { private readonly IRootElementFeature _rootElements; private readonly IOpenXmlNamespaceResolver _resolver; @@ -100,7 +100,7 @@ public OpenXmlPartReader(Stream partStream, IFeatureCollection features, OpenXml _resolver = features.GetRequired(); _rootElements = features.GetRequired(); - _xmlReader = CreateReader(partStream, options.CloseStream, options.MaxCharactersInPart, ignoreWhitespace: options.IgnoreWhitespace, out _standalone, out _encoding); + _xmlReader = CreateReader(partStream, options, out _standalone, out _encoding); } /// @@ -402,11 +402,18 @@ public override void Skip() /// true if the next element was read successfully; false if there are no more elements to read. private bool MoveToNextElement() { - switch (_elementState) + if (_elementState == ElementState.Null) { - case ElementState.Null: - return ReadRoot(); + return ReadRoot(); + } + return MoveToNextElementHelper(); + } + + private bool MoveToNextElementHelper() + { + switch (_elementState) + { case ElementState.EOF: return false; @@ -667,17 +674,20 @@ public override void Close() _xmlReader.Close(); } - private XmlReader CreateReader(Stream partStream, bool closeInput, long maxCharactersInPart, bool ignoreWhitespace, out bool? standalone, out string? encoding) + private XmlReader CreateReader(Stream partStream, OpenXmlPartReaderOptions options, out bool? standalone, out string? encoding) { var settings = new XmlReaderSettings { - MaxCharactersInDocument = maxCharactersInPart, - CloseInput = closeInput, - IgnoreWhitespace = ignoreWhitespace, + MaxCharactersInDocument = options.MaxCharactersInPart, + CloseInput = options.CloseStream, + IgnoreWhitespace = options.IgnoreWhitespace, #if NET35 ProhibitDtd = true, #else DtdProcessing = DtdProcessing.Prohibit, +#endif +#if TASKS_SUPPORTED + Async = options.Async, #endif }; @@ -727,6 +737,11 @@ private bool ReadRoot() _xmlReader.Skip(); } + return ReadRootHelper(); + } + + private bool ReadRootHelper() + { if (_xmlReader.EOF || !_xmlReader.IsStartElement()) { throw new InvalidDataException(ExceptionMessages.PartIsEmpty); diff --git a/src/DocumentFormat.OpenXml.Framework/OpenXmlPartReaderAsync.cs b/src/DocumentFormat.OpenXml.Framework/OpenXmlPartReaderAsync.cs new file mode 100644 index 000000000..c282124b0 --- /dev/null +++ b/src/DocumentFormat.OpenXml.Framework/OpenXmlPartReaderAsync.cs @@ -0,0 +1,110 @@ +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. + +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.IO; +using System.Text; +#if TASKS_SUPPORTED +using System.Threading.Tasks; +#endif +using System.Xml; + +namespace DocumentFormat.OpenXml; + +/// +/// Represents the Open XML part reader class. +/// +public partial class OpenXmlPartReader : OpenXmlReader +{ + /// + /// Gets the type of the current node in the XML document being read. + /// + /// + /// The indicates the type of the current node, such as + /// Element, Attribute, Text, CDATA, Comment, or others. + /// This property provides information about the structure of the XML document + /// and is useful for determining how to process the current node. + /// + public override XmlNodeType NodeType + { + get + { + return _xmlReader.NodeType; + } + } + +#if TASKS_SUPPORTED + /// + /// Asynchronously reads the next element in the Open XML document. + /// + /// + /// A task that represents the asynchronous read operation. The task result is true if the next element + /// was read successfully; false if there are no more elements to read. + /// + public async override Task ReadAsync() + { + ThrowIfObjectDisposed(); + + bool result = await MoveToNextElementAsync().ConfigureAwait(false); + + if (result && !ReadMiscNodes) + { + // skip miscellaneous node + while (result && IsMiscNode) + { + result = await MoveToNextElementAsync().ConfigureAwait(false); + } + } + + return result; + } + + public async override Task ReadFirstChildAsync() + { + //ThrowIfObjectDisposed(); + + //bool result = await MoveToFirstChildAsync().ConfigureAwait(true); + + //if (result && !ReadMiscNodes) + //{ + // // skip miscellaneous node + // while (result && IsMiscNode) + // { + // result = MoveToNextSibling(); + // } + //} + + //return result; + return true; + } + + private async Task MoveToNextElementAsync() + { + if (_elementState == ElementState.Null) + { + return await ReadRootAsync().ConfigureAwait(false); + } + + return MoveToNextElementHelper(); + } + + private async Task ReadRootAsync() + { + Debug.Assert(_elementState == ElementState.Null); + Debug.Assert(_elementStack.Count == 0); + + // TODO: should we take care of entity? ]> + // TODO: is it OK that we skip all prologue ( DOCTYPE, Comment, PT ) ? + await _xmlReader.MoveToContentAsync().ConfigureAwait(false); + + while (!_xmlReader.EOF && _xmlReader.NodeType != XmlNodeType.Element) + { + await _xmlReader.SkipAsync().ConfigureAwait(false); + } + + return ReadRootHelper(); + } +#endif +} diff --git a/src/DocumentFormat.OpenXml.Framework/OpenXmlPartReaderOptions.cs b/src/DocumentFormat.OpenXml.Framework/OpenXmlPartReaderOptions.cs index 53c3c64f0..5a57522d6 100644 --- a/src/DocumentFormat.OpenXml.Framework/OpenXmlPartReaderOptions.cs +++ b/src/DocumentFormat.OpenXml.Framework/OpenXmlPartReaderOptions.cs @@ -31,11 +31,26 @@ public struct OpenXmlPartReaderOptions /// public bool CloseStream { get; set; } +#if TASKS_SUPPORTED + /// + /// Gets or sets a value indicating whether the part reader should operate asynchronously. + /// + /// + /// When set to true, the reader will use asynchronous methods for reading XML data, + /// allowing non-blocking operations. This property is only available when the build target + /// supports asynchronous SAX XML processing. + /// + public bool Async { get; set; } +#endif + internal OpenXmlPartReaderOptions UpdateForPart(OpenXmlPart part) => new() { ReadMiscellaneousNodes = ReadMiscellaneousNodes, MaxCharactersInPart = MaxCharactersInPart != 0 ? MaxCharactersInPart : part.MaxCharactersInPart, IgnoreWhitespace = IgnoreWhitespace, CloseStream = true, +#if TASKS_SUPPORTED + Async = Async, +#endif }; } diff --git a/src/DocumentFormat.OpenXml.Framework/OpenXmlPartWriter.cs b/src/DocumentFormat.OpenXml.Framework/OpenXmlPartWriter.cs index 7dbb47209..141e41106 100644 --- a/src/DocumentFormat.OpenXml.Framework/OpenXmlPartWriter.cs +++ b/src/DocumentFormat.OpenXml.Framework/OpenXmlPartWriter.cs @@ -6,7 +6,7 @@ using System.Collections.Generic; using System.IO; using System.Text; -#if FEATURE_ASYNC_SAX_XML +#if TASKS_SUPPORTED using DocumentFormat.OpenXml.Framework; using System.Threading.Tasks; #endif @@ -82,7 +82,7 @@ public OpenXmlPartWriter(OpenXmlPart openXmlPart, OpenXmlPartWriterSettings sett { CloseOutput = true, Encoding = settings.Encoding, -#if FEATURE_ASYNC_SAX_XML +#if TASKS_SUPPORTED Async = settings.Async, #endif }; @@ -146,7 +146,7 @@ public OpenXmlPartWriter(Stream partStream, OpenXmlPartWriterSettings settings) { CloseOutput = settings.CloseOutput, Encoding = settings.Encoding, -#if FEATURE_ASYNC_SAX_XML +#if TASKS_SUPPORTED Async = settings.Async, #endif }; @@ -430,7 +430,7 @@ public override void Close() #endregion // Async Methods -#if FEATURE_ASYNC_SAX_XML +#if TASKS_SUPPORTED /// /// Asynchronously writes the XML declaration with the version "1.0". /// diff --git a/src/DocumentFormat.OpenXml.Framework/OpenXmlPartWriterSettings.cs b/src/DocumentFormat.OpenXml.Framework/OpenXmlPartWriterSettings.cs index 88e076308..c955f69c8 100644 --- a/src/DocumentFormat.OpenXml.Framework/OpenXmlPartWriterSettings.cs +++ b/src/DocumentFormat.OpenXml.Framework/OpenXmlPartWriterSettings.cs @@ -10,7 +10,7 @@ namespace DocumentFormat.OpenXml; /// public class OpenXmlPartWriterSettings { -#if FEATURE_ASYNC_SAX_XML +#if TASKS_SUPPORTED /// /// Gets or sets a value indicating whether asynchronous methods can be used. /// diff --git a/src/DocumentFormat.OpenXml.Framework/OpenXmlReader.cs b/src/DocumentFormat.OpenXml.Framework/OpenXmlReader.cs index 5cf655c24..c74de8671 100644 --- a/src/DocumentFormat.OpenXml.Framework/OpenXmlReader.cs +++ b/src/DocumentFormat.OpenXml.Framework/OpenXmlReader.cs @@ -6,6 +6,9 @@ using System.Collections.Generic; using System.Collections.ObjectModel; using System.IO; +#if TASKS_SUPPORTED +using System.Threading.Tasks; +#endif using System.Xml; namespace DocumentFormat.OpenXml @@ -183,6 +186,17 @@ public virtual bool HasAttributes /// public abstract string Prefix { get; } + /// + /// Gets the type of the current node in the XML document being read. + /// + /// + /// The indicates the type of the current node, such as + /// Element, Attribute, Text, CDATA, Comment, or others. + /// This property provides information about the structure of the XML document + /// and is useful for determining how to process the current node. + /// + public virtual XmlNodeType NodeType { get; } + /// /// Gets an instance of if available for the current reader. /// @@ -235,6 +249,42 @@ public virtual bool HasAttributes /// public abstract void Close(); +#if TASKS_SUPPORTED + #region Async methods + + /// + /// Asynchronously reads the next element in the Open XML document. + /// + /// + /// A task that represents the asynchronous read operation. The task result is true if the next element + /// was read successfully; false if there are no more elements to read. + /// + /// + /// This method is only available when the build target supports asynchronous SAX XML processing. + /// + public virtual Task ReadAsync() + { + return Task.FromResult(Read()); + } + + /// + /// Asynchronously moves the reader to the first child element of the current node. + /// + /// + /// A task that represents the asynchronous operation. The task result is true if the first child element + /// was read successfully; false if there are no child elements to read. + /// + /// + /// This method can only be called when the reader is positioned on an element start. If no child elements exist, + /// the reader will move to the end tag of the current element. + /// + public virtual Task ReadFirstChildAsync() + { + return Task.FromResult(ReadFirstChild()); + } + #endregion +#endif + /// /// Thrown if the object is disposed. /// diff --git a/src/DocumentFormat.OpenXml.Framework/OpenXmlWriter.cs b/src/DocumentFormat.OpenXml.Framework/OpenXmlWriter.cs index 24fa6af83..00c58299c 100644 --- a/src/DocumentFormat.OpenXml.Framework/OpenXmlWriter.cs +++ b/src/DocumentFormat.OpenXml.Framework/OpenXmlWriter.cs @@ -6,7 +6,7 @@ using System.Collections.Generic; using System.IO; using System.Text; -#if FEATURE_ASYNC_SAX_XML +#if TASKS_SUPPORTED using System.Threading.Tasks; #endif @@ -131,7 +131,7 @@ protected OpenXmlWriter() /// public abstract void Close(); -#if FEATURE_ASYNC_SAX_XML +#if TASKS_SUPPORTED /// /// Asynchronously writes the XML declaration with the version "1.0". /// diff --git a/src/DocumentFormat.OpenXml.Framework/PublicAPI/PublicAPI.Shipped.txt b/src/DocumentFormat.OpenXml.Framework/PublicAPI/PublicAPI.Shipped.txt index c7d7da820..2c88b12d4 100644 --- a/src/DocumentFormat.OpenXml.Framework/PublicAPI/PublicAPI.Shipped.txt +++ b/src/DocumentFormat.OpenXml.Framework/PublicAPI/PublicAPI.Shipped.txt @@ -1009,3 +1009,6 @@ DocumentFormat.OpenXml.OpenXmlPartWriterSettings.Encoding.set -> void DocumentFormat.OpenXml.OpenXmlPartWriterSettings.OpenXmlPartWriterSettings() -> void DocumentFormat.OpenXml.OpenXmlPartWriter.OpenXmlPartWriter(DocumentFormat.OpenXml.Packaging.OpenXmlPart! openXmlPart, DocumentFormat.OpenXml.OpenXmlPartWriterSettings! settings) -> void DocumentFormat.OpenXml.OpenXmlPartWriter.OpenXmlPartWriter(System.IO.Stream! partStream, DocumentFormat.OpenXml.OpenXmlPartWriterSettings! settings) -> void +DocumentFormat.OpenXml.OpenXmlPartReader.NodeType.get -> System.Xml.XmlNodeType +virtual DocumentFormat.OpenXml.OpenXmlReader.NodeType.get -> System.Xml.XmlNodeType +override DocumentFormat.OpenXml.OpenXmlPartReader.NodeType.get -> System.Xml.XmlNodeType diff --git a/src/DocumentFormat.OpenXml.Framework/PublicAPI/PublicAPI.Unshipped.txt b/src/DocumentFormat.OpenXml.Framework/PublicAPI/PublicAPI.Unshipped.txt index ab058de62..ddf94f5ae 100644 --- a/src/DocumentFormat.OpenXml.Framework/PublicAPI/PublicAPI.Unshipped.txt +++ b/src/DocumentFormat.OpenXml.Framework/PublicAPI/PublicAPI.Unshipped.txt @@ -1 +1,6 @@ #nullable enable +DocumentFormat.OpenXml.OpenXmlPartReaderOptions.Async.get -> bool +DocumentFormat.OpenXml.OpenXmlPartReaderOptions.Async.set -> void +override DocumentFormat.OpenXml.OpenXmlPartReader.ReadAsync() -> System.Threading.Tasks.Task! +virtual DocumentFormat.OpenXml.OpenXmlReader.ReadAsync() -> System.Threading.Tasks.Task! +virtual DocumentFormat.OpenXml.OpenXmlReader.ReadFirstChildAsync() -> System.Threading.Tasks.Task! diff --git a/src/DocumentFormat.OpenXml.Framework/XmlConvertingReader.cs b/src/DocumentFormat.OpenXml.Framework/XmlConvertingReader.cs index 76b3bd350..d56a3cee4 100644 --- a/src/DocumentFormat.OpenXml.Framework/XmlConvertingReader.cs +++ b/src/DocumentFormat.OpenXml.Framework/XmlConvertingReader.cs @@ -4,6 +4,9 @@ using DocumentFormat.OpenXml.Features; using DocumentFormat.OpenXml.Framework; using System; +#if TASKS_SUPPORTED +using System.Threading.Tasks; +#endif using System.Xml; namespace DocumentFormat.OpenXml @@ -53,6 +56,14 @@ protected override void Dispose(bool disposing) /// public override bool Read() => BaseReader.Read(); +#if TASKS_SUPPORTED + public override Task ReadAsync() => BaseReader.ReadAsync(); + + public override Task SkipAsync() => BaseReader.SkipAsync(); + + public override Task MoveToContentAsync() => BaseReader.MoveToContentAsync(); +#endif + /// public override string GetAttribute(int index) => BaseReader.GetAttribute(index); diff --git a/test/DocumentFormat.OpenXml.Tests/ofapiTest/OpenXmlWriterTest.cs b/test/DocumentFormat.OpenXml.Tests/ofapiTest/OpenXmlWriterTest.cs index 60d4f83e1..4a3b929a7 100644 --- a/test/DocumentFormat.OpenXml.Tests/ofapiTest/OpenXmlWriterTest.cs +++ b/test/DocumentFormat.OpenXml.Tests/ofapiTest/OpenXmlWriterTest.cs @@ -239,7 +239,7 @@ public void WriteStringExceptionTest7() } } -#if FEATURE_ASYNC_SAX_XML +#if TASKS_SUPPORTED [Fact] public async Task WriteStartDocumentAsync_ShouldWriteStartDocument() {