From 67becfc025b8e56635d65c1460459ec532bc9c60 Mon Sep 17 00:00:00 2001 From: Michael Bowen <10384982+mikeebowen@users.noreply.github.com> Date: Fri, 9 May 2025 10:06:13 -0700 Subject: [PATCH 1/3] car ReadAsync --- Directory.Build.props | 2 +- .../OpenXmlPartReader.cs | 27 +++++++++++++++---- .../OpenXmlPartReaderOptions.cs | 15 +++++++++++ .../OpenXmlPartWriter.cs | 8 +++--- .../OpenXmlPartWriterSettings.cs | 2 +- .../OpenXmlReader.cs | 24 +++++++++++++++++ .../OpenXmlWriter.cs | 4 +-- .../PublicAPI/PublicAPI.Unshipped.txt | 4 +++ .../XmlConvertingReader.cs | 7 +++++ .../ofapiTest/OpenXmlWriterTest.cs | 2 +- 10 files changed, 81 insertions(+), 14 deletions(-) diff --git a/Directory.Build.props b/Directory.Build.props index b14204e9a..dbd769f4c 100644 --- a/Directory.Build.props +++ b/Directory.Build.props @@ -70,7 +70,7 @@ net8.0 net8.0 $(SamplesFrameworks);net472 - $(DefineConstants);FEATURE_ASYNC_SAX_XML + $(DefineConstants);TASKS_SUPPORTED diff --git a/src/DocumentFormat.OpenXml.Framework/OpenXmlPartReader.cs b/src/DocumentFormat.OpenXml.Framework/OpenXmlPartReader.cs index 351344502..396b66d6c 100644 --- a/src/DocumentFormat.OpenXml.Framework/OpenXmlPartReader.cs +++ b/src/DocumentFormat.OpenXml.Framework/OpenXmlPartReader.cs @@ -10,6 +10,9 @@ using System.Diagnostics; using System.IO; using System.Linq; +#if TASKS_SUPPORTED +using System.Threading.Tasks; +#endif using System.Xml; namespace DocumentFormat.OpenXml @@ -100,7 +103,7 @@ public OpenXmlPartReader(Stream partStream, IFeatureCollection features, OpenXml _resolver = features.GetRequired(); _rootElements = features.GetRequired(); - _xmlReader = CreateReader(partStream, options.CloseStream, options.MaxCharactersInPart, ignoreWhitespace: options.IgnoreWhitespace, out _standalone, out _encoding); + _xmlReader = CreateReader(partStream, options, out _standalone, out _encoding); } /// @@ -394,6 +397,17 @@ public override void Skip() /// public override IXmlLineInfo GetLineInfo() => 
XmlLineInfo.Get(_xmlReader); + #region Async methods +#if TASKS_SUPPORTED + public override Task ReadAsync() + { + ThrowIfObjectDisposed(); + + + return _xmlReader.ReadAsync(); + } +#endif + #endregion #region private methods /// @@ -667,17 +681,20 @@ public override void Close() _xmlReader.Close(); } - private XmlReader CreateReader(Stream partStream, bool closeInput, long maxCharactersInPart, bool ignoreWhitespace, out bool? standalone, out string? encoding) + private XmlReader CreateReader(Stream partStream, OpenXmlPartReaderOptions options, out bool? standalone, out string? encoding) { var settings = new XmlReaderSettings { - MaxCharactersInDocument = maxCharactersInPart, - CloseInput = closeInput, - IgnoreWhitespace = ignoreWhitespace, + MaxCharactersInDocument = options.MaxCharactersInPart, + CloseInput = options.CloseStream, + IgnoreWhitespace = options.IgnoreWhitespace, #if NET35 ProhibitDtd = true, #else DtdProcessing = DtdProcessing.Prohibit, +#endif +#if TASKS_SUPPORTED + Async = options.Async, #endif }; diff --git a/src/DocumentFormat.OpenXml.Framework/OpenXmlPartReaderOptions.cs b/src/DocumentFormat.OpenXml.Framework/OpenXmlPartReaderOptions.cs index 53c3c64f0..5a57522d6 100644 --- a/src/DocumentFormat.OpenXml.Framework/OpenXmlPartReaderOptions.cs +++ b/src/DocumentFormat.OpenXml.Framework/OpenXmlPartReaderOptions.cs @@ -31,11 +31,26 @@ public struct OpenXmlPartReaderOptions /// public bool CloseStream { get; set; } +#if TASKS_SUPPORTED + /// + /// Gets or sets a value indicating whether the part reader should operate asynchronously. + /// + /// + /// When set to true, the reader will use asynchronous methods for reading XML data, + /// allowing non-blocking operations. This property is only available when the build target + /// supports asynchronous SAX XML processing. 
+ /// + public bool Async { get; set; } +#endif + internal OpenXmlPartReaderOptions UpdateForPart(OpenXmlPart part) => new() { ReadMiscellaneousNodes = ReadMiscellaneousNodes, MaxCharactersInPart = MaxCharactersInPart != 0 ? MaxCharactersInPart : part.MaxCharactersInPart, IgnoreWhitespace = IgnoreWhitespace, CloseStream = true, +#if TASKS_SUPPORTED + Async = Async, +#endif }; } diff --git a/src/DocumentFormat.OpenXml.Framework/OpenXmlPartWriter.cs b/src/DocumentFormat.OpenXml.Framework/OpenXmlPartWriter.cs index 7dbb47209..141e41106 100644 --- a/src/DocumentFormat.OpenXml.Framework/OpenXmlPartWriter.cs +++ b/src/DocumentFormat.OpenXml.Framework/OpenXmlPartWriter.cs @@ -6,7 +6,7 @@ using System.Collections.Generic; using System.IO; using System.Text; -#if FEATURE_ASYNC_SAX_XML +#if TASKS_SUPPORTED using DocumentFormat.OpenXml.Framework; using System.Threading.Tasks; #endif @@ -82,7 +82,7 @@ public OpenXmlPartWriter(OpenXmlPart openXmlPart, OpenXmlPartWriterSettings sett { CloseOutput = true, Encoding = settings.Encoding, -#if FEATURE_ASYNC_SAX_XML +#if TASKS_SUPPORTED Async = settings.Async, #endif }; @@ -146,7 +146,7 @@ public OpenXmlPartWriter(Stream partStream, OpenXmlPartWriterSettings settings) { CloseOutput = settings.CloseOutput, Encoding = settings.Encoding, -#if FEATURE_ASYNC_SAX_XML +#if TASKS_SUPPORTED Async = settings.Async, #endif }; @@ -430,7 +430,7 @@ public override void Close() #endregion // Async Methods -#if FEATURE_ASYNC_SAX_XML +#if TASKS_SUPPORTED /// /// Asynchronously writes the XML declaration with the version "1.0". 
/// diff --git a/src/DocumentFormat.OpenXml.Framework/OpenXmlPartWriterSettings.cs b/src/DocumentFormat.OpenXml.Framework/OpenXmlPartWriterSettings.cs index 88e076308..c955f69c8 100644 --- a/src/DocumentFormat.OpenXml.Framework/OpenXmlPartWriterSettings.cs +++ b/src/DocumentFormat.OpenXml.Framework/OpenXmlPartWriterSettings.cs @@ -10,7 +10,7 @@ namespace DocumentFormat.OpenXml; /// public class OpenXmlPartWriterSettings { -#if FEATURE_ASYNC_SAX_XML +#if TASKS_SUPPORTED /// /// Gets or sets a value indicating whether asynchronous methods can be used. /// diff --git a/src/DocumentFormat.OpenXml.Framework/OpenXmlReader.cs b/src/DocumentFormat.OpenXml.Framework/OpenXmlReader.cs index 5cf655c24..494b2f34b 100644 --- a/src/DocumentFormat.OpenXml.Framework/OpenXmlReader.cs +++ b/src/DocumentFormat.OpenXml.Framework/OpenXmlReader.cs @@ -6,6 +6,9 @@ using System.Collections.Generic; using System.Collections.ObjectModel; using System.IO; +#if TASKS_SUPPORTED +using System.Threading.Tasks; +#endif using System.Xml; namespace DocumentFormat.OpenXml @@ -235,6 +238,27 @@ public virtual bool HasAttributes /// public abstract void Close(); +#if TASKS_SUPPORTED + #region Async methods + + /// + /// Asynchronously reads the next element in the Open XML document. + /// + /// + /// A task that represents the asynchronous read operation. The task result is true if the next element + /// was read successfully; false if there are no more elements to read. + /// + /// + /// This method is only available when the build target supports asynchronous SAX XML processing. + /// It allows non-blocking operations for reading large Open XML documents. + /// + public virtual Task ReadAsync() + { + return Task.FromResult(Read()); + } + #endregion +#endif + /// /// Thrown if the object is disposed. 
/// diff --git a/src/DocumentFormat.OpenXml.Framework/OpenXmlWriter.cs b/src/DocumentFormat.OpenXml.Framework/OpenXmlWriter.cs index 24fa6af83..00c58299c 100644 --- a/src/DocumentFormat.OpenXml.Framework/OpenXmlWriter.cs +++ b/src/DocumentFormat.OpenXml.Framework/OpenXmlWriter.cs @@ -6,7 +6,7 @@ using System.Collections.Generic; using System.IO; using System.Text; -#if FEATURE_ASYNC_SAX_XML +#if TASKS_SUPPORTED using System.Threading.Tasks; #endif @@ -131,7 +131,7 @@ protected OpenXmlWriter() /// public abstract void Close(); -#if FEATURE_ASYNC_SAX_XML +#if TASKS_SUPPORTED /// /// Asynchronously writes the XML declaration with the version "1.0". /// diff --git a/src/DocumentFormat.OpenXml.Framework/PublicAPI/PublicAPI.Unshipped.txt b/src/DocumentFormat.OpenXml.Framework/PublicAPI/PublicAPI.Unshipped.txt index ab058de62..9d6daa1d5 100644 --- a/src/DocumentFormat.OpenXml.Framework/PublicAPI/PublicAPI.Unshipped.txt +++ b/src/DocumentFormat.OpenXml.Framework/PublicAPI/PublicAPI.Unshipped.txt @@ -1 +1,5 @@ #nullable enable +DocumentFormat.OpenXml.OpenXmlPartReaderOptions.Async.get -> bool +DocumentFormat.OpenXml.OpenXmlPartReaderOptions.Async.set -> void +override DocumentFormat.OpenXml.OpenXmlPartReader.ReadAsync() -> System.Threading.Tasks.Task! +virtual DocumentFormat.OpenXml.OpenXmlReader.ReadAsync() -> System.Threading.Tasks.Task! 
diff --git a/src/DocumentFormat.OpenXml.Framework/XmlConvertingReader.cs b/src/DocumentFormat.OpenXml.Framework/XmlConvertingReader.cs index 76b3bd350..6ccaa3e81 100644 --- a/src/DocumentFormat.OpenXml.Framework/XmlConvertingReader.cs +++ b/src/DocumentFormat.OpenXml.Framework/XmlConvertingReader.cs @@ -4,6 +4,9 @@ using DocumentFormat.OpenXml.Features; using DocumentFormat.OpenXml.Framework; using System; +#if TASKS_SUPPORTED +using System.Threading.Tasks; +#endif using System.Xml; namespace DocumentFormat.OpenXml @@ -53,6 +56,10 @@ protected override void Dispose(bool disposing) /// public override bool Read() => BaseReader.Read(); +#if TASKS_SUPPORTED + public override Task ReadAsync() => BaseReader.ReadAsync(); +#endif + /// public override string GetAttribute(int index) => BaseReader.GetAttribute(index); diff --git a/test/DocumentFormat.OpenXml.Tests/ofapiTest/OpenXmlWriterTest.cs b/test/DocumentFormat.OpenXml.Tests/ofapiTest/OpenXmlWriterTest.cs index 60d4f83e1..4a3b929a7 100644 --- a/test/DocumentFormat.OpenXml.Tests/ofapiTest/OpenXmlWriterTest.cs +++ b/test/DocumentFormat.OpenXml.Tests/ofapiTest/OpenXmlWriterTest.cs @@ -239,7 +239,7 @@ public void WriteStringExceptionTest7() } } -#if FEATURE_ASYNC_SAX_XML +#if TASKS_SUPPORTED [Fact] public async Task WriteStartDocumentAsync_ShouldWriteStartDocument() { From 97592df8e3a6c962eee24981d72a333c95978d48 Mon Sep 17 00:00:00 2001 From: Michael Bowen <10384982+mikeebowen@users.noreply.github.com> Date: Fri, 9 May 2025 11:37:56 -0700 Subject: [PATCH 2/3] can get element contents --- .../OpenXmlPartReader.cs | 108 +++++++++++++++++- .../OpenXmlReader.cs | 10 ++ .../PublicAPI/PublicAPI.Unshipped.txt | 3 + .../XmlConvertingReader.cs | 2 + 4 files changed, 122 insertions(+), 1 deletion(-) diff --git a/src/DocumentFormat.OpenXml.Framework/OpenXmlPartReader.cs b/src/DocumentFormat.OpenXml.Framework/OpenXmlPartReader.cs index 396b66d6c..6a1aa7e0f 100644 --- a/src/DocumentFormat.OpenXml.Framework/OpenXmlPartReader.cs +++ 
b/src/DocumentFormat.OpenXml.Framework/OpenXmlPartReader.cs @@ -400,14 +400,120 @@ public override void Skip() #region Async methods #if TASKS_SUPPORTED public override Task ReadAsync() + { + return _xmlReader.ReadAsync(); + } + + public override Task GetValueAsync() { ThrowIfObjectDisposed(); + return _xmlReader.GetValueAsync(); + } - return _xmlReader.ReadAsync(); + public async override Task ReadFirstChildAsync() + { + ThrowIfObjectDisposed(); + + bool result = await MoveToFirstChildAsync().ConfigureAwait(true); + + if (result && !ReadMiscNodes) + { + // skip miscellaneous node + while (result && IsMiscNode) + { + result = MoveToNextSibling(); + } + } + + return result; + } + + private async Task MoveToFirstChildAsync() + { + switch (_elementState) + { + case ElementState.EOF: + return false; + + case ElementState.Start: + if (!(await _xmlReader.ReadAsync().ConfigureAwait(true))) + { + // should can read. + Debug.Assert(false); + return false; + } + + GetElementInformation(); + if (_elementState == ElementState.End) + { + return false; + } + + return true; + + case ElementState.LeafStart: + _elementState = ElementState.LeafEnd; + return false; + + case ElementState.End: + case ElementState.LeafEnd: + case ElementState.LoadEnd: + case ElementState.MiscNode: + return false; + + case ElementState.Null: + ThrowIfNull(); + break; + + default: + break; + } + + return false; + } + + private async Task InnerSkipAsync() + { + switch (_elementState) + { + case ElementState.Null: + ThrowIfNull(); + break; + + case ElementState.EOF: + return; + + case ElementState.Start: + case ElementState.End: + case ElementState.MiscNode: + _xmlReader.Skip(); + _elementStack.Pop(); + GetElementInformation(); + return; + + case ElementState.LeafStart: + // no move, just process cursor + _elementStack.Pop(); + GetElementInformation(); + return; + + case ElementState.LeafEnd: + case ElementState.LoadEnd: + // cursor is leaf element, pop stack, no move + _elementStack.Pop(); + 
GetElementInformation(); + return; + + default: + break; + } + + return; } #endif #endregion + #region private methods /// diff --git a/src/DocumentFormat.OpenXml.Framework/OpenXmlReader.cs b/src/DocumentFormat.OpenXml.Framework/OpenXmlReader.cs index 494b2f34b..5a5e9eaec 100644 --- a/src/DocumentFormat.OpenXml.Framework/OpenXmlReader.cs +++ b/src/DocumentFormat.OpenXml.Framework/OpenXmlReader.cs @@ -256,6 +256,16 @@ public virtual Task ReadAsync() { return Task.FromResult(Read()); } + + public virtual Task GetValueAsync() + { + return Task.FromResult(GetText()); + } + + public virtual Task ReadFirstChildAsync() + { + return Task.FromResult(ReadFirstChild()); + } #endregion #endif diff --git a/src/DocumentFormat.OpenXml.Framework/PublicAPI/PublicAPI.Unshipped.txt b/src/DocumentFormat.OpenXml.Framework/PublicAPI/PublicAPI.Unshipped.txt index 9d6daa1d5..de1ab634e 100644 --- a/src/DocumentFormat.OpenXml.Framework/PublicAPI/PublicAPI.Unshipped.txt +++ b/src/DocumentFormat.OpenXml.Framework/PublicAPI/PublicAPI.Unshipped.txt @@ -1,5 +1,8 @@ #nullable enable DocumentFormat.OpenXml.OpenXmlPartReaderOptions.Async.get -> bool DocumentFormat.OpenXml.OpenXmlPartReaderOptions.Async.set -> void +override DocumentFormat.OpenXml.OpenXmlPartReader.GetValueAsync() -> System.Threading.Tasks.Task! override DocumentFormat.OpenXml.OpenXmlPartReader.ReadAsync() -> System.Threading.Tasks.Task! +virtual DocumentFormat.OpenXml.OpenXmlReader.GetValueAsync() -> System.Threading.Tasks.Task! virtual DocumentFormat.OpenXml.OpenXmlReader.ReadAsync() -> System.Threading.Tasks.Task! +virtual DocumentFormat.OpenXml.OpenXmlReader.ReadFirstChildAsync() -> System.Threading.Tasks.Task! 
diff --git a/src/DocumentFormat.OpenXml.Framework/XmlConvertingReader.cs b/src/DocumentFormat.OpenXml.Framework/XmlConvertingReader.cs index 6ccaa3e81..9abc39998 100644 --- a/src/DocumentFormat.OpenXml.Framework/XmlConvertingReader.cs +++ b/src/DocumentFormat.OpenXml.Framework/XmlConvertingReader.cs @@ -58,6 +58,8 @@ protected override void Dispose(bool disposing) #if TASKS_SUPPORTED public override Task ReadAsync() => BaseReader.ReadAsync(); + + public override Task GetValueAsync() => BaseReader.GetValueAsync(); #endif /// From 873869ec00603fee51a1eb5182d0a27ebf17680a Mon Sep 17 00:00:00 2001 From: Michael Bowen <10384982+mikeebowen@users.noreply.github.com> Date: Fri, 9 May 2025 13:51:42 -0700 Subject: [PATCH 3/3] create helper methods and make OpenXmlPartReader a partial class --- .../OpenXmlPartReader.cs | 140 ++---------------- .../OpenXmlPartReaderAsync.cs | 110 ++++++++++++++ .../OpenXmlReader.cs | 28 +++- .../PublicAPI/PublicAPI.Shipped.txt | 3 + .../PublicAPI/PublicAPI.Unshipped.txt | 2 - .../XmlConvertingReader.cs | 4 +- 6 files changed, 154 insertions(+), 133 deletions(-) create mode 100644 src/DocumentFormat.OpenXml.Framework/OpenXmlPartReaderAsync.cs diff --git a/src/DocumentFormat.OpenXml.Framework/OpenXmlPartReader.cs b/src/DocumentFormat.OpenXml.Framework/OpenXmlPartReader.cs index 6a1aa7e0f..cf508c1ff 100644 --- a/src/DocumentFormat.OpenXml.Framework/OpenXmlPartReader.cs +++ b/src/DocumentFormat.OpenXml.Framework/OpenXmlPartReader.cs @@ -10,9 +10,6 @@ using System.Diagnostics; using System.IO; using System.Linq; -#if TASKS_SUPPORTED -using System.Threading.Tasks; -#endif using System.Xml; namespace DocumentFormat.OpenXml @@ -20,7 +17,7 @@ namespace DocumentFormat.OpenXml /// /// Represents the Open XML part reader class. 
/// - public class OpenXmlPartReader : OpenXmlReader + public partial class OpenXmlPartReader : OpenXmlReader { private readonly IRootElementFeature _rootElements; private readonly IOpenXmlNamespaceResolver _resolver; @@ -397,123 +394,6 @@ public override void Skip() /// public override IXmlLineInfo GetLineInfo() => XmlLineInfo.Get(_xmlReader); - #region Async methods -#if TASKS_SUPPORTED - public override Task ReadAsync() - { - return _xmlReader.ReadAsync(); - } - - public override Task GetValueAsync() - { - ThrowIfObjectDisposed(); - - return _xmlReader.GetValueAsync(); - } - - public async override Task ReadFirstChildAsync() - { - ThrowIfObjectDisposed(); - - bool result = await MoveToFirstChildAsync().ConfigureAwait(true); - - if (result && !ReadMiscNodes) - { - // skip miscellaneous node - while (result && IsMiscNode) - { - result = MoveToNextSibling(); - } - } - - return result; - } - - private async Task MoveToFirstChildAsync() - { - switch (_elementState) - { - case ElementState.EOF: - return false; - - case ElementState.Start: - if (!(await _xmlReader.ReadAsync().ConfigureAwait(true))) - { - // should can read. 
- Debug.Assert(false); - return false; - } - - GetElementInformation(); - if (_elementState == ElementState.End) - { - return false; - } - - return true; - - case ElementState.LeafStart: - _elementState = ElementState.LeafEnd; - return false; - - case ElementState.End: - case ElementState.LeafEnd: - case ElementState.LoadEnd: - case ElementState.MiscNode: - return false; - - case ElementState.Null: - ThrowIfNull(); - break; - - default: - break; - } - - return false; - } - - private async Task InnerSkipAsync() - { - switch (_elementState) - { - case ElementState.Null: - ThrowIfNull(); - break; - - case ElementState.EOF: - return; - - case ElementState.Start: - case ElementState.End: - case ElementState.MiscNode: - _xmlReader.Skip(); - _elementStack.Pop(); - GetElementInformation(); - return; - - case ElementState.LeafStart: - // no move, just process cursor - _elementStack.Pop(); - GetElementInformation(); - return; - - case ElementState.LeafEnd: - case ElementState.LoadEnd: - // cursor is leaf element, pop stack, no move - _elementStack.Pop(); - GetElementInformation(); - return; - - default: - break; - } - - return; - } -#endif - #endregion - #region private methods /// @@ -522,11 +402,18 @@ private async Task InnerSkipAsync() /// true if the next element was read successfully; false if there are no more elements to read. 
private bool MoveToNextElement() { - switch (_elementState) + if (_elementState == ElementState.Null) { - case ElementState.Null: - return ReadRoot(); + return ReadRoot(); + } + return MoveToNextElementHelper(); + } + + private bool MoveToNextElementHelper() + { + switch (_elementState) + { case ElementState.EOF: return false; @@ -850,6 +737,11 @@ private bool ReadRoot() _xmlReader.Skip(); } + return ReadRootHelper(); + } + + private bool ReadRootHelper() + { if (_xmlReader.EOF || !_xmlReader.IsStartElement()) { throw new InvalidDataException(ExceptionMessages.PartIsEmpty); diff --git a/src/DocumentFormat.OpenXml.Framework/OpenXmlPartReaderAsync.cs b/src/DocumentFormat.OpenXml.Framework/OpenXmlPartReaderAsync.cs new file mode 100644 index 000000000..c282124b0 --- /dev/null +++ b/src/DocumentFormat.OpenXml.Framework/OpenXmlPartReaderAsync.cs @@ -0,0 +1,110 @@ +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. + +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.IO; +using System.Text; +#if TASKS_SUPPORTED +using System.Threading.Tasks; +#endif +using System.Xml; + +namespace DocumentFormat.OpenXml; + +/// +/// Represents the Open XML part reader class. +/// +public partial class OpenXmlPartReader : OpenXmlReader +{ + /// + /// Gets the type of the current node in the XML document being read. + /// + /// + /// The indicates the type of the current node, such as + /// Element, Attribute, Text, CDATA, Comment, or others. + /// This property provides information about the structure of the XML document + /// and is useful for determining how to process the current node. + /// + public override XmlNodeType NodeType + { + get + { + return _xmlReader.NodeType; + } + } + +#if TASKS_SUPPORTED + /// + /// Asynchronously reads the next element in the Open XML document. 
+    ///
+    ///
+    /// A task that represents the asynchronous read operation. The task result is true if the next element
+    /// was read successfully; false if there are no more elements to read.
+    ///
+    public async override Task ReadAsync()
+    {
+        ThrowIfObjectDisposed();
+
+        bool result = await MoveToNextElementAsync().ConfigureAwait(false);
+
+        if (result && !ReadMiscNodes)
+        {
+            // skip miscellaneous node
+            while (result && IsMiscNode)
+            {
+                result = await MoveToNextElementAsync().ConfigureAwait(false);
+            }
+        }
+
+        return result;
+    }
+
+    /// <inheritdoc/>
+    /// <remarks>
+    /// This reader has no asynchronous first-child traversal yet, so the call is
+    /// forwarded to the base implementation, which runs the synchronous
+    /// <see cref="ReadFirstChild"/> and returns an already-completed task. The
+    /// reader therefore really advances and the result reflects whether a child
+    /// element was found.
+    /// </remarks>
+    public override Task ReadFirstChildAsync()
+    {
+        // Same disposed-object guard that ReadAsync performs before touching state.
+        ThrowIfObjectDisposed();
+
+        // Deliberately not an async method: there is nothing to await here, and
+        // returning the base task directly avoids a needless state machine (CS1998).
+        // TODO: switch to a true async traversal once one is implemented.
+        return base.ReadFirstChildAsync();
+    }
+
+    private async Task MoveToNextElementAsync()
+    {
+        if (_elementState == ElementState.Null)
+        {
+            return await ReadRootAsync().ConfigureAwait(false);
+        }
+
+        return MoveToNextElementHelper();
+    }
+
+    private async Task ReadRootAsync()
+    {
+        Debug.Assert(_elementState == ElementState.Null);
+        Debug.Assert(_elementStack.Count == 0);
+
+        // TODO: should we take care of entity? ]>
+        // TODO: is it OK that we skip all prologue ( DOCTYPE, Comment, PT ) ?
+ await _xmlReader.MoveToContentAsync().ConfigureAwait(false); + + while (!_xmlReader.EOF && _xmlReader.NodeType != XmlNodeType.Element) + { + await _xmlReader.SkipAsync().ConfigureAwait(false); + } + + return ReadRootHelper(); + } +#endif +} diff --git a/src/DocumentFormat.OpenXml.Framework/OpenXmlReader.cs b/src/DocumentFormat.OpenXml.Framework/OpenXmlReader.cs index 5a5e9eaec..c74de8671 100644 --- a/src/DocumentFormat.OpenXml.Framework/OpenXmlReader.cs +++ b/src/DocumentFormat.OpenXml.Framework/OpenXmlReader.cs @@ -186,6 +186,17 @@ public virtual bool HasAttributes /// public abstract string Prefix { get; } + /// + /// Gets the type of the current node in the XML document being read. + /// + /// + /// The indicates the type of the current node, such as + /// Element, Attribute, Text, CDATA, Comment, or others. + /// This property provides information about the structure of the XML document + /// and is useful for determining how to process the current node. + /// + public virtual XmlNodeType NodeType { get; } + /// /// Gets an instance of if available for the current reader. /// @@ -250,18 +261,23 @@ public virtual bool HasAttributes /// /// /// This method is only available when the build target supports asynchronous SAX XML processing. - /// It allows non-blocking operations for reading large Open XML documents. /// public virtual Task ReadAsync() { return Task.FromResult(Read()); } - public virtual Task GetValueAsync() - { - return Task.FromResult(GetText()); - } - + /// + /// Asynchronously moves the reader to the first child element of the current node. + /// + /// + /// A task that represents the asynchronous operation. The task result is true if the first child element + /// was read successfully; false if there are no child elements to read. + /// + /// + /// This method can only be called when the reader is positioned on an element start. If no child elements exist, + /// the reader will move to the end tag of the current element. 
+ /// public virtual Task ReadFirstChildAsync() { return Task.FromResult(ReadFirstChild()); diff --git a/src/DocumentFormat.OpenXml.Framework/PublicAPI/PublicAPI.Shipped.txt b/src/DocumentFormat.OpenXml.Framework/PublicAPI/PublicAPI.Shipped.txt index c7d7da820..2c88b12d4 100644 --- a/src/DocumentFormat.OpenXml.Framework/PublicAPI/PublicAPI.Shipped.txt +++ b/src/DocumentFormat.OpenXml.Framework/PublicAPI/PublicAPI.Shipped.txt @@ -1009,3 +1009,6 @@ DocumentFormat.OpenXml.OpenXmlPartWriterSettings.Encoding.set -> void DocumentFormat.OpenXml.OpenXmlPartWriterSettings.OpenXmlPartWriterSettings() -> void DocumentFormat.OpenXml.OpenXmlPartWriter.OpenXmlPartWriter(DocumentFormat.OpenXml.Packaging.OpenXmlPart! openXmlPart, DocumentFormat.OpenXml.OpenXmlPartWriterSettings! settings) -> void DocumentFormat.OpenXml.OpenXmlPartWriter.OpenXmlPartWriter(System.IO.Stream! partStream, DocumentFormat.OpenXml.OpenXmlPartWriterSettings! settings) -> void +DocumentFormat.OpenXml.OpenXmlPartReader.NodeType.get -> System.Xml.XmlNodeType +virtual DocumentFormat.OpenXml.OpenXmlReader.NodeType.get -> System.Xml.XmlNodeType +override DocumentFormat.OpenXml.OpenXmlPartReader.NodeType.get -> System.Xml.XmlNodeType diff --git a/src/DocumentFormat.OpenXml.Framework/PublicAPI/PublicAPI.Unshipped.txt b/src/DocumentFormat.OpenXml.Framework/PublicAPI/PublicAPI.Unshipped.txt index de1ab634e..ddf94f5ae 100644 --- a/src/DocumentFormat.OpenXml.Framework/PublicAPI/PublicAPI.Unshipped.txt +++ b/src/DocumentFormat.OpenXml.Framework/PublicAPI/PublicAPI.Unshipped.txt @@ -1,8 +1,6 @@ #nullable enable DocumentFormat.OpenXml.OpenXmlPartReaderOptions.Async.get -> bool DocumentFormat.OpenXml.OpenXmlPartReaderOptions.Async.set -> void -override DocumentFormat.OpenXml.OpenXmlPartReader.GetValueAsync() -> System.Threading.Tasks.Task! override DocumentFormat.OpenXml.OpenXmlPartReader.ReadAsync() -> System.Threading.Tasks.Task! 
-virtual DocumentFormat.OpenXml.OpenXmlReader.GetValueAsync() -> System.Threading.Tasks.Task! virtual DocumentFormat.OpenXml.OpenXmlReader.ReadAsync() -> System.Threading.Tasks.Task! virtual DocumentFormat.OpenXml.OpenXmlReader.ReadFirstChildAsync() -> System.Threading.Tasks.Task! diff --git a/src/DocumentFormat.OpenXml.Framework/XmlConvertingReader.cs b/src/DocumentFormat.OpenXml.Framework/XmlConvertingReader.cs index 9abc39998..d56a3cee4 100644 --- a/src/DocumentFormat.OpenXml.Framework/XmlConvertingReader.cs +++ b/src/DocumentFormat.OpenXml.Framework/XmlConvertingReader.cs @@ -59,7 +59,9 @@ protected override void Dispose(bool disposing) #if TASKS_SUPPORTED public override Task ReadAsync() => BaseReader.ReadAsync(); - public override Task GetValueAsync() => BaseReader.GetValueAsync(); + public override Task SkipAsync() => BaseReader.SkipAsync(); + + public override Task MoveToContentAsync() => BaseReader.MoveToContentAsync(); #endif ///