From 34b35ec1e22881330cebd4b9cd30efca39d8eeba Mon Sep 17 00:00:00 2001 From: John Dailey Date: Sat, 16 Mar 2024 19:39:53 -0400 Subject: [PATCH 01/20] Make scan and fetch async in prep for HttpClient update --- Example/FetcherTest.cs | 4 ++-- FaviconFetcher.Tests/FetcherTests.cs | 5 +++-- FaviconFetcher.Tests/Utility/MockSource.cs | 10 +++++----- FaviconFetcher/Fetcher.cs | 13 +++++++------ FaviconFetcher/HttpSource.cs | 13 +++++++------ FaviconFetcher/ISource.cs | 4 ++-- FaviconFetcher/Scanner.cs | 8 ++++++-- .../SubScanners/BrowserconfigXmlScanner.cs | 4 ++-- FaviconFetcher/SubScanners/DefaultScanner.cs | 5 +++-- FaviconFetcher/SubScanners/FaviconIcoScanner.cs | 4 +++- FaviconFetcher/SubScanners/ManifestJsonScanner.cs | 4 ++-- FaviconFetcher/SubScanners/SubScanner.cs | 3 ++- FaviconFetcher/Utility/FetchJob.cs | 10 +++++----- 13 files changed, 49 insertions(+), 38 deletions(-) diff --git a/Example/FetcherTest.cs b/Example/FetcherTest.cs index 0a477ef..170f7b2 100644 --- a/Example/FetcherTest.cs +++ b/Example/FetcherTest.cs @@ -19,7 +19,7 @@ public FetcherTest() InitializeComponent(); } - private void btnFetch_Click(object sender, EventArgs e) + private async void btnFetch_Click(object sender, EventArgs e) { try { @@ -31,7 +31,7 @@ private void btnFetch_Click(object sender, EventArgs e) picIcon.Size = new Size(16, 16); picIcon.Image = null; - var image = new Fetcher().Fetch(uri, new FetchOptions + var image = await new Fetcher().Fetch(uri, new FetchOptions { MinimumSize = new IconSize(minSize, minSize), MaximumSize = new IconSize(maxSize, maxSize), diff --git a/FaviconFetcher.Tests/FetcherTests.cs b/FaviconFetcher.Tests/FetcherTests.cs index 5a37c21..4cceeff 100644 --- a/FaviconFetcher.Tests/FetcherTests.cs +++ b/FaviconFetcher.Tests/FetcherTests.cs @@ -1,4 +1,5 @@ using System; +using System.Threading.Tasks; using FaviconFetcher.Tests.Utility; using Microsoft.VisualStudio.TestTools.UnitTesting; @@ -56,7 +57,7 @@ public void 
Fetcher_PerfectIsLastLink_MakeTwoRequests() } [TestMethod] - public void Fetcher_MultipleIconsInFile_UseBest() + public async Task Fetcher_MultipleIconsInFile_UseBest() { var uri = new Uri("http://www.example.com"); var source = new MockSource(); @@ -66,7 +67,7 @@ public void Fetcher_MultipleIconsInFile_UseBest() source.AddImageResource(new Uri(uri, "/favicon.ico"), new IconSize(48, 48)); var fetcher = new Fetcher(source); - var image = fetcher.FetchClosest(uri, new IconSize(32, 32)); + var image = await fetcher.FetchClosest(uri, new IconSize(32, 32)); Assert.AreEqual(2, source.RequestCount); Assert.AreEqual(new IconSize(32, 32), image.Size); diff --git a/FaviconFetcher.Tests/Utility/MockSource.cs b/FaviconFetcher.Tests/Utility/MockSource.cs index ae6b90b..70eea2b 100644 --- a/FaviconFetcher.Tests/Utility/MockSource.cs +++ b/FaviconFetcher.Tests/Utility/MockSource.cs @@ -45,15 +45,15 @@ public void AddImageResource(Uri uri, IconImage image) _imageResourceMap[uri].Add(image); } - public IEnumerable DownloadImages(Uri uri) + public Task> DownloadImages(Uri uri) { ++RequestCount; if (!_imageResourceMap.ContainsKey(uri)) - return new IconImage[] { }; - return _imageResourceMap[uri]; + return Task.FromResult(new IconImage[] { } as System.Collections.Generic.IEnumerable); + return Task.FromResult(_imageResourceMap[uri] as System.Collections.Generic.IEnumerable); } - public StreamReader DownloadText(Uri uri) + public Task DownloadText(Uri uri) { ++RequestCount; if (!_textResourceMap.ContainsKey(uri)) @@ -65,7 +65,7 @@ public StreamReader DownloadText(Uri uri) writer.Write(contents); writer.Flush(); memoryStream.Position = 0; - return new StreamReader(memoryStream); + return Task.FromResult(new StreamReader(memoryStream)); } } diff --git a/FaviconFetcher/Fetcher.cs b/FaviconFetcher/Fetcher.cs index 8e1a7b3..4d26de6 100644 --- a/FaviconFetcher/Fetcher.cs +++ b/FaviconFetcher/Fetcher.cs @@ -1,6 +1,7 @@ using FaviconFetcher.Utility; using System; using System.Net; +using 
System.Threading.Tasks; namespace FaviconFetcher { @@ -35,9 +36,9 @@ public Fetcher(ISource source) /// The webpage to scan for favicons. /// The target size of the favicon. /// The closest favicon to the size, or null. - public IconImage FetchClosest(Uri uri, IconSize size) + public async Task FetchClosest(Uri uri, IconSize size) { - return Fetch(uri, new FetchOptions + return await Fetch(uri, new FetchOptions { PerfectSize = size }); @@ -49,9 +50,9 @@ public IconImage FetchClosest(Uri uri, IconSize size) /// The webpage to scan for favicons. /// The target size of the favicon. /// The favicon matching the size, or null. - public IconImage FetchExact(Uri uri, IconSize size) + public async Task FetchExact(Uri uri, IconSize size) { - return Fetch(uri, new FetchOptions + return await Fetch(uri, new FetchOptions { MinimumSize = size, MaximumSize = size, @@ -65,10 +66,10 @@ public IconImage FetchExact(Uri uri, IconSize size) /// The webpage to scan for favicons. /// Filters for the returned result. /// The matching favicon, or null. - public IconImage Fetch(Uri uri, FetchOptions options) + public async Task Fetch(Uri uri, FetchOptions options) { using (var fetch = new FetchJob(Source, uri, options)) - return fetch.ScanAndFetch(); + return await fetch.ScanAndFetch(); } } diff --git a/FaviconFetcher/HttpSource.cs b/FaviconFetcher/HttpSource.cs index 6eaf32b..94e8721 100644 --- a/FaviconFetcher/HttpSource.cs +++ b/FaviconFetcher/HttpSource.cs @@ -4,6 +4,7 @@ using System.Net; using System.Net.Cache; using System.Text; +using System.Threading.Tasks; namespace FaviconFetcher { @@ -43,9 +44,9 @@ public HttpSource(WebProxy proxy = null) /// /// The uri of the resource to download. /// A reader for the resource, or null. 
- public StreamReader DownloadText(Uri uri) + public async Task DownloadText(Uri uri) { - var response = _GetWebResponse(uri); + var response = await _GetWebResponse(uri); if (response.StatusCode != HttpStatusCode.OK) { response.Dispose(); // since we won't be passing on the response stream. @@ -73,14 +74,14 @@ public StreamReader DownloadText(Uri uri) /// /// The URI of the image file to download. /// All of the images found within the file. - public IEnumerable DownloadImages(Uri uri) + public async Task> DownloadImages(Uri uri) { var images = new List(); var contentType = string.Empty; var memoryStream = new MemoryStream(); Uri responseUri = null; - using (var response = _GetWebResponse(uri)) + using (var response = await _GetWebResponse(uri)) { if (response.StatusCode != HttpStatusCode.OK) return images; @@ -100,7 +101,7 @@ public IEnumerable DownloadImages(Uri uri) { var redirectedUri = new Uri(responseUri.GetLeftPart(UriPartial.Authority).ToString() + uri.PathAndQuery); // Try fetching same resource at the root of the redirected URI - using (var response = _GetWebResponse(redirectedUri)) + using (var response = await _GetWebResponse(redirectedUri)) { if (response.StatusCode != HttpStatusCode.OK) return images; @@ -166,7 +167,7 @@ private IEnumerable _ExtractIcoSizes(Stream stream) } // Setup and make a web request, returning the response. - private HttpWebResponse _GetWebResponse(Uri uri) + private async Task _GetWebResponse(Uri uri) { var request = WebRequest.Create(uri) as HttpWebRequest; request.CachePolicy = CachePolicy; diff --git a/FaviconFetcher/ISource.cs b/FaviconFetcher/ISource.cs index 8b8b208..8c7d786 100644 --- a/FaviconFetcher/ISource.cs +++ b/FaviconFetcher/ISource.cs @@ -17,7 +17,7 @@ public interface ISource /// /// The URI of the resource to download. /// A reader for the resource, or null. - StreamReader DownloadText(Uri uri); + Task DownloadText(Uri uri); /// /// Downloads all images from a URI. 
@@ -27,6 +27,6 @@ public interface ISource /// /// The URI of the image file to download. /// All of the images found within the file, or an empty list. - IEnumerable DownloadImages(Uri uri); + Task> DownloadImages(Uri uri); } } diff --git a/FaviconFetcher/Scanner.cs b/FaviconFetcher/Scanner.cs index 8743c23..4a5b982 100644 --- a/FaviconFetcher/Scanner.cs +++ b/FaviconFetcher/Scanner.cs @@ -2,6 +2,7 @@ using System; using System.Collections.Generic; using System.Net; +using System.Threading.Tasks; namespace FaviconFetcher { @@ -45,10 +46,13 @@ public IEnumerable Scan(Uri uri) while (scans.Count > 0 && max_scans-- > 0) { var scan = scans.Dequeue(); - scan.Start(); + + // Yielding a Task type requires Net8, so work-around + var task = Task.Run(async () => await scan.Start()); + task.Wait(); // Go through found favicon references - foreach (var result in scan.Results) + foreach (ScanResult result in scan.Results) yield return result; // Add all subscanners that are suggested diff --git a/FaviconFetcher/SubScanners/BrowserconfigXmlScanner.cs b/FaviconFetcher/SubScanners/BrowserconfigXmlScanner.cs index 5dbcc45..6465977 100644 --- a/FaviconFetcher/SubScanners/BrowserconfigXmlScanner.cs +++ b/FaviconFetcher/SubScanners/BrowserconfigXmlScanner.cs @@ -22,9 +22,9 @@ public BrowserconfigXmlScanner(ISource source, Uri uri) : base(source, uri) { } - public override void Start() + public async override Task Start() { - using (var reader = Source.DownloadText(TargetUri)) + using (var reader = await Source.DownloadText(TargetUri)) { if (reader != null) _ParseContent(new TextParser(reader)); diff --git a/FaviconFetcher/SubScanners/DefaultScanner.cs b/FaviconFetcher/SubScanners/DefaultScanner.cs index d2d2244..25df3ea 100644 --- a/FaviconFetcher/SubScanners/DefaultScanner.cs +++ b/FaviconFetcher/SubScanners/DefaultScanner.cs @@ -3,6 +3,7 @@ using System.Collections.Generic; using System.Runtime.CompilerServices; using System.Text; +using System.Threading.Tasks; #if DEBUG [assembly: 
InternalsVisibleTo("FaviconFetcher.Tests")] @@ -17,9 +18,9 @@ public DefaultScanner(ISource source, Uri uri) : base(source, uri) { } - public override void Start() + public async override Task Start() { - using (var reader = Source.DownloadText(TargetUri)) + using (var reader = await Source.DownloadText(TargetUri)) { if (reader != null) _ParsePage(new TextParser(reader)); diff --git a/FaviconFetcher/SubScanners/FaviconIcoScanner.cs b/FaviconFetcher/SubScanners/FaviconIcoScanner.cs index 82238a4..b517f53 100644 --- a/FaviconFetcher/SubScanners/FaviconIcoScanner.cs +++ b/FaviconFetcher/SubScanners/FaviconIcoScanner.cs @@ -19,13 +19,15 @@ public FaviconIcoScanner(ISource source, Uri uri) : base(source, uri) { } - public override void Start() + public override Task Start() { Results.Add(new ScanResult { Location = new Uri(TargetUri, "/favicon.ico"), ExpectedSize = new IconSize(16, 16) }); + + return Task.CompletedTask; } } diff --git a/FaviconFetcher/SubScanners/ManifestJsonScanner.cs b/FaviconFetcher/SubScanners/ManifestJsonScanner.cs index 94811d8..29d666f 100644 --- a/FaviconFetcher/SubScanners/ManifestJsonScanner.cs +++ b/FaviconFetcher/SubScanners/ManifestJsonScanner.cs @@ -20,9 +20,9 @@ public ManifestJsonScanner(ISource source, Uri uri) : base(source, uri) { } - public override void Start() + public async override Task Start() { - using (var reader = Source.DownloadText(TargetUri)) + using (var reader = await Source.DownloadText(TargetUri)) { if (reader != null) _ParseContent(reader); diff --git a/FaviconFetcher/SubScanners/SubScanner.cs b/FaviconFetcher/SubScanners/SubScanner.cs index d1a08a1..f2861aa 100644 --- a/FaviconFetcher/SubScanners/SubScanner.cs +++ b/FaviconFetcher/SubScanners/SubScanner.cs @@ -1,5 +1,6 @@ using System; using System.Collections.Generic; +using System.Threading.Tasks; namespace FaviconFetcher.SubScanners { @@ -26,7 +27,7 @@ public SubScanner(ISource source, Uri uri) } // Start the scan for favicons. 
- public abstract void Start(); + public abstract Task Start(); } } diff --git a/FaviconFetcher/Utility/FetchJob.cs b/FaviconFetcher/Utility/FetchJob.cs index 3a63e1a..b3d57ab 100644 --- a/FaviconFetcher/Utility/FetchJob.cs +++ b/FaviconFetcher/Utility/FetchJob.cs @@ -37,7 +37,7 @@ public void Dispose() } // Scan and fetches best icon per Options. - public IconImage ScanAndFetch() + public async Task ScanAndFetch() { var parsedUris = new HashSet(); foreach (var possibleIcon in new Scanner(Source).Scan(TargetUri)) @@ -50,7 +50,7 @@ public IconImage ScanAndFetch() // Hopefully we've already found it if (_IsPerfect(possibleIcon.ExpectedSize)) { - var image = DownloadImages_ReturnPerfect(possibleIcon.Location); + var image = await DownloadImages_ReturnPerfect(possibleIcon.Location); if (image != null) return image; } @@ -65,7 +65,7 @@ public IconImage ScanAndFetch() // Download them, prioritizing those closest to perfect foreach (var possibleIcon in notVerified) { - var image = DownloadImages_ReturnPerfect(possibleIcon.Location); + var image = await DownloadImages_ReturnPerfect(possibleIcon.Location); if (image != null) return image; } @@ -78,9 +78,9 @@ public IconImage ScanAndFetch() // Downloads images. If perfect found, returns it. 
- private IconImage DownloadImages_ReturnPerfect(Uri uri) + private async Task DownloadImages_ReturnPerfect(Uri uri) { - foreach (var image in Source.DownloadImages(uri)) + foreach (var image in await Source.DownloadImages(uri)) { if (_IsPerfect(image.Size)) return image; From 447684e84b045f07f5ce930276eecca15b60301a Mon Sep 17 00:00:00 2001 From: John Dailey Date: Wed, 20 Mar 2024 16:52:52 -0400 Subject: [PATCH 02/20] Make tests async compatible --- FaviconFetcher.Tests/BrowserConfigXmlScannerTests.cs | 6 +++--- FaviconFetcher.Tests/DefaultScannerTests.cs | 10 +++++----- FaviconFetcher.Tests/ScannerTests.cs | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/FaviconFetcher.Tests/BrowserConfigXmlScannerTests.cs b/FaviconFetcher.Tests/BrowserConfigXmlScannerTests.cs index 78eb536..0859691 100644 --- a/FaviconFetcher.Tests/BrowserConfigXmlScannerTests.cs +++ b/FaviconFetcher.Tests/BrowserConfigXmlScannerTests.cs @@ -29,7 +29,7 @@ public void Start_ValidXml_Parse() "); var scanner = new BrowserconfigXmlScanner(source, uri); - scanner.Start(); + scanner.Start().Wait(); Assert.AreEqual(4, scanner.Results.Count); } @@ -43,7 +43,7 @@ public void Start_InvalidXml_Skip() >"); var scanner = new BrowserconfigXmlScanner(source, uri); - scanner.Start(); + scanner.Start().Wait(); Assert.AreEqual(0, scanner.Results.Count); } @@ -68,7 +68,7 @@ public void Start_ContainsInvalidUri_Skip() "); var scanner = new BrowserconfigXmlScanner(source, uri); - scanner.Start(); + scanner.Start().Wait(); Assert.AreEqual(3, scanner.Results.Count); } diff --git a/FaviconFetcher.Tests/DefaultScannerTests.cs b/FaviconFetcher.Tests/DefaultScannerTests.cs index 7b50228..f2789e2 100644 --- a/FaviconFetcher.Tests/DefaultScannerTests.cs +++ b/FaviconFetcher.Tests/DefaultScannerTests.cs @@ -19,7 +19,7 @@ public void Results_OneLink_FindIt() Fake content."); var scanner = new DefaultScanner(source, uri); - scanner.Start(); + scanner.Start().Wait(); Assert.AreEqual(new ScanResult { @@ -39,7 
+39,7 @@ public void Results_LinkHrefWithoutQuotes_FindIt() Fake content."); var scanner = new DefaultScanner(source, uri); - scanner.Start(); + scanner.Start().Wait(); Assert.AreEqual(new ScanResult { @@ -59,7 +59,7 @@ public void Results_LinkRelInCaps_FindIt() Fake content."); var scanner = new DefaultScanner(source, uri); - scanner.Start(); + scanner.Start().Wait(); Assert.AreEqual(new ScanResult { @@ -79,7 +79,7 @@ public void Results_HtmlInCaps_ParseIt() Fake content."); var scanner = new DefaultScanner(source, uri); - scanner.Start(); + scanner.Start().Wait(); Assert.AreEqual(1, scanner.Results.Count); } @@ -95,7 +95,7 @@ public void Results_LinkHasSizes_UseSizes() Fake content."); var scanner = new DefaultScanner(source, uri); - scanner.Start(); + scanner.Start().Wait(); Assert.AreEqual(new ScanResult { diff --git a/FaviconFetcher.Tests/ScannerTests.cs b/FaviconFetcher.Tests/ScannerTests.cs index c3552ad..23c5083 100644 --- a/FaviconFetcher.Tests/ScannerTests.cs +++ b/FaviconFetcher.Tests/ScannerTests.cs @@ -39,7 +39,7 @@ public void Scan_AllResources_FindAll() "); var scanner = new Scanner(source); - var results = scanner.Scan(uri).ToArray(); + var results = scanner.Scan(uri).Result.ToArray(); Assert.AreEqual(12, results.Length); } From f28363b89b3f8c1e57ee58d442c80903bc33fb5c Mon Sep 17 00:00:00 2001 From: John Dailey Date: Wed, 20 Mar 2024 16:53:25 -0400 Subject: [PATCH 03/20] Make Scanner.Scan non-blocking --- FaviconFetcher/Scanner.cs | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/FaviconFetcher/Scanner.cs b/FaviconFetcher/Scanner.cs index 4a5b982..aa641c4 100644 --- a/FaviconFetcher/Scanner.cs +++ b/FaviconFetcher/Scanner.cs @@ -32,12 +32,14 @@ public Scanner(ISource source) } /// - /// Scans a URI for references to favicons. + /// Scans a URI for references to favicons asynchronously. /// /// The uri of the webpage to scan for favicon references. /// An enumerable of found favicon references. 
- public IEnumerable Scan(Uri uri) + public async Task> Scan(Uri uri) { + var scanResults = new List(); + var scans = new Queue(); scans.Enqueue(new DefaultScanner(Source, uri)); @@ -47,18 +49,15 @@ public IEnumerable Scan(Uri uri) { var scan = scans.Dequeue(); - // Yielding a Task type requires Net8, so work-around - var task = Task.Run(async () => await scan.Start()); - task.Wait(); - - // Go through found favicon references - foreach (ScanResult result in scan.Results) - yield return result; + await scan.Start(cancelTokenSource); + scanResults.AddRange(scan.Results); // Add all subscanners that are suggested foreach (var suggested in scan.SuggestedScanners) scans.Enqueue(suggested); } + + return scanResults; } } From 515f758f5ff28cb08e7be9789332ef8dfa278fff Mon Sep 17 00:00:00 2001 From: John Dailey Date: Wed, 20 Mar 2024 16:56:47 -0400 Subject: [PATCH 04/20] Add cancellation token support --- FaviconFetcher.Tests/Utility/MockSource.cs | 5 ++- FaviconFetcher/Fetcher.cs | 38 ++++++++++++------- FaviconFetcher/HttpSource.cs | 35 ++++++++++++----- FaviconFetcher/ISource.cs | 9 +++-- FaviconFetcher/Scanner.cs | 7 +++- .../SubScanners/BrowserconfigXmlScanner.cs | 5 ++- FaviconFetcher/SubScanners/DefaultScanner.cs | 5 ++- .../SubScanners/FaviconIcoScanner.cs | 3 +- .../SubScanners/ManifestJsonScanner.cs | 5 ++- FaviconFetcher/SubScanners/SubScanner.cs | 3 +- FaviconFetcher/Utility/FetchJob.cs | 13 ++++--- 11 files changed, 84 insertions(+), 44 deletions(-) diff --git a/FaviconFetcher.Tests/Utility/MockSource.cs b/FaviconFetcher.Tests/Utility/MockSource.cs index 70eea2b..7c4a835 100644 --- a/FaviconFetcher.Tests/Utility/MockSource.cs +++ b/FaviconFetcher.Tests/Utility/MockSource.cs @@ -4,6 +4,7 @@ using System.IO; using System.Linq; using System.Text; +using System.Threading; using System.Threading.Tasks; namespace FaviconFetcher.Tests.Utility @@ -45,7 +46,7 @@ public void AddImageResource(Uri uri, IconImage image) _imageResourceMap[uri].Add(image); } - public Task> 
DownloadImages(Uri uri) + public Task> DownloadImages(Uri uri, CancellationTokenSource cancelTokenSource) { ++RequestCount; if (!_imageResourceMap.ContainsKey(uri)) @@ -53,7 +54,7 @@ public Task> DownloadImages(Uri uri) return Task.FromResult(_imageResourceMap[uri] as System.Collections.Generic.IEnumerable); } - public Task DownloadText(Uri uri) + public Task DownloadText(Uri uri, CancellationTokenSource cancelTokenSource) { ++RequestCount; if (!_textResourceMap.ContainsKey(uri)) diff --git a/FaviconFetcher/Fetcher.cs b/FaviconFetcher/Fetcher.cs index 4d26de6..3232790 100644 --- a/FaviconFetcher/Fetcher.cs +++ b/FaviconFetcher/Fetcher.cs @@ -1,6 +1,7 @@ using FaviconFetcher.Utility; using System; using System.Net; +using System.Threading; using System.Threading.Tasks; namespace FaviconFetcher @@ -35,13 +36,17 @@ public Fetcher(ISource source) /// /// The webpage to scan for favicons. /// The target size of the favicon. + /// An optional flag for cancelling the fetch. /// The closest favicon to the size, or null. - public async Task FetchClosest(Uri uri, IconSize size) + public async Task FetchClosest(Uri uri, IconSize size, CancellationTokenSource cancelTokenSource = null) { - return await Fetch(uri, new FetchOptions - { - PerfectSize = size - }); + return await Fetch( + uri, + new FetchOptions + { + PerfectSize = size + }, + cancelTokenSource); } /// @@ -49,15 +54,19 @@ public async Task FetchClosest(Uri uri, IconSize size) /// /// The webpage to scan for favicons. /// The target size of the favicon. + /// An optional flag for cancelling the fetch. /// The favicon matching the size, or null. 
- public async Task FetchExact(Uri uri, IconSize size) + public async Task FetchExact(Uri uri, IconSize size, CancellationTokenSource cancelTokenSource = null) { - return await Fetch(uri, new FetchOptions - { - MinimumSize = size, - MaximumSize = size, - PerfectSize = size - }); + return await Fetch( + uri, + new FetchOptions + { + MinimumSize = size, + MaximumSize = size, + PerfectSize = size + }, + cancelTokenSource); } /// @@ -65,11 +74,12 @@ public async Task FetchExact(Uri uri, IconSize size) /// /// The webpage to scan for favicons. /// Filters for the returned result. + /// An optional flag for cancelling the fetch. /// The matching favicon, or null. - public async Task Fetch(Uri uri, FetchOptions options) + public async Task Fetch(Uri uri, FetchOptions options, CancellationTokenSource cancelTokenSource = null) { using (var fetch = new FetchJob(Source, uri, options)) - return await fetch.ScanAndFetch(); + return await fetch.ScanAndFetch(cancelTokenSource); } } diff --git a/FaviconFetcher/HttpSource.cs b/FaviconFetcher/HttpSource.cs index 94e8721..d96e7b6 100644 --- a/FaviconFetcher/HttpSource.cs +++ b/FaviconFetcher/HttpSource.cs @@ -4,6 +4,7 @@ using System.Net; using System.Net.Cache; using System.Text; +using System.Threading; using System.Threading.Tasks; namespace FaviconFetcher @@ -43,13 +44,20 @@ public HttpSource(WebProxy proxy = null) /// Internal use only. Downloads a text resource from a URI. /// /// The uri of the resource to download. + /// An optional flag for cancelling the download. /// A reader for the resource, or null. - public async Task DownloadText(Uri uri) + public async Task DownloadText(Uri uri, CancellationTokenSource cancelTokenSource) { - var response = await _GetWebResponse(uri); - if (response.StatusCode != HttpStatusCode.OK) + var cancelToken = cancelTokenSource != null + ? 
cancelTokenSource.Token + : CancellationToken.None; + + var response = await _GetWebResponse(uri, cancelToken); + if (cancelToken.IsCancellationRequested + || response == null + || response.StatusCode != HttpStatusCode.OK) { - response.Dispose(); // since we won't be passing on the response stream. + response?.Dispose(); // since we won't be passing on the response stream. return null; } @@ -73,17 +81,24 @@ public async Task DownloadText(Uri uri) /// Internal use only. Downloads all images from a URI. /// /// The URI of the image file to download. + /// An optional flag for cancelling the download. /// All of the images found within the file. - public async Task> DownloadImages(Uri uri) + public async Task> DownloadImages(Uri uri, CancellationTokenSource cancelTokenSource) { + var cancelToken = cancelTokenSource != null + ? cancelTokenSource.Token + : CancellationToken.None; + var images = new List(); var contentType = string.Empty; var memoryStream = new MemoryStream(); Uri responseUri = null; - using (var response = await _GetWebResponse(uri)) + using (var response = await _GetWebResponse(uri, cancelToken)) { - if (response.StatusCode != HttpStatusCode.OK) + if (cancelToken.IsCancellationRequested + || response == null + || response.StatusCode != HttpStatusCode.OK) return images; contentType = response.ContentType.ToLower(); @@ -101,9 +116,11 @@ public async Task> DownloadImages(Uri uri) { var redirectedUri = new Uri(responseUri.GetLeftPart(UriPartial.Authority).ToString() + uri.PathAndQuery); // Try fetching same resource at the root of the redirected URI - using (var response = await _GetWebResponse(redirectedUri)) + using (var response = await _GetWebResponse(redirectedUri, cancelToken)) { - if (response.StatusCode != HttpStatusCode.OK) + if (cancelToken.IsCancellationRequested + || response == null + || response.StatusCode != HttpStatusCode.OK) return images; contentType = response.ContentType; diff --git a/FaviconFetcher/ISource.cs 
b/FaviconFetcher/ISource.cs index 8c7d786..1126d5b 100644 --- a/FaviconFetcher/ISource.cs +++ b/FaviconFetcher/ISource.cs @@ -3,6 +3,7 @@ using System.IO; using System.Linq; using System.Text; +using System.Threading; using System.Threading.Tasks; namespace FaviconFetcher @@ -16,8 +17,9 @@ public interface ISource /// Downloads a text-based resource from a URI. /// /// The URI of the resource to download. + /// An optional flag for cancelling the download. /// A reader for the resource, or null. - Task DownloadText(Uri uri); + Task DownloadText(Uri uri, CancellationTokenSource cancelTokenSource); /// /// Downloads all images from a URI. @@ -25,8 +27,9 @@ public interface ISource /// /// Multiple images are returned, because some file formats allow multiple images. /// - /// The URI of the image file to download. + /// An optional flag for cancelling the download. + /// /// All of the images found within the file, or an empty list. - Task> DownloadImages(Uri uri); + Task> DownloadImages(Uri uri, CancellationTokenSource cancelTokenSource); } } diff --git a/FaviconFetcher/Scanner.cs b/FaviconFetcher/Scanner.cs index aa641c4..4db154a 100644 --- a/FaviconFetcher/Scanner.cs +++ b/FaviconFetcher/Scanner.cs @@ -2,6 +2,7 @@ using System; using System.Collections.Generic; using System.Net; +using System.Threading; using System.Threading.Tasks; namespace FaviconFetcher @@ -35,8 +36,9 @@ public Scanner(ISource source) /// Scans a URI for references to favicons asynchronously. /// /// The uri of the webpage to scan for favicon references. + /// An optional flag for cancelling the scan. /// An enumerable of found favicon references. 
- public async Task> Scan(Uri uri) + public async Task> Scan(Uri uri, CancellationTokenSource cancelTokenSource = null) { var scanResults = new List(); @@ -45,7 +47,8 @@ public async Task> Scan(Uri uri) // While we have subscanners queued var max_scans = 4; - while (scans.Count > 0 && max_scans-- > 0) + while (scans.Count > 0 && max_scans-- > 0 + && (cancelTokenSource == null || !cancelTokenSource.IsCancellationRequested)) { var scan = scans.Dequeue(); diff --git a/FaviconFetcher/SubScanners/BrowserconfigXmlScanner.cs b/FaviconFetcher/SubScanners/BrowserconfigXmlScanner.cs index 6465977..ce5e94d 100644 --- a/FaviconFetcher/SubScanners/BrowserconfigXmlScanner.cs +++ b/FaviconFetcher/SubScanners/BrowserconfigXmlScanner.cs @@ -6,6 +6,7 @@ using System.Runtime.CompilerServices; using System.Runtime.Serialization; using System.Text; +using System.Threading; using System.Threading.Tasks; using System.Xml; using System.Xml.Schema; @@ -22,9 +23,9 @@ public BrowserconfigXmlScanner(ISource source, Uri uri) : base(source, uri) { } - public async override Task Start() + public async override Task Start(CancellationTokenSource cancelTokenSource = null) { - using (var reader = await Source.DownloadText(TargetUri)) + using (var reader = await Source.DownloadText(TargetUri, cancelTokenSource)) { if (reader != null) _ParseContent(new TextParser(reader)); diff --git a/FaviconFetcher/SubScanners/DefaultScanner.cs b/FaviconFetcher/SubScanners/DefaultScanner.cs index 25df3ea..e2d9ce6 100644 --- a/FaviconFetcher/SubScanners/DefaultScanner.cs +++ b/FaviconFetcher/SubScanners/DefaultScanner.cs @@ -3,6 +3,7 @@ using System.Collections.Generic; using System.Runtime.CompilerServices; using System.Text; +using System.Threading; using System.Threading.Tasks; #if DEBUG @@ -18,9 +19,9 @@ public DefaultScanner(ISource source, Uri uri) : base(source, uri) { } - public async override Task Start() + public async override Task Start(CancellationTokenSource cancelTokenSource = null) { - using (var 
reader = await Source.DownloadText(TargetUri)) + using (var reader = await Source.DownloadText(TargetUri, cancelTokenSource)) { if (reader != null) _ParsePage(new TextParser(reader)); diff --git a/FaviconFetcher/SubScanners/FaviconIcoScanner.cs b/FaviconFetcher/SubScanners/FaviconIcoScanner.cs index b517f53..e99db99 100644 --- a/FaviconFetcher/SubScanners/FaviconIcoScanner.cs +++ b/FaviconFetcher/SubScanners/FaviconIcoScanner.cs @@ -5,6 +5,7 @@ using System.Net.Http; using System.Runtime.CompilerServices; using System.Text; +using System.Threading; using System.Threading.Tasks; #if DEBUG @@ -19,7 +20,7 @@ public FaviconIcoScanner(ISource source, Uri uri) : base(source, uri) { } - public override Task Start() + public override Task Start(CancellationTokenSource cancelTokenSource = null) { Results.Add(new ScanResult { diff --git a/FaviconFetcher/SubScanners/ManifestJsonScanner.cs b/FaviconFetcher/SubScanners/ManifestJsonScanner.cs index 29d666f..8e711ed 100644 --- a/FaviconFetcher/SubScanners/ManifestJsonScanner.cs +++ b/FaviconFetcher/SubScanners/ManifestJsonScanner.cs @@ -7,6 +7,7 @@ using System.Runtime.Serialization; using System.Runtime.Serialization.Json; using System.Text; +using System.Threading; using System.Threading.Tasks; #if DEBUG @@ -20,9 +21,9 @@ public ManifestJsonScanner(ISource source, Uri uri) : base(source, uri) { } - public async override Task Start() + public async override Task Start(CancellationTokenSource cancelTokenSource = null) { - using (var reader = await Source.DownloadText(TargetUri)) + using (var reader = await Source.DownloadText(TargetUri, cancelTokenSource)) { if (reader != null) _ParseContent(reader); diff --git a/FaviconFetcher/SubScanners/SubScanner.cs b/FaviconFetcher/SubScanners/SubScanner.cs index f2861aa..e7e85fa 100644 --- a/FaviconFetcher/SubScanners/SubScanner.cs +++ b/FaviconFetcher/SubScanners/SubScanner.cs @@ -1,5 +1,6 @@ using System; using System.Collections.Generic; +using System.Threading; using 
System.Threading.Tasks; namespace FaviconFetcher.SubScanners @@ -27,7 +28,7 @@ public SubScanner(ISource source, Uri uri) } // Start the scan for favicons. - public abstract Task Start(); + public abstract Task Start(CancellationTokenSource cancelTokenSource = null); } } diff --git a/FaviconFetcher/Utility/FetchJob.cs b/FaviconFetcher/Utility/FetchJob.cs index b3d57ab..7afdd1b 100644 --- a/FaviconFetcher/Utility/FetchJob.cs +++ b/FaviconFetcher/Utility/FetchJob.cs @@ -3,6 +3,7 @@ using System.Collections.Generic; using System.Linq; using System.Text; +using System.Threading; using System.Threading.Tasks; namespace FaviconFetcher.Utility @@ -37,10 +38,10 @@ public void Dispose() } // Scan and fetches best icon per Options. - public async Task ScanAndFetch() + public async Task ScanAndFetch(CancellationTokenSource cancelTokenSource) { var parsedUris = new HashSet(); - foreach (var possibleIcon in new Scanner(Source).Scan(TargetUri)) + foreach (var possibleIcon in await new Scanner(Source).Scan(TargetUri, cancelTokenSource)) { // Because the scanner can return duplicate URIs. if (parsedUris.Contains(possibleIcon.Location)) @@ -50,7 +51,7 @@ public async Task ScanAndFetch() // Hopefully we've already found it if (_IsPerfect(possibleIcon.ExpectedSize)) { - var image = await DownloadImages_ReturnPerfect(possibleIcon.Location); + var image = await DownloadImages_ReturnPerfect(possibleIcon.Location, cancelTokenSource); if (image != null) return image; } @@ -65,7 +66,7 @@ public async Task ScanAndFetch() // Download them, prioritizing those closest to perfect foreach (var possibleIcon in notVerified) { - var image = await DownloadImages_ReturnPerfect(possibleIcon.Location); + var image = await DownloadImages_ReturnPerfect(possibleIcon.Location, cancelTokenSource); if (image != null) return image; } @@ -78,9 +79,9 @@ public async Task ScanAndFetch() // Downloads images. If perfect found, returns it. 
- private async Task DownloadImages_ReturnPerfect(Uri uri) + private async Task DownloadImages_ReturnPerfect(Uri uri, CancellationTokenSource cancelTokenSource) { - foreach (var image in await Source.DownloadImages(uri)) + foreach (var image in await Source.DownloadImages(uri, cancelTokenSource)) { if (_IsPerfect(image.Size)) return image; From 456ceaac0c88305d4ecde86fdf8a8d28387816f4 Mon Sep 17 00:00:00 2001 From: John Dailey Date: Wed, 20 Mar 2024 16:58:38 -0400 Subject: [PATCH 05/20] Update example with new async/cancellation token --- Example/FetcherTest.cs | 42 ++++++++++++++++++++++++++++++++++++------ Example/ScannerTest.cs | 29 +++++++++++++++++++++++++++-- 2 files changed, 63 insertions(+), 8 deletions(-) diff --git a/Example/FetcherTest.cs b/Example/FetcherTest.cs index 170f7b2..0a5f90b 100644 --- a/Example/FetcherTest.cs +++ b/Example/FetcherTest.cs @@ -7,6 +7,7 @@ using System.Drawing; using System.Linq; using System.Text; +using System.Threading; using System.Threading.Tasks; using System.Windows.Forms; @@ -19,8 +20,20 @@ public FetcherTest() InitializeComponent(); } + private bool _isFetching = false; + private CancellationTokenSource _cancellationTokenSource = null; + private async void btnFetch_Click(object sender, EventArgs e) { + if (_isFetching && _cancellationTokenSource != null) + { + _cancellationTokenSource.Cancel(); + return; + } + + _isFetching = true; + ((Button)sender).Text = "Cancel"; + try { var uri = new Uri(txtUri.Text); @@ -28,25 +41,42 @@ private async void btnFetch_Click(object sender, EventArgs e) var maxSize = (int)numMaxSize.Value; var perfectSize = (int)numPerfectSize.Value; + _cancellationTokenSource = new CancellationTokenSource(); + + //await Task.Delay(10000, _cancellationTokenSource.Token); + picIcon.Size = new Size(16, 16); picIcon.Image = null; - var image = await new Fetcher().Fetch(uri, new FetchOptions - { - MinimumSize = new IconSize(minSize, minSize), - MaximumSize = new IconSize(maxSize, maxSize), - 
PerfectSize = new IconSize(perfectSize, perfectSize) - }); + var image = await new Fetcher().Fetch( + uri, + new FetchOptions + { + MinimumSize = new IconSize(minSize, minSize), + MaximumSize = new IconSize(maxSize, maxSize), + PerfectSize = new IconSize(perfectSize, perfectSize) + }, + _cancellationTokenSource); + if (image != null) { picIcon.Size = new Size(image.Size.Width, image.Size.Height); picIcon.Image = image.ToSKBitmap().ToBitmap(); } } + catch (TaskCanceledException) + { + _cancellationTokenSource?.Dispose(); + } catch (Exception ex) { MessageBox.Show(ex.Message); } + finally + { + _isFetching = false; + ((Button)sender).Text = "Fetch"; + } } } } diff --git a/Example/ScannerTest.cs b/Example/ScannerTest.cs index 4054ed8..3f892f1 100644 --- a/Example/ScannerTest.cs +++ b/Example/ScannerTest.cs @@ -5,6 +5,7 @@ using System.Data; using System.Linq; using System.Text; +using System.Threading; using System.Threading.Tasks; using System.Windows.Forms; @@ -27,15 +28,29 @@ private void lstResults_Resize(object sender, EventArgs e) _ExpandLocationColumn(); } - private void btnScan_Click(object sender, EventArgs e) + private bool _isScanning = false; + private CancellationTokenSource _cancellationTokenSource = null; + + private async void btnScan_Click(object sender, EventArgs e) { try { + if (_isScanning && _cancellationTokenSource != null) + { + _cancellationTokenSource.Cancel(); + return; + } + + _isScanning = true; + ((Button)sender).Text = "Cancel"; + var uri = new Uri(txtUri.Text); lstResults.Items.Clear(); - foreach (var result in new Scanner().Scan(uri)) + _cancellationTokenSource = new CancellationTokenSource(); + + foreach (var result in await new Scanner().Scan(uri, _cancellationTokenSource)) { lstResults.Items.Add(new ListViewItem(new[]{ result.ExpectedSize.ToString(), @@ -43,10 +58,20 @@ private void btnScan_Click(object sender, EventArgs e) })); } } + catch (TaskCanceledException) + { + _cancellationTokenSource?.Dispose(); + } catch (Exception ex) { 
MessageBox.Show(ex.Message); } + finally + { + _isScanning = false; + ((Button)sender).Text = "Scan"; + _cancellationTokenSource?.Dispose(); + } } private void _ExpandLocationColumn() From 157bd88d33185b06604ea01b8fed2d175585d0b2 Mon Sep 17 00:00:00 2001 From: John Dailey Date: Wed, 20 Mar 2024 16:59:00 -0400 Subject: [PATCH 06/20] Remove disabled debug code --- Example/FetcherTest.cs | 2 -- 1 file changed, 2 deletions(-) diff --git a/Example/FetcherTest.cs b/Example/FetcherTest.cs index 0a5f90b..e49ad34 100644 --- a/Example/FetcherTest.cs +++ b/Example/FetcherTest.cs @@ -43,8 +43,6 @@ private async void btnFetch_Click(object sender, EventArgs e) _cancellationTokenSource = new CancellationTokenSource(); - //await Task.Delay(10000, _cancellationTokenSource.Token); - picIcon.Size = new Size(16, 16); picIcon.Image = null; From 964a61e9cbfc5abbf1f397098632b3d9b5546f01 Mon Sep 17 00:00:00 2001 From: John Dailey Date: Wed, 20 Mar 2024 16:59:18 -0400 Subject: [PATCH 07/20] Make sure cancellation token is Disposed if not cancelled --- Example/FetcherTest.cs | 6 ++---- Example/ScannerTest.cs | 6 ++---- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/Example/FetcherTest.cs b/Example/FetcherTest.cs index e49ad34..1bcb526 100644 --- a/Example/FetcherTest.cs +++ b/Example/FetcherTest.cs @@ -62,16 +62,14 @@ private async void btnFetch_Click(object sender, EventArgs e) picIcon.Image = image.ToSKBitmap().ToBitmap(); } } - catch (TaskCanceledException) - { - _cancellationTokenSource?.Dispose(); - } + catch (TaskCanceledException) { } catch (Exception ex) { MessageBox.Show(ex.Message); } finally { + _cancellationTokenSource?.Dispose(); _isFetching = false; ((Button)sender).Text = "Fetch"; } diff --git a/Example/ScannerTest.cs b/Example/ScannerTest.cs index 3f892f1..3737cef 100644 --- a/Example/ScannerTest.cs +++ b/Example/ScannerTest.cs @@ -58,16 +58,14 @@ private async void btnScan_Click(object sender, EventArgs e) })); } } - catch (TaskCanceledException) - { - 
_cancellationTokenSource?.Dispose(); - } + catch (TaskCanceledException) { } catch (Exception ex) { MessageBox.Show(ex.Message); } finally { + _cancellationTokenSource?.Dispose(); _isScanning = false; ((Button)sender).Text = "Scan"; _cancellationTokenSource?.Dispose(); From 8a8978b1b16c1a7366f16ea9818c71beaeb80a9b Mon Sep 17 00:00:00 2001 From: John Dailey Date: Wed, 20 Mar 2024 16:59:34 -0400 Subject: [PATCH 08/20] Simplify processing check and make sure user can trigger duplicate actions --- Example/FetcherTest.cs | 4 ++-- Example/ScannerTest.cs | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Example/FetcherTest.cs b/Example/FetcherTest.cs index 1bcb526..bae730d 100644 --- a/Example/FetcherTest.cs +++ b/Example/FetcherTest.cs @@ -25,9 +25,9 @@ public FetcherTest() private async void btnFetch_Click(object sender, EventArgs e) { - if (_isFetching && _cancellationTokenSource != null) + if (_isFetching) { - _cancellationTokenSource.Cancel(); + _cancellationTokenSource?.Cancel(); return; } diff --git a/Example/ScannerTest.cs b/Example/ScannerTest.cs index 3737cef..a4512b2 100644 --- a/Example/ScannerTest.cs +++ b/Example/ScannerTest.cs @@ -35,9 +35,9 @@ private async void btnScan_Click(object sender, EventArgs e) { try { - if (_isScanning && _cancellationTokenSource != null) + if (_isScanning) { - _cancellationTokenSource.Cancel(); + _cancellationTokenSource?.Cancel(); return; } From 0007b2f02b3b16dd46fd5a7d5abdeb73332976c6 Mon Sep 17 00:00:00 2001 From: John Dailey Date: Wed, 20 Mar 2024 16:59:51 -0400 Subject: [PATCH 09/20] Fix tests to all passing --- FaviconFetcher.Tests/DefaultScannerTests.cs | 62 +++++++++---------- FaviconFetcher.Tests/FetcherTests.cs | 10 +-- .../ManifestJsonScannerTests.cs | 6 +- FaviconFetcher.Tests/Utility/MockSource.cs | 2 +- 4 files changed, 40 insertions(+), 40 deletions(-) diff --git a/FaviconFetcher.Tests/DefaultScannerTests.cs b/FaviconFetcher.Tests/DefaultScannerTests.cs index f2789e2..dfc6008 100644 --- 
a/FaviconFetcher.Tests/DefaultScannerTests.cs +++ b/FaviconFetcher.Tests/DefaultScannerTests.cs @@ -121,7 +121,7 @@ public void Results_MultipleLinks_FindAll() Fake content."); var scanner = new DefaultScanner(source, uri); - scanner.Start(); + scanner.Start().Wait(); Assert.AreEqual(2, scanner.Results.Count); } @@ -137,7 +137,7 @@ public void Results_AppleLink_UseSize57x57() Fake content."); var scanner = new DefaultScanner(source, uri); - scanner.Start(); + scanner.Start().Wait(); Assert.AreEqual(new ScanResult { @@ -157,7 +157,7 @@ public void Results_SizedAppleLink_UseSpecifiedSize() Fake content."); var scanner = new DefaultScanner(source, uri); - scanner.Start(); + scanner.Start().Wait(); Assert.AreEqual(new ScanResult { @@ -177,7 +177,7 @@ public void Results_SizeInName_UseGuessedSize() Fake content."); var scanner = new DefaultScanner(source, uri); - scanner.Start(); + scanner.Start().Wait(); Assert.AreEqual(new ScanResult { @@ -197,7 +197,7 @@ public void Results_SizeInNameAndAttribute_UseAttributeSize() Fake content."); var scanner = new DefaultScanner(source, uri); - scanner.Start(); + scanner.Start().Wait(); Assert.AreEqual(new ScanResult { @@ -217,7 +217,7 @@ public void Results_BigNumberInUri_Accepted() "); var scanner = new DefaultScanner(source, uri); - scanner.Start(); + scanner.Start().Wait(); Assert.AreEqual(new ScanResult { @@ -237,7 +237,7 @@ public void Results_LinkInBody_Ignored() "); var scanner = new DefaultScanner(source, uri); - scanner.Start(); + scanner.Start().Wait(); Assert.AreEqual(0, scanner.Results.Count); } @@ -253,7 +253,7 @@ public void Results_QuoteInUri_Accepted() "); var scanner = new DefaultScanner(source, uri); - scanner.Start(); + scanner.Start().Wait(); Assert.AreEqual(new ScanResult { @@ -274,7 +274,7 @@ public void Results_InvalidUri_Skip() "); var scanner = new DefaultScanner(source, uri); - scanner.Start(); + scanner.Start().Wait(); Assert.AreEqual(new ScanResult { @@ -295,7 +295,7 @@ public void 
Results_BaseAfterLocation_ModifiesPreviousLocation() "); var scanner = new DefaultScanner(source, uri); - scanner.Start(); + scanner.Start().Wait(); Assert.AreEqual(new ScanResult { @@ -316,7 +316,7 @@ public void Results_AbsoluteBase_PrefixesToLocations() "); var scanner = new DefaultScanner(source, uri); - scanner.Start(); + scanner.Start().Wait(); Assert.AreEqual(new ScanResult { @@ -337,7 +337,7 @@ public void Results_RelativeBase_PrefixesTargetAndBaseToLocations() "); var scanner = new DefaultScanner(source, uri); - scanner.Start(); + scanner.Start().Wait(); Assert.AreEqual(new ScanResult { @@ -358,7 +358,7 @@ public void Results_EmptyBase_IgnoresIt() "); var scanner = new DefaultScanner(source, uri); - scanner.Start(); + scanner.Start().Wait(); Assert.AreEqual(new ScanResult { @@ -379,7 +379,7 @@ public void Results_InvalidBase_IgnoresIt() "); var scanner = new DefaultScanner(source, uri); - scanner.Start(); + scanner.Start().Wait(); Assert.AreEqual(new ScanResult { @@ -396,7 +396,7 @@ public void SuggestedScanners_NoLinks_SuggestFaviconIco() source.AddTextResource(uri, "Fake content."); var scanner = new DefaultScanner(source, uri); - scanner.Start(); + scanner.Start().Wait(); Assert.AreEqual(0, scanner.Results.Count); Assert.AreEqual(1, scanner.SuggestedScanners.Count); @@ -415,7 +415,7 @@ public void SuggestedScanners_InvalidLinks_SuggestFaviconIco() Fake content."); var scanner = new DefaultScanner(source, uri); - scanner.Start(); + scanner.Start().Wait(); Assert.AreEqual(0, scanner.Results.Count); Assert.AreEqual(1, scanner.SuggestedScanners.Count); @@ -429,7 +429,7 @@ public void SuggestedScanners_NotFound_SuggestFaviconIco() var source = new MockSource(); var scanner = new DefaultScanner(source, uri); - scanner.Start(); + scanner.Start().Wait(); Assert.AreEqual(0, scanner.Results.Count); Assert.AreEqual(1, scanner.SuggestedScanners.Count); @@ -444,7 +444,7 @@ public void SuggestedScanners_HtmlQuoteUnterminated_SuggestFaviconIco() 
source.AddTextResource(uri, " DownloadText(Uri uri, CancellationTokenSource cancelTo { ++RequestCount; if (!_textResourceMap.ContainsKey(uri)) - return null; + return Task.FromResult((StreamReader)null); var contents = _textResourceMap[uri]; var memoryStream = new MemoryStream(); From c2b7c5db3065633a0e132380c45b045a42e35191 Mon Sep 17 00:00:00 2001 From: John Dailey Date: Wed, 20 Mar 2024 17:57:35 -0400 Subject: [PATCH 10/20] Make WebRequest cancellable --- .../Extensions/WebRequestExtension.cs | 36 +++++++++++++++++++ FaviconFetcher/HttpSource.cs | 13 +++++-- 2 files changed, 47 insertions(+), 2 deletions(-) create mode 100644 FaviconFetcher/Extensions/WebRequestExtension.cs diff --git a/FaviconFetcher/Extensions/WebRequestExtension.cs b/FaviconFetcher/Extensions/WebRequestExtension.cs new file mode 100644 index 0000000..87ef45e --- /dev/null +++ b/FaviconFetcher/Extensions/WebRequestExtension.cs @@ -0,0 +1,36 @@ +using System; +using System.Collections.Generic; +using System.Text; +using System.Threading.Tasks; +using System.Threading; + +namespace FaviconFetcher.Extensions +{ + public static class WebRequestExtension + { + public static async Task WithCancellation(this Task task, CancellationToken cancellationToken, Action action, bool useSynchronizationContext = true) + { + using (cancellationToken.Register(action, useSynchronizationContext)) + { + try + { + return await task; + } + catch (Exception ex) + { + if (cancellationToken.IsCancellationRequested) + { + // the WebException will be available as Exception.InnerException + + // NetStandard 2 form doesn't include cancellationToken as 3rd param + throw new TaskCanceledException(ex.Message, ex); + //throw new OperationCanceledException(ex.Message, ex, cancellationToken); + } + + // cancellation hasn't been requested, rethrow the original WebException + throw; + } + } + } + } +} diff --git a/FaviconFetcher/HttpSource.cs b/FaviconFetcher/HttpSource.cs index d96e7b6..f654c91 100644 --- 
a/FaviconFetcher/HttpSource.cs +++ b/FaviconFetcher/HttpSource.cs @@ -184,9 +184,14 @@ private IEnumerable _ExtractIcoSizes(Stream stream) } // Setup and make a web request, returning the response. - private async Task _GetWebResponse(Uri uri) + private async Task _GetWebResponse(Uri uri, CancellationToken cancellationToken) { + System.Diagnostics.Debug.WriteLine("Fetching"); + +#pragma warning disable SYSLIB0014 // Type or member is obsolete var request = WebRequest.Create(uri) as HttpWebRequest; +#pragma warning restore SYSLIB0014 // Type or member is obsolete + request.CachePolicy = CachePolicy; request.UserAgent = UserAgent; @@ -197,7 +202,11 @@ private async Task _GetWebResponse(Uri uri) // so we need to handle it in a try-catch. try { - return request.GetResponse() as HttpWebResponse; + return await request.GetResponseAsync().WithCancellation(cancellationToken, request.Abort, true) as HttpWebResponse; + } + catch (TaskCanceledException) + { + return null; } catch (WebException ex) { From 6d48cf2b8021140f992d1489dd390fcdd462abf9 Mon Sep 17 00:00:00 2001 From: John Dailey Date: Wed, 20 Mar 2024 17:59:58 -0400 Subject: [PATCH 11/20] Make WebRequest cancellable --- FaviconFetcher/HttpSource.cs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/FaviconFetcher/HttpSource.cs b/FaviconFetcher/HttpSource.cs index f654c91..a9be5ed 100644 --- a/FaviconFetcher/HttpSource.cs +++ b/FaviconFetcher/HttpSource.cs @@ -1,4 +1,5 @@ -using System; +using FaviconFetcher.Extensions; +using System; using System.Collections.Generic; using System.IO; using System.Net; From 90be6a2d8ea559c1d0236486bca6d9e44b8be66f Mon Sep 17 00:00:00 2001 From: John Dailey Date: Wed, 20 Mar 2024 18:00:59 -0400 Subject: [PATCH 12/20] Make memory copy operations async --- FaviconFetcher/HttpSource.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/FaviconFetcher/HttpSource.cs b/FaviconFetcher/HttpSource.cs index a9be5ed..86f2c47 100644 --- 
a/FaviconFetcher/HttpSource.cs +++ b/FaviconFetcher/HttpSource.cs @@ -103,7 +103,7 @@ public async Task> DownloadImages(Uri uri, CancellationTo return images; contentType = response.ContentType.ToLower(); - response.GetResponseStream().CopyTo(memoryStream); + await response.GetResponseStream().CopyToAsync(memoryStream); // Were we redirected and received a non-image response? if (!uri.Equals(response.ResponseUri) @@ -126,7 +126,7 @@ public async Task> DownloadImages(Uri uri, CancellationTo contentType = response.ContentType; memoryStream = new MemoryStream(); - response.GetResponseStream().CopyTo(memoryStream); + await response.GetResponseStream().CopyToAsync(memoryStream); } } From 585178989fa1c2b11f6ea320f49da7cf2cbd7b67 Mon Sep 17 00:00:00 2001 From: John Dailey Date: Wed, 20 Mar 2024 21:06:59 -0400 Subject: [PATCH 13/20] Make cancellation token on ScanAndFetch optional --- FaviconFetcher/Utility/FetchJob.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/FaviconFetcher/Utility/FetchJob.cs b/FaviconFetcher/Utility/FetchJob.cs index 7afdd1b..fd2adba 100644 --- a/FaviconFetcher/Utility/FetchJob.cs +++ b/FaviconFetcher/Utility/FetchJob.cs @@ -38,7 +38,7 @@ public void Dispose() } // Scan and fetches best icon per Options. 
- public async Task ScanAndFetch(CancellationTokenSource cancelTokenSource) + public async Task ScanAndFetch(CancellationTokenSource cancelTokenSource = null) { var parsedUris = new HashSet(); foreach (var possibleIcon in await new Scanner(Source).Scan(TargetUri, cancelTokenSource)) From 220033976f4668a676ef3e077805ccf35126fc43 Mon Sep 17 00:00:00 2001 From: John Dailey Date: Wed, 20 Mar 2024 21:10:42 -0400 Subject: [PATCH 14/20] Update readme with await modifiers --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index d9d147f..c91d17e 100644 --- a/README.md +++ b/README.md @@ -40,7 +40,7 @@ The fetcher will scan a webpage for favicons and download the one that best matc ```csharp var fetcher = new Fetcher(); -var image = fetcher.FetchClosest(uri, new Size(16, 16)); +var image = await fetcher.FetchClosest(uri, new Size(16, 16)); // Don't forget to dispose of the image when no longer needed. ``` @@ -53,7 +53,7 @@ To get a list of possible favicons without downloading any, use the scanner. ```csharp var scanner = new Scanner(); -foreach (var result in scanner.Scan(uri)) +foreach (var result in await scanner.Scan(uri)) { var expectedSize = result.ExpectedSize; var absoluteUri = result.Location; @@ -73,7 +73,7 @@ var source = new HttpSource() { CachePolicy = new RequestCachePolicy(RequestCacheLevel.CacheIfAvailable) }; var fetcher = new Fetcher(source); -var image = fetcher.FetchClosest(uri, new Size(16, 16)); +var image = await fetcher.FetchClosest(uri, new Size(16, 16)); // Don't forget to dispose of the image when no longer needed. 
``` From bcfcd87424b2d8448285ed259bfd2ae562e240cf Mon Sep 17 00:00:00 2001 From: John Dailey Date: Wed, 20 Mar 2024 21:39:08 -0400 Subject: [PATCH 15/20] Use GetAwaiter to unwrap exceptions in tests --- .../BrowserConfigXmlScannerTests.cs | 6 +- FaviconFetcher.Tests/DefaultScannerTests.cs | 72 +++++++++---------- FaviconFetcher.Tests/FetcherTests.cs | 8 +-- .../ManifestJsonScannerTests.cs | 6 +- FaviconFetcher.Tests/ScannerTests.cs | 2 +- 5 files changed, 47 insertions(+), 47 deletions(-) diff --git a/FaviconFetcher.Tests/BrowserConfigXmlScannerTests.cs b/FaviconFetcher.Tests/BrowserConfigXmlScannerTests.cs index 0859691..2f869f0 100644 --- a/FaviconFetcher.Tests/BrowserConfigXmlScannerTests.cs +++ b/FaviconFetcher.Tests/BrowserConfigXmlScannerTests.cs @@ -29,7 +29,7 @@ public void Start_ValidXml_Parse() "); var scanner = new BrowserconfigXmlScanner(source, uri); - scanner.Start().Wait(); + scanner.Start().GetAwaiter(); Assert.AreEqual(4, scanner.Results.Count); } @@ -43,7 +43,7 @@ public void Start_InvalidXml_Skip() >"); var scanner = new BrowserconfigXmlScanner(source, uri); - scanner.Start().Wait(); + scanner.Start().GetAwaiter(); Assert.AreEqual(0, scanner.Results.Count); } @@ -68,7 +68,7 @@ public void Start_ContainsInvalidUri_Skip() "); var scanner = new BrowserconfigXmlScanner(source, uri); - scanner.Start().Wait(); + scanner.Start().GetAwaiter(); Assert.AreEqual(3, scanner.Results.Count); } diff --git a/FaviconFetcher.Tests/DefaultScannerTests.cs b/FaviconFetcher.Tests/DefaultScannerTests.cs index dfc6008..48cc1a2 100644 --- a/FaviconFetcher.Tests/DefaultScannerTests.cs +++ b/FaviconFetcher.Tests/DefaultScannerTests.cs @@ -19,7 +19,7 @@ public void Results_OneLink_FindIt() Fake content."); var scanner = new DefaultScanner(source, uri); - scanner.Start().Wait(); + scanner.Start().GetAwaiter(); Assert.AreEqual(new ScanResult { @@ -39,7 +39,7 @@ public void Results_LinkHrefWithoutQuotes_FindIt() Fake content."); var scanner = new DefaultScanner(source, uri); - 
scanner.Start().Wait(); + scanner.Start().GetAwaiter(); Assert.AreEqual(new ScanResult { @@ -59,7 +59,7 @@ public void Results_LinkRelInCaps_FindIt() Fake content."); var scanner = new DefaultScanner(source, uri); - scanner.Start().Wait(); + scanner.Start().GetAwaiter(); Assert.AreEqual(new ScanResult { @@ -79,7 +79,7 @@ public void Results_HtmlInCaps_ParseIt() Fake content."); var scanner = new DefaultScanner(source, uri); - scanner.Start().Wait(); + scanner.Start().GetAwaiter(); Assert.AreEqual(1, scanner.Results.Count); } @@ -95,7 +95,7 @@ public void Results_LinkHasSizes_UseSizes() Fake content."); var scanner = new DefaultScanner(source, uri); - scanner.Start().Wait(); + scanner.Start().GetAwaiter(); Assert.AreEqual(new ScanResult { @@ -121,7 +121,7 @@ public void Results_MultipleLinks_FindAll() Fake content."); var scanner = new DefaultScanner(source, uri); - scanner.Start().Wait(); + scanner.Start().GetAwaiter(); Assert.AreEqual(2, scanner.Results.Count); } @@ -137,7 +137,7 @@ public void Results_AppleLink_UseSize57x57() Fake content."); var scanner = new DefaultScanner(source, uri); - scanner.Start().Wait(); + scanner.Start().GetAwaiter(); Assert.AreEqual(new ScanResult { @@ -157,7 +157,7 @@ public void Results_SizedAppleLink_UseSpecifiedSize() Fake content."); var scanner = new DefaultScanner(source, uri); - scanner.Start().Wait(); + scanner.Start().GetAwaiter(); Assert.AreEqual(new ScanResult { @@ -177,7 +177,7 @@ public void Results_SizeInName_UseGuessedSize() Fake content."); var scanner = new DefaultScanner(source, uri); - scanner.Start().Wait(); + scanner.Start().GetAwaiter(); Assert.AreEqual(new ScanResult { @@ -197,7 +197,7 @@ public void Results_SizeInNameAndAttribute_UseAttributeSize() Fake content."); var scanner = new DefaultScanner(source, uri); - scanner.Start().Wait(); + scanner.Start().GetAwaiter(); Assert.AreEqual(new ScanResult { @@ -217,7 +217,7 @@ public void Results_BigNumberInUri_Accepted() "); var scanner = new DefaultScanner(source, 
uri); - scanner.Start().Wait(); + scanner.Start().GetAwaiter(); Assert.AreEqual(new ScanResult { @@ -237,7 +237,7 @@ public void Results_LinkInBody_Ignored() "); var scanner = new DefaultScanner(source, uri); - scanner.Start().Wait(); + scanner.Start().GetAwaiter(); Assert.AreEqual(0, scanner.Results.Count); } @@ -253,7 +253,7 @@ public void Results_QuoteInUri_Accepted() "); var scanner = new DefaultScanner(source, uri); - scanner.Start().Wait(); + scanner.Start().GetAwaiter(); Assert.AreEqual(new ScanResult { @@ -274,7 +274,7 @@ public void Results_InvalidUri_Skip() "); var scanner = new DefaultScanner(source, uri); - scanner.Start().Wait(); + scanner.Start().GetAwaiter(); Assert.AreEqual(new ScanResult { @@ -295,7 +295,7 @@ public void Results_BaseAfterLocation_ModifiesPreviousLocation() "); var scanner = new DefaultScanner(source, uri); - scanner.Start().Wait(); + scanner.Start().GetAwaiter(); Assert.AreEqual(new ScanResult { @@ -316,7 +316,7 @@ public void Results_AbsoluteBase_PrefixesToLocations() "); var scanner = new DefaultScanner(source, uri); - scanner.Start().Wait(); + scanner.Start().GetAwaiter(); Assert.AreEqual(new ScanResult { @@ -337,7 +337,7 @@ public void Results_RelativeBase_PrefixesTargetAndBaseToLocations() "); var scanner = new DefaultScanner(source, uri); - scanner.Start().Wait(); + scanner.Start().GetAwaiter(); Assert.AreEqual(new ScanResult { @@ -358,7 +358,7 @@ public void Results_EmptyBase_IgnoresIt() "); var scanner = new DefaultScanner(source, uri); - scanner.Start().Wait(); + scanner.Start().GetAwaiter(); Assert.AreEqual(new ScanResult { @@ -379,7 +379,7 @@ public void Results_InvalidBase_IgnoresIt() "); var scanner = new DefaultScanner(source, uri); - scanner.Start().Wait(); + scanner.Start().GetAwaiter(); Assert.AreEqual(new ScanResult { @@ -396,7 +396,7 @@ public void SuggestedScanners_NoLinks_SuggestFaviconIco() source.AddTextResource(uri, "Fake content."); var scanner = new DefaultScanner(source, uri); - scanner.Start().Wait(); + 
scanner.Start().GetAwaiter(); Assert.AreEqual(0, scanner.Results.Count); Assert.AreEqual(1, scanner.SuggestedScanners.Count); @@ -415,7 +415,7 @@ public void SuggestedScanners_InvalidLinks_SuggestFaviconIco() Fake content."); var scanner = new DefaultScanner(source, uri); - scanner.Start().Wait(); + scanner.Start().GetAwaiter(); Assert.AreEqual(0, scanner.Results.Count); Assert.AreEqual(1, scanner.SuggestedScanners.Count); @@ -429,7 +429,7 @@ public void SuggestedScanners_NotFound_SuggestFaviconIco() var source = new MockSource(); var scanner = new DefaultScanner(source, uri); - scanner.Start().Wait(); + scanner.Start().GetAwaiter(); Assert.AreEqual(0, scanner.Results.Count); Assert.AreEqual(1, scanner.SuggestedScanners.Count); @@ -444,7 +444,7 @@ public void SuggestedScanners_HtmlQuoteUnterminated_SuggestFaviconIco() source.AddTextResource(uri, ""); var scanner = new Scanner(source); - var results = scanner.Scan(uri).Result.ToArray(); + var results = scanner.Scan(uri).GetAwaiter().GetResult().ToArray(); Assert.AreEqual(12, results.Length); } From 8fdeabc051777d5342a636fe29edcba93d0f0e1b Mon Sep 17 00:00:00 2001 From: John Dailey Date: Thu, 21 Mar 2024 02:32:38 -0400 Subject: [PATCH 16/20] Remove debugging output --- FaviconFetcher/HttpSource.cs | 2 -- 1 file changed, 2 deletions(-) diff --git a/FaviconFetcher/HttpSource.cs b/FaviconFetcher/HttpSource.cs index 86f2c47..5c454de 100644 --- a/FaviconFetcher/HttpSource.cs +++ b/FaviconFetcher/HttpSource.cs @@ -187,8 +187,6 @@ private IEnumerable _ExtractIcoSizes(Stream stream) // Setup and make a web request, returning the response. 
private async Task _GetWebResponse(Uri uri, CancellationToken cancellationToken) { - System.Diagnostics.Debug.WriteLine("Fetching"); - #pragma warning disable SYSLIB0014 // Type or member is obsolete var request = WebRequest.Create(uri) as HttpWebRequest; #pragma warning restore SYSLIB0014 // Type or member is obsolete From 5a875e3f898bdadd281114b1b049cb05bd3a190e Mon Sep 17 00:00:00 2001 From: John Dailey Date: Thu, 21 Mar 2024 02:48:00 -0400 Subject: [PATCH 17/20] Move old WebRequest HttpSource into WebRequestSource --- FaviconFetcher/WebRequestSource.cs | 239 +++++++++++++++++++++++++++++ 1 file changed, 239 insertions(+) create mode 100644 FaviconFetcher/WebRequestSource.cs diff --git a/FaviconFetcher/WebRequestSource.cs b/FaviconFetcher/WebRequestSource.cs new file mode 100644 index 0000000..22d7e94 --- /dev/null +++ b/FaviconFetcher/WebRequestSource.cs @@ -0,0 +1,239 @@ +using FaviconFetcher.Extensions; +using System; +using System.Collections.Generic; +using System.IO; +using System.Net; +using System.Net.Cache; +using System.Text; +using System.Threading; +using System.Threading.Tasks; + +namespace FaviconFetcher +{ + /// + /// Default tool used by FaviconFetcher to download resources from a website. + /// + public class WebRequestSource : ISource + { + /// + /// The cache policy used for web requests. + /// + public RequestCachePolicy CachePolicy = WebRequest.DefaultCachePolicy; + + /// + /// The HTTP User-agent header sent for web requests. The "e" is in + /// "fetch" is swapped out with the number "3" here because a number + /// of sites block requests with "fetch" in the userAgent. 
+ /// + public string UserAgent = "FaviconF3tcher/1.2"; + + /// + /// Proxy used for getting web requests + /// + public WebProxy RequestsProxy = null; + + /// + /// Creates a HttpSource for accessing the websites + /// + /// (Optional) Proxy used for getting web requests + public WebRequestSource(WebProxy proxy = null) + { + RequestsProxy = proxy; + } + + /// + /// Internal use only. Downloads a text resource from a URI. + /// + /// The uri of the resource to download. + /// An optional flag for cancelling the download. + /// A reader for the resource, or null. + public async Task DownloadText(Uri uri, CancellationTokenSource cancelTokenSource) + { + var cancelToken = cancelTokenSource != null + ? cancelTokenSource.Token + : CancellationToken.None; + + var response = await _GetWebResponse(uri, cancelToken); + if (cancelToken.IsCancellationRequested + || response == null + || response.StatusCode != HttpStatusCode.OK) + { + response?.Dispose(); // since we won't be passing on the response stream. + return null; + } + + // Header has priority. + // Byte Order Mark is second priority. + // Otherwise default to ASCII, since it'll be ASCII-compatible. + if (response.ContentType.Contains("charset=")) + { + try + { + var charset = response.CharacterSet.Replace("\"", ""); + var encoding = Encoding.GetEncoding(charset); + return new StreamReader(response.GetResponseStream(), encoding); + } + catch (NotSupportedException) { } + } + return new StreamReader(response.GetResponseStream(), Encoding.ASCII, true); + } + + /// + /// Internal use only. Downloads all images from a URI. + /// + /// The URI of the image file to download. + /// An optional flag for cancelling the download. + /// All of the images found within the file. + public async Task> DownloadImages(Uri uri, CancellationTokenSource cancelTokenSource) + { + var cancelToken = cancelTokenSource != null + ? 
cancelTokenSource.Token + : CancellationToken.None; + + var images = new List(); + var contentType = string.Empty; + var memoryStream = new MemoryStream(); + Uri responseUri = null; + + using (var response = await _GetWebResponse(uri, cancelToken)) + { + if (cancelToken.IsCancellationRequested + || response == null + || response.StatusCode != HttpStatusCode.OK) + return images; + + contentType = response.ContentType.ToLower(); + await response.GetResponseStream().CopyToAsync(memoryStream); + + // Were we redirected and received a non-image response? + if (!uri.Equals(response.ResponseUri) + && contentType.Contains("text/html")) + { + responseUri = response.ResponseUri; + } + } + + if (responseUri != null) + { + var redirectedUri = new Uri(responseUri.GetLeftPart(UriPartial.Authority).ToString() + uri.PathAndQuery); + // Try fetching same resource at the root of the redirected URI + using (var response = await _GetWebResponse(redirectedUri, cancelToken)) + { + if (cancelToken.IsCancellationRequested + || response == null + || response.StatusCode != HttpStatusCode.OK) + return images; + + contentType = response.ContentType; + memoryStream = new MemoryStream(); + await response.GetResponseStream().CopyToAsync(memoryStream); + } + } + + // Ico file + if (_IsContentTypeIco(contentType)) + { + try + { + foreach (var size in _ExtractIcoSizes(memoryStream)) + { + memoryStream.Position = 0; + images.Add(IconImage.FromIco(memoryStream, size)); + } + return images; + } + + // Sometimes a website lies about "ico". + catch (EndOfStreamException) { } + catch (ArgumentException) { } + // We'll let this fall through to try another image type. 
+ memoryStream.Position = 0; + } + + // Other image type + try + { + images.Add(IconImage.FromStream(memoryStream)); + } + catch (ArgumentException) {} + return images; + } + + + // Extract image sizes from ICO file + private IEnumerable _ExtractIcoSizes(Stream stream) + { + var reader = new BinaryReader(stream, Encoding.UTF8, true); + + // Skip to count + stream.Seek(4, SeekOrigin.Begin); + var count = reader.ReadInt16(); + + var sizes = new List(); + for (var i = 0; i != count; ++i) + { + var offset = 6 + i * 16; + stream.Seek(offset, SeekOrigin.Begin); + int width = reader.ReadByte(); + if (width == 0) width = 256; + int height = reader.ReadByte(); + if (height == 0) height = 256; + sizes.Add(new IconSize(width, height)); + } + + return sizes; + } + + // Setup and make a web request, returning the response. + private async Task _GetWebResponse(Uri uri, CancellationToken cancellationToken) + { +#pragma warning disable SYSLIB0014 // Type or member is obsolete + var request = WebRequest.Create(uri) as HttpWebRequest; +#pragma warning restore SYSLIB0014 // Type or member is obsolete + + request.CachePolicy = CachePolicy; + request.UserAgent = UserAgent; + + if (RequestsProxy != null) + request.Proxy = RequestsProxy; + + // GetResponse returns response in exception if error code... + // so we need to handle it in a try-catch. + try + { + return await request.GetResponseAsync().WithCancellation(cancellationToken, request.Abort, true) as HttpWebResponse; + } + catch (TaskCanceledException) + { + return null; + } + catch (WebException ex) + { + if (ex.Response == null) + throw; + return ex.Response as HttpWebResponse; + } + } + + // Check whether the file is an ico. 
+ private bool _IsContentTypeIco(string contentType) + { + // Check content type + var iconTypes = new[] { + "image/x-icon", + "image/vnd.microsoft.icon", + "image/ico", + "image/icon", + "text/ico", + "application/ico" + }; + foreach (var iconType in iconTypes) + { + if (contentType.Contains(iconType)) + return true; + } + return false; + } + + } +} From 9dc834b3a6efabc6843ad0ca2df143eae6c37404 Mon Sep 17 00:00:00 2001 From: John Dailey Date: Thu, 21 Mar 2024 02:48:22 -0400 Subject: [PATCH 18/20] Make HttpSource alias for WebRequestSource for compatability --- FaviconFetcher/HttpSource.cs | 229 +---------------------------------- 1 file changed, 3 insertions(+), 226 deletions(-) diff --git a/FaviconFetcher/HttpSource.cs b/FaviconFetcher/HttpSource.cs index 5c454de..ffda464 100644 --- a/FaviconFetcher/HttpSource.cs +++ b/FaviconFetcher/HttpSource.cs @@ -1,239 +1,16 @@ -using FaviconFetcher.Extensions; -using System; -using System.Collections.Generic; -using System.IO; -using System.Net; -using System.Net.Cache; -using System.Text; -using System.Threading; -using System.Threading.Tasks; +using System.Net; namespace FaviconFetcher { /// /// Default tool used by FaviconFetcher to download resources from a website. /// - public class HttpSource : ISource + public class HttpSource : WebRequestSource { - /// - /// The cache policy used for web requests. - /// - public RequestCachePolicy CachePolicy = WebRequest.DefaultCachePolicy; - - /// - /// The HTTP User-agent header sent for web requests. The "e" is in - /// "fetch" is swapped out with the number "3" here because a number - /// of sites block requests with "fetch" in the userAgent. 
- /// - public string UserAgent = "FaviconF3tcher/1.2"; - - /// - /// Proxy used for getting web requests - /// - public WebProxy RequestsProxy = null; - /// /// Creates a HttpSource for accessing the websites /// /// (Optional) Proxy used for getting web requests - public HttpSource(WebProxy proxy = null) - { - RequestsProxy = proxy; - } - - /// - /// Internal use only. Downloads a text resource from a URI. - /// - /// The uri of the resource to download. - /// An optional flag for cancelling the download. - /// A reader for the resource, or null. - public async Task DownloadText(Uri uri, CancellationTokenSource cancelTokenSource) - { - var cancelToken = cancelTokenSource != null - ? cancelTokenSource.Token - : CancellationToken.None; - - var response = await _GetWebResponse(uri, cancelToken); - if (cancelToken.IsCancellationRequested - || response == null - || response.StatusCode != HttpStatusCode.OK) - { - response?.Dispose(); // since we won't be passing on the response stream. - return null; - } - - // Header has priority. - // Byte Order Mark is second priority. - // Otherwise default to ASCII, since it'll be ASCII-compatible. - if (response.ContentType.Contains("charset=")) - { - try - { - var charset = response.CharacterSet.Replace("\"", ""); - var encoding = Encoding.GetEncoding(charset); - return new StreamReader(response.GetResponseStream(), encoding); - } - catch (NotSupportedException) { } - } - return new StreamReader(response.GetResponseStream(), Encoding.ASCII, true); - } - - /// - /// Internal use only. Downloads all images from a URI. - /// - /// The URI of the image file to download. - /// An optional flag for cancelling the download. - /// All of the images found within the file. - public async Task> DownloadImages(Uri uri, CancellationTokenSource cancelTokenSource) - { - var cancelToken = cancelTokenSource != null - ? 
cancelTokenSource.Token - : CancellationToken.None; - - var images = new List(); - var contentType = string.Empty; - var memoryStream = new MemoryStream(); - Uri responseUri = null; - - using (var response = await _GetWebResponse(uri, cancelToken)) - { - if (cancelToken.IsCancellationRequested - || response == null - || response.StatusCode != HttpStatusCode.OK) - return images; - - contentType = response.ContentType.ToLower(); - await response.GetResponseStream().CopyToAsync(memoryStream); - - // Were we redirected and received a non-image response? - if (!uri.Equals(response.ResponseUri) - && contentType.Contains("text/html")) - { - responseUri = response.ResponseUri; - } - } - - if (responseUri != null) - { - var redirectedUri = new Uri(responseUri.GetLeftPart(UriPartial.Authority).ToString() + uri.PathAndQuery); - // Try fetching same resource at the root of the redirected URI - using (var response = await _GetWebResponse(redirectedUri, cancelToken)) - { - if (cancelToken.IsCancellationRequested - || response == null - || response.StatusCode != HttpStatusCode.OK) - return images; - - contentType = response.ContentType; - memoryStream = new MemoryStream(); - await response.GetResponseStream().CopyToAsync(memoryStream); - } - } - - // Ico file - if (_IsContentTypeIco(contentType)) - { - try - { - foreach (var size in _ExtractIcoSizes(memoryStream)) - { - memoryStream.Position = 0; - images.Add(IconImage.FromIco(memoryStream, size)); - } - return images; - } - - // Sometimes a website lies about "ico". - catch (EndOfStreamException) { } - catch (ArgumentException) { } - // We'll let this fall through to try another image type. 
- memoryStream.Position = 0; - } - - // Other image type - try - { - images.Add(IconImage.FromStream(memoryStream)); - } - catch (ArgumentException) {} - return images; - } - - - // Extract image sizes from ICO file - private IEnumerable _ExtractIcoSizes(Stream stream) - { - var reader = new BinaryReader(stream, Encoding.UTF8, true); - - // Skip to count - stream.Seek(4, SeekOrigin.Begin); - var count = reader.ReadInt16(); - - var sizes = new List(); - for (var i = 0; i != count; ++i) - { - var offset = 6 + i * 16; - stream.Seek(offset, SeekOrigin.Begin); - int width = reader.ReadByte(); - if (width == 0) width = 256; - int height = reader.ReadByte(); - if (height == 0) height = 256; - sizes.Add(new IconSize(width, height)); - } - - return sizes; - } - - // Setup and make a web request, returning the response. - private async Task _GetWebResponse(Uri uri, CancellationToken cancellationToken) - { -#pragma warning disable SYSLIB0014 // Type or member is obsolete - var request = WebRequest.Create(uri) as HttpWebRequest; -#pragma warning restore SYSLIB0014 // Type or member is obsolete - - request.CachePolicy = CachePolicy; - request.UserAgent = UserAgent; - - if (RequestsProxy != null) - request.Proxy = RequestsProxy; - - // GetResponse returns response in exception if error code... - // so we need to handle it in a try-catch. - try - { - return await request.GetResponseAsync().WithCancellation(cancellationToken, request.Abort, true) as HttpWebResponse; - } - catch (TaskCanceledException) - { - return null; - } - catch (WebException ex) - { - if (ex.Response == null) - throw; - return ex.Response as HttpWebResponse; - } - } - - // Check whether the file is an ico. 
- private bool _IsContentTypeIco(string contentType) - { - // Check content type - var iconTypes = new[] { - "image/x-icon", - "image/vnd.microsoft.icon", - "image/ico", - "image/icon", - "text/ico", - "application/ico" - }; - foreach (var iconType in iconTypes) - { - if (contentType.Contains(iconType)) - return true; - } - return false; - } - + public HttpSource(WebProxy proxy = null) : base(proxy) { } } } From 9e2487534fa38cc46dc91343937cbbd10f8d0d9a Mon Sep 17 00:00:00 2001 From: John Dailey Date: Thu, 21 Mar 2024 02:48:37 -0400 Subject: [PATCH 19/20] Add new HttpClientSource --- FaviconFetcher/HttpClientSource.cs | 317 +++++++++++++++++++++++++++++ 1 file changed, 317 insertions(+) create mode 100644 FaviconFetcher/HttpClientSource.cs diff --git a/FaviconFetcher/HttpClientSource.cs b/FaviconFetcher/HttpClientSource.cs new file mode 100644 index 0000000..36105a3 --- /dev/null +++ b/FaviconFetcher/HttpClientSource.cs @@ -0,0 +1,317 @@ +using System; +using System.Collections.Generic; +using System.IO; +using System.Net; +using System.Net.Cache; +using System.Net.Http; +using System.Text; +using System.Threading; +using System.Threading.Tasks; + +namespace FaviconFetcher +{ + /// + /// Default tool used by FaviconFetcher to download resources from a website. + /// + public class HttpClientSource : ISource + { + /// HttpClient for performing requests for this source + /// Static to be thread-safe + /// + /// https://learn.microsoft.com/en-us/dotnet/fundamentals/runtime-libraries/system-net-http-httpclient + /// HttpClient is intended to be instantiated once and reused + /// throughout the life of an application. This should probably + /// be refactored to utilize a singleton outside HttpSource + /// so that multiple Scan/Fetch doesn't instantiate new + /// instances of HttpClient. 
+ static private HttpClient _httpClient = null; + + /// Handler that stores HttpClient parameters such as proxy and cookie settings + /// Static to be thread-safe along with HttpClient + static private HttpClientHandler _httpClientHandler = null; + + /// + /// The cache policy used for web requests. + /// + public RequestCachePolicy CachePolicy = WebRequest.DefaultCachePolicy; + + /// + /// The HTTP User-agent header sent for web requests. The "e" is in + /// "fetch" is swapped out with the number "3" here because a number + /// of sites block requests with "fetch" in the userAgent. + /// + public string UserAgent = "FaviconF3tcher/1.2"; + + /// + /// Proxy used for getting web requests + /// + public WebProxy RequestsProxy = null; + + /// + /// Creates a HttpSource for accessing the websites + /// + /// (Optional) Proxy used for getting web requests + public HttpClientSource(WebProxy proxy = null) + { + RequestsProxy = proxy; + } + + /// + /// Internal use only. Downloads a text resource from a URI. + /// + /// The uri of the resource to download. + /// An optional flag for cancelling the download. + /// A reader for the resource, or null. + public async Task DownloadText(Uri uri, CancellationTokenSource cancelTokenSource) + { + var cancelToken = cancelTokenSource != null + ? cancelTokenSource.Token + : CancellationToken.None; + + var response = await _GetWebResponse(uri, cancelToken); + if (cancelToken.IsCancellationRequested + || response == null + || response.StatusCode != HttpStatusCode.OK) + { + response?.Dispose(); // since we won't be passing on the response stream. + return null; + } + + // Header has priority. + // Byte Order Mark is second priority. + // Otherwise default to ASCII, since it'll be ASCII-compatible. 
+ if (response.Content.Headers.ContentType.ToString().ToLower().Contains("charset=")) + { + try + { + var charset = response.Content.Headers.ContentType.CharSet.Replace("\"", ""); + var encoding = Encoding.GetEncoding(charset); + return new StreamReader(await response.Content.ReadAsStreamAsync(), encoding); + } + catch (NotSupportedException) { } + } + return new StreamReader(await response.Content.ReadAsStreamAsync(), Encoding.ASCII, true); + } + + /// + /// Internal use only. Downloads all images from a URI. + /// + /// The URI of the image file to download. + /// An optional flag for cancelling the download. + /// All of the images found within the file. + public async Task> DownloadImages(Uri uri, CancellationTokenSource cancelTokenSource) + { + var cancelToken = cancelTokenSource != null + ? cancelTokenSource.Token + : CancellationToken.None; + + var images = new List(); + var contentType = string.Empty; + var memoryStream = new MemoryStream(); + Uri responseUri = null; + + using (var response = await _GetWebResponse(uri, cancelToken)) + { + if (cancelToken.IsCancellationRequested + || response == null + || response.StatusCode != HttpStatusCode.OK) + return images; + + contentType = response.Content.Headers.ContentType.ToString().ToLower(); + await (await response.Content.ReadAsStreamAsync()).CopyToAsync(memoryStream); + + // Were we redirected and received a non-image response? 
+ if (response.RequestMessage != null + && !uri.Equals(response.RequestMessage.RequestUri) + && contentType.Contains("text/html")) + { + responseUri = response.RequestMessage.RequestUri; + } + } + + if (responseUri != null) + { + var redirectedUri = new Uri(responseUri.GetLeftPart(UriPartial.Authority).ToString() + uri.PathAndQuery); + // Try fetching same resource at the root of the redirected URI + using (var response = await _GetWebResponse(redirectedUri, cancelToken)) + { + if (cancelToken.IsCancellationRequested + || response == null + || response.StatusCode != HttpStatusCode.OK) + return images; + + contentType = response.Content.Headers.ContentType.ToString().ToLower(); + memoryStream = new MemoryStream(); + await (await response.Content.ReadAsStreamAsync()).CopyToAsync(memoryStream); + } + } + + // Ico file + if (_IsContentTypeIco(contentType)) + { + try + { + foreach (var size in _ExtractIcoSizes(memoryStream)) + { + memoryStream.Position = 0; + images.Add(IconImage.FromIco(memoryStream, size)); + } + return images; + } + + // Sometimes a website lies about "ico". + catch (EndOfStreamException) { } + catch (ArgumentException) { } + // We'll let this fall through to try another image type. 
+ memoryStream.Position = 0; + } + + // Other image type + try + { + images.Add(IconImage.FromStream(memoryStream)); + } + catch (ArgumentException) { } + return images; + } + + + // Extract image sizes from ICO file + private IEnumerable _ExtractIcoSizes(Stream stream) + { + var reader = new BinaryReader(stream, Encoding.UTF8, true); + + // Skip to count + stream.Seek(4, SeekOrigin.Begin); + var count = reader.ReadInt16(); + + var sizes = new List(); + for (var i = 0; i != count; ++i) + { + var offset = 6 + i * 16; + stream.Seek(offset, SeekOrigin.Begin); + int width = reader.ReadByte(); + if (width == 0) width = 256; + int height = reader.ReadByte(); + if (height == 0) height = 256; + sizes.Add(new IconSize(width, height)); + } + + return sizes; + } + + // Setup and make a web request, returning the response. + private async Task _GetWebResponse(Uri uri, CancellationToken cancellationToken) + { + HttpResponseMessage response = null; + + try + { + var request = new HttpRequestMessage(HttpMethod.Get, uri); + + // A number of sites are picky about incoming requests and block + // or delay without a User-Agent, Accept-Language and/or + // an Accept-Encoding header, so we add them here. Note that the + // handler must be set to auto-decompress any specified encodings. + // It's worth mentioning that some sites block less common UA + // strings, so calling app may need to specify a common agent. + _ = request.Headers.UserAgent.TryParseAdd(UserAgent); + _ = request.Headers.AcceptLanguage.TryParseAdd("en-US,en"); // TODO : Get system locale languages + _ = request.Headers.AcceptEncoding.TryParseAdd("deflate,gzip;q=1.0,*;q=0.5"); + + response = await SourceHttpClient.SendAsync(request, cancellationToken); + response.EnsureSuccessStatusCode(); + + return response; + } + catch (TaskCanceledException) + { + return null; + } + catch (HttpRequestException ex) + { + return response != null + ? 
new HttpResponseMessage(response.StatusCode) + : new HttpResponseMessage(HttpStatusCode.ServiceUnavailable); + } + } + + // Check whether the file is an ico. + private bool _IsContentTypeIco(string contentType) + { + // Check content type + var iconTypes = new[] { + "image/x-icon", + "image/vnd.microsoft.icon", + "image/ico", + "image/icon", + "text/ico", + "application/ico" + }; + foreach (var iconType in iconTypes) + { + if (contentType.Contains(iconType)) + return true; + } + return false; + } + + /// + /// Returns an instantiated HttpClient + /// + private HttpClient SourceHttpClient + { + get + { + return GetHttpClient(this); + } + } + + /// + /// Returns the current static HttpClient for the source, + /// creating a new one when required + /// + private static HttpClient GetHttpClient(HttpClientSource source) + { + if (_httpClient == null) + { + _httpClient = new HttpClient(GetHttpClientHandler(source)); + _httpClient.DefaultRequestHeaders.ConnectionClose = true; // Don't keep connections alive + } + + return _httpClient; + } + + /// + /// Returns the current static HttpClientHandler for the source, + /// creating a new one when required. Settings can only be changed + /// before requests are made. 
+ /// + private static HttpClientHandler GetHttpClientHandler(HttpClientSource source) + { + if (_httpClientHandler == null) + { + _httpClientHandler = new HttpClientHandler(); + _httpClientHandler.UseProxy = false; + _httpClientHandler.AllowAutoRedirect = true; + _httpClientHandler.MaxAutomaticRedirections = 5; + _httpClientHandler.MaxConnectionsPerServer = 1; + // Request mechanism sets Allow-Encoding header to gzip/deflate, so we need to enable it + _httpClientHandler.AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate; + + // Some hosts require cookies to persist after redirects + _httpClientHandler.CookieContainer = new CookieContainer(); ; + _httpClientHandler.UseCookies = true; + + if (source.RequestsProxy != null) + { + _httpClientHandler.UseProxy = true; + _httpClientHandler.Proxy = source.RequestsProxy; + } + } + + return _httpClientHandler; + } + + } +} From 260edd37c6c2611c5bd1251cd08dfe0e1075a880 Mon Sep 17 00:00:00 2001 From: John Dailey Date: Thu, 28 Mar 2024 03:39:15 -0400 Subject: [PATCH 20/20] Only allow GZip and Deflate encodings for DOTNET2 compat --- FaviconFetcher/HttpClientSource.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/FaviconFetcher/HttpClientSource.cs b/FaviconFetcher/HttpClientSource.cs index 36105a3..9cde0c1 100644 --- a/FaviconFetcher/HttpClientSource.cs +++ b/FaviconFetcher/HttpClientSource.cs @@ -217,7 +217,7 @@ private async Task _GetWebResponse(Uri uri, CancellationTok // strings, so calling app may need to specify a common agent. _ = request.Headers.UserAgent.TryParseAdd(UserAgent); _ = request.Headers.AcceptLanguage.TryParseAdd("en-US,en"); // TODO : Get system locale languages - _ = request.Headers.AcceptEncoding.TryParseAdd("deflate,gzip;q=1.0,*;q=0.5"); + _ = request.Headers.AcceptEncoding.TryParseAdd("deflate,gzip"); response = await SourceHttpClient.SendAsync(request, cancellationToken); response.EnsureSuccessStatusCode();