diff --git a/Example/FetcherTest.cs b/Example/FetcherTest.cs
index 0a477ef..bae730d 100644
--- a/Example/FetcherTest.cs
+++ b/Example/FetcherTest.cs
@@ -7,6 +7,7 @@
 using System.Drawing;
 using System.Linq;
 using System.Text;
+using System.Threading;
 using System.Threading.Tasks;
 using System.Windows.Forms;
 
@@ -19,8 +20,22 @@ public FetcherTest()
             InitializeComponent();
         }
 
-        private void btnFetch_Click(object sender, EventArgs e)
+        // True while a fetch is in flight; the button then acts as "Cancel".
+        private bool _isFetching = false;
+        // Cancellation source of the in-flight fetch; null when idle.
+        private CancellationTokenSource _cancellationTokenSource = null;
+
+        private async void btnFetch_Click(object sender, EventArgs e)
         {
+            if (_isFetching)
+            {
+                _cancellationTokenSource?.Cancel();
+                return;
+            }
+
+            _isFetching = true;
+            ((Button)sender).Text = "Cancel";
+
             try
             {
                 var uri = new Uri(txtUri.Text);
@@ -28,25 +43,39 @@ private void btnFetch_Click(object sender, EventArgs e)
                 var maxSize = (int)numMaxSize.Value;
                 var perfectSize = (int)numPerfectSize.Value;
 
+                _cancellationTokenSource = new CancellationTokenSource();
+
                 picIcon.Size = new Size(16, 16);
                 picIcon.Image = null;
 
-                var image = new Fetcher().Fetch(uri, new FetchOptions
-                {
-                    MinimumSize = new IconSize(minSize, minSize),
-                    MaximumSize = new IconSize(maxSize, maxSize),
-                    PerfectSize = new IconSize(perfectSize, perfectSize)
-                });
+                var image = await new Fetcher().Fetch(
+                    uri,
+                    new FetchOptions
+                    {
+                        MinimumSize = new IconSize(minSize, minSize),
+                        MaximumSize = new IconSize(maxSize, maxSize),
+                        PerfectSize = new IconSize(perfectSize, perfectSize)
+                    },
+                    _cancellationTokenSource);
+
                 if (image != null)
                 {
                     picIcon.Size = new Size(image.Size.Width, image.Size.Height);
                     picIcon.Image = image.ToSKBitmap().ToBitmap();
                 }
             }
+            catch (OperationCanceledException) { } // base of TaskCanceledException: any cancellation is silent
             catch (Exception ex)
             {
                 MessageBox.Show(ex.Message);
             }
+            finally
+            {
+                _cancellationTokenSource?.Dispose();
+                _cancellationTokenSource = null; // never keep a disposed source around for a later Cancel()
+                _isFetching = false;
+                ((Button)sender).Text = "Fetch";
+            }
         }
     }
 }
diff --git a/Example/ScannerTest.cs b/Example/ScannerTest.cs
index 4054ed8..a4512b2 100644
--- a/Example/ScannerTest.cs
+++ 
b/Example/ScannerTest.cs
@@ -5,6 +5,7 @@
 using System.Data;
 using System.Linq;
 using System.Text;
+using System.Threading;
 using System.Threading.Tasks;
 using System.Windows.Forms;
 
@@ -27,15 +28,30 @@ private void lstResults_Resize(object sender, EventArgs e)
             _ExpandLocationColumn();
         }
 
-        private void btnScan_Click(object sender, EventArgs e)
+        private bool _isScanning = false;
+        private CancellationTokenSource _cancellationTokenSource = null;
+
+        private async void btnScan_Click(object sender, EventArgs e)
         {
+            // Guard stays OUTSIDE the try: a "Cancel" click must not run the finally block of this second invocation.
+            if (_isScanning)
+            {
+                _cancellationTokenSource?.Cancel();
+                return;
+            }
+
+            _isScanning = true;
+            ((Button)sender).Text = "Cancel";
+
             try
             {
                 var uri = new Uri(txtUri.Text);
                 lstResults.Items.Clear();
 
-                foreach (var result in new Scanner().Scan(uri))
+                _cancellationTokenSource = new CancellationTokenSource();
+
+                foreach (var result in await new Scanner().Scan(uri, _cancellationTokenSource))
                 {
                     lstResults.Items.Add(new ListViewItem(new[]{
                         result.ExpectedSize.ToString(),
@@ -43,10 +59,18 @@ private void btnScan_Click(object sender, EventArgs e)
                     }));
                 }
             }
+            catch (OperationCanceledException) { } // base of TaskCanceledException: any cancellation is silent
             catch (Exception ex)
             {
                 MessageBox.Show(ex.Message);
             }
+            finally
+            {
+                _cancellationTokenSource?.Dispose();
+                _cancellationTokenSource = null; // was a second, redundant Dispose()
+                _isScanning = false;
+                ((Button)sender).Text = "Scan";
+            }
         }
 
         private void _ExpandLocationColumn()
diff --git a/FaviconFetcher.Tests/BrowserConfigXmlScannerTests.cs b/FaviconFetcher.Tests/BrowserConfigXmlScannerTests.cs
index 78eb536..2f869f0 100644
--- a/FaviconFetcher.Tests/BrowserConfigXmlScannerTests.cs
+++ b/FaviconFetcher.Tests/BrowserConfigXmlScannerTests.cs
@@ -29,7 +29,7 @@ public void Start_ValidXml_Parse()
 ");
 
             var scanner = new BrowserconfigXmlScanner(source, uri);
-            scanner.Start();
+            scanner.Start().GetAwaiter().GetResult(); // block until done and rethrow any scan failure
 
             Assert.AreEqual(4, scanner.Results.Count);
         }
@@ -43,7 +43,7 @@ public void Start_InvalidXml_Skip()
 >");
 
             var scanner = new BrowserconfigXmlScanner(source, uri);
-            scanner.Start();
+            scanner.Start().GetAwaiter().GetResult();
Assert.AreEqual(0, scanner.Results.Count);
         }
@@ -68,7 +68,7 @@ public void Start_ContainsInvalidUri_Skip()
 ");
 
             var scanner = new BrowserconfigXmlScanner(source, uri);
-            scanner.Start();
+            scanner.Start().GetAwaiter().GetResult(); // block until done and rethrow any scan failure
 
             Assert.AreEqual(3, scanner.Results.Count);
         }
diff --git a/FaviconFetcher.Tests/DefaultScannerTests.cs b/FaviconFetcher.Tests/DefaultScannerTests.cs
index 7b50228..48cc1a2 100644
--- a/FaviconFetcher.Tests/DefaultScannerTests.cs
+++ b/FaviconFetcher.Tests/DefaultScannerTests.cs
@@ -19,7 +19,7 @@ public void Results_OneLink_FindIt()
 Fake content.");
 
             var scanner = new DefaultScanner(source, uri);
-            scanner.Start();
+            scanner.Start().GetAwaiter().GetResult();
 
             Assert.AreEqual(new ScanResult
             {
@@ -39,7 +39,7 @@ public void Results_LinkHrefWithoutQuotes_FindIt()
 Fake content.");
 
             var scanner = new DefaultScanner(source, uri);
-            scanner.Start();
+            scanner.Start().GetAwaiter().GetResult();
 
             Assert.AreEqual(new ScanResult
             {
@@ -59,7 +59,7 @@ public void Results_LinkRelInCaps_FindIt()
 Fake content.");
 
             var scanner = new DefaultScanner(source, uri);
-            scanner.Start();
+            scanner.Start().GetAwaiter().GetResult();
 
             Assert.AreEqual(new ScanResult
             {
@@ -79,7 +79,7 @@ public void Results_HtmlInCaps_ParseIt()
 Fake content.");
 
             var scanner = new DefaultScanner(source, uri);
-            scanner.Start();
+            scanner.Start().GetAwaiter().GetResult();
 
             Assert.AreEqual(1, scanner.Results.Count);
         }
@@ -95,7 +95,7 @@ public void Results_LinkHasSizes_UseSizes()
 Fake content.");
 
             var scanner = new DefaultScanner(source, uri);
-            scanner.Start();
+            scanner.Start().GetAwaiter().GetResult();
 
             Assert.AreEqual(new ScanResult
             {
@@ -121,7 +121,7 @@ public void Results_MultipleLinks_FindAll()
 Fake content.");
 
             var scanner = new DefaultScanner(source, uri);
-            scanner.Start();
+            scanner.Start().GetAwaiter().GetResult();
 
             Assert.AreEqual(2, scanner.Results.Count);
         }
@@ -137,7 +137,7 @@ public void Results_AppleLink_UseSize57x57()
 Fake content.");
 
             var scanner = new DefaultScanner(source, uri);
-            scanner.Start();
+            scanner.Start().GetAwaiter().GetResult();
 
             Assert.AreEqual(new ScanResult
             {
@@ 
-157,7 +157,7 @@ public void Results_SizedAppleLink_UseSpecifiedSize()
 Fake content.");
 
             var scanner = new DefaultScanner(source, uri);
-            scanner.Start();
+            scanner.Start().GetAwaiter().GetResult(); // block until done and rethrow any scan failure
 
             Assert.AreEqual(new ScanResult
             {
@@ -177,7 +177,7 @@ public void Results_SizeInName_UseGuessedSize()
 Fake content.");
 
             var scanner = new DefaultScanner(source, uri);
-            scanner.Start();
+            scanner.Start().GetAwaiter().GetResult();
 
             Assert.AreEqual(new ScanResult
             {
@@ -197,7 +197,7 @@ public void Results_SizeInNameAndAttribute_UseAttributeSize()
 Fake content.");
 
             var scanner = new DefaultScanner(source, uri);
-            scanner.Start();
+            scanner.Start().GetAwaiter().GetResult();
 
             Assert.AreEqual(new ScanResult
             {
@@ -217,7 +217,7 @@ public void Results_BigNumberInUri_Accepted()
 ");
 
             var scanner = new DefaultScanner(source, uri);
-            scanner.Start();
+            scanner.Start().GetAwaiter().GetResult();
 
             Assert.AreEqual(new ScanResult
             {
@@ -237,7 +237,7 @@ public void Results_LinkInBody_Ignored()
 ");
 
             var scanner = new DefaultScanner(source, uri);
-            scanner.Start();
+            scanner.Start().GetAwaiter().GetResult();
 
             Assert.AreEqual(0, scanner.Results.Count);
         }
@@ -253,7 +253,7 @@ public void Results_QuoteInUri_Accepted()
 ");
 
             var scanner = new DefaultScanner(source, uri);
-            scanner.Start();
+            scanner.Start().GetAwaiter().GetResult();
 
             Assert.AreEqual(new ScanResult
             {
@@ -274,7 +274,7 @@ public void Results_InvalidUri_Skip()
 ");
 
             var scanner = new DefaultScanner(source, uri);
-            scanner.Start();
+            scanner.Start().GetAwaiter().GetResult();
 
             Assert.AreEqual(new ScanResult
             {
@@ -295,7 +295,7 @@ public void Results_BaseAfterLocation_ModifiesPreviousLocation()
 ");
 
             var scanner = new DefaultScanner(source, uri);
-            scanner.Start();
+            scanner.Start().GetAwaiter().GetResult();
 
             Assert.AreEqual(new ScanResult
             {
@@ -316,7 +316,7 @@ public void Results_AbsoluteBase_PrefixesToLocations()
 ");
 
             var scanner = new DefaultScanner(source, uri);
-            scanner.Start();
+            scanner.Start().GetAwaiter().GetResult();
 
             Assert.AreEqual(new ScanResult
             {
@@ -337,7 +337,7 @@ public void Results_RelativeBase_PrefixesTargetAndBaseToLocations()
 ");
 
             var scanner = 
new DefaultScanner(source, uri);
-            scanner.Start();
+            scanner.Start().GetAwaiter().GetResult(); // block until done and rethrow any scan failure
 
             Assert.AreEqual(new ScanResult
             {
@@ -358,7 +358,7 @@ public void Results_EmptyBase_IgnoresIt()
 ");
 
             var scanner = new DefaultScanner(source, uri);
-            scanner.Start();
+            scanner.Start().GetAwaiter().GetResult();
 
             Assert.AreEqual(new ScanResult
             {
@@ -379,7 +379,7 @@ public void Results_InvalidBase_IgnoresIt()
 ");
 
             var scanner = new DefaultScanner(source, uri);
-            scanner.Start();
+            scanner.Start().GetAwaiter().GetResult();
 
             Assert.AreEqual(new ScanResult
             {
@@ -396,7 +396,7 @@ public void SuggestedScanners_NoLinks_SuggestFaviconIco()
             source.AddTextResource(uri, "Fake content.");
 
             var scanner = new DefaultScanner(source, uri);
-            scanner.Start();
+            scanner.Start().GetAwaiter().GetResult();
 
             Assert.AreEqual(0, scanner.Results.Count);
             Assert.AreEqual(1, scanner.SuggestedScanners.Count);
@@ -415,7 +415,7 @@ public void SuggestedScanners_InvalidLinks_SuggestFaviconIco()
 Fake content.");
 
             var scanner = new DefaultScanner(source, uri);
-            scanner.Start();
+            scanner.Start().GetAwaiter().GetResult();
 
             Assert.AreEqual(0, scanner.Results.Count);
             Assert.AreEqual(1, scanner.SuggestedScanners.Count);
@@ -429,7 +429,7 @@ public void SuggestedScanners_NotFound_SuggestFaviconIco()
             var source = new MockSource();
 
             var scanner = new DefaultScanner(source, uri);
-            scanner.Start();
+            scanner.Start().GetAwaiter().GetResult();
 
             Assert.AreEqual(0, scanner.Results.Count);
             Assert.AreEqual(1, scanner.SuggestedScanners.Count);
@@ -444,7 +444,7 @@ public void SuggestedScanners_HtmlQuoteUnterminated_SuggestFaviconIco()
             source.AddTextResource(uri, "");
 
             var scanner = new Scanner(source);
-            var results = scanner.Scan(uri).ToArray();
+            var results = scanner.Scan(uri).GetAwaiter().GetResult().ToArray();
 
             Assert.AreEqual(12, results.Length);
         }
diff --git a/FaviconFetcher.Tests/Utility/MockSource.cs b/FaviconFetcher.Tests/Utility/MockSource.cs
index ae6b90b..e155151 100644
--- a/FaviconFetcher.Tests/Utility/MockSource.cs
+++ b/FaviconFetcher.Tests/Utility/MockSource.cs
@@ -4,6 +4,7 @@
 using
System.IO;
 using System.Linq;
 using System.Text;
+using System.Threading;
 using System.Threading.Tasks;
 
 namespace FaviconFetcher.Tests.Utility
@@ -45,19 +46,19 @@ public void AddImageResource(Uri uri, IconImage image)
             _imageResourceMap[uri].Add(image);
         }
 
-        public IEnumerable<IconImage> DownloadImages(Uri uri)
+        public Task<IEnumerable<IconImage>> DownloadImages(Uri uri, CancellationTokenSource cancelTokenSource)
         {
             ++RequestCount;
             if (!_imageResourceMap.ContainsKey(uri))
-                return new IconImage[] { };
-            return _imageResourceMap[uri];
+                return Task.FromResult(new IconImage[] { } as System.Collections.Generic.IEnumerable<IconImage>);
+            return Task.FromResult(_imageResourceMap[uri] as System.Collections.Generic.IEnumerable<IconImage>);
         }
 
-        public StreamReader DownloadText(Uri uri)
+        public Task<StreamReader> DownloadText(Uri uri, CancellationTokenSource cancelTokenSource)
         {
             ++RequestCount;
             if (!_textResourceMap.ContainsKey(uri))
-                return null;
+                return Task.FromResult((StreamReader)null);
 
             var contents = _textResourceMap[uri];
             var memoryStream = new MemoryStream();
@@ -65,7 +66,7 @@ public StreamReader DownloadText(Uri uri)
             writer.Write(contents);
             writer.Flush();
             memoryStream.Position = 0;
-            return new StreamReader(memoryStream);
+            return Task.FromResult(new StreamReader(memoryStream));
         }
     }
 
diff --git a/FaviconFetcher/Extensions/WebRequestExtension.cs b/FaviconFetcher/Extensions/WebRequestExtension.cs
new file mode 100644
index 0000000..87ef45e
--- /dev/null
+++ b/FaviconFetcher/Extensions/WebRequestExtension.cs
@@ -0,0 +1,36 @@
+using System;
+using System.Collections.Generic;
+using System.Text;
+using System.Threading.Tasks;
+using System.Threading;
+
+namespace FaviconFetcher.Extensions
+{
+    public static class WebRequestExtension
+    {
+        public static async Task<T> WithCancellation<T>(this Task<T> task, CancellationToken cancellationToken, Action action, bool useSynchronizationContext = true)
+        {
+            using (cancellationToken.Register(action, useSynchronizationContext))
+            {
+                try
+                {
+                    return await task;
+                }
+                catch (Exception ex)
+                {
+                    if 
(cancellationToken.IsCancellationRequested)
+                    {
+                        // the WebException will be available as Exception.InnerException
+
+                        // NetStandard 2 form doesn't include cancellationToken as 3rd param
+                        throw new TaskCanceledException(ex.Message, ex);
+                        //throw new OperationCanceledException(ex.Message, ex, cancellationToken);
+                    }
+
+                    // cancellation hasn't been requested, rethrow the original WebException
+                    throw;
+                }
+            }
+        }
+    }
+}
diff --git a/FaviconFetcher/Fetcher.cs b/FaviconFetcher/Fetcher.cs
index 8e1a7b3..3232790 100644
--- a/FaviconFetcher/Fetcher.cs
+++ b/FaviconFetcher/Fetcher.cs
@@ -1,6 +1,8 @@
 using FaviconFetcher.Utility;
 using System;
 using System.Net;
+using System.Threading;
+using System.Threading.Tasks;
 
 namespace FaviconFetcher
 {
@@ -34,13 +36,17 @@ public Fetcher(ISource source)
         /// 
         /// The webpage to scan for favicons.
         /// The target size of the favicon.
+        /// An optional flag for cancelling the fetch.
         /// The closest favicon to the size, or null.
-        public IconImage FetchClosest(Uri uri, IconSize size)
+        public async Task<IconImage> FetchClosest(Uri uri, IconSize size, CancellationTokenSource cancelTokenSource = null)
         {
-            return Fetch(uri, new FetchOptions
-            {
-                PerfectSize = size
-            });
+            return await Fetch(
+                uri,
+                new FetchOptions
+                {
+                    PerfectSize = size
+                },
+                cancelTokenSource);
         }
 
         /// 
@@ -48,15 +54,19 @@ public IconImage FetchClosest(Uri uri, IconSize size)
         /// 
         /// The webpage to scan for favicons.
         /// The target size of the favicon.
+        /// An optional flag for cancelling the fetch.
         /// The favicon matching the size, or null.
-        public IconImage FetchExact(Uri uri, IconSize size)
+        public async Task<IconImage> FetchExact(Uri uri, IconSize size, CancellationTokenSource cancelTokenSource = null)
         {
-            return Fetch(uri, new FetchOptions
-            {
-                MinimumSize = size,
-                MaximumSize = size,
-                PerfectSize = size
-            });
+            return await Fetch(
+                uri,
+                new FetchOptions
+                {
+                    MinimumSize = size,
+                    MaximumSize = size,
+                    PerfectSize = size
+                },
+                cancelTokenSource);
         }
 
         /// 
@@ -64,11 +74,12 @@ public IconImage FetchExact(Uri uri, IconSize size)
         /// 
         /// The webpage to scan for favicons.
         /// Filters for the returned result.
+        /// An optional flag for cancelling the fetch.
         /// The matching favicon, or null.
-        public IconImage Fetch(Uri uri, FetchOptions options)
+        public async Task<IconImage> Fetch(Uri uri, FetchOptions options, CancellationTokenSource cancelTokenSource = null)
         {
             using (var fetch = new FetchJob(Source, uri, options))
-                return fetch.ScanAndFetch();
+                return await fetch.ScanAndFetch(cancelTokenSource);
         }
     }
 
diff --git a/FaviconFetcher/HttpClientSource.cs b/FaviconFetcher/HttpClientSource.cs
new file mode 100644
index 0000000..9cde0c1
--- /dev/null
+++ b/FaviconFetcher/HttpClientSource.cs
@@ -0,0 +1,317 @@
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Net;
+using System.Net.Cache;
+using System.Net.Http;
+using System.Text;
+using System.Threading;
+using System.Threading.Tasks;
+
+namespace FaviconFetcher
+{
+    /// 
+    /// Default tool used by FaviconFetcher to download resources from a website.
+    /// 
+    public class HttpClientSource : ISource
+    {
+        /// HttpClient for performing requests for this source
+        /// Static to be thread-safe
+        /// 
+        /// https://learn.microsoft.com/en-us/dotnet/fundamentals/runtime-libraries/system-net-http-httpclient
+        /// HttpClient is intended to be instantiated once and reused
+        /// throughout the life of an application.
This should probably
+        /// be refactored to utilize a singleton outside HttpSource
+        /// so that multiple Scan/Fetch doesn't instantiate new
+        /// instances of HttpClient.
+        private static HttpClient _httpClient = null;
+
+        /// Handler that stores HttpClient parameters such as proxy and cookie settings
+        /// Static to be thread-safe along with HttpClient
+        private static HttpClientHandler _httpClientHandler = null;
+
+        /// 
+        /// The cache policy used for web requests.
+        /// 
+        public RequestCachePolicy CachePolicy = WebRequest.DefaultCachePolicy;
+
+        /// 
+        /// The HTTP User-agent header sent for web requests. The "e" in
+        /// "fetch" is swapped out with the number "3" here because a number
+        /// of sites block requests with "fetch" in the userAgent.
+        /// 
+        public string UserAgent = "FaviconF3tcher/1.2";
+
+        /// 
+        /// Proxy used for getting web requests
+        /// 
+        public WebProxy RequestsProxy = null;
+
+        /// 
+        /// Creates an HttpClientSource for accessing the websites
+        /// 
+        /// (Optional) Proxy used for getting web requests
+        public HttpClientSource(WebProxy proxy = null)
+        {
+            RequestsProxy = proxy;
+        }
+
+        /// 
+        /// Internal use only. Downloads a text resource from a URI.
+        /// 
+        /// The uri of the resource to download.
+        /// An optional flag for cancelling the download.
+        /// A reader for the resource, or null.
+        public async Task<StreamReader> DownloadText(Uri uri, CancellationTokenSource cancelTokenSource)
+        {
+            var cancelToken = cancelTokenSource != null
+                ? cancelTokenSource.Token
+                : CancellationToken.None;
+
+            var response = await _GetWebResponse(uri, cancelToken);
+            if (cancelToken.IsCancellationRequested
+                || response == null
+                || response.StatusCode != HttpStatusCode.OK)
+            {
+                response?.Dispose(); // since we won't be passing on the response stream.
+                return null;
+            }
+
+            // Header has priority.
+            // Byte Order Mark is second priority.
+            // Otherwise default to ASCII, since it'll be ASCII-compatible.
+            if (!string.IsNullOrEmpty(response.Content.Headers.ContentType?.CharSet)) // ContentType is null when the server sends no Content-Type header
+            {
+                try
+                {
+                    var charset = response.Content.Headers.ContentType.CharSet.Replace("\"", "");
+                    var encoding = Encoding.GetEncoding(charset);
+                    return new StreamReader(await response.Content.ReadAsStreamAsync(), encoding);
+                }
+                catch (Exception ex) when (ex is ArgumentException || ex is NotSupportedException) { } // unknown charset name: fall back to BOM/ASCII detection below
+            }
+            return new StreamReader(await response.Content.ReadAsStreamAsync(), Encoding.ASCII, true);
+        }
+
+        /// 
+        /// Internal use only. Downloads all images from a URI.
+        /// 
+        /// The URI of the image file to download.
+        /// An optional flag for cancelling the download.
+        /// All of the images found within the file.
+        public async Task<IEnumerable<IconImage>> DownloadImages(Uri uri, CancellationTokenSource cancelTokenSource)
+        {
+            var cancelToken = cancelTokenSource != null
+                ? cancelTokenSource.Token
+                : CancellationToken.None;
+
+            var images = new List<IconImage>();
+            var contentType = string.Empty;
+            var memoryStream = new MemoryStream();
+            Uri responseUri = null;
+
+            using (var response = await _GetWebResponse(uri, cancelToken))
+            {
+                if (cancelToken.IsCancellationRequested
+                    || response == null
+                    || response.StatusCode != HttpStatusCode.OK)
+                    return images;
+
+                contentType = response.Content.Headers.ContentType?.ToString().ToLowerInvariant() ?? string.Empty; // header may be absent
+                await (await response.Content.ReadAsStreamAsync()).CopyToAsync(memoryStream);
+
+                // Were we redirected and received a non-image response?
+                if (response.RequestMessage != null
+                    && !uri.Equals(response.RequestMessage.RequestUri)
+                    && contentType.Contains("text/html"))
+                {
+                    responseUri = response.RequestMessage.RequestUri;
+                }
+            }
+
+            if (responseUri != null)
+            {
+                var redirectedUri = new Uri(responseUri.GetLeftPart(UriPartial.Authority) + uri.PathAndQuery);
+                // Try fetching same resource at the root of the redirected URI
+                using (var response = await _GetWebResponse(redirectedUri, cancelToken))
+                {
+                    if (cancelToken.IsCancellationRequested
+                        || response == null
+                        || response.StatusCode != HttpStatusCode.OK)
+                        return images;
+
+                    contentType = response.Content.Headers.ContentType?.ToString().ToLowerInvariant() ?? string.Empty; // header may be absent
+                    memoryStream = new MemoryStream();
+                    await (await response.Content.ReadAsStreamAsync()).CopyToAsync(memoryStream);
+                }
+            }
+
+            // Ico file
+            if (_IsContentTypeIco(contentType))
+            {
+                try
+                {
+                    foreach (var size in _ExtractIcoSizes(memoryStream))
+                    {
+                        memoryStream.Position = 0;
+                        images.Add(IconImage.FromIco(memoryStream, size));
+                    }
+                    return images;
+                }
+
+                // Sometimes a website lies about "ico".
+                catch (EndOfStreamException) { }
+                catch (ArgumentException) { }
+                // We'll let this fall through to try another image type.
+            }
+
+            // Other image type
+            memoryStream.Position = 0; // rewind on every path: CopyToAsync leaves the position at the end (harmless if FromStream seeks itself)
+            try
+            {
+                images.Add(IconImage.FromStream(memoryStream));
+            }
+            catch (ArgumentException) { }
+            return images;
+        }
+
+
+        // Extract image sizes from ICO file
+        private IEnumerable<IconSize> _ExtractIcoSizes(Stream stream)
+        {
+            var reader = new BinaryReader(stream, Encoding.UTF8, true);
+
+            // Skip to count
+            stream.Seek(4, SeekOrigin.Begin);
+            var count = reader.ReadUInt16(); // ICONDIR.idCount is an unsigned 16-bit field
+
+            var sizes = new List<IconSize>();
+            for (var i = 0; i < count; ++i)
+            {
+                var offset = 6 + i * 16;
+                stream.Seek(offset, SeekOrigin.Begin);
+                int width = reader.ReadByte();
+                if (width == 0) width = 256;
+                int height = reader.ReadByte();
+                if (height == 0) height = 256;
+                sizes.Add(new IconSize(width, height));
+            }
+
+            return sizes;
+        }
+
+        // Setup and make a web request, returning the response.
+        private async Task<HttpResponseMessage> _GetWebResponse(Uri uri, CancellationToken cancellationToken)
+        {
+            HttpResponseMessage response = null;
+
+            try
+            {
+                var request = new HttpRequestMessage(HttpMethod.Get, uri);
+
+                // A number of sites are picky about incoming requests and block
+                // or delay without a User-Agent, Accept-Language and/or
+                // an Accept-Encoding header, so we add them here. Note that the
+                // handler must be set to auto-decompress any specified encodings.
+                // It's worth mentioning that some sites block less common UA
+                // strings, so calling app may need to specify a common agent.
+                _ = request.Headers.UserAgent.TryParseAdd(UserAgent);
+                _ = request.Headers.AcceptLanguage.TryParseAdd("en-US,en"); // TODO : Get system locale languages
+                _ = request.Headers.AcceptEncoding.TryParseAdd("deflate,gzip");
+
+                response = await SourceHttpClient.SendAsync(request, cancellationToken);
+                response.EnsureSuccessStatusCode();
+
+                return response;
+            }
+            catch (OperationCanceledException)
+            {
+                return null;
+            }
+            catch (HttpRequestException)
+            {
+                return response != null
+                    ? 
new HttpResponseMessage(response.StatusCode) + : new HttpResponseMessage(HttpStatusCode.ServiceUnavailable); + } + } + + // Check whether the file is an ico. + private bool _IsContentTypeIco(string contentType) + { + // Check content type + var iconTypes = new[] { + "image/x-icon", + "image/vnd.microsoft.icon", + "image/ico", + "image/icon", + "text/ico", + "application/ico" + }; + foreach (var iconType in iconTypes) + { + if (contentType.Contains(iconType)) + return true; + } + return false; + } + + /// + /// Returns an instantiated HttpClient + /// + private HttpClient SourceHttpClient + { + get + { + return GetHttpClient(this); + } + } + + /// + /// Returns the current static HttpClient for the source, + /// creating a new one when required + /// + private static HttpClient GetHttpClient(HttpClientSource source) + { + if (_httpClient == null) + { + _httpClient = new HttpClient(GetHttpClientHandler(source)); + _httpClient.DefaultRequestHeaders.ConnectionClose = true; // Don't keep connections alive + } + + return _httpClient; + } + + /// + /// Returns the current static HttpClientHandler for the source, + /// creating a new one when required. Settings can only be changed + /// before requests are made. 
+        /// 
+        private static HttpClientHandler GetHttpClientHandler(HttpClientSource source)
+        {
+            if (_httpClientHandler == null)
+            {
+                _httpClientHandler = new HttpClientHandler();
+                _httpClientHandler.UseProxy = false;
+                _httpClientHandler.AllowAutoRedirect = true;
+                _httpClientHandler.MaxAutomaticRedirections = 5;
+                _httpClientHandler.MaxConnectionsPerServer = 1;
+                // Request mechanism sets Accept-Encoding header to gzip/deflate, so we need to enable it
+                _httpClientHandler.AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate;
+
+                // Some hosts require cookies to persist after redirects
+                _httpClientHandler.CookieContainer = new CookieContainer();
+                _httpClientHandler.UseCookies = true;
+
+                if (source.RequestsProxy != null)
+                {
+                    _httpClientHandler.UseProxy = true;
+                    _httpClientHandler.Proxy = source.RequestsProxy;
+                }
+            }
+
+            return _httpClientHandler;
+        }
+
+    }
+}
diff --git a/FaviconFetcher/HttpSource.cs b/FaviconFetcher/HttpSource.cs
index 6eaf32b..ffda464 100644
--- a/FaviconFetcher/HttpSource.cs
+++ b/FaviconFetcher/HttpSource.cs
@@ -1,213 +1,16 @@
-using System;
-using System.Collections.Generic;
-using System.IO;
-using System.Net;
-using System.Net.Cache;
-using System.Text;
+using System.Net;
 
 namespace FaviconFetcher
 {
     /// 
     /// Default tool used by FaviconFetcher to download resources from a website.
     /// 
-    public class HttpSource : ISource
+    public class HttpSource : WebRequestSource
     {
-        /// 
-        /// The cache policy used for web requests.
-        /// 
-        public RequestCachePolicy CachePolicy = WebRequest.DefaultCachePolicy;
-
-        /// 
-        /// The HTTP User-agent header sent for web requests. The "e" is in
-        /// "fetch" is swapped out with the number "3" here because a number
-        /// of sites block requests with "fetch" in the userAgent.
- /// - public string UserAgent = "FaviconF3tcher/1.2"; - - /// - /// Proxy used for getting web requests - /// - public WebProxy RequestsProxy = null; - /// /// Creates a HttpSource for accessing the websites /// /// (Optional) Proxy used for getting web requests - public HttpSource(WebProxy proxy = null) - { - RequestsProxy = proxy; - } - - /// - /// Internal use only. Downloads a text resource from a URI. - /// - /// The uri of the resource to download. - /// A reader for the resource, or null. - public StreamReader DownloadText(Uri uri) - { - var response = _GetWebResponse(uri); - if (response.StatusCode != HttpStatusCode.OK) - { - response.Dispose(); // since we won't be passing on the response stream. - return null; - } - - // Header has priority. - // Byte Order Mark is second priority. - // Otherwise default to ASCII, since it'll be ASCII-compatible. - if (response.ContentType.Contains("charset=")) - { - try - { - var charset = response.CharacterSet.Replace("\"", ""); - var encoding = Encoding.GetEncoding(charset); - return new StreamReader(response.GetResponseStream(), encoding); - } - catch (NotSupportedException) { } - } - return new StreamReader(response.GetResponseStream(), Encoding.ASCII, true); - } - - /// - /// Internal use only. Downloads all images from a URI. - /// - /// The URI of the image file to download. - /// All of the images found within the file. - public IEnumerable DownloadImages(Uri uri) - { - var images = new List(); - var contentType = string.Empty; - var memoryStream = new MemoryStream(); - Uri responseUri = null; - - using (var response = _GetWebResponse(uri)) - { - if (response.StatusCode != HttpStatusCode.OK) - return images; - - contentType = response.ContentType.ToLower(); - response.GetResponseStream().CopyTo(memoryStream); - - // Were we redirected and received a non-image response? 
- if (!uri.Equals(response.ResponseUri) - && contentType.Contains("text/html")) - { - responseUri = response.ResponseUri; - } - } - - if (responseUri != null) - { - var redirectedUri = new Uri(responseUri.GetLeftPart(UriPartial.Authority).ToString() + uri.PathAndQuery); - // Try fetching same resource at the root of the redirected URI - using (var response = _GetWebResponse(redirectedUri)) - { - if (response.StatusCode != HttpStatusCode.OK) - return images; - - contentType = response.ContentType; - memoryStream = new MemoryStream(); - response.GetResponseStream().CopyTo(memoryStream); - } - } - - // Ico file - if (_IsContentTypeIco(contentType)) - { - try - { - foreach (var size in _ExtractIcoSizes(memoryStream)) - { - memoryStream.Position = 0; - images.Add(IconImage.FromIco(memoryStream, size)); - } - return images; - } - - // Sometimes a website lies about "ico". - catch (EndOfStreamException) { } - catch (ArgumentException) { } - // We'll let this fall through to try another image type. - memoryStream.Position = 0; - } - - // Other image type - try - { - images.Add(IconImage.FromStream(memoryStream)); - } - catch (ArgumentException) {} - return images; - } - - - // Extract image sizes from ICO file - private IEnumerable _ExtractIcoSizes(Stream stream) - { - var reader = new BinaryReader(stream, Encoding.UTF8, true); - - // Skip to count - stream.Seek(4, SeekOrigin.Begin); - var count = reader.ReadInt16(); - - var sizes = new List(); - for (var i = 0; i != count; ++i) - { - var offset = 6 + i * 16; - stream.Seek(offset, SeekOrigin.Begin); - int width = reader.ReadByte(); - if (width == 0) width = 256; - int height = reader.ReadByte(); - if (height == 0) height = 256; - sizes.Add(new IconSize(width, height)); - } - - return sizes; - } - - // Setup and make a web request, returning the response. 
-        private HttpWebResponse _GetWebResponse(Uri uri)
-        {
-            var request = WebRequest.Create(uri) as HttpWebRequest;
-            request.CachePolicy = CachePolicy;
-            request.UserAgent = UserAgent;
-
-            if (RequestsProxy != null)
-                request.Proxy = RequestsProxy;
-
-            // GetResponse returns response in exception if error code...
-            // so we need to handle it in a try-catch.
-            try
-            {
-                return request.GetResponse() as HttpWebResponse;
-            }
-            catch (WebException ex)
-            {
-                if (ex.Response == null)
-                    throw;
-                return ex.Response as HttpWebResponse;
-            }
-        }
-
-        // Check whether the file is an ico.
-        private bool _IsContentTypeIco(string contentType)
-        {
-            // Check content type
-            var iconTypes = new[] {
-                "image/x-icon",
-                "image/vnd.microsoft.icon",
-                "image/ico",
-                "image/icon",
-                "text/ico",
-                "application/ico"
-            };
-            foreach (var iconType in iconTypes)
-            {
-                if (contentType.Contains(iconType))
-                    return true;
-            }
-            return false;
-        }
-
+        public HttpSource(WebProxy proxy = null) : base(proxy) { }
     }
 }
diff --git a/FaviconFetcher/ISource.cs b/FaviconFetcher/ISource.cs
index 8b8b208..1126d5b 100644
--- a/FaviconFetcher/ISource.cs
+++ b/FaviconFetcher/ISource.cs
@@ -3,6 +3,7 @@
 using System.IO;
 using System.Linq;
 using System.Text;
+using System.Threading;
 using System.Threading.Tasks;
 
 namespace FaviconFetcher
@@ -16,8 +17,9 @@ public interface ISource
         /// Downloads a text-based resource from a URI.
         /// 
         /// The URI of the resource to download.
+        /// An optional flag for cancelling the download.
         /// A reader for the resource, or null.
-        StreamReader DownloadText(Uri uri);
+        Task<StreamReader> DownloadText(Uri uri, CancellationTokenSource cancelTokenSource);
 
         /// 
         /// Downloads all images from a URI.
@@ -25,8 +27,9 @@ public interface ISource
         /// 
         /// Multiple images are returned, because some file formats allow multiple images.
         /// 
         /// The URI of the image file to download.
+        /// An optional flag for cancelling the download.
         /// All of the images found within the file, or an empty list.
-        IEnumerable<IconImage> DownloadImages(Uri uri);
+        Task<IEnumerable<IconImage>> DownloadImages(Uri uri, CancellationTokenSource cancelTokenSource);
     }
 }
diff --git a/FaviconFetcher/Scanner.cs b/FaviconFetcher/Scanner.cs
index 8743c23..4db154a 100644
--- a/FaviconFetcher/Scanner.cs
+++ b/FaviconFetcher/Scanner.cs
@@ -2,6 +2,8 @@
 using System;
 using System.Collections.Generic;
 using System.Net;
+using System.Threading;
+using System.Threading.Tasks;
 
 namespace FaviconFetcher
 {
@@ -31,30 +33,34 @@ public Scanner(ISource source)
         }
 
         /// 
-        /// Scans a URI for references to favicons.
+        /// Scans a URI for references to favicons asynchronously.
         /// 
         /// The uri of the webpage to scan for favicon references.
+        /// An optional flag for cancelling the scan.
         /// An enumerable of found favicon references.
-        public IEnumerable<ScanResult> Scan(Uri uri)
+        public async Task<IEnumerable<ScanResult>> Scan(Uri uri, CancellationTokenSource cancelTokenSource = null)
         {
+            var scanResults = new List<ScanResult>();
+
             var scans = new Queue<SubScanner>();
             scans.Enqueue(new DefaultScanner(Source, uri));
 
             // While we have subscanners queued
             var max_scans = 4;
-            while (scans.Count > 0 && max_scans-- > 0)
+            while (scans.Count > 0 && max_scans-- > 0
+                && (cancelTokenSource == null || !cancelTokenSource.IsCancellationRequested)) // stop early; results gathered so far are still returned
             {
                 var scan = scans.Dequeue();
-                scan.Start();
-
-                // Go through found favicon references
-                foreach (var result in scan.Results)
-                    yield return result;
+                await scan.Start(cancelTokenSource);
+                scanResults.AddRange(scan.Results);
 
                 // Add all subscanners that are suggested
                 foreach (var suggested in scan.SuggestedScanners)
                     scans.Enqueue(suggested);
             }
+
+            return scanResults;
         }
 
     }
diff --git a/FaviconFetcher/SubScanners/BrowserconfigXmlScanner.cs b/FaviconFetcher/SubScanners/BrowserconfigXmlScanner.cs
index 5dbcc45..ce5e94d 100644
--- a/FaviconFetcher/SubScanners/BrowserconfigXmlScanner.cs
+++ b/FaviconFetcher/SubScanners/BrowserconfigXmlScanner.cs
@@ -6,6 +6,7 @@
 using System.Runtime.CompilerServices;
 using System.Runtime.Serialization;
 using System.Text;
+using
System.Threading; using System.Threading.Tasks; using System.Xml; using System.Xml.Schema; @@ -22,9 +23,9 @@ public BrowserconfigXmlScanner(ISource source, Uri uri) : base(source, uri) { } - public override void Start() + public async override Task Start(CancellationTokenSource cancelTokenSource = null) { - using (var reader = Source.DownloadText(TargetUri)) + using (var reader = await Source.DownloadText(TargetUri, cancelTokenSource)) { if (reader != null) _ParseContent(new TextParser(reader)); diff --git a/FaviconFetcher/SubScanners/DefaultScanner.cs b/FaviconFetcher/SubScanners/DefaultScanner.cs index d2d2244..e2d9ce6 100644 --- a/FaviconFetcher/SubScanners/DefaultScanner.cs +++ b/FaviconFetcher/SubScanners/DefaultScanner.cs @@ -3,6 +3,8 @@ using System.Collections.Generic; using System.Runtime.CompilerServices; using System.Text; +using System.Threading; +using System.Threading.Tasks; #if DEBUG [assembly: InternalsVisibleTo("FaviconFetcher.Tests")] @@ -17,9 +19,9 @@ public DefaultScanner(ISource source, Uri uri) : base(source, uri) { } - public override void Start() + public async override Task Start(CancellationTokenSource cancelTokenSource = null) { - using (var reader = Source.DownloadText(TargetUri)) + using (var reader = await Source.DownloadText(TargetUri, cancelTokenSource)) { if (reader != null) _ParsePage(new TextParser(reader)); diff --git a/FaviconFetcher/SubScanners/FaviconIcoScanner.cs b/FaviconFetcher/SubScanners/FaviconIcoScanner.cs index 82238a4..e99db99 100644 --- a/FaviconFetcher/SubScanners/FaviconIcoScanner.cs +++ b/FaviconFetcher/SubScanners/FaviconIcoScanner.cs @@ -5,6 +5,7 @@ using System.Net.Http; using System.Runtime.CompilerServices; using System.Text; +using System.Threading; using System.Threading.Tasks; #if DEBUG @@ -19,13 +20,15 @@ public FaviconIcoScanner(ISource source, Uri uri) : base(source, uri) { } - public override void Start() + public override Task Start(CancellationTokenSource cancelTokenSource = null) { Results.Add(new 
ScanResult { Location = new Uri(TargetUri, "/favicon.ico"), ExpectedSize = new IconSize(16, 16) }); + + return Task.CompletedTask; } } diff --git a/FaviconFetcher/SubScanners/ManifestJsonScanner.cs b/FaviconFetcher/SubScanners/ManifestJsonScanner.cs index 94811d8..8e711ed 100644 --- a/FaviconFetcher/SubScanners/ManifestJsonScanner.cs +++ b/FaviconFetcher/SubScanners/ManifestJsonScanner.cs @@ -7,6 +7,7 @@ using System.Runtime.Serialization; using System.Runtime.Serialization.Json; using System.Text; +using System.Threading; using System.Threading.Tasks; #if DEBUG @@ -20,9 +21,9 @@ public ManifestJsonScanner(ISource source, Uri uri) : base(source, uri) { } - public override void Start() + public async override Task Start(CancellationTokenSource cancelTokenSource = null) { - using (var reader = Source.DownloadText(TargetUri)) + using (var reader = await Source.DownloadText(TargetUri, cancelTokenSource)) { if (reader != null) _ParseContent(reader); diff --git a/FaviconFetcher/SubScanners/SubScanner.cs b/FaviconFetcher/SubScanners/SubScanner.cs index d1a08a1..e7e85fa 100644 --- a/FaviconFetcher/SubScanners/SubScanner.cs +++ b/FaviconFetcher/SubScanners/SubScanner.cs @@ -1,5 +1,7 @@ using System; using System.Collections.Generic; +using System.Threading; +using System.Threading.Tasks; namespace FaviconFetcher.SubScanners { @@ -26,7 +28,7 @@ public SubScanner(ISource source, Uri uri) } // Start the scan for favicons. 
- public abstract void Start(); + public abstract Task Start(CancellationTokenSource cancelTokenSource = null); } } diff --git a/FaviconFetcher/Utility/FetchJob.cs b/FaviconFetcher/Utility/FetchJob.cs index 3a63e1a..fd2adba 100644 --- a/FaviconFetcher/Utility/FetchJob.cs +++ b/FaviconFetcher/Utility/FetchJob.cs @@ -3,6 +3,7 @@ using System.Collections.Generic; using System.Linq; using System.Text; +using System.Threading; using System.Threading.Tasks; namespace FaviconFetcher.Utility @@ -37,10 +38,10 @@ public void Dispose() } // Scan and fetches best icon per Options. - public IconImage ScanAndFetch() + public async Task ScanAndFetch(CancellationTokenSource cancelTokenSource = null) { var parsedUris = new HashSet(); - foreach (var possibleIcon in new Scanner(Source).Scan(TargetUri)) + foreach (var possibleIcon in await new Scanner(Source).Scan(TargetUri, cancelTokenSource)) { // Because the scanner can return duplicate URIs. if (parsedUris.Contains(possibleIcon.Location)) @@ -50,7 +51,7 @@ public IconImage ScanAndFetch() // Hopefully we've already found it if (_IsPerfect(possibleIcon.ExpectedSize)) { - var image = DownloadImages_ReturnPerfect(possibleIcon.Location); + var image = await DownloadImages_ReturnPerfect(possibleIcon.Location, cancelTokenSource); if (image != null) return image; } @@ -65,7 +66,7 @@ public IconImage ScanAndFetch() // Download them, prioritizing those closest to perfect foreach (var possibleIcon in notVerified) { - var image = DownloadImages_ReturnPerfect(possibleIcon.Location); + var image = await DownloadImages_ReturnPerfect(possibleIcon.Location, cancelTokenSource); if (image != null) return image; } @@ -78,9 +79,9 @@ public IconImage ScanAndFetch() // Downloads images. If perfect found, returns it. 
- private IconImage DownloadImages_ReturnPerfect(Uri uri) + private async Task DownloadImages_ReturnPerfect(Uri uri, CancellationTokenSource cancelTokenSource) { - foreach (var image in Source.DownloadImages(uri)) + foreach (var image in await Source.DownloadImages(uri, cancelTokenSource)) { if (_IsPerfect(image.Size)) return image; diff --git a/FaviconFetcher/WebRequestSource.cs b/FaviconFetcher/WebRequestSource.cs new file mode 100644 index 0000000..22d7e94 --- /dev/null +++ b/FaviconFetcher/WebRequestSource.cs @@ -0,0 +1,239 @@ +using FaviconFetcher.Extensions; +using System; +using System.Collections.Generic; +using System.IO; +using System.Net; +using System.Net.Cache; +using System.Text; +using System.Threading; +using System.Threading.Tasks; + +namespace FaviconFetcher +{ + /// + /// Default tool used by FaviconFetcher to download resources from a website. + /// + public class WebRequestSource : ISource + { + /// + /// The cache policy used for web requests. + /// + public RequestCachePolicy CachePolicy = WebRequest.DefaultCachePolicy; + + /// + /// The HTTP User-agent header sent for web requests. The "e" is in + /// "fetch" is swapped out with the number "3" here because a number + /// of sites block requests with "fetch" in the userAgent. + /// + public string UserAgent = "FaviconF3tcher/1.2"; + + /// + /// Proxy used for getting web requests + /// + public WebProxy RequestsProxy = null; + + /// + /// Creates a HttpSource for accessing the websites + /// + /// (Optional) Proxy used for getting web requests + public WebRequestSource(WebProxy proxy = null) + { + RequestsProxy = proxy; + } + + /// + /// Internal use only. Downloads a text resource from a URI. + /// + /// The uri of the resource to download. + /// An optional flag for cancelling the download. + /// A reader for the resource, or null. + public async Task DownloadText(Uri uri, CancellationTokenSource cancelTokenSource) + { + var cancelToken = cancelTokenSource != null + ? 
cancelTokenSource.Token + : CancellationToken.None; + + var response = await _GetWebResponse(uri, cancelToken); + if (cancelToken.IsCancellationRequested + || response == null + || response.StatusCode != HttpStatusCode.OK) + { + response?.Dispose(); // since we won't be passing on the response stream. + return null; + } + + // Header has priority. + // Byte Order Mark is second priority. + // Otherwise default to ASCII, since it'll be ASCII-compatible. + if (response.ContentType.Contains("charset=")) + { + try + { + var charset = response.CharacterSet.Replace("\"", ""); + var encoding = Encoding.GetEncoding(charset); + return new StreamReader(response.GetResponseStream(), encoding); + } + catch (NotSupportedException) { } + } + return new StreamReader(response.GetResponseStream(), Encoding.ASCII, true); + } + + /// + /// Internal use only. Downloads all images from a URI. + /// + /// The URI of the image file to download. + /// An optional flag for cancelling the download. + /// All of the images found within the file. + public async Task> DownloadImages(Uri uri, CancellationTokenSource cancelTokenSource) + { + var cancelToken = cancelTokenSource != null + ? cancelTokenSource.Token + : CancellationToken.None; + + var images = new List(); + var contentType = string.Empty; + var memoryStream = new MemoryStream(); + Uri responseUri = null; + + using (var response = await _GetWebResponse(uri, cancelToken)) + { + if (cancelToken.IsCancellationRequested + || response == null + || response.StatusCode != HttpStatusCode.OK) + return images; + + contentType = response.ContentType.ToLower(); + await response.GetResponseStream().CopyToAsync(memoryStream); + + // Were we redirected and received a non-image response? 
+ if (!uri.Equals(response.ResponseUri) + && contentType.Contains("text/html")) + { + responseUri = response.ResponseUri; + } + } + + if (responseUri != null) + { + var redirectedUri = new Uri(responseUri.GetLeftPart(UriPartial.Authority).ToString() + uri.PathAndQuery); + // Try fetching same resource at the root of the redirected URI + using (var response = await _GetWebResponse(redirectedUri, cancelToken)) + { + if (cancelToken.IsCancellationRequested + || response == null + || response.StatusCode != HttpStatusCode.OK) + return images; + + contentType = response.ContentType; + memoryStream = new MemoryStream(); + await response.GetResponseStream().CopyToAsync(memoryStream); + } + } + + // Ico file + if (_IsContentTypeIco(contentType)) + { + try + { + foreach (var size in _ExtractIcoSizes(memoryStream)) + { + memoryStream.Position = 0; + images.Add(IconImage.FromIco(memoryStream, size)); + } + return images; + } + + // Sometimes a website lies about "ico". + catch (EndOfStreamException) { } + catch (ArgumentException) { } + // We'll let this fall through to try another image type. + memoryStream.Position = 0; + } + + // Other image type + try + { + images.Add(IconImage.FromStream(memoryStream)); + } + catch (ArgumentException) {} + return images; + } + + + // Extract image sizes from ICO file + private IEnumerable _ExtractIcoSizes(Stream stream) + { + var reader = new BinaryReader(stream, Encoding.UTF8, true); + + // Skip to count + stream.Seek(4, SeekOrigin.Begin); + var count = reader.ReadInt16(); + + var sizes = new List(); + for (var i = 0; i != count; ++i) + { + var offset = 6 + i * 16; + stream.Seek(offset, SeekOrigin.Begin); + int width = reader.ReadByte(); + if (width == 0) width = 256; + int height = reader.ReadByte(); + if (height == 0) height = 256; + sizes.Add(new IconSize(width, height)); + } + + return sizes; + } + + // Setup and make a web request, returning the response. 
+ private async Task _GetWebResponse(Uri uri, CancellationToken cancellationToken) + { +#pragma warning disable SYSLIB0014 // Type or member is obsolete + var request = WebRequest.Create(uri) as HttpWebRequest; +#pragma warning restore SYSLIB0014 // Type or member is obsolete + + request.CachePolicy = CachePolicy; + request.UserAgent = UserAgent; + + if (RequestsProxy != null) + request.Proxy = RequestsProxy; + + // GetResponse returns response in exception if error code... + // so we need to handle it in a try-catch. + try + { + return await request.GetResponseAsync().WithCancellation(cancellationToken, request.Abort, true) as HttpWebResponse; + } + catch (TaskCanceledException) + { + return null; + } + catch (WebException ex) + { + if (ex.Response == null) + throw; + return ex.Response as HttpWebResponse; + } + } + + // Check whether the file is an ico. + private bool _IsContentTypeIco(string contentType) + { + // Check content type + var iconTypes = new[] { + "image/x-icon", + "image/vnd.microsoft.icon", + "image/ico", + "image/icon", + "text/ico", + "application/ico" + }; + foreach (var iconType in iconTypes) + { + if (contentType.Contains(iconType)) + return true; + } + return false; + } + + } +} diff --git a/README.md b/README.md index d9d147f..c91d17e 100644 --- a/README.md +++ b/README.md @@ -40,7 +40,7 @@ The fetcher will scan a webpage for favicons and download the one that best matc ```csharp var fetcher = new Fetcher(); -var image = fetcher.FetchClosest(uri, new Size(16, 16)); +var image = await fetcher.FetchClosest(uri, new Size(16, 16)); // Don't forget to dispose of the image when no longer needed. ``` @@ -53,7 +53,7 @@ To get a list of possible favicons without downloading any, use the scanner. 
```csharp var scanner = new Scanner(); -foreach (var result in scanner.Scan(uri)) +foreach (var result in await scanner.Scan(uri)) { var expectedSize = result.ExpectedSize; var absoluteUri = result.Location; @@ -73,7 +73,7 @@ var source = new HttpSource() { CachePolicy = new RequestCachePolicy(RequestCacheLevel.CacheIfAvailable) }; var fetcher = new Fetcher(source); -var image = fetcher.FetchClosest(uri, new Size(16, 16)); +var image = await fetcher.FetchClosest(uri, new Size(16, 16)); // Don't forget to dispose of the image when no longer needed. ```