From 9527989837a12e4fde3ed412ba896dbd4fa6cff7 Mon Sep 17 00:00:00 2001 From: Blake Burkhart Date: Wed, 15 Apr 2020 18:32:07 -0500 Subject: [PATCH 1/3] Escape metacharacters when building regexes from glob patterns --- src/utils.js | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/src/utils.js b/src/utils.js index 5164d9c..b2a5a64 100644 --- a/src/utils.js +++ b/src/utils.js @@ -93,9 +93,10 @@ export const matchesSavedMap = (url, matchDomainOnly, {host}) => { // turning glob into regex isn't the worst thing: // 1. * becomes .* // 2. ? becomes .? - return new RegExp(host.substr(1) - .replace(/\*/g, '.*') - .replace(/\?/g, '.?')) + // Because the string is regex escaped, you must match \* instead of * + return new RegExp(escapeRegExp(host.substr(1)) + .replace(/\\\*/g, '.*') + .replace(/\\\?/g, '.?')) .test(toMatch); } else { const key = urlKeyFromUrl(urlO); @@ -135,3 +136,13 @@ export function formatString(string, context) { return replacement; }); } + +/** + * Escape all regex metacharacters in a string + * + * @param string {String} + */ +function escapeRegExp(string) { + // From https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions#Escaping + return string.replace(/[.*+\-?^${}()|[\]\\]/g, '\\$&'); // $& means the whole matched string +} From 27378d000c911b39cc41dc16264ec8fa63d5e936 Mon Sep 17 00:00:00 2001 From: Blake Burkhart Date: Wed, 15 Apr 2020 18:52:09 -0500 Subject: [PATCH 2/3] Always match the entire domain with patterns when match domain only is enabled --- src/utils.js | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/src/utils.js b/src/utils.js index b2a5a64..f461acf 100644 --- a/src/utils.js +++ b/src/utils.js @@ -83,7 +83,11 @@ export const matchesSavedMap = (url, matchDomainOnly, {host}) => { } if (host[0] === PREFIX_REGEX) { - const regex = host.substr(1); + let regex = host.substr(1); + if (matchDomainOnly) { + // Group first so a top-level | cannot escape the anchors; double ^^ 
characters still work + regex = "^(?:" + regex + ")$"; + } try { return new RegExp(regex).test(toMatch); } catch (e) { @@ -94,10 +98,13 @@ export const matchesSavedMap = (url, matchDomainOnly, {host}) => { // 1. * becomes .* // 2. ? becomes .? // Because the string is regex escaped, you must match \* instead of * - return new RegExp(escapeRegExp(host.substr(1)) - .replace(/\\\*/g, '.*') - .replace(/\\\?/g, '.?')) - .test(toMatch); + let regex = escapeRegExp(host.substr(1)) + .replace(/\\\*/g, '.*') + .replace(/\\\?/g, '.?'); + if (matchDomainOnly) { + regex = "^" + regex + "$"; + } + return new RegExp(regex).test(toMatch); } else { const key = urlKeyFromUrl(urlO); const _url = ((key.indexOf('/') === -1) ? key.concat('/') : key).toLowerCase(); From 0152d79a3f75042fee4af281470520c0b9ebfcaf Mon Sep 17 00:00:00 2001 From: Blake Burkhart Date: Wed, 15 Apr 2020 17:30:17 -0500 Subject: [PATCH 3/3] Add tests for matching domain substrings and wildcard subdomains for regex and glob patterns --- src/__tests__/utils.spec.js | 50 +++++++++++++++++++++++++++---------- 1 file changed, 37 insertions(+), 13 deletions(-) diff --git a/src/__tests__/utils.spec.js b/src/__tests__/utils.spec.js index 784ab21..e5c27cc 100644 --- a/src/__tests__/utils.spec.js +++ b/src/__tests__/utils.spec.js @@ -77,45 +77,69 @@ describe('utils', () => { }); }); - function testPrefixes(isRegex) { - isRegex = !!isRegex; - const simplePattern = isRegex? 
- '@duckduckgo\\.com' : '!duckduckgo.com'; + function testPrefixes(pattern, expectedUrl, evilUrl) { return () => { it('should match url without path', () => { expect( utils.matchesSavedMap( - 'https://duckduckgo.com', + expectedUrl, matchDomainOnly, { - host: simplePattern, + host: pattern, }) ).toBe(true); }); it('should match url with path', () => { expect( utils.matchesSavedMap( - 'https://duckduckgo.com/?q=search+me+baby', + expectedUrl + '/?q=search+me+baby', matchDomainOnly, { - host: simplePattern, + host: pattern, }) ).toBe(true); }); let prefix = matchDomainOnly ? 'should not' : 'should'; - let description = `${prefix} match url with pattern only in path`; + let description = `${pattern} ${prefix} match ${evilUrl}`; it(description, () => { expect( utils.matchesSavedMap( - 'https://google.com/?q=duckduckgo', + evilUrl, matchDomainOnly, { - host: simplePattern, + host: pattern, }) ).toBe(!matchDomainOnly); }); }; } - describe('with regex host prefix', testPrefixes(true)); - describe('with glob host prefix', testPrefixes()); + describe('with regex host prefix', testPrefixes( + '@duckduckgo\\.com', + 'https://duckduckgo.com', + 'https://google.com/?q=duckduckgo')); + + describe('with glob host prefix', testPrefixes( + '!duckduckgo.com', + 'https://duckduckgo.com', + 'https://google.com/?q=duckduckgo')); + + describe('with regex host prefix', testPrefixes( + '@duckduckgo\\.com', + 'https://duckduckgo.com', + 'https://evil.duckduckgo.com.evil.com')); + + describe('with glob host prefix', testPrefixes( + '!duckduckgo.com', + 'https://duckduckgo.com', + 'https://evil.duckduckgo.com.evil.com')); + + describe('with glob subdomain prefix', testPrefixes( + '!*.duckduckgo.com', + 'https://example.duckduckgo.com', + 'https://notduckduckgo.com')); + + describe('with regex subdomain prefix', testPrefixes( + '@(.+)\\.duckduckgo\\.com', + 'https://example.duckduckgo.com', + 'https://notduckduckgo.com')); }; }