diff --git a/AUTHORS.rst b/AUTHORS.rst index 5bcd74c943b..1299c1a4748 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -83,6 +83,7 @@ Contributors * Lars Hupfeldt Nielsen - OpenSSL FIPS mode md5 bug fix * Louis Maddox -- better docstrings * Łukasz Langa -- partial support for autodoc +* Lukas Wieg -- JavaScript search improvement * Marco Buttu -- doctest extension (pyversion option) * Mark Ostroth -- semantic HTML contributions * Martin Hans -- autodoc improvements diff --git a/CHANGES.rst b/CHANGES.rst index 2886c55cd03..41d85292735 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -130,6 +130,7 @@ Bugs fixed directly defined in certain cases, depending on autodoc processing order. Patch by Jeremy Maitin-Shepard. +* #13892: HTML search: fix word exclusion in the search by prefixing words with "-". Testing diff --git a/sphinx/themes/basic/static/searchtools.js b/sphinx/themes/basic/static/searchtools.js index 5a7628a18a2..926e79e0bde 100644 --- a/sphinx/themes/basic/static/searchtools.js +++ b/sphinx/themes/basic/static/searchtools.js @@ -171,15 +171,28 @@ const _orderResultsByScoreThenName = (a, b) => { * Default splitQuery function. Can be overridden in ``sphinx.search`` with a * custom function per language. * - * The regular expression works by splitting the string on consecutive characters - * that are not Unicode letters, numbers, underscores, or emoji characters. - * This is the same as ``\W+`` in Python, preserving the surrogate pair area. + * The `consecutiveLetters` regular expression works by matching consecutive characters + * that are Unicode letters, numbers, underscores, or emoji characters. + * + * The `searchWords` regular expression works by matching a word like structure + * that matches the `consecutiveLetters` with or without a leading hyphen '-' which is + * used to exclude search terms later on. */ if (typeof splitQuery === "undefined") { - var splitQuery = (query) => - query - .split(/[^\p{Letter}\p{Number}_\p{Emoji_Presentation}]+/gu) - .filter((term) => term); // remove remaining empty strings + var splitQuery = (query) => { + const consecutiveLetters = + /[\p{Letter}\p{Number}_\p{Emoji_Presentation}]+/gu; + const searchWords = new RegExp( + `(${consecutiveLetters.source})|\\s(-${consecutiveLetters.source})`, + "gu", + ); + return Array.from( + query + .matchAll(searchWords) + .map((results) => results[1] ?? results[2]) // select one of the possible groups (e.g. "word" or "-word"). + .filter((term) => term), // remove remaining empty strings. + ); + }; } /** @@ -627,15 +640,18 @@ const Search = { // ensure that none of the excluded terms is in the search result if ( - [...excludedTerms].some( - (term) => - terms[term] === file - || titleTerms[term] === file - || (terms[term] || []).includes(file) - || (titleTerms[term] || []).includes(file), - ) + [...excludedTerms].some((excludedTerm) => { + // Both mappings will contain either a single integer or a list of integers. + // Converting them to lists makes the comparison more readable. + let excludedTermFiles = [].concat(terms[excludedTerm]); + let excludedTitleFiles = [].concat(titleTerms[excludedTerm]); + return ( + excludedTermFiles.includes(file) + || excludedTitleFiles.includes(file) + ); + }) ) - break; + continue; // select one (max) score for the file. const score = Math.max(...wordList.map((w) => scoreMap.get(file).get(w))); diff --git a/tests/js/fixtures/search_exclusion/searchindex.js b/tests/js/fixtures/search_exclusion/searchindex.js new file mode 100644 index 00000000000..8e21d1001d2 --- /dev/null +++ b/tests/js/fixtures/search_exclusion/searchindex.js @@ -0,0 +1 @@ +Search.setIndex({"alltitles":{"Excluded Page":[[0,null]],"Main Page":[[1,null]]},"docnames":["excluded","index"],"envversion":{"sphinx":66,"sphinx.domains.c":3,"sphinx.domains.changeset":1,"sphinx.domains.citation":1,"sphinx.domains.cpp":9,"sphinx.domains.index":1,"sphinx.domains.javascript":3,"sphinx.domains.math":2,"sphinx.domains.python":4,"sphinx.domains.rst":2,"sphinx.domains.std":2},"filenames":["excluded.rst","index.rst"],"indexentries":{"excluded":[[1,"index-0",false]]},"objects":{},"objnames":{},"objtypes":{},"terms":{"A":1,"This":[0,1],"can":1,"check":1,"document":1,"exclud":1,"filter":1,"fixtur":1,"hypen":1,"includ":1,"penguin":0,"project":1,"queri":1,"result":1,"search":1,"search_exclus":1,"second":1,"special":0,"specifi":1,"start":1,"term":1,"test":1,"use":1,"will":1,"word":0},"titles":["Excluded Page","Main Page"],"titleterms":{"exclud":0,"main":1,"page":[0,1]}}) \ No newline at end of file diff --git a/tests/js/roots/search_exclusion/conf.py b/tests/js/roots/search_exclusion/conf.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/js/roots/search_exclusion/excluded.rst b/tests/js/roots/search_exclusion/excluded.rst new file mode 100644 index 00000000000..62f8485153b --- /dev/null +++ b/tests/js/roots/search_exclusion/excluded.rst @@ -0,0 +1,4 @@ +Excluded Page +============= + +This is a page with the special word penguin. diff --git a/tests/js/roots/search_exclusion/index.rst b/tests/js/roots/search_exclusion/index.rst new file mode 100644 index 00000000000..29f3a13012d --- /dev/null +++ b/tests/js/roots/search_exclusion/index.rst @@ -0,0 +1,12 @@ +Main Page +========= + +This is the main page of the ``search_exclusion`` test project. + +This document is used as a test fixture to check that search results can be +filtered in the query by specifying excluded terms. + +A term which starts with a hypen will be used as excluded term. + +Include a second page which can be excluded in the search: +:index:`excluded` diff --git a/tests/js/searchtools.spec.js b/tests/js/searchtools.spec.js index d00689c907c..b57d23b499b 100644 --- a/tests/js/searchtools.spec.js +++ b/tests/js/searchtools.spec.js @@ -66,6 +66,33 @@ describe("Basic html theme search", function () { expect(Search.performTermsSearch(searchterms, excluded)).toEqual(hits); }); + it("should find results when excluded terms are used", function () { + // This fixture already existed and has the right data to make this test work. + // Replace with another matching fixture if necessary. + eval(loadFixture("search_exclusion/searchindex.js")); + + // It's important that the searchterm is included in multiple pages while the + // excluded term is not included in all pages. + // In this case the ``for`` is included in the two existing pages while the ``ask`` + // is only included in one page. + [_searchQuery, searchterms, excluded, ..._remainingItems] = + Search._parseQuery("page -penguin"); + + // prettier-ignore + hits = [[ + 'index', + 'Main Page', + '', + null, + 15, + 'index.rst', + 'text' + ]]; + + expect(excluded).toEqual(new Set(["penguin"])); + expect(Search.performTermsSearch(searchterms, excluded)).toEqual(hits); + }); + it('should partially-match "sphinx" when in title index', function () { eval(loadFixture("partial/searchindex.js")); @@ -295,6 +322,16 @@ describe("splitQuery regression tests", () => { expect(parts).toEqual(["Pin", "Code"]); }); + it("can keep underscores in words", () => { + const parts = splitQuery("python_function"); + expect(parts).toEqual(["python_function"]); + }); + + it("can maintain negated search words", () => { + const parts = splitQuery("Pin -Code"); + expect(parts).toEqual(["Pin", "-Code"]); + }); + it("can split Chinese characters", () => { const parts = splitQuery("Hello from 中国 上海"); expect(parts).toEqual(["Hello", "from", "中国", "上海"]);