From 0449e4825b30b0974b57fc237bf0eae73f866662 Mon Sep 17 00:00:00 2001 From: Lukas Wieg Date: Sat, 13 Sep 2025 11:15:00 +0200 Subject: [PATCH 01/10] Fix that negated search words are filtered out in the splitQuery. --- sphinx/themes/basic/static/searchtools.js | 22 +++++++++++++++------- tests/js/searchtools.spec.js | 10 ++++++++++ 2 files changed, 25 insertions(+), 7 deletions(-) diff --git a/sphinx/themes/basic/static/searchtools.js b/sphinx/themes/basic/static/searchtools.js index 5a7628a18a2..6aa4f216fda 100644 --- a/sphinx/themes/basic/static/searchtools.js +++ b/sphinx/themes/basic/static/searchtools.js @@ -171,15 +171,23 @@ const _orderResultsByScoreThenName = (a, b) => { * Default splitQuery function. Can be overridden in ``sphinx.search`` with a * custom function per language. * - * The regular expression works by splitting the string on consecutive characters - * that are not Unicode letters, numbers, underscores, or emoji characters. - * This is the same as ``\W+`` in Python, preserving the surrogate pair area. + * The `consecutiveLetters` regular expression works by matching consecutive characters + * that are Unicode letters, numbers, underscores, or emoji characters. + * + * The `searchWords` regular expression works by matching a word like structure + * that matches the `consecutiveLetters` with or without a leading hyphen '-' which is + * used to exclude search terms later on. */ if (typeof splitQuery === "undefined") { - var splitQuery = (query) => - query - .split(/[^\p{Letter}\p{Number}_\p{Emoji_Presentation}]+/gu) - .filter((term) => term); // remove remaining empty strings + var splitQuery = (query) => { + const consecutiveLetters = /[\p{Letter}\p{Number}_\p{Emoji_Presentation}]+/gu; + const searchWords = new RegExp(`(${consecutiveLetters.source})|\\s(-${consecutiveLetters.source})`, "gu"); + return Array.from( + query.matchAll(searchWords) + .map((results) => results[1] ?? results[2]) // select one of the possible groups (e.g. 
"word" or "-word"). + .filter((term) => term) // remove remaining empty strings. + ); + } } /** diff --git a/tests/js/searchtools.spec.js b/tests/js/searchtools.spec.js index d00689c907c..3007cf23757 100644 --- a/tests/js/searchtools.spec.js +++ b/tests/js/searchtools.spec.js @@ -295,6 +295,16 @@ describe("splitQuery regression tests", () => { expect(parts).toEqual(["Pin", "Code"]); }); + it("can keep underscores in words", () => { + const parts = splitQuery("python_function"); + expect(parts).toEqual(["python_function"]); + }); + + it("can maintain negated search words", () => { + const parts = splitQuery("Pin -Code"); + expect(parts).toEqual(["Pin", "-Code"]); + }); + it("can split Chinese characters", () => { const parts = splitQuery("Hello from 中国 上海"); expect(parts).toEqual(["Hello", "from", "中国", "上海"]); From dbfbd46a513e7d0dd6ceeae20fc26c54081c46e5 Mon Sep 17 00:00:00 2001 From: Lukas Wieg Date: Sun, 14 Sep 2025 11:04:59 +0200 Subject: [PATCH 02/10] Fix that negated search words create a SyntaxError: Invalid or unexpected token. --- sphinx/themes/basic/static/searchtools.js | 12 +++++++----- tests/js/searchtools.spec.js | 19 +++++++++++++++++++ 2 files changed, 26 insertions(+), 5 deletions(-) diff --git a/sphinx/themes/basic/static/searchtools.js b/sphinx/themes/basic/static/searchtools.js index 6aa4f216fda..fc89a54cd5e 100644 --- a/sphinx/themes/basic/static/searchtools.js +++ b/sphinx/themes/basic/static/searchtools.js @@ -636,11 +636,13 @@ const Search = { // ensure that none of the excluded terms is in the search result if ( [...excludedTerms].some( - (term) => - terms[term] === file - || titleTerms[term] === file - || (terms[term] || []).includes(file) - || (titleTerms[term] || []).includes(file), + (excludedTerm) => { + // Both mappings will contain either a single integer or a list of integers. + // Converting them to lists makes the comparison more readable. 
+ let excludedTermFiles = [].concat(terms[excludedTerm]); + let excludedTitleFiles = [].concat(titleTerms[excludedTerm]); + return excludedTermFiles.includes(file) || excludedTitleFiles.includes(file); + } ) ) break; diff --git a/tests/js/searchtools.spec.js b/tests/js/searchtools.spec.js index 3007cf23757..900f8d9a81e 100644 --- a/tests/js/searchtools.spec.js +++ b/tests/js/searchtools.spec.js @@ -66,6 +66,25 @@ describe("Basic html theme search", function () { expect(Search.performTermsSearch(searchterms, excluded)).toEqual(hits); }); + it("should be able to exclude search terms", function () { + eval(loadFixture("titles/searchindex.js")); + + [_searchQuery, searchterms, excluded, ..._remainingItems] = + Search._parseQuery("main page -function"); + // prettier-ignore + hits = [[ + 'index', + 'Main Page', + '', + null, + 15, + 'index.rst', + 'text' + ]]; + expect(excluded).toEqual(new Set(["function"])); + expect(Search.performTermsSearch(searchterms, excluded)).toEqual(hits); + }); + it('should partially-match "sphinx" when in title index', function () { eval(loadFixture("partial/searchindex.js")); From 7a8d46ee881b3f45786101a10850ba0ffc94fdca Mon Sep 17 00:00:00 2001 From: Lukas Wieg Date: Mon, 15 Sep 2025 10:22:32 +0200 Subject: [PATCH 03/10] Fix that excluded words would abort the entire search if matched in one page. --- sphinx/themes/basic/static/searchtools.js | 2 +- tests/js/searchtools.spec.js | 21 ++++++++++++++------- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/sphinx/themes/basic/static/searchtools.js b/sphinx/themes/basic/static/searchtools.js index fc89a54cd5e..8edd6d04b5e 100644 --- a/sphinx/themes/basic/static/searchtools.js +++ b/sphinx/themes/basic/static/searchtools.js @@ -645,7 +645,7 @@ const Search = { } ) ) - break; + continue; // select one (max) score for the file. 
const score = Math.max(...wordList.map((w) => scoreMap.get(file).get(w))); diff --git a/tests/js/searchtools.spec.js b/tests/js/searchtools.spec.js index 900f8d9a81e..8a73748a08f 100644 --- a/tests/js/searchtools.spec.js +++ b/tests/js/searchtools.spec.js @@ -66,22 +66,29 @@ describe("Basic html theme search", function () { expect(Search.performTermsSearch(searchterms, excluded)).toEqual(hits); }); - it("should be able to exclude search terms", function () { + it("should find results when excluded terms are used", function () { + // This fixture already existed and has the right data to make this test work. + // Replace with another matching fixture if necessary. eval(loadFixture("titles/searchindex.js")); + // It's important that the searchterm is included in multiple pages while the + // excluded term is not included in all pages. + // In this case the ``for`` is included in the two existing pages while the ``ask`` + // is only included in one page. [_searchQuery, searchterms, excluded, ..._remainingItems] = - Search._parseQuery("main page -function"); + Search._parseQuery("for -ask"); + // prettier-ignore hits = [[ - 'index', - 'Main Page', + 'relevance', + 'Relevance', '', null, - 15, - 'index.rst', + 2, + 'relevance.rst', 'text' ]]; - expect(excluded).toEqual(new Set(["function"])); + expect(excluded).toEqual(new Set(["ask"])); expect(Search.performTermsSearch(searchterms, excluded)).toEqual(hits); }); From cfdfda47b216fed6802ce69f7f91874022e72214 Mon Sep 17 00:00:00 2001 From: Lukas Wieg Date: Mon, 15 Sep 2025 11:27:35 +0200 Subject: [PATCH 04/10] Fix format and add contribution information. 
--- AUTHORS.rst | 1 + CHANGES.rst | 1 + sphinx/themes/basic/static/searchtools.js | 36 +++++++++++++---------- 3 files changed, 23 insertions(+), 15 deletions(-) diff --git a/AUTHORS.rst b/AUTHORS.rst index 5bcd74c943b..1299c1a4748 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -83,6 +83,7 @@ Contributors * Lars Hupfeldt Nielsen - OpenSSL FIPS mode md5 bug fix * Louis Maddox -- better docstrings * Łukasz Langa -- partial support for autodoc +* Lukas Wieg -- JavaScript search improvement * Marco Buttu -- doctest extension (pyversion option) * Mark Ostroth -- semantic HTML contributions * Martin Hans -- autodoc improvements diff --git a/CHANGES.rst b/CHANGES.rst index 64f94e14ec3..53970ceb2d1 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -118,6 +118,7 @@ Bugs fixed for objects documented as ``:py:data:`` to be hyperlinked in function signatures. * #13858: doctest: doctest blocks are now correctly added to a group defined by the configuration variable ``doctest_test_doctest_blocks``. +* #13892: search: support word exclusion in the search by prefixing words with hyphen. Testing diff --git a/sphinx/themes/basic/static/searchtools.js b/sphinx/themes/basic/static/searchtools.js index 8edd6d04b5e..926e79e0bde 100644 --- a/sphinx/themes/basic/static/searchtools.js +++ b/sphinx/themes/basic/static/searchtools.js @@ -180,14 +180,19 @@ const _orderResultsByScoreThenName = (a, b) => { */ if (typeof splitQuery === "undefined") { var splitQuery = (query) => { - const consecutiveLetters = /[\p{Letter}\p{Number}_\p{Emoji_Presentation}]+/gu; - const searchWords = new RegExp(`(${consecutiveLetters.source})|\\s(-${consecutiveLetters.source})`, "gu"); + const consecutiveLetters = + /[\p{Letter}\p{Number}_\p{Emoji_Presentation}]+/gu; + const searchWords = new RegExp( + `(${consecutiveLetters.source})|\\s(-${consecutiveLetters.source})`, + "gu", + ); return Array.from( - query.matchAll(searchWords) - .map((results) => results[1] ?? results[2]) // select one of the possible groups (e.g. 
"word" or "-word"). - .filter((term) => term) // remove remaining empty strings. + query + .matchAll(searchWords) + .map((results) => results[1] ?? results[2]) // select one of the possible groups (e.g. "word" or "-word"). + .filter((term) => term), // remove remaining empty strings. ); - } + }; } /** @@ -635,15 +640,16 @@ const Search = { // ensure that none of the excluded terms is in the search result if ( - [...excludedTerms].some( - (excludedTerm) => { - // Both mappings will contain either a single integer or a list of integers. - // Converting them to lists makes the comparison more readable. - let excludedTermFiles = [].concat(terms[excludedTerm]); - let excludedTitleFiles = [].concat(titleTerms[excludedTerm]); - return excludedTermFiles.includes(file) || excludedTitleFiles.includes(file); - } - ) + [...excludedTerms].some((excludedTerm) => { + // Both mappings will contain either a single integer or a list of integers. + // Converting them to lists makes the comparison more readable. + let excludedTermFiles = [].concat(terms[excludedTerm]); + let excludedTitleFiles = [].concat(titleTerms[excludedTerm]); + return ( + excludedTermFiles.includes(file) + || excludedTitleFiles.includes(file) + ); + }) ) continue; From 80d6ad1b938b117484db6f6beb75a0a221c2cb24 Mon Sep 17 00:00:00 2001 From: Lukas Wieg Date: Mon, 15 Sep 2025 11:29:38 +0200 Subject: [PATCH 05/10] Fix long line. --- CHANGES.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index 53970ceb2d1..a57ea60e8a8 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -118,7 +118,7 @@ Bugs fixed for objects documented as ``:py:data:`` to be hyperlinked in function signatures. * #13858: doctest: doctest blocks are now correctly added to a group defined by the configuration variable ``doctest_test_doctest_blocks``. -* #13892: search: support word exclusion in the search by prefixing words with hyphen. 
+* #13892: search: support word exclusion in the search by prefixing words with "-". Testing From 9b06fd581a189e35328ffea1e1c7e17abaac788e Mon Sep 17 00:00:00 2001 From: Lukas Wieg Date: Fri, 19 Sep 2025 22:11:17 +0200 Subject: [PATCH 06/10] Add a dedicated fixture for testing excluded words in searches. --- tests/js/roots/search_exclusion/conf.py | 0 tests/js/roots/search_exclusion/excluded.rst | 4 ++++ tests/js/roots/search_exclusion/index.rst | 12 ++++++++++++ tests/js/searchtools.spec.js | 15 ++++++++------- 4 files changed, 24 insertions(+), 7 deletions(-) create mode 100644 tests/js/roots/search_exclusion/conf.py create mode 100644 tests/js/roots/search_exclusion/excluded.rst create mode 100644 tests/js/roots/search_exclusion/index.rst diff --git a/tests/js/roots/search_exclusion/conf.py b/tests/js/roots/search_exclusion/conf.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/js/roots/search_exclusion/excluded.rst b/tests/js/roots/search_exclusion/excluded.rst new file mode 100644 index 00000000000..62f8485153b --- /dev/null +++ b/tests/js/roots/search_exclusion/excluded.rst @@ -0,0 +1,4 @@ +Excluded Page +============= + +This is a page with the special word penguin. diff --git a/tests/js/roots/search_exclusion/index.rst b/tests/js/roots/search_exclusion/index.rst new file mode 100644 index 00000000000..94520c59db9 --- /dev/null +++ b/tests/js/roots/search_exclusion/index.rst @@ -0,0 +1,12 @@ +Main Page +========= + +This is the main page of the ``search_exclusion`` test project. + +This document is used as a test fixture to check that search results can be +filtered in the query by specifying excluded terms. + +A term which starts with a hypen will be used as excluded term. 
+ +Include a second page which can be excluded in the search: +:index:`excluded` \ No newline at end of file diff --git a/tests/js/searchtools.spec.js b/tests/js/searchtools.spec.js index 8a73748a08f..b57d23b499b 100644 --- a/tests/js/searchtools.spec.js +++ b/tests/js/searchtools.spec.js @@ -69,26 +69,27 @@ describe("Basic html theme search", function () { it("should find results when excluded terms are used", function () { // This fixture already existed and has the right data to make this test work. // Replace with another matching fixture if necessary. - eval(loadFixture("titles/searchindex.js")); + eval(loadFixture("search_exclusion/searchindex.js")); // It's important that the searchterm is included in multiple pages while the // excluded term is not included in all pages. // In this case the ``for`` is included in the two existing pages while the ``ask`` // is only included in one page. [_searchQuery, searchterms, excluded, ..._remainingItems] = - Search._parseQuery("for -ask"); + Search._parseQuery("page -penguin"); // prettier-ignore hits = [[ - 'relevance', - 'Relevance', + 'index', + 'Main Page', '', null, - 2, - 'relevance.rst', + 15, + 'index.rst', 'text' ]]; - expect(excluded).toEqual(new Set(["ask"])); + + expect(excluded).toEqual(new Set(["penguin"])); expect(Search.performTermsSearch(searchterms, excluded)).toEqual(hits); }); From 11d2e2e4e9b25f42b556aa0b9daca42b5e42994c Mon Sep 17 00:00:00 2001 From: Lukas Wieg Date: Fri, 19 Sep 2025 22:21:51 +0200 Subject: [PATCH 07/10] Is the pipeline flaky? --- tests/js/roots/search_exclusion/index.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/js/roots/search_exclusion/index.rst b/tests/js/roots/search_exclusion/index.rst index 94520c59db9..0a47d842169 100644 --- a/tests/js/roots/search_exclusion/index.rst +++ b/tests/js/roots/search_exclusion/index.rst @@ -9,4 +9,6 @@ filtered in the query by specifying excluded terms. 
A term which starts with a hypen will be used as excluded term. Include a second page which can be excluded in the search: -:index:`excluded` \ No newline at end of file +:index:`excluded` + +is the pipeline flaky? \ No newline at end of file From 6cc4055bc4870064b5e615825b2e87f94f86c37b Mon Sep 17 00:00:00 2001 From: Lukas Wieg Date: Fri, 19 Sep 2025 22:28:12 +0200 Subject: [PATCH 08/10] Add missing file. --- tests/js/fixtures/search_exclusion/searchindex.js | 1 + 1 file changed, 1 insertion(+) create mode 100644 tests/js/fixtures/search_exclusion/searchindex.js diff --git a/tests/js/fixtures/search_exclusion/searchindex.js b/tests/js/fixtures/search_exclusion/searchindex.js new file mode 100644 index 00000000000..8e21d1001d2 --- /dev/null +++ b/tests/js/fixtures/search_exclusion/searchindex.js @@ -0,0 +1 @@ +Search.setIndex({"alltitles":{"Excluded Page":[[0,null]],"Main Page":[[1,null]]},"docnames":["excluded","index"],"envversion":{"sphinx":66,"sphinx.domains.c":3,"sphinx.domains.changeset":1,"sphinx.domains.citation":1,"sphinx.domains.cpp":9,"sphinx.domains.index":1,"sphinx.domains.javascript":3,"sphinx.domains.math":2,"sphinx.domains.python":4,"sphinx.domains.rst":2,"sphinx.domains.std":2},"filenames":["excluded.rst","index.rst"],"indexentries":{"excluded":[[1,"index-0",false]]},"objects":{},"objnames":{},"objtypes":{},"terms":{"A":1,"This":[0,1],"can":1,"check":1,"document":1,"exclud":1,"filter":1,"fixtur":1,"hypen":1,"includ":1,"penguin":0,"project":1,"queri":1,"result":1,"search":1,"search_exclus":1,"second":1,"special":0,"specifi":1,"start":1,"term":1,"test":1,"use":1,"will":1,"word":0},"titles":["Excluded Page","Main Page"],"titleterms":{"exclud":0,"main":1,"page":[0,1]}}) \ No newline at end of file From ffc193b2aea3bfc314156387f4c911a74a47e205 Mon Sep 17 00:00:00 2001 From: Lukas Wieg Date: Fri, 19 Sep 2025 22:28:40 +0200 Subject: [PATCH 09/10] Remove debug change. 
--- tests/js/roots/search_exclusion/index.rst | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/js/roots/search_exclusion/index.rst b/tests/js/roots/search_exclusion/index.rst index 0a47d842169..29f3a13012d 100644 --- a/tests/js/roots/search_exclusion/index.rst +++ b/tests/js/roots/search_exclusion/index.rst @@ -10,5 +10,3 @@ A term which starts with a hypen will be used as excluded term. Include a second page which can be excluded in the search: :index:`excluded` - -is the pipeline flaky? \ No newline at end of file From a4764b1cfac26898b41cd279d2d1ed88b9921e94 Mon Sep 17 00:00:00 2001 From: Lukas Wieg Date: Tue, 7 Oct 2025 20:45:14 +0200 Subject: [PATCH 10/10] Reduce line length to fix doclinter. --- CHANGES.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index 069d91f7fdd..c400ec32574 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -120,7 +120,7 @@ Bugs fixed configuration variable ``doctest_test_doctest_blocks``. * #13885: Coverage builder: Fix TypeError when warning about missing modules. Patch by Damien Ayers. -* #13892: HTML search: support word exclusion in the search by prefixing words with "-". +* #13892: HTML search: fix exclusion of search terms prefixed with "-". Testing