Skip to content
1 change: 1 addition & 0 deletions AUTHORS.rst
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ Contributors
* Lars Hupfeldt Nielsen - OpenSSL FIPS mode md5 bug fix
* Louis Maddox -- better docstrings
* Łukasz Langa -- partial support for autodoc
* Lukas Wieg -- JavaScript search improvement
* Marco Buttu -- doctest extension (pyversion option)
* Mark Ostroth -- semantic HTML contributions
* Martin Hans -- autodoc improvements
Expand Down
1 change: 1 addition & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@ Bugs fixed
configuration variable ``doctest_test_doctest_blocks``.
* #13885: Coverage builder: Fix TypeError when warning about missing modules.
Patch by Damien Ayers.
* #13892: HTML search: support word exclusion in the search by prefixing words with "-".


Testing
Expand Down
46 changes: 31 additions & 15 deletions sphinx/themes/basic/static/searchtools.js
Original file line number Diff line number Diff line change
Expand Up @@ -171,15 +171,28 @@ const _orderResultsByScoreThenName = (a, b) => {
* Default splitQuery function. Can be overridden in ``sphinx.search`` with a
* custom function per language.
*
* The regular expression works by splitting the string on consecutive characters
* that are not Unicode letters, numbers, underscores, or emoji characters.
* This is the same as ``\W+`` in Python, preserving the surrogate pair area.
* The `consecutiveLetters` regular expression works by matching consecutive characters
* that are Unicode letters, numbers, underscores, or emoji characters.
*
* The `searchWords` regular expression works by matching a word like structure
* that matches the `consecutiveLetters` with or without a leading hyphen '-' which is
* used to exclude search terms later on.
*/
if (typeof splitQuery === "undefined") {
var splitQuery = (query) =>
query
.split(/[^\p{Letter}\p{Number}_\p{Emoji_Presentation}]+/gu)
.filter((term) => term); // remove remaining empty strings
var splitQuery = (query) => {
const consecutiveLetters =
/[\p{Letter}\p{Number}_\p{Emoji_Presentation}]+/gu;
const searchWords = new RegExp(
`(${consecutiveLetters.source})|\\s(-${consecutiveLetters.source})`,
"gu",
);
return Array.from(
query
.matchAll(searchWords)
.map((results) => results[1] ?? results[2]) // select one of the possible groups (e.g. "word" or "-word").
.filter((term) => term), // remove remaining empty strings.
);
};
}

/**
Expand Down Expand Up @@ -627,15 +640,18 @@ const Search = {

// ensure that none of the excluded terms is in the search result
if (
[...excludedTerms].some(
(term) =>
terms[term] === file
|| titleTerms[term] === file
|| (terms[term] || []).includes(file)
|| (titleTerms[term] || []).includes(file),
)
[...excludedTerms].some((excludedTerm) => {
// Both mappings will contain either a single integer or a list of integers.
// Converting them to lists makes the comparison more readable.
let excludedTermFiles = [].concat(terms[excludedTerm]);
let excludedTitleFiles = [].concat(titleTerms[excludedTerm]);
return (
excludedTermFiles.includes(file)
|| excludedTitleFiles.includes(file)
);
})
)
break;
continue;

// select one (max) score for the file.
const score = Math.max(...wordList.map((w) => scoreMap.get(file).get(w)));
Expand Down
1 change: 1 addition & 0 deletions tests/js/fixtures/search_exclusion/searchindex.js

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Empty file.
4 changes: 4 additions & 0 deletions tests/js/roots/search_exclusion/excluded.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Excluded Page
=============

This is a page with the special word penguin.
12 changes: 12 additions & 0 deletions tests/js/roots/search_exclusion/index.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
Main Page
=========

This is the main page of the ``search_exclusion`` test project.

This document is used as a test fixture to check that search results can be
filtered in the query by specifying excluded terms.

A term which starts with a hypen will be used as excluded term.

Include a second page which can be excluded in the search:
:index:`excluded`
37 changes: 37 additions & 0 deletions tests/js/searchtools.spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,33 @@ describe("Basic html theme search", function () {
expect(Search.performTermsSearch(searchterms, excluded)).toEqual(hits);
});

it("should find results when excluded terms are used", function () {
// This fixture already existed and has the right data to make this test work.
// Replace with another matching fixture if necessary.
eval(loadFixture("search_exclusion/searchindex.js"));

// It's important that the searchterm is included in multiple pages while the
// excluded term is not included in all pages.
// In this case the ``for`` is included in the two existing pages while the ``ask``
// is only included in one page.
[_searchQuery, searchterms, excluded, ..._remainingItems] =
Search._parseQuery("page -penguin");

// prettier-ignore
hits = [[
'index',
'Main Page',
'',
null,
15,
'index.rst',
'text'
]];

expect(excluded).toEqual(new Set(["penguin"]));
expect(Search.performTermsSearch(searchterms, excluded)).toEqual(hits);
});

it('should partially-match "sphinx" when in title index', function () {
eval(loadFixture("partial/searchindex.js"));

Expand Down Expand Up @@ -295,6 +322,16 @@ describe("splitQuery regression tests", () => {
expect(parts).toEqual(["Pin", "Code"]);
});

it("can keep underscores in words", () => {
const parts = splitQuery("python_function");
expect(parts).toEqual(["python_function"]);
});

it("can maintain negated search words", () => {
const parts = splitQuery("Pin -Code");
expect(parts).toEqual(["Pin", "-Code"]);
});

it("can split Chinese characters", () => {
const parts = splitQuery("Hello from 中国 上海");
expect(parts).toEqual(["Hello", "from", "中国", "上海"]);
Expand Down
Loading